Line data Source code
1 : /* Medium-level subroutines: convert bit-field store and extract
2 : and shifts, multiplies and divides to rtl instructions.
3 : Copyright (C) 1987-2026 Free Software Foundation, Inc.
4 :
5 : This file is part of GCC.
6 :
7 : GCC is free software; you can redistribute it and/or modify it under
8 : the terms of the GNU General Public License as published by the Free
9 : Software Foundation; either version 3, or (at your option) any later
10 : version.
11 :
12 : GCC is distributed in the hope that it will be useful, but WITHOUT ANY
13 : WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 : FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
15 : for more details.
16 :
17 : You should have received a copy of the GNU General Public License
18 : along with GCC; see the file COPYING3. If not see
19 : <http://www.gnu.org/licenses/>. */
20 :
21 : /* Work around tree-optimization/91825. */
22 : #pragma GCC diagnostic warning "-Wmaybe-uninitialized"
23 :
24 : #include "config.h"
25 : #include "system.h"
26 : #include "coretypes.h"
27 : #include "backend.h"
28 : #include "target.h"
29 : #include "rtl.h"
30 : #include "tree.h"
31 : #include "predict.h"
32 : #include "memmodel.h"
33 : #include "tm_p.h"
34 : #include "optabs.h"
35 : #include "expmed.h"
36 : #include "regs.h"
37 : #include "emit-rtl.h"
38 : #include "diagnostic-core.h"
39 : #include "fold-const.h"
40 : #include "stor-layout.h"
41 : #include "dojump.h"
42 : #include "explow.h"
43 : #include "expr.h"
44 : #include "langhooks.h"
45 : #include "tree-vector-builder.h"
46 : #include "recog.h"
47 :
48 : struct target_expmed default_target_expmed;
49 : #if SWITCHABLE_TARGET
50 : struct target_expmed *this_target_expmed = &default_target_expmed;
51 : #endif
52 :
53 : static bool store_integral_bit_field (rtx, opt_scalar_int_mode,
54 : unsigned HOST_WIDE_INT,
55 : unsigned HOST_WIDE_INT,
56 : poly_uint64, poly_uint64,
57 : machine_mode, rtx, bool, bool);
58 : static void store_fixed_bit_field (rtx, opt_scalar_int_mode,
59 : unsigned HOST_WIDE_INT,
60 : unsigned HOST_WIDE_INT,
61 : poly_uint64, poly_uint64,
62 : rtx, scalar_int_mode, bool);
63 : static void store_fixed_bit_field_1 (rtx, scalar_int_mode,
64 : unsigned HOST_WIDE_INT,
65 : unsigned HOST_WIDE_INT,
66 : rtx, scalar_int_mode, bool);
67 : static void store_split_bit_field (rtx, opt_scalar_int_mode,
68 : unsigned HOST_WIDE_INT,
69 : unsigned HOST_WIDE_INT,
70 : poly_uint64, poly_uint64,
71 : rtx, scalar_int_mode, bool);
72 : static rtx extract_integral_bit_field (rtx, opt_scalar_int_mode,
73 : unsigned HOST_WIDE_INT,
74 : unsigned HOST_WIDE_INT, int, rtx,
75 : machine_mode, machine_mode, bool, bool);
76 : static rtx extract_fixed_bit_field (machine_mode, rtx, opt_scalar_int_mode,
77 : unsigned HOST_WIDE_INT,
78 : unsigned HOST_WIDE_INT, rtx, int, bool);
79 : static rtx extract_fixed_bit_field_1 (machine_mode, rtx, scalar_int_mode,
80 : unsigned HOST_WIDE_INT,
81 : unsigned HOST_WIDE_INT, rtx, int, bool);
82 : static rtx lshift_value (machine_mode, unsigned HOST_WIDE_INT, int);
83 : static rtx extract_split_bit_field (rtx, opt_scalar_int_mode,
84 : unsigned HOST_WIDE_INT,
85 : unsigned HOST_WIDE_INT, int, bool);
86 : static void do_cmp_and_jump (rtx, rtx, enum rtx_code, machine_mode, rtx_code_label *);
87 : static rtx expand_smod_pow2 (scalar_int_mode, rtx, HOST_WIDE_INT);
88 : static rtx expand_sdiv_pow2 (scalar_int_mode, rtx, HOST_WIDE_INT);
89 :
90 : /* Return a constant integer mask value of mode MODE with BITSIZE ones
91 : followed by BITPOS zeros, or the complement of that if COMPLEMENT.
92 : The mask is truncated if necessary to the width of mode MODE. The
93 : mask is zero-extended if BITSIZE+BITPOS is too small for MODE. */
94 :
95 : static inline rtx
96 230267 : mask_rtx (scalar_int_mode mode, int bitpos, int bitsize, bool complement)
97 : {
98 230267 : return immed_wide_int_const
99 230267 : (wi::shifted_mask (bitpos, bitsize, complement,
100 230267 : GET_MODE_PRECISION (mode)), mode);
101 : }
102 :
103 : /* Test whether a value is zero of a power of two. */
104 : #define EXACT_POWER_OF_2_OR_ZERO_P(x) \
105 : (((x) & ((x) - HOST_WIDE_INT_1U)) == 0)
106 :
107 : struct init_expmed_rtl
108 : {
109 : rtx reg;
110 : rtx plus;
111 : rtx neg;
112 : rtx mult;
113 : rtx sdiv;
114 : rtx udiv;
115 : rtx sdiv_32;
116 : rtx smod_32;
117 : rtx wide_mult;
118 : rtx wide_lshr;
119 : rtx wide_trunc;
120 : rtx shift;
121 : rtx shift_mult;
122 : rtx shift_add;
123 : rtx shift_sub0;
124 : rtx shift_sub1;
125 : rtx zext;
126 : rtx trunc;
127 :
128 : rtx pow2[MAX_BITS_PER_WORD];
129 : rtx cint[MAX_BITS_PER_WORD];
130 : };
131 :
132 : static void
133 29881600 : init_expmed_one_conv (struct init_expmed_rtl *all, scalar_int_mode to_mode,
134 : scalar_int_mode from_mode, bool speed)
135 : {
136 29881600 : int to_size, from_size;
137 29881600 : rtx which;
138 :
139 29881600 : to_size = GET_MODE_PRECISION (to_mode);
140 29881600 : from_size = GET_MODE_PRECISION (from_mode);
141 :
142 : /* Most partial integers have a precision less than the "full"
143 : integer it requires for storage. In case one doesn't, for
144 : comparison purposes here, reduce the bit size by one in that
145 : case. */
146 29881600 : if (GET_MODE_CLASS (to_mode) == MODE_PARTIAL_INT
147 29881600 : && pow2p_hwi (to_size))
148 5976320 : to_size --;
149 29881600 : if (GET_MODE_CLASS (from_mode) == MODE_PARTIAL_INT
150 29881600 : && pow2p_hwi (from_size))
151 0 : from_size --;
152 :
153 : /* Assume cost of zero-extend and sign-extend is the same. */
154 29881600 : which = (to_size < from_size ? all->trunc : all->zext);
155 :
156 29881600 : PUT_MODE (all->reg, from_mode);
157 29881600 : set_convert_cost (to_mode, from_mode, speed,
158 : set_src_cost (which, to_mode, speed));
159 : /* Restore all->reg's mode. */
160 29881600 : PUT_MODE (all->reg, to_mode);
161 29881600 : }
162 :
163 : static void
164 17502080 : init_expmed_one_mode (struct init_expmed_rtl *all,
165 : machine_mode mode, int speed)
166 : {
167 17502080 : int m, n, mode_bitsize;
168 17502080 : machine_mode mode_from;
169 :
170 17502080 : mode_bitsize = GET_MODE_UNIT_BITSIZE (mode);
171 :
172 17502080 : PUT_MODE (all->reg, mode);
173 17502080 : PUT_MODE (all->plus, mode);
174 17502080 : PUT_MODE (all->neg, mode);
175 17502080 : PUT_MODE (all->mult, mode);
176 17502080 : PUT_MODE (all->sdiv, mode);
177 17502080 : PUT_MODE (all->udiv, mode);
178 17502080 : PUT_MODE (all->sdiv_32, mode);
179 17502080 : PUT_MODE (all->smod_32, mode);
180 17502080 : PUT_MODE (all->wide_trunc, mode);
181 17502080 : PUT_MODE (all->shift, mode);
182 17502080 : PUT_MODE (all->shift_mult, mode);
183 17502080 : PUT_MODE (all->shift_add, mode);
184 17502080 : PUT_MODE (all->shift_sub0, mode);
185 17502080 : PUT_MODE (all->shift_sub1, mode);
186 17502080 : PUT_MODE (all->zext, mode);
187 17502080 : PUT_MODE (all->trunc, mode);
188 :
189 17502080 : set_add_cost (speed, mode, set_src_cost (all->plus, mode, speed));
190 17502080 : set_neg_cost (speed, mode, set_src_cost (all->neg, mode, speed));
191 17502080 : set_mul_cost (speed, mode, set_src_cost (all->mult, mode, speed));
192 17502080 : set_sdiv_cost (speed, mode, set_src_cost (all->sdiv, mode, speed));
193 17502080 : set_udiv_cost (speed, mode, set_src_cost (all->udiv, mode, speed));
194 :
195 17502080 : set_sdiv_pow2_cheap (speed, mode, (set_src_cost (all->sdiv_32, mode, speed)
196 17502080 : <= 2 * add_cost (speed, mode)));
197 17502080 : set_smod_pow2_cheap (speed, mode, (set_src_cost (all->smod_32, mode, speed)
198 17502080 : <= 4 * add_cost (speed, mode)));
199 :
200 17502080 : set_shift_cost (speed, mode, 0, 0);
201 17502080 : {
202 17502080 : int cost = add_cost (speed, mode);
203 17502080 : set_shiftadd_cost (speed, mode, 0, cost);
204 17502080 : set_shiftsub0_cost (speed, mode, 0, cost);
205 17502080 : set_shiftsub1_cost (speed, mode, 0, cost);
206 : }
207 :
208 17502080 : n = MIN (MAX_BITS_PER_WORD, mode_bitsize);
209 587386880 : for (m = 1; m < n; m++)
210 : {
211 569884800 : XEXP (all->shift, 1) = all->cint[m];
212 569884800 : XEXP (all->shift_mult, 1) = all->pow2[m];
213 :
214 569884800 : set_shift_cost (speed, mode, m, set_src_cost (all->shift, mode, speed));
215 569884800 : set_shiftadd_cost (speed, mode, m, set_src_cost (all->shift_add, mode,
216 : speed));
217 569884800 : set_shiftsub0_cost (speed, mode, m, set_src_cost (all->shift_sub0, mode,
218 : speed));
219 569884800 : set_shiftsub1_cost (speed, mode, m, set_src_cost (all->shift_sub1, mode,
220 : speed));
221 : }
222 :
223 17502080 : scalar_int_mode int_mode_to;
224 17502080 : if (is_a <scalar_int_mode> (mode, &int_mode_to))
225 : {
226 34150400 : for (mode_from = MIN_MODE_INT; mode_from <= MAX_MODE_INT;
227 29881600 : mode_from = (machine_mode)(mode_from + 1))
228 29881600 : init_expmed_one_conv (all, int_mode_to,
229 : as_a <scalar_int_mode> (mode_from), speed);
230 :
231 4268800 : scalar_int_mode wider_mode;
232 4268800 : if (GET_MODE_CLASS (int_mode_to) == MODE_INT
233 4268800 : && GET_MODE_WIDER_MODE (int_mode_to).exists (&wider_mode))
234 : {
235 2561280 : PUT_MODE (all->reg, mode);
236 2561280 : PUT_MODE (all->zext, wider_mode);
237 2561280 : PUT_MODE (all->wide_mult, wider_mode);
238 2561280 : PUT_MODE (all->wide_lshr, wider_mode);
239 2561280 : XEXP (all->wide_lshr, 1)
240 2561280 : = gen_int_shift_amount (wider_mode, mode_bitsize);
241 :
242 2561280 : set_mul_widen_cost (speed, wider_mode,
243 : set_src_cost (all->wide_mult, wider_mode, speed));
244 2561280 : set_mul_highpart_cost (speed, int_mode_to,
245 : set_src_cost (all->wide_trunc,
246 : int_mode_to, speed));
247 : }
248 : }
249 17502080 : }
250 :
251 : void
252 213440 : init_expmed (void)
253 : {
254 213440 : struct init_expmed_rtl all;
255 213440 : machine_mode mode = QImode;
256 213440 : int m, speed;
257 :
258 213440 : memset (&all, 0, sizeof all);
259 13660160 : for (m = 1; m < MAX_BITS_PER_WORD; m++)
260 : {
261 13446720 : all.pow2[m] = GEN_INT (HOST_WIDE_INT_1 << m);
262 13446720 : all.cint[m] = GEN_INT (m);
263 : }
264 :
265 : /* Avoid using hard regs in ways which may be unsupported. */
266 213440 : all.reg = gen_raw_REG (mode, LAST_VIRTUAL_REGISTER + 1);
267 213440 : all.plus = gen_rtx_PLUS (mode, all.reg, all.reg);
268 213440 : all.neg = gen_rtx_NEG (mode, all.reg);
269 213440 : all.mult = gen_rtx_MULT (mode, all.reg, all.reg);
270 213440 : all.sdiv = gen_rtx_DIV (mode, all.reg, all.reg);
271 213440 : all.udiv = gen_rtx_UDIV (mode, all.reg, all.reg);
272 213440 : all.sdiv_32 = gen_rtx_DIV (mode, all.reg, all.pow2[5]);
273 213440 : all.smod_32 = gen_rtx_MOD (mode, all.reg, all.pow2[5]);
274 213440 : all.zext = gen_rtx_ZERO_EXTEND (mode, all.reg);
275 213440 : all.wide_mult = gen_rtx_MULT (mode, all.zext, all.zext);
276 213440 : all.wide_lshr = gen_rtx_LSHIFTRT (mode, all.wide_mult, all.reg);
277 213440 : all.wide_trunc = gen_rtx_TRUNCATE (mode, all.wide_lshr);
278 213440 : all.shift = gen_rtx_ASHIFT (mode, all.reg, all.reg);
279 213440 : all.shift_mult = gen_rtx_MULT (mode, all.reg, all.reg);
280 213440 : all.shift_add = gen_rtx_PLUS (mode, all.shift_mult, all.reg);
281 213440 : all.shift_sub0 = gen_rtx_MINUS (mode, all.shift_mult, all.reg);
282 213440 : all.shift_sub1 = gen_rtx_MINUS (mode, all.reg, all.shift_mult);
283 213440 : all.trunc = gen_rtx_TRUNCATE (mode, all.reg);
284 :
285 640320 : for (speed = 0; speed < 2; speed++)
286 : {
287 426880 : crtl->maybe_hot_insn_p = speed;
288 426880 : set_zero_cost (speed, set_src_cost (const0_rtx, QImode, speed));
289 :
290 3415040 : for (mode = MIN_MODE_INT; mode <= MAX_MODE_INT;
291 2988160 : mode = (machine_mode)(mode + 1))
292 2988160 : init_expmed_one_mode (&all, mode, speed);
293 :
294 : if (MIN_MODE_PARTIAL_INT != VOIDmode)
295 1707520 : for (mode = MIN_MODE_PARTIAL_INT; mode <= MAX_MODE_PARTIAL_INT;
296 1280640 : mode = (machine_mode)(mode + 1))
297 1280640 : init_expmed_one_mode (&all, mode, speed);
298 :
299 : if (MIN_MODE_VECTOR_INT != VOIDmode)
300 13660160 : for (mode = MIN_MODE_VECTOR_INT; mode <= MAX_MODE_VECTOR_INT;
301 13233280 : mode = (machine_mode)(mode + 1))
302 13233280 : init_expmed_one_mode (&all, mode, speed);
303 : }
304 :
305 213440 : if (alg_hash_used_p ())
306 : {
307 1043 : struct alg_hash_entry *p = alg_hash_entry_ptr (0);
308 1043 : memset (p, 0, sizeof (*p) * NUM_ALG_HASH_ENTRIES);
309 : }
310 : else
311 212397 : set_alg_hash_used_p (true);
312 213440 : default_rtl_profile ();
313 :
314 213440 : ggc_free (all.trunc);
315 213440 : ggc_free (all.shift_sub1);
316 213440 : ggc_free (all.shift_sub0);
317 213440 : ggc_free (all.shift_add);
318 213440 : ggc_free (all.shift_mult);
319 213440 : ggc_free (all.shift);
320 213440 : ggc_free (all.wide_trunc);
321 213440 : ggc_free (all.wide_lshr);
322 213440 : ggc_free (all.wide_mult);
323 213440 : ggc_free (all.zext);
324 213440 : ggc_free (all.smod_32);
325 213440 : ggc_free (all.sdiv_32);
326 213440 : ggc_free (all.udiv);
327 213440 : ggc_free (all.sdiv);
328 213440 : ggc_free (all.mult);
329 213440 : ggc_free (all.neg);
330 213440 : ggc_free (all.plus);
331 213440 : ggc_free (all.reg);
332 213440 : }
333 :
334 : /* Return an rtx representing minus the value of X.
335 : MODE is the intended mode of the result,
336 : useful if X is a CONST_INT. */
337 :
338 : rtx
339 1035472 : negate_rtx (machine_mode mode, rtx x)
340 : {
341 1035472 : rtx result = simplify_unary_operation (NEG, mode, x, mode);
342 :
343 1035472 : if (result == 0)
344 1989 : result = expand_unop (mode, neg_optab, x, NULL_RTX, 0);
345 :
346 1035472 : return result;
347 : }
348 :
349 : /* Whether reverse storage order is supported on the target. */
350 : static int reverse_storage_order_supported = -1;
351 :
352 : /* Check whether reverse storage order is supported on the target. */
353 :
354 : static void
355 286 : check_reverse_storage_order_support (void)
356 : {
357 286 : if (BYTES_BIG_ENDIAN != WORDS_BIG_ENDIAN)
358 : {
359 : reverse_storage_order_supported = 0;
360 : sorry ("reverse scalar storage order");
361 : }
362 : else
363 286 : reverse_storage_order_supported = 1;
364 286 : }
365 :
366 : /* Whether reverse FP storage order is supported on the target. */
367 : static int reverse_float_storage_order_supported = -1;
368 :
369 : /* Check whether reverse FP storage order is supported on the target. */
370 :
371 : static void
372 55 : check_reverse_float_storage_order_support (void)
373 : {
374 55 : if (FLOAT_WORDS_BIG_ENDIAN != WORDS_BIG_ENDIAN)
375 : {
376 : reverse_float_storage_order_supported = 0;
377 : sorry ("reverse floating-point scalar storage order");
378 : }
379 : else
380 55 : reverse_float_storage_order_supported = 1;
381 55 : }
382 :
383 : /* Return an rtx representing value of X with reverse storage order.
384 : MODE is the intended mode of the result,
385 : useful if X is a CONST_INT. */
386 :
387 : rtx
388 3265 : flip_storage_order (machine_mode mode, rtx x)
389 : {
390 3265 : scalar_int_mode int_mode;
391 3265 : rtx result;
392 :
393 3265 : if (mode == QImode)
394 : return x;
395 :
396 2415 : if (COMPLEX_MODE_P (mode))
397 : {
398 44 : rtx real = read_complex_part (x, false);
399 44 : rtx imag = read_complex_part (x, true);
400 :
401 88 : real = flip_storage_order (GET_MODE_INNER (mode), real);
402 88 : imag = flip_storage_order (GET_MODE_INNER (mode), imag);
403 :
404 44 : return gen_rtx_CONCAT (mode, real, imag);
405 : }
406 :
407 2371 : if (UNLIKELY (reverse_storage_order_supported < 0))
408 286 : check_reverse_storage_order_support ();
409 :
410 2371 : if (!is_a <scalar_int_mode> (mode, &int_mode))
411 : {
412 243 : if (FLOAT_MODE_P (mode)
413 243 : && UNLIKELY (reverse_float_storage_order_supported < 0))
414 55 : check_reverse_float_storage_order_support ();
415 :
416 243 : if (!int_mode_for_size (GET_MODE_PRECISION (mode), 0).exists (&int_mode)
417 243 : || !targetm.scalar_mode_supported_p (int_mode))
418 : {
419 0 : sorry ("reverse storage order for %smode", GET_MODE_NAME (mode));
420 0 : return x;
421 : }
422 243 : x = gen_lowpart (int_mode, x);
423 : }
424 :
425 2371 : result = simplify_unary_operation (BSWAP, int_mode, x, int_mode);
426 2371 : if (result == 0)
427 1051 : result = expand_unop (int_mode, bswap_optab, x, NULL_RTX, 1);
428 :
429 2371 : if (int_mode != mode)
430 243 : result = gen_lowpart (mode, result);
431 :
432 : return result;
433 : }
434 :
435 : /* If MODE is set, adjust bitfield memory MEM so that it points to the
436 : first unit of mode MODE that contains a bitfield of size BITSIZE at
437 : bit position BITNUM. If MODE is not set, return a BLKmode reference
438 : to every byte in the bitfield. Set *NEW_BITNUM to the bit position
439 : of the field within the new memory. */
440 :
441 : static rtx
442 428084 : narrow_bit_field_mem (rtx mem, opt_scalar_int_mode mode,
443 : unsigned HOST_WIDE_INT bitsize,
444 : unsigned HOST_WIDE_INT bitnum,
445 : unsigned HOST_WIDE_INT *new_bitnum)
446 : {
447 428084 : scalar_int_mode imode;
448 428084 : if (mode.exists (&imode))
449 : {
450 428084 : unsigned int unit = GET_MODE_BITSIZE (imode);
451 428084 : *new_bitnum = bitnum % unit;
452 428084 : HOST_WIDE_INT offset = (bitnum - *new_bitnum) / BITS_PER_UNIT;
453 428084 : return adjust_bitfield_address (mem, imode, offset);
454 : }
455 : else
456 : {
457 0 : *new_bitnum = bitnum % BITS_PER_UNIT;
458 0 : HOST_WIDE_INT offset = bitnum / BITS_PER_UNIT;
459 0 : HOST_WIDE_INT size = ((*new_bitnum + bitsize + BITS_PER_UNIT - 1)
460 0 : / BITS_PER_UNIT);
461 0 : return adjust_bitfield_address_size (mem, BLKmode, offset, size);
462 : }
463 : }
464 :
465 : /* The caller wants to perform insertion or extraction PATTERN on a
466 : bitfield of size BITSIZE at BITNUM bits into memory operand OP0.
467 : BITREGION_START and BITREGION_END are as for store_bit_field
468 : and FIELDMODE is the natural mode of the field.
469 :
470 : Search for a mode that is compatible with the memory access
471 : restrictions and (where applicable) with a register insertion or
472 : extraction. Return the new memory on success, storing the adjusted
473 : bit position in *NEW_BITNUM. Return null otherwise. */
474 :
475 : static rtx
476 206915 : adjust_bit_field_mem_for_reg (enum extraction_pattern pattern,
477 : rtx op0, HOST_WIDE_INT bitsize,
478 : HOST_WIDE_INT bitnum,
479 : poly_uint64 bitregion_start,
480 : poly_uint64 bitregion_end,
481 : machine_mode fieldmode,
482 : unsigned HOST_WIDE_INT *new_bitnum)
483 : {
484 413830 : bit_field_mode_iterator iter (bitsize, bitnum, bitregion_start,
485 206915 : bitregion_end, MEM_ALIGN (op0),
486 206919 : MEM_VOLATILE_P (op0));
487 206915 : scalar_int_mode best_mode;
488 206915 : if (iter.next_mode (&best_mode))
489 : {
490 : /* We can use a memory in BEST_MODE. See whether this is true for
491 : any wider modes. All other things being equal, we prefer to
492 : use the widest mode possible because it tends to expose more
493 : CSE opportunities. */
494 200437 : if (!iter.prefer_smaller_modes ())
495 : {
496 : /* Limit the search to the mode required by the corresponding
497 : register insertion or extraction instruction, if any. */
498 317 : scalar_int_mode limit_mode = word_mode;
499 317 : extraction_insn insn;
500 634 : if (get_best_reg_extraction_insn (&insn, pattern,
501 317 : GET_MODE_BITSIZE (best_mode),
502 : fieldmode))
503 317 : limit_mode = insn.field_mode;
504 :
505 317 : scalar_int_mode wider_mode;
506 317 : while (iter.next_mode (&wider_mode)
507 1052 : && GET_MODE_SIZE (wider_mode) <= GET_MODE_SIZE (limit_mode))
508 111 : best_mode = wider_mode;
509 : }
510 200437 : return narrow_bit_field_mem (op0, best_mode, bitsize, bitnum,
511 : new_bitnum);
512 : }
513 : return NULL_RTX;
514 : }
515 :
516 : /* Return true if a bitfield of size BITSIZE at bit number BITNUM within
517 : a structure of mode STRUCT_MODE represents a lowpart subreg. The subreg
518 : offset is then BITNUM / BITS_PER_UNIT. */
519 :
520 : static bool
521 786123 : lowpart_bit_field_p (poly_uint64 bitnum, poly_uint64 bitsize,
522 : machine_mode struct_mode)
523 : {
524 786123 : poly_uint64 regsize = REGMODE_NATURAL_SIZE (struct_mode);
525 786123 : if (BYTES_BIG_ENDIAN)
526 : return (multiple_p (bitnum, BITS_PER_UNIT)
527 : && (known_eq (bitnum + bitsize, GET_MODE_BITSIZE (struct_mode))
528 : || multiple_p (bitnum + bitsize,
529 : regsize * BITS_PER_UNIT)));
530 : else
531 786123 : return multiple_p (bitnum, regsize * BITS_PER_UNIT);
532 : }
533 :
534 : /* Return true if -fstrict-volatile-bitfields applies to an access of OP0
535 : containing BITSIZE bits starting at BITNUM, with field mode FIELDMODE.
536 : Return false if the access would touch memory outside the range
537 : BITREGION_START to BITREGION_END for conformance to the C++ memory
538 : model. */
539 :
540 : static bool
541 1571055 : strict_volatile_bitfield_p (rtx op0, unsigned HOST_WIDE_INT bitsize,
542 : unsigned HOST_WIDE_INT bitnum,
543 : scalar_int_mode fieldmode,
544 : poly_uint64 bitregion_start,
545 : poly_uint64 bitregion_end)
546 : {
547 1571055 : unsigned HOST_WIDE_INT modesize = GET_MODE_BITSIZE (fieldmode);
548 :
549 : /* -fstrict-volatile-bitfields must be enabled and we must have a
550 : volatile MEM. */
551 1571055 : if (!MEM_P (op0)
552 170964 : || !MEM_VOLATILE_P (op0)
553 1571267 : || flag_strict_volatile_bitfields <= 0)
554 : return false;
555 :
556 : /* The bit size must not be larger than the field mode, and
557 : the field mode must not be larger than a word. */
558 14 : if (bitsize > modesize || modesize > BITS_PER_WORD)
559 : return false;
560 :
561 : /* Check for cases of unaligned fields that must be split. */
562 14 : if (bitnum % modesize + bitsize > modesize)
563 : return false;
564 :
565 : /* The memory must be sufficiently aligned for a MODESIZE access.
566 : This condition guarantees, that the memory access will not
567 : touch anything after the end of the structure. */
568 11 : if (MEM_ALIGN (op0) < modesize)
569 : return false;
570 :
571 : /* Check for cases where the C++ memory model applies. */
572 11 : if (maybe_ne (bitregion_end, 0U)
573 11 : && (maybe_lt (bitnum - bitnum % modesize, bitregion_start)
574 4 : || maybe_gt (bitnum - bitnum % modesize + modesize - 1,
575 : bitregion_end)))
576 0 : return false;
577 :
578 : return true;
579 : }
580 :
581 : /* Return true if OP is a memory and if a bitfield of size BITSIZE at
582 : bit number BITNUM can be treated as a simple value of mode MODE.
583 : Store the byte offset in *BYTENUM if so. */
584 :
585 : static bool
586 547152 : simple_mem_bitfield_p (rtx op0, poly_uint64 bitsize, poly_uint64 bitnum,
587 : machine_mode mode, poly_uint64 *bytenum)
588 : {
589 547152 : return (MEM_P (op0)
590 266730 : && multiple_p (bitnum, BITS_PER_UNIT, bytenum)
591 213487 : && known_eq (bitsize, GET_MODE_BITSIZE (mode))
592 604293 : && (!targetm.slow_unaligned_access (mode, MEM_ALIGN (op0))
593 0 : || (multiple_p (bitnum, GET_MODE_ALIGNMENT (mode))
594 0 : && MEM_ALIGN (op0) >= GET_MODE_ALIGNMENT (mode))));
595 : }
596 :
597 : /* Try to use instruction INSV to store VALUE into a field of OP0.
598 : If OP0_MODE is defined, it is the mode of OP0, otherwise OP0 is a
599 : BLKmode MEM. VALUE_MODE is the mode of VALUE. BITSIZE and BITNUM
600 : are as for store_bit_field. */
601 :
602 : static bool
603 108500 : store_bit_field_using_insv (const extraction_insn *insv, rtx op0,
604 : opt_scalar_int_mode op0_mode,
605 : unsigned HOST_WIDE_INT bitsize,
606 : unsigned HOST_WIDE_INT bitnum,
607 : rtx value, scalar_int_mode value_mode)
608 : {
609 108500 : class expand_operand ops[4];
610 108500 : rtx value1;
611 108500 : rtx xop0 = op0;
612 108500 : rtx_insn *last = get_last_insn ();
613 108500 : bool copy_back = false;
614 :
615 108500 : scalar_int_mode op_mode = insv->field_mode;
616 108500 : unsigned int unit = GET_MODE_BITSIZE (op_mode);
617 108500 : if (bitsize == 0 || bitsize > unit)
618 : return false;
619 :
620 108491 : if (MEM_P (xop0))
621 : /* Get a reference to the first byte of the field. */
622 0 : xop0 = narrow_bit_field_mem (xop0, insv->struct_mode, bitsize, bitnum,
623 : &bitnum);
624 : else
625 : {
626 : /* Convert from counting within OP0 to counting in OP_MODE. */
627 108491 : if (BYTES_BIG_ENDIAN)
628 : bitnum += unit - GET_MODE_BITSIZE (op0_mode.require ());
629 :
630 : /* If xop0 is a register, we need it in OP_MODE
631 : to make it acceptable to the format of insv. */
632 108491 : if (GET_CODE (xop0) == SUBREG)
633 : {
634 : /* If such a SUBREG can't be created, give up. */
635 32853 : if (!validate_subreg (op_mode, GET_MODE (SUBREG_REG (xop0)),
636 32853 : SUBREG_REG (xop0), SUBREG_BYTE (xop0)))
637 : return false;
638 : /* We can't just change the mode, because this might clobber op0,
639 : and we will need the original value of op0 if insv fails. */
640 32853 : xop0 = gen_rtx_SUBREG (op_mode, SUBREG_REG (xop0),
641 32853 : SUBREG_BYTE (xop0));
642 : }
643 108491 : if (REG_P (xop0) && GET_MODE (xop0) != op_mode)
644 27719 : xop0 = gen_lowpart_SUBREG (op_mode, xop0);
645 : }
646 :
647 : /* If the destination is a paradoxical subreg such that we need a
648 : truncate to the inner mode, perform the insertion on a temporary and
649 : truncate the result to the original destination. Note that we can't
650 : just truncate the paradoxical subreg as (truncate:N (subreg:W (reg:N
651 : X) 0)) is (reg:N X). */
652 108491 : if (GET_CODE (xop0) == SUBREG
653 60572 : && REG_P (SUBREG_REG (xop0))
654 169063 : && !TRULY_NOOP_TRUNCATION_MODES_P (GET_MODE (SUBREG_REG (xop0)),
655 : op_mode))
656 : {
657 0 : rtx tem = gen_reg_rtx (op_mode);
658 0 : emit_move_insn (tem, xop0);
659 0 : xop0 = tem;
660 0 : copy_back = true;
661 : }
662 :
663 : /* There are similar overflow check at the start of store_bit_field_1,
664 : but that only check the situation where the field lies completely
665 : outside the register, while there do have situation where the field
666 : lies partialy in the register, we need to adjust bitsize for this
667 : partial overflow situation. Without this fix, pr48335-2.c on big-endian
668 : will broken on those arch support bit insert instruction, like arm, aarch64
669 : etc. */
670 108491 : if (bitsize + bitnum > unit && bitnum < unit)
671 : {
672 2 : warning (OPT_Wextra, "write of %wu-bit data outside the bound of "
673 : "destination object, data truncated into %wu-bit",
674 : bitsize, unit - bitnum);
675 2 : bitsize = unit - bitnum;
676 : }
677 :
678 : /* If BITS_BIG_ENDIAN is zero on a BYTES_BIG_ENDIAN machine, we count
679 : "backwards" from the size of the unit we are inserting into.
680 : Otherwise, we count bits from the most significant on a
681 : BYTES/BITS_BIG_ENDIAN machine. */
682 :
683 108491 : if (BITS_BIG_ENDIAN != BYTES_BIG_ENDIAN)
684 : bitnum = unit - bitsize - bitnum;
685 :
686 : /* Convert VALUE to op_mode (which insv insn wants) in VALUE1. */
687 108491 : value1 = value;
688 108491 : if (value_mode != op_mode)
689 : {
690 142086 : if (GET_MODE_BITSIZE (value_mode) >= bitsize)
691 : {
692 71043 : rtx tmp;
693 : /* Optimization: Don't bother really extending VALUE
694 : if it has all the bits we will actually use. However,
695 : if we must narrow it, be sure we do it correctly. */
696 :
697 213129 : if (GET_MODE_SIZE (value_mode) < GET_MODE_SIZE (op_mode))
698 : {
699 32523 : tmp = simplify_subreg (op_mode, value1, value_mode, 0);
700 32523 : if (! tmp)
701 32054 : tmp = simplify_gen_subreg (op_mode,
702 : force_reg (value_mode, value1),
703 : value_mode, 0);
704 : }
705 : else
706 : {
707 38520 : if (targetm.mode_rep_extended (op_mode, value_mode) != UNKNOWN)
708 0 : tmp = simplify_gen_unary (TRUNCATE, op_mode,
709 : value1, value_mode);
710 : else
711 : {
712 38520 : tmp = gen_lowpart_if_possible (op_mode, value1);
713 38520 : if (! tmp)
714 0 : tmp = gen_lowpart (op_mode, force_reg (value_mode, value1));
715 : }
716 : }
717 : value1 = tmp;
718 : }
719 0 : else if (CONST_INT_P (value))
720 0 : value1 = gen_int_mode (INTVAL (value), op_mode);
721 : else
722 : /* Parse phase is supposed to make VALUE's data type
723 : match that of the component reference, which is a type
724 : at least as wide as the field; so VALUE should have
725 : a mode that corresponds to that type. */
726 0 : gcc_assert (CONSTANT_P (value));
727 : }
728 :
729 108491 : create_fixed_operand (&ops[0], xop0);
730 108491 : create_integer_operand (&ops[1], bitsize);
731 108491 : create_integer_operand (&ops[2], bitnum);
732 108491 : create_input_operand (&ops[3], value1, op_mode);
733 108491 : if (maybe_expand_insn (insv->icode, 4, ops))
734 : {
735 2040 : if (copy_back)
736 0 : convert_move (op0, xop0, true);
737 2040 : return true;
738 : }
739 106451 : delete_insns_since (last);
740 106451 : return false;
741 : }
742 :
743 : /* A subroutine of store_bit_field, with the same arguments. Return true
744 : if the operation could be implemented.
745 :
746 : If FALLBACK_P is true, fall back to store_fixed_bit_field if we have
747 : no other way of implementing the operation. If FALLBACK_P is false,
748 : return false instead.
749 :
750 : if UNDEFINED_P is true then STR_RTX is undefined and may be set using
751 : a subreg instead. */
752 :
753 : static bool
754 876885 : store_bit_field_1 (rtx str_rtx, poly_uint64 bitsize, poly_uint64 bitnum,
755 : poly_uint64 bitregion_start, poly_uint64 bitregion_end,
756 : machine_mode fieldmode,
757 : rtx value, bool reverse, bool fallback_p, bool undefined_p)
758 : {
759 876885 : rtx op0 = str_rtx;
760 :
761 876891 : while (GET_CODE (op0) == SUBREG)
762 : {
763 6 : bitnum += subreg_memory_offset (op0) * BITS_PER_UNIT;
764 6 : op0 = SUBREG_REG (op0);
765 : }
766 :
767 : /* No action is needed if the target is a register and if the field
768 : lies completely outside that register. This can occur if the source
769 : code contains an out-of-bounds access to a small array. */
770 1678656 : if (REG_P (op0) && known_ge (bitnum, GET_MODE_BITSIZE (GET_MODE (op0))))
771 : return true;
772 :
773 : /* Use vec_set patterns for inserting parts of vectors whenever
774 : available. */
775 876882 : machine_mode outermode = GET_MODE (op0);
776 876882 : scalar_mode innermode = GET_MODE_INNER (outermode);
777 876882 : poly_uint64 pos;
778 875156 : if (VECTOR_MODE_P (outermode)
779 2096 : && !MEM_P (op0)
780 2096 : && optab_handler (vec_set_optab, outermode) != CODE_FOR_nothing
781 1044 : && fieldmode == innermode
782 939 : && known_eq (bitsize, GET_MODE_PRECISION (innermode))
783 877821 : && multiple_p (bitnum, GET_MODE_PRECISION (innermode), &pos))
784 : {
785 939 : class expand_operand ops[3];
786 939 : enum insn_code icode = optab_handler (vec_set_optab, outermode);
787 :
788 939 : create_fixed_operand (&ops[0], op0);
789 939 : create_input_operand (&ops[1], value, innermode);
790 939 : create_integer_operand (&ops[2], pos);
791 939 : if (maybe_expand_insn (icode, 3, ops))
792 939 : return true;
793 : }
794 :
795 : /* If the target is a register, overwriting the entire object, or storing
796 : a full-word or multi-word field can be done with just a SUBREG. */
797 875943 : if (!MEM_P (op0)
798 1676772 : && known_eq (bitsize, GET_MODE_BITSIZE (fieldmode)))
799 : {
800 : /* Use the subreg machinery either to narrow OP0 to the required
801 : words or to cope with mode punning between equal-sized modes.
802 : In the latter case, use subreg on the rhs side, not lhs. */
803 733611 : rtx sub;
804 733611 : poly_uint64 bytenum;
805 733611 : poly_uint64 regsize = REGMODE_NATURAL_SIZE (GET_MODE (op0));
806 733611 : if (known_eq (bitnum, 0U)
807 1101167 : && known_eq (bitsize, GET_MODE_BITSIZE (GET_MODE (op0))))
808 : {
809 53768 : sub = force_subreg (GET_MODE (op0), value, fieldmode, 0);
810 53768 : if (sub)
811 : {
812 53768 : if (reverse)
813 1 : sub = flip_storage_order (GET_MODE (op0), sub);
814 53768 : emit_move_insn (op0, sub);
815 53768 : return true;
816 : }
817 : }
818 869912 : else if (multiple_p (bitnum, BITS_PER_UNIT, &bytenum)
819 679838 : && (undefined_p
820 676192 : || (multiple_p (bitnum, regsize * BITS_PER_UNIT)
821 663645 : && multiple_p (bitsize, regsize * BITS_PER_UNIT)))
822 1264230 : && known_ge (GET_MODE_BITSIZE (GET_MODE (op0)), bitsize))
823 : {
824 632103 : sub = simplify_gen_subreg (fieldmode, op0, GET_MODE (op0), bytenum);
825 632103 : if (sub)
826 : {
827 632101 : if (reverse)
828 0 : value = flip_storage_order (fieldmode, value);
829 632101 : emit_move_insn (sub, value);
830 632101 : return true;
831 : }
832 : }
833 : }
834 :
835 : /* If the target is memory, storing any naturally aligned field can be
836 : done with a simple store. For targets that support fast unaligned
837 : memory, any naturally sized, unit aligned field can be done directly. */
838 190074 : poly_uint64 bytenum;
839 190074 : if (simple_mem_bitfield_p (op0, bitsize, bitnum, fieldmode, &bytenum))
840 : {
841 6866 : op0 = adjust_bitfield_address (op0, fieldmode, bytenum);
842 6866 : if (reverse)
843 0 : value = flip_storage_order (fieldmode, value);
844 6866 : emit_move_insn (op0, value);
845 6866 : return true;
846 : }
847 :
848 : /* It's possible we'll need to handle other cases here for
849 : polynomial bitnum and bitsize. */
850 :
851 : /* From here on we need to be looking at a fixed-size insertion. */
852 183208 : unsigned HOST_WIDE_INT ibitsize = bitsize.to_constant ();
853 183208 : unsigned HOST_WIDE_INT ibitnum = bitnum.to_constant ();
854 :
855 : /* Make sure we are playing with integral modes. Pun with subregs
856 : if we aren't. This must come after the entire register case above,
857 : since that case is valid for any mode. The following cases are only
858 : valid for integral modes. */
859 183208 : opt_scalar_int_mode op0_mode = int_mode_for_mode (GET_MODE (op0));
860 183208 : scalar_int_mode imode;
861 183208 : if (!op0_mode.exists (&imode) || imode != GET_MODE (op0))
862 : {
863 19419 : if (MEM_P (op0))
864 14909 : op0 = adjust_bitfield_address_size (op0, op0_mode.else_blk (),
865 : 0, MEM_SIZE (op0));
866 4510 : else if (!op0_mode.exists ())
867 : {
868 0 : if (ibitnum == 0
869 0 : && known_eq (ibitsize, GET_MODE_BITSIZE (GET_MODE (op0)))
870 0 : && MEM_P (value)
871 0 : && !reverse)
872 : {
873 0 : value = adjust_address (value, GET_MODE (op0), 0);
874 0 : emit_move_insn (op0, value);
875 0 : return true;
876 : }
877 0 : if (!fallback_p)
878 : return false;
879 0 : rtx temp = assign_stack_temp (GET_MODE (op0),
880 0 : GET_MODE_SIZE (GET_MODE (op0)));
881 0 : emit_move_insn (temp, op0);
882 0 : store_bit_field_1 (temp, bitsize, bitnum, 0, 0, fieldmode, value,
883 : reverse, fallback_p, undefined_p);
884 0 : emit_move_insn (op0, temp);
885 0 : return true;
886 : }
887 : else
888 4510 : op0 = gen_lowpart (op0_mode.require (), op0);
889 : }
890 :
891 183208 : return store_integral_bit_field (op0, op0_mode, ibitsize, ibitnum,
892 : bitregion_start, bitregion_end,
893 183208 : fieldmode, value, reverse, fallback_p);
894 : }
895 :
896 : /* Subroutine of store_bit_field_1, with the same arguments, except
897 : that BITSIZE and BITNUM are constant. Handle cases specific to
898 : integral modes. If OP0_MODE is defined, it is the mode of OP0,
899 : otherwise OP0 is a BLKmode MEM. */
900 :
static bool
store_integral_bit_field (rtx op0, opt_scalar_int_mode op0_mode,
			  unsigned HOST_WIDE_INT bitsize,
			  unsigned HOST_WIDE_INT bitnum,
			  poly_uint64 bitregion_start,
			  poly_uint64 bitregion_end,
			  machine_mode fieldmode,
			  rtx value, bool reverse, bool fallback_p)
{
  /* Storing an lsb-aligned field in a register
     can be done with a movstrict instruction.  */

  if (!MEM_P (op0)
      && !reverse
      && lowpart_bit_field_p (bitnum, bitsize, op0_mode.require ())
      && known_eq (bitsize, GET_MODE_BITSIZE (fieldmode))
      && optab_handler (movstrict_optab, fieldmode) != CODE_FOR_nothing)
    {
      class expand_operand ops[2];
      enum insn_code icode = optab_handler (movstrict_optab, fieldmode);
      rtx arg0 = op0;
      unsigned HOST_WIDE_INT subreg_off;

      if (GET_CODE (arg0) == SUBREG)
	{
	  /* Else we've got some float mode source being extracted into
	     a different float mode destination -- this combination of
	     subregs results in Severe Tire Damage.  */
	  gcc_assert (GET_MODE (SUBREG_REG (arg0)) == fieldmode
		      || GET_MODE_CLASS (fieldmode) == MODE_INT
		      || GET_MODE_CLASS (fieldmode) == MODE_PARTIAL_INT);
	  arg0 = SUBREG_REG (arg0);
	}

      /* lowpart_bit_field_p above implies the field starts on a byte
	 boundary here, so this division is exact.  */
      subreg_off = bitnum / BITS_PER_UNIT;
      if (validate_subreg (fieldmode, GET_MODE (arg0), arg0, subreg_off)
	  /* STRICT_LOW_PART must have a non-paradoxical subreg as
	     operand.  */
	  && !paradoxical_subreg_p (fieldmode, GET_MODE (arg0)))
	{
	  arg0 = gen_rtx_SUBREG (fieldmode, arg0, subreg_off);

	  create_fixed_operand (&ops[0], arg0);
	  /* Shrink the source operand to FIELDMODE.  */
	  create_convert_operand_to (&ops[1], value, fieldmode, false);
	  if (maybe_expand_insn (icode, 2, ops))
	    return true;
	}
    }

  /* Handle fields bigger than a word.  */

  if (bitsize > BITS_PER_WORD)
    {
      /* Here we transfer the words of the field
	 in the order least significant first.
	 This is because the most significant word is the one which may
	 be less than full.
	 However, only do that if the value is not BLKmode.  */

      const bool backwards = WORDS_BIG_ENDIAN && fieldmode != BLKmode;
      const int nwords = (bitsize + (BITS_PER_WORD - 1)) / BITS_PER_WORD;
      rtx_insn *last;

      /* This is the mode we must force value to, so that there will be enough
	 subwords to extract.  Note that fieldmode will often (always?) be
	 VOIDmode, because that is what store_field uses to indicate that this
	 is a bit field, but passing VOIDmode to operand_subword_force
	 is not allowed.

	 The mode must be fixed-size, since insertions into variable-sized
	 objects are meant to be handled before calling this function.  */
      fixed_size_mode value_mode = as_a <fixed_size_mode> (GET_MODE (value));
      if (value_mode == VOIDmode)
	value_mode
	  = smallest_int_mode_for_size (nwords * BITS_PER_WORD).require ();

      /* Remember the insn stream position so a failed word-by-word
	 expansion can be rolled back before returning false.  */
      last = get_last_insn ();
      for (int i = 0; i < nwords; i++)
	{
	  /* Number of bits to be stored in this iteration, i.e. BITS_PER_WORD
	     except maybe for the last iteration.  */
	  const unsigned HOST_WIDE_INT new_bitsize
	    = MIN (BITS_PER_WORD, bitsize - i * BITS_PER_WORD);
	  /* Bit offset from the starting bit number in the target.  */
	  const unsigned int bit_offset
	    = backwards ^ reverse
	      ? MAX ((int) bitsize - (i + 1) * BITS_PER_WORD, 0)
	      : i * BITS_PER_WORD;

	  /* No further action is needed if the target is a register and if
	     this field lies completely outside that register.  */
	  if (REG_P (op0) && known_ge (bitnum + bit_offset,
				       GET_MODE_BITSIZE (GET_MODE (op0))))
	    {
	      if (backwards ^ reverse)
		continue;
	      /* For forward operation we are finished.  */
	      return true;
	    }

	  /* Starting word number in the value.  */
	  const unsigned int wordnum
	    = backwards
	      ? GET_MODE_SIZE (value_mode) / UNITS_PER_WORD - (i + 1)
	      : i;
	  /* The chunk of the value in word_mode.  We use bit-field extraction
	     in BLKmode to handle unaligned memory references and to shift the
	     last chunk right on big-endian machines if need be.  */
	  rtx value_word
	    = fieldmode == BLKmode
	      ? extract_bit_field (value, new_bitsize, wordnum * BITS_PER_WORD,
				   1, NULL_RTX, word_mode, word_mode, false,
				   NULL)
	      : operand_subword_force (value, wordnum, value_mode);

	  if (!store_bit_field_1 (op0, new_bitsize,
				  bitnum + bit_offset,
				  bitregion_start, bitregion_end,
				  word_mode,
				  value_word, reverse, fallback_p, false))
	    {
	      delete_insns_since (last);
	      return false;
	    }
	}
      return true;
    }

  /* If VALUE has a floating-point or complex mode, access it as an
     integer of the corresponding size.  This can occur on a machine
     with 64 bit registers that uses SFmode for float.  It can also
     occur for unaligned float or complex fields.  */
  rtx orig_value = value;
  scalar_int_mode value_mode;
  if (GET_MODE (value) == VOIDmode)
    /* By this point we've dealt with values that are bigger than a word,
       so word_mode is a conservatively correct choice.  */
    value_mode = word_mode;
  else if (!is_a <scalar_int_mode> (GET_MODE (value), &value_mode))
    {
      value_mode = int_mode_for_mode (GET_MODE (value)).require ();
      value = gen_reg_rtx (value_mode);
      emit_move_insn (gen_lowpart (GET_MODE (orig_value), value), orig_value);
    }

  /* If OP0 is a multi-word register, narrow it to the affected word.
     If the region spans two words, defer to store_split_bit_field.
     Don't do this if op0 is a single hard register wider than word
     such as a float or vector register.  */
  if (!MEM_P (op0)
      && GET_MODE_SIZE (op0_mode.require ()) > UNITS_PER_WORD
      && (!REG_P (op0)
	  || !HARD_REGISTER_P (op0)
	  || hard_regno_nregs (REGNO (op0), op0_mode.require ()) != 1))
    {
      if (bitnum % BITS_PER_WORD + bitsize > BITS_PER_WORD)
	{
	  if (!fallback_p)
	    return false;

	  store_split_bit_field (op0, op0_mode, bitsize, bitnum,
				 bitregion_start, bitregion_end,
				 value, value_mode, reverse);
	  return true;
	}
      op0 = simplify_gen_subreg (word_mode, op0, op0_mode.require (),
				 bitnum / BITS_PER_WORD * UNITS_PER_WORD);
      gcc_assert (op0);
      op0_mode = word_mode;
      bitnum %= BITS_PER_WORD;
    }

  /* From here on we can assume that the field to be stored in fits
     within a word.  If the destination is a register, it too fits
     in a word.  */

  /* Try the target's bit-field insertion (insv) pattern on a register
     destination.  */
  extraction_insn insv;
  if (!MEM_P (op0)
      && !reverse
      && get_best_reg_extraction_insn (&insv, EP_insv,
				       GET_MODE_BITSIZE (op0_mode.require ()),
				       fieldmode)
      && store_bit_field_using_insv (&insv, op0, op0_mode,
				     bitsize, bitnum, value, value_mode))
    return true;

  /* If OP0 is a memory, try copying it to a register and seeing if a
     cheap register alternative is available.  */
  if (MEM_P (op0) && !reverse)
    {
      if (get_best_mem_extraction_insn (&insv, EP_insv, bitsize, bitnum,
					fieldmode)
	  && store_bit_field_using_insv (&insv, op0, op0_mode,
					 bitsize, bitnum, value, value_mode))
	return true;

      rtx_insn *last = get_last_insn ();

      /* Try loading part of OP0 into a register, inserting the bitfield
	 into that, and then copying the result back to OP0.  */
      unsigned HOST_WIDE_INT bitpos;
      rtx xop0 = adjust_bit_field_mem_for_reg (EP_insv, op0, bitsize, bitnum,
					       bitregion_start, bitregion_end,
					       fieldmode, &bitpos);
      if (xop0)
	{
	  rtx tempreg = copy_to_reg (xop0);
	  /* Note: FALLBACK_P is false here, so failure just discards
	     the tentative insns rather than recursing forever.  */
	  if (store_bit_field_1 (tempreg, bitsize, bitpos,
				 bitregion_start, bitregion_end,
				 fieldmode, orig_value, reverse, false, false))
	    {
	      emit_move_insn (xop0, tempreg);
	      return true;
	    }
	  delete_insns_since (last);
	}
    }

  if (!fallback_p)
    return false;

  /* Last resort: expand the insertion with shifts and logical ops.  */
  store_fixed_bit_field (op0, op0_mode, bitsize, bitnum, bitregion_start,
			 bitregion_end, value, value_mode, reverse);
  return true;
}
1127 :
1128 : /* Generate code to store value from rtx VALUE
1129 : into a bit-field within structure STR_RTX
1130 : containing BITSIZE bits starting at bit BITNUM.
1131 :
1132 : BITREGION_START is bitpos of the first bitfield in this region.
1133 : BITREGION_END is the bitpos of the ending bitfield in this region.
1134 : These two fields are 0, if the C++ memory model does not apply,
1135 : or we are not interested in keeping track of bitfield regions.
1136 :
1137 : FIELDMODE is the machine-mode of the FIELD_DECL node for this field.
1138 :
1139 : If REVERSE is true, the store is to be done in reverse order.
1140 :
1141 : If UNDEFINED_P is true then STR_RTX is currently undefined. */
1142 :
void
store_bit_field (rtx str_rtx, poly_uint64 bitsize, poly_uint64 bitnum,
		 poly_uint64 bitregion_start, poly_uint64 bitregion_end,
		 machine_mode fieldmode,
		 rtx value, bool reverse, bool undefined_p)
{
  /* Handle -fstrict-volatile-bitfields in the cases where it applies.  */
  unsigned HOST_WIDE_INT ibitsize = 0, ibitnum = 0;
  scalar_int_mode int_mode;
  if (bitsize.is_constant (&ibitsize)
      && bitnum.is_constant (&ibitnum)
      && is_a <scalar_int_mode> (fieldmode, &int_mode)
      && strict_volatile_bitfield_p (str_rtx, ibitsize, ibitnum, int_mode,
				     bitregion_start, bitregion_end))
    {
      /* Storing of a full word can be done with a simple store.
	 We know here that the field can be accessed with one single
	 instruction.  For targets that support unaligned memory,
	 an unaligned access may be necessary.  */
      if (ibitsize == GET_MODE_BITSIZE (int_mode))
	{
	  str_rtx = adjust_bitfield_address (str_rtx, int_mode,
					     ibitnum / BITS_PER_UNIT);
	  if (reverse)
	    value = flip_storage_order (int_mode, value);
	  gcc_assert (ibitnum % BITS_PER_UNIT == 0);
	  emit_move_insn (str_rtx, value);
	}
      else
	{
	  /* Partial-word field: read the containing INT_MODE word into a
	     register, insert the field there, and write the word back,
	     so the memory itself sees exactly one load and one store.  */
	  rtx temp;

	  str_rtx = narrow_bit_field_mem (str_rtx, int_mode, ibitsize,
					  ibitnum, &ibitnum);
	  gcc_assert (ibitnum + ibitsize <= GET_MODE_BITSIZE (int_mode));
	  temp = copy_to_reg (str_rtx);
	  if (!store_bit_field_1 (temp, ibitsize, ibitnum, 0, 0,
				  int_mode, value, reverse, true, undefined_p))
	    gcc_unreachable ();

	  emit_move_insn (str_rtx, temp);
	}

      return;
    }

  /* Under the C++0x memory model, we must not touch bits outside the
     bit region.  Adjust the address to start at the beginning of the
     bit region.  */
  if (MEM_P (str_rtx) && maybe_ne (bitregion_start, 0U))
    {
      scalar_int_mode best_mode;
      machine_mode addr_mode = VOIDmode;

      /* Rebase BITNUM and the region bounds so the region starts at
	 bit 0 of the adjusted address.  */
      poly_uint64 offset = exact_div (bitregion_start, BITS_PER_UNIT);
      bitnum -= bitregion_start;
      poly_int64 size = bits_to_bytes_round_up (bitnum + bitsize);
      bitregion_end -= bitregion_start;
      bitregion_start = 0;
      if (bitsize.is_constant (&ibitsize)
	  && bitnum.is_constant (&ibitnum)
	  && get_best_mode (ibitsize, ibitnum,
			    bitregion_start, bitregion_end,
			    MEM_ALIGN (str_rtx), INT_MAX,
			    MEM_VOLATILE_P (str_rtx), &best_mode))
	addr_mode = best_mode;
      str_rtx = adjust_bitfield_address_size (str_rtx, addr_mode,
					      offset, size);
    }

  /* General case; with FALLBACK_P true, store_bit_field_1 must succeed.  */
  if (!store_bit_field_1 (str_rtx, bitsize, bitnum,
			  bitregion_start, bitregion_end,
			  fieldmode, value, reverse, true, undefined_p))
    gcc_unreachable ();
}
1218 :
1219 : /* Use shifts and boolean operations to store VALUE into a bit field of
1220 : width BITSIZE in OP0, starting at bit BITNUM. If OP0_MODE is defined,
1221 : it is the mode of OP0, otherwise OP0 is a BLKmode MEM. VALUE_MODE is
1222 : the mode of VALUE.
1223 :
1224 : If REVERSE is true, the store is to be done in reverse order. */
1225 :
static void
store_fixed_bit_field (rtx op0, opt_scalar_int_mode op0_mode,
		       unsigned HOST_WIDE_INT bitsize,
		       unsigned HOST_WIDE_INT bitnum,
		       poly_uint64 bitregion_start, poly_uint64 bitregion_end,
		       rtx value, scalar_int_mode value_mode, bool reverse)
{
  /* There is a case not handled here:
     a structure with a known alignment of just a halfword
     and a field split across two aligned halfwords within the structure.
     Or likewise a structure with a known alignment of just a byte
     and a field split across two bytes.
     Such cases are not supposed to be able to occur.  */

  scalar_int_mode best_mode;
  if (MEM_P (op0))
    {
      /* For a memory destination, choose the widest access mode (capped
	 at BITS_PER_WORD and at OP0's own mode, if any) that covers the
	 field without violating alignment or the bit region.  */
      unsigned int max_bitsize = BITS_PER_WORD;
      scalar_int_mode imode;
      if (op0_mode.exists (&imode) && GET_MODE_BITSIZE (imode) < max_bitsize)
	max_bitsize = GET_MODE_BITSIZE (imode);

      if (!get_best_mode (bitsize, bitnum, bitregion_start, bitregion_end,
			  MEM_ALIGN (op0), max_bitsize, MEM_VOLATILE_P (op0),
			  &best_mode))
	{
	  /* The only way this should occur is if the field spans word
	     boundaries.  */
	  store_split_bit_field (op0, op0_mode, bitsize, bitnum,
				 bitregion_start, bitregion_end,
				 value, value_mode, reverse);
	  return;
	}

      /* Re-express OP0 as a BEST_MODE reference and rebase BITNUM
	 within it.  */
      op0 = narrow_bit_field_mem (op0, best_mode, bitsize, bitnum, &bitnum);
    }
  else
    best_mode = op0_mode.require ();

  store_fixed_bit_field_1 (op0, best_mode, bitsize, bitnum,
			   value, value_mode, reverse);
}
1268 :
1269 : /* Helper function for store_fixed_bit_field, stores
1270 : the bit field always using MODE, which is the mode of OP0. The other
1271 : arguments are as for store_fixed_bit_field. */
1272 :
static void
store_fixed_bit_field_1 (rtx op0, scalar_int_mode mode,
			 unsigned HOST_WIDE_INT bitsize,
			 unsigned HOST_WIDE_INT bitnum,
			 rtx value, scalar_int_mode value_mode, bool reverse)
{
  rtx temp;
  /* Set below when VALUE is the constant 0 for the field (the IOR step
     can then be skipped) ...  */
  int all_zero = 0;
  /* ... or the constant all-ones (the masking AND can be skipped).  */
  int all_one = 0;

  /* Note that bitsize + bitnum can be greater than GET_MODE_BITSIZE (mode)
     for invalid input, such as f5 from gcc.dg/pr48335-2.c.  */

  if (reverse ? !BYTES_BIG_ENDIAN : BYTES_BIG_ENDIAN)
    /* BITNUM is the distance between our msb
       and that of the containing datum.
       Convert it to the distance from the lsb.  */
    bitnum = GET_MODE_BITSIZE (mode) - bitsize - bitnum;

  /* Now BITNUM is always the distance between our lsb
     and that of OP0.  */

  /* Shift VALUE left by BITNUM bits.  If VALUE is not constant,
     we must first convert its mode to MODE.  */

  if (CONST_INT_P (value))
    {
      unsigned HOST_WIDE_INT v = UINTVAL (value);

      if (bitsize < HOST_BITS_PER_WIDE_INT)
	v &= (HOST_WIDE_INT_1U << bitsize) - 1;

      if (v == 0)
	all_zero = 1;
      else if ((bitsize < HOST_BITS_PER_WIDE_INT
		&& v == (HOST_WIDE_INT_1U << bitsize) - 1)
	       || (bitsize == HOST_BITS_PER_WIDE_INT
		   && v == HOST_WIDE_INT_M1U))
	all_one = 1;

      value = lshift_value (mode, v, bitnum);
    }
  else
    {
      /* An explicit AND mask is needed only when VALUE may carry stray
	 high bits (its mode is wider than the field) and those bits are
	 not shifted out the top of MODE anyway.  */
      int must_and = (GET_MODE_BITSIZE (value_mode) != bitsize
		      && bitnum + bitsize != GET_MODE_BITSIZE (mode));

      if (value_mode != mode)
	value = convert_to_mode (mode, value, 1);

      if (must_and)
	value = expand_binop (mode, and_optab, value,
			      mask_rtx (mode, 0, bitsize, 0),
			      NULL_RTX, 1, OPTAB_LIB_WIDEN);
      if (bitnum > 0)
	value = expand_shift (LSHIFT_EXPR, mode, value,
			      bitnum, NULL_RTX, 1);
    }

  if (reverse)
    value = flip_storage_order (mode, value);

  /* Now clear the chosen bits in OP0,
     except that if VALUE is -1 we need not bother.  */
  /* We keep the intermediates in registers to allow CSE to combine
     consecutive bitfield assignments.  */

  temp = force_reg (mode, op0);

  if (! all_one)
    {
      rtx mask = mask_rtx (mode, bitnum, bitsize, 1);
      if (reverse)
	mask = flip_storage_order (mode, mask);
      temp = expand_binop (mode, and_optab, temp, mask,
			   NULL_RTX, 1, OPTAB_LIB_WIDEN);
      temp = force_reg (mode, temp);
    }

  /* Now logical-or VALUE into OP0, unless it is zero.  */

  if (! all_zero)
    {
      temp = expand_binop (mode, ior_optab, temp, value,
			   NULL_RTX, 1, OPTAB_LIB_WIDEN);
      temp = force_reg (mode, temp);
    }

  /* Write the combined word back to OP0 if the binops did not already
     target it.  */
  if (op0 != temp)
    {
      op0 = copy_rtx (op0);
      emit_move_insn (op0, temp);
    }
}
1367 :
1368 : /* Store a bit field that is split across multiple accessible memory objects.
1369 :
1370 : OP0 is the REG, SUBREG or MEM rtx for the first of the objects.
1371 : BITSIZE is the field width; BITPOS the position of its first bit
1372 : (within the word).
1373 : VALUE is the value to store, which has mode VALUE_MODE.
1374 : If OP0_MODE is defined, it is the mode of OP0, otherwise OP0 is
1375 : a BLKmode MEM.
1376 :
1377 : If REVERSE is true, the store is to be done in reverse order.
1378 :
1379 : This does not yet handle fields wider than BITS_PER_WORD. */
1380 :
static void
store_split_bit_field (rtx op0, opt_scalar_int_mode op0_mode,
		       unsigned HOST_WIDE_INT bitsize,
		       unsigned HOST_WIDE_INT bitpos,
		       poly_uint64 bitregion_start, poly_uint64 bitregion_end,
		       rtx value, scalar_int_mode value_mode, bool reverse)
{
  /* UNIT is the chunk granularity in bits; BITSDONE counts how many bits
     of the field have been stored so far.  */
  unsigned int unit, total_bits, bitsdone = 0;

  /* Make sure UNIT isn't larger than BITS_PER_WORD, we can only handle that
     much at a time.  */
  if (REG_P (op0) || GET_CODE (op0) == SUBREG)
    unit = BITS_PER_WORD;
  else
    unit = MIN (MEM_ALIGN (op0), BITS_PER_WORD);

  /* If OP0 is a memory with a mode, then UNIT must not be larger than
     OP0's mode as well.  Otherwise, store_fixed_bit_field will call us
     again, and we will mutually recurse forever.  */
  if (MEM_P (op0) && op0_mode.exists ())
    unit = MIN (unit, GET_MODE_BITSIZE (op0_mode.require ()));

  /* If VALUE is a constant other than a CONST_INT, get it into a register in
     WORD_MODE.  If we can do this using gen_lowpart_common, do so.  Note
     that VALUE might be a floating-point constant.  */
  if (CONSTANT_P (value) && !CONST_INT_P (value))
    {
      rtx word = gen_lowpart_common (word_mode, value);

      if (word && (value != word))
	value = word;
      else
	value = gen_lowpart_common (word_mode, force_reg (value_mode, value));
      value_mode = word_mode;
    }

  total_bits = GET_MODE_BITSIZE (value_mode);

  while (bitsdone < bitsize)
    {
      unsigned HOST_WIDE_INT thissize;
      unsigned HOST_WIDE_INT thispos;
      unsigned HOST_WIDE_INT offset;
      rtx part;

      /* OFFSET is which UNIT-sized chunk the next bits fall in;
	 THISPOS is the bit position within that chunk.  */
      offset = (bitpos + bitsdone) / unit;
      thispos = (bitpos + bitsdone) % unit;

      /* When region of bytes we can touch is restricted, decrease
	 UNIT close to the end of the region as needed.  If op0 is a REG
	 or SUBREG of REG, don't do this, as there can't be data races
	 on a register and we can expand shorter code in some cases.  */
      if (maybe_ne (bitregion_end, 0U)
	  && unit > BITS_PER_UNIT
	  && maybe_gt (bitpos + bitsdone - thispos + unit, bitregion_end + 1)
	  && !REG_P (op0)
	  && (GET_CODE (op0) != SUBREG || !REG_P (SUBREG_REG (op0))))
	{
	  /* Halve UNIT and retry this chunk without advancing BITSDONE.  */
	  unit = unit / 2;
	  continue;
	}

      /* THISSIZE must not overrun a word boundary.  Otherwise,
	 store_fixed_bit_field will call us again, and we will mutually
	 recurse forever.  */
      thissize = MIN (bitsize - bitsdone, BITS_PER_WORD);
      thissize = MIN (thissize, unit - thispos);

      if (reverse ? !BYTES_BIG_ENDIAN : BYTES_BIG_ENDIAN)
	{
	  /* Fetch successively less significant portions.  */
	  if (CONST_INT_P (value))
	    part = GEN_INT (((unsigned HOST_WIDE_INT) (INTVAL (value))
			     >> (bitsize - bitsdone - thissize))
			    & ((HOST_WIDE_INT_1 << thissize) - 1));
	  /* Likewise, but the source is little-endian.  */
	  else if (reverse)
	    part = extract_fixed_bit_field (word_mode, value, value_mode,
					    thissize,
					    bitsize - bitsdone - thissize,
					    NULL_RTX, 1, false);
	  else
	    /* The args are chosen so that the last part includes the
	       lsb.  Give extract_bit_field the value it needs (with
	       endianness compensation) to fetch the piece we want.  */
	    part = extract_fixed_bit_field (word_mode, value, value_mode,
					    thissize,
					    total_bits - bitsize + bitsdone,
					    NULL_RTX, 1, false);
	}
      else
	{
	  /* Fetch successively more significant portions.  */
	  if (CONST_INT_P (value))
	    part = GEN_INT (((unsigned HOST_WIDE_INT) (INTVAL (value))
			     >> bitsdone)
			    & ((HOST_WIDE_INT_1 << thissize) - 1));
	  /* Likewise, but the source is big-endian.  */
	  else if (reverse)
	    part = extract_fixed_bit_field (word_mode, value, value_mode,
					    thissize,
					    total_bits - bitsdone - thissize,
					    NULL_RTX, 1, false);
	  else
	    part = extract_fixed_bit_field (word_mode, value, value_mode,
					    thissize, bitsdone, NULL_RTX,
					    1, false);
	}

      /* If OP0 is a register, then handle OFFSET here.  */
      rtx op0_piece = op0;
      opt_scalar_int_mode op0_piece_mode = op0_mode;
      if (SUBREG_P (op0) || REG_P (op0))
	{
	  scalar_int_mode imode;
	  if (op0_mode.exists (&imode)
	      && GET_MODE_SIZE (imode) < UNITS_PER_WORD)
	    {
	      /* A nonzero OFFSET into a sub-word register is an
		 out-of-bounds access; mark the piece to be skipped.  */
	      if (offset)
		op0_piece = const0_rtx;
	    }
	  else
	    {
	      op0_piece = operand_subword_force (op0,
						 offset * unit / BITS_PER_WORD,
						 GET_MODE (op0));
	      op0_piece_mode = word_mode;
	    }
	  offset &= BITS_PER_WORD / unit - 1;
	}

      /* OFFSET is in UNITs, and UNIT is in bits.  If WORD is const0_rtx,
	 it is just an out-of-bounds access.  Ignore it.  */
      if (op0_piece != const0_rtx)
	store_fixed_bit_field (op0_piece, op0_piece_mode, thissize,
			       offset * unit + thispos, bitregion_start,
			       bitregion_end, part, word_mode, reverse);
      bitsdone += thissize;
    }
}
1521 :
1522 : /* A subroutine of extract_bit_field_1 that converts return value X
1523 : to either MODE or TMODE. MODE, TMODE and UNSIGNEDP are arguments
1524 : to extract_bit_field. */
1525 :
1526 : static rtx
1527 868059 : convert_extracted_bit_field (rtx x, machine_mode mode,
1528 : machine_mode tmode, bool unsignedp)
1529 : {
1530 868059 : if (GET_MODE (x) == tmode || GET_MODE (x) == mode)
1531 : return x;
1532 :
1533 : /* If the x mode is not a scalar integral, first convert to the
1534 : integer mode of that size and then access it as a floating-point
1535 : value via a SUBREG. */
1536 21476 : if (!SCALAR_INT_MODE_P (tmode))
1537 : {
1538 11534 : scalar_int_mode int_mode = int_mode_for_mode (tmode).require ();
1539 11534 : x = convert_to_mode (int_mode, x, unsignedp);
1540 11534 : x = force_reg (int_mode, x);
1541 11534 : return gen_lowpart (tmode, x);
1542 : }
1543 :
1544 9942 : return convert_to_mode (tmode, x, unsignedp);
1545 : }
1546 :
1547 : /* Try to use an ext(z)v pattern to extract a field from OP0.
1548 : Return the extracted value on success, otherwise return null.
1549 : EXTV describes the extraction instruction to use. If OP0_MODE
1550 : is defined, it is the mode of OP0, otherwise OP0 is a BLKmode MEM.
1551 : The other arguments are as for extract_bit_field. */
1552 :
static rtx
extract_bit_field_using_extv (const extraction_insn *extv, rtx op0,
			      opt_scalar_int_mode op0_mode,
			      unsigned HOST_WIDE_INT bitsize,
			      unsigned HOST_WIDE_INT bitnum,
			      int unsignedp, rtx target,
			      machine_mode mode, machine_mode tmode)
{
  class expand_operand ops[4];
  /* SPEC_TARGET remembers the caller-requested target;
     SPEC_TARGET_SUBREG tracks a lowpart of it, so a successful
     expansion into the lowpart can be reported as SPEC_TARGET.  */
  rtx spec_target = target;
  rtx spec_target_subreg = 0;
  scalar_int_mode ext_mode = extv->field_mode;
  unsigned unit = GET_MODE_BITSIZE (ext_mode);

  /* The pattern's field mode must be able to hold the whole field.  */
  if (bitsize == 0 || unit < bitsize)
    return NULL_RTX;

  if (MEM_P (op0))
    /* Get a reference to the first byte of the field.  */
    op0 = narrow_bit_field_mem (op0, extv->struct_mode, bitsize, bitnum,
				&bitnum);
  else
    {
      /* Convert from counting within OP0 to counting in EXT_MODE.  */
      if (BYTES_BIG_ENDIAN)
	bitnum += unit - GET_MODE_BITSIZE (op0_mode.require ());

      /* If op0 is a register, we need it in EXT_MODE to make it
	 acceptable to the format of ext(z)v.  */
      if (GET_CODE (op0) == SUBREG && op0_mode.require () != ext_mode)
	return NULL_RTX;
      if (REG_P (op0) && op0_mode.require () != ext_mode)
	op0 = gen_lowpart_SUBREG (ext_mode, op0);
    }

  /* If BITS_BIG_ENDIAN is zero on a BYTES_BIG_ENDIAN machine, we count
     "backwards" from the size of the unit we are extracting from.
     Otherwise, we count bits from the most significant on a
     BYTES/BITS_BIG_ENDIAN machine.  */

  if (BITS_BIG_ENDIAN != BYTES_BIG_ENDIAN)
    bitnum = unit - bitsize - bitnum;

  if (target == 0)
    target = spec_target = gen_reg_rtx (tmode);

  if (GET_MODE (target) != ext_mode)
    {
      rtx temp;
      /* Don't use LHS paradoxical subreg if explicit truncation is needed
	 between the mode of the extraction (word_mode) and the target
	 mode.  Instead, create a temporary and use convert_move to set
	 the target.  */
      if (REG_P (target)
	  && TRULY_NOOP_TRUNCATION_MODES_P (GET_MODE (target), ext_mode)
	  && (temp = gen_lowpart_if_possible (ext_mode, target)))
	{
	  target = temp;
	  if (partial_subreg_p (GET_MODE (spec_target), ext_mode))
	    spec_target_subreg = target;
	}
      else
	target = gen_reg_rtx (ext_mode);
    }

  /* Emit the ext(z)v pattern: (set target (extract op0 bitsize bitnum)).  */
  create_output_operand (&ops[0], target, ext_mode);
  create_fixed_operand (&ops[1], op0);
  create_integer_operand (&ops[2], bitsize);
  create_integer_operand (&ops[3], bitnum);
  if (maybe_expand_insn (extv->icode, 4, ops))
    {
      target = ops[0].value;
      if (target == spec_target)
	return target;
      if (target == spec_target_subreg)
	return spec_target;
      /* The expander picked a different output; convert it to the
	 requested mode.  */
      return convert_extracted_bit_field (target, mode, tmode, unsignedp);
    }
  return NULL_RTX;
}
1633 :
1634 : /* See whether it would be valid to extract the part of OP0 with
1635 : mode OP0_MODE described by BITNUM and BITSIZE into a value of
1636 : mode MODE using a subreg operation.
1637 : Return the subreg if so, otherwise return null. */
1638 :
1639 : static rtx
1640 816746 : extract_bit_field_as_subreg (machine_mode mode, rtx op0,
1641 : machine_mode op0_mode,
1642 : poly_uint64 bitsize, poly_uint64 bitnum)
1643 : {
1644 816746 : poly_uint64 bytenum;
           /* All four conditions must hold: the bit offset converts exactly to
              a byte offset, the extracted width is exactly MODE, the field is
              the lowpart of OP0 (or of one of its words), and truncating from
              OP0_MODE to MODE needs no explicit instruction on this target.  */
1645 816746 : if (multiple_p (bitnum, BITS_PER_UNIT, &bytenum)
1646 777282 : && known_eq (bitsize, GET_MODE_BITSIZE (mode))
1647 816746 : && lowpart_bit_field_p (bitnum, bitsize, op0_mode)
1648 1594028 : && TRULY_NOOP_TRUNCATION_MODES_P (mode, op0_mode))
1649 657426 : return force_subreg (mode, op0, op0_mode, bytenum);
1650 : return NULL_RTX;
1651 : }
1652 :
1653 : /* A subroutine of extract_bit_field, with the same arguments.
1654 : If UNSIGNEDP is -1, the result need not be sign or zero extended.
1655 : If FALLBACK_P is true, fall back to extract_fixed_bit_field
1656 : if we can find no other means of implementing the operation.
1657 : if FALLBACK_P is false, return NULL instead. */
1658 :
1659 : static rtx
1660 1108774 : extract_bit_field_1 (rtx str_rtx, poly_uint64 bitsize, poly_uint64 bitnum,
1661 : int unsignedp, rtx target, machine_mode mode,
1662 : machine_mode tmode, bool reverse, bool fallback_p,
1663 : rtx *alt_rtl)
1664 : {
1665 1108774 : rtx op0 = str_rtx;
1666 1108774 : machine_mode mode1;
1667 :
1668 1108774 : if (tmode == VOIDmode)
1669 0 : tmode = mode;
1670 :
       /* Peel any SUBREGs off OP0 up front, folding each one's byte offset
          into BITNUM so later code only sees the underlying REG or MEM.  */
1671 1119132 : while (GET_CODE (op0) == SUBREG)
1672 : {
1673 10358 : bitnum += SUBREG_BYTE (op0) * BITS_PER_UNIT;
1674 10358 : op0 = SUBREG_REG (op0);
1675 : }
1676 :
1677 : /* If we have an out-of-bounds access to a register, just return an
1678 : uninitialized register of the required mode. This can occur if the
1679 : source code contains an out-of-bounds access to a small array. */
1680 2025963 : if (REG_P (op0) && known_ge (bitnum, GET_MODE_BITSIZE (GET_MODE (op0))))
1681 0 : return gen_reg_rtx (tmode);
1682 :
1683 1108774 : if (REG_P (op0)
1684 917189 : && mode == GET_MODE (op0)
1685 153632 : && known_eq (bitnum, 0U)
1686 1352046 : && known_eq (bitsize, GET_MODE_BITSIZE (GET_MODE (op0))))
1687 : {
1688 20432 : if (reverse)
1689 0 : op0 = flip_storage_order (mode, op0);
1690 : /* We're trying to extract a full register from itself. */
1691 20432 : return op0;
1692 : }
1693 :
1694 : /* First try to check for vector from vector extractions. */
1695 1024946 : if (VECTOR_MODE_P (GET_MODE (op0))
1696 82612 : && !MEM_P (op0)
1697 82113 : && VECTOR_MODE_P (tmode)
1698 12980 : && known_eq (bitsize, GET_MODE_PRECISION (tmode))
1699 2202644 : && maybe_gt (GET_MODE_SIZE (GET_MODE (op0)), GET_MODE_SIZE (tmode)))
1700 : {
1701 12980 : machine_mode new_mode = GET_MODE (op0);
       /* If the element types differ, look for a same-size vector mode whose
          element type matches TMODE; give up (VOIDmode) if none exists.  */
1702 38940 : if (GET_MODE_INNER (new_mode) != GET_MODE_INNER (tmode))
1703 : {
1704 174 : scalar_mode inner_mode = GET_MODE_INNER (tmode);
1705 174 : poly_uint64 nunits;
1706 348 : if (!multiple_p (GET_MODE_BITSIZE (GET_MODE (op0)),
1707 174 : GET_MODE_UNIT_BITSIZE (tmode), &nunits)
1708 348 : || !related_vector_mode (tmode, inner_mode,
1709 174 : nunits).exists (&new_mode)
1710 332 : || maybe_ne (GET_MODE_SIZE (new_mode),
1711 474 : GET_MODE_SIZE (GET_MODE (op0))))
1712 16 : new_mode = VOIDmode;
1713 : }
1714 12980 : poly_uint64 pos;
1715 12980 : if (new_mode != VOIDmode
1716 12964 : && (convert_optab_handler (vec_extract_optab, new_mode, tmode)
1717 : != CODE_FOR_nothing)
1718 25944 : && multiple_p (bitnum, GET_MODE_BITSIZE (tmode), &pos))
1719 : {
1720 9316 : class expand_operand ops[3];
1721 9316 : machine_mode outermode = new_mode;
1722 9316 : machine_mode innermode = tmode;
1723 9316 : enum insn_code icode
1724 9316 : = convert_optab_handler (vec_extract_optab, outermode, innermode);
1725 :
1726 9316 : if (new_mode != GET_MODE (op0))
1727 33 : op0 = gen_lowpart (new_mode, op0);
1728 9316 : create_output_operand (&ops[0], target, innermode);
1729 9316 : ops[0].target = 1;
1730 9316 : create_input_operand (&ops[1], op0, outermode);
1731 9316 : create_integer_operand (&ops[2], pos);
1732 9316 : if (maybe_expand_insn (icode, 3, ops))
1733 : {
1734 9316 : if (alt_rtl && ops[0].target)
1735 207 : *alt_rtl = target;
1736 9316 : target = ops[0].value;
1737 9316 : if (GET_MODE (target) != mode)
1738 9316 : return gen_lowpart (tmode, target);
1739 : return target;
1740 : }
1741 : }
1742 : }
1743 :
1744 : /* See if we can get a better vector mode before extracting. */
1745 1017565 : if (VECTOR_MODE_P (GET_MODE (op0))
1746 73296 : && !MEM_P (op0)
1747 1224620 : && GET_MODE_INNER (GET_MODE (op0)) != tmode)
1748 : {
1749 9832 : machine_mode new_mode;
1750 :
       /* Pick the starting point of the mode walk from TMODE's class so the
          FOR_EACH_MODE_FROM loop below only visits candidate vector modes of
          the right element class.  */
1751 9832 : if (GET_MODE_CLASS (tmode) == MODE_FLOAT)
1752 557 : new_mode = MIN_MODE_VECTOR_FLOAT;
1753 : else if (GET_MODE_CLASS (tmode) == MODE_FRACT)
1754 0 : new_mode = MIN_MODE_VECTOR_FRACT;
1755 : else if (GET_MODE_CLASS (tmode) == MODE_UFRACT)
1756 0 : new_mode = MIN_MODE_VECTOR_UFRACT;
1757 : else if (GET_MODE_CLASS (tmode) == MODE_ACCUM)
1758 0 : new_mode = MIN_MODE_VECTOR_ACCUM;
1759 : else if (GET_MODE_CLASS (tmode) == MODE_UACCUM)
1760 0 : new_mode = MIN_MODE_VECTOR_UACCUM;
1761 : else
1762 9275 : new_mode = MIN_MODE_VECTOR_INT;
1763 :
1764 157780 : FOR_EACH_MODE_FROM (new_mode, new_mode)
1765 314772 : if (known_eq (GET_MODE_SIZE (new_mode), GET_MODE_SIZE (GET_MODE (op0)))
1766 65510 : && known_eq (GET_MODE_UNIT_SIZE (new_mode), GET_MODE_SIZE (tmode))
1767 177050 : && known_eq (bitsize, GET_MODE_UNIT_PRECISION (new_mode))
1768 19664 : && multiple_p (bitnum, GET_MODE_UNIT_PRECISION (new_mode))
1769 9807 : && targetm.vector_mode_supported_p (new_mode)
1770 167127 : && targetm.modes_tieable_p (GET_MODE (op0), new_mode))
1771 : break;
1772 9832 : if (new_mode != VOIDmode)
1773 9438 : op0 = gen_lowpart (new_mode, op0);
1774 : }
1775 :
1776 : /* Use vec_extract patterns for extracting parts of vectors whenever
1777 : available. If that fails, see whether the current modes and bitregion
1778 : give a natural subreg. */
1779 1079026 : machine_mode outermode = GET_MODE (op0);
1780 1079026 : if (VECTOR_MODE_P (outermode) && !MEM_P (op0))
1781 : {
1782 72797 : scalar_mode innermode = GET_MODE_INNER (outermode);
1783 :
1784 72797 : enum insn_code icode
1785 72797 : = convert_optab_handler (vec_extract_optab, outermode, innermode);
1786 :
1787 72797 : poly_uint64 pos;
1788 72797 : if (icode != CODE_FOR_nothing
1789 72800 : && known_eq (bitsize, GET_MODE_PRECISION (innermode))
1790 143484 : && multiple_p (bitnum, GET_MODE_PRECISION (innermode), &pos))
1791 : {
1792 70684 : class expand_operand ops[3];
1793 :
1794 70684 : create_output_operand (&ops[0], target,
1795 70684 : insn_data[icode].operand[0].mode);
1796 70684 : ops[0].target = 1;
1797 70684 : create_input_operand (&ops[1], op0, outermode);
1798 70684 : create_integer_operand (&ops[2], pos);
1799 70684 : if (maybe_expand_insn (icode, 3, ops))
1800 : {
1801 70684 : if (alt_rtl && ops[0].target)
1802 16456 : *alt_rtl = target;
1803 70684 : target = ops[0].value;
1804 70684 : if (GET_MODE (target) != mode)
1805 70684 : return gen_lowpart (tmode, target);
1806 : return target;
1807 : }
1808 : }
1809 : /* Using subregs is useful if we're extracting one register vector
1810 : from a multi-register vector. extract_bit_field_as_subreg checks
1811 : for valid bitsize and bitnum, so we don't need to do that here. */
1812 2113 : if (VECTOR_MODE_P (mode))
1813 : {
1814 93 : rtx sub = extract_bit_field_as_subreg (mode, op0, outermode,
1815 : bitsize, bitnum);
1816 93 : if (sub)
1817 : return sub;
1818 : }
1819 : }
1820 :
1821 : /* Make sure we are playing with integral modes. Pun with subregs
1822 : if we aren't. */
1823 1008277 : opt_scalar_int_mode op0_mode = int_mode_for_mode (GET_MODE (op0));
1824 1008277 : scalar_int_mode imode;
1825 1008277 : if (!op0_mode.exists (&imode) || imode != GET_MODE (op0))
1826 : {
1827 169350 : if (MEM_P (op0))
1828 165591 : op0 = adjust_bitfield_address_size (op0, op0_mode.else_blk (),
1829 : 0, MEM_SIZE (op0))
;
1830 3759 : else if (op0_mode.exists (&imode))
1831 : {
1832 3728 : op0 = gen_lowpart (imode, op0);
1833 :
1834 : /* If we got a SUBREG, force it into a register since we
1835 : aren't going to be able to do another SUBREG on it. */
1836 3728 : if (GET_CODE (op0) == SUBREG)
1837 3451 : op0 = force_reg (imode, op0);
1838 : }
1839 : else
1840 : {
           /* No integer mode of the right size exists (e.g. CC modes):
              spill OP0 to a stack temporary and treat it as BLKmode MEM.  */
1841 62 : poly_int64 size = GET_MODE_SIZE (GET_MODE (op0));
1842 31 : rtx mem = assign_stack_temp (GET_MODE (op0), size);
1843 31 : emit_move_insn (mem, op0);
1844 31 : op0 = adjust_bitfield_address_size (mem, BLKmode, 0, size);
1845 : }
1846 : }
1847 :
1848 : /* ??? We currently assume TARGET is at least as big as BITSIZE.
1849 : If that's wrong, the solution is to test for it and set TARGET to 0
1850 : if needed. */
1851 :
1852 : /* Get the mode of the field to use for atomic access or subreg
1853 : conversion. */
1854 1008277 : if (!SCALAR_INT_MODE_P (tmode)
1855 1008277 : || !mode_for_size (bitsize, GET_MODE_CLASS (tmode), 0).exists (&mode1))
1856 314563 : mode1 = mode;
1857 1008277 : gcc_assert (mode1 != BLKmode);
1858 :
1859 : /* Extraction of a full MODE1 value can be done with a subreg as long
1860 : as the least significant bit of the value is the least significant
1861 : bit of either OP0 or a word of OP0. */
1862 1008277 : if (!MEM_P (op0) && !reverse && op0_mode.exists (&imode))
1863 : {
1864 816653 : rtx sub = extract_bit_field_as_subreg (mode1, op0, imode,
1865 : bitsize, bitnum);
1866 816653 : if (sub)
1867 651199 : return convert_extracted_bit_field (sub, mode, tmode, unsignedp);
1868 : }
1869 :
1870 : /* Extraction of a full MODE1 value can be done with a load as long as
1871 : the field is on a byte boundary and is sufficiently aligned. */
1872 357078 : poly_uint64 bytenum;
1873 357078 : if (simple_mem_bitfield_p (op0, bitsize, bitnum, mode1, &bytenum))
1874 : {
1875 50258 : op0 = adjust_bitfield_address (op0, mode1, bytenum);
1876 50258 : if (reverse)
1877 51 : op0 = flip_storage_order (mode1, op0);
1878 50258 : return convert_extracted_bit_field (op0, mode, tmode, unsignedp);
1879 : }
1880 :
1881 : /* If we have a memory source and a non-constant bit offset, restrict
1882 : the memory to the referenced bytes. This is a worst-case fallback
1883 : but is useful for things like vector booleans. */
1884 306820 : if (MEM_P (op0) && !bitnum.is_constant ())
1885 : {
1886 : bytenum = bits_to_bytes_round_down (bitnum);
1887 : bitnum = num_trailing_bits (bitnum);
1888 : poly_uint64 bytesize = bits_to_bytes_round_up (bitnum + bitsize);
1889 : op0 = adjust_bitfield_address_size (op0, BLKmode, bytenum, bytesize);
1890 : op0_mode = opt_scalar_int_mode ();
1891 : }
1892 :
1893 : /* It's possible we'll need to handle other cases here for
1894 : polynomial bitnum and bitsize. */
1895 :
1896 : /* From here on we need to be looking at a fixed-size insertion. */
1897 306820 : return extract_integral_bit_field (op0, op0_mode, bitsize.to_constant (),
1898 : bitnum.to_constant (), unsignedp,
1899 306820 : target, mode, tmode, reverse, fallback_p);
1900 : }
1901 :
1902 : /* Subroutine of extract_bit_field_1, with the same arguments, except
1903 : that BITSIZE and BITNUM are constant. Handle cases specific to
1904 : integral modes. If OP0_MODE is defined, it is the mode of OP0,
1905 : otherwise OP0 is a BLKmode MEM. */
1906 :
1907 : static rtx
1908 306820 : extract_integral_bit_field (rtx op0, opt_scalar_int_mode op0_mode,
1909 : unsigned HOST_WIDE_INT bitsize,
1910 : unsigned HOST_WIDE_INT bitnum, int unsignedp,
1911 : rtx target, machine_mode mode, machine_mode tmode,
1912 : bool reverse, bool fallback_p)
1913 : {
1914 : /* Handle fields bigger than a word. */
1915 :
1916 309680 : if (bitsize > BITS_PER_WORD)
1917 : {
1918 : /* Here we transfer the words of the field
1919 : in the order least significant first.
1920 : This is because the most significant word is the one which may
1921 : be less than full. */
1922 :
1923 1444 : const bool backwards = WORDS_BIG_ENDIAN;
1924 1444 : unsigned int nwords = (bitsize + (BITS_PER_WORD - 1)) / BITS_PER_WORD;
1925 1444 : unsigned int i;
1926 1444 : rtx_insn *last;
1927 :
1928 1444 : if (target == 0 || !REG_P (target) || !valid_multiword_target_p (target))
1929 1422 : target = gen_reg_rtx (mode);
1930 :
1931 : /* In case we're about to clobber a base register or something
1932 : (see gcc.c-torture/execute/20040625-1.c). */
1933 1444 : if (reg_mentioned_p (target, op0))
1934 0 : target = gen_reg_rtx (mode);
1935 :
1936 : /* Indicate for flow that the entire target reg is being set. */
1937 1444 : emit_clobber (target);
1938 :
1939 : /* The mode must be fixed-size, since extract_bit_field_1 handles
1940 : extractions from variable-sized objects before calling this
1941 : function. */
1942 1444 : unsigned int target_size
1943 1444 : = GET_MODE_SIZE (GET_MODE (target)).to_constant ();
1944 1444 : last = get_last_insn ();
1945 4332 : for (i = 0; i < nwords; i++)
1946 : {
1947 : /* If I is 0, use the low-order word in both field and target;
1948 : if I is 1, use the next to lowest word; and so on. */
1949 : /* Word number in TARGET to use. */
1950 2888 : unsigned int wordnum
1951 : = (backwards ? target_size / UNITS_PER_WORD - i - 1 : i);
1952 : /* Offset from start of field in OP0. */
1953 5776 : unsigned int bit_offset = (backwards ^ reverse
1954 2888 : ? MAX ((int) bitsize - ((int) i + 1)
1955 : * BITS_PER_WORD,
1956 : 0)
1957 2888 : : (int) i * BITS_PER_WORD);
1958 2888 : rtx target_part = operand_subword (target, wordnum, 1, VOIDmode);
1959 2888 : rtx result_part
1960 2928 : = extract_bit_field_1 (op0, MIN (BITS_PER_WORD,
1961 : bitsize - i * BITS_PER_WORD),
1962 2888 : bitnum + bit_offset,
1963 : (unsignedp ? 1 : -1), target_part,
1964 : mode, word_mode, reverse, fallback_p, NULL);
1965 :
1966 2888 : gcc_assert (target_part);
1967 2888 : if (!result_part)
1968 : {
1969 0 : delete_insns_since (last);
1970 0 : return NULL;
1971 : }
1972 :
1973 2888 : if (result_part != target_part)
1974 2774 : emit_move_insn (target_part, result_part);
1975 : }
1976 :
1977 1444 : if (unsignedp)
1978 : {
1979 : /* Unless we've filled TARGET, the upper regs in a multi-reg value
1980 : need to be zero'd out. */
1981 1424 : if (target_size > nwords * UNITS_PER_WORD)
1982 : {
1983 0 : unsigned int i, total_words;
1984 :
1985 0 : total_words = target_size / UNITS_PER_WORD;
1986 0 : for (i = nwords; i < total_words; i++)
1987 0 : emit_move_insn
1988 0 : (operand_subword (target,
1989 0 : backwards ? total_words - i - 1 : i,
1990 : 1, VOIDmode),
1991 : const0_rtx);
1992 : }
1993 1424 : return target;
1994 : }
1995 :
1996 : /* Signed bit field: sign-extend with two arithmetic shifts. */
1997 40 : target = expand_shift (LSHIFT_EXPR, mode, target,
1998 20 : GET_MODE_BITSIZE (mode) - bitsize, NULL_RTX, 0);
1999 40 : return expand_shift (RSHIFT_EXPR, mode, target,
2000 20 : GET_MODE_BITSIZE (mode) - bitsize, NULL_RTX, 0);
2001 : }
2002 :
2003 : /* If OP0 is a multi-word register, narrow it to the affected word.
2004 : If the region spans two words, defer to extract_split_bit_field. */
2005 473166 : if (!MEM_P (op0) && GET_MODE_SIZE (op0_mode.require ()) > UNITS_PER_WORD)
2006 : {
2007 3997 : if (bitnum % BITS_PER_WORD + bitsize > BITS_PER_WORD)
2008 : {
2009 1096 : if (!fallback_p)
2010 : return NULL_RTX;
2011 50 : target = extract_split_bit_field (op0, op0_mode, bitsize, bitnum,
2012 : unsignedp, reverse);
2013 50 : return convert_extracted_bit_field (target, mode, tmode, unsignedp);
2014 : }
2015 : /* If OP0 is a hard register, copy it to a pseudo before calling
2016 : force_subreg. */
2017 2901 : if (REG_P (op0) && HARD_REGISTER_P (op0))
2018 1 : op0 = copy_to_reg (op0);
2019 2901 : op0 = force_subreg (word_mode, op0, op0_mode.require (),
2020 3365 : bitnum / BITS_PER_WORD * UNITS_PER_WORD);
2021 2901 : op0_mode = word_mode;
2022 3133 : bitnum %= BITS_PER_WORD;
2023 : }
2024 :
2025 : /* From here on we know the desired field is smaller than a word.
2026 : If OP0 is a register, it too fits within a word. */
2027 304280 : enum extraction_pattern pattern = unsignedp ? EP_extzv : EP_extv;
2028 304280 : extraction_insn extv;
2029 304280 : if (!MEM_P (op0)
2030 164357 : && !reverse
2031 : /* ??? We could limit the structure size to the part of OP0 that
2032 : contains the field, with appropriate checks for endianness
2033 : and TARGET_TRULY_NOOP_TRUNCATION. */
2034 468629 : && get_best_reg_extraction_insn (&extv, pattern,
2035 482383 : GET_MODE_BITSIZE (op0_mode.require ()),
2036 : tmode))
2037 : {
2038 150595 : rtx result = extract_bit_field_using_extv (&extv, op0, op0_mode,
2039 : bitsize, bitnum,
2040 : unsignedp, target, mode,
2041 : tmode);
2042 150595 : if (result)
2043 : return result;
2044 : }
2045 :
2046 : /* If OP0 is a memory, try copying it to a register and seeing if a
2047 : cheap register alternative is available. */
       /* NOTE(review): bitwise & below where && is conventional; harmless
          because both operands evaluate to 0 or 1, but worth confirming the
          intent upstream. */
2048 302526 : if (MEM_P (op0) & !reverse)
2049 : {
2050 139720 : if (get_best_mem_extraction_insn (&extv, pattern, bitsize, bitnum,
2051 : tmode))
2052 : {
2053 0 : rtx result = extract_bit_field_using_extv (&extv, op0, op0_mode,
2054 : bitsize, bitnum,
2055 : unsignedp, target, mode,
2056 : tmode);
2057 0 : if (result)
2058 0 : return result;
2059 : }
2060 :
2061 139720 : rtx_insn *last = get_last_insn ();
2062 :
2063 : /* Try loading part of OP0 into a register and extracting the
2064 : bitfield from that. */
2065 139720 : unsigned HOST_WIDE_INT bitpos;
2066 139720 : rtx xop0 = adjust_bit_field_mem_for_reg (pattern, op0, bitsize, bitnum,
2067 : 0, 0, tmode, &bitpos);
2068 139720 : if (xop0)
2069 : {
2070 137076 : xop0 = copy_to_reg (xop0);
2071 137076 : rtx result = extract_bit_field_1 (xop0, bitsize, bitpos,
2072 : unsignedp, target,
2073 : mode, tmode, reverse, false, NULL);
2074 137076 : if (result)
2075 : return result;
2076 137076 : delete_insns_since (last);
2077 : }
2078 : }
2079 :
2080 302526 : if (!fallback_p)
2081 : return NULL;
2082 :
2083 : /* Find a correspondingly-sized integer field, so we can apply
2084 : shifts and masks to it. */
2085 166496 : scalar_int_mode int_mode;
2086 166496 : if (!int_mode_for_mode (tmode).exists (&int_mode))
2087 : /* If this fails, we should probably push op0 out to memory and then
2088 : do a load. */
2089 0 : int_mode = int_mode_for_mode (mode).require ();
2090 :
2091 166496 : target = extract_fixed_bit_field (int_mode, op0, op0_mode, bitsize,
2092 : bitnum, target, unsignedp, reverse);
2093 :
2094 : /* Complex values must be reversed piecewise, so we need to undo the global
2095 : reversal, convert to the complex mode and reverse again. */
2096 166496 : if (reverse && COMPLEX_MODE_P (tmode))
2097 : {
2098 0 : target = flip_storage_order (int_mode, target);
2099 0 : target = convert_extracted_bit_field (target, mode, tmode, unsignedp);
2100 0 : target = flip_storage_order (tmode, target);
2101 : }
2102 : else
2103 166496 : target = convert_extracted_bit_field (target, mode, tmode, unsignedp);
2104 :
2105 : return target;
2106 : }
2107 :
2108 : /* Generate code to extract a byte-field from STR_RTX
2109 : containing BITSIZE bits, starting at BITNUM,
2110 : and put it in TARGET if possible (if TARGET is nonzero).
2111 : Regardless of TARGET, we return the rtx for where the value is placed.
2112 :
2113 : STR_RTX is the structure containing the byte (a REG or MEM).
2114 : UNSIGNEDP is nonzero if this is an unsigned bit field.
2115 : MODE is the natural mode of the field value once extracted.
2116 : TMODE is the mode the caller would like the value to have;
2117 : but the value may be returned with type MODE instead.
2118 :
2119 : If REVERSE is true, the extraction is to be done in reverse order.
2120 :
2121 : If a TARGET is specified and we can store in it at no extra cost,
2122 : we do so, and return TARGET.
2123 : Otherwise, we return a REG of mode TMODE or MODE, with TMODE preferred
2124 : if they are equally easy.
2125 :
2126 : If the result can be stored at TARGET, and ALT_RTL is non-NULL,
2127 : then *ALT_RTL is set to TARGET (before legitimization). */
2128 :
2129 : rtx
2130 968810 : extract_bit_field (rtx str_rtx, poly_uint64 bitsize, poly_uint64 bitnum,
2131 : int unsignedp, rtx target, machine_mode mode,
2132 : machine_mode tmode, bool reverse, rtx *alt_rtl)
2133 : {
2134 968810 : machine_mode mode1;
2135 :
2136 : /* Handle -fstrict-volatile-bitfields in the cases where it applies. */
       /* MODE1 is the mode used for the strict-volatile-bitfield check:
          prefer the source's mode, then the target's, then TMODE.  */
2137 1937620 : if (maybe_ne (GET_MODE_BITSIZE (GET_MODE (str_rtx)), 0))
2138 : mode1 = GET_MODE (str_rtx);
2139 262962 : else if (target && maybe_ne (GET_MODE_BITSIZE (GET_MODE (target)), 0))
2140 : mode1 = GET_MODE (target);
2141 : else
2142 : mode1 = tmode;
2143 :
2144 968810 : unsigned HOST_WIDE_INT ibitsize, ibitnum;
2145 968810 : scalar_int_mode int_mode;
2146 968810 : if (bitsize.is_constant (&ibitsize)
2147 968810 : && bitnum.is_constant (&ibitnum)
2148 1823608 : && is_a <scalar_int_mode> (mode1, &int_mode)
2149 854805 : && strict_volatile_bitfield_p (str_rtx, ibitsize, ibitnum,
2150 : int_mode, 0, 0))
2151 : {
2152 : /* Extraction of a full INT_MODE value can be done with a simple load.
2153 : We know here that the field can be accessed with one single
2154 : instruction. For targets that support unaligned memory,
2155 : an unaligned access may be necessary. */
2156 14 : if (ibitsize == GET_MODE_BITSIZE (int_mode))
2157 : {
2158 0 : rtx result = adjust_bitfield_address (str_rtx, int_mode,
2159 : ibitnum / BITS_PER_UNIT);
2160 0 : if (reverse)
2161 0 : result = flip_storage_order (int_mode, result);
2162 0 : gcc_assert (ibitnum % BITS_PER_UNIT == 0);
2163 0 : return convert_extracted_bit_field (result, mode, tmode, unsignedp);
2164 : }
2165 :
       /* Narrow the memory to exactly the INT_MODE word containing the field,
          then force a single register load before extracting from it.  */
2166 7 : str_rtx = narrow_bit_field_mem (str_rtx, int_mode, ibitsize, ibitnum,
2167 : &ibitnum);
2168 14 : gcc_assert (ibitnum + ibitsize <= GET_MODE_BITSIZE (int_mode));
2169 7 : str_rtx = copy_to_reg (str_rtx);
2170 7 : return extract_bit_field_1 (str_rtx, ibitsize, ibitnum, unsignedp,
2171 : target, mode, tmode, reverse, true, alt_rtl);
2172 : }
2173 :
2174 968803 : return extract_bit_field_1 (str_rtx, bitsize, bitnum, unsignedp,
2175 968803 : target, mode, tmode, reverse, true, alt_rtl);
2176 : }
2177 :
2178 : /* Use shifts and boolean operations to extract a field of BITSIZE bits
2179 : from bit BITNUM of OP0. If OP0_MODE is defined, it is the mode of OP0,
2180 : otherwise OP0 is a BLKmode MEM.
2181 :
2182 : UNSIGNEDP is nonzero for an unsigned bit field (don't sign-extend value).
2183 : If REVERSE is true, the extraction is to be done in reverse order.
2184 :
2185 : If TARGET is nonzero, attempts to store the value there
2186 : and return TARGET, but this is not guaranteed.
2187 : If TARGET is not used, create a pseudo-reg of mode TMODE for the value. */
2188 :
2189 : static rtx
2190 184644 : extract_fixed_bit_field (machine_mode tmode, rtx op0,
2191 : opt_scalar_int_mode op0_mode,
2192 : unsigned HOST_WIDE_INT bitsize,
2193 : unsigned HOST_WIDE_INT bitnum, rtx target,
2194 : int unsignedp, bool reverse)
2195 : {
2196 184644 : scalar_int_mode mode;
2197 184644 : if (MEM_P (op0))
2198 : {
       /* Find the widest aligned access mode (up to a word) covering the
          field; if none exists the field must straddle a word boundary.  */
2199 153527 : if (!get_best_mode (bitsize, bitnum, 0, 0, MEM_ALIGN (op0),
2200 153527 : BITS_PER_WORD, MEM_VOLATILE_P (op0), &mode))
2201 : /* The only way this should occur is if the field spans word
2202 : boundaries. */
2203 4245 : return extract_split_bit_field (op0, op0_mode, bitsize, bitnum,
2204 4245 : unsignedp, reverse);
2205 :
2206 149282 : op0 = narrow_bit_field_mem (op0, mode, bitsize, bitnum, &bitnum);
2207 : }
2208 : else
2209 31117 : mode = op0_mode.require ();
2210 :
2211 180399 : return extract_fixed_bit_field_1 (tmode, op0, mode, bitsize, bitnum,
2212 180399 : target, unsignedp, reverse);
2213 : }
2214 :
2215 : /* Helper function for extract_fixed_bit_field, extracts
2216 : the bit field always using MODE, which is the mode of OP0.
2217 : If UNSIGNEDP is -1, the result need not be sign or zero extended.
2218 : The other arguments are as for extract_fixed_bit_field. */
2219 :
2220 : static rtx
2221 180399 : extract_fixed_bit_field_1 (machine_mode tmode, rtx op0, scalar_int_mode mode,
2222 : unsigned HOST_WIDE_INT bitsize,
2223 : unsigned HOST_WIDE_INT bitnum, rtx target,
2224 : int unsignedp, bool reverse)
2225 : {
2226 : /* Note that bitsize + bitnum can be greater than GET_MODE_BITSIZE (mode)
2227 : for invalid input, such as extract equivalent of f5 from
2228 : gcc.dg/pr48335-2.c. */
2229 :
2230 180399 : if (reverse ? !BYTES_BIG_ENDIAN : BYTES_BIG_ENDIAN)
2231 : /* BITNUM is the distance between our msb and that of OP0.
2232 : Convert it to the distance from the lsb. */
2233 426 : bitnum = GET_MODE_BITSIZE (mode) - bitsize - bitnum;
2234 :
2235 : /* Now BITNUM is always the distance between the field's lsb and that of OP0.
2236 : We have reduced the big-endian case to the little-endian case. */
2237 180399 : if (reverse)
2238 213 : op0 = flip_storage_order (mode, op0);
2239 :
2240 180399 : if (unsignedp)
2241 : {
2242 121078 : if (bitnum)
2243 : {
2244 : /* If the field does not already start at the lsb,
2245 : shift it so it does. */
2246 : /* Maybe propagate the target for the shift. */
2247 45034 : rtx subtarget = (target != 0 && REG_P (target) ? target : 0);
2248 45034 : if (tmode != mode)
2249 24080 : subtarget = 0;
2250 45034 : op0 = expand_shift (RSHIFT_EXPR, mode, op0, bitnum, subtarget, 1);
2251 : }
2252 : /* Convert the value to the desired mode. TMODE must also be a
2253 : scalar integer for this conversion to make sense, since we
2254 : shouldn't reinterpret the bits. */
2255 121078 : scalar_int_mode new_mode = as_a <scalar_int_mode> (tmode);
2256 121078 : if (mode != new_mode)
2257 43303 : op0 = convert_to_mode (new_mode, op0, 1);
2258 :
2259 : /* Unless the msb of the field used to be the msb when we shifted,
2260 : mask out the upper bits. */
2261 :
           /* UNSIGNEDP == -1 means the caller does not care about the upper
              bits (see the function comment), so the mask can be skipped.  */
2262 121078 : if (GET_MODE_BITSIZE (mode) != bitnum + bitsize
2263 121078 : && unsignedp != -1)
2264 95010 : return expand_binop (new_mode, and_optab, op0,
2265 : mask_rtx (new_mode, 0, bitsize, 0),
2266 95010 : target, 1, OPTAB_LIB_WIDEN);
2267 : return op0;
2268 : }
2269 :
2270 : /* To extract a signed bit-field, first shift its msb to the msb of the word,
2271 : then arithmetic-shift its lsb to the lsb of the word. */
2272 59321 : op0 = force_reg (mode, op0);
2273 :
2274 : /* Find the narrowest integer mode that contains the field. */
2275 :
2276 59321 : opt_scalar_int_mode mode_iter;
2277 147139 : FOR_EACH_MODE_IN_CLASS (mode_iter, MODE_INT)
2278 294278 : if (GET_MODE_BITSIZE (mode_iter.require ()) >= bitsize + bitnum)
2279 : break;
2280 :
2281 59321 : mode = mode_iter.require ();
2282 59321 : op0 = convert_to_mode (mode, op0, 0);
2283 :
2284 59321 : if (mode != tmode)
2285 4769 : target = 0;
2286 :
2287 118642 : if (GET_MODE_BITSIZE (mode) != (bitsize + bitnum))
2288 : {
2289 54406 : int amount = GET_MODE_BITSIZE (mode) - (bitsize + bitnum);
2290 : /* Maybe propagate the target for the shift. */
2291 54406 : rtx subtarget = (target != 0 && REG_P (target) ? target : 0);
2292 54406 : op0 = expand_shift (LSHIFT_EXPR, mode, op0, amount, subtarget, 1);
2293 : }
2294 :
2295 118642 : return expand_shift (RSHIFT_EXPR, mode, op0,
2296 59321 : GET_MODE_BITSIZE (mode) - bitsize, target, 0);
2297 : }
2298 :
2299 : /* Return a constant integer (CONST_INT or CONST_DOUBLE) rtx with the value
2300 : VALUE << BITPOS. */
2301 :
2302 : static rtx
2303 73417 : lshift_value (machine_mode mode, unsigned HOST_WIDE_INT value,
2304 : int bitpos)
2305 : {
       /* The shift is done in wide-int arithmetic, so it cannot overflow a
          host word before being re-encoded as a MODE-sized constant.  */
2306 73417 : return immed_wide_int_const (wi::lshift (value, bitpos), mode);
2307 : }
2308 :
2309 : /* Extract a bit field that is split across two words
2310 : and return an RTX for the result.
2311 :
2312 : OP0 is the REG, SUBREG or MEM rtx for the first of the two words.
2313 : BITSIZE is the field width; BITPOS, position of its first bit, in the word.
2314 : UNSIGNEDP is 1 if should zero-extend the contents; else sign-extend.
2315 : If OP0_MODE is defined, it is the mode of OP0, otherwise OP0 is
2316 : a BLKmode MEM.
2317 :
2318 : If REVERSE is true, the extraction is to be done in reverse order. */
2319 :
2320 : static rtx
2321 4295 : extract_split_bit_field (rtx op0, opt_scalar_int_mode op0_mode,
2322 : unsigned HOST_WIDE_INT bitsize,
2323 : unsigned HOST_WIDE_INT bitpos, int unsignedp,
2324 : bool reverse)
2325 : {
2326 4295 : unsigned int unit;
2327 4295 : unsigned int bitsdone = 0;
2328 4295 : rtx result = NULL_RTX;
2329 4295 : int first = 1;
2330 :
2331 : /* Make sure UNIT isn't larger than BITS_PER_WORD, we can only handle that
2332 : much at a time. */
2333 4295 : if (REG_P (op0) || GET_CODE (op0) == SUBREG)
2334 50 : unit = BITS_PER_WORD;
2335 : else
2336 6278 : unit = MIN (MEM_ALIGN (op0), BITS_PER_WORD);
2337 :
       /* Pull the field out one UNIT-bounded piece at a time, accumulating
          the pieces into RESULT with shifts and IOR.  */
2338 17975 : while (bitsdone < bitsize)
2339 : {
2340 13680 : unsigned HOST_WIDE_INT thissize;
2341 13680 : rtx part;
2342 13680 : unsigned HOST_WIDE_INT thispos;
2343 13680 : unsigned HOST_WIDE_INT offset;
2344 :
2345 13680 : offset = (bitpos + bitsdone) / unit;
2346 13680 : thispos = (bitpos + bitsdone) % unit;
2347 :
2348 : /* THISSIZE must not overrun a word boundary. Otherwise,
2349 : extract_fixed_bit_field will call us again, and we will mutually
2350 : recurse forever. */
2351 13680 : thissize = MIN (bitsize - bitsdone, BITS_PER_WORD);
2352 13680 : thissize = MIN (thissize, unit - thispos);
2353 :
2354 : /* If OP0 is a register, then handle OFFSET here. */
2355 13680 : rtx op0_piece = op0;
2356 13680 : opt_scalar_int_mode op0_piece_mode = op0_mode;
2357 13680 : if (SUBREG_P (op0) || REG_P (op0))
2358 : {
2359 100 : op0_piece = operand_subword_force (op0, offset, op0_mode.require ());
2360 100 : op0_piece_mode = word_mode;
2361 100 : offset = 0;
2362 : }
2363 :
2364 : /* Extract the parts in bit-counting order,
2365 : whose meaning is determined by BYTES_PER_UNIT.
2366 : OFFSET is in UNITs, and UNIT is in bits. */
           /* Each piece is extracted zero-extended (unsignedp == 1) so IOR
              combines them without sign-bit contamination; the final sign
              extension, if any, happens once at the end.  */
2367 27360 : part = extract_fixed_bit_field (word_mode, op0_piece, op0_piece_mode,
2368 13680 : thissize, offset * unit + thispos,
2369 : 0, 1, reverse);
2370 13680 : bitsdone += thissize;
2371 :
2372 : /* Shift this part into place for the result. */
2373 13680 : if (reverse ? !BYTES_BIG_ENDIAN : BYTES_BIG_ENDIAN)
2374 : {
2375 4 : if (bitsize != bitsdone)
2376 2 : part = expand_shift (LSHIFT_EXPR, word_mode, part,
2377 2 : bitsize - bitsdone, 0, 1);
2378 : }
2379 : else
2380 : {
2381 13676 : if (bitsdone != thissize)
2382 9383 : part = expand_shift (LSHIFT_EXPR, word_mode, part,
2383 9383 : bitsdone - thissize, 0, 1);
2384 : }
2385 :
2386 13680 : if (first)
2387 : result = part;
2388 : else
2389 : /* Combine the parts with bitwise or. This works
2390 : because we extracted each part as an unsigned bit field. */
2391 9385 : result = expand_binop (word_mode, ior_optab, part, result, NULL_RTX, 1,
2392 : OPTAB_LIB_WIDEN);
2393 :
2394 13680 : first = 0;
2395 : }
2396 :
2397 : /* Unsigned bit field: we are done. */
2398 4295 : if (unsignedp)
2399 : return result;
2400 : /* Signed bit field: sign-extend with two arithmetic shifts. */
2401 1464 : result = expand_shift (LSHIFT_EXPR, word_mode, result,
2402 1464 : BITS_PER_WORD - bitsize, NULL_RTX, 0);
2403 1464 : return expand_shift (RSHIFT_EXPR, word_mode, result,
2404 1464 : BITS_PER_WORD - bitsize, NULL_RTX, 0);
2405 : }
2406 :
2407 : /* Try to read the low bits of SRC as an rvalue of mode MODE, preserving
2408 : the bit pattern. SRC_MODE is the mode of SRC; if this is smaller than
2409 : MODE, fill the upper bits with zeros. Fail if the layout of either
2410 : mode is unknown (as for CC modes) or if the extraction would involve
2411 : unprofitable mode punning. Return the value on success, otherwise
2412 : return null.
2413 :
2414 : This is different from gen_lowpart* in these respects:
2415 :
2416 : - the returned value must always be considered an rvalue
2417 :
2418 : - when MODE is wider than SRC_MODE, the extraction involves
2419 : a zero extension
2420 :
2421 : - when MODE is smaller than SRC_MODE, the extraction involves
2422 : a truncation (and is thus subject to TARGET_TRULY_NOOP_TRUNCATION).
2423 :
2424 : In other words, this routine performs a computation, whereas the
2425 : gen_lowpart* routines are conceptually lvalue or rvalue subreg
2426 : operations. */
2427 :
2428 : rtx
2429 114053 : extract_low_bits (machine_mode mode, machine_mode src_mode, rtx src)
2430 : {
2431 114053 : scalar_int_mode int_mode, src_int_mode;
2432 :
 : /* Nothing to do when no mode change is requested. */
2433 114053 : if (mode == src_mode)
2434 : return src;
2435 :
2436 78147 : if (CONSTANT_P (src))
2437 : {
2438 : /* simplify_gen_subreg can't be used here, as if simplify_subreg
2439 : fails, it will happily create (subreg (symbol_ref)) or similar
2440 : invalid SUBREGs. */
2441 14464 : poly_uint64 byte = subreg_lowpart_offset (mode, src_mode);
2442 14464 : rtx ret = simplify_subreg (mode, src, src_mode, byte);
2443 14464 : if (ret)
2444 : return ret;
2445 :
2446 22 : if (GET_MODE (src) == VOIDmode
2447 22 : || !validate_subreg (mode, src_mode, src, byte))
2448 5 : return NULL_RTX;
2449 :
 : /* Force the constant into a register so that a plain SUBREG
 : of it is valid. */
2450 17 : src = force_reg (GET_MODE (src), src);
2451 17 : return gen_rtx_SUBREG (mode, src, byte);
2452 : }
2453 :
 : /* The bit layout of CC modes is unknown, so we cannot pun
 : through them. */
2454 63683 : if (GET_MODE_CLASS (mode) == MODE_CC || GET_MODE_CLASS (src_mode) == MODE_CC)
2455 : return NULL_RTX;
2456 :
 : /* Same-sized, tieable modes: try a direct lowpart first. */
2457 127366 : if (known_eq (GET_MODE_BITSIZE (mode), GET_MODE_BITSIZE (src_mode))
2458 63683 : && targetm.modes_tieable_p (mode, src_mode))
2459 : {
2460 3922 : rtx x = gen_lowpart_common (mode, src);
2461 3922 : if (x)
2462 : return x;
2463 : }
2464 :
 : /* Otherwise go through equal-sized integer modes: fail if either
 : mode has no integer counterpart. */
2465 59772 : if (!int_mode_for_mode (src_mode).exists (&src_int_mode)
2466 59759 : || !int_mode_for_mode (mode).exists (&int_mode))
2467 13 : return NULL_RTX;
2468 :
 : /* The mode punning is only profitable if both conversions between
 : the original modes and their integer counterparts are free. */
2469 59759 : if (!targetm.modes_tieable_p (src_int_mode, src_mode))
2470 : return NULL_RTX;
2471 58683 : if (!targetm.modes_tieable_p (int_mode, mode))
2472 : return NULL_RTX;
2473 :
2474 56622 : src = gen_lowpart (src_int_mode, src);
2475 56622 : if (!validate_subreg (int_mode, src_int_mode, src,
2476 : subreg_lowpart_offset (int_mode, src_int_mode)))
2477 : return NULL_RTX;
2478 :
 : /* Zero-extend or truncate in the integer domain (this is where a
 : widening fills the upper bits with zeros), then reinterpret the
 : integer result in MODE. */
2479 56622 : src = convert_modes (int_mode, src_int_mode, src, true);
2480 56622 : src = gen_lowpart (mode, src);
2481 56622 : return src;
2482 : }
2483 :
2484 : /* Add INC into TARGET. */
2485 :
2486 : void
2487 1183 : expand_inc (rtx target, rtx inc)
2488 : {
 : /* Ask expand_binop to add in place; it may still return a
 : different rtx (e.g. a new pseudo) if that is cheaper. */
2489 1183 : rtx value = expand_binop (GET_MODE (target), add_optab,
2490 : target, inc,
2491 : target, 0, OPTAB_LIB_WIDEN);
 : /* If the result did not land in TARGET, copy it there. */
2492 1183 : if (value != target)
2493 61 : emit_move_insn (target, value);
2494 1183 : }
2495 :
2496 : /* Subtract DEC from TARGET. */
2497 :
2498 : void
2499 1218 : expand_dec (rtx target, rtx dec)
2500 : {
 : /* Ask expand_binop to subtract in place; it may still return a
 : different rtx if that is cheaper. */
2501 1218 : rtx value = expand_binop (GET_MODE (target), sub_optab,
2502 : target, dec,
2503 : target, 0, OPTAB_LIB_WIDEN);
 : /* If the result did not land in TARGET, copy it there. */
2504 1218 : if (value != target)
2505 0 : emit_move_insn (target, value);
2506 1218 : }
2507 :
2508 : /* Output a shift instruction for expression code CODE,
2509 : with SHIFTED being the rtx for the value to shift,
2510 : and AMOUNT the rtx for the amount to shift by.
2511 : Store the result in the rtx TARGET, if that is convenient.
2512 : If UNSIGNEDP is nonzero, do a logical shift; otherwise, arithmetic.
2513 : Return the rtx for where the value is.
2514 : If that cannot be done, abort the compilation unless MAY_FAIL is true,
2515 : in which case 0 is returned. */
2516 :
2517 : static rtx
2518 1489543 : expand_shift_1 (enum tree_code code, machine_mode mode, rtx shifted,
2519 : rtx amount, rtx target, int unsignedp, bool may_fail = false)
2520 : {
2521 1489543 : rtx op1, temp = 0;
2522 1489543 : int left = (code == LSHIFT_EXPR || code == LROTATE_EXPR);
2523 1489543 : int rotate = (code == LROTATE_EXPR || code == RROTATE_EXPR);
 : /* Default to the scalar shift/rotate optabs; replaced below by the
 : vector-by-vector variants when both operands are vectors. */
2524 1489543 : optab lshift_optab = ashl_optab;
2525 1489543 : optab rshift_arith_optab = ashr_optab;
2526 1489543 : optab rshift_uns_optab = lshr_optab;
2527 1489543 : optab lrotate_optab = rotl_optab;
2528 1489543 : optab rrotate_optab = rotr_optab;
2529 1489543 : machine_mode op1_mode;
2530 1489543 : scalar_mode scalar_mode = GET_MODE_INNER (mode);
2531 1489543 : int attempt;
2532 1489543 : bool speed = optimize_insn_for_speed_p ();
2533 :
2534 1489543 : op1 = amount;
2535 1489543 : op1_mode = GET_MODE (op1);
2536 :
2537 : /* Determine whether the shift/rotate amount is a vector, or scalar. If the
2538 : shift amount is a vector, use the vector/vector shift patterns. */
2539 1489543 : if (VECTOR_MODE_P (mode) && VECTOR_MODE_P (op1_mode))
2540 : {
2541 1489543 : lshift_optab = vashl_optab;
2542 1489543 : rshift_arith_optab = vashr_optab;
2543 1489543 : rshift_uns_optab = vlshr_optab;
2544 1489543 : lrotate_optab = vrotl_optab;
2545 1489543 : rrotate_optab = vrotr_optab;
2546 : }
2547 :
2548 : /* Previously detected shift-counts computed by NEGATE_EXPR
2549 : and shifted in the other direction; but that does not work
2550 : on all machines. */
2551 :
2552 1489543 : if (SHIFT_COUNT_TRUNCATED)
2553 : {
 : /* The target truncates shift counts, so reduce constant counts
 : modulo the bit width, and look through a lowpart SUBREG of the
 : count since the discarded high bits cannot matter. */
2554 : if (CONST_INT_P (op1)
2555 : && ((unsigned HOST_WIDE_INT) INTVAL (op1) >=
2556 : (unsigned HOST_WIDE_INT) GET_MODE_BITSIZE (scalar_mode)))
2557 : op1 = gen_int_shift_amount (mode,
2558 : (unsigned HOST_WIDE_INT) INTVAL (op1)
2559 : % GET_MODE_BITSIZE (scalar_mode));
2560 : else if (GET_CODE (op1) == SUBREG
2561 : && subreg_lowpart_p (op1)
2562 : && SCALAR_INT_MODE_P (GET_MODE (SUBREG_REG (op1)))
2563 : && SCALAR_INT_MODE_P (GET_MODE (op1)))
2564 : op1 = SUBREG_REG (op1);
2565 : }
2566 :
2567 : /* Canonicalize rotates by constant amount. We may canonicalize
2568 : to reduce the immediate or if the ISA can rotate by constants
2569 : in only one direction. */
2570 1489543 : if (rotate && reverse_rotate_by_imm_p (scalar_mode, left, op1))
2571 : {
 : /* Rotate by (bitsize - N) in the opposite direction instead. */
2572 3001 : op1 = gen_int_shift_amount (mode, (GET_MODE_BITSIZE (scalar_mode)
2573 3001 : - INTVAL (op1)));
2574 3001 : left = !left;
2575 3001 : code = left ? LROTATE_EXPR : RROTATE_EXPR;
2576 : }
2577 :
2578 : /* Rotation of 16bit values by 8 bits is effectively equivalent to a bswaphi.
2579 : Note that this is not the case for bigger values. For instance a rotation
2580 : of 0x01020304 by 16 bits gives 0x03040102 which is different from
2581 : 0x04030201 (bswapsi). */
2582 1489543 : if (rotate
2583 7951 : && CONST_INT_P (op1)
2584 5081 : && INTVAL (op1) == BITS_PER_UNIT
2585 990 : && GET_MODE_SIZE (scalar_mode) == 2
2586 1490366 : && optab_handler (bswap_optab, mode) != CODE_FOR_nothing)
2587 822 : return expand_unop (mode, bswap_optab, shifted, NULL_RTX, unsignedp);
2588 :
 : /* A shift or rotate by zero is a no-op. */
2589 1488721 : if (op1 == const0_rtx)
2590 : return shifted;
2591 :
2592 : /* Check whether its cheaper to implement a left shift by a constant
2593 : bit count by a sequence of additions. */
2594 1441325 : if (code == LSHIFT_EXPR
2595 871432 : && CONST_INT_P (op1)
2596 842483 : && INTVAL (op1) > 0
2597 842454 : && INTVAL (op1) < GET_MODE_PRECISION (scalar_mode)
2598 842454 : && INTVAL (op1) < MAX_BITS_PER_WORD
2599 838067 : && (shift_cost (speed, mode, INTVAL (op1))
2600 838067 : > INTVAL (op1) * add_cost (speed, mode))
2601 1444101 : && shift_cost (speed, mode, INTVAL (op1)) != MAX_COST)
2602 : {
2603 : int i;
 : /* x << n == x doubled n times. */
2604 5731 : for (i = 0; i < INTVAL (op1); i++)
2605 : {
2606 2955 : temp = force_reg (mode, shifted);
2607 2955 : shifted = expand_binop (mode, add_optab, temp, temp, NULL_RTX,
2608 : unsignedp, OPTAB_LIB_WIDEN);
2609 : }
2610 : return shifted;
2611 : }
2612 :
 : /* Try progressively more expensive strategies: a direct insn,
 : then widening, then a library call / open-coded fallback. */
2613 2877132 : for (attempt = 0; temp == 0 && attempt < 3; attempt++)
2614 : {
2615 1438617 : enum optab_methods methods;
2616 :
2617 1438617 : if (attempt == 0)
2618 : methods = OPTAB_DIRECT;
2619 68 : else if (attempt == 1)
2620 : methods = OPTAB_WIDEN;
2621 : else
2622 34 : methods = OPTAB_LIB_WIDEN;
2623 :
2624 1438617 : if (rotate)
2625 : {
2626 : /* Widening does not work for rotation. */
2627 7197 : if (methods == OPTAB_WIDEN)
2628 34 : continue;
2629 7163 : else if (methods == OPTAB_LIB_WIDEN)
2630 : {
2631 : /* If we have been unable to open-code this by a rotation,
2632 : do it as the IOR or PLUS of two shifts. I.e., to rotate
2633 : A by N bits, compute
2634 : (A << N) | ((unsigned) A >> ((-N) & (C - 1)))
2635 : where C is the bitsize of A. If N cannot be zero,
2636 : use PLUS instead of IOR.
2637 :
2638 : It is theoretically possible that the target machine might
2639 : not be able to perform either shift and hence we would
2640 : be making two libcalls rather than just the one for the
2641 : shift (similarly if IOR could not be done). We will allow
2642 : this extremely unlikely lossage to avoid complicating the
2643 : code below. */
2644 :
2645 34 : rtx subtarget = target == shifted ? 0 : target;
2646 34 : rtx new_amount, other_amount;
2647 34 : rtx temp1;
2648 :
2649 34 : new_amount = op1;
2650 34 : if (op1 == const0_rtx)
2651 : return shifted;
2652 34 : else if (CONST_INT_P (op1))
2653 23 : other_amount = gen_int_shift_amount
2654 23 : (mode, GET_MODE_BITSIZE (scalar_mode) - INTVAL (op1));
2655 : else
2656 : {
 : /* Variable count: compute (-N) & (C - 1) at run time. */
2657 11 : other_amount
2658 22 : = simplify_gen_unary (NEG, GET_MODE (op1),
2659 11 : op1, GET_MODE (op1));
2660 11 : HOST_WIDE_INT mask = GET_MODE_PRECISION (scalar_mode) - 1;
2661 11 : other_amount
2662 11 : = simplify_gen_binary (AND, GET_MODE (op1), other_amount,
2663 11 : gen_int_mode (mask, GET_MODE (op1)));
2664 : }
2665 :
2666 34 : shifted = force_reg (mode, shifted);
2667 :
2668 45 : temp = expand_shift_1 (left ? LSHIFT_EXPR : RSHIFT_EXPR,
2669 : mode, shifted, new_amount, 0, 1);
2670 45 : temp1 = expand_shift_1 (left ? RSHIFT_EXPR : LSHIFT_EXPR,
2671 : mode, shifted, other_amount,
2672 : subtarget, 1);
2673 34 : return expand_binop (mode,
2674 34 : CONST_INT_P (op1) ? add_optab : ior_optab,
2675 34 : temp, temp1, target, unsignedp, methods);
2676 : }
2677 :
2678 10481 : temp = expand_binop (mode,
2679 : left ? lrotate_optab : rrotate_optab,
2680 : shifted, op1, target, unsignedp, methods);
2681 : }
2682 1431420 : else if (unsignedp)
2683 1184271 : temp = expand_binop (mode,
2684 : left ? lshift_optab : rshift_uns_optab,
2685 : shifted, op1, target, unsignedp, methods);
2686 :
2687 : /* Do arithmetic shifts.
2688 : Also, if we are going to widen the operand, we can just as well
2689 : use an arithmetic right-shift instead of a logical one. */
2690 1438549 : if (temp == 0 && ! rotate
2691 575158 : && (! unsignedp || (! left && methods == OPTAB_WIDEN)))
2692 : {
2693 : enum optab_methods methods1 = methods;
2694 :
2695 : /* If trying to widen a log shift to an arithmetic shift,
2696 : don't accept an arithmetic shift of the same size. */
2697 : if (unsignedp)
2698 : methods1 = OPTAB_MUST_WIDEN;
2699 :
2700 : /* Arithmetic shift */
2701 :
2702 809913 : temp = expand_binop (mode,
2703 : left ? lshift_optab : rshift_arith_optab,
2704 : shifted, op1, target, unsignedp, methods1);
2705 : }
2706 :
2707 : /* We used to try extzv here for logical right shifts, but that was
2708 : only useful for one machine, the VAX, and caused poor code
2709 : generation there for lshrdi3, so the code was deleted and a
2710 : define_expand for lshrsi3 was added to vax.md. */
2711 : }
2712 :
 : /* All strategies failed: abort unless the caller allowed failure. */
2713 1438515 : gcc_assert (temp != NULL_RTX || may_fail);
2714 : return temp;
2715 : }
2716 :
2717 : /* Output a shift instruction for expression code CODE,
2718 : with SHIFTED being the rtx for the value to shift,
2719 : and AMOUNT the amount to shift by.
2720 : Store the result in the rtx TARGET, if that is convenient.
2721 : If UNSIGNEDP is nonzero, do a logical shift; otherwise, arithmetic.
2722 : Return the rtx for where the value is. */
2723 :
2724 : rtx
2725 1206531 : expand_shift (enum tree_code code, machine_mode mode, rtx shifted,
2726 : poly_int64 amount, rtx target, int unsignedp)
2727 : {
 : /* Wrap the compile-time AMOUNT in an rtx of the mode's shift-amount
 : type and defer to the worker. May not fail. */
2728 1206531 : return expand_shift_1 (code, mode, shifted,
2729 : gen_int_shift_amount (mode, amount),
2730 1206531 : target, unsignedp);
2731 : }
2732 :
2733 : /* Likewise, but return 0 if that cannot be done. */
2734 :
2735 : rtx
2736 315 : maybe_expand_shift (enum tree_code code, machine_mode mode, rtx shifted,
2737 : int amount, rtx target, int unsignedp)
2738 : {
 : /* Like expand_shift, but pass MAY_FAIL so the worker returns 0
 : instead of asserting when no shift sequence can be emitted. */
2739 315 : return expand_shift_1 (code, mode,
2740 315 : shifted, GEN_INT (amount), target, unsignedp, true);
2741 : }
2742 :
2743 : /* Output a shift instruction for expression code CODE,
2744 : with SHIFTED being the rtx for the value to shift,
2745 : and AMOUNT the tree for the amount to shift by.
2746 : Store the result in the rtx TARGET, if that is convenient.
2747 : If UNSIGNEDP is nonzero, do a logical shift; otherwise, arithmetic.
2748 : Return the rtx for where the value is. */
2749 :
2750 : rtx
2751 282629 : expand_variable_shift (enum tree_code code, machine_mode mode, rtx shifted,
2752 : tree amount, rtx target, int unsignedp)
2753 : {
 : /* Expand the tree-level AMOUNT to rtl first, then defer to the
 : worker. May not fail. */
2754 282629 : return expand_shift_1 (code, mode,
2755 282629 : shifted, expand_normal (amount), target, unsignedp);
2756 : }
2757 :
2758 :
2759 : static void synth_mult (struct algorithm *, unsigned HOST_WIDE_INT,
2760 : const struct mult_cost *, machine_mode mode);
2761 : static rtx expand_mult_const (machine_mode, rtx, HOST_WIDE_INT, rtx,
2762 : const struct algorithm *, enum mult_variant);
2763 : static unsigned HOST_WIDE_INT invert_mod2n (unsigned HOST_WIDE_INT, int);
2764 : static rtx extract_high_half (scalar_int_mode, rtx);
2765 : static rtx expmed_mult_highpart (scalar_int_mode, rtx, rtx, rtx, int, int);
2766 :
2767 : /* Compute and return the best algorithm for multiplying by T.
2768 : The algorithm must cost less than cost_limit
2769 : If retval.cost >= COST_LIMIT, no algorithm was found and all
2770 : other field of the returned struct are undefined.
2771 : MODE is the machine mode of the multiplication. */
2772 :
2773 : static void
2774 35168381 : synth_mult (struct algorithm *alg_out, unsigned HOST_WIDE_INT t,
2775 : const struct mult_cost *cost_limit, machine_mode mode)
2776 : {
2777 35168381 : int m;
2778 35168381 : struct algorithm *alg_in, *best_alg;
2779 35168381 : struct mult_cost best_cost;
2780 35168381 : struct mult_cost new_limit;
2781 35168381 : int op_cost, op_latency;
2782 35168381 : unsigned HOST_WIDE_INT orig_t = t;
2783 35168381 : unsigned HOST_WIDE_INT q;
2784 35168381 : int maxm, hash_index;
2785 35168381 : bool cache_hit = false;
2786 35168381 : enum alg_code cache_alg = alg_zero;
2787 35168381 : bool speed = optimize_insn_for_speed_p ();
2788 35168381 : scalar_int_mode imode;
2789 35168381 : struct alg_hash_entry *entry_ptr;
2790 :
2791 : /* Indicate that no algorithm is yet found. If no algorithm
2792 : is found, this value will be returned and indicate failure. */
2793 35168381 : alg_out->cost.cost = cost_limit->cost + 1;
2794 35168381 : alg_out->cost.latency = cost_limit->latency + 1;
2795 :
 : /* A non-positive budget cannot be met by any sequence; give up
 : immediately (this bounds the recursion below). */
2796 35168381 : if (cost_limit->cost < 0
2797 28982157 : || (cost_limit->cost == 0 && cost_limit->latency <= 0))
2798 27743561 : return;
2799 :
2800 : /* Be prepared for vector modes. */
2801 48159292 : imode = as_a <scalar_int_mode> (GET_MODE_INNER (mode));
2802 :
2803 71671264 : maxm = MIN (BITS_PER_WORD, GET_MODE_BITSIZE (imode));
2804 :
2805 : /* Restrict the bits of "t" to the multiplication's mode. */
2806 24079646 : t &= GET_MODE_MASK (imode);
2807 :
2808 : /* t == 1 can be done in zero cost. */
2809 24079646 : if (t == 1)
2810 : {
2811 6001264 : alg_out->ops = 1;
2812 6001264 : alg_out->cost.cost = 0;
2813 6001264 : alg_out->cost.latency = 0;
2814 6001264 : alg_out->op[0] = alg_m;
2815 6001264 : return;
2816 : }
2817 :
2818 : /* t == 0 sometimes has a cost. If it does and it exceeds our limit,
2819 : fail now. */
2820 18078382 : if (t == 0)
2821 : {
2822 556230 : if (MULT_COST_LESS (cost_limit, zero_cost (speed)))
2823 : return;
2824 : else
2825 : {
2826 556230 : alg_out->ops = 1;
2827 556230 : alg_out->cost.cost = zero_cost (speed);
2828 556230 : alg_out->cost.latency = zero_cost (speed);
2829 556230 : alg_out->op[0] = alg_zero;
2830 556230 : return;
2831 : }
2832 : }
2833 :
2834 : /* We'll be needing a couple extra algorithm structures now. */
2835 :
2836 17522152 : alg_in = XALLOCA (struct algorithm);
2837 17522152 : best_alg = XALLOCA (struct algorithm);
2838 17522152 : best_cost = *cost_limit;
2839 :
2840 : /* Compute the hash index. */
2841 17522152 : hash_index = (t ^ (unsigned int) mode ^ (speed * 256)) % NUM_ALG_HASH_ENTRIES;
2842 :
2843 : /* See if we already know what to do for T. */
2844 17522152 : entry_ptr = alg_hash_entry_ptr (hash_index);
2845 17522152 : if (entry_ptr->t == t
2846 14541479 : && entry_ptr->mode == mode
2847 14541479 : && entry_ptr->speed == speed
2848 14541479 : && entry_ptr->alg != alg_unknown)
2849 : {
2850 14541479 : cache_alg = entry_ptr->alg;
2851 :
2852 14541479 : if (cache_alg == alg_impossible)
2853 : {
2854 : /* The cache tells us that it's impossible to synthesize
2855 : multiplication by T within entry_ptr->cost. */
2856 6729924 : if (!CHEAPER_MULT_COST (&entry_ptr->cost, cost_limit))
2857 : /* COST_LIMIT is at least as restrictive as the one
2858 : recorded in the hash table, in which case we have no
2859 : hope of synthesizing a multiplication. Just
2860 : return. */
2861 : return;
2862 :
2863 : /* If we get here, COST_LIMIT is less restrictive than the
2864 : one recorded in the hash table, so we may be able to
2865 : synthesize a multiplication. Proceed as if we didn't
2866 : have the cache entry. */
2867 : }
2868 : else
2869 : {
2870 7811555 : if (CHEAPER_MULT_COST (cost_limit, &entry_ptr->cost))
2871 : /* The cached algorithm shows that this multiplication
2872 : requires more cost than COST_LIMIT. Just return. This
2873 : way, we don't clobber this cache entry with
2874 : alg_impossible but retain useful information. */
2875 : return;
2876 :
2877 7049718 : cache_hit = true;
2878 :
 : /* Jump straight to the strategy the cache recorded as best;
 : the other strategies need not be retried. */
2879 7049718 : switch (cache_alg)
2880 : {
2881 4618505 : case alg_shift:
2882 4618505 : goto do_alg_shift;
2883 :
2884 963796 : case alg_add_t_m2:
2885 963796 : case alg_sub_t_m2:
2886 963796 : goto do_alg_addsub_t_m2;
2887 :
2888 123105 : case alg_add_factor:
2889 123105 : case alg_sub_factor:
2890 123105 : goto do_alg_addsub_factor;
2891 :
2892 1344304 : case alg_add_t2_m:
2893 1344304 : goto do_alg_add_t2_m;
2894 :
2895 8 : case alg_sub_t2_m:
2896 8 : goto do_alg_sub_t2_m;
2897 :
2898 0 : default:
2899 0 : gcc_unreachable ();
2900 : }
2901 : }
2902 : }
2903 :
2904 : /* If we have a group of zero bits at the low-order part of T, try
2905 : multiplying by the remaining bits and then doing a shift. */
2906 :
2907 3842842 : if ((t & 1) == 0)
2908 : {
2909 1955322 : do_alg_shift:
2910 6573827 : m = ctz_or_zero (t); /* m = number of low zero bits */
2911 6573827 : if (m < maxm)
2912 : {
2913 6573749 : q = t >> m;
2914 : /* The function expand_shift will choose between a shift and
2915 : a sequence of additions, so the observed cost is given as
2916 : MIN (m * add_cost(speed, mode), shift_cost(speed, mode, m)). */
2917 6573749 : op_cost = m * add_cost (speed, mode);
2918 6573749 : if (shift_cost (speed, mode, m) < op_cost)
2919 : op_cost = shift_cost (speed, mode, m);
2920 6573749 : new_limit.cost = best_cost.cost - op_cost;
2921 6573749 : new_limit.latency = best_cost.latency - op_cost;
2922 6573749 : synth_mult (alg_in, q, &new_limit, mode);
2923 :
2924 6573749 : alg_in->cost.cost += op_cost;
2925 6573749 : alg_in->cost.latency += op_cost;
2926 6573749 : if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2927 : {
2928 4227376 : best_cost = alg_in->cost;
2929 4227376 : std::swap (alg_in, best_alg);
2930 4227376 : best_alg->log[best_alg->ops] = m;
2931 4227376 : best_alg->op[best_alg->ops] = alg_shift;
2932 : }
2933 :
2934 : /* See if treating ORIG_T as a signed number yields a better
2935 : sequence. Try this sequence only for a negative ORIG_T
2936 : as it would be useless for a non-negative ORIG_T. */
2937 6573749 : if ((HOST_WIDE_INT) orig_t < 0)
2938 : {
2939 : /* Shift ORIG_T as follows because a right shift of a
2940 : negative-valued signed type is implementation
2941 : defined. */
2942 637876 : q = ~(~orig_t >> m);
2943 : /* The function expand_shift will choose between a shift
2944 : and a sequence of additions, so the observed cost is
2945 : given as MIN (m * add_cost(speed, mode),
2946 : shift_cost(speed, mode, m)). */
2947 637876 : op_cost = m * add_cost (speed, mode);
2948 637876 : if (shift_cost (speed, mode, m) < op_cost)
2949 : op_cost = shift_cost (speed, mode, m);
2950 637876 : new_limit.cost = best_cost.cost - op_cost;
2951 637876 : new_limit.latency = best_cost.latency - op_cost;
2952 637876 : synth_mult (alg_in, q, &new_limit, mode);
2953 :
2954 637876 : alg_in->cost.cost += op_cost;
2955 637876 : alg_in->cost.latency += op_cost;
2956 637876 : if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2957 : {
2958 608586 : best_cost = alg_in->cost;
2959 608586 : std::swap (alg_in, best_alg);
2960 608586 : best_alg->log[best_alg->ops] = m;
2961 608586 : best_alg->op[best_alg->ops] = alg_shift;
2962 : }
2963 : }
2964 : }
2965 6573827 : if (cache_hit)
2966 4618505 : goto done;
2967 : }
2968 :
2969 : /* If we have an odd number, add or subtract one. */
2970 1955322 : if ((t & 1) != 0)
2971 : {
2972 2851316 : unsigned HOST_WIDE_INT w;
2973 :
2974 0 : do_alg_addsub_t_m2:
 : /* W becomes the lowest zero bit of T (zero if T is all ones). */
2975 41616579 : for (w = 1; (w & t) != 0; w <<= 1)
2976 : ;
2977 : /* If T was -1, then W will be zero after the loop. This is another
2978 : case where T ends with ...111. Handling this with (T + 1) and
2979 : subtract 1 produces slightly better code and results in algorithm
2980 : selection much faster than treating it like the ...0111 case
2981 : below. */
2982 2851316 : if (w == 0
2983 2417866 : || (w > 2
2984 : /* Reject the case where t is 3.
2985 : Thus we prefer addition in that case. */
2986 2417866 : && t != 3))
2987 : {
2988 : /* T ends with ...111. Multiply by (T + 1) and subtract T. */
2989 :
2990 1617320 : op_cost = add_cost (speed, mode);
2991 1617320 : new_limit.cost = best_cost.cost - op_cost;
2992 1617320 : new_limit.latency = best_cost.latency - op_cost;
2993 1617320 : synth_mult (alg_in, t + 1, &new_limit, mode);
2994 :
2995 1617320 : alg_in->cost.cost += op_cost;
2996 1617320 : alg_in->cost.latency += op_cost;
2997 1617320 : if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2998 : {
2999 740513 : best_cost = alg_in->cost;
3000 740513 : std::swap (alg_in, best_alg);
3001 740513 : best_alg->log[best_alg->ops] = 0;
3002 740513 : best_alg->op[best_alg->ops] = alg_sub_t_m2;
3003 : }
3004 : }
3005 : else
3006 : {
3007 : /* T ends with ...01 or ...011. Multiply by (T - 1) and add T. */
3008 :
3009 1233996 : op_cost = add_cost (speed, mode);
3010 1233996 : new_limit.cost = best_cost.cost - op_cost;
3011 1233996 : new_limit.latency = best_cost.latency - op_cost;
3012 1233996 : synth_mult (alg_in, t - 1, &new_limit, mode);
3013 :
3014 1233996 : alg_in->cost.cost += op_cost;
3015 1233996 : alg_in->cost.latency += op_cost;
3016 1233996 : if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
3017 : {
3018 180823 : best_cost = alg_in->cost;
3019 180823 : std::swap (alg_in, best_alg);
3020 180823 : best_alg->log[best_alg->ops] = 0;
3021 180823 : best_alg->op[best_alg->ops] = alg_add_t_m2;
3022 : }
3023 : }
3024 :
3025 : /* We may be able to calculate a * -7, a * -15, a * -31, etc
3026 : quickly with a - a * n for some appropriate constant n. */
3027 2851316 : m = exact_log2 (-orig_t + 1);
3028 2851316 : if (m >= 0 && m < maxm)
3029 : {
3030 762248 : op_cost = add_cost (speed, mode) + shift_cost (speed, mode, m);
3031 : /* If the target has a cheap shift-and-subtract insn use
3032 : that in preference to a shift insn followed by a sub insn.
3033 : Assume that the shift-and-sub is "atomic" with a latency
3034 : equal to its cost, otherwise assume that on superscalar
3035 : hardware the shift may be executed concurrently with the
3036 : earlier steps in the algorithm. */
3037 762248 : if (shiftsub1_cost (speed, mode, m) <= op_cost)
3038 : {
3039 : op_cost = shiftsub1_cost (speed, mode, m);
3040 : op_latency = op_cost;
3041 : }
3042 : else
3043 756558 : op_latency = add_cost (speed, mode);
3044 :
3045 762248 : new_limit.cost = best_cost.cost - op_cost;
3046 762248 : new_limit.latency = best_cost.latency - op_latency;
3047 762248 : synth_mult (alg_in, (unsigned HOST_WIDE_INT) (-orig_t + 1) >> m,
3048 : &new_limit, mode);
3049 :
3050 762248 : alg_in->cost.cost += op_cost;
3051 762248 : alg_in->cost.latency += op_latency;
3052 762248 : if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
3053 : {
3054 190852 : best_cost = alg_in->cost;
3055 190852 : std::swap (alg_in, best_alg);
3056 190852 : best_alg->log[best_alg->ops] = m;
3057 190852 : best_alg->op[best_alg->ops] = alg_sub_t_m2;
3058 : }
3059 : }
3060 :
3061 2851316 : if (cache_hit)
3062 963796 : goto done;
3063 : }
3064 :
3065 : /* Look for factors of t of the form
3066 : t = q(2**m +- 1), 2 <= m <= floor(log2(t - 1)).
3067 : If we find such a factor, we can multiply by t using an algorithm that
3068 : multiplies by q, shift the result by m and add/subtract it to itself.
3069 :
3070 : We search for large factors first and loop down, even if large factors
3071 : are less probable than small; if we find a large factor we will find a
3072 : good sequence quickly, and therefore be able to prune (by decreasing
3073 : COST_LIMIT) the search. */
3074 :
3075 1955322 : do_alg_addsub_factor:
3076 73837743 : for (m = floor_log2 (t - 1); m >= 2; m--)
3077 : {
3078 71914795 : unsigned HOST_WIDE_INT d;
3079 :
 : /* Factor of the form 2**m + 1: multiply by t/d, then
 : shift-and-add. */
3080 71914795 : d = (HOST_WIDE_INT_1U << m) + 1;
3081 71914795 : if (t % d == 0 && t > d && m < maxm
3082 978634 : && (!cache_hit || cache_alg == alg_add_factor))
3083 : {
3084 978634 : op_cost = add_cost (speed, mode) + shift_cost (speed, mode, m);
3085 978634 : if (shiftadd_cost (speed, mode, m) <= op_cost)
3086 : op_cost = shiftadd_cost (speed, mode, m);
3087 :
3088 978634 : op_latency = op_cost;
3089 :
3090 :
3091 978634 : new_limit.cost = best_cost.cost - op_cost;
3092 978634 : new_limit.latency = best_cost.latency - op_latency;
3093 978634 : synth_mult (alg_in, t / d, &new_limit, mode);
3094 :
3095 978634 : alg_in->cost.cost += op_cost;
3096 978634 : alg_in->cost.latency += op_latency;
3097 978634 : if (alg_in->cost.latency < op_cost)
3098 203678 : alg_in->cost.latency = op_cost;
3099 978634 : if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
3100 : {
3101 121697 : best_cost = alg_in->cost;
3102 121697 : std::swap (alg_in, best_alg);
3103 121697 : best_alg->log[best_alg->ops] = m;
3104 121697 : best_alg->op[best_alg->ops] = alg_add_factor;
3105 : }
3106 : /* Other factors will have been taken care of in the recursion. */
3107 : break;
3108 : }
3109 :
 : /* Factor of the form 2**m - 1: multiply by t/d, then
 : shift-and-subtract. */
3110 70936161 : d = (HOST_WIDE_INT_1U << m) - 1;
3111 70936161 : if (t % d == 0 && t > d && m < maxm
3112 1064365 : && (!cache_hit || cache_alg == alg_sub_factor))
3113 : {
3114 1064365 : op_cost = add_cost (speed, mode) + shift_cost (speed, mode, m);
3115 1064365 : if (shiftsub0_cost (speed, mode, m) <= op_cost)
3116 : op_cost = shiftsub0_cost (speed, mode, m);
3117 :
3118 1064365 : op_latency = op_cost;
3119 :
3120 1064365 : new_limit.cost = best_cost.cost - op_cost;
3121 1064365 : new_limit.latency = best_cost.latency - op_latency;
3122 1064365 : synth_mult (alg_in, t / d, &new_limit, mode);
3123 :
3124 1064365 : alg_in->cost.cost += op_cost;
3125 1064365 : alg_in->cost.latency += op_latency;
3126 1064365 : if (alg_in->cost.latency < op_cost)
3127 285111 : alg_in->cost.latency = op_cost;
3128 1064365 : if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
3129 : {
3130 33644 : best_cost = alg_in->cost;
3131 33644 : std::swap (alg_in, best_alg);
3132 33644 : best_alg->log[best_alg->ops] = m;
3133 33644 : best_alg->op[best_alg->ops] = alg_sub_factor;
3134 : }
3135 : break;
3136 : }
3137 : }
3138 3965947 : if (cache_hit)
3139 123105 : goto done;
3140 :
3141 : /* Try shift-and-add (load effective address) instructions,
3142 : i.e. do a*3, a*5, a*9. */
3143 3842842 : if ((t & 1) != 0)
3144 : {
3145 1887520 : do_alg_add_t2_m:
3146 3231824 : q = t - 1;
3147 3231824 : m = ctz_hwi (q);
3148 3231824 : if (q && m < maxm)
3149 : {
3150 3231808 : op_cost = shiftadd_cost (speed, mode, m);
3151 3231808 : new_limit.cost = best_cost.cost - op_cost;
3152 3231808 : new_limit.latency = best_cost.latency - op_cost;
3153 3231808 : synth_mult (alg_in, (t - 1) >> m, &new_limit, mode);
3154 :
3155 3231808 : alg_in->cost.cost += op_cost;
3156 3231808 : alg_in->cost.latency += op_cost;
3157 3231808 : if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
3158 : {
3159 1400208 : best_cost = alg_in->cost;
3160 1400208 : std::swap (alg_in, best_alg);
3161 1400208 : best_alg->log[best_alg->ops] = m;
3162 1400208 : best_alg->op[best_alg->ops] = alg_add_t2_m;
3163 : }
3164 : }
3165 3231824 : if (cache_hit)
3166 1344304 : goto done;
3167 :
3168 1887520 : do_alg_sub_t2_m:
3169 1887528 : q = t + 1;
3170 1887528 : m = ctz_hwi (q);
3171 1887528 : if (q && m < maxm)
3172 : {
3173 1865984 : op_cost = shiftsub0_cost (speed, mode, m);
3174 1865984 : new_limit.cost = best_cost.cost - op_cost;
3175 1865984 : new_limit.latency = best_cost.latency - op_cost;
3176 1865984 : synth_mult (alg_in, (t + 1) >> m, &new_limit, mode);
3177 :
3178 1865984 : alg_in->cost.cost += op_cost;
3179 1865984 : alg_in->cost.latency += op_cost;
3180 1865984 : if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
3181 : {
3182 63 : best_cost = alg_in->cost;
3183 63 : std::swap (alg_in, best_alg);
3184 63 : best_alg->log[best_alg->ops] = m;
3185 63 : best_alg->op[best_alg->ops] = alg_sub_t2_m;
3186 : }
3187 : }
3188 1887528 : if (cache_hit)
3189 : goto done;
3190 : }
3191 :
3192 1955322 : done:
3193 : /* If best_cost has not decreased, we have not found any algorithm. */
3194 10892560 : if (!CHEAPER_MULT_COST (&best_cost, cost_limit))
3195 : {
3196 : /* We failed to find an algorithm. Record alg_impossible for
3197 : this case (that is, <T, MODE, COST_LIMIT>) so that next time
3198 : we are asked to find an algorithm for T within the same or
3199 : lower COST_LIMIT, we can immediately return to the
3200 : caller. */
3201 3467740 : entry_ptr->t = t;
3202 3467740 : entry_ptr->mode = mode;
3203 3467740 : entry_ptr->speed = speed;
3204 3467740 : entry_ptr->alg = alg_impossible;
3205 3467740 : entry_ptr->cost = *cost_limit;
3206 3467740 : return;
3207 : }
3208 :
3209 : /* Cache the result. */
3210 7424820 : if (!cache_hit)
3211 : {
3212 686365 : entry_ptr->t = t;
3213 686365 : entry_ptr->mode = mode;
3214 686365 : entry_ptr->speed = speed;
3215 686365 : entry_ptr->alg = best_alg->op[best_alg->ops];
3216 686365 : entry_ptr->cost.cost = best_cost.cost;
3217 686365 : entry_ptr->cost.latency = best_cost.latency;
3218 : }
3219 :
3220 : /* If we are getting a too long sequence for `struct algorithm'
3221 : to record, make this search fail. */
3222 7424820 : if (best_alg->ops == MAX_BITS_PER_WORD)
3223 : return;
3224 :
3225 : /* Copy the algorithm from temporary space to the space at alg_out.
3226 : We avoid using structure assignment because the majority of
3227 : best_alg is normally undefined, and this is a critical function. */
3228 7424820 : alg_out->ops = best_alg->ops + 1;
3229 7424820 : alg_out->cost = best_cost;
3230 7424820 : memcpy (alg_out->op, best_alg->op,
3231 7424820 : alg_out->ops * sizeof *alg_out->op);
3232 7424820 : memcpy (alg_out->log, best_alg->log,
3233 : alg_out->ops * sizeof *alg_out->log);
3234 : }
3235 :
/* Find the cheapest way of multiplying a value of mode MODE by VAL.
   Try three variations:

       - a shift/add sequence based on VAL itself
       - a shift/add sequence based on -VAL, followed by a negation
       - a shift/add sequence based on VAL - 1, followed by an addition.

   Return true if the cheapest of these cost less than MULT_COST,
   describing the algorithm in *ALG and final fixup in *VARIANT.  */

bool
choose_mult_variant (machine_mode mode, HOST_WIDE_INT val,
		     struct algorithm *alg, enum mult_variant *variant,
		     int mult_cost)
{
  struct algorithm alg2;
  struct mult_cost limit;
  int op_cost;
  bool speed = optimize_insn_for_speed_p ();

  /* Fail quickly for impossible bounds.  */
  if (mult_cost < 0)
    return false;

  /* Ensure that mult_cost provides a reasonable upper bound.
     Any constant multiplication can be performed with less
     than 2 * bits additions.  */
  op_cost = 2 * GET_MODE_UNIT_BITSIZE (mode) * add_cost (speed, mode);
  if (mult_cost > op_cost)
    mult_cost = op_cost;

  /* Variation 1: a shift/add sequence for VAL itself.  */
  *variant = basic_variant;
  limit.cost = mult_cost;
  limit.latency = mult_cost;
  synth_mult (alg, val, &limit, mode);

  /* Variation 2: synthesize -VAL, then negate the result.
     This works only if the inverted value actually fits in an
     `unsigned int' */
  if (HOST_BITS_PER_INT >= GET_MODE_UNIT_BITSIZE (mode))
    {
      op_cost = neg_cost (speed, mode);
      /* The synthesis budget is the best result so far (or MULT_COST
	 when variation 1 failed), minus the cost of the final negation.  */
      if (MULT_COST_LESS (&alg->cost, mult_cost))
	{
	  limit.cost = alg->cost.cost - op_cost;
	  limit.latency = alg->cost.latency - op_cost;
	}
      else
	{
	  limit.cost = mult_cost - op_cost;
	  limit.latency = mult_cost - op_cost;
	}

      synth_mult (&alg2, -val, &limit, mode);
      alg2.cost.cost += op_cost;
      alg2.cost.latency += op_cost;
      if (CHEAPER_MULT_COST (&alg2.cost, &alg->cost))
	*alg = alg2, *variant = negate_variant;
    }

  /* Variation 3: synthesize VAL - 1, then add OP0 back in.
     This proves very useful for division-by-constant.  */
  op_cost = add_cost (speed, mode);
  if (MULT_COST_LESS (&alg->cost, mult_cost))
    {
      limit.cost = alg->cost.cost - op_cost;
      limit.latency = alg->cost.latency - op_cost;
    }
  else
    {
      limit.cost = mult_cost - op_cost;
      limit.latency = mult_cost - op_cost;
    }

  /* For VAL == HOST_WIDE_INT_MIN, VAL - 1 wraps around; presumably the
     wrapped value is only meaningful when the mode uses the full
     HOST_WIDE_INT precision, hence this guard.  */
  if (val != HOST_WIDE_INT_MIN
      || GET_MODE_UNIT_PRECISION (mode) == HOST_BITS_PER_WIDE_INT)
    {
      synth_mult (&alg2, val - HOST_WIDE_INT_1U, &limit, mode);
      alg2.cost.cost += op_cost;
      alg2.cost.latency += op_cost;
      if (CHEAPER_MULT_COST (&alg2.cost, &alg->cost))
	*alg = alg2, *variant = add_variant;
    }

  return MULT_COST_LESS (&alg->cost, mult_cost);
}
3320 :
/* A subroutine of expand_mult, used for constant multiplications.
   Multiply OP0 by VAL in mode MODE, storing the result in TARGET if
   convenient.  Use the shift/add sequence described by ALG and apply
   the final fixup specified by VARIANT.  */

static rtx
expand_mult_const (machine_mode mode, rtx op0, HOST_WIDE_INT val,
		   rtx target, const struct algorithm *alg,
		   enum mult_variant variant)
{
  unsigned HOST_WIDE_INT val_so_far;
  rtx_insn *insn;
  rtx accum, tem;
  int opno;
  machine_mode nmode;

  /* Avoid referencing memory over and over and invalid sharing
     on SUBREGs.  */
  op0 = force_reg (mode, op0);

  /* ACCUM starts out either as OP0 or as a zero, depending on
     the first operation.  */

  if (alg->op[0] == alg_zero)
    {
      accum = copy_to_mode_reg (mode, CONST0_RTX (mode));
      val_so_far = 0;
    }
  else if (alg->op[0] == alg_m)
    {
      accum = copy_to_mode_reg (mode, op0);
      val_so_far = 1;
    }
  else
    gcc_unreachable ();

  /* Replay the recorded shift/add steps.  VAL_SO_FAR mirrors, in host
     arithmetic, the constant factor the emitted instructions have
     multiplied OP0 by so far; it is checked against VAL at the end.  */
  for (opno = 1; opno < alg->ops; opno++)
    {
      int log = alg->log[opno];
      /* When not optimizing, reuse ACCUM / TARGET as subtargets to keep
	 the number of pseudos down.  */
      rtx shift_subtarget = optimize ? 0 : accum;
      rtx add_target
	= (opno == alg->ops - 1 && target != 0 && variant != add_variant
	   && !optimize)
	  ? target : 0;
      rtx accum_target = optimize ? 0 : accum;
      rtx accum_inner;

      switch (alg->op[opno])
	{
	case alg_shift:
	  /* accum <<= log.  */
	  tem = expand_shift (LSHIFT_EXPR, mode, accum, log, NULL_RTX, 0);
	  /* REG_EQUAL note will be attached to the following insn.  */
	  emit_move_insn (accum, tem);
	  val_so_far <<= log;
	  break;

	case alg_add_t_m2:
	  /* accum += op0 << log.  */
	  tem = expand_shift (LSHIFT_EXPR, mode, op0, log, NULL_RTX, 0);
	  accum = force_operand (gen_rtx_PLUS (mode, accum, tem),
				 add_target ? add_target : accum_target);
	  val_so_far += HOST_WIDE_INT_1U << log;
	  break;

	case alg_sub_t_m2:
	  /* accum -= op0 << log.  */
	  tem = expand_shift (LSHIFT_EXPR, mode, op0, log, NULL_RTX, 0);
	  accum = force_operand (gen_rtx_MINUS (mode, accum, tem),
				 add_target ? add_target : accum_target);
	  val_so_far -= HOST_WIDE_INT_1U << log;
	  break;

	case alg_add_t2_m:
	  /* accum = (accum << log) + op0.  */
	  accum = expand_shift (LSHIFT_EXPR, mode, accum,
				log, shift_subtarget, 0);
	  accum = force_operand (gen_rtx_PLUS (mode, accum, op0),
				 add_target ? add_target : accum_target);
	  val_so_far = (val_so_far << log) + 1;
	  break;

	case alg_sub_t2_m:
	  /* accum = (accum << log) - op0.  */
	  accum = expand_shift (LSHIFT_EXPR, mode, accum,
				log, shift_subtarget, 0);
	  accum = force_operand (gen_rtx_MINUS (mode, accum, op0),
				 add_target ? add_target : accum_target);
	  val_so_far = (val_so_far << log) - 1;
	  break;

	case alg_add_factor:
	  /* accum += accum << log, i.e. accum *= 2^log + 1.  */
	  tem = expand_shift (LSHIFT_EXPR, mode, accum, log, NULL_RTX, 0);
	  accum = force_operand (gen_rtx_PLUS (mode, accum, tem),
				 add_target ? add_target : accum_target);
	  val_so_far += val_so_far << log;
	  break;

	case alg_sub_factor:
	  /* accum = (accum << log) - accum, i.e. accum *= 2^log - 1.  */
	  tem = expand_shift (LSHIFT_EXPR, mode, accum, log, NULL_RTX, 0);
	  accum = force_operand (gen_rtx_MINUS (mode, tem, accum),
				 (add_target
				  ? add_target : (optimize ? 0 : tem)));
	  val_so_far = (val_so_far << log) - val_so_far;
	  break;

	default:
	  gcc_unreachable ();
	}

      if (SCALAR_INT_MODE_P (mode))
	{
	  /* Write a REG_EQUAL note on the last insn so that we can cse
	     multiplication sequences.  Note that if ACCUM is a SUBREG,
	     we've set the inner register and must properly indicate that.  */
	  tem = op0, nmode = mode;
	  accum_inner = accum;
	  if (GET_CODE (accum) == SUBREG)
	    {
	      accum_inner = SUBREG_REG (accum);
	      nmode = GET_MODE (accum_inner);
	      tem = gen_lowpart (nmode, op0);
	    }

	  /* Don't add a REG_EQUAL note if tem is a paradoxical SUBREG.
	     In that case, only the low bits of accum would be guaranteed to
	     be equal to the content of the REG_EQUAL note, the upper bits
	     can be anything.  */
	  if (!paradoxical_subreg_p (tem))
	    {
	      insn = get_last_insn ();
	      wide_int wval_so_far
		= wi::uhwi (val_so_far,
			    GET_MODE_PRECISION (as_a <scalar_mode> (nmode)));
	      rtx c = immed_wide_int_const (wval_so_far, nmode);
	      set_dst_reg_note (insn, REG_EQUAL, gen_rtx_MULT (nmode, tem, c),
				accum_inner);
	    }
	}
    }

  /* Apply the final fixup chosen by choose_mult_variant.  */
  if (variant == negate_variant)
    {
      val_so_far = -val_so_far;
      accum = expand_unop (mode, neg_optab, accum, target, 0);
    }
  else if (variant == add_variant)
    {
      val_so_far = val_so_far + 1;
      accum = force_operand (gen_rtx_PLUS (mode, accum, op0), target);
    }

  /* Compare only the bits of val and val_so_far that are significant
     in the result mode, to avoid sign-/zero-extension confusion.  */
  nmode = GET_MODE_INNER (mode);
  val &= GET_MODE_MASK (nmode);
  val_so_far &= GET_MODE_MASK (nmode);
  gcc_assert (val == (HOST_WIDE_INT) val_so_far);

  return accum;
}
3477 :
/* Perform a multiplication and return an rtx for the result.
   MODE is mode of value; OP0 and OP1 are what to multiply (rtx's);
   TARGET is a suggestion for where to store the result (an rtx).

   UNSIGNEDP is nonzero for an unsigned multiply; NO_LIBCALL forbids
   falling back to a library call (in which case the result may be 0).

   We check specially for a constant integer as OP1.
   If you want this check for OP0 as well, then before calling
   you should swap the two operands if OP0 would be constant.  */

rtx
expand_mult (machine_mode mode, rtx op0, rtx op1, rtx target,
	     int unsignedp, bool no_libcall)
{
  enum mult_variant variant;
  struct algorithm algorithm;
  rtx scalar_op1;
  int max_cost;
  bool speed = optimize_insn_for_speed_p ();
  /* Trapping-on-overflow semantics only apply to signed scalar
     integer multiplication under -ftrapv.  */
  bool do_trapv = flag_trapv && SCALAR_INT_MODE_P (mode) && !unsignedp;

  if (CONSTANT_P (op0))
    std::swap (op0, op1);

  /* For vectors, there are several simplifications that can be made if
     all elements of the vector constant are identical.  */
  scalar_op1 = unwrap_const_vec_duplicate (op1);

  if (INTEGRAL_MODE_P (mode))
    {
      rtx fake_reg;
      HOST_WIDE_INT coeff;
      bool is_neg;
      int mode_bitsize;

      /* Trivial constants: x*0 -> 0, x*1 -> x, x*-1 -> -x.  */
      if (op1 == CONST0_RTX (mode))
	return op1;
      if (op1 == CONST1_RTX (mode))
	return op0;
      if (op1 == CONSTM1_RTX (mode))
	return expand_unop (mode, do_trapv ? negv_optab : neg_optab,
			    op0, target, 0);

      /* Synthesized sequences don't trap, so they can't be used
	 for -ftrapv multiplications.  */
      if (do_trapv)
	goto skip_synth;

      /* If mode is integer vector mode, check if the backend supports
	 vector lshift (by scalar or vector) at all.  If not, we can't use
	 synthetized multiply.  */
      if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT
	  && optab_handler (vashl_optab, mode) == CODE_FOR_nothing
	  && optab_handler (ashl_optab, mode) == CODE_FOR_nothing)
	goto skip_synth;

      /* These are the operations that are potentially turned into
	 a sequence of shifts and additions.  */
      mode_bitsize = GET_MODE_UNIT_BITSIZE (mode);

      /* synth_mult does an `unsigned int' multiply.  As long as the mode is
	 less than or equal in size to `unsigned int' this doesn't matter.
	 If the mode is larger than `unsigned int', then synth_mult works
	 only if the constant value exactly fits in an `unsigned int' without
	 any truncation.  This means that multiplying by negative values does
	 not work; results are off by 2^32 on a 32 bit machine.  */
      if (CONST_INT_P (scalar_op1))
	{
	  coeff = INTVAL (scalar_op1);
	  is_neg = coeff < 0;
	}
#if TARGET_SUPPORTS_WIDE_INT
      else if (CONST_WIDE_INT_P (scalar_op1))
#else
      else if (CONST_DOUBLE_AS_INT_P (scalar_op1))
#endif
	{
	  int shift = wi::exact_log2 (rtx_mode_t (scalar_op1, mode));
	  /* Perfect power of 2 (other than 1, which is handled above).  */
	  if (shift > 0)
	    return expand_shift (LSHIFT_EXPR, mode, op0,
				 shift, target, unsignedp);
	  else
	    goto skip_synth;
	}
      else
	goto skip_synth;

      /* We used to test optimize here, on the grounds that it's better to
	 produce a smaller program when -O is not used.  But this causes
	 such a terrible slowdown sometimes that it seems better to always
	 use synth_mult.  */

      /* Special case powers of two.  */
      if (EXACT_POWER_OF_2_OR_ZERO_P (coeff)
	  && !(is_neg && mode_bitsize > HOST_BITS_PER_WIDE_INT))
	return expand_shift (LSHIFT_EXPR, mode, op0,
			     floor_log2 (coeff), target, unsignedp);

      /* FAKE_REG stands in for the multiplicand when pricing the
	 generic multiply pattern below.  */
      fake_reg = gen_raw_REG (mode, LAST_VIRTUAL_REGISTER + 1);

      /* Attempt to handle multiplication of DImode values by negative
	 coefficients, by performing the multiplication by a positive
	 multiplier and then inverting the result.  */
      if (is_neg && mode_bitsize > HOST_BITS_PER_WIDE_INT)
	{
	  /* Its safe to use -coeff even for INT_MIN, as the
	     result is interpreted as an unsigned coefficient.
	     Exclude cost of op0 from max_cost to match the cost
	     calculation of the synth_mult.  */
	  coeff = -(unsigned HOST_WIDE_INT) coeff;
	  max_cost = (set_src_cost (gen_rtx_MULT (mode, fake_reg, op1),
				    mode, speed)
		      - neg_cost (speed, mode));
	  if (max_cost <= 0)
	    goto skip_synth;

	  /* Special case powers of two.  */
	  if (EXACT_POWER_OF_2_OR_ZERO_P (coeff))
	    {
	      rtx temp = expand_shift (LSHIFT_EXPR, mode, op0,
				       floor_log2 (coeff), target, unsignedp);
	      return expand_unop (mode, neg_optab, temp, target, 0);
	    }

	  if (choose_mult_variant (mode, coeff, &algorithm, &variant,
				   max_cost))
	    {
	      rtx temp = expand_mult_const (mode, op0, coeff, NULL_RTX,
					    &algorithm, variant);
	      return expand_unop (mode, neg_optab, temp, target, 0);
	    }
	  goto skip_synth;
	}

      /* Exclude cost of op0 from max_cost to match the cost
	 calculation of the synth_mult.  */
      max_cost = set_src_cost (gen_rtx_MULT (mode, fake_reg, op1), mode, speed);
      if (choose_mult_variant (mode, coeff, &algorithm, &variant, max_cost))
	return expand_mult_const (mode, op0, coeff, target,
				  &algorithm, variant);
    }
 skip_synth:

  /* Expand x*2.0 as x+x.  */
  if (CONST_DOUBLE_AS_FLOAT_P (scalar_op1)
      && real_equal (CONST_DOUBLE_REAL_VALUE (scalar_op1), &dconst2))
    {
      op0 = force_reg (GET_MODE (op0), op0);
      return expand_binop (mode, add_optab, op0, op0,
			   target, unsignedp,
			   no_libcall ? OPTAB_WIDEN : OPTAB_LIB_WIDEN);
    }

  /* This used to use umul_optab if unsigned, but for non-widening multiply
     there is no difference between signed and unsigned.  */
  op0 = expand_binop (mode, do_trapv ? smulv_optab : smul_optab,
		      op0, op1, target, unsignedp,
		      no_libcall ? OPTAB_WIDEN : OPTAB_LIB_WIDEN);
  gcc_assert (op0 || no_libcall);
  return op0;
}
3636 :
3637 : /* Return a cost estimate for multiplying a register by the given
3638 : COEFFicient in the given MODE and SPEED. */
3639 :
3640 : int
3641 6764964 : mult_by_coeff_cost (HOST_WIDE_INT coeff, machine_mode mode, bool speed)
3642 : {
3643 6764964 : int max_cost;
3644 6764964 : struct algorithm algorithm;
3645 6764964 : enum mult_variant variant;
3646 :
3647 6764964 : rtx fake_reg = gen_raw_REG (mode, LAST_VIRTUAL_REGISTER + 1);
3648 6764964 : max_cost = set_src_cost (gen_rtx_MULT (mode, fake_reg, fake_reg),
3649 : mode, speed);
3650 6764964 : if (choose_mult_variant (mode, coeff, &algorithm, &variant, max_cost))
3651 6036434 : return algorithm.cost.cost;
3652 : else
3653 : return max_cost;
3654 : }
3655 :
/* Perform a widening multiplication and return an rtx for the result.
   MODE is mode of value; OP0 and OP1 are what to multiply (rtx's);
   TARGET is a suggestion for where to store the result (an rtx).
   THIS_OPTAB is the optab we should use, it must be either umul_widen_optab
   or smul_widen_optab.

   We check specially for a constant integer as OP1, comparing the
   cost of a widening multiply against the cost of a sequence of shifts
   and adds.  */

rtx
expand_widening_mult (machine_mode mode, rtx op0, rtx op1, rtx target,
		      int unsignedp, optab this_optab)
{
  bool speed = optimize_insn_for_speed_p ();
  rtx cop1;

  /* A shift/add synthesis is only considered when OP1 is a constant
     that survives extension to the wide MODE and is host-representable
     (non-negative, or the whole mode fits in a HOST_WIDE_INT).  */
  if (CONST_INT_P (op1)
      && GET_MODE (op0) != VOIDmode
      && (cop1 = convert_modes (mode, GET_MODE (op0), op1,
				this_optab == umul_widen_optab))
      && CONST_INT_P (cop1)
      && (INTVAL (cop1) >= 0
	  || HWI_COMPUTABLE_MODE_P (mode)))
    {
      HOST_WIDE_INT coeff = INTVAL (cop1);
      int max_cost;
      enum mult_variant variant;
      struct algorithm algorithm;

      if (coeff == 0)
	return CONST0_RTX (mode);

      /* Special case powers of two.  */
      if (EXACT_POWER_OF_2_OR_ZERO_P (coeff))
	{
	  /* Widen OP0 first, then shift in the wide mode.  */
	  op0 = convert_to_mode (mode, op0, this_optab == umul_widen_optab);
	  return expand_shift (LSHIFT_EXPR, mode, op0,
			       floor_log2 (coeff), target, unsignedp);
	}

      /* Exclude cost of op0 from max_cost to match the cost
	 calculation of the synth_mult.  */
      max_cost = mul_widen_cost (speed, mode);
      if (choose_mult_variant (mode, coeff, &algorithm, &variant,
			       max_cost))
	{
	  op0 = convert_to_mode (mode, op0, this_optab == umul_widen_optab);
	  return expand_mult_const (mode, op0, coeff, target,
				    &algorithm, variant);
	}
    }
  /* Fall back to the widening multiply instruction / libcall.  */
  return expand_binop (mode, this_optab, op0, op1, target,
		       unsignedp, OPTAB_LIB_WIDEN);
}
3711 :
/* Choose a minimal N + 1 bit approximation to 2**K / D that can be used to
   replace division by D, put the least significant N bits of the result in
   *MULTIPLIER_PTR, the value K - N in *POST_SHIFT_PTR, and return the most
   significant bit.

   The width of operations is N (should be <= HOST_BITS_PER_WIDE_INT), the
   needed precision is PRECISION (should be <= N).

   PRECISION should be as small as possible so this function can choose the
   multiplier more freely.  If PRECISION is <= N - 1, the most significant
   bit returned by the function will be zero.

   Using this function, x / D is equal to (x*m) / 2**N >> (*POST_SHIFT_PTR),
   where m is the full N + 1 bit multiplier.  */

unsigned HOST_WIDE_INT
choose_multiplier (unsigned HOST_WIDE_INT d, int n, int precision,
		   unsigned HOST_WIDE_INT *multiplier_ptr,
		   int *post_shift_ptr)
{
  int lgup, post_shift;
  int pow1, pow2;

  /* lgup = ceil(log2(d)) */
  /* Assuming d > 1, we have d >= 2^(lgup-1) + 1 */
  lgup = ceil_log2 (d);

  gcc_assert (lgup <= n);
  gcc_assert (lgup <= precision);

  pow1 = n + lgup;
  pow2 = n + lgup - precision;

  /* mlow = 2^(n + lgup)/d */
  /* Trivially from above we have mlow < 2^(n+1) */
  wide_int val = wi::set_bit_in_zero (pow1, HOST_BITS_PER_DOUBLE_INT);
  wide_int mlow = wi::udiv_trunc (val, d);

  /* mhigh = (2^(n + lgup) + 2^(n + lgup - precision))/d */
  /* From above we have mhigh < 2^(n+1) assuming lgup <= precision */
  /* From precision <= n, the difference between the numerators of mhigh and
     mlow is >= 2^lgup >= d.  Therefore the difference of the quotients in
     the Euclidean division by d is at least 1, so we have mlow < mhigh and
     the exact value of 2^(n + lgup)/d lies in the interval [mlow; mhigh).  */
  val |= wi::set_bit_in_zero (pow2, HOST_BITS_PER_DOUBLE_INT);
  wide_int mhigh = wi::udiv_trunc (val, d);

  /* Reduce to lowest terms.  */
  /* If precision <= n - 1, then the difference between the numerators of
     mhigh and mlow is >= 2^(lgup + 1) >= 2 * 2^lgup >= 2 * d.  Therefore
     the difference of the quotients in the Euclidean division by d is at
     least 2, which means that mhigh and mlow differ by at least one bit
     not in the last place.  The conclusion is that the first iteration of
     the loop below completes and shifts mhigh and mlow by 1 bit, which in
     particular means that mhigh < 2^n, that is to say, the most significant
     bit in the n + 1 bit value is zero.  */
  for (post_shift = lgup; post_shift > 0; post_shift--)
    {
      /* Drop a common trailing bit from both bounds as long as any
	 value in [mlow/2; mhigh/2) still separates them.  */
      unsigned HOST_WIDE_INT ml_lo = wi::extract_uhwi (mlow, 1,
						       HOST_BITS_PER_WIDE_INT);
      unsigned HOST_WIDE_INT mh_lo = wi::extract_uhwi (mhigh, 1,
						       HOST_BITS_PER_WIDE_INT);
      if (ml_lo >= mh_lo)
	break;

      mlow = wi::uhwi (ml_lo, HOST_BITS_PER_DOUBLE_INT);
      mhigh = wi::uhwi (mh_lo, HOST_BITS_PER_DOUBLE_INT);
    }

  *post_shift_ptr = post_shift;

  if (n < HOST_BITS_PER_WIDE_INT)
    {
      /* Return bit N of the N + 1 bit multiplier as the "hidden" bit,
	 and its low N bits through *MULTIPLIER_PTR.  */
      unsigned HOST_WIDE_INT mask = (HOST_WIDE_INT_1U << n) - 1;
      *multiplier_ptr = mhigh.to_uhwi () & mask;
      return mhigh.to_uhwi () > mask;
    }
  else
    {
      *multiplier_ptr = mhigh.to_uhwi ();
      return wi::extract_uhwi (mhigh, HOST_BITS_PER_WIDE_INT, 1);
    }
}
3795 :
3796 : /* Compute the inverse of X mod 2**N, i.e., find Y such that X * Y is congruent
3797 : to 1 modulo 2**N, assuming that X is odd. Bézout's lemma guarantees that Y
3798 : exists for any given positive N. */
3799 :
3800 : static unsigned HOST_WIDE_INT
3801 47141 : invert_mod2n (unsigned HOST_WIDE_INT x, int n)
3802 : {
3803 47141 : gcc_assert ((x & 1) == 1);
3804 :
3805 : /* The algorithm notes that the choice Y = Z satisfies X*Y == 1 mod 2^3,
3806 : since X is odd. Then each iteration doubles the number of bits of
3807 : significance in Y. */
3808 :
3809 48391 : const unsigned HOST_WIDE_INT mask
3810 : = (n == HOST_BITS_PER_WIDE_INT
3811 47141 : ? HOST_WIDE_INT_M1U
3812 1250 : : (HOST_WIDE_INT_1U << n) - 1);
3813 47141 : unsigned HOST_WIDE_INT y = x;
3814 47141 : int nbit = 3;
3815 :
3816 281566 : while (nbit < n)
3817 : {
3818 234425 : y = y * (2 - x*y) & mask; /* Modulo 2^N */
3819 234425 : nbit *= 2;
3820 : }
3821 :
3822 47141 : return y;
3823 : }
3824 :
3825 : /* Emit code to adjust ADJ_OPERAND after multiplication of wrong signedness
3826 : flavor of OP0 and OP1. ADJ_OPERAND is already the high half of the
3827 : product OP0 x OP1. If UNSIGNEDP is nonzero, adjust the signed product
3828 : to become unsigned, if UNSIGNEDP is zero, adjust the unsigned product to
3829 : become signed.
3830 :
3831 : The result is put in TARGET if that is convenient.
3832 :
3833 : MODE is the mode of operation. */
3834 :
3835 : rtx
3836 0 : expand_mult_highpart_adjust (scalar_int_mode mode, rtx adj_operand, rtx op0,
3837 : rtx op1, rtx target, int unsignedp)
3838 : {
3839 0 : rtx tem;
3840 0 : enum rtx_code adj_code = unsignedp ? PLUS : MINUS;
3841 :
3842 0 : tem = expand_shift (RSHIFT_EXPR, mode, op0,
3843 0 : GET_MODE_BITSIZE (mode) - 1, NULL_RTX, 0);
3844 0 : tem = expand_and (mode, tem, op1, NULL_RTX);
3845 0 : adj_operand
3846 0 : = force_operand (gen_rtx_fmt_ee (adj_code, mode, adj_operand, tem),
3847 : adj_operand);
3848 :
3849 0 : tem = expand_shift (RSHIFT_EXPR, mode, op1,
3850 0 : GET_MODE_BITSIZE (mode) - 1, NULL_RTX, 0);
3851 0 : tem = expand_and (mode, tem, op0, NULL_RTX);
3852 0 : target = force_operand (gen_rtx_fmt_ee (adj_code, mode, adj_operand, tem),
3853 : target);
3854 :
3855 0 : return target;
3856 : }
3857 :
3858 : /* Subroutine of expmed_mult_highpart. Return the MODE high part of OP. */
3859 :
3860 : static rtx
3861 21014 : extract_high_half (scalar_int_mode mode, rtx op)
3862 : {
3863 21014 : if (mode == word_mode)
3864 0 : return gen_highpart (mode, op);
3865 :
3866 21014 : scalar_int_mode wider_mode = GET_MODE_WIDER_MODE (mode).require ();
3867 :
3868 42028 : op = expand_shift (RSHIFT_EXPR, wider_mode, op,
3869 21014 : GET_MODE_BITSIZE (mode), 0, 1);
3870 21014 : return convert_modes (mode, wider_mode, op, 0);
3871 : }
3872 :
/* Like expmed_mult_highpart, but only consider using multiplication optab.
   Return 0 if no strategy is available within MAX_COST.  */

rtx
expmed_mult_highpart_optab (scalar_int_mode mode, rtx op0, rtx op1,
			    rtx target, int unsignedp, int max_cost)
{
  const scalar_int_mode wider_mode = GET_MODE_WIDER_MODE (mode).require ();
  const bool speed = optimize_insn_for_speed_p ();
  const int size = GET_MODE_BITSIZE (mode);
  optab moptab;
  rtx tem;

  /* Firstly, try using a multiplication insn that only generates the needed
     high part of the product, and in the sign flavor of unsignedp.  */
  if (mul_highpart_cost (speed, mode) < max_cost)
    {
      moptab = unsignedp ? umul_highpart_optab : smul_highpart_optab;
      tem = expand_binop (mode, moptab, op0, op1, target, unsignedp,
			  OPTAB_DIRECT);
      if (tem)
	return tem;
    }

  /* Secondly, same as above, but use sign flavor opposite of unsignedp.
     Need to adjust the result after the multiplication.  The budget
     includes two shifts and four adds for the signedness fixup.  */
  if (size - 1 < BITS_PER_WORD
      && (mul_highpart_cost (speed, mode)
	  + 2 * shift_cost (speed, mode, size-1)
	  + 4 * add_cost (speed, mode) < max_cost))
    {
      moptab = unsignedp ? smul_highpart_optab : umul_highpart_optab;
      tem = expand_binop (mode, moptab, op0, op1, target, !unsignedp,
			  OPTAB_DIRECT);
      if (tem)
	/* We used the wrong signedness.  Adjust the result.  */
	return expand_mult_highpart_adjust (mode, tem, op0, op1, tem,
					    unsignedp);
    }

  /* Try widening multiplication.  */
  moptab = unsignedp ? umul_widen_optab : smul_widen_optab;
  if (convert_optab_handler (moptab, wider_mode, mode) != CODE_FOR_nothing
      && mul_widen_cost (speed, wider_mode) < max_cost)
    {
      tem = expand_binop (wider_mode, moptab, op0, op1, NULL_RTX, unsignedp,
			  OPTAB_WIDEN);
      if (tem)
	return extract_high_half (mode, tem);
    }

  /* Try widening the mode and perform a non-widening multiplication.  */
  if (optab_handler (smul_optab, wider_mode) != CODE_FOR_nothing
      && size - 1 < BITS_PER_WORD
      && (mul_cost (speed, wider_mode) + shift_cost (speed, mode, size-1)
	  < max_cost))
    {
      rtx_insn *insns;
      rtx wop0, wop1;

      /* We need to widen the operands, for example to ensure the
	 constant multiplier is correctly sign or zero extended.
	 Use a sequence to clean-up any instructions emitted by
	 the conversions if things don't work out.  */
      start_sequence ();
      wop0 = convert_modes (wider_mode, mode, op0, unsignedp);
      wop1 = convert_modes (wider_mode, mode, op1, unsignedp);
      tem = expand_binop (wider_mode, smul_optab, wop0, wop1, 0,
			  unsignedp, OPTAB_WIDEN);
      insns = end_sequence ();

      if (tem)
	{
	  emit_insn (insns);
	  return extract_high_half (mode, tem);
	}
    }

  /* Try widening multiplication of opposite signedness, and adjust.  */
  moptab = unsignedp ? smul_widen_optab : umul_widen_optab;
  if (convert_optab_handler (moptab, wider_mode, mode) != CODE_FOR_nothing
      && size - 1 < BITS_PER_WORD
      && (mul_widen_cost (speed, wider_mode)
	  + 2 * shift_cost (speed, mode, size-1)
	  + 4 * add_cost (speed, mode) < max_cost))
    {
      tem = expand_binop (wider_mode, moptab, op0, op1, NULL_RTX, !unsignedp,
			  OPTAB_WIDEN);
      if (tem != 0)
	{
	  tem = extract_high_half (mode, tem);
	  /* We used the wrong signedness.  Adjust the result.  */
	  return expand_mult_highpart_adjust (mode, tem, op0, op1, target,
					      unsignedp);
	}
    }

  /* No strategy fit within MAX_COST.  */
  return 0;
}
3971 :
/* Emit code to multiply OP0 and OP1 (where OP1 is an integer constant),
   putting the high half of the result in TARGET if that is convenient,
   and return where the result is.  If the operation cannot be performed,
   0 is returned.

   MODE is the mode of operation and result.

   UNSIGNEDP nonzero means unsigned multiply.

   MAX_COST is the total allowed cost for the expanded RTL.  */

static rtx
expmed_mult_highpart (scalar_int_mode mode, rtx op0, rtx op1,
		      rtx target, int unsignedp, int max_cost)
{
  const bool speed = optimize_insn_for_speed_p ();
  unsigned HOST_WIDE_INT cnst1;
  int extra_cost;
  bool sign_adjust = false;
  enum mult_variant variant;
  struct algorithm alg;
  rtx narrow_op1, tem;

  /* We can't support modes wider than HOST_BITS_PER_INT.  */
  gcc_assert (HWI_COMPUTABLE_MODE_P (mode));

  cnst1 = INTVAL (op1) & GET_MODE_MASK (mode);
  narrow_op1 = gen_int_mode (INTVAL (op1), mode);

  /* We can't optimize modes wider than BITS_PER_WORD.
     ??? We might be able to perform double-word arithmetic if
     mode == word_mode, however all the cost calculations in
     synth_mult etc. assume single-word operations.  */
  scalar_int_mode wider_mode = GET_MODE_WIDER_MODE (mode).require ();
  if (GET_MODE_BITSIZE (wider_mode) > BITS_PER_WORD)
    return expmed_mult_highpart_optab (mode, op0, narrow_op1, target,
				       unsignedp, max_cost);

  /* The synthesized sequence needs a final shift to extract the high
     half; account for it up front.  */
  extra_cost = shift_cost (speed, mode, GET_MODE_BITSIZE (mode) - 1);

  /* Check whether we try to multiply by a negative constant.  */
  if (!unsignedp && ((cnst1 >> (GET_MODE_BITSIZE (mode) - 1)) & 1))
    {
      /* A signed multiply by a constant with the sign bit set also
	 needs a subtraction fixup afterwards.  */
      sign_adjust = true;
      extra_cost += add_cost (speed, mode);
    }

  /* See whether shift/add multiplication is cheap enough.  */
  if (choose_mult_variant (wider_mode, cnst1, &alg, &variant,
			   max_cost - extra_cost))
    {
      /* See whether the specialized multiplication optabs are
	 cheaper than the shift/add version.  */
      tem = expmed_mult_highpart_optab (mode, op0, narrow_op1, target,
					unsignedp,
					alg.cost.cost + extra_cost);
      if (tem)
	return tem;

      /* Perform the full widened multiply via shift/add, then take
	 the high half of the double-width result.  */
      tem = convert_to_mode (wider_mode, op0, unsignedp);
      tem = expand_mult_const (wider_mode, tem, cnst1, 0, &alg, variant);
      tem = extract_high_half (mode, tem);

      /* Adjust result for signedness.  */
      if (sign_adjust)
	tem = force_operand (gen_rtx_MINUS (mode, tem, op0), tem);

      return tem;
    }
  /* Shift/add was too expensive; fall back to multiply instructions.  */
  return expmed_mult_highpart_optab (mode, op0, narrow_op1, target,
				     unsignedp, max_cost);
}
4044 :
4045 :
/* Expand signed modulus of OP0 by a power of two D in mode MODE.
   D must be a positive power of two; LOGD below is its exponent.
   Returns an rtx holding OP0 % D with C truncation semantics
   (result has the sign of OP0).  */

static rtx
expand_smod_pow2 (scalar_int_mode mode, rtx op0, HOST_WIDE_INT d)
{
  rtx result, temp, shift;
  rtx_code_label *label;
  int logd;
  int prec = GET_MODE_PRECISION (mode);

  logd = floor_log2 (d);
  result = gen_reg_rtx (mode);

  /* Avoid conditional branches when they're expensive.  */
  if (BRANCH_COST (optimize_insn_for_speed_p (), false) >= 2
      && optimize_insn_for_speed_p ())
    {
      /* SIGNMASK is -1 when OP0 < 0, else 0 (last argument -1 requests
	 an all-ones "true" value from the store-flag expansion).  */
      rtx signmask = emit_store_flag (result, LT, op0, const0_rtx,
				      mode, 0, -1);
      if (signmask)
	{
	  /* Low LOGD bits set: the mask that extracts the remainder of
	     a non-negative dividend.  */
	  HOST_WIDE_INT masklow = (HOST_WIDE_INT_1 << logd) - 1;
	  signmask = force_reg (mode, signmask);
	  shift = gen_int_shift_amount (mode, GET_MODE_BITSIZE (mode) - logd);

	  /* Use the rtx_cost of a LSHIFTRT instruction to determine
	     which instruction sequence to use.  If logical right shifts
	     are expensive then use 2 XORs, 2 SUBs and an AND, otherwise
	     use a LSHIFTRT, 1 ADD, 1 SUB and an AND.  */

	  temp = gen_rtx_LSHIFTRT (mode, result, shift);
	  if (optab_handler (lshr_optab, mode) == CODE_FOR_nothing
	      || (set_src_cost (temp, mode, optimize_insn_for_speed_p ())
		  > COSTS_N_INSNS (2)))
	    {
	      /* ((OP0 ^ S) - S) negates OP0 when S == -1 and is the
		 identity when S == 0; mask the (now non-negative)
		 value, then undo the conditional negation the same
		 way, restoring the sign of OP0.  */
	      temp = expand_binop (mode, xor_optab, op0, signmask,
				   NULL_RTX, 1, OPTAB_LIB_WIDEN);
	      temp = expand_binop (mode, sub_optab, temp, signmask,
				   NULL_RTX, 1, OPTAB_LIB_WIDEN);
	      temp = expand_binop (mode, and_optab, temp,
				   gen_int_mode (masklow, mode),
				   NULL_RTX, 1, OPTAB_LIB_WIDEN);
	      temp = expand_binop (mode, xor_optab, temp, signmask,
				   NULL_RTX, 1, OPTAB_LIB_WIDEN);
	      temp = expand_binop (mode, sub_optab, temp, signmask,
				   NULL_RTX, 1, OPTAB_LIB_WIDEN);
	    }
	  else
	    {
	      /* Logical right shift turns the all-ones SIGNMASK into a
		 bias of MASKLOW (and leaves 0 unchanged); add the bias,
		 mask, then subtract the bias again.  */
	      signmask = expand_binop (mode, lshr_optab, signmask, shift,
				       NULL_RTX, 1, OPTAB_LIB_WIDEN);
	      signmask = force_reg (mode, signmask);

	      temp = expand_binop (mode, add_optab, op0, signmask,
				   NULL_RTX, 1, OPTAB_LIB_WIDEN);
	      temp = expand_binop (mode, and_optab, temp,
				   gen_int_mode (masklow, mode),
				   NULL_RTX, 1, OPTAB_LIB_WIDEN);
	      temp = expand_binop (mode, sub_optab, temp, signmask,
				   NULL_RTX, 1, OPTAB_LIB_WIDEN);
	    }
	  return temp;
	}
    }

  /* Branching version, used when branches are cheap or we are
     optimizing for size.

     Mask contains the mode's signbit and the significant bits of the
     modulus.  By including the signbit in the operation, many targets
     can avoid an explicit compare operation in the following comparison
     against zero.  */
  wide_int mask = wi::mask (logd, false, prec);
  mask = wi::set_bit (mask, prec - 1);

  temp = expand_binop (mode, and_optab, op0,
		       immed_wide_int_const (mask, mode),
		       result, 1, OPTAB_LIB_WIDEN);
  if (temp != result)
    emit_move_insn (result, temp);

  /* If RESULT is non-negative here, OP0 was non-negative and RESULT
     already holds the remainder; skip the sign fixup below.  */
  label = gen_label_rtx ();
  do_cmp_and_jump (result, const0_rtx, GE, mode, label);

  /* Negative case: convert the sign-bit-tagged low bits into a proper
     negative remainder via ((RESULT - 1) | ~MASK) + 1.  */
  temp = expand_binop (mode, sub_optab, result, const1_rtx, result,
		       0, OPTAB_LIB_WIDEN);

  mask = wi::mask (logd, true, prec);
  temp = expand_binop (mode, ior_optab, temp,
		       immed_wide_int_const (mask, mode),
		       result, 1, OPTAB_LIB_WIDEN);
  temp = expand_binop (mode, add_optab, temp, const1_rtx, result,
		       0, OPTAB_LIB_WIDEN);
  if (temp != result)
    emit_move_insn (result, temp);
  emit_label (label);
  return result;
}
4141 :
/* Expand signed division of OP0 by a power of two D in mode MODE.
   This routine is only called for positive values of D.

   All strategies below implement the rounding-toward-zero identity
   OP0 / D == (OP0 + (OP0 < 0 ? D - 1 : 0)) >> LOGD, differing only in
   how the conditional bias is materialized; which one is emitted
   depends on the target's branch cost and available instructions.  */

static rtx
expand_sdiv_pow2 (scalar_int_mode mode, rtx op0, HOST_WIDE_INT d)
{
  rtx temp;
  rtx_code_label *label;
  int logd;

  logd = floor_log2 (d);

  /* Strategy 1, D == 2 only: the bias is exactly the 0/1 result of
     the OP0 < 0 store-flag, so a single add suffices.  */
  if (d == 2
      && BRANCH_COST (optimize_insn_for_speed_p (),
		      false) >= 1)
    {
      temp = gen_reg_rtx (mode);
      temp = emit_store_flag (temp, LT, op0, const0_rtx, mode, 0, 1);
      if (temp != NULL_RTX)
	{
	  temp = expand_binop (mode, add_optab, temp, op0, NULL_RTX,
			       0, OPTAB_LIB_WIDEN);
	  return expand_shift (RSHIFT_EXPR, mode, temp, logd, NULL_RTX, 0);
	}
    }

  /* Strategy 2: compute OP0 + (D - 1) unconditionally and use a
     conditional move to select it when OP0 < 0.  Built inside a
     sequence so it can be discarded if the cmove fails to expand.  */
  if (HAVE_conditional_move
      && BRANCH_COST (optimize_insn_for_speed_p (), false) >= 2)
    {
      rtx temp2;

      start_sequence ();
      temp2 = copy_to_mode_reg (mode, op0);
      temp = expand_binop (mode, add_optab, temp2, gen_int_mode (d - 1, mode),
			   NULL_RTX, 0, OPTAB_LIB_WIDEN);
      temp = force_reg (mode, temp);

      /* Construct "temp2 = (temp2 < 0) ? temp : temp2".  */
      temp2 = emit_conditional_move (temp2, { LT, temp2, const0_rtx, mode },
				     temp, temp2, mode, 0);
      if (temp2)
	{
	  rtx_insn *seq = end_sequence ();
	  emit_insn (seq);
	  return expand_shift (RSHIFT_EXPR, mode, temp2, logd, NULL_RTX, 0);
	}
      /* Conditional move unavailable for this case; drop the pending
	 sequence and fall through to the next strategy.  */
      end_sequence ();
    }

  /* Strategy 3: materialize an all-ones mask for OP0 < 0 (store-flag
     with -1) and derive the D - 1 bias from it, either by AND with
     D - 1 or, when a shift is cheaper on subword modes, by a logical
     right shift of BITSIZE - LOGD.  */
  if (BRANCH_COST (optimize_insn_for_speed_p (),
		   false) >= 2)
    {
      int ushift = GET_MODE_BITSIZE (mode) - logd;

      temp = gen_reg_rtx (mode);
      temp = emit_store_flag (temp, LT, op0, const0_rtx, mode, 0, -1);
      if (temp != NULL_RTX)
	{
	  if (GET_MODE_BITSIZE (mode) >= BITS_PER_WORD
	      || shift_cost (optimize_insn_for_speed_p (), mode, ushift)
	      > COSTS_N_INSNS (1))
	    temp = expand_binop (mode, and_optab, temp,
				 gen_int_mode (d - 1, mode),
				 NULL_RTX, 0, OPTAB_LIB_WIDEN);
	  else
	    temp = expand_shift (RSHIFT_EXPR, mode, temp,
				 ushift, NULL_RTX, 1);
	  temp = expand_binop (mode, add_optab, temp, op0, NULL_RTX,
			       0, OPTAB_LIB_WIDEN);
	  return expand_shift (RSHIFT_EXPR, mode, temp, logd, NULL_RTX, 0);
	}
    }

  /* Strategy 4, fallback when branches are cheap or store-flag
     failed: branch around an explicit increment by D - 1 for
     negative OP0, then shift.  */
  label = gen_label_rtx ();
  temp = copy_to_mode_reg (mode, op0);
  do_cmp_and_jump (temp, const0_rtx, GE, mode, label);
  expand_inc (temp, gen_int_mode (d - 1, mode));
  emit_label (label);
  return expand_shift (RSHIFT_EXPR, mode, temp, logd, NULL_RTX, 0);
}
4221 : }
4222 :
4223 : /* Emit the code to divide OP0 by OP1, putting the result in TARGET
4224 : if that is convenient, and returning where the result is.
4225 : You may request either the quotient or the remainder as the result;
4226 : specify REM_FLAG nonzero to get the remainder.
4227 :
4228 : CODE is the expression code for which kind of division this is;
4229 : it controls how rounding is done. MODE is the machine mode to use.
4230 : UNSIGNEDP nonzero means do unsigned division. */
4231 :
4232 : /* ??? For CEIL_MOD_EXPR, can compute incorrect remainder with ANDI
4233 : and then correct it by or'ing in missing high bits
4234 : if result of ANDI is nonzero.
4235 : For ROUND_MOD_EXPR, can use ANDI and then sign-extend the result.
4236 : This could optimize to a bfexts instruction.
4237 : But C doesn't use these operations, so their optimizations are
4238 : left for later. */
4239 : /* ??? For modulo, we don't actually need the highpart of the first product,
4240 : the low part will do nicely. And for small divisors, the second multiply
4241 : can also be a low-part only multiply or even be completely left out.
4242 : E.g. to calculate the remainder of a division by 3 with a 32 bit
4243 : multiply, multiply with 0x55555556 and extract the upper two bits;
4244 : the result is exact for inputs up to 0x1fffffff.
4245 : The input range can be reduced by using cross-sum rules.
4246 : For odd divisors >= 3, the following table gives right shift counts
4247 : so that if a number is shifted by an integer multiple of the given
4248 : amount, the remainder stays the same:
4249 : 2, 4, 3, 6, 10, 12, 4, 8, 18, 6, 11, 20, 18, 0, 5, 10, 12, 0, 12, 20,
4250 : 14, 12, 23, 21, 8, 0, 20, 18, 0, 0, 6, 12, 0, 22, 0, 18, 20, 30, 0, 0,
4251 : 0, 8, 0, 11, 12, 10, 36, 0, 30, 0, 0, 12, 0, 0, 0, 0, 44, 12, 24, 0,
4252 : 20, 0, 7, 14, 0, 18, 36, 0, 0, 46, 60, 0, 42, 0, 15, 24, 20, 0, 0, 33,
4253 : 0, 20, 0, 0, 18, 0, 60, 0, 0, 0, 0, 0, 40, 18, 0, 0, 12
4254 :
4255 : Cross-sum rules for even numbers can be derived by leaving as many bits
4256 : to the right alone as the divisor has zeros to the right.
4257 : E.g. if x is an unsigned 32 bit number:
4258 : (x mod 12) == (((x & 1023) + ((x >> 8) & ~3)) * 0x15555558 >> 2 * 3) >> 28
4259 : */
4260 :
4261 : rtx
4262 228661 : expand_divmod (int rem_flag, enum tree_code code, machine_mode mode,
4263 : rtx op0, rtx op1, rtx target, int unsignedp,
4264 : enum optab_methods methods)
4265 : {
4266 228661 : machine_mode compute_mode;
4267 228661 : rtx tquotient;
4268 228661 : rtx quotient = 0, remainder = 0;
4269 228661 : rtx_insn *last;
4270 228661 : rtx_insn *insn;
4271 228661 : optab optab1, optab2;
4272 228661 : int op1_is_constant, op1_is_pow2 = 0;
4273 228661 : int max_cost, extra_cost;
4274 228661 : static HOST_WIDE_INT last_div_const = 0;
4275 228661 : bool speed = optimize_insn_for_speed_p ();
4276 :
4277 228661 : op1_is_constant = CONST_INT_P (op1);
4278 228661 : if (op1_is_constant)
4279 : {
4280 144427 : wide_int ext_op1 = rtx_mode_t (op1, mode);
4281 144427 : op1_is_pow2 = (wi::popcount (ext_op1) == 1
4282 288854 : || (! unsignedp
4283 173729 : && wi::popcount (wi::neg (ext_op1)) == 1));
4284 144427 : }
4285 :
4286 : /*
4287 : This is the structure of expand_divmod:
4288 :
4289 : First comes code to fix up the operands so we can perform the operations
4290 : correctly and efficiently.
4291 :
4292 : Second comes a switch statement with code specific for each rounding mode.
4293 : For some special operands this code emits all RTL for the desired
4294 : operation, for other cases, it generates only a quotient and stores it in
4295 : QUOTIENT. The case for trunc division/remainder might leave quotient = 0,
4296 : to indicate that it has not done anything.
4297 :
4298 : Last comes code that finishes the operation. If QUOTIENT is set and
4299 : REM_FLAG is set, the remainder is computed as OP0 - QUOTIENT * OP1. If
4300 : QUOTIENT is not set, it is computed using trunc rounding.
4301 :
4302 : We try to generate special code for division and remainder when OP1 is a
4303 : constant. If |OP1| = 2**n we can use shifts and some other fast
4304 : operations. For other values of OP1, we compute a carefully selected
4305 : fixed-point approximation m = 1/OP1, and generate code that multiplies OP0
4306 : by m.
4307 :
4308 : In all cases but EXACT_DIV_EXPR, this multiplication requires the upper
4309 : half of the product. Different strategies for generating the product are
4310 : implemented in expmed_mult_highpart.
4311 :
4312 : If what we actually want is the remainder, we generate that by another
4313 : by-constant multiplication and a subtraction. */
4314 :
4315 : /* We shouldn't be called with OP1 == const1_rtx, but some of the
4316 : code below will malfunction if we are, so check here and handle
4317 : the special case if so. */
4318 228661 : if (op1 == const1_rtx)
4319 0 : return rem_flag ? const0_rtx : op0;
4320 :
4321 : /* When dividing by -1, we could get an overflow.
4322 : negv_optab can handle overflows. */
4323 228661 : if (! unsignedp && op1 == constm1_rtx)
4324 : {
4325 0 : if (rem_flag)
4326 0 : return const0_rtx;
4327 0 : return expand_unop (mode, flag_trapv && GET_MODE_CLASS (mode) == MODE_INT
4328 0 : ? negv_optab : neg_optab, op0, target, 0);
4329 : }
4330 :
4331 228661 : if (target
4332 : /* Don't use the function value register as a target
4333 : since we have to read it as well as write it,
4334 : and function-inlining gets confused by this. */
4335 228661 : && ((REG_P (target) && REG_FUNCTION_VALUE_P (target))
4336 : /* Don't clobber an operand while doing a multi-step calculation. */
4337 97338 : || ((rem_flag || op1_is_constant)
4338 78172 : && (reg_mentioned_p (target, op0)
4339 75694 : || (MEM_P (op0) && MEM_P (target))))
4340 94145 : || reg_mentioned_p (target, op1)
4341 94066 : || (MEM_P (op1) && MEM_P (target))))
4342 : target = 0;
4343 :
4344 : /* Get the mode in which to perform this computation. Normally it will
4345 : be MODE, but sometimes we can't do the desired operation in MODE.
4346 : If so, pick a wider mode in which we can do the operation. Convert
4347 : to that mode at the start to avoid repeated conversions.
4348 :
4349 : First see what operations we need. These depend on the expression
4350 : we are evaluating. (We assume that divxx3 insns exist under the
4351 : same conditions that modxx3 insns and that these insns don't normally
4352 : fail. If these assumptions are not correct, we may generate less
4353 : efficient code in some cases.)
4354 :
4355 : Then see if we find a mode in which we can open-code that operation
4356 : (either a division, modulus, or shift). Finally, check for the smallest
4357 : mode for which we can do the operation with a library call. */
4358 :
4359 : /* We might want to refine this now that we have division-by-constant
4360 : optimization. Since expmed_mult_highpart tries so many variants, it is
4361 : not straightforward to generalize this. Maybe we should make an array
4362 : of possible modes in init_expmed? Save this for GCC 2.7. */
4363 :
4364 119735 : optab1 = (op1_is_pow2
4365 228661 : ? (unsignedp ? lshr_optab : ashr_optab)
4366 140044 : : (unsignedp ? udiv_optab : sdiv_optab));
4367 291908 : optab2 = (op1_is_pow2 ? optab1
4368 140044 : : (unsignedp ? udivmod_optab : sdivmod_optab));
4369 :
4370 228661 : if (methods == OPTAB_WIDEN || methods == OPTAB_LIB_WIDEN)
4371 : {
4372 240756 : FOR_EACH_MODE_FROM (compute_mode, mode)
4373 236405 : if (optab_handler (optab1, compute_mode) != CODE_FOR_nothing
4374 236405 : || optab_handler (optab2, compute_mode) != CODE_FOR_nothing)
4375 : break;
4376 :
4377 226906 : if (compute_mode == VOIDmode && methods == OPTAB_LIB_WIDEN)
4378 4351 : FOR_EACH_MODE_FROM (compute_mode, mode)
4379 4351 : if (optab_libfunc (optab1, compute_mode)
4380 4351 : || optab_libfunc (optab2, compute_mode))
4381 : break;
4382 : }
4383 : else
4384 : compute_mode = mode;
4385 :
4386 : /* If we still couldn't find a mode, use MODE, but expand_binop will
4387 : probably die. */
4388 6106 : if (compute_mode == VOIDmode)
4389 0 : compute_mode = mode;
4390 :
4391 228661 : if (target && GET_MODE (target) == compute_mode)
4392 : tquotient = target;
4393 : else
4394 134756 : tquotient = gen_reg_rtx (compute_mode);
4395 :
4396 : #if 0
4397 : /* It should be possible to restrict the precision to GET_MODE_BITSIZE
4398 : (mode), and thereby get better code when OP1 is a constant. Do that
4399 : later. It will require going over all usages of SIZE below. */
4400 : size = GET_MODE_BITSIZE (mode);
4401 : #endif
4402 :
4403 : /* Only deduct something for a REM if the last divide done was
4404 : for a different constant. Then set the constant of the last
4405 : divide. */
4406 228661 : max_cost = (unsignedp
4407 337587 : ? udiv_cost (speed, compute_mode)
4408 119735 : : sdiv_cost (speed, compute_mode));
4409 228661 : if (rem_flag && ! (last_div_const != 0 && op1_is_constant
4410 7780 : && INTVAL (op1) == last_div_const))
4411 52003 : max_cost -= (mul_cost (speed, compute_mode)
4412 52003 : + add_cost (speed, compute_mode));
4413 :
4414 228661 : last_div_const = ! rem_flag && op1_is_constant ? INTVAL (op1) : 0;
4415 :
4416 : /* Now convert to the best mode to use. */
4417 228661 : if (compute_mode != mode)
4418 : {
4419 0 : op0 = convert_modes (compute_mode, mode, op0, unsignedp);
4420 0 : op1 = convert_modes (compute_mode, mode, op1, unsignedp);
4421 :
4422 : /* convert_modes may have placed op1 into a register, so we
4423 : must recompute the following. */
4424 0 : op1_is_constant = CONST_INT_P (op1);
4425 0 : if (op1_is_constant)
4426 : {
4427 0 : wide_int ext_op1 = rtx_mode_t (op1, compute_mode);
4428 0 : op1_is_pow2 = (wi::popcount (ext_op1) == 1
4429 0 : || (! unsignedp
4430 0 : && wi::popcount (wi::neg (ext_op1)) == 1));
4431 0 : }
4432 : else
4433 : op1_is_pow2 = 0;
4434 : }
4435 :
4436 : /* If one of the operands is a volatile MEM, copy it into a register. */
4437 :
4438 228661 : if (MEM_P (op0) && MEM_VOLATILE_P (op0))
4439 0 : op0 = force_reg (compute_mode, op0);
4440 228661 : if (MEM_P (op1) && MEM_VOLATILE_P (op1))
4441 0 : op1 = force_reg (compute_mode, op1);
4442 :
4443 : /* If we need the remainder or if OP1 is constant, we need to
4444 : put OP0 in a register in case it has any queued subexpressions. */
4445 228661 : if (rem_flag || op1_is_constant)
4446 173736 : op0 = force_reg (compute_mode, op0);
4447 :
4448 228661 : last = get_last_insn ();
4449 :
4450 : /* Promote floor rounding to trunc rounding for unsigned operations. */
4451 228661 : if (unsignedp)
4452 : {
4453 108926 : if (code == FLOOR_DIV_EXPR)
4454 : code = TRUNC_DIV_EXPR;
4455 108869 : if (code == FLOOR_MOD_EXPR)
4456 156 : code = TRUNC_MOD_EXPR;
4457 108926 : if (code == EXACT_DIV_EXPR && op1_is_pow2)
4458 4662 : code = TRUNC_DIV_EXPR;
4459 : }
4460 :
4461 228661 : if (op1 != const0_rtx)
4462 228284 : switch (code)
4463 : {
4464 178982 : case TRUNC_MOD_EXPR:
4465 178982 : case TRUNC_DIV_EXPR:
4466 178982 : if (op1_is_constant)
4467 : {
4468 95859 : scalar_int_mode int_mode = as_a <scalar_int_mode> (compute_mode);
4469 95859 : int size = GET_MODE_BITSIZE (int_mode);
4470 95859 : if (unsignedp)
4471 : {
4472 58460 : unsigned HOST_WIDE_INT mh, ml;
4473 58460 : int pre_shift, post_shift;
4474 58460 : wide_int wd = rtx_mode_t (op1, int_mode);
4475 58460 : unsigned HOST_WIDE_INT d = wd.to_uhwi ();
4476 :
4477 58460 : if (wi::popcount (wd) == 1)
4478 : {
4479 32129 : pre_shift = floor_log2 (d);
4480 32129 : if (rem_flag)
4481 : {
4482 287 : unsigned HOST_WIDE_INT mask
4483 287 : = (HOST_WIDE_INT_1U << pre_shift) - 1;
4484 287 : remainder
4485 287 : = expand_binop (int_mode, and_optab, op0,
4486 287 : gen_int_mode (mask, int_mode),
4487 : remainder, 1, methods);
4488 287 : if (remainder)
4489 287 : return gen_lowpart (mode, remainder);
4490 : }
4491 31842 : quotient = expand_shift (RSHIFT_EXPR, int_mode, op0,
4492 31842 : pre_shift, tquotient, 1);
4493 : }
4494 26331 : else if (size <= HOST_BITS_PER_WIDE_INT)
4495 : {
4496 24526 : if (d >= (HOST_WIDE_INT_1U << (size - 1)))
4497 : {
4498 : /* Most significant bit of divisor is set; emit an scc
4499 : insn. */
4500 155 : quotient = emit_store_flag_force (tquotient, GEU, op0, op1,
4501 : int_mode, 1, 1);
4502 : }
4503 : else
4504 : {
4505 : /* Find a suitable multiplier and right shift count
4506 : instead of directly dividing by D. */
4507 24371 : mh = choose_multiplier (d, size, size,
4508 : &ml, &post_shift);
4509 :
4510 : /* If the suggested multiplier is more than SIZE bits,
4511 : we can do better for even divisors, using an
4512 : initial right shift. */
4513 24371 : if (mh != 0 && (d & 1) == 0)
4514 : {
4515 2428 : pre_shift = ctz_or_zero (d);
4516 2428 : mh = choose_multiplier (d >> pre_shift, size,
4517 : size - pre_shift,
4518 : &ml, &post_shift);
4519 2428 : gcc_assert (!mh);
4520 : }
4521 : else
4522 : pre_shift = 0;
4523 :
4524 2447 : if (mh != 0)
4525 : {
4526 2447 : rtx t1, t2, t3, t4;
4527 :
4528 2533 : if (post_shift - 1 >= BITS_PER_WORD)
4529 0 : goto fail1;
4530 :
4531 2447 : extra_cost
4532 2447 : = (shift_cost (speed, int_mode, post_shift - 1)
4533 2447 : + shift_cost (speed, int_mode, 1)
4534 2447 : + 2 * add_cost (speed, int_mode));
4535 2447 : t1 = expmed_mult_highpart
4536 2447 : (int_mode, op0, gen_int_mode (ml, int_mode),
4537 : NULL_RTX, 1, max_cost - extra_cost);
4538 2447 : if (t1 == 0)
4539 103 : goto fail1;
4540 2344 : t2 = force_operand (gen_rtx_MINUS (int_mode,
4541 : op0, t1),
4542 : NULL_RTX);
4543 2344 : t3 = expand_shift (RSHIFT_EXPR, int_mode,
4544 : t2, 1, NULL_RTX, 1);
4545 2344 : t4 = force_operand (gen_rtx_PLUS (int_mode,
4546 : t1, t3),
4547 : NULL_RTX);
4548 2344 : quotient = expand_shift
4549 2344 : (RSHIFT_EXPR, int_mode, t4,
4550 2344 : post_shift - 1, tquotient, 1);
4551 : }
4552 : else
4553 : {
4554 21924 : rtx t1, t2;
4555 :
4556 23654 : if (pre_shift >= BITS_PER_WORD
4557 21924 : || post_shift >= BITS_PER_WORD)
4558 3 : goto fail1;
4559 :
4560 21921 : t1 = expand_shift
4561 43842 : (RSHIFT_EXPR, int_mode, op0,
4562 21921 : pre_shift, NULL_RTX, 1);
4563 21921 : extra_cost
4564 21921 : = (shift_cost (speed, int_mode, pre_shift)
4565 21921 : + shift_cost (speed, int_mode, post_shift));
4566 21921 : t2 = expmed_mult_highpart
4567 21921 : (int_mode, t1,
4568 21921 : gen_int_mode (ml, int_mode),
4569 : NULL_RTX, 1, max_cost - extra_cost);
4570 21921 : if (t2 == 0)
4571 922 : goto fail1;
4572 20999 : quotient = expand_shift
4573 20999 : (RSHIFT_EXPR, int_mode, t2,
4574 20999 : post_shift, tquotient, 1);
4575 : }
4576 : }
4577 : }
4578 : else /* Too wide mode to use tricky code */
4579 : break;
4580 :
4581 55340 : insn = get_last_insn ();
4582 55340 : if (insn != last)
4583 55340 : set_dst_reg_note (insn, REG_EQUAL,
4584 : gen_rtx_UDIV (int_mode, op0, op1),
4585 : quotient);
4586 56655 : }
4587 : else /* TRUNC_DIV, signed */
4588 : {
4589 37399 : unsigned HOST_WIDE_INT ml;
4590 37399 : int post_shift;
4591 37399 : rtx mlr;
4592 37399 : HOST_WIDE_INT d = INTVAL (op1);
4593 37399 : unsigned HOST_WIDE_INT abs_d;
4594 :
4595 : /* Not prepared to handle division/remainder by
4596 : 0xffffffffffffffff8000000000000000 etc. */
4597 37399 : if (d == HOST_WIDE_INT_MIN && size > HOST_BITS_PER_WIDE_INT)
4598 : break;
4599 :
4600 : /* Since d might be INT_MIN, we have to cast to
4601 : unsigned HOST_WIDE_INT before negating to avoid
4602 : undefined signed overflow. */
4603 37399 : abs_d = (d >= 0
4604 37399 : ? (unsigned HOST_WIDE_INT) d
4605 : : - (unsigned HOST_WIDE_INT) d);
4606 :
4607 : /* n rem d = n rem -d */
4608 37399 : if (rem_flag && d < 0)
4609 : {
4610 141 : d = abs_d;
4611 141 : op1 = gen_int_mode (abs_d, int_mode);
4612 : }
4613 :
4614 37399 : if (d == 1)
4615 : quotient = op0;
4616 37399 : else if (d == -1)
4617 0 : quotient = expand_unop (int_mode, neg_optab, op0,
4618 : tquotient, 0);
4619 37399 : else if (size <= HOST_BITS_PER_WIDE_INT
4620 35998 : && abs_d == HOST_WIDE_INT_1U << (size - 1))
4621 : {
4622 : /* This case is not handled correctly below. */
4623 133 : quotient = emit_store_flag (tquotient, EQ, op0, op1,
4624 : int_mode, 1, 1);
4625 133 : if (quotient == 0)
4626 1315 : goto fail1;
4627 : }
4628 37266 : else if (EXACT_POWER_OF_2_OR_ZERO_P (d)
4629 12772 : && (size <= HOST_BITS_PER_WIDE_INT || d >= 0)
4630 2683 : && (rem_flag
4631 2683 : ? smod_pow2_cheap (speed, int_mode)
4632 10089 : : sdiv_pow2_cheap (speed, int_mode))
4633 : /* We assume that cheap metric is true if the
4634 : optab has an expander for this mode. */
4635 51128 : && ((optab_handler ((rem_flag ? smod_optab
4636 : : sdiv_optab),
4637 : int_mode)
4638 : != CODE_FOR_nothing)
4639 611 : || (optab_handler (sdivmod_optab, int_mode)
4640 : != CODE_FOR_nothing)))
4641 : ;
4642 36661 : else if (EXACT_POWER_OF_2_OR_ZERO_P (abs_d))
4643 : {
4644 12683 : if (rem_flag)
4645 : {
4646 2555 : remainder = expand_smod_pow2 (int_mode, op0, d);
4647 2555 : if (remainder)
4648 2555 : return gen_lowpart (mode, remainder);
4649 : }
4650 :
4651 10128 : if (sdiv_pow2_cheap (speed, int_mode)
4652 10128 : && ((optab_handler (sdiv_optab, int_mode)
4653 : != CODE_FOR_nothing)
4654 10 : || (optab_handler (sdivmod_optab, int_mode)
4655 : != CODE_FOR_nothing)))
4656 7 : quotient = expand_divmod (0, TRUNC_DIV_EXPR,
4657 : int_mode, op0,
4658 7 : gen_int_mode (abs_d,
4659 : int_mode),
4660 : NULL_RTX, 0);
4661 : else
4662 10121 : quotient = expand_sdiv_pow2 (int_mode, op0, abs_d);
4663 :
4664 : /* We have computed OP0 / abs(OP1). If OP1 is negative,
4665 : negate the quotient. */
4666 10128 : if (d < 0)
4667 : {
4668 516 : insn = get_last_insn ();
4669 516 : if (insn != last
4670 516 : && abs_d < (HOST_WIDE_INT_1U
4671 : << (HOST_BITS_PER_WIDE_INT - 1)))
4672 516 : set_dst_reg_note (insn, REG_EQUAL,
4673 516 : gen_rtx_DIV (int_mode, op0,
4674 : gen_int_mode
4675 : (abs_d,
4676 : int_mode)),
4677 : quotient);
4678 :
4679 516 : quotient = expand_unop (int_mode, neg_optab,
4680 : quotient, quotient, 0);
4681 : }
4682 : }
4683 23978 : else if (size <= HOST_BITS_PER_WIDE_INT)
4684 : {
4685 22640 : choose_multiplier (abs_d, size, size - 1,
4686 : &ml, &post_shift);
4687 22640 : if (ml < HOST_WIDE_INT_1U << (size - 1))
4688 : {
4689 17275 : rtx t1, t2, t3;
4690 :
4691 18388 : if (post_shift >= BITS_PER_WORD
4692 17275 : || size - 1 >= BITS_PER_WORD)
4693 251 : goto fail1;
4694 :
4695 17024 : extra_cost = (shift_cost (speed, int_mode, post_shift)
4696 17024 : + shift_cost (speed, int_mode, size - 1)
4697 17024 : + add_cost (speed, int_mode));
4698 17024 : t1 = expmed_mult_highpart
4699 17024 : (int_mode, op0, gen_int_mode (ml, int_mode),
4700 : NULL_RTX, 0, max_cost - extra_cost);
4701 17024 : if (t1 == 0)
4702 830 : goto fail1;
4703 16194 : t2 = expand_shift
4704 32388 : (RSHIFT_EXPR, int_mode, t1,
4705 16194 : post_shift, NULL_RTX, 0);
4706 16194 : t3 = expand_shift
4707 16194 : (RSHIFT_EXPR, int_mode, op0,
4708 16194 : size - 1, NULL_RTX, 0);
4709 16194 : if (d < 0)
4710 197 : quotient
4711 197 : = force_operand (gen_rtx_MINUS (int_mode, t3, t2),
4712 : tquotient);
4713 : else
4714 15997 : quotient
4715 15997 : = force_operand (gen_rtx_MINUS (int_mode, t2, t3),
4716 : tquotient);
4717 : }
4718 : else
4719 : {
4720 5365 : rtx t1, t2, t3, t4;
4721 :
4722 5716 : if (post_shift >= BITS_PER_WORD
4723 5360 : || size - 1 >= BITS_PER_WORD)
4724 24 : goto fail1;
4725 :
4726 5341 : ml |= HOST_WIDE_INT_M1U << (size - 1);
4727 5341 : mlr = gen_int_mode (ml, int_mode);
4728 5341 : extra_cost = (shift_cost (speed, int_mode, post_shift)
4729 5341 : + shift_cost (speed, int_mode, size - 1)
4730 5341 : + 2 * add_cost (speed, int_mode));
4731 5341 : t1 = expmed_mult_highpart (int_mode, op0, mlr,
4732 : NULL_RTX, 0,
4733 : max_cost - extra_cost);
4734 5341 : if (t1 == 0)
4735 210 : goto fail1;
4736 5131 : t2 = force_operand (gen_rtx_PLUS (int_mode, t1, op0),
4737 : NULL_RTX);
4738 5131 : t3 = expand_shift
4739 10262 : (RSHIFT_EXPR, int_mode, t2,
4740 5131 : post_shift, NULL_RTX, 0);
4741 5131 : t4 = expand_shift
4742 5131 : (RSHIFT_EXPR, int_mode, op0,
4743 5131 : size - 1, NULL_RTX, 0);
4744 5131 : if (d < 0)
4745 53 : quotient
4746 53 : = force_operand (gen_rtx_MINUS (int_mode, t4, t3),
4747 : tquotient);
4748 : else
4749 5078 : quotient
4750 5078 : = force_operand (gen_rtx_MINUS (int_mode, t3, t4),
4751 : tquotient);
4752 : }
4753 : }
4754 : else /* Too wide mode to use tricky code */
4755 : break;
4756 :
4757 32191 : insn = get_last_insn ();
4758 32191 : if (insn != last)
4759 31586 : set_dst_reg_note (insn, REG_EQUAL,
4760 : gen_rtx_DIV (int_mode, op0, op1),
4761 : quotient);
4762 : }
4763 : break;
4764 : }
4765 83123 : fail1:
4766 85466 : delete_insns_since (last);
4767 85466 : break;
4768 :
4769 1768 : case FLOOR_DIV_EXPR:
4770 1768 : case FLOOR_MOD_EXPR:
4771 : /* We will come here only for signed operations. */
4772 1768 : if (op1_is_constant && HWI_COMPUTABLE_MODE_P (compute_mode))
4773 : {
4774 976 : scalar_int_mode int_mode = as_a <scalar_int_mode> (compute_mode);
4775 976 : int size = GET_MODE_BITSIZE (int_mode);
4776 976 : unsigned HOST_WIDE_INT mh, ml;
4777 976 : int pre_shift, post_shift;
4778 976 : HOST_WIDE_INT d = INTVAL (op1);
4779 :
4780 976 : if (d > 0)
4781 : {
4782 : /* We could just as easily deal with negative constants here,
4783 : but it does not seem worth the trouble for GCC 2.6. */
4784 951 : if (EXACT_POWER_OF_2_OR_ZERO_P (d))
4785 : {
4786 644 : pre_shift = floor_log2 (d);
4787 644 : if (rem_flag)
4788 : {
4789 70 : unsigned HOST_WIDE_INT mask
4790 70 : = (HOST_WIDE_INT_1U << pre_shift) - 1;
4791 70 : remainder = expand_binop
4792 70 : (int_mode, and_optab, op0,
4793 70 : gen_int_mode (mask, int_mode),
4794 : remainder, 0, methods);
4795 70 : if (remainder)
4796 70 : return gen_lowpart (mode, remainder);
4797 : }
4798 574 : quotient = expand_shift
4799 574 : (RSHIFT_EXPR, int_mode, op0,
4800 574 : pre_shift, tquotient, 0);
4801 : }
4802 : else
4803 : {
4804 307 : rtx t1, t2, t3, t4;
4805 :
4806 307 : mh = choose_multiplier (d, size, size - 1,
4807 : &ml, &post_shift);
4808 307 : gcc_assert (!mh);
4809 :
4810 331 : if (post_shift < BITS_PER_WORD
4811 307 : && size - 1 < BITS_PER_WORD)
4812 : {
4813 305 : t1 = expand_shift
4814 305 : (RSHIFT_EXPR, int_mode, op0,
4815 305 : size - 1, NULL_RTX, 0);
4816 305 : t2 = expand_binop (int_mode, xor_optab, op0, t1,
4817 : NULL_RTX, 0, OPTAB_WIDEN);
4818 305 : extra_cost = (shift_cost (speed, int_mode, post_shift)
4819 305 : + shift_cost (speed, int_mode, size - 1)
4820 305 : + 2 * add_cost (speed, int_mode));
4821 305 : t3 = expmed_mult_highpart
4822 305 : (int_mode, t2, gen_int_mode (ml, int_mode),
4823 : NULL_RTX, 1, max_cost - extra_cost);
4824 305 : if (t3 != 0)
4825 : {
4826 274 : t4 = expand_shift
4827 548 : (RSHIFT_EXPR, int_mode, t3,
4828 274 : post_shift, NULL_RTX, 1);
4829 274 : quotient = expand_binop (int_mode, xor_optab,
4830 : t4, t1, tquotient, 0,
4831 : OPTAB_WIDEN);
4832 : }
4833 : }
4834 : }
4835 : }
4836 : else
4837 : {
4838 25 : rtx nsign, t1, t2, t3, t4;
4839 25 : t1 = force_operand (gen_rtx_PLUS (int_mode,
4840 : op0, constm1_rtx), NULL_RTX);
4841 25 : t2 = expand_binop (int_mode, ior_optab, op0, t1, NULL_RTX,
4842 : 0, OPTAB_WIDEN);
4843 50 : nsign = expand_shift (RSHIFT_EXPR, int_mode, t2,
4844 25 : size - 1, NULL_RTX, 0);
4845 25 : t3 = force_operand (gen_rtx_MINUS (int_mode, t1, nsign),
4846 : NULL_RTX);
4847 25 : t4 = expand_divmod (0, TRUNC_DIV_EXPR, int_mode, t3, op1,
4848 : NULL_RTX, 0);
4849 25 : if (t4)
4850 : {
4851 25 : rtx t5;
4852 25 : t5 = expand_unop (int_mode, one_cmpl_optab, nsign,
4853 : NULL_RTX, 0);
4854 25 : quotient = force_operand (gen_rtx_PLUS (int_mode, t4, t5),
4855 : tquotient);
4856 : }
4857 : }
4858 : }
4859 :
4860 906 : if (quotient != 0)
4861 : break;
4862 825 : delete_insns_since (last);
4863 :
4864 : /* Try using an instruction that produces both the quotient and
4865 : remainder, using truncation. We can easily compensate the quotient
4866 : or remainder to get floor rounding, once we have the remainder.
4867 : Notice that we compute also the final remainder value here,
4868 : and return the result right away. */
4869 825 : if (target == 0 || GET_MODE (target) != compute_mode)
4870 123 : target = gen_reg_rtx (compute_mode);
4871 :
4872 825 : if (rem_flag)
4873 : {
4874 327 : remainder
4875 327 : = REG_P (target) ? target : gen_reg_rtx (compute_mode);
4876 327 : quotient = gen_reg_rtx (compute_mode);
4877 : }
4878 : else
4879 : {
4880 498 : quotient
4881 498 : = REG_P (target) ? target : gen_reg_rtx (compute_mode);
4882 498 : remainder = gen_reg_rtx (compute_mode);
4883 : }
4884 :
4885 825 : if (expand_twoval_binop (sdivmod_optab, op0, op1,
4886 : quotient, remainder, 0))
4887 : {
4888 : /* This could be computed with a branch-less sequence.
4889 : Save that for later. */
4890 790 : rtx tem;
4891 790 : rtx_code_label *label = gen_label_rtx ();
4892 790 : do_cmp_and_jump (remainder, const0_rtx, EQ, compute_mode, label);
4893 790 : tem = expand_binop (compute_mode, xor_optab, op0, op1,
4894 : NULL_RTX, 0, OPTAB_WIDEN);
4895 790 : do_cmp_and_jump (tem, const0_rtx, GE, compute_mode, label);
4896 790 : expand_dec (quotient, const1_rtx);
4897 790 : expand_inc (remainder, op1);
4898 790 : emit_label (label);
4899 1279 : return gen_lowpart (mode, rem_flag ? remainder : quotient);
4900 : }
4901 :
4902 : /* No luck with division elimination or divmod. Have to do it
4903 : by conditionally adjusting op0 *and* the result. */
4904 35 : {
4905 35 : rtx_code_label *label1, *label2, *label3, *label4, *label5;
4906 35 : rtx adjusted_op0;
4907 35 : rtx tem;
4908 :
4909 35 : quotient = gen_reg_rtx (compute_mode);
4910 35 : adjusted_op0 = copy_to_mode_reg (compute_mode, op0);
4911 35 : label1 = gen_label_rtx ();
4912 35 : label2 = gen_label_rtx ();
4913 35 : label3 = gen_label_rtx ();
4914 35 : label4 = gen_label_rtx ();
4915 35 : label5 = gen_label_rtx ();
4916 35 : do_cmp_and_jump (op1, const0_rtx, LT, compute_mode, label2);
4917 35 : do_cmp_and_jump (adjusted_op0, const0_rtx, LT, compute_mode, label1);
4918 35 : tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4919 : quotient, 0, methods);
4920 35 : if (tem != quotient)
4921 35 : emit_move_insn (quotient, tem);
4922 35 : emit_jump_insn (targetm.gen_jump (label5));
4923 35 : emit_barrier ();
4924 35 : emit_label (label1);
4925 35 : expand_inc (adjusted_op0, const1_rtx);
4926 35 : emit_jump_insn (targetm.gen_jump (label4));
4927 35 : emit_barrier ();
4928 35 : emit_label (label2);
4929 35 : do_cmp_and_jump (adjusted_op0, const0_rtx, GT, compute_mode, label3);
4930 35 : tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4931 : quotient, 0, methods);
4932 35 : if (tem != quotient)
4933 35 : emit_move_insn (quotient, tem);
4934 35 : emit_jump_insn (targetm.gen_jump (label5));
4935 35 : emit_barrier ();
4936 35 : emit_label (label3);
4937 35 : expand_dec (adjusted_op0, const1_rtx);
4938 35 : emit_label (label4);
4939 35 : tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4940 : quotient, 0, methods);
4941 35 : if (tem != quotient)
4942 35 : emit_move_insn (quotient, tem);
4943 35 : expand_dec (quotient, const1_rtx);
4944 35 : emit_label (label5);
4945 : }
4946 35 : break;
4947 :
4948 383 : case CEIL_DIV_EXPR:
4949 383 : case CEIL_MOD_EXPR:
4950 383 : if (unsignedp)
4951 : {
4952 0 : if (op1_is_constant
4953 0 : && EXACT_POWER_OF_2_OR_ZERO_P (INTVAL (op1))
4954 0 : && (HWI_COMPUTABLE_MODE_P (compute_mode)
4955 0 : || INTVAL (op1) >= 0))
4956 : {
4957 0 : scalar_int_mode int_mode
4958 0 : = as_a <scalar_int_mode> (compute_mode);
4959 0 : rtx t1, t2, t3;
4960 0 : unsigned HOST_WIDE_INT d = INTVAL (op1);
4961 0 : t1 = expand_shift (RSHIFT_EXPR, int_mode, op0,
4962 0 : floor_log2 (d), tquotient, 1);
4963 0 : t2 = expand_binop (int_mode, and_optab, op0,
4964 0 : gen_int_mode (d - 1, int_mode),
4965 : NULL_RTX, 1, methods);
4966 0 : t3 = gen_reg_rtx (int_mode);
4967 0 : t3 = emit_store_flag (t3, NE, t2, const0_rtx, int_mode, 1, 1);
4968 0 : if (t3 == 0)
4969 : {
4970 0 : rtx_code_label *lab;
4971 0 : lab = gen_label_rtx ();
4972 0 : do_cmp_and_jump (t2, const0_rtx, EQ, int_mode, lab);
4973 0 : expand_inc (t1, const1_rtx);
4974 0 : emit_label (lab);
4975 0 : quotient = t1;
4976 : }
4977 : else
4978 0 : quotient = force_operand (gen_rtx_PLUS (int_mode, t1, t3),
4979 : tquotient);
4980 : break;
4981 : }
4982 :
4983 : /* Try using an instruction that produces both the quotient and
4984 : remainder, using truncation. We can easily compensate the
4985 : quotient or remainder to get ceiling rounding, once we have the
4986 : remainder. Notice that we compute also the final remainder
4987 : value here, and return the result right away. */
4988 0 : if (target == 0 || GET_MODE (target) != compute_mode)
4989 0 : target = gen_reg_rtx (compute_mode);
4990 :
4991 0 : if (rem_flag)
4992 : {
4993 0 : remainder = (REG_P (target)
4994 0 : ? target : gen_reg_rtx (compute_mode));
4995 0 : quotient = gen_reg_rtx (compute_mode);
4996 : }
4997 : else
4998 : {
4999 0 : quotient = (REG_P (target)
5000 0 : ? target : gen_reg_rtx (compute_mode));
5001 0 : remainder = gen_reg_rtx (compute_mode);
5002 : }
5003 :
5004 0 : if (expand_twoval_binop (udivmod_optab, op0, op1, quotient,
5005 : remainder, 1))
5006 : {
5007 : /* This could be computed with a branch-less sequence.
5008 : Save that for later. */
5009 0 : rtx_code_label *label = gen_label_rtx ();
5010 0 : do_cmp_and_jump (remainder, const0_rtx, EQ,
5011 : compute_mode, label);
5012 0 : expand_inc (quotient, const1_rtx);
5013 0 : expand_dec (remainder, op1);
5014 0 : emit_label (label);
5015 0 : return gen_lowpart (mode, rem_flag ? remainder : quotient);
5016 : }
5017 :
5018 : /* No luck with division elimination or divmod. Have to do it
5019 : by conditionally adjusting op0 *and* the result. */
5020 0 : {
5021 0 : rtx_code_label *label1, *label2;
5022 0 : rtx adjusted_op0, tem;
5023 :
5024 0 : quotient = gen_reg_rtx (compute_mode);
5025 0 : adjusted_op0 = copy_to_mode_reg (compute_mode, op0);
5026 0 : label1 = gen_label_rtx ();
5027 0 : label2 = gen_label_rtx ();
5028 0 : do_cmp_and_jump (adjusted_op0, const0_rtx, NE,
5029 : compute_mode, label1);
5030 0 : emit_move_insn (quotient, const0_rtx);
5031 0 : emit_jump_insn (targetm.gen_jump (label2));
5032 0 : emit_barrier ();
5033 0 : emit_label (label1);
5034 0 : expand_dec (adjusted_op0, const1_rtx);
5035 0 : tem = expand_binop (compute_mode, udiv_optab, adjusted_op0, op1,
5036 : quotient, 1, methods);
5037 0 : if (tem != quotient)
5038 0 : emit_move_insn (quotient, tem);
5039 0 : expand_inc (quotient, const1_rtx);
5040 0 : emit_label (label2);
5041 : }
5042 : }
5043 : else /* signed */
5044 : {
5045 383 : if (op1_is_constant && EXACT_POWER_OF_2_OR_ZERO_P (INTVAL (op1))
5046 27 : && INTVAL (op1) >= 0)
5047 : {
5048 : /* This is extremely similar to the code for the unsigned case
5049 : above. For 2.7 we should merge these variants, but for
5050 : 2.6.1 I don't want to touch the code for unsigned since that
5051 : get used in C. The signed case will only be used by other
5052 : languages (Ada). */
5053 :
5054 27 : rtx t1, t2, t3;
5055 27 : unsigned HOST_WIDE_INT d = INTVAL (op1);
5056 54 : t1 = expand_shift (RSHIFT_EXPR, compute_mode, op0,
5057 27 : floor_log2 (d), tquotient, 0);
5058 27 : t2 = expand_binop (compute_mode, and_optab, op0,
5059 27 : gen_int_mode (d - 1, compute_mode),
5060 : NULL_RTX, 1, methods);
5061 27 : t3 = gen_reg_rtx (compute_mode);
5062 27 : t3 = emit_store_flag (t3, NE, t2, const0_rtx,
5063 : compute_mode, 1, 1);
5064 27 : if (t3 == 0)
5065 : {
5066 0 : rtx_code_label *lab;
5067 0 : lab = gen_label_rtx ();
5068 0 : do_cmp_and_jump (t2, const0_rtx, EQ, compute_mode, lab);
5069 0 : expand_inc (t1, const1_rtx);
5070 0 : emit_label (lab);
5071 0 : quotient = t1;
5072 : }
5073 : else
5074 27 : quotient = force_operand (gen_rtx_PLUS (compute_mode,
5075 : t1, t3),
5076 : tquotient);
5077 : break;
5078 : }
5079 :
5080 : /* Try using an instruction that produces both the quotient and
5081 : remainder, using truncation. We can easily compensate the
5082 : quotient or remainder to get ceiling rounding, once we have the
5083 : remainder. Notice that we compute also the final remainder
5084 : value here, and return the result right away. */
5085 356 : if (target == 0 || GET_MODE (target) != compute_mode)
5086 15 : target = gen_reg_rtx (compute_mode);
5087 356 : if (rem_flag)
5088 : {
5089 149 : remainder= (REG_P (target)
5090 149 : ? target : gen_reg_rtx (compute_mode));
5091 149 : quotient = gen_reg_rtx (compute_mode);
5092 : }
5093 : else
5094 : {
5095 207 : quotient = (REG_P (target)
5096 207 : ? target : gen_reg_rtx (compute_mode));
5097 207 : remainder = gen_reg_rtx (compute_mode);
5098 : }
5099 :
5100 356 : if (expand_twoval_binop (sdivmod_optab, op0, op1, quotient,
5101 : remainder, 0))
5102 : {
5103 : /* This could be computed with a branch-less sequence.
5104 : Save that for later. */
5105 356 : rtx tem;
5106 356 : rtx_code_label *label = gen_label_rtx ();
5107 356 : do_cmp_and_jump (remainder, const0_rtx, EQ,
5108 : compute_mode, label);
5109 356 : tem = expand_binop (compute_mode, xor_optab, op0, op1,
5110 : NULL_RTX, 0, OPTAB_WIDEN);
5111 356 : do_cmp_and_jump (tem, const0_rtx, LT, compute_mode, label);
5112 356 : expand_inc (quotient, const1_rtx);
5113 356 : expand_dec (remainder, op1);
5114 356 : emit_label (label);
5115 563 : return gen_lowpart (mode, rem_flag ? remainder : quotient);
5116 : }
5117 :
5118 : /* No luck with division elimination or divmod. Have to do it
5119 : by conditionally adjusting op0 *and* the result. */
5120 0 : {
5121 0 : rtx_code_label *label1, *label2, *label3, *label4, *label5;
5122 0 : rtx adjusted_op0;
5123 0 : rtx tem;
5124 :
5125 0 : quotient = gen_reg_rtx (compute_mode);
5126 0 : adjusted_op0 = copy_to_mode_reg (compute_mode, op0);
5127 0 : label1 = gen_label_rtx ();
5128 0 : label2 = gen_label_rtx ();
5129 0 : label3 = gen_label_rtx ();
5130 0 : label4 = gen_label_rtx ();
5131 0 : label5 = gen_label_rtx ();
5132 0 : do_cmp_and_jump (op1, const0_rtx, LT, compute_mode, label2);
5133 0 : do_cmp_and_jump (adjusted_op0, const0_rtx, GT,
5134 : compute_mode, label1);
5135 0 : tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
5136 : quotient, 0, methods);
5137 0 : if (tem != quotient)
5138 0 : emit_move_insn (quotient, tem);
5139 0 : emit_jump_insn (targetm.gen_jump (label5));
5140 0 : emit_barrier ();
5141 0 : emit_label (label1);
5142 0 : expand_dec (adjusted_op0, const1_rtx);
5143 0 : emit_jump_insn (targetm.gen_jump (label4));
5144 0 : emit_barrier ();
5145 0 : emit_label (label2);
5146 0 : do_cmp_and_jump (adjusted_op0, const0_rtx, LT,
5147 : compute_mode, label3);
5148 0 : tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
5149 : quotient, 0, methods);
5150 0 : if (tem != quotient)
5151 0 : emit_move_insn (quotient, tem);
5152 0 : emit_jump_insn (targetm.gen_jump (label5));
5153 0 : emit_barrier ();
5154 0 : emit_label (label3);
5155 0 : expand_inc (adjusted_op0, const1_rtx);
5156 0 : emit_label (label4);
5157 0 : tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
5158 : quotient, 0, methods);
5159 0 : if (tem != quotient)
5160 0 : emit_move_insn (quotient, tem);
5161 0 : expand_inc (quotient, const1_rtx);
5162 0 : emit_label (label5);
5163 : }
5164 : }
5165 : break;
5166 :
5167 47149 : case EXACT_DIV_EXPR:
5168 47149 : if (op1_is_constant && HWI_COMPUTABLE_MODE_P (compute_mode))
5169 : {
5170 47141 : scalar_int_mode int_mode = as_a <scalar_int_mode> (compute_mode);
5171 47141 : int size = GET_MODE_BITSIZE (int_mode);
5172 47141 : HOST_WIDE_INT d = INTVAL (op1);
5173 47141 : unsigned HOST_WIDE_INT ml;
5174 47141 : int pre_shift;
5175 47141 : rtx t1;
5176 :
5177 47141 : pre_shift = ctz_or_zero (d);
5178 47141 : ml = invert_mod2n (d >> pre_shift, size);
5179 47141 : t1 = expand_shift (RSHIFT_EXPR, int_mode, op0,
5180 47141 : pre_shift, NULL_RTX, unsignedp);
5181 47141 : quotient = expand_mult (int_mode, t1, gen_int_mode (ml, int_mode),
5182 : NULL_RTX, 1);
5183 :
5184 47141 : insn = get_last_insn ();
5185 94282 : set_dst_reg_note (insn, REG_EQUAL,
5186 : gen_rtx_fmt_ee (unsignedp ? UDIV : DIV,
5187 : int_mode, op0, op1),
5188 : quotient);
5189 : }
5190 : break;
5191 :
5192 2 : case ROUND_DIV_EXPR:
5193 2 : case ROUND_MOD_EXPR:
5194 2 : if (unsignedp)
5195 : {
5196 0 : scalar_int_mode int_mode = as_a <scalar_int_mode> (compute_mode);
5197 0 : rtx tem;
5198 0 : rtx_code_label *label;
5199 0 : label = gen_label_rtx ();
5200 0 : quotient = gen_reg_rtx (int_mode);
5201 0 : remainder = gen_reg_rtx (int_mode);
5202 0 : if (expand_twoval_binop (udivmod_optab, op0, op1, quotient, remainder, 1) == 0)
5203 : {
5204 0 : rtx tem;
5205 0 : quotient = expand_binop (int_mode, udiv_optab, op0, op1,
5206 : quotient, 1, methods);
5207 0 : tem = expand_mult (int_mode, quotient, op1, NULL_RTX, 1);
5208 0 : remainder = expand_binop (int_mode, sub_optab, op0, tem,
5209 : remainder, 1, methods);
5210 : }
5211 0 : tem = plus_constant (int_mode, op1, -1);
5212 0 : tem = expand_shift (RSHIFT_EXPR, int_mode, tem, 1, NULL_RTX, 1);
5213 0 : do_cmp_and_jump (remainder, tem, LEU, int_mode, label);
5214 0 : expand_inc (quotient, const1_rtx);
5215 0 : expand_dec (remainder, op1);
5216 0 : emit_label (label);
5217 : }
5218 : else
5219 : {
5220 2 : scalar_int_mode int_mode = as_a <scalar_int_mode> (compute_mode);
5221 2 : int size = GET_MODE_BITSIZE (int_mode);
5222 2 : rtx abs_rem, abs_op1, tem, mask;
5223 2 : rtx_code_label *label;
5224 2 : label = gen_label_rtx ();
5225 2 : quotient = gen_reg_rtx (int_mode);
5226 2 : remainder = gen_reg_rtx (int_mode);
5227 2 : if (expand_twoval_binop (sdivmod_optab, op0, op1, quotient, remainder, 0) == 0)
5228 : {
5229 0 : rtx tem;
5230 0 : quotient = expand_binop (int_mode, sdiv_optab, op0, op1,
5231 : quotient, 0, methods);
5232 0 : tem = expand_mult (int_mode, quotient, op1, NULL_RTX, 0);
5233 0 : remainder = expand_binop (int_mode, sub_optab, op0, tem,
5234 : remainder, 0, methods);
5235 : }
5236 2 : abs_rem = expand_abs (int_mode, remainder, NULL_RTX, 1, 0);
5237 2 : abs_op1 = expand_abs (int_mode, op1, NULL_RTX, 1, 0);
5238 2 : tem = expand_shift (LSHIFT_EXPR, int_mode, abs_rem,
5239 : 1, NULL_RTX, 1);
5240 2 : do_cmp_and_jump (tem, abs_op1, LTU, int_mode, label);
5241 2 : tem = expand_binop (int_mode, xor_optab, op0, op1,
5242 : NULL_RTX, 0, OPTAB_WIDEN);
5243 4 : mask = expand_shift (RSHIFT_EXPR, int_mode, tem,
5244 2 : size - 1, NULL_RTX, 0);
5245 2 : tem = expand_binop (int_mode, xor_optab, mask, const1_rtx,
5246 : NULL_RTX, 0, OPTAB_WIDEN);
5247 2 : tem = expand_binop (int_mode, sub_optab, tem, mask,
5248 : NULL_RTX, 0, OPTAB_WIDEN);
5249 2 : expand_inc (quotient, tem);
5250 2 : tem = expand_binop (int_mode, xor_optab, mask, op1,
5251 : NULL_RTX, 0, OPTAB_WIDEN);
5252 2 : tem = expand_binop (int_mode, sub_optab, tem, mask,
5253 : NULL_RTX, 0, OPTAB_WIDEN);
5254 2 : expand_dec (remainder, tem);
5255 2 : emit_label (label);
5256 : }
5257 3 : return gen_lowpart (mode, rem_flag ? remainder : quotient);
5258 :
5259 0 : default:
5260 0 : gcc_unreachable ();
5261 : }
5262 :
5263 223343 : if (quotient == 0)
5264 : {
5265 89599 : if (target && GET_MODE (target) != compute_mode)
5266 48807 : target = 0;
5267 :
5268 89599 : if (rem_flag)
5269 : {
5270 : /* Try to produce the remainder without producing the quotient.
5271 : If we seem to have a divmod pattern that does not require widening,
5272 : don't try widening here. We should really have a WIDEN argument
5273 : to expand_twoval_binop, since what we'd really like to do here is
5274 : 1) try a mod insn in compute_mode
5275 : 2) try a divmod insn in compute_mode
5276 : 3) try a div insn in compute_mode and multiply-subtract to get
5277 : remainder
5278 : 4) try the same things with widening allowed. */
5279 31408 : remainder
5280 32873 : = sign_expand_binop (compute_mode, umod_optab, smod_optab,
5281 : op0, op1, target,
5282 : unsignedp,
5283 31408 : ((optab_handler (optab2, compute_mode)
5284 : != CODE_FOR_nothing)
5285 : ? OPTAB_DIRECT : OPTAB_WIDEN));
5286 31408 : if (remainder == 0)
5287 : {
5288 : /* No luck there. Can we do remainder and divide at once
5289 : without a library call? */
5290 31201 : remainder = gen_reg_rtx (compute_mode);
5291 46169 : if (! expand_twoval_binop ((unsignedp
5292 : ? udivmod_optab
5293 : : sdivmod_optab),
5294 : op0, op1,
5295 : NULL_RTX, remainder, unsignedp))
5296 : remainder = 0;
5297 : }
5298 :
5299 29943 : if (remainder)
5300 30150 : return gen_lowpart (mode, remainder);
5301 : }
5302 :
5303 : /* Produce the quotient. Try a quotient insn, but not a library call.
5304 : If we have a divmod in this mode, use it in preference to widening
5305 : the div (for this test we assume it will not fail). Note that optab2
5306 : is set to the one of the two optabs that the call below will use. */
5307 59449 : quotient
5308 64818 : = sign_expand_binop (compute_mode, udiv_optab, sdiv_optab,
5309 : op0, op1, rem_flag ? NULL_RTX : target,
5310 : unsignedp,
5311 59449 : ((optab_handler (optab2, compute_mode)
5312 : != CODE_FOR_nothing)
5313 : ? OPTAB_DIRECT : OPTAB_WIDEN));
5314 :
5315 59449 : if (quotient == 0)
5316 : {
5317 : /* No luck there. Try a quotient-and-remainder insn,
5318 : keeping the quotient alone. */
5319 59064 : quotient = gen_reg_rtx (compute_mode);
5320 81398 : if (! expand_twoval_binop (unsignedp ? udivmod_optab : sdivmod_optab,
5321 : op0, op1,
5322 : quotient, NULL_RTX, unsignedp))
5323 : {
5324 3727 : quotient = 0;
5325 3727 : if (! rem_flag)
5326 : /* Still no luck. If we are not computing the remainder,
5327 : use a library call for the quotient. */
5328 2491 : quotient = sign_expand_binop (compute_mode,
5329 : udiv_optab, sdiv_optab,
5330 : op0, op1, target,
5331 : unsignedp, methods);
5332 : }
5333 : }
5334 : }
5335 :
5336 193215 : if (rem_flag)
5337 : {
5338 21826 : if (target && GET_MODE (target) != compute_mode)
5339 14564 : target = 0;
5340 :
5341 21826 : if (quotient == 0)
5342 : {
5343 : /* No divide instruction either. Use library for remainder. */
5344 1236 : remainder = sign_expand_binop (compute_mode, umod_optab, smod_optab,
5345 : op0, op1, target,
5346 : unsignedp, methods);
5347 : /* No remainder function. Try a quotient-and-remainder
5348 : function, keeping the remainder. */
5349 1236 : if (!remainder
5350 0 : && (methods == OPTAB_LIB || methods == OPTAB_LIB_WIDEN))
5351 : {
5352 0 : remainder = gen_reg_rtx (compute_mode);
5353 0 : if (!expand_twoval_binop_libfunc
5354 0 : (unsignedp ? udivmod_optab : sdivmod_optab,
5355 : op0, op1,
5356 : NULL_RTX, remainder,
5357 : unsignedp ? UMOD : MOD))
5358 0 : remainder = NULL_RTX;
5359 : }
5360 : }
5361 : else
5362 : {
5363 : /* We divided. Now finish doing X - Y * (X / Y). */
5364 20590 : remainder = expand_mult (compute_mode, quotient, op1,
5365 : NULL_RTX, unsignedp);
5366 20590 : remainder = expand_binop (compute_mode, sub_optab, op0,
5367 : remainder, target, unsignedp,
5368 : methods);
5369 : }
5370 : }
5371 :
5372 194451 : if (methods != OPTAB_LIB_WIDEN
5373 1755 : && (rem_flag ? remainder : quotient) == NULL_RTX)
5374 : return NULL_RTX;
5375 :
5376 367076 : return gen_lowpart (mode, rem_flag ? remainder : quotient);
5377 : }
5378 :
5379 : /* Return a tree node with data type TYPE, describing the value of X.
5380 : Usually this is an VAR_DECL, if there is no obvious better choice.
5381 : X may be an expression, however we only support those expressions
5382 : generated by loop.c. */
5383 :
5384 : tree
5385 670161 : make_tree (tree type, rtx x)
5386 : {
5387 670161 : tree t;
5388 :
5389 670161 : switch (GET_CODE (x))
5390 : {
5391 22261 : case CONST_INT:
5392 22261 : case CONST_WIDE_INT:
5393 22261 : t = wide_int_to_tree (type, rtx_mode_t (x, TYPE_MODE (type)));
5394 22261 : return t;
5395 :
5396 0 : case CONST_DOUBLE:
5397 0 : STATIC_ASSERT (HOST_BITS_PER_WIDE_INT * 2 <= MAX_BITSIZE_MODE_ANY_INT);
5398 0 : if (TARGET_SUPPORTS_WIDE_INT == 0 && GET_MODE (x) == VOIDmode)
5399 : t = wide_int_to_tree (type,
5400 : wide_int::from_array (&CONST_DOUBLE_LOW (x), 2,
5401 : HOST_BITS_PER_WIDE_INT * 2));
5402 : else
5403 0 : t = build_real (type, *CONST_DOUBLE_REAL_VALUE (x));
5404 :
5405 0 : return t;
5406 :
5407 0 : case CONST_VECTOR:
5408 0 : {
5409 0 : unsigned int npatterns = CONST_VECTOR_NPATTERNS (x);
5410 0 : unsigned int nelts_per_pattern = CONST_VECTOR_NELTS_PER_PATTERN (x);
5411 0 : tree itype = TREE_TYPE (type);
5412 :
5413 : /* Build a tree with vector elements. */
5414 0 : tree_vector_builder elts (type, npatterns, nelts_per_pattern);
5415 0 : unsigned int count = elts.encoded_nelts ();
5416 0 : for (unsigned int i = 0; i < count; ++i)
5417 : {
5418 0 : rtx elt = CONST_VECTOR_ELT (x, i);
5419 0 : elts.quick_push (make_tree (itype, elt));
5420 : }
5421 :
5422 0 : return elts.build ();
5423 0 : }
5424 :
5425 0 : case PLUS:
5426 0 : return fold_build2 (PLUS_EXPR, type, make_tree (type, XEXP (x, 0)),
5427 : make_tree (type, XEXP (x, 1)));
5428 :
5429 0 : case MINUS:
5430 0 : return fold_build2 (MINUS_EXPR, type, make_tree (type, XEXP (x, 0)),
5431 : make_tree (type, XEXP (x, 1)));
5432 :
5433 0 : case NEG:
5434 0 : return fold_build1 (NEGATE_EXPR, type, make_tree (type, XEXP (x, 0)));
5435 :
5436 0 : case MULT:
5437 0 : return fold_build2 (MULT_EXPR, type, make_tree (type, XEXP (x, 0)),
5438 : make_tree (type, XEXP (x, 1)));
5439 :
5440 0 : case ASHIFT:
5441 0 : return fold_build2 (LSHIFT_EXPR, type, make_tree (type, XEXP (x, 0)),
5442 : make_tree (type, XEXP (x, 1)));
5443 :
5444 0 : case LSHIFTRT:
5445 0 : t = unsigned_type_for (type);
5446 0 : return fold_convert (type, build2 (RSHIFT_EXPR, t,
5447 : make_tree (t, XEXP (x, 0)),
5448 : make_tree (type, XEXP (x, 1))));
5449 :
5450 0 : case ASHIFTRT:
5451 0 : t = signed_type_for (type);
5452 0 : return fold_convert (type, build2 (RSHIFT_EXPR, t,
5453 : make_tree (t, XEXP (x, 0)),
5454 : make_tree (type, XEXP (x, 1))));
5455 :
5456 0 : case DIV:
5457 0 : if (TREE_CODE (type) != REAL_TYPE)
5458 0 : t = signed_type_for (type);
5459 : else
5460 : t = type;
5461 :
5462 0 : return fold_convert (type, build2 (TRUNC_DIV_EXPR, t,
5463 : make_tree (t, XEXP (x, 0)),
5464 : make_tree (t, XEXP (x, 1))));
5465 0 : case UDIV:
5466 0 : t = unsigned_type_for (type);
5467 0 : return fold_convert (type, build2 (TRUNC_DIV_EXPR, t,
5468 : make_tree (t, XEXP (x, 0)),
5469 : make_tree (t, XEXP (x, 1))));
5470 :
5471 0 : case SIGN_EXTEND:
5472 0 : case ZERO_EXTEND:
5473 0 : t = lang_hooks.types.type_for_mode (GET_MODE (XEXP (x, 0)),
5474 : GET_CODE (x) == ZERO_EXTEND);
5475 0 : return fold_convert (type, make_tree (t, XEXP (x, 0)));
5476 :
5477 0 : case CONST:
5478 0 : return make_tree (type, XEXP (x, 0));
5479 :
5480 0 : case SYMBOL_REF:
5481 0 : t = SYMBOL_REF_DECL (x);
5482 0 : if (t)
5483 0 : return fold_convert (type, build_fold_addr_expr (t));
5484 : /* fall through. */
5485 :
5486 647900 : default:
5487 647900 : if (CONST_POLY_INT_P (x))
5488 : return wide_int_to_tree (t, const_poly_int_value (x));
5489 :
5490 647900 : t = build_decl (RTL_LOCATION (x), VAR_DECL, NULL_TREE, type);
5491 :
5492 : /* If TYPE is a POINTER_TYPE, we might need to convert X from
5493 : address mode to pointer mode. */
5494 647900 : if (POINTER_TYPE_P (type))
5495 803788 : x = convert_memory_address_addr_space
5496 401894 : (SCALAR_INT_TYPE_MODE (type), x, TYPE_ADDR_SPACE (TREE_TYPE (type)));
5497 :
5498 : /* Note that we do *not* use SET_DECL_RTL here, because we do not
5499 : want set_decl_rtl to go adjusting REG_ATTRS for this temporary. */
5500 647900 : t->decl_with_rtl.rtl = x;
5501 :
5502 647900 : return t;
5503 : }
5504 : }
5505 :
5506 : /* Compute the logical-and of OP0 and OP1, storing it in TARGET
5507 : and returning TARGET.
5508 :
5509 : If TARGET is 0, a pseudo-register or constant is returned. */
5510 :
5511 : rtx
5512 73812 : expand_and (machine_mode mode, rtx op0, rtx op1, rtx target)
5513 : {
5514 73812 : rtx tem = 0;
5515 :
5516 73812 : if (GET_MODE (op0) == VOIDmode && GET_MODE (op1) == VOIDmode)
5517 81 : tem = simplify_binary_operation (AND, mode, op0, op1);
5518 81 : if (tem == 0)
5519 73731 : tem = expand_binop (mode, and_optab, op0, op1, target, 0, OPTAB_LIB_WIDEN);
5520 :
5521 73812 : if (target == 0)
5522 : target = tem;
5523 44384 : else if (tem != target)
5524 13 : emit_move_insn (target, tem);
5525 73812 : return target;
5526 : }
5527 :
/* Helper function for emit_store_flag.  Try to use the cstore pattern
   ICODE to compute comparison CODE of X and Y, whose operand mode is
   MODE and which is compared in COMPARE_MODE.  The result is stored in
   TARGET (a fresh pseudo is created if TARGET is null) and normalized
   according to NORMALIZEP, as for emit_store_flag.  TARGET_MODE is the
   desired mode of the result, or VOIDmode for "whatever the pattern
   produces".  Return the result rtx, or NULL_RTX (after deleting any
   partially emitted insns) if the pattern cannot be used.  */
rtx
emit_cstore (rtx target, enum insn_code icode, enum rtx_code code,
	     machine_mode mode, machine_mode compare_mode,
	     int unsignedp, rtx x, rtx y, int normalizep,
	     machine_mode target_mode)
{
  class expand_operand ops[4];
  rtx op0, comparison, subtarget;
  rtx_insn *last;
  scalar_int_mode result_mode = targetm.cstore_mode (icode);
  scalar_int_mode int_target_mode;

  /* Remember where we started so that a failed expansion can be
     rolled back with delete_insns_since.  */
  last = get_last_insn ();
  x = prepare_operand (icode, x, 2, mode, compare_mode, unsignedp);
  y = prepare_operand (icode, y, 3, mode, compare_mode, unsignedp);
  if (!x || !y)
    {
      delete_insns_since (last);
      return NULL_RTX;
    }

  /* VOIDmode means the caller has no mode preference: use the mode
     the cstore pattern naturally produces.  */
  if (target_mode == VOIDmode)
    int_target_mode = result_mode;
  else
    int_target_mode = as_a <scalar_int_mode> (target_mode);
  if (!target)
    target = gen_reg_rtx (int_target_mode);

  comparison = gen_rtx_fmt_ee (code, result_mode, x, y);

  /* When optimizing, let the expander pick the output so later passes
     have more freedom; otherwise force the caller's TARGET.  */
  create_output_operand (&ops[0], optimize ? NULL_RTX : target, result_mode);
  create_fixed_operand (&ops[1], comparison);
  create_fixed_operand (&ops[2], x);
  create_fixed_operand (&ops[3], y);
  if (!maybe_expand_insn (icode, 4, ops))
    {
      delete_insns_since (last);
      return NULL_RTX;
    }
  subtarget = ops[0].value;

  /* If we are converting to a wider mode, first convert to
     INT_TARGET_MODE, then normalize.  This produces better combining
     opportunities on machines that have a SIGN_EXTRACT when we are
     testing a single bit.  This mostly benefits the 68k.

     If STORE_FLAG_VALUE does not have the sign bit set when
     interpreted in MODE, we can do this conversion as unsigned, which
     is usually more efficient.  */
  if (GET_MODE_PRECISION (int_target_mode) > GET_MODE_PRECISION (result_mode))
    {
      gcc_assert (GET_MODE_PRECISION (result_mode) != 1
		  || STORE_FLAG_VALUE == 1 || STORE_FLAG_VALUE == -1);

      bool unsignedp = (STORE_FLAG_VALUE >= 0);
      convert_move (target, subtarget, unsignedp);

      op0 = target;
      result_mode = int_target_mode;
    }
  else
    op0 = subtarget;

  /* If we want to keep subexpressions around, don't reuse our last
     target.  */
  if (optimize)
    subtarget = 0;

  /* Now normalize to the proper value in MODE.  Sometimes we don't
     have to do anything.  */
  if (normalizep == 0 || normalizep == STORE_FLAG_VALUE)
    ;
  /* STORE_FLAG_VALUE might be the most negative number, so write
     the comparison this way to avoid a compiler-time warning.  */
  else if (- normalizep == STORE_FLAG_VALUE)
    op0 = expand_unop (result_mode, neg_optab, op0, subtarget, 0);

  /* We don't want to use STORE_FLAG_VALUE < 0 below since this makes
     it hard to use a value of just the sign bit due to ANSI integer
     constant typing rules.  */
  else if (val_signbit_known_set_p (result_mode, STORE_FLAG_VALUE))
    /* Shift the sign bit down to bit 0 (logical shift for a 0/1
       result, arithmetic shift for a 0/-1 result).  */
    op0 = expand_shift (RSHIFT_EXPR, result_mode, op0,
			GET_MODE_BITSIZE (result_mode) - 1, subtarget,
			normalizep == 1);
  else
    {
      /* The remaining case requires the low bit of STORE_FLAG_VALUE
	 to be set: mask it out, then negate for a -1 result.  */
      gcc_assert (STORE_FLAG_VALUE & 1);

      op0 = expand_and (result_mode, op0, const1_rtx, subtarget);
      if (normalizep == -1)
	op0 = expand_unop (result_mode, neg_optab, op0, op0, 0);
    }

  /* If we were converting to a smaller mode, do the conversion now.  */
  if (int_target_mode != result_mode)
    {
      convert_move (target, op0, 0);
      return target;
    }
  else
    return op0;
}
5631 :
5632 :
/* A subroutine of emit_store_flag only including "tricks" that do not
   need a recursive call.  These are kept separate to avoid infinite
   loops.  Arguments are as for emit_store_flag; return the flag value,
   or 0 if none of the tricks applied.  */

static rtx
emit_store_flag_1 (rtx target, enum rtx_code code, rtx op0, rtx op1,
		   machine_mode mode, int unsignedp, int normalizep,
		   machine_mode target_mode)
{
  rtx subtarget;
  enum insn_code icode;
  machine_mode compare_mode;
  enum mode_class mclass;

  if (unsignedp)
    code = unsigned_condition (code);

  /* If one operand is constant, make it the second one.  Only do this
     if the other operand is not constant as well.  */

  if (swap_commutative_operands_p (op0, op1))
    {
      std::swap (op0, op1);
      code = swap_condition (code);
    }

  if (mode == VOIDmode)
    mode = GET_MODE (op0);

  if (CONST_SCALAR_INT_P (op1))
    canonicalize_comparison (mode, &code, &op1);

  /* For some comparisons with 1 and -1, we can convert this to
     comparisons with zero.  This will often produce more opportunities for
     store-flag insns.  */

  switch (code)
    {
    case LT:
      /* x < 1 is x <= 0.  */
      if (op1 == const1_rtx)
	op1 = const0_rtx, code = LE;
      break;
    case LE:
      /* x <= -1 is x < 0.  */
      if (op1 == constm1_rtx)
	op1 = const0_rtx, code = LT;
      break;
    case GE:
      /* x >= 1 is x > 0.  */
      if (op1 == const1_rtx)
	op1 = const0_rtx, code = GT;
      break;
    case GT:
      /* x > -1 is x >= 0.  */
      if (op1 == constm1_rtx)
	op1 = const0_rtx, code = GE;
      break;
    case GEU:
      /* unsigned x >= 1 is x != 0.  */
      if (op1 == const1_rtx)
	op1 = const0_rtx, code = NE;
      break;
    case LTU:
      /* unsigned x < 1 is x == 0.  */
      if (op1 == const1_rtx)
	op1 = const0_rtx, code = EQ;
      break;
    default:
      break;
    }

  /* If this is A < 0 or A >= 0, we can do this by taking the ones
     complement of A (for GE) and shifting the sign bit to the low bit.  */
  scalar_int_mode int_mode;
  if (op1 == const0_rtx && (code == LT || code == GE)
      && is_int_mode (mode, &int_mode)
      && (normalizep || STORE_FLAG_VALUE == 1
	  || val_signbit_p (int_mode, STORE_FLAG_VALUE)))
    {
      scalar_int_mode int_target_mode;
      subtarget = target;

      if (!target)
	int_target_mode = int_mode;
      else
	{
	  /* If the result is to be wider than OP0, it is best to convert it
	     first.  If it is to be narrower, it is *incorrect* to convert it
	     first.  */
	  int_target_mode = as_a <scalar_int_mode> (target_mode);
	  if (GET_MODE_SIZE (int_target_mode) > GET_MODE_SIZE (int_mode))
	    {
	      op0 = convert_modes (int_target_mode, int_mode, op0, 0);
	      int_mode = int_target_mode;
	    }
	}

      /* SUBTARGET has the wrong mode to hold the intermediate value if
	 the result still needs a narrowing conversion below.  */
      if (int_target_mode != int_mode)
	subtarget = 0;

      if (code == GE)
	op0 = expand_unop (int_mode, one_cmpl_optab, op0,
			   ((STORE_FLAG_VALUE == 1 || normalizep)
			    ? 0 : subtarget), 0);

      if (STORE_FLAG_VALUE == 1 || normalizep)
	/* If we are supposed to produce a 0/1 value, we want to do
	   a logical shift from the sign bit to the low-order bit; for
	   a -1/0 value, we do an arithmetic shift.  */
	op0 = expand_shift (RSHIFT_EXPR, int_mode, op0,
			    GET_MODE_BITSIZE (int_mode) - 1,
			    subtarget, normalizep != -1);

      if (int_mode != int_target_mode)
	op0 = convert_modes (int_target_mode, int_mode, op0, 0);

      return op0;
    }

  /* Next try expanding this via the backend's cstore<mode>4.  */
  mclass = GET_MODE_CLASS (mode);
  FOR_EACH_WIDER_MODE_FROM (compare_mode, mode)
    {
     machine_mode optab_mode = mclass == MODE_CC ? CCmode : compare_mode;
     icode = optab_handler (cstore_optab, optab_mode);
     if (icode != CODE_FOR_nothing)
	{
	  do_pending_stack_adjust ();
	  rtx tem = emit_cstore (target, icode, code, mode, compare_mode,
				 unsignedp, op0, op1, normalizep, target_mode);
	  if (tem)
	    return tem;

	  /* For floating-point comparisons, also try the swapped
	     condition with swapped operands before giving up on this
	     pattern.  */
	  if (GET_MODE_CLASS (mode) == MODE_FLOAT)
	    {
	      enum rtx_code scode = swap_condition (code);

	      tem = emit_cstore (target, icode, scode, mode, compare_mode,
				 unsignedp, op1, op0, normalizep, target_mode);
	      if (tem)
		return tem;
	    }
	  /* Only the narrowest mode with a cstore pattern is tried;
	     wider modes are not attempted after a failure here.  */
	  break;
	}
    }

  /* If we are comparing a double-word integer with zero or -1, we can
     convert the comparison into one involving a single word.  */
  if (is_int_mode (mode, &int_mode)
      && GET_MODE_BITSIZE (int_mode) == BITS_PER_WORD * 2
      && (!MEM_P (op0) || ! MEM_VOLATILE_P (op0)))
    {
      rtx tem;
      if ((code == EQ || code == NE)
	  && (op1 == const0_rtx || op1 == constm1_rtx))
	{
	  rtx op00, op01;

	  /* Do a logical OR or AND of the two words and compare the
	     result.  (OR against 0: the whole is zero iff the OR is
	     zero; AND against -1: the whole is all-ones iff the AND
	     is all-ones.)  */
	  op00 = force_subreg (word_mode, op0, int_mode, 0);
	  op01 = force_subreg (word_mode, op0, int_mode, UNITS_PER_WORD);
	  tem = expand_binop (word_mode,
			      op1 == const0_rtx ? ior_optab : and_optab,
			      op00, op01, NULL_RTX, unsignedp,
			      OPTAB_DIRECT);

	  if (tem != 0)
	    tem = emit_store_flag (NULL_RTX, code, tem, op1, word_mode,
				   unsignedp, normalizep);
	}
      else if ((code == LT || code == GE) && op1 == const0_rtx)
	{
	  rtx op0h;

	  /* If testing the sign bit, can just test on high word.  */
	  op0h = force_highpart_subreg (word_mode, op0, int_mode);
	  tem = emit_store_flag (NULL_RTX, code, op0h, op1, word_mode,
				 unsignedp, normalizep);
	}
      else
	tem = NULL_RTX;

      if (tem)
	{
	  if (target_mode == VOIDmode || GET_MODE (tem) == target_mode)
	    return tem;
	  if (!target)
	    target = gen_reg_rtx (target_mode);

	  /* Extend the word-mode flag value to TARGET_MODE, sign- or
	     zero-extending depending on whether the flag value can
	     have its sign bit set.  */
	  convert_move (target, tem,
			!val_signbit_known_set_p (word_mode,
						  (normalizep ? normalizep
						   : STORE_FLAG_VALUE)));
	  return target;
	}
    }

  return 0;
}
5828 :
5829 : /* Subroutine of emit_store_flag that handles cases in which the operands
5830 : are scalar integers. SUBTARGET is the target to use for temporary
5831 : operations and TRUEVAL is the value to store when the condition is
5832 : true. All other arguments are as for emit_store_flag. */
5833 :
rtx
emit_store_flag_int (rtx target, rtx subtarget, enum rtx_code code, rtx op0,
                     rtx op1, scalar_int_mode mode, int unsignedp,
                     int normalizep, rtx trueval)
{
  machine_mode target_mode = target ? GET_MODE (target) : VOIDmode;
  /* Remember the insn stream position so any partially-emitted attempt
     below can be discarded with delete_insns_since.  */
  rtx_insn *last = get_last_insn ();

  /* If this is an equality comparison of integers, we can try to exclusive-or
     (or subtract) the two operands and use a recursive call to try the
     comparison with zero.  Don't do any of these cases if branches are
     very cheap.  */

  if ((code == EQ || code == NE) && op1 != const0_rtx)
    {
      rtx tem = expand_binop (mode, xor_optab, op0, op1, subtarget, 1,
                              OPTAB_WIDEN);

      /* Fall back to subtraction if XOR could not be expanded.  */
      if (tem == 0)
        tem = expand_binop (mode, sub_optab, op0, op1, subtarget, 1,
                            OPTAB_WIDEN);
      if (tem != 0)
        tem = emit_store_flag (target, code, tem, const0_rtx,
                               mode, unsignedp, normalizep);
      if (tem != 0)
        return tem;

      /* The attempt failed; remove the dead insns it emitted.  */
      delete_insns_since (last);
    }

  /* For integer comparisons, try the reverse comparison.  However, for
     small X and if we'd have anyway to extend, implementing "X != 0"
     as "-(int)X >> 31" is still cheaper than inverting "(int)X == 0".  */
  rtx_code rcode = reverse_condition (code);
  if (can_compare_p (rcode, mode, ccp_store_flag)
      && ! (optab_handler (cstore_optab, mode) == CODE_FOR_nothing
            && code == NE
            && GET_MODE_SIZE (mode) < UNITS_PER_WORD
            && op1 == const0_rtx))
    {
      /* Correcting the reversed result with an addition is only profitable
         when STORE_FLAG_VALUE and NORMALIZEP have opposite unit values.  */
      int want_add = ((STORE_FLAG_VALUE == 1 && normalizep == -1)
                      || (STORE_FLAG_VALUE == -1 && normalizep == 1));

      /* Again, for the reverse comparison, use either an addition or a XOR.  */
      if (want_add
          && rtx_cost (GEN_INT (normalizep), mode, PLUS, 1,
                       optimize_insn_for_speed_p ()) == 0)
        {
          rtx tem = emit_store_flag_1 (subtarget, rcode, op0, op1, mode, 0,
                                       STORE_FLAG_VALUE, target_mode);
          if (tem != 0)
            tem = expand_binop (target_mode, add_optab, tem,
                                gen_int_mode (normalizep, target_mode),
                                target, 0, OPTAB_WIDEN);
          if (tem != 0)
            return tem;
        }
      else if (!want_add
               && rtx_cost (trueval, mode, XOR, 1,
                            optimize_insn_for_speed_p ()) == 0)
        {
          rtx tem = emit_store_flag_1 (subtarget, rcode, op0, op1, mode, 0,
                                       normalizep, target_mode);
          if (tem != 0)
            tem = expand_binop (target_mode, xor_optab, tem, trueval, target,
                                INTVAL (trueval) >= 0, OPTAB_WIDEN);
          if (tem != 0)
            return tem;
        }

      /* Neither correction strategy worked; discard what was emitted.  */
      delete_insns_since (last);
    }

  /* Some other cases we can do are EQ, NE, LE, and GT comparisons with
     the constant zero.  Reject all other comparisons at this point.  Only
     do LE and GT if branches are expensive since they are expensive on
     2-operand machines.  */

  if (op1 != const0_rtx
      || (code != EQ && code != NE
          && (BRANCH_COST (optimize_insn_for_speed_p (),
                           false) <= 1 || (code != LE && code != GT))))
    return 0;

  /* Try to put the result of the comparison in the sign bit.  Assume we can't
     do the necessary operation below.  */

  rtx tem = 0;

  /* To see if A <= 0, compute (A | (A - 1)).  A <= 0 iff that result has
     the sign bit set.  */

  if (code == LE)
    {
      /* This is destructive, so SUBTARGET can't be OP0.  */
      if (rtx_equal_p (subtarget, op0))
        subtarget = 0;

      tem = expand_binop (mode, sub_optab, op0, const1_rtx, subtarget, 0,
                          OPTAB_WIDEN);
      if (tem)
        tem = expand_binop (mode, ior_optab, op0, tem, subtarget, 0,
                            OPTAB_WIDEN);
    }

  /* To see if A > 0, compute (((signed) A) << BITS) - A, where BITS is the
     number of bits in the mode of OP0, minus one.  */

  if (code == GT)
    {
      if (rtx_equal_p (subtarget, op0))
        subtarget = 0;

      /* Arithmetic right shift by BITS-1 yields 0 or -1 (the sign mask);
         subtracting A from it leaves the sign bit set exactly when A > 0.  */
      tem = maybe_expand_shift (RSHIFT_EXPR, mode, op0,
                                GET_MODE_BITSIZE (mode) - 1,
                                subtarget, 0);
      if (tem)
        tem = expand_binop (mode, sub_optab, tem, op0, subtarget, 0,
                            OPTAB_WIDEN);
    }

  if (code == EQ || code == NE)
    {
      /* For EQ or NE, one way to do the comparison is to apply an operation
         that converts the operand into a positive number if it is nonzero
         or zero if it was originally zero.  Then, for EQ, we subtract 1 and
         for NE we negate.  This puts the result in the sign bit.  Then we
         normalize with a shift, if needed.

         Two operations that can do the above actions are ABS and FFS, so try
         them.  If that doesn't work, and MODE is smaller than a full word,
         we can use zero-extension to the wider mode (an unsigned conversion)
         as the operation.  */

      /* Note that ABS doesn't yield a positive number for INT_MIN, but
         that is compensated by the subsequent overflow when subtracting
         one / negating.  */

      if (optab_handler (abs_optab, mode) != CODE_FOR_nothing)
        tem = expand_unop (mode, abs_optab, op0, subtarget, 1);
      else if (optab_handler (ffs_optab, mode) != CODE_FOR_nothing)
        tem = expand_unop (mode, ffs_optab, op0, subtarget, 1);
      else if (GET_MODE_SIZE (mode) < UNITS_PER_WORD)
        {
          tem = convert_modes (word_mode, mode, op0, 1);
          mode = word_mode;
        }

      if (tem != 0)
        {
          if (code == EQ)
            tem = expand_binop (mode, sub_optab, tem, const1_rtx, subtarget,
                                0, OPTAB_WIDEN);
          else
            tem = expand_unop (mode, neg_optab, tem, subtarget, 0);
        }

      /* If we couldn't do it that way, for NE we can "or" the two's complement
         of the value with itself.  For EQ, we take the one's complement of
         that "or", which is an extra insn, so we only handle EQ if branches
         are expensive.  */

      if (tem == 0
          && (code == NE
              || BRANCH_COST (optimize_insn_for_speed_p (),
                              false) > 1))
        {
          if (rtx_equal_p (subtarget, op0))
            subtarget = 0;

          tem = expand_unop (mode, neg_optab, op0, subtarget, 0);
          tem = expand_binop (mode, ior_optab, tem, op0, subtarget, 0,
                              OPTAB_WIDEN);

          if (tem && code == EQ)
            tem = expand_unop (mode, one_cmpl_optab, tem, subtarget, 0);
        }
    }

  /* The result is now in the sign bit; shift it down to produce 0/1
     (NORMALIZEP == 1, logical shift) or 0/-1 (arithmetic shift).  */
  if (tem && normalizep)
    tem = maybe_expand_shift (RSHIFT_EXPR, mode, tem,
                              GET_MODE_BITSIZE (mode) - 1,
                              subtarget, normalizep == 1);

  if (tem)
    {
      if (!target)
        ;
      else if (GET_MODE (tem) != target_mode)
        {
          convert_move (target, tem, 0);
          tem = target;
        }
      else if (!subtarget)
        {
          emit_move_insn (target, tem);
          tem = target;
        }
    }
  else
    /* Nothing worked; remove any insns emitted along the way.  */
    delete_insns_since (last);

  return tem;
}
6038 :
6039 : /* Emit a store-flags instruction for comparison CODE on OP0 and OP1
6040 : and storing in TARGET. Normally return TARGET.
6041 : Return 0 if that cannot be done.
6042 :
6043 : MODE is the mode to use for OP0 and OP1 should they be CONST_INTs. If
6044 : it is VOIDmode, they cannot both be CONST_INT.
6045 :
6046 : UNSIGNEDP is for the case where we have to widen the operands
6047 : to perform the operation. It says to use zero-extension.
6048 :
6049 : NORMALIZEP is 1 if we should convert the result to be either zero
6050 : or one. Normalize is -1 if we should convert the result to be
6051 : either zero or -1. If NORMALIZEP is zero, the result will be left
6052 : "raw" out of the scc insn. */
6053 :
rtx
emit_store_flag (rtx target, enum rtx_code code, rtx op0, rtx op1,
                 machine_mode mode, int unsignedp, int normalizep)
{
  machine_mode target_mode = target ? GET_MODE (target) : VOIDmode;
  enum rtx_code rcode;
  rtx subtarget;
  rtx tem, trueval;
  rtx_insn *last;

  /* If we compare constants, we shouldn't use a store-flag operation,
     but a constant load.  We can get there via the vanilla route that
     usually generates a compare-branch sequence, but will in this case
     fold the comparison to a constant, and thus elide the branch.  */
  if (CONSTANT_P (op0) && CONSTANT_P (op1))
    return NULL_RTX;

  /* First try the straightforward scc expansion.  */
  tem = emit_store_flag_1 (target, code, op0, op1, mode, unsignedp, normalizep,
                           target_mode);
  if (tem)
    return tem;

  /* If we reached here, we can't do this with a scc insn, however there
     are some comparisons that can be done in other ways.  Don't do any
     of these cases if branches are very cheap.  */
  if (BRANCH_COST (optimize_insn_for_speed_p (), false) == 0)
    return 0;

  /* See what we need to return.  We can only return a 1, -1, or the
     sign bit.  */

  if (normalizep == 0)
    {
      if (STORE_FLAG_VALUE == 1 || STORE_FLAG_VALUE == -1)
        normalizep = STORE_FLAG_VALUE;

      else if (val_signbit_p (mode, STORE_FLAG_VALUE))
        ;
      else
        return 0;
    }

  last = get_last_insn ();

  /* If optimizing, use different pseudo registers for each insn, instead
     of reusing the same pseudo.  This leads to better CSE, but slows
     down the compiler, since there are more pseudos.  */
  subtarget = (!optimize
               && (target_mode == mode)) ? target : NULL_RTX;
  trueval = GEN_INT (normalizep ? normalizep : STORE_FLAG_VALUE);

  /* For floating-point comparisons, try the reverse comparison or try
     changing the "orderedness" of the comparison.  */
  if (GET_MODE_CLASS (mode) == MODE_FLOAT)
    {
      enum rtx_code first_code;
      bool and_them;

      rcode = reverse_condition_maybe_unordered (code);
      /* Reversing is only valid when it cannot change the NaN behavior:
         ORDERED/UNORDERED themselves, or when NaNs (or signaling NaNs,
         for EQ/NE) are not honored in this mode.  */
      if (can_compare_p (rcode, mode, ccp_store_flag)
          && (code == ORDERED || code == UNORDERED
              || (! HONOR_NANS (mode) && (code == LTGT || code == UNEQ))
              || (! HONOR_SNANS (mode) && (code == EQ || code == NE))))
        {
          int want_add = ((STORE_FLAG_VALUE == 1 && normalizep == -1)
                          || (STORE_FLAG_VALUE == -1 && normalizep == 1));

          /* For the reverse comparison, use either an addition or a XOR.  */
          if (want_add
              && rtx_cost (GEN_INT (normalizep), mode, PLUS, 1,
                           optimize_insn_for_speed_p ()) == 0)
            {
              tem = emit_store_flag_1 (subtarget, rcode, op0, op1, mode, 0,
                                       STORE_FLAG_VALUE, target_mode);
              if (tem)
                return expand_binop (target_mode, add_optab, tem,
                                     gen_int_mode (normalizep, target_mode),
                                     target, 0, OPTAB_WIDEN);
            }
          else if (!want_add
                   && rtx_cost (trueval, mode, XOR, 1,
                                optimize_insn_for_speed_p ()) == 0)
            {
              tem = emit_store_flag_1 (subtarget, rcode, op0, op1, mode, 0,
                                       normalizep, target_mode);
              if (tem)
                return expand_binop (target_mode, xor_optab, tem, trueval,
                                     target, INTVAL (trueval) >= 0,
                                     OPTAB_WIDEN);
            }
        }

      /* Discard any insns emitted by the failed reverse-comparison try.  */
      delete_insns_since (last);

      /* Cannot split ORDERED and UNORDERED, only try the above trick.  */
      if (code == ORDERED || code == UNORDERED)
        return 0;

      /* Split e.g. LE into an ordering test plus a simpler comparison;
         AND_THEM says how the two results must be combined.  */
      and_them = split_comparison (code, mode, &first_code, &code);

      /* If there are no NaNs, the first comparison should always fall through.
         Effectively change the comparison to the other one.  */
      if (!HONOR_NANS (mode))
        {
          gcc_assert (first_code == (and_them ? ORDERED : UNORDERED));
          return emit_store_flag_1 (target, code, op0, op1, mode, 0, normalizep,
                                    target_mode);
        }

      if (!HAVE_conditional_move)
        return 0;

      /* Do not turn a trapping comparison into a non-trapping one.  */
      if ((code != EQ && code != NE && code != UNEQ && code != LTGT)
          && flag_trapping_math)
        return 0;

      /* Try using a setcc instruction for ORDERED/UNORDERED, followed by a
         conditional move.  */
      tem = emit_store_flag_1 (subtarget, first_code, op0, op1, mode, 0,
                               normalizep, target_mode);
      if (tem == 0)
        return 0;

      if (and_them)
        tem = emit_conditional_move (target, { code, op0, op1, mode },
                                     tem, const0_rtx, GET_MODE (tem), 0);
      else
        tem = emit_conditional_move (target, { code, op0, op1, mode },
                                     trueval, tem, GET_MODE (tem), 0);

      if (tem == 0)
        delete_insns_since (last);
      return tem;
    }

  /* The remaining tricks only apply to integer comparisons.  */

  scalar_int_mode int_mode;
  if (is_int_mode (mode, &int_mode))
    return emit_store_flag_int (target, subtarget, code, op0, op1, int_mode,
                                unsignedp, normalizep, trueval);

  return 0;
}
6199 :
6200 : /* Like emit_store_flag, but always succeeds. */
6201 :
rtx
emit_store_flag_force (rtx target, enum rtx_code code, rtx op0, rtx op1,
                       machine_mode mode, int unsignedp, int normalizep)
{
  rtx tem;
  rtx_code_label *label;
  rtx trueval, falseval;

  /* First see if emit_store_flag can do the job.  */
  tem = emit_store_flag (target, code, op0, op1, mode, unsignedp, normalizep);
  if (tem != 0)
    return tem;

  /* If one operand is constant, make it the second one.  Only do this
     if the other operand is not constant as well.  */
  if (swap_commutative_operands_p (op0, op1))
    {
      std::swap (op0, op1);
      code = swap_condition (code);
    }

  if (mode == VOIDmode)
    mode = GET_MODE (op0);

  if (!target)
    target = gen_reg_rtx (word_mode);

  /* If this failed, we have to do this with set/compare/jump/set code.
     For foo != 0, if foo is in OP0, just replace it with 1 if nonzero.  */
  trueval = normalizep ? GEN_INT (normalizep) : const1_rtx;
  if (code == NE
      && GET_MODE_CLASS (mode) == MODE_INT
      && REG_P (target)
      && op0 == target
      && op1 == const0_rtx)
    {
      /* TARGET already holds OP0, so we only need to overwrite it with
         TRUEVAL when it is nonzero; jump around the store otherwise.  */
      label = gen_label_rtx ();
      do_compare_rtx_and_jump (target, const0_rtx, EQ, unsignedp, mode,
                               NULL_RTX, NULL, label,
                               profile_probability::uninitialized ());
      emit_move_insn (target, trueval);
      emit_label (label);
      return target;
    }

  /* TARGET must be a fresh register if the operands mention it, since we
     store into it before the comparison below is emitted.  */
  if (!REG_P (target)
      || reg_mentioned_p (target, op0) || reg_mentioned_p (target, op1))
    target = gen_reg_rtx (GET_MODE (target));

  /* Jump in the right direction if the target cannot implement CODE
     but can jump on its reverse condition.  */
  falseval = const0_rtx;
  if (! can_compare_p (code, mode, ccp_jump)
      && (! FLOAT_MODE_P (mode)
          || code == ORDERED || code == UNORDERED
          || (! HONOR_NANS (mode) && (code == LTGT || code == UNEQ))
          || (! HONOR_SNANS (mode) && (code == EQ || code == NE))))
    {
      enum rtx_code rcode;
      if (FLOAT_MODE_P (mode))
        rcode = reverse_condition_maybe_unordered (code);
      else
        rcode = reverse_condition (code);

      /* Canonicalize to UNORDERED for the libcall.  */
      if (can_compare_p (rcode, mode, ccp_jump)
          || (code == ORDERED && ! can_compare_p (ORDERED, mode, ccp_jump)))
        {
          /* Branch on the reversed condition, so swap the two values.  */
          falseval = trueval;
          trueval = const0_rtx;
          code = rcode;
        }
    }

  /* Emit: TARGET = trueval; if (!cond) goto label; TARGET = falseval;  */
  emit_move_insn (target, trueval);
  label = gen_label_rtx ();
  do_compare_rtx_and_jump (op0, op1, code, unsignedp, mode, NULL_RTX, NULL,
                           label, profile_probability::uninitialized ());

  emit_move_insn (target, falseval);
  emit_label (label);

  return target;
}
6286 :
6287 : /* Expand a vector (left) rotate of MODE of X by an immediate AMT as a vector
   permute operation.  Emit code to put the result in DST if successful and
6289 : return it. Otherwise return NULL. This is intended to implement vector
6290 : rotates by byte amounts using vector permutes when the target does not offer
6291 : native vector rotate operations. */
rtx
expand_rotate_as_vec_perm (machine_mode mode, rtx dst, rtx x, rtx amt)
{
  rtx amt_unwrap = unwrap_const_vec_duplicate (amt);
  /* For now handle only rotate by the same integer constant in all lanes.
     In principle rotates by any constant vector are representable through
     permutes as long as the individual rotate amounts are multiples of
     BITS_PER_UNIT.  */
  if (!CONST_INT_P (amt_unwrap))
    return NULL_RTX;

  /* Only whole-byte rotate amounts can be expressed as byte permutes.  */
  int rotamnt = INTVAL (amt_unwrap);
  if (rotamnt % BITS_PER_UNIT != 0)
    return NULL_RTX;
  machine_mode qimode;
  if (!qimode_for_vec_perm (mode).exists (&qimode))
    return NULL_RTX;

  /* Build the byte-selector pattern: one period of NUNITS (the element
     size in bytes) repeated three times, as vec_perm_builder expects.
     The rotate direction is mirrored on big-endian byte order.  */
  vec_perm_builder builder;
  unsigned nunits = GET_MODE_SIZE (GET_MODE_INNER (mode));
  poly_uint64 total_units = GET_MODE_SIZE (mode);
  builder.new_vector (total_units, nunits, 3);
  unsigned rot_bytes = rotamnt / BITS_PER_UNIT;
  unsigned rot_to_perm = BYTES_BIG_ENDIAN ? rot_bytes : nunits - rot_bytes;
  for (unsigned j = 0; j < 3 * nunits; j += nunits)
    for (unsigned i = 0; i < nunits; i++)
      builder.quick_push ((rot_to_perm + i) % nunits + j);

  /* Do the permute on the byte-vector view of X and write back into DST
     if the expander did not already place the result there.  */
  rtx perm_src = lowpart_subreg (qimode, x, mode);
  rtx perm_dst = lowpart_subreg (qimode, dst, mode);
  rtx res
    = expand_vec_perm_const (qimode, perm_src, perm_src, builder,
                             qimode, perm_dst);
  if (!res)
    return NULL_RTX;
  if (!rtx_equal_p (res, perm_dst))
    emit_move_insn (dst, lowpart_subreg (mode, res, qimode));
  return dst;
}
6331 :
6332 : /* Helper function for canonicalize_cmp_for_target. Swap between inclusive
6333 : and exclusive ranges in order to create an equivalent comparison. See
6334 : canonicalize_cmp_for_target for the possible cases. */
6335 :
6336 : static enum rtx_code
6337 47 : equivalent_cmp_code (enum rtx_code code)
6338 : {
6339 47 : switch (code)
6340 : {
6341 : case GT:
6342 : return GE;
6343 0 : case GE:
6344 0 : return GT;
6345 0 : case LT:
6346 0 : return LE;
6347 0 : case LE:
6348 0 : return LT;
6349 2 : case GTU:
6350 2 : return GEU;
6351 0 : case GEU:
6352 0 : return GTU;
6353 1 : case LTU:
6354 1 : return LEU;
6355 2 : case LEU:
6356 2 : return LTU;
6357 :
6358 0 : default:
6359 0 : return code;
6360 : }
6361 : }
6362 :
/* Choose the more appropriate immediate in scalar integer comparisons.  The
6364 : purpose of this is to end up with an immediate which can be loaded into a
6365 : register in fewer moves, if possible.
6366 :
6367 : For each integer comparison there exists an equivalent choice:
6368 : i) a > b or a >= b + 1
6369 : ii) a <= b or a < b + 1
6370 : iii) a >= b or a > b - 1
6371 : iv) a < b or a <= b - 1
6372 :
6373 : MODE is the mode of the first operand.
6374 : CODE points to the comparison code.
6375 : IMM points to the rtx containing the immediate. *IMM must satisfy
6376 : CONST_SCALAR_INT_P on entry and continues to satisfy CONST_SCALAR_INT_P
6377 : on exit. */
6378 :
void
canonicalize_comparison (machine_mode mode, enum rtx_code *code, rtx *imm)
{
  /* Only scalar integer comparisons have the equivalences listed above.  */
  if (!SCALAR_INT_MODE_P (mode))
    return;

  int to_add = 0;
  /* Interpret the immediate with the signedness implied by the code so the
     overflow check below matches the comparison's semantics.  */
  enum signop sgn = unsigned_condition_p (*code) ? UNSIGNED : SIGNED;

  /* Extract the immediate value from the rtx.  */
  wide_int imm_val = rtx_mode_t (*imm, mode);

  /* Cases i/ii need B + 1; cases iii/iv need B - 1 (see header comment).
     Other codes have no equivalent form, so leave them alone.  */
  if (*code == GT || *code == GTU || *code == LE || *code == LEU)
    to_add = 1;
  else if (*code == GE || *code == GEU || *code == LT || *code == LTU)
    to_add = -1;
  else
    return;

  /* Check for overflow/underflow in the case of signed values and
     wrapping around in the case of unsigned values.  If any occur
     cancel the optimization.  */
  wi::overflow_type overflow = wi::OVF_NONE;
  wide_int imm_modif;

  if (to_add == 1)
    imm_modif = wi::add (imm_val, 1, sgn, &overflow);
  else
    imm_modif = wi::sub (imm_val, 1, sgn, &overflow);

  if (overflow)
    return;

  rtx new_imm = immed_wide_int_const (imm_modif, mode);

  /* Compare the target's cost of materializing either constant in a
     COMPARE context; only switch if the adjusted form is cheaper.  */
  int old_cost = rtx_cost (*imm, mode, COMPARE, 0, true);
  int new_cost = rtx_cost (new_imm, mode, COMPARE, 0, true);

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      fprintf (dump_file, ";; cmp: %s, old cst: ",
               GET_RTX_NAME (*code));
      print_rtl (dump_file, *imm);
      fprintf (dump_file, " new cst: ");
      print_rtl (dump_file, new_imm);
      fprintf (dump_file, "\n");
      fprintf (dump_file, ";; old cst cost: %d, new cst cost: %d\n",
               old_cost, new_cost);
    }

  /* Update the immediate and the code.  */
  if (old_cost > new_cost)
    {
      *code = equivalent_cmp_code (*code);
      *imm = new_imm;
    }
}
6436 :
6437 :
6438 :
6439 : /* Perform possibly multi-word comparison and conditional jump to LABEL
6440 : if ARG1 OP ARG2 true where ARG1 and ARG2 are of mode MODE. This is
6441 : now a thin wrapper around do_compare_rtx_and_jump. */
6442 :
6443 : static void
6444 2403 : do_cmp_and_jump (rtx arg1, rtx arg2, enum rtx_code op, machine_mode mode,
6445 : rtx_code_label *label)
6446 : {
6447 2403 : int unsignedp = (op == LTU || op == LEU || op == GTU || op == GEU);
6448 2403 : do_compare_rtx_and_jump (arg1, arg2, op, unsignedp, mode, NULL_RTX,
6449 : NULL, label, profile_probability::uninitialized ());
6450 2403 : }
|