Line data Source code
1 : /* Medium-level subroutines: convert bit-field store and extract
2 : and shifts, multiplies and divides to rtl instructions.
3 : Copyright (C) 1987-2026 Free Software Foundation, Inc.
4 :
5 : This file is part of GCC.
6 :
7 : GCC is free software; you can redistribute it and/or modify it under
8 : the terms of the GNU General Public License as published by the Free
9 : Software Foundation; either version 3, or (at your option) any later
10 : version.
11 :
12 : GCC is distributed in the hope that it will be useful, but WITHOUT ANY
13 : WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 : FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
15 : for more details.
16 :
17 : You should have received a copy of the GNU General Public License
18 : along with GCC; see the file COPYING3. If not see
19 : <http://www.gnu.org/licenses/>. */
20 :
21 : /* Work around tree-optimization/91825. */
22 : #pragma GCC diagnostic warning "-Wmaybe-uninitialized"
23 :
24 : #include "config.h"
25 : #include "system.h"
26 : #include "coretypes.h"
27 : #include "backend.h"
28 : #include "target.h"
29 : #include "rtl.h"
30 : #include "tree.h"
31 : #include "predict.h"
32 : #include "memmodel.h"
33 : #include "tm_p.h"
34 : #include "optabs.h"
35 : #include "expmed.h"
36 : #include "regs.h"
37 : #include "emit-rtl.h"
38 : #include "diagnostic-core.h"
39 : #include "fold-const.h"
40 : #include "stor-layout.h"
41 : #include "dojump.h"
42 : #include "explow.h"
43 : #include "expr.h"
44 : #include "langhooks.h"
45 : #include "tree-vector-builder.h"
46 : #include "recog.h"
47 :
48 : struct target_expmed default_target_expmed;
49 : #if SWITCHABLE_TARGET
50 : struct target_expmed *this_target_expmed = &default_target_expmed;
51 : #endif
52 :
53 : static bool store_integral_bit_field (rtx, opt_scalar_int_mode,
54 : unsigned HOST_WIDE_INT,
55 : unsigned HOST_WIDE_INT,
56 : poly_uint64, poly_uint64,
57 : machine_mode, rtx, bool, bool);
58 : static void store_fixed_bit_field (rtx, opt_scalar_int_mode,
59 : unsigned HOST_WIDE_INT,
60 : unsigned HOST_WIDE_INT,
61 : poly_uint64, poly_uint64,
62 : rtx, scalar_int_mode, bool);
63 : static void store_fixed_bit_field_1 (rtx, scalar_int_mode,
64 : unsigned HOST_WIDE_INT,
65 : unsigned HOST_WIDE_INT,
66 : rtx, scalar_int_mode, bool);
67 : static void store_split_bit_field (rtx, opt_scalar_int_mode,
68 : unsigned HOST_WIDE_INT,
69 : unsigned HOST_WIDE_INT,
70 : poly_uint64, poly_uint64,
71 : rtx, scalar_int_mode, bool);
72 : static rtx extract_integral_bit_field (rtx, opt_scalar_int_mode,
73 : unsigned HOST_WIDE_INT,
74 : unsigned HOST_WIDE_INT, int, rtx,
75 : machine_mode, machine_mode, bool, bool);
76 : static rtx extract_fixed_bit_field (machine_mode, rtx, opt_scalar_int_mode,
77 : unsigned HOST_WIDE_INT,
78 : unsigned HOST_WIDE_INT, rtx, int, bool);
79 : static rtx extract_fixed_bit_field_1 (machine_mode, rtx, scalar_int_mode,
80 : unsigned HOST_WIDE_INT,
81 : unsigned HOST_WIDE_INT, rtx, int, bool);
82 : static rtx lshift_value (machine_mode, unsigned HOST_WIDE_INT, int);
83 : static rtx extract_split_bit_field (rtx, opt_scalar_int_mode,
84 : unsigned HOST_WIDE_INT,
85 : unsigned HOST_WIDE_INT, int, bool);
86 : static void do_cmp_and_jump (rtx, rtx, enum rtx_code, machine_mode, rtx_code_label *);
87 : static rtx expand_smod_pow2 (scalar_int_mode, rtx, HOST_WIDE_INT);
88 : static rtx expand_sdiv_pow2 (scalar_int_mode, rtx, HOST_WIDE_INT);
89 :
90 : /* Return a constant integer mask value of mode MODE with BITSIZE ones
91 : followed by BITPOS zeros, or the complement of that if COMPLEMENT.
92 : The mask is truncated if necessary to the width of mode MODE. The
93 : mask is zero-extended if BITSIZE+BITPOS is too small for MODE. */
94 :
95 : static inline rtx
96 230226 : mask_rtx (scalar_int_mode mode, int bitpos, int bitsize, bool complement)
97 : {
98 230226 : return immed_wide_int_const
99 230226 : (wi::shifted_mask (bitpos, bitsize, complement,
100 230226 : GET_MODE_PRECISION (mode)), mode);
101 : }
102 :
103 : /* Test whether a value is zero or a power of two. */
104 : #define EXACT_POWER_OF_2_OR_ZERO_P(x) \
105 : (((x) & ((x) - HOST_WIDE_INT_1U)) == 0)
106 :
107 : struct init_expmed_rtl
108 : {
109 : rtx reg;
110 : rtx plus;
111 : rtx neg;
112 : rtx mult;
113 : rtx sdiv;
114 : rtx udiv;
115 : rtx sdiv_32;
116 : rtx smod_32;
117 : rtx wide_mult;
118 : rtx wide_lshr;
119 : rtx wide_trunc;
120 : rtx shift;
121 : rtx shift_mult;
122 : rtx shift_add;
123 : rtx shift_sub0;
124 : rtx shift_sub1;
125 : rtx zext;
126 : rtx trunc;
127 :
128 : rtx pow2[MAX_BITS_PER_WORD];
129 : rtx cint[MAX_BITS_PER_WORD];
130 : };
131 :
132 : static void
133 30925300 : init_expmed_one_conv (struct init_expmed_rtl *all, scalar_int_mode to_mode,
134 : scalar_int_mode from_mode, bool speed)
135 : {
136 30925300 : int to_size, from_size;
137 30925300 : rtx which;
138 :
139 30925300 : to_size = GET_MODE_PRECISION (to_mode);
140 30925300 : from_size = GET_MODE_PRECISION (from_mode);
141 :
142 : /* Most partial integers have a precision less than the "full"
143 : integer it requires for storage. In case one doesn't, for
144 : comparison purposes here, reduce the bit size by one in that
145 : case. */
146 30925300 : if (GET_MODE_CLASS (to_mode) == MODE_PARTIAL_INT
147 30925300 : && pow2p_hwi (to_size))
148 6185060 : to_size --;
149 30925300 : if (GET_MODE_CLASS (from_mode) == MODE_PARTIAL_INT
150 30925300 : && pow2p_hwi (from_size))
151 0 : from_size --;
152 :
153 : /* Assume cost of zero-extend and sign-extend is the same. */
154 30925300 : which = (to_size < from_size ? all->trunc : all->zext);
155 :
156 30925300 : PUT_MODE (all->reg, from_mode);
157 30925300 : set_convert_cost (to_mode, from_mode, speed,
158 : set_src_cost (which, to_mode, speed));
159 : /* Restore all->reg's mode. */
160 30925300 : PUT_MODE (all->reg, to_mode);
161 30925300 : }
162 :
163 : static void
164 18113390 : init_expmed_one_mode (struct init_expmed_rtl *all,
165 : machine_mode mode, int speed)
166 : {
167 18113390 : int m, n, mode_bitsize;
168 18113390 : machine_mode mode_from;
169 :
170 18113390 : mode_bitsize = GET_MODE_UNIT_BITSIZE (mode);
171 :
172 18113390 : PUT_MODE (all->reg, mode);
173 18113390 : PUT_MODE (all->plus, mode);
174 18113390 : PUT_MODE (all->neg, mode);
175 18113390 : PUT_MODE (all->mult, mode);
176 18113390 : PUT_MODE (all->sdiv, mode);
177 18113390 : PUT_MODE (all->udiv, mode);
178 18113390 : PUT_MODE (all->sdiv_32, mode);
179 18113390 : PUT_MODE (all->smod_32, mode);
180 18113390 : PUT_MODE (all->wide_trunc, mode);
181 18113390 : PUT_MODE (all->shift, mode);
182 18113390 : PUT_MODE (all->shift_mult, mode);
183 18113390 : PUT_MODE (all->shift_add, mode);
184 18113390 : PUT_MODE (all->shift_sub0, mode);
185 18113390 : PUT_MODE (all->shift_sub1, mode);
186 18113390 : PUT_MODE (all->zext, mode);
187 18113390 : PUT_MODE (all->trunc, mode);
188 :
189 18113390 : set_add_cost (speed, mode, set_src_cost (all->plus, mode, speed));
190 18113390 : set_neg_cost (speed, mode, set_src_cost (all->neg, mode, speed));
191 18113390 : set_mul_cost (speed, mode, set_src_cost (all->mult, mode, speed));
192 18113390 : set_sdiv_cost (speed, mode, set_src_cost (all->sdiv, mode, speed));
193 18113390 : set_udiv_cost (speed, mode, set_src_cost (all->udiv, mode, speed));
194 :
195 18113390 : set_sdiv_pow2_cheap (speed, mode, (set_src_cost (all->sdiv_32, mode, speed)
196 18113390 : <= 2 * add_cost (speed, mode)));
197 18113390 : set_smod_pow2_cheap (speed, mode, (set_src_cost (all->smod_32, mode, speed)
198 18113390 : <= 4 * add_cost (speed, mode)));
199 :
200 18113390 : set_shift_cost (speed, mode, 0, 0);
201 18113390 : {
202 18113390 : int cost = add_cost (speed, mode);
203 18113390 : set_shiftadd_cost (speed, mode, 0, cost);
204 18113390 : set_shiftsub0_cost (speed, mode, 0, cost);
205 18113390 : set_shiftsub1_cost (speed, mode, 0, cost);
206 : }
207 :
208 18113390 : n = MIN (MAX_BITS_PER_WORD, mode_bitsize);
209 607903040 : for (m = 1; m < n; m++)
210 : {
211 589789650 : XEXP (all->shift, 1) = all->cint[m];
212 589789650 : XEXP (all->shift_mult, 1) = all->pow2[m];
213 :
214 589789650 : set_shift_cost (speed, mode, m, set_src_cost (all->shift, mode, speed));
215 589789650 : set_shiftadd_cost (speed, mode, m, set_src_cost (all->shift_add, mode,
216 : speed));
217 589789650 : set_shiftsub0_cost (speed, mode, m, set_src_cost (all->shift_sub0, mode,
218 : speed));
219 589789650 : set_shiftsub1_cost (speed, mode, m, set_src_cost (all->shift_sub1, mode,
220 : speed));
221 : }
222 :
223 18113390 : scalar_int_mode int_mode_to;
224 18113390 : if (is_a <scalar_int_mode> (mode, &int_mode_to))
225 : {
226 35343200 : for (mode_from = MIN_MODE_INT; mode_from <= MAX_MODE_INT;
227 30925300 : mode_from = (machine_mode)(mode_from + 1))
228 30925300 : init_expmed_one_conv (all, int_mode_to,
229 : as_a <scalar_int_mode> (mode_from), speed);
230 :
231 4417900 : scalar_int_mode wider_mode;
232 4417900 : if (GET_MODE_CLASS (int_mode_to) == MODE_INT
233 4417900 : && GET_MODE_WIDER_MODE (int_mode_to).exists (&wider_mode))
234 : {
235 2650740 : PUT_MODE (all->reg, mode);
236 2650740 : PUT_MODE (all->zext, wider_mode);
237 2650740 : PUT_MODE (all->wide_mult, wider_mode);
238 2650740 : PUT_MODE (all->wide_lshr, wider_mode);
239 2650740 : XEXP (all->wide_lshr, 1)
240 2650740 : = gen_int_shift_amount (wider_mode, mode_bitsize);
241 :
242 2650740 : set_mul_widen_cost (speed, wider_mode,
243 : set_src_cost (all->wide_mult, wider_mode, speed));
244 2650740 : set_mul_highpart_cost (speed, int_mode_to,
245 : set_src_cost (all->wide_trunc,
246 : int_mode_to, speed));
247 : }
248 : }
249 18113390 : }
250 :
251 : void
252 220895 : init_expmed (void)
253 : {
254 220895 : struct init_expmed_rtl all;
255 220895 : machine_mode mode = QImode;
256 220895 : int m, speed;
257 :
258 220895 : memset (&all, 0, sizeof all);
259 14137280 : for (m = 1; m < MAX_BITS_PER_WORD; m++)
260 : {
261 13916385 : all.pow2[m] = GEN_INT (HOST_WIDE_INT_1 << m);
262 13916385 : all.cint[m] = GEN_INT (m);
263 : }
264 :
265 : /* Avoid using hard regs in ways which may be unsupported. */
266 220895 : all.reg = gen_raw_REG (mode, LAST_VIRTUAL_REGISTER + 1);
267 220895 : all.plus = gen_rtx_PLUS (mode, all.reg, all.reg);
268 220895 : all.neg = gen_rtx_NEG (mode, all.reg);
269 220895 : all.mult = gen_rtx_MULT (mode, all.reg, all.reg);
270 220895 : all.sdiv = gen_rtx_DIV (mode, all.reg, all.reg);
271 220895 : all.udiv = gen_rtx_UDIV (mode, all.reg, all.reg);
272 220895 : all.sdiv_32 = gen_rtx_DIV (mode, all.reg, all.pow2[5]);
273 220895 : all.smod_32 = gen_rtx_MOD (mode, all.reg, all.pow2[5]);
274 220895 : all.zext = gen_rtx_ZERO_EXTEND (mode, all.reg);
275 220895 : all.wide_mult = gen_rtx_MULT (mode, all.zext, all.zext);
276 220895 : all.wide_lshr = gen_rtx_LSHIFTRT (mode, all.wide_mult, all.reg);
277 220895 : all.wide_trunc = gen_rtx_TRUNCATE (mode, all.wide_lshr);
278 220895 : all.shift = gen_rtx_ASHIFT (mode, all.reg, all.reg);
279 220895 : all.shift_mult = gen_rtx_MULT (mode, all.reg, all.reg);
280 220895 : all.shift_add = gen_rtx_PLUS (mode, all.shift_mult, all.reg);
281 220895 : all.shift_sub0 = gen_rtx_MINUS (mode, all.shift_mult, all.reg);
282 220895 : all.shift_sub1 = gen_rtx_MINUS (mode, all.reg, all.shift_mult);
283 220895 : all.trunc = gen_rtx_TRUNCATE (mode, all.reg);
284 :
285 662685 : for (speed = 0; speed < 2; speed++)
286 : {
287 441790 : crtl->maybe_hot_insn_p = speed;
288 441790 : set_zero_cost (speed, set_src_cost (const0_rtx, QImode, speed));
289 :
290 3534320 : for (mode = MIN_MODE_INT; mode <= MAX_MODE_INT;
291 3092530 : mode = (machine_mode)(mode + 1))
292 3092530 : init_expmed_one_mode (&all, mode, speed);
293 :
294 : if (MIN_MODE_PARTIAL_INT != VOIDmode)
295 1767160 : for (mode = MIN_MODE_PARTIAL_INT; mode <= MAX_MODE_PARTIAL_INT;
296 1325370 : mode = (machine_mode)(mode + 1))
297 1325370 : init_expmed_one_mode (&all, mode, speed);
298 :
299 : if (MIN_MODE_VECTOR_INT != VOIDmode)
300 14137280 : for (mode = MIN_MODE_VECTOR_INT; mode <= MAX_MODE_VECTOR_INT;
301 13695490 : mode = (machine_mode)(mode + 1))
302 13695490 : init_expmed_one_mode (&all, mode, speed);
303 : }
304 :
305 220895 : if (alg_hash_used_p ())
306 : {
307 1057 : struct alg_hash_entry *p = alg_hash_entry_ptr (0);
308 1057 : memset (p, 0, sizeof (*p) * NUM_ALG_HASH_ENTRIES);
309 : }
310 : else
311 219838 : set_alg_hash_used_p (true);
312 220895 : default_rtl_profile ();
313 :
314 220895 : ggc_free (all.trunc);
315 220895 : ggc_free (all.shift_sub1);
316 220895 : ggc_free (all.shift_sub0);
317 220895 : ggc_free (all.shift_add);
318 220895 : ggc_free (all.shift_mult);
319 220895 : ggc_free (all.shift);
320 220895 : ggc_free (all.wide_trunc);
321 220895 : ggc_free (all.wide_lshr);
322 220895 : ggc_free (all.wide_mult);
323 220895 : ggc_free (all.zext);
324 220895 : ggc_free (all.smod_32);
325 220895 : ggc_free (all.sdiv_32);
326 220895 : ggc_free (all.udiv);
327 220895 : ggc_free (all.sdiv);
328 220895 : ggc_free (all.mult);
329 220895 : ggc_free (all.neg);
330 220895 : ggc_free (all.plus);
331 220895 : ggc_free (all.reg);
332 220895 : }
333 :
334 : /* Return an rtx representing minus the value of X.
335 : MODE is the intended mode of the result,
336 : useful if X is a CONST_INT. */
337 :
338 : rtx
339 1037597 : negate_rtx (machine_mode mode, rtx x)
340 : {
341 1037597 : rtx result = simplify_unary_operation (NEG, mode, x, mode);
342 :
343 1037597 : if (result == 0)
344 2122 : result = expand_unop (mode, neg_optab, x, NULL_RTX, 0);
345 :
346 1037597 : return result;
347 : }
348 :
349 : /* Whether reverse storage order is supported on the target. */
350 : static int reverse_storage_order_supported = -1;
351 :
352 : /* Check whether reverse storage order is supported on the target. */
353 :
354 : static void
355 286 : check_reverse_storage_order_support (void)
356 : {
357 286 : if (BYTES_BIG_ENDIAN != WORDS_BIG_ENDIAN)
358 : {
359 : reverse_storage_order_supported = 0;
360 : sorry ("reverse scalar storage order");
361 : }
362 : else
363 286 : reverse_storage_order_supported = 1;
364 286 : }
365 :
366 : /* Whether reverse FP storage order is supported on the target. */
367 : static int reverse_float_storage_order_supported = -1;
368 :
369 : /* Check whether reverse FP storage order is supported on the target. */
370 :
371 : static void
372 55 : check_reverse_float_storage_order_support (void)
373 : {
374 55 : if (FLOAT_WORDS_BIG_ENDIAN != WORDS_BIG_ENDIAN)
375 : {
376 : reverse_float_storage_order_supported = 0;
377 : sorry ("reverse floating-point scalar storage order");
378 : }
379 : else
380 55 : reverse_float_storage_order_supported = 1;
381 55 : }
382 :
383 : /* Return an rtx representing value of X with reverse storage order.
384 : MODE is the intended mode of the result,
385 : useful if X is a CONST_INT. */
386 :
387 : rtx
388 3266 : flip_storage_order (machine_mode mode, rtx x)
389 : {
390 3266 : scalar_int_mode int_mode;
391 3266 : rtx result;
392 :
393 3266 : if (mode == QImode)
394 : return x;
395 :
396 2415 : if (COMPLEX_MODE_P (mode))
397 : {
398 44 : rtx real = read_complex_part (x, false);
399 44 : rtx imag = read_complex_part (x, true);
400 :
401 88 : real = flip_storage_order (GET_MODE_INNER (mode), real);
402 88 : imag = flip_storage_order (GET_MODE_INNER (mode), imag);
403 :
404 44 : return gen_rtx_CONCAT (mode, real, imag);
405 : }
406 :
407 2371 : if (UNLIKELY (reverse_storage_order_supported < 0))
408 286 : check_reverse_storage_order_support ();
409 :
410 2371 : if (!is_a <scalar_int_mode> (mode, &int_mode))
411 : {
412 243 : if (FLOAT_MODE_P (mode)
413 243 : && UNLIKELY (reverse_float_storage_order_supported < 0))
414 55 : check_reverse_float_storage_order_support ();
415 :
416 243 : if (!int_mode_for_size (GET_MODE_PRECISION (mode), 0).exists (&int_mode)
417 243 : || !targetm.scalar_mode_supported_p (int_mode))
418 : {
419 0 : sorry ("reverse storage order for %smode", GET_MODE_NAME (mode));
420 0 : return x;
421 : }
422 243 : x = gen_lowpart (int_mode, x);
423 : }
424 :
425 2371 : result = simplify_unary_operation (BSWAP, int_mode, x, int_mode);
426 2371 : if (result == 0)
427 1051 : result = expand_unop (int_mode, bswap_optab, x, NULL_RTX, 1);
428 :
429 2371 : if (int_mode != mode)
430 243 : result = gen_lowpart (mode, result);
431 :
432 : return result;
433 : }
434 :
435 : /* If MODE is set, adjust bitfield memory MEM so that it points to the
436 : first unit of mode MODE that contains a bitfield of size BITSIZE at
437 : bit position BITNUM. If MODE is not set, return a BLKmode reference
438 : to every byte in the bitfield. Set *NEW_BITNUM to the bit position
439 : of the field within the new memory. */
440 :
441 : static rtx
442 423300 : narrow_bit_field_mem (rtx mem, opt_scalar_int_mode mode,
443 : unsigned HOST_WIDE_INT bitsize,
444 : unsigned HOST_WIDE_INT bitnum,
445 : unsigned HOST_WIDE_INT *new_bitnum)
446 : {
447 423300 : scalar_int_mode imode;
448 423300 : if (mode.exists (&imode))
449 : {
450 423300 : unsigned int unit = GET_MODE_BITSIZE (imode);
451 423300 : *new_bitnum = bitnum % unit;
452 423300 : HOST_WIDE_INT offset = (bitnum - *new_bitnum) / BITS_PER_UNIT;
453 423300 : return adjust_bitfield_address (mem, imode, offset);
454 : }
455 : else
456 : {
457 0 : *new_bitnum = bitnum % BITS_PER_UNIT;
458 0 : HOST_WIDE_INT offset = bitnum / BITS_PER_UNIT;
459 0 : HOST_WIDE_INT size = ((*new_bitnum + bitsize + BITS_PER_UNIT - 1)
460 0 : / BITS_PER_UNIT);
461 0 : return adjust_bitfield_address_size (mem, BLKmode, offset, size);
462 : }
463 : }
464 :
465 : /* The caller wants to perform insertion or extraction PATTERN on a
466 : bitfield of size BITSIZE at BITNUM bits into memory operand OP0.
467 : BITREGION_START and BITREGION_END are as for store_bit_field
468 : and FIELDMODE is the natural mode of the field.
469 :
470 : Search for a mode that is compatible with the memory access
471 : restrictions and (where applicable) with a register insertion or
472 : extraction. Return the new memory on success, storing the adjusted
473 : bit position in *NEW_BITNUM. Return null otherwise. */
474 :
475 : static rtx
476 204501 : adjust_bit_field_mem_for_reg (enum extraction_pattern pattern,
477 : rtx op0, HOST_WIDE_INT bitsize,
478 : HOST_WIDE_INT bitnum,
479 : poly_uint64 bitregion_start,
480 : poly_uint64 bitregion_end,
481 : machine_mode fieldmode,
482 : unsigned HOST_WIDE_INT *new_bitnum)
483 : {
484 409002 : bit_field_mode_iterator iter (bitsize, bitnum, bitregion_start,
485 204501 : bitregion_end, MEM_ALIGN (op0),
486 204505 : MEM_VOLATILE_P (op0));
487 204501 : scalar_int_mode best_mode;
488 204501 : if (iter.next_mode (&best_mode))
489 : {
490 : /* We can use a memory in BEST_MODE. See whether this is true for
491 : any wider modes. All other things being equal, we prefer to
492 : use the widest mode possible because it tends to expose more
493 : CSE opportunities. */
494 197997 : if (!iter.prefer_smaller_modes ())
495 : {
496 : /* Limit the search to the mode required by the corresponding
497 : register insertion or extraction instruction, if any. */
498 317 : scalar_int_mode limit_mode = word_mode;
499 317 : extraction_insn insn;
500 634 : if (get_best_reg_extraction_insn (&insn, pattern,
501 317 : GET_MODE_BITSIZE (best_mode),
502 : fieldmode))
503 317 : limit_mode = insn.field_mode;
504 :
505 317 : scalar_int_mode wider_mode;
506 317 : while (iter.next_mode (&wider_mode)
507 1052 : && GET_MODE_SIZE (wider_mode) <= GET_MODE_SIZE (limit_mode))
508 111 : best_mode = wider_mode;
509 : }
510 197997 : return narrow_bit_field_mem (op0, best_mode, bitsize, bitnum,
511 : new_bitnum);
512 : }
513 : return NULL_RTX;
514 : }
515 :
516 : /* Return true if a bitfield of size BITSIZE at bit number BITNUM within
517 : a structure of mode STRUCT_MODE represents a lowpart subreg. The subreg
518 : offset is then BITNUM / BITS_PER_UNIT. */
519 :
520 : static bool
521 790313 : lowpart_bit_field_p (poly_uint64 bitnum, poly_uint64 bitsize,
522 : machine_mode struct_mode)
523 : {
524 790313 : poly_uint64 regsize = REGMODE_NATURAL_SIZE (struct_mode);
525 790313 : if (BYTES_BIG_ENDIAN)
526 : return (multiple_p (bitnum, BITS_PER_UNIT)
527 : && (known_eq (bitnum + bitsize, GET_MODE_BITSIZE (struct_mode))
528 : || multiple_p (bitnum + bitsize,
529 : regsize * BITS_PER_UNIT)));
530 : else
531 790313 : return multiple_p (bitnum, regsize * BITS_PER_UNIT);
532 : }
533 :
534 : /* Return true if -fstrict-volatile-bitfields applies to an access of OP0
535 : containing BITSIZE bits starting at BITNUM, with field mode FIELDMODE.
536 : Return false if the access would touch memory outside the range
537 : BITREGION_START to BITREGION_END for conformance to the C++ memory
538 : model. */
539 :
540 : static bool
541 1571131 : strict_volatile_bitfield_p (rtx op0, unsigned HOST_WIDE_INT bitsize,
542 : unsigned HOST_WIDE_INT bitnum,
543 : scalar_int_mode fieldmode,
544 : poly_uint64 bitregion_start,
545 : poly_uint64 bitregion_end)
546 : {
547 1571131 : unsigned HOST_WIDE_INT modesize = GET_MODE_BITSIZE (fieldmode);
548 :
549 : /* -fstrict-volatile-bitfields must be enabled and we must have a
550 : volatile MEM. */
551 1571131 : if (!MEM_P (op0)
552 162806 : || !MEM_VOLATILE_P (op0)
553 1571343 : || flag_strict_volatile_bitfields <= 0)
554 : return false;
555 :
556 : /* The bit size must not be larger than the field mode, and
557 : the field mode must not be larger than a word. */
558 14 : if (bitsize > modesize || modesize > BITS_PER_WORD)
559 : return false;
560 :
561 : /* Check for cases of unaligned fields that must be split. */
562 14 : if (bitnum % modesize + bitsize > modesize)
563 : return false;
564 :
565 : /* The memory must be sufficiently aligned for a MODESIZE access.
566 : This condition guarantees, that the memory access will not
567 : touch anything after the end of the structure. */
568 11 : if (MEM_ALIGN (op0) < modesize)
569 : return false;
570 :
571 : /* Check for cases where the C++ memory model applies. */
572 11 : if (maybe_ne (bitregion_end, 0U)
573 11 : && (maybe_lt (bitnum - bitnum % modesize, bitregion_start)
574 4 : || maybe_gt (bitnum - bitnum % modesize + modesize - 1,
575 : bitregion_end)))
576 0 : return false;
577 :
578 : return true;
579 : }
580 :
581 : /* Return true if OP is a memory and if a bitfield of size BITSIZE at
582 : bit number BITNUM can be treated as a simple value of mode MODE.
583 : Store the byte offset in *BYTENUM if so. */
584 :
585 : static bool
586 540038 : simple_mem_bitfield_p (rtx op0, poly_uint64 bitsize, poly_uint64 bitnum,
587 : machine_mode mode, poly_uint64 *bytenum)
588 : {
589 540038 : return (MEM_P (op0)
590 259081 : && multiple_p (bitnum, BITS_PER_UNIT, bytenum)
591 207076 : && known_eq (bitsize, GET_MODE_BITSIZE (mode))
592 591758 : && (!targetm.slow_unaligned_access (mode, MEM_ALIGN (op0))
593 0 : || (multiple_p (bitnum, GET_MODE_ALIGNMENT (mode))
594 0 : && MEM_ALIGN (op0) >= GET_MODE_ALIGNMENT (mode))));
595 : }
596 :
597 : /* Try to use instruction INSV to store VALUE into a field of OP0.
598 : If OP0_MODE is defined, it is the mode of OP0, otherwise OP0 is a
599 : BLKmode MEM. VALUE_MODE is the mode of VALUE. BITSIZE and BITNUM
600 : are as for store_bit_field. */
601 :
602 : static bool
603 111355 : store_bit_field_using_insv (const extraction_insn *insv, rtx op0,
604 : opt_scalar_int_mode op0_mode,
605 : unsigned HOST_WIDE_INT bitsize,
606 : unsigned HOST_WIDE_INT bitnum,
607 : rtx value, scalar_int_mode value_mode)
608 : {
609 111355 : class expand_operand ops[4];
610 111355 : rtx value1;
611 111355 : rtx xop0 = op0;
612 111355 : rtx_insn *last = get_last_insn ();
613 111355 : bool copy_back = false;
614 :
615 111355 : scalar_int_mode op_mode = insv->field_mode;
616 111355 : unsigned int unit = GET_MODE_BITSIZE (op_mode);
617 111355 : if (bitsize == 0 || bitsize > unit)
618 : return false;
619 :
620 111346 : if (MEM_P (xop0))
621 : /* Get a reference to the first byte of the field. */
622 0 : xop0 = narrow_bit_field_mem (xop0, insv->struct_mode, bitsize, bitnum,
623 : &bitnum);
624 : else
625 : {
626 : /* Convert from counting within OP0 to counting in OP_MODE. */
627 111346 : if (BYTES_BIG_ENDIAN)
628 : bitnum += unit - GET_MODE_BITSIZE (op0_mode.require ());
629 :
630 : /* If xop0 is a register, we need it in OP_MODE
631 : to make it acceptable to the format of insv. */
632 111346 : if (GET_CODE (xop0) == SUBREG)
633 : {
634 : /* If such a SUBREG can't be created, give up. */
635 36021 : if (!validate_subreg (op_mode, GET_MODE (SUBREG_REG (xop0)),
636 36021 : SUBREG_REG (xop0), SUBREG_BYTE (xop0)))
637 : return false;
638 : /* We can't just change the mode, because this might clobber op0,
639 : and we will need the original value of op0 if insv fails. */
640 36021 : xop0 = gen_rtx_SUBREG (op_mode, SUBREG_REG (xop0),
641 36021 : SUBREG_BYTE (xop0));
642 : }
643 111346 : if (REG_P (xop0) && GET_MODE (xop0) != op_mode)
644 26880 : xop0 = gen_lowpart_SUBREG (op_mode, xop0);
645 : }
646 :
647 : /* If the destination is a paradoxical subreg such that we need a
648 : truncate to the inner mode, perform the insertion on a temporary and
649 : truncate the result to the original destination. Note that we can't
650 : just truncate the paradoxical subreg as (truncate:N (subreg:W (reg:N
651 : X) 0)) is (reg:N X). */
652 111346 : if (GET_CODE (xop0) == SUBREG
653 62901 : && REG_P (SUBREG_REG (xop0))
654 174247 : && !TRULY_NOOP_TRUNCATION_MODES_P (GET_MODE (SUBREG_REG (xop0)),
655 : op_mode))
656 : {
657 0 : rtx tem = gen_reg_rtx (op_mode);
658 0 : emit_move_insn (tem, xop0);
659 0 : xop0 = tem;
660 0 : copy_back = true;
661 : }
662 :
663 : /* There are similar overflow check at the start of store_bit_field_1,
664 : but that only check the situation where the field lies completely
665 : outside the register, while there do have situation where the field
666 : lies partially in the register, we need to adjust bitsize for this
667 : partial overflow situation. Without this fix, pr48335-2.c on big-endian
668 : will broken on those arch support bit insert instruction, like arm, aarch64
669 : etc. */
670 111346 : if (bitsize + bitnum > unit && bitnum < unit)
671 : {
672 2 : warning (OPT_Wextra, "write of %wu-bit data outside the bound of "
673 : "destination object, data truncated into %wu-bit",
674 : bitsize, unit - bitnum);
675 2 : bitsize = unit - bitnum;
676 : }
677 :
678 : /* If BITS_BIG_ENDIAN is zero on a BYTES_BIG_ENDIAN machine, we count
679 : "backwards" from the size of the unit we are inserting into.
680 : Otherwise, we count bits from the most significant on a
681 : BYTES/BITS_BIG_ENDIAN machine. */
682 :
683 111346 : if (BITS_BIG_ENDIAN != BYTES_BIG_ENDIAN)
684 : bitnum = unit - bitsize - bitnum;
685 :
686 : /* Convert VALUE to op_mode (which insv insn wants) in VALUE1. */
687 111346 : value1 = value;
688 111346 : if (value_mode != op_mode)
689 : {
690 144960 : if (GET_MODE_BITSIZE (value_mode) >= bitsize)
691 : {
692 72480 : rtx tmp;
693 : /* Optimization: Don't bother really extending VALUE
694 : if it has all the bits we will actually use. However,
695 : if we must narrow it, be sure we do it correctly. */
696 :
697 217440 : if (GET_MODE_SIZE (value_mode) < GET_MODE_SIZE (op_mode))
698 : {
699 33718 : tmp = simplify_subreg (op_mode, value1, value_mode, 0);
700 33718 : if (! tmp)
701 33165 : tmp = simplify_gen_subreg (op_mode,
702 : force_reg (value_mode, value1),
703 : value_mode, 0);
704 : }
705 : else
706 : {
707 38762 : if (targetm.mode_rep_extended (op_mode, value_mode) != UNKNOWN)
708 0 : tmp = simplify_gen_unary (TRUNCATE, op_mode,
709 : value1, value_mode);
710 : else
711 : {
712 38762 : tmp = gen_lowpart_if_possible (op_mode, value1);
713 38762 : if (! tmp)
714 0 : tmp = gen_lowpart (op_mode, force_reg (value_mode, value1));
715 : }
716 : }
717 : value1 = tmp;
718 : }
719 0 : else if (CONST_INT_P (value))
720 0 : value1 = gen_int_mode (INTVAL (value), op_mode);
721 : else
722 : /* Parse phase is supposed to make VALUE's data type
723 : match that of the component reference, which is a type
724 : at least as wide as the field; so VALUE should have
725 : a mode that corresponds to that type. */
726 0 : gcc_assert (CONSTANT_P (value));
727 : }
728 :
729 111346 : create_fixed_operand (&ops[0], xop0);
730 111346 : create_integer_operand (&ops[1], bitsize);
731 111346 : create_integer_operand (&ops[2], bitnum);
732 111346 : create_input_operand (&ops[3], value1, op_mode);
733 111346 : if (maybe_expand_insn (insv->icode, 4, ops))
734 : {
735 2045 : if (copy_back)
736 0 : convert_move (op0, xop0, true);
737 2045 : return true;
738 : }
739 109301 : delete_insns_since (last);
740 109301 : return false;
741 : }
742 :
743 : /* A subroutine of store_bit_field, with the same arguments. Return true
744 : if the operation could be implemented.
745 :
746 : If FALLBACK_P is true, fall back to store_fixed_bit_field if we have
747 : no other way of implementing the operation. If FALLBACK_P is false,
748 : return false instead.
749 :
750 : if UNDEFINED_P is true then STR_RTX is undefined and may be set using
751 : a subreg instead. */
752 :
753 : static bool
754 885611 : store_bit_field_1 (rtx str_rtx, poly_uint64 bitsize, poly_uint64 bitnum,
755 : poly_uint64 bitregion_start, poly_uint64 bitregion_end,
756 : machine_mode fieldmode,
757 : rtx value, bool reverse, bool fallback_p, bool undefined_p)
758 : {
759 885611 : rtx op0 = str_rtx;
760 :
761 885617 : while (GET_CODE (op0) == SUBREG)
762 : {
763 6 : bitnum += subreg_memory_offset (op0) * BITS_PER_UNIT;
764 6 : op0 = SUBREG_REG (op0);
765 : }
766 :
767 : /* No action is needed if the target is a register and if the field
768 : lies completely outside that register. This can occur if the source
769 : code contains an out-of-bounds access to a small array. */
770 1696110 : if (REG_P (op0) && known_ge (bitnum, GET_MODE_BITSIZE (GET_MODE (op0))))
771 : return true;
772 :
773 : /* Use vec_set patterns for inserting parts of vectors whenever
774 : available. */
775 885608 : machine_mode outermode = GET_MODE (op0);
776 885608 : scalar_mode innermode = GET_MODE_INNER (outermode);
777 885608 : poly_uint64 pos;
778 883833 : if (VECTOR_MODE_P (outermode)
779 2122 : && !MEM_P (op0)
780 2116 : && optab_handler (vec_set_optab, outermode) != CODE_FOR_nothing
781 1038 : && fieldmode == innermode
782 888 : && known_eq (bitsize, GET_MODE_PRECISION (innermode))
783 886496 : && multiple_p (bitnum, GET_MODE_PRECISION (innermode), &pos))
784 : {
785 888 : class expand_operand ops[3];
786 888 : enum insn_code icode = optab_handler (vec_set_optab, outermode);
787 :
788 888 : create_fixed_operand (&ops[0], op0);
789 888 : create_input_operand (&ops[1], value, innermode);
790 888 : create_integer_operand (&ops[2], pos);
791 888 : if (maybe_expand_insn (icode, 3, ops))
792 888 : return true;
793 : }
794 :
795 : /* If the target is a register, overwriting the entire object, or storing
796 : a full-word or multi-word field can be done with just a SUBREG. */
797 884720 : if (!MEM_P (op0)
798 1694328 : && known_eq (bitsize, GET_MODE_BITSIZE (fieldmode)))
799 : {
800 : /* Use the subreg machinery either to narrow OP0 to the required
801 : words or to cope with mode punning between equal-sized modes.
802 : In the latter case, use subreg on the rhs side, not lhs. */
803 742703 : rtx sub;
804 742703 : poly_uint64 bytenum;
805 742703 : poly_uint64 regsize = REGMODE_NATURAL_SIZE (GET_MODE (op0));
806 742703 : if (known_eq (bitnum, 0U)
807 1114859 : && known_eq (bitsize, GET_MODE_BITSIZE (GET_MODE (op0))))
808 : {
809 54075 : sub = force_subreg (GET_MODE (op0), value, fieldmode, 0);
810 54075 : if (sub)
811 : {
812 54075 : if (reverse)
813 1 : sub = flip_storage_order (GET_MODE (op0), sub);
814 54075 : emit_move_insn (op0, sub);
815 54075 : return true;
816 : }
817 : }
818 881608 : else if (multiple_p (bitnum, BITS_PER_UNIT, &bytenum)
819 688621 : && (undefined_p
820 684975 : || (multiple_p (bitnum, regsize * BITS_PER_UNIT)
821 672360 : && multiple_p (bitsize, regsize * BITS_PER_UNIT)))
822 1275344 : && known_ge (GET_MODE_BITSIZE (GET_MODE (op0)), bitsize))
823 : {
824 637660 : sub = simplify_gen_subreg (fieldmode, op0, GET_MODE (op0), bytenum);
825 637660 : if (sub)
826 : {
827 637658 : if (reverse)
828 0 : value = flip_storage_order (fieldmode, value);
829 637658 : emit_move_insn (sub, value);
830 637658 : return true;
831 : }
832 : }
833 : }
834 :
835 : /* If the target is memory, storing any naturally aligned field can be
836 : done with a simple store. For targets that support fast unaligned
837 : memory, any naturally sized, unit aligned field can be done directly. */
838 192987 : poly_uint64 bytenum;
839 192987 : if (simple_mem_bitfield_p (op0, bitsize, bitnum, fieldmode, &bytenum))
840 : {
841 7034 : op0 = adjust_bitfield_address (op0, fieldmode, bytenum);
842 7034 : if (reverse)
843 0 : value = flip_storage_order (fieldmode, value);
844 7034 : emit_move_insn (op0, value);
845 7034 : return true;
846 : }
847 :
848 : /* It's possible we'll need to handle other cases here for
849 : polynomial bitnum and bitsize. */
850 :
851 : /* From here on we need to be looking at a fixed-size insertion. */
852 185953 : unsigned HOST_WIDE_INT ibitsize = bitsize.to_constant ();
853 185953 : unsigned HOST_WIDE_INT ibitnum = bitnum.to_constant ();
854 :
855 : /* Make sure we are playing with integral modes. Pun with subregs
856 : if we aren't. This must come after the entire register case above,
857 : since that case is valid for any mode. The following cases are only
858 : valid for integral modes. */
859 185953 : opt_scalar_int_mode op0_mode = int_mode_for_mode (GET_MODE (op0));
860 185953 : scalar_int_mode imode;
861 185953 : if (!op0_mode.exists (&imode) || imode != GET_MODE (op0))
862 : {
863 19756 : if (MEM_P (op0))
864 15204 : op0 = adjust_bitfield_address_size (op0, op0_mode.else_blk (),
865 : 0, MEM_SIZE (op0));
866 4552 : else if (!op0_mode.exists ())
867 : {
868 0 : if (ibitnum == 0
869 0 : && known_eq (ibitsize, GET_MODE_BITSIZE (GET_MODE (op0)))
870 0 : && MEM_P (value)
871 0 : && !reverse)
872 : {
873 0 : value = adjust_address (value, GET_MODE (op0), 0);
874 0 : emit_move_insn (op0, value);
875 0 : return true;
876 : }
877 0 : if (!fallback_p)
878 : return false;
879 0 : rtx temp = assign_stack_temp (GET_MODE (op0),
880 0 : GET_MODE_SIZE (GET_MODE (op0)));
881 0 : emit_move_insn (temp, op0);
882 0 : store_bit_field_1 (temp, bitsize, bitnum, 0, 0, fieldmode, value,
883 : reverse, fallback_p, undefined_p);
884 0 : emit_move_insn (op0, temp);
885 0 : return true;
886 : }
887 : else
888 4552 : op0 = gen_lowpart (op0_mode.require (), op0);
889 : }
890 :
891 185953 : return store_integral_bit_field (op0, op0_mode, ibitsize, ibitnum,
892 : bitregion_start, bitregion_end,
893 185953 : fieldmode, value, reverse, fallback_p);
894 : }
895 :
896 : /* Subroutine of store_bit_field_1, with the same arguments, except
897 : that BITSIZE and BITNUM are constant. Handle cases specific to
898 : integral modes. If OP0_MODE is defined, it is the mode of OP0,
899 : otherwise OP0 is a BLKmode MEM. */
/* Return true once insns performing the store have been emitted.
   Return false when no strategy applied and FALLBACK_P is false, or
   when a word of a multi-word field could not be stored; in the latter
   case the insns emitted for the earlier words are deleted first.

   The strategies are tried in this order:
   1. For a non-MEM OP0 and !REVERSE, a lowpart field exactly as wide
      as FIELDMODE is stored with the target's movstrict pattern.
   2. A field wider than BITS_PER_WORD is stored one word at a time
      through recursive calls to store_bit_field_1.
   3. A non-MEM OP0 wider than a word (other than a hard register that
      occupies a single hard register) is narrowed to the word holding
      the field; if the field straddles two words, or no word_mode
      subreg can be formed, store_split_bit_field is used instead.
   4. For a non-MEM OP0 and !REVERSE, a register insv pattern.
   5. For a MEM OP0 and !REVERSE, a memory insv pattern, and failing
      that a copy of part of OP0 into a register that is modified by a
      recursive store_bit_field_1 call and copied back.
   6. If FALLBACK_P, store_fixed_bit_field.  */
900 :
901 : static bool
902 185953 : store_integral_bit_field (rtx op0, opt_scalar_int_mode op0_mode,
903 : unsigned HOST_WIDE_INT bitsize,
904 : unsigned HOST_WIDE_INT bitnum,
905 : poly_uint64 bitregion_start,
906 : poly_uint64 bitregion_end,
907 : machine_mode fieldmode,
908 : rtx value, bool reverse, bool fallback_p)
909 : {
910 : /* Storing an lsb-aligned field in a register
911 : can be done with a movstrict instruction. */
912 :
913 185953 : if (!MEM_P (op0)
914 117875 : && !reverse
915 383024 : && lowpart_bit_field_p (bitnum, bitsize, op0_mode.require ())
916 84913 : && known_eq (bitsize, GET_MODE_BITSIZE (fieldmode))
917 224301 : && optab_handler (movstrict_optab, fieldmode) != CODE_FOR_nothing)
918 : {
919 5690 : class expand_operand ops[2];
920 5690 : enum insn_code icode = optab_handler (movstrict_optab, fieldmode);
921 5690 : rtx arg0 = op0;
922 5690 : unsigned HOST_WIDE_INT subreg_off;
923 :
924 5690 : if (GET_CODE (arg0) == SUBREG)
925 : {
926 : /* Else we've got some float mode source being extracted into
927 : a different float mode destination -- this combination of
928 : subregs results in Severe Tire Damage. */
929 418 : gcc_assert (GET_MODE (SUBREG_REG (arg0)) == fieldmode
930 : || GET_MODE_CLASS (fieldmode) == MODE_INT
931 : || GET_MODE_CLASS (fieldmode) == MODE_PARTIAL_INT);
932 : arg0 = SUBREG_REG (arg0);
933 : }
934 :
935 5690 : subreg_off = bitnum / BITS_PER_UNIT;
936 5715 : if (validate_subreg (fieldmode, GET_MODE (arg0), arg0, subreg_off)
937 : /* STRICT_LOW_PART must have a non-paradoxical subreg as
938 : operand. */
939 5690 : && !paradoxical_subreg_p (fieldmode, GET_MODE (arg0)))
940 : {
941 5665 : arg0 = gen_rtx_SUBREG (fieldmode, arg0, subreg_off);
942 :
943 5665 : create_fixed_operand (&ops[0], arg0);
944 : /* Shrink the source operand to FIELDMODE. */
945 5665 : create_convert_operand_to (&ops[1], value, fieldmode, false);
946 5665 : if (maybe_expand_insn (icode, 2, ops))
947 5664 : return true;
948 : }
949 : }
950 :
951 : /* Handle fields bigger than a word. */
952 :
953 181834 : if (bitsize > BITS_PER_WORD)
954 : {
955 : /* Here we transfer the words of the field
956 : in the order least significant first.
957 : This is because the most significant word is the one which may
958 : be less than full.
959 : However, only do that if the value is not BLKmode. */
960 :
961 911 : const bool backwards = WORDS_BIG_ENDIAN && fieldmode != BLKmode;
962 911 : const int nwords = (bitsize + (BITS_PER_WORD - 1)) / BITS_PER_WORD;
963 911 : rtx_insn *last;
964 :
965 : /* This is the mode we must force value to, so that there will be enough
966 : subwords to extract. Note that fieldmode will often (always?) be
967 : VOIDmode, because that is what store_field uses to indicate that this
968 : is a bit field, but passing VOIDmode to operand_subword_force
969 : is not allowed.
970 :
971 : The mode must be fixed-size, since insertions into variable-sized
972 : objects are meant to be handled before calling this function. */
973 911 : fixed_size_mode value_mode = as_a <fixed_size_mode> (GET_MODE (value));
974 911 : if (value_mode == VOIDmode)
975 24 : value_mode
976 24 : = smallest_int_mode_for_size (nwords * BITS_PER_WORD).require ();
977 :
/* Remember the current end of the insn stream so that a failed word
   store below can discard everything this loop has emitted.  */
978 911 : last = get_last_insn ();
979 2728 : for (int i = 0; i < nwords; i++)
980 : {
981 : /* Number of bits to be stored in this iteration, i.e. BITS_PER_WORD
982 : except maybe for the last iteration. */
983 3644 : const unsigned HOST_WIDE_INT new_bitsize
984 1858 : = MIN (BITS_PER_WORD, bitsize - i * BITS_PER_WORD);
985 : /* Bit offset from the starting bit number in the target. */
986 3656 : const unsigned int bit_offset
987 : = backwards ^ reverse
988 1822 : ? MAX ((int) bitsize - (i + 1) * BITS_PER_WORD, 0)
989 : : i * BITS_PER_WORD;
990 :
991 : /* No further action is needed if the target is a register and if
992 : this field lies completely outside that register. */
993 2094 : if (REG_P (op0) && known_ge (bitnum + bit_offset,
994 : GET_MODE_BITSIZE (GET_MODE (op0))))
995 : {
996 5 : if (backwards ^ reverse)
997 0 : continue;
998 : /* For forward operation we are finished. */
999 185953 : return true;
1000 : }
1001 :
1002 : /* Starting word number in the value. */
1003 1817 : const unsigned int wordnum
1004 : = backwards
1005 1817 : ? GET_MODE_SIZE (value_mode) / UNITS_PER_WORD - (i + 1)
1006 : : i;
1007 : /* The chunk of the value in word_mode. We use bit-field extraction
1008 : in BLKmode to handle unaligned memory references and to shift the
1009 : last chunk right on big-endian machines if need be. */
1010 1817 : rtx value_word
1011 : = fieldmode == BLKmode
1012 1853 : ? extract_bit_field (value, new_bitsize, wordnum * BITS_PER_WORD,
1013 : 1, NULL_RTX, word_mode, word_mode, false,
1014 : NULL)
1015 1568 : : operand_subword_force (value, wordnum, value_mode);
1016 :
1017 1817 : if (!store_bit_field_1 (op0, new_bitsize,
1018 1817 : bitnum + bit_offset,
1019 : bitregion_start, bitregion_end,
1020 : word_mode,
1021 : value_word, reverse, fallback_p, false))
1022 : {
1023 0 : delete_insns_since (last);
1024 0 : return false;
1025 : }
1026 : }
1027 : return true;
1028 : }
1029 :
1030 : /* If VALUE has a floating-point or complex mode, access it as an
1031 : integer of the corresponding size. This can occur on a machine
1032 : with 64 bit registers that uses SFmode for float. It can also
1033 : occur for unaligned float or complex fields. */
/* ORIG_VALUE keeps VALUE exactly as it was passed in.  It, rather than
   the integer-mode copy that may be made here, is what the recursive
   store_bit_field_1 call on a temporary register further down uses.  */
1034 179378 : rtx orig_value = value;
1035 179378 : scalar_int_mode value_mode;
1036 179378 : if (GET_MODE (value) == VOIDmode)
1037 : /* By this point we've dealt with values that are bigger than a word,
1038 : so word_mode is a conservatively correct choice. */
1039 106953 : value_mode = word_mode;
1040 72425 : else if (!is_a <scalar_int_mode> (GET_MODE (value), &value_mode))
1041 : {
1042 1092 : value_mode = int_mode_for_mode (GET_MODE (value)).require ();
1043 1092 : value = gen_reg_rtx (value_mode);
1044 1092 : emit_move_insn (gen_lowpart (GET_MODE (orig_value), value), orig_value);
1045 : }
1046 :
1047 : /* If OP0 is a multi-word register, narrow it to the affected word.
1048 : If the region spans two words, defer to store_split_bit_field.
1049 : Don't do this if op0 is a single hard register wider than word
1050 : such as a float or vector register. */
1051 179378 : if (!MEM_P (op0)
1052 225032 : && GET_MODE_SIZE (op0_mode.require ()) > UNITS_PER_WORD
1053 215535 : && (!REG_P (op0)
1054 36136 : || !HARD_REGISTER_P (op0)
1055 143223 : || hard_regno_nregs (REGNO (op0), op0_mode.require ()) != 1))
1056 : {
1057 36230 : if (bitnum % BITS_PER_WORD + bitsize > BITS_PER_WORD)
1058 : {
1059 698 : if (!fallback_p)
1060 : return false;
1061 :
1062 71 : store_split_bit_field (op0, op0_mode, bitsize, bitnum,
1063 : bitregion_start, bitregion_end,
1064 : value, value_mode, reverse);
1065 71 : return true;
1066 : }
1067 35457 : rtx new_op0
1068 35457 : = simplify_gen_subreg (word_mode, op0, op0_mode.require (),
1069 35532 : bitnum / BITS_PER_WORD * UNITS_PER_WORD);
1070 35457 : if (!new_op0)
1071 : {
1072 : /* No valid word-mode SUBREG of op0 at this offset. Defer to
1073 : store_split_bit_field, which addresses op0 a word at a time. */
1074 0 : if (!fallback_p)
1075 : return false;
1076 0 : store_split_bit_field (op0, op0_mode, bitsize, bitnum,
1077 : bitregion_start, bitregion_end,
1078 : value, value_mode, reverse);
1079 0 : return true;
1080 : }
/* From here on OP0, OP0_MODE and BITNUM describe only the selected
   word, with BITNUM reduced to a position inside that word.  */
1081 35457 : op0 = new_op0;
1082 35457 : op0_mode = word_mode;
1083 35532 : bitnum %= BITS_PER_WORD;
1084 : }
1085 :
1086 : /* From here on we can assume that the field to be stored in fits
1087 : within a word. If the destination is a register, it too fits
1088 : in a word. */
1089 :
1090 178680 : extraction_insn insv;
1091 178680 : if (!MEM_P (op0)
1092 111374 : && !reverse
1093 111357 : && get_best_reg_extraction_insn (&insv, EP_insv,
1094 222714 : GET_MODE_BITSIZE (op0_mode.require ()),
1095 : fieldmode)
1096 290035 : && store_bit_field_using_insv (&insv, op0, op0_mode,
1097 : bitsize, bitnum, value, value_mode))
1098 2045 : return true;
1099 :
1100 : /* If OP0 is a memory, try copying it to a register and seeing if a
1101 : cheap register alternative is available. */
1102 176635 : if (MEM_P (op0) && !reverse)
1103 : {
1104 66906 : if (get_best_mem_extraction_insn (&insv, EP_insv, bitsize, bitnum,
1105 : fieldmode)
1106 66906 : && store_bit_field_using_insv (&insv, op0, op0_mode,
1107 : bitsize, bitnum, value, value_mode))
1108 0 : return true;
1109 :
1110 66906 : rtx_insn *last = get_last_insn ();
1111 :
1112 : /* Try loading part of OP0 into a register, inserting the bitfield
1113 : into that, and then copying the result back to OP0. */
1114 66906 : unsigned HOST_WIDE_INT bitpos;
1115 66906 : rtx xop0 = adjust_bit_field_mem_for_reg (EP_insv, op0, bitsize, bitnum,
1116 : bitregion_start, bitregion_end,
1117 : fieldmode, &bitpos);
1118 66906 : if (xop0)
1119 : {
1120 63067 : rtx tempreg = copy_to_reg (xop0);
/* FALLBACK_P is passed as false for this attempt.  If it fails,
   everything emitted since LAST, including the copy into TEMPREG, is
   deleted and control falls through to the FALLBACK_P check below.  */
1121 63067 : if (store_bit_field_1 (tempreg, bitsize, bitpos,
1122 : bitregion_start, bitregion_end,
1123 : fieldmode, orig_value, reverse, false, false))
1124 : {
1125 0 : emit_move_insn (xop0, tempreg);
1126 0 : return true;
1127 : }
1128 63067 : delete_insns_since (last);
1129 : }
1130 : }
1131 :
1132 176635 : if (!fallback_p)
1133 : return false;
1134 :
1135 114195 : store_fixed_bit_field (op0, op0_mode, bitsize, bitnum, bitregion_start,
1136 : bitregion_end, value, value_mode, reverse);
1137 114195 : return true;
1138 : }
1139 :
1140 : /* Generate code to store value from rtx VALUE
1141 : into a bit-field within structure STR_RTX
1142 : containing BITSIZE bits starting at bit BITNUM.
1143 :
1144 : BITREGION_START is bitpos of the first bitfield in this region.
1145 : BITREGION_END is the bitpos of the ending bitfield in this region.
1146 : These two fields are 0, if the C++ memory model does not apply,
1147 : or we are not interested in keeping track of bitfield regions.
1148 :
1149 : FIELDMODE is the machine-mode of the FIELD_DECL node for this field.
1150 :
1151 : If REVERSE is true, the store is to be done in reverse order.
1152 :
1153 : If UNDEFINED_P is true then STR_RTX is currently undefined. */
/* The function proceeds in three steps:
   1. If BITSIZE and BITNUM are constant, FIELDMODE is a scalar integer
      mode and strict_volatile_bitfield_p accepts the access, the store
      is done in that mode and the function returns.  A field that
      fills the mode is a plain move; otherwise the narrowed memory is
      copied to a register, the field is stored into the register and
      the register is moved back.
   2. For a MEM with a nonzero BITREGION_START, STR_RTX is re-addressed
      at the start of the bit region and BITNUM and BITREGION_END are
      rebased to match.
   3. store_bit_field_1 is called with FALLBACK_P true; a false return
      from it is treated as unreachable.  */
1154 :
1155 : void
1156 820727 : store_bit_field (rtx str_rtx, poly_uint64 bitsize, poly_uint64 bitnum,
1157 : poly_uint64 bitregion_start, poly_uint64 bitregion_end,
1158 : machine_mode fieldmode,
1159 : rtx value, bool reverse, bool undefined_p)
1160 : {
1161 : /* Handle -fstrict-volatile-bitfields in the cases where it applies. */
1162 820727 : unsigned HOST_WIDE_INT ibitsize = 0, ibitnum = 0;
1163 820727 : scalar_int_mode int_mode;
1164 820727 : if (bitsize.is_constant (&ibitsize)
1165 820727 : && bitnum.is_constant (&ibitnum)
1166 1546378 : && is_a <scalar_int_mode> (fieldmode, &int_mode)
1167 725655 : && strict_volatile_bitfield_p (str_rtx, ibitsize, ibitnum, int_mode,
1168 : bitregion_start, bitregion_end))
1169 : {
1170 : /* Storing of a full word can be done with a simple store.
1171 : We know here that the field can be accessed with one single
1172 : instruction. For targets that support unaligned memory,
1173 : an unaligned access may be necessary. */
1174 8 : if (ibitsize == GET_MODE_BITSIZE (int_mode))
1175 : {
1176 0 : str_rtx = adjust_bitfield_address (str_rtx, int_mode,
1177 : ibitnum / BITS_PER_UNIT);
1178 0 : if (reverse)
1179 0 : value = flip_storage_order (int_mode, value);
/* NOTE(review): the byte offset IBITNUM / BITS_PER_UNIT has already
   been applied to STR_RTX above; this assertion checks afterwards that
   the division dropped no sub-byte remainder.  */
1180 0 : gcc_assert (ibitnum % BITS_PER_UNIT == 0);
1181 0 : emit_move_insn (str_rtx, value);
1182 : }
1183 : else
1184 : {
1185 4 : rtx temp;
1186 :
1187 4 : str_rtx = narrow_bit_field_mem (str_rtx, int_mode, ibitsize,
1188 : ibitnum, &ibitnum);
1189 8 : gcc_assert (ibitnum + ibitsize <= GET_MODE_BITSIZE (int_mode));
1190 4 : temp = copy_to_reg (str_rtx);
1191 4 : if (!store_bit_field_1 (temp, ibitsize, ibitnum, 0, 0,
1192 : int_mode, value, reverse, true, undefined_p))
1193 0 : gcc_unreachable ();
1194 :
1195 4 : emit_move_insn (str_rtx, temp);
1196 : }
1197 :
1198 4 : return;
1199 : }
1200 :
1201 : /* Under the C++0x memory model, we must not touch bits outside the
1202 : bit region. Adjust the address to start at the beginning of the
1203 : bit region. */
1204 820723 : if (MEM_P (str_rtx) && maybe_ne (bitregion_start, 0U))
1205 : {
1206 51044 : scalar_int_mode best_mode;
/* ADDR_MODE stays VOIDmode unless BITSIZE and BITNUM are constant and
   get_best_mode finds a mode for them below.  */
1207 51044 : machine_mode addr_mode = VOIDmode;
1208 :
/* OFFSET is the byte offset of the bit region within STR_RTX.  BITNUM
   and BITREGION_END are rebased so that the region starts at bit 0 of
   the re-addressed memory, and SIZE is the number of bytes from the
   region start to the end of the field, rounded up.  */
1209 51044 : poly_uint64 offset = exact_div (bitregion_start, BITS_PER_UNIT);
1210 102088 : bitnum -= bitregion_start;
1211 51044 : poly_int64 size = bits_to_bytes_round_up (bitnum + bitsize);
1212 51044 : bitregion_end -= bitregion_start;
1213 51044 : bitregion_start = 0;
1214 51044 : if (bitsize.is_constant (&ibitsize)
1215 51044 : && bitnum.is_constant (&ibitnum)
1216 51044 : && get_best_mode (ibitsize, ibitnum,
1217 : bitregion_start, bitregion_end,
1218 51044 : MEM_ALIGN (str_rtx), INT_MAX,
1219 51044 : MEM_VOLATILE_P (str_rtx), &best_mode))
1220 47642 : addr_mode = best_mode;
1221 51044 : str_rtx = adjust_bitfield_address_size (str_rtx, addr_mode,
1222 : offset, size);
1223 : }
1224 :
1225 820723 : if (!store_bit_field_1 (str_rtx, bitsize, bitnum,
1226 : bitregion_start, bitregion_end,
1227 : fieldmode, value, reverse, true, undefined_p))
1228 0 : gcc_unreachable ();
1229 : }
1230 :
1231 : /* Use shifts and boolean operations to store VALUE into a bit field of
1232 : width BITSIZE in OP0, starting at bit BITNUM. If OP0_MODE is defined,
1233 : it is the mode of OP0, otherwise OP0 is a BLKmode MEM. VALUE_MODE is
1234 : the mode of VALUE.
1235 :
1236 : If REVERSE is true, the store is to be done in reverse order. */
/* This function only selects the mode in which the store is done; the
   store itself is emitted by store_fixed_bit_field_1.  For a MEM, the
   mode comes from get_best_mode, limited to BITS_PER_WORD and, when
   OP0_MODE is defined and narrower, to the width of OP0_MODE; OP0 and
   BITNUM are then narrowed to that mode.  If get_best_mode finds no
   mode, the whole store is handed to store_split_bit_field.  For
   anything other than a MEM, OP0_MODE must be defined and is used
   as is.  */
1237 :
1238 : static void
1239 131650 : store_fixed_bit_field (rtx op0, opt_scalar_int_mode op0_mode,
1240 : unsigned HOST_WIDE_INT bitsize,
1241 : unsigned HOST_WIDE_INT bitnum,
1242 : poly_uint64 bitregion_start, poly_uint64 bitregion_end,
1243 : rtx value, scalar_int_mode value_mode, bool reverse)
1244 : {
1245 : /* There is a case not handled here:
1246 : a structure with a known alignment of just a halfword
1247 : and a field split across two aligned halfwords within the structure.
1248 : Or likewise a structure with a known alignment of just a byte
1249 : and a field split across two bytes.
1250 : Such cases are not supposed to be able to occur. */
1251 :
1252 131650 : scalar_int_mode best_mode;
1253 131650 : if (MEM_P (op0))
1254 : {
/* MAX_BITSIZE caps the width of the mode get_best_mode may pick: one
   word, or the width of OP0's own mode when that is smaller.  */
1255 84619 : unsigned int max_bitsize = BITS_PER_WORD;
1256 84619 : scalar_int_mode imode;
1257 144499 : if (op0_mode.exists (&imode) && GET_MODE_BITSIZE (imode) < max_bitsize)
1258 90886 : max_bitsize = GET_MODE_BITSIZE (imode);
1259 :
1260 84619 : if (!get_best_mode (bitsize, bitnum, bitregion_start, bitregion_end,
1261 84619 : MEM_ALIGN (op0), max_bitsize, MEM_VOLATILE_P (op0),
1262 : &best_mode))
1263 : {
1264 : /* The only way this should occur is if the field spans word
1265 : boundaries. */
1266 6547 : store_split_bit_field (op0, op0_mode, bitsize, bitnum,
1267 : bitregion_start, bitregion_end,
1268 : value, value_mode, reverse);
1269 6547 : return;
1270 : }
1271 :
1272 78072 : op0 = narrow_bit_field_mem (op0, best_mode, bitsize, bitnum, &bitnum);
1273 : }
1274 : else
1275 47031 : best_mode = op0_mode.require ();
1276 :
1277 125103 : store_fixed_bit_field_1 (op0, best_mode, bitsize, bitnum,
1278 : value, value_mode, reverse);
1279 : }
1280 :
1281 : /* Helper function for store_fixed_bit_field, stores
1282 : the bit field always using MODE, which is the mode of OP0. The other
1283 : arguments are as for store_fixed_bit_field. */
/* The emitted sequence is: bring VALUE into MODE, masked to BITSIZE
   bits where needed and shifted left to the field position; load OP0
   into a register; AND it with a mask that clears the field bits; IOR
   in the shifted VALUE; and move the result back into OP0.  When VALUE
   is a CONST_INT, the AND is omitted if the field value is all ones
   and the IOR is omitted if it is zero.  */
1284 :
1285 : static void
1286 125103 : store_fixed_bit_field_1 (rtx op0, scalar_int_mode mode,
1287 : unsigned HOST_WIDE_INT bitsize,
1288 : unsigned HOST_WIDE_INT bitnum,
1289 : rtx value, scalar_int_mode value_mode, bool reverse)
1290 : {
1291 125103 : rtx temp;
/* ALL_ZERO and ALL_ONE are set only for a CONST_INT VALUE, when its
   low BITSIZE bits are all zeros or all ones respectively.  */
1292 125103 : int all_zero = 0;
1293 125103 : int all_one = 0;
1294 :
1295 : /* Note that bitsize + bitnum can be greater than GET_MODE_BITSIZE (mode)
1296 : for invalid input, such as f5 from gcc.dg/pr48335-2.c. */
1297 :
1298 125103 : if (reverse ? !BYTES_BIG_ENDIAN : BYTES_BIG_ENDIAN)
1299 : /* BITNUM is the distance between our msb
1300 : and that of the containing datum.
1301 : Convert it to the distance from the lsb. */
1302 1058 : bitnum = GET_MODE_BITSIZE (mode) - bitsize - bitnum;
1303 :
1304 : /* Now BITNUM is always the distance between our lsb
1305 : and that of OP0. */
1306 :
1307 : /* Shift VALUE left by BITNUM bits. If VALUE is not constant,
1308 : we must first convert its mode to MODE. */
1309 :
1310 125103 : if (CONST_INT_P (value))
1311 : {
1312 74553 : unsigned HOST_WIDE_INT v = UINTVAL (value);
1313 :
/* The mask is only built when BITSIZE is smaller than the width of
   HOST_WIDE_INT, which keeps the shift count below that width.  */
1314 74553 : if (bitsize < HOST_BITS_PER_WIDE_INT)
1315 74535 : v &= (HOST_WIDE_INT_1U << bitsize) - 1;
1316 :
1317 74553 : if (v == 0)
1318 : all_zero = 1;
1319 59375 : else if ((bitsize < HOST_BITS_PER_WIDE_INT
1320 59367 : && v == (HOST_WIDE_INT_1U << bitsize) - 1)
1321 50894 : || (bitsize == HOST_BITS_PER_WIDE_INT
1322 50894 : && v == HOST_WIDE_INT_M1U))
1323 8481 : all_one = 1;
1324 :
1325 74553 : value = lshift_value (mode, v, bitnum);
1326 : }
1327 : else
1328 : {
/* The AND with the BITSIZE-bit mask is emitted only when VALUE_MODE is
   not exactly BITSIZE bits wide and the field does not end at the top
   bit of MODE.  NOTE(review): presumably in the latter case the left
   shift below discards any excess high bits -- confirm.  */
1329 50550 : int must_and = (GET_MODE_BITSIZE (value_mode) != bitsize
1330 77930 : && bitnum + bitsize != GET_MODE_BITSIZE (mode));
1331 :
1332 50550 : if (value_mode != mode)
1333 28028 : value = convert_to_mode (mode, value, 1);
1334 :
1335 50550 : if (must_and)
1336 20626 : value = expand_binop (mode, and_optab, value,
1337 : mask_rtx (mode, 0, bitsize, 0),
1338 : NULL_RTX, 1, OPTAB_LIB_WIDEN);
1339 50550 : if (bitnum > 0)
1340 13985 : value = expand_shift (LSHIFT_EXPR, mode, value,
1341 13985 : bitnum, NULL_RTX, 1);
1342 : }
1343 :
1344 125103 : if (reverse)
1345 529 : value = flip_storage_order (mode, value);
1346 :
1347 : /* Now clear the chosen bits in OP0,
1348 : except that if VALUE is -1 we need not bother. */
1349 : /* We keep the intermediates in registers to allow CSE to combine
1350 : consecutive bitfield assignments. */
1351 :
1352 125103 : temp = force_reg (mode, op0);
1353 :
1354 125103 : if (! all_one)
1355 : {
1356 116622 : rtx mask = mask_rtx (mode, bitnum, bitsize, 1);
1357 116622 : if (reverse)
1358 517 : mask = flip_storage_order (mode, mask);
1359 116622 : temp = expand_binop (mode, and_optab, temp, mask,
1360 : NULL_RTX, 1, OPTAB_LIB_WIDEN);
1361 116622 : temp = force_reg (mode, temp);
1362 : }
1363 :
1364 : /* Now logical-or VALUE into OP0, unless it is zero. */
1365 :
1366 125103 : if (! all_zero)
1367 : {
1368 109925 : temp = expand_binop (mode, ior_optab, temp, value,
1369 : NULL_RTX, 1, OPTAB_LIB_WIDEN);
1370 109925 : temp = force_reg (mode, temp);
1371 : }
1372 :
/* Write the combined result back, unless OP0 already is the register
   holding it.  The move targets a copy_rtx of OP0.  */
1373 125103 : if (op0 != temp)
1374 : {
1375 125103 : op0 = copy_rtx (op0);
1376 125103 : emit_move_insn (op0, temp);
1377 : }
1378 125103 : }
1379 :
1380 : /* Store a bit field that is split across multiple accessible memory objects.
1381 :
1382 : OP0 is the REG, SUBREG or MEM rtx for the first of the objects.
1383 : BITSIZE is the field width; BITPOS the position of its first bit
1384 : (within the word).
1385 : VALUE is the value to store, which has mode VALUE_MODE.
1386 : If OP0_MODE is defined, it is the mode of OP0, otherwise OP0 is
1387 : a BLKmode MEM.
1388 :
1389 : If REVERSE is true, the store is to be done in reverse order.
1390 :
1391 : This does not yet handle fields wider than BITS_PER_WORD. */
/* The field is stored piece by piece.  UNIT is the width in bits of
   the chunks OP0 is accessed in, and BITSDONE counts the bits stored
   so far.  Each iteration of the loop computes OFFSET (the index of
   the UNIT-sized chunk of OP0 being written) and THISPOS (the bit
   position inside that chunk), picks THISSIZE bits, extracts the
   matching bits of VALUE into PART, and stores PART with
   store_fixed_bit_field.  */
1392 :
1393 : static void
1394 6618 : store_split_bit_field (rtx op0, opt_scalar_int_mode op0_mode,
1395 : unsigned HOST_WIDE_INT bitsize,
1396 : unsigned HOST_WIDE_INT bitpos,
1397 : poly_uint64 bitregion_start, poly_uint64 bitregion_end,
1398 : rtx value, scalar_int_mode value_mode, bool reverse)
1399 : {
1400 6618 : unsigned int unit, total_bits, bitsdone = 0;
1401 :
1402 : /* Make sure UNIT isn't larger than BITS_PER_WORD, we can only handle that
1403 : much at a time. */
1404 6618 : if (REG_P (op0) || GET_CODE (op0) == SUBREG)
1405 71 : unit = BITS_PER_WORD;
1406 : else
1407 6547 : unit = MIN (MEM_ALIGN (op0), BITS_PER_WORD);
1408 :
1409 : /* If OP0 is a memory with a mode, then UNIT must not be larger than
1410 : OP0's mode as well. Otherwise, store_fixed_bit_field will call us
1411 : again, and we will mutually recurse forever. */
1412 6618 : if (MEM_P (op0) && op0_mode.exists ())
1413 5111 : unit = MIN (unit, GET_MODE_BITSIZE (op0_mode.require ()));
1414 :
1415 : /* If VALUE is a constant other than a CONST_INT, get it into a register in
1416 : WORD_MODE. If we can do this using gen_lowpart_common, do so. Note
1417 : that VALUE might be a floating-point constant. */
1418 6618 : if (CONSTANT_P (value) && !CONST_INT_P (value))
1419 : {
1420 0 : rtx word = gen_lowpart_common (word_mode, value);
1421 :
1422 0 : if (word && (value != word))
1423 : value = word;
1424 : else
1425 0 : value = gen_lowpart_common (word_mode, force_reg (value_mode, value));
1426 0 : value_mode = word_mode;
1427 : }
1428 :
/* TOTAL_BITS is the width of VALUE_MODE; it is used below to locate
   pieces of a non-constant VALUE counted from its other end.  */
1429 6618 : total_bits = GET_MODE_BITSIZE (value_mode);
1430 :
1431 30881 : while (bitsdone < bitsize)
1432 : {
1433 24263 : unsigned HOST_WIDE_INT thissize;
1434 24263 : unsigned HOST_WIDE_INT thispos;
1435 24263 : unsigned HOST_WIDE_INT offset;
1436 24263 : rtx part;
1437 :
1438 24263 : offset = (bitpos + bitsdone) / unit;
1439 24263 : thispos = (bitpos + bitsdone) % unit;
1440 :
1441 : /* When region of bytes we can touch is restricted, decrease
1442 : UNIT close to the end of the region as needed. If op0 is a REG
1443 : or SUBREG of REG, don't do this, as there can't be data races
1444 : on a register and we can expand shorter code in some cases. */
1445 31071 : if (maybe_ne (bitregion_end, 0U)
1446 24263 : && unit > BITS_PER_UNIT
1447 14225 : && maybe_gt (bitpos + bitsdone - thispos + unit, bitregion_end + 1)
1448 6872 : && !REG_P (op0)
1449 31071 : && (GET_CODE (op0) != SUBREG || !REG_P (SUBREG_REG (op0))))
1450 : {
/* Retry this iteration with a smaller UNIT.  BITSDONE is unchanged,
   and OFFSET and THISPOS are recomputed at the top of the loop.  */
1451 6808 : unit = unit / 2;
1452 6808 : continue;
1453 : }
1454 :
1455 : /* THISSIZE must not overrun a word boundary. Otherwise,
1456 : store_fixed_bit_field will call us again, and we will mutually
1457 : recurse forever. */
1458 17455 : thissize = MIN (bitsize - bitsdone, BITS_PER_WORD);
1459 17455 : thissize = MIN (thissize, unit - thispos);
1460 :
1461 17455 : if (reverse ? !BYTES_BIG_ENDIAN : BYTES_BIG_ENDIAN)
1462 : {
1463 : /* Fetch successively less significant portions. */
1464 214 : if (CONST_INT_P (value))
1465 108 : part = GEN_INT (((unsigned HOST_WIDE_INT) (INTVAL (value))
1466 : >> (bitsize - bitsdone - thissize))
1467 : & ((HOST_WIDE_INT_1 << thissize) - 1));
1468 : /* Likewise, but the source is little-endian. */
1469 106 : else if (reverse)
1470 106 : part = extract_fixed_bit_field (word_mode, value, value_mode,
1471 : thissize,
1472 : bitsize - bitsdone - thissize,
1473 : NULL_RTX, 1, false);
1474 : else
1475 : /* The args are chosen so that the last part includes the
1476 : lsb. Give extract_bit_field the value it needs (with
1477 : endianness compensation) to fetch the piece we want. */
1478 : part = extract_fixed_bit_field (word_mode, value, value_mode,
1479 : thissize,
1480 : total_bits - bitsize + bitsdone,
1481 : NULL_RTX, 1, false);
1482 : }
1483 : else
1484 : {
1485 : /* Fetch successively more significant portions. */
1486 17241 : if (CONST_INT_P (value))
1487 12867 : part = GEN_INT (((unsigned HOST_WIDE_INT) (INTVAL (value))
1488 : >> bitsdone)
1489 : & ((HOST_WIDE_INT_1 << thissize) - 1));
1490 : /* Likewise, but the source is big-endian. */
1491 4374 : else if (reverse)
1492 : part = extract_fixed_bit_field (word_mode, value, value_mode,
1493 : thissize,
1494 : total_bits - bitsdone - thissize,
1495 : NULL_RTX, 1, false);
1496 : else
1497 4374 : part = extract_fixed_bit_field (word_mode, value, value_mode,
1498 : thissize, bitsdone, NULL_RTX,
1499 : 1, false);
1500 : }
1501 :
1502 : /* If OP0 is a register, then handle OFFSET here. */
1503 17455 : rtx op0_piece = op0;
1504 17455 : opt_scalar_int_mode op0_piece_mode = op0_mode;
1505 17455 : if (SUBREG_P (op0) || REG_P (op0))
1506 : {
1507 142 : scalar_int_mode imode;
1508 142 : if (op0_mode.exists (&imode)
1509 142 : && GET_MODE_SIZE (imode) < UNITS_PER_WORD)
1510 : {
/* OP0 is narrower than a word, so a nonzero OFFSET lies outside it;
   const0_rtx marks the piece as one to be skipped below.  */
1511 0 : if (offset)
1512 0 : op0_piece = const0_rtx;
1513 : }
1514 : else
1515 : {
1516 142 : op0_piece = operand_subword_force (op0,
1517 142 : offset * unit / BITS_PER_WORD,
1518 142 : GET_MODE (op0));
1519 142 : op0_piece_mode = word_mode;
1520 : }
/* Reduce OFFSET to an index within a single word.  NOTE(review): the
   mask form assumes BITS_PER_WORD / UNIT is a power of two -- confirm.  */
1521 142 : offset &= BITS_PER_WORD / unit - 1;
1522 : }
1523 :
1524 : /* OFFSET is in UNITs, and UNIT is in bits. If OP0_PIECE is const0_rtx,
1525 : it is just an out-of-bounds access. Ignore it. */
1526 17455 : if (op0_piece != const0_rtx)
1527 17455 : store_fixed_bit_field (op0_piece, op0_piece_mode, thissize,
1528 17455 : offset * unit + thispos, bitregion_start,
1529 : bitregion_end, part, word_mode, reverse);
1530 17455 : bitsdone += thissize;
1531 : }
1532 6618 : }
1533 :
1534 : /* A subroutine of extract_bit_field_1 that converts return value X
1535 : to either MODE or TMODE. MODE, TMODE and UNSIGNEDP are arguments
1536 : to extract_bit_field. */
/* X is returned unchanged when its mode is already TMODE or MODE.
   In every other case the result has mode TMODE.  */
1537 :
1538 : static rtx
1539 861848 : convert_extracted_bit_field (rtx x, machine_mode mode,
1540 : machine_mode tmode, bool unsignedp)
1541 : {
1542 861848 : if (GET_MODE (x) == tmode || GET_MODE (x) == mode)
1543 : return x;
1544 :
1545 : /* If TMODE is not a scalar integral mode, first convert X to the
1546 : integer mode of that size and then access it as a TMODE
1547 : value via gen_lowpart. */
1548 21513 : if (!SCALAR_INT_MODE_P (tmode))
1549 : {
1550 11528 : scalar_int_mode int_mode = int_mode_for_mode (tmode).require ();
1551 11528 : x = convert_to_mode (int_mode, x, unsignedp);
1552 11528 : x = force_reg (int_mode, x);
1553 11528 : return gen_lowpart (tmode, x);
1554 : }
1555 :
1556 9985 : return convert_to_mode (tmode, x, unsignedp);
1557 : }
1558 :
1559 : /* Try to use an ext(z)v pattern to extract a field from OP0.
1560 : Return the extracted value on success, otherwise return null.
1561 : EXTV describes the extraction instruction to use. If OP0_MODE
1562 : is defined, it is the mode of OP0, otherwise OP0 is a BLKmode MEM.
1563 : The other arguments are as for extract_bit_field. */
/* Null is returned when BITSIZE is zero or wider than the pattern's
   field mode, when OP0 is a SUBREG whose mode differs from that field
   mode, or when the pattern cannot be expanded for these operands.

   SPEC_TARGET is the target the result should end up in: the caller's
   TARGET, or a fresh TMODE register when none was given.
   SPEC_TARGET_SUBREG is set when the operand given to the pattern is
   a lowpart of SPEC_TARGET in the wider field mode, so that a write
   to it can be reported to the caller as SPEC_TARGET itself.  */
1564 :
1565 : static rtx
1566 148154 : extract_bit_field_using_extv (const extraction_insn *extv, rtx op0,
1567 : opt_scalar_int_mode op0_mode,
1568 : unsigned HOST_WIDE_INT bitsize,
1569 : unsigned HOST_WIDE_INT bitnum,
1570 : int unsignedp, rtx target,
1571 : machine_mode mode, machine_mode tmode)
1572 : {
1573 148154 : class expand_operand ops[4];
1574 148154 : rtx spec_target = target;
1575 148154 : rtx spec_target_subreg = 0;
1576 148154 : scalar_int_mode ext_mode = extv->field_mode;
1577 148154 : unsigned unit = GET_MODE_BITSIZE (ext_mode);
1578 :
1579 148154 : if (bitsize == 0 || unit < bitsize)
1580 : return NULL_RTX;
1581 :
1582 148154 : if (MEM_P (op0))
1583 : /* Get a reference to the first byte of the field. */
1584 0 : op0 = narrow_bit_field_mem (op0, extv->struct_mode, bitsize, bitnum,
1585 : &bitnum);
1586 : else
1587 : {
1588 : /* Convert from counting within OP0 to counting in EXT_MODE. */
1589 148154 : if (BYTES_BIG_ENDIAN)
1590 : bitnum += unit - GET_MODE_BITSIZE (op0_mode.require ());
1591 :
1592 : /* If op0 is a register, we need it in EXT_MODE to make it
1593 : acceptable to the format of ext(z)v. */
1594 148154 : if (GET_CODE (op0) == SUBREG && op0_mode.require () != ext_mode)
1595 0 : return NULL_RTX;
1596 148154 : if (REG_P (op0) && op0_mode.require () != ext_mode)
1597 49857 : op0 = gen_lowpart_SUBREG (ext_mode, op0);
1598 : }
1599 :
1600 : /* If BITS_BIG_ENDIAN is zero on a BYTES_BIG_ENDIAN machine, we count
1601 : "backwards" from the size of the unit we are extracting from.
1602 : Otherwise, we count bits from the most significant on a
1603 : BYTES/BITS_BIG_ENDIAN machine. */
1604 :
1605 148154 : if (BITS_BIG_ENDIAN != BYTES_BIG_ENDIAN)
1606 : bitnum = unit - bitsize - bitnum;
1607 :
1608 148154 : if (target == 0)
1609 28782 : target = spec_target = gen_reg_rtx (tmode);
1610 :
1611 148154 : if (GET_MODE (target) != ext_mode)
1612 : {
1613 81025 : rtx temp;
1614 : /* Don't use LHS paradoxical subreg if explicit truncation is needed
1615 : between the mode of the extraction (word_mode) and the target
1616 : mode. Instead, create a temporary and use convert_move to set
1617 : the target. */
1618 81025 : if (REG_P (target)
1619 79917 : && TRULY_NOOP_TRUNCATION_MODES_P (GET_MODE (target), ext_mode)
1620 160942 : && (temp = gen_lowpart_if_possible (ext_mode, target)))
1621 : {
1622 79407 : target = temp;
1623 79407 : if (partial_subreg_p (GET_MODE (spec_target), ext_mode))
1624 78269 : spec_target_subreg = target;
1625 : }
1626 : else
1627 1618 : target = gen_reg_rtx (ext_mode);
1628 : }
1629 :
1630 148154 : create_output_operand (&ops[0], target, ext_mode);
1631 148154 : create_fixed_operand (&ops[1], op0);
1632 148154 : create_integer_operand (&ops[2], bitsize);
1633 148154 : create_integer_operand (&ops[3], bitnum);
1634 148154 : if (maybe_expand_insn (extv->icode, 4, ops))
1635 : {
/* The output operand may differ from the target the caller asked for.
   Return the caller's target when the insn wrote to it or to its
   EXT_MODE lowpart; otherwise convert the result.  */
1636 1698 : target = ops[0].value;
1637 1698 : if (target == spec_target)
1638 : return target;
1639 1698 : if (target == spec_target_subreg)
1640 : return spec_target;
1641 56 : return convert_extracted_bit_field (target, mode, tmode, unsignedp);
1642 : }
1643 : return NULL_RTX;
1644 : }
1645 :
1646 : /* See whether it would be valid to extract the part of OP0 with
1647 : mode OP0_MODE described by BITNUM and BITSIZE into a value of
1648 : mode MODE using a subreg operation.
1649 : Return the subreg if so, otherwise return null. */
1650 :
1651 : static rtx
1652 816002 : extract_bit_field_as_subreg (machine_mode mode, rtx op0,
1653 : machine_mode op0_mode,
1654 : poly_uint64 bitsize, poly_uint64 bitnum)
1655 : {
1656 816002 : poly_uint64 bytenum;
1657 816002 : if (multiple_p (bitnum, BITS_PER_UNIT, &bytenum)
1658 777162 : && known_eq (bitsize, GET_MODE_BITSIZE (mode))
1659 816002 : && lowpart_bit_field_p (bitnum, bitsize, op0_mode)
1660 1593164 : && TRULY_NOOP_TRUNCATION_MODES_P (mode, op0_mode))
1661 659062 : return force_subreg (mode, op0, op0_mode, bytenum);
1662 : return NULL_RTX;
1663 : }
1664 :
1665 : /* A subroutine of extract_bit_field, with the same arguments.
1666 : If UNSIGNEDP is -1, the result need not be sign or zero extended.
1667 : If FALLBACK_P is true, fall back to extract_fixed_bit_field
1668 : if we can find no other means of implementing the operation.
1669 : if FALLBACK_P is false, return NULL instead. */
1670 :
1671 : static rtx
1672 1101873 : extract_bit_field_1 (rtx str_rtx, poly_uint64 bitsize, poly_uint64 bitnum,
1673 : int unsignedp, rtx target, machine_mode mode,
1674 : machine_mode tmode, bool reverse, bool fallback_p,
1675 : rtx *alt_rtl)
1676 : {
1677 1101873 : rtx op0 = str_rtx;
1678 1101873 : machine_mode mode1;
1679 :
1680 1101873 : if (tmode == VOIDmode)
1681 0 : tmode = mode;
1682 :
1683 1112228 : while (GET_CODE (op0) == SUBREG)
1684 : {
1685 10355 : bitnum += SUBREG_BYTE (op0) * BITS_PER_UNIT;
1686 10355 : op0 = SUBREG_REG (op0);
1687 : }
1688 :
1689 : /* If we have an out-of-bounds access to a register, just return an
1690 : uninitialized register of the required mode. This can occur if the
1691 : source code contains an out-of-bounds access to a small array. */
1692 2019808 : if (REG_P (op0) && known_ge (bitnum, GET_MODE_BITSIZE (GET_MODE (op0))))
1693 0 : return gen_reg_rtx (tmode);
1694 :
1695 1101873 : if (REG_P (op0)
1696 917935 : && mode == GET_MODE (op0)
1697 148679 : && known_eq (bitnum, 0U)
1698 1336589 : && known_eq (bitsize, GET_MODE_BITSIZE (GET_MODE (op0))))
1699 : {
1700 17716 : if (reverse)
1701 0 : op0 = flip_storage_order (mode, op0);
1702 : /* We're trying to extract a full register from itself. */
1703 17716 : return op0;
1704 : }
1705 :
1706 : /* First try to check for vector from vector extractions. */
1707 1016450 : if (VECTOR_MODE_P (GET_MODE (op0))
1708 86916 : && !MEM_P (op0)
1709 86124 : && VECTOR_MODE_P (tmode)
1710 12879 : && known_eq (bitsize, GET_MODE_PRECISION (tmode))
1711 2194072 : && maybe_gt (GET_MODE_SIZE (GET_MODE (op0)), GET_MODE_SIZE (tmode)))
1712 : {
1713 12879 : machine_mode new_mode = GET_MODE (op0);
1714 38637 : if (GET_MODE_INNER (new_mode) != GET_MODE_INNER (tmode))
1715 : {
1716 174 : scalar_mode inner_mode = GET_MODE_INNER (tmode);
1717 174 : poly_uint64 nunits;
1718 348 : if (!multiple_p (GET_MODE_BITSIZE (GET_MODE (op0)),
1719 174 : GET_MODE_UNIT_BITSIZE (tmode), &nunits)
1720 348 : || !related_vector_mode (tmode, inner_mode,
1721 174 : nunits).exists (&new_mode)
1722 332 : || maybe_ne (GET_MODE_SIZE (new_mode),
1723 474 : GET_MODE_SIZE (GET_MODE (op0))))
1724 16 : new_mode = VOIDmode;
1725 : }
1726 12879 : poly_uint64 pos;
1727 12879 : if (new_mode != VOIDmode
1728 12863 : && (convert_optab_handler (vec_extract_optab, new_mode, tmode)
1729 : != CODE_FOR_nothing)
1730 25742 : && multiple_p (bitnum, GET_MODE_BITSIZE (tmode), &pos))
1731 : {
1732 9005 : class expand_operand ops[3];
1733 9005 : machine_mode outermode = new_mode;
1734 9005 : machine_mode innermode = tmode;
1735 9005 : enum insn_code icode
1736 9005 : = convert_optab_handler (vec_extract_optab, outermode, innermode);
1737 :
1738 9005 : if (new_mode != GET_MODE (op0))
1739 33 : op0 = gen_lowpart (new_mode, op0);
1740 9005 : create_output_operand (&ops[0], target, innermode);
1741 9005 : ops[0].target = 1;
1742 9005 : create_input_operand (&ops[1], op0, outermode);
1743 9005 : create_integer_operand (&ops[2], pos);
1744 9005 : if (maybe_expand_insn (icode, 3, ops))
1745 : {
1746 9005 : if (alt_rtl && ops[0].target)
1747 205 : *alt_rtl = target;
1748 9005 : target = ops[0].value;
1749 9005 : if (GET_MODE (target) != mode)
1750 9005 : return gen_lowpart (tmode, target);
1751 : return target;
1752 : }
1753 : }
1754 : }
1755 :
1756 : /* See if we can get a better vector mode before extracting. */
1757 1009053 : if (VECTOR_MODE_P (GET_MODE (op0))
1758 77911 : && !MEM_P (op0)
1759 1229390 : && GET_MODE_INNER (GET_MODE (op0)) != tmode)
1760 : {
1761 9484 : machine_mode new_mode;
1762 :
1763 9484 : if (GET_MODE_CLASS (tmode) == MODE_FLOAT)
1764 557 : new_mode = MIN_MODE_VECTOR_FLOAT;
1765 : else if (GET_MODE_CLASS (tmode) == MODE_FRACT)
1766 0 : new_mode = MIN_MODE_VECTOR_FRACT;
1767 : else if (GET_MODE_CLASS (tmode) == MODE_UFRACT)
1768 0 : new_mode = MIN_MODE_VECTOR_UFRACT;
1769 : else if (GET_MODE_CLASS (tmode) == MODE_ACCUM)
1770 0 : new_mode = MIN_MODE_VECTOR_ACCUM;
1771 : else if (GET_MODE_CLASS (tmode) == MODE_UACCUM)
1772 0 : new_mode = MIN_MODE_VECTOR_UACCUM;
1773 : else
1774 8927 : new_mode = MIN_MODE_VECTOR_INT;
1775 :
1776 150308 : FOR_EACH_MODE_FROM (new_mode, new_mode)
1777 299900 : if (known_eq (GET_MODE_SIZE (new_mode), GET_MODE_SIZE (GET_MODE (op0)))
1778 62494 : && known_eq (GET_MODE_UNIT_SIZE (new_mode), GET_MODE_SIZE (tmode))
1779 168918 : && known_eq (bitsize, GET_MODE_UNIT_PRECISION (new_mode))
1780 18968 : && multiple_p (bitnum, GET_MODE_UNIT_PRECISION (new_mode))
1781 9447 : && targetm.vector_mode_supported_p (new_mode)
1782 159379 : && targetm.modes_tieable_p (GET_MODE (op0), new_mode))
1783 : break;
1784 9484 : if (new_mode != VOIDmode)
1785 9126 : op0 = gen_lowpart (new_mode, op0);
1786 : }
1787 :
1788 : /* Use vec_extract patterns for extracting parts of vectors whenever
1789 : available. If that fails, see whether the current modes and bitregion
1790 : give a natural subreg. */
1791 1075152 : machine_mode outermode = GET_MODE (op0);
1792 1075152 : if (VECTOR_MODE_P (outermode) && !MEM_P (op0))
1793 : {
1794 77119 : scalar_mode innermode = GET_MODE_INNER (outermode);
1795 :
1796 77119 : enum insn_code icode
1797 77119 : = convert_optab_handler (vec_extract_optab, outermode, innermode);
1798 :
1799 77119 : poly_uint64 pos;
1800 77119 : if (icode != CODE_FOR_nothing
1801 77122 : && known_eq (bitsize, GET_MODE_PRECISION (innermode))
1802 152325 : && multiple_p (bitnum, GET_MODE_PRECISION (innermode), &pos))
1803 : {
1804 75203 : class expand_operand ops[3];
1805 :
1806 75203 : create_output_operand (&ops[0], target,
1807 75203 : insn_data[icode].operand[0].mode);
1808 75203 : ops[0].target = 1;
1809 75203 : create_input_operand (&ops[1], op0, outermode);
1810 75203 : create_integer_operand (&ops[2], pos);
1811 75203 : if (maybe_expand_insn (icode, 3, ops))
1812 : {
1813 75203 : if (alt_rtl && ops[0].target)
1814 17340 : *alt_rtl = target;
1815 75203 : target = ops[0].value;
1816 75203 : if (GET_MODE (target) != mode)
1817 75203 : return gen_lowpart (tmode, target);
1818 : return target;
1819 : }
1820 : }
1821 : /* Using subregs is useful if we're extracting one register vector
1822 : from a multi-register vector. extract_bit_field_as_subreg checks
1823 : for valid bitsize and bitnum, so we don't need to do that here. */
1824 1916 : if (VECTOR_MODE_P (mode))
1825 : {
1826 47 : rtx sub = extract_bit_field_as_subreg (mode, op0, outermode,
1827 : bitsize, bitnum);
1828 47 : if (sub)
1829 : return sub;
1830 : }
1831 : }
1832 :
1833 : /* Make sure we are playing with integral modes. Pun with subregs
1834 : if we aren't. */
1835 999932 : opt_scalar_int_mode op0_mode = int_mode_for_mode (GET_MODE (op0));
1836 999932 : scalar_int_mode imode;
1837 999932 : if (!op0_mode.exists (&imode) || imode != GET_MODE (op0))
1838 : {
1839 161971 : if (MEM_P (op0))
1840 158353 : op0 = adjust_bitfield_address_size (op0, op0_mode.else_blk (),
1841 : 0, MEM_SIZE (op0));
1842 3618 : else if (op0_mode.exists (&imode))
1843 : {
1844 3587 : op0 = gen_lowpart (imode, op0);
1845 :
1846 : /* If we got a SUBREG, force it into a register since we
1847 : aren't going to be able to do another SUBREG on it. */
1848 3587 : if (GET_CODE (op0) == SUBREG)
1849 3310 : op0 = force_reg (imode, op0);
1850 : }
1851 : else
1852 : {
1853 62 : poly_int64 size = GET_MODE_SIZE (GET_MODE (op0));
1854 31 : rtx mem = assign_stack_temp (GET_MODE (op0), size);
1855 31 : emit_move_insn (mem, op0);
1856 31 : op0 = adjust_bitfield_address_size (mem, BLKmode, 0, size);
1857 : }
1858 : }
1859 :
1860 : /* ??? We currently assume TARGET is at least as big as BITSIZE.
1861 : If that's wrong, the solution is to test for it and set TARGET to 0
1862 : if needed. */
1863 :
1864 : /* Get the mode of the field to use for atomic access or subreg
1865 : conversion. */
1866 999932 : if (!SCALAR_INT_MODE_P (tmode)
1867 999932 : || !mode_for_size (bitsize, GET_MODE_CLASS (tmode), 0).exists (&mode1))
1868 310428 : mode1 = mode;
1869 999932 : gcc_assert (mode1 != BLKmode);
1870 :
1871 : /* Extraction of a full MODE1 value can be done with a subreg as long
1872 : as the least significant bit of the value is the least significant
1873 : bit of either OP0 or a word of OP0. */
1874 999932 : if (!MEM_P (op0) && !reverse && op0_mode.exists (&imode))
1875 : {
1876 815955 : rtx sub = extract_bit_field_as_subreg (mode1, op0, imode,
1877 : bitsize, bitnum);
1878 815955 : if (sub)
1879 652881 : return convert_extracted_bit_field (sub, mode, tmode, unsignedp);
1880 : }
1881 :
1882 : /* Extraction of a full MODE1 value can be done with a load as long as
1883 : the field is on a byte boundary and is sufficiently aligned. */
1884 347051 : poly_uint64 bytenum;
1885 347051 : if (simple_mem_bitfield_p (op0, bitsize, bitnum, mode1, &bytenum))
1886 : {
1887 44669 : op0 = adjust_bitfield_address (op0, mode1, bytenum);
1888 44669 : if (reverse)
1889 51 : op0 = flip_storage_order (mode1, op0);
1890 44669 : return convert_extracted_bit_field (op0, mode, tmode, unsignedp);
1891 : }
1892 :
1893 : /* If we have a memory source and a non-constant bit offset, restrict
1894 : the memory to the referenced bytes. This is a worst-case fallback
1895 : but is useful for things like vector booleans. */
1896 302382 : if (MEM_P (op0) && !bitnum.is_constant ())
1897 : {
1898 : bytenum = bits_to_bytes_round_down (bitnum);
1899 : bitnum = num_trailing_bits (bitnum);
1900 : poly_uint64 bytesize = bits_to_bytes_round_up (bitnum + bitsize);
1901 : op0 = adjust_bitfield_address_size (op0, BLKmode, bytenum, bytesize);
1902 : op0_mode = opt_scalar_int_mode ();
1903 : }
1904 :
1905 : /* It's possible we'll need to handle other cases here for
1906 : polynomial bitnum and bitsize. */
1907 :
1908 : /* From here on we need to be looking at a fixed-size insertion. */
1909 302382 : return extract_integral_bit_field (op0, op0_mode, bitsize.to_constant (),
1910 : bitnum.to_constant (), unsignedp,
1911 302382 : target, mode, tmode, reverse, fallback_p);
1912 : }
1913 :
1914 : /* Subroutine of extract_bit_field_1, with the same arguments, except
1915 : that BITSIZE and BITNUM are constant. Handle cases specific to
1916 : integral modes. If OP0_MODE is defined, it is the mode of OP0,
1917 : otherwise OP0 is a BLKmode MEM. */
1918 :
1919 : static rtx
1920 302382 : extract_integral_bit_field (rtx op0, opt_scalar_int_mode op0_mode,
1921 : unsigned HOST_WIDE_INT bitsize,
1922 : unsigned HOST_WIDE_INT bitnum, int unsignedp,
1923 : rtx target, machine_mode mode, machine_mode tmode,
1924 : bool reverse, bool fallback_p)
1925 : {
1926 : /* Handle fields bigger than a word. */
1927 :
1928 305650 : if (bitsize > BITS_PER_WORD)
1929 : {
1930 : /* Here we transfer the words of the field
1931 : in the order least significant first.
1932 : This is because the most significant word is the one which may
1933 : be less than full. */
1934 :
1935 1512 : const bool backwards = WORDS_BIG_ENDIAN;
1936 1512 : unsigned int nwords = (bitsize + (BITS_PER_WORD - 1)) / BITS_PER_WORD;
1937 1512 : unsigned int i;
1938 1512 : rtx_insn *last;
1939 :
1940 1512 : if (target == 0 || !REG_P (target) || !valid_multiword_target_p (target))
1941 1488 : target = gen_reg_rtx (mode);
1942 :
1943 : /* In case we're about to clobber a base register or something
1944 : (see gcc.c-torture/execute/20040625-1.c). */
1945 1512 : if (reg_mentioned_p (target, op0))
1946 0 : target = gen_reg_rtx (mode);
1947 :
1948 : /* Indicate for flow that the entire target reg is being set. */
1949 1512 : emit_clobber (target);
1950 :
1951 : /* The mode must be fixed-size, since extract_bit_field_1 handles
1952 : extractions from variable-sized objects before calling this
1953 : function. */
1954 1512 : unsigned int target_size
1955 1512 : = GET_MODE_SIZE (GET_MODE (target)).to_constant ();
1956 1512 : last = get_last_insn ();
1957 4536 : for (i = 0; i < nwords; i++)
1958 : {
1959 : /* If I is 0, use the low-order word in both field and target;
1960 : if I is 1, use the next to lowest word; and so on. */
1961 : /* Word number in TARGET to use. */
1962 3024 : unsigned int wordnum
1963 : = (backwards ? target_size / UNITS_PER_WORD - i - 1 : i);
1964 : /* Offset from start of field in OP0. */
1965 6048 : unsigned int bit_offset = (backwards ^ reverse
1966 3024 : ? MAX ((int) bitsize - ((int) i + 1)
1967 : * BITS_PER_WORD,
1968 : 0)
1969 3092 : : (int) i * BITS_PER_WORD);
1970 3024 : rtx target_part = operand_subword (target, wordnum, 1, VOIDmode);
1971 3024 : rtx result_part
1972 3200 : = extract_bit_field_1 (op0, MIN (BITS_PER_WORD,
1973 : bitsize - i * BITS_PER_WORD),
1974 3024 : bitnum + bit_offset,
1975 : (unsignedp ? 1 : -1), target_part,
1976 : mode, word_mode, reverse, fallback_p, NULL);
1977 :
1978 3024 : gcc_assert (target_part);
1979 3024 : if (!result_part)
1980 : {
1981 0 : delete_insns_since (last);
1982 0 : return NULL;
1983 : }
1984 :
1985 3024 : if (result_part != target_part)
1986 2908 : emit_move_insn (target_part, result_part);
1987 : }
1988 :
1989 1512 : if (unsignedp)
1990 : {
1991 : /* Unless we've filled TARGET, the upper regs in a multi-reg value
1992 : need to be zero'd out. */
1993 1526 : if (target_size > nwords * UNITS_PER_WORD)
1994 : {
1995 0 : unsigned int i, total_words;
1996 :
1997 0 : total_words = target_size / UNITS_PER_WORD;
1998 0 : for (i = nwords; i < total_words; i++)
1999 0 : emit_move_insn
2000 0 : (operand_subword (target,
2001 0 : backwards ? total_words - i - 1 : i,
2002 : 1, VOIDmode),
2003 : const0_rtx);
2004 : }
2005 1492 : return target;
2006 : }
2007 :
2008 : /* Signed bit field: sign-extend with two arithmetic shifts. */
2009 40 : target = expand_shift (LSHIFT_EXPR, mode, target,
2010 20 : GET_MODE_BITSIZE (mode) - bitsize, NULL_RTX, 0);
2011 40 : return expand_shift (RSHIFT_EXPR, mode, target,
2012 20 : GET_MODE_BITSIZE (mode) - bitsize, NULL_RTX, 0);
2013 : }
2014 :
2015 : /* If OP0 is a multi-word register, narrow it to the affected word.
2016 : If the region spans two words, defer to extract_split_bit_field. */
2017 466464 : if (!MEM_P (op0) && GET_MODE_SIZE (op0_mode.require ()) > UNITS_PER_WORD)
2018 : {
2019 3890 : if (bitnum % BITS_PER_WORD + bitsize > BITS_PER_WORD)
2020 : {
2021 1144 : if (!fallback_p)
2022 : return NULL_RTX;
2023 62 : target = extract_split_bit_field (op0, op0_mode, bitsize, bitnum,
2024 : unsignedp, reverse);
2025 62 : return convert_extracted_bit_field (target, mode, tmode, unsignedp);
2026 : }
2027 : /* If OP0 is a hard register, copy it to a pseudo before calling
2028 : force_subreg. */
2029 2746 : if (REG_P (op0) && HARD_REGISTER_P (op0))
2030 1 : op0 = copy_to_reg (op0);
2031 2746 : op0 = force_subreg (word_mode, op0, op0_mode.require (),
2032 3210 : bitnum / BITS_PER_WORD * UNITS_PER_WORD);
2033 2746 : op0_mode = word_mode;
2034 2978 : bitnum %= BITS_PER_WORD;
2035 : }
2036 :
2037 : /* From here on we know the desired field is smaller than a word.
2038 : If OP0 is a register, it too fits within a word. */
2039 299726 : enum extraction_pattern pattern = unsignedp ? EP_extzv : EP_extv;
2040 299726 : extraction_insn extv;
2041 299726 : if (!MEM_P (op0)
2042 161929 : && !reverse
2043 : /* ??? We could limit the structure size to the part of OP0 that
2044 : contains the field, with appropriate checks for endianness
2045 : and TARGET_TRULY_NOOP_TRUNCATION. */
2046 461647 : && get_best_reg_extraction_insn (&extv, pattern,
2047 475414 : GET_MODE_BITSIZE (op0_mode.require ()),
2048 : tmode))
2049 : {
2050 148154 : rtx result = extract_bit_field_using_extv (&extv, op0, op0_mode,
2051 : bitsize, bitnum,
2052 : unsignedp, target, mode,
2053 : tmode);
2054 148154 : if (result)
2055 : return result;
2056 : }
2057 :
2058 : /* If OP0 is a memory, try copying it to a register and seeing if a
2059 : cheap register alternative is available. */
2060 298028 : if (MEM_P (op0) & !reverse)
2061 : {
2062 137595 : if (get_best_mem_extraction_insn (&extv, pattern, bitsize, bitnum,
2063 : tmode))
2064 : {
2065 0 : rtx result = extract_bit_field_using_extv (&extv, op0, op0_mode,
2066 : bitsize, bitnum,
2067 : unsignedp, target, mode,
2068 : tmode);
2069 0 : if (result)
2070 0 : return result;
2071 : }
2072 :
2073 137595 : rtx_insn *last = get_last_insn ();
2074 :
2075 : /* Try loading part of OP0 into a register and extracting the
2076 : bitfield from that. */
2077 137595 : unsigned HOST_WIDE_INT bitpos;
2078 137595 : rtx xop0 = adjust_bit_field_mem_for_reg (pattern, op0, bitsize, bitnum,
2079 : 0, 0, tmode, &bitpos);
2080 137595 : if (xop0)
2081 : {
2082 134930 : xop0 = copy_to_reg (xop0);
2083 134930 : rtx result = extract_bit_field_1 (xop0, bitsize, bitpos,
2084 : unsignedp, target,
2085 : mode, tmode, reverse, false, NULL);
2086 134930 : if (result)
2087 : return result;
2088 134930 : delete_insns_since (last);
2089 : }
2090 : }
2091 :
2092 298028 : if (!fallback_p)
2093 : return NULL;
2094 :
2095 : /* Find a correspondingly-sized integer field, so we can apply
2096 : shifts and masks to it. */
2097 164180 : scalar_int_mode int_mode;
2098 164180 : if (!int_mode_for_mode (tmode).exists (&int_mode))
2099 : /* If this fails, we should probably push op0 out to memory and then
2100 : do a load. */
2101 0 : int_mode = int_mode_for_mode (mode).require ();
2102 :
2103 164180 : target = extract_fixed_bit_field (int_mode, op0, op0_mode, bitsize,
2104 : bitnum, target, unsignedp, reverse);
2105 :
2106 : /* Complex values must be reversed piecewise, so we need to undo the global
2107 : reversal, convert to the complex mode and reverse again. */
2108 164180 : if (reverse && COMPLEX_MODE_P (tmode))
2109 : {
2110 0 : target = flip_storage_order (int_mode, target);
2111 0 : target = convert_extracted_bit_field (target, mode, tmode, unsignedp);
2112 0 : target = flip_storage_order (tmode, target);
2113 : }
2114 : else
2115 164180 : target = convert_extracted_bit_field (target, mode, tmode, unsignedp);
2116 :
2117 : return target;
2118 : }
2119 :
2120 : /* Generate code to extract a byte-field from STR_RTX
2121 : containing BITSIZE bits, starting at BITNUM,
2122 : and put it in TARGET if possible (if TARGET is nonzero).
2123 : Regardless of TARGET, we return the rtx for where the value is placed.
2124 :
2125 : STR_RTX is the structure containing the byte (a REG or MEM).
2126 : UNSIGNEDP is nonzero if this is an unsigned bit field.
2127 : MODE is the natural mode of the field value once extracted.
2128 : TMODE is the mode the caller would like the value to have;
2129 : but the value may be returned with type MODE instead.
2130 :
2131 : If REVERSE is true, the extraction is to be done in reverse order.
2132 :
2133 : If a TARGET is specified and we can store in it at no extra cost,
2134 : we do so, and return TARGET.
2135 : Otherwise, we return a REG of mode TMODE or MODE, with TMODE preferred
2136 : if they are equally easy.
2137 :
2138 : If the result can be stored at TARGET, and ALT_RTL is non-NULL,
2139 : then *ALT_RTL is set to TARGET (before legitimziation). */
2140 :
2141 : rtx
2142 963919 : extract_bit_field (rtx str_rtx, poly_uint64 bitsize, poly_uint64 bitnum,
2143 : int unsignedp, rtx target, machine_mode mode,
2144 : machine_mode tmode, bool reverse, rtx *alt_rtl)
2145 : {
2146 963919 : machine_mode mode1;
2147 :
2148 : /* Handle -fstrict-volatile-bitfields in the cases where it applies. */
2149 1927838 : if (maybe_ne (GET_MODE_BITSIZE (GET_MODE (str_rtx)), 0))
2150 : mode1 = GET_MODE (str_rtx);
2151 253562 : else if (target && maybe_ne (GET_MODE_BITSIZE (GET_MODE (target)), 0))
2152 : mode1 = GET_MODE (target);
2153 : else
2154 : mode1 = tmode;
2155 :
2156 963919 : unsigned HOST_WIDE_INT ibitsize, ibitnum;
2157 963919 : scalar_int_mode int_mode;
2158 963919 : if (bitsize.is_constant (&ibitsize)
2159 963919 : && bitnum.is_constant (&ibitnum)
2160 1809388 : && is_a <scalar_int_mode> (mode1, &int_mode)
2161 845476 : && strict_volatile_bitfield_p (str_rtx, ibitsize, ibitnum,
2162 : int_mode, 0, 0))
2163 : {
2164 : /* Extraction of a full INT_MODE value can be done with a simple load.
2165 : We know here that the field can be accessed with one single
2166 : instruction. For targets that support unaligned memory,
2167 : an unaligned access may be necessary. */
2168 14 : if (ibitsize == GET_MODE_BITSIZE (int_mode))
2169 : {
2170 0 : rtx result = adjust_bitfield_address (str_rtx, int_mode,
2171 : ibitnum / BITS_PER_UNIT);
2172 0 : if (reverse)
2173 0 : result = flip_storage_order (int_mode, result);
2174 0 : gcc_assert (ibitnum % BITS_PER_UNIT == 0);
2175 0 : return convert_extracted_bit_field (result, mode, tmode, unsignedp);
2176 : }
2177 :
2178 7 : str_rtx = narrow_bit_field_mem (str_rtx, int_mode, ibitsize, ibitnum,
2179 : &ibitnum);
2180 14 : gcc_assert (ibitnum + ibitsize <= GET_MODE_BITSIZE (int_mode));
2181 7 : str_rtx = copy_to_reg (str_rtx);
2182 7 : return extract_bit_field_1 (str_rtx, ibitsize, ibitnum, unsignedp,
2183 : target, mode, tmode, reverse, true, alt_rtl);
2184 : }
2185 :
2186 963912 : return extract_bit_field_1 (str_rtx, bitsize, bitnum, unsignedp,
2187 963912 : target, mode, tmode, reverse, true, alt_rtl);
2188 : }
2189 :
2190 : /* Use shifts and boolean operations to extract a field of BITSIZE bits
2191 : from bit BITNUM of OP0. If OP0_MODE is defined, it is the mode of OP0,
2192 : otherwise OP0 is a BLKmode MEM.
2193 :
2194 : UNSIGNEDP is nonzero for an unsigned bit field (don't sign-extend value).
2195 : If REVERSE is true, the extraction is to be done in reverse order.
2196 :
2197 : If TARGET is nonzero, attempts to store the value there
2198 : and return TARGET, but this is not guaranteed.
2199 : If TARGET is not used, create a pseudo-reg of mode TMODE for the value. */
2200 :
2201 : static rtx
2202 182484 : extract_fixed_bit_field (machine_mode tmode, rtx op0,
2203 : opt_scalar_int_mode op0_mode,
2204 : unsigned HOST_WIDE_INT bitsize,
2205 : unsigned HOST_WIDE_INT bitnum, rtx target,
2206 : int unsignedp, bool reverse)
2207 : {
2208 182484 : scalar_int_mode mode;
2209 182484 : if (MEM_P (op0))
2210 : {
2211 151521 : if (!get_best_mode (bitsize, bitnum, 0, 0, MEM_ALIGN (op0),
2212 151521 : BITS_PER_WORD, MEM_VOLATILE_P (op0), &mode))
2213 : /* The only way this should occur is if the field spans word
2214 : boundaries. */
2215 4301 : return extract_split_bit_field (op0, op0_mode, bitsize, bitnum,
2216 4301 : unsignedp, reverse);
2217 :
2218 147220 : op0 = narrow_bit_field_mem (op0, mode, bitsize, bitnum, &bitnum);
2219 : }
2220 : else
2221 30963 : mode = op0_mode.require ();
2222 :
2223 178183 : return extract_fixed_bit_field_1 (tmode, op0, mode, bitsize, bitnum,
2224 178183 : target, unsignedp, reverse);
2225 : }
2226 :
2227 : /* Helper function for extract_fixed_bit_field, extracts
2228 : the bit field always using MODE, which is the mode of OP0.
2229 : If UNSIGNEDP is -1, the result need not be sign or zero extended.
2230 : The other arguments are as for extract_fixed_bit_field. */
2231 :
2232 : static rtx
2233 178183 : extract_fixed_bit_field_1 (machine_mode tmode, rtx op0, scalar_int_mode mode,
2234 : unsigned HOST_WIDE_INT bitsize,
2235 : unsigned HOST_WIDE_INT bitnum, rtx target,
2236 : int unsignedp, bool reverse)
2237 : {
2238 : /* Note that bitsize + bitnum can be greater than GET_MODE_BITSIZE (mode)
2239 : for invalid input, such as extract equivalent of f5 from
2240 : gcc.dg/pr48335-2.c. */
2241 :
2242 178183 : if (reverse ? !BYTES_BIG_ENDIAN : BYTES_BIG_ENDIAN)
2243 : /* BITNUM is the distance between our msb and that of OP0.
2244 : Convert it to the distance from the lsb. */
2245 424 : bitnum = GET_MODE_BITSIZE (mode) - bitsize - bitnum;
2246 :
2247 : /* Now BITNUM is always the distance between the field's lsb and that of OP0.
2248 : We have reduced the big-endian case to the little-endian case. */
2249 178183 : if (reverse)
2250 212 : op0 = flip_storage_order (mode, op0);
2251 :
2252 178183 : if (unsignedp)
2253 : {
2254 118832 : if (bitnum)
2255 : {
2256 : /* If the field does not already start at the lsb,
2257 : shift it so it does. */
2258 : /* Maybe propagate the target for the shift. */
2259 44090 : rtx subtarget = (target != 0 && REG_P (target) ? target : 0);
2260 44090 : if (tmode != mode)
2261 23708 : subtarget = 0;
2262 44090 : op0 = expand_shift (RSHIFT_EXPR, mode, op0, bitnum, subtarget, 1);
2263 : }
2264 : /* Convert the value to the desired mode. TMODE must also be a
2265 : scalar integer for this conversion to make sense, since we
2266 : shouldn't reinterpret the bits. */
2267 118832 : scalar_int_mode new_mode = as_a <scalar_int_mode> (tmode);
2268 118832 : if (mode != new_mode)
2269 42971 : op0 = convert_to_mode (new_mode, op0, 1);
2270 :
2271 : /* Unless the msb of the field used to be the msb when we shifted,
2272 : mask out the upper bits. */
2273 :
2274 118832 : if (GET_MODE_BITSIZE (mode) != bitnum + bitsize
2275 118832 : && unsignedp != -1)
2276 92978 : return expand_binop (new_mode, and_optab, op0,
2277 : mask_rtx (new_mode, 0, bitsize, 0),
2278 92978 : target, 1, OPTAB_LIB_WIDEN);
2279 : return op0;
2280 : }
2281 :
2282 : /* To extract a signed bit-field, first shift its msb to the msb of the word,
2283 : then arithmetic-shift its lsb to the lsb of the word. */
2284 59351 : op0 = force_reg (mode, op0);
2285 :
2286 : /* Find the narrowest integer mode that contains the field. */
2287 :
2288 59351 : opt_scalar_int_mode mode_iter;
2289 147215 : FOR_EACH_MODE_IN_CLASS (mode_iter, MODE_INT)
2290 294430 : if (GET_MODE_BITSIZE (mode_iter.require ()) >= bitsize + bitnum)
2291 : break;
2292 :
2293 59351 : mode = mode_iter.require ();
2294 59351 : op0 = convert_to_mode (mode, op0, 0);
2295 :
2296 59351 : if (mode != tmode)
2297 4794 : target = 0;
2298 :
2299 118702 : if (GET_MODE_BITSIZE (mode) != (bitsize + bitnum))
2300 : {
2301 54411 : int amount = GET_MODE_BITSIZE (mode) - (bitsize + bitnum);
2302 : /* Maybe propagate the target for the shift. */
2303 54411 : rtx subtarget = (target != 0 && REG_P (target) ? target : 0);
2304 54411 : op0 = expand_shift (LSHIFT_EXPR, mode, op0, amount, subtarget, 1);
2305 : }
2306 :
2307 118702 : return expand_shift (RSHIFT_EXPR, mode, op0,
2308 59351 : GET_MODE_BITSIZE (mode) - bitsize, target, 0);
2309 : }
2310 :
2311 : /* Return a constant integer (CONST_INT or CONST_DOUBLE) rtx with the value
2312 : VALUE << BITPOS. */
2313 :
2314 : static rtx
2315 74553 : lshift_value (machine_mode mode, unsigned HOST_WIDE_INT value,
2316 : int bitpos)
2317 : {
2318 74553 : return immed_wide_int_const (wi::lshift (value, bitpos), mode);
2319 : }
2320 :
2321 : /* Extract a bit field that is split across two words
2322 : and return an RTX for the result.
2323 :
2324 : OP0 is the REG, SUBREG or MEM rtx for the first of the two words.
2325 : BITSIZE is the field width; BITPOS, position of its first bit, in the word.
2326 : UNSIGNEDP is 1 if should zero-extend the contents; else sign-extend.
2327 : If OP0_MODE is defined, it is the mode of OP0, otherwise OP0 is
2328 : a BLKmode MEM.
2329 :
2330 : If REVERSE is true, the extraction is to be done in reverse order. */
2331 :
2332 : static rtx
2333 4363 : extract_split_bit_field (rtx op0, opt_scalar_int_mode op0_mode,
2334 : unsigned HOST_WIDE_INT bitsize,
2335 : unsigned HOST_WIDE_INT bitpos, int unsignedp,
2336 : bool reverse)
2337 : {
2338 4363 : unsigned int unit;
2339 4363 : unsigned int bitsdone = 0;
2340 4363 : rtx result = NULL_RTX;
2341 4363 : int first = 1;
2342 :
2343 : /* Make sure UNIT isn't larger than BITS_PER_WORD, we can only handle that
2344 : much at a time. */
2345 4363 : if (REG_P (op0) || GET_CODE (op0) == SUBREG)
2346 62 : unit = BITS_PER_WORD;
2347 : else
2348 6382 : unit = MIN (MEM_ALIGN (op0), BITS_PER_WORD);
2349 :
2350 18187 : while (bitsdone < bitsize)
2351 : {
2352 13824 : unsigned HOST_WIDE_INT thissize;
2353 13824 : rtx part;
2354 13824 : unsigned HOST_WIDE_INT thispos;
2355 13824 : unsigned HOST_WIDE_INT offset;
2356 :
2357 13824 : offset = (bitpos + bitsdone) / unit;
2358 13824 : thispos = (bitpos + bitsdone) % unit;
2359 :
2360 : /* THISSIZE must not overrun a word boundary. Otherwise,
2361 : extract_fixed_bit_field will call us again, and we will mutually
2362 : recurse forever. */
2363 13824 : thissize = MIN (bitsize - bitsdone, BITS_PER_WORD);
2364 13824 : thissize = MIN (thissize, unit - thispos);
2365 :
2366 : /* If OP0 is a register, then handle OFFSET here. */
2367 13824 : rtx op0_piece = op0;
2368 13824 : opt_scalar_int_mode op0_piece_mode = op0_mode;
2369 13824 : if (SUBREG_P (op0) || REG_P (op0))
2370 : {
2371 124 : op0_piece = operand_subword_force (op0, offset, op0_mode.require ());
2372 124 : op0_piece_mode = word_mode;
2373 124 : offset = 0;
2374 : }
2375 :
2376 : /* Extract the parts in bit-counting order,
2377 : whose meaning is determined by BYTES_PER_UNIT.
2378 : OFFSET is in UNITs, and UNIT is in bits. */
2379 27648 : part = extract_fixed_bit_field (word_mode, op0_piece, op0_piece_mode,
2380 13824 : thissize, offset * unit + thispos,
2381 : 0, 1, reverse);
2382 13824 : bitsdone += thissize;
2383 :
2384 : /* Shift this part into place for the result. */
2385 13824 : if (reverse ? !BYTES_BIG_ENDIAN : BYTES_BIG_ENDIAN)
2386 : {
2387 4 : if (bitsize != bitsdone)
2388 2 : part = expand_shift (LSHIFT_EXPR, word_mode, part,
2389 2 : bitsize - bitsdone, 0, 1);
2390 : }
2391 : else
2392 : {
2393 13820 : if (bitsdone != thissize)
2394 9459 : part = expand_shift (LSHIFT_EXPR, word_mode, part,
2395 9459 : bitsdone - thissize, 0, 1);
2396 : }
2397 :
2398 13824 : if (first)
2399 : result = part;
2400 : else
2401 : /* Combine the parts with bitwise or. This works
2402 : because we extracted each part as an unsigned bit field. */
2403 9461 : result = expand_binop (word_mode, ior_optab, part, result, NULL_RTX, 1,
2404 : OPTAB_LIB_WIDEN);
2405 :
2406 13824 : first = 0;
2407 : }
2408 :
2409 : /* Unsigned bit field: we are done. */
2410 4363 : if (unsignedp)
2411 : return result;
2412 : /* Signed bit field: sign-extend with two arithmetic shifts. */
2413 1464 : result = expand_shift (LSHIFT_EXPR, word_mode, result,
2414 1464 : BITS_PER_WORD - bitsize, NULL_RTX, 0);
2415 1464 : return expand_shift (RSHIFT_EXPR, word_mode, result,
2416 1464 : BITS_PER_WORD - bitsize, NULL_RTX, 0);
2417 : }
2418 :
2419 : /* Try to read the low bits of SRC as an rvalue of mode MODE, preserving
2420 : the bit pattern. SRC_MODE is the mode of SRC; if this is smaller than
2421 : MODE, fill the upper bits with zeros. Fail if the layout of either
2422 : mode is unknown (as for CC modes) or if the extraction would involve
2423 : unprofitable mode punning. Return the value on success, otherwise
2424 : return null.
2425 :
2426 : This is different from gen_lowpart* in these respects:
2427 :
2428 : - the returned value must always be considered an rvalue
2429 :
2430 : - when MODE is wider than SRC_MODE, the extraction involves
2431 : a zero extension
2432 :
2433 : - when MODE is smaller than SRC_MODE, the extraction involves
2434 : a truncation (and is thus subject to TARGET_TRULY_NOOP_TRUNCATION).
2435 :
2436 : In other words, this routine performs a computation, whereas the
2437 : gen_lowpart* routines are conceptually lvalue or rvalue subreg
2438 : operations. */
2439 :
2440 : rtx
2441 115060 : extract_low_bits (machine_mode mode, machine_mode src_mode, rtx src)
2442 : {
2443 115060 : scalar_int_mode int_mode, src_int_mode;
2444 :
2445 115060 : if (mode == src_mode)
2446 : return src;
2447 :
2448 79012 : if (CONSTANT_P (src))
2449 : {
2450 : /* simplify_gen_subreg can't be used here, as if simplify_subreg
2451 : fails, it will happily create (subreg (symbol_ref)) or similar
2452 : invalid SUBREGs. */
2453 15246 : poly_uint64 byte = subreg_lowpart_offset (mode, src_mode);
2454 15246 : rtx ret = simplify_subreg (mode, src, src_mode, byte);
2455 15246 : if (ret)
2456 : return ret;
2457 :
2458 22 : if (GET_MODE (src) == VOIDmode
2459 22 : || !validate_subreg (mode, src_mode, src, byte))
2460 5 : return NULL_RTX;
2461 :
2462 17 : src = force_reg (GET_MODE (src), src);
2463 17 : return gen_rtx_SUBREG (mode, src, byte);
2464 : }
2465 :
2466 63766 : if (GET_MODE_CLASS (mode) == MODE_CC || GET_MODE_CLASS (src_mode) == MODE_CC)
2467 : return NULL_RTX;
2468 :
2469 127532 : if (known_eq (GET_MODE_BITSIZE (mode), GET_MODE_BITSIZE (src_mode))
2470 63766 : && targetm.modes_tieable_p (mode, src_mode))
2471 : {
2472 3954 : rtx x = gen_lowpart_common (mode, src);
2473 3954 : if (x)
2474 : return x;
2475 : }
2476 :
2477 59823 : if (!int_mode_for_mode (src_mode).exists (&src_int_mode)
2478 59810 : || !int_mode_for_mode (mode).exists (&int_mode))
2479 13 : return NULL_RTX;
2480 :
2481 59810 : if (!targetm.modes_tieable_p (src_int_mode, src_mode))
2482 : return NULL_RTX;
2483 58729 : if (!targetm.modes_tieable_p (int_mode, mode))
2484 : return NULL_RTX;
2485 :
2486 56671 : src = gen_lowpart (src_int_mode, src);
2487 56671 : if (!validate_subreg (int_mode, src_int_mode, src,
2488 : subreg_lowpart_offset (int_mode, src_int_mode)))
2489 : return NULL_RTX;
2490 :
2491 56665 : src = convert_modes (int_mode, src_int_mode, src, true);
2492 56665 : src = gen_lowpart (mode, src);
2493 56665 : return src;
2494 : }
2495 :
2496 : /* Add INC into TARGET. */
2497 :
2498 : void
2499 1185 : expand_inc (rtx target, rtx inc)
2500 : {
2501 1185 : rtx value = expand_binop (GET_MODE (target), add_optab,
2502 : target, inc,
2503 : target, 0, OPTAB_LIB_WIDEN);
2504 1185 : if (value != target)
2505 61 : emit_move_insn (target, value);
2506 1185 : }
2507 :
2508 : /* Subtract DEC from TARGET. */
2509 :
2510 : void
2511 1220 : expand_dec (rtx target, rtx dec)
2512 : {
2513 1220 : rtx value = expand_binop (GET_MODE (target), sub_optab,
2514 : target, dec,
2515 : target, 0, OPTAB_LIB_WIDEN);
2516 1220 : if (value != target)
2517 0 : emit_move_insn (target, value);
2518 1220 : }
2519 :
2520 : /* Output a shift or rotate instruction for expression code CODE,
2521 : with SHIFTED being the rtx for the value to shift,
2522 : and AMOUNT the rtx for the amount to shift by.
2523 : Store the result in the rtx TARGET, if that is convenient.
2524 : If UNSIGNEDP is nonzero, do a logical shift; otherwise, arithmetic.
2525 : Return the rtx for where the value is.
2526 : If that cannot be done, abort the compilation unless MAY_FAIL is true,
2527 : in which case 0 is returned.

   MODE is the mode of the operation.  When both MODE and the mode of
   AMOUNT are vector modes, the vector-by-vector optabs are used.
   If the (canonicalized) amount is const0_rtx, SHIFTED itself is
   returned and no code is emitted.  */
2528 :
2529 : static rtx
2530 1498653 : expand_shift_1 (enum tree_code code, machine_mode mode, rtx shifted,
2531 : rtx amount, rtx target, int unsignedp, bool may_fail = false)
2532 : {
/* LEFT is nonzero for a left shift or left rotate; ROTATE is nonzero
   for either rotate code.  The optab variables start out as the
   scalar-count patterns and may be switched to the vector ones below.  */
2533 1498653 : rtx op1, temp = 0;
2534 1498653 : int left = (code == LSHIFT_EXPR || code == LROTATE_EXPR);
2535 1498653 : int rotate = (code == LROTATE_EXPR || code == RROTATE_EXPR);
2536 1498653 : optab lshift_optab = ashl_optab;
2537 1498653 : optab rshift_arith_optab = ashr_optab;
2538 1498653 : optab rshift_uns_optab = lshr_optab;
2539 1498653 : optab lrotate_optab = rotl_optab;
2540 1498653 : optab rrotate_optab = rotr_optab;
2541 1498653 : machine_mode op1_mode;
2542 1498653 : scalar_mode scalar_mode = GET_MODE_INNER (mode);
2543 1498653 : int attempt;
2544 1498653 : bool speed = optimize_insn_for_speed_p ();
2545 :
2546 1498653 : op1 = amount;
2547 1498653 : op1_mode = GET_MODE (op1);
2548 :
2549 : /* Determine whether the shift/rotate amount is a vector, or scalar. If the
2550 : shift amount is a vector, use the vector/vector shift patterns. */
2551 1498653 : if (VECTOR_MODE_P (mode) && VECTOR_MODE_P (op1_mode))
2552 : {
2553 1498653 : lshift_optab = vashl_optab;
2554 1498653 : rshift_arith_optab = vashr_optab;
2555 1498653 : rshift_uns_optab = vlshr_optab;
2556 1498653 : lrotate_optab = vrotl_optab;
2557 1498653 : rrotate_optab = vrotr_optab;
2558 : }
2559 :
2560 : /* Previously detected shift-counts computed by NEGATE_EXPR
2561 : and shifted in the other direction; but that does not work
2562 : on all machines.

   When the target truncates shift counts, reduce an out-of-range
   constant count modulo the element bit size, or use the inner
   register of a lowpart SUBREG count when both the inner and outer
   modes are scalar integer modes.  */
2563 :
2564 1498653 : if (SHIFT_COUNT_TRUNCATED)
2565 : {
2566 : if (CONST_INT_P (op1)
2567 : && ((unsigned HOST_WIDE_INT) INTVAL (op1) >=
2568 : (unsigned HOST_WIDE_INT) GET_MODE_BITSIZE (scalar_mode)))
2569 : op1 = gen_int_shift_amount (mode,
2570 : (unsigned HOST_WIDE_INT) INTVAL (op1)
2571 : % GET_MODE_BITSIZE (scalar_mode));
2572 : else if (GET_CODE (op1) == SUBREG
2573 : && subreg_lowpart_p (op1)
2574 : && SCALAR_INT_MODE_P (GET_MODE (SUBREG_REG (op1)))
2575 : && SCALAR_INT_MODE_P (GET_MODE (op1)))
2576 : op1 = SUBREG_REG (op1);
2577 : }
2578 :
2579 : /* Canonicalize rotates by constant amount. We may canonicalize
2580 : to reduce the immediate or if the ISA can rotate by constants
2581 : in only one direction. The rotate is replaced by one in the
   opposite direction by (element bit size - amount). */
2582 1498653 : if (rotate && reverse_rotate_by_imm_p (scalar_mode, left, op1))
2583 : {
2584 2990 : op1 = gen_int_shift_amount (mode, (GET_MODE_BITSIZE (scalar_mode)
2585 2990 : - INTVAL (op1)));
2586 2990 : left = !left;
2587 2990 : code = left ? LROTATE_EXPR : RROTATE_EXPR;
2588 : }
2589 :
2590 : /* Rotation of 16bit values by 8 bits is effectively equivalent to a bswaphi.
2591 : Note that this is not the case for bigger values. For instance a rotation
2592 : of 0x01020304 by 16 bits gives 0x03040102 which is different from
2593 : 0x04030201 (bswapsi). */
2594 1498653 : if (rotate
2595 7936 : && CONST_INT_P (op1)
2596 5083 : && INTVAL (op1) == BITS_PER_UNIT
2597 990 : && GET_MODE_SIZE (scalar_mode) == 2
2598 1499476 : && optab_handler (bswap_optab, mode) != CODE_FOR_nothing)
2599 822 : return expand_unop (mode, bswap_optab, shifted, NULL_RTX, unsignedp);
2600 :
/* Shifting or rotating by zero is a no-op.  */
2601 1497831 : if (op1 == const0_rtx)
2602 : return shifted;
2603 :
2604 : /* Check whether it's cheaper to implement a left shift by a constant
2605 : bit count by a sequence of additions. Each addition doubles the
   value, so a shift by N becomes N self-additions. */
2606 1450724 : if (code == LSHIFT_EXPR
2607 878143 : && CONST_INT_P (op1)
2608 848533 : && INTVAL (op1) > 0
2609 848504 : && INTVAL (op1) < GET_MODE_PRECISION (scalar_mode)
2610 848504 : && INTVAL (op1) < MAX_BITS_PER_WORD
2611 844090 : && (shift_cost (speed, mode, INTVAL (op1))
2612 844090 : > INTVAL (op1) * add_cost (speed, mode))
2613 1453535 : && shift_cost (speed, mode, INTVAL (op1)) != MAX_COST)
2614 : {
2615 : int i;
2616 5800 : for (i = 0; i < INTVAL (op1); i++)
2617 : {
2618 2989 : temp = force_reg (mode, shifted);
2619 2989 : shifted = expand_binop (mode, add_optab, temp, temp, NULL_RTX,
2620 : unsignedp, OPTAB_LIB_WIDEN);
2621 : }
2622 : return shifted;
2623 : }
2624 :
/* Try up to three increasingly permissive expansion methods (direct,
   widening, then library call or widening), stopping as soon as one
   of them produces a result in TEMP.  */
2625 2895860 : for (attempt = 0; temp == 0 && attempt < 3; attempt++)
2626 : {
2627 1447981 : enum optab_methods methods;
2628 :
2629 1447981 : if (attempt == 0)
2630 : methods = OPTAB_DIRECT;
2631 68 : else if (attempt == 1)
2632 : methods = OPTAB_WIDEN;
2633 : else
2634 34 : methods = OPTAB_LIB_WIDEN;
2635 :
2636 1447981 : if (rotate)
2637 : {
2638 : /* Widening does not work for rotation. */
2639 7182 : if (methods == OPTAB_WIDEN)
2640 34 : continue;
2641 7148 : else if (methods == OPTAB_LIB_WIDEN)
2642 : {
2643 : /* If we have been unable to open-code this by a rotation,
2644 : do it as the IOR or PLUS of two shifts. I.e., to rotate
2645 : A by N bits, compute
2646 : (A << N) | ((unsigned) A >> ((-N) & (C - 1)))
2647 : where C is the bitsize of A. If N cannot be zero,
2648 : use PLUS instead of IOR.
2649 :
2650 : It is theoretically possible that the target machine might
2651 : not be able to perform either shift and hence we would
2652 : be making two libcalls rather than just the one for the
2653 : shift (similarly if IOR could not be done). We will allow
2654 : this extremely unlikely lossage to avoid complicating the
2655 : code below. */
2656 :
2657 34 : rtx subtarget = target == shifted ? 0 : target;
2658 34 : rtx new_amount, other_amount;
2659 34 : rtx temp1;
2660 :
2661 34 : new_amount = op1;
2662 34 : if (op1 == const0_rtx)
2663 : return shifted;
2664 34 : else if (CONST_INT_P (op1))
2665 23 : other_amount = gen_int_shift_amount
2666 23 : (mode, GET_MODE_BITSIZE (scalar_mode) - INTVAL (op1));
2667 : else
2668 : {
/* Variable count: the opposite shift is by (-N) & (precision - 1).  */
2669 11 : other_amount
2670 22 : = simplify_gen_unary (NEG, GET_MODE (op1),
2671 11 : op1, GET_MODE (op1));
2672 11 : HOST_WIDE_INT mask = GET_MODE_PRECISION (scalar_mode) - 1;
2673 11 : other_amount
2674 11 : = simplify_gen_binary (AND, GET_MODE (op1), other_amount,
2675 11 : gen_int_mode (mask, GET_MODE (op1)));
2676 : }
2677 :
2678 34 : shifted = force_reg (mode, shifted);
2679 :
/* Both halves are logical shifts (UNSIGNEDP is passed as 1).  */
2680 45 : temp = expand_shift_1 (left ? LSHIFT_EXPR : RSHIFT_EXPR,
2681 : mode, shifted, new_amount, 0, 1);
2682 45 : temp1 = expand_shift_1 (left ? RSHIFT_EXPR : LSHIFT_EXPR,
2683 : mode, shifted, other_amount,
2684 : subtarget, 1);
2685 34 : return expand_binop (mode,
2686 34 : CONST_INT_P (op1) ? add_optab : ior_optab,
2687 34 : temp, temp1, target, unsignedp, methods);
2688 : }
2689 :
2690 10479 : temp = expand_binop (mode,
2691 : left ? lrotate_optab : rrotate_optab,
2692 : shifted, op1, target, unsignedp, methods);
2693 : }
2694 1440799 : else if (unsignedp)
2695 1182018 : temp = expand_binop (mode,
2696 : left ? lshift_optab : rshift_uns_optab,
2697 : shifted, op1, target, unsignedp, methods);
2698 :
2699 : /* Do arithmetic shifts.
2700 : Also, if we are going to widen the operand, we can just as well
2701 : use an arithmetic right-shift instead of a logical one. */
2702 1447913 : if (temp == 0 && ! rotate
2703 583573 : && (! unsignedp || (! left && methods == OPTAB_WIDEN)))
2704 : {
2705 : enum optab_methods methods1 = methods;
2706 :
2707 : /* If trying to widen a log shift to an arithmetic shift,
2708 : don't accept an arithmetic shift of the same size. */
2709 : if (unsignedp)
2710 : methods1 = OPTAB_MUST_WIDEN;
2711 :
2712 : /* Arithmetic shift */
2713 :
2714 824248 : temp = expand_binop (mode,
2715 : left ? lshift_optab : rshift_arith_optab,
2716 : shifted, op1, target, unsignedp, methods1);
2717 : }
2718 :
2719 : /* We used to try extzv here for logical right shifts, but that was
2720 : only useful for one machine, the VAX, and caused poor code
2721 : generation there for lshrdi3, so the code was deleted and a
2722 : define_expand for lshrsi3 was added to vax.md. */
2723 : }
2724 :
/* A null result is only acceptable when the caller asked for MAY_FAIL.  */
2725 1447879 : gcc_assert (temp != NULL_RTX || may_fail);
2726 : return temp;
2727 : }
2728 :
2729 : /* Output a shift instruction for expression code CODE,
2730 : with SHIFTED being the rtx for the value to shift,
2731 : and AMOUNT the amount to shift by.
2732 : Store the result in the rtx TARGET, if that is convenient.
2733 : If UNSIGNEDP is nonzero, do a logical shift; otherwise, arithmetic.
2734 : Return the rtx for where the value is. */
2735 :
2736 : rtx
2737 1212642 : expand_shift (enum tree_code code, machine_mode mode, rtx shifted,
2738 : poly_int64 amount, rtx target, int unsignedp)
2739 : {
2740 1212642 : return expand_shift_1 (code, mode, shifted,
2741 : gen_int_shift_amount (mode, amount),
2742 1212642 : target, unsignedp);
2743 : }
2744 :
2745 : /* Likewise, but return 0 if that cannot be done. */
2746 :
2747 : rtx
2748 314 : maybe_expand_shift (enum tree_code code, machine_mode mode, rtx shifted,
2749 : int amount, rtx target, int unsignedp)
2750 : {
2751 314 : return expand_shift_1 (code, mode,
2752 314 : shifted, GEN_INT (amount), target, unsignedp, true);
2753 : }
2754 :
2755 : /* Output a shift instruction for expression code CODE,
2756 : with SHIFTED being the rtx for the value to shift,
2757 : and AMOUNT the tree for the amount to shift by.
2758 : Store the result in the rtx TARGET, if that is convenient.
2759 : If UNSIGNEDP is nonzero, do a logical shift; otherwise, arithmetic.
2760 : Return the rtx for where the value is. */
2761 :
2762 : rtx
2763 285629 : expand_variable_shift (enum tree_code code, machine_mode mode, rtx shifted,
2764 : tree amount, rtx target, int unsignedp)
2765 : {
2766 285629 : return expand_shift_1 (code, mode,
2767 285629 : shifted, expand_normal (amount), target, unsignedp);
2768 : }
2769 :
2770 :
/* Forward declarations of file-local (static) helpers, so that they can
   be referenced before their definitions.  synth_mult is defined
   directly below.  NOTE(review): the remaining helpers are presumably
   defined later in this file -- confirm.  */
2771 : static void synth_mult (struct algorithm *, unsigned HOST_WIDE_INT,
2772 : const struct mult_cost *, machine_mode mode);
2773 : static rtx expand_mult_const (machine_mode, rtx, HOST_WIDE_INT, rtx,
2774 : const struct algorithm *, enum mult_variant);
2775 : static unsigned HOST_WIDE_INT invert_mod2n (unsigned HOST_WIDE_INT, int);
2776 : static rtx extract_high_half (scalar_int_mode, rtx);
2777 : static rtx expmed_mult_highpart (scalar_int_mode, rtx, rtx, rtx, int, int);
2778 :
2779 : /* Compute the best algorithm for multiplying by T and store it in
2780 : *ALG_OUT. The algorithm must cost less than COST_LIMIT.
2781 : If no algorithm was found, ALG_OUT->cost is left at COST_LIMIT plus
2782 : one in both fields and all other fields of *ALG_OUT are undefined.
2783 : MODE is the machine mode of the multiplication.

   The search is recursive: each candidate step (shift, add/subtract,
   factor, shift-and-add) calls synth_mult for the remaining multiplier
   with the budget reduced by the cost of that step.  Results, including
   failures, are memoized in a hash table keyed on T, MODE and whether
   we are optimizing for speed.  */
2784 :
2785 : static void
2786 36753577 : synth_mult (struct algorithm *alg_out, unsigned HOST_WIDE_INT t,
2787 : const struct mult_cost *cost_limit, machine_mode mode)
2788 : {
2789 36753577 : int m;
2790 36753577 : struct algorithm *alg_in, *best_alg;
2791 36753577 : struct mult_cost best_cost;
2792 36753577 : struct mult_cost new_limit;
2793 36753577 : int op_cost, op_latency;
2794 36753577 : unsigned HOST_WIDE_INT orig_t = t;
2795 36753577 : unsigned HOST_WIDE_INT q;
2796 36753577 : int maxm, hash_index;
2797 36753577 : bool cache_hit = false;
2798 36753577 : enum alg_code cache_alg = alg_zero;
2799 36753577 : bool speed = optimize_insn_for_speed_p ();
2800 36753577 : scalar_int_mode imode;
2801 36753577 : struct alg_hash_entry *entry_ptr;
2802 :
2803 : /* Indicate that no algorithm is yet found. If no algorithm
2804 : is found, this value will be returned and indicate failure. */
2805 36753577 : alg_out->cost.cost = cost_limit->cost + 1;
2806 36753577 : alg_out->cost.latency = cost_limit->latency + 1;
2807 :
/* A negative cost budget, or a zero cost budget without any latency
   headroom, cannot be met; fail with the cost set above.  */
2808 36753577 : if (cost_limit->cost < 0
2809 30447385 : || (cost_limit->cost == 0 && cost_limit->latency <= 0))
2810 28879093 : return;
2811 :
2812 : /* Be prepared for vector modes. */
2813 50390276 : imode = as_a <scalar_int_mode> (GET_MODE_INNER (mode));
2814 :
/* MAXM bounds every shift count considered below.  */
2815 75011773 : maxm = MIN (BITS_PER_WORD, GET_MODE_BITSIZE (imode));
2816 :
2817 : /* Restrict the bits of "t" to the multiplication's mode. */
2818 25195138 : t &= GET_MODE_MASK (imode);
2819 :
2820 : /* t == 1 can be done in zero cost. */
2821 25195138 : if (t == 1)
2822 : {
2823 6376938 : alg_out->ops = 1;
2824 6376938 : alg_out->cost.cost = 0;
2825 6376938 : alg_out->cost.latency = 0;
2826 6376938 : alg_out->op[0] = alg_m;
2827 6376938 : return;
2828 : }
2829 :
2830 : /* t == 0 sometimes has a cost. If it does and it exceeds our limit,
2831 : fail now. */
2832 18818200 : if (t == 0)
2833 : {
2834 577268 : if (MULT_COST_LESS (cost_limit, zero_cost (speed)))
2835 : return;
2836 : else
2837 : {
2838 577268 : alg_out->ops = 1;
2839 577268 : alg_out->cost.cost = zero_cost (speed);
2840 577268 : alg_out->cost.latency = zero_cost (speed);
2841 577268 : alg_out->op[0] = alg_zero;
2842 577268 : return;
2843 : }
2844 : }
2845 :
2846 : /* We'll be needing a couple extra algorithm structures now.
   ALG_IN receives each recursive result; BEST_ALG holds the best
   candidate so far, and the two pointers are swapped whenever a
   cheaper candidate is found. */
2847 :
2848 18240932 : alg_in = XALLOCA (struct algorithm);
2849 18240932 : best_alg = XALLOCA (struct algorithm);
2850 18240932 : best_cost = *cost_limit;
2851 :
2852 : /* Compute the hash index. */
2853 18240932 : hash_index = (t ^ (unsigned int) mode ^ (speed * 256)) % NUM_ALG_HASH_ENTRIES;
2854 :
2855 : /* See if we already know what to do for T. */
2856 18240932 : entry_ptr = alg_hash_entry_ptr (hash_index);
2857 18240932 : if (entry_ptr->t == t
2858 15235765 : && entry_ptr->mode == mode
2859 15235765 : && entry_ptr->speed == speed
2860 15235765 : && entry_ptr->alg != alg_unknown)
2861 : {
2862 15235765 : cache_alg = entry_ptr->alg;
2863 :
2864 15235765 : if (cache_alg == alg_impossible)
2865 : {
2866 : /* The cache tells us that it's impossible to synthesize
2867 : multiplication by T within entry_ptr->cost. */
2868 6945622 : if (!CHEAPER_MULT_COST (&entry_ptr->cost, cost_limit))
2869 : /* COST_LIMIT is at least as restrictive as the one
2870 : recorded in the hash table, in which case we have no
2871 : hope of synthesizing a multiplication. Just
2872 : return. */
2873 : return;
2874 :
2875 : /* If we get here, COST_LIMIT is less restrictive than the
2876 : one recorded in the hash table, so we may be able to
2877 : synthesize a multiplication. Proceed as if we didn't
2878 : have the cache entry. */
2879 : }
2880 : else
2881 : {
2882 8290143 : if (CHEAPER_MULT_COST (cost_limit, &entry_ptr->cost))
2883 : /* The cached algorithm shows that this multiplication
2884 : requires more cost than COST_LIMIT. Just return. This
2885 : way, we don't clobber this cache entry with
2886 : alg_impossible but retain useful information. */
2887 : return;
2888 :
2889 7492255 : cache_hit = true;
2890 :
/* Jump straight to the section that handles the cached algorithm.
   With CACHE_HIT set, that section goes to DONE when it finishes
   instead of falling through to the remaining strategies.  */
2891 7492255 : switch (cache_alg)
2892 : {
2893 4964273 : case alg_shift:
2894 4964273 : goto do_alg_shift;
2895 :
2896 992947 : case alg_add_t_m2:
2897 992947 : case alg_sub_t_m2:
2898 992947 : goto do_alg_addsub_t_m2;
2899 :
2900 119409 : case alg_add_factor:
2901 119409 : case alg_sub_factor:
2902 119409 : goto do_alg_addsub_factor;
2903 :
2904 1415618 : case alg_add_t2_m:
2905 1415618 : goto do_alg_add_t2_m;
2906 :
2907 8 : case alg_sub_t2_m:
2908 8 : goto do_alg_sub_t2_m;
2909 :
2910 0 : default:
2911 0 : gcc_unreachable ();
2912 : }
2913 : }
2914 : }
2915 :
2916 : /* If we have a group of zero bits at the low-order part of T, try
2917 : multiplying by the remaining bits and then doing a shift. */
2918 :
2919 3878727 : if ((t & 1) == 0)
2920 : {
2921 1978165 : do_alg_shift:
2922 6942438 : m = ctz_or_zero (t); /* m = number of low zero bits */
2923 6942438 : if (m < maxm)
2924 : {
2925 6941452 : q = t >> m;
2926 : /* The function expand_shift will choose between a shift and
2927 : a sequence of additions, so the observed cost is given as
2928 : MIN (m * add_cost(speed, mode), shift_cost(speed, mode, m)). */
2929 6941452 : op_cost = m * add_cost (speed, mode);
2930 6941452 : if (shift_cost (speed, mode, m) < op_cost)
2931 : op_cost = shift_cost (speed, mode, m);
2932 6941452 : new_limit.cost = best_cost.cost - op_cost;
2933 6941452 : new_limit.latency = best_cost.latency - op_cost;
2934 6941452 : synth_mult (alg_in, q, &new_limit, mode);
2935 :
2936 6941452 : alg_in->cost.cost += op_cost;
2937 6941452 : alg_in->cost.latency += op_cost;
2938 6941452 : if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2939 : {
2940 4539617 : best_cost = alg_in->cost;
2941 4539617 : std::swap (alg_in, best_alg);
2942 4539617 : best_alg->log[best_alg->ops] = m;
2943 4539617 : best_alg->op[best_alg->ops] = alg_shift;
2944 : }
2945 :
2946 : /* See if treating ORIG_T as a signed number yields a better
2947 : sequence. Try this sequence only for a negative ORIG_T
2948 : as it would be useless for a non-negative ORIG_T. */
2949 6941452 : if ((HOST_WIDE_INT) orig_t < 0)
2950 : {
2951 : /* Shift ORIG_T as follows because a right shift of a
2952 : negative-valued signed type is implementation
2953 : defined. */
2954 676388 : q = ~(~orig_t >> m);
2955 : /* The function expand_shift will choose between a shift
2956 : and a sequence of additions, so the observed cost is
2957 : given as MIN (m * add_cost(speed, mode),
2958 : shift_cost(speed, mode, m)). */
2959 676388 : op_cost = m * add_cost (speed, mode);
2960 676388 : if (shift_cost (speed, mode, m) < op_cost)
2961 : op_cost = shift_cost (speed, mode, m);
2962 676388 : new_limit.cost = best_cost.cost - op_cost;
2963 676388 : new_limit.latency = best_cost.latency - op_cost;
2964 676388 : synth_mult (alg_in, q, &new_limit, mode);
2965 :
2966 676388 : alg_in->cost.cost += op_cost;
2967 676388 : alg_in->cost.latency += op_cost;
2968 676388 : if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2969 : {
2970 646067 : best_cost = alg_in->cost;
2971 646067 : std::swap (alg_in, best_alg);
2972 646067 : best_alg->log[best_alg->ops] = m;
2973 646067 : best_alg->op[best_alg->ops] = alg_shift;
2974 : }
2975 : }
2976 : }
/* Here M >= MAXM.  For a scalar MODE exactly two words wide whose
   multiplier has at least a word of low zero bits, the shift is costed
   as a shift by M - BITS_PER_WORD plus the cost of a zero.  */
2977 986 : else if (2 * BITS_PER_WORD <= HOST_BITS_PER_WIDE_INT
2978 986 : && GET_MODE_BITSIZE (imode) == 2 * BITS_PER_WORD
2979 986 : && m >= BITS_PER_WORD
2980 1972 : && imode == mode)
2981 : {
2982 986 : q = t >> m;
2983 986 : int op1_cost = shift_cost (speed, mode, m - BITS_PER_WORD);
2984 986 : int op2_cost = zero_cost (speed);
2985 986 : op_latency = MAX (op1_cost, op2_cost);
2986 986 : op_cost = op1_cost + op2_cost;
2987 :
2988 986 : new_limit.cost = best_cost.cost - op_cost;
2989 986 : new_limit.latency = best_cost.latency - op_latency;
2990 986 : synth_mult (alg_in, q, &new_limit, mode);
2991 986 : alg_in->cost.cost += op_cost;
2992 986 : alg_in->cost.latency += op_latency;
2993 986 : if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2994 : {
2995 912 : best_cost = alg_in->cost;
2996 912 : std::swap (alg_in, best_alg);
2997 912 : best_alg->log[best_alg->ops] = m;
2998 912 : best_alg->op[best_alg->ops] = alg_shift;
2999 : }
3000 : }
3001 6942438 : if (cache_hit)
3002 4964273 : goto done;
3003 : }
3004 :
3005 : /* If we have an odd number, add or subtract one. */
3006 1978165 : if ((t & 1) != 0)
3007 : {
3008 2893509 : unsigned HOST_WIDE_INT w;
3009 :
3010 0 : do_alg_addsub_t_m2:
/* Find the lowest zero bit of T; W ends up as that single bit.  */
3011 42423953 : for (w = 1; (w & t) != 0; w <<= 1)
3012 : ;
3013 : /* If T was -1, then W will be zero after the loop. This is another
3014 : case where T ends with ...111. Handling this with (T + 1) and
3015 : subtract 1 produces slightly better code and results in algorithm
3016 : selection much faster than treating it like the ...0111 case
3017 : below. */
3018 2893509 : if (w == 0
3019 2454331 : || (w > 2
3020 : /* Reject the case where t is 3.
3021 : Thus we prefer addition in that case. */
3022 2454331 : && t != 3))
3023 : {
3024 : /* T ends with ...111. Multiply by (T + 1) and subtract T. */
3025 :
3026 1639965 : op_cost = add_cost (speed, mode);
3027 1639965 : new_limit.cost = best_cost.cost - op_cost;
3028 1639965 : new_limit.latency = best_cost.latency - op_cost;
3029 1639965 : synth_mult (alg_in, t + 1, &new_limit, mode);
3030 :
3031 1639965 : alg_in->cost.cost += op_cost;
3032 1639965 : alg_in->cost.latency += op_cost;
3033 1639965 : if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
3034 : {
3035 757385 : best_cost = alg_in->cost;
3036 757385 : std::swap (alg_in, best_alg);
3037 757385 : best_alg->log[best_alg->ops] = 0;
3038 757385 : best_alg->op[best_alg->ops] = alg_sub_t_m2;
3039 : }
3040 : }
3041 : else
3042 : {
3043 : /* T ends with ...01 or ...011. Multiply by (T - 1) and add T. */
3044 :
3045 1253544 : op_cost = add_cost (speed, mode);
3046 1253544 : new_limit.cost = best_cost.cost - op_cost;
3047 1253544 : new_limit.latency = best_cost.latency - op_cost;
3048 1253544 : synth_mult (alg_in, t - 1, &new_limit, mode);
3049 :
3050 1253544 : alg_in->cost.cost += op_cost;
3051 1253544 : alg_in->cost.latency += op_cost;
3052 1253544 : if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
3053 : {
3054 185699 : best_cost = alg_in->cost;
3055 185699 : std::swap (alg_in, best_alg);
3056 185699 : best_alg->log[best_alg->ops] = 0;
3057 185699 : best_alg->op[best_alg->ops] = alg_add_t_m2;
3058 : }
3059 : }
3060 :
3061 : /* We may be able to calculate a * -7, a * -15, a * -31, etc
3062 : quickly with a - a * n for some appropriate constant n. */
3063 2893509 : m = exact_log2 (-orig_t + 1);
3064 2893509 : if (m >= 0 && m < maxm)
3065 : {
3066 793507 : op_cost = add_cost (speed, mode) + shift_cost (speed, mode, m);
3067 : /* If the target has a cheap shift-and-subtract insn use
3068 : that in preference to a shift insn followed by a sub insn.
3069 : Assume that the shift-and-sub is "atomic" with a latency
3070 : equal to its cost, otherwise assume that on superscalar
3071 : hardware the shift may be executed concurrently with the
3072 : earlier steps in the algorithm. */
3073 793507 : if (shiftsub1_cost (speed, mode, m) <= op_cost)
3074 : {
3075 : op_cost = shiftsub1_cost (speed, mode, m);
3076 : op_latency = op_cost;
3077 : }
3078 : else
3079 787739 : op_latency = add_cost (speed, mode);
3080 :
3081 793507 : new_limit.cost = best_cost.cost - op_cost;
3082 793507 : new_limit.latency = best_cost.latency - op_latency;
3083 793507 : synth_mult (alg_in, (unsigned HOST_WIDE_INT) (-orig_t + 1) >> m,
3084 : &new_limit, mode);
3085 :
3086 793507 : alg_in->cost.cost += op_cost;
3087 793507 : alg_in->cost.latency += op_latency;
3088 793507 : if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
3089 : {
3090 200679 : best_cost = alg_in->cost;
3091 200679 : std::swap (alg_in, best_alg);
3092 200679 : best_alg->log[best_alg->ops] = m;
3093 200679 : best_alg->op[best_alg->ops] = alg_sub_t_m2;
3094 : }
3095 : }
3096 :
3097 2893509 : if (cache_hit)
3098 992947 : goto done;
3099 : }
3100 :
3101 : /* Look for factors of t of the form
3102 : t = q(2**m +- 1), 2 <= m <= floor(log2(t - 1)).
3103 : If we find such a factor, we can multiply by t using an algorithm that
3104 : multiplies by q, shift the result by m and add/subtract it to itself.
3105 :
3106 : We search for large factors first and loop down, even if large factors
3107 : are less probable than small; if we find a large factor we will find a
3108 : good sequence quickly, and therefore be able to prune (by decreasing
3109 : COST_LIMIT) the search.

   On a cache hit only the factor kind recorded in the cache
   (alg_add_factor or alg_sub_factor) is considered. */
3110 :
3111 1978165 : do_alg_addsub_factor:
3112 74214674 : for (m = floor_log2 (t - 1); m >= 2; m--)
3113 : {
3114 72268791 : unsigned HOST_WIDE_INT d;
3115 :
3116 72268791 : d = (HOST_WIDE_INT_1U << m) + 1;
3117 72268791 : if (t % d == 0 && t > d && m < maxm
3118 980796 : && (!cache_hit || cache_alg == alg_add_factor))
3119 : {
3120 980796 : op_cost = add_cost (speed, mode) + shift_cost (speed, mode, m);
3121 980796 : if (shiftadd_cost (speed, mode, m) <= op_cost)
3122 : op_cost = shiftadd_cost (speed, mode, m);
3123 :
3124 980796 : op_latency = op_cost;
3125 :
3126 :
3127 980796 : new_limit.cost = best_cost.cost - op_cost;
3128 980796 : new_limit.latency = best_cost.latency - op_latency;
3129 980796 : synth_mult (alg_in, t / d, &new_limit, mode);
3130 :
3131 980796 : alg_in->cost.cost += op_cost;
3132 980796 : alg_in->cost.latency += op_latency;
3133 980796 : if (alg_in->cost.latency < op_cost)
3134 205764 : alg_in->cost.latency = op_cost;
3135 980796 : if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
3136 : {
3137 118402 : best_cost = alg_in->cost;
3138 118402 : std::swap (alg_in, best_alg);
3139 118402 : best_alg->log[best_alg->ops] = m;
3140 118402 : best_alg->op[best_alg->ops] = alg_add_factor;
3141 : }
3142 : /* Other factors will have been taken care of in the recursion. */
3143 : break;
3144 : }
3145 :
3146 71287995 : d = (HOST_WIDE_INT_1U << m) - 1;
3147 71287995 : if (t % d == 0 && t > d && m < maxm
3148 1071457 : && (!cache_hit || cache_alg == alg_sub_factor))
3149 : {
3150 1071457 : op_cost = add_cost (speed, mode) + shift_cost (speed, mode, m);
3151 1071457 : if (shiftsub0_cost (speed, mode, m) <= op_cost)
3152 : op_cost = shiftsub0_cost (speed, mode, m);
3153 :
3154 1071457 : op_latency = op_cost;
3155 :
3156 1071457 : new_limit.cost = best_cost.cost - op_cost;
3157 1071457 : new_limit.latency = best_cost.latency - op_latency;
3158 1071457 : synth_mult (alg_in, t / d, &new_limit, mode);
3159 :
3160 1071457 : alg_in->cost.cost += op_cost;
3161 1071457 : alg_in->cost.latency += op_latency;
3162 1071457 : if (alg_in->cost.latency < op_cost)
3163 286108 : alg_in->cost.latency = op_cost;
3164 1071457 : if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
3165 : {
3166 32851 : best_cost = alg_in->cost;
3167 32851 : std::swap (alg_in, best_alg);
3168 32851 : best_alg->log[best_alg->ops] = m;
3169 32851 : best_alg->op[best_alg->ops] = alg_sub_factor;
3170 : }
/* As above, stop at the first (largest) factor found.  */
3171 : break;
3172 : }
3173 : }
3174 3998136 : if (cache_hit)
3175 119409 : goto done;
3176 :
3177 : /* Try shift-and-add (load effective address) instructions,
3178 : i.e. do a*3, a*5, a*9. */
3179 3878727 : if ((t & 1) != 0)
3180 : {
3181 1900562 : do_alg_add_t2_m:
3182 3316180 : q = t - 1;
3183 3316180 : m = ctz_hwi (q);
3184 3316180 : if (q && m < maxm)
3185 : {
3186 3316164 : op_cost = shiftadd_cost (speed, mode, m);
3187 3316164 : new_limit.cost = best_cost.cost - op_cost;
3188 3316164 : new_limit.latency = best_cost.latency - op_cost;
3189 3316164 : synth_mult (alg_in, (t - 1) >> m, &new_limit, mode);
3190 :
3191 3316164 : alg_in->cost.cost += op_cost;
3192 3316164 : alg_in->cost.latency += op_cost;
3193 3316164 : if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
3194 : {
3195 1471769 : best_cost = alg_in->cost;
3196 1471769 : std::swap (alg_in, best_alg);
3197 1471769 : best_alg->log[best_alg->ops] = m;
3198 1471769 : best_alg->op[best_alg->ops] = alg_add_t2_m;
3199 : }
3200 : }
3201 3316180 : if (cache_hit)
3202 1415618 : goto done;
3203 :
/* Likewise with shift-and-subtract, based on T + 1.  */
3204 1900562 : do_alg_sub_t2_m:
3205 1900570 : q = t + 1;
3206 1900570 : m = ctz_hwi (q);
3207 1900570 : if (q && m < maxm)
3208 : {
3209 1878672 : op_cost = shiftsub0_cost (speed, mode, m);
3210 1878672 : new_limit.cost = best_cost.cost - op_cost;
3211 1878672 : new_limit.latency = best_cost.latency - op_cost;
3212 1878672 : synth_mult (alg_in, (t + 1) >> m, &new_limit, mode);
3213 :
3214 1878672 : alg_in->cost.cost += op_cost;
3215 1878672 : alg_in->cost.latency += op_cost;
3216 1878672 : if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
3217 : {
3218 63 : best_cost = alg_in->cost;
3219 63 : std::swap (alg_in, best_alg);
3220 63 : best_alg->log[best_alg->ops] = m;
3221 63 : best_alg->op[best_alg->ops] = alg_sub_t2_m;
3222 : }
3223 : }
3224 1900570 : if (cache_hit)
3225 : goto done;
3226 : }
3227 :
3228 1978165 : done:
3229 : /* If best_cost has not decreased, we have not found any algorithm. */
3230 11370982 : if (!CHEAPER_MULT_COST (&best_cost, cost_limit))
3231 : {
3232 : /* We failed to find an algorithm. Record alg_impossible for
3233 : this case (that is, <T, MODE, COST_LIMIT>) so that next time
3234 : we are asked to find an algorithm for T within the same or
3235 : lower COST_LIMIT, we can immediately return to the
3236 : caller. */
3237 3496498 : entry_ptr->t = t;
3238 3496498 : entry_ptr->mode = mode;
3239 3496498 : entry_ptr->speed = speed;
3240 3496498 : entry_ptr->alg = alg_impossible;
3241 3496498 : entry_ptr->cost = *cost_limit;
3242 3496498 : return;
3243 : }
3244 :
3245 : /* Cache the result. Only the final (outermost) step and its total
   cost are recorded; a later cache hit redoes the recursion for the
   remaining steps. */
3246 7874484 : if (!cache_hit)
3247 : {
3248 704290 : entry_ptr->t = t;
3249 704290 : entry_ptr->mode = mode;
3250 704290 : entry_ptr->speed = speed;
3251 704290 : entry_ptr->alg = best_alg->op[best_alg->ops];
3252 704290 : entry_ptr->cost.cost = best_cost.cost;
3253 704290 : entry_ptr->cost.latency = best_cost.latency;
3254 : }
3255 :
3256 : /* If we are getting a sequence too long for `struct algorithm'
3257 : to record, make this search fail. */
3258 7874484 : if (best_alg->ops == MAX_BITS_PER_WORD)
3259 : return;
3260 :
3261 : /* Copy the algorithm from temporary space to the space at alg_out.
3262 : We avoid using structure assignment because the majority of
3263 : best_alg is normally undefined, and this is a critical function. */
3264 7874484 : alg_out->ops = best_alg->ops + 1;
3265 7874484 : alg_out->cost = best_cost;
3266 7874484 : memcpy (alg_out->op, best_alg->op,
3267 7874484 : alg_out->ops * sizeof *alg_out->op);
3268 7874484 : memcpy (alg_out->log, best_alg->log,
3269 : alg_out->ops * sizeof *alg_out->log);
3270 : }
3271 :
3272 : /* Find the cheapest way of multiplying a value of mode MODE by VAL.
3273 : Try three variations:
3274 :
3275 : - a shift/add sequence based on VAL itself
3276 : - a shift/add sequence based on -VAL, followed by a negation
3277 : - a shift/add sequence based on VAL - 1, followed by an addition.
3278 :
3279 : Return true if the cheapest of these cost less than MULT_COST,
3280 : describing the algorithm in *ALG and final fixup in *VARIANT. A negative MULT_COST fails immediately, leaving *ALG and *VARIANT untouched. Otherwise MULT_COST is capped at the cost of 2 * (unit bitsize of MODE) additions before the search, and the comparison above uses the capped value. */
3281 :
3282 : bool
3283 7708923 : choose_mult_variant (machine_mode mode, HOST_WIDE_INT val,
3284 : struct algorithm *alg, enum mult_variant *variant,
3285 : int mult_cost)
3286 : {
3287 7708923 : struct algorithm alg2; /* Scratch result for the -VAL and VAL - 1 attempts. */
3288 7708923 : struct mult_cost limit; /* Cost bound handed to each synth_mult call. */
3289 7708923 : int op_cost; /* Cost of the fixup operation of the variant being tried. */
3290 7708923 : bool speed = optimize_insn_for_speed_p ();
3291 :
3292 : /* Fail quickly for impossible bounds. */
3293 7708923 : if (mult_cost < 0)
3294 : return false;
3295 :
3296 : /* Ensure that mult_cost provides a reasonable upper bound.
3297 : Any constant multiplication can be performed with less
3298 : than 2 * bits additions. */
3299 15414684 : op_cost = 2 * GET_MODE_UNIT_BITSIZE (mode) * add_cost (speed, mode);
3300 7707342 : if (mult_cost > op_cost)
3301 : mult_cost = op_cost;
3302 :
3303 7707342 : *variant = basic_variant; /* First attempt: VAL itself, no fixup. */
3304 7707342 : limit.cost = mult_cost;
3305 7707342 : limit.latency = mult_cost;
3306 7707342 : synth_mult (alg, val, &limit, mode);
3307 :
3308 : /* Second attempt: -VAL followed by a negation. This works only if
3309 : the inverted value actually fits in an `unsigned int' */
3310 15414684 : if (HOST_BITS_PER_INT >= GET_MODE_UNIT_BITSIZE (mode))
3311 : {
3312 2785962 : op_cost = neg_cost (speed, mode);
3313 2785962 : if (MULT_COST_LESS (&alg->cost, mult_cost)) /* Must beat the best result so far, net of the negation. */
3314 : {
3315 2678921 : limit.cost = alg->cost.cost - op_cost;
3316 2678921 : limit.latency = alg->cost.latency - op_cost;
3317 : }
3318 : else
3319 : {
3320 107041 : limit.cost = mult_cost - op_cost;
3321 107041 : limit.latency = mult_cost - op_cost;
3322 : }
3323 :
3324 2785962 : synth_mult (&alg2, -(unsigned HOST_WIDE_INT) val, &limit, mode); /* Negate in unsigned arithmetic: -VAL on the signed type is undefined for HOST_WIDE_INT_MIN. */
3325 2785962 : alg2.cost.cost += op_cost;
3326 2785962 : alg2.cost.latency += op_cost;
3327 2785962 : if (CHEAPER_MULT_COST (&alg2.cost, &alg->cost))
3328 13806 : *alg = alg2, *variant = negate_variant;
3329 : }
3330 :
3331 : /* Third attempt: VAL - 1 followed by an addition. This proves very useful for division-by-constant. */
3332 7707342 : op_cost = add_cost (speed, mode);
3333 7707342 : if (MULT_COST_LESS (&alg->cost, mult_cost))
3334 : {
3335 6871179 : limit.cost = alg->cost.cost - op_cost;
3336 6871179 : limit.latency = alg->cost.latency - op_cost;
3337 : }
3338 : else
3339 : {
3340 836163 : limit.cost = mult_cost - op_cost;
3341 836163 : limit.latency = mult_cost - op_cost;
3342 : }
3343 :
3344 7707342 : if (val != HOST_WIDE_INT_MIN /* Skipped only for HOST_WIDE_INT_MIN when the unit precision differs from HOST_BITS_PER_WIDE_INT. */
3345 7707352 : || GET_MODE_UNIT_PRECISION (mode) == HOST_BITS_PER_WIDE_INT)
3346 : {
3347 7707342 : synth_mult (&alg2, val - HOST_WIDE_INT_1U, &limit, mode); /* The subtraction is done in unsigned arithmetic. */
3348 7707342 : alg2.cost.cost += op_cost;
3349 7707342 : alg2.cost.latency += op_cost;
3350 7707342 : if (CHEAPER_MULT_COST (&alg2.cost, &alg->cost))
3351 2738 : *alg = alg2, *variant = add_variant;
3352 : }
3353 :
3354 7707342 : return MULT_COST_LESS (&alg->cost, mult_cost);
3355 : }
3356 :
3357 : /* A subroutine of expand_mult, used for constant multiplications.
3358 : Multiply OP0 by VAL in mode MODE, storing the result in TARGET if
3359 : convenient. Use the shift/add sequence described by ALG and apply
3360 : the final fixup specified by VARIANT. The first operation of ALG must be alg_zero or alg_m. Returns the rtx holding the product. Before returning, asserts that the multiplier actually synthesized equals VAL within the bits of MODE's inner mode. */
3361 :
3362 : static rtx
3363 137358 : expand_mult_const (machine_mode mode, rtx op0, HOST_WIDE_INT val,
3364 : rtx target, const struct algorithm *alg,
3365 : enum mult_variant variant)
3366 : {
3367 137358 : unsigned HOST_WIDE_INT val_so_far; /* Multiplier synthesized so far; checked against VAL at the end. */
3368 137358 : rtx_insn *insn;
3369 137358 : rtx accum, tem;
3370 137358 : int opno;
3371 137358 : machine_mode nmode;
3372 :
3373 : /* Avoid referencing memory over and over and invalid sharing
3374 : on SUBREGs. */
3375 137358 : op0 = force_reg (mode, op0);
3376 :
3377 : /* ACCUM starts out either as OP0 or as a zero, depending on
3378 : the first operation. */
3379 :
3380 137358 : if (alg->op[0] == alg_zero)
3381 : {
3382 5392 : accum = copy_to_mode_reg (mode, CONST0_RTX (mode));
3383 5392 : val_so_far = 0;
3384 : }
3385 131966 : else if (alg->op[0] == alg_m)
3386 : {
3387 131966 : accum = copy_to_mode_reg (mode, op0);
3388 131966 : val_so_far = 1;
3389 : }
3390 : else
3391 0 : gcc_unreachable ();
3392 :
3393 388544 : for (opno = 1; opno < alg->ops; opno++) /* Entry 0 was consumed above; replay the remaining steps in order. */
3394 : {
3395 251186 : int log = alg->log[opno]; /* Shift count for this step. */
3396 251186 : rtx shift_subtarget = optimize ? 0 : accum; /* Reuse ACCUM as a shift target only when not optimizing. */
3397 235113 : rtx add_target
3398 137358 : = (opno == alg->ops - 1 && target != 0 && variant != add_variant
3399 40163 : && !optimize)
3400 251186 : ? target : 0; /* TARGET is used only for the last step, when not optimizing and no add_variant fixup follows. */
3401 251186 : rtx accum_target = optimize ? 0 : accum;
3402 251186 : rtx accum_inner;
3403 :
3404 251186 : switch (alg->op[opno]) /* Each case emits the RTL and mirrors the same arithmetic on VAL_SO_FAR. */
3405 : {
3406 109465 : case alg_shift:
3407 109465 : tem = expand_shift (LSHIFT_EXPR, mode, accum, log, NULL_RTX, 0);
3408 : /* REG_EQUAL note will be attached to the following insn. */
3409 109465 : emit_move_insn (accum, tem);
3410 109465 : val_so_far <<= log;
3411 109465 : break;
3412 :
3413 6072 : case alg_add_t_m2: /* accum += op0 << log */
3414 6072 : tem = expand_shift (LSHIFT_EXPR, mode, op0, log, NULL_RTX, 0);
3415 12144 : accum = force_operand (gen_rtx_PLUS (mode, accum, tem),
3416 : add_target ? add_target : accum_target);
3417 6072 : val_so_far += HOST_WIDE_INT_1U << log;
3418 6072 : break;
3419 :
3420 19289 : case alg_sub_t_m2: /* accum -= op0 << log */
3421 19289 : tem = expand_shift (LSHIFT_EXPR, mode, op0, log, NULL_RTX, 0);
3422 38578 : accum = force_operand (gen_rtx_MINUS (mode, accum, tem),
3423 : add_target ? add_target : accum_target);
3424 19289 : val_so_far -= HOST_WIDE_INT_1U << log;
3425 19289 : break;
3426 :
3427 115229 : case alg_add_t2_m: /* accum = (accum << log) + op0 */
3428 115229 : accum = expand_shift (LSHIFT_EXPR, mode, accum,
3429 115229 : log, shift_subtarget, 0);
3430 230458 : accum = force_operand (gen_rtx_PLUS (mode, accum, op0),
3431 : add_target ? add_target : accum_target);
3432 115229 : val_so_far = (val_so_far << log) + 1;
3433 115229 : break;
3434 :
3435 0 : case alg_sub_t2_m: /* accum = (accum << log) - op0 */
3436 0 : accum = expand_shift (LSHIFT_EXPR, mode, accum,
3437 0 : log, shift_subtarget, 0);
3438 0 : accum = force_operand (gen_rtx_MINUS (mode, accum, op0),
3439 : add_target ? add_target : accum_target);
3440 0 : val_so_far = (val_so_far << log) - 1;
3441 0 : break;
3442 :
3443 1038 : case alg_add_factor: /* accum += accum << log */
3444 1038 : tem = expand_shift (LSHIFT_EXPR, mode, accum, log, NULL_RTX, 0);
3445 2076 : accum = force_operand (gen_rtx_PLUS (mode, accum, tem),
3446 : add_target ? add_target : accum_target);
3447 1038 : val_so_far += val_so_far << log;
3448 1038 : break;
3449 :
3450 93 : case alg_sub_factor: /* accum = (accum << log) - accum */
3451 93 : tem = expand_shift (LSHIFT_EXPR, mode, accum, log, NULL_RTX, 0);
3452 186 : accum = force_operand (gen_rtx_MINUS (mode, tem, accum),
3453 : (add_target
3454 93 : ? add_target : (optimize ? 0 : tem)));
3455 93 : val_so_far = (val_so_far << log) - val_so_far;
3456 93 : break;
3457 :
3458 0 : default:
3459 0 : gcc_unreachable ();
3460 : }
3461 :
3462 251186 : if (SCALAR_INT_MODE_P (mode))
3463 : {
3464 : /* Write a REG_EQUAL note on the last insn so that we can cse
3465 : multiplication sequences. Note that if ACCUM is a SUBREG,
3466 : we've set the inner register and must properly indicate that. */
3467 244599 : tem = op0, nmode = mode;
3468 244599 : accum_inner = accum;
3469 244599 : if (GET_CODE (accum) == SUBREG)
3470 : {
3471 0 : accum_inner = SUBREG_REG (accum);
3472 0 : nmode = GET_MODE (accum_inner);
3473 0 : tem = gen_lowpart (nmode, op0);
3474 : }
3475 :
3476 : /* Don't add a REG_EQUAL note if tem is a paradoxical SUBREG.
3477 : In that case, only the low bits of accum would be guaranteed to
3478 : be equal to the content of the REG_EQUAL note, the upper bits
3479 : can be anything. */
3480 244599 : if (!paradoxical_subreg_p (tem))
3481 : {
3482 244599 : insn = get_last_insn ();
3483 244599 : wide_int wval_so_far
3484 244599 : = wi::uhwi (val_so_far,
3485 244599 : GET_MODE_PRECISION (as_a <scalar_mode> (nmode)));
3486 244599 : rtx c = immed_wide_int_const (wval_so_far, nmode);
3487 244599 : set_dst_reg_note (insn, REG_EQUAL, gen_rtx_MULT (nmode, tem, c),
3488 : accum_inner);
3489 244599 : }
3490 : }
3491 : }
3492 :
3493 137358 : if (variant == negate_variant) /* Final fixup: the sequence computed OP0 * -VAL. */
3494 : {
3495 483 : val_so_far = -val_so_far;
3496 483 : accum = expand_unop (mode, neg_optab, accum, target, 0);
3497 : }
3498 136875 : else if (variant == add_variant) /* Final fixup: the sequence computed OP0 * (VAL - 1). */
3499 : {
3500 25 : val_so_far = val_so_far + 1;
3501 25 : accum = force_operand (gen_rtx_PLUS (mode, accum, op0), target);
3502 : }
3503 :
3504 : /* Compare only the bits of val and val_so_far that are significant
3505 : in the result mode, to avoid sign-/zero-extension confusion. */
3506 137358 : nmode = GET_MODE_INNER (mode);
3507 137358 : val &= GET_MODE_MASK (nmode);
3508 137358 : val_so_far &= GET_MODE_MASK (nmode);
3509 137358 : gcc_assert (val == (HOST_WIDE_INT) val_so_far);
3510 :
3511 137358 : return accum;
3512 : }
3513 :
3514 : /* Perform a multiplication and return an rtx for the result.
3515 : MODE is mode of value; OP0 and OP1 are what to multiply (rtx's);
3516 : TARGET is a suggestion for where to store the result (an rtx).
3517 :
3518 : We check specially for a constant integer as OP1.
3519 : If you want this check for OP0 as well, then before calling
3520 : you should swap the two operands if OP0 would be constant. (The body also swaps the operands itself when OP0 satisfies CONSTANT_P.) UNSIGNEDP is forwarded to expand_shift and expand_binop. When NO_LIBCALL is true the fallback expand_binop calls use OPTAB_WIDEN instead of OPTAB_LIB_WIDEN, and the returned rtx may then be null. */
3521 :
3522 : rtx
3523 1087783 : expand_mult (machine_mode mode, rtx op0, rtx op1, rtx target,
3524 : int unsignedp, bool no_libcall)
3525 : {
3526 1087783 : enum mult_variant variant;
3527 1087783 : struct algorithm algorithm;
3528 1087783 : rtx scalar_op1;
3529 1087783 : int max_cost;
3530 1087783 : bool speed = optimize_insn_for_speed_p ();
3531 1087783 : bool do_trapv = flag_trapv && SCALAR_INT_MODE_P (mode) && !unsignedp; /* Signed scalar integer multiply under flag_trapv. */
3532 :
3533 1087783 : if (CONSTANT_P (op0))
3534 275 : std::swap (op0, op1);
3535 :
3536 : /* For vectors, there are several simplifications that can be made if
3537 : all elements of the vector constant are identical. */
3538 1087783 : scalar_op1 = unwrap_const_vec_duplicate (op1);
3539 :
3540 1087783 : if (INTEGRAL_MODE_P (mode))
3541 : {
3542 970802 : rtx fake_reg;
3543 970802 : HOST_WIDE_INT coeff;
3544 970802 : bool is_neg;
3545 970802 : int mode_bitsize;
3546 :
3547 970802 : if (op1 == CONST0_RTX (mode)) /* x * 0 */
3548 : return op1;
3549 970802 : if (op1 == CONST1_RTX (mode)) /* x * 1 */
3550 : return op0;
3551 927601 : if (op1 == CONSTM1_RTX (mode)) /* x * -1 becomes a negation. */
3552 2822 : return expand_unop (mode, do_trapv ? negv_optab : neg_optab,
3553 1411 : op0, target, 0);
3554 :
3555 926190 : if (do_trapv) /* No shift/add synthesis is attempted for the trapping form. */
3556 32 : goto skip_synth;
3557 :
3558 : /* If mode is integer vector mode, check if the backend supports
3559 : vector lshift (by scalar or vector) at all. If not, we can't use
3560 : synthesized multiply. */
3561 926158 : if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT
3562 14330 : && optab_handler (vashl_optab, mode) == CODE_FOR_nothing
3563 938090 : && optab_handler (ashl_optab, mode) == CODE_FOR_nothing)
3564 0 : goto skip_synth;
3565 :
3566 : /* These are the operations that are potentially turned into
3567 : a sequence of shifts and additions. */
3568 926158 : mode_bitsize = GET_MODE_UNIT_BITSIZE (mode);
3569 :
3570 : /* synth_mult does an `unsigned int' multiply. As long as the mode is
3571 : less than or equal in size to `unsigned int' this doesn't matter.
3572 : If the mode is larger than `unsigned int', then synth_mult works
3573 : only if the constant value exactly fits in an `unsigned int' without
3574 : any truncation. This means that multiplying by negative values does
3575 : not work; results are off by 2^32 on a 32 bit machine. */
3576 926158 : if (CONST_INT_P (scalar_op1))
3577 : {
3578 691149 : coeff = INTVAL (scalar_op1);
3579 691149 : is_neg = coeff < 0;
3580 : }
3581 : #if TARGET_SUPPORTS_WIDE_INT
3582 235009 : else if (CONST_WIDE_INT_P (scalar_op1))
3583 : #else
3584 : else if (CONST_DOUBLE_AS_INT_P (scalar_op1))
3585 : #endif
3586 : {
3587 1140 : int shift = wi::exact_log2 (rtx_mode_t (scalar_op1, mode));
3588 : /* Perfect power of 2 (other than 1, which is handled above). */
3589 1140 : if (shift > 0)
3590 106 : return expand_shift (LSHIFT_EXPR, mode, op0,
3591 106 : shift, target, unsignedp);
3592 : else
3593 1034 : goto skip_synth;
3594 : }
3595 : else
3596 233869 : goto skip_synth; /* OP1 is not an integer constant. */
3597 :
3598 : /* We used to test optimize here, on the grounds that it's better to
3599 : produce a smaller program when -O is not used. But this causes
3600 : such a terrible slowdown sometimes that it seems better to always
3601 : use synth_mult. */
3602 :
3603 : /* Special case powers of two. */
3604 691149 : if (EXACT_POWER_OF_2_OR_ZERO_P (coeff)
3605 466399 : && !(is_neg && mode_bitsize > HOST_BITS_PER_WIDE_INT))
3606 466391 : return expand_shift (LSHIFT_EXPR, mode, op0,
3607 932782 : floor_log2 (coeff), target, unsignedp);
3608 :
3609 224758 : fake_reg = gen_raw_REG (mode, LAST_VIRTUAL_REGISTER + 1); /* Placeholder operand, used only to cost a MULT rtx. */
3610 :
3611 : /* Attempt to handle multiplication of DImode values by negative
3612 : coefficients, by performing the multiplication by a positive
3613 : multiplier and then inverting the result. */
3614 224758 : if (is_neg && mode_bitsize > HOST_BITS_PER_WIDE_INT)
3615 : {
3616 : /* It's safe to use -coeff even for INT_MIN, as the
3617 : result is interpreted as an unsigned coefficient.
3618 : Exclude cost of op0 from max_cost to match the cost
3619 : calculation of the synth_mult. */
3620 216 : coeff = -(unsigned HOST_WIDE_INT) coeff;
3621 216 : max_cost = (set_src_cost (gen_rtx_MULT (mode, fake_reg, op1),
3622 : mode, speed)
3623 216 : - neg_cost (speed, mode));
3624 216 : if (max_cost <= 0)
3625 0 : goto skip_synth;
3626 :
3627 : /* Special case powers of two. */
3628 216 : if (EXACT_POWER_OF_2_OR_ZERO_P (coeff))
3629 : {
3630 342 : rtx temp = expand_shift (LSHIFT_EXPR, mode, op0,
3631 171 : floor_log2 (coeff), target, unsignedp);
3632 171 : return expand_unop (mode, neg_optab, temp, target, 0);
3633 : }
3634 :
3635 45 : if (choose_mult_variant (mode, coeff, &algorithm, &variant,
3636 : max_cost))
3637 : {
3638 44 : rtx temp = expand_mult_const (mode, op0, coeff, NULL_RTX,
3639 : &algorithm, variant);
3640 44 : return expand_unop (mode, neg_optab, temp, target, 0);
3641 : }
3642 1 : goto skip_synth;
3643 : }
3644 :
3645 : /* Exclude cost of op0 from max_cost to match the cost
3646 : calculation of the synth_mult. */
3647 224542 : max_cost = set_src_cost (gen_rtx_MULT (mode, fake_reg, op1), mode, speed);
3648 224542 : if (choose_mult_variant (mode, coeff, &algorithm, &variant, max_cost))
3649 136924 : return expand_mult_const (mode, op0, coeff, target,
3650 136924 : &algorithm, variant);
3651 : }
3652 87618 : skip_synth:
3653 :
3654 : /* Expand x*2.0 as x+x. */
3655 37500 : if (CONST_DOUBLE_AS_FLOAT_P (scalar_op1)
3656 477035 : && real_equal (CONST_DOUBLE_REAL_VALUE (scalar_op1), &dconst2))
3657 : {
3658 5765 : op0 = force_reg (GET_MODE (op0), op0);
3659 11530 : return expand_binop (mode, add_optab, op0, op0,
3660 : target, unsignedp,
3661 5765 : no_libcall ? OPTAB_WIDEN : OPTAB_LIB_WIDEN);
3662 : }
3663 :
3664 : /* This used to use umul_optab if unsigned, but for non-widening multiply
3665 : there is no difference between signed and unsigned. */
3666 1301278 : op0 = expand_binop (mode, do_trapv ? smulv_optab : smul_optab,
3667 : op0, op1, target, unsignedp,
3668 : no_libcall ? OPTAB_WIDEN : OPTAB_LIB_WIDEN);
3669 433770 : gcc_assert (op0 || no_libcall); /* A null result is tolerated only when NO_LIBCALL is set. */
3670 : return op0;
3671 : }
3672 :
3673 : /* Return a cost estimate for multiplying a register by the given
3674 : COEFFicient in the given MODE and SPEED. The upper bound is the set_src_cost of a MULT of two placeholder registers; if choose_mult_variant finds a shift/add algorithm below that bound its cost is returned, otherwise the bound itself. NOTE(review): SPEED is used only for the MULT bound; choose_mult_variant takes no speed argument -- confirm the two always agree. */
3675 :
3676 : int
3677 7157534 : mult_by_coeff_cost (HOST_WIDE_INT coeff, machine_mode mode, bool speed)
3678 : {
3679 7157534 : int max_cost;
3680 7157534 : struct algorithm algorithm;
3681 7157534 : enum mult_variant variant; /* Filled in by choose_mult_variant but not otherwise used here. */
3682 :
3683 7157534 : rtx fake_reg = gen_raw_REG (mode, LAST_VIRTUAL_REGISTER + 1); /* Placeholder operand, used only to cost a MULT rtx. */
3684 7157534 : max_cost = set_src_cost (gen_rtx_MULT (mode, fake_reg, fake_reg),
3685 : mode, speed);
3686 7157534 : if (choose_mult_variant (mode, coeff, &algorithm, &variant, max_cost))
3687 6413079 : return algorithm.cost.cost;
3688 : else
3689 : return max_cost;
3690 : }
3691 :
3692 : /* Perform a widening multiplication and return an rtx for the result.
3693 : MODE is mode of value; OP0 and OP1 are what to multiply (rtx's);
3694 : TARGET is a suggestion for where to store the result (an rtx).
3695 : THIS_OPTAB is the optab we should use, it must be either umul_widen_optab
3696 : or smul_widen_optab.
3697 :
3698 : We check specially for a constant integer as OP1, comparing the
3699 : cost of a widening multiply against the cost of a sequence of shifts
3700 : and adds. In the constant paths OP0 is first converted to MODE, zero-extended when THIS_OPTAB is umul_widen_optab and sign-extended otherwise. UNSIGNEDP is forwarded to expand_shift and expand_binop. */
3701 :
3702 : rtx
3703 17989 : expand_widening_mult (machine_mode mode, rtx op0, rtx op1, rtx target,
3704 : int unsignedp, optab this_optab)
3705 : {
3706 17989 : bool speed = optimize_insn_for_speed_p ();
3707 17989 : rtx cop1; /* OP1 converted from OP0's mode to MODE. */
3708 :
3709 17989 : if (CONST_INT_P (op1)
3710 4221 : && GET_MODE (op0) != VOIDmode
3711 4221 : && (cop1 = convert_modes (mode, GET_MODE (op0), op1,
3712 : this_optab == umul_widen_optab))
3713 4221 : && CONST_INT_P (cop1)
3714 21687 : && (INTVAL (cop1) >= 0
3715 20470 : || HWI_COMPUTABLE_MODE_P (mode)))
3716 : {
3717 3465 : HOST_WIDE_INT coeff = INTVAL (cop1);
3718 3465 : int max_cost;
3719 3465 : enum mult_variant variant;
3720 3465 : struct algorithm algorithm;
3721 :
3722 3465 : if (coeff == 0)
3723 984 : return CONST0_RTX (mode);
3724 :
3725 : /* Special case powers of two. */
3726 3357 : if (EXACT_POWER_OF_2_OR_ZERO_P (coeff))
3727 : {
3728 499 : op0 = convert_to_mode (mode, op0, this_optab == umul_widen_optab);
3729 499 : return expand_shift (LSHIFT_EXPR, mode, op0,
3730 499 : floor_log2 (coeff), target, unsignedp);
3731 : }
3732 :
3733 : /* Exclude cost of op0 from max_cost to match the cost
3734 : calculation of the synth_mult. */
3735 2858 : max_cost = mul_widen_cost (speed, mode);
3736 2858 : if (choose_mult_variant (mode, coeff, &algorithm, &variant,
3737 : max_cost))
3738 : {
3739 377 : op0 = convert_to_mode (mode, op0, this_optab == umul_widen_optab);
3740 377 : return expand_mult_const (mode, op0, coeff, target,
3741 377 : &algorithm, variant);
3742 : }
3743 : }
3744 17005 : return expand_binop (mode, this_optab, op0, op1, target, /* Fallback: the widening multiply optab itself. */
3745 17005 : unsignedp, OPTAB_LIB_WIDEN);
3746 : }
3747 :
3748 : /* Choose a minimal N + 1 bit approximation to 2**K / D that can be used to
3749 : replace division by D, put the least significant N bits of the result in
3750 : *MULTIPLIER_PTR, the value K - N in *POST_SHIFT_PTR, and return the most
3751 : significant bit.
3752 :
3753 : The width of operations is N (should be <= HOST_BITS_PER_WIDE_INT), the
3754 : needed precision is PRECISION (should be <= N).
3755 :
3756 : PRECISION should be as small as possible so this function can choose the
3757 : multiplier more freely. If PRECISION is <= N - 1, the most significant
3758 : bit returned by the function will be zero.
3759 :
3760 : Using this function, x / D is equal to (x*m) / 2**N >> (*POST_SHIFT_PTR),
3761 : where m is the full N + 1 bit multiplier. The function asserts that ceil_log2 (D) is at most N and at most PRECISION. */
3762 :
3763 : unsigned HOST_WIDE_INT
3764 64157 : choose_multiplier (unsigned HOST_WIDE_INT d, int n, int precision,
3765 : unsigned HOST_WIDE_INT *multiplier_ptr,
3766 : int *post_shift_ptr)
3767 : {
3768 64157 : int lgup, post_shift;
3769 64157 : int pow1, pow2; /* Exponents of the two powers of two forming the numerators below. */
3770 :
3771 : /* lgup = ceil(log2(d)) */
3772 : /* Assuming d > 1, we have d >= 2^(lgup-1) + 1 */
3773 64157 : lgup = ceil_log2 (d);
3774 :
3775 64157 : gcc_assert (lgup <= n);
3776 64157 : gcc_assert (lgup <= precision);
3777 :
3778 64157 : pow1 = n + lgup;
3779 64157 : pow2 = n + lgup - precision;
3780 :
3781 : /* mlow = 2^(n + lgup)/d */
3782 : /* Trivially from above we have mlow < 2^(n+1) */
3783 64157 : wide_int val = wi::set_bit_in_zero (pow1, HOST_BITS_PER_DOUBLE_INT);
3784 64157 : wide_int mlow = wi::udiv_trunc (val, d);
3785 :
3786 : /* mhigh = (2^(n + lgup) + 2^(n + lgup - precision))/d */
3787 : /* From above we have mhigh < 2^(n+1) assuming lgup <= precision */
3788 : /* From precision <= n, the difference between the numerators of mhigh and
3789 : mlow is >= 2^lgup >= d. Therefore the difference of the quotients in
3790 : the Euclidean division by d is at least 1, so we have mlow < mhigh and
3791 : the exact value of 2^(n + lgup)/d lies in the interval [mlow; mhigh). */
3792 64157 : val |= wi::set_bit_in_zero (pow2, HOST_BITS_PER_DOUBLE_INT);
3793 64157 : wide_int mhigh = wi::udiv_trunc (val, d);
3794 :
3795 : /* Reduce to lowest terms. */
3796 : /* If precision <= n - 1, then the difference between the numerators of
3797 : mhigh and mlow is >= 2^(lgup + 1) >= 2 * 2^lgup >= 2 * d. Therefore
3798 : the difference of the quotients in the Euclidean division by d is at
3799 : least 2, which means that mhigh and mlow differ by at least one bit
3800 : not in the last place. The conclusion is that the first iteration of
3801 : the loop below completes and shifts mhigh and mlow by 1 bit, which in
3802 : particular means that mhigh < 2^n, that is to say, the most significant
3803 : bit in the n + 1 bit value is zero. */
3804 170026 : for (post_shift = lgup; post_shift > 0; post_shift--)
3805 : {
3806 164554 : unsigned HOST_WIDE_INT ml_lo = wi::extract_uhwi (mlow, 1, /* mlow shifted right by one bit. */
3807 : HOST_BITS_PER_WIDE_INT);
3808 164554 : unsigned HOST_WIDE_INT mh_lo = wi::extract_uhwi (mhigh, 1, /* mhigh shifted right by one bit. */
3809 : HOST_BITS_PER_WIDE_INT);
3810 164554 : if (ml_lo >= mh_lo) /* Stop once halving would make the two bounds meet. */
3811 : break;
3812 :
3813 105869 : mlow = wi::uhwi (ml_lo, HOST_BITS_PER_DOUBLE_INT);
3814 105869 : mhigh = wi::uhwi (mh_lo, HOST_BITS_PER_DOUBLE_INT);
3815 : }
3816 :
3817 64157 : *post_shift_ptr = post_shift;
3818 :
3819 64157 : if (n < HOST_BITS_PER_WIDE_INT)
3820 : {
3821 41335 : unsigned HOST_WIDE_INT mask = (HOST_WIDE_INT_1U << n) - 1;
3822 41335 : *multiplier_ptr = mhigh.to_uhwi () & mask;
3823 41335 : return mhigh.to_uhwi () > mask; /* Nonzero iff mhigh has a bit set above the low N bits. */
3824 : }
3825 : else
3826 : {
3827 22822 : *multiplier_ptr = mhigh.to_uhwi ();
3828 22822 : return wi::extract_uhwi (mhigh, HOST_BITS_PER_WIDE_INT, 1); /* The single bit at position HOST_BITS_PER_WIDE_INT. */
3829 : }
3830 64157 : }
3831 :
3832 : /* Compute the inverse of X mod 2**N, i.e., find Y such that X * Y is congruent
3833 : to 1 modulo 2**N, assuming that X is odd. Bézout's lemma guarantees that Y
3834 : exists for any given positive N. The function asserts that X is odd. */
3835 :
3836 : static unsigned HOST_WIDE_INT
3837 49953 : invert_mod2n (unsigned HOST_WIDE_INT x, int n)
3838 : {
3839 49953 : gcc_assert ((x & 1) == 1);
3840 :
3841 : /* The algorithm notes that the choice Y = X satisfies X*Y == 1 mod 2^3,
3842 : since X is odd. Then each iteration doubles the number of bits of
3843 : significance in Y. */
3844 :
3845 51285 : const unsigned HOST_WIDE_INT mask /* Low N bits set; all ones when N is the full HOST_WIDE_INT width. */
3846 : = (n == HOST_BITS_PER_WIDE_INT
3847 49953 : ? HOST_WIDE_INT_M1U
3848 1332 : : (HOST_WIDE_INT_1U << n) - 1);
3849 49953 : unsigned HOST_WIDE_INT y = x;
3850 49953 : int nbit = 3; /* Number of low bits of Y known to be correct. */
3851 :
3852 298356 : while (nbit < n) /* Does not run when N <= 3; X is then returned without masking. */
3853 : {
3854 248403 : y = y * (2 - x*y) & mask; /* Modulo 2^N */
3855 248403 : nbit *= 2;
3856 : }
3857 :
3858 49953 : return y;
3859 : }
3860 :
3861 : /* Emit code to adjust ADJ_OPERAND after multiplication of wrong signedness
3862 : flavor of OP0 and OP1. ADJ_OPERAND is already the high half of the
3863 : product OP0 x OP1. If UNSIGNEDP is nonzero, adjust the signed product
3864 : to become unsigned, if UNSIGNEDP is zero, adjust the unsigned product to
3865 : become signed.
3866 :
3867 : The result is put in TARGET if that is convenient.
3868 :
3869 : MODE is the mode of operation. Two correction terms are applied, one per operand, added when UNSIGNEDP is nonzero and subtracted otherwise. Returns the rtx holding the adjusted value. */
3870 :
3871 : rtx
3872 0 : expand_mult_highpart_adjust (scalar_int_mode mode, rtx adj_operand, rtx op0,
3873 : rtx op1, rtx target, int unsignedp)
3874 : {
3875 0 : rtx tem;
3876 0 : enum rtx_code adj_code = unsignedp ? PLUS : MINUS;
3877 :
3878 0 : tem = expand_shift (RSHIFT_EXPR, mode, op0, /* OP0 shifted right by bitsize - 1; last argument 0 presumably selects a signed shift -- confirm. */
3879 0 : GET_MODE_BITSIZE (mode) - 1, NULL_RTX, 0);
3880 0 : tem = expand_and (mode, tem, op1, NULL_RTX); /* TEM = (OP0 >> (bitsize - 1)) & OP1. */
3881 0 : adj_operand
3882 0 : = force_operand (gen_rtx_fmt_ee (adj_code, mode, adj_operand, tem),
3883 : adj_operand);
3884 :
3885 0 : tem = expand_shift (RSHIFT_EXPR, mode, op1, /* Same correction with the operands' roles exchanged. */
3886 0 : GET_MODE_BITSIZE (mode) - 1, NULL_RTX, 0);
3887 0 : tem = expand_and (mode, tem, op0, NULL_RTX); /* TEM = (OP1 >> (bitsize - 1)) & OP0. */
3888 0 : target = force_operand (gen_rtx_fmt_ee (adj_code, mode, adj_operand, tem),
3889 : target);
3890 :
3891 0 : return target;
3892 : }
3893 :
3894 : /* Subroutine of expmed_mult_highpart. Return the MODE high part of OP. When MODE is word_mode this is gen_highpart of OP; otherwise OP is shifted right by the bitsize of MODE in the next wider mode and converted back to MODE. */
3895 :
3896 : static rtx
3897 20088 : extract_high_half (scalar_int_mode mode, rtx op)
3898 : {
3899 20088 : if (mode == word_mode)
3900 0 : return gen_highpart (mode, op);
3901 :
3902 20088 : scalar_int_mode wider_mode = GET_MODE_WIDER_MODE (mode).require (); /* OP is treated as a value of this mode below. */
3903 :
3904 40176 : op = expand_shift (RSHIFT_EXPR, wider_mode, op, /* Move the upper half down into the low bits. */
3905 20088 : GET_MODE_BITSIZE (mode), 0, 1);
3906 20088 : return convert_modes (mode, wider_mode, op, 0);
3907 : }
3908 :
3909 : /* Like expmed_mult_highpart, but only consider using multiplication optab. The strategies are tried in this order: a highpart multiply of the requested signedness; a highpart multiply of the opposite signedness followed by an adjustment; a widening multiply; a non-widening multiply in the next wider mode; a widening multiply of the opposite signedness followed by an adjustment. Each is attempted only when its estimated cost is below MAX_COST. Returns the rtx holding the high part, or 0 if no strategy succeeded. */
3910 :
3911 : rtx
3912 45360 : expmed_mult_highpart_optab (scalar_int_mode mode, rtx op0, rtx op1,
3913 : rtx target, int unsignedp, int max_cost)
3914 : {
3915 45360 : const scalar_int_mode wider_mode = GET_MODE_WIDER_MODE (mode).require ();
3916 45360 : const bool speed = optimize_insn_for_speed_p ();
3917 45360 : const int size = GET_MODE_BITSIZE (mode);
3918 45360 : optab moptab;
3919 45360 : rtx tem;
3920 :
3921 : /* Firstly, try using a multiplication insn that only generates the needed
3922 : high part of the product, and in the sign flavor of unsignedp. */
3923 45360 : if (mul_highpart_cost (speed, mode) < max_cost)
3924 : {
3925 43379 : moptab = unsignedp ? umul_highpart_optab : smul_highpart_optab;
3926 43379 : tem = expand_binop (mode, moptab, op0, op1, target, unsignedp,
3927 : OPTAB_DIRECT);
3928 43379 : if (tem)
3929 : return tem;
3930 : }
3931 :
3932 : /* Secondly, same as above, but use sign flavor opposite of unsignedp.
3933 : Need to adjust the result after the multiplication. */
3934 22196 : if (size - 1 < BITS_PER_WORD
3935 44170 : && (mul_highpart_cost (speed, mode)
3936 21974 : + 2 * shift_cost (speed, mode, size-1)
3937 21974 : + 4 * add_cost (speed, mode) < max_cost))
3938 : {
3939 5542 : moptab = unsignedp ? smul_highpart_optab : umul_highpart_optab;
3940 5542 : tem = expand_binop (mode, moptab, op0, op1, target, !unsignedp,
3941 : OPTAB_DIRECT);
3942 5542 : if (tem)
3943 : /* We used the wrong signedness. Adjust the result. */
3944 0 : return expand_mult_highpart_adjust (mode, tem, op0, op1, tem,
3945 0 : unsignedp);
3946 : }
3947 :
3948 : /* Try widening multiplication. */
3949 22196 : moptab = unsignedp ? umul_widen_optab : smul_widen_optab;
3950 22196 : if (convert_optab_handler (moptab, wider_mode, mode) != CODE_FOR_nothing
3951 22196 : && mul_widen_cost (speed, wider_mode) < max_cost)
3952 : {
3953 383 : tem = expand_binop (wider_mode, moptab, op0, op1, NULL_RTX, unsignedp,
3954 : OPTAB_WIDEN);
3955 383 : if (tem)
3956 383 : return extract_high_half (mode, tem);
3957 : }
3958 :
3959 : /* Try widening the mode and perform a non-widening multiplication. */
3960 21813 : if (optab_handler (smul_optab, wider_mode) != CODE_FOR_nothing
3961 21293 : && size - 1 < BITS_PER_WORD
3962 43099 : && (mul_cost (speed, wider_mode) + shift_cost (speed, mode, size-1)
3963 : < max_cost))
3964 : {
3965 19692 : rtx_insn *insns;
3966 19692 : rtx wop0, wop1;
3967 :
3968 : /* We need to widen the operands, for example to ensure the
3969 : constant multiplier is correctly sign or zero extended.
3970 : Use a sequence to clean-up any instructions emitted by
3971 : the conversions if things don't work out. */
3972 19692 : start_sequence ();
3973 19692 : wop0 = convert_modes (wider_mode, mode, op0, unsignedp);
3974 19692 : wop1 = convert_modes (wider_mode, mode, op1, unsignedp);
3975 19692 : tem = expand_binop (wider_mode, smul_optab, wop0, wop1, 0,
3976 : unsignedp, OPTAB_WIDEN);
3977 19692 : insns = end_sequence ();
3978 :
3979 19692 : if (tem) /* The captured insns are emitted only on success. */
3980 : {
3981 19692 : emit_insn (insns);
3982 19692 : return extract_high_half (mode, tem);
3983 : }
3984 : }
3985 :
3986 : /* Try widening multiplication of opposite signedness, and adjust. */
3987 2121 : moptab = unsignedp ? smul_widen_optab : umul_widen_optab;
3988 2121 : if (convert_optab_handler (moptab, wider_mode, mode) != CODE_FOR_nothing
3989 410 : && size - 1 < BITS_PER_WORD
3990 2821 : && (mul_widen_cost (speed, wider_mode)
3991 350 : + 2 * shift_cost (speed, mode, size-1)
3992 350 : + 4 * add_cost (speed, mode) < max_cost))
3993 : {
3994 0 : tem = expand_binop (wider_mode, moptab, op0, op1, NULL_RTX, !unsignedp,
3995 : OPTAB_WIDEN);
3996 0 : if (tem != 0)
3997 : {
3998 0 : tem = extract_high_half (mode, tem);
3999 : /* We used the wrong signedness. Adjust the result. */
4000 0 : return expand_mult_highpart_adjust (mode, tem, op0, op1, target,
4001 0 : unsignedp);
4002 : }
4003 : }
4004 :
4005 : return 0; /* No strategy was both cheap enough and available. */
4006 : }
4007 :
4008 : /* Emit code to multiply OP0 and OP1 (where OP1 is an integer constant),
4009 : putting the high half of the result in TARGET if that is convenient,
4010 : and return where the result is. If the operation cannot be performed,
4011 : 0 is returned.
4012 :
4013 : MODE is the mode of operation and result.
4014 :
4015 : UNSIGNEDP nonzero means unsigned multiply.
4016 :
4017 : MAX_COST is the total allowed cost for the expanded RTL. */
4018 :
4019 : static rtx
4020 45360 : expmed_mult_highpart (scalar_int_mode mode, rtx op0, rtx op1,
4021 : rtx target, int unsignedp, int max_cost)
4022 : {
4023 45360 : const bool speed = optimize_insn_for_speed_p ();
4024 45360 : unsigned HOST_WIDE_INT cnst1;
4025 45360 : int extra_cost;
4026 45360 : bool sign_adjust = false;
4027 45360 : enum mult_variant variant;
4028 45360 : struct algorithm alg;
4029 45360 : rtx narrow_op1, tem;
4030 :
4031 : /* We can only handle modes that satisfy HWI_COMPUTABLE_MODE_P. NOTE(review): this comment used to say "wider than HOST_BITS_PER_INT"; presumably HOST_BITS_PER_WIDE_INT is the real limit -- confirm. */
4032 45360 : gcc_assert (HWI_COMPUTABLE_MODE_P (mode));
4033 :
4034 45360 : cnst1 = INTVAL (op1) & GET_MODE_MASK (mode); /* OP1's value reduced to the bits of MODE, as an unsigned quantity. */
4035 45360 : narrow_op1 = gen_int_mode (INTVAL (op1), mode);
4036 :
4037 : /* We can't optimize modes wider than BITS_PER_WORD.
4038 : ??? We might be able to perform double-word arithmetic if
4039 : mode == word_mode, however all the cost calculations in
4040 : synth_mult etc. assume single-word operations. */
4041 45360 : scalar_int_mode wider_mode = GET_MODE_WIDER_MODE (mode).require ();
4042 93824 : if (GET_MODE_BITSIZE (wider_mode) > BITS_PER_WORD)
4043 23691 : return expmed_mult_highpart_optab (mode, op0, narrow_op1, target,
4044 23691 : unsignedp, max_cost);
4045 :
4046 43338 : extra_cost = shift_cost (speed, mode, GET_MODE_BITSIZE (mode) - 1); /* Cost added on top of the shift/add sequence. */
4047 :
4048 : /* Check whether we try to multiply by a negative constant. */
4049 31844 : if (!unsignedp && ((cnst1 >> (GET_MODE_BITSIZE (mode) - 1)) & 1))
4050 : {
4051 2193 : sign_adjust = true;
4052 2193 : extra_cost += add_cost (speed, mode);
4053 : }
4054 :
4055 : /* See whether shift/add multiplication is cheap enough. */
4056 21669 : if (choose_mult_variant (wider_mode, cnst1, &alg, &variant,
4057 : max_cost - extra_cost))
4058 : {
4059 : /* See whether the specialized multiplication optabs are
4060 : cheaper than the shift/add version. */
4061 40016 : tem = expmed_mult_highpart_optab (mode, op0, narrow_op1, target,
4062 : unsignedp,
4063 20008 : alg.cost.cost + extra_cost);
4064 20008 : if (tem)
4065 : return tem;
4066 :
4067 13 : tem = convert_to_mode (wider_mode, op0, unsignedp);
4068 13 : tem = expand_mult_const (wider_mode, tem, cnst1, 0, &alg, variant);
4069 13 : tem = extract_high_half (mode, tem);
4070 :
4071 : /* Adjust result for signedness. */
4072 13 : if (sign_adjust) /* CNST1 was used as an unsigned value although its MODE sign bit is set, so OP0 is subtracted once. */
4073 0 : tem = force_operand (gen_rtx_MINUS (mode, tem, op0), tem);
4074 :
4075 13 : return tem;
4076 : }
4077 1661 : return expmed_mult_highpart_optab (mode, op0, narrow_op1, target, /* Shift/add was too costly; fall back to the optabs alone. */
4078 1661 : unsignedp, max_cost);
4079 : }
4080 :
4081 :
4082 : /* Expand signed modulus of OP0 by a power of two D in mode MODE.
   Emits the RTL for the computation and returns the rtx that holds the
   remainder.  Two strategies are tried in order:
     1. when branches are costly and the insn is optimized for speed, a
        branch-free sequence driven by a sign mask of OP0;
     2. otherwise (or if the sign mask cannot be produced) an AND that
        keeps the sign bit, followed by a conditional fix-up that only
        runs for negative OP0.  */
4083 :
4084 : static rtx
4085 2509 : expand_smod_pow2 (scalar_int_mode mode, rtx op0, HOST_WIDE_INT d)
4086 : {
4087 2509 : rtx result, temp, shift;
4088 2509 : rtx_code_label *label;
4089 2509 : int logd;
4090 2509 : int prec = GET_MODE_PRECISION (mode);
4091 :
4092 2509 : logd = floor_log2 (d); /* D is a power of two, so D == 2**LOGD. */
4093 2509 : result = gen_reg_rtx (mode);
4094 :
4095 : /* Avoid conditional branches when they're expensive. */
4096 2509 : if (BRANCH_COST (optimize_insn_for_speed_p (), false) >= 2
4097 2509 : && optimize_insn_for_speed_p ())
4098 : {
   /* Ask for a flag for "OP0 < 0".  NOTE(review): the trailing -1 is
      presumably the normalization request that makes the flag -1 (all
      ones) when true and 0 when false; emit_store_flag is defined
      outside this chunk, confirm there.  A null return means no such
      flag could be emitted and we fall through to the branching code.  */
4099 2505 : rtx signmask = emit_store_flag (result, LT, op0, const0_rtx,
4100 : mode, 0, -1);
4101 2505 : if (signmask)
4102 : {
4103 2505 : HOST_WIDE_INT masklow = (HOST_WIDE_INT_1 << logd) - 1; /* Low LOGD bits set. */
4104 2505 : signmask = force_reg (mode, signmask);
4105 5010 : shift = gen_int_shift_amount (mode, GET_MODE_BITSIZE (mode) - logd);
4106 :
4107 : /* Use the rtx_cost of a LSHIFTRT instruction to determine
4108 : which instruction sequence to use. If logical right shifts
4109 : are expensive then use 2 XORs, 2 SUBs and an AND, otherwise
4110 : use a LSHIFTRT, 1 ADD, 1 SUB and an AND. */
4111 :
   /* This LSHIFTRT rtx exists only so that its cost can be queried;
      TEMP is reassigned in both arms below before it is used.  */
4112 2505 : temp = gen_rtx_LSHIFTRT (mode, result, shift);
4113 2505 : if (optab_handler (lshr_optab, mode) == CODE_FOR_nothing
4114 2505 : || (set_src_cost (temp, mode, optimize_insn_for_speed_p ())
4115 : > COSTS_N_INSNS (2)))
4116 : {
   /* With SIGNMASK being 0 or -1, (X ^ SIGNMASK) - SIGNMASK negates X
      exactly when OP0 is negative.  So: take |OP0|, keep its low LOGD
      bits, then apply the same XOR/SUB pair to give the remainder the
      sign of OP0.  */
4117 89 : temp = expand_binop (mode, xor_optab, op0, signmask,
4118 : NULL_RTX, 1, OPTAB_LIB_WIDEN);
4119 89 : temp = expand_binop (mode, sub_optab, temp, signmask,
4120 : NULL_RTX, 1, OPTAB_LIB_WIDEN);
4121 89 : temp = expand_binop (mode, and_optab, temp,
4122 89 : gen_int_mode (masklow, mode),
4123 : NULL_RTX, 1, OPTAB_LIB_WIDEN);
4124 89 : temp = expand_binop (mode, xor_optab, temp, signmask,
4125 : NULL_RTX, 1, OPTAB_LIB_WIDEN);
4126 89 : temp = expand_binop (mode, sub_optab, temp, signmask,
4127 : NULL_RTX, 1, OPTAB_LIB_WIDEN);
4128 : }
4129 : else
4130 : {
   /* Logically shifting the 0 / -1 mask right by (bitsize - LOGD)
      leaves 0 for non-negative OP0 and MASKLOW (2**LOGD - 1) for
      negative OP0.  The remainder is then
      ((OP0 + SIGNMASK) & MASKLOW) - SIGNMASK.  */
4131 2416 : signmask = expand_binop (mode, lshr_optab, signmask, shift,
4132 : NULL_RTX, 1, OPTAB_LIB_WIDEN);
4133 2416 : signmask = force_reg (mode, signmask);
4134 :
4135 2416 : temp = expand_binop (mode, add_optab, op0, signmask,
4136 : NULL_RTX, 1, OPTAB_LIB_WIDEN);
4137 2416 : temp = expand_binop (mode, and_optab, temp,
4138 2416 : gen_int_mode (masklow, mode),
4139 : NULL_RTX, 1, OPTAB_LIB_WIDEN);
4140 2416 : temp = expand_binop (mode, sub_optab, temp, signmask,
4141 : NULL_RTX, 1, OPTAB_LIB_WIDEN);
4142 : }
4143 2505 : return temp;
4144 : }
4145 : }
4146 :
4147 : /* Mask contains the mode's signbit and the significant bits of the
4148 : modulus. By including the signbit in the operation, many targets
4149 : can avoid an explicit compare operation in the following comparison
4150 : against zero. */
4151 4 : wide_int mask = wi::mask (logd, false, prec);
4152 4 : mask = wi::set_bit (mask, prec - 1);
4153 :
   /* RESULT = OP0 & MASK.  expand_binop may hand back an rtx other than
      the requested target, in which case the value is copied over.  */
4154 8 : temp = expand_binop (mode, and_optab, op0,
4155 4 : immed_wide_int_const (mask, mode),
4156 : result, 1, OPTAB_LIB_WIDEN);
4157 4 : if (temp != result)
4158 0 : emit_move_insn (result, temp);
4159 :
   /* A non-negative RESULT means the sign bit of OP0 was clear, so the
      masked value is already the remainder: jump over the fix-up.  */
4160 4 : label = gen_label_rtx ();
4161 4 : do_cmp_and_jump (result, const0_rtx, GE, mode, label);
4162 :
   /* Negative OP0: compute ((RESULT - 1) | HIGH) + 1, where HIGH is the
      second wi::mask value below (presumably every bit above the low
      LOGD ones, i.e. the complement of the first mask's low part --
      confirm against wi::mask).  Nonzero low bits R then become
      R - 2**LOGD, while the -1 / +1 pair makes zero low bits yield 0.  */
4163 4 : temp = expand_binop (mode, sub_optab, result, const1_rtx, result,
4164 : 0, OPTAB_LIB_WIDEN);
4165 :
4166 4 : mask = wi::mask (logd, true, prec);
4167 8 : temp = expand_binop (mode, ior_optab, temp,
4168 4 : immed_wide_int_const (mask, mode),
4169 : result, 1, OPTAB_LIB_WIDEN);
4170 4 : temp = expand_binop (mode, add_optab, temp, const1_rtx, result,
4171 : 0, OPTAB_LIB_WIDEN);
4172 4 : if (temp != result)
4173 0 : emit_move_insn (result, temp);
4174 4 : emit_label (label); /* Join point for the non-negative case. */
4175 4 : return result;
4176 4 : }
4177 :
4178 : /* Expand signed division of OP0 by a power of two D in mode MODE.
4179 : This routine is only called for positive values of D.

   Emits the RTL and returns the rtx produced by the final right shift.
   Every strategy below has the same shape: add D - 1 to OP0 when OP0 is
   negative, then shift right by log2 (D).  They differ only in how the
   conditional bias is produced, and are tried in this order:
     1. D == 2: the bias is 1, so an "OP0 < 0" flag is added directly;
     2. a conditional move selecting between OP0 and OP0 + (D - 1);
     3. a sign mask reduced to D - 1 by an AND or a logical shift;
     4. an explicit compare-and-branch around the increment.  */
4180 :
4181 : static rtx
4182 10234 : expand_sdiv_pow2 (scalar_int_mode mode, rtx op0, HOST_WIDE_INT d)
4183 : {
4184 10234 : rtx temp;
4185 10234 : rtx_code_label *label;
4186 10234 : int logd;
4187 :
4188 10234 : logd = floor_log2 (d); /* Shift count: D == 2**LOGD. */
4189 :
   /* Strategy 1.  For D == 2 the bias D - 1 equals 1, so the flag for
      "OP0 < 0" (requested with a trailing 1 -- presumably the 0 / 1
      normalization of emit_store_flag, confirm there) is the bias.  */
4190 10234 : if (d == 2
4191 10234 : && BRANCH_COST (optimize_insn_for_speed_p (),
4192 : false) >= 1)
4193 : {
4194 6790 : temp = gen_reg_rtx (mode);
4195 6790 : temp = emit_store_flag (temp, LT, op0, const0_rtx, mode, 0, 1);
4196 6790 : if (temp != NULL_RTX)
4197 : {
4198 6790 : temp = expand_binop (mode, add_optab, temp, op0, NULL_RTX,
4199 : 0, OPTAB_LIB_WIDEN);
4200 6790 : return expand_shift (RSHIFT_EXPR, mode, temp, logd, NULL_RTX, 0);
4201 : }
4202 : }
4203 :
   /* Strategy 2.  The insns are built inside a separate sequence so that
      they can be thrown away if the conditional move cannot be emitted.  */
4204 6887 : if (HAVE_conditional_move
4205 3444 : && BRANCH_COST (optimize_insn_for_speed_p (), false) >= 2)
4206 : {
4207 3444 : rtx temp2;
4208 :
4209 3444 : start_sequence ();
4210 3444 : temp2 = copy_to_mode_reg (mode, op0);
   /* TEMP = OP0 + (D - 1), the biased dividend for the negative case.  */
4211 3444 : temp = expand_binop (mode, add_optab, temp2, gen_int_mode (d - 1, mode),
4212 : NULL_RTX, 0, OPTAB_LIB_WIDEN);
4213 3444 : temp = force_reg (mode, temp);
4214 :
4215 : /* Construct "temp2 = (temp2 < 0) ? temp : temp2". */
4216 3444 : temp2 = emit_conditional_move (temp2, { LT, temp2, const0_rtx, mode },
4217 : temp, temp2, mode, 0);
4218 3444 : if (temp2)
4219 : {
   /* Success: close the sequence and emit the insns it collected into
      the enclosing stream before doing the shift.  */
4220 3396 : rtx_insn *seq = end_sequence ();
4221 3396 : emit_insn (seq);
4222 3396 : return expand_shift (RSHIFT_EXPR, mode, temp2, logd, NULL_RTX, 0);
4223 : }
   /* Failure: close the sequence without emitting it, discarding the
      insns built above, and try the next strategy.  */
4224 48 : end_sequence ();
4225 : }
4226 :
   /* Strategy 3.  Start from an "OP0 < 0" flag requested with a trailing
      -1 (presumably 0 / -1, i.e. a sign mask -- confirm against
      emit_store_flag) and reduce it to the bias D - 1.  */
4227 48 : if (BRANCH_COST (optimize_insn_for_speed_p (),
4228 : false) >= 2)
4229 : {
4230 48 : int ushift = GET_MODE_BITSIZE (mode) - logd;
4231 :
4232 48 : temp = gen_reg_rtx (mode);
4233 48 : temp = emit_store_flag (temp, LT, op0, const0_rtx, mode, 0, -1);
4234 48 : if (temp != NULL_RTX)
4235 : {
   /* Either mask the flag with D - 1, or shift it right by USHIFT
      (final argument 1, unlike the other shifts here) so that only the
      low LOGD bits remain.  The AND is used for modes at least a word
      wide or when that shift costs more than one insn.  */
4236 96 : if (GET_MODE_BITSIZE (mode) >= BITS_PER_WORD
4237 48 : || shift_cost (optimize_insn_for_speed_p (), mode, ushift)
4238 : > COSTS_N_INSNS (1))
4239 48 : temp = expand_binop (mode, and_optab, temp,
4240 48 : gen_int_mode (d - 1, mode),
4241 : NULL_RTX, 0, OPTAB_LIB_WIDEN);
4242 : else
4243 0 : temp = expand_shift (RSHIFT_EXPR, mode, temp,
4244 0 : ushift, NULL_RTX, 1);
4245 48 : temp = expand_binop (mode, add_optab, temp, op0, NULL_RTX,
4246 : 0, OPTAB_LIB_WIDEN);
4247 48 : return expand_shift (RSHIFT_EXPR, mode, temp, logd, NULL_RTX, 0);
4248 : }
4249 : }
4250 :
   /* Strategy 4 (fallback).  Copy OP0 to a fresh register, skip the
      increment when it is >= 0, otherwise add D - 1 in place, then
      shift.  */
4251 0 : label = gen_label_rtx ();
4252 0 : temp = copy_to_mode_reg (mode, op0);
4253 0 : do_cmp_and_jump (temp, const0_rtx, GE, mode, label);
4254 0 : expand_inc (temp, gen_int_mode (d - 1, mode));
4255 0 : emit_label (label);
4256 0 : return expand_shift (RSHIFT_EXPR, mode, temp, logd, NULL_RTX, 0);
4257 : }
4258 :
4259 : /* Emit the code to divide OP0 by OP1, putting the result in TARGET
4260 : if that is convenient, and returning where the result is.
4261 : You may request either the quotient or the remainder as the result;
4262 : specify REM_FLAG nonzero to get the remainder.
4263 :
4264 : CODE is the expression code for which kind of division this is;
4265 : it controls how rounding is done. MODE is the machine mode to use.
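4266 : UNSIGNEDP nonzero means do unsigned division. METHODS is the optab_methods value that says whether wider modes and library calls may be tried; it is also handed on to expand_binop. */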
4267 :
4268 : /* ??? For CEIL_MOD_EXPR, can compute incorrect remainder with ANDI
4269 : and then correct it by or'ing in missing high bits
4270 : if result of ANDI is nonzero.
4271 : For ROUND_MOD_EXPR, can use ANDI and then sign-extend the result.
4272 : This could optimize to a bfexts instruction.
4273 : But C doesn't use these operations, so their optimizations are
4274 : left for later. */
4275 : /* ??? For modulo, we don't actually need the highpart of the first product,
4276 : the low part will do nicely. And for small divisors, the second multiply
4277 : can also be a low-part only multiply or even be completely left out.
4278 : E.g. to calculate the remainder of a division by 3 with a 32 bit
4279 : multiply, multiply with 0x55555556 and extract the upper two bits;
4280 : the result is exact for inputs up to 0x1fffffff.
4281 : The input range can be reduced by using cross-sum rules.
4282 : For odd divisors >= 3, the following table gives right shift counts
4283 : so that if a number is shifted by an integer multiple of the given
4284 : amount, the remainder stays the same:
4285 : 2, 4, 3, 6, 10, 12, 4, 8, 18, 6, 11, 20, 18, 0, 5, 10, 12, 0, 12, 20,
4286 : 14, 12, 23, 21, 8, 0, 20, 18, 0, 0, 6, 12, 0, 22, 0, 18, 20, 30, 0, 0,
4287 : 0, 8, 0, 11, 12, 10, 36, 0, 30, 0, 0, 12, 0, 0, 0, 0, 44, 12, 24, 0,
4288 : 20, 0, 7, 14, 0, 18, 36, 0, 0, 46, 60, 0, 42, 0, 15, 24, 20, 0, 0, 33,
4289 : 0, 20, 0, 0, 18, 0, 60, 0, 0, 0, 0, 0, 40, 18, 0, 0, 12
4290 :
4291 : Cross-sum rules for even numbers can be derived by leaving as many bits
4292 : to the right alone as the divisor has zeros to the right.
4293 : E.g. if x is an unsigned 32 bit number:
4294 : (x mod 12) == (((x & 1023) + ((x >> 8) & ~3)) * 0x15555558 >> 2 * 3) >> 28
4295 : */
4296 :
4297 : rtx
4298 229823 : expand_divmod (int rem_flag, enum tree_code code, machine_mode mode,
4299 : rtx op0, rtx op1, rtx target, int unsignedp,
4300 : enum optab_methods methods)
4301 : {
4302 229823 : machine_mode compute_mode;
4303 229823 : rtx tquotient;
4304 229823 : rtx quotient = 0, remainder = 0;
4305 229823 : rtx_insn *last;
4306 229823 : rtx_insn *insn;
4307 229823 : optab optab1, optab2;
4308 229823 : int op1_is_constant, op1_is_pow2 = 0;
4309 229823 : int max_cost, extra_cost;
4310 229823 : static HOST_WIDE_INT last_div_const = 0;
4311 229823 : bool speed = optimize_insn_for_speed_p ();
4312 :
4313 229823 : op1_is_constant = CONST_INT_P (op1);
4314 229823 : if (op1_is_constant)
4315 : {
4316 145300 : wide_int ext_op1 = rtx_mode_t (op1, mode);
4317 145300 : op1_is_pow2 = (wi::popcount (ext_op1) == 1
4318 290600 : || (! unsignedp
4319 174942 : && wi::popcount (wi::neg (ext_op1)) == 1));
4320 145300 : }
4321 :
4322 : /*
4323 : This is the structure of expand_divmod:
4324 :
4325 : First comes code to fix up the operands so we can perform the operations
4326 : correctly and efficiently.
4327 :
4328 : Second comes a switch statement with code specific for each rounding mode.
4329 : For some special operands this code emits all RTL for the desired
4330 : operation, for other cases, it generates only a quotient and stores it in
4331 : QUOTIENT. The case for trunc division/remainder might leave quotient = 0,
4332 : to indicate that it has not done anything.
4333 :
4334 : Last comes code that finishes the operation. If QUOTIENT is set and
4335 : REM_FLAG is set, the remainder is computed as OP0 - QUOTIENT * OP1. If
4336 : QUOTIENT is not set, it is computed using trunc rounding.
4337 :
4338 : We try to generate special code for division and remainder when OP1 is a
4339 : constant. If |OP1| = 2**n we can use shifts and some other fast
4340 : operations. For other values of OP1, we compute a carefully selected
4341 : fixed-point approximation m = 1/OP1, and generate code that multiplies OP0
4342 : by m.
4343 :
4344 : In all cases but EXACT_DIV_EXPR, this multiplication requires the upper
4345 : half of the product. Different strategies for generating the product are
4346 : implemented in expmed_mult_highpart.
4347 :
4348 : If what we actually want is the remainder, we generate that by another
4349 : by-constant multiplication and a subtraction. */
4350 :
4351 : /* We shouldn't be called with OP1 == const1_rtx, but some of the
4352 : code below will malfunction if we are, so check here and handle
4353 : the special case if so. */
4354 229823 : if (op1 == const1_rtx)
4355 0 : return rem_flag ? const0_rtx : op0;
4356 :
4357 : /* When dividing by -1, we could get an overflow.
4358 : negv_optab can handle overflows. */
4359 229823 : if (! unsignedp && op1 == constm1_rtx)
4360 : {
4361 0 : if (rem_flag)
4362 0 : return const0_rtx;
4363 0 : return expand_unop (mode, flag_trapv && GET_MODE_CLASS (mode) == MODE_INT
4364 0 : ? negv_optab : neg_optab, op0, target, 0);
4365 : }
4366 :
4367 229823 : if (target
4368 : /* Don't use the function value register as a target
4369 : since we have to read it as well as write it,
4370 : and function-inlining gets confused by this. */
4371 229823 : && ((REG_P (target) && REG_FUNCTION_VALUE_P (target))
4372 : /* Don't clobber an operand while doing a multi-step calculation. */
4373 96793 : || ((rem_flag || op1_is_constant)
4374 77534 : && (reg_mentioned_p (target, op0)
4375 75184 : || (MEM_P (op0) && MEM_P (target))))
4376 93725 : || reg_mentioned_p (target, op1)
4377 93646 : || (MEM_P (op1) && MEM_P (target))))
4378 : target = 0;
4379 :
4380 : /* Get the mode in which to perform this computation. Normally it will
4381 : be MODE, but sometimes we can't do the desired operation in MODE.
4382 : If so, pick a wider mode in which we can do the operation. Convert
4383 : to that mode at the start to avoid repeated conversions.
4384 :
4385 : First see what operations we need. These depend on the expression
4386 : we are evaluating. (We assume that divxx3 insns exist under the
4387 : same conditions that modxx3 insns and that these insns don't normally
4388 : fail. If these assumptions are not correct, we may generate less
4389 : efficient code in some cases.)
4390 :
4391 : Then see if we find a mode in which we can open-code that operation
4392 : (either a division, modulus, or shift). Finally, check for the smallest
4393 : mode for which we can do the operation with a library call. */
4394 :
4395 : /* We might want to refine this now that we have division-by-constant
4396 : optimization. Since expmed_mult_highpart tries so many variants, it is
4397 : not straightforward to generalize this. Maybe we should make an array
4398 : of possible modes in init_expmed? Save this for GCC 2.7. */
4399 :
4400 121193 : optab1 = (op1_is_pow2
4401 229823 : ? (unsignedp ? lshr_optab : ashr_optab)
4402 140604 : : (unsignedp ? udiv_optab : sdiv_optab));
4403 293814 : optab2 = (op1_is_pow2 ? optab1
4404 140604 : : (unsignedp ? udivmod_optab : sdivmod_optab));
4405 :
4406 229823 : if (methods == OPTAB_WIDEN || methods == OPTAB_LIB_WIDEN)
4407 : {
4408 241381 : FOR_EACH_MODE_FROM (compute_mode, mode)
4409 237240 : if (optab_handler (optab1, compute_mode) != CODE_FOR_nothing
4410 237240 : || optab_handler (optab2, compute_mode) != CODE_FOR_nothing)
4411 : break;
4412 :
4413 228154 : if (compute_mode == VOIDmode && methods == OPTAB_LIB_WIDEN)
4414 4141 : FOR_EACH_MODE_FROM (compute_mode, mode)
4415 4141 : if (optab_libfunc (optab1, compute_mode)
4416 4141 : || optab_libfunc (optab2, compute_mode))
4417 : break;
4418 : }
4419 : else
4420 : compute_mode = mode;
4421 :
4422 : /* If we still couldn't find a mode, use MODE, but expand_binop will
4423 : probably die. */
4424 5810 : if (compute_mode == VOIDmode)
4425 0 : compute_mode = mode;
4426 :
4427 229823 : if (target && GET_MODE (target) == compute_mode)
4428 : tquotient = target;
4429 : else
4430 136340 : tquotient = gen_reg_rtx (compute_mode);
4431 :
4432 : #if 0
4433 : /* It should be possible to restrict the precision to GET_MODE_BITSIZE
4434 : (mode), and thereby get better code when OP1 is a constant. Do that
4435 : later. It will require going over all usages of SIZE below. */
4436 : size = GET_MODE_BITSIZE (mode);
4437 : #endif
4438 :
4439 : /* Only deduct something for a REM if the last divide done was
4440 : for a different constant. Then set the constant of the last
4441 : divide. */
4442 229823 : max_cost = (unsignedp
4443 338453 : ? udiv_cost (speed, compute_mode)
4444 121193 : : sdiv_cost (speed, compute_mode));
4445 229823 : if (rem_flag && ! (last_div_const != 0 && op1_is_constant
4446 7628 : && INTVAL (op1) == last_div_const))
4447 51996 : max_cost -= (mul_cost (speed, compute_mode)
4448 51996 : + add_cost (speed, compute_mode));
4449 :
4450 229823 : last_div_const = ! rem_flag && op1_is_constant ? INTVAL (op1) : 0;
4451 :
4452 : /* Now convert to the best mode to use. */
4453 229823 : if (compute_mode != mode)
4454 : {
4455 0 : op0 = convert_modes (compute_mode, mode, op0, unsignedp);
4456 0 : op1 = convert_modes (compute_mode, mode, op1, unsignedp);
4457 :
4458 : /* convert_modes may have placed op1 into a register, so we
4459 : must recompute the following. */
4460 0 : op1_is_constant = CONST_INT_P (op1);
4461 0 : if (op1_is_constant)
4462 : {
4463 0 : wide_int ext_op1 = rtx_mode_t (op1, compute_mode);
4464 0 : op1_is_pow2 = (wi::popcount (ext_op1) == 1
4465 0 : || (! unsignedp
4466 0 : && wi::popcount (wi::neg (ext_op1)) == 1));
4467 0 : }
4468 : else
4469 : op1_is_pow2 = 0;
4470 : }
4471 :
4472 : /* If one of the operands is a volatile MEM, copy it into a register. */
4473 :
4474 229823 : if (MEM_P (op0) && MEM_VOLATILE_P (op0))
4475 0 : op0 = force_reg (compute_mode, op0);
4476 229823 : if (MEM_P (op1) && MEM_VOLATILE_P (op1))
4477 0 : op1 = force_reg (compute_mode, op1);
4478 :
4479 : /* If we need the remainder or if OP1 is constant, we need to
4480 : put OP0 in a register in case it has any queued subexpressions. */
4481 229823 : if (rem_flag || op1_is_constant)
4482 174687 : op0 = force_reg (compute_mode, op0);
4483 :
4484 229823 : last = get_last_insn ();
4485 :
4486 : /* Promote floor rounding to trunc rounding for unsigned operations. */
4487 229823 : if (unsignedp)
4488 : {
4489 108630 : if (code == FLOOR_DIV_EXPR)
4490 : code = TRUNC_DIV_EXPR;
4491 108573 : if (code == FLOOR_MOD_EXPR)
4492 156 : code = TRUNC_MOD_EXPR;
4493 108630 : if (code == EXACT_DIV_EXPR && op1_is_pow2)
4494 4617 : code = TRUNC_DIV_EXPR;
4495 : }
4496 :
4497 229823 : if (op1 != const0_rtx)
4498 229444 : switch (code)
4499 : {
4500 177328 : case TRUNC_MOD_EXPR:
4501 177328 : case TRUNC_DIV_EXPR:
4502 177328 : if (op1_is_constant)
4503 : {
4504 93918 : scalar_int_mode int_mode = as_a <scalar_int_mode> (compute_mode);
4505 93918 : int size = GET_MODE_BITSIZE (int_mode);
4506 93918 : if (unsignedp)
4507 : {
4508 57077 : unsigned HOST_WIDE_INT mh, ml;
4509 57077 : int pre_shift, post_shift;
4510 57077 : wide_int wd = rtx_mode_t (op1, int_mode);
4511 57077 : unsigned HOST_WIDE_INT d = wd.to_uhwi ();
4512 :
4513 57077 : if (wi::popcount (wd) == 1)
4514 : {
4515 32017 : pre_shift = floor_log2 (d);
4516 32017 : if (rem_flag)
4517 : {
4518 257 : unsigned HOST_WIDE_INT mask
4519 257 : = (HOST_WIDE_INT_1U << pre_shift) - 1;
4520 257 : remainder
4521 257 : = expand_binop (int_mode, and_optab, op0,
4522 257 : gen_int_mode (mask, int_mode),
4523 : remainder, 1, methods);
4524 257 : if (remainder)
4525 257 : return gen_lowpart (mode, remainder);
4526 : }
4527 31760 : quotient = expand_shift (RSHIFT_EXPR, int_mode, op0,
4528 31760 : pre_shift, tquotient, 1);
4529 : }
4530 25060 : else if (size <= HOST_BITS_PER_WIDE_INT)
4531 : {
4532 23478 : if (d >= (HOST_WIDE_INT_1U << (size - 1)))
4533 : {
4534 : /* Most significant bit of divisor is set; emit an scc
4535 : insn. */
4536 155 : quotient = emit_store_flag_force (tquotient, GEU, op0, op1,
4537 : int_mode, 1, 1);
4538 : }
4539 : else
4540 : {
4541 : /* Find a suitable multiplier and right shift count
4542 : instead of directly dividing by D. */
4543 23323 : mh = choose_multiplier (d, size, size,
4544 : &ml, &post_shift);
4545 :
4546 : /* If the suggested multiplier is more than SIZE bits,
4547 : we can do better for even divisors, using an
4548 : initial right shift. */
4549 23323 : if (mh != 0 && (d & 1) == 0)
4550 : {
4551 2244 : pre_shift = ctz_or_zero (d);
4552 2244 : mh = choose_multiplier (d >> pre_shift, size,
4553 : size - pre_shift,
4554 : &ml, &post_shift);
4555 2244 : gcc_assert (!mh);
4556 : }
4557 : else
4558 : pre_shift = 0;
4559 :
4560 2254 : if (mh != 0)
4561 : {
4562 2254 : rtx t1, t2, t3, t4;
4563 :
4564 2347 : if (post_shift - 1 >= BITS_PER_WORD)
4565 0 : goto fail1;
4566 :
4567 2254 : extra_cost
4568 2254 : = (shift_cost (speed, int_mode, post_shift - 1)
4569 2254 : + shift_cost (speed, int_mode, 1)
4570 2254 : + 2 * add_cost (speed, int_mode));
4571 2254 : t1 = expmed_mult_highpart
4572 2254 : (int_mode, op0, gen_int_mode (ml, int_mode),
4573 : NULL_RTX, 1, max_cost - extra_cost);
4574 2254 : if (t1 == 0)
4575 103 : goto fail1;
4576 2151 : t2 = force_operand (gen_rtx_MINUS (int_mode,
4577 : op0, t1),
4578 : NULL_RTX);
4579 2151 : t3 = expand_shift (RSHIFT_EXPR, int_mode,
4580 : t2, 1, NULL_RTX, 1);
4581 2151 : t4 = force_operand (gen_rtx_PLUS (int_mode,
4582 : t1, t3),
4583 : NULL_RTX);
4584 2151 : quotient = expand_shift
4585 2151 : (RSHIFT_EXPR, int_mode, t4,
4586 2151 : post_shift - 1, tquotient, 1);
4587 : }
4588 : else
4589 : {
4590 21069 : rtx t1, t2;
4591 :
4592 22857 : if (pre_shift >= BITS_PER_WORD
4593 21069 : || post_shift >= BITS_PER_WORD)
4594 3 : goto fail1;
4595 :
4596 21066 : t1 = expand_shift
4597 42132 : (RSHIFT_EXPR, int_mode, op0,
4598 21066 : pre_shift, NULL_RTX, 1);
4599 21066 : extra_cost
4600 21066 : = (shift_cost (speed, int_mode, pre_shift)
4601 21066 : + shift_cost (speed, int_mode, post_shift));
4602 21066 : t2 = expmed_mult_highpart
4603 21066 : (int_mode, t1,
4604 21066 : gen_int_mode (ml, int_mode),
4605 : NULL_RTX, 1, max_cost - extra_cost);
4606 21066 : if (t2 == 0)
4607 931 : goto fail1;
4608 20135 : quotient = expand_shift
4609 20135 : (RSHIFT_EXPR, int_mode, t2,
4610 20135 : post_shift, tquotient, 1);
4611 : }
4612 : }
4613 : }
4614 : else /* Too wide mode to use tricky code */
4615 : break;
4616 :
4617 54201 : insn = get_last_insn ();
4618 54201 : if (insn != last)
4619 54201 : set_dst_reg_note (insn, REG_EQUAL,
4620 : gen_rtx_UDIV (int_mode, op0, op1),
4621 : quotient);
4622 55495 : }
4623 : else /* TRUNC_DIV, signed */
4624 : {
4625 36841 : unsigned HOST_WIDE_INT ml;
4626 36841 : int post_shift;
4627 36841 : rtx mlr;
4628 36841 : HOST_WIDE_INT d = INTVAL (op1);
4629 36841 : unsigned HOST_WIDE_INT abs_d;
4630 :
4631 : /* Not prepared to handle division/remainder by
4632 : 0xffffffffffffffff8000000000000000 etc. */
4633 36841 : if (d == HOST_WIDE_INT_MIN && size > HOST_BITS_PER_WIDE_INT)
4634 : break;
4635 :
4636 : /* Since d might be INT_MIN, we have to cast to
4637 : unsigned HOST_WIDE_INT before negating to avoid
4638 : undefined signed overflow. */
4639 36841 : abs_d = (d >= 0
4640 36841 : ? (unsigned HOST_WIDE_INT) d
4641 : : - (unsigned HOST_WIDE_INT) d);
4642 :
4643 : /* n rem d = n rem -d */
4644 36841 : if (rem_flag && d < 0)
4645 : {
4646 141 : d = abs_d;
4647 141 : op1 = gen_int_mode (abs_d, int_mode);
4648 : }
4649 :
4650 36841 : if (d == 1)
4651 : quotient = op0;
4652 36841 : else if (d == -1)
4653 0 : quotient = expand_unop (int_mode, neg_optab, op0,
4654 : tquotient, 0);
4655 36841 : else if (size <= HOST_BITS_PER_WIDE_INT
4656 35440 : && abs_d == HOST_WIDE_INT_1U << (size - 1))
4657 : {
4658 : /* This case is not handled correctly below. */
4659 133 : quotient = emit_store_flag (tquotient, EQ, op0, op1,
4660 : int_mode, 1, 1);
4661 133 : if (quotient == 0)
4662 1313 : goto fail1;
4663 : }
4664 36708 : else if (EXACT_POWER_OF_2_OR_ZERO_P (d)
4665 12849 : && (size <= HOST_BITS_PER_WIDE_INT || d >= 0)
4666 2639 : && (rem_flag
4667 2639 : ? smod_pow2_cheap (speed, int_mode)
4668 10210 : : sdiv_pow2_cheap (speed, int_mode))
4669 : /* We assume that cheap metric is true if the
4670 : optab has an expander for this mode. */
4671 50665 : && ((optab_handler ((rem_flag ? smod_optab
4672 : : sdiv_optab),
4673 : int_mode)
4674 : != CODE_FOR_nothing)
4675 621 : || (optab_handler (sdivmod_optab, int_mode)
4676 : != CODE_FOR_nothing)))
4677 : ;
4678 36093 : else if (EXACT_POWER_OF_2_OR_ZERO_P (abs_d))
4679 : {
4680 12750 : if (rem_flag)
4681 : {
4682 2509 : remainder = expand_smod_pow2 (int_mode, op0, d);
4683 2509 : if (remainder)
4684 2509 : return gen_lowpart (mode, remainder);
4685 : }
4686 :
4687 10241 : if (sdiv_pow2_cheap (speed, int_mode)
4688 10241 : && ((optab_handler (sdiv_optab, int_mode)
4689 : != CODE_FOR_nothing)
4690 10 : || (optab_handler (sdivmod_optab, int_mode)
4691 : != CODE_FOR_nothing)))
4692 7 : quotient = expand_divmod (0, TRUNC_DIV_EXPR,
4693 : int_mode, op0,
4694 7 : gen_int_mode (abs_d,
4695 : int_mode),
4696 : NULL_RTX, 0);
4697 : else
4698 10234 : quotient = expand_sdiv_pow2 (int_mode, op0, abs_d);
4699 :
4700 : /* We have computed OP0 / abs(OP1). If OP1 is negative,
4701 : negate the quotient. */
4702 10241 : if (d < 0)
4703 : {
4704 516 : insn = get_last_insn ();
4705 516 : if (insn != last
4706 516 : && abs_d < (HOST_WIDE_INT_1U
4707 : << (HOST_BITS_PER_WIDE_INT - 1)))
4708 516 : set_dst_reg_note (insn, REG_EQUAL,
4709 516 : gen_rtx_DIV (int_mode, op0,
4710 : gen_int_mode
4711 : (abs_d,
4712 : int_mode)),
4713 : quotient);
4714 :
4715 516 : quotient = expand_unop (int_mode, neg_optab,
4716 : quotient, quotient, 0);
4717 : }
4718 : }
4719 23343 : else if (size <= HOST_BITS_PER_WIDE_INT)
4720 : {
4721 22005 : choose_multiplier (abs_d, size, size - 1,
4722 : &ml, &post_shift);
4723 22005 : if (ml < HOST_WIDE_INT_1U << (size - 1))
4724 : {
4725 16643 : rtx t1, t2, t3;
4726 :
4727 17769 : if (post_shift >= BITS_PER_WORD
4728 16643 : || size - 1 >= BITS_PER_WORD)
4729 245 : goto fail1;
4730 :
4731 16398 : extra_cost = (shift_cost (speed, int_mode, post_shift)
4732 16398 : + shift_cost (speed, int_mode, size - 1)
4733 16398 : + add_cost (speed, int_mode));
4734 16398 : t1 = expmed_mult_highpart
4735 16398 : (int_mode, op0, gen_int_mode (ml, int_mode),
4736 : NULL_RTX, 0, max_cost - extra_cost);
4737 16398 : if (t1 == 0)
4738 833 : goto fail1;
4739 15565 : t2 = expand_shift
4740 31130 : (RSHIFT_EXPR, int_mode, t1,
4741 15565 : post_shift, NULL_RTX, 0);
4742 15565 : t3 = expand_shift
4743 15565 : (RSHIFT_EXPR, int_mode, op0,
4744 15565 : size - 1, NULL_RTX, 0);
4745 15565 : if (d < 0)
4746 197 : quotient
4747 197 : = force_operand (gen_rtx_MINUS (int_mode, t3, t2),
4748 : tquotient);
4749 : else
4750 15368 : quotient
4751 15368 : = force_operand (gen_rtx_MINUS (int_mode, t2, t3),
4752 : tquotient);
4753 : }
4754 : else
4755 : {
4756 5362 : rtx t1, t2, t3, t4;
4757 :
4758 5710 : if (post_shift >= BITS_PER_WORD
4759 5357 : || size - 1 >= BITS_PER_WORD)
4760 25 : goto fail1;
4761 :
4762 5337 : ml |= HOST_WIDE_INT_M1U << (size - 1);
4763 5337 : mlr = gen_int_mode (ml, int_mode);
4764 5337 : extra_cost = (shift_cost (speed, int_mode, post_shift)
4765 5337 : + shift_cost (speed, int_mode, size - 1)
4766 5337 : + 2 * add_cost (speed, int_mode));
4767 5337 : t1 = expmed_mult_highpart (int_mode, op0, mlr,
4768 : NULL_RTX, 0,
4769 : max_cost - extra_cost);
4770 5337 : if (t1 == 0)
4771 210 : goto fail1;
4772 5127 : t2 = force_operand (gen_rtx_PLUS (int_mode, t1, op0),
4773 : NULL_RTX);
4774 5127 : t3 = expand_shift
4775 10254 : (RSHIFT_EXPR, int_mode, t2,
4776 5127 : post_shift, NULL_RTX, 0);
4777 5127 : t4 = expand_shift
4778 5127 : (RSHIFT_EXPR, int_mode, op0,
4779 5127 : size - 1, NULL_RTX, 0);
4780 5127 : if (d < 0)
4781 53 : quotient
4782 53 : = force_operand (gen_rtx_MINUS (int_mode, t4, t3),
4783 : tquotient);
4784 : else
4785 5074 : quotient
4786 5074 : = force_operand (gen_rtx_MINUS (int_mode, t3, t4),
4787 : tquotient);
4788 : }
4789 : }
4790 : else /* Too wide mode to use tricky code */
4791 : break;
4792 :
4793 31681 : insn = get_last_insn ();
4794 31681 : if (insn != last)
4795 31066 : set_dst_reg_note (insn, REG_EQUAL,
4796 : gen_rtx_DIV (int_mode, op0, op1),
4797 : quotient);
4798 : }
4799 : break;
4800 : }
4801 83410 : fail1:
4802 85760 : delete_insns_since (last);
4803 85760 : break;
4804 :
4805 1770 : case FLOOR_DIV_EXPR:
4806 1770 : case FLOOR_MOD_EXPR:
4807 : /* We will come here only for signed operations. */
4808 1770 : if (op1_is_constant && HWI_COMPUTABLE_MODE_P (compute_mode))
4809 : {
4810 976 : scalar_int_mode int_mode = as_a <scalar_int_mode> (compute_mode);
4811 976 : int size = GET_MODE_BITSIZE (int_mode);
4812 976 : unsigned HOST_WIDE_INT mh, ml;
4813 976 : int pre_shift, post_shift;
4814 976 : HOST_WIDE_INT d = INTVAL (op1);
4815 :
4816 976 : if (d > 0)
4817 : {
4818 : /* We could just as easily deal with negative constants here,
4819 : but it does not seem worth the trouble for GCC 2.6. */
4820 951 : if (EXACT_POWER_OF_2_OR_ZERO_P (d))
4821 : {
4822 644 : pre_shift = floor_log2 (d);
4823 644 : if (rem_flag)
4824 : {
4825 70 : unsigned HOST_WIDE_INT mask
4826 70 : = (HOST_WIDE_INT_1U << pre_shift) - 1;
4827 70 : remainder = expand_binop
4828 70 : (int_mode, and_optab, op0,
4829 70 : gen_int_mode (mask, int_mode),
4830 : remainder, 0, methods);
4831 70 : if (remainder)
4832 70 : return gen_lowpart (mode, remainder);
4833 : }
4834 574 : quotient = expand_shift
4835 574 : (RSHIFT_EXPR, int_mode, op0,
4836 574 : pre_shift, tquotient, 0);
4837 : }
4838 : else
4839 : {
4840 307 : rtx t1, t2, t3, t4;
4841 :
4842 307 : mh = choose_multiplier (d, size, size - 1,
4843 : &ml, &post_shift);
4844 307 : gcc_assert (!mh);
4845 :
4846 331 : if (post_shift < BITS_PER_WORD
4847 307 : && size - 1 < BITS_PER_WORD)
4848 : {
4849 305 : t1 = expand_shift
4850 305 : (RSHIFT_EXPR, int_mode, op0,
4851 305 : size - 1, NULL_RTX, 0);
4852 305 : t2 = expand_binop (int_mode, xor_optab, op0, t1,
4853 : NULL_RTX, 0, OPTAB_WIDEN);
4854 305 : extra_cost = (shift_cost (speed, int_mode, post_shift)
4855 305 : + shift_cost (speed, int_mode, size - 1)
4856 305 : + 2 * add_cost (speed, int_mode));
4857 305 : t3 = expmed_mult_highpart
4858 305 : (int_mode, t2, gen_int_mode (ml, int_mode),
4859 : NULL_RTX, 1, max_cost - extra_cost);
4860 305 : if (t3 != 0)
4861 : {
4862 274 : t4 = expand_shift
4863 548 : (RSHIFT_EXPR, int_mode, t3,
4864 274 : post_shift, NULL_RTX, 1);
4865 274 : quotient = expand_binop (int_mode, xor_optab,
4866 : t4, t1, tquotient, 0,
4867 : OPTAB_WIDEN);
4868 : }
4869 : }
4870 : }
4871 : }
4872 : else
4873 : {
4874 25 : rtx nsign, t1, t2, t3, t4;
4875 25 : t1 = force_operand (gen_rtx_PLUS (int_mode,
4876 : op0, constm1_rtx), NULL_RTX);
4877 25 : t2 = expand_binop (int_mode, ior_optab, op0, t1, NULL_RTX,
4878 : 0, OPTAB_WIDEN);
4879 50 : nsign = expand_shift (RSHIFT_EXPR, int_mode, t2,
4880 25 : size - 1, NULL_RTX, 0);
4881 25 : t3 = force_operand (gen_rtx_MINUS (int_mode, t1, nsign),
4882 : NULL_RTX);
4883 25 : t4 = expand_divmod (0, TRUNC_DIV_EXPR, int_mode, t3, op1,
4884 : NULL_RTX, 0);
4885 25 : if (t4)
4886 : {
4887 25 : rtx t5;
4888 25 : t5 = expand_unop (int_mode, one_cmpl_optab, nsign,
4889 : NULL_RTX, 0);
4890 25 : quotient = force_operand (gen_rtx_PLUS (int_mode, t4, t5),
4891 : tquotient);
4892 : }
4893 : }
4894 : }
4895 :
4896 906 : if (quotient != 0)
4897 : break;
4898 827 : delete_insns_since (last);
4899 :
4900 : /* Try using an instruction that produces both the quotient and
4901 : remainder, using truncation. We can easily compensate the quotient
4902 : or remainder to get floor rounding, once we have the remainder.
4903 : Notice that we compute also the final remainder value here,
4904 : and return the result right away. */
4905 827 : if (target == 0 || GET_MODE (target) != compute_mode)
4906 125 : target = gen_reg_rtx (compute_mode);
4907 :
4908 827 : if (rem_flag)
4909 : {
4910 329 : remainder
4911 329 : = REG_P (target) ? target : gen_reg_rtx (compute_mode);
4912 329 : quotient = gen_reg_rtx (compute_mode);
4913 : }
4914 : else
4915 : {
4916 498 : quotient
4917 498 : = REG_P (target) ? target : gen_reg_rtx (compute_mode);
4918 498 : remainder = gen_reg_rtx (compute_mode);
4919 : }
4920 :
4921 827 : if (expand_twoval_binop (sdivmod_optab, op0, op1,
4922 : quotient, remainder, 0))
4923 : {
4924 : /* This could be computed with a branch-less sequence.
4925 : Save that for later. */
4926 792 : rtx tem;
4927 792 : rtx_code_label *label = gen_label_rtx ();
4928 792 : do_cmp_and_jump (remainder, const0_rtx, EQ, compute_mode, label);
4929 792 : tem = expand_binop (compute_mode, xor_optab, op0, op1,
4930 : NULL_RTX, 0, OPTAB_WIDEN);
4931 792 : do_cmp_and_jump (tem, const0_rtx, GE, compute_mode, label);
4932 792 : expand_dec (quotient, const1_rtx);
4933 792 : expand_inc (remainder, op1);
4934 792 : emit_label (label);
4935 1281 : return gen_lowpart (mode, rem_flag ? remainder : quotient);
4936 : }
4937 :
4938 : /* No luck with division elimination or divmod. Have to do it
4939 : by conditionally adjusting op0 *and* the result. */
4940 35 : {
4941 35 : rtx_code_label *label1, *label2, *label3, *label4, *label5;
4942 35 : rtx adjusted_op0;
4943 35 : rtx tem;
4944 :
4945 35 : quotient = gen_reg_rtx (compute_mode);
4946 35 : adjusted_op0 = copy_to_mode_reg (compute_mode, op0);
4947 35 : label1 = gen_label_rtx ();
4948 35 : label2 = gen_label_rtx ();
4949 35 : label3 = gen_label_rtx ();
4950 35 : label4 = gen_label_rtx ();
4951 35 : label5 = gen_label_rtx ();
4952 35 : do_cmp_and_jump (op1, const0_rtx, LT, compute_mode, label2);
4953 35 : do_cmp_and_jump (adjusted_op0, const0_rtx, LT, compute_mode, label1);
4954 35 : tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4955 : quotient, 0, methods);
4956 35 : if (tem != quotient)
4957 35 : emit_move_insn (quotient, tem);
4958 35 : emit_jump_insn (targetm.gen_jump (label5));
4959 35 : emit_barrier ();
4960 35 : emit_label (label1);
4961 35 : expand_inc (adjusted_op0, const1_rtx);
4962 35 : emit_jump_insn (targetm.gen_jump (label4));
4963 35 : emit_barrier ();
4964 35 : emit_label (label2);
4965 35 : do_cmp_and_jump (adjusted_op0, const0_rtx, GT, compute_mode, label3);
4966 35 : tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4967 : quotient, 0, methods);
4968 35 : if (tem != quotient)
4969 35 : emit_move_insn (quotient, tem);
4970 35 : emit_jump_insn (targetm.gen_jump (label5));
4971 35 : emit_barrier ();
4972 35 : emit_label (label3);
4973 35 : expand_dec (adjusted_op0, const1_rtx);
4974 35 : emit_label (label4);
4975 35 : tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4976 : quotient, 0, methods);
4977 35 : if (tem != quotient)
4978 35 : emit_move_insn (quotient, tem);
4979 35 : expand_dec (quotient, const1_rtx);
4980 35 : emit_label (label5);
4981 : }
4982 35 : break;
4983 :
4984 383 : case CEIL_DIV_EXPR:
4985 383 : case CEIL_MOD_EXPR:
4986 383 : if (unsignedp)
4987 : {
4988 0 : if (op1_is_constant
4989 0 : && EXACT_POWER_OF_2_OR_ZERO_P (INTVAL (op1))
4990 0 : && (HWI_COMPUTABLE_MODE_P (compute_mode)
4991 0 : || INTVAL (op1) >= 0))
4992 : {
4993 0 : scalar_int_mode int_mode
4994 0 : = as_a <scalar_int_mode> (compute_mode);
4995 0 : rtx t1, t2, t3;
4996 0 : unsigned HOST_WIDE_INT d = INTVAL (op1);
4997 0 : t1 = expand_shift (RSHIFT_EXPR, int_mode, op0,
4998 0 : floor_log2 (d), tquotient, 1);
4999 0 : t2 = expand_binop (int_mode, and_optab, op0,
5000 0 : gen_int_mode (d - 1, int_mode),
5001 : NULL_RTX, 1, methods);
5002 0 : t3 = gen_reg_rtx (int_mode);
5003 0 : t3 = emit_store_flag (t3, NE, t2, const0_rtx, int_mode, 1, 1);
5004 0 : if (t3 == 0)
5005 : {
5006 0 : rtx_code_label *lab;
5007 0 : lab = gen_label_rtx ();
5008 0 : do_cmp_and_jump (t2, const0_rtx, EQ, int_mode, lab);
5009 0 : expand_inc (t1, const1_rtx);
5010 0 : emit_label (lab);
5011 0 : quotient = t1;
5012 : }
5013 : else
5014 0 : quotient = force_operand (gen_rtx_PLUS (int_mode, t1, t3),
5015 : tquotient);
5016 : break;
5017 : }
5018 :
5019 : /* Try using an instruction that produces both the quotient and
5020 : remainder, using truncation. We can easily compensate the
5021 : quotient or remainder to get ceiling rounding, once we have the
5022 : remainder. Notice that we compute also the final remainder
5023 : value here, and return the result right away. */
5024 0 : if (target == 0 || GET_MODE (target) != compute_mode)
5025 0 : target = gen_reg_rtx (compute_mode);
5026 :
5027 0 : if (rem_flag)
5028 : {
5029 0 : remainder = (REG_P (target)
5030 0 : ? target : gen_reg_rtx (compute_mode));
5031 0 : quotient = gen_reg_rtx (compute_mode);
5032 : }
5033 : else
5034 : {
5035 0 : quotient = (REG_P (target)
5036 0 : ? target : gen_reg_rtx (compute_mode));
5037 0 : remainder = gen_reg_rtx (compute_mode);
5038 : }
5039 :
5040 0 : if (expand_twoval_binop (udivmod_optab, op0, op1, quotient,
5041 : remainder, 1))
5042 : {
5043 : /* This could be computed with a branch-less sequence.
5044 : Save that for later. */
5045 0 : rtx_code_label *label = gen_label_rtx ();
5046 0 : do_cmp_and_jump (remainder, const0_rtx, EQ,
5047 : compute_mode, label);
5048 0 : expand_inc (quotient, const1_rtx);
5049 0 : expand_dec (remainder, op1);
5050 0 : emit_label (label);
5051 0 : return gen_lowpart (mode, rem_flag ? remainder : quotient);
5052 : }
5053 :
5054 : /* No luck with division elimination or divmod. Have to do it
5055 : by conditionally adjusting op0 *and* the result. */
5056 0 : {
5057 0 : rtx_code_label *label1, *label2;
5058 0 : rtx adjusted_op0, tem;
5059 :
5060 0 : quotient = gen_reg_rtx (compute_mode);
5061 0 : adjusted_op0 = copy_to_mode_reg (compute_mode, op0);
5062 0 : label1 = gen_label_rtx ();
5063 0 : label2 = gen_label_rtx ();
5064 0 : do_cmp_and_jump (adjusted_op0, const0_rtx, NE,
5065 : compute_mode, label1);
5066 0 : emit_move_insn (quotient, const0_rtx);
5067 0 : emit_jump_insn (targetm.gen_jump (label2));
5068 0 : emit_barrier ();
5069 0 : emit_label (label1);
5070 0 : expand_dec (adjusted_op0, const1_rtx);
5071 0 : tem = expand_binop (compute_mode, udiv_optab, adjusted_op0, op1,
5072 : quotient, 1, methods);
5073 0 : if (tem != quotient)
5074 0 : emit_move_insn (quotient, tem);
5075 0 : expand_inc (quotient, const1_rtx);
5076 0 : emit_label (label2);
5077 : }
5078 : }
5079 : else /* signed */
5080 : {
5081 383 : if (op1_is_constant && EXACT_POWER_OF_2_OR_ZERO_P (INTVAL (op1))
5082 27 : && INTVAL (op1) >= 0)
5083 : {
5084 : /* This is extremely similar to the code for the unsigned case
5085 : above. For 2.7 we should merge these variants, but for
5086 : 2.6.1 I don't want to touch the code for unsigned since that
5087 : get used in C. The signed case will only be used by other
5088 : languages (Ada). */
5089 :
5090 27 : rtx t1, t2, t3;
5091 27 : unsigned HOST_WIDE_INT d = INTVAL (op1);
5092 54 : t1 = expand_shift (RSHIFT_EXPR, compute_mode, op0,
5093 27 : floor_log2 (d), tquotient, 0);
5094 27 : t2 = expand_binop (compute_mode, and_optab, op0,
5095 27 : gen_int_mode (d - 1, compute_mode),
5096 : NULL_RTX, 1, methods);
5097 27 : t3 = gen_reg_rtx (compute_mode);
5098 27 : t3 = emit_store_flag (t3, NE, t2, const0_rtx,
5099 : compute_mode, 1, 1);
5100 27 : if (t3 == 0)
5101 : {
5102 0 : rtx_code_label *lab;
5103 0 : lab = gen_label_rtx ();
5104 0 : do_cmp_and_jump (t2, const0_rtx, EQ, compute_mode, lab);
5105 0 : expand_inc (t1, const1_rtx);
5106 0 : emit_label (lab);
5107 0 : quotient = t1;
5108 : }
5109 : else
5110 27 : quotient = force_operand (gen_rtx_PLUS (compute_mode,
5111 : t1, t3),
5112 : tquotient);
5113 : break;
5114 : }
5115 :
5116 : /* Try using an instruction that produces both the quotient and
5117 : remainder, using truncation. We can easily compensate the
5118 : quotient or remainder to get ceiling rounding, once we have the
5119 : remainder. Notice that we compute also the final remainder
5120 : value here, and return the result right away. */
5121 356 : if (target == 0 || GET_MODE (target) != compute_mode)
5122 15 : target = gen_reg_rtx (compute_mode);
5123 356 : if (rem_flag)
5124 : {
5125 149 : remainder= (REG_P (target)
5126 149 : ? target : gen_reg_rtx (compute_mode));
5127 149 : quotient = gen_reg_rtx (compute_mode);
5128 : }
5129 : else
5130 : {
5131 207 : quotient = (REG_P (target)
5132 207 : ? target : gen_reg_rtx (compute_mode));
5133 207 : remainder = gen_reg_rtx (compute_mode);
5134 : }
5135 :
5136 356 : if (expand_twoval_binop (sdivmod_optab, op0, op1, quotient,
5137 : remainder, 0))
5138 : {
5139 : /* This could be computed with a branch-less sequence.
5140 : Save that for later. */
5141 356 : rtx tem;
5142 356 : rtx_code_label *label = gen_label_rtx ();
5143 356 : do_cmp_and_jump (remainder, const0_rtx, EQ,
5144 : compute_mode, label);
5145 356 : tem = expand_binop (compute_mode, xor_optab, op0, op1,
5146 : NULL_RTX, 0, OPTAB_WIDEN);
5147 356 : do_cmp_and_jump (tem, const0_rtx, LT, compute_mode, label);
5148 356 : expand_inc (quotient, const1_rtx);
5149 356 : expand_dec (remainder, op1);
5150 356 : emit_label (label);
5151 563 : return gen_lowpart (mode, rem_flag ? remainder : quotient);
5152 : }
5153 :
5154 : /* No luck with division elimination or divmod. Have to do it
5155 : by conditionally adjusting op0 *and* the result. */
5156 0 : {
5157 0 : rtx_code_label *label1, *label2, *label3, *label4, *label5;
5158 0 : rtx adjusted_op0;
5159 0 : rtx tem;
5160 :
5161 0 : quotient = gen_reg_rtx (compute_mode);
5162 0 : adjusted_op0 = copy_to_mode_reg (compute_mode, op0);
5163 0 : label1 = gen_label_rtx ();
5164 0 : label2 = gen_label_rtx ();
5165 0 : label3 = gen_label_rtx ();
5166 0 : label4 = gen_label_rtx ();
5167 0 : label5 = gen_label_rtx ();
5168 0 : do_cmp_and_jump (op1, const0_rtx, LT, compute_mode, label2);
5169 0 : do_cmp_and_jump (adjusted_op0, const0_rtx, GT,
5170 : compute_mode, label1);
5171 0 : tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
5172 : quotient, 0, methods);
5173 0 : if (tem != quotient)
5174 0 : emit_move_insn (quotient, tem);
5175 0 : emit_jump_insn (targetm.gen_jump (label5));
5176 0 : emit_barrier ();
5177 0 : emit_label (label1);
5178 0 : expand_dec (adjusted_op0, const1_rtx);
5179 0 : emit_jump_insn (targetm.gen_jump (label4));
5180 0 : emit_barrier ();
5181 0 : emit_label (label2);
5182 0 : do_cmp_and_jump (adjusted_op0, const0_rtx, LT,
5183 : compute_mode, label3);
5184 0 : tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
5185 : quotient, 0, methods);
5186 0 : if (tem != quotient)
5187 0 : emit_move_insn (quotient, tem);
5188 0 : emit_jump_insn (targetm.gen_jump (label5));
5189 0 : emit_barrier ();
5190 0 : emit_label (label3);
5191 0 : expand_inc (adjusted_op0, const1_rtx);
5192 0 : emit_label (label4);
5193 0 : tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
5194 : quotient, 0, methods);
5195 0 : if (tem != quotient)
5196 0 : emit_move_insn (quotient, tem);
5197 0 : expand_inc (quotient, const1_rtx);
5198 0 : emit_label (label5);
5199 : }
5200 : }
5201 : break;
5202 :
5203 49961 : case EXACT_DIV_EXPR:
5204 49961 : if (op1_is_constant && HWI_COMPUTABLE_MODE_P (compute_mode))
5205 : {
5206 49953 : scalar_int_mode int_mode = as_a <scalar_int_mode> (compute_mode);
5207 49953 : int size = GET_MODE_BITSIZE (int_mode);
5208 49953 : HOST_WIDE_INT d = INTVAL (op1);
5209 49953 : unsigned HOST_WIDE_INT ml;
5210 49953 : int pre_shift;
5211 49953 : rtx t1;
5212 :
5213 49953 : pre_shift = ctz_or_zero (d);
5214 49953 : ml = invert_mod2n (d >> pre_shift, size);
5215 49953 : t1 = expand_shift (RSHIFT_EXPR, int_mode, op0,
5216 49953 : pre_shift, NULL_RTX, unsignedp);
5217 49953 : quotient = expand_mult (int_mode, t1, gen_int_mode (ml, int_mode),
5218 : NULL_RTX, 1);
5219 :
5220 49953 : insn = get_last_insn ();
5221 99906 : set_dst_reg_note (insn, REG_EQUAL,
5222 : gen_rtx_fmt_ee (unsignedp ? UDIV : DIV,
5223 : int_mode, op0, op1),
5224 : quotient);
5225 : }
5226 : break;
5227 :
5228 2 : case ROUND_DIV_EXPR:
5229 2 : case ROUND_MOD_EXPR:
5230 2 : if (unsignedp)
5231 : {
5232 0 : scalar_int_mode int_mode = as_a <scalar_int_mode> (compute_mode);
5233 0 : rtx tem;
5234 0 : rtx_code_label *label;
5235 0 : label = gen_label_rtx ();
5236 0 : quotient = gen_reg_rtx (int_mode);
5237 0 : remainder = gen_reg_rtx (int_mode);
5238 0 : if (expand_twoval_binop (udivmod_optab, op0, op1, quotient, remainder, 1) == 0)
5239 : {
5240 0 : rtx tem;
5241 0 : quotient = expand_binop (int_mode, udiv_optab, op0, op1,
5242 : quotient, 1, methods);
5243 0 : tem = expand_mult (int_mode, quotient, op1, NULL_RTX, 1);
5244 0 : remainder = expand_binop (int_mode, sub_optab, op0, tem,
5245 : remainder, 1, methods);
5246 : }
5247 0 : tem = plus_constant (int_mode, op1, -1);
5248 0 : tem = expand_shift (RSHIFT_EXPR, int_mode, tem, 1, NULL_RTX, 1);
5249 0 : do_cmp_and_jump (remainder, tem, LEU, int_mode, label);
5250 0 : expand_inc (quotient, const1_rtx);
5251 0 : expand_dec (remainder, op1);
5252 0 : emit_label (label);
5253 : }
5254 : else
5255 : {
5256 2 : scalar_int_mode int_mode = as_a <scalar_int_mode> (compute_mode);
5257 2 : int size = GET_MODE_BITSIZE (int_mode);
5258 2 : rtx abs_rem, abs_op1, tem, mask;
5259 2 : rtx_code_label *label;
5260 2 : label = gen_label_rtx ();
5261 2 : quotient = gen_reg_rtx (int_mode);
5262 2 : remainder = gen_reg_rtx (int_mode);
5263 2 : if (expand_twoval_binop (sdivmod_optab, op0, op1, quotient, remainder, 0) == 0)
5264 : {
5265 0 : rtx tem;
5266 0 : quotient = expand_binop (int_mode, sdiv_optab, op0, op1,
5267 : quotient, 0, methods);
5268 0 : tem = expand_mult (int_mode, quotient, op1, NULL_RTX, 0);
5269 0 : remainder = expand_binop (int_mode, sub_optab, op0, tem,
5270 : remainder, 0, methods);
5271 : }
5272 2 : abs_rem = expand_abs (int_mode, remainder, NULL_RTX, 1, 0);
5273 2 : abs_op1 = expand_abs (int_mode, op1, NULL_RTX, 1, 0);
5274 2 : tem = expand_shift (LSHIFT_EXPR, int_mode, abs_rem,
5275 : 1, NULL_RTX, 1);
5276 2 : do_cmp_and_jump (tem, abs_op1, LTU, int_mode, label);
5277 2 : tem = expand_binop (int_mode, xor_optab, op0, op1,
5278 : NULL_RTX, 0, OPTAB_WIDEN);
5279 4 : mask = expand_shift (RSHIFT_EXPR, int_mode, tem,
5280 2 : size - 1, NULL_RTX, 0);
5281 2 : tem = expand_binop (int_mode, xor_optab, mask, const1_rtx,
5282 : NULL_RTX, 0, OPTAB_WIDEN);
5283 2 : tem = expand_binop (int_mode, sub_optab, tem, mask,
5284 : NULL_RTX, 0, OPTAB_WIDEN);
5285 2 : expand_inc (quotient, tem);
5286 2 : tem = expand_binop (int_mode, xor_optab, mask, op1,
5287 : NULL_RTX, 0, OPTAB_WIDEN);
5288 2 : tem = expand_binop (int_mode, sub_optab, tem, mask,
5289 : NULL_RTX, 0, OPTAB_WIDEN);
5290 2 : expand_dec (remainder, tem);
5291 2 : emit_label (label);
5292 : }
5293 3 : return gen_lowpart (mode, rem_flag ? remainder : quotient);
5294 :
5295 0 : default:
5296 0 : gcc_unreachable ();
5297 : }
5298 :
5299 224577 : if (quotient == 0)
5300 : {
5301 89682 : if (target && GET_MODE (target) != compute_mode)
5302 48962 : target = 0;
5303 :
5304 89682 : if (rem_flag)
5305 : {
5306 : /* Try to produce the remainder without producing the quotient.
5307 : If we seem to have a divmod pattern that does not require widening,
5308 : don't try widening here. We should really have a WIDEN argument
5309 : to expand_twoval_binop, since what we'd really like to do here is
5310 : 1) try a mod insn in compute_mode
5311 : 2) try a divmod insn in compute_mode
5312 : 3) try a div insn in compute_mode and multiply-subtract to get
5313 : remainder
5314 : 4) try the same things with widening allowed. */
5315 31492 : remainder
5316 32959 : = sign_expand_binop (compute_mode, umod_optab, smod_optab,
5317 : op0, op1, target,
5318 : unsignedp,
5319 31492 : ((optab_handler (optab2, compute_mode)
5320 : != CODE_FOR_nothing)
5321 : ? OPTAB_DIRECT : OPTAB_WIDEN));
5322 31492 : if (remainder == 0)
5323 : {
5324 : /* No luck there. Can we do remainder and divide at once
5325 : without a library call? */
5326 31285 : remainder = gen_reg_rtx (compute_mode);
5327 46473 : if (! expand_twoval_binop ((unsignedp
5328 : ? udivmod_optab
5329 : : sdivmod_optab),
5330 : op0, op1,
5331 : NULL_RTX, remainder, unsignedp))
5332 : remainder = 0;
5333 : }
5334 :
5335 30025 : if (remainder)
5336 30232 : return gen_lowpart (mode, remainder);
5337 : }
5338 :
5339 : /* Produce the quotient. Try a quotient insn, but not a library call.
5340 : If we have a divmod in this mode, use it in preference to widening
5341 : the div (for this test we assume it will not fail). Note that optab2
5342 : is set to the one of the two optabs that the call below will use. */
5343 59450 : quotient
5344 64611 : = sign_expand_binop (compute_mode, udiv_optab, sdiv_optab,
5345 : op0, op1, rem_flag ? NULL_RTX : target,
5346 : unsignedp,
5347 59450 : ((optab_handler (optab2, compute_mode)
5348 : != CODE_FOR_nothing)
5349 : ? OPTAB_DIRECT : OPTAB_WIDEN));
5350 :
5351 59450 : if (quotient == 0)
5352 : {
5353 : /* No luck there. Try a quotient-and-remainder insn,
5354 : keeping the quotient alone. */
5355 59062 : quotient = gen_reg_rtx (compute_mode);
5356 81588 : if (! expand_twoval_binop (unsignedp ? udivmod_optab : sdivmod_optab,
5357 : op0, op1,
5358 : quotient, NULL_RTX, unsignedp))
5359 : {
5360 3514 : quotient = 0;
5361 3514 : if (! rem_flag)
5362 : /* Still no luck. If we are not computing the remainder,
5363 : use a library call for the quotient. */
5364 2276 : quotient = sign_expand_binop (compute_mode,
5365 : udiv_optab, sdiv_optab,
5366 : op0, op1, target,
5367 : unsignedp, methods);
5368 : }
5369 : }
5370 : }
5371 :
5372 194367 : if (rem_flag)
5373 : {
5374 21799 : if (target && GET_MODE (target) != compute_mode)
5375 14471 : target = 0;
5376 :
5377 21799 : if (quotient == 0)
5378 : {
5379 : /* No divide instruction either. Use library for remainder. */
5380 1238 : remainder = sign_expand_binop (compute_mode, umod_optab, smod_optab,
5381 : op0, op1, target,
5382 : unsignedp, methods);
5383 : /* No remainder function. Try a quotient-and-remainder
5384 : function, keeping the remainder. */
5385 1238 : if (!remainder
5386 0 : && (methods == OPTAB_LIB || methods == OPTAB_LIB_WIDEN))
5387 : {
5388 0 : remainder = gen_reg_rtx (compute_mode);
5389 0 : if (!expand_twoval_binop_libfunc
5390 0 : (unsignedp ? udivmod_optab : sdivmod_optab,
5391 : op0, op1,
5392 : NULL_RTX, remainder,
5393 : unsignedp ? UMOD : MOD))
5394 0 : remainder = NULL_RTX;
5395 : }
5396 : }
5397 : else
5398 : {
5399 : /* We divided. Now finish doing X - Y * (X / Y). */
5400 20561 : remainder = expand_mult (compute_mode, quotient, op1,
5401 : NULL_RTX, unsignedp);
5402 20561 : remainder = expand_binop (compute_mode, sub_optab, op0,
5403 : remainder, target, unsignedp,
5404 : methods);
5405 : }
5406 : }
5407 :
5408 195605 : if (methods != OPTAB_LIB_WIDEN
5409 1669 : && (rem_flag ? remainder : quotient) == NULL_RTX)
5410 : return NULL_RTX;
5411 :
5412 369411 : return gen_lowpart (mode, rem_flag ? remainder : quotient);
5413 : }
5414 :
5415 : /* Return a tree node with data type TYPE, describing the value of X.
5416 : Usually this is an VAR_DECL, if there is no obvious better choice.
5417 : X may be an expression, however we only support those expressions
5418 : generated by loop.c. */
5419 :
5420 : tree
5421 672090 : make_tree (tree type, rtx x)
5422 : {
5423 672090 : tree t;
5424 :
5425 672090 : switch (GET_CODE (x))
5426 : {
5427 22402 : case CONST_INT:
5428 22402 : case CONST_WIDE_INT:
5429 22402 : t = wide_int_to_tree (type, rtx_mode_t (x, TYPE_MODE (type)));
5430 22402 : return t;
5431 :
5432 0 : case CONST_POLY_INT:
5433 0 : return wide_int_to_tree (type, const_poly_int_value (x));
5434 :
5435 0 : case CONST_DOUBLE:
5436 0 : STATIC_ASSERT (HOST_BITS_PER_WIDE_INT * 2 <= MAX_BITSIZE_MODE_ANY_INT);
5437 0 : if (TARGET_SUPPORTS_WIDE_INT == 0 && GET_MODE (x) == VOIDmode)
5438 : t = wide_int_to_tree (type,
5439 : wide_int::from_array (&CONST_DOUBLE_LOW (x), 2,
5440 : HOST_BITS_PER_WIDE_INT * 2));
5441 : else
5442 0 : t = build_real (type, *CONST_DOUBLE_REAL_VALUE (x));
5443 :
5444 0 : return t;
5445 :
5446 0 : case CONST_VECTOR:
5447 0 : {
5448 0 : unsigned int npatterns = CONST_VECTOR_NPATTERNS (x);
5449 0 : unsigned int nelts_per_pattern = CONST_VECTOR_NELTS_PER_PATTERN (x);
5450 0 : tree itype = TREE_TYPE (type);
5451 :
5452 : /* Build a tree with vector elements. */
5453 0 : tree_vector_builder elts (type, npatterns, nelts_per_pattern);
5454 0 : unsigned int count = elts.encoded_nelts ();
5455 0 : for (unsigned int i = 0; i < count; ++i)
5456 : {
5457 0 : rtx elt = CONST_VECTOR_ELT (x, i);
5458 0 : elts.quick_push (make_tree (itype, elt));
5459 : }
5460 :
5461 0 : return elts.build ();
5462 0 : }
5463 :
5464 0 : case PLUS:
5465 0 : return fold_build2 (PLUS_EXPR, type, make_tree (type, XEXP (x, 0)),
5466 : make_tree (type, XEXP (x, 1)));
5467 :
5468 0 : case MINUS:
5469 0 : return fold_build2 (MINUS_EXPR, type, make_tree (type, XEXP (x, 0)),
5470 : make_tree (type, XEXP (x, 1)));
5471 :
5472 0 : case NEG:
5473 0 : return fold_build1 (NEGATE_EXPR, type, make_tree (type, XEXP (x, 0)));
5474 :
5475 0 : case MULT:
5476 0 : return fold_build2 (MULT_EXPR, type, make_tree (type, XEXP (x, 0)),
5477 : make_tree (type, XEXP (x, 1)));
5478 :
5479 0 : case ASHIFT:
5480 0 : return fold_build2 (LSHIFT_EXPR, type, make_tree (type, XEXP (x, 0)),
5481 : make_tree (type, XEXP (x, 1)));
5482 :
5483 0 : case LSHIFTRT:
5484 0 : t = unsigned_type_for (type);
5485 0 : return fold_convert (type, build2 (RSHIFT_EXPR, t,
5486 : make_tree (t, XEXP (x, 0)),
5487 : make_tree (type, XEXP (x, 1))));
5488 :
5489 0 : case ASHIFTRT:
5490 0 : t = signed_type_for (type);
5491 0 : return fold_convert (type, build2 (RSHIFT_EXPR, t,
5492 : make_tree (t, XEXP (x, 0)),
5493 : make_tree (type, XEXP (x, 1))));
5494 :
5495 0 : case DIV:
5496 0 : if (TREE_CODE (type) != REAL_TYPE)
5497 0 : t = signed_type_for (type);
5498 : else
5499 : t = type;
5500 :
5501 0 : return fold_convert (type, build2 (TRUNC_DIV_EXPR, t,
5502 : make_tree (t, XEXP (x, 0)),
5503 : make_tree (t, XEXP (x, 1))));
5504 0 : case UDIV:
5505 0 : t = unsigned_type_for (type);
5506 0 : return fold_convert (type, build2 (TRUNC_DIV_EXPR, t,
5507 : make_tree (t, XEXP (x, 0)),
5508 : make_tree (t, XEXP (x, 1))));
5509 :
5510 0 : case SIGN_EXTEND:
5511 0 : case ZERO_EXTEND:
5512 0 : t = lang_hooks.types.type_for_mode (GET_MODE (XEXP (x, 0)),
5513 : GET_CODE (x) == ZERO_EXTEND);
5514 0 : return fold_convert (type, make_tree (t, XEXP (x, 0)));
5515 :
5516 0 : case CONST:
5517 0 : return make_tree (type, XEXP (x, 0));
5518 :
5519 0 : case SYMBOL_REF:
5520 0 : t = SYMBOL_REF_DECL (x);
5521 0 : if (t)
5522 0 : return fold_convert (type, build_fold_addr_expr (t));
5523 : /* fall through. */
5524 :
5525 649688 : default:
5526 649688 : t = build_decl (RTL_LOCATION (x), VAR_DECL, NULL_TREE, type);
5527 :
5528 : /* If TYPE is a POINTER_TYPE, we might need to convert X from
5529 : address mode to pointer mode. */
5530 649688 : if (POINTER_TYPE_P (type))
5531 806212 : x = convert_memory_address_addr_space
5532 403106 : (SCALAR_INT_TYPE_MODE (type), x, TYPE_ADDR_SPACE (TREE_TYPE (type)));
5533 :
5534 : /* Note that we do *not* use SET_DECL_RTL here, because we do not
5535 : want set_decl_rtl to go adjusting REG_ATTRS for this temporary. */
5536 649688 : t->decl_with_rtl.rtl = x;
5537 :
5538 649688 : return t;
5539 : }
5540 : }
5541 :
5542 : /* Compute the logical-and of OP0 and OP1, storing it in TARGET
5543 : and returning TARGET.
5544 :
5545 : If TARGET is 0, a pseudo-register or constant is returned. */
5546 :
5547 : rtx
5548 69822 : expand_and (machine_mode mode, rtx op0, rtx op1, rtx target)
5549 : {
5550 69822 : rtx tem = 0;
5551 :
5552 69822 : if (GET_MODE (op0) == VOIDmode && GET_MODE (op1) == VOIDmode)
5553 81 : tem = simplify_binary_operation (AND, mode, op0, op1);
5554 81 : if (tem == 0)
5555 69741 : tem = expand_binop (mode, and_optab, op0, op1, target, 0, OPTAB_LIB_WIDEN);
5556 :
5557 69822 : if (target == 0)
5558 : target = tem;
5559 44310 : else if (tem != target)
5560 13 : emit_move_insn (target, tem);
5561 69822 : return target;
5562 : }
5563 :
5564 : /* Helper function for emit_store_flag. */
5565 : rtx
5566 708908 : emit_cstore (rtx target, enum insn_code icode, enum rtx_code code,
5567 : machine_mode mode, machine_mode compare_mode,
5568 : int unsignedp, rtx x, rtx y, int normalizep,
5569 : machine_mode target_mode)
5570 : {
5571 708908 : class expand_operand ops[4];
5572 708908 : rtx op0, comparison, subtarget;
5573 708908 : rtx_insn *last;
5574 708908 : scalar_int_mode result_mode = targetm.cstore_mode (icode);
5575 708908 : scalar_int_mode int_target_mode;
5576 :
5577 708908 : last = get_last_insn ();
5578 708908 : x = prepare_operand (icode, x, 2, mode, compare_mode, unsignedp);
5579 708908 : y = prepare_operand (icode, y, 3, mode, compare_mode, unsignedp);
5580 708908 : if (!x || !y)
5581 : {
5582 284 : delete_insns_since (last);
5583 284 : return NULL_RTX;
5584 : }
5585 :
5586 708624 : if (target_mode == VOIDmode)
5587 : int_target_mode = result_mode;
5588 : else
5589 708608 : int_target_mode = as_a <scalar_int_mode> (target_mode);
5590 708624 : if (!target)
5591 66441 : target = gen_reg_rtx (int_target_mode);
5592 :
5593 708624 : comparison = gen_rtx_fmt_ee (code, result_mode, x, y);
5594 :
5595 708624 : create_output_operand (&ops[0], optimize ? NULL_RTX : target, result_mode);
5596 708624 : create_fixed_operand (&ops[1], comparison);
5597 708624 : create_fixed_operand (&ops[2], x);
5598 708624 : create_fixed_operand (&ops[3], y);
5599 708624 : if (!maybe_expand_insn (icode, 4, ops))
5600 : {
5601 149147 : delete_insns_since (last);
5602 149147 : return NULL_RTX;
5603 : }
5604 559477 : subtarget = ops[0].value;
5605 :
5606 : /* If we are converting to a wider mode, first convert to
5607 : INT_TARGET_MODE, then normalize. This produces better combining
5608 : opportunities on machines that have a SIGN_EXTRACT when we are
5609 : testing a single bit. This mostly benefits the 68k.
5610 :
5611 : If STORE_FLAG_VALUE does not have the sign bit set when
5612 : interpreted in MODE, we can do this conversion as unsigned, which
5613 : is usually more efficient. */
5614 559477 : if (GET_MODE_PRECISION (int_target_mode) > GET_MODE_PRECISION (result_mode))
5615 : {
5616 110486 : gcc_assert (GET_MODE_PRECISION (result_mode) != 1
5617 : || STORE_FLAG_VALUE == 1 || STORE_FLAG_VALUE == -1);
5618 :
5619 110486 : bool unsignedp = (STORE_FLAG_VALUE >= 0);
5620 110486 : convert_move (target, subtarget, unsignedp);
5621 :
5622 110486 : op0 = target;
5623 110486 : result_mode = int_target_mode;
5624 : }
5625 : else
5626 : op0 = subtarget;
5627 :
5628 : /* If we want to keep subexpressions around, don't reuse our last
5629 : target. */
5630 559477 : if (optimize)
5631 443059 : subtarget = 0;
5632 :
5633 : /* Now normalize to the proper value in MODE. Sometimes we don't
5634 : have to do anything. */
5635 559477 : if (normalizep == 0 || normalizep == STORE_FLAG_VALUE)
5636 : ;
5637 : /* STORE_FLAG_VALUE might be the most negative number, so write
5638 : the comparison this way to avoid a compiler-time warning. */
5639 394 : else if (- normalizep == STORE_FLAG_VALUE)
5640 394 : op0 = expand_unop (result_mode, neg_optab, op0, subtarget, 0);
5641 :
5642 : /* We don't want to use STORE_FLAG_VALUE < 0 below since this makes
5643 : it hard to use a value of just the sign bit due to ANSI integer
5644 : constant typing rules. */
5645 0 : else if (val_signbit_known_set_p (result_mode, STORE_FLAG_VALUE))
5646 0 : op0 = expand_shift (RSHIFT_EXPR, result_mode, op0,
5647 0 : GET_MODE_BITSIZE (result_mode) - 1, subtarget,
5648 : normalizep == 1);
5649 : else
5650 : {
5651 0 : gcc_assert (STORE_FLAG_VALUE & 1);
5652 :
5653 0 : op0 = expand_and (result_mode, op0, const1_rtx, subtarget);
5654 0 : if (normalizep == -1)
5655 : op0 = expand_unop (result_mode, neg_optab, op0, op0, 0);
5656 : }
5657 :
5658 : /* If we were converting to a smaller mode, do the conversion now. */
5659 559477 : if (int_target_mode != result_mode)
5660 : {
5661 0 : convert_move (target, op0, 0);
5662 0 : return target;
5663 : }
5664 : else
5665 : return op0;
5666 : }
5667 :
5668 :
5669 : /* A subroutine of emit_store_flag only including "tricks" that do not
5670 : need a recursive call. These are kept separate to avoid infinite
5671 : loops. */
5672 :
5673 : static rtx
5674 676014 : emit_store_flag_1 (rtx target, enum rtx_code code, rtx op0, rtx op1,
5675 : machine_mode mode, int unsignedp, int normalizep,
5676 : machine_mode target_mode)
5677 : {
5678 676014 : rtx subtarget;
5679 676014 : enum insn_code icode;
5680 676014 : machine_mode compare_mode;
5681 676014 : enum mode_class mclass;
5682 :
5683 676014 : if (unsignedp)
5684 154864 : code = unsigned_condition (code);
5685 :
5686 : /* If one operand is constant, make it the second one. Only do this
5687 : if the other operand is not constant as well. */
5688 :
5689 676014 : if (swap_commutative_operands_p (op0, op1))
5690 : {
5691 4860 : std::swap (op0, op1);
5692 4860 : code = swap_condition (code);
5693 : }
5694 :
5695 676014 : if (mode == VOIDmode)
5696 38727 : mode = GET_MODE (op0);
5697 :
5698 676014 : if (CONST_SCALAR_INT_P (op1))
5699 298157 : canonicalize_comparison (mode, &code, &op1);
5700 :
5701 : /* For some comparisons with 1 and -1, we can convert this to
5702 : comparisons with zero. This will often produce more opportunities for
5703 : store-flag insns. */
5704 :
5705 676014 : switch (code)
5706 : {
5707 44180 : case LT:
5708 44180 : if (op1 == const1_rtx)
5709 55 : op1 = const0_rtx, code = LE;
5710 : break;
5711 25494 : case LE:
5712 25494 : if (op1 == constm1_rtx)
5713 0 : op1 = const0_rtx, code = LT;
5714 : break;
5715 38256 : case GE:
5716 38256 : if (op1 == const1_rtx)
5717 0 : op1 = const0_rtx, code = GT;
5718 : break;
5719 32420 : case GT:
5720 32420 : if (op1 == constm1_rtx)
5721 88 : op1 = const0_rtx, code = GE;
5722 : break;
5723 4006 : case GEU:
5724 4006 : if (op1 == const1_rtx)
5725 0 : op1 = const0_rtx, code = NE;
5726 : break;
5727 7782 : case LTU:
5728 7782 : if (op1 == const1_rtx)
5729 10 : op1 = const0_rtx, code = EQ;
5730 : break;
5731 : default:
5732 : break;
5733 : }
5734 :
5735 : /* If this is A < 0 or A >= 0, we can do this by taking the ones
5736 : complement of A (for GE) and shifting the sign bit to the low bit. */
5737 676014 : scalar_int_mode int_mode;
5738 191977 : if (op1 == const0_rtx && (code == LT || code == GE)
5739 676014 : && is_int_mode (mode, &int_mode)
5740 676014 : && (normalizep || STORE_FLAG_VALUE == 1
5741 : || val_signbit_p (int_mode, STORE_FLAG_VALUE)))
5742 : {
5743 40139 : scalar_int_mode int_target_mode;
5744 40139 : subtarget = target;
5745 :
5746 40139 : if (!target)
5747 : int_target_mode = int_mode;
5748 : else
5749 : {
5750 : /* If the result is to be wider than OP0, it is best to convert it
5751 : first. If it is to be narrower, it is *incorrect* to convert it
5752 : first. */
5753 40139 : int_target_mode = as_a <scalar_int_mode> (target_mode);
5754 120417 : if (GET_MODE_SIZE (int_target_mode) > GET_MODE_SIZE (int_mode))
5755 : {
5756 404 : op0 = convert_modes (int_target_mode, int_mode, op0, 0);
5757 404 : int_mode = int_target_mode;
5758 : }
5759 : }
5760 :
5761 40139 : if (int_target_mode != int_mode)
5762 26774 : subtarget = 0;
5763 :
5764 40139 : if (code == GE)
5765 20655 : op0 = expand_unop (int_mode, one_cmpl_optab, op0,
5766 : ((STORE_FLAG_VALUE == 1 || normalizep)
5767 : ? 0 : subtarget), 0);
5768 :
5769 40139 : if (STORE_FLAG_VALUE == 1 || normalizep)
5770 : /* If we are supposed to produce a 0/1 value, we want to do
5771 : a logical shift from the sign bit to the low-order bit; for
5772 : a -1/0 value, we do an arithmetic shift. */
5773 80278 : op0 = expand_shift (RSHIFT_EXPR, int_mode, op0,
5774 40139 : GET_MODE_BITSIZE (int_mode) - 1,
5775 : subtarget, normalizep != -1);
5776 :
5777 40139 : if (int_mode != int_target_mode)
5778 26774 : op0 = convert_modes (int_target_mode, int_mode, op0, 0);
5779 :
5780 40139 : return op0;
5781 : }
5782 :
5783 : /* Next try expanding this via the backend's cstore<mode>4. */
5784 635875 : mclass = GET_MODE_CLASS (mode);
5785 647050 : FOR_EACH_WIDER_MODE_FROM (compare_mode, mode)
5786 : {
5787 639819 : machine_mode optab_mode = mclass == MODE_CC ? CCmode : compare_mode;
5788 639819 : icode = optab_handler (cstore_optab, optab_mode);
5789 639819 : if (icode != CODE_FOR_nothing)
5790 : {
5791 628644 : do_pending_stack_adjust ();
5792 628644 : rtx tem = emit_cstore (target, icode, code, mode, compare_mode,
5793 : unsignedp, op0, op1, normalizep, target_mode);
5794 628644 : if (tem)
5795 : return tem;
5796 :
5797 85118 : if (GET_MODE_CLASS (mode) == MODE_FLOAT)
5798 : {
5799 80246 : enum rtx_code scode = swap_condition (code);
5800 :
5801 80246 : tem = emit_cstore (target, icode, scode, mode, compare_mode,
5802 : unsignedp, op1, op0, normalizep, target_mode);
5803 80246 : if (tem)
5804 : return tem;
5805 : }
5806 : break;
5807 : }
5808 : }
5809 :
5810 : /* If we are comparing a double-word integer with zero or -1, we can
5811 : convert the comparison into one involving a single word. */
5812 76416 : if (is_int_mode (mode, &int_mode)
5813 7624 : && GET_MODE_BITSIZE (int_mode) == BITS_PER_WORD * 2
5814 4588 : && (!MEM_P (op0) || ! MEM_VOLATILE_P (op0)))
5815 : {
5816 4588 : rtx tem;
5817 4588 : if ((code == EQ || code == NE)
5818 0 : && (op1 == const0_rtx || op1 == constm1_rtx))
5819 : {
5820 0 : rtx op00, op01;
5821 :
5822 : /* Do a logical OR or AND of the two words and compare the
5823 : result. */
5824 0 : op00 = force_subreg (word_mode, op0, int_mode, 0);
5825 0 : op01 = force_subreg (word_mode, op0, int_mode, UNITS_PER_WORD);
5826 0 : tem = expand_binop (word_mode,
5827 0 : op1 == const0_rtx ? ior_optab : and_optab,
5828 : op00, op01, NULL_RTX, unsignedp,
5829 : OPTAB_DIRECT);
5830 :
5831 0 : if (tem != 0)
5832 0 : tem = emit_store_flag (NULL_RTX, code, tem, op1, word_mode,
5833 : unsignedp, normalizep);
5834 : }
5835 4588 : else if ((code == LT || code == GE) && op1 == const0_rtx)
5836 : {
5837 0 : rtx op0h;
5838 :
5839 : /* If testing the sign bit, can just test on high word. */
5840 0 : op0h = force_highpart_subreg (word_mode, op0, int_mode);
5841 0 : tem = emit_store_flag (NULL_RTX, code, op0h, op1, word_mode,
5842 : unsignedp, normalizep);
5843 0 : }
5844 : else
5845 : tem = NULL_RTX;
5846 :
5847 0 : if (tem)
5848 : {
5849 0 : if (target_mode == VOIDmode || GET_MODE (tem) == target_mode)
5850 : return tem;
5851 0 : if (!target)
5852 0 : target = gen_reg_rtx (target_mode);
5853 :
5854 0 : convert_move (target, tem,
5855 0 : !val_signbit_known_set_p (word_mode,
5856 : (normalizep ? normalizep
5857 : : STORE_FLAG_VALUE)));
5858 0 : return target;
5859 : }
5860 : }
5861 :
5862 : return 0;
5863 : }
5864 :
5865 : /* Subroutine of emit_store_flag that handles cases in which the operands
5866 : are scalar integers. SUBTARGET is the target to use for temporary
5867 : operations and TRUEVAL is the value to store when the condition is
5868 : true. All other arguments are as for emit_store_flag. */
5869 :
5870 : rtx
5871 2436 : emit_store_flag_int (rtx target, rtx subtarget, enum rtx_code code, rtx op0,
5872 : rtx op1, scalar_int_mode mode, int unsignedp,
5873 : int normalizep, rtx trueval)
5874 : {
5875 2436 : machine_mode target_mode = target ? GET_MODE (target) : VOIDmode;
5876 2436 : rtx_insn *last = get_last_insn ();
5877 :
5878 : /* If this is an equality comparison of integers, we can try to exclusive-or
5879 : (or subtract) the two operands and use a recursive call to try the
5880 : comparison with zero. Don't do any of these cases if branches are
5881 : very cheap. */
5882 :
5883 2436 : if ((code == EQ || code == NE) && op1 != const0_rtx)
5884 : {
5885 0 : rtx tem = expand_binop (mode, xor_optab, op0, op1, subtarget, 1,
5886 : OPTAB_WIDEN);
5887 :
5888 0 : if (tem == 0)
5889 0 : tem = expand_binop (mode, sub_optab, op0, op1, subtarget, 1,
5890 : OPTAB_WIDEN);
5891 0 : if (tem != 0)
5892 0 : tem = emit_store_flag (target, code, tem, const0_rtx,
5893 : mode, unsignedp, normalizep);
5894 0 : if (tem != 0)
5895 : return tem;
5896 :
5897 0 : delete_insns_since (last);
5898 : }
5899 :
5900 : /* For integer comparisons, try the reverse comparison. However, for
5901 : small X and if we'd have anyway to extend, implementing "X != 0"
5902 : as "-(int)X >> 31" is still cheaper than inverting "(int)X == 0". */
5903 2436 : rtx_code rcode = reverse_condition (code);
5904 2436 : if (can_compare_p (rcode, mode, ccp_store_flag)
5905 2436 : && ! (optab_handler (cstore_optab, mode) == CODE_FOR_nothing
5906 0 : && code == NE
5907 0 : && GET_MODE_SIZE (mode) < UNITS_PER_WORD
5908 0 : && op1 == const0_rtx))
5909 : {
5910 2436 : int want_add = ((STORE_FLAG_VALUE == 1 && normalizep == -1)
5911 : || (STORE_FLAG_VALUE == -1 && normalizep == 1));
5912 :
5913 : /* Again, for the reverse comparison, use either an addition or a XOR. */
5914 2436 : if (want_add
5915 2436 : && rtx_cost (GEN_INT (normalizep), mode, PLUS, 1,
5916 0 : optimize_insn_for_speed_p ()) == 0)
5917 : {
5918 0 : rtx tem = emit_store_flag_1 (subtarget, rcode, op0, op1, mode, 0,
5919 : STORE_FLAG_VALUE, target_mode);
5920 0 : if (tem != 0)
5921 0 : tem = expand_binop (target_mode, add_optab, tem,
5922 0 : gen_int_mode (normalizep, target_mode),
5923 : target, 0, OPTAB_WIDEN);
5924 0 : if (tem != 0)
5925 : return tem;
5926 : }
5927 2436 : else if (!want_add
5928 4872 : && rtx_cost (trueval, mode, XOR, 1,
5929 2436 : optimize_insn_for_speed_p ()) == 0)
5930 : {
5931 2436 : rtx tem = emit_store_flag_1 (subtarget, rcode, op0, op1, mode, 0,
5932 : normalizep, target_mode);
5933 2436 : if (tem != 0)
5934 0 : tem = expand_binop (target_mode, xor_optab, tem, trueval, target,
5935 0 : INTVAL (trueval) >= 0, OPTAB_WIDEN);
5936 0 : if (tem != 0)
5937 : return tem;
5938 : }
5939 :
5940 2436 : delete_insns_since (last);
5941 : }
5942 :
5943 : /* Some other cases we can do are EQ, NE, LE, and GT comparisons with
5944 : the constant zero. Reject all other comparisons at this point. Only
5945 : do LE and GT if branches are expensive since they are expensive on
5946 : 2-operand machines. */
5947 :
5948 2436 : if (op1 != const0_rtx
5949 2436 : || (code != EQ && code != NE
5950 87 : && (BRANCH_COST (optimize_insn_for_speed_p (),
5951 87 : false) <= 1 || (code != LE && code != GT))))
5952 2207 : return 0;
5953 :
5954 : /* Try to put the result of the comparison in the sign bit. Assume we can't
5955 : do the necessary operation below. */
5956 :
5957 229 : rtx tem = 0;
5958 :
5959 : /* To see if A <= 0, compute (A | (A - 1)). A <= 0 iff that result has
5960 : the sign bit set. */
5961 :
5962 229 : if (code == LE)
5963 : {
5964 : /* This is destructive, so SUBTARGET can't be OP0. */
5965 59 : if (rtx_equal_p (subtarget, op0))
5966 0 : subtarget = 0;
5967 :
5968 59 : tem = expand_binop (mode, sub_optab, op0, const1_rtx, subtarget, 0,
5969 : OPTAB_WIDEN);
5970 59 : if (tem)
5971 59 : tem = expand_binop (mode, ior_optab, op0, tem, subtarget, 0,
5972 : OPTAB_WIDEN);
5973 : }
5974 :
5975 : /* To see if A > 0, compute (((signed) A) << BITS) - A, where BITS is the
5976 : number of bits in the mode of OP0, minus one. */
5977 :
5978 229 : if (code == GT)
5979 : {
5980 28 : if (rtx_equal_p (subtarget, op0))
5981 0 : subtarget = 0;
5982 :
5983 28 : tem = maybe_expand_shift (RSHIFT_EXPR, mode, op0,
5984 28 : GET_MODE_BITSIZE (mode) - 1,
5985 : subtarget, 0);
5986 28 : if (tem)
5987 28 : tem = expand_binop (mode, sub_optab, tem, op0, subtarget, 0,
5988 : OPTAB_WIDEN);
5989 : }
5990 :
5991 229 : if (code == EQ || code == NE)
5992 : {
5993 : /* For EQ or NE, one way to do the comparison is to apply an operation
5994 : that converts the operand into a positive number if it is nonzero
5995 : or zero if it was originally zero. Then, for EQ, we subtract 1 and
5996 : for NE we negate. This puts the result in the sign bit. Then we
5997 : normalize with a shift, if needed.
5998 :
5999 : Two operations that can do the above actions are ABS and FFS, so try
6000 : them. If that doesn't work, and MODE is smaller than a full word,
6001 : we can use zero-extension to the wider mode (an unsigned conversion)
6002 : as the operation. */
6003 :
6004 : /* Note that ABS doesn't yield a positive number for INT_MIN, but
6005 : that is compensated by the subsequent overflow when subtracting
6006 : one / negating. */
6007 :
6008 142 : if (optab_handler (abs_optab, mode) != CODE_FOR_nothing)
6009 142 : tem = expand_unop (mode, abs_optab, op0, subtarget, 1);
6010 0 : else if (optab_handler (ffs_optab, mode) != CODE_FOR_nothing)
6011 0 : tem = expand_unop (mode, ffs_optab, op0, subtarget, 1);
6012 0 : else if (GET_MODE_SIZE (mode) < UNITS_PER_WORD)
6013 : {
6014 0 : tem = convert_modes (word_mode, mode, op0, 1);
6015 0 : mode = word_mode;
6016 : }
6017 :
6018 142 : if (tem != 0)
6019 : {
6020 142 : if (code == EQ)
6021 0 : tem = expand_binop (mode, sub_optab, tem, const1_rtx, subtarget,
6022 : 0, OPTAB_WIDEN);
6023 : else
6024 142 : tem = expand_unop (mode, neg_optab, tem, subtarget, 0);
6025 : }
6026 :
6027 : /* If we couldn't do it that way, for NE we can "or" the two's complement
6028 : of the value with itself. For EQ, we take the one's complement of
6029 : that "or", which is an extra insn, so we only handle EQ if branches
6030 : are expensive. */
6031 :
6032 142 : if (tem == 0
6033 142 : && (code == NE
6034 0 : || BRANCH_COST (optimize_insn_for_speed_p (),
6035 : false) > 1))
6036 : {
6037 0 : if (rtx_equal_p (subtarget, op0))
6038 0 : subtarget = 0;
6039 :
6040 0 : tem = expand_unop (mode, neg_optab, op0, subtarget, 0);
6041 0 : tem = expand_binop (mode, ior_optab, tem, op0, subtarget, 0,
6042 : OPTAB_WIDEN);
6043 :
6044 0 : if (tem && code == EQ)
6045 0 : tem = expand_unop (mode, one_cmpl_optab, tem, subtarget, 0);
6046 : }
6047 : }
6048 :
6049 229 : if (tem && normalizep)
6050 229 : tem = maybe_expand_shift (RSHIFT_EXPR, mode, tem,
6051 229 : GET_MODE_BITSIZE (mode) - 1,
6052 : subtarget, normalizep == 1);
6053 :
6054 229 : if (tem)
6055 : {
6056 229 : if (!target)
6057 : ;
6058 229 : else if (GET_MODE (tem) != target_mode)
6059 : {
6060 87 : convert_move (target, tem, 0);
6061 87 : tem = target;
6062 : }
6063 142 : else if (!subtarget)
6064 : {
6065 73 : emit_move_insn (target, tem);
6066 73 : tem = target;
6067 : }
6068 : }
6069 : else
6070 0 : delete_insns_since (last);
6071 :
6072 : return tem;
6073 : }
6074 :
6075 : /* Emit a store-flags instruction for comparison CODE on OP0 and OP1
6076 : and storing in TARGET. Normally return TARGET.
6077 : Return 0 if that cannot be done.
6078 :
6079 : MODE is the mode to use for OP0 and OP1 should they be CONST_INTs. If
6080 : it is VOIDmode, they cannot both be CONST_INT.
6081 :
6082 : UNSIGNEDP is for the case where we have to widen the operands
6083 : to perform the operation. It says to use zero-extension.
6084 :
6085 : NORMALIZEP is 1 if we should convert the result to be either zero
6086 : or one. Normalize is -1 if we should convert the result to be
6087 : either zero or -1. If NORMALIZEP is zero, the result will be left
6088 : "raw" out of the scc insn. */
6089 :
6090 : rtx
6091 609033 : emit_store_flag (rtx target, enum rtx_code code, rtx op0, rtx op1,
6092 : machine_mode mode, int unsignedp, int normalizep)
6093 : {
6094 609033 : machine_mode target_mode = target ? GET_MODE (target) : VOIDmode;
6095 609033 : enum rtx_code rcode;
6096 609033 : rtx subtarget;
6097 609033 : rtx tem, trueval;
6098 609033 : rtx_insn *last;
6099 :
6100 : /* If we compare constants, we shouldn't use a store-flag operation,
6101 : but a constant load. We can get there via the vanilla route that
6102 : usually generates a compare-branch sequence, but will in this case
6103 : fold the comparison to a constant, and thus elide the branch. */
6104 609033 : if (CONSTANT_P (op0) && CONSTANT_P (op1))
6105 : return NULL_RTX;
6106 :
6107 608742 : tem = emit_store_flag_1 (target, code, op0, op1, mode, unsignedp, normalizep,
6108 : target_mode);
6109 608742 : if (tem)
6110 : return tem;
6111 :
6112 : /* If we reached here, we can't do this with a scc insn, however there
6113 : are some comparisons that can be done in other ways. Don't do any
6114 : of these cases if branches are very cheap. */
6115 73457 : if (BRANCH_COST (optimize_insn_for_speed_p (), false) == 0)
6116 : return 0;
6117 :
6118 : /* See what we need to return. We can only return a 1, -1, or the
6119 : sign bit. */
6120 :
6121 73457 : if (normalizep == 0)
6122 : {
6123 0 : if (STORE_FLAG_VALUE == 1 || STORE_FLAG_VALUE == -1)
6124 0 : normalizep = STORE_FLAG_VALUE;
6125 :
6126 : else if (val_signbit_p (mode, STORE_FLAG_VALUE))
6127 : ;
6128 : else
6129 : return 0;
6130 : }
6131 :
6132 73457 : last = get_last_insn ();
6133 :
6134 : /* If optimizing, use different pseudo registers for each insn, instead
6135 : of reusing the same pseudo. This leads to better CSE, but slows
6136 : down the compiler, since there are more pseudos. */
6137 73388 : subtarget = (!optimize
6138 73457 : && (target_mode == mode)) ? target : NULL_RTX;
6139 73457 : trueval = GEN_INT (normalizep ? normalizep : STORE_FLAG_VALUE);
6140 :
6141 : /* For floating-point comparisons, try the reverse comparison or try
6142 : changing the "orderedness" of the comparison. */
6143 73457 : if (GET_MODE_CLASS (mode) == MODE_FLOAT)
6144 : {
6145 67497 : enum rtx_code first_code;
6146 67497 : bool and_them;
6147 :
6148 67497 : rcode = reverse_condition_maybe_unordered (code);
6149 67497 : if (can_compare_p (rcode, mode, ccp_store_flag)
6150 67497 : && (code == ORDERED || code == UNORDERED
6151 0 : || (! HONOR_NANS (mode) && (code == LTGT || code == UNEQ))
6152 0 : || (! HONOR_SNANS (mode) && (code == EQ || code == NE))))
6153 : {
6154 0 : int want_add = ((STORE_FLAG_VALUE == 1 && normalizep == -1)
6155 : || (STORE_FLAG_VALUE == -1 && normalizep == 1));
6156 :
6157 : /* For the reverse comparison, use either an addition or a XOR. */
6158 0 : if (want_add
6159 0 : && rtx_cost (GEN_INT (normalizep), mode, PLUS, 1,
6160 0 : optimize_insn_for_speed_p ()) == 0)
6161 : {
6162 0 : tem = emit_store_flag_1 (subtarget, rcode, op0, op1, mode, 0,
6163 : STORE_FLAG_VALUE, target_mode);
6164 0 : if (tem)
6165 0 : return expand_binop (target_mode, add_optab, tem,
6166 0 : gen_int_mode (normalizep, target_mode),
6167 : target, 0, OPTAB_WIDEN);
6168 : }
6169 0 : else if (!want_add
6170 0 : && rtx_cost (trueval, mode, XOR, 1,
6171 0 : optimize_insn_for_speed_p ()) == 0)
6172 : {
6173 0 : tem = emit_store_flag_1 (subtarget, rcode, op0, op1, mode, 0,
6174 : normalizep, target_mode);
6175 0 : if (tem)
6176 0 : return expand_binop (target_mode, xor_optab, tem, trueval,
6177 0 : target, INTVAL (trueval) >= 0,
6178 0 : OPTAB_WIDEN);
6179 : }
6180 : }
6181 :
6182 67497 : delete_insns_since (last);
6183 :
6184 : /* Cannot split ORDERED and UNORDERED, only try the above trick. */
6185 67497 : if (code == ORDERED || code == UNORDERED)
6186 : return 0;
6187 :
6188 67379 : and_them = split_comparison (code, mode, &first_code, &code);
6189 :
6190 : /* If there are no NaNs, the first comparison should always fall through.
6191 : Effectively change the comparison to the other one. */
6192 67379 : if (!HONOR_NANS (mode))
6193 : {
6194 282 : gcc_assert (first_code == (and_them ? ORDERED : UNORDERED));
6195 182 : return emit_store_flag_1 (target, code, op0, op1, mode, 0, normalizep,
6196 182 : target_mode);
6197 : }
6198 :
6199 67197 : if (!HAVE_conditional_move)
6200 : return 0;
6201 :
6202 : /* Do not turn a trapping comparison into a non-trapping one. */
6203 67197 : if ((code != EQ && code != NE && code != UNEQ && code != LTGT)
6204 2543 : && flag_trapping_math)
6205 : return 0;
6206 :
6207 : /* Try using a setcc instruction for ORDERED/UNORDERED, followed by a
6208 : conditional move. */
6209 64654 : tem = emit_store_flag_1 (subtarget, first_code, op0, op1, mode, 0,
6210 : normalizep, target_mode);
6211 64654 : if (tem == 0)
6212 : return 0;
6213 :
6214 64131 : if (and_them)
6215 2130 : tem = emit_conditional_move (target, { code, op0, op1, mode },
6216 2130 : tem, const0_rtx, GET_MODE (tem), 0);
6217 : else
6218 62001 : tem = emit_conditional_move (target, { code, op0, op1, mode },
6219 62001 : trueval, tem, GET_MODE (tem), 0);
6220 :
6221 64131 : if (tem == 0)
6222 474 : delete_insns_since (last);
6223 64131 : return tem;
6224 : }
6225 :
6226 : /* The remaining tricks only apply to integer comparisons. */
6227 :
6228 5960 : scalar_int_mode int_mode;
6229 5960 : if (is_int_mode (mode, &int_mode))
6230 2436 : return emit_store_flag_int (target, subtarget, code, op0, op1, int_mode,
6231 2436 : unsignedp, normalizep, trueval);
6232 :
6233 : return 0;
6234 : }
6235 :
6236 : /* Like emit_store_flag, but always succeeds. */
6237 :
6238 : rtx
6239 560787 : emit_store_flag_force (rtx target, enum rtx_code code, rtx op0, rtx op1,
6240 : machine_mode mode, int unsignedp, int normalizep)
6241 : {
6242 560787 : rtx tem;
6243 560787 : rtx_code_label *label;
6244 560787 : rtx trueval, falseval;
6245 :
6246 : /* First see if emit_store_flag can do the job. */
6247 560787 : tem = emit_store_flag (target, code, op0, op1, mode, unsignedp, normalizep);
6248 560787 : if (tem != 0)
6249 : return tem;
6250 :
6251 : /* If one operand is constant, make it the second one. Only do this
6252 : if the other operand is not constant as well. */
6253 9680 : if (swap_commutative_operands_p (op0, op1))
6254 : {
6255 8 : std::swap (op0, op1);
6256 8 : code = swap_condition (code);
6257 : }
6258 :
6259 9680 : if (mode == VOIDmode)
6260 0 : mode = GET_MODE (op0);
6261 :
6262 9680 : if (!target)
6263 0 : target = gen_reg_rtx (word_mode);
6264 :
6265 : /* If this failed, we have to do this with set/compare/jump/set code.
6266 : For foo != 0, if foo is in OP0, just replace it with 1 if nonzero. */
6267 9680 : trueval = normalizep ? GEN_INT (normalizep) : const1_rtx;
6268 9680 : if (code == NE
6269 1610 : && GET_MODE_CLASS (mode) == MODE_INT
6270 27 : && REG_P (target)
6271 27 : && op0 == target
6272 0 : && op1 == const0_rtx)
6273 : {
6274 0 : label = gen_label_rtx ();
6275 0 : do_compare_rtx_and_jump (target, const0_rtx, EQ, unsignedp, mode,
6276 : NULL_RTX, NULL, label,
6277 : profile_probability::uninitialized ());
6278 0 : emit_move_insn (target, trueval);
6279 0 : emit_label (label);
6280 0 : return target;
6281 : }
6282 :
6283 9680 : if (!REG_P (target)
6284 9680 : || reg_mentioned_p (target, op0) || reg_mentioned_p (target, op1))
6285 5 : target = gen_reg_rtx (GET_MODE (target));
6286 :
6287 : /* Jump in the right direction if the target cannot implement CODE
6288 : but can jump on its reverse condition. */
6289 9680 : falseval = const0_rtx;
6290 9680 : if (! can_compare_p (code, mode, ccp_jump)
6291 9680 : && (! FLOAT_MODE_P (mode)
6292 7140 : || code == ORDERED || code == UNORDERED
6293 6977 : || (! HONOR_NANS (mode) && (code == LTGT || code == UNEQ))
6294 6977 : || (! HONOR_SNANS (mode) && (code == EQ || code == NE))))
6295 : {
6296 2511 : enum rtx_code rcode;
6297 2511 : if (FLOAT_MODE_P (mode))
6298 2511 : rcode = reverse_condition_maybe_unordered (code);
6299 : else
6300 0 : rcode = reverse_condition (code);
6301 :
6302 : /* Canonicalize to UNORDERED for the libcall. */
6303 2511 : if (can_compare_p (rcode, mode, ccp_jump)
6304 2511 : || (code == ORDERED && ! can_compare_p (ORDERED, mode, ccp_jump)))
6305 : {
6306 139 : falseval = trueval;
6307 139 : trueval = const0_rtx;
6308 139 : code = rcode;
6309 : }
6310 : }
6311 :
6312 9680 : emit_move_insn (target, trueval);
6313 9680 : label = gen_label_rtx ();
6314 9680 : do_compare_rtx_and_jump (op0, op1, code, unsignedp, mode, NULL_RTX, NULL,
6315 : label, profile_probability::uninitialized ());
6316 :
6317 9680 : emit_move_insn (target, falseval);
6318 9680 : emit_label (label);
6319 :
6320 9680 : return target;
6321 : }
6322 :
6323 : /* Expand a vector (left) rotate of MODE of X by an immediate AMT as a vector
6324 : permute operation. Emit code to put the result in DST if successful and
6325 : return it. Otherwise return NULL. This is intended to implement vector
6326 : rotates by byte amounts using vector permutes when the target does not offer
6327 : native vector rotate operations. */
6328 : rtx
6329 0 : expand_rotate_as_vec_perm (machine_mode mode, rtx dst, rtx x, rtx amt)
6330 : {
6331 0 : rtx amt_unwrap = unwrap_const_vec_duplicate (amt);
6332 : /* For now handle only rotate by the same integer constant in all lanes.
6333 : In principle rotates by any constant vector are representable through
6334 : permutes as long as the individual rotate amounts are multiples of
6335 : BITS_PER_UNIT. */
6336 0 : if (!CONST_INT_P (amt_unwrap))
6337 : return NULL_RTX;
6338 :
6339 0 : int rotamnt = INTVAL (amt_unwrap);
6340 0 : if (rotamnt % BITS_PER_UNIT != 0)
6341 : return NULL_RTX;
6342 0 : machine_mode qimode;
6343 0 : if (!qimode_for_vec_perm (mode).exists (&qimode))
6344 0 : return NULL_RTX;
6345 :
6346 0 : vec_perm_builder builder;
6347 0 : unsigned nunits = GET_MODE_SIZE (GET_MODE_INNER (mode));
6348 0 : poly_uint64 total_units = GET_MODE_SIZE (mode);
6349 0 : builder.new_vector (total_units, nunits, 3);
6350 0 : unsigned rot_bytes = rotamnt / BITS_PER_UNIT;
6351 0 : unsigned rot_to_perm = BYTES_BIG_ENDIAN ? rot_bytes : nunits - rot_bytes;
6352 0 : for (unsigned j = 0; j < 3 * nunits; j += nunits)
6353 0 : for (unsigned i = 0; i < nunits; i++)
6354 0 : builder.quick_push ((rot_to_perm + i) % nunits + j);
6355 :
6356 0 : rtx perm_src = lowpart_subreg (qimode, x, mode);
6357 0 : rtx perm_dst = lowpart_subreg (qimode, dst, mode);
6358 0 : rtx res
6359 0 : = expand_vec_perm_const (qimode, perm_src, perm_src, builder,
6360 : qimode, perm_dst);
6361 0 : if (!res)
6362 : return NULL_RTX;
6363 0 : if (!rtx_equal_p (res, perm_dst))
6364 0 : emit_move_insn (dst, lowpart_subreg (mode, res, qimode));
6365 : return dst;
6366 0 : }
6367 :
6368 : /* Helper function for canonicalize_cmp_for_target. Swap between inclusive
6369 : and exclusive ranges in order to create an equivalent comparison. See
6370 : canonicalize_cmp_for_target for the possible cases. */
6371 :
6372 : static enum rtx_code
6373 47 : equivalent_cmp_code (enum rtx_code code)
6374 : {
6375 47 : switch (code)
6376 : {
6377 : case GT:
6378 : return GE;
6379 0 : case GE:
6380 0 : return GT;
6381 0 : case LT:
6382 0 : return LE;
6383 0 : case LE:
6384 0 : return LT;
6385 2 : case GTU:
6386 2 : return GEU;
6387 0 : case GEU:
6388 0 : return GTU;
6389 1 : case LTU:
6390 1 : return LEU;
6391 2 : case LEU:
6392 2 : return LTU;
6393 :
6394 0 : default:
6395 0 : return code;
6396 : }
6397 : }
6398 :
6399 : /* Choose the more appropriate immediate in scalar integer comparisons. The
6400 : purpose of this is to end up with an immediate which can be loaded into a
6401 : register in fewer moves, if possible.
6402 :
6403 : For each integer comparison there exists an equivalent choice:
6404 : i) a > b or a >= b + 1
6405 : ii) a <= b or a < b + 1
6406 : iii) a >= b or a > b - 1
6407 : iv) a < b or a <= b - 1
6408 :
6409 : MODE is the mode of the first operand.
6410 : CODE points to the comparison code.
6411 : IMM points to the rtx containing the immediate. *IMM must satisfy
6412 : CONST_SCALAR_INT_P on entry and continues to satisfy CONST_SCALAR_INT_P
6413 : on exit. */
6414 :
6415 : void
6416 4651993 : canonicalize_comparison (machine_mode mode, enum rtx_code *code, rtx *imm)
6417 : {
6418 4651993 : if (!SCALAR_INT_MODE_P (mode))
6419 3858901 : return;
6420 :
6421 4648271 : int to_add = 0;
6422 4648271 : enum signop sgn = unsigned_condition_p (*code) ? UNSIGNED : SIGNED;
6423 :
6424 : /* Extract the immediate value from the rtx. */
6425 4648271 : wide_int imm_val = rtx_mode_t (*imm, mode);
6426 :
6427 4648271 : if (*code == GT || *code == GTU || *code == LE || *code == LEU)
6428 : to_add = 1;
6429 : else if (*code == GE || *code == GEU || *code == LT || *code == LTU)
6430 : to_add = -1;
6431 : else
6432 : return;
6433 :
6434 : /* Check for overflow/underflow in the case of signed values and
6435 : wrapping around in the case of unsigned values. If any occur
6436 : cancel the optimization. */
6437 793236 : wi::overflow_type overflow = wi::OVF_NONE;
6438 793236 : wide_int imm_modif;
6439 :
6440 793236 : if (to_add == 1)
6441 572508 : imm_modif = wi::add (imm_val, 1, sgn, &overflow);
6442 : else
6443 220728 : imm_modif = wi::sub (imm_val, 1, sgn, &overflow);
6444 :
6445 793236 : if (overflow)
6446 144 : return;
6447 :
6448 793092 : rtx new_imm = immed_wide_int_const (imm_modif, mode);
6449 :
6450 793092 : int old_cost = rtx_cost (*imm, mode, COMPARE, 0, true);
6451 793092 : int new_cost = rtx_cost (new_imm, mode, COMPARE, 0, true);
6452 :
6453 793092 : if (dump_file && (dump_flags & TDF_DETAILS))
6454 : {
6455 7 : fprintf (dump_file, ";; cmp: %s, old cst: ",
6456 7 : GET_RTX_NAME (*code));
6457 7 : print_rtl (dump_file, *imm);
6458 7 : fprintf (dump_file, " new cst: ");
6459 7 : print_rtl (dump_file, new_imm);
6460 7 : fprintf (dump_file, "\n");
6461 7 : fprintf (dump_file, ";; old cst cost: %d, new cst cost: %d\n",
6462 : old_cost, new_cost);
6463 : }
6464 :
6465 : /* Update the immediate and the code. */
6466 793092 : if (old_cost > new_cost)
6467 : {
6468 47 : *code = equivalent_cmp_code (*code);
6469 47 : *imm = new_imm;
6470 : }
6471 4648415 : }
6472 :
6473 :
6474 :
6475 : /* Perform possibly multi-word comparison and conditional jump to LABEL
6476 : if ARG1 OP ARG2 true where ARG1 and ARG2 are of mode MODE. This is
6477 : now a thin wrapper around do_compare_rtx_and_jump. */
6478 :
6479 : static void
6480 2407 : do_cmp_and_jump (rtx arg1, rtx arg2, enum rtx_code op, machine_mode mode,
6481 : rtx_code_label *label)
6482 : {
6483 2407 : int unsignedp = (op == LTU || op == LEU || op == GTU || op == GEU);
6484 2407 : do_compare_rtx_and_jump (arg1, arg2, op, unsignedp, mode, NULL_RTX,
6485 : NULL, label, profile_probability::uninitialized ());
6486 2407 : }
|