Line data Source code
1 : /* RTL dead zero/sign extension (code) elimination.
2 : Copyright (C) 2000-2026 Free Software Foundation, Inc.
3 :
4 : This file is part of GCC.
5 :
6 : GCC is free software; you can redistribute it and/or modify it under
7 : the terms of the GNU General Public License as published by the Free
8 : Software Foundation; either version 3, or (at your option) any later
9 : version.
10 :
11 : GCC is distributed in the hope that it will be useful, but WITHOUT ANY
12 : WARRANTY; without even the implied warranty of MERCHANTABILITY or
13 : FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
14 : for more details.
15 :
16 : You should have received a copy of the GNU General Public License
17 : along with GCC; see the file COPYING3. If not see
18 : <http://www.gnu.org/licenses/>. */
19 :
20 : #include "config.h"
21 : #include "system.h"
22 : #include "coretypes.h"
23 : #include "backend.h"
24 : #include "rtl.h"
25 : #include "tree.h"
26 : #include "memmodel.h"
27 : #include "insn-config.h"
28 : #include "emit-rtl.h"
29 : #include "expr.h"
30 : #include "recog.h"
31 : #include "cfganal.h"
32 : #include "tree-pass.h"
33 : #include "cfgrtl.h"
34 : #include "rtl-iter.h"
35 : #include "df.h"
36 : #include "print-rtl.h"
37 : #include "dbgcnt.h"
38 : #include "diagnostic-core.h"
39 : #include "target.h"
40 :
/* These should probably move into a C++ class. */
/* Presumably per-basic-block live-in bit groups, indexed by block —
   TODO confirm against the pass driver (not visible in this chunk).  */
static vec<bitmap_head> livein;
static bitmap all_blocks;
/* Bit groups live at the current point of the backwards scan; bit
   REGNO * 4 + G is bit group G of register REGNO (see group_limit).  */
static bitmap livenow;
/* Registers whose defining insns this pass has rewritten.  */
static bitmap changed_pseudos;
/* Whether this invocation may modify the insn stream — set by code
   outside this chunk; TODO confirm.  */
static bool modify;

/* Chain detection for promotion: we defer promotions and only apply them
   when they form chains (one candidate's result feeds another's operand).
   Standalone promotions are skipped as they cause regressions on targets
   with free sign extension (e.g., RISC-V W-suffix instructions). */
struct promotion_candidate_info {
  rtx_insn *insn;	/* The candidate extension insn.  */
  rtx set;		/* Its single SET.  */
};

static vec<promotion_candidate_info> promotion_candidates;
/* Destination regnos of recorded promotion candidates.  */
static bitmap promotable_dests;
/* Regnos consumed as operands by some recorded candidate.  */
static bitmap consumed_by_candidate;

/* Copy pairs seen during the reverse scan (from optimized extensions).
   Used to propagate chain info transitively. */
struct copy_info {
  unsigned int dest_regno;
  unsigned int src_regno;
};
static vec<copy_info> promotion_copies;
68 :
69 : /* We consider four bit groups for liveness:
70 : bit 0..7 (least significant byte)
71 : bit 8..15 (second least significant byte)
72 : bit 16..31
73 : bit 32..BITS_PER_WORD-1 */
74 :
75 : /* For the given REG, return the number of bit groups implied by the
76 : size of the REG's mode, up to a maximum of 4 (number of bit groups
77 : tracked by this pass).
78 :
79 : For partial integer and variable sized modes also return 4. This
80 : could possibly be refined for something like PSI mode, but it
81 : does not seem worth the effort. */
82 :
83 : static int
84 234236263 : group_limit (const_rtx reg)
85 : {
86 234236263 : machine_mode mode = GET_MODE (reg);
87 :
88 234236263 : if (!GET_MODE_BITSIZE (mode).is_constant ())
89 : return 4;
90 :
91 234236263 : int size = GET_MODE_SIZE (mode).to_constant ();
92 :
93 234236263 : size = exact_log2 (size);
94 :
95 234142387 : if (size < 0)
96 : return 4;
97 :
98 234142387 : size++;
99 234142387 : return (size > 4 ? 4 : size);
100 : }
101 :
102 : /* Make all bit groups live for REGNO in bitmap BMAP. For hard regs,
103 : we assume all groups are live. For a pseudo we consider the size
104 : of the pseudo to avoid creating unnecessarily live chunks of data. */
105 :
106 : static void
107 4688783 : make_reg_live (bitmap bmap, int regno)
108 : {
109 4688783 : int limit;
110 :
111 : /* For pseudos we can use the mode to limit how many bit groups
112 : are marked as live since a pseudo only has one mode. Hard
113 : registers have to be handled more conservatively. */
114 4688783 : if (regno > FIRST_PSEUDO_REGISTER)
115 : {
116 881705 : rtx reg = regno_reg_rtx[regno];
117 881705 : limit = group_limit (reg);
118 : }
119 : else
120 : limit = 4;
121 :
122 23114505 : for (int i = 0; i < limit; i++)
123 18425722 : bitmap_set_bit (bmap, regno * 4 + i);
124 4688783 : }
125 :
126 : /* Note this pass could be used to narrow memory loads too. It's
127 : not clear if that's profitable or not in general. */
128 :
129 : #define UNSPEC_P(X) (GET_CODE (X) == UNSPEC || GET_CODE (X) == UNSPEC_VOLATILE)
130 :
131 : /* If we know the destination of CODE only uses some low bits
132 : (say just the QI bits of an SI operation), then return true
133 : if we can propagate the need for just the subset of bits
134 : from the destination to the sources.
135 :
136 : FIXME: This is safe for operands 1 and 2 of an IF_THEN_ELSE, but not
137 : operand 0. Thus is likely would need some special casing to handle. */
138 :
139 : static bool
140 141254345 : safe_for_live_propagation (rtx_code code)
141 : {
142 : /* First handle rtx classes which as a whole are known to
143 : be either safe or unsafe. */
144 141254345 : switch (GET_RTX_CLASS (code))
145 : {
146 : case RTX_OBJ:
147 : case RTX_CONST_OBJ:
148 : return true;
149 :
150 : case RTX_COMPARE:
151 : case RTX_COMM_COMPARE:
152 : case RTX_TERNARY:
153 : return false;
154 :
155 73379139 : default:
156 73379139 : break;
157 : }
158 :
159 : /* What's left are specific codes. We only need to identify those
160 : which are safe. */
161 73379139 : switch (code)
162 : {
163 : /* These are trivially safe. */
164 : case SUBREG:
165 : case NOT:
166 : case ZERO_EXTEND:
167 : case SIGN_EXTEND:
168 : case TRUNCATE:
169 : case PLUS:
170 : case MINUS:
171 : case MULT:
172 : case SMUL_HIGHPART:
173 : case UMUL_HIGHPART:
174 : case AND:
175 : case IOR:
176 : case XOR:
177 : return true;
178 :
179 : /* We can propagate for the shifted operand, but not the shift
180 : count. The count is handled specially. */
181 : case ASHIFT:
182 : case LSHIFTRT:
183 : case ASHIFTRT:
184 : case SS_ASHIFT:
185 : case US_ASHIFT:
186 : return true;
187 :
188 : /* There may be other safe codes. If so they can be added
189 : individually when discovered. */
190 : default:
191 : return false;
192 : }
193 : }
194 :
/* Clear bits in LIVENOW and set bits in LIVE_TMP for objects
   set/clobbered by OBJ contained in INSN.

   Conceptually it is always safe to ignore a particular destination
   here as that will result in more chunks of data being considered
   live.  That's what happens when we "continue" the main loop when
   we see something we don't know how to handle such as a vector
   mode destination.

   The more accurate we are in identifying what objects (and chunks
   within an object) are set by INSN, the more aggressive the
   optimization phase during use handling will be.

   Returns true if some destination was skipped (i.e. handled
   conservatively), so the caller knows the result is imprecise.  */

static bool
ext_dce_process_sets (rtx_insn *insn, rtx obj, bitmap live_tmp)
{
  bool skipped_dest = false;

  subrtx_iterator::array_type array;
  FOR_EACH_SUBRTX (iter, array, obj, NONCONST)
    {
      const_rtx x = *iter;

      /* An EXPR_LIST (from call fusage) ends in NULL_RTX.  */
      if (x == NULL_RTX)
	continue;

      /* UNSPECs have unknown semantics; leave their inputs fully live.  */
      if (UNSPEC_P (x))
	continue;

      if (GET_CODE (x) == SET || GET_CODE (x) == CLOBBER)
	{
	  unsigned bit = 0;
	  x = SET_DEST (x);

	  /* We don't support vector destinations or destinations
	     wider than DImode.  */
	  scalar_mode outer_mode;
	  if (!is_a <scalar_mode> (GET_MODE (x), &outer_mode)
	      || GET_MODE_BITSIZE (outer_mode) > HOST_BITS_PER_WIDE_INT)
	    {
	      /* Skip the subrtxs of this destination.  There is
		 little value in iterating into the subobjects, so
		 just skip them for a bit of efficiency.  */
	      skipped_dest = true;
	      iter.skip_subrtxes ();
	      continue;
	    }

	  /* We could have (strict_low_part (subreg ...)).  We can not just
	     strip the STRICT_LOW_PART as that would result in clearing
	     some bits in LIVENOW that are still live.  So process the
	     STRICT_LOW_PART specially.  */
	  if (GET_CODE (x) == STRICT_LOW_PART)
	    {
	      x = XEXP (x, 0);

	      /* The only valid operand of a STRICT_LOW_PART is a non
		 paradoxical SUBREG.  */
	      gcc_assert (SUBREG_P (x)
			  && !paradoxical_subreg_p (x)
			  && SUBREG_BYTE (x).is_constant ());

	      /* I think we should always see a REG here.  But let's
		 be sure.  */
	      gcc_assert (REG_P (SUBREG_REG (x)));

	      /* The inner mode might be larger, just punt for
		 that case.  Remember, we can not just continue to process
		 the inner RTXs due to the STRICT_LOW_PART.  */
	      if (!is_a <scalar_mode> (GET_MODE (SUBREG_REG (x)), &outer_mode)
		  || GET_MODE_BITSIZE (outer_mode) > HOST_BITS_PER_WIDE_INT)
		{
		  /* Skip the subrtxs of the STRICT_LOW_PART.  We can't
		     process them because it'll set objects as no longer
		     live when they are in fact still live.  */
		  skipped_dest = true;
		  iter.skip_subrtxes ();
		  continue;
		}

	      /* LIVE_TMP contains the set groups that are live-out and set in
		 this insn.  It is used to narrow the groups live-in for the
		 inputs of this insn.

		 The simple thing to do is mark all the groups as live, but
		 that will significantly inhibit optimization.

		 We also need to be careful in the case where we have an in-out
		 operand.  If we're not careful we'd clear LIVE_TMP
		 incorrectly.  */
	      HOST_WIDE_INT rn = REGNO (SUBREG_REG (x));
	      int limit = group_limit (SUBREG_REG (x));
	      for (HOST_WIDE_INT i = 4 * rn; i < 4 * rn + limit; i++)
		if (bitmap_bit_p (livenow, i))
		  bitmap_set_bit (live_tmp, i);

	      /* If no group of RN was live-out, fall back to marking
		 all its groups live in LIVE_TMP.  */
	      if (bitmap_empty_p (live_tmp))
		make_reg_live (live_tmp, rn);

	      /* The mode of the SUBREG tells us how many bits we can
		 clear.  */
	      machine_mode mode = GET_MODE (x);
	      HOST_WIDE_INT size
		= exact_log2 (GET_MODE_SIZE (mode).to_constant ()) + 1;
	      bitmap_clear_range (livenow, 4 * rn, size);

	      /* We have fully processed this destination.  */
	      iter.skip_subrtxes ();
	      continue;
	    }

	  /* Phase one of destination handling.  First remove any wrapper
	     such as SUBREG or ZERO_EXTRACT.  */
	  unsigned HOST_WIDE_INT mask
	    = GET_MODE_MASK (GET_MODE_INNER (GET_MODE (x)));
	  if (SUBREG_P (x))
	    {
	      /* If we have a SUBREG destination that is too wide, just
		 skip the destination rather than continuing this iterator.
		 While continuing would be better, we'd need to strip the
		 subreg and restart within the SET processing rather than
		 the top of the loop which just complicates the flow even
		 more.  */
	      if (!is_a <scalar_mode> (GET_MODE (SUBREG_REG (x)), &outer_mode)
		  || GET_MODE_BITSIZE (outer_mode) > HOST_BITS_PER_WIDE_INT)
		{
		  skipped_dest = true;
		  iter.skip_subrtxes ();
		  continue;
		}

	      /* We can safely strip a paradoxical subreg.  The inner mode will
		 be narrower than the outer mode.  We'll clear fewer bits in
		 LIVENOW than we'd like, but that's always safe.  */
	      if (paradoxical_subreg_p (x))
		x = XEXP (x, 0);
	      else if (SUBREG_BYTE (x).is_constant ())
		{
		  /* Non-paradoxical subreg: remember the bit offset of the
		     subword and widen MASK accordingly before stripping.  */
		  bit = subreg_lsb (x).to_constant ();
		  mask = GET_MODE_MASK (GET_MODE (SUBREG_REG (x))) << bit;
		  gcc_assert (mask);
		  x = SUBREG_REG (x);
		}
	      else
		gcc_unreachable ();
	    }

	  if (GET_CODE (x) == ZERO_EXTRACT)
	    {
	      /* Unlike a SUBREG destination, a set of a ZERO_EXTRACT only
		 modifies the bits referenced in the ZERO_EXTRACT, the rest
		 remain the same.  Thus we can not continue here, we must
		 either figure out what part of the destination is modified
		 or skip the sub-rtxs.  */
	      skipped_dest = true;
	      iter.skip_subrtxes ();
	      continue;
	    }

	  /* BIT >= 64 indicates something went horribly wrong.  */
	  gcc_assert (bit <= HOST_BITS_PER_WIDE_INT - 1);

	  /* Now handle the actual object that was changed.  */
	  if (REG_P (x))
	    {
	      /* LIVE_TMP contains the set groups that are live-out and set in
		 this insn.  It is used to narrow the groups live-in for the
		 inputs of this insn.

		 The simple thing to do is mark all the groups as live, but
		 that will significantly inhibit optimization.

		 We also need to be careful in the case where we have an in-out
		 operand.  If we're not careful we'd clear LIVE_TMP
		 incorrectly.  */
	      HOST_WIDE_INT rn = REGNO (x);
	      int limit = group_limit (x);
	      for (HOST_WIDE_INT i = 4 * rn; i < 4 * rn + limit; i++)
		if (bitmap_bit_p (livenow, i))
		  bitmap_set_bit (live_tmp, i);

	      if (bitmap_empty_p (live_tmp))
		make_reg_live (live_tmp, rn);

	      /* Now clear the bits known written by this instruction.
		 Note that BIT need not be a power of two, consider a
		 ZERO_EXTRACT destination.  */
	      int start = (bit < 8 ? 0 : bit < 16 ? 1 : bit < 32 ? 2 : 3);
	      int end = ((mask & ~HOST_WIDE_INT_UC (0xffffffff)) ? 4
			 : (mask & HOST_WIDE_INT_UC (0xffff0000)) ? 3
			 : (mask & 0xff00) ? 2 : 1);
	      bitmap_clear_range (livenow, 4 * rn + start, end - start);
	    }
	  /* Some ports generate (clobber (const_int)).  */
	  else if (CONST_INT_P (x))
	    continue;
	  else
	    /* Remaining destinations (MEM, pc, SCRATCH, call fusage)
	       set no tracked register bits; just sanity check.  */
	    gcc_assert (CALL_P (insn)
			|| MEM_P (x)
			|| x == pc_rtx
			|| GET_CODE (x) == SCRATCH);

	  iter.skip_subrtxes ();
	}
      else if (GET_CODE (x) == COND_EXEC)
	{
	  /* This isn't ideal, but may not be so bad in practice.  */
	  skipped_dest = true;
	  iter.skip_subrtxes ();
	}
    }
  return skipped_dest;
}
409 :
410 : /* INSN is a right shift and the second insn in a shift pair that is a
411 : sign or zero extension (SET is the single set associated with INSN).
412 :
413 : Replace the source of SET with NEW_SRC which is a source register
414 : from NEW_SRC_INSN (the left shift in the pair). This is effectively
415 : the same as the replacement we do for ZERO/SIGN extends on targets
416 : that support those insns. */
417 : static void
418 0 : ext_dce_try_optimize_rshift (rtx_insn *insn, rtx set, rtx new_src, rtx_insn *new_src_insn)
419 : {
420 : /* If the modes are not the same or one is a hard register, then
421 : conservatively do nothing. */
422 0 : if (GET_MODE (SET_SRC (set)) != GET_MODE (new_src)
423 0 : || !REG_P (XEXP (SET_SRC (set), 0))
424 0 : || !REG_P (new_src)
425 0 : || REGNO (XEXP (SET_SRC (set), 0)) < FIRST_PSEUDO_REGISTER
426 0 : || REGNO (new_src) < FIRST_PSEUDO_REGISTER)
427 : return;
428 :
429 0 : if (dump_file)
430 : {
431 0 : fprintf (dump_file, "Processing insn:\n");
432 0 : dump_insn_slim (dump_file, insn);
433 0 : fprintf (dump_file, "Trying to simplify pattern:\n");
434 0 : print_rtl_single (dump_file, SET_SRC (set));
435 : }
436 :
437 : /* We decided to turn do the optimization but allow it to be rejected for
438 : bisection purposes. */
439 0 : if (!dbg_cnt (::ext_dce))
440 : {
441 0 : if (dump_file)
442 0 : fprintf (dump_file, "Rejected due to debug counter.\n");
443 0 : return;
444 : }
445 :
446 : /* We're going to generate a fresh insn for the move, so put it
447 : into a sequence that we can emit after the current insn. */
448 0 : start_sequence ();
449 0 : emit_move_insn (SET_DEST (set), new_src);
450 0 : rtx_insn *seq = end_sequence ();
451 0 : emit_insn_after (seq, insn);
452 :
453 : /* Mark the destination as changed. */
454 0 : rtx x = SET_DEST (set);
455 0 : while (SUBREG_P (x) || GET_CODE (x) == ZERO_EXTRACT)
456 0 : x = XEXP (x, 0);
457 0 : gcc_assert (REG_P (x));
458 0 : bitmap_set_bit (changed_pseudos, REGNO (x));
459 :
460 0 : if (dump_file)
461 : {
462 0 : fprintf (dump_file, "Successfully transformed to:\n");
463 0 : print_rtl_single (dump_file, PATTERN (seq));
464 0 : fprintf (dump_file, "\n");
465 : }
466 :
467 0 : delete_insn (insn);
468 :
469 : /* If NEW_SRC died in its prior location, then we need to remove the
470 : death note and move it to the new location. */
471 0 : rtx note = find_regno_note (new_src_insn, REG_DEAD, REGNO (new_src));
472 0 : if (note)
473 : {
474 0 : remove_note (new_src_insn, note);
475 0 : add_reg_note (insn, REG_DEAD, new_src);
476 : }
477 : }
478 :
479 :
/* INSN has a sign/zero extended source inside SET that we will
   try to turn into a SUBREG (i.e. drop the extension when the
   extended bits are known dead).

   NOTE(review): an earlier revision of this function took a NEW_SRC
   argument for the shift-pair case; that path now lives in
   ext_dce_try_optimize_rshift.  */
static void
ext_dce_try_optimize_extension (rtx_insn *insn, rtx set)
{
  rtx src = SET_SRC (set);
  rtx inner = XEXP (src, 0);

  /* Avoid (subreg (mem)) and other constructs which may be valid RTL, but
     not useful for this optimization.  */
  if (!(REG_P (inner) || (SUBREG_P (inner) && REG_P (SUBREG_REG (inner)))))
    return;

  rtx new_pattern;
  if (dump_file)
    {
      fprintf (dump_file, "Processing insn:\n");
      dump_insn_slim (dump_file, insn);
      fprintf (dump_file, "Trying to simplify pattern:\n");
      print_rtl_single (dump_file, SET_SRC (set));
    }

  /* We decided to do the optimization but allow it to be rejected for
     bisection purposes.  */
  if (!dbg_cnt (::ext_dce))
    {
      if (dump_file)
	fprintf (dump_file, "Rejected due to debug counter.\n");
      return;
    }

  /* Replace the extension with a lowpart SUBREG of the inner reg in
     the extension's (wider) mode.  */
  new_pattern = simplify_gen_subreg (GET_MODE (src), inner,
				     GET_MODE (inner), 0);
  /* simplify_gen_subreg may fail in which case NEW_PATTERN will be NULL.
     We must not pass that as a replacement pattern to validate_change.  */
  if (new_pattern)
    {
      int ok = validate_change (insn, &SET_SRC (set), new_pattern, false);

      rtx x = SET_DEST (set);
      while (SUBREG_P (x) || GET_CODE (x) == ZERO_EXTRACT)
	x = XEXP (x, 0);

      gcc_assert (REG_P (x));
      if (ok)
	bitmap_set_bit (changed_pseudos, REGNO (x));

      if (dump_file)
	{
	  if (ok)
	    fprintf (dump_file, "Successfully transformed to:\n");
	  else
	    fprintf (dump_file, "Failed transformation to:\n");

	  print_rtl_single (dump_file, new_pattern);
	  fprintf (dump_file, "\n");
	}

      /* INSN may have a REG_EQUAL note indicating that the value was
	 sign or zero extended.  That note is no longer valid since we've
	 just removed the extension.  Just wipe the notes.  */
      if (ok)
	remove_reg_equal_equiv_notes (insn, false);
    }
  else
    {
      if (dump_file)
	fprintf (dump_file, "Unable to generate valid SUBREG expression.\n");
    }
}
552 :
/* Try to promote a narrow-mode operation wrapped in a sign/zero extension
   to the wider mode when the extended bits are dead.  For example,
   (sign_extend:DI (plus:SI (x) (y))) -> (plus:DI (x') (y'))
   where x' and y' are the operands promoted to DI mode.

   This enables the combine pass to match wider-mode target patterns
   (e.g., sh2add on RISC-V) that cannot match the narrow-mode operation.  */

static void
ext_dce_try_promote_operation (rtx_insn *insn, rtx set)
{
  rtx src = SET_SRC (set);

  /* If the extension was already optimized away, nothing to do.  */
  if (GET_CODE (src) != SIGN_EXTEND && GET_CODE (src) != ZERO_EXTEND)
    return;

  machine_mode outer_mode = GET_MODE (src);
  rtx inner = XEXP (src, 0);

  /* Only handle binary and unary arithmetic/logic operations.  */
  if (!BINARY_P (inner) && !UNARY_P (inner))
    return;

  rtx_code inner_code = GET_CODE (inner);

  /* Restrict to operations whose result in the low bits is identical
     regardless of input width (i.e., no high-bit dependencies).  */
  switch (inner_code)
    {
    case PLUS:
    case MINUS:
    case MULT:
    case NEG:
    case AND:
    case IOR:
    case XOR:
    case NOT:
    case ASHIFT:
      break;
    default:
      return;
    }

  /* Promote each operand to the outer mode.  */
  int nops = BINARY_P (inner) ? 2 : 1;
  rtx new_ops[2];

  for (int i = 0; i < nops; i++)
    {
      rtx op = XEXP (inner, i);

      /* Constants are fine as-is in the wider mode.  */
      if (CONST_INT_P (op))
	new_ops[i] = op;
      else if (REG_P (op))
	{
	  /* Wrap the reg in a (paradoxical) lowpart subreg of the
	     outer mode.  */
	  new_ops[i] = simplify_gen_subreg (outer_mode, op,
					    GET_MODE (op), 0);
	  if (!new_ops[i])
	    return;
	}
      else if (SUBREG_P (op) && REG_P (SUBREG_REG (op)))
	{
	  /* The inner register may already be in the target mode
	     (e.g., subreg:SI (reg:DI ...) 0).  Extract it directly
	     rather than creating a paradoxical subreg of a subreg,
	     which simplify_gen_subreg rejects.  */
	  rtx inner_reg = SUBREG_REG (op);
	  if (GET_MODE (inner_reg) == outer_mode)
	    new_ops[i] = inner_reg;
	  else
	    {
	      new_ops[i] = simplify_gen_subreg (outer_mode, inner_reg,
						GET_MODE (inner_reg), 0);
	      if (!new_ops[i])
		return;
	    }
	}
      else
	/* Any other operand form: punt.  */
	return;
    }

  /* Build the promoted operation.  */
  rtx new_src;
  if (BINARY_P (inner))
    new_src = gen_rtx_fmt_ee (inner_code, outer_mode,
			      new_ops[0], new_ops[1]);
  else
    new_src = gen_rtx_fmt_e (inner_code, outer_mode, new_ops[0]);

  if (dump_file)
    {
      fprintf (dump_file, "Processing insn:\n");
      dump_insn_slim (dump_file, insn);
      fprintf (dump_file, "Trying to promote to wider mode:\n");
      print_rtl_single (dump_file, new_src);
    }

  /* We decided to try the promotion but allow it to be rejected for
     bisection purposes.  */
  if (!dbg_cnt (::ext_dce))
    {
      if (dump_file)
	fprintf (dump_file, "Rejected due to debug counter.\n");
      return;
    }

  int ok = validate_change (insn, &SET_SRC (set), new_src, false);

  /* Record the modified destination register on success.  */
  rtx x = SET_DEST (set);
  while (SUBREG_P (x) || GET_CODE (x) == ZERO_EXTRACT)
    x = XEXP (x, 0);

  gcc_assert (REG_P (x));
  if (ok)
    bitmap_set_bit (changed_pseudos, REGNO (x));

  if (dump_file)
    {
      if (ok)
	fprintf (dump_file, "Successfully promoted to:\n");
      else
	fprintf (dump_file, "Failed promotion to:\n");
      print_rtl_single (dump_file, new_src);
      fprintf (dump_file, "\n");
    }

  /* Any REG_EQUAL/REG_EQUIV note described the old (extended) value
     and is now stale.  */
  if (ok)
    remove_reg_equal_equiv_notes (insn, false);
}
683 :
/* Record INSN as a promotion candidate if it passes the same validity
   checks as ext_dce_try_promote_operation.  We defer actual promotion
   until we can determine whether the candidate is part of a chain.

   Side effects on success: pushes onto PROMOTION_CANDIDATES, marks the
   destination in PROMOTABLE_DESTS and the register operands in
   CONSUMED_BY_CANDIDATE.  */

static void
ext_dce_record_promotion_candidate (rtx_insn *insn, rtx set)
{
  rtx src = SET_SRC (set);

  if (GET_CODE (src) != SIGN_EXTEND && GET_CODE (src) != ZERO_EXTEND)
    return;

  machine_mode outer_mode = GET_MODE (src);
  rtx inner = XEXP (src, 0);

  if (!BINARY_P (inner) && !UNARY_P (inner))
    return;

  rtx_code inner_code = GET_CODE (inner);

  /* Same operation whitelist as ext_dce_try_promote_operation: low
     bits of the result must not depend on high input bits.  */
  switch (inner_code)
    {
    case PLUS:
    case MINUS:
    case MULT:
    case NEG:
    case AND:
    case IOR:
    case XOR:
    case NOT:
    case ASHIFT:
      break;
    default:
      return;
    }

  /* Dry-run: check that all operands can be promoted.  */
  int nops = BINARY_P (inner) ? 2 : 1;
  for (int i = 0; i < nops; i++)
    {
      rtx op = XEXP (inner, i);
      if (CONST_INT_P (op))
	continue;
      else if (REG_P (op))
	{
	  if (!simplify_gen_subreg (outer_mode, op, GET_MODE (op), 0))
	    return;
	}
      else if (SUBREG_P (op) && REG_P (SUBREG_REG (op)))
	{
	  rtx inner_reg = SUBREG_REG (op);
	  if (GET_MODE (inner_reg) != outer_mode
	      && !simplify_gen_subreg (outer_mode, inner_reg,
				       GET_MODE (inner_reg), 0))
	    return;
	}
      else
	return;
    }

  /* Find the destination register.  */
  rtx dest = SET_DEST (set);
  while (SUBREG_P (dest) || GET_CODE (dest) == ZERO_EXTRACT)
    dest = XEXP (dest, 0);
  if (!REG_P (dest))
    return;
  unsigned int dest_regno = REGNO (dest);

  /* Record the candidate.  */
  promotion_candidates.safe_push ({insn, set});
  bitmap_set_bit (promotable_dests, dest_regno);

  /* Mark register operands as consumed by a candidate.  */
  for (int i = 0; i < nops; i++)
    {
      rtx op = XEXP (inner, i);
      if (REG_P (op))
	bitmap_set_bit (consumed_by_candidate, REGNO (op));
      else if (SUBREG_P (op) && REG_P (SUBREG_REG (op)))
	bitmap_set_bit (consumed_by_candidate, REGNO (SUBREG_REG (op)));
    }
}
766 :
/* Promote candidates that form chains: a candidate whose result feeds
   into another candidate's operand, or whose operand comes from another
   candidate's result.  Skip standalone (isolated) promotions.

   Clears all the deferred-promotion state on exit, so this is safe to
   call once per region/function scanned.  */

static void
ext_dce_promote_chained_candidates (void)
{
  /* Propagate chain info through copies recorded during the reverse scan.
     Since copies are recorded in reverse order, iterate forward to propagate
     promotable_dests (which was set late in the scan) through copies that
     were seen earlier.  */
  unsigned cix;
  copy_info *cp;
  FOR_EACH_VEC_ELT (promotion_copies, cix, cp)
    {
      /* A copy's destination is promotable if its source is, and its
	 source counts as consumed if its destination is.  */
      if (bitmap_bit_p (promotable_dests, cp->src_regno))
	bitmap_set_bit (promotable_dests, cp->dest_regno);
      if (bitmap_bit_p (consumed_by_candidate, cp->dest_regno))
	bitmap_set_bit (consumed_by_candidate, cp->src_regno);
    }

  unsigned ix;
  promotion_candidate_info *cand;

  FOR_EACH_VEC_ELT (promotion_candidates, ix, cand)
    {
      /* Find destination register.  */
      rtx dest = SET_DEST (cand->set);
      while (SUBREG_P (dest) || GET_CODE (dest) == ZERO_EXTRACT)
	dest = XEXP (dest, 0);
      unsigned int dest_regno = REGNO (dest);

      /* Check if this candidate's result feeds into another candidate.  */
      bool is_chained = bitmap_bit_p (consumed_by_candidate, dest_regno);

      /* Check if any operand comes from another candidate's result.  */
      if (!is_chained)
	{
	  rtx inner = XEXP (SET_SRC (cand->set), 0);
	  int nops = BINARY_P (inner) ? 2 : 1;
	  for (int i = 0; i < nops && !is_chained; i++)
	    {
	      rtx op = XEXP (inner, i);
	      if (REG_P (op))
		is_chained = bitmap_bit_p (promotable_dests, REGNO (op));
	      else if (SUBREG_P (op) && REG_P (SUBREG_REG (op)))
		is_chained = bitmap_bit_p (promotable_dests,
					   REGNO (SUBREG_REG (op)));
	    }
	}

      if (is_chained)
	ext_dce_try_promote_operation (cand->insn, cand->set);
      else if (dump_file)
	{
	  fprintf (dump_file, "Skipping standalone promotion for insn:\n");
	  dump_insn_slim (dump_file, cand->insn);
	  fprintf (dump_file, "\n");
	}
    }

  /* Reset the deferred-promotion state for the next scan.  */
  promotion_candidates.truncate (0);
  promotion_copies.truncate (0);
  bitmap_clear (promotable_dests);
  bitmap_clear (consumed_by_candidate);
}
833 :
834 : /* Some operators imply that their second operand is fully live,
835 : regardless of how many bits in the output are live. An example
836 : would be the shift count on a target without SHIFT_COUNT_TRUNCATED
837 : defined.
838 :
839 : Return TRUE if CODE is such an operator. FALSE otherwise. */
840 :
841 : static bool
842 76209628 : binop_implies_op2_fully_live (rtx_code code)
843 : {
844 0 : switch (code)
845 : {
846 : case ASHIFT:
847 : case LSHIFTRT:
848 : case ASHIFTRT:
849 : case ROTATE:
850 : case ROTATERT:
851 : case SS_ASHIFT:
852 : case US_ASHIFT:
853 : return !SHIFT_COUNT_TRUNCATED;
854 :
855 0 : default:
856 0 : return false;
857 : }
858 : }
859 :
860 : /* X, with code CODE, is an operation for which safe_for_live_propagation
861 : holds true, and bits set in MASK are live in the result. Compute a
862 : mask of (potentially) live bits in the non-constant inputs. In case of
863 : binop_implies_op2_fully_live (e.g. shifts), the computed mask may
864 : exclusively pertain to the first operand.
865 :
866 : This looks wrong as we may have some important operations embedded as
867 : operands of another operation. For example, we might have an extension
868 : wrapping a shift. It really feels like this needs to be recursing down
869 : into operands much more often. */
870 :
871 : unsigned HOST_WIDE_INT
872 71102720 : carry_backpropagate (unsigned HOST_WIDE_INT mask, enum rtx_code code, rtx x)
873 : {
874 72808859 : if (mask == 0)
875 : return 0;
876 :
877 72808833 : enum machine_mode mode = GET_MODE_INNER (GET_MODE (x));
878 72808833 : unsigned HOST_WIDE_INT mmask = GET_MODE_MASK (mode);
879 :
880 : /* While we don't try to optimize operations on types larger
881 : than 64 bits, we do want to make sure not to invoke undefined
882 : behavior when presented with such operations during use
883 : processing. The safe thing to do is to just return mmask
884 : for that scenario indicating every possible chunk is life. */
885 72808833 : scalar_int_mode smode;
886 72808833 : if (!is_a <scalar_int_mode> (mode, &smode)
887 60726460 : || GET_MODE_BITSIZE (smode) > HOST_BITS_PER_WIDE_INT)
888 : return mmask;
889 :
890 58649492 : switch (code)
891 : {
892 16437728 : case PLUS:
893 16437728 : case MINUS:
894 16437728 : case MULT:
895 16437728 : return (HOST_WIDE_INT_UC (2) << floor_log2 (mask)) - 1;
896 :
897 : /* We propagate for the shifted operand, but not the shift
898 : count. The count is handled specially. */
899 1401452 : case ASHIFT:
900 1401452 : if (CONST_INT_P (XEXP (x, 1))
901 2731606 : && UINTVAL (XEXP (x, 1)) < GET_MODE_BITSIZE (smode))
902 1330118 : return (HOST_WIDE_INT) mask >> INTVAL (XEXP (x, 1));
903 71334 : return (HOST_WIDE_INT_UC (2) << floor_log2 (mask)) - 1;
904 :
905 : /* We propagate for the shifted operand, but not the shift
906 : count. The count is handled specially. */
907 642001 : case LSHIFTRT:
908 642001 : if (CONST_INT_P (XEXP (x, 1))
909 1251901 : && UINTVAL (XEXP (x, 1)) < GET_MODE_BITSIZE (smode))
910 609872 : return mmask & (mask << INTVAL (XEXP (x, 1)));
911 : return mmask;
912 :
913 : /* We propagate for the shifted operand, but not the shift
914 : count. The count is handled specially. */
915 293138 : case ASHIFTRT:
916 293138 : if (CONST_INT_P (XEXP (x, 1))
917 572304 : && UINTVAL (XEXP (x, 1)) < GET_MODE_BITSIZE (smode))
918 : {
919 279122 : HOST_WIDE_INT sign = 0;
920 279122 : if (HOST_BITS_PER_WIDE_INT - clz_hwi (mask) + INTVAL (XEXP (x, 1))
921 279122 : > GET_MODE_BITSIZE (smode))
922 558244 : sign = HOST_WIDE_INT_1U << (GET_MODE_BITSIZE (smode) - 1);
923 279122 : return sign | (mmask & (mask << INTVAL (XEXP (x, 1))));
924 : }
925 : return mmask;
926 :
927 37519 : case SMUL_HIGHPART:
928 37519 : case UMUL_HIGHPART:
929 37519 : if (XEXP (x, 1) == const0_rtx)
930 : return 0;
931 37519 : if (XEXP (x, 1) == const1_rtx)
932 : return mmask;
933 37519 : if (CONST_INT_P (XEXP (x, 1)))
934 : {
935 0 : if (pow2p_hwi (INTVAL (XEXP (x, 1))))
936 0 : return mmask & (mask << (GET_MODE_BITSIZE (smode)
937 0 : - exact_log2 (INTVAL (XEXP (x, 1)))));
938 :
939 0 : int bits = (HOST_BITS_PER_WIDE_INT + GET_MODE_BITSIZE (smode)
940 0 : - clz_hwi (mask) - ctz_hwi (INTVAL (XEXP (x, 1))));
941 0 : if (bits < GET_MODE_BITSIZE (smode))
942 0 : return (HOST_WIDE_INT_1U << bits) - 1;
943 : }
944 : return mmask;
945 :
946 558687 : case SIGN_EXTEND:
947 558687 : if (!GET_MODE_BITSIZE (GET_MODE (x)).is_constant ()
948 558687 : || !GET_MODE_BITSIZE (GET_MODE (XEXP (x, 0))).is_constant ())
949 : return -1;
950 :
951 : /* We want the mode of the inner object. We need to ensure its
952 : sign bit is on in MASK. */
953 558687 : mode = GET_MODE_INNER (GET_MODE (XEXP (x, 0)));
954 558687 : if (mask & ~GET_MODE_MASK (mode))
955 558350 : mask |= HOST_WIDE_INT_1U << (GET_MODE_BITSIZE (mode).to_constant ()
956 558350 : - 1);
957 :
958 : /* Recurse into the operand. */
959 558687 : return carry_backpropagate (mask, GET_CODE (XEXP (x, 0)), XEXP (x, 0));
960 :
961 1147452 : case ZERO_EXTEND:
962 1147452 : if (!GET_MODE_BITSIZE (GET_MODE (x)).is_constant ()
963 1147452 : || !GET_MODE_BITSIZE (GET_MODE (XEXP (x, 0))).is_constant ())
964 : return -1;
965 :
966 : /* Recurse into the operand. */
967 1147452 : return carry_backpropagate (mask, GET_CODE (XEXP (x, 0)), XEXP (x, 0));
968 :
969 : /* We propagate for the shifted operand, but not the shift
970 : count. The count is handled specially. */
971 0 : case SS_ASHIFT:
972 0 : case US_ASHIFT:
973 0 : if (CONST_INT_P (XEXP (x, 1))
974 0 : && UINTVAL (XEXP (x, 1)) < GET_MODE_BITSIZE (smode))
975 : {
976 0 : return ((mmask & ~((unsigned HOST_WIDE_INT) mmask
977 0 : >> (INTVAL (XEXP (x, 1))
978 0 : + (XEXP (x, 1) != const0_rtx
979 0 : && code == SS_ASHIFT))))
980 0 : | ((HOST_WIDE_INT) mask >> INTVAL (XEXP (x, 1))));
981 : }
982 : return mmask;
983 :
984 : default:
985 : return mask;
986 : }
987 : }
988 :
/* Process uses in INSN contained in OBJ.  Set appropriate bits in LIVENOW
   for any chunks of pseudos that become live, potentially filtering using
   bits from LIVE_TMP.

   LIVE_TMP holds the live-out bit groups of the destinations set by INSN
   (four bits per pseudo, one per 8/8/16/32-bit chunk).  SKIPPED_DEST is
   true when set processing had to ignore a destination, in which case we
   must conservatively treat all destination bits as live here.

   If MODIFY is true, then optimize sign/zero extensions to SUBREGs when
   the extended bits are never read and mark pseudos which had extensions
   eliminated in CHANGED_PSEUDOS.  */

static void
ext_dce_process_uses (rtx_insn *insn, rtx obj,
		      bitmap live_tmp, bool skipped_dest)
{
  subrtx_var_iterator::array_type array_var;
  FOR_EACH_SUBRTX_VAR (iter, array_var, obj, NONCONST)
    {
      /* An EXPR_LIST (from call fusage) ends in NULL_RTX.  */
      rtx x = *iter;
      if (x == NULL_RTX)
	continue;

      /* So the basic idea in this FOR_EACH_SUBRTX_VAR loop is to
	 handle SETs explicitly, possibly propagating live information
	 into the uses.

	 We may continue the loop at various points which will cause
	 iteration into the next level of RTL.  Breaking from the loop
	 is never safe as it can lead us to fail to process some of the
	 RTL and thus not make objects live when necessary.  */
      enum rtx_code xcode = GET_CODE (x);
      if (xcode == SET)
	{
	  const_rtx dst = SET_DEST (x);
	  rtx src = SET_SRC (x);
	  const_rtx y;
	  unsigned HOST_WIDE_INT bit = 0;

	  /* The code of the RHS of a SET.  */
	  enum rtx_code code = GET_CODE (src);

	  /* ?!? How much of this should mirror SET handling, potentially
	     being shared?  */
	  if (SUBREG_P (dst) && subreg_lsb (dst).is_constant (&bit))
	    {
	      /* Clamp the bit offset so later shifts by BIT stay in
		 range for a HOST_WIDE_INT mask.  */
	      if (bit >= HOST_BITS_PER_WIDE_INT)
		bit = HOST_BITS_PER_WIDE_INT - 1;
	      dst = SUBREG_REG (dst);
	    }
	  else if (GET_CODE (dst) == STRICT_LOW_PART)
	    dst = XEXP (dst, 0);

	  /* Main processing of the uses.  Two major goals here.

	     First, we want to try and propagate liveness (or the lack
	     thereof) from the destination register to the source
	     register(s).

	     Second, if the source is an extension, try to optimize
	     it into a SUBREG.  The SUBREG form indicates we don't
	     care about the upper bits and will usually be copy
	     propagated away.

	     If we fail to handle something in here, the expectation
	     is the iterator will dive into the sub-components and
	     mark all the chunks in any found REGs as live.  */
	  if (REG_P (dst) && safe_for_live_propagation (code))
	    {
	      /* Create a mask representing the bits of this output
		 operand that are live after this insn.  We can use
		 this information to refine the live in state of
		 inputs to this insn in many cases.

		 We have to do this on a per SET basis, we might have
		 an INSN with multiple SETS, some of which can narrow
		 the source operand liveness, some of which may not.  */
	      unsigned HOST_WIDE_INT dst_mask = 0;
	      HOST_WIDE_INT rn = REGNO (dst);
	      unsigned HOST_WIDE_INT mask_array[]
		= { 0xff, 0xff00, HOST_WIDE_INT_UC (0xffff0000),
		    -HOST_WIDE_INT_UC (0x100000000) };
	      for (int i = 0; i < 4; i++)
		if (bitmap_bit_p (live_tmp, 4 * rn + i))
		  dst_mask |= mask_array[i];
	      dst_mask >>= bit;

	      /* If we ignored a destination during set processing, then
		 consider all the bits live.  */
	      if (skipped_dest)
		dst_mask = -1;

	      /* Refine the mask based on how the source RTX code can
		 propagate carries/bits backward into its operands.  */
	      dst_mask = carry_backpropagate (dst_mask, code, src);

	      /* ??? Could also handle ZERO_EXTRACT / SIGN_EXTRACT
		 of the source specially to improve optimization.  */
	      if (code == SIGN_EXTEND || code == ZERO_EXTEND)
		{
		  rtx inner = XEXP (src, 0);
		  unsigned HOST_WIDE_INT src_mask
		    = GET_MODE_MASK (GET_MODE_INNER (GET_MODE (inner)));

		  /* DST_MASK could be zero if we had something in the SET
		     that we couldn't handle.  */
		  if (modify && !skipped_dest && (dst_mask & ~src_mask) == 0)
		    {
		      ext_dce_try_optimize_extension (insn, x);

		      /* If the extension was optimized to a copy, propagate
			 chain info through it: if the dest is consumed by a
			 promotion candidate (seen later in reverse scan),
			 the source register is transitively consumed too.  */
		      rtx opt_src = SET_SRC (x);
		      if (GET_CODE (opt_src) != SIGN_EXTEND
			  && GET_CODE (opt_src) != ZERO_EXTEND)
			{
			  rtx copy_dest = SET_DEST (x);
			  while (SUBREG_P (copy_dest)
				 || GET_CODE (copy_dest) == ZERO_EXTRACT)
			    copy_dest = XEXP (copy_dest, 0);

			  rtx copy_src = opt_src;
			  if (SUBREG_P (copy_src))
			    copy_src = SUBREG_REG (copy_src);

			  if (REG_P (copy_dest) && REG_P (copy_src))
			    {
			      if (bitmap_bit_p (consumed_by_candidate,
						REGNO (copy_dest)))
				bitmap_set_bit (consumed_by_candidate,
						REGNO (copy_src));
			      if (bitmap_bit_p (promotable_dests,
						REGNO (copy_src)))
				bitmap_set_bit (promotable_dests,
						REGNO (copy_dest));
			      /* Record the copy so chain discovery can
				 walk through it later.  */
			      promotion_copies.safe_push (
				{REGNO (copy_dest), REGNO (copy_src)});
			    }
			}
		      else
			/* Extension survived; defer it as a promotion
			   candidate to be applied only if it chains.  */
			ext_dce_record_promotion_candidate (insn, x);
		    }

		  /* Stripping the extension here just seems wrong on multiple
		     levels.  It's source side handling, so it seems like it
		     belongs in the loop below.  Stripping here also makes it
		     harder than necessary to properly handle live bit groups
		     for (ANY_EXTEND (SUBREG)) where the SUBREG has
		     SUBREG_PROMOTED state.  */
		  dst_mask &= src_mask;
		  src = XEXP (src, 0);
		  code = GET_CODE (src);
		}

	      /* Special case for (sub)targets that do not have extension
		 insns (and thus use shifts).  We want to detect when we have
		 a shift pair and treat the pair as-if it was an extension.

		 Key on the right shift and use (for now) simplistic tests
		 to find the corresponding left shift.  */
	      scalar_mode outer_mode;
	      if ((code == LSHIFTRT || code == ASHIFTRT)
		  && CONST_INT_P (XEXP (src, 1))
		  && (INTVAL (XEXP (src, 1)) == BITS_PER_WORD - 8
		      || INTVAL (XEXP (src, 1)) == BITS_PER_WORD - 16
		      || INTVAL (XEXP (src, 1)) == BITS_PER_WORD - 32)
		  && is_a <scalar_mode> (GET_MODE (src), &outer_mode)
		  && GET_MODE_BITSIZE (outer_mode) <= HOST_BITS_PER_WIDE_INT)
		{
		  /* So we have a right shift that could correspond to
		     the second in a pair implementing QI, HI or SI -> DI
		     extension.  See if we can find the left shift.  For
		     now, just look one real instruction back.  */
		  rtx_insn *prev_insn = prev_nonnote_nondebug_insn_bb (insn);

		  /* The previous insn must be a left shift by the same
		     amount.  */
		  rtx prev_set;
		  if (prev_insn
		      && (prev_set = single_set (prev_insn))
		      /* The destination of the left shift must be the
			 source of the right shift.  */
		      && SET_DEST (prev_set) == XEXP (src, 0)
		      && GET_CODE (SET_SRC (prev_set)) == ASHIFT
		      && CONST_INT_P (XEXP (SET_SRC (prev_set), 1))
		      /* The counts must match.  */
		      && (INTVAL (XEXP (src, 1))
			  == INTVAL (XEXP (SET_SRC (prev_set), 1))))
		    {
		      /* Bits surviving the shift pair: the low
			 (bitsize - count) bits of the original value.  */
		      unsigned HOST_WIDE_INT src_mask
			= GET_MODE_BITSIZE (GET_MODE (src)).to_constant ();
		      src_mask -= INTVAL (XEXP (src, 1));
		      src_mask = (HOST_WIDE_INT_1U << src_mask) - 1;

		      /* DST_MASK has been adjusted for INSN.  We need its
			 original value.  */
		      unsigned HOST_WIDE_INT tmp_mask = 0;
		      for (int i = 0; i < 4; i++)
			if (bitmap_bit_p (live_tmp, 4 * rn + i))
			  tmp_mask |= mask_array[i];
		      tmp_mask >>= bit;

		      if (modify && !skipped_dest && (tmp_mask & ~src_mask) == 0)
			{
			  ext_dce_try_optimize_rshift (insn, x,
						       XEXP (SET_SRC (prev_set), 0),
						       prev_insn);

			  /* These may not strictly be necessary, but we might
			     as well try and be as accurate as possible.  The
			     RHS is now a simple REG.  */
			  dst_mask = src_mask;
			  src = XEXP (SET_SRC (prev_set), 0);
			  code = GET_CODE (src);
			}
		    }
		}

	      /* Optimization is done at this point.  We just want to make
		 sure everything that should get marked as live is marked
		 from here onward.  */

	      /* We will handle the other operand of a binary operator
		 at the bottom of the loop by resetting Y.  */
	      if (BINARY_P (src))
		y = XEXP (src, 0);
	      else
		y = src;

	      /* We're inside a SET and want to process the source operands
		 making things live.  Breaking from this loop will cause
		 the iterator to work on sub-rtxs, so it is safe to break
		 if we see something we don't know how to handle.

		 This code is just hokey as it really just handles trivial
		 unary and binary cases.  Otherwise the loop exits and we
		 continue iterating on sub-rtxs, but outside the set
		 context.  */
	      unsigned HOST_WIDE_INT save_mask = dst_mask;
	      for (;;)
		{
		  /* In general we want to restore DST_MASK before each loop
		     iteration.  The exception is when the opcode implies that
		     the other operand is fully live.  That's handled by
		     changing SAVE_MASK below.  */
		  dst_mask = save_mask;
		  /* Strip an outer paradoxical subreg.  The bits outside
		     the inner mode are don't cares.  So we can just strip
		     and process the inner object.  */
		  if (paradoxical_subreg_p (y))
		    y = XEXP (y, 0);
		  else if (SUBREG_P (y) && subreg_lsb (y).is_constant (&bit))
		    {
		      /* If !TRULY_NOOP_TRUNCATION_MODES_P, the mode
			 change performed by Y would normally need to be a
			 TRUNCATE rather than a SUBREG.  It is probably the
			 guarantee provided by SUBREG_PROMOTED_VAR_P that
			 allows the SUBREG in Y as an exception.  We must
			 therefore preserve that guarantee and treat the
			 upper bits of the inner register as live
			 regardless of the outer code.  See PR 120050.  */
		      if (!REG_P (SUBREG_REG (y))
			  || (SUBREG_PROMOTED_VAR_P (y)
			      && (!TRULY_NOOP_TRUNCATION_MODES_P (
				    GET_MODE (y),
				    GET_MODE (SUBREG_REG (y))))))
			break;

		      /* If this is a wide object (more bits than we can fit
			 in a HOST_WIDE_INT), then just break from the SET
			 context.  That will cause the iterator to walk down
			 into the subrtx and if we land on a REG we'll mark
			 the whole thing live.  */
		      if (bit >= HOST_BITS_PER_WIDE_INT)
			break;

		      /* The SUBREG's mode determines the live width.  */
		      if (dst_mask)
			{
			  dst_mask <<= bit;
			  /* If shifting left by BIT cleared every bit, fall
			     back to marking the upper 32-bit chunk live.  */
			  if (!dst_mask)
			    dst_mask = -HOST_WIDE_INT_UC (0x100000000);
			}
		      y = SUBREG_REG (y);
		    }

		  if (REG_P (y))
		    {
		      /* We have found the use of a register.  We need to mark
			 the appropriate chunks of the register live.  The mode
			 of the REG is a starting point.  We may refine that
			 based on what chunks in the output were live.  */
		      rn = 4 * REGNO (y);
		      unsigned HOST_WIDE_INT tmp_mask = dst_mask;

		      /* If the RTX code for the SET_SRC is not one we can
			 propagate destination liveness through, then just
			 set the mask to the mode's mask.  */
		      if (!safe_for_live_propagation (code))
			tmp_mask
			  = GET_MODE_MASK (GET_MODE_INNER (GET_MODE (y)));

		      if (tmp_mask & 0xff)
			bitmap_set_bit (livenow, rn);
		      if (tmp_mask & 0xff00)
			bitmap_set_bit (livenow, rn + 1);
		      if (tmp_mask & HOST_WIDE_INT_UC (0xffff0000))
			bitmap_set_bit (livenow, rn + 2);
		      if (tmp_mask & -HOST_WIDE_INT_UC (0x100000000))
			bitmap_set_bit (livenow, rn + 3);
		    }
		  else if (!CONSTANT_P (y))
		    break;

		  /* We might have (ashift (const_int 1) (reg...))
		     By setting dst_mask we can continue iterating on the
		     next operand and it will be considered fully live.

		     Note that since we restore DST_MASK from SAVE_MASK at the
		     top of the loop, we have to change SAVE_MASK to get the
		     semantics we want.  */
		  if (binop_implies_op2_fully_live (GET_CODE (src)))
		    save_mask = -1;

		  /* If this was anything but a binary operand, break the inner
		     loop.  This is conservatively correct as it will cause the
		     iterator to look at the sub-rtxs outside the SET
		     context.  */
		  if (!BINARY_P (src))
		    break;

		  /* We processed the first operand of a binary operator.  Now
		     handle the second.  SRC is set to PC_RTX so the next pass
		     through the loop does not treat it as a binary op
		     again.  */
		  y = XEXP (src, 1), src = pc_rtx;
		}

	      /* These are leaf nodes, no need to iterate down into them.  */
	      if (REG_P (y) || CONSTANT_P (y))
		iter.skip_subrtxes ();
	    }
	}
      /* If we are reading the low part of a SUBREG, then we can
	 refine liveness of the input register, otherwise let the
	 iterator continue into SUBREG_REG.  */
      else if (SUBREG_P (x)
	       && REG_P (SUBREG_REG (x))
	       && !paradoxical_subreg_p (x)
	       && subreg_lowpart_p (x)
	       && GET_MODE_BITSIZE (GET_MODE (x)).is_constant ()
	       && GET_MODE_BITSIZE (GET_MODE (x)).to_constant () <= 32)
	{
	  HOST_WIDE_INT size = GET_MODE_BITSIZE (GET_MODE (x)).to_constant ();
	  HOST_WIDE_INT rn = 4 * REGNO (SUBREG_REG (x));

	  /* If this is a promoted subreg, then more of it may be live than
	     is otherwise obvious.  */
	  if (SUBREG_PROMOTED_VAR_P (x))
	    size = GET_MODE_BITSIZE (GET_MODE (SUBREG_REG (x))).to_constant ();

	  bitmap_set_bit (livenow, rn);
	  if (size > 8)
	    bitmap_set_bit (livenow, rn + 1);
	  if (size > 16)
	    bitmap_set_bit (livenow, rn + 2);
	  if (size >= 32)
	    bitmap_set_bit (livenow, rn + 3);
	  iter.skip_subrtxes ();
	}
      /* If we have a register reference that is not otherwise handled,
	 just assume all the chunks are live.  */
      else if (REG_P (x))
	bitmap_set_range (livenow, REGNO (x) * 4, group_limit (x));
    }
}
1353 :
/* Process a single basic block BB with current liveness information
   in LIVENOW, returning updated liveness information.

   If MODIFY is true, then this is the last pass and unnecessary
   extensions should be eliminated when possible.  If an extension
   is removed, the source pseudo is marked in CHANGED_PSEUDOS.  */

static void
ext_dce_process_bb (basic_block bb)
{
  rtx_insn *insn;

  /* Walk the block in reverse so liveness flows backward from each
     insn's destinations into its sources.  */
  FOR_BB_INSNS_REVERSE (bb, insn)
    {
      if (!NONDEBUG_INSN_P (insn))
	continue;

      /* Live-out state of the destination of this insn.  We can
	 use this to refine the live-in state of the sources of
	 this insn in many cases.  */
      bitmap live_tmp = BITMAP_ALLOC (NULL);

      /* First process any sets/clobbers in INSN.  */
      bool skipped_dest = ext_dce_process_sets (insn, PATTERN (insn), live_tmp);

      /* CALL_INSNs need processing their fusage data.  */
      if (CALL_P (insn))
	skipped_dest |= ext_dce_process_sets (insn,
					      CALL_INSN_FUNCTION_USAGE (insn),
					      live_tmp);

      /* And now uses, optimizing away SIGN/ZERO extensions as we go.  */
      ext_dce_process_uses (insn, PATTERN (insn), live_tmp, skipped_dest);

      /* A nonlocal goto implicitly uses the frame pointer.  */
      if (JUMP_P (insn) && find_reg_note (insn, REG_NON_LOCAL_GOTO, NULL_RTX))
	{
	  bitmap_set_range (livenow, FRAME_POINTER_REGNUM * 4, 4);
	  if (!HARD_FRAME_POINTER_IS_FRAME_POINTER)
	    bitmap_set_range (livenow, HARD_FRAME_POINTER_REGNUM * 4, 4);
	}

      /* And process fusage data for the use as well.  */
      if (CALL_P (insn))
	{
	  if (!FAKE_CALL_P (insn))
	    bitmap_set_range (livenow, STACK_POINTER_REGNUM * 4, 4);

	  /* If this is not a call to a const function, then assume it
	     can read any global register.  */
	  if (!RTL_CONST_CALL_P (insn))
	    for (unsigned i = 0; i < FIRST_PSEUDO_REGISTER; i++)
	      if (global_regs[i])
		bitmap_set_range (livenow, i * 4, 4);

	  ext_dce_process_uses (insn, CALL_INSN_FUNCTION_USAGE (insn),
				live_tmp, false);
	}

      BITMAP_FREE (live_tmp);
    }

  /* On the final (modifying) pass, apply any deferred extension
     promotions that formed chains.  */
  if (modify)
    ext_dce_promote_chained_candidates ();
}
1418 :
1419 : /* SUBREG_PROMOTED_VAR_P is set by the gimple->rtl optimizers and
1420 : is usually helpful. However, in some cases setting the value when
1421 : it not strictly needed can cause this pass to miss optimizations.
1422 :
1423 : Specifically consider (set (mem) (subreg (reg))). If set in that
1424 : case it will cause more bit groups to be live for REG than would
1425 : be strictly necessary which in turn can inhibit extension removal.
1426 :
1427 : So do a pass over the IL wiping the SUBREG_PROMOTED_VAR_P when it
1428 : is obviously not needed. */
1429 :
1430 : static void
1431 961546 : maybe_clear_subreg_promoted_p (void)
1432 : {
1433 119020217 : for (rtx_insn *insn = get_insns(); insn; insn = NEXT_INSN (insn))
1434 : {
1435 118058671 : if (!NONDEBUG_INSN_P (insn))
1436 63461524 : continue;
1437 :
1438 54597147 : rtx set = single_set (insn);
1439 54597147 : if (!set)
1440 3652697 : continue;
1441 :
1442 : /* There may be other cases where we should clear, but for
1443 : now, this is the only known case where it causes problems. */
1444 50944450 : if (MEM_P (SET_DEST (set)) && SUBREG_P (SET_SRC (set))
1445 70804 : && GET_MODE (SET_DEST (set)) <= GET_MODE (SUBREG_REG (SET_SRC (set))))
1446 61223 : SUBREG_PROMOTED_VAR_P (SET_SRC (set)) = 0;
1447 : }
1448 961546 : }
1449 :
/* Walk the IL and build the transitive closure of all the REGs tied
   together by copies where either the source or destination is
   marked in CHANGED_PSEUDOS.  */

static void
expand_changed_pseudos (void)
{
  /* Build a vector of registers related by a copy.  This is meant to
     speed up the next step by avoiding full IL walks.  */
  struct copy_pair { rtx first; rtx second; };
  auto_vec<copy_pair> pairs;
  for (rtx_insn *insn = get_insns (); insn; insn = NEXT_INSN (insn))
    {
      if (!NONDEBUG_INSN_P (insn))
	continue;

      rtx pat = PATTERN (insn);

      /* Simple copies to a REG from another REG or SUBREG of a REG.  */
      if (GET_CODE (pat) == SET
	  && REG_P (SET_DEST (pat))
	  && (REG_P (SET_SRC (pat))
	      || (SUBREG_P (SET_SRC (pat))
		  && REG_P (SUBREG_REG (SET_SRC (pat))))))
	{
	  rtx src = (REG_P (SET_SRC (pat))
		     ? SET_SRC (pat)
		     : SUBREG_REG (SET_SRC (pat)));
	  pairs.safe_push ({ SET_DEST (pat), src });
	}

      /* Simple copies to a REG from another REG or SUBREG of a REG
	 held inside a PARALLEL.  */
      if (GET_CODE (pat) == PARALLEL)
	{
	  for (int i = XVECLEN (pat, 0) - 1; i >= 0; i--)
	    {
	      rtx elem = XVECEXP (pat, 0, i);

	      if (GET_CODE (elem) == SET
		  && REG_P (SET_DEST (elem))
		  && (REG_P (SET_SRC (elem))
		      || (SUBREG_P (SET_SRC (elem))
			  && REG_P (SUBREG_REG (SET_SRC (elem))))))
		{
		  rtx src = (REG_P (SET_SRC (elem))
			     ? SET_SRC (elem)
			     : SUBREG_REG (SET_SRC (elem)));
		  pairs.safe_push ({ SET_DEST (elem), src });
		}
	    }
	  continue;
	}
    }

  /* Now we have a vector with copy pairs.  Iterate over that list
     updating CHANGED_PSEUDOS as we go.  Eliminate copies from the
     list as we go as they don't need further processing.

     Note removing an element with unordered_remove during
     FOR_EACH_VEC_ELT skips the element swapped into slot I for this
     sweep; setting CHANGED forces another full sweep, so the fixed
     point is still reached.  */
  bool changed = true;
  while (changed)
    {
      changed = false;
      unsigned int i;
      copy_pair *p;
      FOR_EACH_VEC_ELT (pairs, i, p)
	{
	  /* If the copy source changed and the destination was not
	     already marked, propagate the mark and retire the pair.  */
	  if (bitmap_bit_p (changed_pseudos, REGNO (p->second))
	      && bitmap_set_bit (changed_pseudos, REGNO (p->first)))
	    {
	      pairs.unordered_remove (i);
	      changed = true;
	    }
	}
    }
}
1525 :
1526 : /* We optimize away sign/zero extensions in this pass and replace
1527 : them with SUBREGs indicating certain bits are don't cares.
1528 :
1529 : This changes the SUBREG_PROMOTED_VAR_P state of the object.
1530 : It is fairly painful to fix this on the fly, so we have
1531 : recorded which pseudos are affected and we look for SUBREGs
1532 : of those pseudos and fix them up. */
1533 :
1534 : static void
1535 961546 : reset_subreg_promoted_p (void)
1536 : {
1537 : /* This pass eliminates zero/sign extensions on pseudo regs found
1538 : in CHANGED_PSEUDOS. Elimination of those extensions changes if
1539 : the pseudos are known to hold values extended to wider modes
1540 : via SUBREG_PROMOTED_VAR. So we wipe the SUBREG_PROMOTED_VAR
1541 : state on all affected pseudos.
1542 :
1543 : But that is insufficient. We might have a copy from one REG
1544 : to another (possibly with the source register wrapped with a
1545 : SUBREG). We need to wipe SUBREG_PROMOTED_VAR on the transitive
1546 : closure of the original CHANGED_PSEUDOS and registers they're
1547 : connected to via copies. So expand the set. */
1548 961546 : expand_changed_pseudos ();
1549 :
1550 : /* If we removed an extension, that changed the promoted state
1551 : of the destination of that extension. Thus we need to go
1552 : find any SUBREGs that reference that pseudo and adjust their
1553 : SUBREG_PROMOTED_P state. */
1554 119020217 : for (rtx_insn *insn = get_insns(); insn; insn = NEXT_INSN (insn))
1555 : {
1556 118058671 : if (!NONDEBUG_INSN_P (insn))
1557 63461524 : continue;
1558 :
1559 54597147 : rtx pat = PATTERN (insn);
1560 54597147 : subrtx_var_iterator::array_type array;
1561 348480935 : FOR_EACH_SUBRTX_VAR (iter, array, pat, NONCONST)
1562 : {
1563 293883788 : rtx sub = *iter;
1564 :
1565 : /* We only care about SUBREGs. */
1566 293883788 : if (GET_CODE (sub) != SUBREG)
1567 292348141 : continue;
1568 :
1569 1535647 : const_rtx x = SUBREG_REG (sub);
1570 :
1571 : /* We only care if the inner object is a REG. */
1572 1535647 : if (!REG_P (x))
1573 758 : continue;
1574 :
1575 : /* And only if the SUBREG is a promoted var. */
1576 1534889 : if (!SUBREG_PROMOTED_VAR_P (sub))
1577 1529503 : continue;
1578 :
1579 5386 : if (bitmap_bit_p (changed_pseudos, REGNO (x)))
1580 0 : SUBREG_PROMOTED_VAR_P (sub) = 0;
1581 : }
1582 54597147 : }
1583 961546 : }
1584 :
1585 : /* Initialization of the ext-dce pass. Primarily this means
1586 : setting up the various bitmaps we utilize. */
1587 :
1588 : static void
1589 961546 : ext_dce_init (void)
1590 : {
1591 961546 : livein.create (last_basic_block_for_fn (cfun));
1592 961546 : livein.quick_grow_cleared (last_basic_block_for_fn (cfun));
1593 12622634 : for (int i = 0; i < last_basic_block_for_fn (cfun); i++)
1594 11661088 : bitmap_initialize (&livein[i], &bitmap_default_obstack);
1595 :
1596 961546 : auto_bitmap refs (&bitmap_default_obstack);
1597 961546 : df_get_exit_block_use_set (refs);
1598 :
1599 961546 : unsigned i;
1600 961546 : bitmap_iterator bi;
1601 4424225 : EXECUTE_IF_SET_IN_BITMAP (refs, 0, i, bi)
1602 3462679 : make_reg_live (&livein[EXIT_BLOCK], i);
1603 :
1604 961546 : livenow = BITMAP_ALLOC (NULL);
1605 961546 : all_blocks = BITMAP_ALLOC (NULL);
1606 961546 : changed_pseudos = BITMAP_ALLOC (NULL);
1607 961546 : promotable_dests = BITMAP_ALLOC (NULL);
1608 961546 : consumed_by_candidate = BITMAP_ALLOC (NULL);
1609 :
1610 12622634 : for (int i = 0; i < last_basic_block_for_fn (cfun); i++)
1611 11661088 : if (i != ENTRY_BLOCK && i != EXIT_BLOCK)
1612 9737996 : bitmap_set_bit (all_blocks, i);
1613 :
1614 961546 : modify = false;
1615 961546 : }
1616 :
1617 : /* Finalization of the ext-dce pass. Primarily this means
1618 : releasing up the various bitmaps we utilize. */
1619 :
1620 : static void
1621 961546 : ext_dce_finish (void)
1622 : {
1623 12622634 : for (unsigned i = 0; i < livein.length (); i++)
1624 11661088 : bitmap_clear (&livein[i]);
1625 961546 : livein.release ();
1626 :
1627 961546 : BITMAP_FREE (livenow);
1628 961546 : BITMAP_FREE (changed_pseudos);
1629 961546 : BITMAP_FREE (all_blocks);
1630 961546 : BITMAP_FREE (promotable_dests);
1631 961546 : BITMAP_FREE (consumed_by_candidate);
1632 961546 : promotion_candidates.release ();
1633 961546 : promotion_copies.release ();
1634 961546 : }
1635 :
1636 : /* Process block number BB_INDEX as part of the backward
1637 : simple dataflow analysis. Return TRUE if something in
1638 : this block changed or FALSE otherwise. */
1639 :
1640 : static bool
1641 26627293 : ext_dce_rd_transfer_n (int bb_index)
1642 : {
1643 : /* The ENTRY/EXIT blocks never change. */
1644 26627293 : if (bb_index == ENTRY_BLOCK || bb_index == EXIT_BLOCK)
1645 : return false;
1646 :
1647 22781109 : basic_block bb = BASIC_BLOCK_FOR_FN (cfun, bb_index);
1648 :
1649 : /* Make everything live that's live in the successors. */
1650 22781109 : bitmap_clear (livenow);
1651 22781109 : edge_iterator ei;
1652 22781109 : edge e;
1653 :
1654 57313177 : FOR_EACH_EDGE (e, ei, bb->succs)
1655 34532068 : bitmap_ior_into (livenow, &livein[e->dest->index]);
1656 :
1657 22781109 : ext_dce_process_bb (bb);
1658 :
1659 : /* We only allow widening the set of objects live at the start
1660 : of a block. Otherwise we run the risk of not converging. */
1661 22781109 : return bitmap_ior_into (&livein[bb_index], livenow);
1662 : }
1663 :
1664 : /* Dummy function for the df_simple_dataflow API. */
1665 33185314 : static bool ext_dce_rd_confluence_n (edge) { return true; }
1666 :
1667 : /* Use lifetime analyis to identify extensions that set bits that
1668 : are never read. Turn such extensions into SUBREGs instead which
1669 : can often be propagated away. */
1670 :
1671 : void
1672 961546 : ext_dce_execute (void)
1673 : {
1674 : /* Limit the amount of memory we use for livein, with 4 bits per
1675 : reg per basic-block including overhead that maps to one byte
1676 : per reg per basic-block. */
1677 961546 : uint64_t memory_request
1678 961546 : = (uint64_t)n_basic_blocks_for_fn (cfun) * max_reg_num ();
1679 961546 : if (memory_request / 1024 > (uint64_t)param_max_gcse_memory)
1680 : {
1681 0 : warning (OPT_Wdisabled_optimization,
1682 : "ext-dce disabled: %d basic blocks and %d registers; "
1683 : "increase %<--param max-gcse-memory%> above %wu",
1684 0 : n_basic_blocks_for_fn (cfun), max_reg_num (),
1685 : memory_request / 1024);
1686 0 : return;
1687 : }
1688 :
1689 : /* Some settings of SUBREG_PROMOTED_VAR_P are actively harmful
1690 : to this pass. Clear it for those cases. */
1691 961546 : maybe_clear_subreg_promoted_p ();
1692 961546 : df_analyze ();
1693 961546 : ext_dce_init ();
1694 :
1695 3846184 : do
1696 : {
1697 1923092 : df_simple_dataflow (DF_BACKWARD, NULL, NULL,
1698 : ext_dce_rd_confluence_n, ext_dce_rd_transfer_n,
1699 : all_blocks, df_get_postorder (DF_BACKWARD),
1700 : df_get_n_blocks (DF_BACKWARD));
1701 1923092 : modify = !modify;
1702 : }
1703 : while (modify);
1704 :
1705 961546 : reset_subreg_promoted_p ();
1706 :
1707 961546 : ext_dce_finish ();
1708 : }
1709 :
1710 :
namespace {

/* Static descriptor for the ext-dce RTL pass.  */

const pass_data pass_data_ext_dce =
{
  RTL_PASS, /* type */
  "ext_dce", /* name */
  OPTGROUP_NONE, /* optinfo_flags */
  TV_EXT_DCE, /* tv_id */
  PROP_cfglayout, /* properties_required */
  0, /* properties_provided */
  0, /* properties_destroyed */
  0, /* todo_flags_start */
  TODO_df_finish, /* todo_flags_finish */
};

class pass_ext_dce : public rtl_opt_pass
{
public:
  pass_ext_dce (gcc::context *ctxt)
    : rtl_opt_pass (pass_data_ext_dce, ctxt)
  {}

  /* opt_pass methods: */
  /* Run only when -fext-dce is enabled and we are optimizing.  */
  virtual bool gate (function *) { return flag_ext_dce && optimize > 0; }
  virtual unsigned int execute (function *)
  {
    ext_dce_execute ();
    return 0;
  }

}; // class pass_ext_dce

} // anon namespace
1744 :
/* Factory function used by the pass manager (see passes.def) to
   instantiate the ext-dce pass.  */

rtl_opt_pass *
make_pass_ext_dce (gcc::context *ctxt)
{
  return new pass_ext_dce (ctxt);
}
|