Line data Source code
1 : /* RTL dead zero/sign extension (code) elimination.
2 : Copyright (C) 2000-2026 Free Software Foundation, Inc.
3 :
4 : This file is part of GCC.
5 :
6 : GCC is free software; you can redistribute it and/or modify it under
7 : the terms of the GNU General Public License as published by the Free
8 : Software Foundation; either version 3, or (at your option) any later
9 : version.
10 :
11 : GCC is distributed in the hope that it will be useful, but WITHOUT ANY
12 : WARRANTY; without even the implied warranty of MERCHANTABILITY or
13 : FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
14 : for more details.
15 :
16 : You should have received a copy of the GNU General Public License
17 : along with GCC; see the file COPYING3. If not see
18 : <http://www.gnu.org/licenses/>. */
19 :
20 : #include "config.h"
21 : #include "system.h"
22 : #include "coretypes.h"
23 : #include "backend.h"
24 : #include "rtl.h"
25 : #include "tree.h"
26 : #include "memmodel.h"
27 : #include "insn-config.h"
28 : #include "emit-rtl.h"
29 : #include "expr.h"
30 : #include "recog.h"
31 : #include "cfganal.h"
32 : #include "tree-pass.h"
33 : #include "cfgrtl.h"
34 : #include "rtl-iter.h"
35 : #include "df.h"
36 : #include "print-rtl.h"
37 : #include "dbgcnt.h"
38 : #include "diagnostic-core.h"
39 : #include "target.h"
40 :
41 : /* These should probably move into a C++ class. */
42 : static vec<bitmap_head> livein;
43 : static bitmap all_blocks;
44 : static bitmap livenow;
45 : static bitmap changed_pseudos;
46 : static bool modify;
47 :
48 : /* Chain detection for promotion: we defer promotions and only apply them
49 : when they form chains (one candidate's result feeds another's operand).
50 : Standalone promotions are skipped as they cause regressions on targets
51 : with free sign extension (e.g., RISC-V W-suffix instructions). */
52 : struct promotion_candidate_info {
53 : rtx_insn *insn;
54 : rtx set;
55 : };
56 :
57 : static vec<promotion_candidate_info> promotion_candidates;
58 : static bitmap promotable_dests;
59 : static bitmap consumed_by_candidate;
60 :
61 : /* Copy pairs seen during the reverse scan (from optimized extensions).
62 : Used to propagate chain info transitively. */
63 : struct copy_info {
64 : unsigned int dest_regno;
65 : unsigned int src_regno;
66 : };
67 : static vec<copy_info> promotion_copies;
68 :
69 : /* We consider four bit groups for liveness:
70 : bit 0..7 (least significant byte)
71 : bit 8..15 (second least significant byte)
72 : bit 16..31
73 : bit 32..BITS_PER_WORD-1 */
74 :
75 : /* For the given REG, return the number of bit groups implied by the
76 : size of the REG's mode, up to a maximum of 4 (number of bit groups
77 : tracked by this pass).
78 :
79 : For partial integer and variable sized modes also return 4. This
80 : could possibly be refined for something like PSI mode, but it
81 : does not seem worth the effort. */
82 :
83 : static int
84 235487052 : group_limit (const_rtx reg)
85 : {
86 235487052 : machine_mode mode = GET_MODE (reg);
87 :
88 235487052 : if (!GET_MODE_BITSIZE (mode).is_constant ())
89 : return 4;
90 :
91 235487052 : int size = GET_MODE_SIZE (mode).to_constant ();
92 :
93 235487052 : size = exact_log2 (size);
94 :
95 235392966 : if (size < 0)
96 : return 4;
97 :
98 235392966 : size++;
99 235392966 : return (size > 4 ? 4 : size);
100 : }
101 :
102 : /* Make all bit groups live for REGNO in bitmap BMAP. For hard regs,
103 : we assume all groups are live. For a pseudo we consider the size
104 : of the pseudo to avoid creating unnecessarily live chunks of data. */
105 :
106 : static void
107 4709955 : make_reg_live (bitmap bmap, int regno)
108 : {
109 4709955 : int limit;
110 :
111 : /* For pseudos we can use the mode to limit how many bit groups
112 : are marked as live since a pseudo only has one mode. Hard
113 : registers have to be handled more conservatively. */
114 4709955 : if (regno > FIRST_PSEUDO_REGISTER)
115 : {
116 884006 : rtx reg = regno_reg_rtx[regno];
117 884006 : limit = group_limit (reg);
118 : }
119 : else
120 : limit = 4;
121 :
122 23220131 : for (int i = 0; i < limit; i++)
123 18510176 : bitmap_set_bit (bmap, regno * 4 + i);
124 4709955 : }
125 :
126 : /* Note this pass could be used to narrow memory loads too. It's
127 : not clear if that's profitable or not in general. */
128 :
129 : #define UNSPEC_P(X) (GET_CODE (X) == UNSPEC || GET_CODE (X) == UNSPEC_VOLATILE)
130 :
131 : /* If we know the destination of CODE only uses some low bits
132 : (say just the QI bits of an SI operation), then return true
133 : if we can propagate the need for just the subset of bits
134 : from the destination to the sources.
135 :
136 : FIXME: This is safe for operands 1 and 2 of an IF_THEN_ELSE, but not
137 : operand 0. Thus is likely would need some special casing to handle. */
138 :
139 : static bool
140 142032701 : safe_for_live_propagation (rtx_code code)
141 : {
142 : /* First handle rtx classes which as a whole are known to
143 : be either safe or unsafe. */
144 142032701 : switch (GET_RTX_CLASS (code))
145 : {
146 : case RTX_OBJ:
147 : case RTX_CONST_OBJ:
148 : return true;
149 :
150 : case RTX_COMPARE:
151 : case RTX_COMM_COMPARE:
152 : case RTX_TERNARY:
153 : return false;
154 :
155 73777040 : default:
156 73777040 : break;
157 : }
158 :
159 : /* What's left are specific codes. We only need to identify those
160 : which are safe. */
161 73777040 : switch (code)
162 : {
163 : /* These are trivially safe. */
164 : case SUBREG:
165 : case NOT:
166 : case ZERO_EXTEND:
167 : case SIGN_EXTEND:
168 : case TRUNCATE:
169 : case PLUS:
170 : case MINUS:
171 : case MULT:
172 : case SMUL_HIGHPART:
173 : case UMUL_HIGHPART:
174 : case AND:
175 : case IOR:
176 : case XOR:
177 : return true;
178 :
179 : /* We can propagate for the shifted operand, but not the shift
180 : count. The count is handled specially. */
181 : case ASHIFT:
182 : case LSHIFTRT:
183 : case ASHIFTRT:
184 : case SS_ASHIFT:
185 : case US_ASHIFT:
186 : return true;
187 :
188 : /* There may be other safe codes. If so they can be added
189 : individually when discovered. */
190 : default:
191 : return false;
192 : }
193 : }
194 :
195 : /* Clear bits in LIVENOW and set bits in LIVE_TMP for objects
196 : set/clobbered by OBJ contained in INSN.
197 :
198 : Conceptually it is always safe to ignore a particular destination
199 : here as that will result in more chunks of data being considered
200 : live. That's what happens when we "continue" the main loop when
201 : we see something we don't know how to handle such as a vector
202 : mode destination.
203 :
204 : The more accurate we are in identifying what objects (and chunks
205 : within an object) are set by INSN, the more aggressive the
206 : optimization phase during use handling will be. */
207 :
208 : static bool
209 137778816 : ext_dce_process_sets (rtx_insn *insn, rtx obj, bitmap live_tmp)
210 : {
211 137778816 : bool skipped_dest = false;
212 :
213 137778816 : subrtx_iterator::array_type array;
214 396399474 : FOR_EACH_SUBRTX (iter, array, obj, NONCONST)
215 : {
216 258620658 : const_rtx x = *iter;
217 :
218 : /* An EXPR_LIST (from call fusage) ends in NULL_RTX. */
219 258620658 : if (x == NULL_RTX)
220 9524957 : continue;
221 :
222 249095701 : if (UNSPEC_P (x))
223 572241 : continue;
224 :
225 248523460 : if (GET_CODE (x) == SET || GET_CODE (x) == CLOBBER)
226 : {
227 143419915 : unsigned bit = 0;
228 143419915 : x = SET_DEST (x);
229 :
230 : /* We don't support vector destinations or destinations
231 : wider than DImode. */
232 143419915 : scalar_mode outer_mode;
233 147303554 : if (!is_a <scalar_mode> (GET_MODE (x), &outer_mode)
234 90749471 : || GET_MODE_BITSIZE (outer_mode) > HOST_BITS_PER_WIDE_INT)
235 : {
236 : /* Skip the subrtxs of this destination. There is
237 : little value in iterating into the subobjects, so
238 : just skip them for a bit of efficiency. */
239 56554083 : skipped_dest = true;
240 56554083 : iter.skip_subrtxes ();
241 315174741 : continue;
242 : }
243 :
244 : /* We could have (strict_low_part (subreg ...)). We can not just
245 : strip the STRICT_LOW_PART as that would result in clearing
246 : some bits in LIVENOW that are still live. So process the
247 : STRICT_LOW_PART specially. */
248 86865832 : if (GET_CODE (x) == STRICT_LOW_PART)
249 : {
250 0 : x = XEXP (x, 0);
251 :
252 : /* The only valid operand of a STRICT_LOW_PART is a non
253 : paradoxical SUBREG. */
254 0 : gcc_assert (SUBREG_P (x)
255 : && !paradoxical_subreg_p (x)
256 : && SUBREG_BYTE (x).is_constant ());
257 :
258 : /* I think we should always see a REG here. But let's
259 : be sure. */
260 0 : gcc_assert (REG_P (SUBREG_REG (x)));
261 :
262 : /* The inner mode might be larger, just punt for
263 : that case. Remember, we can not just continue to process
264 : the inner RTXs due to the STRICT_LOW_PART. */
265 0 : if (!is_a <scalar_mode> (GET_MODE (SUBREG_REG (x)), &outer_mode)
266 0 : || GET_MODE_BITSIZE (outer_mode) > HOST_BITS_PER_WIDE_INT)
267 : {
268 : /* Skip the subrtxs of the STRICT_LOW_PART. We can't
269 : process them because it'll set objects as no longer
270 : live when they are in fact still live. */
271 0 : skipped_dest = true;
272 0 : iter.skip_subrtxes ();
273 0 : continue;
274 : }
275 :
276 : /* LIVE_TMP contains the set groups that are live-out and set in
277 : this insn. It is used to narrow the groups live-in for the
278 : inputs of this insn.
279 :
280 : The simple thing to do is mark all the groups as live, but
281 : that will significantly inhibit optimization.
282 :
283 : We also need to be careful in the case where we have an in-out
284 : operand. If we're not careful we'd clear LIVE_TMP
285 : incorrectly. */
286 0 : HOST_WIDE_INT rn = REGNO (SUBREG_REG (x));
287 0 : int limit = group_limit (SUBREG_REG (x));
288 0 : for (HOST_WIDE_INT i = 4 * rn; i < 4 * rn + limit; i++)
289 0 : if (bitmap_bit_p (livenow, i))
290 0 : bitmap_set_bit (live_tmp, i);
291 :
292 0 : if (bitmap_empty_p (live_tmp))
293 0 : make_reg_live (live_tmp, rn);
294 :
295 : /* The mode of the SUBREG tells us how many bits we can
296 : clear. */
297 0 : machine_mode mode = GET_MODE (x);
298 0 : HOST_WIDE_INT size
299 0 : = exact_log2 (GET_MODE_SIZE (mode).to_constant ()) + 1;
300 0 : bitmap_clear_range (livenow, 4 * rn, size);
301 :
302 : /* We have fully processed this destination. */
303 0 : iter.skip_subrtxes ();
304 0 : continue;
305 0 : }
306 :
307 : /* Phase one of destination handling. First remove any wrapper
308 : such as SUBREG or ZERO_EXTRACT. */
309 86865832 : unsigned HOST_WIDE_INT mask
310 86865832 : = GET_MODE_MASK (GET_MODE_INNER (GET_MODE (x)));
311 86865832 : if (SUBREG_P (x))
312 : {
313 : /* If we have a SUBREG destination that is too wide, just
314 : skip the destination rather than continuing this iterator.
315 : While continuing would be better, we'd need to strip the
316 : subreg and restart within the SET processing rather than
317 : the top of the loop which just complicates the flow even
318 : more. */
319 654497 : if (!is_a <scalar_mode> (GET_MODE (SUBREG_REG (x)), &outer_mode)
320 543350 : || GET_MODE_BITSIZE (outer_mode) > HOST_BITS_PER_WIDE_INT)
321 : {
322 111147 : skipped_dest = true;
323 111147 : iter.skip_subrtxes ();
324 111147 : continue;
325 : }
326 :
327 : /* We can safely strip a paradoxical subreg. The inner mode will
328 : be narrower than the outer mode. We'll clear fewer bits in
329 : LIVENOW than we'd like, but that's always safe. */
330 432691 : if (paradoxical_subreg_p (x))
331 : x = XEXP (x, 0);
332 422952 : else if (SUBREG_BYTE (x).is_constant ())
333 : {
334 422952 : bit = subreg_lsb (x).to_constant ();
335 422952 : mask = GET_MODE_MASK (GET_MODE (SUBREG_REG (x))) << bit;
336 422952 : gcc_assert (mask);
337 : x = SUBREG_REG (x);
338 : }
339 : else
340 : gcc_unreachable ();
341 : }
342 :
343 86754685 : if (GET_CODE (x) == ZERO_EXTRACT)
344 : {
345 : /* Unlike a SUBREG destination, a set of a ZERO_EXTRACT only
346 : modifies the bits referenced in the ZERO_EXTRACT, the rest
347 : remain the same. Thus we can not continue here, we must
348 : either figure out what part of the destination is modified
349 : or skip the sub-rtxs. */
350 3530 : skipped_dest = true;
351 3530 : iter.skip_subrtxes ();
352 3530 : continue;
353 : }
354 :
355 : /* BIT >= 64 indicates something went horribly wrong. */
356 86751155 : gcc_assert (bit <= HOST_BITS_PER_WIDE_INT - 1);
357 :
358 : /* Now handle the actual object that was changed. */
359 86751155 : if (REG_P (x))
360 : {
361 : /* LIVE_TMP contains the set groups that are live-out and set in
362 : this insn. It is used to narrow the groups live-in for the
363 : inputs of this insn.
364 :
365 : The simple thing to do is mark all the groups as live, but
366 : that will significantly inhibit optimization.
367 :
368 : We also need to be careful in the case where we have an in-out
369 : operand. If we're not careful we'd clear LIVE_TMP
370 : incorrectly. */
371 72764154 : HOST_WIDE_INT rn = REGNO (x);
372 72764154 : int limit = group_limit (x);
373 325498713 : for (HOST_WIDE_INT i = 4 * rn; i < 4 * rn + limit; i++)
374 252734559 : if (bitmap_bit_p (livenow, i))
375 244941926 : bitmap_set_bit (live_tmp, i);
376 :
377 72764154 : if (bitmap_empty_p (live_tmp))
378 1231352 : make_reg_live (live_tmp, rn);
379 :
380 : /* Now clear the bits known written by this instruction.
381 : Note that BIT need not be a power of two, consider a
382 : ZERO_EXTRACT destination. */
383 72764154 : int start = (bit < 8 ? 0 : bit < 16 ? 1 : bit < 32 ? 2 : 3);
384 77517969 : int end = ((mask & ~HOST_WIDE_INT_UC (0xffffffff)) ? 4
385 28007653 : : (mask & HOST_WIDE_INT_UC (0xffff0000)) ? 3
386 5546900 : : (mask & 0xff00) ? 2 : 1);
387 72764154 : bitmap_clear_range (livenow, 4 * rn + start, end - start);
388 : }
389 : /* Some ports generate (clobber (const_int)). */
390 13987001 : else if (CONST_INT_P (x))
391 0 : continue;
392 : else
393 13987001 : gcc_assert (CALL_P (insn)
394 : || MEM_P (x)
395 : || x == pc_rtx
396 : || GET_CODE (x) == SCRATCH);
397 :
398 86751155 : iter.skip_subrtxes ();
399 86751155 : }
400 105103545 : else if (GET_CODE (x) == COND_EXEC)
401 : {
402 : /* This isn't ideal, but may not be so bad in practice. */
403 0 : skipped_dest = true;
404 0 : iter.skip_subrtxes ();
405 : }
406 : }
407 137778816 : return skipped_dest;
408 137778816 : }
409 :
410 : /* INSN is a right shift and the second insn in a shift pair that is a
411 : sign or zero extension (SET is the single set associated with INSN).
412 :
413 : Replace the source of SET with NEW_SRC which is a source register
414 : from NEW_SRC_INSN (the left shift in the pair). This is effectively
415 : the same as the replacement we do for ZERO/SIGN extends on targets
416 : that support those insns. */
417 : static void
418 0 : ext_dce_try_optimize_rshift (rtx_insn *insn, rtx set, rtx new_src, rtx_insn *new_src_insn)
419 : {
420 : /* If the modes are not the same or one is a hard register, then
421 : conservatively do nothing. */
422 0 : if (GET_MODE (SET_SRC (set)) != GET_MODE (new_src)
423 0 : || !REG_P (XEXP (SET_SRC (set), 0))
424 0 : || !REG_P (new_src)
425 0 : || REGNO (XEXP (SET_SRC (set), 0)) < FIRST_PSEUDO_REGISTER
426 0 : || REGNO (new_src) < FIRST_PSEUDO_REGISTER)
427 : return;
428 :
429 0 : if (dump_file)
430 : {
431 0 : fprintf (dump_file, "Processing insn:\n");
432 0 : dump_insn_slim (dump_file, insn);
433 0 : fprintf (dump_file, "Trying to simplify pattern:\n");
434 0 : print_rtl_single (dump_file, SET_SRC (set));
435 : }
436 :
437 : /* We decided to turn do the optimization but allow it to be rejected for
438 : bisection purposes. */
439 0 : if (!dbg_cnt (::ext_dce))
440 : {
441 0 : if (dump_file)
442 0 : fprintf (dump_file, "Rejected due to debug counter.\n");
443 0 : return;
444 : }
445 :
446 : /* We're going to generate a fresh insn for the move, so put it
447 : into a sequence that we can emit after the current insn. */
448 0 : start_sequence ();
449 0 : emit_move_insn (SET_DEST (set), new_src);
450 0 : rtx_insn *seq = end_sequence ();
451 0 : emit_insn_after (seq, insn);
452 :
453 : /* Mark the destination as changed. */
454 0 : rtx x = SET_DEST (set);
455 0 : while (SUBREG_P (x) || GET_CODE (x) == ZERO_EXTRACT)
456 0 : x = XEXP (x, 0);
457 0 : gcc_assert (REG_P (x));
458 0 : bitmap_set_bit (changed_pseudos, REGNO (x));
459 :
460 0 : if (dump_file)
461 : {
462 0 : fprintf (dump_file, "Successfully transformed to:\n");
463 0 : print_rtl_single (dump_file, PATTERN (seq));
464 0 : fprintf (dump_file, "\n");
465 : }
466 :
467 0 : delete_insn (insn);
468 :
469 : /* If NEW_SRC died in its prior location, then we need to remove the
470 : death note and move it to the new location. */
471 0 : rtx note = find_regno_note (new_src_insn, REG_DEAD, REGNO (new_src));
472 0 : if (note)
473 : {
474 0 : remove_note (new_src_insn, note);
475 0 : add_reg_note (insn, REG_DEAD, new_src);
476 : }
477 : }
478 :
479 :
480 : /* INSN has a sign/zero extended source inside SET that we will
481 : try to turn into a SUBREG. If NEW_SRC is non-null, use that
482 : for the new source of INSN's set. That scenario only happens
483 : when we're optimizing a shift pair. */
484 : static void
485 8581 : ext_dce_try_optimize_extension (rtx_insn *insn, rtx set)
486 : {
487 8581 : rtx src = SET_SRC (set);
488 8581 : rtx inner = XEXP (src, 0);
489 :
490 : /* For sign-extending loads from memory, try to replace with a
491 : zero-extending load when the upper bits are dead. E.g. on RISC-V
492 : this turns lh+zext.h into just lhu. */
493 8581 : if (MEM_P (inner) && GET_CODE (src) == SIGN_EXTEND)
494 : {
495 28 : if (dump_file)
496 : {
497 0 : fprintf (dump_file, "Processing insn:\n");
498 0 : dump_insn_slim (dump_file, insn);
499 0 : fprintf (dump_file, "Trying to narrow sign_extend to zero_extend:\n");
500 0 : print_rtl_single (dump_file, SET_SRC (set));
501 : }
502 :
503 28 : if (!dbg_cnt (::ext_dce))
504 : {
505 0 : if (dump_file)
506 0 : fprintf (dump_file, "Rejected due to debug counter.\n");
507 0 : return;
508 : }
509 :
510 28 : rtx new_pattern = gen_rtx_ZERO_EXTEND (GET_MODE (src), inner);
511 28 : int ok = validate_change (insn, &SET_SRC (set), new_pattern, false);
512 :
513 28 : rtx x = SET_DEST (set);
514 28 : while (SUBREG_P (x) || GET_CODE (x) == ZERO_EXTRACT)
515 0 : x = XEXP (x, 0);
516 :
517 28 : gcc_assert (REG_P (x));
518 28 : if (ok)
519 : {
520 28 : bitmap_set_bit (changed_pseudos, REGNO (x));
521 28 : remove_reg_equal_equiv_notes (insn, false);
522 : }
523 :
524 28 : if (dump_file)
525 : {
526 0 : if (ok)
527 0 : fprintf (dump_file, "Successfully transformed to:\n");
528 : else
529 0 : fprintf (dump_file, "Failed transformation to:\n");
530 0 : print_rtl_single (dump_file, new_pattern);
531 0 : fprintf (dump_file, "\n");
532 : }
533 28 : return;
534 : }
535 :
536 : /* Avoid (subreg (mem)) and other constructs which may be valid RTL, but
537 : not useful for this optimization. */
538 8553 : if (!(REG_P (inner) || (SUBREG_P (inner) && REG_P (SUBREG_REG (inner)))))
539 : return;
540 :
541 5828 : rtx new_pattern;
542 5828 : if (dump_file)
543 : {
544 0 : fprintf (dump_file, "Processing insn:\n");
545 0 : dump_insn_slim (dump_file, insn);
546 0 : fprintf (dump_file, "Trying to simplify pattern:\n");
547 0 : print_rtl_single (dump_file, SET_SRC (set));
548 : }
549 :
550 : /* We decided to turn do the optimization but allow it to be rejected for
551 : bisection purposes. */
552 5828 : if (!dbg_cnt (::ext_dce))
553 : {
554 0 : if (dump_file)
555 0 : fprintf (dump_file, "Rejected due to debug counter.\n");
556 0 : return;
557 : }
558 :
559 11656 : new_pattern = simplify_gen_subreg (GET_MODE (src), inner,
560 5828 : GET_MODE (inner), 0);
561 : /* simplify_gen_subreg may fail in which case NEW_PATTERN will be NULL.
562 : We must not pass that as a replacement pattern to validate_change. */
563 5828 : if (new_pattern)
564 : {
565 5828 : int ok = validate_change (insn, &SET_SRC (set), new_pattern, false);
566 :
567 5828 : rtx x = SET_DEST (set);
568 5828 : while (SUBREG_P (x) || GET_CODE (x) == ZERO_EXTRACT)
569 0 : x = XEXP (x, 0);
570 :
571 5828 : gcc_assert (REG_P (x));
572 5828 : if (ok)
573 5828 : bitmap_set_bit (changed_pseudos, REGNO (x));
574 :
575 5828 : if (dump_file)
576 : {
577 0 : if (ok)
578 0 : fprintf (dump_file, "Successfully transformed to:\n");
579 : else
580 0 : fprintf (dump_file, "Failed transformation to:\n");
581 :
582 0 : print_rtl_single (dump_file, new_pattern);
583 0 : fprintf (dump_file, "\n");
584 : }
585 :
586 : /* INSN may have a REG_EQUAL note indicating that the value was
587 : sign or zero extended. That note is no longer valid since we've
588 : just removed the extension. Just wipe the notes. */
589 5828 : if (ok)
590 5828 : remove_reg_equal_equiv_notes (insn, false);
591 : }
592 : else
593 : {
594 0 : if (dump_file)
595 0 : fprintf (dump_file, "Unable to generate valid SUBREG expression.\n");
596 : }
597 : }
598 :
599 : /* Try to promote a narrow-mode operation wrapped in a sign/zero extension
600 : to the wider mode when the extended bits are dead. For example,
601 : (sign_extend:DI (plus:SI (x) (y))) -> (plus:DI (x') (y'))
602 : where x' and y' are the operands promoted to DI mode.
603 :
604 : This enables the combine pass to match wider-mode target patterns
605 : (e.g., sh2add on RISC-V) that cannot match the narrow-mode operation. */
606 :
607 : static void
608 0 : ext_dce_try_promote_operation (rtx_insn *insn, rtx set)
609 : {
610 0 : rtx src = SET_SRC (set);
611 :
612 : /* If the extension was already optimized away, nothing to do. */
613 0 : if (GET_CODE (src) != SIGN_EXTEND && GET_CODE (src) != ZERO_EXTEND)
614 0 : return;
615 :
616 0 : machine_mode outer_mode = GET_MODE (src);
617 0 : rtx inner = XEXP (src, 0);
618 :
619 : /* Only handle binary and unary arithmetic/logic operations. */
620 0 : if (!BINARY_P (inner) && !UNARY_P (inner))
621 : return;
622 :
623 0 : rtx_code inner_code = GET_CODE (inner);
624 :
625 : /* Restrict to operations whose result in the low bits is identical
626 : regardless of input width (i.e., no high-bit dependencies). */
627 0 : switch (inner_code)
628 : {
629 0 : case PLUS:
630 0 : case MINUS:
631 0 : case MULT:
632 0 : case NEG:
633 0 : case AND:
634 0 : case IOR:
635 0 : case XOR:
636 0 : case NOT:
637 0 : case ASHIFT:
638 0 : break;
639 : default:
640 : return;
641 : }
642 :
643 : /* Promote each operand to the outer mode. */
644 0 : int nops = BINARY_P (inner) ? 2 : 1;
645 0 : rtx new_ops[2];
646 :
647 0 : for (int i = 0; i < nops; i++)
648 : {
649 0 : rtx op = XEXP (inner, i);
650 :
651 0 : if (CONST_INT_P (op))
652 0 : new_ops[i] = op;
653 0 : else if (REG_P (op))
654 : {
655 0 : new_ops[i] = simplify_gen_subreg (outer_mode, op,
656 0 : GET_MODE (op), 0);
657 0 : if (!new_ops[i])
658 : return;
659 : }
660 0 : else if (SUBREG_P (op) && REG_P (SUBREG_REG (op)))
661 : {
662 : /* The inner register may already be in the target mode
663 : (e.g., subreg:SI (reg:DI ...) 0). Extract it directly
664 : rather than creating a paradoxical subreg of a subreg,
665 : which simplify_gen_subreg rejects. */
666 0 : rtx inner_reg = SUBREG_REG (op);
667 0 : if (GET_MODE (inner_reg) == outer_mode)
668 0 : new_ops[i] = inner_reg;
669 : else
670 : {
671 0 : new_ops[i] = simplify_gen_subreg (outer_mode, inner_reg,
672 : GET_MODE (inner_reg), 0);
673 0 : if (!new_ops[i])
674 : return;
675 : }
676 : }
677 : else
678 : return;
679 : }
680 :
681 : /* Build the promoted operation. */
682 0 : rtx new_src;
683 0 : if (BINARY_P (inner))
684 0 : new_src = gen_rtx_fmt_ee (inner_code, outer_mode,
685 : new_ops[0], new_ops[1]);
686 : else
687 0 : new_src = gen_rtx_fmt_e (inner_code, outer_mode, new_ops[0]);
688 :
689 0 : if (dump_file)
690 : {
691 0 : fprintf (dump_file, "Processing insn:\n");
692 0 : dump_insn_slim (dump_file, insn);
693 0 : fprintf (dump_file, "Trying to promote to wider mode:\n");
694 0 : print_rtl_single (dump_file, new_src);
695 : }
696 :
697 : /* We decided to try the promotion but allow it to be rejected for
698 : bisection purposes. */
699 0 : if (!dbg_cnt (::ext_dce))
700 : {
701 0 : if (dump_file)
702 0 : fprintf (dump_file, "Rejected due to debug counter.\n");
703 0 : return;
704 : }
705 :
706 0 : int ok = validate_change (insn, &SET_SRC (set), new_src, false);
707 :
708 0 : rtx x = SET_DEST (set);
709 0 : while (SUBREG_P (x) || GET_CODE (x) == ZERO_EXTRACT)
710 0 : x = XEXP (x, 0);
711 :
712 0 : gcc_assert (REG_P (x));
713 0 : if (ok)
714 0 : bitmap_set_bit (changed_pseudos, REGNO (x));
715 :
716 0 : if (dump_file)
717 : {
718 0 : if (ok)
719 0 : fprintf (dump_file, "Successfully promoted to:\n");
720 : else
721 0 : fprintf (dump_file, "Failed promotion to:\n");
722 0 : print_rtl_single (dump_file, new_src);
723 0 : fprintf (dump_file, "\n");
724 : }
725 :
726 0 : if (ok)
727 0 : remove_reg_equal_equiv_notes (insn, false);
728 : }
729 :
730 : /* Record INSN as a promotion candidate if it passes the same validity
731 : checks as ext_dce_try_promote_operation. We defer actual promotion
732 : until we can determine whether the candidate is part of a chain. */
733 :
734 : static void
735 2753 : ext_dce_record_promotion_candidate (rtx_insn *insn, rtx set)
736 : {
737 2753 : rtx src = SET_SRC (set);
738 :
739 2753 : if (GET_CODE (src) != SIGN_EXTEND && GET_CODE (src) != ZERO_EXTEND)
740 : return;
741 :
742 2753 : machine_mode outer_mode = GET_MODE (src);
743 2753 : rtx inner = XEXP (src, 0);
744 :
745 2753 : if (!BINARY_P (inner) && !UNARY_P (inner))
746 : return;
747 :
748 1701 : rtx_code inner_code = GET_CODE (inner);
749 :
750 1701 : switch (inner_code)
751 : {
752 0 : case PLUS:
753 0 : case MINUS:
754 0 : case MULT:
755 0 : case NEG:
756 0 : case AND:
757 0 : case IOR:
758 0 : case XOR:
759 0 : case NOT:
760 0 : case ASHIFT:
761 0 : break;
762 : default:
763 : return;
764 : }
765 :
766 : /* Dry-run: check that all operands can be promoted. */
767 0 : int nops = BINARY_P (inner) ? 2 : 1;
768 0 : for (int i = 0; i < nops; i++)
769 : {
770 0 : rtx op = XEXP (inner, i);
771 0 : if (CONST_INT_P (op))
772 0 : continue;
773 0 : else if (REG_P (op))
774 : {
775 0 : if (!simplify_gen_subreg (outer_mode, op, GET_MODE (op), 0))
776 : return;
777 : }
778 0 : else if (SUBREG_P (op) && REG_P (SUBREG_REG (op)))
779 : {
780 0 : rtx inner_reg = SUBREG_REG (op);
781 0 : if (GET_MODE (inner_reg) != outer_mode
782 0 : && !simplify_gen_subreg (outer_mode, inner_reg,
783 : GET_MODE (inner_reg), 0))
784 : return;
785 : }
786 : else
787 : return;
788 : }
789 :
790 : /* Find the destination register. */
791 0 : rtx dest = SET_DEST (set);
792 0 : while (SUBREG_P (dest) || GET_CODE (dest) == ZERO_EXTRACT)
793 0 : dest = XEXP (dest, 0);
794 0 : if (!REG_P (dest))
795 : return;
796 0 : unsigned int dest_regno = REGNO (dest);
797 :
798 : /* Record the candidate. */
799 0 : promotion_candidates.safe_push ({insn, set});
800 0 : bitmap_set_bit (promotable_dests, dest_regno);
801 :
802 : /* Mark register operands as consumed by a candidate. */
803 0 : for (int i = 0; i < nops; i++)
804 : {
805 0 : rtx op = XEXP (inner, i);
806 0 : if (REG_P (op))
807 0 : bitmap_set_bit (consumed_by_candidate, REGNO (op));
808 0 : else if (SUBREG_P (op) && REG_P (SUBREG_REG (op)))
809 0 : bitmap_set_bit (consumed_by_candidate, REGNO (SUBREG_REG (op)));
810 : }
811 : }
812 :
813 : /* Promote candidates that form chains: a candidate whose result feeds
814 : into another candidate's operand, or whose operand comes from another
815 : candidate's result. Skip standalone (isolated) promotions. */
816 :
817 : static void
818 9758674 : ext_dce_promote_chained_candidates (void)
819 : {
820 : /* Propagate chain info through copies recorded during the reverse scan.
821 : Since copies are recorded in reverse order, iterate forward to propagate
822 : promotable_dests (which was set late in the scan) through copies that
823 : were seen earlier. */
824 9758674 : unsigned cix;
825 9758674 : copy_info *cp;
826 9764502 : FOR_EACH_VEC_ELT (promotion_copies, cix, cp)
827 : {
828 5828 : if (bitmap_bit_p (promotable_dests, cp->src_regno))
829 0 : bitmap_set_bit (promotable_dests, cp->dest_regno);
830 5828 : if (bitmap_bit_p (consumed_by_candidate, cp->dest_regno))
831 0 : bitmap_set_bit (consumed_by_candidate, cp->src_regno);
832 : }
833 :
834 : unsigned ix;
835 : promotion_candidate_info *cand;
836 :
837 9758674 : FOR_EACH_VEC_ELT (promotion_candidates, ix, cand)
838 : {
839 : /* Find destination register. */
840 0 : rtx dest = SET_DEST (cand->set);
841 0 : while (SUBREG_P (dest) || GET_CODE (dest) == ZERO_EXTRACT)
842 0 : dest = XEXP (dest, 0);
843 0 : unsigned int dest_regno = REGNO (dest);
844 :
845 : /* Check if this candidate's result feeds into another candidate. */
846 0 : bool is_chained = bitmap_bit_p (consumed_by_candidate, dest_regno);
847 :
848 : /* Check if any operand comes from another candidate's result. */
849 0 : if (!is_chained)
850 : {
851 0 : rtx inner = XEXP (SET_SRC (cand->set), 0);
852 0 : int nops = BINARY_P (inner) ? 2 : 1;
853 0 : for (int i = 0; i < nops && !is_chained; i++)
854 : {
855 0 : rtx op = XEXP (inner, i);
856 0 : if (REG_P (op))
857 0 : is_chained = bitmap_bit_p (promotable_dests, REGNO (op));
858 0 : else if (SUBREG_P (op) && REG_P (SUBREG_REG (op)))
859 0 : is_chained = bitmap_bit_p (promotable_dests,
860 0 : REGNO (SUBREG_REG (op)));
861 : }
862 : }
863 :
864 0 : if (is_chained)
865 0 : ext_dce_try_promote_operation (cand->insn, cand->set);
866 0 : else if (dump_file)
867 : {
868 0 : fprintf (dump_file, "Skipping standalone promotion for insn:\n");
869 0 : dump_insn_slim (dump_file, cand->insn);
870 0 : fprintf (dump_file, "\n");
871 : }
872 : }
873 :
874 9758674 : promotion_candidates.truncate (0);
875 9758674 : promotion_copies.truncate (0);
876 9758674 : bitmap_clear (promotable_dests);
877 9758674 : bitmap_clear (consumed_by_candidate);
878 9758674 : }
879 :
880 : /* Some operators imply that their second operand is fully live,
881 : regardless of how many bits in the output are live. An example
882 : would be the shift count on a target without SHIFT_COUNT_TRUNCATED
883 : defined.
884 :
885 : Return TRUE if CODE is such an operator. FALSE otherwise. */
886 :
887 : static bool
888 76637193 : binop_implies_op2_fully_live (rtx_code code)
889 : {
890 0 : switch (code)
891 : {
892 : case ASHIFT:
893 : case LSHIFTRT:
894 : case ASHIFTRT:
895 : case ROTATE:
896 : case ROTATERT:
897 : case SS_ASHIFT:
898 : case US_ASHIFT:
899 : return !SHIFT_COUNT_TRUNCATED;
900 :
901 0 : default:
902 0 : return false;
903 : }
904 : }
905 :
906 : /* X, with code CODE, is an operation for which safe_for_live_propagation
907 : holds true, and bits set in MASK are live in the result. Compute a
908 : mask of (potentially) live bits in the non-constant inputs. In case of
909 : binop_implies_op2_fully_live (e.g. shifts), the computed mask may
910 : exclusively pertain to the first operand.
911 :
912 : This looks wrong as we may have some important operations embedded as
913 : operands of another operation. For example, we might have an extension
914 : wrapping a shift. It really feels like this needs to be recursing down
915 : into operands much more often. */
916 :
917 : unsigned HOST_WIDE_INT
918 71483689 : carry_backpropagate (unsigned HOST_WIDE_INT mask, enum rtx_code code, rtx x)
919 : {
920 73188300 : if (mask == 0)
921 : return 0;
922 :
923 73188258 : enum machine_mode mode = GET_MODE_INNER (GET_MODE (x));
924 73188258 : unsigned HOST_WIDE_INT mmask = GET_MODE_MASK (mode);
925 :
926 : /* While we don't try to optimize operations on types larger
927 : than 64 bits, we do want to make sure not to invoke undefined
928 : behavior when presented with such operations during use
929 : processing. The safe thing to do is to just return mmask
930 : for that scenario indicating every possible chunk is life. */
931 73188258 : scalar_int_mode smode;
932 73188258 : if (!is_a <scalar_int_mode> (mode, &smode)
933 61031141 : || GET_MODE_BITSIZE (smode) > HOST_BITS_PER_WIDE_INT)
934 : return mmask;
935 :
936 58963978 : switch (code)
937 : {
938 16532405 : case PLUS:
939 16532405 : case MINUS:
940 16532405 : case MULT:
941 16532405 : return (HOST_WIDE_INT_UC (2) << floor_log2 (mask)) - 1;
942 :
943 : /* We propagate for the shifted operand, but not the shift
944 : count. The count is handled specially. */
945 1412936 : case ASHIFT:
946 1412936 : if (CONST_INT_P (XEXP (x, 1))
947 2754617 : && UINTVAL (XEXP (x, 1)) < GET_MODE_BITSIZE (smode))
948 1341645 : return (HOST_WIDE_INT) mask >> INTVAL (XEXP (x, 1));
949 71291 : return (HOST_WIDE_INT_UC (2) << floor_log2 (mask)) - 1;
950 :
951 : /* We propagate for the shifted operand, but not the shift
952 : count. The count is handled specially. */
953 638442 : case LSHIFTRT:
954 638442 : if (CONST_INT_P (XEXP (x, 1))
955 1244954 : && UINTVAL (XEXP (x, 1)) < GET_MODE_BITSIZE (smode))
956 606484 : return mmask & (mask << INTVAL (XEXP (x, 1)));
957 : return mmask;
958 :
959 : /* We propagate for the shifted operand, but not the shift
960 : count. The count is handled specially. */
961 306371 : case ASHIFTRT:
962 306371 : if (CONST_INT_P (XEXP (x, 1))
963 598831 : && UINTVAL (XEXP (x, 1)) < GET_MODE_BITSIZE (smode))
964 : {
965 292416 : HOST_WIDE_INT sign = 0;
966 292416 : if (HOST_BITS_PER_WIDE_INT - clz_hwi (mask) + INTVAL (XEXP (x, 1))
967 292416 : > GET_MODE_BITSIZE (smode))
968 584832 : sign = HOST_WIDE_INT_1U << (GET_MODE_BITSIZE (smode) - 1);
969 292416 : return sign | (mmask & (mask << INTVAL (XEXP (x, 1))));
970 : }
971 : return mmask;
972 :
973 37549 : case SMUL_HIGHPART:
974 37549 : case UMUL_HIGHPART:
975 37549 : if (XEXP (x, 1) == const0_rtx)
976 : return 0;
977 37549 : if (XEXP (x, 1) == const1_rtx)
978 : return mmask;
979 37549 : if (CONST_INT_P (XEXP (x, 1)))
980 : {
981 0 : if (pow2p_hwi (INTVAL (XEXP (x, 1))))
982 0 : return mmask & (mask << (GET_MODE_BITSIZE (smode)
983 0 : - exact_log2 (INTVAL (XEXP (x, 1)))));
984 :
985 0 : int bits = (HOST_BITS_PER_WIDE_INT + GET_MODE_BITSIZE (smode)
986 0 : - clz_hwi (mask) - ctz_hwi (INTVAL (XEXP (x, 1))));
987 0 : if (bits < GET_MODE_BITSIZE (smode))
988 0 : return (HOST_WIDE_INT_1U << bits) - 1;
989 : }
990 : return mmask;
991 :
992 561291 : case SIGN_EXTEND:
993 561291 : if (!GET_MODE_BITSIZE (GET_MODE (x)).is_constant ()
994 561291 : || !GET_MODE_BITSIZE (GET_MODE (XEXP (x, 0))).is_constant ())
995 : return -1;
996 :
997 : /* We want the mode of the inner object. We need to ensure its
998 : sign bit is on in MASK. */
999 561291 : mode = GET_MODE_INNER (GET_MODE (XEXP (x, 0)));
1000 561291 : if (mask & ~GET_MODE_MASK (mode))
1001 560762 : mask |= HOST_WIDE_INT_1U << (GET_MODE_BITSIZE (mode).to_constant ()
1002 560762 : - 1);
1003 :
1004 : /* Recurse into the operand. */
1005 561291 : return carry_backpropagate (mask, GET_CODE (XEXP (x, 0)), XEXP (x, 0));
1006 :
1007 1143320 : case ZERO_EXTEND:
1008 1143320 : if (!GET_MODE_BITSIZE (GET_MODE (x)).is_constant ()
1009 1143320 : || !GET_MODE_BITSIZE (GET_MODE (XEXP (x, 0))).is_constant ())
1010 : return -1;
1011 :
1012 : /* Recurse into the operand. */
1013 1143320 : return carry_backpropagate (mask, GET_CODE (XEXP (x, 0)), XEXP (x, 0));
1014 :
1015 : /* If an AND/IOR unconditionally clears/sets bits in the output, then
1016 : we do not care about the liveness of those bits in the inputs.
1017 :
1018 : So AND MASK with the constant for AND and with the complemented constant
1019 : for IOR. */
1020 1542138 : case AND:
1021 1542138 : case IOR:
1022 1542138 : if (CONST_INT_P (XEXP (x, 1)))
1023 859320 : mask &= (code == AND
1024 859320 : ? UINTVAL (XEXP (x, 1))
1025 95710 : : ~UINTVAL (XEXP (x, 1)));
1026 : return mask;
1027 :
1028 : /* We propagate for the shifted operand, but not the shift
1029 : count. The count is handled specially. */
1030 0 : case SS_ASHIFT:
1031 0 : case US_ASHIFT:
1032 0 : if (CONST_INT_P (XEXP (x, 1))
1033 0 : && UINTVAL (XEXP (x, 1)) < GET_MODE_BITSIZE (smode))
1034 : {
1035 0 : return ((mmask & ~((unsigned HOST_WIDE_INT) mmask
1036 0 : >> (INTVAL (XEXP (x, 1))
1037 0 : + (XEXP (x, 1) != const0_rtx
1038 0 : && code == SS_ASHIFT))))
1039 0 : | ((HOST_WIDE_INT) mask >> INTVAL (XEXP (x, 1))));
1040 : }
1041 : return mmask;
1042 :
1043 : default:
1044 : return mask;
1045 : }
1046 : }
1047 :
1048 : /* Process uses in INSN contained in OBJ. Set appropriate bits in LIVENOW
1049 : for any chunks of pseudos that become live, potentially filtering using
1050 : bits from LIVE_TMP.
1051 :
1052 : If MODIFY is true, then optimize sign/zero extensions to SUBREGs when
1053 : the extended bits are never read and mark pseudos which had extensions
1054 : eliminated in CHANGED_PSEUDOS. */
1055 :
1056 : static void
1057 137778816 : ext_dce_process_uses (rtx_insn *insn, rtx obj,
1058 : bitmap live_tmp, bool skipped_dest)
1059 : {
1060 137778816 : subrtx_var_iterator::array_type array_var;
1061 757036665 : FOR_EACH_SUBRTX_VAR (iter, array_var, obj, NONCONST)
1062 : {
1063 : /* An EXPR_LIST (from call fusage) ends in NULL_RTX. */
1064 619257849 : rtx x = *iter;
1065 619257849 : if (x == NULL_RTX)
1066 9524957 : continue;
1067 :
1068 : /* So the basic idea in this FOR_EACH_SUBRTX_VAR loop is to
1069 : handle SETs explicitly, possibly propagating live information
1070 : into the uses.
1071 :
1072 : We may continue the loop at various points which will cause
1073 : iteration into the next level of RTL. Breaking from the loop
1074 : is never safe as it can lead us to fail to process some of the
1075 : RTL and thus not make objects live when necessary. */
1076 609732892 : enum rtx_code xcode = GET_CODE (x);
1077 609732892 : if (xcode == SET)
1078 : {
1079 121793322 : const_rtx dst = SET_DEST (x);
1080 121793322 : rtx src = SET_SRC (x);
1081 121793322 : const_rtx y;
1082 121793322 : unsigned HOST_WIDE_INT bit = 0;
1083 :
1084 : /* The code of the RHS of a SET. */
1085 121793322 : enum rtx_code code = GET_CODE (src);
1086 :
1087 : /* ?!? How much of this should mirror SET handling, potentially
1088 : being shared? */
1089 121793322 : if (SUBREG_P (dst) && subreg_lsb (dst).is_constant (&bit))
1090 : {
1091 587756 : if (bit >= HOST_BITS_PER_WIDE_INT)
1092 : bit = HOST_BITS_PER_WIDE_INT - 1;
1093 587756 : dst = SUBREG_REG (dst);
1094 : }
1095 121205566 : else if (GET_CODE (dst) == STRICT_LOW_PART)
1096 13308 : dst = XEXP (dst, 0);
1097 :
1098 : /* Main processing of the uses. Two major goals here.
1099 :
1100 : First, we want to try and propagate liveness (or the lack
1101 : thereof) from the destination register to the source
1102 : register(s).
1103 :
1104 : Second, if the source is an extension, try to optimize
1105 : it into a SUBREG. The SUBREG form indicates we don't
1106 : care about the upper bits and will usually be copy
1107 : propagated away.
1108 :
1109 : If we fail to handle something in here, the expectation
1110 : is the iterator will dive into the sub-components and
1111 : mark all the chunks in any found REGs as live. */
1112 121793322 : if (REG_P (dst) && safe_for_live_propagation (code))
1113 : {
1114 : /* Create a mask representing the bits of this output
1115 : operand that are live after this insn. We can use
1116 : this information to refine the live in state of
1117 : inputs to this insn in many cases.
1118 :
1119 : We have to do this on a per SET basis, we might have
1120 : an INSN with multiple SETS, some of which can narrow
1121 : the source operand liveness, some of which may not. */
1122 71483689 : unsigned HOST_WIDE_INT dst_mask = 0;
1123 71483689 : HOST_WIDE_INT rn = REGNO (dst);
1124 71483689 : unsigned HOST_WIDE_INT mask_array[]
1125 : = { 0xff, 0xff00, HOST_WIDE_INT_UC (0xffff0000),
1126 : -HOST_WIDE_INT_UC (0x100000000) };
1127 357418445 : for (int i = 0; i < 4; i++)
1128 285934756 : if (bitmap_bit_p (live_tmp, 4 * rn + i))
1129 228887869 : dst_mask |= mask_array[i];
1130 71483689 : dst_mask >>= bit;
1131 :
1132 : /* If we ignored a destination during set processing, then
1133 : consider all the bits live. */
1134 71483689 : if (skipped_dest)
1135 25220965 : dst_mask = -1;
1136 :
1137 71483689 : dst_mask = carry_backpropagate (dst_mask, code, src);
1138 :
1139 : /* ??? Could also handle ZERO_EXTRACT / SIGN_EXTRACT
1140 : of the source specially to improve optimization. */
1141 71483689 : if (code == SIGN_EXTEND || code == ZERO_EXTEND)
1142 : {
1143 1716370 : rtx inner = XEXP (src, 0);
1144 1716370 : unsigned HOST_WIDE_INT src_mask
1145 1716370 : = GET_MODE_MASK (GET_MODE_INNER (GET_MODE (inner)));
1146 :
1147 : /* DST_MASK could be zero if we had something in the SET
1148 : that we couldn't handle. */
1149 1716370 : if (modify && !skipped_dest && (dst_mask & ~src_mask) == 0)
1150 : {
1151 8581 : ext_dce_try_optimize_extension (insn, x);
1152 :
1153 : /* If the extension was optimized to a copy, propagate
1154 : chain info through it: if the dest is consumed by a
1155 : promotion candidate (seen later in reverse scan),
1156 : the source register is transitively consumed too. */
1157 8581 : rtx opt_src = SET_SRC (x);
1158 8581 : if (GET_CODE (opt_src) != SIGN_EXTEND
1159 8581 : && GET_CODE (opt_src) != ZERO_EXTEND)
1160 : {
1161 5828 : rtx copy_dest = SET_DEST (x);
1162 5828 : while (SUBREG_P (copy_dest)
1163 5828 : || GET_CODE (copy_dest) == ZERO_EXTRACT)
1164 0 : copy_dest = XEXP (copy_dest, 0);
1165 :
1166 5828 : rtx copy_src = opt_src;
1167 5828 : if (SUBREG_P (copy_src))
1168 5522 : copy_src = SUBREG_REG (copy_src);
1169 :
1170 5828 : if (REG_P (copy_dest) && REG_P (copy_src))
1171 : {
1172 5828 : if (bitmap_bit_p (consumed_by_candidate,
1173 5828 : REGNO (copy_dest)))
1174 0 : bitmap_set_bit (consumed_by_candidate,
1175 0 : REGNO (copy_src));
1176 5828 : if (bitmap_bit_p (promotable_dests,
1177 5828 : REGNO (copy_src)))
1178 0 : bitmap_set_bit (promotable_dests,
1179 0 : REGNO (copy_dest));
1180 5828 : promotion_copies.safe_push (
1181 5828 : {REGNO (copy_dest), REGNO (copy_src)});
1182 : }
1183 : }
1184 : else
1185 2753 : ext_dce_record_promotion_candidate (insn, x);
1186 : }
1187 :
1188 : /* Stripping the extension here just seems wrong on multiple
1189 : levels. It's source side handling, so it seems like it
1190 : belongs in the loop below. Stripping here also makes it
1191 : harder than necessary to properly handle live bit groups
1192 : for (ANY_EXTEND (SUBREG)) where the SUBREG has
1193 : SUBREG_PROMOTED state. */
1194 1716370 : dst_mask &= src_mask;
1195 1716370 : src = XEXP (src, 0);
1196 1716370 : code = GET_CODE (src);
1197 : }
1198 :
1199 : /* Special case for (sub)targets that do not have extension
1200 : insns (and thus use shifts). We want to detect when we have
1201 : a shift pair and treat the pair as-if was an extension.
1202 :
1203 : Key on the right shift and use (for now) simplistic tests
1204 : to find the corresponding left shift. */
1205 71483689 : scalar_mode outer_mode;
1206 71483689 : if ((code == LSHIFTRT || code == ASHIFTRT)
1207 1038030 : && CONST_INT_P (XEXP (src, 1))
1208 1143340 : && (INTVAL (XEXP (src, 1)) == BITS_PER_WORD - 8
1209 1139126 : || INTVAL (XEXP (src, 1)) == BITS_PER_WORD - 16
1210 978339 : || INTVAL (XEXP (src, 1)) == BITS_PER_WORD - 32)
1211 71549543 : && is_a <scalar_mode> (GET_MODE (src), &outer_mode)
1212 71549543 : && GET_MODE_BITSIZE (outer_mode) <= HOST_BITS_PER_WIDE_INT)
1213 : {
1214 : /* So we have a right shift that could correspond to
1215 : the second in a pair impementing QI, HI or SI -> DI
1216 : extension. See if we can find the left shift. For
1217 : now, just look one real instruction back. */
1218 65710 : rtx_insn *prev_insn = prev_nonnote_nondebug_insn_bb (insn);
1219 :
1220 : /* The previous insn must be a left shift by the same
1221 : amount. */
1222 65710 : rtx prev_set;
1223 65710 : if (prev_insn
1224 62890 : && (prev_set = single_set (prev_insn))
1225 : /* The destination of the left shift must be the
1226 : source of the right shift. */
1227 62809 : && SET_DEST (prev_set) == XEXP (src, 0)
1228 30907 : && GET_CODE (SET_SRC (prev_set)) == ASHIFT
1229 811 : && CONST_INT_P (XEXP (SET_SRC (prev_set), 1))
1230 : /* The counts must match. */
1231 65710 : && (INTVAL (XEXP (src, 1))
1232 795 : == INTVAL (XEXP (SET_SRC (prev_set), 1))))
1233 : {
1234 15 : unsigned HOST_WIDE_INT src_mask = GET_MODE_BITSIZE (GET_MODE (src)).to_constant ();
1235 15 : src_mask -= INTVAL (XEXP (src, 1));
1236 15 : src_mask = (HOST_WIDE_INT_1U << src_mask) - 1;
1237 :
1238 : /* DST_MASK has been adjusted for INSN. We need its original value. */
1239 15 : unsigned HOST_WIDE_INT tmp_mask = 0;
1240 75 : for (int i = 0; i < 4; i++)
1241 60 : if (bitmap_bit_p (live_tmp, 4 * rn + i))
1242 15 : tmp_mask |= mask_array[i];
1243 15 : tmp_mask >>= bit;
1244 :
1245 15 : if (modify && !skipped_dest && (tmp_mask & ~src_mask) == 0)
1246 : {
1247 0 : ext_dce_try_optimize_rshift (insn, x, XEXP (SET_SRC (prev_set), 0), prev_insn);
1248 :
1249 : /* These may not strictly be necessary, but we might as well try and be
1250 : as accurate as possible. The RHS is now a simple REG. */
1251 0 : dst_mask = src_mask;
1252 0 : src = XEXP (SET_SRC (prev_set), 0);
1253 0 : code = GET_CODE (src);
1254 : }
1255 : }
1256 : }
1257 :
1258 : /* Optimization is done at this point. We just want to make
1259 : sure everything that should get marked as live is marked
1260 : from here onward. */
1261 :
1262 : /* We will handle the other operand of a binary operator
1263 : at the bottom of the loop by resetting Y. */
1264 71483689 : if (BINARY_P (src))
1265 22510355 : y = XEXP (src, 0);
1266 : else
1267 : y = src;
1268 :
1269 : /* We're inside a SET and want to process the source operands
1270 : making things live. Breaking from this loop will cause
1271 : the iterator to work on sub-rtxs, so it is safe to break
1272 : if we see something we don't know how to handle.
1273 :
1274 : This code is just hokey as it really just handles trivial
1275 : unary and binary cases. Otherwise the loop exits and we
1276 : continue iterating on sub-rtxs, but outside the set context. */
1277 : unsigned HOST_WIDE_INT save_mask = dst_mask;
1278 115525199 : for (;;)
1279 : {
1280 : /* In general we want to restore DST_MASK before each loop
1281 : iteration. The exception is when the opcode implies that
1282 : the other operand is fully live. That's handled by
1283 : changing SAVE_MASK below. */
1284 93504444 : dst_mask = save_mask;
1285 : /* Strip an outer paradoxical subreg. The bits outside
1286 : the inner mode are don't cares. So we can just strip
1287 : and process the inner object. */
1288 93504444 : if (paradoxical_subreg_p (y))
1289 : y = XEXP (y, 0);
1290 93402515 : else if (SUBREG_P (y) && subreg_lsb (y).is_constant (&bit))
1291 : {
1292 : /* If !TRULY_NOOP_TRUNCATION_MODES_P, the mode
1293 : change performed by Y would normally need to be a
1294 : TRUNCATE rather than a SUBREG. It is probably the
1295 : guarantee provided by SUBREG_PROMOTED_VAR_P that
1296 : allows the SUBREG in Y as an exception. We must
1297 : therefore preserve that guarantee and treat the
1298 : upper bits of the inner register as live
1299 : regardless of the outer code. See PR 120050. */
1300 1921278 : if (!REG_P (SUBREG_REG (y))
1301 1921278 : || (SUBREG_PROMOTED_VAR_P (y)
1302 13482 : && (!TRULY_NOOP_TRUNCATION_MODES_P (
1303 : GET_MODE (y),
1304 : GET_MODE (SUBREG_REG (y))))))
1305 : break;
1306 :
1307 : /* If this is a wide object (more bits than we can fit
1308 : in a HOST_WIDE_INT), then just break from the SET
1309 : context. That will cause the iterator to walk down
1310 : into the subrtx and if we land on a REG we'll mark
1311 : the whole think live. */
1312 1920230 : if (bit >= HOST_BITS_PER_WIDE_INT)
1313 : break;
1314 :
1315 : /* The SUBREG's mode determines the live width. */
1316 1712694 : if (dst_mask)
1317 : {
1318 1712534 : dst_mask <<= bit;
1319 1712534 : if (!dst_mask)
1320 0 : dst_mask = -HOST_WIDE_INT_UC (0x100000000);
1321 : }
1322 1712694 : y = SUBREG_REG (y);
1323 : }
1324 :
1325 93295860 : if (REG_P (y))
1326 : {
1327 : /* We have found the use of a register. We need to mark
1328 : the appropriate chunks of the register live. The mode
1329 : of the REG is a starting point. We may refine that
1330 : based on what chunks in the output were live. */
1331 49651359 : rn = 4 * REGNO (y);
1332 49651359 : unsigned HOST_WIDE_INT tmp_mask = dst_mask;
1333 :
1334 : /* If the RTX code for the SET_SRC is not one we can
1335 : propagate destination liveness through, then just
1336 : set the mask to the mode's mask. */
1337 49651359 : if (!safe_for_live_propagation (code))
1338 33025 : tmp_mask
1339 66050 : = GET_MODE_MASK (GET_MODE_INNER (GET_MODE (y)));
1340 :
1341 49651359 : if (tmp_mask & 0xff)
1342 49164345 : bitmap_set_bit (livenow, rn);
1343 49651359 : if (tmp_mask & 0xff00)
1344 47269601 : bitmap_set_bit (livenow, rn + 1);
1345 49651359 : if (tmp_mask & HOST_WIDE_INT_UC (0xffff0000))
1346 46971184 : bitmap_set_bit (livenow, rn + 2);
1347 49651359 : if (tmp_mask & -HOST_WIDE_INT_UC (0x100000000))
1348 40424673 : bitmap_set_bit (livenow, rn + 3);
1349 : }
1350 43644501 : else if (!CONSTANT_P (y))
1351 : break;
1352 :
1353 : /* We might have (ashift (const_int 1) (reg...))
1354 : By setting dst_mask we can continue iterating on the
1355 : the next operand and it will be considered fully live.
1356 :
1357 : Note that since we restore DST_MASK from SAVE_MASK at the
1358 : top of the loop, we have to change SAVE_MASK to get the
1359 : semantics we want. */
1360 76637193 : if (binop_implies_op2_fully_live (GET_CODE (src)))
1361 2463738 : save_mask = -1;
1362 :
1363 : /* If this was anything but a binary operand, break the inner
1364 : loop. This is conservatively correct as it will cause the
1365 : iterator to look at the sub-rtxs outside the SET context. */
1366 76637193 : if (!BINARY_P (src))
1367 : break;
1368 :
1369 : /* We processed the first operand of a binary operator. Now
1370 : handle the second. */
1371 22020755 : y = XEXP (src, 1), src = pc_rtx;
1372 22020755 : }
1373 :
1374 : /* These are leaf nodes, no need to iterate down into them. */
1375 71483689 : if (REG_P (y) || CONSTANT_P (y))
1376 54616438 : iter.skip_subrtxes ();
1377 : }
1378 : }
1379 : /* If we are reading the low part of a SUBREG, then we can
1380 : refine liveness of the input register, otherwise let the
1381 : iterator continue into SUBREG_REG. */
1382 487939570 : else if (SUBREG_P (x)
1383 1315066 : && REG_P (SUBREG_REG (x))
1384 1313202 : && !paradoxical_subreg_p (x)
1385 1289393 : && subreg_lowpart_p (x)
1386 1011775 : && GET_MODE_BITSIZE (GET_MODE (x)).is_constant ()
1387 489963120 : && GET_MODE_BITSIZE (GET_MODE (x)).to_constant () <= 32)
1388 : {
1389 498907 : HOST_WIDE_INT size = GET_MODE_BITSIZE (GET_MODE (x)).to_constant ();
1390 498907 : HOST_WIDE_INT rn = 4 * REGNO (SUBREG_REG (x));
1391 :
1392 : /* If this is a promoted subreg, then more of it may be live than
1393 : is otherwise obvious. */
1394 498907 : if (SUBREG_PROMOTED_VAR_P (x))
1395 4432 : size = GET_MODE_BITSIZE (GET_MODE (SUBREG_REG (x))).to_constant ();
1396 :
1397 498907 : bitmap_set_bit (livenow, rn);
1398 498907 : if (size > 8)
1399 324850 : bitmap_set_bit (livenow, rn + 1);
1400 324850 : if (size > 16)
1401 283673 : bitmap_set_bit (livenow, rn + 2);
1402 283673 : if (size > 32)
1403 74 : bitmap_set_bit (livenow, rn + 3);
1404 498907 : iter.skip_subrtxes ();
1405 : }
1406 : /* If we have a register reference that is not otherwise handled,
1407 : just assume all the chunks are live. */
1408 487440663 : else if (REG_P (x))
1409 161838892 : bitmap_set_range (livenow, REGNO (x) * 4, group_limit (x));
1410 : }
1411 137778816 : }
1412 :
1413 : /* Process a single basic block BB with current liveness information
1414 : in LIVENOW, returning updated liveness information.
1415 :
1416 : If MODIFY is true, then this is the last pass and unnecessary
1417 : extensions should be eliminated when possible. If an extension
1418 : is removed, the source pseudo is marked in CHANGED_PSEUDOS. */
1419 :
1420 : static void
1421 22861547 : ext_dce_process_bb (basic_block bb)
1422 : {
1423 22861547 : rtx_insn *insn;
1424 :
1425 303895741 : FOR_BB_INSNS_REVERSE (bb, insn)
1426 : {
1427 281034194 : if (!NONDEBUG_INSN_P (insn))
1428 152780335 : continue;
1429 :
1430 : /* Live-out state of the destination of this insn. We can
1431 : use this to refine the live-in state of the sources of
1432 : this insn in many cases. */
1433 128253859 : bitmap live_tmp = BITMAP_ALLOC (NULL);
1434 :
1435 : /* First process any sets/clobbers in INSN. */
1436 128253859 : bool skipped_dest = ext_dce_process_sets (insn, PATTERN (insn), live_tmp);
1437 :
1438 : /* CALL_INSNs need processing their fusage data. */
1439 128253859 : if (CALL_P (insn))
1440 9524957 : skipped_dest |= ext_dce_process_sets (insn,
1441 : CALL_INSN_FUNCTION_USAGE (insn),
1442 : live_tmp);
1443 :
1444 : /* And now uses, optimizing away SIGN/ZERO extensions as we go. */
1445 128253859 : ext_dce_process_uses (insn, PATTERN (insn), live_tmp, skipped_dest);
1446 :
1447 : /* A nonlocal goto implicitly uses the frame pointer. */
1448 128253859 : if (JUMP_P (insn) && find_reg_note (insn, REG_NON_LOCAL_GOTO, NULL_RTX))
1449 : {
1450 1130 : bitmap_set_range (livenow, FRAME_POINTER_REGNUM * 4, 4);
1451 1130 : if (!HARD_FRAME_POINTER_IS_FRAME_POINTER)
1452 1130 : bitmap_set_range (livenow, HARD_FRAME_POINTER_REGNUM * 4, 4);
1453 : }
1454 :
1455 : /* And process fusage data for the use as well. */
1456 128253859 : if (CALL_P (insn))
1457 : {
1458 9524957 : if (!FAKE_CALL_P (insn))
1459 9524897 : bitmap_set_range (livenow, STACK_POINTER_REGNUM * 4, 4);
1460 :
1461 : /* If this is not a call to a const fucntion, then assume it
1462 : can read any global register. */
1463 9524957 : if (!RTL_CONST_CALL_P (insn))
1464 855648639 : for (unsigned i = 0; i < FIRST_PSEUDO_REGISTER; i++)
1465 846448116 : if (global_regs[i])
1466 230 : bitmap_set_range (livenow, i * 4, 4);
1467 :
1468 9524957 : ext_dce_process_uses (insn, CALL_INSN_FUNCTION_USAGE (insn), live_tmp, false);
1469 : }
1470 :
1471 128253859 : BITMAP_FREE (live_tmp);
1472 : }
1473 :
1474 22861547 : if (modify)
1475 9758674 : ext_dce_promote_chained_candidates ();
1476 22861547 : }
1477 :
1478 : /* SUBREG_PROMOTED_VAR_P is set by the gimple->rtl optimizers and
1479 : is usually helpful. However, in some cases setting the value when
1480 : it not strictly needed can cause this pass to miss optimizations.
1481 :
1482 : Specifically consider (set (mem) (subreg (reg))). If set in that
1483 : case it will cause more bit groups to be live for REG than would
1484 : be strictly necessary which in turn can inhibit extension removal.
1485 :
1486 : So do a pass over the IL wiping the SUBREG_PROMOTED_VAR_P when it
1487 : is obviously not needed. */
1488 :
1489 : static void
1490 966050 : maybe_clear_subreg_promoted_p (void)
1491 : {
1492 120258508 : for (rtx_insn *insn = get_insns(); insn; insn = NEXT_INSN (insn))
1493 : {
1494 119292458 : if (!NONDEBUG_INSN_P (insn))
1495 64457056 : continue;
1496 :
1497 54835402 : rtx set = single_set (insn);
1498 54835402 : if (!set)
1499 3682349 : continue;
1500 :
1501 : /* There may be other cases where we should clear, but for
1502 : now, this is the only known case where it causes problems. */
1503 51153053 : if (MEM_P (SET_DEST (set)) && SUBREG_P (SET_SRC (set))
1504 71105 : && GET_MODE (SET_DEST (set)) <= GET_MODE (SUBREG_REG (SET_SRC (set))))
1505 61291 : SUBREG_PROMOTED_VAR_P (SET_SRC (set)) = 0;
1506 : }
1507 966050 : }
1508 :
1509 : /* Walk the IL and build the transitive closure of all the REGs tied
1510 : together by copies where either the source or destination is
1511 : marked in CHANGED_PSEUDOS. */
1512 :
1513 : static void
1514 966050 : expand_changed_pseudos (void)
1515 : {
1516 : /* Build a vector of registers related by a copy. This is meant to
1517 : speed up the next step by avoiding full IL walks. */
1518 966050 : struct copy_pair { rtx first; rtx second; };
1519 966050 : auto_vec<copy_pair> pairs;
1520 120258508 : for (rtx_insn *insn = get_insns(); insn; insn = NEXT_INSN (insn))
1521 : {
1522 119292458 : if (!NONDEBUG_INSN_P (insn))
1523 64457056 : continue;
1524 :
1525 54835402 : rtx pat = PATTERN (insn);
1526 :
1527 : /* Simple copies to a REG from another REG or SUBREG of a REG. */
1528 54835402 : if (GET_CODE (pat) == SET
1529 43225743 : && REG_P (SET_DEST (pat))
1530 30738844 : && (REG_P (SET_SRC (pat))
1531 22265183 : || (SUBREG_P (SET_SRC (pat))
1532 375749 : && REG_P (SUBREG_REG (SET_SRC (pat))))))
1533 : {
1534 375295 : rtx src = (REG_P (SET_SRC (pat))
1535 8848956 : ? SET_SRC (pat)
1536 : : SUBREG_REG (SET_SRC (pat)));
1537 8848956 : pairs.safe_push ({ SET_DEST (pat), src });
1538 : }
1539 :
1540 : /* Simple copies to a REG from another REG or SUBREG of a REG
1541 : held inside a PARALLEL. */
1542 54835402 : if (GET_CODE (pat) == PARALLEL)
1543 : {
1544 25002867 : for (int i = XVECLEN (pat, 0) - 1; i >= 0; i--)
1545 : {
1546 16785137 : rtx elem = XVECEXP (pat, 0, i);
1547 :
1548 16785137 : if (GET_CODE (elem) == SET
1549 8437929 : && REG_P (SET_DEST (elem))
1550 8287962 : && (REG_P (SET_SRC (elem))
1551 8287962 : || (SUBREG_P (SET_SRC (elem))
1552 0 : && REG_P (SUBREG_REG (SET_SRC (elem))))))
1553 : {
1554 0 : rtx src = (REG_P (SET_SRC (elem))
1555 0 : ? SET_SRC (elem)
1556 : : SUBREG_REG (SET_SRC (elem)));
1557 0 : pairs.safe_push ({ SET_DEST (elem), src });
1558 : }
1559 : }
1560 8217730 : continue;
1561 8217730 : }
1562 : }
1563 :
1564 : /* Now we have a vector with copy pairs. Iterate over that list
1565 : updating CHANGED_PSEUDOS as we go. Eliminate copies from the
1566 : list as we go as they don't need further processing. */
1567 : bool changed = true;
1568 1932200 : while (changed)
1569 : {
1570 : changed = false;
1571 : unsigned int i;
1572 : copy_pair *p;
1573 10782586 : FOR_EACH_VEC_ELT (pairs, i, p)
1574 : {
1575 8850386 : if (bitmap_bit_p (changed_pseudos, REGNO (p->second))
1576 8850386 : && bitmap_set_bit (changed_pseudos, REGNO (p->first)))
1577 : {
1578 116 : pairs.unordered_remove (i);
1579 116 : changed = true;
1580 : }
1581 : }
1582 : }
1583 966050 : }
1584 :
1585 : /* We optimize away sign/zero extensions in this pass and replace
1586 : them with SUBREGs indicating certain bits are don't cares.
1587 :
1588 : This changes the SUBREG_PROMOTED_VAR_P state of the object.
1589 : It is fairly painful to fix this on the fly, so we have
1590 : recorded which pseudos are affected and we look for SUBREGs
1591 : of those pseudos and fix them up. */
1592 :
1593 : static void
1594 966050 : reset_subreg_promoted_p (void)
1595 : {
1596 : /* This pass eliminates zero/sign extensions on pseudo regs found
1597 : in CHANGED_PSEUDOS. Elimination of those extensions changes if
1598 : the pseudos are known to hold values extended to wider modes
1599 : via SUBREG_PROMOTED_VAR. So we wipe the SUBREG_PROMOTED_VAR
1600 : state on all affected pseudos.
1601 :
1602 : But that is insufficient. We might have a copy from one REG
1603 : to another (possibly with the source register wrapped with a
1604 : SUBREG). We need to wipe SUBREG_PROMOTED_VAR on the transitive
1605 : closure of the original CHANGED_PSEUDOS and registers they're
1606 : connected to via copies. So expand the set. */
1607 966050 : expand_changed_pseudos ();
1608 :
1609 : /* If we removed an extension, that changed the promoted state
1610 : of the destination of that extension. Thus we need to go
1611 : find any SUBREGs that reference that pseudo and adjust their
1612 : SUBREG_PROMOTED_P state. */
1613 120258508 : for (rtx_insn *insn = get_insns(); insn; insn = NEXT_INSN (insn))
1614 : {
1615 119292458 : if (!NONDEBUG_INSN_P (insn))
1616 64457056 : continue;
1617 :
1618 54835402 : rtx pat = PATTERN (insn);
1619 54835402 : subrtx_var_iterator::array_type array;
1620 349998515 : FOR_EACH_SUBRTX_VAR (iter, array, pat, NONCONST)
1621 : {
1622 295163113 : rtx sub = *iter;
1623 :
1624 : /* We only care about SUBREGs. */
1625 295163113 : if (GET_CODE (sub) != SUBREG)
1626 293626509 : continue;
1627 :
1628 1536604 : const_rtx x = SUBREG_REG (sub);
1629 :
1630 : /* We only care if the inner object is a REG. */
1631 1536604 : if (!REG_P (x))
1632 787 : continue;
1633 :
1634 : /* And only if the SUBREG is a promoted var. */
1635 1535817 : if (!SUBREG_PROMOTED_VAR_P (sub))
1636 1530431 : continue;
1637 :
1638 5386 : if (bitmap_bit_p (changed_pseudos, REGNO (x)))
1639 0 : SUBREG_PROMOTED_VAR_P (sub) = 0;
1640 : }
1641 54835402 : }
1642 966050 : }
1643 :
1644 : /* Initialization of the ext-dce pass. Primarily this means
1645 : setting up the various bitmaps we utilize. */
1646 :
1647 : static void
1648 966050 : ext_dce_init (void)
1649 : {
1650 966050 : livein.create (last_basic_block_for_fn (cfun));
1651 966050 : livein.quick_grow_cleared (last_basic_block_for_fn (cfun));
1652 12665360 : for (int i = 0; i < last_basic_block_for_fn (cfun); i++)
1653 11699310 : bitmap_initialize (&livein[i], &bitmap_default_obstack);
1654 :
1655 966050 : auto_bitmap refs (&bitmap_default_obstack);
1656 966050 : df_get_exit_block_use_set (refs);
1657 :
1658 966050 : unsigned i;
1659 966050 : bitmap_iterator bi;
1660 4444653 : EXECUTE_IF_SET_IN_BITMAP (refs, 0, i, bi)
1661 3478603 : make_reg_live (&livein[EXIT_BLOCK], i);
1662 :
1663 966050 : livenow = BITMAP_ALLOC (NULL);
1664 966050 : all_blocks = BITMAP_ALLOC (NULL);
1665 966050 : changed_pseudos = BITMAP_ALLOC (NULL);
1666 966050 : promotable_dests = BITMAP_ALLOC (NULL);
1667 966050 : consumed_by_candidate = BITMAP_ALLOC (NULL);
1668 :
1669 12665360 : for (int i = 0; i < last_basic_block_for_fn (cfun); i++)
1670 11699310 : if (i != ENTRY_BLOCK && i != EXIT_BLOCK)
1671 9767210 : bitmap_set_bit (all_blocks, i);
1672 :
1673 966050 : modify = false;
1674 966050 : }
1675 :
1676 : /* Finalization of the ext-dce pass. Primarily this means
1677 : releasing up the various bitmaps we utilize. */
1678 :
1679 : static void
1680 966050 : ext_dce_finish (void)
1681 : {
1682 12665360 : for (unsigned i = 0; i < livein.length (); i++)
1683 11699310 : bitmap_clear (&livein[i]);
1684 966050 : livein.release ();
1685 :
1686 966050 : BITMAP_FREE (livenow);
1687 966050 : BITMAP_FREE (changed_pseudos);
1688 966050 : BITMAP_FREE (all_blocks);
1689 966050 : BITMAP_FREE (promotable_dests);
1690 966050 : BITMAP_FREE (consumed_by_candidate);
1691 966050 : promotion_candidates.release ();
1692 966050 : promotion_copies.release ();
1693 966050 : }
1694 :
1695 : /* Process block number BB_INDEX as part of the backward
1696 : simple dataflow analysis. Return TRUE if something in
1697 : this block changed or FALSE otherwise. */
1698 :
1699 : static bool
1700 26725747 : ext_dce_rd_transfer_n (int bb_index)
1701 : {
1702 : /* The ENTRY/EXIT blocks never change. */
1703 26725747 : if (bb_index == ENTRY_BLOCK || bb_index == EXIT_BLOCK)
1704 : return false;
1705 :
1706 22861547 : basic_block bb = BASIC_BLOCK_FOR_FN (cfun, bb_index);
1707 :
1708 : /* Make everything live that's live in the successors. */
1709 22861547 : bitmap_clear (livenow);
1710 22861547 : edge_iterator ei;
1711 22861547 : edge e;
1712 :
1713 57499331 : FOR_EACH_EDGE (e, ei, bb->succs)
1714 34637784 : bitmap_ior_into (livenow, &livein[e->dest->index]);
1715 :
1716 22861547 : ext_dce_process_bb (bb);
1717 :
1718 : /* We only allow widening the set of objects live at the start
1719 : of a block. Otherwise we run the risk of not converging. */
1720 22861547 : return bitmap_ior_into (&livein[bb_index], livenow);
1721 : }
1722 :
1723 : /* Dummy function for the df_simple_dataflow API. */
1724 33280793 : static bool ext_dce_rd_confluence_n (edge) { return true; }
1725 :
1726 : /* Use lifetime analyis to identify extensions that set bits that
1727 : are never read. Turn such extensions into SUBREGs instead which
1728 : can often be propagated away. */
1729 :
1730 : void
1731 966050 : ext_dce_execute (void)
1732 : {
1733 : /* Limit the amount of memory we use for livein, with 4 bits per
1734 : reg per basic-block including overhead that maps to one byte
1735 : per reg per basic-block. */
1736 966050 : uint64_t memory_request
1737 966050 : = (uint64_t)n_basic_blocks_for_fn (cfun) * max_reg_num ();
1738 966050 : if (memory_request / 1024 > (uint64_t)param_max_gcse_memory)
1739 : {
1740 0 : warning (OPT_Wdisabled_optimization,
1741 : "ext-dce disabled: %d basic blocks and %d registers; "
1742 : "increase %<--param max-gcse-memory%> above %wu",
1743 0 : n_basic_blocks_for_fn (cfun), max_reg_num (),
1744 : memory_request / 1024);
1745 0 : return;
1746 : }
1747 :
1748 : /* Some settings of SUBREG_PROMOTED_VAR_P are actively harmful
1749 : to this pass. Clear it for those cases. */
1750 966050 : maybe_clear_subreg_promoted_p ();
1751 966050 : df_analyze ();
1752 966050 : ext_dce_init ();
1753 :
1754 3864200 : do
1755 : {
1756 1932100 : df_simple_dataflow (DF_BACKWARD, NULL, NULL,
1757 : ext_dce_rd_confluence_n, ext_dce_rd_transfer_n,
1758 : all_blocks, df_get_postorder (DF_BACKWARD),
1759 : df_get_n_blocks (DF_BACKWARD));
1760 1932100 : modify = !modify;
1761 : }
1762 : while (modify);
1763 :
1764 966050 : reset_subreg_promoted_p ();
1765 :
1766 966050 : ext_dce_finish ();
1767 : }
1768 :
1769 :
1770 : namespace {
1771 :
1772 : const pass_data pass_data_ext_dce =
1773 : {
1774 : RTL_PASS, /* type */
1775 : "ext_dce", /* name */
1776 : OPTGROUP_NONE, /* optinfo_flags */
1777 : TV_EXT_DCE, /* tv_id */
1778 : PROP_cfglayout, /* properties_required */
1779 : 0, /* properties_provided */
1780 : 0, /* properties_destroyed */
1781 : 0, /* todo_flags_start */
1782 : TODO_df_finish, /* todo_flags_finish */
1783 : };
1784 :
1785 : class pass_ext_dce : public rtl_opt_pass
1786 : {
1787 : public:
1788 288767 : pass_ext_dce (gcc::context *ctxt)
1789 577534 : : rtl_opt_pass (pass_data_ext_dce, ctxt)
1790 : {}
1791 :
1792 : /* opt_pass methods: */
1793 1481491 : virtual bool gate (function *) { return flag_ext_dce && optimize > 0; }
1794 966050 : virtual unsigned int execute (function *)
1795 : {
1796 966050 : ext_dce_execute ();
1797 966050 : return 0;
1798 : }
1799 :
1800 : }; // class pass_combine
1801 :
1802 : } // anon namespace
1803 :
1804 : rtl_opt_pass *
1805 288767 : make_pass_ext_dce (gcc::context *ctxt)
1806 : {
1807 288767 : return new pass_ext_dce (ctxt);
1808 : }
|