Line data Source code
1 : /* RTL dead zero/sign extension (code) elimination.
2 : Copyright (C) 2000-2026 Free Software Foundation, Inc.
3 :
4 : This file is part of GCC.
5 :
6 : GCC is free software; you can redistribute it and/or modify it under
7 : the terms of the GNU General Public License as published by the Free
8 : Software Foundation; either version 3, or (at your option) any later
9 : version.
10 :
11 : GCC is distributed in the hope that it will be useful, but WITHOUT ANY
12 : WARRANTY; without even the implied warranty of MERCHANTABILITY or
13 : FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
14 : for more details.
15 :
16 : You should have received a copy of the GNU General Public License
17 : along with GCC; see the file COPYING3. If not see
18 : <http://www.gnu.org/licenses/>. */
19 :
20 : #include "config.h"
21 : #include "system.h"
22 : #include "coretypes.h"
23 : #include "backend.h"
24 : #include "rtl.h"
25 : #include "tree.h"
26 : #include "memmodel.h"
27 : #include "insn-config.h"
28 : #include "emit-rtl.h"
29 : #include "expr.h"
30 : #include "recog.h"
31 : #include "cfganal.h"
32 : #include "tree-pass.h"
33 : #include "cfgrtl.h"
34 : #include "rtl-iter.h"
35 : #include "df.h"
36 : #include "print-rtl.h"
37 : #include "dbgcnt.h"
38 : #include "diagnostic-core.h"
39 : #include "target.h"
40 :
/* These should probably move into a C++ class.  */
/* Per-basic-block live-in bit groups; indexed by block number.  */
static vec<bitmap_head> livein;
/* The set of all basic blocks in the current function.  */
static bitmap all_blocks;
/* Live bit groups at the current point of the backwards scan.  */
static bitmap livenow;
/* Pseudos whose defining extension was replaced; their liveness must
   be recomputed by callers of this pass's transformations.  */
static bitmap changed_pseudos;
/* True when this pass should actually transform insns rather than
   just compute liveness.  */
static bool modify;
47 :
48 : /* We consider four bit groups for liveness:
49 : bit 0..7 (least significant byte)
50 : bit 8..15 (second least significant byte)
51 : bit 16..31
52 : bit 32..BITS_PER_WORD-1 */
53 :
54 : /* For the given REG, return the number of bit groups implied by the
55 : size of the REG's mode, up to a maximum of 4 (number of bit groups
56 : tracked by this pass).
57 :
58 : For partial integer and variable sized modes also return 4. This
59 : could possibly be refined for something like PSI mode, but it
60 : does not seem worth the effort. */
61 :
62 : static int
63 236158975 : group_limit (const_rtx reg)
64 : {
65 236158975 : machine_mode mode = GET_MODE (reg);
66 :
67 236158975 : if (!GET_MODE_BITSIZE (mode).is_constant ())
68 : return 4;
69 :
70 236158975 : int size = GET_MODE_SIZE (mode).to_constant ();
71 :
72 236158975 : size = exact_log2 (size);
73 :
74 236069420 : if (size < 0)
75 : return 4;
76 :
77 236069420 : size++;
78 236069420 : return (size > 4 ? 4 : size);
79 : }
80 :
81 : /* Make all bit groups live for REGNO in bitmap BMAP. For hard regs,
82 : we assume all groups are live. For a pseudo we consider the size
83 : of the pseudo to avoid creating unnecessarily live chunks of data. */
84 :
85 : static void
86 4691535 : make_reg_live (bitmap bmap, int regno)
87 : {
88 4691535 : int limit;
89 :
90 : /* For pseudos we can use the mode to limit how many bit groups
91 : are marked as live since a pseudo only has one mode. Hard
92 : registers have to be handled more conservatively. */
93 4691535 : if (regno > FIRST_PSEUDO_REGISTER)
94 : {
95 877789 : rtx reg = regno_reg_rtx[regno];
96 877789 : limit = group_limit (reg);
97 : }
98 : else
99 : limit = 4;
100 :
101 23126587 : for (int i = 0; i < limit; i++)
102 18435052 : bitmap_set_bit (bmap, regno * 4 + i);
103 4691535 : }
104 :
105 : /* Note this pass could be used to narrow memory loads too. It's
106 : not clear if that's profitable or not in general. */
107 :
108 : #define UNSPEC_P(X) (GET_CODE (X) == UNSPEC || GET_CODE (X) == UNSPEC_VOLATILE)
109 :
110 : /* If we know the destination of CODE only uses some low bits
111 : (say just the QI bits of an SI operation), then return true
112 : if we can propagate the need for just the subset of bits
113 : from the destination to the sources.
114 :
115 : FIXME: This is safe for operands 1 and 2 of an IF_THEN_ELSE, but not
116 : operand 0. Thus is likely would need some special casing to handle. */
117 :
118 : static bool
119 142514375 : safe_for_live_propagation (rtx_code code)
120 : {
121 : /* First handle rtx classes which as a whole are known to
122 : be either safe or unsafe. */
123 142514375 : switch (GET_RTX_CLASS (code))
124 : {
125 : case RTX_OBJ:
126 : case RTX_CONST_OBJ:
127 : return true;
128 :
129 : case RTX_COMPARE:
130 : case RTX_COMM_COMPARE:
131 : case RTX_TERNARY:
132 : return false;
133 :
134 73964051 : default:
135 73964051 : break;
136 : }
137 :
138 : /* What's left are specific codes. We only need to identify those
139 : which are safe. */
140 73964051 : switch (code)
141 : {
142 : /* These are trivially safe. */
143 : case SUBREG:
144 : case NOT:
145 : case ZERO_EXTEND:
146 : case SIGN_EXTEND:
147 : case TRUNCATE:
148 : case PLUS:
149 : case MINUS:
150 : case MULT:
151 : case SMUL_HIGHPART:
152 : case UMUL_HIGHPART:
153 : case AND:
154 : case IOR:
155 : case XOR:
156 : return true;
157 :
158 : /* We can propagate for the shifted operand, but not the shift
159 : count. The count is handled specially. */
160 : case ASHIFT:
161 : case LSHIFTRT:
162 : case ASHIFTRT:
163 : case SS_ASHIFT:
164 : case US_ASHIFT:
165 : return true;
166 :
167 : /* There may be other safe codes. If so they can be added
168 : individually when discovered. */
169 : default:
170 : return false;
171 : }
172 : }
173 :
/* Clear bits in LIVENOW and set bits in LIVE_TMP for objects
   set/clobbered by OBJ contained in INSN.

   Conceptually it is always safe to ignore a particular destination
   here as that will result in more chunks of data being considered
   live.  That's what happens when we "continue" the main loop when
   we see something we don't know how to handle such as a vector
   mode destination.

   The more accurate we are in identifying what objects (and chunks
   within an object) are set by INSN, the more aggressive the
   optimization phase during use handling will be.

   Returns true if any destination was skipped (i.e. handled
   conservatively); use processing uses that to avoid narrowing
   input liveness based on incomplete information.  */

static bool
ext_dce_process_sets (rtx_insn *insn, rtx obj, bitmap live_tmp)
{
  bool skipped_dest = false;

  subrtx_iterator::array_type array;
  FOR_EACH_SUBRTX (iter, array, obj, NONCONST)
    {
      const_rtx x = *iter;

      /* An EXPR_LIST (from call fusage) ends in NULL_RTX.  */
      if (x == NULL_RTX)
	continue;

      /* UNSPECs have unknown semantics; do not look inside them.  */
      if (UNSPEC_P (x))
	continue;

      if (GET_CODE (x) == SET || GET_CODE (x) == CLOBBER)
	{
	  /* BIT is the LSB offset of the stored value within the
	     underlying register; nonzero only for SUBREG dests.  */
	  unsigned bit = 0;
	  x = SET_DEST (x);

	  /* We don't support vector destinations or destinations
	     wider than DImode.  */
	  scalar_mode outer_mode;
	  if (!is_a <scalar_mode> (GET_MODE (x), &outer_mode)
	      || GET_MODE_BITSIZE (outer_mode) > HOST_BITS_PER_WIDE_INT)
	    {
	      /* Skip the subrtxs of this destination.  There is
		 little value in iterating into the subobjects, so
		 just skip them for a bit of efficiency.  */
	      skipped_dest = true;
	      iter.skip_subrtxes ();
	      continue;
	    }

	  /* We could have (strict_low_part (subreg ...)).  We can not just
	     strip the STRICT_LOW_PART as that would result in clearing
	     some bits in LIVENOW that are still live.  So process the
	     STRICT_LOW_PART specially.  */
	  if (GET_CODE (x) == STRICT_LOW_PART)
	    {
	      x = XEXP (x, 0);

	      /* The only valid operand of a STRICT_LOW_PART is a non
		 paradoxical SUBREG.  */
	      gcc_assert (SUBREG_P (x)
			  && !paradoxical_subreg_p (x)
			  && SUBREG_BYTE (x).is_constant ());

	      /* I think we should always see a REG here.  But let's
		 be sure.  */
	      gcc_assert (REG_P (SUBREG_REG (x)));

	      /* The inner mode might be larger, just punt for
		 that case.  Remember, we can not just continue to process
		 the inner RTXs due to the STRICT_LOW_PART.  */
	      if (!is_a <scalar_mode> (GET_MODE (SUBREG_REG (x)), &outer_mode)
		  || GET_MODE_BITSIZE (outer_mode) > HOST_BITS_PER_WIDE_INT)
		{
		  /* Skip the subrtxs of the STRICT_LOW_PART.  We can't
		     process them because it'll set objects as no longer
		     live when they are in fact still live.  */
		  skipped_dest = true;
		  iter.skip_subrtxes ();
		  continue;
		}

	      /* LIVE_TMP contains the set groups that are live-out and set in
		 this insn.  It is used to narrow the groups live-in for the
		 inputs of this insn.

		 The simple thing to do is mark all the groups as live, but
		 that will significantly inhibit optimization.

		 We also need to be careful in the case where we have an in-out
		 operand.  If we're not careful we'd clear LIVE_TMP
		 incorrectly.  */
	      HOST_WIDE_INT rn = REGNO (SUBREG_REG (x));
	      int limit = group_limit (SUBREG_REG (x));
	      for (HOST_WIDE_INT i = 4 * rn; i < 4 * rn + limit; i++)
		if (bitmap_bit_p (livenow, i))
		  bitmap_set_bit (live_tmp, i);

	      /* If no group was live-out, be conservative and treat
		 every group of the register as live.  */
	      if (bitmap_empty_p (live_tmp))
		make_reg_live (live_tmp, rn);

	      /* The mode of the SUBREG tells us how many bits we can
		 clear.  */
	      machine_mode mode = GET_MODE (x);
	      HOST_WIDE_INT size
		= exact_log2 (GET_MODE_SIZE (mode).to_constant ()) + 1;
	      bitmap_clear_range (livenow, 4 * rn, size);

	      /* We have fully processed this destination.  */
	      iter.skip_subrtxes ();
	      continue;
	    }

	  /* Phase one of destination handling.  First remove any wrapper
	     such as SUBREG or ZERO_EXTRACT.  MASK tracks which bits of
	     the underlying register this store writes.  */
	  unsigned HOST_WIDE_INT mask
	    = GET_MODE_MASK (GET_MODE_INNER (GET_MODE (x)));
	  if (SUBREG_P (x))
	    {
	      /* If we have a SUBREG destination that is too wide, just
		 skip the destination rather than continuing this iterator.
		 While continuing would be better, we'd need to strip the
		 subreg and restart within the SET processing rather than
		 the top of the loop which just complicates the flow even
		 more.  */
	      if (!is_a <scalar_mode> (GET_MODE (SUBREG_REG (x)), &outer_mode)
		  || GET_MODE_BITSIZE (outer_mode) > HOST_BITS_PER_WIDE_INT)
		{
		  skipped_dest = true;
		  iter.skip_subrtxes ();
		  continue;
		}

	      /* We can safely strip a paradoxical subreg.  The inner mode will
		 be narrower than the outer mode.  We'll clear fewer bits in
		 LIVENOW than we'd like, but that's always safe.  */
	      if (paradoxical_subreg_p (x))
		x = XEXP (x, 0);
	      else if (SUBREG_BYTE (x).is_constant ())
		{
		  /* Shift the mode mask up to the lane the SUBREG
		     actually writes within the inner register.  */
		  bit = subreg_lsb (x).to_constant ();
		  mask = GET_MODE_MASK (GET_MODE (SUBREG_REG (x))) << bit;
		  gcc_assert (mask);
		  x = SUBREG_REG (x);
		}
	      else
		gcc_unreachable ();
	    }

	  if (GET_CODE (x) == ZERO_EXTRACT)
	    {
	      /* Unlike a SUBREG destination, a set of a ZERO_EXTRACT only
		 modifies the bits referenced in the ZERO_EXTRACT, the rest
		 remain the same.  Thus we can not continue here, we must
		 either figure out what part of the destination is modified
		 or skip the sub-rtxs.  */
	      skipped_dest = true;
	      iter.skip_subrtxes ();
	      continue;
	    }

	  /* BIT >= 64 indicates something went horribly wrong.  */
	  gcc_assert (bit <= HOST_BITS_PER_WIDE_INT - 1);

	  /* Now handle the actual object that was changed.  */
	  if (REG_P (x))
	    {
	      /* LIVE_TMP contains the set groups that are live-out and set in
		 this insn.  It is used to narrow the groups live-in for the
		 inputs of this insn.

		 The simple thing to do is mark all the groups as live, but
		 that will significantly inhibit optimization.

		 We also need to be careful in the case where we have an in-out
		 operand.  If we're not careful we'd clear LIVE_TMP
		 incorrectly.  */
	      HOST_WIDE_INT rn = REGNO (x);
	      int limit = group_limit (x);
	      for (HOST_WIDE_INT i = 4 * rn; i < 4 * rn + limit; i++)
		if (bitmap_bit_p (livenow, i))
		  bitmap_set_bit (live_tmp, i);

	      if (bitmap_empty_p (live_tmp))
		make_reg_live (live_tmp, rn);

	      /* Now clear the bits known written by this instruction.
		 Note that BIT need not be a power of two, consider a
		 ZERO_EXTRACT destination.  START is the lowest written
		 group, END one past the highest written group, per the
		 0-7 / 8-15 / 16-31 / 32-63 group layout.  */
	      int start = (bit < 8 ? 0 : bit < 16 ? 1 : bit < 32 ? 2 : 3);
	      int end = ((mask & ~HOST_WIDE_INT_UC (0xffffffff)) ? 4
			 : (mask & HOST_WIDE_INT_UC (0xffff0000)) ? 3
			 : (mask & 0xff00) ? 2 : 1);
	      bitmap_clear_range (livenow, 4 * rn + start, end - start);
	    }
	  /* Some ports generate (clobber (const_int)).  */
	  else if (CONST_INT_P (x))
	    continue;
	  else
	    gcc_assert (CALL_P (insn)
			|| MEM_P (x)
			|| x == pc_rtx
			|| GET_CODE (x) == SCRATCH);

	  iter.skip_subrtxes ();
	}
      else if (GET_CODE (x) == COND_EXEC)
	{
	  /* This isn't ideal, but may not be so bad in practice.  */
	  skipped_dest = true;
	  iter.skip_subrtxes ();
	}
    }
  return skipped_dest;
}
388 :
/* INSN is a right shift and the second insn in a shift pair that is a
   sign or zero extension (SET is the single set associated with INSN).

   Replace the source of SET with NEW_SRC which is a source register
   from NEW_SRC_INSN (the left shift in the pair).  This is effectively
   the same as the replacement we do for ZERO/SIGN extends on targets
   that support those insns.  */
static void
ext_dce_try_optimize_rshift (rtx_insn *insn, rtx set, rtx new_src, rtx_insn *new_src_insn)
{
  /* If the modes are not the same or one is a hard register, then
     conservatively do nothing.  */
  if (GET_MODE (SET_SRC (set)) != GET_MODE (new_src)
      || !REG_P (XEXP (SET_SRC (set), 0))
      || !REG_P (new_src)
      || REGNO (XEXP (SET_SRC (set), 0)) < FIRST_PSEUDO_REGISTER
      || REGNO (new_src) < FIRST_PSEUDO_REGISTER)
    return;

  if (dump_file)
    {
      fprintf (dump_file, "Processing insn:\n");
      dump_insn_slim (dump_file, insn);
      fprintf (dump_file, "Trying to simplify pattern:\n");
      print_rtl_single (dump_file, SET_SRC (set));
    }

  /* We decided to do the optimization but allow it to be rejected for
     bisection purposes.  */
  if (!dbg_cnt (::ext_dce))
    {
      if (dump_file)
	fprintf (dump_file, "Rejected due to debug counter.\n");
      return;
    }

  /* We're going to generate a fresh insn for the move, so put it
     into a sequence that we can emit after the current insn.  */
  start_sequence ();
  emit_move_insn (SET_DEST (set), new_src);
  rtx_insn *seq = end_sequence ();
  emit_insn_after (seq, insn);

  /* Mark the destination as changed.  Strip wrappers so we record
     the underlying pseudo in CHANGED_PSEUDOS.  */
  rtx x = SET_DEST (set);
  while (SUBREG_P (x) || GET_CODE (x) == ZERO_EXTRACT)
    x = XEXP (x, 0);
  gcc_assert (REG_P (x));
  bitmap_set_bit (changed_pseudos, REGNO (x));

  if (dump_file)
    {
      fprintf (dump_file, "Successfully transformed to:\n");
      print_rtl_single (dump_file, PATTERN (seq));
      fprintf (dump_file, "\n");
    }

  /* The original right shift is now dead; the replacement move was
     emitted after it above.  */
  delete_insn (insn);

  /* If NEW_SRC died in its prior location, then we need to remove the
     death note and move it to the new location.

     NOTE(review): the note is attached to INSN, which was just
     passed to delete_insn above — it looks like it should instead be
     attached to the newly emitted move (SEQ).  Verify whether the
     REG_DEAD note survives on a deleted insn.  */
  rtx note = find_regno_note (new_src_insn, REG_DEAD, REGNO (new_src));
  if (note)
    {
      remove_note (new_src_insn, note);
      add_reg_note (insn, REG_DEAD, new_src);
    }
}
457 :
458 :
/* INSN has a sign/zero extended source inside SET.  Try to replace
   the extension with a lowpart SUBREG of the inner operand, which is
   valid when none of the extended bits are live (the caller has
   already verified that).  The SUBREG form usually gets copy
   propagated away.  */
static void
ext_dce_try_optimize_extension (rtx_insn *insn, rtx set)
{
  rtx src = SET_SRC (set);
  rtx inner = XEXP (src, 0);

  /* Avoid (subreg (mem)) and other constructs which may be valid RTL, but
     not useful for this optimization.  */
  if (!(REG_P (inner) || (SUBREG_P (inner) && REG_P (SUBREG_REG (inner)))))
    return;

  rtx new_pattern;
  if (dump_file)
    {
      fprintf (dump_file, "Processing insn:\n");
      dump_insn_slim (dump_file, insn);
      fprintf (dump_file, "Trying to simplify pattern:\n");
      print_rtl_single (dump_file, SET_SRC (set));
    }

  /* We decided to do the optimization but allow it to be rejected for
     bisection purposes.  */
  if (!dbg_cnt (::ext_dce))
    {
      if (dump_file)
	fprintf (dump_file, "Rejected due to debug counter.\n");
      return;
    }

  /* Build the lowpart SUBREG of INNER in the extension's mode.  */
  new_pattern = simplify_gen_subreg (GET_MODE (src), inner,
				     GET_MODE (inner), 0);
  /* simplify_gen_subreg may fail in which case NEW_PATTERN will be NULL.
     We must not pass that as a replacement pattern to validate_change.  */
  if (new_pattern)
    {
      int ok = validate_change (insn, &SET_SRC (set), new_pattern, false);

      /* Strip wrappers to find the underlying pseudo so we can record
	 it in CHANGED_PSEUDOS.  */
      rtx x = SET_DEST (set);
      while (SUBREG_P (x) || GET_CODE (x) == ZERO_EXTRACT)
	x = XEXP (x, 0);

      gcc_assert (REG_P (x));
      if (ok)
	bitmap_set_bit (changed_pseudos, REGNO (x));

      if (dump_file)
	{
	  if (ok)
	    fprintf (dump_file, "Successfully transformed to:\n");
	  else
	    fprintf (dump_file, "Failed transformation to:\n");

	  print_rtl_single (dump_file, new_pattern);
	  fprintf (dump_file, "\n");
	}

      /* INSN may have a REG_EQUAL note indicating that the value was
	 sign or zero extended.  That note is no longer valid since we've
	 just removed the extension.  Just wipe the notes.  */
      remove_reg_equal_equiv_notes (insn, false);
    }
  else
    {
      if (dump_file)
	fprintf (dump_file, "Unable to generate valid SUBREG expression.\n");
    }
}
530 :
531 : /* Some operators imply that their second operand is fully live,
532 : regardless of how many bits in the output are live. An example
533 : would be the shift count on a target without SHIFT_COUNT_TRUNCATED
534 : defined.
535 :
536 : Return TRUE if CODE is such an operator. FALSE otherwise. */
537 :
538 : static bool
539 76898622 : binop_implies_op2_fully_live (rtx_code code)
540 : {
541 0 : switch (code)
542 : {
543 : case ASHIFT:
544 : case LSHIFTRT:
545 : case ASHIFTRT:
546 : case ROTATE:
547 : case ROTATERT:
548 : case SS_ASHIFT:
549 : case US_ASHIFT:
550 : return !SHIFT_COUNT_TRUNCATED;
551 :
552 0 : default:
553 0 : return false;
554 : }
555 : }
556 :
/* X, with code CODE, is an operation for which safe_for_live_propagation
   holds true, and bits set in MASK are live in the result.  Compute a
   mask of (potentially) live bits in the non-constant inputs.  In case of
   binop_implies_op2_fully_live (e.g. shifts), the computed mask may
   exclusively pertain to the first operand.

   This looks wrong as we may have some important operations embedded as
   operands of another operation.  For example, we might have an extension
   wrapping a shift.  It really feels like this needs to be recursing down
   into operands much more often.  */

unsigned HOST_WIDE_INT
carry_backpropagate (unsigned HOST_WIDE_INT mask, enum rtx_code code, rtx x)
{
  /* Nothing live in the output means nothing need be live in the
     inputs.  */
  if (mask == 0)
    return 0;

  enum machine_mode mode = GET_MODE_INNER (GET_MODE (x));
  unsigned HOST_WIDE_INT mmask = GET_MODE_MASK (mode);

  /* While we don't try to optimize operations on types larger
     than 64 bits, we do want to make sure not to invoke undefined
     behavior when presented with such operations during use
     processing.  The safe thing to do is to just return mmask
     for that scenario indicating every possible chunk is live.  */
  scalar_int_mode smode;
  if (!is_a <scalar_int_mode> (mode, &smode)
      || GET_MODE_BITSIZE (smode) > HOST_BITS_PER_WIDE_INT)
    return mmask;

  switch (code)
    {
    /* Carries propagate upward only: every bit at or below the
       highest live result bit may matter in the inputs.  */
    case PLUS:
    case MINUS:
    case MULT:
      return (HOST_WIDE_INT_UC (2) << floor_log2 (mask)) - 1;

    /* We propagate for the shifted operand, but not the shift
       count.  The count is handled specially.  */
    case ASHIFT:
      if (CONST_INT_P (XEXP (x, 1))
	  && UINTVAL (XEXP (x, 1)) < GET_MODE_BITSIZE (smode))
	return (HOST_WIDE_INT) mask >> INTVAL (XEXP (x, 1));
      /* Variable count: fall back to the PLUS/MINUS/MULT rule.  */
      return (HOST_WIDE_INT_UC (2) << floor_log2 (mask)) - 1;

    /* We propagate for the shifted operand, but not the shift
       count.  The count is handled specially.  */
    case LSHIFTRT:
      if (CONST_INT_P (XEXP (x, 1))
	  && UINTVAL (XEXP (x, 1)) < GET_MODE_BITSIZE (smode))
	return mmask & (mask << INTVAL (XEXP (x, 1)));
      return mmask;

    /* We propagate for the shifted operand, but not the shift
       count.  The count is handled specially.  */
    case ASHIFTRT:
      if (CONST_INT_P (XEXP (x, 1))
	  && UINTVAL (XEXP (x, 1)) < GET_MODE_BITSIZE (smode))
	{
	  /* If any live result bit was replicated from the sign bit,
	     the sign bit of the input is live too.  */
	  HOST_WIDE_INT sign = 0;
	  if (HOST_BITS_PER_WIDE_INT - clz_hwi (mask) + INTVAL (XEXP (x, 1))
	      > GET_MODE_BITSIZE (smode))
	    sign = HOST_WIDE_INT_1U << (GET_MODE_BITSIZE (smode) - 1);
	  return sign | (mmask & (mask << INTVAL (XEXP (x, 1))));
	}
      return mmask;

    case SMUL_HIGHPART:
    case UMUL_HIGHPART:
      if (XEXP (x, 1) == const0_rtx)
	return 0;
      if (XEXP (x, 1) == const1_rtx)
	return mmask;
      if (CONST_INT_P (XEXP (x, 1)))
	{
	  /* Multiplying by a power of two is a left shift, so the
	     highpart behaves like a right shift by the complement.  */
	  if (pow2p_hwi (INTVAL (XEXP (x, 1))))
	    return mmask & (mask << (GET_MODE_BITSIZE (smode)
				     - exact_log2 (INTVAL (XEXP (x, 1)))));

	  /* Otherwise bound the highest input bit that can influence
	     the live highpart bits.  */
	  int bits = (HOST_BITS_PER_WIDE_INT + GET_MODE_BITSIZE (smode)
		      - clz_hwi (mask) - ctz_hwi (INTVAL (XEXP (x, 1))));
	  if (bits < GET_MODE_BITSIZE (smode))
	    return (HOST_WIDE_INT_1U << bits) - 1;
	}
      return mmask;

    case SIGN_EXTEND:
      if (!GET_MODE_BITSIZE (GET_MODE (x)).is_constant ()
	  || !GET_MODE_BITSIZE (GET_MODE (XEXP (x, 0))).is_constant ())
	return -1;

      /* We want the mode of the inner object.  We need to ensure its
	 sign bit is on in MASK.  */
      mode = GET_MODE_INNER (GET_MODE (XEXP (x, 0)));
      if (mask & ~GET_MODE_MASK (mode))
	mask |= HOST_WIDE_INT_1U << (GET_MODE_BITSIZE (mode).to_constant ()
				     - 1);

      /* Recurse into the operand.  */
      return carry_backpropagate (mask, GET_CODE (XEXP (x, 0)), XEXP (x, 0));

    case ZERO_EXTEND:
      if (!GET_MODE_BITSIZE (GET_MODE (x)).is_constant ()
	  || !GET_MODE_BITSIZE (GET_MODE (XEXP (x, 0))).is_constant ())
	return -1;

      /* Recurse into the operand.  */
      return carry_backpropagate (mask, GET_CODE (XEXP (x, 0)), XEXP (x, 0));

    /* We propagate for the shifted operand, but not the shift
       count.  The count is handled specially.  */
    case SS_ASHIFT:
    case US_ASHIFT:
      if (CONST_INT_P (XEXP (x, 1))
	  && UINTVAL (XEXP (x, 1)) < GET_MODE_BITSIZE (smode))
	{
	  /* Saturation makes the high bits (and for SS_ASHIFT one bit
	     below the sign bit) live in addition to the shifted-down
	     live bits.  */
	  return ((mmask & ~((unsigned HOST_WIDE_INT) mmask
			     >> (INTVAL (XEXP (x, 1))
				 + (XEXP (x, 1) != const0_rtx
				    && code == SS_ASHIFT))))
		  | ((HOST_WIDE_INT) mask >> INTVAL (XEXP (x, 1))));
	}
      return mmask;

    default:
      return mask;
    }
}
685 :
686 : /* Process uses in INSN contained in OBJ. Set appropriate bits in LIVENOW
687 : for any chunks of pseudos that become live, potentially filtering using
688 : bits from LIVE_TMP.
689 :
690 : If MODIFY is true, then optimize sign/zero extensions to SUBREGs when
691 : the extended bits are never read and mark pseudos which had extensions
692 : eliminated in CHANGED_PSEUDOS. */
693 :
694 : static void
695 138134916 : ext_dce_process_uses (rtx_insn *insn, rtx obj,
696 : bitmap live_tmp, bool skipped_dest)
697 : {
698 138134916 : subrtx_var_iterator::array_type array_var;
699 759051725 : FOR_EACH_SUBRTX_VAR (iter, array_var, obj, NONCONST)
700 : {
701 : /* An EXPR_LIST (from call fusage) ends in NULL_RTX. */
702 620916809 : rtx x = *iter;
703 620916809 : if (x == NULL_RTX)
704 9475204 : continue;
705 :
706 : /* So the basic idea in this FOR_EACH_SUBRTX_VAR loop is to
707 : handle SETs explicitly, possibly propagating live information
708 : into the uses.
709 :
710 : We may continue the loop at various points which will cause
711 : iteration into the next level of RTL. Breaking from the loop
712 : is never safe as it can lead us to fail to process some of the
713 : RTL and thus not make objects live when necessary. */
714 611441605 : enum rtx_code xcode = GET_CODE (x);
715 611441605 : if (xcode == SET)
716 : {
717 122235635 : const_rtx dst = SET_DEST (x);
718 122235635 : rtx src = SET_SRC (x);
719 122235635 : const_rtx y;
720 122235635 : unsigned HOST_WIDE_INT bit = 0;
721 :
722 : /* The code of the RHS of a SET. */
723 122235635 : enum rtx_code code = GET_CODE (src);
724 :
725 : /* ?!? How much of this should mirror SET handling, potentially
726 : being shared? */
727 122235635 : if (SUBREG_P (dst) && subreg_lsb (dst).is_constant (&bit))
728 : {
729 582925 : if (bit >= HOST_BITS_PER_WIDE_INT)
730 : bit = HOST_BITS_PER_WIDE_INT - 1;
731 582925 : dst = SUBREG_REG (dst);
732 : }
733 121652710 : else if (GET_CODE (dst) == STRICT_LOW_PART)
734 13271 : dst = XEXP (dst, 0);
735 :
736 : /* Main processing of the uses. Two major goals here.
737 :
738 : First, we want to try and propagate liveness (or the lack
739 : thereof) from the destination register to the source
740 : register(s).
741 :
742 : Second, if the source is an extension, try to optimize
743 : it into a SUBREG. The SUBREG form indicates we don't
744 : care about the upper bits and will usually be copy
745 : propagated away.
746 :
747 : If we fail to handle something in here, the expectation
748 : is the iterator will dive into the sub-components and
749 : mark all the chunks in any found REGs as live. */
750 122235635 : if (REG_P (dst) && safe_for_live_propagation (code))
751 : {
752 : /* Create a mask representing the bits of this output
753 : operand that are live after this insn. We can use
754 : this information to refine the live in state of
755 : inputs to this insn in many cases.
756 :
757 : We have to do this on a per SET basis, we might have
758 : an INSN with multiple SETS, some of which can narrow
759 : the source operand liveness, some of which may not. */
760 71759026 : unsigned HOST_WIDE_INT dst_mask = 0;
761 71759026 : HOST_WIDE_INT rn = REGNO (dst);
762 71759026 : unsigned HOST_WIDE_INT mask_array[]
763 : = { 0xff, 0xff00, HOST_WIDE_INT_UC (0xffff0000),
764 : -HOST_WIDE_INT_UC (0x100000000) };
765 358795130 : for (int i = 0; i < 4; i++)
766 287036104 : if (bitmap_bit_p (live_tmp, 4 * rn + i))
767 230138918 : dst_mask |= mask_array[i];
768 71759026 : dst_mask >>= bit;
769 :
770 : /* If we ignored a destination during set processing, then
771 : consider all the bits live. */
772 71759026 : if (skipped_dest)
773 25285852 : dst_mask = -1;
774 :
775 71759026 : dst_mask = carry_backpropagate (dst_mask, code, src);
776 :
777 : /* ??? Could also handle ZERO_EXTRACT / SIGN_EXTRACT
778 : of the source specially to improve optimization. */
779 71759026 : if (code == SIGN_EXTEND || code == ZERO_EXTEND)
780 : {
781 1754356 : rtx inner = XEXP (src, 0);
782 1754356 : unsigned HOST_WIDE_INT src_mask
783 1754356 : = GET_MODE_MASK (GET_MODE_INNER (GET_MODE (inner)));
784 :
785 : /* DST_MASK could be zero if we had something in the SET
786 : that we couldn't handle. */
787 1754356 : if (modify && !skipped_dest && (dst_mask & ~src_mask) == 0)
788 4694 : ext_dce_try_optimize_extension (insn, x);
789 :
790 : /* Stripping the extension here just seems wrong on multiple
791 : levels. It's source side handling, so it seems like it
792 : belongs in the loop below. Stripping here also makes it
793 : harder than necessary to properly handle live bit groups
794 : for (ANY_EXTEND (SUBREG)) where the SUBREG has
795 : SUBREG_PROMOTED state. */
796 1754356 : dst_mask &= src_mask;
797 1754356 : src = XEXP (src, 0);
798 1754356 : code = GET_CODE (src);
799 : }
800 :
801 : /* Special case for (sub)targets that do not have extension
802 : insns (and thus use shifts). We want to detect when we have
803 : a shift pair and treat the pair as-if was an extension.
804 :
805 : Key on the right shift and use (for now) simplistic tests
806 : to find the corresponding left shift. */
807 71759026 : scalar_mode outer_mode;
808 71759026 : if ((code == LSHIFTRT || code == ASHIFTRT)
809 1047172 : && CONST_INT_P (XEXP (src, 1))
810 1155681 : && (INTVAL (XEXP (src, 1)) == BITS_PER_WORD - 8
811 1151162 : || INTVAL (XEXP (src, 1)) == BITS_PER_WORD - 16
812 989014 : || INTVAL (XEXP (src, 1)) == BITS_PER_WORD - 32)
813 71826313 : && is_a <scalar_mode> (GET_MODE (src), &outer_mode)
814 71826313 : && GET_MODE_BITSIZE (outer_mode) <= HOST_BITS_PER_WIDE_INT)
815 : {
816 : /* So we have a right shift that could correspond to
817 : the second in a pair impementing QI, HI or SI -> DI
818 : extension. See if we can find the left shift. For
819 : now, just look one real instruction back. */
820 67143 : rtx_insn *prev_insn = prev_nonnote_nondebug_insn_bb (insn);
821 :
822 : /* The previous insn must be a left shift by the same
823 : amount. */
824 67143 : rtx prev_set;
825 67143 : if (prev_insn
826 64319 : && (prev_set = single_set (prev_insn))
827 : /* The destination of the left shift must be the
828 : source of the right shift. */
829 64220 : && SET_DEST (prev_set) == XEXP (src, 0)
830 32248 : && GET_CODE (SET_SRC (prev_set)) == ASHIFT
831 811 : && CONST_INT_P (XEXP (SET_SRC (prev_set), 1))
832 : /* The counts must match. */
833 67143 : && (INTVAL (XEXP (src, 1))
834 795 : == INTVAL (XEXP (SET_SRC (prev_set), 1))))
835 : {
836 15 : unsigned HOST_WIDE_INT src_mask = GET_MODE_BITSIZE (GET_MODE (src)).to_constant ();
837 15 : src_mask -= INTVAL (XEXP (src, 1));
838 15 : src_mask = (HOST_WIDE_INT_1U << src_mask) - 1;
839 :
840 : /* DST_MASK has been adjusted for INSN. We need its original value. */
841 15 : unsigned HOST_WIDE_INT tmp_mask = 0;
842 75 : for (int i = 0; i < 4; i++)
843 60 : if (bitmap_bit_p (live_tmp, 4 * rn + i))
844 15 : tmp_mask |= mask_array[i];
845 15 : tmp_mask >>= bit;
846 :
847 15 : if (modify && !skipped_dest && (tmp_mask & ~src_mask) == 0)
848 : {
849 0 : ext_dce_try_optimize_rshift (insn, x, XEXP (SET_SRC (prev_set), 0), prev_insn);
850 :
851 : /* These may not strictly be necessary, but we might as well try and be
852 : as accurate as possible. The RHS is now a simple REG. */
853 0 : dst_mask = src_mask;
854 0 : src = XEXP (SET_SRC (prev_set), 0);
855 0 : code = GET_CODE (src);
856 : }
857 : }
858 : }
859 :
860 : /* Optimization is done at this point. We just want to make
861 : sure everything that should get marked as live is marked
862 : from here onward. */
863 :
864 : /* We will handle the other operand of a binary operator
865 : at the bottom of the loop by resetting Y. */
866 71759026 : if (BINARY_P (src))
867 22564860 : y = XEXP (src, 0);
868 : else
869 : y = src;
870 :
871 : /* We're inside a SET and want to process the source operands
872 : making things live. Breaking from this loop will cause
873 : the iterator to work on sub-rtxs, so it is safe to break
874 : if we see something we don't know how to handle.
875 :
876 : This code is just hokey as it really just handles trivial
877 : unary and binary cases. Otherwise the loop exits and we
878 : continue iterating on sub-rtxs, but outside the set context. */
879 : unsigned HOST_WIDE_INT save_mask = dst_mask;
880 115913692 : for (;;)
881 : {
882 : /* In general we want to restore DST_MASK before each loop
883 : iteration. The exception is when the opcode implies that
884 : the other operand is fully live. That's handled by
885 : changing SAVE_MASK below. */
886 93836359 : dst_mask = save_mask;
887 : /* Strip an outer paradoxical subreg. The bits outside
888 : the inner mode are don't cares. So we can just strip
889 : and process the inner object. */
890 93836359 : if (paradoxical_subreg_p (y))
891 : y = XEXP (y, 0);
892 93734740 : else if (SUBREG_P (y) && subreg_lsb (y).is_constant (&bit))
893 : {
894 : /* If !TRULY_NOOP_TRUNCATION_MODES_P, the mode
895 : change performed by Y would normally need to be a
896 : TRUNCATE rather than a SUBREG. It is probably the
897 : guarantee provided by SUBREG_PROMOTED_VAR_P that
898 : allows the SUBREG in Y as an exception. We must
899 : therefore preserve that guarantee and treat the
900 : upper bits of the inner register as live
901 : regardless of the outer code. See PR 120050. */
902 1925257 : if (!REG_P (SUBREG_REG (y))
903 1925257 : || (SUBREG_PROMOTED_VAR_P (y)
904 13482 : && (!TRULY_NOOP_TRUNCATION_MODES_P (
905 : GET_MODE (y),
906 : GET_MODE (SUBREG_REG (y))))))
907 : break;
908 :
909 : /* If this is a wide object (more bits than we can fit
910 : in a HOST_WIDE_INT), then just break from the SET
911 : context. That will cause the iterator to walk down
912 : into the subrtx and if we land on a REG we'll mark
913 : the whole thing live. */
914 1924285 : if (bit >= HOST_BITS_PER_WIDE_INT)
915 : break;
916 :
917 : /* The SUBREG's mode determines the live width. */
918 1714487 : if (dst_mask)
919 : {
920 1714487 : dst_mask <<= bit;
921 1714487 : if (!dst_mask)
922 0 : dst_mask = -HOST_WIDE_INT_UC (0x100000000);
923 : }
924 1714487 : y = SUBREG_REG (y);
925 : }
926 :
927 93625589 : if (REG_P (y))
928 : {
929 : /* We have found the use of a register. We need to mark
930 : the appropriate chunks of the register live. The mode
931 : of the REG is a starting point. We may refine that
932 : based on what chunks in the output were live. */
933 49788925 : rn = 4 * REGNO (y);
934 49788925 : unsigned HOST_WIDE_INT tmp_mask = dst_mask;
935 :
936 : /* If the RTX code for the SET_SRC is not one we can
937 : propagate destination liveness through, then just
938 : set the mask to the mode's mask. */
939 49788925 : if (!safe_for_live_propagation (code))
940 32834 : tmp_mask
941 65668 : = GET_MODE_MASK (GET_MODE_INNER (GET_MODE (y)));
942 :
943 49788925 : if (tmp_mask & 0xff)
944 49370983 : bitmap_set_bit (livenow, rn);
945 49788925 : if (tmp_mask & 0xff00)
946 47936192 : bitmap_set_bit (livenow, rn + 1);
947 49788925 : if (tmp_mask & HOST_WIDE_INT_UC (0xffff0000))
948 47687604 : bitmap_set_bit (livenow, rn + 2);
949 49788925 : if (tmp_mask & -HOST_WIDE_INT_UC (0x100000000))
950 41184022 : bitmap_set_bit (livenow, rn + 3);
951 : }
952 43836664 : else if (!CONSTANT_P (y))
953 : break;
954 :
955 : /* We might have (ashift (const_int 1) (reg...))
956 : By setting dst_mask we can continue iterating on
957 : the next operand and it will be considered fully live.
958 :
959 : Note that since we restore DST_MASK from SAVE_MASK at the
960 : top of the loop, we have to change SAVE_MASK to get the
961 : semantics we want. */
962 76898622 : if (binop_implies_op2_fully_live (GET_CODE (src)))
963 2468888 : save_mask = -1;
964 :
965 : /* If this was anything but a binary operand, break the inner
966 : loop. This is conservatively correct as it will cause the
967 : iterator to look at the sub-rtxs outside the SET context. */
968 76898622 : if (!BINARY_P (src))
969 : break;
970 :
971 : /* We processed the first operand of a binary operator. Now
972 : handle the second. */
973 22077333 : y = XEXP (src, 1), src = pc_rtx;
974 22077333 : }
975 :
976 : /* These are leaf nodes, no need to iterate down into them. */
977 71759026 : if (REG_P (y) || CONSTANT_P (y))
978 54821289 : iter.skip_subrtxes ();
979 : }
980 : }
981 : /* If we are reading the low part of a SUBREG, then we can
982 : refine liveness of the input register, otherwise let the
983 : iterator continue into SUBREG_REG. */
984 489205970 : else if (SUBREG_P (x)
985 1328249 : && REG_P (SUBREG_REG (x))
986 1326437 : && !paradoxical_subreg_p (x)
987 1302920 : && subreg_lowpart_p (x)
988 1020268 : && GET_MODE_BITSIZE (GET_MODE (x)).is_constant ()
989 491246506 : && GET_MODE_BITSIZE (GET_MODE (x)).to_constant () <= 32)
990 : {
991 508039 : HOST_WIDE_INT size = GET_MODE_BITSIZE (GET_MODE (x)).to_constant ();
992 508039 : HOST_WIDE_INT rn = 4 * REGNO (SUBREG_REG (x));
993 :
994 : /* If this is a promoted subreg, then more of it may be live than
995 : is otherwise obvious. */
996 508039 : if (SUBREG_PROMOTED_VAR_P (x))
997 4296 : size = GET_MODE_BITSIZE (GET_MODE (SUBREG_REG (x))).to_constant ();
998 :
999 508039 : bitmap_set_bit (livenow, rn);
1000 508039 : if (size > 8)
1001 331405 : bitmap_set_bit (livenow, rn + 1);
1002 331405 : if (size > 16)
1003 289398 : bitmap_set_bit (livenow, rn + 2);
1004 289398 : if (size >= 32)
1005 289398 : bitmap_set_bit (livenow, rn + 3);
1006 508039 : iter.skip_subrtxes ();
1007 : }
1008 : /* If we have a register reference that is not otherwise handled,
1009 : just assume all the chunks are live. */
1010 488697931 : else if (REG_P (x))
1011 162303188 : bitmap_set_range (livenow, REGNO (x) * 4, group_limit (x));
1012 : }
1013 138134916 : }
1014 :
1015 : /* Process a single basic block BB with current liveness information
1016 : in LIVENOW, returning updated liveness information.
1017 :
1018 : If MODIFY is true, then this is the last pass and unnecessary
1019 : extensions should be eliminated when possible. If an extension
1020 : is removed, the source pseudo is marked in CHANGED_PSEUDOS. */
1021 :
static void
ext_dce_process_bb (basic_block bb)
{
  rtx_insn *insn;

  /* Walk the block in reverse so LIVENOW describes the state just
     after each insn when we reach it.  */
  FOR_BB_INSNS_REVERSE (bb, insn)
    {
      if (!NONDEBUG_INSN_P (insn))
	continue;

      /* Live-out state of the destination of this insn.  We can
	 use this to refine the live-in state of the sources of
	 this insn in many cases.  */
      bitmap live_tmp = BITMAP_ALLOC (NULL);

      /* First process any sets/clobbers in INSN.  */
      bool skipped_dest = ext_dce_process_sets (insn, PATTERN (insn), live_tmp);

      /* CALL_INSNs need processing their fusage data.  */
      if (CALL_P (insn))
	skipped_dest |= ext_dce_process_sets (insn,
					      CALL_INSN_FUNCTION_USAGE (insn),
					      live_tmp);

      /* And now uses, optimizing away SIGN/ZERO extensions as we go.  */
      ext_dce_process_uses (insn, PATTERN (insn), live_tmp, skipped_dest);

      /* A nonlocal goto implicitly uses the frame pointer.  */
      if (JUMP_P (insn) && find_reg_note (insn, REG_NON_LOCAL_GOTO, NULL_RTX))
	{
	  /* Each register has 4 tracked bit groups, hence the "* 4"
	     scaling and 4-wide ranges throughout this pass.  */
	  bitmap_set_range (livenow, FRAME_POINTER_REGNUM * 4, 4);
	  if (!HARD_FRAME_POINTER_IS_FRAME_POINTER)
	    bitmap_set_range (livenow, HARD_FRAME_POINTER_REGNUM * 4, 4);
	}

      /* And process fusage data for the use as well.  */
      if (CALL_P (insn))
	{
	  /* A real (non-fake) call makes the stack pointer live.  */
	  if (!FAKE_CALL_P (insn))
	    bitmap_set_range (livenow, STACK_POINTER_REGNUM * 4, 4);

	  /* If this is not a call to a const function, then assume it
	     can read any global register.  */
	  if (!RTL_CONST_CALL_P (insn))
	    for (unsigned i = 0; i < FIRST_PSEUDO_REGISTER; i++)
	      if (global_regs[i])
		bitmap_set_range (livenow, i * 4, 4);

	  ext_dce_process_uses (insn, CALL_INSN_FUNCTION_USAGE (insn), live_tmp, false);
	}

      BITMAP_FREE (live_tmp);
    }
}
1076 :
1077 : /* SUBREG_PROMOTED_VAR_P is set by the gimple->rtl optimizers and
1078 : is usually helpful. However, in some cases setting the value when
1079 : it is not strictly needed can cause this pass to miss optimizations.
1080 :
1081 : Specifically consider (set (mem) (subreg (reg))). If set in that
1082 : case it will cause more bit groups to be live for REG than would
1083 : be strictly necessary which in turn can inhibit extension removal.
1084 :
1085 : So do a pass over the IL wiping the SUBREG_PROMOTED_VAR_P when it
1086 : is obviously not needed. */
1087 :
1088 : static void
1089 963981 : maybe_clear_subreg_promoted_p (void)
1090 : {
1091 120322219 : for (rtx_insn *insn = get_insns(); insn; insn = NEXT_INSN (insn))
1092 : {
1093 119358238 : if (!NONDEBUG_INSN_P (insn))
1094 64364377 : continue;
1095 :
1096 54993861 : rtx set = single_set (insn);
1097 54993861 : if (!set)
1098 3657489 : continue;
1099 :
1100 : /* There may be other cases where we should clear, but for
1101 : now, this is the only known case where it causes problems. */
1102 51336372 : if (MEM_P (SET_DEST (set)) && SUBREG_P (SET_SRC (set))
1103 69773 : && GET_MODE (SET_DEST (set)) <= GET_MODE (SUBREG_REG (SET_SRC (set))))
1104 60518 : SUBREG_PROMOTED_VAR_P (SET_SRC (set)) = 0;
1105 : }
1106 963981 : }
1107 :
1108 : /* Walk the IL and build the transitive closure of all the REGs tied
1109 : together by copies where either the source or destination is
1110 : marked in CHANGED_PSEUDOS. */
1111 :
static void
expand_changed_pseudos (void)
{
  /* Build a vector of registers related by a copy.  This is meant to
     speed up the next step by avoiding full IL walks.  */
  struct copy_pair { rtx first; rtx second; };
  auto_vec<copy_pair> pairs;
  for (rtx_insn *insn = get_insns(); insn; insn = NEXT_INSN (insn))
    {
      if (!NONDEBUG_INSN_P (insn))
	continue;

      rtx pat = PATTERN (insn);

      /* Simple copies to a REG from another REG or SUBREG of a REG.  */
      if (GET_CODE (pat) == SET
	  && REG_P (SET_DEST (pat))
	  && (REG_P (SET_SRC (pat))
	      || (SUBREG_P (SET_SRC (pat))
		  && REG_P (SUBREG_REG (SET_SRC (pat))))))
	{
	  /* Record the underlying source register, stripping any
	     SUBREG wrapper.  */
	  rtx src = (REG_P (SET_SRC (pat))
		     ? SET_SRC (pat)
		     : SUBREG_REG (SET_SRC (pat)));
	  pairs.safe_push ({ SET_DEST (pat), src });
	}

      /* Simple copies to a REG from another REG or SUBREG of a REG
	 held inside a PARALLEL.  */
      if (GET_CODE (pat) == PARALLEL)
	{
	  for (int i = XVECLEN (pat, 0) - 1; i >= 0; i--)
	    {
	      rtx elem = XVECEXP (pat, 0, i);

	      if (GET_CODE (elem) == SET
		  && REG_P (SET_DEST (elem))
		  && (REG_P (SET_SRC (elem))
		      || (SUBREG_P (SET_SRC (elem))
			  && REG_P (SUBREG_REG (SET_SRC (elem))))))
		{
		  rtx src = (REG_P (SET_SRC (elem))
			     ? SET_SRC (elem)
			     : SUBREG_REG (SET_SRC (elem)));
		  pairs.safe_push ({ SET_DEST (elem), src });
		}
	    }
	  continue;
	}
    }

  /* Now we have a vector with copy pairs.  Iterate over that list
     updating CHANGED_PSEUDOS as we go.  Eliminate copies from the
     list as we go as they don't need further processing.  */
  bool changed = true;
  while (changed)
    {
      changed = false;
      unsigned int i;
      copy_pair *p;
      FOR_EACH_VEC_ELT (pairs, i, p)
	{
	  /* Propagate the mark when the source is already changed and
	     the destination is newly marked; bitmap_set_bit returns
	     true only for a newly set bit, so each pair fires once.  */
	  if (bitmap_bit_p (changed_pseudos, REGNO (p->second))
	      && bitmap_set_bit (changed_pseudos, REGNO (p->first)))
	    {
	      /* unordered_remove swaps the last element into slot I;
		 that element may be skipped this sweep, but the outer
		 WHILE re-scans until a full pass makes no change, so
		 the fixpoint is still reached.  */
	      pairs.unordered_remove (i);
	      changed = true;
	    }
	}
    }
}
1183 :
1184 : /* We optimize away sign/zero extensions in this pass and replace
1185 : them with SUBREGs indicating certain bits are don't cares.
1186 :
1187 : This changes the SUBREG_PROMOTED_VAR_P state of the object.
1188 : It is fairly painful to fix this on the fly, so we have
1189 : recorded which pseudos are affected and we look for SUBREGs
1190 : of those pseudos and fix them up. */
1191 :
1192 : static void
1193 963981 : reset_subreg_promoted_p (void)
1194 : {
1195 : /* This pass eliminates zero/sign extensions on pseudo regs found
1196 : in CHANGED_PSEUDOS. Elimination of those extensions changes if
1197 : the pseudos are known to hold values extended to wider modes
1198 : via SUBREG_PROMOTED_VAR. So we wipe the SUBREG_PROMOTED_VAR
1199 : state on all affected pseudos.
1200 :
1201 : But that is insufficient. We might have a copy from one REG
1202 : to another (possibly with the source register wrapped with a
1203 : SUBREG). We need to wipe SUBREG_PROMOTED_VAR on the transitive
1204 : closure of the original CHANGED_PSEUDOS and registers they're
1205 : connected to via copies. So expand the set. */
1206 963981 : expand_changed_pseudos ();
1207 :
1208 : /* If we removed an extension, that changed the promoted state
1209 : of the destination of that extension. Thus we need to go
1210 : find any SUBREGs that reference that pseudo and adjust their
1211 : SUBREG_PROMOTED_P state. */
1212 120322219 : for (rtx_insn *insn = get_insns(); insn; insn = NEXT_INSN (insn))
1213 : {
1214 119358238 : if (!NONDEBUG_INSN_P (insn))
1215 64364377 : continue;
1216 :
1217 54993861 : rtx pat = PATTERN (insn);
1218 54993861 : subrtx_var_iterator::array_type array;
1219 351032729 : FOR_EACH_SUBRTX_VAR (iter, array, pat, NONCONST)
1220 : {
1221 296038868 : rtx sub = *iter;
1222 :
1223 : /* We only care about SUBREGs. */
1224 296038868 : if (GET_CODE (sub) != SUBREG)
1225 294504717 : continue;
1226 :
1227 1534151 : const_rtx x = SUBREG_REG (sub);
1228 :
1229 : /* We only care if the inner object is a REG. */
1230 1534151 : if (!REG_P (x))
1231 758 : continue;
1232 :
1233 : /* And only if the SUBREG is a promoted var. */
1234 1533393 : if (!SUBREG_PROMOTED_VAR_P (sub))
1235 1528063 : continue;
1236 :
1237 5330 : if (bitmap_bit_p (changed_pseudos, REGNO (x)))
1238 0 : SUBREG_PROMOTED_VAR_P (sub) = 0;
1239 : }
1240 54993861 : }
1241 963981 : }
1242 :
1243 : /* Initialization of the ext-dce pass. Primarily this means
1244 : setting up the various bitmaps we utilize. */
1245 :
1246 : static void
1247 963981 : ext_dce_init (void)
1248 : {
1249 963981 : livein.create (last_basic_block_for_fn (cfun));
1250 963981 : livein.quick_grow_cleared (last_basic_block_for_fn (cfun));
1251 12732532 : for (int i = 0; i < last_basic_block_for_fn (cfun); i++)
1252 11768551 : bitmap_initialize (&livein[i], &bitmap_default_obstack);
1253 :
1254 963981 : auto_bitmap refs (&bitmap_default_obstack);
1255 963981 : df_get_exit_block_use_set (refs);
1256 :
1257 963981 : unsigned i;
1258 963981 : bitmap_iterator bi;
1259 4434985 : EXECUTE_IF_SET_IN_BITMAP (refs, 0, i, bi)
1260 3471004 : make_reg_live (&livein[EXIT_BLOCK], i);
1261 :
1262 963981 : livenow = BITMAP_ALLOC (NULL);
1263 963981 : all_blocks = BITMAP_ALLOC (NULL);
1264 963981 : changed_pseudos = BITMAP_ALLOC (NULL);
1265 :
1266 12732532 : for (int i = 0; i < last_basic_block_for_fn (cfun); i++)
1267 11768551 : if (i != ENTRY_BLOCK && i != EXIT_BLOCK)
1268 9840589 : bitmap_set_bit (all_blocks, i);
1269 :
1270 963981 : modify = false;
1271 963981 : }
1272 :
1273 : /* Finalization of the ext-dce pass. Primarily this means
1274 : releasing up the various bitmaps we utilize. */
1275 :
1276 : static void
1277 963981 : ext_dce_finish (void)
1278 : {
1279 12732532 : for (unsigned i = 0; i < livein.length (); i++)
1280 11768551 : bitmap_clear (&livein[i]);
1281 963981 : livein.release ();
1282 :
1283 963981 : BITMAP_FREE (livenow);
1284 963981 : BITMAP_FREE (changed_pseudos);
1285 963981 : BITMAP_FREE (all_blocks);
1286 963981 : }
1287 :
1288 : /* Process block number BB_INDEX as part of the backward
1289 : simple dataflow analysis. Return TRUE if something in
1290 : this block changed or FALSE otherwise. */
1291 :
1292 : static bool
1293 26882756 : ext_dce_rd_transfer_n (int bb_index)
1294 : {
1295 : /* The ENTRY/EXIT blocks never change. */
1296 26882756 : if (bb_index == ENTRY_BLOCK || bb_index == EXIT_BLOCK)
1297 : return false;
1298 :
1299 23026832 : basic_block bb = BASIC_BLOCK_FOR_FN (cfun, bb_index);
1300 :
1301 : /* Make everything live that's live in the successors. */
1302 23026832 : bitmap_clear (livenow);
1303 23026832 : edge_iterator ei;
1304 23026832 : edge e;
1305 :
1306 57963954 : FOR_EACH_EDGE (e, ei, bb->succs)
1307 34937122 : bitmap_ior_into (livenow, &livein[e->dest->index]);
1308 :
1309 23026832 : ext_dce_process_bb (bb);
1310 :
1311 : /* We only allow widening the set of objects live at the start
1312 : of a block. Otherwise we run the risk of not converging. */
1313 23026832 : return bitmap_ior_into (&livein[bb_index], livenow);
1314 : }
1315 :
1316 : /* Dummy function for the df_simple_dataflow API. */
1317 33560402 : static bool ext_dce_rd_confluence_n (edge) { return true; }
1318 :
1319 : /* Use lifetime analysis to identify extensions that set bits that
1320 : are never read. Turn such extensions into SUBREGs instead which
1321 : can often be propagated away. */
1322 :
1323 : void
1324 963981 : ext_dce_execute (void)
1325 : {
1326 : /* Limit the amount of memory we use for livein, with 4 bits per
1327 : reg per basic-block including overhead that maps to one byte
1328 : per reg per basic-block. */
1329 963981 : uint64_t memory_request
1330 963981 : = (uint64_t)n_basic_blocks_for_fn (cfun) * max_reg_num ();
1331 963981 : if (memory_request / 1024 > (uint64_t)param_max_gcse_memory)
1332 : {
1333 0 : warning (OPT_Wdisabled_optimization,
1334 : "ext-dce disabled: %d basic blocks and %d registers; "
1335 : "increase %<--param max-gcse-memory%> above %wu",
1336 0 : n_basic_blocks_for_fn (cfun), max_reg_num (),
1337 : memory_request / 1024);
1338 0 : return;
1339 : }
1340 :
1341 : /* Some settings of SUBREG_PROMOTED_VAR_P are actively harmful
1342 : to this pass. Clear it for those cases. */
1343 963981 : maybe_clear_subreg_promoted_p ();
1344 963981 : df_analyze ();
1345 963981 : ext_dce_init ();
1346 :
1347 3855924 : do
1348 : {
1349 1927962 : df_simple_dataflow (DF_BACKWARD, NULL, NULL,
1350 : ext_dce_rd_confluence_n, ext_dce_rd_transfer_n,
1351 : all_blocks, df_get_postorder (DF_BACKWARD),
1352 : df_get_n_blocks (DF_BACKWARD));
1353 1927962 : modify = !modify;
1354 : }
1355 : while (modify);
1356 :
1357 963981 : reset_subreg_promoted_p ();
1358 :
1359 963981 : ext_dce_finish ();
1360 : }
1361 :
1362 :
namespace {

/* Pass registration metadata for the ext_dce RTL pass.  */
const pass_data pass_data_ext_dce =
{
  RTL_PASS, /* type */
  "ext_dce", /* name */
  OPTGROUP_NONE, /* optinfo_flags */
  TV_EXT_DCE, /* tv_id */
  PROP_cfglayout, /* properties_required */
  0, /* properties_provided */
  0, /* properties_destroyed */
  0, /* todo_flags_start */
  TODO_df_finish, /* todo_flags_finish */
};

/* Pass wrapper; runs only when -fext-dce is enabled and optimizing.  */
class pass_ext_dce : public rtl_opt_pass
{
public:
  pass_ext_dce (gcc::context *ctxt)
    : rtl_opt_pass (pass_data_ext_dce, ctxt)
  {}

  /* opt_pass methods: */
  virtual bool gate (function *) { return flag_ext_dce && optimize > 0; }
  virtual unsigned int execute (function *)
    {
      ext_dce_execute ();
      return 0;
    }

}; // class pass_ext_dce

} // anon namespace
1396 :
1397 : rtl_opt_pass *
1398 285722 : make_pass_ext_dce (gcc::context *ctxt)
1399 : {
1400 285722 : return new pass_ext_dce (ctxt);
1401 : }
|