Branch data Line data Source code
1 : : /* RTL dead zero/sign extension (code) elimination.
2 : : Copyright (C) 2000-2025 Free Software Foundation, Inc.
3 : :
4 : : This file is part of GCC.
5 : :
6 : : GCC is free software; you can redistribute it and/or modify it under
7 : : the terms of the GNU General Public License as published by the Free
8 : : Software Foundation; either version 3, or (at your option) any later
9 : : version.
10 : :
11 : : GCC is distributed in the hope that it will be useful, but WITHOUT ANY
12 : : WARRANTY; without even the implied warranty of MERCHANTABILITY or
13 : : FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
14 : : for more details.
15 : :
16 : : You should have received a copy of the GNU General Public License
17 : : along with GCC; see the file COPYING3. If not see
18 : : <http://www.gnu.org/licenses/>. */
19 : :
20 : : #include "config.h"
21 : : #include "system.h"
22 : : #include "coretypes.h"
23 : : #include "backend.h"
24 : : #include "rtl.h"
25 : : #include "tree.h"
26 : : #include "memmodel.h"
27 : : #include "insn-config.h"
28 : : #include "emit-rtl.h"
29 : : #include "recog.h"
30 : : #include "cfganal.h"
31 : : #include "tree-pass.h"
32 : : #include "cfgrtl.h"
33 : : #include "rtl-iter.h"
34 : : #include "df.h"
35 : : #include "print-rtl.h"
36 : : #include "dbgcnt.h"
37 : : #include "diagnostic-core.h"
38 : : #include "target.h"
39 : :
/* These should probably move into a C++ class.  */

/* Per-block live-in chunk state — presumably one bitmap_head per basic
   block; TODO confirm against the allocation site (not in this chunk).  */
static vec<bitmap_head> livein;
/* NOTE(review): usage not visible in this chunk — verify semantics.  */
static bitmap all_blocks;
/* Bit groups live at the current point of the scan; queried and
   narrowed by ext_dce_process_sets, widened by use processing.  */
static bitmap livenow;
/* Pseudo regnos whose sign/zero extension was replaced by a SUBREG
   (set in ext_dce_try_optimize_insn).  */
static bitmap changed_pseudos;
/* When true, use processing may actually rewrite insns; when false
   the pass only computes liveness.  */
static bool modify;
46 : :
47 : : /* We consider four bit groups for liveness:
48 : : bit 0..7 (least significant byte)
49 : : bit 8..15 (second least significant byte)
50 : : bit 16..31
51 : : bit 32..BITS_PER_WORD-1 */
52 : :
53 : : /* For the given REG, return the number of bit groups implied by the
54 : : size of the REG's mode, up to a maximum of 4 (number of bit groups
55 : : tracked by this pass).
56 : :
57 : : For partial integer and variable sized modes also return 4. This
58 : : could possibly be refined for something like PSI mode, but it
59 : : does not seem worth the effort. */
60 : :
61 : : static int
62 : 229871246 : group_limit (const_rtx reg)
63 : : {
64 : 229871246 : machine_mode mode = GET_MODE (reg);
65 : :
66 : 229871246 : if (!GET_MODE_BITSIZE (mode).is_constant ())
67 : : return 4;
68 : :
69 : 229871246 : int size = GET_MODE_SIZE (mode).to_constant ();
70 : :
71 : 229871246 : size = exact_log2 (size);
72 : :
73 : 229783488 : if (size < 0)
74 : : return 4;
75 : :
76 : 229783488 : size++;
77 : 229783488 : return (size > 4 ? 4 : size);
78 : : }
79 : :
80 : : /* Make all bit groups live for REGNO in bitmap BMAP. For hard regs,
81 : : we assume all groups are live. For a pseudo we consider the size
82 : : of the pseudo to avoid creating unnecessarily live chunks of data. */
83 : :
84 : : static void
85 : 4626776 : make_reg_live (bitmap bmap, int regno)
86 : : {
87 : 4626776 : int limit;
88 : :
89 : : /* For pseudos we can use the mode to limit how many bit groups
90 : : are marked as live since a pseudo only has one mode. Hard
91 : : registers have to be handled more conservatively. */
92 : 4626776 : if (regno > FIRST_PSEUDO_REGISTER)
93 : : {
94 : 864919 : rtx reg = regno_reg_rtx[regno];
95 : 864919 : limit = group_limit (reg);
96 : : }
97 : : else
98 : : limit = 4;
99 : :
100 : 22815015 : for (int i = 0; i < limit; i++)
101 : 18188239 : bitmap_set_bit (bmap, regno * 4 + i);
102 : 4626776 : }
103 : :
/* Note this pass could be used to narrow memory loads too.  It's
   not clear if that's profitable or not in general.  */

/* True iff X is an UNSPEC or UNSPEC_VOLATILE.  Such opaque rtxs are
   skipped (treated conservatively) during set processing.  */
#define UNSPEC_P(X) (GET_CODE (X) == UNSPEC || GET_CODE (X) == UNSPEC_VOLATILE)
108 : :
109 : : /* If we know the destination of CODE only uses some low bits
110 : : (say just the QI bits of an SI operation), then return true
111 : : if we can propagate the need for just the subset of bits
112 : : from the destination to the sources.
113 : :
114 : : FIXME: This is safe for operands 1 and 2 of an IF_THEN_ELSE, but not
115 : : operand 0. Thus is likely would need some special casing to handle. */
116 : :
117 : : static bool
118 : 140065463 : safe_for_live_propagation (rtx_code code)
119 : : {
120 : : /* First handle rtx classes which as a whole are known to
121 : : be either safe or unsafe. */
122 : 140065463 : switch (GET_RTX_CLASS (code))
123 : : {
124 : : case RTX_OBJ:
125 : : case RTX_CONST_OBJ:
126 : : return true;
127 : :
128 : : case RTX_COMPARE:
129 : : case RTX_COMM_COMPARE:
130 : : case RTX_TERNARY:
131 : : return false;
132 : :
133 : 71956463 : default:
134 : 71956463 : break;
135 : : }
136 : :
137 : : /* What's left are specific codes. We only need to identify those
138 : : which are safe. */
139 : 71956463 : switch (code)
140 : : {
141 : : /* These are trivially safe. */
142 : : case SUBREG:
143 : : case NOT:
144 : : case ZERO_EXTEND:
145 : : case SIGN_EXTEND:
146 : : case TRUNCATE:
147 : : case PLUS:
148 : : case MINUS:
149 : : case MULT:
150 : : case SMUL_HIGHPART:
151 : : case UMUL_HIGHPART:
152 : : case AND:
153 : : case IOR:
154 : : case XOR:
155 : : return true;
156 : :
157 : : /* We can propagate for the shifted operand, but not the shift
158 : : count. The count is handled specially. */
159 : : case ASHIFT:
160 : : case LSHIFTRT:
161 : : case ASHIFTRT:
162 : : case SS_ASHIFT:
163 : : case US_ASHIFT:
164 : : return true;
165 : :
166 : : /* There may be other safe codes. If so they can be added
167 : : individually when discovered. */
168 : : default:
169 : : return false;
170 : : }
171 : : }
172 : :
/* Clear bits in LIVENOW and set bits in LIVE_TMP for objects
   set/clobbered by OBJ contained in INSN.

   Returns true if any destination was skipped (i.e. handled
   conservatively), so the caller must not narrow source liveness
   based on LIVE_TMP alone.

   Conceptually it is always safe to ignore a particular destination
   here as that will result in more chunks of data being considered
   live.  That's what happens when we "continue" the main loop when
   we see something we don't know how to handle such as a vector
   mode destination.

   The more accurate we are in identifying what objects (and chunks
   within an object) are set by INSN, the more aggressive the
   optimization phase during use handling will be.  */

static bool
ext_dce_process_sets (rtx_insn *insn, rtx obj, bitmap live_tmp)
{
  bool skipped_dest = false;

  subrtx_iterator::array_type array;
  FOR_EACH_SUBRTX (iter, array, obj, NONCONST)
    {
      const_rtx x = *iter;

      /* An EXPR_LIST (from call fusage) ends in NULL_RTX.  */
      if (x == NULL_RTX)
	continue;

      /* UNSPECs are opaque; treat them conservatively by skipping.  */
      if (UNSPEC_P (x))
	continue;

      if (GET_CODE (x) == SET || GET_CODE (x) == CLOBBER)
	{
	  /* BIT is the LSB position within the full register that this
	     destination starts writing at (nonzero for subregs).  */
	  unsigned bit = 0;
	  x = SET_DEST (x);

	  /* We don't support vector destinations or destinations
	     wider than DImode.  */
	  scalar_mode outer_mode;
	  if (!is_a <scalar_mode> (GET_MODE (x), &outer_mode)
	      || GET_MODE_BITSIZE (outer_mode) > HOST_BITS_PER_WIDE_INT)
	    {
	      /* Skip the subrtxs of this destination.  There is
		 little value in iterating into the subobjects, so
		 just skip them for a bit of efficiency.  */
	      skipped_dest = true;
	      iter.skip_subrtxes ();
	      continue;
	    }

	  /* We could have (strict_low_part (subreg ...)).  We can not just
	     strip the STRICT_LOW_PART as that would result in clearing
	     some bits in LIVENOW that are still live.  So process the
	     STRICT_LOW_PART specially.  */
	  if (GET_CODE (x) == STRICT_LOW_PART)
	    {
	      x = XEXP (x, 0);

	      /* The only valid operand of a STRICT_LOW_PART is a non
		 paradoxical SUBREG.  */
	      gcc_assert (SUBREG_P (x)
			  && !paradoxical_subreg_p (x)
			  && SUBREG_BYTE (x).is_constant ());

	      /* I think we should always see a REG here.  But let's
		 be sure.  */
	      gcc_assert (REG_P (SUBREG_REG (x)));

	      /* The inner mode might be larger, just punt for
		 that case.  Remember, we can not just continue to process
		 the inner RTXs due to the STRICT_LOW_PART.  */
	      if (!is_a <scalar_mode> (GET_MODE (SUBREG_REG (x)), &outer_mode)
		  || GET_MODE_BITSIZE (outer_mode) > HOST_BITS_PER_WIDE_INT)
		{
		  /* Skip the subrtxs of the STRICT_LOW_PART.  We can't
		     process them because it'll set objects as no longer
		     live when they are in fact still live.  */
		  skipped_dest = true;
		  iter.skip_subrtxes ();
		  continue;
		}

	      /* LIVE_TMP contains the set groups that are live-out and set in
		 this insn.  It is used to narrow the groups live-in for the
		 inputs of this insn.

		 The simple thing to do is mark all the groups as live, but
		 that will significantly inhibit optimization.

		 We also need to be careful in the case where we have an in-out
		 operand.  If we're not careful we'd clear LIVE_TMP
		 incorrectly.  */
	      HOST_WIDE_INT rn = REGNO (SUBREG_REG (x));
	      int limit = group_limit (SUBREG_REG (x));
	      for (HOST_WIDE_INT i = 4 * rn; i < 4 * rn + limit; i++)
		if (bitmap_bit_p (livenow, i))
		  bitmap_set_bit (live_tmp, i);

	      /* If nothing was live-out, conservatively make every
		 group of this register live in LIVE_TMP.  */
	      if (bitmap_empty_p (live_tmp))
		make_reg_live (live_tmp, rn);

	      /* The mode of the SUBREG tells us how many bits we can
		 clear.  */
	      machine_mode mode = GET_MODE (x);
	      HOST_WIDE_INT size
		= exact_log2 (GET_MODE_SIZE (mode).to_constant ()) + 1;
	      bitmap_clear_range (livenow, 4 * rn, size);

	      /* We have fully processed this destination.  */
	      iter.skip_subrtxes ();
	      continue;
	    }

	  /* Phase one of destination handling.  First remove any wrapper
	     such as SUBREG or ZERO_EXTRACT.  */
	  unsigned HOST_WIDE_INT mask
	    = GET_MODE_MASK (GET_MODE_INNER (GET_MODE (x)));
	  if (SUBREG_P (x))
	    {
	      /* If we have a SUBREG destination that is too wide, just
		 skip the destination rather than continuing this iterator.
		 While continuing would be better, we'd need to strip the
		 subreg and restart within the SET processing rather than
		 the top of the loop which just complicates the flow even
		 more.  */
	      if (!is_a <scalar_mode> (GET_MODE (SUBREG_REG (x)), &outer_mode)
		  || GET_MODE_BITSIZE (outer_mode) > HOST_BITS_PER_WIDE_INT)
		{
		  skipped_dest = true;
		  iter.skip_subrtxes ();
		  continue;
		}

	      /* We can safely strip a paradoxical subreg.  The inner mode will
		 be narrower than the outer mode.  We'll clear fewer bits in
		 LIVENOW than we'd like, but that's always safe.  */
	      if (paradoxical_subreg_p (x))
		x = XEXP (x, 0);
	      else if (SUBREG_BYTE (x).is_constant ())
		{
		  /* Account for the position of the subreg within the
		     full register in both BIT and MASK.  */
		  bit = subreg_lsb (x).to_constant ();
		  mask = GET_MODE_MASK (GET_MODE (SUBREG_REG (x))) << bit;
		  gcc_assert (mask);
		  x = SUBREG_REG (x);
		}
	      else
		gcc_unreachable ();
	    }

	  if (GET_CODE (x) == ZERO_EXTRACT)
	    {
	      /* Unlike a SUBREG destination, a set of a ZERO_EXTRACT only
		 modifies the bits referenced in the ZERO_EXTRACT, the rest
		 remain the same.  Thus we can not continue here, we must
		 either figure out what part of the destination is modified
		 or skip the sub-rtxs.  */
	      skipped_dest = true;
	      iter.skip_subrtxes ();
	      continue;
	    }

	  /* BIT >= 64 indicates something went horribly wrong.  */
	  gcc_assert (bit <= HOST_BITS_PER_WIDE_INT - 1);

	  /* Now handle the actual object that was changed.  */
	  if (REG_P (x))
	    {
	      /* LIVE_TMP contains the set groups that are live-out and set in
		 this insn.  It is used to narrow the groups live-in for the
		 inputs of this insn.

		 The simple thing to do is mark all the groups as live, but
		 that will significantly inhibit optimization.

		 We also need to be careful in the case where we have an in-out
		 operand.  If we're not careful we'd clear LIVE_TMP
		 incorrectly.  */
	      HOST_WIDE_INT rn = REGNO (x);
	      int limit = group_limit (x);
	      for (HOST_WIDE_INT i = 4 * rn; i < 4 * rn + limit; i++)
		if (bitmap_bit_p (livenow, i))
		  bitmap_set_bit (live_tmp, i);

	      if (bitmap_empty_p (live_tmp))
		make_reg_live (live_tmp, rn);

	      /* Now clear the bits known written by this instruction.
		 Note that BIT need not be a power of two, consider a
		 ZERO_EXTRACT destination.  */
	      int start = (bit < 8 ? 0 : bit < 16 ? 1 : bit < 32 ? 2 : 3);
	      int end = ((mask & ~HOST_WIDE_INT_UC (0xffffffff)) ? 4
			 : (mask & HOST_WIDE_INT_UC (0xffff0000)) ? 3
			 : (mask & 0xff00) ? 2 : 1);
	      bitmap_clear_range (livenow, 4 * rn + start, end - start);
	    }
	  /* Some ports generate (clobber (const_int)).  */
	  else if (CONST_INT_P (x))
	    continue;
	  else
	    gcc_assert (CALL_P (insn)
			|| MEM_P (x)
			|| x == pc_rtx
			|| GET_CODE (x) == SCRATCH);

	  /* The destination has been fully handled; do not descend
	     into its operands.  */
	  iter.skip_subrtxes ();
	}
      else if (GET_CODE (x) == COND_EXEC)
	{
	  /* This isn't ideal, but may not be so bad in practice.  */
	  skipped_dest = true;
	  iter.skip_subrtxes ();
	}
    }
  return skipped_dest;
}
387 : :
388 : : /* INSN has a sign/zero extended source inside SET that we will
389 : : try to turn into a SUBREG. */
390 : : static void
391 : 4796 : ext_dce_try_optimize_insn (rtx_insn *insn, rtx set)
392 : : {
393 : 4796 : rtx src = SET_SRC (set);
394 : 4796 : rtx inner = XEXP (src, 0);
395 : :
396 : : /* Avoid (subreg (mem)) and other constructs which may be valid RTL, but
397 : : not useful for this optimization. */
398 : 4796 : if (!(REG_P (inner) || (SUBREG_P (inner) && REG_P (SUBREG_REG (inner)))))
399 : : return;
400 : :
401 : 2307 : rtx new_pattern;
402 : 2307 : if (dump_file)
403 : : {
404 : 0 : fprintf (dump_file, "Processing insn:\n");
405 : 0 : dump_insn_slim (dump_file, insn);
406 : 0 : fprintf (dump_file, "Trying to simplify pattern:\n");
407 : 0 : print_rtl_single (dump_file, SET_SRC (set));
408 : : }
409 : :
410 : : /* We decided to turn do the optimization but allow it to be rejected for
411 : : bisection purposes. */
412 : 2307 : if (!dbg_cnt (::ext_dce))
413 : : {
414 : 0 : if (dump_file)
415 : 0 : fprintf (dump_file, "Rejected due to debug counter.\n");
416 : 0 : return;
417 : : }
418 : :
419 : 4614 : new_pattern = simplify_gen_subreg (GET_MODE (src), inner,
420 : 2307 : GET_MODE (inner), 0);
421 : : /* simplify_gen_subreg may fail in which case NEW_PATTERN will be NULL.
422 : : We must not pass that as a replacement pattern to validate_change. */
423 : 2307 : if (new_pattern)
424 : : {
425 : 2307 : int ok = validate_change (insn, &SET_SRC (set), new_pattern, false);
426 : :
427 : 2307 : rtx x = SET_DEST (set);
428 : 2307 : while (SUBREG_P (x) || GET_CODE (x) == ZERO_EXTRACT)
429 : 0 : x = XEXP (x, 0);
430 : :
431 : 2307 : gcc_assert (REG_P (x));
432 : 2307 : if (ok)
433 : 2307 : bitmap_set_bit (changed_pseudos, REGNO (x));
434 : :
435 : 2307 : if (dump_file)
436 : : {
437 : 0 : if (ok)
438 : 0 : fprintf (dump_file, "Successfully transformed to:\n");
439 : : else
440 : 0 : fprintf (dump_file, "Failed transformation to:\n");
441 : :
442 : 0 : print_rtl_single (dump_file, new_pattern);
443 : 0 : fprintf (dump_file, "\n");
444 : : }
445 : :
446 : : /* INSN may have a REG_EQUAL note indicating that the value was
447 : : sign or zero extended. That note is no longer valid since we've
448 : : just removed the extension. Just wipe the notes. */
449 : 2307 : remove_reg_equal_equiv_notes (insn, false);
450 : : }
451 : : else
452 : : {
453 : 0 : if (dump_file)
454 : 0 : fprintf (dump_file, "Unable to generate valid SUBREG expression.\n");
455 : : }
456 : : }
457 : :
458 : : /* Some operators imply that their second operand is fully live,
459 : : regardless of how many bits in the output are live. An example
460 : : would be the shift count on a target without SHIFT_COUNT_TRUNCATED
461 : : defined.
462 : :
463 : : Return TRUE if CODE is such an operator. FALSE otherwise. */
464 : :
465 : : static bool
466 : 75706551 : binop_implies_op2_fully_live (rtx_code code)
467 : : {
468 : 0 : switch (code)
469 : : {
470 : : case ASHIFT:
471 : : case LSHIFTRT:
472 : : case ASHIFTRT:
473 : : case ROTATE:
474 : : case ROTATERT:
475 : : case SS_ASHIFT:
476 : : case US_ASHIFT:
477 : : return !SHIFT_COUNT_TRUNCATED;
478 : :
479 : 0 : default:
480 : 0 : return false;
481 : : }
482 : : }
483 : :
/* X, with code CODE, is an operation for which safe_for_live_propagation
   holds true, and bits set in MASK are live in the result.  Compute a
   mask of (potentially) live bits in the non-constant inputs.  In case of
   binop_implies_op2_fully_live (e.g. shifts), the computed mask may
   exclusively pertain to the first operand.

   This looks wrong as we may have some important operations embedded as
   operands of another operation.  For example, we might have an extension
   wrapping a shift.  It really feels like this needs to be recursing down
   into operands much more often.  */

unsigned HOST_WIDE_INT
carry_backpropagate (unsigned HOST_WIDE_INT mask, enum rtx_code code, rtx x)
{
  /* Nothing live in the output means nothing need be live in the
     inputs.  */
  if (mask == 0)
    return 0;

  enum machine_mode mode = GET_MODE_INNER (GET_MODE (x));
  unsigned HOST_WIDE_INT mmask = GET_MODE_MASK (mode);

  /* While we don't try to optimize operations on types larger
     than 64 bits, we do want to make sure not to invoke undefined
     behavior when presented with such operations during use
     processing.  The safe thing to do is to just return mmask
     for that scenario indicating every possible chunk is life.  */
  scalar_int_mode smode;
  if (!is_a <scalar_int_mode> (mode, &smode)
      || GET_MODE_BITSIZE (smode) > HOST_BITS_PER_WIDE_INT)
    return mmask;

  switch (code)
    {
    /* Carries/borrows only propagate towards more significant bits,
       so every input bit at or below the highest live output bit may
       matter.  */
    case PLUS:
    case MINUS:
    case MULT:
      return (HOST_WIDE_INT_UC (2) << floor_log2 (mask)) - 1;

    /* We propagate for the shifted operand, but not the shift
       count.  The count is handled specially.  */
    case ASHIFT:
      if (CONST_INT_P (XEXP (x, 1))
	  && UINTVAL (XEXP (x, 1)) < GET_MODE_BITSIZE (smode))
	return (HOST_WIDE_INT) mask >> INTVAL (XEXP (x, 1));
      /* Variable/out-of-range count: fall back to the PLUS-style
	 answer since the left shift can only move bits upward.  */
      return (HOST_WIDE_INT_UC (2) << floor_log2 (mask)) - 1;

    /* We propagate for the shifted operand, but not the shift
       count.  The count is handled specially.  */
    case LSHIFTRT:
      if (CONST_INT_P (XEXP (x, 1))
	  && UINTVAL (XEXP (x, 1)) < GET_MODE_BITSIZE (smode))
	return mmask & (mask << INTVAL (XEXP (x, 1)));
      return mmask;

    /* We propagate for the shifted operand, but not the shift
       count.  The count is handled specially.  */
    case ASHIFTRT:
      if (CONST_INT_P (XEXP (x, 1))
	  && UINTVAL (XEXP (x, 1)) < GET_MODE_BITSIZE (smode))
	{
	  /* If any live output bit came from replicated sign bits,
	     the input's sign bit itself must be considered live.  */
	  HOST_WIDE_INT sign = 0;
	  if (HOST_BITS_PER_WIDE_INT - clz_hwi (mask) + INTVAL (XEXP (x, 1))
	      > GET_MODE_BITSIZE (smode))
	    sign = HOST_WIDE_INT_1U << (GET_MODE_BITSIZE (smode) - 1);
	  return sign | (mmask & (mask << INTVAL (XEXP (x, 1))));
	}
      return mmask;

    case SMUL_HIGHPART:
    case UMUL_HIGHPART:
      /* Multiplying by zero produces zero; no input bits live.  */
      if (XEXP (x, 1) == const0_rtx)
	return 0;
      if (XEXP (x, 1) == const1_rtx)
	return mmask;
      if (CONST_INT_P (XEXP (x, 1)))
	{
	  /* Highpart multiply by 2^k behaves like a left shift of the
	     live mask by (width - k).  */
	  if (pow2p_hwi (INTVAL (XEXP (x, 1))))
	    return mmask & (mask << (GET_MODE_BITSIZE (smode)
				     - exact_log2 (INTVAL (XEXP (x, 1)))));

	  /* Otherwise bound the highest input bit that can influence
	     the live output bits.  */
	  int bits = (HOST_BITS_PER_WIDE_INT + GET_MODE_BITSIZE (smode)
		      - clz_hwi (mask) - ctz_hwi (INTVAL (XEXP (x, 1))));
	  if (bits < GET_MODE_BITSIZE (smode))
	    return (HOST_WIDE_INT_1U << bits) - 1;
	}
      return mmask;

    case SIGN_EXTEND:
      /* Variable-sized modes: punt with everything live.  */
      if (!GET_MODE_BITSIZE (GET_MODE (x)).is_constant ()
	  || !GET_MODE_BITSIZE (GET_MODE (XEXP (x, 0))).is_constant ())
	return -1;

      /* We want the mode of the inner object.  We need to ensure its
	 sign bit is on in MASK.  */
      mode = GET_MODE_INNER (GET_MODE (XEXP (x, 0)));
      if (mask & ~GET_MODE_MASK (mode))
	mask |= HOST_WIDE_INT_1U << (GET_MODE_BITSIZE (mode).to_constant ()
				     - 1);

      /* Recurse into the operand.  */
      return carry_backpropagate (mask, GET_CODE (XEXP (x, 0)), XEXP (x, 0));

    case ZERO_EXTEND:
      /* Variable-sized modes: punt with everything live.  */
      if (!GET_MODE_BITSIZE (GET_MODE (x)).is_constant ()
	  || !GET_MODE_BITSIZE (GET_MODE (XEXP (x, 0))).is_constant ())
	return -1;

      /* Recurse into the operand.  */
      return carry_backpropagate (mask, GET_CODE (XEXP (x, 0)), XEXP (x, 0));

    /* We propagate for the shifted operand, but not the shift
       count.  The count is handled specially.  */
    case SS_ASHIFT:
    case US_ASHIFT:
      if (CONST_INT_P (XEXP (x, 1))
	  && UINTVAL (XEXP (x, 1)) < GET_MODE_BITSIZE (smode))
	{
	  /* Saturation can expose the top bits, so keep the high bits
	     that saturation might produce live in addition to the
	     shifted-down live mask.  */
	  return ((mmask & ~((unsigned HOST_WIDE_INT) mmask
			     >> (INTVAL (XEXP (x, 1))
				 + (XEXP (x, 1) != const0_rtx
				    && code == SS_ASHIFT))))
		  | ((HOST_WIDE_INT) mask >> INTVAL (XEXP (x, 1))));
	}
      return mmask;

    default:
      return mask;
    }
}
612 : :
613 : : /* Process uses in INSN contained in OBJ. Set appropriate bits in LIVENOW
614 : : for any chunks of pseudos that become live, potentially filtering using
615 : : bits from LIVE_TMP.
616 : :
617 : : If MODIFY is true, then optimize sign/zero extensions to SUBREGs when
618 : : the extended bits are never read and mark pseudos which had extensions
619 : : eliminated in CHANGED_PSEUDOS. */
620 : :
621 : : static void
622 : 135832638 : ext_dce_process_uses (rtx_insn *insn, rtx obj,
623 : : bitmap live_tmp, bool skipped_dest)
624 : : {
625 : 135832638 : subrtx_var_iterator::array_type array_var;
626 : 736653478 : FOR_EACH_SUBRTX_VAR (iter, array_var, obj, NONCONST)
627 : : {
628 : : /* An EXPR_LIST (from call fusage) ends in NULL_RTX. */
629 : 600820840 : rtx x = *iter;
630 : 600820840 : if (x == NULL_RTX)
631 : 9400395 : continue;
632 : :
633 : : /* So the basic idea in this FOR_EACH_SUBRTX_VAR loop is to
634 : : handle SETs explicitly, possibly propagating live information
635 : : into the uses.
636 : :
637 : : We may continue the loop at various points which will cause
638 : : iteration into the next level of RTL. Breaking from the loop
639 : : is never safe as it can lead us to fail to process some of the
640 : : RTL and thus not make objects live when necessary. */
641 : 591420445 : enum rtx_code xcode = GET_CODE (x);
642 : 591420445 : if (xcode == SET)
643 : : {
644 : 120015590 : const_rtx dst = SET_DEST (x);
645 : 120015590 : rtx src = SET_SRC (x);
646 : 120015590 : const_rtx y;
647 : 120015590 : unsigned HOST_WIDE_INT bit = 0;
648 : :
649 : : /* The code of the RHS of a SET. */
650 : 120015590 : enum rtx_code code = GET_CODE (src);
651 : :
652 : : /* ?!? How much of this should mirror SET handling, potentially
653 : : being shared? */
654 : 120015590 : if (SUBREG_P (dst) && SUBREG_BYTE (dst).is_constant ())
655 : : {
656 : 585120 : bit = subreg_lsb (dst).to_constant ();
657 : 585120 : if (bit >= HOST_BITS_PER_WIDE_INT)
658 : : bit = HOST_BITS_PER_WIDE_INT - 1;
659 : 585120 : dst = SUBREG_REG (dst);
660 : : }
661 : 119430470 : else if (GET_CODE (dst) == STRICT_LOW_PART)
662 : 10240 : dst = XEXP (dst, 0);
663 : :
664 : : /* Main processing of the uses. Two major goals here.
665 : :
666 : : First, we want to try and propagate liveness (or the lack
667 : : thereof) from the destination register to the source
668 : : register(s).
669 : :
670 : : Second, if the source is an extension, try to optimize
671 : : it into a SUBREG. The SUBREG form indicates we don't
672 : : care about the upper bits and will usually be copy
673 : : propagated away.
674 : :
675 : : If we fail to handle something in here, the expectation
676 : : is the iterator will dive into the sub-components and
677 : : mark all the chunks in any found REGs as live. */
678 : 120015590 : if (REG_P (dst) && safe_for_live_propagation (code))
679 : : {
680 : : /* Create a mask representing the bits of this output
681 : : operand that are live after this insn. We can use
682 : : this information to refine the live in state of
683 : : inputs to this insn in many cases.
684 : :
685 : : We have to do this on a per SET basis, we might have
686 : : an INSN with multiple SETS, some of which can narrow
687 : : the source operand liveness, some of which may not. */
688 : 70778587 : unsigned HOST_WIDE_INT dst_mask = 0;
689 : 70778587 : HOST_WIDE_INT rn = REGNO (dst);
690 : 70778587 : unsigned HOST_WIDE_INT mask_array[]
691 : : = { 0xff, 0xff00, HOST_WIDE_INT_UC (0xffff0000),
692 : : -HOST_WIDE_INT_UC (0x100000000) };
693 : 353892935 : for (int i = 0; i < 4; i++)
694 : 283114348 : if (bitmap_bit_p (live_tmp, 4 * rn + i))
695 : 227573926 : dst_mask |= mask_array[i];
696 : 70778587 : dst_mask >>= bit;
697 : :
698 : : /* If we ignored a destination during set processing, then
699 : : consider all the bits live. */
700 : 70778587 : if (skipped_dest)
701 : 24342908 : dst_mask = -1;
702 : :
703 : 70778587 : dst_mask = carry_backpropagate (dst_mask, code, src);
704 : :
705 : : /* ??? Could also handle ZERO_EXTRACT / SIGN_EXTRACT
706 : : of the source specially to improve optimization. */
707 : 70778587 : if (code == SIGN_EXTEND || code == ZERO_EXTEND)
708 : : {
709 : 1728250 : rtx inner = XEXP (src, 0);
710 : 1728250 : unsigned HOST_WIDE_INT src_mask
711 : 1728250 : = GET_MODE_MASK (GET_MODE_INNER (GET_MODE (inner)));
712 : :
713 : : /* DST_MASK could be zero if we had something in the SET
714 : : that we couldn't handle. */
715 : 1728250 : if (modify && !skipped_dest && (dst_mask & ~src_mask) == 0)
716 : 4796 : ext_dce_try_optimize_insn (insn, x);
717 : :
718 : : /* Stripping the extension here just seems wrong on multiple
719 : : levels. It's source side handling, so it seems like it
720 : : belongs in the loop below. Stripping here also makes it
721 : : harder than necessary to properly handle live bit groups
722 : : for (ANY_EXTEND (SUBREG)) where the SUBREG has
723 : : SUBREG_PROMOTED state. */
724 : 1728250 : dst_mask &= src_mask;
725 : 1728250 : src = XEXP (src, 0);
726 : 1728250 : code = GET_CODE (src);
727 : : }
728 : :
729 : : /* Optimization is done at this point. We just want to make
730 : : sure everything that should get marked as live is marked
731 : : from here onward. */
732 : :
733 : : /* We will handle the other operand of a binary operator
734 : : at the bottom of the loop by resetting Y. */
735 : 70778587 : if (BINARY_P (src))
736 : 21977370 : y = XEXP (src, 0);
737 : : else
738 : : y = src;
739 : :
740 : : /* We're inside a SET and want to process the source operands
741 : : making things live. Breaking from this loop will cause
742 : : the iterator to work on sub-rtxs, so it is safe to break
743 : : if we see something we don't know how to handle.
744 : :
745 : : This code is just hokey as it really just handles trivial
746 : : unary and binary cases. Otherwise the loop exits and we
747 : : continue iterating on sub-rtxs, but outside the set context. */
748 : : unsigned HOST_WIDE_INT save_mask = dst_mask;
749 : 113742545 : for (;;)
750 : : {
751 : : /* In general we want to restore DST_MASK before each loop
752 : : iteration. The exception is when the opcode implies that
753 : : the other operand is fully live. That's handled by
754 : : changing SAVE_MASK below. */
755 : 92260566 : dst_mask = save_mask;
756 : : /* Strip an outer paradoxical subreg. The bits outside
757 : : the inner mode are don't cares. So we can just strip
758 : : and process the inner object. */
759 : 92260566 : if (paradoxical_subreg_p (y))
760 : : y = XEXP (y, 0);
761 : 92175966 : else if (SUBREG_P (y) && SUBREG_BYTE (y).is_constant ())
762 : : {
763 : : /* If !TRULY_NOOP_TRUNCATION_MODES_P, the mode
764 : : change performed by Y would normally need to be a
765 : : TRUNCATE rather than a SUBREG. It is probably the
766 : : guarantee provided by SUBREG_PROMOTED_VAR_P that
767 : : allows the SUBREG in Y as an exception. We must
768 : : therefore preserve that guarantee and treat the
769 : : upper bits of the inner register as live
770 : : regardless of the outer code. See PR 120050. */
771 : 1845374 : if (!REG_P (SUBREG_REG (y))
772 : 1845374 : || (SUBREG_PROMOTED_VAR_P (y)
773 : 12162 : && (!TRULY_NOOP_TRUNCATION_MODES_P (
774 : : GET_MODE (y),
775 : : GET_MODE (SUBREG_REG (y))))))
776 : : break;
777 : :
778 : 1844428 : bit = subreg_lsb (y).to_constant ();
779 : :
780 : : /* If this is a wide object (more bits than we can fit
781 : : in a HOST_WIDE_INT), then just break from the SET
782 : : context. That will cause the iterator to walk down
783 : : into the subrtx and if we land on a REG we'll mark
784 : : the whole think live. */
785 : 1844428 : if (bit >= HOST_BITS_PER_WIDE_INT)
786 : : break;
787 : :
788 : : /* The SUBREG's mode determines the live width. */
789 : 1614471 : if (dst_mask)
790 : : {
791 : 1614471 : dst_mask <<= bit;
792 : 1614471 : if (!dst_mask)
793 : 0 : dst_mask = -HOST_WIDE_INT_UC (0x100000000);
794 : : }
795 : 1614471 : y = SUBREG_REG (y);
796 : : }
797 : :
798 : 92029663 : if (REG_P (y))
799 : : {
800 : : /* We have found the use of a register. We need to mark
801 : : the appropriate chunks of the register live. The mode
802 : : of the REG is a starting point. We may refine that
803 : : based on what chunks in the output were live. */
804 : 48740891 : rn = 4 * REGNO (y);
805 : 48740891 : unsigned HOST_WIDE_INT tmp_mask = dst_mask;
806 : :
807 : : /* If the RTX code for the SET_SRC is not one we can
808 : : propagate destination liveness through, then just
809 : : set the mask to the mode's mask. */
810 : 48740891 : if (!safe_for_live_propagation (code))
811 : 29712 : tmp_mask
812 : 59424 : = GET_MODE_MASK (GET_MODE_INNER (GET_MODE (y)));
813 : :
814 : 48740891 : if (tmp_mask & 0xff)
815 : 48314597 : bitmap_set_bit (livenow, rn);
816 : 48740891 : if (tmp_mask & 0xff00)
817 : 46855708 : bitmap_set_bit (livenow, rn + 1);
818 : 48740891 : if (tmp_mask & HOST_WIDE_INT_UC (0xffff0000))
819 : 46605680 : bitmap_set_bit (livenow, rn + 2);
820 : 48740891 : if (tmp_mask & -HOST_WIDE_INT_UC (0x100000000))
821 : 40244820 : bitmap_set_bit (livenow, rn + 3);
822 : : }
823 : 43288772 : else if (!CONSTANT_P (y))
824 : : break;
825 : :
826 : : /* We might have (ashift (const_int 1) (reg...))
827                 :             :                  By setting dst_mask we can continue iterating on
828                 :             :                  the next operand and it will be considered fully live.
829 : :
830 : : Note that since we restore DST_MASK from SAVE_MASK at the
831 : : top of the loop, we have to change SAVE_MASK to get the
832 : : semantics we want. */
833 : 75706551 : if (binop_implies_op2_fully_live (GET_CODE (src)))
834 : 2443538 : save_mask = -1;
835 : :
836 : : /* If this was anything but a binary operand, break the inner
837 : : loop. This is conservatively correct as it will cause the
838 : : iterator to look at the sub-rtxs outside the SET context. */
839 : 75706551 : if (!BINARY_P (src))
840 : : break;
841 : :
842 : : /* We processed the first operand of a binary operator. Now
843 : : handle the second. */
844 : 21481979 : y = XEXP (src, 1), src = pc_rtx;
845 : 21481979 : }
846 : :
847 : : /* These are leaf nodes, no need to iterate down into them. */
848 : 70778587 : if (REG_P (y) || CONSTANT_P (y))
849 : 54224572 : iter.skip_subrtxes ();
850 : : }
851 : : }
852 : : /* If we are reading the low part of a SUBREG, then we can
853 : : refine liveness of the input register, otherwise let the
854 : : iterator continue into SUBREG_REG. */
855 : 471404855 : else if (SUBREG_P (x)
856 : 1298130 : && REG_P (SUBREG_REG (x))
857 : 1296373 : && !paradoxical_subreg_p (x)
858 : 1274744 : && subreg_lowpart_p (x)
859 : 968054 : && GET_MODE_BITSIZE (GET_MODE (x)).is_constant ()
860 : 473340963 : && GET_MODE_BITSIZE (GET_MODE (x)).to_constant () <= 32)
861 : : {
862 : 478841 : HOST_WIDE_INT size = GET_MODE_BITSIZE (GET_MODE (x)).to_constant ();
863 : 478841 : HOST_WIDE_INT rn = 4 * REGNO (SUBREG_REG (x));
864 : :
865 : : /* If this is a promoted subreg, then more of it may be live than
866 : : is otherwise obvious. */
867 : 478841 : if (SUBREG_PROMOTED_VAR_P (x))
868 : 3994 : size = GET_MODE_BITSIZE (GET_MODE (SUBREG_REG (x))).to_constant ();
869 : :
870 : 478841 : bitmap_set_bit (livenow, rn);
871 : 478841 : if (size > 8)
872 : 297027 : bitmap_set_bit (livenow, rn + 1);
873 : 297027 : if (size > 16)
874 : 260903 : bitmap_set_bit (livenow, rn + 2);
875 : 260903 : if (size >= 32)
876 : 260903 : bitmap_set_bit (livenow, rn + 3);
877 : 478841 : iter.skip_subrtxes ();
878 : : }
879 : : /* If we have a register reference that is not otherwise handled,
880 : : just assume all the chunks are live. */
881 : 470926014 : else if (REG_P (x))
882 : 156681359 : bitmap_set_range (livenow, REGNO (x) * 4, group_limit (x));
883 : : }
884 : 135832638 : }
885 : :
886 : : /* Process a single basic block BB with current liveness information
887 : : in LIVENOW, returning updated liveness information.
888 : :
889 : : If MODIFY is true, then this is the last pass and unnecessary
890 : : extensions should be eliminated when possible. If an extension
891 : : is removed, the source pseudo is marked in CHANGED_PSEUDOS. */
892 : :
893 : : static void
894 : 22523555 : ext_dce_process_bb (basic_block bb)
895 : : {
896 : 22523555 : rtx_insn *insn;
897 : :
898 : 295321209 : FOR_BB_INSNS_REVERSE (bb, insn)
899 : : {
900 : 419163065 : if (!NONDEBUG_INSN_P (insn))
901 : 146365411 : continue;
902 : :
903 : : /* Live-out state of the destination of this insn. We can
904 : : use this to refine the live-in state of the sources of
905 : : this insn in many cases. */
906 : 126432243 : bitmap live_tmp = BITMAP_ALLOC (NULL);
907 : :
908 : : /* First process any sets/clobbers in INSN. */
909 : 126432243 : bool skipped_dest = ext_dce_process_sets (insn, PATTERN (insn), live_tmp);
910 : :
911 : : /* CALL_INSNs need processing their fusage data. */
912 : 126432243 : if (CALL_P (insn))
913 : 9400395 : skipped_dest |= ext_dce_process_sets (insn,
914 : : CALL_INSN_FUNCTION_USAGE (insn),
915 : : live_tmp);
916 : :
917 : : /* And now uses, optimizing away SIGN/ZERO extensions as we go. */
918 : 126432243 : ext_dce_process_uses (insn, PATTERN (insn), live_tmp, skipped_dest);
919 : :
920 : : /* A nonlocal goto implicitly uses the frame pointer. */
921 : 126432243 : if (JUMP_P (insn) && find_reg_note (insn, REG_NON_LOCAL_GOTO, NULL_RTX))
922 : : {
923 : 1130 : bitmap_set_range (livenow, FRAME_POINTER_REGNUM * 4, 4);
924 : 1130 : if (!HARD_FRAME_POINTER_IS_FRAME_POINTER)
925 : 1130 : bitmap_set_range (livenow, HARD_FRAME_POINTER_REGNUM * 4, 4);
926 : : }
927 : :
928 : : /* And process fusage data for the use as well. */
929 : 126432243 : if (CALL_P (insn))
930 : : {
931 : 9400395 : if (!FAKE_CALL_P (insn))
932 : 9400335 : bitmap_set_range (livenow, STACK_POINTER_REGNUM * 4, 4);
933 : :
934                 :             :           /* If this is not a call to a const function, then assume it
935 : : can read any global register. */
936 : 9400395 : if (!RTL_CONST_CALL_P (insn))
937 : 844032474 : for (unsigned i = 0; i < FIRST_PSEUDO_REGISTER; i++)
938 : 834956856 : if (global_regs[i])
939 : 230 : bitmap_set_range (livenow, i * 4, 4);
940 : :
941 : 9400395 : ext_dce_process_uses (insn, CALL_INSN_FUNCTION_USAGE (insn), live_tmp, false);
942 : : }
943 : :
944 : 126432243 : BITMAP_FREE (live_tmp);
945 : : }
946 : 22523555 : }
947 : :
948 : : /* SUBREG_PROMOTED_VAR_P is set by the gimple->rtl optimizers and
949 : : is usually helpful. However, in some cases setting the value when
950                 :             :    it is not strictly needed can cause this pass to miss optimizations.
951 : :
952 : : Specifically consider (set (mem) (subreg (reg))). If set in that
953 : : case it will cause more bit groups to be live for REG than would
954 : : be strictly necessary which in turn can inhibit extension removal.
955 : :
956 : : So do a pass over the IL wiping the SUBREG_PROMOTED_VAR_P when it
957 : : is obviously not needed. */
958 : :
959 : : static void
960 : 947488 : maybe_clear_subreg_promoted_p (void)
961 : : {
962 : 117007026 : for (rtx_insn *insn = get_insns(); insn; insn = NEXT_INSN (insn))
963 : : {
964 : 116059538 : if (!NONDEBUG_INSN_P (insn))
965 : 61851339 : continue;
966 : :
967 : 54208199 : rtx set = single_set (insn);
968 : 54208199 : if (!set)
969 : 3605385 : continue;
970 : :
971 : : /* There may be other cases where we should clear, but for
972 : : now, this is the only known case where it causes problems. */
973 : 50602814 : if (MEM_P (SET_DEST (set)) && SUBREG_P (SET_SRC (set))
974 : 69390 : && GET_MODE (SET_DEST (set)) <= GET_MODE (SUBREG_REG (SET_SRC (set))))
975 : 60496 : SUBREG_PROMOTED_VAR_P (SET_SRC (set)) = 0;
976 : : }
977 : 947488 : }
978 : :
979 : : /* Walk the IL and build the transitive closure of all the REGs tied
980 : : together by copies where either the source or destination is
981 : : marked in CHANGED_PSEUDOS. */
982 : :
983 : : static void
984 : 947488 : expand_changed_pseudos (void)
985 : : {
986 : : /* Build a vector of registers related by a copy. This is meant to
987 : : speed up the next step by avoiding full IL walks. */
988 : 947488 : struct copy_pair { rtx first; rtx second; };
989 : 947488 : auto_vec<copy_pair> pairs;
990 : 117007026 : for (rtx_insn *insn = get_insns(); insn; insn = NEXT_INSN (insn))
991 : : {
992 : 116059538 : if (!NONDEBUG_INSN_P (insn))
993 : 61851339 : continue;
994 : :
995 : 54208199 : rtx pat = PATTERN (insn);
996 : :
997 : : /* Simple copies to a REG from another REG or SUBREG of a REG. */
998 : 54208199 : if (GET_CODE (pat) == SET
999 : 42820278 : && REG_P (SET_DEST (pat))
1000 : 30621575 : && (REG_P (SET_SRC (pat))
1001 : 22140259 : || (SUBREG_P (SET_SRC (pat))
1002 : 386440 : && REG_P (SUBREG_REG (SET_SRC (pat))))))
1003 : : {
1004 : 386003 : rtx src = (REG_P (SET_SRC (pat))
1005 : 8867319 : ? SET_SRC (pat)
1006 : : : SUBREG_REG (SET_SRC (pat)));
1007 : 8867319 : pairs.safe_push ({ SET_DEST (pat), src });
1008 : : }
1009 : :
1010 : : /* Simple copies to a REG from another REG or SUBREG of a REG
1011 : : held inside a PARALLEL. */
1012 : 54208199 : if (GET_CODE (pat) == PARALLEL)
1013 : : {
1014 : 24466859 : for (int i = XVECLEN (pat, 0) - 1; i >= 0; i--)
1015 : : {
1016 : 16420043 : rtx elem = XVECEXP (pat, 0, i);
1017 : :
1018 : 16420043 : if (GET_CODE (elem) == SET
1019 : 8252614 : && REG_P (SET_DEST (elem))
1020 : 8107409 : && (REG_P (SET_SRC (elem))
1021 : 8107409 : || (SUBREG_P (SET_SRC (elem))
1022 : 0 : && REG_P (SUBREG_REG (SET_SRC (elem))))))
1023 : : {
1024 : 0 : rtx src = (REG_P (SET_SRC (elem))
1025 : 0 : ? SET_SRC (elem)
1026 : : : SUBREG_REG (SET_SRC (elem)));
1027 : 0 : pairs.safe_push ({ SET_DEST (elem), src });
1028 : : }
1029 : : }
1030 : 8046816 : continue;
1031 : 8046816 : }
1032 : : }
1033 : :
1034 : : /* Now we have a vector with copy pairs. Iterate over that list
1035 : : updating CHANGED_PSEUDOS as we go. Eliminate copies from the
1036 : : list as we go as they don't need further processing. */
1037 : : bool changed = true;
1038 : 1895012 : while (changed)
1039 : : {
1040 : : changed = false;
1041 : : unsigned int i;
1042 : : copy_pair *p;
1043 : 10762869 : FOR_EACH_VEC_ELT (pairs, i, p)
1044 : : {
1045 : 8867857 : if (bitmap_bit_p (changed_pseudos, REGNO (p->second))
1046 : 8867857 : && bitmap_set_bit (changed_pseudos, REGNO (p->first)))
1047 : : {
1048 : 48 : pairs.unordered_remove (i);
1049 : 48 : changed = true;
1050 : : }
1051 : : }
1052 : : }
1053 : 947488 : }
1054 : :
1055 : : /* We optimize away sign/zero extensions in this pass and replace
1056 : : them with SUBREGs indicating certain bits are don't cares.
1057 : :
1058 : : This changes the SUBREG_PROMOTED_VAR_P state of the object.
1059 : : It is fairly painful to fix this on the fly, so we have
1060 : : recorded which pseudos are affected and we look for SUBREGs
1061 : : of those pseudos and fix them up. */
1062 : :
1063 : : static void
1064 : 947488 : reset_subreg_promoted_p (void)
1065 : : {
1066 : : /* This pass eliminates zero/sign extensions on pseudo regs found
1067 : : in CHANGED_PSEUDOS. Elimination of those extensions changes if
1068 : : the pseudos are known to hold values extended to wider modes
1069 : : via SUBREG_PROMOTED_VAR. So we wipe the SUBREG_PROMOTED_VAR
1070 : : state on all affected pseudos.
1071 : :
1072 : : But that is insufficient. We might have a copy from one REG
1073 : : to another (possibly with the source register wrapped with a
1074 : : SUBREG). We need to wipe SUBREG_PROMOTED_VAR on the transitive
1075 : : closure of the original CHANGED_PSEUDOS and registers they're
1076 : : connected to via copies. So expand the set. */
1077 : 947488 : expand_changed_pseudos ();
1078 : :
1079 : : /* If we removed an extension, that changed the promoted state
1080 : : of the destination of that extension. Thus we need to go
1081 : : find any SUBREGs that reference that pseudo and adjust their
1082 : : SUBREG_PROMOTED_P state. */
1083 : 117007026 : for (rtx_insn *insn = get_insns(); insn; insn = NEXT_INSN (insn))
1084 : : {
1085 : 116059538 : if (!NONDEBUG_INSN_P (insn))
1086 : 61851339 : continue;
1087 : :
1088 : 54208199 : rtx pat = PATTERN (insn);
1089 : 54208199 : subrtx_var_iterator::array_type array;
1090 : 344879878 : FOR_EACH_SUBRTX_VAR (iter, array, pat, NONCONST)
1091 : : {
1092 : 290671679 : rtx sub = *iter;
1093 : :
1094 : : /* We only care about SUBREGs. */
1095 : 290671679 : if (GET_CODE (sub) != SUBREG)
1096 : 289196250 : continue;
1097 : :
1098 : 1475429 : const_rtx x = SUBREG_REG (sub);
1099 : :
1100 : : /* We only care if the inner object is a REG. */
1101 : 1475429 : if (!REG_P (x))
1102 : 732 : continue;
1103 : :
1104 : : /* And only if the SUBREG is a promoted var. */
1105 : 1474697 : if (!SUBREG_PROMOTED_VAR_P (sub))
1106 : 1469540 : continue;
1107 : :
1108 : 5157 : if (bitmap_bit_p (changed_pseudos, REGNO (x)))
1109 : 0 : SUBREG_PROMOTED_VAR_P (sub) = 0;
1110 : : }
1111 : 54208199 : }
1112 : 947488 : }
1113 : :
1114 : : /* Initialization of the ext-dce pass. Primarily this means
1115 : : setting up the various bitmaps we utilize. */
1116 : :
1117 : : static void
1118 : 947488 : ext_dce_init (void)
1119 : : {
1120 : 947488 : livein.create (last_basic_block_for_fn (cfun));
1121 : 947488 : livein.quick_grow_cleared (last_basic_block_for_fn (cfun));
1122 : 12494841 : for (int i = 0; i < last_basic_block_for_fn (cfun); i++)
1123 : 11547353 : bitmap_initialize (&livein[i], &bitmap_default_obstack);
1124 : :
1125 : 947488 : auto_bitmap refs (&bitmap_default_obstack);
1126 : 947488 : df_get_exit_block_use_set (refs);
1127 : :
1128 : 947488 : unsigned i;
1129 : 947488 : bitmap_iterator bi;
1130 : 4361648 : EXECUTE_IF_SET_IN_BITMAP (refs, 0, i, bi)
1131 : 3414160 : make_reg_live (&livein[EXIT_BLOCK], i);
1132 : :
1133 : 947488 : livenow = BITMAP_ALLOC (NULL);
1134 : 947488 : all_blocks = BITMAP_ALLOC (NULL);
1135 : 947488 : changed_pseudos = BITMAP_ALLOC (NULL);
1136 : :
1137 : 12494841 : for (int i = 0; i < last_basic_block_for_fn (cfun); i++)
1138 : 11547353 : if (i != ENTRY_BLOCK && i != EXIT_BLOCK)
1139 : 9652377 : bitmap_set_bit (all_blocks, i);
1140 : :
1141 : 947488 : modify = false;
1142 : 947488 : }
1143 : :
1144 : : /* Finalization of the ext-dce pass. Primarily this means
1145 : : releasing up the various bitmaps we utilize. */
1146 : :
1147 : : static void
1148 : 947488 : ext_dce_finish (void)
1149 : : {
1150 : 12494841 : for (unsigned i = 0; i < livein.length (); i++)
1151 : 11547353 : bitmap_clear (&livein[i]);
1152 : 947488 : livein.release ();
1153 : :
1154 : 947488 : BITMAP_FREE (livenow);
1155 : 947488 : BITMAP_FREE (changed_pseudos);
1156 : 947488 : BITMAP_FREE (all_blocks);
1157 : 947488 : }
1158 : :
1159 : : /* Process block number BB_INDEX as part of the backward
1160 : : simple dataflow analysis. Return TRUE if something in
1161 : : this block changed or FALSE otherwise. */
1162 : :
1163 : : static bool
1164 : 26313507 : ext_dce_rd_transfer_n (int bb_index)
1165 : : {
1166 : : /* The ENTRY/EXIT blocks never change. */
1167 : 26313507 : if (bb_index == ENTRY_BLOCK || bb_index == EXIT_BLOCK)
1168 : : return false;
1169 : :
1170 : 22523555 : basic_block bb = BASIC_BLOCK_FOR_FN (cfun, bb_index);
1171 : :
1172 : : /* Make everything live that's live in the successors. */
1173 : 22523555 : bitmap_clear (livenow);
1174 : 22523555 : edge_iterator ei;
1175 : 22523555 : edge e;
1176 : :
1177 : 56672031 : FOR_EACH_EDGE (e, ei, bb->succs)
1178 : 34148476 : bitmap_ior_into (livenow, &livein[e->dest->index]);
1179 : :
1180 : 22523555 : ext_dce_process_bb (bb);
1181 : :
1182 : : /* We only allow widening the set of objects live at the start
1183 : : of a block. Otherwise we run the risk of not converging. */
1184 : 22523555 : return bitmap_ior_into (&livein[bb_index], livenow);
1185 : : }
1186 : :
1187 : : /* Dummy function for the df_simple_dataflow API. */
1188 : 32779025 : static bool ext_dce_rd_confluence_n (edge) { return true; }
1189 : :
1190                 :             : /* Use lifetime analysis to identify extensions that set bits that
1191 : : are never read. Turn such extensions into SUBREGs instead which
1192 : : can often be propagated away. */
1193 : :
1194 : : void
1195 : 947488 : ext_dce_execute (void)
1196 : : {
1197 : : /* Limit the amount of memory we use for livein, with 4 bits per
1198 : : reg per basic-block including overhead that maps to one byte
1199 : : per reg per basic-block. */
1200 : 947488 : uint64_t memory_request
1201 : 947488 : = (uint64_t)n_basic_blocks_for_fn (cfun) * max_reg_num ();
1202 : 947488 : if (memory_request / 1024 > (uint64_t)param_max_gcse_memory)
1203 : : {
1204 : 0 : warning (OPT_Wdisabled_optimization,
1205 : : "ext-dce disabled: %d basic blocks and %d registers; "
1206 : : "increase %<--param max-gcse-memory%> above %wu",
1207 : 0 : n_basic_blocks_for_fn (cfun), max_reg_num (),
1208 : : memory_request / 1024);
1209 : 0 : return;
1210 : : }
1211 : :
1212 : : /* Some settings of SUBREG_PROMOTED_VAR_P are actively harmful
1213 : : to this pass. Clear it for those cases. */
1214 : 947488 : maybe_clear_subreg_promoted_p ();
1215 : 947488 : df_analyze ();
1216 : 947488 : ext_dce_init ();
1217 : :
1218 : 3789952 : do
1219 : : {
1220 : 1894976 : df_simple_dataflow (DF_BACKWARD, NULL, NULL,
1221 : : ext_dce_rd_confluence_n, ext_dce_rd_transfer_n,
1222 : : all_blocks, df_get_postorder (DF_BACKWARD),
1223 : : df_get_n_blocks (DF_BACKWARD));
1224 : 1894976 : modify = !modify;
1225 : : }
1226 : : while (modify);
1227 : :
1228 : 947488 : reset_subreg_promoted_p ();
1229 : :
1230 : 947488 : ext_dce_finish ();
1231 : : }
1232 : :
1233 : :
1234 : : namespace {
1235 : :
1236 : : const pass_data pass_data_ext_dce =
1237 : : {
1238 : : RTL_PASS, /* type */
1239 : : "ext_dce", /* name */
1240 : : OPTGROUP_NONE, /* optinfo_flags */
1241 : : TV_EXT_DCE, /* tv_id */
1242 : : PROP_cfglayout, /* properties_required */
1243 : : 0, /* properties_provided */
1244 : : 0, /* properties_destroyed */
1245 : : 0, /* todo_flags_start */
1246 : : TODO_df_finish, /* todo_flags_finish */
1247 : : };
1248 : :
1249 : : class pass_ext_dce : public rtl_opt_pass
1250 : : {
1251 : : public:
1252 : 286682 : pass_ext_dce (gcc::context *ctxt)
1253 : 573364 : : rtl_opt_pass (pass_data_ext_dce, ctxt)
1254 : : {}
1255 : :
1256 : : /* opt_pass methods: */
1257 : 1455336 : virtual bool gate (function *) { return flag_ext_dce && optimize > 0; }
1258 : 947488 : virtual unsigned int execute (function *)
1259 : : {
1260 : 947488 : ext_dce_execute ();
1261 : 947488 : return 0;
1262 : : }
1263 : :
1264                 :             : }; // class pass_ext_dce
1265 : :
1266 : : } // anon namespace
1267 : :
1268 : : rtl_opt_pass *
1269 : 286682 : make_pass_ext_dce (gcc::context *ctxt)
1270 : : {
1271 : 286682 : return new pass_ext_dce (ctxt);
1272 : : }
|