Branch data Line data Source code
1 : : /* RTL dead zero/sign extension (code) elimination.
2 : : Copyright (C) 2000-2025 Free Software Foundation, Inc.
3 : :
4 : : This file is part of GCC.
5 : :
6 : : GCC is free software; you can redistribute it and/or modify it under
7 : : the terms of the GNU General Public License as published by the Free
8 : : Software Foundation; either version 3, or (at your option) any later
9 : : version.
10 : :
11 : : GCC is distributed in the hope that it will be useful, but WITHOUT ANY
12 : : WARRANTY; without even the implied warranty of MERCHANTABILITY or
13 : : FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
14 : : for more details.
15 : :
16 : : You should have received a copy of the GNU General Public License
17 : : along with GCC; see the file COPYING3. If not see
18 : : <http://www.gnu.org/licenses/>. */
19 : :
20 : : #include "config.h"
21 : : #include "system.h"
22 : : #include "coretypes.h"
23 : : #include "backend.h"
24 : : #include "rtl.h"
25 : : #include "tree.h"
26 : : #include "memmodel.h"
27 : : #include "insn-config.h"
28 : : #include "emit-rtl.h"
29 : : #include "recog.h"
30 : : #include "cfganal.h"
31 : : #include "tree-pass.h"
32 : : #include "cfgrtl.h"
33 : : #include "rtl-iter.h"
34 : : #include "df.h"
35 : : #include "print-rtl.h"
36 : : #include "dbgcnt.h"
37 : : #include "diagnostic-core.h"
38 : :
39 : : /* These should probably move into a C++ class. */
40 : : static vec<bitmap_head> livein;
41 : : static bitmap all_blocks;
42 : : static bitmap livenow;
43 : : static bitmap changed_pseudos;
44 : : static bool modify;
45 : :
46 : : /* We consider four bit groups for liveness:
47 : : bit 0..7 (least significant byte)
48 : : bit 8..15 (second least significant byte)
49 : : bit 16..31
50 : : bit 32..BITS_PER_WORD-1 */
51 : :
52 : : /* For the given REG, return the number of bit groups implied by the
53 : : size of the REG's mode, up to a maximum of 4 (number of bit groups
54 : : tracked by this pass).
55 : :
56 : : For partial integer and variable sized modes also return 4. This
57 : : could possibly be refined for something like PSI mode, but it
58 : : does not seem worth the effort. */
59 : :
60 : : static int
61 : 222300692 : group_limit (const_rtx reg)
62 : : {
63 : 222300692 : machine_mode mode = GET_MODE (reg);
64 : :
65 : 222300692 : if (!GET_MODE_BITSIZE (mode).is_constant ())
66 : : return 4;
67 : :
68 : 222300692 : int size = GET_MODE_SIZE (mode).to_constant ();
69 : :
70 : 222300692 : size = exact_log2 (size);
71 : :
72 : 222206956 : if (size < 0)
73 : : return 4;
74 : :
75 : 222206956 : size++;
76 : 222206956 : return (size > 4 ? 4 : size);
77 : : }
78 : :
79 : : /* Make all bit groups live for REGNO in bitmap BMAP. For hard regs,
80 : : we assume all groups are live. For a pseudo we consider the size
81 : : of the pseudo to avoid creating unnecessarily live chunks of data. */
82 : :
83 : : static void
84 : 4597050 : make_reg_live (bitmap bmap, int regno)
85 : : {
86 : 4597050 : int limit;
87 : :
88 : : /* For pseudos we can use the mode to limit how many bit groups
89 : : are marked as live since a pseudo only has one mode. Hard
90 : : registers have to be handled more conservatively. */
91 : 4597050 : if (regno > FIRST_PSEUDO_REGISTER)
92 : : {
93 : 890840 : rtx reg = regno_reg_rtx[regno];
94 : 890840 : limit = group_limit (reg);
95 : : }
96 : : else
97 : : limit = 4;
98 : :
99 : 22660719 : for (int i = 0; i < limit; i++)
100 : 18063669 : bitmap_set_bit (bmap, regno * 4 + i);
101 : 4597050 : }
102 : :
103 : : /* Note this pass could be used to narrow memory loads too. It's
104 : : not clear if that's profitable or not in general. */
105 : :
106 : : #define UNSPEC_P(X) (GET_CODE (X) == UNSPEC || GET_CODE (X) == UNSPEC_VOLATILE)
107 : :
108 : : /* If we know the destination of CODE only uses some low bits
109 : : (say just the QI bits of an SI operation), then return true
110 : : if we can propagate the need for just the subset of bits
111 : : from the destination to the sources.
112 : :
113 : : FIXME: This is safe for operands 1 and 2 of an IF_THEN_ELSE, but not
114                 :             :    operand 0.  Thus it likely would need some special casing to handle.  */
115 : :
116 : : static bool
117 : 136438795 : safe_for_live_propagation (rtx_code code)
118 : : {
119 : : /* First handle rtx classes which as a whole are known to
120 : : be either safe or unsafe. */
121 : 136438795 : switch (GET_RTX_CLASS (code))
122 : : {
123 : : case RTX_OBJ:
124 : : case RTX_CONST_OBJ:
125 : : return true;
126 : :
127 : : case RTX_COMPARE:
128 : : case RTX_COMM_COMPARE:
129 : : case RTX_TERNARY:
130 : : return false;
131 : :
132 : 70472975 : default:
133 : 70472975 : break;
134 : : }
135 : :
136 : : /* What's left are specific codes. We only need to identify those
137 : : which are safe. */
138 : 70472975 : switch (code)
139 : : {
140 : : /* These are trivially safe. */
141 : : case SUBREG:
142 : : case NOT:
143 : : case ZERO_EXTEND:
144 : : case SIGN_EXTEND:
145 : : case TRUNCATE:
146 : : case PLUS:
147 : : case MINUS:
148 : : case MULT:
149 : : case SMUL_HIGHPART:
150 : : case UMUL_HIGHPART:
151 : : case AND:
152 : : case IOR:
153 : : case XOR:
154 : : return true;
155 : :
156 : : /* We can propagate for the shifted operand, but not the shift
157 : : count. The count is handled specially. */
158 : : case ASHIFT:
159 : : case LSHIFTRT:
160 : : case ASHIFTRT:
161 : : case SS_ASHIFT:
162 : : case US_ASHIFT:
163 : : return true;
164 : :
165 : : /* There may be other safe codes. If so they can be added
166 : : individually when discovered. */
167 : : default:
168 : : return false;
169 : : }
170 : : }
171 : :
172 : : /* Clear bits in LIVENOW and set bits in LIVE_TMP for objects
173 : : set/clobbered by OBJ contained in INSN.
174 : :
175 : : Conceptually it is always safe to ignore a particular destination
176 : : here as that will result in more chunks of data being considered
177 : : live. That's what happens when we "continue" the main loop when
178 : : we see something we don't know how to handle such as a vector
179 : : mode destination.
180 : :
181 : : The more accurate we are in identifying what objects (and chunks
182 : : within an object) are set by INSN, the more aggressive the
183 : : optimization phase during use handling will be. */
184 : :
185 : : static bool
186 : 131438185 : ext_dce_process_sets (rtx_insn *insn, rtx obj, bitmap live_tmp)
187 : : {
188 : 131438185 : bool skipped_dest = false;
189 : :
190 : 131438185 : subrtx_iterator::array_type array;
191 : 372142289 : FOR_EACH_SUBRTX (iter, array, obj, NONCONST)
192 : : {
193 : 240704104 : const_rtx x = *iter;
194 : :
195 : : /* An EXPR_LIST (from call fusage) ends in NULL_RTX. */
196 : 240704104 : if (x == NULL_RTX)
197 : 9160369 : continue;
198 : :
199 : 231543735 : if (UNSPEC_P (x))
200 : 566551 : continue;
201 : :
202 : 230977184 : if (GET_CODE (x) == SET || GET_CODE (x) == CLOBBER)
203 : : {
204 : 136669064 : unsigned bit = 0;
205 : 136669064 : x = SET_DEST (x);
206 : :
207 : : /* We don't support vector destinations or destinations
208 : : wider than DImode. */
209 : 136669064 : scalar_mode outer_mode;
210 : 140638759 : if (!is_a <scalar_mode> (GET_MODE (x), &outer_mode)
211 : 87653659 : || GET_MODE_BITSIZE (outer_mode) > HOST_BITS_PER_WIDE_INT)
212 : : {
213 : : /* Skip the subrtxs of this destination. There is
214 : : little value in iterating into the subobjects, so
215 : : just skip them for a bit of efficiency. */
216 : 52985100 : skipped_dest = true;
217 : 52985100 : iter.skip_subrtxes ();
218 : 293689204 : continue;
219 : : }
220 : :
221 : : /* We could have (strict_low_part (subreg ...)). We can not just
222 : : strip the STRICT_LOW_PART as that would result in clearing
223 : : some bits in LIVENOW that are still live. So process the
224 : : STRICT_LOW_PART specially. */
225 : 83683964 : if (GET_CODE (x) == STRICT_LOW_PART)
226 : : {
227 : 0 : x = XEXP (x, 0);
228 : :
229 : : /* The only valid operand of a STRICT_LOW_PART is a non
230 : : paradoxical SUBREG. */
231 : 0 : gcc_assert (SUBREG_P (x)
232 : : && !paradoxical_subreg_p (x)
233 : : && SUBREG_BYTE (x).is_constant ());
234 : :
235 : : /* I think we should always see a REG here. But let's
236 : : be sure. */
237 : 0 : gcc_assert (REG_P (SUBREG_REG (x)));
238 : :
239 : : /* The inner mode might be larger, just punt for
240 : : that case. Remember, we can not just continue to process
241 : : the inner RTXs due to the STRICT_LOW_PART. */
242 : 0 : if (!is_a <scalar_mode> (GET_MODE (SUBREG_REG (x)), &outer_mode)
243 : 0 : || GET_MODE_BITSIZE (outer_mode) > HOST_BITS_PER_WIDE_INT)
244 : : {
245 : : /* Skip the subrtxs of the STRICT_LOW_PART. We can't
246 : : process them because it'll set objects as no longer
247 : : live when they are in fact still live. */
248 : 0 : skipped_dest = true;
249 : 0 : iter.skip_subrtxes ();
250 : 0 : continue;
251 : : }
252 : :
253 : : /* LIVE_TMP contains the set groups that are live-out and set in
254 : : this insn. It is used to narrow the groups live-in for the
255 : : inputs of this insn.
256 : :
257 : : The simple thing to do is mark all the groups as live, but
258 : : that will significantly inhibit optimization.
259 : :
260 : : We also need to be careful in the case where we have an in-out
261 : : operand. If we're not careful we'd clear LIVE_TMP
262 : : incorrectly. */
263 : 0 : HOST_WIDE_INT rn = REGNO (SUBREG_REG (x));
264 : 0 : int limit = group_limit (SUBREG_REG (x));
265 : 0 : for (HOST_WIDE_INT i = 4 * rn; i < 4 * rn + limit; i++)
266 : 0 : if (bitmap_bit_p (livenow, i))
267 : 0 : bitmap_set_bit (live_tmp, i);
268 : :
269 : 0 : if (bitmap_empty_p (live_tmp))
270 : 0 : make_reg_live (live_tmp, rn);
271 : :
272 : : /* The mode of the SUBREG tells us how many bits we can
273 : : clear. */
274 : 0 : machine_mode mode = GET_MODE (x);
275 : 0 : HOST_WIDE_INT size
276 : 0 : = exact_log2 (GET_MODE_SIZE (mode).to_constant ()) + 1;
277 : 0 : bitmap_clear_range (livenow, 4 * rn, size);
278 : :
279 : : /* We have fully processed this destination. */
280 : 0 : iter.skip_subrtxes ();
281 : 0 : continue;
282 : 0 : }
283 : :
284 : : /* Phase one of destination handling. First remove any wrapper
285 : : such as SUBREG or ZERO_EXTRACT. */
286 : 83683964 : unsigned HOST_WIDE_INT mask
287 : 83683964 : = GET_MODE_MASK (GET_MODE_INNER (GET_MODE (x)));
288 : 83683964 : if (SUBREG_P (x))
289 : : {
290 : : /* If we have a SUBREG destination that is too wide, just
291 : : skip the destination rather than continuing this iterator.
292 : : While continuing would be better, we'd need to strip the
293 : : subreg and restart within the SET processing rather than
294 : : the top of the loop which just complicates the flow even
295 : : more. */
296 : 673660 : if (!is_a <scalar_mode> (GET_MODE (SUBREG_REG (x)), &outer_mode)
297 : 558408 : || GET_MODE_BITSIZE (outer_mode) > HOST_BITS_PER_WIDE_INT)
298 : : {
299 : 115252 : skipped_dest = true;
300 : 115252 : iter.skip_subrtxes ();
301 : 115252 : continue;
302 : : }
303 : :
304 : : /* We can safely strip a paradoxical subreg. The inner mode will
305 : : be narrower than the outer mode. We'll clear fewer bits in
306 : : LIVENOW than we'd like, but that's always safe. */
307 : 443776 : if (paradoxical_subreg_p (x))
308 : : x = XEXP (x, 0);
309 : 436200 : else if (SUBREG_BYTE (x).is_constant ())
310 : : {
311 : 436200 : bit = subreg_lsb (x).to_constant ();
312 : 436200 : mask = GET_MODE_MASK (GET_MODE (SUBREG_REG (x))) << bit;
313 : 436200 : gcc_assert (mask);
314 : : x = SUBREG_REG (x);
315 : : }
316 : : else
317 : : gcc_unreachable ();
318 : : }
319 : :
320 : 83568712 : if (GET_CODE (x) == ZERO_EXTRACT)
321 : : {
322 : : /* Unlike a SUBREG destination, a set of a ZERO_EXTRACT only
323 : : modifies the bits referenced in the ZERO_EXTRACT, the rest
324 : : remain the same. Thus we can not continue here, we must
325 : : either figure out what part of the destination is modified
326 : : or skip the sub-rtxs. */
327 : 4296 : skipped_dest = true;
328 : 4296 : iter.skip_subrtxes ();
329 : 4296 : continue;
330 : : }
331 : :
332 : : /* BIT >= 64 indicates something went horribly wrong. */
333 : 83564416 : gcc_assert (bit <= HOST_BITS_PER_WIDE_INT - 1);
334 : :
335 : : /* Now handle the actual object that was changed. */
336 : 83564416 : if (REG_P (x))
337 : : {
338 : : /* LIVE_TMP contains the set groups that are live-out and set in
339 : : this insn. It is used to narrow the groups live-in for the
340 : : inputs of this insn.
341 : :
342 : : The simple thing to do is mark all the groups as live, but
343 : : that will significantly inhibit optimization.
344 : :
345 : : We also need to be careful in the case where we have an in-out
346 : : operand. If we're not careful we'd clear LIVE_TMP
347 : : incorrectly. */
348 : 70377242 : HOST_WIDE_INT rn = REGNO (x);
349 : 70377242 : int limit = group_limit (x);
350 : 313871147 : for (HOST_WIDE_INT i = 4 * rn; i < 4 * rn + limit; i++)
351 : 243493905 : if (bitmap_bit_p (livenow, i))
352 : 236063215 : bitmap_set_bit (live_tmp, i);
353 : :
354 : 70377242 : if (bitmap_empty_p (live_tmp))
355 : 1230392 : make_reg_live (live_tmp, rn);
356 : :
357 : : /* Now clear the bits known written by this instruction.
358 : : Note that BIT need not be a power of two, consider a
359 : : ZERO_EXTRACT destination. */
360 : 70377242 : int start = (bit < 8 ? 0 : bit < 16 ? 1 : bit < 32 ? 2 : 3);
361 : 70377242 : int end = ((mask & ~HOST_WIDE_INT_UC (0xffffffff)) ? 4
362 : 27621745 : : (mask & HOST_WIDE_INT_UC (0xffff0000)) ? 3
363 : 5581353 : : (mask & 0xff00) ? 2 : 1);
364 : 70377242 : bitmap_clear_range (livenow, 4 * rn + start, end - start);
365 : : }
366 : : /* Some ports generate (clobber (const_int)). */
367 : 13187174 : else if (CONST_INT_P (x))
368 : 0 : continue;
369 : : else
370 : 13187174 : gcc_assert (CALL_P (insn)
371 : : || MEM_P (x)
372 : : || x == pc_rtx
373 : : || GET_CODE (x) == SCRATCH);
374 : :
375 : 83564416 : iter.skip_subrtxes ();
376 : 83564416 : }
377 : 94308120 : else if (GET_CODE (x) == COND_EXEC)
378 : : {
379 : : /* This isn't ideal, but may not be so bad in practice. */
380 : 0 : skipped_dest = true;
381 : 0 : iter.skip_subrtxes ();
382 : : }
383 : : }
384 : 131438185 : return skipped_dest;
385 : 131438185 : }
386 : :
387 : : /* INSN has a sign/zero extended source inside SET that we will
388 : : try to turn into a SUBREG. */
389 : : static void
390 : 29027 : ext_dce_try_optimize_insn (rtx_insn *insn, rtx set)
391 : : {
392 : 29027 : rtx src = SET_SRC (set);
393 : 29027 : rtx inner = XEXP (src, 0);
394 : :
395 : : /* Avoid (subreg (mem)) and other constructs which may be valid RTL, but
396 : : not useful for this optimization. */
397 : 29027 : if (!(REG_P (inner) || (SUBREG_P (inner) && REG_P (SUBREG_REG (inner)))))
398 : : return;
399 : :
400 : 26278 : rtx new_pattern;
401 : 26278 : if (dump_file)
402 : : {
403 : 0 : fprintf (dump_file, "Processing insn:\n");
404 : 0 : dump_insn_slim (dump_file, insn);
405 : 0 : fprintf (dump_file, "Trying to simplify pattern:\n");
406 : 0 : print_rtl_single (dump_file, SET_SRC (set));
407 : : }
408 : :
409                 :             :   /* We decided to do the optimization but allow it to be rejected for
410 : : bisection purposes. */
411 : 26278 : if (!dbg_cnt (::ext_dce))
412 : : {
413 : 0 : if (dump_file)
414 : 0 : fprintf (dump_file, "Rejected due to debug counter.\n");
415 : 0 : return;
416 : : }
417 : :
418 : 52556 : new_pattern = simplify_gen_subreg (GET_MODE (src), inner,
419 : 26278 : GET_MODE (inner), 0);
420 : : /* simplify_gen_subreg may fail in which case NEW_PATTERN will be NULL.
421 : : We must not pass that as a replacement pattern to validate_change. */
422 : 26278 : if (new_pattern)
423 : : {
424 : 26278 : int ok = validate_change (insn, &SET_SRC (set), new_pattern, false);
425 : :
426 : 26278 : rtx x = SET_DEST (set);
427 : 26278 : while (SUBREG_P (x) || GET_CODE (x) == ZERO_EXTRACT)
428 : 0 : x = XEXP (x, 0);
429 : :
430 : 26278 : gcc_assert (REG_P (x));
431 : 26278 : if (ok)
432 : 26278 : bitmap_set_bit (changed_pseudos, REGNO (x));
433 : :
434 : 26278 : if (dump_file)
435 : : {
436 : 0 : if (ok)
437 : 0 : fprintf (dump_file, "Successfully transformed to:\n");
438 : : else
439 : 0 : fprintf (dump_file, "Failed transformation to:\n");
440 : :
441 : 0 : print_rtl_single (dump_file, new_pattern);
442 : 0 : fprintf (dump_file, "\n");
443 : : }
444 : : }
445 : : else
446 : : {
447 : 0 : if (dump_file)
448 : 0 : fprintf (dump_file, "Unable to generate valid SUBREG expression.\n");
449 : : }
450 : : }
451 : :
452 : : /* Some operators imply that their second operand is fully live,
453 : : regardless of how many bits in the output are live. An example
454 : : would be the shift count on a target without SHIFT_COUNT_TRUNCATED
455 : : defined.
456 : :
457 : : Return TRUE if CODE is such an operator. FALSE otherwise. */
458 : :
459 : : static bool
460 : 73891917 : binop_implies_op2_fully_live (rtx_code code)
461 : : {
462 : 0 : switch (code)
463 : : {
464 : : case ASHIFT:
465 : : case LSHIFTRT:
466 : : case ASHIFTRT:
467 : : case ROTATE:
468 : : case ROTATERT:
469 : : case SS_ASHIFT:
470 : : case US_ASHIFT:
471 : : return !SHIFT_COUNT_TRUNCATED;
472 : :
473 : 0 : default:
474 : 0 : return false;
475 : : }
476 : : }
477 : :
478 : : /* X, with code CODE, is an operation for which safe_for_live_propagation
479 : : holds true, and bits set in MASK are live in the result. Compute a
480 : : mask of (potentially) live bits in the non-constant inputs. In case of
481 : : binop_implies_op2_fully_live (e.g. shifts), the computed mask may
482 : : exclusively pertain to the first operand.
483 : :
484 : : This looks wrong as we may have some important operations embedded as
485 : : operands of another operation. For example, we might have an extension
486 : : wrapping a shift. It really feels like this needs to be recursing down
487 : : into operands much more often. */
488 : :
489 : : unsigned HOST_WIDE_INT
490 : 68982436 : carry_backpropagate (unsigned HOST_WIDE_INT mask, enum rtx_code code, rtx x)
491 : : {
492 : 70605059 : if (mask == 0)
493 : : return 0;
494 : :
495 : 70605035 : enum machine_mode mode = GET_MODE_INNER (GET_MODE (x));
496 : 70605035 : unsigned HOST_WIDE_INT mmask = GET_MODE_MASK (mode);
497 : :
498 : : /* While we don't try to optimize operations on types larger
499 : : than 64 bits, we do want to make sure not to invoke undefined
500 : : behavior when presented with such operations during use
501 : : processing. The safe thing to do is to just return mmask
502                 :             :      for that scenario indicating every possible chunk is live.  */
503 : 70605035 : scalar_int_mode smode;
504 : 70605035 : if (!is_a <scalar_int_mode> (mode, &smode)
505 : 58488891 : || GET_MODE_BITSIZE (smode) > HOST_BITS_PER_WIDE_INT)
506 : : return mmask;
507 : :
508 : 56458993 : switch (code)
509 : : {
510 : 15760966 : case PLUS:
511 : 15760966 : case MINUS:
512 : 15760966 : case MULT:
513 : 15760966 : return (HOST_WIDE_INT_UC (2) << floor_log2 (mask)) - 1;
514 : :
515 : : /* We propagate for the shifted operand, but not the shift
516 : : count. The count is handled specially. */
517 : 1278194 : case ASHIFT:
518 : 1278194 : if (CONST_INT_P (XEXP (x, 1))
519 : 2486266 : && UINTVAL (XEXP (x, 1)) < GET_MODE_BITSIZE (smode))
520 : 1208036 : return (HOST_WIDE_INT) mask >> INTVAL (XEXP (x, 1));
521 : 70158 : return (HOST_WIDE_INT_UC (2) << floor_log2 (mask)) - 1;
522 : :
523 : : /* We propagate for the shifted operand, but not the shift
524 : : count. The count is handled specially. */
525 : 636049 : case LSHIFTRT:
526 : 636049 : if (CONST_INT_P (XEXP (x, 1))
527 : 1240496 : && UINTVAL (XEXP (x, 1)) < GET_MODE_BITSIZE (smode))
528 : 604419 : return mmask & (mask << INTVAL (XEXP (x, 1)));
529 : : return mmask;
530 : :
531 : : /* We propagate for the shifted operand, but not the shift
532 : : count. The count is handled specially. */
533 : 265766 : case ASHIFTRT:
534 : 265766 : if (CONST_INT_P (XEXP (x, 1))
535 : 519801 : && UINTVAL (XEXP (x, 1)) < GET_MODE_BITSIZE (smode))
536 : : {
537 : 254027 : HOST_WIDE_INT sign = 0;
538 : 254027 : if (HOST_BITS_PER_WIDE_INT - clz_hwi (mask) + INTVAL (XEXP (x, 1))
539 : 254027 : > GET_MODE_BITSIZE (smode))
540 : 508054 : sign = HOST_WIDE_INT_1U << (GET_MODE_BITSIZE (smode) - 1);
541 : 254027 : return sign | (mmask & (mask << INTVAL (XEXP (x, 1))));
542 : : }
543 : : return mmask;
544 : :
545 : 45766 : case SMUL_HIGHPART:
546 : 45766 : case UMUL_HIGHPART:
547 : 45766 : if (XEXP (x, 1) == const0_rtx)
548 : : return 0;
549 : 45766 : if (XEXP (x, 1) == const1_rtx)
550 : : return mmask;
551 : 45766 : if (CONST_INT_P (XEXP (x, 1)))
552 : : {
553 : 0 : if (pow2p_hwi (INTVAL (XEXP (x, 1))))
554 : 0 : return mmask & (mask << (GET_MODE_BITSIZE (smode)
555 : 0 : - exact_log2 (INTVAL (XEXP (x, 1)))));
556 : :
557 : 0 : int bits = (HOST_BITS_PER_WIDE_INT + GET_MODE_BITSIZE (smode)
558 : 0 : - clz_hwi (mask) - ctz_hwi (INTVAL (XEXP (x, 1))));
559 : 0 : if (bits < GET_MODE_BITSIZE (smode))
560 : 0 : return (HOST_WIDE_INT_1U << bits) - 1;
561 : : }
562 : : return mmask;
563 : :
564 : 659509 : case SIGN_EXTEND:
565 : 659509 : if (!GET_MODE_BITSIZE (GET_MODE (x)).is_constant ()
566 : 659509 : || !GET_MODE_BITSIZE (GET_MODE (XEXP (x, 0))).is_constant ())
567 : : return -1;
568 : :
569 : : /* We want the mode of the inner object. We need to ensure its
570 : : sign bit is on in MASK. */
571 : 659509 : mode = GET_MODE_INNER (GET_MODE (XEXP (x, 0)));
572 : 659509 : if (mask & ~GET_MODE_MASK (mode))
573 : 655051 : mask |= HOST_WIDE_INT_1U << (GET_MODE_BITSIZE (mode).to_constant ()
574 : 655051 : - 1);
575 : :
576 : : /* Recurse into the operand. */
577 : 659509 : return carry_backpropagate (mask, GET_CODE (XEXP (x, 0)), XEXP (x, 0));
578 : :
579 : 963114 : case ZERO_EXTEND:
580 : 963114 : if (!GET_MODE_BITSIZE (GET_MODE (x)).is_constant ()
581 : 963114 : || !GET_MODE_BITSIZE (GET_MODE (XEXP (x, 0))).is_constant ())
582 : : return -1;
583 : :
584 : : /* Recurse into the operand. */
585 : 963114 : return carry_backpropagate (mask, GET_CODE (XEXP (x, 0)), XEXP (x, 0));
586 : :
587 : : /* We propagate for the shifted operand, but not the shift
588 : : count. The count is handled specially. */
589 : 0 : case SS_ASHIFT:
590 : 0 : case US_ASHIFT:
591 : 0 : if (CONST_INT_P (XEXP (x, 1))
592 : 0 : && UINTVAL (XEXP (x, 1)) < GET_MODE_BITSIZE (smode))
593 : : {
594 : 0 : return ((mmask & ~((unsigned HOST_WIDE_INT) mmask
595 : 0 : >> (INTVAL (XEXP (x, 1))
596 : 0 : + (XEXP (x, 1) != const0_rtx
597 : 0 : && code == SS_ASHIFT))))
598 : 0 : | ((HOST_WIDE_INT) mask >> INTVAL (XEXP (x, 1))));
599 : : }
600 : : return mmask;
601 : :
602 : : default:
603 : : return mask;
604 : : }
605 : : }
606 : :
607 : : /* Process uses in INSN contained in OBJ. Set appropriate bits in LIVENOW
608 : : for any chunks of pseudos that become live, potentially filtering using
609 : : bits from LIVE_TMP.
610 : :
611 : : If MODIFY is true, then optimize sign/zero extensions to SUBREGs when
612 : : the extended bits are never read and mark pseudos which had extensions
613 : : eliminated in CHANGED_PSEUDOS. */
614 : :
615 : : static void
616 : 131438185 : ext_dce_process_uses (rtx_insn *insn, rtx obj,
617 : : bitmap live_tmp, bool skipped_dest)
618 : : {
619 : 131438185 : subrtx_var_iterator::array_type array_var;
620 : 711204400 : FOR_EACH_SUBRTX_VAR (iter, array_var, obj, NONCONST)
621 : : {
622 : : /* An EXPR_LIST (from call fusage) ends in NULL_RTX. */
623 : 579766215 : rtx x = *iter;
624 : 579766215 : if (x == NULL_RTX)
625 : 9160369 : continue;
626 : :
627 : : /* So the basic idea in this FOR_EACH_SUBRTX_VAR loop is to
628 : : handle SETs explicitly, possibly propagating live information
629 : : into the uses.
630 : :
631 : : We may continue the loop at various points which will cause
632 : : iteration into the next level of RTL. Breaking from the loop
633 : : is never safe as it can lead us to fail to process some of the
634 : : RTL and thus not make objects live when necessary. */
635 : 570605846 : enum rtx_code xcode = GET_CODE (x);
636 : 570605846 : if (xcode == SET)
637 : : {
638 : 116013917 : const_rtx dst = SET_DEST (x);
639 : 116013917 : rtx src = SET_SRC (x);
640 : 116013917 : const_rtx y;
641 : 116013917 : unsigned HOST_WIDE_INT bit = 0;
642 : :
643 : : /* The code of the RHS of a SET. */
644 : 116013917 : enum rtx_code code = GET_CODE (src);
645 : :
646 : : /* ?!? How much of this should mirror SET handling, potentially
647 : : being shared? */
648 : 116013917 : if (SUBREG_P (dst) && SUBREG_BYTE (dst).is_constant ())
649 : : {
650 : 603141 : bit = subreg_lsb (dst).to_constant ();
651 : 603141 : if (bit >= HOST_BITS_PER_WIDE_INT)
652 : : bit = HOST_BITS_PER_WIDE_INT - 1;
653 : 603141 : dst = SUBREG_REG (dst);
654 : : }
655 : 115410776 : else if (GET_CODE (dst) == STRICT_LOW_PART)
656 : 11009 : dst = XEXP (dst, 0);
657 : :
658 : : /* Main processing of the uses. Two major goals here.
659 : :
660 : : First, we want to try and propagate liveness (or the lack
661 : : thereof) from the destination register to the source
662 : : register(s).
663 : :
664 : : Second, if the source is an extension, try to optimize
665 : : it into a SUBREG. The SUBREG form indicates we don't
666 : : care about the upper bits and will usually be copy
667 : : propagated away.
668 : :
669 : : If we fail to handle something in here, the expectation
670 : : is the iterator will dive into the sub-components and
671 : : mark all the chunks in any found REGs as live. */
672 : 116013917 : if (REG_P (dst) && safe_for_live_propagation (code))
673 : : {
674 : : /* Create a mask representing the bits of this output
675 : : operand that are live after this insn. We can use
676 : : this information to refine the live in state of
677 : : inputs to this insn in many cases.
678 : :
679 : : We have to do this on a per SET basis, we might have
680 : : an INSN with multiple SETS, some of which can narrow
681 : : the source operand liveness, some of which may not. */
682 : 68982436 : unsigned HOST_WIDE_INT dst_mask = 0;
683 : 68982436 : HOST_WIDE_INT rn = REGNO (dst);
684 : 68982436 : unsigned HOST_WIDE_INT mask_array[]
685 : : = { 0xff, 0xff00, HOST_WIDE_INT_UC (0xffff0000),
686 : : -HOST_WIDE_INT_UC (0x100000000) };
687 : 344912180 : for (int i = 0; i < 4; i++)
688 : 275929744 : if (bitmap_bit_p (live_tmp, 4 * rn + i))
689 : 220909284 : dst_mask |= mask_array[i];
690 : 68982436 : dst_mask >>= bit;
691 : :
692 : : /* If we ignored a destination during set processing, then
693 : : consider all the bits live. */
694 : 68982436 : if (skipped_dest)
695 : 23949394 : dst_mask = -1;
696 : :
697 : 68982436 : dst_mask = carry_backpropagate (dst_mask, code, src);
698 : :
699 : : /* ??? Could also handle ZERO_EXTRACT / SIGN_EXTRACT
700 : : of the source specially to improve optimization. */
701 : 68982436 : if (code == SIGN_EXTEND || code == ZERO_EXTEND)
702 : : {
703 : 1635106 : rtx inner = XEXP (src, 0);
704 : 1635106 : unsigned HOST_WIDE_INT src_mask
705 : 1635106 : = GET_MODE_MASK (GET_MODE_INNER (GET_MODE (inner)));
706 : :
707 : : /* DST_MASK could be zero if we had something in the SET
708 : : that we couldn't handle. */
709 : 1635106 : if (modify && !skipped_dest && (dst_mask & ~src_mask) == 0)
710 : 29027 : ext_dce_try_optimize_insn (insn, x);
711 : :
712 : : /* Stripping the extension here just seems wrong on multiple
713 : : levels. It's source side handling, so it seems like it
714 : : belongs in the loop below. Stripping here also makes it
715 : : harder than necessary to properly handle live bit groups
716 : : for (ANY_EXTEND (SUBREG)) where the SUBREG has
717 : : SUBREG_PROMOTED state. */
718 : 1635106 : dst_mask &= src_mask;
719 : 1635106 : src = XEXP (src, 0);
720 : 1635106 : code = GET_CODE (src);
721 : : }
722 : :
723 : : /* Optimization is done at this point. We just want to make
724 : : sure everything that should get marked as live is marked
725 : : from here onward. */
726 : :
727 : : /* We will handle the other operand of a binary operator
728 : : at the bottom of the loop by resetting Y. */
729 : 68982436 : if (BINARY_P (src))
730 : 21629364 : y = XEXP (src, 0);
731 : : else
732 : : y = src;
733 : :
734 : : /* We're inside a SET and want to process the source operands
735 : : making things live. Breaking from this loop will cause
736 : : the iterator to work on sub-rtxs, so it is safe to break
737 : : if we see something we don't know how to handle.
738 : :
739 : : This code is just hokey as it really just handles trivial
740 : : unary and binary cases. Otherwise the loop exits and we
741 : : continue iterating on sub-rtxs, but outside the set context. */
742 : : unsigned HOST_WIDE_INT save_mask = dst_mask;
743 : 111086908 : for (;;)
744 : : {
745 : : /* In general we want to restore DST_MASK before each loop
746 : : iteration. The exception is when the opcode implies that
747 : : the other operand is fully live. That's handled by
748 : : changing SAVE_MASK below. */
749 : 90034672 : dst_mask = save_mask;
750 : : /* Strip an outer paradoxical subreg. The bits outside
751 : : the inner mode are don't cares. So we can just strip
752 : : and process the inner object. */
753 : 90034672 : if (paradoxical_subreg_p (y))
754 : 80254 : y = XEXP (y, 0);
755 : 89954418 : else if (SUBREG_P (y) && SUBREG_BYTE (y).is_constant ())
756 : : {
757 : : /* We really want to know the outer code here, ie do we
758 : : have (ANY_EXTEND (SUBREG ...)) as we need to know if
759 : : the extension matches the SUBREG_PROMOTED state. In
760 : : that case optimizers can turn the extension into a
761 : : simple copy. Which means that bits outside the
762 : : SUBREG's mode are actually live.
763 : :
764 : : We don't want to mark those bits live unnecessarily
765 : : as that inhibits extension elimination in important
766 : : cases such as those in Coremark. So we need that
767 : : outer code. */
768 : 1994821 : if (!REG_P (SUBREG_REG (y))
769 : 1994821 : || (SUBREG_PROMOTED_VAR_P (y)
770 : 12216 : && ((GET_CODE (SET_SRC (x)) == SIGN_EXTEND
771 : 1160 : && SUBREG_PROMOTED_SIGNED_P (y))
772 : 12216 : || (GET_CODE (SET_SRC (x)) == ZERO_EXTEND
773 : 0 : && SUBREG_PROMOTED_UNSIGNED_P (y)))))
774 : : break;
775 : :
776 : 1993875 : bit = subreg_lsb (y).to_constant ();
777 : :
778 : : /* If this is a wide object (more bits than we can fit
779 : : in a HOST_WIDE_INT), then just break from the SET
780 : : context. That will cause the iterator to walk down
781 : : into the subrtx and if we land on a REG we'll mark
782                 :             :                      the whole thing live.  */
783 : 1993875 : if (bit >= HOST_BITS_PER_WIDE_INT)
784 : : break;
785 : :
786 : : /* The SUBREG's mode determines the live width. */
787 : 1774139 : if (dst_mask)
788 : : {
789 : 1774139 : dst_mask <<= bit;
790 : 1774139 : if (!dst_mask)
791 : 0 : dst_mask = -HOST_WIDE_INT_UC (0x100000000);
792 : : }
793 : 1774139 : y = SUBREG_REG (y);
794 : : }
795 : :
796 : 89813990 : if (REG_P (y))
797 : : {
798 : : /* We have found the use of a register. We need to mark
799 : : the appropriate chunks of the register live. The mode
800 : : of the REG is a starting point. We may refine that
801 : : based on what chunks in the output were live. */
802 : 47685902 : rn = 4 * REGNO (y);
803 : 47685902 : unsigned HOST_WIDE_INT tmp_mask = dst_mask;
804 : :
805 : : /* If the RTX code for the SET_SRC is not one we can
806 : : propagate destination liveness through, then just
807 : : set the mask to the mode's mask. */
808 : 47685902 : if (!safe_for_live_propagation (code))
809 : 32657 : tmp_mask
810 : 65314 : = GET_MODE_MASK (GET_MODE_INNER (GET_MODE (y)));
811 : :
812 : 47685902 : if (tmp_mask & 0xff)
813 : 47254909 : bitmap_set_bit (livenow, rn);
814 : 47685902 : if (tmp_mask & 0xff00)
815 : 45815638 : bitmap_set_bit (livenow, rn + 1);
816 : 47685902 : if (tmp_mask & HOST_WIDE_INT_UC (0xffff0000))
817 : 45542559 : bitmap_set_bit (livenow, rn + 2);
818 : 47685902 : if (tmp_mask & -HOST_WIDE_INT_UC (0x100000000))
819 : 39277057 : bitmap_set_bit (livenow, rn + 3);
820 : : }
821 : 42128088 : else if (!CONSTANT_P (y))
822 : : break;
823 : :
824 : : /* We might have (ashift (const_int 1) (reg...))
825 : : By setting dst_mask we can continue iterating on the
826 : : next operand and it will be considered fully live.
827 : :
828 : : Note that since we restore DST_MASK from SAVE_MASK at the
829 : : top of the loop, we have to change SAVE_MASK to get the
830 : : semantics we want. */
831 : 73891917 : if (binop_implies_op2_fully_live (GET_CODE (src)))
832 : 2286783 : save_mask = -1;
833 : :
834 : : /* If this was anything but a binary operand, break the inner
835 : : loop. This is conservatively correct as it will cause the
836 : : iterator to look at the sub-rtxs outside the SET context. */
837 : 73891917 : if (!BINARY_P (src))
838 : : break;
839 : :
840 : : /* We processed the first operand of a binary operator. Now
841 : : handle the second. */
842 : 21052236 : y = XEXP (src, 1), src = pc_rtx;
843 : 21052236 : }
844 : :
845 : : /* These are leaf nodes, no need to iterate down into them. */
846 : 68982436 : if (REG_P (y) || CONSTANT_P (y))
847 : 52839681 : iter.skip_subrtxes ();
848 : : }
849 : : }
850 : : /* If we are reading the low part of a SUBREG, then we can
851 : : refine liveness of the input register, otherwise let the
852 : : iterator continue into SUBREG_REG. */
853 : 454591929 : else if (SUBREG_P (x)
854 : 1349071 : && REG_P (SUBREG_REG (x))
855 : 1347317 : && !paradoxical_subreg_p (x)
856 : 1322838 : && subreg_lowpart_p (x)
857 : 1034055 : && GET_MODE_BITSIZE (GET_MODE (x)).is_constant ()
858 : 456660039 : && GET_MODE_BITSIZE (GET_MODE (x)).to_constant () <= 32)
859 : : {
860 : 553622 : HOST_WIDE_INT size = GET_MODE_BITSIZE (GET_MODE (x)).to_constant ();
861 : 553622 : HOST_WIDE_INT rn = 4 * REGNO (SUBREG_REG (x));
862 : :
863 : : /* If this is a promoted subreg, then more of it may be live than
864 : : is otherwise obvious. */
865 : 553622 : if (SUBREG_PROMOTED_VAR_P (x))
866 : 4150 : size = GET_MODE_BITSIZE (GET_MODE (SUBREG_REG (x))).to_constant ();
867 : :
868 : 553622 : bitmap_set_bit (livenow, rn);
869 : 553622 : if (size > 8)
870 : 317140 : bitmap_set_bit (livenow, rn + 1);
871 : 317140 : if (size > 16)
872 : 259071 : bitmap_set_bit (livenow, rn + 2);
873 : 259071 : if (size >= 32)
874 : 259071 : bitmap_set_bit (livenow, rn + 3);
875 : 553622 : iter.skip_subrtxes ();
876 : : }
877 : : /* If we have a register reference that is not otherwise handled,
878 : : just assume all the chunks are live. */
879 : 454038307 : else if (REG_P (x))
880 : 151032610 : bitmap_set_range (livenow, REGNO (x) * 4, group_limit (x));
881 : : }
882 : 131438185 : }
883 : :
/* Process a single basic block BB with current liveness information
   in LIVENOW, returning updated liveness information.

   If MODIFY is true, then this is the last pass and unnecessary
   extensions should be eliminated when possible.  If an extension
   is removed, the source pseudo is marked in CHANGED_PSEUDOS.  */

static void
ext_dce_process_bb (basic_block bb)
{
  rtx_insn *insn;

  /* Walk the block backwards so that the live-out state of each insn
     is known before its inputs are processed.  */
  FOR_BB_INSNS_REVERSE (bb, insn)
    {
      if (!NONDEBUG_INSN_P (insn))
	continue;

      /* Live-out state of the destination of this insn.  We can
	 use this to refine the live-in state of the sources of
	 this insn in many cases.  */
      bitmap live_tmp = BITMAP_ALLOC (NULL);

      /* First process any sets/clobbers in INSN.  */
      bool skipped_dest = ext_dce_process_sets (insn, PATTERN (insn), live_tmp);

      /* CALL_INSNs need processing their fusage data.  */
      if (CALL_P (insn))
	skipped_dest |= ext_dce_process_sets (insn,
					      CALL_INSN_FUNCTION_USAGE (insn),
					      live_tmp);

      /* And now uses, optimizing away SIGN/ZERO extensions as we go.  */
      ext_dce_process_uses (insn, PATTERN (insn), live_tmp, skipped_dest);

      /* A nonlocal goto implicitly uses the frame pointer.  */
      if (JUMP_P (insn) && find_reg_note (insn, REG_NON_LOCAL_GOTO, NULL_RTX))
	{
	  bitmap_set_range (livenow, FRAME_POINTER_REGNUM * 4, 4);
	  if (!HARD_FRAME_POINTER_IS_FRAME_POINTER)
	    bitmap_set_range (livenow, HARD_FRAME_POINTER_REGNUM * 4, 4);
	}

      /* And process fusage data for the use as well.  */
      if (CALL_P (insn))
	{
	  /* A real call reads the stack pointer.  */
	  if (!FAKE_CALL_P (insn))
	    bitmap_set_range (livenow, STACK_POINTER_REGNUM * 4, 4);

	  /* If this is not a call to a const function, then assume it
	     can read any global register.  */
	  if (!RTL_CONST_CALL_P (insn))
	    for (unsigned i = 0; i < FIRST_PSEUDO_REGISTER; i++)
	      if (global_regs[i])
		bitmap_set_range (livenow, i * 4, 4);

	  ext_dce_process_uses (insn, CALL_INSN_FUNCTION_USAGE (insn), live_tmp, false);
	}

      BITMAP_FREE (live_tmp);
    }
}
945 : :
946 : : /* SUBREG_PROMOTED_VAR_P is set by the gimple->rtl optimizers and
947 : : is usually helpful. However, in some cases setting the value when
948 : : it not strictly needed can cause this pass to miss optimizations.
949 : :
950 : : Specifically consider (set (mem) (subreg (reg))). If set in that
951 : : case it will cause more bit groups to be live for REG than would
952 : : be strictly necessary which in turn can inhibit extension removal.
953 : :
954 : : So do a pass over the IL wiping the SUBREG_PROMOTED_VAR_P when it
955 : : is obviously not needed. */
956 : :
957 : : static void
958 : 933044 : maybe_clear_subreg_promoted_p (void)
959 : : {
960 : 109916711 : for (rtx_insn *insn = get_insns(); insn; insn = NEXT_INSN (insn))
961 : : {
962 : 108983667 : if (!NONDEBUG_INSN_P (insn))
963 : 56641094 : continue;
964 : :
965 : 52342573 : rtx set = single_set (insn);
966 : 52342573 : if (!set)
967 : 3564397 : continue;
968 : :
969 : : /* There may be other cases where we should clear, but for
970 : : now, this is the only known case where it causes problems. */
971 : 48778176 : if (MEM_P (SET_DEST (set)) && SUBREG_P (SET_SRC (set))
972 : 67236 : && GET_MODE (SET_DEST (set)) <= GET_MODE (SUBREG_REG (SET_SRC (set))))
973 : 59065 : SUBREG_PROMOTED_VAR_P (SET_SRC (set)) = 0;
974 : : }
975 : 933044 : }
976 : :
977 : :
978 : : /* We optimize away sign/zero extensions in this pass and replace
979 : : them with SUBREGs indicating certain bits are don't cares.
980 : :
981 : : This changes the SUBREG_PROMOTED_VAR_P state of the object.
982 : : It is fairly painful to fix this on the fly, so we have
983 : : recorded which pseudos are affected and we look for SUBREGs
984 : : of those pseudos and fix them up. */
985 : :
986 : : static void
987 : 933044 : reset_subreg_promoted_p (void)
988 : : {
989 : : /* If we removed an extension, that changed the promoted state
990 : : of the destination of that extension. Thus we need to go
991 : : find any SUBREGs that reference that pseudo and adjust their
992 : : SUBREG_PROMOTED_P state. */
993 : 109916711 : for (rtx_insn *insn = get_insns(); insn; insn = NEXT_INSN (insn))
994 : : {
995 : 108983667 : if (!NONDEBUG_INSN_P (insn))
996 : 56641094 : continue;
997 : :
998 : 52342573 : rtx pat = PATTERN (insn);
999 : 52342573 : subrtx_var_iterator::array_type array;
1000 : 333145902 : FOR_EACH_SUBRTX_VAR (iter, array, pat, NONCONST)
1001 : : {
1002 : 280803329 : rtx sub = *iter;
1003 : :
1004 : : /* We only care about SUBREGs. */
1005 : 280803329 : if (GET_CODE (sub) != SUBREG)
1006 : 279252422 : continue;
1007 : :
1008 : 1550907 : const_rtx x = SUBREG_REG (sub);
1009 : :
1010 : : /* We only care if the inner object is a REG. */
1011 : 1550907 : if (!REG_P (x))
1012 : 729 : continue;
1013 : :
1014 : : /* And only if the SUBREG is a promoted var. */
1015 : 1550178 : if (!SUBREG_PROMOTED_VAR_P (sub))
1016 : 1545061 : continue;
1017 : :
1018 : 5117 : if (bitmap_bit_p (changed_pseudos, REGNO (x)))
1019 : 0 : SUBREG_PROMOTED_VAR_P (sub) = 0;
1020 : : }
1021 : 52342573 : }
1022 : 933044 : }
1023 : :
1024 : : /* Initialization of the ext-dce pass. Primarily this means
1025 : : setting up the various bitmaps we utilize. */
1026 : :
1027 : : static void
1028 : 933044 : ext_dce_init (void)
1029 : : {
1030 : 933044 : livein.create (last_basic_block_for_fn (cfun));
1031 : 933044 : livein.quick_grow_cleared (last_basic_block_for_fn (cfun));
1032 : 12040945 : for (int i = 0; i < last_basic_block_for_fn (cfun); i++)
1033 : 11107901 : bitmap_initialize (&livein[i], &bitmap_default_obstack);
1034 : :
1035 : 933044 : auto_bitmap refs (&bitmap_default_obstack);
1036 : 933044 : df_get_exit_block_use_set (refs);
1037 : :
1038 : 933044 : unsigned i;
1039 : 933044 : bitmap_iterator bi;
1040 : 4299702 : EXECUTE_IF_SET_IN_BITMAP (refs, 0, i, bi)
1041 : 3366658 : make_reg_live (&livein[EXIT_BLOCK], i);
1042 : :
1043 : 933044 : livenow = BITMAP_ALLOC (NULL);
1044 : 933044 : all_blocks = BITMAP_ALLOC (NULL);
1045 : 933044 : changed_pseudos = BITMAP_ALLOC (NULL);
1046 : :
1047 : 12040945 : for (int i = 0; i < last_basic_block_for_fn (cfun); i++)
1048 : 11107901 : if (i != ENTRY_BLOCK && i != EXIT_BLOCK)
1049 : 9241813 : bitmap_set_bit (all_blocks, i);
1050 : :
1051 : 933044 : modify = false;
1052 : 933044 : }
1053 : :
1054 : : /* Finalization of the ext-dce pass. Primarily this means
1055 : : releasing up the various bitmaps we utilize. */
1056 : :
1057 : : static void
1058 : 933044 : ext_dce_finish (void)
1059 : : {
1060 : 12040945 : for (unsigned i = 0; i < livein.length (); i++)
1061 : 11107901 : bitmap_clear (&livein[i]);
1062 : 933044 : livein.release ();
1063 : :
1064 : 933044 : BITMAP_FREE (livenow);
1065 : 933044 : BITMAP_FREE (changed_pseudos);
1066 : 933044 : BITMAP_FREE (all_blocks);
1067 : 933044 : }
1068 : :
/* Process block number BB_INDEX as part of the backward
   simple dataflow analysis.  Return TRUE if something in
   this block changed or FALSE otherwise.  */

static bool
ext_dce_rd_transfer_n (int bb_index)
{
  /* The ENTRY/EXIT blocks never change.  */
  if (bb_index == ENTRY_BLOCK || bb_index == EXIT_BLOCK)
    return false;

  basic_block bb = BASIC_BLOCK_FOR_FN (cfun, bb_index);

  /* Make everything live that's live in the successors.  */
  bitmap_clear (livenow);
  edge_iterator ei;
  edge e;

  FOR_EACH_EDGE (e, ei, bb->succs)
    bitmap_ior_into (livenow, &livein[e->dest->index]);

  /* Walk the block backwards, refining LIVENOW in place.  */
  ext_dce_process_bb (bb);

  /* We only allow widening the set of objects live at the start
     of a block.  Otherwise we run the risk of not converging.  */
  return bitmap_ior_into (&livein[bb_index], livenow);
}
1096 : :
/* Dummy confluence function for the df_simple_dataflow API; the
   merging of successor state is done in the transfer function.  */
static bool ext_dce_rd_confluence_n (edge) { return true; }
1099 : :
/* Use lifetime analysis to identify extensions that set bits that
   are never read.  Turn such extensions into SUBREGs instead which
   can often be propagated away.  */

void
ext_dce_execute (void)
{
  /* Limit the amount of memory we use for livein, with 4 bits per
     reg per basic-block including overhead that maps to one byte
     per reg per basic-block.  */
  uint64_t memory_request
    = (uint64_t)n_basic_blocks_for_fn (cfun) * max_reg_num ();
  if (memory_request / 1024 > (uint64_t)param_max_gcse_memory)
    {
      warning (OPT_Wdisabled_optimization,
	       "ext-dce disabled: %d basic blocks and %d registers; "
	       "increase %<--param max-gcse-memory%> above %wu",
	       n_basic_blocks_for_fn (cfun), max_reg_num (),
	       memory_request / 1024);
      return;
    }

  /* Some settings of SUBREG_PROMOTED_VAR_P are actively harmful
     to this pass.  Clear it for those cases.  */
  maybe_clear_subreg_promoted_p ();
  df_analyze ();
  ext_dce_init ();

  /* MODIFY starts false (set by ext_dce_init), so the body runs
     exactly twice: a pure analysis pass first, then a second pass
     with MODIFY set that actually removes unnecessary extensions.  */
  do
    {
      df_simple_dataflow (DF_BACKWARD, NULL, NULL,
			  ext_dce_rd_confluence_n, ext_dce_rd_transfer_n,
			  all_blocks, df_get_postorder (DF_BACKWARD),
			  df_get_n_blocks (DF_BACKWARD));
      modify = !modify;
    }
  while (modify);

  /* Fix up SUBREG_PROMOTED_VAR_P for pseudos whose extension was
     removed, then release all pass-local state.  */
  reset_subreg_promoted_p ();

  ext_dce_finish ();
}
1142 : :
1143 : :
namespace {

/* Pass registration data for the ext-dce RTL pass.  */
const pass_data pass_data_ext_dce =
{
  RTL_PASS, /* type */
  "ext_dce", /* name */
  OPTGROUP_NONE, /* optinfo_flags */
  TV_EXT_DCE, /* tv_id */
  PROP_cfglayout, /* properties_required */
  0, /* properties_provided */
  0, /* properties_destroyed */
  0, /* todo_flags_start */
  TODO_df_finish, /* todo_flags_finish */
};

class pass_ext_dce : public rtl_opt_pass
{
public:
  pass_ext_dce (gcc::context *ctxt)
    : rtl_opt_pass (pass_data_ext_dce, ctxt)
  {}

  /* opt_pass methods: */
  /* Run only when -fext-dce is enabled and we are optimizing.  */
  virtual bool gate (function *) { return flag_ext_dce && optimize > 0; }
  virtual unsigned int execute (function *)
  {
    ext_dce_execute ();
    return 0;
  }

}; // class pass_ext_dce

} // anon namespace
1177 : :
/* Factory function used by the pass manager to instantiate the
   ext-dce pass.  */

rtl_opt_pass *
make_pass_ext_dce (gcc::context *ctxt)
{
  return new pass_ext_dce (ctxt);
}
|