Line data Source code
1 : /* RTL-based forward propagation pass for GNU compiler.
2 : Copyright (C) 2005-2026 Free Software Foundation, Inc.
3 : Contributed by Paolo Bonzini and Steven Bosscher.
4 :
5 : This file is part of GCC.
6 :
7 : GCC is free software; you can redistribute it and/or modify it under
8 : the terms of the GNU General Public License as published by the Free
9 : Software Foundation; either version 3, or (at your option) any later
10 : version.
11 :
12 : GCC is distributed in the hope that it will be useful, but WITHOUT ANY
13 : WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 : FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
15 : for more details.
16 :
17 : You should have received a copy of the GNU General Public License
18 : along with GCC; see the file COPYING3. If not see
19 : <http://www.gnu.org/licenses/>. */
20 :
21 : #define INCLUDE_ALGORITHM
22 : #define INCLUDE_FUNCTIONAL
23 : #define INCLUDE_ARRAY
24 : #include "config.h"
25 : #include "system.h"
26 : #include "coretypes.h"
27 : #include "backend.h"
28 : #include "rtl.h"
29 : #include "rtlanal.h"
30 : #include "df.h"
31 : #include "rtl-ssa.h"
32 :
33 : #include "predict.h"
34 : #include "cfgrtl.h"
35 : #include "cfgcleanup.h"
36 : #include "cfgloop.h"
37 : #include "tree-pass.h"
38 : #include "rtl-iter.h"
39 : #include "target.h"
40 :
41 : /* This pass does simple forward propagation and simplification when an
42 : operand of an insn can only come from a single def. This pass uses
43 : RTL SSA, so it is global. However, we only do limited analysis of
44 : available expressions.
45 :
46 : 1) The pass tries to propagate the source of the def into the use,
47 : and checks if the result is independent of the substituted value.
48 : For example, the high word of a (zero_extend:DI (reg:SI M)) is always
49 : zero, independent of the source register.
50 :
51 : In particular, we propagate constants into the use site. Sometimes
52 : RTL expansion did not put the constant in the same insn on purpose,
53 : to satisfy a predicate, and the result will fail to be recognized;
54 : but this happens rarely and in this case we can still create a
55 : REG_EQUAL note. For multi-word operations, this
56 :
57 : (set (subreg:SI (reg:DI 120) 0) (const_int 0))
58 : (set (subreg:SI (reg:DI 120) 4) (const_int -1))
59 : (set (subreg:SI (reg:DI 122) 0)
60 : (ior:SI (subreg:SI (reg:DI 119) 0) (subreg:SI (reg:DI 120) 0)))
61 : (set (subreg:SI (reg:DI 122) 4)
62 : (ior:SI (subreg:SI (reg:DI 119) 4) (subreg:SI (reg:DI 120) 4)))
63 :
64 : can be simplified to the much simpler
65 :
66 : (set (subreg:SI (reg:DI 122) 0) (subreg:SI (reg:DI 119)))
67 : (set (subreg:SI (reg:DI 122) 4) (const_int -1))
68 :
69 : This particular propagation is also effective at putting together
70 : complex addressing modes. We are more aggressive inside MEMs, in
71 : that all definitions are propagated if the use is in a MEM; if the
72 : result is a valid memory address we check address_cost to decide
73 : whether the substitution is worthwhile.
74 :
75 : 2) The pass propagates register copies. Although this is not as
76 : effective as the copy propagation done by CSE's canon_reg, which works
77 : by walking the instruction chain, it can help the other transformations.
78 :
79 : We should consider removing this optimization, and instead reorder the
80 : RTL passes, because GCSE does this transformation too. With some luck,
81 : the CSE pass at the end of rest_of_handle_gcse could also go away.
82 :
83 : 3) The pass looks for paradoxical subregs that are actually unnecessary.
84 : Things like this:
85 :
86 : (set (reg:QI 120) (subreg:QI (reg:SI 118) 0))
87 : (set (reg:QI 121) (subreg:QI (reg:SI 119) 0))
88 : (set (reg:SI 122) (plus:SI (subreg:SI (reg:QI 120) 0)
89 : (subreg:SI (reg:QI 121) 0)))
90 :
91 : are very common on machines that can only do word-sized operations.
92 : For each use of a paradoxical subreg (subreg:WIDER (reg:NARROW N) 0),
93 : if it has a single def and it is (subreg:NARROW (reg:WIDE M) 0),
94 : we can replace the paradoxical subreg with simply (reg:WIDE M). The
95 : above will simplify this to
96 :
97 : (set (reg:QI 120) (subreg:QI (reg:SI 118) 0))
98 : (set (reg:QI 121) (subreg:QI (reg:SI 119) 0))
99 : (set (reg:SI 122) (plus:SI (reg:SI 118) (reg:SI 119)))
100 :
101 : where the first two insns are now dead. */
102 :
103 : using namespace rtl_ssa;
104 :
/* Running count of committed pattern changes.  It is incremented by
   try_fwprop_subst_pattern each time that function commits a change to
   an instruction via crtl->ssa->change_insn.  */
105 : static int num_changes;
106 :
107 : /* Do not try to replace constant addresses or addresses of local and
108 : argument slots. These MEM expressions are made only once and inserted
109 : in many instructions, as well as being used to control symbol table
110 : output. It is not safe to clobber them.
111 :
112 : There are some uncommon cases where the address is already in a register
113 : for some reason, but we cannot take advantage of that because we have
114 : no easy way to unshare the MEM. In addition, looking up all stack
115 : addresses is costly. */
116 :
117 : static bool
118 2049623 : can_simplify_addr (rtx addr)
119 : {
120 2049623 : rtx reg;
121 :
/* Constant addresses are never candidates for replacement.  */
122 2049623 : if (CONSTANT_ADDRESS_P (addr))
123 : return false;
124 :
/* For a PLUS only the first operand is examined; any other form of
   address is examined as a whole.  */
125 2049623 : if (GET_CODE (addr) == PLUS)
126 1164639 : reg = XEXP (addr, 0);
127 : else
128 : reg = addr;
129 :
/* Accept anything that is not a register, and any register other than
   the frame pointer, the hard frame pointer and the argument pointer.  */
130 2049623 : return (!REG_P (reg)
131 2049623 : || (REGNO (reg) != FRAME_POINTER_REGNUM
132 : && REGNO (reg) != HARD_FRAME_POINTER_REGNUM
133 : && REGNO (reg) != ARG_POINTER_REGNUM));
134 : }
135 :
136 : /* MEM is the result of an address simplification, and temporarily
137 : undoing changes OLD_NUM_CHANGES onwards restores the original address.
138 : Return whether it is good to use the new address instead of the
139 : old one. INSN is the containing instruction. */
140 :
141 : static bool
142 1903570 : should_replace_address (int old_num_changes, rtx mem, rtx_insn *insn)
143 : {
/* GAIN ends up positive when the new address should be preferred.  */
144 1903570 : int gain;
145 :
146 : /* Prefer the new address if it is less expensive. */
147 1903570 : bool speed = optimize_bb_for_speed_p (BLOCK_FOR_INSN (insn));
/* The UNDO object only lives for the nested scope, so the first
   address_cost call sees the address with the changes undone (the old
   address) and the second call, after the scope has closed, sees the
   new address.  GAIN is therefore old cost minus new cost.  */
148 1903570 : {
149 1903570 : undo_recog_changes undo (old_num_changes);
150 1903570 : gain = address_cost (XEXP (mem, 0), GET_MODE (mem),
151 1903570 : MEM_ADDR_SPACE (mem), speed);
152 1903570 : }
153 1903570 : gain -= address_cost (XEXP (mem, 0), GET_MODE (mem),
154 1903570 : MEM_ADDR_SPACE (mem), speed);
155 :
156 : /* If the addresses have equivalent cost, prefer the new address
157 : if it has the highest `set_src_cost'. That has the potential of
158 : eliminating the most insns without additional costs, and it
159 : is the same that cse.cc used to do. */
160 1903570 : if (gain == 0)
161 : {
/* Here the order is reversed: the new address is costed first, and the
   old address is costed once UNDO has been constructed.  GAIN is new
   set_src_cost minus old set_src_cost.  */
162 339536 : gain = set_src_cost (XEXP (mem, 0), VOIDmode, speed);
163 339536 : undo_recog_changes undo (old_num_changes);
164 339536 : gain -= set_src_cost (XEXP (mem, 0), VOIDmode, speed);
165 339536 : }
166 :
167 1903570 : return (gain > 0);
168 : }
169 :
170 :
171 : namespace
172 : {
/* Specialization of insn_propagation used by this pass.  On top of the
   base class it records, in result_flags, whether a MEM was changed and
   whether the simplifications that took place were constant and/or
   considered profitable.  */
173 : class fwprop_propagation : public insn_propagation
174 : {
175 : public:
/* Extra result_flags bits, allocated from FIRST_SPARE_RESULT upwards.
   CHANGED_MEM is set by check_mem when it accepts a changed MEM.
   CONSTANT and PROFITABLE are computed by classify_result and recorded
   by note_simplification.  */
176 : static const uint16_t CHANGED_MEM = FIRST_SPARE_RESULT;
177 : static const uint16_t CONSTANT = FIRST_SPARE_RESULT << 1;
178 : static const uint16_t PROFITABLE = FIRST_SPARE_RESULT << 2;
179 :
/* Arguments are, in order: the use insn, the definition, the rtx being
   replaced (FROM) and its replacement (TO).  */
180 : fwprop_propagation (insn_info *, set_info *, rtx, rtx);
181 :
/* Return true if check_mem has accepted at least one changed MEM.  */
182 57752287 : bool changed_mem_p () const { return result_flags & CHANGED_MEM; }
183 : bool folded_to_constants_p () const;
184 : bool likely_profitable_p () const;
185 :
186 : bool check_mem (int, rtx) final override;
187 : void note_simplification (int, uint16_t, rtx, rtx) final override;
188 : uint16_t classify_result (rtx, rtx);
189 :
190 : private:
/* Initialized by the constructor from DEF->single_nondebug_use ().  */
191 : const bool single_use_p;
/* Initialized by the constructor: true if the use insn and the
   definition are in the same EBB.  */
192 : const bool single_ebb_p;
193 : };
194 : }
195 :
196 : /* Prepare to replace FROM with TO in USE_INSN. */
197 :
198 33639784 : fwprop_propagation::fwprop_propagation (insn_info *use_insn,
199 33639784 : set_info *def, rtx from, rtx to)
200 : : insn_propagation (use_insn->rtl (), from, to),
201 33639784 : single_use_p (def->single_nondebug_use ()),
202 33639784 : single_ebb_p (use_insn->ebb () == def->ebb ())
203 : {
/* NOTE(review): these two members are not declared in this file;
   presumably they belong to insn_propagation and make it call the
   check_mem and note_simplification overrides -- confirm against the
   base class.  */
204 33639784 : should_check_mems = true;
205 33639784 : should_note_simplifications = true;
206 33639784 : }
207 :
208 : /* MEM is the result of an address simplification, and temporarily
209 : undoing changes OLD_NUM_CHANGES onwards restores the original address.
210 : Return true if the propagation should continue, false if it has failed. */
211 :
212 : bool
213 3408582 : fwprop_propagation::check_mem (int old_num_changes, rtx mem)
214 : {
/* First require the new address to be valid for MEM's mode and address
   space.  */
215 6817164 : if (!memory_address_addr_space_p (GET_MODE (mem), XEXP (mem, 0),
216 3749098 : MEM_ADDR_SPACE (mem)))
217 : {
218 1358959 : failure_reason = "would create an invalid MEM";
219 1358959 : return false;
220 : }
221 :
/* Run can_simplify_addr while the changes from OLD_NUM_CHANGES onwards
   are undone, i.e. on the original address.  The lambda confines the
   lifetime of UNDO to that single call.  */
222 6148869 : bool can_simplify = [&]()
223 : {
224 2049623 : undo_recog_changes undo (old_num_changes);
225 2049623 : return can_simplify_addr (XEXP (mem, 0));
226 4099246 : } ();
227 2049623 : if (!can_simplify)
228 : {
229 75 : failure_reason = "would replace a frame address";
230 75 : return false;
231 : }
232 :
233 : /* Copy propagations are always ok. Otherwise check the costs. */
234 2049548 : if (!(REG_P (from) && REG_P (to))
235 3953118 : && !should_replace_address (old_num_changes, mem, insn))
236 : {
237 534619 : failure_reason = "would increase the cost of a MEM";
238 534619 : return false;
239 : }
240 :
/* Record that a MEM was changed; changed_mem_p reads this flag.  */
241 1514929 : result_flags |= CHANGED_MEM;
242 1514929 : return true;
243 : }
244 :
245 : /* OLD_RTX has been simplified to NEW_RTX. Describe the change in terms of
246 : result_flags. */
247 :
248 : uint16_t
249 3907944 : fwprop_propagation::classify_result (rtx old_rtx, rtx new_rtx)
250 : {
/* The return value is 0 or a combination of CONSTANT and PROFITABLE.
   A constant result is always CONSTANT, and is also PROFITABLE unless
   the LO_SUM exception below applies.  */
251 3907944 : if (CONSTANT_P (new_rtx))
252 : {
253 : /* If OLD_RTX is a LO_SUM, then it presumably exists for a reason,
254 : and NEW_RTX is likely not a legitimate address. We want it to
255 : disappear if it is invalid.
256 :
257 : ??? Using the mode of the LO_SUM as the mode of the address
258 : seems odd, but it was what the pre-SSA code did. */
259 702837 : if (GET_CODE (old_rtx) == LO_SUM
260 702837 : && !memory_address_p (GET_MODE (old_rtx), new_rtx))
261 0 : return CONSTANT;
262 702837 : return CONSTANT | PROFITABLE;
263 : }
264 :
265 : /* Allow replacements that simplify operations on a vector or complex
266 : value to a component. The most prominent case is
267 : (subreg ([vec_]concat ...)). */
/* The test uses the mode of FROM (the rtx being replaced), not of
   OLD_RTX: NEW_RTX must be a pseudo register whose mode is the inner
   mode of FROM's vector or complex mode.  */
268 3205107 : if (REG_P (new_rtx)
269 120187 : && !HARD_REGISTER_P (new_rtx)
270 119101 : && (VECTOR_MODE_P (GET_MODE (from))
271 110304 : || COMPLEX_MODE_P (GET_MODE (from)))
272 3222701 : && GET_MODE (new_rtx) == GET_MODE_INNER (GET_MODE (from)))
273 : return PROFITABLE;
274 :
275 : /* Allow (subreg (mem)) -> (mem) simplifications with the following
276 : exceptions:
277 : 1) Propagating (mem)s into multiple uses is not profitable.
278 : 2) Propagating (mem)s across EBBs may not be profitable if the source EBB
279 : runs less frequently.
280 : 3) Propagating (mem)s into paradoxical (subreg)s is not profitable.
281 : 4) Creating new (mem/v)s is not correct, since DCE will not remove the old
282 : ones. */
/* The conditions below implement those exceptions in order: 1) is
   single_use_p, 2) is single_ebb_p, 3) is the paradoxical_subreg_p
   test and 4) is the MEM_VOLATILE_P test.  */
283 3203531 : if (single_use_p
284 1599823 : && single_ebb_p
285 1599797 : && SUBREG_P (old_rtx)
286 135360 : && !paradoxical_subreg_p (old_rtx)
287 131788 : && MEM_P (new_rtx)
288 3246825 : && !MEM_VOLATILE_P (new_rtx))
289 43294 : return PROFITABLE;
290 :
/* None of the recognized cases applied.  */
291 : return 0;
292 : }
293 :
294 : /* Record that OLD_RTX has been simplified to NEW_RTX. OLD_NUM_CHANGES
295 : is the number of unrelated changes that had been made before processing
296 : OLD_RTX and its subrtxes. OLD_RESULT_FLAGS is the value that result_flags
297 : had at that point. */
298 :
299 : void
300 3907944 : fwprop_propagation::note_simplification (int old_num_changes,
301 : uint16_t old_result_flags,
302 : rtx old_rtx, rtx new_rtx)
303 : {
/* Discard the current CONSTANT and PROFITABLE bits and recompute them
   for this simplification.  */
304 3907944 : result_flags &= ~(CONSTANT | PROFITABLE);
305 3907944 : uint16_t new_flags = classify_result (old_rtx, new_rtx);
/* If other changes had been made before OLD_RTX was processed, keep a
   bit only if it was also set in OLD_RESULT_FLAGS, so that the flags
   describe all the changes rather than just the latest one.  */
306 3907944 : if (old_num_changes)
307 13600 : new_flags &= old_result_flags;
308 3907944 : result_flags |= new_flags;
309 3907944 : }
310 :
311 : /* Return true if all substitutions eventually folded to constants. */
312 :
313 : bool
314 14177545 : fwprop_propagation::folded_to_constants_p () const
315 : {
316 : /* If we're propagating a HIGH, require it to be folded with a
317 : partnering LO_SUM. For example, a REG_EQUAL note with a register
318 : replaced by an unfolded HIGH is not useful. */
319 14177545 : if (CONSTANT_P (to) && GET_CODE (to) != HIGH)
320 : return true;
/* Otherwise require the UNSIMPLIFIED bit to be clear and the CONSTANT
   bit to be set.  NOTE(review): UNSIMPLIFIED is not defined in this
   file; presumably it is inherited from insn_propagation and marks a
   substitution that was not simplified -- confirm.  */
321 12562555 : return !(result_flags & UNSIMPLIFIED) && (result_flags & CONSTANT);
322 : }
323 :
324 :
325 : /* Return true if it is worth keeping the result of the propagation,
326 : false if it would increase the complexity of the pattern too much. */
327 :
328 : bool
329 30469001 : fwprop_propagation::likely_profitable_p () const
330 : {
/* A changed MEM has already passed the checks in check_mem.  */
331 30469001 : if (changed_mem_p ())
332 : return true;
333 :
/* The UNSIMPLIFIED bit is clear and the simplifications were
   classified as PROFITABLE.  */
334 28961398 : if (!(result_flags & UNSIMPLIFIED)
335 3493300 : && (result_flags & PROFITABLE))
336 : return true;
337 :
/* The remaining tests depend only on the replacement value TO.
   A plain register is accepted...  */
338 28715620 : if (REG_P (to))
339 : return true;
340 :
/* ...as is a non-paradoxical SUBREG of a register...  */
341 26787438 : if (GET_CODE (to) == SUBREG
342 1256746 : && REG_P (SUBREG_REG (to))
343 28043343 : && !paradoxical_subreg_p (to))
344 : return true;
345 :
/* ...and a constant.  */
346 25564931 : if (CONSTANT_P (to))
347 1777355 : return true;
348 :
349 : return false;
350 : }
351 :
352 : /* Check that X has a single def. */
353 :
354 : static bool
355 85259014 : reg_single_def_p (rtx x)
356 : {
/* True only if X is a REG and crtl->ssa->single_dominating_def returns
   a non-null result for its register number.  */
357 85259014 : return REG_P (x) && crtl->ssa->single_dominating_def (REGNO (x));
358 : }
359 :
360 : /* Try to substitute (set DEST SRC), which defines DEF, into note NOTE of
361 : USE_INSN. Return the number of substitutions on success, otherwise return
362 : -1 and leave USE_INSN unchanged.
363 :
364 : If REQUIRE_CONSTANT is true, require all substituted occurrences of SRC
365 : to fold to a constant, so that the note does not use any more registers
366 : than it did previously. If REQUIRE_CONSTANT is false, also allow the
367 : substitution if it's something we'd normally allow for the main
368 : instruction pattern. */
369 :
370 : static int
371 3521019 : try_fwprop_subst_note (insn_info *use_insn, set_info *def,
372 : rtx note, rtx dest, rtx src, bool require_constant)
373 : {
374 3521019 : rtx_insn *use_rtl = use_insn->rtl ();
375 3521019 : insn_info *def_insn = def->insn ();
376 :
/* NOTE(review): WATERMARK is only told to keep the changes on the
   success path at the end of the function; presumably its destructor
   rolls back the pending changes on every other return -- confirm
   against insn_change_watermark.  */
377 3521019 : insn_change_watermark watermark;
378 3521019 : fwprop_propagation prop (use_insn, def, dest, src);
/* Substitute into the note's value, XEXP (note, 0).  */
379 3521019 : if (!prop.apply_to_rvalue (&XEXP (note, 0)))
380 : {
381 19806 : if (dump_file && (dump_flags & TDF_DETAILS))
382 0 : fprintf (dump_file, "cannot propagate from insn %d into"
383 : " notes of insn %d: %s\n", def_insn->uid (),
384 : use_insn->uid (), prop.failure_reason);
385 19806 : return -1;
386 : }
387 :
/* The note did not mention DEST: succeed with zero substitutions.  */
388 3501213 : if (prop.num_replacements == 0)
389 : return 0;
390 :
391 540130 : if (require_constant)
392 : {
393 525957 : if (!prop.folded_to_constants_p ())
394 : {
395 494645 : if (dump_file && (dump_flags & TDF_DETAILS))
396 0 : fprintf (dump_file, "cannot propagate from insn %d into"
397 : " notes of insn %d: %s\n", def_insn->uid (),
398 : use_insn->uid (), "wouldn't fold to constants");
399 494645 : return -1;
400 : }
401 : }
402 : else
403 : {
/* Without REQUIRE_CONSTANT, a result that is likely profitable is
   accepted as well as one that folded to constants.  */
404 14173 : if (!prop.folded_to_constants_p () && !prop.likely_profitable_p ())
405 : {
/* NOTE(review): "node" in the message below looks like a typo for
   "note" -- confirm before changing, since the text is part of the
   dump output.  */
406 5 : if (dump_file && (dump_flags & TDF_DETAILS))
407 0 : fprintf (dump_file, "cannot propagate from insn %d into"
408 : " notes of insn %d: %s\n", def_insn->uid (),
409 : use_insn->uid (), "would increase complexity of node");
410 5 : return -1;
411 : }
412 : }
413 :
414 45480 : if (dump_file && (dump_flags & TDF_DETAILS))
415 : {
416 0 : fprintf (dump_file, "\nin notes of insn %d, replacing:\n ",
417 0 : INSN_UID (use_rtl));
/* Print the note with all pending changes temporarily undone (the old
   form); the second print, outside this scope, shows the new form.  */
418 0 : {
419 0 : undo_recog_changes undo (0);
420 0 : print_inline_rtx (dump_file, note, 2);
421 0 : }
422 0 : fprintf (dump_file, "\n with:\n ");
423 0 : print_inline_rtx (dump_file, note, 2);
424 0 : fprintf (dump_file, "\n");
425 : }
426 45480 : watermark.keep ();
427 45480 : return prop.num_replacements;
428 3521019 : }
429 :
430 : /* Try to substitute (set DEST SRC), which defines DEF, into location LOC of
431 : USE_INSN's pattern. Return true on success, otherwise leave USE_INSN
432 : unchanged. */
433 :
434 : static bool
435 30118765 : try_fwprop_subst_pattern (obstack_watermark &attempt, insn_change &use_change,
436 : set_info *def, rtx *loc, rtx dest, rtx src)
437 : {
438 30118765 : insn_info *use_insn = use_change.insn ();
439 30118765 : rtx_insn *use_rtl = use_insn->rtl ();
440 30118765 : insn_info *def_insn = def->insn ();
441 :
442 30118765 : insn_change_watermark watermark;
443 30118765 : fwprop_propagation prop (use_insn, def, dest, src);
/* Substitute SRC for DEST at LOC within the pattern.  */
444 30118765 : if (!prop.apply_to_pattern (loc))
445 : {
446 1886691 : if (dump_file && (dump_flags & TDF_DETAILS))
447 0 : fprintf (dump_file, "cannot propagate from insn %d into"
448 : " insn %d: %s\n", def_insn->uid (), use_insn->uid (),
449 : prop.failure_reason);
450 1886691 : return false;
451 : }
452 :
/* Nothing was replaced, so there is nothing to recognize or commit.  */
453 28232074 : if (prop.num_replacements == 0)
454 : return false;
455 :
/* Reject a result that is not likely profitable outright when a MEM
   changed, when SRC contains a MEM, or when the use is an asm or a
   debug insn.  Other unprofitable-looking results go on to the
   stricter change_is_worthwhile test below.  */
456 24251441 : if (!prop.likely_profitable_p ()
457 24251441 : && (prop.changed_mem_p ()
458 19172916 : || contains_mem_rtx_p (src)
459 12814873 : || use_insn->is_asm ()
460 12813917 : || use_insn->is_debug_insn ()))
461 : {
462 7383867 : if (dump_file && (dump_flags & TDF_DETAILS))
463 0 : fprintf (dump_file, "cannot propagate from insn %d into"
464 : " insn %d: %s\n", def_insn->uid (), use_insn->uid (),
465 : "would increase complexity of pattern");
466 7383867 : return false;
467 : }
468 :
469 16867574 : if (dump_file && (dump_flags & TDF_DETAILS))
470 : {
471 0 : fprintf (dump_file, "\npropagating insn %d into insn %d, replacing:\n",
472 : def_insn->uid (), use_insn->uid ());
/* Print the pattern with all pending changes temporarily undone.  */
473 0 : undo_recog_changes undo (0);
474 0 : print_rtl_single (dump_file, PATTERN (use_rtl));
475 0 : }
476 :
/* Check that the changed instruction is still recognized.  For
   anything other than a changed MEM, an asm or a debug insn, also
   require the change to be worthwhile; the test is strict when the
   result is not likely profitable.  */
477 16867574 : bool ok = recog (attempt, use_change);
478 16867574 : if (ok
479 8110370 : && !prop.changed_mem_p ()
480 6609548 : && !use_insn->is_asm ()
481 23476650 : && !use_insn->is_debug_insn ())
482 : {
483 6204593 : bool strict_p = !prop.likely_profitable_p ();
484 6204593 : if (!change_is_worthwhile (use_change, strict_p))
485 : {
/* Terminate the message with a newline so that later dump output
   does not run onto the same line.  */
486 4880211 : if (dump_file)
487 63 : fprintf (dump_file, "change not profitable\n");
488 : ok = false;
489 : }
490 : }
491 :
492 11987363 : if (!ok)
493 : {
494 : /* The pattern didn't match, but if all uses of SRC folded to
495 : constants, we can add a REG_EQUAL note for the result, if there
496 : isn't one already. */
497 13637415 : if (!prop.folded_to_constants_p ())
498 : return false;
499 :
500 : /* Test this first to avoid creating an unnecessary copy of SRC. */
501 1584822 : if (find_reg_note (use_rtl, REG_EQUAL, NULL_RTX))
502 : return false;
503 :
504 788104 : rtx set = set_for_reg_notes (use_rtl);
505 788104 : if (!set || !REG_P (SET_DEST (set)))
506 : return false;
507 :
/* Copy the substituted source before the pending changes are
   cancelled; VALUE becomes the note contents.  */
508 425153 : rtx value = copy_rtx (SET_SRC (set));
509 425153 : cancel_changes (0);
510 :
511 : /* If there are any paradoxical SUBREGs, drop the REG_EQUAL note,
512 : because the bits in there can be anything and so might not
513 : match the REG_EQUAL note content. See PR70574. */
514 425153 : if (contains_paradoxical_subreg_p (SET_SRC (set)))
515 : return false;
516 :
517 425098 : if (dump_file && (dump_flags & TDF_DETAILS))
518 0 : fprintf (dump_file, " Setting REG_EQUAL note\n");
519 :
520 425098 : return set_unique_reg_note (use_rtl, REG_EQUAL, value);
521 : }
522 :
/* The pattern change is acceptable.  Apply the same substitution to
   each REG_EQUAL and REG_EQUIV note, and unlink and free any note for
   which the substitution fails.  NOTE_PTR always points at the link
   that refers to the current note, which makes the unlinking a single
   store.  */
523 3230159 : rtx *note_ptr = &REG_NOTES (use_rtl);
524 6166713 : while (rtx note = *note_ptr)
525 : {
526 2936554 : if ((REG_NOTE_KIND (note) == REG_EQUAL
527 2936554 : || REG_NOTE_KIND (note) == REG_EQUIV)
528 2936554 : && try_fwprop_subst_note (use_insn, def, note, dest, src, false) < 0)
529 : {
530 9 : *note_ptr = XEXP (note, 1);
531 9 : free_EXPR_LIST_node (note);
532 : }
533 : else
534 2936545 : note_ptr = &XEXP (note, 1);
535 : }
536 :
/* Commit the pending changes, update the SSA form and count the
   change.  */
537 3230159 : confirm_change_group ();
538 3230159 : crtl->ssa->change_insn (use_change);
539 3230159 : num_changes++;
540 3230159 : return true;
541 30118765 : }
542 :
543 : /* Try to substitute (set DEST SRC), which defines DEF, into USE_INSN's notes,
544 : given that it was not possible to do this for USE_INSN's main pattern.
545 : Return true on success, otherwise leave USE_INSN unchanged. */
546 :
547 : static bool
548 26467761 : try_fwprop_subst_notes (insn_info *use_insn, set_info *def,
549 : rtx dest, rtx src)
550 : {
551 26467761 : rtx_insn *use_rtl = use_insn->rtl ();
/* Walk the REG_EQUAL and REG_EQUIV notes, requiring every substitution
   to fold to a constant (the final "true" argument).  The walk stops
   at the first note in which at least one substitution succeeded, and
   the pending changes are committed at that point.  */
552 76147384 : for (rtx note = REG_NOTES (use_rtl); note; note = XEXP (note, 1))
553 49710935 : if ((REG_NOTE_KIND (note) == REG_EQUAL
554 49710935 : || REG_NOTE_KIND (note) == REG_EQUIV)
555 49710935 : && try_fwprop_subst_note (use_insn, def, note, dest, src, true) > 0)
556 : {
557 31312 : confirm_change_group ();
558 31312 : return true;
559 : }
560 :
561 : return false;
562 : }
563 :
564 : /* Check whether we could validly substitute (set DEST SRC), which defines DEF,
565 : into USE. If so, first try performing the substitution in location LOC
566 : of USE->insn ()'s pattern. If that fails, try instead to substitute
567 : into the notes.
568 :
569 : Return true on success, otherwise leave USE_INSN unchanged. */
570 :
571 : static bool
572 46497302 : try_fwprop_subst (use_info *use, set_info *def,
573 : rtx *loc, rtx dest, rtx src)
574 : {
575 46497302 : insn_info *use_insn = use->insn ();
576 46497302 : insn_info *def_insn = def->insn ();
577 :
/* SRC_USES starts as the uses of the defining insn with the note
   accesses removed.  */
578 46497302 : auto attempt = crtl->ssa->new_change_attempt ();
579 46497302 : use_array src_uses = remove_note_accesses (attempt, def_insn->uses ());
580 :
581 : /* ??? Not really a meaningful test: it means we can propagate arithmetic
582 : involving hard registers but not bare references to them. A better
583 : test would be to iterate over src_uses looking for hard registers
584 : that are not fixed. */
585 46497302 : if (REG_P (src) && HARD_REGISTER_P (src))
586 : return false;
587 :
588 : /* ??? It would be better to make this EBB-based instead. That would
589 : involve checking for equal EBBs rather than equal BBs and trying
590 : to make the uses available at use_insn->ebb ()->first_bb (). */
/* When the definition is in a different basic block, the uses that SRC
   depends on must be made available in the use's block; give up if
   make_uses_available returns an invalid array.  */
591 36514954 : if (def_insn->bb () != use_insn->bb ())
592 : {
593 8615779 : src_uses = crtl->ssa->make_uses_available (attempt, src_uses,
594 : use_insn->bb (),
595 8615779 : use_insn->is_debug_insn ());
596 8615779 : if (!src_uses.is_valid ())
597 : return false;
598 : }
599 :
/* The changed instruction uses everything it used before plus
   SRC_USES; give up if the two arrays cannot be merged.  */
600 33388690 : insn_change use_change (use_insn);
601 33388690 : use_change.new_uses = merge_access_arrays (attempt, use_change.new_uses,
602 : src_uses);
603 76203676 : if (!use_change.new_uses.is_valid ())
604 : return false;
605 :
606 : /* ??? We could allow movement within the EBB by adding:
607 :
608 : use_change.move_range = use_insn->ebb ()->insn_range (); */
609 31408647 : if (!restrict_movement (use_change))
610 : return false;
611 :
/* Try the main pattern first; the notes are only tried if that
   fails.  */
612 30118765 : return (try_fwprop_subst_pattern (attempt, use_change, def, loc, dest, src)
613 30118765 : || try_fwprop_subst_notes (use_insn, def, dest, src));
614 46497302 : }
615 :
616 : /* For the given single_set INSN, containing SRC known to be a
617 : ZERO_EXTEND or SIGN_EXTEND of a register, return true if INSN
618 : is redundant due to the register being set by a LOAD_EXTEND_OP
619 : load from memory. */
620 :
621 : static bool
622 14401 : free_load_extend (rtx src, insn_info *insn)
623 : {
/* REG is the register being extended.  The extension can only be free
   if load_extend_op for REG's mode is the same extension code as
   SRC.  */
624 14401 : rtx reg = XEXP (src, 0);
625 14401 : if (load_extend_op (GET_MODE (reg)) != GET_CODE (src))
626 : return false;
627 :
/* Find the definition that reaches INSN's use of REG, taking the
   first use with a matching register number.  */
628 0 : def_info *def = nullptr;
629 0 : for (use_info *use : insn->uses ())
630 0 : if (use->regno () == REGNO (reg))
631 : {
632 0 : def = use->def ();
633 0 : break;
634 : }
635 :
636 0 : if (!def)
637 : return false;
638 :
639 0 : insn_info *def_insn = def->insn ();
640 0 : if (def_insn->is_artificial ())
641 : return false;
642 :
/* The extension is free only if the defining insn is a plain
   (set REG (mem ...)).  */
643 0 : rtx_insn *def_rtl = def_insn->rtl ();
644 0 : if (NONJUMP_INSN_P (def_rtl))
645 : {
646 0 : rtx patt = PATTERN (def_rtl);
647 :
648 0 : if (GET_CODE (patt) == SET
649 0 : && GET_CODE (SET_SRC (patt)) == MEM
650 0 : && rtx_equal_p (SET_DEST (patt), reg))
651 : return true;
652 : }
653 : return false;
654 : }
655 :
656 : /* Subroutine of forward_propagate_subreg that handles a use of DEST
657 : in REF. The other parameters are the same. */
658 :
659 : static bool
660 73027 : forward_propagate_subreg (use_info *use, set_info *def,
661 : rtx dest, rtx src, df_ref ref)
662 : {
663 73027 : scalar_int_mode int_use_mode, src_mode;
664 :
665 : /* Only consider subregs... */
/* ...whose inner register has the same mode as DEST.  */
666 73027 : rtx use_reg = DF_REF_REG (ref);
667 73027 : machine_mode use_mode = GET_MODE (use_reg);
668 73027 : if (GET_CODE (use_reg) != SUBREG
669 72942 : || GET_MODE (SUBREG_REG (use_reg)) != GET_MODE (dest))
670 : return false;
671 :
672 : /* ??? Replacing throughout the pattern would help for match_dups. */
673 72942 : rtx *loc = DF_REF_LOC (ref);
674 72942 : if (paradoxical_subreg_p (use_reg))
675 : {
676 : /* If this is a paradoxical SUBREG, we have no idea what value the
677 : extra bits would have. However, if the operand is equivalent to
678 : a SUBREG whose operand is the same as our mode, and all the modes
679 : are within a word, we can just use the inner operand because
680 : these SUBREGs just say how to treat the register. */
/* Here the whole SUBREG use is replaced by the pseudo register inside
   SRC.  */
681 1834 : if (GET_CODE (src) == SUBREG
682 1784 : && REG_P (SUBREG_REG (src))
683 1744 : && REGNO (SUBREG_REG (src)) >= FIRST_PSEUDO_REGISTER
684 1744 : && GET_MODE (SUBREG_REG (src)) == use_mode
685 1933 : && subreg_lowpart_p (src))
686 99 : return try_fwprop_subst (use, def, loc, use_reg, SUBREG_REG (src));
687 : }
688 :
689 : /* If this is a SUBREG of a ZERO_EXTEND or SIGN_EXTEND, and the SUBREG
690 : is the low part of the reg being extended then just use the inner
691 : operand. Don't do this if the ZERO_EXTEND or SIGN_EXTEND insn will
692 : be removed due to it matching a LOAD_EXTEND_OP load from memory,
693 : or due to the operation being a no-op when applied to registers.
694 : For example, if we have:
695 :
696 : A: (set (reg:DI X) (sign_extend:DI (reg:SI Y)))
697 : B: (... (subreg:SI (reg:DI X)) ...)
698 :
699 : and mode_rep_extended says that Y is already sign-extended,
700 : the backend will typically allow A to be combined with the
701 : definition of Y or, failing that, allow A to be deleted after
702 : reload through register tying. Introducing more uses of Y
703 : prevents both optimisations. */
704 71108 : else if (is_a <scalar_int_mode> (use_mode, &int_use_mode)
705 60875 : && subreg_lowpart_p (use_reg))
706 : {
/* Here the whole SUBREG use is replaced by the pseudo register being
   extended, provided that register already has the mode of the use.  */
707 55918 : if ((GET_CODE (src) == ZERO_EXTEND
708 55918 : || GET_CODE (src) == SIGN_EXTEND)
709 36626 : && is_a <scalar_int_mode> (GET_MODE (src), &src_mode)
710 36616 : && REG_P (XEXP (src, 0))
711 17164 : && REGNO (XEXP (src, 0)) >= FIRST_PSEUDO_REGISTER
712 17164 : && GET_MODE (XEXP (src, 0)) == use_mode
713 14401 : && !free_load_extend (src, def->insn ())
714 70319 : && (targetm.mode_rep_extended (int_use_mode, src_mode)
715 14401 : != (int) GET_CODE (src)))
716 14401 : return try_fwprop_subst (use, def, loc, use_reg, XEXP (src, 0));
717 : }
718 :
719 : return false;
720 : }
721 :
722 : /* Try to substitute (set DEST SRC), which defines DEF, into USE and simplify
723 : the result, handling cases where DEST is used in a subreg and where
724 : applying that subreg to SRC results in a useful simplification. */
725 :
726 : static bool
727 53700507 : forward_propagate_subreg (use_info *use, set_info *def, rtx dest, rtx src)
728 : {
/* Cheap rejections first: the use must involve subregs and DEST must
   be a register.  */
729 53700507 : if (!use->includes_subregs () || !REG_P (dest))
730 : return false;
731 :
/* Only a SUBREG, ZERO_EXTEND or SIGN_EXTEND source is handled by the
   per-reference worker.  */
732 1504145 : if (GET_CODE (src) != SUBREG
733 1473541 : && GET_CODE (src) != ZERO_EXTEND
734 1437262 : && GET_CODE (src) != SIGN_EXTEND)
735 : return false;
736 :
737 70700 : rtx_insn *use_rtl = use->insn ()->rtl ();
738 70700 : df_ref ref;
739 :
/* Try each DF use of the register in the instruction, stopping at the
   first successful propagation.  */
740 149438 : FOR_EACH_INSN_USE (ref, use_rtl)
741 91798 : if (DF_REF_REGNO (ref) == use->regno ()
742 91798 : && forward_propagate_subreg (use, def, dest, src, ref))
743 : return true;
744 :
/* Then do the same for the references visited by
   FOR_EACH_INSN_EQ_USE.  */
745 62079 : FOR_EACH_INSN_EQ_USE (ref, use_rtl)
746 4441 : if (DF_REF_REGNO (ref) == use->regno ()
747 4441 : && forward_propagate_subreg (use, def, dest, src, ref))
748 : return true;
749 :
750 : return false;
751 : }
752 :
753 : /* Try to substitute (set DEST SRC), which defines DEF, into USE and
754 : simplify the result. */
755 :
756 : static bool
757 57369761 : forward_propagate_and_simplify (use_info *use, set_info *def,
758 : rtx dest, rtx src)
759 : {
760 57369761 : insn_info *use_insn = use->insn ();
761 57369761 : rtx_insn *use_rtl = use_insn->rtl ();
762 57369761 : insn_info *def_insn = def->insn ();
763 :
764 : /* ??? This check seems unnecessary. We should be able to propagate
765 : into any kind of instruction, regardless of whether it's a single set.
766 : It seems odd to be more permissive with asms than normal instructions. */
767 57369761 : bool need_single_set = (!use_insn->is_asm () && !use_insn->is_debug_insn ());
768 57369761 : rtx use_set = single_set (use_rtl);
769 57369761 : if (need_single_set && !use_set)
770 : return false;
771 :
772 : /* Do not propagate into PC etc.
773 :
774 : ??? This too seems unnecessary. The current code should work correctly
775 : without it, including cases where jumps become unconditional. */
776 52678853 : if (use_set && GET_MODE (SET_DEST (use_set)) == VOIDmode)
777 : return false;
778 :
779 : /* In __asm don't replace if src might need more registers than
780 : reg, as that could increase register pressure on the __asm. */
781 47763667 : if (use_insn->is_asm () && def_insn->uses ().size () > 1)
782 : return false;
783 :
784 : /* Check if the def is loading something from the constant pool; in this
785 : case we would undo optimization such as compress_float_constant.
786 : Still, we can set a REG_EQUAL note. */
787 47761912 : if (MEM_P (src) && MEM_READONLY_P (src))
788 : {
789 1279110 : rtx x = avoid_constant_pool_reference (src);
790 1279110 : rtx note_set;
791 1279110 : if (x != src
792 894293 : && (note_set = set_for_reg_notes (use_rtl))
793 595816 : && REG_P (SET_DEST (note_set))
794 1874804 : && !contains_paradoxical_subreg_p (SET_SRC (note_set)))
795 : {
796 561885 : rtx note = find_reg_note (use_rtl, REG_EQUAL, NULL_RTX);
797 561885 : rtx old_rtx = note ? XEXP (note, 0) : SET_SRC (note_set);
798 561885 : rtx new_rtx = simplify_replace_rtx (old_rtx, src, x);
799 561885 : if (old_rtx != new_rtx)
800 4 : set_unique_reg_note (use_rtl, REG_EQUAL, copy_rtx (new_rtx));
801 : }
802 1279110 : return false;
803 : }
804 :
805 : /* ??? Unconditionally propagating into PATTERN would work better
806 : for instructions that have match_dups. */
807 46482802 : rtx *loc = need_single_set ? &use_set : &PATTERN (use_rtl);
808 46482802 : return try_fwprop_subst (use, def, loc, dest, src);
809 : }
810 :
811 : /* Given a use USE of an insn, if it has a single reaching
812 : definition, try to forward propagate it into that insn.
813 : Return true if something changed.
814 :
815 : REG_PROP_ONLY is true if we should only propagate register copies. */
816 :
817 : static bool
818 161328749 : forward_propagate_into (use_info *use, bool reg_prop_only = false)
819 : {
820 161328749 : if (use->includes_read_writes ())
821 : return false;
822 :
823 : /* Disregard uninitialized uses. */
824 154281471 : set_info *def = use->def ();
825 154281471 : if (!def)
826 : return false;
827 :
828 : /* Only consider single-register definitions. This could be relaxed,
829 : but it should rarely be needed before RA. */
830 154263868 : def = look_through_degenerate_phi (def);
831 154263868 : if (def->includes_multiregs ())
832 : return false;
833 :
834 : /* Only consider uses whose definition comes from a real instruction. */
835 153712440 : insn_info *def_insn = def->insn ();
836 153712440 : if (def_insn->is_artificial ())
837 : return false;
838 :
839 : /* Do not propagate asms. The only kind of propagation that would
840 : succeed is propagation into a register move. Such a propagation
841 : is neutral if the destination of the move is a pseudo and unnecessarily
842 : restricts the register allocator if the destination of the move is
843 : a hard register.
844 :
845 : Furthermore, unlike for a normal instruction, we cannot take a SET from an
846 : asm and try dropping the CLOBBERs. The recog process does not (and should
847 : not try to) second-guess whether what the user wrote can be changed and
848 : so it has to assume that any asm given to it is a fair reflection of
849 : what the user wrote. */
850 113924095 : if (def_insn->is_asm ())
851 : return false;
852 :
853 113809153 : rtx_insn *def_rtl = def_insn->rtl ();
854 113809153 : if (!NONJUMP_INSN_P (def_rtl))
855 : return false;
856 : /* ??? This seems an unnecessary restriction. We can easily tell
857 : which set the definition comes from. */
858 111075473 : if (multiple_sets (def_rtl))
859 : return false;
860 110431346 : rtx def_set = simple_regno_set (PATTERN (def_rtl), def->regno ());
861 110431346 : if (!def_set)
862 : return false;
863 :
864 108987566 : rtx dest = SET_DEST (def_set);
865 108987566 : rtx src = SET_SRC (def_set);
866 108987566 : if (volatile_refs_p (src))
867 : return false;
868 :
869 : /* Allow propagations into a loop only for reg-to-reg copies, since
870 : replacing one register by another shouldn't increase the cost.
871 : Propagations from inner loop to outer loop should also be ok. */
872 108044502 : struct loop *def_loop = def_insn->bb ()->cfg_bb ()->loop_father;
873 216089004 : struct loop *use_loop = use->bb ()->cfg_bb ()->loop_father;
874 108044502 : if ((reg_prop_only
875 58233605 : || (def_loop != use_loop
876 2372340 : && !flow_loop_nested_p (use_loop, def_loop)))
877 110063843 : && (!reg_single_def_p (dest) || !reg_single_def_p (src)))
878 : return false;
879 :
880 : /* Don't substitute into a non-local goto, this confuses CFG. */
881 57370444 : insn_info *use_insn = use->insn ();
882 57370444 : rtx_insn *use_rtl = use_insn->rtl ();
883 57370444 : if (JUMP_P (use_rtl)
884 57370444 : && find_reg_note (use_rtl, REG_NON_LOCAL_GOTO, NULL_RTX))
885 : return false;
886 :
887 57369761 : if (forward_propagate_and_simplify (use, def, dest, src)
888 57369761 : || forward_propagate_subreg (use, def, dest, src))
889 3682316 : return true;
890 :
891 : return false;
892 : }
893 :
894 : static void
895 2087216 : fwprop_init (void)
896 : {
897 2087216 : num_changes = 0;
898 2087216 : calculate_dominance_info (CDI_DOMINATORS);
899 :
900 : /* We do not always want to propagate into loops, so we have to find
901 : loops and be careful about them. Avoid CFG modifications so that
902 : we don't have to update dominance information afterwards for
903 : build_single_def_use_links. */
904 2087216 : loop_optimizer_init (AVOID_CFG_MODIFICATIONS);
905 :
906 2087216 : df_analyze ();
907 2087216 : crtl->ssa = new rtl_ssa::function_info (cfun);
908 2087216 : }
909 :
910 : static void
911 2087216 : fwprop_done (void)
912 : {
913 2087216 : loop_optimizer_finalize ();
914 :
915 2087216 : crtl->ssa->perform_pending_updates ();
916 2087216 : free_dominance_info (CDI_DOMINATORS);
917 2087216 : cleanup_cfg (0);
918 :
919 2087216 : delete crtl->ssa;
920 2087216 : crtl->ssa = nullptr;
921 :
922 2087216 : delete_trivially_dead_insns (get_insns (), max_reg_num ());
923 :
924 2087216 : if (dump_file)
925 63 : fprintf (dump_file,
926 : "\nNumber of successful forward propagations: %d\n\n",
927 : num_changes);
928 2087216 : }
929 :
930 : /* Try to optimize INSN, returning true if something changes.
931 : FWPROP_ADDR_P is true if we are running fwprop_addr rather than
932 : the full fwprop. */
933 :
934 : static bool
935 223264504 : fwprop_insn (insn_info *insn, bool fwprop_addr_p)
936 : {
937 415908406 : for (use_info *use : insn->uses ())
938 : {
939 196326218 : if (use->is_mem ())
940 30427464 : continue;
941 : /* ??? The choices here follow those in the pre-SSA code. */
942 165898754 : if (!use->includes_address_uses ())
943 : {
944 129886891 : if (forward_propagate_into (use, fwprop_addr_p))
945 3682316 : return true;
946 : }
947 : else
948 : {
949 36011863 : struct loop *loop = insn->bb ()->cfg_bb ()->loop_father;
950 : /* The outermost loop is not really a loop. */
951 36011863 : if (loop == NULL || loop_outer (loop) == NULL)
952 : {
953 26622635 : if (forward_propagate_into (use, fwprop_addr_p))
954 : return true;
955 : }
956 9389228 : else if (fwprop_addr_p)
957 : {
958 4819223 : if (forward_propagate_into (use, false))
959 : return true;
960 : }
961 : }
962 : }
963 219582188 : return false;
964 : }
965 :
966 : /* Main entry point. */
967 :
968 : static bool
969 2942740 : gate_fwprop (void)
970 : {
971 2087372 : return optimize > 0 && flag_forward_propagate;
972 : }
973 :
974 : static unsigned int
975 2087216 : fwprop (bool fwprop_addr_p)
976 : {
977 2087216 : fwprop_init ();
978 :
979 : /* Go through all the instructions (including debug instructions) looking
980 : for uses that we could propagate into.
981 :
982 : Do not forward propagate addresses into loops until after unrolling.
983 : CSE did so because it was able to fix its own mess, but we are not. */
984 :
985 2087216 : insn_info *next;
986 :
987 : /* ??? This code uses a worklist in order to preserve the behavior
988 : of the pre-SSA implementation. It would be better to instead
989 : iterate on each instruction until no more propagations are
990 : possible, then move on to the next. */
991 2087216 : auto_vec<insn_info *> worklist;
992 289925935 : for (insn_info *insn = crtl->ssa->first_insn (); insn; insn = next)
993 : {
994 287838719 : next = insn->next_any_insn ();
995 287838719 : if (insn->can_be_optimized () || insn->is_debug_insn ())
996 219582188 : if (fwprop_insn (insn, fwprop_addr_p))
997 3527347 : worklist.safe_push (insn);
998 : }
999 5769532 : for (unsigned int i = 0; i < worklist.length (); ++i)
1000 : {
1001 3682316 : insn_info *insn = worklist[i];
1002 3682316 : if (fwprop_insn (insn, fwprop_addr_p))
1003 154969 : worklist.safe_push (insn);
1004 : }
1005 :
1006 2087216 : fwprop_done ();
1007 2087216 : return 0;
1008 2087216 : }
1009 :
1010 : namespace {
1011 :
1012 : const pass_data pass_data_rtl_fwprop =
1013 : {
1014 : RTL_PASS, /* type */
1015 : "fwprop1", /* name */
1016 : OPTGROUP_NONE, /* optinfo_flags */
1017 : TV_FWPROP, /* tv_id */
1018 : 0, /* properties_required */
1019 : 0, /* properties_provided */
1020 : 0, /* properties_destroyed */
1021 : 0, /* todo_flags_start */
1022 : TODO_df_finish, /* todo_flags_finish */
1023 : };
1024 :
1025 : class pass_rtl_fwprop : public rtl_opt_pass
1026 : {
1027 : public:
1028 285722 : pass_rtl_fwprop (gcc::context *ctxt)
1029 571444 : : rtl_opt_pass (pass_data_rtl_fwprop, ctxt)
1030 : {}
1031 :
1032 : /* opt_pass methods: */
1033 1471370 : bool gate (function *) final override { return gate_fwprop (); }
1034 1043608 : unsigned int execute (function *) final override { return fwprop (false); }
1035 :
1036 : }; // class pass_rtl_fwprop
1037 :
1038 : } // anon namespace
1039 :
1040 : rtl_opt_pass *
1041 285722 : make_pass_rtl_fwprop (gcc::context *ctxt)
1042 : {
1043 285722 : return new pass_rtl_fwprop (ctxt);
1044 : }
1045 :
1046 : namespace {
1047 :
1048 : const pass_data pass_data_rtl_fwprop_addr =
1049 : {
1050 : RTL_PASS, /* type */
1051 : "fwprop2", /* name */
1052 : OPTGROUP_NONE, /* optinfo_flags */
1053 : TV_FWPROP, /* tv_id */
1054 : 0, /* properties_required */
1055 : 0, /* properties_provided */
1056 : 0, /* properties_destroyed */
1057 : 0, /* todo_flags_start */
1058 : TODO_df_finish, /* todo_flags_finish */
1059 : };
1060 :
1061 : class pass_rtl_fwprop_addr : public rtl_opt_pass
1062 : {
1063 : public:
1064 285722 : pass_rtl_fwprop_addr (gcc::context *ctxt)
1065 571444 : : rtl_opt_pass (pass_data_rtl_fwprop_addr, ctxt)
1066 : {}
1067 :
1068 : /* opt_pass methods: */
1069 1471370 : bool gate (function *) final override { return gate_fwprop (); }
1070 1043608 : unsigned int execute (function *) final override { return fwprop (true); }
1071 :
1072 : }; // class pass_rtl_fwprop_addr
1073 :
1074 : } // anon namespace
1075 :
1076 : rtl_opt_pass *
1077 285722 : make_pass_rtl_fwprop_addr (gcc::context *ctxt)
1078 : {
1079 285722 : return new pass_rtl_fwprop_addr (ctxt);
1080 : }
|