Branch data Line data Source code
1 : : /* RTL-based forward propagation pass for GNU compiler.
2 : : Copyright (C) 2005-2024 Free Software Foundation, Inc.
3 : : Contributed by Paolo Bonzini and Steven Bosscher.
4 : :
5 : : This file is part of GCC.
6 : :
7 : : GCC is free software; you can redistribute it and/or modify it under
8 : : the terms of the GNU General Public License as published by the Free
9 : : Software Foundation; either version 3, or (at your option) any later
10 : : version.
11 : :
12 : : GCC is distributed in the hope that it will be useful, but WITHOUT ANY
13 : : WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 : : FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
15 : : for more details.
16 : :
17 : : You should have received a copy of the GNU General Public License
18 : : along with GCC; see the file COPYING3. If not see
19 : : <http://www.gnu.org/licenses/>. */
20 : :
21 : : #define INCLUDE_ALGORITHM
22 : : #define INCLUDE_FUNCTIONAL
23 : : #define INCLUDE_ARRAY
24 : : #include "config.h"
25 : : #include "system.h"
26 : : #include "coretypes.h"
27 : : #include "backend.h"
28 : : #include "rtl.h"
29 : : #include "rtlanal.h"
30 : : #include "df.h"
31 : : #include "rtl-ssa.h"
32 : :
33 : : #include "predict.h"
34 : : #include "cfgrtl.h"
35 : : #include "cfgcleanup.h"
36 : : #include "cfgloop.h"
37 : : #include "tree-pass.h"
38 : : #include "rtl-iter.h"
39 : : #include "target.h"
40 : :
41 : : /* This pass does simple forward propagation and simplification when an
42 : : operand of an insn can only come from a single def. This pass uses
43 : : RTL SSA, so it is global. However, we only do limited analysis of
44 : : available expressions.
45 : :
46 : : 1) The pass tries to propagate the source of the def into the use,
47 : : and checks if the result is independent of the substituted value.
48 : : For example, the high word of a (zero_extend:DI (reg:SI M)) is always
49 : : zero, independent of the source register.
50 : :
51 : : In particular, we propagate constants into the use site. Sometimes
52 : : RTL expansion did not put the constant in the same insn on purpose,
53 : : to satisfy a predicate, and the result will fail to be recognized;
54 : : but this happens rarely and in this case we can still create a
55 : : REG_EQUAL note. For multi-word operations, this
56 : :
57 : : (set (subreg:SI (reg:DI 120) 0) (const_int 0))
58 : : (set (subreg:SI (reg:DI 120) 4) (const_int -1))
59 : : (set (subreg:SI (reg:DI 122) 0)
60 : : (ior:SI (subreg:SI (reg:DI 119) 0) (subreg:SI (reg:DI 120) 0)))
61 : : (set (subreg:SI (reg:DI 122) 4)
62 : : (ior:SI (subreg:SI (reg:DI 119) 4) (subreg:SI (reg:DI 120) 4)))
63 : :
64 : : can be simplified to the much simpler
65 : :
66 : : (set (subreg:SI (reg:DI 122) 0) (subreg:SI (reg:DI 119)))
67 : : (set (subreg:SI (reg:DI 122) 4) (const_int -1))
68 : :
69 : : This particular propagation is also effective at putting together
70 : : complex addressing modes. We are more aggressive inside MEMs, in
71 : : that all definitions are propagated if the use is in a MEM; if the
72 : : result is a valid memory address we check address_cost to decide
73 : : whether the substitution is worthwhile.
74 : :
75 : : 2) The pass propagates register copies. This is not as effective as
76 : : the copy propagation done by CSE's canon_reg, which works by walking
77 : : the instruction chain, it can help the other transformations.
78 : :
79 : : We should consider removing this optimization, and instead reorder the
80 : : RTL passes, because GCSE does this transformation too. With some luck,
81 : : the CSE pass at the end of rest_of_handle_gcse could also go away.
82 : :
83 : : 3) The pass looks for paradoxical subregs that are actually unnecessary.
84 : : Things like this:
85 : :
86 : : (set (reg:QI 120) (subreg:QI (reg:SI 118) 0))
87 : : (set (reg:QI 121) (subreg:QI (reg:SI 119) 0))
88 : : (set (reg:SI 122) (plus:SI (subreg:SI (reg:QI 120) 0)
89 : : (subreg:SI (reg:QI 121) 0)))
90 : :
91 : : are very common on machines that can only do word-sized operations.
92 : : For each use of a paradoxical subreg (subreg:WIDER (reg:NARROW N) 0),
93 : : if it has a single def and it is (subreg:NARROW (reg:WIDE M) 0),
94 : : we can replace the paradoxical subreg with simply (reg:WIDE M). The
95 : : above will simplify this to
96 : :
97 : : (set (reg:QI 120) (subreg:QI (reg:SI 118) 0))
98 : : (set (reg:QI 121) (subreg:QI (reg:SI 119) 0))
99 : : (set (reg:SI 122) (plus:SI (reg:SI 118) (reg:SI 119)))
100 : :
101 : : where the first two insns are now dead. */
102 : :
103 : : using namespace rtl_ssa;
104 : :
105 : : static int num_changes;
106 : :
107 : : /* Do not try to replace constant addresses or addresses of local and
108 : : argument slots. These MEM expressions are made only once and inserted
109 : : in many instructions, as well as being used to control symbol table
110 : : output. It is not safe to clobber them.
111 : :
112 : : There are some uncommon cases where the address is already in a register
113 : : for some reason, but we cannot take advantage of that because we have
114 : : no easy way to unshare the MEM. In addition, looking up all stack
115 : : addresses is costly. */
116 : :
117 : : static bool
118 : 1654811 : can_simplify_addr (rtx addr)
119 : : {
120 : 1654811 : rtx reg;
121 : :
122 : 1654811 : if (CONSTANT_ADDRESS_P (addr))
123 : : return false;
124 : :
125 : 1654811 : if (GET_CODE (addr) == PLUS)
126 : 886546 : reg = XEXP (addr, 0);
127 : : else
128 : : reg = addr;
129 : :
130 : 1654811 : return (!REG_P (reg)
131 : 1654811 : || (REGNO (reg) != FRAME_POINTER_REGNUM
132 : : && REGNO (reg) != HARD_FRAME_POINTER_REGNUM
133 : : && REGNO (reg) != ARG_POINTER_REGNUM));
134 : : }
135 : :
136 : : /* MEM is the result of an address simplification, and temporarily
137 : : undoing changes OLD_NUM_CHANGES onwards restores the original address.
138 : : Return whether it is good to use the new address instead of the
139 : : old one. INSN is the containing instruction. */
140 : :
141 : : static bool
142 : 1551142 : should_replace_address (int old_num_changes, rtx mem, rtx_insn *insn)
143 : : {
144 : 1551142 : int gain;
145 : :
146 : : /* Prefer the new address if it is less expensive. */
147 : 1551142 : bool speed = optimize_bb_for_speed_p (BLOCK_FOR_INSN (insn));
148 : 1551142 : {
149 : 1551142 : undo_recog_changes undo (old_num_changes);
150 : 1551142 : gain = address_cost (XEXP (mem, 0), GET_MODE (mem),
151 : 1551142 : MEM_ADDR_SPACE (mem), speed);
152 : 1551142 : }
153 : 1551142 : gain -= address_cost (XEXP (mem, 0), GET_MODE (mem),
154 : 1551142 : MEM_ADDR_SPACE (mem), speed);
155 : :
156 : : /* If the addresses have equivalent cost, prefer the new address
157 : : if it has the highest `set_src_cost'. That has the potential of
158 : : eliminating the most insns without additional costs, and it
159 : : is the same that cse.cc used to do. */
160 : 1551142 : if (gain == 0)
161 : : {
162 : 258922 : gain = set_src_cost (XEXP (mem, 0), VOIDmode, speed);
163 : 258922 : undo_recog_changes undo (old_num_changes);
164 : 258922 : gain -= set_src_cost (XEXP (mem, 0), VOIDmode, speed);
165 : 258922 : }
166 : :
167 : 1551142 : return (gain > 0);
168 : : }
169 : :
170 : :
171 : : namespace
172 : : {
173 : : class fwprop_propagation : public insn_propagation
174 : : {
175 : : public:
176 : : static const uint16_t CHANGED_MEM = FIRST_SPARE_RESULT;
177 : : static const uint16_t CONSTANT = FIRST_SPARE_RESULT << 1;
178 : : static const uint16_t PROFITABLE = FIRST_SPARE_RESULT << 2;
179 : :
180 : : fwprop_propagation (insn_info *, set_info *, rtx, rtx);
181 : :
182 : 53456938 : bool changed_mem_p () const { return result_flags & CHANGED_MEM; }
183 : : bool folded_to_constants_p () const;
184 : : bool likely_profitable_p () const;
185 : :
186 : : bool check_mem (int, rtx) final override;
187 : : void note_simplification (int, uint16_t, rtx, rtx) final override;
188 : : uint16_t classify_result (rtx, rtx);
189 : :
190 : : private:
191 : : const bool single_use_p;
192 : : const bool single_ebb_p;
193 : : };
194 : : }
195 : :
196 : : /* Prepare to replace FROM with TO in USE_INSN. */
197 : :
198 : 31186799 : fwprop_propagation::fwprop_propagation (insn_info *use_insn,
199 : 31186799 : set_info *def, rtx from, rtx to)
200 : : : insn_propagation (use_insn->rtl (), from, to),
201 : 31186799 : single_use_p (def->single_nondebug_use ()),
202 : 31186799 : single_ebb_p (use_insn->ebb () == def->ebb ())
203 : : {
204 : 31186799 : should_check_mems = true;
205 : 31186799 : should_note_simplifications = true;
206 : 31186799 : }
207 : :
208 : : /* MEM is the result of an address simplification, and temporarily
209 : : undoing changes OLD_NUM_CHANGES onwards restores the original address.
210 : : Return true if the propagation should continue, false if it has failed. */
211 : :
212 : : bool
213 : 2867346 : fwprop_propagation::check_mem (int old_num_changes, rtx mem)
214 : : {
215 : 5734692 : if (!memory_address_addr_space_p (GET_MODE (mem), XEXP (mem, 0),
216 : 2948233 : MEM_ADDR_SPACE (mem)))
217 : : {
218 : 1212535 : failure_reason = "would create an invalid MEM";
219 : 1212535 : return false;
220 : : }
221 : :
222 : 4964433 : bool can_simplify = [&]()
223 : : {
224 : 1654811 : undo_recog_changes undo (old_num_changes);
225 : 1654811 : return can_simplify_addr (XEXP (mem, 0));
226 : 3309622 : } ();
227 : 1654811 : if (!can_simplify)
228 : : {
229 : 63 : failure_reason = "would replace a frame address";
230 : 63 : return false;
231 : : }
232 : :
233 : : /* Copy propagations are always ok. Otherwise check the costs. */
234 : 1654748 : if (!(REG_P (from) && REG_P (to))
235 : 3205890 : && !should_replace_address (old_num_changes, mem, insn))
236 : : {
237 : 363628 : failure_reason = "would increase the cost of a MEM";
238 : 363628 : return false;
239 : : }
240 : :
241 : 1291120 : result_flags |= CHANGED_MEM;
242 : 1291120 : return true;
243 : : }
244 : :
245 : : /* OLDX has been simplified to NEWX. Describe the change in terms of
246 : : result_flags. */
247 : :
248 : : uint16_t
249 : 3464332 : fwprop_propagation::classify_result (rtx old_rtx, rtx new_rtx)
250 : : {
251 : 3464332 : if (CONSTANT_P (new_rtx))
252 : : {
253 : : /* If OLD_RTX is a LO_SUM, then it presumably exists for a reason,
254 : : and NEW_RTX is likely not a legitimate address. We want it to
255 : : disappear if it is invalid.
256 : :
257 : : ??? Using the mode of the LO_SUM as the mode of the address
258 : : seems odd, but it was what the pre-SSA code did. */
259 : 633015 : if (GET_CODE (old_rtx) == LO_SUM
260 : 633015 : && !memory_address_p (GET_MODE (old_rtx), new_rtx))
261 : 0 : return CONSTANT;
262 : 633015 : return CONSTANT | PROFITABLE;
263 : : }
264 : :
265 : : /* Allow replacements that simplify operations on a vector or complex
266 : : value to a component. The most prominent case is
267 : : (subreg ([vec_]concat ...)). */
268 : 2831317 : if (REG_P (new_rtx)
269 : 114618 : && !HARD_REGISTER_P (new_rtx)
270 : 114388 : && (VECTOR_MODE_P (GET_MODE (from))
271 : 107063 : || COMPLEX_MODE_P (GET_MODE (from)))
272 : 2845967 : && GET_MODE (new_rtx) == GET_MODE_INNER (GET_MODE (from)))
273 : : return PROFITABLE;
274 : :
275 : : /* Allow (subreg (mem)) -> (mem) simplifications with the following
276 : : exceptions:
277 : : 1) Propagating (mem)s into multiple uses is not profitable.
278 : : 2) Propagating (mem)s across EBBs may not be profitable if the source EBB
279 : : runs less frequently.
280 : : 3) Propagating (mem)s into paradoxical (subreg)s is not profitable.
281 : : 4) Creating new (mem/v)s is not correct, since DCE will not remove the old
282 : : ones. */
283 : 2830259 : if (single_use_p
284 : 1502452 : && single_ebb_p
285 : 1502435 : && SUBREG_P (old_rtx)
286 : 134980 : && !paradoxical_subreg_p (old_rtx)
287 : 132201 : && MEM_P (new_rtx)
288 : 2875200 : && !MEM_VOLATILE_P (new_rtx))
289 : 44941 : return PROFITABLE;
290 : :
291 : : return 0;
292 : : }
293 : :
294 : : /* Record that OLD_RTX has been simplified to NEW_RTX. OLD_NUM_CHANGES
295 : : is the number of unrelated changes that had been made before processing
296 : : OLD_RTX and its subrtxes. OLD_RESULT_FLAGS is the value that result_flags
297 : : had at that point. */
298 : :
299 : : void
300 : 3464332 : fwprop_propagation::note_simplification (int old_num_changes,
301 : : uint16_t old_result_flags,
302 : : rtx old_rtx, rtx new_rtx)
303 : : {
304 : 3464332 : result_flags &= ~(CONSTANT | PROFITABLE);
305 : 3464332 : uint16_t new_flags = classify_result (old_rtx, new_rtx);
306 : 3464332 : if (old_num_changes)
307 : 13748 : new_flags &= old_result_flags;
308 : 3464332 : result_flags |= new_flags;
309 : 3464332 : }
310 : :
311 : : /* Return true if all substitutions eventually folded to constants. */
312 : :
313 : : bool
314 : 13174782 : fwprop_propagation::folded_to_constants_p () const
315 : : {
316 : : /* If we're propagating a HIGH, require it to be folded with a
317 : : partnering LO_SUM. For example, a REG_EQUAL note with a register
318 : : replaced by an unfolded HIGH is not useful. */
319 : 13174782 : if (CONSTANT_P (to) && GET_CODE (to) != HIGH)
320 : : return true;
321 : 11697006 : return !(result_flags & UNSIMPLIFIED) && (result_flags & CONSTANT);
322 : : }
323 : :
324 : :
325 : : /* Return true if it is worth keeping the result of the propagation,
326 : : false if it would increase the complexity of the pattern too much. */
327 : :
328 : : bool
329 : 28226826 : fwprop_propagation::likely_profitable_p () const
330 : : {
331 : 28226826 : if (changed_mem_p ())
332 : : return true;
333 : :
334 : 26941483 : if (!(result_flags & UNSIMPLIFIED)
335 : 3245777 : && (result_flags & PROFITABLE))
336 : : return true;
337 : :
338 : 26695273 : if (REG_P (to))
339 : : return true;
340 : :
341 : 24974569 : if (GET_CODE (to) == SUBREG
342 : 1295420 : && REG_P (SUBREG_REG (to))
343 : 26269133 : && !paradoxical_subreg_p (to))
344 : : return true;
345 : :
346 : 23704979 : if (CONSTANT_P (to))
347 : 1609023 : return true;
348 : :
349 : : return false;
350 : : }
351 : :
352 : : /* Check that X has a single def. */
353 : :
354 : : static bool
355 : 77655022 : reg_single_def_p (rtx x)
356 : : {
357 : 77655022 : return REG_P (x) && crtl->ssa->single_dominating_def (REGNO (x));
358 : : }
359 : :
360 : : /* Try to substitute (set DEST SRC), which defines DEF, into note NOTE of
361 : : USE_INSN. Return the number of substitutions on success, otherwise return
362 : : -1 and leave USE_INSN unchanged.
363 : :
364 : : If REQUIRE_CONSTANT is true, require all substituted occurrences of SRC
365 : : to fold to a constant, so that the note does not use any more registers
366 : : than it did previously. If REQUIRE_CONSTANT is false, also allow the
367 : : substitution if it's something we'd normally allow for the main
368 : : instruction pattern. */
369 : :
370 : : static int
371 : 3331964 : try_fwprop_subst_note (insn_info *use_insn, set_info *def,
372 : : rtx note, rtx dest, rtx src, bool require_constant)
373 : : {
374 : 3331964 : rtx_insn *use_rtl = use_insn->rtl ();
375 : 3331964 : insn_info *def_insn = def->insn ();
376 : :
377 : 3331964 : insn_change_watermark watermark;
378 : 3331964 : fwprop_propagation prop (use_insn, def, dest, src);
379 : 3331964 : if (!prop.apply_to_rvalue (&XEXP (note, 0)))
380 : : {
381 : 17640 : if (dump_file && (dump_flags & TDF_DETAILS))
382 : 0 : fprintf (dump_file, "cannot propagate from insn %d into"
383 : : " notes of insn %d: %s\n", def_insn->uid (),
384 : : use_insn->uid (), prop.failure_reason);
385 : 17640 : return -1;
386 : : }
387 : :
388 : 3314324 : if (prop.num_replacements == 0)
389 : : return 0;
390 : :
391 : 502799 : if (require_constant)
392 : : {
393 : 489960 : if (!prop.folded_to_constants_p ())
394 : : {
395 : 459031 : if (dump_file && (dump_flags & TDF_DETAILS))
396 : 0 : fprintf (dump_file, "cannot propagate from insn %d into"
397 : : " notes of insn %d: %s\n", def_insn->uid (),
398 : : use_insn->uid (), "wouldn't fold to constants");
399 : 459031 : return -1;
400 : : }
401 : : }
402 : : else
403 : : {
404 : 12839 : if (!prop.folded_to_constants_p () && !prop.likely_profitable_p ())
405 : : {
406 : 75 : if (dump_file && (dump_flags & TDF_DETAILS))
407 : 0 : fprintf (dump_file, "cannot propagate from insn %d into"
408 : : " notes of insn %d: %s\n", def_insn->uid (),
409 : : use_insn->uid (), "would increase complexity of node");
410 : 75 : return -1;
411 : : }
412 : : }
413 : :
414 : 43693 : if (dump_file && (dump_flags & TDF_DETAILS))
415 : : {
416 : 0 : fprintf (dump_file, "\nin notes of insn %d, replacing:\n ",
417 : 0 : INSN_UID (use_rtl));
418 : 0 : {
419 : 0 : undo_recog_changes undo (0);
420 : 0 : print_inline_rtx (dump_file, note, 2);
421 : 0 : }
422 : 0 : fprintf (dump_file, "\n with:\n ");
423 : 0 : print_inline_rtx (dump_file, note, 2);
424 : 0 : fprintf (dump_file, "\n");
425 : : }
426 : 43693 : watermark.keep ();
427 : 43693 : return prop.num_replacements;
428 : 3331964 : }
429 : :
430 : : /* Try to substitute (set DEST SRC), which defines DEF, into location LOC of
431 : : USE_INSN's pattern. Return true on success, otherwise leave USE_INSN
432 : : unchanged. */
433 : :
434 : : static bool
435 : 27854835 : try_fwprop_subst_pattern (obstack_watermark &attempt, insn_change &use_change,
436 : : set_info *def, rtx *loc, rtx dest, rtx src)
437 : : {
438 : 27854835 : insn_info *use_insn = use_change.insn ();
439 : 27854835 : rtx_insn *use_rtl = use_insn->rtl ();
440 : 27854835 : insn_info *def_insn = def->insn ();
441 : :
442 : 27854835 : insn_change_watermark watermark;
443 : 27854835 : fwprop_propagation prop (use_insn, def, dest, src);
444 : 27854835 : if (!prop.apply_to_pattern (loc))
445 : : {
446 : 1570482 : if (dump_file && (dump_flags & TDF_DETAILS))
447 : 0 : fprintf (dump_file, "cannot propagate from insn %d into"
448 : : " insn %d: %s\n", def_insn->uid (), use_insn->uid (),
449 : : prop.failure_reason);
450 : 1570482 : return false;
451 : : }
452 : :
453 : 26284353 : if (prop.num_replacements == 0)
454 : : return false;
455 : :
456 : 22461135 : if (!prop.likely_profitable_p ()
457 : 22461135 : && (prop.changed_mem_p ()
458 : 17865069 : || contains_mem_rtx_p (src)
459 : 11819353 : || use_insn->is_asm ()
460 : 11818564 : || use_insn->is_debug_insn ()))
461 : : {
462 : 6856137 : if (dump_file && (dump_flags & TDF_DETAILS))
463 : 0 : fprintf (dump_file, "cannot propagate from insn %d into"
464 : : " insn %d: %s\n", def_insn->uid (), use_insn->uid (),
465 : : "would increase complexity of pattern");
466 : 6856137 : return false;
467 : : }
468 : :
469 : 15604998 : if (dump_file && (dump_flags & TDF_DETAILS))
470 : : {
471 : 0 : fprintf (dump_file, "\npropagating insn %d into insn %d, replacing:\n",
472 : : def_insn->uid (), use_insn->uid ());
473 : 0 : undo_recog_changes undo (0);
474 : 0 : print_rtl_single (dump_file, PATTERN (use_rtl));
475 : 0 : }
476 : :
477 : 15604998 : bool ok = recog (attempt, use_change);
478 : 15604998 : if (ok
479 : 7365043 : && !prop.changed_mem_p ()
480 : 6114785 : && !use_insn->is_asm ()
481 : 21719294 : && !use_insn->is_debug_insn ())
482 : : {
483 : 5753334 : bool strict_p = !prop.likely_profitable_p ();
484 : 5753334 : if (!change_is_worthwhile (use_change, strict_p))
485 : : {
486 : 4432028 : if (dump_file)
487 : 69 : fprintf (dump_file, "change not profitable");
488 : : ok = false;
489 : : }
490 : : }
491 : :
492 : 11172970 : if (!ok)
493 : : {
494 : : /* The pattern didn't match, but if all uses of SRC folded to
495 : : constants, we can add a REG_EQUAL note for the result, if there
496 : : isn't one already. */
497 : 12671983 : if (!prop.folded_to_constants_p ())
498 : : return false;
499 : :
500 : : /* Test this first to avoid creating an unnecessary copy of SRC. */
501 : 1448412 : if (find_reg_note (use_rtl, REG_EQUAL, NULL_RTX))
502 : : return false;
503 : :
504 : 686040 : rtx set = set_for_reg_notes (use_rtl);
505 : 686040 : if (!set || !REG_P (SET_DEST (set)))
506 : : return false;
507 : :
508 : 402703 : rtx value = copy_rtx (SET_SRC (set));
509 : 402703 : cancel_changes (0);
510 : :
511 : : /* If there are any paradoxical SUBREGs, drop the REG_EQUAL note,
512 : : because the bits in there can be anything and so might not
513 : : match the REG_EQUAL note content. See PR70574. */
514 : 402703 : if (contains_paradoxical_subreg_p (SET_SRC (set)))
515 : : return false;
516 : :
517 : 402649 : if (dump_file && (dump_flags & TDF_DETAILS))
518 : 0 : fprintf (dump_file, " Setting REG_EQUAL note\n");
519 : :
520 : 402649 : return set_unique_reg_note (use_rtl, REG_EQUAL, value);
521 : : }
522 : :
523 : 2933015 : rtx *note_ptr = ®_NOTES (use_rtl);
524 : 5669951 : while (rtx note = *note_ptr)
525 : : {
526 : 2736936 : if ((REG_NOTE_KIND (note) == REG_EQUAL
527 : 2736936 : || REG_NOTE_KIND (note) == REG_EQUIV)
528 : 2736936 : && try_fwprop_subst_note (use_insn, def, note, dest, src, false) < 0)
529 : : {
530 : 79 : *note_ptr = XEXP (note, 1);
531 : 79 : free_EXPR_LIST_node (note);
532 : : }
533 : : else
534 : 2736857 : note_ptr = &XEXP (note, 1);
535 : : }
536 : :
537 : 2933015 : confirm_change_group ();
538 : 2933015 : crtl->ssa->change_insn (use_change);
539 : 2933015 : num_changes++;
540 : 2933015 : return true;
541 : 27854835 : }
542 : :
543 : : /* Try to substitute (set DEST SRC), which defines DEF, into USE_INSN's notes,
544 : : given that it was not possible to do this for USE_INSN's main pattern.
545 : : Return true on success, otherwise leave USE_INSN unchanged. */
546 : :
547 : : static bool
548 : 24523376 : try_fwprop_subst_notes (insn_info *use_insn, set_info *def,
549 : : rtx dest, rtx src)
550 : : {
551 : 24523376 : rtx_insn *use_rtl = use_insn->rtl ();
552 : 71344379 : for (rtx note = REG_NOTES (use_rtl); note; note = XEXP (note, 1))
553 : 46851932 : if ((REG_NOTE_KIND (note) == REG_EQUAL
554 : 46851932 : || REG_NOTE_KIND (note) == REG_EQUIV)
555 : 46851932 : && try_fwprop_subst_note (use_insn, def, note, dest, src, true) > 0)
556 : : {
557 : 30929 : confirm_change_group ();
558 : 30929 : return true;
559 : : }
560 : :
561 : : return false;
562 : : }
563 : :
564 : : /* Check whether we could validly substitute (set DEST SRC), which defines DEF,
565 : : into USE. If so, first try performing the substitution in location LOC
566 : : of USE->insn ()'s pattern. If that fails, try instead to substitute
567 : : into the notes.
568 : :
569 : : Return true on success, otherwise leave USE_INSN unchanged. */
570 : :
571 : : static bool
572 : 42517501 : try_fwprop_subst (use_info *use, set_info *def,
573 : : rtx *loc, rtx dest, rtx src)
574 : : {
575 : 42517501 : insn_info *use_insn = use->insn ();
576 : 42517501 : insn_info *def_insn = def->insn ();
577 : :
578 : 42517501 : auto attempt = crtl->ssa->new_change_attempt ();
579 : 42517501 : use_array src_uses = remove_note_accesses (attempt, def_insn->uses ());
580 : :
581 : : /* ??? Not really a meaningful test: it means we can propagate arithmetic
582 : : involving hard registers but not bare references to them. A better
583 : : test would be to iterate over src_uses looking for hard registers
584 : : that are not fixed. */
585 : 42517501 : if (REG_P (src) && HARD_REGISTER_P (src))
586 : : return false;
587 : :
588 : : /* ??? It would be better to make this EBB-based instead. That would
589 : : involve checking for equal EBBs rather than equal BBs and trying
590 : : to make the uses available at use_insn->ebb ()->first_bb (). */
591 : 33310133 : if (def_insn->bb () != use_insn->bb ())
592 : : {
593 : 7105161 : src_uses = crtl->ssa->make_uses_available (attempt, src_uses,
594 : : use_insn->bb (),
595 : 7105161 : use_insn->is_debug_insn ());
596 : 7105161 : if (!src_uses.is_valid ())
597 : : return false;
598 : : }
599 : :
600 : 30774830 : insn_change use_change (use_insn);
601 : 30774830 : use_change.new_uses = merge_access_arrays (attempt, use_change.new_uses,
602 : : src_uses);
603 : 69929943 : if (!use_change.new_uses.is_valid ())
604 : : return false;
605 : :
606 : : /* ??? We could allow movement within the EBB by adding:
607 : :
608 : : use_change.move_range = use_insn->ebb ()->insn_range (); */
609 : 29118080 : if (!restrict_movement (use_change))
610 : : return false;
611 : :
612 : 27854835 : return (try_fwprop_subst_pattern (attempt, use_change, def, loc, dest, src)
613 : 27854835 : || try_fwprop_subst_notes (use_insn, def, dest, src));
614 : 42517501 : }
615 : :
616 : : /* For the given single_set INSN, containing SRC known to be a
617 : : ZERO_EXTEND or SIGN_EXTEND of a register, return true if INSN
618 : : is redundant due to the register being set by a LOAD_EXTEND_OP
619 : : load from memory. */
620 : :
621 : : static bool
622 : 16451 : free_load_extend (rtx src, insn_info *insn)
623 : : {
624 : 16451 : rtx reg = XEXP (src, 0);
625 : 16451 : if (load_extend_op (GET_MODE (reg)) != GET_CODE (src))
626 : : return false;
627 : :
628 : 0 : def_info *def = nullptr;
629 : 0 : for (use_info *use : insn->uses ())
630 : 0 : if (use->regno () == REGNO (reg))
631 : : {
632 : 0 : def = use->def ();
633 : 0 : break;
634 : : }
635 : :
636 : 0 : if (!def)
637 : : return false;
638 : :
639 : 0 : insn_info *def_insn = def->insn ();
640 : 0 : if (def_insn->is_artificial ())
641 : : return false;
642 : :
643 : 0 : rtx_insn *def_rtl = def_insn->rtl ();
644 : 0 : if (NONJUMP_INSN_P (def_rtl))
645 : : {
646 : 0 : rtx patt = PATTERN (def_rtl);
647 : :
648 : 0 : if (GET_CODE (patt) == SET
649 : 0 : && GET_CODE (SET_SRC (patt)) == MEM
650 : 0 : && rtx_equal_p (SET_DEST (patt), reg))
651 : : return true;
652 : : }
653 : : return false;
654 : : }
655 : :
656 : : /* Subroutine of forward_propagate_subreg that handles a use of DEST
657 : : in REF. The other parameters are the same. */
658 : :
659 : : static bool
660 : 103190 : forward_propagate_subreg (use_info *use, set_info *def,
661 : : rtx dest, rtx src, df_ref ref)
662 : : {
663 : 103190 : scalar_int_mode int_use_mode, src_mode;
664 : :
665 : : /* Only consider subregs... */
666 : 103190 : rtx use_reg = DF_REF_REG (ref);
667 : 103190 : machine_mode use_mode = GET_MODE (use_reg);
668 : 103190 : if (GET_CODE (use_reg) != SUBREG
669 : 103121 : || GET_MODE (SUBREG_REG (use_reg)) != GET_MODE (dest))
670 : : return false;
671 : :
672 : : /* ??? Replacing throughout the pattern would help for match_dups. */
673 : 103121 : rtx *loc = DF_REF_LOC (ref);
674 : 103121 : if (paradoxical_subreg_p (use_reg))
675 : : {
676 : : /* If this is a paradoxical SUBREG, we have no idea what value the
677 : : extra bits would have. However, if the operand is equivalent to
678 : : a SUBREG whose operand is the same as our mode, and all the modes
679 : : are within a word, we can just use the inner operand because
680 : : these SUBREGs just say how to treat the register. */
681 : 1708 : if (GET_CODE (src) == SUBREG
682 : 1667 : && REG_P (SUBREG_REG (src))
683 : 1627 : && REGNO (SUBREG_REG (src)) >= FIRST_PSEUDO_REGISTER
684 : 1627 : && GET_MODE (SUBREG_REG (src)) == use_mode
685 : 1708 : && subreg_lowpart_p (src))
686 : 0 : return try_fwprop_subst (use, def, loc, use_reg, SUBREG_REG (src));
687 : : }
688 : :
689 : : /* If this is a SUBREG of a ZERO_EXTEND or SIGN_EXTEND, and the SUBREG
690 : : is the low part of the reg being extended then just use the inner
691 : : operand. Don't do this if the ZERO_EXTEND or SIGN_EXTEND insn will
692 : : be removed due to it matching a LOAD_EXTEND_OP load from memory,
693 : : or due to the operation being a no-op when applied to registers.
694 : : For example, if we have:
695 : :
696 : : A: (set (reg:DI X) (sign_extend:DI (reg:SI Y)))
697 : : B: (... (subreg:SI (reg:DI X)) ...)
698 : :
699 : : and mode_rep_extended says that Y is already sign-extended,
700 : : the backend will typically allow A to be combined with the
701 : : definition of Y or, failing that, allow A to be deleted after
702 : : reload through register tying. Introducing more uses of Y
703 : : prevents both optimisations. */
704 : 101413 : else if (is_a <scalar_int_mode> (use_mode, &int_use_mode)
705 : 90683 : && subreg_lowpart_p (use_reg))
706 : : {
707 : 85888 : if ((GET_CODE (src) == ZERO_EXTEND
708 : 85888 : || GET_CODE (src) == SIGN_EXTEND)
709 : 66508 : && is_a <scalar_int_mode> (GET_MODE (src), &src_mode)
710 : 66498 : && REG_P (XEXP (src, 0))
711 : 21024 : && REGNO (XEXP (src, 0)) >= FIRST_PSEUDO_REGISTER
712 : 21024 : && GET_MODE (XEXP (src, 0)) == use_mode
713 : 16451 : && !free_load_extend (src, def->insn ())
714 : 102339 : && (targetm.mode_rep_extended (int_use_mode, src_mode)
715 : 16451 : != (int) GET_CODE (src)))
716 : 16451 : return try_fwprop_subst (use, def, loc, use_reg, XEXP (src, 0));
717 : : }
718 : :
719 : : return false;
720 : : }
721 : :
722 : : /* Try to substitute (set DEST SRC), which defines DEF, into USE and simplify
723 : : the result, handling cases where DEST is used in a subreg and where
724 : : applying that subreg to SRC results in a useful simplification. */
725 : :
726 : : static bool
727 : 49251951 : forward_propagate_subreg (use_info *use, set_info *def, rtx dest, rtx src)
728 : : {
729 : 49251951 : if (!use->includes_subregs () || !REG_P (dest))
730 : : return false;
731 : :
732 : 1639672 : if (GET_CODE (src) != SUBREG
733 : 1609241 : && GET_CODE (src) != ZERO_EXTEND
734 : 1546333 : && GET_CODE (src) != SIGN_EXTEND)
735 : : return false;
736 : :
737 : 99878 : rtx_insn *use_rtl = use->insn ()->rtl ();
738 : 99878 : df_ref ref;
739 : :
740 : 267609 : FOR_EACH_INSN_USE (ref, use_rtl)
741 : 182813 : if (DF_REF_REGNO (ref) == use->regno ()
742 : 182813 : && forward_propagate_subreg (use, def, dest, src, ref))
743 : : return true;
744 : :
745 : 89953 : FOR_EACH_INSN_EQ_USE (ref, use_rtl)
746 : 5159 : if (DF_REF_REGNO (ref) == use->regno ()
747 : 5159 : && forward_propagate_subreg (use, def, dest, src, ref))
748 : : return true;
749 : :
750 : : return false;
751 : : }
752 : :
753 : : /* Try to substitute (set DEST SRC), which defines DEF, into USE and
754 : : simplify the result. */
755 : :
756 : : static bool
757 : 52599255 : forward_propagate_and_simplify (use_info *use, set_info *def,
758 : : rtx dest, rtx src)
759 : : {
760 : 52599255 : insn_info *use_insn = use->insn ();
761 : 52599255 : rtx_insn *use_rtl = use_insn->rtl ();
762 : 52599255 : insn_info *def_insn = def->insn ();
763 : :
764 : : /* ??? This check seems unnecessary. We should be able to propagate
765 : : into any kind of instruction, regardless of whether it's a single set.
766 : : It seems odd to be more permissive with asms than normal instructions. */
767 : 52599255 : bool need_single_set = (!use_insn->is_asm () && !use_insn->is_debug_insn ());
768 : 52599255 : rtx use_set = single_set (use_rtl);
769 : 52599255 : if (need_single_set && !use_set)
770 : : return false;
771 : :
772 : : /* Do not propagate into PC etc.
773 : :
774 : : ??? This too seems unnecessary. The current code should work correctly
775 : : without it, including cases where jumps become unconditional. */
776 : 48194628 : if (use_set && GET_MODE (SET_DEST (use_set)) == VOIDmode)
777 : : return false;
778 : :
779 : : /* In __asm don't replace if src might need more registers than
780 : : reg, as that could increase register pressure on the __asm. */
781 : 43751300 : if (use_insn->is_asm () && def_insn->uses ().size () > 1)
782 : 1766 : return false;
783 : :
784 : : /* Check if the def is loading something from the constant pool; in this
785 : : case we would undo optimization such as compress_float_constant.
786 : : Still, we can set a REG_EQUAL note. */
787 : 43749534 : if (MEM_P (src) && MEM_READONLY_P (src))
788 : : {
789 : 1248484 : rtx x = avoid_constant_pool_reference (src);
790 : 1248484 : rtx note_set;
791 : 1248484 : if (x != src
792 : 875255 : && (note_set = set_for_reg_notes (use_rtl))
793 : 593919 : && REG_P (SET_DEST (note_set))
794 : 1824206 : && !contains_paradoxical_subreg_p (SET_SRC (note_set)))
795 : : {
796 : 556889 : rtx note = find_reg_note (use_rtl, REG_EQUAL, NULL_RTX);
797 : 556889 : rtx old_rtx = note ? XEXP (note, 0) : SET_SRC (note_set);
798 : 556889 : rtx new_rtx = simplify_replace_rtx (old_rtx, src, x);
799 : 556889 : if (old_rtx != new_rtx)
800 : 7 : set_unique_reg_note (use_rtl, REG_EQUAL, copy_rtx (new_rtx));
801 : : }
802 : 1248484 : return false;
803 : : }
804 : :
805 : : /* ??? Unconditionally propagating into PATTERN would work better
806 : : for instructions that have match_dups. */
807 : 42501050 : rtx *loc = need_single_set ? &use_set : &PATTERN (use_rtl);
808 : 42501050 : return try_fwprop_subst (use, def, loc, dest, src);
809 : : }
810 : :
811 : : /* Given a use USE of an insn, if it has a single reaching
812 : : definition, try to forward propagate it into that insn.
813 : : Return true if something changed.
814 : :
815 : : REG_PROP_ONLY is true if we should only propagate register copies. */
816 : :
817 : : static bool
818 : 147931766 : forward_propagate_into (use_info *use, bool reg_prop_only = false)
819 : : {
820 : 147931766 : if (use->includes_read_writes ())
821 : : return false;
822 : :
823 : : /* Disregard uninitialized uses. */
824 : 140596466 : set_info *def = use->def ();
825 : 140596466 : if (!def)
826 : : return false;
827 : :
828 : : /* Only consider single-register definitions. This could be relaxed,
829 : : but it should rarely be needed before RA. */
830 : 140586574 : def = look_through_degenerate_phi (def);
831 : 140586574 : if (def->includes_multiregs ())
832 : : return false;
833 : :
834 : : /* Only consider uses whose definition comes from a real instruction. */
835 : 140049300 : insn_info *def_insn = def->insn ();
836 : 140049300 : if (def_insn->is_artificial ())
837 : : return false;
838 : :
839 : 103467542 : rtx_insn *def_rtl = def_insn->rtl ();
840 : 103467542 : if (!NONJUMP_INSN_P (def_rtl))
841 : : return false;
842 : : /* ??? This seems an unnecessary restriction. We can easily tell
843 : : which set the definition comes from. */
844 : 100819430 : if (multiple_sets (def_rtl))
845 : : return false;
846 : 100138545 : rtx def_set = simple_regno_set (PATTERN (def_rtl), def->regno ());
847 : 100138545 : if (!def_set)
848 : : return false;
849 : :
850 : 99795227 : rtx dest = SET_DEST (def_set);
851 : 99795227 : rtx src = SET_SRC (def_set);
852 : 99795227 : if (volatile_refs_p (src))
853 : : return false;
854 : :
855 : : /* Allow propagations into a loop only for reg-to-reg copies, since
856 : : replacing one register by another shouldn't increase the cost.
857 : : Propagations from inner loop to outer loop should also be ok. */
858 : 98823119 : struct loop *def_loop = def_insn->bb ()->cfg_bb ()->loop_father;
859 : 197646238 : struct loop *use_loop = use->bb ()->cfg_bb ()->loop_father;
860 : 98823119 : if ((reg_prop_only
861 : 53253084 : || (def_loop != use_loop
862 : 2076772 : && !flow_loop_nested_p (use_loop, def_loop)))
863 : 100563122 : && (!reg_single_def_p (dest) || !reg_single_def_p (src)))
864 : : return false;
865 : :
866 : : /* Don't substitute into a non-local goto, this confuses CFG. */
867 : 52599938 : insn_info *use_insn = use->insn ();
868 : 52599938 : rtx_insn *use_rtl = use_insn->rtl ();
869 : 52599938 : if (JUMP_P (use_rtl)
870 : 52599938 : && find_reg_note (use_rtl, REG_NON_LOCAL_GOTO, NULL_RTX))
871 : : return false;
872 : :
873 : 52599255 : if (forward_propagate_and_simplify (use, def, dest, src)
874 : 52599255 : || forward_propagate_subreg (use, def, dest, src))
875 : 3362388 : return true;
876 : :
877 : : return false;
878 : : }
879 : :
880 : : static void
881 : 1996674 : fwprop_init (void)
882 : : {
883 : 1996674 : num_changes = 0;
884 : 1996674 : calculate_dominance_info (CDI_DOMINATORS);
885 : :
886 : : /* We do not always want to propagate into loops, so we have to find
887 : : loops and be careful about them. Avoid CFG modifications so that
888 : : we don't have to update dominance information afterwards for
889 : : build_single_def_use_links. */
890 : 1996674 : loop_optimizer_init (AVOID_CFG_MODIFICATIONS);
891 : :
892 : 1996674 : df_analyze ();
893 : 1996674 : crtl->ssa = new rtl_ssa::function_info (cfun);
894 : 1996674 : }
895 : :
896 : : static void
897 : 1996674 : fwprop_done (void)
898 : : {
899 : 1996674 : loop_optimizer_finalize ();
900 : :
901 : 1996674 : crtl->ssa->perform_pending_updates ();
902 : 1996674 : free_dominance_info (CDI_DOMINATORS);
903 : 1996674 : cleanup_cfg (0);
904 : :
905 : 1996674 : delete crtl->ssa;
906 : 1996674 : crtl->ssa = nullptr;
907 : :
908 : 1996674 : delete_trivially_dead_insns (get_insns (), max_reg_num ());
909 : :
910 : 1996674 : if (dump_file)
911 : 63 : fprintf (dump_file,
912 : : "\nNumber of successful forward propagations: %d\n\n",
913 : : num_changes);
914 : 1996674 : }
915 : :
916 : : /* Try to optimize INSN, returning true if something changes.
917 : : FWPROP_ADDR_P is true if we are running fwprop_addr rather than
918 : : the full fwprop. */
919 : :
920 : : static bool
921 : 197810998 : fwprop_insn (insn_info *insn, bool fwprop_addr_p)
922 : : {
923 : 371360230 : for (use_info *use : insn->uses ())
924 : : {
925 : 176911620 : if (use->is_mem ())
926 : 25383950 : continue;
927 : : /* ??? The choices here follow those in the pre-SSA code. */
928 : 151527670 : if (!use->includes_address_uses ())
929 : : {
930 : 123432672 : if (forward_propagate_into (use, fwprop_addr_p))
931 : 3362388 : return true;
932 : : }
933 : : else
934 : : {
935 : 28094998 : struct loop *loop = insn->bb ()->cfg_bb ()->loop_father;
936 : : /* The outermost loop is not really a loop. */
937 : 28094998 : if (loop == NULL || loop_outer (loop) == NULL)
938 : : {
939 : 20705909 : if (forward_propagate_into (use, fwprop_addr_p))
940 : : return true;
941 : : }
942 : 7389089 : else if (fwprop_addr_p)
943 : : {
944 : 3793185 : if (forward_propagate_into (use, false))
945 : : return true;
946 : : }
947 : : }
948 : : }
949 : : return false;
950 : : }
951 : :
952 : : /* Main entry point. */
953 : :
954 : : static bool
955 : 2831336 : gate_fwprop (void)
956 : : {
957 : 1996826 : return optimize > 0 && flag_forward_propagate;
958 : : }
959 : :
960 : : static unsigned int
961 : 1996674 : fwprop (bool fwprop_addr_p)
962 : : {
963 : 1996674 : fwprop_init ();
964 : :
965 : : /* Go through all the instructions (including debug instructions) looking
966 : : for uses that we could propagate into.
967 : :
968 : : Do not forward propagate addresses into loops until after unrolling.
969 : : CSE did so because it was able to fix its own mess, but we are not. */
970 : :
971 : 1996674 : insn_info *next;
972 : :
973 : : /* ??? This code uses a worklist in order to preserve the behavior
974 : : of the pre-SSA implementation. It would be better to instead
975 : : iterate on each instruction until no more propagations are
976 : : possible, then move on to the next. */
977 : 1996674 : auto_vec<insn_info *> worklist;
978 : 259931700 : for (insn_info *insn = crtl->ssa->first_insn (); insn; insn = next)
979 : : {
980 : 257935026 : next = insn->next_any_insn ();
981 : 257935026 : if (insn->can_be_optimized () || insn->is_debug_insn ())
982 : 194448610 : if (fwprop_insn (insn, fwprop_addr_p))
983 : 3221030 : worklist.safe_push (insn);
984 : : }
985 : 5359062 : for (unsigned int i = 0; i < worklist.length (); ++i)
986 : : {
987 : 3362388 : insn_info *insn = worklist[i];
988 : 3362388 : if (fwprop_insn (insn, fwprop_addr_p))
989 : 141358 : worklist.safe_push (insn);
990 : : }
991 : :
992 : 1996674 : fwprop_done ();
993 : 1996674 : return 0;
994 : 1996674 : }
995 : :
996 : : namespace {
997 : :
998 : : const pass_data pass_data_rtl_fwprop =
999 : : {
1000 : : RTL_PASS, /* type */
1001 : : "fwprop1", /* name */
1002 : : OPTGROUP_NONE, /* optinfo_flags */
1003 : : TV_FWPROP, /* tv_id */
1004 : : 0, /* properties_required */
1005 : : 0, /* properties_provided */
1006 : : 0, /* properties_destroyed */
1007 : : 0, /* todo_flags_start */
1008 : : TODO_df_finish, /* todo_flags_finish */
1009 : : };
1010 : :
1011 : : class pass_rtl_fwprop : public rtl_opt_pass
1012 : : {
1013 : : public:
1014 : 280114 : pass_rtl_fwprop (gcc::context *ctxt)
1015 : 560228 : : rtl_opt_pass (pass_data_rtl_fwprop, ctxt)
1016 : : {}
1017 : :
1018 : : /* opt_pass methods: */
1019 : 1415668 : bool gate (function *) final override { return gate_fwprop (); }
1020 : 998337 : unsigned int execute (function *) final override { return fwprop (false); }
1021 : :
1022 : : }; // class pass_rtl_fwprop
1023 : :
1024 : : } // anon namespace
1025 : :
1026 : : rtl_opt_pass *
1027 : 280114 : make_pass_rtl_fwprop (gcc::context *ctxt)
1028 : : {
1029 : 280114 : return new pass_rtl_fwprop (ctxt);
1030 : : }
1031 : :
1032 : : namespace {
1033 : :
1034 : : const pass_data pass_data_rtl_fwprop_addr =
1035 : : {
1036 : : RTL_PASS, /* type */
1037 : : "fwprop2", /* name */
1038 : : OPTGROUP_NONE, /* optinfo_flags */
1039 : : TV_FWPROP, /* tv_id */
1040 : : 0, /* properties_required */
1041 : : 0, /* properties_provided */
1042 : : 0, /* properties_destroyed */
1043 : : 0, /* todo_flags_start */
1044 : : TODO_df_finish, /* todo_flags_finish */
1045 : : };
1046 : :
1047 : : class pass_rtl_fwprop_addr : public rtl_opt_pass
1048 : : {
1049 : : public:
1050 : 280114 : pass_rtl_fwprop_addr (gcc::context *ctxt)
1051 : 560228 : : rtl_opt_pass (pass_data_rtl_fwprop_addr, ctxt)
1052 : : {}
1053 : :
1054 : : /* opt_pass methods: */
1055 : 1415668 : bool gate (function *) final override { return gate_fwprop (); }
1056 : 998337 : unsigned int execute (function *) final override { return fwprop (true); }
1057 : :
1058 : : }; // class pass_rtl_fwprop_addr
1059 : :
1060 : : } // anon namespace
1061 : :
1062 : : rtl_opt_pass *
1063 : 280114 : make_pass_rtl_fwprop_addr (gcc::context *ctxt)
1064 : : {
1065 : 280114 : return new pass_rtl_fwprop_addr (ctxt);
1066 : : }
|