Line data Source code
1 : // Late-stage instruction combination pass.
2 : // Copyright (C) 2023-2026 Free Software Foundation, Inc.
3 : //
4 : // This file is part of GCC.
5 : //
6 : // GCC is free software; you can redistribute it and/or modify it under
7 : // the terms of the GNU General Public License as published by the Free
8 : // Software Foundation; either version 3, or (at your option) any later
9 : // version.
10 : //
11 : // GCC is distributed in the hope that it will be useful, but WITHOUT ANY
12 : // WARRANTY; without even the implied warranty of MERCHANTABILITY or
13 : // FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
14 : // for more details.
15 : //
16 : // You should have received a copy of the GNU General Public License
17 : // along with GCC; see the file COPYING3. If not see
18 : // <http://www.gnu.org/licenses/>.
19 :
20 : // This pass currently has two purposes:
21 : //
22 : // - to substitute definitions into all uses, so that the definition
23 : // can be removed.
24 : //
25 : // - to try to parallelise sets of condition-code registers with a
26 : // related instruction (see combine_cc_setter for details).
27 : //
28 : // However, it could be extended to handle other combination-related
29 : // optimizations in future.
30 : //
31 : // The pass can run before or after register allocation. When running
32 : // before register allocation, it tries to avoid cases that are likely
33 : // to increase register pressure. For the same reason, it avoids moving
34 : // instructions around, even if doing so would allow an optimization to
35 : // succeed. These limitations are removed when running after register
36 : // allocation.
37 :
38 : #define INCLUDE_ALGORITHM
39 : #define INCLUDE_FUNCTIONAL
40 : #define INCLUDE_ARRAY
41 : #include "config.h"
42 : #include "system.h"
43 : #include "coretypes.h"
44 : #include "backend.h"
45 : #include "rtl.h"
46 : #include "df.h"
47 : #include "rtl-ssa.h"
48 : #include "print-rtl.h"
49 : #include "tree-pass.h"
50 : #include "cfgcleanup.h"
51 : #include "target.h"
52 : #include "dbgcnt.h"
53 :
54 : using namespace rtl_ssa;
55 :
56 : namespace {
57 : const pass_data pass_data_late_combine =
58 : {
59 : RTL_PASS, // type
60 : "late_combine", // name
61 : OPTGROUP_NONE, // optinfo_flags
62 : TV_LATE_COMBINE, // tv_id
63 : 0, // properties_required
64 : 0, // properties_provided
65 : 0, // properties_destroyed
66 : 0, // todo_flags_start
67 : TODO_df_finish, // todo_flags_finish
68 : };
69 :
70 : // Represents an attempt to substitute a single-set definition into all
71 : // uses of the definition.
72 : class insn_combination
73 : {
74 : public:
75 : insn_combination (set_info *, rtx, rtx);
76 : bool run ();
77 : array_slice<insn_change *const> use_changes () const;
78 :
79 : private:
80 : use_array get_new_uses (use_info *);
81 : bool substitute_nondebug_use (use_info *);
82 : bool substitute_nondebug_uses (set_info *);
83 : bool try_to_preserve_debug_info (insn_change &, use_info *);
84 : void substitute_debug_use (use_info *);
85 : bool substitute_note (insn_info *, rtx, bool);
86 : void substitute_notes (insn_info *, bool);
87 : void substitute_note_uses (use_info *);
88 : void substitute_optional_uses (set_info *);
89 :
90 : // Represents the state of the function's RTL at the start of this
91 : // combination attempt.
92 : insn_change_watermark m_rtl_watermark;
93 :
94 : // Represents the rtl-ssa state at the start of this combination attempt.
95 : obstack_watermark m_attempt;
96 :
97 : // The instruction that contains the definition, and that we're trying
98 : // to delete.
99 : insn_info *m_def_insn;
100 :
101 : // The definition itself.
102 : set_info *m_def;
103 :
104 : // The destination and source of the single set that defines m_def.
105 : // The destination is known to be a plain REG.
106 : rtx m_dest;
107 : rtx m_src;
108 :
109 : // Contains the full list of changes that we want to make, in reverse
110 : // postorder.
111 : auto_vec<insn_change *> m_nondebug_changes;
112 : };
113 :
114 : // Class that represents one run of the pass.
115 5783352 : class late_combine
116 : {
117 : public:
118 : unsigned int execute (function *);
119 :
120 : private:
121 : rtx optimizable_set (set_info *);
122 : bool check_register_pressure (insn_info *, rtx);
123 : bool check_uses (set_info *, rtx);
124 : bool combine_into_uses (set_info *, rtx, insn_info *);
125 : bool parallelize_insns (set_info *, rtx, set_info *, rtx);
126 : bool combine_cc_setter (set_info *, rtx);
127 : bool combine_insn (insn_info *, insn_info *);
128 :
129 : auto_vec<insn_info *> m_worklist;
130 :
131 : // A spare PARALLEL rtx, or null if none.
132 : rtx m_parallel = NULL_RTX;
133 : };
134 :
135 20797694 : insn_combination::insn_combination (set_info *def, rtx dest, rtx src)
136 20797694 : : m_rtl_watermark (),
137 20797694 : m_attempt (crtl->ssa->new_change_attempt ()),
138 20797694 : m_def_insn (def->insn ()),
139 20797694 : m_def (def),
140 20797694 : m_dest (dest),
141 20797694 : m_src (src),
142 20797694 : m_nondebug_changes ()
143 : {
144 20797694 : }
145 :
146 : array_slice<insn_change *const>
147 2155174 : insn_combination::use_changes () const
148 : {
149 2155174 : return { m_nondebug_changes.address () + 1,
150 2155174 : m_nondebug_changes.length () - 1 };
151 : }
152 :
153 : // USE is a direct or indirect use of m_def. Return the list of uses
154 : // that would be needed after substituting m_def into the instruction.
155 : // The returned list is marked as invalid if USE's insn and m_def_insn
156 : // use different definitions for the same resource (register or memory).
157 : use_array
158 26989988 : insn_combination::get_new_uses (use_info *use)
159 : {
160 26989988 : auto *def = use->def ();
161 26989988 : auto *use_insn = use->insn ();
162 :
163 26989988 : use_array new_uses = use_insn->uses ();
164 26989988 : new_uses = remove_uses_of_def (m_attempt, new_uses, def);
165 26989988 : new_uses = merge_access_arrays (m_attempt, m_def_insn->uses (), new_uses);
166 53871884 : if (new_uses.is_valid () && use->ebb () != m_def->ebb ())
167 4542663 : new_uses = crtl->ssa->make_uses_available (m_attempt, new_uses, use->bb (),
168 4542663 : use_insn->is_debug_insn ());
169 26989988 : return new_uses;
170 : }
171 :
172 : // Start the process of trying to replace USE by substitution, given that
173 : // USE occurs in a non-debug instruction. Check:
174 : //
175 : // - that the substitution can be represented in RTL
176 : //
177 : // - that each use of a resource (register or memory) within the new
178 : // instruction has a consistent definition
179 : //
180 : // - that the new instruction is a recognized pattern
181 : //
182 : // - that the instruction can be placed somewhere that makes all definitions
183 : // and uses valid, and that permits any new hard-register clobbers added
184 : // during the recognition process
185 : //
186 : // Return true on success.
187 : bool
188 25847451 : insn_combination::substitute_nondebug_use (use_info *use)
189 : {
190 25847451 : insn_info *use_insn = use->insn ();
191 25847451 : rtx_insn *use_rtl = use_insn->rtl ();
192 :
193 25847451 : if (dump_file && (dump_flags & TDF_DETAILS))
194 0 : dump_insn_slim (dump_file, use->insn ()->rtl ());
195 :
196 : // Reject second and subsequent uses if the target does not allow
197 : // the defining instruction to be copied.
198 25847451 : if (targetm.cannot_copy_insn_p
199 0 : && m_nondebug_changes.length () >= 2
200 25847451 : && targetm.cannot_copy_insn_p (m_def_insn->rtl ()))
201 : {
202 0 : if (dump_file && (dump_flags & TDF_DETAILS))
203 0 : fprintf (dump_file, "-- The target does not allow multiple"
204 0 : " copies of insn %d\n", m_def_insn->uid ());
205 0 : return false;
206 : }
207 :
208 : // Check that we can change the instruction pattern. Leave recognition
209 : // of the result till later.
210 25847451 : insn_propagation prop (use_rtl, m_dest, m_src);
211 25847451 : if (!prop.apply_to_pattern (&PATTERN (use_rtl))
212 25847451 : || prop.num_replacements == 0)
213 : {
214 488940 : if (dump_file && (dump_flags & TDF_DETAILS))
215 0 : fprintf (dump_file, "-- RTL substitution failed\n");
216 488940 : return false;
217 : }
218 :
219 25358511 : use_array new_uses = get_new_uses (use);
220 25358511 : if (!new_uses.is_valid ())
221 : {
222 1056853 : if (dump_file && (dump_flags & TDF_DETAILS))
223 0 : fprintf (dump_file, "-- could not prove that all sources"
224 : " are available\n");
225 1056853 : return false;
226 : }
227 :
228 : // Create a tentative change for the use.
229 24301658 : auto *where = XOBNEW (m_attempt, insn_change);
230 24301658 : auto *use_change = new (where) insn_change (use_insn);
231 24301658 : m_nondebug_changes.safe_push (use_change);
232 24301658 : use_change->new_uses = new_uses;
233 :
234 24301658 : struct local_ignore : ignore_nothing
235 : {
236 24301658 : local_ignore (const set_info *def, const insn_info *use_insn)
237 24301658 : : m_def (def), m_use_insn (use_insn) {}
238 :
239 : // We don't limit the number of insns per optimization, so ignoring all
240 : // insns for all insns would lead to quadratic complexity. Just ignore
241 : // the use and definition, which should be enough for most purposes.
242 : bool
243 138447409 : should_ignore_insn (const insn_info *insn)
244 : {
245 138447409 : return insn == m_def->insn () || insn == m_use_insn;
246 : }
247 :
248 : // Ignore the definition that we're removing, and all uses of it.
249 47578525 : bool should_ignore_def (const def_info *def) { return def == m_def; }
250 :
251 : const set_info *m_def;
252 : const insn_info *m_use_insn;
253 : };
254 :
255 24301658 : auto ignore = local_ignore (m_def, use_insn);
256 :
257 : // Moving instructions before register allocation could increase
258 : // register pressure. Only try moving them after RA.
259 24301658 : if (reload_completed && can_move_insn_p (use_insn))
260 8305289 : use_change->move_range = { use_insn->bb ()->head_insn (),
261 8305289 : use_insn->ebb ()->last_bb ()->end_insn () };
262 24301658 : if (!restrict_movement (*use_change, ignore))
263 : {
264 749783 : if (dump_file && (dump_flags & TDF_DETAILS))
265 0 : fprintf (dump_file, "-- cannot satisfy all definitions and uses"
266 0 : " in insn %d\n", INSN_UID (use_insn->rtl ()));
267 749783 : return false;
268 : }
269 :
270 23551875 : if (!recog (m_attempt, *use_change, ignore))
271 : return false;
272 :
273 : return true;
274 : }
275 :
276 : // Apply substitute_nondebug_use to all direct and indirect uses of DEF.
277 : // There will be at most one level of indirection.
278 : bool
279 21432086 : insn_combination::substitute_nondebug_uses (set_info *def)
280 : {
281 64468760 : for (use_info *use : def->nondebug_insn_uses ())
282 29244807 : if (!use->is_live_out_use ()
283 25928150 : && !use->only_occurs_in_notes ()
284 55092258 : && !substitute_nondebug_use (use))
285 19007724 : return false;
286 :
287 6048327 : for (use_info *use : def->phi_uses ())
288 634392 : if (!substitute_nondebug_uses (use->phi ()))
289 565211 : return false;
290 :
291 2424362 : return true;
292 : }
293 :
294 : // USE_CHANGE.insn () is a debug instruction that uses m_def. Try to
295 : // substitute the definition into the instruction and try to describe
296 : // the result in USE_CHANGE. Return true on success. Failure means that
297 : // the instruction must be reset instead.
298 : bool
299 1583718 : insn_combination::try_to_preserve_debug_info (insn_change &use_change,
300 : use_info *use)
301 : {
302 : // Punt on unsimplified subregs of hard registers. In that case,
303 : // propagation can succeed and create a wider reg than the one we
304 : // started with.
305 1583718 : if (HARD_REGISTER_NUM_P (use->regno ())
306 1583718 : && use->includes_subregs ())
307 : return false;
308 :
309 1583695 : insn_info *use_insn = use_change.insn ();
310 1583695 : rtx_insn *use_rtl = use_insn->rtl ();
311 :
312 1583695 : use_change.new_uses = get_new_uses (use);
313 1583868 : if (!use_change.new_uses.is_valid ()
314 1578688 : || !restrict_movement (use_change))
315 16344 : return false;
316 :
317 1567351 : insn_propagation prop (use_rtl, m_dest, m_src);
318 1567351 : return prop.apply_to_pattern (&INSN_VAR_LOCATION_LOC (use_rtl));
319 : }
320 :
321 : // USE_INSN is a debug instruction that uses m_def. Update it to reflect
322 : // the fact that m_def is going to disappear. Try to preserve the source
323 : // value if possible, but reset the instruction if not.
324 : void
325 1583718 : insn_combination::substitute_debug_use (use_info *use)
326 : {
327 1583718 : auto *use_insn = use->insn ();
328 1583718 : rtx_insn *use_rtl = use_insn->rtl ();
329 :
330 1583718 : auto use_change = insn_change (use_insn);
331 1583718 : if (!try_to_preserve_debug_info (use_change, use))
332 : {
333 16431 : use_change.new_uses = {};
334 16431 : use_change.move_range = use_change.insn ();
335 16431 : INSN_VAR_LOCATION_LOC (use_rtl) = gen_rtx_UNKNOWN_VAR_LOC ();
336 : }
337 1583718 : insn_change *changes[] = { &use_change };
338 1583718 : crtl->ssa->change_insns (changes);
339 1583718 : }
340 :
341 : // NOTE is a reg note of USE_INSN, which previously used m_def. Update
342 : // the note to reflect the fact that m_def is going to disappear. Return
343 : // true on success, or false if the note must be deleted.
344 : //
345 : // CAN_PROPAGATE is true if m_dest can be replaced with m_use.
346 : bool
347 3401857 : insn_combination::substitute_note (insn_info *use_insn, rtx note,
348 : bool can_propagate)
349 : {
350 3401857 : if (REG_NOTE_KIND (note) == REG_EQUAL
351 3401857 : || REG_NOTE_KIND (note) == REG_EQUIV)
352 : {
353 188739 : insn_propagation prop (use_insn->rtl (), m_dest, m_src);
354 188739 : return (prop.apply_to_note (&XEXP (note, 0))
355 188739 : && (can_propagate || prop.num_replacements == 0));
356 : }
357 : return true;
358 : }
359 :
360 : // Update USE_INSN's notes after deciding to go ahead with the optimization.
361 : // CAN_PROPAGATE is true if m_dest can be replaced with m_use.
362 : void
363 5782309 : insn_combination::substitute_notes (insn_info *use_insn, bool can_propagate)
364 : {
365 5782309 : rtx_insn *use_rtl = use_insn->rtl ();
366 5782309 : rtx *ptr = ®_NOTES (use_rtl);
367 9184166 : while (rtx note = *ptr)
368 : {
369 3401857 : if (substitute_note (use_insn, note, can_propagate))
370 3398812 : ptr = &XEXP (note, 1);
371 : else
372 3045 : *ptr = XEXP (note, 1);
373 : }
374 5782309 : }
375 :
376 : // We've decided to go ahead with the substitution. Update all REG_NOTES
377 : // involving USE.
378 : void
379 5782309 : insn_combination::substitute_note_uses (use_info *use)
380 : {
381 5782309 : insn_info *use_insn = use->insn ();
382 :
383 5782309 : bool can_propagate = true;
384 5782309 : if (use->only_occurs_in_notes ())
385 : {
386 : // The only uses are in notes. Try to keep the note if we can,
387 : // but removing it is better than aborting the optimization.
388 47782 : insn_change use_change (use_insn);
389 47782 : use_change.new_uses = get_new_uses (use);
390 47782 : if (!use_change.new_uses.is_valid ()
391 47410 : || !restrict_movement (use_change))
392 : {
393 3042 : use_change.move_range = use_insn;
394 3042 : use_change.new_uses = remove_uses_of_def (m_attempt,
395 : use_insn->uses (),
396 3042 : use->def ());
397 3042 : can_propagate = false;
398 : }
399 47782 : if (dump_file && (dump_flags & TDF_DETAILS))
400 : {
401 0 : fprintf (dump_file, "%s notes in:\n",
402 : can_propagate ? "updating" : "removing");
403 0 : dump_insn_slim (dump_file, use_insn->rtl ());
404 : }
405 47782 : substitute_notes (use_insn, can_propagate);
406 47782 : insn_change *changes[] = { &use_change };
407 47782 : crtl->ssa->change_insns (changes);
408 : }
409 : else
410 : // We've already decided to update the insn's pattern and know that m_src
411 : // will be available at the insn's new location. Now update its notes.
412 5734527 : substitute_notes (use_insn, can_propagate);
413 5782309 : }
414 :
415 : // We've decided to go ahead with the substitution and we've dealt with
416 : // all uses that occur in the patterns of non-debug insns. Update all
417 : // other uses for the fact that m_def is about to disappear.
418 : void
419 2181179 : insn_combination::substitute_optional_uses (set_info *def)
420 : {
421 4362358 : if (auto insn_uses = def->all_insn_uses ())
422 : {
423 : use_info *use = *insn_uses.begin ();
424 10803499 : while (use)
425 : {
426 8658357 : use_info *next_use = use->next_any_insn_use ();
427 8658357 : if (use->is_in_debug_insn ())
428 1583718 : substitute_debug_use (use);
429 7074639 : else if (!use->is_live_out_use ())
430 5782309 : substitute_note_uses (use);
431 : use = next_use;
432 : }
433 : }
434 4388363 : for (use_info *use : def->phi_uses ())
435 26005 : substitute_optional_uses (use->phi ());
436 2181179 : }
437 :
438 : // Try to perform the substitution. Return true on success.
439 : bool
440 20797694 : insn_combination::run ()
441 : {
442 20797694 : if (dump_file && (dump_flags & TDF_DETAILS))
443 : {
444 0 : fprintf (dump_file, "\ntrying to combine definition of r%d in:\n",
445 0 : m_def->regno ());
446 0 : dump_insn_slim (dump_file, m_def_insn->rtl ());
447 0 : fprintf (dump_file, "into:\n");
448 : }
449 :
450 20797694 : auto def_change = insn_change::delete_insn (m_def_insn);
451 20797694 : m_nondebug_changes.safe_push (&def_change);
452 :
453 20797694 : if (!substitute_nondebug_uses (m_def)
454 4710362 : || !changes_are_worthwhile (m_nondebug_changes)
455 25116636 : || !crtl->ssa->verify_insn_changes (m_nondebug_changes))
456 18642520 : return false;
457 :
458 : // We've now decided that the optimization is valid and profitable.
459 : // Allow it to be suppressed for bisection purposes.
460 2155174 : if (!dbg_cnt (::late_combine))
461 : return false;
462 :
463 2155174 : substitute_optional_uses (m_def);
464 :
465 2155174 : confirm_change_group ();
466 4310348 : crtl->ssa->change_insns (m_nondebug_changes);
467 2155174 : return true;
468 : }
469 :
470 : // DEF is the result of calling single_set_info on its instruction.
471 : // See whether that instruction is a single_set that we can optimize.
472 : // Return the set if so, otherwise return null.
473 : rtx
474 89910597 : late_combine::optimizable_set (set_info *def)
475 : {
476 : // For simplicity, don't try to handle sets of multiple hard registers.
477 : // And for correctness, don't remove any assignments to the stack or
478 : // frame pointers, since that would implicitly change the set of valid
479 : // memory locations between this assignment and the next.
480 : //
481 : // Removing assignments to the hard frame pointer would invalidate
482 : // backtraces.
483 89910597 : if (!def->is_reg ()
484 71005624 : || def->regno () == STACK_POINTER_REGNUM
485 71005624 : || def->regno () == FRAME_POINTER_REGNUM
486 160916221 : || def->regno () == HARD_FRAME_POINTER_REGNUM)
487 : return NULL_RTX;
488 :
489 70422015 : auto *insn = def->insn ();
490 : if (!insn->can_be_optimized ()
491 70422015 : || insn->is_asm ()
492 70403307 : || insn->is_call ()
493 69382704 : || insn->has_volatile_refs ()
494 68666259 : || insn->has_pre_post_modify ()
495 139088274 : || !can_move_insn_p (insn))
496 10581061 : return NULL_RTX;
497 :
498 59840954 : rtx set = single_set (insn->rtl ());
499 59840954 : if (!set)
500 : return NULL_RTX;
501 :
502 : // For simplicity, don't try to handle subreg destinations.
503 59832114 : rtx dest = SET_DEST (set);
504 59832114 : if (!REG_P (dest) || REG_NREGS (dest) != 1 || def->regno () != REGNO (dest))
505 : return NULL_RTX;
506 :
507 : return set;
508 : }
509 :
510 : // Suppose that we can replace all uses of SET_DEST (SET) with SET_SRC (SET),
511 : // where SET occurs in INSN. Return true if doing so is not likely to
512 : // increase register pressure.
513 : bool
514 26971744 : late_combine::check_register_pressure (insn_info *insn, rtx set)
515 : {
516 : // Plain register-to-register moves do not establish a register class
517 : // preference and have no well-defined effect on the register allocator.
518 : // If changes in register class are needed, the register allocator is
519 : // in the best position to place those changes. If no change in
520 : // register class is needed, then the optimization reduces register
521 : // pressure if SET_SRC (set) was already live at uses, otherwise the
522 : // optimization is pressure-neutral.
523 26971744 : rtx src = SET_SRC (set);
524 26971744 : if (REG_P (src))
525 : return true;
526 :
527 : // On the same basis, substituting a SET_SRC that contains a single
528 : // pseudo register either reduces pressure or is pressure-neutral,
529 : // subject to the constraints below. We would need to do more
530 : // analysis for SET_SRCs that use more than one pseudo register.
531 19722147 : unsigned int nregs = 0;
532 36768638 : for (auto *use : insn->uses ())
533 20411243 : if (use->is_reg ()
534 17084468 : && !HARD_REGISTER_NUM_P (use->regno ())
535 34083606 : && !use->only_occurs_in_notes ())
536 13589473 : if (++nregs > 1)
537 3364752 : return false;
538 :
539 : // If there are no pseudo registers in SET_SRC then the optimization
540 : // should improve register pressure.
541 16357395 : if (nregs == 0)
542 : return true;
543 :
544 : // We'd be substituting (set (reg R1) SRC) where SRC is known to
545 : // contain a single pseudo register R2. Assume for simplicity that
546 : // each new use of R2 would need to be in the same class C as the
547 : // current use of R2. If, for a realistic allocation, C is a
548 : // non-strict superset of the R1's register class, the effect on
549 : // register pressure should be positive or neutral. If instead
550 : // R1 occupies a different register class from R2, or if R1 has
551 : // more allocation freedom than R2, then there's a higher risk that
552 : // the effect on register pressure could be negative.
553 : //
554 : // First use constrain_operands to get the most likely choice of
555 : // alternative. For simplicity, just handle the case where the
556 : // output operand is operand 0.
557 6859969 : extract_insn (insn->rtl ());
558 6859969 : rtx dest = SET_DEST (set);
559 6859969 : if (recog_data.n_operands == 0
560 6859969 : || recog_data.operand[0] != dest)
561 : return false;
562 :
563 4459769 : if (!constrain_operands (0, get_enabled_alternatives (insn->rtl ())))
564 : return false;
565 :
566 4459769 : preprocess_constraints (insn->rtl ());
567 4459769 : auto *alt = which_op_alt ();
568 4459769 : auto dest_class = alt[0].cl;
569 :
570 : // Check operands 1 and above.
571 11920543 : auto check_src = [&] (unsigned int i)
572 : {
573 7460774 : if (recog_data.is_operator[i])
574 : return true;
575 :
576 7357343 : rtx op = recog_data.operand[i];
577 7357343 : if (CONSTANT_P (op))
578 : return true;
579 :
580 4563008 : if (SUBREG_P (op))
581 551564 : op = SUBREG_REG (op);
582 4563008 : if (REG_P (op))
583 : {
584 : // Ignore hard registers. We've already rejected uses of non-fixed
585 : // hard registers in the SET_SRC.
586 3767802 : if (HARD_REGISTER_P (op))
587 : return true;
588 :
589 : // Make sure that the source operand's class is at least as
590 : // permissive as the destination operand's class.
591 3765816 : auto src_class = alternative_class (alt, i);
592 3765816 : if (dest_class != src_class)
593 : {
594 340478 : auto extra_dest_regs = (reg_class_contents[dest_class]
595 340478 : & ~reg_class_contents[src_class]
596 340478 : & ~fixed_reg_set);
597 680956 : if (!hard_reg_set_empty_p (extra_dest_regs))
598 92644 : return false;
599 : }
600 :
601 : // Make sure that the source operand occupies no more hard
602 : // registers than the destination operand. This mostly matters
603 : // for subregs.
604 7346344 : if (targetm.class_max_nregs (dest_class, GET_MODE (dest))
605 3673172 : < targetm.class_max_nregs (src_class, GET_MODE (op)))
606 : return false;
607 :
608 : return true;
609 : }
610 : return false;
611 4459769 : };
612 10843837 : for (int i = 1; i < recog_data.n_operands; ++i)
613 7465861 : if (recog_data.operand_type[i] != OP_OUT && !check_src (i))
614 : return false;
615 :
616 : return true;
617 : }
618 :
619 : // Check uses of DEF to see whether there is anything obvious that
620 : // prevents the substitution of SET into uses of DEF.
621 : bool
622 47838279 : late_combine::check_uses (set_info *def, rtx set)
623 : {
624 47838279 : use_info *prev_use = nullptr;
625 196548274 : for (use_info *use : def->nondebug_insn_uses ())
626 : {
627 67546217 : insn_info *use_insn = use->insn ();
628 :
629 67546217 : if (use->is_live_out_use ())
630 14130263 : continue;
631 53415954 : if (use->only_occurs_in_notes ())
632 196730 : continue;
633 :
634 : // We cannot replace all uses if the value is live on exit.
635 53219224 : if (use->is_artificial ())
636 47838279 : return false;
637 :
638 : // Avoid increasing the complexity of instructions that
639 : // reference allocatable hard registers.
640 52501393 : if (!REG_P (SET_SRC (set))
641 36756531 : && !reload_completed
642 66626131 : && (accesses_include_nonfixed_hard_registers (use_insn->uses ())
643 10717032 : || accesses_include_nonfixed_hard_registers (use_insn->defs ())))
644 4695588 : return false;
645 :
646 : // Don't substitute into a non-local goto, since it can then be
647 : // treated as a jump to local label, e.g. in shorten_branches.
648 : // ??? But this shouldn't be necessary.
649 47805805 : if (use_insn->is_jump ()
650 47805805 : && find_reg_note (use_insn->rtl (), REG_NON_LOCAL_GOTO, NULL_RTX))
651 : return false;
652 :
653 : // Reject cases where one of the uses is a function argument.
654 : // The combine attempt should fail anyway, but this is a common
655 : // case that is easy to check early.
656 47804773 : if (use_insn->is_call ()
657 11120919 : && HARD_REGISTER_P (SET_DEST (set))
658 58913572 : && find_reg_fusage (use_insn->rtl (), USE, SET_DEST (set)))
659 : return false;
660 :
661 : // We'll keep the uses in their original order, even if we move
662 : // them relative to other instructions. Make sure that non-final
663 : // uses do not change any values that occur in the SET_SRC.
664 59504864 : if (prev_use && prev_use->ebb () == use->ebb ())
665 : {
666 8705293 : def_info *ultimate_def = look_through_degenerate_phi (def);
667 8705293 : if (insn_clobbers_resources (prev_use->insn (),
668 8705293 : ultimate_def->insn ()->uses ()))
669 : return false;
670 : }
671 :
672 : prev_use = use;
673 : }
674 :
675 62973698 : for (use_info *use : def->phi_uses ())
676 9930226 : if (!use->phi ()->is_degenerate ()
677 9930226 : || !check_uses (use->phi (), set))
678 8412368 : return false;
679 :
680 22315552 : return true;
681 : }
682 :
683 : // Try to remove DEF's instruction by substituting DEF into all uses.
684 : // SET is the rtx set associated with DEF. If the optimization moves any
685 : // instructions before CURSOR, add those instructions to the end of m_worklist.
686 : bool
687 55196223 : late_combine::combine_into_uses (set_info *def, rtx set, insn_info *cursor)
688 : {
689 55196223 : auto *insn = def->insn ();
690 :
691 : // Don't prolong the live ranges of allocatable hard registers, or put
692 : // them into more complicated instructions. Failing to prevent this
693 : // could lead to spill failures, or at least to worst register allocation.
694 55196223 : if (!reload_completed
695 55196223 : && accesses_include_nonfixed_hard_registers (insn->uses ()))
696 2823332 : return false;
697 :
698 52372891 : if (!reload_completed && !check_register_pressure (insn, set))
699 : return false;
700 :
701 45526146 : if (!check_uses (def, set))
702 : return false;
703 :
704 20797694 : insn_combination combination (def, SET_DEST (set), SET_SRC (set));
705 20797694 : if (!combination.run ())
706 : return false;
707 :
708 2155174 : for (auto *use_change : combination.use_changes ())
709 2119045 : if (*use_change->insn () < *cursor)
710 0 : m_worklist.safe_push (use_change->insn ());
711 : else
712 : break;
713 2155174 : return true;
714 20797694 : }
715 :
716 : // CC_SET is a single_set that sets (only) CC_DEF; OTHER_SET is likewise
717 : // a single_set that sets (only) OTHER_DEF. CC_SET is known to set a
718 : // condition-code register and the instruction that contains CC_SET is
719 : // known to use OTHER_DEF. Try to do CC_SET and OTHER_SET in parallel.
720 : bool
721 4372510 : late_combine::parallelize_insns (set_info *cc_def, rtx cc_set,
722 : set_info *other_def, rtx other_set)
723 : {
724 4372510 : auto attempt = crtl->ssa->new_change_attempt ();
725 :
726 4372510 : insn_info *cc_insn = cc_def->insn ();
727 4372510 : insn_info *other_insn = other_def->insn ();
728 4372510 : if (dump_file && (dump_flags & TDF_DETAILS))
729 0 : fprintf (dump_file, "trying to parallelize insn %d and insn %d\n",
730 : other_insn->uid (), cc_insn->uid ());
731 :
732 : // Try to substitute OTHER_SET into CC_INSN.
733 4372510 : insn_change_watermark rtl_watermark;
734 4372510 : rtx_insn *cc_rtl = cc_insn->rtl ();
735 4372510 : insn_propagation prop (cc_rtl, SET_DEST (other_set), SET_SRC (other_set));
736 4372510 : if (!prop.apply_to_pattern (&PATTERN (cc_rtl))
737 4372510 : || prop.num_replacements == 0)
738 : {
739 13438 : if (dump_file && (dump_flags & TDF_DETAILS))
740 0 : fprintf (dump_file, "-- failed to substitute all uses of r%d\n",
741 : other_def->regno ());
742 13438 : return false;
743 : }
744 :
745 : // Restrict the uses to those outside notes.
746 4359072 : use_array cc_uses = remove_note_accesses (attempt, cc_insn->uses ());
747 4359072 : use_array other_set_uses = remove_note_accesses (attempt,
748 : other_insn->uses ());
749 :
750 : // Remove the use of the substituted value.
751 4359072 : cc_uses = remove_uses_of_def (attempt, cc_uses, other_def);
752 :
753 : // Get the list of uses for the new instruction.
754 4359072 : insn_change cc_change (cc_insn);
755 4359072 : cc_change.new_uses = merge_access_arrays (attempt, other_set_uses, cc_uses);
756 4359072 : if (!cc_change.new_uses.is_valid ())
757 : {
758 47639 : if (dump_file && (dump_flags & TDF_DETAILS))
759 0 : fprintf (dump_file, "-- cannot merge uses\n");
760 47639 : return false;
761 : }
762 :
763 : // The instruction initially defines just two registers. recog can add
764 : // extra clobbers if necessary.
765 4311433 : auto_vec<access_info *, 2> new_defs;
766 4311433 : new_defs.quick_push (cc_def);
767 4311433 : new_defs.quick_push (other_def);
768 4311433 : sort_accesses (new_defs);
769 8622866 : cc_change.new_defs = def_array (access_array (new_defs));
770 :
771 : // Make sure there is somewhere that the new instruction could live.
772 4311433 : auto other_change = insn_change::delete_insn (other_insn);
773 4311433 : insn_change *changes[] = { &other_change, &cc_change };
774 4311433 : cc_change.move_range = cc_insn->ebb ()->insn_range ();
775 4311433 : if (!restrict_movement (cc_change, ignore_changing_insns (changes)))
776 : {
777 880857 : if (dump_file && (dump_flags & TDF_DETAILS))
778 0 : fprintf (dump_file, "-- cannot satisfy all definitions and uses\n");
779 880857 : return false;
780 : }
781 :
782 : // Tentatively install the new pattern. By convention, the CC set
783 : // must be first.
784 3430576 : if (m_parallel)
785 : {
786 2734244 : XVECEXP (m_parallel, 0, 0) = cc_set;
787 2734244 : XVECEXP (m_parallel, 0, 1) = other_set;
788 : }
789 : else
790 : {
791 696332 : rtvec vec = gen_rtvec (2, cc_set, other_set);
792 696332 : m_parallel = gen_rtx_PARALLEL (VOIDmode, vec);
793 : }
794 3430576 : validate_change (cc_rtl, &PATTERN (cc_rtl), m_parallel, 1);
795 :
796 : // These routines report failures themselves.
797 3430576 : if (!recog (attempt, cc_change, ignore_changing_insns (changes))
798 12618 : || !changes_are_worthwhile (changes)
799 3443194 : || !crtl->ssa->verify_insn_changes (changes))
800 3417958 : return false;
801 :
802 12618 : remove_reg_equal_equiv_notes (cc_rtl);
803 12618 : confirm_change_group ();
804 12618 : crtl->ssa->change_insns (changes);
805 12618 : m_parallel = NULL_RTX;
806 12618 : return true;
807 4372510 : }
808 :
809 : // CC_SET is a single_set that sets (only) CC_DEF. See whether CC_DEF
810 : // is a definition of a condition-code register and try to optimize it
811 : // with related instructions if so. Return true if something changed.
812 : //
813 : // This function looks for sequences of the form:
814 : //
815 : // A: (set (reg R1) X1)
816 : // B: ...instructions that might change the value of X1...
817 : // C: (set (reg CC) X2) // X2 uses R1
818 : //
819 : // and tries to change them to:
820 : //
821 : // C': [(set (reg CC) X2')
822 : // (set (reg R1) X1)]
823 : // B: ...instructions that might change the value of X1...
824 : //
825 : // where X2' is the result of replacing R1 with X1 in X2.
826 : //
827 : // Combine can handle this optimization if B doesn't exist and if A and
828 : // C are in the same BB. This pass instead handles cases where B does
829 : // exist and cases where A and C are in different BBs of the same EBB.
830 : bool
831 53041049 : late_combine::combine_cc_setter (set_info *cc_def, rtx cc_set)
832 : {
833 : // Check for a set of a CC register. This isn't needed for correctness;
834 : // it's just a way of narrowing the search space. It could be relaxed if
835 : // there are other situations that would benefit from the same optimization.
836 53041049 : if (!HARD_REGISTER_NUM_P (cc_def->regno ())
837 53041049 : || GET_MODE_CLASS (cc_def->mode()) != MODE_CC)
838 : return false;
839 :
840 : // Search the registers used by the CC setter for an easily-substitutable
841 : // def-use chain.
842 18718039 : for (use_info *other_use : cc_def->insn ()->uses ())
843 10836574 : if (auto *other_def = other_use->def ())
844 10830164 : if (other_use->regno () != cc_def->regno ()
845 10794984 : && other_def->ebb () == cc_def->ebb ()
846 20269774 : && other_def == single_set_info (other_def->insn ()))
847 6414442 : if (rtx other_set = optimizable_set (other_def))
848 4372510 : if (parallelize_insns (cc_def, cc_set, other_def, other_set))
849 12618 : return true;
850 :
851 7881465 : return false;
852 : }
853 :
854 : // Try to optimize INSN in some way. If the optimization moves any
855 : // instructions before CURSOR, and if further optimizations might be
856 : // possible on those instructions, add them to the end of m_worklist.
857 : bool
858 107102595 : late_combine::combine_insn (insn_info *insn, insn_info *cursor)
859 : {
860 107102595 : if (set_info *def = single_set_info (insn))
861 83496155 : if (rtx set = optimizable_set (def))
862 55196223 : return (combine_into_uses (def, set, cursor)
863 55196223 : || combine_cc_setter (def, set));
864 :
865 : return false;
866 : }
867 :
868 : // Run the pass on function FN.
869 : unsigned int
870 1927784 : late_combine::execute (function *fn)
871 : {
872 : // Initialization.
873 1927784 : calculate_dominance_info (CDI_DOMINATORS);
874 1927784 : df_analyze ();
875 1927784 : crtl->ssa = new rtl_ssa::function_info (fn);
876 : // Don't allow memory_operand to match volatile MEMs.
877 1927784 : init_recog_no_volatile ();
878 :
879 1927784 : insn_info *insn = *crtl->ssa->nondebug_insns ().begin ();
880 171281539 : while (insn)
881 : {
882 169353755 : if (!insn->is_artificial ())
883 : {
884 107102595 : insn_info *prev = insn->prev_nondebug_insn ();
885 107102595 : if (combine_insn (insn, prev))
886 : {
887 : // Any instructions that get added to the worklist were
888 : // previously after PREV. Thus if we were able to move
889 : // an instruction X before PREV during one combination,
890 : // X cannot depend on any instructions that we move before
891 : // PREV during subsequent combinations. This means that
892 : // the worklist should be free of backwards dependencies,
893 : // even if it isn't necessarily in RPO.
894 2167792 : for (unsigned int i = 0; i < m_worklist.length (); ++i)
895 0 : combine_insn (m_worklist[i], prev);
896 2167792 : m_worklist.truncate (0);
897 2167792 : insn = prev;
898 : }
899 : }
900 169353755 : insn = insn->next_nondebug_insn ();
901 : }
902 :
903 : // Finalization.
904 1927784 : if (crtl->ssa->perform_pending_updates ())
905 517 : cleanup_cfg (0);
906 :
907 1927784 : delete crtl->ssa;
908 1927784 : crtl->ssa = nullptr;
909 :
910 : // Make the recognizer allow volatile MEMs again.
911 1927784 : init_recog ();
912 1927784 : free_dominance_info (CDI_DOMINATORS);
913 1927784 : return 0;
914 : }
915 :
916 : class pass_late_combine : public rtl_opt_pass
917 : {
918 : public:
919 571444 : pass_late_combine (gcc::context *ctxt)
920 1142888 : : rtl_opt_pass (pass_data_late_combine, ctxt)
921 : {}
922 :
923 : // opt_pass methods:
924 285722 : opt_pass *clone () override { return new pass_late_combine (m_ctxt); }
925 : bool gate (function *) override;
926 : unsigned int execute (function *) override;
927 : };
928 :
929 : bool
930 2942740 : pass_late_combine::gate (function *)
931 : {
932 2942740 : return optimize > 0 && flag_late_combine_instructions;
933 : }
934 :
935 : unsigned int
936 1927784 : pass_late_combine::execute (function *fn)
937 : {
938 1927784 : return late_combine ().execute (fn);
939 : }
940 :
941 : } // end namespace
942 :
943 : // Create a new CC fusion pass instance.
944 :
945 : rtl_opt_pass *
946 285722 : make_pass_late_combine (gcc::context *ctxt)
947 : {
948 285722 : return new pass_late_combine (ctxt);
949 : }
|