Line data Source code
1 : // Late-stage instruction combination pass.
2 : // Copyright (C) 2023-2026 Free Software Foundation, Inc.
3 : //
4 : // This file is part of GCC.
5 : //
6 : // GCC is free software; you can redistribute it and/or modify it under
7 : // the terms of the GNU General Public License as published by the Free
8 : // Software Foundation; either version 3, or (at your option) any later
9 : // version.
10 : //
11 : // GCC is distributed in the hope that it will be useful, but WITHOUT ANY
12 : // WARRANTY; without even the implied warranty of MERCHANTABILITY or
13 : // FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
14 : // for more details.
15 : //
16 : // You should have received a copy of the GNU General Public License
17 : // along with GCC; see the file COPYING3. If not see
18 : // <http://www.gnu.org/licenses/>.
19 :
20 : // This pass currently has two purposes:
21 : //
22 : // - to substitute definitions into all uses, so that the definition
23 : // can be removed.
24 : //
25 : // - to try to parallelise sets of condition-code registers with a
26 : // related instruction (see combine_cc_setter for details).
27 : //
28 : // However, it could be extended to handle other combination-related
29 : // optimizations in future.
30 : //
31 : // The pass can run before or after register allocation. When running
32 : // before register allocation, it tries to avoid cases that are likely
33 : // to increase register pressure. For the same reason, it avoids moving
34 : // instructions around, even if doing so would allow an optimization to
35 : // succeed. These limitations are removed when running after register
36 : // allocation.
37 :
38 : #define INCLUDE_ALGORITHM
39 : #define INCLUDE_FUNCTIONAL
40 : #define INCLUDE_ARRAY
41 : #include "config.h"
42 : #include "system.h"
43 : #include "coretypes.h"
44 : #include "backend.h"
45 : #include "rtl.h"
46 : #include "df.h"
47 : #include "rtl-ssa.h"
48 : #include "print-rtl.h"
49 : #include "tree-pass.h"
50 : #include "cfgcleanup.h"
51 : #include "target.h"
52 : #include "dbgcnt.h"
53 :
54 : using namespace rtl_ssa;
55 :
56 : namespace {
57 : const pass_data pass_data_late_combine =
58 : {
59 : RTL_PASS, // type
60 : "late_combine", // name
61 : OPTGROUP_NONE, // optinfo_flags
62 : TV_LATE_COMBINE, // tv_id
63 : 0, // properties_required
64 : 0, // properties_provided
65 : 0, // properties_destroyed
66 : 0, // todo_flags_start
67 : TODO_df_finish, // todo_flags_finish
68 : };
69 :
70 : // Represents an attempt to substitute a single-set definition into all
71 : // uses of the definition.
72 : class insn_combination
73 : {
74 : public:
75 : insn_combination (set_info *, rtx, rtx);
76 : bool run ();
77 : array_slice<insn_change *const> use_changes () const;
78 :
79 : private:
80 : use_array get_new_uses (use_info *);
81 : bool substitute_nondebug_use (use_info *);
82 : bool substitute_nondebug_uses (set_info *);
83 : bool try_to_preserve_debug_info (insn_change &, use_info *);
84 : void substitute_debug_use (use_info *);
85 : bool substitute_note (insn_info *, rtx, bool);
86 : void substitute_notes (insn_info *, bool);
87 : void substitute_note_uses (use_info *);
88 : void substitute_optional_uses (set_info *);
89 :
90 : // Represents the state of the function's RTL at the start of this
91 : // combination attempt.
92 : insn_change_watermark m_rtl_watermark;
93 :
94 : // Represents the rtl-ssa state at the start of this combination attempt.
95 : obstack_watermark m_attempt;
96 :
97 : // The instruction that contains the definition, and that we're trying
98 : // to delete.
99 : insn_info *m_def_insn;
100 :
101 : // The definition itself.
102 : set_info *m_def;
103 :
104 : // The destination and source of the single set that defines m_def.
105 : // The destination is known to be a plain REG.
106 : rtx m_dest;
107 : rtx m_src;
108 :
109 : // Contains the full list of changes that we want to make, in reverse
110 : // postorder.
111 : auto_vec<insn_change *> m_nondebug_changes;
112 : };
113 :
114 : // Class that represents one run of the pass.
115 5761578 : class late_combine
116 : {
117 : public:
118 : unsigned int execute (function *);
119 :
120 : private:
121 : rtx optimizable_set (set_info *);
122 : bool check_register_pressure (insn_info *, rtx);
123 : bool check_uses (set_info *, rtx);
124 : bool combine_into_uses (set_info *, rtx, insn_info *);
125 : bool parallelize_insns (set_info *, rtx, set_info *, rtx);
126 : bool combine_cc_setter (set_info *, rtx);
127 : bool combine_insn (insn_info *, insn_info *);
128 :
129 : auto_vec<insn_info *> m_worklist;
130 :
131 : // A spare PARALLEL rtx, or null if none.
132 : rtx m_parallel = NULL_RTX;
133 : };
134 :
135 20828638 : insn_combination::insn_combination (set_info *def, rtx dest, rtx src)
136 20828638 : : m_rtl_watermark (),
137 20828638 : m_attempt (crtl->ssa->new_change_attempt ()),
138 20828638 : m_def_insn (def->insn ()),
139 20828638 : m_def (def),
140 20828638 : m_dest (dest),
141 20828638 : m_src (src),
142 20828638 : m_nondebug_changes ()
143 : {
144 20828638 : }
145 :
146 : array_slice<insn_change *const>
147 2148508 : insn_combination::use_changes () const
148 : {
149 2148508 : return { m_nondebug_changes.address () + 1,
150 2148508 : m_nondebug_changes.length () - 1 };
151 : }
152 :
153 : // USE is a direct or indirect use of m_def. Return the list of uses
154 : // that would be needed after substituting m_def into the instruction.
155 : // The returned list is marked as invalid if USE's insn and m_def_insn
156 : // use different definitions for the same resource (register or memory).
157 : use_array
158 26977575 : insn_combination::get_new_uses (use_info *use)
159 : {
160 26977575 : auto *def = use->def ();
161 26977575 : auto *use_insn = use->insn ();
162 :
163 26977575 : use_array new_uses = use_insn->uses ();
164 26977575 : new_uses = remove_uses_of_def (m_attempt, new_uses, def);
165 26977575 : new_uses = merge_access_arrays (m_attempt, m_def_insn->uses (), new_uses);
166 53848073 : if (new_uses.is_valid () && use->ebb () != m_def->ebb ())
167 4519831 : new_uses = crtl->ssa->make_uses_available (m_attempt, new_uses, use->bb (),
168 4519831 : use_insn->is_debug_insn ());
169 26977575 : return new_uses;
170 : }
171 :
172 : // Start the process of trying to replace USE by substitution, given that
173 : // USE occurs in a non-debug instruction. Check:
174 : //
175 : // - that the substitution can be represented in RTL
176 : //
177 : // - that each use of a resource (register or memory) within the new
178 : // instruction has a consistent definition
179 : //
180 : // - that the new instruction is a recognized pattern
181 : //
182 : // - that the instruction can be placed somewhere that makes all definitions
183 : // and uses valid, and that permits any new hard-register clobbers added
184 : // during the recognition process
185 : //
186 : // Return true on success.
187 : bool
188 25865707 : insn_combination::substitute_nondebug_use (use_info *use)
189 : {
190 25865707 : insn_info *use_insn = use->insn ();
191 25865707 : rtx_insn *use_rtl = use_insn->rtl ();
192 :
193 25865707 : if (dump_file && (dump_flags & TDF_DETAILS))
194 0 : dump_insn_slim (dump_file, use->insn ()->rtl ());
195 :
196 : // Reject second and subsequent uses if the target does not allow
197 : // the defining instruction to be copied.
198 25865707 : if (targetm.cannot_copy_insn_p
199 0 : && m_nondebug_changes.length () >= 2
200 25865707 : && targetm.cannot_copy_insn_p (m_def_insn->rtl ()))
201 : {
202 0 : if (dump_file && (dump_flags & TDF_DETAILS))
203 0 : fprintf (dump_file, "-- The target does not allow multiple"
204 0 : " copies of insn %d\n", m_def_insn->uid ());
205 0 : return false;
206 : }
207 :
208 : // Check that we can change the instruction pattern. Leave recognition
209 : // of the result till later.
210 25865707 : insn_propagation prop (use_rtl, m_dest, m_src);
211 25865707 : if (!prop.apply_to_pattern (&PATTERN (use_rtl))
212 25865707 : || prop.num_replacements == 0)
213 : {
214 494777 : if (dump_file && (dump_flags & TDF_DETAILS))
215 0 : fprintf (dump_file, "-- RTL substitution failed\n");
216 494777 : return false;
217 : }
218 :
219 25370930 : use_array new_uses = get_new_uses (use);
220 25370930 : if (!new_uses.is_valid ())
221 : {
222 1058703 : if (dump_file && (dump_flags & TDF_DETAILS))
223 0 : fprintf (dump_file, "-- could not prove that all sources"
224 : " are available\n");
225 1058703 : return false;
226 : }
227 :
228 : // Create a tentative change for the use.
229 24312227 : auto *where = XOBNEW (m_attempt, insn_change);
230 24312227 : auto *use_change = new (where) insn_change (use_insn);
231 24312227 : m_nondebug_changes.safe_push (use_change);
232 24312227 : use_change->new_uses = new_uses;
233 :
234 24312227 : struct local_ignore : ignore_nothing
235 : {
236 24312227 : local_ignore (const set_info *def, const insn_info *use_insn)
237 24312227 : : m_def (def), m_use_insn (use_insn) {}
238 :
239 : // We don't limit the number of insns per optimization, so ignoring all
240 : // insns for all insns would lead to quadratic complexity. Just ignore
241 : // the use and definition, which should be enough for most purposes.
242 : bool
243 138575995 : should_ignore_insn (const insn_info *insn)
244 : {
245 138575995 : return insn == m_def->insn () || insn == m_use_insn;
246 : }
247 :
248 : // Ignore the definition that we're removing, and all uses of it.
249 47663495 : bool should_ignore_def (const def_info *def) { return def == m_def; }
250 :
251 : const set_info *m_def;
252 : const insn_info *m_use_insn;
253 : };
254 :
255 24312227 : auto ignore = local_ignore (m_def, use_insn);
256 :
257 : // Moving instructions before register allocation could increase
258 : // register pressure. Only try moving them after RA.
259 24312227 : if (reload_completed && can_move_insn_p (use_insn))
260 8335851 : use_change->move_range = { use_insn->bb ()->head_insn (),
261 8335851 : use_insn->ebb ()->last_bb ()->end_insn () };
262 24312227 : if (!restrict_movement (*use_change, ignore))
263 : {
264 749787 : if (dump_file && (dump_flags & TDF_DETAILS))
265 0 : fprintf (dump_file, "-- cannot satisfy all definitions and uses"
266 0 : " in insn %d\n", INSN_UID (use_insn->rtl ()));
267 749787 : return false;
268 : }
269 :
270 23562440 : if (!recog (m_attempt, *use_change, ignore))
271 : return false;
272 :
273 : return true;
274 : }
275 :
276 : // Apply substitute_nondebug_use to all direct and indirect uses of DEF.
277 : // There will be at most one level of indirection.
278 : bool
279 21472872 : insn_combination::substitute_nondebug_uses (set_info *def)
280 : {
281 64572484 : for (use_info *use : def->nondebug_insn_uses ())
282 29292896 : if (!use->is_live_out_use ()
283 25945331 : && !use->only_occurs_in_notes ()
284 55158603 : && !substitute_nondebug_use (use))
285 19046313 : return false;
286 :
287 6064139 : for (use_info *use : def->phi_uses ())
288 644234 : if (!substitute_nondebug_uses (use->phi ()))
289 566787 : return false;
290 :
291 2426559 : return true;
292 : }
293 :
294 : // USE_CHANGE.insn () is a debug instruction that uses m_def. Try to
295 : // substitute the definition into the instruction and try to describe
296 : // the result in USE_CHANGE. Return true on success. Failure means that
297 : // the instruction must be reset instead.
298 : bool
299 1560237 : insn_combination::try_to_preserve_debug_info (insn_change &use_change,
300 : use_info *use)
301 : {
302 : // Punt on unsimplified subregs of hard registers. In that case,
303 : // propagation can succeed and create a wider reg than the one we
304 : // started with.
305 1560237 : if (HARD_REGISTER_NUM_P (use->regno ())
306 1560237 : && use->includes_subregs ())
307 : return false;
308 :
309 1560198 : insn_info *use_insn = use_change.insn ();
310 1560198 : rtx_insn *use_rtl = use_insn->rtl ();
311 :
312 1560198 : use_change.new_uses = get_new_uses (use);
313 1560371 : if (!use_change.new_uses.is_valid ()
314 1555335 : || !restrict_movement (use_change))
315 16264 : return false;
316 :
317 1543934 : insn_propagation prop (use_rtl, m_dest, m_src);
318 1543934 : return prop.apply_to_pattern (&INSN_VAR_LOCATION_LOC (use_rtl));
319 : }
320 :
321 : // USE_INSN is a debug instruction that uses m_def. Update it to reflect
322 : // the fact that m_def is going to disappear. Try to preserve the source
323 : // value if possible, but reset the instruction if not.
324 : void
325 1560237 : insn_combination::substitute_debug_use (use_info *use)
326 : {
327 1560237 : auto *use_insn = use->insn ();
328 1560237 : rtx_insn *use_rtl = use_insn->rtl ();
329 :
330 1560237 : auto use_change = insn_change (use_insn);
331 1560237 : if (!try_to_preserve_debug_info (use_change, use))
332 : {
333 16367 : use_change.new_uses = {};
334 16367 : use_change.move_range = use_change.insn ();
335 16367 : INSN_VAR_LOCATION_LOC (use_rtl) = gen_rtx_UNKNOWN_VAR_LOC ();
336 : }
337 1560237 : insn_change *changes[] = { &use_change };
338 1560237 : crtl->ssa->change_insns (changes);
339 1560237 : }
340 :
341 : // NOTE is a reg note of USE_INSN, which previously used m_def. Update
342 : // the note to reflect the fact that m_def is going to disappear. Return
343 : // true on success, or false if the note must be deleted.
344 : //
345 : // CAN_PROPAGATE is true if m_dest can be replaced with m_use.
346 : bool
347 3382175 : insn_combination::substitute_note (insn_info *use_insn, rtx note,
348 : bool can_propagate)
349 : {
350 3382175 : if (REG_NOTE_KIND (note) == REG_EQUAL
351 3382175 : || REG_NOTE_KIND (note) == REG_EQUIV)
352 : {
353 188185 : insn_propagation prop (use_insn->rtl (), m_dest, m_src);
354 188185 : return (prop.apply_to_note (&XEXP (note, 0))
355 188185 : && (can_propagate || prop.num_replacements == 0));
356 : }
357 : return true;
358 : }
359 :
360 : // Update USE_INSN's notes after deciding to go ahead with the optimization.
361 : // CAN_PROPAGATE is true if m_dest can be replaced with m_use.
362 : void
363 5759953 : insn_combination::substitute_notes (insn_info *use_insn, bool can_propagate)
364 : {
365 5759953 : rtx_insn *use_rtl = use_insn->rtl ();
366 5759953 : rtx *ptr = ®_NOTES (use_rtl);
367 9142128 : while (rtx note = *ptr)
368 : {
369 3382175 : if (substitute_note (use_insn, note, can_propagate))
370 3379154 : ptr = &XEXP (note, 1);
371 : else
372 3021 : *ptr = XEXP (note, 1);
373 : }
374 5759953 : }
375 :
376 : // We've decided to go ahead with the substitution. Update all REG_NOTES
377 : // involving USE.
378 : void
379 5759953 : insn_combination::substitute_note_uses (use_info *use)
380 : {
381 5759953 : insn_info *use_insn = use->insn ();
382 :
383 5759953 : bool can_propagate = true;
384 5759953 : if (use->only_occurs_in_notes ())
385 : {
386 : // The only uses are in notes. Try to keep the note if we can,
387 : // but removing it is better than aborting the optimization.
388 46447 : insn_change use_change (use_insn);
389 46447 : use_change.new_uses = get_new_uses (use);
390 46447 : if (!use_change.new_uses.is_valid ()
391 46040 : || !restrict_movement (use_change))
392 : {
393 3018 : use_change.move_range = use_insn;
394 3018 : use_change.new_uses = remove_uses_of_def (m_attempt,
395 : use_insn->uses (),
396 3018 : use->def ());
397 3018 : can_propagate = false;
398 : }
399 46447 : if (dump_file && (dump_flags & TDF_DETAILS))
400 : {
401 0 : fprintf (dump_file, "%s notes in:\n",
402 : can_propagate ? "updating" : "removing");
403 0 : dump_insn_slim (dump_file, use_insn->rtl ());
404 : }
405 46447 : substitute_notes (use_insn, can_propagate);
406 46447 : insn_change *changes[] = { &use_change };
407 46447 : crtl->ssa->change_insns (changes);
408 : }
409 : else
410 : // We've already decided to update the insn's pattern and know that m_src
411 : // will be available at the insn's new location. Now update its notes.
412 5713506 : substitute_notes (use_insn, can_propagate);
413 5759953 : }
414 :
415 : // We've decided to go ahead with the substitution and we've dealt with
416 : // all uses that occur in the patterns of non-debug insns. Update all
417 : // other uses for the fact that m_def is about to disappear.
418 : void
419 2175651 : insn_combination::substitute_optional_uses (set_info *def)
420 : {
421 4351302 : if (auto insn_uses = def->all_insn_uses ())
422 : {
423 : use_info *use = *insn_uses.begin ();
424 10748695 : while (use)
425 : {
426 8608915 : use_info *next_use = use->next_any_insn_use ();
427 8608915 : if (use->is_in_debug_insn ())
428 1560237 : substitute_debug_use (use);
429 7048678 : else if (!use->is_live_out_use ())
430 5759953 : substitute_note_uses (use);
431 : use = next_use;
432 : }
433 : }
434 4378445 : for (use_info *use : def->phi_uses ())
435 27143 : substitute_optional_uses (use->phi ());
436 2175651 : }
437 :
438 : // Try to perform the substitution. Return true on success.
439 : bool
440 20828638 : insn_combination::run ()
441 : {
442 20828638 : if (dump_file && (dump_flags & TDF_DETAILS))
443 : {
444 0 : fprintf (dump_file, "\ntrying to combine definition of r%d in:\n",
445 0 : m_def->regno ());
446 0 : dump_insn_slim (dump_file, m_def_insn->rtl ());
447 0 : fprintf (dump_file, "into:\n");
448 : }
449 :
450 20828638 : auto def_change = insn_change::delete_insn (m_def_insn);
451 20828638 : m_nondebug_changes.safe_push (&def_change);
452 :
453 20828638 : if (!substitute_nondebug_uses (m_def)
454 4698224 : || !changes_are_worthwhile (m_nondebug_changes)
455 25135074 : || !crtl->ssa->verify_insn_changes (m_nondebug_changes))
456 18680130 : return false;
457 :
458 : // We've now decided that the optimization is valid and profitable.
459 : // Allow it to be suppressed for bisection purposes.
460 2148508 : if (!dbg_cnt (::late_combine))
461 : return false;
462 :
463 2148508 : substitute_optional_uses (m_def);
464 :
465 2148508 : confirm_change_group ();
466 4297016 : crtl->ssa->change_insns (m_nondebug_changes);
467 2148508 : return true;
468 : }
469 :
470 : // DEF is the result of calling single_set_info on its instruction.
471 : // See whether that instruction is a single_set that we can optimize.
472 : // Return the set if so, otherwise return null.
473 : rtx
474 89850966 : late_combine::optimizable_set (set_info *def)
475 : {
476 : // For simplicity, don't try to handle sets of multiple hard registers.
477 : // And for correctness, don't remove any assignments to the stack or
478 : // frame pointers, since that would implicitly change the set of valid
479 : // memory locations between this assignment and the next.
480 : //
481 : // Removing assignments to the hard frame pointer would invalidate
482 : // backtraces.
483 89850966 : if (!def->is_reg ()
484 70950525 : || def->regno () == STACK_POINTER_REGNUM
485 70950525 : || def->regno () == FRAME_POINTER_REGNUM
486 160801491 : || def->regno () == HARD_FRAME_POINTER_REGNUM)
487 : return NULL_RTX;
488 :
489 70373263 : auto *insn = def->insn ();
490 : if (!insn->can_be_optimized ()
491 70373263 : || insn->is_asm ()
492 70354555 : || insn->is_call ()
493 69341012 : || insn->has_volatile_refs ()
494 68623954 : || insn->has_pre_post_modify ()
495 138997217 : || !can_move_insn_p (insn))
496 10534314 : return NULL_RTX;
497 :
498 59838949 : rtx set = single_set (insn->rtl ());
499 59838949 : if (!set)
500 : return NULL_RTX;
501 :
502 : // For simplicity, don't try to handle subreg destinations.
503 59830109 : rtx dest = SET_DEST (set);
504 59830109 : if (!REG_P (dest) || REG_NREGS (dest) != 1 || def->regno () != REGNO (dest))
505 : return NULL_RTX;
506 :
507 : return set;
508 : }
509 :
510 : // Suppose that we can replace all uses of SET_DEST (SET) with SET_SRC (SET),
511 : // where SET occurs in INSN. Return true if doing so is not likely to
512 : // increase register pressure.
513 : bool
514 26974781 : late_combine::check_register_pressure (insn_info *insn, rtx set)
515 : {
516 : // Plain register-to-register moves do not establish a register class
517 : // preference and have no well-defined effect on the register allocator.
518 : // If changes in register class are needed, the register allocator is
519 : // in the best position to place those changes. If no change in
520 : // register class is needed, then the optimization reduces register
521 : // pressure if SET_SRC (set) was already live at uses, otherwise the
522 : // optimization is pressure-neutral.
523 26974781 : rtx src = SET_SRC (set);
524 26974781 : if (REG_P (src))
525 : return true;
526 :
527 : // On the same basis, substituting a SET_SRC that contains a single
528 : // pseudo register either reduces pressure or is pressure-neutral,
529 : // subject to the constraints below. We would need to do more
530 : // analysis for SET_SRCs that use more than one pseudo register.
531 19762469 : unsigned int nregs = 0;
532 36809302 : for (auto *use : insn->uses ())
533 20415171 : if (use->is_reg ()
534 17097537 : && !HARD_REGISTER_NUM_P (use->regno ())
535 34100358 : && !use->only_occurs_in_notes ())
536 13604267 : if (++nregs > 1)
537 3368338 : return false;
538 :
539 : // If there are no pseudo registers in SET_SRC then the optimization
540 : // should improve register pressure.
541 16394131 : if (nregs == 0)
542 : return true;
543 :
544 : // We'd be substituting (set (reg R1) SRC) where SRC is known to
545 : // contain a single pseudo register R2. Assume for simplicity that
546 : // each new use of R2 would need to be in the same class C as the
547 : // current use of R2. If, for a realistic allocation, C is a
548 : // non-strict superset of the R1's register class, the effect on
549 : // register pressure should be positive or neutral. If instead
550 : // R1 occupies a different register class from R2, or if R1 has
551 : // more allocation freedom than R2, then there's a higher risk that
552 : // the effect on register pressure could be negative.
553 : //
554 : // First use constrain_operands to get the most likely choice of
555 : // alternative. For simplicity, just handle the case where the
556 : // output operand is operand 0.
557 6867591 : extract_insn (insn->rtl ());
558 6867591 : rtx dest = SET_DEST (set);
559 6867591 : if (recog_data.n_operands == 0
560 6867591 : || recog_data.operand[0] != dest)
561 : return false;
562 :
563 4470090 : if (!constrain_operands (0, get_enabled_alternatives (insn->rtl ())))
564 : return false;
565 :
566 4470090 : preprocess_constraints (insn->rtl ());
567 4470090 : auto *alt = which_op_alt ();
568 4470090 : auto dest_class = alt[0].cl;
569 :
570 : // Check operands 1 and above.
571 11945250 : auto check_src = [&] (unsigned int i)
572 : {
573 7475160 : if (recog_data.is_operator[i])
574 : return true;
575 :
576 7371513 : rtx op = recog_data.operand[i];
577 7371513 : if (CONSTANT_P (op))
578 : return true;
579 :
580 4573978 : if (SUBREG_P (op))
581 559743 : op = SUBREG_REG (op);
582 4573978 : if (REG_P (op))
583 : {
584 : // Ignore hard registers. We've already rejected uses of non-fixed
585 : // hard registers in the SET_SRC.
586 3780981 : if (HARD_REGISTER_P (op))
587 : return true;
588 :
589 : // Make sure that the source operand's class is at least as
590 : // permissive as the destination operand's class.
591 3778989 : auto src_class = alternative_class (alt, i);
592 3778989 : if (dest_class != src_class)
593 : {
594 344803 : auto extra_dest_regs = (reg_class_contents[dest_class]
595 344803 : & ~reg_class_contents[src_class]
596 344803 : & ~fixed_reg_set);
597 689606 : if (!hard_reg_set_empty_p (extra_dest_regs))
598 96771 : return false;
599 : }
600 :
601 : // Make sure that the source operand occupies no more hard
602 : // registers than the destination operand. This mostly matters
603 : // for subregs.
604 7364436 : if (targetm.class_max_nregs (dest_class, GET_MODE (dest))
605 3682218 : < targetm.class_max_nregs (src_class, GET_MODE (op)))
606 : return false;
607 :
608 : return true;
609 : }
610 : return false;
611 4470090 : };
612 10864761 : for (int i = 1; i < recog_data.n_operands; ++i)
613 7480252 : if (recog_data.operand_type[i] != OP_OUT && !check_src (i))
614 : return false;
615 :
616 : return true;
617 : }
618 :
619 : // Check uses of DEF to see whether there is anything obvious that
620 : // prevents the substitution of SET into uses of DEF.
621 : bool
622 47884163 : late_combine::check_uses (set_info *def, rtx set)
623 : {
624 47884163 : use_info *prev_use = nullptr;
625 196925168 : for (use_info *use : def->nondebug_insn_uses ())
626 : {
627 67684856 : insn_info *use_insn = use->insn ();
628 :
629 67684856 : if (use->is_live_out_use ())
630 14254745 : continue;
631 53430111 : if (use->only_occurs_in_notes ())
632 196147 : continue;
633 :
634 : // We cannot replace all uses if the value is live on exit.
635 53233964 : if (use->is_artificial ())
636 47884163 : return false;
637 :
638 : // Avoid increasing the complexity of instructions that
639 : // reference allocatable hard registers.
640 52518848 : if (!REG_P (SET_SRC (set))
641 36830525 : && !reload_completed
642 66682187 : && (accesses_include_nonfixed_hard_registers (use_insn->uses ())
643 10736222 : || accesses_include_nonfixed_hard_registers (use_insn->defs ())))
644 4713151 : return false;
645 :
646 : // Don't substitute into a non-local goto, since it can then be
647 : // treated as a jump to local label, e.g. in shorten_branches.
648 : // ??? But this shouldn't be necessary.
649 47805697 : if (use_insn->is_jump ()
650 47805697 : && find_reg_note (use_insn->rtl (), REG_NON_LOCAL_GOTO, NULL_RTX))
651 : return false;
652 :
653 : // Reject cases where one of the uses is a function argument.
654 : // The combine attempt should fail anyway, but this is a common
655 : // case that is easy to check early.
656 47804665 : if (use_insn->is_call ()
657 11101046 : && HARD_REGISTER_P (SET_DEST (set))
658 58893643 : && find_reg_fusage (use_insn->rtl (), USE, SET_DEST (set)))
659 : return false;
660 :
661 : // We'll keep the uses in their original order, even if we move
662 : // them relative to other instructions. Make sure that non-final
663 : // uses do not change any values that occur in the SET_SRC.
664 59510839 : if (prev_use && prev_use->ebb () == use->ebb ())
665 : {
666 8705984 : def_info *ultimate_def = look_through_degenerate_phi (def);
667 8705984 : if (insn_clobbers_resources (prev_use->insn (),
668 8705984 : ultimate_def->insn ()->uses ()))
669 : return false;
670 : }
671 :
672 : prev_use = use;
673 : }
674 :
675 63094564 : for (use_info *use : def->phi_uses ())
676 9949090 : if (!use->phi ()->is_degenerate ()
677 9949090 : || !check_uses (use->phi (), set))
678 8409982 : return false;
679 :
680 22367746 : return true;
681 : }
682 :
683 : // Try to remove DEF's instruction by substituting DEF into all uses.
684 : // SET is the rtx set associated with DEF. If the optimization moves any
685 : // instructions before CURSOR, add those instructions to the end of m_worklist.
686 : bool
687 55214602 : late_combine::combine_into_uses (set_info *def, rtx set, insn_info *cursor)
688 : {
689 55214602 : auto *insn = def->insn ();
690 :
691 : // Don't prolong the live ranges of allocatable hard registers, or put
692 : // them into more complicated instructions. Failing to prevent this
693 : // could lead to spill failures, or at least to worst register allocation.
694 55214602 : if (!reload_completed
695 55214602 : && accesses_include_nonfixed_hard_registers (insn->uses ()))
696 2811188 : return false;
697 :
698 52403414 : if (!reload_completed && !check_register_pressure (insn, set))
699 : return false;
700 :
701 45551994 : if (!check_uses (def, set))
702 : return false;
703 :
704 20828638 : insn_combination combination (def, SET_DEST (set), SET_SRC (set));
705 20828638 : if (!combination.run ())
706 : return false;
707 :
708 2148508 : for (auto *use_change : combination.use_changes ())
709 2112546 : if (*use_change->insn () < *cursor)
710 0 : m_worklist.safe_push (use_change->insn ());
711 : else
712 : break;
713 2148508 : return true;
714 20828638 : }
715 :
716 : // CC_SET is a single_set that sets (only) CC_DEF; OTHER_SET is likewise
717 : // a single_set that sets (only) OTHER_DEF. CC_SET is known to set a
718 : // condition-code register and the instruction that contains CC_SET is
719 : // known to use OTHER_DEF. Try to do CC_SET and OTHER_SET in parallel.
720 : bool
721 4351606 : late_combine::parallelize_insns (set_info *cc_def, rtx cc_set,
722 : set_info *other_def, rtx other_set)
723 : {
724 4351606 : auto attempt = crtl->ssa->new_change_attempt ();
725 :
726 4351606 : insn_info *cc_insn = cc_def->insn ();
727 4351606 : insn_info *other_insn = other_def->insn ();
728 4351606 : if (dump_file && (dump_flags & TDF_DETAILS))
729 0 : fprintf (dump_file, "trying to parallelize insn %d and insn %d\n",
730 : other_insn->uid (), cc_insn->uid ());
731 :
732 : // Try to substitute OTHER_SET into CC_INSN.
733 4351606 : insn_change_watermark rtl_watermark;
734 4351606 : rtx_insn *cc_rtl = cc_insn->rtl ();
735 4351606 : insn_propagation prop (cc_rtl, SET_DEST (other_set), SET_SRC (other_set));
736 4351606 : if (!prop.apply_to_pattern (&PATTERN (cc_rtl))
737 4351606 : || prop.num_replacements == 0)
738 : {
739 13607 : if (dump_file && (dump_flags & TDF_DETAILS))
740 0 : fprintf (dump_file, "-- failed to substitute all uses of r%d\n",
741 : other_def->regno ());
742 13607 : return false;
743 : }
744 :
745 : // Restrict the uses to those outside notes.
746 4337999 : use_array cc_uses = remove_note_accesses (attempt, cc_insn->uses ());
747 4337999 : use_array other_set_uses = remove_note_accesses (attempt,
748 : other_insn->uses ());
749 :
750 : // Remove the use of the substituted value.
751 4337999 : cc_uses = remove_uses_of_def (attempt, cc_uses, other_def);
752 :
753 : // Get the list of uses for the new instruction.
754 4337999 : insn_change cc_change (cc_insn);
755 4337999 : cc_change.new_uses = merge_access_arrays (attempt, other_set_uses, cc_uses);
756 4337999 : if (!cc_change.new_uses.is_valid ())
757 : {
758 45721 : if (dump_file && (dump_flags & TDF_DETAILS))
759 0 : fprintf (dump_file, "-- cannot merge uses\n");
760 45721 : return false;
761 : }
762 :
763 : // The instruction initially defines just two registers. recog can add
764 : // extra clobbers if necessary.
765 4292278 : auto_vec<access_info *, 2> new_defs;
766 4292278 : new_defs.quick_push (cc_def);
767 4292278 : new_defs.quick_push (other_def);
768 4292278 : sort_accesses (new_defs);
769 8584556 : cc_change.new_defs = def_array (access_array (new_defs));
770 :
771 : // Make sure there is somewhere that the new instruction could live.
772 4292278 : auto other_change = insn_change::delete_insn (other_insn);
773 4292278 : insn_change *changes[] = { &other_change, &cc_change };
774 4292278 : cc_change.move_range = cc_insn->ebb ()->insn_range ();
775 4292278 : if (!restrict_movement (cc_change, ignore_changing_insns (changes)))
776 : {
777 877874 : if (dump_file && (dump_flags & TDF_DETAILS))
778 0 : fprintf (dump_file, "-- cannot satisfy all definitions and uses\n");
779 877874 : return false;
780 : }
781 :
782 : // Tentatively install the new pattern. By convention, the CC set
783 : // must be first.
784 3414404 : if (m_parallel)
785 : {
786 2721034 : XVECEXP (m_parallel, 0, 0) = cc_set;
787 2721034 : XVECEXP (m_parallel, 0, 1) = other_set;
788 : }
789 : else
790 : {
791 693370 : rtvec vec = gen_rtvec (2, cc_set, other_set);
792 693370 : m_parallel = gen_rtx_PARALLEL (VOIDmode, vec);
793 : }
794 3414404 : validate_change (cc_rtl, &PATTERN (cc_rtl), m_parallel, 1);
795 :
796 : // These routines report failures themselves.
797 3414404 : if (!recog (attempt, cc_change, ignore_changing_insns (changes))
798 12767 : || !changes_are_worthwhile (changes)
799 3427171 : || !crtl->ssa->verify_insn_changes (changes))
800 3401637 : return false;
801 :
802 12767 : remove_reg_equal_equiv_notes (cc_rtl);
803 12767 : confirm_change_group ();
804 12767 : crtl->ssa->change_insns (changes);
805 12767 : m_parallel = NULL_RTX;
806 12767 : return true;
807 4351606 : }
808 :
809 : // CC_SET is a single_set that sets (only) CC_DEF. See whether CC_DEF
810 : // is a definition of a condition-code register and try to optimize it
811 : // with related instructions if so. Return true if something changed.
812 : //
813 : // This function looks for sequences of the form:
814 : //
815 : // A: (set (reg R1) X1)
816 : // B: ...instructions that might change the value of X1...
817 : // C: (set (reg CC) X2) // X2 uses R1
818 : //
819 : // and tries to change them to:
820 : //
821 : // C': [(set (reg CC) X2')
822 : // (set (reg R1) X1)]
823 : // B: ...instructions that might change the value of X1...
824 : //
825 : // where X2' is the result of replacing R1 with X1 in X2.
826 : //
827 : // Combine can handle this optimization if B doesn't exist and if A and
828 : // C are in the same BB. This pass instead handles cases where B does
829 : // exist and cases where A and C are in different BBs of the same EBB.
830 : bool
831 53066094 : late_combine::combine_cc_setter (set_info *cc_def, rtx cc_set)
832 : {
833 : // Check for a set of a CC register. This isn't needed for correctness;
834 : // it's just a way of narrowing the search space. It could be relaxed if
835 : // there are other situations that would benefit from the same optimization.
836 53066094 : if (!HARD_REGISTER_NUM_P (cc_def->regno ())
837 53066094 : || GET_MODE_CLASS (cc_def->mode()) != MODE_CC)
838 : return false;
839 :
840 : // Search the registers used by the CC setter for an easily-substitutable
841 : // def-use chain.
842 18657951 : for (use_info *other_use : cc_def->insn ()->uses ())
843 10797495 : if (auto *other_def = other_use->def ())
844 10791071 : if (other_use->regno () != cc_def->regno ()
845 10755907 : && other_def->ebb () == cc_def->ebb ()
846 20193246 : && other_def == single_set_info (other_def->insn ()))
847 6385805 : if (rtx other_set = optimizable_set (other_def))
848 4351606 : if (parallelize_insns (cc_def, cc_set, other_def, other_set))
849 12767 : return true;
850 :
851 7860456 : return false;
852 : }
853 :
854 : // Try to optimize INSN in some way. If the optimization moves any
855 : // instructions before CURSOR, and if further optimizations might be
856 : // possible on those instructions, add them to the end of m_worklist.
857 : bool
858 107050811 : late_combine::combine_insn (insn_info *insn, insn_info *cursor)
859 : {
860 107050811 : if (set_info *def = single_set_info (insn))
861 83465161 : if (rtx set = optimizable_set (def))
862 55214602 : return (combine_into_uses (def, set, cursor)
863 55214602 : || combine_cc_setter (def, set));
864 :
865 : return false;
866 : }
867 :
868 : // Run the pass on function FN.
869 : unsigned int
870 1920526 : late_combine::execute (function *fn)
871 : {
872 : // Initialization.
873 1920526 : calculate_dominance_info (CDI_DOMINATORS);
874 1920526 : df_analyze ();
875 1920526 : crtl->ssa = new rtl_ssa::function_info (fn);
876 : // Don't allow memory_operand to match volatile MEMs.
877 1920526 : init_recog_no_volatile ();
878 :
879 1920526 : insn_info *insn = *crtl->ssa->nondebug_insns ().begin ();
880 171032710 : while (insn)
881 : {
882 169112184 : if (!insn->is_artificial ())
883 : {
884 107050811 : insn_info *prev = insn->prev_nondebug_insn ();
885 107050811 : if (combine_insn (insn, prev))
886 : {
887 : // Any instructions that get added to the worklist were
888 : // previously after PREV. Thus if we were able to move
889 : // an instruction X before PREV during one combination,
890 : // X cannot depend on any instructions that we move before
891 : // PREV during subsequent combinations. This means that
892 : // the worklist should be free of backwards dependencies,
893 : // even if it isn't necessarily in RPO.
894 2161275 : for (unsigned int i = 0; i < m_worklist.length (); ++i)
895 0 : combine_insn (m_worklist[i], prev);
896 2161275 : m_worklist.truncate (0);
897 2161275 : insn = prev;
898 : }
899 : }
900 169112184 : insn = insn->next_nondebug_insn ();
901 : }
902 :
903 : // Finalization.
904 1920526 : if (crtl->ssa->perform_pending_updates ())
905 518 : cleanup_cfg (0);
906 :
907 1920526 : delete crtl->ssa;
908 1920526 : crtl->ssa = nullptr;
909 :
910 : // Make the recognizer allow volatile MEMs again.
911 1920526 : init_recog ();
912 1920526 : free_dominance_info (CDI_DOMINATORS);
913 1920526 : return 0;
914 : }
915 :
916 : class pass_late_combine : public rtl_opt_pass
917 : {
918 : public:
919 575744 : pass_late_combine (gcc::context *ctxt)
920 1151488 : : rtl_opt_pass (pass_data_late_combine, ctxt)
921 : {}
922 :
923 : // opt_pass methods:
924 287872 : opt_pass *clone () override { return new pass_late_combine (m_ctxt); }
925 : bool gate (function *) override;
926 : unsigned int execute (function *) override;
927 : };
928 :
929 : bool
930 2961910 : pass_late_combine::gate (function *)
931 : {
932 2961910 : return optimize > 0 && flag_late_combine_instructions;
933 : }
934 :
935 : unsigned int
936 1920526 : pass_late_combine::execute (function *fn)
937 : {
938 1920526 : return late_combine ().execute (fn);
939 : }
940 :
941 : } // end namespace
942 :
943 : // Create a new CC fusion pass instance.
944 :
945 : rtl_opt_pass *
946 287872 : make_pass_late_combine (gcc::context *ctxt)
947 : {
948 287872 : return new pass_late_combine (ctxt);
949 : }
|