Branch data Line data Source code
1 : : // Late-stage instruction combination pass.
2 : : // Copyright (C) 2023-2025 Free Software Foundation, Inc.
3 : : //
4 : : // This file is part of GCC.
5 : : //
6 : : // GCC is free software; you can redistribute it and/or modify it under
7 : : // the terms of the GNU General Public License as published by the Free
8 : : // Software Foundation; either version 3, or (at your option) any later
9 : : // version.
10 : : //
11 : : // GCC is distributed in the hope that it will be useful, but WITHOUT ANY
12 : : // WARRANTY; without even the implied warranty of MERCHANTABILITY or
13 : : // FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
14 : : // for more details.
15 : : //
16 : : // You should have received a copy of the GNU General Public License
17 : : // along with GCC; see the file COPYING3. If not see
18 : : // <http://www.gnu.org/licenses/>.
19 : :
20 : : // This pass currently has two purposes:
21 : : //
22 : : // - to substitute definitions into all uses, so that the definition
23 : : // can be removed.
24 : : //
25 : : // - to try to parallelise sets of condition-code registers with a
26 : : // related instruction (see combine_cc_setter for details).
27 : : //
28 : : // However, it could be extended to handle other combination-related
29 : : // optimizations in future.
30 : : //
31 : : // The pass can run before or after register allocation. When running
32 : : // before register allocation, it tries to avoid cases that are likely
33 : : // to increase register pressure. For the same reason, it avoids moving
34 : : // instructions around, even if doing so would allow an optimization to
35 : : // succeed. These limitations are removed when running after register
36 : : // allocation.
37 : :
38 : : #define INCLUDE_ALGORITHM
39 : : #define INCLUDE_FUNCTIONAL
40 : : #define INCLUDE_ARRAY
41 : : #include "config.h"
42 : : #include "system.h"
43 : : #include "coretypes.h"
44 : : #include "backend.h"
45 : : #include "rtl.h"
46 : : #include "df.h"
47 : : #include "rtl-ssa.h"
48 : : #include "print-rtl.h"
49 : : #include "tree-pass.h"
50 : : #include "cfgcleanup.h"
51 : : #include "target.h"
52 : : #include "dbgcnt.h"
53 : :
54 : : using namespace rtl_ssa;
55 : :
56 : : namespace {
57 : : const pass_data pass_data_late_combine =
58 : : {
59 : : RTL_PASS, // type
60 : : "late_combine", // name
61 : : OPTGROUP_NONE, // optinfo_flags
62 : : TV_LATE_COMBINE, // tv_id
63 : : 0, // properties_required
64 : : 0, // properties_provided
65 : : 0, // properties_destroyed
66 : : 0, // todo_flags_start
67 : : TODO_df_finish, // todo_flags_finish
68 : : };
69 : :
70 : : // Represents an attempt to substitute a single-set definition into all
71 : : // uses of the definition.
72 : : class insn_combination
73 : : {
74 : : public:
75 : : insn_combination (set_info *, rtx, rtx);
76 : : bool run ();
77 : : array_slice<insn_change *const> use_changes () const;
78 : :
79 : : private:
80 : : use_array get_new_uses (use_info *);
81 : : bool substitute_nondebug_use (use_info *);
82 : : bool substitute_nondebug_uses (set_info *);
83 : : bool try_to_preserve_debug_info (insn_change &, use_info *);
84 : : void substitute_debug_use (use_info *);
85 : : bool substitute_note (insn_info *, rtx, bool);
86 : : void substitute_notes (insn_info *, bool);
87 : : void substitute_note_uses (use_info *);
88 : : void substitute_optional_uses (set_info *);
89 : :
90 : : // Represents the state of the function's RTL at the start of this
91 : : // combination attempt.
92 : : insn_change_watermark m_rtl_watermark;
93 : :
94 : : // Represents the rtl-ssa state at the start of this combination attempt.
95 : : obstack_watermark m_attempt;
96 : :
97 : : // The instruction that contains the definition, and that we're trying
98 : : // to delete.
99 : : insn_info *m_def_insn;
100 : :
101 : : // The definition itself.
102 : : set_info *m_def;
103 : :
104 : : // The destination and source of the single set that defines m_def.
105 : : // The destination is known to be a plain REG.
106 : : rtx m_dest;
107 : : rtx m_src;
108 : :
109 : : // Contains the full list of changes that we want to make, in reverse
110 : : // postorder.
111 : : auto_vec<insn_change *> m_nondebug_changes;
112 : : };
113 : :
114 : : // Class that represents one run of the pass.
115 : 5796018 : class late_combine
116 : : {
117 : : public:
118 : : unsigned int execute (function *);
119 : :
120 : : private:
121 : : rtx optimizable_set (set_info *);
122 : : bool check_register_pressure (insn_info *, rtx);
123 : : bool check_uses (set_info *, rtx);
124 : : bool combine_into_uses (set_info *, rtx, insn_info *);
125 : : bool parallelize_insns (set_info *, rtx, set_info *, rtx);
126 : : bool combine_cc_setter (set_info *, rtx);
127 : : bool combine_insn (insn_info *, insn_info *);
128 : :
129 : : auto_vec<insn_info *> m_worklist;
130 : :
131 : : // A spare PARALLEL rtx, or null if none.
132 : : rtx m_parallel = NULL_RTX;
133 : : };
134 : :
135 : 21079249 : insn_combination::insn_combination (set_info *def, rtx dest, rtx src)
136 : 21079249 : : m_rtl_watermark (),
137 : 21079249 : m_attempt (crtl->ssa->new_change_attempt ()),
138 : 21079249 : m_def_insn (def->insn ()),
139 : 21079249 : m_def (def),
140 : 21079249 : m_dest (dest),
141 : 21079249 : m_src (src),
142 : 21079249 : m_nondebug_changes ()
143 : : {
144 : 21079249 : }
145 : :
146 : : array_slice<insn_change *const>
147 : 2169563 : insn_combination::use_changes () const
148 : : {
149 : 2169563 : return { m_nondebug_changes.address () + 1,
150 : 2169563 : m_nondebug_changes.length () - 1 };
151 : : }
152 : :
153 : : // USE is a direct or indirect use of m_def. Return the list of uses
154 : : // that would be needed after substituting m_def into the instruction.
155 : : // The returned list is marked as invalid if USE's insn and m_def_insn
156 : : // use different definitions for the same resource (register or memory).
157 : : use_array
158 : 27045244 : insn_combination::get_new_uses (use_info *use)
159 : : {
160 : 27045244 : auto *def = use->def ();
161 : 27045244 : auto *use_insn = use->insn ();
162 : :
163 : 27045244 : use_array new_uses = use_insn->uses ();
164 : 27045244 : new_uses = remove_uses_of_def (m_attempt, new_uses, def);
165 : 27045244 : new_uses = merge_access_arrays (m_attempt, m_def_insn->uses (), new_uses);
166 : 53971821 : if (new_uses.is_valid () && use->ebb () != m_def->ebb ())
167 : 4408851 : new_uses = crtl->ssa->make_uses_available (m_attempt, new_uses, use->bb (),
168 : 4408851 : use_insn->is_debug_insn ());
169 : 27045244 : return new_uses;
170 : : }
171 : :
172 : : // Start the process of trying to replace USE by substitution, given that
173 : : // USE occurs in a non-debug instruction. Check:
174 : : //
175 : : // - that the substitution can be represented in RTL
176 : : //
177 : : // - that each use of a resource (register or memory) within the new
178 : : // instruction has a consistent definition
179 : : //
180 : : // - that the new instruction is a recognized pattern
181 : : //
182 : : // - that the instruction can be placed somewhere that makes all definitions
183 : : // and uses valid, and that permits any new hard-register clobbers added
184 : : // during the recognition process
185 : : //
186 : : // Return true on success.
187 : : bool
188 : 26054025 : insn_combination::substitute_nondebug_use (use_info *use)
189 : : {
190 : 26054025 : insn_info *use_insn = use->insn ();
191 : 26054025 : rtx_insn *use_rtl = use_insn->rtl ();
192 : :
193 : 26054025 : if (dump_file && (dump_flags & TDF_DETAILS))
194 : 0 : dump_insn_slim (dump_file, use->insn ()->rtl ());
195 : :
196 : : // Reject second and subsequent uses if the target does not allow
197 : : // the defining instruction to be copied.
198 : 26054025 : if (targetm.cannot_copy_insn_p
199 : 0 : && m_nondebug_changes.length () >= 2
200 : 26054025 : && targetm.cannot_copy_insn_p (m_def_insn->rtl ()))
201 : : {
202 : 0 : if (dump_file && (dump_flags & TDF_DETAILS))
203 : 0 : fprintf (dump_file, "-- The target does not allow multiple"
204 : 0 : " copies of insn %d\n", m_def_insn->uid ());
205 : 0 : return false;
206 : : }
207 : :
208 : : // Check that we can change the instruction pattern. Leave recognition
209 : : // of the result till later.
210 : 26054025 : insn_propagation prop (use_rtl, m_dest, m_src);
211 : 26054025 : if (!prop.apply_to_pattern (&PATTERN (use_rtl))
212 : 26054025 : || prop.num_replacements == 0)
213 : : {
214 : 489924 : if (dump_file && (dump_flags & TDF_DETAILS))
215 : 0 : fprintf (dump_file, "-- RTL substitution failed\n");
216 : 489924 : return false;
217 : : }
218 : :
219 : 25564101 : use_array new_uses = get_new_uses (use);
220 : 25564101 : if (!new_uses.is_valid ())
221 : : {
222 : 1057228 : if (dump_file && (dump_flags & TDF_DETAILS))
223 : 0 : fprintf (dump_file, "-- could not prove that all sources"
224 : : " are available\n");
225 : 1057228 : return false;
226 : : }
227 : :
228 : : // Create a tentative change for the use.
229 : 24506873 : auto *where = XOBNEW (m_attempt, insn_change);
230 : 24506873 : auto *use_change = new (where) insn_change (use_insn);
231 : 24506873 : m_nondebug_changes.safe_push (use_change);
232 : 24506873 : use_change->new_uses = new_uses;
233 : :
234 : 24506873 : struct local_ignore : ignore_nothing
235 : : {
236 : 24506873 : local_ignore (const set_info *def, const insn_info *use_insn)
237 : 24506873 : : m_def (def), m_use_insn (use_insn) {}
238 : :
239 : : // We don't limit the number of insns per optimization, so ignoring all
240 : : // insns for all insns would lead to quadratic complexity. Just ignore
241 : : // the use and definition, which should be enough for most purposes.
242 : : bool
243 : 139583392 : should_ignore_insn (const insn_info *insn)
244 : : {
245 : 139583392 : return insn == m_def->insn () || insn == m_use_insn;
246 : : }
247 : :
248 : : // Ignore the definition that we're removing, and all uses of it.
249 : 47997154 : bool should_ignore_def (const def_info *def) { return def == m_def; }
250 : :
251 : : const set_info *m_def;
252 : : const insn_info *m_use_insn;
253 : : };
254 : :
255 : 24506873 : auto ignore = local_ignore (m_def, use_insn);
256 : :
257 : : // Moving instructions before register allocation could increase
258 : : // register pressure. Only try moving them after RA.
259 : 24506873 : if (reload_completed && can_move_insn_p (use_insn))
260 : 8399647 : use_change->move_range = { use_insn->bb ()->head_insn (),
261 : 8399647 : use_insn->ebb ()->last_bb ()->end_insn () };
262 : 24506873 : if (!restrict_movement (*use_change, ignore))
263 : : {
264 : 765562 : if (dump_file && (dump_flags & TDF_DETAILS))
265 : 0 : fprintf (dump_file, "-- cannot satisfy all definitions and uses"
266 : 0 : " in insn %d\n", INSN_UID (use_insn->rtl ()));
267 : 765562 : return false;
268 : : }
269 : :
270 : 23741311 : if (!recog (m_attempt, *use_change, ignore))
271 : : return false;
272 : :
273 : : return true;
274 : : }
275 : :
276 : : // Apply substitute_nondebug_use to all direct and indirect uses of DEF.
277 : : // There will be at most one level of indirection.
278 : : bool
279 : 21711764 : insn_combination::substitute_nondebug_uses (set_info *def)
280 : : {
281 : 64866284 : for (use_info *use : def->nondebug_insn_uses ())
282 : 29438331 : if (!use->is_live_out_use ()
283 : 26139433 : && !use->only_occurs_in_notes ()
284 : 55492356 : && !substitute_nondebug_use (use))
285 : 19283057 : return false;
286 : :
287 : 6056033 : for (use_info *use : def->phi_uses ())
288 : 632515 : if (!substitute_nondebug_uses (use->phi ()))
289 : 566104 : return false;
290 : :
291 : 2428707 : return true;
292 : : }
293 : :
294 : : // USE_CHANGE.insn () is a debug instruction that uses m_def. Try to
295 : : // substitute the definition into the instruction and try to describe
296 : : // the result in USE_CHANGE. Return true on success. Failure means that
297 : : // the instruction must be reset instead.
298 : : bool
299 : 1429291 : insn_combination::try_to_preserve_debug_info (insn_change &use_change,
300 : : use_info *use)
301 : : {
302 : : // Punt on unsimplified subregs of hard registers. In that case,
303 : : // propagation can succeed and create a wider reg than the one we
304 : : // started with.
305 : 1429291 : if (HARD_REGISTER_NUM_P (use->regno ())
306 : 1429291 : && use->includes_subregs ())
307 : : return false;
308 : :
309 : 1429264 : insn_info *use_insn = use_change.insn ();
310 : 1429264 : rtx_insn *use_rtl = use_insn->rtl ();
311 : :
312 : 1429264 : use_change.new_uses = get_new_uses (use);
313 : 1429444 : if (!use_change.new_uses.is_valid ()
314 : 1424589 : || !restrict_movement (use_change))
315 : 14813 : return false;
316 : :
317 : 1414451 : insn_propagation prop (use_rtl, m_dest, m_src);
318 : 1414451 : return prop.apply_to_pattern (&INSN_VAR_LOCATION_LOC (use_rtl));
319 : : }
320 : :
321 : : // USE_INSN is a debug instruction that uses m_def. Update it to reflect
322 : : // the fact that m_def is going to disappear. Try to preserve the source
323 : : // value if possible, but reset the instruction if not.
324 : : void
325 : 1429291 : insn_combination::substitute_debug_use (use_info *use)
326 : : {
327 : 1429291 : auto *use_insn = use->insn ();
328 : 1429291 : rtx_insn *use_rtl = use_insn->rtl ();
329 : :
330 : 1429291 : auto use_change = insn_change (use_insn);
331 : 1429291 : if (!try_to_preserve_debug_info (use_change, use))
332 : : {
333 : 14904 : use_change.new_uses = {};
334 : 14904 : use_change.move_range = use_change.insn ();
335 : 14904 : INSN_VAR_LOCATION_LOC (use_rtl) = gen_rtx_UNKNOWN_VAR_LOC ();
336 : : }
337 : 1429291 : insn_change *changes[] = { &use_change };
338 : 1429291 : crtl->ssa->change_insns (changes);
339 : 1429291 : }
340 : :
341 : : // NOTE is a reg note of USE_INSN, which previously used m_def. Update
342 : : // the note to reflect the fact that m_def is going to disappear. Return
343 : : // true on success, or false if the note must be deleted.
344 : : //
345 : : // CAN_PROPAGATE is true if m_dest can be replaced with m_use.
346 : : bool
347 : 3376687 : insn_combination::substitute_note (insn_info *use_insn, rtx note,
348 : : bool can_propagate)
349 : : {
350 : 3376687 : if (REG_NOTE_KIND (note) == REG_EQUAL
351 : 3376687 : || REG_NOTE_KIND (note) == REG_EQUIV)
352 : : {
353 : 173370 : insn_propagation prop (use_insn->rtl (), m_dest, m_src);
354 : 173370 : return (prop.apply_to_note (&XEXP (note, 0))
355 : 173370 : && (can_propagate || prop.num_replacements == 0));
356 : : }
357 : : return true;
358 : : }
359 : :
360 : : // Update USE_INSN's notes after deciding to go ahead with the optimization.
361 : : // CAN_PROPAGATE is true if m_dest can be replaced with m_use.
362 : : void
363 : 5746719 : insn_combination::substitute_notes (insn_info *use_insn, bool can_propagate)
364 : : {
365 : 5746719 : rtx_insn *use_rtl = use_insn->rtl ();
366 : 5746719 : rtx *ptr = ®_NOTES (use_rtl);
367 : 9123406 : while (rtx note = *ptr)
368 : : {
369 : 3376687 : if (substitute_note (use_insn, note, can_propagate))
370 : 3373644 : ptr = &XEXP (note, 1);
371 : : else
372 : 3043 : *ptr = XEXP (note, 1);
373 : : }
374 : 5746719 : }
375 : :
376 : : // We've decided to go ahead with the substitution. Update all REG_NOTES
377 : : // involving USE.
378 : : void
379 : 5746719 : insn_combination::substitute_note_uses (use_info *use)
380 : : {
381 : 5746719 : insn_info *use_insn = use->insn ();
382 : :
383 : 5746719 : bool can_propagate = true;
384 : 5746719 : if (use->only_occurs_in_notes ())
385 : : {
386 : : // The only uses are in notes. Try to keep the note if we can,
387 : : // but removing it is better than aborting the optimization.
388 : 51879 : insn_change use_change (use_insn);
389 : 51879 : use_change.new_uses = get_new_uses (use);
390 : 51879 : if (!use_change.new_uses.is_valid ()
391 : 51395 : || !restrict_movement (use_change))
392 : : {
393 : 3043 : use_change.move_range = use_insn;
394 : 3043 : use_change.new_uses = remove_uses_of_def (m_attempt,
395 : : use_insn->uses (),
396 : 3043 : use->def ());
397 : 3043 : can_propagate = false;
398 : : }
399 : 51879 : if (dump_file && (dump_flags & TDF_DETAILS))
400 : : {
401 : 0 : fprintf (dump_file, "%s notes in:\n",
402 : : can_propagate ? "updating" : "removing");
403 : 0 : dump_insn_slim (dump_file, use_insn->rtl ());
404 : : }
405 : 51879 : substitute_notes (use_insn, can_propagate);
406 : 51879 : insn_change *changes[] = { &use_change };
407 : 51879 : crtl->ssa->change_insns (changes);
408 : : }
409 : : else
410 : : // We've already decided to update the insn's pattern and know that m_src
411 : : // will be available at the insn's new location. Now update its notes.
412 : 5694840 : substitute_notes (use_insn, can_propagate);
413 : 5746719 : }
414 : :
415 : : // We've decided to go ahead with the substitution and we've dealt with
416 : : // all uses that occur in the patterns of non-debug insns. Update all
417 : : // other uses for the fact that m_def is about to disappear.
418 : : void
419 : 2193952 : insn_combination::substitute_optional_uses (set_info *def)
420 : : {
421 : 4387904 : if (auto insn_uses = def->all_insn_uses ())
422 : : {
423 : : use_info *use = *insn_uses.begin ();
424 : 10626025 : while (use)
425 : : {
426 : 8468151 : use_info *next_use = use->next_any_insn_use ();
427 : 8468151 : if (use->is_in_debug_insn ())
428 : 1429291 : substitute_debug_use (use);
429 : 7038860 : else if (!use->is_live_out_use ())
430 : 5746719 : substitute_note_uses (use);
431 : : use = next_use;
432 : : }
433 : : }
434 : 4412293 : for (use_info *use : def->phi_uses ())
435 : 24389 : substitute_optional_uses (use->phi ());
436 : 2193952 : }
437 : :
438 : : // Try to perform the substitution. Return true on success.
439 : : bool
440 : 21079249 : insn_combination::run ()
441 : : {
442 : 21079249 : if (dump_file && (dump_flags & TDF_DETAILS))
443 : : {
444 : 0 : fprintf (dump_file, "\ntrying to combine definition of r%d in:\n",
445 : 0 : m_def->regno ());
446 : 0 : dump_insn_slim (dump_file, m_def_insn->rtl ());
447 : 0 : fprintf (dump_file, "into:\n");
448 : : }
449 : :
450 : 21079249 : auto def_change = insn_change::delete_insn (m_def_insn);
451 : 21079249 : m_nondebug_changes.safe_push (&def_change);
452 : :
453 : 21079249 : if (!substitute_nondebug_uses (m_def)
454 : 4724592 : || !changes_are_worthwhile (m_nondebug_changes)
455 : 25427005 : || !crtl->ssa->verify_insn_changes (m_nondebug_changes))
456 : 18909686 : return false;
457 : :
458 : : // We've now decided that the optimization is valid and profitable.
459 : : // Allow it to be suppressed for bisection purposes.
460 : 2169563 : if (!dbg_cnt (::late_combine))
461 : : return false;
462 : :
463 : 2169563 : substitute_optional_uses (m_def);
464 : :
465 : 2169563 : confirm_change_group ();
466 : 4339126 : crtl->ssa->change_insns (m_nondebug_changes);
467 : 2169563 : return true;
468 : : }
469 : :
470 : : // DEF is the result of calling single_set_info on its instruction.
471 : : // See whether that instruction is a single_set that we can optimize.
472 : : // Return the set if so, otherwise return null.
473 : : rtx
474 : 91166707 : late_combine::optimizable_set (set_info *def)
475 : : {
476 : : // For simplicity, don't try to handle sets of multiple hard registers.
477 : : // And for correctness, don't remove any assignments to the stack or
478 : : // frame pointers, since that would implicitly change the set of valid
479 : : // memory locations between this assignment and the next.
480 : : //
481 : : // Removing assignments to the hard frame pointer would invalidate
482 : : // backtraces.
483 : 91166707 : if (!def->is_reg ()
484 : 72195369 : || def->regno () == STACK_POINTER_REGNUM
485 : 72195369 : || def->regno () == FRAME_POINTER_REGNUM
486 : 163362076 : || def->regno () == HARD_FRAME_POINTER_REGNUM)
487 : : return NULL_RTX;
488 : :
489 : 71611287 : auto *insn = def->insn ();
490 : : if (!insn->can_be_optimized ()
491 : 71611287 : || insn->is_asm ()
492 : 71592649 : || insn->is_call ()
493 : 70564655 : || insn->has_volatile_refs ()
494 : 69716123 : || insn->has_pre_post_modify ()
495 : 141327410 : || !can_move_insn_p (insn))
496 : 10647144 : return NULL_RTX;
497 : :
498 : 60964143 : rtx set = single_set (insn->rtl ());
499 : 60964143 : if (!set)
500 : : return NULL_RTX;
501 : :
502 : : // For simplicity, don't try to handle subreg destinations.
503 : 60960326 : rtx dest = SET_DEST (set);
504 : 60960326 : if (!REG_P (dest) || REG_NREGS (dest) != 1 || def->regno () != REGNO (dest))
505 : : return NULL_RTX;
506 : :
507 : : return set;
508 : : }
509 : :
510 : : // Suppose that we can replace all uses of SET_DEST (SET) with SET_SRC (SET),
511 : : // where SET occurs in INSN. Return true if doing so is not likely to
512 : : // increase register pressure.
513 : : bool
514 : 27606157 : late_combine::check_register_pressure (insn_info *insn, rtx set)
515 : : {
516 : : // Plain register-to-register moves do not establish a register class
517 : : // preference and have no well-defined effect on the register allocator.
518 : : // If changes in register class are needed, the register allocator is
519 : : // in the best position to place those changes. If no change in
520 : : // register class is needed, then the optimization reduces register
521 : : // pressure if SET_SRC (set) was already live at uses, otherwise the
522 : : // optimization is pressure-neutral.
523 : 27606157 : rtx src = SET_SRC (set);
524 : 27606157 : if (REG_P (src))
525 : : return true;
526 : :
527 : : // On the same basis, substituting a SET_SRC that contains a single
528 : : // pseudo register either reduces pressure or is pressure-neutral,
529 : : // subject to the constraints below. We would need to do more
530 : : // analysis for SET_SRCs that use more than one pseudo register.
531 : 20124141 : unsigned int nregs = 0;
532 : 37590427 : for (auto *use : insn->uses ())
533 : 20818137 : if (use->is_reg ()
534 : 17364989 : && !HARD_REGISTER_NUM_P (use->regno ())
535 : 34587928 : && !use->only_occurs_in_notes ())
536 : 13683542 : if (++nregs > 1)
537 : 3351851 : return false;
538 : :
539 : : // If there are no pseudo registers in SET_SRC then the optimization
540 : : // should improve register pressure.
541 : 16772290 : if (nregs == 0)
542 : : return true;
543 : :
544 : : // We'd be substituting (set (reg R1) SRC) where SRC is known to
545 : : // contain a single pseudo register R2. Assume for simplicity that
546 : : // each new use of R2 would need to be in the same class C as the
547 : : // current use of R2. If, for a realistic allocation, C is a
548 : : // non-strict superset of the R1's register class, the effect on
549 : : // register pressure should be positive or neutral. If instead
550 : : // R1 occupies a different register class from R2, or if R1 has
551 : : // more allocation freedom than R2, then there's a higher risk that
552 : : // the effect on register pressure could be negative.
553 : : //
554 : : // First use constrain_operands to get the most likely choice of
555 : : // alternative. For simplicity, just handle the case where the
556 : : // output operand is operand 0.
557 : 6979840 : extract_insn (insn->rtl ());
558 : 6979840 : rtx dest = SET_DEST (set);
559 : 6979840 : if (recog_data.n_operands == 0
560 : 6979840 : || recog_data.operand[0] != dest)
561 : : return false;
562 : :
563 : 4550496 : if (!constrain_operands (0, get_enabled_alternatives (insn->rtl ())))
564 : : return false;
565 : :
566 : 4550496 : preprocess_constraints (insn->rtl ());
567 : 4550496 : auto *alt = which_op_alt ();
568 : 4550496 : auto dest_class = alt[0].cl;
569 : :
570 : : // Check operands 1 and above.
571 : 12140664 : auto check_src = [&] (unsigned int i)
572 : : {
573 : 7590168 : if (recog_data.is_operator[i])
574 : : return true;
575 : :
576 : 7478212 : rtx op = recog_data.operand[i];
577 : 7478212 : if (CONSTANT_P (op))
578 : : return true;
579 : :
580 : 4661232 : if (SUBREG_P (op))
581 : 567654 : op = SUBREG_REG (op);
582 : 4661232 : if (REG_P (op))
583 : : {
584 : : // Ignore hard registers. We've already rejected uses of non-fixed
585 : : // hard registers in the SET_SRC.
586 : 3837499 : if (HARD_REGISTER_P (op))
587 : : return true;
588 : :
589 : : // Make sure that the source operand's class is at least as
590 : : // permissive as the destination operand's class.
591 : 3835587 : auto src_class = alternative_class (alt, i);
592 : 3835587 : if (dest_class != src_class)
593 : : {
594 : 337198 : auto extra_dest_regs = (reg_class_contents[dest_class]
595 : 337198 : & ~reg_class_contents[src_class]
596 : 337198 : & ~fixed_reg_set);
597 : 674396 : if (!hard_reg_set_empty_p (extra_dest_regs))
598 : 93240 : return false;
599 : : }
600 : :
601 : : // Make sure that the source operand occupies no more hard
602 : : // registers than the destination operand. This mostly matters
603 : : // for subregs.
604 : 7484694 : if (targetm.class_max_nregs (dest_class, GET_MODE (dest))
605 : 3742347 : < targetm.class_max_nregs (src_class, GET_MODE (op)))
606 : : return false;
607 : :
608 : : return true;
609 : : }
610 : : return false;
611 : 4550496 : };
612 : 11018976 : for (int i = 1; i < recog_data.n_operands; ++i)
613 : 7595214 : if (recog_data.operand_type[i] != OP_OUT && !check_src (i))
614 : : return false;
615 : :
616 : : return true;
617 : : }
618 : :
619 : : // Check uses of DEF to see whether there is anything obvious that
620 : : // prevents the substitution of SET into uses of DEF.
621 : : bool
622 : 48846087 : late_combine::check_uses (set_info *def, rtx set)
623 : : {
624 : 48846087 : use_info *prev_use = nullptr;
625 : 199128984 : for (use_info *use : def->nondebug_insn_uses ())
626 : : {
627 : 68402975 : insn_info *use_insn = use->insn ();
628 : :
629 : 68402975 : if (use->is_live_out_use ())
630 : 14330573 : continue;
631 : 54072402 : if (use->only_occurs_in_notes ())
632 : 201947 : continue;
633 : :
634 : : // We cannot replace all uses if the value is live on exit.
635 : 53870455 : if (use->is_artificial ())
636 : 48846087 : return false;
637 : :
638 : : // Avoid increasing the complexity of instructions that
639 : : // reference allocatable hard registers.
640 : 53146015 : if (!REG_P (SET_SRC (set))
641 : 37138881 : && !reload_completed
642 : 67444585 : && (accesses_include_nonfixed_hard_registers (use_insn->uses ())
643 : 10813035 : || accesses_include_nonfixed_hard_registers (use_insn->defs ())))
644 : 4838505 : return false;
645 : :
646 : : // Don't substitute into a non-local goto, since it can then be
647 : : // treated as a jump to local label, e.g. in shorten_branches.
648 : : // ??? But this shouldn't be necessary.
649 : 48307510 : if (use_insn->is_jump ()
650 : 48307510 : && find_reg_note (use_insn->rtl (), REG_NON_LOCAL_GOTO, NULL_RTX))
651 : : return false;
652 : :
653 : : // Reject cases where one of the uses is a function argument.
654 : : // The combine attempt should fail anyway, but this is a common
655 : : // case that is easy to check early.
656 : 48306478 : if (use_insn->is_call ()
657 : 11534365 : && HARD_REGISTER_P (SET_DEST (set))
658 : 59828745 : && find_reg_fusage (use_insn->rtl (), USE, SET_DEST (set)))
659 : : return false;
660 : :
661 : : // We'll keep the uses in their original order, even if we move
662 : : // them relative to other instructions. Make sure that non-final
663 : : // uses do not change any values that occur in the SET_SRC.
664 : 59208080 : if (prev_use && prev_use->ebb () == use->ebb ())
665 : : {
666 : 8554198 : def_info *ultimate_def = look_through_degenerate_phi (def);
667 : 8554198 : if (insn_clobbers_resources (prev_use->insn (),
668 : 8554198 : ultimate_def->insn ()->uses ()))
669 : : return false;
670 : : }
671 : :
672 : : prev_use = use;
673 : : }
674 : :
675 : 63824130 : for (use_info *use : def->phi_uses ())
676 : 10082268 : if (!use->phi ()->is_degenerate ()
677 : 10082268 : || !check_uses (use->phi (), set))
678 : 8581172 : return false;
679 : :
680 : 22580345 : return true;
681 : : }
682 : :
683 : : // Try to remove DEF's instruction by substituting DEF into all uses.
684 : : // SET is the rtx set associated with DEF. If the optimization moves any
685 : : // instructions before CURSOR, add those instructions to the end of m_worklist.
686 : : bool
687 : 56319743 : late_combine::combine_into_uses (set_info *def, rtx set, insn_info *cursor)
688 : : {
689 : 56319743 : auto *insn = def->insn ();
690 : :
691 : : // Don't prolong the live ranges of allocatable hard registers, or put
692 : : // them into more complicated instructions. Failing to prevent this
693 : : // could lead to spill failures, or at least to worst register allocation.
694 : 56319743 : if (!reload_completed
695 : 56319743 : && accesses_include_nonfixed_hard_registers (insn->uses ()))
696 : 2867003 : return false;
697 : :
698 : 53452740 : if (!reload_completed && !check_register_pressure (insn, set))
699 : : return false;
700 : :
701 : 46544811 : if (!check_uses (def, set))
702 : : return false;
703 : :
704 : 21079249 : insn_combination combination (def, SET_DEST (set), SET_SRC (set));
705 : 21079249 : if (!combination.run ())
706 : : return false;
707 : :
708 : 2169563 : for (auto *use_change : combination.use_changes ())
709 : 2133410 : if (*use_change->insn () < *cursor)
710 : 0 : m_worklist.safe_push (use_change->insn ());
711 : : else
712 : : break;
713 : 2169563 : return true;
714 : 21079249 : }
715 : :
716 : : // CC_SET is a single_set that sets (only) CC_DEF; OTHER_SET is likewise
717 : : // a single_set that sets (only) OTHER_DEF. CC_SET is known to set a
718 : : // condition-code register and the instruction that contains CC_SET is
719 : : // known to use OTHER_DEF. Try to do CC_SET and OTHER_SET in parallel.
720 : : bool
721 : 4377188 : late_combine::parallelize_insns (set_info *cc_def, rtx cc_set,
722 : : set_info *other_def, rtx other_set)
723 : : {
724 : 4377188 : auto attempt = crtl->ssa->new_change_attempt ();
725 : :
726 : 4377188 : insn_info *cc_insn = cc_def->insn ();
727 : 4377188 : insn_info *other_insn = other_def->insn ();
728 : 4377188 : if (dump_file && (dump_flags & TDF_DETAILS))
729 : 0 : fprintf (dump_file, "trying to parallelize insn %d and insn %d\n",
730 : : other_insn->uid (), cc_insn->uid ());
731 : :
732 : : // Try to substitute OTHER_SET into CC_INSN.
733 : 4377188 : insn_change_watermark rtl_watermark;
734 : 4377188 : rtx_insn *cc_rtl = cc_insn->rtl ();
735 : 4377188 : insn_propagation prop (cc_rtl, SET_DEST (other_set), SET_SRC (other_set));
736 : 4377188 : if (!prop.apply_to_pattern (&PATTERN (cc_rtl))
737 : 4377188 : || prop.num_replacements == 0)
738 : : {
739 : 13884 : if (dump_file && (dump_flags & TDF_DETAILS))
740 : 0 : fprintf (dump_file, "-- failed to substitute all uses of r%d\n",
741 : : other_def->regno ());
742 : 13884 : return false;
743 : : }
744 : :
745 : : // Restrict the uses to those outside notes.
746 : 4363304 : use_array cc_uses = remove_note_accesses (attempt, cc_insn->uses ());
747 : 4363304 : use_array other_set_uses = remove_note_accesses (attempt,
748 : : other_insn->uses ());
749 : :
750 : : // Remove the use of the substituted value.
751 : 4363304 : cc_uses = remove_uses_of_def (attempt, cc_uses, other_def);
752 : :
753 : : // Get the list of uses for the new instruction.
754 : 4363304 : insn_change cc_change (cc_insn);
755 : 4363304 : cc_change.new_uses = merge_access_arrays (attempt, other_set_uses, cc_uses);
756 : 4363304 : if (!cc_change.new_uses.is_valid ())
757 : : {
758 : 47350 : if (dump_file && (dump_flags & TDF_DETAILS))
759 : 0 : fprintf (dump_file, "-- cannot merge uses\n");
760 : 47350 : return false;
761 : : }
762 : :
763 : : // The instruction initially defines just two registers. recog can add
764 : : // extra clobbers if necessary.
765 : 4315954 : auto_vec<access_info *, 2> new_defs;
766 : 4315954 : new_defs.quick_push (cc_def);
767 : 4315954 : new_defs.quick_push (other_def);
768 : 4315954 : sort_accesses (new_defs);
769 : 8631908 : cc_change.new_defs = def_array (access_array (new_defs));
770 : :
771 : : // Make sure there is somewhere that the new instruction could live.
772 : 4315954 : auto other_change = insn_change::delete_insn (other_insn);
773 : 4315954 : insn_change *changes[] = { &other_change, &cc_change };
774 : 4315954 : cc_change.move_range = cc_insn->ebb ()->insn_range ();
775 : 4315954 : if (!restrict_movement (cc_change, ignore_changing_insns (changes)))
776 : : {
777 : 890581 : if (dump_file && (dump_flags & TDF_DETAILS))
778 : 0 : fprintf (dump_file, "-- cannot satisfy all definitions and uses\n");
779 : 890581 : return false;
780 : : }
781 : :
782 : : // Tentatively install the new pattern. By convention, the CC set
783 : : // must be first.
784 : 3425373 : if (m_parallel)
785 : : {
786 : 2717667 : XVECEXP (m_parallel, 0, 0) = cc_set;
787 : 2717667 : XVECEXP (m_parallel, 0, 1) = other_set;
788 : : }
789 : : else
790 : : {
791 : 707706 : rtvec vec = gen_rtvec (2, cc_set, other_set);
792 : 707706 : m_parallel = gen_rtx_PARALLEL (VOIDmode, vec);
793 : : }
794 : 3425373 : validate_change (cc_rtl, &PATTERN (cc_rtl), m_parallel, 1);
795 : :
796 : : // These routines report failures themselves.
797 : 3425373 : if (!recog (attempt, cc_change, ignore_changing_insns (changes))
798 : 15609 : || !changes_are_worthwhile (changes)
799 : 3440982 : || !crtl->ssa->verify_insn_changes (changes))
800 : 3409764 : return false;
801 : :
802 : 15609 : remove_reg_equal_equiv_notes (cc_rtl);
803 : 15609 : confirm_change_group ();
804 : 15609 : crtl->ssa->change_insns (changes);
805 : 15609 : m_parallel = NULL_RTX;
806 : 15609 : return true;
807 : 4377188 : }
808 : :
809 : : // CC_SET is a single_set that sets (only) CC_DEF. See whether CC_DEF
810 : : // is a definition of a condition-code register and try to optimize it
811 : : // with related instructions if so. Return true if something changed.
812 : : //
813 : : // This function looks for sequences of the form:
814 : : //
815 : : // A: (set (reg R1) X1)
816 : : // B: ...instructions that might change the value of X1...
817 : : // C: (set (reg CC) X2) // X2 uses R1
818 : : //
819 : : // and tries to change them to:
820 : : //
821 : : // C': [(set (reg CC) X2')
822 : : // (set (reg R1) X1)]
823 : : // B: ...instructions that might change the value of X1...
824 : : //
825 : : // where X2' is the result of replacing R1 with X1 in X2.
826 : : //
827 : : // Combine can handle this optimization if B doesn't exist and if A and
828 : : // C are in the same BB. This pass instead handles cases where B does
829 : : // exist and cases where A and C are in different BBs of the same EBB.
830 : : bool
831 : 54150180 : late_combine::combine_cc_setter (set_info *cc_def, rtx cc_set)
832 : : {
833 : : // Check for a set of a CC register. This isn't needed for correctness;
834 : : // it's just a way of narrowing the search space. It could be relaxed if
835 : : // there are other situations that would benefit from the same optimization.
836 : 54150180 : if (!HARD_REGISTER_NUM_P (cc_def->regno ())
837 : 54150180 : || GET_MODE_CLASS (cc_def->mode()) != MODE_CC)
838 : : return false;
839 : :
840 : : // Search the registers used by the CC setter for an easily-substitutable
841 : : // def-use chain.
842 : 18907888 : for (use_info *other_use : cc_def->insn ()->uses ())
843 : 10953966 : if (auto *other_def = other_use->def ())
844 : 10948267 : if (other_use->regno () != cc_def->regno ()
845 : 10908347 : && other_def->ebb () == cc_def->ebb ()
846 : 20505912 : && other_def == single_set_info (other_def->insn ()))
847 : 6492431 : if (rtx other_set = optimizable_set (other_def))
848 : 4377188 : if (parallelize_insns (cc_def, cc_set, other_def, other_set))
849 : 15609 : return true;
850 : :
851 : 7953922 : return false;
852 : : }
853 : :
854 : : // Try to optimize INSN in some way. If the optimization moves any
855 : : // instructions before CURSOR, and if further optimizations might be
856 : : // possible on those instructions, add them to the end of m_worklist.
857 : : bool
858 : 108415336 : late_combine::combine_insn (insn_info *insn, insn_info *cursor)
859 : : {
860 : 108415336 : if (set_info *def = single_set_info (insn))
861 : 84674276 : if (rtx set = optimizable_set (def))
862 : 56319743 : return (combine_into_uses (def, set, cursor)
863 : 56319743 : || combine_cc_setter (def, set));
864 : :
865 : : return false;
866 : : }
867 : :
868 : : // Run the pass on function FN.
869 : : unsigned int
870 : 1932006 : late_combine::execute (function *fn)
871 : : {
872 : : // Initialization.
873 : 1932006 : calculate_dominance_info (CDI_DOMINATORS);
874 : 1932006 : df_analyze ();
875 : 1932006 : crtl->ssa = new rtl_ssa::function_info (fn);
876 : : // Don't allow memory_operand to match volatile MEMs.
877 : 1932006 : init_recog_no_volatile ();
878 : :
879 : 1932006 : insn_info *insn = *crtl->ssa->nondebug_insns ().begin ();
880 : 173317419 : while (insn)
881 : : {
882 : 171385413 : if (!insn->is_artificial ())
883 : : {
884 : 108415336 : insn_info *prev = insn->prev_nondebug_insn ();
885 : 108415336 : if (combine_insn (insn, prev))
886 : : {
887 : : // Any instructions that get added to the worklist were
888 : : // previously after PREV. Thus if we were able to move
889 : : // an instruction X before PREV during one combination,
890 : : // X cannot depend on any instructions that we move before
891 : : // PREV during subsequent combinations. This means that
892 : : // the worklist should be free of backwards dependencies,
893 : : // even if it isn't necessarily in RPO.
894 : 2185172 : for (unsigned int i = 0; i < m_worklist.length (); ++i)
895 : 0 : combine_insn (m_worklist[i], prev);
896 : 2185172 : m_worklist.truncate (0);
897 : 2185172 : insn = prev;
898 : : }
899 : : }
900 : 171385413 : insn = insn->next_nondebug_insn ();
901 : : }
902 : :
903 : : // Finalization.
904 : 1932006 : if (crtl->ssa->perform_pending_updates ())
905 : 525 : cleanup_cfg (0);
906 : :
907 : 1932006 : delete crtl->ssa;
908 : 1932006 : crtl->ssa = nullptr;
909 : :
910 : : // Make the recognizer allow volatile MEMs again.
911 : 1932006 : init_recog ();
912 : 1932006 : free_dominance_info (CDI_DOMINATORS);
913 : 1932006 : return 0;
914 : : }
915 : :
916 : : class pass_late_combine : public rtl_opt_pass
917 : : {
918 : : public:
919 : 571378 : pass_late_combine (gcc::context *ctxt)
920 : 1142756 : : rtl_opt_pass (pass_data_late_combine, ctxt)
921 : : {}
922 : :
923 : : // opt_pass methods:
924 : 285689 : opt_pass *clone () override { return new pass_late_combine (m_ctxt); }
925 : : bool gate (function *) override;
926 : : unsigned int execute (function *) override;
927 : : };
928 : :
929 : : bool
930 : 2962680 : pass_late_combine::gate (function *)
931 : : {
932 : 2962680 : return optimize > 0 && flag_late_combine_instructions;
933 : : }
934 : :
935 : : unsigned int
936 : 1932006 : pass_late_combine::execute (function *fn)
937 : : {
938 : 1932006 : return late_combine ().execute (fn);
939 : : }
940 : :
941 : : } // end namespace
942 : :
943 : : // Create a new CC fusion pass instance.
944 : :
945 : : rtl_opt_pass *
946 : 285689 : make_pass_late_combine (gcc::context *ctxt)
947 : : {
948 : 285689 : return new pass_late_combine (ctxt);
949 : : }
|