Branch data Line data Source code
1 : : /* Support routines for Splitting Paths to loop backedges
2 : : Copyright (C) 2015-2025 Free Software Foundation, Inc.
3 : : Contributed by Ajit Kumar Agarwal <ajitkum@xilinx.com>.
4 : :
5 : : This file is part of GCC.
6 : :
7 : : GCC is free software; you can redistribute it and/or modify
8 : : it under the terms of the GNU General Public License as published by
9 : : the Free Software Foundation; either version 3, or (at your option)
10 : : any later version.
11 : :
12 : : GCC is distributed in the hope that it will be useful,
13 : : but WITHOUT ANY WARRANTY; without even the implied warranty of
14 : : MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 : : GNU General Public License for more details.
16 : :
17 : : You should have received a copy of the GNU General Public License
18 : : along with GCC; see the file COPYING3. If not see
19 : : <http://www.gnu.org/licenses/>. */
20 : :
21 : : #include "config.h"
22 : : #include "system.h"
23 : : #include "coretypes.h"
24 : : #include "backend.h"
25 : : #include "tree.h"
26 : : #include "gimple.h"
27 : : #include "tree-pass.h"
28 : : #include "tree-cfg.h"
29 : : #include "cfganal.h"
30 : : #include "cfgloop.h"
31 : : #include "gimple-iterator.h"
32 : : #include "tracer.h"
33 : : #include "predict.h"
34 : : #include "gimple-ssa.h"
35 : : #include "tree-phinodes.h"
36 : : #include "ssa-iterators.h"
37 : : #include "fold-const.h"
38 : : #include "cfghooks.h"
39 : :
40 : : /* Given LATCH, the latch block in a loop, see if the shape of the
41 : : path reaching LATCH is suitable for being split by duplication.
42 : : If so, return the block that will be duplicated into its predecessor
43 : : paths. Else return NULL. */
44 : :
45 : : static basic_block
46 : 53606 : find_block_to_duplicate_for_splitting_paths (basic_block latch)
47 : : {
48 : : /* We should have simple latches at this point. So the latch should
49 : : have a single successor. This implies the predecessor of the latch
50 : : likely has the loop exit. And it's that predecessor we're most
51 : : interested in. To keep things simple, we're going to require that
52 : : the latch have a single predecessor too. */
53 : 107212 : if (single_succ_p (latch) && single_pred_p (latch))
54 : : {
55 : 52285 : basic_block bb = get_immediate_dominator (CDI_DOMINATORS, latch);
56 : 52285 : gcc_assert (single_pred_edge (latch)->src == bb);
57 : :
58 : : /* If BB has been marked as not to be duplicated, then honor that
59 : : request. */
60 : 52285 : if (ignore_bb_p (bb))
61 : : return NULL;
62 : :
63 : 52145 : gimple *last = gsi_stmt (gsi_last_nondebug_bb (bb));
64 : : /* The immediate dominator of the latch must end in a conditional. */
65 : 52145 : if (!last || gimple_code (last) != GIMPLE_COND)
66 : : return NULL;
67 : :
68 : : /* We're hoping that BB is a join point for an IF-THEN-ELSE diamond
69 : : region. Verify that it is.
70 : :
71 : : First, verify that BB has two predecessors (each arm of the
72 : : IF-THEN-ELSE) and two successors (the latch and exit) and that
73 : : all edges are normal. */
74 : 50743 : if (EDGE_COUNT (bb->preds) == 2
75 : 36172 : && !(EDGE_PRED (bb, 0)->flags & EDGE_COMPLEX)
76 : 36172 : && !(EDGE_PRED (bb, 1)->flags & EDGE_COMPLEX)
77 : 36171 : && EDGE_COUNT (bb->succs) == 2
78 : 36171 : && !(EDGE_SUCC (bb, 0)->flags & EDGE_COMPLEX)
79 : 86914 : && !(EDGE_SUCC (bb, 1)->flags & EDGE_COMPLEX))
80 : : {
81 : : /* Now verify that BB's immediate dominator ends in a
82 : : conditional as well. */
83 : 36171 : basic_block bb_idom = get_immediate_dominator (CDI_DOMINATORS, bb);
84 : 36171 : gimple *last = gsi_stmt (gsi_last_nondebug_bb (bb_idom));
85 : 36171 : if (!last || gimple_code (last) != GIMPLE_COND)
86 : : return NULL;
87 : :
88 : : /* And that BB's immediate dominator's successors are the
89 : : predecessors of BB or BB itself. */
90 : 15080 : if (!(EDGE_PRED (bb, 0)->src == bb_idom
91 : 5435 : || find_edge (bb_idom, EDGE_PRED (bb, 0)->src))
92 : 17481 : || !(EDGE_PRED (bb, 1)->src == bb_idom
93 : 5717 : || find_edge (bb_idom, EDGE_PRED (bb, 1)->src)))
94 : 4610 : return NULL;
95 : :
96 : : /* And that the predecessors of BB each have a single successor
97 : : or are BB's immediate domiator itself. */
98 : 5035 : if (!(EDGE_PRED (bb, 0)->src == bb_idom
99 : 1762 : || single_succ_p (EDGE_PRED (bb, 0)->src))
100 : 6162 : || !(EDGE_PRED (bb, 1)->src == bb_idom
101 : 53086 : || single_succ_p (EDGE_PRED (bb, 1)->src)))
102 : : return NULL;
103 : :
104 : : /* So at this point we have a simple diamond for an IF-THEN-ELSE
105 : : construct starting at BB_IDOM, with a join point at BB. BB
106 : : pass control outside the loop or to the loop latch.
107 : :
108 : : We're going to want to create two duplicates of BB, one for
109 : : each successor of BB_IDOM. */
110 : : return bb;
111 : : }
112 : : }
113 : : return NULL;
114 : : }
115 : :
116 : : /* Return the number of non-debug statements in a block. */
117 : : static unsigned int
118 : 9233 : count_stmts_in_block (basic_block bb)
119 : : {
120 : 9233 : gimple_stmt_iterator gsi;
121 : 9233 : unsigned int num_stmts = 0;
122 : :
123 : 68846 : for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
124 : : {
125 : 50380 : gimple *stmt = gsi_stmt (gsi);
126 : 50380 : if (!is_gimple_debug (stmt))
127 : 40741 : num_stmts++;
128 : : }
129 : 9233 : return num_stmts;
130 : : }
131 : :
132 : : /* Return TRUE if CODE represents a tree code that is not likely to
133 : : be easily if-convertable because it likely expands into multiple
134 : : insns, FALSE otherwise. */
135 : : static bool
136 : 929 : poor_ifcvt_candidate_code (enum tree_code code)
137 : : {
138 : 929 : return (code == MIN_EXPR
139 : : || code == MAX_EXPR
140 : 929 : || code == ABS_EXPR
141 : 929 : || code == COND_EXPR);
142 : : }
143 : :
144 : : /* Return TRUE if PRED of BB is an poor ifcvt candidate. */
145 : : static bool
146 : 2761 : poor_ifcvt_pred (basic_block pred, basic_block bb)
147 : : {
148 : : /* If the edge count of the pred is not 1, then
149 : : this is the predecessor from the if rather
150 : : than middle one. */
151 : 3889 : if (EDGE_COUNT (pred->succs) != 1)
152 : : return false;
153 : :
154 : : /* Empty middle bb are never a poor ifcvt candidate. */
155 : 1440 : if (empty_block_p (pred))
156 : : return false;
157 : : /* If BB's predecessors are single statement blocks where
158 : : the output of that statement feed the same PHI in BB,
159 : : it an ifcvt candidate. */
160 : 1123 : gimple *stmt = last_and_only_stmt (pred);
161 : 1123 : if (!stmt || gimple_code (stmt) != GIMPLE_ASSIGN)
162 : : return true;
163 : :
164 : : /* If the statement could trap, then this is a poor ifcvt candidate. */
165 : 970 : if (gimple_could_trap_p (stmt))
166 : : return true;
167 : :
168 : 929 : tree_code code = gimple_assign_rhs_code (stmt);
169 : 929 : if (poor_ifcvt_candidate_code (code))
170 : : return true;
171 : 928 : tree lhs = gimple_assign_lhs (stmt);
172 : 928 : gimple_stmt_iterator gsi;
173 : 1075 : for (gsi = gsi_start_phis (bb); !gsi_end_p (gsi); gsi_next (&gsi))
174 : : {
175 : 958 : gimple *phi = gsi_stmt (gsi);
176 : 958 : if (gimple_phi_arg_def (phi, 0) == lhs
177 : 1782 : || gimple_phi_arg_def (phi, 1) == lhs)
178 : : return false;
179 : : }
180 : : return true;
181 : : }
182 : :
183 : : /* Return TRUE if BB is a reasonable block to duplicate by examining
184 : : its size, false otherwise. BB will always be a loop latch block.
185 : :
186 : : Things to consider:
187 : :
188 : : We do not want to spoil if-conversion if at all possible.
189 : :
190 : : Most of the benefit seems to be from eliminating the unconditional
191 : : jump rather than CSE/DCE opportunities. So favor duplicating
192 : : small latches. A latch with just a conditional branch is ideal.
193 : :
194 : : CSE/DCE opportunties crop up when statements from the predecessors
195 : : feed statements in the latch and allow statements in the latch to
196 : : simplify. */
197 : :
198 : : static bool
199 : 4325 : is_feasible_trace (basic_block bb)
200 : : {
201 : 4325 : basic_block pred1 = EDGE_PRED (bb, 0)->src;
202 : 4325 : basic_block pred2 = EDGE_PRED (bb, 1)->src;
203 : 4325 : int num_stmts_in_join = count_stmts_in_block (bb);
204 : 4325 : int num_stmts_in_pred1
205 : 4325 : = EDGE_COUNT (pred1->succs) == 1 ? count_stmts_in_block (pred1) : 0;
206 : 4325 : int num_stmts_in_pred2
207 : 4325 : = EDGE_COUNT (pred2->succs) == 1 ? count_stmts_in_block (pred2) : 0;
208 : :
209 : : /* Upper Hard limit on the number statements to copy. */
210 : 4325 : if (num_stmts_in_join
211 : 4325 : >= param_max_jump_thread_duplication_stmts)
212 : : {
213 : 44 : if (dump_file && (dump_flags & TDF_DETAILS))
214 : 0 : fprintf (dump_file,
215 : : "Duplicating block %d would duplicate "
216 : : "too many statments: %d >= %d\n",
217 : : bb->index, num_stmts_in_join,
218 : : param_max_jump_thread_duplication_stmts);
219 : 44 : return false;
220 : : }
221 : :
222 : : /* This is meant to catch cases that are likely opportunities for
223 : : if-conversion. */
224 : 4281 : if (num_stmts_in_pred1 <= 1 && num_stmts_in_pred2 <= 1)
225 : : {
226 : 1457 : int num_phis = 0;
227 : : /* The max number of PHIs that should be considered for an ifcvt
228 : : candidate. */
229 : 1457 : const int max_num_phis = 3;
230 : 3211 : for (gphi_iterator si = gsi_start_phis (bb); ! gsi_end_p (si);
231 : 1754 : gsi_next (&si))
232 : : {
233 : 1797 : num_phis++;
234 : 1797 : if (num_phis > max_num_phis)
235 : : break;
236 : : }
237 : 1457 : if (num_phis <= max_num_phis
238 : 1414 : && !poor_ifcvt_pred (pred1, bb)
239 : 2804 : && !poor_ifcvt_pred (pred2, bb))
240 : : {
241 : 1102 : if (dump_file && (dump_flags & TDF_DETAILS))
242 : 9 : fprintf (dump_file,
243 : : "Block %d appears to be a join point for "
244 : : "if-convertable bbs.\n",
245 : : bb->index);
246 : 1102 : return false;
247 : : }
248 : : }
249 : :
250 : : /* If the joiner has no PHIs with useful uses there is zero chance
251 : : of CSE/DCE/jump-threading possibilities exposed by duplicating it. */
252 : 3179 : bool found_useful_phi = false;
253 : 5345 : for (gphi_iterator si = gsi_start_phis (bb); ! gsi_end_p (si);
254 : 2166 : gsi_next (&si))
255 : : {
256 : 3745 : gphi *phi = si.phi ();
257 : 3745 : use_operand_p use_p;
258 : 3745 : imm_use_iterator iter;
259 : 9784 : FOR_EACH_IMM_USE_FAST (use_p, iter, gimple_phi_result (phi))
260 : : {
261 : 7618 : gimple *stmt = USE_STMT (use_p);
262 : 7618 : if (is_gimple_debug (stmt))
263 : 261 : continue;
264 : : /* If there's a use in the joiner this might be a CSE/DCE
265 : : opportunity, but not if the use is in a conditional
266 : : which makes this a likely if-conversion candidate. */
267 : 7357 : if (gimple_bb (stmt) == bb
268 : 7357 : && (!is_gimple_assign (stmt)
269 : 1131 : || (TREE_CODE_CLASS (gimple_assign_rhs_code (stmt))
270 : : != tcc_comparison)))
271 : : {
272 : : found_useful_phi = true;
273 : : break;
274 : : }
275 : : /* If the use is on a loop header PHI and on one path the
276 : : value is unchanged this might expose a jump threading
277 : : opportunity. */
278 : 5799 : if (gimple_code (stmt) == GIMPLE_PHI
279 : 4241 : && gimple_bb (stmt) == bb->loop_father->header
280 : : /* But for memory the PHI alone isn't good enough. */
281 : 10433 : && ! virtual_operand_p (gimple_phi_result (stmt)))
282 : : {
283 : 1429 : bool found_unchanged_path = false;
284 : 1429 : for (unsigned i = 0; i < gimple_phi_num_args (phi); ++i)
285 : 1140 : if (gimple_phi_arg_def (phi, i) == gimple_phi_result (stmt))
286 : : {
287 : : found_unchanged_path = true;
288 : : break;
289 : : }
290 : : /* If we found an unchanged path this can only be a threading
291 : : opportunity if we have uses of the loop header PHI result
292 : : in a stmt dominating the merge block. Otherwise the
293 : : splitting may prevent if-conversion. */
294 : 786 : if (found_unchanged_path)
295 : : {
296 : 497 : use_operand_p use2_p;
297 : 497 : imm_use_iterator iter2;
298 : 1271 : FOR_EACH_IMM_USE_FAST (use2_p, iter2, gimple_phi_result (stmt))
299 : : {
300 : 907 : gimple *use_stmt = USE_STMT (use2_p);
301 : 907 : if (is_gimple_debug (use_stmt))
302 : 87 : continue;
303 : 820 : basic_block use_bb = gimple_bb (use_stmt);
304 : 820 : if (use_bb != bb
305 : 820 : && dominated_by_p (CDI_DOMINATORS, bb, use_bb))
306 : : {
307 : 521 : if (gcond *cond = dyn_cast <gcond *> (use_stmt))
308 : 45 : if (gimple_cond_code (cond) == EQ_EXPR
309 : 45 : || gimple_cond_code (cond) == NE_EXPR)
310 : 21 : found_useful_phi = true;
311 : : break;
312 : : }
313 : : }
314 : : }
315 : 21 : if (found_useful_phi)
316 : : break;
317 : : }
318 : : }
319 : 3745 : if (found_useful_phi)
320 : : break;
321 : : }
322 : : /* There is one exception namely a controlling condition we can propagate
323 : : an equivalence from to the joiner. */
324 : 3179 : bool found_cprop_opportunity = false;
325 : 3179 : basic_block dom = get_immediate_dominator (CDI_DOMINATORS, bb);
326 : 6358 : gcond *cond = as_a <gcond *> (*gsi_last_bb (dom));
327 : 3179 : if (gimple_cond_code (cond) == EQ_EXPR
328 : 3179 : || gimple_cond_code (cond) == NE_EXPR)
329 : 7966 : for (unsigned i = 0; i < 2; ++i)
330 : : {
331 : 5420 : tree op = gimple_op (cond, i);
332 : 5420 : if (TREE_CODE (op) == SSA_NAME)
333 : : {
334 : 3153 : use_operand_p use_p;
335 : 3153 : imm_use_iterator iter;
336 : 7844 : FOR_EACH_IMM_USE_FAST (use_p, iter, op)
337 : : {
338 : 4963 : if (is_gimple_debug (USE_STMT (use_p)))
339 : 216 : continue;
340 : 4747 : if (gimple_bb (USE_STMT (use_p)) == bb)
341 : : {
342 : : found_cprop_opportunity = true;
343 : : break;
344 : : }
345 : : }
346 : : }
347 : 5420 : if (found_cprop_opportunity)
348 : : break;
349 : : }
350 : :
351 : 3179 : if (! found_useful_phi && ! found_cprop_opportunity)
352 : : {
353 : 1573 : if (dump_file && (dump_flags & TDF_DETAILS))
354 : 4 : fprintf (dump_file,
355 : : "Block %d is a join that does not expose CSE/DCE/jump-thread "
356 : : "opportunities when duplicated.\n",
357 : : bb->index);
358 : 1573 : return false;
359 : : }
360 : :
361 : : /* We may want something here which looks at dataflow and tries
362 : : to guess if duplication of BB is likely to result in simplification
363 : : of instructions in BB in either the original or the duplicate. */
364 : : return true;
365 : : }
366 : :
367 : : /* If the immediate dominator of the latch of the loop is
368 : : block with conditional branch, then the loop latch is
369 : : duplicated to its predecessors path preserving the SSA
370 : : semantics.
371 : :
372 : : CFG before transformation.
373 : :
374 : : 2
375 : : |
376 : : |
377 : : +---->3
378 : : | / \
379 : : | / \
380 : : | 4 5
381 : : | \ /
382 : : | \ /
383 : : | 6
384 : : | / \
385 : : | / \
386 : : | 8 7
387 : : | | |
388 : : ---+ E
389 : :
390 : :
391 : :
392 : : Block 8 is the latch. We're going to make copies of block 6 (9 & 10)
393 : : and wire things up so they look like this:
394 : :
395 : : 2
396 : : |
397 : : |
398 : : +---->3
399 : : | / \
400 : : | / \
401 : : | 4 5
402 : : | | |
403 : : | | |
404 : : | 9 10
405 : : | |\ /|
406 : : | | \ / |
407 : : | | 7 |
408 : : | | | |
409 : : | | E |
410 : : | | |
411 : : | \ /
412 : : | \ /
413 : : +-----8
414 : :
415 : :
416 : : Blocks 9 and 10 will get merged into blocks 4 & 5 respectively which
417 : : enables CSE, DCE and other optimizations to occur on a larger block
418 : : of code. */
419 : :
420 : : static bool
421 : 17601 : split_paths ()
422 : : {
423 : 17601 : bool changed = false;
424 : :
425 : 17601 : loop_optimizer_init (LOOPS_NORMAL | LOOPS_HAVE_RECORDED_EXITS);
426 : 17601 : initialize_original_copy_tables ();
427 : 17601 : calculate_dominance_info (CDI_DOMINATORS);
428 : :
429 : 107795 : for (auto loop : loops_list (cfun, LI_FROM_INNERMOST))
430 : : {
431 : : /* Only split paths if we are optimizing this loop for speed. */
432 : 54992 : if (!optimize_loop_for_speed_p (loop))
433 : 1386 : continue;
434 : :
435 : : /* See if there is a block that we can duplicate to split the
436 : : path to the loop latch. */
437 : 53606 : basic_block bb
438 : 53606 : = find_block_to_duplicate_for_splitting_paths (loop->latch);
439 : :
440 : : /* BB is the merge point for an IF-THEN-ELSE we want to transform.
441 : :
442 : : Essentially we want to create a duplicate of bb and redirect the
443 : : first predecessor of BB to the duplicate (leaving the second
444 : : predecessor as is. This will split the path leading to the latch
445 : : re-using BB to avoid useless copying. */
446 : 53606 : if (bb && is_feasible_trace (bb))
447 : : {
448 : 1606 : if (dump_file && (dump_flags & TDF_DETAILS))
449 : 4 : fprintf (dump_file,
450 : : "Duplicating join block %d into predecessor paths\n",
451 : : bb->index);
452 : 1606 : basic_block pred0 = EDGE_PRED (bb, 0)->src;
453 : 1606 : if (EDGE_COUNT (pred0->succs) != 1)
454 : 1245 : pred0 = EDGE_PRED (bb, 1)->src;
455 : 1606 : transform_duplicate (pred0, bb);
456 : 1606 : changed = true;
457 : :
458 : : /* If BB has an outgoing edge marked as IRREDUCIBLE, then
459 : : duplicating BB may result in an irreducible region turning
460 : : into a natural loop.
461 : :
462 : : Long term we might want to hook this into the block
463 : : duplication code, but as we've seen with similar changes
464 : : for edge removal, that can be somewhat risky. */
465 : 1606 : if (EDGE_SUCC (bb, 0)->flags & EDGE_IRREDUCIBLE_LOOP
466 : 1606 : || EDGE_SUCC (bb, 1)->flags & EDGE_IRREDUCIBLE_LOOP)
467 : : {
468 : 6 : if (dump_file && (dump_flags & TDF_DETAILS))
469 : 0 : fprintf (dump_file,
470 : : "Join block %d has EDGE_IRREDUCIBLE_LOOP set. "
471 : : "Scheduling loop fixups.\n",
472 : : bb->index);
473 : 6 : loops_state_set (LOOPS_NEED_FIXUP);
474 : : }
475 : : }
476 : 17601 : }
477 : :
478 : 17601 : loop_optimizer_finalize ();
479 : 17601 : free_original_copy_tables ();
480 : 17601 : return changed;
481 : : }
482 : :
483 : : /* Main entry point for splitting paths. Returns TODO_cleanup_cfg if any
484 : : paths where split, otherwise return zero. */
485 : :
486 : : static unsigned int
487 : 83006 : execute_split_paths ()
488 : : {
489 : : /* If we don't have at least 2 real blocks and backedges in the
490 : : CFG, then there's no point in trying to perform path splitting. */
491 : 83006 : if (n_basic_blocks_for_fn (cfun) <= NUM_FIXED_BLOCKS + 1
492 : 83006 : || !mark_dfs_back_edges ())
493 : 65405 : return 0;
494 : :
495 : 17601 : bool changed = split_paths();
496 : 17601 : if (changed)
497 : 1205 : free_dominance_info (CDI_DOMINATORS);
498 : :
499 : 1205 : return changed ? TODO_cleanup_cfg : 0;
500 : : }
501 : :
502 : : static bool
503 : 1002800 : gate_split_paths ()
504 : : {
505 : 1002800 : return flag_split_paths;
506 : : }
507 : :
508 : : namespace {
509 : :
510 : : const pass_data pass_data_split_paths =
511 : : {
512 : : GIMPLE_PASS, /* type */
513 : : "split-paths", /* name */
514 : : OPTGROUP_NONE, /* optinfo_flags */
515 : : TV_SPLIT_PATHS, /* tv_id */
516 : : PROP_ssa, /* properties_required */
517 : : 0, /* properties_provided */
518 : : 0, /* properties_destroyed */
519 : : 0, /* todo_flags_start */
520 : : TODO_update_ssa, /* todo_flags_finish */
521 : : };
522 : :
523 : : class pass_split_paths : public gimple_opt_pass
524 : : {
525 : : public:
526 : 282953 : pass_split_paths (gcc::context *ctxt)
527 : 565906 : : gimple_opt_pass (pass_data_split_paths, ctxt)
528 : : {}
529 : : /* opt_pass methods: */
530 : 0 : opt_pass * clone () final override { return new pass_split_paths (m_ctxt); }
531 : 1002800 : bool gate (function *) final override { return gate_split_paths (); }
532 : 83006 : unsigned int execute (function *) final override
533 : : {
534 : 83006 : return execute_split_paths ();
535 : : }
536 : :
537 : : }; // class pass_split_paths
538 : :
539 : : } // anon namespace
540 : :
541 : : gimple_opt_pass *
542 : 282953 : make_pass_split_paths (gcc::context *ctxt)
543 : : {
544 : 282953 : return new pass_split_paths (ctxt);
545 : : }
|