Branch data Line data Source code
1 : : /* Support routines for Splitting Paths to loop backedges
2 : : Copyright (C) 2015-2025 Free Software Foundation, Inc.
3 : : Contributed by Ajit Kumar Agarwal <ajitkum@xilinx.com>.
4 : :
5 : : This file is part of GCC.
6 : :
7 : : GCC is free software; you can redistribute it and/or modify
8 : : it under the terms of the GNU General Public License as published by
9 : : the Free Software Foundation; either version 3, or (at your option)
10 : : any later version.
11 : :
12 : : GCC is distributed in the hope that it will be useful,
13 : : but WITHOUT ANY WARRANTY; without even the implied warranty of
14 : : MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 : : GNU General Public License for more details.
16 : :
17 : : You should have received a copy of the GNU General Public License
18 : : along with GCC; see the file COPYING3. If not see
19 : : <http://www.gnu.org/licenses/>. */
20 : :
21 : : #include "config.h"
22 : : #include "system.h"
23 : : #include "coretypes.h"
24 : : #include "backend.h"
25 : : #include "tree.h"
26 : : #include "gimple.h"
27 : : #include "tree-pass.h"
28 : : #include "tree-cfg.h"
29 : : #include "cfganal.h"
30 : : #include "cfgloop.h"
31 : : #include "gimple-iterator.h"
32 : : #include "tracer.h"
33 : : #include "predict.h"
34 : : #include "gimple-ssa.h"
35 : : #include "tree-phinodes.h"
36 : : #include "ssa-iterators.h"
37 : : #include "fold-const.h"
38 : : #include "cfghooks.h"
39 : :
40 : : /* Given LATCH, the latch block in a loop, see if the shape of the
41 : : path reaching LATCH is suitable for being split by duplication.
42 : : If so, return the block that will be duplicated into its predecessor
43 : : paths. Else return NULL. */
44 : :
45 : : static basic_block
46 : 53415 : find_block_to_duplicate_for_splitting_paths (basic_block latch)
47 : : {
48 : : /* We should have simple latches at this point. So the latch should
49 : : have a single successor. This implies the predecessor of the latch
50 : : likely has the loop exit. And it's that predecessor we're most
51 : : interested in. To keep things simple, we're going to require that
52 : : the latch have a single predecessor too. */
53 : 106830 : if (single_succ_p (latch) && single_pred_p (latch))
54 : : {
55 : 52098 : basic_block bb = get_immediate_dominator (CDI_DOMINATORS, latch);
56 : 52098 : gcc_assert (single_pred_edge (latch)->src == bb);
57 : :
58 : : /* If BB has been marked as not to be duplicated, then honor that
59 : : request. */
60 : 52098 : if (ignore_bb_p (bb))
61 : : return NULL;
62 : :
63 : 51962 : gimple *last = gsi_stmt (gsi_last_nondebug_bb (bb));
64 : : /* The immediate dominator of the latch must end in a conditional. */
65 : 51962 : if (!last || gimple_code (last) != GIMPLE_COND)
66 : : return NULL;
67 : :
68 : : /* We're hoping that BB is a join point for an IF-THEN-ELSE diamond
69 : : region. Verify that it is.
70 : :
71 : : First, verify that BB has two predecessors (each arm of the
72 : : IF-THEN-ELSE) and two successors (the latch and exit) and that
73 : : all edges are normal. */
74 : 50566 : if (EDGE_COUNT (bb->preds) == 2
75 : 36077 : && !(EDGE_PRED (bb, 0)->flags & EDGE_COMPLEX)
76 : 36077 : && !(EDGE_PRED (bb, 1)->flags & EDGE_COMPLEX)
77 : 36076 : && EDGE_COUNT (bb->succs) == 2
78 : 36076 : && !(EDGE_SUCC (bb, 0)->flags & EDGE_COMPLEX)
79 : 86642 : && !(EDGE_SUCC (bb, 1)->flags & EDGE_COMPLEX))
80 : : {
81 : : /* Now verify that BB's immediate dominator ends in a
82 : : conditional as well. */
83 : 36076 : basic_block bb_idom = get_immediate_dominator (CDI_DOMINATORS, bb);
84 : 36076 : gimple *last = gsi_stmt (gsi_last_nondebug_bb (bb_idom));
85 : 36076 : if (!last || gimple_code (last) != GIMPLE_COND)
86 : : return NULL;
87 : :
88 : : /* And that BB's immediate dominator's successors are the
89 : : predecessors of BB or BB itself. */
90 : 15106 : if (!(EDGE_PRED (bb, 0)->src == bb_idom
91 : 5467 : || find_edge (bb_idom, EDGE_PRED (bb, 0)->src))
92 : 17493 : || !(EDGE_PRED (bb, 1)->src == bb_idom
93 : 5709 : || find_edge (bb_idom, EDGE_PRED (bb, 1)->src)))
94 : 4595 : return NULL;
95 : :
96 : : /* And that the predecessors of BB each have a single successor
97 : : or are BB's immediate domiator itself. */
98 : 5044 : if (!(EDGE_PRED (bb, 0)->src == bb_idom
99 : 1787 : || single_succ_p (EDGE_PRED (bb, 0)->src))
100 : 6197 : || !(EDGE_PRED (bb, 1)->src == bb_idom
101 : 52899 : || single_succ_p (EDGE_PRED (bb, 1)->src)))
102 : : return NULL;
103 : :
104 : : /* So at this point we have a simple diamond for an IF-THEN-ELSE
105 : : construct starting at BB_IDOM, with a join point at BB. BB
106 : : pass control outside the loop or to the loop latch.
107 : :
108 : : We're going to want to create two duplicates of BB, one for
109 : : each successor of BB_IDOM. */
110 : : return bb;
111 : : }
112 : : }
113 : : return NULL;
114 : : }
115 : :
116 : : /* Return the number of non-debug statements in a block. */
117 : : static unsigned int
118 : 9283 : count_stmts_in_block (basic_block bb)
119 : : {
120 : 9283 : gimple_stmt_iterator gsi;
121 : 9283 : unsigned int num_stmts = 0;
122 : :
123 : 69421 : for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
124 : : {
125 : 50855 : gimple *stmt = gsi_stmt (gsi);
126 : 50855 : if (!is_gimple_debug (stmt))
127 : 40843 : num_stmts++;
128 : : }
129 : 9283 : return num_stmts;
130 : : }
131 : :
132 : : /* Return TRUE if CODE represents a tree code that is not likely to
133 : : be easily if-convertable because it likely expands into multiple
134 : : insns, FALSE otherwise. */
135 : : static bool
136 : 926 : poor_ifcvt_candidate_code (enum tree_code code)
137 : : {
138 : 926 : return (code == MIN_EXPR
139 : : || code == MAX_EXPR
140 : 926 : || code == ABS_EXPR
141 : 926 : || code == COND_EXPR);
142 : : }
143 : :
144 : : /* Return TRUE if PRED of BB is an poor ifcvt candidate. */
145 : : static bool
146 : 2759 : poor_ifcvt_pred (basic_block pred, basic_block bb)
147 : : {
148 : : /* If the edge count of the pred is not 1, then
149 : : this is the predecessor from the if rather
150 : : than middle one. */
151 : 3885 : if (EDGE_COUNT (pred->succs) != 1)
152 : : return false;
153 : :
154 : : /* Empty middle bb are never a poor ifcvt candidate. */
155 : 1438 : if (empty_block_p (pred))
156 : : return false;
157 : : /* If BB's predecessors are single statement blocks where
158 : : the output of that statement feed the same PHI in BB,
159 : : it an ifcvt candidate. */
160 : 1121 : gimple *stmt = last_and_only_stmt (pred);
161 : 1121 : if (!stmt || gimple_code (stmt) != GIMPLE_ASSIGN)
162 : : return true;
163 : :
164 : : /* If the statement could trap, then this is a poor ifcvt candidate. */
165 : 967 : if (gimple_could_trap_p (stmt))
166 : : return true;
167 : :
168 : 926 : tree_code code = gimple_assign_rhs_code (stmt);
169 : 926 : if (poor_ifcvt_candidate_code (code))
170 : : return true;
171 : 925 : tree lhs = gimple_assign_lhs (stmt);
172 : 925 : gimple_stmt_iterator gsi;
173 : 1069 : for (gsi = gsi_start_phis (bb); !gsi_end_p (gsi); gsi_next (&gsi))
174 : : {
175 : 953 : gimple *phi = gsi_stmt (gsi);
176 : 953 : if (gimple_phi_arg_def (phi, 0) == lhs
177 : 1772 : || gimple_phi_arg_def (phi, 1) == lhs)
178 : : return false;
179 : : }
180 : : return true;
181 : : }
182 : :
183 : : /* Return TRUE if BB is a reasonable block to duplicate by examining
184 : : its size, false otherwise. BB will always be a loop latch block.
185 : :
186 : : Things to consider:
187 : :
188 : : We do not want to spoil if-conversion if at all possible.
189 : :
190 : : Most of the benefit seems to be from eliminating the unconditional
191 : : jump rather than CSE/DCE opportunities. So favor duplicating
192 : : small latches. A latch with just a conditional branch is ideal.
193 : :
194 : : CSE/DCE opportunties crop up when statements from the predecessors
195 : : feed statements in the latch and allow statements in the latch to
196 : : simplify. */
197 : :
198 : : static bool
199 : 4335 : is_feasible_trace (basic_block bb)
200 : : {
201 : 4335 : basic_block pred1 = EDGE_PRED (bb, 0)->src;
202 : 4335 : basic_block pred2 = EDGE_PRED (bb, 1)->src;
203 : 4335 : int num_stmts_in_join = count_stmts_in_block (bb);
204 : 4335 : int num_stmts_in_pred1
205 : 4335 : = EDGE_COUNT (pred1->succs) == 1 ? count_stmts_in_block (pred1) : 0;
206 : 4335 : int num_stmts_in_pred2
207 : 4335 : = EDGE_COUNT (pred2->succs) == 1 ? count_stmts_in_block (pred2) : 0;
208 : :
209 : : /* Upper Hard limit on the number statements to copy. */
210 : 4335 : if (num_stmts_in_join
211 : 4335 : >= param_max_jump_thread_duplication_stmts)
212 : : {
213 : 44 : if (dump_file && (dump_flags & TDF_DETAILS))
214 : 0 : fprintf (dump_file,
215 : : "Duplicating block %d would duplicate "
216 : : "too many statments: %d >= %d\n",
217 : : bb->index, num_stmts_in_join,
218 : : param_max_jump_thread_duplication_stmts);
219 : 44 : return false;
220 : : }
221 : :
222 : : /* This is meant to catch cases that are likely opportunities for
223 : : if-conversion. */
224 : 4291 : if (num_stmts_in_pred1 <= 1 && num_stmts_in_pred2 <= 1)
225 : : {
226 : 1453 : int num_phis = 0;
227 : : /* The max number of PHIs that should be considered for an ifcvt
228 : : candidate. */
229 : 1453 : const int max_num_phis = 3;
230 : 3195 : for (gphi_iterator si = gsi_start_phis (bb); ! gsi_end_p (si);
231 : 1742 : gsi_next (&si))
232 : : {
233 : 1783 : num_phis++;
234 : 1783 : if (num_phis > max_num_phis)
235 : : break;
236 : : }
237 : 1453 : if (num_phis <= max_num_phis
238 : 1412 : && !poor_ifcvt_pred (pred1, bb)
239 : 2800 : && !poor_ifcvt_pred (pred2, bb))
240 : : {
241 : 1100 : if (dump_file && (dump_flags & TDF_DETAILS))
242 : 9 : fprintf (dump_file,
243 : : "Block %d appears to be a join point for "
244 : : "if-convertable bbs.\n",
245 : : bb->index);
246 : 1100 : return false;
247 : : }
248 : : }
249 : :
250 : : /* If the joiner has no PHIs with useful uses there is zero chance
251 : : of CSE/DCE/jump-threading possibilities exposed by duplicating it. */
252 : 3191 : bool found_useful_phi = false;
253 : 5445 : for (gphi_iterator si = gsi_start_phis (bb); ! gsi_end_p (si);
254 : 2254 : gsi_next (&si))
255 : : {
256 : 3819 : gphi *phi = si.phi ();
257 : 3819 : use_operand_p use_p;
258 : 3819 : imm_use_iterator iter;
259 : 9979 : FOR_EACH_IMM_USE_FAST (use_p, iter, gimple_phi_result (phi))
260 : : {
261 : 7725 : gimple *stmt = USE_STMT (use_p);
262 : 7725 : if (is_gimple_debug (stmt))
263 : 262 : continue;
264 : : /* If there's a use in the joiner this might be a CSE/DCE
265 : : opportunity, but not if the use is in a conditional
266 : : which makes this a likely if-conversion candidate. */
267 : 7463 : if (gimple_bb (stmt) == bb
268 : 7463 : && (!is_gimple_assign (stmt)
269 : 1119 : || (TREE_CODE_CLASS (gimple_assign_rhs_code (stmt))
270 : : != tcc_comparison)))
271 : : {
272 : : found_useful_phi = true;
273 : : break;
274 : : }
275 : : /* If the use is on a loop header PHI and on one path the
276 : : value is unchanged this might expose a jump threading
277 : : opportunity. */
278 : 5919 : if (gimple_code (stmt) == GIMPLE_PHI
279 : 4317 : && gimple_bb (stmt) == bb->loop_father->header
280 : : /* But for memory the PHI alone isn't good enough. */
281 : 10593 : && ! virtual_operand_p (gimple_phi_result (stmt)))
282 : : {
283 : 1473 : bool found_unchanged_path = false;
284 : 1473 : for (unsigned i = 0; i < gimple_phi_num_args (phi); ++i)
285 : 1168 : if (gimple_phi_arg_def (phi, i) == gimple_phi_result (stmt))
286 : : {
287 : : found_unchanged_path = true;
288 : : break;
289 : : }
290 : : /* If we found an unchanged path this can only be a threading
291 : : opportunity if we have uses of the loop header PHI result
292 : : in a stmt dominating the merge block. Otherwise the
293 : : splitting may prevent if-conversion. */
294 : 800 : if (found_unchanged_path)
295 : : {
296 : 495 : use_operand_p use2_p;
297 : 495 : imm_use_iterator iter2;
298 : 1268 : FOR_EACH_IMM_USE_FAST (use2_p, iter2, gimple_phi_result (stmt))
299 : : {
300 : 906 : gimple *use_stmt = USE_STMT (use2_p);
301 : 906 : if (is_gimple_debug (use_stmt))
302 : 88 : continue;
303 : 818 : basic_block use_bb = gimple_bb (use_stmt);
304 : 818 : if (use_bb != bb
305 : 818 : && dominated_by_p (CDI_DOMINATORS, bb, use_bb))
306 : : {
307 : 519 : if (gcond *cond = dyn_cast <gcond *> (use_stmt))
308 : 45 : if (gimple_cond_code (cond) == EQ_EXPR
309 : 45 : || gimple_cond_code (cond) == NE_EXPR)
310 : 21 : found_useful_phi = true;
311 : : break;
312 : : }
313 : : }
314 : : }
315 : 21 : if (found_useful_phi)
316 : : break;
317 : : }
318 : : }
319 : 3819 : if (found_useful_phi)
320 : : break;
321 : : }
322 : : /* There is one exception namely a controlling condition we can propagate
323 : : an equivalence from to the joiner. */
324 : 3191 : bool found_cprop_opportunity = false;
325 : 3191 : basic_block dom = get_immediate_dominator (CDI_DOMINATORS, bb);
326 : 6382 : gcond *cond = as_a <gcond *> (*gsi_last_bb (dom));
327 : 3191 : if (gimple_cond_code (cond) == EQ_EXPR
328 : 3191 : || gimple_cond_code (cond) == NE_EXPR)
329 : 8002 : for (unsigned i = 0; i < 2; ++i)
330 : : {
331 : 5444 : tree op = gimple_op (cond, i);
332 : 5444 : if (TREE_CODE (op) == SSA_NAME)
333 : : {
334 : 3165 : use_operand_p use_p;
335 : 3165 : imm_use_iterator iter;
336 : 7867 : FOR_EACH_IMM_USE_FAST (use_p, iter, op)
337 : : {
338 : 4974 : if (is_gimple_debug (USE_STMT (use_p)))
339 : 217 : continue;
340 : 4757 : if (gimple_bb (USE_STMT (use_p)) == bb)
341 : : {
342 : : found_cprop_opportunity = true;
343 : : break;
344 : : }
345 : : }
346 : : }
347 : 5444 : if (found_cprop_opportunity)
348 : : break;
349 : : }
350 : :
351 : 3191 : if (! found_useful_phi && ! found_cprop_opportunity)
352 : : {
353 : 1599 : if (dump_file && (dump_flags & TDF_DETAILS))
354 : 4 : fprintf (dump_file,
355 : : "Block %d is a join that does not expose CSE/DCE/jump-thread "
356 : : "opportunities when duplicated.\n",
357 : : bb->index);
358 : 1599 : return false;
359 : : }
360 : :
361 : : /* We may want something here which looks at dataflow and tries
362 : : to guess if duplication of BB is likely to result in simplification
363 : : of instructions in BB in either the original or the duplicate. */
364 : : return true;
365 : : }
366 : :
367 : : /* If the immediate dominator of the latch of the loop is
368 : : block with conditional branch, then the loop latch is
369 : : duplicated to its predecessors path preserving the SSA
370 : : semantics.
371 : :
372 : : CFG before transformation.
373 : :
374 : : 2
375 : : |
376 : : |
377 : : +---->3
378 : : | / \
379 : : | / \
380 : : | 4 5
381 : : | \ /
382 : : | \ /
383 : : | 6
384 : : | / \
385 : : | / \
386 : : | 8 7
387 : : | | |
388 : : ---+ E
389 : :
390 : :
391 : :
392 : : Block 8 is the latch. We're going to make copies of block 6 (9 & 10)
393 : : and wire things up so they look like this:
394 : :
395 : : 2
396 : : |
397 : : |
398 : : +---->3
399 : : | / \
400 : : | / \
401 : : | 4 5
402 : : | | |
403 : : | | |
404 : : | 9 10
405 : : | |\ /|
406 : : | | \ / |
407 : : | | 7 |
408 : : | | | |
409 : : | | E |
410 : : | | |
411 : : | \ /
412 : : | \ /
413 : : +-----8
414 : :
415 : :
416 : : Blocks 9 and 10 will get merged into blocks 4 & 5 respectively which
417 : : enables CSE, DCE and other optimizations to occur on a larger block
418 : : of code. */
419 : :
420 : : static bool
421 : 17521 : split_paths ()
422 : : {
423 : 17521 : bool changed = false;
424 : :
425 : 17521 : loop_optimizer_init (LOOPS_NORMAL | LOOPS_HAVE_RECORDED_EXITS);
426 : 17521 : initialize_original_copy_tables ();
427 : 17521 : calculate_dominance_info (CDI_DOMINATORS);
428 : :
429 : 107351 : for (auto loop : loops_list (cfun, LI_FROM_INNERMOST))
430 : : {
431 : : /* Only split paths if we are optimizing this loop for speed. */
432 : 54788 : if (!optimize_loop_for_speed_p (loop))
433 : 1373 : continue;
434 : :
435 : : /* See if there is a block that we can duplicate to split the
436 : : path to the loop latch. */
437 : 53415 : basic_block bb
438 : 53415 : = find_block_to_duplicate_for_splitting_paths (loop->latch);
439 : :
440 : : /* BB is the merge point for an IF-THEN-ELSE we want to transform.
441 : :
442 : : Essentially we want to create a duplicate of bb and redirect the
443 : : first predecessor of BB to the duplicate (leaving the second
444 : : predecessor as is. This will split the path leading to the latch
445 : : re-using BB to avoid useless copying. */
446 : 53415 : if (bb && is_feasible_trace (bb))
447 : : {
448 : 1592 : if (dump_file && (dump_flags & TDF_DETAILS))
449 : 4 : fprintf (dump_file,
450 : : "Duplicating join block %d into predecessor paths\n",
451 : : bb->index);
452 : 1592 : basic_block pred0 = EDGE_PRED (bb, 0)->src;
453 : 1592 : if (EDGE_COUNT (pred0->succs) != 1)
454 : 1233 : pred0 = EDGE_PRED (bb, 1)->src;
455 : 1592 : transform_duplicate (pred0, bb);
456 : 1592 : changed = true;
457 : :
458 : : /* If BB has an outgoing edge marked as IRREDUCIBLE, then
459 : : duplicating BB may result in an irreducible region turning
460 : : into a natural loop.
461 : :
462 : : Long term we might want to hook this into the block
463 : : duplication code, but as we've seen with similar changes
464 : : for edge removal, that can be somewhat risky. */
465 : 1592 : if (EDGE_SUCC (bb, 0)->flags & EDGE_IRREDUCIBLE_LOOP
466 : 1592 : || EDGE_SUCC (bb, 1)->flags & EDGE_IRREDUCIBLE_LOOP)
467 : : {
468 : 6 : if (dump_file && (dump_flags & TDF_DETAILS))
469 : 0 : fprintf (dump_file,
470 : : "Join block %d has EDGE_IRREDUCIBLE_LOOP set. "
471 : : "Scheduling loop fixups.\n",
472 : : bb->index);
473 : 6 : loops_state_set (LOOPS_NEED_FIXUP);
474 : : }
475 : : }
476 : 17521 : }
477 : :
478 : 17521 : loop_optimizer_finalize ();
479 : 17521 : free_original_copy_tables ();
480 : 17521 : return changed;
481 : : }
482 : :
483 : : /* Main entry point for splitting paths. Returns TODO_cleanup_cfg if any
484 : : paths where split, otherwise return zero. */
485 : :
486 : : static unsigned int
487 : 82561 : execute_split_paths ()
488 : : {
489 : : /* If we don't have at least 2 real blocks and backedges in the
490 : : CFG, then there's no point in trying to perform path splitting. */
491 : 82561 : if (n_basic_blocks_for_fn (cfun) <= NUM_FIXED_BLOCKS + 1
492 : 82561 : || !mark_dfs_back_edges ())
493 : 65040 : return 0;
494 : :
495 : 17521 : bool changed = split_paths();
496 : 17521 : if (changed)
497 : 1193 : free_dominance_info (CDI_DOMINATORS);
498 : :
499 : 1193 : return changed ? TODO_cleanup_cfg : 0;
500 : : }
501 : :
502 : : static bool
503 : 1000779 : gate_split_paths ()
504 : : {
505 : 1000779 : return flag_split_paths;
506 : : }
507 : :
508 : : namespace {
509 : :
510 : : const pass_data pass_data_split_paths =
511 : : {
512 : : GIMPLE_PASS, /* type */
513 : : "split-paths", /* name */
514 : : OPTGROUP_NONE, /* optinfo_flags */
515 : : TV_SPLIT_PATHS, /* tv_id */
516 : : PROP_ssa, /* properties_required */
517 : : 0, /* properties_provided */
518 : : 0, /* properties_destroyed */
519 : : 0, /* todo_flags_start */
520 : : TODO_update_ssa, /* todo_flags_finish */
521 : : };
522 : :
523 : : class pass_split_paths : public gimple_opt_pass
524 : : {
525 : : public:
526 : 280831 : pass_split_paths (gcc::context *ctxt)
527 : 561662 : : gimple_opt_pass (pass_data_split_paths, ctxt)
528 : : {}
529 : : /* opt_pass methods: */
530 : 0 : opt_pass * clone () final override { return new pass_split_paths (m_ctxt); }
531 : 1000779 : bool gate (function *) final override { return gate_split_paths (); }
532 : 82561 : unsigned int execute (function *) final override
533 : : {
534 : 82561 : return execute_split_paths ();
535 : : }
536 : :
537 : : }; // class pass_split_paths
538 : :
539 : : } // anon namespace
540 : :
541 : : gimple_opt_pass *
542 : 280831 : make_pass_split_paths (gcc::context *ctxt)
543 : : {
544 : 280831 : return new pass_split_paths (ctxt);
545 : : }
|