Line data Source code
1 : /* Support routines for Splitting Paths to loop backedges
2 : Copyright (C) 2015-2026 Free Software Foundation, Inc.
3 : Contributed by Ajit Kumar Agarwal <ajitkum@xilinx.com>.
4 :
5 : This file is part of GCC.
6 :
7 : GCC is free software; you can redistribute it and/or modify
8 : it under the terms of the GNU General Public License as published by
9 : the Free Software Foundation; either version 3, or (at your option)
10 : any later version.
11 :
12 : GCC is distributed in the hope that it will be useful,
13 : but WITHOUT ANY WARRANTY; without even the implied warranty of
14 : MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 : GNU General Public License for more details.
16 :
17 : You should have received a copy of the GNU General Public License
18 : along with GCC; see the file COPYING3. If not see
19 : <http://www.gnu.org/licenses/>. */
20 :
21 : #include "config.h"
22 : #include "system.h"
23 : #include "coretypes.h"
24 : #include "backend.h"
25 : #include "tree.h"
26 : #include "gimple.h"
27 : #include "tree-pass.h"
28 : #include "tree-cfg.h"
29 : #include "cfganal.h"
30 : #include "cfgloop.h"
31 : #include "gimple-iterator.h"
32 : #include "tracer.h"
33 : #include "predict.h"
34 : #include "gimple-ssa.h"
35 : #include "tree-phinodes.h"
36 : #include "ssa-iterators.h"
37 : #include "fold-const.h"
38 : #include "cfghooks.h"
39 :
/* Given LATCH, the latch block in a loop, see if the shape of the
   path reaching LATCH is suitable for being split by duplication.
   If so, return the block that will be duplicated into its predecessor
   paths.  Else return NULL.

   The shape we look for is an IF-THEN-ELSE diamond whose join point
   is the sole predecessor of LATCH; that join point is the candidate
   for duplication.  */

static basic_block
find_block_to_duplicate_for_splitting_paths (basic_block latch)
{
  /* We should have simple latches at this point.  So the latch should
     have a single successor.  This implies the predecessor of the latch
     likely has the loop exit.  And it's that predecessor we're most
     interested in. To keep things simple, we're going to require that
     the latch have a single predecessor too.  */
  if (single_succ_p (latch) && single_pred_p (latch))
    {
      /* With a single predecessor, that predecessor necessarily
	 dominates the latch.  */
      basic_block bb = get_immediate_dominator (CDI_DOMINATORS, latch);
      gcc_assert (single_pred_edge (latch)->src == bb);

      /* If BB has been marked as not to be duplicated, then honor that
	 request.  */
      if (ignore_bb_p (bb))
	return NULL;

      gimple *last = gsi_stmt (gsi_last_nondebug_bb (bb));
      /* The immediate dominator of the latch must end in a conditional.  */
      if (!last || gimple_code (last) != GIMPLE_COND)
	return NULL;

      /* We're hoping that BB is a join point for an IF-THEN-ELSE diamond
	 region.  Verify that it is.

	 First, verify that BB has two predecessors (each arm of the
	 IF-THEN-ELSE) and two successors (the latch and exit) and that
	 all edges are normal.  */
      if (EDGE_COUNT (bb->preds) == 2
	  && !(EDGE_PRED (bb, 0)->flags & EDGE_COMPLEX)
	  && !(EDGE_PRED (bb, 1)->flags & EDGE_COMPLEX)
	  && EDGE_COUNT (bb->succs) == 2
	  && !(EDGE_SUCC (bb, 0)->flags & EDGE_COMPLEX)
	  && !(EDGE_SUCC (bb, 1)->flags & EDGE_COMPLEX))
	{
	  /* Now verify that BB's immediate dominator ends in a
	     conditional as well.  */
	  basic_block bb_idom = get_immediate_dominator (CDI_DOMINATORS, bb);
	  gimple *last = gsi_stmt (gsi_last_nondebug_bb (bb_idom));
	  if (!last || gimple_code (last) != GIMPLE_COND)
	    return NULL;

	  /* And that BB's immediate dominator's successors are the
	     predecessors of BB or BB itself.  This rejects shapes where
	     extra blocks sit between the condition and the join point.  */
	  if (!(EDGE_PRED (bb, 0)->src == bb_idom
		|| find_edge (bb_idom, EDGE_PRED (bb, 0)->src))
	      || !(EDGE_PRED (bb, 1)->src == bb_idom
		   || find_edge (bb_idom, EDGE_PRED (bb, 1)->src)))
	    return NULL;

	  /* And that the predecessors of BB each have a single successor
	     or are BB's immediate dominator itself (the half-diamond
	     case where one arm is empty).  */
	  if (!(EDGE_PRED (bb, 0)->src == bb_idom
		|| single_succ_p (EDGE_PRED (bb, 0)->src))
	      || !(EDGE_PRED (bb, 1)->src == bb_idom
		   || single_succ_p (EDGE_PRED (bb, 1)->src)))
	    return NULL;

	  /* So at this point we have a simple diamond for an IF-THEN-ELSE
	     construct starting at BB_IDOM, with a join point at BB.  BB
	     passes control outside the loop or to the loop latch.

	     We're going to want to create two duplicates of BB, one for
	     each successor of BB_IDOM.  */
	  return bb;
	}
    }
  return NULL;
}
115 :
116 : /* Return the number of non-debug statements in a block. */
117 : static unsigned int
118 10852 : count_stmts_in_block (basic_block bb)
119 : {
120 10852 : gimple_stmt_iterator gsi;
121 10852 : unsigned int num_stmts = 0;
122 :
123 78967 : for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
124 : {
125 57263 : gimple *stmt = gsi_stmt (gsi);
126 57263 : if (!is_gimple_debug (stmt))
127 46856 : num_stmts++;
128 : }
129 10852 : return num_stmts;
130 : }
131 :
132 : /* Return TRUE if CODE represents a tree code that is not likely to
133 : be easily if-convertable because it likely expands into multiple
134 : insns, FALSE otherwise. */
135 : static bool
136 997 : poor_ifcvt_candidate_code (enum tree_code code)
137 : {
138 997 : return (code == MIN_EXPR
139 : || code == MAX_EXPR
140 997 : || code == ABS_EXPR
141 997 : || code == COND_EXPR);
142 : }
143 :
/* Return TRUE if PRED, a predecessor of join block BB, is a poor
   if-conversion candidate (i.e. duplicating BB into PRED's path is
   unlikely to spoil a profitable if-conversion).  */
static bool
poor_ifcvt_pred (basic_block pred, basic_block bb)
{
  /* If the edge count of the pred is not 1, then
     this is the predecessor from the if rather
     than the middle one; it cannot be the "then"/"else"
     arm of a half-diamond, so it is never a poor candidate.  */
  if (EDGE_COUNT (pred->succs) != 1)
    return false;

  /* Empty middle bbs are never a poor ifcvt candidate.  */
  if (empty_block_p (pred))
    return false;
  /* If BB's predecessors are single statement blocks where
     the output of that statement feeds the same PHI in BB,
     it is a good ifcvt candidate.  Anything larger is poor.  */
  gimple *stmt = last_and_only_stmt (pred);
  if (!stmt || gimple_code (stmt) != GIMPLE_ASSIGN)
    return true;

  /* If the statement could trap, then this is a poor ifcvt candidate.  */
  if (gimple_could_trap_p (stmt))
    return true;

  /* Codes that expand to several insns defeat if-conversion.  */
  tree_code code = gimple_assign_rhs_code (stmt);
  if (poor_ifcvt_candidate_code (code))
    return true;
  /* Finally, a good candidate's single assignment must feed one of
     BB's PHIs directly; otherwise report poor.  */
  tree lhs = gimple_assign_lhs (stmt);
  gimple_stmt_iterator gsi;
  for (gsi = gsi_start_phis (bb); !gsi_end_p (gsi); gsi_next (&gsi))
    {
      gimple *phi = gsi_stmt (gsi);
      if (gimple_phi_arg_def (phi, 0) == lhs
	  || gimple_phi_arg_def (phi, 1) == lhs)
	return false;
    }
  return true;
}
182 :
/* Return TRUE if BB is a reasonable block to duplicate by examining
   its size, false otherwise.  BB will always be a loop latch block.

   Things to consider:

     We do not want to spoil if-conversion if at all possible.

     Most of the benefit seems to be from eliminating the unconditional
     jump rather than CSE/DCE opportunities.  So favor duplicating
     small latches.  A latch with just a conditional branch is ideal.

     CSE/DCE opportunities crop up when statements from the predecessors
     feed statements in the latch and allow statements in the latch to
     simplify.  */

static bool
is_feasible_trace (basic_block bb)
{
  basic_block pred1 = EDGE_PRED (bb, 0)->src;
  basic_block pred2 = EDGE_PRED (bb, 1)->src;
  int num_stmts_in_join = count_stmts_in_block (bb);
  /* Only count statements in a predecessor when it is a pure arm of
     the diamond (single successor); the dominator itself counts as 0.  */
  int num_stmts_in_pred1
    = EDGE_COUNT (pred1->succs) == 1 ? count_stmts_in_block (pred1) : 0;
  int num_stmts_in_pred2
    = EDGE_COUNT (pred2->succs) == 1 ? count_stmts_in_block (pred2) : 0;

  /* Upper hard limit on the number of statements to copy.  */
  if (num_stmts_in_join
      >= param_max_jump_thread_duplication_stmts)
    {
      if (dump_file && (dump_flags & TDF_DETAILS))
	fprintf (dump_file,
		 "Duplicating block %d would duplicate "
		 "too many statments: %d >= %d\n",
		 bb->index, num_stmts_in_join,
		 param_max_jump_thread_duplication_stmts);
      return false;
    }

  /* This is meant to catch cases that are likely opportunities for
     if-conversion.  */
  if (num_stmts_in_pred1 <= 1 && num_stmts_in_pred2 <= 1)
    {
      int num_phis = 0;
      /* The max number of PHIs that should be considered for an ifcvt
	 candidate.  */
      const int max_num_phis = 3;
      for (gphi_iterator si = gsi_start_phis (bb); ! gsi_end_p (si);
	   gsi_next (&si))
	{
	  num_phis++;
	  if (num_phis > max_num_phis)
	    break;
	}
      if (num_phis <= max_num_phis
	  && !poor_ifcvt_pred (pred1, bb)
	  && !poor_ifcvt_pred (pred2, bb))
	{
	  if (dump_file && (dump_flags & TDF_DETAILS))
	    fprintf (dump_file,
		     "Block %d appears to be a join point for "
		     "if-convertable bbs.\n",
		     bb->index);
	  return false;
	}
    }

  /* If the joiner has no PHIs with useful uses there is zero chance
     of CSE/DCE/jump-threading possibilities exposed by duplicating it.  */
  bool found_useful_phi = false;
  for (gphi_iterator si = gsi_start_phis (bb); ! gsi_end_p (si);
       gsi_next (&si))
    {
      gphi *phi = si.phi ();
      use_operand_p use_p;
      imm_use_iterator iter;
      FOR_EACH_IMM_USE_FAST (use_p, iter, gimple_phi_result (phi))
	{
	  gimple *stmt = USE_STMT (use_p);
	  if (is_gimple_debug (stmt))
	    continue;
	  /* If there's a use in the joiner this might be a CSE/DCE
	     opportunity, but not if the use is in a conditional
	     which makes this a likely if-conversion candidate.  */
	  if (gimple_bb (stmt) == bb
	      && (!is_gimple_assign (stmt)
		  || (TREE_CODE_CLASS (gimple_assign_rhs_code (stmt))
		      != tcc_comparison)))
	    {
	      found_useful_phi = true;
	      break;
	    }
	  /* If the use is on a loop header PHI and on one path the
	     value is unchanged this might expose a jump threading
	     opportunity.  */
	  if (gimple_code (stmt) == GIMPLE_PHI
	      && gimple_bb (stmt) == bb->loop_father->header
	      /* But for memory the PHI alone isn't good enough.  */
	      && ! virtual_operand_p (gimple_phi_result (stmt)))
	    {
	      /* Look for an argument of PHI that flows unchanged into
		 the loop header PHI (i.e. the back edge value equals
		 the header PHI's result on one path).  */
	      bool found_unchanged_path = false;
	      for (unsigned i = 0; i < gimple_phi_num_args (phi); ++i)
		if (gimple_phi_arg_def (phi, i) == gimple_phi_result (stmt))
		  {
		    found_unchanged_path = true;
		    break;
		  }
	      /* If we found an unchanged path this can only be a threading
		 opportunity if we have uses of the loop header PHI result
		 in a stmt dominating the merge block.  Otherwise the
		 splitting may prevent if-conversion.  */
	      if (found_unchanged_path)
		{
		  use_operand_p use2_p;
		  imm_use_iterator iter2;
		  FOR_EACH_IMM_USE_FAST (use2_p, iter2, gimple_phi_result (stmt))
		    {
		      gimple *use_stmt = USE_STMT (use2_p);
		      if (is_gimple_debug (use_stmt))
			continue;
		      basic_block use_bb = gimple_bb (use_stmt);
		      if (use_bb != bb
			  && dominated_by_p (CDI_DOMINATORS, bb, use_bb))
			{
			  /* Only an equality/inequality test can be
			     resolved by threading the unchanged value.  */
			  if (gcond *cond = dyn_cast <gcond *> (use_stmt))
			    if (gimple_cond_code (cond) == EQ_EXPR
				|| gimple_cond_code (cond) == NE_EXPR)
			      found_useful_phi = true;
			  break;
			}
		    }
		}
	      if (found_useful_phi)
		break;
	    }
	}
      if (found_useful_phi)
	break;
    }
  /* There is one exception namely a controlling condition we can propagate
     an equivalence from to the joiner.  */
  bool found_cprop_opportunity = false;
  basic_block dom = get_immediate_dominator (CDI_DOMINATORS, bb);
  gcond *cond = as_a <gcond *> (*gsi_last_bb (dom));
  if (gimple_cond_code (cond) == EQ_EXPR
      || gimple_cond_code (cond) == NE_EXPR)
    for (unsigned i = 0; i < 2; ++i)
      {
	tree op = gimple_op (cond, i);
	if (TREE_CODE (op) == SSA_NAME)
	  {
	    use_operand_p use_p;
	    imm_use_iterator iter;
	    FOR_EACH_IMM_USE_FAST (use_p, iter, op)
	      {
		if (is_gimple_debug (USE_STMT (use_p)))
		  continue;
		/* A use of the compared SSA name inside the joiner can
		   be simplified once the controlling condition's
		   equivalence is known on each duplicated path.  */
		if (gimple_bb (USE_STMT (use_p)) == bb)
		  {
		    found_cprop_opportunity = true;
		    break;
		  }
	      }
	  }
	if (found_cprop_opportunity)
	  break;
      }

  if (! found_useful_phi && ! found_cprop_opportunity)
    {
      if (dump_file && (dump_flags & TDF_DETAILS))
	fprintf (dump_file,
		 "Block %d is a join that does not expose CSE/DCE/jump-thread "
		 "opportunities when duplicated.\n",
		 bb->index);
      return false;
    }

  /* We may want something here which looks at dataflow and tries
     to guess if duplication of BB is likely to result in simplification
     of instructions in BB in either the original or the duplicate.  */
  return true;
}
366 :
367 : /* If the immediate dominator of the latch of the loop is
368 : block with conditional branch, then the loop latch is
369 : duplicated to its predecessors path preserving the SSA
370 : semantics.
371 :
372 : CFG before transformation.
373 :
374 : 2
375 : |
376 : |
377 : +---->3
378 : | / \
379 : | / \
380 : | 4 5
381 : | \ /
382 : | \ /
383 : | 6
384 : | / \
385 : | / \
386 : | 8 7
387 : | | |
388 : ---+ E
389 :
390 :
391 :
392 : Block 8 is the latch. We're going to make copies of block 6 (9 & 10)
393 : and wire things up so they look like this:
394 :
395 : 2
396 : |
397 : |
398 : +---->3
399 : | / \
400 : | / \
401 : | 4 5
402 : | | |
403 : | | |
404 : | 9 10
405 : | |\ /|
406 : | | \ / |
407 : | | 7 |
408 : | | | |
409 : | | E |
410 : | | |
411 : | \ /
412 : | \ /
413 : +-----8
414 :
415 :
416 : Blocks 9 and 10 will get merged into blocks 4 & 5 respectively which
417 : enables CSE, DCE and other optimizations to occur on a larger block
418 : of code. */
419 :
420 : static bool
421 19231 : split_paths ()
422 : {
423 19231 : bool changed = false;
424 :
425 19231 : loop_optimizer_init (LOOPS_NORMAL | LOOPS_HAVE_RECORDED_EXITS);
426 19231 : initialize_original_copy_tables ();
427 19231 : calculate_dominance_info (CDI_DOMINATORS);
428 :
429 117807 : for (auto loop : loops_list (cfun, LI_FROM_INNERMOST))
430 : {
431 : /* Only split paths if we are optimizing this loop for speed. */
432 60114 : if (!optimize_loop_for_speed_p (loop))
433 1462 : continue;
434 :
435 : /* See if there is a block that we can duplicate to split the
436 : path to the loop latch. */
437 58652 : basic_block bb
438 58652 : = find_block_to_duplicate_for_splitting_paths (loop->latch);
439 :
440 : /* BB is the merge point for an IF-THEN-ELSE we want to transform.
441 :
442 : Essentially we want to create a duplicate of bb and redirect the
443 : first predecessor of BB to the duplicate (leaving the second
444 : predecessor as is. This will split the path leading to the latch
445 : re-using BB to avoid useless copying. */
446 58652 : if (bb && is_feasible_trace (bb))
447 : {
448 1723 : if (dump_file && (dump_flags & TDF_DETAILS))
449 4 : fprintf (dump_file,
450 : "Duplicating join block %d into predecessor paths\n",
451 : bb->index);
452 1723 : basic_block pred0 = EDGE_PRED (bb, 0)->src;
453 1723 : if (EDGE_COUNT (pred0->succs) != 1)
454 1298 : pred0 = EDGE_PRED (bb, 1)->src;
455 1723 : transform_duplicate (pred0, bb);
456 1723 : changed = true;
457 :
458 : /* If BB has an outgoing edge marked as IRREDUCIBLE, then
459 : duplicating BB may result in an irreducible region turning
460 : into a natural loop.
461 :
462 : Long term we might want to hook this into the block
463 : duplication code, but as we've seen with similar changes
464 : for edge removal, that can be somewhat risky. */
465 1723 : if (EDGE_SUCC (bb, 0)->flags & EDGE_IRREDUCIBLE_LOOP
466 1723 : || EDGE_SUCC (bb, 1)->flags & EDGE_IRREDUCIBLE_LOOP)
467 : {
468 6 : if (dump_file && (dump_flags & TDF_DETAILS))
469 0 : fprintf (dump_file,
470 : "Join block %d has EDGE_IRREDUCIBLE_LOOP set. "
471 : "Scheduling loop fixups.\n",
472 : bb->index);
473 6 : loops_state_set (LOOPS_NEED_FIXUP);
474 : }
475 : }
476 19231 : }
477 :
478 19231 : loop_optimizer_finalize ();
479 19231 : free_original_copy_tables ();
480 19231 : return changed;
481 : }
482 :
483 : /* Main entry point for splitting paths. Returns TODO_cleanup_cfg if any
484 : paths where split, otherwise return zero. */
485 :
486 : static unsigned int
487 89445 : execute_split_paths ()
488 : {
489 : /* If we don't have at least 2 real blocks and backedges in the
490 : CFG, then there's no point in trying to perform path splitting. */
491 89445 : if (n_basic_blocks_for_fn (cfun) <= NUM_FIXED_BLOCKS + 1
492 89445 : || !mark_dfs_back_edges ())
493 70214 : return 0;
494 :
495 19231 : bool changed = split_paths();
496 19231 : if (changed)
497 1339 : free_dominance_info (CDI_DOMINATORS);
498 :
499 1339 : return changed ? TODO_cleanup_cfg : 0;
500 : }
501 :
502 : static bool
503 1041484 : gate_split_paths ()
504 : {
505 1041484 : return flag_split_paths;
506 : }
507 :
namespace {

/* Pass descriptor for the path-splitting pass.  The initializers are
   positional; keep them in pass_data declaration order.  */
const pass_data pass_data_split_paths =
{
  GIMPLE_PASS, /* type */
  "split-paths", /* name */
  OPTGROUP_NONE, /* optinfo_flags */
  TV_SPLIT_PATHS, /* tv_id */
  PROP_ssa, /* properties_required */
  0, /* properties_provided */
  0, /* properties_destroyed */
  0, /* todo_flags_start */
  TODO_update_ssa, /* todo_flags_finish */
};

/* Pass wrapper; the real work is in execute_split_paths.  */
class pass_split_paths : public gimple_opt_pass
{
public:
  pass_split_paths (gcc::context *ctxt)
    : gimple_opt_pass (pass_data_split_paths, ctxt)
  {}
  /* opt_pass methods: */
  opt_pass * clone () final override { return new pass_split_paths (m_ctxt); }
  bool gate (function *) final override { return gate_split_paths (); }
  unsigned int execute (function *) final override
  {
    return execute_split_paths ();
  }

}; // class pass_split_paths

} // anon namespace
540 :
541 : gimple_opt_pass *
542 285722 : make_pass_split_paths (gcc::context *ctxt)
543 : {
544 285722 : return new pass_split_paths (ctxt);
545 : }
|