Branch data Line data Source code
1 : : /* Loop splitting.
2 : : Copyright (C) 2015-2025 Free Software Foundation, Inc.
3 : :
4 : : This file is part of GCC.
5 : :
6 : : GCC is free software; you can redistribute it and/or modify it
7 : : under the terms of the GNU General Public License as published by the
8 : : Free Software Foundation; either version 3, or (at your option) any
9 : : later version.
10 : :
11 : : GCC is distributed in the hope that it will be useful, but WITHOUT
12 : : ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
13 : : FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
14 : : for more details.
15 : :
16 : : You should have received a copy of the GNU General Public License
17 : : along with GCC; see the file COPYING3. If not see
18 : : <http://www.gnu.org/licenses/>. */
19 : :
20 : : #include "config.h"
21 : : #include "system.h"
22 : : #include "coretypes.h"
23 : : #include "backend.h"
24 : : #include "tree.h"
25 : : #include "gimple.h"
26 : : #include "tree-pass.h"
27 : : #include "ssa.h"
28 : : #include "fold-const.h"
29 : : #include "tree-cfg.h"
30 : : #include "tree-ssa.h"
31 : : #include "tree-ssa-loop-niter.h"
32 : : #include "tree-ssa-loop.h"
33 : : #include "tree-ssa-loop-manip.h"
34 : : #include "tree-into-ssa.h"
35 : : #include "tree-inline.h"
36 : : #include "tree-cfgcleanup.h"
37 : : #include "cfgloop.h"
38 : : #include "tree-scalar-evolution.h"
39 : : #include "gimple-iterator.h"
40 : : #include "gimple-pretty-print.h"
41 : : #include "cfghooks.h"
42 : : #include "gimple-fold.h"
43 : : #include "gimplify-me.h"
44 : : #include "print-tree.h"
45 : : #include "value-query.h"
46 : : #include "sreal.h"
47 : :
48 : : /* This file implements two kinds of loop splitting.
49 : :
50 : : One transformation of loops like:
51 : :
52 : : for (i = 0; i < 100; i++)
53 : : {
54 : : if (i < 50)
55 : : A;
56 : : else
57 : : B;
58 : : }
59 : :
60 : : into:
61 : :
62 : : for (i = 0; i < 50; i++)
63 : : {
64 : : A;
65 : : }
66 : : for (; i < 100; i++)
67 : : {
68 : : B;
69 : : }
70 : :
71 : : */
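/* A minimal, assumed C example (not taken from the GCC sources or testsuite;
   the function and array names are made up) of the shape the first
   transformation targets.  The condition "i < n / 2" partitions the
   iteration space, so the loop can be split into the two loops above:

     void
     first_kind_example (int *a, int *b, int n)
     {
       for (int i = 0; i < n; i++)
         {
           if (i < n / 2)
             a[i] = 0;   // the "A" part of the body
           else
             b[i] = 1;   // the "B" part of the body
         }
     }
*/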
72 : :
73 : : /* Return the guard IV when BB inside LOOP is a potential iteration space
74 : : split point, i.e. it ends with a condition like "IV < comp" which is
75 : : true on one side of the iteration space and false on the other, and the
76 : : split point can be computed; return NULL_TREE otherwise. Also return
77 : : the border in *BORDER, the IV description in IV and the code in *GUARD_CODE. */
78 : :
79 : : static tree
80 : 164665 : split_at_bb_p (class loop *loop, basic_block bb, tree *border, affine_iv *iv,
81 : : enum tree_code *guard_code)
82 : : {
83 : 164665 : gcond *stmt;
84 : 164665 : affine_iv iv2;
85 : :
86 : : /* BB must end in a simple conditional jump. */
87 : 419544 : stmt = safe_dyn_cast <gcond *> (*gsi_last_bb (bb));
88 : 73194 : if (!stmt)
89 : : return NULL_TREE;
90 : :
91 : 73194 : enum tree_code code = gimple_cond_code (stmt);
92 : :
93 : 73194 : if (loop_exits_from_bb_p (loop, bb))
94 : : return NULL_TREE;
95 : :
96 : 32296 : tree op0 = gimple_cond_lhs (stmt);
97 : 32296 : tree op1 = gimple_cond_rhs (stmt);
98 : 32296 : class loop *useloop = loop_containing_stmt (stmt);
99 : :
100 : 32296 : if (!simple_iv (loop, useloop, op0, iv, false))
101 : : return NULL_TREE;
102 : 5179 : if (!simple_iv (loop, useloop, op1, &iv2, false))
103 : : return NULL_TREE;
104 : :
105 : : /* Make it so that the first argument of the condition is
106 : : the looping one. */
107 : 2317 : if (!integer_zerop (iv2.step))
108 : : {
109 : 732 : std::swap (op0, op1);
110 : 732 : std::swap (*iv, iv2);
111 : 732 : code = swap_tree_comparison (code);
112 : 732 : gimple_cond_set_condition (stmt, code, op0, op1);
113 : 732 : update_stmt (stmt);
114 : : }
115 : 1585 : else if (integer_zerop (iv->step))
116 : : return NULL_TREE;
117 : 1300 : if (!integer_zerop (iv2.step))
118 : : return NULL_TREE;
119 : 1118 : if (!iv->no_overflow)
120 : : return NULL_TREE;
121 : :
122 : : /* Only handle relational comparisons, for equality and non-equality
123 : : we'd have to split the loop into two loops and a middle statement. */
124 : 1066 : switch (code)
125 : : {
126 : : case LT_EXPR:
127 : : case LE_EXPR:
128 : : case GT_EXPR:
129 : : case GE_EXPR:
130 : : break;
131 : 359 : case NE_EXPR:
132 : 359 : case EQ_EXPR:
133 : : /* If the test checks for the first iteration, we can handle NE/EQ
134 : : with only one split loop. */
135 : 359 : if (operand_equal_p (iv->base, iv2.base, 0))
136 : : {
137 : 129 : if (code == EQ_EXPR)
138 : 102 : code = !tree_int_cst_sign_bit (iv->step) ? LE_EXPR : GE_EXPR;
139 : : else
140 : 27 : code = !tree_int_cst_sign_bit (iv->step) ? GT_EXPR : LT_EXPR;
141 : : break;
142 : : }
143 : : /* Similarly when the test checks for the minimal or maximal
144 : : value of the range. */
145 : : else
146 : : {
147 : 230 : value_range r (TREE_TYPE (op0));
148 : 230 : get_global_range_query ()->range_of_expr (r, op0, stmt);
149 : 230 : if (!r.varying_p () && !r.undefined_p ()
150 : 452 : && TREE_CODE (op1) == INTEGER_CST)
151 : : {
152 : 109 : wide_int val = wi::to_wide (op1);
153 : 109 : if (known_eq (val, wi::to_wide (r.lbound ())))
154 : : {
155 : 0 : code = (code == EQ_EXPR) ? LE_EXPR : GT_EXPR;
156 : : break;
157 : : }
158 : 109 : else if (known_eq (val, wi::to_wide (r.ubound ())))
159 : : {
160 : 36 : code = (code == EQ_EXPR) ? GE_EXPR : LT_EXPR;
161 : : break;
162 : : }
163 : 73 : }
164 : 230 : }
165 : : /* TODO: We can compare with the exit condition; it seems that testing
166 : : for the last iteration is a common case. */
167 : 194 : return NULL_TREE;
168 : : default:
169 : : return NULL_TREE;
170 : : }
171 : :
172 : 872 : if (dump_file && (dump_flags & TDF_DETAILS))
173 : : {
174 : 25 : fprintf (dump_file, "Found potential split point: ");
175 : 25 : print_gimple_stmt (dump_file, stmt, 0, TDF_SLIM);
176 : 25 : fprintf (dump_file, " { ");
177 : 25 : print_generic_expr (dump_file, iv->base, TDF_SLIM);
178 : 25 : fprintf (dump_file, " + I*");
179 : 25 : print_generic_expr (dump_file, iv->step, TDF_SLIM);
180 : 25 : fprintf (dump_file, " } %s ", get_tree_code_name (code));
181 : 25 : print_generic_expr (dump_file, iv2.base, TDF_SLIM);
182 : 25 : fprintf (dump_file, "\n");
183 : : }
184 : :
185 : 872 : *border = iv2.base;
186 : 872 : *guard_code = code;
187 : 872 : return op0;
188 : : }
189 : :
190 : : /* Given a GUARD conditional stmt inside LOOP, which we want to make always
191 : : true or false depending on INITIAL_TRUE, and adjusted values NEXTVAL
192 : : (a post-increment IV) and NEWBOUND (the comparator) adjust the loop
193 : : exit test statement to loop back only if the GUARD statement will
194 : : also be true/false in the next iteration. */
195 : :
196 : : static void
197 : 383 : patch_loop_exit (class loop *loop, tree_code guard_code, tree nextval,
198 : : tree newbound, bool initial_true)
199 : : {
200 : 383 : edge exit = single_exit (loop);
201 : 766 : gcond *stmt = as_a <gcond *> (*gsi_last_bb (exit->src));
202 : 383 : gimple_cond_set_condition (stmt, guard_code, nextval, newbound);
203 : 383 : update_stmt (stmt);
204 : :
205 : 383 : edge stay = EDGE_SUCC (exit->src, EDGE_SUCC (exit->src, 0) == exit);
206 : :
207 : 383 : exit->flags &= ~(EDGE_TRUE_VALUE | EDGE_FALSE_VALUE);
208 : 383 : stay->flags &= ~(EDGE_TRUE_VALUE | EDGE_FALSE_VALUE);
209 : :
210 : 383 : if (initial_true)
211 : : {
212 : 279 : exit->flags |= EDGE_FALSE_VALUE;
213 : 279 : stay->flags |= EDGE_TRUE_VALUE;
214 : : }
215 : : else
216 : : {
217 : 104 : exit->flags |= EDGE_TRUE_VALUE;
218 : 104 : stay->flags |= EDGE_FALSE_VALUE;
219 : : }
220 : 383 : }
221 : :
222 : : /* Given an induction variable GUARD_IV, and its affine descriptor IV,
223 : : find the loop phi node in LOOP defining it directly, or create
224 : : such a phi node. Return that phi node. */
225 : :
226 : : static gphi *
227 : 864 : find_or_create_guard_phi (class loop *loop, tree guard_iv, affine_iv * /*iv*/)
228 : : {
229 : 864 : gimple *def = SSA_NAME_DEF_STMT (guard_iv);
230 : 864 : gphi *phi;
231 : 864 : if ((phi = dyn_cast <gphi *> (def))
232 : 383 : && gimple_bb (phi) == loop->header)
233 : 383 : return phi;
234 : :
235 : : /* XXX Create the PHI instead. */
236 : : return NULL;
237 : : }
238 : :
239 : : /* Returns true if the exit values of all loop phi nodes can be
240 : : determined easily (i.e. such that connect_loop_phis can determine them). */
241 : :
242 : : static bool
243 : 48298 : easy_exit_values (class loop *loop)
244 : : {
245 : 48298 : edge exit = single_exit (loop);
246 : 48298 : edge latch = loop_latch_edge (loop);
247 : 48298 : gphi_iterator psi;
248 : :
249 : : /* Currently we regard the exit values as easy if they are the same
250 : : as the value over the backedge, which is the case if the definition
251 : : of the backedge value dominates the exit edge. */
252 : 161377 : for (psi = gsi_start_phis (loop->header); !gsi_end_p (psi); gsi_next (&psi))
253 : : {
254 : 113079 : gphi *phi = psi.phi ();
255 : 113079 : tree next = PHI_ARG_DEF_FROM_EDGE (phi, latch);
256 : 113079 : basic_block bb;
257 : 113079 : if (TREE_CODE (next) == SSA_NAME
258 : 111938 : && (bb = gimple_bb (SSA_NAME_DEF_STMT (next)))
259 : 225014 : && !dominated_by_p (CDI_DOMINATORS, exit->src, bb))
260 : : return false;
261 : : }
262 : :
263 : : return true;
264 : : }
265 : :
266 : : /* This function updates the SSA form after connect_loops made a new
267 : : edge NEW_E leading from LOOP1 exit to LOOP2 (via an intermediate
268 : : conditional). I.e. the second loop can now be entered either
269 : : via the original entry or via NEW_E, so the entry values of LOOP2
270 : : phi nodes are either the original ones or those at the exit
271 : : of LOOP1. Insert new phi nodes in LOOP2 pre-header reflecting
272 : : this. The loops need to fulfill easy_exit_values(). */
273 : :
274 : : static void
275 : 1170 : connect_loop_phis (class loop *loop1, class loop *loop2, edge new_e)
276 : : {
277 : 1170 : basic_block rest = loop_preheader_edge (loop2)->src;
278 : 1170 : gcc_assert (new_e->dest == rest);
279 : 1170 : edge skip_first = EDGE_PRED (rest, EDGE_PRED (rest, 0) == new_e);
280 : :
281 : 1170 : edge firste = loop_preheader_edge (loop1);
282 : 1170 : edge seconde = loop_preheader_edge (loop2);
283 : 1170 : edge firstn = loop_latch_edge (loop1);
284 : 1170 : gphi_iterator psi_first, psi_second;
285 : 1170 : for (psi_first = gsi_start_phis (loop1->header),
286 : 1170 : psi_second = gsi_start_phis (loop2->header);
287 : 4840 : !gsi_end_p (psi_first);
288 : 3670 : gsi_next (&psi_first), gsi_next (&psi_second))
289 : : {
290 : 3670 : tree init, next, new_init;
291 : 3670 : use_operand_p op;
292 : 3670 : gphi *phi_first = psi_first.phi ();
293 : 3670 : gphi *phi_second = psi_second.phi ();
294 : :
295 : 3670 : init = PHI_ARG_DEF_FROM_EDGE (phi_first, firste);
296 : 3670 : next = PHI_ARG_DEF_FROM_EDGE (phi_first, firstn);
297 : 3670 : op = PHI_ARG_DEF_PTR_FROM_EDGE (phi_second, seconde);
298 : 3670 : gcc_assert (operand_equal_for_phi_arg_p (init, USE_FROM_PTR (op)));
299 : :
300 : : /* Prefer using original variable as a base for the new ssa name.
301 : : This is necessary for virtual ops, and useful in order to avoid
302 : : losing debug info for real ops. */
303 : 3670 : if (TREE_CODE (next) == SSA_NAME
304 : 7267 : && useless_type_conversion_p (TREE_TYPE (next),
305 : 3597 : TREE_TYPE (init)))
306 : 3597 : new_init = copy_ssa_name (next);
307 : 73 : else if (TREE_CODE (init) == SSA_NAME
308 : 83 : && useless_type_conversion_p (TREE_TYPE (init),
309 : 10 : TREE_TYPE (next)))
310 : 10 : new_init = copy_ssa_name (init);
311 : 63 : else if (useless_type_conversion_p (TREE_TYPE (next),
312 : 63 : TREE_TYPE (init)))
313 : 63 : new_init = make_temp_ssa_name (TREE_TYPE (next), NULL,
314 : : "unrinittmp");
315 : : else
316 : 0 : new_init = make_temp_ssa_name (TREE_TYPE (init), NULL,
317 : : "unrinittmp");
318 : :
319 : 3670 : gphi * newphi = create_phi_node (new_init, rest);
320 : 3670 : add_phi_arg (newphi, init, skip_first, UNKNOWN_LOCATION);
321 : 3670 : add_phi_arg (newphi, next, new_e, UNKNOWN_LOCATION);
322 : 3670 : SET_USE (op, new_init);
323 : : }
324 : 1170 : }
325 : :
326 : : /* The two loops LOOP1 and LOOP2 were just created by loop versioning,
327 : : they are still equivalent and placed in two arms of a diamond, like so:
328 : :
329 : : .------if (cond)------.
330 : : v v
331 : : pre1 pre2
332 : : | |
333 : : .--->h1 h2<----.
334 : : | | | |
335 : : | ex1---. .---ex2 |
336 : : | / | | \ |
337 : : '---l1 X | l2---'
338 : : | |
339 : : | |
340 : : '--->join<---'
341 : :
342 : : This function transforms the program such that LOOP1 is conditionally
343 : : falling through to LOOP2, or skipping it. This is done by splitting
344 : : the ex1->join edge at X in the diagram above, and inserting a condition
345 : : whose one arm goes to pre2, resulting in this situation:
346 : :
347 : : .------if (cond)------.
348 : : v v
349 : : pre1 .---------->pre2
350 : : | | |
351 : : .--->h1 | h2<----.
352 : : | | | | |
353 : : | ex1---. | .---ex2 |
354 : : | / v | | \ |
355 : : '---l1 skip---' | l2---'
356 : : | |
357 : : | |
358 : : '--->join<---'
359 : :
360 : :
361 : : The condition used is the exit condition of LOOP1, which effectively means
362 : : that when the first loop exits (for whatever reason) but the real original
363 : : exit expression is still false, the second loop will be entered.
364 : : The function returns the new edge cond->pre2.
365 : :
366 : : This doesn't update the SSA form, see connect_loop_phis for that. */
367 : :
368 : : static edge
369 : 383 : connect_loops (class loop *loop1, class loop *loop2)
370 : : {
371 : 383 : edge exit = single_exit (loop1);
372 : 383 : basic_block skip_bb = split_edge (exit);
373 : 383 : gcond *skip_stmt;
374 : 383 : gimple_stmt_iterator gsi;
375 : 383 : edge new_e, skip_e;
376 : :
377 : 766 : gcond *stmt = as_a <gcond *> (*gsi_last_bb (exit->src));
378 : 383 : skip_stmt = gimple_build_cond (gimple_cond_code (stmt),
379 : : gimple_cond_lhs (stmt),
380 : : gimple_cond_rhs (stmt),
381 : : NULL_TREE, NULL_TREE);
382 : 383 : gsi = gsi_last_bb (skip_bb);
383 : 383 : gsi_insert_after (&gsi, skip_stmt, GSI_NEW_STMT);
384 : :
385 : 383 : skip_e = EDGE_SUCC (skip_bb, 0);
386 : 383 : skip_e->flags &= ~EDGE_FALLTHRU;
387 : 383 : new_e = make_edge (skip_bb, loop_preheader_edge (loop2)->src, 0);
388 : 383 : if (exit->flags & EDGE_TRUE_VALUE)
389 : : {
390 : 95 : skip_e->flags |= EDGE_TRUE_VALUE;
391 : 95 : new_e->flags |= EDGE_FALSE_VALUE;
392 : : }
393 : : else
394 : : {
395 : 288 : skip_e->flags |= EDGE_FALSE_VALUE;
396 : 288 : new_e->flags |= EDGE_TRUE_VALUE;
397 : : }
398 : :
399 : 383 : new_e->probability = profile_probability::very_likely ();
400 : 383 : skip_e->probability = new_e->probability.invert ();
401 : :
402 : 383 : return new_e;
403 : : }
404 : :
405 : : /* This returns the new bound for iterations given the original iteration
406 : : space in NITER, an arbitrary new bound BORDER, assumed to be some
407 : : comparison value with a different IV, the initial value GUARD_INIT of
408 : : that other IV, and the comparison code GUARD_CODE that compares
409 : : that other IV with BORDER. We return an SSA name, and place any
410 : : necessary statements for that computation into *STMTS.
411 : :
412 : : For example for such a loop:
413 : :
414 : : for (i = beg, j = guard_init; i < end; i++, j++)
415 : : if (j < border) // this is supposed to be true/false
416 : : ...
417 : :
418 : : we want to return a new bound (on j) that makes the loop iterate
419 : : as long as the condition j < border stays true. We also don't want
420 : : to iterate more often than the original loop, so we have to introduce
421 : : some cut-off as well (via min/max), effectively resulting in:
422 : :
423 : : newend = min (end+guard_init-beg, border)
424 : : for (i = beg, j = guard_init; j < newend; i++, j++)
425 : : if (j < border)
426 : : ...
427 : :
428 : : Depending on the direction of the IVs and if the exit tests
429 : : are strict or non-strict we need to use MIN or MAX,
430 : : and add or subtract 1. This routine computes newend above. */
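/* A minimal sketch (an assumed standalone helper, not part of GCC) of the
   newend computation above for the simplest case: both IVs count upwards,
   the loop exit uses LT_EXPR and the guard is a strict "j < border", so
   MIN_EXPR is chosen and no +/-1 adjustment is needed.  For example, with
   beg = 2, end = 10, guard_init = 5 and border = 9 it yields
   min (5 + 8, 9) = 9.

     static long
     newend_lt_lt (long beg, long end, long guard_init, long border)
     {
       long enddiff = end - beg;              // iterations of the original loop
       long newbound = guard_init + enddiff;  // guard IV value at the old exit
       return newbound < border ? newbound : border;  // min (newbound, border)
     }
*/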
431 : :
432 : : static tree
433 : 383 : compute_new_first_bound (gimple_seq *stmts, class tree_niter_desc *niter,
434 : : tree border,
435 : : enum tree_code guard_code, tree guard_init)
436 : : {
437 : : /* The niter structure contains the after-increment IV, we need
438 : : the loop-enter base, so subtract STEP once. */
439 : 383 : tree controlbase = force_gimple_operand (niter->control.base,
440 : : stmts, true, NULL_TREE);
441 : 383 : tree controlstep = niter->control.step;
442 : 383 : tree enddiff;
443 : 383 : if (POINTER_TYPE_P (TREE_TYPE (controlbase)))
444 : : {
445 : 1 : controlstep = gimple_build (stmts, NEGATE_EXPR,
446 : 1 : TREE_TYPE (controlstep), controlstep);
447 : 1 : enddiff = gimple_build (stmts, POINTER_PLUS_EXPR,
448 : 1 : TREE_TYPE (controlbase),
449 : : controlbase, controlstep);
450 : : }
451 : : else
452 : 382 : enddiff = gimple_build (stmts, MINUS_EXPR,
453 : 382 : TREE_TYPE (controlbase),
454 : : controlbase, controlstep);
455 : :
456 : : /* Compute end-beg. */
457 : 383 : gimple_seq stmts2;
458 : 383 : tree end = force_gimple_operand (niter->bound, &stmts2,
459 : : true, NULL_TREE);
460 : 383 : gimple_seq_add_seq_without_update (stmts, stmts2);
461 : 383 : if (POINTER_TYPE_P (TREE_TYPE (enddiff)))
462 : : {
463 : 1 : tree tem = gimple_convert (stmts, sizetype, enddiff);
464 : 1 : tem = gimple_build (stmts, NEGATE_EXPR, sizetype, tem);
465 : 1 : enddiff = gimple_build (stmts, POINTER_PLUS_EXPR,
466 : 1 : TREE_TYPE (enddiff),
467 : : end, tem);
468 : : }
469 : : else
470 : 382 : enddiff = gimple_build (stmts, MINUS_EXPR, TREE_TYPE (enddiff),
471 : : end, enddiff);
472 : :
473 : : /* Compute guard_init + (end-beg). */
474 : 383 : tree newbound;
475 : 383 : enddiff = gimple_convert (stmts, TREE_TYPE (guard_init), enddiff);
476 : 383 : if (POINTER_TYPE_P (TREE_TYPE (guard_init)))
477 : : {
478 : 1 : enddiff = gimple_convert (stmts, sizetype, enddiff);
479 : 1 : newbound = gimple_build (stmts, POINTER_PLUS_EXPR,
480 : 1 : TREE_TYPE (guard_init),
481 : : guard_init, enddiff);
482 : : }
483 : : else
484 : 382 : newbound = gimple_build (stmts, PLUS_EXPR, TREE_TYPE (guard_init),
485 : : guard_init, enddiff);
486 : :
487 : : /* Depending on the direction of the IVs the new bound for the first
488 : : loop is the minimum or maximum of old bound and border.
489 : : Also, if the guard condition isn't strictly less or greater,
490 : : we need to adjust the bound. */
491 : 383 : int addbound = 0;
492 : 383 : enum tree_code minmax;
493 : 383 : if (niter->cmp == LT_EXPR)
494 : : {
495 : : /* GT and LE are the same, inverted. */
496 : 361 : if (guard_code == GT_EXPR || guard_code == LE_EXPR)
497 : : addbound = -1;
498 : : minmax = MIN_EXPR;
499 : : }
500 : : else
501 : : {
502 : 22 : gcc_assert (niter->cmp == GT_EXPR);
503 : 22 : if (guard_code == GE_EXPR || guard_code == LT_EXPR)
504 : : addbound = 1;
505 : : minmax = MAX_EXPR;
506 : : }
507 : :
508 : : if (addbound)
509 : : {
510 : 290 : tree type2 = TREE_TYPE (newbound);
511 : 290 : if (POINTER_TYPE_P (type2))
512 : 0 : type2 = sizetype;
513 : 580 : newbound = gimple_build (stmts,
514 : 290 : POINTER_TYPE_P (TREE_TYPE (newbound))
515 : : ? POINTER_PLUS_EXPR : PLUS_EXPR,
516 : 290 : TREE_TYPE (newbound),
517 : : newbound,
518 : 290 : build_int_cst (type2, addbound));
519 : : }
520 : :
521 : 383 : tree newend = gimple_build (stmts, minmax, TREE_TYPE (border),
522 : : border, newbound);
523 : 383 : return newend;
524 : : }
525 : :
526 : : /* Fix the two loops' bb counts after the split, based on the split edge
527 : : probability; don't adjust the bbs dominated by the true/false branches
528 : : of the split condition, to avoid dropping 1s down. */
529 : : static void
530 : 1170 : fix_loop_bb_probability (class loop *loop1, class loop *loop2, edge true_edge,
531 : : edge false_edge)
532 : : {
533 : : /* Scale the first loop's bb counts, except those dominated by the true
534 : : branch, to avoid dropping 1s down. */
535 : 1170 : basic_block *bbs1, *bbs2;
536 : 1170 : bbs1 = get_loop_body (loop1);
537 : 1170 : unsigned j;
538 : 13717 : for (j = 0; j < loop1->num_nodes; j++)
539 : 11377 : if (bbs1[j] == loop1->latch
540 : : /* Watch for the case where the true conditional is empty. */
541 : 10207 : || !single_pred_p (true_edge->dest)
542 : 21190 : || !dominated_by_p (CDI_DOMINATORS, bbs1[j], true_edge->dest))
543 : 8079 : bbs1[j]->count
544 : 8079 : = bbs1[j]->count.apply_probability (true_edge->probability);
545 : 1170 : free (bbs1);
546 : :
547 : : /* Scale the second loop's bb counts, except those dominated by the false
548 : : branch, to avoid dropping 1s down. */
549 : 1170 : basic_block bbi_copy = get_bb_copy (false_edge->dest);
550 : 1170 : bbs2 = get_loop_body (loop2);
551 : 12143 : for (j = 0; j < loop2->num_nodes; j++)
552 : 9803 : if (bbs2[j] == loop2->latch
553 : : /* Watch for the case where the false conditional is empty. */
554 : 8633 : || !single_pred_p (bbi_copy)
555 : 14113 : || !dominated_by_p (CDI_DOMINATORS, bbs2[j], bbi_copy))
556 : 8877 : bbs2[j]->count
557 : 8877 : = bbs2[j]->count.apply_probability (true_edge->probability.invert ());
558 : 1170 : free (bbs2);
559 : 1170 : }
560 : :
561 : : /* Checks if LOOP contains a conditional block whose condition
562 : : depends on which side of the iteration space it is in, and if so
563 : : splits the iteration space into two loops. Returns true if the
564 : : loop was split. The iteration descriptor for the single exit of
565 : : LOOP is computed internally. */
566 : :
567 : : static bool
568 : 76058 : split_loop (class loop *loop1)
569 : : {
570 : 76058 : class tree_niter_desc niter;
571 : 76058 : basic_block *bbs;
572 : 76058 : unsigned i;
573 : 76058 : bool changed = false;
574 : 76058 : tree guard_iv;
575 : 76058 : tree border = NULL_TREE;
576 : 76058 : affine_iv iv;
577 : 76058 : edge exit1;
578 : :
579 : 76058 : if (!(exit1 = single_exit (loop1))
580 : 52728 : || EDGE_COUNT (exit1->src->succs) != 2
581 : : /* ??? We could handle non-empty latches when we split the latch edge
582 : : (not the exit edge), and put the new exit condition in the new block.
583 : : OTOH this executes some code unconditionally that might have been
584 : : skipped by the original exit before. */
585 : 52700 : || !empty_block_p (loop1->latch)
586 : 48298 : || !easy_exit_values (loop1)
587 : 48298 : || !number_of_iterations_exit (loop1, exit1, &niter, false, true)
588 : 117596 : || niter.cmp == ERROR_MARK)
589 : 34576 : return false;
590 : 41482 : if (niter.cmp == NE_EXPR)
591 : : {
592 : 27105 : if (!niter.control.no_overflow)
593 : : return false;
594 : 26806 : if (tree_int_cst_sign_bit (niter.control.step))
595 : 2646 : niter.cmp = GT_EXPR;
596 : : else
597 : 24160 : niter.cmp = LT_EXPR;
598 : : }
599 : :
600 : 41183 : bbs = get_loop_body (loop1);
601 : :
602 : 41183 : if (!can_copy_bbs_p (bbs, loop1->num_nodes))
603 : : {
604 : 4 : free (bbs);
605 : 4 : return false;
606 : : }
607 : :
608 : : /* Find a splitting opportunity. */
609 : : enum tree_code guard_code;
610 : 205461 : for (i = 0; i < loop1->num_nodes; i++)
611 : 164665 : if ((guard_iv = split_at_bb_p (loop1, bbs[i], &border, &iv, &guard_code)))
612 : : {
613 : : /* Handling opposite steps is not implemented yet. Neither
614 : : is handling different step sizes. */
615 : 872 : if ((tree_int_cst_sign_bit (iv.step)
616 : 872 : != tree_int_cst_sign_bit (niter.control.step))
617 : 872 : || !tree_int_cst_equal (iv.step, niter.control.step))
618 : 489 : continue;
619 : :
620 : : /* Find a loop PHI node that defines guard_iv directly,
621 : : or create one doing that. */
622 : 864 : gphi *phi = find_or_create_guard_phi (loop1, guard_iv, &iv);
623 : 864 : if (!phi)
624 : 481 : continue;
625 : 383 : const tree phi_result = gimple_phi_result (phi);
626 : 766 : gcond *guard_stmt = as_a<gcond *> (*gsi_last_bb (bbs[i]));
627 : 383 : tree guard_init = PHI_ARG_DEF_FROM_EDGE (phi,
628 : : loop_preheader_edge (loop1));
629 : :
630 : : /* Loop splitting is implemented by versioning the loop, placing
631 : : the new loop after the old loop, making the first loop iterate
632 : : as long as the conditional stays true (or false) and letting the
633 : : second (new) loop handle the rest of the iterations.
634 : :
635 : : First we need to determine if the condition will start being true
636 : : or false in the first loop. */
637 : 383 : bool initial_true;
638 : 383 : switch (guard_code)
639 : : {
640 : 277 : case LT_EXPR:
641 : 277 : case LE_EXPR:
642 : 277 : initial_true = !tree_int_cst_sign_bit (iv.step);
643 : 277 : break;
644 : 106 : case GT_EXPR:
645 : 106 : case GE_EXPR:
646 : 106 : initial_true = tree_int_cst_sign_bit (iv.step);
647 : 106 : break;
648 : 0 : default:
649 : 0 : gcc_unreachable ();
650 : : }
651 : :
652 : : /* Build a condition that will skip the first loop when the
653 : : guard condition won't ever be true (or false). */
654 : 383 : gimple_seq stmts2;
655 : 383 : border = force_gimple_operand (border, &stmts2, true, NULL_TREE);
656 : 383 : if (stmts2)
657 : : {
658 : : /* When the split condition is not always evaluated make sure
659 : : to rewrite it to defined overflow. */
660 : 2 : if (!dominated_by_p (CDI_DOMINATORS, exit1->src, bbs[i]))
661 : : {
662 : 2 : gimple_stmt_iterator gsi;
663 : 2 : gsi = gsi_start (stmts2);
664 : 6 : while (!gsi_end_p (gsi))
665 : : {
666 : 4 : if (gimple_needing_rewrite_undefined (gsi_stmt (gsi)))
667 : 2 : rewrite_to_defined_unconditional (&gsi);
668 : 4 : gsi_next (&gsi);
669 : : }
670 : : }
671 : 2 : gsi_insert_seq_on_edge_immediate (loop_preheader_edge (loop1),
672 : : stmts2);
673 : : }
674 : 383 : tree cond = fold_build2 (guard_code, boolean_type_node,
675 : : guard_init, border);
676 : 383 : if (!initial_true)
677 : 104 : cond = fold_build1 (TRUTH_NOT_EXPR, boolean_type_node, cond);
678 : :
679 : 383 : edge true_edge, false_edge;
680 : 383 : extract_true_false_edges_from_block (bbs[i], &true_edge, &false_edge);
681 : :
682 : : /* Now version the loop, placing loop2 after loop1, connecting
683 : : them, and fixing up the SSA form for that. */
684 : 383 : initialize_original_copy_tables ();
685 : 383 : basic_block cond_bb;
686 : :
687 : 383 : profile_probability loop1_prob
688 : 383 : = integer_onep (cond) ? profile_probability::always ()
689 : 101 : : true_edge->probability;
690 : : /* TODO: It is commonly the case that we know both loops will be
691 : : entered. very_likely below is the probability, given by
692 : : connect_loops, that the second loop will be entered. We should
693 : : handle the common case where it is always true. */
694 : 383 : class loop *loop2 = loop_version (loop1, cond, &cond_bb,
695 : : loop1_prob,
696 : : /* Pass always as we will later
697 : : redirect first loop to second
698 : : loop. */
699 : : profile_probability::always (),
700 : : profile_probability::always (),
701 : : profile_probability::very_likely (),
702 : : true);
703 : 383 : gcc_assert (loop2);
704 : :
705 : : /* The phi address may have changed due to being resized in
706 : : loop_version (), so reobtain it. */
707 : 383 : phi = as_a<gphi *> (SSA_NAME_DEF_STMT (phi_result));
708 : 383 : gcc_checking_assert (gimple_bb (phi) == loop1->header);
709 : :
710 : : /* Correct probability of edge cond_bb->preheader_of_loop2. */
711 : 383 : single_pred_edge
712 : 383 : (loop_preheader_edge (loop2)->src)->probability
713 : 383 : = loop1_prob.invert ();
714 : :
715 : 383 : fix_loop_bb_probability (loop1, loop2, true_edge, false_edge);
716 : : /* If the conditional we split on has a reliable profile and both
717 : : preconditionals of loop1 and loop2 are constant true, we only
718 : : need to redistribute the iteration counts to the split loops.
719 : :
720 : : If the conditionals we insert before loop1 or loop2 are non-trivial,
721 : : they increase the expected loop count, so account for this accordingly.
722 : : If we do not know the probability of the split conditional, avoid
723 : : reducing the loop estimates, since we do not really know how they are
724 : : split between the two new loops. Keep the original estimate since
725 : : it is likely better than completely dropping it.
726 : :
727 : : TODO: If we know that one of the new loops has a constant
728 : : number of iterations, we can do better. We could also update
729 : : upper bounds. */
730 : 383 : if (loop1->any_estimate
731 : 383 : && wi::fits_shwi_p (loop1->nb_iterations_estimate))
732 : : {
733 : 413 : sreal scale = true_edge->probability.reliable_p ()
734 : 210 : ? true_edge->probability.to_sreal () : (sreal)1;
735 : 413 : sreal scale2 = false_edge->probability.reliable_p ()
736 : 210 : ? false_edge->probability.to_sreal () : (sreal)1;
737 : 210 : sreal div1 = loop1_prob.initialized_p ()
738 : 212 : ? loop1_prob.to_sreal () : (sreal)1/(sreal)2;
739 : : /* +1 to get header iterations rather than latch iterations and then
740 : : -1 to convert back. */
741 : 210 : if (div1 != 0)
742 : 209 : loop1->nb_iterations_estimate
743 : 417 : = MAX ((((sreal)loop1->nb_iterations_estimate.to_shwi () + 1)
744 : : * scale / div1).to_nearest_int () - 1, 0);
745 : : else
746 : 1 : loop1->any_estimate = false;
747 : 210 : loop2->nb_iterations_estimate
748 : 210 : = MAX ((((sreal)loop2->nb_iterations_estimate.to_shwi () + 1) * scale2
749 : : / profile_probability::very_likely ().to_sreal ())
750 : : .to_nearest_int () - 1, 0);
751 : : }
752 : 383 : update_loop_exit_probability_scale_dom_bbs (loop1);
753 : 383 : update_loop_exit_probability_scale_dom_bbs (loop2);
754 : :
755 : 383 : edge new_e = connect_loops (loop1, loop2);
756 : 383 : connect_loop_phis (loop1, loop2, new_e);
757 : :
758 : : /* The iterations of the second loop are now already
759 : : exactly those that the first loop didn't do, but the
760 : : iteration space of the first loop is still the original one.
761 : : Compute the new bound for the guarding IV and patch the
762 : : loop exit to use it instead of original IV and bound. */
763 : 383 : gimple_seq stmts = NULL;
764 : 383 : tree newend = compute_new_first_bound (&stmts, &niter, border,
765 : : guard_code, guard_init);
766 : 383 : if (stmts)
767 : 183 : gsi_insert_seq_on_edge_immediate (loop_preheader_edge (loop1),
768 : : stmts);
769 : 383 : tree guard_next = PHI_ARG_DEF_FROM_EDGE (phi, loop_latch_edge (loop1));
770 : 383 : patch_loop_exit (loop1, guard_code, guard_next, newend, initial_true);
771 : :
772 : : /* Finally patch out the two copies of the condition to be always
773 : : true/false (or opposite). */
774 : 766 : gcond *force_true = as_a<gcond *> (*gsi_last_bb (bbs[i]));
775 : 766 : gcond *force_false = as_a<gcond *> (*gsi_last_bb (get_bb_copy (bbs[i])));
776 : 383 : if (!initial_true)
777 : 104 : std::swap (force_true, force_false);
778 : 383 : gimple_cond_make_true (force_true);
779 : 383 : gimple_cond_make_false (force_false);
780 : 383 : update_stmt (force_true);
781 : 383 : update_stmt (force_false);
782 : :
783 : 383 : free_original_copy_tables ();
784 : :
785 : 383 : changed = true;
786 : 383 : if (dump_file && (dump_flags & TDF_DETAILS))
787 : 25 : fprintf (dump_file, ";; Loop split.\n");
788 : :
789 : 383 : if (dump_enabled_p ())
790 : 26 : dump_printf_loc (MSG_OPTIMIZED_LOCATIONS, guard_stmt, "loop split\n");
791 : :
792 : : /* Only deal with the first opportunity. */
793 : 383 : break;
794 : : }
795 : :
796 : 41179 : free (bbs);
797 : 41179 : return changed;
798 : 76058 : }
799 : :
800 : : /* Another transformation of loops like:
801 : :
802 : : for (i = INIT (); CHECK (i); i = NEXT ())
803 : : {
804 : : if (expr (a_1, a_2, ..., a_n)) // expr is pure
805 : : a_j = ...; // change at least one a_j
806 : : else
807 : : S; // not change any a_j
808 : : }
809 : :
810 : : into:
811 : :
812 : : for (i = INIT (); CHECK (i); i = NEXT ())
813 : : {
814 : : if (expr (a_1, a_2, ..., a_n))
815 : : a_j = ...;
816 : : else
817 : : {
818 : : S;
819 : : i = NEXT ();
820 : : break;
821 : : }
822 : : }
823 : :
824 : : for (; CHECK (i); i = NEXT ())
825 : : {
826 : : S;
827 : : }
828 : :
829 : : */
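/* A minimal, assumed C example (not from the GCC sources or testsuite; all
   names are made up) of a semi-invariant condition this second transformation
   targets: once "v > bound" becomes false, the false branch does not modify v,
   so the condition stays false for all remaining iterations and the loop can
   be split at it.

     void
     second_kind_example (int *a, int n, int bound)
     {
       int v = 1000;
       for (int i = 0; i < n; i++)
         {
           if (v > bound)
             v = a[i];   // the true branch may change v
           else
             a[i] = 0;   // the false branch leaves v unchanged
         }
     }
*/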
830 : :
831 : : /* Data structure to hold temporary information during loop split upon
832 : : semi-invariant conditional statement. */
833 : : class split_info {
834 : : public:
835 : : /* Array of all basic blocks in a loop, returned by get_loop_body(). */
836 : : basic_block *bbs;
837 : :
838 : : /* All memory store/clobber statements in a loop. */
839 : : auto_vec<gimple *> memory_stores;
840 : :
841 : : /* Whether the memory stores vector above still needs to be filled. */
842 : : int need_init;
843 : :
844 : : /* Control dependencies of basic blocks in a loop. */
845 : : auto_vec<hash_set<basic_block> *> control_deps;
846 : :
847 : 75675 : split_info () : bbs (NULL), need_init (true) { }
848 : :
849 : 75675 : ~split_info ()
850 : : {
851 : 75675 : if (bbs)
852 : 75675 : free (bbs);
853 : :
854 : 76256 : for (unsigned i = 0; i < control_deps.length (); i++)
855 : 1162 : delete control_deps[i];
856 : 75675 : }
857 : : };
858 : :
859 : : /* Find all statements with a memory-write effect in LOOP, including memory
860 : : stores and non-pure function calls, and keep those in a vector. This work
861 : : is done only once, since the vector should stay constant during the
862 : : analysis stage of semi-invariant conditions. */
863 : :
864 : : static void
865 : 9638 : find_vdef_in_loop (struct loop *loop)
866 : : {
867 : 9638 : split_info *info = (split_info *) loop->aux;
868 : 9638 : gphi *vphi = get_virtual_phi (loop->header);
869 : :
870 : : /* Indicate memory store vector has been filled. */
871 : 9638 : info->need_init = false;
872 : :
873 : : /* If loop contains memory operation, there must be a virtual PHI node in
874 : : loop header basic block. */
875 : 9638 : if (vphi == NULL)
876 : 2448 : return;
877 : :
878 : : /* All virtual SSA names inside the loop are connected to be a cyclic
879 : : graph via virtual PHI nodes. The virtual PHI node in loop header just
880 : : links the first and the last virtual SSA names, by using the last as
881 : : PHI operand to define the first. */
882 : 7246 : const edge latch = loop_latch_edge (loop);
883 : 7246 : const tree first = gimple_phi_result (vphi);
884 : 7246 : const tree last = PHI_ARG_DEF_FROM_EDGE (vphi, latch);
885 : :
886 : : /* The virtual SSA cyclic graph might consist of only one SSA name, which
887 : : is defined by itself.
888 : :
889 : : .MEM_1 = PHI <.MEM_2(loop entry edge), .MEM_1(latch edge)>
890 : :
891 : : This means the loop contains only memory loads, so we can skip it. */
892 : 7246 : if (first == last)
893 : : return;
894 : :
895 : 7190 : auto_vec<gimple *> other_stores;
896 : 7190 : auto_vec<tree> worklist;
897 : 7190 : auto_bitmap visited;
898 : :
899 : 7190 : bitmap_set_bit (visited, SSA_NAME_VERSION (first));
900 : 7190 : bitmap_set_bit (visited, SSA_NAME_VERSION (last));
901 : 7190 : worklist.safe_push (last);
902 : :
903 : 46670 : do
904 : : {
905 : 46670 : tree vuse = worklist.pop ();
906 : 46670 : gimple *stmt = SSA_NAME_DEF_STMT (vuse);
907 : :
908 : : /* We mark the first and last SSA names as visited at the beginning,
909 : : and reversely start the process from the last SSA name towards the
910 : : first, which ensures that this do-while will not touch SSA names
911 : : defined outside the loop. */
912 : 46670 : gcc_assert (gimple_bb (stmt)
913 : : && flow_bb_inside_loop_p (loop, gimple_bb (stmt)));
914 : :
915 : 46670 : if (gimple_code (stmt) == GIMPLE_PHI)
916 : : {
917 : 13141 : gphi *phi = as_a <gphi *> (stmt);
918 : :
919 : 39639 : for (unsigned i = 0; i < gimple_phi_num_args (phi); ++i)
920 : : {
921 : 26498 : tree arg = gimple_phi_arg_def (stmt, i);
922 : :
923 : 26498 : if (bitmap_set_bit (visited, SSA_NAME_VERSION (arg)))
924 : 16907 : worklist.safe_push (arg);
925 : : }
926 : : }
927 : : else
928 : : {
929 : 33529 : tree prev = gimple_vuse (stmt);
930 : :
931 : : /* A non-pure call statement is conservatively assumed to impact all
932 : : memory locations. So place call statements ahead of other memory
933 : : stores in the vector, with the idea of using them as shortcut
934 : : terminators for memory alias analysis. */
935 : 33529 : if (gimple_code (stmt) == GIMPLE_CALL)
936 : 11558 : info->memory_stores.safe_push (stmt);
937 : : else
938 : 21971 : other_stores.safe_push (stmt);
939 : :
940 : 33529 : if (bitmap_set_bit (visited, SSA_NAME_VERSION (prev)))
941 : 22573 : worklist.safe_push (prev);
942 : : }
943 : 93340 : } while (!worklist.is_empty ());
944 : :
945 : 7190 : info->memory_stores.safe_splice (other_stores);
946 : 7190 : }
947 : :
948 : : /* Two basic blocks have equivalent control dependency if one dominates
949 : : the other and is post-dominated by the latter. Given a basic block
950 : : BB in LOOP, find the farthest equivalent dominating basic block. BB is
951 : : constrained not to post-dominate the loop header of LOOP, which means
952 : : BB is control-dependent on at least one basic block in LOOP. */
953 : :
954 : : static basic_block
955 : 1128 : get_control_equiv_head_block (struct loop *loop, basic_block bb)
956 : : {
957 : 1194 : while (!bb->aux)
958 : : {
959 : 669 : basic_block dom_bb = get_immediate_dominator (CDI_DOMINATORS, bb);
960 : :
961 : 669 : gcc_checking_assert (dom_bb && flow_bb_inside_loop_p (loop, dom_bb));
962 : :
963 : 669 : if (!dominated_by_p (CDI_POST_DOMINATORS, dom_bb, bb))
964 : : break;
965 : :
966 : : bb = dom_bb;
967 : : }
968 : 1128 : return bb;
969 : : }
970 : :
971 : : /* Given a BB in LOOP, find out all basic blocks in LOOP that BB is control-
972 : : dependent on. */
973 : :
974 : : static hash_set<basic_block> *
975 : 7072 : find_control_dep_blocks (struct loop *loop, basic_block bb)
976 : : {
977 : : /* If BB has the same control dependency as the loop header, then it is
978 : : not control-dependent on any basic block in LOOP. */
979 : 7072 : if (dominated_by_p (CDI_POST_DOMINATORS, loop->header, bb))
980 : : return NULL;
981 : :
982 : 1106 : basic_block equiv_head = get_control_equiv_head_block (loop, bb);
983 : :
984 : 1106 : if (equiv_head->aux)
985 : : {
986 : : /* There is a basic block containing control dependency equivalent
987 : : to BB. No need to recompute that, and also set this information
988 : : to other equivalent basic blocks. */
989 : 525 : for (; bb != equiv_head;
990 : 0 : bb = get_immediate_dominator (CDI_DOMINATORS, bb))
991 : 0 : bb->aux = equiv_head->aux;
992 : 525 : return (hash_set<basic_block> *) equiv_head->aux;
993 : : }
994 : :
995 : : /* A basic block X is control-dependent on another Y iff there exists
996 : : a path from Y to X, in which every basic block other than X and Y
997 : : is post-dominated by X, but Y is not post-dominated by X.
998 : :
999 : : According to this rule, traverse basic blocks in the loop backwards
1000 : : starting from BB, if a basic block is post-dominated by BB, extend
1001 : : current post-dominating path to this block, otherwise it is another
1002 : : one that BB is control-dependent on. */
1003 : :
1004 : 581 : auto_vec<basic_block> pdom_worklist;
1005 : 581 : hash_set<basic_block> pdom_visited;
1006 : 581 : hash_set<basic_block> *dep_bbs = new hash_set<basic_block>;
1007 : :
1008 : 581 : pdom_worklist.safe_push (equiv_head);
1009 : :
1010 : 603 : do
1011 : : {
1012 : 603 : basic_block pdom_bb = pdom_worklist.pop ();
1013 : 603 : edge_iterator ei;
1014 : 603 : edge e;
1015 : :
1016 : 603 : if (pdom_visited.add (pdom_bb))
1017 : 0 : continue;
1018 : :
1019 : 1231 : FOR_EACH_EDGE (e, ei, pdom_bb->preds)
1020 : : {
1021 : 628 : basic_block pred_bb = e->src;
1022 : :
1023 : 628 : if (!dominated_by_p (CDI_POST_DOMINATORS, pred_bb, bb))
1024 : : {
1025 : 606 : dep_bbs->add (pred_bb);
1026 : 628 : continue;
1027 : : }
1028 : :
1029 : 22 : pred_bb = get_control_equiv_head_block (loop, pred_bb);
1030 : :
1031 : 22 : if (pdom_visited.contains (pred_bb))
1032 : 0 : continue;
1033 : :
1034 : 22 : if (!pred_bb->aux)
1035 : : {
1036 : 22 : pdom_worklist.safe_push (pred_bb);
1037 : 22 : continue;
1038 : : }
1039 : :
1040 : : /* If the control dependency of a basic block is available, quickly
1041 : : extend the post-dominating path using that information instead of
1042 : : advancing forward step-by-step. */
1043 : 0 : hash_set<basic_block> *pred_dep_bbs
1044 : : = (hash_set<basic_block> *) pred_bb->aux;
1045 : :
1046 : 0 : for (hash_set<basic_block>::iterator iter = pred_dep_bbs->begin ();
1047 : 0 : iter != pred_dep_bbs->end (); ++iter)
1048 : : {
1049 : 0 : basic_block pred_dep_bb = *iter;
1050 : :
1051 : : /* Basic blocks can either be in control dependency of BB, or
1052 : : must be post-dominated by BB, if so, extend the path from
1053 : : these basic blocks. */
1054 : 0 : if (!dominated_by_p (CDI_POST_DOMINATORS, pred_dep_bb, bb))
1055 : 0 : dep_bbs->add (pred_dep_bb);
1056 : 0 : else if (!pdom_visited.contains (pred_dep_bb))
1057 : 0 : pdom_worklist.safe_push (pred_dep_bb);
1058 : : }
1059 : : }
1060 : 1206 : } while (!pdom_worklist.is_empty ());
1061 : :
1062 : : /* Record computed control dependencies in loop so that we can reach them
1063 : : when reclaiming resources. */
1064 : 581 : ((split_info *) loop->aux)->control_deps.safe_push (dep_bbs);
1065 : :
1066 : : /* Associate control dependence with related equivalent basic blocks. */
1067 : 623 : for (equiv_head->aux = dep_bbs; bb != equiv_head;
1068 : 42 : bb = get_immediate_dominator (CDI_DOMINATORS, bb))
1069 : 42 : bb->aux = dep_bbs;
1070 : :
1071 : 581 : return dep_bbs;
1072 : 581 : }
1073 : :
1074 : : /* Forward declaration */
1075 : :
1076 : : static bool
1077 : : stmt_semi_invariant_p_1 (struct loop *loop, gimple *stmt,
1078 : : const_basic_block skip_head,
1079 : : hash_map<gimple *, bool> &stmt_stat);
1080 : :
1081 : : /* Given STMT, a memory load or pure call statement, check whether it is
1082 : : impacted by some memory store in LOOP, excluding the trace starting from
1083 : : SKIP_HEAD (the trace is composed of SKIP_HEAD and those basic blocks
1084 : : dominated by it, and always corresponds to one branch of a conditional
1085 : : statement). If SKIP_HEAD is NULL, all basic blocks of LOOP are checked. */
1086 : :
1087 : : static bool
1088 : 23325 : vuse_semi_invariant_p (struct loop *loop, gimple *stmt,
1089 : : const_basic_block skip_head)
1090 : : {
1091 : 23325 : split_info *info = (split_info *) loop->aux;
1092 : 23325 : tree rhs = NULL_TREE;
1093 : 23325 : ao_ref ref;
1094 : 23325 : gimple *store;
1095 : 23325 : unsigned i;
1096 : :
1097 : : /* Collect memory store/clobber statements if haven't done that. */
1098 : 23325 : if (info->need_init)
1099 : 9638 : find_vdef_in_loop (loop);
1100 : :
1101 : 23325 : if (is_gimple_assign (stmt))
1102 : 23092 : rhs = gimple_assign_rhs1 (stmt);
1103 : :
1104 : 23325 : ao_ref_init (&ref, rhs);
1105 : :
1106 : 81133 : FOR_EACH_VEC_ELT (info->memory_stores, i, store)
1107 : : {
1108 : : /* Skip basic blocks dominated by SKIP_HEAD, if non-NULL. */
1109 : 59897 : if (skip_head
1110 : 43890 : && dominated_by_p (CDI_DOMINATORS, gimple_bb (store), skip_head))
1111 : 16007 : continue;
1112 : :
1113 : 27883 : if (!ref.ref || stmt_may_clobber_ref_p_1 (store, &ref))
1114 : 9407 : return false;
1115 : : }
1116 : :
1117 : : return true;
1118 : : }
1119 : :
1120 : : /* Suppose one condition branch, led by SKIP_HEAD, is not executed from a
1121 : : certain iteration of LOOP onwards; check whether an SSA name (NAME)
1122 : : remains unchanged in the next iteration. We call this characteristic
1123 : : semi-invariantness. SKIP_HEAD might be NULL; if so, nothing is excluded
1124 : : and all basic blocks and control flows in the loop are considered. The
1125 : : semi-invariant state of a checked statement is cached in the hash map
1126 : : STMT_STAT to avoid redundant computation in a possible following re-check. */
1127 : :
1128 : : static inline bool
1129 : 156696 : ssa_semi_invariant_p (struct loop *loop, tree name,
1130 : : const_basic_block skip_head,
1131 : : hash_map<gimple *, bool> &stmt_stat)
1132 : : {
1133 : 156696 : gimple *def = SSA_NAME_DEF_STMT (name);
1134 : 156696 : const_basic_block def_bb = gimple_bb (def);
1135 : :
1136 : : /* An SSA name defined outside loop is definitely semi-invariant. */
1137 : 156696 : if (!def_bb || !flow_bb_inside_loop_p (loop, def_bb))
1138 : 13775 : return true;
1139 : :
1140 : 142921 : if (SSA_NAME_OCCURS_IN_ABNORMAL_PHI (name))
1141 : : return false;
1142 : :
1143 : 142913 : return stmt_semi_invariant_p_1 (loop, def, skip_head, stmt_stat);
1144 : : }
1145 : :
1146 : : /* Check whether a loop iteration PHI node (LOOP_PHI) defines a value that is
1147 : : semi-invariant in LOOP. Basic blocks dominated by SKIP_HEAD (if non-NULL),
1148 : : are excluded from LOOP. */
1149 : :
1150 : : static bool
1151 : 29267 : loop_iter_phi_semi_invariant_p (struct loop *loop, gphi *loop_phi,
1152 : : const_basic_block skip_head)
1153 : : {
1154 : 29267 : const_edge latch = loop_latch_edge (loop);
1155 : 29267 : tree name = gimple_phi_result (loop_phi);
1156 : 29267 : tree from = PHI_ARG_DEF_FROM_EDGE (loop_phi, latch);
1157 : :
1158 : 29267 : gcc_checking_assert (from);
1159 : :
1160 : : /* A loop iteration PHI node is located in the loop header and has two
1161 : : source operands: an initial value coming from outside the loop, and a
1162 : : value coming through the loop latch, derived in the last iteration; we
1163 : : call the latter the latch value. If, from the PHI node to the definition
1164 : : of the latch value, excluding the branch trace starting from SKIP_HEAD,
1165 : : there is no kind of value redefinition other than copy-assignments or
1166 : : the like, the SSA name defined by the PHI node is semi-invariant.
1167 : :
1168 : : loop entry
1169 : : | .--- latch ---.
1170 : : | | |
1171 : : v v |
1172 : : x_1 = PHI <x_0, x_3> |
1173 : : | |
1174 : : v |
1175 : : .------- if (cond) -------. |
1176 : : | | |
1177 : : | [ SKIP ] |
1178 : : | | |
1179 : : | x_2 = ... |
1180 : : | | |
1181 : : '---- T ---->.<---- F ----' |
1182 : : | |
1183 : : v |
1184 : : x_3 = PHI <x_1, x_2> |
1185 : : | |
1186 : : '----------------------'
1187 : :
1188 : : Suppose that in a certain iteration the execution flow in the above graph
1189 : : goes through the true branch; then the source value of x_3 defined in the
1190 : : false branch (x_2) is skipped, x_3 only comes from x_1, and x_1 in the
1191 : : next iteration is defined by x_3. Thus x_1 will never change if COND
1192 : : always chooses the true branch from then on. */
1193 : :
1194 : 32830 : while (from != name)
1195 : : {
1196 : : /* A new value comes from a CONSTANT. */
1197 : 31972 : if (TREE_CODE (from) != SSA_NAME)
1198 : : return false;
1199 : :
1200 : 30293 : gimple *stmt = SSA_NAME_DEF_STMT (from);
1201 : 30293 : const_basic_block bb = gimple_bb (stmt);
1202 : :
1203 : : /* A new value comes from outside the loop. */
1204 : 30293 : if (!bb || !flow_bb_inside_loop_p (loop, bb))
1205 : 34 : return false;
1206 : :
1207 : 30259 : from = NULL_TREE;
1208 : :
1209 : 30259 : if (gimple_code (stmt) == GIMPLE_PHI)
1210 : : {
1211 : 6588 : gphi *phi = as_a <gphi *> (stmt);
1212 : :
1213 : 17678 : for (unsigned i = 0; i < gimple_phi_num_args (phi); ++i)
1214 : : {
1215 : 14123 : if (skip_head)
1216 : : {
1217 : 13467 : const_edge e = gimple_phi_arg_edge (phi, i);
1218 : :
1219 : : /* Don't consider redefinitions in excluded basic blocks. */
1220 : 13467 : if (dominated_by_p (CDI_DOMINATORS, e->src, skip_head))
1221 : 3944 : continue;
1222 : : }
1223 : :
1224 : 10179 : tree arg = gimple_phi_arg_def (phi, i);
1225 : :
1226 : 10179 : if (!from)
1227 : : from = arg;
1228 : 3591 : else if (!operand_equal_p (from, arg, 0))
1229 : : /* There are more than one source operands that provide
1230 : : different values to the SSA name, it is variant. */
1231 : : return false;
1232 : : }
1233 : : }
1234 : 23671 : else if (gimple_code (stmt) == GIMPLE_ASSIGN)
1235 : : {
1236 : : /* For simple value copy, check its rhs instead. */
1237 : 23545 : if (gimple_assign_ssa_name_copy_p (stmt))
1238 : 8 : from = gimple_assign_rhs1 (stmt);
1239 : : }
1240 : :
1241 : : /* Any other kind of definition is deemed to introduce a new value
1242 : : to the SSA name. */
1243 : 27226 : if (!from)
1244 : : return false;
1245 : : }
1246 : : return true;
1247 : : }
1248 : :
1249 : : /* Check whether conditional predicates that BB is control-dependent on, are
1250 : : semi-invariant in LOOP. Basic blocks dominated by SKIP_HEAD (if non-NULL),
1251 : : are excluded from LOOP. Semi-invariant state of checked statement is cached
1252 : : in hash map STMT_STAT. */
1253 : :
1254 : : static bool
1255 : 7072 : control_dep_semi_invariant_p (struct loop *loop, basic_block bb,
1256 : : const_basic_block skip_head,
1257 : : hash_map<gimple *, bool> &stmt_stat)
1258 : : {
1259 : 7072 : hash_set<basic_block> *dep_bbs = find_control_dep_blocks (loop, bb);
1260 : :
1261 : 7072 : if (!dep_bbs)
1262 : : return true;
1263 : :
1264 : 1253 : for (hash_set<basic_block>::iterator iter = dep_bbs->begin ();
1265 : 1400 : iter != dep_bbs->end (); ++iter)
1266 : : {
1267 : 1113 : gimple *last = *gsi_last_bb (*iter);
1268 : 1113 : if (!last)
1269 : 966 : return false;
1270 : :
1271 : : /* Only check condition predicates. */
1272 : 1113 : if (gimple_code (last) != GIMPLE_COND
1273 : 1113 : && gimple_code (last) != GIMPLE_SWITCH)
1274 : : return false;
1275 : :
1276 : 1113 : if (!stmt_semi_invariant_p_1 (loop, last, skip_head, stmt_stat))
1277 : : return false;
1278 : : }
1279 : :
1280 : 140 : return true;
1281 : : }
1282 : :
1283 : : /* Check whether STMT is semi-invariant in LOOP, i.e. all its operands are
1284 : : semi-invariant and, consequently, all its defined values are semi-invariant.
1285 : : Basic blocks dominated by SKIP_HEAD (if non-NULL) are excluded from LOOP.
1286 : : The semi-invariant state of the checked statement is cached in the hash map
1287 : : STMT_STAT. */
1288 : :
1289 : : static bool
1290 : 188034 : stmt_semi_invariant_p_1 (struct loop *loop, gimple *stmt,
1291 : : const_basic_block skip_head,
1292 : : hash_map<gimple *, bool> &stmt_stat)
1293 : : {
1294 : 188034 : bool existed;
1295 : 188034 : bool &invar = stmt_stat.get_or_insert (stmt, &existed);
1296 : :
1297 : 188034 : if (existed)
1298 : 524 : return invar;
1299 : :
1300 : : /* A statement might depend on itself, which is treated as variant. So set
1301 : : state of statement under check to be variant to ensure that. */
1302 : 187510 : invar = false;
1303 : :
1304 : 187510 : if (gimple_code (stmt) == GIMPLE_PHI)
1305 : : {
1306 : 51102 : gphi *phi = as_a <gphi *> (stmt);
1307 : :
1308 : 51102 : if (gimple_bb (stmt) == loop->header)
1309 : : {
1310 : : /* If the entry value is subject to abnormal coalescing
1311 : : avoid the transform since we're going to duplicate the
1312 : : loop header and thus likely introduce overlapping life-ranges
1313 : : between the PHI def and the entry on the path when the
1314 : : first loop is skipped. */
1315 : 29267 : tree entry_def
1316 : 29267 : = PHI_ARG_DEF_FROM_EDGE (phi, loop_preheader_edge (loop));
1317 : 29267 : if (TREE_CODE (entry_def) == SSA_NAME
1318 : 29267 : && SSA_NAME_OCCURS_IN_ABNORMAL_PHI (entry_def))
1319 : : return false;
1320 : 29267 : invar = loop_iter_phi_semi_invariant_p (loop, phi, skip_head);
1321 : 29267 : return invar;
1322 : : }
1323 : :
1324 : : /* A loop PHI node that is not located in the loop header is semi-
1325 : : invariant only if two conditions are met. The first is that its source
1326 : : values are derived from CONSTANTs (including loop-invariant values) or
1327 : : from SSA names defined by semi-invariant loop iteration PHI nodes. The
1328 : : second is that its incoming edges are control-dependent on semi-
1329 : : invariant conditional predicates. */
1330 : 21892 : for (unsigned i = 0; i < gimple_phi_num_args (phi); ++i)
1331 : : {
1332 : 21878 : const_edge e = gimple_phi_arg_edge (phi, i);
1333 : 21878 : tree arg = gimple_phi_arg_def (phi, i);
1334 : :
1335 : 21878 : if (TREE_CODE (arg) == SSA_NAME)
1336 : : {
1337 : 21542 : if (!ssa_semi_invariant_p (loop, arg, skip_head, stmt_stat))
1338 : : return false;
1339 : :
1340 : : /* If source value is defined in location from where the source
1341 : : edge comes in, no need to check control dependency again
1342 : : since this has been done in above SSA name check stage. */
1343 : 56 : if (e->src == gimple_bb (SSA_NAME_DEF_STMT (arg)))
1344 : 4 : continue;
1345 : : }
1346 : :
1347 : 388 : if (!control_dep_semi_invariant_p (loop, e->src, skip_head,
1348 : : stmt_stat))
1349 : : return false;
1350 : : }
1351 : : }
1352 : : else
1353 : : {
1354 : 136408 : ssa_op_iter iter;
1355 : 136408 : tree use;
1356 : :
1357 : : /* A volatile memory load or the return of a normal (non-const/non-pure)
1358 : : call should not be treated as constant in each iteration of the loop. */
1359 : 136408 : if (gimple_has_side_effects (stmt))
1360 : 129738 : return false;
1361 : :
1362 : : /* Check if any memory store may kill memory load at this place. */
1363 : 224164 : if (gimple_vuse (stmt) && !vuse_semi_invariant_p (loop, stmt, skip_head))
1364 : : return false;
1365 : :
1366 : : /* Although operand of a statement might be SSA name, CONSTANT or
1367 : : VARDECL, here we only need to check SSA name operands. This is
1368 : : because check on VARDECL operands, which involve memory loads,
1369 : : must have been done prior to invocation of this function in
1370 : : vuse_semi_invariant_p. */
1371 : 141824 : FOR_EACH_SSA_TREE_OPERAND (use, stmt, iter, SSA_OP_USE)
1372 : 135154 : if (!ssa_semi_invariant_p (loop, use, skip_head, stmt_stat))
1373 : : return false;
1374 : : }
1375 : :
1376 : 6684 : if (!control_dep_semi_invariant_p (loop, gimple_bb (stmt), skip_head,
1377 : : stmt_stat))
1378 : : return false;
1379 : :
1380 : : /* Here we SHOULD NOT use invar = true, since hash map might be changed due
1381 : : to new insertion, and thus invar may point to invalid memory. */
1382 : 6053 : stmt_stat.put (stmt, true);
1383 : 6053 : return true;
1384 : : }
1385 : :
1386 : : /* A helper function to check whether STMT is semi-invariant in LOOP. Basic
1387 : : blocks dominated by SKIP_HEAD (if non-NULL), are excluded from LOOP. */
1388 : :
1389 : : static bool
1390 : 44008 : stmt_semi_invariant_p (struct loop *loop, gimple *stmt,
1391 : : const_basic_block skip_head)
1392 : : {
1393 : 44008 : hash_map<gimple *, bool> stmt_stat;
1394 : 44008 : return stmt_semi_invariant_p_1 (loop, stmt, skip_head, stmt_stat);
1395 : 44008 : }
1396 : :
1397 : : /* Determine whether, when a conditional statement never transfers execution
1398 : : to one of its branches, we can remove the branch's leading basic block
1399 : : (BRANCH_BB) and those basic blocks dominated by BRANCH_BB. */
1400 : :
1401 : : static bool
1402 : 47980 : branch_removable_p (basic_block branch_bb)
1403 : : {
1404 : 47980 : edge_iterator ei;
1405 : 47980 : edge e;
1406 : :
1407 : 47980 : if (single_pred_p (branch_bb))
1408 : : return true;
1409 : :
1410 : 8002 : FOR_EACH_EDGE (e, ei, branch_bb->preds)
1411 : : {
1412 : 8002 : if (dominated_by_p (CDI_DOMINATORS, e->src, branch_bb))
1413 : 0 : continue;
1414 : :
1415 : 8002 : if (dominated_by_p (CDI_DOMINATORS, branch_bb, e->src))
1416 : 2878 : continue;
1417 : :
1418 : : /* The branch can be reached from opposite branch, or from some
1419 : : statement not dominated by the conditional statement. */
1420 : : return false;
1421 : : }
1422 : :
1423 : : return true;
1424 : : }
1425 : :
1426 : : /* Find out which branch of a conditional statement (COND) is invariant in the
1427 : : execution context of LOOP. That is: once the branch is selected in certain
1428 : : iteration of the loop, any operand that contributes to computation of the
1429 : : conditional statement remains unchanged in all following iterations. */
1430 : :
1431 : : static edge
1432 : 128765 : get_cond_invariant_branch (struct loop *loop, gcond *cond)
1433 : : {
1434 : 128765 : basic_block cond_bb = gimple_bb (cond);
1435 : 128765 : basic_block targ_bb[2];
1436 : 128765 : bool invar[2];
1437 : 128765 : unsigned invar_checks = 0;
1438 : :
1439 : 209464 : for (unsigned i = 0; i < 2; i++)
1440 : : {
1441 : 185474 : targ_bb[i] = EDGE_SUCC (cond_bb, i)->dest;
1442 : :
1443 : : /* One branch leads to the loop exit, so there is no need to perform a
1444 : : loop split upon this conditional statement. Firstly, it is trivial if
1445 : : the exit branch is semi-invariant, for the statement just breaks the
1446 : : loop. Secondly, if the opposite branch is semi-invariant, the statement
1447 : : is really loop-invariant, which is covered by loop unswitch. */
1448 : 185474 : if (!flow_bb_inside_loop_p (loop, targ_bb[i]))
1449 : : return NULL;
1450 : : }
1451 : :
1452 : 71970 : for (unsigned i = 0; i < 2; i++)
1453 : : {
1454 : 47980 : invar[!i] = false;
1455 : :
1456 : 47980 : if (!branch_removable_p (targ_bb[i]))
1457 : 5124 : continue;
1458 : :
1459 : : /* Given a semi-invariant branch, if its opposite branch dominates the
1460 : : loop latch, the semi-invariant branch and the trace following it are
1461 : : executed only in the final iteration of the loop, i.e. they are not
1462 : : part of the loop's repeated body. As in the loop-exit case above,
1463 : : there is no need to split the loop. */
1464 : 42856 : if (dominated_by_p (CDI_DOMINATORS, loop->latch, targ_bb[i]))
1465 : 0 : continue;
1466 : :
1467 : 42856 : invar[!i] = stmt_semi_invariant_p (loop, cond, targ_bb[i]);
1468 : 42856 : invar_checks++;
1469 : : }
1470 : :
1471 : : /* If both branches are invariant (which loop unswitching handles) or
1472 : : both are variant, this is not what we want. */
1473 : 23990 : if (invar[0] ^ !invar[1])
1474 : : return NULL;
1475 : :
1476 : : /* Found a real loop-invariant condition, do nothing. */
1477 : 1536 : if (invar_checks < 2 && stmt_semi_invariant_p (loop, cond, NULL))
1478 : : return NULL;
1479 : :
1480 : 1318 : return EDGE_SUCC (cond_bb, invar[0] ? 0 : 1);
1481 : : }
1482 : :
1483 : : /* Calculate the code-size increase, measured in estimated insns, of applying
1484 : : loop splitting to a certain branch (BRANCH_EDGE) of a conditional statement. */
1485 : :
1486 : : static int
1487 : 791 : compute_added_num_insns (struct loop *loop, const_edge branch_edge)
1488 : : {
1489 : 791 : basic_block cond_bb = branch_edge->src;
1490 : 791 : unsigned branch = EDGE_SUCC (cond_bb, 1) == branch_edge;
1491 : 791 : basic_block opposite_bb = EDGE_SUCC (cond_bb, !branch)->dest;
1492 : 791 : basic_block *bbs = ((split_info *) loop->aux)->bbs;
1493 : 791 : int num = 0;
1494 : :
1495 : 7166 : for (unsigned i = 0; i < loop->num_nodes; i++)
1496 : : {
1497 : : /* Do not count basic blocks that lie only in the opposite branch. */
1498 : 6375 : if (dominated_by_p (CDI_DOMINATORS, bbs[i], opposite_bb))
1499 : 2562 : continue;
1500 : :
1501 : 7626 : num += estimate_num_insns_seq (bb_seq (bbs[i]), &eni_size_weights);
1502 : : }
1503 : :
1504 : : /* It is unnecessary to evaluate the expression of the conditional statement
1505 : : in the new loop that contains only the invariant branch, since there the
1506 : : expression is a constant value (either true or false). Exclude the code
1507 : : size of insns that contribute only to computing that expression. */
1508 : :
1509 : 791 : auto_vec<gimple *> worklist;
1510 : 791 : hash_set<gimple *> removed;
1511 : 791 : gimple *stmt = last_nondebug_stmt (cond_bb);
1512 : :
1513 : 791 : worklist.safe_push (stmt);
1514 : 791 : removed.add (stmt);
1515 : 791 : num -= estimate_num_insns (stmt, &eni_size_weights);
1516 : :
1517 : 956 : do
1518 : : {
1519 : 956 : ssa_op_iter opnd_iter;
1520 : 956 : use_operand_p opnd_p;
1521 : :
1522 : 956 : stmt = worklist.pop ();
1523 : 2919 : FOR_EACH_PHI_OR_STMT_USE (opnd_p, stmt, opnd_iter, SSA_OP_USE)
1524 : : {
1525 : 1007 : tree opnd = USE_FROM_PTR (opnd_p);
1526 : :
1527 : 1007 : if (TREE_CODE (opnd) != SSA_NAME || SSA_NAME_IS_DEFAULT_DEF (opnd))
1528 : 108 : continue;
1529 : :
1530 : 981 : gimple *opnd_stmt = SSA_NAME_DEF_STMT (opnd);
1531 : 981 : use_operand_p use_p;
1532 : 981 : imm_use_iterator use_iter;
1533 : :
1534 : 981 : if (removed.contains (opnd_stmt)
1535 : 981 : || !flow_bb_inside_loop_p (loop, gimple_bb (opnd_stmt)))
1536 : 82 : continue;
1537 : :
1538 : 1097 : FOR_EACH_IMM_USE_FAST (use_p, use_iter, opnd)
1539 : : {
1540 : 932 : gimple *use_stmt = USE_STMT (use_p);
1541 : :
1542 : 932 : if (!is_gimple_debug (use_stmt) && !removed.contains (use_stmt))
1543 : : {
1544 : 734 : opnd_stmt = NULL;
1545 : 734 : break;
1546 : : }
1547 : : }
1548 : :
1549 : 899 : if (opnd_stmt)
1550 : : {
1551 : 165 : worklist.safe_push (opnd_stmt);
1552 : 165 : removed.add (opnd_stmt);
1553 : 165 : num -= estimate_num_insns (opnd_stmt, &eni_size_weights);
1554 : : }
1555 : : }
1556 : 1912 : } while (!worklist.is_empty ());
1557 : :
1558 : 791 : gcc_assert (num >= 0);
1559 : 791 : return num;
1560 : 791 : }
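As a purely illustrative, invented-numbers example of the accounting above: suppose the loop body totals 40 estimated insns, of which 12 sit in blocks dominated by the opposite branch and are therefore skipped, so 28 are counted; if the condition itself plus two statements used only to compute it amount to 3 insns, the reported growth is 28 - 3 = 25, which the caller then compares against param_max_peeled_insns.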
1561 : :
1562 : : /* Find the loop-invariant branch of a conditional statement (COND), if it
1563 : : has one, and check whether it is eligible and profitable to perform loop
1564 : : splitting upon this branch in LOOP. */
1565 : :
1566 : : static edge
1567 : 128765 : get_cond_branch_to_split_loop (struct loop *loop, gcond *cond)
1568 : : {
1569 : 128765 : edge invar_branch = get_cond_invariant_branch (loop, cond);
1570 : 128765 : if (!invar_branch)
1571 : : return NULL;
1572 : :
1573 : : /* When accurate profile information is available and the execution
1574 : : frequency of the branch is too low, just give up. */
1575 : 791 : profile_probability prob = invar_branch->probability;
1576 : 791 : if (prob.reliable_p ())
1577 : : {
1578 : 5 : int thres = param_min_loop_cond_split_prob;
1579 : :
1580 : 10 : if (prob < profile_probability::always ().apply_scale (thres, 100))
1581 : 0 : return NULL;
1582 : : }
1583 : :
1584 : : /* Apply a threshold on the increased code size to disable loop splitting. */
1585 : 791 : if (compute_added_num_insns (loop, invar_branch) > param_max_peeled_insns)
1586 : : return NULL;
1587 : :
1588 : : return invar_branch;
1589 : : }
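The two thresholds consulted here correspond to GCC --param knobs whose names mirror the param_* identifiers above (min-loop-cond-split-prob and max-peeled-insns). A hedged usage sketch; the values shown are arbitrary and the defaults may differ between releases:

    gcc -O3 --param min-loop-cond-split-prob=50 \
            --param max-peeled-insns=200 -c test.c

Raising min-loop-cond-split-prob makes the profile test above stricter, while lowering max-peeled-insns rejects splits that would duplicate more code.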
1590 : :
1591 : : /* Given a loop (LOOP1) with a loop-invariant branch (INVAR_BRANCH) of some
1592 : : conditional statement, perform the loop split transformation illustrated
1593 : : by the following graph.
1594 : :
1595 : : .-------T------ if (true) ------F------.
1596 : : | .---------------. |
1597 : : | | | |
1598 : : v | v v
1599 : : pre-header | pre-header
1600 : : | .------------. | | .------------.
1601 : : | | | | | | |
1602 : : | v | | | v |
1603 : : header | | header |
1604 : : | | | | |
1605 : : .--- if (cond) ---. | | .--- if (true) ---. |
1606 : : | | | | | | |
1607 : : invariant | | | invariant | |
1608 : : | | | | | | |
1609 : : '---T--->.<---F---' | | '---T--->.<---F---' |
1610 : : | | / | |
1611 : : stmts | / stmts |
1612 : : | F T | |
1613 : : / \ | / / \ |
1614 : : .-------* * [ if (cond) ] .-------* * |
1615 : : | | | | | |
1616 : : | latch | | latch |
1617 : : | | | | | |
1618 : : | '------------' | '------------'
1619 : : '------------------------. .-----------'
1620 : : loop1 | | loop2
1621 : : v v
1622 : : exits
1623 : :
1624 : : In the graph, loop1 represents the part derived from the original loop,
1625 : : and loop2, duplicated using loop_version (), corresponds to the part of
1626 : : the original loop that is split out. On the original latch edge of loop1
1627 : : we insert a new conditional statement duplicated from the semi-invariant
1628 : : cond; one of its branches goes back to the loop1 header as a latch edge,
1629 : : and the other goes to the loop2 pre-header as an entry edge. In loop2 we
1630 : : abandon the variant branch of the conditional statement by setting a
1631 : : constant bool condition, chosen according to which branch is semi-invariant. */
1632 : :
1633 : : static bool
1634 : 787 : do_split_loop_on_cond (struct loop *loop1, edge invar_branch)
1635 : : {
1636 : 787 : basic_block cond_bb = invar_branch->src;
1637 : 787 : bool true_invar = !!(invar_branch->flags & EDGE_TRUE_VALUE);
1638 : 1574 : gcond *cond = as_a <gcond *> (*gsi_last_bb (cond_bb));
1639 : :
1640 : 787 : gcc_assert (cond_bb->loop_father == loop1);
1641 : :
1642 : 787 : if (dump_enabled_p ())
1643 : 252 : dump_printf_loc (MSG_OPTIMIZED_LOCATIONS, cond,
1644 : : "loop split on semi-invariant condition at %s branch\n",
1645 : : true_invar ? "true" : "false");
1646 : :
1647 : 787 : initialize_original_copy_tables ();
1648 : :
1649 : 787 : struct loop *loop2 = loop_version (loop1, boolean_true_node, NULL,
1650 : : invar_branch->probability.invert (),
1651 : : invar_branch->probability,
1652 : : profile_probability::always (),
1653 : : profile_probability::always (),
1654 : : true);
1655 : 787 : if (!loop2)
1656 : : {
1657 : 0 : free_original_copy_tables ();
1658 : 0 : return false;
1659 : : }
1660 : :
1661 : 787 : basic_block cond_bb_copy = get_bb_copy (cond_bb);
1662 : 1574 : gcond *cond_copy = as_a<gcond *> (*gsi_last_bb (cond_bb_copy));
1663 : :
1664 : : /* Replace the condition in loop2 with a bool constant so that later CFG
1665 : : cleanup can remove the variant branch after the current pass completes. */
1666 : 787 : if (true_invar)
1667 : 244 : gimple_cond_make_true (cond_copy);
1668 : : else
1669 : 543 : gimple_cond_make_false (cond_copy);
1670 : :
1671 : 787 : update_stmt (cond_copy);
1672 : :
1673 : : /* Insert a new conditional statement on the latch edge of loop1, with a
1674 : : condition duplicated from the semi-invariant cond. This statement acts
1675 : : as a switch that transfers execution from loop1 to loop2 once loop1
1676 : : enters the invariant state. */
1677 : 787 : basic_block latch_bb = split_edge (loop_latch_edge (loop1));
1678 : 787 : gimple *break_cond = gimple_build_cond (gimple_cond_code (cond),
1679 : 787 : gimple *break_cond = gimple_build_cond (gimple_cond_code(cond),
1680 : : gimple_cond_lhs (cond),
1681 : : gimple_cond_rhs (cond),
1682 : : NULL_TREE, NULL_TREE);
1683 : :
1684 : 787 : gimple_stmt_iterator gsi = gsi_last_bb (break_bb);
1685 : 787 : gsi_insert_after (&gsi, break_cond, GSI_NEW_STMT);
1686 : :
1687 : 787 : edge to_loop1 = single_succ_edge (break_bb);
1688 : 787 : edge to_loop2 = make_edge (break_bb, loop_preheader_edge (loop2)->src, 0);
1689 : :
1690 : 787 : to_loop1->flags &= ~EDGE_FALLTHRU;
1691 : 787 : to_loop1->flags |= true_invar ? EDGE_FALSE_VALUE : EDGE_TRUE_VALUE;
1692 : 787 : to_loop2->flags |= true_invar ? EDGE_TRUE_VALUE : EDGE_FALSE_VALUE;
1693 : :
1694 : : /* Due to the introduction of a control-flow edge from the loop1 latch to
1695 : : the loop2 pre-header, we need to update the PHIs in loop2 to reflect
1696 : : this connection between loop1 and loop2. */
1697 : 787 : connect_loop_phis (loop1, loop2, to_loop2);
1698 : :
1699 : 787 : edge true_edge, false_edge, skip_edge1, skip_edge2;
1700 : 787 : extract_true_false_edges_from_block (cond_bb, &true_edge, &false_edge);
1701 : :
1702 : 787 : skip_edge1 = true_invar ? false_edge : true_edge;
1703 : 787 : skip_edge2 = true_invar ? true_edge : false_edge;
1704 : 787 : fix_loop_bb_probability (loop1, loop2, skip_edge1, skip_edge2);
1705 : :
1706 : : /* Fix first loop's exit probability after scaling. */
1707 : 787 : to_loop1->probability = invar_branch->probability.invert ();
1708 : 787 : to_loop2->probability = invar_branch->probability;
1709 : :
1710 : 787 : free_original_copy_tables ();
1711 : :
1712 : 787 : return true;
1713 : : }
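Taking the `done' loop sketched after stmt_semi_invariant_p as input, the transformation above conceptually produces the following source-level shape (a hedged sketch with the same hypothetical helpers, not the literal GIMPLE output; in reality loop2 initially keeps an `if (true)' that later cleanup folds away):

    extern void fast_path (int);
    extern int slow_path (int);

    void
    example_split (int n)
    {
      int done = 0;
      int i = 0;

      while (i < n)                 /* loop1: condition still variant     */
        {
          if (done)
            fast_path (i);
          else
            done = slow_path (i);
          i++;
          if (done)                 /* switch inserted on the latch edge  */
            goto loop2;
        }
      goto out;

     loop2:
      while (i < n)                 /* loop2: the variant branch is gone  */
        {
          fast_path (i);
          i++;
        }

     out:;
    }

loop1 keeps doing useful work while the condition is still variant; the new test on the latch edge diverts execution to loop2 as soon as the invariant state is entered, and loop2 no longer evaluates the condition at all.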
1714 : :
1715 : : /* Traverse all conditional statements in LOOP to find a good candidate
1716 : : upon which to perform loop splitting. */
1717 : :
1718 : : static bool
1719 : 75675 : split_loop_on_cond (struct loop *loop)
1720 : : {
1721 : 75675 : split_info *info = new split_info ();
1722 : 75675 : basic_block *bbs = info->bbs = get_loop_body (loop);
1723 : 75675 : bool do_split = false;
1724 : :
1725 : : /* Allocate an area to keep temporary info, and associate its address
1726 : : with the loop's aux field. */
1727 : 75675 : loop->aux = info;
1728 : :
1729 : 529754 : for (unsigned i = 0; i < loop->num_nodes; i++)
1730 : 454079 : bbs[i]->aux = NULL;
1731 : :
1732 : 523837 : for (unsigned i = 0; i < loop->num_nodes; i++)
1733 : : {
1734 : 448949 : basic_block bb = bbs[i];
1735 : :
1736 : : /* We only consider conditional statements that are executed at most once
1737 : : in each iteration of the loop, so skip statements in inner loops. */
1738 : 448949 : if ((bb->loop_father != loop) || (bb->flags & BB_IRREDUCIBLE_LOOP))
1739 : 128486 : continue;
1740 : :
1741 : : /* This check is not strictly necessary. With it, we ensure that the
1742 : : conditional statement is always executed in each iteration of the loop. */
1743 : 320463 : if (!dominated_by_p (CDI_DOMINATORS, loop->latch, bb))
1744 : 105812 : continue;
1745 : :
1746 : 429302 : gcond *cond = safe_dyn_cast <gcond *> (*gsi_last_bb (bb));
1747 : 214651 : if (!cond)
1748 : 85886 : continue;
1749 : :
1750 : 128765 : edge branch_edge = get_cond_branch_to_split_loop (loop, cond);
1751 : :
1752 : 128765 : if (branch_edge)
1753 : : {
1754 : 787 : do_split_loop_on_cond (loop, branch_edge);
1755 : 787 : do_split = true;
1756 : 787 : break;
1757 : : }
1758 : : }
1759 : :
1760 : 75675 : delete info;
1761 : 75675 : loop->aux = NULL;
1762 : :
1763 : 75675 : return do_split;
1764 : : }
1765 : :
1766 : : /* Main entry point. Perform loop splitting on all suitable loops. */
1767 : :
1768 : : static unsigned int
1769 : 26438 : tree_ssa_split_loops (void)
1770 : : {
1771 : 26438 : bool changed = false;
1772 : :
1773 : 26438 : gcc_assert (scev_initialized_p ());
1774 : :
1775 : 26438 : calculate_dominance_info (CDI_POST_DOMINATORS);
1776 : :
1777 : 183663 : for (auto loop : loops_list (cfun, LI_INCLUDE_ROOT))
1778 : 104349 : loop->aux = NULL;
1779 : :
1780 : : /* Go through all loops starting from innermost. */
1781 : 157225 : for (auto loop : loops_list (cfun, LI_FROM_INNERMOST))
1782 : : {
1783 : 77911 : if (loop->aux)
1784 : : {
1785 : : /* If any of our inner loops was split, don't split us,
1786 : : and mark our containing loop as having had splits as well.
1787 : : This allows for delaying SSA update. */
1788 : 463 : loop_outer (loop)->aux = loop;
1789 : 463 : continue;
1790 : : }
1791 : :
1792 : 77448 : if (optimize_loop_for_size_p (loop))
1793 : 1390 : continue;
1794 : :
1795 : 76058 : if (split_loop (loop) || split_loop_on_cond (loop))
1796 : : {
1797 : : /* Mark our containing loop as having had some split inner loops. */
1798 : 1170 : loop_outer (loop)->aux = loop;
1799 : 1170 : changed = true;
1800 : : }
1801 : 26438 : }
1802 : :
1803 : 185391 : for (auto loop : loops_list (cfun, LI_INCLUDE_ROOT))
1804 : 106077 : loop->aux = NULL;
1805 : :
1806 : 26438 : clear_aux_for_blocks ();
1807 : :
1808 : 26438 : free_dominance_info (CDI_POST_DOMINATORS);
1809 : :
1810 : 26438 : if (changed)
1811 : : {
1812 : 851 : rewrite_into_loop_closed_ssa (NULL, TODO_update_ssa);
1813 : 851 : return TODO_cleanup_cfg;
1814 : : }
1815 : : return 0;
1816 : : }
1817 : :
1818 : : /* Loop splitting pass. */
1819 : :
1820 : : namespace {
1821 : :
1822 : : const pass_data pass_data_loop_split =
1823 : : {
1824 : : GIMPLE_PASS, /* type */
1825 : : "lsplit", /* name */
1826 : : OPTGROUP_LOOP, /* optinfo_flags */
1827 : : TV_LOOP_SPLIT, /* tv_id */
1828 : : PROP_cfg, /* properties_required */
1829 : : 0, /* properties_provided */
1830 : : 0, /* properties_destroyed */
1831 : : 0, /* todo_flags_start */
1832 : : 0, /* todo_flags_finish */
1833 : : };
1834 : :
1835 : : class pass_loop_split : public gimple_opt_pass
1836 : : {
1837 : : public:
1838 : 286682 : pass_loop_split (gcc::context *ctxt)
1839 : 573364 : : gimple_opt_pass (pass_data_loop_split, ctxt)
1840 : : {}
1841 : :
1842 : : /* opt_pass methods: */
1843 : 237663 : bool gate (function *) final override { return flag_split_loops != 0; }
1844 : : unsigned int execute (function *) final override;
1845 : :
1846 : : }; // class pass_loop_split
1847 : :
1848 : : unsigned int
1849 : 26438 : pass_loop_split::execute (function *fun)
1850 : : {
1851 : 52876 : if (number_of_loops (fun) <= 1)
1852 : : return 0;
1853 : :
1854 : 26438 : return tree_ssa_split_loops ();
1855 : : }
1856 : :
1857 : : } // anon namespace
1858 : :
1859 : : gimple_opt_pass *
1860 : 286682 : make_pass_loop_split (gcc::context *ctxt)
1861 : : {
1862 : 286682 : return new pass_loop_split (ctxt);
1863 : : }
|