Branch data Line data Source code
1 : : /* Loop splitting.
2 : : Copyright (C) 2015-2024 Free Software Foundation, Inc.
3 : :
4 : : This file is part of GCC.
5 : :
6 : : GCC is free software; you can redistribute it and/or modify it
7 : : under the terms of the GNU General Public License as published by the
8 : : Free Software Foundation; either version 3, or (at your option) any
9 : : later version.
10 : :
11 : : GCC is distributed in the hope that it will be useful, but WITHOUT
12 : : ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
13 : : FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
14 : : for more details.
15 : :
16 : : You should have received a copy of the GNU General Public License
17 : : along with GCC; see the file COPYING3. If not see
18 : : <http://www.gnu.org/licenses/>. */
19 : :
20 : : #include "config.h"
21 : : #include "system.h"
22 : : #include "coretypes.h"
23 : : #include "backend.h"
24 : : #include "tree.h"
25 : : #include "gimple.h"
26 : : #include "tree-pass.h"
27 : : #include "ssa.h"
28 : : #include "fold-const.h"
29 : : #include "tree-cfg.h"
30 : : #include "tree-ssa.h"
31 : : #include "tree-ssa-loop-niter.h"
32 : : #include "tree-ssa-loop.h"
33 : : #include "tree-ssa-loop-manip.h"
34 : : #include "tree-into-ssa.h"
35 : : #include "tree-inline.h"
36 : : #include "tree-cfgcleanup.h"
37 : : #include "cfgloop.h"
38 : : #include "tree-scalar-evolution.h"
39 : : #include "gimple-iterator.h"
40 : : #include "gimple-pretty-print.h"
41 : : #include "cfghooks.h"
42 : : #include "gimple-fold.h"
43 : : #include "gimplify-me.h"
44 : : #include "print-tree.h"
45 : : #include "value-query.h"
46 : : #include "sreal.h"
47 : :
48 : : /* This file implements two kinds of loop splitting.
49 : :
50 : : One transformation of loops like:
51 : :
52 : : for (i = 0; i < 100; i++)
53 : : {
54 : : if (i < 50)
55 : : A;
56 : : else
57 : : B;
58 : : }
59 : :
60 : : into:
61 : :
62 : : for (i = 0; i < 50; i++)
63 : : {
64 : : A;
65 : : }
66 : : for (; i < 100; i++)
67 : : {
68 : : B;
69 : : }
70 : :
71 : : */
72 : :
73 : : /* Return true when BB inside LOOP is a potential iteration space
74 : : split point, i.e. ends with a condition like "IV < comp", which
75 : : is true on one side of the iteration space and false on the other,
76 : : and the split point can be computed. If so, also return the border
77 : : point in *BORDER and the comparison induction variable in IV. */
78 : :
79 : : static tree
80 : 151821 : split_at_bb_p (class loop *loop, basic_block bb, tree *border, affine_iv *iv,
81 : : enum tree_code *guard_code)
82 : : {
83 : 151821 : gcond *stmt;
84 : 151821 : affine_iv iv2;
85 : :
86 : : /* BB must end in a simple conditional jump. */
87 : 387094 : stmt = safe_dyn_cast <gcond *> (*gsi_last_bb (bb));
88 : 67711 : if (!stmt)
89 : : return NULL_TREE;
90 : :
91 : 67711 : enum tree_code code = gimple_cond_code (stmt);
92 : :
93 : 67711 : if (loop_exits_from_bb_p (loop, bb))
94 : : return NULL_TREE;
95 : :
96 : 30286 : tree op0 = gimple_cond_lhs (stmt);
97 : 30286 : tree op1 = gimple_cond_rhs (stmt);
98 : 30286 : class loop *useloop = loop_containing_stmt (stmt);
99 : :
100 : 30286 : if (!simple_iv (loop, useloop, op0, iv, false))
101 : : return NULL_TREE;
102 : 4846 : if (!simple_iv (loop, useloop, op1, &iv2, false))
103 : : return NULL_TREE;
104 : :
105 : : /* Make it so that the first argument of the condition is
106 : : the looping one. */
107 : 2118 : if (!integer_zerop (iv2.step))
108 : : {
109 : 638 : std::swap (op0, op1);
110 : 638 : std::swap (*iv, iv2);
111 : 638 : code = swap_tree_comparison (code);
112 : 638 : gimple_cond_set_condition (stmt, code, op0, op1);
113 : 638 : update_stmt (stmt);
114 : : }
115 : 1480 : else if (integer_zerop (iv->step))
116 : : return NULL_TREE;
117 : 1187 : if (!integer_zerop (iv2.step))
118 : : return NULL_TREE;
119 : 1097 : if (!iv->no_overflow)
120 : : return NULL_TREE;
121 : :
122 : : /* Only handle relational comparisons, for equality and non-equality
123 : : we'd have to split the loop into two loops and a middle statement. */
124 : 1046 : switch (code)
125 : : {
126 : : case LT_EXPR:
127 : : case LE_EXPR:
128 : : case GT_EXPR:
129 : : case GE_EXPR:
130 : : break;
131 : 351 : case NE_EXPR:
132 : 351 : case EQ_EXPR:
133 : : /* If the test check for first iteration, we can handle NE/EQ
134 : : with only one split loop. */
135 : 351 : if (operand_equal_p (iv->base, iv2.base, 0))
136 : : {
137 : 127 : if (code == EQ_EXPR)
138 : 102 : code = !tree_int_cst_sign_bit (iv->step) ? LE_EXPR : GE_EXPR;
139 : : else
140 : 25 : code = !tree_int_cst_sign_bit (iv->step) ? GT_EXPR : LT_EXPR;
141 : : break;
142 : : }
143 : : /* Similarly when the test checks for minimal or maximal
144 : : value range. */
145 : : else
146 : : {
147 : 224 : value_range r (TREE_TYPE (op0));
148 : 224 : get_global_range_query ()->range_of_expr (r, op0, stmt);
149 : 224 : if (!r.varying_p () && !r.undefined_p ()
150 : 440 : && TREE_CODE (op1) == INTEGER_CST)
151 : : {
152 : 103 : wide_int val = wi::to_wide (op1);
153 : 103 : if (known_eq (val, wi::to_wide (r.lbound ())))
154 : : {
155 : 0 : code = (code == EQ_EXPR) ? LE_EXPR : GT_EXPR;
156 : : break;
157 : : }
158 : 103 : else if (known_eq (val, wi::to_wide (r.ubound ())))
159 : : {
160 : 36 : code = (code == EQ_EXPR) ? GE_EXPR : LT_EXPR;
161 : : break;
162 : : }
163 : 67 : }
164 : 224 : }
165 : : /* TODO: We can compare with exit condition; it seems that testing for
166 : : last iteration is common case. */
167 : 188 : return NULL_TREE;
168 : : default:
169 : : return NULL_TREE;
170 : : }
171 : :
172 : 858 : if (dump_file && (dump_flags & TDF_DETAILS))
173 : : {
174 : 25 : fprintf (dump_file, "Found potential split point: ");
175 : 25 : print_gimple_stmt (dump_file, stmt, 0, TDF_SLIM);
176 : 25 : fprintf (dump_file, " { ");
177 : 25 : print_generic_expr (dump_file, iv->base, TDF_SLIM);
178 : 25 : fprintf (dump_file, " + I*");
179 : 25 : print_generic_expr (dump_file, iv->step, TDF_SLIM);
180 : 25 : fprintf (dump_file, " } %s ", get_tree_code_name (code));
181 : 25 : print_generic_expr (dump_file, iv2.base, TDF_SLIM);
182 : 25 : fprintf (dump_file, "\n");
183 : : }
184 : :
185 : 858 : *border = iv2.base;
186 : 858 : *guard_code = code;
187 : 858 : return op0;
188 : : }
189 : :
190 : : /* Given a GUARD conditional stmt inside LOOP, which we want to make always
191 : : true or false depending on INITIAL_TRUE, and adjusted values NEXTVAL
192 : : (a post-increment IV) and NEWBOUND (the comparator) adjust the loop
193 : : exit test statement to loop back only if the GUARD statement will
194 : : also be true/false in the next iteration. */
195 : :
196 : : static void
197 : 373 : patch_loop_exit (class loop *loop, tree_code guard_code, tree nextval,
198 : : tree newbound, bool initial_true)
199 : : {
200 : 373 : edge exit = single_exit (loop);
201 : 746 : gcond *stmt = as_a <gcond *> (*gsi_last_bb (exit->src));
202 : 373 : gimple_cond_set_condition (stmt, guard_code, nextval, newbound);
203 : 373 : update_stmt (stmt);
204 : :
205 : 373 : edge stay = EDGE_SUCC (exit->src, EDGE_SUCC (exit->src, 0) == exit);
206 : :
207 : 373 : exit->flags &= ~(EDGE_TRUE_VALUE | EDGE_FALSE_VALUE);
208 : 373 : stay->flags &= ~(EDGE_TRUE_VALUE | EDGE_FALSE_VALUE);
209 : :
210 : 373 : if (initial_true)
211 : : {
212 : 271 : exit->flags |= EDGE_FALSE_VALUE;
213 : 271 : stay->flags |= EDGE_TRUE_VALUE;
214 : : }
215 : : else
216 : : {
217 : 102 : exit->flags |= EDGE_TRUE_VALUE;
218 : 102 : stay->flags |= EDGE_FALSE_VALUE;
219 : : }
220 : 373 : }
221 : :
222 : : /* Give an induction variable GUARD_IV, and its affine descriptor IV,
223 : : find the loop phi node in LOOP defining it directly, or create
224 : : such phi node. Return that phi node. */
225 : :
226 : : static gphi *
227 : 850 : find_or_create_guard_phi (class loop *loop, tree guard_iv, affine_iv * /*iv*/)
228 : : {
229 : 850 : gimple *def = SSA_NAME_DEF_STMT (guard_iv);
230 : 850 : gphi *phi;
231 : 850 : if ((phi = dyn_cast <gphi *> (def))
232 : 373 : && gimple_bb (phi) == loop->header)
233 : 373 : return phi;
234 : :
235 : : /* XXX Create the PHI instead. */
236 : : return NULL;
237 : : }
238 : :
239 : : /* Returns true if the exit values of all loop phi nodes can be
240 : : determined easily (i.e. that connect_loop_phis can determine them). */
241 : :
242 : : static bool
243 : 44943 : easy_exit_values (class loop *loop)
244 : : {
245 : 44943 : edge exit = single_exit (loop);
246 : 44943 : edge latch = loop_latch_edge (loop);
247 : 44943 : gphi_iterator psi;
248 : :
249 : : /* Currently we regard the exit values as easy if they are the same
250 : : as the value over the backedge. Which is the case if the definition
251 : : of the backedge value dominates the exit edge. */
252 : 150651 : for (psi = gsi_start_phis (loop->header); !gsi_end_p (psi); gsi_next (&psi))
253 : : {
254 : 105708 : gphi *phi = psi.phi ();
255 : 105708 : tree next = PHI_ARG_DEF_FROM_EDGE (phi, latch);
256 : 105708 : basic_block bb;
257 : 105708 : if (TREE_CODE (next) == SSA_NAME
258 : 104911 : && (bb = gimple_bb (SSA_NAME_DEF_STMT (next)))
259 : 210616 : && !dominated_by_p (CDI_DOMINATORS, exit->src, bb))
260 : : return false;
261 : : }
262 : :
263 : : return true;
264 : : }
265 : :
266 : : /* This function updates the SSA form after connect_loops made a new
267 : : edge NEW_E leading from LOOP1 exit to LOOP2 (via in intermediate
268 : : conditional). I.e. the second loop can now be entered either
269 : : via the original entry or via NEW_E, so the entry values of LOOP2
270 : : phi nodes are either the original ones or those at the exit
271 : : of LOOP1. Insert new phi nodes in LOOP2 pre-header reflecting
272 : : this. The loops need to fulfill easy_exit_values(). */
273 : :
274 : : static void
275 : 1118 : connect_loop_phis (class loop *loop1, class loop *loop2, edge new_e)
276 : : {
277 : 1118 : basic_block rest = loop_preheader_edge (loop2)->src;
278 : 1118 : gcc_assert (new_e->dest == rest);
279 : 1118 : edge skip_first = EDGE_PRED (rest, EDGE_PRED (rest, 0) == new_e);
280 : :
281 : 1118 : edge firste = loop_preheader_edge (loop1);
282 : 1118 : edge seconde = loop_preheader_edge (loop2);
283 : 1118 : edge firstn = loop_latch_edge (loop1);
284 : 1118 : gphi_iterator psi_first, psi_second;
285 : 1118 : for (psi_first = gsi_start_phis (loop1->header),
286 : 1118 : psi_second = gsi_start_phis (loop2->header);
287 : 4617 : !gsi_end_p (psi_first);
288 : 3499 : gsi_next (&psi_first), gsi_next (&psi_second))
289 : : {
290 : 3499 : tree init, next, new_init;
291 : 3499 : use_operand_p op;
292 : 3499 : gphi *phi_first = psi_first.phi ();
293 : 3499 : gphi *phi_second = psi_second.phi ();
294 : :
295 : 3499 : init = PHI_ARG_DEF_FROM_EDGE (phi_first, firste);
296 : 3499 : next = PHI_ARG_DEF_FROM_EDGE (phi_first, firstn);
297 : 3499 : op = PHI_ARG_DEF_PTR_FROM_EDGE (phi_second, seconde);
298 : 3499 : gcc_assert (operand_equal_for_phi_arg_p (init, USE_FROM_PTR (op)));
299 : :
300 : : /* Prefer using original variable as a base for the new ssa name.
301 : : This is necessary for virtual ops, and useful in order to avoid
302 : : losing debug info for real ops. */
303 : 3499 : if (TREE_CODE (next) == SSA_NAME
304 : 6925 : && useless_type_conversion_p (TREE_TYPE (next),
305 : 3426 : TREE_TYPE (init)))
306 : 3426 : new_init = copy_ssa_name (next);
307 : 73 : else if (TREE_CODE (init) == SSA_NAME
308 : 83 : && useless_type_conversion_p (TREE_TYPE (init),
309 : 10 : TREE_TYPE (next)))
310 : 10 : new_init = copy_ssa_name (init);
311 : 63 : else if (useless_type_conversion_p (TREE_TYPE (next),
312 : 63 : TREE_TYPE (init)))
313 : 63 : new_init = make_temp_ssa_name (TREE_TYPE (next), NULL,
314 : : "unrinittmp");
315 : : else
316 : 0 : new_init = make_temp_ssa_name (TREE_TYPE (init), NULL,
317 : : "unrinittmp");
318 : :
319 : 3499 : gphi * newphi = create_phi_node (new_init, rest);
320 : 3499 : add_phi_arg (newphi, init, skip_first, UNKNOWN_LOCATION);
321 : 3499 : add_phi_arg (newphi, next, new_e, UNKNOWN_LOCATION);
322 : 3499 : SET_USE (op, new_init);
323 : : }
324 : 1118 : }
325 : :
326 : : /* The two loops LOOP1 and LOOP2 were just created by loop versioning,
327 : : they are still equivalent and placed in two arms of a diamond, like so:
328 : :
329 : : .------if (cond)------.
330 : : v v
331 : : pre1 pre2
332 : : | |
333 : : .--->h1 h2<----.
334 : : | | | |
335 : : | ex1---. .---ex2 |
336 : : | / | | \ |
337 : : '---l1 X | l2---'
338 : : | |
339 : : | |
340 : : '--->join<---'
341 : :
342 : : This function transforms the program such that LOOP1 is conditionally
343 : : falling through to LOOP2, or skipping it. This is done by splitting
344 : : the ex1->join edge at X in the diagram above, and inserting a condition
345 : : whose one arm goes to pre2, resulting in this situation:
346 : :
347 : : .------if (cond)------.
348 : : v v
349 : : pre1 .---------->pre2
350 : : | | |
351 : : .--->h1 | h2<----.
352 : : | | | | |
353 : : | ex1---. | .---ex2 |
354 : : | / v | | \ |
355 : : '---l1 skip---' | l2---'
356 : : | |
357 : : | |
358 : : '--->join<---'
359 : :
360 : :
361 : : The condition used is the exit condition of LOOP1, which effectively means
362 : : that when the first loop exits (for whatever reason) but the real original
363 : : exit expression is still false the second loop will be entered.
364 : : The function returns the new edge cond->pre2.
365 : :
366 : : This doesn't update the SSA form, see connect_loop_phis for that. */
367 : :
368 : : static edge
369 : 373 : connect_loops (class loop *loop1, class loop *loop2)
370 : : {
371 : 373 : edge exit = single_exit (loop1);
372 : 373 : basic_block skip_bb = split_edge (exit);
373 : 373 : gcond *skip_stmt;
374 : 373 : gimple_stmt_iterator gsi;
375 : 373 : edge new_e, skip_e;
376 : :
377 : 746 : gcond *stmt = as_a <gcond *> (*gsi_last_bb (exit->src));
378 : 373 : skip_stmt = gimple_build_cond (gimple_cond_code (stmt),
379 : : gimple_cond_lhs (stmt),
380 : : gimple_cond_rhs (stmt),
381 : : NULL_TREE, NULL_TREE);
382 : 373 : gsi = gsi_last_bb (skip_bb);
383 : 373 : gsi_insert_after (&gsi, skip_stmt, GSI_NEW_STMT);
384 : :
385 : 373 : skip_e = EDGE_SUCC (skip_bb, 0);
386 : 373 : skip_e->flags &= ~EDGE_FALLTHRU;
387 : 373 : new_e = make_edge (skip_bb, loop_preheader_edge (loop2)->src, 0);
388 : 373 : if (exit->flags & EDGE_TRUE_VALUE)
389 : : {
390 : 89 : skip_e->flags |= EDGE_TRUE_VALUE;
391 : 89 : new_e->flags |= EDGE_FALSE_VALUE;
392 : : }
393 : : else
394 : : {
395 : 284 : skip_e->flags |= EDGE_FALSE_VALUE;
396 : 284 : new_e->flags |= EDGE_TRUE_VALUE;
397 : : }
398 : :
399 : 373 : new_e->probability = profile_probability::very_likely ();
400 : 373 : skip_e->probability = new_e->probability.invert ();
401 : :
402 : 373 : return new_e;
403 : : }
404 : :
405 : : /* This returns the new bound for iterations given the original iteration
406 : : space in NITER, an arbitrary new bound BORDER, assumed to be some
407 : : comparison value with a different IV, the initial value GUARD_INIT of
408 : : that other IV, and the comparison code GUARD_CODE that compares
409 : : that other IV with BORDER. We return an SSA name, and place any
410 : : necessary statements for that computation into *STMTS.
411 : :
412 : : For example for such a loop:
413 : :
414 : : for (i = beg, j = guard_init; i < end; i++, j++)
415 : : if (j < border) // this is supposed to be true/false
416 : : ...
417 : :
418 : : we want to return a new bound (on j) that makes the loop iterate
419 : : as long as the condition j < border stays true. We also don't want
420 : : to iterate more often than the original loop, so we have to introduce
421 : : some cut-off as well (via min/max), effectively resulting in:
422 : :
423 : : newend = min (end+guard_init-beg, border)
424 : : for (i = beg; j = guard_init; j < newend; i++, j++)
425 : : if (j < c)
426 : : ...
427 : :
428 : : Depending on the direction of the IVs and if the exit tests
429 : : are strict or non-strict we need to use MIN or MAX,
430 : : and add or subtract 1. This routine computes newend above. */
431 : :
432 : : static tree
433 : 373 : compute_new_first_bound (gimple_seq *stmts, class tree_niter_desc *niter,
434 : : tree border,
435 : : enum tree_code guard_code, tree guard_init)
436 : : {
437 : : /* The niter structure contains the after-increment IV, we need
438 : : the loop-enter base, so subtract STEP once. */
439 : 373 : tree controlbase = force_gimple_operand (niter->control.base,
440 : : stmts, true, NULL_TREE);
441 : 373 : tree controlstep = niter->control.step;
442 : 373 : tree enddiff;
443 : 373 : if (POINTER_TYPE_P (TREE_TYPE (controlbase)))
444 : : {
445 : 1 : controlstep = gimple_build (stmts, NEGATE_EXPR,
446 : 1 : TREE_TYPE (controlstep), controlstep);
447 : 1 : enddiff = gimple_build (stmts, POINTER_PLUS_EXPR,
448 : 1 : TREE_TYPE (controlbase),
449 : : controlbase, controlstep);
450 : : }
451 : : else
452 : 372 : enddiff = gimple_build (stmts, MINUS_EXPR,
453 : 372 : TREE_TYPE (controlbase),
454 : : controlbase, controlstep);
455 : :
456 : : /* Compute end-beg. */
457 : 373 : gimple_seq stmts2;
458 : 373 : tree end = force_gimple_operand (niter->bound, &stmts2,
459 : : true, NULL_TREE);
460 : 373 : gimple_seq_add_seq_without_update (stmts, stmts2);
461 : 373 : if (POINTER_TYPE_P (TREE_TYPE (enddiff)))
462 : : {
463 : 1 : tree tem = gimple_convert (stmts, sizetype, enddiff);
464 : 1 : tem = gimple_build (stmts, NEGATE_EXPR, sizetype, tem);
465 : 1 : enddiff = gimple_build (stmts, POINTER_PLUS_EXPR,
466 : 1 : TREE_TYPE (enddiff),
467 : : end, tem);
468 : : }
469 : : else
470 : 372 : enddiff = gimple_build (stmts, MINUS_EXPR, TREE_TYPE (enddiff),
471 : : end, enddiff);
472 : :
473 : : /* Compute guard_init + (end-beg). */
474 : 373 : tree newbound;
475 : 373 : enddiff = gimple_convert (stmts, TREE_TYPE (guard_init), enddiff);
476 : 373 : if (POINTER_TYPE_P (TREE_TYPE (guard_init)))
477 : : {
478 : 1 : enddiff = gimple_convert (stmts, sizetype, enddiff);
479 : 1 : newbound = gimple_build (stmts, POINTER_PLUS_EXPR,
480 : 1 : TREE_TYPE (guard_init),
481 : : guard_init, enddiff);
482 : : }
483 : : else
484 : 372 : newbound = gimple_build (stmts, PLUS_EXPR, TREE_TYPE (guard_init),
485 : : guard_init, enddiff);
486 : :
487 : : /* Depending on the direction of the IVs the new bound for the first
488 : : loop is the minimum or maximum of old bound and border.
489 : : Also, if the guard condition isn't strictly less or greater,
490 : : we need to adjust the bound. */
491 : 373 : int addbound = 0;
492 : 373 : enum tree_code minmax;
493 : 373 : if (niter->cmp == LT_EXPR)
494 : : {
495 : : /* GT and LE are the same, inverted. */
496 : 351 : if (guard_code == GT_EXPR || guard_code == LE_EXPR)
497 : : addbound = -1;
498 : : minmax = MIN_EXPR;
499 : : }
500 : : else
501 : : {
502 : 22 : gcc_assert (niter->cmp == GT_EXPR);
503 : 22 : if (guard_code == GE_EXPR || guard_code == LT_EXPR)
504 : : addbound = 1;
505 : : minmax = MAX_EXPR;
506 : : }
507 : :
508 : : if (addbound)
509 : : {
510 : 280 : tree type2 = TREE_TYPE (newbound);
511 : 280 : if (POINTER_TYPE_P (type2))
512 : 0 : type2 = sizetype;
513 : 560 : newbound = gimple_build (stmts,
514 : 280 : POINTER_TYPE_P (TREE_TYPE (newbound))
515 : : ? POINTER_PLUS_EXPR : PLUS_EXPR,
516 : 280 : TREE_TYPE (newbound),
517 : : newbound,
518 : 280 : build_int_cst (type2, addbound));
519 : : }
520 : :
521 : 373 : tree newend = gimple_build (stmts, minmax, TREE_TYPE (border),
522 : : border, newbound);
523 : 373 : return newend;
524 : : }
525 : :
526 : : /* Fix the two loop's bb count after split based on the split edge probability,
527 : : don't adjust the bbs dominated by true branches of that loop to avoid
528 : : dropping 1s down. */
529 : : static void
530 : 1118 : fix_loop_bb_probability (class loop *loop1, class loop *loop2, edge true_edge,
531 : : edge false_edge)
532 : : {
533 : : /* Proportion first loop's bb counts except those dominated by true
534 : : branch to avoid drop 1s down. */
535 : 1118 : basic_block *bbs1, *bbs2;
536 : 1118 : bbs1 = get_loop_body (loop1);
537 : 1118 : unsigned j;
538 : 13234 : for (j = 0; j < loop1->num_nodes; j++)
539 : 10998 : if (bbs1[j] == loop1->latch
540 : : /* Watch for case where the true conditional is empty. */
541 : 9880 : || !single_pred_p (true_edge->dest)
542 : 20484 : || !dominated_by_p (CDI_DOMINATORS, bbs1[j], true_edge->dest))
543 : 7763 : bbs1[j]->count
544 : 7763 : = bbs1[j]->count.apply_probability (true_edge->probability);
545 : 1118 : free (bbs1);
546 : :
547 : : /* Proportion second loop's bb counts except those dominated by false
548 : : branch to avoid drop 1s down. */
549 : 1118 : basic_block bbi_copy = get_bb_copy (false_edge->dest);
550 : 1118 : bbs2 = get_loop_body (loop2);
551 : 11744 : for (j = 0; j < loop2->num_nodes; j++)
552 : 9508 : if (bbs2[j] == loop2->latch
553 : : /* Watch for case where the flase conditional is empty. */
554 : 8390 : || !single_pred_p (bbi_copy)
555 : 13714 : || !dominated_by_p (CDI_DOMINATORS, bbs2[j], bbi_copy))
556 : 8606 : bbs2[j]->count
557 : 8606 : = bbs2[j]->count.apply_probability (true_edge->probability.invert ());
558 : 1118 : free (bbs2);
559 : 1118 : }
560 : :
561 : : /* Checks if LOOP contains an conditional block whose condition
562 : : depends on which side in the iteration space it is, and if so
563 : : splits the iteration space into two loops. Returns true if the
564 : : loop was split. NITER must contain the iteration descriptor for the
565 : : single exit of LOOP. */
566 : :
567 : : static bool
568 : 70630 : split_loop (class loop *loop1)
569 : : {
570 : 70630 : class tree_niter_desc niter;
571 : 70630 : basic_block *bbs;
572 : 70630 : unsigned i;
573 : 70630 : bool changed = false;
574 : 70630 : tree guard_iv;
575 : 70630 : tree border = NULL_TREE;
576 : 70630 : affine_iv iv;
577 : 70630 : edge exit1;
578 : :
579 : 70630 : if (!(exit1 = single_exit (loop1))
580 : 49274 : || EDGE_COUNT (exit1->src->succs) != 2
581 : : /* ??? We could handle non-empty latches when we split the latch edge
582 : : (not the exit edge), and put the new exit condition in the new block.
583 : : OTOH this executes some code unconditionally that might have been
584 : : skipped by the original exit before. */
585 : 49266 : || !empty_block_p (loop1->latch)
586 : 44943 : || !easy_exit_values (loop1)
587 : 44943 : || !number_of_iterations_exit (loop1, exit1, &niter, false, true)
588 : 108639 : || niter.cmp == ERROR_MARK)
589 : 32663 : return false;
590 : 37967 : if (niter.cmp == NE_EXPR)
591 : : {
592 : 24024 : if (!niter.control.no_overflow)
593 : : return false;
594 : 23763 : if (tree_int_cst_sign_bit (niter.control.step))
595 : 2311 : niter.cmp = GT_EXPR;
596 : : else
597 : 21452 : niter.cmp = LT_EXPR;
598 : : }
599 : :
600 : 37706 : bbs = get_loop_body (loop1);
601 : :
602 : 37706 : if (!can_copy_bbs_p (bbs, loop1->num_nodes))
603 : : {
604 : 4 : free (bbs);
605 : 4 : return false;
606 : : }
607 : :
608 : : /* Find a splitting opportunity. */
609 : : enum tree_code guard_code;
610 : 189150 : for (i = 0; i < loop1->num_nodes; i++)
611 : 151821 : if ((guard_iv = split_at_bb_p (loop1, bbs[i], &border, &iv, &guard_code)))
612 : : {
613 : : /* Handling opposite steps is not implemented yet. Neither
614 : : is handling different step sizes. */
615 : 858 : if ((tree_int_cst_sign_bit (iv.step)
616 : 858 : != tree_int_cst_sign_bit (niter.control.step))
617 : 858 : || !tree_int_cst_equal (iv.step, niter.control.step))
618 : 485 : continue;
619 : :
620 : : /* Find a loop PHI node that defines guard_iv directly,
621 : : or create one doing that. */
622 : 850 : gphi *phi = find_or_create_guard_phi (loop1, guard_iv, &iv);
623 : 850 : if (!phi)
624 : 477 : continue;
625 : 373 : const tree phi_result = gimple_phi_result (phi);
626 : 746 : gcond *guard_stmt = as_a<gcond *> (*gsi_last_bb (bbs[i]));
627 : 373 : tree guard_init = PHI_ARG_DEF_FROM_EDGE (phi,
628 : : loop_preheader_edge (loop1));
629 : :
630 : : /* Loop splitting is implemented by versioning the loop, placing
631 : : the new loop after the old loop, make the first loop iterate
632 : : as long as the conditional stays true (or false) and let the
633 : : second (new) loop handle the rest of the iterations.
634 : :
635 : : First we need to determine if the condition will start being true
636 : : or false in the first loop. */
637 : 373 : bool initial_true;
638 : 373 : switch (guard_code)
639 : : {
640 : 269 : case LT_EXPR:
641 : 269 : case LE_EXPR:
642 : 269 : initial_true = !tree_int_cst_sign_bit (iv.step);
643 : 269 : break;
644 : 104 : case GT_EXPR:
645 : 104 : case GE_EXPR:
646 : 104 : initial_true = tree_int_cst_sign_bit (iv.step);
647 : 104 : break;
648 : 0 : default:
649 : 0 : gcc_unreachable ();
650 : : }
651 : :
652 : : /* Build a condition that will skip the first loop when the
653 : : guard condition won't ever be true (or false). */
654 : 373 : gimple_seq stmts2;
655 : 373 : border = force_gimple_operand (border, &stmts2, true, NULL_TREE);
656 : 373 : if (stmts2)
657 : : {
658 : : /* When the split condition is not always evaluated make sure
659 : : to rewrite it to defined overflow. */
660 : 2 : if (!dominated_by_p (CDI_DOMINATORS, exit1->src, bbs[i]))
661 : : {
662 : 2 : gimple_stmt_iterator gsi;
663 : 2 : gsi = gsi_start (stmts2);
664 : 6 : while (!gsi_end_p (gsi))
665 : : {
666 : 4 : gimple *stmt = gsi_stmt (gsi);
667 : 4 : if (is_gimple_assign (stmt)
668 : 8 : && arith_code_with_undefined_signed_overflow
669 : 4 : (gimple_assign_rhs_code (stmt)))
670 : 2 : rewrite_to_defined_overflow (&gsi);
671 : 4 : gsi_next (&gsi);
672 : : }
673 : : }
674 : 2 : gsi_insert_seq_on_edge_immediate (loop_preheader_edge (loop1),
675 : : stmts2);
676 : : }
677 : 373 : tree cond = fold_build2 (guard_code, boolean_type_node,
678 : : guard_init, border);
679 : 373 : if (!initial_true)
680 : 102 : cond = fold_build1 (TRUTH_NOT_EXPR, boolean_type_node, cond);
681 : :
682 : 373 : edge true_edge, false_edge;
683 : 373 : extract_true_false_edges_from_block (bbs[i], &true_edge, &false_edge);
684 : :
685 : : /* Now version the loop, placing loop2 after loop1 connecting
686 : : them, and fix up SSA form for that. */
687 : 373 : initialize_original_copy_tables ();
688 : 373 : basic_block cond_bb;
689 : :
690 : 373 : profile_probability loop1_prob
691 : 373 : = integer_onep (cond) ? profile_probability::always ()
692 : 101 : : true_edge->probability;
693 : : /* TODO: It is commonly a case that we know that both loops will be
694 : : entered. very_likely below is the probability that second loop will
695 : : be entered given by connect_loops. We should work out the common
696 : : case it is always true. */
697 : 373 : class loop *loop2 = loop_version (loop1, cond, &cond_bb,
698 : : loop1_prob,
699 : : /* Pass always as we will later
700 : : redirect first loop to second
701 : : loop. */
702 : : profile_probability::always (),
703 : : profile_probability::always (),
704 : : profile_probability::very_likely (),
705 : : true);
706 : 373 : gcc_assert (loop2);
707 : :
708 : : /* The phi address may have changed due to being resized in
709 : : loop_version (), so reobtain it. */
710 : 373 : phi = as_a<gphi *> (SSA_NAME_DEF_STMT (phi_result));
711 : 373 : gcc_checking_assert (gimple_bb (phi) == loop1->header);
712 : :
713 : : /* Correct probability of edge cond_bb->preheader_of_loop2. */
714 : 373 : single_pred_edge
715 : 373 : (loop_preheader_edge (loop2)->src)->probability
716 : 373 : = loop1_prob.invert ();
717 : :
718 : 373 : fix_loop_bb_probability (loop1, loop2, true_edge, false_edge);
719 : : /* If conditional we split on has reliable profilea nd both
720 : : preconditionals of loop1 and loop2 are constant true, we can
721 : : only redistribute the iteration counts to the split loops.
722 : :
723 : : If the conditionals we insert before loop1 or loop2 are non-trivial
724 : : they increase expected loop count, so account this accordingly.
725 : : If we do not know the probability of split conditional, avoid
726 : : reudcing loop estimates, since we do not really know how they are
727 : : split between of the two new loops. Keep orignal estimate since
728 : : it is likely better then completely dropping it.
729 : :
730 : : TODO: If we know that one of the new loops has constant
731 : : number of iterations, we can do better. We could also update
732 : : upper bounds. */
733 : 373 : if (loop1->any_estimate
734 : 373 : && wi::fits_shwi_p (loop1->nb_iterations_estimate))
735 : : {
736 : 200 : sreal scale = true_edge->probability.reliable_p ()
737 : 200 : ? true_edge->probability.to_sreal () : (sreal)1;
738 : 200 : sreal scale2 = false_edge->probability.reliable_p ()
739 : 200 : ? false_edge->probability.to_sreal () : (sreal)1;
740 : 200 : sreal div1 = loop1_prob.initialized_p ()
741 : 202 : ? loop1_prob.to_sreal () : (sreal)1/(sreal)2;
742 : : /* +1 to get header interations rather than latch iterations and then
743 : : -1 to convert back. */
744 : 200 : if (div1 != 0)
745 : 199 : loop1->nb_iterations_estimate
746 : 397 : = MAX ((((sreal)loop1->nb_iterations_estimate.to_shwi () + 1)
747 : : * scale / div1).to_nearest_int () - 1, 0);
748 : : else
749 : 1 : loop1->any_estimate = false;
750 : 200 : loop2->nb_iterations_estimate
751 : 200 : = MAX ((((sreal)loop2->nb_iterations_estimate.to_shwi () + 1) * scale2
752 : : / profile_probability::very_likely ().to_sreal ())
753 : : .to_nearest_int () - 1, 0);
754 : : }
755 : 373 : update_loop_exit_probability_scale_dom_bbs (loop1);
756 : 373 : update_loop_exit_probability_scale_dom_bbs (loop2);
757 : :
758 : 373 : edge new_e = connect_loops (loop1, loop2);
759 : 373 : connect_loop_phis (loop1, loop2, new_e);
760 : :
761 : : /* The iterations of the second loop is now already
762 : : exactly those that the first loop didn't do, but the
763 : : iteration space of the first loop is still the original one.
764 : : Compute the new bound for the guarding IV and patch the
765 : : loop exit to use it instead of original IV and bound. */
766 : 373 : gimple_seq stmts = NULL;
767 : 373 : tree newend = compute_new_first_bound (&stmts, &niter, border,
768 : : guard_code, guard_init);
769 : 373 : if (stmts)
770 : 183 : gsi_insert_seq_on_edge_immediate (loop_preheader_edge (loop1),
771 : : stmts);
772 : 373 : tree guard_next = PHI_ARG_DEF_FROM_EDGE (phi, loop_latch_edge (loop1));
773 : 373 : patch_loop_exit (loop1, guard_code, guard_next, newend, initial_true);
774 : :
775 : : /* Finally patch out the two copies of the condition to be always
776 : : true/false (or opposite). */
777 : 746 : gcond *force_true = as_a<gcond *> (*gsi_last_bb (bbs[i]));
778 : 746 : gcond *force_false = as_a<gcond *> (*gsi_last_bb (get_bb_copy (bbs[i])));
779 : 373 : if (!initial_true)
780 : 102 : std::swap (force_true, force_false);
781 : 373 : gimple_cond_make_true (force_true);
782 : 373 : gimple_cond_make_false (force_false);
783 : 373 : update_stmt (force_true);
784 : 373 : update_stmt (force_false);
785 : :
786 : 373 : free_original_copy_tables ();
787 : :
788 : 373 : changed = true;
789 : 373 : if (dump_file && (dump_flags & TDF_DETAILS))
790 : 25 : fprintf (dump_file, ";; Loop split.\n");
791 : :
792 : 373 : if (dump_enabled_p ())
793 : 26 : dump_printf_loc (MSG_OPTIMIZED_LOCATIONS, guard_stmt, "loop split\n");
794 : :
795 : : /* Only deal with the first opportunity. */
796 : 373 : break;
797 : : }
798 : :
799 : 37702 : free (bbs);
800 : 37702 : return changed;
801 : 70630 : }
802 : :
803 : : /* Another transformation of loops like:
804 : :
805 : : for (i = INIT (); CHECK (i); i = NEXT ())
806 : : {
807 : : if (expr (a_1, a_2, ..., a_n)) // expr is pure
808 : : a_j = ...; // change at least one a_j
809 : : else
810 : : S; // not change any a_j
811 : : }
812 : :
813 : : into:
814 : :
815 : : for (i = INIT (); CHECK (i); i = NEXT ())
816 : : {
817 : : if (expr (a_1, a_2, ..., a_n))
818 : : a_j = ...;
819 : : else
820 : : {
821 : : S;
822 : : i = NEXT ();
823 : : break;
824 : : }
825 : : }
826 : :
827 : : for (; CHECK (i); i = NEXT ())
828 : : {
829 : : S;
830 : : }
831 : :
832 : : */
833 : :
834 : : /* Data structure to hold temporary information during loop split upon
835 : : semi-invariant conditional statement. */
836 : : class split_info {
837 : : public:
838 : : /* Array of all basic blocks in a loop, returned by get_loop_body(). */
839 : : basic_block *bbs;
840 : :
841 : : /* All memory store/clobber statements in a loop. */
842 : : auto_vec<gimple *> memory_stores;
843 : :
844 : : /* Whether above memory stores vector has been filled. */
845 : : int need_init;
846 : :
847 : : /* Control dependencies of basic blocks in a loop. */
848 : : auto_vec<hash_set<basic_block> *> control_deps;
849 : :
850 : 70257 : split_info () : bbs (NULL), need_init (true) { }
851 : :
852 : 70257 : ~split_info ()
853 : : {
854 : 70257 : if (bbs)
855 : 70257 : free (bbs);
856 : :
857 : 70753 : for (unsigned i = 0; i < control_deps.length (); i++)
858 : 992 : delete control_deps[i];
859 : 70257 : }
860 : : };
861 : :
862 : : /* Find all statements with memory-write effect in LOOP, including memory
863 : : store and non-pure function call, and keep those in a vector. This work
864 : : is only done one time, for the vector should be constant during analysis
865 : : stage of semi-invariant condition. */
866 : :
867 : : static void
868 : 8935 : find_vdef_in_loop (struct loop *loop)
869 : : {
870 : 8935 : split_info *info = (split_info *) loop->aux;
871 : 8935 : gphi *vphi = get_virtual_phi (loop->header);
872 : :
873 : : /* Indicate memory store vector has been filled. */
874 : 8935 : info->need_init = false;
875 : :
876 : : /* If loop contains memory operation, there must be a virtual PHI node in
877 : : loop header basic block. */
878 : 8935 : if (vphi == NULL)
879 : 2002 : return;
880 : :
881 : : /* All virtual SSA names inside the loop are connected to be a cyclic
882 : : graph via virtual PHI nodes. The virtual PHI node in loop header just
883 : : links the first and the last virtual SSA names, by using the last as
884 : : PHI operand to define the first. */
885 : 6989 : const edge latch = loop_latch_edge (loop);
886 : 6989 : const tree first = gimple_phi_result (vphi);
887 : 6989 : const tree last = PHI_ARG_DEF_FROM_EDGE (vphi, latch);
888 : :
889 : : /* The virtual SSA cyclic graph might consist of only one SSA name, who
890 : : is defined by itself.
891 : :
892 : : .MEM_1 = PHI <.MEM_2(loop entry edge), .MEM_1(latch edge)>
893 : :
894 : : This means the loop contains only memory loads, so we can skip it. */
895 : 6989 : if (first == last)
896 : : return;
897 : :
898 : 6933 : auto_vec<gimple *> other_stores;
899 : 6933 : auto_vec<tree> worklist;
900 : 6933 : auto_bitmap visited;
901 : :
902 : 6933 : bitmap_set_bit (visited, SSA_NAME_VERSION (first));
903 : 6933 : bitmap_set_bit (visited, SSA_NAME_VERSION (last));
904 : 6933 : worklist.safe_push (last);
905 : :
906 : 43345 : do
907 : : {
908 : 43345 : tree vuse = worklist.pop ();
909 : 43345 : gimple *stmt = SSA_NAME_DEF_STMT (vuse);
910 : :
911 : : /* We mark the first and last SSA names as visited at the beginning,
912 : : and reversely start the process from the last SSA name towards the
913 : : first, which ensures that this do-while will not touch SSA names
914 : : defined outside the loop. */
915 : 43345 : gcc_assert (gimple_bb (stmt)
916 : : && flow_bb_inside_loop_p (loop, gimple_bb (stmt)));
917 : :
918 : 43345 : if (gimple_code (stmt) == GIMPLE_PHI)
919 : : {
920 : 12781 : gphi *phi = as_a <gphi *> (stmt);
921 : :
922 : 38521 : for (unsigned i = 0; i < gimple_phi_num_args (phi); ++i)
923 : : {
924 : 25740 : tree arg = gimple_phi_arg_def (stmt, i);
925 : :
926 : 25740 : if (bitmap_set_bit (visited, SSA_NAME_VERSION (arg)))
927 : 16309 : worklist.safe_push (arg);
928 : : }
929 : : }
930 : : else
931 : : {
932 : 30564 : tree prev = gimple_vuse (stmt);
933 : :
934 : : /* Non-pure call statement is conservatively assumed to impact all
935 : : memory locations. So place call statements ahead of other memory
936 : : stores in the vector with an idea of using them as shortcut
937 : : terminators to memory alias analysis. */
938 : 30564 : if (gimple_code (stmt) == GIMPLE_CALL)
939 : 11083 : info->memory_stores.safe_push (stmt);
940 : : else
941 : 19481 : other_stores.safe_push (stmt);
942 : :
943 : 30564 : if (bitmap_set_bit (visited, SSA_NAME_VERSION (prev)))
944 : 20103 : worklist.safe_push (prev);
945 : : }
946 : 86690 : } while (!worklist.is_empty ());
947 : :
948 : 6933 : info->memory_stores.safe_splice (other_stores);
949 : 6933 : }
950 : :
951 : : /* Two basic blocks have equivalent control dependency if one dominates to
952 : : the other, and it is post-dominated by the latter. Given a basic block
953 : : BB in LOOP, find farest equivalent dominating basic block. For BB, there
954 : : is a constraint that BB does not post-dominate loop header of LOOP, this
955 : : means BB is control-dependent on at least one basic block in LOOP. */
956 : :
957 : : static basic_block
958 : 1009 : get_control_equiv_head_block (struct loop *loop, basic_block bb)
959 : : {
960 : 1075 : while (!bb->aux)
961 : : {
962 : 592 : basic_block dom_bb = get_immediate_dominator (CDI_DOMINATORS, bb);
963 : :
964 : 592 : gcc_checking_assert (dom_bb && flow_bb_inside_loop_p (loop, dom_bb));
965 : :
966 : 592 : if (!dominated_by_p (CDI_POST_DOMINATORS, dom_bb, bb))
967 : : break;
968 : :
969 : : bb = dom_bb;
970 : : }
971 : 1009 : return bb;
972 : : }
973 : :
974 : : /* Given a BB in LOOP, find out all basic blocks in LOOP that BB is control-
975 : : dependent on. */
976 : :
977 : : static hash_set<basic_block> *
978 : 6986 : find_control_dep_blocks (struct loop *loop, basic_block bb)
979 : : {
980 : : /* BB has same control dependency as loop header, then it is not control-
981 : : dependent on any basic block in LOOP. */
982 : 6986 : if (dominated_by_p (CDI_POST_DOMINATORS, loop->header, bb))
983 : : return NULL;
984 : :
985 : 979 : basic_block equiv_head = get_control_equiv_head_block (loop, bb);
986 : :
987 : 979 : if (equiv_head->aux)
988 : : {
989 : : /* There is a basic block containing control dependency equivalent
990 : : to BB. No need to recompute that, and also set this information
991 : : to other equivalent basic blocks. */
992 : 483 : for (; bb != equiv_head;
993 : 0 : bb = get_immediate_dominator (CDI_DOMINATORS, bb))
994 : 0 : bb->aux = equiv_head->aux;
995 : 483 : return (hash_set<basic_block> *) equiv_head->aux;
996 : : }
997 : :
998 : : /* A basic block X is control-dependent on another Y iff there exists
999 : : a path from X to Y, in which every basic block other than X and Y
1000 : : is post-dominated by Y, but X is not post-dominated by Y.
1001 : :
1002 : : According to this rule, traverse basic blocks in the loop backwards
1003 : : starting from BB, if a basic block is post-dominated by BB, extend
1004 : : current post-dominating path to this block, otherwise it is another
1005 : : one that BB is control-dependent on. */
1006 : :
1007 : 496 : auto_vec<basic_block> pdom_worklist;
1008 : 496 : hash_set<basic_block> pdom_visited;
1009 : 496 : hash_set<basic_block> *dep_bbs = new hash_set<basic_block>;
1010 : :
1011 : 496 : pdom_worklist.safe_push (equiv_head);
1012 : :
1013 : 526 : do
1014 : : {
1015 : 526 : basic_block pdom_bb = pdom_worklist.pop ();
1016 : 526 : edge_iterator ei;
1017 : 526 : edge e;
1018 : :
1019 : 526 : if (pdom_visited.add (pdom_bb))
1020 : 0 : continue;
1021 : :
1022 : 1076 : FOR_EACH_EDGE (e, ei, pdom_bb->preds)
1023 : : {
1024 : 550 : basic_block pred_bb = e->src;
1025 : :
1026 : 550 : if (!dominated_by_p (CDI_POST_DOMINATORS, pred_bb, bb))
1027 : : {
1028 : 520 : dep_bbs->add (pred_bb);
1029 : 550 : continue;
1030 : : }
1031 : :
1032 : 30 : pred_bb = get_control_equiv_head_block (loop, pred_bb);
1033 : :
1034 : 30 : if (pdom_visited.contains (pred_bb))
1035 : 0 : continue;
1036 : :
1037 : 30 : if (!pred_bb->aux)
1038 : : {
1039 : 30 : pdom_worklist.safe_push (pred_bb);
1040 : 30 : continue;
1041 : : }
1042 : :
1043 : : /* If control dependency of basic block is available, fast extend
1044 : : post-dominating path using the information instead of advancing
1045 : : forward step-by-step. */
1046 : 0 : hash_set<basic_block> *pred_dep_bbs
1047 : : = (hash_set<basic_block> *) pred_bb->aux;
1048 : :
1049 : 0 : for (hash_set<basic_block>::iterator iter = pred_dep_bbs->begin ();
1050 : 0 : iter != pred_dep_bbs->end (); ++iter)
1051 : : {
1052 : 0 : basic_block pred_dep_bb = *iter;
1053 : :
1054 : : /* Basic blocks can either be in control dependency of BB, or
1055 : : must be post-dominated by BB, if so, extend the path from
1056 : : these basic blocks. */
1057 : 0 : if (!dominated_by_p (CDI_POST_DOMINATORS, pred_dep_bb, bb))
1058 : 0 : dep_bbs->add (pred_dep_bb);
1059 : 0 : else if (!pdom_visited.contains (pred_dep_bb))
1060 : 0 : pdom_worklist.safe_push (pred_dep_bb);
1061 : : }
1062 : : }
1063 : 1052 : } while (!pdom_worklist.is_empty ());
1064 : :
1065 : : /* Record computed control dependencies in loop so that we can reach them
1066 : : when reclaiming resources. */
1067 : 496 : ((split_info *) loop->aux)->control_deps.safe_push (dep_bbs);
1068 : :
1069 : : /* Associate control dependence with related equivalent basic blocks. */
1070 : 538 : for (equiv_head->aux = dep_bbs; bb != equiv_head;
1071 : 42 : bb = get_immediate_dominator (CDI_DOMINATORS, bb))
1072 : 42 : bb->aux = dep_bbs;
1073 : :
1074 : 496 : return dep_bbs;
1075 : 496 : }
1076 : :
1077 : : /* Forward declaration */
1078 : :
1079 : : static bool
1080 : : stmt_semi_invariant_p_1 (struct loop *loop, gimple *stmt,
1081 : : const_basic_block skip_head,
1082 : : hash_map<gimple *, bool> &stmt_stat);
1083 : :
1084 : : /* Given STMT, memory load or pure call statement, check whether it is impacted
1085 : : by some memory store in LOOP, excluding trace starting from SKIP_HEAD (the
1086 : : trace is composed of SKIP_HEAD and those basic block dominated by it, always
1087 : : corresponds to one branch of a conditional statement). If SKIP_HEAD is
1088 : : NULL, all basic blocks of LOOP are checked. */
1089 : :
1090 : : static bool
1091 : 21758 : vuse_semi_invariant_p (struct loop *loop, gimple *stmt,
1092 : : const_basic_block skip_head)
1093 : : {
1094 : 21758 : split_info *info = (split_info *) loop->aux;
1095 : 21758 : tree rhs = NULL_TREE;
1096 : 21758 : ao_ref ref;
1097 : 21758 : gimple *store;
1098 : 21758 : unsigned i;
1099 : :
1100 : : /* Collect memory store/clobber statements if haven't done that. */
1101 : 21758 : if (info->need_init)
1102 : 8935 : find_vdef_in_loop (loop);
1103 : :
1104 : 21758 : if (is_gimple_assign (stmt))
1105 : 21567 : rhs = gimple_assign_rhs1 (stmt);
1106 : :
1107 : 21758 : ao_ref_init (&ref, rhs);
1108 : :
1109 : 76002 : FOR_EACH_VEC_ELT (info->memory_stores, i, store)
1110 : : {
1111 : : /* Skip basic blocks dominated by SKIP_HEAD, if non-NULL. */
1112 : 55835 : if (skip_head
1113 : 41522 : && dominated_by_p (CDI_DOMINATORS, gimple_bb (store), skip_head))
1114 : 14313 : continue;
1115 : :
1116 : 27209 : if (!ref.ref || stmt_may_clobber_ref_p_1 (store, &ref))
1117 : 9036 : return false;
1118 : : }
1119 : :
1120 : : return true;
1121 : : }
1122 : :
1123 : : /* Suppose one condition branch, led by SKIP_HEAD, is not executed since
1124 : : certain iteration of LOOP, check whether an SSA name (NAME) remains
1125 : : unchanged in next iteration. We call this characteristic semi-
1126 : : invariantness. SKIP_HEAD might be NULL, if so, nothing excluded, all basic
1127 : : blocks and control flows in the loop will be considered. Semi-invariant
1128 : : state of checked statement is cached in hash map STMT_STAT to avoid
1129 : : redundant computation in possible following re-check. */
1130 : :
1131 : : static inline bool
1132 : 145296 : ssa_semi_invariant_p (struct loop *loop, tree name,
1133 : : const_basic_block skip_head,
1134 : : hash_map<gimple *, bool> &stmt_stat)
1135 : : {
1136 : 145296 : gimple *def = SSA_NAME_DEF_STMT (name);
1137 : 145296 : const_basic_block def_bb = gimple_bb (def);
1138 : :
1139 : : /* An SSA name defined outside loop is definitely semi-invariant. */
1140 : 145296 : if (!def_bb || !flow_bb_inside_loop_p (loop, def_bb))
1141 : 13321 : return true;
1142 : :
1143 : 131975 : if (SSA_NAME_OCCURS_IN_ABNORMAL_PHI (name))
1144 : : return false;
1145 : :
1146 : 131967 : return stmt_semi_invariant_p_1 (loop, def, skip_head, stmt_stat);
1147 : : }
1148 : :
1149 : : /* Check whether a loop iteration PHI node (LOOP_PHI) defines a value that is
1150 : : semi-invariant in LOOP. Basic blocks dominated by SKIP_HEAD (if non-NULL),
1151 : : are excluded from LOOP. */
1152 : :
1153 : : static bool
1154 : 27339 : loop_iter_phi_semi_invariant_p (struct loop *loop, gphi *loop_phi,
1155 : : const_basic_block skip_head)
1156 : : {
1157 : 27339 : const_edge latch = loop_latch_edge (loop);
1158 : 27339 : tree name = gimple_phi_result (loop_phi);
1159 : 27339 : tree from = PHI_ARG_DEF_FROM_EDGE (loop_phi, latch);
1160 : :
1161 : 27339 : gcc_checking_assert (from);
1162 : :
1163 : : /* Loop iteration PHI node locates in loop header, and it has two source
1164 : : operands, one is an initial value coming from outside the loop, the other
1165 : : is a value through latch of the loop, which is derived in last iteration,
1166 : : we call the latter latch value. From the PHI node to definition of latch
1167 : : value, if excluding branch trace starting from SKIP_HEAD, except copy-
1168 : : assignment or likewise, there is no other kind of value redefinition, SSA
1169 : : name defined by the PHI node is semi-invariant.
1170 : :
1171 : : loop entry
1172 : : | .--- latch ---.
1173 : : | | |
1174 : : v v |
1175 : : x_1 = PHI <x_0, x_3> |
1176 : : | |
1177 : : v |
1178 : : .------- if (cond) -------. |
1179 : : | | |
1180 : : | [ SKIP ] |
1181 : : | | |
1182 : : | x_2 = ... |
1183 : : | | |
1184 : : '---- T ---->.<---- F ----' |
1185 : : | |
1186 : : v |
1187 : : x_3 = PHI <x_1, x_2> |
1188 : : | |
1189 : : '----------------------'
1190 : :
1191 : : Suppose in certain iteration, execution flow in above graph goes through
1192 : : true branch, which means that one source value to define x_3 in false
1193 : : branch (x_2) is skipped, x_3 only comes from x_1, and x_1 in next
1194 : : iterations is defined by x_3, we know that x_1 will never changed if COND
1195 : : always chooses true branch from then on. */
1196 : :
1197 : 30776 : while (from != name)
1198 : : {
1199 : : /* A new value comes from a CONSTANT. */
1200 : 29969 : if (TREE_CODE (from) != SSA_NAME)
1201 : : return false;
1202 : :
1203 : 28846 : gimple *stmt = SSA_NAME_DEF_STMT (from);
1204 : 28846 : const_basic_block bb = gimple_bb (stmt);
1205 : :
1206 : : /* A new value comes from outside the loop. */
1207 : 28846 : if (!bb || !flow_bb_inside_loop_p (loop, bb))
1208 : 32 : return false;
1209 : :
1210 : 28814 : from = NULL_TREE;
1211 : :
1212 : 28814 : if (gimple_code (stmt) == GIMPLE_PHI)
1213 : : {
1214 : 6395 : gphi *phi = as_a <gphi *> (stmt);
1215 : :
1216 : 17152 : for (unsigned i = 0; i < gimple_phi_num_args (phi); ++i)
1217 : : {
1218 : 13715 : if (skip_head)
1219 : : {
1220 : 13103 : const_edge e = gimple_phi_arg_edge (phi, i);
1221 : :
1222 : : /* Don't consider redefinitions in excluded basic blocks. */
1223 : 13103 : if (dominated_by_p (CDI_DOMINATORS, e->src, skip_head))
1224 : 3812 : continue;
1225 : : }
1226 : :
1227 : 9903 : tree arg = gimple_phi_arg_def (phi, i);
1228 : :
1229 : 9903 : if (!from)
1230 : : from = arg;
1231 : 3508 : else if (!operand_equal_p (from, arg, 0))
1232 : : /* There are more than one source operands that provide
1233 : : different values to the SSA name, it is variant. */
1234 : : return false;
1235 : : }
1236 : : }
1237 : 22419 : else if (gimple_code (stmt) == GIMPLE_ASSIGN)
1238 : : {
1239 : : /* For simple value copy, check its rhs instead. */
1240 : 22299 : if (gimple_assign_ssa_name_copy_p (stmt))
1241 : 0 : from = gimple_assign_rhs1 (stmt);
1242 : : }
1243 : :
1244 : : /* Any other kind of definition is deemed to introduce a new value
1245 : : to the SSA name. */
1246 : 25856 : if (!from)
1247 : : return false;
1248 : : }
1249 : : return true;
1250 : : }
1251 : :
1252 : : /* Check whether conditional predicates that BB is control-dependent on, are
1253 : : semi-invariant in LOOP. Basic blocks dominated by SKIP_HEAD (if non-NULL),
1254 : : are excluded from LOOP. Semi-invariant state of checked statement is cached
1255 : : in hash map STMT_STAT. */
1256 : :
1257 : : static bool
1258 : 6986 : control_dep_semi_invariant_p (struct loop *loop, basic_block bb,
1259 : : const_basic_block skip_head,
1260 : : hash_map<gimple *, bool> &stmt_stat)
1261 : : {
1262 : 6986 : hash_set<basic_block> *dep_bbs = find_control_dep_blocks (loop, bb);
1263 : :
1264 : 6986 : if (!dep_bbs)
1265 : : return true;
1266 : :
1267 : 1077 : for (hash_set<basic_block>::iterator iter = dep_bbs->begin ();
1268 : 1175 : iter != dep_bbs->end (); ++iter)
1269 : : {
1270 : 985 : gimple *last = *gsi_last_bb (*iter);
1271 : 985 : if (!last)
1272 : 887 : return false;
1273 : :
1274 : : /* Only check condition predicates. */
1275 : 985 : if (gimple_code (last) != GIMPLE_COND
1276 : 985 : && gimple_code (last) != GIMPLE_SWITCH)
1277 : : return false;
1278 : :
1279 : 985 : if (!stmt_semi_invariant_p_1 (loop, last, skip_head, stmt_stat))
1280 : : return false;
1281 : : }
1282 : :
1283 : 92 : return true;
1284 : : }
1285 : :
1286 : : /* Check whether STMT is semi-invariant in LOOP, iff all its operands are
1287 : : semi-invariant, consequently, all its defined values are semi-invariant.
1288 : : Basic blocks dominated by SKIP_HEAD (if non-NULL), are excluded from LOOP.
1289 : : Semi-invariant state of checked statement is cached in hash map
1290 : : STMT_STAT. */
1291 : :
1292 : : static bool
1293 : 174517 : stmt_semi_invariant_p_1 (struct loop *loop, gimple *stmt,
1294 : : const_basic_block skip_head,
1295 : : hash_map<gimple *, bool> &stmt_stat)
1296 : : {
1297 : 174517 : bool existed;
1298 : 174517 : bool &invar = stmt_stat.get_or_insert (stmt, &existed);
1299 : :
1300 : 174517 : if (existed)
1301 : 488 : return invar;
1302 : :
1303 : : /* A statement might depend on itself, which is treated as variant. So set
1304 : : state of statement under check to be variant to ensure that. */
1305 : 174029 : invar = false;
1306 : :
1307 : 174029 : if (gimple_code (stmt) == GIMPLE_PHI)
1308 : : {
1309 : 46677 : gphi *phi = as_a <gphi *> (stmt);
1310 : :
1311 : 46677 : if (gimple_bb (stmt) == loop->header)
1312 : : {
1313 : : /* If the entry value is subject to abnormal coalescing
1314 : : avoid the transform since we're going to duplicate the
1315 : : loop header and thus likely introduce overlapping life-ranges
1316 : : between the PHI def and the entry on the path when the
1317 : : first loop is skipped. */
1318 : 27339 : tree entry_def
1319 : 27339 : = PHI_ARG_DEF_FROM_EDGE (phi, loop_preheader_edge (loop));
1320 : 27339 : if (TREE_CODE (entry_def) == SSA_NAME
1321 : 27339 : && SSA_NAME_OCCURS_IN_ABNORMAL_PHI (entry_def))
1322 : : return false;
1323 : 27339 : invar = loop_iter_phi_semi_invariant_p (loop, phi, skip_head);
1324 : 27339 : return invar;
1325 : : }
1326 : :
1327 : : /* For a loop PHI node that does not locate in loop header, it is semi-
1328 : : invariant only if two conditions are met. The first is its source
1329 : : values are derived from CONSTANT (including loop-invariant value), or
1330 : : from SSA name defined by semi-invariant loop iteration PHI node. The
1331 : : second is its source incoming edges are control-dependent on semi-
1332 : : invariant conditional predicates. */
1333 : 19387 : for (unsigned i = 0; i < gimple_phi_num_args (phi); ++i)
1334 : : {
1335 : 19373 : const_edge e = gimple_phi_arg_edge (phi, i);
1336 : 19373 : tree arg = gimple_phi_arg_def (phi, i);
1337 : :
1338 : 19373 : if (TREE_CODE (arg) == SSA_NAME)
1339 : : {
1340 : 19055 : if (!ssa_semi_invariant_p (loop, arg, skip_head, stmt_stat))
1341 : : return false;
1342 : :
1343 : : /* If source value is defined in location from where the source
1344 : : edge comes in, no need to check control dependency again
1345 : : since this has been done in above SSA name check stage. */
1346 : 56 : if (e->src == gimple_bb (SSA_NAME_DEF_STMT (arg)))
1347 : 4 : continue;
1348 : : }
1349 : :
1350 : 370 : if (!control_dep_semi_invariant_p (loop, e->src, skip_head,
1351 : : stmt_stat))
1352 : : return false;
1353 : : }
1354 : : }
1355 : : else
1356 : : {
1357 : 127352 : ssa_op_iter iter;
1358 : 127352 : tree use;
1359 : :
1360 : : /* Volatile memory load or return of normal (non-const/non-pure) call
1361 : : should not be treated as constant in each iteration of loop. */
1362 : 127352 : if (gimple_has_side_effects (stmt))
1363 : 120750 : return false;
1364 : :
1365 : : /* Check if any memory store may kill memory load at this place. */
1366 : 208583 : if (gimple_vuse (stmt) && !vuse_semi_invariant_p (loop, stmt, skip_head))
1367 : : return false;
1368 : :
1369 : : /* Although operand of a statement might be SSA name, CONSTANT or
1370 : : VARDECL, here we only need to check SSA name operands. This is
1371 : : because check on VARDECL operands, which involve memory loads,
1372 : : must have been done prior to invocation of this function in
1373 : : vuse_semi_invariant_p. */
1374 : 132843 : FOR_EACH_SSA_TREE_OPERAND (use, stmt, iter, SSA_OP_USE)
1375 : 126241 : if (!ssa_semi_invariant_p (loop, use, skip_head, stmt_stat))
1376 : : return false;
1377 : : }
1378 : :
1379 : 6616 : if (!control_dep_semi_invariant_p (loop, gimple_bb (stmt), skip_head,
1380 : : stmt_stat))
1381 : : return false;
1382 : :
1383 : : /* Here we SHOULD NOT use invar = true, since hash map might be changed due
1384 : : to new insertion, and thus invar may point to invalid memory. */
1385 : 6054 : stmt_stat.put (stmt, true);
1386 : 6054 : return true;
1387 : : }
1388 : :
1389 : : /* A helper function to check whether STMT is semi-invariant in LOOP. Basic
1390 : : blocks dominated by SKIP_HEAD (if non-NULL), are excluded from LOOP. */
1391 : :
1392 : : static bool
1393 : 41565 : stmt_semi_invariant_p (struct loop *loop, gimple *stmt,
1394 : : const_basic_block skip_head)
1395 : : {
1396 : 41565 : hash_map<gimple *, bool> stmt_stat;
1397 : 41565 : return stmt_semi_invariant_p_1 (loop, stmt, skip_head, stmt_stat);
1398 : 41565 : }
1399 : :
1400 : : /* Determine when conditional statement never transfers execution to one of its
1401 : : branch, whether we can remove the branch's leading basic block (BRANCH_BB)
1402 : : and those basic blocks dominated by BRANCH_BB. */
1403 : :
1404 : : static bool
1405 : 45440 : branch_removable_p (basic_block branch_bb)
1406 : : {
1407 : 45440 : edge_iterator ei;
1408 : 45440 : edge e;
1409 : :
1410 : 45440 : if (single_pred_p (branch_bb))
1411 : : return true;
1412 : :
1413 : 7679 : FOR_EACH_EDGE (e, ei, branch_bb->preds)
1414 : : {
1415 : 7679 : if (dominated_by_p (CDI_DOMINATORS, e->src, branch_bb))
1416 : 0 : continue;
1417 : :
1418 : 7679 : if (dominated_by_p (CDI_DOMINATORS, branch_bb, e->src))
1419 : 2741 : continue;
1420 : :
1421 : : /* The branch can be reached from opposite branch, or from some
1422 : : statement not dominated by the conditional statement. */
1423 : : return false;
1424 : : }
1425 : :
1426 : : return true;
1427 : : }
1428 : :
1429 : : /* Find out which branch of a conditional statement (COND) is invariant in the
1430 : : execution context of LOOP. That is: once the branch is selected in certain
1431 : : iteration of the loop, any operand that contributes to computation of the
1432 : : conditional statement remains unchanged in all following iterations. */
1433 : :
1434 : : static edge
1435 : 120100 : get_cond_invariant_branch (struct loop *loop, gcond *cond)
1436 : : {
1437 : 120100 : basic_block cond_bb = gimple_bb (cond);
1438 : 120100 : basic_block targ_bb[2];
1439 : 120100 : bool invar[2];
1440 : 120100 : unsigned invar_checks = 0;
1441 : :
1442 : 196862 : for (unsigned i = 0; i < 2; i++)
1443 : : {
1444 : 174142 : targ_bb[i] = EDGE_SUCC (cond_bb, i)->dest;
1445 : :
1446 : : /* One branch directs to loop exit, no need to perform loop split upon
1447 : : this conditional statement. Firstly, it is trivial if the exit branch
1448 : : is semi-invariant, for the statement is just to break loop. Secondly,
1449 : : if the opposite branch is semi-invariant, it means that the statement
1450 : : is real loop-invariant, which is covered by loop unswitch. */
1451 : 174142 : if (!flow_bb_inside_loop_p (loop, targ_bb[i]))
1452 : : return NULL;
1453 : : }
1454 : :
1455 : 68160 : for (unsigned i = 0; i < 2; i++)
1456 : : {
1457 : 45440 : invar[!i] = false;
1458 : :
1459 : 45440 : if (!branch_removable_p (targ_bb[i]))
1460 : 4938 : continue;
1461 : :
1462 : : /* Given a semi-invariant branch, if its opposite branch dominates
1463 : : loop latch, it and its following trace will only be executed in
1464 : : final iteration of loop, namely it is not part of repeated body
1465 : : of the loop. Similar to the above case that the branch is loop
1466 : : exit, no need to split loop. */
1467 : 40502 : if (dominated_by_p (CDI_DOMINATORS, loop->latch, targ_bb[i]))
1468 : 0 : continue;
1469 : :
1470 : 40502 : invar[!i] = stmt_semi_invariant_p (loop, cond, targ_bb[i]);
1471 : 40502 : invar_checks++;
1472 : : }
1473 : :
1474 : : /* With both branches being invariant (handled by loop unswitch) or
1475 : : variant is not what we want. */
1476 : 22720 : if (invar[0] ^ !invar[1])
1477 : : return NULL;
1478 : :
1479 : : /* Found a real loop-invariant condition, do nothing. */
1480 : 1429 : if (invar_checks < 2 && stmt_semi_invariant_p (loop, cond, NULL))
1481 : : return NULL;
1482 : :
1483 : 1242 : return EDGE_SUCC (cond_bb, invar[0] ? 0 : 1);
1484 : : }
1485 : :
1486 : : /* Calculate increased code size measured by estimated insn number if applying
1487 : : loop split upon certain branch (BRANCH_EDGE) of a conditional statement. */
1488 : :
1489 : : static int
1490 : 749 : compute_added_num_insns (struct loop *loop, const_edge branch_edge)
1491 : : {
1492 : 749 : basic_block cond_bb = branch_edge->src;
1493 : 749 : unsigned branch = EDGE_SUCC (cond_bb, 1) == branch_edge;
1494 : 749 : basic_block opposite_bb = EDGE_SUCC (cond_bb, !branch)->dest;
1495 : 749 : basic_block *bbs = ((split_info *) loop->aux)->bbs;
1496 : 749 : int num = 0;
1497 : :
1498 : 6856 : for (unsigned i = 0; i < loop->num_nodes; i++)
1499 : : {
1500 : : /* Do no count basic blocks only in opposite branch. */
1501 : 6107 : if (dominated_by_p (CDI_DOMINATORS, bbs[i], opposite_bb))
1502 : 2510 : continue;
1503 : :
1504 : 7194 : num += estimate_num_insns_seq (bb_seq (bbs[i]), &eni_size_weights);
1505 : : }
1506 : :
1507 : : /* It is unnecessary to evaluate expression of the conditional statement
1508 : : in new loop that contains only invariant branch. This expression should
1509 : : be constant value (either true or false). Exclude code size of insns
1510 : : that contribute to computation of the expression. */
1511 : :
1512 : 749 : auto_vec<gimple *> worklist;
1513 : 749 : hash_set<gimple *> removed;
1514 : 749 : gimple *stmt = last_nondebug_stmt (cond_bb);
1515 : :
1516 : 749 : worklist.safe_push (stmt);
1517 : 749 : removed.add (stmt);
1518 : 749 : num -= estimate_num_insns (stmt, &eni_size_weights);
1519 : :
1520 : 913 : do
1521 : : {
1522 : 913 : ssa_op_iter opnd_iter;
1523 : 913 : use_operand_p opnd_p;
1524 : :
1525 : 913 : stmt = worklist.pop ();
1526 : 2788 : FOR_EACH_PHI_OR_STMT_USE (opnd_p, stmt, opnd_iter, SSA_OP_USE)
1527 : : {
1528 : 962 : tree opnd = USE_FROM_PTR (opnd_p);
1529 : :
1530 : 962 : if (TREE_CODE (opnd) != SSA_NAME || SSA_NAME_IS_DEFAULT_DEF (opnd))
1531 : 107 : continue;
1532 : :
1533 : 935 : gimple *opnd_stmt = SSA_NAME_DEF_STMT (opnd);
1534 : 935 : use_operand_p use_p;
1535 : 935 : imm_use_iterator use_iter;
1536 : :
1537 : 935 : if (removed.contains (opnd_stmt)
1538 : 935 : || !flow_bb_inside_loop_p (loop, gimple_bb (opnd_stmt)))
1539 : 80 : continue;
1540 : :
1541 : 1044 : FOR_EACH_IMM_USE_FAST (use_p, use_iter, opnd)
1542 : : {
1543 : 880 : gimple *use_stmt = USE_STMT (use_p);
1544 : :
1545 : 880 : if (!is_gimple_debug (use_stmt) && !removed.contains (use_stmt))
1546 : : {
1547 : 691 : opnd_stmt = NULL;
1548 : 691 : break;
1549 : : }
1550 : : }
1551 : :
1552 : 855 : if (opnd_stmt)
1553 : : {
1554 : 164 : worklist.safe_push (opnd_stmt);
1555 : 164 : removed.add (opnd_stmt);
1556 : 164 : num -= estimate_num_insns (opnd_stmt, &eni_size_weights);
1557 : : }
1558 : : }
1559 : 1826 : } while (!worklist.is_empty ());
1560 : :
1561 : 749 : gcc_assert (num >= 0);
1562 : 749 : return num;
1563 : 749 : }
1564 : :
1565 : : /* Find out loop-invariant branch of a conditional statement (COND) if it has,
1566 : : and check whether it is eligible and profitable to perform loop split upon
1567 : : this branch in LOOP. */
1568 : :
1569 : : static edge
1570 : 120100 : get_cond_branch_to_split_loop (struct loop *loop, gcond *cond)
1571 : : {
1572 : 120100 : edge invar_branch = get_cond_invariant_branch (loop, cond);
1573 : 120100 : if (!invar_branch)
1574 : : return NULL;
1575 : :
1576 : : /* When accurate profile information is available, and execution
1577 : : frequency of the branch is too low, just let it go. */
1578 : 749 : profile_probability prob = invar_branch->probability;
1579 : 749 : if (prob.reliable_p ())
1580 : : {
1581 : 7 : int thres = param_min_loop_cond_split_prob;
1582 : :
1583 : 14 : if (prob < profile_probability::always ().apply_scale (thres, 100))
1584 : 0 : return NULL;
1585 : : }
1586 : :
1587 : : /* Add a threshold for increased code size to disable loop split. */
1588 : 749 : if (compute_added_num_insns (loop, invar_branch) > param_max_peeled_insns)
1589 : : return NULL;
1590 : :
1591 : : return invar_branch;
1592 : : }
1593 : :
1594 : : /* Given a loop (LOOP1) with a loop-invariant branch (INVAR_BRANCH) of some
1595 : : conditional statement, perform loop split transformation illustrated
1596 : : as the following graph.
1597 : :
1598 : : .-------T------ if (true) ------F------.
1599 : : | .---------------. |
1600 : : | | | |
1601 : : v | v v
1602 : : pre-header | pre-header
1603 : : | .------------. | | .------------.
1604 : : | | | | | | |
1605 : : | v | | | v |
1606 : : header | | header |
1607 : : | | | | |
1608 : : .--- if (cond) ---. | | .--- if (true) ---. |
1609 : : | | | | | | |
1610 : : invariant | | | invariant | |
1611 : : | | | | | | |
1612 : : '---T--->.<---F---' | | '---T--->.<---F---' |
1613 : : | | / | |
1614 : : stmts | / stmts |
1615 : : | F T | |
1616 : : / \ | / / \ |
1617 : : .-------* * [ if (cond) ] .-------* * |
1618 : : | | | | | |
1619 : : | latch | | latch |
1620 : : | | | | | |
1621 : : | '------------' | '------------'
1622 : : '------------------------. .-----------'
1623 : : loop1 | | loop2
1624 : : v v
1625 : : exits
1626 : :
1627 : : In the graph, loop1 represents the part derived from original one, and
1628 : : loop2 is duplicated using loop_version (), which corresponds to the part
1629 : : of original one being splitted out. In original latch edge of loop1, we
1630 : : insert a new conditional statement duplicated from the semi-invariant cond,
1631 : : and one of its branch goes back to loop1 header as a latch edge, and the
1632 : : other branch goes to loop2 pre-header as an entry edge. And also in loop2,
1633 : : we abandon the variant branch of the conditional statement by setting a
1634 : : constant bool condition, based on which branch is semi-invariant. */
1635 : :
1636 : : static bool
1637 : 745 : do_split_loop_on_cond (struct loop *loop1, edge invar_branch)
1638 : : {
1639 : 745 : basic_block cond_bb = invar_branch->src;
1640 : 745 : bool true_invar = !!(invar_branch->flags & EDGE_TRUE_VALUE);
1641 : 1490 : gcond *cond = as_a <gcond *> (*gsi_last_bb (cond_bb));
1642 : :
1643 : 745 : gcc_assert (cond_bb->loop_father == loop1);
1644 : :
1645 : 745 : if (dump_enabled_p ())
1646 : 252 : dump_printf_loc (MSG_OPTIMIZED_LOCATIONS, cond,
1647 : : "loop split on semi-invariant condition at %s branch\n",
1648 : : true_invar ? "true" : "false");
1649 : :
1650 : 745 : initialize_original_copy_tables ();
1651 : :
1652 : 745 : struct loop *loop2 = loop_version (loop1, boolean_true_node, NULL,
1653 : : invar_branch->probability.invert (),
1654 : : invar_branch->probability,
1655 : : profile_probability::always (),
1656 : : profile_probability::always (),
1657 : : true);
1658 : 745 : if (!loop2)
1659 : : {
1660 : 0 : free_original_copy_tables ();
1661 : 0 : return false;
1662 : : }
1663 : :
1664 : 745 : basic_block cond_bb_copy = get_bb_copy (cond_bb);
1665 : 1490 : gcond *cond_copy = as_a<gcond *> (*gsi_last_bb (cond_bb_copy));
1666 : :
1667 : : /* Replace the condition in loop2 with a bool constant to let PassManager
1668 : : remove the variant branch after current pass completes. */
1669 : 745 : if (true_invar)
1670 : 234 : gimple_cond_make_true (cond_copy);
1671 : : else
1672 : 511 : gimple_cond_make_false (cond_copy);
1673 : :
1674 : 745 : update_stmt (cond_copy);
1675 : :
1676 : : /* Insert a new conditional statement on latch edge of loop1, its condition
1677 : : is duplicated from the semi-invariant. This statement acts as a switch
1678 : : to transfer execution from loop1 to loop2, when loop1 enters into
1679 : : invariant state. */
1680 : 745 : basic_block latch_bb = split_edge (loop_latch_edge (loop1));
1681 : 745 : basic_block break_bb = split_edge (single_pred_edge (latch_bb));
1682 : 745 : gimple *break_cond = gimple_build_cond (gimple_cond_code(cond),
1683 : : gimple_cond_lhs (cond),
1684 : : gimple_cond_rhs (cond),
1685 : : NULL_TREE, NULL_TREE);
1686 : :
1687 : 745 : gimple_stmt_iterator gsi = gsi_last_bb (break_bb);
1688 : 745 : gsi_insert_after (&gsi, break_cond, GSI_NEW_STMT);
1689 : :
1690 : 745 : edge to_loop1 = single_succ_edge (break_bb);
1691 : 745 : edge to_loop2 = make_edge (break_bb, loop_preheader_edge (loop2)->src, 0);
1692 : :
1693 : 745 : to_loop1->flags &= ~EDGE_FALLTHRU;
1694 : 745 : to_loop1->flags |= true_invar ? EDGE_FALSE_VALUE : EDGE_TRUE_VALUE;
1695 : 745 : to_loop2->flags |= true_invar ? EDGE_TRUE_VALUE : EDGE_FALSE_VALUE;
1696 : :
1697 : : /* Due to introduction of a control flow edge from loop1 latch to loop2
1698 : : pre-header, we should update PHIs in loop2 to reflect this connection
1699 : : between loop1 and loop2. */
1700 : 745 : connect_loop_phis (loop1, loop2, to_loop2);
1701 : :
1702 : 745 : edge true_edge, false_edge, skip_edge1, skip_edge2;
1703 : 745 : extract_true_false_edges_from_block (cond_bb, &true_edge, &false_edge);
1704 : :
1705 : 745 : skip_edge1 = true_invar ? false_edge : true_edge;
1706 : 745 : skip_edge2 = true_invar ? true_edge : false_edge;
1707 : 745 : fix_loop_bb_probability (loop1, loop2, skip_edge1, skip_edge2);
1708 : :
1709 : : /* Fix first loop's exit probability after scaling. */
1710 : 745 : to_loop1->probability = invar_branch->probability.invert ();
1711 : 745 : to_loop2->probability = invar_branch->probability;
1712 : :
1713 : 745 : free_original_copy_tables ();
1714 : :
1715 : 745 : return true;
1716 : : }
1717 : :
1718 : : /* Traverse all conditional statements in LOOP, to find out a good candidate
1719 : : upon which we can do loop split. */
1720 : :
1721 : : static bool
1722 : 70257 : split_loop_on_cond (struct loop *loop)
1723 : : {
1724 : 70257 : split_info *info = new split_info ();
1725 : 70257 : basic_block *bbs = info->bbs = get_loop_body (loop);
1726 : 70257 : bool do_split = false;
1727 : :
1728 : : /* Allocate an area to keep temporary info, and associate its address
1729 : : with loop aux field. */
1730 : 70257 : loop->aux = info;
1731 : :
1732 : 493221 : for (unsigned i = 0; i < loop->num_nodes; i++)
1733 : 422964 : bbs[i]->aux = NULL;
1734 : :
1735 : 487577 : for (unsigned i = 0; i < loop->num_nodes; i++)
1736 : : {
1737 : 418065 : basic_block bb = bbs[i];
1738 : :
1739 : : /* We only consider conditional statement, which be executed at most once
1740 : : in each iteration of the loop. So skip statements in inner loops. */
1741 : 418065 : if ((bb->loop_father != loop) || (bb->flags & BB_IRREDUCIBLE_LOOP))
1742 : 120055 : continue;
1743 : :
1744 : : /* Actually this check is not a must constraint. With it, we can ensure
1745 : : conditional statement will always be executed in each iteration. */
1746 : 298010 : if (!dominated_by_p (CDI_DOMINATORS, loop->latch, bb))
1747 : 98545 : continue;
1748 : :
1749 : 398930 : gcond *cond = safe_dyn_cast <gcond *> (*gsi_last_bb (bb));
1750 : 199465 : if (!cond)
1751 : 79365 : continue;
1752 : :
1753 : 120100 : edge branch_edge = get_cond_branch_to_split_loop (loop, cond);
1754 : :
1755 : 120100 : if (branch_edge)
1756 : : {
1757 : 745 : do_split_loop_on_cond (loop, branch_edge);
1758 : 745 : do_split = true;
1759 : 745 : break;
1760 : : }
1761 : : }
1762 : :
1763 : 70257 : delete info;
1764 : 70257 : loop->aux = NULL;
1765 : :
1766 : 70257 : return do_split;
1767 : : }
1768 : :
1769 : : /* Main entry point. Perform loop splitting on all suitable loops. */
1770 : :
1771 : : static unsigned int
1772 : 25289 : tree_ssa_split_loops (void)
1773 : : {
1774 : 25289 : bool changed = false;
1775 : :
1776 : 25289 : gcc_assert (scev_initialized_p ());
1777 : :
1778 : 25289 : calculate_dominance_info (CDI_POST_DOMINATORS);
1779 : :
1780 : 173580 : for (auto loop : loops_list (cfun, LI_INCLUDE_ROOT))
1781 : 97713 : loop->aux = NULL;
1782 : :
1783 : : /* Go through all loops starting from innermost. */
1784 : 148291 : for (auto loop : loops_list (cfun, LI_FROM_INNERMOST))
1785 : : {
1786 : 72424 : if (loop->aux)
1787 : : {
1788 : : /* If any of our inner loops was split, don't split us,
1789 : : and mark our containing loop as having had splits as well.
1790 : : This allows for delaying SSA update. */
1791 : 450 : loop_outer (loop)->aux = loop;
1792 : 450 : continue;
1793 : : }
1794 : :
1795 : 71974 : if (optimize_loop_for_size_p (loop))
1796 : 1344 : continue;
1797 : :
1798 : 70630 : if (split_loop (loop) || split_loop_on_cond (loop))
1799 : : {
1800 : : /* Mark our containing loop as having had some split inner loops. */
1801 : 1118 : loop_outer (loop)->aux = loop;
1802 : 1118 : changed = true;
1803 : : }
1804 : 25289 : }
1805 : :
1806 : 175239 : for (auto loop : loops_list (cfun, LI_INCLUDE_ROOT))
1807 : 99372 : loop->aux = NULL;
1808 : :
1809 : 25289 : clear_aux_for_blocks ();
1810 : :
1811 : 25289 : free_dominance_info (CDI_POST_DOMINATORS);
1812 : :
1813 : 25289 : if (changed)
1814 : : {
1815 : 811 : rewrite_into_loop_closed_ssa (NULL, TODO_update_ssa);
1816 : 811 : return TODO_cleanup_cfg;
1817 : : }
1818 : : return 0;
1819 : : }
1820 : :
1821 : : /* Loop splitting pass. */
1822 : :
1823 : : namespace {
1824 : :
1825 : : const pass_data pass_data_loop_split =
1826 : : {
1827 : : GIMPLE_PASS, /* type */
1828 : : "lsplit", /* name */
1829 : : OPTGROUP_LOOP, /* optinfo_flags */
1830 : : TV_LOOP_SPLIT, /* tv_id */
1831 : : PROP_cfg, /* properties_required */
1832 : : 0, /* properties_provided */
1833 : : 0, /* properties_destroyed */
1834 : : 0, /* todo_flags_start */
1835 : : 0, /* todo_flags_finish */
1836 : : };
1837 : :
1838 : : class pass_loop_split : public gimple_opt_pass
1839 : : {
1840 : : public:
1841 : 280114 : pass_loop_split (gcc::context *ctxt)
1842 : 560228 : : gimple_opt_pass (pass_data_loop_split, ctxt)
1843 : : {}
1844 : :
1845 : : /* opt_pass methods: */
1846 : 225983 : bool gate (function *) final override { return flag_split_loops != 0; }
1847 : : unsigned int execute (function *) final override;
1848 : :
1849 : : }; // class pass_loop_split
1850 : :
1851 : : unsigned int
1852 : 25289 : pass_loop_split::execute (function *fun)
1853 : : {
1854 : 50578 : if (number_of_loops (fun) <= 1)
1855 : : return 0;
1856 : :
1857 : 25289 : return tree_ssa_split_loops ();
1858 : : }
1859 : :
1860 : : } // anon namespace
1861 : :
1862 : : gimple_opt_pass *
1863 : 280114 : make_pass_loop_split (gcc::context *ctxt)
1864 : : {
1865 : 280114 : return new pass_loop_split (ctxt);
1866 : : }
|