Branch data Line data Source code
1 : : /* Loop splitting.
2 : : Copyright (C) 2015-2024 Free Software Foundation, Inc.
3 : :
4 : : This file is part of GCC.
5 : :
6 : : GCC is free software; you can redistribute it and/or modify it
7 : : under the terms of the GNU General Public License as published by the
8 : : Free Software Foundation; either version 3, or (at your option) any
9 : : later version.
10 : :
11 : : GCC is distributed in the hope that it will be useful, but WITHOUT
12 : : ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
13 : : FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
14 : : for more details.
15 : :
16 : : You should have received a copy of the GNU General Public License
17 : : along with GCC; see the file COPYING3. If not see
18 : : <http://www.gnu.org/licenses/>. */
19 : :
20 : : #include "config.h"
21 : : #include "system.h"
22 : : #include "coretypes.h"
23 : : #include "backend.h"
24 : : #include "tree.h"
25 : : #include "gimple.h"
26 : : #include "tree-pass.h"
27 : : #include "ssa.h"
28 : : #include "fold-const.h"
29 : : #include "tree-cfg.h"
30 : : #include "tree-ssa.h"
31 : : #include "tree-ssa-loop-niter.h"
32 : : #include "tree-ssa-loop.h"
33 : : #include "tree-ssa-loop-manip.h"
34 : : #include "tree-into-ssa.h"
35 : : #include "tree-inline.h"
36 : : #include "tree-cfgcleanup.h"
37 : : #include "cfgloop.h"
38 : : #include "tree-scalar-evolution.h"
39 : : #include "gimple-iterator.h"
40 : : #include "gimple-pretty-print.h"
41 : : #include "cfghooks.h"
42 : : #include "gimple-fold.h"
43 : : #include "gimplify-me.h"
44 : : #include "print-tree.h"
45 : : #include "value-query.h"
46 : : #include "sreal.h"
47 : :
48 : : /* This file implements two kinds of loop splitting.
49 : :
50 : : One transformation of loops like:
51 : :
52 : : for (i = 0; i < 100; i++)
53 : : {
54 : : if (i < 50)
55 : : A;
56 : : else
57 : : B;
58 : : }
59 : :
60 : : into:
61 : :
62 : : for (i = 0; i < 50; i++)
63 : : {
64 : : A;
65 : : }
66 : : for (; i < 100; i++)
67 : : {
68 : : B;
69 : : }
70 : :
71 : : */
72 : :
73 : : /* Return true when BB inside LOOP is a potential iteration space
74 : : split point, i.e. ends with a condition like "IV < comp", which
75 : : is true on one side of the iteration space and false on the other,
76 : : and the split point can be computed. If so, also return the border
77 : : point in *BORDER and the comparison induction variable in IV. */
78 : :
79 : : static tree
80 : 117348 : split_at_bb_p (class loop *loop, basic_block bb, tree *border, affine_iv *iv,
81 : : enum tree_code *guard_code)
82 : : {
83 : 117348 : gcond *stmt;
84 : 117348 : affine_iv iv2;
85 : :
86 : : /* BB must end in a simple conditional jump. */
87 : 301620 : stmt = safe_dyn_cast <gcond *> (*gsi_last_bb (bb));
88 : 53746 : if (!stmt)
89 : : return NULL_TREE;
90 : :
91 : 53746 : enum tree_code code = gimple_cond_code (stmt);
92 : :
93 : 53746 : if (loop_exits_from_bb_p (loop, bb))
94 : : return NULL_TREE;
95 : :
96 : 21039 : tree op0 = gimple_cond_lhs (stmt);
97 : 21039 : tree op1 = gimple_cond_rhs (stmt);
98 : 21039 : class loop *useloop = loop_containing_stmt (stmt);
99 : :
100 : 21039 : if (!simple_iv (loop, useloop, op0, iv, false))
101 : : return NULL_TREE;
102 : 4705 : if (!simple_iv (loop, useloop, op1, &iv2, false))
103 : : return NULL_TREE;
104 : :
105 : : /* Make it so that the first argument of the condition is
106 : : the looping one. */
107 : 2073 : if (!integer_zerop (iv2.step))
108 : : {
109 : 624 : std::swap (op0, op1);
110 : 624 : std::swap (*iv, iv2);
111 : 624 : code = swap_tree_comparison (code);
112 : 624 : gimple_cond_set_condition (stmt, code, op0, op1);
113 : 624 : update_stmt (stmt);
114 : : }
115 : 1449 : else if (integer_zerop (iv->step))
116 : : return NULL_TREE;
117 : 1163 : if (!integer_zerop (iv2.step))
118 : : return NULL_TREE;
119 : 1057 : if (!iv->no_overflow)
120 : : return NULL_TREE;
121 : :
122 : : /* Only handle relational comparisons, for equality and non-equality
123 : : we'd have to split the loop into two loops and a middle statement. */
124 : 1007 : switch (code)
125 : : {
126 : : case LT_EXPR:
127 : : case LE_EXPR:
128 : : case GT_EXPR:
129 : : case GE_EXPR:
130 : : break;
131 : 342 : case NE_EXPR:
132 : 342 : case EQ_EXPR:
133 : : /* If the test check for first iteration, we can handle NE/EQ
134 : : with only one split loop. */
135 : 342 : if (operand_equal_p (iv->base, iv2.base, 0))
136 : : {
137 : 108 : if (code == EQ_EXPR)
138 : 87 : code = !tree_int_cst_sign_bit (iv->step) ? LE_EXPR : GE_EXPR;
139 : : else
140 : 21 : code = !tree_int_cst_sign_bit (iv->step) ? GT_EXPR : LT_EXPR;
141 : : break;
142 : : }
143 : : /* Similarly when the test checks for minimal or maximal
144 : : value range. */
145 : : else
146 : : {
147 : 234 : int_range<2> r;
148 : 234 : get_global_range_query ()->range_of_expr (r, op0, stmt);
149 : 234 : if (!r.varying_p () && !r.undefined_p ()
150 : 462 : && TREE_CODE (op1) == INTEGER_CST)
151 : : {
152 : 115 : wide_int val = wi::to_wide (op1);
153 : 115 : if (known_eq (val, r.lower_bound ()))
154 : : {
155 : 15 : code = (code == EQ_EXPR) ? LE_EXPR : GT_EXPR;
156 : : break;
157 : : }
158 : 100 : else if (known_eq (val, r.upper_bound ()))
159 : : {
160 : 33 : code = (code == EQ_EXPR) ? GE_EXPR : LT_EXPR;
161 : : break;
162 : : }
163 : 67 : }
164 : 234 : }
165 : : /* TODO: We can compare with exit condition; it seems that testing for
166 : : last iteration is common case. */
167 : 186 : return NULL_TREE;
168 : : default:
169 : : return NULL_TREE;
170 : : }
171 : :
172 : 821 : if (dump_file && (dump_flags & TDF_DETAILS))
173 : : {
174 : 25 : fprintf (dump_file, "Found potential split point: ");
175 : 25 : print_gimple_stmt (dump_file, stmt, 0, TDF_SLIM);
176 : 25 : fprintf (dump_file, " { ");
177 : 25 : print_generic_expr (dump_file, iv->base, TDF_SLIM);
178 : 25 : fprintf (dump_file, " + I*");
179 : 25 : print_generic_expr (dump_file, iv->step, TDF_SLIM);
180 : 25 : fprintf (dump_file, " } %s ", get_tree_code_name (code));
181 : 25 : print_generic_expr (dump_file, iv2.base, TDF_SLIM);
182 : 25 : fprintf (dump_file, "\n");
183 : : }
184 : :
185 : 821 : *border = iv2.base;
186 : 821 : *guard_code = code;
187 : 821 : return op0;
188 : : }
189 : :
190 : : /* Given a GUARD conditional stmt inside LOOP, which we want to make always
191 : : true or false depending on INITIAL_TRUE, and adjusted values NEXTVAL
192 : : (a post-increment IV) and NEWBOUND (the comparator) adjust the loop
193 : : exit test statement to loop back only if the GUARD statement will
194 : : also be true/false in the next iteration. */
195 : :
196 : : static void
197 : 345 : patch_loop_exit (class loop *loop, tree_code guard_code, tree nextval,
198 : : tree newbound, bool initial_true)
199 : : {
200 : 345 : edge exit = single_exit (loop);
201 : 690 : gcond *stmt = as_a <gcond *> (*gsi_last_bb (exit->src));
202 : 345 : gimple_cond_set_condition (stmt, guard_code, nextval, newbound);
203 : 345 : update_stmt (stmt);
204 : :
205 : 345 : edge stay = EDGE_SUCC (exit->src, EDGE_SUCC (exit->src, 0) == exit);
206 : :
207 : 345 : exit->flags &= ~(EDGE_TRUE_VALUE | EDGE_FALSE_VALUE);
208 : 345 : stay->flags &= ~(EDGE_TRUE_VALUE | EDGE_FALSE_VALUE);
209 : :
210 : 345 : if (initial_true)
211 : : {
212 : 251 : exit->flags |= EDGE_FALSE_VALUE;
213 : 251 : stay->flags |= EDGE_TRUE_VALUE;
214 : : }
215 : : else
216 : : {
217 : 94 : exit->flags |= EDGE_TRUE_VALUE;
218 : 94 : stay->flags |= EDGE_FALSE_VALUE;
219 : : }
220 : 345 : }
221 : :
222 : : /* Give an induction variable GUARD_IV, and its affine descriptor IV,
223 : : find the loop phi node in LOOP defining it directly, or create
224 : : such phi node. Return that phi node. */
225 : :
226 : : static gphi *
227 : 814 : find_or_create_guard_phi (class loop *loop, tree guard_iv, affine_iv * /*iv*/)
228 : : {
229 : 814 : gimple *def = SSA_NAME_DEF_STMT (guard_iv);
230 : 814 : gphi *phi;
231 : 814 : if ((phi = dyn_cast <gphi *> (def))
232 : 345 : && gimple_bb (phi) == loop->header)
233 : 345 : return phi;
234 : :
235 : : /* XXX Create the PHI instead. */
236 : : return NULL;
237 : : }
238 : :
239 : : /* Returns true if the exit values of all loop phi nodes can be
240 : : determined easily (i.e. that connect_loop_phis can determine them). */
241 : :
242 : : static bool
243 : 39949 : easy_exit_values (class loop *loop)
244 : : {
245 : 39949 : edge exit = single_exit (loop);
246 : 39949 : edge latch = loop_latch_edge (loop);
247 : 39949 : gphi_iterator psi;
248 : :
249 : : /* Currently we regard the exit values as easy if they are the same
250 : : as the value over the backedge. Which is the case if the definition
251 : : of the backedge value dominates the exit edge. */
252 : 132270 : for (psi = gsi_start_phis (loop->header); !gsi_end_p (psi); gsi_next (&psi))
253 : : {
254 : 92321 : gphi *phi = psi.phi ();
255 : 92321 : tree next = PHI_ARG_DEF_FROM_EDGE (phi, latch);
256 : 92321 : basic_block bb;
257 : 92321 : if (TREE_CODE (next) == SSA_NAME
258 : 91897 : && (bb = gimple_bb (SSA_NAME_DEF_STMT (next)))
259 : 184217 : && !dominated_by_p (CDI_DOMINATORS, exit->src, bb))
260 : : return false;
261 : : }
262 : :
263 : : return true;
264 : : }
265 : :
266 : : /* This function updates the SSA form after connect_loops made a new
267 : : edge NEW_E leading from LOOP1 exit to LOOP2 (via in intermediate
268 : : conditional). I.e. the second loop can now be entered either
269 : : via the original entry or via NEW_E, so the entry values of LOOP2
270 : : phi nodes are either the original ones or those at the exit
271 : : of LOOP1. Insert new phi nodes in LOOP2 pre-header reflecting
272 : : this. The loops need to fulfill easy_exit_values(). */
273 : :
274 : : static void
275 : 1016 : connect_loop_phis (class loop *loop1, class loop *loop2, edge new_e)
276 : : {
277 : 1016 : basic_block rest = loop_preheader_edge (loop2)->src;
278 : 1016 : gcc_assert (new_e->dest == rest);
279 : 1016 : edge skip_first = EDGE_PRED (rest, EDGE_PRED (rest, 0) == new_e);
280 : :
281 : 1016 : edge firste = loop_preheader_edge (loop1);
282 : 1016 : edge seconde = loop_preheader_edge (loop2);
283 : 1016 : edge firstn = loop_latch_edge (loop1);
284 : 1016 : gphi_iterator psi_first, psi_second;
285 : 1016 : for (psi_first = gsi_start_phis (loop1->header),
286 : 1016 : psi_second = gsi_start_phis (loop2->header);
287 : 4109 : !gsi_end_p (psi_first);
288 : 3093 : gsi_next (&psi_first), gsi_next (&psi_second))
289 : : {
290 : 3093 : tree init, next, new_init;
291 : 3093 : use_operand_p op;
292 : 3093 : gphi *phi_first = psi_first.phi ();
293 : 3093 : gphi *phi_second = psi_second.phi ();
294 : :
295 : 3093 : init = PHI_ARG_DEF_FROM_EDGE (phi_first, firste);
296 : 3093 : next = PHI_ARG_DEF_FROM_EDGE (phi_first, firstn);
297 : 3093 : op = PHI_ARG_DEF_PTR_FROM_EDGE (phi_second, seconde);
298 : 3093 : gcc_assert (operand_equal_for_phi_arg_p (init, USE_FROM_PTR (op)));
299 : :
300 : : /* Prefer using original variable as a base for the new ssa name.
301 : : This is necessary for virtual ops, and useful in order to avoid
302 : : losing debug info for real ops. */
303 : 3093 : if (TREE_CODE (next) == SSA_NAME
304 : 6157 : && useless_type_conversion_p (TREE_TYPE (next),
305 : 3064 : TREE_TYPE (init)))
306 : 3064 : new_init = copy_ssa_name (next);
307 : 29 : else if (TREE_CODE (init) == SSA_NAME
308 : 35 : && useless_type_conversion_p (TREE_TYPE (init),
309 : 6 : TREE_TYPE (next)))
310 : 6 : new_init = copy_ssa_name (init);
311 : 23 : else if (useless_type_conversion_p (TREE_TYPE (next),
312 : 23 : TREE_TYPE (init)))
313 : 23 : new_init = make_temp_ssa_name (TREE_TYPE (next), NULL,
314 : : "unrinittmp");
315 : : else
316 : 0 : new_init = make_temp_ssa_name (TREE_TYPE (init), NULL,
317 : : "unrinittmp");
318 : :
319 : 3093 : gphi * newphi = create_phi_node (new_init, rest);
320 : 3093 : add_phi_arg (newphi, init, skip_first, UNKNOWN_LOCATION);
321 : 3093 : add_phi_arg (newphi, next, new_e, UNKNOWN_LOCATION);
322 : 3093 : SET_USE (op, new_init);
323 : : }
324 : 1016 : }
325 : :
326 : : /* The two loops LOOP1 and LOOP2 were just created by loop versioning,
327 : : they are still equivalent and placed in two arms of a diamond, like so:
328 : :
329 : : .------if (cond)------.
330 : : v v
331 : : pre1 pre2
332 : : | |
333 : : .--->h1 h2<----.
334 : : | | | |
335 : : | ex1---. .---ex2 |
336 : : | / | | \ |
337 : : '---l1 X | l2---'
338 : : | |
339 : : | |
340 : : '--->join<---'
341 : :
342 : : This function transforms the program such that LOOP1 is conditionally
343 : : falling through to LOOP2, or skipping it. This is done by splitting
344 : : the ex1->join edge at X in the diagram above, and inserting a condition
345 : : whose one arm goes to pre2, resulting in this situation:
346 : :
347 : : .------if (cond)------.
348 : : v v
349 : : pre1 .---------->pre2
350 : : | | |
351 : : .--->h1 | h2<----.
352 : : | | | | |
353 : : | ex1---. | .---ex2 |
354 : : | / v | | \ |
355 : : '---l1 skip---' | l2---'
356 : : | |
357 : : | |
358 : : '--->join<---'
359 : :
360 : :
361 : : The condition used is the exit condition of LOOP1, which effectively means
362 : : that when the first loop exits (for whatever reason) but the real original
363 : : exit expression is still false the second loop will be entered.
364 : : The function returns the new edge cond->pre2.
365 : :
366 : : This doesn't update the SSA form, see connect_loop_phis for that. */
367 : :
368 : : static edge
369 : 345 : connect_loops (class loop *loop1, class loop *loop2)
370 : : {
371 : 345 : edge exit = single_exit (loop1);
372 : 345 : basic_block skip_bb = split_edge (exit);
373 : 345 : gcond *skip_stmt;
374 : 345 : gimple_stmt_iterator gsi;
375 : 345 : edge new_e, skip_e;
376 : :
377 : 690 : gcond *stmt = as_a <gcond *> (*gsi_last_bb (exit->src));
378 : 345 : skip_stmt = gimple_build_cond (gimple_cond_code (stmt),
379 : : gimple_cond_lhs (stmt),
380 : : gimple_cond_rhs (stmt),
381 : : NULL_TREE, NULL_TREE);
382 : 345 : gsi = gsi_last_bb (skip_bb);
383 : 345 : gsi_insert_after (&gsi, skip_stmt, GSI_NEW_STMT);
384 : :
385 : 345 : skip_e = EDGE_SUCC (skip_bb, 0);
386 : 345 : skip_e->flags &= ~EDGE_FALLTHRU;
387 : 345 : new_e = make_edge (skip_bb, loop_preheader_edge (loop2)->src, 0);
388 : 345 : if (exit->flags & EDGE_TRUE_VALUE)
389 : : {
390 : 89 : skip_e->flags |= EDGE_TRUE_VALUE;
391 : 89 : new_e->flags |= EDGE_FALSE_VALUE;
392 : : }
393 : : else
394 : : {
395 : 256 : skip_e->flags |= EDGE_FALSE_VALUE;
396 : 256 : new_e->flags |= EDGE_TRUE_VALUE;
397 : : }
398 : :
399 : 345 : new_e->probability = profile_probability::very_likely ();
400 : 345 : skip_e->probability = new_e->probability.invert ();
401 : :
402 : 345 : return new_e;
403 : : }
404 : :
405 : : /* This returns the new bound for iterations given the original iteration
406 : : space in NITER, an arbitrary new bound BORDER, assumed to be some
407 : : comparison value with a different IV, the initial value GUARD_INIT of
408 : : that other IV, and the comparison code GUARD_CODE that compares
409 : : that other IV with BORDER. We return an SSA name, and place any
410 : : necessary statements for that computation into *STMTS.
411 : :
412 : : For example for such a loop:
413 : :
414 : : for (i = beg, j = guard_init; i < end; i++, j++)
415 : : if (j < border) // this is supposed to be true/false
416 : : ...
417 : :
418 : : we want to return a new bound (on j) that makes the loop iterate
419 : : as long as the condition j < border stays true. We also don't want
420 : : to iterate more often than the original loop, so we have to introduce
421 : : some cut-off as well (via min/max), effectively resulting in:
422 : :
423 : : newend = min (end+guard_init-beg, border)
424 : : for (i = beg; j = guard_init; j < newend; i++, j++)
425 : : if (j < c)
426 : : ...
427 : :
428 : : Depending on the direction of the IVs and if the exit tests
429 : : are strict or non-strict we need to use MIN or MAX,
430 : : and add or subtract 1. This routine computes newend above. */
431 : :
432 : : static tree
433 : 345 : compute_new_first_bound (gimple_seq *stmts, class tree_niter_desc *niter,
434 : : tree border,
435 : : enum tree_code guard_code, tree guard_init)
436 : : {
437 : : /* The niter structure contains the after-increment IV, we need
438 : : the loop-enter base, so subtract STEP once. */
439 : 345 : tree controlbase = force_gimple_operand (niter->control.base,
440 : : stmts, true, NULL_TREE);
441 : 345 : tree controlstep = niter->control.step;
442 : 345 : tree enddiff;
443 : 345 : if (POINTER_TYPE_P (TREE_TYPE (controlbase)))
444 : : {
445 : 1 : controlstep = gimple_build (stmts, NEGATE_EXPR,
446 : 1 : TREE_TYPE (controlstep), controlstep);
447 : 1 : enddiff = gimple_build (stmts, POINTER_PLUS_EXPR,
448 : 1 : TREE_TYPE (controlbase),
449 : : controlbase, controlstep);
450 : : }
451 : : else
452 : 344 : enddiff = gimple_build (stmts, MINUS_EXPR,
453 : 344 : TREE_TYPE (controlbase),
454 : : controlbase, controlstep);
455 : :
456 : : /* Compute end-beg. */
457 : 345 : gimple_seq stmts2;
458 : 345 : tree end = force_gimple_operand (niter->bound, &stmts2,
459 : : true, NULL_TREE);
460 : 345 : gimple_seq_add_seq_without_update (stmts, stmts2);
461 : 345 : if (POINTER_TYPE_P (TREE_TYPE (enddiff)))
462 : : {
463 : 1 : tree tem = gimple_convert (stmts, sizetype, enddiff);
464 : 1 : tem = gimple_build (stmts, NEGATE_EXPR, sizetype, tem);
465 : 1 : enddiff = gimple_build (stmts, POINTER_PLUS_EXPR,
466 : 1 : TREE_TYPE (enddiff),
467 : : end, tem);
468 : : }
469 : : else
470 : 344 : enddiff = gimple_build (stmts, MINUS_EXPR, TREE_TYPE (enddiff),
471 : : end, enddiff);
472 : :
473 : : /* Compute guard_init + (end-beg). */
474 : 345 : tree newbound;
475 : 345 : enddiff = gimple_convert (stmts, TREE_TYPE (guard_init), enddiff);
476 : 345 : if (POINTER_TYPE_P (TREE_TYPE (guard_init)))
477 : : {
478 : 1 : enddiff = gimple_convert (stmts, sizetype, enddiff);
479 : 1 : newbound = gimple_build (stmts, POINTER_PLUS_EXPR,
480 : 1 : TREE_TYPE (guard_init),
481 : : guard_init, enddiff);
482 : : }
483 : : else
484 : 344 : newbound = gimple_build (stmts, PLUS_EXPR, TREE_TYPE (guard_init),
485 : : guard_init, enddiff);
486 : :
487 : : /* Depending on the direction of the IVs the new bound for the first
488 : : loop is the minimum or maximum of old bound and border.
489 : : Also, if the guard condition isn't strictly less or greater,
490 : : we need to adjust the bound. */
491 : 345 : int addbound = 0;
492 : 345 : enum tree_code minmax;
493 : 345 : if (niter->cmp == LT_EXPR)
494 : : {
495 : : /* GT and LE are the same, inverted. */
496 : 325 : if (guard_code == GT_EXPR || guard_code == LE_EXPR)
497 : : addbound = -1;
498 : : minmax = MIN_EXPR;
499 : : }
500 : : else
501 : : {
502 : 20 : gcc_assert (niter->cmp == GT_EXPR);
503 : 20 : if (guard_code == GE_EXPR || guard_code == LT_EXPR)
504 : : addbound = 1;
505 : : minmax = MAX_EXPR;
506 : : }
507 : :
508 : : if (addbound)
509 : : {
510 : 260 : tree type2 = TREE_TYPE (newbound);
511 : 260 : if (POINTER_TYPE_P (type2))
512 : 0 : type2 = sizetype;
513 : 520 : newbound = gimple_build (stmts,
514 : 260 : POINTER_TYPE_P (TREE_TYPE (newbound))
515 : : ? POINTER_PLUS_EXPR : PLUS_EXPR,
516 : 260 : TREE_TYPE (newbound),
517 : : newbound,
518 : : build_int_cst (type2, addbound));
519 : : }
520 : :
521 : 345 : tree newend = gimple_build (stmts, minmax, TREE_TYPE (border),
522 : : border, newbound);
523 : 345 : return newend;
524 : : }
525 : :
526 : : /* Fix the two loop's bb count after split based on the split edge probability,
527 : : don't adjust the bbs dominated by true branches of that loop to avoid
528 : : dropping 1s down. */
529 : : static void
530 : 1016 : fix_loop_bb_probability (class loop *loop1, class loop *loop2, edge true_edge,
531 : : edge false_edge)
532 : : {
533 : : /* Proportion first loop's bb counts except those dominated by true
534 : : branch to avoid drop 1s down. */
535 : 1016 : basic_block *bbs1, *bbs2;
536 : 1016 : bbs1 = get_loop_body (loop1);
537 : 1016 : unsigned j;
538 : 12101 : for (j = 0; j < loop1->num_nodes; j++)
539 : 10069 : if (bbs1[j] == loop1->latch
540 : : /* Watch for case where the true conditional is empty. */
541 : 18106 : || !single_pred_p (true_edge->dest)
542 : 18709 : || !dominated_by_p (CDI_DOMINATORS, bbs1[j], true_edge->dest))
543 : 7195 : bbs1[j]->count
544 : 7195 : = bbs1[j]->count.apply_probability (true_edge->probability);
545 : 1016 : free (bbs1);
546 : :
547 : : /* Proportion second loop's bb counts except those dominated by false
548 : : branch to avoid drop 1s down. */
549 : 1016 : basic_block bbi_copy = get_bb_copy (false_edge->dest);
550 : 1016 : bbs2 = get_loop_body (loop2);
551 : 10759 : for (j = 0; j < loop2->num_nodes; j++)
552 : 8727 : if (bbs2[j] == loop2->latch
553 : : /* Watch for case where the flase conditional is empty. */
554 : 7711 : || !single_pred_p (bbi_copy)
555 : 12526 : || !dominated_by_p (CDI_DOMINATORS, bbs2[j], bbi_copy))
556 : 7849 : bbs2[j]->count
557 : 7849 : = bbs2[j]->count.apply_probability (true_edge->probability.invert ());
558 : 1016 : free (bbs2);
559 : 1016 : }
560 : :
561 : : /* Checks if LOOP contains an conditional block whose condition
562 : : depends on which side in the iteration space it is, and if so
563 : : splits the iteration space into two loops. Returns true if the
564 : : loop was split. NITER must contain the iteration descriptor for the
565 : : single exit of LOOP. */
566 : :
567 : : static bool
568 : 64056 : split_loop (class loop *loop1)
569 : : {
570 : 64056 : class tree_niter_desc niter;
571 : 64056 : basic_block *bbs;
572 : 64056 : unsigned i;
573 : 64056 : bool changed = false;
574 : 64056 : tree guard_iv;
575 : 64056 : tree border = NULL_TREE;
576 : 64056 : affine_iv iv;
577 : 64056 : edge exit1;
578 : :
579 : 64056 : if (!(exit1 = single_exit (loop1))
580 : 44157 : || EDGE_COUNT (exit1->src->succs) != 2
581 : : /* ??? We could handle non-empty latches when we split the latch edge
582 : : (not the exit edge), and put the new exit condition in the new block.
583 : : OTOH this executes some code unconditionally that might have been
584 : : skipped by the original exit before. */
585 : 44148 : || !empty_block_p (loop1->latch)
586 : 39949 : || !easy_exit_values (loop1)
587 : 39949 : || !number_of_iterations_exit (loop1, exit1, &niter, false, true)
588 : 97307 : || niter.cmp == ERROR_MARK)
589 : 30842 : return false;
590 : 33214 : if (niter.cmp == NE_EXPR)
591 : : {
592 : 20022 : if (!niter.control.no_overflow)
593 : : return false;
594 : 19767 : if (tree_int_cst_sign_bit (niter.control.step))
595 : 2286 : niter.cmp = GT_EXPR;
596 : : else
597 : 17481 : niter.cmp = LT_EXPR;
598 : : }
599 : :
600 : 32959 : bbs = get_loop_body (loop1);
601 : :
602 : 32959 : if (!can_copy_bbs_p (bbs, loop1->num_nodes))
603 : : {
604 : 4 : free (bbs);
605 : 4 : return false;
606 : : }
607 : :
608 : : /* Find a splitting opportunity. */
609 : : enum tree_code guard_code;
610 : 149958 : for (i = 0; i < loop1->num_nodes; i++)
611 : 117348 : if ((guard_iv = split_at_bb_p (loop1, bbs[i], &border, &iv, &guard_code)))
612 : : {
613 : : /* Handling opposite steps is not implemented yet. Neither
614 : : is handling different step sizes. */
615 : 821 : if ((tree_int_cst_sign_bit (iv.step)
616 : 821 : != tree_int_cst_sign_bit (niter.control.step))
617 : 821 : || !tree_int_cst_equal (iv.step, niter.control.step))
618 : 476 : continue;
619 : :
620 : : /* Find a loop PHI node that defines guard_iv directly,
621 : : or create one doing that. */
622 : 814 : gphi *phi = find_or_create_guard_phi (loop1, guard_iv, &iv);
623 : 814 : if (!phi)
624 : 469 : continue;
625 : 690 : gcond *guard_stmt = as_a<gcond *> (*gsi_last_bb (bbs[i]));
626 : 345 : tree guard_init = PHI_ARG_DEF_FROM_EDGE (phi,
627 : : loop_preheader_edge (loop1));
628 : :
629 : : /* Loop splitting is implemented by versioning the loop, placing
630 : : the new loop after the old loop, make the first loop iterate
631 : : as long as the conditional stays true (or false) and let the
632 : : second (new) loop handle the rest of the iterations.
633 : :
634 : : First we need to determine if the condition will start being true
635 : : or false in the first loop. */
636 : 345 : bool initial_true;
637 : 345 : switch (guard_code)
638 : : {
639 : 247 : case LT_EXPR:
640 : 247 : case LE_EXPR:
641 : 247 : initial_true = !tree_int_cst_sign_bit (iv.step);
642 : 247 : break;
643 : 98 : case GT_EXPR:
644 : 98 : case GE_EXPR:
645 : 98 : initial_true = tree_int_cst_sign_bit (iv.step);
646 : 98 : break;
647 : 0 : default:
648 : 0 : gcc_unreachable ();
649 : : }
650 : :
651 : : /* Build a condition that will skip the first loop when the
652 : : guard condition won't ever be true (or false). */
653 : 345 : gimple_seq stmts2;
654 : 345 : border = force_gimple_operand (border, &stmts2, true, NULL_TREE);
655 : 345 : if (stmts2)
656 : 0 : gsi_insert_seq_on_edge_immediate (loop_preheader_edge (loop1),
657 : : stmts2);
658 : 345 : tree cond = fold_build2 (guard_code, boolean_type_node,
659 : : guard_init, border);
660 : 345 : if (!initial_true)
661 : 94 : cond = fold_build1 (TRUTH_NOT_EXPR, boolean_type_node, cond);
662 : :
663 : 345 : edge true_edge, false_edge;
664 : 345 : extract_true_false_edges_from_block (bbs[i], &true_edge, &false_edge);
665 : :
666 : : /* Now version the loop, placing loop2 after loop1 connecting
667 : : them, and fix up SSA form for that. */
668 : 345 : initialize_original_copy_tables ();
669 : 345 : basic_block cond_bb;
670 : :
671 : 345 : profile_probability loop1_prob
672 : 345 : = integer_onep (cond) ? profile_probability::always ()
673 : 94 : : true_edge->probability;
674 : : /* TODO: It is commonly a case that we know that both loops will be
675 : : entered. very_likely below is the probability that second loop will
676 : : be entered given by connect_loops. We should work out the common
677 : : case it is always true. */
678 : 345 : class loop *loop2 = loop_version (loop1, cond, &cond_bb,
679 : : loop1_prob,
680 : : /* Pass always as we will later
681 : : redirect first loop to second
682 : : loop. */
683 : : profile_probability::always (),
684 : : profile_probability::always (),
685 : : profile_probability::very_likely (),
686 : : true);
687 : 345 : gcc_assert (loop2);
688 : : /* Correct probability of edge cond_bb->preheader_of_loop2. */
689 : 345 : single_pred_edge
690 : 345 : (loop_preheader_edge (loop2)->src)->probability
691 : 345 : = loop1_prob.invert ();
692 : :
693 : 345 : fix_loop_bb_probability (loop1, loop2, true_edge, false_edge);
694 : : /* If conditional we split on has reliable profilea nd both
695 : : preconditionals of loop1 and loop2 are constant true, we can
696 : : only redistribute the iteration counts to the split loops.
697 : :
698 : : If the conditionals we insert before loop1 or loop2 are non-trivial
699 : : they increase expected loop count, so account this accordingly.
700 : : If we do not know the probability of split conditional, avoid
701 : : reudcing loop estimates, since we do not really know how they are
702 : : split between of the two new loops. Keep orignal estimate since
703 : : it is likely better then completely dropping it.
704 : :
705 : : TODO: If we know that one of the new loops has constant
706 : : number of iterations, we can do better. We could also update
707 : : upper bounds. */
708 : 345 : if (loop1->any_estimate
709 : 345 : && wi::fits_shwi_p (loop1->nb_iterations_estimate))
710 : : {
711 : 198 : sreal scale = true_edge->probability.reliable_p ()
712 : 198 : ? true_edge->probability.to_sreal () : (sreal)1;
713 : 198 : sreal scale2 = false_edge->probability.reliable_p ()
714 : 198 : ? false_edge->probability.to_sreal () : (sreal)1;
715 : 198 : sreal div1 = loop1_prob.initialized_p ()
716 : 200 : ? loop1_prob.to_sreal () : (sreal)1/(sreal)2;
717 : : /* +1 to get header interations rather than latch iterations and then
718 : : -1 to convert back. */
719 : 198 : if (div1 != 0)
720 : 197 : loop1->nb_iterations_estimate
721 : 393 : = MAX ((((sreal)loop1->nb_iterations_estimate.to_shwi () + 1)
722 : : * scale / div1).to_nearest_int () - 1, 0);
723 : : else
724 : 1 : loop1->any_estimate = false;
725 : 198 : loop2->nb_iterations_estimate
726 : 198 : = MAX ((((sreal)loop2->nb_iterations_estimate.to_shwi () + 1) * scale2
727 : : / profile_probability::very_likely ().to_sreal ())
728 : : .to_nearest_int () - 1, 0);
729 : : }
730 : 345 : update_loop_exit_probability_scale_dom_bbs (loop1);
731 : 345 : update_loop_exit_probability_scale_dom_bbs (loop2);
732 : :
733 : 345 : edge new_e = connect_loops (loop1, loop2);
734 : 345 : connect_loop_phis (loop1, loop2, new_e);
735 : :
736 : : /* The iterations of the second loop is now already
737 : : exactly those that the first loop didn't do, but the
738 : : iteration space of the first loop is still the original one.
739 : : Compute the new bound for the guarding IV and patch the
740 : : loop exit to use it instead of original IV and bound. */
741 : 345 : gimple_seq stmts = NULL;
742 : 345 : tree newend = compute_new_first_bound (&stmts, &niter, border,
743 : : guard_code, guard_init);
744 : 345 : if (stmts)
745 : 157 : gsi_insert_seq_on_edge_immediate (loop_preheader_edge (loop1),
746 : : stmts);
747 : 345 : tree guard_next = PHI_ARG_DEF_FROM_EDGE (phi, loop_latch_edge (loop1));
748 : 345 : patch_loop_exit (loop1, guard_code, guard_next, newend, initial_true);
749 : :
750 : : /* Finally patch out the two copies of the condition to be always
751 : : true/false (or opposite). */
752 : 690 : gcond *force_true = as_a<gcond *> (*gsi_last_bb (bbs[i]));
753 : 690 : gcond *force_false = as_a<gcond *> (*gsi_last_bb (get_bb_copy (bbs[i])));
754 : 345 : if (!initial_true)
755 : 94 : std::swap (force_true, force_false);
756 : 345 : gimple_cond_make_true (force_true);
757 : 345 : gimple_cond_make_false (force_false);
758 : 345 : update_stmt (force_true);
759 : 345 : update_stmt (force_false);
760 : :
761 : 345 : free_original_copy_tables ();
762 : :
763 : 345 : changed = true;
764 : 345 : if (dump_file && (dump_flags & TDF_DETAILS))
765 : 25 : fprintf (dump_file, ";; Loop split.\n");
766 : :
767 : 345 : if (dump_enabled_p ())
768 : 26 : dump_printf_loc (MSG_OPTIMIZED_LOCATIONS, guard_stmt, "loop split\n");
769 : :
770 : : /* Only deal with the first opportunity. */
771 : 345 : break;
772 : : }
773 : :
774 : 32955 : free (bbs);
775 : 32955 : return changed;
776 : 64056 : }
777 : :
778 : : /* Another transformation of loops like:
779 : :
780 : : for (i = INIT (); CHECK (i); i = NEXT ())
781 : : {
782 : : if (expr (a_1, a_2, ..., a_n)) // expr is pure
783 : : a_j = ...; // change at least one a_j
784 : : else
785 : : S; // not change any a_j
786 : : }
787 : :
788 : : into:
789 : :
790 : : for (i = INIT (); CHECK (i); i = NEXT ())
791 : : {
792 : : if (expr (a_1, a_2, ..., a_n))
793 : : a_j = ...;
794 : : else
795 : : {
796 : : S;
797 : : i = NEXT ();
798 : : break;
799 : : }
800 : : }
801 : :
802 : : for (; CHECK (i); i = NEXT ())
803 : : {
804 : : S;
805 : : }
806 : :
807 : : */
808 : :
809 : : /* Data structure to hold temporary information during loop split upon
810 : : semi-invariant conditional statement. */
811 : : class split_info {
812 : : public:
813 : : /* Array of all basic blocks in a loop, returned by get_loop_body(). */
814 : : basic_block *bbs;
815 : :
816 : : /* All memory store/clobber statements in a loop. */
817 : : auto_vec<gimple *> memory_stores;
818 : :
819 : : /* Whether above memory stores vector has been filled. */
820 : : int need_init;
821 : :
822 : : /* Control dependencies of basic blocks in a loop. */
823 : : auto_vec<hash_set<basic_block> *> control_deps;
824 : :
825 : 63711 : split_info () : bbs (NULL), need_init (true) { }
826 : :
827 : 63711 : ~split_info ()
828 : : {
829 : 63711 : if (bbs)
830 : 63711 : free (bbs);
831 : :
832 : 65069 : for (unsigned i = 0; i < control_deps.length (); i++)
833 : 958 : delete control_deps[i];
834 : 63711 : }
835 : : };
836 : :
837 : : /* Find all statements with memory-write effect in LOOP, including memory
838 : : store and non-pure function call, and keep those in a vector. This work
839 : : is only done one time, for the vector should be constant during analysis
840 : : stage of semi-invariant condition. */
841 : :
842 : : static void
843 : 7296 : find_vdef_in_loop (struct loop *loop)
844 : : {
845 : 7296 : split_info *info = (split_info *) loop->aux;
846 : 7296 : gphi *vphi = get_virtual_phi (loop->header);
847 : :
848 : : /* Indicate memory store vector has been filled. */
849 : 7296 : info->need_init = false;
850 : :
851 : : /* If loop contains memory operation, there must be a virtual PHI node in
852 : : loop header basic block. */
853 : 7296 : if (vphi == NULL)
854 : 1008 : return;
855 : :
856 : : /* All virtual SSA names inside the loop are connected to be a cyclic
857 : : graph via virtual PHI nodes. The virtual PHI node in loop header just
858 : : links the first and the last virtual SSA names, by using the last as
859 : : PHI operand to define the first. */
860 : 6338 : const edge latch = loop_latch_edge (loop);
861 : 6338 : const tree first = gimple_phi_result (vphi);
862 : 6338 : const tree last = PHI_ARG_DEF_FROM_EDGE (vphi, latch);
863 : :
864 : : /* The virtual SSA cyclic graph might consist of only one SSA name, who
865 : : is defined by itself.
866 : :
867 : : .MEM_1 = PHI <.MEM_2(loop entry edge), .MEM_1(latch edge)>
868 : :
869 : : This means the loop contains only memory loads, so we can skip it. */
870 : 6338 : if (first == last)
871 : : return;
872 : :
873 : 6288 : auto_vec<gimple *> other_stores;
874 : 6288 : auto_vec<tree> worklist;
875 : 6288 : auto_bitmap visited;
876 : :
877 : 6288 : bitmap_set_bit (visited, SSA_NAME_VERSION (first));
878 : 6288 : bitmap_set_bit (visited, SSA_NAME_VERSION (last));
879 : 6288 : worklist.safe_push (last);
880 : :
881 : 40223 : do
882 : : {
883 : 40223 : tree vuse = worklist.pop ();
884 : 40223 : gimple *stmt = SSA_NAME_DEF_STMT (vuse);
885 : :
886 : : /* We mark the first and last SSA names as visited at the beginning,
887 : : and reversely start the process from the last SSA name towards the
888 : : first, which ensures that this do-while will not touch SSA names
889 : : defined outside the loop. */
890 : 40223 : gcc_assert (gimple_bb (stmt)
891 : : && flow_bb_inside_loop_p (loop, gimple_bb (stmt)));
892 : :
893 : 40223 : if (gimple_code (stmt) == GIMPLE_PHI)
894 : : {
895 : 11624 : gphi *phi = as_a <gphi *> (stmt);
896 : :
897 : 35086 : for (unsigned i = 0; i < gimple_phi_num_args (phi); ++i)
898 : : {
899 : 23462 : tree arg = gimple_phi_arg_def (stmt, i);
900 : :
901 : 23462 : if (bitmap_set_bit (visited, SSA_NAME_VERSION (arg)))
902 : 14811 : worklist.safe_push (arg);
903 : : }
904 : : }
905 : : else
906 : : {
907 : 28599 : tree prev = gimple_vuse (stmt);
908 : :
909 : : /* Non-pure call statement is conservatively assumed to impact all
910 : : memory locations. So place call statements ahead of other memory
911 : : stores in the vector with an idea of using them as shortcut
912 : : terminators to memory alias analysis. */
913 : 28599 : if (gimple_code (stmt) == GIMPLE_CALL)
914 : 10208 : info->memory_stores.safe_push (stmt);
915 : : else
916 : 18391 : other_stores.safe_push (stmt);
917 : :
918 : 28599 : if (bitmap_set_bit (visited, SSA_NAME_VERSION (prev)))
919 : 19124 : worklist.safe_push (prev);
920 : : }
921 : 80446 : } while (!worklist.is_empty ());
922 : :
923 : 6288 : info->memory_stores.safe_splice (other_stores);
924 : 6288 : }
925 : :
926 : : /* Two basic blocks have equivalent control dependency if one dominates to
927 : : the other, and it is post-dominated by the latter. Given a basic block
928 : : BB in LOOP, find farest equivalent dominating basic block. For BB, there
929 : : is a constraint that BB does not post-dominate loop header of LOOP, this
930 : : means BB is control-dependent on at least one basic block in LOOP. */
931 : :
932 : : static basic_block
933 : 881 : get_control_equiv_head_block (struct loop *loop, basic_block bb)
934 : : {
935 : 947 : while (!bb->aux)
936 : : {
937 : 577 : basic_block dom_bb = get_immediate_dominator (CDI_DOMINATORS, bb);
938 : :
939 : 577 : gcc_checking_assert (dom_bb && flow_bb_inside_loop_p (loop, dom_bb));
940 : :
941 : 577 : if (!dominated_by_p (CDI_POST_DOMINATORS, dom_bb, bb))
942 : : break;
943 : :
944 : : bb = dom_bb;
945 : : }
946 : 881 : return bb;
947 : : }
948 : :
949 : : /* Given a BB in LOOP, find out all basic blocks in LOOP that BB is control-
950 : : dependent on. */
951 : :
952 : : static hash_set<basic_block> *
953 : 6437 : find_control_dep_blocks (struct loop *loop, basic_block bb)
954 : : {
955 : : /* BB has same control dependency as loop header, then it is not control-
956 : : dependent on any basic block in LOOP. */
957 : 6437 : if (dominated_by_p (CDI_POST_DOMINATORS, loop->header, bb))
958 : : return NULL;
959 : :
960 : 849 : basic_block equiv_head = get_control_equiv_head_block (loop, bb);
961 : :
962 : 849 : if (equiv_head->aux)
963 : : {
964 : : /* There is a basic block containing control dependency equivalent
965 : : to BB. No need to recompute that, and also set this information
966 : : to other equivalent basic blocks. */
967 : 370 : for (; bb != equiv_head;
968 : 0 : bb = get_immediate_dominator (CDI_DOMINATORS, bb))
969 : 0 : bb->aux = equiv_head->aux;
970 : 370 : return (hash_set<basic_block> *) equiv_head->aux;
971 : : }
972 : :
973 : : /* A basic block X is control-dependent on another Y iff there exists
974 : : a path from X to Y, in which every basic block other than X and Y
975 : : is post-dominated by Y, but X is not post-dominated by Y.
976 : :
977 : : According to this rule, traverse basic blocks in the loop backwards
978 : : starting from BB, if a basic block is post-dominated by BB, extend
979 : : current post-dominating path to this block, otherwise it is another
980 : : one that BB is control-dependent on. */
981 : :
982 : 479 : auto_vec<basic_block> pdom_worklist;
983 : 479 : hash_set<basic_block> pdom_visited;
984 : 479 : hash_set<basic_block> *dep_bbs = new hash_set<basic_block>;
985 : :
986 : 479 : pdom_worklist.safe_push (equiv_head);
987 : :
988 : 511 : do
989 : : {
990 : 511 : basic_block pdom_bb = pdom_worklist.pop ();
991 : 511 : edge_iterator ei;
992 : 511 : edge e;
993 : :
994 : 511 : if (pdom_visited.add (pdom_bb))
995 : 0 : continue;
996 : :
997 : 1047 : FOR_EACH_EDGE (e, ei, pdom_bb->preds)
998 : : {
999 : 536 : basic_block pred_bb = e->src;
1000 : :
1001 : 536 : if (!dominated_by_p (CDI_POST_DOMINATORS, pred_bb, bb))
1002 : : {
1003 : 504 : dep_bbs->add (pred_bb);
1004 : 536 : continue;
1005 : : }
1006 : :
1007 : 32 : pred_bb = get_control_equiv_head_block (loop, pred_bb);
1008 : :
1009 : 32 : if (pdom_visited.contains (pred_bb))
1010 : 0 : continue;
1011 : :
1012 : 32 : if (!pred_bb->aux)
1013 : : {
1014 : 32 : pdom_worklist.safe_push (pred_bb);
1015 : 32 : continue;
1016 : : }
1017 : :
1018 : : /* If control dependency of basic block is available, fast extend
1019 : : post-dominating path using the information instead of advancing
1020 : : forward step-by-step. */
1021 : 0 : hash_set<basic_block> *pred_dep_bbs
1022 : : = (hash_set<basic_block> *) pred_bb->aux;
1023 : :
1024 : 0 : for (hash_set<basic_block>::iterator iter = pred_dep_bbs->begin ();
1025 : 0 : iter != pred_dep_bbs->end (); ++iter)
1026 : : {
1027 : 0 : basic_block pred_dep_bb = *iter;
1028 : :
1029 : : /* Basic blocks can either be in control dependency of BB, or
1030 : : must be post-dominated by BB, if so, extend the path from
1031 : : these basic blocks. */
1032 : 0 : if (!dominated_by_p (CDI_POST_DOMINATORS, pred_dep_bb, bb))
1033 : 0 : dep_bbs->add (pred_dep_bb);
1034 : 0 : else if (!pdom_visited.contains (pred_dep_bb))
1035 : 0 : pdom_worklist.safe_push (pred_dep_bb);
1036 : : }
1037 : : }
1038 : 1022 : } while (!pdom_worklist.is_empty ());
1039 : :
1040 : : /* Record computed control dependencies in loop so that we can reach them
1041 : : when reclaiming resources. */
1042 : 479 : ((split_info *) loop->aux)->control_deps.safe_push (dep_bbs);
1043 : :
1044 : : /* Associate control dependence with related equivalent basic blocks. */
1045 : 521 : for (equiv_head->aux = dep_bbs; bb != equiv_head;
1046 : 42 : bb = get_immediate_dominator (CDI_DOMINATORS, bb))
1047 : 42 : bb->aux = dep_bbs;
1048 : :
1049 : 479 : return dep_bbs;
1050 : 479 : }
1051 : :
1052 : : /* Forward declaration */
1053 : :
1054 : : static bool
1055 : : stmt_semi_invariant_p_1 (struct loop *loop, gimple *stmt,
1056 : : const_basic_block skip_head,
1057 : : hash_map<gimple *, bool> &stmt_stat);
1058 : :
1059 : : /* Given STMT, memory load or pure call statement, check whether it is impacted
1060 : : by some memory store in LOOP, excluding trace starting from SKIP_HEAD (the
1061 : : trace is composed of SKIP_HEAD and those basic block dominated by it, always
1062 : : corresponds to one branch of a conditional statement). If SKIP_HEAD is
1063 : : NULL, all basic blocks of LOOP are checked. */
1064 : :
1065 : : static bool
1066 : 16602 : vuse_semi_invariant_p (struct loop *loop, gimple *stmt,
1067 : : const_basic_block skip_head)
1068 : : {
1069 : 16602 : split_info *info = (split_info *) loop->aux;
1070 : 16602 : tree rhs = NULL_TREE;
1071 : 16602 : ao_ref ref;
1072 : 16602 : gimple *store;
1073 : 16602 : unsigned i;
1074 : :
1075 : : /* Collect memory store/clobber statements if haven't done that. */
1076 : 16602 : if (info->need_init)
1077 : 7296 : find_vdef_in_loop (loop);
1078 : :
1079 : 16602 : if (is_gimple_assign (stmt))
1080 : 16449 : rhs = gimple_assign_rhs1 (stmt);
1081 : :
1082 : 16602 : ao_ref_init (&ref, rhs);
1083 : :
1084 : 57363 : FOR_EACH_VEC_ELT (info->memory_stores, i, store)
1085 : : {
1086 : : /* Skip basic blocks dominated by SKIP_HEAD, if non-NULL. */
1087 : 46071 : if (skip_head
1088 : 32479 : && dominated_by_p (CDI_DOMINATORS, gimple_bb (store), skip_head))
1089 : 13592 : continue;
1090 : :
1091 : 18887 : if (!ref.ref || stmt_may_clobber_ref_p_1 (store, &ref))
1092 : 8320 : return false;
1093 : : }
1094 : :
1095 : : return true;
1096 : : }
1097 : :
1098 : : /* Suppose one condition branch, led by SKIP_HEAD, is not executed since
1099 : : certain iteration of LOOP, check whether an SSA name (NAME) remains
1100 : : unchanged in next iteration. We call this characteristic semi-
1101 : : invariantness. SKIP_HEAD might be NULL, if so, nothing excluded, all basic
1102 : : blocks and control flows in the loop will be considered. Semi-invariant
1103 : : state of checked statement is cached in hash map STMT_STAT to avoid
1104 : : redundant computation in possible following re-check. */
1105 : :
1106 : : static inline bool
1107 : 94528 : ssa_semi_invariant_p (struct loop *loop, tree name,
1108 : : const_basic_block skip_head,
1109 : : hash_map<gimple *, bool> &stmt_stat)
1110 : : {
1111 : 94528 : gimple *def = SSA_NAME_DEF_STMT (name);
1112 : 94528 : const_basic_block def_bb = gimple_bb (def);
1113 : :
1114 : : /* An SSA name defined outside loop is definitely semi-invariant. */
1115 : 94528 : if (!def_bb || !flow_bb_inside_loop_p (loop, def_bb))
1116 : 9588 : return true;
1117 : :
1118 : 84940 : if (SSA_NAME_OCCURS_IN_ABNORMAL_PHI (name))
1119 : : return false;
1120 : :
1121 : 84932 : return stmt_semi_invariant_p_1 (loop, def, skip_head, stmt_stat);
1122 : : }
1123 : :
1124 : : /* Check whether a loop iteration PHI node (LOOP_PHI) defines a value that is
1125 : : semi-invariant in LOOP. Basic blocks dominated by SKIP_HEAD (if non-NULL),
1126 : : are excluded from LOOP. */
1127 : :
1128 : : static bool
1129 : 19098 : loop_iter_phi_semi_invariant_p (struct loop *loop, gphi *loop_phi,
1130 : : const_basic_block skip_head)
1131 : : {
1132 : 19098 : const_edge latch = loop_latch_edge (loop);
1133 : 19098 : tree name = gimple_phi_result (loop_phi);
1134 : 19098 : tree from = PHI_ARG_DEF_FROM_EDGE (loop_phi, latch);
1135 : :
1136 : 19098 : gcc_checking_assert (from);
1137 : :
1138 : : /* Loop iteration PHI node locates in loop header, and it has two source
1139 : : operands, one is an initial value coming from outside the loop, the other
1140 : : is a value through latch of the loop, which is derived in last iteration,
1141 : : we call the latter latch value. From the PHI node to definition of latch
1142 : : value, if excluding branch trace starting from SKIP_HEAD, except copy-
1143 : : assignment or likewise, there is no other kind of value redefinition, SSA
1144 : : name defined by the PHI node is semi-invariant.
1145 : :
1146 : : loop entry
1147 : : | .--- latch ---.
1148 : : | | |
1149 : : v v |
1150 : : x_1 = PHI <x_0, x_3> |
1151 : : | |
1152 : : v |
1153 : : .------- if (cond) -------. |
1154 : : | | |
1155 : : | [ SKIP ] |
1156 : : | | |
1157 : : | x_2 = ... |
1158 : : | | |
1159 : : '---- T ---->.<---- F ----' |
1160 : : | |
1161 : : v |
1162 : : x_3 = PHI <x_1, x_2> |
1163 : : | |
1164 : : '----------------------'
1165 : :
1166 : : Suppose in certain iteration, execution flow in above graph goes through
1167 : : true branch, which means that one source value to define x_3 in false
1168 : : branch (x_2) is skipped, x_3 only comes from x_1, and x_1 in next
1169 : : iterations is defined by x_3, we know that x_1 will never changed if COND
1170 : : always chooses true branch from then on. */
1171 : :
1172 : 21903 : while (from != name)
1173 : : {
1174 : : /* A new value comes from a CONSTANT. */
1175 : 21171 : if (TREE_CODE (from) != SSA_NAME)
1176 : : return false;
1177 : :
1178 : 20618 : gimple *stmt = SSA_NAME_DEF_STMT (from);
1179 : 20618 : const_basic_block bb = gimple_bb (stmt);
1180 : :
1181 : : /* A new value comes from outside the loop. */
1182 : 20618 : if (!bb || !flow_bb_inside_loop_p (loop, bb))
1183 : 30 : return false;
1184 : :
1185 : 20588 : from = NULL_TREE;
1186 : :
1187 : 20588 : if (gimple_code (stmt) == GIMPLE_PHI)
1188 : : {
1189 : 3955 : gphi *phi = as_a <gphi *> (stmt);
1190 : :
1191 : 12036 : for (unsigned i = 0; i < gimple_phi_num_args (phi); ++i)
1192 : : {
1193 : 9231 : if (skip_head)
1194 : : {
1195 : 8658 : const_edge e = gimple_phi_arg_edge (phi, i);
1196 : :
1197 : : /* Don't consider redefinitions in excluded basic blocks. */
1198 : 8658 : if (dominated_by_p (CDI_DOMINATORS, e->src, skip_head))
1199 : 3606 : continue;
1200 : : }
1201 : :
1202 : 5625 : tree arg = gimple_phi_arg_def (phi, i);
1203 : :
1204 : 5625 : if (!from)
1205 : : from = arg;
1206 : 1670 : else if (!operand_equal_p (from, arg, 0))
1207 : : /* There are more than one source operands that provide
1208 : : different values to the SSA name, it is variant. */
1209 : : return false;
1210 : : }
1211 : : }
1212 : 16633 : else if (gimple_code (stmt) == GIMPLE_ASSIGN)
1213 : : {
1214 : : /* For simple value copy, check its rhs instead. */
1215 : 16589 : if (gimple_assign_ssa_name_copy_p (stmt))
1216 : 0 : from = gimple_assign_rhs1 (stmt);
1217 : : }
1218 : :
1219 : : /* Any other kind of definition is deemed to introduce a new value
1220 : : to the SSA name. */
1221 : 19438 : if (!from)
1222 : : return false;
1223 : : }
1224 : : return true;
1225 : : }
1226 : :
1227 : : /* Check whether conditional predicates that BB is control-dependent on, are
1228 : : semi-invariant in LOOP. Basic blocks dominated by SKIP_HEAD (if non-NULL),
1229 : : are excluded from LOOP. Semi-invariant state of checked statement is cached
1230 : : in hash map STMT_STAT. */
1231 : :
1232 : : static bool
1233 : 6437 : control_dep_semi_invariant_p (struct loop *loop, basic_block bb,
1234 : : const_basic_block skip_head,
1235 : : hash_map<gimple *, bool> &stmt_stat)
1236 : : {
1237 : 6437 : hash_set<basic_block> *dep_bbs = find_control_dep_blocks (loop, bb);
1238 : :
1239 : 6437 : if (!dep_bbs)
1240 : : return true;
1241 : :
1242 : 936 : for (hash_set<basic_block>::iterator iter = dep_bbs->begin ();
1243 : 1023 : iter != dep_bbs->end (); ++iter)
1244 : : {
1245 : 854 : gimple *last = *gsi_last_bb (*iter);
1246 : 854 : if (!last)
1247 : 767 : return false;
1248 : :
1249 : : /* Only check condition predicates. */
1250 : 854 : if (gimple_code (last) != GIMPLE_COND
1251 : 854 : && gimple_code (last) != GIMPLE_SWITCH)
1252 : : return false;
1253 : :
1254 : 852 : if (!stmt_semi_invariant_p_1 (loop, last, skip_head, stmt_stat))
1255 : : return false;
1256 : : }
1257 : :
1258 : 82 : return true;
1259 : : }
1260 : :
1261 : : /* Check whether STMT is semi-invariant in LOOP, iff all its operands are
1262 : : semi-invariant, consequently, all its defined values are semi-invariant.
1263 : : Basic blocks dominated by SKIP_HEAD (if non-NULL), are excluded from LOOP.
1264 : : Semi-invariant state of checked statement is cached in hash map
1265 : : STMT_STAT. */
1266 : :
1267 : : static bool
1268 : 118254 : stmt_semi_invariant_p_1 (struct loop *loop, gimple *stmt,
1269 : : const_basic_block skip_head,
1270 : : hash_map<gimple *, bool> &stmt_stat)
1271 : : {
1272 : 118254 : bool existed;
1273 : 118254 : bool &invar = stmt_stat.get_or_insert (stmt, &existed);
1274 : :
1275 : 118254 : if (existed)
1276 : 466 : return invar;
1277 : :
1278 : : /* A statement might depend on itself, which is treated as variant. So set
1279 : : state of statement under check to be variant to ensure that. */
1280 : 117788 : invar = false;
1281 : :
1282 : 117788 : if (gimple_code (stmt) == GIMPLE_PHI)
1283 : : {
1284 : 22117 : gphi *phi = as_a <gphi *> (stmt);
1285 : :
1286 : 22117 : if (gimple_bb (stmt) == loop->header)
1287 : : {
1288 : : /* If the entry value is subject to abnormal coalescing
1289 : : avoid the transform since we're going to duplicate the
1290 : : loop header and thus likely introduce overlapping life-ranges
1291 : : between the PHI def and the entry on the path when the
1292 : : first loop is skipped. */
1293 : 19098 : tree entry_def
1294 : 19098 : = PHI_ARG_DEF_FROM_EDGE (phi, loop_preheader_edge (loop));
1295 : 19098 : if (TREE_CODE (entry_def) == SSA_NAME
1296 : 19098 : && SSA_NAME_OCCURS_IN_ABNORMAL_PHI (entry_def))
1297 : : return false;
1298 : 19098 : invar = loop_iter_phi_semi_invariant_p (loop, phi, skip_head);
1299 : 19098 : return invar;
1300 : : }
1301 : :
1302 : : /* For a loop PHI node that does not locate in loop header, it is semi-
1303 : : invariant only if two conditions are met. The first is its source
1304 : : values are derived from CONSTANT (including loop-invariant value), or
1305 : : from SSA name defined by semi-invariant loop iteration PHI node. The
1306 : : second is its source incoming edges are control-dependent on semi-
1307 : : invariant conditional predicates. */
1308 : 3068 : for (unsigned i = 0; i < gimple_phi_num_args (phi); ++i)
1309 : : {
1310 : 3054 : const_edge e = gimple_phi_arg_edge (phi, i);
1311 : 3054 : tree arg = gimple_phi_arg_def (phi, i);
1312 : :
1313 : 3054 : if (TREE_CODE (arg) == SSA_NAME)
1314 : : {
1315 : 2846 : if (!ssa_semi_invariant_p (loop, arg, skip_head, stmt_stat))
1316 : : return false;
1317 : :
1318 : : /* If source value is defined in location from where the source
1319 : : edge comes in, no need to check control dependency again
1320 : : since this has been done in above SSA name check stage. */
1321 : 56 : if (e->src == gimple_bb (SSA_NAME_DEF_STMT (arg)))
1322 : 4 : continue;
1323 : : }
1324 : :
1325 : 260 : if (!control_dep_semi_invariant_p (loop, e->src, skip_head,
1326 : : stmt_stat))
1327 : : return false;
1328 : : }
1329 : : }
1330 : : else
1331 : : {
1332 : 95671 : ssa_op_iter iter;
1333 : 95671 : tree use;
1334 : :
1335 : : /* Volatile memory load or return of normal (non-const/non-pure) call
1336 : : should not be treated as constant in each iteration of loop. */
1337 : 95671 : if (gimple_has_side_effects (stmt))
1338 : 89508 : return false;
1339 : :
1340 : : /* Check if any memory store may kill memory load at this place. */
1341 : 154259 : if (gimple_vuse (stmt) && !vuse_semi_invariant_p (loop, stmt, skip_head))
1342 : : return false;
1343 : :
1344 : : /* Although operand of a statement might be SSA name, CONSTANT or
1345 : : VARDECL, here we only need to check SSA name operands. This is
1346 : : because check on VARDECL operands, which involve memory loads,
1347 : : must have been done prior to invocation of this function in
1348 : : vuse_semi_invariant_p. */
1349 : 97845 : FOR_EACH_SSA_TREE_OPERAND (use, stmt, iter, SSA_OP_USE)
1350 : 91682 : if (!ssa_semi_invariant_p (loop, use, skip_head, stmt_stat))
1351 : : return false;
1352 : : }
1353 : :
1354 : 6177 : if (!control_dep_semi_invariant_p (loop, gimple_bb (stmt), skip_head,
1355 : : stmt_stat))
1356 : : return false;
1357 : :
1358 : : /* Here we SHOULD NOT use invar = true, since hash map might be changed due
1359 : : to new insertion, and thus invar may point to invalid memory. */
1360 : 5625 : stmt_stat.put (stmt, true);
1361 : 5625 : return true;
1362 : : }
1363 : :
1364 : : /* A helper function to check whether STMT is semi-invariant in LOOP. Basic
1365 : : blocks dominated by SKIP_HEAD (if non-NULL), are excluded from LOOP. */
1366 : :
1367 : : static bool
1368 : 32470 : stmt_semi_invariant_p (struct loop *loop, gimple *stmt,
1369 : : const_basic_block skip_head)
1370 : : {
1371 : 32470 : hash_map<gimple *, bool> stmt_stat;
1372 : 32470 : return stmt_semi_invariant_p_1 (loop, stmt, skip_head, stmt_stat);
1373 : 32470 : }
1374 : :
1375 : : /* Determine when conditional statement never transfers execution to one of its
1376 : : branch, whether we can remove the branch's leading basic block (BRANCH_BB)
1377 : : and those basic blocks dominated by BRANCH_BB. */
1378 : :
1379 : : static bool
1380 : 35844 : branch_removable_p (basic_block branch_bb)
1381 : : {
1382 : 35844 : edge_iterator ei;
1383 : 35844 : edge e;
1384 : :
1385 : 35844 : if (single_pred_p (branch_bb))
1386 : : return true;
1387 : :
1388 : 6736 : FOR_EACH_EDGE (e, ei, branch_bb->preds)
1389 : : {
1390 : 6736 : if (dominated_by_p (CDI_DOMINATORS, e->src, branch_bb))
1391 : 0 : continue;
1392 : :
1393 : 6736 : if (dominated_by_p (CDI_DOMINATORS, branch_bb, e->src))
1394 : 2378 : continue;
1395 : :
1396 : : /* The branch can be reached from opposite branch, or from some
1397 : : statement not dominated by the conditional statement. */
1398 : : return false;
1399 : : }
1400 : :
1401 : : return true;
1402 : : }
1403 : :
1404 : : /* Find out which branch of a conditional statement (COND) is invariant in the
1405 : : execution context of LOOP. That is: once the branch is selected in certain
1406 : : iteration of the loop, any operand that contributes to computation of the
1407 : : conditional statement remains unchanged in all following iterations. */
1408 : :
1409 : : static edge
1410 : 105004 : get_cond_invariant_branch (struct loop *loop, gcond *cond)
1411 : : {
1412 : 105004 : basic_block cond_bb = gimple_bb (cond);
1413 : 105004 : basic_block targ_bb[2];
1414 : 105004 : bool invar[2];
1415 : 105004 : unsigned invar_checks = 0;
1416 : :
1417 : 171118 : for (unsigned i = 0; i < 2; i++)
1418 : : {
1419 : 153196 : targ_bb[i] = EDGE_SUCC (cond_bb, i)->dest;
1420 : :
1421 : : /* One branch directs to loop exit, no need to perform loop split upon
1422 : : this conditional statement. Firstly, it is trivial if the exit branch
1423 : : is semi-invariant, for the statement is just to break loop. Secondly,
1424 : : if the opposite branch is semi-invariant, it means that the statement
1425 : : is real loop-invariant, which is covered by loop unswitch. */
1426 : 153196 : if (!flow_bb_inside_loop_p (loop, targ_bb[i]))
1427 : : return NULL;
1428 : : }
1429 : :
1430 : 53766 : for (unsigned i = 0; i < 2; i++)
1431 : : {
1432 : 35844 : invar[!i] = false;
1433 : :
1434 : 35844 : if (!branch_removable_p (targ_bb[i]))
1435 : 4358 : continue;
1436 : :
1437 : : /* Given a semi-invariant branch, if its opposite branch dominates
1438 : : loop latch, it and its following trace will only be executed in
1439 : : final iteration of loop, namely it is not part of repeated body
1440 : : of the loop. Similar to the above case that the branch is loop
1441 : : exit, no need to split loop. */
1442 : 31486 : if (dominated_by_p (CDI_DOMINATORS, loop->latch, targ_bb[i]))
1443 : 0 : continue;
1444 : :
1445 : 31486 : invar[!i] = stmt_semi_invariant_p (loop, cond, targ_bb[i]);
1446 : 31486 : invar_checks++;
1447 : : }
1448 : :
1449 : : /* With both branches being invariant (handled by loop unswitch) or
1450 : : variant is not what we want. */
1451 : 17922 : if (invar[0] ^ !invar[1])
1452 : : return NULL;
1453 : :
1454 : : /* Found a real loop-invariant condition, do nothing. */
1455 : 1304 : if (invar_checks < 2 && stmt_semi_invariant_p (loop, cond, NULL))
1456 : : return NULL;
1457 : :
1458 : 1103 : return EDGE_SUCC (cond_bb, invar[0] ? 0 : 1);
1459 : : }
1460 : :
1461 : : /* Calculate increased code size measured by estimated insn number if applying
1462 : : loop split upon certain branch (BRANCH_EDGE) of a conditional statement. */
1463 : :
1464 : : static int
1465 : 675 : compute_added_num_insns (struct loop *loop, const_edge branch_edge)
1466 : : {
1467 : 675 : basic_block cond_bb = branch_edge->src;
1468 : 675 : unsigned branch = EDGE_SUCC (cond_bb, 1) == branch_edge;
1469 : 675 : basic_block opposite_bb = EDGE_SUCC (cond_bb, !branch)->dest;
1470 : 675 : basic_block *bbs = ((split_info *) loop->aux)->bbs;
1471 : 675 : int num = 0;
1472 : :
1473 : 6239 : for (unsigned i = 0; i < loop->num_nodes; i++)
1474 : : {
1475 : : /* Do no count basic blocks only in opposite branch. */
1476 : 5564 : if (dominated_by_p (CDI_DOMINATORS, bbs[i], opposite_bb))
1477 : 2329 : continue;
1478 : :
1479 : 6470 : num += estimate_num_insns_seq (bb_seq (bbs[i]), &eni_size_weights);
1480 : : }
1481 : :
1482 : : /* It is unnecessary to evaluate expression of the conditional statement
1483 : : in new loop that contains only invariant branch. This expression should
1484 : : be constant value (either true or false). Exclude code size of insns
1485 : : that contribute to computation of the expression. */
1486 : :
1487 : 675 : auto_vec<gimple *> worklist;
1488 : 675 : hash_set<gimple *> removed;
1489 : 675 : gimple *stmt = last_nondebug_stmt (cond_bb);
1490 : :
1491 : 675 : worklist.safe_push (stmt);
1492 : 675 : removed.add (stmt);
1493 : 675 : num -= estimate_num_insns (stmt, &eni_size_weights);
1494 : :
1495 : 816 : do
1496 : : {
1497 : 816 : ssa_op_iter opnd_iter;
1498 : 816 : use_operand_p opnd_p;
1499 : :
1500 : 816 : stmt = worklist.pop ();
1501 : 2490 : FOR_EACH_PHI_OR_STMT_USE (opnd_p, stmt, opnd_iter, SSA_OP_USE)
1502 : : {
1503 : 858 : tree opnd = USE_FROM_PTR (opnd_p);
1504 : :
1505 : 858 : if (TREE_CODE (opnd) != SSA_NAME || SSA_NAME_IS_DEFAULT_DEF (opnd))
1506 : 88 : continue;
1507 : :
1508 : 840 : gimple *opnd_stmt = SSA_NAME_DEF_STMT (opnd);
1509 : 840 : use_operand_p use_p;
1510 : 840 : imm_use_iterator use_iter;
1511 : :
1512 : 840 : if (removed.contains (opnd_stmt)
1513 : 840 : || !flow_bb_inside_loop_p (loop, gimple_bb (opnd_stmt)))
1514 : 70 : continue;
1515 : :
1516 : 933 : FOR_EACH_IMM_USE_FAST (use_p, use_iter, opnd)
1517 : : {
1518 : 792 : gimple *use_stmt = USE_STMT (use_p);
1519 : :
1520 : 792 : if (!is_gimple_debug (use_stmt) && !removed.contains (use_stmt))
1521 : : {
1522 : 629 : opnd_stmt = NULL;
1523 : 629 : break;
1524 : : }
1525 : : }
1526 : :
1527 : 770 : if (opnd_stmt)
1528 : : {
1529 : 141 : worklist.safe_push (opnd_stmt);
1530 : 141 : removed.add (opnd_stmt);
1531 : 141 : num -= estimate_num_insns (opnd_stmt, &eni_size_weights);
1532 : : }
1533 : : }
1534 : 1632 : } while (!worklist.is_empty ());
1535 : :
1536 : 675 : gcc_assert (num >= 0);
1537 : 675 : return num;
1538 : 675 : }
1539 : :
1540 : : /* Find out loop-invariant branch of a conditional statement (COND) if it has,
1541 : : and check whether it is eligible and profitable to perform loop split upon
1542 : : this branch in LOOP. */
1543 : :
1544 : : static edge
1545 : 105004 : get_cond_branch_to_split_loop (struct loop *loop, gcond *cond)
1546 : : {
1547 : 105004 : edge invar_branch = get_cond_invariant_branch (loop, cond);
1548 : 105004 : if (!invar_branch)
1549 : : return NULL;
1550 : :
1551 : : /* When accurate profile information is available, and execution
1552 : : frequency of the branch is too low, just let it go. */
1553 : 675 : profile_probability prob = invar_branch->probability;
1554 : 675 : if (prob.reliable_p ())
1555 : : {
1556 : 7 : int thres = param_min_loop_cond_split_prob;
1557 : :
1558 : 14 : if (prob < profile_probability::always ().apply_scale (thres, 100))
1559 : 0 : return NULL;
1560 : : }
1561 : :
1562 : : /* Add a threshold for increased code size to disable loop split. */
1563 : 675 : if (compute_added_num_insns (loop, invar_branch) > param_max_peeled_insns)
1564 : : return NULL;
1565 : :
1566 : : return invar_branch;
1567 : : }
1568 : :
1569 : : /* Given a loop (LOOP1) with a loop-invariant branch (INVAR_BRANCH) of some
1570 : : conditional statement, perform loop split transformation illustrated
1571 : : as the following graph.
1572 : :
1573 : : .-------T------ if (true) ------F------.
1574 : : | .---------------. |
1575 : : | | | |
1576 : : v | v v
1577 : : pre-header | pre-header
1578 : : | .------------. | | .------------.
1579 : : | | | | | | |
1580 : : | v | | | v |
1581 : : header | | header |
1582 : : | | | | |
1583 : : .--- if (cond) ---. | | .--- if (true) ---. |
1584 : : | | | | | | |
1585 : : invariant | | | invariant | |
1586 : : | | | | | | |
1587 : : '---T--->.<---F---' | | '---T--->.<---F---' |
1588 : : | | / | |
1589 : : stmts | / stmts |
1590 : : | F T | |
1591 : : / \ | / / \ |
1592 : : .-------* * [ if (cond) ] .-------* * |
1593 : : | | | | | |
1594 : : | latch | | latch |
1595 : : | | | | | |
1596 : : | '------------' | '------------'
1597 : : '------------------------. .-----------'
1598 : : loop1 | | loop2
1599 : : v v
1600 : : exits
1601 : :
1602 : : In the graph, loop1 represents the part derived from original one, and
1603 : : loop2 is duplicated using loop_version (), which corresponds to the part
1604 : : of original one being splitted out. In original latch edge of loop1, we
1605 : : insert a new conditional statement duplicated from the semi-invariant cond,
1606 : : and one of its branch goes back to loop1 header as a latch edge, and the
1607 : : other branch goes to loop2 pre-header as an entry edge. And also in loop2,
1608 : : we abandon the variant branch of the conditional statement by setting a
1609 : : constant bool condition, based on which branch is semi-invariant. */
1610 : :
1611 : : static bool
1612 : 671 : do_split_loop_on_cond (struct loop *loop1, edge invar_branch)
1613 : : {
1614 : 671 : basic_block cond_bb = invar_branch->src;
1615 : 671 : bool true_invar = !!(invar_branch->flags & EDGE_TRUE_VALUE);
1616 : 1342 : gcond *cond = as_a <gcond *> (*gsi_last_bb (cond_bb));
1617 : :
1618 : 671 : gcc_assert (cond_bb->loop_father == loop1);
1619 : :
1620 : 671 : if (dump_enabled_p ())
1621 : 254 : dump_printf_loc (MSG_OPTIMIZED_LOCATIONS, cond,
1622 : : "loop split on semi-invariant condition at %s branch\n",
1623 : : true_invar ? "true" : "false");
1624 : :
1625 : 671 : initialize_original_copy_tables ();
1626 : :
1627 : 671 : struct loop *loop2 = loop_version (loop1, boolean_true_node, NULL,
1628 : : invar_branch->probability.invert (),
1629 : : invar_branch->probability,
1630 : : profile_probability::always (),
1631 : : profile_probability::always (),
1632 : : true);
1633 : 671 : if (!loop2)
1634 : : {
1635 : 0 : free_original_copy_tables ();
1636 : 0 : return false;
1637 : : }
1638 : :
1639 : 671 : basic_block cond_bb_copy = get_bb_copy (cond_bb);
1640 : 1342 : gcond *cond_copy = as_a<gcond *> (*gsi_last_bb (cond_bb_copy));
1641 : :
1642 : : /* Replace the condition in loop2 with a bool constant to let PassManager
1643 : : remove the variant branch after current pass completes. */
1644 : 671 : if (true_invar)
1645 : 225 : gimple_cond_make_true (cond_copy);
1646 : : else
1647 : 446 : gimple_cond_make_false (cond_copy);
1648 : :
1649 : 671 : update_stmt (cond_copy);
1650 : :
1651 : : /* Insert a new conditional statement on latch edge of loop1, its condition
1652 : : is duplicated from the semi-invariant. This statement acts as a switch
1653 : : to transfer execution from loop1 to loop2, when loop1 enters into
1654 : : invariant state. */
1655 : 671 : basic_block latch_bb = split_edge (loop_latch_edge (loop1));
1656 : 671 : basic_block break_bb = split_edge (single_pred_edge (latch_bb));
1657 : 671 : gimple *break_cond = gimple_build_cond (gimple_cond_code(cond),
1658 : : gimple_cond_lhs (cond),
1659 : : gimple_cond_rhs (cond),
1660 : : NULL_TREE, NULL_TREE);
1661 : :
1662 : 671 : gimple_stmt_iterator gsi = gsi_last_bb (break_bb);
1663 : 671 : gsi_insert_after (&gsi, break_cond, GSI_NEW_STMT);
1664 : :
1665 : 671 : edge to_loop1 = single_succ_edge (break_bb);
1666 : 671 : edge to_loop2 = make_edge (break_bb, loop_preheader_edge (loop2)->src, 0);
1667 : :
1668 : 671 : to_loop1->flags &= ~EDGE_FALLTHRU;
1669 : 671 : to_loop1->flags |= true_invar ? EDGE_FALSE_VALUE : EDGE_TRUE_VALUE;
1670 : 671 : to_loop2->flags |= true_invar ? EDGE_TRUE_VALUE : EDGE_FALSE_VALUE;
1671 : :
1672 : : /* Due to introduction of a control flow edge from loop1 latch to loop2
1673 : : pre-header, we should update PHIs in loop2 to reflect this connection
1674 : : between loop1 and loop2. */
1675 : 671 : connect_loop_phis (loop1, loop2, to_loop2);
1676 : :
1677 : 671 : edge true_edge, false_edge, skip_edge1, skip_edge2;
1678 : 671 : extract_true_false_edges_from_block (cond_bb, &true_edge, &false_edge);
1679 : :
1680 : 671 : skip_edge1 = true_invar ? false_edge : true_edge;
1681 : 671 : skip_edge2 = true_invar ? true_edge : false_edge;
1682 : 671 : fix_loop_bb_probability (loop1, loop2, skip_edge1, skip_edge2);
1683 : :
1684 : : /* Fix first loop's exit probability after scaling. */
1685 : 671 : to_loop1->probability = invar_branch->probability.invert ();
1686 : 671 : to_loop2->probability = invar_branch->probability;
1687 : :
1688 : 671 : free_original_copy_tables ();
1689 : :
1690 : 671 : return true;
1691 : : }
1692 : :
1693 : : /* Traverse all conditional statements in LOOP, to find out a good candidate
1694 : : upon which we can do loop split. */
1695 : :
1696 : : static bool
1697 : 63711 : split_loop_on_cond (struct loop *loop)
1698 : : {
1699 : 63711 : split_info *info = new split_info ();
1700 : 63711 : basic_block *bbs = info->bbs = get_loop_body (loop);
1701 : 63711 : bool do_split = false;
1702 : :
1703 : : /* Allocate an area to keep temporary info, and associate its address
1704 : : with loop aux field. */
1705 : 63711 : loop->aux = info;
1706 : :
1707 : 432047 : for (unsigned i = 0; i < loop->num_nodes; i++)
1708 : 368336 : bbs[i]->aux = NULL;
1709 : :
1710 : 426931 : for (unsigned i = 0; i < loop->num_nodes; i++)
1711 : : {
1712 : 363891 : basic_block bb = bbs[i];
1713 : :
1714 : : /* We only consider conditional statement, which be executed at most once
1715 : : in each iteration of the loop. So skip statements in inner loops. */
1716 : 363891 : if ((bb->loop_father != loop) || (bb->flags & BB_IRREDUCIBLE_LOOP))
1717 : 104469 : continue;
1718 : :
1719 : : /* Actually this check is not a must constraint. With it, we can ensure
1720 : : conditional statement will always be executed in each iteration. */
1721 : 259422 : if (!dominated_by_p (CDI_DOMINATORS, loop->latch, bb))
1722 : 82530 : continue;
1723 : :
1724 : 353784 : gcond *cond = safe_dyn_cast <gcond *> (*gsi_last_bb (bb));
1725 : 176892 : if (!cond)
1726 : 71888 : continue;
1727 : :
1728 : 105004 : edge branch_edge = get_cond_branch_to_split_loop (loop, cond);
1729 : :
1730 : 105004 : if (branch_edge)
1731 : : {
1732 : 671 : do_split_loop_on_cond (loop, branch_edge);
1733 : 671 : do_split = true;
1734 : 671 : break;
1735 : : }
1736 : : }
1737 : :
1738 : 63711 : delete info;
1739 : 63711 : loop->aux = NULL;
1740 : :
1741 : 63711 : return do_split;
1742 : : }
1743 : :
1744 : : /* Main entry point. Perform loop splitting on all suitable loops. */
1745 : :
1746 : : static unsigned int
1747 : 24015 : tree_ssa_split_loops (void)
1748 : : {
1749 : 24015 : bool changed = false;
1750 : :
1751 : 24015 : gcc_assert (scev_initialized_p ());
1752 : :
1753 : 24015 : calculate_dominance_info (CDI_POST_DOMINATORS);
1754 : :
1755 : 161815 : for (auto loop : loops_list (cfun, LI_INCLUDE_ROOT))
1756 : 89770 : loop->aux = NULL;
1757 : :
1758 : : /* Go through all loops starting from innermost. */
1759 : 137800 : for (auto loop : loops_list (cfun, LI_FROM_INNERMOST))
1760 : : {
1761 : 65755 : if (loop->aux)
1762 : : {
1763 : : /* If any of our inner loops was split, don't split us,
1764 : : and mark our containing loop as having had splits as well.
1765 : : This allows for delaying SSA update. */
1766 : 357 : loop_outer (loop)->aux = loop;
1767 : 357 : continue;
1768 : : }
1769 : :
1770 : 65398 : if (optimize_loop_for_size_p (loop))
1771 : 1342 : continue;
1772 : :
1773 : 64056 : if (split_loop (loop) || split_loop_on_cond (loop))
1774 : : {
1775 : : /* Mark our containing loop as having had some split inner loops. */
1776 : 1016 : loop_outer (loop)->aux = loop;
1777 : 1016 : changed = true;
1778 : : }
1779 : 24015 : }
1780 : :
1781 : 163329 : for (auto loop : loops_list (cfun, LI_INCLUDE_ROOT))
1782 : 91284 : loop->aux = NULL;
1783 : :
1784 : 24015 : clear_aux_for_blocks ();
1785 : :
1786 : 24015 : free_dominance_info (CDI_POST_DOMINATORS);
1787 : :
1788 : 24015 : if (changed)
1789 : : {
1790 : 746 : rewrite_into_loop_closed_ssa (NULL, TODO_update_ssa);
1791 : 746 : return TODO_cleanup_cfg;
1792 : : }
1793 : : return 0;
1794 : : }
1795 : :
1796 : : /* Loop splitting pass. */
1797 : :
1798 : : namespace {
1799 : :
1800 : : const pass_data pass_data_loop_split =
1801 : : {
1802 : : GIMPLE_PASS, /* type */
1803 : : "lsplit", /* name */
1804 : : OPTGROUP_LOOP, /* optinfo_flags */
1805 : : TV_LOOP_SPLIT, /* tv_id */
1806 : : PROP_cfg, /* properties_required */
1807 : : 0, /* properties_provided */
1808 : : 0, /* properties_destroyed */
1809 : : 0, /* todo_flags_start */
1810 : : 0, /* todo_flags_finish */
1811 : : };
1812 : :
1813 : : class pass_loop_split : public gimple_opt_pass
1814 : : {
1815 : : public:
1816 : 285617 : pass_loop_split (gcc::context *ctxt)
1817 : 571234 : : gimple_opt_pass (pass_data_loop_split, ctxt)
1818 : : {}
1819 : :
1820 : : /* opt_pass methods: */
1821 : 217809 : bool gate (function *) final override { return flag_split_loops != 0; }
1822 : : unsigned int execute (function *) final override;
1823 : :
1824 : : }; // class pass_loop_split
1825 : :
1826 : : unsigned int
1827 : 24015 : pass_loop_split::execute (function *fun)
1828 : : {
1829 : 48030 : if (number_of_loops (fun) <= 1)
1830 : : return 0;
1831 : :
1832 : 24015 : return tree_ssa_split_loops ();
1833 : : }
1834 : :
1835 : : } // anon namespace
1836 : :
1837 : : gimple_opt_pass *
1838 : 285617 : make_pass_loop_split (gcc::context *ctxt)
1839 : : {
1840 : 285617 : return new pass_loop_split (ctxt);
1841 : : }
|