Line data Source code
1 : /* High-level loop manipulation functions.
2 : Copyright (C) 2004-2026 Free Software Foundation, Inc.
3 :
4 : This file is part of GCC.
5 :
6 : GCC is free software; you can redistribute it and/or modify it
7 : under the terms of the GNU General Public License as published by the
8 : Free Software Foundation; either version 3, or (at your option) any
9 : later version.
10 :
11 : GCC is distributed in the hope that it will be useful, but WITHOUT
12 : ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
13 : FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
14 : for more details.
15 :
16 : You should have received a copy of the GNU General Public License
17 : along with GCC; see the file COPYING3. If not see
18 : <http://www.gnu.org/licenses/>. */
19 :
20 : #include "config.h"
21 : #include "system.h"
22 : #include "coretypes.h"
23 : #include "backend.h"
24 : #include "tree.h"
25 : #include "gimple.h"
26 : #include "cfghooks.h"
27 : #include "tree-pass.h" /* ??? for TODO_update_ssa but this isn't a pass. */
28 : #include "ssa.h"
29 : #include "gimple-pretty-print.h"
30 : #include "fold-const.h"
31 : #include "cfganal.h"
32 : #include "gimplify.h"
33 : #include "gimple-iterator.h"
34 : #include "gimplify-me.h"
35 : #include "tree-cfg.h"
36 : #include "tree-ssa-loop-ivopts.h"
37 : #include "tree-ssa-loop-manip.h"
38 : #include "tree-ssa-loop-niter.h"
39 : #include "tree-ssa-loop.h"
40 : #include "tree-into-ssa.h"
41 : #include "tree-ssa.h"
42 : #include "cfgloop.h"
43 : #include "tree-scalar-evolution.h"
44 : #include "tree-inline.h"
45 :
46 : /* All bitmaps for rewriting into loop-closed SSA go on this obstack,
47 : so that we can free them all at once. */
48 : static bitmap_obstack loop_renamer_obstack;
49 :
50 : /* Insert IV increment statements STMTS before or after INCR_POS;
51 : AFTER selects which. INCR_POS and AFTER can be computed using
52 : standard_iv_increment_position. */
53 :
54 : void
55 1007057 : insert_iv_increment (gimple_stmt_iterator *incr_pos, bool after,
56 : gimple_seq stmts)
57 : {
58 : /* Prevent the increment from inheriting a bogus location if it is not put
59 : immediately after a statement whose location is known. */
60 1007057 : if (after)
61 : {
62 11036 : gimple_stmt_iterator gsi = *incr_pos;
63 11036 : if (!gsi_end_p (gsi))
64 9537 : gsi_next_nondebug (&gsi);
65 11036 : if (gsi_end_p (gsi))
66 : {
67 11036 : edge e = single_succ_edge (gsi_bb (*incr_pos));
68 11036 : gimple_seq_set_location (stmts, e->goto_locus);
69 : }
70 11036 : gsi_insert_seq_after (incr_pos, stmts, GSI_NEW_STMT);
71 : }
72 : else
73 : {
74 996021 : gimple_stmt_iterator gsi = *incr_pos;
75 996021 : if (!gsi_end_p (gsi) && is_gimple_debug (gsi_stmt (gsi)))
76 0 : gsi_next_nondebug (&gsi);
77 996021 : if (!gsi_end_p (gsi))
78 995521 : gimple_seq_set_location (stmts, gimple_location (gsi_stmt (gsi)));
79 996021 : gsi_insert_seq_before (incr_pos, stmts, GSI_NEW_STMT);
80 : }
81 1007057 : }
82 :
83 : /* Creates an induction variable with value BASE (+/-) STEP * iteration in LOOP.
84 : If INCR_OP is PLUS_EXPR, the induction variable is BASE + STEP * iteration.
85 : If INCR_OP is MINUS_EXPR, the induction variable is BASE - STEP * iteration.
86 : It is expected that neither BASE nor STEP are shared with other expressions
87 : (unless the sharing rules allow this). Use VAR as a base var_decl for it
88 : (if NULL, a new temporary will be created). The increment will occur at
89 : INCR_POS (after it if AFTER is true, before it otherwise). INCR_POS and
90 : AFTER can be computed using standard_iv_increment_position. The ssa versions
91 : of the variable before and after increment will be stored in VAR_BEFORE and
92 : VAR_AFTER (unless they are NULL). */
93 :
94 : void
95 991120 : create_iv (tree base, tree_code incr_op, tree step, tree var, class loop *loop,
96 : gimple_stmt_iterator *incr_pos, bool after, tree *var_before,
97 : tree *var_after)
98 : {
99 991120 : gphi *phi;
100 991120 : tree initial, step1;
101 991120 : gimple_seq stmts;
102 991120 : tree vb, va;
103 991120 : gcc_assert (incr_op == PLUS_EXPR || incr_op == MINUS_EXPR);
104 991120 : edge pe = loop_preheader_edge (loop);
105 :
106 991120 : if (var != NULL_TREE)
107 : {
108 605932 : vb = make_ssa_name (var);
109 605932 : va = make_ssa_name (var);
110 : }
111 : else
112 : {
113 385188 : vb = make_temp_ssa_name (TREE_TYPE (base), NULL, "ivtmp");
114 385188 : va = make_temp_ssa_name (TREE_TYPE (base), NULL, "ivtmp");
115 : }
116 991120 : if (var_before)
117 676022 : *var_before = vb;
118 991120 : if (var_after)
119 983981 : *var_after = va;
120 :
121 : /* For easier readability of the created code, produce MINUS_EXPRs
122 : when suitable. */
123 991120 : if (TREE_CODE (step) == INTEGER_CST)
124 : {
125 934118 : if (TYPE_UNSIGNED (TREE_TYPE (step)))
126 : {
127 932158 : step1 = fold_build1 (NEGATE_EXPR, TREE_TYPE (step), step);
128 932158 : if (tree_int_cst_lt (step1, step))
129 : {
130 330051 : incr_op = (incr_op == PLUS_EXPR ? MINUS_EXPR : PLUS_EXPR);
131 : step = step1;
132 : }
133 : }
134 : else
135 : {
136 1960 : if (!tree_expr_nonnegative_p (step)
137 1960 : && may_negate_without_overflow_p (step))
138 : {
139 21 : incr_op = (incr_op == PLUS_EXPR ? MINUS_EXPR : PLUS_EXPR);
140 21 : step = fold_build1 (NEGATE_EXPR, TREE_TYPE (step), step);
141 : }
142 : }
143 : }
144 991120 : if (POINTER_TYPE_P (TREE_TYPE (base)))
145 : {
146 134804 : if (TREE_CODE (base) == ADDR_EXPR)
147 48626 : mark_addressable (TREE_OPERAND (base, 0));
148 134804 : step = convert_to_ptrofftype (step);
149 134804 : if (incr_op == MINUS_EXPR)
150 2982 : step = fold_build1 (NEGATE_EXPR, TREE_TYPE (step), step);
151 : incr_op = POINTER_PLUS_EXPR;
152 : }
153 : /* Gimplify the step if necessary. We put the computations in front of the
154 : loop (i.e. the step should be loop invariant). */
155 991120 : step = force_gimple_operand (step, &stmts, true, NULL_TREE);
156 991120 : if (stmts)
157 0 : gsi_insert_seq_on_edge_immediate (pe, stmts);
158 :
159 991120 : gimple_seq incr_stmts = nullptr;
160 991120 : gimple_seq_add_stmt (&incr_stmts,
161 991120 : gimple_build_assign (va, incr_op, vb, step));
162 991120 : insert_iv_increment (incr_pos, after, incr_stmts);
163 :
164 991120 : initial = force_gimple_operand (base, &stmts, true, var);
165 991120 : if (stmts)
166 256559 : gsi_insert_seq_on_edge_immediate (pe, stmts);
167 :
168 991120 : phi = create_phi_node (vb, loop->header);
169 991120 : add_phi_arg (phi, initial, loop_preheader_edge (loop), UNKNOWN_LOCATION);
170 991120 : add_phi_arg (phi, va, loop_latch_edge (loop), UNKNOWN_LOCATION);
171 991120 : }
172 :
173 : /* Return the innermost superloop LOOP of USE_LOOP that is a superloop of
174 : both DEF_LOOP and USE_LOOP. */
175 :
176 : static inline class loop *
177 145665 : find_sibling_superloop (class loop *use_loop, class loop *def_loop)
178 : {
179 145665 : unsigned ud = loop_depth (use_loop);
180 145665 : unsigned dd = loop_depth (def_loop);
181 145665 : gcc_assert (ud > 0 && dd > 0);
182 145665 : if (ud > dd)
183 18878 : use_loop = superloop_at_depth (use_loop, dd);
184 145665 : if (ud < dd)
185 25622 : def_loop = superloop_at_depth (def_loop, ud);
186 148170 : while (loop_outer (use_loop) != loop_outer (def_loop))
187 : {
188 2505 : use_loop = loop_outer (use_loop);
189 2505 : def_loop = loop_outer (def_loop);
190 2505 : gcc_assert (use_loop && def_loop);
191 : }
192 145665 : return use_loop;
193 : }
194 :
195 : /* DEF_BB is a basic block containing a DEF that needs rewriting into
196 : loop-closed SSA form. USE_BLOCKS is the set of basic blocks containing
197 : uses of DEF that "escape" from the loop containing DEF_BB (i.e. blocks in
198 : USE_BLOCKS are dominated by DEF_BB but not in the loop father of DEF_BB).
199 : ALL_EXITS[I] is the set of all basic blocks that exit loop I.
200 : DEF_LOOP_EXITS is a bitmap of loop exit blocks that exit the loop
201 : containing DEF_BB or its outer loops.
202 :
203 : Compute the subset of loop exit destinations that exit the loop
204 : containing DEF_BB or one of its loop fathers, in which DEF is live.
205 : This set is returned in the bitmap LIVE_EXITS.
206 :
207 : Instead of computing the complete livein set of the def, we use the loop
208 : nesting tree as a form of poor man's structure analysis. This greatly
209 : speeds up the analysis, which is important because this function may be
210 : called on all SSA names that need rewriting, one at a time. */
211 :
212 : static void
213 3129444 : compute_live_loop_exits (bitmap live_exits, bitmap use_blocks,
214 : basic_block def_bb, bitmap def_loop_exits)
215 : {
216 3129444 : unsigned i;
217 3129444 : bitmap_iterator bi;
218 3129444 : class loop *def_loop = def_bb->loop_father;
219 3129444 : unsigned def_loop_depth = loop_depth (def_loop);
220 :
221 : /* Normally the work list size is bounded by the number of basic
222 : blocks in the largest loop. We don't know this number, but we
223 : can be fairly sure that it will be relatively small. */
224 3129444 : auto_vec<basic_block, 8> worklist (MAX (8, n_basic_blocks_for_fn (cfun) / 128));
225 :
226 7560356 : EXECUTE_IF_SET_IN_BITMAP (use_blocks, 0, i, bi)
227 : {
228 4430912 : basic_block use_bb = BASIC_BLOCK_FOR_FN (cfun, i);
229 4430912 : class loop *use_loop = use_bb->loop_father;
230 4430912 : gcc_checking_assert (def_loop != use_loop
231 : && ! flow_loop_nested_p (def_loop, use_loop));
232 4430912 : if (! flow_loop_nested_p (use_loop, def_loop))
233 123249 : use_bb = find_sibling_superloop (use_loop, def_loop)->header;
234 4430912 : if (bitmap_set_bit (live_exits, use_bb->index))
235 4389244 : worklist.safe_push (use_bb);
236 : }
237 :
238 : /* Iterate until the worklist is empty. */
239 9799468 : while (! worklist.is_empty ())
240 : {
241 6670024 : edge e;
242 6670024 : edge_iterator ei;
243 :
244 : /* Pull a block off the worklist. */
245 6670024 : basic_block bb = worklist.pop ();
246 :
247 : /* Make sure we have at least enough room in the work list
248 : for all predecessors of this block. */
249 6670024 : worklist.reserve (EDGE_COUNT (bb->preds));
250 :
251 : /* For each predecessor block. */
252 14398097 : FOR_EACH_EDGE (e, ei, bb->preds)
253 : {
254 7728073 : basic_block pred = e->src;
255 7728073 : class loop *pred_loop = pred->loop_father;
256 7728073 : unsigned pred_loop_depth = loop_depth (pred_loop);
257 7728073 : bool pred_visited;
258 :
259 : /* We should have met DEF_BB along the way. */
260 7728073 : gcc_assert (pred != ENTRY_BLOCK_PTR_FOR_FN (cfun));
261 :
262 7728073 : if (pred_loop_depth >= def_loop_depth)
263 : {
264 4682172 : if (pred_loop_depth > def_loop_depth)
265 154919 : pred_loop = superloop_at_depth (pred_loop, def_loop_depth);
266 : /* If we've reached DEF_LOOP, our train ends here. */
267 4682172 : if (pred_loop == def_loop)
268 5447293 : continue;
269 : }
270 3045901 : else if (! flow_loop_nested_p (pred_loop, def_loop))
271 22416 : pred = find_sibling_superloop (pred_loop, def_loop)->header;
272 :
273 : /* Add PRED to the LIVEIN set. PRED_VISITED is true if
274 : we had already added PRED to LIVEIN before. */
275 3929718 : pred_visited = !bitmap_set_bit (live_exits, pred->index);
276 :
277 : /* If we have visited PRED before, don't add it to the worklist.
278 : If BB dominates PRED, then we're probably looking at a loop.
279 : We're only interested in looking up in the dominance tree
280 : because DEF_BB dominates all the uses. */
281 3929718 : if (pred_visited || dominated_by_p (CDI_DOMINATORS, pred, bb))
282 1648938 : continue;
283 :
284 2280780 : worklist.quick_push (pred);
285 : }
286 : }
287 :
288 3129444 : bitmap_and_into (live_exits, def_loop_exits);
289 3129444 : }
290 :
291 : /* Add a loop-closing PHI for VAR in basic block EXIT. */
292 :
293 : static void
294 3731298 : add_exit_phi (basic_block exit, tree var)
295 : {
296 3731298 : gphi *phi;
297 3731298 : edge e;
298 3731298 : edge_iterator ei;
299 :
300 : /* Check that at least one of the edges entering the EXIT block exits
301 : the loop, or a superloop of that loop, that VAR is defined in. */
302 3731298 : if (flag_checking)
303 : {
304 3731196 : gimple *def_stmt = SSA_NAME_DEF_STMT (var);
305 3731196 : basic_block def_bb = gimple_bb (def_stmt);
306 3741052 : FOR_EACH_EDGE (e, ei, exit->preds)
307 : {
308 7482104 : class loop *aloop = find_common_loop (def_bb->loop_father,
309 3741052 : e->src->loop_father);
310 3741052 : if (!flow_bb_inside_loop_p (aloop, e->dest))
311 : break;
312 : }
313 3731196 : gcc_assert (e);
314 : }
315 :
316 3731298 : phi = create_phi_node (NULL_TREE, exit);
317 3731298 : create_new_def_for (var, phi, gimple_phi_result_ptr (phi));
318 7651522 : FOR_EACH_EDGE (e, ei, exit->preds)
319 3920224 : add_phi_arg (phi, var, e, UNKNOWN_LOCATION);
320 :
321 3731298 : if (dump_file && (dump_flags & TDF_DETAILS))
322 : {
323 1896 : fprintf (dump_file, ";; Created LCSSA PHI: ");
324 1896 : print_gimple_stmt (dump_file, phi, 0, dump_flags);
325 : }
326 3731298 : }
327 :
328 : /* Add exit phis for VAR that is used in LIVEIN.
329 : Exits of the loops are stored in LOOP_EXITS. Returns the number
330 : of PHIs added for VAR. */
331 :
332 : static unsigned
333 3129444 : add_exit_phis_var (tree var, bitmap use_blocks, bitmap def_loop_exits)
334 : {
335 3129444 : unsigned index;
336 3129444 : bitmap_iterator bi;
337 3129444 : basic_block def_bb = gimple_bb (SSA_NAME_DEF_STMT (var));
338 :
339 3129444 : gcc_checking_assert (! bitmap_bit_p (use_blocks, def_bb->index));
340 :
341 3129444 : auto_bitmap live_exits (&loop_renamer_obstack);
342 3129444 : compute_live_loop_exits (live_exits, use_blocks, def_bb, def_loop_exits);
343 :
344 3129444 : unsigned cnt = 0;
345 6860742 : EXECUTE_IF_SET_IN_BITMAP (live_exits, 0, index, bi)
346 : {
347 3731298 : add_exit_phi (BASIC_BLOCK_FOR_FN (cfun, index), var);
348 3731298 : cnt++;
349 : }
350 3129444 : return cnt;
351 3129444 : }
352 :
353 : static int
354 46906873 : loop_name_cmp (const void *p1, const void *p2)
355 : {
356 46906873 : auto l1 = (const std::pair<int, int> *)p1;
357 46906873 : auto l2 = (const std::pair<int, int> *)p2;
358 46906873 : if (l1->first < l2->first)
359 : return -1;
360 28245541 : else if (l1->first > l2->first)
361 17563347 : return 1;
362 : return 0;
363 : }
364 :
365 : /* Add exit phis for the names marked in NAMES_TO_RENAME.
366 : Exits of the loops are stored in EXITS. Sets of blocks where the ssa
367 : names are used are stored in USE_BLOCKS. Returns whether any name
368 : required multiple LC PHI nodes. */
369 :
370 : static bool
371 720882 : add_exit_phis (bitmap names_to_rename, bitmap *use_blocks)
372 : {
373 720882 : unsigned i;
374 720882 : bitmap_iterator bi;
375 720882 : bool multiple_p = false;
376 :
377 : /* Sort names_to_rename after definition loop so we can avoid re-computing
378 : def_loop_exits. */
379 720882 : auto_vec<std::pair<int, int> > names (bitmap_count_bits (names_to_rename));
380 3850326 : EXECUTE_IF_SET_IN_BITMAP (names_to_rename, 0, i, bi)
381 : {
382 3129444 : tree name = ssa_name (i);
383 3129444 : loop_p def_loop = gimple_bb (SSA_NAME_DEF_STMT (name))->loop_father;
384 3129444 : names.quick_push (std::make_pair (def_loop->num, i));
385 : }
386 720882 : names.qsort (loop_name_cmp);
387 :
388 720882 : auto_bitmap def_loop_exits (&loop_renamer_obstack);
389 720882 : loop_p last_def_loop = NULL;
390 5292090 : for (auto p : names)
391 : {
392 3129444 : loop_p def_loop = get_loop (cfun, p.first);
393 3129444 : if (def_loop != last_def_loop)
394 : {
395 1728636 : bitmap_clear (def_loop_exits);
396 1728636 : last_def_loop = def_loop;
397 4047424 : for (class loop *loop = def_loop; loop != current_loops->tree_root;
398 2318788 : loop = loop_outer (loop))
399 8437159 : for (auto exit = loop->exits->next; exit->e; exit = exit->next)
400 6118371 : bitmap_set_bit (def_loop_exits, exit->e->dest->index);
401 : }
402 3129444 : if (add_exit_phis_var (ssa_name (p.second), use_blocks[p.second],
403 : def_loop_exits) > 1)
404 353278 : multiple_p = true;
405 : }
406 :
407 720882 : return multiple_p;
408 720882 : }
409 :
410 : /* For USE in BB, if it is used outside of the loop it is defined in,
411 : mark it for rewrite. Record basic block BB where it is used
412 : to USE_BLOCKS. Record the ssa name index to NEED_PHIS bitmap.
413 : Note that for USEs in phis, BB should be the src of the edge corresponding to
414 : the use, rather than the bb containing the phi. */
415 :
416 : static void
417 161230178 : find_uses_to_rename_use (basic_block bb, tree use, bitmap *use_blocks,
418 : bitmap need_phis)
419 : {
420 161230178 : unsigned ver;
421 161230178 : basic_block def_bb;
422 161230178 : class loop *def_loop;
423 :
424 161230178 : if (TREE_CODE (use) != SSA_NAME)
425 : return;
426 :
427 155540918 : ver = SSA_NAME_VERSION (use);
428 155540918 : def_bb = gimple_bb (SSA_NAME_DEF_STMT (use));
429 155540918 : if (!def_bb)
430 : return;
431 144270002 : def_loop = def_bb->loop_father;
432 :
433 : /* If the definition is not inside a loop, it is not interesting. */
434 144270002 : if (!loop_outer (def_loop))
435 : return;
436 :
437 : /* If the use is not outside of the loop it is defined in, it is not
438 : interesting. */
439 75726681 : if (flow_bb_inside_loop_p (def_loop, bb))
440 : return;
441 :
442 : /* If we're seeing VER for the first time, we still have to allocate
443 : a bitmap for its uses. */
444 5050536 : if (bitmap_set_bit (need_phis, ver))
445 3129444 : use_blocks[ver] = BITMAP_ALLOC (&loop_renamer_obstack);
446 5050536 : bitmap_set_bit (use_blocks[ver], bb->index);
447 : }
448 :
449 : /* For uses matching USE_FLAGS in STMT, mark names that are used outside of the
450 : loop they are defined to rewrite. Record the set of blocks in which the ssa
451 : names are used to USE_BLOCKS, and the ssa names themselves to NEED_PHIS. */
452 :
453 : static void
454 211003918 : find_uses_to_rename_stmt (gimple *stmt, bitmap *use_blocks, bitmap need_phis,
455 : int use_flags)
456 : {
457 211003918 : ssa_op_iter iter;
458 211003918 : tree var;
459 211003918 : basic_block bb = gimple_bb (stmt);
460 :
461 211003918 : if (is_gimple_debug (stmt))
462 123156966 : return;
463 :
464 : /* FOR_EACH_SSA_TREE_OPERAND iterator does not allows SSA_OP_VIRTUAL_USES
465 : only. */
466 87846952 : if (use_flags == SSA_OP_VIRTUAL_USES)
467 : {
468 87846952 : tree vuse = gimple_vuse (stmt);
469 0 : if (vuse != NULL_TREE)
470 0 : find_uses_to_rename_use (bb, gimple_vuse (stmt), use_blocks, need_phis);
471 : }
472 : else
473 212469644 : FOR_EACH_SSA_TREE_OPERAND (var, stmt, iter, use_flags)
474 124622692 : find_uses_to_rename_use (bb, var, use_blocks, need_phis);
475 : }
476 :
477 : /* Marks names matching USE_FLAGS that are used in BB and outside of the loop
478 : they are defined in for rewrite. Records the set of blocks in which the ssa
479 : names are used to USE_BLOCKS. Record the SSA names that will
480 : need exit PHIs in NEED_PHIS. */
481 :
482 : static void
483 30045718 : find_uses_to_rename_bb (basic_block bb, bitmap *use_blocks, bitmap need_phis,
484 : int use_flags)
485 : {
486 30045718 : edge e;
487 30045718 : edge_iterator ei;
488 30045718 : bool do_virtuals = (use_flags & SSA_OP_VIRTUAL_USES) != 0;
489 30045718 : bool do_nonvirtuals = (use_flags & SSA_OP_USE) != 0;
490 :
491 72774336 : FOR_EACH_EDGE (e, ei, bb->succs)
492 79336104 : for (gphi_iterator bsi = gsi_start_phis (e->dest); !gsi_end_p (bsi);
493 36607486 : gsi_next (&bsi))
494 : {
495 36607486 : gphi *phi = bsi.phi ();
496 36607486 : bool virtual_p = virtual_operand_p (gimple_phi_result (phi));
497 36607486 : if ((virtual_p && do_virtuals)
498 21768787 : || (!virtual_p && do_nonvirtuals))
499 36607486 : find_uses_to_rename_use (bb, PHI_ARG_DEF_FROM_EDGE (phi, e),
500 : use_blocks, need_phis);
501 : }
502 :
503 271095354 : for (gimple_stmt_iterator bsi = gsi_start_bb (bb); !gsi_end_p (bsi);
504 211003918 : gsi_next (&bsi))
505 211003918 : find_uses_to_rename_stmt (gsi_stmt (bsi), use_blocks, need_phis,
506 : use_flags);
507 30045718 : }
508 :
509 : /* Marks names matching USE_FLAGS that are used outside of the loop they are
510 : defined in for rewrite. Records the set of blocks in which the ssa names are
511 : used to USE_BLOCKS. Record the SSA names that will need exit PHIs in
512 : NEED_PHIS. If CHANGED_BBS is not NULL, scan only blocks in this set. */
513 :
514 : static void
515 1039539 : find_uses_to_rename (bitmap changed_bbs, bitmap *use_blocks, bitmap need_phis,
516 : int use_flags)
517 : {
518 1039539 : basic_block bb;
519 1039539 : unsigned index;
520 1039539 : bitmap_iterator bi;
521 :
522 1039539 : if (changed_bbs)
523 27580 : EXECUTE_IF_SET_IN_BITMAP (changed_bbs, 0, index, bi)
524 : {
525 20092 : bb = BASIC_BLOCK_FOR_FN (cfun, index);
526 20092 : if (bb)
527 20092 : find_uses_to_rename_bb (bb, use_blocks, need_phis, use_flags);
528 : }
529 : else
530 31057677 : FOR_EACH_BB_FN (bb, cfun)
531 30025626 : find_uses_to_rename_bb (bb, use_blocks, need_phis, use_flags);
532 1039539 : }
533 :
534 : /* Rewrites the program into a loop closed ssa form -- i.e. inserts extra
535 : phi nodes to ensure that no variable is used outside the loop it is
536 : defined in.
537 :
538 : This strengthening of the basic ssa form has several advantages:
539 :
540 : 1) Updating it during unrolling/peeling/versioning is trivial, since
541 : we do not need to care about the uses outside of the loop.
542 : The same applies to virtual operands which are also rewritten into
543 : loop closed SSA form. Note that virtual operands are always live
544 : until function exit.
545 : 2) The behavior of all uses of an induction variable is the same.
546 : Without this, you need to distinguish the case when the variable
547 : is used outside of the loop it is defined in, for example
548 :
549 : for (i = 0; i < 100; i++)
550 : {
551 : for (j = 0; j < 100; j++)
552 : {
553 : k = i + j;
554 : use1 (k);
555 : }
556 : use2 (k);
557 : }
558 :
559 : Looking from the outer loop with the normal SSA form, the first use of k
560 : is not well-behaved, while the second one is an induction variable with
561 : base 99 and step 1.
562 :
563 : If CHANGED_BBS is not NULL, we look for uses outside loops only in the
564 : basic blocks in this set.
565 :
566 : USE_FLAGS allows us to specify whether we want virtual, non-virtual or
567 : both variables rewritten.
568 :
569 : UPDATE_FLAG is used in the call to update_ssa. See
570 : TODO_update_ssa* for documentation. */
571 :
572 : static void
573 4596045 : rewrite_into_loop_closed_ssa_1 (bitmap changed_bbs, unsigned update_flag,
574 : int use_flags)
575 : {
576 4596045 : bitmap *use_blocks;
577 4596045 : bitmap names_to_rename;
578 :
579 4596045 : loops_state_set (LOOP_CLOSED_SSA);
580 4596045 : if (number_of_loops (cfun) <= 1)
581 : return;
582 :
583 : /* If the pass has caused the SSA form to be out-of-date, update it
584 : now. */
585 1039539 : if (update_flag != 0)
586 1039161 : update_ssa (update_flag);
587 378 : else if (flag_checking)
588 378 : verify_ssa (true, true);
589 :
590 1039539 : bitmap_obstack_initialize (&loop_renamer_obstack);
591 :
592 1039539 : names_to_rename = BITMAP_ALLOC (&loop_renamer_obstack);
593 :
594 : /* Uses of names to rename. We don't have to initialize this array,
595 : because we know that we will only have entries for the SSA names
596 : in NAMES_TO_RENAME. */
597 2079078 : use_blocks = XNEWVEC (bitmap, num_ssa_names);
598 1039539 : find_uses_to_rename (changed_bbs, use_blocks, names_to_rename, use_flags);
599 :
600 1039539 : if (!bitmap_empty_p (names_to_rename))
601 : {
602 720882 : bool release_recorded_exits_p = false;
603 720882 : if (!loops_state_satisfies_p (LOOPS_HAVE_RECORDED_EXITS))
604 : {
605 : /* Doing one scan over the whole function is cheaper than
606 : traversing the loop tree and gathering BBs of each loop. */
607 0 : record_loop_exits ();
608 0 : release_recorded_exits_p = true;
609 : }
610 :
611 : /* Add the PHI nodes on exits of the loops for the names we need to
612 : rewrite. When no variable required multiple LC PHI nodes to be
613 : inserted then we know that all uses outside of the loop are
614 : dominated by the single LC SSA definition and no further PHI
615 : node insertions are required. */
616 720882 : bool need_phis_p = add_exit_phis (names_to_rename, use_blocks);
617 :
618 720882 : if (release_recorded_exits_p)
619 0 : release_recorded_exits (cfun);
620 :
621 : /* Fix up all the names found to be used outside their original
622 : loops. */
623 1297967 : update_ssa (need_phis_p ? TODO_update_ssa : TODO_update_ssa_no_phi);
624 : }
625 :
626 1039539 : bitmap_obstack_release (&loop_renamer_obstack);
627 1039539 : free (use_blocks);
628 : }
629 :
630 : /* Rewrites the defs and uses into a loop closed ssa form.
631 : If CHANGED_BBS is not NULL, we look for uses outside loops only in the basic
632 : blocks in this set. UPDATE_FLAG is used in the call to update_ssa. See
633 : TODO_update_ssa* for documentation. */
634 :
635 : void
636 4596045 : rewrite_into_loop_closed_ssa (bitmap changed_bbs, unsigned update_flag)
637 : {
638 4596045 : rewrite_into_loop_closed_ssa_1 (changed_bbs, update_flag, SSA_OP_ALL_USES);
639 4596045 : }
640 :
641 : /* Check invariants of the loop closed ssa form for the def in DEF_BB. */
642 :
643 : static void
644 173771258 : check_loop_closed_ssa_def (basic_block def_bb, tree def)
645 : {
646 173771258 : use_operand_p use_p;
647 173771258 : imm_use_iterator iterator;
648 685837556 : FOR_EACH_IMM_USE_FAST (use_p, iterator, def)
649 : {
650 338295040 : if (is_gimple_debug (USE_STMT (use_p)))
651 43994834 : continue;
652 :
653 294300206 : basic_block use_bb = gimple_bb (USE_STMT (use_p));
654 294300206 : if (is_a <gphi *> (USE_STMT (use_p)))
655 79358667 : use_bb = EDGE_PRED (use_bb, PHI_ARG_INDEX_FROM_USE (use_p))->src;
656 :
657 294300206 : gcc_assert (flow_bb_inside_loop_p (def_bb->loop_father, use_bb));
658 173771258 : }
659 173771258 : }
660 :
661 : /* Checks invariants of loop closed ssa form in BB. */
662 :
663 : static void
664 51063667 : check_loop_closed_ssa_bb (basic_block bb)
665 : {
666 92954798 : for (gphi_iterator bsi = gsi_start_phis (bb); !gsi_end_p (bsi);
667 41891131 : gsi_next (&bsi))
668 : {
669 41891131 : gphi *phi = bsi.phi ();
670 :
671 41891131 : check_loop_closed_ssa_def (bb, PHI_RESULT (phi));
672 : }
673 :
674 206852471 : for (gimple_stmt_iterator bsi = gsi_start_nondebug_bb (bb); !gsi_end_p (bsi);
675 155788804 : gsi_next_nondebug (&bsi))
676 : {
677 155788804 : ssa_op_iter iter;
678 155788804 : tree var;
679 155788804 : gimple *stmt = gsi_stmt (bsi);
680 :
681 287668931 : FOR_EACH_SSA_TREE_OPERAND (var, stmt, iter, SSA_OP_ALL_DEFS)
682 131880127 : check_loop_closed_ssa_def (bb, var);
683 : }
684 51063667 : }
685 :
686 : /* Checks that invariants of the loop closed ssa form are preserved.
687 : Call verify_ssa when VERIFY_SSA_P is true. Note all loops are checked
688 : if LOOP is NULL, otherwise, only LOOP is checked. */
689 :
690 : DEBUG_FUNCTION void
691 3905017 : verify_loop_closed_ssa (bool verify_ssa_p, class loop *loop)
692 : {
693 7810034 : if (number_of_loops (cfun) <= 1)
694 : return;
695 :
696 3905017 : timevar_push (TV_VERIFY_LOOP_CLOSED);
697 :
698 3905017 : if (loop == NULL)
699 : {
700 3903409 : basic_block bb;
701 :
702 3903409 : if (verify_ssa_p)
703 33663 : verify_ssa (false, true);
704 :
705 128468888 : FOR_EACH_BB_FN (bb, cfun)
706 124565479 : if (bb->loop_father && bb->loop_father->num > 0)
707 51055945 : check_loop_closed_ssa_bb (bb);
708 : }
709 : else
710 : {
711 1608 : basic_block *bbs = get_loop_body (loop);
712 :
713 : /* We do not have loop-local SSA verification so just
714 : check there's no update queued. */
715 1608 : if (verify_ssa_p)
716 1608 : gcc_assert (!need_ssa_update_p (cfun));
717 :
718 9330 : for (unsigned i = 0; i < loop->num_nodes; ++i)
719 7722 : check_loop_closed_ssa_bb (bbs[i]);
720 :
721 1608 : free (bbs);
722 : }
723 :
724 3905017 : timevar_pop (TV_VERIFY_LOOP_CLOSED);
725 : }
726 :
727 : /* Split loop exit edge EXIT. The things are a bit complicated by a need to
728 : preserve the loop closed ssa form. If COPY_CONSTANTS_P is true then
729 : forwarder PHIs are also created for constant arguments.
730 : The newly created block is returned. */
731 :
732 : basic_block
733 65041 : split_loop_exit_edge (edge exit, bool copy_constants_p)
734 : {
735 65041 : basic_block dest = exit->dest;
736 65041 : basic_block bb = split_edge (exit);
737 65041 : gphi *phi, *new_phi;
738 65041 : tree new_name, name;
739 65041 : use_operand_p op_p;
740 65041 : gphi_iterator psi;
741 65041 : location_t locus;
742 :
743 155004 : for (psi = gsi_start_phis (dest); !gsi_end_p (psi); gsi_next (&psi))
744 : {
745 89963 : phi = psi.phi ();
746 89963 : op_p = PHI_ARG_DEF_PTR_FROM_EDGE (phi, single_succ_edge (bb));
747 89963 : locus = gimple_phi_arg_location_from_edge (phi, single_succ_edge (bb));
748 :
749 89963 : name = USE_FROM_PTR (op_p);
750 :
751 : /* If the argument of the PHI node is a constant, we do not need
752 : to keep it inside loop. */
753 89963 : if (TREE_CODE (name) != SSA_NAME
754 2519 : && !copy_constants_p)
755 2380 : continue;
756 :
757 : /* Otherwise create an auxiliary phi node that will copy the value
758 : of the SSA name out of the loop. */
759 87583 : new_name = duplicate_ssa_name (PHI_RESULT (phi), NULL);
760 87583 : new_phi = create_phi_node (new_name, bb);
761 87583 : add_phi_arg (new_phi, name, exit, locus);
762 87583 : SET_USE (op_p, new_name);
763 : }
764 :
765 65041 : return bb;
766 : }
767 :
768 : /* Returns the basic block in that statements should be emitted for induction
769 : variables incremented at the end of the LOOP. */
770 :
771 : basic_block
772 23984386 : ip_end_pos (class loop *loop)
773 : {
774 23984386 : return loop->latch;
775 : }
776 :
777 : /* Returns the basic block in that statements should be emitted for induction
778 : variables incremented just before exit condition of a LOOP. */
779 :
780 : basic_block
781 49884599 : ip_normal_pos (class loop *loop)
782 : {
783 49884599 : basic_block bb;
784 49884599 : edge exit;
785 :
786 49935427 : if (!single_pred_p (loop->latch))
787 : return NULL;
788 :
789 49737055 : bb = single_pred (loop->latch);
790 99659956 : if (!safe_is_a <gcond *> (*gsi_last_bb (bb)))
791 : return NULL;
792 :
793 49724529 : exit = EDGE_SUCC (bb, 0);
794 49724529 : if (exit->dest == loop->latch)
795 37613600 : exit = EDGE_SUCC (bb, 1);
796 :
797 49724529 : if (flow_bb_inside_loop_p (loop, exit->dest))
798 : return NULL;
799 :
800 : return bb;
801 : }
802 :
803 : /* Stores the standard position for induction variable increment in LOOP
804 : (just before the exit condition if it is available and latch block is empty,
805 : end of the latch block otherwise) to BSI. INSERT_AFTER is set to true if
806 : the increment should be inserted after *BSI. */
807 :
808 : void
809 150169 : standard_iv_increment_position (class loop *loop, gimple_stmt_iterator *bsi,
810 : bool *insert_after)
811 : {
812 150169 : basic_block bb = ip_normal_pos (loop), latch = ip_end_pos (loop);
813 150169 : gimple *last = last_nondebug_stmt (latch);
814 :
815 150169 : if (!bb
816 150169 : || (last && gimple_code (last) != GIMPLE_LABEL))
817 : {
818 39 : *bsi = gsi_last_bb (latch);
819 39 : *insert_after = true;
820 : }
821 : else
822 : {
823 150130 : *bsi = gsi_last_bb (bb);
824 150130 : *insert_after = false;
825 : }
826 150169 : }
827 :
828 : /* Copies phi node arguments for duplicated blocks. The index of the first
829 : duplicated block is FIRST_NEW_BLOCK. */
830 :
831 : static void
832 148429 : copy_phi_node_args (unsigned first_new_block)
833 : {
834 148429 : unsigned i;
835 :
836 1286806 : for (i = first_new_block; i < (unsigned) last_basic_block_for_fn (cfun); i++)
837 1138377 : BASIC_BLOCK_FOR_FN (cfun, i)->flags |= BB_DUPLICATED;
838 :
839 1286806 : for (i = first_new_block; i < (unsigned) last_basic_block_for_fn (cfun); i++)
840 1138377 : add_phi_args_after_copy_bb (BASIC_BLOCK_FOR_FN (cfun, i));
841 :
842 1286806 : for (i = first_new_block; i < (unsigned) last_basic_block_for_fn (cfun); i++)
843 1138377 : BASIC_BLOCK_FOR_FN (cfun, i)->flags &= ~BB_DUPLICATED;
844 148429 : }
845 :
846 :
847 : /* The same as cfgloopmanip.cc:duplicate_loop_body_to_header_edge, but also
848 : updates the PHI nodes at start of the copied region. In order to
849 : achieve this, only loops whose exits all lead to the same location
850 : are handled.
851 :
852 : Notice that we do not completely update the SSA web after
853 : duplication. The caller is responsible for calling update_ssa
854 : after the loop has been duplicated. */
855 :
856 : bool
857 148448 : gimple_duplicate_loop_body_to_header_edge (class loop *loop, edge e,
858 : unsigned int ndupl,
859 : sbitmap wont_exit, edge orig,
860 : vec<edge> *to_remove, int flags)
861 : {
862 148448 : unsigned first_new_block;
863 :
864 148448 : if (!loops_state_satisfies_p (LOOPS_HAVE_SIMPLE_LATCHES))
865 : return false;
866 148448 : if (!loops_state_satisfies_p (LOOPS_HAVE_PREHEADERS))
867 : return false;
868 :
869 148448 : first_new_block = last_basic_block_for_fn (cfun);
870 148448 : if (!duplicate_loop_body_to_header_edge (loop, e, ndupl, wont_exit, orig,
871 : to_remove, flags))
872 : return false;
873 :
874 : /* Readd the removed phi args for e. */
875 148429 : flush_pending_stmts (e);
876 :
877 : /* Copy the phi node arguments. */
878 148429 : copy_phi_node_args (first_new_block);
879 :
880 148429 : scev_reset ();
881 :
882 148429 : return true;
883 : }
884 :
885 : /* Returns true if we can unroll LOOP FACTOR times. Number
886 : of iterations of the loop is returned in NITER. */
887 :
888 : bool
889 859 : can_unroll_loop_p (class loop *loop, unsigned factor,
890 : class tree_niter_desc *niter)
891 : {
892 859 : edge exit;
893 :
894 : /* Check whether unrolling is possible. We only want to unroll loops
895 : for that we are able to determine number of iterations. We also
896 : want to split the extra iterations of the loop from its end,
897 : therefore we require that the loop has precisely one
898 : exit. */
899 :
900 859 : exit = single_dom_exit (loop);
901 859 : if (!exit)
902 : return false;
903 :
904 838 : if (!number_of_iterations_exit (loop, exit, niter, false)
905 837 : || niter->cmp == ERROR_MARK
906 : /* Scalar evolutions analysis might have copy propagated
907 : the abnormal ssa names into these expressions, hence
908 : emitting the computations based on them during loop
909 : unrolling might create overlapping life ranges for
910 : them, and failures in out-of-ssa. */
911 835 : || contains_abnormal_ssa_name_p (niter->may_be_zero)
912 835 : || contains_abnormal_ssa_name_p (niter->control.base)
913 835 : || contains_abnormal_ssa_name_p (niter->control.step)
914 1673 : || contains_abnormal_ssa_name_p (niter->bound))
915 3 : return false;
916 :
917 : /* And of course, we must be able to duplicate the loop. */
918 835 : if (!can_duplicate_loop_p (loop))
919 : return false;
920 :
921 : /* The final loop should be small enough. */
922 835 : if (tree_num_loop_insns (loop, &eni_size_weights) * factor
923 835 : > (unsigned) param_max_unrolled_insns)
924 : return false;
925 :
926 : return true;
927 : }
928 :
929 : /* Determines the conditions that control execution of LOOP unrolled FACTOR
930 : times. DESC is number of iterations of LOOP. ENTER_COND is set to
931 : condition that must be true if the main loop can be entered.
932 : If the loop does not always iterate an exact multiple of FACTOR times,
933 : EXIT_BASE, EXIT_STEP, EXIT_CMP and EXIT_BOUND are set to values describing
934 : how the exit from the unrolled loop should be controlled. Otherwise,
935 : the trees are set to null and EXIT_CMP is set to ERROR_MARK. */
936 :
937 : static void
938 819 : determine_exit_conditions (class loop *loop, class tree_niter_desc *desc,
939 : unsigned factor, tree *enter_cond,
940 : tree *exit_base, tree *exit_step,
941 : enum tree_code *exit_cmp, tree *exit_bound)
942 : {
943 819 : gimple_seq stmts;
944 819 : tree base = desc->control.base;
945 819 : tree step = desc->control.step;
946 819 : tree bound = desc->bound;
947 819 : tree type = TREE_TYPE (step);
948 819 : tree bigstep, delta;
949 819 : tree min = lower_bound_in_type (type, type);
950 819 : tree max = upper_bound_in_type (type, type);
951 819 : enum tree_code cmp = desc->cmp;
952 819 : tree cond = boolean_true_node, assum;
953 :
954 : /* For pointers, do the arithmetics in the type of step. */
955 819 : base = fold_convert (type, base);
956 819 : bound = fold_convert (type, bound);
957 :
958 819 : *enter_cond = boolean_false_node;
959 819 : *exit_base = NULL_TREE;
960 819 : *exit_step = NULL_TREE;
961 819 : *exit_cmp = ERROR_MARK;
962 819 : *exit_bound = NULL_TREE;
963 819 : gcc_assert (cmp != ERROR_MARK);
964 :
965 : /* We only need to be correct when we answer question
966 : "Do at least FACTOR more iterations remain?" in the unrolled loop.
967 : Thus, transforming BASE + STEP * i <> BOUND to
968 : BASE + STEP * i < BOUND is ok. */
969 819 : if (cmp == NE_EXPR)
970 : {
971 84 : if (tree_int_cst_sign_bit (step))
972 : cmp = GT_EXPR;
973 : else
974 771 : cmp = LT_EXPR;
975 : }
976 735 : else if (cmp == LT_EXPR)
977 : {
978 733 : gcc_assert (!tree_int_cst_sign_bit (step));
979 : }
980 2 : else if (cmp == GT_EXPR)
981 : {
982 2 : gcc_assert (tree_int_cst_sign_bit (step));
983 : }
984 : else
985 0 : gcc_unreachable ();
986 :
987 : /* The main body of the loop may be entered iff:
988 :
989 : 1) desc->may_be_zero is false.
990 : 2) it is possible to check that there are at least FACTOR iterations
991 : of the loop, i.e., BOUND - step * FACTOR does not overflow.
992 : 3) # of iterations is at least FACTOR */
993 :
994 819 : if (!integer_zerop (desc->may_be_zero))
995 6 : cond = fold_build2 (TRUTH_AND_EXPR, boolean_type_node,
996 : invert_truthvalue (desc->may_be_zero),
997 : cond);
998 :
999 819 : bigstep = fold_build2 (MULT_EXPR, type, step,
1000 : build_int_cst_type (type, factor));
1001 819 : delta = fold_build2 (MINUS_EXPR, type, bigstep, step);
1002 819 : if (cmp == LT_EXPR)
1003 771 : assum = fold_build2 (GE_EXPR, boolean_type_node,
1004 : bound,
1005 : fold_build2 (PLUS_EXPR, type, min, delta));
1006 : else
1007 48 : assum = fold_build2 (LE_EXPR, boolean_type_node,
1008 : bound,
1009 : fold_build2 (PLUS_EXPR, type, max, delta));
1010 819 : cond = fold_build2 (TRUTH_AND_EXPR, boolean_type_node, assum, cond);
1011 :
1012 819 : bound = fold_build2 (MINUS_EXPR, type, bound, delta);
1013 819 : assum = fold_build2 (cmp, boolean_type_node, base, bound);
1014 819 : cond = fold_build2 (TRUTH_AND_EXPR, boolean_type_node, assum, cond);
1015 :
1016 819 : if (integer_nonzerop (cond)
1017 819 : && integer_zerop (desc->may_be_zero))
1018 : {
1019 : /* Convert the latch count to an iteration count. */
1020 77 : tree niter = fold_build2 (PLUS_EXPR, type, desc->niter,
1021 : build_one_cst (type));
1022 77 : if (multiple_of_p (type, niter, build_int_cst (type, factor)))
1023 30 : return;
1024 : }
1025 :
1026 789 : cond = force_gimple_operand (unshare_expr (cond), &stmts, false, NULL_TREE);
1027 789 : if (stmts)
1028 17 : gsi_insert_seq_on_edge_immediate (loop_preheader_edge (loop), stmts);
1029 : /* cond now may be a gimple comparison, which would be OK, but also any
1030 : other gimple rhs (say a && b). In this case we need to force it to
1031 : operand. */
1032 789 : if (!is_gimple_condexpr_for_cond (cond))
1033 : {
1034 14 : cond = force_gimple_operand (cond, &stmts, true, NULL_TREE);
1035 14 : if (stmts)
1036 14 : gsi_insert_seq_on_edge_immediate (loop_preheader_edge (loop), stmts);
1037 : }
1038 789 : *enter_cond = cond;
1039 :
1040 789 : base = force_gimple_operand (unshare_expr (base), &stmts, true, NULL_TREE);
1041 789 : if (stmts)
1042 29 : gsi_insert_seq_on_edge_immediate (loop_preheader_edge (loop), stmts);
1043 789 : bound = force_gimple_operand (unshare_expr (bound), &stmts, true, NULL_TREE);
1044 789 : if (stmts)
1045 709 : gsi_insert_seq_on_edge_immediate (loop_preheader_edge (loop), stmts);
1046 :
1047 789 : *exit_base = base;
1048 789 : *exit_step = bigstep;
1049 789 : *exit_cmp = cmp;
1050 789 : *exit_bound = bound;
1051 : }
1052 :
1053 : /* Unroll LOOP FACTOR times. LOOP is known to have a single exit edge
1054 : whose source block dominates the latch. DESC describes the number of
1055 : iterations of LOOP.
1056 :
1057 : If N is number of iterations of the loop and MAY_BE_ZERO is the condition
1058 : under that loop exits in the first iteration even if N != 0,
1059 :
1060 : while (1)
1061 : {
1062 : x = phi (init, next);
1063 :
1064 : pre;
1065 : if (st)
1066 : break;
1067 : post;
1068 : }
1069 :
1070 : becomes (with possibly the exit conditions formulated a bit differently,
1071 : avoiding the need to create a new iv):
1072 :
1073 : if (MAY_BE_ZERO || N < FACTOR)
1074 : goto rest;
1075 :
1076 : do
1077 : {
1078 : x = phi (init, next);
1079 :
1080 : pre;
1081 : post;
1082 : pre;
1083 : post;
1084 : ...
1085 : pre;
1086 : post;
1087 : N -= FACTOR;
1088 :
1089 : } while (N >= FACTOR);
1090 :
1091 : rest:
1092 : init' = phi (init, x);
1093 :
1094 : while (1)
1095 : {
1096 : x = phi (init', next);
1097 :
1098 : pre;
1099 : if (st)
1100 : break;
1101 : post;
1102 : }
1103 :
1104 : Before the loop is unrolled, TRANSFORM is called for it (only for the
1105 : unrolled loop, but not for its versioned copy). DATA is passed to
1106 : TRANSFORM. */
1107 :
1108 : /* Probability in % that the unrolled loop is entered. Just a guess. */
1109 : #define PROB_UNROLLED_LOOP_ENTERED 90
1110 :
1111 : void
1112 819 : tree_transform_and_unroll_loop (class loop *loop, unsigned factor,
1113 : class tree_niter_desc *desc,
1114 : transform_callback transform,
1115 : void *data)
1116 : {
1117 819 : unsigned irr = loop_preheader_edge (loop)->flags & EDGE_IRREDUCIBLE_LOOP;
1118 :
1119 819 : enum tree_code exit_cmp;
1120 819 : tree enter_main_cond, exit_base, exit_step, exit_bound;
1121 819 : bool flat = maybe_flat_loop_profile (loop);
1122 819 : determine_exit_conditions (loop, desc, factor,
1123 : &enter_main_cond, &exit_base, &exit_step,
1124 : &exit_cmp, &exit_bound);
1125 819 : bool single_loop_p = !exit_base;
1126 :
1127 819 : gcond *exit_if = nullptr;
1128 819 : class loop *new_loop = nullptr;
1129 819 : edge new_exit;
1130 819 : if (!single_loop_p)
1131 : {
1132 789 : profile_count entry_count = loop_preheader_edge (loop)->src->count;
1133 : /* Let us assume that the unrolled loop is quite likely to be entered. */
1134 789 : profile_probability prob_entry;
1135 789 : if (integer_nonzerop (enter_main_cond))
1136 47 : prob_entry = profile_probability::always ();
1137 : else
1138 742 : prob_entry = profile_probability::guessed_always ()
1139 742 : .apply_scale (PROB_UNROLLED_LOOP_ENTERED, 100);
1140 :
1141 :
1142 : /* The values for scales should keep profile consistent, and somewhat
1143 : close to correct. */
1144 789 : new_loop = loop_version (loop, enter_main_cond, NULL, prob_entry,
1145 : prob_entry.invert (),
1146 : prob_entry,
1147 : /* We will later redirect exit from vectorized
1148 : loop to new_loop. */
1149 : profile_probability::always (),
1150 : true);
1151 789 : gcc_assert (new_loop != NULL);
1152 789 : update_ssa (TODO_update_ssa_no_phi);
1153 :
1154 : /* Prepare the cfg and update the phi nodes. Move the loop exit to the
1155 : loop latch (and make its condition dummy, for the moment). */
1156 789 : basic_block rest = loop_preheader_edge (new_loop)->src;
1157 789 : edge precond_edge = single_pred_edge (rest);
1158 789 : split_edge (loop_latch_edge (loop));
1159 789 : basic_block exit_bb = single_pred (loop->latch);
1160 789 : edge exit = single_dom_exit (loop);
1161 :
1162 : /* Since the exit edge will be removed, the frequency of all the blocks
1163 : in the loop that are dominated by it must be scaled. */
1164 789 : if (exit->probability.initialized_p ())
1165 789 : scale_dominated_blocks_in_loop (loop, exit->src,
1166 : /* We are scaling up here so
1167 : probability does not fit. */
1168 789 : exit->src->count,
1169 1578 : exit->src->count - exit->count ());
1170 :
1171 789 : gimple_stmt_iterator bsi = gsi_last_bb (exit_bb);
1172 789 : exit_if = gimple_build_cond (EQ_EXPR, integer_zero_node,
1173 : integer_zero_node,
1174 : NULL_TREE, NULL_TREE);
1175 :
1176 789 : gsi_insert_after (&bsi, exit_if, GSI_NEW_STMT);
1177 789 : new_exit = make_edge (exit_bb, rest, EDGE_FALSE_VALUE | irr);
1178 789 : rescan_loop_exit (new_exit, true, false);
1179 :
1180 : /* Set the probability of new exit to the same of the old one. Fix
1181 : the count of the latch block. */
1182 789 : new_exit->probability = exit->probability;
1183 789 : edge new_nonexit = single_pred_edge (loop->latch);
1184 789 : new_nonexit->probability = exit->probability.invert ();
1185 789 : new_nonexit->flags = EDGE_TRUE_VALUE;
1186 789 : set_edge_probability_and_rescale_others
1187 789 : (exit, profile_probability::never ());
1188 789 : loop->latch->count = new_nonexit->count ();
1189 :
1190 789 : edge old_entry = loop_preheader_edge (loop);
1191 789 : edge new_entry = loop_preheader_edge (new_loop);
1192 789 : edge old_latch = loop_latch_edge (loop);
1193 789 : for (gphi_iterator psi_old_loop = gsi_start_phis (loop->header),
1194 789 : psi_new_loop = gsi_start_phis (new_loop->header);
1195 2460 : !gsi_end_p (psi_old_loop);
1196 1671 : gsi_next (&psi_old_loop), gsi_next (&psi_new_loop))
1197 : {
1198 1671 : gphi *phi_old_loop = psi_old_loop.phi ();
1199 1671 : gphi *phi_new_loop = psi_new_loop.phi ();
1200 :
1201 1671 : tree init = PHI_ARG_DEF_FROM_EDGE (phi_old_loop, old_entry);
1202 1671 : use_operand_p op
1203 1671 : = PHI_ARG_DEF_PTR_FROM_EDGE (phi_new_loop, new_entry);
1204 1671 : gcc_assert (operand_equal_for_phi_arg_p (init, USE_FROM_PTR (op)));
1205 1671 : tree next = PHI_ARG_DEF_FROM_EDGE (phi_old_loop, old_latch);
1206 :
1207 : /* Prefer using original variable as a base for the new ssa name.
1208 : This is necessary for virtual ops, and useful in order to avoid
1209 : losing debug info for real ops. */
1210 1671 : tree new_init;
1211 1671 : if (TREE_CODE (next) == SSA_NAME
1212 3342 : && useless_type_conversion_p (TREE_TYPE (next),
1213 1671 : TREE_TYPE (init)))
1214 1671 : new_init = copy_ssa_name (next);
1215 0 : else if (TREE_CODE (init) == SSA_NAME
1216 0 : && useless_type_conversion_p (TREE_TYPE (init),
1217 0 : TREE_TYPE (next)))
1218 0 : new_init = copy_ssa_name (init);
1219 0 : else if (useless_type_conversion_p (TREE_TYPE (next),
1220 0 : TREE_TYPE (init)))
1221 0 : new_init = make_temp_ssa_name (TREE_TYPE (next), NULL,
1222 : "unrinittmp");
1223 : else
1224 0 : new_init = make_temp_ssa_name (TREE_TYPE (init), NULL,
1225 : "unrinittmp");
1226 :
1227 1671 : gphi *phi_rest = create_phi_node (new_init, rest);
1228 1671 : add_phi_arg (phi_rest, init, precond_edge, UNKNOWN_LOCATION);
1229 1671 : add_phi_arg (phi_rest, next, new_exit, UNKNOWN_LOCATION);
1230 1671 : SET_USE (op, new_init);
1231 : }
1232 :
1233 789 : remove_path (exit);
1234 : /* We will later redirect exit from vectorized loop to new_loop. */
1235 789 : loop_preheader_edge (new_loop)->src->count = entry_count;
1236 :
1237 : /* The epilog loop latch executes at most factor - 1 times.
1238 : Since the epilog is entered unconditionally it will need to handle
1239 : up to factor executions of its body. */
1240 789 : new_loop->any_upper_bound = true;
1241 789 : new_loop->nb_iterations_upper_bound = factor - 1;
1242 : /* We do not really know estimate on number of iterations, since we do not
1243 : track any estimates modulo unroll factor.
1244 : Drop estimate from loop_info and scale loop profile.
1245 : It may be more realistic to scale loop profile to factor / 2 - 1,
1246 : but vectorizer also uses factor - 1. */
1247 789 : new_loop->any_estimate = false;
1248 789 : scale_loop_profile (new_loop, profile_probability::always (), factor - 1);
1249 : }
1250 : else
1251 30 : new_exit = single_dom_exit (loop);
1252 :
1253 : /* Transform the loop. */
1254 819 : if (transform)
1255 728 : (*transform) (loop, data);
1256 :
1257 : /* Unroll the loop and remove the exits in all iterations except for the
1258 : last one. */
1259 819 : auto_sbitmap wont_exit (factor);
1260 819 : bitmap_ones (wont_exit);
1261 819 : bitmap_clear_bit (wont_exit, factor - 1);
1262 :
1263 819 : auto_vec<edge> to_remove;
1264 819 : bool ok
1265 : = gimple_duplicate_loop_body_to_header_edge
1266 891 : (loop, loop_latch_edge (loop), factor - 1, wont_exit,
1267 : new_exit, &to_remove,
1268 : DLTHE_FLAG_UPDATE_FREQ | (flat ? DLTHE_FLAG_FLAT_PROFILE : 0));
1269 819 : gcc_assert (ok);
1270 :
1271 3531 : for (edge e : to_remove)
1272 : {
1273 1074 : ok = remove_path (e);
1274 1074 : gcc_assert (ok);
1275 : }
1276 819 : update_ssa (TODO_update_ssa);
1277 :
1278 819 : new_exit = single_dom_exit (loop);
1279 : /* gimple_duplicate_loop_body_to_header_edge depending on
1280 : DLTHE_FLAG_UPDATE_FREQ either keeps original frequency of the loop header
1281 : or scales it down accordingly.
1282 : However exit edge probability is kept as original. Fix it if needed
1283 : and compensate. */
1284 819 : update_loop_exit_probability_scale_dom_bbs (loop, new_exit);
1285 819 : if (!single_loop_p)
1286 : {
1287 : /* Finally create the new counter for number of iterations and add
1288 : the new exit instruction. */
1289 789 : tree ctr_before, ctr_after;
1290 789 : gimple_stmt_iterator bsi = gsi_last_nondebug_bb (new_exit->src);
1291 789 : exit_if = as_a <gcond *> (gsi_stmt (bsi));
1292 789 : create_iv (exit_base, PLUS_EXPR, exit_step, NULL_TREE, loop,
1293 : &bsi, false, &ctr_before, &ctr_after);
1294 789 : gimple_cond_set_code (exit_if, exit_cmp);
1295 789 : gimple_cond_set_lhs (exit_if, ctr_after);
1296 789 : gimple_cond_set_rhs (exit_if, exit_bound);
1297 789 : update_stmt (exit_if);
1298 : }
1299 819 : if (loop->any_upper_bound)
1300 1616 : loop->nb_iterations_upper_bound = wi::udiv_floor
1301 808 : (loop->nb_iterations_upper_bound + 1, factor) - 1;
1302 819 : if (loop->any_likely_upper_bound)
1303 1616 : loop->nb_iterations_likely_upper_bound = wi::udiv_floor
1304 808 : (loop->nb_iterations_likely_upper_bound + 1, factor) - 1;
1305 819 : if (loop->any_estimate)
1306 172 : loop->nb_iterations_estimate = wi::udiv_floor
1307 86 : (loop->nb_iterations_estimate + 1, factor) - 1;
1308 :
1309 819 : checking_verify_flow_info ();
1310 819 : checking_verify_loop_structure ();
1311 819 : checking_verify_loop_closed_ssa (true, loop);
1312 819 : if (new_loop)
1313 1608 : checking_verify_loop_closed_ssa (true, new_loop);
1314 819 : }
1315 :
1316 : /* Wrapper over tree_transform_and_unroll_loop for case we do not
1317 : want to transform the loop before unrolling. The meaning
1318 : of the arguments is the same as for tree_transform_and_unroll_loop. */
1319 :
1320 : void
1321 91 : tree_unroll_loop (class loop *loop, unsigned factor,
1322 : class tree_niter_desc *desc)
1323 : {
1324 91 : tree_transform_and_unroll_loop (loop, factor, desc, NULL, NULL);
1325 91 : }
1326 :
1327 : /* Rewrite the phi node at position PSI in function of the main
1328 : induction variable MAIN_IV and insert the generated code at GSI. */
1329 :
1330 : static void
1331 2038 : rewrite_phi_with_iv (loop_p loop,
1332 : gphi_iterator *psi,
1333 : gimple_stmt_iterator *gsi,
1334 : tree main_iv)
1335 : {
1336 2038 : affine_iv iv;
1337 2038 : gassign *stmt;
1338 2038 : gphi *phi = psi->phi ();
1339 2038 : tree atype, mtype, val, res = PHI_RESULT (phi);
1340 :
1341 4076 : if (virtual_operand_p (res) || res == main_iv)
1342 : {
1343 1160 : gsi_next (psi);
1344 1243 : return;
1345 : }
1346 :
1347 878 : if (!simple_iv (loop, loop, res, &iv, true))
1348 : {
1349 83 : gsi_next (psi);
1350 83 : return;
1351 : }
1352 :
1353 795 : remove_phi_node (psi, false);
1354 :
1355 795 : atype = TREE_TYPE (res);
1356 795 : mtype = POINTER_TYPE_P (atype) ? sizetype : atype;
1357 795 : val = fold_build2 (MULT_EXPR, mtype, unshare_expr (iv.step),
1358 : fold_convert (mtype, main_iv));
1359 1588 : val = fold_build2 (POINTER_TYPE_P (atype)
1360 : ? POINTER_PLUS_EXPR : PLUS_EXPR,
1361 : atype, unshare_expr (iv.base), val);
1362 795 : val = force_gimple_operand_gsi (gsi, val, false, NULL_TREE, true,
1363 : GSI_SAME_STMT);
1364 795 : stmt = gimple_build_assign (res, val);
1365 795 : gsi_insert_before (gsi, stmt, GSI_SAME_STMT);
1366 : }
1367 :
1368 : /* Rewrite all the phi nodes of LOOP in function of the main induction
1369 : variable MAIN_IV. */
1370 :
1371 : static void
1372 583 : rewrite_all_phi_nodes_with_iv (loop_p loop, tree main_iv)
1373 : {
1374 583 : unsigned i;
1375 583 : basic_block *bbs = get_loop_body_in_dom_order (loop);
1376 583 : gphi_iterator psi;
1377 :
1378 2064 : for (i = 0; i < loop->num_nodes; i++)
1379 : {
1380 1481 : basic_block bb = bbs[i];
1381 1481 : gimple_stmt_iterator gsi = gsi_after_labels (bb);
1382 :
1383 1481 : if (bb->loop_father != loop)
1384 97 : continue;
1385 :
1386 3422 : for (psi = gsi_start_phis (bb); !gsi_end_p (psi); )
1387 2038 : rewrite_phi_with_iv (loop, &psi, &gsi, main_iv);
1388 : }
1389 :
1390 583 : free (bbs);
1391 583 : }
1392 :
1393 : /* Bases all the induction variables in LOOP on a single induction variable
1394 : (with base 0 and step 1), whose final value is compared with *NIT. When the
1395 : IV type precision has to be larger than *NIT type precision, *NIT is
1396 : converted to the larger type, the conversion code is inserted before the
1397 : loop, and *NIT is updated to the new definition. When BUMP_IN_LATCH is true,
1398 : the induction variable is incremented in the loop latch, otherwise it is
1399 : incremented in the loop header. Return the induction variable that was
1400 : created. */
1401 :
1402 : tree
1403 583 : canonicalize_loop_ivs (class loop *loop, tree *nit, bool bump_in_latch)
1404 : {
1405 583 : unsigned precision = TYPE_PRECISION (TREE_TYPE (*nit));
1406 583 : unsigned original_precision = precision;
1407 583 : tree type, var_before;
1408 583 : gimple_stmt_iterator gsi;
1409 583 : gphi_iterator psi;
1410 583 : gcond *stmt;
1411 583 : edge exit = single_dom_exit (loop);
1412 583 : gimple_seq stmts;
1413 583 : bool unsigned_p = false;
1414 :
1415 583 : for (psi = gsi_start_phis (loop->header);
1416 1934 : !gsi_end_p (psi); gsi_next (&psi))
1417 : {
1418 1351 : gphi *phi = psi.phi ();
1419 1351 : tree res = PHI_RESULT (phi);
1420 1351 : bool uns;
1421 :
1422 1351 : type = TREE_TYPE (res);
1423 1351 : if (virtual_operand_p (res)
1424 857 : || (!INTEGRAL_TYPE_P (type)
1425 21 : && !POINTER_TYPE_P (type))
1426 2193 : || TYPE_PRECISION (type) < precision)
1427 531 : continue;
1428 :
1429 820 : uns = POINTER_TYPE_P (type) | TYPE_UNSIGNED (type);
1430 :
1431 820 : if (TYPE_PRECISION (type) > precision)
1432 : unsigned_p = uns;
1433 : else
1434 815 : unsigned_p |= uns;
1435 :
1436 820 : precision = TYPE_PRECISION (type);
1437 : }
1438 :
1439 583 : scalar_int_mode mode = smallest_int_mode_for_size (precision).require ();
1440 583 : precision = GET_MODE_PRECISION (mode);
1441 583 : type = build_nonstandard_integer_type (precision, unsigned_p);
1442 :
1443 583 : if (original_precision != precision
1444 583 : || TYPE_UNSIGNED (TREE_TYPE (*nit)) != unsigned_p)
1445 : {
1446 301 : *nit = fold_convert (type, *nit);
1447 301 : *nit = force_gimple_operand (*nit, &stmts, true, NULL_TREE);
1448 301 : if (stmts)
1449 115 : gsi_insert_seq_on_edge_immediate (loop_preheader_edge (loop), stmts);
1450 : }
1451 :
1452 583 : if (bump_in_latch)
1453 1166 : gsi = gsi_last_bb (loop->latch);
1454 : else
1455 0 : gsi = gsi_last_nondebug_bb (loop->header);
1456 583 : create_iv (build_int_cst_type (type, 0), PLUS_EXPR, build_int_cst (type, 1),
1457 : NULL_TREE, loop, &gsi, bump_in_latch, &var_before, NULL);
1458 :
1459 583 : rewrite_all_phi_nodes_with_iv (loop, var_before);
1460 :
1461 1166 : stmt = as_a <gcond *> (*gsi_last_bb (exit->src));
1462 : /* Make the loop exit if the control condition is not satisfied. */
1463 583 : if (exit->flags & EDGE_TRUE_VALUE)
1464 : {
1465 100 : edge te, fe;
1466 :
1467 100 : extract_true_false_edges_from_block (exit->src, &te, &fe);
1468 100 : te->flags = EDGE_FALSE_VALUE;
1469 100 : fe->flags = EDGE_TRUE_VALUE;
1470 : }
1471 583 : gimple_cond_set_code (stmt, LT_EXPR);
1472 583 : gimple_cond_set_lhs (stmt, var_before);
1473 583 : gimple_cond_set_rhs (stmt, *nit);
1474 583 : update_stmt (stmt);
1475 :
1476 583 : return var_before;
1477 : }
|