Branch data Line data Source code
1 : : /* High-level loop manipulation functions.
2 : : Copyright (C) 2004-2025 Free Software Foundation, Inc.
3 : :
4 : : This file is part of GCC.
5 : :
6 : : GCC is free software; you can redistribute it and/or modify it
7 : : under the terms of the GNU General Public License as published by the
8 : : Free Software Foundation; either version 3, or (at your option) any
9 : : later version.
10 : :
11 : : GCC is distributed in the hope that it will be useful, but WITHOUT
12 : : ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
13 : : FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
14 : : for more details.
15 : :
16 : : You should have received a copy of the GNU General Public License
17 : : along with GCC; see the file COPYING3. If not see
18 : : <http://www.gnu.org/licenses/>. */
19 : :
20 : : #include "config.h"
21 : : #include "system.h"
22 : : #include "coretypes.h"
23 : : #include "backend.h"
24 : : #include "tree.h"
25 : : #include "gimple.h"
26 : : #include "cfghooks.h"
27 : : #include "tree-pass.h" /* ??? for TODO_update_ssa but this isn't a pass. */
28 : : #include "ssa.h"
29 : : #include "gimple-pretty-print.h"
30 : : #include "fold-const.h"
31 : : #include "cfganal.h"
32 : : #include "gimplify.h"
33 : : #include "gimple-iterator.h"
34 : : #include "gimplify-me.h"
35 : : #include "tree-cfg.h"
36 : : #include "tree-ssa-loop-ivopts.h"
37 : : #include "tree-ssa-loop-manip.h"
38 : : #include "tree-ssa-loop-niter.h"
39 : : #include "tree-ssa-loop.h"
40 : : #include "tree-into-ssa.h"
41 : : #include "tree-ssa.h"
42 : : #include "cfgloop.h"
43 : : #include "tree-scalar-evolution.h"
44 : : #include "tree-inline.h"
45 : :
46 : : /* All bitmaps for rewriting into loop-closed SSA go on this obstack,
47 : : so that we can free them all at once. */
48 : : static bitmap_obstack loop_renamer_obstack;
49 : :
50 : : /* Insert IV increment statements STMTS before or after INCR_POS;
51 : : AFTER selects which. INCR_POS and AFTER can be computed using
52 : : standard_iv_increment_position. */
53 : :
54 : : void
55 : 936860 : insert_iv_increment (gimple_stmt_iterator *incr_pos, bool after,
56 : : gimple_seq stmts)
57 : : {
58 : : /* Prevent the increment from inheriting a bogus location if it is not put
59 : : immediately after a statement whose location is known. */
60 : 936860 : if (after)
61 : : {
62 : 10831 : gimple_stmt_iterator gsi = *incr_pos;
63 : 10831 : if (!gsi_end_p (gsi))
64 : 9364 : gsi_next_nondebug (&gsi);
65 : 10831 : if (gsi_end_p (gsi))
66 : : {
67 : 10831 : edge e = single_succ_edge (gsi_bb (*incr_pos));
68 : 10831 : gimple_seq_set_location (stmts, e->goto_locus);
69 : : }
70 : 10831 : gsi_insert_seq_after (incr_pos, stmts, GSI_NEW_STMT);
71 : : }
72 : : else
73 : : {
74 : 926029 : gimple_stmt_iterator gsi = *incr_pos;
75 : 926029 : if (!gsi_end_p (gsi) && is_gimple_debug (gsi_stmt (gsi)))
76 : 0 : gsi_next_nondebug (&gsi);
77 : 926029 : if (!gsi_end_p (gsi))
78 : 925556 : gimple_seq_set_location (stmts, gimple_location (gsi_stmt (gsi)));
79 : 926029 : gsi_insert_seq_before (incr_pos, stmts, GSI_NEW_STMT);
80 : : }
81 : 936860 : }
82 : :
83 : : /* Creates an induction variable with value BASE (+/-) STEP * iteration in LOOP.
84 : : If INCR_OP is PLUS_EXPR, the induction variable is BASE + STEP * iteration.
85 : : If INCR_OP is MINUS_EXPR, the induction variable is BASE - STEP * iteration.
86 : : It is expected that neither BASE nor STEP are shared with other expressions
87 : : (unless the sharing rules allow this). Use VAR as a base var_decl for it
88 : : (if NULL, a new temporary will be created). The increment will occur at
89 : : INCR_POS (after it if AFTER is true, before it otherwise). INCR_POS and
90 : : AFTER can be computed using standard_iv_increment_position. The ssa versions
91 : : of the variable before and after increment will be stored in VAR_BEFORE and
92 : : VAR_AFTER (unless they are NULL). */
93 : :
94 : : void
95 : 921532 : create_iv (tree base, tree_code incr_op, tree step, tree var, class loop *loop,
96 : : gimple_stmt_iterator *incr_pos, bool after, tree *var_before,
97 : : tree *var_after)
98 : : {
99 : 921532 : gphi *phi;
100 : 921532 : tree initial, step1;
101 : 921532 : gimple_seq stmts;
102 : 921532 : tree vb, va;
103 : 921532 : gcc_assert (incr_op == PLUS_EXPR || incr_op == MINUS_EXPR);
104 : 921532 : edge pe = loop_preheader_edge (loop);
105 : :
106 : 921532 : if (var != NULL_TREE)
107 : : {
108 : 567049 : vb = make_ssa_name (var);
109 : 567049 : va = make_ssa_name (var);
110 : : }
111 : : else
112 : : {
113 : 354483 : vb = make_temp_ssa_name (TREE_TYPE (base), NULL, "ivtmp");
114 : 354483 : va = make_temp_ssa_name (TREE_TYPE (base), NULL, "ivtmp");
115 : : }
116 : 921532 : if (var_before)
117 : 629897 : *var_before = vb;
118 : 921532 : if (var_after)
119 : 915055 : *var_after = va;
120 : :
121 : : /* For easier readability of the created code, produce MINUS_EXPRs
122 : : when suitable. */
123 : 921532 : if (TREE_CODE (step) == INTEGER_CST)
124 : : {
125 : 866651 : if (TYPE_UNSIGNED (TREE_TYPE (step)))
126 : : {
127 : 865142 : step1 = fold_build1 (NEGATE_EXPR, TREE_TYPE (step), step);
128 : 865142 : if (tree_int_cst_lt (step1, step))
129 : : {
130 : 306147 : incr_op = (incr_op == PLUS_EXPR ? MINUS_EXPR : PLUS_EXPR);
131 : : step = step1;
132 : : }
133 : : }
134 : : else
135 : : {
136 : 1509 : bool ovf;
137 : :
138 : 1509 : if (!tree_expr_nonnegative_warnv_p (step, &ovf)
139 : 1509 : && may_negate_without_overflow_p (step))
140 : : {
141 : 13 : incr_op = (incr_op == PLUS_EXPR ? MINUS_EXPR : PLUS_EXPR);
142 : 13 : step = fold_build1 (NEGATE_EXPR, TREE_TYPE (step), step);
143 : : }
144 : : }
145 : : }
146 : 921532 : if (POINTER_TYPE_P (TREE_TYPE (base)))
147 : : {
148 : 122493 : if (TREE_CODE (base) == ADDR_EXPR)
149 : 45018 : mark_addressable (TREE_OPERAND (base, 0));
150 : 122493 : step = convert_to_ptrofftype (step);
151 : 122493 : if (incr_op == MINUS_EXPR)
152 : 2909 : step = fold_build1 (NEGATE_EXPR, TREE_TYPE (step), step);
153 : : incr_op = POINTER_PLUS_EXPR;
154 : : }
155 : : /* Gimplify the step if necessary. We put the computations in front of the
156 : : loop (i.e. the step should be loop invariant). */
157 : 921532 : step = force_gimple_operand (step, &stmts, true, NULL_TREE);
158 : 921532 : if (stmts)
159 : 46821 : gsi_insert_seq_on_edge_immediate (pe, stmts);
160 : :
161 : 921532 : gimple_seq incr_stmts = nullptr;
162 : 921532 : gimple_seq_add_stmt (&incr_stmts,
163 : 921532 : gimple_build_assign (va, incr_op, vb, step));
164 : 921532 : insert_iv_increment (incr_pos, after, incr_stmts);
165 : :
166 : 921532 : initial = force_gimple_operand (base, &stmts, true, var);
167 : 921532 : if (stmts)
168 : 240415 : gsi_insert_seq_on_edge_immediate (pe, stmts);
169 : :
170 : 921532 : phi = create_phi_node (vb, loop->header);
171 : 921532 : add_phi_arg (phi, initial, loop_preheader_edge (loop), UNKNOWN_LOCATION);
172 : 921532 : add_phi_arg (phi, va, loop_latch_edge (loop), UNKNOWN_LOCATION);
173 : 921532 : }
174 : :
175 : : /* Return the innermost superloop LOOP of USE_LOOP that is a superloop of
176 : : both DEF_LOOP and USE_LOOP. */
177 : :
178 : : static inline class loop *
179 : 136920 : find_sibling_superloop (class loop *use_loop, class loop *def_loop)
180 : : {
181 : 136920 : unsigned ud = loop_depth (use_loop);
182 : 136920 : unsigned dd = loop_depth (def_loop);
183 : 136920 : gcc_assert (ud > 0 && dd > 0);
184 : 136920 : if (ud > dd)
185 : 16911 : use_loop = superloop_at_depth (use_loop, dd);
186 : 136920 : if (ud < dd)
187 : 26123 : def_loop = superloop_at_depth (def_loop, ud);
188 : 139430 : while (loop_outer (use_loop) != loop_outer (def_loop))
189 : : {
190 : 2510 : use_loop = loop_outer (use_loop);
191 : 2510 : def_loop = loop_outer (def_loop);
192 : 2510 : gcc_assert (use_loop && def_loop);
193 : : }
194 : 136920 : return use_loop;
195 : : }
196 : :
197 : : /* DEF_BB is a basic block containing a DEF that needs rewriting into
198 : : loop-closed SSA form. USE_BLOCKS is the set of basic blocks containing
199 : : uses of DEF that "escape" from the loop containing DEF_BB (i.e. blocks in
200 : : USE_BLOCKS are dominated by DEF_BB but not in the loop father of DEF_BB).
201 : : ALL_EXITS[I] is the set of all basic blocks that exit loop I.
202 : : DEF_LOOP_EXITS is a bitmap of loop exit blocks that exit the loop
203 : : containing DEF_BB or its outer loops.
204 : :
205 : : Compute the subset of loop exit destinations that exit the loop
206 : : containing DEF_BB or one of its loop fathers, in which DEF is live.
207 : : This set is returned in the bitmap LIVE_EXITS.
208 : :
209 : : Instead of computing the complete livein set of the def, we use the loop
210 : : nesting tree as a form of poor man's structure analysis. This greatly
211 : : speeds up the analysis, which is important because this function may be
212 : : called on all SSA names that need rewriting, one at a time. */
213 : :
214 : : static void
215 : 3087469 : compute_live_loop_exits (bitmap live_exits, bitmap use_blocks,
216 : : basic_block def_bb, bitmap def_loop_exits)
217 : : {
218 : 3087469 : unsigned i;
219 : 3087469 : bitmap_iterator bi;
220 : 3087469 : class loop *def_loop = def_bb->loop_father;
221 : 3087469 : unsigned def_loop_depth = loop_depth (def_loop);
222 : :
223 : : /* Normally the work list size is bounded by the number of basic
224 : : blocks in the largest loop. We don't know this number, but we
225 : : can be fairly sure that it will be relatively small. */
226 : 3087469 : auto_vec<basic_block, 8> worklist (MAX (8, n_basic_blocks_for_fn (cfun) / 128));
227 : :
228 : 7448495 : EXECUTE_IF_SET_IN_BITMAP (use_blocks, 0, i, bi)
229 : : {
230 : 4361026 : basic_block use_bb = BASIC_BLOCK_FOR_FN (cfun, i);
231 : 4361026 : class loop *use_loop = use_bb->loop_father;
232 : 4361026 : gcc_checking_assert (def_loop != use_loop
233 : : && ! flow_loop_nested_p (def_loop, use_loop));
234 : 4361026 : if (! flow_loop_nested_p (use_loop, def_loop))
235 : 114089 : use_bb = find_sibling_superloop (use_loop, def_loop)->header;
236 : 4361026 : if (bitmap_set_bit (live_exits, use_bb->index))
237 : 4325207 : worklist.safe_push (use_bb);
238 : : }
239 : :
240 : : /* Iterate until the worklist is empty. */
241 : 9930372 : while (! worklist.is_empty ())
242 : : {
243 : 6842903 : edge e;
244 : 6842903 : edge_iterator ei;
245 : :
246 : : /* Pull a block off the worklist. */
247 : 6842903 : basic_block bb = worklist.pop ();
248 : :
249 : : /* Make sure we have at least enough room in the work list
250 : : for all predecessors of this block. */
251 : 6842903 : worklist.reserve (EDGE_COUNT (bb->preds));
252 : :
253 : : /* For each predecessor block. */
254 : 14769145 : FOR_EACH_EDGE (e, ei, bb->preds)
255 : : {
256 : 7926242 : basic_block pred = e->src;
257 : 7926242 : class loop *pred_loop = pred->loop_father;
258 : 7926242 : unsigned pred_loop_depth = loop_depth (pred_loop);
259 : 7926242 : bool pred_visited;
260 : :
261 : : /* We should have met DEF_BB along the way. */
262 : 7926242 : gcc_assert (pred != ENTRY_BLOCK_PTR_FOR_FN (cfun));
263 : :
264 : 7926242 : if (pred_loop_depth >= def_loop_depth)
265 : : {
266 : 4699027 : if (pred_loop_depth > def_loop_depth)
267 : 151106 : pred_loop = superloop_at_depth (pred_loop, def_loop_depth);
268 : : /* If we've reached DEF_LOOP, our train ends here. */
269 : 4699027 : if (pred_loop == def_loop)
270 : 5408546 : continue;
271 : : }
272 : 3227215 : else if (! flow_loop_nested_p (pred_loop, def_loop))
273 : 22831 : pred = find_sibling_superloop (pred_loop, def_loop)->header;
274 : :
275 : : /* Add PRED to the LIVEIN set. PRED_VISITED is true if
276 : : we had already added PRED to LIVEIN before. */
277 : 4129308 : pred_visited = !bitmap_set_bit (live_exits, pred->index);
278 : :
279 : : /* If we have visited PRED before, don't add it to the worklist.
280 : : If BB dominates PRED, then we're probably looking at a loop.
281 : : We're only interested in looking up in the dominance tree
282 : : because DEF_BB dominates all the uses. */
283 : 4129308 : if (pred_visited || dominated_by_p (CDI_DOMINATORS, pred, bb))
284 : 1611612 : continue;
285 : :
286 : 2517696 : worklist.quick_push (pred);
287 : : }
288 : : }
289 : :
290 : 3087469 : bitmap_and_into (live_exits, def_loop_exits);
291 : 3087469 : }
292 : :
293 : : /* Add a loop-closing PHI for VAR in basic block EXIT. */
294 : :
295 : : static void
296 : 3737366 : add_exit_phi (basic_block exit, tree var)
297 : : {
298 : 3737366 : gphi *phi;
299 : 3737366 : edge e;
300 : 3737366 : edge_iterator ei;
301 : :
302 : : /* Check that at least one of the edges entering the EXIT block exits
303 : : the loop, or a superloop of that loop, that VAR is defined in. */
304 : 3737366 : if (flag_checking)
305 : : {
306 : 3737264 : gimple *def_stmt = SSA_NAME_DEF_STMT (var);
307 : 3737264 : basic_block def_bb = gimple_bb (def_stmt);
308 : 3747092 : FOR_EACH_EDGE (e, ei, exit->preds)
309 : : {
310 : 7494184 : class loop *aloop = find_common_loop (def_bb->loop_father,
311 : 3747092 : e->src->loop_father);
312 : 3747092 : if (!flow_bb_inside_loop_p (aloop, e->dest))
313 : : break;
314 : : }
315 : 3737264 : gcc_assert (e);
316 : : }
317 : :
318 : 3737366 : phi = create_phi_node (NULL_TREE, exit);
319 : 3737366 : create_new_def_for (var, phi, gimple_phi_result_ptr (phi));
320 : 7659465 : FOR_EACH_EDGE (e, ei, exit->preds)
321 : 3922099 : add_phi_arg (phi, var, e, UNKNOWN_LOCATION);
322 : :
323 : 3737366 : if (dump_file && (dump_flags & TDF_DETAILS))
324 : : {
325 : 1459 : fprintf (dump_file, ";; Created LCSSA PHI: ");
326 : 1459 : print_gimple_stmt (dump_file, phi, 0, dump_flags);
327 : : }
328 : 3737366 : }
329 : :
330 : : /* Add exit phis for VAR that is used in LIVEIN.
331 : : Exits of the loops are stored in LOOP_EXITS. Returns the number
332 : : of PHIs added for VAR. */
333 : :
334 : : static unsigned
335 : 3087469 : add_exit_phis_var (tree var, bitmap use_blocks, bitmap def_loop_exits)
336 : : {
337 : 3087469 : unsigned index;
338 : 3087469 : bitmap_iterator bi;
339 : 3087469 : basic_block def_bb = gimple_bb (SSA_NAME_DEF_STMT (var));
340 : :
341 : 3087469 : gcc_checking_assert (! bitmap_bit_p (use_blocks, def_bb->index));
342 : :
343 : 3087469 : auto_bitmap live_exits (&loop_renamer_obstack);
344 : 3087469 : compute_live_loop_exits (live_exits, use_blocks, def_bb, def_loop_exits);
345 : :
346 : 3087469 : unsigned cnt = 0;
347 : 6824835 : EXECUTE_IF_SET_IN_BITMAP (live_exits, 0, index, bi)
348 : : {
349 : 3737366 : add_exit_phi (BASIC_BLOCK_FOR_FN (cfun, index), var);
350 : 3737366 : cnt++;
351 : : }
352 : 3087469 : return cnt;
353 : 3087469 : }
354 : :
355 : : static int
356 : 46060097 : loop_name_cmp (const void *p1, const void *p2)
357 : : {
358 : 46060097 : auto l1 = (const std::pair<int, int> *)p1;
359 : 46060097 : auto l2 = (const std::pair<int, int> *)p2;
360 : 46060097 : if (l1->first < l2->first)
361 : : return -1;
362 : 28057990 : else if (l1->first > l2->first)
363 : 16869271 : return 1;
364 : : return 0;
365 : : }
366 : :
367 : : /* Add exit phis for the names marked in NAMES_TO_RENAME.
368 : : Exits of the loops are stored in EXITS. Sets of blocks where the ssa
369 : : names are used are stored in USE_BLOCKS. Returns whether any name
370 : : required multiple LC PHI nodes. */
371 : :
372 : : static bool
373 : 690444 : add_exit_phis (bitmap names_to_rename, bitmap *use_blocks)
374 : : {
375 : 690444 : unsigned i;
376 : 690444 : bitmap_iterator bi;
377 : 690444 : bool multiple_p = false;
378 : :
379 : : /* Sort names_to_rename after definition loop so we can avoid re-computing
380 : : def_loop_exits. */
381 : 690444 : auto_vec<std::pair<int, int> > names (bitmap_count_bits (names_to_rename));
382 : 3777913 : EXECUTE_IF_SET_IN_BITMAP (names_to_rename, 0, i, bi)
383 : : {
384 : 3087469 : tree name = ssa_name (i);
385 : 3087469 : loop_p def_loop = gimple_bb (SSA_NAME_DEF_STMT (name))->loop_father;
386 : 3087469 : names.quick_push (std::make_pair (def_loop->num, i));
387 : : }
388 : 690444 : names.qsort (loop_name_cmp);
389 : :
390 : 690444 : auto_bitmap def_loop_exits (&loop_renamer_obstack);
391 : 690444 : loop_p last_def_loop = NULL;
392 : 5158801 : for (auto p : names)
393 : : {
394 : 3087469 : loop_p def_loop = get_loop (cfun, p.first);
395 : 3087469 : if (def_loop != last_def_loop)
396 : : {
397 : 1635586 : bitmap_clear (def_loop_exits);
398 : 1635586 : last_def_loop = def_loop;
399 : 3840437 : for (class loop *loop = def_loop; loop != current_loops->tree_root;
400 : 2204851 : loop = loop_outer (loop))
401 : 8342634 : for (auto exit = loop->exits->next; exit->e; exit = exit->next)
402 : 6137783 : bitmap_set_bit (def_loop_exits, exit->e->dest->index);
403 : : }
404 : 3087469 : if (add_exit_phis_var (ssa_name (p.second), use_blocks[p.second],
405 : : def_loop_exits) > 1)
406 : 351514 : multiple_p = true;
407 : : }
408 : :
409 : 690444 : return multiple_p;
410 : 690444 : }
411 : :
412 : : /* For USE in BB, if it is used outside of the loop it is defined in,
413 : : mark it for rewrite. Record basic block BB where it is used
414 : : to USE_BLOCKS. Record the ssa name index to NEED_PHIS bitmap.
415 : : Note that for USEs in phis, BB should be the src of the edge corresponding to
416 : : the use, rather than the bb containing the phi. */
417 : :
418 : : static void
419 : 151601814 : find_uses_to_rename_use (basic_block bb, tree use, bitmap *use_blocks,
420 : : bitmap need_phis)
421 : : {
422 : 151601814 : unsigned ver;
423 : 151601814 : basic_block def_bb;
424 : 151601814 : class loop *def_loop;
425 : :
426 : 151601814 : if (TREE_CODE (use) != SSA_NAME)
427 : : return;
428 : :
429 : 146255497 : ver = SSA_NAME_VERSION (use);
430 : 146255497 : def_bb = gimple_bb (SSA_NAME_DEF_STMT (use));
431 : 146255497 : if (!def_bb)
432 : : return;
433 : 135522955 : def_loop = def_bb->loop_father;
434 : :
435 : : /* If the definition is not inside a loop, it is not interesting. */
436 : 135522955 : if (!loop_outer (def_loop))
437 : : return;
438 : :
439 : : /* If the use is not outside of the loop it is defined in, it is not
440 : : interesting. */
441 : 71839076 : if (flow_bb_inside_loop_p (def_loop, bb))
442 : : return;
443 : :
444 : : /* If we're seeing VER for the first time, we still have to allocate
445 : : a bitmap for its uses. */
446 : 4990905 : if (bitmap_set_bit (need_phis, ver))
447 : 3087469 : use_blocks[ver] = BITMAP_ALLOC (&loop_renamer_obstack);
448 : 4990905 : bitmap_set_bit (use_blocks[ver], bb->index);
449 : : }
450 : :
451 : : /* For uses matching USE_FLAGS in STMT, mark names that are used outside of the
452 : : loop they are defined to rewrite. Record the set of blocks in which the ssa
453 : : names are used to USE_BLOCKS, and the ssa names themselves to NEED_PHIS. */
454 : :
455 : : static void
456 : 185368986 : find_uses_to_rename_stmt (gimple *stmt, bitmap *use_blocks, bitmap need_phis,
457 : : int use_flags)
458 : : {
459 : 185368986 : ssa_op_iter iter;
460 : 185368986 : tree var;
461 : 185368986 : basic_block bb = gimple_bb (stmt);
462 : :
463 : 185368986 : if (is_gimple_debug (stmt))
464 : 102623701 : return;
465 : :
466 : : /* FOR_EACH_SSA_TREE_OPERAND iterator does not allows SSA_OP_VIRTUAL_USES
467 : : only. */
468 : 82745285 : if (use_flags == SSA_OP_VIRTUAL_USES)
469 : : {
470 : 82745285 : tree vuse = gimple_vuse (stmt);
471 : 0 : if (vuse != NULL_TREE)
472 : 0 : find_uses_to_rename_use (bb, gimple_vuse (stmt), use_blocks, need_phis);
473 : : }
474 : : else
475 : 199503229 : FOR_EACH_SSA_TREE_OPERAND (var, stmt, iter, use_flags)
476 : 116757944 : find_uses_to_rename_use (bb, var, use_blocks, need_phis);
477 : : }
478 : :
479 : : /* Marks names matching USE_FLAGS that are used in BB and outside of the loop
480 : : they are defined in for rewrite. Records the set of blocks in which the ssa
481 : : names are used to USE_BLOCKS. Record the SSA names that will
482 : : need exit PHIs in NEED_PHIS. */
483 : :
484 : : static void
485 : 28853365 : find_uses_to_rename_bb (basic_block bb, bitmap *use_blocks, bitmap need_phis,
486 : : int use_flags)
487 : : {
488 : 28853365 : edge e;
489 : 28853365 : edge_iterator ei;
490 : 28853365 : bool do_virtuals = (use_flags & SSA_OP_VIRTUAL_USES) != 0;
491 : 28853365 : bool do_nonvirtuals = (use_flags & SSA_OP_USE) != 0;
492 : :
493 : 69908338 : FOR_EACH_EDGE (e, ei, bb->succs)
494 : 75898843 : for (gphi_iterator bsi = gsi_start_phis (e->dest); !gsi_end_p (bsi);
495 : 34843870 : gsi_next (&bsi))
496 : : {
497 : 34843870 : gphi *phi = bsi.phi ();
498 : 34843870 : bool virtual_p = virtual_operand_p (gimple_phi_result (phi));
499 : 34843870 : if ((virtual_p && do_virtuals)
500 : 20415032 : || (!virtual_p && do_nonvirtuals))
501 : 34843870 : find_uses_to_rename_use (bb, PHI_ARG_DEF_FROM_EDGE (phi, e),
502 : : use_blocks, need_phis);
503 : : }
504 : :
505 : 243075716 : for (gimple_stmt_iterator bsi = gsi_start_bb (bb); !gsi_end_p (bsi);
506 : 185368986 : gsi_next (&bsi))
507 : 185368986 : find_uses_to_rename_stmt (gsi_stmt (bsi), use_blocks, need_phis,
508 : : use_flags);
509 : 28853365 : }
510 : :
511 : : /* Marks names matching USE_FLAGS that are used outside of the loop they are
512 : : defined in for rewrite. Records the set of blocks in which the ssa names are
513 : : used to USE_BLOCKS. Record the SSA names that will need exit PHIs in
514 : : NEED_PHIS. If CHANGED_BBS is not NULL, scan only blocks in this set. */
515 : :
516 : : static void
517 : 988709 : find_uses_to_rename (bitmap changed_bbs, bitmap *use_blocks, bitmap need_phis,
518 : : int use_flags)
519 : : {
520 : 988709 : basic_block bb;
521 : 988709 : unsigned index;
522 : 988709 : bitmap_iterator bi;
523 : :
524 : 988709 : if (changed_bbs)
525 : 25619 : EXECUTE_IF_SET_IN_BITMAP (changed_bbs, 0, index, bi)
526 : : {
527 : 18630 : bb = BASIC_BLOCK_FOR_FN (cfun, index);
528 : 18630 : if (bb)
529 : 18630 : find_uses_to_rename_bb (bb, use_blocks, need_phis, use_flags);
530 : : }
531 : : else
532 : 29816455 : FOR_EACH_BB_FN (bb, cfun)
533 : 28834735 : find_uses_to_rename_bb (bb, use_blocks, need_phis, use_flags);
534 : 988709 : }
535 : :
536 : : /* Rewrites the program into a loop closed ssa form -- i.e. inserts extra
537 : : phi nodes to ensure that no variable is used outside the loop it is
538 : : defined in.
539 : :
540 : : This strengthening of the basic ssa form has several advantages:
541 : :
542 : : 1) Updating it during unrolling/peeling/versioning is trivial, since
543 : : we do not need to care about the uses outside of the loop.
544 : : The same applies to virtual operands which are also rewritten into
545 : : loop closed SSA form. Note that virtual operands are always live
546 : : until function exit.
547 : : 2) The behavior of all uses of an induction variable is the same.
548 : : Without this, you need to distinguish the case when the variable
549 : : is used outside of the loop it is defined in, for example
550 : :
551 : : for (i = 0; i < 100; i++)
552 : : {
553 : : for (j = 0; j < 100; j++)
554 : : {
555 : : k = i + j;
556 : : use1 (k);
557 : : }
558 : : use2 (k);
559 : : }
560 : :
561 : : Looking from the outer loop with the normal SSA form, the first use of k
562 : : is not well-behaved, while the second one is an induction variable with
563 : : base 99 and step 1.
564 : :
565 : : If CHANGED_BBS is not NULL, we look for uses outside loops only in the
566 : : basic blocks in this set.
567 : :
568 : : USE_FLAGS allows us to specify whether we want virtual, non-virtual or
569 : : both variables rewritten.
570 : :
571 : : UPDATE_FLAG is used in the call to update_ssa. See
572 : : TODO_update_ssa* for documentation. */
573 : :
574 : : static void
575 : 4391504 : rewrite_into_loop_closed_ssa_1 (bitmap changed_bbs, unsigned update_flag,
576 : : int use_flags)
577 : : {
578 : 4391504 : bitmap *use_blocks;
579 : 4391504 : bitmap names_to_rename;
580 : :
581 : 4391504 : loops_state_set (LOOP_CLOSED_SSA);
582 : 4391504 : if (number_of_loops (cfun) <= 1)
583 : : return;
584 : :
585 : : /* If the pass has caused the SSA form to be out-of-date, update it
586 : : now. */
587 : 988709 : if (update_flag != 0)
588 : 988337 : update_ssa (update_flag);
589 : 372 : else if (flag_checking)
590 : 372 : verify_ssa (true, true);
591 : :
592 : 988709 : bitmap_obstack_initialize (&loop_renamer_obstack);
593 : :
594 : 988709 : names_to_rename = BITMAP_ALLOC (&loop_renamer_obstack);
595 : :
596 : : /* Uses of names to rename. We don't have to initialize this array,
597 : : because we know that we will only have entries for the SSA names
598 : : in NAMES_TO_RENAME. */
599 : 1977418 : use_blocks = XNEWVEC (bitmap, num_ssa_names);
600 : 988709 : find_uses_to_rename (changed_bbs, use_blocks, names_to_rename, use_flags);
601 : :
602 : 988709 : if (!bitmap_empty_p (names_to_rename))
603 : : {
604 : 690444 : bool release_recorded_exits_p = false;
605 : 690444 : if (!loops_state_satisfies_p (LOOPS_HAVE_RECORDED_EXITS))
606 : : {
607 : : /* Doing one scan over the whole function is cheaper than
608 : : traversing the loop tree and gathering BBs of each loop. */
609 : 0 : record_loop_exits ();
610 : 0 : release_recorded_exits_p = true;
611 : : }
612 : :
613 : : /* Add the PHI nodes on exits of the loops for the names we need to
614 : : rewrite. When no variable required multiple LC PHI nodes to be
615 : : inserted then we know that all uses outside of the loop are
616 : : dominated by the single LC SSA definition and no further PHI
617 : : node insertions are required. */
618 : 690444 : bool need_phis_p = add_exit_phis (names_to_rename, use_blocks);
619 : :
620 : 690444 : if (release_recorded_exits_p)
621 : 0 : release_recorded_exits (cfun);
622 : :
623 : : /* Fix up all the names found to be used outside their original
624 : : loops. */
625 : 1236332 : update_ssa (need_phis_p ? TODO_update_ssa : TODO_update_ssa_no_phi);
626 : : }
627 : :
628 : 988709 : bitmap_obstack_release (&loop_renamer_obstack);
629 : 988709 : free (use_blocks);
630 : : }
631 : :
632 : : /* Rewrites the defs and uses into a loop closed ssa form.
633 : : If CHANGED_BBS is not NULL, we look for uses outside loops only in the basic
634 : : blocks in this set. UPDATE_FLAG is used in the call to update_ssa. See
635 : : TODO_update_ssa* for documentation. */
636 : :
637 : : void
638 : 4391504 : rewrite_into_loop_closed_ssa (bitmap changed_bbs, unsigned update_flag)
639 : : {
640 : 4391504 : rewrite_into_loop_closed_ssa_1 (changed_bbs, update_flag, SSA_OP_ALL_USES);
641 : 4391504 : }
642 : :
643 : : /* Check invariants of the loop closed ssa form for the def in DEF_BB. */
644 : :
645 : : static void
646 : 166859778 : check_loop_closed_ssa_def (basic_block def_bb, tree def)
647 : : {
648 : 166859778 : use_operand_p use_p;
649 : 166859778 : imm_use_iterator iterator;
650 : 486073142 : FOR_EACH_IMM_USE_FAST (use_p, iterator, def)
651 : : {
652 : 319213364 : if (is_gimple_debug (USE_STMT (use_p)))
653 : 38835280 : continue;
654 : :
655 : 280378084 : basic_block use_bb = gimple_bb (USE_STMT (use_p));
656 : 280378084 : if (is_a <gphi *> (USE_STMT (use_p)))
657 : 76574812 : use_bb = EDGE_PRED (use_bb, PHI_ARG_INDEX_FROM_USE (use_p))->src;
658 : :
659 : 280378084 : gcc_assert (flow_bb_inside_loop_p (def_bb->loop_father, use_bb));
660 : : }
661 : 166859778 : }
662 : :
663 : : /* Checks invariants of loop closed ssa form in BB. */
664 : :
665 : : static void
666 : 49440025 : check_loop_closed_ssa_bb (basic_block bb)
667 : : {
668 : 90250737 : for (gphi_iterator bsi = gsi_start_phis (bb); !gsi_end_p (bsi);
669 : 40810712 : gsi_next (&bsi))
670 : : {
671 : 40810712 : gphi *phi = bsi.phi ();
672 : :
673 : 40810712 : check_loop_closed_ssa_def (bb, PHI_RESULT (phi));
674 : : }
675 : :
676 : 197565127 : for (gimple_stmt_iterator bsi = gsi_start_nondebug_bb (bb); !gsi_end_p (bsi);
677 : 148125102 : gsi_next_nondebug (&bsi))
678 : : {
679 : 148125102 : ssa_op_iter iter;
680 : 148125102 : tree var;
681 : 148125102 : gimple *stmt = gsi_stmt (bsi);
682 : :
683 : 274174168 : FOR_EACH_SSA_TREE_OPERAND (var, stmt, iter, SSA_OP_ALL_DEFS)
684 : 126049066 : check_loop_closed_ssa_def (bb, var);
685 : : }
686 : 49440025 : }
687 : :
688 : : /* Checks that invariants of the loop closed ssa form are preserved.
689 : : Call verify_ssa when VERIFY_SSA_P is true. Note all loops are checked
690 : : if LOOP is NULL, otherwise, only LOOP is checked. */
691 : :
692 : : DEBUG_FUNCTION void
693 : 3704268 : verify_loop_closed_ssa (bool verify_ssa_p, class loop *loop)
694 : : {
695 : 7408536 : if (number_of_loops (cfun) <= 1)
696 : : return;
697 : :
698 : 3704268 : timevar_push (TV_VERIFY_LOOP_CLOSED);
699 : :
700 : 3704268 : if (loop == NULL)
701 : : {
702 : 3702875 : basic_block bb;
703 : :
704 : 3702875 : if (verify_ssa_p)
705 : 29926 : verify_ssa (false, true);
706 : :
707 : 126028141 : FOR_EACH_BB_FN (bb, cfun)
708 : 122325266 : if (bb->loop_father && bb->loop_father->num > 0)
709 : 49433197 : check_loop_closed_ssa_bb (bb);
710 : : }
711 : : else
712 : : {
713 : 1393 : basic_block *bbs = get_loop_body (loop);
714 : :
715 : : /* We do not have loop-local SSA verification so just
716 : : check there's no update queued. */
717 : 1393 : if (verify_ssa_p)
718 : 1393 : gcc_assert (!need_ssa_update_p (cfun));
719 : :
720 : 8221 : for (unsigned i = 0; i < loop->num_nodes; ++i)
721 : 6828 : check_loop_closed_ssa_bb (bbs[i]);
722 : :
723 : 1393 : free (bbs);
724 : : }
725 : :
726 : 3704268 : timevar_pop (TV_VERIFY_LOOP_CLOSED);
727 : : }
728 : :
729 : : /* Split loop exit edge EXIT. The things are a bit complicated by a need to
730 : : preserve the loop closed ssa form. If COPY_CONSTANTS_P is true then
731 : : forwarder PHIs are also created for constant arguments.
732 : : The newly created block is returned. */
733 : :
734 : : basic_block
735 : 53157 : split_loop_exit_edge (edge exit, bool copy_constants_p)
736 : : {
737 : 53157 : basic_block dest = exit->dest;
738 : 53157 : basic_block bb = split_edge (exit);
739 : 53157 : gphi *phi, *new_phi;
740 : 53157 : tree new_name, name;
741 : 53157 : use_operand_p op_p;
742 : 53157 : gphi_iterator psi;
743 : 53157 : location_t locus;
744 : :
745 : 127286 : for (psi = gsi_start_phis (dest); !gsi_end_p (psi); gsi_next (&psi))
746 : : {
747 : 74129 : phi = psi.phi ();
748 : 74129 : op_p = PHI_ARG_DEF_PTR_FROM_EDGE (phi, single_succ_edge (bb));
749 : 74129 : locus = gimple_phi_arg_location_from_edge (phi, single_succ_edge (bb));
750 : :
751 : 74129 : name = USE_FROM_PTR (op_p);
752 : :
753 : : /* If the argument of the PHI node is a constant, we do not need
754 : : to keep it inside loop. */
755 : 74129 : if (TREE_CODE (name) != SSA_NAME
756 : 3237 : && !copy_constants_p)
757 : 3228 : continue;
758 : :
759 : : /* Otherwise create an auxiliary phi node that will copy the value
760 : : of the SSA name out of the loop. */
761 : 70901 : new_name = duplicate_ssa_name (PHI_RESULT (phi), NULL);
762 : 70901 : new_phi = create_phi_node (new_name, bb);
763 : 70901 : add_phi_arg (new_phi, name, exit, locus);
764 : 70901 : SET_USE (op_p, new_name);
765 : : }
766 : :
767 : 53157 : return bb;
768 : : }
769 : :
770 : : /* Returns the basic block in that statements should be emitted for induction
771 : : variables incremented at the end of the LOOP. */
772 : :
773 : : basic_block
774 : 15030958 : ip_end_pos (class loop *loop)
775 : : {
776 : 15030958 : return loop->latch;
777 : : }
778 : :
779 : : /* Returns the basic block in that statements should be emitted for induction
780 : : variables incremented just before exit condition of a LOOP. */
781 : :
782 : : basic_block
783 : 46238683 : ip_normal_pos (class loop *loop)
784 : : {
785 : 46238683 : basic_block bb;
786 : 46238683 : edge exit;
787 : :
788 : 46305921 : if (!single_pred_p (loop->latch))
789 : : return NULL;
790 : :
791 : 46107139 : bb = single_pred (loop->latch);
792 : 92214278 : if (!safe_is_a <gcond *> (*gsi_last_bb (bb)))
793 : 13010 : return NULL;
794 : :
795 : 46094129 : exit = EDGE_SUCC (bb, 0);
796 : 46094129 : if (exit->dest == loop->latch)
797 : 34824218 : exit = EDGE_SUCC (bb, 1);
798 : :
799 : 46094129 : if (flow_bb_inside_loop_p (loop, exit->dest))
800 : : return NULL;
801 : :
802 : : return bb;
803 : : }
804 : :
805 : : /* Stores the standard position for induction variable increment in LOOP
806 : : (just before the exit condition if it is available and latch block is empty,
807 : : end of the latch block otherwise) to BSI. INSERT_AFTER is set to true if
808 : : the increment should be inserted after *BSI. */
809 : :
810 : : void
811 : 137315 : standard_iv_increment_position (class loop *loop, gimple_stmt_iterator *bsi,
812 : : bool *insert_after)
813 : : {
814 : 137315 : basic_block bb = ip_normal_pos (loop), latch = ip_end_pos (loop);
815 : 137315 : gimple *last = last_nondebug_stmt (latch);
816 : :
817 : 137315 : if (!bb
818 : 137315 : || (last && gimple_code (last) != GIMPLE_LABEL))
819 : : {
820 : 28 : *bsi = gsi_last_bb (latch);
821 : 28 : *insert_after = true;
822 : : }
823 : : else
824 : : {
825 : 137287 : *bsi = gsi_last_bb (bb);
826 : 137287 : *insert_after = false;
827 : : }
828 : 137315 : }
829 : :
830 : : /* Copies phi node arguments for duplicated blocks. The index of the first
831 : : duplicated block is FIRST_NEW_BLOCK. */
832 : :
833 : : static void
834 : 130977 : copy_phi_node_args (unsigned first_new_block)
835 : : {
836 : 130977 : unsigned i;
837 : :
838 : 1124671 : for (i = first_new_block; i < (unsigned) last_basic_block_for_fn (cfun); i++)
839 : 993694 : BASIC_BLOCK_FOR_FN (cfun, i)->flags |= BB_DUPLICATED;
840 : :
841 : 1124671 : for (i = first_new_block; i < (unsigned) last_basic_block_for_fn (cfun); i++)
842 : 993694 : add_phi_args_after_copy_bb (BASIC_BLOCK_FOR_FN (cfun, i));
843 : :
844 : 1124671 : for (i = first_new_block; i < (unsigned) last_basic_block_for_fn (cfun); i++)
845 : 993694 : BASIC_BLOCK_FOR_FN (cfun, i)->flags &= ~BB_DUPLICATED;
846 : 130977 : }
847 : :
848 : :
849 : : /* The same as cfgloopmanip.cc:duplicate_loop_body_to_header_edge, but also
850 : : updates the PHI nodes at start of the copied region. In order to
851 : : achieve this, only loops whose exits all lead to the same location
852 : : are handled.
853 : :
854 : : Notice that we do not completely update the SSA web after
855 : : duplication. The caller is responsible for calling update_ssa
856 : : after the loop has been duplicated. */
857 : :
858 : : bool
859 : 130996 : gimple_duplicate_loop_body_to_header_edge (class loop *loop, edge e,
860 : : unsigned int ndupl,
861 : : sbitmap wont_exit, edge orig,
862 : : vec<edge> *to_remove, int flags)
863 : : {
864 : 130996 : unsigned first_new_block;
865 : :
866 : 130996 : if (!loops_state_satisfies_p (LOOPS_HAVE_SIMPLE_LATCHES))
867 : : return false;
868 : 130996 : if (!loops_state_satisfies_p (LOOPS_HAVE_PREHEADERS))
869 : : return false;
870 : :
871 : 130996 : first_new_block = last_basic_block_for_fn (cfun);
872 : 130996 : if (!duplicate_loop_body_to_header_edge (loop, e, ndupl, wont_exit, orig,
873 : : to_remove, flags))
874 : : return false;
875 : :
876 : : /* Readd the removed phi args for e. */
877 : 130977 : flush_pending_stmts (e);
878 : :
879 : : /* Copy the phi node arguments. */
880 : 130977 : copy_phi_node_args (first_new_block);
881 : :
882 : 130977 : scev_reset ();
883 : :
884 : 130977 : return true;
885 : : }
886 : :
887 : : /* Returns true if we can unroll LOOP FACTOR times. Number
888 : : of iterations of the loop is returned in NITER. */
889 : :
890 : : bool
891 : 752 : can_unroll_loop_p (class loop *loop, unsigned factor,
892 : : class tree_niter_desc *niter)
893 : : {
894 : 752 : edge exit;
895 : :
896 : : /* Check whether unrolling is possible. We only want to unroll loops
897 : : for that we are able to determine number of iterations. We also
898 : : want to split the extra iterations of the loop from its end,
899 : : therefore we require that the loop has precisely one
900 : : exit. */
901 : :
902 : 752 : exit = single_dom_exit (loop);
903 : 752 : if (!exit)
904 : : return false;
905 : :
906 : 733 : if (!number_of_iterations_exit (loop, exit, niter, false)
907 : 732 : || niter->cmp == ERROR_MARK
908 : : /* Scalar evolutions analysis might have copy propagated
909 : : the abnormal ssa names into these expressions, hence
910 : : emitting the computations based on them during loop
911 : : unrolling might create overlapping life ranges for
912 : : them, and failures in out-of-ssa. */
913 : 730 : || contains_abnormal_ssa_name_p (niter->may_be_zero)
914 : 730 : || contains_abnormal_ssa_name_p (niter->control.base)
915 : 730 : || contains_abnormal_ssa_name_p (niter->control.step)
916 : 1463 : || contains_abnormal_ssa_name_p (niter->bound))
917 : 3 : return false;
918 : :
919 : : /* And of course, we must be able to duplicate the loop. */
920 : 730 : if (!can_duplicate_loop_p (loop))
921 : : return false;
922 : :
923 : : /* The final loop should be small enough. */
924 : 730 : if (tree_num_loop_insns (loop, &eni_size_weights) * factor
925 : 730 : > (unsigned) param_max_unrolled_insns)
926 : : return false;
927 : :
928 : : return true;
929 : : }
930 : :
931 : : /* Determines the conditions that control execution of LOOP unrolled FACTOR
932 : : times. DESC is number of iterations of LOOP. ENTER_COND is set to
933 : : condition that must be true if the main loop can be entered.
934 : : If the loop does not always iterate an exact multiple of FACTOR times,
935 : : EXIT_BASE, EXIT_STEP, EXIT_CMP and EXIT_BOUND are set to values describing
936 : : how the exit from the unrolled loop should be controlled. Otherwise,
937 : : the trees are set to null and EXIT_CMP is set to ERROR_MARK. */
938 : :
939 : : static void
940 : 710 : determine_exit_conditions (class loop *loop, class tree_niter_desc *desc,
941 : : unsigned factor, tree *enter_cond,
942 : : tree *exit_base, tree *exit_step,
943 : : enum tree_code *exit_cmp, tree *exit_bound)
944 : : {
945 : 710 : gimple_seq stmts;
946 : 710 : tree base = desc->control.base;
947 : 710 : tree step = desc->control.step;
948 : 710 : tree bound = desc->bound;
949 : 710 : tree type = TREE_TYPE (step);
950 : 710 : tree bigstep, delta;
951 : 710 : tree min = lower_bound_in_type (type, type);
952 : 710 : tree max = upper_bound_in_type (type, type);
953 : 710 : enum tree_code cmp = desc->cmp;
954 : 710 : tree cond = boolean_true_node, assum;
955 : :
956 : : /* For pointers, do the arithmetics in the type of step. */
957 : 710 : base = fold_convert (type, base);
958 : 710 : bound = fold_convert (type, bound);
959 : :
960 : 710 : *enter_cond = boolean_false_node;
961 : 710 : *exit_base = NULL_TREE;
962 : 710 : *exit_step = NULL_TREE;
963 : 710 : *exit_cmp = ERROR_MARK;
964 : 710 : *exit_bound = NULL_TREE;
965 : 710 : gcc_assert (cmp != ERROR_MARK);
966 : :
967 : : /* We only need to be correct when we answer question
968 : : "Do at least FACTOR more iterations remain?" in the unrolled loop.
969 : : Thus, transforming BASE + STEP * i <> BOUND to
970 : : BASE + STEP * i < BOUND is ok. */
971 : 710 : if (cmp == NE_EXPR)
972 : : {
973 : 71 : if (tree_int_cst_sign_bit (step))
974 : : cmp = GT_EXPR;
975 : : else
976 : 672 : cmp = LT_EXPR;
977 : : }
978 : 639 : else if (cmp == LT_EXPR)
979 : : {
980 : 637 : gcc_assert (!tree_int_cst_sign_bit (step));
981 : : }
982 : 2 : else if (cmp == GT_EXPR)
983 : : {
984 : 2 : gcc_assert (tree_int_cst_sign_bit (step));
985 : : }
986 : : else
987 : 0 : gcc_unreachable ();
988 : :
989 : : /* The main body of the loop may be entered iff:
990 : :
991 : : 1) desc->may_be_zero is false.
992 : : 2) it is possible to check that there are at least FACTOR iterations
993 : : of the loop, i.e., BOUND - step * FACTOR does not overflow.
994 : : 3) # of iterations is at least FACTOR */
995 : :
996 : 710 : if (!integer_zerop (desc->may_be_zero))
997 : 6 : cond = fold_build2 (TRUTH_AND_EXPR, boolean_type_node,
998 : : invert_truthvalue (desc->may_be_zero),
999 : : cond);
1000 : :
1001 : 710 : bigstep = fold_build2 (MULT_EXPR, type, step,
1002 : : build_int_cst_type (type, factor));
1003 : 710 : delta = fold_build2 (MINUS_EXPR, type, bigstep, step);
1004 : 710 : if (cmp == LT_EXPR)
1005 : 672 : assum = fold_build2 (GE_EXPR, boolean_type_node,
1006 : : bound,
1007 : : fold_build2 (PLUS_EXPR, type, min, delta));
1008 : : else
1009 : 38 : assum = fold_build2 (LE_EXPR, boolean_type_node,
1010 : : bound,
1011 : : fold_build2 (PLUS_EXPR, type, max, delta));
1012 : 710 : cond = fold_build2 (TRUTH_AND_EXPR, boolean_type_node, assum, cond);
1013 : :
1014 : 710 : bound = fold_build2 (MINUS_EXPR, type, bound, delta);
1015 : 710 : assum = fold_build2 (cmp, boolean_type_node, base, bound);
1016 : 710 : cond = fold_build2 (TRUTH_AND_EXPR, boolean_type_node, assum, cond);
1017 : :
1018 : 710 : if (integer_nonzerop (cond)
1019 : 710 : && integer_zerop (desc->may_be_zero))
1020 : : {
1021 : : /* Convert the latch count to an iteration count. */
1022 : 74 : tree niter = fold_build2 (PLUS_EXPR, type, desc->niter,
1023 : : build_one_cst (type));
1024 : 74 : if (multiple_of_p (type, niter, build_int_cst (type, factor)))
1025 : 27 : return;
1026 : : }
1027 : :
1028 : 683 : cond = force_gimple_operand (unshare_expr (cond), &stmts, false, NULL_TREE);
1029 : 683 : if (stmts)
1030 : 19 : gsi_insert_seq_on_edge_immediate (loop_preheader_edge (loop), stmts);
1031 : : /* cond now may be a gimple comparison, which would be OK, but also any
1032 : : other gimple rhs (say a && b). In this case we need to force it to
1033 : : operand. */
1034 : 683 : if (!is_gimple_condexpr_for_cond (cond))
1035 : : {
1036 : 16 : cond = force_gimple_operand (cond, &stmts, true, NULL_TREE);
1037 : 16 : if (stmts)
1038 : 16 : gsi_insert_seq_on_edge_immediate (loop_preheader_edge (loop), stmts);
1039 : : }
1040 : 683 : *enter_cond = cond;
1041 : :
1042 : 683 : base = force_gimple_operand (unshare_expr (base), &stmts, true, NULL_TREE);
1043 : 683 : if (stmts)
1044 : 31 : gsi_insert_seq_on_edge_immediate (loop_preheader_edge (loop), stmts);
1045 : 683 : bound = force_gimple_operand (unshare_expr (bound), &stmts, true, NULL_TREE);
1046 : 683 : if (stmts)
1047 : 613 : gsi_insert_seq_on_edge_immediate (loop_preheader_edge (loop), stmts);
1048 : :
1049 : 683 : *exit_base = base;
1050 : 683 : *exit_step = bigstep;
1051 : 683 : *exit_cmp = cmp;
1052 : 683 : *exit_bound = bound;
1053 : : }
1054 : :
1055 : : /* Unroll LOOP FACTOR times. LOOP is known to have a single exit edge
1056 : : whose source block dominates the latch. DESC describes the number of
1057 : : iterations of LOOP.
1058 : :
1059 : : If N is number of iterations of the loop and MAY_BE_ZERO is the condition
1060 : : under that loop exits in the first iteration even if N != 0,
1061 : :
1062 : : while (1)
1063 : : {
1064 : : x = phi (init, next);
1065 : :
1066 : : pre;
1067 : : if (st)
1068 : : break;
1069 : : post;
1070 : : }
1071 : :
1072 : : becomes (with possibly the exit conditions formulated a bit differently,
1073 : : avoiding the need to create a new iv):
1074 : :
1075 : : if (MAY_BE_ZERO || N < FACTOR)
1076 : : goto rest;
1077 : :
1078 : : do
1079 : : {
1080 : : x = phi (init, next);
1081 : :
1082 : : pre;
1083 : : post;
1084 : : pre;
1085 : : post;
1086 : : ...
1087 : : pre;
1088 : : post;
1089 : : N -= FACTOR;
1090 : :
1091 : : } while (N >= FACTOR);
1092 : :
1093 : : rest:
1094 : : init' = phi (init, x);
1095 : :
1096 : : while (1)
1097 : : {
1098 : : x = phi (init', next);
1099 : :
1100 : : pre;
1101 : : if (st)
1102 : : break;
1103 : : post;
1104 : : }
1105 : :
1106 : : Before the loop is unrolled, TRANSFORM is called for it (only for the
1107 : : unrolled loop, but not for its versioned copy). DATA is passed to
1108 : : TRANSFORM. */
1109 : :
1110 : : /* Probability in % that the unrolled loop is entered. Just a guess. */
1111 : : #define PROB_UNROLLED_LOOP_ENTERED 90
1112 : :
1113 : : void
1114 : 710 : tree_transform_and_unroll_loop (class loop *loop, unsigned factor,
1115 : : class tree_niter_desc *desc,
1116 : : transform_callback transform,
1117 : : void *data)
1118 : : {
1119 : 710 : unsigned irr = loop_preheader_edge (loop)->flags & EDGE_IRREDUCIBLE_LOOP;
1120 : :
1121 : 710 : enum tree_code exit_cmp;
1122 : 710 : tree enter_main_cond, exit_base, exit_step, exit_bound;
1123 : 710 : bool flat = maybe_flat_loop_profile (loop);
1124 : 710 : determine_exit_conditions (loop, desc, factor,
1125 : : &enter_main_cond, &exit_base, &exit_step,
1126 : : &exit_cmp, &exit_bound);
1127 : 710 : bool single_loop_p = !exit_base;
1128 : :
1129 : 710 : gcond *exit_if = nullptr;
1130 : 710 : class loop *new_loop = nullptr;
1131 : 710 : edge new_exit;
1132 : 710 : if (!single_loop_p)
1133 : : {
1134 : 683 : profile_count entry_count = loop_preheader_edge (loop)->src->count;
1135 : : /* Let us assume that the unrolled loop is quite likely to be entered. */
1136 : 683 : profile_probability prob_entry;
1137 : 683 : if (integer_nonzerop (enter_main_cond))
1138 : 47 : prob_entry = profile_probability::always ();
1139 : : else
1140 : 636 : prob_entry = profile_probability::guessed_always ()
1141 : 636 : .apply_scale (PROB_UNROLLED_LOOP_ENTERED, 100);
1142 : :
1143 : :
1144 : : /* The values for scales should keep profile consistent, and somewhat
1145 : : close to correct. */
1146 : 683 : new_loop = loop_version (loop, enter_main_cond, NULL, prob_entry,
1147 : : prob_entry.invert (),
1148 : : prob_entry,
1149 : : /* We will later redirect exit from vectorized
1150 : : loop to new_loop. */
1151 : : profile_probability::always (),
1152 : : true);
1153 : 683 : gcc_assert (new_loop != NULL);
1154 : 683 : update_ssa (TODO_update_ssa_no_phi);
1155 : :
1156 : : /* Prepare the cfg and update the phi nodes. Move the loop exit to the
1157 : : loop latch (and make its condition dummy, for the moment). */
1158 : 683 : basic_block rest = loop_preheader_edge (new_loop)->src;
1159 : 683 : edge precond_edge = single_pred_edge (rest);
1160 : 683 : split_edge (loop_latch_edge (loop));
1161 : 683 : basic_block exit_bb = single_pred (loop->latch);
1162 : 683 : edge exit = single_dom_exit (loop);
1163 : :
1164 : : /* Since the exit edge will be removed, the frequency of all the blocks
1165 : : in the loop that are dominated by it must be scaled. */
1166 : 683 : if (exit->probability.initialized_p ())
1167 : 683 : scale_dominated_blocks_in_loop (loop, exit->src,
1168 : : /* We are scaling up here so
1169 : : probability does not fit. */
1170 : 683 : exit->src->count,
1171 : 1366 : exit->src->count - exit->count ());
1172 : :
1173 : 683 : gimple_stmt_iterator bsi = gsi_last_bb (exit_bb);
1174 : 683 : exit_if = gimple_build_cond (EQ_EXPR, integer_zero_node,
1175 : : integer_zero_node,
1176 : : NULL_TREE, NULL_TREE);
1177 : :
1178 : 683 : gsi_insert_after (&bsi, exit_if, GSI_NEW_STMT);
1179 : 683 : new_exit = make_edge (exit_bb, rest, EDGE_FALSE_VALUE | irr);
1180 : 683 : rescan_loop_exit (new_exit, true, false);
1181 : :
1182 : : /* Set the probability of new exit to the same of the old one. Fix
1183 : : the count of the latch block. */
1184 : 683 : new_exit->probability = exit->probability;
1185 : 683 : edge new_nonexit = single_pred_edge (loop->latch);
1186 : 683 : new_nonexit->probability = exit->probability.invert ();
1187 : 683 : new_nonexit->flags = EDGE_TRUE_VALUE;
1188 : 683 : set_edge_probability_and_rescale_others
1189 : 683 : (exit, profile_probability::never ());
1190 : 683 : loop->latch->count = new_nonexit->count ();
1191 : :
1192 : 683 : edge old_entry = loop_preheader_edge (loop);
1193 : 683 : edge new_entry = loop_preheader_edge (new_loop);
1194 : 683 : edge old_latch = loop_latch_edge (loop);
1195 : 683 : for (gphi_iterator psi_old_loop = gsi_start_phis (loop->header),
1196 : 683 : psi_new_loop = gsi_start_phis (new_loop->header);
1197 : 2132 : !gsi_end_p (psi_old_loop);
1198 : 1449 : gsi_next (&psi_old_loop), gsi_next (&psi_new_loop))
1199 : : {
1200 : 1449 : gphi *phi_old_loop = psi_old_loop.phi ();
1201 : 1449 : gphi *phi_new_loop = psi_new_loop.phi ();
1202 : :
1203 : 1449 : tree init = PHI_ARG_DEF_FROM_EDGE (phi_old_loop, old_entry);
1204 : 1449 : use_operand_p op
1205 : 1449 : = PHI_ARG_DEF_PTR_FROM_EDGE (phi_new_loop, new_entry);
1206 : 1449 : gcc_assert (operand_equal_for_phi_arg_p (init, USE_FROM_PTR (op)));
1207 : 1449 : tree next = PHI_ARG_DEF_FROM_EDGE (phi_old_loop, old_latch);
1208 : :
1209 : : /* Prefer using original variable as a base for the new ssa name.
1210 : : This is necessary for virtual ops, and useful in order to avoid
1211 : : losing debug info for real ops. */
1212 : 1449 : tree new_init;
1213 : 1449 : if (TREE_CODE (next) == SSA_NAME
1214 : 2898 : && useless_type_conversion_p (TREE_TYPE (next),
1215 : 1449 : TREE_TYPE (init)))
1216 : 1449 : new_init = copy_ssa_name (next);
1217 : 0 : else if (TREE_CODE (init) == SSA_NAME
1218 : 0 : && useless_type_conversion_p (TREE_TYPE (init),
1219 : 0 : TREE_TYPE (next)))
1220 : 0 : new_init = copy_ssa_name (init);
1221 : 0 : else if (useless_type_conversion_p (TREE_TYPE (next),
1222 : 0 : TREE_TYPE (init)))
1223 : 0 : new_init = make_temp_ssa_name (TREE_TYPE (next), NULL,
1224 : : "unrinittmp");
1225 : : else
1226 : 0 : new_init = make_temp_ssa_name (TREE_TYPE (init), NULL,
1227 : : "unrinittmp");
1228 : :
1229 : 1449 : gphi *phi_rest = create_phi_node (new_init, rest);
1230 : 1449 : add_phi_arg (phi_rest, init, precond_edge, UNKNOWN_LOCATION);
1231 : 1449 : add_phi_arg (phi_rest, next, new_exit, UNKNOWN_LOCATION);
1232 : 1449 : SET_USE (op, new_init);
1233 : : }
1234 : :
1235 : 683 : remove_path (exit);
1236 : : /* We will later redirect exit from vectorized loop to new_loop. */
1237 : 683 : loop_preheader_edge (new_loop)->src->count = entry_count;
1238 : :
1239 : : /* The epilog loop latch executes at most factor - 1 times.
1240 : : Since the epilog is entered unconditionally it will need to handle
1241 : : up to factor executions of its body. */
1242 : 683 : new_loop->any_upper_bound = true;
1243 : 683 : new_loop->nb_iterations_upper_bound = factor - 1;
1244 : : /* We do not really know estimate on number of iterations, since we do not
1245 : : track any estimates modulo unroll factor.
1246 : : Drop estimate from loop_info and scale loop profile.
1247 : : It may be more realistic to scale loop profile to factor / 2 - 1,
1248 : : but vectorizer also uses factor - 1. */
1249 : 683 : new_loop->any_estimate = false;
1250 : 683 : scale_loop_profile (new_loop, profile_probability::always (), factor - 1);
1251 : : }
1252 : : else
1253 : 27 : new_exit = single_dom_exit (loop);
1254 : :
1255 : : /* Transform the loop. */
1256 : 710 : if (transform)
1257 : 622 : (*transform) (loop, data);
1258 : :
1259 : : /* Unroll the loop and remove the exits in all iterations except for the
1260 : : last one. */
1261 : 710 : auto_sbitmap wont_exit (factor);
1262 : 710 : bitmap_ones (wont_exit);
1263 : 710 : bitmap_clear_bit (wont_exit, factor - 1);
1264 : :
1265 : 710 : auto_vec<edge> to_remove;
1266 : 710 : bool ok
1267 : : = gimple_duplicate_loop_body_to_header_edge
1268 : 771 : (loop, loop_latch_edge (loop), factor - 1, wont_exit,
1269 : : new_exit, &to_remove,
1270 : : DLTHE_FLAG_UPDATE_FREQ | (flat ? DLTHE_FLAG_FLAT_PROFILE : 0));
1271 : 710 : gcc_assert (ok);
1272 : :
1273 : 3087 : for (edge e : to_remove)
1274 : : {
1275 : 957 : ok = remove_path (e);
1276 : 957 : gcc_assert (ok);
1277 : : }
1278 : 710 : update_ssa (TODO_update_ssa);
1279 : :
1280 : 710 : new_exit = single_dom_exit (loop);
1281 : : /* gimple_duplicate_loop_body_to_header_edge depending on
1282 : : DLTHE_FLAG_UPDATE_FREQ either keeps original frequency of the loop header
1283 : : or scales it down accordingly.
1284 : : However exit edge probability is kept as original. Fix it if needed
1285 : : and compensate. */
1286 : 710 : update_loop_exit_probability_scale_dom_bbs (loop, new_exit);
1287 : 710 : if (!single_loop_p)
1288 : : {
1289 : : /* Finally create the new counter for number of iterations and add
1290 : : the new exit instruction. */
1291 : 683 : tree ctr_before, ctr_after;
1292 : 683 : gimple_stmt_iterator bsi = gsi_last_nondebug_bb (new_exit->src);
1293 : 683 : exit_if = as_a <gcond *> (gsi_stmt (bsi));
1294 : 683 : create_iv (exit_base, PLUS_EXPR, exit_step, NULL_TREE, loop,
1295 : : &bsi, false, &ctr_before, &ctr_after);
1296 : 683 : gimple_cond_set_code (exit_if, exit_cmp);
1297 : 683 : gimple_cond_set_lhs (exit_if, ctr_after);
1298 : 683 : gimple_cond_set_rhs (exit_if, exit_bound);
1299 : 683 : update_stmt (exit_if);
1300 : : }
1301 : 710 : if (loop->any_upper_bound)
1302 : 1396 : loop->nb_iterations_upper_bound = wi::udiv_floor
1303 : 698 : (loop->nb_iterations_upper_bound + 1, factor) - 1;
1304 : 710 : if (loop->any_likely_upper_bound)
1305 : 1396 : loop->nb_iterations_likely_upper_bound = wi::udiv_floor
1306 : 698 : (loop->nb_iterations_likely_upper_bound + 1, factor) - 1;
1307 : 710 : if (loop->any_estimate)
1308 : 146 : loop->nb_iterations_estimate = wi::udiv_floor
1309 : 73 : (loop->nb_iterations_estimate + 1, factor) - 1;
1310 : :
1311 : 710 : checking_verify_flow_info ();
1312 : 710 : checking_verify_loop_structure ();
1313 : 710 : checking_verify_loop_closed_ssa (true, loop);
1314 : 710 : if (new_loop)
1315 : 1393 : checking_verify_loop_closed_ssa (true, new_loop);
1316 : 710 : }
1317 : :
1318 : : /* Wrapper over tree_transform_and_unroll_loop for case we do not
1319 : : want to transform the loop before unrolling. The meaning
1320 : : of the arguments is the same as for tree_transform_and_unroll_loop. */
1321 : :
1322 : : void
1323 : 88 : tree_unroll_loop (class loop *loop, unsigned factor,
1324 : : class tree_niter_desc *desc)
1325 : : {
1326 : 88 : tree_transform_and_unroll_loop (loop, factor, desc, NULL, NULL);
1327 : 88 : }
1328 : :
1329 : : /* Rewrite the phi node at position PSI in function of the main
1330 : : induction variable MAIN_IV and insert the generated code at GSI. */
1331 : :
1332 : : static void
1333 : 2080 : rewrite_phi_with_iv (loop_p loop,
1334 : : gphi_iterator *psi,
1335 : : gimple_stmt_iterator *gsi,
1336 : : tree main_iv)
1337 : : {
1338 : 2080 : affine_iv iv;
1339 : 2080 : gassign *stmt;
1340 : 2080 : gphi *phi = psi->phi ();
1341 : 2080 : tree atype, mtype, val, res = PHI_RESULT (phi);
1342 : :
1343 : 4160 : if (virtual_operand_p (res) || res == main_iv)
1344 : : {
1345 : 1180 : gsi_next (psi);
1346 : 1273 : return;
1347 : : }
1348 : :
1349 : 900 : if (!simple_iv (loop, loop, res, &iv, true))
1350 : : {
1351 : 93 : gsi_next (psi);
1352 : 93 : return;
1353 : : }
1354 : :
1355 : 807 : remove_phi_node (psi, false);
1356 : :
1357 : 807 : atype = TREE_TYPE (res);
1358 : 807 : mtype = POINTER_TYPE_P (atype) ? sizetype : atype;
1359 : 807 : val = fold_build2 (MULT_EXPR, mtype, unshare_expr (iv.step),
1360 : : fold_convert (mtype, main_iv));
1361 : 1614 : val = fold_build2 (POINTER_TYPE_P (atype)
1362 : : ? POINTER_PLUS_EXPR : PLUS_EXPR,
1363 : : atype, unshare_expr (iv.base), val);
1364 : 807 : val = force_gimple_operand_gsi (gsi, val, false, NULL_TREE, true,
1365 : : GSI_SAME_STMT);
1366 : 807 : stmt = gimple_build_assign (res, val);
1367 : 807 : gsi_insert_before (gsi, stmt, GSI_SAME_STMT);
1368 : : }
1369 : :
1370 : : /* Rewrite all the phi nodes of LOOP in function of the main induction
1371 : : variable MAIN_IV. */
1372 : :
1373 : : static void
1374 : 593 : rewrite_all_phi_nodes_with_iv (loop_p loop, tree main_iv)
1375 : : {
1376 : 593 : unsigned i;
1377 : 593 : basic_block *bbs = get_loop_body_in_dom_order (loop);
1378 : 593 : gphi_iterator psi;
1379 : :
1380 : 2097 : for (i = 0; i < loop->num_nodes; i++)
1381 : : {
1382 : 1504 : basic_block bb = bbs[i];
1383 : 1504 : gimple_stmt_iterator gsi = gsi_after_labels (bb);
1384 : :
1385 : 1504 : if (bb->loop_father != loop)
1386 : 97 : continue;
1387 : :
1388 : 3487 : for (psi = gsi_start_phis (bb); !gsi_end_p (psi); )
1389 : 2080 : rewrite_phi_with_iv (loop, &psi, &gsi, main_iv);
1390 : : }
1391 : :
1392 : 593 : free (bbs);
1393 : 593 : }
1394 : :
1395 : : /* Bases all the induction variables in LOOP on a single induction variable
1396 : : (with base 0 and step 1), whose final value is compared with *NIT. When the
1397 : : IV type precision has to be larger than *NIT type precision, *NIT is
1398 : : converted to the larger type, the conversion code is inserted before the
1399 : : loop, and *NIT is updated to the new definition. When BUMP_IN_LATCH is true,
1400 : : the induction variable is incremented in the loop latch, otherwise it is
1401 : : incremented in the loop header. Return the induction variable that was
1402 : : created. */
1403 : :
1404 : : tree
1405 : 593 : canonicalize_loop_ivs (class loop *loop, tree *nit, bool bump_in_latch)
1406 : : {
1407 : 593 : unsigned precision = TYPE_PRECISION (TREE_TYPE (*nit));
1408 : 593 : unsigned original_precision = precision;
1409 : 593 : tree type, var_before;
1410 : 593 : gimple_stmt_iterator gsi;
1411 : 593 : gphi_iterator psi;
1412 : 593 : gcond *stmt;
1413 : 593 : edge exit = single_dom_exit (loop);
1414 : 593 : gimple_seq stmts;
1415 : 593 : bool unsigned_p = false;
1416 : :
1417 : 593 : for (psi = gsi_start_phis (loop->header);
1418 : 1973 : !gsi_end_p (psi); gsi_next (&psi))
1419 : : {
1420 : 1380 : gphi *phi = psi.phi ();
1421 : 1380 : tree res = PHI_RESULT (phi);
1422 : 1380 : bool uns;
1423 : :
1424 : 1380 : type = TREE_TYPE (res);
1425 : 1380 : if (virtual_operand_p (res)
1426 : 879 : || (!INTEGRAL_TYPE_P (type)
1427 : 29 : && !POINTER_TYPE_P (type))
1428 : 2234 : || TYPE_PRECISION (type) < precision)
1429 : 548 : continue;
1430 : :
1431 : 832 : uns = POINTER_TYPE_P (type) | TYPE_UNSIGNED (type);
1432 : :
1433 : 832 : if (TYPE_PRECISION (type) > precision)
1434 : : unsigned_p = uns;
1435 : : else
1436 : 827 : unsigned_p |= uns;
1437 : :
1438 : 832 : precision = TYPE_PRECISION (type);
1439 : : }
1440 : :
1441 : 593 : scalar_int_mode mode = smallest_int_mode_for_size (precision).require ();
1442 : 593 : precision = GET_MODE_PRECISION (mode);
1443 : 593 : type = build_nonstandard_integer_type (precision, unsigned_p);
1444 : :
1445 : 593 : if (original_precision != precision
1446 : 593 : || TYPE_UNSIGNED (TREE_TYPE (*nit)) != unsigned_p)
1447 : : {
1448 : 313 : *nit = fold_convert (type, *nit);
1449 : 313 : *nit = force_gimple_operand (*nit, &stmts, true, NULL_TREE);
1450 : 313 : if (stmts)
1451 : 125 : gsi_insert_seq_on_edge_immediate (loop_preheader_edge (loop), stmts);
1452 : : }
1453 : :
1454 : 593 : if (bump_in_latch)
1455 : 1186 : gsi = gsi_last_bb (loop->latch);
1456 : : else
1457 : 0 : gsi = gsi_last_nondebug_bb (loop->header);
1458 : 593 : create_iv (build_int_cst_type (type, 0), PLUS_EXPR, build_int_cst (type, 1),
1459 : : NULL_TREE, loop, &gsi, bump_in_latch, &var_before, NULL);
1460 : :
1461 : 593 : rewrite_all_phi_nodes_with_iv (loop, var_before);
1462 : :
1463 : 1186 : stmt = as_a <gcond *> (*gsi_last_bb (exit->src));
1464 : : /* Make the loop exit if the control condition is not satisfied. */
1465 : 593 : if (exit->flags & EDGE_TRUE_VALUE)
1466 : : {
1467 : 113 : edge te, fe;
1468 : :
1469 : 113 : extract_true_false_edges_from_block (exit->src, &te, &fe);
1470 : 113 : te->flags = EDGE_FALSE_VALUE;
1471 : 113 : fe->flags = EDGE_TRUE_VALUE;
1472 : : }
1473 : 593 : gimple_cond_set_code (stmt, LT_EXPR);
1474 : 593 : gimple_cond_set_lhs (stmt, var_before);
1475 : 593 : gimple_cond_set_rhs (stmt, *nit);
1476 : 593 : update_stmt (stmt);
1477 : :
1478 : 593 : return var_before;
1479 : : }
|