Branch data Line data Source code
1 : : /* High-level loop manipulation functions.
2 : : Copyright (C) 2004-2024 Free Software Foundation, Inc.
3 : :
4 : : This file is part of GCC.
5 : :
6 : : GCC is free software; you can redistribute it and/or modify it
7 : : under the terms of the GNU General Public License as published by the
8 : : Free Software Foundation; either version 3, or (at your option) any
9 : : later version.
10 : :
11 : : GCC is distributed in the hope that it will be useful, but WITHOUT
12 : : ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
13 : : FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
14 : : for more details.
15 : :
16 : : You should have received a copy of the GNU General Public License
17 : : along with GCC; see the file COPYING3. If not see
18 : : <http://www.gnu.org/licenses/>. */
19 : :
20 : : #include "config.h"
21 : : #include "system.h"
22 : : #include "coretypes.h"
23 : : #include "backend.h"
24 : : #include "tree.h"
25 : : #include "gimple.h"
26 : : #include "cfghooks.h"
27 : : #include "tree-pass.h" /* ??? for TODO_update_ssa but this isn't a pass. */
28 : : #include "ssa.h"
29 : : #include "gimple-pretty-print.h"
30 : : #include "fold-const.h"
31 : : #include "cfganal.h"
32 : : #include "gimplify.h"
33 : : #include "gimple-iterator.h"
34 : : #include "gimplify-me.h"
35 : : #include "tree-cfg.h"
36 : : #include "tree-ssa-loop-ivopts.h"
37 : : #include "tree-ssa-loop-manip.h"
38 : : #include "tree-ssa-loop-niter.h"
39 : : #include "tree-ssa-loop.h"
40 : : #include "tree-into-ssa.h"
41 : : #include "tree-ssa.h"
42 : : #include "cfgloop.h"
43 : : #include "tree-scalar-evolution.h"
44 : : #include "tree-inline.h"
45 : :
46 : : /* All bitmaps for rewriting into loop-closed SSA go on this obstack,
47 : : so that we can free them all at once. */
48 : : static bitmap_obstack loop_renamer_obstack;
49 : :
50 : : /* Creates an induction variable with value BASE (+/-) STEP * iteration in LOOP.
51 : : If INCR_OP is PLUS_EXPR, the induction variable is BASE + STEP * iteration.
52 : : If INCR_OP is MINUS_EXPR, the induction variable is BASE - STEP * iteration.
53 : : It is expected that neither BASE nor STEP are shared with other expressions
54 : : (unless the sharing rules allow this). Use VAR as a base var_decl for it
55 : : (if NULL, a new temporary will be created). The increment will occur at
56 : : INCR_POS (after it if AFTER is true, before it otherwise). INCR_POS and
57 : : AFTER can be computed using standard_iv_increment_position. The ssa versions
58 : : of the variable before and after increment will be stored in VAR_BEFORE and
59 : : VAR_AFTER (unless they are NULL). */
60 : :
61 : : void
62 : 847042 : create_iv (tree base, tree_code incr_op, tree step, tree var, class loop *loop,
63 : : gimple_stmt_iterator *incr_pos, bool after, tree *var_before,
64 : : tree *var_after)
65 : : {
66 : 847042 : gassign *stmt;
67 : 847042 : gphi *phi;
68 : 847042 : tree initial, step1;
69 : 847042 : gimple_seq stmts;
70 : 847042 : tree vb, va;
71 : 847042 : gcc_assert (incr_op == PLUS_EXPR || incr_op == MINUS_EXPR);
72 : 847042 : edge pe = loop_preheader_edge (loop);
73 : :
74 : 847042 : if (var != NULL_TREE)
75 : : {
76 : 515690 : vb = make_ssa_name (var);
77 : 515690 : va = make_ssa_name (var);
78 : : }
79 : : else
80 : : {
81 : 331352 : vb = make_temp_ssa_name (TREE_TYPE (base), NULL, "ivtmp");
82 : 331352 : va = make_temp_ssa_name (TREE_TYPE (base), NULL, "ivtmp");
83 : : }
84 : 847042 : if (var_before)
85 : 572428 : *var_before = vb;
86 : 847042 : if (var_after)
87 : 838275 : *var_after = va;
88 : :
89 : : /* For easier readability of the created code, produce MINUS_EXPRs
90 : : when suitable. */
91 : 847042 : if (TREE_CODE (step) == INTEGER_CST)
92 : : {
93 : 792231 : if (TYPE_UNSIGNED (TREE_TYPE (step)))
94 : : {
95 : 790740 : step1 = fold_build1 (NEGATE_EXPR, TREE_TYPE (step), step);
96 : 790740 : if (tree_int_cst_lt (step1, step))
97 : : {
98 : 287938 : incr_op = (incr_op == PLUS_EXPR ? MINUS_EXPR : PLUS_EXPR);
99 : : step = step1;
100 : : }
101 : : }
102 : : else
103 : : {
104 : 1491 : bool ovf;
105 : :
106 : 1491 : if (!tree_expr_nonnegative_warnv_p (step, &ovf)
107 : 1491 : && may_negate_without_overflow_p (step))
108 : : {
109 : 11 : incr_op = (incr_op == PLUS_EXPR ? MINUS_EXPR : PLUS_EXPR);
110 : 11 : step = fold_build1 (NEGATE_EXPR, TREE_TYPE (step), step);
111 : : }
112 : : }
113 : : }
114 : 847042 : if (POINTER_TYPE_P (TREE_TYPE (base)))
115 : : {
116 : 108250 : if (TREE_CODE (base) == ADDR_EXPR)
117 : 41333 : mark_addressable (TREE_OPERAND (base, 0));
118 : 108250 : step = convert_to_ptrofftype (step);
119 : 108250 : if (incr_op == MINUS_EXPR)
120 : 2635 : step = fold_build1 (NEGATE_EXPR, TREE_TYPE (step), step);
121 : : incr_op = POINTER_PLUS_EXPR;
122 : : }
123 : : /* Gimplify the step if necessary. We put the computations in front of the
124 : : loop (i.e. the step should be loop invariant). */
125 : 847042 : step = force_gimple_operand (step, &stmts, true, NULL_TREE);
126 : 847042 : if (stmts)
127 : 43391 : gsi_insert_seq_on_edge_immediate (pe, stmts);
128 : :
129 : 847042 : stmt = gimple_build_assign (va, incr_op, vb, step);
130 : : /* Prevent the increment from inheriting a bogus location if it is not put
131 : : immediately after a statement whose location is known. */
132 : 847042 : if (after)
133 : : {
134 : 9450 : gimple_stmt_iterator gsi = *incr_pos;
135 : 9450 : if (!gsi_end_p (gsi))
136 : 8062 : gsi_next_nondebug (&gsi);
137 : 9450 : if (gsi_end_p (gsi))
138 : : {
139 : 9450 : edge e = single_succ_edge (gsi_bb (*incr_pos));
140 : 9450 : gimple_set_location (stmt, e->goto_locus);
141 : : }
142 : 9450 : gsi_insert_after (incr_pos, stmt, GSI_NEW_STMT);
143 : : }
144 : : else
145 : : {
146 : 837592 : gimple_stmt_iterator gsi = *incr_pos;
147 : 837592 : if (!gsi_end_p (gsi) && is_gimple_debug (gsi_stmt (gsi)))
148 : 0 : gsi_next_nondebug (&gsi);
149 : 837592 : if (!gsi_end_p (gsi))
150 : 837119 : gimple_set_location (stmt, gimple_location (gsi_stmt (gsi)));
151 : 837592 : gsi_insert_before (incr_pos, stmt, GSI_NEW_STMT);
152 : : }
153 : :
154 : 847042 : initial = force_gimple_operand (base, &stmts, true, var);
155 : 847042 : if (stmts)
156 : 214168 : gsi_insert_seq_on_edge_immediate (pe, stmts);
157 : :
158 : 847042 : phi = create_phi_node (vb, loop->header);
159 : 847042 : add_phi_arg (phi, initial, loop_preheader_edge (loop), UNKNOWN_LOCATION);
160 : 847042 : add_phi_arg (phi, va, loop_latch_edge (loop), UNKNOWN_LOCATION);
161 : 847042 : }
162 : :
163 : : /* Return the innermost superloop LOOP of USE_LOOP that is a superloop of
164 : : both DEF_LOOP and USE_LOOP. */
165 : :
166 : : static inline class loop *
167 : 119582 : find_sibling_superloop (class loop *use_loop, class loop *def_loop)
168 : : {
169 : 119582 : unsigned ud = loop_depth (use_loop);
170 : 119582 : unsigned dd = loop_depth (def_loop);
171 : 119582 : gcc_assert (ud > 0 && dd > 0);
172 : 119582 : if (ud > dd)
173 : 12937 : use_loop = superloop_at_depth (use_loop, dd);
174 : 119582 : if (ud < dd)
175 : 22824 : def_loop = superloop_at_depth (def_loop, ud);
176 : 121780 : while (loop_outer (use_loop) != loop_outer (def_loop))
177 : : {
178 : 2198 : use_loop = loop_outer (use_loop);
179 : 2198 : def_loop = loop_outer (def_loop);
180 : 2198 : gcc_assert (use_loop && def_loop);
181 : : }
182 : 119582 : return use_loop;
183 : : }
184 : :
185 : : /* DEF_BB is a basic block containing a DEF that needs rewriting into
186 : : loop-closed SSA form. USE_BLOCKS is the set of basic blocks containing
187 : : uses of DEF that "escape" from the loop containing DEF_BB (i.e. blocks in
188 : : USE_BLOCKS are dominated by DEF_BB but not in the loop father of DEF_BB).
189 : : ALL_EXITS[I] is the set of all basic blocks that exit loop I.
190 : : DEF_LOOP_EXITS is a bitmap of loop exit blocks that exit the loop
191 : : containing DEF_BB or its outer loops.
192 : :
193 : : Compute the subset of loop exit destinations that exit the loop
194 : : containing DEF_BB or one of its loop fathers, in which DEF is live.
195 : : This set is returned in the bitmap LIVE_EXITS.
196 : :
197 : : Instead of computing the complete livein set of the def, we use the loop
198 : : nesting tree as a form of poor man's structure analysis. This greatly
199 : : speeds up the analysis, which is important because this function may be
200 : : called on all SSA names that need rewriting, one at a time. */
201 : :
202 : : static void
203 : 2944873 : compute_live_loop_exits (bitmap live_exits, bitmap use_blocks,
204 : : basic_block def_bb, bitmap def_loop_exits)
205 : : {
206 : 2944873 : unsigned i;
207 : 2944873 : bitmap_iterator bi;
208 : 2944873 : class loop *def_loop = def_bb->loop_father;
209 : 2944873 : unsigned def_loop_depth = loop_depth (def_loop);
210 : :
211 : : /* Normally the work list size is bounded by the number of basic
212 : : blocks in the largest loop. We don't know this number, but we
213 : : can be fairly sure that it will be relatively small. */
214 : 2944873 : auto_vec<basic_block, 8> worklist (MAX (8, n_basic_blocks_for_fn (cfun) / 128));
215 : :
216 : 7119305 : EXECUTE_IF_SET_IN_BITMAP (use_blocks, 0, i, bi)
217 : : {
218 : 4174432 : basic_block use_bb = BASIC_BLOCK_FOR_FN (cfun, i);
219 : 4174432 : class loop *use_loop = use_bb->loop_father;
220 : 4174432 : gcc_checking_assert (def_loop != use_loop
221 : : && ! flow_loop_nested_p (def_loop, use_loop));
222 : 4174432 : if (! flow_loop_nested_p (use_loop, def_loop))
223 : 99467 : use_bb = find_sibling_superloop (use_loop, def_loop)->header;
224 : 4174432 : if (bitmap_set_bit (live_exits, use_bb->index))
225 : 4144186 : worklist.safe_push (use_bb);
226 : : }
227 : :
228 : : /* Iterate until the worklist is empty. */
229 : 9520719 : while (! worklist.is_empty ())
230 : : {
231 : 6575846 : edge e;
232 : 6575846 : edge_iterator ei;
233 : :
234 : : /* Pull a block off the worklist. */
235 : 6575846 : basic_block bb = worklist.pop ();
236 : :
237 : : /* Make sure we have at least enough room in the work list
238 : : for all predecessors of this block. */
239 : 6575846 : worklist.reserve (EDGE_COUNT (bb->preds));
240 : :
241 : : /* For each predecessor block. */
242 : 14194372 : FOR_EACH_EDGE (e, ei, bb->preds)
243 : : {
244 : 7618526 : basic_block pred = e->src;
245 : 7618526 : class loop *pred_loop = pred->loop_father;
246 : 7618526 : unsigned pred_loop_depth = loop_depth (pred_loop);
247 : 7618526 : bool pred_visited;
248 : :
249 : : /* We should have met DEF_BB along the way. */
250 : 7618526 : gcc_assert (pred != ENTRY_BLOCK_PTR_FOR_FN (cfun));
251 : :
252 : 7618526 : if (pred_loop_depth >= def_loop_depth)
253 : : {
254 : 4477815 : if (pred_loop_depth > def_loop_depth)
255 : 122676 : pred_loop = superloop_at_depth (pred_loop, def_loop_depth);
256 : : /* If we've reached DEF_LOOP, our train ends here. */
257 : 4477815 : if (pred_loop == def_loop)
258 : 5186866 : continue;
259 : : }
260 : 3140711 : else if (! flow_loop_nested_p (pred_loop, def_loop))
261 : 20115 : pred = find_sibling_superloop (pred_loop, def_loop)->header;
262 : :
263 : : /* Add PRED to the LIVEIN set. PRED_VISITED is true if
264 : : we had already added PRED to LIVEIN before. */
265 : 3987133 : pred_visited = !bitmap_set_bit (live_exits, pred->index);
266 : :
267 : : /* If we have visited PRED before, don't add it to the worklist.
268 : : If BB dominates PRED, then we're probably looking at a loop.
269 : : We're only interested in looking up in the dominance tree
270 : : because DEF_BB dominates all the uses. */
271 : 3987133 : if (pred_visited || dominated_by_p (CDI_DOMINATORS, pred, bb))
272 : 1555473 : continue;
273 : :
274 : 2431660 : worklist.quick_push (pred);
275 : : }
276 : : }
277 : :
278 : 2944873 : bitmap_and_into (live_exits, def_loop_exits);
279 : 2944873 : }
280 : :
281 : : /* Add a loop-closing PHI for VAR in basic block EXIT. */
282 : :
283 : : static void
284 : 3573448 : add_exit_phi (basic_block exit, tree var)
285 : : {
286 : 3573448 : gphi *phi;
287 : 3573448 : edge e;
288 : 3573448 : edge_iterator ei;
289 : :
290 : : /* Check that at least one of the edges entering the EXIT block exits
291 : : the loop, or a superloop of that loop, that VAR is defined in. */
292 : 3573448 : if (flag_checking)
293 : : {
294 : 3573346 : gimple *def_stmt = SSA_NAME_DEF_STMT (var);
295 : 3573346 : basic_block def_bb = gimple_bb (def_stmt);
296 : 3582403 : FOR_EACH_EDGE (e, ei, exit->preds)
297 : : {
298 : 7164806 : class loop *aloop = find_common_loop (def_bb->loop_father,
299 : 3582403 : e->src->loop_father);
300 : 3582403 : if (!flow_bb_inside_loop_p (aloop, e->dest))
301 : : break;
302 : : }
303 : 3573346 : gcc_assert (e);
304 : : }
305 : :
306 : 3573448 : phi = create_phi_node (NULL_TREE, exit);
307 : 3573448 : create_new_def_for (var, phi, gimple_phi_result_ptr (phi));
308 : 7327158 : FOR_EACH_EDGE (e, ei, exit->preds)
309 : 3753710 : add_phi_arg (phi, var, e, UNKNOWN_LOCATION);
310 : :
311 : 3573448 : if (dump_file && (dump_flags & TDF_DETAILS))
312 : : {
313 : 1409 : fprintf (dump_file, ";; Created LCSSA PHI: ");
314 : 1409 : print_gimple_stmt (dump_file, phi, 0, dump_flags);
315 : : }
316 : 3573448 : }
317 : :
318 : : /* Add exit phis for VAR that is used in LIVEIN.
319 : : Exits of the loops are stored in LOOP_EXITS. Returns the number
320 : : of PHIs added for VAR. */
321 : :
322 : : static unsigned
323 : 2944873 : add_exit_phis_var (tree var, bitmap use_blocks, bitmap def_loop_exits)
324 : : {
325 : 2944873 : unsigned index;
326 : 2944873 : bitmap_iterator bi;
327 : 2944873 : basic_block def_bb = gimple_bb (SSA_NAME_DEF_STMT (var));
328 : :
329 : 2944873 : gcc_checking_assert (! bitmap_bit_p (use_blocks, def_bb->index));
330 : :
331 : 2944873 : auto_bitmap live_exits (&loop_renamer_obstack);
332 : 2944873 : compute_live_loop_exits (live_exits, use_blocks, def_bb, def_loop_exits);
333 : :
334 : 2944873 : unsigned cnt = 0;
335 : 6518321 : EXECUTE_IF_SET_IN_BITMAP (live_exits, 0, index, bi)
336 : : {
337 : 3573448 : add_exit_phi (BASIC_BLOCK_FOR_FN (cfun, index), var);
338 : 3573448 : cnt++;
339 : : }
340 : 2944873 : return cnt;
341 : 2944873 : }
342 : :
343 : : static int
344 : 44682429 : loop_name_cmp (const void *p1, const void *p2)
345 : : {
346 : 44682429 : auto l1 = (const std::pair<int, int> *)p1;
347 : 44682429 : auto l2 = (const std::pair<int, int> *)p2;
348 : 44682429 : if (l1->first < l2->first)
349 : : return -1;
350 : 27272426 : else if (l1->first > l2->first)
351 : 16355136 : return 1;
352 : : return 0;
353 : : }
354 : :
355 : : /* Add exit phis for the names marked in NAMES_TO_RENAME.
356 : : Exits of the loops are stored in EXITS. Sets of blocks where the ssa
357 : : names are used are stored in USE_BLOCKS. Returns whether any name
358 : : required multiple LC PHI nodes. */
359 : :
360 : : static bool
361 : 659771 : add_exit_phis (bitmap names_to_rename, bitmap *use_blocks)
362 : : {
363 : 659771 : unsigned i;
364 : 659771 : bitmap_iterator bi;
365 : 659771 : bool multiple_p = false;
366 : :
367 : : /* Sort names_to_rename after definition loop so we can avoid re-computing
368 : : def_loop_exits. */
369 : 659771 : auto_vec<std::pair<int, int> > names (bitmap_count_bits (names_to_rename));
370 : 3604644 : EXECUTE_IF_SET_IN_BITMAP (names_to_rename, 0, i, bi)
371 : : {
372 : 2944873 : tree name = ssa_name (i);
373 : 2944873 : loop_p def_loop = gimple_bb (SSA_NAME_DEF_STMT (name))->loop_father;
374 : 2944873 : names.quick_push (std::make_pair (def_loop->num, i));
375 : : }
376 : 659771 : names.qsort (loop_name_cmp);
377 : :
378 : 659771 : auto_bitmap def_loop_exits (&loop_renamer_obstack);
379 : 659771 : loop_p last_def_loop = NULL;
380 : 4924186 : for (auto p : names)
381 : : {
382 : 2944873 : loop_p def_loop = get_loop (cfun, p.first);
383 : 2944873 : if (def_loop != last_def_loop)
384 : : {
385 : 1545622 : bitmap_clear (def_loop_exits);
386 : 1545622 : last_def_loop = def_loop;
387 : 3615405 : for (class loop *loop = def_loop; loop != current_loops->tree_root;
388 : 2069783 : loop = loop_outer (loop))
389 : 7903724 : for (auto exit = loop->exits->next; exit->e; exit = exit->next)
390 : 5833941 : bitmap_set_bit (def_loop_exits, exit->e->dest->index);
391 : : }
392 : 2944873 : if (add_exit_phis_var (ssa_name (p.second), use_blocks[p.second],
393 : : def_loop_exits) > 1)
394 : 341144 : multiple_p = true;
395 : : }
396 : :
397 : 659771 : return multiple_p;
398 : 659771 : }
399 : :
400 : : /* For USE in BB, if it is used outside of the loop it is defined in,
401 : : mark it for rewrite. Record basic block BB where it is used
402 : : to USE_BLOCKS. Record the ssa name index to NEED_PHIS bitmap.
403 : : Note that for USEs in phis, BB should be the src of the edge corresponding to
404 : : the use, rather than the bb containing the phi. */
405 : :
406 : : static void
407 : 143547467 : find_uses_to_rename_use (basic_block bb, tree use, bitmap *use_blocks,
408 : : bitmap need_phis)
409 : : {
410 : 143547467 : unsigned ver;
411 : 143547467 : basic_block def_bb;
412 : 143547467 : class loop *def_loop;
413 : :
414 : 143547467 : if (TREE_CODE (use) != SSA_NAME)
415 : : return;
416 : :
417 : 138549321 : ver = SSA_NAME_VERSION (use);
418 : 138549321 : def_bb = gimple_bb (SSA_NAME_DEF_STMT (use));
419 : 138549321 : if (!def_bb)
420 : : return;
421 : 128375627 : def_loop = def_bb->loop_father;
422 : :
423 : : /* If the definition is not inside a loop, it is not interesting. */
424 : 128375627 : if (!loop_outer (def_loop))
425 : : return;
426 : :
427 : : /* If the use is not outside of the loop it is defined in, it is not
428 : : interesting. */
429 : 68066792 : if (flow_bb_inside_loop_p (def_loop, bb))
430 : : return;
431 : :
432 : : /* If we're seeing VER for the first time, we still have to allocate
433 : : a bitmap for its uses. */
434 : 4757413 : if (bitmap_set_bit (need_phis, ver))
435 : 2944873 : use_blocks[ver] = BITMAP_ALLOC (&loop_renamer_obstack);
436 : 4757413 : bitmap_set_bit (use_blocks[ver], bb->index);
437 : : }
438 : :
439 : : /* For uses matching USE_FLAGS in STMT, mark names that are used outside of the
440 : : loop they are defined to rewrite. Record the set of blocks in which the ssa
441 : : names are used to USE_BLOCKS, and the ssa names themselves to NEED_PHIS. */
442 : :
443 : : static void
444 : 168470611 : find_uses_to_rename_stmt (gimple *stmt, bitmap *use_blocks, bitmap need_phis,
445 : : int use_flags)
446 : : {
447 : 168470611 : ssa_op_iter iter;
448 : 168470611 : tree var;
449 : 168470611 : basic_block bb = gimple_bb (stmt);
450 : :
451 : 168470611 : if (is_gimple_debug (stmt))
452 : 90152080 : return;
453 : :
454 : : /* FOR_EACH_SSA_TREE_OPERAND iterator does not allows SSA_OP_VIRTUAL_USES
455 : : only. */
456 : 78318531 : if (use_flags == SSA_OP_VIRTUAL_USES)
457 : : {
458 : 78318531 : tree vuse = gimple_vuse (stmt);
459 : 0 : if (vuse != NULL_TREE)
460 : 0 : find_uses_to_rename_use (bb, gimple_vuse (stmt), use_blocks, need_phis);
461 : : }
462 : : else
463 : 188798940 : FOR_EACH_SSA_TREE_OPERAND (var, stmt, iter, use_flags)
464 : 110480409 : find_uses_to_rename_use (bb, var, use_blocks, need_phis);
465 : : }
466 : :
467 : : /* Marks names matching USE_FLAGS that are used in BB and outside of the loop
468 : : they are defined in for rewrite. Records the set of blocks in which the ssa
469 : : names are used to USE_BLOCKS. Record the SSA names that will
470 : : need exit PHIs in NEED_PHIS. */
471 : :
472 : : static void
473 : 27207874 : find_uses_to_rename_bb (basic_block bb, bitmap *use_blocks, bitmap need_phis,
474 : : int use_flags)
475 : : {
476 : 27207874 : edge e;
477 : 27207874 : edge_iterator ei;
478 : 27207874 : bool do_virtuals = (use_flags & SSA_OP_VIRTUAL_USES) != 0;
479 : 27207874 : bool do_nonvirtuals = (use_flags & SSA_OP_USE) != 0;
480 : :
481 : 65981487 : FOR_EACH_EDGE (e, ei, bb->succs)
482 : 71840671 : for (gphi_iterator bsi = gsi_start_phis (e->dest); !gsi_end_p (bsi);
483 : 33067058 : gsi_next (&bsi))
484 : : {
485 : 33067058 : gphi *phi = bsi.phi ();
486 : 33067058 : bool virtual_p = virtual_operand_p (gimple_phi_result (phi));
487 : 33067058 : if ((virtual_p && do_virtuals)
488 : 19289598 : || (!virtual_p && do_nonvirtuals))
489 : 33067058 : find_uses_to_rename_use (bb, PHI_ARG_DEF_FROM_EDGE (phi, e),
490 : : use_blocks, need_phis);
491 : : }
492 : :
493 : 222886359 : for (gimple_stmt_iterator bsi = gsi_start_bb (bb); !gsi_end_p (bsi);
494 : 168470611 : gsi_next (&bsi))
495 : 168470611 : find_uses_to_rename_stmt (gsi_stmt (bsi), use_blocks, need_phis,
496 : : use_flags);
497 : 27207874 : }
498 : :
499 : : /* Marks names matching USE_FLAGS that are used outside of the loop they are
500 : : defined in for rewrite. Records the set of blocks in which the ssa names are
501 : : used to USE_BLOCKS. Record the SSA names that will need exit PHIs in
502 : : NEED_PHIS. If CHANGED_BBS is not NULL, scan only blocks in this set. */
503 : :
504 : : static void
505 : 945630 : find_uses_to_rename (bitmap changed_bbs, bitmap *use_blocks, bitmap need_phis,
506 : : int use_flags)
507 : : {
508 : 945630 : basic_block bb;
509 : 945630 : unsigned index;
510 : 945630 : bitmap_iterator bi;
511 : :
512 : 945630 : if (changed_bbs)
513 : 22162 : EXECUTE_IF_SET_IN_BITMAP (changed_bbs, 0, index, bi)
514 : : {
515 : 16196 : bb = BASIC_BLOCK_FOR_FN (cfun, index);
516 : 16196 : if (bb)
517 : 16196 : find_uses_to_rename_bb (bb, use_blocks, need_phis, use_flags);
518 : : }
519 : : else
520 : 28131342 : FOR_EACH_BB_FN (bb, cfun)
521 : 27191678 : find_uses_to_rename_bb (bb, use_blocks, need_phis, use_flags);
522 : 945630 : }
523 : :
524 : : /* Rewrites the program into a loop closed ssa form -- i.e. inserts extra
525 : : phi nodes to ensure that no variable is used outside the loop it is
526 : : defined in.
527 : :
528 : : This strengthening of the basic ssa form has several advantages:
529 : :
530 : : 1) Updating it during unrolling/peeling/versioning is trivial, since
531 : : we do not need to care about the uses outside of the loop.
532 : : The same applies to virtual operands which are also rewritten into
533 : : loop closed SSA form. Note that virtual operands are always live
534 : : until function exit.
535 : : 2) The behavior of all uses of an induction variable is the same.
536 : : Without this, you need to distinguish the case when the variable
537 : : is used outside of the loop it is defined in, for example
538 : :
539 : : for (i = 0; i < 100; i++)
540 : : {
541 : : for (j = 0; j < 100; j++)
542 : : {
543 : : k = i + j;
544 : : use1 (k);
545 : : }
546 : : use2 (k);
547 : : }
548 : :
549 : : Looking from the outer loop with the normal SSA form, the first use of k
550 : : is not well-behaved, while the second one is an induction variable with
551 : : base 99 and step 1.
552 : :
553 : : If CHANGED_BBS is not NULL, we look for uses outside loops only in the
554 : : basic blocks in this set.
555 : :
556 : : USE_FLAGS allows us to specify whether we want virtual, non-virtual or
557 : : both variables rewritten.
558 : :
559 : : UPDATE_FLAG is used in the call to update_ssa. See
560 : : TODO_update_ssa* for documentation. */
561 : :
562 : : static void
563 : 4288957 : rewrite_into_loop_closed_ssa_1 (bitmap changed_bbs, unsigned update_flag,
564 : : int use_flags)
565 : : {
566 : 4288957 : bitmap *use_blocks;
567 : 4288957 : bitmap names_to_rename;
568 : :
569 : 4288957 : loops_state_set (LOOP_CLOSED_SSA);
570 : 4288957 : if (number_of_loops (cfun) <= 1)
571 : : return;
572 : :
573 : : /* If the pass has caused the SSA form to be out-of-date, update it
574 : : now. */
575 : 945630 : if (update_flag != 0)
576 : 945245 : update_ssa (update_flag);
577 : 385 : else if (flag_checking)
578 : 385 : verify_ssa (true, true);
579 : :
580 : 945630 : bitmap_obstack_initialize (&loop_renamer_obstack);
581 : :
582 : 945630 : names_to_rename = BITMAP_ALLOC (&loop_renamer_obstack);
583 : :
584 : : /* Uses of names to rename. We don't have to initialize this array,
585 : : because we know that we will only have entries for the SSA names
586 : : in NAMES_TO_RENAME. */
587 : 1891260 : use_blocks = XNEWVEC (bitmap, num_ssa_names);
588 : 945630 : find_uses_to_rename (changed_bbs, use_blocks, names_to_rename, use_flags);
589 : :
590 : 945630 : if (!bitmap_empty_p (names_to_rename))
591 : : {
592 : 659771 : bool release_recorded_exits_p = false;
593 : 659771 : if (!loops_state_satisfies_p (LOOPS_HAVE_RECORDED_EXITS))
594 : : {
595 : : /* Doing one scan over the whole function is cheaper than
596 : : traversing the loop tree and gathering BBs of each loop. */
597 : 0 : record_loop_exits ();
598 : 0 : release_recorded_exits_p = true;
599 : : }
600 : :
601 : : /* Add the PHI nodes on exits of the loops for the names we need to
602 : : rewrite. When no variable required multiple LC PHI nodes to be
603 : : inserted then we know that all uses outside of the loop are
604 : : dominated by the single LC SSA definition and no further PHI
605 : : node insertions are required. */
606 : 659771 : bool need_phis_p = add_exit_phis (names_to_rename, use_blocks);
607 : :
608 : 659771 : if (release_recorded_exits_p)
609 : 0 : release_recorded_exits (cfun);
610 : :
611 : : /* Fix up all the names found to be used outside their original
612 : : loops. */
613 : 1181969 : update_ssa (need_phis_p ? TODO_update_ssa : TODO_update_ssa_no_phi);
614 : : }
615 : :
616 : 945630 : bitmap_obstack_release (&loop_renamer_obstack);
617 : 945630 : free (use_blocks);
618 : : }
619 : :
620 : : /* Rewrites the defs and uses into a loop closed ssa form.
621 : : If CHANGED_BBS is not NULL, we look for uses outside loops only in the basic
622 : : blocks in this set. UPDATE_FLAG is used in the call to update_ssa. See
623 : : TODO_update_ssa* for documentation. */
624 : :
625 : : void
626 : 4288957 : rewrite_into_loop_closed_ssa (bitmap changed_bbs, unsigned update_flag)
627 : : {
628 : 4288957 : rewrite_into_loop_closed_ssa_1 (changed_bbs, update_flag, SSA_OP_ALL_USES);
629 : 4288957 : }
630 : :
631 : : /* Check invariants of the loop closed ssa form for the def in DEF_BB. */
632 : :
633 : : static void
634 : 150086893 : check_loop_closed_ssa_def (basic_block def_bb, tree def)
635 : : {
636 : 150086893 : use_operand_p use_p;
637 : 150086893 : imm_use_iterator iterator;
638 : 435455655 : FOR_EACH_IMM_USE_FAST (use_p, iterator, def)
639 : : {
640 : 285368762 : if (is_gimple_debug (USE_STMT (use_p)))
641 : 32794473 : continue;
642 : :
643 : 252574289 : basic_block use_bb = gimple_bb (USE_STMT (use_p));
644 : 252574289 : if (is_a <gphi *> (USE_STMT (use_p)))
645 : 67953591 : use_bb = EDGE_PRED (use_bb, PHI_ARG_INDEX_FROM_USE (use_p))->src;
646 : :
647 : 252574289 : gcc_assert (flow_bb_inside_loop_p (def_bb->loop_father, use_bb));
648 : : }
649 : 150086893 : }
650 : :
651 : : /* Checks invariants of loop closed ssa form in BB. */
652 : :
653 : : static void
654 : 43606433 : check_loop_closed_ssa_bb (basic_block bb)
655 : : {
656 : 79221176 : for (gphi_iterator bsi = gsi_start_phis (bb); !gsi_end_p (bsi);
657 : 35614743 : gsi_next (&bsi))
658 : : {
659 : 35614743 : gphi *phi = bsi.phi ();
660 : :
661 : 35614743 : check_loop_closed_ssa_def (bb, PHI_RESULT (phi));
662 : : }
663 : :
664 : 177249492 : for (gimple_stmt_iterator bsi = gsi_start_nondebug_bb (bb); !gsi_end_p (bsi);
665 : 133643059 : gsi_next_nondebug (&bsi))
666 : : {
667 : 133643059 : ssa_op_iter iter;
668 : 133643059 : tree var;
669 : 133643059 : gimple *stmt = gsi_stmt (bsi);
670 : :
671 : 248115209 : FOR_EACH_SSA_TREE_OPERAND (var, stmt, iter, SSA_OP_ALL_DEFS)
672 : 114472150 : check_loop_closed_ssa_def (bb, var);
673 : : }
674 : 43606433 : }
675 : :
676 : : /* Checks that invariants of the loop closed ssa form are preserved.
677 : : Call verify_ssa when VERIFY_SSA_P is true. Note all loops are checked
678 : : if LOOP is NULL, otherwise, only LOOP is checked. */
679 : :
680 : : DEBUG_FUNCTION void
681 : 3358174 : verify_loop_closed_ssa (bool verify_ssa_p, class loop *loop)
682 : : {
683 : 6716348 : if (number_of_loops (cfun) <= 1)
684 : : return;
685 : :
686 : 3358174 : timevar_push (TV_VERIFY_LOOP_CLOSED);
687 : :
688 : 3358174 : if (loop == NULL)
689 : : {
690 : 3356811 : basic_block bb;
691 : :
692 : 3356811 : if (verify_ssa_p)
693 : 27268 : verify_ssa (false, true);
694 : :
695 : 112396624 : FOR_EACH_BB_FN (bb, cfun)
696 : 109039813 : if (bb->loop_father && bb->loop_father->num > 0)
697 : 43599431 : check_loop_closed_ssa_bb (bb);
698 : : }
699 : : else
700 : : {
701 : 1363 : basic_block *bbs = get_loop_body (loop);
702 : :
703 : : /* We do not have loop-local SSA verification so just
704 : : check there's no update queued. */
705 : 1363 : if (verify_ssa_p)
706 : 1363 : gcc_assert (!need_ssa_update_p (cfun));
707 : :
708 : 8365 : for (unsigned i = 0; i < loop->num_nodes; ++i)
709 : 7002 : check_loop_closed_ssa_bb (bbs[i]);
710 : :
711 : 1363 : free (bbs);
712 : : }
713 : :
714 : 3358174 : timevar_pop (TV_VERIFY_LOOP_CLOSED);
715 : : }
716 : :
717 : : /* Split loop exit edge EXIT. The things are a bit complicated by a need to
718 : : preserve the loop closed ssa form. If COPY_CONSTANTS_P is true then
719 : : forwarder PHIs are also created for constant arguments.
720 : : The newly created block is returned. */
721 : :
722 : : basic_block
723 : 48826 : split_loop_exit_edge (edge exit, bool copy_constants_p)
724 : : {
725 : 48826 : basic_block dest = exit->dest;
726 : 48826 : basic_block bb = split_edge (exit);
727 : 48826 : gphi *phi, *new_phi;
728 : 48826 : tree new_name, name;
729 : 48826 : use_operand_p op_p;
730 : 48826 : gphi_iterator psi;
731 : 48826 : location_t locus;
732 : :
733 : 116901 : for (psi = gsi_start_phis (dest); !gsi_end_p (psi); gsi_next (&psi))
734 : : {
735 : 68075 : phi = psi.phi ();
736 : 68075 : op_p = PHI_ARG_DEF_PTR_FROM_EDGE (phi, single_succ_edge (bb));
737 : 68075 : locus = gimple_phi_arg_location_from_edge (phi, single_succ_edge (bb));
738 : :
739 : 68075 : name = USE_FROM_PTR (op_p);
740 : :
741 : : /* If the argument of the PHI node is a constant, we do not need
742 : : to keep it inside loop. */
743 : 68075 : if (TREE_CODE (name) != SSA_NAME
744 : 2900 : && !copy_constants_p)
745 : 2900 : continue;
746 : :
747 : : /* Otherwise create an auxiliary phi node that will copy the value
748 : : of the SSA name out of the loop. */
749 : 65175 : new_name = duplicate_ssa_name (PHI_RESULT (phi), NULL);
750 : 65175 : new_phi = create_phi_node (new_name, bb);
751 : 65175 : add_phi_arg (new_phi, name, exit, locus);
752 : 65175 : SET_USE (op_p, new_name);
753 : : }
754 : :
755 : 48826 : return bb;
756 : : }
757 : :
758 : : /* Returns the basic block in that statements should be emitted for induction
759 : : variables incremented at the end of the LOOP. */
760 : :
761 : : basic_block
762 : 14107119 : ip_end_pos (class loop *loop)
763 : : {
764 : 14107119 : return loop->latch;
765 : : }
766 : :
767 : : /* Returns the basic block in that statements should be emitted for induction
768 : : variables incremented just before exit condition of a LOOP. */
769 : :
770 : : basic_block
771 : 44614991 : ip_normal_pos (class loop *loop)
772 : : {
773 : 44614991 : basic_block bb;
774 : 44614991 : edge exit;
775 : :
776 : 89229982 : if (!single_pred_p (loop->latch))
777 : : return NULL;
778 : :
779 : 44497117 : bb = single_pred (loop->latch);
780 : 88994234 : if (!safe_is_a <gcond *> (*gsi_last_bb (bb)))
781 : 12756 : return NULL;
782 : :
783 : 44484361 : exit = EDGE_SUCC (bb, 0);
784 : 44484361 : if (exit->dest == loop->latch)
785 : 34445040 : exit = EDGE_SUCC (bb, 1);
786 : :
787 : 44484361 : if (flow_bb_inside_loop_p (loop, exit->dest))
788 : : return NULL;
789 : :
790 : : return bb;
791 : : }
792 : :
793 : : /* Stores the standard position for induction variable increment in LOOP
794 : : (just before the exit condition if it is available and latch block is empty,
795 : : end of the latch block otherwise) to BSI. INSERT_AFTER is set to true if
796 : : the increment should be inserted after *BSI. */
797 : :
798 : : void
799 : 108250 : standard_iv_increment_position (class loop *loop, gimple_stmt_iterator *bsi,
800 : : bool *insert_after)
801 : : {
802 : 108250 : basic_block bb = ip_normal_pos (loop), latch = ip_end_pos (loop);
803 : 108250 : gimple *last = last_nondebug_stmt (latch);
804 : :
805 : 108250 : if (!bb
806 : 108250 : || (last && gimple_code (last) != GIMPLE_LABEL))
807 : : {
808 : 2 : *bsi = gsi_last_bb (latch);
809 : 2 : *insert_after = true;
810 : : }
811 : : else
812 : : {
813 : 108248 : *bsi = gsi_last_bb (bb);
814 : 108248 : *insert_after = false;
815 : : }
816 : 108250 : }
817 : :
818 : : /* Copies phi node arguments for duplicated blocks. The index of the first
819 : : duplicated block is FIRST_NEW_BLOCK. */
820 : :
821 : : static void
822 : 119198 : copy_phi_node_args (unsigned first_new_block)
823 : : {
824 : 119198 : unsigned i;
825 : :
826 : 1006057 : for (i = first_new_block; i < (unsigned) last_basic_block_for_fn (cfun); i++)
827 : 886859 : BASIC_BLOCK_FOR_FN (cfun, i)->flags |= BB_DUPLICATED;
828 : :
829 : 1006057 : for (i = first_new_block; i < (unsigned) last_basic_block_for_fn (cfun); i++)
830 : 886859 : add_phi_args_after_copy_bb (BASIC_BLOCK_FOR_FN (cfun, i));
831 : :
832 : 1006057 : for (i = first_new_block; i < (unsigned) last_basic_block_for_fn (cfun); i++)
833 : 886859 : BASIC_BLOCK_FOR_FN (cfun, i)->flags &= ~BB_DUPLICATED;
834 : 119198 : }
835 : :
836 : :
837 : : /* The same as cfgloopmanip.cc:duplicate_loop_body_to_header_edge, but also
838 : : updates the PHI nodes at start of the copied region. In order to
839 : : achieve this, only loops whose exits all lead to the same location
840 : : are handled.
841 : :
842 : : Notice that we do not completely update the SSA web after
843 : : duplication. The caller is responsible for calling update_ssa
844 : : after the loop has been duplicated. */
845 : :
846 : : bool
847 : 119216 : gimple_duplicate_loop_body_to_header_edge (class loop *loop, edge e,
848 : : unsigned int ndupl,
849 : : sbitmap wont_exit, edge orig,
850 : : vec<edge> *to_remove, int flags)
851 : : {
852 : 119216 : unsigned first_new_block;
853 : :
854 : 119216 : if (!loops_state_satisfies_p (LOOPS_HAVE_SIMPLE_LATCHES))
855 : : return false;
856 : 119216 : if (!loops_state_satisfies_p (LOOPS_HAVE_PREHEADERS))
857 : : return false;
858 : :
859 : 119216 : first_new_block = last_basic_block_for_fn (cfun);
860 : 119216 : if (!duplicate_loop_body_to_header_edge (loop, e, ndupl, wont_exit, orig,
861 : : to_remove, flags))
862 : : return false;
863 : :
864 : : /* Readd the removed phi args for e. */
865 : 119198 : flush_pending_stmts (e);
866 : :
867 : : /* Copy the phi node arguments. */
868 : 119198 : copy_phi_node_args (first_new_block);
869 : :
870 : 119198 : scev_reset ();
871 : :
872 : 119198 : return true;
873 : : }
874 : :
875 : : /* Returns true if we can unroll LOOP FACTOR times. Number
876 : : of iterations of the loop is returned in NITER. */
877 : :
878 : : bool
879 : 735 : can_unroll_loop_p (class loop *loop, unsigned factor,
880 : : class tree_niter_desc *niter)
881 : : {
882 : 735 : edge exit;
883 : :
884 : : /* Check whether unrolling is possible. We only want to unroll loops
885 : : for that we are able to determine number of iterations. We also
886 : : want to split the extra iterations of the loop from its end,
887 : : therefore we require that the loop has precisely one
888 : : exit. */
889 : :
890 : 735 : exit = single_dom_exit (loop);
891 : 735 : if (!exit)
892 : : return false;
893 : :
894 : 716 : if (!number_of_iterations_exit (loop, exit, niter, false)
895 : 715 : || niter->cmp == ERROR_MARK
896 : : /* Scalar evolutions analysis might have copy propagated
897 : : the abnormal ssa names into these expressions, hence
898 : : emitting the computations based on them during loop
899 : : unrolling might create overlapping life ranges for
900 : : them, and failures in out-of-ssa. */
901 : 713 : || contains_abnormal_ssa_name_p (niter->may_be_zero)
902 : 713 : || contains_abnormal_ssa_name_p (niter->control.base)
903 : 713 : || contains_abnormal_ssa_name_p (niter->control.step)
904 : 1429 : || contains_abnormal_ssa_name_p (niter->bound))
905 : 3 : return false;
906 : :
907 : : /* And of course, we must be able to duplicate the loop. */
908 : 713 : if (!can_duplicate_loop_p (loop))
909 : : return false;
910 : :
911 : : /* The final loop should be small enough. */
912 : 713 : if (tree_num_loop_insns (loop, &eni_size_weights) * factor
913 : 713 : > (unsigned) param_max_unrolled_insns)
914 : : return false;
915 : :
916 : : return true;
917 : : }
918 : :
919 : : /* Determines the conditions that control execution of LOOP unrolled FACTOR
920 : : times. DESC is number of iterations of LOOP. ENTER_COND is set to
921 : : condition that must be true if the main loop can be entered.
922 : : If the loop does not always iterate an exact multiple of FACTOR times,
923 : : EXIT_BASE, EXIT_STEP, EXIT_CMP and EXIT_BOUND are set to values describing
924 : : how the exit from the unrolled loop should be controlled. Otherwise,
925 : : the trees are set to null and EXIT_CMP is set to ERROR_MARK. */
926 : :
927 : : static void
928 : 693 : determine_exit_conditions (class loop *loop, class tree_niter_desc *desc,
929 : : unsigned factor, tree *enter_cond,
930 : : tree *exit_base, tree *exit_step,
931 : : enum tree_code *exit_cmp, tree *exit_bound)
932 : : {
933 : 693 : gimple_seq stmts;
934 : 693 : tree base = desc->control.base;
935 : 693 : tree step = desc->control.step;
936 : 693 : tree bound = desc->bound;
937 : 693 : tree type = TREE_TYPE (step);
938 : 693 : tree bigstep, delta;
939 : 693 : tree min = lower_bound_in_type (type, type);
940 : 693 : tree max = upper_bound_in_type (type, type);
941 : 693 : enum tree_code cmp = desc->cmp;
942 : 693 : tree cond = boolean_true_node, assum;
943 : :
944 : : /* For pointers, do the arithmetics in the type of step. */
945 : 693 : base = fold_convert (type, base);
946 : 693 : bound = fold_convert (type, bound);
947 : :
948 : 693 : *enter_cond = boolean_false_node;
949 : 693 : *exit_base = NULL_TREE;
950 : 693 : *exit_step = NULL_TREE;
951 : 693 : *exit_cmp = ERROR_MARK;
952 : 693 : *exit_bound = NULL_TREE;
953 : 693 : gcc_assert (cmp != ERROR_MARK);
954 : :
955 : : /* We only need to be correct when we answer question
956 : : "Do at least FACTOR more iterations remain?" in the unrolled loop.
957 : : Thus, transforming BASE + STEP * i <> BOUND to
958 : : BASE + STEP * i < BOUND is ok. */
959 : 693 : if (cmp == NE_EXPR)
960 : : {
961 : 71 : if (tree_int_cst_sign_bit (step))
962 : : cmp = GT_EXPR;
963 : : else
964 : 657 : cmp = LT_EXPR;
965 : : }
966 : 622 : else if (cmp == LT_EXPR)
967 : : {
968 : 622 : gcc_assert (!tree_int_cst_sign_bit (step));
969 : : }
970 : 0 : else if (cmp == GT_EXPR)
971 : : {
972 : 0 : gcc_assert (tree_int_cst_sign_bit (step));
973 : : }
974 : : else
975 : 0 : gcc_unreachable ();
976 : :
977 : : /* The main body of the loop may be entered iff:
978 : :
979 : : 1) desc->may_be_zero is false.
980 : : 2) it is possible to check that there are at least FACTOR iterations
981 : : of the loop, i.e., BOUND - step * FACTOR does not overflow.
982 : : 3) # of iterations is at least FACTOR */
983 : :
984 : 693 : if (!integer_zerop (desc->may_be_zero))
985 : 6 : cond = fold_build2 (TRUTH_AND_EXPR, boolean_type_node,
986 : : invert_truthvalue (desc->may_be_zero),
987 : : cond);
988 : :
989 : 693 : bigstep = fold_build2 (MULT_EXPR, type, step,
990 : : build_int_cst_type (type, factor));
991 : 693 : delta = fold_build2 (MINUS_EXPR, type, bigstep, step);
992 : 693 : if (cmp == LT_EXPR)
993 : 657 : assum = fold_build2 (GE_EXPR, boolean_type_node,
994 : : bound,
995 : : fold_build2 (PLUS_EXPR, type, min, delta));
996 : : else
997 : 36 : assum = fold_build2 (LE_EXPR, boolean_type_node,
998 : : bound,
999 : : fold_build2 (PLUS_EXPR, type, max, delta));
1000 : 693 : cond = fold_build2 (TRUTH_AND_EXPR, boolean_type_node, assum, cond);
1001 : :
1002 : 693 : bound = fold_build2 (MINUS_EXPR, type, bound, delta);
1003 : 693 : assum = fold_build2 (cmp, boolean_type_node, base, bound);
1004 : 693 : cond = fold_build2 (TRUTH_AND_EXPR, boolean_type_node, assum, cond);
1005 : :
1006 : 693 : if (integer_nonzerop (cond)
1007 : 693 : && integer_zerop (desc->may_be_zero))
1008 : : {
1009 : : /* Convert the latch count to an iteration count. */
1010 : 70 : tree niter = fold_build2 (PLUS_EXPR, type, desc->niter,
1011 : : build_one_cst (type));
1012 : 70 : if (multiple_of_p (type, niter, build_int_cst (type, factor)))
1013 : 23 : return;
1014 : : }
1015 : :
1016 : 670 : cond = force_gimple_operand (unshare_expr (cond), &stmts, false, NULL_TREE);
1017 : 670 : if (stmts)
1018 : 21 : gsi_insert_seq_on_edge_immediate (loop_preheader_edge (loop), stmts);
1019 : : /* cond now may be a gimple comparison, which would be OK, but also any
1020 : : other gimple rhs (say a && b). In this case we need to force it to
1021 : : operand. */
1022 : 670 : if (!is_gimple_condexpr_for_cond (cond))
1023 : : {
1024 : 18 : cond = force_gimple_operand (cond, &stmts, true, NULL_TREE);
1025 : 18 : if (stmts)
1026 : 18 : gsi_insert_seq_on_edge_immediate (loop_preheader_edge (loop), stmts);
1027 : : }
1028 : 670 : *enter_cond = cond;
1029 : :
1030 : 670 : base = force_gimple_operand (unshare_expr (base), &stmts, true, NULL_TREE);
1031 : 670 : if (stmts)
1032 : 49 : gsi_insert_seq_on_edge_immediate (loop_preheader_edge (loop), stmts);
1033 : 670 : bound = force_gimple_operand (unshare_expr (bound), &stmts, true, NULL_TREE);
1034 : 670 : if (stmts)
1035 : 584 : gsi_insert_seq_on_edge_immediate (loop_preheader_edge (loop), stmts);
1036 : :
1037 : 670 : *exit_base = base;
1038 : 670 : *exit_step = bigstep;
1039 : 670 : *exit_cmp = cmp;
1040 : 670 : *exit_bound = bound;
1041 : : }
1042 : :
1043 : : /* Unroll LOOP FACTOR times. LOOP is known to have a single exit edge
1044 : : whose source block dominates the latch. DESC describes the number of
1045 : : iterations of LOOP.
1046 : :
1047 : : If N is number of iterations of the loop and MAY_BE_ZERO is the condition
1048 : : under that loop exits in the first iteration even if N != 0,
1049 : :
1050 : : while (1)
1051 : : {
1052 : : x = phi (init, next);
1053 : :
1054 : : pre;
1055 : : if (st)
1056 : : break;
1057 : : post;
1058 : : }
1059 : :
1060 : : becomes (with possibly the exit conditions formulated a bit differently,
1061 : : avoiding the need to create a new iv):
1062 : :
1063 : : if (MAY_BE_ZERO || N < FACTOR)
1064 : : goto rest;
1065 : :
1066 : : do
1067 : : {
1068 : : x = phi (init, next);
1069 : :
1070 : : pre;
1071 : : post;
1072 : : pre;
1073 : : post;
1074 : : ...
1075 : : pre;
1076 : : post;
1077 : : N -= FACTOR;
1078 : :
1079 : : } while (N >= FACTOR);
1080 : :
1081 : : rest:
1082 : : init' = phi (init, x);
1083 : :
1084 : : while (1)
1085 : : {
1086 : : x = phi (init', next);
1087 : :
1088 : : pre;
1089 : : if (st)
1090 : : break;
1091 : : post;
1092 : : }
1093 : :
1094 : : Before the loop is unrolled, TRANSFORM is called for it (only for the
1095 : : unrolled loop, but not for its versioned copy). DATA is passed to
1096 : : TRANSFORM. */
1097 : :
1098 : : /* Probability in % that the unrolled loop is entered. Just a guess. */
1099 : : #define PROB_UNROLLED_LOOP_ENTERED 90
1100 : :
1101 : : void
1102 : 693 : tree_transform_and_unroll_loop (class loop *loop, unsigned factor,
1103 : : class tree_niter_desc *desc,
1104 : : transform_callback transform,
1105 : : void *data)
1106 : : {
1107 : 693 : unsigned irr = loop_preheader_edge (loop)->flags & EDGE_IRREDUCIBLE_LOOP;
1108 : :
1109 : 693 : enum tree_code exit_cmp;
1110 : 693 : tree enter_main_cond, exit_base, exit_step, exit_bound;
1111 : 693 : bool flat = maybe_flat_loop_profile (loop);
1112 : 693 : determine_exit_conditions (loop, desc, factor,
1113 : : &enter_main_cond, &exit_base, &exit_step,
1114 : : &exit_cmp, &exit_bound);
1115 : 693 : bool single_loop_p = !exit_base;
1116 : :
1117 : 693 : gcond *exit_if = nullptr;
1118 : 693 : class loop *new_loop = nullptr;
1119 : 693 : edge new_exit;
1120 : 693 : if (!single_loop_p)
1121 : : {
1122 : 670 : profile_count entry_count = loop_preheader_edge (loop)->src->count;
1123 : : /* Let us assume that the unrolled loop is quite likely to be entered. */
1124 : 670 : profile_probability prob_entry;
1125 : 670 : if (integer_nonzerop (enter_main_cond))
1126 : 47 : prob_entry = profile_probability::always ();
1127 : : else
1128 : 623 : prob_entry = profile_probability::guessed_always ()
1129 : 623 : .apply_scale (PROB_UNROLLED_LOOP_ENTERED, 100);
1130 : :
1131 : :
1132 : : /* The values for scales should keep profile consistent, and somewhat
1133 : : close to correct. */
1134 : 670 : new_loop = loop_version (loop, enter_main_cond, NULL, prob_entry,
1135 : : prob_entry.invert (),
1136 : : prob_entry,
1137 : : /* We will later redirect exit from vectorized
1138 : : loop to new_loop. */
1139 : : profile_probability::always (),
1140 : : true);
1141 : 670 : gcc_assert (new_loop != NULL);
1142 : 670 : update_ssa (TODO_update_ssa_no_phi);
1143 : :
1144 : : /* Prepare the cfg and update the phi nodes. Move the loop exit to the
1145 : : loop latch (and make its condition dummy, for the moment). */
1146 : 670 : basic_block rest = loop_preheader_edge (new_loop)->src;
1147 : 670 : edge precond_edge = single_pred_edge (rest);
1148 : 670 : split_edge (loop_latch_edge (loop));
1149 : 670 : basic_block exit_bb = single_pred (loop->latch);
1150 : 670 : edge exit = single_dom_exit (loop);
1151 : :
1152 : : /* Since the exit edge will be removed, the frequency of all the blocks
1153 : : in the loop that are dominated by it must be scaled. */
1154 : 670 : if (exit->probability.initialized_p ())
1155 : 670 : scale_dominated_blocks_in_loop (loop, exit->src,
1156 : : /* We are scaling up here so
1157 : : probability does not fit. */
1158 : 670 : exit->src->count,
1159 : 1340 : exit->src->count - exit->count ());
1160 : :
1161 : 670 : gimple_stmt_iterator bsi = gsi_last_bb (exit_bb);
1162 : 670 : exit_if = gimple_build_cond (EQ_EXPR, integer_zero_node,
1163 : : integer_zero_node,
1164 : : NULL_TREE, NULL_TREE);
1165 : :
1166 : 670 : gsi_insert_after (&bsi, exit_if, GSI_NEW_STMT);
1167 : 670 : new_exit = make_edge (exit_bb, rest, EDGE_FALSE_VALUE | irr);
1168 : 670 : rescan_loop_exit (new_exit, true, false);
1169 : :
1170 : : /* Set the probability of new exit to the same of the old one. Fix
1171 : : the count of the latch block. */
1172 : 670 : new_exit->probability = exit->probability;
1173 : 670 : edge new_nonexit = single_pred_edge (loop->latch);
1174 : 670 : new_nonexit->probability = exit->probability.invert ();
1175 : 670 : new_nonexit->flags = EDGE_TRUE_VALUE;
1176 : 670 : set_edge_probability_and_rescale_others
1177 : 670 : (exit, profile_probability::never ());
1178 : 670 : loop->latch->count = new_nonexit->count ();
1179 : :
1180 : 670 : edge old_entry = loop_preheader_edge (loop);
1181 : 670 : edge new_entry = loop_preheader_edge (new_loop);
1182 : 670 : edge old_latch = loop_latch_edge (loop);
1183 : 670 : for (gphi_iterator psi_old_loop = gsi_start_phis (loop->header),
1184 : 670 : psi_new_loop = gsi_start_phis (new_loop->header);
1185 : 2106 : !gsi_end_p (psi_old_loop);
1186 : 1436 : gsi_next (&psi_old_loop), gsi_next (&psi_new_loop))
1187 : : {
1188 : 1436 : gphi *phi_old_loop = psi_old_loop.phi ();
1189 : 1436 : gphi *phi_new_loop = psi_new_loop.phi ();
1190 : :
1191 : 1436 : tree init = PHI_ARG_DEF_FROM_EDGE (phi_old_loop, old_entry);
1192 : 1436 : use_operand_p op
1193 : 1436 : = PHI_ARG_DEF_PTR_FROM_EDGE (phi_new_loop, new_entry);
1194 : 1436 : gcc_assert (operand_equal_for_phi_arg_p (init, USE_FROM_PTR (op)));
1195 : 1436 : tree next = PHI_ARG_DEF_FROM_EDGE (phi_old_loop, old_latch);
1196 : :
1197 : : /* Prefer using original variable as a base for the new ssa name.
1198 : : This is necessary for virtual ops, and useful in order to avoid
1199 : : losing debug info for real ops. */
1200 : 1436 : tree new_init;
1201 : 1436 : if (TREE_CODE (next) == SSA_NAME
1202 : 2872 : && useless_type_conversion_p (TREE_TYPE (next),
1203 : 1436 : TREE_TYPE (init)))
1204 : 1436 : new_init = copy_ssa_name (next);
1205 : 0 : else if (TREE_CODE (init) == SSA_NAME
1206 : 0 : && useless_type_conversion_p (TREE_TYPE (init),
1207 : 0 : TREE_TYPE (next)))
1208 : 0 : new_init = copy_ssa_name (init);
1209 : 0 : else if (useless_type_conversion_p (TREE_TYPE (next),
1210 : 0 : TREE_TYPE (init)))
1211 : 0 : new_init = make_temp_ssa_name (TREE_TYPE (next), NULL,
1212 : : "unrinittmp");
1213 : : else
1214 : 0 : new_init = make_temp_ssa_name (TREE_TYPE (init), NULL,
1215 : : "unrinittmp");
1216 : :
1217 : 1436 : gphi *phi_rest = create_phi_node (new_init, rest);
1218 : 1436 : add_phi_arg (phi_rest, init, precond_edge, UNKNOWN_LOCATION);
1219 : 1436 : add_phi_arg (phi_rest, next, new_exit, UNKNOWN_LOCATION);
1220 : 1436 : SET_USE (op, new_init);
1221 : : }
1222 : :
1223 : 670 : remove_path (exit);
1224 : : /* We will later redirect exit from vectorized loop to new_loop. */
1225 : 670 : loop_preheader_edge (new_loop)->src->count = entry_count;
1226 : :
1227 : : /* The epilog loop latch executes at most factor - 1 times.
1228 : : Since the epilog is entered unconditionally it will need to handle
1229 : : up to factor executions of its body. */
1230 : 670 : new_loop->any_upper_bound = true;
1231 : 670 : new_loop->nb_iterations_upper_bound = factor - 1;
1232 : : /* We do not really know estimate on number of iterations, since we do not
1233 : : track any estimates modulo unroll factor.
1234 : : Drop estimate from loop_info and scale loop profile.
1235 : : It may be more realistic to scale loop profile to factor / 2 - 1,
1236 : : but vectorizer also uses factor - 1. */
1237 : 670 : new_loop->any_estimate = false;
1238 : 670 : scale_loop_profile (new_loop, profile_probability::always (), factor - 1);
1239 : : }
1240 : : else
1241 : 23 : new_exit = single_dom_exit (loop);
1242 : :
1243 : : /* Transform the loop. */
1244 : 693 : if (transform)
1245 : 591 : (*transform) (loop, data);
1246 : :
1247 : : /* Unroll the loop and remove the exits in all iterations except for the
1248 : : last one. */
1249 : 693 : auto_sbitmap wont_exit (factor);
1250 : 693 : bitmap_ones (wont_exit);
1251 : 693 : bitmap_clear_bit (wont_exit, factor - 1);
1252 : :
1253 : 693 : auto_vec<edge> to_remove;
1254 : 693 : bool ok
1255 : : = gimple_duplicate_loop_body_to_header_edge
1256 : 755 : (loop, loop_latch_edge (loop), factor - 1, wont_exit,
1257 : : new_exit, &to_remove,
1258 : : DLTHE_FLAG_UPDATE_FREQ | (flat ? DLTHE_FLAG_FLAT_PROFILE : 0));
1259 : 693 : gcc_assert (ok);
1260 : :
1261 : 3023 : for (edge e : to_remove)
1262 : : {
1263 : 944 : ok = remove_path (e);
1264 : 944 : gcc_assert (ok);
1265 : : }
1266 : 693 : update_ssa (TODO_update_ssa);
1267 : :
1268 : 693 : new_exit = single_dom_exit (loop);
1269 : : /* gimple_duplicate_loop_body_to_header_edge depending on
1270 : : DLTHE_FLAG_UPDATE_FREQ either keeps original frequency of the loop header
1271 : : or scales it down accordingly.
1272 : : However exit edge probability is kept as original. Fix it if needed
1273 : : and compensate. */
1274 : 693 : update_loop_exit_probability_scale_dom_bbs (loop, new_exit);
1275 : 693 : if (!single_loop_p)
1276 : : {
1277 : : /* Finally create the new counter for number of iterations and add
1278 : : the new exit instruction. */
1279 : 670 : tree ctr_before, ctr_after;
1280 : 670 : gimple_stmt_iterator bsi = gsi_last_nondebug_bb (new_exit->src);
1281 : 670 : exit_if = as_a <gcond *> (gsi_stmt (bsi));
1282 : 670 : create_iv (exit_base, PLUS_EXPR, exit_step, NULL_TREE, loop,
1283 : : &bsi, false, &ctr_before, &ctr_after);
1284 : 670 : gimple_cond_set_code (exit_if, exit_cmp);
1285 : 670 : gimple_cond_set_lhs (exit_if, ctr_after);
1286 : 670 : gimple_cond_set_rhs (exit_if, exit_bound);
1287 : 670 : update_stmt (exit_if);
1288 : : }
1289 : 693 : if (loop->any_upper_bound)
1290 : 1366 : loop->nb_iterations_upper_bound = wi::udiv_floor
1291 : 683 : (loop->nb_iterations_upper_bound + 1, factor) - 1;
1292 : 693 : if (loop->any_likely_upper_bound)
1293 : 1366 : loop->nb_iterations_likely_upper_bound = wi::udiv_floor
1294 : 683 : (loop->nb_iterations_likely_upper_bound + 1, factor) - 1;
1295 : 693 : if (loop->any_estimate)
1296 : 142 : loop->nb_iterations_estimate = wi::udiv_floor
1297 : 71 : (loop->nb_iterations_estimate + 1, factor) - 1;
1298 : :
1299 : 693 : checking_verify_flow_info ();
1300 : 693 : checking_verify_loop_structure ();
1301 : 693 : checking_verify_loop_closed_ssa (true, loop);
1302 : 693 : if (new_loop)
1303 : 1363 : checking_verify_loop_closed_ssa (true, new_loop);
1304 : 693 : }
1305 : :
1306 : : /* Wrapper over tree_transform_and_unroll_loop for case we do not
1307 : : want to transform the loop before unrolling. The meaning
1308 : : of the arguments is the same as for tree_transform_and_unroll_loop. */
1309 : :
1310 : : void
1311 : 102 : tree_unroll_loop (class loop *loop, unsigned factor,
1312 : : class tree_niter_desc *desc)
1313 : : {
1314 : 102 : tree_transform_and_unroll_loop (loop, factor, desc, NULL, NULL);
1315 : 102 : }
1316 : :
1317 : : /* Rewrite the phi node at position PSI in function of the main
1318 : : induction variable MAIN_IV and insert the generated code at GSI. */
1319 : :
1320 : : static void
1321 : 2050 : rewrite_phi_with_iv (loop_p loop,
1322 : : gphi_iterator *psi,
1323 : : gimple_stmt_iterator *gsi,
1324 : : tree main_iv)
1325 : : {
1326 : 2050 : affine_iv iv;
1327 : 2050 : gassign *stmt;
1328 : 2050 : gphi *phi = psi->phi ();
1329 : 2050 : tree atype, mtype, val, res = PHI_RESULT (phi);
1330 : :
1331 : 4100 : if (virtual_operand_p (res) || res == main_iv)
1332 : : {
1333 : 1166 : gsi_next (psi);
1334 : 1257 : return;
1335 : : }
1336 : :
1337 : 884 : if (!simple_iv (loop, loop, res, &iv, true))
1338 : : {
1339 : 91 : gsi_next (psi);
1340 : 91 : return;
1341 : : }
1342 : :
1343 : 793 : remove_phi_node (psi, false);
1344 : :
1345 : 793 : atype = TREE_TYPE (res);
1346 : 793 : mtype = POINTER_TYPE_P (atype) ? sizetype : atype;
1347 : 793 : val = fold_build2 (MULT_EXPR, mtype, unshare_expr (iv.step),
1348 : : fold_convert (mtype, main_iv));
1349 : 1586 : val = fold_build2 (POINTER_TYPE_P (atype)
1350 : : ? POINTER_PLUS_EXPR : PLUS_EXPR,
1351 : : atype, unshare_expr (iv.base), val);
1352 : 793 : val = force_gimple_operand_gsi (gsi, val, false, NULL_TREE, true,
1353 : : GSI_SAME_STMT);
1354 : 793 : stmt = gimple_build_assign (res, val);
1355 : 793 : gsi_insert_before (gsi, stmt, GSI_SAME_STMT);
1356 : : }
1357 : :
1358 : : /* Rewrite all the phi nodes of LOOP in function of the main induction
1359 : : variable MAIN_IV. */
1360 : :
1361 : : static void
1362 : 586 : rewrite_all_phi_nodes_with_iv (loop_p loop, tree main_iv)
1363 : : {
1364 : 586 : unsigned i;
1365 : 586 : basic_block *bbs = get_loop_body_in_dom_order (loop);
1366 : 586 : gphi_iterator psi;
1367 : :
1368 : 2070 : for (i = 0; i < loop->num_nodes; i++)
1369 : : {
1370 : 1484 : basic_block bb = bbs[i];
1371 : 1484 : gimple_stmt_iterator gsi = gsi_after_labels (bb);
1372 : :
1373 : 1484 : if (bb->loop_father != loop)
1374 : 97 : continue;
1375 : :
1376 : 3437 : for (psi = gsi_start_phis (bb); !gsi_end_p (psi); )
1377 : 2050 : rewrite_phi_with_iv (loop, &psi, &gsi, main_iv);
1378 : : }
1379 : :
1380 : 586 : free (bbs);
1381 : 586 : }
1382 : :
1383 : : /* Bases all the induction variables in LOOP on a single induction variable
1384 : : (with base 0 and step 1), whose final value is compared with *NIT. When the
1385 : : IV type precision has to be larger than *NIT type precision, *NIT is
1386 : : converted to the larger type, the conversion code is inserted before the
1387 : : loop, and *NIT is updated to the new definition. When BUMP_IN_LATCH is true,
1388 : : the induction variable is incremented in the loop latch, otherwise it is
1389 : : incremented in the loop header. Return the induction variable that was
1390 : : created. */
1391 : :
1392 : : tree
1393 : 586 : canonicalize_loop_ivs (class loop *loop, tree *nit, bool bump_in_latch)
1394 : : {
1395 : 586 : unsigned precision = TYPE_PRECISION (TREE_TYPE (*nit));
1396 : 586 : unsigned original_precision = precision;
1397 : 586 : tree type, var_before;
1398 : 586 : gimple_stmt_iterator gsi;
1399 : 586 : gphi_iterator psi;
1400 : 586 : gcond *stmt;
1401 : 586 : edge exit = single_dom_exit (loop);
1402 : 586 : gimple_seq stmts;
1403 : 586 : bool unsigned_p = false;
1404 : :
1405 : 586 : for (psi = gsi_start_phis (loop->header);
1406 : 1945 : !gsi_end_p (psi); gsi_next (&psi))
1407 : : {
1408 : 1359 : gphi *phi = psi.phi ();
1409 : 1359 : tree res = PHI_RESULT (phi);
1410 : 1359 : bool uns;
1411 : :
1412 : 1359 : type = TREE_TYPE (res);
1413 : 1359 : if (virtual_operand_p (res)
1414 : 865 : || (!INTEGRAL_TYPE_P (type)
1415 : 29 : && !POINTER_TYPE_P (type))
1416 : 2199 : || TYPE_PRECISION (type) < precision)
1417 : 541 : continue;
1418 : :
1419 : 818 : uns = POINTER_TYPE_P (type) | TYPE_UNSIGNED (type);
1420 : :
1421 : 818 : if (TYPE_PRECISION (type) > precision)
1422 : : unsigned_p = uns;
1423 : : else
1424 : 813 : unsigned_p |= uns;
1425 : :
1426 : 818 : precision = TYPE_PRECISION (type);
1427 : : }
1428 : :
1429 : 586 : scalar_int_mode mode = smallest_int_mode_for_size (precision).require ();
1430 : 586 : precision = GET_MODE_PRECISION (mode);
1431 : 586 : type = build_nonstandard_integer_type (precision, unsigned_p);
1432 : :
1433 : 586 : if (original_precision != precision
1434 : 586 : || TYPE_UNSIGNED (TREE_TYPE (*nit)) != unsigned_p)
1435 : : {
1436 : 307 : *nit = fold_convert (type, *nit);
1437 : 307 : *nit = force_gimple_operand (*nit, &stmts, true, NULL_TREE);
1438 : 307 : if (stmts)
1439 : 125 : gsi_insert_seq_on_edge_immediate (loop_preheader_edge (loop), stmts);
1440 : : }
1441 : :
1442 : 586 : if (bump_in_latch)
1443 : 1172 : gsi = gsi_last_bb (loop->latch);
1444 : : else
1445 : 0 : gsi = gsi_last_nondebug_bb (loop->header);
1446 : 586 : create_iv (build_int_cst_type (type, 0), PLUS_EXPR, build_int_cst (type, 1),
1447 : : NULL_TREE, loop, &gsi, bump_in_latch, &var_before, NULL);
1448 : :
1449 : 586 : rewrite_all_phi_nodes_with_iv (loop, var_before);
1450 : :
1451 : 1172 : stmt = as_a <gcond *> (*gsi_last_bb (exit->src));
1452 : : /* Make the loop exit if the control condition is not satisfied. */
1453 : 586 : if (exit->flags & EDGE_TRUE_VALUE)
1454 : : {
1455 : 113 : edge te, fe;
1456 : :
1457 : 113 : extract_true_false_edges_from_block (exit->src, &te, &fe);
1458 : 113 : te->flags = EDGE_FALSE_VALUE;
1459 : 113 : fe->flags = EDGE_TRUE_VALUE;
1460 : : }
1461 : 586 : gimple_cond_set_code (stmt, LT_EXPR);
1462 : 586 : gimple_cond_set_lhs (stmt, var_before);
1463 : 586 : gimple_cond_set_rhs (stmt, *nit);
1464 : 586 : update_stmt (stmt);
1465 : :
1466 : 586 : return var_before;
1467 : : }
|