Line data Source code
1 : /* Induction variable canonicalization and loop peeling.
2 : Copyright (C) 2004-2026 Free Software Foundation, Inc.
3 :
4 : This file is part of GCC.
5 :
6 : GCC is free software; you can redistribute it and/or modify it
7 : under the terms of the GNU General Public License as published by the
8 : Free Software Foundation; either version 3, or (at your option) any
9 : later version.
10 :
11 : GCC is distributed in the hope that it will be useful, but WITHOUT
12 : ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
13 : FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
14 : for more details.
15 :
16 : You should have received a copy of the GNU General Public License
17 : along with GCC; see the file COPYING3. If not see
18 : <http://www.gnu.org/licenses/>. */
19 :
20 : /* This pass detects the loops that iterate a constant number of times,
21 : adds a canonical induction variable (step -1, tested against 0)
22 : and replaces the exit test. This enables the less powerful rtl
23 : level analysis to use this information.
24 :
25 : This might spoil the code in some cases (by increasing register pressure).
26 : Note that in the case the new variable is not needed, ivopts will get rid
27 : of it, so it might only be a problem when there are no other linear induction
28 : variables. In that case the created optimization possibilities are likely
29 : to pay up.
30 :
31 : We also perform
32 : - complete unrolling (or peeling) when the loops is rolling few enough
33 : times
34 : - simple peeling (i.e. copying few initial iterations prior the loop)
35 : when number of iteration estimate is known (typically by the profile
36 : info). */
37 :
38 : #include "config.h"
39 : #include "system.h"
40 : #include "coretypes.h"
41 : #include "backend.h"
42 : #include "tree.h"
43 : #include "gimple.h"
44 : #include "cfghooks.h"
45 : #include "tree-pass.h"
46 : #include "ssa.h"
47 : #include "cgraph.h"
48 : #include "gimple-pretty-print.h"
49 : #include "fold-const.h"
50 : #include "profile.h"
51 : #include "gimple-iterator.h"
52 : #include "gimple-fold.h"
53 : #include "tree-eh.h"
54 : #include "tree-cfg.h"
55 : #include "tree-ssa-loop-manip.h"
56 : #include "tree-ssa-loop-niter.h"
57 : #include "tree-ssa-loop.h"
58 : #include "tree-into-ssa.h"
59 : #include "cfgloop.h"
60 : #include "tree-chrec.h"
61 : #include "tree-scalar-evolution.h"
62 : #include "tree-inline.h"
63 : #include "tree-cfgcleanup.h"
64 : #include "builtins.h"
65 : #include "tree-ssa-sccvn.h"
66 : #include "tree-vectorizer.h" /* For find_loop_location */
67 : #include "dbgcnt.h"
68 : #include "hierarchical_discriminator.h"
69 :
/* Specifies types of loops that may be unrolled.  Used to tell
   try_unroll_loop_completely how aggressive it may be.  */

enum unroll_level
{
  UL_SINGLE_ITER,	/* Only loops that exit immediately in the first
			   iteration.  */
  UL_NO_GROWTH,		/* Only loops whose unrolling will not cause increase
			   of code size.  */
  UL_ALL		/* All suitable loops.  */
};
80 :
/* Adds a canonical induction variable to LOOP iterating NITER times.  EXIT
   is the exit edge whose condition is replaced.  The ssa versions of the new
   IV before and after increment will be stored in VAR_BEFORE and VAR_AFTER
   if they are not NULL.  */

void
create_canonical_iv (class loop *loop, edge exit, tree niter,
		     tree *var_before = NULL, tree *var_after = NULL)
{
  edge in;
  tree type, var;
  gcond *cond;
  gimple_stmt_iterator incr_at;
  enum tree_code cmp;

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      fprintf (dump_file, "Added canonical iv to loop %d, ", loop->num);
      print_generic_expr (dump_file, niter, TDF_SLIM);
      fprintf (dump_file, " iterations.\n");
    }

  /* The exit test is the last statement of EXIT's source block; IN is
     the other successor edge out of that block (the one that stays in
     the loop).  */
  cond = as_a <gcond *> (*gsi_last_bb (exit->src));
  in = EDGE_SUCC (exit->src, 0);
  if (in == exit)
    in = EDGE_SUCC (exit->src, 1);

  /* Note that we do not need to worry about overflows, since
     type of niter is always unsigned and all comparisons are
     just for equality/nonequality -- i.e. everything works
     with a modulo arithmetics.  */

  /* Build an IV with initial value NITER + 1 and step -1; the increment
     is placed at the end of the block IN originates from.  */
  type = TREE_TYPE (niter);
  niter = fold_build2 (PLUS_EXPR, type,
		       niter,
		       build_int_cst (type, 1));
  incr_at = gsi_last_bb (in->src);
  create_iv (niter, PLUS_EXPR,
	     build_int_cst (type, -1),
	     NULL_TREE, loop,
	     &incr_at, false, var_before, &var);
  if (var_after)
    *var_after = var;

  /* Replace the old exit condition by a test of the new IV against zero,
     preserving which of the TRUE/FALSE edges leaves the loop.  */
  cmp = (exit->flags & EDGE_TRUE_VALUE) ? EQ_EXPR : NE_EXPR;
  gimple_cond_set_code (cond, cmp);
  gimple_cond_set_lhs (cond, var);
  gimple_cond_set_rhs (cond, build_int_cst (type, 0));
  update_stmt (cond);
}

/* Describe size of loop as detected by tree_estimate_loop_size.  */
struct loop_size
{
  /* Number of instructions in the loop.  */
  int overall;

  /* Number of instructions that will be likely optimized out in
     peeled iterations of loop (i.e. computation based on induction
     variable where induction variable starts at known constant.)  */
  int eliminated_by_peeling;

  /* Number of instructions that cannot be further optimized in the
     peeled loop, for example volatile accesses.  */
  int not_eliminatable_after_peeling;

  /* Same statistics for last iteration of loop: it is smaller because
     instructions after exit are not executed.  */
  int last_iteration;
  int last_iteration_eliminated_by_peeling;
  int last_iteration_not_eliminatable_after_peeling;

  /* If some IV computation will become constant.  */
  bool constant_iv;

  /* Number of call stmts that are not a builtin and are pure or const
     present on the hot path.  */
  int num_pure_calls_on_hot_path;
  /* Number of call stmts that are not a builtin and are not pure nor const
     present on the hot path.  */
  int num_non_pure_calls_on_hot_path;
  /* Number of statements other than calls in the loop.  */
  int non_call_stmts_on_hot_path;
  /* Number of branches seen on the hot path.  */
  int num_branches_on_hot_path;
};

/* Return true if OP in STMT will be constant after peeling LOOP.  */

static bool
constant_after_peeling (tree op, gimple *stmt, class loop *loop)
{
  if (CONSTANT_CLASS_P (op))
    return true;

  /* Get at the actual SSA operand.  */
  if (handled_component_p (op)
      && TREE_CODE (TREE_OPERAND (op, 0)) == SSA_NAME)
    op = TREE_OPERAND (op, 0);

  /* We can still fold accesses to constant arrays when index is known.  */
  if (TREE_CODE (op) != SSA_NAME)
    {
      tree base = op;

      /* First make fast look if we see constant array inside.  */
      while (handled_component_p (base))
	base = TREE_OPERAND (base, 0);
      if ((DECL_P (base)
	   && ctor_for_folding (base) != error_mark_node)
	  || CONSTANT_CLASS_P (base))
	{
	  /* If so, see if we understand all the indices.  Recurse so
	     array indices that are themselves IVs count as constant.  */
	  base = op;
	  while (handled_component_p (base))
	    {
	      if (TREE_CODE (base) == ARRAY_REF
		  && !constant_after_peeling (TREE_OPERAND (base, 1), stmt, loop))
		return false;
	      base = TREE_OPERAND (base, 0);
	    }
	  return true;
	}
      return false;
    }

  /* Induction variables are constants when defined in loop.  */
  if (loop_containing_stmt (stmt) != loop)
    return false;
  /* Ask SCEV whether OP evolves in a way it fully understands; if not,
     fall back to direct pattern matching of a simple IV below.  */
  tree ev = analyze_scalar_evolution (loop, op);
  if (chrec_contains_undetermined (ev)
      || chrec_contains_symbols (ev))
    {
      if (ANY_INTEGRAL_TYPE_P (TREE_TYPE (op)))
	{
	  /* Look for the pattern
	       op = phi-result (op <binop> cst)   or
	       op = phi in loop header whose latch value is an assignment
	     i.e. an IV expressed as a header PHI plus a binary update.  */
	  gassign *ass = nullptr;
	  gphi *phi = nullptr;
	  if (is_a <gassign *> (SSA_NAME_DEF_STMT (op)))
	    {
	      ass = as_a <gassign *> (SSA_NAME_DEF_STMT (op));
	      if (TREE_CODE (gimple_assign_rhs1 (ass)) == SSA_NAME)
		phi = dyn_cast <gphi *>
			 (SSA_NAME_DEF_STMT (gimple_assign_rhs1 (ass)));
	    }
	  else if (is_a <gphi *> (SSA_NAME_DEF_STMT (op)))
	    {
	      phi = as_a <gphi *> (SSA_NAME_DEF_STMT (op));
	      if (gimple_bb (phi) == loop->header)
		{
		  tree def = gimple_phi_arg_def_from_edge
		    (phi, loop_latch_edge (loop));
		  if (TREE_CODE (def) == SSA_NAME
		      && is_a <gassign *> (SSA_NAME_DEF_STMT (def)))
		    ass = as_a <gassign *> (SSA_NAME_DEF_STMT (def));
		}
	    }
	  if (ass && phi)
	    {
	      /* Accept only the canonical shape: PHI in the loop header,
		 constant initial value on the preheader edge, and the
		 latch value produced by a binary op of the PHI result
		 with a constant.  */
	      tree rhs1 = gimple_assign_rhs1 (ass);
	      if (gimple_assign_rhs_class (ass) == GIMPLE_BINARY_RHS
		  && CONSTANT_CLASS_P (gimple_assign_rhs2 (ass))
		  && rhs1 == gimple_phi_result (phi)
		  && gimple_bb (phi) == loop->header
		  && (gimple_phi_arg_def_from_edge (phi, loop_latch_edge (loop))
		      == gimple_assign_lhs (ass))
		  && (CONSTANT_CLASS_P (gimple_phi_arg_def_from_edge
					 (phi, loop_preheader_edge (loop)))))
		return true;
	    }
	}
      return false;
    }
  return true;
}

/* Computes an estimated number of insns in LOOP.
   EXIT (if non-NULL) is an exit edge that will be eliminated in all but last
   iteration of the loop.
   EDGE_TO_CANCEL (if non-NULL) is a non-exit edge eliminated in the last
   iteration of loop.
   Return results in SIZE, estimate benefits for complete unrolling exiting
   by EXIT.
   Stop estimating after UPPER_BOUND is met.  Return true in this case.  */

static bool
tree_estimate_loop_size (class loop *loop, edge exit, edge edge_to_cancel,
			 struct loop_size *size, int upper_bound)
{
  basic_block *body = get_loop_body (loop);
  gimple_stmt_iterator gsi;
  unsigned int i;
  bool after_exit;
  auto_vec<basic_block> path = get_loop_hot_path (loop);

  size->overall = 0;
  size->eliminated_by_peeling = 0;
  size->not_eliminatable_after_peeling = 0;
  size->last_iteration = 0;
  size->last_iteration_eliminated_by_peeling = 0;
  size->last_iteration_not_eliminatable_after_peeling = 0;
  size->num_pure_calls_on_hot_path = 0;
  size->num_non_pure_calls_on_hot_path = 0;
  size->non_call_stmts_on_hot_path = 0;
  size->num_branches_on_hot_path = 0;
  size->constant_iv = 0;

  if (dump_file && (dump_flags & TDF_DETAILS))
    fprintf (dump_file, "Estimating sizes for loop %i\n", loop->num);
  /* First pass: walk all loop blocks and classify every statement.  */
  for (i = 0; i < loop->num_nodes; i++)
    {
      /* Blocks dominated by EDGE_TO_CANCEL's source do not execute in
	 the last iteration, so they do not count into last_iteration.  */
      if (edge_to_cancel && body[i] != edge_to_cancel->src
	  && dominated_by_p (CDI_DOMINATORS, body[i], edge_to_cancel->src))
	after_exit = true;
      else
	after_exit = false;
      if (dump_file && (dump_flags & TDF_DETAILS))
	fprintf (dump_file, " BB: %i, after_exit: %i\n", body[i]->index,
		 after_exit);

      for (gsi = gsi_start_bb (body[i]); !gsi_end_p (gsi); gsi_next (&gsi))
	{
	  gimple *stmt = gsi_stmt (gsi);
	  int num = estimate_num_insns (stmt, &eni_size_weights);
	  bool not_eliminatable_after_peeling = false;
	  bool likely_eliminated = false;
	  bool likely_eliminated_last = false;
	  bool likely_eliminated_peeled = false;

	  if (dump_file && (dump_flags & TDF_DETAILS))
	    {
	      fprintf (dump_file, " size: %3i ", num);
	      print_gimple_stmt (dump_file, gsi_stmt (gsi), 0);
	    }

	  /* Look for reasons why we might optimize this stmt away.  */

	  if (gimple_has_side_effects (stmt))
	    not_eliminatable_after_peeling = true;
	  else
	    {
	      /* Exit conditional.  */
	      if (exit && body[i] == exit->src
		  && stmt == *gsi_last_bb (exit->src))
		{
		  if (dump_file && (dump_flags & TDF_DETAILS))
		    fprintf (dump_file, " Exit condition will be eliminated "
			     "in peeled copies.\n");
		  likely_eliminated_peeled = true;
		}
	      if (edge_to_cancel && body[i] == edge_to_cancel->src
		  && stmt == *gsi_last_bb (edge_to_cancel->src))
		{
		  if (dump_file && (dump_flags & TDF_DETAILS))
		    fprintf (dump_file, " Exit condition will be eliminated "
			     "in last copy.\n");
		  likely_eliminated_last = true;
		}
	      /* Sets of IV variables.  */
	      if (gimple_code (stmt) == GIMPLE_ASSIGN
		  && constant_after_peeling (gimple_assign_lhs (stmt), stmt, loop))
		{
		  if (dump_file && (dump_flags & TDF_DETAILS))
		    fprintf (dump_file, " Induction variable computation will"
			     " be folded away.\n");
		  likely_eliminated = true;
		}
	      /* Assignments of IV variables.  */
	      else if (gimple_code (stmt) == GIMPLE_ASSIGN
		       && TREE_CODE (gimple_assign_lhs (stmt)) == SSA_NAME
		       && constant_after_peeling (gimple_assign_rhs1 (stmt),
						  stmt, loop)
		       && (gimple_assign_rhs_class (stmt) != GIMPLE_BINARY_RHS
			   || constant_after_peeling (gimple_assign_rhs2 (stmt),
						      stmt, loop))
		       && gimple_assign_rhs_class (stmt) != GIMPLE_TERNARY_RHS)
		{
		  size->constant_iv = true;
		  if (dump_file && (dump_flags & TDF_DETAILS))
		    fprintf (dump_file,
			     " Constant expression will be folded away.\n");
		  likely_eliminated = true;
		}
	      /* Conditionals.  */
	      else if ((gimple_code (stmt) == GIMPLE_COND
			&& constant_after_peeling (gimple_cond_lhs (stmt), stmt,
						   loop)
			&& constant_after_peeling (gimple_cond_rhs (stmt), stmt,
						   loop)
			/* We don't simplify all constant compares so make sure
			   they are not both constant already.  See PR70288.  */
			&& (! is_gimple_min_invariant (gimple_cond_lhs (stmt))
			    || ! is_gimple_min_invariant
				 (gimple_cond_rhs (stmt))))
		       || (gimple_code (stmt) == GIMPLE_SWITCH
			   && constant_after_peeling (gimple_switch_index (
							as_a <gswitch *>
							  (stmt)),
						      stmt, loop)
			   && ! is_gimple_min_invariant
				   (gimple_switch_index
				      (as_a <gswitch *> (stmt)))))
		{
		  if (dump_file && (dump_flags & TDF_DETAILS))
		    fprintf (dump_file, " Constant conditional.\n");
		  likely_eliminated = true;
		}
	    }

	  /* Accumulate the per-statement classification into SIZE.  */
	  size->overall += num;
	  if (likely_eliminated || likely_eliminated_peeled)
	    size->eliminated_by_peeling += num;
	  if (not_eliminatable_after_peeling)
	    size->not_eliminatable_after_peeling += num;
	  if (!after_exit)
	    {
	      size->last_iteration += num;
	      if (likely_eliminated || likely_eliminated_last)
		size->last_iteration_eliminated_by_peeling += num;
	      if (not_eliminatable_after_peeling)
		size->last_iteration_not_eliminatable_after_peeling += num;
	    }
	  /* Bail out early once the estimate cannot stay under
	     UPPER_BOUND any more.  */
	  if ((size->overall * 3 / 2 - size->eliminated_by_peeling
	      - size->last_iteration_eliminated_by_peeling) > upper_bound)
	    {
	      free (body);
	      return true;
	    }
	}
    }
  /* Second pass: gather hot-path statistics (calls and branches that
     will survive on the expected execution path).  */
  while (path.length ())
    {
      basic_block bb = path.pop ();
      for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
	{
	  gimple *stmt = gsi_stmt (gsi);
	  if (gimple_code (stmt) == GIMPLE_CALL
	      && !gimple_inexpensive_call_p (as_a <gcall *> (stmt)))
	    {
	      int flags = gimple_call_flags (stmt);
	      if (flags & (ECF_PURE | ECF_CONST))
		size->num_pure_calls_on_hot_path++;
	      else
		size->num_non_pure_calls_on_hot_path++;
	      size->num_branches_on_hot_path ++;
	    }
	  /* Count inexpensive calls as non-calls, because they will likely
	     expand inline.  */
	  else if (gimple_code (stmt) != GIMPLE_DEBUG)
	    size->non_call_stmts_on_hot_path++;
	  /* Conditionals whose operands do not become constant after
	     peeling remain real branches; the exit test itself is not
	     counted since it goes away.  */
	  if (((gimple_code (stmt) == GIMPLE_COND
		&& (!constant_after_peeling (gimple_cond_lhs (stmt), stmt, loop)
		    || !constant_after_peeling (gimple_cond_rhs (stmt), stmt,
						loop)))
	       || (gimple_code (stmt) == GIMPLE_SWITCH
		   && !constant_after_peeling (gimple_switch_index (
						 as_a <gswitch *> (stmt)),
					       stmt, loop)))
	      && (!exit || bb != exit->src))
	    size->num_branches_on_hot_path++;
	}
    }

  if (dump_file && (dump_flags & TDF_DETAILS))
    fprintf (dump_file, "size: %i-%i, last_iteration: %i-%i\n", size->overall,
	     size->eliminated_by_peeling, size->last_iteration,
	     size->last_iteration_eliminated_by_peeling);

  free (body);
  return false;
}
450 :
451 : /* Estimate number of insns of completely unrolled loop.
452 : It is (NUNROLL + 1) * size of loop body with taking into account
453 : the fact that in last copy everything after exit conditional
454 : is dead and that some instructions will be eliminated after
455 : peeling. Set *EST_ELIMINATED to the number of stmts that could be
456 : optimistically eliminated by followup transforms. */
457 : static unsigned HOST_WIDE_INT
458 489637 : estimated_unrolled_size (struct loop_size *size,
459 : unsigned HOST_WIDE_INT *est_eliminated,
460 : unsigned HOST_WIDE_INT nunroll)
461 : {
462 489637 : HOST_WIDE_INT unr_insns = ((nunroll)
463 489637 : * (HOST_WIDE_INT) (size->overall
464 489637 : - size->eliminated_by_peeling));
465 489637 : HOST_WIDE_INT not_elim
466 489637 : = ((nunroll) * (HOST_WIDE_INT) size->not_eliminatable_after_peeling);
467 489637 : unr_insns += size->last_iteration - size->last_iteration_eliminated_by_peeling;
468 489637 : not_elim += size->last_iteration_not_eliminatable_after_peeling;
469 :
470 : /* Testcases rely on rounding up, so do not write as
471 : (unr_insns - not_elim) / 3. */
472 489637 : *est_eliminated = unr_insns - not_elim - (unr_insns - not_elim) * 2 / 3;
473 489637 : return unr_insns;
474 : }
475 :
/* Loop LOOP is known to not loop.  See if there is an edge in the loop
   body that can be remove to make the loop to always exit and at
   the same time it does not make any code potentially executed
   during the last iteration dead.

   After complete unrolling we still may get rid of the conditional
   on the exit in the last copy even if we have no idea what it does.
   This is quite common case for loops of form

     int a[5];
     for (i=0;i<b;i++)
       a[i]=0;

   Here we prove the loop to iterate 5 times but we do not know
   it from induction variable.

   For now we handle only simple case where there is exit condition
   just before the latch block and the latch block contains no statements
   with side effect that may otherwise terminate the execution of loop
   (such as by EH or by terminating the program or longjmp).

   In the general case we may want to cancel the paths leading to statements
   loop-niter identified as having undefined effect in the last iteration.
   The other cases are hopefully rare and will be cleaned up later.  */

static edge
loop_edge_to_cancel (class loop *loop)
{
  unsigned i;
  edge edge_to_cancel;
  gimple_stmt_iterator gsi;

  /* We want only one predecessor of the loop.  */
  if (EDGE_COUNT (loop->latch->preds) > 1)
    return NULL;

  auto_vec<edge> exits = get_loop_exit_edges (loop);

  FOR_EACH_VEC_ELT (exits, i, edge_to_cancel)
    {
      /* Find the other edge than the loop exit
	 leaving the conditional.  */
      if (EDGE_COUNT (edge_to_cancel->src->succs) != 2)
	continue;
      if (EDGE_SUCC (edge_to_cancel->src, 0) == edge_to_cancel)
	edge_to_cancel = EDGE_SUCC (edge_to_cancel->src, 1);
      else
	edge_to_cancel = EDGE_SUCC (edge_to_cancel->src, 0);

      /* We only can handle conditionals.  */
      if (!(edge_to_cancel->flags & (EDGE_TRUE_VALUE | EDGE_FALSE_VALUE)))
	continue;

      /* We should never have conditionals in the loop latch.  */
      gcc_assert (edge_to_cancel->dest != loop->header);

      /* Check that it leads to loop latch.  */
      if (edge_to_cancel->dest != loop->latch)
	continue;

      /* Verify that the code in loop latch does nothing that may end program
	 execution without really reaching the exit.  This may include
	 non-pure/const function calls, EH statements, volatile ASMs etc.  */
      for (gsi = gsi_start_bb (loop->latch); !gsi_end_p (gsi); gsi_next (&gsi))
	if (gimple_has_side_effects (gsi_stmt (gsi)))
	  return NULL;
      return edge_to_cancel;
    }
  return NULL;
}
546 :
/* Remove all tests for exits that are known to be taken after LOOP was
   peeled NPEELED times.  Put gcc_unreachable before every statement
   known to not be executed.  Return true if any change was made.  */

static bool
remove_exits_and_undefined_stmts (class loop *loop, unsigned int npeeled)
{
  class nb_iter_bound *elt;
  bool changed = false;

  for (elt = loop->bounds; elt; elt = elt->next)
    {
      /* If statement is known to be undefined after peeling, turn it
	 into unreachable (or trap when debugging experience is supposed
	 to be good).  */
      if (!elt->is_exit
	  && wi::ltu_p (elt->bound, npeeled))
	{
	  gimple_stmt_iterator gsi = gsi_for_stmt (elt->stmt);
	  location_t loc = gimple_location (elt->stmt);
	  gcall *stmt = gimple_build_builtin_unreachable (loc);
	  gsi_insert_before (&gsi, stmt, GSI_NEW_STMT);
	  /* Split so the unreachable call terminates its block, cutting
	     off the undefined statement from control flow.  */
	  split_block (gimple_bb (stmt), stmt);
	  changed = true;
	  if (dump_file && (dump_flags & TDF_DETAILS))
	    {
	      fprintf (dump_file, "Forced statement unreachable: ");
	      print_gimple_stmt (dump_file, elt->stmt, 0);
	    }
	}
      /* If we know the exit will be taken after peeling, update.  */
      else if (elt->is_exit
	       && wi::leu_p (elt->bound, npeeled))
	{
	  basic_block bb = gimple_bb (elt->stmt);
	  edge exit_edge = EDGE_SUCC (bb, 0);

	  if (dump_file && (dump_flags & TDF_DETAILS))
	    {
	      fprintf (dump_file, "Forced exit to be taken: ");
	      print_gimple_stmt (dump_file, elt->stmt, 0);
	    }
	  /* Figure out which of the two successors is the exit edge and
	     fold the condition so that edge is always taken.  */
	  if (!loop_exit_edge_p (loop, exit_edge))
	    exit_edge = EDGE_SUCC (bb, 1);
	  exit_edge->probability = profile_probability::always ();
	  gcc_checking_assert (loop_exit_edge_p (loop, exit_edge));
	  gcond *cond_stmt = as_a <gcond *> (elt->stmt);
	  if (exit_edge->flags & EDGE_TRUE_VALUE)
	    gimple_cond_make_true (cond_stmt);
	  else
	    gimple_cond_make_false (cond_stmt);
	  update_stmt (cond_stmt);
	  changed = true;
	}
    }
  return changed;
}
604 :
/* Remove all exits that are known to be never taken because of the loop bound
   discovered.  Return true if any change was made.  */

static bool
remove_redundant_iv_tests (class loop *loop)
{
  class nb_iter_bound *elt;
  bool changed = false;

  if (!loop->any_upper_bound)
    return false;
  for (elt = loop->bounds; elt; elt = elt->next)
    {
      /* Exit is pointless if it won't be taken before loop reaches
	 upper bound.  */
      if (elt->is_exit && loop->any_upper_bound
	  && wi::ltu_p (loop->nb_iterations_upper_bound, elt->bound))
	{
	  basic_block bb = gimple_bb (elt->stmt);
	  edge exit_edge = EDGE_SUCC (bb, 0);
	  class tree_niter_desc niter;

	  if (!loop_exit_edge_p (loop, exit_edge))
	    exit_edge = EDGE_SUCC (bb, 1);

	  /* Only when we know the actual number of iterations, not
	     just a bound, we can remove the exit.  */
	  if (!number_of_iterations_exit (loop, exit_edge,
					  &niter, false, false)
	      || !integer_onep (niter.assumptions)
	      || !integer_zerop (niter.may_be_zero)
	      || !niter.niter
	      || TREE_CODE (niter.niter) != INTEGER_CST
	      || !wi::ltu_p (widest_int::from (loop->nb_iterations_upper_bound,
					       SIGNED),
			     wi::to_widest (niter.niter)))
	    continue;

	  if (dump_file && (dump_flags & TDF_DETAILS))
	    {
	      fprintf (dump_file, "Removed pointless exit: ");
	      print_gimple_stmt (dump_file, elt->stmt, 0);
	    }
	  /* Fold the test so the in-loop edge is always taken.  */
	  gcond *cond_stmt = as_a <gcond *> (elt->stmt);
	  if (exit_edge->flags & EDGE_TRUE_VALUE)
	    gimple_cond_make_false (cond_stmt);
	  else
	    gimple_cond_make_true (cond_stmt);
	  update_stmt (cond_stmt);
	  changed = true;
	}
    }
  return changed;
}
659 :
/* Stores loops that will be unlooped and edges that will be removed
   after we process whole loop tree.  */
static vec<loop_p> loops_to_unloop;
static vec<int> loops_to_unloop_nunroll;
static vec<edge> edges_to_remove;
/* Stores loops that have been peeled.  */
static bitmap peeled_loops;
667 :

/* Cancel all fully unrolled loops by putting __builtin_unreachable
   on the latch edge.
   We do it after all unrolling since unlooping moves basic blocks
   across loop boundaries trashing loop closed SSA form as well
   as SCEV info needed to be intact during unrolling.

   IRRED_INVALIDATED is used to bookkeep if information about
   irreducible regions may become invalid as a result
   of the transformation.
   LOOP_CLOSED_SSA_INVALIDATED is used to bookkeep the case
   when we need to go into loop closed SSA form.  */

void
unloop_loops (vec<class loop *> &loops_to_unloop,
	      vec<int> &loops_to_unloop_nunroll,
	      vec<edge> &edges_to_remove,
	      bitmap loop_closed_ssa_invalidated,
	      bool *irred_invalidated)
{
  while (loops_to_unloop.length ())
    {
      class loop *loop = loops_to_unloop.pop ();
      int n_unroll = loops_to_unloop_nunroll.pop ();
      basic_block latch = loop->latch;
      edge latch_edge = loop_latch_edge (loop);
      /* Save the latch edge's properties; the edge itself is destroyed
	 by unloop below and recreated afterwards.  */
      int flags = latch_edge->flags;
      location_t locus = latch_edge->goto_locus;
      gcall *stmt;
      gimple_stmt_iterator gsi;

      remove_exits_and_undefined_stmts (loop, n_unroll);

      /* Unloop destroys the latch edge.  */
      unloop (loop, irred_invalidated, loop_closed_ssa_invalidated);

      /* Create new basic block for the latch edge destination and wire
	 it in.  */
      stmt = gimple_build_builtin_unreachable (locus);
      latch_edge = make_edge (latch, create_basic_block (NULL, NULL, latch), flags);
      latch_edge->probability = profile_probability::never ();
      latch_edge->flags |= flags;
      latch_edge->goto_locus = locus;

      add_bb_to_loop (latch_edge->dest, current_loops->tree_root);
      latch_edge->dest->count = profile_count::zero ();
      set_immediate_dominator (CDI_DOMINATORS, latch_edge->dest, latch_edge->src);

      gsi = gsi_start_bb (latch_edge->dest);
      gsi_insert_after (&gsi, stmt, GSI_NEW_STMT);
    }

  /* Remove edges in peeled copies.  Given remove_path removes dominated
     regions we need to cope with removal of already removed paths.  */
  unsigned i;
  edge e;
  /* Snapshot the source block indices first: removing one path may
     delete the source blocks of later edges in the vector.  */
  auto_vec<int, 20> src_bbs;
  src_bbs.reserve_exact (edges_to_remove.length ());
  FOR_EACH_VEC_ELT (edges_to_remove, i, e)
    src_bbs.quick_push (e->src->index);
  FOR_EACH_VEC_ELT (edges_to_remove, i, e)
    if (BASIC_BLOCK_FOR_FN (cfun, src_bbs[i]))
      {
	bool ok = remove_path (e, irred_invalidated,
			       loop_closed_ssa_invalidated);
	gcc_assert (ok);
      }
  edges_to_remove.release ();
}
736 :
/* Tries to unroll LOOP completely, i.e. NITER times.
   UL determines which loops we are allowed to unroll.
   EXIT is the exit of the loop that should be eliminated.
   MAXITER specifies a bound on the number of iterations, -1 if it is
   not known or too large for HOST_WIDE_INT.  The location
   LOCUS corresponding to the loop is used when emitting
   a summary of the unroll to the dump file.  */
744 :
745 : static bool
746 2255234 : try_unroll_loop_completely (class loop *loop,
747 : edge exit, tree niter, bool may_be_zero,
748 : enum unroll_level ul,
749 : HOST_WIDE_INT maxiter,
750 : dump_user_location_t locus, bool allow_peel,
751 : bool cunrolli)
752 : {
753 2255234 : unsigned HOST_WIDE_INT n_unroll = 0;
754 2255234 : bool n_unroll_found = false;
755 2255234 : edge edge_to_cancel = NULL;
756 :
757 : /* See if we proved number of iterations to be low constant.
758 :
759 : EXIT is an edge that will be removed in all but last iteration of
760 : the loop.
761 :
762 : EDGE_TO_CACNEL is an edge that will be removed from the last iteration
763 : of the unrolled sequence and is expected to make the final loop not
764 : rolling.
765 :
766 : If the number of execution of loop is determined by standard induction
767 : variable test, then EXIT and EDGE_TO_CANCEL are the two edges leaving
768 : from the iv test. */
769 2255234 : if (tree_fits_uhwi_p (niter))
770 : {
771 1046894 : n_unroll = tree_to_uhwi (niter);
772 1046894 : n_unroll_found = true;
773 1046894 : edge_to_cancel = EDGE_SUCC (exit->src, 0);
774 1046894 : if (edge_to_cancel == exit)
775 343781 : edge_to_cancel = EDGE_SUCC (exit->src, 1);
776 : }
777 : /* We do not know the number of iterations and thus we cannot eliminate
778 : the EXIT edge. */
779 : else
780 : exit = NULL;
781 :
782 : /* See if we can improve our estimate by using recorded loop bounds. */
783 2255234 : if ((maxiter == 0 || ul != UL_SINGLE_ITER)
784 1575082 : && maxiter >= 0
785 1139819 : && (!n_unroll_found || (unsigned HOST_WIDE_INT)maxiter < n_unroll))
786 : {
787 405048 : n_unroll = maxiter;
788 405048 : n_unroll_found = true;
789 : /* Loop terminates before the IV variable test, so we cannot
790 : remove it in the last iteration. */
791 405048 : edge_to_cancel = NULL;
792 : /* If we do not allow peeling and we iterate just allow cases
793 : that do not grow code. */
794 405048 : if (!allow_peel && maxiter != 0)
795 : ul = UL_NO_GROWTH;
796 : }
797 :
798 1850186 : if (!n_unroll_found)
799 : return false;
800 :
801 1451728 : if (!loop->unroll
802 1449451 : && n_unroll > (unsigned) param_max_completely_peel_times)
803 : {
804 769451 : if (dump_file && (dump_flags & TDF_DETAILS))
805 33 : fprintf (dump_file, "Not unrolling loop %d "
806 : "(--param max-completely-peel-times limit reached).\n",
807 : loop->num);
808 769451 : return false;
809 : }
810 :
811 682277 : if (!edge_to_cancel)
812 97190 : edge_to_cancel = loop_edge_to_cancel (loop);
813 :
814 682277 : if (n_unroll)
815 : {
816 653699 : if (ul == UL_SINGLE_ITER)
817 2255234 : return false;
818 :
819 497960 : if (loop->unroll)
820 : {
821 : /* If the unrolling factor is too large, bail out. */
822 1800 : if (n_unroll > (unsigned)loop->unroll)
823 : {
824 1265 : if (dump_file && (dump_flags & TDF_DETAILS))
825 44 : fprintf (dump_file,
826 : "Not unrolling loop %d: "
827 : "user didn't want it unrolled completely.\n",
828 : loop->num);
829 1265 : return false;
830 : }
831 : }
832 : else
833 : {
834 496160 : struct loop_size size;
835 : /* EXIT can be removed only if we are sure it passes first N_UNROLL
836 : iterations. */
837 496160 : bool remove_exit = (exit && niter
838 411241 : && TREE_CODE (niter) == INTEGER_CST
839 907401 : && wi::leu_p (n_unroll, wi::to_widest (niter)));
840 496160 : bool large
841 : = tree_estimate_loop_size
842 496160 : (loop, remove_exit ? exit : NULL, edge_to_cancel, &size,
843 : param_max_completely_peeled_insns);
844 496160 : if (large)
845 : {
846 6523 : if (dump_file && (dump_flags & TDF_DETAILS))
847 0 : fprintf (dump_file, "Not unrolling loop %d: it is too large.\n",
848 : loop->num);
849 391206 : return false;
850 : }
851 :
852 489637 : unsigned HOST_WIDE_INT ninsns = size.overall;
853 489637 : unsigned HOST_WIDE_INT est_eliminated;
854 489637 : unsigned HOST_WIDE_INT unr_insns
855 489637 : = estimated_unrolled_size (&size, &est_eliminated, n_unroll);
856 489637 : if (dump_file && (dump_flags & TDF_DETAILS))
857 : {
858 62 : fprintf (dump_file, " Loop size: %d\n", (int) ninsns);
859 62 : fprintf (dump_file, " Estimated size after unrolling: %d-%d\n",
860 : (int) unr_insns, (int) est_eliminated);
861 : }
862 :
863 : /* If the code is going to shrink, we don't need to be extra
864 : cautious on guessing if the unrolling is going to be
865 : profitable.
866 : Move from estimated_unrolled_size to unroll small loops. */
867 489637 : if (unr_insns - est_eliminated
868 : /* If there is IV variable that will become constant, we
869 : save one instruction in the loop prologue we do not
870 : account otherwise. */
871 489637 : <= ninsns + (size.constant_iv != false))
872 : ;
873 : /* We unroll only inner loops, because we do not consider it
874 : profitable otherwise. We still can cancel loopback edge
875 : of not rolling loop; this is always a good idea. */
876 413985 : else if (ul == UL_NO_GROWTH)
877 : {
878 365302 : if (dump_file && (dump_flags & TDF_DETAILS))
879 24 : fprintf (dump_file, "Not unrolling loop %d: size would grow.\n",
880 : loop->num);
881 365302 : return false;
882 : }
883 : /* Outer loops tend to be less interesting candidates for
884 : complete unrolling unless we can do a lot of propagation
885 : into the inner loop body. For now we disable outer loop
886 : unrolling when the code would grow. */
887 48683 : else if (loop->inner)
888 : {
889 4960 : if (dump_file && (dump_flags & TDF_DETAILS))
890 0 : fprintf (dump_file, "Not unrolling loop %d: "
891 : "it is not innermost and code would grow.\n",
892 : loop->num);
893 4960 : return false;
894 : }
895 : /* If there is call on a hot path through the loop, then
896 : there is most probably not much to optimize. */
897 43723 : else if (size.num_non_pure_calls_on_hot_path)
898 : {
899 10314 : if (dump_file && (dump_flags & TDF_DETAILS))
900 1 : fprintf (dump_file, "Not unrolling loop %d: "
901 : "contains call and code would grow.\n",
902 : loop->num);
903 10314 : return false;
904 : }
905 : /* If there is pure/const call in the function, then we can
906 : still optimize the unrolled loop body if it contains some
907 : other interesting code than the calls and code storing or
908 : cumulating the return value. */
909 33409 : else if (size.num_pure_calls_on_hot_path
910 : /* One IV increment, one test, one ivtmp store and
911 : one useful stmt. That is about minimal loop
912 : doing pure call. */
913 2387 : && (size.non_call_stmts_on_hot_path
914 2387 : <= 3 + size.num_pure_calls_on_hot_path))
915 : {
916 30 : if (dump_file && (dump_flags & TDF_DETAILS))
917 0 : fprintf (dump_file, "Not unrolling loop %d: "
918 : "contains just pure calls and code would grow.\n",
919 : loop->num);
920 30 : return false;
921 : }
922 : /* Complete unrolling is major win when control flow is
923 : removed and one big basic block is created. If the loop
924 : contains control flow the optimization may still be a win
925 : because of eliminating the loop overhead but it also may
926 : blow the branch predictor tables. Limit number of
927 : branches on the hot path through the peeled sequence. */
928 33379 : else if (size.num_branches_on_hot_path * (int)n_unroll
929 33379 : > param_max_peel_branches)
930 : {
931 2441 : if (dump_file && (dump_flags & TDF_DETAILS))
932 0 : fprintf (dump_file, "Not unrolling loop %d: "
933 : "number of branches on hot path in the unrolled "
934 : "sequence reaches --param max-peel-branches limit.\n",
935 : loop->num);
936 2441 : return false;
937 : }
938 : /* Move 2 / 3 reduction from estimated_unrolled_size, but don't reduce
939 : unrolled size for innermost loop.
940 : 1) It could increase register pressure.
941 : 2) Big loop after completely unroll may not be vectorized
942 : by BB vectorizer. */
943 30938 : else if ((cunrolli ? unr_insns : unr_insns - est_eliminated)
944 30938 : > (unsigned) param_max_completely_peeled_insns)
945 : {
946 1636 : if (dump_file && (dump_flags & TDF_DETAILS))
947 1 : fprintf (dump_file, "Not unrolling loop %d: "
948 : "number of insns in the unrolled sequence reaches "
949 : "--param max-completely-peeled-insns limit.\n",
950 : loop->num);
951 1636 : return false;
952 : }
953 : }
954 :
955 105489 : if (!dbg_cnt (gimple_unroll))
956 : return false;
957 :
958 105489 : initialize_original_copy_tables ();
959 105489 : auto_sbitmap wont_exit (n_unroll + 1);
960 105489 : if (exit && niter
961 96901 : && TREE_CODE (niter) == INTEGER_CST
962 202390 : && wi::leu_p (n_unroll, wi::to_widest (niter)))
963 : {
964 96901 : bitmap_ones (wont_exit);
965 193802 : if (wi::eq_p (wi::to_widest (niter), n_unroll)
966 96901 : || edge_to_cancel)
967 96896 : bitmap_clear_bit (wont_exit, 0);
968 : }
969 : else
970 : {
971 8588 : exit = NULL;
972 8588 : bitmap_clear (wont_exit);
973 : }
974 105489 : if (may_be_zero)
975 2 : bitmap_clear_bit (wont_exit, 1);
976 :
977 : /* If loop was originally estimated to iterate too many times,
978 : reduce the profile to avoid new profile inconsistencies. */
979 105489 : scale_loop_profile (loop, profile_probability::always (), n_unroll);
980 :
981 105489 : if (!gimple_duplicate_loop_body_to_header_edge (
982 : loop, loop_preheader_edge (loop), n_unroll, wont_exit, exit,
983 : &edges_to_remove,
984 : DLTHE_FLAG_UPDATE_FREQ | DLTHE_FLAG_COMPLETTE_PEEL
985 : | DLTHE_RECORD_HIERARCHICAL_DISCRIMINATOR))
986 : {
987 9 : free_original_copy_tables ();
988 9 : if (dump_file && (dump_flags & TDF_DETAILS))
989 0 : fprintf (dump_file, "Failed to duplicate the loop\n");
990 9 : return false;
991 : }
992 :
993 105480 : free_original_copy_tables ();
994 105489 : }
995 : else
996 28578 : scale_loop_profile (loop, profile_probability::always (), 0);
997 :
998 : /* Remove the conditional from the last copy of the loop. */
999 134058 : if (edge_to_cancel)
1000 : {
1001 267952 : gcond *cond = as_a <gcond *> (*gsi_last_bb (edge_to_cancel->src));
1002 133976 : force_edge_cold (edge_to_cancel, true);
1003 133976 : if (edge_to_cancel->flags & EDGE_TRUE_VALUE)
1004 60091 : gimple_cond_make_false (cond);
1005 : else
1006 73885 : gimple_cond_make_true (cond);
1007 133976 : update_stmt (cond);
1008 : /* Do not remove the path, as doing so may remove outer loop and
1009 : confuse bookkeeping code in tree_unroll_loops_completely. */
1010 : }
1011 :
1012 : /* Store the loop for later unlooping and exit removal. */
1013 134058 : loops_to_unloop.safe_push (loop);
1014 134058 : loops_to_unloop_nunroll.safe_push (n_unroll);
1015 :
1016 134058 : if (dump_enabled_p ())
1017 : {
1018 169 : if (!n_unroll)
1019 62 : dump_printf_loc (MSG_OPTIMIZED_LOCATIONS | TDF_DETAILS, locus,
1020 : "loop turned into non-loop; it never loops\n");
1021 : else
1022 : {
1023 107 : dump_printf_loc (MSG_OPTIMIZED_LOCATIONS | TDF_DETAILS, locus,
1024 : "loop with %d iterations completely unrolled",
1025 : (int) n_unroll);
1026 107 : if (loop->header->count.initialized_p ())
1027 106 : dump_printf (MSG_OPTIMIZED_LOCATIONS | TDF_DETAILS,
1028 : " (header execution count %d)",
1029 106 : (int)loop->header->count.to_gcov_type ());
1030 107 : dump_printf (MSG_OPTIMIZED_LOCATIONS | TDF_DETAILS, "\n");
1031 : }
1032 : }
1033 :
1034 134058 : if (dump_file && (dump_flags & TDF_DETAILS))
1035 : {
1036 100 : if (exit)
1037 75 : fprintf (dump_file, "Exit condition of peeled iterations was "
1038 : "eliminated.\n");
1039 100 : if (edge_to_cancel)
1040 100 : fprintf (dump_file, "Last iteration exit edge was proved true.\n");
1041 : else
1042 0 : fprintf (dump_file, "Latch of last iteration was marked by "
1043 : "__builtin_unreachable ().\n");
1044 : }
1045 :
1046 : return true;
1047 : }
1048 :
1049 : /* Return the estimated number of instructions after peeling the loop body described by SIZE a total of NPEEL times; the result is at least 1. */
1050 : static unsigned HOST_WIDE_INT
1051 2880 : estimated_peeled_sequence_size (struct loop_size *size,
1052 : unsigned HOST_WIDE_INT npeel)
1053 : {
1054 2880 : return MAX (npeel * (HOST_WIDE_INT) (size->overall
1055 : - size->eliminated_by_peeling), 1);
1056 : }
1057 :
1058 : /* Update loop estimates after peeling LOOP by NPEEL.
1059 : If PRECISE is false only likely exits were duplicated and thus
1060 : do not update any estimates that are supposed to be always reliable. */
1061 : void
1062 453914 : adjust_loop_info_after_peeling (class loop *loop, int npeel, bool precise)
1063 : {
1064 453914 : if (loop->any_estimate)
1065 : {
1066 : /* Since peeling is mostly about loops where first few
1067 : iterations are special, it is not quite correct to
1068 : assume that the remaining iterations will behave
1069 : the same way. However we do not have better info
1070 : so update the estimate, since it is likely better
1071 : than keeping it as it is.
1072 :
1073 : Remove it if it looks wrong.
1074 :
1075 : TODO: We likely want to special case the situation where
1076 : peeling is optimizing out exit edges and only update
1077 : estimates here. */
1078 228048 : if (wi::leu_p (npeel, loop->nb_iterations_estimate))
1079 228031 : loop->nb_iterations_estimate -= npeel;
1080 : else
1081 17 : loop->any_estimate = false;
1082 : }
1083 453914 : if (loop->any_upper_bound && precise)
1084 : {
1085 268085 : if (wi::leu_p (npeel, loop->nb_iterations_upper_bound))
1086 268085 : loop->nb_iterations_upper_bound -= npeel;
1087 : else
1088 : {
1089 : /* Peeling maximal number of iterations or more
1090 : makes no sense and is a bug.
1091 : We should peel completely. */
1092 0 : gcc_unreachable ();
1093 : }
1094 : }
1095 453914 : if (loop->any_likely_upper_bound)
1096 : {
1097 374379 : if (wi::leu_p (npeel, loop->nb_iterations_likely_upper_bound))
1098 373520 : loop->nb_iterations_likely_upper_bound -= npeel;
1099 : else
1100 : {
1101 859 : loop->any_estimate = true;
1102 859 : loop->nb_iterations_estimate = 0;
1103 859 : loop->nb_iterations_likely_upper_bound = 0;
1104 : }
1105 : }
1106 453914 : }
1107 :
1108 : /* If the loop is expected to iterate N times and is
1109 : small enough, duplicate the loop body N+1 times before
1110 : the loop itself. This way the hot path will never
1111 : enter the loop.
1112 : Parameters are the same as for try_unroll_loops_completely. Returns true if LOOP was peeled. */
1113 :
1114 : static bool
1115 130504 : try_peel_loop (class loop *loop,
1116 : edge exit, tree niter, bool may_be_zero,
1117 : HOST_WIDE_INT maxiter)
1118 : {
1119 130504 : HOST_WIDE_INT npeel;
1120 130504 : struct loop_size size;
1121 130504 : int peeled_size;
1122 :
1123 130504 : if (!flag_peel_loops
1124 111968 : || param_max_peel_times <= 0
1125 111968 : || !peeled_loops)
1126 : return false;
1127 :
1128 84033 : if (bitmap_bit_p (peeled_loops, loop->num))
1129 : {
1130 843 : if (dump_file)
1131 2 : fprintf (dump_file, "Not peeling: loop is already peeled\n");
1132 843 : return false;
1133 : }
1134 :
1135 : /* We don't peel loops that will be unrolled as this can duplicate a
1136 : loop more times than the user requested. */
1137 83190 : if (loop->unroll)
1138 : {
1139 95 : if (dump_file)
1140 0 : fprintf (dump_file, "Not peeling: user didn't want it peeled.\n");
1141 95 : return false;
1142 : }
1143 :
1144 : /* Peel only innermost loops.
1145 : While the code is perfectly capable of peeling non-innermost loops,
1146 : the heuristics would probably need some improvements. */
1147 83095 : if (loop->inner)
1148 : {
1149 15468 : if (dump_file)
1150 2 : fprintf (dump_file, "Not peeling: outer loop\n");
1151 15468 : return false;
1152 : }
1153 :
1154 67627 : if (!optimize_loop_for_speed_p (loop))
1155 : {
1156 0 : if (dump_file)
1157 0 : fprintf (dump_file, "Not peeling: cold loop\n");
1158 0 : return false;
1159 : }
1160 :
1161 : /* Check if there is an estimate on the number of iterations. */
1162 67627 : npeel = estimated_loop_iterations_int (loop);
1163 67627 : if (npeel < 0)
1164 49650 : npeel = likely_max_loop_iterations_int (loop);
1165 67627 : if (npeel < 0)
1166 : {
1167 13798 : if (dump_file)
1168 0 : fprintf (dump_file, "Not peeling: number of iterations is not "
1169 : "estimated\n");
1170 13798 : return false;
1171 : }
1172 53829 : if (maxiter >= 0 && maxiter <= npeel)
1173 : {
1174 49990 : if (dump_file)
1175 23 : fprintf (dump_file, "Not peeling: upper bound is known so can "
1176 : "unroll completely\n");
1177 49990 : return false;
1178 : }
1179 :
1180 : /* We want to peel estimated number of iterations + 1 (so we never
1181 : enter the loop on quick path). Check against PARAM_MAX_PEEL_TIMES
1182 : and be sure to avoid overflows. */
1183 3839 : if (npeel > param_max_peel_times - 1)
1184 : {
1185 959 : if (dump_file)
1186 0 : fprintf (dump_file, "Not peeling: rolls too much "
1187 : "(%i + 1 > --param max-peel-times)\n", (int) npeel);
1188 959 : return false;
1189 : }
1190 2880 : npeel++;
1191 :
1192 : /* Check peeled loops size. */
1193 2880 : tree_estimate_loop_size (loop, exit, NULL, &size,
1194 : param_max_peeled_insns);
1195 2880 : if ((peeled_size = estimated_peeled_sequence_size (&size, (int) npeel))
1196 2880 : > param_max_peeled_insns)
1197 : {
1198 2010 : if (dump_file)
1199 0 : fprintf (dump_file, "Not peeling: peeled sequence size is too large "
1200 : "(%i insns > --param max-peel-insns)", peeled_size);
1201 2010 : return false;
1202 : }
1203 :
1204 870 : if (!dbg_cnt (gimple_unroll))
1205 : return false;
1206 :
1207 : /* Duplicate possibly eliminating the exits. */
1208 870 : initialize_original_copy_tables ();
1209 870 : auto_sbitmap wont_exit (npeel + 1);
1210 870 : if (exit && niter
1211 9 : && TREE_CODE (niter) == INTEGER_CST
1212 879 : && wi::leu_p (npeel, wi::to_widest (niter)))
1213 : {
1214 9 : bitmap_ones (wont_exit);
1215 9 : bitmap_clear_bit (wont_exit, 0);
1216 : }
1217 : else
1218 : {
1219 861 : exit = NULL;
1220 861 : bitmap_clear (wont_exit);
1221 : }
1222 870 : if (may_be_zero)
1223 0 : bitmap_clear_bit (wont_exit, 1);
1224 :
1225 870 : if (!gimple_duplicate_loop_body_to_header_edge (
1226 : loop, loop_preheader_edge (loop), npeel, wont_exit, exit,
1227 : &edges_to_remove,
1228 : DLTHE_FLAG_UPDATE_FREQ | DLTHE_RECORD_HIERARCHICAL_DISCRIMINATOR))
1229 : {
1230 0 : free_original_copy_tables ();
1231 0 : return false;
1232 : }
1233 870 : free_original_copy_tables ();
1234 870 : if (dump_file && (dump_flags & TDF_DETAILS))
1235 : {
1236 3 : fprintf (dump_file, "Peeled loop %d, %i times.\n",
1237 : loop->num, (int) npeel);
1238 : }
1239 870 : adjust_loop_info_after_peeling (loop, npeel, true);
1240 :
1241 870 : bitmap_set_bit (peeled_loops, loop->num);
1242 870 : return true;
1243 870 : }
1244 : /* Adds a canonical induction variable to LOOP if suitable.
1245 : CREATE_IV is true if we may create a new iv. UL determines
1246 : which loops we are allowed to completely unroll. If TRY_EVAL is true, we try
1247 : to determine the number of iterations of a loop by direct evaluation.
1248 : ALLOW_PEEL, INNERMOST and CUNROLLI are passed down to try_unroll_loop_completely. Returns true if cfg is changed. */
1249 :
1250 : static bool
1251 2255234 : canonicalize_loop_induction_variables (class loop *loop,
1252 : bool create_iv, enum unroll_level ul,
1253 : bool try_eval, bool allow_peel,
1254 : const_sbitmap innermost,
1255 : bool cunrolli)
1256 : {
1257 2255234 : edge exit = NULL;
1258 2255234 : tree niter;
1259 2255234 : HOST_WIDE_INT maxiter;
1260 2255234 : bool modified = false;
1261 2255234 : class tree_niter_desc niter_desc;
1262 2255234 : bool may_be_zero = false;
1263 2255234 : bool by_eval = false;
1264 :
1265 : /* For unrolling allow conditional constant or zero iterations, thus
1266 : perform loop-header copying on-the-fly. */
1267 2255234 : exit = single_exit (loop);
1268 2255234 : niter = chrec_dont_know;
1269 2255234 : if (exit && number_of_iterations_exit (loop, exit, &niter_desc, false))
1270 : {
1271 1120706 : niter = niter_desc.niter;
1272 1120706 : may_be_zero
1273 1120706 : = niter_desc.may_be_zero && !integer_zerop (niter_desc.may_be_zero);
1274 : }
1275 2255234 : if (TREE_CODE (niter) != INTEGER_CST)
1276 : {
1277 : /* For non-constant niter fold may_be_zero into niter again. */
1278 1558926 : if (may_be_zero)
1279 : {
1280 122659 : if (COMPARISON_CLASS_P (niter_desc.may_be_zero))
1281 122647 : niter = fold_build3 (COND_EXPR, TREE_TYPE (niter),
1282 : niter_desc.may_be_zero,
1283 : build_int_cst (TREE_TYPE (niter), 0), niter);
1284 : else
1285 12 : niter = chrec_dont_know;
1286 : may_be_zero = false;
1287 : }
1288 :
1289 : /* If the loop has more than one exit, try checking all of them
1290 : for # of iterations determinable through scev. */
1291 1558926 : if (!exit)
1292 757381 : niter = find_loop_niter (loop, &exit);
1293 :
1294 : /* Finally if everything else fails, try brute force evaluation. */
1295 1558926 : if (try_eval
1296 1558926 : && (chrec_contains_undetermined (niter)
1297 510501 : || TREE_CODE (niter) != INTEGER_CST))
1298 : {
1299 786229 : niter = find_loop_niter_by_eval (loop, &exit);
1300 786229 : if (TREE_CODE (niter) == INTEGER_CST)
1301 15038 : by_eval = true;
1302 : }
1303 :
1304 1558926 : if (TREE_CODE (niter) != INTEGER_CST)
1305 1208340 : exit = NULL;
1306 : }
1307 :
1308 : /* We work exceptionally hard here to estimate the bound
1309 : by find_loop_niter_by_eval. Be sure to keep it for future. */
1310 3463574 : if (niter && TREE_CODE (niter) == INTEGER_CST)
1311 : {
1312 1046894 : auto_vec<edge> exits = get_loop_exit_edges (loop);
1313 1046894 : record_niter_bound (loop, wi::to_widest (niter),
1314 1046894 : exit == single_likely_exit (loop, exits), true);
1315 1046894 : }
1316 :
1317 : /* Force re-computation of loop bounds so we can remove redundant exits. */
1318 2255234 : maxiter = max_loop_iterations_int (loop);
1319 :
1320 303 : if (dump_file && (dump_flags & TDF_DETAILS)
1321 2255479 : && TREE_CODE (niter) == INTEGER_CST)
1322 : {
1323 145 : fprintf (dump_file, "Loop %d iterates ", loop->num);
1324 145 : print_generic_expr (dump_file, niter, TDF_SLIM);
1325 145 : fprintf (dump_file, " times.\n");
1326 : }
1327 303 : if (dump_file && (dump_flags & TDF_DETAILS)
1328 2255479 : && maxiter >= 0)
1329 : {
1330 209 : fprintf (dump_file, "Loop %d iterates at most %i times.\n", loop->num,
1331 : (int)maxiter);
1332 : }
1333 303 : if (dump_file && (dump_flags & TDF_DETAILS)
1334 2255479 : && likely_max_loop_iterations_int (loop) >= 0)
1335 : {
1336 209 : fprintf (dump_file, "Loop %d likely iterates at most %i times.\n",
1337 209 : loop->num, (int)likely_max_loop_iterations_int (loop));
1338 : }
1339 :
1340 : /* Remove exits that are known to be never taken based on loop bound.
1341 : Needs to be called after computation of max_loop_iterations_int that
1342 : populates the loop bounds. */
1343 2255234 : modified |= remove_redundant_iv_tests (loop);
1344 :
1345 2255234 : dump_user_location_t locus = find_loop_location (loop);
1346 :
1347 2255234 : bool innermost_cunrolli_p
1348 : = cunrolli
1349 764294 : && (unsigned) loop->num < SBITMAP_SIZE (innermost)
1350 3016582 : && bitmap_bit_p (innermost, loop->num);
1351 :
1352 2255234 : if (try_unroll_loop_completely (loop, exit, niter, may_be_zero, ul,
1353 : maxiter, locus, allow_peel,
1354 : innermost_cunrolli_p))
1355 : return true;
1356 :
1357 2121176 : if ((create_iv || by_eval)
1358 682566 : && niter && !chrec_contains_undetermined (niter)
1359 2435473 : && exit && just_once_each_iteration_p (loop, exit->src))
1360 : {
1361 314295 : tree iv_niter = niter;
1362 314295 : if (may_be_zero)
1363 : {
1364 21 : if (COMPARISON_CLASS_P (niter_desc.may_be_zero))
1365 21 : iv_niter = fold_build3 (COND_EXPR, TREE_TYPE (iv_niter),
1366 : niter_desc.may_be_zero,
1367 : build_int_cst (TREE_TYPE (iv_niter), 0),
1368 : iv_niter);
1369 : else
1370 : iv_niter = NULL_TREE;
1371 : }
1372 314295 : if (iv_niter)
1373 314295 : create_canonical_iv (loop, exit, iv_niter);
1374 : }
1375 :
1376 2121176 : if (ul == UL_ALL)
1377 130504 : modified |= try_peel_loop (loop, exit, niter, may_be_zero, maxiter);
1378 :
1379 : return modified;
1380 2255234 : }
1381 :
1382 : /* The main entry point of the pass. Adds canonical induction variables
1383 : to the suitable loops. Returns TODO_cleanup_cfg if anything changed, otherwise 0. */
1384 :
1385 : unsigned int
1386 241441 : canonicalize_induction_variables (void)
1387 : {
1388 241441 : bool changed = false;
1389 241441 : bool irred_invalidated = false;
1390 241441 : bitmap loop_closed_ssa_invalidated = BITMAP_ALLOC (NULL);
1391 482882 : auto_sbitmap innermost (number_of_loops (cfun));
1392 241441 : bitmap_clear (innermost);
1393 :
1394 241441 : estimate_numbers_of_iterations (cfun);
1395 :
1396 1404736 : for (auto loop : loops_list (cfun, LI_FROM_INNERMOST))
1397 : {
1398 680413 : changed
1399 680413 : |= canonicalize_loop_induction_variables (loop,
1400 : true, UL_SINGLE_ITER,
1401 : true, false,
1402 680413 : (const_sbitmap) innermost,
1403 : false);
1404 241441 : }
1405 241441 : gcc_assert (!need_ssa_update_p (cfun));
1406 :
1407 241441 : unloop_loops (loops_to_unloop, loops_to_unloop_nunroll, edges_to_remove,
1408 : loop_closed_ssa_invalidated, &irred_invalidated);
1409 241441 : loops_to_unloop.release ();
1410 241441 : loops_to_unloop_nunroll.release ();
1411 241441 : if (irred_invalidated
1412 241441 : && loops_state_satisfies_p (LOOPS_HAVE_MARKED_IRREDUCIBLE_REGIONS))
1413 2 : mark_irreducible_loops ();
1414 :
1415 : /* Clean up the information about numbers of iterations, since brute force
1416 : evaluation could reveal new information. */
1417 241441 : free_numbers_of_iterations_estimates (cfun);
1418 241441 : scev_reset ();
1419 :
1420 241441 : if (!bitmap_empty_p (loop_closed_ssa_invalidated))
1421 : {
1422 25 : gcc_checking_assert (loops_state_satisfies_p (LOOP_CLOSED_SSA));
1423 25 : rewrite_into_loop_closed_ssa (NULL, TODO_update_ssa);
1424 : }
1425 241441 : BITMAP_FREE (loop_closed_ssa_invalidated);
1426 :
1427 241441 : if (changed)
1428 1115 : return TODO_cleanup_cfg;
1429 : return 0;
1430 241441 : }
1431 :
1432 : /* Process loops from innermost to outer, stopping at the innermost
1433 : loop we unrolled. Returns true if LOOP or one of its subloops changed. */
1434 :
1435 : static bool
1436 2141103 : tree_unroll_loops_completely_1 (bool may_increase_size, bool unroll_outer,
1437 : bitmap father_bbs, class loop *loop,
1438 : const_sbitmap innermost, bool cunrolli)
1439 : {
1440 2141103 : class loop *loop_father;
1441 2141103 : bool changed = false;
1442 2141103 : class loop *inner;
1443 2141103 : enum unroll_level ul;
1444 2141103 : unsigned num = number_of_loops (cfun);
1445 :
1446 : /* Process inner loops first. Don't walk loops added by the recursive
1447 : calls because SSA form is not up-to-date. They can be handled in the
1448 : next iteration. */
1449 2141103 : bitmap child_father_bbs = NULL;
1450 3767938 : for (inner = loop->inner; inner != NULL; inner = inner->next)
1451 1626835 : if ((unsigned) inner->num < num)
1452 : {
1453 1624753 : if (!child_father_bbs)
1454 765229 : child_father_bbs = BITMAP_ALLOC (NULL);
1455 1624753 : if (tree_unroll_loops_completely_1 (may_increase_size, unroll_outer,
1456 : child_father_bbs, inner,
1457 : innermost, cunrolli))
1458 : {
1459 174581 : bitmap_ior_into (father_bbs, child_father_bbs);
1460 174581 : bitmap_clear (child_father_bbs);
1461 174581 : changed = true;
1462 : }
1463 : }
1464 2141103 : if (child_father_bbs)
1465 765229 : BITMAP_FREE (child_father_bbs);
1466 :
1467 : /* If we changed an inner loop we cannot process outer loops in this
1468 : iteration because SSA form is not up-to-date. Continue with
1469 : siblings of outer loops instead. */
1470 2141103 : if (changed)
1471 : {
1472 : /* If we are recorded as father clear all other fathers that
1473 : are necessarily covered already to avoid redundant work. */
1474 90952 : if (bitmap_bit_p (father_bbs, loop->header->index))
1475 : {
1476 28165 : bitmap_clear (father_bbs);
1477 28165 : bitmap_set_bit (father_bbs, loop->header->index);
1478 : }
1479 90952 : return true;
1480 : }
1481 :
1482 : /* Don't unroll #pragma omp simd loops until the vectorizer
1483 : attempts to vectorize those. */
1484 2050151 : if (loop->force_vectorize)
1485 : return false;
1486 :
1487 : /* Try to unroll this loop. */
1488 2039881 : loop_father = loop_outer (loop);
1489 2039881 : if (!loop_father)
1490 : return false;
1491 :
1492 1574821 : if (loop->unroll > 1)
1493 : ul = UL_ALL;
1494 273522 : else if (may_increase_size && optimize_loop_nest_for_speed_p (loop)
1495 : /* Unroll outermost loops only if asked to do so or they do
1496 : not cause code growth. */
1497 1838763 : && (unroll_outer || loop_outer (loop_father)))
1498 : ul = UL_ALL;
1499 : else
1500 : ul = UL_NO_GROWTH;
1501 :
1502 1574821 : if (canonicalize_loop_induction_variables
1503 1574866 : (loop, false, ul, !flag_tree_loop_ivcanon || cunrolli, unroll_outer,
1504 : innermost, cunrolli))
1505 : {
1506 : /* If we'll continue unrolling, we need to propagate constants
1507 : within the new basic blocks to fold away induction variable
1508 : computations; otherwise, the size might blow up before the
1509 : iteration is complete and the IR eventually cleaned up. */
1510 134919 : if (loop_outer (loop_father))
1511 : {
1512 : /* Once we process our father we will have processed
1513 : the fathers of our children as well, so avoid doing
1514 : redundant work and clear fathers we've gathered sofar. */
1515 36040 : bitmap_clear (father_bbs);
1516 36040 : bitmap_set_bit (father_bbs, loop_father->header->index);
1517 : }
1518 98879 : else if (unroll_outer)
1519 : /* Trigger scalar cleanup once any outermost loop gets unrolled. */
1520 58556 : cfun->pending_TODOs |= PENDING_TODO_force_next_scalar_cleanup;
1521 :
1522 134919 : return true;
1523 : }
1524 :
1525 : return false;
1526 : }
1527 :
1528 : /* Unroll LOOPS completely if they iterate just few times. Unless
1529 : MAY_INCREASE_SIZE is true, perform the unrolling only if the
1530 : size of the code does not increase.
1531 : CUNROLLI is true when the pass is cunrolli. */
1532 :
1533 : static unsigned int
1534 465069 : tree_unroll_loops_completely (bool may_increase_size, bool unroll_outer, bool cunrolli)
1535 : {
1536 465069 : bitmap father_bbs = BITMAP_ALLOC (NULL);
1537 465069 : bool changed;
1538 465069 : int iteration = 0;
1539 465069 : bool irred_invalidated = false;
1540 930138 : auto_sbitmap innermost (number_of_loops (cfun));
1541 465069 : bitmap_clear (innermost);
1542 :
1543 465069 : estimate_numbers_of_iterations (cfun);
1544 :
1545 : /* Mark all innermost loop at the beginning. */
1546 2793248 : for (auto loop : loops_list (cfun, LI_FROM_INNERMOST))
1547 : {
1548 1398041 : if (!loop->inner)
1549 1157345 : bitmap_set_bit (innermost, loop->num);
1550 465069 : }
1551 :
1552 516350 : do
1553 : {
1554 516350 : changed = false;
1555 516350 : bitmap loop_closed_ssa_invalidated = NULL;
1556 :
1557 516350 : if (loops_state_satisfies_p (LOOP_CLOSED_SSA))
1558 274361 : loop_closed_ssa_invalidated = BITMAP_ALLOC (NULL);
1559 :
1560 516350 : free_numbers_of_iterations_estimates (cfun);
1561 516350 : estimate_numbers_of_iterations (cfun);
1562 :
1563 1032700 : changed = tree_unroll_loops_completely_1 (may_increase_size,
1564 : unroll_outer, father_bbs,
1565 516350 : current_loops->tree_root,
1566 516350 : (const_sbitmap) innermost,
1567 : cunrolli);
1568 516350 : if (changed)
1569 : {
1570 51290 : unsigned i;
1571 :
1572 51290 : unloop_loops (loops_to_unloop, loops_to_unloop_nunroll,
1573 : edges_to_remove, loop_closed_ssa_invalidated,
1574 : &irred_invalidated);
1575 51290 : loops_to_unloop.release ();
1576 51290 : loops_to_unloop_nunroll.release ();
1577 :
1578 : /* We cannot use TODO_update_ssa_no_phi because VOPS gets confused. */
1579 51290 : if (loop_closed_ssa_invalidated
1580 51290 : && !bitmap_empty_p (loop_closed_ssa_invalidated))
1581 7478 : rewrite_into_loop_closed_ssa (loop_closed_ssa_invalidated,
1582 : TODO_update_ssa);
1583 : else
1584 43812 : update_ssa (TODO_update_ssa);
1585 :
1586 : /* father_bbs is a bitmap of loop father header BB indices.
1587 : Translate that to what non-root loops these BBs belong to now. */
1588 51290 : bitmap_iterator bi;
1589 51290 : bitmap fathers = BITMAP_ALLOC (NULL);
1590 77978 : EXECUTE_IF_SET_IN_BITMAP (father_bbs, 0, i, bi)
1591 : {
1592 26688 : basic_block unrolled_loop_bb = BASIC_BLOCK_FOR_FN (cfun, i);
1593 26688 : if (! unrolled_loop_bb)
1594 0 : continue;
1595 26688 : if (loop_outer (unrolled_loop_bb->loop_father))
1596 26688 : bitmap_set_bit (fathers,
1597 : unrolled_loop_bb->loop_father->num);
1598 : }
1599 51290 : bitmap_clear (father_bbs);
1600 : /* Propagate the constants within the new basic blocks. */
1601 77978 : EXECUTE_IF_SET_IN_BITMAP (fathers, 0, i, bi)
1602 : {
1603 26688 : loop_p father = get_loop (cfun, i);
1604 26688 : bitmap exit_bbs = BITMAP_ALLOC (NULL);
1605 26688 : loop_exit *exit = father->exits->next;
1606 125981 : while (exit->e)
1607 : {
1608 99293 : bitmap_set_bit (exit_bbs, exit->e->dest->index);
1609 99293 : exit = exit->next;
1610 : }
1611 26688 : do_rpo_vn (cfun, loop_preheader_edge (father), exit_bbs);
1612 : }
1613 51290 : BITMAP_FREE (fathers);
1614 :
1615 : /* Clean up the information about numbers of iterations, since
1616 : complete unrolling might have invalidated it. */
1617 51290 : scev_reset ();
1618 :
1619 : /* This will take care of removing completely unrolled loops
1620 : from the loop structures so we can continue unrolling now
1621 : innermost loops. */
1622 51290 : if (cleanup_tree_cfg ())
1623 51290 : update_ssa (TODO_update_ssa_only_virtuals);
1624 :
1625 51290 : if (flag_checking && loops_state_satisfies_p (LOOP_CLOSED_SSA))
1626 32926 : verify_loop_closed_ssa (true);
1627 : }
1628 516350 : if (loop_closed_ssa_invalidated)
1629 274361 : BITMAP_FREE (loop_closed_ssa_invalidated);
1630 : }
1631 : while (changed
1632 516359 : && ++iteration <= param_max_unroll_iterations);
1633 :
1634 465069 : BITMAP_FREE (father_bbs);
1635 :
1636 465069 : if (irred_invalidated
1637 465069 : && loops_state_satisfies_p (LOOPS_HAVE_MARKED_IRREDUCIBLE_REGIONS))
1638 20 : mark_irreducible_loops ();
1639 :
1640 465069 : return 0;
1641 465069 : }
1642 :
1643 : /* Canonical induction variable creation pass; gated on flag_tree_loop_ivcanon. */
1644 :
1645 : namespace {
1646 :
1647 : const pass_data pass_data_iv_canon =
1648 : {
1649 : GIMPLE_PASS, /* type */
1650 : "ivcanon", /* name */
1651 : OPTGROUP_LOOP, /* optinfo_flags */
1652 : TV_TREE_LOOP_IVCANON, /* tv_id */
1653 : ( PROP_cfg | PROP_ssa ), /* properties_required */
1654 : 0, /* properties_provided */
1655 : 0, /* properties_destroyed */
1656 : 0, /* todo_flags_start */
1657 : 0, /* todo_flags_finish */
1658 : };
1659 :
1660 : class pass_iv_canon : public gimple_opt_pass
1661 : {
1662 : public:
1663 285722 : pass_iv_canon (gcc::context *ctxt)
1664 571444 : : gimple_opt_pass (pass_data_iv_canon, ctxt)
1665 : {}
1666 :
1667 : /* opt_pass methods: */
1668 241458 : bool gate (function *) final override { return flag_tree_loop_ivcanon != 0; }
1669 : unsigned int execute (function *fun) final override;
1670 :
1671 : }; // class pass_iv_canon
1672 :
1673 : unsigned int
1674 241441 : pass_iv_canon::execute (function *fun)
1675 : {
1676 482882 : if (number_of_loops (fun) <= 1)
1677 : return 0;
1678 :
1679 241441 : return canonicalize_induction_variables ();
1680 : }
1681 :
1682 : } // anon namespace
1683 :
1684 : gimple_opt_pass *
1685 285722 : make_pass_iv_canon (gcc::context *ctxt)
1686 : {
1687 285722 : return new pass_iv_canon (ctxt);
1688 : }
1689 :
1690 : /* Complete unrolling of loops. */
1691 :
1692 : namespace {
1693 :
/* Pass descriptor for the late "cunroll" pass.  Requires a CFG and SSA
   form; no properties provided/destroyed and no TODO flags.  */
const pass_data pass_data_complete_unroll =
{
  GIMPLE_PASS, /* type */
  "cunroll", /* name (dump file suffix) */
  OPTGROUP_LOOP, /* optinfo_flags */
  TV_COMPLETE_UNROLL, /* tv_id (timevar for -ftime-report) */
  ( PROP_cfg | PROP_ssa ), /* properties_required */
  0, /* properties_provided */
  0, /* properties_destroyed */
  0, /* todo_flags_start */
  0, /* todo_flags_finish */
};
1706 :
class pass_complete_unroll : public gimple_opt_pass
{
public:
  pass_complete_unroll (gcc::context *ctxt)
    : gimple_opt_pass (pass_data_complete_unroll, ctxt)
  {}

  /* opt_pass methods: */
  /* No gate method: the pass runs whenever it is scheduled; execute
     itself bails out early for loop-free functions.  */
  unsigned int execute (function *) final override;

}; // class pass_complete_unroll
1718 :
1719 : unsigned int
1720 241437 : pass_complete_unroll::execute (function *fun)
1721 : {
1722 482874 : if (number_of_loops (fun) <= 1)
1723 : return 0;
1724 :
1725 : /* If we ever decide to run loop peeling more than once, we will need to
1726 : track loops already peeled in loop structures themselves to avoid
1727 : re-peeling the same loop multiple times. */
1728 241437 : if (flag_peel_loops)
1729 28045 : peeled_loops = BITMAP_ALLOC (NULL);
1730 241437 : unsigned int val = tree_unroll_loops_completely (flag_cunroll_grow_size, true, false);
1731 241437 : if (peeled_loops)
1732 : {
1733 28045 : BITMAP_FREE (peeled_loops);
1734 28045 : peeled_loops = NULL;
1735 : }
1736 : return val;
1737 : }
1738 :
1739 : } // anon namespace
1740 :
1741 : gimple_opt_pass *
1742 285722 : make_pass_complete_unroll (gcc::context *ctxt)
1743 : {
1744 285722 : return new pass_complete_unroll (ctxt);
1745 : }
1746 :
1747 : /* Complete unrolling of inner loops. */
1748 :
1749 : namespace {
1750 :
/* Pass descriptor for the early "cunrolli" (inner-loop unrolling) pass.
   Shares the TV_COMPLETE_UNROLL timevar with the late cunroll pass.  */
const pass_data pass_data_complete_unrolli =
{
  GIMPLE_PASS, /* type */
  "cunrolli", /* name (dump file suffix) */
  OPTGROUP_LOOP, /* optinfo_flags */
  TV_COMPLETE_UNROLL, /* tv_id (timevar for -ftime-report) */
  ( PROP_cfg | PROP_ssa ), /* properties_required */
  0, /* properties_provided */
  0, /* properties_destroyed */
  0, /* todo_flags_start */
  0, /* todo_flags_finish */
};
1763 :
1764 : class pass_complete_unrolli : public gimple_opt_pass
1765 : {
1766 : public:
1767 285722 : pass_complete_unrolli (gcc::context *ctxt)
1768 571444 : : gimple_opt_pass (pass_data_complete_unrolli, ctxt)
1769 : {}
1770 :
1771 : /* opt_pass methods: */
1772 1041484 : bool gate (function *) final override { return optimize >= 2; }
1773 : unsigned int execute (function *) final override;
1774 :
1775 : }; // class pass_complete_unrolli
1776 :
1777 : unsigned int
1778 964174 : pass_complete_unrolli::execute (function *fun)
1779 : {
1780 964174 : unsigned ret = 0;
1781 :
1782 964174 : loop_optimizer_init (LOOPS_NORMAL | LOOPS_HAVE_RECORDED_EXITS);
1783 1928348 : if (number_of_loops (fun) > 1)
1784 : {
1785 223632 : scev_initialize ();
1786 223632 : ret = tree_unroll_loops_completely (optimize >= 3, false, true);
1787 223632 : scev_finalize ();
1788 : }
1789 964174 : loop_optimizer_finalize ();
1790 :
1791 964174 : return ret;
1792 : }
1793 :
1794 : } // anon namespace
1795 :
1796 : gimple_opt_pass *
1797 285722 : make_pass_complete_unrolli (gcc::context *ctxt)
1798 : {
1799 285722 : return new pass_complete_unrolli (ctxt);
1800 : }
1801 :
1802 :
|