Branch data Line data Source code
1 : : /* Induction variable canonicalization and loop peeling.
2 : : Copyright (C) 2004-2025 Free Software Foundation, Inc.
3 : :
4 : : This file is part of GCC.
5 : :
6 : : GCC is free software; you can redistribute it and/or modify it
7 : : under the terms of the GNU General Public License as published by the
8 : : Free Software Foundation; either version 3, or (at your option) any
9 : : later version.
10 : :
11 : : GCC is distributed in the hope that it will be useful, but WITHOUT
12 : : ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
13 : : FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
14 : : for more details.
15 : :
16 : : You should have received a copy of the GNU General Public License
17 : : along with GCC; see the file COPYING3. If not see
18 : : <http://www.gnu.org/licenses/>. */
19 : :
20 : : /* This pass detects the loops that iterate a constant number of times,
21 : : adds a canonical induction variable (step -1, tested against 0)
22 : : and replaces the exit test. This enables the less powerful rtl
23 : : level analysis to use this information.
24 : :
25 : : This might spoil the code in some cases (by increasing register pressure).
26 : : Note that if the new variable is not needed, ivopts will get rid of it,
27 : : so this is only likely to be a problem when there are no other linear
28 : : induction variables. In that case the optimization possibilities the new
29 : : variable creates are likely to pay off.
30 : :
31 : : We also perform
32 : : - complete unrolling (or peeling) when the loop iterates few enough
33 : : times
34 : : - simple peeling (i.e. copying a few initial iterations before the loop)
35 : : when an estimate of the number of iterations is known (typically from
36 : : the profile info). */
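As a rough editorial sketch (not part of this file; names are illustrative), the canonical IV transform turns a counted loop

    i = 0;
    do { body (); i++; } while (i != n);

into the equivalent

    ivtmp = n;
    do { body (); ivtmp--; } while (ivtmp != 0);

where the fresh counter steps by -1 and the exit test is an equality against zero, a shape the RTL-level analysis recognizes easily.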
37 : :
38 : : #include "config.h"
39 : : #include "system.h"
40 : : #include "coretypes.h"
41 : : #include "backend.h"
42 : : #include "tree.h"
43 : : #include "gimple.h"
44 : : #include "cfghooks.h"
45 : : #include "tree-pass.h"
46 : : #include "ssa.h"
47 : : #include "cgraph.h"
48 : : #include "gimple-pretty-print.h"
49 : : #include "fold-const.h"
50 : : #include "profile.h"
51 : : #include "gimple-iterator.h"
52 : : #include "gimple-fold.h"
53 : : #include "tree-eh.h"
54 : : #include "tree-cfg.h"
55 : : #include "tree-ssa-loop-manip.h"
56 : : #include "tree-ssa-loop-niter.h"
57 : : #include "tree-ssa-loop.h"
58 : : #include "tree-into-ssa.h"
59 : : #include "cfgloop.h"
60 : : #include "tree-chrec.h"
61 : : #include "tree-scalar-evolution.h"
62 : : #include "tree-inline.h"
63 : : #include "tree-cfgcleanup.h"
64 : : #include "builtins.h"
65 : : #include "tree-ssa-sccvn.h"
66 : : #include "tree-vectorizer.h" /* For find_loop_location */
67 : : #include "dbgcnt.h"
68 : :
69 : : /* Specifies types of loops that may be unrolled. */
70 : :
71 : : enum unroll_level
72 : : {
73 : : UL_SINGLE_ITER, /* Only loops that exit immediately in the first
74 : : iteration. */
75 : : UL_NO_GROWTH, /* Only loops whose unrolling will not cause increase
76 : : of code size. */
77 : : UL_ALL /* All suitable loops. */
78 : : };
79 : :
80 : : /* Adds a canonical induction variable to LOOP iterating NITER times. EXIT
81 : : is the exit edge whose condition is replaced. The ssa versions of the new
82 : : IV before and after increment will be stored in VAR_BEFORE and VAR_AFTER
83 : : if they are not NULL. */
84 : :
85 : : void
86 : 291682 : create_canonical_iv (class loop *loop, edge exit, tree niter,
87 : : tree *var_before = NULL, tree *var_after = NULL)
88 : : {
89 : 291682 : edge in;
90 : 291682 : tree type, var;
91 : 291682 : gcond *cond;
92 : 291682 : gimple_stmt_iterator incr_at;
93 : 291682 : enum tree_code cmp;
94 : :
95 : 291682 : if (dump_file && (dump_flags & TDF_DETAILS))
96 : : {
97 : 41 : fprintf (dump_file, "Added canonical iv to loop %d, ", loop->num);
98 : 41 : print_generic_expr (dump_file, niter, TDF_SLIM);
99 : 41 : fprintf (dump_file, " iterations.\n");
100 : : }
101 : :
102 : 583364 : cond = as_a <gcond *> (*gsi_last_bb (exit->src));
103 : 291682 : in = EDGE_SUCC (exit->src, 0);
104 : 291682 : if (in == exit)
105 : 82493 : in = EDGE_SUCC (exit->src, 1);
106 : :
107 : : /* Note that we do not need to worry about overflows, since
108 : : the type of niter is always unsigned and all comparisons are
109 : : just for equality/nonequality -- i.e. everything works
110 : : with modulo arithmetic. */
111 : :
112 : 291682 : type = TREE_TYPE (niter);
113 : 291682 : niter = fold_build2 (PLUS_EXPR, type,
114 : : niter,
115 : : build_int_cst (type, 1));
116 : 291682 : incr_at = gsi_last_bb (in->src);
117 : 291682 : create_iv (niter, PLUS_EXPR,
118 : 291682 : build_int_cst (type, -1),
119 : : NULL_TREE, loop,
120 : : &incr_at, false, var_before, &var);
121 : 291682 : if (var_after)
122 : 94 : *var_after = var;
123 : :
124 : 291682 : cmp = (exit->flags & EDGE_TRUE_VALUE) ? EQ_EXPR : NE_EXPR;
125 : 291682 : gimple_cond_set_code (cond, cmp);
126 : 291682 : gimple_cond_set_lhs (cond, var);
127 : 291682 : gimple_cond_set_rhs (cond, build_int_cst (type, 0));
128 : 291682 : update_stmt (cond);
129 : 291682 : }
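In GIMPLE the rewritten exit ends up looking roughly like the following hand-written, illustrative dump (names and the printed form of the -1 step are made up; in unsigned arithmetic the step may print as a large constant):

    # ivtmp_1 = PHI <niter_plus_1(preheader), ivtmp_2(latch)>
    ...
    ivtmp_2 = ivtmp_1 - 1;
    if (ivtmp_2 == 0)     /* EQ_EXPR or NE_EXPR, depending on which
                             successor of the test is the exit edge */
      goto exit;
    else
      goto latch;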
130 : :
131 : : /* Describe size of loop as detected by tree_estimate_loop_size. */
132 : : struct loop_size
133 : : {
134 : : /* Number of instructions in the loop. */
135 : : int overall;
136 : :
137 : : /* Number of instructions that will be likely optimized out in
138 : : peeled iterations of the loop (i.e. computations based on an induction
139 : : variable that starts at a known constant). */
140 : : int eliminated_by_peeling;
141 : :
142 : : /* Number of instructions that cannot be further optimized in the
143 : : peeled loop, for example volatile accesses. */
144 : : int not_eliminatable_after_peeling;
145 : :
146 : : /* Same statistics for last iteration of loop: it is smaller because
147 : : instructions after exit are not executed. */
148 : : int last_iteration;
149 : : int last_iteration_eliminated_by_peeling;
150 : : int last_iteration_not_eliminatable_after_peeling;
151 : :
152 : : /* If some IV computation will become constant. */
153 : : bool constant_iv;
154 : :
155 : : /* Number of call stmts that are not a builtin and are pure or const
156 : : present on the hot path. */
157 : : int num_pure_calls_on_hot_path;
158 : : /* Number of call stmts that are not a builtin and are not pure nor const
159 : : present on the hot path. */
160 : : int num_non_pure_calls_on_hot_path;
161 : : /* Number of statements other than calls in the loop. */
162 : : int non_call_stmts_on_hot_path;
163 : : /* Number of branches seen on the hot path. */
164 : : int num_branches_on_hot_path;
165 : : };
166 : :
167 : : /* Return true if OP in STMT will be constant after peeling LOOP. */
168 : :
169 : : static bool
170 : 11854331 : constant_after_peeling (tree op, gimple *stmt, class loop *loop)
171 : : {
172 : 11854331 : if (CONSTANT_CLASS_P (op))
173 : : return true;
174 : :
175 : : /* Get at the actual SSA operand. */
176 : 11134856 : if (handled_component_p (op)
177 : 1242607 : && TREE_CODE (TREE_OPERAND (op, 0)) == SSA_NAME)
178 : 26692 : op = TREE_OPERAND (op, 0);
179 : :
180 : : /* We can still fold accesses to constant arrays when index is known. */
181 : 11134856 : if (TREE_CODE (op) != SSA_NAME)
182 : : {
183 : : tree base = op;
184 : :
185 : : /* First take a fast look to see if there is a constant array inside. */
186 : 3726528 : while (handled_component_p (base))
187 : 1936990 : base = TREE_OPERAND (base, 0);
188 : 1789538 : if ((DECL_P (base)
189 : 837282 : && ctor_for_folding (base) != error_mark_node)
190 : 2567919 : || CONSTANT_CLASS_P (base))
191 : : {
192 : : /* If so, see if we understand all the indices. */
193 : : base = op;
194 : 2775077 : while (handled_component_p (base))
195 : : {
196 : 66046 : if (TREE_CODE (base) == ARRAY_REF
197 : 66046 : && !constant_after_peeling (TREE_OPERAND (base, 1), stmt, loop))
198 : : return false;
199 : 52959 : base = TREE_OPERAND (base, 0);
200 : : }
201 : : return true;
202 : : }
203 : : return false;
204 : : }
205 : :
206 : : /* Induction variables are constants when defined in loop. */
207 : 18690636 : if (loop_containing_stmt (stmt) != loop)
208 : : return false;
209 : 6905703 : tree ev = analyze_scalar_evolution (loop, op);
210 : 6905703 : if (chrec_contains_undetermined (ev)
211 : 6905703 : || chrec_contains_symbols (ev))
212 : : {
213 : 4974009 : if (ANY_INTEGRAL_TYPE_P (TREE_TYPE (op)))
214 : : {
215 : 3638193 : gassign *ass = nullptr;
216 : 3638193 : gphi *phi = nullptr;
217 : 3638193 : if (is_a <gassign *> (SSA_NAME_DEF_STMT (op)))
218 : : {
219 : 3256443 : ass = as_a <gassign *> (SSA_NAME_DEF_STMT (op));
220 : 3256443 : if (TREE_CODE (gimple_assign_rhs1 (ass)) == SSA_NAME)
221 : 2042267 : phi = dyn_cast <gphi *>
222 : 2042267 : (SSA_NAME_DEF_STMT (gimple_assign_rhs1 (ass)));
223 : : }
224 : 381750 : else if (is_a <gphi *> (SSA_NAME_DEF_STMT (op)))
225 : : {
226 : 292196 : phi = as_a <gphi *> (SSA_NAME_DEF_STMT (op));
227 : 292196 : if (gimple_bb (phi) == loop->header)
228 : : {
229 : 182966 : tree def = gimple_phi_arg_def_from_edge
230 : 182966 : (phi, loop_latch_edge (loop));
231 : 182966 : if (TREE_CODE (def) == SSA_NAME
232 : 182966 : && is_a <gassign *> (SSA_NAME_DEF_STMT (def)))
233 : 131952 : ass = as_a <gassign *> (SSA_NAME_DEF_STMT (def));
234 : : }
235 : : }
236 : 3638193 : if (ass && phi)
237 : : {
238 : 482988 : tree rhs1 = gimple_assign_rhs1 (ass);
239 : 482988 : if (gimple_assign_rhs_class (ass) == GIMPLE_BINARY_RHS
240 : 388091 : && CONSTANT_CLASS_P (gimple_assign_rhs2 (ass))
241 : 282063 : && rhs1 == gimple_phi_result (phi)
242 : 281223 : && gimple_bb (phi) == loop->header
243 : 230406 : && (gimple_phi_arg_def_from_edge (phi, loop_latch_edge (loop))
244 : 230406 : == gimple_assign_lhs (ass))
245 : 664361 : && (CONSTANT_CLASS_P (gimple_phi_arg_def_from_edge
246 : : (phi, loop_preheader_edge (loop)))))
247 : : return true;
248 : : }
249 : : }
250 : 4961976 : return false;
251 : : }
252 : : return true;
253 : : }
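A concrete, invented example of what this predicate accepts: in the loop below, full peeling gives the index i the known values 0..3 in the peeled copies, and a has a readable constant initializer, so every a[i] load folds to a constant:

    static const int a[4] = { 1, 2, 3, 4 };
    int sum = 0;
    for (int i = 0; i < 4; i++)
      sum += a[i];   /* a[0], a[1], ... become constants after peeling */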
254 : :
255 : : /* Computes an estimated number of insns in LOOP.
256 : : EXIT (if non-NULL) is an exit edge that will be eliminated in all but the
257 : : last iteration of the loop.
258 : : EDGE_TO_CANCEL (if non-NULL) is a non-exit edge eliminated in the last
259 : : iteration of the loop.
260 : : Return the results in SIZE, estimating the benefit of complete unrolling exiting by EXIT.
261 : : Stop estimating once UPPER_BOUND is met and return true in that case. */
262 : :
263 : : static bool
264 : 455643 : tree_estimate_loop_size (class loop *loop, edge exit, edge edge_to_cancel,
265 : : struct loop_size *size, int upper_bound)
266 : : {
267 : 455643 : basic_block *body = get_loop_body (loop);
268 : 455643 : gimple_stmt_iterator gsi;
269 : 455643 : unsigned int i;
270 : 455643 : bool after_exit;
271 : 455643 : auto_vec<basic_block> path = get_loop_hot_path (loop);
272 : :
273 : 455643 : size->overall = 0;
274 : 455643 : size->eliminated_by_peeling = 0;
275 : 455643 : size->not_eliminatable_after_peeling = 0;
276 : 455643 : size->last_iteration = 0;
277 : 455643 : size->last_iteration_eliminated_by_peeling = 0;
278 : 455643 : size->last_iteration_not_eliminatable_after_peeling = 0;
279 : 455643 : size->num_pure_calls_on_hot_path = 0;
280 : 455643 : size->num_non_pure_calls_on_hot_path = 0;
281 : 455643 : size->non_call_stmts_on_hot_path = 0;
282 : 455643 : size->num_branches_on_hot_path = 0;
283 : 455643 : size->constant_iv = 0;
284 : :
285 : 455643 : if (dump_file && (dump_flags & TDF_DETAILS))
286 : 65 : fprintf (dump_file, "Estimating sizes for loop %i\n", loop->num);
287 : 2563103 : for (i = 0; i < loop->num_nodes; i++)
288 : : {
289 : 2008474 : if (edge_to_cancel && body[i] != edge_to_cancel->src
290 : 3685053 : && dominated_by_p (CDI_DOMINATORS, body[i], edge_to_cancel->src))
291 : : after_exit = true;
292 : : else
293 : : after_exit = false;
294 : 2113472 : if (dump_file && (dump_flags & TDF_DETAILS))
295 : 192 : fprintf (dump_file, " BB: %i, after_exit: %i\n", body[i]->index,
296 : : after_exit);
297 : :
298 : 13100474 : for (gsi = gsi_start_bb (body[i]); !gsi_end_p (gsi); gsi_next (&gsi))
299 : : {
300 : 8879542 : gimple *stmt = gsi_stmt (gsi);
301 : 8879542 : int num = estimate_num_insns (stmt, &eni_size_weights);
302 : 8879542 : bool not_eliminatable_after_peeling = false;
303 : 8879542 : bool likely_eliminated = false;
304 : 8879542 : bool likely_eliminated_last = false;
305 : 8879542 : bool likely_eliminated_peeled = false;
306 : :
307 : 8879542 : if (dump_file && (dump_flags & TDF_DETAILS))
308 : : {
309 : 977 : fprintf (dump_file, " size: %3i ", num);
310 : 977 : print_gimple_stmt (dump_file, gsi_stmt (gsi), 0);
311 : : }
312 : :
313 : : /* Look for reasons why we might optimize this stmt away. */
314 : :
315 : 8879542 : if (gimple_has_side_effects (stmt))
316 : : not_eliminatable_after_peeling = true;
317 : : else
318 : : {
319 : : /* Exit conditional. */
320 : 6821478 : if (exit && body[i] == exit->src
321 : 11826184 : && stmt == *gsi_last_bb (exit->src))
322 : : {
323 : 373033 : if (dump_file && (dump_flags & TDF_DETAILS))
324 : 32 : fprintf (dump_file, " Exit condition will be eliminated "
325 : : "in peeled copies.\n");
326 : : likely_eliminated_peeled = true;
327 : : }
328 : 8230135 : if (edge_to_cancel && body[i] == edge_to_cancel->src
329 : 13310422 : && stmt == *gsi_last_bb (edge_to_cancel->src))
330 : : {
331 : 436757 : if (dump_file && (dump_flags & TDF_DETAILS))
332 : 59 : fprintf (dump_file, " Exit condition will be eliminated "
333 : : "in last copy.\n");
334 : : likely_eliminated_last = true;
335 : : }
336 : : /* Sets of IV variables */
337 : 8635482 : if (gimple_code (stmt) == GIMPLE_ASSIGN
338 : 8635482 : && constant_after_peeling (gimple_assign_lhs (stmt), stmt, loop))
339 : : {
340 : 1055222 : if (dump_file && (dump_flags & TDF_DETAILS))
341 : 105 : fprintf (dump_file, " Induction variable computation will"
342 : : " be folded away.\n");
343 : : likely_eliminated = true;
344 : : }
345 : : /* Assignments of IV variables. */
346 : 7580260 : else if (gimple_code (stmt) == GIMPLE_ASSIGN
347 : 4282815 : && TREE_CODE (gimple_assign_lhs (stmt)) == SSA_NAME
348 : 3613823 : && constant_after_peeling (gimple_assign_rhs1 (stmt),
349 : : stmt, loop)
350 : 162026 : && (gimple_assign_rhs_class (stmt) != GIMPLE_BINARY_RHS
351 : 83986 : || constant_after_peeling (gimple_assign_rhs2 (stmt),
352 : : stmt, loop))
353 : 7671219 : && gimple_assign_rhs_class (stmt) != GIMPLE_TERNARY_RHS)
354 : : {
355 : 90422 : size->constant_iv = true;
356 : 90422 : if (dump_file && (dump_flags & TDF_DETAILS))
357 : 13 : fprintf (dump_file,
358 : : " Constant expression will be folded away.\n");
359 : : likely_eliminated = true;
360 : : }
361 : : /* Conditionals. */
362 : 7489838 : else if ((gimple_code (stmt) == GIMPLE_COND
363 : 1124088 : && constant_after_peeling (gimple_cond_lhs (stmt), stmt,
364 : : loop)
365 : 392277 : && constant_after_peeling (gimple_cond_rhs (stmt), stmt,
366 : : loop)
367 : : /* We don't simplify all constant compares so make sure
368 : : they are not both constant already. See PR70288. */
369 : 369944 : && (! is_gimple_min_invariant (gimple_cond_lhs (stmt))
370 : 144 : || ! is_gimple_min_invariant
371 : 144 : (gimple_cond_rhs (stmt))))
372 : 8244126 : || (gimple_code (stmt) == GIMPLE_SWITCH
373 : 1357 : && constant_after_peeling (gimple_switch_index (
374 : : as_a <gswitch *>
375 : 1357 : (stmt)),
376 : : stmt, loop)
377 : 355 : && ! is_gimple_min_invariant
378 : 355 : (gimple_switch_index
379 : 355 : (as_a <gswitch *> (stmt)))))
380 : : {
381 : 370155 : if (dump_file && (dump_flags & TDF_DETAILS))
382 : 29 : fprintf (dump_file, " Constant conditional.\n");
383 : : likely_eliminated = true;
384 : : }
385 : : }
386 : :
387 : 8879542 : size->overall += num;
388 : 8879542 : if (likely_eliminated || likely_eliminated_peeled)
389 : 1532209 : size->eliminated_by_peeling += num;
390 : 8879542 : if (not_eliminatable_after_peeling)
391 : 244060 : size->not_eliminatable_after_peeling += num;
392 : 8879542 : if (!after_exit)
393 : : {
394 : 5728563 : size->last_iteration += num;
395 : 5728563 : if (likely_eliminated || likely_eliminated_last)
396 : 1129145 : size->last_iteration_eliminated_by_peeling += num;
397 : 5728563 : if (not_eliminatable_after_peeling)
398 : 125790 : size->last_iteration_not_eliminatable_after_peeling += num;
399 : : }
400 : 8879542 : if ((size->overall * 3 / 2 - size->eliminated_by_peeling
401 : 8879542 : - size->last_iteration_eliminated_by_peeling) > upper_bound)
402 : : {
403 : 6012 : free (body);
404 : 6012 : return true;
405 : : }
406 : : }
407 : : }
408 : 1988869 : while (path.length ())
409 : : {
410 : 1539238 : basic_block bb = path.pop ();
411 : 9962786 : for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
412 : : {
413 : 6884310 : gimple *stmt = gsi_stmt (gsi);
414 : 6884310 : if (gimple_code (stmt) == GIMPLE_CALL
415 : 6884310 : && !gimple_inexpensive_call_p (as_a <gcall *> (stmt)))
416 : : {
417 : 133615 : int flags = gimple_call_flags (stmt);
418 : 133615 : if (flags & (ECF_PURE | ECF_CONST))
419 : 28823 : size->num_pure_calls_on_hot_path++;
420 : : else
421 : 104792 : size->num_non_pure_calls_on_hot_path++;
422 : 133615 : size->num_branches_on_hot_path ++;
423 : : }
424 : : /* Count inexpensive calls as non-calls, because they will likely
425 : : expand inline. */
426 : 6750695 : else if (gimple_code (stmt) != GIMPLE_DEBUG)
427 : 5266399 : size->non_call_stmts_on_hot_path++;
428 : 6884310 : if (((gimple_code (stmt) == GIMPLE_COND
429 : 894319 : && (!constant_after_peeling (gimple_cond_lhs (stmt), stmt, loop)
430 : 345598 : || !constant_after_peeling (gimple_cond_rhs (stmt), stmt,
431 : : loop)))
432 : 6313637 : || (gimple_code (stmt) == GIMPLE_SWITCH
433 : 944 : && !constant_after_peeling (gimple_switch_index (
434 : 944 : as_a <gswitch *> (stmt)),
435 : : stmt, loop)))
436 : 7455698 : && (!exit || bb != exit->src))
437 : 555744 : size->num_branches_on_hot_path++;
438 : : }
439 : : }
440 : :
441 : 449631 : if (dump_file && (dump_flags & TDF_DETAILS))
442 : 65 : fprintf (dump_file, "size: %i-%i, last_iteration: %i-%i\n", size->overall,
443 : : size->eliminated_by_peeling, size->last_iteration,
444 : : size->last_iteration_eliminated_by_peeling);
445 : :
446 : 449631 : free (body);
447 : 449631 : return false;
448 : 455643 : }
449 : :
450 : : /* Estimate the number of insns of the completely unrolled loop.
451 : : It is (NUNROLL + 1) * size of the loop body, taking into account
452 : : the fact that in the last copy everything after the exit conditional
453 : : is dead and that some instructions will be eliminated after
454 : : peeling. Set *EST_ELIMINATED to the number of stmts that could be
455 : : optimistically eliminated by followup transforms. */
456 : : static unsigned HOST_WIDE_INT
457 : 446930 : estimated_unrolled_size (struct loop_size *size,
458 : : unsigned HOST_WIDE_INT *est_eliminated,
459 : : unsigned HOST_WIDE_INT nunroll)
460 : : {
461 : 446930 : HOST_WIDE_INT unr_insns = ((nunroll)
462 : 446930 : * (HOST_WIDE_INT) (size->overall
463 : 446930 : - size->eliminated_by_peeling));
464 : 446930 : HOST_WIDE_INT not_elim
465 : 446930 : = ((nunroll) * (HOST_WIDE_INT) size->not_eliminatable_after_peeling);
466 : 446930 : unr_insns += size->last_iteration - size->last_iteration_eliminated_by_peeling;
467 : 446930 : not_elim += size->last_iteration_not_eliminatable_after_peeling;
468 : :
469 : : /* Testcases rely on rounding up, so do not write as
470 : : (unr_insns - not_elim) / 3. */
471 : 446930 : *est_eliminated = unr_insns - not_elim - (unr_insns - not_elim) * 2 / 3;
472 : 446930 : return unr_insns;
473 : : }
474 : :
475 : : /* Loop LOOP is known to not loop. See if there is an edge in the loop
476 : : body that can be removed to make the loop always exit without at
477 : : the same time making any code potentially executed during the
478 : : last iteration dead.
479 : :
480 : : After complete unrolling we still may get rid of the conditional
481 : : on the exit in the last copy even if we have no idea what it does.
482 : : This is quite a common case for loops of the form
483 : :
484 : : int a[5];
485 : : for (i=0;i<b;i++)
486 : : a[i]=0;
487 : :
488 : : Here we prove the loop to iterate 5 times but we do not know
489 : : it from the induction variable.
490 : :
491 : : For now we handle only the simple case where the exit condition is
492 : : just before the latch block and the latch block contains no statements
493 : : with side effects that may otherwise terminate the execution of the loop
494 : : (such as by EH or by terminating the program or longjmp).
495 : :
496 : : In the general case we may want to cancel the paths leading to statements
497 : : that loop-niter identified as having undefined effect in the last
498 : : iteration. The other cases are hopefully rare and will be cleaned up later. */
499 : :
500 : : static edge
501 : 90196 : loop_edge_to_cancel (class loop *loop)
502 : : {
503 : 90196 : unsigned i;
504 : 90196 : edge edge_to_cancel;
505 : 90196 : gimple_stmt_iterator gsi;
506 : :
507 : : /* We want only one predecessor of the loop latch. */
508 : 90196 : if (EDGE_COUNT (loop->latch->preds) > 1)
509 : : return NULL;
510 : :
511 : 76560 : auto_vec<edge> exits = get_loop_exit_edges (loop);
512 : :
513 : 240238 : FOR_EACH_VEC_ELT (exits, i, edge_to_cancel)
514 : : {
515 : : /* Find the edge other than the loop exit
516 : : leaving the conditional. */
517 : 85078 : if (EDGE_COUNT (edge_to_cancel->src->succs) != 2)
518 : 82 : continue;
519 : 84996 : if (EDGE_SUCC (edge_to_cancel->src, 0) == edge_to_cancel)
520 : 20995 : edge_to_cancel = EDGE_SUCC (edge_to_cancel->src, 1);
521 : : else
522 : : edge_to_cancel = EDGE_SUCC (edge_to_cancel->src, 0);
523 : :
524 : : /* We only can handle conditionals. */
525 : 84996 : if (!(edge_to_cancel->flags & (EDGE_TRUE_VALUE | EDGE_FALSE_VALUE)))
526 : 434 : continue;
527 : :
528 : : /* We should never have conditionals in the loop latch. */
529 : 84562 : gcc_assert (edge_to_cancel->dest != loop->header);
530 : :
531 : : /* Check that it leads to loop latch. */
532 : 84562 : if (edge_to_cancel->dest != loop->latch)
533 : 10042 : continue;
534 : :
535 : : /* Verify that the code in the loop latch does nothing that may end program
536 : : execution without really reaching the exit. This may include
537 : : non-pure/const function calls, EH statements, volatile ASMs etc. */
538 : 267626 : for (gsi = gsi_start_bb (loop->latch); !gsi_end_p (gsi); gsi_next (&gsi))
539 : 119081 : if (gimple_has_side_effects (gsi_stmt (gsi)))
540 : : return NULL;
541 : : return edge_to_cancel;
542 : : }
543 : : return NULL;
544 : 76560 : }
545 : :
546 : : /* Remove all tests for exits that are known to be taken after LOOP was
547 : : peeled NPEELED times. Put __builtin_unreachable before every
548 : : statement known to never be executed. */
549 : :
550 : : static bool
551 : 121867 : remove_exits_and_undefined_stmts (class loop *loop, unsigned int npeeled)
552 : : {
553 : 121867 : class nb_iter_bound *elt;
554 : 121867 : bool changed = false;
555 : :
556 : 371970 : for (elt = loop->bounds; elt; elt = elt->next)
557 : : {
558 : : /* If statement is known to be undefined after peeling, turn it
559 : : into unreachable (or trap when debugging experience is supposed
560 : : to be good). */
561 : 250103 : if (!elt->is_exit
562 : 250103 : && wi::ltu_p (elt->bound, npeeled))
563 : : {
564 : 36366 : gimple_stmt_iterator gsi = gsi_for_stmt (elt->stmt);
565 : 36366 : location_t loc = gimple_location (elt->stmt);
566 : 36366 : gcall *stmt = gimple_build_builtin_unreachable (loc);
567 : 36366 : gsi_insert_before (&gsi, stmt, GSI_NEW_STMT);
568 : 36366 : split_block (gimple_bb (stmt), stmt);
569 : 36366 : changed = true;
570 : 36366 : if (dump_file && (dump_flags & TDF_DETAILS))
571 : : {
572 : 4 : fprintf (dump_file, "Forced statement unreachable: ");
573 : 4 : print_gimple_stmt (dump_file, elt->stmt, 0);
574 : : }
575 : : }
576 : : /* If we know the exit will be taken after peeling, update. */
577 : 213737 : else if (elt->is_exit
578 : 213737 : && wi::leu_p (elt->bound, npeeled))
579 : : {
580 : 93998 : basic_block bb = gimple_bb (elt->stmt);
581 : 93998 : edge exit_edge = EDGE_SUCC (bb, 0);
582 : :
583 : 93998 : if (dump_file && (dump_flags & TDF_DETAILS))
584 : : {
585 : 78 : fprintf (dump_file, "Forced exit to be taken: ");
586 : 78 : print_gimple_stmt (dump_file, elt->stmt, 0);
587 : : }
588 : 93998 : if (!loop_exit_edge_p (loop, exit_edge))
589 : 34701 : exit_edge = EDGE_SUCC (bb, 1);
590 : 93998 : exit_edge->probability = profile_probability::always ();
591 : 93998 : gcc_checking_assert (loop_exit_edge_p (loop, exit_edge));
592 : 93998 : gcond *cond_stmt = as_a <gcond *> (elt->stmt);
593 : 93998 : if (exit_edge->flags & EDGE_TRUE_VALUE)
594 : 57653 : gimple_cond_make_true (cond_stmt);
595 : : else
596 : 36345 : gimple_cond_make_false (cond_stmt);
597 : 93998 : update_stmt (cond_stmt);
598 : 93998 : changed = true;
599 : : }
600 : : }
601 : 121867 : return changed;
602 : : }
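An invented illustration of the two cases handled above, assuming the loop was peeled npeeled = 3 times:

    int a[3];
    for (int i = 0; i < n; i++)
      {
        if (i == 3)   /* exit with bound 3 <= npeeled: it must have been
                         taken in the peeled copies, so the gcond is
                         forced to the taken value */
          break;
        a[i] = 0;     /* stmt with bound 2 < npeeled: it would be
                         undefined from iteration 3 on, so a
                         __builtin_unreachable () call goes before it */
      }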
603 : :
604 : : /* Remove all exits that are known to be never taken because of the loop bound
605 : : discovered. */
606 : :
607 : : static bool
608 : 2097880 : remove_redundant_iv_tests (class loop *loop)
609 : : {
610 : 2097880 : class nb_iter_bound *elt;
611 : 2097880 : bool changed = false;
612 : :
613 : 2097880 : if (!loop->any_upper_bound)
614 : : return false;
615 : 4990259 : for (elt = loop->bounds; elt; elt = elt->next)
616 : : {
617 : : /* An exit is pointless if it won't be taken before the loop reaches
618 : : its upper bound. */
619 : 1496896 : if (elt->is_exit && loop->any_upper_bound
620 : 4920960 : && wi::ltu_p (loop->nb_iterations_upper_bound, elt->bound))
621 : : {
622 : 293754 : basic_block bb = gimple_bb (elt->stmt);
623 : 293754 : edge exit_edge = EDGE_SUCC (bb, 0);
624 : 293754 : class tree_niter_desc niter;
625 : :
626 : 293754 : if (!loop_exit_edge_p (loop, exit_edge))
627 : 213857 : exit_edge = EDGE_SUCC (bb, 1);
628 : :
629 : : /* Only when we know the actual number of iterations, not
630 : : just a bound, we can remove the exit. */
631 : 293754 : if (!number_of_iterations_exit (loop, exit_edge,
632 : : &niter, false, false)
633 : 293752 : || !integer_onep (niter.assumptions)
634 : 293752 : || !integer_zerop (niter.may_be_zero)
635 : 191870 : || !niter.niter
636 : 191870 : || TREE_CODE (niter.niter) != INTEGER_CST
637 : 298066 : || !wi::ltu_p (widest_int::from (loop->nb_iterations_upper_bound,
638 : : SIGNED),
639 : 295910 : wi::to_widest (niter.niter)))
640 : 291598 : continue;
641 : :
642 : 2156 : if (dump_file && (dump_flags & TDF_DETAILS))
643 : : {
644 : 2 : fprintf (dump_file, "Removed pointless exit: ");
645 : 2 : print_gimple_stmt (dump_file, elt->stmt, 0);
646 : : }
647 : 2156 : gcond *cond_stmt = as_a <gcond *> (elt->stmt);
648 : 2156 : if (exit_edge->flags & EDGE_TRUE_VALUE)
649 : 1751 : gimple_cond_make_false (cond_stmt);
650 : : else
651 : 405 : gimple_cond_make_true (cond_stmt);
652 : 2156 : update_stmt (cond_stmt);
653 : 2156 : changed = true;
654 : 293754 : }
655 : : }
656 : : return changed;
657 : : }
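A hypothetical example of an exit this removes: the first exit yields an exact iteration count of 4, so the second test can never fire first and its gcond is folded to a constant:

    unsigned i, x = 0;
    for (i = 0; i != 4; i++)   /* exact niter: 4 */
      {
        if (i > 100)           /* its bound exceeds the loop's upper
                                  bound of 4, so this exit is removed */
          abort ();
        x += i;
      }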
658 : :
659 : : /* Stores loops that will be unlooped and edges that will be removed
660 : : after we process the whole loop tree. */
661 : : static vec<loop_p> loops_to_unloop;
662 : : static vec<int> loops_to_unloop_nunroll;
663 : : static vec<edge> edges_to_remove;
664 : : /* Stores loops that have been peeled. */
665 : : static bitmap peeled_loops;
666 : :
667 : : /* Cancel all fully unrolled loops by putting __builtin_unreachable
668 : : on the latch edge.
669 : : We do it after all unrolling since unlooping moves basic blocks
670 : : across loop boundaries, trashing loop-closed SSA form as well as
671 : : SCEV info, both of which need to stay intact during unrolling.
672 : :
673 : : IRRED_INVALIDATED is used to bookkeep whether information about
674 : : irreducible regions may become invalid as a result
675 : : of the transformation.
676 : : LOOP_CLOSED_SSA_INVALIDATED is used to bookkeep the case
677 : : when we need to go into loop closed SSA form. */
678 : :
679 : : void
680 : 277448 : unloop_loops (vec<class loop *> &loops_to_unloop,
681 : : vec<int> &loops_to_unloop_nunroll,
682 : : vec<edge> &edges_to_remove,
683 : : bitmap loop_closed_ssa_invalidated,
684 : : bool *irred_invalidated)
685 : : {
686 : 399315 : while (loops_to_unloop.length ())
687 : : {
688 : 121867 : class loop *loop = loops_to_unloop.pop ();
689 : 121867 : int n_unroll = loops_to_unloop_nunroll.pop ();
690 : 121867 : basic_block latch = loop->latch;
691 : 121867 : edge latch_edge = loop_latch_edge (loop);
692 : 121867 : int flags = latch_edge->flags;
693 : 121867 : location_t locus = latch_edge->goto_locus;
694 : 121867 : gcall *stmt;
695 : 121867 : gimple_stmt_iterator gsi;
696 : :
697 : 121867 : remove_exits_and_undefined_stmts (loop, n_unroll);
698 : :
699 : : /* Unloop destroys the latch edge. */
700 : 121867 : unloop (loop, irred_invalidated, loop_closed_ssa_invalidated);
701 : :
702 : : /* Create new basic block for the latch edge destination and wire
703 : : it in. */
704 : 121867 : stmt = gimple_build_builtin_unreachable (locus);
705 : 121867 : latch_edge = make_edge (latch, create_basic_block (NULL, NULL, latch), flags);
706 : 121867 : latch_edge->probability = profile_probability::never ();
707 : 121867 : latch_edge->flags |= flags;
708 : 121867 : latch_edge->goto_locus = locus;
709 : :
710 : 121867 : add_bb_to_loop (latch_edge->dest, current_loops->tree_root);
711 : 121867 : latch_edge->dest->count = profile_count::zero ();
712 : 121867 : set_immediate_dominator (CDI_DOMINATORS, latch_edge->dest, latch_edge->src);
713 : :
714 : 121867 : gsi = gsi_start_bb (latch_edge->dest);
715 : 121867 : gsi_insert_after (&gsi, stmt, GSI_NEW_STMT);
716 : : }
717 : :
718 : : /* Remove edges in peeled copies. Since remove_path removes dominated
719 : : regions, we need to cope with paths that have already been removed. */
720 : 277448 : unsigned i;
721 : 277448 : edge e;
722 : 277448 : auto_vec<int, 20> src_bbs;
723 : 309130 : src_bbs.reserve_exact (edges_to_remove.length ());
724 : 812769 : FOR_EACH_VEC_ELT (edges_to_remove, i, e)
725 : 257873 : src_bbs.quick_push (e->src->index);
726 : 535321 : FOR_EACH_VEC_ELT (edges_to_remove, i, e)
727 : 257873 : if (BASIC_BLOCK_FOR_FN (cfun, src_bbs[i]))
728 : : {
729 : 257873 : bool ok = remove_path (e, irred_invalidated,
730 : : loop_closed_ssa_invalidated);
731 : 257873 : gcc_assert (ok);
732 : : }
733 : 277448 : edges_to_remove.release ();
734 : 277448 : }
735 : :
736 : : /* Tries to unroll LOOP completely, i.e. NITER times.
737 : : UL determines which loops we are allowed to unroll.
738 : : EXIT is the exit of the loop that should be eliminated.
739 : : MAXITER specifies a bound on the number of iterations, -1 if it is
740 : : not known or too large for HOST_WIDE_INT. The location
741 : : LOCUS corresponding to the loop is used when emitting
742 : : a summary of the unroll to the dump file. */
743 : :
744 : : static bool
745 : 2097880 : try_unroll_loop_completely (class loop *loop,
746 : : edge exit, tree niter, bool may_be_zero,
747 : : enum unroll_level ul,
748 : : HOST_WIDE_INT maxiter,
749 : : dump_user_location_t locus, bool allow_peel,
750 : : bool cunrolli)
751 : : {
752 : 2097880 : unsigned HOST_WIDE_INT n_unroll = 0;
753 : 2097880 : bool n_unroll_found = false;
754 : 2097880 : edge edge_to_cancel = NULL;
755 : :
756 : : /* See if we proved the number of iterations to be a low constant.
757 : :
758 : : EXIT is an edge that will be removed in all but the last iteration of
759 : : the loop.
760 : :
761 : : EDGE_TO_CANCEL is an edge that will be removed from the last iteration
762 : : of the unrolled sequence and is expected to make the final loop not
763 : : roll.
764 : :
765 : : If the number of executions of the loop is determined by a standard
766 : : induction variable test, then EXIT and EDGE_TO_CANCEL are the two edges
767 : : leaving from the iv test. */
768 : 2097880 : if (tree_fits_uhwi_p (niter))
769 : : {
770 : 967058 : n_unroll = tree_to_uhwi (niter);
771 : 967058 : n_unroll_found = true;
772 : 967058 : edge_to_cancel = EDGE_SUCC (exit->src, 0);
773 : 967058 : if (edge_to_cancel == exit)
774 : 310595 : edge_to_cancel = EDGE_SUCC (exit->src, 1);
775 : : }
776 : : /* We do not know the number of iterations and thus we cannot eliminate
777 : : the EXIT edge. */
778 : : else
779 : : exit = NULL;
780 : :
781 : : /* See if we can improve our estimate by using recorded loop bounds. */
782 : 2097880 : if ((maxiter == 0 || ul != UL_SINGLE_ITER)
783 : 1458762 : && maxiter >= 0
784 : 1051142 : && (!n_unroll_found || (unsigned HOST_WIDE_INT)maxiter < n_unroll))
785 : : {
786 : 373929 : n_unroll = maxiter;
787 : 373929 : n_unroll_found = true;
788 : : /* Loop terminates before the IV variable test, so we cannot
789 : : remove it in the last iteration. */
790 : 373929 : edge_to_cancel = NULL;
791 : : /* If we do not allow peeling and the loop still iterates, just
792 : : allow cases that do not grow code. */
793 : 373929 : if (!allow_peel && maxiter != 0)
794 : : ul = UL_NO_GROWTH;
795 : : }
796 : :
797 : 1723951 : if (!n_unroll_found)
798 : : return false;
799 : :
800 : 1340812 : if (!loop->unroll
801 : 1338756 : && n_unroll > (unsigned) param_max_completely_peel_times)
802 : : {
803 : 720078 : if (dump_file && (dump_flags & TDF_DETAILS))
804 : 33 : fprintf (dump_file, "Not unrolling loop %d "
805 : : "(--param max-completely-peel-times limit reached).\n",
806 : : loop->num);
807 : 720078 : return false;
808 : : }
809 : :
810 : 620734 : if (!edge_to_cancel)
811 : 90196 : edge_to_cancel = loop_edge_to_cancel (loop);
812 : :
813 : 620734 : if (n_unroll)
814 : : {
815 : 596778 : if (ul == UL_SINGLE_ITER)
816 : 2097880 : return false;
817 : :
818 : 454598 : if (loop->unroll)
819 : : {
820 : : /* If the unrolling factor is too large, bail out. */
821 : 1657 : if (n_unroll > (unsigned)loop->unroll)
822 : : {
823 : 1154 : if (dump_file && (dump_flags & TDF_DETAILS))
824 : 44 : fprintf (dump_file,
825 : : "Not unrolling loop %d: "
826 : : "user didn't want it unrolled completely.\n",
827 : : loop->num);
828 : 1154 : return false;
829 : : }
830 : : }
831 : : else
832 : : {
833 : 452941 : struct loop_size size;
834 : : /* EXIT can be removed only if we are sure it passes first N_UNROLL
835 : : iterations. */
836 : 452941 : bool remove_exit = (exit && niter
837 : 373120 : && TREE_CODE (niter) == INTEGER_CST
838 : 826061 : && wi::leu_p (n_unroll, wi::to_widest (niter)));
839 : 452941 : bool large
840 : : = tree_estimate_loop_size
841 : 452941 : (loop, remove_exit ? exit : NULL, edge_to_cancel, &size,
842 : : param_max_completely_peeled_insns);
843 : 452941 : if (large)
844 : : {
845 : 6011 : if (dump_file && (dump_flags & TDF_DETAILS))
846 : 0 : fprintf (dump_file, "Not unrolling loop %d: it is too large.\n",
847 : : loop->num);
848 : 357920 : return false;
849 : : }
850 : :
851 : 446930 : unsigned HOST_WIDE_INT ninsns = size.overall;
852 : 446930 : unsigned HOST_WIDE_INT est_eliminated;
853 : 446930 : unsigned HOST_WIDE_INT unr_insns
854 : 446930 : = estimated_unrolled_size (&size, &est_eliminated, n_unroll);
855 : 446930 : if (dump_file && (dump_flags & TDF_DETAILS))
856 : : {
857 : 62 : fprintf (dump_file, " Loop size: %d\n", (int) ninsns);
858 : 62 : fprintf (dump_file, " Estimated size after unrolling: %d-%d\n",
859 : : (int) unr_insns, (int) est_eliminated);
860 : : }
861 : :
862 : : /* If the code is going to shrink, we don't need to be extra
863 : : cautious about guessing whether the unrolling is going to be
864 : : profitable. The 2 / 3 reduction moved here from
865 : : estimated_unrolled_size lets small loops unroll. */
866 : 446930 : if (unr_insns - est_eliminated
867 : : /* If there is IV variable that will become constant, we
868 : : save one instruction in the loop prologue we do not
869 : : account otherwise. */
870 : 446930 : <= ninsns + (size.constant_iv != false))
871 : : ;
872 : : /* We unroll only inner loops, because we do not consider it
873 : : profitable otherwise. We can still cancel the loopback edge
874 : : of a loop that does not roll; this is always a good idea. */
875 : 378014 : else if (ul == UL_NO_GROWTH)
876 : : {
877 : 334814 : if (dump_file && (dump_flags & TDF_DETAILS))
878 : 24 : fprintf (dump_file, "Not unrolling loop %d: size would grow.\n",
879 : : loop->num);
880 : 334814 : return false;
881 : : }
882 : : /* Outer loops tend to be less interesting candidates for
883 : : complete unrolling unless we can do a lot of propagation
884 : : into the inner loop body. For now we disable outer loop
885 : : unrolling when the code would grow. */
886 : 43200 : else if (loop->inner)
887 : : {
888 : 4304 : if (dump_file && (dump_flags & TDF_DETAILS))
889 : 0 : fprintf (dump_file, "Not unrolling loop %d: "
890 : : "it is not innermost and code would grow.\n",
891 : : loop->num);
892 : 4304 : return false;
893 : : }
894 : : /* If there is a call on the hot path through the loop, then
895 : : there is most probably not much to optimize. */
896 : 38896 : else if (size.num_non_pure_calls_on_hot_path)
897 : : {
898 : 9825 : if (dump_file && (dump_flags & TDF_DETAILS))
899 : 1 : fprintf (dump_file, "Not unrolling loop %d: "
900 : : "contains call and code would grow.\n",
901 : : loop->num);
902 : 9825 : return false;
903 : : }
904 : : /* If there is a pure/const call in the loop, then we can
905 : : still optimize the unrolled loop body if it contains some
906 : : other interesting code than the calls and the code storing or
907 : : accumulating the return value. */
908 : 29071 : else if (size.num_pure_calls_on_hot_path
909 : : /* One IV increment, one test, one ivtmp store and
910 : : one useful stmt. That is about the minimal loop
911 : : doing a pure call. */
912 : 2442 : && (size.non_call_stmts_on_hot_path
913 : 2442 : <= 3 + size.num_pure_calls_on_hot_path))
914 : : {
915 : 27 : if (dump_file && (dump_flags & TDF_DETAILS))
916 : 0 : fprintf (dump_file, "Not unrolling loop %d: "
917 : : "contains just pure calls and code would grow.\n",
918 : : loop->num);
919 : 27 : return false;
920 : : }
921 : : /* Complete unrolling is a major win when control flow is
922 : : removed and one big basic block is created. If the loop
923 : : contains control flow the optimization may still be a win
924 : : because of eliminating the loop overhead but it also may
925 : : blow the branch predictor tables. Limit number of
926 : : branches on the hot path through the peeled sequence. */
927 : 29044 : else if (size.num_branches_on_hot_path * (int)n_unroll
928 : 29044 : > param_max_peel_branches)
929 : : {
930 : 1399 : if (dump_file && (dump_flags & TDF_DETAILS))
931 : 0 : fprintf (dump_file, "Not unrolling loop %d: "
932 : : "number of branches on hot path in the unrolled "
933 : : "sequence reaches --param max-peel-branches limit.\n",
934 : : loop->num);
935 : 1399 : return false;
936 : : }
937 : : /* Move the 2 / 3 reduction from estimated_unrolled_size, but don't
938 : : reduce the unrolled size for the innermost loop.
939 : : 1) It could increase register pressure.
940 : : 2) A big loop after complete unrolling may not be vectorized
941 : : by the BB vectorizer. */
942 : 27645 : else if ((cunrolli ? unr_insns : unr_insns - est_eliminated)
943 : 27645 : > (unsigned) param_max_completely_peeled_insns)
944 : : {
945 : 1540 : if (dump_file && (dump_flags & TDF_DETAILS))
946 : 1 : fprintf (dump_file, "Not unrolling loop %d: "
947 : : "number of insns in the unrolled sequence reaches "
948 : : "--param max-completely-peeled-insns limit.\n",
949 : : loop->num);
950 : 1540 : return false;
951 : : }
952 : : }
953 : :
954 : 95524 : if (!dbg_cnt (gimple_unroll))
955 : : return false;
956 : :
957 : 95524 : initialize_original_copy_tables ();
958 : 95524 : auto_sbitmap wont_exit (n_unroll + 1);
959 : 95524 : if (exit && niter
960 : 87845 : && TREE_CODE (niter) == INTEGER_CST
961 : 183369 : && wi::leu_p (n_unroll, wi::to_widest (niter)))
962 : : {
963 : 87845 : bitmap_ones (wont_exit);
964 : 175690 : if (wi::eq_p (wi::to_widest (niter), n_unroll)
965 : 87845 : || edge_to_cancel)
966 : 87840 : bitmap_clear_bit (wont_exit, 0);
967 : : }
968 : : else
969 : : {
970 : 7679 : exit = NULL;
971 : 7679 : bitmap_clear (wont_exit);
972 : : }
973 : 95524 : if (may_be_zero)
974 : 2 : bitmap_clear_bit (wont_exit, 1);
975 : :
976 : : /* If the loop was originally estimated to iterate too many times,
977 : : reduce the profile to avoid new profile inconsistencies. */
978 : 95524 : scale_loop_profile (loop, profile_probability::always (), n_unroll);
979 : :
980 : 95524 : if (!gimple_duplicate_loop_body_to_header_edge (
981 : : loop, loop_preheader_edge (loop), n_unroll, wont_exit, exit,
982 : : &edges_to_remove,
983 : : DLTHE_FLAG_UPDATE_FREQ | DLTHE_FLAG_COMPLETTE_PEEL))
984 : : {
985 : 9 : free_original_copy_tables ();
986 : 9 : if (dump_file && (dump_flags & TDF_DETAILS))
987 : 0 : fprintf (dump_file, "Failed to duplicate the loop\n");
988 : 9 : return false;
989 : : }
990 : :
991 : 95515 : free_original_copy_tables ();
992 : 95524 : }
993 : : else
994 : 23956 : scale_loop_profile (loop, profile_probability::always (), 0);
995 : :
996 : : /* Remove the conditional from the last copy of the loop. */
997 : 119471 : if (edge_to_cancel)
998 : : {
999 : 238808 : gcond *cond = as_a <gcond *> (*gsi_last_bb (edge_to_cancel->src));
1000 : 119404 : force_edge_cold (edge_to_cancel, true);
1001 : 119404 : if (edge_to_cancel->flags & EDGE_TRUE_VALUE)
1002 : 54046 : gimple_cond_make_false (cond);
1003 : : else
1004 : 65358 : gimple_cond_make_true (cond);
1005 : 119404 : update_stmt (cond);
1006 : : /* Do not remove the path, as doing so may remove outer loop and
1007 : : confuse bookkeeping code in tree_unroll_loops_completely. */
1008 : : }
1009 : :
1010 : : /* Store the loop for later unlooping and exit removal. */
1011 : 119471 : loops_to_unloop.safe_push (loop);
1012 : 119471 : loops_to_unloop_nunroll.safe_push (n_unroll);
1013 : :
1014 : 119471 : if (dump_enabled_p ())
1015 : : {
1016 : 170 : if (!n_unroll)
1017 : 63 : dump_printf_loc (MSG_OPTIMIZED_LOCATIONS | TDF_DETAILS, locus,
1018 : : "loop turned into non-loop; it never loops\n");
1019 : : else
1020 : : {
1021 : 107 : dump_printf_loc (MSG_OPTIMIZED_LOCATIONS | TDF_DETAILS, locus,
1022 : : "loop with %d iterations completely unrolled",
1023 : : (int) n_unroll);
1024 : 107 : if (loop->header->count.initialized_p ())
1025 : 106 : dump_printf (MSG_OPTIMIZED_LOCATIONS | TDF_DETAILS,
1026 : : " (header execution count %d)",
1027 : 106 : (int)loop->header->count.to_gcov_type ());
1028 : 107 : dump_printf (MSG_OPTIMIZED_LOCATIONS | TDF_DETAILS, "\n");
1029 : : }
1030 : : }
1031 : :
1032 : 119471 : if (dump_file && (dump_flags & TDF_DETAILS))
1033 : : {
1034 : 101 : if (exit)
1035 : 76 : fprintf (dump_file, "Exit condition of peeled iterations was "
1036 : : "eliminated.\n");
1037 : 101 : if (edge_to_cancel)
1038 : 101 : fprintf (dump_file, "Last iteration exit edge was proved true.\n");
1039 : : else
1040 : 0 : fprintf (dump_file, "Latch of last iteration was marked by "
1041 : : "__builtin_unreachable ().\n");
1042 : : }
1043 : :
1044 : : return true;
1045 : : }
1046 : :
1047 : : /* Return number of instructions after peeling. */
1048 : : static unsigned HOST_WIDE_INT
1049 : 2702 : estimated_peeled_sequence_size (struct loop_size *size,
1050 : : unsigned HOST_WIDE_INT npeel)
1051 : : {
1052 : 2702 : return MAX (npeel * (HOST_WIDE_INT) (size->overall
1053 : : - size->eliminated_by_peeling), 1);
1054 : : }
1055 : :
1056 : : /* Update loop estimates after peeling LOOP by NPEEL.
1057 : : If PRECISE is false only likely exits were duplicated, so do not
1058 : : update any estimates that are supposed to be always reliable. */
1059 : : void
1060 : 416523 : adjust_loop_info_after_peeling (class loop *loop, int npeel, bool precise)
1061 : : {
1062 : 416523 : if (loop->any_estimate)
1063 : : {
1064 : : /* Since peeling is mostly about loops where the first few
1065 : : iterations are special, it is not quite correct to
1066 : : assume that the remaining iterations will behave
1067 : : the same way. However we do not have better info
1068 : : so update the estimate, since it is likely better
1069 : : than keeping it as it is.
1070 : :
1071 : : Remove it if it looks wrong.
1072 : :
1073 : : TODO: We likely want to special case the situation where
1074 : : peeling is optimizing out exit edges and only update
1075 : : estimates here. */
1076 : 211910 : if (wi::leu_p (npeel, loop->nb_iterations_estimate))
1077 : 211882 : loop->nb_iterations_estimate -= npeel;
1078 : : else
1079 : 28 : loop->any_estimate = false;
1080 : : }
1081 : 416523 : if (loop->any_upper_bound && precise)
1082 : : {
1083 : 247021 : if (wi::leu_p (npeel, loop->nb_iterations_upper_bound))
1084 : 247021 : loop->nb_iterations_upper_bound -= npeel;
1085 : : else
1086 : : {
1087 : : /* Peeling the maximal number of iterations or more
1088 : : makes no sense and is a bug.
1089 : : We should peel completely. */
1090 : 0 : gcc_unreachable ();
1091 : : }
1092 : : }
1093 : 416523 : if (loop->any_likely_upper_bound)
1094 : : {
1095 : 347182 : if (wi::leu_p (npeel, loop->nb_iterations_likely_upper_bound))
1096 : 346385 : loop->nb_iterations_likely_upper_bound -= npeel;
1097 : : else
1098 : : {
1099 : 797 : loop->any_estimate = true;
1100 : 797 : loop->nb_iterations_estimate = 0;
1101 : 797 : loop->nb_iterations_likely_upper_bound = 0;
1102 : : }
1103 : : }
1104 : 416523 : }
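A quick invented example of the adjustment: a loop with nb_iterations_estimate = 10 and nb_iterations_upper_bound = 12, peeled NPEEL = 3 times with PRECISE true, keeps estimate 7 and upper bound 9 for the remaining loop. If instead the likely upper bound were only 2, peeling 3 times would exhaust it, so both the estimate and the likely upper bound are reset to 0.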
1105 : :
1106 : : /* If the loop is expected to iterate N times and is
1107 : : small enough, duplicate the loop body N+1 times before
1108 : : the loop itself. This way the hot path will never
1109 : : enter the loop.
1110 : : Parameters are the same as for try_unroll_loop_completely. */
1111 : :
1112 : : static bool
1113 : 118584 : try_peel_loop (class loop *loop,
1114 : : edge exit, tree niter, bool may_be_zero,
1115 : : HOST_WIDE_INT maxiter)
1116 : : {
1117 : 118584 : HOST_WIDE_INT npeel;
1118 : 118584 : struct loop_size size;
1119 : 118584 : int peeled_size;
1120 : :
1121 : 118584 : if (!flag_peel_loops
1122 : 100741 : || param_max_peel_times <= 0
1123 : 100741 : || !peeled_loops)
1124 : : return false;
1125 : :
1126 : 76900 : if (bitmap_bit_p (peeled_loops, loop->num))
1127 : : {
1128 : 780 : if (dump_file)
1129 : 2 : fprintf (dump_file, "Not peeling: loop is already peeled\n");
1130 : 780 : return false;
1131 : : }
1132 : :
1133 : : /* We don't peel loops that will be unrolled as this can duplicate a
1134 : : loop more times than the user requested. */
1135 : 76120 : if (loop->unroll)
1136 : : {
1137 : 121 : if (dump_file)
1138 : 0 : fprintf (dump_file, "Not peeling: user didn't want it peeled.\n");
1139 : 121 : return false;
1140 : : }
1141 : :
1142 : : /* Peel only innermost loops.
1143 : : While the code is perfectly capable of peeling non-innermost loops,
1144 : : the heuristics would probably need some improvements. */
1145 : 75999 : if (loop->inner)
1146 : : {
1147 : 14309 : if (dump_file)
1148 : 2 : fprintf (dump_file, "Not peeling: outer loop\n");
1149 : 14309 : return false;
1150 : : }
1151 : :
1152 : 61690 : if (!optimize_loop_for_speed_p (loop))
1153 : : {
1154 : 0 : if (dump_file)
1155 : 0 : fprintf (dump_file, "Not peeling: cold loop\n");
1156 : 0 : return false;
1157 : : }
1158 : :
1159 : : /* Check if there is an estimate on the number of iterations. */
1160 : 61690 : npeel = estimated_loop_iterations_int (loop);
1161 : 61690 : if (npeel < 0)
1162 : 45143 : npeel = likely_max_loop_iterations_int (loop);
1163 : 61690 : if (npeel < 0)
1164 : : {
1165 : 12818 : if (dump_file)
1166 : 0 : fprintf (dump_file, "Not peeling: number of iterations is not "
1167 : : "estimated\n");
1168 : 12818 : return false;
1169 : : }
1170 : 48872 : if (maxiter >= 0 && maxiter <= npeel)
1171 : : {
1172 : 45396 : if (dump_file)
1173 : 23 : fprintf (dump_file, "Not peeling: upper bound is known so can "
1174 : : "unroll completely\n");
1175 : 45396 : return false;
1176 : : }
1177 : :
1178 : : /* We want to peel the estimated number of iterations + 1 (so we never
1179 : : enter the loop on the quick path). Check against PARAM_MAX_PEEL_TIMES
1180 : : and be sure to avoid overflows. */
1181 : 3476 : if (npeel > param_max_peel_times - 1)
1182 : : {
1183 : 774 : if (dump_file)
1184 : 0 : fprintf (dump_file, "Not peeling: rolls too much "
1185 : : "(%i + 1 > --param max-peel-times)\n", (int) npeel);
1186 : 774 : return false;
1187 : : }
1188 : 2702 : npeel++;
1189 : :
1190 : : /* Check peeled loops size. */
1191 : 2702 : tree_estimate_loop_size (loop, exit, NULL, &size,
1192 : : param_max_peeled_insns);
1193 : 2702 : if ((peeled_size = estimated_peeled_sequence_size (&size, (int) npeel))
1194 : 2702 : > param_max_peeled_insns)
1195 : : {
1196 : 1884 : if (dump_file)
1197 : 0 : fprintf (dump_file, "Not peeling: peeled sequence size is too large "
1198 : : "(%i insns > --param max-peel-insns)", peeled_size);
1199 : 1884 : return false;
1200 : : }
1201 : :
1202 : 818 : if (!dbg_cnt (gimple_unroll))
1203 : : return false;
1204 : :
1205 : : /* Duplicate possibly eliminating the exits. */
1206 : 818 : initialize_original_copy_tables ();
1207 : 818 : auto_sbitmap wont_exit (npeel + 1);
1208 : 818 : if (exit && niter
1209 : 9 : && TREE_CODE (niter) == INTEGER_CST
1210 : 827 : && wi::leu_p (npeel, wi::to_widest (niter)))
1211 : : {
1212 : 9 : bitmap_ones (wont_exit);
1213 : 9 : bitmap_clear_bit (wont_exit, 0);
1214 : : }
1215 : : else
1216 : : {
1217 : 809 : exit = NULL;
1218 : 809 : bitmap_clear (wont_exit);
1219 : : }
1220 : 818 : if (may_be_zero)
1221 : 0 : bitmap_clear_bit (wont_exit, 1);
1222 : :
1223 : 818 : if (!gimple_duplicate_loop_body_to_header_edge (
1224 : : loop, loop_preheader_edge (loop), npeel, wont_exit, exit,
1225 : : &edges_to_remove, DLTHE_FLAG_UPDATE_FREQ))
1226 : : {
1227 : 0 : free_original_copy_tables ();
1228 : 0 : return false;
1229 : : }
1230 : 818 : free_original_copy_tables ();
1231 : 818 : if (dump_file && (dump_flags & TDF_DETAILS))
1232 : : {
1233 : 3 : fprintf (dump_file, "Peeled loop %d, %i times.\n",
1234 : : loop->num, (int) npeel);
1235 : : }
1236 : 818 : adjust_loop_info_after_peeling (loop, npeel, true);
1237 : :
1238 : 818 : bitmap_set_bit (peeled_loops, loop->num);
1239 : 818 : return true;
1240 : 818 : }
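Conceptually (an editorial sketch, not code from this file), peeling a loop whose estimated iteration count is N = 2 emits npeel = N + 1 = 3 guarded copies of the body ahead of the loop:

    if (!c) goto done;   /* peeled copy 1 */
    body ();
    if (!c) goto done;   /* peeled copy 2 */
    body ();
    if (!c) goto done;   /* peeled copy 3 = N + 1 */
    body ();
    while (c) body ();   /* cold remainder loop */
    done:;

so an execution that matches the estimate never enters the remaining loop at all.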
1241 : : /* Adds a canonical induction variable to LOOP if suitable.
1242 : : CREATE_IV is true if we may create a new iv. UL determines
1243 : : which loops we are allowed to completely unroll. If TRY_EVAL is true, we try
1244 : : to determine the number of iterations of a loop by direct evaluation.
1245 : : Returns true if cfg is changed. */
1246 : :
1247 : : static bool
1248 : 2097880 : canonicalize_loop_induction_variables (class loop *loop,
1249 : : bool create_iv, enum unroll_level ul,
1250 : : bool try_eval, bool allow_peel,
1251 : : const_sbitmap innermost,
1252 : : bool cunrolli)
1253 : : {
1254 : 2097880 : edge exit = NULL;
1255 : 2097880 : tree niter;
1256 : 2097880 : HOST_WIDE_INT maxiter;
1257 : 2097880 : bool modified = false;
1258 : 2097880 : class tree_niter_desc niter_desc;
1259 : 2097880 : bool may_be_zero = false;
1260 : 2097880 : bool by_eval = false;
1261 : :
1262 : : /* For unrolling allow conditional constant or zero iterations, thus
1263 : : perform loop-header copying on-the-fly. */
1264 : 2097880 : exit = single_exit (loop);
1265 : 2097880 : niter = chrec_dont_know;
1266 : 2097880 : if (exit && number_of_iterations_exit (loop, exit, &niter_desc, false))
1267 : : {
1268 : 1027435 : niter = niter_desc.niter;
1269 : 1027435 : may_be_zero
1270 : 1027435 : = niter_desc.may_be_zero && !integer_zerop (niter_desc.may_be_zero);
1271 : : }
1272 : 2097880 : if (TREE_CODE (niter) != INTEGER_CST)
1273 : : {
1274 : : /* For non-constant niter fold may_be_zero into niter again. */
1275 : 1453418 : if (may_be_zero)
1276 : : {
1277 : 112050 : if (COMPARISON_CLASS_P (niter_desc.may_be_zero))
1278 : 112047 : niter = fold_build3 (COND_EXPR, TREE_TYPE (niter),
1279 : : niter_desc.may_be_zero,
1280 : : build_int_cst (TREE_TYPE (niter), 0), niter);
1281 : : else
1282 : 3 : niter = chrec_dont_know;
1283 : : may_be_zero = false;
1284 : : }
1285 : :
1286 : : /* If the loop has more than one exit, try checking all of them
1287 : : for # of iterations determinable through scev. */
1288 : 1453418 : if (!exit)
1289 : 704944 : niter = find_loop_niter (loop, &exit);
1290 : :
1291 : : /* Finally if everything else fails, try brute force evaluation. */
1292 : 1453418 : if (try_eval
1293 : 1453418 : && (chrec_contains_undetermined (niter)
1294 : 463799 : || TREE_CODE (niter) != INTEGER_CST))
1295 : : {
1296 : 731955 : niter = find_loop_niter_by_eval (loop, &exit);
1297 : 731955 : if (TREE_CODE (niter) == INTEGER_CST)
1298 : 13241 : by_eval = true;
1299 : : }
1300 : :
1301 : 1453418 : if (TREE_CODE (niter) != INTEGER_CST)
1302 : 1130822 : exit = NULL;
1303 : : }
1304 : :
1305 : : /* We work exceptionally hard here to estimate the bound
1306 : : by find_loop_niter_by_eval. Be sure to keep it for the future. */
1307 : 3228702 : if (niter && TREE_CODE (niter) == INTEGER_CST)
1308 : : {
1309 : 967058 : auto_vec<edge> exits = get_loop_exit_edges (loop);
1310 : 967058 : record_niter_bound (loop, wi::to_widest (niter),
1311 : 967058 : exit == single_likely_exit (loop, exits), true);
1312 : 967058 : }
1313 : :
1314 : : /* Force re-computation of loop bounds so we can remove redundant exits. */
1315 : 2097880 : maxiter = max_loop_iterations_int (loop);
1316 : :
1317 : 304 : if (dump_file && (dump_flags & TDF_DETAILS)
1318 : 2098126 : && TREE_CODE (niter) == INTEGER_CST)
1319 : : {
1320 : 146 : fprintf (dump_file, "Loop %d iterates ", loop->num);
1321 : 146 : print_generic_expr (dump_file, niter, TDF_SLIM);
1322 : 146 : fprintf (dump_file, " times.\n");
1323 : : }
1324 : 304 : if (dump_file && (dump_flags & TDF_DETAILS)
1325 : 2098126 : && maxiter >= 0)
1326 : : {
1327 : 210 : fprintf (dump_file, "Loop %d iterates at most %i times.\n", loop->num,
1328 : : (int)maxiter);
1329 : : }
1330 : 304 : if (dump_file && (dump_flags & TDF_DETAILS)
1331 : 2098126 : && likely_max_loop_iterations_int (loop) >= 0)
1332 : : {
1333 : 210 : fprintf (dump_file, "Loop %d likely iterates at most %i times.\n",
1334 : 210 : loop->num, (int)likely_max_loop_iterations_int (loop));
1335 : : }
1336 : :
1337 : : /* Remove exits that are known to be never taken based on the loop bound.
1338 : : Needs to be called after the computation of max_loop_iterations_int,
1339 : : which populates the loop bounds. */
1340 : 2097880 : modified |= remove_redundant_iv_tests (loop);
1341 : :
1342 : 2097880 : dump_user_location_t locus = find_loop_location (loop);
1343 : :
1344 : 2097880 : bool innermost_cunrolli_p
1345 : : = cunrolli
1346 : 702455 : && (unsigned) loop->num < SBITMAP_SIZE (innermost)
1347 : 2797535 : && bitmap_bit_p (innermost, loop->num);
1348 : :
1349 : 2097880 : if (try_unroll_loop_completely (loop, exit, niter, may_be_zero, ul,
1350 : : maxiter, locus, allow_peel,
1351 : : innermost_cunrolli_p))
1352 : : return true;
1353 : :
1354 : 1978409 : if ((create_iv || by_eval)
1355 : 641072 : && niter && !chrec_contains_undetermined (niter)
1356 : 2270001 : && exit && just_once_each_iteration_p (loop, exit->src))
1357 : : {
1358 : 291588 : tree iv_niter = niter;
1359 : 291588 : if (may_be_zero)
1360 : : {
1361 : 22 : if (COMPARISON_CLASS_P (niter_desc.may_be_zero))
1362 : 22 : iv_niter = fold_build3 (COND_EXPR, TREE_TYPE (iv_niter),
1363 : : niter_desc.may_be_zero,
1364 : : build_int_cst (TREE_TYPE (iv_niter), 0),
1365 : : iv_niter);
1366 : : else
1367 : : iv_niter = NULL_TREE;
1368 : : }
1369 : 291588 : if (iv_niter)
1370 : 291588 : create_canonical_iv (loop, exit, iv_niter);
1371 : : }
1372 : :
1373 : 1978409 : if (ul == UL_ALL)
1374 : 118584 : modified |= try_peel_loop (loop, exit, niter, may_be_zero, maxiter);
1375 : :
1376 : : return modified;
1377 : 2097880 : }
1378 : :
1379 : : /* The main entry point of the pass. Adds canonical induction variables
1380 : : to the suitable loops. */
1381 : :
1382 : : unsigned int
1383 : 230053 : canonicalize_induction_variables (void)
1384 : : {
1385 : 230053 : bool changed = false;
1386 : 230053 : bool irred_invalidated = false;
1387 : 230053 : bitmap loop_closed_ssa_invalidated = BITMAP_ALLOC (NULL);
1388 : 460106 : auto_sbitmap innermost (number_of_loops (cfun));
1389 : 230053 : bitmap_clear (innermost);
1390 : :
1391 : 230053 : estimate_numbers_of_iterations (cfun);
1392 : :
1393 : 1329377 : for (auto loop : loops_list (cfun, LI_FROM_INNERMOST))
1394 : : {
1395 : 639218 : changed
1396 : 639218 : |= canonicalize_loop_induction_variables (loop,
1397 : : true, UL_SINGLE_ITER,
1398 : : true, false,
1399 : 639218 : (const_sbitmap) innermost,
1400 : : false);
1401 : 230053 : }
1402 : 230053 : gcc_assert (!need_ssa_update_p (cfun));
1403 : :
1404 : 230053 : unloop_loops (loops_to_unloop, loops_to_unloop_nunroll, edges_to_remove,
1405 : : loop_closed_ssa_invalidated, &irred_invalidated);
1406 : 230053 : loops_to_unloop.release ();
1407 : 230053 : loops_to_unloop_nunroll.release ();
1408 : 230053 : if (irred_invalidated
1409 : 230053 : && loops_state_satisfies_p (LOOPS_HAVE_MARKED_IRREDUCIBLE_REGIONS))
1410 : 1 : mark_irreducible_loops ();
1411 : :
1412 : : /* Clean up the information about numbers of iterations, since brute force
1413 : : evaluation could reveal new information. */
1414 : 230053 : free_numbers_of_iterations_estimates (cfun);
1415 : 230053 : scev_reset ();
1416 : :
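: : /* Unlooping can leave SSA names used outside the loop that defined
: : them; restore loop-closed SSA form for the affected names.  */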
1417 : 230053 : if (!bitmap_empty_p (loop_closed_ssa_invalidated))
1418 : : {
1419 : 16 : gcc_checking_assert (loops_state_satisfies_p (LOOP_CLOSED_SSA));
1420 : 16 : rewrite_into_loop_closed_ssa (NULL, TODO_update_ssa);
1421 : : }
1422 : 230053 : BITMAP_FREE (loop_closed_ssa_invalidated);
1423 : :
1424 : 230053 : if (changed)
1425 : 795 : return TODO_cleanup_cfg;
1426 : : return 0;
1427 : 230053 : }
1428 : :
1429 : : /* Process loops from innermost to outermost, stopping at the innermost
1430 : : loop we unrolled.  */
1431 : :
1432 : : static bool
1433 : 1993605 : tree_unroll_loops_completely_1 (bool may_increase_size, bool unroll_outer,
1434 : : bitmap father_bbs, class loop *loop,
1435 : : const_sbitmap innermost, bool cunrolli)
1436 : : {
1437 : 1993605 : class loop *loop_father;
1438 : 1993605 : bool changed = false;
1439 : 1993605 : class loop *inner;
1440 : 1993605 : enum unroll_level ul;
1441 : 1993605 : unsigned num = number_of_loops (cfun);
1442 : :
1443 : : /* Process inner loops first. Don't walk loops added by the recursive
1444 : : calls because SSA form is not up-to-date. They can be handled in the
1445 : : next iteration. */
1446 : 1993605 : bitmap child_father_bbs = NULL;
1447 : 3499213 : for (inner = loop->inner; inner != NULL; inner = inner->next)
1448 : 1505608 : if ((unsigned) inner->num < num)
1449 : : {
1450 : 1503646 : if (!child_father_bbs)
1451 : 722128 : child_father_bbs = BITMAP_ALLOC (NULL);
1452 : 1503646 : if (tree_unroll_loops_completely_1 (may_increase_size, unroll_outer,
1453 : : child_father_bbs, inner,
1454 : : innermost, cunrolli))
1455 : : {
1456 : 155123 : bitmap_ior_into (father_bbs, child_father_bbs);
1457 : 155123 : bitmap_clear (child_father_bbs);
1458 : 155123 : changed = true;
1459 : : }
1460 : : }
1461 : 1993605 : if (child_father_bbs)
1462 : 722128 : BITMAP_FREE (child_father_bbs);
1463 : :
1464 : : /* If we changed an inner loop we cannot process outer loops in this
1465 : : iteration because SSA form is not up-to-date. Continue with
1466 : : siblings of outer loops instead. */
1467 : 1993605 : if (changed)
1468 : : {
1469 : : /* If we are recorded as a father, clear all other fathers, which
1470 : : are necessarily covered already, to avoid redundant work.  */
1471 : 81500 : if (bitmap_bit_p (father_bbs, loop->header->index))
1472 : : {
1473 : 24764 : bitmap_clear (father_bbs);
1474 : 24764 : bitmap_set_bit (father_bbs, loop->header->index);
1475 : : }
1476 : 81500 : return true;
1477 : : }
1478 : :
1479 : : /* Don't unroll #pragma omp simd loops until the vectorizer
1480 : : attempts to vectorize them.  */
1481 : 1912105 : if (loop->force_vectorize)
1482 : : return false;
1483 : :
1484 : : /* Try to unroll this loop. */
1485 : 1901862 : loop_father = loop_outer (loop);
1486 : 1901862 : if (!loop_father)
1487 : : return false;
1488 : :
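: : /* An explicit unroll request for this loop (e.g. via #pragma GCC
: : unroll) overrides the size heuristics below.  */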
1489 : 1458662 : if (loop->unroll > 1)
1490 : : ul = UL_ALL;
1491 : 245304 : else if (may_increase_size && optimize_loop_nest_for_speed_p (loop)
1492 : : /* Unroll outermost loops only if asked to do so or they do
1493 : : not cause code growth. */
1494 : 1695393 : && (unroll_outer || loop_outer (loop_father)))
1495 : : ul = UL_ALL;
1496 : : else
1497 : : ul = UL_NO_GROWTH;
1498 : :
1499 : 1458662 : if (canonicalize_loop_induction_variables
1500 : 1458689 : (loop, false, ul, !flag_tree_loop_ivcanon || cunrolli, unroll_outer,
1501 : : innermost, cunrolli))
1502 : : {
1503 : : /* If we'll continue unrolling, we need to propagate constants
1504 : : within the new basic blocks to fold away induction variable
1505 : : computations; otherwise, the size might blow up before the
1506 : : iteration completes and the IR is eventually cleaned up.  */
1507 : 120382 : if (loop_outer (loop_father))
1508 : : {
1509 : : /* Once we process our father, we will have processed
1510 : : the fathers of our children as well, so avoid doing
1511 : : redundant work and clear the fathers we've gathered so far.  */
1512 : 31917 : bitmap_clear (father_bbs);
1513 : 31917 : bitmap_set_bit (father_bbs, loop_father->header->index);
1514 : : }
1515 : 88465 : else if (unroll_outer)
1516 : : /* Trigger scalar cleanup once any outermost loop gets unrolled. */
1517 : 50878 : cfun->pending_TODOs |= PENDING_TODO_force_next_scalar_cleanup;
1518 : :
1519 : 120382 : return true;
1520 : : }
1521 : :
1522 : : return false;
1523 : : }
1524 : :
1525 : : /* Unroll LOOPS completely if they iterate just a few times.  Unless
1526 : : MAY_INCREASE_SIZE is true, perform the unrolling only if the
1527 : : size of the code does not increase.
1528 : : CUNROLLI is true when the pass is cunrolli.  */
1529 : :
1530 : : static unsigned int
1531 : 443209 : tree_unroll_loops_completely (bool may_increase_size, bool unroll_outer,
: :                               bool cunrolli)
1532 : : {
1533 : 443209 : bitmap father_bbs = BITMAP_ALLOC (NULL);
1534 : 443209 : bool changed;
1535 : 443209 : int iteration = 0;
1536 : 443209 : bool irred_invalidated = false;
1537 : 886418 : auto_sbitmap innermost (number_of_loops (cfun));
1538 : 443209 : bitmap_clear (innermost);
1539 : :
1540 : 443209 : estimate_numbers_of_iterations (cfun);
1541 : :
1542 : : /* Mark all innermost loops at the beginning.  */
1543 : 2632795 : for (auto loop : loops_list (cfun, LI_FROM_INNERMOST))
1544 : : {
1545 : 1303168 : if (!loop->inner)
1546 : 1074932 : bitmap_set_bit (innermost, loop->num);
1547 : 443209 : }
1548 : :
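: : /* Completely unrolling an inner loop can turn its parent into a new
: : innermost loop, so iterate until nothing changes or the
: : param_max_unroll_iterations cap is reached.  */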
1549 : 489959 : do
1550 : : {
1551 : 489959 : changed = false;
1552 : 489959 : bitmap loop_closed_ssa_invalidated = NULL;
1553 : :
1554 : 489959 : if (loops_state_satisfies_p (LOOP_CLOSED_SSA))
1555 : 259418 : loop_closed_ssa_invalidated = BITMAP_ALLOC (NULL);
1556 : :
1557 : 489959 : free_numbers_of_iterations_estimates (cfun);
1558 : 489959 : estimate_numbers_of_iterations (cfun);
1559 : :
1560 : 979918 : changed = tree_unroll_loops_completely_1 (may_increase_size,
1561 : : unroll_outer, father_bbs,
1562 : 489959 : current_loops->tree_root,
1563 : 489959 : (const_sbitmap) innermost,
1564 : : cunrolli);
1565 : 489959 : if (changed)
1566 : : {
1567 : 46759 : unsigned i;
1568 : :
1569 : 46759 : unloop_loops (loops_to_unloop, loops_to_unloop_nunroll,
1570 : : edges_to_remove, loop_closed_ssa_invalidated,
1571 : : &irred_invalidated);
1572 : 46759 : loops_to_unloop.release ();
1573 : 46759 : loops_to_unloop_nunroll.release ();
1574 : :
1575 : : /* We cannot use TODO_update_ssa_no_phi because VOPS gets confused. */
1576 : 46759 : if (loop_closed_ssa_invalidated
1577 : 46759 : && !bitmap_empty_p (loop_closed_ssa_invalidated))
1578 : 6989 : rewrite_into_loop_closed_ssa (loop_closed_ssa_invalidated,
1579 : : TODO_update_ssa);
1580 : : else
1581 : 39770 : update_ssa (TODO_update_ssa);
1582 : :
1583 : : /* father_bbs is a bitmap of loop father header BB indices.
1584 : : Translate those to the non-root loops these BBs belong to now.  */
1585 : 46759 : bitmap_iterator bi;
1586 : 46759 : bitmap fathers = BITMAP_ALLOC (NULL);
1587 : 70262 : EXECUTE_IF_SET_IN_BITMAP (father_bbs, 0, i, bi)
1588 : : {
1589 : 23503 : basic_block unrolled_loop_bb = BASIC_BLOCK_FOR_FN (cfun, i);
1590 : 23503 : if (! unrolled_loop_bb)
1591 : 0 : continue;
1592 : 23503 : if (loop_outer (unrolled_loop_bb->loop_father))
1593 : 23503 : bitmap_set_bit (fathers,
1594 : : unrolled_loop_bb->loop_father->num);
1595 : : }
1596 : 46759 : bitmap_clear (father_bbs);
1597 : : /* Propagate the constants within the new basic blocks. */
1598 : 70262 : EXECUTE_IF_SET_IN_BITMAP (fathers, 0, i, bi)
1599 : : {
1600 : 23503 : loop_p father = get_loop (cfun, i);
1601 : 23503 : bitmap exit_bbs = BITMAP_ALLOC (NULL);
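: : /* Collect the destinations of FATHER's exits; do_rpo_vn then
: : value-numbers the region from the preheader up to those blocks,
: : folding the IV computations exposed by unrolling.  */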
1602 : 23503 : loop_exit *exit = father->exits->next;
1603 : 110878 : while (exit->e)
1604 : : {
1605 : 87375 : bitmap_set_bit (exit_bbs, exit->e->dest->index);
1606 : 87375 : exit = exit->next;
1607 : : }
1608 : 23503 : do_rpo_vn (cfun, loop_preheader_edge (father), exit_bbs);
1609 : : }
1610 : 46759 : BITMAP_FREE (fathers);
1611 : :
1612 : : /* Clean up the information about numbers of iterations, since
1613 : : complete unrolling might have invalidated it. */
1614 : 46759 : scev_reset ();
1615 : :
1616 : : /* This will take care of removing completely unrolled loops
1617 : : from the loop structures so we can continue unrolling the
1618 : : now-innermost loops.  */
1619 : 46759 : if (cleanup_tree_cfg ())
1620 : 46759 : update_ssa (TODO_update_ssa_only_virtuals);
1621 : :
1622 : 46759 : if (flag_checking && loops_state_satisfies_p (LOOP_CLOSED_SSA))
1623 : 29375 : verify_loop_closed_ssa (true);
1624 : : }
1625 : 489959 : if (loop_closed_ssa_invalidated)
1626 : 259418 : BITMAP_FREE (loop_closed_ssa_invalidated);
1627 : : }
1628 : : while (changed
1629 : 489968 : && ++iteration <= param_max_unroll_iterations);
1630 : :
1631 : 443209 : BITMAP_FREE (father_bbs);
1632 : :
1633 : 443209 : if (irred_invalidated
1634 : 443209 : && loops_state_satisfies_p (LOOPS_HAVE_MARKED_IRREDUCIBLE_REGIONS))
1635 : 18 : mark_irreducible_loops ();
1636 : :
1637 : 443209 : return 0;
1638 : 443209 : }
1639 : :
1640 : : /* Canonical induction variable creation pass. */
1641 : :
1642 : : namespace {
1643 : :
1644 : : const pass_data pass_data_iv_canon =
1645 : : {
1646 : : GIMPLE_PASS, /* type */
1647 : : "ivcanon", /* name */
1648 : : OPTGROUP_LOOP, /* optinfo_flags */
1649 : : TV_TREE_LOOP_IVCANON, /* tv_id */
1650 : : ( PROP_cfg | PROP_ssa ), /* properties_required */
1651 : : 0, /* properties_provided */
1652 : : 0, /* properties_destroyed */
1653 : : 0, /* todo_flags_start */
1654 : : 0, /* todo_flags_finish */
1655 : : };
1656 : :
1657 : : class pass_iv_canon : public gimple_opt_pass
1658 : : {
1659 : : public:
1660 : 283157 : pass_iv_canon (gcc::context *ctxt)
1661 : 566314 : : gimple_opt_pass (pass_data_iv_canon, ctxt)
1662 : : {}
1663 : :
1664 : : /* opt_pass methods: */
1665 : 230065 : bool gate (function *) final override { return flag_tree_loop_ivcanon != 0; }
1666 : : unsigned int execute (function *fun) final override;
1667 : :
1668 : : }; // class pass_iv_canon
1669 : :
1670 : : unsigned int
1671 : 230053 : pass_iv_canon::execute (function *fun)
1672 : : {
1673 : 460106 : if (number_of_loops (fun) <= 1)
1674 : : return 0;
1675 : :
1676 : 230053 : return canonicalize_induction_variables ();
1677 : : }
1678 : :
1679 : : } // anon namespace
1680 : :
1681 : : gimple_opt_pass *
1682 : 283157 : make_pass_iv_canon (gcc::context *ctxt)
1683 : : {
1684 : 283157 : return new pass_iv_canon (ctxt);
1685 : : }
1686 : :
1687 : : /* Complete unrolling of loops. */
1688 : :
1689 : : namespace {
1690 : :
1691 : : const pass_data pass_data_complete_unroll =
1692 : : {
1693 : : GIMPLE_PASS, /* type */
1694 : : "cunroll", /* name */
1695 : : OPTGROUP_LOOP, /* optinfo_flags */
1696 : : TV_COMPLETE_UNROLL, /* tv_id */
1697 : : ( PROP_cfg | PROP_ssa ), /* properties_required */
1698 : : 0, /* properties_provided */
1699 : : 0, /* properties_destroyed */
1700 : : 0, /* todo_flags_start */
1701 : : 0, /* todo_flags_finish */
1702 : : };
1703 : :
1704 : : class pass_complete_unroll : public gimple_opt_pass
1705 : : {
1706 : : public:
1707 : 283157 : pass_complete_unroll (gcc::context *ctxt)
1708 : 566314 : : gimple_opt_pass (pass_data_complete_unroll, ctxt)
1709 : : {}
1710 : :
1711 : : /* opt_pass methods: */
1712 : : unsigned int execute (function *) final override;
1713 : :
1714 : : }; // class pass_complete_unroll
1715 : :
1716 : : unsigned int
1717 : 230045 : pass_complete_unroll::execute (function *fun)
1718 : : {
1719 : 460090 : if (number_of_loops (fun) <= 1)
1720 : : return 0;
1721 : :
1722 : : /* If we ever decide to run loop peeling more than once, we will need to
1723 : : track loops already peeled in loop structures themselves to avoid
1724 : : re-peeling the same loop multiple times. */
1725 : 230045 : if (flag_peel_loops)
1726 : 25766 : peeled_loops = BITMAP_ALLOC (NULL);
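: : /* Main complete unrolling: outermost loops may be unrolled, code
: : growth is allowed when flag_cunroll_grow_size is set, and this is not
: : the early cunrolli variant.  */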
1727 : 230045 : unsigned int val
: :     = tree_unroll_loops_completely (flag_cunroll_grow_size, true, false);
1728 : 230045 : if (peeled_loops)
1729 : : {
1730 : 25766 : BITMAP_FREE (peeled_loops);
1731 : 25766 : peeled_loops = NULL;
1732 : : }
1733 : : return val;
1734 : : }
1735 : :
1736 : : } // anon namespace
1737 : :
1738 : : gimple_opt_pass *
1739 : 283157 : make_pass_complete_unroll (gcc::context *ctxt)
1740 : : {
1741 : 283157 : return new pass_complete_unroll (ctxt);
1742 : : }
1743 : :
1744 : : /* Complete unrolling of inner loops. */
1745 : :
1746 : : namespace {
1747 : :
1748 : : const pass_data pass_data_complete_unrolli =
1749 : : {
1750 : : GIMPLE_PASS, /* type */
1751 : : "cunrolli", /* name */
1752 : : OPTGROUP_LOOP, /* optinfo_flags */
1753 : : TV_COMPLETE_UNROLL, /* tv_id */
1754 : : ( PROP_cfg | PROP_ssa ), /* properties_required */
1755 : : 0, /* properties_provided */
1756 : : 0, /* properties_destroyed */
1757 : : 0, /* todo_flags_start */
1758 : : 0, /* todo_flags_finish */
1759 : : };
1760 : :
1761 : : class pass_complete_unrolli : public gimple_opt_pass
1762 : : {
1763 : : public:
1764 : 283157 : pass_complete_unrolli (gcc::context *ctxt)
1765 : 566314 : : gimple_opt_pass (pass_data_complete_unrolli, ctxt)
1766 : : {}
1767 : :
1768 : : /* opt_pass methods: */
1769 : 1006350 : bool gate (function *) final override { return optimize >= 2; }
1770 : : unsigned int execute (function *) final override;
1771 : :
1772 : : }; // class pass_complete_unrolli
1773 : :
1774 : : unsigned int
1775 : 933237 : pass_complete_unrolli::execute (function *fun)
1776 : : {
1777 : 933237 : unsigned ret = 0;
1778 : :
1779 : 933237 : loop_optimizer_init (LOOPS_NORMAL | LOOPS_HAVE_RECORDED_EXITS);
1780 : 1866474 : if (number_of_loops (fun) > 1)
1781 : : {
1782 : 213164 : scev_initialize ();
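: : /* Early complete unrolling (this is the cunrolli pass): allow code
: : growth only at -O3 and above, and do not unroll outermost loops.  */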
1783 : 213164 : ret = tree_unroll_loops_completely (optimize >= 3, false, true);
1784 : 213164 : scev_finalize ();
1785 : : }
1786 : 933237 : loop_optimizer_finalize ();
1787 : :
1788 : 933237 : return ret;
1789 : : }
1790 : :
1791 : : } // anon namespace
1792 : :
1793 : : gimple_opt_pass *
1794 : 283157 : make_pass_complete_unrolli (gcc::context *ctxt)
1795 : : {
1796 : 283157 : return new pass_complete_unrolli (ctxt);
1797 : : }
1798 : :
1799 : :