Branch data Line data Source code
1 : : /* Induction variable canonicalization and loop peeling.
2 : : Copyright (C) 2004-2024 Free Software Foundation, Inc.
3 : :
4 : : This file is part of GCC.
5 : :
6 : : GCC is free software; you can redistribute it and/or modify it
7 : : under the terms of the GNU General Public License as published by the
8 : : Free Software Foundation; either version 3, or (at your option) any
9 : : later version.
10 : :
11 : : GCC is distributed in the hope that it will be useful, but WITHOUT
12 : : ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
13 : : FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
14 : : for more details.
15 : :
16 : : You should have received a copy of the GNU General Public License
17 : : along with GCC; see the file COPYING3. If not see
18 : : <http://www.gnu.org/licenses/>. */
19 : :
20 : : /* This pass detects the loops that iterate a constant number of times,
21 : : adds a canonical induction variable (step -1, tested against 0)
22 : : and replaces the exit test. This enables the less powerful rtl
23 : : level analysis to use this information.
24 : :
25 : : This might spoil the code in some cases (by increasing register pressure).
26 : : Note that in the case the new variable is not needed, ivopts will get rid
27 : : of it, so it might only be a problem when there are no other linear induction
28 : : variables. In that case the created optimization possibilities are likely
29 : : to pay up.
30 : :
31 : : We also perform
   32 : :    - complete unrolling (or peeling) when the loop is rolling few enough
33 : : times
   34 : :    - simple peeling (i.e. copying few initial iterations prior to the loop)
35 : : when number of iteration estimate is known (typically by the profile
36 : : info). */
37 : :
38 : : #include "config.h"
39 : : #include "system.h"
40 : : #include "coretypes.h"
41 : : #include "backend.h"
42 : : #include "tree.h"
43 : : #include "gimple.h"
44 : : #include "cfghooks.h"
45 : : #include "tree-pass.h"
46 : : #include "ssa.h"
47 : : #include "cgraph.h"
48 : : #include "gimple-pretty-print.h"
49 : : #include "fold-const.h"
50 : : #include "profile.h"
51 : : #include "gimple-iterator.h"
52 : : #include "gimple-fold.h"
53 : : #include "tree-eh.h"
54 : : #include "tree-cfg.h"
55 : : #include "tree-ssa-loop-manip.h"
56 : : #include "tree-ssa-loop-niter.h"
57 : : #include "tree-ssa-loop.h"
58 : : #include "tree-into-ssa.h"
59 : : #include "cfgloop.h"
60 : : #include "tree-chrec.h"
61 : : #include "tree-scalar-evolution.h"
62 : : #include "tree-inline.h"
63 : : #include "tree-cfgcleanup.h"
64 : : #include "builtins.h"
65 : : #include "tree-ssa-sccvn.h"
66 : : #include "tree-vectorizer.h" /* For find_loop_location */
67 : : #include "dbgcnt.h"
68 : :
69 : : /* Specifies types of loops that may be unrolled. */
70 : :
71 : : enum unroll_level
72 : : {
73 : : UL_SINGLE_ITER, /* Only loops that exit immediately in the first
74 : : iteration. */
75 : : UL_NO_GROWTH, /* Only loops whose unrolling will not cause increase
76 : : of code size. */
77 : : UL_ALL /* All suitable loops. */
78 : : };
79 : :
80 : : /* Adds a canonical induction variable to LOOP iterating NITER times. EXIT
81 : : is the exit edge whose condition is replaced. The ssa versions of the new
82 : : IV before and after increment will be stored in VAR_BEFORE and VAR_AFTER
83 : : if they are not NULL. */
84 : :
85 : : void
86 : 267052 : create_canonical_iv (class loop *loop, edge exit, tree niter,
87 : : tree *var_before = NULL, tree *var_after = NULL)
88 : : {
89 : 267052 : edge in;
90 : 267052 : tree type, var;
91 : 267052 : gcond *cond;
92 : 267052 : gimple_stmt_iterator incr_at;
93 : 267052 : enum tree_code cmp;
94 : :
95 : 267052 : if (dump_file && (dump_flags & TDF_DETAILS))
96 : : {
97 : 42 : fprintf (dump_file, "Added canonical iv to loop %d, ", loop->num);
98 : 42 : print_generic_expr (dump_file, niter, TDF_SLIM);
99 : 42 : fprintf (dump_file, " iterations.\n");
100 : : }
101 : :
102 : 534104 : cond = as_a <gcond *> (*gsi_last_bb (exit->src));
103 : 267052 : in = EDGE_SUCC (exit->src, 0);
104 : 267052 : if (in == exit)
105 : 68121 : in = EDGE_SUCC (exit->src, 1);
106 : :
107 : : /* Note that we do not need to worry about overflows, since
108 : : type of niter is always unsigned and all comparisons are
109 : : just for equality/nonequality -- i.e. everything works
110 : : with a modulo arithmetics. */
111 : :
112 : 267052 : type = TREE_TYPE (niter);
113 : 267052 : niter = fold_build2 (PLUS_EXPR, type,
114 : : niter,
115 : : build_int_cst (type, 1));
116 : 267052 : incr_at = gsi_last_bb (in->src);
117 : 267052 : create_iv (niter, PLUS_EXPR,
118 : 267052 : build_int_cst (type, -1),
119 : : NULL_TREE, loop,
120 : : &incr_at, false, var_before, &var);
121 : 267052 : if (var_after)
122 : 92 : *var_after = var;
123 : :
124 : 267052 : cmp = (exit->flags & EDGE_TRUE_VALUE) ? EQ_EXPR : NE_EXPR;
125 : 267052 : gimple_cond_set_code (cond, cmp);
126 : 267052 : gimple_cond_set_lhs (cond, var);
127 : 267052 : gimple_cond_set_rhs (cond, build_int_cst (type, 0));
128 : 267052 : update_stmt (cond);
129 : 267052 : }
130 : :
131 : : /* Describe size of loop as detected by tree_estimate_loop_size. */
132 : : struct loop_size
133 : : {
134 : : /* Number of instructions in the loop. */
135 : : int overall;
136 : :
137 : : /* Number of instructions that will be likely optimized out in
138 : : peeled iterations of loop (i.e. computation based on induction
139 : : variable where induction variable starts at known constant.) */
140 : : int eliminated_by_peeling;
141 : :
142 : : /* Same statistics for last iteration of loop: it is smaller because
143 : : instructions after exit are not executed. */
144 : : int last_iteration;
145 : : int last_iteration_eliminated_by_peeling;
146 : :
147 : : /* If some IV computation will become constant. */
148 : : bool constant_iv;
149 : :
150 : : /* Number of call stmts that are not a builtin and are pure or const
151 : : present on the hot path. */
152 : : int num_pure_calls_on_hot_path;
153 : : /* Number of call stmts that are not a builtin and are not pure nor const
154 : : present on the hot path. */
155 : : int num_non_pure_calls_on_hot_path;
156 : : /* Number of statements other than calls in the loop. */
157 : : int non_call_stmts_on_hot_path;
158 : : /* Number of branches seen on the hot path. */
159 : : int num_branches_on_hot_path;
160 : : };
161 : :
162 : : /* Return true if OP in STMT will be constant after peeling LOOP. */
163 : :
164 : : static bool
165 : 9751949 : constant_after_peeling (tree op, gimple *stmt, class loop *loop)
166 : : {
167 : 9751949 : if (CONSTANT_CLASS_P (op))
168 : : return true;
169 : :
170 : : /* Get at the actual SSA operand. */
171 : 9140869 : if (handled_component_p (op)
172 : 1075231 : && TREE_CODE (TREE_OPERAND (op, 0)) == SSA_NAME)
173 : 22830 : op = TREE_OPERAND (op, 0);
174 : :
175 : : /* We can still fold accesses to constant arrays when index is known. */
176 : 9140869 : if (TREE_CODE (op) != SSA_NAME)
177 : : {
178 : : tree base = op;
179 : :
180 : : /* First make fast look if we see constant array inside. */
181 : 3270538 : while (handled_component_p (base))
182 : 1700873 : base = TREE_OPERAND (base, 0);
183 : 1569665 : if ((DECL_P (base)
184 : 756012 : && ctor_for_folding (base) != error_mark_node)
185 : 2277217 : || CONSTANT_CLASS_P (base))
186 : : {
187 : : /* If so, see if we understand all the indices. */
188 : : base = op;
189 : 2313284 : while (handled_component_p (base))
190 : : {
191 : 54856 : if (TREE_CODE (base) == ARRAY_REF
192 : 54856 : && !constant_after_peeling (TREE_OPERAND (base, 1), stmt, loop))
193 : : return false;
194 : 44604 : base = TREE_OPERAND (base, 0);
195 : : }
196 : : return true;
197 : : }
198 : : return false;
199 : : }
200 : :
201 : : /* Induction variables are constants when defined in loop. */
202 : 15142408 : if (loop_containing_stmt (stmt) != loop)
203 : : return false;
204 : 5629271 : tree ev = analyze_scalar_evolution (loop, op);
205 : 5629271 : if (chrec_contains_undetermined (ev)
206 : 5629271 : || chrec_contains_symbols (ev))
207 : : {
208 : 4026495 : if (ANY_INTEGRAL_TYPE_P (TREE_TYPE (op)))
209 : : {
210 : 2927544 : gassign *ass = nullptr;
211 : 2927544 : gphi *phi = nullptr;
212 : 2927544 : if (is_a <gassign *> (SSA_NAME_DEF_STMT (op)))
213 : : {
214 : 2629349 : ass = as_a <gassign *> (SSA_NAME_DEF_STMT (op));
215 : 2629349 : if (TREE_CODE (gimple_assign_rhs1 (ass)) == SSA_NAME)
216 : 1629181 : phi = dyn_cast <gphi *>
217 : 1629181 : (SSA_NAME_DEF_STMT (gimple_assign_rhs1 (ass)));
218 : : }
219 : 298195 : else if (is_a <gphi *> (SSA_NAME_DEF_STMT (op)))
220 : : {
221 : 212943 : phi = as_a <gphi *> (SSA_NAME_DEF_STMT (op));
222 : 212943 : if (gimple_bb (phi) == loop->header)
223 : : {
224 : 134355 : tree def = gimple_phi_arg_def_from_edge
225 : 134355 : (phi, loop_latch_edge (loop));
226 : 134355 : if (TREE_CODE (def) == SSA_NAME
227 : 134355 : && is_a <gassign *> (SSA_NAME_DEF_STMT (def)))
228 : 91825 : ass = as_a <gassign *> (SSA_NAME_DEF_STMT (def));
229 : : }
230 : : }
231 : 2927544 : if (ass && phi)
232 : : {
233 : 346371 : tree rhs1 = gimple_assign_rhs1 (ass);
234 : 346371 : if (gimple_assign_rhs_class (ass) == GIMPLE_BINARY_RHS
235 : 262514 : && CONSTANT_CLASS_P (gimple_assign_rhs2 (ass))
236 : 186670 : && rhs1 == gimple_phi_result (phi)
237 : 185875 : && gimple_bb (phi) == loop->header
238 : 154724 : && (gimple_phi_arg_def_from_edge (phi, loop_latch_edge (loop))
239 : 154724 : == gimple_assign_lhs (ass))
240 : 463309 : && (CONSTANT_CLASS_P (gimple_phi_arg_def_from_edge
241 : : (phi, loop_preheader_edge (loop)))))
242 : : return true;
243 : : }
244 : : }
245 : 4020146 : return false;
246 : : }
247 : : return true;
248 : : }
249 : :
250 : : /* Computes an estimated number of insns in LOOP.
  251 : :    EXIT (if non-NULL) is an exit edge that will be eliminated in all but last
  252 : :    iteration of the loop.
  253 : :    EDGE_TO_CANCEL (if non-NULL) is a non-exit edge eliminated in the last iteration
254 : : of loop.
255 : : Return results in SIZE, estimate benefits for complete unrolling exiting by EXIT.
256 : : Stop estimating after UPPER_BOUND is met. Return true in this case. */
257 : :
258 : : static bool
259 : 374961 : tree_estimate_loop_size (class loop *loop, edge exit, edge edge_to_cancel,
260 : : struct loop_size *size, int upper_bound)
261 : : {
262 : 374961 : basic_block *body = get_loop_body (loop);
263 : 374961 : gimple_stmt_iterator gsi;
264 : 374961 : unsigned int i;
265 : 374961 : bool after_exit;
266 : 374961 : auto_vec<basic_block> path = get_loop_hot_path (loop);
267 : :
268 : 374961 : size->overall = 0;
269 : 374961 : size->eliminated_by_peeling = 0;
270 : 374961 : size->last_iteration = 0;
271 : 374961 : size->last_iteration_eliminated_by_peeling = 0;
272 : 374961 : size->num_pure_calls_on_hot_path = 0;
273 : 374961 : size->num_non_pure_calls_on_hot_path = 0;
274 : 374961 : size->non_call_stmts_on_hot_path = 0;
275 : 374961 : size->num_branches_on_hot_path = 0;
276 : 374961 : size->constant_iv = 0;
277 : :
278 : 374961 : if (dump_file && (dump_flags & TDF_DETAILS))
279 : 60 : fprintf (dump_file, "Estimating sizes for loop %i\n", loop->num);
280 : 2083128 : for (i = 0; i < loop->num_nodes; i++)
281 : : {
282 : 1595305 : if (edge_to_cancel && body[i] != edge_to_cancel->src
283 : 2950203 : && dominated_by_p (CDI_DOMINATORS, body[i], edge_to_cancel->src))
284 : : after_exit = true;
285 : : else
286 : : after_exit = false;
287 : 1714126 : if (dump_file && (dump_flags & TDF_DETAILS))
288 : 180 : fprintf (dump_file, " BB: %i, after_exit: %i\n", body[i]->index,
289 : : after_exit);
290 : :
291 : 10770950 : for (gsi = gsi_start_bb (body[i]); !gsi_end_p (gsi); gsi_next (&gsi))
292 : : {
293 : 7348657 : gimple *stmt = gsi_stmt (gsi);
294 : 7348657 : int num = estimate_num_insns (stmt, &eni_size_weights);
295 : 7348657 : bool likely_eliminated = false;
296 : 7348657 : bool likely_eliminated_last = false;
297 : 7348657 : bool likely_eliminated_peeled = false;
298 : :
299 : 7348657 : if (dump_file && (dump_flags & TDF_DETAILS))
300 : : {
301 : 839 : fprintf (dump_file, " size: %3i ", num);
302 : 839 : print_gimple_stmt (dump_file, gsi_stmt (gsi), 0);
303 : : }
304 : :
305 : : /* Look for reasons why we might optimize this stmt away. */
306 : :
307 : 7348657 : if (!gimple_has_side_effects (stmt))
308 : : {
309 : : /* Exit conditional. */
310 : 5574376 : if (exit && body[i] == exit->src
311 : 9708157 : && stmt == *gsi_last_bb (exit->src))
312 : : {
313 : 303830 : if (dump_file && (dump_flags & TDF_DETAILS))
314 : 29 : fprintf (dump_file, " Exit condition will be eliminated "
315 : : "in peeled copies.\n");
316 : : likely_eliminated_peeled = true;
317 : : }
318 : 6713191 : if (edge_to_cancel && body[i] == edge_to_cancel->src
319 : 10807377 : && stmt == *gsi_last_bb (edge_to_cancel->src))
320 : : {
321 : 359067 : if (dump_file && (dump_flags & TDF_DETAILS))
322 : 54 : fprintf (dump_file, " Exit condition will be eliminated "
323 : : "in last copy.\n");
324 : : likely_eliminated_last = true;
325 : : }
326 : : /* Sets of IV variables */
327 : 7117789 : if (gimple_code (stmt) == GIMPLE_ASSIGN
328 : 7117789 : && constant_after_peeling (gimple_assign_lhs (stmt), stmt, loop))
329 : : {
330 : 867060 : if (dump_file && (dump_flags & TDF_DETAILS))
331 : 101 : fprintf (dump_file, " Induction variable computation will"
332 : : " be folded away.\n");
333 : : likely_eliminated = true;
334 : : }
335 : : /* Assignments of IV variables. */
336 : 6250729 : else if (gimple_code (stmt) == GIMPLE_ASSIGN
337 : 3577918 : && TREE_CODE (gimple_assign_lhs (stmt)) == SSA_NAME
338 : 2948119 : && constant_after_peeling (gimple_assign_rhs1 (stmt),
339 : : stmt, loop)
340 : 129021 : && (gimple_assign_rhs_class (stmt) != GIMPLE_BINARY_RHS
341 : 68251 : || constant_after_peeling (gimple_assign_rhs2 (stmt),
342 : : stmt, loop))
343 : 6323258 : && gimple_assign_rhs_class (stmt) != GIMPLE_TERNARY_RHS)
344 : : {
345 : 72304 : size->constant_iv = true;
346 : 72304 : if (dump_file && (dump_flags & TDF_DETAILS))
347 : 14 : fprintf (dump_file,
348 : : " Constant expression will be folded away.\n");
349 : : likely_eliminated = true;
350 : : }
351 : : /* Conditionals. */
352 : 6178425 : else if ((gimple_code (stmt) == GIMPLE_COND
353 : 904355 : && constant_after_peeling (gimple_cond_lhs (stmt), stmt,
354 : : loop)
355 : 329258 : && constant_after_peeling (gimple_cond_rhs (stmt), stmt,
356 : : loop)
357 : : /* We don't simplify all constant compares so make sure
358 : : they are not both constant already. See PR70288. */
359 : 310406 : && (! is_gimple_min_invariant (gimple_cond_lhs (stmt))
360 : 135 : || ! is_gimple_min_invariant
361 : 135 : (gimple_cond_rhs (stmt))))
362 : 6772509 : || (gimple_code (stmt) == GIMPLE_SWITCH
363 : 1333 : && constant_after_peeling (gimple_switch_index (
364 : : as_a <gswitch *>
365 : 1333 : (stmt)),
366 : : stmt, loop)
367 : 333 : && ! is_gimple_min_invariant
368 : 333 : (gimple_switch_index
369 : 333 : (as_a <gswitch *> (stmt)))))
370 : : {
371 : 310604 : if (dump_file && (dump_flags & TDF_DETAILS))
372 : 27 : fprintf (dump_file, " Constant conditional.\n");
373 : : likely_eliminated = true;
374 : : }
375 : : }
376 : :
377 : 7348657 : size->overall += num;
378 : 7348657 : if (likely_eliminated || likely_eliminated_peeled)
379 : 1255162 : size->eliminated_by_peeling += num;
380 : 7348657 : if (!after_exit)
381 : : {
382 : 4656320 : size->last_iteration += num;
383 : 4656320 : if (likely_eliminated || likely_eliminated_last)
384 : 907524 : size->last_iteration_eliminated_by_peeling += num;
385 : : }
386 : 7348657 : if ((size->overall * 3 / 2 - size->eliminated_by_peeling
387 : 7348657 : - size->last_iteration_eliminated_by_peeling) > upper_bound)
388 : : {
389 : 5959 : free (body);
390 : 5959 : return true;
391 : : }
392 : : }
393 : : }
394 : 1599590 : while (path.length ())
395 : : {
396 : 1230588 : basic_block bb = path.pop ();
397 : 8013630 : for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
398 : : {
399 : 5552454 : gimple *stmt = gsi_stmt (gsi);
400 : 5552454 : if (gimple_code (stmt) == GIMPLE_CALL
401 : 5552454 : && !gimple_inexpensive_call_p (as_a <gcall *> (stmt)))
402 : : {
403 : 124289 : int flags = gimple_call_flags (stmt);
404 : 124289 : if (flags & (ECF_PURE | ECF_CONST))
405 : 27740 : size->num_pure_calls_on_hot_path++;
406 : : else
407 : 96549 : size->num_non_pure_calls_on_hot_path++;
408 : 124289 : size->num_branches_on_hot_path ++;
409 : : }
410 : : /* Count inexpensive calls as non-calls, because they will likely
411 : : expand inline. */
412 : 5428165 : else if (gimple_code (stmt) != GIMPLE_DEBUG)
413 : 4271818 : size->non_call_stmts_on_hot_path++;
414 : 5552454 : if (((gimple_code (stmt) == GIMPLE_COND
415 : 710472 : && (!constant_after_peeling (gimple_cond_lhs (stmt), stmt, loop)
416 : 294818 : || !constant_after_peeling (gimple_cond_rhs (stmt), stmt,
417 : : loop)))
418 : 5118351 : || (gimple_code (stmt) == GIMPLE_SWITCH
419 : 924 : && !constant_after_peeling (gimple_switch_index (
420 : 924 : as_a <gswitch *> (stmt)),
421 : : stmt, loop)))
422 : 5987266 : && (!exit || bb != exit->src))
423 : 429734 : size->num_branches_on_hot_path++;
424 : : }
425 : : }
426 : :
427 : 369002 : if (dump_file && (dump_flags & TDF_DETAILS))
428 : 60 : fprintf (dump_file, "size: %i-%i, last_iteration: %i-%i\n", size->overall,
429 : : size->eliminated_by_peeling, size->last_iteration,
430 : : size->last_iteration_eliminated_by_peeling);
431 : :
432 : 369002 : free (body);
433 : 369002 : return false;
434 : 374961 : }
435 : :
436 : : /* Estimate number of insns of completely unrolled loop.
437 : : It is (NUNROLL + 1) * size of loop body with taking into account
438 : : the fact that in last copy everything after exit conditional
439 : : is dead and that some instructions will be eliminated after
440 : : peeling.
441 : :
442 : : Loop body is likely going to simplify further, this is difficult
443 : : to guess, we just decrease the result by 1/3. */
444 : :
445 : : static unsigned HOST_WIDE_INT
446 : 366411 : estimated_unrolled_size (struct loop_size *size,
447 : : unsigned HOST_WIDE_INT nunroll)
448 : : {
449 : 366411 : HOST_WIDE_INT unr_insns = ((nunroll)
450 : 366411 : * (HOST_WIDE_INT) (size->overall
451 : 366411 : - size->eliminated_by_peeling));
452 : 366411 : if (!nunroll)
453 : 0 : unr_insns = 0;
454 : 366411 : unr_insns += size->last_iteration - size->last_iteration_eliminated_by_peeling;
455 : :
456 : 366411 : unr_insns = unr_insns * 2 / 3;
457 : 366411 : if (unr_insns <= 0)
458 : 2140 : unr_insns = 1;
459 : :
460 : 366411 : return unr_insns;
461 : : }
462 : :
463 : : /* Loop LOOP is known to not loop. See if there is an edge in the loop
464 : : body that can be remove to make the loop to always exit and at
465 : : the same time it does not make any code potentially executed
466 : : during the last iteration dead.
467 : :
468 : : After complete unrolling we still may get rid of the conditional
469 : : on the exit in the last copy even if we have no idea what it does.
470 : : This is quite common case for loops of form
471 : :
472 : : int a[5];
473 : : for (i=0;i<b;i++)
474 : : a[i]=0;
475 : :
476 : : Here we prove the loop to iterate 5 times but we do not know
477 : : it from induction variable.
478 : :
479 : : For now we handle only simple case where there is exit condition
480 : : just before the latch block and the latch block contains no statements
481 : : with side effect that may otherwise terminate the execution of loop
482 : : (such as by EH or by terminating the program or longjmp).
483 : :
484 : : In the general case we may want to cancel the paths leading to statements
485 : : loop-niter identified as having undefined effect in the last iteration.
486 : : The other cases are hopefully rare and will be cleaned up later. */
487 : :
488 : : static edge
489 : 78491 : loop_edge_to_cancel (class loop *loop)
490 : : {
491 : 78491 : unsigned i;
492 : 78491 : edge edge_to_cancel;
493 : 78491 : gimple_stmt_iterator gsi;
494 : :
495 : : /* We want only one predecestor of the loop. */
496 : 78491 : if (EDGE_COUNT (loop->latch->preds) > 1)
497 : : return NULL;
498 : :
499 : 67761 : auto_vec<edge> exits = get_loop_exit_edges (loop);
500 : :
501 : 214250 : FOR_EACH_VEC_ELT (exits, i, edge_to_cancel)
502 : : {
503 : : /* Find the other edge than the loop exit
504 : : leaving the conditoinal. */
505 : 77538 : if (EDGE_COUNT (edge_to_cancel->src->succs) != 2)
506 : 78 : continue;
507 : 77460 : if (EDGE_SUCC (edge_to_cancel->src, 0) == edge_to_cancel)
508 : 22410 : edge_to_cancel = EDGE_SUCC (edge_to_cancel->src, 1);
509 : : else
510 : : edge_to_cancel = EDGE_SUCC (edge_to_cancel->src, 0);
511 : :
512 : : /* We only can handle conditionals. */
513 : 77460 : if (!(edge_to_cancel->flags & (EDGE_TRUE_VALUE | EDGE_FALSE_VALUE)))
514 : 514 : continue;
515 : :
516 : : /* We should never have conditionals in the loop latch. */
517 : 76946 : gcc_assert (edge_to_cancel->dest != loop->header);
518 : :
519 : : /* Check that it leads to loop latch. */
520 : 76946 : if (edge_to_cancel->dest != loop->latch)
521 : 10375 : continue;
522 : :
523 : : /* Verify that the code in loop latch does nothing that may end program
524 : : execution without really reaching the exit. This may include
525 : : non-pure/const function calls, EH statements, volatile ASMs etc. */
526 : 273180 : for (gsi = gsi_start_bb (loop->latch); !gsi_end_p (gsi); gsi_next (&gsi))
527 : 141270 : if (gimple_has_side_effects (gsi_stmt (gsi)))
528 : : return NULL;
529 : : return edge_to_cancel;
530 : : }
531 : : return NULL;
532 : 67761 : }
533 : :
534 : : /* Remove all tests for exits that are known to be taken after LOOP was
535 : : peeled NPEELED times. Put gcc_unreachable before every statement
536 : : known to not be executed. */
537 : :
538 : : static bool
539 : 107591 : remove_exits_and_undefined_stmts (class loop *loop, unsigned int npeeled)
540 : : {
541 : 107591 : class nb_iter_bound *elt;
542 : 107591 : bool changed = false;
543 : :
544 : 321210 : for (elt = loop->bounds; elt; elt = elt->next)
545 : : {
546 : : /* If statement is known to be undefined after peeling, turn it
547 : : into unreachable (or trap when debugging experience is supposed
548 : : to be good). */
549 : 213619 : if (!elt->is_exit
550 : 213619 : && wi::ltu_p (elt->bound, npeeled))
551 : : {
552 : 29267 : gimple_stmt_iterator gsi = gsi_for_stmt (elt->stmt);
553 : 29267 : location_t loc = gimple_location (elt->stmt);
554 : 29267 : gcall *stmt = gimple_build_builtin_unreachable (loc);
555 : 29267 : gsi_insert_before (&gsi, stmt, GSI_NEW_STMT);
556 : 29267 : split_block (gimple_bb (stmt), stmt);
557 : 29267 : changed = true;
558 : 29267 : if (dump_file && (dump_flags & TDF_DETAILS))
559 : : {
560 : 4 : fprintf (dump_file, "Forced statement unreachable: ");
561 : 4 : print_gimple_stmt (dump_file, elt->stmt, 0);
562 : : }
563 : : }
564 : : /* If we know the exit will be taken after peeling, update. */
565 : 184352 : else if (elt->is_exit
566 : 184352 : && wi::leu_p (elt->bound, npeeled))
567 : : {
568 : 82705 : basic_block bb = gimple_bb (elt->stmt);
569 : 82705 : edge exit_edge = EDGE_SUCC (bb, 0);
570 : :
571 : 82705 : if (dump_file && (dump_flags & TDF_DETAILS))
572 : : {
573 : 81 : fprintf (dump_file, "Forced exit to be taken: ");
574 : 81 : print_gimple_stmt (dump_file, elt->stmt, 0);
575 : : }
576 : 82705 : if (!loop_exit_edge_p (loop, exit_edge))
577 : 31047 : exit_edge = EDGE_SUCC (bb, 1);
578 : 82705 : exit_edge->probability = profile_probability::always ();
579 : 82705 : gcc_checking_assert (loop_exit_edge_p (loop, exit_edge));
580 : 82705 : gcond *cond_stmt = as_a <gcond *> (elt->stmt);
581 : 82705 : if (exit_edge->flags & EDGE_TRUE_VALUE)
582 : 50059 : gimple_cond_make_true (cond_stmt);
583 : : else
584 : 32646 : gimple_cond_make_false (cond_stmt);
585 : 82705 : update_stmt (cond_stmt);
586 : 82705 : changed = true;
587 : : }
588 : : }
589 : 107591 : return changed;
590 : : }
591 : :
592 : : /* Remove all exits that are known to be never taken because of the loop bound
593 : : discovered. */
594 : :
595 : : static bool
596 : 1894353 : remove_redundant_iv_tests (class loop *loop)
597 : : {
598 : 1894353 : class nb_iter_bound *elt;
599 : 1894353 : bool changed = false;
600 : :
601 : 1894353 : if (!loop->any_upper_bound)
602 : : return false;
603 : 4444426 : for (elt = loop->bounds; elt; elt = elt->next)
604 : : {
605 : : /* Exit is pointless if it won't be taken before loop reaches
606 : : upper bound. */
607 : 1351774 : if (elt->is_exit && loop->any_upper_bound
608 : 4382258 : && wi::ltu_p (loop->nb_iterations_upper_bound, elt->bound))
609 : : {
610 : 267876 : basic_block bb = gimple_bb (elt->stmt);
611 : 267876 : edge exit_edge = EDGE_SUCC (bb, 0);
612 : 267876 : class tree_niter_desc niter;
613 : :
614 : 267876 : if (!loop_exit_edge_p (loop, exit_edge))
615 : 196746 : exit_edge = EDGE_SUCC (bb, 1);
616 : :
617 : : /* Only when we know the actual number of iterations, not
618 : : just a bound, we can remove the exit. */
619 : 267876 : if (!number_of_iterations_exit (loop, exit_edge,
620 : : &niter, false, false)
621 : 267872 : || !integer_onep (niter.assumptions)
622 : 267872 : || !integer_zerop (niter.may_be_zero)
623 : 172874 : || !niter.niter
624 : 172874 : || TREE_CODE (niter.niter) != INTEGER_CST
625 : 270656 : || !wi::ltu_p (widest_int::from (loop->nb_iterations_upper_bound,
626 : : SIGNED),
627 : 269266 : wi::to_widest (niter.niter)))
628 : 266486 : continue;
629 : :
630 : 1390 : if (dump_file && (dump_flags & TDF_DETAILS))
631 : : {
632 : 2 : fprintf (dump_file, "Removed pointless exit: ");
633 : 2 : print_gimple_stmt (dump_file, elt->stmt, 0);
634 : : }
635 : 1390 : gcond *cond_stmt = as_a <gcond *> (elt->stmt);
636 : 1390 : if (exit_edge->flags & EDGE_TRUE_VALUE)
637 : 1006 : gimple_cond_make_false (cond_stmt);
638 : : else
639 : 384 : gimple_cond_make_true (cond_stmt);
640 : 1390 : update_stmt (cond_stmt);
641 : 1390 : changed = true;
642 : 267876 : }
643 : : }
644 : : return changed;
645 : : }
646 : :
647 : : /* Stores loops that will be unlooped and edges that will be removed
648 : : after we process whole loop tree. */
649 : : static vec<loop_p> loops_to_unloop;
650 : : static vec<int> loops_to_unloop_nunroll;
651 : : static vec<edge> edges_to_remove;
652 : : /* Stores loops that has been peeled. */
653 : : static bitmap peeled_loops;
654 : :
655 : : /* Cancel all fully unrolled loops by putting __builtin_unreachable
656 : : on the latch edge.
657 : : We do it after all unrolling since unlooping moves basic blocks
658 : : across loop boundaries trashing loop closed SSA form as well
659 : : as SCEV info needed to be intact during unrolling.
660 : :
661 : : IRRED_INVALIDATED is used to bookkeep if information about
662 : : irreducible regions may become invalid as a result
663 : : of the transformation.
664 : : LOOP_CLOSED_SSA_INVALIDATED is used to bookkepp the case
665 : : when we need to go into loop closed SSA form. */
666 : :
667 : : void
668 : 261686 : unloop_loops (vec<class loop *> &loops_to_unloop,
669 : : vec<int> &loops_to_unloop_nunroll,
670 : : vec<edge> &edges_to_remove,
671 : : bitmap loop_closed_ssa_invalidated,
672 : : bool *irred_invalidated)
673 : : {
674 : 369277 : while (loops_to_unloop.length ())
675 : : {
676 : 107591 : class loop *loop = loops_to_unloop.pop ();
677 : 107591 : int n_unroll = loops_to_unloop_nunroll.pop ();
678 : 107591 : basic_block latch = loop->latch;
679 : 107591 : edge latch_edge = loop_latch_edge (loop);
680 : 107591 : int flags = latch_edge->flags;
681 : 107591 : location_t locus = latch_edge->goto_locus;
682 : 107591 : gcall *stmt;
683 : 107591 : gimple_stmt_iterator gsi;
684 : :
685 : 107591 : remove_exits_and_undefined_stmts (loop, n_unroll);
686 : :
687 : : /* Unloop destroys the latch edge. */
688 : 107591 : unloop (loop, irred_invalidated, loop_closed_ssa_invalidated);
689 : :
690 : : /* Create new basic block for the latch edge destination and wire
691 : : it in. */
692 : 107591 : stmt = gimple_build_builtin_unreachable (locus);
693 : 107591 : latch_edge = make_edge (latch, create_basic_block (NULL, NULL, latch), flags);
694 : 107591 : latch_edge->probability = profile_probability::never ();
695 : 107591 : latch_edge->flags |= flags;
696 : 107591 : latch_edge->goto_locus = locus;
697 : :
698 : 107591 : add_bb_to_loop (latch_edge->dest, current_loops->tree_root);
699 : 107591 : latch_edge->dest->count = profile_count::zero ();
700 : 107591 : set_immediate_dominator (CDI_DOMINATORS, latch_edge->dest, latch_edge->src);
701 : :
702 : 107591 : gsi = gsi_start_bb (latch_edge->dest);
703 : 107591 : gsi_insert_after (&gsi, stmt, GSI_NEW_STMT);
704 : : }
705 : :
706 : : /* Remove edges in peeled copies. Given remove_path removes dominated
707 : : regions we need to cope with removal of already removed paths. */
708 : 261686 : unsigned i;
709 : 261686 : edge e;
710 : 261686 : auto_vec<int, 20> src_bbs;
711 : 288105 : src_bbs.reserve_exact (edges_to_remove.length ());
712 : 745605 : FOR_EACH_VEC_ELT (edges_to_remove, i, e)
713 : 222233 : src_bbs.quick_push (e->src->index);
714 : 483919 : FOR_EACH_VEC_ELT (edges_to_remove, i, e)
715 : 222233 : if (BASIC_BLOCK_FOR_FN (cfun, src_bbs[i]))
716 : : {
717 : 222233 : bool ok = remove_path (e, irred_invalidated,
718 : : loop_closed_ssa_invalidated);
719 : 222233 : gcc_assert (ok);
720 : : }
721 : 261686 : edges_to_remove.release ();
722 : 261686 : }
723 : :
724 : : /* Tries to unroll LOOP completely, i.e. NITER times.
725 : : UL determines which loops we are allowed to unroll.
726 : : EXIT is the exit of the loop that should be eliminated.
  727 : :    MAXITER specifies bound on number of iterations, -1 if it is
728 : : not known or too large for HOST_WIDE_INT. The location
729 : : LOCUS corresponding to the loop is used when emitting
730 : : a summary of the unroll to the dump file. */
731 : :
732 : : static bool
733 : 1894353 : try_unroll_loop_completely (class loop *loop,
734 : : edge exit, tree niter, bool may_be_zero,
735 : : enum unroll_level ul,
736 : : HOST_WIDE_INT maxiter,
737 : : dump_user_location_t locus, bool allow_peel)
738 : : {
739 : 1894353 : unsigned HOST_WIDE_INT n_unroll = 0;
740 : 1894353 : bool n_unroll_found = false;
741 : 1894353 : edge edge_to_cancel = NULL;
742 : :
743 : : /* See if we proved number of iterations to be low constant.
744 : :
745 : : EXIT is an edge that will be removed in all but last iteration of
746 : : the loop.
747 : :
748 : : EDGE_TO_CACNEL is an edge that will be removed from the last iteration
749 : : of the unrolled sequence and is expected to make the final loop not
750 : : rolling.
751 : :
752 : : If the number of execution of loop is determined by standard induction
753 : : variable test, then EXIT and EDGE_TO_CANCEL are the two edges leaving
754 : : from the iv test. */
755 : 1894353 : if (tree_fits_uhwi_p (niter))
756 : : {
757 : 858479 : n_unroll = tree_to_uhwi (niter);
758 : 858479 : n_unroll_found = true;
759 : 858479 : edge_to_cancel = EDGE_SUCC (exit->src, 0);
760 : 858479 : if (edge_to_cancel == exit)
761 : 253775 : edge_to_cancel = EDGE_SUCC (exit->src, 1);
762 : : }
763 : : /* We do not know the number of iterations and thus we cannot eliminate
764 : : the EXIT edge. */
765 : : else
766 : : exit = NULL;
767 : :
768 : : /* See if we can improve our estimate by using recorded loop bounds. */
769 : 1894353 : if ((maxiter == 0 || ul != UL_SINGLE_ITER)
770 : 1307464 : && maxiter >= 0
771 : 936519 : && (!n_unroll_found || (unsigned HOST_WIDE_INT)maxiter < n_unroll))
772 : : {
773 : 345190 : n_unroll = maxiter;
774 : 345190 : n_unroll_found = true;
775 : : /* Loop terminates before the IV variable test, so we cannot
776 : : remove it in the last iteration. */
777 : 345190 : edge_to_cancel = NULL;
778 : : /* If we do not allow peeling and we iterate just allow cases
779 : : that do not grow code. */
780 : 345190 : if (!allow_peel && maxiter != 0)
781 : 151743 : ul = UL_NO_GROWTH;
782 : : }
783 : :
784 : 1894353 : if (!n_unroll_found)
785 : : return false;
786 : :
787 : 1203511 : if (!loop->unroll
788 : 1202967 : && n_unroll > (unsigned) param_max_completely_peel_times)
789 : : {
790 : 684667 : if (dump_file && (dump_flags & TDF_DETAILS))
791 : 33 : fprintf (dump_file, "Not unrolling loop %d "
792 : : "(--param max-completely-peel-times limit reached).\n",
793 : : loop->num);
794 : 684667 : return false;
795 : : }
796 : :
797 : 518844 : if (!edge_to_cancel)
798 : 78491 : edge_to_cancel = loop_edge_to_cancel (loop);
799 : :
800 : 518844 : if (n_unroll)
801 : : {
802 : 497017 : if (ul == UL_SINGLE_ITER)
803 : 1894353 : return false;
804 : :
805 : 372793 : if (loop->unroll)
806 : : {
807 : : /* If the unrolling factor is too large, bail out. */
808 : 424 : if (n_unroll > (unsigned)loop->unroll)
809 : : {
810 : 225 : if (dump_file && (dump_flags & TDF_DETAILS))
811 : 53 : fprintf (dump_file,
812 : : "Not unrolling loop %d: "
813 : : "user didn't want it unrolled completely.\n",
814 : : loop->num);
815 : 225 : return false;
816 : : }
817 : : }
818 : : else
819 : : {
820 : 372369 : struct loop_size size;
821 : : /* EXIT can be removed only if we are sure it passes first N_UNROLL
822 : : iterations. */
823 : 372369 : bool remove_exit = (exit && niter
824 : 303939 : && TREE_CODE (niter) == INTEGER_CST
825 : 676308 : && wi::leu_p (n_unroll, wi::to_widest (niter)));
826 : 372369 : bool large
827 : : = tree_estimate_loop_size
828 : 372369 : (loop, remove_exit ? exit : NULL, edge_to_cancel, &size,
829 : : param_max_completely_peeled_insns);
830 : 372369 : if (large)
831 : : {
832 : 5958 : if (dump_file && (dump_flags & TDF_DETAILS))
833 : 0 : fprintf (dump_file, "Not unrolling loop %d: it is too large.\n",
834 : : loop->num);
835 : 289175 : return false;
836 : : }
837 : :
838 : 366411 : unsigned HOST_WIDE_INT ninsns = size.overall;
839 : 366411 : unsigned HOST_WIDE_INT unr_insns
840 : 366411 : = estimated_unrolled_size (&size, n_unroll);
841 : 366411 : if (dump_file && (dump_flags & TDF_DETAILS))
842 : : {
843 : 57 : fprintf (dump_file, " Loop size: %d\n", (int) ninsns);
844 : 57 : fprintf (dump_file, " Estimated size after unrolling: %d\n",
845 : : (int) unr_insns);
846 : : }
847 : :
848 : : /* If the code is going to shrink, we don't need to be extra
849 : : cautious on guessing if the unrolling is going to be
850 : : profitable. */
851 : 366411 : if (unr_insns
852 : : /* If there is IV variable that will become constant, we
853 : : save one instruction in the loop prologue we do not
854 : : account otherwise. */
855 : 366411 : <= ninsns + (size.constant_iv != false))
856 : : ;
857 : : /* We unroll only inner loops, because we do not consider it
859 :         :             profitable otherwise.  We still can cancel loopback edge
859 : : of not rolling loop; this is always a good idea. */
860 : 303848 : else if (ul == UL_NO_GROWTH)
861 : : {
862 : 269899 : if (dump_file && (dump_flags & TDF_DETAILS))
863 : 23 : fprintf (dump_file, "Not unrolling loop %d: size would grow.\n",
864 : : loop->num);
865 : 269899 : return false;
866 : : }
867 : : /* Outer loops tend to be less interesting candidates for
868 : : complete unrolling unless we can do a lot of propagation
869 : : into the inner loop body. For now we disable outer loop
870 : : unrolling when the code would grow. */
871 : 33949 : else if (loop->inner)
872 : : {
873 : 3335 : if (dump_file && (dump_flags & TDF_DETAILS))
874 : 0 : fprintf (dump_file, "Not unrolling loop %d: "
875 : : "it is not innermost and code would grow.\n",
876 : : loop->num);
877 : 3335 : return false;
878 : : }
879 : : /* If there is call on a hot path through the loop, then
880 : : there is most probably not much to optimize. */
881 : 30614 : else if (size.num_non_pure_calls_on_hot_path)
882 : : {
883 : 7720 : if (dump_file && (dump_flags & TDF_DETAILS))
884 : 0 : fprintf (dump_file, "Not unrolling loop %d: "
885 : : "contains call and code would grow.\n",
886 : : loop->num);
887 : 7720 : return false;
888 : : }
889 : : /* If there is pure/const call in the function, then we can
890 : : still optimize the unrolled loop body if it contains some
891 : : other interesting code than the calls and code storing or
892 : : cumulating the return value. */
893 : 22894 : else if (size.num_pure_calls_on_hot_path
894 : : /* One IV increment, one test, one ivtmp store and
895 : : one useful stmt. That is about minimal loop
896 : : doing pure call. */
897 : 2328 : && (size.non_call_stmts_on_hot_path
898 : 2328 : <= 3 + size.num_pure_calls_on_hot_path))
899 : : {
900 : 23 : if (dump_file && (dump_flags & TDF_DETAILS))
901 : 0 : fprintf (dump_file, "Not unrolling loop %d: "
902 : : "contains just pure calls and code would grow.\n",
903 : : loop->num);
904 : 23 : return false;
905 : : }
906 : : /* Complete unrolling is major win when control flow is
907 : : removed and one big basic block is created. If the loop
908 : : contains control flow the optimization may still be a win
909 : : because of eliminating the loop overhead but it also may
910 : : blow the branch predictor tables. Limit number of
911 : : branches on the hot path through the peeled sequence. */
912 : 22871 : else if (size.num_branches_on_hot_path * (int)n_unroll
913 : 22871 : > param_max_peel_branches)
914 : : {
915 : 793 : if (dump_file && (dump_flags & TDF_DETAILS))
916 : 0 : fprintf (dump_file, "Not unrolling loop %d: "
917 : : "number of branches on hot path in the unrolled "
918 : : "sequence reaches --param max-peel-branches limit.\n",
919 : : loop->num);
920 : 793 : return false;
921 : : }
922 : 22078 : else if (unr_insns
923 : 22078 : > (unsigned) param_max_completely_peeled_insns)
924 : : {
925 : 1447 : if (dump_file && (dump_flags & TDF_DETAILS))
926 : 1 : fprintf (dump_file, "Not unrolling loop %d: "
927 : : "number of insns in the unrolled sequence reaches "
928 : : "--param max-completely-peeled-insns limit.\n",
929 : : loop->num);
930 : 1447 : return false;
931 : : }
932 : : }
933 : :
934 : 83393 : if (!dbg_cnt (gimple_unroll))
935 : : return false;
936 : :
937 : 83393 : initialize_original_copy_tables ();
938 : 83393 : auto_sbitmap wont_exit (n_unroll + 1);
939 : 83393 : if (exit && niter
940 : 70247 : && TREE_CODE (niter) == INTEGER_CST
941 : 153640 : && wi::leu_p (n_unroll, wi::to_widest (niter)))
942 : : {
943 : 70247 : bitmap_ones (wont_exit);
944 : 140494 : if (wi::eq_p (wi::to_widest (niter), n_unroll)
945 : 70247 : || edge_to_cancel)
946 : 70246 : bitmap_clear_bit (wont_exit, 0);
947 : : }
948 : : else
949 : : {
950 : 13146 : exit = NULL;
951 : 13146 : bitmap_clear (wont_exit);
952 : : }
953 : 83393 : if (may_be_zero)
954 : 2 : bitmap_clear_bit (wont_exit, 1);
955 : :
956 : : /* If loop was originally estimated to iterate too many times,
957 : : reduce the profile to avoid new profile inconsistencies. */
958 : 83393 : scale_loop_profile (loop, profile_probability::always (), n_unroll);
959 : :
960 : 83393 : if (!gimple_duplicate_loop_body_to_header_edge (
961 : : loop, loop_preheader_edge (loop), n_unroll, wont_exit, exit,
962 : : &edges_to_remove,
963 : : DLTHE_FLAG_UPDATE_FREQ | DLTHE_FLAG_COMPLETTE_PEEL))
964 : : {
965 : 8 : free_original_copy_tables ();
966 : 8 : if (dump_file && (dump_flags & TDF_DETAILS))
967 : 0 : fprintf (dump_file, "Failed to duplicate the loop\n");
968 : 8 : return false;
969 : : }
970 : :
971 : 83385 : free_original_copy_tables ();
972 : 83393 : }
973 : : else
974 : 21827 : scale_loop_profile (loop, profile_probability::always (), 0);
975 : :
976 : : /* Remove the conditional from the last copy of the loop. */
977 : 105212 : if (edge_to_cancel)
978 : : {
979 : 210258 : gcond *cond = as_a <gcond *> (*gsi_last_bb (edge_to_cancel->src));
980 : 105129 : force_edge_cold (edge_to_cancel, true);
981 : 105129 : if (edge_to_cancel->flags & EDGE_TRUE_VALUE)
982 : 48367 : gimple_cond_make_false (cond);
983 : : else
984 : 56762 : gimple_cond_make_true (cond);
985 : 105129 : update_stmt (cond);
986 : : /* Do not remove the path, as doing so may remove outer loop and
987 : : confuse bookkeeping code in tree_unroll_loops_completely. */
988 : : }
989 : :
990 : : /* Store the loop for later unlooping and exit removal. */
991 : 105212 : loops_to_unloop.safe_push (loop);
992 : 105212 : loops_to_unloop_nunroll.safe_push (n_unroll);
993 : :
994 : 105212 : if (dump_enabled_p ())
995 : : {
996 : 164 : if (!n_unroll)
997 : 56 : dump_printf_loc (MSG_OPTIMIZED_LOCATIONS | TDF_DETAILS, locus,
998 : : "loop turned into non-loop; it never loops\n");
999 : : else
1000 : : {
1001 : 108 : dump_printf_loc (MSG_OPTIMIZED_LOCATIONS | TDF_DETAILS, locus,
1002 : : "loop with %d iterations completely unrolled",
1003 : : (int) n_unroll);
1004 : 108 : if (loop->header->count.initialized_p ())
1005 : 107 : dump_printf (MSG_OPTIMIZED_LOCATIONS | TDF_DETAILS,
1006 : : " (header execution count %d)",
1007 : 107 : (int)loop->header->count.to_gcov_type ());
1008 : 108 : dump_printf (MSG_OPTIMIZED_LOCATIONS | TDF_DETAILS, "\n");
1009 : : }
1010 : : }
1011 : :
1012 : 105212 : if (dump_file && (dump_flags & TDF_DETAILS))
1013 : : {
1014 : 102 : if (exit)
1015 : 78 : fprintf (dump_file, "Exit condition of peeled iterations was "
1016 : : "eliminated.\n");
1017 : 102 : if (edge_to_cancel)
1018 : 102 : fprintf (dump_file, "Last iteration exit edge was proved true.\n");
1019 : : else
1020 : 0 : fprintf (dump_file, "Latch of last iteration was marked by "
1021 : : "__builtin_unreachable ().\n");
1022 : : }
1023 : :
1024 : : return true;
1025 : : }
1026 : :
1027 : : /* Return number of instructions after peeling. */
1028 : : static unsigned HOST_WIDE_INT
1029 : 2592 : estimated_peeled_sequence_size (struct loop_size *size,
1030 : : unsigned HOST_WIDE_INT npeel)
1031 : : {
1032 : 2592 : return MAX (npeel * (HOST_WIDE_INT) (size->overall
1033 : : - size->eliminated_by_peeling), 1);
1034 : : }
1035 : :
1036 : : /* Update loop estimates after peeling LOOP by NPEEL.
1038 :         :    If PRECISE is false only likely exits were duplicated and thus
1038 : : do not update any estimates that are supposed to be always reliable. */
1039 : : void
1040 : 382184 : adjust_loop_info_after_peeling (class loop *loop, int npeel, bool precise)
1041 : : {
1042 : 382184 : if (loop->any_estimate)
1043 : : {
1044 : : /* Since peeling is mostly about loops where first few
1045 : : iterations are special, it is not quite correct to
1046 : : assume that the remaining iterations will behave
1047 : : the same way. However we do not have better info
1049 :         :          so update the estimate, since it is likely better
1049 : : than keeping it as it is.
1050 : :
1051 : : Remove it if it looks wrong.
1052 : :
1053 : : TODO: We likely want to special case the situation where
1054 : : peeling is optimizing out exit edges and only update
1055 : : estimates here. */
1056 : 196236 : if (wi::leu_p (npeel, loop->nb_iterations_estimate))
1057 : 196212 : loop->nb_iterations_estimate -= npeel;
1058 : : else
1059 : 24 : loop->any_estimate = false;
1060 : : }
1061 : 382184 : if (loop->any_upper_bound && precise)
1062 : : {
1063 : 226474 : if (wi::leu_p (npeel, loop->nb_iterations_upper_bound))
1064 : 226474 : loop->nb_iterations_upper_bound -= npeel;
1065 : : else
1066 : : {
1067 : : /* Peeling maximal number of iterations or more
1068 : : makes no sense and is a bug.
1069 : : We should peel completely. */
1070 : 0 : gcc_unreachable ();
1071 : : }
1072 : : }
1073 : 382184 : if (loop->any_likely_upper_bound)
1074 : : {
1075 : 321711 : if (wi::leu_p (npeel, loop->nb_iterations_likely_upper_bound))
1076 : 320927 : loop->nb_iterations_likely_upper_bound -= npeel;
1077 : : else
1078 : : {
1079 : 784 : loop->any_estimate = true;
1080 : 784 : loop->nb_iterations_estimate = 0;
1081 : 784 : loop->nb_iterations_likely_upper_bound = 0;
1082 : : }
1083 : : }
1084 : 382184 : }
1085 : :
1086 : : /* If the loop is expected to iterate N times and is
1087 : : small enough, duplicate the loop body N+1 times before
1088 : : the loop itself. This way the hot path will never
1089 : : enter the loop.
1090 : : Parameters are the same as for try_unroll_loops_completely */
1091 : :
1092 : : static bool
1093 : 105714 : try_peel_loop (class loop *loop,
1094 : : edge exit, tree niter, bool may_be_zero,
1095 : : HOST_WIDE_INT maxiter)
1096 : : {
1097 : 105714 : HOST_WIDE_INT npeel;
1098 : 105714 : struct loop_size size;
1099 : 105714 : int peeled_size;
1100 : :
1101 : 105714 : if (!flag_peel_loops
1102 : 91042 : || param_max_peel_times <= 0
1103 : 91042 : || !peeled_loops)
1104 : : return false;
1105 : :
1106 : 70447 : if (bitmap_bit_p (peeled_loops, loop->num))
1107 : : {
1108 : 769 : if (dump_file)
1109 : 2 : fprintf (dump_file, "Not peeling: loop is already peeled\n");
1110 : 769 : return false;
1111 : : }
1112 : :
1113 : : /* We don't peel loops that will be unrolled as this can duplicate a
1114 : : loop more times than the user requested. */
1115 : 69678 : if (loop->unroll)
1116 : : {
1117 : 17 : if (dump_file)
1118 : 0 : fprintf (dump_file, "Not peeling: user didn't want it peeled.\n");
1119 : 17 : return false;
1120 : : }
1121 : :
1122 : : /* Peel only innermost loops.
1123 : : While the code is perfectly capable of peeling non-innermost loops,
1124 : : the heuristics would probably need some improvements. */
1125 : 69661 : if (loop->inner)
1126 : : {
1127 : 13066 : if (dump_file)
1128 : 2 : fprintf (dump_file, "Not peeling: outer loop\n");
1129 : 13066 : return false;
1130 : : }
1131 : :
1132 : 56595 : if (!optimize_loop_for_speed_p (loop))
1133 : : {
1134 : 0 : if (dump_file)
1135 : 0 : fprintf (dump_file, "Not peeling: cold loop\n");
1136 : 0 : return false;
1137 : : }
1138 : :
1139 : : /* Check if there is an estimate on the number of iterations. */
1140 : 56595 : npeel = estimated_loop_iterations_int (loop);
1141 : 56595 : if (npeel < 0)
1142 : 42493 : npeel = likely_max_loop_iterations_int (loop);
1143 : 56595 : if (npeel < 0)
1144 : : {
1145 : 12337 : if (dump_file)
1146 : 0 : fprintf (dump_file, "Not peeling: number of iterations is not "
1147 : : "estimated\n");
1148 : 12337 : return false;
1149 : : }
1150 : 44258 : if (maxiter >= 0 && maxiter <= npeel)
1151 : : {
1152 : 40705 : if (dump_file)
1153 : 22 : fprintf (dump_file, "Not peeling: upper bound is known so can "
1154 : : "unroll completely\n");
1155 : 40705 : return false;
1156 : : }
1157 : :
1158 : : /* We want to peel estimated number of iterations + 1 (so we never
1159 : : enter the loop on quick path). Check against PARAM_MAX_PEEL_TIMES
1160 : : and be sure to avoid overflows. */
1161 : 3553 : if (npeel > param_max_peel_times - 1)
1162 : : {
1163 : 961 : if (dump_file)
1164 : 0 : fprintf (dump_file, "Not peeling: rolls too much "
1165 : : "(%i + 1 > --param max-peel-times)\n", (int) npeel);
1166 : 961 : return false;
1167 : : }
1168 : 2592 : npeel++;
1169 : :
1170 : : /* Check peeled loops size. */
1171 : 2592 : tree_estimate_loop_size (loop, exit, NULL, &size,
1172 : : param_max_peeled_insns);
1173 : 2592 : if ((peeled_size = estimated_peeled_sequence_size (&size, (int) npeel))
1174 : 2592 : > param_max_peeled_insns)
1175 : : {
1176 : 1791 : if (dump_file)
1177 : 0 : fprintf (dump_file, "Not peeling: peeled sequence size is too large "
1178 : : "(%i insns > --param max-peel-insns)", peeled_size);
1179 : 1791 : return false;
1180 : : }
1181 : :
1182 : 801 : if (!dbg_cnt (gimple_unroll))
1183 : : return false;
1184 : :
1185 : : /* Duplicate possibly eliminating the exits. */
1186 : 801 : initialize_original_copy_tables ();
1187 : 801 : auto_sbitmap wont_exit (npeel + 1);
1188 : 801 : if (exit && niter
1189 : 9 : && TREE_CODE (niter) == INTEGER_CST
1190 : 810 : && wi::leu_p (npeel, wi::to_widest (niter)))
1191 : : {
1192 : 9 : bitmap_ones (wont_exit);
1193 : 9 : bitmap_clear_bit (wont_exit, 0);
1194 : : }
1195 : : else
1196 : : {
1197 : 792 : exit = NULL;
1198 : 792 : bitmap_clear (wont_exit);
1199 : : }
1200 : 801 : if (may_be_zero)
1201 : 0 : bitmap_clear_bit (wont_exit, 1);
1202 : :
1203 : 801 : if (!gimple_duplicate_loop_body_to_header_edge (
1204 : : loop, loop_preheader_edge (loop), npeel, wont_exit, exit,
1205 : : &edges_to_remove, DLTHE_FLAG_UPDATE_FREQ))
1206 : : {
1207 : 0 : free_original_copy_tables ();
1208 : 0 : return false;
1209 : : }
1210 : 801 : free_original_copy_tables ();
1211 : 801 : if (dump_file && (dump_flags & TDF_DETAILS))
1212 : : {
1213 : 3 : fprintf (dump_file, "Peeled loop %d, %i times.\n",
1214 : : loop->num, (int) npeel);
1215 : : }
1216 : 801 : adjust_loop_info_after_peeling (loop, npeel, true);
1217 : :
1218 : 801 : bitmap_set_bit (peeled_loops, loop->num);
1219 : 801 : return true;
1220 : 801 : }
1221 : : /* Adds a canonical induction variable to LOOP if suitable.
1222 : : CREATE_IV is true if we may create a new iv. UL determines
1223 : : which loops we are allowed to completely unroll. If TRY_EVAL is true, we try
1224 : : to determine the number of iterations of a loop by direct evaluation.
1225 : : Returns true if cfg is changed. */
1226 : :
1227 : : static bool
1228 : 1894353 : canonicalize_loop_induction_variables (class loop *loop,
1229 : : bool create_iv, enum unroll_level ul,
1230 : : bool try_eval, bool allow_peel)
1231 : : {
1232 : 1894353 : edge exit = NULL;
1233 : 1894353 : tree niter;
1234 : 1894353 : HOST_WIDE_INT maxiter;
1235 : 1894353 : bool modified = false;
1236 : 1894353 : class tree_niter_desc niter_desc;
1237 : 1894353 : bool may_be_zero = false;
1238 : :
1239 : : /* For unrolling allow conditional constant or zero iterations, thus
1240 : : perform loop-header copying on-the-fly. */
1241 : 1894353 : exit = single_exit (loop);
1242 : 1894353 : niter = chrec_dont_know;
1243 : 1894353 : if (exit && number_of_iterations_exit (loop, exit, &niter_desc, false))
1244 : : {
1245 : 934354 : niter = niter_desc.niter;
1246 : 934354 : may_be_zero
1247 : 934354 : = niter_desc.may_be_zero && !integer_zerop (niter_desc.may_be_zero);
1248 : : }
1249 : 1894353 : if (TREE_CODE (niter) != INTEGER_CST)
1250 : : {
1251 : : /* For non-constant niter fold may_be_zero into niter again. */
1252 : 1301307 : if (may_be_zero)
1253 : : {
1254 : 105174 : if (COMPARISON_CLASS_P (niter_desc.may_be_zero))
1255 : 105171 : niter = fold_build3 (COND_EXPR, TREE_TYPE (niter),
1256 : : niter_desc.may_be_zero,
1257 : : build_int_cst (TREE_TYPE (niter), 0), niter);
1258 : : else
1259 : 3 : niter = chrec_dont_know;
1260 : : may_be_zero = false;
1261 : : }
1262 : :
1263 : : /* If the loop has more than one exit, try checking all of them
1264 : : for # of iterations determinable through scev. */
1265 : 1301307 : if (!exit)
1266 : 624904 : niter = find_loop_niter (loop, &exit);
1267 : :
1268 : : /* Finally if everything else fails, try brute force evaluation. */
1269 : 1301307 : if (try_eval
1270 : 1301307 : && (chrec_contains_undetermined (niter)
1271 : 222255 : || TREE_CODE (niter) != INTEGER_CST))
1272 : 322564 : niter = find_loop_niter_by_eval (loop, &exit);
1273 : :
1274 : 1301307 : if (TREE_CODE (niter) != INTEGER_CST)
1275 : 1035874 : exit = NULL;
1276 : : }
1277 : :
1278 : : /* We work exceptionally hard here to estimate the bound
1279 : : by find_loop_niter_by_eval. Be sure to keep it for future. */
1280 : 2930227 : if (niter && TREE_CODE (niter) == INTEGER_CST)
1281 : : {
1282 : 858479 : auto_vec<edge> exits = get_loop_exit_edges (loop);
1283 : 858479 : record_niter_bound (loop, wi::to_widest (niter),
1284 : 858479 : exit == single_likely_exit (loop, exits), true);
1285 : 858479 : }
1286 : :
1287 : : /* Force re-computation of loop bounds so we can remove redundant exits. */
1288 : 1894353 : maxiter = max_loop_iterations_int (loop);
1289 : :
1290 : 322 : if (dump_file && (dump_flags & TDF_DETAILS)
1291 : 1894618 : && TREE_CODE (niter) == INTEGER_CST)
1292 : : {
1293 : 158 : fprintf (dump_file, "Loop %d iterates ", loop->num);
1294 : 158 : print_generic_expr (dump_file, niter, TDF_SLIM);
1295 : 158 : fprintf (dump_file, " times.\n");
1296 : : }
1297 : 322 : if (dump_file && (dump_flags & TDF_DETAILS)
1298 : 1894618 : && maxiter >= 0)
1299 : : {
1300 : 220 : fprintf (dump_file, "Loop %d iterates at most %i times.\n", loop->num,
1301 : : (int)maxiter);
1302 : : }
1303 : 322 : if (dump_file && (dump_flags & TDF_DETAILS)
1304 : 1894618 : && likely_max_loop_iterations_int (loop) >= 0)
1305 : : {
1306 : 220 : fprintf (dump_file, "Loop %d likely iterates at most %i times.\n",
1307 : 220 : loop->num, (int)likely_max_loop_iterations_int (loop));
1308 : : }
1309 : :
1310 : : /* Remove exits that are known to be never taken based on loop bound.
1311 : : Needs to be called after compilation of max_loop_iterations_int that
1312 : : populates the loop bounds. */
1313 : 1894353 : modified |= remove_redundant_iv_tests (loop);
1314 : :
1315 : 1894353 : dump_user_location_t locus = find_loop_location (loop);
1316 : 1894353 : if (try_unroll_loop_completely (loop, exit, niter, may_be_zero, ul,
1317 : : maxiter, locus, allow_peel))
1318 : : return true;
1319 : :
1320 : 1789141 : if (create_iv
1321 : 586889 : && niter && !chrec_contains_undetermined (niter)
1322 : 2056105 : && exit && just_once_each_iteration_p (loop, exit->src))
1323 : : {
1324 : 266960 : tree iv_niter = niter;
1325 : 266960 : if (may_be_zero)
1326 : : {
1327 : 11 : if (COMPARISON_CLASS_P (niter_desc.may_be_zero))
1328 : 11 : iv_niter = fold_build3 (COND_EXPR, TREE_TYPE (iv_niter),
1329 : : niter_desc.may_be_zero,
1330 : : build_int_cst (TREE_TYPE (iv_niter), 0),
1331 : : iv_niter);
1332 : : else
1333 : : iv_niter = NULL_TREE;
1334 : : }
1335 : 266960 : if (iv_niter)
1336 : 266960 : create_canonical_iv (loop, exit, iv_niter);
1337 : : }
1338 : :
1339 : 1789141 : if (ul == UL_ALL)
1340 : 105714 : modified |= try_peel_loop (loop, exit, niter, may_be_zero, maxiter);
1341 : :
1342 : : return modified;
1343 : 1894353 : }
1344 : :
1345 : : /* The main entry point of the pass. Adds canonical induction variables
1346 : : to the suitable loops. */
1347 : :
1348 : : unsigned int
1349 : 219523 : canonicalize_induction_variables (void)
1350 : : {
1351 : 219523 : bool changed = false;
1352 : 219523 : bool irred_invalidated = false;
1353 : 219523 : bitmap loop_closed_ssa_invalidated = BITMAP_ALLOC (NULL);
1354 : :
1355 : 219523 : estimate_numbers_of_iterations (cfun);
1356 : :
1357 : 1245548 : for (auto loop : loops_list (cfun, LI_FROM_INNERMOST))
1358 : : {
1359 : 586979 : changed |= canonicalize_loop_induction_variables (loop,
1360 : : true, UL_SINGLE_ITER,
1361 : : true, false);
1362 : 219523 : }
1363 : 219523 : gcc_assert (!need_ssa_update_p (cfun));
1364 : :
1365 : 219523 : unloop_loops (loops_to_unloop, loops_to_unloop_nunroll, edges_to_remove,
1366 : : loop_closed_ssa_invalidated, &irred_invalidated);
1367 : 219523 : loops_to_unloop.release ();
1368 : 219523 : loops_to_unloop_nunroll.release ();
1369 : 219523 : if (irred_invalidated
1370 : 219523 : && loops_state_satisfies_p (LOOPS_HAVE_MARKED_IRREDUCIBLE_REGIONS))
1371 : 1 : mark_irreducible_loops ();
1372 : :
1373 : : /* Clean up the information about numbers of iterations, since brute force
1374 : : evaluation could reveal new information. */
1375 : 219523 : free_numbers_of_iterations_estimates (cfun);
1376 : 219523 : scev_reset ();
1377 : :
1378 : 219523 : if (!bitmap_empty_p (loop_closed_ssa_invalidated))
1379 : : {
1380 : 10 : gcc_checking_assert (loops_state_satisfies_p (LOOP_CLOSED_SSA));
1381 : 10 : rewrite_into_loop_closed_ssa (NULL, TODO_update_ssa);
1382 : : }
1383 : 219523 : BITMAP_FREE (loop_closed_ssa_invalidated);
1384 : :
1385 : 219523 : if (changed)
1386 : 566 : return TODO_cleanup_cfg;
1387 : : return 0;
1388 : : }
1389 : :
1390 : : /* Process loops from innermost to outer, stopping at the innermost
1391 : : loop we unrolled. */
1392 : :
1393 : : static bool
1394 : 1808855 : tree_unroll_loops_completely_1 (bool may_increase_size, bool unroll_outer,
1395 : : bitmap father_bbs, class loop *loop)
1396 : : {
1397 : 1808855 : class loop *loop_father;
1398 : 1808855 : bool changed = false;
1399 : 1808855 : class loop *inner;
1400 : 1808855 : enum unroll_level ul;
1401 : 1808855 : unsigned num = number_of_loops (cfun);
1402 : :
1403 : : /* Process inner loops first. Don't walk loops added by the recursive
1404 : : calls because SSA form is not up-to-date. They can be handled in the
1405 : : next iteration. */
1406 : 1808855 : bitmap child_father_bbs = NULL;
1407 : 3154059 : for (inner = loop->inner; inner != NULL; inner = inner->next)
1408 : 1345204 : if ((unsigned) inner->num < num)
1409 : : {
1410 : 1344026 : if (!child_father_bbs)
1411 : 653669 : child_father_bbs = BITMAP_ALLOC (NULL);
1412 : 1344026 : if (tree_unroll_loops_completely_1 (may_increase_size, unroll_outer,
1413 : : child_father_bbs, inner))
1414 : : {
1415 : 132583 : bitmap_ior_into (father_bbs, child_father_bbs);
1416 : 132583 : bitmap_clear (child_father_bbs);
1417 : 132583 : changed = true;
1418 : : }
1419 : : }
1420 : 1808855 : if (child_father_bbs)
1421 : 653669 : BITMAP_FREE (child_father_bbs);
1422 : :
1423 : : /* If we changed an inner loop we cannot process outer loops in this
1424 : : iteration because SSA form is not up-to-date. Continue with
1425 : : siblings of outer loops instead. */
1426 : 1808855 : if (changed)
1427 : : {
1428 : : /* If we are recorded as father clear all other fathers that
1429 : : are necessarily covered already to avoid redundant work. */
1430 : 68027 : if (bitmap_bit_p (father_bbs, loop->header->index))
1431 : : {
1432 : 19802 : bitmap_clear (father_bbs);
1433 : 19802 : bitmap_set_bit (father_bbs, loop->header->index);
1434 : : }
1435 : 68027 : return true;
1436 : : }
1437 : :
1438 : : /* Don't unroll #pragma omp simd loops until the vectorizer
1439 : : attempts to vectorize those. */
1440 : 1740828 : if (loop->force_vectorize)
1441 : : return false;
1442 : :
1443 : : /* Try to unroll this loop. */
1444 : 1730655 : loop_father = loop_outer (loop);
1445 : 1730655 : if (!loop_father)
1446 : : return false;
1447 : :
1448 : 1307374 : if (loop->unroll > 1)
1449 : : ul = UL_ALL;
1450 : 220167 : else if (may_increase_size && optimize_loop_nest_for_speed_p (loop)
1451 : : /* Unroll outermost loops only if asked to do so or they do
1452 : : not cause code growth. */
1453 : 1522959 : && (unroll_outer || loop_outer (loop_father)))
1454 : : ul = UL_ALL;
1455 : : else
1456 : : ul = UL_NO_GROWTH;
1457 : :
1458 : 1307374 : if (canonicalize_loop_induction_variables
1459 : 1307374 : (loop, false, ul, !flag_tree_loop_ivcanon, unroll_outer))
1460 : : {
1461 : : /* If we'll continue unrolling, we need to propagate constants
1462 : : within the new basic blocks to fold away induction variable
1463 : : computations; otherwise, the size might blow up before the
1464 : : iteration is complete and the IR eventually cleaned up. */
1465 : 106104 : if (loop_outer (loop_father))
1466 : : {
1467 : : /* Once we process our father we will have processed
1468 : : the fathers of our children as well, so avoid doing
1469 : : redundant work and clear fathers we've gathered sofar. */
1470 : 25411 : bitmap_clear (father_bbs);
1471 : 25411 : bitmap_set_bit (father_bbs, loop_father->header->index);
1472 : : }
1473 : 80693 : else if (unroll_outer)
1474 : : /* Trigger scalar cleanup once any outermost loop gets unrolled. */
1475 : 45336 : cfun->pending_TODOs |= PENDING_TODO_force_next_scalar_cleanup;
1476 : :
1477 : 106104 : return true;
1478 : : }
1479 : :
1480 : : return false;
1481 : : }
1482 : :
1483 : : /* Unroll LOOPS completely if they iterate just few times. Unless
1484 : : MAY_INCREASE_SIZE is true, perform the unrolling only if the
1485 : : size of the code does not increase. */
1486 : :
1487 : : static unsigned int
1488 : 423288 : tree_unroll_loops_completely (bool may_increase_size, bool unroll_outer)
1489 : : {
1490 : 423288 : bitmap father_bbs = BITMAP_ALLOC (NULL);
1491 : 423288 : bool changed;
1492 : 423288 : int iteration = 0;
1493 : 423288 : bool irred_invalidated = false;
1494 : :
1495 : 423288 : estimate_numbers_of_iterations (cfun);
1496 : :
1497 : 464829 : do
1498 : : {
1499 : 464829 : changed = false;
1500 : 464829 : bitmap loop_closed_ssa_invalidated = NULL;
1501 : :
1502 : 464829 : if (loops_state_satisfies_p (LOOP_CLOSED_SSA))
1503 : 245374 : loop_closed_ssa_invalidated = BITMAP_ALLOC (NULL);
1504 : :
1505 : 464829 : free_numbers_of_iterations_estimates (cfun);
1506 : 464829 : estimate_numbers_of_iterations (cfun);
1507 : :
1508 : 929658 : changed = tree_unroll_loops_completely_1 (may_increase_size,
1509 : : unroll_outer, father_bbs,
1510 : 464829 : current_loops->tree_root);
1511 : 464829 : if (changed)
1512 : : {
1513 : 41548 : unsigned i;
1514 : :
1515 : 41548 : unloop_loops (loops_to_unloop, loops_to_unloop_nunroll,
1516 : : edges_to_remove, loop_closed_ssa_invalidated,
1517 : : &irred_invalidated);
1518 : 41548 : loops_to_unloop.release ();
1519 : 41548 : loops_to_unloop_nunroll.release ();
1520 : :
1521 : : /* We cannot use TODO_update_ssa_no_phi because VOPS gets confused. */
1522 : 41548 : if (loop_closed_ssa_invalidated
1523 : 41548 : && !bitmap_empty_p (loop_closed_ssa_invalidated))
1524 : 5590 : rewrite_into_loop_closed_ssa (loop_closed_ssa_invalidated,
1525 : : TODO_update_ssa);
1526 : : else
1527 : 35958 : update_ssa (TODO_update_ssa);
1528 : :
1529 : : /* father_bbs is a bitmap of loop father header BB indices.
1530 : : Translate that to what non-root loops these BBs belong to now. */
1531 : 41548 : bitmap_iterator bi;
1532 : 41548 : bitmap fathers = BITMAP_ALLOC (NULL);
1533 : 60155 : EXECUTE_IF_SET_IN_BITMAP (father_bbs, 0, i, bi)
1534 : : {
1535 : 18607 : basic_block unrolled_loop_bb = BASIC_BLOCK_FOR_FN (cfun, i);
1536 : 18607 : if (! unrolled_loop_bb)
1537 : 0 : continue;
1538 : 18607 : if (loop_outer (unrolled_loop_bb->loop_father))
1539 : 18607 : bitmap_set_bit (fathers,
1540 : : unrolled_loop_bb->loop_father->num);
1541 : : }
1542 : 41548 : bitmap_clear (father_bbs);
1543 : : /* Propagate the constants within the new basic blocks. */
1544 : 60155 : EXECUTE_IF_SET_IN_BITMAP (fathers, 0, i, bi)
1545 : : {
1546 : 18607 : loop_p father = get_loop (cfun, i);
1547 : 18607 : bitmap exit_bbs = BITMAP_ALLOC (NULL);
1548 : 18607 : loop_exit *exit = father->exits->next;
1549 : 88526 : while (exit->e)
1550 : : {
1551 : 69919 : bitmap_set_bit (exit_bbs, exit->e->dest->index);
1552 : 69919 : exit = exit->next;
1553 : : }
1554 : 18607 : do_rpo_vn (cfun, loop_preheader_edge (father), exit_bbs);
1555 : : }
1556 : 41548 : BITMAP_FREE (fathers);
1557 : :
1558 : : /* Clean up the information about numbers of iterations, since
1559 : : complete unrolling might have invalidated it. */
1560 : 41548 : scev_reset ();
1561 : :
1562 : : /* This will take care of removing completely unrolled loops
1563 : : from the loop structures so we can continue unrolling now
1564 : : innermost loops. */
1565 : 41548 : if (cleanup_tree_cfg ())
1566 : 41548 : update_ssa (TODO_update_ssa_only_virtuals);
1567 : :
1568 : 41548 : if (flag_checking && loops_state_satisfies_p (LOOP_CLOSED_SSA))
1569 : 25862 : verify_loop_closed_ssa (true);
1570 : : }
1571 : 464829 : if (loop_closed_ssa_invalidated)
1572 : 245374 : BITMAP_FREE (loop_closed_ssa_invalidated);
1573 : : }
1574 : : while (changed
1575 : 464836 : && ++iteration <= param_max_unroll_iterations);
1576 : :
1577 : 423288 : BITMAP_FREE (father_bbs);
1578 : :
1579 : 423288 : if (irred_invalidated
1580 : 423288 : && loops_state_satisfies_p (LOOPS_HAVE_MARKED_IRREDUCIBLE_REGIONS))
1581 : 23 : mark_irreducible_loops ();
1582 : :
1583 : 423288 : return 0;
1584 : : }
1585 : :
1586 : : /* Canonical induction variable creation pass. */
1587 : :
1588 : : namespace {
1589 : :
1590 : : const pass_data pass_data_iv_canon =
1591 : : {
1592 : : GIMPLE_PASS, /* type */
1593 : : "ivcanon", /* name */
1594 : : OPTGROUP_LOOP, /* optinfo_flags */
1595 : : TV_TREE_LOOP_IVCANON, /* tv_id */
1596 : : ( PROP_cfg | PROP_ssa ), /* properties_required */
1597 : : 0, /* properties_provided */
1598 : : 0, /* properties_destroyed */
1599 : : 0, /* todo_flags_start */
1600 : : 0, /* todo_flags_finish */
1601 : : };
1602 : :
1603 : : class pass_iv_canon : public gimple_opt_pass
1604 : : {
1605 : : public:
1606 : 281914 : pass_iv_canon (gcc::context *ctxt)
1607 : 563828 : : gimple_opt_pass (pass_data_iv_canon, ctxt)
1608 : : {}
1609 : :
1610 : : /* opt_pass methods: */
1611 : 219536 : bool gate (function *) final override { return flag_tree_loop_ivcanon != 0; }
1612 : : unsigned int execute (function *fun) final override;
1613 : :
1614 : : }; // class pass_iv_canon
1615 : :
1616 : : unsigned int
1617 : 219523 : pass_iv_canon::execute (function *fun)
1618 : : {
1619 : 439046 : if (number_of_loops (fun) <= 1)
1620 : : return 0;
1621 : :
1622 : 219523 : return canonicalize_induction_variables ();
1623 : : }
1624 : :
1625 : : } // anon namespace
1626 : :
1627 : : gimple_opt_pass *
1628 : 281914 : make_pass_iv_canon (gcc::context *ctxt)
1629 : : {
1630 : 281914 : return new pass_iv_canon (ctxt);
1631 : : }
1632 : :
1633 : : /* Complete unrolling of loops. */
1634 : :
1635 : : namespace {
1636 : :
1637 : : const pass_data pass_data_complete_unroll =
1638 : : {
1639 : : GIMPLE_PASS, /* type */
1640 : : "cunroll", /* name */
1641 : : OPTGROUP_LOOP, /* optinfo_flags */
1642 : : TV_COMPLETE_UNROLL, /* tv_id */
1643 : : ( PROP_cfg | PROP_ssa ), /* properties_required */
1644 : : 0, /* properties_provided */
1645 : : 0, /* properties_destroyed */
1646 : : 0, /* todo_flags_start */
1647 : : 0, /* todo_flags_finish */
1648 : : };
1649 : :
1650 : : class pass_complete_unroll : public gimple_opt_pass
1651 : : {
1652 : : public:
1653 : 281914 : pass_complete_unroll (gcc::context *ctxt)
1654 : 563828 : : gimple_opt_pass (pass_data_complete_unroll, ctxt)
1655 : : {}
1656 : :
1657 : : /* opt_pass methods: */
1658 : : unsigned int execute (function *) final override;
1659 : :
1660 : : }; // class pass_complete_unroll
1661 : :
1662 : : unsigned int
1663 : 219514 : pass_complete_unroll::execute (function *fun)
1664 : : {
1665 : 439028 : if (number_of_loops (fun) <= 1)
1666 : : return 0;
1667 : :
1668 : : /* If we ever decide to run loop peeling more than once, we will need to
1669 : : track loops already peeled in loop structures themselves to avoid
1670 : : re-peeling the same loop multiple times. */
1671 : 219514 : if (flag_peel_loops)
1672 : 24139 : peeled_loops = BITMAP_ALLOC (NULL);
1673 : 219514 : unsigned int val = tree_unroll_loops_completely (flag_cunroll_grow_size,
1674 : : true);
1675 : 219514 : if (peeled_loops)
1676 : : {
1677 : 24139 : BITMAP_FREE (peeled_loops);
1678 : 24139 : peeled_loops = NULL;
1679 : : }
1680 : : return val;
1681 : : }
1682 : :
1683 : : } // anon namespace
1684 : :
1685 : : gimple_opt_pass *
1686 : 281914 : make_pass_complete_unroll (gcc::context *ctxt)
1687 : : {
1688 : 281914 : return new pass_complete_unroll (ctxt);
1689 : : }
1690 : :
1691 : : /* Complete unrolling of inner loops. */
1692 : :
1693 : : namespace {
1694 : :
1695 : : const pass_data pass_data_complete_unrolli =
1696 : : {
1697 : : GIMPLE_PASS, /* type */
1698 : : "cunrolli", /* name */
1699 : : OPTGROUP_LOOP, /* optinfo_flags */
1700 : : TV_COMPLETE_UNROLL, /* tv_id */
1701 : : ( PROP_cfg | PROP_ssa ), /* properties_required */
1702 : : 0, /* properties_provided */
1703 : : 0, /* properties_destroyed */
1704 : : 0, /* todo_flags_start */
1705 : : 0, /* todo_flags_finish */
1706 : : };
1707 : :
1708 : : class pass_complete_unrolli : public gimple_opt_pass
1709 : : {
1710 : : public:
1711 : 281914 : pass_complete_unrolli (gcc::context *ctxt)
1712 : 563828 : : gimple_opt_pass (pass_data_complete_unrolli, ctxt)
1713 : : {}
1714 : :
1715 : : /* opt_pass methods: */
1716 : 985383 : bool gate (function *) final override { return optimize >= 2; }
1717 : : unsigned int execute (function *) final override;
1718 : :
1719 : : }; // class pass_complete_unrolli
1720 : :
1721 : : unsigned int
1722 : 914574 : pass_complete_unrolli::execute (function *fun)
1723 : : {
1724 : 914574 : unsigned ret = 0;
1725 : :
1726 : 914574 : loop_optimizer_init (LOOPS_NORMAL | LOOPS_HAVE_RECORDED_EXITS);
1727 : 1829148 : if (number_of_loops (fun) > 1)
1728 : : {
1729 : 203774 : scev_initialize ();
1730 : 203774 : ret = tree_unroll_loops_completely (optimize >= 3, false);
1731 : 203774 : scev_finalize ();
1732 : : }
1733 : 914574 : loop_optimizer_finalize ();
1734 : :
1735 : 914574 : return ret;
1736 : : }
1737 : :
1738 : : } // anon namespace
1739 : :
1740 : : gimple_opt_pass *
1741 : 281914 : make_pass_complete_unrolli (gcc::context *ctxt)
1742 : : {
1743 : 281914 : return new pass_complete_unrolli (ctxt);
1744 : : }
1745 : :
1746 : :
|