Branch data Line data Source code
1 : : /* Loop manipulation code for GNU compiler.
2 : : Copyright (C) 2002-2025 Free Software Foundation, Inc.
3 : :
4 : : This file is part of GCC.
5 : :
6 : : GCC is free software; you can redistribute it and/or modify it under
7 : : the terms of the GNU General Public License as published by the Free
8 : : Software Foundation; either version 3, or (at your option) any later
9 : : version.
10 : :
11 : : GCC is distributed in the hope that it will be useful, but WITHOUT ANY
12 : : WARRANTY; without even the implied warranty of MERCHANTABILITY or
13 : : FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
14 : : for more details.
15 : :
16 : : You should have received a copy of the GNU General Public License
17 : : along with GCC; see the file COPYING3. If not see
18 : : <http://www.gnu.org/licenses/>. */
19 : :
20 : : #include "config.h"
21 : : #include "system.h"
22 : : #include "coretypes.h"
23 : : #include "backend.h"
24 : : #include "rtl.h"
25 : : #include "tree.h"
26 : : #include "gimple.h"
27 : : #include "cfghooks.h"
28 : : #include "cfganal.h"
29 : : #include "cfgloop.h"
30 : : #include "gimple-iterator.h"
31 : : #include "gimplify-me.h"
32 : : #include "tree-ssa-loop-manip.h"
33 : : #include "dumpfile.h"
34 : : #include "sreal.h"
35 : : #include "tree-cfg.h"
36 : : #include "tree-pass.h"
37 : :
38 : : static void copy_loops_to (class loop **, int,
39 : : class loop *);
40 : : static void loop_redirect_edge (edge, basic_block);
41 : : static void remove_bbs (basic_block *, int);
42 : : static bool rpe_enum_p (const_basic_block, const void *);
43 : : static int find_path (edge, basic_block **);
44 : : static void fix_loop_placements (class loop *, bool *, bitmap);
45 : : static bool fix_bb_placement (basic_block);
46 : : static void fix_bb_placements (basic_block, bool *, bitmap);
47 : :
48 : : /* Checks whether basic block BB is dominated by DATA. */
49 : : static bool
50 : 428566 : rpe_enum_p (const_basic_block bb, const void *data)
51 : : {
52 : 428566 : return dominated_by_p (CDI_DOMINATORS, bb, (const_basic_block) data);
53 : : }
54 : :
55 : : /* Remove basic blocks BBS. NBBS is the number of the basic blocks. */
56 : :
57 : : static void
58 : 428566 : remove_bbs (basic_block *bbs, int nbbs)
59 : : {
60 : 428566 : int i;
61 : :
62 : 857132 : for (i = 0; i < nbbs; i++)
63 : 428566 : delete_basic_block (bbs[i]);
64 : 428566 : }
65 : :
66 : : /* Find path -- i.e. the basic blocks dominated by edge E and put them
67 : : into array BBS, that will be allocated large enough to contain them.
68 : : E->dest must have exactly one predecessor for this to work (it is
69 : : easy to achieve and we do not put it here because we do not want to
70 : : alter anything by this function). The number of basic blocks in the
71 : : path is returned. */
72 : : static int
73 : 428566 : find_path (edge e, basic_block **bbs)
74 : : {
75 : 428566 : gcc_assert (EDGE_COUNT (e->dest->preds) <= 1);
76 : :
77 : : /* Find bbs in the path. */
78 : 428566 : *bbs = XNEWVEC (basic_block, n_basic_blocks_for_fn (cfun));
79 : 428566 : return dfs_enumerate_from (e->dest, 0, rpe_enum_p, *bbs,
80 : 428566 : n_basic_blocks_for_fn (cfun), e->dest);
81 : : }
82 : :
83 : : /* Fix placement of basic block BB inside loop hierarchy --
84 : : Let L be a loop to that BB belongs. Then every successor of BB must either
85 : : 1) belong to some superloop of loop L, or
86 : : 2) be a header of loop K such that K->outer is superloop of L
87 : : Returns true if we had to move BB into other loop to enforce this condition,
88 : : false if the placement of BB was already correct (provided that placements
89 : : of its successors are correct). */
90 : : static bool
91 : 236495 : fix_bb_placement (basic_block bb)
92 : : {
93 : 236495 : edge e;
94 : 236495 : edge_iterator ei;
95 : 236495 : class loop *loop = current_loops->tree_root, *act;
96 : :
97 : 483639 : FOR_EACH_EDGE (e, ei, bb->succs)
98 : : {
99 : 247144 : if (e->dest == EXIT_BLOCK_PTR_FOR_FN (cfun))
100 : 0 : continue;
101 : :
102 : 247144 : act = e->dest->loop_father;
103 : 247144 : if (act->header == e->dest)
104 : 296 : act = loop_outer (act);
105 : :
106 : 247144 : if (flow_loop_nested_p (loop, act))
107 : 247144 : loop = act;
108 : : }
109 : :
110 : 236495 : if (loop == bb->loop_father)
111 : : return false;
112 : :
113 : 49786 : remove_bb_from_loops (bb);
114 : 49786 : add_bb_to_loop (bb, loop);
115 : :
116 : 49786 : return true;
117 : : }
118 : :
119 : : /* Fix placement of LOOP inside loop tree, i.e. find the innermost superloop
120 : : of LOOP to that leads at least one exit edge of LOOP, and set it
121 : : as the immediate superloop of LOOP. Return true if the immediate superloop
122 : : of LOOP changed.
123 : :
124 : : IRRED_INVALIDATED is set to true if a change in the loop structures might
125 : : invalidate the information about irreducible regions. */
126 : :
127 : : static bool
128 : 195896 : fix_loop_placement (class loop *loop, bool *irred_invalidated,
129 : : bitmap loop_closed_ssa_invalidated)
130 : : {
131 : 195896 : unsigned i;
132 : 195896 : edge e;
133 : 195896 : auto_vec<edge> exits = get_loop_exit_edges (loop);
134 : 195896 : class loop *father = current_loops->tree_root, *act;
135 : 195896 : bool ret = false;
136 : :
137 : 1216793 : FOR_EACH_VEC_ELT (exits, i, e)
138 : : {
139 : 1020897 : act = find_common_loop (loop, e->dest->loop_father);
140 : 1020897 : if (flow_loop_nested_p (father, act))
141 : 71307 : father = act;
142 : : }
143 : :
144 : 195896 : if (father != loop_outer (loop))
145 : : {
146 : 783 : for (act = loop_outer (loop); act != father; act = loop_outer (act))
147 : 487 : act->num_nodes -= loop->num_nodes;
148 : 296 : flow_loop_tree_node_remove (loop);
149 : 296 : flow_loop_tree_node_add (father, loop);
150 : :
151 : : /* The exit edges of LOOP no longer exits its original immediate
152 : : superloops; remove them from the appropriate exit lists. */
153 : 907 : FOR_EACH_VEC_ELT (exits, i, e)
154 : : {
155 : : /* We may need to recompute irreducible loops. */
156 : 315 : if (e->flags & EDGE_IRREDUCIBLE_LOOP)
157 : 0 : *irred_invalidated = true;
158 : 315 : rescan_loop_exit (e, false, false);
159 : : }
160 : : /* Any LC SSA PHIs on e->dest might now be on the wrong edge
161 : : if their defs were in a former outer loop. Also all uses
162 : : in the original inner loop of defs in the outer loop(s) now
163 : : require LC PHI nodes. */
164 : 296 : if (loop_closed_ssa_invalidated)
165 : : {
166 : 10 : basic_block *bbs = get_loop_body (loop);
167 : 55 : for (unsigned i = 0; i < loop->num_nodes; ++i)
168 : 45 : bitmap_set_bit (loop_closed_ssa_invalidated, bbs[i]->index);
169 : 10 : free (bbs);
170 : : }
171 : :
172 : : ret = true;
173 : : }
174 : :
175 : 195896 : return ret;
176 : 195896 : }
177 : :
178 : : /* Fix placements of basic blocks inside loop hierarchy stored in loops; i.e.
179 : : enforce condition stated in description of fix_bb_placement. We
180 : : start from basic block FROM that had some of its successors removed, so that
181 : : his placement no longer has to be correct, and iteratively fix placement of
182 : : its predecessors that may change if placement of FROM changed. Also fix
183 : : placement of subloops of FROM->loop_father, that might also be altered due
184 : : to this change; the condition for them is similar, except that instead of
185 : : successors we consider edges coming out of the loops.
186 : :
187 : : If the changes may invalidate the information about irreducible regions,
188 : : IRRED_INVALIDATED is set to true.
189 : :
190 : : If LOOP_CLOSED_SSA_INVLIDATED is non-zero then all basic blocks with
191 : : changed loop_father are collected there. */
192 : :
193 : : static void
194 : 558467 : fix_bb_placements (basic_block from,
195 : : bool *irred_invalidated,
196 : : bitmap loop_closed_ssa_invalidated)
197 : : {
198 : 558467 : basic_block *queue, *qtop, *qbeg, *qend;
199 : 558467 : class loop *base_loop, *target_loop;
200 : 558467 : edge e;
201 : :
202 : : /* We pass through blocks back-reachable from FROM, testing whether some
203 : : of their successors moved to outer loop. It may be necessary to
204 : : iterate several times, but it is finite, as we stop unless we move
205 : : the basic block up the loop structure. The whole story is a bit
206 : : more complicated due to presence of subloops, those are moved using
207 : : fix_loop_placement. */
208 : :
209 : 558467 : base_loop = from->loop_father;
210 : : /* If we are already in the outermost loop, the basic blocks cannot be moved
211 : : outside of it. If FROM is the header of the base loop, it cannot be moved
212 : : outside of it, either. In both cases, we can end now. */
213 : 558467 : if (base_loop == current_loops->tree_root
214 : 229180 : || from == base_loop->header)
215 : 374712 : return;
216 : :
217 : 183755 : auto_sbitmap in_queue (last_basic_block_for_fn (cfun));
218 : 183755 : bitmap_clear (in_queue);
219 : 183755 : bitmap_set_bit (in_queue, from->index);
220 : : /* Prevent us from going out of the base_loop. */
221 : 183755 : bitmap_set_bit (in_queue, base_loop->header->index);
222 : :
223 : 183755 : queue = XNEWVEC (basic_block, base_loop->num_nodes + 1);
224 : 183755 : qtop = queue + base_loop->num_nodes + 1;
225 : 183755 : qbeg = queue;
226 : 183755 : qend = queue + 1;
227 : 183755 : *qbeg = from;
228 : :
229 : 420854 : while (qbeg != qend)
230 : : {
231 : 237099 : edge_iterator ei;
232 : 237099 : from = *qbeg;
233 : 237099 : qbeg++;
234 : 237099 : if (qbeg == qtop)
235 : 0 : qbeg = queue;
236 : 237099 : bitmap_clear_bit (in_queue, from->index);
237 : :
238 : 237099 : if (from->loop_father->header == from)
239 : : {
240 : : /* Subloop header, maybe move the loop upward. */
241 : 604 : if (!fix_loop_placement (from->loop_father, irred_invalidated,
242 : : loop_closed_ssa_invalidated))
243 : 187021 : continue;
244 : 292 : target_loop = loop_outer (from->loop_father);
245 : : }
246 : : else
247 : : {
248 : : /* Ordinary basic block. */
249 : 236495 : if (!fix_bb_placement (from))
250 : 186709 : continue;
251 : 49786 : target_loop = from->loop_father;
252 : 49786 : if (loop_closed_ssa_invalidated)
253 : 19684 : bitmap_set_bit (loop_closed_ssa_invalidated, from->index);
254 : : }
255 : :
256 : 74250 : FOR_EACH_EDGE (e, ei, from->succs)
257 : : {
258 : 24172 : if (e->flags & EDGE_IRREDUCIBLE_LOOP)
259 : 1 : *irred_invalidated = true;
260 : : }
261 : :
262 : : /* Something has changed, insert predecessors into queue. */
263 : 104315 : FOR_EACH_EDGE (e, ei, from->preds)
264 : : {
265 : 54237 : basic_block pred = e->src;
266 : 54237 : class loop *nca;
267 : :
268 : 54237 : if (e->flags & EDGE_IRREDUCIBLE_LOOP)
269 : 0 : *irred_invalidated = true;
270 : :
271 : 54237 : if (bitmap_bit_p (in_queue, pred->index))
272 : 893 : continue;
273 : :
274 : : /* If it is subloop, then it either was not moved, or
275 : : the path up the loop tree from base_loop do not contain
276 : : it. */
277 : 53344 : nca = find_common_loop (pred->loop_father, base_loop);
278 : 53344 : if (pred->loop_father != base_loop
279 : 604 : && (nca == base_loop
280 : 292 : || nca != pred->loop_father))
281 : 604 : pred = pred->loop_father->header;
282 : 52740 : else if (!flow_loop_nested_p (target_loop, pred->loop_father))
283 : : {
284 : : /* If PRED is already higher in the loop hierarchy than the
285 : : TARGET_LOOP to that we moved FROM, the change of the position
286 : : of FROM does not affect the position of PRED, so there is no
287 : : point in processing it. */
288 : 0 : continue;
289 : : }
290 : :
291 : 53344 : if (bitmap_bit_p (in_queue, pred->index))
292 : 0 : continue;
293 : :
294 : : /* Schedule the basic block. */
295 : 53344 : *qend = pred;
296 : 53344 : qend++;
297 : 53344 : if (qend == qtop)
298 : 0 : qend = queue;
299 : 53344 : bitmap_set_bit (in_queue, pred->index);
300 : : }
301 : : }
302 : 183755 : free (queue);
303 : 183755 : }
304 : :
305 : : /* Removes path beginning at edge E, i.e. remove basic blocks dominated by E
306 : : and update loop structures and dominators. Return true if we were able
307 : : to remove the path, false otherwise (and nothing is affected then). */
308 : : bool
309 : 428566 : remove_path (edge e, bool *irred_invalidated,
310 : : bitmap loop_closed_ssa_invalidated)
311 : : {
312 : 428566 : edge ae;
313 : 428566 : basic_block *rem_bbs, *bord_bbs, from, bb;
314 : 428566 : vec<basic_block> dom_bbs;
315 : 428566 : int i, nrem, n_bord_bbs;
316 : 428566 : bool local_irred_invalidated = false;
317 : 428566 : edge_iterator ei;
318 : 428566 : class loop *l, *f;
319 : :
320 : 428566 : if (! irred_invalidated)
321 : 144264 : irred_invalidated = &local_irred_invalidated;
322 : :
323 : 428566 : if (!can_remove_branch_p (e))
324 : : return false;
325 : :
326 : : /* Keep track of whether we need to update information about irreducible
327 : : regions. This is the case if the removed area is a part of the
328 : : irreducible region, or if the set of basic blocks that belong to a loop
329 : : that is inside an irreducible region is changed, or if such a loop is
330 : : removed. */
331 : 428566 : if (e->flags & EDGE_IRREDUCIBLE_LOOP)
332 : 449 : *irred_invalidated = true;
333 : :
334 : : /* We need to check whether basic blocks are dominated by the edge
335 : : e, but we only have basic block dominators. This is easy to
336 : : fix -- when e->dest has exactly one predecessor, this corresponds
337 : : to blocks dominated by e->dest, if not, split the edge. */
338 : 428566 : if (!single_pred_p (e->dest))
339 : 428566 : e = single_pred_edge (split_edge (e));
340 : :
341 : : /* It may happen that by removing path we remove one or more loops
342 : : we belong to. In this case first unloop the loops, then proceed
343 : : normally. We may assume that e->dest is not a header of any loop,
344 : : as it now has exactly one predecessor. */
345 : 727574 : for (l = e->src->loop_father; loop_outer (l); l = f)
346 : : {
347 : 299008 : f = loop_outer (l);
348 : 299008 : if (dominated_by_p (CDI_DOMINATORS, l->latch, e->dest))
349 : 0 : unloop (l, irred_invalidated, loop_closed_ssa_invalidated);
350 : : }
351 : :
352 : : /* Identify the path. */
353 : 428566 : nrem = find_path (e, &rem_bbs);
354 : :
355 : 428566 : n_bord_bbs = 0;
356 : 428566 : bord_bbs = XNEWVEC (basic_block, n_basic_blocks_for_fn (cfun));
357 : 428566 : auto_sbitmap seen (last_basic_block_for_fn (cfun));
358 : 428566 : bitmap_clear (seen);
359 : :
360 : : /* Find "border" hexes -- i.e. those with predecessor in removed path. */
361 : 1285698 : for (i = 0; i < nrem; i++)
362 : 428566 : bitmap_set_bit (seen, rem_bbs[i]->index);
363 : 428566 : if (!*irred_invalidated)
364 : 1284236 : FOR_EACH_EDGE (ae, ei, e->src->succs)
365 : 428117 : if (ae != e && ae->dest != EXIT_BLOCK_PTR_FOR_FN (cfun)
366 : 428117 : && !bitmap_bit_p (seen, ae->dest->index)
367 : 1284321 : && ae->flags & EDGE_IRREDUCIBLE_LOOP)
368 : : {
369 : 85 : *irred_invalidated = true;
370 : 85 : break;
371 : : }
372 : :
373 : 857132 : for (i = 0; i < nrem; i++)
374 : : {
375 : 857132 : FOR_EACH_EDGE (ae, ei, rem_bbs[i]->succs)
376 : 428566 : if (ae->dest != EXIT_BLOCK_PTR_FOR_FN (cfun)
377 : 428566 : && !bitmap_bit_p (seen, ae->dest->index))
378 : : {
379 : 428566 : bitmap_set_bit (seen, ae->dest->index);
380 : 428566 : bord_bbs[n_bord_bbs++] = ae->dest;
381 : :
382 : 428566 : if (ae->flags & EDGE_IRREDUCIBLE_LOOP)
383 : 449 : *irred_invalidated = true;
384 : : }
385 : : }
386 : :
387 : : /* Remove the path. */
388 : 428566 : from = e->src;
389 : 428566 : remove_branch (e);
390 : 428566 : dom_bbs.create (0);
391 : :
392 : : /* Cancel loops contained in the path. */
393 : 857132 : for (i = 0; i < nrem; i++)
394 : 428566 : if (rem_bbs[i]->loop_father->header == rem_bbs[i])
395 : 0 : cancel_loop_tree (rem_bbs[i]->loop_father);
396 : :
397 : 428566 : remove_bbs (rem_bbs, nrem);
398 : 428566 : free (rem_bbs);
399 : :
400 : : /* Find blocks whose dominators may be affected. */
401 : 428566 : bitmap_clear (seen);
402 : 857132 : for (i = 0; i < n_bord_bbs; i++)
403 : : {
404 : 428566 : basic_block ldom;
405 : :
406 : 428566 : bb = get_immediate_dominator (CDI_DOMINATORS, bord_bbs[i]);
407 : 428566 : if (bitmap_bit_p (seen, bb->index))
408 : 0 : continue;
409 : 428566 : bitmap_set_bit (seen, bb->index);
410 : :
411 : 428566 : for (ldom = first_dom_son (CDI_DOMINATORS, bb);
412 : 1402530 : ldom;
413 : 973964 : ldom = next_dom_son (CDI_DOMINATORS, ldom))
414 : 973964 : if (!dominated_by_p (CDI_DOMINATORS, from, ldom))
415 : 825857 : dom_bbs.safe_push (ldom);
416 : : }
417 : :
418 : : /* Recount dominators. */
419 : 428566 : iterate_fix_dominators (CDI_DOMINATORS, dom_bbs, true);
420 : 428566 : dom_bbs.release ();
421 : 428566 : free (bord_bbs);
422 : :
423 : : /* Fix placements of basic blocks inside loops and the placement of
424 : : loops in the loop tree. */
425 : 428566 : fix_bb_placements (from, irred_invalidated, loop_closed_ssa_invalidated);
426 : 428566 : fix_loop_placements (from->loop_father, irred_invalidated,
427 : : loop_closed_ssa_invalidated);
428 : :
429 : 428566 : if (local_irred_invalidated
430 : 428566 : && loops_state_satisfies_p (LOOPS_HAVE_MARKED_IRREDUCIBLE_REGIONS))
431 : 513 : mark_irreducible_loops ();
432 : :
433 : 428566 : return true;
434 : 428566 : }
435 : :
436 : : /* Creates place for a new LOOP in loops structure of FN. */
437 : :
438 : : void
439 : 810571 : place_new_loop (struct function *fn, class loop *loop)
440 : : {
441 : 810571 : loop->num = number_of_loops (fn);
442 : 810571 : vec_safe_push (loops_for_fn (fn)->larray, loop);
443 : 810571 : }
444 : :
445 : : /* Given LOOP structure with filled header and latch, find the body of the
446 : : corresponding loop and add it to loops tree. Insert the LOOP as a son of
447 : : outer. */
448 : :
449 : : void
450 : 119763 : add_loop (class loop *loop, class loop *outer)
451 : : {
452 : 119763 : basic_block *bbs;
453 : 119763 : int i, n;
454 : 119763 : class loop *subloop;
455 : 119763 : edge e;
456 : 119763 : edge_iterator ei;
457 : :
458 : : /* Add it to loop structure. */
459 : 119763 : place_new_loop (cfun, loop);
460 : 119763 : flow_loop_tree_node_add (outer, loop);
461 : :
462 : : /* Find its nodes. */
463 : 119763 : bbs = XNEWVEC (basic_block, n_basic_blocks_for_fn (cfun));
464 : 119763 : n = get_loop_body_with_size (loop, bbs, n_basic_blocks_for_fn (cfun));
465 : :
466 : 853409 : for (i = 0; i < n; i++)
467 : : {
468 : 733646 : if (bbs[i]->loop_father == outer)
469 : : {
470 : 507699 : remove_bb_from_loops (bbs[i]);
471 : 507699 : add_bb_to_loop (bbs[i], loop);
472 : 507699 : continue;
473 : : }
474 : :
475 : 225947 : loop->num_nodes++;
476 : :
477 : : /* If we find a direct subloop of OUTER, move it to LOOP. */
478 : 225947 : subloop = bbs[i]->loop_father;
479 : 225947 : if (loop_outer (subloop) == outer
480 : 225947 : && subloop->header == bbs[i])
481 : : {
482 : 23927 : flow_loop_tree_node_remove (subloop);
483 : 23927 : flow_loop_tree_node_add (loop, subloop);
484 : : }
485 : : }
486 : :
487 : : /* Update the information about loop exit edges. */
488 : 853409 : for (i = 0; i < n; i++)
489 : : {
490 : 1812052 : FOR_EACH_EDGE (e, ei, bbs[i]->succs)
491 : : {
492 : 1078406 : rescan_loop_exit (e, false, false);
493 : : }
494 : : }
495 : :
496 : 119763 : free (bbs);
497 : 119763 : }
498 : :
499 : : /* Scale profile of loop by P. */
500 : :
501 : : void
502 : 245525 : scale_loop_frequencies (class loop *loop, profile_probability p)
503 : : {
504 : 245525 : basic_block *bbs;
505 : :
506 : 245525 : bbs = get_loop_body (loop);
507 : 245525 : scale_bbs_frequencies (bbs, loop->num_nodes, p);
508 : 245525 : free (bbs);
509 : 245525 : }
510 : :
511 : : /* Scales the frequencies of all basic blocks in LOOP that are strictly
512 : : dominated by BB by NUM/DEN. */
513 : :
514 : : void
515 : 36156 : scale_dominated_blocks_in_loop (class loop *loop, basic_block bb,
516 : : profile_count num, profile_count den)
517 : : {
518 : 36156 : basic_block son;
519 : :
520 : 36156 : if (!den.nonzero_p () && !(num == profile_count::zero ()))
521 : 0 : return;
522 : 36156 : auto_vec <basic_block, 8> worklist;
523 : 36156 : worklist.safe_push (bb);
524 : :
525 : 293321 : while (!worklist.is_empty ())
526 : 184853 : for (son = first_dom_son (CDI_DOMINATORS, worklist.pop ());
527 : 368820 : son;
528 : 183967 : son = next_dom_son (CDI_DOMINATORS, son))
529 : : {
530 : 183967 : if (!flow_bb_inside_loop_p (loop, son))
531 : 35270 : continue;
532 : 148697 : son->count = son->count.apply_scale (num, den);
533 : 148697 : worklist.safe_push (son);
534 : : }
535 : 36156 : }
536 : :
537 : : /* Return exit that suitable for update when loop iterations
538 : : changed. */
539 : :
540 : : static edge
541 : 101610 : loop_exit_for_scaling (class loop *loop)
542 : : {
543 : 101610 : edge exit_edge = single_exit (loop);
544 : 101610 : if (!exit_edge)
545 : : {
546 : 8447 : auto_vec<edge> exits = get_loop_exit_edges (loop);
547 : 8447 : exit_edge = single_likely_exit (loop, exits);
548 : 8447 : }
549 : 101610 : return exit_edge;
550 : : }
551 : :
552 : : /* Assume that loop's entry count and profile up to a given EXIT_EDGE is
553 : : consistent. Update exit probability so loop exists with PROFILE_COUNT
554 : : and rescale profile of basic blocks inside loop dominated by EXIT_EDGE->src.
555 : :
556 : : This is useful after number of iteraitons of loop has changed.
557 : : If EXIT_EDGE is NULL, the function will try to identify suitable exit.
558 : : If DESIRED_COUNT is NULL, loop entry count will be used.
559 : : In consistent profile usually loop exists as many times as it is entred.
560 : :
561 : : Return updated exit if successfull and NULL otherwise. */
562 : :
563 : : edge
564 : 131684 : update_loop_exit_probability_scale_dom_bbs (class loop *loop,
565 : : edge exit_edge,
566 : : profile_count desired_count)
567 : : {
568 : 131684 : if (!exit_edge)
569 : 2826 : exit_edge = loop_exit_for_scaling (loop);
570 : 2826 : if (!exit_edge)
571 : : {
572 : 2060 : if (dump_file && (dump_flags & TDF_DETAILS))
573 : : {
574 : 219 : fprintf (dump_file, ";; Not updating exit probability of loop %i;"
575 : : " it has no single exit\n",
576 : : loop->num);
577 : : }
578 : 2060 : return NULL;
579 : : }
580 : : /* If exit is inside another loop, adjusting its probability will also
581 : : adjust number of the inner loop iterations. Just do noting for now. */
582 : 129624 : if (!just_once_each_iteration_p (loop, exit_edge->src))
583 : : {
584 : 11 : if (dump_file && (dump_flags & TDF_DETAILS))
585 : : {
586 : 0 : fprintf (dump_file, ";; Not updating exit probability of loop %i;"
587 : : " exit is inside inner loop\n",
588 : : loop->num);
589 : : }
590 : 11 : return NULL;
591 : : }
592 : : /* Normal loops exit as many times as they are entered. */
593 : 129613 : if (!desired_count.initialized_p ())
594 : 1484 : desired_count = loop_count_in (loop);
595 : : /* See if everything is already perfect. */
596 : 129613 : if (desired_count == exit_edge->count ())
597 : 5932 : return exit_edge;
598 : 123681 : profile_count old_exit_count = exit_edge->count ();
599 : : /* See if update is possible.
600 : : Avoid turning probability to 0 or 1 just trying to reach impossible
601 : : value.
602 : :
603 : : Natural profile update after some loop will happily scale header count to
604 : : be less than count of entering the loop. This is bad idea and they should
605 : : special case maybe_flat_loop_profile.
606 : :
607 : : It may also happen that the source basic block of the exit edge is
608 : : inside in-loop condition:
609 : :
610 : : +-> header
611 : : | |
612 : : | B1
613 : : | / \
614 : : | | B2--exit_edge-->
615 : : | \ /
616 : : | B3
617 : : +__/
618 : :
619 : : If B2 count is smaller than desired exit edge count
620 : : the profile was inconsistent with the newly discovered upper bound.
621 : : Probablity of edge B1->B2 is too low. We do not attempt to fix
622 : : that (as it is hard in general). */
623 : 123681 : if (desired_count > exit_edge->src->count
624 : 123681 : && exit_edge->src->count.differs_from_p (desired_count))
625 : : {
626 : 450 : if (dump_file)
627 : : {
628 : 3 : fprintf (dump_file, ";; Source bb of loop %i has count ",
629 : : loop->num);
630 : 3 : exit_edge->src->count.dump (dump_file, cfun);
631 : 3 : fprintf (dump_file,
632 : : " which is smaller then desired count of exitting loop ");
633 : 3 : desired_count.dump (dump_file, cfun);
634 : 3 : fprintf (dump_file, ". Profile update is impossible.\n");
635 : : }
636 : : /* Drop quality of probability since we know it likely
637 : : bad. */
638 : 450 : exit_edge->probability = exit_edge->probability.guessed ();
639 : 450 : return NULL;
640 : : }
641 : 123231 : if (!exit_edge->src->count.nonzero_p ())
642 : : {
643 : 1 : if (dump_file)
644 : : {
645 : 0 : fprintf (dump_file, ";; Not updating exit edge probability"
646 : : " in loop %i since profile is zero ",
647 : : loop->num);
648 : : }
649 : 1 : return NULL;
650 : : }
651 : 123230 : set_edge_probability_and_rescale_others
652 : 123230 : (exit_edge, desired_count.probability_in (exit_edge->src->count));
653 : : /* Rescale the remaining edge probabilities and see if there is only
654 : : one. */
655 : 123230 : edge other_edge = NULL;
656 : 123230 : bool found = false;
657 : 123230 : edge e;
658 : 123230 : edge_iterator ei;
659 : 369690 : FOR_EACH_EDGE (e, ei, exit_edge->src->succs)
660 : 246471 : if (!(e->flags & EDGE_FAKE)
661 : 246471 : && !loop_exit_edge_p (loop, e))
662 : : {
663 : 123241 : if (found)
664 : : {
665 : : other_edge = NULL;
666 : : break;
667 : : }
668 : : other_edge = e;
669 : : found = true;
670 : : }
671 : : /* If there is only loop latch after other edge,
672 : : update its profile. This is tiny bit more precise
673 : : than scaling. */
674 : 123230 : if (other_edge && other_edge->dest == loop->latch)
675 : : {
676 : 87761 : if (single_pred_p (loop->latch))
677 : 87761 : loop->latch->count = loop->latch->count
678 : 87761 : + old_exit_count - exit_edge->count ();
679 : : }
680 : : else
681 : : /* If there are multiple blocks, just scale. */
682 : 70938 : scale_dominated_blocks_in_loop (loop, exit_edge->src,
683 : 70938 : exit_edge->src->count - exit_edge->count (),
684 : 35469 : exit_edge->src->count - old_exit_count);
685 : : return exit_edge;
686 : : }
687 : :
688 : : /* Scale profile in LOOP by P.
689 : : If ITERATION_BOUND is not -1, scale even further if loop is predicted
690 : : to iterate too many times.
691 : : Before caling this function, preheader block profile should be already
692 : : scaled to final count. This is necessary because loop iterations are
693 : : determined by comparing header edge count to latch ege count and thus
694 : : they need to be scaled synchronously. */
695 : :
696 : : void
697 : 270574 : scale_loop_profile (class loop *loop, profile_probability p,
698 : : gcov_type iteration_bound)
699 : : {
700 : 270574 : if (!(p == profile_probability::always ()))
701 : : {
702 : 74909 : if (dump_file && (dump_flags & TDF_DETAILS))
703 : : {
704 : 10248 : fprintf (dump_file, ";; Scaling loop %i with scale ",
705 : : loop->num);
706 : 10248 : p.dump (dump_file);
707 : 10248 : fprintf (dump_file, "\n");
708 : : }
709 : :
710 : : /* Scale the probabilities. */
711 : 74909 : scale_loop_frequencies (loop, p);
712 : : }
713 : :
714 : 270574 : if (iteration_bound == -1)
715 : 171790 : return;
716 : :
717 : 220524 : sreal iterations;
718 : 220524 : if (!expected_loop_iterations_by_profile (loop, &iterations))
719 : : return;
720 : :
721 : 219869 : if (dump_file && (dump_flags & TDF_DETAILS))
722 : : {
723 : 14404 : fprintf (dump_file,
724 : : ";; Guessed iterations of loop %i is %f. New upper bound %i.\n",
725 : : loop->num,
726 : : iterations.to_double (),
727 : : (int)iteration_bound);
728 : : }
729 : :
730 : : /* See if loop is predicted to iterate too many times. */
731 : 219869 : if (iterations <= (sreal)iteration_bound)
732 : : return;
733 : :
734 : 98784 : profile_count count_in = loop_count_in (loop);
735 : :
736 : : /* Now scale the loop body so header count is
737 : : count_in * (iteration_bound + 1) */
738 : 98784 : profile_probability scale_prob
739 : 98784 : = (count_in * (iteration_bound + 1)).probability_in (loop->header->count);
740 : 98784 : if (dump_file && (dump_flags & TDF_DETAILS))
741 : : {
742 : 10095 : fprintf (dump_file, ";; Scaling loop %i with scale ",
743 : : loop->num);
744 : 10095 : scale_prob.dump (dump_file);
745 : 10095 : fprintf (dump_file, " to reach upper bound %i\n",
746 : : (int)iteration_bound);
747 : : }
748 : : /* Finally attempt to fix exit edge probability. */
749 : 98784 : edge exit_edge = loop_exit_for_scaling (loop);
750 : :
751 : : /* In a consistent profile unadjusted_exit_count should be same as count_in,
752 : : however to preserve as much of the original info, avoid recomputing
753 : : it. */
754 : 98784 : profile_count unadjusted_exit_count = profile_count::uninitialized ();
755 : 98784 : if (exit_edge)
756 : 96724 : unadjusted_exit_count = exit_edge->count ();
757 : 98784 : scale_loop_frequencies (loop, scale_prob);
758 : 98784 : update_loop_exit_probability_scale_dom_bbs (loop, exit_edge,
759 : : unadjusted_exit_count);
760 : : }
761 : :
762 : : /* Recompute dominance information for basic blocks outside LOOP. */
763 : :
764 : : static void
765 : 71650 : update_dominators_in_loop (class loop *loop)
766 : : {
767 : 71650 : vec<basic_block> dom_bbs = vNULL;
768 : 71650 : basic_block *body;
769 : 71650 : unsigned i;
770 : :
771 : 71650 : auto_sbitmap seen (last_basic_block_for_fn (cfun));
772 : 71650 : bitmap_clear (seen);
773 : 71650 : body = get_loop_body (loop);
774 : :
775 : 510774 : for (i = 0; i < loop->num_nodes; i++)
776 : 367474 : bitmap_set_bit (seen, body[i]->index);
777 : :
778 : 439124 : for (i = 0; i < loop->num_nodes; i++)
779 : : {
780 : 367474 : basic_block ldom;
781 : :
782 : 367474 : for (ldom = first_dom_son (CDI_DOMINATORS, body[i]);
783 : 699593 : ldom;
784 : 332119 : ldom = next_dom_son (CDI_DOMINATORS, ldom))
785 : 332119 : if (!bitmap_bit_p (seen, ldom->index))
786 : : {
787 : 36295 : bitmap_set_bit (seen, ldom->index);
788 : 36295 : dom_bbs.safe_push (ldom);
789 : : }
790 : : }
791 : :
792 : 71650 : iterate_fix_dominators (CDI_DOMINATORS, dom_bbs, false);
793 : 71650 : free (body);
794 : 71650 : dom_bbs.release ();
795 : 71650 : }
796 : :
797 : : /* Creates an if region as shown above. CONDITION is used to create
798 : : the test for the if.
799 : :
800 : : |
801 : : | ------------- -------------
802 : : | | pred_bb | | pred_bb |
803 : : | ------------- -------------
804 : : | | |
805 : : | | | ENTRY_EDGE
806 : : | | ENTRY_EDGE V
807 : : | | ====> -------------
808 : : | | | cond_bb |
809 : : | | | CONDITION |
810 : : | | -------------
811 : : | V / \
812 : : | ------------- e_false / \ e_true
813 : : | | succ_bb | V V
814 : : | ------------- ----------- -----------
815 : : | | false_bb | | true_bb |
816 : : | ----------- -----------
817 : : | \ /
818 : : | \ /
819 : : | V V
820 : : | -------------
821 : : | | join_bb |
822 : : | -------------
823 : : | | exit_edge (result)
824 : : | V
825 : : | -----------
826 : : | | succ_bb |
827 : : | -----------
828 : : |
829 : : */
830 : :
831 : : edge
832 : 203 : create_empty_if_region_on_edge (edge entry_edge, tree condition)
833 : : {
834 : :
835 : 203 : basic_block cond_bb, true_bb, false_bb, join_bb;
836 : 203 : edge e_true, e_false, exit_edge;
837 : 203 : gcond *cond_stmt;
838 : 203 : tree simple_cond;
839 : 203 : gimple_stmt_iterator gsi;
840 : :
841 : 203 : cond_bb = split_edge (entry_edge);
842 : :
843 : : /* Insert condition in cond_bb. */
844 : 203 : gsi = gsi_last_bb (cond_bb);
845 : 203 : simple_cond =
846 : 203 : force_gimple_operand_gsi (&gsi, condition, true, NULL,
847 : : false, GSI_NEW_STMT);
848 : 203 : cond_stmt = gimple_build_cond_from_tree (simple_cond, NULL_TREE, NULL_TREE);
849 : 203 : gsi = gsi_last_bb (cond_bb);
850 : 203 : gsi_insert_after (&gsi, cond_stmt, GSI_NEW_STMT);
851 : :
852 : 203 : join_bb = split_edge (single_succ_edge (cond_bb));
853 : :
854 : 203 : e_true = single_succ_edge (cond_bb);
855 : 203 : true_bb = split_edge (e_true);
856 : :
857 : 203 : e_false = make_edge (cond_bb, join_bb, 0);
858 : 203 : false_bb = split_edge (e_false);
859 : :
860 : 203 : e_true->flags &= ~EDGE_FALLTHRU;
861 : 203 : e_true->flags |= EDGE_TRUE_VALUE;
862 : 203 : e_false->flags &= ~EDGE_FALLTHRU;
863 : 203 : e_false->flags |= EDGE_FALSE_VALUE;
864 : :
865 : 203 : set_immediate_dominator (CDI_DOMINATORS, cond_bb, entry_edge->src);
866 : 203 : set_immediate_dominator (CDI_DOMINATORS, true_bb, cond_bb);
867 : 203 : set_immediate_dominator (CDI_DOMINATORS, false_bb, cond_bb);
868 : 203 : set_immediate_dominator (CDI_DOMINATORS, join_bb, cond_bb);
869 : :
870 : 203 : exit_edge = single_succ_edge (join_bb);
871 : :
872 : 203 : if (single_pred_p (exit_edge->dest))
873 : 203 : set_immediate_dominator (CDI_DOMINATORS, exit_edge->dest, join_bb);
874 : :
875 : 203 : return exit_edge;
876 : : }
877 : :
878 : : /* create_empty_loop_on_edge
879 : : |
880 : : | - pred_bb - ------ pred_bb ------
881 : : | | | | iv0 = initial_value |
882 : : | -----|----- ---------|-----------
883 : : | | ______ | entry_edge
884 : : | | entry_edge / | |
885 : : | | ====> | -V---V- loop_header -------------
886 : : | V | | iv_before = phi (iv0, iv_after) |
887 : : | - succ_bb - | ---|-----------------------------
888 : : | | | | |
889 : : | ----------- | ---V--- loop_body ---------------
890 : : | | | iv_after = iv_before + stride |
891 : : | | | if (iv_before < upper_bound) |
892 : : | | ---|--------------\--------------
893 : : | | | \ exit_e
894 : : | | V \
895 : : | | - loop_latch - V- succ_bb -
896 : : | | | | | |
897 : : | | /------------- -----------
898 : : | \ ___ /
899 : :
900 : : Creates an empty loop as shown above, the IV_BEFORE is the SSA_NAME
901 : : that is used before the increment of IV. IV_BEFORE should be used for
902 : : adding code to the body that uses the IV. OUTER is the outer loop in
903 : : which the new loop should be inserted.
904 : :
905 : : Both INITIAL_VALUE and UPPER_BOUND expressions are gimplified and
906 : : inserted on the loop entry edge. This implies that this function
907 : : should be used only when the UPPER_BOUND expression is a loop
908 : : invariant. */
909 : :
910 : : class loop *
911 : 498 : create_empty_loop_on_edge (edge entry_edge,
912 : : tree initial_value,
913 : : tree stride, tree upper_bound,
914 : : tree iv,
915 : : tree *iv_before,
916 : : tree *iv_after,
917 : : class loop *outer)
918 : : {
919 : 498 : basic_block loop_header, loop_latch, succ_bb, pred_bb;
920 : 498 : class loop *loop;
921 : 498 : gimple_stmt_iterator gsi;
922 : 498 : gimple_seq stmts;
923 : 498 : gcond *cond_expr;
924 : 498 : tree exit_test;
925 : 498 : edge exit_e;
926 : :
927 : 498 : gcc_assert (entry_edge && initial_value && stride && upper_bound && iv);
928 : :
929 : : /* Create header, latch and wire up the loop. */
930 : 498 : pred_bb = entry_edge->src;
931 : 498 : loop_header = split_edge (entry_edge);
932 : 498 : loop_latch = split_edge (single_succ_edge (loop_header));
933 : 498 : succ_bb = single_succ (loop_latch);
934 : 498 : make_edge (loop_header, succ_bb, 0);
935 : 498 : redirect_edge_succ_nodup (single_succ_edge (loop_latch), loop_header);
936 : :
937 : : /* Set immediate dominator information. */
938 : 498 : set_immediate_dominator (CDI_DOMINATORS, loop_header, pred_bb);
939 : 498 : set_immediate_dominator (CDI_DOMINATORS, loop_latch, loop_header);
940 : 498 : set_immediate_dominator (CDI_DOMINATORS, succ_bb, loop_header);
941 : :
942 : : /* Initialize a loop structure and put it in a loop hierarchy. */
943 : 498 : loop = alloc_loop ();
944 : 498 : loop->header = loop_header;
945 : 498 : loop->latch = loop_latch;
946 : 498 : add_loop (loop, outer);
947 : :
948 : : /* TODO: Fix counts. */
949 : 498 : scale_loop_frequencies (loop, profile_probability::even ());
950 : :
951 : : /* Update dominators. */
952 : 498 : update_dominators_in_loop (loop);
953 : :
954 : : /* Modify edge flags. */
955 : 498 : exit_e = single_exit (loop);
956 : 498 : exit_e->flags = EDGE_LOOP_EXIT | EDGE_FALSE_VALUE;
957 : 498 : single_pred_edge (loop_latch)->flags = EDGE_TRUE_VALUE;
958 : :
959 : : /* Construct IV code in loop. */
960 : 498 : initial_value = force_gimple_operand (initial_value, &stmts, true, iv);
961 : 498 : if (stmts)
962 : : {
963 : 17 : gsi_insert_seq_on_edge (loop_preheader_edge (loop), stmts);
964 : 17 : gsi_commit_edge_inserts ();
965 : : }
966 : :
967 : 498 : upper_bound = force_gimple_operand (upper_bound, &stmts, true, NULL);
968 : 498 : if (stmts)
969 : : {
970 : 161 : gsi_insert_seq_on_edge (loop_preheader_edge (loop), stmts);
971 : 161 : gsi_commit_edge_inserts ();
972 : : }
973 : :
974 : 498 : gsi = gsi_last_bb (loop_header);
975 : 498 : create_iv (initial_value, PLUS_EXPR, stride, iv, loop, &gsi, false,
976 : : iv_before, iv_after);
977 : :
978 : : /* Insert loop exit condition. */
979 : 498 : cond_expr = gimple_build_cond
980 : 498 : (LT_EXPR, *iv_before, upper_bound, NULL_TREE, NULL_TREE);
981 : :
982 : 498 : exit_test = gimple_cond_lhs (cond_expr);
983 : 498 : exit_test = force_gimple_operand_gsi (&gsi, exit_test, true, NULL,
984 : : false, GSI_NEW_STMT);
985 : 498 : gimple_cond_set_lhs (cond_expr, exit_test);
986 : 498 : gsi = gsi_last_bb (exit_e->src);
987 : 498 : gsi_insert_after (&gsi, cond_expr, GSI_NEW_STMT);
988 : :
989 : 498 : split_block_after_labels (loop_header);
990 : :
991 : 498 : return loop;
992 : : }
993 : :
994 : : /* Remove the latch edge of a LOOP and update loops to indicate that
995 : : the LOOP was removed. After this function, original loop latch will
996 : : have no successor, which caller is expected to fix somehow.
997 : :
998 : : If this may cause the information about irreducible regions to become
999 : : invalid, IRRED_INVALIDATED is set to true.
1000 : :
1001 : : LOOP_CLOSED_SSA_INVALIDATED, if non-NULL, is a bitmap where we store
1002 : : basic blocks that had non-trivial update on their loop_father.*/
1003 : :
1004 : : void
1005 : 129897 : unloop (class loop *loop, bool *irred_invalidated,
1006 : : bitmap loop_closed_ssa_invalidated)
1007 : : {
1008 : 129897 : basic_block *body;
1009 : 129897 : class loop *ploop;
1010 : 129897 : unsigned i, n;
1011 : 129897 : basic_block latch = loop->latch;
1012 : 129897 : bool dummy = false;
1013 : :
1014 : 129897 : if (loop_preheader_edge (loop)->flags & EDGE_IRREDUCIBLE_LOOP)
1015 : 20 : *irred_invalidated = true;
1016 : :
1017 : : /* This is relatively straightforward. The dominators are unchanged, as
1018 : : loop header dominates loop latch, so the only thing we have to care of
1019 : : is the placement of loops and basic blocks inside the loop tree. We
1020 : : move them all to the loop->outer, and then let fix_bb_placements do
1021 : : its work. */
1022 : :
1023 : 129897 : body = get_loop_body (loop);
1024 : 129897 : n = loop->num_nodes;
1025 : 536456 : for (i = 0; i < n; i++)
1026 : 406559 : if (body[i]->loop_father == loop)
1027 : : {
1028 : 387277 : remove_bb_from_loops (body[i]);
1029 : 387277 : add_bb_to_loop (body[i], loop_outer (loop));
1030 : : }
1031 : 129897 : free (body);
1032 : :
1033 : 132256 : while (loop->inner)
1034 : : {
1035 : 2359 : ploop = loop->inner;
1036 : 2359 : flow_loop_tree_node_remove (ploop);
1037 : 2359 : flow_loop_tree_node_add (loop_outer (loop), ploop);
1038 : : }
1039 : :
1040 : : /* Remove the loop and free its data. */
1041 : 129897 : delete_loop (loop);
1042 : :
1043 : 129897 : remove_edge (single_succ_edge (latch));
1044 : :
1045 : : /* We do not pass IRRED_INVALIDATED to fix_bb_placements here, as even if
1046 : : there is an irreducible region inside the cancelled loop, the flags will
1047 : : be still correct. */
1048 : 129897 : fix_bb_placements (latch, &dummy, loop_closed_ssa_invalidated);
1049 : 129897 : }
1050 : :
1051 : : /* Fix placement of superloops of LOOP inside loop tree, i.e. ensure that
1052 : : condition stated in description of fix_loop_placement holds for them.
1053 : : It is used in case when we removed some edges coming out of LOOP, which
1054 : : may cause the right placement of LOOP inside loop tree to change.
1055 : :
1056 : : IRRED_INVALIDATED is set to true if a change in the loop structures might
1057 : : invalidate the information about irreducible regions. */
1058 : :
1059 : : static void
1060 : 428566 : fix_loop_placements (class loop *loop, bool *irred_invalidated,
1061 : : bitmap loop_closed_ssa_invalidated)
1062 : : {
1063 : 428566 : class loop *outer;
1064 : :
1065 : 428570 : while (loop_outer (loop))
1066 : : {
1067 : 195292 : outer = loop_outer (loop);
1068 : 195292 : if (!fix_loop_placement (loop, irred_invalidated,
1069 : : loop_closed_ssa_invalidated))
1070 : : break;
1071 : :
1072 : : /* Changing the placement of a loop in the loop tree may alter the
1073 : : validity of condition 2) of the description of fix_bb_placement
1074 : : for its preheader, because the successor is the header and belongs
1075 : : to the loop. So call fix_bb_placements to fix up the placement
1076 : : of the preheader and (possibly) of its predecessors. */
1077 : 4 : fix_bb_placements (loop_preheader_edge (loop)->src,
1078 : : irred_invalidated, loop_closed_ssa_invalidated);
1079 : 4 : loop = outer;
1080 : : }
1081 : 428566 : }
1082 : :
1083 : : /* Duplicate loop bounds and other information we store about
1084 : : the loop into its duplicate. */
1085 : :
1086 : : void
1087 : 689452 : copy_loop_info (class loop *loop, class loop *target)
1088 : : {
1089 : 689452 : gcc_checking_assert (!target->any_upper_bound && !target->any_estimate);
1090 : 689452 : target->any_upper_bound = loop->any_upper_bound;
1091 : 689452 : target->nb_iterations_upper_bound = loop->nb_iterations_upper_bound;
1092 : 689452 : target->any_likely_upper_bound = loop->any_likely_upper_bound;
1093 : 689452 : target->nb_iterations_likely_upper_bound
1094 : 689452 : = loop->nb_iterations_likely_upper_bound;
1095 : 689452 : target->any_estimate = loop->any_estimate;
1096 : 689452 : target->nb_iterations_estimate = loop->nb_iterations_estimate;
1097 : 689452 : target->estimate_state = loop->estimate_state;
1098 : 689452 : target->safelen = loop->safelen;
1099 : 689452 : target->simdlen = loop->simdlen;
1100 : 689452 : target->constraints = loop->constraints;
1101 : 689452 : target->can_be_parallel = loop->can_be_parallel;
1102 : 689452 : target->warned_aggressive_loop_optimizations
1103 : 689452 : |= loop->warned_aggressive_loop_optimizations;
1104 : 689452 : target->dont_vectorize = loop->dont_vectorize;
1105 : 689452 : target->force_vectorize = loop->force_vectorize;
1106 : 689452 : target->in_oacc_kernels_region = loop->in_oacc_kernels_region;
1107 : 689452 : target->finite_p = loop->finite_p;
1108 : 689452 : target->unroll = loop->unroll;
1109 : 689452 : target->owned_clique = loop->owned_clique;
1110 : 689452 : }
1111 : :
1112 : : /* Copies copy of LOOP as subloop of TARGET loop, placing newly
1113 : : created loop into loops structure. If AFTER is non-null
1114 : : the new loop is added at AFTER->next, otherwise in front of TARGETs
1115 : : sibling list. */
1116 : : class loop *
1117 : 39303 : duplicate_loop (class loop *loop, class loop *target, class loop *after)
1118 : : {
1119 : 39303 : class loop *cloop;
1120 : 39303 : cloop = alloc_loop ();
1121 : 39303 : place_new_loop (cfun, cloop);
1122 : :
1123 : 39303 : copy_loop_info (loop, cloop);
1124 : :
1125 : : /* Mark the new loop as copy of LOOP. */
1126 : 39303 : set_loop_copy (loop, cloop);
1127 : :
1128 : : /* Add it to target. */
1129 : 39303 : flow_loop_tree_node_add (target, cloop, after);
1130 : :
1131 : 39303 : return cloop;
1132 : : }
1133 : :
1134 : : /* Copies structure of subloops of LOOP into TARGET loop, placing
1135 : : newly created loops into loop tree at the end of TARGETs sibling
1136 : : list in the original order. */
1137 : : void
1138 : 39303 : duplicate_subloops (class loop *loop, class loop *target)
1139 : : {
1140 : 39303 : class loop *aloop, *cloop, *tail;
1141 : :
1142 : 39303 : for (tail = target->inner; tail && tail->next; tail = tail->next)
1143 : : ;
1144 : 40710 : for (aloop = loop->inner; aloop; aloop = aloop->next)
1145 : : {
1146 : 1407 : cloop = duplicate_loop (aloop, target, tail);
1147 : 1407 : tail = cloop;
1148 : 1407 : gcc_assert(!tail->next);
1149 : 1407 : duplicate_subloops (aloop, cloop);
1150 : : }
1151 : 39303 : }
1152 : :
1153 : : /* Copies structure of subloops of N loops, stored in array COPIED_LOOPS,
1154 : : into TARGET loop, placing newly created loops into loop tree adding
1155 : : them to TARGETs sibling list at the end in order. */
1156 : : static void
1157 : 525652 : copy_loops_to (class loop **copied_loops, int n, class loop *target)
1158 : : {
1159 : 525652 : class loop *aloop, *tail;
1160 : 525652 : int i;
1161 : :
1162 : 12297435 : for (tail = target->inner; tail && tail->next; tail = tail->next)
1163 : : ;
1164 : 530205 : for (i = 0; i < n; i++)
1165 : : {
1166 : 4553 : aloop = duplicate_loop (copied_loops[i], target, tail);
1167 : 4553 : tail = aloop;
1168 : 4553 : gcc_assert(!tail->next);
1169 : 4553 : duplicate_subloops (copied_loops[i], aloop);
1170 : : }
1171 : 525652 : }
1172 : :
1173 : : /* Redirects edge E to basic block DEST. */
1174 : : static void
1175 : 35576 : loop_redirect_edge (edge e, basic_block dest)
1176 : : {
1177 : 0 : if (e->dest == dest)
1178 : : return;
1179 : :
1180 : 35576 : redirect_edge_and_branch_force (e, dest);
1181 : : }
1182 : :
1183 : : /* Check whether LOOP's body can be duplicated. */
1184 : : bool
1185 : 527173 : can_duplicate_loop_p (const class loop *loop)
1186 : : {
1187 : 527173 : int ret;
1188 : 527173 : basic_block *bbs = get_loop_body (loop);
1189 : :
1190 : 527173 : ret = can_copy_bbs_p (bbs, loop->num_nodes);
1191 : 527173 : free (bbs);
1192 : :
1193 : 527173 : return ret;
1194 : : }
1195 : :
1196 : : /* Duplicates body of LOOP to given edge E NDUPL times. Takes care of updating
1197 : : loop structure and dominators (order of inner subloops is retained).
1198 : : E's destination must be LOOP header for this to work, i.e. it must be entry
1199 : : or latch edge of this loop; these are unique, as the loops must have
1200 : : preheaders for this function to work correctly (in case E is latch, the
1201 : : function unrolls the loop, if E is entry edge, it peels the loop). Store
1202 : : edges created by copying ORIG edge from copies corresponding to set bits in
1203 : : WONT_EXIT bitmap (bit 0 corresponds to original LOOP body, the other copies
1204 : : are numbered in order given by control flow through them) into TO_REMOVE
1205 : : array. Returns false if duplication is
1206 : : impossible. */
1207 : :
1208 : : bool
1209 : 252884 : duplicate_loop_body_to_header_edge (class loop *loop, edge e,
1210 : : unsigned int ndupl, sbitmap wont_exit,
1211 : : edge orig, vec<edge> *to_remove, int flags)
1212 : : {
1213 : 252884 : class loop *target, *aloop;
1214 : 252884 : class loop **orig_loops;
1215 : 252884 : unsigned n_orig_loops;
1216 : 252884 : basic_block header = loop->header, latch = loop->latch;
1217 : 252884 : basic_block *new_bbs, *bbs, *first_active;
1218 : 252884 : basic_block new_bb, bb, first_active_latch = NULL;
1219 : 252884 : edge ae, latch_edge;
1220 : 252884 : edge spec_edges[2], new_spec_edges[2];
1221 : 252884 : const int SE_LATCH = 0;
1222 : 252884 : const int SE_ORIG = 1;
1223 : 252884 : unsigned i, j, n;
1224 : 252884 : int is_latch = (latch == e->src);
1225 : 252884 : profile_probability *scale_step = NULL;
1226 : 252884 : profile_probability scale_main = profile_probability::always ();
1227 : 252884 : profile_probability scale_act = profile_probability::always ();
1228 : 252884 : profile_count after_exit_num = profile_count::zero (),
1229 : 252884 : after_exit_den = profile_count::zero ();
1230 : 252884 : bool scale_after_exit = false;
1231 : 252884 : int add_irreducible_flag;
1232 : 252884 : basic_block place_after;
1233 : 252884 : bitmap bbs_to_scale = NULL;
1234 : 252884 : bitmap_iterator bi;
1235 : :
1236 : 252884 : gcc_assert (e->dest == loop->header);
1237 : 252884 : gcc_assert (ndupl > 0);
1238 : :
1239 : 252884 : if (orig)
1240 : : {
1241 : : /* Orig must be edge out of the loop. */
1242 : 208004 : gcc_assert (flow_bb_inside_loop_p (loop, orig->src));
1243 : 208004 : gcc_assert (!flow_bb_inside_loop_p (loop, orig->dest));
1244 : : }
1245 : :
1246 : 252884 : n = loop->num_nodes;
1247 : 252884 : bbs = get_loop_body_in_dom_order (loop);
1248 : 252884 : gcc_assert (bbs[0] == loop->header);
1249 : 252884 : gcc_assert (bbs[n - 1] == loop->latch);
1250 : :
1251 : : /* Check whether duplication is possible. */
1252 : 252884 : if (!can_copy_bbs_p (bbs, loop->num_nodes))
1253 : : {
1254 : 19 : free (bbs);
1255 : 19 : return false;
1256 : : }
1257 : 252865 : new_bbs = XNEWVEC (basic_block, loop->num_nodes);
1258 : :
1259 : : /* In case we are doing loop peeling and the loop is in the middle of
1260 : : irreducible region, the peeled copies will be inside it too. */
1261 : 252865 : add_irreducible_flag = e->flags & EDGE_IRREDUCIBLE_LOOP;
1262 : 252865 : gcc_assert (!is_latch || !add_irreducible_flag);
1263 : :
1264 : : /* Find edge from latch. */
1265 : 252865 : latch_edge = loop_latch_edge (loop);
1266 : :
1267 : 252865 : if (flags & DLTHE_FLAG_UPDATE_FREQ)
1268 : : {
1269 : : /* Calculate coefficients by that we have to scale counts
1270 : : of duplicated loop bodies. */
1271 : 217289 : profile_count count_in = header->count;
1272 : 217289 : profile_count count_le = latch_edge->count ();
1273 : 217289 : profile_count count_out_orig = orig ? orig->count () : count_in - count_le;
1274 : 217289 : profile_probability prob_pass_thru = count_le.probability_in (count_in);
1275 : 217289 : profile_count new_count_le = count_le + count_out_orig;
1276 : :
1277 : 207995 : if (orig && orig->probability.initialized_p ()
1278 : 425284 : && !(orig->probability == profile_probability::always ()))
1279 : : {
1280 : : /* The blocks that are dominated by a removed exit edge ORIG have
1281 : : frequencies scaled by this. */
1282 : 207868 : if (orig->count ().initialized_p ())
1283 : : {
1284 : 207868 : after_exit_num = orig->src->count;
1285 : 207868 : after_exit_den = after_exit_num - orig->count ();
1286 : 207868 : scale_after_exit = true;
1287 : : }
1288 : 207868 : bbs_to_scale = BITMAP_ALLOC (NULL);
1289 : 742521 : for (i = 0; i < n; i++)
1290 : : {
1291 : 534653 : if (bbs[i] != orig->src
1292 : 534653 : && dominated_by_p (CDI_DOMINATORS, bbs[i], orig->src))
1293 : 254767 : bitmap_set_bit (bbs_to_scale, i);
1294 : : }
1295 : : /* Since we will scale up all basic blocks dominated by orig, exits
1296 : : will become more likely; compensate for that. */
1297 : 207868 : if (after_exit_den.nonzero_p ())
1298 : : {
1299 : 207225 : auto_vec<edge> exits = get_loop_exit_edges (loop);
1300 : 894470 : for (edge ex : exits)
1301 : 272795 : if (ex != orig
1302 : 272795 : && dominated_by_p (CDI_DOMINATORS, ex->src, orig->src))
1303 : 52902 : new_count_le -= ex->count ().apply_scale (after_exit_num
1304 : : - after_exit_den,
1305 : 26451 : after_exit_den);
1306 : 207225 : }
1307 : : }
1308 : 217289 : profile_probability prob_pass_wont_exit =
1309 : 217289 : new_count_le.probability_in (count_in);
1310 : : /* If profile count is 0, the probability will be uninitialized.
1311 : : We can set probability to any initialized value to avoid
1312 : : precision loss. If profile is sane, all counts will be 0 anyway. */
1313 : 217289 : if (!count_in.nonzero_p ())
1314 : : {
1315 : 614 : prob_pass_thru
1316 : 614 : = profile_probability::always ().apply_scale (1, 2);
1317 : 614 : prob_pass_wont_exit
1318 : 614 : = profile_probability::always ().apply_scale (1, 2);
1319 : : }
1320 : :
1321 : 217289 : scale_step = XNEWVEC (profile_probability, ndupl);
1322 : :
1323 : 707365 : for (i = 1; i <= ndupl; i++)
1324 : 980152 : scale_step[i - 1] = bitmap_bit_p (wont_exit, i)
1325 : 490076 : ? prob_pass_wont_exit
1326 : : : prob_pass_thru;
1327 : :
1328 : : /* Complete peeling is special as the probability of exit in last
1329 : : copy becomes 1. */
1330 : 434499 : if (!count_in.nonzero_p ())
1331 : : ;
1332 : 216675 : else if (flags & DLTHE_FLAG_COMPLETTE_PEEL)
1333 : : {
1334 : 101362 : profile_count wanted_count = e->count ();
1335 : :
1336 : 101362 : gcc_assert (!is_latch);
1337 : : /* First copy has count of incoming edge. Each subsequent
1338 : : count should be reduced by prob_pass_wont_exit. Caller
1339 : : should've managed the flags so all except for original loop
1340 : : has won't exist set. */
1341 : 101362 : scale_act = wanted_count.probability_in (count_in);
1342 : :
1343 : : /* Now simulate the duplication adjustments and compute header
1344 : : frequency of the last copy. */
1345 : 418367 : for (i = 0; i < ndupl; i++)
1346 : 317005 : wanted_count = wanted_count.apply_probability (scale_step [i]);
1347 : 101362 : scale_main = wanted_count.probability_in (count_in);
1348 : : }
1349 : : /* Here we insert loop bodies inside the loop itself (for loop unrolling).
1350 : : First iteration will be original loop followed by duplicated bodies.
1351 : : It is necessary to scale down the original so we get right overall
1352 : : number of iterations. */
1353 : 115313 : else if (is_latch)
1354 : : {
1355 : 51150 : profile_probability prob_pass_main = bitmap_bit_p (wont_exit, 0)
1356 : 51150 : ? prob_pass_wont_exit
1357 : : : prob_pass_thru;
1358 : 51150 : if (!(flags & DLTHE_FLAG_FLAT_PROFILE))
1359 : : {
1360 : 21093 : profile_probability p = prob_pass_main;
1361 : 21093 : profile_count scale_main_den = count_in;
1362 : 82873 : for (i = 0; i < ndupl; i++)
1363 : : {
1364 : 61780 : scale_main_den += count_in.apply_probability (p);
1365 : 61780 : p = p * scale_step[i];
1366 : : }
1367 : : /* If original loop is executed COUNT_IN times, the unrolled
1368 : : loop will account SCALE_MAIN_DEN times. */
1369 : 21093 : scale_main = count_in.probability_in (scale_main_den);
1370 : : }
1371 : : else
1372 : : scale_main = profile_probability::always ();
1373 : 51150 : scale_act = scale_main * prob_pass_main;
1374 : : }
1375 : : else
1376 : : {
1377 : 64163 : profile_count preheader_count = e->count ();
1378 : 133979 : for (i = 0; i < ndupl; i++)
1379 : 69816 : scale_main = scale_main * scale_step[i];
1380 : 64163 : scale_act = preheader_count.probability_in (count_in);
1381 : : }
1382 : : }
1383 : :
1384 : : /* Loop the new bbs will belong to. */
1385 : 252865 : target = e->src->loop_father;
1386 : :
1387 : : /* Original loops. */
1388 : 252865 : n_orig_loops = 0;
1389 : 256660 : for (aloop = loop->inner; aloop; aloop = aloop->next)
1390 : 3795 : n_orig_loops++;
1391 : 252865 : orig_loops = XNEWVEC (class loop *, n_orig_loops);
1392 : 256660 : for (aloop = loop->inner, i = 0; aloop; aloop = aloop->next, i++)
1393 : 3795 : orig_loops[i] = aloop;
1394 : :
1395 : 252865 : set_loop_copy (loop, target);
1396 : :
1397 : 252865 : first_active = XNEWVEC (basic_block, n);
1398 : 252865 : if (is_latch)
1399 : : {
1400 : 51239 : memcpy (first_active, bbs, n * sizeof (basic_block));
1401 : 51239 : first_active_latch = latch;
1402 : : }
1403 : :
1404 : 252865 : spec_edges[SE_ORIG] = orig;
1405 : 252865 : spec_edges[SE_LATCH] = latch_edge;
1406 : :
1407 : 252865 : place_after = e->src;
1408 : 778517 : for (j = 0; j < ndupl; j++)
1409 : : {
1410 : : /* Copy loops. */
1411 : 525652 : copy_loops_to (orig_loops, n_orig_loops, target);
1412 : :
1413 : : /* Copy bbs. */
1414 : 525652 : copy_bbs (bbs, n, new_bbs, spec_edges, 2, new_spec_edges, loop,
1415 : : place_after, true);
1416 : 525652 : place_after = new_spec_edges[SE_LATCH]->src;
1417 : :
1418 : 525652 : if (flags & DLTHE_RECORD_COPY_NUMBER)
1419 : 348077 : for (i = 0; i < n; i++)
1420 : : {
1421 : 245145 : gcc_assert (!new_bbs[i]->aux);
1422 : 245145 : new_bbs[i]->aux = (void *)(size_t)(j + 1);
1423 : : }
1424 : :
1425 : : /* Note whether the blocks and edges belong to an irreducible loop. */
1426 : 525652 : if (add_irreducible_flag)
1427 : : {
1428 : 1501 : for (i = 0; i < n; i++)
1429 : 1110 : new_bbs[i]->flags |= BB_DUPLICATED;
1430 : 1501 : for (i = 0; i < n; i++)
1431 : : {
1432 : 1110 : edge_iterator ei;
1433 : 1110 : new_bb = new_bbs[i];
1434 : 1110 : if (new_bb->loop_father == target)
1435 : 1096 : new_bb->flags |= BB_IRREDUCIBLE_LOOP;
1436 : :
1437 : 2925 : FOR_EACH_EDGE (ae, ei, new_bb->succs)
1438 : 1815 : if ((ae->dest->flags & BB_DUPLICATED)
1439 : 1117 : && (ae->src->loop_father == target
1440 : 21 : || ae->dest->loop_father == target))
1441 : 1103 : ae->flags |= EDGE_IRREDUCIBLE_LOOP;
1442 : : }
1443 : 1501 : for (i = 0; i < n; i++)
1444 : 1110 : new_bbs[i]->flags &= ~BB_DUPLICATED;
1445 : : }
1446 : :
1447 : : /* Redirect the special edges. */
1448 : 525652 : if (is_latch)
1449 : : {
1450 : 102336 : redirect_edge_and_branch_force (latch_edge, new_bbs[0]);
1451 : 102336 : redirect_edge_and_branch_force (new_spec_edges[SE_LATCH],
1452 : : loop->header);
1453 : 102336 : set_immediate_dominator (CDI_DOMINATORS, new_bbs[0], latch);
1454 : 102336 : latch = loop->latch = new_bbs[n - 1];
1455 : 102336 : e = latch_edge = new_spec_edges[SE_LATCH];
1456 : : }
1457 : : else
1458 : : {
1459 : 423316 : redirect_edge_and_branch_force (e, new_bbs[0]);
1460 : 423316 : redirect_edge_and_branch_force (new_spec_edges[SE_LATCH],
1461 : : loop->header);
1462 : 423316 : set_immediate_dominator (CDI_DOMINATORS, new_bbs[0], e->src);
1463 : 423316 : e = new_spec_edges[SE_LATCH];
1464 : : }
1465 : :
1466 : : /* Record exit edge in this copy. */
1467 : 525652 : if (orig && bitmap_bit_p (wont_exit, j + 1))
1468 : : {
1469 : 378937 : if (to_remove)
1470 : 378937 : to_remove->safe_push (new_spec_edges[SE_ORIG]);
1471 : 378937 : force_edge_cold (new_spec_edges[SE_ORIG], true);
1472 : :
1473 : : /* Scale the frequencies of the blocks dominated by the exit. */
1474 : 378937 : if (bbs_to_scale && scale_after_exit)
1475 : : {
1476 : 872555 : EXECUTE_IF_SET_IN_BITMAP (bbs_to_scale, 0, i, bi)
1477 : 493923 : scale_bbs_frequencies_profile_count (new_bbs + i, 1, after_exit_num,
1478 : : after_exit_den);
1479 : : }
1480 : : }
1481 : :
1482 : : /* Record the first copy in the control flow order if it is not
1483 : : the original loop (i.e. in case of peeling). */
1484 : 525652 : if (!first_active_latch)
1485 : : {
1486 : 201626 : memcpy (first_active, new_bbs, n * sizeof (basic_block));
1487 : 201626 : first_active_latch = new_bbs[n - 1];
1488 : : }
1489 : :
1490 : : /* Set counts and frequencies. */
1491 : 525652 : if (flags & DLTHE_FLAG_UPDATE_FREQ)
1492 : : {
1493 : 490076 : scale_bbs_frequencies (new_bbs, n, scale_act);
1494 : 490076 : scale_act = scale_act * scale_step[j];
1495 : : }
1496 : : }
1497 : 252865 : free (new_bbs);
1498 : 252865 : free (orig_loops);
1499 : :
1500 : : /* Record the exit edge in the original loop body, and update the frequencies. */
1501 : 460860 : if (orig && bitmap_bit_p (wont_exit, 0))
1502 : : {
1503 : 48942 : if (to_remove)
1504 : 48942 : to_remove->safe_push (orig);
1505 : 48942 : force_edge_cold (orig, true);
1506 : :
1507 : : /* Scale the frequencies of the blocks dominated by the exit. */
1508 : 48942 : if (bbs_to_scale && scale_after_exit)
1509 : : {
1510 : 97797 : EXECUTE_IF_SET_IN_BITMAP (bbs_to_scale, 0, i, bi)
1511 : 48903 : scale_bbs_frequencies_profile_count (bbs + i, 1, after_exit_num,
1512 : : after_exit_den);
1513 : : }
1514 : : }
1515 : :
1516 : : /* Update the original loop. */
1517 : 252865 : if (!is_latch)
1518 : 201626 : set_immediate_dominator (CDI_DOMINATORS, e->dest, e->src);
1519 : 252865 : if (flags & DLTHE_FLAG_UPDATE_FREQ)
1520 : : {
1521 : 217289 : scale_bbs_frequencies (bbs, n, scale_main);
1522 : 217289 : free (scale_step);
1523 : : }
1524 : :
1525 : : /* Update dominators of outer blocks if affected. */
1526 : 998043 : for (i = 0; i < n; i++)
1527 : : {
1528 : 745178 : basic_block dominated, dom_bb;
1529 : 745178 : unsigned j;
1530 : :
1531 : 745178 : bb = bbs[i];
1532 : :
1533 : 745178 : auto_vec<basic_block> dom_bbs = get_dominated_by (CDI_DOMINATORS, bb);
1534 : 2693298 : FOR_EACH_VEC_ELT (dom_bbs, j, dominated)
1535 : : {
1536 : 751971 : if (flow_bb_inside_loop_p (loop, dominated))
1537 : 543552 : continue;
1538 : 416838 : dom_bb = nearest_common_dominator (
1539 : 208419 : CDI_DOMINATORS, first_active[i], first_active_latch);
1540 : 208419 : set_immediate_dominator (CDI_DOMINATORS, dominated, dom_bb);
1541 : : }
1542 : 745178 : }
1543 : 252865 : free (first_active);
1544 : :
1545 : 252865 : free (bbs);
1546 : 252865 : BITMAP_FREE (bbs_to_scale);
1547 : :
1548 : 252865 : return true;
1549 : : }
1550 : :
1551 : : /* A callback for make_forwarder block, to redirect all edges except for
1552 : : MFB_KJ_EDGE to the entry part. E is the edge for that we should decide
1553 : : whether to redirect it. */
1554 : :
1555 : : edge mfb_kj_edge;
1556 : : bool
1557 : 284969 : mfb_keep_just (edge e)
1558 : : {
1559 : 284969 : return e != mfb_kj_edge;
1560 : : }
1561 : :
1562 : : /* True when a candidate preheader BLOCK has predecessors from LOOP. */
1563 : :
1564 : : static bool
1565 : 34 : has_preds_from_loop (basic_block block, class loop *loop)
1566 : : {
1567 : 34 : edge e;
1568 : 34 : edge_iterator ei;
1569 : :
1570 : 74 : FOR_EACH_EDGE (e, ei, block->preds)
1571 : 40 : if (e->src->loop_father == loop)
1572 : : return true;
1573 : : return false;
1574 : : }
1575 : :
1576 : : /* Creates a pre-header for a LOOP. Returns newly created block. Unless
1577 : : CP_SIMPLE_PREHEADERS is set in FLAGS, we only force LOOP to have single
1578 : : entry; otherwise we also force preheader block to have only one successor.
1579 : : When CP_FALLTHRU_PREHEADERS is set in FLAGS, we force the preheader block
1580 : : to be a fallthru predecessor to the loop header and to have only
1581 : : predecessors from outside of the loop.
1582 : : The function also updates dominators. */
1583 : :
1584 : : basic_block
1585 : 17480748 : create_preheader (class loop *loop, int flags)
1586 : : {
1587 : 17480748 : edge e;
1588 : 17480748 : basic_block dummy;
1589 : 17480748 : int nentry = 0;
1590 : 17480748 : bool irred = false;
1591 : 17480748 : bool latch_edge_was_fallthru;
1592 : 17480748 : edge one_succ_pred = NULL, single_entry = NULL;
1593 : 17480748 : edge_iterator ei;
1594 : :
1595 : 52560554 : FOR_EACH_EDGE (e, ei, loop->header->preds)
1596 : : {
1597 : 35079806 : if (e->src == loop->latch)
1598 : 17480748 : continue;
1599 : 17599058 : irred |= (e->flags & EDGE_IRREDUCIBLE_LOOP) != 0;
1600 : 17599058 : nentry++;
1601 : 17599058 : single_entry = e;
1602 : 49246533 : if (single_succ_p (e->src))
1603 : 14166727 : one_succ_pred = e;
1604 : : }
1605 : 17480748 : gcc_assert (nentry);
1606 : 17480748 : if (nentry == 1)
1607 : : {
1608 : 17400286 : bool need_forwarder_block = false;
1609 : :
1610 : : /* We do not allow entry block to be the loop preheader, since we
1611 : : cannot emit code there. */
1612 : 17400286 : if (single_entry->src == ENTRY_BLOCK_PTR_FOR_FN (cfun))
1613 : : need_forwarder_block = true;
1614 : : else
1615 : : {
1616 : : /* If we want simple preheaders, also force the preheader to have
1617 : : just a single successor and a normal edge. */
1618 : 17397444 : if ((flags & CP_SIMPLE_PREHEADERS)
1619 : 17397444 : && ((single_entry->flags & EDGE_COMPLEX)
1620 : 17371874 : || !single_succ_p (single_entry->src)
1621 : : /* We document LOOPS_HAVE_PREHEADERS as to forming a
1622 : : natural place to move code outside of the loop, so it
1623 : : should not end with a control instruction. */
1624 : : /* ??? This, and below JUMP_P check needs to be a new
1625 : : CFG hook. */
1626 : 14101491 : || (cfun->curr_properties & PROP_gimple
1627 : 26733002 : && !gsi_end_p (gsi_last_bb (single_entry->src))
1628 : 8668550 : && stmt_ends_bb_p (*gsi_last_bb (single_entry->src)))))
1629 : : need_forwarder_block = true;
1630 : : /* If we want fallthru preheaders, also create forwarder block when
1631 : : preheader ends with a jump or has predecessors from loop. */
1632 : 14101417 : else if ((flags & CP_FALLTHRU_PREHEADERS)
1633 : 14101417 : && (JUMP_P (BB_END (single_entry->src))
1634 : 34 : || has_preds_from_loop (single_entry->src, loop)))
1635 : : need_forwarder_block = true;
1636 : : }
1637 : 3296027 : if (! need_forwarder_block)
1638 : : return NULL;
1639 : : }
1640 : :
1641 : 3379344 : mfb_kj_edge = loop_latch_edge (loop);
1642 : 3379344 : latch_edge_was_fallthru = (mfb_kj_edge->flags & EDGE_FALLTHRU) != 0;
1643 : 3379344 : if (nentry == 1
1644 : 3298882 : && ((flags & CP_FALLTHRU_PREHEADERS) == 0
1645 : 24 : || (single_entry->flags & EDGE_CROSSING) == 0))
1646 : 3298881 : dummy = split_edge (single_entry);
1647 : : else
1648 : : {
1649 : 80463 : edge fallthru = make_forwarder_block (loop->header, mfb_keep_just, NULL);
1650 : 80463 : dummy = fallthru->src;
1651 : 80463 : loop->header = fallthru->dest;
1652 : : }
1653 : :
1654 : : /* Try to be clever in placing the newly created preheader. The idea is to
1655 : : avoid breaking any "fallthruness" relationship between blocks.
1656 : :
1657 : : The preheader was created just before the header and all incoming edges
1658 : : to the header were redirected to the preheader, except the latch edge.
1659 : : So the only problematic case is when this latch edge was a fallthru
1660 : : edge: it is not anymore after the preheader creation so we have broken
1661 : : the fallthruness. We're therefore going to look for a better place. */
1662 : 3379344 : if (latch_edge_was_fallthru)
1663 : : {
1664 : 1516209 : if (one_succ_pred)
1665 : 38332 : e = one_succ_pred;
1666 : : else
1667 : 1477877 : e = EDGE_PRED (dummy, 0);
1668 : :
1669 : 1516209 : move_block_after (dummy, e->src);
1670 : : }
1671 : :
1672 : 3379344 : if (irred)
1673 : : {
1674 : 4283 : dummy->flags |= BB_IRREDUCIBLE_LOOP;
1675 : 4283 : single_succ_edge (dummy)->flags |= EDGE_IRREDUCIBLE_LOOP;
1676 : : }
1677 : :
1678 : 3379344 : if (dump_file)
1679 : 173 : fprintf (dump_file, "Created preheader block for loop %i\n",
1680 : : loop->num);
1681 : :
1682 : 3379344 : if (flags & CP_FALLTHRU_PREHEADERS)
1683 : 31 : gcc_assert ((single_succ_edge (dummy)->flags & EDGE_FALLTHRU)
1684 : : && !JUMP_P (BB_END (dummy)));
1685 : :
1686 : : return dummy;
1687 : : }
1688 : :
1689 : : /* Create preheaders for each loop; for meaning of FLAGS see create_preheader. */
1690 : :
1691 : : void
1692 : 30963184 : create_preheaders (int flags)
1693 : : {
1694 : 30963184 : if (!current_loops)
1695 : : return;
1696 : :
1697 : 110369277 : for (auto loop : loops_list (cfun, 0))
1698 : 17479725 : create_preheader (loop, flags);
1699 : 30963184 : loops_state_set (LOOPS_HAVE_PREHEADERS);
1700 : : }
1701 : :
1702 : : /* Forces all loop latches to have only single successor. */
1703 : :
1704 : : void
1705 : 28452144 : force_single_succ_latches (void)
1706 : : {
1707 : 28452144 : edge e;
1708 : :
1709 : 102188166 : for (auto loop : loops_list (cfun, 0))
1710 : : {
1711 : 16831734 : if (loop->latch != loop->header && single_succ_p (loop->latch))
1712 : 11692371 : continue;
1713 : :
1714 : 5139363 : e = find_edge (loop->latch, loop->header);
1715 : 5139363 : gcc_checking_assert (e != NULL);
1716 : :
1717 : 5139363 : split_edge (e);
1718 : 28452144 : }
1719 : 28452144 : loops_state_set (LOOPS_HAVE_SIMPLE_LATCHES);
1720 : 28452144 : }
1721 : :
1722 : : /* This function is called from loop_version. It splits the entry edge
1723 : : of the loop we want to version, adds the versioning condition, and
1724 : : adjust the edges to the two versions of the loop appropriately.
1725 : : e is an incoming edge. Returns the basic block containing the
1726 : : condition.
1727 : :
1728 : : --- edge e ---- > [second_head]
1729 : :
1730 : : Split it and insert new conditional expression and adjust edges.
1731 : :
1732 : : --- edge e ---> [cond expr] ---> [first_head]
1733 : : |
1734 : : +---------> [second_head]
1735 : :
1736 : : THEN_PROB is the probability of then branch of the condition.
1737 : : ELSE_PROB is the probability of else branch. Note that they may be both
1738 : : REG_BR_PROB_BASE when condition is IFN_LOOP_VECTORIZED or
1739 : : IFN_LOOP_DIST_ALIAS. */
1740 : :
1741 : : static basic_block
1742 : 35576 : lv_adjust_loop_entry_edge (basic_block first_head, basic_block second_head,
1743 : : edge e, void *cond_expr,
1744 : : profile_probability then_prob,
1745 : : profile_probability else_prob)
1746 : : {
1747 : 35576 : basic_block new_head = NULL;
1748 : 35576 : edge e1;
1749 : :
1750 : 35576 : gcc_assert (e->dest == second_head);
1751 : :
1752 : : /* Split edge 'e'. This will create a new basic block, where we can
1753 : : insert conditional expr. */
1754 : 35576 : new_head = split_edge (e);
1755 : :
1756 : 35576 : lv_add_condition_to_bb (first_head, second_head, new_head,
1757 : : cond_expr);
1758 : :
1759 : : /* Don't set EDGE_TRUE_VALUE in RTL mode, as it's invalid there. */
1760 : 35576 : e = single_succ_edge (new_head);
1761 : 35576 : e1 = make_edge (new_head, first_head,
1762 : 35576 : current_ir_type () == IR_GIMPLE ? EDGE_TRUE_VALUE : 0);
1763 : 35576 : e1->probability = then_prob;
1764 : 35576 : e->probability = else_prob;
1765 : :
1766 : 35576 : set_immediate_dominator (CDI_DOMINATORS, first_head, new_head);
1767 : 35576 : set_immediate_dominator (CDI_DOMINATORS, second_head, new_head);
1768 : :
1769 : : /* Adjust loop header phi nodes. */
1770 : 35576 : lv_adjust_loop_header_phi (first_head, second_head, new_head, e1);
1771 : :
1772 : 35576 : return new_head;
1773 : : }
1774 : :
1775 : : /* Main entry point for Loop Versioning transformation.
1776 : :
1777 : : This transformation given a condition and a loop, creates
1778 : : -if (condition) { loop_copy1 } else { loop_copy2 },
1779 : : where loop_copy1 is the loop transformed in one way, and loop_copy2
1780 : : is the loop transformed in another way (or unchanged). COND_EXPR
1781 : : may be a run time test for things that were not resolved by static
1782 : : analysis (overlapping ranges (anti-aliasing), alignment, etc.).
1783 : :
1784 : : If non-NULL, CONDITION_BB is set to the basic block containing the
1785 : : condition.
1786 : :
1787 : : THEN_PROB is the probability of the then edge of the if. THEN_SCALE
1788 : : is the ratio by that the frequencies in the original loop should
1789 : : be scaled. ELSE_SCALE is the ratio by that the frequencies in the
1790 : : new loop should be scaled.
1791 : :
1792 : : If PLACE_AFTER is true, we place the new loop after LOOP in the
1793 : : instruction stream, otherwise it is placed before LOOP. */
1794 : :
1795 : : class loop *
1796 : 35586 : loop_version (class loop *loop,
1797 : : void *cond_expr, basic_block *condition_bb,
1798 : : profile_probability then_prob, profile_probability else_prob,
1799 : : profile_probability then_scale, profile_probability else_scale,
1800 : : bool place_after)
1801 : : {
1802 : 35586 : basic_block first_head, second_head;
1803 : 35586 : edge entry, latch_edge;
1804 : 35586 : int irred_flag;
1805 : 35586 : class loop *nloop;
1806 : 35586 : basic_block cond_bb;
1807 : :
1808 : : /* Record entry and latch edges for the loop */
1809 : 35586 : entry = loop_preheader_edge (loop);
1810 : 35586 : irred_flag = entry->flags & EDGE_IRREDUCIBLE_LOOP;
1811 : 35586 : entry->flags &= ~EDGE_IRREDUCIBLE_LOOP;
1812 : :
1813 : : /* Note down head of loop as first_head. */
1814 : 35586 : first_head = entry->dest;
1815 : :
1816 : : /* 1) Duplicate loop on the entry edge. */
1817 : 35586 : if (!cfg_hook_duplicate_loop_body_to_header_edge (loop, entry, 1, NULL, NULL,
1818 : : NULL, 0))
1819 : : {
1820 : 10 : entry->flags |= irred_flag;
1821 : 10 : return NULL;
1822 : : }
1823 : :
1824 : : /* 2) loopify the duplicated new loop. */
1825 : 35576 : latch_edge = single_succ_edge (get_bb_copy (loop->latch));
1826 : 35576 : nloop = alloc_loop ();
1827 : 35576 : class loop *outer = loop_outer (latch_edge->dest->loop_father);
1828 : 35576 : edge new_header_edge = single_pred_edge (get_bb_copy (loop->header));
1829 : 35576 : nloop->header = new_header_edge->dest;
1830 : 35576 : nloop->latch = latch_edge->src;
1831 : 35576 : loop_redirect_edge (latch_edge, nloop->header);
1832 : :
1833 : : /* Compute new loop. */
1834 : 35576 : add_loop (nloop, outer);
1835 : 35576 : copy_loop_info (loop, nloop);
1836 : 35576 : set_loop_copy (loop, nloop);
1837 : :
1838 : : /* loopify redirected latch_edge. Update its PENDING_STMTS. */
1839 : 35576 : lv_flush_pending_stmts (latch_edge);
1840 : :
1841 : : /* After duplication entry edge now points to new loop head block.
1842 : : Note down new head as second_head. */
1843 : 35576 : second_head = entry->dest;
1844 : :
1845 : : /* 3) Split loop entry edge and insert new block with cond expr. */
1846 : 35576 : cond_bb = lv_adjust_loop_entry_edge (first_head, second_head,
1847 : : entry, cond_expr, then_prob, else_prob);
1848 : 35576 : if (condition_bb)
1849 : 31441 : *condition_bb = cond_bb;
1850 : :
1851 : 35576 : if (!cond_bb)
1852 : : {
1853 : 0 : entry->flags |= irred_flag;
1854 : 0 : return NULL;
1855 : : }
1856 : :
1857 : : /* Add cond_bb to appropriate loop. */
1858 : 35576 : if (cond_bb->loop_father)
1859 : 35576 : remove_bb_from_loops (cond_bb);
1860 : 35576 : add_bb_to_loop (cond_bb, outer);
1861 : :
1862 : : /* 4) Scale the original loop and new loop frequency. */
1863 : 35576 : scale_loop_frequencies (loop, then_scale);
1864 : 35576 : scale_loop_frequencies (nloop, else_scale);
1865 : 35576 : update_dominators_in_loop (loop);
1866 : 35576 : update_dominators_in_loop (nloop);
1867 : :
1868 : : /* Adjust irreducible flag. */
1869 : 35576 : if (irred_flag)
1870 : : {
1871 : 48 : cond_bb->flags |= BB_IRREDUCIBLE_LOOP;
1872 : 48 : loop_preheader_edge (loop)->flags |= EDGE_IRREDUCIBLE_LOOP;
1873 : 48 : loop_preheader_edge (nloop)->flags |= EDGE_IRREDUCIBLE_LOOP;
1874 : 48 : single_pred_edge (cond_bb)->flags |= EDGE_IRREDUCIBLE_LOOP;
1875 : : }
1876 : :
1877 : 35576 : if (place_after)
1878 : : {
1879 : 33123 : basic_block *bbs = get_loop_body_in_dom_order (nloop), after;
1880 : 33123 : unsigned i;
1881 : :
1882 : 33123 : after = loop->latch;
1883 : :
1884 : 194452 : for (i = 0; i < nloop->num_nodes; i++)
1885 : : {
1886 : 161329 : move_block_after (bbs[i], after);
1887 : 161329 : after = bbs[i];
1888 : : }
1889 : 33123 : free (bbs);
1890 : : }
1891 : :
1892 : : /* At this point condition_bb is loop preheader with two successors,
1893 : : first_head and second_head. Make sure that loop preheader has only
1894 : : one successor. */
1895 : 35576 : split_edge (loop_preheader_edge (loop));
1896 : 35576 : split_edge (loop_preheader_edge (nloop));
1897 : :
1898 : 35576 : return nloop;
1899 : : }
|