Line data Source code
1 : /* Loop manipulation code for GNU compiler.
2 : Copyright (C) 2002-2026 Free Software Foundation, Inc.
3 :
4 : This file is part of GCC.
5 :
6 : GCC is free software; you can redistribute it and/or modify it under
7 : the terms of the GNU General Public License as published by the Free
8 : Software Foundation; either version 3, or (at your option) any later
9 : version.
10 :
11 : GCC is distributed in the hope that it will be useful, but WITHOUT ANY
12 : WARRANTY; without even the implied warranty of MERCHANTABILITY or
13 : FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
14 : for more details.
15 :
16 : You should have received a copy of the GNU General Public License
17 : along with GCC; see the file COPYING3. If not see
18 : <http://www.gnu.org/licenses/>. */
19 :
20 : #include "config.h"
21 : #include "system.h"
22 : #include "coretypes.h"
23 : #include "backend.h"
24 : #include "rtl.h"
25 : #include "tree.h"
26 : #include "gimple.h"
27 : #include "cfghooks.h"
28 : #include "cfganal.h"
29 : #include "cfgloop.h"
30 : #include "gimple-iterator.h"
31 : #include "gimplify-me.h"
32 : #include "tree-ssa-loop-manip.h"
33 : #include "dumpfile.h"
34 : #include "sreal.h"
35 : #include "tree-cfg.h"
36 : #include "tree-pass.h"
37 : #include "hierarchical_discriminator.h"
38 :
39 : static void copy_loops_to (class loop **, int,
40 : class loop *);
41 : static void loop_redirect_edge (edge, basic_block);
42 : static void remove_bbs (basic_block *, int);
43 : static bool rpe_enum_p (const_basic_block, const void *);
44 : static int find_path (edge, basic_block **);
45 : static void fix_loop_placements (class loop *, bool *, bitmap);
46 : static bool fix_bb_placement (basic_block);
47 : static void fix_bb_placements (basic_block, bool *, bitmap);
48 :
49 : /* Checks whether basic block BB is dominated by DATA. */
50 : static bool
51 439510 : rpe_enum_p (const_basic_block bb, const void *data)
52 : {
53 439510 : return dominated_by_p (CDI_DOMINATORS, bb, (const_basic_block) data);
54 : }
55 :
56 : /* Remove basic blocks BBS. NBBS is the number of the basic blocks. */
57 :
58 : static void
59 439512 : remove_bbs (basic_block *bbs, int nbbs)
60 : {
61 439512 : int i;
62 :
63 879024 : for (i = 0; i < nbbs; i++)
64 439512 : delete_basic_block (bbs[i]);
65 439512 : }
66 :
67 : /* Find path -- i.e. the basic blocks dominated by edge E and put them
68 : into array BBS, that will be allocated large enough to contain them.
69 : E->dest must have exactly one predecessor for this to work (it is
70 : easy to achieve and we do not put it here because we do not want to
71 : alter anything by this function). The number of basic blocks in the
72 : path is returned. */
73 : static int
74 439512 : find_path (edge e, basic_block **bbs)
75 : {
76 439512 : gcc_assert (EDGE_COUNT (e->dest->preds) <= 1);
77 :
78 : /* Find bbs in the path. */
79 439512 : *bbs = XNEWVEC (basic_block, n_basic_blocks_for_fn (cfun));
80 439512 : return dfs_enumerate_from (e->dest, 0, rpe_enum_p, *bbs,
81 439512 : n_basic_blocks_for_fn (cfun), e->dest);
82 : }
83 :
84 : /* Fix placement of basic block BB inside loop hierarchy --
85 : Let L be a loop to that BB belongs. Then every successor of BB must either
86 : 1) belong to some superloop of loop L, or
87 : 2) be a header of loop K such that K->outer is superloop of L
88 : Returns true if we had to move BB into other loop to enforce this condition,
89 : false if the placement of BB was already correct (provided that placements
90 : of its successors are correct). */
91 : static bool
92 252259 : fix_bb_placement (basic_block bb)
93 : {
94 252259 : edge e;
95 252259 : edge_iterator ei;
96 252259 : class loop *loop = current_loops->tree_root, *act;
97 :
98 518632 : FOR_EACH_EDGE (e, ei, bb->succs)
99 : {
100 266373 : if (e->dest == EXIT_BLOCK_PTR_FOR_FN (cfun))
101 0 : continue;
102 :
103 266373 : act = e->dest->loop_father;
104 266373 : if (act->header == e->dest)
105 308 : act = loop_outer (act);
106 :
107 266373 : if (flow_loop_nested_p (loop, act))
108 266373 : loop = act;
109 : }
110 :
111 252259 : if (loop == bb->loop_father)
112 : return false;
113 :
114 55879 : remove_bb_from_loops (bb);
115 55879 : add_bb_to_loop (bb, loop);
116 :
117 55879 : return true;
118 : }
119 :
120 : /* Fix placement of LOOP inside loop tree, i.e. find the innermost superloop
121 : of LOOP to that leads at least one exit edge of LOOP, and set it
122 : as the immediate superloop of LOOP. Return true if the immediate superloop
123 : of LOOP changed.
124 :
125 : IRRED_INVALIDATED is set to true if a change in the loop structures might
126 : invalidate the information about irreducible regions. */
127 :
128 : static bool
129 248319 : fix_loop_placement (class loop *loop, bool *irred_invalidated,
130 : bitmap loop_closed_ssa_invalidated)
131 : {
132 248319 : unsigned i;
133 248319 : edge e;
134 248319 : auto_vec<edge> exits = get_loop_exit_edges (loop);
135 248319 : class loop *father = current_loops->tree_root, *act;
136 248319 : bool ret = false;
137 :
138 1639144 : FOR_EACH_VEC_ELT (exits, i, e)
139 : {
140 1390825 : act = find_common_loop (loop, e->dest->loop_father);
141 1390825 : if (flow_loop_nested_p (father, act))
142 87577 : father = act;
143 : }
144 :
145 248319 : if (father != loop_outer (loop))
146 : {
147 891 : for (act = loop_outer (loop); act != father; act = loop_outer (act))
148 583 : act->num_nodes -= loop->num_nodes;
149 308 : flow_loop_tree_node_remove (loop);
150 308 : flow_loop_tree_node_add (father, loop);
151 :
152 : /* The exit edges of LOOP no longer exits its original immediate
153 : superloops; remove them from the appropriate exit lists. */
154 955 : FOR_EACH_VEC_ELT (exits, i, e)
155 : {
156 : /* We may need to recompute irreducible loops. */
157 339 : if (e->flags & EDGE_IRREDUCIBLE_LOOP)
158 0 : *irred_invalidated = true;
159 339 : rescan_loop_exit (e, false, false);
160 : }
161 : /* Any LC SSA PHIs on e->dest might now be on the wrong edge
162 : if their defs were in a former outer loop. Also all uses
163 : in the original inner loop of defs in the outer loop(s) now
164 : require LC PHI nodes. */
165 308 : if (loop_closed_ssa_invalidated)
166 : {
167 12 : basic_block *bbs = get_loop_body (loop);
168 91 : for (unsigned i = 0; i < loop->num_nodes; ++i)
169 79 : bitmap_set_bit (loop_closed_ssa_invalidated, bbs[i]->index);
170 12 : free (bbs);
171 : }
172 :
173 : ret = true;
174 : }
175 :
176 248319 : return ret;
177 248319 : }
178 :
179 : /* Fix placements of basic blocks inside loop hierarchy stored in loops; i.e.
180 : enforce condition stated in description of fix_bb_placement. We
181 : start from basic block FROM that had some of its successors removed, so that
182 : his placement no longer has to be correct, and iteratively fix placement of
183 : its predecessors that may change if placement of FROM changed. Also fix
184 : placement of subloops of FROM->loop_father, that might also be altered due
185 : to this change; the condition for them is similar, except that instead of
186 : successors we consider edges coming out of the loops.
187 :
188 : If the changes may invalidate the information about irreducible regions,
189 : IRRED_INVALIDATED is set to true.
190 :
191 : If LOOP_CLOSED_SSA_INVLIDATED is non-zero then all basic blocks with
192 : changed loop_father are collected there. */
193 :
194 : static void
195 577548 : fix_bb_placements (basic_block from,
196 : bool *irred_invalidated,
197 : bitmap loop_closed_ssa_invalidated)
198 : {
199 577548 : basic_block *queue, *qtop, *qbeg, *qend;
200 577548 : class loop *base_loop, *target_loop;
201 577548 : edge e;
202 :
203 : /* We pass through blocks back-reachable from FROM, testing whether some
204 : of their successors moved to outer loop. It may be necessary to
205 : iterate several times, but it is finite, as we stop unless we move
206 : the basic block up the loop structure. The whole story is a bit
207 : more complicated due to presence of subloops, those are moved using
208 : fix_loop_placement. */
209 :
210 577548 : base_loop = from->loop_father;
211 : /* If we are already in the outermost loop, the basic blocks cannot be moved
212 : outside of it. If FROM is the header of the base loop, it cannot be moved
213 : outside of it, either. In both cases, we can end now. */
214 577548 : if (base_loop == current_loops->tree_root
215 237781 : || from == base_loop->header)
216 385611 : return;
217 :
218 191937 : auto_sbitmap in_queue (last_basic_block_for_fn (cfun));
219 191937 : bitmap_clear (in_queue);
220 191937 : bitmap_set_bit (in_queue, from->index);
221 : /* Prevent us from going out of the base_loop. */
222 191937 : bitmap_set_bit (in_queue, base_loop->header->index);
223 :
224 191937 : queue = XNEWVEC (basic_block, base_loop->num_nodes + 1);
225 191937 : qtop = queue + base_loop->num_nodes + 1;
226 191937 : qbeg = queue;
227 191937 : qend = queue + 1;
228 191937 : *qbeg = from;
229 :
230 444812 : while (qbeg != qend)
231 : {
232 252875 : edge_iterator ei;
233 252875 : from = *qbeg;
234 252875 : qbeg++;
235 252875 : if (qbeg == qtop)
236 0 : qbeg = queue;
237 252875 : bitmap_clear_bit (in_queue, from->index);
238 :
239 252875 : if (from->loop_father->header == from)
240 : {
241 : /* Subloop header, maybe move the loop upward. */
242 616 : if (!fix_loop_placement (from->loop_father, irred_invalidated,
243 : loop_closed_ssa_invalidated))
244 196694 : continue;
245 302 : target_loop = loop_outer (from->loop_father);
246 : }
247 : else
248 : {
249 : /* Ordinary basic block. */
250 252259 : if (!fix_bb_placement (from))
251 196380 : continue;
252 55879 : target_loop = from->loop_father;
253 55879 : if (loop_closed_ssa_invalidated)
254 20826 : bitmap_set_bit (loop_closed_ssa_invalidated, from->index);
255 : }
256 :
257 85986 : FOR_EACH_EDGE (e, ei, from->succs)
258 : {
259 29805 : if (e->flags & EDGE_IRREDUCIBLE_LOOP)
260 1 : *irred_invalidated = true;
261 : }
262 :
263 : /* Something has changed, insert predecessors into queue. */
264 118039 : FOR_EACH_EDGE (e, ei, from->preds)
265 : {
266 61858 : basic_block pred = e->src;
267 61858 : class loop *nca;
268 :
269 61858 : if (e->flags & EDGE_IRREDUCIBLE_LOOP)
270 0 : *irred_invalidated = true;
271 :
272 61858 : if (bitmap_bit_p (in_queue, pred->index))
273 920 : continue;
274 :
275 : /* If it is subloop, then it either was not moved, or
276 : the path up the loop tree from base_loop do not contain
277 : it. */
278 60938 : nca = find_common_loop (pred->loop_father, base_loop);
279 60938 : if (pred->loop_father != base_loop
280 616 : && (nca == base_loop
281 302 : || nca != pred->loop_father))
282 616 : pred = pred->loop_father->header;
283 60322 : else if (!flow_loop_nested_p (target_loop, pred->loop_father))
284 : {
285 : /* If PRED is already higher in the loop hierarchy than the
286 : TARGET_LOOP to that we moved FROM, the change of the position
287 : of FROM does not affect the position of PRED, so there is no
288 : point in processing it. */
289 0 : continue;
290 : }
291 :
292 60938 : if (bitmap_bit_p (in_queue, pred->index))
293 0 : continue;
294 :
295 : /* Schedule the basic block. */
296 60938 : *qend = pred;
297 60938 : qend++;
298 60938 : if (qend == qtop)
299 0 : qend = queue;
300 60938 : bitmap_set_bit (in_queue, pred->index);
301 : }
302 : }
303 191937 : free (queue);
304 191937 : }
305 :
306 : /* Removes path beginning at edge E, i.e. remove basic blocks dominated by E
307 : and update loop structures and dominators. Return true if we were able
308 : to remove the path, false otherwise (and nothing is affected then). */
309 : bool
310 439512 : remove_path (edge e, bool *irred_invalidated,
311 : bitmap loop_closed_ssa_invalidated)
312 : {
313 439512 : edge ae;
314 439512 : basic_block *rem_bbs, *bord_bbs, from, bb;
315 439512 : vec<basic_block> dom_bbs;
316 439512 : int i, nrem, n_bord_bbs;
317 439512 : bool local_irred_invalidated = false;
318 439512 : edge_iterator ei;
319 439512 : class loop *l, *f;
320 :
321 439512 : if (! irred_invalidated)
322 146924 : irred_invalidated = &local_irred_invalidated;
323 :
324 439512 : if (!can_remove_branch_p (e))
325 : return false;
326 :
327 : /* Keep track of whether we need to update information about irreducible
328 : regions. This is the case if the removed area is a part of the
329 : irreducible region, or if the set of basic blocks that belong to a loop
330 : that is inside an irreducible region is changed, or if such a loop is
331 : removed. */
332 439512 : if (e->flags & EDGE_IRREDUCIBLE_LOOP)
333 380 : *irred_invalidated = true;
334 :
335 : /* We need to check whether basic blocks are dominated by the edge
336 : e, but we only have basic block dominators. This is easy to
337 : fix -- when e->dest has exactly one predecessor, this corresponds
338 : to blocks dominated by e->dest, if not, split the edge. */
339 439512 : if (!single_pred_p (e->dest))
340 439510 : e = single_pred_edge (split_edge (e));
341 :
342 : /* It may happen that by removing path we remove one or more loops
343 : we belong to. In this case first unloop the loops, then proceed
344 : normally. We may assume that e->dest is not a header of any loop,
345 : as it now has exactly one predecessor. */
346 747921 : for (l = e->src->loop_father; loop_outer (l); l = f)
347 : {
348 308409 : f = loop_outer (l);
349 308409 : if (dominated_by_p (CDI_DOMINATORS, l->latch, e->dest))
350 2 : unloop (l, irred_invalidated, loop_closed_ssa_invalidated);
351 : }
352 :
353 : /* Identify the path. */
354 439512 : nrem = find_path (e, &rem_bbs);
355 :
356 439512 : n_bord_bbs = 0;
357 439512 : bord_bbs = XNEWVEC (basic_block, n_basic_blocks_for_fn (cfun));
358 439512 : auto_sbitmap seen (last_basic_block_for_fn (cfun));
359 439512 : bitmap_clear (seen);
360 :
361 : /* Find "border" hexes -- i.e. those with predecessor in removed path. */
362 1318536 : for (i = 0; i < nrem; i++)
363 439512 : bitmap_set_bit (seen, rem_bbs[i]->index);
364 439512 : if (!*irred_invalidated)
365 1317268 : FOR_EACH_EDGE (ae, ei, e->src->succs)
366 439132 : if (ae != e && ae->dest != EXIT_BLOCK_PTR_FOR_FN (cfun)
367 439132 : && !bitmap_bit_p (seen, ae->dest->index)
368 1317371 : && ae->flags & EDGE_IRREDUCIBLE_LOOP)
369 : {
370 103 : *irred_invalidated = true;
371 103 : break;
372 : }
373 :
374 879024 : for (i = 0; i < nrem; i++)
375 : {
376 879022 : FOR_EACH_EDGE (ae, ei, rem_bbs[i]->succs)
377 439510 : if (ae->dest != EXIT_BLOCK_PTR_FOR_FN (cfun)
378 439510 : && !bitmap_bit_p (seen, ae->dest->index))
379 : {
380 439510 : bitmap_set_bit (seen, ae->dest->index);
381 439510 : bord_bbs[n_bord_bbs++] = ae->dest;
382 :
383 439510 : if (ae->flags & EDGE_IRREDUCIBLE_LOOP)
384 380 : *irred_invalidated = true;
385 : }
386 : }
387 :
388 : /* Remove the path. */
389 439512 : from = e->src;
390 439512 : remove_branch (e);
391 439512 : dom_bbs.create (0);
392 :
393 : /* Cancel loops contained in the path. */
394 879024 : for (i = 0; i < nrem; i++)
395 439512 : if (rem_bbs[i]->loop_father->header == rem_bbs[i])
396 0 : cancel_loop_tree (rem_bbs[i]->loop_father);
397 :
398 439512 : remove_bbs (rem_bbs, nrem);
399 439512 : free (rem_bbs);
400 :
401 : /* Find blocks whose dominators may be affected. */
402 439512 : bitmap_clear (seen);
403 879022 : for (i = 0; i < n_bord_bbs; i++)
404 : {
405 439510 : basic_block ldom;
406 :
407 439510 : bb = get_immediate_dominator (CDI_DOMINATORS, bord_bbs[i]);
408 439510 : if (bitmap_bit_p (seen, bb->index))
409 0 : continue;
410 439510 : bitmap_set_bit (seen, bb->index);
411 :
412 439510 : for (ldom = first_dom_son (CDI_DOMINATORS, bb);
413 1440697 : ldom;
414 1001187 : ldom = next_dom_son (CDI_DOMINATORS, ldom))
415 1001187 : if (!dominated_by_p (CDI_DOMINATORS, from, ldom))
416 845915 : dom_bbs.safe_push (ldom);
417 : }
418 :
419 : /* Recount dominators. */
420 439512 : iterate_fix_dominators (CDI_DOMINATORS, dom_bbs, true);
421 439512 : dom_bbs.release ();
422 439512 : free (bord_bbs);
423 :
424 : /* Fix placements of basic blocks inside loops and the placement of
425 : loops in the loop tree. */
426 439512 : fix_bb_placements (from, irred_invalidated, loop_closed_ssa_invalidated);
427 439512 : fix_loop_placements (from->loop_father, irred_invalidated,
428 : loop_closed_ssa_invalidated);
429 :
430 439512 : if (local_irred_invalidated
431 439512 : && loops_state_satisfies_p (LOOPS_HAVE_MARKED_IRREDUCIBLE_REGIONS))
432 451 : mark_irreducible_loops ();
433 :
434 439512 : return true;
435 439512 : }
436 :
437 : /* Creates place for a new LOOP in loops structure of FN. */
438 :
439 : void
440 795755 : place_new_loop (struct function *fn, class loop *loop)
441 : {
442 795755 : loop->num = number_of_loops (fn);
443 795755 : vec_safe_push (loops_for_fn (fn)->larray, loop);
444 795755 : }
445 :
446 : /* Given LOOP structure with filled header and latch, find the body of the
447 : corresponding loop and add it to loops tree. Insert the LOOP as a son of
448 : outer. */
449 :
450 : void
451 116643 : add_loop (class loop *loop, class loop *outer)
452 : {
453 116643 : basic_block *bbs;
454 116643 : int i, n;
455 116643 : class loop *subloop;
456 116643 : edge e;
457 116643 : edge_iterator ei;
458 :
459 : /* Add it to loop structure. */
460 116643 : place_new_loop (cfun, loop);
461 116643 : flow_loop_tree_node_add (outer, loop);
462 :
463 : /* Find its nodes. */
464 116643 : bbs = XNEWVEC (basic_block, n_basic_blocks_for_fn (cfun));
465 116643 : n = get_loop_body_with_size (loop, bbs, n_basic_blocks_for_fn (cfun));
466 :
467 834348 : for (i = 0; i < n; i++)
468 : {
469 717705 : if (bbs[i]->loop_father == outer)
470 : {
471 493998 : remove_bb_from_loops (bbs[i]);
472 493998 : add_bb_to_loop (bbs[i], loop);
473 493998 : continue;
474 : }
475 :
476 223707 : loop->num_nodes++;
477 :
478 : /* If we find a direct subloop of OUTER, move it to LOOP. */
479 223707 : subloop = bbs[i]->loop_father;
480 223707 : if (loop_outer (subloop) == outer
481 223707 : && subloop->header == bbs[i])
482 : {
483 23615 : flow_loop_tree_node_remove (subloop);
484 23615 : flow_loop_tree_node_add (loop, subloop);
485 : }
486 : }
487 :
488 : /* Update the information about loop exit edges. */
489 834348 : for (i = 0; i < n; i++)
490 : {
491 1761039 : FOR_EACH_EDGE (e, ei, bbs[i]->succs)
492 : {
493 1043334 : rescan_loop_exit (e, false, false);
494 : }
495 : }
496 :
497 116643 : free (bbs);
498 116643 : }
499 :
500 : /* Scale profile of loop by P. */
501 :
502 : void
503 265084 : scale_loop_frequencies (class loop *loop, profile_probability p)
504 : {
505 265084 : basic_block *bbs;
506 :
507 265084 : bbs = get_loop_body (loop);
508 265084 : scale_bbs_frequencies (bbs, loop->num_nodes, p);
509 265084 : free (bbs);
510 265084 : }
511 :
512 : /* Scales the frequencies of all basic blocks in LOOP that are strictly
513 : dominated by BB by NUM/DEN. */
514 :
515 : void
516 37091 : scale_dominated_blocks_in_loop (class loop *loop, basic_block bb,
517 : profile_count num, profile_count den)
518 : {
519 37091 : basic_block son;
520 :
521 37091 : if (!den.nonzero_p () && !(num == profile_count::zero ()))
522 0 : return;
523 37091 : auto_vec <basic_block, 8> worklist;
524 37091 : worklist.safe_push (bb);
525 :
526 302053 : while (!worklist.is_empty ())
527 190780 : for (son = first_dom_son (CDI_DOMINATORS, worklist.pop ());
528 380841 : son;
529 190061 : son = next_dom_son (CDI_DOMINATORS, son))
530 : {
531 190061 : if (!flow_bb_inside_loop_p (loop, son))
532 36372 : continue;
533 153689 : son->count = son->count.apply_scale (num, den);
534 153689 : worklist.safe_push (son);
535 : }
536 37091 : }
537 :
538 : /* Return exit that suitable for update when loop iterations
539 : changed. */
540 :
541 : static edge
542 109959 : loop_exit_for_scaling (class loop *loop)
543 : {
544 109959 : edge exit_edge = single_exit (loop);
545 109959 : if (!exit_edge)
546 : {
547 8771 : auto_vec<edge> exits = get_loop_exit_edges (loop);
548 8771 : exit_edge = single_likely_exit (loop, exits);
549 8771 : }
550 109959 : return exit_edge;
551 : }
552 :
553 : /* Assume that loop's entry count and profile up to a given EXIT_EDGE is
554 : consistent. Update exit probability so loop exists with PROFILE_COUNT
555 : and rescale profile of basic blocks inside loop dominated by EXIT_EDGE->src.
556 :
557 : This is useful after number of iteraitons of loop has changed.
558 : If EXIT_EDGE is NULL, the function will try to identify suitable exit.
559 : If DESIRED_COUNT is NULL, loop entry count will be used.
560 : In consistent profile usually loop exists as many times as it is entred.
561 :
562 : Return updated exit if successfull and NULL otherwise. */
563 :
564 : edge
565 139956 : update_loop_exit_probability_scale_dom_bbs (class loop *loop,
566 : edge exit_edge,
567 : profile_count desired_count)
568 : {
569 139956 : if (!exit_edge)
570 3040 : exit_edge = loop_exit_for_scaling (loop);
571 3040 : if (!exit_edge)
572 : {
573 2284 : if (dump_file && (dump_flags & TDF_DETAILS))
574 : {
575 266 : fprintf (dump_file, ";; Not updating exit probability of loop %i;"
576 : " it has no single exit\n",
577 : loop->num);
578 : }
579 2284 : return NULL;
580 : }
581 : /* If exit is inside another loop, adjusting its probability will also
582 : adjust number of the inner loop iterations. Just do noting for now. */
583 137672 : if (!just_once_each_iteration_p (loop, exit_edge->src))
584 : {
585 6 : if (dump_file && (dump_flags & TDF_DETAILS))
586 : {
587 0 : fprintf (dump_file, ";; Not updating exit probability of loop %i;"
588 : " exit is inside inner loop\n",
589 : loop->num);
590 : }
591 6 : return NULL;
592 : }
593 : /* Normal loops exit as many times as they are entered. */
594 137666 : if (!desired_count.initialized_p ())
595 1572 : desired_count = loop_count_in (loop);
596 : /* See if everything is already perfect. */
597 137666 : if (desired_count == exit_edge->count ())
598 6343 : return exit_edge;
599 131323 : profile_count old_exit_count = exit_edge->count ();
600 : /* See if update is possible.
601 : Avoid turning probability to 0 or 1 just trying to reach impossible
602 : value.
603 :
604 : Natural profile update after some loop will happily scale header count to
605 : be less than count of entering the loop. This is bad idea and they should
606 : special case maybe_flat_loop_profile.
607 :
608 : It may also happen that the source basic block of the exit edge is
609 : inside in-loop condition:
610 :
611 : +-> header
612 : | |
613 : | B1
614 : | / \
615 : | | B2--exit_edge-->
616 : | \ /
617 : | B3
618 : +__/
619 :
620 : If B2 count is smaller than desired exit edge count
621 : the profile was inconsistent with the newly discovered upper bound.
622 : Probablity of edge B1->B2 is too low. We do not attempt to fix
623 : that (as it is hard in general). */
624 131323 : if (desired_count > exit_edge->src->count
625 131323 : && exit_edge->src->count.differs_from_p (desired_count))
626 : {
627 362 : if (dump_file)
628 : {
629 7 : fprintf (dump_file, ";; Source bb of loop %i has count ",
630 : loop->num);
631 7 : exit_edge->src->count.dump (dump_file, cfun);
632 7 : fprintf (dump_file,
633 : " which is smaller then desired count of exitting loop ");
634 7 : desired_count.dump (dump_file, cfun);
635 7 : fprintf (dump_file, ". Profile update is impossible.\n");
636 : }
637 : /* Drop quality of probability since we know it likely
638 : bad. */
639 362 : exit_edge->probability = exit_edge->probability.guessed ();
640 362 : return NULL;
641 : }
642 130961 : if (!exit_edge->src->count.nonzero_p ())
643 : {
644 1 : if (dump_file)
645 : {
646 0 : fprintf (dump_file, ";; Not updating exit edge probability"
647 : " in loop %i since profile is zero ",
648 : loop->num);
649 : }
650 1 : return NULL;
651 : }
652 130960 : set_edge_probability_and_rescale_others
653 130960 : (exit_edge, desired_count.probability_in (exit_edge->src->count));
654 : /* Rescale the remaining edge probabilities and see if there is only
655 : one. */
656 130960 : edge other_edge = NULL;
657 130960 : bool found = false;
658 130960 : edge e;
659 130960 : edge_iterator ei;
660 392881 : FOR_EACH_EDGE (e, ei, exit_edge->src->succs)
661 261932 : if (!(e->flags & EDGE_FAKE)
662 261932 : && !loop_exit_edge_p (loop, e))
663 : {
664 130971 : if (found)
665 : {
666 : other_edge = NULL;
667 : break;
668 : }
669 : other_edge = e;
670 : found = true;
671 : }
672 : /* If there is only loop latch after other edge,
673 : update its profile. This is tiny bit more precise
674 : than scaling. */
675 130960 : if (other_edge && other_edge->dest == loop->latch)
676 : {
677 94654 : if (single_pred_p (loop->latch))
678 94654 : loop->latch->count = loop->latch->count
679 94654 : + old_exit_count - exit_edge->count ();
680 : }
681 : else
682 : /* If there are multiple blocks, just scale. */
683 72612 : scale_dominated_blocks_in_loop (loop, exit_edge->src,
684 72612 : exit_edge->src->count - exit_edge->count (),
685 36306 : exit_edge->src->count - old_exit_count);
686 : return exit_edge;
687 : }
688 :
689 : /* Scale profile in LOOP by P.
690 : If ITERATION_BOUND is not -1, scale even further if loop is predicted
691 : to iterate too many times.
692 : Before caling this function, preheader block profile should be already
693 : scaled to final count. This is necessary because loop iterations are
694 : determined by comparing header edge count to latch ege count and thus
695 : they need to be scaled synchronously. */
696 :
697 : void
698 287257 : scale_loop_profile (class loop *loop, profile_probability p,
699 : gcov_type iteration_bound)
700 : {
701 287257 : if (!(p == profile_probability::always ()))
702 : {
703 80201 : if (dump_file && (dump_flags & TDF_DETAILS))
704 : {
705 11021 : fprintf (dump_file, ";; Scaling loop %i with scale ",
706 : loop->num);
707 11021 : p.dump (dump_file);
708 11021 : fprintf (dump_file, "\n");
709 : }
710 :
711 : /* Scale the probabilities. */
712 80201 : scale_loop_frequencies (loop, p);
713 : }
714 :
715 287257 : if (iteration_bound == -1)
716 180338 : return;
717 :
718 234224 : sreal iterations;
719 234224 : if (!expected_loop_iterations_by_profile (loop, &iterations))
720 : return;
721 :
722 233723 : if (dump_file && (dump_flags & TDF_DETAILS))
723 : {
724 15482 : fprintf (dump_file,
725 : ";; Guessed iterations of loop %i is %f. New upper bound %i.\n",
726 : loop->num,
727 : iterations.to_double (),
728 : (int)iteration_bound);
729 : }
730 :
731 : /* See if loop is predicted to iterate too many times. */
732 233723 : if (iterations <= (sreal)iteration_bound)
733 : return;
734 :
735 106919 : profile_count count_in = loop_count_in (loop);
736 :
737 : /* Now scale the loop body so header count is
738 : count_in * (iteration_bound + 1) */
739 106919 : profile_probability scale_prob
740 106919 : = (count_in * (iteration_bound + 1)).probability_in (loop->header->count);
741 106919 : if (dump_file && (dump_flags & TDF_DETAILS))
742 : {
743 10640 : fprintf (dump_file, ";; Scaling loop %i with scale ",
744 : loop->num);
745 10640 : scale_prob.dump (dump_file);
746 10640 : fprintf (dump_file, " to reach upper bound %i\n",
747 : (int)iteration_bound);
748 : }
749 : /* Finally attempt to fix exit edge probability. */
750 106919 : edge exit_edge = loop_exit_for_scaling (loop);
751 :
752 : /* In a consistent profile unadjusted_exit_count should be same as count_in,
753 : however to preserve as much of the original info, avoid recomputing
754 : it. */
755 106919 : profile_count unadjusted_exit_count = profile_count::uninitialized ();
756 106919 : if (exit_edge)
757 104635 : unadjusted_exit_count = exit_edge->count ();
758 106919 : scale_loop_frequencies (loop, scale_prob);
759 106919 : update_loop_exit_probability_scale_dom_bbs (loop, exit_edge,
760 : unadjusted_exit_count);
761 : }
762 :
763 : /* Recompute dominance information for basic blocks outside LOOP. */
764 :
765 : static void
766 77788 : update_dominators_in_loop (class loop *loop)
767 : {
768 77788 : vec<basic_block> dom_bbs = vNULL;
769 77788 : basic_block *body;
770 77788 : unsigned i;
771 :
772 77788 : auto_sbitmap seen (last_basic_block_for_fn (cfun));
773 77788 : bitmap_clear (seen);
774 77788 : body = get_loop_body (loop);
775 :
776 552844 : for (i = 0; i < loop->num_nodes; i++)
777 397268 : bitmap_set_bit (seen, body[i]->index);
778 :
779 475056 : for (i = 0; i < loop->num_nodes; i++)
780 : {
781 397268 : basic_block ldom;
782 :
783 397268 : for (ldom = first_dom_son (CDI_DOMINATORS, body[i]);
784 748912 : ldom;
785 351644 : ldom = next_dom_son (CDI_DOMINATORS, ldom))
786 351644 : if (!bitmap_bit_p (seen, ldom->index))
787 : {
788 32164 : bitmap_set_bit (seen, ldom->index);
789 32164 : dom_bbs.safe_push (ldom);
790 : }
791 : }
792 :
793 77788 : iterate_fix_dominators (CDI_DOMINATORS, dom_bbs, false);
794 77788 : free (body);
795 77788 : dom_bbs.release ();
796 77788 : }
797 :
798 : /* Creates an if region as shown above. CONDITION is used to create
799 : the test for the if.
800 :
801 : |
802 : | ------------- -------------
803 : | | pred_bb | | pred_bb |
804 : | ------------- -------------
805 : | | |
806 : | | | ENTRY_EDGE
807 : | | ENTRY_EDGE V
808 : | | ====> -------------
809 : | | | cond_bb |
810 : | | | CONDITION |
811 : | | -------------
812 : | V / \
813 : | ------------- e_false / \ e_true
814 : | | succ_bb | V V
815 : | ------------- ----------- -----------
816 : | | false_bb | | true_bb |
817 : | ----------- -----------
818 : | \ /
819 : | \ /
820 : | V V
821 : | -------------
822 : | | join_bb |
823 : | -------------
824 : | | exit_edge (result)
825 : | V
826 : | -----------
827 : | | succ_bb |
828 : | -----------
829 : |
830 : */
831 :
832 : edge
833 203 : create_empty_if_region_on_edge (edge entry_edge, tree condition)
834 : {
835 :
836 203 : basic_block cond_bb, true_bb, false_bb, join_bb;
837 203 : edge e_true, e_false, exit_edge;
838 203 : gcond *cond_stmt;
839 203 : tree simple_cond;
840 203 : gimple_stmt_iterator gsi;
841 :
842 203 : cond_bb = split_edge (entry_edge);
843 :
844 : /* Insert condition in cond_bb. */
845 203 : gsi = gsi_last_bb (cond_bb);
846 203 : simple_cond =
847 203 : force_gimple_operand_gsi (&gsi, condition, true, NULL,
848 : false, GSI_NEW_STMT);
849 203 : cond_stmt = gimple_build_cond_from_tree (simple_cond, NULL_TREE, NULL_TREE);
850 203 : gsi = gsi_last_bb (cond_bb);
851 203 : gsi_insert_after (&gsi, cond_stmt, GSI_NEW_STMT);
852 :
853 203 : join_bb = split_edge (single_succ_edge (cond_bb));
854 :
855 203 : e_true = single_succ_edge (cond_bb);
856 203 : true_bb = split_edge (e_true);
857 :
858 203 : e_false = make_edge (cond_bb, join_bb, 0);
859 203 : false_bb = split_edge (e_false);
860 :
861 203 : e_true->flags &= ~EDGE_FALLTHRU;
862 203 : e_true->flags |= EDGE_TRUE_VALUE;
863 203 : e_false->flags &= ~EDGE_FALLTHRU;
864 203 : e_false->flags |= EDGE_FALSE_VALUE;
865 :
866 203 : set_immediate_dominator (CDI_DOMINATORS, cond_bb, entry_edge->src);
867 203 : set_immediate_dominator (CDI_DOMINATORS, true_bb, cond_bb);
868 203 : set_immediate_dominator (CDI_DOMINATORS, false_bb, cond_bb);
869 203 : set_immediate_dominator (CDI_DOMINATORS, join_bb, cond_bb);
870 :
871 203 : exit_edge = single_succ_edge (join_bb);
872 :
873 203 : if (single_pred_p (exit_edge->dest))
874 203 : set_immediate_dominator (CDI_DOMINATORS, exit_edge->dest, join_bb);
875 :
876 203 : return exit_edge;
877 : }
878 :
879 : /* create_empty_loop_on_edge
880 : |
881 : | - pred_bb - ------ pred_bb ------
882 : | | | | iv0 = initial_value |
883 : | -----|----- ---------|-----------
884 : | | ______ | entry_edge
885 : | | entry_edge / | |
886 : | | ====> | -V---V- loop_header -------------
887 : | V | | iv_before = phi (iv0, iv_after) |
888 : | - succ_bb - | ---|-----------------------------
889 : | | | | |
890 : | ----------- | ---V--- loop_body ---------------
891 : | | | iv_after = iv_before + stride |
892 : | | | if (iv_before < upper_bound) |
893 : | | ---|--------------\--------------
894 : | | | \ exit_e
895 : | | V \
896 : | | - loop_latch - V- succ_bb -
897 : | | | | | |
898 : | | /------------- -----------
899 : | \ ___ /
900 :
901 : Creates an empty loop as shown above, the IV_BEFORE is the SSA_NAME
902 : that is used before the increment of IV. IV_BEFORE should be used for
903 : adding code to the body that uses the IV. OUTER is the outer loop in
904 : which the new loop should be inserted.
905 :
906 : Both INITIAL_VALUE and UPPER_BOUND expressions are gimplified and
907 : inserted on the loop entry edge. This implies that this function
908 : should be used only when the UPPER_BOUND expression is a loop
909 : invariant. */
910 :
911 : class loop *
912 500 : create_empty_loop_on_edge (edge entry_edge,
913 : tree initial_value,
914 : tree stride, tree upper_bound,
915 : tree iv,
916 : tree *iv_before,
917 : tree *iv_after,
918 : class loop *outer)
919 : {
920 500 : basic_block loop_header, loop_latch, succ_bb, pred_bb;
921 500 : class loop *loop;
922 500 : gimple_stmt_iterator gsi;
923 500 : gimple_seq stmts;
924 500 : gcond *cond_expr;
925 500 : tree exit_test;
926 500 : edge exit_e;
927 :
928 500 : gcc_assert (entry_edge && initial_value && stride && upper_bound && iv);
929 :
930 : /* Create header, latch and wire up the loop. */
931 500 : pred_bb = entry_edge->src;
932 500 : loop_header = split_edge (entry_edge);
933 500 : loop_latch = split_edge (single_succ_edge (loop_header));
934 500 : succ_bb = single_succ (loop_latch);
935 500 : make_edge (loop_header, succ_bb, 0);
936 500 : redirect_edge_succ_nodup (single_succ_edge (loop_latch), loop_header);
937 :
938 : /* Set immediate dominator information. */
939 500 : set_immediate_dominator (CDI_DOMINATORS, loop_header, pred_bb);
940 500 : set_immediate_dominator (CDI_DOMINATORS, loop_latch, loop_header);
941 500 : set_immediate_dominator (CDI_DOMINATORS, succ_bb, loop_header);
942 :
943 : /* Initialize a loop structure and put it in a loop hierarchy. */
944 500 : loop = alloc_loop ();
945 500 : loop->header = loop_header;
946 500 : loop->latch = loop_latch;
947 500 : add_loop (loop, outer);
948 :
949 : /* TODO: Fix counts. */
950 500 : scale_loop_frequencies (loop, profile_probability::even ());
951 :
952 : /* Update dominators. */
953 500 : update_dominators_in_loop (loop);
954 :
955 : /* Modify edge flags. */
956 500 : exit_e = single_exit (loop);
957 500 : exit_e->flags = EDGE_LOOP_EXIT | EDGE_FALSE_VALUE;
958 500 : single_pred_edge (loop_latch)->flags = EDGE_TRUE_VALUE;
959 :
960 : /* Construct IV code in loop. */
961 500 : initial_value = force_gimple_operand (initial_value, &stmts, true, iv);
962 500 : if (stmts)
963 : {
964 17 : gsi_insert_seq_on_edge (loop_preheader_edge (loop), stmts);
965 17 : gsi_commit_edge_inserts ();
966 : }
967 :
968 500 : upper_bound = force_gimple_operand (upper_bound, &stmts, true, NULL);
969 500 : if (stmts)
970 : {
971 161 : gsi_insert_seq_on_edge (loop_preheader_edge (loop), stmts);
972 161 : gsi_commit_edge_inserts ();
973 : }
974 :
975 500 : gsi = gsi_last_bb (loop_header);
976 500 : create_iv (initial_value, PLUS_EXPR, stride, iv, loop, &gsi, false,
977 : iv_before, iv_after);
978 :
979 : /* Insert loop exit condition. */
980 500 : cond_expr = gimple_build_cond
981 500 : (LT_EXPR, *iv_before, upper_bound, NULL_TREE, NULL_TREE);
982 :
983 500 : exit_test = gimple_cond_lhs (cond_expr);
984 500 : exit_test = force_gimple_operand_gsi (&gsi, exit_test, true, NULL,
985 : false, GSI_NEW_STMT);
986 500 : gimple_cond_set_lhs (cond_expr, exit_test);
987 500 : gsi = gsi_last_bb (exit_e->src);
988 500 : gsi_insert_after (&gsi, cond_expr, GSI_NEW_STMT);
989 :
990 500 : split_block_after_labels (loop_header);
991 :
992 500 : return loop;
993 : }
994 :
995 : /* Remove the latch edge of a LOOP and update loops to indicate that
996 : the LOOP was removed. After this function, original loop latch will
997 : have no successor, which caller is expected to fix somehow.
998 :
999 : If this may cause the information about irreducible regions to become
1000 : invalid, IRRED_INVALIDATED is set to true.
1001 :
1002 : LOOP_CLOSED_SSA_INVALIDATED, if non-NULL, is a bitmap where we store
1003 : basic blocks that had non-trivial update on their loop_father.*/
1004 :
1005 : void
1006 138030 : unloop (class loop *loop, bool *irred_invalidated,
1007 : bitmap loop_closed_ssa_invalidated)
1008 : {
1009 138030 : basic_block *body;
1010 138030 : class loop *ploop;
1011 138030 : unsigned i, n;
1012 138030 : basic_block latch = loop->latch;
1013 138030 : bool dummy = false;
1014 :
1015 138030 : if (loop_preheader_edge (loop)->flags & EDGE_IRREDUCIBLE_LOOP)
1016 23 : *irred_invalidated = true;
1017 :
1018 : /* This is relatively straightforward. The dominators are unchanged, as
1019 : loop header dominates loop latch, so the only thing we have to care of
1020 : is the placement of loops and basic blocks inside the loop tree. We
1021 : move them all to the loop->outer, and then let fix_bb_placements do
1022 : its work. */
1023 :
1024 138030 : body = get_loop_body (loop);
1025 138030 : n = loop->num_nodes;
1026 563910 : for (i = 0; i < n; i++)
1027 425880 : if (body[i]->loop_father == loop)
1028 : {
1029 407613 : remove_bb_from_loops (body[i]);
1030 407613 : add_bb_to_loop (body[i], loop_outer (loop));
1031 : }
1032 138030 : free (body);
1033 :
1034 140457 : while (loop->inner)
1035 : {
1036 2427 : ploop = loop->inner;
1037 2427 : flow_loop_tree_node_remove (ploop);
1038 2427 : flow_loop_tree_node_add (loop_outer (loop), ploop);
1039 : }
1040 :
1041 : /* Remove the loop and free its data. */
1042 138030 : delete_loop (loop);
1043 :
1044 138030 : remove_edge (single_succ_edge (latch));
1045 :
1046 : /* We do not pass IRRED_INVALIDATED to fix_bb_placements here, as even if
1047 : there is an irreducible region inside the cancelled loop, the flags will
1048 : be still correct. */
1049 138030 : fix_bb_placements (latch, &dummy, loop_closed_ssa_invalidated);
1050 138030 : }
1051 :
1052 : /* Fix placement of superloops of LOOP inside loop tree, i.e. ensure that
1053 : condition stated in description of fix_loop_placement holds for them.
1054 : It is used in case when we removed some edges coming out of LOOP, which
1055 : may cause the right placement of LOOP inside loop tree to change.
1056 :
1057 : IRRED_INVALIDATED is set to true if a change in the loop structures might
1058 : invalidate the information about irreducible regions. */
1059 :
1060 : static void
1061 439512 : fix_loop_placements (class loop *loop, bool *irred_invalidated,
1062 : bitmap loop_closed_ssa_invalidated)
1063 : {
1064 439512 : if (!loop_outer (loop))
1065 238079 : return;
1066 :
1067 201433 : auto_vec<edge> exits = get_loop_exit_edges (loop);
1068 201433 : unsigned i;
1069 201433 : edge e;
1070 :
1071 : /* We might need to only re-parent an outer loop, but as the constraint
1072 : is that we removed an exit from LOOP, we have a limit for what level
1073 : of outer loop we eventually have to re-parent to. */
1074 201433 : class loop *outermost = loop;
1075 1482011 : FOR_EACH_VEC_ELT (exits, i, e)
1076 1079145 : outermost = find_common_loop (outermost, e->dest->loop_father);
1077 :
1078 : class loop *outer;
1079 247728 : while (loop_outer (loop))
1080 : {
1081 247703 : outer = loop_outer (loop);
1082 247703 : if (fix_loop_placement (loop, irred_invalidated,
1083 : loop_closed_ssa_invalidated))
1084 : /* Changing the placement of a loop in the loop tree may alter the
1085 : validity of condition 2) of the description of fix_bb_placement
1086 : for its preheader, because the successor is the header and belongs
1087 : to the loop. So call fix_bb_placements to fix up the placement
1088 : of the preheader and (possibly) of its predecessors. */
1089 6 : fix_bb_placements (loop_preheader_edge (loop)->src,
1090 : irred_invalidated, loop_closed_ssa_invalidated);
1091 247703 : loop = outer;
1092 247703 : if (outer == outermost)
1093 : break;
1094 : }
1095 201433 : }
1096 :
1097 : /* Duplicate loop bounds and other information we store about
1098 : the loop into its duplicate. */
1099 :
1100 : void
1101 680274 : copy_loop_info (class loop *loop, class loop *target)
1102 : {
1103 680274 : gcc_checking_assert (!target->any_upper_bound && !target->any_estimate);
1104 680274 : target->any_upper_bound = loop->any_upper_bound;
1105 680274 : target->nb_iterations_upper_bound = loop->nb_iterations_upper_bound;
1106 680274 : target->any_likely_upper_bound = loop->any_likely_upper_bound;
1107 680274 : target->nb_iterations_likely_upper_bound
1108 680274 : = loop->nb_iterations_likely_upper_bound;
1109 680274 : target->any_estimate = loop->any_estimate;
1110 680274 : target->nb_iterations_estimate = loop->nb_iterations_estimate;
1111 680274 : target->estimate_state = loop->estimate_state;
1112 680274 : target->safelen = loop->safelen;
1113 680274 : target->simdlen = loop->simdlen;
1114 680274 : target->constraints = loop->constraints;
1115 680274 : target->can_be_parallel = loop->can_be_parallel;
1116 680274 : target->warned_aggressive_loop_optimizations
1117 680274 : |= loop->warned_aggressive_loop_optimizations;
1118 680274 : target->dont_vectorize = loop->dont_vectorize;
1119 680274 : target->force_vectorize = loop->force_vectorize;
1120 680274 : target->in_oacc_kernels_region = loop->in_oacc_kernels_region;
1121 680274 : target->finite_p = loop->finite_p;
1122 680274 : target->unroll = loop->unroll;
1123 680274 : target->owned_clique = loop->owned_clique;
1124 680274 : }
1125 :
1126 : /* Copies copy of LOOP as subloop of TARGET loop, placing newly
1127 : created loop into loops structure. If AFTER is non-null
1128 : the new loop is added at AFTER->next, otherwise in front of TARGETs
1129 : sibling list. */
1130 : class loop *
1131 41088 : duplicate_loop (class loop *loop, class loop *target, class loop *after)
1132 : {
1133 41088 : class loop *cloop;
1134 41088 : cloop = alloc_loop ();
1135 41088 : place_new_loop (cfun, cloop);
1136 :
1137 41088 : copy_loop_info (loop, cloop);
1138 :
1139 : /* Mark the new loop as copy of LOOP. */
1140 41088 : set_loop_copy (loop, cloop);
1141 :
1142 : /* Add it to target. */
1143 41088 : flow_loop_tree_node_add (target, cloop, after);
1144 :
1145 41088 : return cloop;
1146 : }
1147 :
1148 : /* Copies structure of subloops of LOOP into TARGET loop, placing
1149 : newly created loops into loop tree at the end of TARGETs sibling
1150 : list in the original order. */
1151 : void
1152 41088 : duplicate_subloops (class loop *loop, class loop *target)
1153 : {
1154 41088 : class loop *aloop, *cloop, *tail;
1155 :
1156 41088 : for (tail = target->inner; tail && tail->next; tail = tail->next)
1157 : ;
1158 42501 : for (aloop = loop->inner; aloop; aloop = aloop->next)
1159 : {
1160 1413 : cloop = duplicate_loop (aloop, target, tail);
1161 1413 : tail = cloop;
1162 1413 : gcc_assert(!tail->next);
1163 1413 : duplicate_subloops (aloop, cloop);
1164 : }
1165 41088 : }
1166 :
1167 : /* Copies structure of subloops of N loops, stored in array COPIED_LOOPS,
1168 : into TARGET loop, placing newly created loops into loop tree adding
1169 : them to TARGETs sibling list at the end in order. */
1170 : static void
1171 541197 : copy_loops_to (class loop **copied_loops, int n, class loop *target)
1172 : {
1173 541197 : class loop *aloop, *tail;
1174 541197 : int i;
1175 :
1176 12505523 : for (tail = target->inner; tail && tail->next; tail = tail->next)
1177 : ;
1178 545807 : for (i = 0; i < n; i++)
1179 : {
1180 4610 : aloop = duplicate_loop (copied_loops[i], target, tail);
1181 4610 : tail = aloop;
1182 4610 : gcc_assert(!tail->next);
1183 4610 : duplicate_subloops (copied_loops[i], aloop);
1184 : }
1185 541197 : }
1186 :
1187 : /* Redirects edge E to basic block DEST. */
1188 : static void
1189 38644 : loop_redirect_edge (edge e, basic_block dest)
1190 : {
1191 0 : if (e->dest == dest)
1192 : return;
1193 :
1194 38644 : redirect_edge_and_branch_force (e, dest);
1195 : }
1196 :
1197 : /* Check whether LOOP's body can be duplicated. */
1198 : bool
1199 519452 : can_duplicate_loop_p (const class loop *loop)
1200 : {
1201 519452 : int ret;
1202 519452 : basic_block *bbs = get_loop_body (loop);
1203 :
1204 519452 : ret = can_copy_bbs_p (bbs, loop->num_nodes);
1205 519452 : free (bbs);
1206 :
1207 519452 : return ret;
1208 : }
1209 :
1210 : /* Duplicates body of LOOP to given edge E NDUPL times. Takes care of updating
1211 : loop structure and dominators (order of inner subloops is retained).
1212 : E's destination must be LOOP header for this to work, i.e. it must be entry
1213 : or latch edge of this loop; these are unique, as the loops must have
1214 : preheaders for this function to work correctly (in case E is latch, the
1215 : function unrolls the loop, if E is entry edge, it peels the loop). Store
1216 : edges created by copying ORIG edge from copies corresponding to set bits in
1217 : WONT_EXIT bitmap (bit 0 corresponds to original LOOP body, the other copies
1218 : are numbered in order given by control flow through them) into TO_REMOVE
1219 : array. Returns false if duplication is
1220 : impossible. */
1221 :
1222 : bool
1223 262806 : duplicate_loop_body_to_header_edge (class loop *loop, edge e,
1224 : unsigned int ndupl, sbitmap wont_exit,
1225 : edge orig, vec<edge> *to_remove, int flags)
1226 : {
1227 262806 : class loop *target, *aloop;
1228 262806 : class loop **orig_loops;
1229 262806 : unsigned n_orig_loops;
1230 262806 : basic_block header = loop->header, latch = loop->latch;
1231 262806 : basic_block *new_bbs, *bbs, *first_active;
1232 262806 : basic_block new_bb, bb, first_active_latch = NULL;
1233 262806 : edge ae, latch_edge;
1234 262806 : edge spec_edges[2], new_spec_edges[2];
1235 262806 : const int SE_LATCH = 0;
1236 262806 : const int SE_ORIG = 1;
1237 262806 : unsigned i, j, n;
1238 262806 : int is_latch = (latch == e->src);
1239 262806 : profile_probability *scale_step = NULL;
1240 262806 : profile_probability scale_main = profile_probability::always ();
1241 262806 : profile_probability scale_act = profile_probability::always ();
1242 262806 : profile_count after_exit_num = profile_count::zero (),
1243 262806 : after_exit_den = profile_count::zero ();
1244 262806 : bool scale_after_exit = false;
1245 262806 : int add_irreducible_flag;
1246 262806 : basic_block place_after;
1247 262806 : bitmap bbs_to_scale = NULL;
1248 262806 : bitmap_iterator bi;
1249 :
1250 262806 : gcc_assert (e->dest == loop->header);
1251 262806 : gcc_assert (ndupl > 0);
1252 :
1253 262806 : if (orig)
1254 : {
1255 : /* Orig must be edge out of the loop. */
1256 214595 : gcc_assert (flow_bb_inside_loop_p (loop, orig->src));
1257 214595 : gcc_assert (!flow_bb_inside_loop_p (loop, orig->dest));
1258 : }
1259 :
1260 262806 : n = loop->num_nodes;
1261 262806 : bbs = get_loop_body_in_dom_order (loop);
1262 262806 : gcc_assert (bbs[0] == loop->header);
1263 262806 : gcc_assert (bbs[n - 1] == loop->latch);
1264 :
1265 : /* Check whether duplication is possible. */
1266 262806 : if (!can_copy_bbs_p (bbs, loop->num_nodes))
1267 : {
1268 19 : free (bbs);
1269 19 : return false;
1270 : }
1271 262787 : new_bbs = XNEWVEC (basic_block, loop->num_nodes);
1272 :
1273 : /* In case we are doing loop peeling and the loop is in the middle of
1274 : irreducible region, the peeled copies will be inside it too. */
1275 262787 : add_irreducible_flag = e->flags & EDGE_IRREDUCIBLE_LOOP;
1276 262787 : gcc_assert (!is_latch || !add_irreducible_flag);
1277 :
1278 : /* Find edge from latch. */
1279 262787 : latch_edge = loop_latch_edge (loop);
1280 :
1281 262787 : if (flags & DLTHE_FLAG_UPDATE_FREQ)
1282 : {
1283 : /* Calculate coefficients by that we have to scale counts
1284 : of duplicated loop bodies. */
1285 224143 : profile_count count_in = header->count;
1286 224143 : profile_count count_le = latch_edge->count ();
1287 224143 : profile_count count_out_orig = orig ? orig->count () : count_in - count_le;
1288 224143 : profile_probability prob_pass_thru = count_le.probability_in (count_in);
1289 224143 : profile_count new_count_le = count_le + count_out_orig;
1290 :
1291 214586 : if (orig && orig->probability.initialized_p ()
1292 438729 : && !(orig->probability == profile_probability::always ()))
1293 : {
1294 : /* The blocks that are dominated by a removed exit edge ORIG have
1295 : frequencies scaled by this. */
1296 214459 : if (orig->count ().initialized_p ())
1297 : {
1298 214459 : after_exit_num = orig->src->count;
1299 214459 : after_exit_den = after_exit_num - orig->count ();
1300 214459 : scale_after_exit = true;
1301 : }
1302 214459 : bbs_to_scale = BITMAP_ALLOC (NULL);
1303 768428 : for (i = 0; i < n; i++)
1304 : {
1305 553969 : if (bbs[i] != orig->src
1306 553969 : && dominated_by_p (CDI_DOMINATORS, bbs[i], orig->src))
1307 264489 : bitmap_set_bit (bbs_to_scale, i);
1308 : }
1309 : /* Since we will scale up all basic blocks dominated by orig, exits
1310 : will become more likely; compensate for that. */
1311 214459 : if (after_exit_den.nonzero_p ())
1312 : {
1313 213868 : auto_vec<edge> exits = get_loop_exit_edges (loop);
1314 923963 : for (edge ex : exits)
1315 282359 : if (ex != orig
1316 282359 : && dominated_by_p (CDI_DOMINATORS, ex->src, orig->src))
1317 54958 : new_count_le -= ex->count ().apply_scale (after_exit_num
1318 : - after_exit_den,
1319 27479 : after_exit_den);
1320 213868 : }
1321 : }
1322 224143 : profile_probability prob_pass_wont_exit =
1323 224143 : new_count_le.probability_in (count_in);
1324 : /* If profile count is 0, the probability will be uninitialized.
1325 : We can set probability to any initialized value to avoid
1326 : precision loss. If profile is sane, all counts will be 0 anyway. */
1327 224143 : if (!count_in.nonzero_p ())
1328 : {
1329 571 : prob_pass_thru
1330 571 : = profile_probability::always ().apply_scale (1, 2);
1331 571 : prob_pass_wont_exit
1332 571 : = profile_probability::always ().apply_scale (1, 2);
1333 : }
1334 :
1335 224143 : scale_step = XNEWVEC (profile_probability, ndupl);
1336 :
1337 726696 : for (i = 1; i <= ndupl; i++)
1338 1005106 : scale_step[i - 1] = bitmap_bit_p (wont_exit, i)
1339 502553 : ? prob_pass_wont_exit
1340 : : prob_pass_thru;
1341 :
1342 : /* Complete peeling is special as the probability of exit in last
1343 : copy becomes 1. */
1344 448207 : if (!count_in.nonzero_p ())
1345 : ;
1346 223572 : else if (flags & DLTHE_FLAG_COMPLETTE_PEEL)
1347 : {
1348 106354 : profile_count wanted_count = e->count ();
1349 :
1350 106354 : gcc_assert (!is_latch);
1351 : /* First copy has count of incoming edge. Each subsequent
1352 : count should be reduced by prob_pass_wont_exit. Caller
1353 : should've managed the flags so all except for original loop
1354 : has won't exist set. */
1355 106354 : scale_act = wanted_count.probability_in (count_in);
1356 :
1357 : /* Now simulate the duplication adjustments and compute header
1358 : frequency of the last copy. */
1359 432638 : for (i = 0; i < ndupl; i++)
1360 326284 : wanted_count = wanted_count.apply_probability (scale_step [i]);
1361 106354 : scale_main = wanted_count.probability_in (count_in);
1362 : }
1363 : /* Here we insert loop bodies inside the loop itself (for loop unrolling).
1364 : First iteration will be original loop followed by duplicated bodies.
1365 : It is necessary to scale down the original so we get right overall
1366 : number of iterations. */
1367 117218 : else if (is_latch)
1368 : {
1369 51579 : profile_probability prob_pass_main = bitmap_bit_p (wont_exit, 0)
1370 51579 : ? prob_pass_wont_exit
1371 : : prob_pass_thru;
1372 51579 : if (!(flags & DLTHE_FLAG_FLAT_PROFILE))
1373 : {
1374 21043 : profile_probability p = prob_pass_main;
1375 21043 : profile_count scale_main_den = count_in;
1376 83284 : for (i = 0; i < ndupl; i++)
1377 : {
1378 62241 : scale_main_den += count_in.apply_probability (p);
1379 62241 : p = p * scale_step[i];
1380 : }
1381 : /* If original loop is executed COUNT_IN times, the unrolled
1382 : loop will account SCALE_MAIN_DEN times. */
1383 21043 : scale_main = count_in.probability_in (scale_main_den);
1384 : }
1385 : else
1386 : scale_main = profile_probability::always ();
1387 51579 : scale_act = scale_main * prob_pass_main;
1388 : }
1389 : else
1390 : {
1391 65639 : profile_count preheader_count = e->count ();
1392 136993 : for (i = 0; i < ndupl; i++)
1393 71354 : scale_main = scale_main * scale_step[i];
1394 65639 : scale_act = preheader_count.probability_in (count_in);
1395 : }
1396 : }
1397 :
1398 : /* Loop the new bbs will belong to. */
1399 262787 : target = e->src->loop_father;
1400 :
1401 : /* Original loops. */
1402 262787 : n_orig_loops = 0;
1403 266632 : for (aloop = loop->inner; aloop; aloop = aloop->next)
1404 3845 : n_orig_loops++;
1405 262787 : orig_loops = XNEWVEC (class loop *, n_orig_loops);
1406 266632 : for (aloop = loop->inner, i = 0; aloop; aloop = aloop->next, i++)
1407 3845 : orig_loops[i] = aloop;
1408 :
1409 262787 : set_loop_copy (loop, target);
1410 :
1411 262787 : first_active = XNEWVEC (basic_block, n);
1412 262787 : if (is_latch)
1413 : {
1414 51694 : memcpy (first_active, bbs, n * sizeof (basic_block));
1415 51694 : first_active_latch = latch;
1416 : }
1417 :
1418 262787 : spec_edges[SE_ORIG] = orig;
1419 262787 : spec_edges[SE_LATCH] = latch_edge;
1420 :
1421 262787 : place_after = e->src;
1422 262787 : location_t loop_loc = UNKNOWN_LOCATION;
1423 262787 : unsigned int loop_copyid_base = 0;
1424 :
1425 : /* Find a location from the loop header - works for both GIMPLE and RTL. */
1426 262787 : if (current_ir_type () == IR_GIMPLE)
1427 : {
1428 146875 : gimple *last = last_nondebug_stmt (loop->header);
1429 146875 : loop_loc = last ? gimple_location (last) : UNKNOWN_LOCATION;
1430 : }
1431 : else
1432 : {
1433 : /* For RTL, try to find an instruction with a valid location. */
1434 115912 : rtx_insn *insn = BB_END (loop->header);
1435 192333 : while (insn && insn != BB_HEAD (loop->header))
1436 : {
1437 : /* Only check location if this is a valid insn. */
1438 191747 : if (INSN_P (insn))
1439 : {
1440 191161 : location_t loc = INSN_LOCATION (insn);
1441 191161 : if (loc != UNKNOWN_LOCATION)
1442 : {
1443 115326 : loop_loc = get_pure_location (loc);
1444 115326 : break;
1445 : }
1446 : }
1447 76421 : insn = PREV_INSN (insn);
1448 : }
1449 : }
1450 :
1451 : /* Allocate copyid base for this loop duplication - works for both
1452 : GIMPLE and RTL since allocator is per-function. */
1453 262281 : if (loop_loc != UNKNOWN_LOCATION)
1454 248516 : loop_copyid_base = allocate_copyid_base (loop_loc, ndupl);
1455 :
1456 803984 : for (j = 0; j < ndupl; j++)
1457 : {
1458 : /* Copy loops. */
1459 541197 : copy_loops_to (orig_loops, n_orig_loops, target);
1460 :
1461 : /* Copy bbs. */
1462 541197 : copy_bbs (bbs, n, new_bbs, spec_edges, 2, new_spec_edges, loop,
1463 : place_after, true);
1464 541197 : place_after = new_spec_edges[SE_LATCH]->src;
1465 :
1466 541197 : if (flags & DLTHE_RECORD_COPY_NUMBER)
1467 352720 : for (i = 0; i < n; i++)
1468 : {
1469 248800 : gcc_assert (!new_bbs[i]->aux);
1470 248800 : new_bbs[i]->aux = (void *)(size_t)(j + 1);
1471 : }
1472 :
1473 : /* Assign hierarchical discriminators to distinguish loop iterations. */
1474 541197 : if (flags & DLTHE_RECORD_HIERARCHICAL_DISCRIMINATOR
1475 332804 : && loop_copyid_base > 0)
1476 : {
1477 : /* Calculate copyid for this iteration. */
1478 295604 : unsigned int copyid = loop_copyid_base + j;
1479 295604 : if (copyid > DISCR_COPYID_MAX)
1480 : copyid = DISCR_COPYID_MAX;
1481 :
1482 295604 : if (current_ir_type () == IR_GIMPLE)
1483 : {
1484 : /* Update all basic blocks created in this iteration. */
1485 1147482 : for (i = 0; i < n; i++)
1486 851878 : assign_discriminators_to_bb (new_bbs[i], 0, copyid);
1487 : }
1488 : else
1489 : {
1490 : /* For RTL, manually update instruction locations. */
1491 0 : for (i = 0; i < n; i++)
1492 : {
1493 0 : basic_block bb = new_bbs[i];
1494 0 : rtx_insn *insn;
1495 :
1496 : /* Iterate through all instructions in the block. */
1497 0 : FOR_BB_INSNS (bb, insn)
1498 : {
1499 0 : if (INSN_HAS_LOCATION (insn))
1500 : {
1501 0 : location_t loc = INSN_LOCATION (insn);
1502 : /* Get existing discriminator components. */
1503 0 : discriminator_components comp
1504 0 : = get_discriminator_components_from_loc (loc);
1505 0 : comp.copyid = copyid;
1506 :
1507 : /* Apply hierarchical discriminator format. */
1508 0 : INSN_LOCATION (insn)
1509 0 : = location_with_discriminator_components (loc, comp);
1510 : }
1511 : }
1512 : }
1513 : }
1514 : }
1515 :
1516 : /* Note whether the blocks and edges belong to an irreducible loop. */
1517 541197 : if (add_irreducible_flag)
1518 : {
1519 1421 : for (i = 0; i < n; i++)
1520 1061 : new_bbs[i]->flags |= BB_DUPLICATED;
1521 1421 : for (i = 0; i < n; i++)
1522 : {
1523 1061 : edge_iterator ei;
1524 1061 : new_bb = new_bbs[i];
1525 1061 : if (new_bb->loop_father == target)
1526 1047 : new_bb->flags |= BB_IRREDUCIBLE_LOOP;
1527 :
1528 2798 : FOR_EACH_EDGE (ae, ei, new_bb->succs)
1529 1737 : if ((ae->dest->flags & BB_DUPLICATED)
1530 1079 : && (ae->src->loop_father == target
1531 21 : || ae->dest->loop_father == target))
1532 1065 : ae->flags |= EDGE_IRREDUCIBLE_LOOP;
1533 : }
1534 1421 : for (i = 0; i < n; i++)
1535 1061 : new_bbs[i]->flags &= ~BB_DUPLICATED;
1536 : }
1537 :
1538 : /* Redirect the special edges. */
1539 541197 : if (is_latch)
1540 : {
1541 104206 : redirect_edge_and_branch_force (latch_edge, new_bbs[0]);
1542 104206 : redirect_edge_and_branch_force (new_spec_edges[SE_LATCH],
1543 : loop->header);
1544 104206 : set_immediate_dominator (CDI_DOMINATORS, new_bbs[0], latch);
1545 104206 : latch = loop->latch = new_bbs[n - 1];
1546 104206 : e = latch_edge = new_spec_edges[SE_LATCH];
1547 : }
1548 : else
1549 : {
1550 436991 : redirect_edge_and_branch_force (e, new_bbs[0]);
1551 436991 : redirect_edge_and_branch_force (new_spec_edges[SE_LATCH],
1552 : loop->header);
1553 436991 : set_immediate_dominator (CDI_DOMINATORS, new_bbs[0], e->src);
1554 436991 : e = new_spec_edges[SE_LATCH];
1555 : }
1556 :
1557 : /* Record exit edge in this copy. */
1558 541197 : if (orig && bitmap_bit_p (wont_exit, j + 1))
1559 : {
1560 389323 : if (to_remove)
1561 389323 : to_remove->safe_push (new_spec_edges[SE_ORIG]);
1562 389323 : force_edge_cold (new_spec_edges[SE_ORIG], true);
1563 :
1564 : /* Scale the frequencies of the blocks dominated by the exit. */
1565 389323 : if (bbs_to_scale && scale_after_exit)
1566 : {
1567 898769 : EXECUTE_IF_SET_IN_BITMAP (bbs_to_scale, 0, i, bi)
1568 509745 : scale_bbs_frequencies_profile_count (new_bbs + i, 1, after_exit_num,
1569 : after_exit_den);
1570 : }
1571 : }
1572 :
1573 : /* Record the first copy in the control flow order if it is not
1574 : the original loop (i.e. in case of peeling). */
1575 541197 : if (!first_active_latch)
1576 : {
1577 211093 : memcpy (first_active, new_bbs, n * sizeof (basic_block));
1578 211093 : first_active_latch = new_bbs[n - 1];
1579 : }
1580 :
1581 : /* Set counts and frequencies. */
1582 541197 : if (flags & DLTHE_FLAG_UPDATE_FREQ)
1583 : {
1584 502553 : scale_bbs_frequencies (new_bbs, n, scale_act);
1585 502553 : scale_act = scale_act * scale_step[j];
1586 : }
1587 : }
1588 262787 : free (new_bbs);
1589 262787 : free (orig_loops);
1590 :
1591 : /* Record the exit edge in the original loop body, and update the frequencies. */
1592 477373 : if (orig && bitmap_bit_p (wont_exit, 0))
1593 : {
1594 49404 : if (to_remove)
1595 49404 : to_remove->safe_push (orig);
1596 49404 : force_edge_cold (orig, true);
1597 :
1598 : /* Scale the frequencies of the blocks dominated by the exit. */
1599 49404 : if (bbs_to_scale && scale_after_exit)
1600 : {
1601 98733 : EXECUTE_IF_SET_IN_BITMAP (bbs_to_scale, 0, i, bi)
1602 49377 : scale_bbs_frequencies_profile_count (bbs + i, 1, after_exit_num,
1603 : after_exit_den);
1604 : }
1605 : }
1606 :
1607 : /* Update the original loop. */
1608 262787 : if (!is_latch)
1609 211093 : set_immediate_dominator (CDI_DOMINATORS, e->dest, e->src);
1610 262787 : if (flags & DLTHE_FLAG_UPDATE_FREQ)
1611 : {
1612 224143 : scale_bbs_frequencies (bbs, n, scale_main);
1613 224143 : free (scale_step);
1614 : }
1615 :
1616 : /* Update dominators of outer blocks if affected. */
1617 1042383 : for (i = 0; i < n; i++)
1618 : {
1619 779596 : basic_block dominated, dom_bb;
1620 779596 : unsigned j;
1621 :
1622 779596 : bb = bbs[i];
1623 :
1624 779596 : auto_vec<basic_block> dom_bbs = get_dominated_by (CDI_DOMINATORS, bb);
1625 2802332 : FOR_EACH_VEC_ELT (dom_bbs, j, dominated)
1626 : {
1627 773773 : if (flow_bb_inside_loop_p (loop, dominated))
1628 568503 : continue;
1629 410540 : dom_bb = nearest_common_dominator (
1630 205270 : CDI_DOMINATORS, first_active[i], first_active_latch);
1631 205270 : set_immediate_dominator (CDI_DOMINATORS, dominated, dom_bb);
1632 : }
1633 779596 : }
1634 262787 : free (first_active);
1635 :
1636 262787 : free (bbs);
1637 262787 : BITMAP_FREE (bbs_to_scale);
1638 :
1639 262787 : return true;
1640 : }
1641 :
1642 : /* A callback for make_forwarder block, to redirect all edges except for
1643 : MFB_KJ_EDGE to the entry part. E is the edge for that we should decide
1644 : whether to redirect it. */
1645 :
1646 : edge mfb_kj_edge;
1647 : bool
1648 77024 : mfb_keep_just (edge e)
1649 : {
1650 77024 : return e != mfb_kj_edge;
1651 : }
1652 :
1653 : /* True when a candidate preheader BLOCK has predecessors from LOOP. */
1654 :
1655 : static bool
1656 35 : has_preds_from_loop (basic_block block, class loop *loop)
1657 : {
1658 35 : edge e;
1659 35 : edge_iterator ei;
1660 :
1661 76 : FOR_EACH_EDGE (e, ei, block->preds)
1662 41 : if (e->src->loop_father == loop)
1663 : return true;
1664 : return false;
1665 : }
1666 :
1667 : /* Creates a pre-header for a LOOP. Returns newly created block. Unless
1668 : CP_SIMPLE_PREHEADERS is set in FLAGS, we only force LOOP to have single
1669 : entry; otherwise we also force preheader block to have only one successor.
1670 : When CP_FALLTHRU_PREHEADERS is set in FLAGS, we force the preheader block
1671 : to be a fallthru predecessor to the loop header and to have only
1672 : predecessors from outside of the loop.
1673 : The function also updates dominators. */
1674 :
1675 : basic_block
1676 17412441 : create_preheader (class loop *loop, int flags)
1677 : {
1678 17412441 : edge e;
1679 17412441 : basic_block dummy;
1680 17412441 : int nentry = 0;
1681 17412441 : bool irred = false;
1682 17412441 : bool latch_edge_was_fallthru;
1683 17412441 : edge one_succ_pred = NULL, single_entry = NULL;
1684 17412441 : edge_iterator ei;
1685 :
1686 52265722 : FOR_EACH_EDGE (e, ei, loop->header->preds)
1687 : {
1688 34853281 : if (e->src == loop->latch)
1689 17412441 : continue;
1690 17440840 : irred |= (e->flags & EDGE_IRREDUCIBLE_LOOP) != 0;
1691 17440840 : nentry++;
1692 17440840 : single_entry = e;
1693 48921728 : if (single_succ_p (e->src))
1694 14068447 : one_succ_pred = e;
1695 : }
1696 17412441 : gcc_assert (nentry);
1697 17412441 : if (nentry == 1)
1698 : {
1699 17391010 : bool need_forwarder_block = false;
1700 :
1701 : /* We do not allow entry block to be the loop preheader, since we
1702 : cannot emit code there. */
1703 17391010 : if (single_entry->src == ENTRY_BLOCK_PTR_FOR_FN (cfun))
1704 : need_forwarder_block = true;
1705 : else
1706 : {
1707 : /* If we want simple preheaders, also force the preheader to have
1708 : just a single successor and a normal edge. */
1709 17387987 : if ((flags & CP_SIMPLE_PREHEADERS)
1710 17387987 : && ((single_entry->flags & EDGE_COMPLEX)
1711 17362866 : || !single_succ_p (single_entry->src)
1712 : /* We document LOOPS_HAVE_PREHEADERS as to forming a
1713 : natural place to move code outside of the loop, so it
1714 : should not end with a control instruction. */
1715 : /* ??? This, and below JUMP_P check needs to be a new
1716 : CFG hook. */
1717 14059894 : || (cfun->curr_properties & PROP_gimple
1718 26637548 : && !gsi_end_p (gsi_last_bb (single_entry->src))
1719 8818724 : && stmt_ends_bb_p (*gsi_last_bb (single_entry->src)))))
1720 : need_forwarder_block = true;
1721 : /* If we want fallthru preheaders, also create forwarder block when
1722 : preheader ends with a jump or has predecessors from loop. */
1723 14059894 : else if ((flags & CP_FALLTHRU_PREHEADERS)
1724 14059894 : && (JUMP_P (BB_END (single_entry->src))
1725 35 : || has_preds_from_loop (single_entry->src, loop)))
1726 : need_forwarder_block = true;
1727 : }
1728 3328093 : if (! need_forwarder_block)
1729 : return NULL;
1730 : }
1731 :
1732 3352559 : mfb_kj_edge = loop_latch_edge (loop);
1733 3352559 : latch_edge_was_fallthru = (mfb_kj_edge->flags & EDGE_FALLTHRU) != 0;
1734 3352559 : if (nentry == 1
1735 3331128 : && ((flags & CP_FALLTHRU_PREHEADERS) == 0
1736 23 : || (single_entry->flags & EDGE_CROSSING) == 0))
1737 3331128 : dummy = split_edge (single_entry);
1738 : else
1739 : {
1740 21431 : edge fallthru = make_forwarder_block (loop->header, mfb_keep_just, NULL);
1741 21431 : dummy = fallthru->src;
1742 21431 : loop->header = fallthru->dest;
1743 : }
1744 :
1745 : /* Try to be clever in placing the newly created preheader. The idea is to
1746 : avoid breaking any "fallthruness" relationship between blocks.
1747 :
1748 : The preheader was created just before the header and all incoming edges
1749 : to the header were redirected to the preheader, except the latch edge.
1750 : So the only problematic case is when this latch edge was a fallthru
1751 : edge: it is not anymore after the preheader creation so we have broken
1752 : the fallthruness. We're therefore going to look for a better place. */
1753 3352559 : if (latch_edge_was_fallthru)
1754 : {
1755 1465191 : if (one_succ_pred)
1756 4985 : e = one_succ_pred;
1757 : else
1758 1460206 : e = EDGE_PRED (dummy, 0);
1759 :
1760 1465191 : move_block_after (dummy, e->src);
1761 : }
1762 :
1763 3352559 : if (irred)
1764 : {
1765 3987 : dummy->flags |= BB_IRREDUCIBLE_LOOP;
1766 3987 : single_succ_edge (dummy)->flags |= EDGE_IRREDUCIBLE_LOOP;
1767 : }
1768 :
1769 3352559 : if (dump_file)
1770 180 : fprintf (dump_file, "Created preheader block for loop %i\n",
1771 : loop->num);
1772 :
1773 3352559 : if (flags & CP_FALLTHRU_PREHEADERS)
1774 30 : gcc_assert ((single_succ_edge (dummy)->flags & EDGE_FALLTHRU)
1775 : && !JUMP_P (BB_END (dummy)));
1776 :
1777 : return dummy;
1778 : }
1779 :
1780 : /* Create preheaders for each loop; for meaning of FLAGS see create_preheader. */
1781 :
1782 : void
1783 30659467 : create_preheaders (int flags)
1784 : {
1785 30659467 : if (!current_loops)
1786 : return;
1787 :
1788 109389689 : for (auto loop : loops_list (cfun, 0))
1789 17411288 : create_preheader (loop, flags);
1790 30659467 : loops_state_set (LOOPS_HAVE_PREHEADERS);
1791 : }
1792 :
1793 : /* Forces all loop latches to have only single successor. */
1794 :
1795 : void
1796 28224373 : force_single_succ_latches (void)
1797 : {
1798 28224373 : edge e;
1799 :
1800 101437899 : for (auto loop : loops_list (cfun, 0))
1801 : {
1802 16764780 : if (loop->latch != loop->header && single_succ_p (loop->latch))
1803 11564185 : continue;
1804 :
1805 5200595 : e = find_edge (loop->latch, loop->header);
1806 5200595 : gcc_checking_assert (e != NULL);
1807 :
1808 5200595 : split_edge (e);
1809 28224373 : }
1810 28224373 : loops_state_set (LOOPS_HAVE_SIMPLE_LATCHES);
1811 28224373 : }
1812 :
1813 : /* This function is called from loop_version. It splits the entry edge
1814 : of the loop we want to version, adds the versioning condition, and
1815 : adjust the edges to the two versions of the loop appropriately.
1816 : e is an incoming edge. Returns the basic block containing the
1817 : condition.
1818 :
1819 : --- edge e ---- > [second_head]
1820 :
1821 : Split it and insert new conditional expression and adjust edges.
1822 :
1823 : --- edge e ---> [cond expr] ---> [first_head]
1824 : |
1825 : +---------> [second_head]
1826 :
1827 : THEN_PROB is the probability of then branch of the condition.
1828 : ELSE_PROB is the probability of else branch. Note that they may be both
1829 : REG_BR_PROB_BASE when condition is IFN_LOOP_VECTORIZED or
1830 : IFN_LOOP_DIST_ALIAS. */
1831 :
1832 : static basic_block
1833 38644 : lv_adjust_loop_entry_edge (basic_block first_head, basic_block second_head,
1834 : edge e, void *cond_expr,
1835 : profile_probability then_prob,
1836 : profile_probability else_prob)
1837 : {
1838 38644 : basic_block new_head = NULL;
1839 38644 : edge e1;
1840 :
1841 38644 : gcc_assert (e->dest == second_head);
1842 :
1843 : /* Split edge 'e'. This will create a new basic block, where we can
1844 : insert conditional expr. */
1845 38644 : new_head = split_edge (e);
1846 :
1847 38644 : lv_add_condition_to_bb (first_head, second_head, new_head,
1848 : cond_expr);
1849 :
1850 : /* Don't set EDGE_TRUE_VALUE in RTL mode, as it's invalid there. */
1851 38644 : e = single_succ_edge (new_head);
1852 38644 : e1 = make_edge (new_head, first_head,
1853 38644 : current_ir_type () == IR_GIMPLE ? EDGE_TRUE_VALUE : 0);
1854 38644 : e1->probability = then_prob;
1855 38644 : e->probability = else_prob;
1856 :
1857 38644 : set_immediate_dominator (CDI_DOMINATORS, first_head, new_head);
1858 38644 : set_immediate_dominator (CDI_DOMINATORS, second_head, new_head);
1859 :
1860 : /* Adjust loop header phi nodes. */
1861 38644 : lv_adjust_loop_header_phi (first_head, second_head, new_head, e1);
1862 :
1863 38644 : return new_head;
1864 : }
1865 :
1866 : /* Main entry point for Loop Versioning transformation.
1867 :
1868 : This transformation given a condition and a loop, creates
1869 : -if (condition) { loop_copy1 } else { loop_copy2 },
1870 : where loop_copy1 is the loop transformed in one way, and loop_copy2
1871 : is the loop transformed in another way (or unchanged). COND_EXPR
1872 : may be a run time test for things that were not resolved by static
1873 : analysis (overlapping ranges (anti-aliasing), alignment, etc.).
1874 :
1875 : If non-NULL, CONDITION_BB is set to the basic block containing the
1876 : condition.
1877 :
1878 : THEN_PROB is the probability of the then edge of the if. THEN_SCALE
1879 : is the ratio by that the frequencies in the original loop should
1880 : be scaled. ELSE_SCALE is the ratio by that the frequencies in the
1881 : new loop should be scaled.
1882 :
1883 : If PLACE_AFTER is true, we place the new loop after LOOP in the
1884 : instruction stream, otherwise it is placed before LOOP. */
1885 :
1886 : class loop *
1887 38654 : loop_version (class loop *loop,
1888 : void *cond_expr, basic_block *condition_bb,
1889 : profile_probability then_prob, profile_probability else_prob,
1890 : profile_probability then_scale, profile_probability else_scale,
1891 : bool place_after)
1892 : {
1893 38654 : basic_block first_head, second_head;
1894 38654 : edge entry, latch_edge;
1895 38654 : int irred_flag;
1896 38654 : class loop *nloop;
1897 38654 : basic_block cond_bb;
1898 :
1899 : /* Record entry and latch edges for the loop */
1900 38654 : entry = loop_preheader_edge (loop);
1901 38654 : irred_flag = entry->flags & EDGE_IRREDUCIBLE_LOOP;
1902 38654 : entry->flags &= ~EDGE_IRREDUCIBLE_LOOP;
1903 :
1904 : /* Note down head of loop as first_head. */
1905 38654 : first_head = entry->dest;
1906 :
1907 : /* 1) Duplicate loop on the entry edge. */
1908 38654 : if (!cfg_hook_duplicate_loop_body_to_header_edge (loop, entry, 1, NULL, NULL,
1909 : NULL, 0))
1910 : {
1911 10 : entry->flags |= irred_flag;
1912 10 : return NULL;
1913 : }
1914 :
1915 : /* 2) loopify the duplicated new loop. */
1916 38644 : latch_edge = single_succ_edge (get_bb_copy (loop->latch));
1917 38644 : nloop = alloc_loop ();
1918 38644 : class loop *outer = loop_outer (latch_edge->dest->loop_father);
1919 38644 : edge new_header_edge = single_pred_edge (get_bb_copy (loop->header));
1920 38644 : nloop->header = new_header_edge->dest;
1921 38644 : nloop->latch = latch_edge->src;
1922 38644 : loop_redirect_edge (latch_edge, nloop->header);
1923 :
1924 : /* Compute new loop. */
1925 38644 : add_loop (nloop, outer);
1926 38644 : copy_loop_info (loop, nloop);
1927 38644 : set_loop_copy (loop, nloop);
1928 :
1929 : /* loopify redirected latch_edge. Update its PENDING_STMTS. */
1930 38644 : lv_flush_pending_stmts (latch_edge);
1931 :
1932 : /* After duplication entry edge now points to new loop head block.
1933 : Note down new head as second_head. */
1934 38644 : second_head = entry->dest;
1935 :
1936 : /* 3) Split loop entry edge and insert new block with cond expr. */
1937 38644 : cond_bb = lv_adjust_loop_entry_edge (first_head, second_head,
1938 : entry, cond_expr, then_prob, else_prob);
1939 38644 : if (condition_bb)
1940 34369 : *condition_bb = cond_bb;
1941 :
1942 38644 : if (!cond_bb)
1943 : {
1944 0 : entry->flags |= irred_flag;
1945 0 : return NULL;
1946 : }
1947 :
1948 : /* Add cond_bb to appropriate loop. */
1949 38644 : if (cond_bb->loop_father)
1950 38644 : remove_bb_from_loops (cond_bb);
1951 38644 : add_bb_to_loop (cond_bb, outer);
1952 :
1953 : /* 4) Scale the original loop and new loop frequency. */
1954 38644 : scale_loop_frequencies (loop, then_scale);
1955 38644 : scale_loop_frequencies (nloop, else_scale);
1956 38644 : update_dominators_in_loop (loop);
1957 38644 : update_dominators_in_loop (nloop);
1958 :
1959 : /* Adjust irreducible flag. */
1960 38644 : if (irred_flag)
1961 : {
1962 51 : cond_bb->flags |= BB_IRREDUCIBLE_LOOP;
1963 51 : loop_preheader_edge (loop)->flags |= EDGE_IRREDUCIBLE_LOOP;
1964 51 : loop_preheader_edge (nloop)->flags |= EDGE_IRREDUCIBLE_LOOP;
1965 51 : single_pred_edge (cond_bb)->flags |= EDGE_IRREDUCIBLE_LOOP;
1966 : }
1967 :
1968 38644 : if (place_after)
1969 : {
1970 36220 : basic_block *bbs = get_loop_body_in_dom_order (nloop), after;
1971 36220 : unsigned i;
1972 :
1973 36220 : after = loop->latch;
1974 :
1975 212348 : for (i = 0; i < nloop->num_nodes; i++)
1976 : {
1977 176128 : move_block_after (bbs[i], after);
1978 176128 : after = bbs[i];
1979 : }
1980 36220 : free (bbs);
1981 : }
1982 :
1983 : /* At this point condition_bb is loop preheader with two successors,
1984 : first_head and second_head. Make sure that loop preheader has only
1985 : one successor. */
1986 38644 : split_edge (loop_preheader_edge (loop));
1987 38644 : split_edge (loop_preheader_edge (nloop));
1988 :
1989 38644 : return nloop;
1990 : }
|