Line data Source code
1 : /* Loop manipulation code for GNU compiler.
2 : Copyright (C) 2002-2026 Free Software Foundation, Inc.
3 :
4 : This file is part of GCC.
5 :
6 : GCC is free software; you can redistribute it and/or modify it under
7 : the terms of the GNU General Public License as published by the Free
8 : Software Foundation; either version 3, or (at your option) any later
9 : version.
10 :
11 : GCC is distributed in the hope that it will be useful, but WITHOUT ANY
12 : WARRANTY; without even the implied warranty of MERCHANTABILITY or
13 : FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
14 : for more details.
15 :
16 : You should have received a copy of the GNU General Public License
17 : along with GCC; see the file COPYING3. If not see
18 : <http://www.gnu.org/licenses/>. */
19 :
20 : #include "config.h"
21 : #include "system.h"
22 : #include "coretypes.h"
23 : #include "backend.h"
24 : #include "rtl.h"
25 : #include "tree.h"
26 : #include "gimple.h"
27 : #include "cfghooks.h"
28 : #include "cfganal.h"
29 : #include "cfgloop.h"
30 : #include "gimple-iterator.h"
31 : #include "gimplify-me.h"
32 : #include "tree-ssa-loop-manip.h"
33 : #include "dumpfile.h"
34 : #include "sreal.h"
35 : #include "tree-cfg.h"
36 : #include "tree-pass.h"
37 : #include "hierarchical_discriminator.h"
38 :
39 : static void copy_loops_to (class loop **, int,
40 : class loop *);
41 : static void loop_redirect_edge (edge, basic_block);
42 : static void remove_bbs (basic_block *, int);
43 : static bool rpe_enum_p (const_basic_block, const void *);
44 : static int find_path (edge, basic_block **);
45 : static void fix_loop_placements (class loop *, bool *, bitmap);
46 : static bool fix_bb_placement (basic_block);
47 : static void fix_bb_placements (basic_block, bool *, bitmap);
48 :
49 : /* Checks whether basic block BB is dominated by DATA. */
50 : static bool
51 443732 : rpe_enum_p (const_basic_block bb, const void *data)
52 : {
53 443732 : return dominated_by_p (CDI_DOMINATORS, bb, (const_basic_block) data);
54 : }
55 :
56 : /* Remove basic blocks BBS. NBBS is the number of the basic blocks. */
57 :
58 : static void
59 443734 : remove_bbs (basic_block *bbs, int nbbs)
60 : {
61 443734 : int i;
62 :
63 887468 : for (i = 0; i < nbbs; i++)
64 443734 : delete_basic_block (bbs[i]);
65 443734 : }
66 :
67 : /* Find path -- i.e. the basic blocks dominated by edge E and put them
68 : into array BBS, that will be allocated large enough to contain them.
69 : E->dest must have exactly one predecessor for this to work (it is
70 : easy to achieve and we do not put it here because we do not want to
71 : alter anything by this function). The number of basic blocks in the
72 : path is returned. */
73 : static int
74 443734 : find_path (edge e, basic_block **bbs)
75 : {
76 443734 : gcc_assert (EDGE_COUNT (e->dest->preds) <= 1);
77 :
78 : /* Find bbs in the path. */
79 443734 : *bbs = XNEWVEC (basic_block, n_basic_blocks_for_fn (cfun));
80 443734 : return dfs_enumerate_from (e->dest, 0, rpe_enum_p, *bbs,
81 443734 : n_basic_blocks_for_fn (cfun), e->dest);
82 : }
83 :
84 : /* Fix placement of basic block BB inside loop hierarchy --
85 : Let L be a loop to that BB belongs. Then every successor of BB must either
86 : 1) belong to some superloop of loop L, or
87 : 2) be a header of loop K such that K->outer is superloop of L
88 : Returns true if we had to move BB into other loop to enforce this condition,
89 : false if the placement of BB was already correct (provided that placements
90 : of its successors are correct). */
91 : static bool
92 254084 : fix_bb_placement (basic_block bb)
93 : {
94 254084 : edge e;
95 254084 : edge_iterator ei;
96 254084 : class loop *loop = current_loops->tree_root, *act;
97 :
98 522292 : FOR_EACH_EDGE (e, ei, bb->succs)
99 : {
100 268208 : if (e->dest == EXIT_BLOCK_PTR_FOR_FN (cfun))
101 0 : continue;
102 :
103 268208 : act = e->dest->loop_father;
104 268208 : if (act->header == e->dest)
105 317 : act = loop_outer (act);
106 :
107 268208 : if (flow_loop_nested_p (loop, act))
108 268208 : loop = act;
109 : }
110 :
111 254084 : if (loop == bb->loop_father)
112 : return false;
113 :
114 55737 : remove_bb_from_loops (bb);
115 55737 : add_bb_to_loop (bb, loop);
116 :
117 55737 : return true;
118 : }
119 :
120 : /* Fix placement of LOOP inside loop tree, i.e. find the innermost superloop
121 : of LOOP to that leads at least one exit edge of LOOP, and set it
122 : as the immediate superloop of LOOP. Return true if the immediate superloop
123 : of LOOP changed.
124 :
125 : IRRED_INVALIDATED is set to true if a change in the loop structures might
126 : invalidate the information about irreducible regions. */
127 :
128 : static bool
129 249617 : fix_loop_placement (class loop *loop, bool *irred_invalidated,
130 : bitmap loop_closed_ssa_invalidated)
131 : {
132 249617 : unsigned i;
133 249617 : edge e;
134 249617 : auto_vec<edge> exits = get_loop_exit_edges (loop);
135 249617 : class loop *father = current_loops->tree_root, *act;
136 249617 : bool ret = false;
137 :
138 1634013 : FOR_EACH_VEC_ELT (exits, i, e)
139 : {
140 1384396 : act = find_common_loop (loop, e->dest->loop_father);
141 1384396 : if (flow_loop_nested_p (father, act))
142 85194 : father = act;
143 : }
144 :
145 249617 : if (father != loop_outer (loop))
146 : {
147 909 : for (act = loop_outer (loop); act != father; act = loop_outer (act))
148 592 : act->num_nodes -= loop->num_nodes;
149 317 : flow_loop_tree_node_remove (loop);
150 317 : flow_loop_tree_node_add (father, loop);
151 :
152 : /* The exit edges of LOOP no longer exits its original immediate
153 : superloops; remove them from the appropriate exit lists. */
154 982 : FOR_EACH_VEC_ELT (exits, i, e)
155 : {
156 : /* We may need to recompute irreducible loops. */
157 348 : if (e->flags & EDGE_IRREDUCIBLE_LOOP)
158 0 : *irred_invalidated = true;
159 348 : rescan_loop_exit (e, false, false);
160 : }
161 : /* Any LC SSA PHIs on e->dest might now be on the wrong edge
162 : if their defs were in a former outer loop. Also all uses
163 : in the original inner loop of defs in the outer loop(s) now
164 : require LC PHI nodes. */
165 317 : if (loop_closed_ssa_invalidated)
166 : {
167 12 : basic_block *bbs = get_loop_body (loop);
168 91 : for (unsigned i = 0; i < loop->num_nodes; ++i)
169 79 : bitmap_set_bit (loop_closed_ssa_invalidated, bbs[i]->index);
170 12 : free (bbs);
171 : }
172 :
173 : ret = true;
174 : }
175 :
176 249617 : return ret;
177 249617 : }
178 :
179 : /* Fix placements of basic blocks inside loop hierarchy stored in loops; i.e.
180 : enforce condition stated in description of fix_bb_placement. We
181 : start from basic block FROM that had some of its successors removed, so that
182 : his placement no longer has to be correct, and iteratively fix placement of
183 : its predecessors that may change if placement of FROM changed. Also fix
184 : placement of subloops of FROM->loop_father, that might also be altered due
185 : to this change; the condition for them is similar, except that instead of
186 : successors we consider edges coming out of the loops.
187 :
188 : If the changes may invalidate the information about irreducible regions,
189 : IRRED_INVALIDATED is set to true.
190 :
191 : If LOOP_CLOSED_SSA_INVLIDATED is non-zero then all basic blocks with
192 : changed loop_father are collected there. */
193 :
194 : static void
195 581662 : fix_bb_placements (basic_block from,
196 : bool *irred_invalidated,
197 : bitmap loop_closed_ssa_invalidated)
198 : {
199 581662 : basic_block *queue, *qtop, *qbeg, *qend;
200 581662 : class loop *base_loop, *target_loop;
201 581662 : edge e;
202 :
203 : /* We pass through blocks back-reachable from FROM, testing whether some
204 : of their successors moved to outer loop. It may be necessary to
205 : iterate several times, but it is finite, as we stop unless we move
206 : the basic block up the loop structure. The whole story is a bit
207 : more complicated due to presence of subloops, those are moved using
208 : fix_loop_placement. */
209 :
210 581662 : base_loop = from->loop_father;
211 : /* If we are already in the outermost loop, the basic blocks cannot be moved
212 : outside of it. If FROM is the header of the base loop, it cannot be moved
213 : outside of it, either. In both cases, we can end now. */
214 581662 : if (base_loop == current_loops->tree_root
215 240298 : || from == base_loop->header)
216 387763 : return;
217 :
218 193899 : auto_sbitmap in_queue (last_basic_block_for_fn (cfun));
219 193899 : bitmap_clear (in_queue);
220 193899 : bitmap_set_bit (in_queue, from->index);
221 : /* Prevent us from going out of the base_loop. */
222 193899 : bitmap_set_bit (in_queue, base_loop->header->index);
223 :
224 193899 : queue = XNEWVEC (basic_block, base_loop->num_nodes + 1);
225 193899 : qtop = queue + base_loop->num_nodes + 1;
226 193899 : qbeg = queue;
227 193899 : qend = queue + 1;
228 193899 : *qbeg = from;
229 :
230 448617 : while (qbeg != qend)
231 : {
232 254718 : edge_iterator ei;
233 254718 : from = *qbeg;
234 254718 : qbeg++;
235 254718 : if (qbeg == qtop)
236 0 : qbeg = queue;
237 254718 : bitmap_clear_bit (in_queue, from->index);
238 :
239 254718 : if (from->loop_father->header == from)
240 : {
241 : /* Subloop header, maybe move the loop upward. */
242 634 : if (!fix_loop_placement (from->loop_father, irred_invalidated,
243 : loop_closed_ssa_invalidated))
244 198670 : continue;
245 311 : target_loop = loop_outer (from->loop_father);
246 : }
247 : else
248 : {
249 : /* Ordinary basic block. */
250 254084 : if (!fix_bb_placement (from))
251 198347 : continue;
252 55737 : target_loop = from->loop_father;
253 55737 : if (loop_closed_ssa_invalidated)
254 20618 : bitmap_set_bit (loop_closed_ssa_invalidated, from->index);
255 : }
256 :
257 85895 : FOR_EACH_EDGE (e, ei, from->succs)
258 : {
259 29847 : if (e->flags & EDGE_IRREDUCIBLE_LOOP)
260 1 : *irred_invalidated = true;
261 : }
262 :
263 : /* Something has changed, insert predecessors into queue. */
264 117787 : FOR_EACH_EDGE (e, ei, from->preds)
265 : {
266 61739 : basic_block pred = e->src;
267 61739 : class loop *nca;
268 :
269 61739 : if (e->flags & EDGE_IRREDUCIBLE_LOOP)
270 0 : *irred_invalidated = true;
271 :
272 61739 : if (bitmap_bit_p (in_queue, pred->index))
273 920 : continue;
274 :
275 : /* If it is subloop, then it either was not moved, or
276 : the path up the loop tree from base_loop do not contain
277 : it. */
278 60819 : nca = find_common_loop (pred->loop_father, base_loop);
279 60819 : if (pred->loop_father != base_loop
280 634 : && (nca == base_loop
281 311 : || nca != pred->loop_father))
282 634 : pred = pred->loop_father->header;
283 60185 : else if (!flow_loop_nested_p (target_loop, pred->loop_father))
284 : {
285 : /* If PRED is already higher in the loop hierarchy than the
286 : TARGET_LOOP to that we moved FROM, the change of the position
287 : of FROM does not affect the position of PRED, so there is no
288 : point in processing it. */
289 0 : continue;
290 : }
291 :
292 60819 : if (bitmap_bit_p (in_queue, pred->index))
293 0 : continue;
294 :
295 : /* Schedule the basic block. */
296 60819 : *qend = pred;
297 60819 : qend++;
298 60819 : if (qend == qtop)
299 0 : qend = queue;
300 60819 : bitmap_set_bit (in_queue, pred->index);
301 : }
302 : }
303 193899 : free (queue);
304 193899 : }
305 :
306 : /* Removes path beginning at edge E, i.e. remove basic blocks dominated by E
307 : and update loop structures and dominators. Return true if we were able
308 : to remove the path, false otherwise (and nothing is affected then). */
309 : bool
310 443734 : remove_path (edge e, bool *irred_invalidated,
311 : bitmap loop_closed_ssa_invalidated)
312 : {
313 443734 : edge ae;
314 443734 : basic_block *rem_bbs, *bord_bbs, from, bb;
315 443734 : vec<basic_block> dom_bbs;
316 443734 : int i, nrem, n_bord_bbs;
317 443734 : bool local_irred_invalidated = false;
318 443734 : edge_iterator ei;
319 443734 : class loop *l, *f;
320 :
321 443734 : if (! irred_invalidated)
322 150791 : irred_invalidated = &local_irred_invalidated;
323 :
324 443734 : if (!can_remove_branch_p (e))
325 : return false;
326 :
327 : /* Keep track of whether we need to update information about irreducible
328 : regions. This is the case if the removed area is a part of the
329 : irreducible region, or if the set of basic blocks that belong to a loop
330 : that is inside an irreducible region is changed, or if such a loop is
331 : removed. */
332 443734 : if (e->flags & EDGE_IRREDUCIBLE_LOOP)
333 365 : *irred_invalidated = true;
334 :
335 : /* We need to check whether basic blocks are dominated by the edge
336 : e, but we only have basic block dominators. This is easy to
337 : fix -- when e->dest has exactly one predecessor, this corresponds
338 : to blocks dominated by e->dest, if not, split the edge. */
339 443734 : if (!single_pred_p (e->dest))
340 443732 : e = single_pred_edge (split_edge (e));
341 :
342 : /* It may happen that by removing path we remove one or more loops
343 : we belong to. In this case first unloop the loops, then proceed
344 : normally. We may assume that e->dest is not a header of any loop,
345 : as it now has exactly one predecessor. */
346 755033 : for (l = e->src->loop_father; loop_outer (l); l = f)
347 : {
348 311299 : f = loop_outer (l);
349 311299 : if (dominated_by_p (CDI_DOMINATORS, l->latch, e->dest))
350 2 : unloop (l, irred_invalidated, loop_closed_ssa_invalidated);
351 : }
352 :
353 : /* Identify the path. */
354 443734 : nrem = find_path (e, &rem_bbs);
355 :
356 443734 : n_bord_bbs = 0;
357 443734 : bord_bbs = XNEWVEC (basic_block, n_basic_blocks_for_fn (cfun));
358 443734 : auto_sbitmap seen (last_basic_block_for_fn (cfun));
359 443734 : bitmap_clear (seen);
360 :
361 : /* Find "border" hexes -- i.e. those with predecessor in removed path. */
362 1331202 : for (i = 0; i < nrem; i++)
363 443734 : bitmap_set_bit (seen, rem_bbs[i]->index);
364 443734 : if (!*irred_invalidated)
365 1329967 : FOR_EACH_EDGE (ae, ei, e->src->succs)
366 443369 : if (ae != e && ae->dest != EXIT_BLOCK_PTR_FOR_FN (cfun)
367 443369 : && !bitmap_bit_p (seen, ae->dest->index)
368 1330082 : && ae->flags & EDGE_IRREDUCIBLE_LOOP)
369 : {
370 115 : *irred_invalidated = true;
371 115 : break;
372 : }
373 :
374 887468 : for (i = 0; i < nrem; i++)
375 : {
376 887466 : FOR_EACH_EDGE (ae, ei, rem_bbs[i]->succs)
377 443732 : if (ae->dest != EXIT_BLOCK_PTR_FOR_FN (cfun)
378 443732 : && !bitmap_bit_p (seen, ae->dest->index))
379 : {
380 443732 : bitmap_set_bit (seen, ae->dest->index);
381 443732 : bord_bbs[n_bord_bbs++] = ae->dest;
382 :
383 443732 : if (ae->flags & EDGE_IRREDUCIBLE_LOOP)
384 365 : *irred_invalidated = true;
385 : }
386 : }
387 :
388 : /* Remove the path. */
389 443734 : from = e->src;
390 443734 : remove_branch (e);
391 443734 : dom_bbs.create (0);
392 :
393 : /* Cancel loops contained in the path. */
394 887468 : for (i = 0; i < nrem; i++)
395 443734 : if (rem_bbs[i]->loop_father->header == rem_bbs[i])
396 0 : cancel_loop_tree (rem_bbs[i]->loop_father);
397 :
398 443734 : remove_bbs (rem_bbs, nrem);
399 443734 : free (rem_bbs);
400 :
401 : /* Find blocks whose dominators may be affected. */
402 443734 : bitmap_clear (seen);
403 887466 : for (i = 0; i < n_bord_bbs; i++)
404 : {
405 443732 : basic_block ldom;
406 :
407 443732 : bb = get_immediate_dominator (CDI_DOMINATORS, bord_bbs[i]);
408 443732 : if (bitmap_bit_p (seen, bb->index))
409 0 : continue;
410 443732 : bitmap_set_bit (seen, bb->index);
411 :
412 443732 : for (ldom = first_dom_son (CDI_DOMINATORS, bb);
413 1457007 : ldom;
414 1013275 : ldom = next_dom_son (CDI_DOMINATORS, ldom))
415 1013275 : if (!dominated_by_p (CDI_DOMINATORS, from, ldom))
416 854007 : dom_bbs.safe_push (ldom);
417 : }
418 :
419 : /* Recount dominators. */
420 443734 : iterate_fix_dominators (CDI_DOMINATORS, dom_bbs, true);
421 443734 : dom_bbs.release ();
422 443734 : free (bord_bbs);
423 :
424 : /* Fix placements of basic blocks inside loops and the placement of
425 : loops in the loop tree. */
426 443734 : fix_bb_placements (from, irred_invalidated, loop_closed_ssa_invalidated);
427 443734 : fix_loop_placements (from->loop_father, irred_invalidated,
428 : loop_closed_ssa_invalidated);
429 :
430 443734 : if (local_irred_invalidated
431 443734 : && loops_state_satisfies_p (LOOPS_HAVE_MARKED_IRREDUCIBLE_REGIONS))
432 448 : mark_irreducible_loops ();
433 :
434 443734 : return true;
435 443734 : }
436 :
437 : /* Creates place for a new LOOP in loops structure of FN. */
438 :
439 : void
440 794361 : place_new_loop (struct function *fn, class loop *loop)
441 : {
442 794361 : loop->num = number_of_loops (fn);
443 794361 : vec_safe_push (loops_for_fn (fn)->larray, loop);
444 794361 : }
445 :
446 : /* Given LOOP structure with filled header and latch, find the body of the
447 : corresponding loop and add it to loops tree. Insert the LOOP as a son of
448 : outer. */
449 :
450 : void
451 116903 : add_loop (class loop *loop, class loop *outer)
452 : {
453 116903 : basic_block *bbs;
454 116903 : int i, n;
455 116903 : class loop *subloop;
456 116903 : edge e;
457 116903 : edge_iterator ei;
458 :
459 : /* Add it to loop structure. */
460 116903 : place_new_loop (cfun, loop);
461 116903 : flow_loop_tree_node_add (outer, loop);
462 :
463 : /* Find its nodes. */
464 116903 : bbs = XNEWVEC (basic_block, n_basic_blocks_for_fn (cfun));
465 116903 : n = get_loop_body_with_size (loop, bbs, n_basic_blocks_for_fn (cfun));
466 :
467 835103 : for (i = 0; i < n; i++)
468 : {
469 718200 : if (bbs[i]->loop_father == outer)
470 : {
471 494387 : remove_bb_from_loops (bbs[i]);
472 494387 : add_bb_to_loop (bbs[i], loop);
473 494387 : continue;
474 : }
475 :
476 223813 : loop->num_nodes++;
477 :
478 : /* If we find a direct subloop of OUTER, move it to LOOP. */
479 223813 : subloop = bbs[i]->loop_father;
480 223813 : if (loop_outer (subloop) == outer
481 223813 : && subloop->header == bbs[i])
482 : {
483 23618 : flow_loop_tree_node_remove (subloop);
484 23618 : flow_loop_tree_node_add (loop, subloop);
485 : }
486 : }
487 :
488 : /* Update the information about loop exit edges. */
489 835103 : for (i = 0; i < n; i++)
490 : {
491 1762363 : FOR_EACH_EDGE (e, ei, bbs[i]->succs)
492 : {
493 1044163 : rescan_loop_exit (e, false, false);
494 : }
495 : }
496 :
497 116903 : free (bbs);
498 116903 : }
499 :
500 : /* Scale profile of loop by P. */
501 :
502 : void
503 264108 : scale_loop_frequencies (class loop *loop, profile_probability p)
504 : {
505 264108 : basic_block *bbs;
506 :
507 264108 : bbs = get_loop_body (loop);
508 264108 : scale_bbs_frequencies (bbs, loop->num_nodes, p);
509 264108 : free (bbs);
510 264108 : }
511 :
512 : /* Scales the frequencies of all basic blocks in LOOP that are strictly
513 : dominated by BB by NUM/DEN. */
514 :
515 : void
516 37096 : scale_dominated_blocks_in_loop (class loop *loop, basic_block bb,
517 : profile_count num, profile_count den)
518 : {
519 37096 : basic_block son;
520 :
521 37096 : if (!den.nonzero_p () && !(num == profile_count::zero ()))
522 0 : return;
523 37096 : auto_vec <basic_block, 8> worklist;
524 37096 : worklist.safe_push (bb);
525 :
526 302385 : while (!worklist.is_empty ())
527 191097 : for (son = first_dom_son (CDI_DOMINATORS, worklist.pop ());
528 381474 : son;
529 190377 : son = next_dom_son (CDI_DOMINATORS, son))
530 : {
531 190377 : if (!flow_bb_inside_loop_p (loop, son))
532 36376 : continue;
533 154001 : son->count = son->count.apply_scale (num, den);
534 154001 : worklist.safe_push (son);
535 : }
536 37096 : }
537 :
538 : /* Return exit that suitable for update when loop iterations
539 : changed. */
540 :
541 : static edge
542 109551 : loop_exit_for_scaling (class loop *loop)
543 : {
544 109551 : edge exit_edge = single_exit (loop);
545 109551 : if (!exit_edge)
546 : {
547 8787 : auto_vec<edge> exits = get_loop_exit_edges (loop);
548 8787 : exit_edge = single_likely_exit (loop, exits);
549 8787 : }
550 109551 : return exit_edge;
551 : }
552 :
553 : /* Assume that loop's entry count and profile up to a given EXIT_EDGE is
554 : consistent. Update exit probability so loop exists with PROFILE_COUNT
555 : and rescale profile of basic blocks inside loop dominated by EXIT_EDGE->src.
556 :
557 : This is useful after number of iteraitons of loop has changed.
558 : If EXIT_EDGE is NULL, the function will try to identify suitable exit.
559 : If DESIRED_COUNT is NULL, loop entry count will be used.
560 : In consistent profile usually loop exists as many times as it is entred.
561 :
562 : Return updated exit if successfull and NULL otherwise. */
563 :
564 : edge
565 139539 : update_loop_exit_probability_scale_dom_bbs (class loop *loop,
566 : edge exit_edge,
567 : profile_count desired_count)
568 : {
569 139539 : if (!exit_edge)
570 3055 : exit_edge = loop_exit_for_scaling (loop);
571 3055 : if (!exit_edge)
572 : {
573 2291 : if (dump_file && (dump_flags & TDF_DETAILS))
574 : {
575 266 : fprintf (dump_file, ";; Not updating exit probability of loop %i;"
576 : " it has no single exit\n",
577 : loop->num);
578 : }
579 2291 : return NULL;
580 : }
581 : /* If exit is inside another loop, adjusting its probability will also
582 : adjust number of the inner loop iterations. Just do noting for now. */
583 137248 : if (!just_once_each_iteration_p (loop, exit_edge->src))
584 : {
585 6 : if (dump_file && (dump_flags & TDF_DETAILS))
586 : {
587 0 : fprintf (dump_file, ";; Not updating exit probability of loop %i;"
588 : " exit is inside inner loop\n",
589 : loop->num);
590 : }
591 6 : return NULL;
592 : }
593 : /* Normal loops exit as many times as they are entered. */
594 137242 : if (!desired_count.initialized_p ())
595 1580 : desired_count = loop_count_in (loop);
596 : /* See if everything is already perfect. */
597 137242 : if (desired_count == exit_edge->count ())
598 6406 : return exit_edge;
599 130836 : profile_count old_exit_count = exit_edge->count ();
600 : /* See if update is possible.
601 : Avoid turning probability to 0 or 1 just trying to reach impossible
602 : value.
603 :
604 : Natural profile update after some loop will happily scale header count to
605 : be less than count of entering the loop. This is bad idea and they should
606 : special case maybe_flat_loop_profile.
607 :
608 : It may also happen that the source basic block of the exit edge is
609 : inside in-loop condition:
610 :
611 : +-> header
612 : | |
613 : | B1
614 : | / \
615 : | | B2--exit_edge-->
616 : | \ /
617 : | B3
618 : +__/
619 :
620 : If B2 count is smaller than desired exit edge count
621 : the profile was inconsistent with the newly discovered upper bound.
622 : Probablity of edge B1->B2 is too low. We do not attempt to fix
623 : that (as it is hard in general). */
624 130836 : if (desired_count > exit_edge->src->count
625 130836 : && exit_edge->src->count.differs_from_p (desired_count))
626 : {
627 367 : if (dump_file)
628 : {
629 7 : fprintf (dump_file, ";; Source bb of loop %i has count ",
630 : loop->num);
631 7 : exit_edge->src->count.dump (dump_file, cfun);
632 7 : fprintf (dump_file,
633 : " which is smaller then desired count of exitting loop ");
634 7 : desired_count.dump (dump_file, cfun);
635 7 : fprintf (dump_file, ". Profile update is impossible.\n");
636 : }
637 : /* Drop quality of probability since we know it likely
638 : bad. */
639 367 : exit_edge->probability = exit_edge->probability.guessed ();
640 367 : return NULL;
641 : }
642 130469 : if (!exit_edge->src->count.nonzero_p ())
643 : {
644 1 : if (dump_file)
645 : {
646 0 : fprintf (dump_file, ";; Not updating exit edge probability"
647 : " in loop %i since profile is zero ",
648 : loop->num);
649 : }
650 1 : return NULL;
651 : }
652 130468 : set_edge_probability_and_rescale_others
653 130468 : (exit_edge, desired_count.probability_in (exit_edge->src->count));
654 : /* Rescale the remaining edge probabilities and see if there is only
655 : one. */
656 130468 : edge other_edge = NULL;
657 130468 : bool found = false;
658 130468 : edge e;
659 130468 : edge_iterator ei;
660 391405 : FOR_EACH_EDGE (e, ei, exit_edge->src->succs)
661 260948 : if (!(e->flags & EDGE_FAKE)
662 260948 : && !loop_exit_edge_p (loop, e))
663 : {
664 130479 : if (found)
665 : {
666 : other_edge = NULL;
667 : break;
668 : }
669 : other_edge = e;
670 : found = true;
671 : }
672 : /* If there is only loop latch after other edge,
673 : update its profile. This is tiny bit more precise
674 : than scaling. */
675 130468 : if (other_edge && other_edge->dest == loop->latch)
676 : {
677 94157 : if (single_pred_p (loop->latch))
678 94157 : loop->latch->count = loop->latch->count
679 94157 : + old_exit_count - exit_edge->count ();
680 : }
681 : else
682 : /* If there are multiple blocks, just scale. */
683 72622 : scale_dominated_blocks_in_loop (loop, exit_edge->src,
684 72622 : exit_edge->src->count - exit_edge->count (),
685 36311 : exit_edge->src->count - old_exit_count);
686 : return exit_edge;
687 : }
688 :
689 : /* Scale profile in LOOP by P.
690 : If ITERATION_BOUND is not -1, scale even further if loop is predicted
691 : to iterate too many times.
692 : Before caling this function, preheader block profile should be already
693 : scaled to final count. This is necessary because loop iterations are
694 : determined by comparing header edge count to latch ege count and thus
695 : they need to be scaled synchronously. */
696 :
697 : void
698 286361 : scale_loop_profile (class loop *loop, profile_probability p,
699 : gcov_type iteration_bound)
700 : {
701 286361 : if (!(p == profile_probability::always ()))
702 : {
703 79650 : if (dump_file && (dump_flags & TDF_DETAILS))
704 : {
705 11036 : fprintf (dump_file, ";; Scaling loop %i with scale ",
706 : loop->num);
707 11036 : p.dump (dump_file);
708 11036 : fprintf (dump_file, "\n");
709 : }
710 :
711 : /* Scale the probabilities. */
712 79650 : scale_loop_frequencies (loop, p);
713 : }
714 :
715 286361 : if (iteration_bound == -1)
716 179865 : return;
717 :
718 233709 : sreal iterations;
719 233709 : if (!expected_loop_iterations_by_profile (loop, &iterations))
720 : return;
721 :
722 233208 : if (dump_file && (dump_flags & TDF_DETAILS))
723 : {
724 15497 : fprintf (dump_file,
725 : ";; Guessed iterations of loop %i is %f. New upper bound %i.\n",
726 : loop->num,
727 : iterations.to_double (),
728 : (int)iteration_bound);
729 : }
730 :
731 : /* See if loop is predicted to iterate too many times. */
732 233208 : if (iterations <= (sreal)iteration_bound)
733 : return;
734 :
735 106496 : profile_count count_in = loop_count_in (loop);
736 :
737 : /* Now scale the loop body so header count is
738 : count_in * (iteration_bound + 1) */
739 106496 : profile_probability scale_prob
740 106496 : = (count_in * (iteration_bound + 1)).probability_in (loop->header->count);
741 106496 : if (dump_file && (dump_flags & TDF_DETAILS))
742 : {
743 10648 : fprintf (dump_file, ";; Scaling loop %i with scale ",
744 : loop->num);
745 10648 : scale_prob.dump (dump_file);
746 10648 : fprintf (dump_file, " to reach upper bound %i\n",
747 : (int)iteration_bound);
748 : }
749 : /* Finally attempt to fix exit edge probability. */
750 106496 : edge exit_edge = loop_exit_for_scaling (loop);
751 :
752 : /* In a consistent profile unadjusted_exit_count should be same as count_in,
753 : however to preserve as much of the original info, avoid recomputing
754 : it. */
755 106496 : profile_count unadjusted_exit_count = profile_count::uninitialized ();
756 106496 : if (exit_edge)
757 104205 : unadjusted_exit_count = exit_edge->count ();
758 106496 : scale_loop_frequencies (loop, scale_prob);
759 106496 : update_loop_exit_probability_scale_dom_bbs (loop, exit_edge,
760 : unadjusted_exit_count);
761 : }
762 :
763 : /* Recompute dominance information for basic blocks outside LOOP. */
764 :
765 : static void
766 77786 : update_dominators_in_loop (class loop *loop)
767 : {
768 77786 : vec<basic_block> dom_bbs = vNULL;
769 77786 : basic_block *body;
770 77786 : unsigned i;
771 :
772 77786 : auto_sbitmap seen (last_basic_block_for_fn (cfun));
773 77786 : bitmap_clear (seen);
774 77786 : body = get_loop_body (loop);
775 :
776 553032 : for (i = 0; i < loop->num_nodes; i++)
777 397460 : bitmap_set_bit (seen, body[i]->index);
778 :
779 475246 : for (i = 0; i < loop->num_nodes; i++)
780 : {
781 397460 : basic_block ldom;
782 :
783 397460 : for (ldom = first_dom_son (CDI_DOMINATORS, body[i]);
784 749310 : ldom;
785 351850 : ldom = next_dom_son (CDI_DOMINATORS, ldom))
786 351850 : if (!bitmap_bit_p (seen, ldom->index))
787 : {
788 32176 : bitmap_set_bit (seen, ldom->index);
789 32176 : dom_bbs.safe_push (ldom);
790 : }
791 : }
792 :
793 77786 : iterate_fix_dominators (CDI_DOMINATORS, dom_bbs, false);
794 77786 : free (body);
795 77786 : dom_bbs.release ();
796 77786 : }
797 :
798 : /* Creates an if region as shown above. CONDITION is used to create
799 : the test for the if.
800 :
801 : |
802 : | ------------- -------------
803 : | | pred_bb | | pred_bb |
804 : | ------------- -------------
805 : | | |
806 : | | | ENTRY_EDGE
807 : | | ENTRY_EDGE V
808 : | | ====> -------------
809 : | | | cond_bb |
810 : | | | CONDITION |
811 : | | -------------
812 : | V / \
813 : | ------------- e_false / \ e_true
814 : | | succ_bb | V V
815 : | ------------- ----------- -----------
816 : | | false_bb | | true_bb |
817 : | ----------- -----------
818 : | \ /
819 : | \ /
820 : | V V
821 : | -------------
822 : | | join_bb |
823 : | -------------
824 : | | exit_edge (result)
825 : | V
826 : | -----------
827 : | | succ_bb |
828 : | -----------
829 : |
830 : */
831 :
832 : edge
833 203 : create_empty_if_region_on_edge (edge entry_edge, tree condition)
834 : {
835 :
836 203 : basic_block cond_bb, true_bb, false_bb, join_bb;
837 203 : edge e_true, e_false, exit_edge;
838 203 : gcond *cond_stmt;
839 203 : tree simple_cond;
840 203 : gimple_stmt_iterator gsi;
841 :
842 203 : cond_bb = split_edge (entry_edge);
843 :
844 : /* Insert condition in cond_bb. */
845 203 : gsi = gsi_last_bb (cond_bb);
846 203 : simple_cond =
847 203 : force_gimple_operand_gsi (&gsi, condition, true, NULL,
848 : false, GSI_NEW_STMT);
849 203 : cond_stmt = gimple_build_cond_from_tree (simple_cond, NULL_TREE, NULL_TREE);
850 203 : gsi = gsi_last_bb (cond_bb);
851 203 : gsi_insert_after (&gsi, cond_stmt, GSI_NEW_STMT);
852 :
853 203 : join_bb = split_edge (single_succ_edge (cond_bb));
854 :
855 203 : e_true = single_succ_edge (cond_bb);
856 203 : true_bb = split_edge (e_true);
857 :
858 203 : e_false = make_edge (cond_bb, join_bb, 0);
859 203 : false_bb = split_edge (e_false);
860 :
861 203 : e_true->flags &= ~EDGE_FALLTHRU;
862 203 : e_true->flags |= EDGE_TRUE_VALUE;
863 203 : e_false->flags &= ~EDGE_FALLTHRU;
864 203 : e_false->flags |= EDGE_FALSE_VALUE;
865 :
866 203 : set_immediate_dominator (CDI_DOMINATORS, cond_bb, entry_edge->src);
867 203 : set_immediate_dominator (CDI_DOMINATORS, true_bb, cond_bb);
868 203 : set_immediate_dominator (CDI_DOMINATORS, false_bb, cond_bb);
869 203 : set_immediate_dominator (CDI_DOMINATORS, join_bb, cond_bb);
870 :
871 203 : exit_edge = single_succ_edge (join_bb);
872 :
873 203 : if (single_pred_p (exit_edge->dest))
874 203 : set_immediate_dominator (CDI_DOMINATORS, exit_edge->dest, join_bb);
875 :
876 203 : return exit_edge;
877 : }
878 :
879 : /* create_empty_loop_on_edge
880 : |
881 : | - pred_bb - ------ pred_bb ------
882 : | | | | iv0 = initial_value |
883 : | -----|----- ---------|-----------
884 : | | ______ | entry_edge
885 : | | entry_edge / | |
886 : | | ====> | -V---V- loop_header -------------
887 : | V | | iv_before = phi (iv0, iv_after) |
888 : | - succ_bb - | ---|-----------------------------
889 : | | | | |
890 : | ----------- | ---V--- loop_body ---------------
891 : | | | iv_after = iv_before + stride |
892 : | | | if (iv_before < upper_bound) |
893 : | | ---|--------------\--------------
894 : | | | \ exit_e
895 : | | V \
896 : | | - loop_latch - V- succ_bb -
897 : | | | | | |
898 : | | /------------- -----------
899 : | \ ___ /
900 :
901 : Creates an empty loop as shown above, the IV_BEFORE is the SSA_NAME
902 : that is used before the increment of IV. IV_BEFORE should be used for
903 : adding code to the body that uses the IV. OUTER is the outer loop in
904 : which the new loop should be inserted.
905 :
906 : Both INITIAL_VALUE and UPPER_BOUND expressions are gimplified and
907 : inserted on the loop entry edge. This implies that this function
908 : should be used only when the UPPER_BOUND expression is a loop
909 : invariant. */
910 :
911 : class loop *
912 500 : create_empty_loop_on_edge (edge entry_edge,
913 : tree initial_value,
914 : tree stride, tree upper_bound,
915 : tree iv,
916 : tree *iv_before,
917 : tree *iv_after,
918 : class loop *outer)
919 : {
920 500 : basic_block loop_header, loop_latch, succ_bb, pred_bb;
921 500 : class loop *loop;
922 500 : gimple_stmt_iterator gsi;
923 500 : gimple_seq stmts;
924 500 : gcond *cond_expr;
925 500 : tree exit_test;
926 500 : edge exit_e;
927 :
928 500 : gcc_assert (entry_edge && initial_value && stride && upper_bound && iv);
929 :
930 : /* Create header, latch and wire up the loop. */
931 500 : pred_bb = entry_edge->src;
932 500 : loop_header = split_edge (entry_edge);
933 500 : loop_latch = split_edge (single_succ_edge (loop_header));
934 500 : succ_bb = single_succ (loop_latch);
935 500 : make_edge (loop_header, succ_bb, 0);
936 500 : redirect_edge_succ_nodup (single_succ_edge (loop_latch), loop_header);
937 :
938 : /* Set immediate dominator information. */
939 500 : set_immediate_dominator (CDI_DOMINATORS, loop_header, pred_bb);
940 500 : set_immediate_dominator (CDI_DOMINATORS, loop_latch, loop_header);
941 500 : set_immediate_dominator (CDI_DOMINATORS, succ_bb, loop_header);
942 :
943 : /* Initialize a loop structure and put it in a loop hierarchy. */
944 500 : loop = alloc_loop ();
945 500 : loop->header = loop_header;
946 500 : loop->latch = loop_latch;
947 500 : add_loop (loop, outer);
948 :
949 : /* TODO: Fix counts. */
950 500 : scale_loop_frequencies (loop, profile_probability::even ());
951 :
952 : /* Update dominators. */
953 500 : update_dominators_in_loop (loop);
954 :
955 : /* Modify edge flags. */
956 500 : exit_e = single_exit (loop);
957 500 : exit_e->flags = EDGE_LOOP_EXIT | EDGE_FALSE_VALUE;
958 500 : single_pred_edge (loop_latch)->flags = EDGE_TRUE_VALUE;
959 :
960 : /* Construct IV code in loop. */
961 500 : initial_value = force_gimple_operand (initial_value, &stmts, true, iv);
962 500 : if (stmts)
963 : {
964 17 : gsi_insert_seq_on_edge (loop_preheader_edge (loop), stmts);
965 17 : gsi_commit_edge_inserts ();
966 : }
967 :
968 500 : upper_bound = force_gimple_operand (upper_bound, &stmts, true, NULL);
969 500 : if (stmts)
970 : {
971 161 : gsi_insert_seq_on_edge (loop_preheader_edge (loop), stmts);
972 161 : gsi_commit_edge_inserts ();
973 : }
974 :
975 500 : gsi = gsi_last_bb (loop_header);
976 500 : create_iv (initial_value, PLUS_EXPR, stride, iv, loop, &gsi, false,
977 : iv_before, iv_after);
978 :
979 : /* Insert loop exit condition. */
980 500 : cond_expr = gimple_build_cond
981 500 : (LT_EXPR, *iv_before, upper_bound, NULL_TREE, NULL_TREE);
982 :
983 500 : exit_test = gimple_cond_lhs (cond_expr);
984 500 : exit_test = force_gimple_operand_gsi (&gsi, exit_test, true, NULL,
985 : false, GSI_NEW_STMT);
986 500 : gimple_cond_set_lhs (cond_expr, exit_test);
987 500 : gsi = gsi_last_bb (exit_e->src);
988 500 : gsi_insert_after (&gsi, cond_expr, GSI_NEW_STMT);
989 :
990 500 : split_block_after_labels (loop_header);
991 :
992 500 : return loop;
993 : }
994 :
995 : /* Remove the latch edge of a LOOP and update loops to indicate that
996 : the LOOP was removed. After this function, original loop latch will
997 : have no successor, which caller is expected to fix somehow.
998 :
999 : If this may cause the information about irreducible regions to become
1000 : invalid, IRRED_INVALIDATED is set to true.
1001 :
1002 : LOOP_CLOSED_SSA_INVALIDATED, if non-NULL, is a bitmap where we store
1003 : basic blocks that had non-trivial update on their loop_father.*/
1004 :
1005 : void
1006 137922 : unloop (class loop *loop, bool *irred_invalidated,
1007 : bitmap loop_closed_ssa_invalidated)
1008 : {
1009 137922 : basic_block *body;
1010 137922 : class loop *ploop;
1011 137922 : unsigned i, n;
1012 137922 : basic_block latch = loop->latch;
1013 137922 : bool dummy = false;
1014 :
1015 137922 : if (loop_preheader_edge (loop)->flags & EDGE_IRREDUCIBLE_LOOP)
1016 24 : *irred_invalidated = true;
1017 :
1018 : /* This is relatively straightforward. The dominators are unchanged, as
1019 : loop header dominates loop latch, so the only thing we have to care of
1020 : is the placement of loops and basic blocks inside the loop tree. We
1021 : move them all to the loop->outer, and then let fix_bb_placements do
1022 : its work. */
1023 :
1024 137922 : body = get_loop_body (loop);
1025 137922 : n = loop->num_nodes;
1026 563724 : for (i = 0; i < n; i++)
1027 425802 : if (body[i]->loop_father == loop)
1028 : {
1029 407495 : remove_bb_from_loops (body[i]);
1030 407495 : add_bb_to_loop (body[i], loop_outer (loop));
1031 : }
1032 137922 : free (body);
1033 :
1034 140365 : while (loop->inner)
1035 : {
1036 2443 : ploop = loop->inner;
1037 2443 : flow_loop_tree_node_remove (ploop);
1038 2443 : flow_loop_tree_node_add (loop_outer (loop), ploop);
1039 : }
1040 :
1041 : /* Remove the loop and free its data. */
1042 137922 : delete_loop (loop);
1043 :
1044 137922 : remove_edge (single_succ_edge (latch));
1045 :
1046 : /* We do not pass IRRED_INVALIDATED to fix_bb_placements here, as even if
1047 : there is an irreducible region inside the cancelled loop, the flags will
1048 : be still correct. */
1049 137922 : fix_bb_placements (latch, &dummy, loop_closed_ssa_invalidated);
1050 137922 : }
1051 :
1052 : /* Fix placement of superloops of LOOP inside loop tree, i.e. ensure that
1053 : condition stated in description of fix_loop_placement holds for them.
1054 : It is used in case when we removed some edges coming out of LOOP, which
1055 : may cause the right placement of LOOP inside loop tree to change.
1056 :
1057 : IRRED_INVALIDATED is set to true if a change in the loop structures might
1058 : invalidate the information about irreducible regions. */
1059 :
1060 : static void
1061 443734 : fix_loop_placements (class loop *loop, bool *irred_invalidated,
1062 : bitmap loop_closed_ssa_invalidated)
1063 : {
1064 443734 : if (!loop_outer (loop))
1065 239623 : return;
1066 :
1067 204111 : auto_vec<edge> exits = get_loop_exit_edges (loop);
1068 204111 : unsigned i;
1069 204111 : edge e;
1070 :
1071 : /* We might need to only re-parent an outer loop, but as the constraint
1072 : is that we removed an exit from LOOP, we have a limit for what level
1073 : of outer loop we eventually have to re-parent to. */
1074 204111 : class loop *outermost = loop;
1075 1487048 : FOR_EACH_VEC_ELT (exits, i, e)
1076 1078826 : outermost = find_common_loop (outermost, e->dest->loop_father);
1077 :
1078 : class loop *outer;
1079 249008 : while (loop_outer (loop))
1080 : {
1081 248983 : outer = loop_outer (loop);
1082 248983 : if (fix_loop_placement (loop, irred_invalidated,
1083 : loop_closed_ssa_invalidated))
1084 : /* Changing the placement of a loop in the loop tree may alter the
1085 : validity of condition 2) of the description of fix_bb_placement
1086 : for its preheader, because the successor is the header and belongs
1087 : to the loop. So call fix_bb_placements to fix up the placement
1088 : of the preheader and (possibly) of its predecessors. */
1089 6 : fix_bb_placements (loop_preheader_edge (loop)->src,
1090 : irred_invalidated, loop_closed_ssa_invalidated);
1091 248983 : loop = outer;
1092 248983 : if (outer == outermost)
1093 : break;
1094 : }
1095 204111 : }
1096 :
1097 : /* Duplicate loop bounds and other information we store about
1098 : the loop into its duplicate. */
1099 :
1100 : void
1101 678615 : copy_loop_info (class loop *loop, class loop *target)
1102 : {
1103 678615 : gcc_checking_assert (!target->any_upper_bound && !target->any_estimate);
1104 678615 : target->any_upper_bound = loop->any_upper_bound;
1105 678615 : target->nb_iterations_upper_bound = loop->nb_iterations_upper_bound;
1106 678615 : target->any_likely_upper_bound = loop->any_likely_upper_bound;
1107 678615 : target->nb_iterations_likely_upper_bound
1108 678615 : = loop->nb_iterations_likely_upper_bound;
1109 678615 : target->any_estimate = loop->any_estimate;
1110 678615 : target->nb_iterations_estimate = loop->nb_iterations_estimate;
1111 678615 : target->estimate_state = loop->estimate_state;
1112 678615 : target->safelen = loop->safelen;
1113 678615 : target->simdlen = loop->simdlen;
1114 678615 : target->constraints = loop->constraints;
1115 678615 : target->can_be_parallel = loop->can_be_parallel;
1116 678615 : target->warned_aggressive_loop_optimizations
1117 678615 : |= loop->warned_aggressive_loop_optimizations;
1118 678615 : target->dont_vectorize = loop->dont_vectorize;
1119 678615 : target->force_vectorize = loop->force_vectorize;
1120 678615 : target->in_oacc_kernels_region = loop->in_oacc_kernels_region;
1121 678615 : target->finite_p = loop->finite_p;
1122 678615 : target->unroll = loop->unroll;
1123 678615 : target->owned_clique = loop->owned_clique;
1124 678615 : }
1125 :
1126 : /* Copies copy of LOOP as subloop of TARGET loop, placing newly
1127 : created loop into loops structure. If AFTER is non-null
1128 : the new loop is added at AFTER->next, otherwise in front of TARGETs
1129 : sibling list. */
1130 : class loop *
1131 40868 : duplicate_loop (class loop *loop, class loop *target, class loop *after)
1132 : {
1133 40868 : class loop *cloop;
1134 40868 : cloop = alloc_loop ();
1135 40868 : place_new_loop (cfun, cloop);
1136 :
1137 40868 : copy_loop_info (loop, cloop);
1138 :
1139 : /* Mark the new loop as copy of LOOP. */
1140 40868 : set_loop_copy (loop, cloop);
1141 :
1142 : /* Add it to target. */
1143 40868 : flow_loop_tree_node_add (target, cloop, after);
1144 :
1145 40868 : return cloop;
1146 : }
1147 :
1148 : /* Copies structure of subloops of LOOP into TARGET loop, placing
1149 : newly created loops into loop tree at the end of TARGETs sibling
1150 : list in the original order. */
1151 : void
1152 40868 : duplicate_subloops (class loop *loop, class loop *target)
1153 : {
1154 40868 : class loop *aloop, *cloop, *tail;
1155 :
1156 40868 : for (tail = target->inner; tail && tail->next; tail = tail->next)
1157 : ;
1158 42297 : for (aloop = loop->inner; aloop; aloop = aloop->next)
1159 : {
1160 1429 : cloop = duplicate_loop (aloop, target, tail);
1161 1429 : tail = cloop;
1162 1429 : gcc_assert(!tail->next);
1163 1429 : duplicate_subloops (aloop, cloop);
1164 : }
1165 40868 : }
1166 :
1167 : /* Copies structure of subloops of N loops, stored in array COPIED_LOOPS,
1168 : into TARGET loop, placing newly created loops into loop tree adding
1169 : them to TARGETs sibling list at the end in order. */
1170 : static void
1171 547874 : copy_loops_to (class loop **copied_loops, int n, class loop *target)
1172 : {
1173 547874 : class loop *aloop, *tail;
1174 547874 : int i;
1175 :
1176 12544462 : for (tail = target->inner; tail && tail->next; tail = tail->next)
1177 : ;
1178 552491 : for (i = 0; i < n; i++)
1179 : {
1180 4617 : aloop = duplicate_loop (copied_loops[i], target, tail);
1181 4617 : tail = aloop;
1182 4617 : gcc_assert(!tail->next);
1183 4617 : duplicate_subloops (copied_loops[i], aloop);
1184 : }
1185 547874 : }
1186 :
1187 : /* Redirects edge E to basic block DEST. */
1188 : static void
1189 38643 : loop_redirect_edge (edge e, basic_block dest)
1190 : {
1191 0 : if (e->dest == dest)
1192 : return;
1193 :
1194 38643 : redirect_edge_and_branch_force (e, dest);
1195 : }
1196 :
1197 : /* Check whether LOOP's body can be duplicated. */
1198 : bool
1199 518205 : can_duplicate_loop_p (const class loop *loop)
1200 : {
1201 518205 : int ret;
1202 518205 : basic_block *bbs = get_loop_body (loop);
1203 :
1204 518205 : ret = can_copy_bbs_p (bbs, loop->num_nodes);
1205 518205 : free (bbs);
1206 :
1207 518205 : return ret;
1208 : }
1209 :
1210 : /* Duplicates body of LOOP to given edge E NDUPL times. Takes care of updating
1211 : loop structure and dominators (order of inner subloops is retained).
1212 : E's destination must be LOOP header for this to work, i.e. it must be entry
1213 : or latch edge of this loop; these are unique, as the loops must have
1214 : preheaders for this function to work correctly (in case E is latch, the
1215 : function unrolls the loop, if E is entry edge, it peels the loop). Store
1216 : edges created by copying ORIG edge from copies corresponding to set bits in
1217 : WONT_EXIT bitmap (bit 0 corresponds to original LOOP body, the other copies
1218 : are numbered in order given by control flow through them) into TO_REMOVE
1219 : array. Returns false if duplication is
1220 : impossible. */
1221 :
1222 : bool
1223 267363 : duplicate_loop_body_to_header_edge (class loop *loop, edge e,
1224 : unsigned int ndupl, sbitmap wont_exit,
1225 : edge orig, vec<edge> *to_remove, int flags)
1226 : {
1227 267363 : class loop *target, *aloop;
1228 267363 : class loop **orig_loops;
1229 267363 : unsigned n_orig_loops;
1230 267363 : basic_block header = loop->header, latch = loop->latch;
1231 267363 : basic_block *new_bbs, *bbs, *first_active;
1232 267363 : basic_block new_bb, bb, first_active_latch = NULL;
1233 267363 : edge ae, latch_edge;
1234 267363 : edge spec_edges[2], new_spec_edges[2];
1235 267363 : const int SE_LATCH = 0;
1236 267363 : const int SE_ORIG = 1;
1237 267363 : unsigned i, j, n;
1238 267363 : int is_latch = (latch == e->src);
1239 267363 : profile_probability *scale_step = NULL;
1240 267363 : profile_probability scale_main = profile_probability::always ();
1241 267363 : profile_probability scale_act = profile_probability::always ();
1242 267363 : profile_count after_exit_num = profile_count::zero (),
1243 267363 : after_exit_den = profile_count::zero ();
1244 267363 : bool scale_after_exit = false;
1245 267363 : int add_irreducible_flag;
1246 267363 : basic_block place_after;
1247 267363 : bitmap bbs_to_scale = NULL;
1248 267363 : bitmap_iterator bi;
1249 :
1250 267363 : gcc_assert (e->dest == loop->header);
1251 267363 : gcc_assert (ndupl > 0);
1252 :
1253 267363 : if (orig)
1254 : {
1255 : /* Orig must be edge out of the loop. */
1256 219112 : gcc_assert (flow_bb_inside_loop_p (loop, orig->src));
1257 219112 : gcc_assert (!flow_bb_inside_loop_p (loop, orig->dest));
1258 : }
1259 :
1260 267363 : n = loop->num_nodes;
1261 267363 : bbs = get_loop_body_in_dom_order (loop);
1262 267363 : gcc_assert (bbs[0] == loop->header);
1263 267363 : gcc_assert (bbs[n - 1] == loop->latch);
1264 :
1265 : /* Check whether duplication is possible. */
1266 267363 : if (!can_copy_bbs_p (bbs, loop->num_nodes))
1267 : {
1268 19 : free (bbs);
1269 19 : return false;
1270 : }
1271 267344 : new_bbs = XNEWVEC (basic_block, loop->num_nodes);
1272 :
1273 : /* In case we are doing loop peeling and the loop is in the middle of
1274 : irreducible region, the peeled copies will be inside it too. */
1275 267344 : add_irreducible_flag = e->flags & EDGE_IRREDUCIBLE_LOOP;
1276 267344 : gcc_assert (!is_latch || !add_irreducible_flag);
1277 :
1278 : /* Find edge from latch. */
1279 267344 : latch_edge = loop_latch_edge (loop);
1280 :
1281 267344 : if (flags & DLTHE_FLAG_UPDATE_FREQ)
1282 : {
1283 : /* Calculate coefficients by that we have to scale counts
1284 : of duplicated loop bodies. */
1285 228701 : profile_count count_in = header->count;
1286 228701 : profile_count count_le = latch_edge->count ();
1287 228701 : profile_count count_out_orig = orig ? orig->count () : count_in - count_le;
1288 228701 : profile_probability prob_pass_thru = count_le.probability_in (count_in);
1289 228701 : profile_count new_count_le = count_le + count_out_orig;
1290 :
1291 219103 : if (orig && orig->probability.initialized_p ()
1292 447804 : && !(orig->probability == profile_probability::always ()))
1293 : {
1294 : /* The blocks that are dominated by a removed exit edge ORIG have
1295 : frequencies scaled by this. */
1296 218968 : if (orig->count ().initialized_p ())
1297 : {
1298 218968 : after_exit_num = orig->src->count;
1299 218968 : after_exit_den = after_exit_num - orig->count ();
1300 218968 : scale_after_exit = true;
1301 : }
1302 218968 : bbs_to_scale = BITMAP_ALLOC (NULL);
1303 782710 : for (i = 0; i < n; i++)
1304 : {
1305 563742 : if (bbs[i] != orig->src
1306 563742 : && dominated_by_p (CDI_DOMINATORS, bbs[i], orig->src))
1307 262183 : bitmap_set_bit (bbs_to_scale, i);
1308 : }
1309 : /* Since we will scale up all basic blocks dominated by orig, exits
1310 : will become more likely; compensate for that. */
1311 218968 : if (after_exit_den.nonzero_p ())
1312 : {
1313 218346 : auto_vec<edge> exits = get_loop_exit_edges (loop);
1314 943005 : for (edge ex : exits)
1315 287967 : if (ex != orig
1316 287967 : && dominated_by_p (CDI_DOMINATORS, ex->src, orig->src))
1317 41510 : new_count_le -= ex->count ().apply_scale (after_exit_num
1318 : - after_exit_den,
1319 20755 : after_exit_den);
1320 218346 : }
1321 : }
1322 228701 : profile_probability prob_pass_wont_exit =
1323 228701 : new_count_le.probability_in (count_in);
1324 : /* If profile count is 0, the probability will be uninitialized.
1325 : We can set probability to any initialized value to avoid
1326 : precision loss. If profile is sane, all counts will be 0 anyway. */
1327 228701 : if (!count_in.nonzero_p ())
1328 : {
1329 530 : prob_pass_thru
1330 530 : = profile_probability::always ().apply_scale (1, 2);
1331 530 : prob_pass_wont_exit
1332 530 : = profile_probability::always ().apply_scale (1, 2);
1333 : }
1334 :
1335 228701 : scale_step = XNEWVEC (profile_probability, ndupl);
1336 :
1337 737932 : for (i = 1; i <= ndupl; i++)
1338 1018462 : scale_step[i - 1] = bitmap_bit_p (wont_exit, i)
1339 509231 : ? prob_pass_wont_exit
1340 : : prob_pass_thru;
1341 :
1342 : /* Complete peeling is special as the probability of exit in last
1343 : copy becomes 1. */
1344 457315 : if (!count_in.nonzero_p ())
1345 : ;
1346 228171 : else if (flags & DLTHE_FLAG_COMPLETTE_PEEL)
1347 : {
1348 106653 : profile_count wanted_count = e->count ();
1349 :
1350 106653 : gcc_assert (!is_latch);
1351 : /* First copy has count of incoming edge. Each subsequent
1352 : count should be reduced by prob_pass_wont_exit. Caller
1353 : should've managed the flags so all except for original loop
1354 : has won't exist set. */
1355 106653 : scale_act = wanted_count.probability_in (count_in);
1356 :
1357 : /* Now simulate the duplication adjustments and compute header
1358 : frequency of the last copy. */
1359 433277 : for (i = 0; i < ndupl; i++)
1360 326624 : wanted_count = wanted_count.apply_probability (scale_step [i]);
1361 106653 : scale_main = wanted_count.probability_in (count_in);
1362 : }
1363 : /* Here we insert loop bodies inside the loop itself (for loop unrolling).
1364 : First iteration will be original loop followed by duplicated bodies.
1365 : It is necessary to scale down the original so we get right overall
1366 : number of iterations. */
1367 121518 : else if (is_latch)
1368 : {
1369 52510 : profile_probability prob_pass_main = bitmap_bit_p (wont_exit, 0)
1370 52510 : ? prob_pass_wont_exit
1371 : : prob_pass_thru;
1372 52510 : if (!(flags & DLTHE_FLAG_FLAT_PROFILE))
1373 : {
1374 21969 : profile_probability p = prob_pass_main;
1375 21969 : profile_count scale_main_den = count_in;
1376 86955 : for (i = 0; i < ndupl; i++)
1377 : {
1378 64986 : scale_main_den += count_in.apply_probability (p);
1379 64986 : p = p * scale_step[i];
1380 : }
1381 : /* If original loop is executed COUNT_IN times, the unrolled
1382 : loop will account SCALE_MAIN_DEN times. */
1383 21969 : scale_main = count_in.probability_in (scale_main_den);
1384 : }
1385 : else
1386 : scale_main = profile_probability::always ();
1387 52510 : scale_act = scale_main * prob_pass_main;
1388 : }
1389 : else
1390 : {
1391 69008 : profile_count preheader_count = e->count ();
1392 143749 : for (i = 0; i < ndupl; i++)
1393 74741 : scale_main = scale_main * scale_step[i];
1394 69008 : scale_act = preheader_count.probability_in (count_in);
1395 : }
1396 : }
1397 :
1398 : /* Loop the new bbs will belong to. */
1399 267344 : target = e->src->loop_father;
1400 :
1401 : /* Original loops. */
1402 267344 : n_orig_loops = 0;
1403 271191 : for (aloop = loop->inner; aloop; aloop = aloop->next)
1404 3847 : n_orig_loops++;
1405 267344 : orig_loops = XNEWVEC (class loop *, n_orig_loops);
1406 271191 : for (aloop = loop->inner, i = 0; aloop; aloop = aloop->next, i++)
1407 3847 : orig_loops[i] = aloop;
1408 :
1409 267344 : set_loop_copy (loop, target);
1410 :
1411 267344 : first_active = XNEWVEC (basic_block, n);
1412 267344 : if (is_latch)
1413 : {
1414 52652 : memcpy (first_active, bbs, n * sizeof (basic_block));
1415 52652 : first_active_latch = latch;
1416 : }
1417 :
1418 267344 : spec_edges[SE_ORIG] = orig;
1419 267344 : spec_edges[SE_LATCH] = latch_edge;
1420 :
1421 267344 : place_after = e->src;
1422 267344 : location_t loop_loc = UNKNOWN_LOCATION;
1423 267344 : unsigned int loop_copyid_base = 0;
1424 :
1425 : /* Find a location from the loop header - works for both GIMPLE and RTL. */
1426 267344 : if (current_ir_type () == IR_GIMPLE)
1427 : {
1428 147177 : gimple *last = last_nondebug_stmt (loop->header);
1429 147177 : loop_loc = last ? gimple_location (last) : UNKNOWN_LOCATION;
1430 : }
1431 : else
1432 : {
1433 : /* For RTL, try to find an instruction with a valid location. */
1434 120167 : rtx_insn *insn = BB_END (loop->header);
1435 196934 : while (insn && insn != BB_HEAD (loop->header))
1436 : {
1437 : /* Only check location if this is a valid insn. */
1438 196257 : if (INSN_P (insn))
1439 : {
1440 195580 : location_t loc = INSN_LOCATION (insn);
1441 195580 : if (loc != UNKNOWN_LOCATION)
1442 : {
1443 119490 : loop_loc = get_pure_location (loc);
1444 119490 : break;
1445 : }
1446 : }
1447 76767 : insn = PREV_INSN (insn);
1448 : }
1449 : }
1450 :
1451 : /* Allocate copyid base for this loop duplication - works for both
1452 : GIMPLE and RTL since allocator is per-function. */
1453 266840 : if (loop_loc != UNKNOWN_LOCATION)
1454 252774 : loop_copyid_base = allocate_copyid_base (loop_loc, ndupl);
1455 :
1456 815218 : for (j = 0; j < ndupl; j++)
1457 : {
1458 : /* Copy loops. */
1459 547874 : copy_loops_to (orig_loops, n_orig_loops, target);
1460 :
1461 : /* Copy bbs. */
1462 547874 : copy_bbs (bbs, n, new_bbs, spec_edges, 2, new_spec_edges, loop,
1463 : place_after, true);
1464 547874 : place_after = new_spec_edges[SE_LATCH]->src;
1465 :
1466 547874 : if (flags & DLTHE_RECORD_COPY_NUMBER)
1467 362041 : for (i = 0; i < n; i++)
1468 : {
1469 255130 : gcc_assert (!new_bbs[i]->aux);
1470 255130 : new_bbs[i]->aux = (void *)(size_t)(j + 1);
1471 : }
1472 :
1473 : /* Assign hierarchical discriminators to distinguish loop iterations. */
1474 547874 : if (flags & DLTHE_RECORD_HIERARCHICAL_DISCRIMINATOR
1475 333183 : && loop_copyid_base > 0)
1476 : {
1477 : /* Calculate copyid for this iteration. */
1478 295507 : unsigned int copyid = loop_copyid_base + j;
1479 295507 : if (copyid > DISCR_COPYID_MAX)
1480 : copyid = DISCR_COPYID_MAX;
1481 :
1482 295507 : if (current_ir_type () == IR_GIMPLE)
1483 : {
1484 : /* Update all basic blocks created in this iteration. */
1485 1147431 : for (i = 0; i < n; i++)
1486 851924 : assign_discriminators_to_bb (new_bbs[i], 0, copyid);
1487 : }
1488 : else
1489 : {
1490 : /* For RTL, manually update instruction locations. */
1491 0 : for (i = 0; i < n; i++)
1492 : {
1493 0 : basic_block bb = new_bbs[i];
1494 0 : rtx_insn *insn;
1495 :
1496 : /* Iterate through all instructions in the block. */
1497 0 : FOR_BB_INSNS (bb, insn)
1498 : {
1499 0 : if (INSN_HAS_LOCATION (insn))
1500 : {
1501 0 : location_t loc = INSN_LOCATION (insn);
1502 : /* Get existing discriminator components. */
1503 0 : discriminator_components comp
1504 0 : = get_discriminator_components_from_loc (loc);
1505 0 : comp.copyid = copyid;
1506 :
1507 : /* Apply hierarchical discriminator format. */
1508 0 : INSN_LOCATION (insn)
1509 0 : = location_with_discriminator_components (loc, comp);
1510 : }
1511 : }
1512 : }
1513 : }
1514 : }
1515 :
1516 : /* Note whether the blocks and edges belong to an irreducible loop. */
1517 547874 : if (add_irreducible_flag)
1518 : {
1519 1435 : for (i = 0; i < n; i++)
1520 1068 : new_bbs[i]->flags |= BB_DUPLICATED;
1521 1435 : for (i = 0; i < n; i++)
1522 : {
1523 1068 : edge_iterator ei;
1524 1068 : new_bb = new_bbs[i];
1525 1068 : if (new_bb->loop_father == target)
1526 1054 : new_bb->flags |= BB_IRREDUCIBLE_LOOP;
1527 :
1528 2812 : FOR_EACH_EDGE (ae, ei, new_bb->succs)
1529 1744 : if ((ae->dest->flags & BB_DUPLICATED)
1530 1086 : && (ae->src->loop_father == target
1531 21 : || ae->dest->loop_father == target))
1532 1072 : ae->flags |= EDGE_IRREDUCIBLE_LOOP;
1533 : }
1534 1435 : for (i = 0; i < n; i++)
1535 1068 : new_bbs[i]->flags &= ~BB_DUPLICATED;
1536 : }
1537 :
1538 : /* Redirect the special edges. */
1539 547874 : if (is_latch)
1540 : {
1541 107225 : redirect_edge_and_branch_force (latch_edge, new_bbs[0]);
1542 107225 : redirect_edge_and_branch_force (new_spec_edges[SE_LATCH],
1543 : loop->header);
1544 107225 : set_immediate_dominator (CDI_DOMINATORS, new_bbs[0], latch);
1545 107225 : latch = loop->latch = new_bbs[n - 1];
1546 107225 : e = latch_edge = new_spec_edges[SE_LATCH];
1547 : }
1548 : else
1549 : {
1550 440649 : redirect_edge_and_branch_force (e, new_bbs[0]);
1551 440649 : redirect_edge_and_branch_force (new_spec_edges[SE_LATCH],
1552 : loop->header);
1553 440649 : set_immediate_dominator (CDI_DOMINATORS, new_bbs[0], e->src);
1554 440649 : e = new_spec_edges[SE_LATCH];
1555 : }
1556 :
1557 : /* Record exit edge in this copy. */
1558 547874 : if (orig && bitmap_bit_p (wont_exit, j + 1))
1559 : {
1560 391192 : if (to_remove)
1561 391192 : to_remove->safe_push (new_spec_edges[SE_ORIG]);
1562 391192 : force_edge_cold (new_spec_edges[SE_ORIG], true);
1563 :
1564 : /* Scale the frequencies of the blocks dominated by the exit. */
1565 391192 : if (bbs_to_scale && scale_after_exit)
1566 : {
1567 892360 : EXECUTE_IF_SET_IN_BITMAP (bbs_to_scale, 0, i, bi)
1568 501479 : scale_bbs_frequencies_profile_count (new_bbs + i, 1, after_exit_num,
1569 : after_exit_den);
1570 : }
1571 : }
1572 :
1573 : /* Record the first copy in the control flow order if it is not
1574 : the original loop (i.e. in case of peeling). */
1575 547874 : if (!first_active_latch)
1576 : {
1577 214692 : memcpy (first_active, new_bbs, n * sizeof (basic_block));
1578 214692 : first_active_latch = new_bbs[n - 1];
1579 : }
1580 :
1581 : /* Set counts and frequencies. */
1582 547874 : if (flags & DLTHE_FLAG_UPDATE_FREQ)
1583 : {
1584 509231 : scale_bbs_frequencies (new_bbs, n, scale_act);
1585 509231 : scale_act = scale_act * scale_step[j];
1586 : }
1587 : }
1588 267344 : free (new_bbs);
1589 267344 : free (orig_loops);
1590 :
1591 : /* Record the exit edge in the original loop body, and update the frequencies. */
1592 486447 : if (orig && bitmap_bit_p (wont_exit, 0))
1593 : {
1594 51757 : if (to_remove)
1595 51757 : to_remove->safe_push (orig);
1596 51757 : force_edge_cold (orig, true);
1597 :
1598 : /* Scale the frequencies of the blocks dominated by the exit. */
1599 51757 : if (bbs_to_scale && scale_after_exit)
1600 : {
1601 103437 : EXECUTE_IF_SET_IN_BITMAP (bbs_to_scale, 0, i, bi)
1602 51729 : scale_bbs_frequencies_profile_count (bbs + i, 1, after_exit_num,
1603 : after_exit_den);
1604 : }
1605 : }
1606 :
1607 : /* Update the original loop. */
1608 267344 : if (!is_latch)
1609 214692 : set_immediate_dominator (CDI_DOMINATORS, e->dest, e->src);
1610 267344 : if (flags & DLTHE_FLAG_UPDATE_FREQ)
1611 : {
1612 228701 : scale_bbs_frequencies (bbs, n, scale_main);
1613 228701 : free (scale_step);
1614 : }
1615 :
1616 : /* Update dominators of outer blocks if affected. */
1617 1056937 : for (i = 0; i < n; i++)
1618 : {
1619 789593 : basic_block dominated, dom_bb;
1620 789593 : unsigned j;
1621 :
1622 789593 : bb = bbs[i];
1623 :
1624 789593 : auto_vec<basic_block> dom_bbs = get_dominated_by (CDI_DOMINATORS, bb);
1625 2838114 : FOR_EACH_VEC_ELT (dom_bbs, j, dominated)
1626 : {
1627 782843 : if (flow_bb_inside_loop_p (loop, dominated))
1628 574901 : continue;
1629 415884 : dom_bb = nearest_common_dominator (
1630 207942 : CDI_DOMINATORS, first_active[i], first_active_latch);
1631 207942 : set_immediate_dominator (CDI_DOMINATORS, dominated, dom_bb);
1632 : }
1633 789593 : }
1634 267344 : free (first_active);
1635 :
1636 267344 : free (bbs);
1637 267344 : BITMAP_FREE (bbs_to_scale);
1638 :
1639 267344 : return true;
1640 : }
1641 :
1642 : /* A callback for make_forwarder block, to redirect all edges except for
1643 : OTHER(DATA) to the entry part. E is the edge for that we should decide
1644 : whether to redirect it. */
1645 :
1646 : bool
1647 74054 : mfb_keep_just (edge e, void *data)
1648 : {
1649 74054 : edge other = (edge)data;
1650 74054 : return e != other;
1651 : }
1652 :
1653 : /* True when a candidate preheader BLOCK has predecessors from LOOP. */
1654 :
1655 : static bool
1656 36 : has_preds_from_loop (basic_block block, class loop *loop)
1657 : {
1658 36 : edge e;
1659 36 : edge_iterator ei;
1660 :
1661 78 : FOR_EACH_EDGE (e, ei, block->preds)
1662 42 : if (e->src->loop_father == loop)
1663 : return true;
1664 : return false;
1665 : }
1666 :
1667 : /* Creates a pre-header for a LOOP. Returns newly created block. Unless
1668 : CP_SIMPLE_PREHEADERS is set in FLAGS, we only force LOOP to have single
1669 : entry; otherwise we also force preheader block to have only one successor.
1670 : When CP_FALLTHRU_PREHEADERS is set in FLAGS, we force the preheader block
1671 : to be a fallthru predecessor to the loop header and to have only
1672 : predecessors from outside of the loop.
1673 : The function also updates dominators. */
1674 :
1675 : basic_block
1676 17375391 : create_preheader (class loop *loop, int flags)
1677 : {
1678 17375391 : edge e;
1679 17375391 : basic_block dummy;
1680 17375391 : int nentry = 0;
1681 17375391 : bool irred = false;
1682 17375391 : bool latch_edge_was_fallthru;
1683 17375391 : edge one_succ_pred = NULL, single_entry = NULL;
1684 17375391 : edge_iterator ei;
1685 :
1686 52153405 : FOR_EACH_EDGE (e, ei, loop->header->preds)
1687 : {
1688 34778014 : if (e->src == loop->latch)
1689 17375391 : continue;
1690 17402623 : irred |= (e->flags & EDGE_IRREDUCIBLE_LOOP) != 0;
1691 17402623 : nentry++;
1692 17402623 : single_entry = e;
1693 48836235 : if (single_succ_p (e->src))
1694 14058221 : one_succ_pred = e;
1695 : }
1696 17375391 : gcc_assert (nentry);
1697 17375391 : if (nentry == 1)
1698 : {
1699 17354780 : bool need_forwarder_block = false;
1700 :
1701 : /* We do not allow entry block to be the loop preheader, since we
1702 : cannot emit code there. */
1703 17354780 : if (single_entry->src == ENTRY_BLOCK_PTR_FOR_FN (cfun))
1704 : need_forwarder_block = true;
1705 : else
1706 : {
1707 : /* If we want simple preheaders, also force the preheader to have
1708 : just a single successor and a normal edge. */
1709 17351751 : if ((flags & CP_SIMPLE_PREHEADERS)
1710 17351751 : && ((single_entry->flags & EDGE_COMPLEX)
1711 17326630 : || !single_succ_p (single_entry->src)
1712 : /* We document LOOPS_HAVE_PREHEADERS as to forming a
1713 : natural place to move code outside of the loop, so it
1714 : should not end with a control instruction. */
1715 : /* ??? This, and below JUMP_P check needs to be a new
1716 : CFG hook. */
1717 14049896 : || (cfun->curr_properties & PROP_gimple
1718 26618258 : && !gsi_end_p (gsi_last_bb (single_entry->src))
1719 8814908 : && stmt_ends_bb_p (*gsi_last_bb (single_entry->src)))))
1720 : need_forwarder_block = true;
1721 : /* If we want fallthru preheaders, also create forwarder block when
1722 : preheader ends with a jump or has predecessors from loop. */
1723 14049896 : else if ((flags & CP_FALLTHRU_PREHEADERS)
1724 14049896 : && (JUMP_P (BB_END (single_entry->src))
1725 36 : || has_preds_from_loop (single_entry->src, loop)))
1726 : need_forwarder_block = true;
1727 : }
1728 3301855 : if (! need_forwarder_block)
1729 : return NULL;
1730 : }
1731 :
1732 3325507 : edge latch;
1733 3325507 : latch = loop_latch_edge (loop);
1734 3325507 : latch_edge_was_fallthru = (latch->flags & EDGE_FALLTHRU) != 0;
1735 3325507 : if (nentry == 1
1736 3304896 : && ((flags & CP_FALLTHRU_PREHEADERS) == 0
1737 22 : || (single_entry->flags & EDGE_CROSSING) == 0))
1738 3304896 : dummy = split_edge (single_entry);
1739 : else
1740 : {
1741 20611 : edge fallthru = make_forwarder_block (loop->header, mfb_keep_just, latch);
1742 20611 : dummy = fallthru->src;
1743 20611 : loop->header = fallthru->dest;
1744 : }
1745 :
1746 : /* Try to be clever in placing the newly created preheader. The idea is to
1747 : avoid breaking any "fallthruness" relationship between blocks.
1748 :
1749 : The preheader was created just before the header and all incoming edges
1750 : to the header were redirected to the preheader, except the latch edge.
1751 : So the only problematic case is when this latch edge was a fallthru
1752 : edge: it is not anymore after the preheader creation so we have broken
1753 : the fallthruness. We're therefore going to look for a better place. */
1754 3325507 : if (latch_edge_was_fallthru)
1755 : {
1756 1460724 : if (one_succ_pred)
1757 4794 : e = one_succ_pred;
1758 : else
1759 1455930 : e = EDGE_PRED (dummy, 0);
1760 :
1761 1460724 : move_block_after (dummy, e->src);
1762 : }
1763 :
1764 3325507 : if (irred)
1765 : {
1766 3881 : dummy->flags |= BB_IRREDUCIBLE_LOOP;
1767 3881 : single_succ_edge (dummy)->flags |= EDGE_IRREDUCIBLE_LOOP;
1768 : }
1769 :
1770 3325507 : if (dump_file)
1771 179 : fprintf (dump_file, "Created preheader block for loop %i\n",
1772 : loop->num);
1773 :
1774 3325507 : if (flags & CP_FALLTHRU_PREHEADERS)
1775 28 : gcc_assert ((single_succ_edge (dummy)->flags & EDGE_FALLTHRU)
1776 : && !JUMP_P (BB_END (dummy)));
1777 :
1778 : return dummy;
1779 : }
1780 :
1781 : /* Create preheaders for each loop; for meaning of FLAGS see create_preheader. */
1782 :
1783 : void
1784 30481902 : create_preheaders (int flags)
1785 : {
1786 30481902 : if (!current_loops)
1787 : return;
1788 :
1789 108819942 : for (auto loop : loops_list (cfun, 0))
1790 17374236 : create_preheader (loop, flags);
1791 30481902 : loops_state_set (LOOPS_HAVE_PREHEADERS);
1792 : }
1793 :
1794 : /* Forces all loop latches to have only single successor. */
1795 :
1796 : void
1797 28070724 : force_single_succ_latches (void)
1798 : {
1799 28070724 : edge e;
1800 :
1801 100940609 : for (auto loop : loops_list (cfun, 0))
1802 : {
1803 16728437 : if (loop->latch != loop->header && single_succ_p (loop->latch))
1804 11554157 : continue;
1805 :
1806 5174280 : e = find_edge (loop->latch, loop->header);
1807 5174280 : gcc_checking_assert (e != NULL);
1808 :
1809 5174280 : split_edge (e);
1810 28070724 : }
1811 28070724 : loops_state_set (LOOPS_HAVE_SIMPLE_LATCHES);
1812 28070724 : }
1813 :
1814 : /* This function is called from loop_version. It splits the entry edge
1815 : of the loop we want to version, adds the versioning condition, and
1816 : adjust the edges to the two versions of the loop appropriately.
1817 : e is an incoming edge. Returns the basic block containing the
1818 : condition.
1819 :
1820 : --- edge e ---- > [second_head]
1821 :
1822 : Split it and insert new conditional expression and adjust edges.
1823 :
1824 : --- edge e ---> [cond expr] ---> [first_head]
1825 : |
1826 : +---------> [second_head]
1827 :
1828 : THEN_PROB is the probability of then branch of the condition.
1829 : ELSE_PROB is the probability of else branch. Note that they may be both
1830 : REG_BR_PROB_BASE when condition is IFN_LOOP_VECTORIZED or
1831 : IFN_LOOP_DIST_ALIAS. */
1832 :
1833 : static basic_block
1834 38643 : lv_adjust_loop_entry_edge (basic_block first_head, basic_block second_head,
1835 : edge e, void *cond_expr,
1836 : profile_probability then_prob,
1837 : profile_probability else_prob)
1838 : {
1839 38643 : basic_block new_head = NULL;
1840 38643 : edge e1;
1841 :
1842 38643 : gcc_assert (e->dest == second_head);
1843 :
1844 : /* Split edge 'e'. This will create a new basic block, where we can
1845 : insert conditional expr. */
1846 38643 : new_head = split_edge (e);
1847 :
1848 38643 : lv_add_condition_to_bb (first_head, second_head, new_head,
1849 : cond_expr);
1850 :
1851 : /* Don't set EDGE_TRUE_VALUE in RTL mode, as it's invalid there. */
1852 38643 : e = single_succ_edge (new_head);
1853 38643 : e1 = make_edge (new_head, first_head,
1854 38643 : current_ir_type () == IR_GIMPLE ? EDGE_TRUE_VALUE : 0);
1855 38643 : e1->probability = then_prob;
1856 38643 : e->probability = else_prob;
1857 :
1858 38643 : set_immediate_dominator (CDI_DOMINATORS, first_head, new_head);
1859 38643 : set_immediate_dominator (CDI_DOMINATORS, second_head, new_head);
1860 :
1861 : /* Adjust loop header phi nodes. */
1862 38643 : lv_adjust_loop_header_phi (first_head, second_head, new_head, e1);
1863 :
1864 38643 : return new_head;
1865 : }
1866 :
1867 : /* Main entry point for Loop Versioning transformation.
1868 :
1869 : This transformation given a condition and a loop, creates
1870 : -if (condition) { loop_copy1 } else { loop_copy2 },
1871 : where loop_copy1 is the loop transformed in one way, and loop_copy2
1872 : is the loop transformed in another way (or unchanged). COND_EXPR
1873 : may be a run time test for things that were not resolved by static
1874 : analysis (overlapping ranges (anti-aliasing), alignment, etc.).
1875 :
1876 : If non-NULL, CONDITION_BB is set to the basic block containing the
1877 : condition.
1878 :
1879 : THEN_PROB is the probability of the then edge of the if. THEN_SCALE
1880 : is the ratio by that the frequencies in the original loop should
1881 : be scaled. ELSE_SCALE is the ratio by that the frequencies in the
1882 : new loop should be scaled.
1883 :
1884 : If PLACE_AFTER is true, we place the new loop after LOOP in the
1885 : instruction stream, otherwise it is placed before LOOP. */
1886 :
1887 : class loop *
1888 38653 : loop_version (class loop *loop,
1889 : void *cond_expr, basic_block *condition_bb,
1890 : profile_probability then_prob, profile_probability else_prob,
1891 : profile_probability then_scale, profile_probability else_scale,
1892 : bool place_after)
1893 : {
1894 38653 : basic_block first_head, second_head;
1895 38653 : edge entry, latch_edge;
1896 38653 : int irred_flag;
1897 38653 : class loop *nloop;
1898 38653 : basic_block cond_bb;
1899 :
1900 : /* Record entry and latch edges for the loop */
1901 38653 : entry = loop_preheader_edge (loop);
1902 38653 : irred_flag = entry->flags & EDGE_IRREDUCIBLE_LOOP;
1903 38653 : entry->flags &= ~EDGE_IRREDUCIBLE_LOOP;
1904 :
1905 : /* Note down head of loop as first_head. */
1906 38653 : first_head = entry->dest;
1907 :
1908 : /* 1) Duplicate loop on the entry edge. */
1909 38653 : if (!cfg_hook_duplicate_loop_body_to_header_edge (loop, entry, 1, NULL, NULL,
1910 : NULL, 0))
1911 : {
1912 10 : entry->flags |= irred_flag;
1913 10 : return NULL;
1914 : }
1915 :
1916 : /* 2) loopify the duplicated new loop. */
1917 38643 : latch_edge = single_succ_edge (get_bb_copy (loop->latch));
1918 38643 : nloop = alloc_loop ();
1919 38643 : class loop *outer = loop_outer (latch_edge->dest->loop_father);
1920 38643 : edge new_header_edge = single_pred_edge (get_bb_copy (loop->header));
1921 38643 : nloop->header = new_header_edge->dest;
1922 38643 : nloop->latch = latch_edge->src;
1923 38643 : loop_redirect_edge (latch_edge, nloop->header);
1924 :
1925 : /* Compute new loop. */
1926 38643 : add_loop (nloop, outer);
1927 38643 : copy_loop_info (loop, nloop);
1928 38643 : set_loop_copy (loop, nloop);
1929 :
1930 : /* loopify redirected latch_edge. Update its PENDING_STMTS. */
1931 38643 : lv_flush_pending_stmts (latch_edge);
1932 :
1933 : /* After duplication entry edge now points to new loop head block.
1934 : Note down new head as second_head. */
1935 38643 : second_head = entry->dest;
1936 :
1937 : /* 3) Split loop entry edge and insert new block with cond expr. */
1938 38643 : cond_bb = lv_adjust_loop_entry_edge (first_head, second_head,
1939 : entry, cond_expr, then_prob, else_prob);
1940 38643 : if (condition_bb)
1941 34380 : *condition_bb = cond_bb;
1942 :
1943 38643 : if (!cond_bb)
1944 : {
1945 0 : entry->flags |= irred_flag;
1946 0 : return NULL;
1947 : }
1948 :
1949 : /* Add cond_bb to appropriate loop. */
1950 38643 : if (cond_bb->loop_father)
1951 38643 : remove_bb_from_loops (cond_bb);
1952 38643 : add_bb_to_loop (cond_bb, outer);
1953 :
1954 : /* 4) Scale the original loop and new loop frequency. */
1955 38643 : scale_loop_frequencies (loop, then_scale);
1956 38643 : scale_loop_frequencies (nloop, else_scale);
1957 38643 : update_dominators_in_loop (loop);
1958 38643 : update_dominators_in_loop (nloop);
1959 :
1960 : /* Adjust irreducible flag. */
1961 38643 : if (irred_flag)
1962 : {
1963 51 : cond_bb->flags |= BB_IRREDUCIBLE_LOOP;
1964 51 : loop_preheader_edge (loop)->flags |= EDGE_IRREDUCIBLE_LOOP;
1965 51 : loop_preheader_edge (nloop)->flags |= EDGE_IRREDUCIBLE_LOOP;
1966 51 : single_pred_edge (cond_bb)->flags |= EDGE_IRREDUCIBLE_LOOP;
1967 : }
1968 :
1969 38643 : if (place_after)
1970 : {
1971 36232 : basic_block *bbs = get_loop_body_in_dom_order (nloop), after;
1972 36232 : unsigned i;
1973 :
1974 36232 : after = loop->latch;
1975 :
1976 212566 : for (i = 0; i < nloop->num_nodes; i++)
1977 : {
1978 176334 : move_block_after (bbs[i], after);
1979 176334 : after = bbs[i];
1980 : }
1981 36232 : free (bbs);
1982 : }
1983 :
1984 : /* At this point condition_bb is loop preheader with two successors,
1985 : first_head and second_head. Make sure that loop preheader has only
1986 : one successor. */
1987 38643 : split_edge (loop_preheader_edge (loop));
1988 38643 : split_edge (loop_preheader_edge (nloop));
1989 :
1990 38643 : return nloop;
1991 : }
|