Line data Source code
1 : /* Loop manipulation code for GNU compiler.
2 : Copyright (C) 2002-2026 Free Software Foundation, Inc.
3 :
4 : This file is part of GCC.
5 :
6 : GCC is free software; you can redistribute it and/or modify it under
7 : the terms of the GNU General Public License as published by the Free
8 : Software Foundation; either version 3, or (at your option) any later
9 : version.
10 :
11 : GCC is distributed in the hope that it will be useful, but WITHOUT ANY
12 : WARRANTY; without even the implied warranty of MERCHANTABILITY or
13 : FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
14 : for more details.
15 :
16 : You should have received a copy of the GNU General Public License
17 : along with GCC; see the file COPYING3. If not see
18 : <http://www.gnu.org/licenses/>. */
19 :
20 : #include "config.h"
21 : #include "system.h"
22 : #include "coretypes.h"
23 : #include "backend.h"
24 : #include "rtl.h"
25 : #include "tree.h"
26 : #include "gimple.h"
27 : #include "cfghooks.h"
28 : #include "cfganal.h"
29 : #include "cfgloop.h"
30 : #include "gimple-iterator.h"
31 : #include "gimplify-me.h"
32 : #include "tree-ssa-loop-manip.h"
33 : #include "dumpfile.h"
34 : #include "sreal.h"
35 : #include "tree-cfg.h"
36 : #include "tree-pass.h"
37 : #include "hierarchical_discriminator.h"
38 :
39 : static void copy_loops_to (class loop **, int,
40 : class loop *);
41 : static void loop_redirect_edge (edge, basic_block);
42 : static void remove_bbs (basic_block *, int);
43 : static bool rpe_enum_p (const_basic_block, const void *);
44 : static int find_path (edge, basic_block **);
45 : static void fix_loop_placements (class loop *, bool *, bitmap);
46 : static bool fix_bb_placement (basic_block);
47 : static void fix_bb_placements (basic_block, bool *, bitmap);
48 :
49 : /* Checks whether basic block BB is dominated by DATA. */
50 : static bool
51 435843 : rpe_enum_p (const_basic_block bb, const void *data)
52 : {
53 435843 : return dominated_by_p (CDI_DOMINATORS, bb, (const_basic_block) data);
54 : }
55 :
56 : /* Remove basic blocks BBS. NBBS is the number of the basic blocks. */
57 :
58 : static void
59 435843 : remove_bbs (basic_block *bbs, int nbbs)
60 : {
61 435843 : int i;
62 :
63 871686 : for (i = 0; i < nbbs; i++)
64 435843 : delete_basic_block (bbs[i]);
65 435843 : }
66 :
67 : /* Find path -- i.e. the basic blocks dominated by edge E and put them
68 : into array BBS, that will be allocated large enough to contain them.
69 : E->dest must have exactly one predecessor for this to work (it is
70 : easy to achieve and we do not put it here because we do not want to
71 : alter anything by this function). The number of basic blocks in the
72 : path is returned. */
73 : static int
74 435843 : find_path (edge e, basic_block **bbs)
75 : {
76 435843 : gcc_assert (EDGE_COUNT (e->dest->preds) <= 1);
77 :
78 : /* Find bbs in the path. */
79 435843 : *bbs = XNEWVEC (basic_block, n_basic_blocks_for_fn (cfun));
80 435843 : return dfs_enumerate_from (e->dest, 0, rpe_enum_p, *bbs,
81 435843 : n_basic_blocks_for_fn (cfun), e->dest);
82 : }
83 :
84 : /* Fix placement of basic block BB inside loop hierarchy --
85 : Let L be a loop to that BB belongs. Then every successor of BB must either
86 : 1) belong to some superloop of loop L, or
87 : 2) be a header of loop K such that K->outer is superloop of L
88 : Returns true if we had to move BB into other loop to enforce this condition,
89 : false if the placement of BB was already correct (provided that placements
90 : of its successors are correct). */
91 : static bool
92 250697 : fix_bb_placement (basic_block bb)
93 : {
94 250697 : edge e;
95 250697 : edge_iterator ei;
96 250697 : class loop *loop = current_loops->tree_root, *act;
97 :
98 515019 : FOR_EACH_EDGE (e, ei, bb->succs)
99 : {
100 264322 : if (e->dest == EXIT_BLOCK_PTR_FOR_FN (cfun))
101 0 : continue;
102 :
103 264322 : act = e->dest->loop_father;
104 264322 : if (act->header == e->dest)
105 306 : act = loop_outer (act);
106 :
107 264322 : if (flow_loop_nested_p (loop, act))
108 264322 : loop = act;
109 : }
110 :
111 250697 : if (loop == bb->loop_father)
112 : return false;
113 :
114 55272 : remove_bb_from_loops (bb);
115 55272 : add_bb_to_loop (bb, loop);
116 :
117 55272 : return true;
118 : }
119 :
120 : /* Fix placement of LOOP inside loop tree, i.e. find the innermost superloop
121 : of LOOP to that leads at least one exit edge of LOOP, and set it
122 : as the immediate superloop of LOOP. Return true if the immediate superloop
123 : of LOOP changed.
124 :
125 : IRRED_INVALIDATED is set to true if a change in the loop structures might
126 : invalidate the information about irreducible regions. */
127 :
128 : static bool
129 201317 : fix_loop_placement (class loop *loop, bool *irred_invalidated,
130 : bitmap loop_closed_ssa_invalidated)
131 : {
132 201317 : unsigned i;
133 201317 : edge e;
134 201317 : auto_vec<edge> exits = get_loop_exit_edges (loop);
135 201317 : class loop *father = current_loops->tree_root, *act;
136 201317 : bool ret = false;
137 :
138 1270101 : FOR_EACH_VEC_ELT (exits, i, e)
139 : {
140 1068784 : act = find_common_loop (loop, e->dest->loop_father);
141 1068784 : if (flow_loop_nested_p (father, act))
142 72504 : father = act;
143 : }
144 :
145 201317 : if (father != loop_outer (loop))
146 : {
147 887 : for (act = loop_outer (loop); act != father; act = loop_outer (act))
148 581 : act->num_nodes -= loop->num_nodes;
149 306 : flow_loop_tree_node_remove (loop);
150 306 : flow_loop_tree_node_add (father, loop);
151 :
152 : /* The exit edges of LOOP no longer exits its original immediate
153 : superloops; remove them from the appropriate exit lists. */
154 949 : FOR_EACH_VEC_ELT (exits, i, e)
155 : {
156 : /* We may need to recompute irreducible loops. */
157 337 : if (e->flags & EDGE_IRREDUCIBLE_LOOP)
158 0 : *irred_invalidated = true;
159 337 : rescan_loop_exit (e, false, false);
160 : }
161 : /* Any LC SSA PHIs on e->dest might now be on the wrong edge
162 : if their defs were in a former outer loop. Also all uses
163 : in the original inner loop of defs in the outer loop(s) now
164 : require LC PHI nodes. */
165 306 : if (loop_closed_ssa_invalidated)
166 : {
167 10 : basic_block *bbs = get_loop_body (loop);
168 55 : for (unsigned i = 0; i < loop->num_nodes; ++i)
169 45 : bitmap_set_bit (loop_closed_ssa_invalidated, bbs[i]->index);
170 10 : free (bbs);
171 : }
172 :
173 : ret = true;
174 : }
175 :
176 201317 : return ret;
177 201317 : }
178 :
179 : /* Fix placements of basic blocks inside loop hierarchy stored in loops; i.e.
180 : enforce condition stated in description of fix_bb_placement. We
181 : start from basic block FROM that had some of its successors removed, so that
182 : his placement no longer has to be correct, and iteratively fix placement of
183 : its predecessors that may change if placement of FROM changed. Also fix
184 : placement of subloops of FROM->loop_father, that might also be altered due
185 : to this change; the condition for them is similar, except that instead of
186 : successors we consider edges coming out of the loops.
187 :
188 : If the changes may invalidate the information about irreducible regions,
189 : IRRED_INVALIDATED is set to true.
190 :
191 : If LOOP_CLOSED_SSA_INVLIDATED is non-zero then all basic blocks with
192 : changed loop_father are collected there. */
193 :
194 : static void
195 572761 : fix_bb_placements (basic_block from,
196 : bool *irred_invalidated,
197 : bitmap loop_closed_ssa_invalidated)
198 : {
199 572761 : basic_block *queue, *qtop, *qbeg, *qend;
200 572761 : class loop *base_loop, *target_loop;
201 572761 : edge e;
202 :
203 : /* We pass through blocks back-reachable from FROM, testing whether some
204 : of their successors moved to outer loop. It may be necessary to
205 : iterate several times, but it is finite, as we stop unless we move
206 : the basic block up the loop structure. The whole story is a bit
207 : more complicated due to presence of subloops, those are moved using
208 : fix_loop_placement. */
209 :
210 572761 : base_loop = from->loop_father;
211 : /* If we are already in the outermost loop, the basic blocks cannot be moved
212 : outside of it. If FROM is the header of the base loop, it cannot be moved
213 : outside of it, either. In both cases, we can end now. */
214 572761 : if (base_loop == current_loops->tree_root
215 236946 : || from == base_loop->header)
216 381604 : return;
217 :
218 191157 : auto_sbitmap in_queue (last_basic_block_for_fn (cfun));
219 191157 : bitmap_clear (in_queue);
220 191157 : bitmap_set_bit (in_queue, from->index);
221 : /* Prevent us from going out of the base_loop. */
222 191157 : bitmap_set_bit (in_queue, base_loop->header->index);
223 :
224 191157 : queue = XNEWVEC (basic_block, base_loop->num_nodes + 1);
225 191157 : qtop = queue + base_loop->num_nodes + 1;
226 191157 : qbeg = queue;
227 191157 : qend = queue + 1;
228 191157 : *qbeg = from;
229 :
230 442470 : while (qbeg != qend)
231 : {
232 251313 : edge_iterator ei;
233 251313 : from = *qbeg;
234 251313 : qbeg++;
235 251313 : if (qbeg == qtop)
236 0 : qbeg = queue;
237 251313 : bitmap_clear_bit (in_queue, from->index);
238 :
239 251313 : if (from->loop_father->header == from)
240 : {
241 : /* Subloop header, maybe move the loop upward. */
242 616 : if (!fix_loop_placement (from->loop_father, irred_invalidated,
243 : loop_closed_ssa_invalidated))
244 195739 : continue;
245 302 : target_loop = loop_outer (from->loop_father);
246 : }
247 : else
248 : {
249 : /* Ordinary basic block. */
250 250697 : if (!fix_bb_placement (from))
251 195425 : continue;
252 55272 : target_loop = from->loop_father;
253 55272 : if (loop_closed_ssa_invalidated)
254 20525 : bitmap_set_bit (loop_closed_ssa_invalidated, from->index);
255 : }
256 :
257 84557 : FOR_EACH_EDGE (e, ei, from->succs)
258 : {
259 28983 : if (e->flags & EDGE_IRREDUCIBLE_LOOP)
260 1 : *irred_invalidated = true;
261 : }
262 :
263 : /* Something has changed, insert predecessors into queue. */
264 116650 : FOR_EACH_EDGE (e, ei, from->preds)
265 : {
266 61076 : basic_block pred = e->src;
267 61076 : class loop *nca;
268 :
269 61076 : if (e->flags & EDGE_IRREDUCIBLE_LOOP)
270 0 : *irred_invalidated = true;
271 :
272 61076 : if (bitmap_bit_p (in_queue, pred->index))
273 920 : continue;
274 :
275 : /* If it is subloop, then it either was not moved, or
276 : the path up the loop tree from base_loop do not contain
277 : it. */
278 60156 : nca = find_common_loop (pred->loop_father, base_loop);
279 60156 : if (pred->loop_father != base_loop
280 616 : && (nca == base_loop
281 302 : || nca != pred->loop_father))
282 616 : pred = pred->loop_father->header;
283 59540 : else if (!flow_loop_nested_p (target_loop, pred->loop_father))
284 : {
285 : /* If PRED is already higher in the loop hierarchy than the
286 : TARGET_LOOP to that we moved FROM, the change of the position
287 : of FROM does not affect the position of PRED, so there is no
288 : point in processing it. */
289 0 : continue;
290 : }
291 :
292 60156 : if (bitmap_bit_p (in_queue, pred->index))
293 0 : continue;
294 :
295 : /* Schedule the basic block. */
296 60156 : *qend = pred;
297 60156 : qend++;
298 60156 : if (qend == qtop)
299 0 : qend = queue;
300 60156 : bitmap_set_bit (in_queue, pred->index);
301 : }
302 : }
303 191157 : free (queue);
304 191157 : }
305 :
306 : /* Removes path beginning at edge E, i.e. remove basic blocks dominated by E
307 : and update loop structures and dominators. Return true if we were able
308 : to remove the path, false otherwise (and nothing is affected then). */
309 : bool
310 435843 : remove_path (edge e, bool *irred_invalidated,
311 : bitmap loop_closed_ssa_invalidated)
312 : {
313 435843 : edge ae;
314 435843 : basic_block *rem_bbs, *bord_bbs, from, bb;
315 435843 : vec<basic_block> dom_bbs;
316 435843 : int i, nrem, n_bord_bbs;
317 435843 : bool local_irred_invalidated = false;
318 435843 : edge_iterator ei;
319 435843 : class loop *l, *f;
320 :
321 435843 : if (! irred_invalidated)
322 146107 : irred_invalidated = &local_irred_invalidated;
323 :
324 435843 : if (!can_remove_branch_p (e))
325 : return false;
326 :
327 : /* Keep track of whether we need to update information about irreducible
328 : regions. This is the case if the removed area is a part of the
329 : irreducible region, or if the set of basic blocks that belong to a loop
330 : that is inside an irreducible region is changed, or if such a loop is
331 : removed. */
332 435843 : if (e->flags & EDGE_IRREDUCIBLE_LOOP)
333 380 : *irred_invalidated = true;
334 :
335 : /* We need to check whether basic blocks are dominated by the edge
336 : e, but we only have basic block dominators. This is easy to
337 : fix -- when e->dest has exactly one predecessor, this corresponds
338 : to blocks dominated by e->dest, if not, split the edge. */
339 435843 : if (!single_pred_p (e->dest))
340 435843 : e = single_pred_edge (split_edge (e));
341 :
342 : /* It may happen that by removing path we remove one or more loops
343 : we belong to. In this case first unloop the loops, then proceed
344 : normally. We may assume that e->dest is not a header of any loop,
345 : as it now has exactly one predecessor. */
346 743510 : for (l = e->src->loop_father; loop_outer (l); l = f)
347 : {
348 307667 : f = loop_outer (l);
349 307667 : if (dominated_by_p (CDI_DOMINATORS, l->latch, e->dest))
350 0 : unloop (l, irred_invalidated, loop_closed_ssa_invalidated);
351 : }
352 :
353 : /* Identify the path. */
354 435843 : nrem = find_path (e, &rem_bbs);
355 :
356 435843 : n_bord_bbs = 0;
357 435843 : bord_bbs = XNEWVEC (basic_block, n_basic_blocks_for_fn (cfun));
358 435843 : auto_sbitmap seen (last_basic_block_for_fn (cfun));
359 435843 : bitmap_clear (seen);
360 :
361 : /* Find "border" hexes -- i.e. those with predecessor in removed path. */
362 1307529 : for (i = 0; i < nrem; i++)
363 435843 : bitmap_set_bit (seen, rem_bbs[i]->index);
364 435843 : if (!*irred_invalidated)
365 1306261 : FOR_EACH_EDGE (ae, ei, e->src->succs)
366 435463 : if (ae != e && ae->dest != EXIT_BLOCK_PTR_FOR_FN (cfun)
367 435463 : && !bitmap_bit_p (seen, ae->dest->index)
368 1306364 : && ae->flags & EDGE_IRREDUCIBLE_LOOP)
369 : {
370 103 : *irred_invalidated = true;
371 103 : break;
372 : }
373 :
374 871686 : for (i = 0; i < nrem; i++)
375 : {
376 871686 : FOR_EACH_EDGE (ae, ei, rem_bbs[i]->succs)
377 435843 : if (ae->dest != EXIT_BLOCK_PTR_FOR_FN (cfun)
378 435843 : && !bitmap_bit_p (seen, ae->dest->index))
379 : {
380 435843 : bitmap_set_bit (seen, ae->dest->index);
381 435843 : bord_bbs[n_bord_bbs++] = ae->dest;
382 :
383 435843 : if (ae->flags & EDGE_IRREDUCIBLE_LOOP)
384 380 : *irred_invalidated = true;
385 : }
386 : }
387 :
388 : /* Remove the path. */
389 435843 : from = e->src;
390 435843 : remove_branch (e);
391 435843 : dom_bbs.create (0);
392 :
393 : /* Cancel loops contained in the path. */
394 871686 : for (i = 0; i < nrem; i++)
395 435843 : if (rem_bbs[i]->loop_father->header == rem_bbs[i])
396 0 : cancel_loop_tree (rem_bbs[i]->loop_father);
397 :
398 435843 : remove_bbs (rem_bbs, nrem);
399 435843 : free (rem_bbs);
400 :
401 : /* Find blocks whose dominators may be affected. */
402 435843 : bitmap_clear (seen);
403 871686 : for (i = 0; i < n_bord_bbs; i++)
404 : {
405 435843 : basic_block ldom;
406 :
407 435843 : bb = get_immediate_dominator (CDI_DOMINATORS, bord_bbs[i]);
408 435843 : if (bitmap_bit_p (seen, bb->index))
409 0 : continue;
410 435843 : bitmap_set_bit (seen, bb->index);
411 :
412 435843 : for (ldom = first_dom_son (CDI_DOMINATORS, bb);
413 1428883 : ldom;
414 993040 : ldom = next_dom_son (CDI_DOMINATORS, ldom))
415 993040 : if (!dominated_by_p (CDI_DOMINATORS, from, ldom))
416 838726 : dom_bbs.safe_push (ldom);
417 : }
418 :
419 : /* Recount dominators. */
420 435843 : iterate_fix_dominators (CDI_DOMINATORS, dom_bbs, true);
421 435843 : dom_bbs.release ();
422 435843 : free (bord_bbs);
423 :
424 : /* Fix placements of basic blocks inside loops and the placement of
425 : loops in the loop tree. */
426 435843 : fix_bb_placements (from, irred_invalidated, loop_closed_ssa_invalidated);
427 435843 : fix_loop_placements (from->loop_father, irred_invalidated,
428 : loop_closed_ssa_invalidated);
429 :
430 435843 : if (local_irred_invalidated
431 435843 : && loops_state_satisfies_p (LOOPS_HAVE_MARKED_IRREDUCIBLE_REGIONS))
432 451 : mark_irreducible_loops ();
433 :
434 435843 : return true;
435 435843 : }
436 :
437 : /* Creates place for a new LOOP in loops structure of FN. */
438 :
439 : void
440 803291 : place_new_loop (struct function *fn, class loop *loop)
441 : {
442 803291 : loop->num = number_of_loops (fn);
443 803291 : vec_safe_push (loops_for_fn (fn)->larray, loop);
444 803291 : }
445 :
446 : /* Given LOOP structure with filled header and latch, find the body of the
447 : corresponding loop and add it to loops tree. Insert the LOOP as a son of
448 : outer. */
449 :
450 : void
451 116545 : add_loop (class loop *loop, class loop *outer)
452 : {
453 116545 : basic_block *bbs;
454 116545 : int i, n;
455 116545 : class loop *subloop;
456 116545 : edge e;
457 116545 : edge_iterator ei;
458 :
459 : /* Add it to loop structure. */
460 116545 : place_new_loop (cfun, loop);
461 116545 : flow_loop_tree_node_add (outer, loop);
462 :
463 : /* Find its nodes. */
464 116545 : bbs = XNEWVEC (basic_block, n_basic_blocks_for_fn (cfun));
465 116545 : n = get_loop_body_with_size (loop, bbs, n_basic_blocks_for_fn (cfun));
466 :
467 834606 : for (i = 0; i < n; i++)
468 : {
469 718061 : if (bbs[i]->loop_father == outer)
470 : {
471 494475 : remove_bb_from_loops (bbs[i]);
472 494475 : add_bb_to_loop (bbs[i], loop);
473 494475 : continue;
474 : }
475 :
476 223586 : loop->num_nodes++;
477 :
478 : /* If we find a direct subloop of OUTER, move it to LOOP. */
479 223586 : subloop = bbs[i]->loop_father;
480 223586 : if (loop_outer (subloop) == outer
481 223586 : && subloop->header == bbs[i])
482 : {
483 23582 : flow_loop_tree_node_remove (subloop);
484 23582 : flow_loop_tree_node_add (loop, subloop);
485 : }
486 : }
487 :
488 : /* Update the information about loop exit edges. */
489 834606 : for (i = 0; i < n; i++)
490 : {
491 1761807 : FOR_EACH_EDGE (e, ei, bbs[i]->succs)
492 : {
493 1043746 : rescan_loop_exit (e, false, false);
494 : }
495 : }
496 :
497 116545 : free (bbs);
498 116545 : }
499 :
500 : /* Scale profile of loop by P. */
501 :
502 : void
503 264639 : scale_loop_frequencies (class loop *loop, profile_probability p)
504 : {
505 264639 : basic_block *bbs;
506 :
507 264639 : bbs = get_loop_body (loop);
508 264639 : scale_bbs_frequencies (bbs, loop->num_nodes, p);
509 264639 : free (bbs);
510 264639 : }
511 :
512 : /* Scales the frequencies of all basic blocks in LOOP that are strictly
513 : dominated by BB by NUM/DEN. */
514 :
515 : void
516 37030 : scale_dominated_blocks_in_loop (class loop *loop, basic_block bb,
517 : profile_count num, profile_count den)
518 : {
519 37030 : basic_block son;
520 :
521 37030 : if (!den.nonzero_p () && !(num == profile_count::zero ()))
522 0 : return;
523 37030 : auto_vec <basic_block, 8> worklist;
524 37030 : worklist.safe_push (bb);
525 :
526 301468 : while (!worklist.is_empty ())
527 190378 : for (son = first_dom_son (CDI_DOMINATORS, worklist.pop ());
528 379988 : son;
529 189610 : son = next_dom_son (CDI_DOMINATORS, son))
530 : {
531 189610 : if (!flow_bb_inside_loop_p (loop, son))
532 36262 : continue;
533 153348 : son->count = son->count.apply_scale (num, den);
534 153348 : worklist.safe_push (son);
535 : }
536 37030 : }
537 :
538 : /* Return exit that suitable for update when loop iterations
539 : changed. */
540 :
541 : static edge
542 109404 : loop_exit_for_scaling (class loop *loop)
543 : {
544 109404 : edge exit_edge = single_exit (loop);
545 109404 : if (!exit_edge)
546 : {
547 8630 : auto_vec<edge> exits = get_loop_exit_edges (loop);
548 8630 : exit_edge = single_likely_exit (loop, exits);
549 8630 : }
550 109404 : return exit_edge;
551 : }
552 :
553 : /* Assume that loop's entry count and profile up to a given EXIT_EDGE is
554 : consistent. Update exit probability so loop exists with PROFILE_COUNT
555 : and rescale profile of basic blocks inside loop dominated by EXIT_EDGE->src.
556 :
557 : This is useful after number of iteraitons of loop has changed.
558 : If EXIT_EDGE is NULL, the function will try to identify suitable exit.
559 : If DESIRED_COUNT is NULL, loop entry count will be used.
560 : In consistent profile usually loop exists as many times as it is entred.
561 :
562 : Return updated exit if successfull and NULL otherwise. */
563 :
564 : edge
565 139448 : update_loop_exit_probability_scale_dom_bbs (class loop *loop,
566 : edge exit_edge,
567 : profile_count desired_count)
568 : {
569 139448 : if (!exit_edge)
570 2997 : exit_edge = loop_exit_for_scaling (loop);
571 2997 : if (!exit_edge)
572 : {
573 2241 : if (dump_file && (dump_flags & TDF_DETAILS))
574 : {
575 260 : fprintf (dump_file, ";; Not updating exit probability of loop %i;"
576 : " it has no single exit\n",
577 : loop->num);
578 : }
579 2241 : return NULL;
580 : }
581 : /* If exit is inside another loop, adjusting its probability will also
582 : adjust number of the inner loop iterations. Just do noting for now. */
583 137207 : if (!just_once_each_iteration_p (loop, exit_edge->src))
584 : {
585 6 : if (dump_file && (dump_flags & TDF_DETAILS))
586 : {
587 0 : fprintf (dump_file, ";; Not updating exit probability of loop %i;"
588 : " exit is inside inner loop\n",
589 : loop->num);
590 : }
591 6 : return NULL;
592 : }
593 : /* Normal loops exit as many times as they are entered. */
594 137201 : if (!desired_count.initialized_p ())
595 1564 : desired_count = loop_count_in (loop);
596 : /* See if everything is already perfect. */
597 137201 : if (desired_count == exit_edge->count ())
598 6338 : return exit_edge;
599 130863 : profile_count old_exit_count = exit_edge->count ();
600 : /* See if update is possible.
601 : Avoid turning probability to 0 or 1 just trying to reach impossible
602 : value.
603 :
604 : Natural profile update after some loop will happily scale header count to
605 : be less than count of entering the loop. This is bad idea and they should
606 : special case maybe_flat_loop_profile.
607 :
608 : It may also happen that the source basic block of the exit edge is
609 : inside in-loop condition:
610 :
611 : +-> header
612 : | |
613 : | B1
614 : | / \
615 : | | B2--exit_edge-->
616 : | \ /
617 : | B3
618 : +__/
619 :
620 : If B2 count is smaller than desired exit edge count
621 : the profile was inconsistent with the newly discovered upper bound.
622 : Probablity of edge B1->B2 is too low. We do not attempt to fix
623 : that (as it is hard in general). */
624 130863 : if (desired_count > exit_edge->src->count
625 130863 : && exit_edge->src->count.differs_from_p (desired_count))
626 : {
627 360 : if (dump_file)
628 : {
629 7 : fprintf (dump_file, ";; Source bb of loop %i has count ",
630 : loop->num);
631 7 : exit_edge->src->count.dump (dump_file, cfun);
632 7 : fprintf (dump_file,
633 : " which is smaller then desired count of exitting loop ");
634 7 : desired_count.dump (dump_file, cfun);
635 7 : fprintf (dump_file, ". Profile update is impossible.\n");
636 : }
637 : /* Drop quality of probability since we know it likely
638 : bad. */
639 360 : exit_edge->probability = exit_edge->probability.guessed ();
640 360 : return NULL;
641 : }
642 130503 : if (!exit_edge->src->count.nonzero_p ())
643 : {
644 1 : if (dump_file)
645 : {
646 0 : fprintf (dump_file, ";; Not updating exit edge probability"
647 : " in loop %i since profile is zero ",
648 : loop->num);
649 : }
650 1 : return NULL;
651 : }
652 130502 : set_edge_probability_and_rescale_others
653 130502 : (exit_edge, desired_count.probability_in (exit_edge->src->count));
654 : /* Rescale the remaining edge probabilities and see if there is only
655 : one. */
656 130502 : edge other_edge = NULL;
657 130502 : bool found = false;
658 130502 : edge e;
659 130502 : edge_iterator ei;
660 391507 : FOR_EACH_EDGE (e, ei, exit_edge->src->succs)
661 261016 : if (!(e->flags & EDGE_FAKE)
662 261016 : && !loop_exit_edge_p (loop, e))
663 : {
664 130513 : if (found)
665 : {
666 : other_edge = NULL;
667 : break;
668 : }
669 : other_edge = e;
670 : found = true;
671 : }
672 : /* If there is only loop latch after other edge,
673 : update its profile. This is tiny bit more precise
674 : than scaling. */
675 130502 : if (other_edge && other_edge->dest == loop->latch)
676 : {
677 94249 : if (single_pred_p (loop->latch))
678 94249 : loop->latch->count = loop->latch->count
679 94249 : + old_exit_count - exit_edge->count ();
680 : }
681 : else
682 : /* If there are multiple blocks, just scale. */
683 72506 : scale_dominated_blocks_in_loop (loop, exit_edge->src,
684 72506 : exit_edge->src->count - exit_edge->count (),
685 36253 : exit_edge->src->count - old_exit_count);
686 : return exit_edge;
687 : }
688 :
689 : /* Scale profile in LOOP by P.
690 : If ITERATION_BOUND is not -1, scale even further if loop is predicted
691 : to iterate too many times.
692 : Before caling this function, preheader block profile should be already
693 : scaled to final count. This is necessary because loop iterations are
694 : determined by comparing header edge count to latch ege count and thus
695 : they need to be scaled synchronously. */
696 :
697 : void
698 285601 : scale_loop_profile (class loop *loop, profile_probability p,
699 : gcov_type iteration_bound)
700 : {
701 285601 : if (!(p == profile_probability::always ()))
702 : {
703 79946 : if (dump_file && (dump_flags & TDF_DETAILS))
704 : {
705 11012 : fprintf (dump_file, ";; Scaling loop %i with scale ",
706 : loop->num);
707 11012 : p.dump (dump_file);
708 11012 : fprintf (dump_file, "\n");
709 : }
710 :
711 : /* Scale the probabilities. */
712 79946 : scale_loop_frequencies (loop, p);
713 : }
714 :
715 285601 : if (iteration_bound == -1)
716 179194 : return;
717 :
718 232816 : sreal iterations;
719 232816 : if (!expected_loop_iterations_by_profile (loop, &iterations))
720 : return;
721 :
722 232318 : if (dump_file && (dump_flags & TDF_DETAILS))
723 : {
724 15468 : fprintf (dump_file,
725 : ";; Guessed iterations of loop %i is %f. New upper bound %i.\n",
726 : loop->num,
727 : iterations.to_double (),
728 : (int)iteration_bound);
729 : }
730 :
731 : /* See if loop is predicted to iterate too many times. */
732 232318 : if (iterations <= (sreal)iteration_bound)
733 : return;
734 :
735 106407 : profile_count count_in = loop_count_in (loop);
736 :
737 : /* Now scale the loop body so header count is
738 : count_in * (iteration_bound + 1) */
739 106407 : profile_probability scale_prob
740 106407 : = (count_in * (iteration_bound + 1)).probability_in (loop->header->count);
741 106407 : if (dump_file && (dump_flags & TDF_DETAILS))
742 : {
743 10628 : fprintf (dump_file, ";; Scaling loop %i with scale ",
744 : loop->num);
745 10628 : scale_prob.dump (dump_file);
746 10628 : fprintf (dump_file, " to reach upper bound %i\n",
747 : (int)iteration_bound);
748 : }
749 : /* Finally attempt to fix exit edge probability. */
750 106407 : edge exit_edge = loop_exit_for_scaling (loop);
751 :
752 : /* In a consistent profile unadjusted_exit_count should be same as count_in,
753 : however to preserve as much of the original info, avoid recomputing
754 : it. */
755 106407 : profile_count unadjusted_exit_count = profile_count::uninitialized ();
756 106407 : if (exit_edge)
757 104166 : unadjusted_exit_count = exit_edge->count ();
758 106407 : scale_loop_frequencies (loop, scale_prob);
759 106407 : update_loop_exit_probability_scale_dom_bbs (loop, exit_edge,
760 : unadjusted_exit_count);
761 : }
762 :
763 : /* Recompute dominance information for basic blocks outside LOOP. */
764 :
765 : static void
766 78104 : update_dominators_in_loop (class loop *loop)
767 : {
768 78104 : vec<basic_block> dom_bbs = vNULL;
769 78104 : basic_block *body;
770 78104 : unsigned i;
771 :
772 78104 : auto_sbitmap seen (last_basic_block_for_fn (cfun));
773 78104 : bitmap_clear (seen);
774 78104 : body = get_loop_body (loop);
775 :
776 554834 : for (i = 0; i < loop->num_nodes; i++)
777 398626 : bitmap_set_bit (seen, body[i]->index);
778 :
779 476730 : for (i = 0; i < loop->num_nodes; i++)
780 : {
781 398626 : basic_block ldom;
782 :
783 398626 : for (ldom = first_dom_son (CDI_DOMINATORS, body[i]);
784 751554 : ldom;
785 352928 : ldom = next_dom_son (CDI_DOMINATORS, ldom))
786 352928 : if (!bitmap_bit_p (seen, ldom->index))
787 : {
788 32406 : bitmap_set_bit (seen, ldom->index);
789 32406 : dom_bbs.safe_push (ldom);
790 : }
791 : }
792 :
793 78104 : iterate_fix_dominators (CDI_DOMINATORS, dom_bbs, false);
794 78104 : free (body);
795 78104 : dom_bbs.release ();
796 78104 : }
797 :
798 : /* Creates an if region as shown above. CONDITION is used to create
799 : the test for the if.
800 :
801 : |
802 : | ------------- -------------
803 : | | pred_bb | | pred_bb |
804 : | ------------- -------------
805 : | | |
806 : | | | ENTRY_EDGE
807 : | | ENTRY_EDGE V
808 : | | ====> -------------
809 : | | | cond_bb |
810 : | | | CONDITION |
811 : | | -------------
812 : | V / \
813 : | ------------- e_false / \ e_true
814 : | | succ_bb | V V
815 : | ------------- ----------- -----------
816 : | | false_bb | | true_bb |
817 : | ----------- -----------
818 : | \ /
819 : | \ /
820 : | V V
821 : | -------------
822 : | | join_bb |
823 : | -------------
824 : | | exit_edge (result)
825 : | V
826 : | -----------
827 : | | succ_bb |
828 : | -----------
829 : |
830 : */
831 :
832 : edge
833 203 : create_empty_if_region_on_edge (edge entry_edge, tree condition)
834 : {
835 :
836 203 : basic_block cond_bb, true_bb, false_bb, join_bb;
837 203 : edge e_true, e_false, exit_edge;
838 203 : gcond *cond_stmt;
839 203 : tree simple_cond;
840 203 : gimple_stmt_iterator gsi;
841 :
842 203 : cond_bb = split_edge (entry_edge);
843 :
844 : /* Insert condition in cond_bb. */
845 203 : gsi = gsi_last_bb (cond_bb);
846 203 : simple_cond =
847 203 : force_gimple_operand_gsi (&gsi, condition, true, NULL,
848 : false, GSI_NEW_STMT);
849 203 : cond_stmt = gimple_build_cond_from_tree (simple_cond, NULL_TREE, NULL_TREE);
850 203 : gsi = gsi_last_bb (cond_bb);
851 203 : gsi_insert_after (&gsi, cond_stmt, GSI_NEW_STMT);
852 :
853 203 : join_bb = split_edge (single_succ_edge (cond_bb));
854 :
855 203 : e_true = single_succ_edge (cond_bb);
856 203 : true_bb = split_edge (e_true);
857 :
858 203 : e_false = make_edge (cond_bb, join_bb, 0);
859 203 : false_bb = split_edge (e_false);
860 :
861 203 : e_true->flags &= ~EDGE_FALLTHRU;
862 203 : e_true->flags |= EDGE_TRUE_VALUE;
863 203 : e_false->flags &= ~EDGE_FALLTHRU;
864 203 : e_false->flags |= EDGE_FALSE_VALUE;
865 :
866 203 : set_immediate_dominator (CDI_DOMINATORS, cond_bb, entry_edge->src);
867 203 : set_immediate_dominator (CDI_DOMINATORS, true_bb, cond_bb);
868 203 : set_immediate_dominator (CDI_DOMINATORS, false_bb, cond_bb);
869 203 : set_immediate_dominator (CDI_DOMINATORS, join_bb, cond_bb);
870 :
871 203 : exit_edge = single_succ_edge (join_bb);
872 :
873 203 : if (single_pred_p (exit_edge->dest))
874 203 : set_immediate_dominator (CDI_DOMINATORS, exit_edge->dest, join_bb);
875 :
876 203 : return exit_edge;
877 : }
878 :
879 : /* create_empty_loop_on_edge
880 : |
881 : | - pred_bb - ------ pred_bb ------
882 : | | | | iv0 = initial_value |
883 : | -----|----- ---------|-----------
884 : | | ______ | entry_edge
885 : | | entry_edge / | |
886 : | | ====> | -V---V- loop_header -------------
887 : | V | | iv_before = phi (iv0, iv_after) |
888 : | - succ_bb - | ---|-----------------------------
889 : | | | | |
890 : | ----------- | ---V--- loop_body ---------------
891 : | | | iv_after = iv_before + stride |
892 : | | | if (iv_before < upper_bound) |
893 : | | ---|--------------\--------------
894 : | | | \ exit_e
895 : | | V \
896 : | | - loop_latch - V- succ_bb -
897 : | | | | | |
898 : | | /------------- -----------
899 : | \ ___ /
900 :
901 : Creates an empty loop as shown above, the IV_BEFORE is the SSA_NAME
902 : that is used before the increment of IV. IV_BEFORE should be used for
903 : adding code to the body that uses the IV. OUTER is the outer loop in
904 : which the new loop should be inserted.
905 :
906 : Both INITIAL_VALUE and UPPER_BOUND expressions are gimplified and
907 : inserted on the loop entry edge. This implies that this function
908 : should be used only when the UPPER_BOUND expression is a loop
909 : invariant. */
910 :
911 : class loop *
912 500 : create_empty_loop_on_edge (edge entry_edge,
913 : tree initial_value,
914 : tree stride, tree upper_bound,
915 : tree iv,
916 : tree *iv_before,
917 : tree *iv_after,
918 : class loop *outer)
919 : {
920 500 : basic_block loop_header, loop_latch, succ_bb, pred_bb;
921 500 : class loop *loop;
922 500 : gimple_stmt_iterator gsi;
923 500 : gimple_seq stmts;
924 500 : gcond *cond_expr;
925 500 : tree exit_test;
926 500 : edge exit_e;
927 :
928 500 : gcc_assert (entry_edge && initial_value && stride && upper_bound && iv);
929 :
930 : /* Create header, latch and wire up the loop. */
931 500 : pred_bb = entry_edge->src;
932 500 : loop_header = split_edge (entry_edge);
933 500 : loop_latch = split_edge (single_succ_edge (loop_header));
934 500 : succ_bb = single_succ (loop_latch);
935 500 : make_edge (loop_header, succ_bb, 0);
936 500 : redirect_edge_succ_nodup (single_succ_edge (loop_latch), loop_header);
937 :
938 : /* Set immediate dominator information. */
939 500 : set_immediate_dominator (CDI_DOMINATORS, loop_header, pred_bb);
940 500 : set_immediate_dominator (CDI_DOMINATORS, loop_latch, loop_header);
941 500 : set_immediate_dominator (CDI_DOMINATORS, succ_bb, loop_header);
942 :
943 : /* Initialize a loop structure and put it in a loop hierarchy. */
944 500 : loop = alloc_loop ();
945 500 : loop->header = loop_header;
946 500 : loop->latch = loop_latch;
947 500 : add_loop (loop, outer);
948 :
949 : /* TODO: Fix counts. */
950 500 : scale_loop_frequencies (loop, profile_probability::even ());
951 :
952 : /* Update dominators. */
953 500 : update_dominators_in_loop (loop);
954 :
955 : /* Modify edge flags. */
956 500 : exit_e = single_exit (loop);
957 500 : exit_e->flags = EDGE_LOOP_EXIT | EDGE_FALSE_VALUE;
958 500 : single_pred_edge (loop_latch)->flags = EDGE_TRUE_VALUE;
959 :
960 : /* Construct IV code in loop. */
961 500 : initial_value = force_gimple_operand (initial_value, &stmts, true, iv);
962 500 : if (stmts)
963 : {
964 17 : gsi_insert_seq_on_edge (loop_preheader_edge (loop), stmts);
965 17 : gsi_commit_edge_inserts ();
966 : }
967 :
968 500 : upper_bound = force_gimple_operand (upper_bound, &stmts, true, NULL);
969 500 : if (stmts)
970 : {
971 161 : gsi_insert_seq_on_edge (loop_preheader_edge (loop), stmts);
972 161 : gsi_commit_edge_inserts ();
973 : }
974 :
975 500 : gsi = gsi_last_bb (loop_header);
976 500 : create_iv (initial_value, PLUS_EXPR, stride, iv, loop, &gsi, false,
977 : iv_before, iv_after);
978 :
979 : /* Insert loop exit condition. */
980 500 : cond_expr = gimple_build_cond
981 500 : (LT_EXPR, *iv_before, upper_bound, NULL_TREE, NULL_TREE);
982 :
983 500 : exit_test = gimple_cond_lhs (cond_expr);
984 500 : exit_test = force_gimple_operand_gsi (&gsi, exit_test, true, NULL,
985 : false, GSI_NEW_STMT);
986 500 : gimple_cond_set_lhs (cond_expr, exit_test);
987 500 : gsi = gsi_last_bb (exit_e->src);
988 500 : gsi_insert_after (&gsi, cond_expr, GSI_NEW_STMT);
989 :
990 500 : split_block_after_labels (loop_header);
991 :
992 500 : return loop;
993 : }
994 :
995 : /* Remove the latch edge of a LOOP and update loops to indicate that
996 : the LOOP was removed. After this function, original loop latch will
997 : have no successor, which caller is expected to fix somehow.
998 :
999 : If this may cause the information about irreducible regions to become
1000 : invalid, IRRED_INVALIDATED is set to true.
1001 :
1002 : LOOP_CLOSED_SSA_INVALIDATED, if non-NULL, is a bitmap where we store
1003 : basic blocks that had non-trivial update on their loop_father.*/
1004 :
1005 : void
1006 136914 : unloop (class loop *loop, bool *irred_invalidated,
1007 : bitmap loop_closed_ssa_invalidated)
1008 : {
1009 136914 : basic_block *body;
1010 136914 : class loop *ploop;
1011 136914 : unsigned i, n;
1012 136914 : basic_block latch = loop->latch;
1013 136914 : bool dummy = false;
1014 :
1015 136914 : if (loop_preheader_edge (loop)->flags & EDGE_IRREDUCIBLE_LOOP)
1016 23 : *irred_invalidated = true;
1017 :
1018 : /* This is relatively straightforward. The dominators are unchanged, as
1019 : loop header dominates loop latch, so the only thing we have to care of
1020 : is the placement of loops and basic blocks inside the loop tree. We
1021 : move them all to the loop->outer, and then let fix_bb_placements do
1022 : its work. */
1023 :
1024 136914 : body = get_loop_body (loop);
1025 136914 : n = loop->num_nodes;
1026 559620 : for (i = 0; i < n; i++)
1027 422706 : if (body[i]->loop_father == loop)
1028 : {
1029 404517 : remove_bb_from_loops (body[i]);
1030 404517 : add_bb_to_loop (body[i], loop_outer (loop));
1031 : }
1032 136914 : free (body);
1033 :
1034 139326 : while (loop->inner)
1035 : {
1036 2412 : ploop = loop->inner;
1037 2412 : flow_loop_tree_node_remove (ploop);
1038 2412 : flow_loop_tree_node_add (loop_outer (loop), ploop);
1039 : }
1040 :
1041 : /* Remove the loop and free its data. */
1042 136914 : delete_loop (loop);
1043 :
1044 136914 : remove_edge (single_succ_edge (latch));
1045 :
1046 : /* We do not pass IRRED_INVALIDATED to fix_bb_placements here, as even if
1047 : there is an irreducible region inside the cancelled loop, the flags will
1048 : be still correct. */
1049 136914 : fix_bb_placements (latch, &dummy, loop_closed_ssa_invalidated);
1050 136914 : }
1051 :
1052 : /* Fix placement of superloops of LOOP inside loop tree, i.e. ensure that
1053 : condition stated in description of fix_loop_placement holds for them.
1054 : It is used in case when we removed some edges coming out of LOOP, which
1055 : may cause the right placement of LOOP inside loop tree to change.
1056 :
1057 : IRRED_INVALIDATED is set to true if a change in the loop structures might
1058 : invalidate the information about irreducible regions. */
1059 :
1060 : static void
1061 435843 : fix_loop_placements (class loop *loop, bool *irred_invalidated,
1062 : bitmap loop_closed_ssa_invalidated)
1063 : {
1064 435843 : class loop *outer;
1065 :
1066 435847 : while (loop_outer (loop))
1067 : {
1068 200701 : outer = loop_outer (loop);
1069 200701 : if (!fix_loop_placement (loop, irred_invalidated,
1070 : loop_closed_ssa_invalidated))
1071 : break;
1072 :
1073 : /* Changing the placement of a loop in the loop tree may alter the
1074 : validity of condition 2) of the description of fix_bb_placement
1075 : for its preheader, because the successor is the header and belongs
1076 : to the loop. So call fix_bb_placements to fix up the placement
1077 : of the preheader and (possibly) of its predecessors. */
1078 4 : fix_bb_placements (loop_preheader_edge (loop)->src,
1079 : irred_invalidated, loop_closed_ssa_invalidated);
1080 4 : loop = outer;
1081 : }
1082 435843 : }
1083 :
1084 : /* Duplicate loop bounds and other information we store about
1085 : the loop into its duplicate. */
1086 :
1087 : void
1088 688078 : copy_loop_info (class loop *loop, class loop *target)
1089 : {
1090 688078 : gcc_checking_assert (!target->any_upper_bound && !target->any_estimate);
1091 688078 : target->any_upper_bound = loop->any_upper_bound;
1092 688078 : target->nb_iterations_upper_bound = loop->nb_iterations_upper_bound;
1093 688078 : target->any_likely_upper_bound = loop->any_likely_upper_bound;
1094 688078 : target->nb_iterations_likely_upper_bound
1095 688078 : = loop->nb_iterations_likely_upper_bound;
1096 688078 : target->any_estimate = loop->any_estimate;
1097 688078 : target->nb_iterations_estimate = loop->nb_iterations_estimate;
1098 688078 : target->estimate_state = loop->estimate_state;
1099 688078 : target->safelen = loop->safelen;
1100 688078 : target->simdlen = loop->simdlen;
1101 688078 : target->constraints = loop->constraints;
1102 688078 : target->can_be_parallel = loop->can_be_parallel;
1103 688078 : target->warned_aggressive_loop_optimizations
1104 688078 : |= loop->warned_aggressive_loop_optimizations;
1105 688078 : target->dont_vectorize = loop->dont_vectorize;
1106 688078 : target->force_vectorize = loop->force_vectorize;
1107 688078 : target->in_oacc_kernels_region = loop->in_oacc_kernels_region;
1108 688078 : target->finite_p = loop->finite_p;
1109 688078 : target->unroll = loop->unroll;
1110 688078 : target->owned_clique = loop->owned_clique;
1111 688078 : }
1112 :
1113 : /* Copies copy of LOOP as subloop of TARGET loop, placing newly
1114 : created loop into loops structure. If AFTER is non-null
1115 : the new loop is added at AFTER->next, otherwise in front of TARGETs
1116 : sibling list. */
1117 : class loop *
1118 40904 : duplicate_loop (class loop *loop, class loop *target, class loop *after)
1119 : {
1120 40904 : class loop *cloop;
1121 40904 : cloop = alloc_loop ();
1122 40904 : place_new_loop (cfun, cloop);
1123 :
1124 40904 : copy_loop_info (loop, cloop);
1125 :
1126 : /* Mark the new loop as copy of LOOP. */
1127 40904 : set_loop_copy (loop, cloop);
1128 :
1129 : /* Add it to target. */
1130 40904 : flow_loop_tree_node_add (target, cloop, after);
1131 :
1132 40904 : return cloop;
1133 : }
1134 :
1135 : /* Copies structure of subloops of LOOP into TARGET loop, placing
1136 : newly created loops into loop tree at the end of TARGETs sibling
1137 : list in the original order. */
1138 : void
1139 40904 : duplicate_subloops (class loop *loop, class loop *target)
1140 : {
1141 40904 : class loop *aloop, *cloop, *tail;
1142 :
1143 40904 : for (tail = target->inner; tail && tail->next; tail = tail->next)
1144 : ;
1145 42313 : for (aloop = loop->inner; aloop; aloop = aloop->next)
1146 : {
1147 1409 : cloop = duplicate_loop (aloop, target, tail);
1148 1409 : tail = cloop;
1149 1409 : gcc_assert(!tail->next);
1150 1409 : duplicate_subloops (aloop, cloop);
1151 : }
1152 40904 : }
1153 :
1154 : /* Copies structure of subloops of N loops, stored in array COPIED_LOOPS,
1155 : into TARGET loop, placing newly created loops into loop tree adding
1156 : them to TARGETs sibling list at the end in order. */
1157 : static void
1158 537476 : copy_loops_to (class loop **copied_loops, int n, class loop *target)
1159 : {
1160 537476 : class loop *aloop, *tail;
1161 537476 : int i;
1162 :
1163 12293177 : for (tail = target->inner; tail && tail->next; tail = tail->next)
1164 : ;
1165 542056 : for (i = 0; i < n; i++)
1166 : {
1167 4580 : aloop = duplicate_loop (copied_loops[i], target, tail);
1168 4580 : tail = aloop;
1169 4580 : gcc_assert(!tail->next);
1170 4580 : duplicate_subloops (copied_loops[i], aloop);
1171 : }
1172 537476 : }
1173 :
1174 : /* Redirects edge E to basic block DEST. */
1175 : static void
1176 38802 : loop_redirect_edge (edge e, basic_block dest)
1177 : {
1178 0 : if (e->dest == dest)
1179 : return;
1180 :
1181 38802 : redirect_edge_and_branch_force (e, dest);
1182 : }
1183 :
1184 : /* Check whether LOOP's body can be duplicated. */
1185 : bool
1186 522777 : can_duplicate_loop_p (const class loop *loop)
1187 : {
1188 522777 : int ret;
1189 522777 : basic_block *bbs = get_loop_body (loop);
1190 :
1191 522777 : ret = can_copy_bbs_p (bbs, loop->num_nodes);
1192 522777 : free (bbs);
1193 :
1194 522777 : return ret;
1195 : }
1196 :
1197 : /* Duplicates body of LOOP to given edge E NDUPL times. Takes care of updating
1198 : loop structure and dominators (order of inner subloops is retained).
1199 : E's destination must be LOOP header for this to work, i.e. it must be entry
1200 : or latch edge of this loop; these are unique, as the loops must have
1201 : preheaders for this function to work correctly (in case E is latch, the
1202 : function unrolls the loop, if E is entry edge, it peels the loop). Store
1203 : edges created by copying ORIG edge from copies corresponding to set bits in
1204 : WONT_EXIT bitmap (bit 0 corresponds to original LOOP body, the other copies
1205 : are numbered in order given by control flow through them) into TO_REMOVE
1206 : array. Returns false if duplication is
1207 : impossible. */
1208 :
1209 : bool
1210 261228 : duplicate_loop_body_to_header_edge (class loop *loop, edge e,
1211 : unsigned int ndupl, sbitmap wont_exit,
1212 : edge orig, vec<edge> *to_remove, int flags)
1213 : {
1214 261228 : class loop *target, *aloop;
1215 261228 : class loop **orig_loops;
1216 261228 : unsigned n_orig_loops;
1217 261228 : basic_block header = loop->header, latch = loop->latch;
1218 261228 : basic_block *new_bbs, *bbs, *first_active;
1219 261228 : basic_block new_bb, bb, first_active_latch = NULL;
1220 261228 : edge ae, latch_edge;
1221 261228 : edge spec_edges[2], new_spec_edges[2];
1222 261228 : const int SE_LATCH = 0;
1223 261228 : const int SE_ORIG = 1;
1224 261228 : unsigned i, j, n;
1225 261228 : int is_latch = (latch == e->src);
1226 261228 : profile_probability *scale_step = NULL;
1227 261228 : profile_probability scale_main = profile_probability::always ();
1228 261228 : profile_probability scale_act = profile_probability::always ();
1229 261228 : profile_count after_exit_num = profile_count::zero (),
1230 261228 : after_exit_den = profile_count::zero ();
1231 261228 : bool scale_after_exit = false;
1232 261228 : int add_irreducible_flag;
1233 261228 : basic_block place_after;
1234 261228 : bitmap bbs_to_scale = NULL;
1235 261228 : bitmap_iterator bi;
1236 :
1237 261228 : gcc_assert (e->dest == loop->header);
1238 261228 : gcc_assert (ndupl > 0);
1239 :
1240 261228 : if (orig)
1241 : {
1242 : /* Orig must be edge out of the loop. */
1243 212865 : gcc_assert (flow_bb_inside_loop_p (loop, orig->src));
1244 212865 : gcc_assert (!flow_bb_inside_loop_p (loop, orig->dest));
1245 : }
1246 :
1247 261228 : n = loop->num_nodes;
1248 261228 : bbs = get_loop_body_in_dom_order (loop);
1249 261228 : gcc_assert (bbs[0] == loop->header);
1250 261228 : gcc_assert (bbs[n - 1] == loop->latch);
1251 :
1252 : /* Check whether duplication is possible. */
1253 261228 : if (!can_copy_bbs_p (bbs, loop->num_nodes))
1254 : {
1255 19 : free (bbs);
1256 19 : return false;
1257 : }
1258 261209 : new_bbs = XNEWVEC (basic_block, loop->num_nodes);
1259 :
1260 : /* In case we are doing loop peeling and the loop is in the middle of
1261 : irreducible region, the peeled copies will be inside it too. */
1262 261209 : add_irreducible_flag = e->flags & EDGE_IRREDUCIBLE_LOOP;
1263 261209 : gcc_assert (!is_latch || !add_irreducible_flag);
1264 :
1265 : /* Find edge from latch. */
1266 261209 : latch_edge = loop_latch_edge (loop);
1267 :
1268 261209 : if (flags & DLTHE_FLAG_UPDATE_FREQ)
1269 : {
1270 : /* Calculate coefficients by that we have to scale counts
1271 : of duplicated loop bodies. */
1272 222407 : profile_count count_in = header->count;
1273 222407 : profile_count count_le = latch_edge->count ();
1274 222407 : profile_count count_out_orig = orig ? orig->count () : count_in - count_le;
1275 222407 : profile_probability prob_pass_thru = count_le.probability_in (count_in);
1276 222407 : profile_count new_count_le = count_le + count_out_orig;
1277 :
1278 212856 : if (orig && orig->probability.initialized_p ()
1279 435263 : && !(orig->probability == profile_probability::always ()))
1280 : {
1281 : /* The blocks that are dominated by a removed exit edge ORIG have
1282 : frequencies scaled by this. */
1283 212729 : if (orig->count ().initialized_p ())
1284 : {
1285 212729 : after_exit_num = orig->src->count;
1286 212729 : after_exit_den = after_exit_num - orig->count ();
1287 212729 : scale_after_exit = true;
1288 : }
1289 212729 : bbs_to_scale = BITMAP_ALLOC (NULL);
1290 761144 : for (i = 0; i < n; i++)
1291 : {
1292 548415 : if (bbs[i] != orig->src
1293 548415 : && dominated_by_p (CDI_DOMINATORS, bbs[i], orig->src))
1294 261922 : bitmap_set_bit (bbs_to_scale, i);
1295 : }
1296 : /* Since we will scale up all basic blocks dominated by orig, exits
1297 : will become more likely; compensate for that. */
1298 212729 : if (after_exit_den.nonzero_p ())
1299 : {
1300 212143 : auto_vec<edge> exits = get_loop_exit_edges (loop);
1301 915981 : for (edge ex : exits)
1302 279552 : if (ex != orig
1303 279552 : && dominated_by_p (CDI_DOMINATORS, ex->src, orig->src))
1304 54370 : new_count_le -= ex->count ().apply_scale (after_exit_num
1305 : - after_exit_den,
1306 27185 : after_exit_den);
1307 212143 : }
1308 : }
1309 222407 : profile_probability prob_pass_wont_exit =
1310 222407 : new_count_le.probability_in (count_in);
1311 : /* If profile count is 0, the probability will be uninitialized.
1312 : We can set probability to any initialized value to avoid
1313 : precision loss. If profile is sane, all counts will be 0 anyway. */
1314 222407 : if (!count_in.nonzero_p ())
1315 : {
1316 566 : prob_pass_thru
1317 566 : = profile_probability::always ().apply_scale (1, 2);
1318 566 : prob_pass_wont_exit
1319 566 : = profile_probability::always ().apply_scale (1, 2);
1320 : }
1321 :
1322 222407 : scale_step = XNEWVEC (profile_probability, ndupl);
1323 :
1324 721081 : for (i = 1; i <= ndupl; i++)
1325 997348 : scale_step[i - 1] = bitmap_bit_p (wont_exit, i)
1326 498674 : ? prob_pass_wont_exit
1327 : : prob_pass_thru;
1328 :
1329 : /* Complete peeling is special as the probability of exit in last
1330 : copy becomes 1. */
1331 444735 : if (!count_in.nonzero_p ())
1332 : ;
1333 221841 : else if (flags & DLTHE_FLAG_COMPLETTE_PEEL)
1334 : {
1335 105309 : profile_count wanted_count = e->count ();
1336 :
1337 105309 : gcc_assert (!is_latch);
1338 : /* First copy has count of incoming edge. Each subsequent
1339 : count should be reduced by prob_pass_wont_exit. Caller
1340 : should've managed the flags so all except for original loop
1341 : has won't exist set. */
1342 105309 : scale_act = wanted_count.probability_in (count_in);
1343 :
1344 : /* Now simulate the duplication adjustments and compute header
1345 : frequency of the last copy. */
1346 428810 : for (i = 0; i < ndupl; i++)
1347 323501 : wanted_count = wanted_count.apply_probability (scale_step [i]);
1348 105309 : scale_main = wanted_count.probability_in (count_in);
1349 : }
1350 : /* Here we insert loop bodies inside the loop itself (for loop unrolling).
1351 : First iteration will be original loop followed by duplicated bodies.
1352 : It is necessary to scale down the original so we get right overall
1353 : number of iterations. */
1354 116532 : else if (is_latch)
1355 : {
1356 51433 : profile_probability prob_pass_main = bitmap_bit_p (wont_exit, 0)
1357 51433 : ? prob_pass_wont_exit
1358 : : prob_pass_thru;
1359 51433 : if (!(flags & DLTHE_FLAG_FLAT_PROFILE))
1360 : {
1361 20882 : profile_probability p = prob_pass_main;
1362 20882 : profile_count scale_main_den = count_in;
1363 82625 : for (i = 0; i < ndupl; i++)
1364 : {
1365 61743 : scale_main_den += count_in.apply_probability (p);
1366 61743 : p = p * scale_step[i];
1367 : }
1368 : /* If original loop is executed COUNT_IN times, the unrolled
1369 : loop will account SCALE_MAIN_DEN times. */
1370 20882 : scale_main = count_in.probability_in (scale_main_den);
1371 : }
1372 : else
1373 : scale_main = profile_probability::always ();
1374 51433 : scale_act = scale_main * prob_pass_main;
1375 : }
1376 : else
1377 : {
1378 65099 : profile_count preheader_count = e->count ();
1379 135805 : for (i = 0; i < ndupl; i++)
1380 70706 : scale_main = scale_main * scale_step[i];
1381 65099 : scale_act = preheader_count.probability_in (count_in);
1382 : }
1383 : }
1384 :
1385 : /* Loop the new bbs will belong to. */
1386 261209 : target = e->src->loop_father;
1387 :
1388 : /* Original loops. */
1389 261209 : n_orig_loops = 0;
1390 265024 : for (aloop = loop->inner; aloop; aloop = aloop->next)
1391 3815 : n_orig_loops++;
1392 261209 : orig_loops = XNEWVEC (class loop *, n_orig_loops);
1393 265024 : for (aloop = loop->inner, i = 0; aloop; aloop = aloop->next, i++)
1394 3815 : orig_loops[i] = aloop;
1395 :
1396 261209 : set_loop_copy (loop, target);
1397 :
1398 261209 : first_active = XNEWVEC (basic_block, n);
1399 261209 : if (is_latch)
1400 : {
1401 51546 : memcpy (first_active, bbs, n * sizeof (basic_block));
1402 51546 : first_active_latch = latch;
1403 : }
1404 :
1405 261209 : spec_edges[SE_ORIG] = orig;
1406 261209 : spec_edges[SE_LATCH] = latch_edge;
1407 :
1408 261209 : place_after = e->src;
1409 261209 : location_t loop_loc = UNKNOWN_LOCATION;
1410 261209 : unsigned int loop_copyid_base = 0;
1411 :
1412 : /* Find a location from the loop header - works for both GIMPLE and RTL. */
1413 261209 : if (current_ir_type () == IR_GIMPLE)
1414 : {
1415 145959 : gimple *last = last_nondebug_stmt (loop->header);
1416 145959 : loop_loc = last ? gimple_location (last) : UNKNOWN_LOCATION;
1417 : }
1418 : else
1419 : {
1420 : /* For RTL, try to find an instruction with a valid location. */
1421 115250 : rtx_insn *insn = BB_END (loop->header);
1422 191535 : while (insn && insn != BB_HEAD (loop->header))
1423 : {
1424 : /* Only check location if this is a valid insn. */
1425 190949 : if (INSN_P (insn))
1426 : {
1427 190363 : location_t loc = INSN_LOCATION (insn);
1428 190363 : if (loc != UNKNOWN_LOCATION)
1429 : {
1430 114664 : loop_loc = get_pure_location (loc);
1431 114664 : break;
1432 : }
1433 : }
1434 76285 : insn = PREV_INSN (insn);
1435 : }
1436 : }
1437 :
1438 : /* Allocate copyid base for this loop duplication - works for both
1439 : GIMPLE and RTL since allocator is per-function. */
1440 260707 : if (loop_loc != UNKNOWN_LOCATION)
1441 246975 : loop_copyid_base = allocate_copyid_base (loop_loc, ndupl);
1442 :
1443 798685 : for (j = 0; j < ndupl; j++)
1444 : {
1445 : /* Copy loops. */
1446 537476 : copy_loops_to (orig_loops, n_orig_loops, target);
1447 :
1448 : /* Copy bbs. */
1449 537476 : copy_bbs (bbs, n, new_bbs, spec_edges, 2, new_spec_edges, loop,
1450 : place_after, true);
1451 537476 : place_after = new_spec_edges[SE_LATCH]->src;
1452 :
1453 537476 : if (flags & DLTHE_RECORD_COPY_NUMBER)
1454 350584 : for (i = 0; i < n; i++)
1455 : {
1456 247101 : gcc_assert (!new_bbs[i]->aux);
1457 247101 : new_bbs[i]->aux = (void *)(size_t)(j + 1);
1458 : }
1459 :
1460 : /* Assign hierarchical discriminators to distinguish loop iterations. */
1461 537476 : if (flags & DLTHE_RECORD_HIERARCHICAL_DISCRIMINATOR
1462 329892 : && loop_copyid_base > 0)
1463 : {
1464 : /* Calculate copyid for this iteration. */
1465 292870 : unsigned int copyid = loop_copyid_base + j;
1466 292870 : if (copyid > DISCR_COPYID_MAX)
1467 : copyid = DISCR_COPYID_MAX;
1468 :
1469 292870 : if (current_ir_type () == IR_GIMPLE)
1470 : {
1471 : /* Update all basic blocks created in this iteration. */
1472 1138683 : for (i = 0; i < n; i++)
1473 845813 : assign_discriminators_to_bb (new_bbs[i], 0, copyid);
1474 : }
1475 : else
1476 : {
1477 : /* For RTL, manually update instruction locations. */
1478 0 : for (i = 0; i < n; i++)
1479 : {
1480 0 : basic_block bb = new_bbs[i];
1481 0 : rtx_insn *insn;
1482 :
1483 : /* Iterate through all instructions in the block. */
1484 0 : FOR_BB_INSNS (bb, insn)
1485 : {
1486 0 : if (INSN_HAS_LOCATION (insn))
1487 : {
1488 0 : location_t loc = INSN_LOCATION (insn);
1489 : /* Get existing discriminator components. */
1490 0 : discriminator_components comp
1491 0 : = get_discriminator_components_from_loc (loc);
1492 0 : comp.copyid = copyid;
1493 :
1494 : /* Apply hierarchical discriminator format. */
1495 0 : INSN_LOCATION (insn)
1496 0 : = location_with_discriminator_components (loc, comp);
1497 : }
1498 : }
1499 : }
1500 : }
1501 : }
1502 :
1503 : /* Note whether the blocks and edges belong to an irreducible loop. */
1504 537476 : if (add_irreducible_flag)
1505 : {
1506 1421 : for (i = 0; i < n; i++)
1507 1061 : new_bbs[i]->flags |= BB_DUPLICATED;
1508 1421 : for (i = 0; i < n; i++)
1509 : {
1510 1061 : edge_iterator ei;
1511 1061 : new_bb = new_bbs[i];
1512 1061 : if (new_bb->loop_father == target)
1513 1047 : new_bb->flags |= BB_IRREDUCIBLE_LOOP;
1514 :
1515 2798 : FOR_EACH_EDGE (ae, ei, new_bb->succs)
1516 1737 : if ((ae->dest->flags & BB_DUPLICATED)
1517 1079 : && (ae->src->loop_father == target
1518 21 : || ae->dest->loop_father == target))
1519 1065 : ae->flags |= EDGE_IRREDUCIBLE_LOOP;
1520 : }
1521 1421 : for (i = 0; i < n; i++)
1522 1061 : new_bbs[i]->flags &= ~BB_DUPLICATED;
1523 : }
1524 :
1525 : /* Redirect the special edges. */
1526 537476 : if (is_latch)
1527 : {
1528 103761 : redirect_edge_and_branch_force (latch_edge, new_bbs[0]);
1529 103761 : redirect_edge_and_branch_force (new_spec_edges[SE_LATCH],
1530 : loop->header);
1531 103761 : set_immediate_dominator (CDI_DOMINATORS, new_bbs[0], latch);
1532 103761 : latch = loop->latch = new_bbs[n - 1];
1533 103761 : e = latch_edge = new_spec_edges[SE_LATCH];
1534 : }
1535 : else
1536 : {
1537 433715 : redirect_edge_and_branch_force (e, new_bbs[0]);
1538 433715 : redirect_edge_and_branch_force (new_spec_edges[SE_LATCH],
1539 : loop->header);
1540 433715 : set_immediate_dominator (CDI_DOMINATORS, new_bbs[0], e->src);
1541 433715 : e = new_spec_edges[SE_LATCH];
1542 : }
1543 :
1544 : /* Record exit edge in this copy. */
1545 537476 : if (orig && bitmap_bit_p (wont_exit, j + 1))
1546 : {
1547 385806 : if (to_remove)
1548 385806 : to_remove->safe_push (new_spec_edges[SE_ORIG]);
1549 385806 : force_edge_cold (new_spec_edges[SE_ORIG], true);
1550 :
1551 : /* Scale the frequencies of the blocks dominated by the exit. */
1552 385806 : if (bbs_to_scale && scale_after_exit)
1553 : {
1554 890805 : EXECUTE_IF_SET_IN_BITMAP (bbs_to_scale, 0, i, bi)
1555 505298 : scale_bbs_frequencies_profile_count (new_bbs + i, 1, after_exit_num,
1556 : after_exit_den);
1557 : }
1558 : }
1559 :
1560 : /* Record the first copy in the control flow order if it is not
1561 : the original loop (i.e. in case of peeling). */
1562 537476 : if (!first_active_latch)
1563 : {
1564 209663 : memcpy (first_active, new_bbs, n * sizeof (basic_block));
1565 209663 : first_active_latch = new_bbs[n - 1];
1566 : }
1567 :
1568 : /* Set counts and frequencies. */
1569 537476 : if (flags & DLTHE_FLAG_UPDATE_FREQ)
1570 : {
1571 498674 : scale_bbs_frequencies (new_bbs, n, scale_act);
1572 498674 : scale_act = scale_act * scale_step[j];
1573 : }
1574 : }
1575 261209 : free (new_bbs);
1576 261209 : free (orig_loops);
1577 :
1578 : /* Record the exit edge in the original loop body, and update the frequencies. */
1579 474065 : if (orig && bitmap_bit_p (wont_exit, 0))
1580 : {
1581 49260 : if (to_remove)
1582 49260 : to_remove->safe_push (orig);
1583 49260 : force_edge_cold (orig, true);
1584 :
1585 : /* Scale the frequencies of the blocks dominated by the exit. */
1586 49260 : if (bbs_to_scale && scale_after_exit)
1587 : {
1588 98433 : EXECUTE_IF_SET_IN_BITMAP (bbs_to_scale, 0, i, bi)
1589 49221 : scale_bbs_frequencies_profile_count (bbs + i, 1, after_exit_num,
1590 : after_exit_den);
1591 : }
1592 : }
1593 :
1594 : /* Update the original loop. */
1595 261209 : if (!is_latch)
1596 209663 : set_immediate_dominator (CDI_DOMINATORS, e->dest, e->src);
1597 261209 : if (flags & DLTHE_FLAG_UPDATE_FREQ)
1598 : {
1599 222407 : scale_bbs_frequencies (bbs, n, scale_main);
1600 222407 : free (scale_step);
1601 : }
1602 :
1603 : /* Update dominators of outer blocks if affected. */
1604 1036024 : for (i = 0; i < n; i++)
1605 : {
1606 774815 : basic_block dominated, dom_bb;
1607 774815 : unsigned j;
1608 :
1609 774815 : bb = bbs[i];
1610 :
1611 774815 : auto_vec<basic_block> dom_bbs = get_dominated_by (CDI_DOMINATORS, bb);
1612 2785320 : FOR_EACH_VEC_ELT (dom_bbs, j, dominated)
1613 : {
1614 769200 : if (flow_bb_inside_loop_p (loop, dominated))
1615 565152 : continue;
1616 408096 : dom_bb = nearest_common_dominator (
1617 204048 : CDI_DOMINATORS, first_active[i], first_active_latch);
1618 204048 : set_immediate_dominator (CDI_DOMINATORS, dominated, dom_bb);
1619 : }
1620 774815 : }
1621 261209 : free (first_active);
1622 :
1623 261209 : free (bbs);
1624 261209 : BITMAP_FREE (bbs_to_scale);
1625 :
1626 261209 : return true;
1627 : }
1628 :
1629 : /* A callback for make_forwarder block, to redirect all edges except for
1630 : MFB_KJ_EDGE to the entry part. E is the edge for that we should decide
1631 : whether to redirect it. */
1632 :
1633 : edge mfb_kj_edge;
1634 : bool
1635 77952 : mfb_keep_just (edge e)
1636 : {
1637 77952 : return e != mfb_kj_edge;
1638 : }
1639 :
1640 : /* True when a candidate preheader BLOCK has predecessors from LOOP. */
1641 :
1642 : static bool
1643 35 : has_preds_from_loop (basic_block block, class loop *loop)
1644 : {
1645 35 : edge e;
1646 35 : edge_iterator ei;
1647 :
1648 76 : FOR_EACH_EDGE (e, ei, block->preds)
1649 41 : if (e->src->loop_father == loop)
1650 : return true;
1651 : return false;
1652 : }
1653 :
1654 : /* Creates a pre-header for a LOOP. Returns newly created block. Unless
1655 : CP_SIMPLE_PREHEADERS is set in FLAGS, we only force LOOP to have single
1656 : entry; otherwise we also force preheader block to have only one successor.
1657 : When CP_FALLTHRU_PREHEADERS is set in FLAGS, we force the preheader block
1658 : to be a fallthru predecessor to the loop header and to have only
1659 : predecessors from outside of the loop.
1660 : The function also updates dominators. */
1661 :
1662 : basic_block
1663 17490747 : create_preheader (class loop *loop, int flags)
1664 : {
1665 17490747 : edge e;
1666 17490747 : basic_block dummy;
1667 17490747 : int nentry = 0;
1668 17490747 : bool irred = false;
1669 17490747 : bool latch_edge_was_fallthru;
1670 17490747 : edge one_succ_pred = NULL, single_entry = NULL;
1671 17490747 : edge_iterator ei;
1672 :
1673 52501058 : FOR_EACH_EDGE (e, ei, loop->header->preds)
1674 : {
1675 35010311 : if (e->src == loop->latch)
1676 17490747 : continue;
1677 17519564 : irred |= (e->flags & EDGE_IRREDUCIBLE_LOOP) != 0;
1678 17519564 : nentry++;
1679 17519564 : single_entry = e;
1680 49117210 : if (single_succ_p (e->src))
1681 14106899 : one_succ_pred = e;
1682 : }
1683 17490747 : gcc_assert (nentry);
1684 17490747 : if (nentry == 1)
1685 : {
1686 17469063 : bool need_forwarder_block = false;
1687 :
1688 : /* We do not allow entry block to be the loop preheader, since we
1689 : cannot emit code there. */
1690 17469063 : if (single_entry->src == ENTRY_BLOCK_PTR_FOR_FN (cfun))
1691 : need_forwarder_block = true;
1692 : else
1693 : {
1694 : /* If we want simple preheaders, also force the preheader to have
1695 : just a single successor and a normal edge. */
1696 17466048 : if ((flags & CP_SIMPLE_PREHEADERS)
1697 17466048 : && ((single_entry->flags & EDGE_COMPLEX)
1698 17440949 : || !single_succ_p (single_entry->src)
1699 : /* We document LOOPS_HAVE_PREHEADERS as to forming a
1700 : natural place to move code outside of the loop, so it
1701 : should not end with a control instruction. */
1702 : /* ??? This, and below JUMP_P check needs to be a new
1703 : CFG hook. */
1704 14098360 : || (cfun->curr_properties & PROP_gimple
1705 26712662 : && !gsi_end_p (gsi_last_bb (single_entry->src))
1706 8834120 : && stmt_ends_bb_p (*gsi_last_bb (single_entry->src)))))
1707 : need_forwarder_block = true;
1708 : /* If we want fallthru preheaders, also create forwarder block when
1709 : preheader ends with a jump or has predecessors from loop. */
1710 14098360 : else if ((flags & CP_FALLTHRU_PREHEADERS)
1711 14098360 : && (JUMP_P (BB_END (single_entry->src))
1712 35 : || has_preds_from_loop (single_entry->src, loop)))
1713 : need_forwarder_block = true;
1714 : }
1715 3367688 : if (! need_forwarder_block)
1716 : return NULL;
1717 : }
1718 :
1719 3392399 : mfb_kj_edge = loop_latch_edge (loop);
1720 3392399 : latch_edge_was_fallthru = (mfb_kj_edge->flags & EDGE_FALLTHRU) != 0;
1721 3392399 : if (nentry == 1
1722 3370715 : && ((flags & CP_FALLTHRU_PREHEADERS) == 0
1723 23 : || (single_entry->flags & EDGE_CROSSING) == 0))
1724 3370715 : dummy = split_edge (single_entry);
1725 : else
1726 : {
1727 21684 : edge fallthru = make_forwarder_block (loop->header, mfb_keep_just, NULL);
1728 21684 : dummy = fallthru->src;
1729 21684 : loop->header = fallthru->dest;
1730 : }
1731 :
1732 : /* Try to be clever in placing the newly created preheader. The idea is to
1733 : avoid breaking any "fallthruness" relationship between blocks.
1734 :
1735 : The preheader was created just before the header and all incoming edges
1736 : to the header were redirected to the preheader, except the latch edge.
1737 : So the only problematic case is when this latch edge was a fallthru
1738 : edge: it is not anymore after the preheader creation so we have broken
1739 : the fallthruness. We're therefore going to look for a better place. */
1740 3392399 : if (latch_edge_was_fallthru)
1741 : {
1742 1471393 : if (one_succ_pred)
1743 4966 : e = one_succ_pred;
1744 : else
1745 1466427 : e = EDGE_PRED (dummy, 0);
1746 :
1747 1471393 : move_block_after (dummy, e->src);
1748 : }
1749 :
1750 3392399 : if (irred)
1751 : {
1752 4269 : dummy->flags |= BB_IRREDUCIBLE_LOOP;
1753 4269 : single_succ_edge (dummy)->flags |= EDGE_IRREDUCIBLE_LOOP;
1754 : }
1755 :
1756 3392399 : if (dump_file)
1757 180 : fprintf (dump_file, "Created preheader block for loop %i\n",
1758 : loop->num);
1759 :
1760 3392399 : if (flags & CP_FALLTHRU_PREHEADERS)
1761 30 : gcc_assert ((single_succ_edge (dummy)->flags & EDGE_FALLTHRU)
1762 : && !JUMP_P (BB_END (dummy)));
1763 :
1764 : return dummy;
1765 : }
1766 :
1767 : /* Create preheaders for each loop; for meaning of FLAGS see create_preheader. */
1768 :
1769 : void
1770 30536580 : create_preheaders (int flags)
1771 : {
1772 30536580 : if (!current_loops)
1773 : return;
1774 :
1775 109099352 : for (auto loop : loops_list (cfun, 0))
1776 17489612 : create_preheader (loop, flags);
1777 30536580 : loops_state_set (LOOPS_HAVE_PREHEADERS);
1778 : }
1779 :
1780 : /* Forces all loop latches to have only single successor. */
1781 :
1782 : void
1783 28124700 : force_single_succ_latches (void)
1784 : {
1785 28124700 : edge e;
1786 :
1787 101217253 : for (auto loop : loops_list (cfun, 0))
1788 : {
1789 16843153 : if (loop->latch != loop->header && single_succ_p (loop->latch))
1790 11598914 : continue;
1791 :
1792 5244239 : e = find_edge (loop->latch, loop->header);
1793 5244239 : gcc_checking_assert (e != NULL);
1794 :
1795 5244239 : split_edge (e);
1796 28124700 : }
1797 28124700 : loops_state_set (LOOPS_HAVE_SIMPLE_LATCHES);
1798 28124700 : }
1799 :
1800 : /* This function is called from loop_version. It splits the entry edge
1801 : of the loop we want to version, adds the versioning condition, and
1802 : adjust the edges to the two versions of the loop appropriately.
1803 : e is an incoming edge. Returns the basic block containing the
1804 : condition.
1805 :
1806 : --- edge e ---- > [second_head]
1807 :
1808 : Split it and insert new conditional expression and adjust edges.
1809 :
1810 : --- edge e ---> [cond expr] ---> [first_head]
1811 : |
1812 : +---------> [second_head]
1813 :
1814 : THEN_PROB is the probability of then branch of the condition.
1815 : ELSE_PROB is the probability of else branch. Note that they may be both
1816 : REG_BR_PROB_BASE when condition is IFN_LOOP_VECTORIZED or
1817 : IFN_LOOP_DIST_ALIAS. */
1818 :
1819 : static basic_block
1820 38802 : lv_adjust_loop_entry_edge (basic_block first_head, basic_block second_head,
1821 : edge e, void *cond_expr,
1822 : profile_probability then_prob,
1823 : profile_probability else_prob)
1824 : {
1825 38802 : basic_block new_head = NULL;
1826 38802 : edge e1;
1827 :
1828 38802 : gcc_assert (e->dest == second_head);
1829 :
1830 : /* Split edge 'e'. This will create a new basic block, where we can
1831 : insert conditional expr. */
1832 38802 : new_head = split_edge (e);
1833 :
1834 38802 : lv_add_condition_to_bb (first_head, second_head, new_head,
1835 : cond_expr);
1836 :
1837 : /* Don't set EDGE_TRUE_VALUE in RTL mode, as it's invalid there. */
1838 38802 : e = single_succ_edge (new_head);
1839 38802 : e1 = make_edge (new_head, first_head,
1840 38802 : current_ir_type () == IR_GIMPLE ? EDGE_TRUE_VALUE : 0);
1841 38802 : e1->probability = then_prob;
1842 38802 : e->probability = else_prob;
1843 :
1844 38802 : set_immediate_dominator (CDI_DOMINATORS, first_head, new_head);
1845 38802 : set_immediate_dominator (CDI_DOMINATORS, second_head, new_head);
1846 :
1847 : /* Adjust loop header phi nodes. */
1848 38802 : lv_adjust_loop_header_phi (first_head, second_head, new_head, e1);
1849 :
1850 38802 : return new_head;
1851 : }
1852 :
1853 : /* Main entry point for Loop Versioning transformation.
1854 :
1855 : This transformation given a condition and a loop, creates
1856 : -if (condition) { loop_copy1 } else { loop_copy2 },
1857 : where loop_copy1 is the loop transformed in one way, and loop_copy2
1858 : is the loop transformed in another way (or unchanged). COND_EXPR
1859 : may be a run time test for things that were not resolved by static
1860 : analysis (overlapping ranges (anti-aliasing), alignment, etc.).
1861 :
1862 : If non-NULL, CONDITION_BB is set to the basic block containing the
1863 : condition.
1864 :
1865 : THEN_PROB is the probability of the then edge of the if. THEN_SCALE
1866 : is the ratio by that the frequencies in the original loop should
1867 : be scaled. ELSE_SCALE is the ratio by that the frequencies in the
1868 : new loop should be scaled.
1869 :
1870 : If PLACE_AFTER is true, we place the new loop after LOOP in the
1871 : instruction stream, otherwise it is placed before LOOP. */
1872 :
1873 : class loop *
1874 38812 : loop_version (class loop *loop,
1875 : void *cond_expr, basic_block *condition_bb,
1876 : profile_probability then_prob, profile_probability else_prob,
1877 : profile_probability then_scale, profile_probability else_scale,
1878 : bool place_after)
1879 : {
1880 38812 : basic_block first_head, second_head;
1881 38812 : edge entry, latch_edge;
1882 38812 : int irred_flag;
1883 38812 : class loop *nloop;
1884 38812 : basic_block cond_bb;
1885 :
1886 : /* Record entry and latch edges for the loop */
1887 38812 : entry = loop_preheader_edge (loop);
1888 38812 : irred_flag = entry->flags & EDGE_IRREDUCIBLE_LOOP;
1889 38812 : entry->flags &= ~EDGE_IRREDUCIBLE_LOOP;
1890 :
1891 : /* Note down head of loop as first_head. */
1892 38812 : first_head = entry->dest;
1893 :
1894 : /* 1) Duplicate loop on the entry edge. */
1895 38812 : if (!cfg_hook_duplicate_loop_body_to_header_edge (loop, entry, 1, NULL, NULL,
1896 : NULL, 0))
1897 : {
1898 10 : entry->flags |= irred_flag;
1899 10 : return NULL;
1900 : }
1901 :
1902 : /* 2) loopify the duplicated new loop. */
1903 38802 : latch_edge = single_succ_edge (get_bb_copy (loop->latch));
1904 38802 : nloop = alloc_loop ();
1905 38802 : class loop *outer = loop_outer (latch_edge->dest->loop_father);
1906 38802 : edge new_header_edge = single_pred_edge (get_bb_copy (loop->header));
1907 38802 : nloop->header = new_header_edge->dest;
1908 38802 : nloop->latch = latch_edge->src;
1909 38802 : loop_redirect_edge (latch_edge, nloop->header);
1910 :
1911 : /* Compute new loop. */
1912 38802 : add_loop (nloop, outer);
1913 38802 : copy_loop_info (loop, nloop);
1914 38802 : set_loop_copy (loop, nloop);
1915 :
1916 : /* loopify redirected latch_edge. Update its PENDING_STMTS. */
1917 38802 : lv_flush_pending_stmts (latch_edge);
1918 :
1919 : /* After duplication entry edge now points to new loop head block.
1920 : Note down new head as second_head. */
1921 38802 : second_head = entry->dest;
1922 :
1923 : /* 3) Split loop entry edge and insert new block with cond expr. */
1924 38802 : cond_bb = lv_adjust_loop_entry_edge (first_head, second_head,
1925 : entry, cond_expr, then_prob, else_prob);
1926 38802 : if (condition_bb)
1927 34531 : *condition_bb = cond_bb;
1928 :
1929 38802 : if (!cond_bb)
1930 : {
1931 0 : entry->flags |= irred_flag;
1932 0 : return NULL;
1933 : }
1934 :
1935 : /* Add cond_bb to appropriate loop. */
1936 38802 : if (cond_bb->loop_father)
1937 38802 : remove_bb_from_loops (cond_bb);
1938 38802 : add_bb_to_loop (cond_bb, outer);
1939 :
1940 : /* 4) Scale the original loop and new loop frequency. */
1941 38802 : scale_loop_frequencies (loop, then_scale);
1942 38802 : scale_loop_frequencies (nloop, else_scale);
1943 38802 : update_dominators_in_loop (loop);
1944 38802 : update_dominators_in_loop (nloop);
1945 :
1946 : /* Adjust irreducible flag. */
1947 38802 : if (irred_flag)
1948 : {
1949 51 : cond_bb->flags |= BB_IRREDUCIBLE_LOOP;
1950 51 : loop_preheader_edge (loop)->flags |= EDGE_IRREDUCIBLE_LOOP;
1951 51 : loop_preheader_edge (nloop)->flags |= EDGE_IRREDUCIBLE_LOOP;
1952 51 : single_pred_edge (cond_bb)->flags |= EDGE_IRREDUCIBLE_LOOP;
1953 : }
1954 :
1955 38802 : if (place_after)
1956 : {
1957 36389 : basic_block *bbs = get_loop_body_in_dom_order (nloop), after;
1958 36389 : unsigned i;
1959 :
1960 36389 : after = loop->latch;
1961 :
1962 213261 : for (i = 0; i < nloop->num_nodes; i++)
1963 : {
1964 176872 : move_block_after (bbs[i], after);
1965 176872 : after = bbs[i];
1966 : }
1967 36389 : free (bbs);
1968 : }
1969 :
1970 : /* At this point condition_bb is loop preheader with two successors,
1971 : first_head and second_head. Make sure that loop preheader has only
1972 : one successor. */
1973 38802 : split_edge (loop_preheader_edge (loop));
1974 38802 : split_edge (loop_preheader_edge (nloop));
1975 :
1976 38802 : return nloop;
1977 : }
|