Branch data Line data Source code
1 : : /* Loop manipulation code for GNU compiler.
2 : : Copyright (C) 2002-2025 Free Software Foundation, Inc.
3 : :
4 : : This file is part of GCC.
5 : :
6 : : GCC is free software; you can redistribute it and/or modify it under
7 : : the terms of the GNU General Public License as published by the Free
8 : : Software Foundation; either version 3, or (at your option) any later
9 : : version.
10 : :
11 : : GCC is distributed in the hope that it will be useful, but WITHOUT ANY
12 : : WARRANTY; without even the implied warranty of MERCHANTABILITY or
13 : : FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
14 : : for more details.
15 : :
16 : : You should have received a copy of the GNU General Public License
17 : : along with GCC; see the file COPYING3. If not see
18 : : <http://www.gnu.org/licenses/>. */
19 : :
20 : : #include "config.h"
21 : : #include "system.h"
22 : : #include "coretypes.h"
23 : : #include "backend.h"
24 : : #include "rtl.h"
25 : : #include "tree.h"
26 : : #include "gimple.h"
27 : : #include "cfghooks.h"
28 : : #include "cfganal.h"
29 : : #include "cfgloop.h"
30 : : #include "gimple-iterator.h"
31 : : #include "gimplify-me.h"
32 : : #include "tree-ssa-loop-manip.h"
33 : : #include "dumpfile.h"
34 : : #include "sreal.h"
35 : :
36 : : static void copy_loops_to (class loop **, int,
37 : : class loop *);
38 : : static void loop_redirect_edge (edge, basic_block);
39 : : static void remove_bbs (basic_block *, int);
40 : : static bool rpe_enum_p (const_basic_block, const void *);
41 : : static int find_path (edge, basic_block **);
42 : : static void fix_loop_placements (class loop *, bool *, bitmap);
43 : : static bool fix_bb_placement (basic_block);
44 : : static void fix_bb_placements (basic_block, bool *, bitmap);
45 : :
46 : : /* Checks whether basic block BB is dominated by DATA. */
47 : : static bool
48 : 399567 : rpe_enum_p (const_basic_block bb, const void *data)
49 : : {
50 : 399567 : return dominated_by_p (CDI_DOMINATORS, bb, (const_basic_block) data);
51 : : }
52 : :
53 : : /* Remove basic blocks BBS. NBBS is the number of the basic blocks. */
54 : :
55 : : static void
56 : 399567 : remove_bbs (basic_block *bbs, int nbbs)
57 : : {
58 : 399567 : int i;
59 : :
60 : 799134 : for (i = 0; i < nbbs; i++)
61 : 399567 : delete_basic_block (bbs[i]);
62 : 399567 : }
63 : :
64 : : /* Find path -- i.e. the basic blocks dominated by edge E and put them
65 : : into array BBS, that will be allocated large enough to contain them.
66 : : E->dest must have exactly one predecessor for this to work (it is
67 : : easy to achieve and we do not put it here because we do not want to
68 : : alter anything by this function). The number of basic blocks in the
69 : : path is returned. */
70 : : static int
71 : 399567 : find_path (edge e, basic_block **bbs)
72 : : {
73 : 399567 : gcc_assert (EDGE_COUNT (e->dest->preds) <= 1);
74 : :
75 : : /* Find bbs in the path. */
76 : 399567 : *bbs = XNEWVEC (basic_block, n_basic_blocks_for_fn (cfun));
77 : 399567 : return dfs_enumerate_from (e->dest, 0, rpe_enum_p, *bbs,
78 : 399567 : n_basic_blocks_for_fn (cfun), e->dest);
79 : : }
80 : :
81 : : /* Fix placement of basic block BB inside loop hierarchy --
82 : : Let L be a loop to that BB belongs. Then every successor of BB must either
83 : : 1) belong to some superloop of loop L, or
84 : : 2) be a header of loop K such that K->outer is superloop of L
85 : : Returns true if we had to move BB into other loop to enforce this condition,
86 : : false if the placement of BB was already correct (provided that placements
87 : : of its successors are correct). */
88 : : static bool
89 : 219688 : fix_bb_placement (basic_block bb)
90 : : {
91 : 219688 : edge e;
92 : 219688 : edge_iterator ei;
93 : 219688 : class loop *loop = current_loops->tree_root, *act;
94 : :
95 : 449314 : FOR_EACH_EDGE (e, ei, bb->succs)
96 : : {
97 : 229626 : if (e->dest == EXIT_BLOCK_PTR_FOR_FN (cfun))
98 : 0 : continue;
99 : :
100 : 229626 : act = e->dest->loop_father;
101 : 229626 : if (act->header == e->dest)
102 : 281 : act = loop_outer (act);
103 : :
104 : 229626 : if (flow_loop_nested_p (loop, act))
105 : 229626 : loop = act;
106 : : }
107 : :
108 : 219688 : if (loop == bb->loop_father)
109 : : return false;
110 : :
111 : 47174 : remove_bb_from_loops (bb);
112 : 47174 : add_bb_to_loop (bb, loop);
113 : :
114 : 47174 : return true;
115 : : }
116 : :
117 : : /* Fix placement of LOOP inside loop tree, i.e. find the innermost superloop
118 : : of LOOP to that leads at least one exit edge of LOOP, and set it
119 : : as the immediate superloop of LOOP. Return true if the immediate superloop
120 : : of LOOP changed.
121 : :
122 : : IRRED_INVALIDATED is set to true if a change in the loop structures might
123 : : invalidate the information about irreducible regions. */
124 : :
125 : : static bool
126 : 181699 : fix_loop_placement (class loop *loop, bool *irred_invalidated,
127 : : bitmap loop_closed_ssa_invalidated)
128 : : {
129 : 181699 : unsigned i;
130 : 181699 : edge e;
131 : 181699 : auto_vec<edge> exits = get_loop_exit_edges (loop);
132 : 181699 : class loop *father = current_loops->tree_root, *act;
133 : 181699 : bool ret = false;
134 : :
135 : 1143128 : FOR_EACH_VEC_ELT (exits, i, e)
136 : : {
137 : 961429 : act = find_common_loop (loop, e->dest->loop_father);
138 : 961429 : if (flow_loop_nested_p (father, act))
139 : 61462 : father = act;
140 : : }
141 : :
142 : 181699 : if (father != loop_outer (loop))
143 : : {
144 : 741 : for (act = loop_outer (loop); act != father; act = loop_outer (act))
145 : 460 : act->num_nodes -= loop->num_nodes;
146 : 281 : flow_loop_tree_node_remove (loop);
147 : 281 : flow_loop_tree_node_add (father, loop);
148 : :
149 : : /* The exit edges of LOOP no longer exits its original immediate
150 : : superloops; remove them from the appropriate exit lists. */
151 : 862 : FOR_EACH_VEC_ELT (exits, i, e)
152 : : {
153 : : /* We may need to recompute irreducible loops. */
154 : 300 : if (e->flags & EDGE_IRREDUCIBLE_LOOP)
155 : 0 : *irred_invalidated = true;
156 : 300 : rescan_loop_exit (e, false, false);
157 : : }
158 : : /* Any LC SSA PHIs on e->dest might now be on the wrong edge
159 : : if their defs were in a former outer loop. Also all uses
160 : : in the original inner loop of defs in the outer loop(s) now
161 : : require LC PHI nodes. */
162 : 281 : if (loop_closed_ssa_invalidated)
163 : : {
164 : 10 : basic_block *bbs = get_loop_body (loop);
165 : 55 : for (unsigned i = 0; i < loop->num_nodes; ++i)
166 : 45 : bitmap_set_bit (loop_closed_ssa_invalidated, bbs[i]->index);
167 : 10 : free (bbs);
168 : : }
169 : :
170 : : ret = true;
171 : : }
172 : :
173 : 181699 : return ret;
174 : 181699 : }
175 : :
176 : : /* Fix placements of basic blocks inside loop hierarchy stored in loops; i.e.
177 : : enforce condition stated in description of fix_bb_placement. We
178 : : start from basic block FROM that had some of its successors removed, so that
179 : : his placement no longer has to be correct, and iteratively fix placement of
180 : : its predecessors that may change if placement of FROM changed. Also fix
181 : : placement of subloops of FROM->loop_father, that might also be altered due
182 : : to this change; the condition for them is similar, except that instead of
183 : : successors we consider edges coming out of the loops.
184 : :
185 : : If the changes may invalidate the information about irreducible regions,
186 : : IRRED_INVALIDATED is set to true.
187 : :
188 : : If LOOP_CLOSED_SSA_INVLIDATED is non-zero then all basic blocks with
189 : : changed loop_father are collected there. */
190 : :
191 : : static void
192 : 521438 : fix_bb_placements (basic_block from,
193 : : bool *irred_invalidated,
194 : : bitmap loop_closed_ssa_invalidated)
195 : : {
196 : 521438 : basic_block *queue, *qtop, *qbeg, *qend;
197 : 521438 : class loop *base_loop, *target_loop;
198 : 521438 : edge e;
199 : :
200 : : /* We pass through blocks back-reachable from FROM, testing whether some
201 : : of their successors moved to outer loop. It may be necessary to
202 : : iterate several times, but it is finite, as we stop unless we move
203 : : the basic block up the loop structure. The whole story is a bit
204 : : more complicated due to presence of subloops, those are moved using
205 : : fix_loop_placement. */
206 : :
207 : 521438 : base_loop = from->loop_father;
208 : : /* If we are already in the outermost loop, the basic blocks cannot be moved
209 : : outside of it. If FROM is the header of the base loop, it cannot be moved
210 : : outside of it, either. In both cases, we can end now. */
211 : 521438 : if (base_loop == current_loops->tree_root
212 : 213122 : || from == base_loop->header)
213 : 351340 : return;
214 : :
215 : 170098 : auto_sbitmap in_queue (last_basic_block_for_fn (cfun));
216 : 170098 : bitmap_clear (in_queue);
217 : 170098 : bitmap_set_bit (in_queue, from->index);
218 : : /* Prevent us from going out of the base_loop. */
219 : 170098 : bitmap_set_bit (in_queue, base_loop->header->index);
220 : :
221 : 170098 : queue = XNEWVEC (basic_block, base_loop->num_nodes + 1);
222 : 170098 : qtop = queue + base_loop->num_nodes + 1;
223 : 170098 : qbeg = queue;
224 : 170098 : qend = queue + 1;
225 : 170098 : *qbeg = from;
226 : :
227 : 390356 : while (qbeg != qend)
228 : : {
229 : 220258 : edge_iterator ei;
230 : 220258 : from = *qbeg;
231 : 220258 : qbeg++;
232 : 220258 : if (qbeg == qtop)
233 : 0 : qbeg = queue;
234 : 220258 : bitmap_clear_bit (in_queue, from->index);
235 : :
236 : 220258 : if (from->loop_father->header == from)
237 : : {
238 : : /* Subloop header, maybe move the loop upward. */
239 : 570 : if (!fix_loop_placement (from->loop_father, irred_invalidated,
240 : : loop_closed_ssa_invalidated))
241 : 172807 : continue;
242 : 277 : target_loop = loop_outer (from->loop_father);
243 : : }
244 : : else
245 : : {
246 : : /* Ordinary basic block. */
247 : 219688 : if (!fix_bb_placement (from))
248 : 172514 : continue;
249 : 47174 : target_loop = from->loop_father;
250 : 47174 : if (loop_closed_ssa_invalidated)
251 : 18765 : bitmap_set_bit (loop_closed_ssa_invalidated, from->index);
252 : : }
253 : :
254 : 70703 : FOR_EACH_EDGE (e, ei, from->succs)
255 : : {
256 : 23252 : if (e->flags & EDGE_IRREDUCIBLE_LOOP)
257 : 1 : *irred_invalidated = true;
258 : : }
259 : :
260 : : /* Something has changed, insert predecessors into queue. */
261 : 98517 : FOR_EACH_EDGE (e, ei, from->preds)
262 : : {
263 : 51066 : basic_block pred = e->src;
264 : 51066 : class loop *nca;
265 : :
266 : 51066 : if (e->flags & EDGE_IRREDUCIBLE_LOOP)
267 : 0 : *irred_invalidated = true;
268 : :
269 : 51066 : if (bitmap_bit_p (in_queue, pred->index))
270 : 906 : continue;
271 : :
272 : : /* If it is subloop, then it either was not moved, or
273 : : the path up the loop tree from base_loop do not contain
274 : : it. */
275 : 50160 : nca = find_common_loop (pred->loop_father, base_loop);
276 : 50160 : if (pred->loop_father != base_loop
277 : 570 : && (nca == base_loop
278 : 277 : || nca != pred->loop_father))
279 : 570 : pred = pred->loop_father->header;
280 : 49590 : else if (!flow_loop_nested_p (target_loop, pred->loop_father))
281 : : {
282 : : /* If PRED is already higher in the loop hierarchy than the
283 : : TARGET_LOOP to that we moved FROM, the change of the position
284 : : of FROM does not affect the position of PRED, so there is no
285 : : point in processing it. */
286 : 0 : continue;
287 : : }
288 : :
289 : 50160 : if (bitmap_bit_p (in_queue, pred->index))
290 : 0 : continue;
291 : :
292 : : /* Schedule the basic block. */
293 : 50160 : *qend = pred;
294 : 50160 : qend++;
295 : 50160 : if (qend == qtop)
296 : 0 : qend = queue;
297 : 50160 : bitmap_set_bit (in_queue, pred->index);
298 : : }
299 : : }
300 : 170098 : free (queue);
301 : 170098 : }
302 : :
303 : : /* Removes path beginning at edge E, i.e. remove basic blocks dominated by E
304 : : and update loop structures and dominators. Return true if we were able
305 : : to remove the path, false otherwise (and nothing is affected then). */
306 : : bool
307 : 399567 : remove_path (edge e, bool *irred_invalidated,
308 : : bitmap loop_closed_ssa_invalidated)
309 : : {
310 : 399567 : edge ae;
311 : 399567 : basic_block *rem_bbs, *bord_bbs, from, bb;
312 : 399567 : vec<basic_block> dom_bbs;
313 : 399567 : int i, nrem, n_bord_bbs;
314 : 399567 : bool local_irred_invalidated = false;
315 : 399567 : edge_iterator ei;
316 : 399567 : class loop *l, *f;
317 : :
318 : 399567 : if (! irred_invalidated)
319 : 141694 : irred_invalidated = &local_irred_invalidated;
320 : :
321 : 399567 : if (!can_remove_branch_p (e))
322 : : return false;
323 : :
324 : : /* Keep track of whether we need to update information about irreducible
325 : : regions. This is the case if the removed area is a part of the
326 : : irreducible region, or if the set of basic blocks that belong to a loop
327 : : that is inside an irreducible region is changed, or if such a loop is
328 : : removed. */
329 : 399567 : if (e->flags & EDGE_IRREDUCIBLE_LOOP)
330 : 371 : *irred_invalidated = true;
331 : :
332 : : /* We need to check whether basic blocks are dominated by the edge
333 : : e, but we only have basic block dominators. This is easy to
334 : : fix -- when e->dest has exactly one predecessor, this corresponds
335 : : to blocks dominated by e->dest, if not, split the edge. */
336 : 399567 : if (!single_pred_p (e->dest))
337 : 399567 : e = single_pred_edge (split_edge (e));
338 : :
339 : : /* It may happen that by removing path we remove one or more loops
340 : : we belong to. In this case first unloop the loops, then proceed
341 : : normally. We may assume that e->dest is not a header of any loop,
342 : : as it now has exactly one predecessor. */
343 : 677033 : for (l = e->src->loop_father; loop_outer (l); l = f)
344 : : {
345 : 277466 : f = loop_outer (l);
346 : 277466 : if (dominated_by_p (CDI_DOMINATORS, l->latch, e->dest))
347 : 0 : unloop (l, irred_invalidated, loop_closed_ssa_invalidated);
348 : : }
349 : :
350 : : /* Identify the path. */
351 : 399567 : nrem = find_path (e, &rem_bbs);
352 : :
353 : 399567 : n_bord_bbs = 0;
354 : 399567 : bord_bbs = XNEWVEC (basic_block, n_basic_blocks_for_fn (cfun));
355 : 399567 : auto_sbitmap seen (last_basic_block_for_fn (cfun));
356 : 399567 : bitmap_clear (seen);
357 : :
358 : : /* Find "border" hexes -- i.e. those with predecessor in removed path. */
359 : 1198701 : for (i = 0; i < nrem; i++)
360 : 399567 : bitmap_set_bit (seen, rem_bbs[i]->index);
361 : 399567 : if (!*irred_invalidated)
362 : 1197451 : FOR_EACH_EDGE (ae, ei, e->src->succs)
363 : 399196 : if (ae != e && ae->dest != EXIT_BLOCK_PTR_FOR_FN (cfun)
364 : 399196 : && !bitmap_bit_p (seen, ae->dest->index)
365 : 1197564 : && ae->flags & EDGE_IRREDUCIBLE_LOOP)
366 : : {
367 : 113 : *irred_invalidated = true;
368 : 113 : break;
369 : : }
370 : :
371 : 799134 : for (i = 0; i < nrem; i++)
372 : : {
373 : 799134 : FOR_EACH_EDGE (ae, ei, rem_bbs[i]->succs)
374 : 399567 : if (ae->dest != EXIT_BLOCK_PTR_FOR_FN (cfun)
375 : 399567 : && !bitmap_bit_p (seen, ae->dest->index))
376 : : {
377 : 399567 : bitmap_set_bit (seen, ae->dest->index);
378 : 399567 : bord_bbs[n_bord_bbs++] = ae->dest;
379 : :
380 : 399567 : if (ae->flags & EDGE_IRREDUCIBLE_LOOP)
381 : 371 : *irred_invalidated = true;
382 : : }
383 : : }
384 : :
385 : : /* Remove the path. */
386 : 399567 : from = e->src;
387 : 399567 : remove_branch (e);
388 : 399567 : dom_bbs.create (0);
389 : :
390 : : /* Cancel loops contained in the path. */
391 : 799134 : for (i = 0; i < nrem; i++)
392 : 399567 : if (rem_bbs[i]->loop_father->header == rem_bbs[i])
393 : 0 : cancel_loop_tree (rem_bbs[i]->loop_father);
394 : :
395 : 399567 : remove_bbs (rem_bbs, nrem);
396 : 399567 : free (rem_bbs);
397 : :
398 : : /* Find blocks whose dominators may be affected. */
399 : 399567 : bitmap_clear (seen);
400 : 799134 : for (i = 0; i < n_bord_bbs; i++)
401 : : {
402 : 399567 : basic_block ldom;
403 : :
404 : 399567 : bb = get_immediate_dominator (CDI_DOMINATORS, bord_bbs[i]);
405 : 399567 : if (bitmap_bit_p (seen, bb->index))
406 : 0 : continue;
407 : 399567 : bitmap_set_bit (seen, bb->index);
408 : :
409 : 399567 : for (ldom = first_dom_son (CDI_DOMINATORS, bb);
410 : 1310117 : ldom;
411 : 910550 : ldom = next_dom_son (CDI_DOMINATORS, ldom))
412 : 910550 : if (!dominated_by_p (CDI_DOMINATORS, from, ldom))
413 : 767472 : dom_bbs.safe_push (ldom);
414 : : }
415 : :
416 : : /* Recount dominators. */
417 : 399567 : iterate_fix_dominators (CDI_DOMINATORS, dom_bbs, true);
418 : 399567 : dom_bbs.release ();
419 : 399567 : free (bord_bbs);
420 : :
421 : : /* Fix placements of basic blocks inside loops and the placement of
422 : : loops in the loop tree. */
423 : 399567 : fix_bb_placements (from, irred_invalidated, loop_closed_ssa_invalidated);
424 : 399567 : fix_loop_placements (from->loop_father, irred_invalidated,
425 : : loop_closed_ssa_invalidated);
426 : :
427 : 399567 : if (local_irred_invalidated
428 : 399567 : && loops_state_satisfies_p (LOOPS_HAVE_MARKED_IRREDUCIBLE_REGIONS))
429 : 463 : mark_irreducible_loops ();
430 : :
431 : 399567 : return true;
432 : 399567 : }
433 : :
434 : : /* Creates place for a new LOOP in loops structure of FN. */
435 : :
436 : : void
437 : 735673 : place_new_loop (struct function *fn, class loop *loop)
438 : : {
439 : 735673 : loop->num = number_of_loops (fn);
440 : 735673 : vec_safe_push (loops_for_fn (fn)->larray, loop);
441 : 735673 : }
442 : :
443 : : /* Given LOOP structure with filled header and latch, find the body of the
444 : : corresponding loop and add it to loops tree. Insert the LOOP as a son of
445 : : outer. */
446 : :
447 : : void
448 : 115292 : add_loop (class loop *loop, class loop *outer)
449 : : {
450 : 115292 : basic_block *bbs;
451 : 115292 : int i, n;
452 : 115292 : class loop *subloop;
453 : 115292 : edge e;
454 : 115292 : edge_iterator ei;
455 : :
456 : : /* Add it to loop structure. */
457 : 115292 : place_new_loop (cfun, loop);
458 : 115292 : flow_loop_tree_node_add (outer, loop);
459 : :
460 : : /* Find its nodes. */
461 : 115292 : bbs = XNEWVEC (basic_block, n_basic_blocks_for_fn (cfun));
462 : 115292 : n = get_loop_body_with_size (loop, bbs, n_basic_blocks_for_fn (cfun));
463 : :
464 : 832399 : for (i = 0; i < n; i++)
465 : : {
466 : 717107 : if (bbs[i]->loop_father == outer)
467 : : {
468 : 492682 : remove_bb_from_loops (bbs[i]);
469 : 492682 : add_bb_to_loop (bbs[i], loop);
470 : 492682 : continue;
471 : : }
472 : :
473 : 224425 : loop->num_nodes++;
474 : :
475 : : /* If we find a direct subloop of OUTER, move it to LOOP. */
476 : 224425 : subloop = bbs[i]->loop_father;
477 : 224425 : if (loop_outer (subloop) == outer
478 : 224425 : && subloop->header == bbs[i])
479 : : {
480 : 23673 : flow_loop_tree_node_remove (subloop);
481 : 23673 : flow_loop_tree_node_add (loop, subloop);
482 : : }
483 : : }
484 : :
485 : : /* Update the information about loop exit edges. */
486 : 832399 : for (i = 0; i < n; i++)
487 : : {
488 : 1767701 : FOR_EACH_EDGE (e, ei, bbs[i]->succs)
489 : : {
490 : 1050594 : rescan_loop_exit (e, false, false);
491 : : }
492 : : }
493 : :
494 : 115292 : free (bbs);
495 : 115292 : }
496 : :
497 : : /* Scale profile of loop by P. */
498 : :
499 : : void
500 : 232449 : scale_loop_frequencies (class loop *loop, profile_probability p)
501 : : {
502 : 232449 : basic_block *bbs;
503 : :
504 : 232449 : bbs = get_loop_body (loop);
505 : 232449 : scale_bbs_frequencies (bbs, loop->num_nodes, p);
506 : 232449 : free (bbs);
507 : 232449 : }
508 : :
509 : : /* Scales the frequencies of all basic blocks in LOOP that are strictly
510 : : dominated by BB by NUM/DEN. */
511 : :
512 : : void
513 : 32631 : scale_dominated_blocks_in_loop (class loop *loop, basic_block bb,
514 : : profile_count num, profile_count den)
515 : : {
516 : 32631 : basic_block son;
517 : :
518 : 32631 : if (!den.nonzero_p () && !(num == profile_count::zero ()))
519 : 0 : return;
520 : 32631 : auto_vec <basic_block, 8> worklist;
521 : 32631 : worklist.safe_push (bb);
522 : :
523 : 264412 : while (!worklist.is_empty ())
524 : 166519 : for (son = first_dom_son (CDI_DOMINATORS, worklist.pop ());
525 : 332204 : son;
526 : 165685 : son = next_dom_son (CDI_DOMINATORS, son))
527 : : {
528 : 165685 : if (!flow_bb_inside_loop_p (loop, son))
529 : 31797 : continue;
530 : 133888 : son->count = son->count.apply_scale (num, den);
531 : 133888 : worklist.safe_push (son);
532 : : }
533 : 32631 : }
534 : :
535 : : /* Return exit that suitable for update when loop iterations
536 : : changed. */
537 : :
538 : : static edge
539 : 94673 : loop_exit_for_scaling (class loop *loop)
540 : : {
541 : 94673 : edge exit_edge = single_exit (loop);
542 : 94673 : if (!exit_edge)
543 : : {
544 : 6981 : auto_vec<edge> exits = get_loop_exit_edges (loop);
545 : 6981 : exit_edge = single_likely_exit (loop, exits);
546 : 6981 : }
547 : 94673 : return exit_edge;
548 : : }
549 : :
550 : : /* Assume that loop's entry count and profile up to a given EXIT_EDGE is
551 : : consistent. Update exit probability so loop exists with PROFILE_COUNT
552 : : and rescale profile of basic blocks inside loop dominated by EXIT_EDGE->src.
553 : :
554 : : This is useful after number of iteraitons of loop has changed.
555 : : If EXIT_EDGE is NULL, the function will try to identify suitable exit.
556 : : If DESIRED_COUNT is NULL, loop entry count will be used.
557 : : In consistent profile usually loop exists as many times as it is entred.
558 : :
559 : : Return updated exit if successfull and NULL otherwise. */
560 : :
561 : : edge
562 : 122489 : update_loop_exit_probability_scale_dom_bbs (class loop *loop,
563 : : edge exit_edge,
564 : : profile_count desired_count)
565 : : {
566 : 122489 : if (!exit_edge)
567 : 2223 : exit_edge = loop_exit_for_scaling (loop);
568 : 2223 : if (!exit_edge)
569 : : {
570 : 1477 : if (dump_file && (dump_flags & TDF_DETAILS))
571 : : {
572 : 210 : fprintf (dump_file, ";; Not updating exit probability of loop %i;"
573 : : " it has no single exit\n",
574 : : loop->num);
575 : : }
576 : 1477 : return NULL;
577 : : }
578 : : /* If exit is inside another loop, adjusting its probability will also
579 : : adjust number of the inner loop iterations. Just do noting for now. */
580 : 121012 : if (!just_once_each_iteration_p (loop, exit_edge->src))
581 : : {
582 : 9 : if (dump_file && (dump_flags & TDF_DETAILS))
583 : : {
584 : 0 : fprintf (dump_file, ";; Not updating exit probability of loop %i;"
585 : : " exit is inside inner loop\n",
586 : : loop->num);
587 : : }
588 : 9 : return NULL;
589 : : }
590 : : /* Normal loops exit as many times as they are entered. */
591 : 121003 : if (!desired_count.initialized_p ())
592 : 1457 : desired_count = loop_count_in (loop);
593 : : /* See if everything is already perfect. */
594 : 121003 : if (desired_count == exit_edge->count ())
595 : 5896 : return exit_edge;
596 : 115107 : profile_count old_exit_count = exit_edge->count ();
597 : : /* See if update is possible.
598 : : Avoid turning probability to 0 or 1 just trying to reach impossible
599 : : value.
600 : :
601 : : Natural profile update after some loop will happily scale header count to
602 : : be less than count of entering the loop. This is bad idea and they should
603 : : special case maybe_flat_loop_profile.
604 : :
605 : : It may also happen that the source basic block of the exit edge is
606 : : inside in-loop condition:
607 : :
608 : : +-> header
609 : : | |
610 : : | B1
611 : : | / \
612 : : | | B2--exit_edge-->
613 : : | \ /
614 : : | B3
615 : : +__/
616 : :
617 : : If B2 count is smaller than desired exit edge count
618 : : the profile was inconsistent with the newly discovered upper bound.
619 : : Probablity of edge B1->B2 is too low. We do not attempt to fix
620 : : that (as it is hard in general). */
621 : 115107 : if (desired_count > exit_edge->src->count
622 : 115107 : && exit_edge->src->count.differs_from_p (desired_count))
623 : : {
624 : 416 : if (dump_file)
625 : : {
626 : 3 : fprintf (dump_file, ";; Source bb of loop %i has count ",
627 : : loop->num);
628 : 3 : exit_edge->src->count.dump (dump_file, cfun);
629 : 3 : fprintf (dump_file,
630 : : " which is smaller then desired count of exitting loop ");
631 : 3 : desired_count.dump (dump_file, cfun);
632 : 3 : fprintf (dump_file, ". Profile update is impossible.\n");
633 : : }
634 : : /* Drop quality of probability since we know it likely
635 : : bad. */
636 : 416 : exit_edge->probability = exit_edge->probability.guessed ();
637 : 416 : return NULL;
638 : : }
639 : 114691 : if (!exit_edge->src->count.nonzero_p ())
640 : : {
641 : 1 : if (dump_file)
642 : : {
643 : 0 : fprintf (dump_file, ";; Not updating exit edge probability"
644 : : " in loop %i since profile is zero ",
645 : : loop->num);
646 : : }
647 : 1 : return NULL;
648 : : }
649 : 114690 : set_edge_probability_and_rescale_others
650 : 114690 : (exit_edge, desired_count.probability_in (exit_edge->src->count));
651 : : /* Rescale the remaining edge probabilities and see if there is only
652 : : one. */
653 : 114690 : edge other_edge = NULL;
654 : 114690 : bool found = false;
655 : 114690 : edge e;
656 : 114690 : edge_iterator ei;
657 : 344071 : FOR_EACH_EDGE (e, ei, exit_edge->src->succs)
658 : 229392 : if (!(e->flags & EDGE_FAKE)
659 : 229392 : && !loop_exit_edge_p (loop, e))
660 : : {
661 : 114701 : if (found)
662 : : {
663 : : other_edge = NULL;
664 : : break;
665 : : }
666 : : other_edge = e;
667 : : found = true;
668 : : }
669 : : /* If there is only loop latch after other edge,
670 : : update its profile. This is tiny bit more precise
671 : : than scaling. */
672 : 114690 : if (other_edge && other_edge->dest == loop->latch)
673 : : {
674 : 82742 : if (single_pred_p (loop->latch))
675 : 82742 : loop->latch->count = loop->latch->count
676 : 82742 : + old_exit_count - exit_edge->count ();
677 : : }
678 : : else
679 : : /* If there are multiple blocks, just scale. */
680 : 63896 : scale_dominated_blocks_in_loop (loop, exit_edge->src,
681 : 63896 : exit_edge->src->count - exit_edge->count (),
682 : 31948 : exit_edge->src->count - old_exit_count);
683 : : return exit_edge;
684 : : }
685 : :
686 : : /* Scale profile in LOOP by P.
687 : : If ITERATION_BOUND is not -1, scale even further if loop is predicted
688 : : to iterate too many times.
689 : : Before caling this function, preheader block profile should be already
690 : : scaled to final count. This is necessary because loop iterations are
691 : : determined by comparing header edge count to latch ege count and thus
692 : : they need to be scaled synchronously. */
693 : :
694 : : void
695 : 255812 : scale_loop_profile (class loop *loop, profile_probability p,
696 : : gcov_type iteration_bound)
697 : : {
698 : 255812 : if (!(p == profile_probability::always ()))
699 : : {
700 : 71502 : if (dump_file && (dump_flags & TDF_DETAILS))
701 : : {
702 : 9901 : fprintf (dump_file, ";; Scaling loop %i with scale ",
703 : : loop->num);
704 : 9901 : p.dump (dump_file);
705 : 9901 : fprintf (dump_file, "\n");
706 : : }
707 : :
708 : : /* Scale the probabilities. */
709 : 71502 : scale_loop_frequencies (loop, p);
710 : : }
711 : :
712 : 255812 : if (iteration_bound == -1)
713 : 163362 : return;
714 : :
715 : 208351 : sreal iterations;
716 : 208351 : if (!expected_loop_iterations_by_profile (loop, &iterations))
717 : : return;
718 : :
719 : 207791 : if (dump_file && (dump_flags & TDF_DETAILS))
720 : : {
721 : 13972 : fprintf (dump_file,
722 : : ";; Guessed iterations of loop %i is %f. New upper bound %i.\n",
723 : : loop->num,
724 : : iterations.to_double (),
725 : : (int)iteration_bound);
726 : : }
727 : :
728 : : /* See if loop is predicted to iterate too many times. */
729 : 207791 : if (iterations <= (sreal)iteration_bound)
730 : : return;
731 : :
732 : 92450 : profile_count count_in = loop_count_in (loop);
733 : :
734 : : /* Now scale the loop body so header count is
735 : : count_in * (iteration_bound + 1) */
736 : 92450 : profile_probability scale_prob
737 : 92450 : = (count_in * (iteration_bound + 1)).probability_in (loop->header->count);
738 : 92450 : if (dump_file && (dump_flags & TDF_DETAILS))
739 : : {
740 : 9829 : fprintf (dump_file, ";; Scaling loop %i with scale ",
741 : : loop->num);
742 : 9829 : scale_prob.dump (dump_file);
743 : 9829 : fprintf (dump_file, " to reach upper bound %i\n",
744 : : (int)iteration_bound);
745 : : }
746 : : /* Finally attempt to fix exit edge probability. */
747 : 92450 : edge exit_edge = loop_exit_for_scaling (loop);
748 : :
749 : : /* In a consistent profile unadjusted_exit_count should be same as count_in,
750 : : however to preserve as much of the original info, avoid recomputing
751 : : it. */
752 : 92450 : profile_count unadjusted_exit_count = profile_count::uninitialized ();
753 : 92450 : if (exit_edge)
754 : 90973 : unadjusted_exit_count = exit_edge->count ();
755 : 92450 : scale_loop_frequencies (loop, scale_prob);
756 : 92450 : update_loop_exit_probability_scale_dom_bbs (loop, exit_edge,
757 : : unadjusted_exit_count);
758 : : }
759 : :
760 : : /* Recompute dominance information for basic blocks outside LOOP. */
761 : :
762 : : static void
763 : 68341 : update_dominators_in_loop (class loop *loop)
764 : : {
765 : 68341 : vec<basic_block> dom_bbs = vNULL;
766 : 68341 : basic_block *body;
767 : 68341 : unsigned i;
768 : :
769 : 68341 : auto_sbitmap seen (last_basic_block_for_fn (cfun));
770 : 68341 : bitmap_clear (seen);
771 : 68341 : body = get_loop_body (loop);
772 : :
773 : 490252 : for (i = 0; i < loop->num_nodes; i++)
774 : 353570 : bitmap_set_bit (seen, body[i]->index);
775 : :
776 : 421911 : for (i = 0; i < loop->num_nodes; i++)
777 : : {
778 : 353570 : basic_block ldom;
779 : :
780 : 353570 : for (ldom = first_dom_son (CDI_DOMINATORS, body[i]);
781 : 673472 : ldom;
782 : 319902 : ldom = next_dom_son (CDI_DOMINATORS, ldom))
783 : 319902 : if (!bitmap_bit_p (seen, ldom->index))
784 : : {
785 : 34673 : bitmap_set_bit (seen, ldom->index);
786 : 34673 : dom_bbs.safe_push (ldom);
787 : : }
788 : : }
789 : :
790 : 68341 : iterate_fix_dominators (CDI_DOMINATORS, dom_bbs, false);
791 : 68341 : free (body);
792 : 68341 : dom_bbs.release ();
793 : 68341 : }
794 : :
795 : : /* Creates an if region as shown above. CONDITION is used to create
796 : : the test for the if.
797 : :
798 : : |
799 : : | ------------- -------------
800 : : | | pred_bb | | pred_bb |
801 : : | ------------- -------------
802 : : | | |
803 : : | | | ENTRY_EDGE
804 : : | | ENTRY_EDGE V
805 : : | | ====> -------------
806 : : | | | cond_bb |
807 : : | | | CONDITION |
808 : : | | -------------
809 : : | V / \
810 : : | ------------- e_false / \ e_true
811 : : | | succ_bb | V V
812 : : | ------------- ----------- -----------
813 : : | | false_bb | | true_bb |
814 : : | ----------- -----------
815 : : | \ /
816 : : | \ /
817 : : | V V
818 : : | -------------
819 : : | | join_bb |
820 : : | -------------
821 : : | | exit_edge (result)
822 : : | V
823 : : | -----------
824 : : | | succ_bb |
825 : : | -----------
826 : : |
827 : : */
828 : :
829 : : edge
830 : 201 : create_empty_if_region_on_edge (edge entry_edge, tree condition)
831 : : {
832 : :
833 : 201 : basic_block cond_bb, true_bb, false_bb, join_bb;
834 : 201 : edge e_true, e_false, exit_edge;
835 : 201 : gcond *cond_stmt;
836 : 201 : tree simple_cond;
837 : 201 : gimple_stmt_iterator gsi;
838 : :
839 : 201 : cond_bb = split_edge (entry_edge);
840 : :
841 : : /* Insert condition in cond_bb. */
842 : 201 : gsi = gsi_last_bb (cond_bb);
843 : 201 : simple_cond =
844 : 201 : force_gimple_operand_gsi (&gsi, condition, true, NULL,
845 : : false, GSI_NEW_STMT);
846 : 201 : cond_stmt = gimple_build_cond_from_tree (simple_cond, NULL_TREE, NULL_TREE);
847 : 201 : gsi = gsi_last_bb (cond_bb);
848 : 201 : gsi_insert_after (&gsi, cond_stmt, GSI_NEW_STMT);
849 : :
850 : 201 : join_bb = split_edge (single_succ_edge (cond_bb));
851 : :
852 : 201 : e_true = single_succ_edge (cond_bb);
853 : 201 : true_bb = split_edge (e_true);
854 : :
855 : 201 : e_false = make_edge (cond_bb, join_bb, 0);
856 : 201 : false_bb = split_edge (e_false);
857 : :
858 : 201 : e_true->flags &= ~EDGE_FALLTHRU;
859 : 201 : e_true->flags |= EDGE_TRUE_VALUE;
860 : 201 : e_false->flags &= ~EDGE_FALLTHRU;
861 : 201 : e_false->flags |= EDGE_FALSE_VALUE;
862 : :
863 : 201 : set_immediate_dominator (CDI_DOMINATORS, cond_bb, entry_edge->src);
864 : 201 : set_immediate_dominator (CDI_DOMINATORS, true_bb, cond_bb);
865 : 201 : set_immediate_dominator (CDI_DOMINATORS, false_bb, cond_bb);
866 : 201 : set_immediate_dominator (CDI_DOMINATORS, join_bb, cond_bb);
867 : :
868 : 201 : exit_edge = single_succ_edge (join_bb);
869 : :
870 : 201 : if (single_pred_p (exit_edge->dest))
871 : 201 : set_immediate_dominator (CDI_DOMINATORS, exit_edge->dest, join_bb);
872 : :
873 : 201 : return exit_edge;
874 : : }
875 : :
876 : : /* create_empty_loop_on_edge
877 : : |
878 : : | - pred_bb - ------ pred_bb ------
879 : : | | | | iv0 = initial_value |
880 : : | -----|----- ---------|-----------
881 : : | | ______ | entry_edge
882 : : | | entry_edge / | |
883 : : | | ====> | -V---V- loop_header -------------
884 : : | V | | iv_before = phi (iv0, iv_after) |
885 : : | - succ_bb - | ---|-----------------------------
886 : : | | | | |
887 : : | ----------- | ---V--- loop_body ---------------
888 : : | | | iv_after = iv_before + stride |
889 : : | | | if (iv_before < upper_bound) |
890 : : | | ---|--------------\--------------
891 : : | | | \ exit_e
892 : : | | V \
893 : : | | - loop_latch - V- succ_bb -
894 : : | | | | | |
895 : : | | /------------- -----------
896 : : | \ ___ /
897 : :
898 : : Creates an empty loop as shown above, the IV_BEFORE is the SSA_NAME
899 : : that is used before the increment of IV. IV_BEFORE should be used for
900 : : adding code to the body that uses the IV. OUTER is the outer loop in
901 : : which the new loop should be inserted.
902 : :
903 : : Both INITIAL_VALUE and UPPER_BOUND expressions are gimplified and
904 : : inserted on the loop entry edge. This implies that this function
905 : : should be used only when the UPPER_BOUND expression is a loop
906 : : invariant. */
907 : :
908 : : class loop *
909 : 473 : create_empty_loop_on_edge (edge entry_edge,
910 : : tree initial_value,
911 : : tree stride, tree upper_bound,
912 : : tree iv,
913 : : tree *iv_before,
914 : : tree *iv_after,
915 : : class loop *outer)
916 : : {
917 : 473 : basic_block loop_header, loop_latch, succ_bb, pred_bb;
918 : 473 : class loop *loop;
919 : 473 : gimple_stmt_iterator gsi;
920 : 473 : gimple_seq stmts;
921 : 473 : gcond *cond_expr;
922 : 473 : tree exit_test;
923 : 473 : edge exit_e;
924 : :
925 : 473 : gcc_assert (entry_edge && initial_value && stride && upper_bound && iv);
926 : :
927 : : /* Create header, latch and wire up the loop. */
928 : 473 : pred_bb = entry_edge->src;
929 : 473 : loop_header = split_edge (entry_edge);
930 : 473 : loop_latch = split_edge (single_succ_edge (loop_header));
931 : 473 : succ_bb = single_succ (loop_latch);
932 : 473 : make_edge (loop_header, succ_bb, 0);
933 : 473 : redirect_edge_succ_nodup (single_succ_edge (loop_latch), loop_header);
934 : :
935 : : /* Set immediate dominator information. */
936 : 473 : set_immediate_dominator (CDI_DOMINATORS, loop_header, pred_bb);
937 : 473 : set_immediate_dominator (CDI_DOMINATORS, loop_latch, loop_header);
938 : 473 : set_immediate_dominator (CDI_DOMINATORS, succ_bb, loop_header);
939 : :
940 : : /* Initialize a loop structure and put it in a loop hierarchy. */
941 : 473 : loop = alloc_loop ();
942 : 473 : loop->header = loop_header;
943 : 473 : loop->latch = loop_latch;
944 : 473 : add_loop (loop, outer);
945 : :
946 : : /* TODO: Fix counts. */
947 : 473 : scale_loop_frequencies (loop, profile_probability::even ());
948 : :
949 : : /* Update dominators. */
950 : 473 : update_dominators_in_loop (loop);
951 : :
952 : : /* Modify edge flags. */
953 : 473 : exit_e = single_exit (loop);
954 : 473 : exit_e->flags = EDGE_LOOP_EXIT | EDGE_FALSE_VALUE;
955 : 473 : single_pred_edge (loop_latch)->flags = EDGE_TRUE_VALUE;
956 : :
957 : : /* Construct IV code in loop. */
958 : 473 : initial_value = force_gimple_operand (initial_value, &stmts, true, iv);
959 : 473 : if (stmts)
960 : : {
961 : 17 : gsi_insert_seq_on_edge (loop_preheader_edge (loop), stmts);
962 : 17 : gsi_commit_edge_inserts ();
963 : : }
964 : :
965 : 473 : upper_bound = force_gimple_operand (upper_bound, &stmts, true, NULL);
966 : 473 : if (stmts)
967 : : {
968 : 161 : gsi_insert_seq_on_edge (loop_preheader_edge (loop), stmts);
969 : 161 : gsi_commit_edge_inserts ();
970 : : }
971 : :
972 : 473 : gsi = gsi_last_bb (loop_header);
973 : 473 : create_iv (initial_value, PLUS_EXPR, stride, iv, loop, &gsi, false,
974 : : iv_before, iv_after);
975 : :
976 : : /* Insert loop exit condition. */
977 : 473 : cond_expr = gimple_build_cond
978 : 473 : (LT_EXPR, *iv_before, upper_bound, NULL_TREE, NULL_TREE);
979 : :
980 : 473 : exit_test = gimple_cond_lhs (cond_expr);
981 : 473 : exit_test = force_gimple_operand_gsi (&gsi, exit_test, true, NULL,
982 : : false, GSI_NEW_STMT);
983 : 473 : gimple_cond_set_lhs (cond_expr, exit_test);
984 : 473 : gsi = gsi_last_bb (exit_e->src);
985 : 473 : gsi_insert_after (&gsi, cond_expr, GSI_NEW_STMT);
986 : :
987 : 473 : split_block_after_labels (loop_header);
988 : :
989 : 473 : return loop;
990 : : }
991 : :
992 : : /* Remove the latch edge of a LOOP and update loops to indicate that
993 : : the LOOP was removed. After this function, original loop latch will
994 : : have no successor, which caller is expected to fix somehow.
995 : :
996 : : If this may cause the information about irreducible regions to become
997 : : invalid, IRRED_INVALIDATED is set to true.
998 : :
999 : : LOOP_CLOSED_SSA_INVALIDATED, if non-NULL, is a bitmap where we store
1000 : : basic blocks that had non-trivial update on their loop_father.*/
1001 : :
1002 : : void
1003 : 121867 : unloop (class loop *loop, bool *irred_invalidated,
1004 : : bitmap loop_closed_ssa_invalidated)
1005 : : {
1006 : 121867 : basic_block *body;
1007 : 121867 : class loop *ploop;
1008 : 121867 : unsigned i, n;
1009 : 121867 : basic_block latch = loop->latch;
1010 : 121867 : bool dummy = false;
1011 : :
1012 : 121867 : if (loop_preheader_edge (loop)->flags & EDGE_IRREDUCIBLE_LOOP)
1013 : 20 : *irred_invalidated = true;
1014 : :
1015 : : /* This is relatively straightforward. The dominators are unchanged, as
1016 : : loop header dominates loop latch, so the only thing we have to care of
1017 : : is the placement of loops and basic blocks inside the loop tree. We
1018 : : move them all to the loop->outer, and then let fix_bb_placements do
1019 : : its work. */
1020 : :
1021 : 121867 : body = get_loop_body (loop);
1022 : 121867 : n = loop->num_nodes;
1023 : 502996 : for (i = 0; i < n; i++)
1024 : 381129 : if (body[i]->loop_father == loop)
1025 : : {
1026 : 362626 : remove_bb_from_loops (body[i]);
1027 : 362626 : add_bb_to_loop (body[i], loop_outer (loop));
1028 : : }
1029 : 121867 : free (body);
1030 : :
1031 : 124084 : while (loop->inner)
1032 : : {
1033 : 2217 : ploop = loop->inner;
1034 : 2217 : flow_loop_tree_node_remove (ploop);
1035 : 2217 : flow_loop_tree_node_add (loop_outer (loop), ploop);
1036 : : }
1037 : :
1038 : : /* Remove the loop and free its data. */
1039 : 121867 : delete_loop (loop);
1040 : :
1041 : 121867 : remove_edge (single_succ_edge (latch));
1042 : :
1043 : : /* We do not pass IRRED_INVALIDATED to fix_bb_placements here, as even if
1044 : : there is an irreducible region inside the cancelled loop, the flags will
1045 : : be still correct. */
1046 : 121867 : fix_bb_placements (latch, &dummy, loop_closed_ssa_invalidated);
1047 : 121867 : }
1048 : :
1049 : : /* Fix placement of superloops of LOOP inside loop tree, i.e. ensure that
1050 : : condition stated in description of fix_loop_placement holds for them.
1051 : : It is used in case when we removed some edges coming out of LOOP, which
1052 : : may cause the right placement of LOOP inside loop tree to change.
1053 : :
1054 : : IRRED_INVALIDATED is set to true if a change in the loop structures might
1055 : : invalidate the information about irreducible regions. */
1056 : :
1057 : : static void
1058 : 399567 : fix_loop_placements (class loop *loop, bool *irred_invalidated,
1059 : : bitmap loop_closed_ssa_invalidated)
1060 : : {
1061 : 399567 : class loop *outer;
1062 : :
1063 : 399571 : while (loop_outer (loop))
1064 : : {
1065 : 181129 : outer = loop_outer (loop);
1066 : 181129 : if (!fix_loop_placement (loop, irred_invalidated,
1067 : : loop_closed_ssa_invalidated))
1068 : : break;
1069 : :
1070 : : /* Changing the placement of a loop in the loop tree may alter the
1071 : : validity of condition 2) of the description of fix_bb_placement
1072 : : for its preheader, because the successor is the header and belongs
1073 : : to the loop. So call fix_bb_placements to fix up the placement
1074 : : of the preheader and (possibly) of its predecessors. */
1075 : 4 : fix_bb_placements (loop_preheader_edge (loop)->src,
1076 : : irred_invalidated, loop_closed_ssa_invalidated);
1077 : 4 : loop = outer;
1078 : : }
1079 : 399567 : }
1080 : :
1081 : : /* Duplicate loop bounds and other information we store about
1082 : : the loop into its duplicate. */
1083 : :
1084 : : void
1085 : 617484 : copy_loop_info (class loop *loop, class loop *target)
1086 : : {
1087 : 617484 : gcc_checking_assert (!target->any_upper_bound && !target->any_estimate);
1088 : 617484 : target->any_upper_bound = loop->any_upper_bound;
1089 : 617484 : target->nb_iterations_upper_bound = loop->nb_iterations_upper_bound;
1090 : 617484 : target->any_likely_upper_bound = loop->any_likely_upper_bound;
1091 : 617484 : target->nb_iterations_likely_upper_bound
1092 : 617484 : = loop->nb_iterations_likely_upper_bound;
1093 : 617484 : target->any_estimate = loop->any_estimate;
1094 : 617484 : target->nb_iterations_estimate = loop->nb_iterations_estimate;
1095 : 617484 : target->estimate_state = loop->estimate_state;
1096 : 617484 : target->safelen = loop->safelen;
1097 : 617484 : target->simdlen = loop->simdlen;
1098 : 617484 : target->constraints = loop->constraints;
1099 : 617484 : target->can_be_parallel = loop->can_be_parallel;
1100 : 617484 : target->warned_aggressive_loop_optimizations
1101 : 617484 : |= loop->warned_aggressive_loop_optimizations;
1102 : 617484 : target->dont_vectorize = loop->dont_vectorize;
1103 : 617484 : target->force_vectorize = loop->force_vectorize;
1104 : 617484 : target->in_oacc_kernels_region = loop->in_oacc_kernels_region;
1105 : 617484 : target->finite_p = loop->finite_p;
1106 : 617484 : target->unroll = loop->unroll;
1107 : 617484 : target->owned_clique = loop->owned_clique;
1108 : 617484 : }
1109 : :
1110 : : /* Copies copy of LOOP as subloop of TARGET loop, placing newly
1111 : : created loop into loops structure. If AFTER is non-null
1112 : : the new loop is added at AFTER->next, otherwise in front of TARGETs
1113 : : sibling list. */
1114 : : class loop *
1115 : 37443 : duplicate_loop (class loop *loop, class loop *target, class loop *after)
1116 : : {
1117 : 37443 : class loop *cloop;
1118 : 37443 : cloop = alloc_loop ();
1119 : 37443 : place_new_loop (cfun, cloop);
1120 : :
1121 : 37443 : copy_loop_info (loop, cloop);
1122 : :
1123 : : /* Mark the new loop as copy of LOOP. */
1124 : 37443 : set_loop_copy (loop, cloop);
1125 : :
1126 : : /* Add it to target. */
1127 : 37443 : flow_loop_tree_node_add (target, cloop, after);
1128 : :
1129 : 37443 : return cloop;
1130 : : }
1131 : :
1132 : : /* Copies structure of subloops of LOOP into TARGET loop, placing
1133 : : newly created loops into loop tree at the end of TARGETs sibling
1134 : : list in the original order. */
1135 : : void
1136 : 37443 : duplicate_subloops (class loop *loop, class loop *target)
1137 : : {
1138 : 37443 : class loop *aloop, *cloop, *tail;
1139 : :
1140 : 37443 : for (tail = target->inner; tail && tail->next; tail = tail->next)
1141 : : ;
1142 : 38820 : for (aloop = loop->inner; aloop; aloop = aloop->next)
1143 : : {
1144 : 1377 : cloop = duplicate_loop (aloop, target, tail);
1145 : 1377 : tail = cloop;
1146 : 1377 : gcc_assert(!tail->next);
1147 : 1377 : duplicate_subloops (aloop, cloop);
1148 : : }
1149 : 37443 : }
1150 : :
1151 : : /* Copies structure of subloops of N loops, stored in array COPIED_LOOPS,
1152 : : into TARGET loop, placing newly created loops into loop tree adding
1153 : : them to TARGETs sibling list at the end in order. */
1154 : : static void
1155 : 496390 : copy_loops_to (class loop **copied_loops, int n, class loop *target)
1156 : : {
1157 : 496390 : class loop *aloop, *tail;
1158 : 496390 : int i;
1159 : :
1160 : 8079580 : for (tail = target->inner; tail && tail->next; tail = tail->next)
1161 : : ;
1162 : 500735 : for (i = 0; i < n; i++)
1163 : : {
1164 : 4345 : aloop = duplicate_loop (copied_loops[i], target, tail);
1165 : 4345 : tail = aloop;
1166 : 4345 : gcc_assert(!tail->next);
1167 : 4345 : duplicate_subloops (copied_loops[i], aloop);
1168 : : }
1169 : 496390 : }
1170 : :
1171 : : /* Redirects edge E to basic block DEST. */
1172 : : static void
1173 : 33934 : loop_redirect_edge (edge e, basic_block dest)
1174 : : {
1175 : 0 : if (e->dest == dest)
1176 : : return;
1177 : :
1178 : 33934 : redirect_edge_and_branch_force (e, dest);
1179 : : }
1180 : :
1181 : : /* Check whether LOOP's body can be duplicated. */
1182 : : bool
1183 : 482777 : can_duplicate_loop_p (const class loop *loop)
1184 : : {
1185 : 482777 : int ret;
1186 : 482777 : basic_block *bbs = get_loop_body (loop);
1187 : :
1188 : 482777 : ret = can_copy_bbs_p (bbs, loop->num_nodes);
1189 : 482777 : free (bbs);
1190 : :
1191 : 482777 : return ret;
1192 : : }
1193 : :
1194 : : /* Duplicates body of LOOP to given edge E NDUPL times. Takes care of updating
1195 : : loop structure and dominators (order of inner subloops is retained).
1196 : : E's destination must be LOOP header for this to work, i.e. it must be entry
1197 : : or latch edge of this loop; these are unique, as the loops must have
1198 : : preheaders for this function to work correctly (in case E is latch, the
1199 : : function unrolls the loop, if E is entry edge, it peels the loop). Store
1200 : : edges created by copying ORIG edge from copies corresponding to set bits in
1201 : : WONT_EXIT bitmap (bit 0 corresponds to original LOOP body, the other copies
1202 : : are numbered in order given by control flow through them) into TO_REMOVE
1203 : : array. Returns false if duplication is
1204 : : impossible. */
1205 : :
1206 : : bool
1207 : 244218 : duplicate_loop_body_to_header_edge (class loop *loop, edge e,
1208 : : unsigned int ndupl, sbitmap wont_exit,
1209 : : edge orig, vec<edge> *to_remove, int flags)
1210 : : {
1211 : 244218 : class loop *target, *aloop;
1212 : 244218 : class loop **orig_loops;
1213 : 244218 : unsigned n_orig_loops;
1214 : 244218 : basic_block header = loop->header, latch = loop->latch;
1215 : 244218 : basic_block *new_bbs, *bbs, *first_active;
1216 : 244218 : basic_block new_bb, bb, first_active_latch = NULL;
1217 : 244218 : edge ae, latch_edge;
1218 : 244218 : edge spec_edges[2], new_spec_edges[2];
1219 : 244218 : const int SE_LATCH = 0;
1220 : 244218 : const int SE_ORIG = 1;
1221 : 244218 : unsigned i, j, n;
1222 : 244218 : int is_latch = (latch == e->src);
1223 : 244218 : profile_probability *scale_step = NULL;
1224 : 244218 : profile_probability scale_main = profile_probability::always ();
1225 : 244218 : profile_probability scale_act = profile_probability::always ();
1226 : 244218 : profile_count after_exit_num = profile_count::zero (),
1227 : 244218 : after_exit_den = profile_count::zero ();
1228 : 244218 : bool scale_after_exit = false;
1229 : 244218 : int add_irreducible_flag;
1230 : 244218 : basic_block place_after;
1231 : 244218 : bitmap bbs_to_scale = NULL;
1232 : 244218 : bitmap_iterator bi;
1233 : :
1234 : 244218 : gcc_assert (e->dest == loop->header);
1235 : 244218 : gcc_assert (ndupl > 0);
1236 : :
1237 : 244218 : if (orig)
1238 : : {
1239 : : /* Orig must be edge out of the loop. */
1240 : 201682 : gcc_assert (flow_bb_inside_loop_p (loop, orig->src));
1241 : 201682 : gcc_assert (!flow_bb_inside_loop_p (loop, orig->dest));
1242 : : }
1243 : :
1244 : 244218 : n = loop->num_nodes;
1245 : 244218 : bbs = get_loop_body_in_dom_order (loop);
1246 : 244218 : gcc_assert (bbs[0] == loop->header);
1247 : 244218 : gcc_assert (bbs[n - 1] == loop->latch);
1248 : :
1249 : : /* Check whether duplication is possible. */
1250 : 244218 : if (!can_copy_bbs_p (bbs, loop->num_nodes))
1251 : : {
1252 : 19 : free (bbs);
1253 : 19 : return false;
1254 : : }
1255 : 244199 : new_bbs = XNEWVEC (basic_block, loop->num_nodes);
1256 : :
1257 : : /* In case we are doing loop peeling and the loop is in the middle of
1258 : : irreducible region, the peeled copies will be inside it too. */
1259 : 244199 : add_irreducible_flag = e->flags & EDGE_IRREDUCIBLE_LOOP;
1260 : 244199 : gcc_assert (!is_latch || !add_irreducible_flag);
1261 : :
1262 : : /* Find edge from latch. */
1263 : 244199 : latch_edge = loop_latch_edge (loop);
1264 : :
1265 : 244199 : if (flags & DLTHE_FLAG_UPDATE_FREQ)
1266 : : {
1267 : : /* Calculate coefficients by that we have to scale counts
1268 : : of duplicated loop bodies. */
1269 : 210265 : profile_count count_in = header->count;
1270 : 210265 : profile_count count_le = latch_edge->count ();
1271 : 210265 : profile_count count_out_orig = orig ? orig->count () : count_in - count_le;
1272 : 210265 : profile_probability prob_pass_thru = count_le.probability_in (count_in);
1273 : 210265 : profile_count new_count_le = count_le + count_out_orig;
1274 : :
1275 : 201673 : if (orig && orig->probability.initialized_p ()
1276 : 411860 : && !(orig->probability == profile_probability::always ()))
1277 : : {
1278 : : /* The blocks that are dominated by a removed exit edge ORIG have
1279 : : frequencies scaled by this. */
1280 : 201522 : if (orig->count ().initialized_p ())
1281 : : {
1282 : 201522 : after_exit_num = orig->src->count;
1283 : 201522 : after_exit_den = after_exit_num - orig->count ();
1284 : 201522 : scale_after_exit = true;
1285 : : }
1286 : 201522 : bbs_to_scale = BITMAP_ALLOC (NULL);
1287 : 715380 : for (i = 0; i < n; i++)
1288 : : {
1289 : 513858 : if (bbs[i] != orig->src
1290 : 513858 : && dominated_by_p (CDI_DOMINATORS, bbs[i], orig->src))
1291 : 238077 : bitmap_set_bit (bbs_to_scale, i);
1292 : : }
1293 : : /* Since we will scale up all basic blocks dominated by orig, exits
1294 : : will become more likely; compensate for that. */
1295 : 201522 : if (after_exit_den.nonzero_p ())
1296 : : {
1297 : 200875 : auto_vec<edge> exits = get_loop_exit_edges (loop);
1298 : 865365 : for (edge ex : exits)
1299 : 262740 : if (ex != orig
1300 : 262740 : && dominated_by_p (CDI_DOMINATORS, ex->src, orig->src))
1301 : 37246 : new_count_le -= ex->count ().apply_scale (after_exit_num
1302 : : - after_exit_den,
1303 : 18623 : after_exit_den);
1304 : 200875 : }
1305 : : }
1306 : 210265 : profile_probability prob_pass_wont_exit =
1307 : 210265 : new_count_le.probability_in (count_in);
1308 : : /* If profile count is 0, the probability will be uninitialized.
1309 : : We can set probability to any initialized value to avoid
1310 : : precision loss. If profile is sane, all counts will be 0 anyway. */
1311 : 210265 : if (!count_in.nonzero_p ())
1312 : : {
1313 : 556 : prob_pass_thru
1314 : 556 : = profile_probability::always ().apply_scale (1, 2);
1315 : 556 : prob_pass_wont_exit
1316 : 556 : = profile_probability::always ().apply_scale (1, 2);
1317 : : }
1318 : :
1319 : 210265 : scale_step = XNEWVEC (profile_probability, ndupl);
1320 : :
1321 : 672721 : for (i = 1; i <= ndupl; i++)
1322 : 924912 : scale_step[i - 1] = bitmap_bit_p (wont_exit, i)
1323 : 462456 : ? prob_pass_wont_exit
1324 : : : prob_pass_thru;
1325 : :
1326 : : /* Complete peeling is special as the probability of exit in last
1327 : : copy becomes 1. */
1328 : 420445 : if (!count_in.nonzero_p ())
1329 : : ;
1330 : 209709 : else if (flags & DLTHE_FLAG_COMPLETTE_PEEL)
1331 : : {
1332 : 95271 : profile_count wanted_count = e->count ();
1333 : :
1334 : 95271 : gcc_assert (!is_latch);
1335 : : /* First copy has count of incoming edge. Each subsequent
1336 : : count should be reduced by prob_pass_wont_exit. Caller
1337 : : should've managed the flags so all except for original loop
1338 : : has won't exist set. */
1339 : 95271 : scale_act = wanted_count.probability_in (count_in);
1340 : :
1341 : : /* Now simulate the duplication adjustments and compute header
1342 : : frequency of the last copy. */
1343 : 381337 : for (i = 0; i < ndupl; i++)
1344 : 286066 : wanted_count = wanted_count.apply_probability (scale_step [i]);
1345 : 95271 : scale_main = wanted_count.probability_in (count_in);
1346 : : }
1347 : : /* Here we insert loop bodies inside the loop itself (for loop unrolling).
1348 : : First iteration will be original loop followed by duplicated bodies.
1349 : : It is necessary to scale down the original so we get right overall
1350 : : number of iterations. */
1351 : 114438 : else if (is_latch)
1352 : : {
1353 : 48626 : profile_probability prob_pass_main = bitmap_bit_p (wont_exit, 0)
1354 : 48626 : ? prob_pass_wont_exit
1355 : : : prob_pass_thru;
1356 : 48626 : if (!(flags & DLTHE_FLAG_FLAT_PROFILE))
1357 : : {
1358 : 21144 : profile_probability p = prob_pass_main;
1359 : 21144 : profile_count scale_main_den = count_in;
1360 : 83638 : for (i = 0; i < ndupl; i++)
1361 : : {
1362 : 62494 : scale_main_den += count_in.apply_probability (p);
1363 : 62494 : p = p * scale_step[i];
1364 : : }
1365 : : /* If original loop is executed COUNT_IN times, the unrolled
1366 : : loop will account SCALE_MAIN_DEN times. */
1367 : 21144 : scale_main = count_in.probability_in (scale_main_den);
1368 : : }
1369 : : else
1370 : : scale_main = profile_probability::always ();
1371 : 48626 : scale_act = scale_main * prob_pass_main;
1372 : : }
1373 : : else
1374 : : {
1375 : 65812 : profile_count preheader_count = e->count ();
1376 : 141588 : for (i = 0; i < ndupl; i++)
1377 : 75776 : scale_main = scale_main * scale_step[i];
1378 : 65812 : scale_act = preheader_count.probability_in (count_in);
1379 : : }
1380 : : }
1381 : :
1382 : : /* Loop the new bbs will belong to. */
1383 : 244199 : target = e->src->loop_father;
1384 : :
1385 : : /* Original loops. */
1386 : 244199 : n_orig_loops = 0;
1387 : 247804 : for (aloop = loop->inner; aloop; aloop = aloop->next)
1388 : 3605 : n_orig_loops++;
1389 : 244199 : orig_loops = XNEWVEC (class loop *, n_orig_loops);
1390 : 247804 : for (aloop = loop->inner, i = 0; aloop; aloop = aloop->next, i++)
1391 : 3605 : orig_loops[i] = aloop;
1392 : :
1393 : 244199 : set_loop_copy (loop, target);
1394 : :
1395 : 244199 : first_active = XNEWVEC (basic_block, n);
1396 : 244199 : if (is_latch)
1397 : : {
1398 : 48758 : memcpy (first_active, bbs, n * sizeof (basic_block));
1399 : 48758 : first_active_latch = latch;
1400 : : }
1401 : :
1402 : 244199 : spec_edges[SE_ORIG] = orig;
1403 : 244199 : spec_edges[SE_LATCH] = latch_edge;
1404 : :
1405 : 244199 : place_after = e->src;
1406 : 740589 : for (j = 0; j < ndupl; j++)
1407 : : {
1408 : : /* Copy loops. */
1409 : 496390 : copy_loops_to (orig_loops, n_orig_loops, target);
1410 : :
1411 : : /* Copy bbs. */
1412 : 496390 : copy_bbs (bbs, n, new_bbs, spec_edges, 2, new_spec_edges, loop,
1413 : : place_after, true);
1414 : 496390 : place_after = new_spec_edges[SE_LATCH]->src;
1415 : :
1416 : 496390 : if (flags & DLTHE_RECORD_COPY_NUMBER)
1417 : 337289 : for (i = 0; i < n; i++)
1418 : : {
1419 : 236985 : gcc_assert (!new_bbs[i]->aux);
1420 : 236985 : new_bbs[i]->aux = (void *)(size_t)(j + 1);
1421 : : }
1422 : :
1423 : : /* Note whether the blocks and edges belong to an irreducible loop. */
1424 : 496390 : if (add_irreducible_flag)
1425 : : {
1426 : 1426 : for (i = 0; i < n; i++)
1427 : 1062 : new_bbs[i]->flags |= BB_DUPLICATED;
1428 : 1426 : for (i = 0; i < n; i++)
1429 : : {
1430 : 1062 : edge_iterator ei;
1431 : 1062 : new_bb = new_bbs[i];
1432 : 1062 : if (new_bb->loop_father == target)
1433 : 1048 : new_bb->flags |= BB_IRREDUCIBLE_LOOP;
1434 : :
1435 : 2808 : FOR_EACH_EDGE (ae, ei, new_bb->succs)
1436 : 1746 : if ((ae->dest->flags & BB_DUPLICATED)
1437 : 1069 : && (ae->src->loop_father == target
1438 : 21 : || ae->dest->loop_father == target))
1439 : 1055 : ae->flags |= EDGE_IRREDUCIBLE_LOOP;
1440 : : }
1441 : 1426 : for (i = 0; i < n; i++)
1442 : 1062 : new_bbs[i]->flags &= ~BB_DUPLICATED;
1443 : : }
1444 : :
1445 : : /* Redirect the special edges. */
1446 : 496390 : if (is_latch)
1447 : : {
1448 : 100005 : redirect_edge_and_branch_force (latch_edge, new_bbs[0]);
1449 : 100005 : redirect_edge_and_branch_force (new_spec_edges[SE_LATCH],
1450 : : loop->header);
1451 : 100005 : set_immediate_dominator (CDI_DOMINATORS, new_bbs[0], latch);
1452 : 100005 : latch = loop->latch = new_bbs[n - 1];
1453 : 100005 : e = latch_edge = new_spec_edges[SE_LATCH];
1454 : : }
1455 : : else
1456 : : {
1457 : 396385 : redirect_edge_and_branch_force (e, new_bbs[0]);
1458 : 396385 : redirect_edge_and_branch_force (new_spec_edges[SE_LATCH],
1459 : : loop->header);
1460 : 396385 : set_immediate_dominator (CDI_DOMINATORS, new_bbs[0], e->src);
1461 : 396385 : e = new_spec_edges[SE_LATCH];
1462 : : }
1463 : :
1464 : : /* Record exit edge in this copy. */
1465 : 496390 : if (orig && bitmap_bit_p (wont_exit, j + 1))
1466 : : {
1467 : 350982 : if (to_remove)
1468 : 350982 : to_remove->safe_push (new_spec_edges[SE_ORIG]);
1469 : 350982 : force_edge_cold (new_spec_edges[SE_ORIG], true);
1470 : :
1471 : : /* Scale the frequencies of the blocks dominated by the exit. */
1472 : 350982 : if (bbs_to_scale && scale_after_exit)
1473 : : {
1474 : 790972 : EXECUTE_IF_SET_IN_BITMAP (bbs_to_scale, 0, i, bi)
1475 : 440328 : scale_bbs_frequencies_profile_count (new_bbs + i, 1, after_exit_num,
1476 : : after_exit_den);
1477 : : }
1478 : : }
1479 : :
1480 : : /* Record the first copy in the control flow order if it is not
1481 : : the original loop (i.e. in case of peeling). */
1482 : 496390 : if (!first_active_latch)
1483 : : {
1484 : 195441 : memcpy (first_active, new_bbs, n * sizeof (basic_block));
1485 : 195441 : first_active_latch = new_bbs[n - 1];
1486 : : }
1487 : :
1488 : : /* Set counts and frequencies. */
1489 : 496390 : if (flags & DLTHE_FLAG_UPDATE_FREQ)
1490 : : {
1491 : 462456 : scale_bbs_frequencies (new_bbs, n, scale_act);
1492 : 462456 : scale_act = scale_act * scale_step[j];
1493 : : }
1494 : : }
1495 : 244199 : free (new_bbs);
1496 : 244199 : free (orig_loops);
1497 : :
1498 : : /* Record the exit edge in the original loop body, and update the frequencies. */
1499 : 445872 : if (orig && bitmap_bit_p (wont_exit, 0))
1500 : : {
1501 : 47902 : if (to_remove)
1502 : 47902 : to_remove->safe_push (orig);
1503 : 47902 : force_edge_cold (orig, true);
1504 : :
1505 : : /* Scale the frequencies of the blocks dominated by the exit. */
1506 : 47902 : if (bbs_to_scale && scale_after_exit)
1507 : : {
1508 : 95709 : EXECUTE_IF_SET_IN_BITMAP (bbs_to_scale, 0, i, bi)
1509 : 47859 : scale_bbs_frequencies_profile_count (bbs + i, 1, after_exit_num,
1510 : : after_exit_den);
1511 : : }
1512 : : }
1513 : :
1514 : : /* Update the original loop. */
1515 : 244199 : if (!is_latch)
1516 : 195441 : set_immediate_dominator (CDI_DOMINATORS, e->dest, e->src);
1517 : 244199 : if (flags & DLTHE_FLAG_UPDATE_FREQ)
1518 : : {
1519 : 210265 : scale_bbs_frequencies (bbs, n, scale_main);
1520 : 210265 : free (scale_step);
1521 : : }
1522 : :
1523 : : /* Update dominators of outer blocks if affected. */
1524 : 959359 : for (i = 0; i < n; i++)
1525 : : {
1526 : 715160 : basic_block dominated, dom_bb;
1527 : 715160 : unsigned j;
1528 : :
1529 : 715160 : bb = bbs[i];
1530 : :
1531 : 715160 : auto_vec<basic_block> dom_bbs = get_dominated_by (CDI_DOMINATORS, bb);
1532 : 2579491 : FOR_EACH_VEC_ELT (dom_bbs, j, dominated)
1533 : : {
1534 : 717048 : if (flow_bb_inside_loop_p (loop, dominated))
1535 : 519719 : continue;
1536 : 394658 : dom_bb = nearest_common_dominator (
1537 : 197329 : CDI_DOMINATORS, first_active[i], first_active_latch);
1538 : 197329 : set_immediate_dominator (CDI_DOMINATORS, dominated, dom_bb);
1539 : : }
1540 : 715160 : }
1541 : 244199 : free (first_active);
1542 : :
1543 : 244199 : free (bbs);
1544 : 244199 : BITMAP_FREE (bbs_to_scale);
1545 : :
1546 : 244199 : return true;
1547 : : }
1548 : :
1549 : : /* A callback for make_forwarder block, to redirect all edges except for
1550 : : MFB_KJ_EDGE to the entry part. E is the edge for that we should decide
1551 : : whether to redirect it. */
1552 : :
1553 : : edge mfb_kj_edge;
1554 : : bool
1555 : 267423 : mfb_keep_just (edge e)
1556 : : {
1557 : 267423 : return e != mfb_kj_edge;
1558 : : }
1559 : :
1560 : : /* True when a candidate preheader BLOCK has predecessors from LOOP. */
1561 : :
1562 : : static bool
1563 : 34 : has_preds_from_loop (basic_block block, class loop *loop)
1564 : : {
1565 : 34 : edge e;
1566 : 34 : edge_iterator ei;
1567 : :
1568 : 74 : FOR_EACH_EDGE (e, ei, block->preds)
1569 : 40 : if (e->src->loop_father == loop)
1570 : : return true;
1571 : : return false;
1572 : : }
1573 : :
1574 : : /* Creates a pre-header for a LOOP. Returns newly created block. Unless
1575 : : CP_SIMPLE_PREHEADERS is set in FLAGS, we only force LOOP to have single
1576 : : entry; otherwise we also force preheader block to have only one successor.
1577 : : When CP_FALLTHRU_PREHEADERS is set in FLAGS, we force the preheader block
1578 : : to be a fallthru predecessor to the loop header and to have only
1579 : : predecessors from outside of the loop.
1580 : : The function also updates dominators. */
1581 : :
1582 : : basic_block
1583 : 16370555 : create_preheader (class loop *loop, int flags)
1584 : : {
1585 : 16370555 : edge e;
1586 : 16370555 : basic_block dummy;
1587 : 16370555 : int nentry = 0;
1588 : 16370555 : bool irred = false;
1589 : 16370555 : bool latch_edge_was_fallthru;
1590 : 16370555 : edge one_succ_pred = NULL, single_entry = NULL;
1591 : 16370555 : edge_iterator ei;
1592 : :
1593 : 49223316 : FOR_EACH_EDGE (e, ei, loop->header->preds)
1594 : : {
1595 : 32852761 : if (e->src == loop->latch)
1596 : 16370555 : continue;
1597 : 16482206 : irred |= (e->flags & EDGE_IRREDUCIBLE_LOOP) != 0;
1598 : 16482206 : nentry++;
1599 : 16482206 : single_entry = e;
1600 : 46152229 : if (single_succ_p (e->src))
1601 : 13299468 : one_succ_pred = e;
1602 : : }
1603 : 16370555 : gcc_assert (nentry);
1604 : 16370555 : if (nentry == 1)
1605 : : {
1606 : 16294985 : bool need_forwarder_block = false;
1607 : :
1608 : : /* We do not allow entry block to be the loop preheader, since we
1609 : : cannot emit code there. */
1610 : 16294985 : if (single_entry->src == ENTRY_BLOCK_PTR_FOR_FN (cfun))
1611 : : need_forwarder_block = true;
1612 : : else
1613 : : {
1614 : : /* If we want simple preheaders, also force the preheader to have
1615 : : just a single successor and a normal edge. */
1616 : 16292407 : if ((flags & CP_SIMPLE_PREHEADERS)
1617 : 16292407 : && ((single_entry->flags & EDGE_COMPLEX)
1618 : 16267305 : || !single_succ_p (single_entry->src)))
1619 : : need_forwarder_block = true;
1620 : : /* If we want fallthru preheaders, also create forwarder block when
1621 : : preheader ends with a jump or has predecessors from loop. */
1622 : 13236898 : else if ((flags & CP_FALLTHRU_PREHEADERS)
1623 : 13236898 : && (JUMP_P (BB_END (single_entry->src))
1624 : 34 : || has_preds_from_loop (single_entry->src, loop)))
1625 : : need_forwarder_block = true;
1626 : : }
1627 : : if (! need_forwarder_block)
1628 : : return NULL;
1629 : : }
1630 : :
1631 : 3133670 : mfb_kj_edge = loop_latch_edge (loop);
1632 : 3133670 : latch_edge_was_fallthru = (mfb_kj_edge->flags & EDGE_FALLTHRU) != 0;
1633 : 3133670 : if (nentry == 1
1634 : 3058100 : && ((flags & CP_FALLTHRU_PREHEADERS) == 0
1635 : 24 : || (single_entry->flags & EDGE_CROSSING) == 0))
1636 : 3058099 : dummy = split_edge (single_entry);
1637 : : else
1638 : : {
1639 : 75571 : edge fallthru = make_forwarder_block (loop->header, mfb_keep_just, NULL);
1640 : 75571 : dummy = fallthru->src;
1641 : 75571 : loop->header = fallthru->dest;
1642 : : }
1643 : :
1644 : : /* Try to be clever in placing the newly created preheader. The idea is to
1645 : : avoid breaking any "fallthruness" relationship between blocks.
1646 : :
1647 : : The preheader was created just before the header and all incoming edges
1648 : : to the header were redirected to the preheader, except the latch edge.
1649 : : So the only problematic case is when this latch edge was a fallthru
1650 : : edge: it is not anymore after the preheader creation so we have broken
1651 : : the fallthruness. We're therefore going to look for a better place. */
1652 : 3133670 : if (latch_edge_was_fallthru)
1653 : : {
1654 : 1412199 : if (one_succ_pred)
1655 : 35820 : e = one_succ_pred;
1656 : : else
1657 : 1376379 : e = EDGE_PRED (dummy, 0);
1658 : :
1659 : 1412199 : move_block_after (dummy, e->src);
1660 : : }
1661 : :
1662 : 3133670 : if (irred)
1663 : : {
1664 : 3676 : dummy->flags |= BB_IRREDUCIBLE_LOOP;
1665 : 3676 : single_succ_edge (dummy)->flags |= EDGE_IRREDUCIBLE_LOOP;
1666 : : }
1667 : :
1668 : 3133670 : if (dump_file)
1669 : 178 : fprintf (dump_file, "Created preheader block for loop %i\n",
1670 : : loop->num);
1671 : :
1672 : 3133670 : if (flags & CP_FALLTHRU_PREHEADERS)
1673 : 31 : gcc_assert ((single_succ_edge (dummy)->flags & EDGE_FALLTHRU)
1674 : : && !JUMP_P (BB_END (dummy)));
1675 : :
1676 : : return dummy;
1677 : : }
1678 : :
1679 : : /* Create preheaders for each loop; for meaning of FLAGS see create_preheader. */
1680 : :
1681 : : void
1682 : 29397758 : create_preheaders (int flags)
1683 : : {
1684 : 29397758 : if (!current_loops)
1685 : : return;
1686 : :
1687 : 104562822 : for (auto loop : loops_list (cfun, 0))
1688 : 16369548 : create_preheader (loop, flags);
1689 : 29397758 : loops_state_set (LOOPS_HAVE_PREHEADERS);
1690 : : }
1691 : :
1692 : : /* Forces all loop latches to have only single successor. */
1693 : :
1694 : : void
1695 : 27088771 : force_single_succ_latches (void)
1696 : : {
1697 : 27088771 : edge e;
1698 : :
1699 : 97014813 : for (auto loop : loops_list (cfun, 0))
1700 : : {
1701 : 15748500 : if (loop->latch != loop->header && single_succ_p (loop->latch))
1702 : 10925800 : continue;
1703 : :
1704 : 4822700 : e = find_edge (loop->latch, loop->header);
1705 : 4822700 : gcc_checking_assert (e != NULL);
1706 : :
1707 : 4822700 : split_edge (e);
1708 : 27088771 : }
1709 : 27088771 : loops_state_set (LOOPS_HAVE_SIMPLE_LATCHES);
1710 : 27088771 : }
1711 : :
1712 : : /* This function is called from loop_version. It splits the entry edge
1713 : : of the loop we want to version, adds the versioning condition, and
1714 : : adjust the edges to the two versions of the loop appropriately.
1715 : : e is an incoming edge. Returns the basic block containing the
1716 : : condition.
1717 : :
1718 : : --- edge e ---- > [second_head]
1719 : :
1720 : : Split it and insert new conditional expression and adjust edges.
1721 : :
1722 : : --- edge e ---> [cond expr] ---> [first_head]
1723 : : |
1724 : : +---------> [second_head]
1725 : :
1726 : : THEN_PROB is the probability of then branch of the condition.
1727 : : ELSE_PROB is the probability of else branch. Note that they may be both
1728 : : REG_BR_PROB_BASE when condition is IFN_LOOP_VECTORIZED or
1729 : : IFN_LOOP_DIST_ALIAS. */
1730 : :
1731 : : static basic_block
1732 : 33934 : lv_adjust_loop_entry_edge (basic_block first_head, basic_block second_head,
1733 : : edge e, void *cond_expr,
1734 : : profile_probability then_prob,
1735 : : profile_probability else_prob)
1736 : : {
1737 : 33934 : basic_block new_head = NULL;
1738 : 33934 : edge e1;
1739 : :
1740 : 33934 : gcc_assert (e->dest == second_head);
1741 : :
1742 : : /* Split edge 'e'. This will create a new basic block, where we can
1743 : : insert conditional expr. */
1744 : 33934 : new_head = split_edge (e);
1745 : :
1746 : 33934 : lv_add_condition_to_bb (first_head, second_head, new_head,
1747 : : cond_expr);
1748 : :
1749 : : /* Don't set EDGE_TRUE_VALUE in RTL mode, as it's invalid there. */
1750 : 33934 : e = single_succ_edge (new_head);
1751 : 33934 : e1 = make_edge (new_head, first_head,
1752 : 33934 : current_ir_type () == IR_GIMPLE ? EDGE_TRUE_VALUE : 0);
1753 : 33934 : e1->probability = then_prob;
1754 : 33934 : e->probability = else_prob;
1755 : :
1756 : 33934 : set_immediate_dominator (CDI_DOMINATORS, first_head, new_head);
1757 : 33934 : set_immediate_dominator (CDI_DOMINATORS, second_head, new_head);
1758 : :
1759 : : /* Adjust loop header phi nodes. */
1760 : 33934 : lv_adjust_loop_header_phi (first_head, second_head, new_head, e1);
1761 : :
1762 : 33934 : return new_head;
1763 : : }
1764 : :
1765 : : /* Main entry point for Loop Versioning transformation.
1766 : :
1767 : : This transformation given a condition and a loop, creates
1768 : : -if (condition) { loop_copy1 } else { loop_copy2 },
1769 : : where loop_copy1 is the loop transformed in one way, and loop_copy2
1770 : : is the loop transformed in another way (or unchanged). COND_EXPR
1771 : : may be a run time test for things that were not resolved by static
1772 : : analysis (overlapping ranges (anti-aliasing), alignment, etc.).
1773 : :
1774 : : If non-NULL, CONDITION_BB is set to the basic block containing the
1775 : : condition.
1776 : :
1777 : : THEN_PROB is the probability of the then edge of the if. THEN_SCALE
1778 : : is the ratio by that the frequencies in the original loop should
1779 : : be scaled. ELSE_SCALE is the ratio by that the frequencies in the
1780 : : new loop should be scaled.
1781 : :
1782 : : If PLACE_AFTER is true, we place the new loop after LOOP in the
1783 : : instruction stream, otherwise it is placed before LOOP. */
1784 : :
1785 : : class loop *
1786 : 33944 : loop_version (class loop *loop,
1787 : : void *cond_expr, basic_block *condition_bb,
1788 : : profile_probability then_prob, profile_probability else_prob,
1789 : : profile_probability then_scale, profile_probability else_scale,
1790 : : bool place_after)
1791 : : {
1792 : 33944 : basic_block first_head, second_head;
1793 : 33944 : edge entry, latch_edge;
1794 : 33944 : int irred_flag;
1795 : 33944 : class loop *nloop;
1796 : 33944 : basic_block cond_bb;
1797 : :
1798 : : /* Record entry and latch edges for the loop */
1799 : 33944 : entry = loop_preheader_edge (loop);
1800 : 33944 : irred_flag = entry->flags & EDGE_IRREDUCIBLE_LOOP;
1801 : 33944 : entry->flags &= ~EDGE_IRREDUCIBLE_LOOP;
1802 : :
1803 : : /* Note down head of loop as first_head. */
1804 : 33944 : first_head = entry->dest;
1805 : :
1806 : : /* 1) Duplicate loop on the entry edge. */
1807 : 33944 : if (!cfg_hook_duplicate_loop_body_to_header_edge (loop, entry, 1, NULL, NULL,
1808 : : NULL, 0))
1809 : : {
1810 : 10 : entry->flags |= irred_flag;
1811 : 10 : return NULL;
1812 : : }
1813 : :
1814 : : /* 2) loopify the duplicated new loop. */
1815 : 33934 : latch_edge = single_succ_edge (get_bb_copy (loop->latch));
1816 : 33934 : nloop = alloc_loop ();
1817 : 33934 : class loop *outer = loop_outer (latch_edge->dest->loop_father);
1818 : 33934 : edge new_header_edge = single_pred_edge (get_bb_copy (loop->header));
1819 : 33934 : nloop->header = new_header_edge->dest;
1820 : 33934 : nloop->latch = latch_edge->src;
1821 : 33934 : loop_redirect_edge (latch_edge, nloop->header);
1822 : :
1823 : : /* Compute new loop. */
1824 : 33934 : add_loop (nloop, outer);
1825 : 33934 : copy_loop_info (loop, nloop);
1826 : 33934 : set_loop_copy (loop, nloop);
1827 : :
1828 : : /* loopify redirected latch_edge. Update its PENDING_STMTS. */
1829 : 33934 : lv_flush_pending_stmts (latch_edge);
1830 : :
1831 : : /* After duplication entry edge now points to new loop head block.
1832 : : Note down new head as second_head. */
1833 : 33934 : second_head = entry->dest;
1834 : :
1835 : : /* 3) Split loop entry edge and insert new block with cond expr. */
1836 : 33934 : cond_bb = lv_adjust_loop_entry_edge (first_head, second_head,
1837 : : entry, cond_expr, then_prob, else_prob);
1838 : 33934 : if (condition_bb)
1839 : 29947 : *condition_bb = cond_bb;
1840 : :
1841 : 33934 : if (!cond_bb)
1842 : : {
1843 : 0 : entry->flags |= irred_flag;
1844 : 0 : return NULL;
1845 : : }
1846 : :
1847 : : /* Add cond_bb to appropriate loop. */
1848 : 33934 : if (cond_bb->loop_father)
1849 : 33934 : remove_bb_from_loops (cond_bb);
1850 : 33934 : add_bb_to_loop (cond_bb, outer);
1851 : :
1852 : : /* 4) Scale the original loop and new loop frequency. */
1853 : 33934 : scale_loop_frequencies (loop, then_scale);
1854 : 33934 : scale_loop_frequencies (nloop, else_scale);
1855 : 33934 : update_dominators_in_loop (loop);
1856 : 33934 : update_dominators_in_loop (nloop);
1857 : :
1858 : : /* Adjust irreducible flag. */
1859 : 33934 : if (irred_flag)
1860 : : {
1861 : 48 : cond_bb->flags |= BB_IRREDUCIBLE_LOOP;
1862 : 48 : loop_preheader_edge (loop)->flags |= EDGE_IRREDUCIBLE_LOOP;
1863 : 48 : loop_preheader_edge (nloop)->flags |= EDGE_IRREDUCIBLE_LOOP;
1864 : 48 : single_pred_edge (cond_bb)->flags |= EDGE_IRREDUCIBLE_LOOP;
1865 : : }
1866 : :
1867 : 33934 : if (place_after)
1868 : : {
1869 : 31585 : basic_block *bbs = get_loop_body_in_dom_order (nloop), after;
1870 : 31585 : unsigned i;
1871 : :
1872 : 31585 : after = loop->latch;
1873 : :
1874 : 186211 : for (i = 0; i < nloop->num_nodes; i++)
1875 : : {
1876 : 154626 : move_block_after (bbs[i], after);
1877 : 154626 : after = bbs[i];
1878 : : }
1879 : 31585 : free (bbs);
1880 : : }
1881 : :
1882 : : /* At this point condition_bb is loop preheader with two successors,
1883 : : first_head and second_head. Make sure that loop preheader has only
1884 : : one successor. */
1885 : 33934 : split_edge (loop_preheader_edge (loop));
1886 : 33934 : split_edge (loop_preheader_edge (nloop));
1887 : :
1888 : 33934 : return nloop;
1889 : : }
|