Line data Source code
1 : /* Natural loop analysis code for GNU compiler.
2 : Copyright (C) 2002-2026 Free Software Foundation, Inc.
3 :
4 : This file is part of GCC.
5 :
6 : GCC is free software; you can redistribute it and/or modify it under
7 : the terms of the GNU General Public License as published by the Free
8 : Software Foundation; either version 3, or (at your option) any later
9 : version.
10 :
11 : GCC is distributed in the hope that it will be useful, but WITHOUT ANY
12 : WARRANTY; without even the implied warranty of MERCHANTABILITY or
13 : FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
14 : for more details.
15 :
16 : You should have received a copy of the GNU General Public License
17 : along with GCC; see the file COPYING3. If not see
18 : <http://www.gnu.org/licenses/>. */
19 :
20 : #include "config.h"
21 : #include "system.h"
22 : #include "coretypes.h"
23 : #include "backend.h"
24 : #include "rtl.h"
25 : #include "tree.h"
26 : #include "predict.h"
27 : #include "memmodel.h"
28 : #include "emit-rtl.h"
29 : #include "cfgloop.h"
30 : #include "explow.h"
31 : #include "expr.h"
32 : #include "graphds.h"
33 : #include "sreal.h"
34 : #include "regs.h"
35 : #include "function-abi.h"
36 :
37 : struct target_cfgloop default_target_cfgloop;
38 : #if SWITCHABLE_TARGET
39 : struct target_cfgloop *this_target_cfgloop = &default_target_cfgloop;
40 : #endif
41 :
42 : /* Checks whether BB is executed exactly once in each LOOP iteration. */
43 :
44 : bool
45 4586685 : just_once_each_iteration_p (const class loop *loop, const_basic_block bb)
46 : {
47 : /* It must be executed at least once each iteration. */
48 4586685 : if (!dominated_by_p (CDI_DOMINATORS, loop->latch, bb))
49 : return false;
50 :
51 : /* And just once. */
52 4146569 : if (bb->loop_father != loop)
53 : return false;
54 :
55 : /* But this was not enough. We might have some irreducible loop here. */
56 4134507 : if (bb->flags & BB_IRREDUCIBLE_LOOP)
57 19 : return false;
58 :
59 : return true;
60 : }
61 :
62 : /* Marks blocks and edges that are part of non-recognized loops; i.e. we
63 : throw away all latch edges and mark blocks inside any remaining cycle.
64 : Everything is a bit complicated due to fact we do not want to do this
65 : for parts of cycles that only "pass" through some loop -- i.e. for
66 : each cycle, we want to mark blocks that belong directly to innermost
67 : loop containing the whole cycle.
68 :
69 : LOOPS is the loop tree. */
70 :
71 : #define LOOP_REPR(LOOP) ((LOOP)->num + last_basic_block_for_fn (cfun))
72 : #define BB_REPR(BB) ((BB)->index + 1)
73 :
74 : bool
75 60997518 : mark_irreducible_loops (void)
76 : {
77 60997518 : basic_block act;
78 60997518 : struct graph_edge *ge;
79 60997518 : edge e;
80 60997518 : edge_iterator ei;
81 60997518 : int src, dest;
82 60997518 : unsigned depth;
83 60997518 : struct graph *g;
84 60997518 : int num = number_of_loops (cfun);
85 60997518 : class loop *cloop;
86 60997518 : bool irred_loop_found = false;
87 60997518 : int i;
88 :
89 60997518 : gcc_assert (current_loops != NULL);
90 :
91 : /* Reset the flags. */
92 778034843 : FOR_BB_BETWEEN (act, ENTRY_BLOCK_PTR_FOR_FN (cfun),
93 : EXIT_BLOCK_PTR_FOR_FN (cfun), next_bb)
94 : {
95 717037325 : act->flags &= ~BB_IRREDUCIBLE_LOOP;
96 1697145538 : FOR_EACH_EDGE (e, ei, act->succs)
97 980108213 : e->flags &= ~EDGE_IRREDUCIBLE_LOOP;
98 : }
99 :
100 : /* Create the edge lists. */
101 60997518 : g = new_graph (last_basic_block_for_fn (cfun) + num);
102 :
103 778034843 : FOR_BB_BETWEEN (act, ENTRY_BLOCK_PTR_FOR_FN (cfun),
104 : EXIT_BLOCK_PTR_FOR_FN (cfun), next_bb)
105 1697145538 : FOR_EACH_EDGE (e, ei, act->succs)
106 : {
107 : /* Ignore edges to exit. */
108 980108213 : if (e->dest == EXIT_BLOCK_PTR_FOR_FN (cfun))
109 59972218 : continue;
110 :
111 920135995 : src = BB_REPR (act);
112 920135995 : dest = BB_REPR (e->dest);
113 :
114 : /* Ignore latch edges. */
115 963623394 : if (e->dest->loop_father->header == e->dest
116 920135995 : && dominated_by_p (CDI_DOMINATORS, act, e->dest))
117 43487399 : continue;
118 :
119 : /* Edges inside a single loop should be left where they are. Edges
120 : to subloop headers should lead to representative of the subloop,
121 : but from the same place.
122 :
123 : Edges exiting loops should lead from representative
124 : of the son of nearest common ancestor of the loops in that
125 : act lays. */
126 :
127 876648596 : if (e->dest->loop_father->header == e->dest)
128 43487923 : dest = LOOP_REPR (e->dest->loop_father);
129 :
130 876648596 : if (!flow_bb_inside_loop_p (act->loop_father, e->dest))
131 : {
132 73728402 : depth = 1 + loop_depth (find_common_loop (act->loop_father,
133 73728402 : e->dest->loop_father));
134 147456804 : if (depth == loop_depth (act->loop_father))
135 : cloop = act->loop_father;
136 : else
137 4297269 : cloop = (*act->loop_father->superloops)[depth];
138 :
139 73728402 : src = LOOP_REPR (cloop);
140 : }
141 :
142 876648596 : add_edge (g, src, dest)->data = e;
143 : }
144 :
145 : /* Find the strongly connected components. */
146 60997518 : graphds_scc (g, NULL);
147 :
148 : /* Mark the irreducible loops. */
149 1010581272 : for (i = 0; i < g->n_vertices; i++)
150 1826232350 : for (ge = g->vertices[i].succ; ge; ge = ge->succ_next)
151 : {
152 876648596 : edge real = (edge) ge->data;
153 : /* edge E in graph G is irreducible if it connects two vertices in the
154 : same scc. */
155 :
156 : /* All edges should lead from a component with higher number to the
157 : one with lower one. */
158 876648596 : gcc_assert (g->vertices[ge->src].component >= g->vertices[ge->dest].component);
159 :
160 876648596 : if (g->vertices[ge->src].component != g->vertices[ge->dest].component)
161 875161299 : continue;
162 :
163 1487297 : real->flags |= EDGE_IRREDUCIBLE_LOOP;
164 1487297 : irred_loop_found = true;
165 1487297 : if (flow_bb_inside_loop_p (real->src->loop_father, real->dest))
166 1371024 : real->src->flags |= BB_IRREDUCIBLE_LOOP;
167 : }
168 :
169 60997518 : free_graph (g);
170 :
171 60997518 : loops_state_set (LOOPS_HAVE_MARKED_IRREDUCIBLE_REGIONS);
172 60997518 : return irred_loop_found;
173 : }
174 :
175 : /* Counts number of insns inside LOOP. */
176 : int
177 421395 : num_loop_insns (const class loop *loop)
178 : {
179 421395 : basic_block *bbs, bb;
180 421395 : unsigned i, ninsns = 0;
181 421395 : rtx_insn *insn;
182 :
183 421395 : bbs = get_loop_body (loop);
184 2571062 : for (i = 0; i < loop->num_nodes; i++)
185 : {
186 1728272 : bb = bbs[i];
187 18888686 : FOR_BB_INSNS (bb, insn)
188 17160414 : if (NONDEBUG_INSN_P (insn))
189 7512355 : ninsns++;
190 : }
191 421395 : free (bbs);
192 :
193 421395 : if (!ninsns)
194 : ninsns = 1; /* To avoid division by zero. */
195 :
196 421395 : return ninsns;
197 : }
198 :
199 : /* Counts number of insns executed on average per iteration LOOP. */
200 : int
201 421268 : average_num_loop_insns (const class loop *loop)
202 : {
203 421268 : basic_block *bbs, bb;
204 421268 : unsigned i, binsns;
205 421268 : sreal ninsns;
206 421268 : rtx_insn *insn;
207 :
208 421268 : ninsns = 0;
209 421268 : bbs = get_loop_body (loop);
210 2569625 : for (i = 0; i < loop->num_nodes; i++)
211 : {
212 1727093 : bb = bbs[i];
213 :
214 1727093 : binsns = 0;
215 18881624 : FOR_BB_INSNS (bb, insn)
216 17154531 : if (NONDEBUG_INSN_P (insn))
217 7508479 : binsns++;
218 :
219 1727093 : ninsns += (sreal)binsns * bb->count.to_sreal_scale (loop->header->count);
220 : /* Avoid overflows. */
221 1727093 : if (ninsns > 1000000)
222 : {
223 4 : free (bbs);
224 4 : return 1000000;
225 : }
226 : }
227 421264 : free (bbs);
228 :
229 421264 : int64_t ret = ninsns.to_int ();
230 421264 : if (!ret)
231 4653 : ret = 1; /* To avoid division by zero. */
232 :
233 421264 : return ret;
234 : }
235 :
236 : /* Compute how many times loop is entered. */
237 :
238 : profile_count
239 9133688 : loop_count_in (const class loop *loop)
240 : {
241 : /* Compute number of invocations of the loop. */
242 9133688 : profile_count count_in = profile_count::zero ();
243 9133688 : edge e;
244 9133688 : edge_iterator ei;
245 9133688 : bool found_latch = false;
246 :
247 9133688 : if (loops_state_satisfies_p (LOOPS_MAY_HAVE_MULTIPLE_LATCHES))
248 2796 : FOR_EACH_EDGE (e, ei, loop->header->preds)
249 1906 : if (!flow_bb_inside_loop_p (loop, e->src))
250 1007 : count_in += e->count ();
251 : else
252 : found_latch = true;
253 : else
254 27398394 : FOR_EACH_EDGE (e, ei, loop->header->preds)
255 18265596 : if (e->src != loop->latch)
256 9132798 : count_in += e->count ();
257 : else
258 : found_latch = true;
259 9133688 : gcc_checking_assert (found_latch);
260 9133688 : return count_in;
261 : }
262 :
263 : /* Return true if BB profile can be used to determine the expected number of
264 : iterations (that is number of executions of latch edge(s) for each
265 : entry of the loop. If this is the case initialize RET with the number
266 : of iterations.
267 :
268 : RELIABLE is set if profile indiates that the returned value should be
269 : realistic estimate. (This is the case if we read profile and did not
270 : messed it up yet and not the case of guessed profiles.)
271 :
272 : This function uses only CFG profile. We track more reliable info in
273 : loop_info structure and for loop optimization heuristics more relevant
274 : is get_estimated_loop_iterations API. */
275 :
276 : bool
277 10154590 : expected_loop_iterations_by_profile (const class loop *loop, sreal *ret,
278 : bool *reliable)
279 : {
280 10154590 : profile_count header_count = loop->header->count;
281 10154590 : if (reliable)
282 9920981 : *reliable = false;
283 :
284 : /* TODO: For single exit loops we can use loop exit edge probability.
285 : It also may be reliable while loop itself was adjusted. */
286 10154590 : if (!header_count.initialized_p ()
287 10310455 : || !header_count.nonzero_p ())
288 : return false;
289 :
290 9023371 : profile_count count_in = loop_count_in (loop);
291 :
292 9023371 : bool known;
293 : /* Number of iterations is number of executions of latch edge. */
294 9023371 : *ret = (header_count - count_in).to_sreal_scale (count_in, &known);
295 9023371 : if (!known)
296 : return false;
297 9012425 : if (reliable)
298 : {
299 : /* Header should have at least count_in many executions.
300 : Give up on clearly inconsistent profile. */
301 8779317 : if (header_count < count_in && header_count.differs_from_p (count_in))
302 : {
303 20301 : if (dump_file && (dump_flags & TDF_DETAILS))
304 0 : fprintf (dump_file, "Inconsistent bb profile of loop %i\n",
305 0 : loop->num);
306 20301 : *reliable = false;
307 : }
308 : else
309 17517764 : *reliable = count_in.reliable_p () && header_count.reliable_p ();
310 : }
311 : return true;
312 : }
313 :
314 : /* Return true if loop CFG profile may be unrealistically flat.
315 : This is a common case, since average loops iterate only about 5 times.
316 : In the case we do not have profile feedback or do not know real number of
317 : iterations during profile estimation, we are likely going to predict it with
318 : similar low iteration count. For static loop profiles we also artificially
319 : cap profile of loops with known large iteration count so they do not appear
320 : significantly more hot than other loops with unknown iteration counts.
321 :
322 : For loop optimization heuristics we ignore CFG profile and instead
323 : use get_estimated_loop_iterations API which returns estimate
324 : only when it is realistic. For unknown counts some optimizations,
325 : like vectorizer or unroller make guess that iteration count will
326 : be large. In this case we need to avoid scaling down the profile
327 : after the loop transform. */
328 :
329 : bool
330 96245 : maybe_flat_loop_profile (const class loop *loop)
331 : {
332 96245 : bool reliable;
333 96245 : sreal ret;
334 :
335 96245 : if (!expected_loop_iterations_by_profile (loop, &ret, &reliable))
336 : return true;
337 :
338 : /* Reliable CFG estimates ought never be flat. Sanity check with
339 : nb_iterations_estimate. If those differ, it is a but in profile
340 : updating code */
341 96218 : if (reliable)
342 : {
343 58 : int64_t intret = ret.to_nearest_int ();
344 58 : if (loop->any_estimate
345 58 : && (wi::ltu_p (intret * 2, loop->nb_iterations_estimate)
346 55 : || wi::gtu_p (intret, loop->nb_iterations_estimate * 2)))
347 : {
348 0 : if (dump_file && (dump_flags & TDF_DETAILS))
349 0 : fprintf (dump_file,
350 : "Loop %i has inconsistent iterations estimates: "
351 : "reliable CFG based iteration estimate is %f "
352 : "while nb_iterations_estimate is %i\n",
353 0 : loop->num,
354 : ret.to_double (),
355 0 : (int)loop->nb_iterations_estimate.to_shwi ());
356 0 : return true;
357 : }
358 58 : return false;
359 : }
360 :
361 : /* Allow some margin of error and see if we are close to known bounds.
362 : sreal (9,-3) is 9/8 */
363 96160 : int64_t intret = (ret * sreal (9, -3)).to_nearest_int ();
364 96160 : if (loop->any_upper_bound && wi::geu_p (intret, loop->nb_iterations_upper_bound))
365 : return false;
366 67049 : if (loop->any_likely_upper_bound
367 67049 : && wi::geu_p (intret, loop->nb_iterations_likely_upper_bound))
368 : return false;
369 66994 : if (loop->any_estimate
370 66994 : && wi::geu_p (intret, loop->nb_iterations_estimate))
371 : return false;
372 : return true;
373 : }
374 :
375 : /* Returns expected number of iterations of LOOP, according to
376 : measured or guessed profile.
377 :
378 : This functions attempts to return "sane" value even if profile
379 : information is not good enough to derive osmething. */
380 :
381 : gcov_type
382 39 : expected_loop_iterations_unbounded (const class loop *loop,
383 : bool *read_profile_p)
384 : {
385 39 : gcov_type expected = -1;
386 :
387 39 : if (read_profile_p)
388 0 : *read_profile_p = false;
389 :
390 39 : sreal sreal_expected;
391 39 : if (expected_loop_iterations_by_profile
392 39 : (loop, &sreal_expected, read_profile_p))
393 39 : expected = sreal_expected.to_nearest_int ();
394 : else
395 0 : expected = param_avg_loop_niter;
396 :
397 39 : HOST_WIDE_INT max = get_max_loop_iterations_int (loop);
398 39 : if (max != -1 && max < expected)
399 0 : return max;
400 :
401 : return expected;
402 : }
403 :
404 : /* Returns expected number of LOOP iterations. The returned value is bounded
405 : by REG_BR_PROB_BASE. */
406 :
407 : unsigned
408 39 : expected_loop_iterations (class loop *loop)
409 : {
410 39 : gcov_type expected = expected_loop_iterations_unbounded (loop);
411 39 : return (expected > REG_BR_PROB_BASE ? REG_BR_PROB_BASE : expected);
412 : }
413 :
414 : /* Returns the maximum level of nesting of subloops of LOOP. */
415 :
416 : unsigned
417 0 : get_loop_level (const class loop *loop)
418 : {
419 0 : const class loop *ploop;
420 0 : unsigned mx = 0, l;
421 :
422 0 : for (ploop = loop->inner; ploop; ploop = ploop->next)
423 : {
424 0 : l = get_loop_level (ploop);
425 0 : if (l >= mx)
426 0 : mx = l + 1;
427 : }
428 0 : return mx;
429 : }
430 :
431 : /* Initialize the constants for computing set costs. */
432 :
433 : void
434 215715 : init_set_costs (void)
435 : {
436 215715 : int speed;
437 215715 : rtx_insn *seq;
438 215715 : rtx reg1 = gen_raw_REG (SImode, LAST_VIRTUAL_REGISTER + 1);
439 215715 : rtx reg2 = gen_raw_REG (SImode, LAST_VIRTUAL_REGISTER + 2);
440 221418 : rtx addr = gen_raw_REG (Pmode, LAST_VIRTUAL_REGISTER + 3);
441 215715 : rtx mem = validize_mem (gen_rtx_MEM (SImode, addr));
442 215715 : unsigned i;
443 :
444 215715 : target_avail_regs = 0;
445 215715 : target_clobbered_regs = 0;
446 20061495 : for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
447 19845780 : if (TEST_HARD_REG_BIT (reg_class_contents[GENERAL_REGS], i)
448 19845780 : && !fixed_regs[i])
449 : {
450 3191820 : target_avail_regs++;
451 : /* ??? This is only a rough heuristic. It doesn't cope well
452 : with alternative ABIs, but that's an optimization rather than
453 : correctness issue. */
454 3191820 : if (default_function_abi.clobbers_full_reg_p (i))
455 1908085 : target_clobbered_regs++;
456 : }
457 :
458 215715 : target_res_regs = 3;
459 :
460 647145 : for (speed = 0; speed < 2; speed++)
461 : {
462 431430 : crtl->maybe_hot_insn_p = speed;
463 : /* Set up the costs for using extra registers:
464 :
465 : 1) If not many free registers remain, we should prefer having an
466 : additional move to decreasing the number of available registers.
467 : (TARGET_REG_COST).
468 : 2) If no registers are available, we need to spill, which may require
469 : storing the old value to memory and loading it back
470 : (TARGET_SPILL_COST). */
471 :
472 431430 : start_sequence ();
473 431430 : emit_move_insn (reg1, reg2);
474 431430 : seq = end_sequence ();
475 431430 : target_reg_cost [speed] = seq_cost (seq, speed);
476 :
477 431430 : start_sequence ();
478 431430 : emit_move_insn (mem, reg1);
479 431430 : emit_move_insn (reg2, mem);
480 431430 : seq = end_sequence ();
481 431430 : target_spill_cost [speed] = seq_cost (seq, speed);
482 : }
483 215715 : default_rtl_profile ();
484 215715 : }
485 :
486 : /* Estimates cost of increased register pressure caused by making N_NEW new
487 : registers live around the loop. N_OLD is the number of registers live
488 : around the loop. If CALL_P is true, also take into account that
489 : call-used registers may be clobbered in the loop body, reducing the
490 : number of available registers before we spill. */
491 :
492 : unsigned
493 4078370 : estimate_reg_pressure_cost (unsigned n_new, unsigned n_old, bool speed,
494 : bool call_p)
495 : {
496 4078370 : unsigned cost;
497 4078370 : unsigned regs_needed = n_new + n_old;
498 4078370 : unsigned available_regs = target_avail_regs;
499 :
500 : /* If there is a call in the loop body, the call-clobbered registers
501 : are not available for loop invariants. */
502 4078370 : if (call_p)
503 2730440 : available_regs = available_regs - target_clobbered_regs;
504 :
505 : /* If we have enough registers, we should use them and not restrict
506 : the transformations unnecessarily. */
507 4078370 : if (regs_needed + target_res_regs <= available_regs)
508 : return 0;
509 :
510 1559372 : if (regs_needed <= available_regs)
511 : /* If we are close to running out of registers, try to preserve
512 : them. */
513 1213804 : cost = target_reg_cost [speed] * n_new;
514 : else
515 : /* If we run out of registers, it is very expensive to add another
516 : one. */
517 345568 : cost = target_spill_cost [speed] * n_new;
518 :
519 3118744 : if (optimize && (flag_ira_region == IRA_REGION_ALL
520 1559372 : || flag_ira_region == IRA_REGION_MIXED)
521 4623458 : && number_of_loops (cfun) <= (unsigned) param_ira_max_loops_num)
522 : /* IRA regional allocation deals with high register pressure
523 : better. So decrease the cost (to do more accurate the cost
524 : calculation for IRA, we need to know how many registers lives
525 : through the loop transparently). */
526 1515942 : cost /= 2;
527 :
528 : return cost;
529 : }
530 :
531 : /* Sets EDGE_LOOP_EXIT flag for all loop exits. */
532 :
533 : void
534 3112682 : mark_loop_exit_edges (void)
535 : {
536 3112682 : basic_block bb;
537 3112682 : edge e;
538 :
539 6225364 : if (number_of_loops (cfun) <= 1)
540 2453050 : return;
541 :
542 21190410 : FOR_EACH_BB_FN (bb, cfun)
543 : {
544 20530778 : edge_iterator ei;
545 :
546 51504500 : FOR_EACH_EDGE (e, ei, bb->succs)
547 : {
548 30973722 : if (loop_outer (bb->loop_father)
549 30973722 : && loop_exit_edge_p (bb->loop_father, e))
550 3403215 : e->flags |= EDGE_LOOP_EXIT;
551 : else
552 27570507 : e->flags &= ~EDGE_LOOP_EXIT;
553 : }
554 : }
555 : }
556 :
557 : /* Return exit edge if loop has only one exit that is likely
558 : to be executed on runtime (i.e. it is not EH or leading
559 : to noreturn call. */
560 :
561 : edge
562 7584991 : single_likely_exit (class loop *loop, const vec<edge> &exits)
563 : {
564 7584991 : edge found = single_exit (loop);
565 7584991 : unsigned i;
566 7584991 : edge ex;
567 :
568 7584991 : if (found)
569 : return found;
570 7456605 : FOR_EACH_VEC_ELT (exits, i, ex)
571 : {
572 6694958 : if (probably_never_executed_edge_p (cfun, ex)
573 : /* We want to rule out paths to noreturns but not low probabilities
574 : resulting from adjustments or combining.
575 : FIXME: once we have better quality tracking, make this more
576 : robust. */
577 6694958 : || ex->probability <= profile_probability::very_unlikely ())
578 2655991 : continue;
579 4038967 : if (!found)
580 : found = ex;
581 : else
582 : return NULL;
583 : }
584 : return found;
585 : }
586 :
587 :
588 : /* Gets basic blocks of a LOOP. Header is the 0-th block, rest is in dfs
589 : order against direction of edges from latch. Specially, if
590 : header != latch, latch is the 1-st block. */
591 :
592 : auto_vec<basic_block>
593 500261 : get_loop_hot_path (const class loop *loop)
594 : {
595 500261 : basic_block bb = loop->header;
596 500261 : auto_vec<basic_block> path;
597 500261 : bitmap visited = BITMAP_ALLOC (NULL);
598 :
599 3199275 : while (true)
600 : {
601 1849768 : edge_iterator ei;
602 1849768 : edge e;
603 1849768 : edge best = NULL;
604 :
605 1849768 : path.safe_push (bb);
606 1849768 : bitmap_set_bit (visited, bb->index);
607 4809555 : FOR_EACH_EDGE (e, ei, bb->succs)
608 3585528 : if ((!best || e->probability > best->probability)
609 2516721 : && !loop_exit_edge_p (loop, e)
610 4984973 : && !bitmap_bit_p (visited, e->dest->index))
611 : best = e;
612 1849768 : if (!best || best->dest == loop->header)
613 : break;
614 1349507 : bb = best->dest;
615 1349507 : }
616 500261 : BITMAP_FREE (visited);
617 500261 : return path;
618 : }
|