Line data Source code
1 : /* Natural loop analysis code for GNU compiler.
2 : Copyright (C) 2002-2026 Free Software Foundation, Inc.
3 :
4 : This file is part of GCC.
5 :
6 : GCC is free software; you can redistribute it and/or modify it under
7 : the terms of the GNU General Public License as published by the Free
8 : Software Foundation; either version 3, or (at your option) any later
9 : version.
10 :
11 : GCC is distributed in the hope that it will be useful, but WITHOUT ANY
12 : WARRANTY; without even the implied warranty of MERCHANTABILITY or
13 : FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
14 : for more details.
15 :
16 : You should have received a copy of the GNU General Public License
17 : along with GCC; see the file COPYING3. If not see
18 : <http://www.gnu.org/licenses/>. */
19 :
20 : #include "config.h"
21 : #include "system.h"
22 : #include "coretypes.h"
23 : #include "backend.h"
24 : #include "rtl.h"
25 : #include "tree.h"
26 : #include "predict.h"
27 : #include "memmodel.h"
28 : #include "emit-rtl.h"
29 : #include "cfgloop.h"
30 : #include "explow.h"
31 : #include "expr.h"
32 : #include "graphds.h"
33 : #include "sreal.h"
34 : #include "regs.h"
35 : #include "function-abi.h"
36 :
37 : struct target_cfgloop default_target_cfgloop;
38 : #if SWITCHABLE_TARGET
39 : struct target_cfgloop *this_target_cfgloop = &default_target_cfgloop;
40 : #endif
41 :
42 : /* Checks whether BB is executed exactly once in each LOOP iteration. */
43 :
44 : bool
45 4600051 : just_once_each_iteration_p (const class loop *loop, const_basic_block bb)
46 : {
47 : /* It must be executed at least once each iteration. */
48 4600051 : if (!dominated_by_p (CDI_DOMINATORS, loop->latch, bb))
49 : return false;
50 :
51 : /* And just once. */
52 4153346 : if (bb->loop_father != loop)
53 : return false;
54 :
55 : /* But this was not enough. We might have some irreducible loop here. */
56 4141239 : if (bb->flags & BB_IRREDUCIBLE_LOOP)
57 19 : return false;
58 :
59 : return true;
60 : }
61 :
62 : /* Marks blocks and edges that are part of non-recognized loops; i.e. we
63 : throw away all latch edges and mark blocks inside any remaining cycle.
64 : Everything is a bit complicated due to fact we do not want to do this
65 : for parts of cycles that only "pass" through some loop -- i.e. for
66 : each cycle, we want to mark blocks that belong directly to innermost
67 : loop containing the whole cycle.
68 :
69 : LOOPS is the loop tree. */
70 :
71 : #define LOOP_REPR(LOOP) ((LOOP)->num + last_basic_block_for_fn (cfun))
72 : #define BB_REPR(BB) ((BB)->index + 1)
73 :
74 : bool
75 61209205 : mark_irreducible_loops (void)
76 : {
77 61209205 : basic_block act;
78 61209205 : struct graph_edge *ge;
79 61209205 : edge e;
80 61209205 : edge_iterator ei;
81 61209205 : int src, dest;
82 61209205 : unsigned depth;
83 61209205 : struct graph *g;
84 61209205 : int num = number_of_loops (cfun);
85 61209205 : class loop *cloop;
86 61209205 : bool irred_loop_found = false;
87 61209205 : int i;
88 :
89 61209205 : gcc_assert (current_loops != NULL);
90 :
91 : /* Reset the flags. */
92 780041198 : FOR_BB_BETWEEN (act, ENTRY_BLOCK_PTR_FOR_FN (cfun),
93 : EXIT_BLOCK_PTR_FOR_FN (cfun), next_bb)
94 : {
95 718831993 : act->flags &= ~BB_IRREDUCIBLE_LOOP;
96 1701152128 : FOR_EACH_EDGE (e, ei, act->succs)
97 982320135 : e->flags &= ~EDGE_IRREDUCIBLE_LOOP;
98 : }
99 :
100 : /* Create the edge lists. */
101 61209205 : g = new_graph (last_basic_block_for_fn (cfun) + num);
102 :
103 780041198 : FOR_BB_BETWEEN (act, ENTRY_BLOCK_PTR_FOR_FN (cfun),
104 : EXIT_BLOCK_PTR_FOR_FN (cfun), next_bb)
105 1701152128 : FOR_EACH_EDGE (e, ei, act->succs)
106 : {
107 : /* Ignore edges to exit. */
108 982320135 : if (e->dest == EXIT_BLOCK_PTR_FOR_FN (cfun))
109 60183241 : continue;
110 :
111 922136894 : src = BB_REPR (act);
112 922136894 : dest = BB_REPR (e->dest);
113 :
114 : /* Ignore latch edges. */
115 965695865 : if (e->dest->loop_father->header == e->dest
116 922136894 : && dominated_by_p (CDI_DOMINATORS, act, e->dest))
117 43558971 : continue;
118 :
119 : /* Edges inside a single loop should be left where they are. Edges
120 : to subloop headers should lead to representative of the subloop,
121 : but from the same place.
122 :
123 : Edges exiting loops should lead from representative
124 : of the son of nearest common ancestor of the loops in that
125 : act lays. */
126 :
127 878577923 : if (e->dest->loop_father->header == e->dest)
128 43559495 : dest = LOOP_REPR (e->dest->loop_father);
129 :
130 878577923 : if (!flow_bb_inside_loop_p (act->loop_father, e->dest))
131 : {
132 74062261 : depth = 1 + loop_depth (find_common_loop (act->loop_father,
133 74062261 : e->dest->loop_father));
134 148124522 : if (depth == loop_depth (act->loop_father))
135 : cloop = act->loop_father;
136 : else
137 4316231 : cloop = (*act->loop_father->superloops)[depth];
138 :
139 74062261 : src = LOOP_REPR (cloop);
140 : }
141 :
142 878577923 : add_edge (g, src, dest)->data = e;
143 : }
144 :
145 : /* Find the strongly connected components. */
146 61209205 : graphds_scc (g, NULL);
147 :
148 : /* Mark the irreducible loops. */
149 1013176316 : for (i = 0; i < g->n_vertices; i++)
150 1830545034 : for (ge = g->vertices[i].succ; ge; ge = ge->succ_next)
151 : {
152 878577923 : edge real = (edge) ge->data;
153 : /* edge E in graph G is irreducible if it connects two vertices in the
154 : same scc. */
155 :
156 : /* All edges should lead from a component with higher number to the
157 : one with lower one. */
158 878577923 : gcc_assert (g->vertices[ge->src].component >= g->vertices[ge->dest].component);
159 :
160 878577923 : if (g->vertices[ge->src].component != g->vertices[ge->dest].component)
161 877051744 : continue;
162 :
163 1526179 : real->flags |= EDGE_IRREDUCIBLE_LOOP;
164 1526179 : irred_loop_found = true;
165 1526179 : if (flow_bb_inside_loop_p (real->src->loop_father, real->dest))
166 1407539 : real->src->flags |= BB_IRREDUCIBLE_LOOP;
167 : }
168 :
169 61209205 : free_graph (g);
170 :
171 61209205 : loops_state_set (LOOPS_HAVE_MARKED_IRREDUCIBLE_REGIONS);
172 61209205 : return irred_loop_found;
173 : }
174 :
175 : /* Counts number of insns inside LOOP. */
176 : int
177 422359 : num_loop_insns (const class loop *loop)
178 : {
179 422359 : basic_block *bbs, bb;
180 422359 : unsigned i, ninsns = 0;
181 422359 : rtx_insn *insn;
182 :
183 422359 : bbs = get_loop_body (loop);
184 2583220 : for (i = 0; i < loop->num_nodes; i++)
185 : {
186 1738502 : bb = bbs[i];
187 19130311 : FOR_BB_INSNS (bb, insn)
188 17391809 : if (NONDEBUG_INSN_P (insn))
189 7539342 : ninsns++;
190 : }
191 422359 : free (bbs);
192 :
193 422359 : if (!ninsns)
194 : ninsns = 1; /* To avoid division by zero. */
195 :
196 422359 : return ninsns;
197 : }
198 :
199 : /* Counts number of insns executed on average per iteration LOOP. */
200 : int
201 422232 : average_num_loop_insns (const class loop *loop)
202 : {
203 422232 : basic_block *bbs, bb;
204 422232 : unsigned i, binsns;
205 422232 : sreal ninsns;
206 422232 : rtx_insn *insn;
207 :
208 422232 : ninsns = 0;
209 422232 : bbs = get_loop_body (loop);
210 2581783 : for (i = 0; i < loop->num_nodes; i++)
211 : {
212 1737323 : bb = bbs[i];
213 :
214 1737323 : binsns = 0;
215 19123249 : FOR_BB_INSNS (bb, insn)
216 17385926 : if (NONDEBUG_INSN_P (insn))
217 7535466 : binsns++;
218 :
219 1737323 : ninsns += (sreal)binsns * bb->count.to_sreal_scale (loop->header->count);
220 : /* Avoid overflows. */
221 1737323 : if (ninsns > 1000000)
222 : {
223 4 : free (bbs);
224 4 : return 1000000;
225 : }
226 : }
227 422228 : free (bbs);
228 :
229 422228 : int64_t ret = ninsns.to_int ();
230 422228 : if (!ret)
231 4649 : ret = 1; /* To avoid division by zero. */
232 :
233 422228 : return ret;
234 : }
235 :
236 : /* Compute how many times loop is entered. */
237 :
238 : profile_count
239 9133982 : loop_count_in (const class loop *loop)
240 : {
241 : /* Compute number of invocations of the loop. */
242 9133982 : profile_count count_in = profile_count::zero ();
243 9133982 : edge e;
244 9133982 : edge_iterator ei;
245 9133982 : bool found_latch = false;
246 :
247 9133982 : if (loops_state_satisfies_p (LOOPS_MAY_HAVE_MULTIPLE_LATCHES))
248 2796 : FOR_EACH_EDGE (e, ei, loop->header->preds)
249 1906 : if (!flow_bb_inside_loop_p (loop, e->src))
250 1007 : count_in += e->count ();
251 : else
252 : found_latch = true;
253 : else
254 27399276 : FOR_EACH_EDGE (e, ei, loop->header->preds)
255 18266184 : if (e->src != loop->latch)
256 9133092 : count_in += e->count ();
257 : else
258 : found_latch = true;
259 9133982 : gcc_checking_assert (found_latch);
260 9133982 : return count_in;
261 : }
262 :
263 : /* Return true if BB profile can be used to determine the expected number of
264 : iterations (that is number of executions of latch edge(s) for each
265 : entry of the loop. If this is the case initialize RET with the number
266 : of iterations.
267 :
268 : RELIABLE is set if profile indiates that the returned value should be
269 : realistic estimate. (This is the case if we read profile and did not
270 : messed it up yet and not the case of guessed profiles.)
271 :
272 : This function uses only CFG profile. We track more reliable info in
273 : loop_info structure and for loop optimization heuristics more relevant
274 : is get_estimated_loop_iterations API. */
275 :
276 : bool
277 10156264 : expected_loop_iterations_by_profile (const class loop *loop, sreal *ret,
278 : bool *reliable)
279 : {
280 10156264 : profile_count header_count = loop->header->count;
281 10156264 : if (reliable)
282 9923408 : *reliable = false;
283 :
284 : /* TODO: For single exit loops we can use loop exit edge probability.
285 : It also may be reliable while loop itself was adjusted. */
286 10156264 : if (!header_count.initialized_p ()
287 10312463 : || !header_count.nonzero_p ())
288 : return false;
289 :
290 9023768 : profile_count count_in = loop_count_in (loop);
291 :
292 9023768 : bool known;
293 : /* Number of iterations is number of executions of latch edge. */
294 9023768 : *ret = (header_count - count_in).to_sreal_scale (count_in, &known);
295 9023768 : if (!known)
296 : return false;
297 9012823 : if (reliable)
298 : {
299 : /* Header should have at least count_in many executions.
300 : Give up on clearly inconsistent profile. */
301 8780465 : if (header_count < count_in && header_count.differs_from_p (count_in))
302 : {
303 20284 : if (dump_file && (dump_flags & TDF_DETAILS))
304 0 : fprintf (dump_file, "Inconsistent bb profile of loop %i\n",
305 0 : loop->num);
306 20284 : *reliable = false;
307 : }
308 : else
309 17520094 : *reliable = count_in.reliable_p () && header_count.reliable_p ();
310 : }
311 : return true;
312 : }
313 :
314 : /* Return true if loop CFG profile may be unrealistically flat.
315 : This is a common case, since average loops iterate only about 5 times.
316 : In the case we do not have profile feedback or do not know real number of
317 : iterations during profile estimation, we are likely going to predict it with
318 : similar low iteration count. For static loop profiles we also artificially
319 : cap profile of loops with known large iteration count so they do not appear
320 : significantly more hot than other loops with unknown iteration counts.
321 :
322 : For loop optimization heuristics we ignore CFG profile and instead
323 : use get_estimated_loop_iterations API which returns estimate
324 : only when it is realistic. For unknown counts some optimizations,
325 : like vectorizer or unroller make guess that iteration count will
326 : be large. In this case we need to avoid scaling down the profile
327 : after the loop transform. */
328 :
329 : bool
330 95946 : maybe_flat_loop_profile (const class loop *loop)
331 : {
332 95946 : bool reliable;
333 95946 : sreal ret;
334 :
335 95946 : if (!expected_loop_iterations_by_profile (loop, &ret, &reliable))
336 : return true;
337 :
338 : /* Reliable CFG estimates ought never be flat. Sanity check with
339 : nb_iterations_estimate. If those differ, it is a but in profile
340 : updating code */
341 95919 : if (reliable)
342 : {
343 58 : int64_t intret = ret.to_nearest_int ();
344 58 : if (loop->any_estimate
345 58 : && (wi::ltu_p (intret * 2, loop->nb_iterations_estimate)
346 55 : || wi::gtu_p (intret, loop->nb_iterations_estimate * 2)))
347 : {
348 0 : if (dump_file && (dump_flags & TDF_DETAILS))
349 0 : fprintf (dump_file,
350 : "Loop %i has inconsistent iterations estimates: "
351 : "reliable CFG based iteration estimate is %f "
352 : "while nb_iterations_estimate is %i\n",
353 0 : loop->num,
354 : ret.to_double (),
355 0 : (int)loop->nb_iterations_estimate.to_shwi ());
356 0 : return true;
357 : }
358 58 : return false;
359 : }
360 :
361 : /* Allow some margin of error and see if we are close to known bounds.
362 : sreal (9,-3) is 9/8 */
363 95861 : int64_t intret = (ret * sreal (9, -3)).to_nearest_int ();
364 95861 : if (loop->any_upper_bound && wi::geu_p (intret, loop->nb_iterations_upper_bound))
365 : return false;
366 66743 : if (loop->any_likely_upper_bound
367 66743 : && wi::geu_p (intret, loop->nb_iterations_likely_upper_bound))
368 : return false;
369 66688 : if (loop->any_estimate
370 66688 : && wi::geu_p (intret, loop->nb_iterations_estimate))
371 : return false;
372 : return true;
373 : }
374 :
375 : /* Returns expected number of iterations of LOOP, according to
376 : measured or guessed profile.
377 :
378 : This functions attempts to return "sane" value even if profile
379 : information is not good enough to derive osmething. */
380 :
381 : gcov_type
382 39 : expected_loop_iterations_unbounded (const class loop *loop,
383 : bool *read_profile_p)
384 : {
385 39 : gcov_type expected = -1;
386 :
387 39 : if (read_profile_p)
388 0 : *read_profile_p = false;
389 :
390 39 : sreal sreal_expected;
391 39 : if (expected_loop_iterations_by_profile
392 39 : (loop, &sreal_expected, read_profile_p))
393 39 : expected = sreal_expected.to_nearest_int ();
394 : else
395 0 : expected = param_avg_loop_niter;
396 :
397 39 : HOST_WIDE_INT max = get_max_loop_iterations_int (loop);
398 39 : if (max != -1 && max < expected)
399 0 : return max;
400 :
401 : return expected;
402 : }
403 :
404 : /* Returns expected number of LOOP iterations. The returned value is bounded
405 : by REG_BR_PROB_BASE. */
406 :
407 : unsigned
408 39 : expected_loop_iterations (class loop *loop)
409 : {
410 39 : gcov_type expected = expected_loop_iterations_unbounded (loop);
411 39 : return (expected > REG_BR_PROB_BASE ? REG_BR_PROB_BASE : expected);
412 : }
413 :
414 : /* Returns the maximum level of nesting of subloops of LOOP. */
415 :
416 : unsigned
417 0 : get_loop_level (const class loop *loop)
418 : {
419 0 : const class loop *ploop;
420 0 : unsigned mx = 0, l;
421 :
422 0 : for (ploop = loop->inner; ploop; ploop = ploop->next)
423 : {
424 0 : l = get_loop_level (ploop);
425 0 : if (l >= mx)
426 0 : mx = l + 1;
427 : }
428 0 : return mx;
429 : }
430 :
431 : /* Initialize the constants for computing set costs. */
432 :
433 : void
434 213440 : init_set_costs (void)
435 : {
436 213440 : int speed;
437 213440 : rtx_insn *seq;
438 213440 : rtx reg1 = gen_raw_REG (SImode, LAST_VIRTUAL_REGISTER + 1);
439 213440 : rtx reg2 = gen_raw_REG (SImode, LAST_VIRTUAL_REGISTER + 2);
440 219144 : rtx addr = gen_raw_REG (Pmode, LAST_VIRTUAL_REGISTER + 3);
441 213440 : rtx mem = validize_mem (gen_rtx_MEM (SImode, addr));
442 213440 : unsigned i;
443 :
444 213440 : target_avail_regs = 0;
445 213440 : target_clobbered_regs = 0;
446 19849920 : for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
447 19636480 : if (TEST_HARD_REG_BIT (reg_class_contents[GENERAL_REGS], i)
448 19636480 : && !fixed_regs[i])
449 : {
450 3157687 : target_avail_regs++;
451 : /* ??? This is only a rough heuristic. It doesn't cope well
452 : with alternative ABIs, but that's an optimization rather than
453 : correctness issue. */
454 3157687 : if (default_function_abi.clobbers_full_reg_p (i))
455 1887600 : target_clobbered_regs++;
456 : }
457 :
458 213440 : target_res_regs = 3;
459 :
460 640320 : for (speed = 0; speed < 2; speed++)
461 : {
462 426880 : crtl->maybe_hot_insn_p = speed;
463 : /* Set up the costs for using extra registers:
464 :
465 : 1) If not many free registers remain, we should prefer having an
466 : additional move to decreasing the number of available registers.
467 : (TARGET_REG_COST).
468 : 2) If no registers are available, we need to spill, which may require
469 : storing the old value to memory and loading it back
470 : (TARGET_SPILL_COST). */
471 :
472 426880 : start_sequence ();
473 426880 : emit_move_insn (reg1, reg2);
474 426880 : seq = end_sequence ();
475 426880 : target_reg_cost [speed] = seq_cost (seq, speed);
476 :
477 426880 : start_sequence ();
478 426880 : emit_move_insn (mem, reg1);
479 426880 : emit_move_insn (reg2, mem);
480 426880 : seq = end_sequence ();
481 426880 : target_spill_cost [speed] = seq_cost (seq, speed);
482 : }
483 213440 : default_rtl_profile ();
484 213440 : }
485 :
486 : /* Estimates cost of increased register pressure caused by making N_NEW new
487 : registers live around the loop. N_OLD is the number of registers live
488 : around the loop. If CALL_P is true, also take into account that
489 : call-used registers may be clobbered in the loop body, reducing the
490 : number of available registers before we spill. */
491 :
492 : unsigned
493 4075392 : estimate_reg_pressure_cost (unsigned n_new, unsigned n_old, bool speed,
494 : bool call_p)
495 : {
496 4075392 : unsigned cost;
497 4075392 : unsigned regs_needed = n_new + n_old;
498 4075392 : unsigned available_regs = target_avail_regs;
499 :
500 : /* If there is a call in the loop body, the call-clobbered registers
501 : are not available for loop invariants. */
502 4075392 : if (call_p)
503 2726862 : available_regs = available_regs - target_clobbered_regs;
504 :
505 : /* If we have enough registers, we should use them and not restrict
506 : the transformations unnecessarily. */
507 4075392 : if (regs_needed + target_res_regs <= available_regs)
508 : return 0;
509 :
510 1554695 : if (regs_needed <= available_regs)
511 : /* If we are close to running out of registers, try to preserve
512 : them. */
513 1210283 : cost = target_reg_cost [speed] * n_new;
514 : else
515 : /* If we run out of registers, it is very expensive to add another
516 : one. */
517 344412 : cost = target_spill_cost [speed] * n_new;
518 :
519 3109390 : if (optimize && (flag_ira_region == IRA_REGION_ALL
520 1554695 : || flag_ira_region == IRA_REGION_MIXED)
521 4609461 : && number_of_loops (cfun) <= (unsigned) param_ira_max_loops_num)
522 : /* IRA regional allocation deals with high register pressure
523 : better. So decrease the cost (to do more accurate the cost
524 : calculation for IRA, we need to know how many registers lives
525 : through the loop transparently). */
526 1511264 : cost /= 2;
527 :
528 : return cost;
529 : }
530 :
531 : /* Sets EDGE_LOOP_EXIT flag for all loop exits. */
532 :
533 : void
534 3123053 : mark_loop_exit_edges (void)
535 : {
536 3123053 : basic_block bb;
537 3123053 : edge e;
538 :
539 6246106 : if (number_of_loops (cfun) <= 1)
540 2461472 : return;
541 :
542 21282942 : FOR_EACH_BB_FN (bb, cfun)
543 : {
544 20621361 : edge_iterator ei;
545 :
546 51728061 : FOR_EACH_EDGE (e, ei, bb->succs)
547 : {
548 31106700 : if (loop_outer (bb->loop_father)
549 31106700 : && loop_exit_edge_p (bb->loop_father, e))
550 3415428 : e->flags |= EDGE_LOOP_EXIT;
551 : else
552 27691272 : e->flags &= ~EDGE_LOOP_EXIT;
553 : }
554 : }
555 : }
556 :
557 : /* Return exit edge if loop has only one exit that is likely
558 : to be executed on runtime (i.e. it is not EH or leading
559 : to noreturn call. */
560 :
561 : edge
562 7589253 : single_likely_exit (class loop *loop, const vec<edge> &exits)
563 : {
564 7589253 : edge found = single_exit (loop);
565 7589253 : unsigned i;
566 7589253 : edge ex;
567 :
568 7589253 : if (found)
569 : return found;
570 7504821 : FOR_EACH_VEC_ELT (exits, i, ex)
571 : {
572 6737853 : if (probably_never_executed_edge_p (cfun, ex)
573 : /* We want to rule out paths to noreturns but not low probabilities
574 : resulting from adjustments or combining.
575 : FIXME: once we have better quality tracking, make this more
576 : robust. */
577 6737853 : || ex->probability <= profile_probability::very_unlikely ())
578 2689130 : continue;
579 4048723 : if (!found)
580 : found = ex;
581 : else
582 : return NULL;
583 : }
584 : return found;
585 : }
586 :
587 :
588 : /* Gets basic blocks of a LOOP. Header is the 0-th block, rest is in dfs
589 : order against direction of edges from latch. Specially, if
590 : header != latch, latch is the 1-st block. */
591 :
592 : auto_vec<basic_block>
593 499040 : get_loop_hot_path (const class loop *loop)
594 : {
595 499040 : basic_block bb = loop->header;
596 499040 : auto_vec<basic_block> path;
597 499040 : bitmap visited = BITMAP_ALLOC (NULL);
598 :
599 3193624 : while (true)
600 : {
601 1846332 : edge_iterator ei;
602 1846332 : edge e;
603 1846332 : edge best = NULL;
604 :
605 1846332 : path.safe_push (bb);
606 1846332 : bitmap_set_bit (visited, bb->index);
607 4800963 : FOR_EACH_EDGE (e, ei, bb->succs)
608 3579170 : if ((!best || e->probability > best->probability)
609 2512678 : && !loop_exit_edge_p (loop, e)
610 4976290 : && !bitmap_bit_p (visited, e->dest->index))
611 : best = e;
612 1846332 : if (!best || best->dest == loop->header)
613 : break;
614 1347292 : bb = best->dest;
615 1347292 : }
616 499040 : BITMAP_FREE (visited);
617 499040 : return path;
618 : }
|