Line data Source code
1 : /* Natural loop analysis code for GNU compiler.
2 : Copyright (C) 2002-2026 Free Software Foundation, Inc.
3 :
4 : This file is part of GCC.
5 :
6 : GCC is free software; you can redistribute it and/or modify it under
7 : the terms of the GNU General Public License as published by the Free
8 : Software Foundation; either version 3, or (at your option) any later
9 : version.
10 :
11 : GCC is distributed in the hope that it will be useful, but WITHOUT ANY
12 : WARRANTY; without even the implied warranty of MERCHANTABILITY or
13 : FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
14 : for more details.
15 :
16 : You should have received a copy of the GNU General Public License
17 : along with GCC; see the file COPYING3. If not see
18 : <http://www.gnu.org/licenses/>. */
19 :
20 : #include "config.h"
21 : #include "system.h"
22 : #include "coretypes.h"
23 : #include "backend.h"
24 : #include "rtl.h"
25 : #include "tree.h"
26 : #include "predict.h"
27 : #include "memmodel.h"
28 : #include "emit-rtl.h"
29 : #include "cfgloop.h"
30 : #include "explow.h"
31 : #include "expr.h"
32 : #include "graphds.h"
33 : #include "sreal.h"
34 : #include "regs.h"
35 : #include "function-abi.h"
36 :
37 : struct target_cfgloop default_target_cfgloop;
38 : #if SWITCHABLE_TARGET
39 : struct target_cfgloop *this_target_cfgloop = &default_target_cfgloop;
40 : #endif
41 :
42 : /* Checks whether BB is executed exactly once in each LOOP iteration. */
43 :
44 : bool
45 4595588 : just_once_each_iteration_p (const class loop *loop, const_basic_block bb)
46 : {
47 : /* It must be executed at least once each iteration. */
48 4595588 : if (!dominated_by_p (CDI_DOMINATORS, loop->latch, bb))
49 : return false;
50 :
51 : /* And just once. */
52 4135331 : if (bb->loop_father != loop)
53 : return false;
54 :
55 : /* But this was not enough. We might have some irreducible loop here. */
56 4123161 : if (bb->flags & BB_IRREDUCIBLE_LOOP)
57 19 : return false;
58 :
59 : return true;
60 : }
61 :
62 : /* Marks blocks and edges that are part of non-recognized loops; i.e. we
63 : throw away all latch edges and mark blocks inside any remaining cycle.
64 : Everything is a bit complicated due to fact we do not want to do this
65 : for parts of cycles that only "pass" through some loop -- i.e. for
66 : each cycle, we want to mark blocks that belong directly to innermost
67 : loop containing the whole cycle.
68 :
69 : LOOPS is the loop tree. */
70 :
71 : #define LOOP_REPR(LOOP) ((LOOP)->num + last_basic_block_for_fn (cfun))
72 : #define BB_REPR(BB) ((BB)->index + 1)
73 :
74 : bool
75 61413526 : mark_irreducible_loops (void)
76 : {
77 61413526 : basic_block act;
78 61413526 : struct graph_edge *ge;
79 61413526 : edge e;
80 61413526 : edge_iterator ei;
81 61413526 : int src, dest;
82 61413526 : unsigned depth;
83 61413526 : struct graph *g;
84 61413526 : int num = number_of_loops (cfun);
85 61413526 : class loop *cloop;
86 61413526 : bool irred_loop_found = false;
87 61413526 : int i;
88 :
89 61413526 : gcc_assert (current_loops != NULL);
90 :
91 : /* Reset the flags. */
92 777725745 : FOR_BB_BETWEEN (act, ENTRY_BLOCK_PTR_FOR_FN (cfun),
93 : EXIT_BLOCK_PTR_FOR_FN (cfun), next_bb)
94 : {
95 716312219 : act->flags &= ~BB_IRREDUCIBLE_LOOP;
96 1694441662 : FOR_EACH_EDGE (e, ei, act->succs)
97 978129443 : e->flags &= ~EDGE_IRREDUCIBLE_LOOP;
98 : }
99 :
100 : /* Create the edge lists. */
101 61413526 : g = new_graph (last_basic_block_for_fn (cfun) + num);
102 :
103 777725745 : FOR_BB_BETWEEN (act, ENTRY_BLOCK_PTR_FOR_FN (cfun),
104 : EXIT_BLOCK_PTR_FOR_FN (cfun), next_bb)
105 1694441662 : FOR_EACH_EDGE (e, ei, act->succs)
106 : {
107 : /* Ignore edges to exit. */
108 978129443 : if (e->dest == EXIT_BLOCK_PTR_FOR_FN (cfun))
109 60379851 : continue;
110 :
111 917749592 : src = BB_REPR (act);
112 917749592 : dest = BB_REPR (e->dest);
113 :
114 : /* Ignore latch edges. */
115 961093038 : if (e->dest->loop_father->header == e->dest
116 917749592 : && dominated_by_p (CDI_DOMINATORS, act, e->dest))
117 43343446 : continue;
118 :
119 : /* Edges inside a single loop should be left where they are. Edges
120 : to subloop headers should lead to representative of the subloop,
121 : but from the same place.
122 :
123 : Edges exiting loops should lead from representative
124 : of the son of nearest common ancestor of the loops in that
125 : act lays. */
126 :
127 874406146 : if (e->dest->loop_father->header == e->dest)
128 43343974 : dest = LOOP_REPR (e->dest->loop_father);
129 :
130 874406146 : if (!flow_bb_inside_loop_p (act->loop_father, e->dest))
131 : {
132 73985420 : depth = 1 + loop_depth (find_common_loop (act->loop_father,
133 73985420 : e->dest->loop_father));
134 147970840 : if (depth == loop_depth (act->loop_father))
135 : cloop = act->loop_father;
136 : else
137 4330488 : cloop = (*act->loop_father->superloops)[depth];
138 :
139 73985420 : src = LOOP_REPR (cloop);
140 : }
141 :
142 874406146 : add_edge (g, src, dest)->data = e;
143 : }
144 :
145 : /* Find the strongly connected components. */
146 61413526 : graphds_scc (g, NULL);
147 :
148 : /* Mark the irreducible loops. */
149 1010990512 : for (i = 0; i < g->n_vertices; i++)
150 1823983132 : for (ge = g->vertices[i].succ; ge; ge = ge->succ_next)
151 : {
152 874406146 : edge real = (edge) ge->data;
153 : /* edge E in graph G is irreducible if it connects two vertices in the
154 : same scc. */
155 :
156 : /* All edges should lead from a component with higher number to the
157 : one with lower one. */
158 874406146 : gcc_assert (g->vertices[ge->src].component >= g->vertices[ge->dest].component);
159 :
160 874406146 : if (g->vertices[ge->src].component != g->vertices[ge->dest].component)
161 872969137 : continue;
162 :
163 1437009 : real->flags |= EDGE_IRREDUCIBLE_LOOP;
164 1437009 : irred_loop_found = true;
165 1437009 : if (flow_bb_inside_loop_p (real->src->loop_father, real->dest))
166 1322600 : real->src->flags |= BB_IRREDUCIBLE_LOOP;
167 : }
168 :
169 61413526 : free_graph (g);
170 :
171 61413526 : loops_state_set (LOOPS_HAVE_MARKED_IRREDUCIBLE_REGIONS);
172 61413526 : return irred_loop_found;
173 : }
174 :
175 : /* Counts number of insns inside LOOP. */
176 : int
177 419254 : num_loop_insns (const class loop *loop)
178 : {
179 419254 : basic_block *bbs, bb;
180 419254 : unsigned i, ninsns = 0;
181 419254 : rtx_insn *insn;
182 :
183 419254 : bbs = get_loop_body (loop);
184 2581007 : for (i = 0; i < loop->num_nodes; i++)
185 : {
186 1742499 : bb = bbs[i];
187 19236397 : FOR_BB_INSNS (bb, insn)
188 17493898 : if (NONDEBUG_INSN_P (insn))
189 7550881 : ninsns++;
190 : }
191 419254 : free (bbs);
192 :
193 419254 : if (!ninsns)
194 : ninsns = 1; /* To avoid division by zero. */
195 :
196 419254 : return ninsns;
197 : }
198 :
199 : /* Counts number of insns executed on average per iteration LOOP. */
200 : int
201 419127 : average_num_loop_insns (const class loop *loop)
202 : {
203 419127 : basic_block *bbs, bb;
204 419127 : unsigned i, binsns;
205 419127 : sreal ninsns;
206 419127 : rtx_insn *insn;
207 :
208 419127 : ninsns = 0;
209 419127 : bbs = get_loop_body (loop);
210 2579570 : for (i = 0; i < loop->num_nodes; i++)
211 : {
212 1741320 : bb = bbs[i];
213 :
214 1741320 : binsns = 0;
215 19229335 : FOR_BB_INSNS (bb, insn)
216 17488015 : if (NONDEBUG_INSN_P (insn))
217 7547005 : binsns++;
218 :
219 1741320 : ninsns += (sreal)binsns * bb->count.to_sreal_scale (loop->header->count);
220 : /* Avoid overflows. */
221 1741320 : if (ninsns > 1000000)
222 : {
223 4 : free (bbs);
224 4 : return 1000000;
225 : }
226 : }
227 419123 : free (bbs);
228 :
229 419123 : int64_t ret = ninsns.to_int ();
230 419123 : if (!ret)
231 4649 : ret = 1; /* To avoid division by zero. */
232 :
233 419123 : return ret;
234 : }
235 :
236 : /* Compute how many times loop is entered. */
237 :
238 : profile_count
239 9069820 : loop_count_in (const class loop *loop)
240 : {
241 : /* Compute number of invocations of the loop. */
242 9069820 : profile_count count_in = profile_count::zero ();
243 9069820 : edge e;
244 9069820 : edge_iterator ei;
245 9069820 : bool found_latch = false;
246 :
247 9069820 : if (loops_state_satisfies_p (LOOPS_MAY_HAVE_MULTIPLE_LATCHES))
248 2796 : FOR_EACH_EDGE (e, ei, loop->header->preds)
249 1906 : if (!flow_bb_inside_loop_p (loop, e->src))
250 1007 : count_in += e->count ();
251 : else
252 : found_latch = true;
253 : else
254 27206790 : FOR_EACH_EDGE (e, ei, loop->header->preds)
255 18137860 : if (e->src != loop->latch)
256 9068930 : count_in += e->count ();
257 : else
258 : found_latch = true;
259 9069820 : gcc_checking_assert (found_latch);
260 9069820 : return count_in;
261 : }
262 :
263 : /* Return true if BB profile can be used to determine the expected number of
264 : iterations (that is number of executions of latch edge(s) for each
265 : entry of the loop. If this is the case initialize RET with the number
266 : of iterations.
267 :
268 : RELIABLE is set if profile indiates that the returned value should be
269 : realistic estimate. (This is the case if we read profile and did not
270 : messed it up yet and not the case of guessed profiles.)
271 :
272 : This function uses only CFG profile. We track more reliable info in
273 : loop_info structure and for loop optimization heuristics more relevant
274 : is get_estimated_loop_iterations API. */
275 :
276 : bool
277 10090618 : expected_loop_iterations_by_profile (const class loop *loop, sreal *ret,
278 : bool *reliable)
279 : {
280 10090618 : profile_count header_count = loop->header->count;
281 10090618 : if (reliable)
282 9856354 : *reliable = false;
283 :
284 : /* TODO: For single exit loops we can use loop exit edge probability.
285 : It also may be reliable while loop itself was adjusted. */
286 10090618 : if (!header_count.initialized_p ()
287 10247244 : || !header_count.nonzero_p ())
288 : return false;
289 :
290 8959086 : profile_count count_in = loop_count_in (loop);
291 :
292 8959086 : bool known;
293 : /* Number of iterations is number of executions of latch edge. */
294 8959086 : *ret = (header_count - count_in).to_sreal_scale (count_in, &known);
295 8959086 : if (!known)
296 : return false;
297 8948076 : if (reliable)
298 : {
299 : /* Header should have at least count_in many executions.
300 : Give up on clearly inconsistent profile. */
301 8714313 : if (header_count < count_in && header_count.differs_from_p (count_in))
302 : {
303 20510 : if (dump_file && (dump_flags & TDF_DETAILS))
304 0 : fprintf (dump_file, "Inconsistent bb profile of loop %i\n",
305 0 : loop->num);
306 20510 : *reliable = false;
307 : }
308 : else
309 17387338 : *reliable = count_in.reliable_p () && header_count.reliable_p ();
310 : }
311 : return true;
312 : }
313 :
314 : /* Return true if loop CFG profile may be unrealistically flat.
315 : This is a common case, since average loops iterate only about 5 times.
316 : In the case we do not have profile feedback or do not know real number of
317 : iterations during profile estimation, we are likely going to predict it with
318 : similar low iteration count. For static loop profiles we also artificially
319 : cap profile of loops with known large iteration count so they do not appear
320 : significantly more hot than other loops with unknown iteration counts.
321 :
322 : For loop optimization heuristics we ignore CFG profile and instead
323 : use get_estimated_loop_iterations API which returns estimate
324 : only when it is realistic. For unknown counts some optimizations,
325 : like vectorizer or unroller make guess that iteration count will
326 : be large. In this case we need to avoid scaling down the profile
327 : after the loop transform. */
328 :
329 : bool
330 96096 : maybe_flat_loop_profile (const class loop *loop)
331 : {
332 96096 : bool reliable;
333 96096 : sreal ret;
334 :
335 96096 : if (!expected_loop_iterations_by_profile (loop, &ret, &reliable))
336 : return true;
337 :
338 : /* Reliable CFG estimates ought never be flat. Sanity check with
339 : nb_iterations_estimate. If those differ, it is a but in profile
340 : updating code */
341 96069 : if (reliable)
342 : {
343 58 : int64_t intret = ret.to_nearest_int ();
344 58 : if (loop->any_estimate
345 58 : && (wi::ltu_p (intret * 2, loop->nb_iterations_estimate)
346 55 : || wi::gtu_p (intret, loop->nb_iterations_estimate * 2)))
347 : {
348 0 : if (dump_file && (dump_flags & TDF_DETAILS))
349 0 : fprintf (dump_file,
350 : "Loop %i has inconsistent iterations estimates: "
351 : "reliable CFG based iteration estimate is %f "
352 : "while nb_iterations_estimate is %i\n",
353 0 : loop->num,
354 : ret.to_double (),
355 0 : (int)loop->nb_iterations_estimate.to_shwi ());
356 0 : return true;
357 : }
358 58 : return false;
359 : }
360 :
361 : /* Allow some margin of error and see if we are close to known bounds.
362 : sreal (9,-3) is 9/8 */
363 96011 : int64_t intret = (ret * sreal (9, -3)).to_nearest_int ();
364 96011 : if (loop->any_upper_bound && wi::geu_p (intret, loop->nb_iterations_upper_bound))
365 : return false;
366 66875 : if (loop->any_likely_upper_bound
367 66875 : && wi::geu_p (intret, loop->nb_iterations_likely_upper_bound))
368 : return false;
369 66820 : if (loop->any_estimate
370 66820 : && wi::geu_p (intret, loop->nb_iterations_estimate))
371 : return false;
372 : return true;
373 : }
374 :
375 : /* Returns expected number of iterations of LOOP, according to
376 : measured or guessed profile.
377 :
378 : This functions attempts to return "sane" value even if profile
379 : information is not good enough to derive osmething. */
380 :
381 : gcov_type
382 39 : expected_loop_iterations_unbounded (const class loop *loop,
383 : bool *read_profile_p)
384 : {
385 39 : gcov_type expected = -1;
386 :
387 39 : if (read_profile_p)
388 0 : *read_profile_p = false;
389 :
390 39 : sreal sreal_expected;
391 39 : if (expected_loop_iterations_by_profile
392 39 : (loop, &sreal_expected, read_profile_p))
393 39 : expected = sreal_expected.to_nearest_int ();
394 : else
395 0 : expected = param_avg_loop_niter;
396 :
397 39 : HOST_WIDE_INT max = get_max_loop_iterations_int (loop);
398 39 : if (max != -1 && max < expected)
399 0 : return max;
400 :
401 : return expected;
402 : }
403 :
404 : /* Returns expected number of LOOP iterations. The returned value is bounded
405 : by REG_BR_PROB_BASE. */
406 :
407 : unsigned
408 39 : expected_loop_iterations (class loop *loop)
409 : {
410 39 : gcov_type expected = expected_loop_iterations_unbounded (loop);
411 39 : return (expected > REG_BR_PROB_BASE ? REG_BR_PROB_BASE : expected);
412 : }
413 :
414 : /* Returns the maximum level of nesting of subloops of LOOP. */
415 :
416 : unsigned
417 0 : get_loop_level (const class loop *loop)
418 : {
419 0 : const class loop *ploop;
420 0 : unsigned mx = 0, l;
421 :
422 0 : for (ploop = loop->inner; ploop; ploop = ploop->next)
423 : {
424 0 : l = get_loop_level (ploop);
425 0 : if (l >= mx)
426 0 : mx = l + 1;
427 : }
428 0 : return mx;
429 : }
430 :
431 : /* Initialize the constants for computing set costs. */
432 :
433 : void
434 215732 : init_set_costs (void)
435 : {
436 215732 : int speed;
437 215732 : rtx_insn *seq;
438 215732 : rtx reg1 = gen_raw_REG (SImode, LAST_VIRTUAL_REGISTER + 1);
439 215732 : rtx reg2 = gen_raw_REG (SImode, LAST_VIRTUAL_REGISTER + 2);
440 221443 : rtx addr = gen_raw_REG (Pmode, LAST_VIRTUAL_REGISTER + 3);
441 215732 : rtx mem = validize_mem (gen_rtx_MEM (SImode, addr));
442 215732 : unsigned i;
443 :
444 215732 : target_avail_regs = 0;
445 215732 : target_clobbered_regs = 0;
446 20063076 : for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
447 19847344 : if (TEST_HARD_REG_BIT (reg_class_contents[GENERAL_REGS], i)
448 19847344 : && !fixed_regs[i])
449 : {
450 3192075 : target_avail_regs++;
451 : /* ??? This is only a rough heuristic. It doesn't cope well
452 : with alternative ABIs, but that's an optimization rather than
453 : correctness issue. */
454 3192075 : if (default_function_abi.clobbers_full_reg_p (i))
455 1908246 : target_clobbered_regs++;
456 : }
457 :
458 215732 : target_res_regs = 3;
459 :
460 647196 : for (speed = 0; speed < 2; speed++)
461 : {
462 431464 : crtl->maybe_hot_insn_p = speed;
463 : /* Set up the costs for using extra registers:
464 :
465 : 1) If not many free registers remain, we should prefer having an
466 : additional move to decreasing the number of available registers.
467 : (TARGET_REG_COST).
468 : 2) If no registers are available, we need to spill, which may require
469 : storing the old value to memory and loading it back
470 : (TARGET_SPILL_COST). */
471 :
472 431464 : start_sequence ();
473 431464 : emit_move_insn (reg1, reg2);
474 431464 : seq = end_sequence ();
475 431464 : target_reg_cost [speed] = seq_cost (seq, speed);
476 :
477 431464 : start_sequence ();
478 431464 : emit_move_insn (mem, reg1);
479 431464 : emit_move_insn (reg2, mem);
480 431464 : seq = end_sequence ();
481 431464 : target_spill_cost [speed] = seq_cost (seq, speed);
482 : }
483 215732 : default_rtl_profile ();
484 215732 : }
485 :
486 : /* Estimates cost of increased register pressure caused by making N_NEW new
487 : registers live around the loop. N_OLD is the number of registers live
488 : around the loop. If CALL_P is true, also take into account that
489 : call-used registers may be clobbered in the loop body, reducing the
490 : number of available registers before we spill. */
491 :
492 : unsigned
493 4076414 : estimate_reg_pressure_cost (unsigned n_new, unsigned n_old, bool speed,
494 : bool call_p)
495 : {
496 4076414 : unsigned cost;
497 4076414 : unsigned regs_needed = n_new + n_old;
498 4076414 : unsigned available_regs = target_avail_regs;
499 :
500 : /* If there is a call in the loop body, the call-clobbered registers
501 : are not available for loop invariants. */
502 4076414 : if (call_p)
503 2739690 : available_regs = available_regs - target_clobbered_regs;
504 :
505 : /* If we have enough registers, we should use them and not restrict
506 : the transformations unnecessarily. */
507 4076414 : if (regs_needed + target_res_regs <= available_regs)
508 : return 0;
509 :
510 1559014 : if (regs_needed <= available_regs)
511 : /* If we are close to running out of registers, try to preserve
512 : them. */
513 1214708 : cost = target_reg_cost [speed] * n_new;
514 : else
515 : /* If we run out of registers, it is very expensive to add another
516 : one. */
517 344306 : cost = target_spill_cost [speed] * n_new;
518 :
519 3118028 : if (optimize && (flag_ira_region == IRA_REGION_ALL
520 1559014 : || flag_ira_region == IRA_REGION_MIXED)
521 4622414 : && number_of_loops (cfun) <= (unsigned) param_ira_max_loops_num)
522 : /* IRA regional allocation deals with high register pressure
523 : better. So decrease the cost (to do more accurate the cost
524 : calculation for IRA, we need to know how many registers lives
525 : through the loop transparently). */
526 1515599 : cost /= 2;
527 :
528 : return cost;
529 : }
530 :
531 : /* Sets EDGE_LOOP_EXIT flag for all loop exits. */
532 :
533 : void
534 3128677 : mark_loop_exit_edges (void)
535 : {
536 3128677 : basic_block bb;
537 3128677 : edge e;
538 :
539 6257354 : if (number_of_loops (cfun) <= 1)
540 2469257 : return;
541 :
542 21196168 : FOR_EACH_BB_FN (bb, cfun)
543 : {
544 20536748 : edge_iterator ei;
545 :
546 51511885 : FOR_EACH_EDGE (e, ei, bb->succs)
547 : {
548 30975137 : if (loop_outer (bb->loop_father)
549 30975137 : && loop_exit_edge_p (bb->loop_father, e))
550 3404571 : e->flags |= EDGE_LOOP_EXIT;
551 : else
552 27570566 : e->flags &= ~EDGE_LOOP_EXIT;
553 : }
554 : }
555 : }
556 :
557 : /* Return exit edge if loop has only one exit that is likely
558 : to be executed on runtime (i.e. it is not EH or leading
559 : to noreturn call. */
560 :
561 : edge
562 7566396 : single_likely_exit (class loop *loop, const vec<edge> &exits)
563 : {
564 7566396 : edge found = single_exit (loop);
565 7566396 : unsigned i;
566 7566396 : edge ex;
567 :
568 7566396 : if (found)
569 : return found;
570 7529021 : FOR_EACH_VEC_ELT (exits, i, ex)
571 : {
572 6761287 : if (probably_never_executed_edge_p (cfun, ex)
573 : /* We want to rule out paths to noreturns but not low probabilities
574 : resulting from adjustments or combining.
575 : FIXME: once we have better quality tracking, make this more
576 : robust. */
577 6761287 : || ex->probability <= profile_probability::very_unlikely ())
578 2724291 : continue;
579 4036996 : if (!found)
580 : found = ex;
581 : else
582 : return NULL;
583 : }
584 : return found;
585 : }
586 :
587 :
588 : /* Gets basic blocks of a LOOP. Header is the 0-th block, rest is in dfs
589 : order against direction of edges from latch. Specially, if
590 : header != latch, latch is the 1-st block. */
591 :
592 : auto_vec<basic_block>
593 501525 : get_loop_hot_path (const class loop *loop)
594 : {
595 501525 : basic_block bb = loop->header;
596 501525 : auto_vec<basic_block> path;
597 501525 : bitmap visited = BITMAP_ALLOC (NULL);
598 :
599 3209045 : while (true)
600 : {
601 1855285 : edge_iterator ei;
602 1855285 : edge e;
603 1855285 : edge best = NULL;
604 :
605 1855285 : path.safe_push (bb);
606 1855285 : bitmap_set_bit (visited, bb->index);
607 4824335 : FOR_EACH_EDGE (e, ei, bb->succs)
608 3596300 : if ((!best || e->probability > best->probability)
609 2525006 : && !loop_exit_edge_p (loop, e)
610 5000276 : && !bitmap_bit_p (visited, e->dest->index))
611 : best = e;
612 1855285 : if (!best || best->dest == loop->header)
613 : break;
614 1353760 : bb = best->dest;
615 1353760 : }
616 501525 : BITMAP_FREE (visited);
617 501525 : return path;
618 : }
|