LCOV - code coverage report
Current view: top level - gcc - tracer.cc (source / functions) Coverage Total Hit
Test: gcc.info Lines: 99.5 % 195 194
Test Date: 2026-03-28 14:25:54 Functions: 100.0 % 14 14
Legend: Lines:     hit not hit

            Line data    Source code
       1              : /* The tracer pass for the GNU compiler.
       2              :    Contributed by Jan Hubicka, SuSE Labs.
       3              :    Adapted to work on GIMPLE instead of RTL by Robert Kidd, UIUC.
       4              :    Copyright (C) 2001-2026 Free Software Foundation, Inc.
       5              : 
       6              :    This file is part of GCC.
       7              : 
       8              :    GCC is free software; you can redistribute it and/or modify it
       9              :    under the terms of the GNU General Public License as published by
      10              :    the Free Software Foundation; either version 3, or (at your option)
      11              :    any later version.
      12              : 
      13              :    GCC is distributed in the hope that it will be useful, but WITHOUT
      14              :    ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
      15              :    or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public
      16              :    License for more details.
      17              : 
      18              :    You should have received a copy of the GNU General Public License
      19              :    along with GCC; see the file COPYING3.  If not see
      20              :    <http://www.gnu.org/licenses/>.  */
      21              : 
      22              : /* This pass performs the tail duplication needed for superblock formation.
      23              :    For more information see:
      24              : 
      25              :      Design and Analysis of Profile-Based Optimization in Compaq's
      26              :      Compilation Tools for Alpha; Journal of Instruction-Level
      27              :      Parallelism 3 (2000) 1-25
      28              : 
      29              :    Unlike Compaq's implementation we don't do the loop peeling as most
      30              :    probably a better job can be done by a special pass and we don't
      31              :    need to worry too much about the code size implications as the tail
      32              :    duplicates are crossjumped again if optimizations are not
      33              :    performed.  */
      34              : 
      35              : 
      36              : #include "config.h"
      37              : #include "system.h"
      38              : #include "coretypes.h"
      39              : #include "backend.h"
      40              : #include "rtl.h"
      41              : #include "tree.h"
      42              : #include "gimple.h"
      43              : #include "cfghooks.h"
      44              : #include "tree-pass.h"
      45              : #include "profile.h"
      46              : #include "cfganal.h"
      47              : #include "gimple-iterator.h"
      48              : #include "tree-cfg.h"
      49              : #include "tree-ssa.h"
      50              : #include "tree-inline.h"
      51              : #include "cfgloop.h"
      52              : #include "alloc-pool.h"
      53              : #include "fibonacci_heap.h"
      54              : #include "tracer.h"
      55              : 
      56              : static void analyze_bb (basic_block, int *);
      57              : static bool better_p (const_edge, const_edge);
      58              : static edge find_best_successor (basic_block);
      59              : static edge find_best_predecessor (basic_block);
      60              : static int find_trace (basic_block, basic_block *);
      61              : 
      62              : /* Minimal outgoing edge probability considered for superblock formation.  */
      63              : static int probability_cutoff;
      64              : static int branch_ratio_cutoff;
      65              : 
      66              : /* A bit BB->index is set if BB has already been seen, i.e. it is
      67              :    connected to some trace already.  */
      68              : static sbitmap bb_seen;
      69              : 
      70              : static inline void
      71        82167 : mark_bb_seen (basic_block bb)
      72              : {
      73        82167 :   unsigned int size = SBITMAP_SIZE (bb_seen);
      74              : 
      75        82167 :   if ((unsigned int)bb->index >= size)
      76            0 :     bb_seen = sbitmap_resize (bb_seen, size * 2, 0);
      77              : 
      78        82167 :   bitmap_set_bit (bb_seen, bb->index);
      79        82167 : }
      80              : 
      81              : static inline bool
      82       443840 : bb_seen_p (basic_block bb)
      83              : {
      84       443840 :   return bitmap_bit_p (bb_seen, bb->index);
      85              : }
      86              : 
      87              : static sbitmap can_duplicate_bb;
      88              : 
      89              : /* Cache VAL as value of can_duplicate_bb_p for BB.  */
      90              : static inline void
      91       267778 : cache_can_duplicate_bb_p (const_basic_block bb, bool val)
      92              : {
      93       267778 :   if (val)
      94       267748 :     bitmap_set_bit (can_duplicate_bb, bb->index);
      95       267778 : }
      96              : 
      97              : /* Return cached value of can_duplicate_bb_p for BB.  */
      98              : static bool
      99      1184676 : cached_can_duplicate_bb_p (const_basic_block bb)
     100              : {
     101      1184676 :   if (can_duplicate_bb)
     102              :     {
     103      1127751 :       unsigned int size = SBITMAP_SIZE (can_duplicate_bb);
     104      1127751 :       if ((unsigned int)bb->index < size)
     105      1106217 :         return bitmap_bit_p (can_duplicate_bb, bb->index);
     106              : 
     107              :       /* Assume added bb's should not be duplicated.  */
     108              :       return false;
     109              :     }
     110              : 
     111        56925 :   return can_duplicate_block_p (bb);
     112              : }
     113              : 
     114              : /* Return true if we should ignore the basic block for purposes of tracing.  */
     115              : bool
     116      1263418 : ignore_bb_p (const_basic_block bb)
     117              : {
     118      1263418 :   if (bb->index < NUM_FIXED_BLOCKS)
     119              :     return true;
     120      1247181 :   if (optimize_bb_for_size_p (bb))
     121              :     return true;
     122              : 
     123      1184676 :   return !cached_can_duplicate_bb_p (bb);
     124              : }
     125              : 
     126              : /* Return number of instructions in the block.  */
     127              : 
     128              : static void
     129       267778 : analyze_bb (basic_block bb, int *count)
     130              : {
     131       267778 :   gimple_stmt_iterator gsi;
     132       267778 :   gimple *stmt;
     133       267778 :   int n = 0;
     134              : 
     135      1762213 :   for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
     136              :     {
     137      1226657 :       stmt = gsi_stmt (gsi);
     138      1226657 :       n += estimate_num_insns (stmt, &eni_size_weights);
     139              :     }
     140       267778 :   *count = n;
     141              : 
     142       267778 :   cache_can_duplicate_bb_p (bb, can_duplicate_block_p (const_cast<basic_block>
     143              :                                                        (bb)));
     144       267778 : }
     145              : 
     146              : /* Return true if E1 is more frequent than E2.  */
     147              : static bool
     148       603236 : better_p (const_edge e1, const_edge e2)
     149              : {
     150       882487 :   if ((e1->count () > e2->count ()) || (e1->count () < e2->count ()))
     151       581302 :     return e1->count () > e2->count ();
     152              :   /* This is needed to avoid changes in the decision after
     153              :      CFG is modified.  */
     154        21934 :   if (e1->src != e2->src)
     155         8296 :     return e1->src->index > e2->src->index;
     156        13638 :   return e1->dest->index > e2->dest->index;
     157              : }
     158              : 
     159              : /* Return most frequent successor of basic block BB.  */
     160              : 
     161              : static edge
     162       410282 : find_best_successor (basic_block bb)
     163              : {
     164       410282 :   edge e;
     165       410282 :   edge best = NULL;
     166       410282 :   edge_iterator ei;
     167              : 
     168      1160803 :   FOR_EACH_EDGE (e, ei, bb->succs)
     169              :     {
     170       750612 :       if (!e->count ().initialized_p ())
     171              :         return NULL;
     172       750521 :       if (!best || better_p (e, best))
     173              :         best = e;
     174              :     }
     175       410191 :   if (!best || ignore_bb_p (best->dest))
     176        13130 :     return NULL;
     177       397061 :   if (!best->probability.initialized_p ()
     178       397061 :       || best->probability.to_reg_br_prob_base () <= probability_cutoff)
     179              :     return NULL;
     180              :   return best;
     181              : }
     182              : 
     183              : /* Return most frequent predecessor of basic block BB.  */
     184              : 
     185              : static edge
     186       406395 : find_best_predecessor (basic_block bb)
     187              : {
     188       406395 :   edge e;
     189       406395 :   edge best = NULL;
     190       406395 :   edge_iterator ei;
     191              : 
     192      1075442 :   FOR_EACH_EDGE (e, ei, bb->preds)
     193              :     {
     194       669143 :       if (!e->count ().initialized_p ())
     195              :         return NULL;
     196       669047 :       if (!best || better_p (e, best))
     197              :         best = e;
     198              :     }
     199       406299 :   if (!best || ignore_bb_p (best->src))
     200        17257 :     return NULL;
     201       389042 :   if (bb->count.initialized_p ()
     202      1166982 :       && (best->count ().to_frequency (cfun) * REG_BR_PROB_BASE
     203       389042 :           < bb->count.to_frequency (cfun) * branch_ratio_cutoff))
     204          144 :     return NULL;
     205              :   return best;
     206              : }
     207              : 
     208              : /* Find the trace using bb and record it in the TRACE array.
     209              :    Return number of basic blocks recorded.  */
     210              : 
     211              : static int
     212        39537 : find_trace (basic_block bb, basic_block *trace)
     213              : {
     214        39537 :   int i = 0;
     215        39537 :   edge e;
     216              : 
     217        39537 :   if (dump_file)
     218           17 :     fprintf (dump_file, "Trace seed %i [%i]", bb->index, bb->count.to_frequency (cfun));
     219              : 
     220       103190 :   while ((e = find_best_predecessor (bb)) != NULL)
     221              :     {
     222        86748 :       basic_block bb2 = e->src;
     223       172025 :       if (bb_seen_p (bb2) || (e->flags & (EDGE_DFS_BACK | EDGE_COMPLEX))
     224       160066 :           || find_best_successor (bb2) != e)
     225              :         break;
     226        63653 :       if (dump_file)
     227           13 :         fprintf (dump_file, ",%i [%i]", bb->index, bb->count.to_frequency (cfun));
     228              :       bb = bb2;
     229              :     }
     230        39537 :   if (dump_file)
     231           17 :     fprintf (dump_file, " forward %i [%i]", bb->index, bb->count.to_frequency (cfun));
     232        39537 :   trace[i++] = bb;
     233              : 
     234              :   /* Follow the trace in forward direction.  */
     235       336964 :   while ((e = find_best_successor (bb)) != NULL)
     236              :     {
     237       317555 :       bb = e->dest;
     238       635042 :       if (bb_seen_p (bb) || (e->flags & (EDGE_DFS_BACK | EDGE_COMPLEX))
     239       620760 :           || find_best_predecessor (bb) != e)
     240              :         break;
     241       297427 :       if (dump_file)
     242           30 :         fprintf (dump_file, ",%i [%i]", bb->index, bb->count.to_frequency (cfun));
     243       297427 :       trace[i++] = bb;
     244              :     }
     245        39537 :   if (dump_file)
     246           17 :     fprintf (dump_file, "\n");
     247        39537 :   return i;
     248              : }
     249              : 
     250              : /* Duplicate block BB2, placing it after BB in the CFG.  Return the
     251              :    newly created block.  */
     252              : basic_block
     253        15595 : transform_duplicate (basic_block bb, basic_block bb2)
     254              : {
     255        15595 :   edge e;
     256        15595 :   basic_block copy;
     257              : 
     258        15595 :   e = find_edge (bb, bb2);
     259              : 
     260        15595 :   copy = duplicate_block (bb2, e, bb);
     261        15595 :   flush_pending_stmts (e);
     262              : 
     263        15595 :   add_phi_args_after_copy (&copy, 1, NULL);
     264              : 
     265        15595 :   return (copy);
     266              : }
     267              : 
     268              : /* Look for basic blocks in frequency order, construct traces and tail duplicate
     269              :    if profitable.  */
     270              : 
     271              : static bool
     272        11380 : tail_duplicate (void)
     273              : {
     274        11380 :   auto_vec<fibonacci_node<long, basic_block_def>*> blocks;
     275        11380 :   blocks.safe_grow_cleared (last_basic_block_for_fn (cfun), true);
     276              : 
     277        11380 :   basic_block *trace = XNEWVEC (basic_block, n_basic_blocks_for_fn (cfun));
     278        11380 :   int *counts = XNEWVEC (int, last_basic_block_for_fn (cfun));
     279        11380 :   int ninsns = 0, nduplicated = 0;
     280        11380 :   gcov_type weighted_insns = 0, traced_insns = 0;
     281        11380 :   fibonacci_heap<long, basic_block_def> heap (LONG_MIN);
     282        11380 :   gcov_type cover_insns;
     283        11380 :   int max_dup_insns;
     284        11380 :   basic_block bb;
     285        11380 :   bool changed = false;
     286              : 
     287              :   /* Create an oversized sbitmap to reduce the chance that we need to
     288              :      resize it.  */
     289        11380 :   bb_seen = sbitmap_alloc (last_basic_block_for_fn (cfun) * 2);
     290        11380 :   bitmap_clear (bb_seen);
     291        11380 :   can_duplicate_bb = sbitmap_alloc (last_basic_block_for_fn (cfun));
     292        11380 :   bitmap_clear (can_duplicate_bb);
     293        11380 :   initialize_original_copy_tables ();
     294              : 
     295        11380 :   if (profile_info && profile_status_for_fn (cfun) == PROFILE_READ)
     296          187 :     probability_cutoff = param_tracer_min_branch_probability_feedback;
     297              :   else
     298        11193 :     probability_cutoff = param_tracer_min_branch_probability;
     299        11380 :   probability_cutoff = REG_BR_PROB_BASE / 100 * probability_cutoff;
     300              : 
     301        11380 :   branch_ratio_cutoff =
     302        11380 :     (REG_BR_PROB_BASE / 100 * param_tracer_min_branch_ratio);
     303              : 
     304       279158 :   FOR_EACH_BB_FN (bb, cfun)
     305              :     {
     306       267778 :       int n;
     307       267778 :       analyze_bb (bb, &n);
     308       267778 :       if (!ignore_bb_p (bb))
     309       212204 :         blocks[bb->index] = heap.insert (-bb->count.to_frequency (cfun), bb);
     310              : 
     311       267778 :       counts [bb->index] = n;
     312       267778 :       ninsns += n;
     313       267778 :       weighted_insns += n * bb->count.to_frequency (cfun);
     314              :     }
     315              : 
     316        11380 :   if (profile_info && profile_status_for_fn (cfun) == PROFILE_READ)
     317          187 :     cover_insns = param_tracer_dynamic_coverage_feedback;
     318              :   else
     319        11193 :     cover_insns = param_tracer_dynamic_coverage;
     320        11380 :   cover_insns = (weighted_insns * cover_insns + 50) / 100;
     321        11380 :   max_dup_insns = (ninsns * param_tracer_max_code_growth + 50) / 100;
     322              : 
     323        11380 :   while (traced_insns < cover_insns && nduplicated < max_dup_insns
     324        51339 :          && !heap.empty ())
     325              :     {
     326        39959 :       basic_block bb = heap.extract_min ();
     327        39959 :       int n, pos;
     328              : 
     329        39959 :       if (!bb)
     330              :         break;
     331              : 
     332        39959 :       blocks[bb->index] = NULL;
     333              : 
     334        39959 :       if (ignore_bb_p (bb))
     335          422 :         continue;
     336        39537 :       gcc_assert (!bb_seen_p (bb));
     337              : 
     338        39537 :       n = find_trace (bb, trace);
     339              : 
     340        39537 :       bb = trace[0];
     341        39537 :       traced_insns += bb->count.to_frequency (cfun) * counts [bb->index];
     342        39537 :       if (blocks[bb->index])
     343              :         {
     344        10813 :           heap.delete_node (blocks[bb->index]);
     345        10813 :           blocks[bb->index] = NULL;
     346              :         }
     347              : 
     348       107850 :       for (pos = 1; pos < n; pos++)
     349              :         {
     350        82167 :           basic_block bb2 = trace[pos];
     351              : 
     352        82167 :           if (blocks[bb2->index])
     353              :             {
     354        73760 :               heap.delete_node (blocks[bb2->index]);
     355        73760 :               blocks[bb2->index] = NULL;
     356              :             }
     357        82167 :           traced_insns += bb2->count.to_frequency (cfun) * counts [bb2->index];
     358        82167 :           if (EDGE_COUNT (bb2->preds) > 1
     359        15128 :               && can_duplicate_block_p (bb2)
     360              :               /* We have the tendency to duplicate the loop header
     361              :                  of all do { } while loops.  Do not do that - it is
     362              :                  not profitable and it might create a loop with multiple
     363              :                  entries or at least rotate the loop.  */
     364        97295 :               && bb2->loop_father->header != bb2)
     365              :             {
     366        13854 :               nduplicated += counts [bb2->index];
     367        13854 :               basic_block copy = transform_duplicate (bb, bb2);
     368              : 
     369              :               /* Reconsider the original copy of block we've duplicated.
     370              :                  Removing the most common predecessor may make it to be
     371              :                  head.  */
     372        13854 :               blocks[bb2->index] = heap.insert (-bb2->count.to_frequency (cfun), bb2);
     373              : 
     374        13854 :               if (dump_file)
     375            7 :                 fprintf (dump_file, "Duplicated %i as %i [%i]\n",
     376              :                          bb2->index, copy->index, copy->count.to_frequency (cfun));
     377              : 
     378              :               bb2 = copy;
     379              :               changed = true;
     380              :             }
     381        82167 :           mark_bb_seen (bb2);
     382        82167 :           bb = bb2;
     383              :           /* In case the trace became infrequent, stop duplicating.  */
     384        82167 :           if (ignore_bb_p (bb))
     385              :             break;
     386              :         }
     387        39537 :       if (dump_file)
     388           17 :         fprintf (dump_file, " covered now %.1f\n\n",
     389           17 :                  traced_insns * 100.0 / weighted_insns);
     390              :     }
     391        11380 :   if (dump_file)
     392           10 :     fprintf (dump_file, "Duplicated %i insns (%i%%)\n", nduplicated,
     393           10 :              nduplicated * 100 / ninsns);
     394              : 
     395        11380 :   free_original_copy_tables ();
     396        11380 :   sbitmap_free (bb_seen);
     397        11380 :   sbitmap_free (can_duplicate_bb);
     398        11380 :   can_duplicate_bb = NULL;
     399        11380 :   free (trace);
     400        11380 :   free (counts);
     401              : 
     402        22760 :   return changed;
     403        11380 : }
     404              : 
     405              : namespace {
     406              : 
     407              : const pass_data pass_data_tracer =
     408              : {
     409              :   GIMPLE_PASS, /* type */
     410              :   "tracer", /* name */
     411              :   OPTGROUP_NONE, /* optinfo_flags */
     412              :   TV_TRACER, /* tv_id */
     413              :   0, /* properties_required */
     414              :   0, /* properties_provided */
     415              :   0, /* properties_destroyed */
     416              :   0, /* todo_flags_start */
     417              :   TODO_update_ssa, /* todo_flags_finish */
     418              : };
     419              : 
     420              : class pass_tracer : public gimple_opt_pass
     421              : {
     422              : public:
     423       287872 :   pass_tracer (gcc::context *ctxt)
     424       575744 :     : gimple_opt_pass (pass_data_tracer, ctxt)
     425              :   {}
     426              : 
     427              :   /* opt_pass methods: */
     428      1038027 :   bool gate (function *) final override
     429              :     {
     430      1038027 :       return (optimize > 0 && flag_tracer && flag_reorder_blocks);
     431              :     }
     432              : 
     433              :   unsigned int execute (function *) final override;
     434              : 
     435              : }; // class pass_tracer
     436              : 
     437              : unsigned int
     438        20219 : pass_tracer::execute (function *fun)
     439              : {
     440        20219 :   bool changed;
     441              : 
     442        20219 :   if (n_basic_blocks_for_fn (fun) <= NUM_FIXED_BLOCKS + 1)
     443              :     return 0;
     444              : 
     445        11380 :   mark_dfs_back_edges ();
     446        11380 :   if (dump_file)
     447           10 :     brief_dump_cfg (dump_file, dump_flags);
     448              : 
     449              :   /* Trace formation is done on the fly inside tail_duplicate */
     450        11380 :   changed = tail_duplicate ();
     451        11380 :   if (changed)
     452              :     {
     453         5146 :       free_dominance_info (CDI_DOMINATORS);
     454              :       /* If we changed the CFG schedule loops for fixup by cleanup_cfg.  */
     455         5146 :       loops_state_set (LOOPS_NEED_FIXUP);
     456              :     }
     457              : 
     458        11380 :   if (dump_file)
     459           10 :     brief_dump_cfg (dump_file, dump_flags);
     460              : 
     461        11380 :   return changed ? TODO_cleanup_cfg : 0;
     462              : }
     463              : } // anon namespace
     464              : 
     465              : gimple_opt_pass *
     466       287872 : make_pass_tracer (gcc::context *ctxt)
     467              : {
     468       287872 :   return new pass_tracer (ctxt);
     469              : }
        

Generated by: LCOV version 2.4-beta

LCOV profile is generated on x86_64 machine using following configure options: configure --disable-bootstrap --enable-coverage=opt --enable-languages=c,c++,fortran,go,jit,lto,rust,m2 --enable-host-shared. GCC test suite is run with the built compiler.