LCOV - code coverage report
Current view: top level - gcc - tracer.cc (source / functions) Coverage Total Hit
Test: gcc.info Lines: 99.5 % 195 194
Test Date: 2026-02-28 14:20:25 Functions: 100.0 % 14 14
Legend: Lines:     hit not hit

            Line data    Source code
       1              : /* The tracer pass for the GNU compiler.
       2              :    Contributed by Jan Hubicka, SuSE Labs.
       3              :    Adapted to work on GIMPLE instead of RTL by Robert Kidd, UIUC.
       4              :    Copyright (C) 2001-2026 Free Software Foundation, Inc.
       5              : 
       6              :    This file is part of GCC.
       7              : 
       8              :    GCC is free software; you can redistribute it and/or modify it
       9              :    under the terms of the GNU General Public License as published by
      10              :    the Free Software Foundation; either version 3, or (at your option)
      11              :    any later version.
      12              : 
      13              :    GCC is distributed in the hope that it will be useful, but WITHOUT
      14              :    ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
      15              :    or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public
      16              :    License for more details.
      17              : 
      18              :    You should have received a copy of the GNU General Public License
      19              :    along with GCC; see the file COPYING3.  If not see
      20              :    <http://www.gnu.org/licenses/>.  */
      21              : 
      22              : /* This pass performs the tail duplication needed for superblock formation.
      23              :    For more information see:
      24              : 
      25              :      Design and Analysis of Profile-Based Optimization in Compaq's
      26              :      Compilation Tools for Alpha; Journal of Instruction-Level
      27              :      Parallelism 3 (2000) 1-25
      28              : 
      29              :    Unlike Compaq's implementation we don't do the loop peeling as most
      30              :    probably a better job can be done by a special pass and we don't
      31              :    need to worry too much about the code size implications as the tail
      32              :    duplicates are crossjumped again if optimizations are not
      33              :    performed.  */
      34              : 
      35              : 
      36              : #include "config.h"
      37              : #include "system.h"
      38              : #include "coretypes.h"
      39              : #include "backend.h"
      40              : #include "rtl.h"
      41              : #include "tree.h"
      42              : #include "gimple.h"
      43              : #include "cfghooks.h"
      44              : #include "tree-pass.h"
      45              : #include "profile.h"
      46              : #include "cfganal.h"
      47              : #include "gimple-iterator.h"
      48              : #include "tree-cfg.h"
      49              : #include "tree-ssa.h"
      50              : #include "tree-inline.h"
      51              : #include "cfgloop.h"
      52              : #include "alloc-pool.h"
      53              : #include "fibonacci_heap.h"
      54              : #include "tracer.h"
      55              : 
      56              : static void analyze_bb (basic_block, int *);
      57              : static bool better_p (const_edge, const_edge);
      58              : static edge find_best_successor (basic_block);
      59              : static edge find_best_predecessor (basic_block);
      60              : static int find_trace (basic_block, basic_block *);
      61              : 
      62              : /* Minimal outgoing edge probability considered for superblock formation.  */
      63              : static int probability_cutoff;
      64              : static int branch_ratio_cutoff;
      65              : 
      66              : /* A bit BB->index is set if BB has already been seen, i.e. it is
      67              :    connected to some trace already.  */
      68              : static sbitmap bb_seen;
      69              : 
      70              : static inline void
      71        82423 : mark_bb_seen (basic_block bb)
      72              : {
      73        82423 :   unsigned int size = SBITMAP_SIZE (bb_seen);
      74              : 
      75        82423 :   if ((unsigned int)bb->index >= size)
      76            0 :     bb_seen = sbitmap_resize (bb_seen, size * 2, 0);
      77              : 
      78        82423 :   bitmap_set_bit (bb_seen, bb->index);
      79        82423 : }
      80              : 
      81              : static inline bool
      82       444631 : bb_seen_p (basic_block bb)
      83              : {
      84       444631 :   return bitmap_bit_p (bb_seen, bb->index);
      85              : }
      86              : 
      87              : static sbitmap can_duplicate_bb;
      88              : 
      89              : /* Cache VAL as value of can_duplicate_bb_p for BB.  */
      90              : static inline void
      91       268212 : cache_can_duplicate_bb_p (const_basic_block bb, bool val)
      92              : {
      93       268212 :   if (val)
      94       268182 :     bitmap_set_bit (can_duplicate_bb, bb->index);
      95       268212 : }
      96              : 
      97              : /* Return cached value of can_duplicate_bb_p for BB.  */
      98              : static bool
      99      1187005 : cached_can_duplicate_bb_p (const_basic_block bb)
     100              : {
     101      1187005 :   if (can_duplicate_bb)
     102              :     {
     103      1130141 :       unsigned int size = SBITMAP_SIZE (can_duplicate_bb);
     104      1130141 :       if ((unsigned int)bb->index < size)
     105      1108511 :         return bitmap_bit_p (can_duplicate_bb, bb->index);
     106              : 
     107              :       /* Assume added bb's should not be duplicated.  */
     108              :       return false;
     109              :     }
     110              : 
     111        56864 :   return can_duplicate_block_p (bb);
     112              : }
     113              : 
     114              : /* Return true if we should ignore the basic block for purposes of tracing.  */
     115              : bool
     116      1265737 : ignore_bb_p (const_basic_block bb)
     117              : {
     118      1265737 :   if (bb->index < NUM_FIXED_BLOCKS)
     119              :     return true;
     120      1249505 :   if (optimize_bb_for_size_p (bb))
     121              :     return true;
     122              : 
     123      1187005 :   return !cached_can_duplicate_bb_p (bb);
     124              : }
     125              : 
     126              : /* Return number of instructions in the block.  */
     127              : 
     128              : static void
     129       268212 : analyze_bb (basic_block bb, int *count)
     130              : {
     131       268212 :   gimple_stmt_iterator gsi;
     132       268212 :   gimple *stmt;
     133       268212 :   int n = 0;
     134              : 
     135      1754059 :   for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
     136              :     {
     137      1217635 :       stmt = gsi_stmt (gsi);
     138      1217635 :       n += estimate_num_insns (stmt, &eni_size_weights);
     139              :     }
     140       268212 :   *count = n;
     141              : 
     142       268212 :   cache_can_duplicate_bb_p (bb, can_duplicate_block_p (const_cast<basic_block>
     143              :                                                        (bb)));
     144       268212 : }
     145              : 
     146              : /* Return true if E1 is more frequent than E2.  */
     147              : static bool
     148       604699 : better_p (const_edge e1, const_edge e2)
     149              : {
     150       884640 :   if ((e1->count () > e2->count ()) || (e1->count () < e2->count ()))
     151       582727 :     return e1->count () > e2->count ();
     152              :   /* This is needed to avoid changes in the decision after
     153              :      CFG is modified.  */
     154        21972 :   if (e1->src != e2->src)
     155         8237 :     return e1->src->index > e2->src->index;
     156        13735 :   return e1->dest->index > e2->dest->index;
     157              : }
     158              : 
     159              : /* Return most frequent successor of basic block BB.  */
     160              : 
     161              : static edge
     162       411119 : find_best_successor (basic_block bb)
     163              : {
     164       411119 :   edge e;
     165       411119 :   edge best = NULL;
     166       411119 :   edge_iterator ei;
     167              : 
     168      1163080 :   FOR_EACH_EDGE (e, ei, bb->succs)
     169              :     {
     170       752052 :       if (!e->count ().initialized_p ())
     171              :         return NULL;
     172       751961 :       if (!best || better_p (e, best))
     173              :         best = e;
     174              :     }
     175       411028 :   if (!best || ignore_bb_p (best->dest))
     176        13172 :     return NULL;
     177       397856 :   if (!best->probability.initialized_p ()
     178       397856 :       || best->probability.to_reg_br_prob_base () <= probability_cutoff)
     179              :     return NULL;
     180              :   return best;
     181              : }
     182              : 
     183              : /* Return most frequent predecessor of basic block BB.  */
     184              : 
     185              : static edge
     186       407182 : find_best_predecessor (basic_block bb)
     187              : {
     188       407182 :   edge e;
     189       407182 :   edge best = NULL;
     190       407182 :   edge_iterator ei;
     191              : 
     192      1077876 :   FOR_EACH_EDGE (e, ei, bb->preds)
     193              :     {
     194       670790 :       if (!e->count ().initialized_p ())
     195              :         return NULL;
     196       670694 :       if (!best || better_p (e, best))
     197              :         best = e;
     198              :     }
     199       407086 :   if (!best || ignore_bb_p (best->src))
     200        17294 :     return NULL;
     201       389792 :   if (bb->count.initialized_p ()
     202      1169236 :       && (best->count ().to_frequency (cfun) * REG_BR_PROB_BASE
     203       389792 :           < bb->count.to_frequency (cfun) * branch_ratio_cutoff))
     204          140 :     return NULL;
     205              :   return best;
     206              : }
     207              : 
     208              : /* Find the trace using bb and record it in the TRACE array.
     209              :    Return number of basic blocks recorded.  */
     210              : 
     211              : static int
     212        39602 : find_trace (basic_block bb, basic_block *trace)
     213              : {
     214        39602 :   int i = 0;
     215        39602 :   edge e;
     216              : 
     217        39602 :   if (dump_file)
     218           17 :     fprintf (dump_file, "Trace seed %i [%i]", bb->index, bb->count.to_frequency (cfun));
     219              : 
     220       103430 :   while ((e = find_best_predecessor (bb)) != NULL)
     221              :     {
     222        86939 :       basic_block bb2 = e->src;
     223       172386 :       if (bb_seen_p (bb2) || (e->flags & (EDGE_DFS_BACK | EDGE_COMPLEX))
     224       160452 :           || find_best_successor (bb2) != e)
     225              :         break;
     226        63828 :       if (dump_file)
     227           13 :         fprintf (dump_file, ",%i [%i]", bb->index, bb->count.to_frequency (cfun));
     228              :       bb = bb2;
     229              :     }
     230        39602 :   if (dump_file)
     231           17 :     fprintf (dump_file, " forward %i [%i]", bb->index, bb->count.to_frequency (cfun));
     232        39602 :   trace[i++] = bb;
     233              : 
     234              :   /* Follow the trace in forward direction.  */
     235       337606 :   while ((e = find_best_successor (bb)) != NULL)
     236              :     {
     237       318090 :       bb = e->dest;
     238       636110 :       if (bb_seen_p (bb) || (e->flags & (EDGE_DFS_BACK | EDGE_COMPLEX))
     239       621842 :           || find_best_predecessor (bb) != e)
     240              :         break;
     241       298004 :       if (dump_file)
     242           30 :         fprintf (dump_file, ",%i [%i]", bb->index, bb->count.to_frequency (cfun));
     243       298004 :       trace[i++] = bb;
     244              :     }
     245        39602 :   if (dump_file)
     246           17 :     fprintf (dump_file, "\n");
     247        39602 :   return i;
     248              : }
     249              : 
     250              : /* Duplicate block BB2, placing it after BB in the CFG.  Return the
     251              :    newly created block.  */
     252              : basic_block
     253        15615 : transform_duplicate (basic_block bb, basic_block bb2)
     254              : {
     255        15615 :   edge e;
     256        15615 :   basic_block copy;
     257              : 
     258        15615 :   e = find_edge (bb, bb2);
     259              : 
     260        15615 :   copy = duplicate_block (bb2, e, bb);
     261        15615 :   flush_pending_stmts (e);
     262              : 
     263        15615 :   add_phi_args_after_copy (&copy, 1, NULL);
     264              : 
     265        15615 :   return (copy);
     266              : }
     267              : 
     268              : /* Look for basic blocks in frequency order, construct traces and tail duplicate
     269              :    if profitable.  */
     270              : 
     271              : static bool
     272        11356 : tail_duplicate (void)
     273              : {
     274        11356 :   auto_vec<fibonacci_node<long, basic_block_def>*> blocks;
     275        11356 :   blocks.safe_grow_cleared (last_basic_block_for_fn (cfun), true);
     276              : 
     277        11356 :   basic_block *trace = XNEWVEC (basic_block, n_basic_blocks_for_fn (cfun));
     278        11356 :   int *counts = XNEWVEC (int, last_basic_block_for_fn (cfun));
     279        11356 :   int ninsns = 0, nduplicated = 0;
     280        11356 :   gcov_type weighted_insns = 0, traced_insns = 0;
     281        11356 :   fibonacci_heap<long, basic_block_def> heap (LONG_MIN);
     282        11356 :   gcov_type cover_insns;
     283        11356 :   int max_dup_insns;
     284        11356 :   basic_block bb;
     285        11356 :   bool changed = false;
     286              : 
     287              :   /* Create an oversized sbitmap to reduce the chance that we need to
     288              :      resize it.  */
     289        11356 :   bb_seen = sbitmap_alloc (last_basic_block_for_fn (cfun) * 2);
     290        11356 :   bitmap_clear (bb_seen);
     291        11356 :   can_duplicate_bb = sbitmap_alloc (last_basic_block_for_fn (cfun));
     292        11356 :   bitmap_clear (can_duplicate_bb);
     293        11356 :   initialize_original_copy_tables ();
     294              : 
     295        11356 :   if (profile_info && profile_status_for_fn (cfun) == PROFILE_READ)
     296          187 :     probability_cutoff = param_tracer_min_branch_probability_feedback;
     297              :   else
     298        11169 :     probability_cutoff = param_tracer_min_branch_probability;
     299        11356 :   probability_cutoff = REG_BR_PROB_BASE / 100 * probability_cutoff;
     300              : 
     301        11356 :   branch_ratio_cutoff =
     302        11356 :     (REG_BR_PROB_BASE / 100 * param_tracer_min_branch_ratio);
     303              : 
     304       279568 :   FOR_EACH_BB_FN (bb, cfun)
     305              :     {
     306       268212 :       int n;
     307       268212 :       analyze_bb (bb, &n);
     308       268212 :       if (!ignore_bb_p (bb))
     309       212670 :         blocks[bb->index] = heap.insert (-bb->count.to_frequency (cfun), bb);
     310              : 
     311       268212 :       counts [bb->index] = n;
     312       268212 :       ninsns += n;
     313       268212 :       weighted_insns += n * bb->count.to_frequency (cfun);
     314              :     }
     315              : 
     316        11356 :   if (profile_info && profile_status_for_fn (cfun) == PROFILE_READ)
     317          187 :     cover_insns = param_tracer_dynamic_coverage_feedback;
     318              :   else
     319        11169 :     cover_insns = param_tracer_dynamic_coverage;
     320        11356 :   cover_insns = (weighted_insns * cover_insns + 50) / 100;
     321        11356 :   max_dup_insns = (ninsns * param_tracer_max_code_growth + 50) / 100;
     322              : 
     323        11356 :   while (traced_insns < cover_insns && nduplicated < max_dup_insns
     324        51380 :          && !heap.empty ())
     325              :     {
     326        40024 :       basic_block bb = heap.extract_min ();
     327        40024 :       int n, pos;
     328              : 
     329        40024 :       if (!bb)
     330              :         break;
     331              : 
     332        40024 :       blocks[bb->index] = NULL;
     333              : 
     334        40024 :       if (ignore_bb_p (bb))
     335          422 :         continue;
     336        39602 :       gcc_assert (!bb_seen_p (bb));
     337              : 
     338        39602 :       n = find_trace (bb, trace);
     339              : 
     340        39602 :       bb = trace[0];
     341        39602 :       traced_insns += bb->count.to_frequency (cfun) * counts [bb->index];
     342        39602 :       if (blocks[bb->index])
     343              :         {
     344        10838 :           heap.delete_node (blocks[bb->index]);
     345        10838 :           blocks[bb->index] = NULL;
     346              :         }
     347              : 
     348       108133 :       for (pos = 1; pos < n; pos++)
     349              :         {
     350        82423 :           basic_block bb2 = trace[pos];
     351              : 
     352        82423 :           if (blocks[bb2->index])
     353              :             {
     354        73984 :               heap.delete_node (blocks[bb2->index]);
     355        73984 :               blocks[bb2->index] = NULL;
     356              :             }
     357        82423 :           traced_insns += bb2->count.to_frequency (cfun) * counts [bb2->index];
     358        82423 :           if (EDGE_COUNT (bb2->preds) > 1
     359        15167 :               && can_duplicate_block_p (bb2)
     360              :               /* We have the tendency to duplicate the loop header
     361              :                  of all do { } while loops.  Do not do that - it is
     362              :                  not profitable and it might create a loop with multiple
     363              :                  entries or at least rotate the loop.  */
     364        97590 :               && bb2->loop_father->header != bb2)
     365              :             {
     366        13892 :               nduplicated += counts [bb2->index];
     367        13892 :               basic_block copy = transform_duplicate (bb, bb2);
     368              : 
     369              :               /* Reconsider the original copy of block we've duplicated.
     370              :                  Removing the most common predecessor may make it to be
     371              :                  head.  */
     372        13892 :               blocks[bb2->index] = heap.insert (-bb2->count.to_frequency (cfun), bb2);
     373              : 
     374        13892 :               if (dump_file)
     375            7 :                 fprintf (dump_file, "Duplicated %i as %i [%i]\n",
     376              :                          bb2->index, copy->index, copy->count.to_frequency (cfun));
     377              : 
     378              :               bb2 = copy;
     379              :               changed = true;
     380              :             }
     381        82423 :           mark_bb_seen (bb2);
     382        82423 :           bb = bb2;
     383              :           /* In case the trace became infrequent, stop duplicating.  */
     384        82423 :           if (ignore_bb_p (bb))
     385              :             break;
     386              :         }
     387        39602 :       if (dump_file)
     388           17 :         fprintf (dump_file, " covered now %.1f\n\n",
     389           17 :                  traced_insns * 100.0 / weighted_insns);
     390              :     }
     391        11356 :   if (dump_file)
     392           10 :     fprintf (dump_file, "Duplicated %i insns (%i%%)\n", nduplicated,
     393           10 :              nduplicated * 100 / ninsns);
     394              : 
     395        11356 :   free_original_copy_tables ();
     396        11356 :   sbitmap_free (bb_seen);
     397        11356 :   sbitmap_free (can_duplicate_bb);
     398        11356 :   can_duplicate_bb = NULL;
     399        11356 :   free (trace);
     400        11356 :   free (counts);
     401              : 
     402        22712 :   return changed;
     403        11356 : }
     404              : 
     405              : namespace {
     406              : 
     407              : const pass_data pass_data_tracer =
     408              : {
     409              :   GIMPLE_PASS, /* type */
     410              :   "tracer", /* name */
     411              :   OPTGROUP_NONE, /* optinfo_flags */
     412              :   TV_TRACER, /* tv_id */
     413              :   0, /* properties_required */
     414              :   0, /* properties_provided */
     415              :   0, /* properties_destroyed */
     416              :   0, /* todo_flags_start */
     417              :   TODO_update_ssa, /* todo_flags_finish */
     418              : };
     419              : 
     420              : class pass_tracer : public gimple_opt_pass
     421              : {
     422              : public:
     423       285722 :   pass_tracer (gcc::context *ctxt)
     424       571444 :     : gimple_opt_pass (pass_data_tracer, ctxt)
     425              :   {}
     426              : 
     427              :   /* opt_pass methods: */
     428      1041484 :   bool gate (function *) final override
     429              :     {
     430      1041484 :       return (optimize > 0 && flag_tracer && flag_reorder_blocks);
     431              :     }
     432              : 
     433              :   unsigned int execute (function *) final override;
     434              : 
     435              : }; // class pass_tracer
     436              : 
     437              : unsigned int
     438        20166 : pass_tracer::execute (function *fun)
     439              : {
     440        20166 :   bool changed;
     441              : 
     442        20166 :   if (n_basic_blocks_for_fn (fun) <= NUM_FIXED_BLOCKS + 1)
     443              :     return 0;
     444              : 
     445        11356 :   mark_dfs_back_edges ();
     446        11356 :   if (dump_file)
     447           10 :     brief_dump_cfg (dump_file, dump_flags);
     448              : 
     449              :   /* Trace formation is done on the fly inside tail_duplicate */
     450        11356 :   changed = tail_duplicate ();
     451        11356 :   if (changed)
     452              :     {
     453         5131 :       free_dominance_info (CDI_DOMINATORS);
     454              :       /* If we changed the CFG schedule loops for fixup by cleanup_cfg.  */
     455         5131 :       loops_state_set (LOOPS_NEED_FIXUP);
     456              :     }
     457              : 
     458        11356 :   if (dump_file)
     459           10 :     brief_dump_cfg (dump_file, dump_flags);
     460              : 
     461        11356 :   return changed ? TODO_cleanup_cfg : 0;
     462              : }
     463              : } // anon namespace
     464              : 
     465              : gimple_opt_pass *
     466       285722 : make_pass_tracer (gcc::context *ctxt)
     467              : {
     468       285722 :   return new pass_tracer (ctxt);
     469              : }
        

Generated by: LCOV version 2.4-beta

LCOV profile is generated on x86_64 machine using following configure options: configure --disable-bootstrap --enable-coverage=opt --enable-languages=c,c++,fortran,go,jit,lto,rust,m2 --enable-host-shared. GCC test suite is run with the built compiler.