LCOV - code coverage report
Current view: top level - gcc - ipa-profile.cc (source / functions) Coverage Total Hit
Test: gcc.info Lines: 94.0 % 501 471
Test Date: 2026-02-28 14:20:25 Functions: 100.0 % 22 22
Legend: Lines:     hit not hit

            Line data    Source code
       1              : /* Basic IPA optimizations based on profile.
       2              :    Copyright (C) 2003-2026 Free Software Foundation, Inc.
       3              : 
       4              : This file is part of GCC.
       5              : 
       6              : GCC is free software; you can redistribute it and/or modify it under
       7              : the terms of the GNU General Public License as published by the Free
       8              : Software Foundation; either version 3, or (at your option) any later
       9              : version.
      10              : 
      11              : GCC is distributed in the hope that it will be useful, but WITHOUT ANY
      12              : WARRANTY; without even the implied warranty of MERCHANTABILITY or
      13              : FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
      14              : for more details.
      15              : 
      16              : You should have received a copy of the GNU General Public License
      17              : along with GCC; see the file COPYING3.  If not see
      18              : <http://www.gnu.org/licenses/>.  */
      19              : 
      20              : /* ipa-profile pass implements the following analysis propagating profille
      21              :    inter-procedurally.
      22              : 
      23              :    - Count histogram construction.  This is a histogram analyzing how much
      24              :      time is spent executing statements with a given execution count read
      25              :      from profile feedback. This histogram is complete only with LTO,
      26              :      otherwise it contains information only about the current unit.
      27              : 
      28              :      The information is used to set hot/cold thresholds.
      29              :    - Next speculative indirect call resolution is performed:  the local
      30              :      profile pass assigns profile-id to each function and provide us with a
      31              :      histogram specifying the most common target.  We look up the callgraph
      32              :      node corresponding to the target and produce a speculative call.
      33              : 
      34              :      This call may or may not survive through IPA optimization based on decision
      35              :      of inliner.
      36              :    - Finally we propagate the following flags: unlikely executed, executed
      37              :      once, executed at startup and executed at exit.  These flags are used to
      38              :      control code size/performance threshold and code placement (by producing
      39              :      .text.unlikely/.text.hot/.text.startup/.text.exit subsections).  */
      40              : #include "config.h"
      41              : #include "system.h"
      42              : #include "coretypes.h"
      43              : #include "backend.h"
      44              : #include "tree.h"
      45              : #include "gimple.h"
      46              : #include "predict.h"
      47              : #include "alloc-pool.h"
      48              : #include "tree-pass.h"
      49              : #include "cgraph.h"
      50              : #include "data-streamer.h"
      51              : #include "gimple-iterator.h"
      52              : #include "ipa-utils.h"
      53              : #include "profile.h"
      54              : #include "value-prof.h"
      55              : #include "tree-inline.h"
      56              : #include "symbol-summary.h"
      57              : #include "tree-vrp.h"
      58              : #include "sreal.h"
      59              : #include "ipa-cp.h"
      60              : #include "ipa-prop.h"
      61              : #include "ipa-fnsummary.h"
      62              : 
      63              : /* Entry in the histogram.  */
      64              : 
      65              : struct histogram_entry
      66              : {
      67              :   gcov_type count;
      68              :   int time;
      69              :   int size;
      70              : };
      71              : 
      72              : /* Histogram of profile values.
      73              :    The histogram is represented as an ordered vector of entries allocated via
      74              :    histogram_pool. During construction a separate hashtable is kept to lookup
      75              :    duplicate entries.  */
      76              : 
      77              : vec<histogram_entry *> histogram;
      78              : static object_allocator<histogram_entry> histogram_pool ("IPA histogram");
      79              : 
      80              : /* Hashtable support for storing SSA names hashed by their SSA_NAME_VAR.  */
      81              : 
      82              : struct histogram_hash : nofree_ptr_hash <histogram_entry>
      83              : {
      84              :   static inline hashval_t hash (const histogram_entry *);
      85              :   static inline int equal (const histogram_entry *, const histogram_entry *);
      86              : };
      87              : 
      88              : inline hashval_t
      89        35456 : histogram_hash::hash (const histogram_entry *val)
      90              : {
      91        35456 :   return val->count;
      92              : }
      93              : 
      94              : inline int
      95        18337 : histogram_hash::equal (const histogram_entry *val, const histogram_entry *val2)
      96              : {
      97        18337 :   return val->count == val2->count;
      98              : }
      99              : 
     100              : /* Account TIME and SIZE executed COUNT times into HISTOGRAM.
     101              :    HASHTABLE is the on-side hash kept to avoid duplicates.  */
     102              : 
     103              : static void
     104        17259 : account_time_size (hash_table<histogram_hash> *hashtable,
     105              :                    vec<histogram_entry *> &histogram,
     106              :                    gcov_type count, int time, int size)
     107              : {
     108        17259 :   histogram_entry key = {count, 0, 0};
     109        17259 :   histogram_entry **val = hashtable->find_slot (&key, INSERT);
     110              : 
     111        17259 :   if (!*val)
     112              :     {
     113         3517 :       *val = histogram_pool.allocate ();
     114         3517 :       **val = key;
     115         3517 :       histogram.safe_push (*val);
     116              :     }
     117        17259 :   (*val)->time += time;
     118        17259 :   (*val)->size += size;
     119        17259 : }
     120              : 
     121              : int
     122         4660 : cmp_counts (const void *v1, const void *v2)
     123              : {
     124         4660 :   const histogram_entry *h1 = *(const histogram_entry * const *)v1;
     125         4660 :   const histogram_entry *h2 = *(const histogram_entry * const *)v2;
     126         4660 :   if (h1->count < h2->count)
     127              :     return 1;
     128         2699 :   if (h1->count > h2->count)
     129         2699 :     return -1;
     130              :   return 0;
     131              : }
     132              : 
     133              : /* Dump HISTOGRAM to FILE.  */
     134              : 
     135              : static void
     136           27 : dump_histogram (FILE *file, vec<histogram_entry *> histogram)
     137              : {
     138           27 :   unsigned int i;
     139           27 :   gcov_type overall_time = 0, cumulated_time = 0, cumulated_size = 0,
     140           27 :             overall_size = 0;
     141              : 
     142           27 :   fprintf (dump_file, "Histogram:\n");
     143           73 :   for (i = 0; i < histogram.length (); i++)
     144              :     {
     145           19 :       overall_time += histogram[i]->count * histogram[i]->time;
     146           19 :       overall_size += histogram[i]->size;
     147              :     }
     148           27 :   if (!overall_time)
     149           23 :     overall_time = 1;
     150           27 :   if (!overall_size)
     151           23 :     overall_size = 1;
     152           46 :   for (i = 0; i < histogram.length (); i++)
     153              :     {
     154           19 :       cumulated_time += histogram[i]->count * histogram[i]->time;
     155           19 :       cumulated_size += histogram[i]->size;
     156           19 :       fprintf (file, "  %" PRId64": time:%i (%2.2f) size:%i (%2.2f)\n",
     157           19 :                (int64_t) histogram[i]->count,
     158           19 :                histogram[i]->time,
     159           19 :                cumulated_time * 100.0 / overall_time,
     160           19 :                histogram[i]->size,
     161           19 :                cumulated_size * 100.0 / overall_size);
     162              :    }
     163           27 : }
     164              : 
     165              : /* Structure containing speculative target information from profile.  */
     166              : 
     167              : struct speculative_call_target
     168              : {
     169          116 :   speculative_call_target (unsigned int id = 0, int prob = 0)
     170           55 :     : target_id (id), target_probability (prob)
     171              :   {
     172              :   }
     173              : 
     174              :   /* Profile_id of target obtained from profile.  */
     175              :   unsigned int target_id;
     176              :   /* Probability that call will land in function with target_id.  */
     177              :   unsigned int target_probability;
     178              : };
     179              : 
     180       141730 : class speculative_call_summary
     181              : {
     182              : public:
     183       141730 :   speculative_call_summary () : speculative_call_targets ()
     184              :   {}
     185              : 
     186              :   auto_vec<speculative_call_target> speculative_call_targets;
     187              : 
     188              :   void dump (FILE *f);
     189              : 
     190              : };
     191              : 
     192              :   /* Class to manage call summaries.  */
     193              : 
     194              : class ipa_profile_call_summaries
     195              :   : public call_summary<speculative_call_summary *>
     196              : {
     197              : public:
     198       160846 :   ipa_profile_call_summaries (symbol_table *table)
     199       321692 :     : call_summary<speculative_call_summary *> (table)
     200              :   {}
     201              : 
     202              :   /* Duplicate info when an edge is cloned.  */
     203              :   void duplicate (cgraph_edge *, cgraph_edge *,
     204              :                   speculative_call_summary *old_sum,
     205              :                   speculative_call_summary *new_sum) final override;
     206              : };
     207              : 
     208              : static ipa_profile_call_summaries *call_sums = NULL;
     209              : 
     210              : /* Dump all information in speculative call summary to F.  */
     211              : 
     212              : void
     213           34 : speculative_call_summary::dump (FILE *f)
     214              : {
     215           34 :   cgraph_node *n2;
     216              : 
     217           34 :   unsigned spec_count = speculative_call_targets.length ();
     218           42 :   for (unsigned i = 0; i < spec_count; i++)
     219              :     {
     220            8 :       speculative_call_target item = speculative_call_targets[i];
     221            8 :       n2 = find_func_by_profile_id (item.target_id);
     222            8 :       if (n2)
     223            8 :         fprintf (f, "    The %i speculative target is %s with prob %3.2f\n", i,
     224              :                  n2->dump_name (),
     225            8 :                  item.target_probability / (float) REG_BR_PROB_BASE);
     226              :       else
     227            0 :         fprintf (f, "    The %i speculative target is %u with prob %3.2f\n", i,
     228              :                  item.target_id,
     229            0 :                  item.target_probability / (float) REG_BR_PROB_BASE);
     230              :     }
     231           34 : }
     232              : 
     233              : /* Duplicate info when an edge is cloned.  */
     234              : 
     235              : void
     236           42 : ipa_profile_call_summaries::duplicate (cgraph_edge *, cgraph_edge *,
     237              :                                        speculative_call_summary *old_sum,
     238              :                                        speculative_call_summary *new_sum)
     239              : {
     240           42 :   if (!old_sum)
     241              :     return;
     242              : 
     243           42 :   unsigned old_count = old_sum->speculative_call_targets.length ();
     244           42 :   if (!old_count)
     245              :     return;
     246              : 
     247           42 :   new_sum->speculative_call_targets.reserve_exact (old_count);
     248           42 :   new_sum->speculative_call_targets.quick_grow_cleared (old_count);
     249              : 
     250           97 :   for (unsigned i = 0; i < old_count; i++)
     251              :     {
     252          165 :       new_sum->speculative_call_targets[i]
     253           55 :         = old_sum->speculative_call_targets[i];
     254              :     }
     255              : }
     256              : 
     257              : /* Collect histogram and speculative target summaries from CFG profiles.  */
     258              : 
     259              : static void
     260       148644 : ipa_profile_generate_summary (void)
     261              : {
     262       148644 :   struct cgraph_node *node;
     263       148644 :   gimple_stmt_iterator gsi;
     264       148644 :   basic_block bb;
     265              : 
     266       148644 :   hash_table<histogram_hash> hashtable (10);
     267              : 
     268       148644 :   gcc_checking_assert (!call_sums);
     269       148644 :   call_sums = new ipa_profile_call_summaries (symtab);
     270              : 
     271      1483579 :   FOR_EACH_FUNCTION_WITH_GIMPLE_BODY (node)
     272      1334935 :     if (ENTRY_BLOCK_PTR_FOR_FN
     273      2669870 :           (DECL_STRUCT_FUNCTION (node->decl))->count.ipa_p ())
     274        27001 :       FOR_EACH_BB_FN (bb, DECL_STRUCT_FUNCTION (node->decl))
     275              :         {
     276        20755 :           int time = 0;
     277        20755 :           int size = 0;
     278       124961 :           for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
     279              :             {
     280        83451 :               gimple *stmt = gsi_stmt (gsi);
     281        83451 :               if (gimple_code (stmt) == GIMPLE_CALL
     282        83451 :                   && !gimple_call_fndecl (stmt))
     283              :                 {
     284          214 :                   histogram_value h;
     285          214 :                   h = gimple_histogram_value_of_type
     286          214 :                         (DECL_STRUCT_FUNCTION (node->decl),
     287              :                          stmt, HIST_TYPE_INDIR_CALL);
     288              :                   /* No need to do sanity check: gimple_ic_transform already
     289              :                      takes away bad histograms.  */
     290          214 :                   if (h)
     291              :                     {
     292           40 :                       gcov_type val, count, all;
     293           40 :                       struct cgraph_edge *e = node->get_edge (stmt);
     294           40 :                       if (e && !e->indirect_unknown_callee)
     295            0 :                         continue;
     296              : 
     297           40 :                       speculative_call_summary *csum
     298           40 :                         = call_sums->get_create (e);
     299              : 
     300         1320 :                       for (unsigned j = 0; j < GCOV_TOPN_MAXIMUM_TRACKED_VALUES;
     301              :                            j++)
     302              :                         {
     303         1280 :                           if (!get_nth_most_common_value (NULL, "indirect call",
     304              :                                                           h, &val, &count, &all,
     305              :                                                           j))
     306         1229 :                             continue;
     307              : 
     308           51 :                           if (val == 0 || count == 0)
     309            0 :                             continue;
     310              : 
     311           51 :                           if (count > all)
     312              :                             {
     313            0 :                               if (dump_file)
     314            0 :                                 fprintf (dump_file,
     315              :                                          "Probability capped to 1\n");
     316            0 :                               count = all;
     317              :                             }
     318           51 :                           speculative_call_target item (
     319              :                             val,
     320           51 :                             profile_count::from_gcov_type (count)
     321              :                               .probability_in
     322           51 :                                 (profile_count::from_gcov_type (all))
     323           51 :                                  .to_reg_br_prob_base ());
     324           51 :                           csum->speculative_call_targets.safe_push (item);
     325              :                         }
     326              : 
     327           40 :                       gimple_remove_histogram_value
     328           40 :                          (DECL_STRUCT_FUNCTION (node->decl), stmt, h);
     329              :                     }
     330              :                 }
     331        83451 :               time += estimate_num_insns (stmt, &eni_time_weights);
     332        83451 :               size += estimate_num_insns (stmt, &eni_size_weights);
     333              :             }
     334        54777 :           if (bb->count.ipa_p () && bb->count.initialized_p ())
     335        17011 :             account_time_size (&hashtable, histogram,
     336        34022 :                                bb->count.ipa ().to_gcov_type (),
     337              :                                time, size);
     338              :         }
     339       148644 :   histogram.qsort (cmp_counts);
     340       148644 : }
     341              : 
     342              : /* Serialize the speculative summary info for LTO.  */
     343              : 
     344              : static void
     345         2601 : ipa_profile_write_edge_summary (lto_simple_output_block *ob,
     346              :                                 speculative_call_summary *csum)
     347              : {
     348         2601 :   unsigned len = 0;
     349              : 
     350         2601 :   len = csum->speculative_call_targets.length ();
     351              : 
     352            4 :   gcc_assert (len <= GCOV_TOPN_MAXIMUM_TRACKED_VALUES);
     353              : 
     354         2601 :   streamer_write_hwi_stream (ob->main_stream, len);
     355              : 
     356         2601 :   if (len)
     357              :     {
     358            4 :       unsigned spec_count = csum->speculative_call_targets.length ();
     359           14 :       for (unsigned i = 0; i < spec_count; i++)
     360              :         {
     361           10 :           speculative_call_target item = csum->speculative_call_targets[i];
     362           10 :           gcc_assert (item.target_id);
     363           10 :           streamer_write_hwi_stream (ob->main_stream, item.target_id);
     364           10 :           streamer_write_hwi_stream (ob->main_stream, item.target_probability);
     365              :         }
     366              :     }
     367         2601 : }
     368              : 
     369              : /* Serialize the ipa info for lto.  */
     370              : 
     371              : static void
     372        20000 : ipa_profile_write_summary (void)
     373              : {
     374        20000 :   struct lto_simple_output_block *ob
     375        20000 :     = lto_create_simple_output_block (LTO_section_ipa_profile);
     376        20000 :   unsigned int i;
     377              : 
     378        20000 :   streamer_write_uhwi_stream (ob->main_stream, histogram.length ());
     379        40439 :   for (i = 0; i < histogram.length (); i++)
     380              :     {
     381          439 :       streamer_write_gcov_count_stream (ob->main_stream, histogram[i]->count);
     382          439 :       streamer_write_uhwi_stream (ob->main_stream, histogram[i]->time);
     383          439 :       streamer_write_uhwi_stream (ob->main_stream, histogram[i]->size);
     384              :     }
     385              : 
     386        20000 :   if (!call_sums)
     387            0 :     return;
     388              : 
     389              :   /* Serialize speculative targets information.  */
     390        20000 :   unsigned int count = 0;
     391        20000 :   lto_symtab_encoder_t encoder = ob->decl_state->symtab_node_encoder;
     392        20000 :   lto_symtab_encoder_iterator lsei;
     393        20000 :   cgraph_node *node;
     394              : 
     395       113628 :   for (lsei = lsei_start_function_in_partition (encoder); !lsei_end_p (lsei);
     396        93628 :        lsei_next_function_in_partition (&lsei))
     397              :     {
     398        93628 :       node = lsei_cgraph_node (lsei);
     399        93628 :       if (node->definition && node->has_gimple_body_p ()
     400       186281 :           && node->indirect_calls)
     401         1023 :         count++;
     402              :     }
     403              : 
     404        20000 :   streamer_write_uhwi_stream (ob->main_stream, count);
     405              : 
     406              :   /* Process all of the functions.  */
     407        20000 :   for (lsei = lsei_start_function_in_partition (encoder);
     408        28068 :        !lsei_end_p (lsei) && count; lsei_next_function_in_partition (&lsei))
     409              :     {
     410         8068 :       cgraph_node *node = lsei_cgraph_node (lsei);
     411         8068 :       if (node->definition && node->has_gimple_body_p ()
     412        15787 :           && node->indirect_calls)
     413              :         {
     414         1023 :           int node_ref = lto_symtab_encoder_encode (encoder, node);
     415         1023 :           streamer_write_uhwi_stream (ob->main_stream, node_ref);
     416              : 
     417         3624 :           for (cgraph_edge *e = node->indirect_calls; e; e = e->next_callee)
     418              :             {
     419         2601 :               speculative_call_summary *csum = call_sums->get_create (e);
     420         2601 :               ipa_profile_write_edge_summary (ob, csum);
     421              :             }
     422              :       }
     423              :     }
     424              : 
     425        20000 :   lto_destroy_simple_output_block (ob);
     426              : }
     427              : 
     428              : /* Dump all profile summary data for all cgraph nodes and edges to file F.  */
     429              : 
     430              : static void
     431           27 : ipa_profile_dump_all_summaries (FILE *f)
     432              : {
     433           27 :   fprintf (dump_file,
     434              :            "\n========== IPA-profile speculative targets: ==========\n");
     435           27 :   cgraph_node *node;
     436           98 :   FOR_EACH_FUNCTION_WITH_GIMPLE_BODY (node)
     437              :     {
     438           71 :       fprintf (f, "\nSummary for node %s:\n", node->dump_name ());
     439          105 :       for (cgraph_edge *e = node->indirect_calls; e; e = e->next_callee)
     440              :         {
     441           34 :           fprintf (f, "  Summary for %s of indirect edge %d:\n",
     442           34 :                    e->caller->dump_name (), e->lto_stmt_uid);
     443           34 :           speculative_call_summary *csum = call_sums->get_create (e);
     444           34 :           csum->dump (f);
     445              :         }
     446              :     }
     447           27 :   fprintf (f, "\n\n");
     448           27 : }
     449              : 
     450              : /* Read speculative targets information about edge for LTO WPA.  */
     451              : 
     452              : static void
     453         1405 : ipa_profile_read_edge_summary (class lto_input_block *ib, cgraph_edge *edge)
     454              : {
     455         1405 :   unsigned i, len;
     456              : 
     457         1405 :   len = streamer_read_hwi (ib);
     458         1405 :   gcc_assert (len <= GCOV_TOPN_MAXIMUM_TRACKED_VALUES);
     459         1405 :   speculative_call_summary *csum = call_sums->get_create (edge);
     460              : 
     461         1415 :   for (i = 0; i < len; i++)
     462              :   {
     463           10 :     unsigned int target_id = streamer_read_hwi (ib);
     464           10 :     int target_probability = streamer_read_hwi (ib);
     465           10 :     speculative_call_target item (target_id, target_probability);
     466           10 :     csum->speculative_call_targets.safe_push (item);
     467              :   }
     468         1405 : }
     469              : 
     470              : /* Read profile speculative targets section information for LTO WPA.  */
     471              : 
     472              : static void
     473        10925 : ipa_profile_read_summary_section (struct lto_file_decl_data *file_data,
     474              :                                   class lto_input_block *ib)
     475              : {
     476        10925 :   if (!ib)
     477              :     return;
     478              : 
     479        10925 :   lto_symtab_encoder_t encoder = file_data->symtab_node_encoder;
     480              : 
     481        10925 :   unsigned int count = streamer_read_uhwi (ib);
     482              : 
     483        10925 :   unsigned int i;
     484        10925 :   unsigned int index;
     485        10925 :   cgraph_node * node;
     486              : 
     487        11603 :   for (i = 0; i < count; i++)
     488              :     {
     489          678 :       index = streamer_read_uhwi (ib);
     490          678 :       node
     491          678 :         = dyn_cast<cgraph_node *> (lto_symtab_encoder_deref (encoder, index));
     492              : 
     493         2083 :       for (cgraph_edge *e = node->indirect_calls; e; e = e->next_callee)
     494         1405 :         ipa_profile_read_edge_summary (ib, e);
     495              :     }
     496              : }
     497              : 
     498              : /* Deserialize the IPA histogram and speculative targets summary info for LTO.
     499              :    */
     500              : 
     501              : static void
     502        12202 : ipa_profile_read_summary (void)
     503              : {
     504        12202 :   struct lto_file_decl_data ** file_data_vec
     505        12202 :     = lto_get_file_decl_data ();
     506        12202 :   struct lto_file_decl_data * file_data;
     507        12202 :   int j = 0;
     508              : 
     509        12202 :   hash_table<histogram_hash> hashtable (10);
     510              : 
     511        12202 :   gcc_checking_assert (!call_sums);
     512        12202 :   call_sums = new ipa_profile_call_summaries (symtab);
     513              : 
     514        25459 :   while ((file_data = file_data_vec[j++]))
     515              :     {
     516        13257 :       const char *data;
     517        13257 :       size_t len;
     518        13257 :       class lto_input_block *ib
     519        13257 :         = lto_create_simple_input_block (file_data,
     520              :                                          LTO_section_ipa_profile,
     521              :                                          &data, &len);
     522        13257 :       if (ib)
     523              :         {
     524        10925 :           unsigned int num = streamer_read_uhwi (ib);
     525        10925 :           unsigned int n;
     526        11173 :           for (n = 0; n < num; n++)
     527              :             {
     528          248 :               gcov_type count = streamer_read_gcov_count (ib);
     529          248 :               int time = streamer_read_uhwi (ib);
     530          248 :               int size = streamer_read_uhwi (ib);
     531          248 :               account_time_size (&hashtable, histogram,
     532              :                                  count, time, size);
     533              :             }
     534              : 
     535        10925 :           ipa_profile_read_summary_section (file_data, ib);
     536              : 
     537        10925 :           lto_destroy_simple_input_block (file_data,
     538              :                                           LTO_section_ipa_profile,
     539              :                                           ib, data, len);
     540              :         }
     541              :     }
     542        12202 :   histogram.qsort (cmp_counts);
     543        12202 : }
     544              : 
     545              : /* Data used by ipa_propagate_frequency.  */
     546              : 
     547              : struct ipa_propagate_frequency_data
     548              : {
     549              :   cgraph_node *function_symbol;
     550              :   bool maybe_unlikely_executed;
     551              :   bool maybe_executed_once;
     552              :   bool only_called_at_startup;
     553              :   bool only_called_at_exit;
     554              : };
     555              : 
     556              : /* Worker for ipa_propagate_frequency_1.  */
     557              : 
     558              : static bool
     559      5100257 : ipa_propagate_frequency_1 (struct cgraph_node *node, void *data)
     560              : {
     561      5100257 :   struct ipa_propagate_frequency_data *d;
     562      5100257 :   struct cgraph_edge *edge;
     563              : 
     564      5100257 :   d = (struct ipa_propagate_frequency_data *)data;
     565      5100257 :   for (edge = node->callers;
     566     10306031 :        edge && (d->maybe_unlikely_executed || d->maybe_executed_once
     567       149273 :                 || d->only_called_at_startup || d->only_called_at_exit);
     568      5205774 :        edge = edge->next_caller)
     569              :     {
     570      5205774 :       if (edge->caller != d->function_symbol)
     571              :         {
     572      5203290 :           d->only_called_at_startup &= edge->caller->only_called_at_startup;
     573              :           /* It makes sense to put main() together with the static constructors.
     574              :              It will be executed for sure, but rest of functions called from
     575              :              main are definitely not at startup only.  */
     576      5203290 :           if (MAIN_NAME_P (DECL_NAME (edge->caller->decl)))
     577       156121 :             d->only_called_at_startup = 0;
     578      5203290 :           d->only_called_at_exit &= edge->caller->only_called_at_exit;
     579              :         }
     580              : 
     581              :       /* When profile feedback is available, do not try to propagate too hard;
     582              :          counts are already good guide on function frequencies and roundoff
     583              :          errors can make us to push function into unlikely section even when
     584              :          it is executed by the train run.  Transfer the function only if all
     585              :          callers are unlikely executed.  */
     586      5205774 :       if (profile_info
     587      5206307 :           && !(edge->callee->count.ipa () == profile_count::zero ())
     588      5206304 :           && (edge->caller->frequency != NODE_FREQUENCY_UNLIKELY_EXECUTED
     589            0 :               || (edge->caller->inlined_to
     590            0 :                   && edge->caller->inlined_to->frequency
     591            0 :                      != NODE_FREQUENCY_UNLIKELY_EXECUTED)))
     592          530 :           d->maybe_unlikely_executed = false;
     593      5205774 :       if (edge->count.ipa ().initialized_p ()
     594      5205774 :           && !edge->count.ipa ().nonzero_p ())
     595        30361 :         continue;
     596      5175413 :       switch (edge->caller->frequency)
     597              :         {
     598              :         case NODE_FREQUENCY_UNLIKELY_EXECUTED:
     599              :           break;
     600       373146 :         case NODE_FREQUENCY_EXECUTED_ONCE:
     601       373146 :           {
     602       373146 :             if (dump_file && (dump_flags & TDF_DETAILS))
     603           90 :               fprintf (dump_file, "  Called by %s that is executed once\n",
     604              :                        edge->caller->dump_name ());
     605       373146 :             d->maybe_unlikely_executed = false;
     606       373146 :             ipa_call_summary *s = ipa_call_summaries->get (edge);
     607       373146 :             if (s != NULL && s->loop_depth)
     608              :               {
     609         9326 :                 d->maybe_executed_once = false;
     610         9326 :                 if (dump_file && (dump_flags & TDF_DETAILS))
     611            6 :                   fprintf (dump_file, "  Called in loop\n");
     612              :               }
     613              :             break;
     614              :           }
     615      4799832 :         case NODE_FREQUENCY_HOT:
     616      4799832 :         case NODE_FREQUENCY_NORMAL:
     617      4799832 :           if (dump_file && (dump_flags & TDF_DETAILS))
     618          136 :             fprintf (dump_file, "  Called by %s that is normal or hot\n",
     619              :                      edge->caller->dump_name ());
     620      4799832 :           d->maybe_unlikely_executed = false;
     621      4799832 :           d->maybe_executed_once = false;
     622      4799832 :           break;
     623              :         }
     624              :     }
     625      5100257 :   return edge != NULL;
     626              : }
     627              : 
     628              : /* Return true if NODE contains hot calls.  */
     629              : 
     630              : bool
     631        28235 : contains_hot_call_p (struct cgraph_node *node)
     632              : {
     633        28235 :   struct cgraph_edge *e;
     634        76931 :   for (e = node->callees; e; e = e->next_callee)
     635        48698 :     if (e->maybe_hot_p ())
     636              :       return true;
     637        48696 :     else if (!e->inline_failed
     638        48696 :              && contains_hot_call_p (e->callee))
     639              :       return true;
     640        28608 :   for (e = node->indirect_calls; e; e = e->next_callee)
     641          375 :     if (e->maybe_hot_p ())
     642              :       return true;
     643              :   return false;
     644              : }
     645              : 
     646              : /* See if the frequency of NODE can be updated based on frequencies of its
     647              :    callers.  */
     648              : bool
     649      8757434 : ipa_propagate_frequency (struct cgraph_node *node)
     650              : {
     651      8757434 :   struct ipa_propagate_frequency_data d = {node, true, true, true, true};
     652      8757434 :   bool changed = false;
     653              : 
     654              :   /* We cannot propagate anything useful about externally visible functions
     655              :      nor about virtuals.  */
     656      8757434 :   if (!node->local
     657      5184934 :       || node->alias
     658     13930667 :       || (opt_for_fn (node->decl, flag_devirtualize)
     659      5089498 :           && DECL_VIRTUAL_P (node->decl)))
     660              :     return false;
     661      5091603 :   gcc_assert (node->analyzed);
     662      5091603 :   if (dump_file && (dump_flags & TDF_DETAILS))
     663          226 :     fprintf (dump_file, "Processing frequency %s\n", node->dump_name ());
     664              : 
     665      5091603 :   node->call_for_symbol_and_aliases (ipa_propagate_frequency_1, &d,
     666              :                                      true);
     667              : 
     668      5091603 :   if ((d.only_called_at_startup && !d.only_called_at_exit)
     669        45210 :       && !node->only_called_at_startup)
     670              :     {
     671        16509 :        node->only_called_at_startup = true;
     672        16509 :        if (dump_file)
     673            0 :          fprintf (dump_file, "Node %s promoted to only called at startup.\n",
     674              :                   node->dump_name ());
     675              :        changed = true;
     676              :     }
     677      5091603 :   if ((d.only_called_at_exit && !d.only_called_at_startup)
     678           49 :       && !node->only_called_at_exit)
     679              :     {
     680           19 :        node->only_called_at_exit = true;
     681           19 :        if (dump_file)
     682            0 :          fprintf (dump_file, "Node %s promoted to only called at exit.\n",
     683              :                   node->dump_name ());
     684              :        changed = true;
     685              :     }
     686              : 
     687              :   /* With profile we can decide on hot/normal based on count.  */
     688      5091603 :   if (node->count. ipa().initialized_p ())
     689              :     {
     690        22078 :       bool hot = false;
     691        22078 :       if (!(node->count. ipa() == profile_count::zero ())
     692          207 :           && node->count. ipa() >= get_hot_bb_threshold ())
     693          202 :         hot = true;
     694        22078 :       if (!hot)
     695        21876 :         hot |= contains_hot_call_p (node);
     696        22078 :       if (hot)
     697              :         {
     698          204 :           if (node->frequency != NODE_FREQUENCY_HOT)
     699              :             {
     700           14 :               if (dump_file)
     701            0 :                 fprintf (dump_file, "Node %s promoted to hot.\n",
     702              :                          node->dump_name ());
     703           14 :               node->frequency = NODE_FREQUENCY_HOT;
     704           14 :               return true;
     705              :             }
     706              :           return false;
     707              :         }
     708        21874 :       else if (node->frequency == NODE_FREQUENCY_HOT)
     709              :         {
     710            4 :           if (dump_file)
     711            0 :             fprintf (dump_file, "Node %s reduced to normal.\n",
     712              :                      node->dump_name ());
     713            4 :           node->frequency = NODE_FREQUENCY_NORMAL;
     714            4 :           changed = true;
     715              :         }
     716              :     }
     717              :   /* These come either from profile or user hints; never update them.  */
     718      5091399 :   if (node->frequency == NODE_FREQUENCY_HOT
     719      5091393 :       || node->frequency == NODE_FREQUENCY_UNLIKELY_EXECUTED)
     720              :     return changed;
     721      5073403 :   if (d.maybe_unlikely_executed)
     722              :     {
     723         9114 :       node->frequency = NODE_FREQUENCY_UNLIKELY_EXECUTED;
     724         9114 :       if (dump_file)
     725            0 :         fprintf (dump_file, "Node %s promoted to unlikely executed.\n",
     726              :                  node->dump_name ());
     727              :       changed = true;
     728              :     }
     729      5064289 :   else if (d.maybe_executed_once && node->frequency != NODE_FREQUENCY_EXECUTED_ONCE)
     730              :     {
     731       118937 :       node->frequency = NODE_FREQUENCY_EXECUTED_ONCE;
     732       118937 :       if (dump_file)
     733           45 :         fprintf (dump_file, "Node %s promoted to executed once.\n",
     734              :                  node->dump_name ());
     735              :       changed = true;
     736              :     }
     737              :   return changed;
     738              : }
     739              : 
     740              : /* Check that number of arguments of N agrees with E.
     741              :    Be conservative when summaries are not present.  */
     742              : 
     743              : static bool
     744           42 : check_argument_count (struct cgraph_node *n, struct cgraph_edge *e)
     745              : {
     746           42 :   if (!ipa_node_params_sum || !ipa_edge_args_sum)
     747              :     return true;
     748           42 :   ipa_node_params *info = ipa_node_params_sum->get (n->function_symbol ());
     749           42 :   if (!info)
     750              :     return true;
     751           42 :   ipa_edge_args *e_info = ipa_edge_args_sum->get (e);
     752           42 :   if (!e_info)
     753              :     return true;
     754           66 :   if (ipa_get_param_count (info) != ipa_get_cs_argument_count (e_info)
     755           42 :       && (ipa_get_param_count (info) >= ipa_get_cs_argument_count (e_info)
     756            0 :           || !stdarg_p (TREE_TYPE (n->decl))))
     757            0 :     return false;
     758              :   return true;
     759              : }
     760              : 
     761              : /* Simple ipa profile pass propagating frequencies across the callgraph.  */
     762              : 
     763              : static unsigned int
     764       151656 : ipa_profile (void)
     765              : {
     766       151656 :   struct cgraph_node **order;
     767       151656 :   struct cgraph_edge *e;
     768       151656 :   int order_pos;
     769       151656 :   bool something_changed = false;
     770       151656 :   int i;
     771       151656 :   widest_int overall_time = 0, cumulated = 0;
     772       151656 :   gcov_type overall_size = 0;
     773       151656 :   struct cgraph_node *n,*n2;
     774       151656 :   int nindirect = 0, ncommon = 0, nunknown = 0, nuseless = 0, nconverted = 0;
     775       151656 :   int nmismatch = 0, nimpossible = 0;
     776       151656 :   bool node_map_initialized = false;
     777       151656 :   gcov_type threshold;
     778              : 
     779       151656 :   if (dump_file)
     780              :     {
     781           27 :       if (profile_info)
     782              :         {
     783            4 :           fprintf (dump_file,
     784              :                    "runs: %i sum_max: %" PRId64 " cutoff: %" PRId64"\n",
     785              :                    profile_info->runs, profile_info->sum_max, profile_info->cutoff);
     786            4 :           fprintf (dump_file, "hot bb threshold: %" PRId64 "\n",
     787              :                    get_hot_bb_threshold ());
     788              :         }
     789           27 :       dump_histogram (dump_file, histogram);
     790              :     }
     791       161148 :   for (i = 0; i < (int)histogram.length (); i++)
     792              :     {
     793         3302 :       overall_time += ((widest_int)histogram[i]->count) * histogram[i]->time;
     794              :       /* Watch for overflow.  */
     795         3302 :       gcc_assert (overall_time >= 0);
     796         3302 :       overall_size += histogram[i]->size;
     797              :     }
     798       151656 :   threshold = 0;
     799       151656 :   if (overall_time != 0)
     800              :     {
     801          101 :       gcc_assert (overall_size);
     802              : 
     803          101 :       widest_int cutoff = ((overall_time * param_hot_bb_count_ws_permille
     804          202 :                            + 500) / 1000);
     805              :       /* Watch for overflow.  */
     806          101 :       gcc_assert (cutoff >= 0);
     807          450 :       for (i = 0; cumulated < cutoff; i++)
     808              :         {
     809          349 :           cumulated += ((widest_int)histogram[i]->count) * histogram[i]->time;
     810          349 :           threshold = histogram[i]->count;
     811              :         }
     812          101 :       if (!threshold)
     813            0 :         threshold = 1;
     814          101 :       if (dump_file)
     815              :         {
     816            4 :           widest_int cumulated_time = 0;
     817            4 :           gcov_type cumulated_size = 0;
     818              : 
     819            4 :           for (i = 0;
     820           38 :                i < (int)histogram.length () && histogram[i]->count >= threshold;
     821              :                i++)
     822              :             {
     823           15 :               cumulated_time += ((widest_int)histogram[i]->count)
     824           30 :                                  * histogram[i]->time;
     825           15 :               cumulated_size += histogram[i]->size;
     826              :             }
     827            4 :           fprintf (dump_file, "Determined min count: %" PRId64
     828              :                    " Time:%3.2f%% Size:%3.2f%%\n",
     829              :                    (int64_t)threshold,
     830            8 :                    (cumulated_time * 10000 / overall_time).to_shwi () / 100.0,
     831            4 :                    cumulated_size * 100.0 / overall_size);
     832            4 :         }
     833              : 
     834          101 :       if (in_lto_p)
     835              :         {
     836            4 :           if (dump_file)
     837            2 :             fprintf (dump_file, "Setting hotness threshold in LTO mode.\n");
     838            4 :           set_hot_bb_threshold (threshold);
     839              :         }
     840          101 :     }
     841       151656 :   histogram.release ();
     842       151656 :   histogram_pool.release ();
     843              : 
     844              :   /* Produce speculative calls: we saved common target from profiling into
     845              :      e->target_id.  Now, at link time, we can look up corresponding
     846              :      function node and produce speculative call.  */
     847              : 
     848       151656 :   gcc_checking_assert (call_sums);
     849              : 
     850       151656 :   if (dump_file)
     851              :     {
     852           27 :       if (!node_map_initialized)
     853           27 :         init_node_map (false);
     854           27 :       node_map_initialized = true;
     855              : 
     856           27 :       ipa_profile_dump_all_summaries (dump_file);
     857              :     }
     858              : 
     859      1588017 :   FOR_EACH_DEFINED_FUNCTION (n)
     860              :     {
     861      1436361 :       bool update = false;
     862              : 
     863      1436361 :       if (!opt_for_fn (n->decl, flag_ipa_profile))
     864         9617 :         continue;
     865              : 
     866      1566750 :       for (e = n->indirect_calls; e; e = e->next_callee)
     867              :         {
     868       140006 :           if (n->count.initialized_p ())
     869       140002 :             nindirect++;
     870              : 
     871       140006 :           speculative_call_summary *csum = call_sums->get_create (e);
     872       140045 :           unsigned spec_count = csum->speculative_call_targets.length ();
     873           39 :           if (spec_count)
     874              :             {
     875           39 :               if (!node_map_initialized)
     876           19 :                 init_node_map (false);
     877           39 :               node_map_initialized = true;
     878           39 :               ncommon++;
     879              : 
     880           39 :               unsigned speculative_id = 0;
     881           39 :               profile_count orig = e->count;
     882           90 :               for (unsigned i = 0; i < spec_count; i++)
     883              :                 {
     884           51 :                   speculative_call_target item
     885           51 :                     = csum->speculative_call_targets[i];
     886           51 :                   n2 = find_func_by_profile_id (item.target_id);
     887           51 :                   if (n2)
     888              :                     {
     889           50 :                       if (dump_file)
     890              :                         {
     891            8 :                           fprintf (dump_file,
     892              :                                    "Indirect call -> direct call from"
     893              :                                    " other module %s => %s, prob %3.2f\n",
     894              :                                    n->dump_name (),
     895              :                                    n2->dump_name (),
     896              :                                    item.target_probability
     897            8 :                                      / (float) REG_BR_PROB_BASE);
     898              :                         }
     899           50 :                       if (item.target_probability < REG_BR_PROB_BASE / 2)
     900              :                         {
     901            8 :                           nuseless++;
     902            8 :                           if (dump_file)
     903            1 :                             fprintf (dump_file,
     904              :                                      "Not speculating: "
     905              :                                      "probability is too low.\n");
     906              :                         }
     907           42 :                       else if (n2->get_availability () <= AVAIL_INTERPOSABLE
     908           42 :                                && n2->can_be_discarded_p ())
     909              :                         {
     910            0 :                           nuseless++;
     911            0 :                           if (dump_file)
     912            0 :                             fprintf (dump_file,
     913              :                                      "Not speculating: target is overwritable "
     914              :                                      "and can be discarded.\n");
     915              :                         }
     916           42 :                       else if (!check_argument_count (n2, e))
     917              :                         {
     918            0 :                           nmismatch++;
     919            0 :                           if (dump_file)
     920            0 :                             fprintf (dump_file,
     921              :                                      "Not speculating: "
     922              :                                      "parameter count mismatch\n");
     923              :                         }
     924           42 :                       else if (usable_polymorphic_info_p (e->indirect_info)
     925           10 :                                && !opt_for_fn (n->decl, flag_devirtualize)
     926            4 :                                && !possible_polymorphic_call_target_p (e, n2))
     927              :                         {
     928            0 :                           nimpossible++;
     929            0 :                           if (dump_file)
     930            0 :                             fprintf (dump_file,
     931              :                                      "Not speculating: "
     932              :                                      "function is not in the polymorphic "
     933              :                                      "call target list\n");
     934              :                         }
     935              :                       else
     936              :                         {
     937              :                           /* Target may be overwritable, but profile says that
     938              :                              control flow goes to this particular implementation
     939              :                              of N2.  Speculate on the local alias to allow
     940              :                              inlining.  */
     941           42 :                           if (!n2->can_be_discarded_p ())
     942              :                             {
     943           36 :                               cgraph_node *alias;
     944           36 :                               alias = dyn_cast<cgraph_node *>
     945           36 :                                    (n2->noninterposable_alias ());
     946              :                               if (alias)
     947           42 :                                 n2 = alias;
     948              :                             }
     949           42 :                           nconverted++;
     950           42 :                           profile_probability prob
     951              :                                  = profile_probability::from_reg_br_prob_base
     952           42 :                                         (item.target_probability).adjusted ();
     953           42 :                           e->make_speculative (n2,
     954              :                                                orig.apply_probability (prob),
     955              :                                                speculative_id);
     956           42 :                           update = true;
     957           42 :                           speculative_id++;
     958              :                         }
     959              :                     }
     960              :                   else
     961              :                     {
     962            1 :                       if (dump_file)
     963            0 :                         fprintf (dump_file,
     964              :                                  "Function with profile-id %i not found.\n",
     965              :                                  item.target_id);
     966            1 :                       nunknown++;
     967              :                     }
     968              :                 }
     969              :             }
     970              :         }
     971      1426744 :       if (update)
     972           36 :         ipa_update_overall_fn_summary (n);
     973              :     }
     974       151656 :   if (node_map_initialized)
     975           46 :     del_node_map ();
     976       151656 :   if (dump_file && nindirect)
     977           25 :     fprintf (dump_file,
     978              :              "%i indirect calls trained.\n"
     979              :              "%i (%3.2f%%) have common target.\n"
     980              :              "%i (%3.2f%%) targets was not found.\n"
     981              :              "%i (%3.2f%%) targets had parameter count mismatch.\n"
     982              :              "%i (%3.2f%%) targets was not in polymorphic call target list.\n"
     983              :              "%i (%3.2f%%) speculations seems useless.\n"
     984              :              "%i (%3.2f%%) speculations produced.\n",
     985              :              nindirect,
     986           25 :              ncommon, ncommon * 100.0 / nindirect,
     987           25 :              nunknown, nunknown * 100.0 / nindirect,
     988           25 :              nmismatch, nmismatch * 100.0 / nindirect,
     989           25 :              nimpossible, nimpossible * 100.0 / nindirect,
     990           25 :              nuseless, nuseless * 100.0 / nindirect,
     991           25 :              nconverted, nconverted * 100.0 / nindirect);
     992              : 
     993       151656 :   order = XCNEWVEC (struct cgraph_node *, symtab->cgraph_count);
     994       151656 :   order_pos = ipa_reverse_postorder (order);
     995      2874044 :   for (i = order_pos - 1; i >= 0; i--)
     996              :     {
     997      2722388 :       if (order[i]->local
     998       168716 :           && opt_for_fn (order[i]->decl, flag_ipa_profile)
     999      2885644 :           && ipa_propagate_frequency (order[i]))
    1000              :         {
    1001       327477 :           for (e = order[i]->callees; e; e = e->next_callee)
    1002       298286 :             if (e->callee->local && !e->callee->aux)
    1003              :               {
    1004        10390 :                 something_changed = true;
    1005        10390 :                 e->callee->aux = (void *)1;
    1006              :               }
    1007              :         }
    1008      2722388 :       order[i]->aux = NULL;
    1009              :     }
    1010              : 
    1011       160494 :   while (something_changed)
    1012              :     {
    1013              :       something_changed = false;
    1014       120015 :       for (i = order_pos - 1; i >= 0; i--)
    1015              :         {
    1016       111177 :           if (order[i]->aux
    1017        16772 :               && opt_for_fn (order[i]->decl, flag_ipa_profile)
    1018       127949 :               && ipa_propagate_frequency (order[i]))
    1019              :             {
    1020       100320 :               for (e = order[i]->callees; e; e = e->next_callee)
    1021        85119 :                 if (e->callee->local && !e->callee->aux)
    1022              :                   {
    1023         6386 :                     something_changed = true;
    1024         6386 :                     e->callee->aux = (void *)1;
    1025              :                   }
    1026              :             }
    1027       111177 :           order[i]->aux = NULL;
    1028              :         }
    1029              :     }
    1030       151656 :   free (order);
    1031              : 
    1032       151656 :   if (dump_file && (dump_flags & TDF_DETAILS))
    1033            0 :     symtab->dump (dump_file);
    1034              : 
    1035       151656 :   delete call_sums;
    1036       151656 :   call_sums = NULL;
    1037              : 
    1038       151656 :   return 0;
    1039       151656 : }
    1040              : 
    1041              : namespace {
    1042              : 
    1043              : const pass_data pass_data_ipa_profile =
    1044              : {
    1045              :   IPA_PASS, /* type */
    1046              :   "profile_estimate", /* name */
    1047              :   OPTGROUP_NONE, /* optinfo_flags */
    1048              :   TV_IPA_PROFILE, /* tv_id */
    1049              :   0, /* properties_required */
    1050              :   0, /* properties_provided */
    1051              :   0, /* properties_destroyed */
    1052              :   0, /* todo_flags_start */
    1053              :   0, /* todo_flags_finish */
    1054              : };
    1055              : 
    1056              : class pass_ipa_profile : public ipa_opt_pass_d
    1057              : {
    1058              : public:
    1059       285722 :   pass_ipa_profile (gcc::context *ctxt)
    1060              :     : ipa_opt_pass_d (pass_data_ipa_profile, ctxt,
    1061              :                       ipa_profile_generate_summary, /* generate_summary */
    1062              :                       ipa_profile_write_summary, /* write_summary */
    1063              :                       ipa_profile_read_summary, /* read_summary */
    1064              :                       NULL, /* write_optimization_summary */
    1065              :                       NULL, /* read_optimization_summary */
    1066              :                       NULL, /* stmt_fixup */
    1067              :                       0, /* function_transform_todo_flags_start */
    1068              :                       NULL, /* function_transform */
    1069       285722 :                       NULL) /* variable_transform */
    1070       285722 :   {}
    1071              : 
    1072              :   /* opt_pass methods: */
    1073       586755 :   bool gate (function *) final override { return flag_ipa_profile || in_lto_p; }
    1074       151656 :   unsigned int execute (function *) final override { return ipa_profile (); }
    1075              : 
    1076              : }; // class pass_ipa_profile
    1077              : 
    1078              : } // anon namespace
    1079              : 
    1080              : ipa_opt_pass_d *
    1081       285722 : make_pass_ipa_profile (gcc::context *ctxt)
    1082              : {
    1083       285722 :   return new pass_ipa_profile (ctxt);
    1084              : }
    1085              : 
    1086              : /* Reset all state within ipa-profile.cc so that we can rerun the compiler
    1087              :    within the same process.  For use by toplev::finalize.  */
    1088              : 
    1089              : void
    1090       256621 : ipa_profile_cc_finalize (void)
    1091              : {
    1092       256621 :   delete call_sums;
    1093       256621 :   call_sums = NULL;
    1094       256621 : }
        

Generated by: LCOV version 2.4-beta

LCOV profile is generated on x86_64 machine using following configure options: configure --disable-bootstrap --enable-coverage=opt --enable-languages=c,c++,fortran,go,jit,lto,rust,m2 --enable-host-shared. GCC test suite is run with the built compiler.