LCOV - code coverage report
Current view: top level - gcc - ipa-inline-transform.cc (source / functions) Coverage Total Hit
Test: gcc.info Lines: 87.6 % 394 345
Test Date: 2025-09-20 13:40:47 Functions: 92.3 % 13 12
Legend: Lines: hit not hit | Branches: + taken - not taken # not executed Branches: - 0 0

             Branch data     Line data    Source code
       1                 :             : /* Callgraph transformations to handle inlining
       2                 :             :    Copyright (C) 2003-2025 Free Software Foundation, Inc.
       3                 :             :    Contributed by Jan Hubicka
       4                 :             : 
       5                 :             : This file is part of GCC.
       6                 :             : 
       7                 :             : GCC is free software; you can redistribute it and/or modify it under
       8                 :             : the terms of the GNU General Public License as published by the Free
       9                 :             : Software Foundation; either version 3, or (at your option) any later
      10                 :             : version.
      11                 :             : 
      12                 :             : GCC is distributed in the hope that it will be useful, but WITHOUT ANY
      13                 :             : WARRANTY; without even the implied warranty of MERCHANTABILITY or
      14                 :             : FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
      15                 :             : for more details.
      16                 :             : 
      17                 :             : You should have received a copy of the GNU General Public License
      18                 :             : along with GCC; see the file COPYING3.  If not see
      19                 :             : <http://www.gnu.org/licenses/>.  */
      20                 :             : 
      21                 :             : /* The inline decisions are stored in callgraph in "inline plan" and
      22                 :             :    applied later.
      23                 :             : 
      24                 :             :    To mark a given call inline, use the inline_call function.
      25                 :             :    The function marks the edge inlinable and, if necessary, produces a
      26                 :             :    virtual clone in the callgraph representing the new copy of the callee's
      27                 :             :    function body.
      28                 :             : 
      29                 :             :    The inline plan is applied to a given function body by inline_transform.  */
      30                 :             : 
      31                 :             : #define INCLUDE_ALGORITHM
      32                 :             : #include "config.h"
      33                 :             : #include "system.h"
      34                 :             : #include "coretypes.h"
      35                 :             : #include "tm.h"
      36                 :             : #include "function.h"
      37                 :             : #include "tree.h"
      38                 :             : #include "alloc-pool.h"
      39                 :             : #include "tree-pass.h"
      40                 :             : #include "cgraph.h"
      41                 :             : #include "tree-cfg.h"
      42                 :             : #include "symbol-summary.h"
      43                 :             : #include "tree-vrp.h"
      44                 :             : #include "sreal.h"
      45                 :             : #include "ipa-cp.h"
      46                 :             : #include "ipa-prop.h"
      47                 :             : #include "ipa-fnsummary.h"
      48                 :             : #include "ipa-inline.h"
      49                 :             : #include "tree-inline.h"
      50                 :             : #include "function.h"
      51                 :             : #include "cfg.h"
      52                 :             : #include "basic-block.h"
      53                 :             : #include "ipa-utils.h"
      54                 :             : #include "ipa-modref-tree.h"
      55                 :             : #include "ipa-modref.h"
      56                 :             : #include "symtab-thunks.h"
      57                 :             : #include "symtab-clones.h"
      58                 :             : 
      59                 :             : int ncalls_inlined;
      60                 :             : int nfunctions_inlined;
      61                 :             : 
      62                 :             : /* We removed or are going to remove the last call to NODE.
      63                 :             :    Return true if we can and want to proactively remove NODE now.
      64                 :             :    This is important to do, since we want the inliner to know when the
      65                 :             :    offline copy of a function was removed.  */
      66                 :             : 
      67                 :             : static bool
      68                 :     2897987 : can_remove_node_now_p_1 (struct cgraph_node *node, struct cgraph_edge *e)
      69                 :             : {
      70                 :     2897987 :   ipa_ref *ref;
      71                 :             : 
      72                 :     3186319 :   FOR_EACH_ALIAS (node, ref)
      73                 :             :     {
      74                 :      853869 :       cgraph_node *alias = dyn_cast <cgraph_node *> (ref->referring);
      75                 :      552093 :       if ((alias->callers && alias->callers != e)
      76                 :      853869 :           || !can_remove_node_now_p_1 (alias, e))
      77                 :      565537 :         return false;
      78                 :             :     }
      79                 :             :   /* FIXME: When address is taken of DECL_EXTERNAL function we still
      80                 :             :      can remove its offline copy, but we would need to keep unanalyzed node in
      81                 :             :      the callgraph so references can point to it.
      82                 :             : 
      83                 :             :      Also for comdat group we can ignore references inside a group as we
      84                 :             :      want to prove the group as a whole to be dead.  */
      85                 :     2332450 :   return (!node->address_taken
      86                 :     2300364 :           && node->can_remove_if_no_direct_calls_and_refs_p ()
      87                 :             :           /* Inlining might enable more devirtualizing, so we want to remove
      88                 :             :              those only after all devirtualizable virtual calls are processed.
      89                 :             :              Lacking may edges in callgraph we just preserve them post
      90                 :             :              inlining.  */
      91                 :     2233336 :           && (!DECL_VIRTUAL_P (node->decl)
      92                 :        6770 :               || !opt_for_fn (node->decl, flag_devirtualize))
      93                 :             :           /* During early inlining some unanalyzed cgraph nodes might be in the
      94                 :             :              callgraph and they might refer to the function in question.  */
      95                 :     4559110 :           && !cgraph_new_nodes.exists ());
      96                 :             : }
      97                 :             : 
      98                 :             : /* We are going to eliminate the last direct call to NODE (or an alias of it)
      99                 :             :    via edge E.  Verify that NODE can be removed from the unit and, if it is in
     100                 :             :    a comdat group, that the whole comdat group is removable.  */
     101                 :             : 
     102                 :             : static bool
     103                 :     2392645 : can_remove_node_now_p (struct cgraph_node *node, struct cgraph_edge *e)
     104                 :             : {
     105                 :     2392645 :   struct cgraph_node *next;
     106                 :     2392645 :   if (!can_remove_node_now_p_1 (node, e))
     107                 :             :     return false;
     108                 :             : 
     109                 :             :   /* When we see same comdat group, we need to be sure that all
     110                 :             :      items can be removed.  */
     111                 :     1734788 :   if (!node->same_comdat_group || !node->externally_visible)
     112                 :             :     return true;
     113                 :      291149 :   for (next = dyn_cast<cgraph_node *> (node->same_comdat_group);
     114                 :      868465 :        next != node; next = dyn_cast<cgraph_node *> (next->same_comdat_group))
     115                 :             :     {
     116                 :      291472 :       if (next->alias)
     117                 :       85118 :         continue;
     118                 :      205981 :       if ((next->callers && next->callers != e)
     119                 :      409547 :           || !can_remove_node_now_p_1 (next, e))
     120                 :             :         return false;
     121                 :             :     }
     122                 :             :   return true;
     123                 :             : }
     124                 :             : 
     125                 :             : /* Return true if NODE is a master clone with non-inline clones.  */
     126                 :             : 
     127                 :             : static bool
     128                 :     1500450 : master_clone_with_noninline_clones_p (struct cgraph_node *node)
     129                 :             : {
     130                 :     1500450 :   if (node->clone_of)
     131                 :             :     return false;
     132                 :             : 
     133                 :     1917257 :   for (struct cgraph_node *n = node->clones; n; n = n->next_sibling_clone)
     134                 :      517916 :     if (n->decl != node->decl)
     135                 :             :       return true;
     136                 :             : 
     137                 :             :   return false;
     138                 :             : }
     139                 :             : 
     140                 :             : /* E is expected to be an edge being inlined.  Clone destination node of
     141                 :             :    the edge and redirect it to the new clone.
     142                 :             :    DUPLICATE is used for bookkeeping on whether we are actually creating new
     143                 :             :    clones or re-using node originally representing out-of-line function call.
     144                 :             :    By default the offline copy is removed, when it appears dead after inlining.
     145                 :             :    UPDATE_ORIGINAL prevents this transformation.
     146                 :             :    If OVERALL_SIZE is non-NULL, the size is updated to reflect the
     147                 :             :    transformation.  */
     148                 :             : 
     149                 :             : void
     150                 :     4794644 : clone_inlined_nodes (struct cgraph_edge *e, bool duplicate,
     151                 :             :                      bool update_original, int *overall_size)
     152                 :             : {
     153                 :     4794644 :   struct cgraph_node *inlining_into;
     154                 :     4794644 :   struct cgraph_edge *next;
     155                 :             : 
     156                 :     4794644 :   if (e->caller->inlined_to)
     157                 :             :     inlining_into = e->caller->inlined_to;
     158                 :             :   else
     159                 :     3883961 :     inlining_into = e->caller;
     160                 :             : 
     161                 :     4794644 :   if (duplicate)
     162                 :             :     {
     163                 :             :       /* We may eliminate the need for out-of-line copy to be output.
     164                 :             :          In that case just go ahead and re-use it.  This is not just a
     165                 :             :          memory optimization.  Making the offline copy of the function
     166                 :             :          disappear from the program will improve future inlining decisions.  */
     167                 :     4590275 :       if (!e->callee->callers->next_caller
     168                 :             :           /* Recursive inlining never wants the master clone to
     169                 :             :              be overwritten.  */
     170                 :     2160295 :           && update_original
     171                 :     2148784 :           && can_remove_node_now_p (e->callee, e)
     172                 :             :           /* We cannot overwrite a master clone with non-inline clones
     173                 :             :              until after these clones are materialized.  */
     174                 :     6090725 :           && !master_clone_with_noninline_clones_p (e->callee))
     175                 :             :         {
     176                 :             :           /* TODO: When callee is in a comdat group, we could remove all of it,
     177                 :             :              including all inline clones inlined into it.  That would however
     178                 :             :              need small function inlining to register edge removal hook to
     179                 :             :              maintain the priority queue.
     180                 :             : 
     181                 :             :              For now we keep the other functions in the group in program until
     182                 :             :              cgraph_remove_unreachable_functions gets rid of them.  */
     183                 :     1500344 :           gcc_assert (!e->callee->inlined_to);
     184                 :     1500344 :           e->callee->remove_from_same_comdat_group ();
     185                 :     1500344 :           if (e->callee->definition
     186                 :     1500344 :               && inline_account_function_p (e->callee))
     187                 :             :             {
     188                 :     1334478 :               gcc_assert (!e->callee->alias);
     189                 :     1334478 :               if (overall_size)
     190                 :      263086 :                 *overall_size -= ipa_size_summaries->get (e->callee)->size;
     191                 :     1334478 :               nfunctions_inlined++;
     192                 :             :             }
     193                 :     1500344 :           duplicate = false;
     194                 :     1500344 :           e->callee->externally_visible = false;
     195                 :     1500344 :           profile_count num = e->count;
     196                 :     1500344 :           profile_count den = e->callee->count;
     197                 :     1500344 :           profile_count::adjust_for_ipa_scaling (&num, &den);
     198                 :     1500344 :           e->callee->apply_scale (num, den);
     199                 :             : 
     200                 :     1500344 :           dump_callgraph_transformation (e->callee, inlining_into,
     201                 :             :                                          "inlining to");
     202                 :             :         }
     203                 :             :       else
     204                 :             :         {
     205                 :     3089931 :           struct cgraph_node *n;
     206                 :             : 
     207                 :     3089931 :           n = e->callee->create_clone (e->callee->decl,
     208                 :             :                                        e->count,
     209                 :     3089931 :                                        update_original, vNULL, true,
     210                 :             :                                        inlining_into,
     211                 :             :                                        NULL, NULL);
     212                 :     3089931 :           n->used_as_abstract_origin = e->callee->used_as_abstract_origin;
     213                 :     3089931 :           e->redirect_callee (n);
     214                 :             :         }
     215                 :             :     }
     216                 :             :   else
     217                 :      204369 :     e->callee->remove_from_same_comdat_group ();
     218                 :             : 
     219                 :     4794644 :   e->callee->inlined_to = inlining_into;
     220                 :     4794644 :   if (e->callee->ipa_transforms_to_apply.length ())
     221                 :             :     {
     222                 :      305655 :       e->callee->ipa_transforms_to_apply.release ();
     223                 :      305655 :       e->callee->ipa_transforms_to_apply = vNULL;
     224                 :             :     }
     225                 :             : 
     226                 :             :   /* Recursively clone all bodies.  */
     227                 :     8872816 :   for (e = e->callee->callees; e; e = next)
     228                 :             :     {
     229                 :     4078172 :       next = e->next_callee;
     230                 :     4078172 :       if (!e->inline_failed)
     231                 :      673457 :         clone_inlined_nodes (e, duplicate, update_original, overall_size);
     232                 :             :     }
     233                 :     4794644 : }
     234                 :             : 
     235                 :             : /* Check all speculations in N and if any seem useless, resolve them.  When the
     236                 :             :    first edge is resolved, pop all edges from NEW_EDGES and insert them into
     237                 :             :    EDGE_SET.  Then remove each resolved edge from EDGE_SET, if it is there.  */
     238                 :             : 
     239                 :             : static bool
     240                 :     4793658 : check_speculations_1 (cgraph_node *n, vec<cgraph_edge *> *new_edges,
     241                 :             :                       hash_set <cgraph_edge *> *edge_set)
     242                 :             : {
     243                 :     4793658 :   bool speculation_removed = false;
     244                 :     4793658 :   cgraph_edge *next;
     245                 :             : 
     246                 :     8873670 :   for (cgraph_edge *e = n->callees; e; e = next)
     247                 :             :     {
     248                 :     4080012 :       next = e->next_callee;
     249                 :     4080012 :       if (e->speculative && !speculation_useful_p (e, true))
     250                 :             :         {
     251                 :          48 :           while (new_edges && !new_edges->is_empty ())
     252                 :           6 :             edge_set->add (new_edges->pop ());
     253                 :          42 :           edge_set->remove (e);
     254                 :             : 
     255                 :          42 :           cgraph_edge::resolve_speculation (e, NULL);
     256                 :          42 :           speculation_removed = true;
     257                 :             :         }
     258                 :     4079970 :       else if (!e->inline_failed)
     259                 :      672941 :         speculation_removed |= check_speculations_1 (e->callee, new_edges,
     260                 :             :                                                      edge_set);
     261                 :             :     }
     262                 :     4793658 :   return speculation_removed;
     263                 :             : }
     264                 :             : 
     265                 :             : /* Push E to NEW_EDGES.  Called from hash_set traverse method, which
     266                 :             :    unfortunately means this function has to have external linkage, otherwise
     267                 :             :    the code will not compile with gcc 4.8.  */
     268                 :             : 
     269                 :             : bool
     270                 :           0 : push_all_edges_in_set_to_vec (cgraph_edge * const &e,
     271                 :             :                               vec<cgraph_edge *> *new_edges)
     272                 :             : {
     273                 :           0 :   new_edges->safe_push (e);
     274                 :           0 :   return true;
     275                 :             : }
     276                 :             : 
     277                 :             : /* Check all speculations in N and if any seem useless, resolve them and remove
     278                 :             :    them from NEW_EDGES.  */
     279                 :             : 
     280                 :             : static bool
     281                 :     4120717 : check_speculations (cgraph_node *n, vec<cgraph_edge *> *new_edges)
     282                 :             : {
     283                 :     4120717 :   hash_set <cgraph_edge *> edge_set;
     284                 :     4120717 :   bool res = check_speculations_1 (n, new_edges, &edge_set);
     285                 :     4120717 :   if (!edge_set.is_empty ())
     286                 :           0 :     edge_set.traverse <vec<cgraph_edge *> *,
     287                 :           0 :                        push_all_edges_in_set_to_vec> (new_edges);
     288                 :     4120717 :   return res;
     289                 :     4120717 : }
     290                 :             : 
     291                 :             : /* Mark all call graph edges coming out of NODE and all nodes that have been
     292                 :             :    inlined to it as in_polymorphic_cdtor.  */
     293                 :             : 
     294                 :             : static void
     295                 :      127529 : mark_all_inlined_calls_cdtor (cgraph_node *node)
     296                 :             : {
     297                 :      260609 :   for (cgraph_edge *cs = node->callees; cs; cs = cs->next_callee)
     298                 :             :     {
     299                 :      133080 :       cs->in_polymorphic_cdtor = true;
     300                 :      133080 :       if (!cs->inline_failed)
     301                 :       21796 :         mark_all_inlined_calls_cdtor (cs->callee);
     302                 :             :     }
     303                 :      133006 :   for (cgraph_edge *cs = node->indirect_calls; cs; cs = cs->next_callee)
     304                 :        5477 :     cs->in_polymorphic_cdtor = true;
     305                 :      127529 : }
     306                 :             : 
     307                 :             : 
     308                 :             : /* Mark edge E as inlined and update the callgraph accordingly.  UPDATE_ORIGINAL
     309                 :             :    specifies whether the profile of the original function should be updated.  If
     310                 :             :    any new indirect edges are discovered in the process, add them to NEW_EDGES,
     311                 :             :    unless it is NULL.  If UPDATE_OVERALL_SUMMARY is false, do not bother to
     312                 :             :    recompute the overall size of the caller after inlining; the caller is then
     313                 :             :    required to eventually do it via ipa_update_overall_fn_summary.
     314                 :             :    If CALLEE_REMOVED is non-NULL, set it to true if we removed the callee node.
     315                 :             : 
     316                 :             :    Return true iff any new callgraph edges were discovered as a
     317                 :             :    result of inlining.  */
     318                 :             : 
     319                 :             : bool
     320                 :     4120717 : inline_call (struct cgraph_edge *e, bool update_original,
     321                 :             :              vec<cgraph_edge *> *new_edges,
     322                 :             :              int *overall_size, bool update_overall_summary,
     323                 :             :              bool *callee_removed)
     324                 :             : {
     325                 :     4120717 :   int old_size = 0, new_size = 0;
     326                 :     4120717 :   struct cgraph_node *to = NULL;
     327                 :     4120717 :   struct cgraph_edge *curr = e;
     328                 :     4120717 :   bool comdat_local = e->callee->comdat_local_p ();
     329                 :     4120717 :   struct cgraph_node *callee = e->callee->ultimate_alias_target ();
     330                 :     4120717 :   bool new_edges_found = false;
     331                 :             : 
     332                 :     4120717 :   int estimated_growth = 0;
     333                 :     4120717 :   if (! update_overall_summary)
     334                 :     3208757 :     estimated_growth = estimate_edge_growth (e);
     335                 :             :   /* This is used only for assert below.  */
     336                 :             : #if 0
     337                 :             :   bool predicated = inline_edge_summary (e)->predicate != NULL;
     338                 :             : #endif
     339                 :             : 
     340                 :             :   /* Don't inline inlined edges.  */
     341                 :     4120717 :   gcc_assert (e->inline_failed);
     342                 :             :   /* Don't even think of inlining inline clone.  */
     343                 :     4120717 :   gcc_assert (!callee->inlined_to);
     344                 :             : 
     345                 :     4120717 :   to = e->caller;
     346                 :     4120717 :   if (to->inlined_to)
     347                 :      237226 :     to = to->inlined_to;
     348                 :             : 
     349                 :             :   /* In case callee has AFDO profile but caller has GLOBAL0 we need
     350                 :             :      to re-scale it so it can have non-zero AFDO profile.  */
     351                 :     4120717 :   if (callee->count.quality () == AFDO
     352                 :           0 :       && e->count.nonzero_p ()
     353                 :     4120717 :       && (to->count.quality () == GUESSED_GLOBAL0_AFDO
     354                 :           0 :           || to->count.quality () == GUESSED_GLOBAL0_ADJUSTED))
     355                 :             :     {
     356                 :           0 :       profile_count num = callee->count;
     357                 :           0 :       profile_count den = e->count;
     358                 :           0 :       profile_count::adjust_for_ipa_scaling (&num, &den);
     359                 :           0 :       if (dump_file)
     360                 :             :         {
     361                 :           0 :           fprintf (dump_file, "Rescalling profile of caller %s "
     362                 :             :                    "to allow non-zero AFDO counts:",
     363                 :             :                    to->dump_name ());
     364                 :           0 :           den.dump (dump_file);
     365                 :           0 :           fprintf (dump_file, " -> ");
     366                 :           0 :           num.dump (dump_file);
     367                 :           0 :           fprintf (dump_file, "\n");
     368                 :             :         }
     369                 :           0 :       to->apply_scale (num, den);
     370                 :           0 :       to->frequency = std::max (to->frequency, callee->frequency);
     371                 :             :       /* Do not update original, so possible additional calls of callee
     372                 :             :          are handled reasonably well.  */
     373                 :           0 :       update_original = false;
     374                 :           0 :       gcc_checking_assert (to->count.quality () == AFDO);
     375                 :           0 :       if (dump_file)
     376                 :             :         {
     377                 :           0 :           fprintf (dump_file, "Scaled profile of %s: ", to->dump_name ());
     378                 :           0 :           to->count.dump (dump_file);
     379                 :           0 :           fprintf (dump_file, "\n");
     380                 :             :         }
     381                 :             :     }
     382                 :     4120717 :   if (to->thunk)
     383                 :             :     {
     384                 :        1156 :       struct cgraph_node *target = to->callees->callee;
     385                 :        1156 :       thunk_expansion = true;
     386                 :             : 
     387                 :             :       /* Remove all annotations, but keep thunk info.  */
     388                 :        1156 :       thunk_info info = *thunk_info::get (to);
     389                 :        1156 :       symtab->call_cgraph_removal_hooks (to);
     390                 :        1156 :       *thunk_info::get_create (to) = info;
     391                 :        1156 :       if (in_lto_p)
     392                 :          49 :         to->get_untransformed_body ();
     393                 :        1156 :       expand_thunk (to, false, true);
     394                 :             :       /* When thunk is instrumented we may have multiple callees.  */
     395                 :        1156 :       for (e = to->callees; e && e->callee != target; e = e->next_callee)
     396                 :             :         ;
     397                 :        1156 :       symtab->call_cgraph_insertion_hooks (to);
     398                 :        1156 :       thunk_expansion = false;
     399                 :        1156 :       gcc_assert (e);
     400                 :             :     }
     401                 :             : 
     402                 :             : 
     403                 :     4120717 :   e->inline_failed = CIF_OK;
     404                 :     4120717 :   DECL_POSSIBLY_INLINED (callee->decl) = true;
     405                 :             : 
     406                 :     4120717 :   if (DECL_FUNCTION_PERSONALITY (callee->decl))
     407                 :      286874 :     DECL_FUNCTION_PERSONALITY (to->decl)
     408                 :      143437 :       = DECL_FUNCTION_PERSONALITY (callee->decl);
     409                 :             : 
     410                 :     4120717 :   bool reload_optimization_node = false;
     411                 :     4120717 :   bool remove_strict_aliasing
     412                 :     4120717 :     = (!opt_for_fn (callee->decl, flag_strict_aliasing)
     413                 :     4120717 :        && opt_for_fn (to->decl, flag_strict_aliasing));
     414                 :     4120717 :   bool remove_assume_sane_operators_new_delete
     415                 :     4120717 :     = (!opt_for_fn (callee->decl, flag_assume_sane_operators_new_delete)
     416                 :     4120717 :        && opt_for_fn (to->decl, flag_assume_sane_operators_new_delete));
     417                 :     4120699 :   if (remove_strict_aliasing || remove_assume_sane_operators_new_delete)
     418                 :             :     {
     419                 :          31 :       struct gcc_options opts = global_options;
     420                 :          31 :       struct gcc_options opts_set = global_options_set;
     421                 :             : 
     422                 :          31 :       cl_optimization_restore (&opts, &opts_set, opts_for_fn (to->decl));
     423                 :          31 :       if (remove_strict_aliasing)
     424                 :             :         {
     425                 :          13 :           opts.x_flag_strict_aliasing = false;
     426                 :          13 :           if (dump_file)
     427                 :           0 :             fprintf (dump_file, "Dropping flag_strict_aliasing on %s\n",
     428                 :             :                      to->dump_name ());
     429                 :             :         }
     430                 :          31 :       if (remove_assume_sane_operators_new_delete)
     431                 :             :         {
     432                 :          18 :           opts.x_flag_assume_sane_operators_new_delete = false;
     433                 :          18 :           if (dump_file)
     434                 :           0 :             fprintf (dump_file,
     435                 :             :                      "Dropping flag_assume_sane_operators_new_delete on %s\n",
     436                 :             :                      to->dump_name ());
     437                 :             :         }
     438                 :          31 :       DECL_FUNCTION_SPECIFIC_OPTIMIZATION (to->decl)
     439                 :          31 :          = build_optimization_node (&opts, &opts_set);
     440                 :          31 :       reload_optimization_node = true;
     441                 :             :     }
     442                 :             : 
     443                 :     4120717 :   ipa_fn_summary *caller_info = ipa_fn_summaries->get (to);
     444                 :     4120717 :   ipa_fn_summary *callee_info = ipa_fn_summaries->get (callee);
     445                 :     4120717 :   if (!caller_info->fp_expressions && callee_info->fp_expressions)
     446                 :             :     {
     447                 :       15766 :       caller_info->fp_expressions = true;
     448                 :       15766 :       if (opt_for_fn (callee->decl, flag_rounding_math)
     449                 :       15766 :           != opt_for_fn (to->decl, flag_rounding_math)
     450                 :       15766 :           || opt_for_fn (callee->decl, flag_trapping_math)
     451                 :       15766 :              != opt_for_fn (to->decl, flag_trapping_math)
     452                 :       15759 :           || opt_for_fn (callee->decl, flag_unsafe_math_optimizations)
     453                 :       15759 :              != opt_for_fn (to->decl, flag_unsafe_math_optimizations)
     454                 :       15759 :           || opt_for_fn (callee->decl, flag_finite_math_only)
     455                 :       15759 :              != opt_for_fn (to->decl, flag_finite_math_only)
     456                 :       15759 :           || opt_for_fn (callee->decl, flag_signaling_nans)
     457                 :       15759 :              != opt_for_fn (to->decl, flag_signaling_nans)
     458                 :       15759 :           || opt_for_fn (callee->decl, flag_complex_method)
     459                 :       15759 :              != opt_for_fn (to->decl, flag_complex_method)
     460                 :       15759 :           || opt_for_fn (callee->decl, flag_signed_zeros)
     461                 :       15759 :              != opt_for_fn (to->decl, flag_signed_zeros)
     462                 :       15759 :           || opt_for_fn (callee->decl, flag_associative_math)
     463                 :       15759 :              != opt_for_fn (to->decl, flag_associative_math)
     464                 :       15758 :           || opt_for_fn (callee->decl, flag_reciprocal_math)
     465                 :       15758 :              != opt_for_fn (to->decl, flag_reciprocal_math)
     466                 :       15758 :           || opt_for_fn (callee->decl, flag_fp_int_builtin_inexact)
     467                 :       15758 :              != opt_for_fn (to->decl, flag_fp_int_builtin_inexact)
     468                 :       15766 :           || opt_for_fn (callee->decl, flag_errno_math)
     469                 :       15758 :              != opt_for_fn (to->decl, flag_errno_math))
     470                 :             :         {
     471                 :           8 :           struct gcc_options opts = global_options;
     472                 :           8 :           struct gcc_options opts_set = global_options_set;
     473                 :             : 
     474                 :           8 :           cl_optimization_restore (&opts, &opts_set, opts_for_fn (to->decl));
     475                 :           8 :           opts.x_flag_rounding_math
     476                 :           8 :             = opt_for_fn (callee->decl, flag_rounding_math);
     477                 :           8 :           opts.x_flag_trapping_math
     478                 :           8 :             = opt_for_fn (callee->decl, flag_trapping_math);
     479                 :           8 :           opts.x_flag_unsafe_math_optimizations
     480                 :           8 :             = opt_for_fn (callee->decl, flag_unsafe_math_optimizations);
     481                 :           8 :           opts.x_flag_finite_math_only
     482                 :           8 :             = opt_for_fn (callee->decl, flag_finite_math_only);
     483                 :           8 :           opts.x_flag_signaling_nans
     484                 :           8 :             = opt_for_fn (callee->decl, flag_signaling_nans);
     485                 :           8 :           opts.x_flag_complex_method
     486                 :           8 :             = opt_for_fn (callee->decl, flag_complex_method);
     487                 :           8 :           opts.x_flag_signed_zeros
     488                 :           8 :             = opt_for_fn (callee->decl, flag_signed_zeros);
     489                 :           8 :           opts.x_flag_associative_math
     490                 :           8 :             = opt_for_fn (callee->decl, flag_associative_math);
     491                 :           8 :           opts.x_flag_reciprocal_math
     492                 :           8 :             = opt_for_fn (callee->decl, flag_reciprocal_math);
     493                 :           8 :           opts.x_flag_fp_int_builtin_inexact
     494                 :           8 :             = opt_for_fn (callee->decl, flag_fp_int_builtin_inexact);
     495                 :           8 :           opts.x_flag_errno_math
     496                 :           8 :             = opt_for_fn (callee->decl, flag_errno_math);
     497                 :           8 :           if (dump_file)
     498                 :           0 :             fprintf (dump_file, "Copying FP flags from %s to %s\n",
     499                 :             :                      callee->dump_name (), to->dump_name ());
     500                 :           8 :           DECL_FUNCTION_SPECIFIC_OPTIMIZATION (to->decl)
     501                 :           8 :              = build_optimization_node (&opts, &opts_set);
     502                 :           8 :           reload_optimization_node = true;
     503                 :             :         }
     504                 :             :     }
     505                 :             : 
     506                 :             :   /* Reload global optimization flags.  */
     507                 :     4120717 :   if (reload_optimization_node && DECL_STRUCT_FUNCTION (to->decl) == cfun)
     508                 :          34 :     set_cfun (cfun, true);
     509                 :             : 
     510                 :             :   /* If aliases are involved, redirect edge to the actual destination and
     511                 :             :      possibly remove the aliases.  */
     512                 :     4120717 :   if (e->callee != callee)
     513                 :             :     {
     514                 :      788016 :       struct cgraph_node *alias = e->callee, *next_alias;
     515                 :      788016 :       e->redirect_callee (callee);
     516                 :     1807556 :       while (alias && alias != callee)
     517                 :             :         {
     518                 :      788035 :           if (!alias->callers
     519                 :     1031896 :               && can_remove_node_now_p (alias,
     520                 :      243861 :                                         !e->next_caller && !e->prev_caller ? e : NULL))
     521                 :             :             {
     522                 :      231524 :               next_alias = alias->get_alias_target ();
     523                 :      231524 :               alias->remove ();
     524                 :      231524 :               if (callee_removed)
     525                 :        1210 :                 *callee_removed = true;
     526                 :             :               alias = next_alias;
     527                 :             :             }
     528                 :             :           else
     529                 :             :             break;
     530                 :             :         }
     531                 :             :     }
     532                 :             : 
     533                 :     4120717 :   clone_inlined_nodes (e, true, update_original, overall_size);
     534                 :             : 
     535                 :     4120717 :   gcc_assert (curr->callee->inlined_to == to);
     536                 :             : 
     537                 :     4120717 :   old_size = ipa_size_summaries->get (to)->size;
     538                 :     4120717 :   ipa_merge_modref_summary_after_inlining (e);
     539                 :     4120717 :   ipa_merge_fn_summary_after_inlining (e);
     540                 :     4120717 :   if (e->in_polymorphic_cdtor)
     541                 :      105733 :     mark_all_inlined_calls_cdtor (e->callee);
     542                 :     4120717 :   if (opt_for_fn (e->caller->decl, optimize))
     543                 :     4094094 :     new_edges_found = ipa_propagate_indirect_call_infos (curr, new_edges);
     544                 :     4120717 :   bool removed_p = check_speculations (e->callee, new_edges);
     545                 :     4120717 :   if (update_overall_summary)
     546                 :      911960 :     ipa_update_overall_fn_summary (to, new_edges_found || removed_p);
     547                 :             :   else
     548                 :             :     /* Update self size by the estimate so overall function growth limits
     549                 :             :        work for further inlining into this function.  Before inlining
     550                 :             :        the function we inlined to again we expect the caller to update
     551                 :             :        the overall summary.  */
     552                 :     3208757 :     ipa_size_summaries->get (to)->size += estimated_growth;
     553                 :     4120717 :   new_size = ipa_size_summaries->get (to)->size;
     554                 :             : 
     555                 :     4120717 :   if (callee->calls_comdat_local)
     556                 :           0 :     to->calls_comdat_local = true;
     557                 :     4120717 :   else if (to->calls_comdat_local && comdat_local)
     558                 :        3493 :     to->calls_comdat_local = to->check_calls_comdat_local_p ();
     559                 :             : 
     560                 :             :   /* FIXME: This assert suffers from roundoff errors, disable it for GCC 5
     561                 :             :      and revisit it after conversion to sreals in GCC 6.
     562                 :             :      See PR 65654.  */
     563                 :             : #if 0
     564                 :             :   /* Verify that estimated growth matches real growth.  Allow off-by-one
     565                 :             :      error due to ipa_fn_summary::size_scale roundoff errors.  */
     566                 :             :   gcc_assert (!update_overall_summary || !overall_size || new_edges_found
     567                 :             :               || abs (estimated_growth - (new_size - old_size)) <= 1
     568                 :             :               || speculation_removed
     569                 :             :               /* FIXME: a hack.  Edges with false predicate are accounted
     570                 :             :                  wrong, we should remove them from callgraph.  */
     571                 :             :               || predicated);
     572                 :             : #endif
     573                 :             : 
     574                 :             :   /* Account the change of overall unit size; external functions will be
     575                 :             :      removed and are thus not accounted.  */
     576                 :     4120717 :   if (overall_size && inline_account_function_p (to))
     577                 :      896530 :     *overall_size += new_size - old_size;
     578                 :     4120717 :   ncalls_inlined++;
     579                 :             : 
     580                 :             :   /* This must happen after ipa_merge_fn_summary_after_inlining, which relies
     581                 :             :      on jump functions of callee to not be updated.  */
     582                 :     4120717 :   return new_edges_found;
     583                 :             : }
     584                 :             : 
     585                 :             : /* For each node that was made the holder of function body by
     586                 :             :    save_inline_function_body, this summary contains pointer to the previous
     587                 :             :    holder of the body.  */
     588                 :             : 
     589                 :             : function_summary <tree *> *ipa_saved_clone_sources;
     590                 :             : 
     591                 :             : /* Copy function body of NODE and redirect all inline clones to it.
     592                 :             :    This is done before inline plan is applied to NODE when there are
     593                 :             :    still some inline clones of it.
     594                 :             : 
     595                 :             :    This is necessary because inline decisions are not really transitive
     596                 :             :    and the other inline clones may have different bodies.  */
     597                 :             : 
     598                 :             : static struct cgraph_node *
     599                 :       45963 : save_inline_function_body (struct cgraph_node *node)
     600                 :             : {
     601                 :       45963 :   struct cgraph_node *first_clone, *n;
     602                 :             : 
     603                 :       45963 :   if (dump_file)
     604                 :         126 :     fprintf (dump_file, "\nSaving body of %s for later reuse\n",
     605                 :             :              node->dump_name ());
     606                 :             : 
     607                 :       45963 :   gcc_assert (node == cgraph_node::get (node->decl));
     608                 :             : 
     609                 :             :   /* first_clone will be turned into real function.  */
     610                 :       45963 :   first_clone = node->clones;
     611                 :             : 
     612                 :             :   /* Arrange first clone to not be thunk as those do not have bodies.  */
     613                 :       45963 :   if (first_clone->thunk)
     614                 :             :     {
     615                 :           0 :       while (first_clone->thunk)
     616                 :           0 :         first_clone = first_clone->next_sibling_clone;
     617                 :           0 :       first_clone->prev_sibling_clone->next_sibling_clone
     618                 :           0 :         = first_clone->next_sibling_clone;
     619                 :           0 :       if (first_clone->next_sibling_clone)
     620                 :           0 :         first_clone->next_sibling_clone->prev_sibling_clone
     621                 :           0 :            = first_clone->prev_sibling_clone;
     622                 :           0 :       first_clone->next_sibling_clone = node->clones;
     623                 :           0 :       first_clone->prev_sibling_clone = NULL;
     624                 :           0 :       node->clones->prev_sibling_clone = first_clone;
     625                 :           0 :       node->clones = first_clone;
     626                 :             :     }
     627                 :       45963 :   first_clone->decl = copy_node (node->decl);
     628                 :       45963 :   first_clone->decl->decl_with_vis.symtab_node = first_clone;
     629                 :       45963 :   gcc_assert (first_clone == cgraph_node::get (first_clone->decl));
     630                 :             : 
     631                 :             :   /* Now reshape the clone tree, so all other clones descend from
     632                 :             :      first_clone.  */
     633                 :       45963 :   if (first_clone->next_sibling_clone)
     634                 :             :     {
     635                 :      188588 :       for (n = first_clone->next_sibling_clone; n->next_sibling_clone;
     636                 :      165595 :            n = n->next_sibling_clone)
     637                 :      165595 :         n->clone_of = first_clone;
     638                 :       22993 :       n->clone_of = first_clone;
     639                 :       22993 :       n->next_sibling_clone = first_clone->clones;
     640                 :       22993 :       if (first_clone->clones)
     641                 :         532 :         first_clone->clones->prev_sibling_clone = n;
     642                 :       22993 :       first_clone->clones = first_clone->next_sibling_clone;
     643                 :       22993 :       first_clone->next_sibling_clone->prev_sibling_clone = NULL;
     644                 :       22993 :       first_clone->next_sibling_clone = NULL;
     645                 :       22993 :       gcc_assert (!first_clone->prev_sibling_clone);
     646                 :             :     }
     647                 :             : 
     648                 :       45963 :   tree prev_body_holder = node->decl;
     649                 :       45963 :   if (!ipa_saved_clone_sources)
     650                 :             :     {
     651                 :       14106 :       ipa_saved_clone_sources = new function_summary <tree *> (symtab);
     652                 :       14106 :       ipa_saved_clone_sources->disable_insertion_hook ();
     653                 :             :     }
     654                 :             :   else
     655                 :             :     {
     656                 :       31857 :       tree *p = ipa_saved_clone_sources->get (node);
     657                 :       31857 :       if (p)
     658                 :             :         {
     659                 :           0 :           prev_body_holder = *p;
     660                 :           0 :           gcc_assert (prev_body_holder);
     661                 :             :         }
     662                 :             :     }
     663                 :       45963 :   *ipa_saved_clone_sources->get_create (first_clone) = prev_body_holder;
     664                 :       45963 :   first_clone->former_clone_of
     665                 :       45963 :     = node->former_clone_of ? node->former_clone_of : node->decl;
     666                 :       45963 :   first_clone->clone_of = NULL;
     667                 :             : 
     668                 :             :   /* Now node in question has no clones.  */
     669                 :       45963 :   node->clones = NULL;
     670                 :             : 
     671                 :             :   /* Inline clones share decl with the function they are cloned
     672                 :             :      from.  Walk the whole clone tree and redirect them all to the
     673                 :             :      new decl.  */
     674                 :       45963 :   if (first_clone->clones)
     675                 :      269673 :     for (n = first_clone->clones; n != first_clone;)
     676                 :             :       {
     677                 :      245058 :         gcc_assert (n->decl == node->decl);
     678                 :      245058 :         n->decl = first_clone->decl;
     679                 :      245058 :         if (n->clones)
     680                 :             :           n = n->clones;
     681                 :      239432 :         else if (n->next_sibling_clone)
     682                 :             :           n = n->next_sibling_clone;
     683                 :             :         else
     684                 :             :           {
     685                 :       58478 :             while (n != first_clone && !n->next_sibling_clone)
     686                 :       30241 :               n = n->clone_of;
     687                 :       28237 :             if (n != first_clone)
     688                 :        3622 :               n = n->next_sibling_clone;
     689                 :             :           }
     690                 :             :       }
     691                 :             : 
     692                 :             :   /* Copy the OLD_VERSION_NODE function tree to the new version.  */
     693                 :       45963 :   tree_function_versioning (node->decl, first_clone->decl,
     694                 :             :                             NULL, NULL, true, NULL, NULL);
     695                 :             : 
     696                 :             :   /* The function will be short lived and removed after we inline all the
     697                 :             :      clones, but make it internal so we won't confuse ourselves.  */
     698                 :       45963 :   DECL_EXTERNAL (first_clone->decl) = 0;
     699                 :       45963 :   TREE_PUBLIC (first_clone->decl) = 0;
     700                 :       45963 :   DECL_COMDAT (first_clone->decl) = 0;
     701                 :       45963 :   first_clone->ipa_transforms_to_apply.release ();
     702                 :             : 
     703                 :             :   /* When doing recursive inlining, the clone may become unnecessary.
     704                 :             :      This is possible e.g. in the case when the recursive function is proved to
     705                 :             :      be non-throwing and the recursion happens only in the EH landing pad.
     706                 :             :      We cannot remove the clone until we are done with saving the body.
     707                 :             :      Remove it now.  */
     708                 :       45963 :   if (!first_clone->callers)
     709                 :             :     {
     710                 :           0 :       first_clone->remove_symbol_and_inline_clones ();
     711                 :           0 :       first_clone = NULL;
     712                 :             :     }
     713                 :       45963 :   else if (flag_checking)
     714                 :       45963 :     first_clone->verify ();
     715                 :             : 
     716                 :       45963 :   return first_clone;
     717                 :             : }
     718                 :             : 
     719                 :             : /* Return true when function body of NODE still needs to be kept around
     720                 :             :    for later re-use.  */
     721                 :             : static bool
     722                 :     1473782 : preserve_function_body_p (struct cgraph_node *node)
     723                 :             : {
     724                 :     1473782 :   gcc_assert (symtab->global_info_ready);
     725                 :     1473782 :   gcc_assert (!node->alias && !node->thunk);
     726                 :             : 
     727                 :             :   /* Look if there is any non-thunk clone around.  */
     728                 :     1473816 :   for (node = node->clones; node; node = node->next_sibling_clone)
     729                 :       45997 :     if (!node->thunk)
     730                 :             :       return true;
     731                 :             :   return false;
     732                 :             : }
     733                 :             : 
     734                 :             : /* tree-inline cannot recurse; materialize all function bodies we will need
     735                 :             :    during inlining.  This includes inlined functions, but also called functions
     736                 :             :    with param manipulation because IPA param manipulation attaches debug
     737                 :             :    statements to PARM_DECLs of called clone.  Materialize them if needed.
     738                 :             : 
     739                 :             :    FIXME: This is somewhat broken by design because it does not play well
     740                 :             :    with partitioning.  */
     741                 :             : 
     742                 :             : static void
     743                 :     2866038 : maybe_materialize_called_clones (cgraph_node *node)
     744                 :             : {
     745                 :    11608398 :   for (cgraph_edge *e = node->callees; e; e = e->next_callee)
     746                 :             :     {
     747                 :     8742360 :       clone_info *info;
     748                 :             : 
     749                 :     8742360 :       if (!e->inline_failed)
     750                 :     1392256 :         maybe_materialize_called_clones (e->callee);
     751                 :             : 
     752                 :     8742360 :       cgraph_node *callee = cgraph_node::get (e->callee->decl);
     753                 :     8742360 :       if (callee->clone_of
     754                 :     8742360 :           && (info = clone_info::get (callee)) && info->param_adjustments)
     755                 :       95729 :         callee->get_untransformed_body ();
     756                 :             :     }
     757                 :     2866038 : }
     758                 :             : 
     759                 :             : /* Apply inline plan to function.  */
     760                 :             : 
     761                 :             : unsigned int
     762                 :     1473782 : inline_transform (struct cgraph_node *node)
     763                 :             : {
     764                 :     1473782 :   unsigned int todo = 0;
     765                 :     1473782 :   struct cgraph_edge *e, *next;
     766                 :     1473782 :   bool has_inline = false;
     767                 :             : 
     768                 :             :   /* FIXME: Currently the pass manager is adding inline transform more than
     769                 :             :      once to some clones.  This needs revisiting after WPA cleanups.  */
     770                 :     1473782 :   if (cfun->after_inlining)
     771                 :             :     return 0;
     772                 :             : 
     773                 :     1473782 :   cgraph_node *next_clone;
     774                 :     1708367 :   for (cgraph_node *n = node->clones; n; n = next_clone)
     775                 :             :     {
     776                 :      234585 :       next_clone = n->next_sibling_clone;
     777                 :      234585 :       if (n->decl != node->decl)
     778                 :           0 :         n->materialize_clone ();
     779                 :             :     }
     780                 :     1473782 :   node->clear_stmts_in_references ();
     781                 :             : 
     782                 :             :   /* We might need the body of this function so that we can expand
     783                 :             :      it inline somewhere else.  */
     784                 :     1473782 :   if (preserve_function_body_p (node))
     785                 :       45963 :     save_inline_function_body (node);
     786                 :             : 
     787                 :     1473782 :   profile_count num = node->count;
     788                 :     1473782 :   profile_count den = ENTRY_BLOCK_PTR_FOR_FN (cfun)->count;
     789                 :     1473782 :   bool scale = num.initialized_p () && !(num == den);
     790                 :          46 :   if (scale)
     791                 :             :     {
     792                 :          46 :       profile_count::adjust_for_ipa_scaling (&num, &den);
     793                 :          46 :       if (dump_file)
     794                 :             :         {
     795                 :           0 :           fprintf (dump_file, "Applying count scale ");
     796                 :           0 :           num.dump (dump_file);
     797                 :           0 :           fprintf (dump_file, "/");
     798                 :           0 :           den.dump (dump_file);
     799                 :           0 :           fprintf (dump_file, "\n");
     800                 :             :         }
     801                 :             : 
     802                 :          46 :       basic_block bb;
     803                 :          46 :       cfun->cfg->count_max = profile_count::uninitialized ();
     804                 :         359 :       FOR_ALL_BB_FN (bb, cfun)
     805                 :             :         {
     806                 :         313 :           bb->count = bb->count.apply_scale (num, den);
     807                 :         313 :           cfun->cfg->count_max = cfun->cfg->count_max.max (bb->count);
     808                 :             :         }
     809                 :          46 :       ENTRY_BLOCK_PTR_FOR_FN (cfun)->count = node->count;
     810                 :             :     }
     811                 :             : 
     812                 :     1473782 :   maybe_materialize_called_clones (node);
     813                 :     7450982 :   for (e = node->callees; e; e = next)
     814                 :             :     {
     815                 :     5977200 :       if (!e->inline_failed)
     816                 :      577544 :         has_inline = true;
     817                 :     5977200 :       next = e->next_callee;
     818                 :     5977200 :       cgraph_edge::redirect_call_stmt_to_callee (e);
     819                 :             :     }
     820                 :     1473782 :   node->remove_all_references ();
     821                 :             : 
     822                 :     1473782 :   timevar_push (TV_INTEGRATION);
     823                 :     1473782 :   if (node->callees && (opt_for_fn (node->decl, optimize) || has_inline))
     824                 :             :     {
     825                 :      813444 :       todo = optimize_inline_calls (current_function_decl);
     826                 :             :     }
     827                 :     1473782 :   timevar_pop (TV_INTEGRATION);
     828                 :             : 
     829                 :     1473782 :   cfun->always_inline_functions_inlined = true;
     830                 :     1473782 :   cfun->after_inlining = true;
     831                 :     1473782 :   todo |= execute_fixup_cfg ();
     832                 :             : 
     833                 :     1473782 :   if (!(todo & TODO_update_ssa_any))
     834                 :             :     /* Redirecting edges might lead to a need for vops to be recomputed.  */
     835                 :     1098340 :     todo |= TODO_update_ssa_only_virtuals;
     836                 :             : 
     837                 :             :   return todo;
     838                 :             : }
        

Generated by: LCOV version 2.1-beta

LCOV profile is generated on x86_64 machine using following configure options: configure --disable-bootstrap --enable-coverage=opt --enable-languages=c,c++,fortran,go,jit,lto,rust,m2 --enable-host-shared. GCC test suite is run with the built compiler.