LCOV - code coverage report
Current view: top level - gcc - shrink-wrap.cc (source / functions) Coverage Total Hit
Test: gcc.info Lines: 96.6 % 824 796
Test Date: 2026-02-28 14:20:25 Functions: 100.0 % 20 20
Legend: Lines:     hit not hit

            Line data    Source code
       1              : /* Shrink-wrapping related optimizations.
       2              :    Copyright (C) 1987-2026 Free Software Foundation, Inc.
       3              : 
       4              : This file is part of GCC.
       5              : 
       6              : GCC is free software; you can redistribute it and/or modify it under
       7              : the terms of the GNU General Public License as published by the Free
       8              : Software Foundation; either version 3, or (at your option) any later
       9              : version.
      10              : 
      11              : GCC is distributed in the hope that it will be useful, but WITHOUT ANY
      12              : WARRANTY; without even the implied warranty of MERCHANTABILITY or
      13              : FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
      14              : for more details.
      15              : 
      16              : You should have received a copy of the GNU General Public License
      17              : along with GCC; see the file COPYING3.  If not see
      18              : <http://www.gnu.org/licenses/>.  */
      19              : 
      20              : /* This file handles shrink-wrapping related optimizations.  */
      21              : 
      22              : #include "config.h"
      23              : #include "system.h"
      24              : #include "coretypes.h"
      25              : #include "backend.h"
      26              : #include "target.h"
      27              : #include "rtl.h"
      28              : #include "tree.h"
      29              : #include "cfghooks.h"
      30              : #include "df.h"
      31              : #include "memmodel.h"
      32              : #include "tm_p.h"
      33              : #include "regs.h"
      34              : #include "insn-config.h"
      35              : #include "emit-rtl.h"
      36              : #include "output.h"
      37              : #include "tree-pass.h"
      38              : #include "cfgrtl.h"
      39              : #include "cfgbuild.h"
      40              : #include "bb-reorder.h"
      41              : #include "shrink-wrap.h"
      42              : #include "regcprop.h"
      43              : #include "rtl-iter.h"
      44              : #include "valtrack.h"
      45              : #include "function-abi.h"
      46              : #include "print-rtl.h"
      47              : 
      48              : /* Return true if INSN requires the stack frame to be set up.
      49              :    PROLOGUE_USED contains the hard registers used in the function
      50              :    prologue.  SET_UP_BY_PROLOGUE is the set of registers we expect the
      51              :    prologue to set up for the function.  */
      52              : bool
      53    131910652 : requires_stack_frame_p (rtx_insn *insn, HARD_REG_SET prologue_used,
      54              :                         HARD_REG_SET set_up_by_prologue)
      55              : {
      56    131910652 :   df_ref def, use;
      57    131910652 :   HARD_REG_SET hardregs;
      58    131910652 :   unsigned regno;
      59              : 
      60    131910652 :   if (CALL_P (insn) && !FAKE_CALL_P (insn))
      61      5431578 :     return !SIBLING_CALL_P (insn);
      62              : 
      63              :   /* We need a frame to get the unique CFA expected by the unwinder.  */
      64    126479074 :   if (cfun->can_throw_non_call_exceptions && can_throw_internal (insn))
      65              :     return true;
      66              : 
      67    126290063 :   CLEAR_HARD_REG_SET (hardregs);
      68    262119712 :   FOR_EACH_INSN_DEF (def, insn)
      69              :     {
      70    135829649 :       rtx dreg = DF_REF_REG (def);
      71              : 
      72    135829649 :       if (!REG_P (dreg))
      73            0 :         continue;
      74              : 
      75    135829649 :       add_to_hard_reg_set (&hardregs, GET_MODE (dreg), REGNO (dreg));
      76              :     }
      77    126290063 :   if (hard_reg_set_intersect_p (hardregs, prologue_used))
      78              :     return true;
      79    124553385 :   hardregs &= ~crtl->abi->full_reg_clobbers ();
      80  11095765976 :   for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
      81  10977658447 :     if (TEST_HARD_REG_BIT (hardregs, regno)
      82  10977658447 :         && df_regs_ever_live_p (regno))
      83              :       return true;
      84              : 
      85    236753047 :   FOR_EACH_INSN_USE (use, insn)
      86              :     {
      87    118645518 :       rtx reg = DF_REF_REG (use);
      88              : 
      89    118645518 :       if (!REG_P (reg))
      90            0 :         continue;
      91              : 
      92    118645518 :       add_to_hard_reg_set (&hardregs, GET_MODE (reg),
      93              :                            REGNO (reg));
      94              :     }
      95    118107529 :   if (hard_reg_set_intersect_p (hardregs, set_up_by_prologue))
      96              :     return true;
      97              : 
      98              :   return false;
      99              : }
     100              : 
     101              : /* See whether there has a single live edge from BB, which dest uses
     102              :    [REGNO, END_REGNO).  Return the live edge if its dest bb has
     103              :    one or two predecessors.  Otherwise return NULL.  */
     104              : 
     105              : static edge
     106       312296 : live_edge_for_reg (basic_block bb, int regno, int end_regno)
     107              : {
     108       312296 :   edge e, live_edge;
     109       312296 :   edge_iterator ei;
     110       312296 :   bitmap live;
     111       312296 :   int i;
     112              : 
     113       312296 :   live_edge = NULL;
     114       736436 :   FOR_EACH_EDGE (e, ei, bb->succs)
     115              :     {
     116       525931 :       live = df_get_live_in (e->dest);
     117      1476002 :       for (i = regno; i < end_regno; i++)
     118       525931 :         if (REGNO_REG_SET_P (live, i))
     119              :           {
     120       411986 :             if (live_edge && live_edge != e)
     121              :               return NULL;
     122              :             live_edge = e;
     123              :           }
     124              :     }
     125              : 
     126              :   /* We can sometimes encounter dead code.  Don't try to move it
     127              :      into the exit block.  */
     128       210505 :   if (!live_edge || live_edge->dest == EXIT_BLOCK_PTR_FOR_FN (cfun))
     129              :     return NULL;
     130              : 
     131              :   /* Reject targets of abnormal edges.  This is needed for correctness
     132              :      on ports like Alpha and MIPS, whose pic_offset_table_rtx can die on
     133              :      exception edges even though it is generally treated as call-saved
     134              :      for the majority of the compilation.  Moving across abnormal edges
     135              :      isn't going to be interesting for shrink-wrap usage anyway.  */
     136       208404 :   if (live_edge->flags & EDGE_ABNORMAL)
     137              :     return NULL;
     138              : 
     139              :   /* When live_edge->dest->preds == 2, we can create a new block on
     140              :      the edge to make it meet the requirement.  */
     141       207009 :   if (EDGE_COUNT (live_edge->dest->preds) > 2)
     142              :     return NULL;
     143              : 
     144              :   return live_edge;
     145              : }
     146              : 
     147              : /* Try to move INSN from BB to a successor.  Return true on success.
     148              :    USES and DEFS are the set of registers that are used and defined
     149              :    after INSN in BB.  SPLIT_P indicates whether a live edge from BB
     150              :    is splitted or not.  */
     151              : 
     152              : static bool
     153      7746046 : move_insn_for_shrink_wrap (basic_block bb, rtx_insn *insn,
     154              :                            const_hard_reg_set uses,
     155              :                            const_hard_reg_set defs,
     156              :                            bool *split_p,
     157              :                            struct dead_debug_local *debug)
     158              : {
     159      7746046 :   rtx set, src, dest;
     160      7746046 :   bitmap live_out, live_in, bb_uses = NULL, bb_defs = NULL;
     161      7746046 :   unsigned int i, dregno, end_dregno;
     162      7746046 :   unsigned int sregno = FIRST_PSEUDO_REGISTER;
     163      7746046 :   unsigned int end_sregno = FIRST_PSEUDO_REGISTER;
     164      7746046 :   basic_block next_block;
     165      7746046 :   edge live_edge;
     166      7746046 :   rtx_insn *dinsn;
     167      7746046 :   df_ref def;
     168              : 
     169              :   /* Look for a simple register assignment.  We don't use single_set here
     170              :      because we can't deal with any CLOBBERs, USEs, or REG_UNUSED secondary
     171              :      destinations.  */
     172      7746046 :   if (!INSN_P (insn))
     173              :     return false;
     174      7746046 :   set = PATTERN (insn);
     175      7746046 :   if (GET_CODE (set) != SET)
     176              :     return false;
     177      6353992 :   src = SET_SRC (set);
     178      6353992 :   dest = SET_DEST (set);
     179              : 
     180              :   /* For the destination, we want only a register.  Also disallow STACK
     181              :      or FRAME related adjustments.  They are likely part of the prologue,
     182              :      so keep them in the entry block.  */
     183      6353992 :   if (!REG_P (dest)
     184      4495722 :       || dest == stack_pointer_rtx
     185      4478755 :       || dest == frame_pointer_rtx
     186      4478755 :       || dest == hard_frame_pointer_rtx)
     187              :     return false;
     188              : 
     189              :   /* For the source, we want one of:
     190              :       (1) A (non-overlapping) register
     191              :       (2) A constant,
     192              :       (3) An expression involving no more than one register.
     193              : 
     194              :      That last point comes from the code following, which was originally
     195              :      written to handle only register move operations, and still only handles
     196              :      a single source register when checking for overlaps.  Happily, the
     197              :      same checks can be applied to expressions like (plus reg const).  */
     198              : 
     199      4478359 :   if (CONSTANT_P (src))
     200              :     ;
     201      3436484 :   else if (!REG_P (src))
     202              :     {
     203      2473937 :       rtx src_inner = NULL_RTX;
     204              : 
     205      2473937 :       if (can_throw_internal (insn))
     206      1779597 :         return false;
     207              : 
     208      2464587 :       subrtx_var_iterator::array_type array;
     209      5386807 :       FOR_EACH_SUBRTX_VAR (iter, array, src, ALL)
     210              :         {
     211      4692467 :           rtx x = *iter;
     212      4692467 :           switch (GET_RTX_CLASS (GET_CODE (x)))
     213              :             {
     214              :             case RTX_CONST_OBJ:
     215              :             case RTX_COMPARE:
     216              :             case RTX_COMM_COMPARE:
     217              :             case RTX_BIN_ARITH:
     218              :             case RTX_COMM_ARITH:
     219              :             case RTX_UNARY:
     220              :             case RTX_TERNARY:
     221              :               /* Constant or expression.  Continue.  */
     222              :               break;
     223              : 
     224      2792496 :             case RTX_OBJ:
     225      2792496 :             case RTX_EXTRA:
     226      2792496 :               switch (GET_CODE (x))
     227              :                 {
     228              :                 case UNSPEC:
     229              :                 case SUBREG:
     230              :                 case STRICT_LOW_PART:
     231              :                 case PC:
     232              :                 case LO_SUM:
     233              :                   /* Ok.  Continue.  */
     234              :                   break;
     235              : 
     236      1269117 :                 case REG:
     237              :                   /* Fail if we see a second inner register.  */
     238      1269117 :                   if (src_inner != NULL)
     239      1770247 :                     return false;
     240              :                   src_inner = x;
     241              :                   break;
     242              : 
     243              :                 default:
     244              :                   return false;
     245              :                 }
     246              :               break;
     247              : 
     248              :             default:
     249              :               return false;
     250              :             }
     251              :         }
     252              : 
     253       694340 :       if (src_inner != NULL)
     254       694278 :         src = src_inner;
     255      2464587 :     }
     256              : 
     257              :   /* Make sure that the source register isn't defined later in BB.  */
     258      2698762 :   if (REG_P (src))
     259              :     {
     260      1656825 :       sregno = REGNO (src);
     261      1656825 :       end_sregno = END_REGNO (src);
     262      1656825 :       if (overlaps_hard_reg_set_p (defs, GET_MODE (src), sregno))
     263              :         return false;
     264              :     }
     265              : 
     266              :   /* Make sure that the destination register isn't referenced later in BB.  */
     267      1930917 :   dregno = REGNO (dest);
     268      1930917 :   end_dregno = END_REGNO (dest);
     269      1930917 :   if (overlaps_hard_reg_set_p (uses, GET_MODE (dest), dregno)
     270      1930917 :       || overlaps_hard_reg_set_p (defs, GET_MODE (dest), dregno))
     271              :     return false;
     272              : 
     273              :   /* See whether there is a successor block to which we could move INSN.  */
     274       280839 :   live_edge = live_edge_for_reg (bb, dregno, end_dregno);
     275       280839 :   if (!live_edge)
     276              :     return false;
     277              : 
     278       177599 :   next_block = live_edge->dest;
     279              :   /* Create a new basic block on the edge.  */
     280       177599 :   if (EDGE_COUNT (next_block->preds) == 2)
     281              :     {
     282              :       /* split_edge for a block with only one successor is meaningless.  */
     283        97205 :       if (EDGE_COUNT (bb->succs) == 1)
     284              :         return false;
     285              : 
     286              :       /* If DF_LIVE doesn't exist, i.e. at -O1, just give up.  */
     287        10934 :       if (!df_live)
     288              :         return false;
     289              : 
     290        10467 :       basic_block old_dest = live_edge->dest;
     291        10467 :       next_block = split_edge (live_edge);
     292              : 
     293              :       /* We create a new basic block.  Call df_grow_bb_info to make sure
     294              :          all data structures are allocated.  */
     295        10467 :       df_grow_bb_info (df_live);
     296              : 
     297        10467 :       bitmap_and (df_get_live_in (next_block), df_get_live_out (bb),
     298        10467 :                   df_get_live_in (old_dest));
     299        10467 :       df_set_bb_dirty (next_block);
     300              : 
     301              :       /* We should not split more than once for a function.  */
     302        10467 :       if (*split_p)
     303              :         return false;
     304              : 
     305        10467 :       *split_p = true;
     306              :     }
     307              : 
     308              :   /* At this point we are committed to moving INSN, but let's try to
     309              :      move it as far as we can.  */
     310       107143 :   do
     311              :     {
     312       107143 :       if (MAY_HAVE_DEBUG_BIND_INSNS)
     313              :         {
     314       835638 :           FOR_BB_INSNS_REVERSE (bb, dinsn)
     315       823324 :             if (DEBUG_BIND_INSN_P (dinsn))
     316              :               {
     317       328837 :                 df_ref use;
     318       437608 :                 FOR_EACH_INSN_USE (use, dinsn)
     319       108771 :                   if (refers_to_regno_p (dregno, end_dregno,
     320       108771 :                                          DF_REF_REG (use), (rtx *) NULL))
     321        29568 :                     dead_debug_add (debug, use, DF_REF_REGNO (use));
     322              :               }
     323       494487 :             else if (dinsn == insn)
     324              :               break;
     325              :         }
     326       107143 :       live_out = df_get_live_out (bb);
     327       107143 :       live_in = df_get_live_in (next_block);
     328       107143 :       bb = next_block;
     329              : 
     330              :       /* Check whether BB uses DEST or clobbers DEST.  We need to add
     331              :          INSN to BB if so.  Either way, DEST is no longer live on entry,
     332              :          except for any part that overlaps SRC (next loop).  */
     333       107143 :       if (!*split_p)
     334              :         {
     335        89805 :           bb_uses = &DF_LR_BB_INFO (bb)->use;
     336        89805 :           bb_defs = &DF_LR_BB_INFO (bb)->def;
     337              :         }
     338       107143 :       if (df_live)
     339              :         {
     340       205660 :           for (i = dregno; i < end_dregno; i++)
     341              :             {
     342       102830 :               if (*split_p
     343        85492 :                   || REGNO_REG_SET_P (bb_uses, i)
     344        41699 :                   || REGNO_REG_SET_P (bb_defs, i)
     345       186228 :                   || REGNO_REG_SET_P (&DF_LIVE_BB_INFO (bb)->gen, i))
     346              :                 next_block = NULL;
     347       102830 :               CLEAR_REGNO_REG_SET (live_out, i);
     348       102830 :               CLEAR_REGNO_REG_SET (live_in, i);
     349              :             }
     350              : 
     351              :           /* Check whether BB clobbers SRC.  We need to add INSN to BB if so.
     352              :              Either way, SRC is now live on entry.  */
     353       195331 :           for (i = sregno; i < end_sregno; i++)
     354              :             {
     355        92501 :               if (*split_p
     356        82929 :                   || REGNO_REG_SET_P (bb_defs, i)
     357       175413 :                   || REGNO_REG_SET_P (&DF_LIVE_BB_INFO (bb)->gen, i))
     358              :                 next_block = NULL;
     359        92501 :               SET_REGNO_REG_SET (live_out, i);
     360        92501 :               SET_REGNO_REG_SET (live_in, i);
     361              :             }
     362              :         }
     363              :       else
     364              :         {
     365              :           /* DF_LR_BB_INFO (bb)->def does not comprise the DF_REF_PARTIAL and
     366              :              DF_REF_CONDITIONAL defs.  So if DF_LIVE doesn't exist, i.e.
     367              :              at -O1, just give up searching NEXT_BLOCK.  */
     368         8626 :           next_block = NULL;
     369         8626 :           for (i = dregno; i < end_dregno; i++)
     370              :             {
     371         4313 :               CLEAR_REGNO_REG_SET (live_out, i);
     372         4313 :               CLEAR_REGNO_REG_SET (live_in, i);
     373              :             }
     374              : 
     375         8612 :           for (i = sregno; i < end_sregno; i++)
     376              :             {
     377         4299 :               SET_REGNO_REG_SET (live_out, i);
     378         4299 :               SET_REGNO_REG_SET (live_in, i);
     379              :             }
     380              :         }
     381              : 
     382              :       /* If we don't need to add the move to BB, look for a single
     383              :          successor block.  */
     384       107143 :       if (next_block)
     385              :         {
     386        31457 :           live_edge = live_edge_for_reg (next_block, dregno, end_dregno);
     387        31457 :           if (!live_edge || EDGE_COUNT (live_edge->dest->preds) > 1)
     388              :             break;
     389              :           next_block = live_edge->dest;
     390              :         }
     391              :     }
     392        91968 :   while (next_block);
     393              : 
     394              :   /* For the new created basic block, there is no dataflow info at all.
     395              :      So skip the following dataflow update and check.  */
     396        90861 :   if (!(*split_p))
     397              :     {
     398              :       /* BB now defines DEST.  It only uses the parts of DEST that overlap SRC
     399              :          (next loop).  */
     400       147046 :       for (i = dregno; i < end_dregno; i++)
     401              :         {
     402        73523 :           CLEAR_REGNO_REG_SET (bb_uses, i);
     403        73523 :           SET_REGNO_REG_SET (bb_defs, i);
     404              :         }
     405              : 
     406              :       /* BB now uses SRC.  */
     407       144942 :       for (i = sregno; i < end_sregno; i++)
     408        71419 :         SET_REGNO_REG_SET (bb_uses, i);
     409              :     }
     410              : 
     411              :   /* Insert debug temps for dead REGs used in subsequent debug insns.  */
     412        90861 :   if (debug->used && !bitmap_empty_p (debug->used))
     413        30480 :     FOR_EACH_INSN_DEF (def, insn)
     414        15240 :       dead_debug_insert_temp (debug, DF_REF_REGNO (def), insn,
     415              :                               DEBUG_TEMP_BEFORE_WITH_VALUE);
     416              : 
     417        90861 :   rtx_insn *insn_copy = emit_insn_after (PATTERN (insn), bb_note (bb));
     418              :   /* Update the LABEL_NUSES count on any referenced labels. The ideal
     419              :      solution here would be to actually move the instruction instead
     420              :      of copying/deleting it as this loses some notations on the
     421              :      insn.  */
     422        90861 :   mark_jump_label (PATTERN (insn), insn_copy, 0);
     423        90861 :   delete_insn (insn);
     424        90861 :   return true;
     425              : }
     426              : 
     427              : /* Look for register copies in the first block of the function, and move
     428              :    them down into successor blocks if the register is used only on one
     429              :    path.  This exposes more opportunities for shrink-wrapping.  These
     430              :    kinds of sets often occur when incoming argument registers are moved
     431              :    to call-saved registers because their values are live across one or
     432              :    more calls during the function.  */
     433              : 
     434              : static void
     435       653209 : prepare_shrink_wrap (basic_block entry_block)
     436              : {
     437       653209 :   rtx_insn *insn, *curr;
     438       653209 :   rtx x;
     439       653209 :   HARD_REG_SET uses, defs;
     440       653209 :   df_ref def, use;
     441       653209 :   bool split_p = false;
     442       653209 :   unsigned int i;
     443       653209 :   struct dead_debug_local debug;
     444              : 
     445       653209 :   if (JUMP_P (BB_END (entry_block)))
     446              :     {
     447              :       /* To have more shrink-wrapping opportunities, prepare_shrink_wrap tries
     448              :          to sink the copies from parameter to callee saved register out of
     449              :          entry block.  copyprop_hardreg_forward_bb_without_debug_insn is called
     450              :          to release some dependences.  */
     451       357335 :       copyprop_hardreg_forward_bb_without_debug_insn (entry_block);
     452              :     }
     453              : 
     454       653209 :   dead_debug_local_init (&debug, NULL, NULL);
     455      2612836 :   CLEAR_HARD_REG_SET (uses);
     456       653209 :   CLEAR_HARD_REG_SET (defs);
     457              : 
     458     28492472 :   FOR_BB_INSNS_REVERSE_SAFE (entry_block, insn, curr)
     459     13593027 :     if (NONDEBUG_INSN_P (insn)
     460      7746046 :         && !move_insn_for_shrink_wrap (entry_block, insn, uses, defs,
     461              :                                        &split_p, &debug))
     462              :       {
     463              :         /* Add all defined registers to DEFs.  */
     464     86025629 :         FOR_EACH_INSN_DEF (def, insn)
     465              :           {
     466     78370444 :             x = DF_REF_REG (def);
     467     78370444 :             if (REG_P (x) && HARD_REGISTER_P (x))
     468    156740888 :               for (i = REGNO (x); i < END_REGNO (x); i++)
     469     78370444 :                 SET_HARD_REG_BIT (defs, i);
     470              :           }
     471              : 
     472              :         /* Add all used registers to USESs.  */
     473     17571586 :         FOR_EACH_INSN_USE (use, insn)
     474              :           {
     475      9916401 :             x = DF_REF_REG (use);
     476      9916401 :             if (REG_P (x) && HARD_REGISTER_P (x))
     477     19832802 :               for (i = REGNO (x); i < END_REGNO (x); i++)
     478      9916401 :                 SET_HARD_REG_BIT (uses, i);
     479              :           }
     480              :       }
     481              : 
     482       653209 :   dead_debug_local_finish (&debug, NULL);
     483       653209 : }
     484              : 
     485              : /* Return whether basic block PRO can get the prologue.  It cannot if it
     486              :    has incoming complex edges that need a prologue inserted (we make a new
     487              :    block for the prologue, so those edges would need to be redirected, which
     488              :    does not work).  It also cannot if there exist registers live on entry
     489              :    to PRO that are clobbered by the prologue.  */
     490              : 
     491              : static bool
     492        78244 : can_get_prologue (basic_block pro, HARD_REG_SET prologue_clobbered)
     493              : {
     494        78244 :   edge e;
     495        78244 :   edge_iterator ei;
     496       193991 :   FOR_EACH_EDGE (e, ei, pro->preds)
     497       115764 :     if (e->flags & EDGE_COMPLEX
     498       115764 :         && !dominated_by_p (CDI_DOMINATORS, e->src, pro))
     499              :       return false;
     500              : 
     501              :   HARD_REG_SET live;
     502        78227 :   REG_SET_TO_HARD_REG_SET (live, df_get_live_in (pro));
     503       156454 :   if (hard_reg_set_intersect_p (live, prologue_clobbered))
     504              :     return false;
     505              : 
     506              :   return true;
     507              : }
     508              : 
     509              : /* Return whether we can duplicate basic block BB for shrink wrapping.  We
     510              :    cannot if the block cannot be duplicated at all, or if any of its incoming
     511              :    edges are complex and come from a block that does not require a prologue
     512              :    (we cannot redirect such edges), or if the block is too big to copy.
     513              :    PRO is the basic block before which we would put the prologue, MAX_SIZE is
     514              :    the maximum size block we allow to be copied.  */
     515              : 
     516              : static bool
     517       578858 : can_dup_for_shrink_wrapping (basic_block bb, basic_block pro, unsigned max_size)
     518              : {
     519       578858 :   if (!can_duplicate_block_p (bb))
     520              :     return false;
     521              : 
     522       578547 :   edge e;
     523       578547 :   edge_iterator ei;
     524      1459824 :   FOR_EACH_EDGE (e, ei, bb->preds)
     525       890772 :     if (e->flags & (EDGE_COMPLEX | EDGE_CROSSING)
     526       890772 :         && !dominated_by_p (CDI_DOMINATORS, e->src, pro))
     527              :       return false;
     528              : 
     529       569052 :   unsigned size = 0;
     530              : 
     531       569052 :   rtx_insn *insn;
     532      5193813 :   FOR_BB_INSNS (bb, insn)
     533      4818240 :     if (NONDEBUG_INSN_P (insn))
     534              :       {
     535      1639295 :         size += get_attr_min_length (insn);
     536      1639295 :         if (size > max_size)
     537              :           return false;
     538              :       }
     539              : 
     540              :   return true;
     541              : }
     542              : 
     543              : /* Do whatever needs to be done for exits that run without prologue.
     544              :    Sibcalls need nothing done.  Normal exits get a simple_return inserted.  */
     545              : 
     546              : static void
     547        59512 : handle_simple_exit (edge e)
     548              : {
     549              : 
     550        59512 :   if (e->flags & EDGE_SIBCALL)
     551              :     {
     552              :       /* Tell function.cc to take no further action on this edge.  */
     553         4983 :       e->flags |= EDGE_IGNORE;
     554              : 
     555         4983 :       e->flags &= ~EDGE_FALLTHRU;
     556         4983 :       emit_barrier_after_bb (e->src);
     557         4983 :       return;
     558              :     }
     559              : 
     560              :   /* If the basic block the edge comes from has multiple successors,
     561              :      split the edge.  */
     562        54529 :   if (EDGE_COUNT (e->src->succs) > 1)
     563              :     {
     564         1473 :       basic_block old_bb = e->src;
     565         1473 :       rtx_insn *end = BB_END (old_bb);
     566         1473 :       rtx_note *note = emit_note_after (NOTE_INSN_DELETED, end);
     567         1473 :       basic_block new_bb = create_basic_block (note, note, old_bb);
     568         1473 :       BB_COPY_PARTITION (new_bb, old_bb);
     569         1473 :       BB_END (old_bb) = end;
     570              : 
     571         1473 :       redirect_edge_succ (e, new_bb);
     572         1473 :       new_bb->count = e->count ();
     573         1473 :       e->flags |= EDGE_FALLTHRU;
     574              : 
     575         1473 :       e = make_single_succ_edge (new_bb, EXIT_BLOCK_PTR_FOR_FN (cfun), 0);
     576              :     }
     577              : 
     578        54529 :   e->flags &= ~EDGE_FALLTHRU;
     579        54529 :   rtx_jump_insn *ret = emit_jump_insn_after (targetm.gen_simple_return (),
     580        54529 :                                              BB_END (e->src));
     581        54529 :   JUMP_LABEL (ret) = simple_return_rtx;
     582        54529 :   emit_barrier_after_bb (e->src);
     583              : 
     584        54529 :   if (dump_file)
     585           23 :     fprintf (dump_file, "Made simple_return with UID %d in bb %d\n",
     586           23 :              INSN_UID (ret), e->src->index);
     587              : }
     588              : 
     589              : /* Try to perform a kind of shrink-wrapping, making sure the
     590              :    prologue/epilogue is emitted only around those parts of the
     591              :    function that require it.
     592              : 
     593              :    There will be exactly one prologue, and it will be executed either
     594              :    zero or one time, on any path.  Depending on where the prologue is
     595              :    placed, some of the basic blocks can be reached via both paths with
     596              :    and without a prologue.  Such blocks will be duplicated here, and the
     597              :    edges changed to match.
     598              : 
     599              :    Paths that go to the exit without going through the prologue will use
     600              :    a simple_return instead of the epilogue.  We maximize the number of
     601              :    those, making sure to only duplicate blocks that can be duplicated.
     602              :    If the prologue can then still be placed in multiple locations, we
     603              :    place it as early as possible.
     604              : 
     605              :    An example, where we duplicate blocks with control flow (legend:
     606              :    _B_egin, _R_eturn and _S_imple_return; edges without arrowhead should
     607              :    be taken to point down or to the right, to simplify the diagram; here,
     608              :    block 3 needs a prologue, the rest does not):
     609              : 
     610              : 
     611              :        B                 B
     612              :        |                 |
     613              :        2                 2
     614              :        |\                |\
     615              :        | 3    becomes    | 3
     616              :        |/                |  \
     617              :        4                 7   4
     618              :        |\                |\  |\
     619              :        | 5               | 8 | 5
     620              :        |/                |/  |/
     621              :        6                 9   6
     622              :        |                 |   |
     623              :        R                 S   R
     624              : 
     625              : 
     626              :    (bb 4 is duplicated to 7, and so on; the prologue is inserted on the
     627              :    edge 2->3).
     628              : 
     629              :    Another example, where part of a loop is duplicated (again, bb 3 is
     630              :    the only block that needs a prologue):
     631              : 
     632              : 
     633              :        B   3<--              B       ->3<--
     634              :        |   |   |             |      |  |   |
     635              :        |   v   |   becomes   |      |  v   |
     636              :        2---4---              2---5--   4---
     637              :            |                     |     |
     638              :            R                     S     R
     639              : 
     640              : 
     641              :    (bb 4 is duplicated to 5; the prologue is inserted on the edge 5->3).
     642              : 
     643              :    ENTRY_EDGE is the edge where the prologue will be placed, possibly
     644              :    changed by this function.  PROLOGUE_SEQ is the prologue we will insert.  */
     645              : 
     646              : void
     647      1471363 : try_shrink_wrapping (edge *entry_edge, rtx_insn *prologue_seq)
     648              : {
     649              :   /* If we cannot shrink-wrap, are told not to shrink-wrap, or it makes
     650              :      no sense to shrink-wrap: then do not shrink-wrap!  */
     651              : 
     652      1471363 :   if (!SHRINK_WRAPPING_ENABLED)
     653      1413928 :     return;
     654              : 
     655      1043439 :   if (crtl->profile && !targetm.profile_before_prologue ())
     656              :     return;
     657              : 
     658      1043439 :   if (crtl->calls_eh_return)
     659              :     return;
     660              : 
     661      1433619 :   bool empty_prologue = true;
     662      1433619 :   for (rtx_insn *insn = prologue_seq; insn; insn = NEXT_INSN (insn))
     663      1043414 :     if (!(NOTE_P (insn) && NOTE_KIND (insn) == NOTE_INSN_PROLOGUE_END))
     664              :       {
     665              :         empty_prologue = false;
     666              :         break;
     667              :       }
     668      1043414 :   if (empty_prologue)
     669              :     return;
     670              : 
     671              :   /* Move some code down to expose more shrink-wrapping opportunities.  */
     672              : 
     673       653209 :   basic_block entry = (*entry_edge)->dest;
     674       653209 :   prepare_shrink_wrap (entry);
     675              : 
     676       653209 :   if (dump_file)
     677           51 :     fprintf (dump_file, "Attempting shrink-wrapping optimization.\n");
     678              : 
     679              :   /* Compute the registers set and used in the prologue.  */
     680              : 
     681              :   HARD_REG_SET prologue_clobbered, prologue_used;
     682      1959627 :   CLEAR_HARD_REG_SET (prologue_clobbered);
     683      3228307 :   CLEAR_HARD_REG_SET (prologue_used);
     684      3228307 :   for (rtx_insn *insn = prologue_seq; insn; insn = NEXT_INSN (insn))
     685      2575098 :     if (NONDEBUG_INSN_P (insn))
     686              :       {
     687              :         HARD_REG_SET this_used;
     688      1921889 :         CLEAR_HARD_REG_SET (this_used);
     689      1921889 :         note_uses (&PATTERN (insn), record_hard_reg_uses, &this_used);
     690      1921889 :         this_used &= ~prologue_clobbered;
     691      1921889 :         prologue_used |= this_used;
     692      1921889 :         note_stores (insn, record_hard_reg_sets, &prologue_clobbered);
     693              :       }
     694       653209 :   CLEAR_HARD_REG_BIT (prologue_clobbered, STACK_POINTER_REGNUM);
     695       653209 :   if (frame_pointer_needed)
     696        43330 :     CLEAR_HARD_REG_BIT (prologue_clobbered, HARD_FRAME_POINTER_REGNUM);
     697              : 
     698              :   /* Find out what registers are set up by the prologue; any use of these
     699              :      cannot happen before the prologue.  */
     700              : 
     701              :   struct hard_reg_set_container set_up_by_prologue;
     702       653209 :   CLEAR_HARD_REG_SET (set_up_by_prologue.set);
     703       766275 :   add_to_hard_reg_set (&set_up_by_prologue.set, Pmode, STACK_POINTER_REGNUM);
     704       653209 :   add_to_hard_reg_set (&set_up_by_prologue.set, Pmode, ARG_POINTER_REGNUM);
     705       653209 :   if (frame_pointer_needed)
     706        43330 :     add_to_hard_reg_set (&set_up_by_prologue.set, Pmode,
     707              :                          HARD_FRAME_POINTER_REGNUM);
     708       653209 :   if (pic_offset_table_rtx
     709       653209 :       && (unsigned) PIC_OFFSET_TABLE_REGNUM != INVALID_REGNUM)
     710            0 :     add_to_hard_reg_set (&set_up_by_prologue.set, Pmode,
     711            0 :                          PIC_OFFSET_TABLE_REGNUM);
     712       653209 :   if (crtl->drap_reg)
     713         5351 :     add_to_hard_reg_set (&set_up_by_prologue.set,
     714         5351 :                          GET_MODE (crtl->drap_reg),
     715         5351 :                          REGNO (crtl->drap_reg));
     716       653209 :   if (targetm.set_up_by_prologue)
     717            0 :     targetm.set_up_by_prologue (&set_up_by_prologue);
     718              : 
     719              :   /* We will insert the prologue before the basic block PRO.  PRO should
     720              :      dominate all basic blocks that need the prologue to be executed
     721              :      before them.  First, make PRO the "tightest wrap" possible.  */
     722              : 
     723       653209 :   calculate_dominance_info (CDI_DOMINATORS);
     724              : 
     725       653209 :   basic_block pro = 0;
     726              : 
     727       653209 :   basic_block bb;
     728       653209 :   edge e;
     729       653209 :   edge_iterator ei;
     730     10691097 :   FOR_EACH_BB_FN (bb, cfun)
     731              :     {
     732     10037888 :       rtx_insn *insn;
     733     70285235 :       FOR_BB_INSNS (bb, insn)
     734     65945780 :         if (NONDEBUG_INSN_P (insn)
     735     65945780 :             && requires_stack_frame_p (insn, prologue_used,
     736              :                                        set_up_by_prologue.set))
     737              :           {
     738      5698433 :             if (dump_file)
     739              :               {
     740          153 :                 fprintf (dump_file, "Block %d needs prologue due to insn %d:\n",
     741          153 :                          bb->index, INSN_UID (insn));
     742          153 :                 print_rtl_single (dump_file, insn);
     743              :               }
     744      5698433 :             pro = nearest_common_dominator (CDI_DOMINATORS, pro, bb);
     745      5698433 :             break;
     746              :           }
     747              :     }
     748              : 
     749              :   /* If nothing needs a prologue, just put it at the start.  This really
     750              :      shouldn't happen, but we cannot fix it here.  */
     751              : 
     752       653209 :   if (pro == 0)
     753              :     {
     754           70 :       if (dump_file)
     755            0 :         fprintf(dump_file, "Nothing needs a prologue, but it isn't empty; "
     756              :                            "putting it at the start.\n");
     757           70 :       pro = entry;
     758              :     }
     759              : 
     760       653209 :   if (dump_file)
     761           51 :     fprintf (dump_file, "After wrapping required blocks, PRO is now %d\n",
     762              :              pro->index);
     763              : 
     764              :   /* Now see if we can put the prologue at the start of PRO.  Putting it
     765              :      there might require duplicating a block that cannot be duplicated,
     766              :      or in some cases we cannot insert the prologue there at all.  If PRO
     767              :      wont't do, try again with the immediate dominator of PRO, and so on.
     768              : 
     769              :      The blocks that need duplicating are those reachable from PRO but
     770              :      not dominated by it.  We keep in BB_WITH a bitmap of the blocks
     771              :      reachable from PRO that we already found, and in VEC a stack of
     772              :      those we still need to consider (to find successors).  */
     773              : 
     774       653209 :   auto_bitmap bb_with;
     775       653209 :   bitmap_set_bit (bb_with, pro->index);
     776              : 
     777       653209 :   vec<basic_block> vec;
     778       653209 :   vec.create (n_basic_blocks_for_fn (cfun));
     779       653209 :   vec.quick_push (pro);
     780              : 
     781       653209 :   unsigned max_grow_size = get_uncond_jump_length ();
     782       653209 :   max_grow_size *= param_max_grow_copy_bb_insns;
     783              : 
     784       653209 :   basic_block checked_pro = NULL;
     785              : 
     786      1232067 :   while (pro != entry)
     787              :     {
     788       643626 :       if (pro != checked_pro)
     789              :         {
     790        67409 :           while (pro != entry && !can_get_prologue (pro, prologue_clobbered))
     791              :             {
     792          137 :               pro = get_immediate_dominator (CDI_DOMINATORS, pro);
     793              : 
     794          137 :               if (bitmap_set_bit (bb_with, pro->index))
     795          136 :                 vec.quick_push (pro);
     796              :             }
     797              :           checked_pro = pro;
     798              :         }
     799              : 
     800       643626 :       if (vec.is_empty ())
     801              :         break;
     802              : 
     803       578858 :       basic_block bb = vec.pop ();
     804       578858 :       if (!can_dup_for_shrink_wrapping (bb, pro, max_grow_size))
     805       206684 :         while (!dominated_by_p (CDI_DOMINATORS, bb, pro))
     806              :           {
     807         3399 :             gcc_assert (pro != entry);
     808              : 
     809         3399 :             pro = get_immediate_dominator (CDI_DOMINATORS, pro);
     810              : 
     811         3399 :             if (bitmap_set_bit (bb_with, pro->index))
     812         3167 :               vec.quick_push (pro);
     813              :           }
     814              : 
     815      1389207 :       FOR_EACH_EDGE (e, ei, bb->succs)
     816       810349 :         if (e->dest != EXIT_BLOCK_PTR_FOR_FN (cfun)
     817       810349 :             && bitmap_set_bit (bb_with, e->dest->index))
     818       514813 :           vec.quick_push (e->dest);
     819              :     }
     820              : 
     821       653209 :   if (dump_file)
     822           51 :     fprintf (dump_file, "Avoiding non-duplicatable blocks, PRO is now %d\n",
     823              :              pro->index);
     824              : 
     825              :   /* If we can move PRO back without having to duplicate more blocks, do so.
     826              :      We do this because putting the prologue earlier is better for scheduling.
     827              : 
     828              :      We can move back to a block PRE if every path from PRE will eventually
     829              :      need a prologue, that is, PRO is a post-dominator of PRE.  PRE needs
     830              :      to dominate every block reachable from itself.  We keep in BB_TMP a
     831              :      bitmap of the blocks reachable from PRE that we already found, and in
     832              :      VEC a stack of those we still need to consider.
     833              : 
     834              :      Any block reachable from PRE is also reachable from all predecessors
     835              :      of PRE, so if we find we need to move PRE back further we can leave
     836              :      everything not considered so far on the stack.  Any block dominated
     837              :      by PRE is also dominated by all other dominators of PRE, so anything
     838              :      found good for some PRE does not need to be reconsidered later.
     839              : 
     840              :      We don't need to update BB_WITH because none of the new blocks found
     841              :      can jump to a block that does not need the prologue.  */
     842              : 
     843       653209 :   if (pro != entry)
     844              :     {
     845        64768 :       calculate_dominance_info (CDI_POST_DOMINATORS);
     846              : 
     847        64768 :       auto_bitmap bb_tmp;
     848        64768 :       bitmap_copy (bb_tmp, bb_with);
     849        64768 :       basic_block last_ok = pro;
     850        64768 :       vec.truncate (0);
     851              : 
     852       140844 :       while (pro != entry)
     853              :         {
     854        68743 :           basic_block pre = get_immediate_dominator (CDI_DOMINATORS, pro);
     855        68743 :           if (!dominated_by_p (CDI_POST_DOMINATORS, pre, pro))
     856              :             break;
     857              : 
     858        11308 :           if (bitmap_set_bit (bb_tmp, pre->index))
     859        10897 :             vec.quick_push (pre);
     860              : 
     861        26816 :           bool ok = true;
     862        26816 :           while (!vec.is_empty ())
     863              :             {
     864        15872 :               if (!dominated_by_p (CDI_DOMINATORS, vec.last (), pre))
     865              :                 {
     866              :                   ok = false;
     867              :                   break;
     868              :                 }
     869              : 
     870        15508 :               basic_block bb = vec.pop ();
     871        35521 :               FOR_EACH_EDGE (e, ei, bb->succs)
     872        20013 :                 if (bitmap_set_bit (bb_tmp, e->dest->index))
     873         4611 :                   vec.quick_push (e->dest);
     874              :             }
     875              : 
     876        11308 :           if (ok && can_get_prologue (pre, prologue_clobbered))
     877              :             last_ok = pre;
     878              : 
     879        11308 :           pro = pre;
     880              :         }
     881              : 
     882        64768 :       pro = last_ok;
     883              : 
     884        64768 :       free_dominance_info (CDI_POST_DOMINATORS);
     885        64768 :     }
     886              : 
     887       653209 :   vec.release ();
     888              : 
     889       653209 :   if (dump_file)
     890           51 :     fprintf (dump_file, "Bumping back to anticipatable blocks, PRO is now %d\n",
     891              :              pro->index);
     892              : 
     893       653209 :   if (pro == entry)
     894              :     {
     895       595774 :       free_dominance_info (CDI_DOMINATORS);
     896       595774 :       return;
     897              :     }
     898              : 
     899              :   /* Compute what fraction of the frequency and count of the blocks that run
     900              :      both with and without prologue are for running with prologue.  This gives
     901              :      the correct answer for reducible flow graphs; for irreducible flow graphs
     902              :      our profile is messed up beyond repair anyway.  */
     903              : 
     904        57435 :   profile_count num = profile_count::zero ();
     905        57435 :   profile_count den = profile_count::zero ();
     906              : 
     907       142682 :   FOR_EACH_EDGE (e, ei, pro->preds)
     908        85247 :     if (!dominated_by_p (CDI_DOMINATORS, e->src, pro))
     909              :       {
     910        85003 :         if (e->count ().initialized_p ())
     911        84975 :           num += e->count ();
     912        85003 :         if (e->src->count.initialized_p ())
     913        84983 :           den += e->src->count;
     914              :       }
     915              : 
     916              :   /* All is okay, so do it.  */
     917              : 
     918        57435 :   crtl->shrink_wrapped = true;
     919        57435 :   if (dump_file)
     920           23 :     fprintf (dump_file, "Performing shrink-wrapping.\n");
     921              : 
     922              :   /* Copy the blocks that can run both with and without prologue.  The
     923              :      originals run with prologue, the copies without.  Store a pointer to
     924              :      the copy in the ->aux field of the original.  */
     925              : 
     926       776454 :   FOR_EACH_BB_FN (bb, cfun)
     927       719019 :     if (bitmap_bit_p (bb_with, bb->index)
     928       719019 :         && !dominated_by_p (CDI_DOMINATORS, bb, pro))
     929              :       {
     930        41015 :         basic_block dup = duplicate_block (bb, 0, 0);
     931              : 
     932        41015 :         bb->aux = dup;
     933              : 
     934        41015 :         if (JUMP_P (BB_END (dup)) && !any_condjump_p (BB_END (dup)))
     935         4832 :           emit_barrier_after_bb (dup);
     936              : 
     937        41015 :         if (EDGE_COUNT (dup->succs) == 0)
     938            7 :           emit_barrier_after_bb (dup);
     939              : 
     940        41015 :         if (dump_file)
     941           21 :           fprintf (dump_file, "Duplicated %d to %d\n", bb->index, dup->index);
     942              : 
     943        41051 :         if (num == profile_count::zero () || den.nonzero_p ())
     944        41006 :           bb->count = bb->count.apply_scale (num, den);
     945        41015 :         dup->count -= bb->count;
     946              :       }
     947              : 
     948              :   /* Now change the edges to point to the copies, where appropriate.  */
     949              : 
     950       782187 :   FOR_EACH_BB_FN (bb, cfun)
     951       724752 :     if (!dominated_by_p (CDI_DOMINATORS, bb, pro))
     952              :       {
     953       326357 :         basic_block src = bb;
     954       326357 :         if (bitmap_bit_p (bb_with, bb->index))
     955        41015 :           src = (basic_block) bb->aux;
     956              : 
     957       807494 :         FOR_EACH_EDGE (e, ei, src->succs)
     958              :           {
     959       481137 :             if (e->dest == EXIT_BLOCK_PTR_FOR_FN (cfun))
     960        91110 :               continue;
     961              : 
     962       390027 :             if (bitmap_bit_p (bb_with, e->dest->index)
     963       390027 :                 && !dominated_by_p (CDI_DOMINATORS, e->dest, pro))
     964              :               {
     965        57398 :                 if (dump_file)
     966           22 :                   fprintf (dump_file, "Redirecting edge %d->%d to %d\n",
     967           22 :                            e->src->index, e->dest->index,
     968           22 :                            ((basic_block) e->dest->aux)->index);
     969        57398 :                 redirect_edge_and_branch_force (e, (basic_block) e->dest->aux);
     970              :               }
     971       332629 :             else if (e->flags & EDGE_FALLTHRU
     972       332629 :                      && bitmap_bit_p (bb_with, bb->index))
     973          202 :               force_nonfallthru (e);
     974              :           }
     975              :       }
     976              : 
     977              :   /* Also redirect the function entry edge if necessary.  */
     978              : 
     979       114870 :   FOR_EACH_EDGE (e, ei, ENTRY_BLOCK_PTR_FOR_FN (cfun)->succs)
     980        57435 :     if (bitmap_bit_p (bb_with, e->dest->index)
     981        57435 :         && !dominated_by_p (CDI_DOMINATORS, e->dest, pro))
     982              :       {
     983           11 :         basic_block split_bb = split_edge (e);
     984           11 :         e = single_succ_edge (split_bb);
     985           11 :         redirect_edge_and_branch_force (e, (basic_block) e->dest->aux);
     986              :       }
     987              : 
     988              :   /* Make a simple_return for those exits that run without prologue.  */
     989              : 
     990       782198 :   FOR_EACH_BB_REVERSE_FN (bb, cfun)
     991       724763 :     if (!bitmap_bit_p (bb_with, bb->index))
     992       726605 :       FOR_EACH_EDGE (e, ei, bb->succs)
     993       439482 :         if (e->dest == EXIT_BLOCK_PTR_FOR_FN (cfun))
     994        59512 :           handle_simple_exit (e);
     995              : 
     996              :   /* Finally, we want a single edge to put the prologue on.  Make a new
     997              :      block before the PRO block; the edge beteen them is the edge we want.
     998              :      Then redirect those edges into PRO that come from blocks without the
     999              :      prologue, to point to the new block instead.  The new prologue block
    1000              :      is put at the end of the insn chain.  */
    1001              : 
    1002        57435 :   basic_block new_bb = create_empty_bb (EXIT_BLOCK_PTR_FOR_FN (cfun)->prev_bb);
    1003        57435 :   BB_COPY_PARTITION (new_bb, pro);
    1004        57435 :   new_bb->count = profile_count::zero ();
    1005        57435 :   if (dump_file)
    1006           23 :     fprintf (dump_file, "Made prologue block %d\n", new_bb->index);
    1007              : 
    1008       143597 :   for (ei = ei_start (pro->preds); (e = ei_safe_edge (ei)); )
    1009              :     {
    1010        86162 :       if (bitmap_bit_p (bb_with, e->src->index)
    1011        86162 :           || dominated_by_p (CDI_DOMINATORS, e->src, pro))
    1012              :         {
    1013         1159 :           ei_next (&ei);
    1014         1159 :           continue;
    1015              :         }
    1016              : 
    1017        85003 :       new_bb->count += e->count ();
    1018              : 
    1019        85003 :       redirect_edge_and_branch_force (e, new_bb);
    1020        85003 :       if (dump_file)
    1021           26 :         fprintf (dump_file, "Redirected edge from %d\n", e->src->index);
    1022              :     }
    1023              : 
    1024        57435 :   *entry_edge = make_single_succ_edge (new_bb, pro, EDGE_FALLTHRU);
    1025        57435 :   force_nonfallthru (*entry_edge);
    1026              : 
    1027        57435 :   free_dominance_info (CDI_DOMINATORS);
    1028       653209 : }
    1029              : 
    1030              : /* Separate shrink-wrapping
    1031              : 
    1032              :    Instead of putting all of the prologue and epilogue in one spot, we
    1033              :    can put parts of it in places where those components are executed less
    1034              :    frequently.  The following code does this, for prologue and epilogue
    1035              :    components that can be put in more than one location, and where those
    1036              :    components can be executed more than once (the epilogue component will
    1037              :    always be executed before the prologue component is executed a second
    1038              :    time).
    1039              : 
    1040              :    What exactly is a component is target-dependent.  The more usual
    1041              :    components are simple saves/restores to/from the frame of callee-saved
    1042              :    registers.  This code treats components abstractly (as an sbitmap),
    1043              :    letting the target handle all details.
    1044              : 
    1045              :    Prologue components are placed in such a way that for every component
    1046              :    the prologue is executed as infrequently as possible.  We do this by
    1047              :    walking the dominator tree, comparing the cost of placing a prologue
    1048              :    component before a block to the sum of costs determined for all subtrees
    1049              :    of that block.
    1050              : 
    1051              :    From this placement, we then determine for each component all blocks
    1052              :    where at least one of this block's dominators (including itself) will
    1053              :    get a prologue inserted.  That then is how the components are placed.
    1054              :    We could place the epilogue components a bit smarter (we can save a
    1055              :    bit of code size sometimes); this is a possible future improvement.
    1056              : 
    1057              :    Prologues and epilogues are preferably placed into a block, either at
    1058              :    the beginning or end of it, if it is needed for all predecessor resp.
    1059              :    successor edges; or placed on the edge otherwise.
    1060              : 
    1061              :    If the placement of any prologue/epilogue leads to a situation we cannot
    1062              :    handle (for example, an abnormal edge would need to be split, or some
    1063              :    targets want to use some specific registers that may not be available
    1064              :    where we want to put them), separate shrink-wrapping for the components
    1065              :    in that prologue/epilogue is aborted.  */
    1066              : 
    1067              : 
    1068              : /* Print the sbitmap COMPONENTS to the DUMP_FILE if not empty, with the
    1069              :    label LABEL.  */
    1070              : static void
    1071         1671 : dump_components (const char *label, sbitmap components)
    1072              : {
    1073         1671 :   if (bitmap_empty_p (components))
    1074              :     return;
    1075              : 
    1076         1024 :   fprintf (dump_file, " [%s", label);
    1077              : 
    1078        95232 :   for (unsigned int j = 0; j < components->n_bits; j++)
    1079        94208 :     if (bitmap_bit_p (components, j))
    1080         3530 :       fprintf (dump_file, " %u", j);
    1081              : 
    1082         1024 :   fprintf (dump_file, "]");
    1083              : }
    1084              : 
    1085              : /* The data we collect for each bb.  */
    1086              : struct sw {
    1087              :   /* What components does this BB need?  */
    1088              :   sbitmap needs_components;
    1089              : 
    1090              :   /* What components does this BB have?  This is the main decision this
    1091              :      pass makes.  */
    1092              :   sbitmap has_components;
    1093              : 
    1094              :   /* The components for which we placed code at the start of the BB (instead
    1095              :      of on all incoming edges).  */
    1096              :   sbitmap head_components;
    1097              : 
    1098              :   /* The components for which we placed code at the end of the BB (instead
    1099              :      of on all outgoing edges).  */
    1100              :   sbitmap tail_components;
    1101              : 
    1102              :   /* The frequency of executing the prologue for this BB, if a prologue is
    1103              :      placed on this BB.  This is a pessimistic estimate (no prologue is
    1104              :      needed for edges from blocks that have the component under consideration
    1105              :      active already).  */
    1106              :   gcov_type own_cost;
    1107              : 
    1108              :   /* The frequency of executing the prologue for this BB and all BBs
    1109              :      dominated by it.  */
    1110              :   gcov_type total_cost;
    1111              : };
    1112              : 
    1113              : /* A helper function for accessing the pass-specific info.  */
    1114              : static inline struct sw *
    1115    417881978 : SW (basic_block bb)
    1116              : {
    1117    417881978 :   gcc_assert (bb->aux);
    1118    417881978 :   return (struct sw *) bb->aux;
    1119              : }
    1120              : 
    1121              : /* Create the pass-specific data structures for separately shrink-wrapping
    1122              :    with components COMPONENTS.  */
    1123              : static void
    1124       735781 : init_separate_shrink_wrap (sbitmap components)
    1125              : {
    1126       735781 :   basic_block bb;
    1127     10175730 :   FOR_ALL_BB_FN (bb, cfun)
    1128              :     {
    1129      9439949 :       bb->aux = xcalloc (1, sizeof (struct sw));
    1130              : 
    1131      9439949 :       SW (bb)->needs_components = targetm.shrink_wrap.components_for_bb (bb);
    1132              : 
    1133              :       /* Mark all basic blocks without successor as needing all components.
    1134              :          This avoids problems in at least cfgcleanup, sel-sched, and
    1135              :          regrename (largely to do with all paths to such a block still
    1136              :          needing the same dwarf CFI info).  */
    1137      9439949 :       if (EDGE_COUNT (bb->succs) == 0)
    1138      1142126 :         bitmap_copy (SW (bb)->needs_components, components);
    1139              : 
    1140      9439949 :       if (dump_file)
    1141              :         {
    1142          630 :           fprintf (dump_file, "bb %d components:", bb->index);
    1143          630 :           dump_components ("has", SW (bb)->needs_components);
    1144          630 :           fprintf (dump_file, "\n");
    1145              :         }
    1146              : 
    1147      9439949 :       SW (bb)->has_components = sbitmap_alloc (SBITMAP_SIZE (components));
    1148      9439949 :       SW (bb)->head_components = sbitmap_alloc (SBITMAP_SIZE (components));
    1149      9439949 :       SW (bb)->tail_components = sbitmap_alloc (SBITMAP_SIZE (components));
    1150      9439949 :       bitmap_clear (SW (bb)->has_components);
    1151              :     }
    1152       735781 : }
    1153              : 
    1154              : /* Destroy the pass-specific data.  */
    1155              : static void
    1156       735781 : fini_separate_shrink_wrap (void)
    1157              : {
    1158       735781 :   basic_block bb;
    1159     10256440 :   FOR_ALL_BB_FN (bb, cfun)
    1160      9520659 :     if (bb->aux)
    1161              :       {
    1162      9439949 :         sbitmap_free (SW (bb)->needs_components);
    1163      9439949 :         sbitmap_free (SW (bb)->has_components);
    1164      9439949 :         sbitmap_free (SW (bb)->head_components);
    1165      9439949 :         sbitmap_free (SW (bb)->tail_components);
    1166      9439949 :         free (bb->aux);
    1167      9439949 :         bb->aux = 0;
    1168              :       }
    1169       735781 : }
    1170              : 
    1171              : /* Place the prologue for component WHICH, in the basic blocks dominated
    1172              :    by HEAD.  Do a DFS over the dominator tree, and set bit WHICH in the
    1173              :    HAS_COMPONENTS of a block if either the block has that bit set in
    1174              :    NEEDS_COMPONENTS, or it is cheaper to place the prologue here than in all
    1175              :    dominator subtrees separately.  */
    1176              : static void
    1177       590061 : place_prologue_for_one_component (unsigned int which, basic_block head)
    1178              : {
    1179              :   /* The block we are currently dealing with.  */
    1180       590061 :   basic_block bb = head;
    1181              :   /* Is this the first time we visit this block, i.e. have we just gone
    1182              :      down the tree.  */
    1183       590061 :   bool first_visit = true;
    1184              : 
    1185              :   /* Walk the dominator tree, visit one block per iteration of this loop.
    1186              :      Each basic block is visited twice: once before visiting any children
    1187              :      of the block, and once after visiting all of them (leaf nodes are
    1188              :      visited only once).  As an optimization, we do not visit subtrees
    1189              :      that can no longer influence the prologue placement.  */
    1190      5554002 :   for (;;)
    1191              :     {
    1192              :       /* First visit of a block: set the (children) cost accumulator to zero;
    1193              :          if the block does not have the component itself, walk down.  */
    1194      5554002 :       if (first_visit)
    1195              :         {
    1196              :           /* Initialize the cost.  The cost is the block execution frequency
    1197              :              that does not come from backedges.  Calculating this by simply
    1198              :              adding the cost of all edges that aren't backedges does not
    1199              :              work: this does not always add up to the block frequency at
    1200              :              all, and even if it does, rounding error makes for bad
    1201              :              decisions.  */
    1202      3773473 :           SW (bb)->own_cost = bb->count.to_frequency (cfun);
    1203              : 
    1204      3773473 :           edge e;
    1205      3773473 :           edge_iterator ei;
    1206      9624738 :           FOR_EACH_EDGE (e, ei, bb->preds)
    1207      5851265 :             if (dominated_by_p (CDI_DOMINATORS, e->src, bb))
    1208              :               {
    1209       220385 :                 if (SW (bb)->own_cost > EDGE_FREQUENCY (e))
    1210       204541 :                   SW (bb)->own_cost -= EDGE_FREQUENCY (e);
    1211              :                 else
    1212        15844 :                   SW (bb)->own_cost = 0;
    1213              :               }
    1214              : 
    1215      3773473 :           SW (bb)->total_cost = 0;
    1216              : 
    1217      3773473 :           if (!bitmap_bit_p (SW (bb)->needs_components, which)
    1218      3773473 :               && first_dom_son (CDI_DOMINATORS, bb))
    1219              :             {
    1220      1780529 :               bb = first_dom_son (CDI_DOMINATORS, bb);
    1221      1780529 :               continue;
    1222              :             }
    1223              :         }
    1224              : 
    1225              :       /* If this block does need the component itself, or it is cheaper to
    1226              :          put the prologue here than in all the descendants that need it,
    1227              :          mark it so.  If this block's immediate post-dominator is dominated
    1228              :          by this block, and that needs the prologue, we can put it on this
    1229              :          block as well (earlier is better).  */
    1230      3773473 :       if (bitmap_bit_p (SW (bb)->needs_components, which)
    1231      3773473 :           || SW (bb)->total_cost > SW (bb)->own_cost)
    1232              :         {
    1233      1063603 :           SW (bb)->total_cost = SW (bb)->own_cost;
    1234      1063603 :           bitmap_set_bit (SW (bb)->has_components, which);
    1235              :         }
    1236              :       else
    1237              :         {
    1238      2709870 :           basic_block kid = get_immediate_dominator (CDI_POST_DOMINATORS, bb);
    1239      2709870 :           if (dominated_by_p (CDI_DOMINATORS, kid, bb)
    1240      2709870 :               && bitmap_bit_p (SW (kid)->has_components, which))
    1241              :             {
    1242       166782 :               SW (bb)->total_cost = SW (bb)->own_cost;
    1243       166782 :               bitmap_set_bit (SW (bb)->has_components, which);
    1244              :             }
    1245              :         }
    1246              : 
    1247              :       /* We are back where we started, so we are done now.  */
    1248      3773473 :       if (bb == head)
    1249       590061 :         return;
    1250              : 
    1251              :       /* We now know the cost of the subtree rooted at the current block.
    1252              :          Accumulate this cost in the parent.  */
    1253      3183412 :       basic_block parent = get_immediate_dominator (CDI_DOMINATORS, bb);
    1254      3183412 :       SW (parent)->total_cost += SW (bb)->total_cost;
    1255              : 
    1256              :       /* Don't walk the tree down unless necessary.  */
    1257      3183412 :       if (next_dom_son (CDI_DOMINATORS, bb)
    1258      3183412 :           && SW (parent)->total_cost <= SW (parent)->own_cost)
    1259              :         {
    1260      1402883 :           bb = next_dom_son (CDI_DOMINATORS, bb);
    1261      1402883 :           first_visit = true;
    1262              :         }
    1263              :       else
    1264              :         {
    1265              :           bb = parent;
    1266              :           first_visit = false;
    1267              :         }
    1268              :     }
    1269              : }
    1270              : 
    1271              : /* Set HAS_COMPONENTS in every block to the maximum it can be set to without
    1272              :    setting it on any path from entry to exit where it was not already set
    1273              :    somewhere (or, for blocks that have no path to the exit, consider only
    1274              :    paths from the entry to the block itself).  Return whether any changes
    1275              :    were made to some HAS_COMPONENTS.  */
    1276              : static bool
    1277       917522 : spread_components (sbitmap components)
    1278              : {
    1279       917522 :   basic_block entry_block = ENTRY_BLOCK_PTR_FOR_FN (cfun);
    1280       917522 :   basic_block exit_block = EXIT_BLOCK_PTR_FOR_FN (cfun);
    1281              : 
    1282              :   /* A stack of all blocks left to consider, and a bitmap of all blocks
    1283              :      on that stack.  */
    1284       917522 :   vec<basic_block> todo;
    1285       917522 :   todo.create (n_basic_blocks_for_fn (cfun));
    1286       917522 :   auto_bitmap seen;
    1287              : 
    1288       917522 :   auto_sbitmap old (SBITMAP_SIZE (components));
    1289              : 
    1290              :   /* Find for every block the components that are *not* needed on some path
    1291              :      from the entry to that block.  Do this with a flood fill from the entry
    1292              :      block.  Every block can be visited at most as often as the number of
    1293              :      components (plus one), and usually much less often.  */
    1294              : 
    1295       917522 :   if (dump_file)
    1296           79 :     fprintf (dump_file, "Spreading down...\n");
    1297              : 
    1298       917522 :   basic_block bb;
    1299     18607052 :   FOR_ALL_BB_FN (bb, cfun)
    1300     17689530 :     bitmap_clear (SW (bb)->head_components);
    1301              : 
    1302       917522 :   bitmap_copy (SW (entry_block)->head_components, components);
    1303              : 
    1304       917522 :   edge e;
    1305       917522 :   edge_iterator ei;
    1306              : 
    1307       917522 :   todo.quick_push (single_succ (entry_block));
    1308       917522 :   bitmap_set_bit (seen, single_succ (entry_block)->index);
    1309      6199296 :   while (!todo.is_empty ())
    1310              :     {
    1311      4364252 :       bb = todo.pop ();
    1312              : 
    1313      4364252 :       bitmap_copy (old, SW (bb)->head_components);
    1314              : 
    1315     14860933 :       FOR_EACH_EDGE (e, ei, bb->preds)
    1316     10496681 :         bitmap_ior (SW (bb)->head_components, SW (bb)->head_components,
    1317     10496681 :                     SW (e->src)->head_components);
    1318              : 
    1319      8728504 :       bitmap_and_compl (SW (bb)->head_components, SW (bb)->head_components,
    1320      4364252 :                         SW (bb)->has_components);
    1321              : 
    1322      4364252 :       if (!bitmap_equal_p (old, SW (bb)->head_components))
    1323      6290320 :         FOR_EACH_EDGE (e, ei, bb->succs)
    1324      3751267 :           if (bitmap_set_bit (seen, e->dest->index))
    1325      3446730 :             todo.quick_push (e->dest);
    1326              : 
    1327      4364252 :       bitmap_clear_bit (seen, bb->index);
    1328              :     }
    1329              : 
    1330              :   /* Find for every block the components that are *not* needed on some reverse
    1331              :      path from the exit to that block.  */
    1332              : 
    1333       917522 :   if (dump_file)
    1334           79 :     fprintf (dump_file, "Spreading up...\n");
    1335              : 
    1336              :   /* First, mark all blocks not reachable from the exit block as not needing
    1337              :      any component on any path to the exit.  Mark everything, and then clear
    1338              :      again by a flood fill.  */
    1339              : 
    1340     18607052 :   FOR_ALL_BB_FN (bb, cfun)
    1341     17689530 :     bitmap_copy (SW (bb)->tail_components, components);
    1342              : 
    1343      1924587 :   FOR_EACH_EDGE (e, ei, exit_block->preds)
    1344              :     {
    1345      1007065 :       todo.quick_push (e->src);
    1346      1007065 :       bitmap_set_bit (seen, e->src->index);
    1347              :     }
    1348              : 
    1349     16167998 :   while (!todo.is_empty ())
    1350              :     {
    1351     15250476 :       bb = todo.pop ();
    1352              : 
    1353     15250476 :       if (!bitmap_empty_p (SW (bb)->tail_components))
    1354     26988710 :         FOR_EACH_EDGE (e, ei, bb->preds)
    1355     15542058 :           if (bitmap_set_bit (seen, e->src->index))
    1356     14243411 :             todo.quick_push (e->src);
    1357              : 
    1358     15250476 :       bitmap_clear (SW (bb)->tail_components);
    1359              : 
    1360     15250476 :       bitmap_clear_bit (seen, bb->index);
    1361              :     }
    1362              : 
    1363              :   /* And then, flood fill backwards to find for every block the components
    1364              :      not needed on some path to the exit.  */
    1365              : 
    1366       917522 :   bitmap_copy (SW (exit_block)->tail_components, components);
    1367              : 
    1368      1924587 :   FOR_EACH_EDGE (e, ei, exit_block->preds)
    1369              :     {
    1370      1007065 :       todo.quick_push (e->src);
    1371      1007065 :       bitmap_set_bit (seen, e->src->index);
    1372              :     }
    1373              : 
    1374      9171921 :   while (!todo.is_empty ())
    1375              :     {
    1376      8254399 :       bb = todo.pop ();
    1377              : 
    1378      8254399 :       bitmap_copy (old, SW (bb)->tail_components);
    1379              : 
    1380     21977351 :       FOR_EACH_EDGE (e, ei, bb->succs)
    1381     13722952 :         bitmap_ior (SW (bb)->tail_components, SW (bb)->tail_components,
    1382     13722952 :                     SW (e->dest)->tail_components);
    1383              : 
    1384     16508798 :       bitmap_and_compl (SW (bb)->tail_components, SW (bb)->tail_components,
    1385      8254399 :                         SW (bb)->has_components);
    1386              : 
    1387      8254399 :       if (!bitmap_equal_p (old, SW (bb)->tail_components))
    1388     13588163 :         FOR_EACH_EDGE (e, ei, bb->preds)
    1389      7909730 :           if (bitmap_set_bit (seen, e->src->index))
    1390      7247334 :             todo.quick_push (e->src);
    1391              : 
    1392      8254399 :       bitmap_clear_bit (seen, bb->index);
    1393              :     }
    1394              : 
    1395       917522 :   todo.release ();
    1396              : 
    1397              :   /* Finally, mark everything not needed both forwards and backwards.  */
    1398              : 
    1399       917522 :   bool did_changes = false;
    1400              : 
    1401     16772008 :   FOR_EACH_BB_FN (bb, cfun)
    1402              :     {
    1403     15854486 :       bitmap_copy (old, SW (bb)->has_components);
    1404              : 
    1405     31708972 :       bitmap_and (SW (bb)->head_components, SW (bb)->head_components,
    1406     15854486 :                   SW (bb)->tail_components);
    1407     31708972 :       bitmap_and_compl (SW (bb)->has_components, components,
    1408     15854486 :                         SW (bb)->head_components);
    1409              : 
    1410     15854486 :       if (!did_changes && !bitmap_equal_p (old, SW (bb)->has_components))
    1411              :         did_changes = true;
    1412              :     }
    1413              : 
    1414     18607052 :   FOR_ALL_BB_FN (bb, cfun)
    1415              :     {
    1416     17689530 :       if (dump_file)
    1417              :         {
    1418          942 :           fprintf (dump_file, "bb %d components:", bb->index);
    1419          942 :           dump_components ("has", SW (bb)->has_components);
    1420          942 :           fprintf (dump_file, "\n");
    1421              :         }
    1422              :     }
    1423              : 
    1424       917522 :   return did_changes;
    1425       917522 : }
    1426              : 
    1427              : /* If we cannot handle placing some component's prologues or epilogues where
    1428              :    we decided we should place them, unmark that component in COMPONENTS so
    1429              :    that it is not wrapped separately.  */
    1430              : static void
    1431       735781 : disqualify_problematic_components (sbitmap components)
    1432              : {
    1433       735781 :   auto_sbitmap pro (SBITMAP_SIZE (components));
    1434       735781 :   auto_sbitmap epi (SBITMAP_SIZE (components));
    1435              : 
    1436       735781 :   basic_block bb;
    1437      8704168 :   FOR_EACH_BB_FN (bb, cfun)
    1438              :     {
    1439      7968387 :       edge e;
    1440      7968387 :       edge_iterator ei;
    1441     19444487 :       FOR_EACH_EDGE (e, ei, bb->succs)
    1442              :         {
    1443              :           /* Find which components we want pro/epilogues for here.  */
    1444     11476100 :           bitmap_and_compl (epi, SW (e->src)->has_components,
    1445     11476100 :                             SW (e->dest)->has_components);
    1446     11476100 :           bitmap_and_compl (pro, SW (e->dest)->has_components,
    1447     11476100 :                             SW (e->src)->has_components);
    1448              : 
    1449              :           /* Ask the target what it thinks about things.  */
    1450     11476100 :           if (!bitmap_empty_p (epi))
    1451       307696 :             targetm.shrink_wrap.disqualify_components (components, e, epi,
    1452              :                                                        false);
    1453     11476100 :           if (!bitmap_empty_p (pro))
    1454       180515 :             targetm.shrink_wrap.disqualify_components (components, e, pro,
    1455              :                                                        true);
    1456              : 
    1457              :           /* If this edge doesn't need splitting, we're fine.  */
    1458     11476100 :           if (single_pred_p (e->dest)
    1459     11476100 :               && e->dest != EXIT_BLOCK_PTR_FOR_FN (cfun))
    1460      5317650 :             continue;
    1461              : 
    1462              :           /* If the edge can be split, that is fine too.  */
    1463      6158450 :           if ((e->flags & EDGE_ABNORMAL) == 0)
    1464      5830203 :             continue;
    1465              : 
    1466              :           /* We also can handle sibcalls.  */
    1467       328247 :           if (e->dest == EXIT_BLOCK_PTR_FOR_FN (cfun))
    1468              :             {
    1469       121707 :               gcc_assert (e->flags & EDGE_SIBCALL);
    1470       121707 :               continue;
    1471              :             }
    1472              : 
    1473              :           /* Remove from consideration those components we would need
    1474              :              pro/epilogues for on edges where we cannot insert them.  */
    1475       206540 :           bitmap_and_compl (components, components, epi);
    1476       206540 :           bitmap_and_compl (components, components, pro);
    1477              : 
    1478       206540 :           if (dump_file && !bitmap_subset_p (epi, components))
    1479              :             {
    1480            0 :               fprintf (dump_file, "  BAD epi %d->%d", e->src->index,
    1481            0 :                        e->dest->index);
    1482            0 :               if (e->flags & EDGE_EH)
    1483            0 :                 fprintf (dump_file, " for EH");
    1484            0 :               dump_components ("epi", epi);
    1485            0 :               fprintf (dump_file, "\n");
    1486              :             }
    1487              : 
    1488       206540 :           if (dump_file && !bitmap_subset_p (pro, components))
    1489              :             {
    1490            0 :               fprintf (dump_file, "  BAD pro %d->%d", e->src->index,
    1491            0 :                        e->dest->index);
    1492            0 :               if (e->flags & EDGE_EH)
    1493            0 :                 fprintf (dump_file, " for EH");
    1494            0 :               dump_components ("pro", pro);
    1495            0 :               fprintf (dump_file, "\n");
    1496              :             }
    1497              :         }
    1498              :     }
    1499       735781 : }
    1500              : 
    1501              : /* Place code for prologues and epilogues for COMPONENTS where we can put
    1502              :    that code at the start of basic blocks.  */
    1503              : static void
    1504        45765 : emit_common_heads_for_components (sbitmap components)
    1505              : {
    1506        45765 :   auto_sbitmap pro (SBITMAP_SIZE (components));
    1507        45765 :   auto_sbitmap epi (SBITMAP_SIZE (components));
    1508        45765 :   auto_sbitmap tmp (SBITMAP_SIZE (components));
    1509              : 
    1510        45765 :   basic_block bb;
    1511      1881364 :   FOR_ALL_BB_FN (bb, cfun)
    1512      1835599 :     bitmap_clear (SW (bb)->head_components);
    1513              : 
    1514      1789834 :   FOR_EACH_BB_FN (bb, cfun)
    1515              :     {
    1516              :       /* Find which prologue resp. epilogue components are needed for all
    1517              :          predecessor edges to this block.  */
    1518              : 
    1519              :       /* First, select all possible components.  */
    1520      1744069 :       bitmap_copy (epi, components);
    1521      1744069 :       bitmap_copy (pro, components);
    1522              : 
    1523      1744069 :       edge e;
    1524      1744069 :       edge_iterator ei;
    1525      4262659 :       FOR_EACH_EDGE (e, ei, bb->preds)
    1526              :         {
    1527      2552238 :           if (e->flags & EDGE_ABNORMAL)
    1528              :             {
    1529        33648 :               bitmap_clear (epi);
    1530        33648 :               bitmap_clear (pro);
    1531        33648 :               break;
    1532              :             }
    1533              : 
    1534              :           /* Deselect those epilogue components that should not be inserted
    1535              :              for this edge.  */
    1536      2518590 :           bitmap_and_compl (tmp, SW (e->src)->has_components,
    1537      2518590 :                             SW (e->dest)->has_components);
    1538      2518590 :           bitmap_and (epi, epi, tmp);
    1539              : 
    1540              :           /* Similar, for the prologue.  */
    1541      2518590 :           bitmap_and_compl (tmp, SW (e->dest)->has_components,
    1542      2518590 :                             SW (e->src)->has_components);
    1543      2518590 :           bitmap_and (pro, pro, tmp);
    1544              :         }
    1545              : 
    1546      1744069 :       if (dump_file && !(bitmap_empty_p (epi) && bitmap_empty_p (pro)))
    1547            6 :         fprintf (dump_file, "  bb %d", bb->index);
    1548              : 
    1549      1744069 :       if (dump_file && !bitmap_empty_p (epi))
    1550            0 :         dump_components ("epi", epi);
    1551      1744069 :       if (dump_file && !bitmap_empty_p (pro))
    1552            6 :         dump_components ("pro", pro);
    1553              : 
    1554      1744069 :       if (dump_file && !(bitmap_empty_p (epi) && bitmap_empty_p (pro)))
    1555            6 :         fprintf (dump_file, "\n");
    1556              : 
    1557              :       /* Place code after the BB note.  */
    1558      1744069 :       if (!bitmap_empty_p (pro))
    1559              :         {
    1560        80431 :           start_sequence ();
    1561        80431 :           targetm.shrink_wrap.emit_prologue_components (pro);
    1562        80431 :           rtx_insn *seq = end_sequence ();
    1563        80431 :           record_prologue_seq (seq);
    1564              : 
    1565        80431 :           emit_insn_after (seq, bb_note (bb));
    1566              : 
    1567        80431 :           bitmap_ior (SW (bb)->head_components, SW (bb)->head_components, pro);
    1568              :         }
    1569              : 
    1570      1744069 :       if (!bitmap_empty_p (epi))
    1571              :         {
    1572            0 :           start_sequence ();
    1573            0 :           targetm.shrink_wrap.emit_epilogue_components (epi);
    1574            0 :           rtx_insn *seq = end_sequence ();
    1575            0 :           record_epilogue_seq (seq);
    1576              : 
    1577            0 :           emit_insn_after (seq, bb_note (bb));
    1578              : 
    1579            0 :           bitmap_ior (SW (bb)->head_components, SW (bb)->head_components, epi);
    1580              :         }
    1581              :     }
    1582        45765 : }
    1583              : 
    1584              : /* Place code for prologues and epilogues for COMPONENTS where we can put
    1585              :    that code at the end of basic blocks.  */
    1586              : static void
    1587        45765 : emit_common_tails_for_components (sbitmap components)
    1588              : {
    1589        45765 :   auto_sbitmap pro (SBITMAP_SIZE (components));
    1590        45765 :   auto_sbitmap epi (SBITMAP_SIZE (components));
    1591        45765 :   auto_sbitmap tmp (SBITMAP_SIZE (components));
    1592              : 
    1593        45765 :   basic_block bb;
    1594      1881364 :   FOR_ALL_BB_FN (bb, cfun)
    1595      1835599 :     bitmap_clear (SW (bb)->tail_components);
    1596              : 
    1597      1789834 :   FOR_EACH_BB_FN (bb, cfun)
    1598              :     {
    1599              :       /* Find which prologue resp. epilogue components are needed for all
    1600              :          successor edges from this block.  */
    1601      1744069 :       if (EDGE_COUNT (bb->succs) == 0)
    1602        61155 :         continue;
    1603              : 
    1604              :       /* First, select all possible components.  */
    1605      1682914 :       bitmap_copy (epi, components);
    1606      1682914 :       bitmap_copy (pro, components);
    1607              : 
    1608      1682914 :       edge e;
    1609      1682914 :       edge_iterator ei;
    1610      4174851 :       FOR_EACH_EDGE (e, ei, bb->succs)
    1611              :         {
    1612      2554396 :           if (e->flags & EDGE_ABNORMAL)
    1613              :             {
    1614        62459 :               bitmap_clear (epi);
    1615        62459 :               bitmap_clear (pro);
    1616        62459 :               break;
    1617              :             }
    1618              : 
    1619              :           /* Deselect those epilogue components that should not be inserted
    1620              :              for this edge, and also those that are already put at the head
    1621              :              of the successor block.  */
    1622      2491937 :           bitmap_and_compl (tmp, SW (e->src)->has_components,
    1623      2491937 :                             SW (e->dest)->has_components);
    1624      2491937 :           bitmap_and_compl (tmp, tmp, SW (e->dest)->head_components);
    1625      2491937 :           bitmap_and (epi, epi, tmp);
    1626              : 
    1627              :           /* Similarly, for the prologue.  */
    1628      2491937 :           bitmap_and_compl (tmp, SW (e->dest)->has_components,
    1629      2491937 :                             SW (e->src)->has_components);
    1630      2491937 :           bitmap_and_compl (tmp, tmp, SW (e->dest)->head_components);
    1631      2491937 :           bitmap_and (pro, pro, tmp);
    1632              :         }
    1633              : 
    1634              :       /* If the last insn of this block is a control flow insn we cannot
    1635              :          put anything after it.  We can put our code before it instead,
    1636              :          but only if that jump insn is a simple jump.  */
    1637      1682914 :       rtx_insn *last_insn = BB_END (bb);
    1638      1682914 :       if (control_flow_insn_p (last_insn) && !simplejump_p (last_insn))
    1639              :         {
    1640       901512 :           bitmap_clear (epi);
    1641       901512 :           bitmap_clear (pro);
    1642              :         }
    1643              : 
    1644      1682914 :       if (dump_file && !(bitmap_empty_p (epi) && bitmap_empty_p (pro)))
    1645            7 :         fprintf (dump_file, "  bb %d", bb->index);
    1646              : 
    1647      1682914 :       if (dump_file && !bitmap_empty_p (epi))
    1648            7 :         dump_components ("epi", epi);
    1649      1682914 :       if (dump_file && !bitmap_empty_p (pro))
    1650            0 :         dump_components ("pro", pro);
    1651              : 
    1652      1682914 :       if (dump_file && !(bitmap_empty_p (epi) && bitmap_empty_p (pro)))
    1653            7 :         fprintf (dump_file, "\n");
    1654              : 
    1655              :       /* Put the code at the end of the BB, but before any final jump.  */
    1656      1682914 :       if (!bitmap_empty_p (epi))
    1657              :         {
    1658        62525 :           start_sequence ();
    1659        62525 :           targetm.shrink_wrap.emit_epilogue_components (epi);
    1660        62525 :           rtx_insn *seq = end_sequence ();
    1661        62525 :           record_epilogue_seq (seq);
    1662              : 
    1663        62525 :           if (control_flow_insn_p (last_insn))
    1664        42967 :             emit_insn_before (seq, last_insn);
    1665              :           else
    1666        19558 :             emit_insn_after (seq, last_insn);
    1667              : 
    1668        62525 :           bitmap_ior (SW (bb)->tail_components, SW (bb)->tail_components, epi);
    1669              :         }
    1670              : 
    1671      1682914 :       if (!bitmap_empty_p (pro))
    1672              :         {
    1673         1870 :           start_sequence ();
    1674         1870 :           targetm.shrink_wrap.emit_prologue_components (pro);
    1675         1870 :           rtx_insn *seq = end_sequence ();
    1676         1870 :           record_prologue_seq (seq);
    1677              : 
    1678         1870 :           if (control_flow_insn_p (last_insn))
    1679         1550 :             emit_insn_before (seq, last_insn);
    1680              :           else
    1681          320 :             emit_insn_after (seq, last_insn);
    1682              : 
    1683         1870 :           bitmap_ior (SW (bb)->tail_components, SW (bb)->tail_components, pro);
    1684              :         }
    1685              :     }
    1686        45765 : }
    1687              : 
    1688              : /* Place prologues and epilogues for COMPONENTS on edges, if we haven't already
    1689              :    placed them inside blocks directly.  */
    1690              : static void
    1691        45765 : insert_prologue_epilogue_for_components (sbitmap components)
    1692              : {
    1693        45765 :   auto_sbitmap pro (SBITMAP_SIZE (components));
    1694        45765 :   auto_sbitmap epi (SBITMAP_SIZE (components));
    1695              : 
    1696        45765 :   basic_block bb;
    1697      1789859 :   FOR_EACH_BB_FN (bb, cfun)
    1698              :     {
    1699      1744094 :       if (!bb->aux)
    1700           25 :         continue;
    1701              : 
    1702      1744069 :       edge e;
    1703      1744069 :       edge_iterator ei;
    1704      4326525 :       FOR_EACH_EDGE (e, ei, bb->succs)
    1705              :         {
    1706              :           /* Find which pro/epilogue components are needed on this edge.  */
    1707      2582456 :           bitmap_and_compl (epi, SW (e->src)->has_components,
    1708      2582456 :                             SW (e->dest)->has_components);
    1709      2582456 :           bitmap_and_compl (pro, SW (e->dest)->has_components,
    1710      2582456 :                             SW (e->src)->has_components);
    1711      2582456 :           bitmap_and (epi, epi, components);
    1712      2582456 :           bitmap_and (pro, pro, components);
    1713              : 
    1714              :           /* Deselect those we already have put at the head or tail of the
    1715              :              edge's dest resp. src.  */
    1716      2582456 :           bitmap_and_compl (epi, epi, SW (e->dest)->head_components);
    1717      2582456 :           bitmap_and_compl (pro, pro, SW (e->dest)->head_components);
    1718      2582456 :           bitmap_and_compl (epi, epi, SW (e->src)->tail_components);
    1719      2582456 :           bitmap_and_compl (pro, pro, SW (e->src)->tail_components);
    1720              : 
    1721      2582456 :           if (!bitmap_empty_p (epi) || !bitmap_empty_p (pro))
    1722              :             {
    1723        93564 :               if (dump_file)
    1724              :                 {
    1725           39 :                   fprintf (dump_file, "  %d->%d", e->src->index,
    1726           39 :                            e->dest->index);
    1727           39 :                   dump_components ("epi", epi);
    1728           39 :                   dump_components ("pro", pro);
    1729           39 :                   if (e->flags & EDGE_SIBCALL)
    1730            0 :                     fprintf (dump_file, "  (SIBCALL)");
    1731           39 :                   else if (e->flags & EDGE_ABNORMAL)
    1732            0 :                     fprintf (dump_file, "  (ABNORMAL)");
    1733           39 :                   fprintf (dump_file, "\n");
    1734              :                 }
    1735              : 
    1736              :               /* Put the epilogue components in place.  */
    1737        93564 :               start_sequence ();
    1738        93564 :               targetm.shrink_wrap.emit_epilogue_components (epi);
    1739        93564 :               rtx_insn *seq = end_sequence ();
    1740        93564 :               record_epilogue_seq (seq);
    1741              : 
    1742        93564 :               if (e->flags & EDGE_SIBCALL)
    1743              :                 {
    1744         3054 :                   gcc_assert (e->dest == EXIT_BLOCK_PTR_FOR_FN (cfun));
    1745              : 
    1746         3054 :                   rtx_insn *insn = BB_END (e->src);
    1747         3054 :                   gcc_assert (CALL_P (insn) && SIBLING_CALL_P (insn));
    1748         3054 :                   emit_insn_before (seq, insn);
    1749              :                 }
    1750        90510 :               else if (e->dest == EXIT_BLOCK_PTR_FOR_FN (cfun))
    1751              :                 {
    1752           25 :                   gcc_assert (e->flags & EDGE_FALLTHRU);
    1753           25 :                   basic_block new_bb = split_edge (e);
    1754           25 :                   emit_insn_after (seq, BB_END (new_bb));
    1755              :                 }
    1756              :               else
    1757        90485 :                 insert_insn_on_edge (seq, e);
    1758              : 
    1759              :               /* Put the prologue components in place.  */
    1760        93564 :               start_sequence ();
    1761        93564 :               targetm.shrink_wrap.emit_prologue_components (pro);
    1762        93564 :               seq = end_sequence ();
    1763        93564 :               record_prologue_seq (seq);
    1764              : 
    1765        93564 :               insert_insn_on_edge (seq, e);
    1766              :             }
    1767              :         }
    1768              :     }
    1769              : 
    1770        45765 :   commit_edge_insertions ();
    1771        45765 : }
    1772              : 
    1773              : bool
    1774      1471363 : use_shrink_wrapping_separate (void)
    1775              : {
    1776      1471363 :   if (!(SHRINK_WRAPPING_ENABLED && flag_shrink_wrap_separate
    1777      1043438 :         && optimize_function_for_speed_p (cfun)
    1778       979049 :         && targetm.shrink_wrap.get_separate_components))
    1779       492314 :     return false;
    1780              : 
    1781              :   /* We don't handle "strange" functions.  */
    1782       979049 :   if (cfun->calls_alloca
    1783       969423 :       || cfun->calls_setjmp
    1784       968627 :       || cfun->can_throw_non_call_exceptions
    1785       736596 :       || crtl->calls_eh_return
    1786       736573 :       || crtl->has_nonlocal_goto
    1787       736273 :       || crtl->saves_all_registers)
    1788              :     return false;
    1789              : 
    1790              :   return true;
    1791              : }
    1792              : 
    1793              : /* The main entry point to this subpass.  FIRST_BB is where the prologue
    1794              :    would be normally put.  */
    1795              : void
    1796      1471363 : try_shrink_wrapping_separate (basic_block first_bb)
    1797              : {
    1798      1471363 :   if (!use_shrink_wrapping_separate ())
    1799       735582 :     return;
    1800              : 
    1801              :   /* Ask the target what components there are.  If it returns NULL, don't
    1802              :      do anything.  */
    1803       735781 :   sbitmap components = targetm.shrink_wrap.get_separate_components ();
    1804       735781 :   if (!components)
    1805              :     return;
    1806              : 
    1807              :   /* We need LIVE info, not defining anything in the entry block and not
    1808              :      using anything in the exit block.  A block then needs a component if
    1809              :      the register for that component is in the IN or GEN or KILL set for
    1810              :      that block.  */
    1811       735781 :   df_scan->local_flags |= DF_SCAN_EMPTY_ENTRY_EXIT;
    1812       735781 :   df_update_entry_exit_and_calls ();
    1813       735781 :   df_live_add_problem ();
    1814       735781 :   df_live_set_all_dirty ();
    1815       735781 :   df_analyze ();
    1816              : 
    1817       735781 :   calculate_dominance_info (CDI_DOMINATORS);
    1818       735781 :   calculate_dominance_info (CDI_POST_DOMINATORS);
    1819              : 
    1820       735781 :   init_separate_shrink_wrap (components);
    1821              : 
    1822       735781 :   sbitmap_iterator sbi;
    1823       735781 :   unsigned int j;
    1824      2061623 :   EXECUTE_IF_SET_IN_BITMAP (components, 0, j, sbi)
    1825       590061 :     place_prologue_for_one_component (j, first_bb);
    1826              : 
    1827              :   /* Try to minimize the number of saves and restores.  Do this as long as
    1828              :      it changes anything.  This does not iterate more than a few times.  */
    1829              :   int spread_times = 0;
    1830       917522 :   while (spread_components (components))
    1831              :     {
    1832       181741 :       spread_times++;
    1833              : 
    1834       181741 :       if (dump_file)
    1835           14 :         fprintf (dump_file, "Now spread %d times.\n", spread_times);
    1836              :     }
    1837              : 
    1838       735781 :   disqualify_problematic_components (components);
    1839              : 
    1840              :   /* Don't separately shrink-wrap anything where the "main" prologue will
    1841              :      go; the target code can often optimize things if it is presented with
    1842              :      all components together (say, if it generates store-multiple insns).  */
    1843       735781 :   bitmap_and_compl (components, components, SW (first_bb)->has_components);
    1844              : 
    1845       735781 :   if (bitmap_empty_p (components))
    1846              :     {
    1847       690016 :       if (dump_file)
    1848           57 :         fprintf (dump_file, "Not wrapping anything separately.\n");
    1849              :     }
    1850              :   else
    1851              :     {
    1852        45765 :       if (dump_file)
    1853              :         {
    1854            8 :           fprintf (dump_file, "The components we wrap separately are");
    1855            8 :           dump_components ("sep", components);
    1856            8 :           fprintf (dump_file, "\n");
    1857              : 
    1858            8 :           fprintf (dump_file, "... Inserting common heads...\n");
    1859              :         }
    1860              : 
    1861        45765 :       emit_common_heads_for_components (components);
    1862              : 
    1863        45765 :       if (dump_file)
    1864            8 :         fprintf (dump_file, "... Inserting common tails...\n");
    1865              : 
    1866        45765 :       emit_common_tails_for_components (components);
    1867              : 
    1868        45765 :       if (dump_file)
    1869            8 :         fprintf (dump_file, "... Inserting the more difficult ones...\n");
    1870              : 
    1871        45765 :       insert_prologue_epilogue_for_components (components);
    1872              : 
    1873        45765 :       if (dump_file)
    1874            8 :         fprintf (dump_file, "... Done.\n");
    1875              : 
    1876        45765 :       targetm.shrink_wrap.set_handled_components (components);
    1877              : 
    1878        45765 :       crtl->shrink_wrapped_separate = true;
    1879              :     }
    1880              : 
    1881       735781 :   fini_separate_shrink_wrap ();
    1882              : 
    1883       735781 :   sbitmap_free (components);
    1884       735781 :   free_dominance_info (CDI_DOMINATORS);
    1885       735781 :   free_dominance_info (CDI_POST_DOMINATORS);
    1886              : 
    1887              :   /* All done.  */
    1888       735781 :   df_scan->local_flags &= ~DF_SCAN_EMPTY_ENTRY_EXIT;
    1889       735781 :   df_update_entry_exit_and_calls ();
    1890       735781 :   df_live_set_all_dirty ();
    1891       735781 :   df_analyze ();
    1892              : }
        

Generated by: LCOV version 2.4-beta

LCOV profile is generated on x86_64 machine using following configure options: configure --disable-bootstrap --enable-coverage=opt --enable-languages=c,c++,fortran,go,jit,lto,rust,m2 --enable-host-shared. GCC test suite is run with the built compiler.