LCOV - code coverage report
Current view: top level - gcc - shrink-wrap.cc (source / functions) Coverage Total Hit
Test: gcc.info Lines: 96.6 % 824 796
Test Date: 2026-03-28 14:25:54 Functions: 100.0 % 20 20
Legend: Lines:     hit not hit

            Line data    Source code
       1              : /* Shrink-wrapping related optimizations.
       2              :    Copyright (C) 1987-2026 Free Software Foundation, Inc.
       3              : 
       4              : This file is part of GCC.
       5              : 
       6              : GCC is free software; you can redistribute it and/or modify it under
       7              : the terms of the GNU General Public License as published by the Free
       8              : Software Foundation; either version 3, or (at your option) any later
       9              : version.
      10              : 
      11              : GCC is distributed in the hope that it will be useful, but WITHOUT ANY
      12              : WARRANTY; without even the implied warranty of MERCHANTABILITY or
      13              : FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
      14              : for more details.
      15              : 
      16              : You should have received a copy of the GNU General Public License
      17              : along with GCC; see the file COPYING3.  If not see
      18              : <http://www.gnu.org/licenses/>.  */
      19              : 
      20              : /* This file handles shrink-wrapping related optimizations.  */
      21              : 
      22              : #include "config.h"
      23              : #include "system.h"
      24              : #include "coretypes.h"
      25              : #include "backend.h"
      26              : #include "target.h"
      27              : #include "rtl.h"
      28              : #include "tree.h"
      29              : #include "cfghooks.h"
      30              : #include "df.h"
      31              : #include "memmodel.h"
      32              : #include "tm_p.h"
      33              : #include "regs.h"
      34              : #include "insn-config.h"
      35              : #include "emit-rtl.h"
      36              : #include "output.h"
      37              : #include "tree-pass.h"
      38              : #include "cfgrtl.h"
      39              : #include "cfgbuild.h"
      40              : #include "bb-reorder.h"
      41              : #include "shrink-wrap.h"
      42              : #include "regcprop.h"
      43              : #include "rtl-iter.h"
      44              : #include "valtrack.h"
      45              : #include "function-abi.h"
      46              : #include "print-rtl.h"
      47              : 
      48              : /* Return true if INSN requires the stack frame to be set up.
      49              :    PROLOGUE_USED contains the hard registers used in the function
      50              :    prologue.  SET_UP_BY_PROLOGUE is the set of registers we expect the
      51              :    prologue to set up for the function.  */
      52              : bool
      53     60835393 : requires_stack_frame_p (rtx_insn *insn, HARD_REG_SET prologue_used,
      54              :                         HARD_REG_SET set_up_by_prologue)
      55              : {
      56     60835393 :   df_ref def, use;
      57     60835393 :   HARD_REG_SET hardregs;
      58     60835393 :   unsigned regno;
      59              : 
      60     60835393 :   if (CALL_P (insn) && !FAKE_CALL_P (insn))
      61      3598581 :     return !SIBLING_CALL_P (insn);
      62              : 
      63              :   /* We need a frame to get the unique CFA expected by the unwinder.  */
      64     57236812 :   if (cfun->can_throw_non_call_exceptions && can_throw_internal (insn))
      65              :     return true;
      66              : 
      67     57092873 :   CLEAR_HARD_REG_SET (hardregs);
      68    106353974 :   FOR_EACH_INSN_DEF (def, insn)
      69              :     {
      70     49261101 :       rtx dreg = DF_REF_REG (def);
      71              : 
      72     49261101 :       if (!REG_P (dreg))
      73            0 :         continue;
      74              : 
      75     49261101 :       add_to_hard_reg_set (&hardregs, GET_MODE (dreg), REGNO (dreg));
      76              :     }
      77     57092873 :   if (hard_reg_set_intersect_p (hardregs, prologue_used))
      78              :     return true;
      79     55369211 :   hardregs &= ~crtl->abi->full_reg_clobbers ();
      80   4999051619 :   for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
      81   4945616767 :     if (TEST_HARD_REG_BIT (hardregs, regno)
      82   4945616767 :         && df_regs_ever_live_p (regno))
      83              :       return true;
      84              : 
      85    105476661 :   FOR_EACH_INSN_USE (use, insn)
      86              :     {
      87     52041809 :       rtx reg = DF_REF_REG (use);
      88              : 
      89     52041809 :       if (!REG_P (reg))
      90            0 :         continue;
      91              : 
      92     52041809 :       add_to_hard_reg_set (&hardregs, GET_MODE (reg),
      93              :                            REGNO (reg));
      94              :     }
      95     53434852 :   if (hard_reg_set_intersect_p (hardregs, set_up_by_prologue))
      96              :     return true;
      97              : 
      98              :   return false;
      99              : }
     100              : 
     101              : /* See whether there has a single live edge from BB, which dest uses
     102              :    [REGNO, END_REGNO).  Return the live edge if its dest bb has
     103              :    one or two predecessors.  Otherwise return NULL.  */
     104              : 
     105              : static edge
     106       312108 : live_edge_for_reg (basic_block bb, int regno, int end_regno)
     107              : {
     108       312108 :   edge e, live_edge;
     109       312108 :   edge_iterator ei;
     110       312108 :   bitmap live;
     111       312108 :   int i;
     112              : 
     113       312108 :   live_edge = NULL;
     114       734867 :   FOR_EACH_EDGE (e, ei, bb->succs)
     115              :     {
     116       525445 :       live = df_get_live_in (e->dest);
     117      1473649 :       for (i = regno; i < end_regno; i++)
     118       525445 :         if (REGNO_REG_SET_P (live, i))
     119              :           {
     120       412692 :             if (live_edge && live_edge != e)
     121              :               return NULL;
     122              :             live_edge = e;
     123              :           }
     124              :     }
     125              : 
     126              :   /* We can sometimes encounter dead code.  Don't try to move it
     127              :      into the exit block.  */
     128       209422 :   if (!live_edge || live_edge->dest == EXIT_BLOCK_PTR_FOR_FN (cfun))
     129              :     return NULL;
     130              : 
     131              :   /* Reject targets of abnormal edges.  This is needed for correctness
     132              :      on ports like Alpha and MIPS, whose pic_offset_table_rtx can die on
     133              :      exception edges even though it is generally treated as call-saved
     134              :      for the majority of the compilation.  Moving across abnormal edges
     135              :      isn't going to be interesting for shrink-wrap usage anyway.  */
     136       207320 :   if (live_edge->flags & EDGE_ABNORMAL)
     137              :     return NULL;
     138              : 
     139              :   /* When live_edge->dest->preds == 2, we can create a new block on
     140              :      the edge to make it meet the requirement.  */
     141       205933 :   if (EDGE_COUNT (live_edge->dest->preds) > 2)
     142              :     return NULL;
     143              : 
     144              :   return live_edge;
     145              : }
     146              : 
     147              : /* Try to move INSN from BB to a successor.  Return true on success.
     148              :    USES and DEFS are the set of registers that are used and defined
     149              :    after INSN in BB.  SPLIT_P indicates whether a live edge from BB
     150              :    is splitted or not.  */
     151              : 
     152              : static bool
     153      7772589 : move_insn_for_shrink_wrap (basic_block bb, rtx_insn *insn,
     154              :                            const_hard_reg_set uses,
     155              :                            const_hard_reg_set defs,
     156              :                            bool *split_p,
     157              :                            struct dead_debug_local *debug)
     158              : {
     159      7772589 :   rtx set, src, dest;
     160      7772589 :   bitmap live_out, live_in, bb_uses = NULL, bb_defs = NULL;
     161      7772589 :   unsigned int i, dregno, end_dregno;
     162      7772589 :   unsigned int sregno = FIRST_PSEUDO_REGISTER;
     163      7772589 :   unsigned int end_sregno = FIRST_PSEUDO_REGISTER;
     164      7772589 :   basic_block next_block;
     165      7772589 :   edge live_edge;
     166      7772589 :   rtx_insn *dinsn;
     167      7772589 :   df_ref def;
     168              : 
     169              :   /* Look for a simple register assignment.  We don't use single_set here
     170              :      because we can't deal with any CLOBBERs, USEs, or REG_UNUSED secondary
     171              :      destinations.  */
     172      7772589 :   if (!INSN_P (insn))
     173              :     return false;
     174      7772589 :   set = PATTERN (insn);
     175      7772589 :   if (GET_CODE (set) != SET)
     176              :     return false;
     177      6378677 :   src = SET_SRC (set);
     178      6378677 :   dest = SET_DEST (set);
     179              : 
     180              :   /* For the destination, we want only a register.  Also disallow STACK
     181              :      or FRAME related adjustments.  They are likely part of the prologue,
     182              :      so keep them in the entry block.  */
     183      6378677 :   if (!REG_P (dest)
     184      4508736 :       || dest == stack_pointer_rtx
     185      4491776 :       || dest == frame_pointer_rtx
     186      4491776 :       || dest == hard_frame_pointer_rtx)
     187              :     return false;
     188              : 
     189              :   /* For the source, we want one of:
     190              :       (1) A (non-overlapping) register
     191              :       (2) A constant,
     192              :       (3) An expression involving no more than one register.
     193              : 
     194              :      That last point comes from the code following, which was originally
     195              :      written to handle only register move operations, and still only handles
     196              :      a single source register when checking for overlaps.  Happily, the
     197              :      same checks can be applied to expressions like (plus reg const).  */
     198              : 
     199      4491380 :   if (CONSTANT_P (src))
     200              :     ;
     201      3446234 :   else if (!REG_P (src))
     202              :     {
     203      2484368 :       rtx src_inner = NULL_RTX;
     204              : 
     205      2484368 :       if (can_throw_internal (insn))
     206      1787750 :         return false;
     207              : 
     208      2475007 :       subrtx_var_iterator::array_type array;
     209      5402217 :       FOR_EACH_SUBRTX_VAR (iter, array, src, ALL)
     210              :         {
     211      4705599 :           rtx x = *iter;
     212      4705599 :           switch (GET_RTX_CLASS (GET_CODE (x)))
     213              :             {
     214              :             case RTX_CONST_OBJ:
     215              :             case RTX_COMPARE:
     216              :             case RTX_COMM_COMPARE:
     217              :             case RTX_BIN_ARITH:
     218              :             case RTX_COMM_ARITH:
     219              :             case RTX_UNARY:
     220              :             case RTX_TERNARY:
     221              :               /* Constant or expression.  Continue.  */
     222              :               break;
     223              : 
     224      2801901 :             case RTX_OBJ:
     225      2801901 :             case RTX_EXTRA:
     226      2801901 :               switch (GET_CODE (x))
     227              :                 {
     228              :                 case UNSPEC:
     229              :                 case SUBREG:
     230              :                 case STRICT_LOW_PART:
     231              :                 case PC:
     232              :                 case LO_SUM:
     233              :                   /* Ok.  Continue.  */
     234              :                   break;
     235              : 
     236      1268877 :                 case REG:
     237              :                   /* Fail if we see a second inner register.  */
     238      1268877 :                   if (src_inner != NULL)
     239      1778389 :                     return false;
     240              :                   src_inner = x;
     241              :                   break;
     242              : 
     243              :                 default:
     244              :                   return false;
     245              :                 }
     246              :               break;
     247              : 
     248              :             default:
     249              :               return false;
     250              :             }
     251              :         }
     252              : 
     253       696618 :       if (src_inner != NULL)
     254       696556 :         src = src_inner;
     255      2475007 :     }
     256              : 
     257              :   /* Make sure that the source register isn't defined later in BB.  */
     258      2703630 :   if (REG_P (src))
     259              :     {
     260      1658422 :       sregno = REGNO (src);
     261      1658422 :       end_sregno = END_REGNO (src);
     262      1658422 :       if (overlaps_hard_reg_set_p (defs, GET_MODE (src), sregno))
     263              :         return false;
     264              :     }
     265              : 
     266              :   /* Make sure that the destination register isn't referenced later in BB.  */
     267      1935369 :   dregno = REGNO (dest);
     268      1935369 :   end_dregno = END_REGNO (dest);
     269      1935369 :   if (overlaps_hard_reg_set_p (uses, GET_MODE (dest), dregno)
     270      1935369 :       || overlaps_hard_reg_set_p (defs, GET_MODE (dest), dregno))
     271              :     return false;
     272              : 
     273              :   /* See whether there is a successor block to which we could move INSN.  */
     274       280880 :   live_edge = live_edge_for_reg (bb, dregno, end_dregno);
     275       280880 :   if (!live_edge)
     276              :     return false;
     277              : 
     278       176725 :   next_block = live_edge->dest;
     279              :   /* Create a new basic block on the edge.  */
     280       176725 :   if (EDGE_COUNT (next_block->preds) == 2)
     281              :     {
     282              :       /* split_edge for a block with only one successor is meaningless.  */
     283        97344 :       if (EDGE_COUNT (bb->succs) == 1)
     284              :         return false;
     285              : 
     286              :       /* If DF_LIVE doesn't exist, i.e. at -O1, just give up.  */
     287        10827 :       if (!df_live)
     288              :         return false;
     289              : 
     290        10360 :       basic_block old_dest = live_edge->dest;
     291        10360 :       next_block = split_edge (live_edge);
     292              : 
     293              :       /* We create a new basic block.  Call df_grow_bb_info to make sure
     294              :          all data structures are allocated.  */
     295        10360 :       df_grow_bb_info (df_live);
     296              : 
     297        10360 :       bitmap_and (df_get_live_in (next_block), df_get_live_out (bb),
     298        10360 :                   df_get_live_in (old_dest));
     299        10360 :       df_set_bb_dirty (next_block);
     300              : 
     301              :       /* We should not split more than once for a function.  */
     302        10360 :       if (*split_p)
     303              :         return false;
     304              : 
     305        10360 :       *split_p = true;
     306              :     }
     307              : 
     308              :   /* At this point we are committed to moving INSN, but let's try to
     309              :      move it as far as we can.  */
     310       105892 :   do
     311              :     {
     312       105892 :       if (MAY_HAVE_DEBUG_BIND_INSNS)
     313              :         {
     314       824173 :           FOR_BB_INSNS_REVERSE (bb, dinsn)
     315       812001 :             if (DEBUG_BIND_INSN_P (dinsn))
     316              :               {
     317       324318 :                 df_ref use;
     318       431709 :                 FOR_EACH_INSN_USE (use, dinsn)
     319       107391 :                   if (refers_to_regno_p (dregno, end_dregno,
     320       107391 :                                          DF_REF_REG (use), (rtx *) NULL))
     321        29133 :                     dead_debug_add (debug, use, DF_REF_REGNO (use));
     322              :               }
     323       487683 :             else if (dinsn == insn)
     324              :               break;
     325              :         }
     326       105892 :       live_out = df_get_live_out (bb);
     327       105892 :       live_in = df_get_live_in (next_block);
     328       105892 :       bb = next_block;
     329              : 
     330              :       /* Check whether BB uses DEST or clobbers DEST.  We need to add
     331              :          INSN to BB if so.  Either way, DEST is no longer live on entry,
     332              :          except for any part that overlaps SRC (next loop).  */
     333       105892 :       if (!*split_p)
     334              :         {
     335        88790 :           bb_uses = &DF_LR_BB_INFO (bb)->use;
     336        88790 :           bb_defs = &DF_LR_BB_INFO (bb)->def;
     337              :         }
     338       105892 :       if (df_live)
     339              :         {
     340       203936 :           for (i = dregno; i < end_dregno; i++)
     341              :             {
     342       101968 :               if (*split_p
     343        84866 :                   || REGNO_REG_SET_P (bb_uses, i)
     344        41344 :                   || REGNO_REG_SET_P (bb_defs, i)
     345       184656 :                   || REGNO_REG_SET_P (&DF_LIVE_BB_INFO (bb)->gen, i))
     346              :                 next_block = NULL;
     347       101968 :               CLEAR_REGNO_REG_SET (live_out, i);
     348       101968 :               CLEAR_REGNO_REG_SET (live_in, i);
     349              :             }
     350              : 
     351              :           /* Check whether BB clobbers SRC.  We need to add INSN to BB if so.
     352              :              Either way, SRC is now live on entry.  */
     353       193620 :           for (i = sregno; i < end_sregno; i++)
     354              :             {
     355        91652 :               if (*split_p
     356        82293 :                   || REGNO_REG_SET_P (bb_defs, i)
     357       173706 :                   || REGNO_REG_SET_P (&DF_LIVE_BB_INFO (bb)->gen, i))
     358              :                 next_block = NULL;
     359        91652 :               SET_REGNO_REG_SET (live_out, i);
     360        91652 :               SET_REGNO_REG_SET (live_in, i);
     361              :             }
     362              :         }
     363              :       else
     364              :         {
     365              :           /* DF_LR_BB_INFO (bb)->def does not comprise the DF_REF_PARTIAL and
     366              :              DF_REF_CONDITIONAL defs.  So if DF_LIVE doesn't exist, i.e.
     367              :              at -O1, just give up searching NEXT_BLOCK.  */
     368         7848 :           next_block = NULL;
     369         7848 :           for (i = dregno; i < end_dregno; i++)
     370              :             {
     371         3924 :               CLEAR_REGNO_REG_SET (live_out, i);
     372         3924 :               CLEAR_REGNO_REG_SET (live_in, i);
     373              :             }
     374              : 
     375         7834 :           for (i = sregno; i < end_sregno; i++)
     376              :             {
     377         3910 :               SET_REGNO_REG_SET (live_out, i);
     378         3910 :               SET_REGNO_REG_SET (live_in, i);
     379              :             }
     380              :         }
     381              : 
     382              :       /* If we don't need to add the move to BB, look for a single
     383              :          successor block.  */
     384       105892 :       if (next_block)
     385              :         {
     386        31228 :           live_edge = live_edge_for_reg (next_block, dregno, end_dregno);
     387        31228 :           if (!live_edge || EDGE_COUNT (live_edge->dest->preds) > 1)
     388              :             break;
     389              :           next_block = live_edge->dest;
     390              :         }
     391              :     }
     392        90815 :   while (next_block);
     393              : 
     394              :   /* For the new created basic block, there is no dataflow info at all.
     395              :      So skip the following dataflow update and check.  */
     396        89741 :   if (!(*split_p))
     397              :     {
     398              :       /* BB now defines DEST.  It only uses the parts of DEST that overlap SRC
     399              :          (next loop).  */
     400       145278 :       for (i = dregno; i < end_dregno; i++)
     401              :         {
     402        72639 :           CLEAR_REGNO_REG_SET (bb_uses, i);
     403        72639 :           SET_REGNO_REG_SET (bb_defs, i);
     404              :         }
     405              : 
     406              :       /* BB now uses SRC.  */
     407       143166 :       for (i = sregno; i < end_sregno; i++)
     408        70527 :         SET_REGNO_REG_SET (bb_uses, i);
     409              :     }
     410              : 
     411              :   /* Insert debug temps for dead REGs used in subsequent debug insns.  */
     412        89741 :   if (debug->used && !bitmap_empty_p (debug->used))
     413        29926 :     FOR_EACH_INSN_DEF (def, insn)
     414        14963 :       dead_debug_insert_temp (debug, DF_REF_REGNO (def), insn,
     415              :                               DEBUG_TEMP_BEFORE_WITH_VALUE);
     416              : 
     417        89741 :   rtx_insn *insn_copy = emit_insn_after (PATTERN (insn), bb_note (bb));
     418              :   /* Update the LABEL_NUSES count on any referenced labels. The ideal
     419              :      solution here would be to actually move the instruction instead
     420              :      of copying/deleting it as this loses some notations on the
     421              :      insn.  */
     422        89741 :   mark_jump_label (PATTERN (insn), insn_copy, 0);
     423        89741 :   delete_insn (insn);
     424        89741 :   return true;
     425              : }
     426              : 
     427              : /* Look for register copies in the first block of the function, and move
     428              :    them down into successor blocks if the register is used only on one
     429              :    path.  This exposes more opportunities for shrink-wrapping.  These
     430              :    kinds of sets often occur when incoming argument registers are moved
     431              :    to call-saved registers because their values are live across one or
     432              :    more calls during the function.  */
     433              : 
     434              : static void
     435       651879 : prepare_shrink_wrap (basic_block entry_block)
     436              : {
     437       651879 :   rtx_insn *insn, *curr;
     438       651879 :   rtx x;
     439       651879 :   HARD_REG_SET uses, defs;
     440       651879 :   df_ref def, use;
     441       651879 :   bool split_p = false;
     442       651879 :   unsigned int i;
     443       651879 :   struct dead_debug_local debug;
     444              : 
     445       651879 :   if (JUMP_P (BB_END (entry_block)))
     446              :     {
     447              :       /* To have more shrink-wrapping opportunities, prepare_shrink_wrap tries
     448              :          to sink the copies from parameter to callee saved register out of
     449              :          entry block.  copyprop_hardreg_forward_bb_without_debug_insn is called
     450              :          to release some dependences.  */
     451       356447 :       copyprop_hardreg_forward_bb_without_debug_insn (entry_block);
     452              :     }
     453              : 
     454       651879 :   dead_debug_local_init (&debug, NULL, NULL);
     455      2607516 :   CLEAR_HARD_REG_SET (uses);
     456       651879 :   CLEAR_HARD_REG_SET (defs);
     457              : 
     458     28595938 :   FOR_BB_INSNS_REVERSE_SAFE (entry_block, insn, curr)
     459     13646090 :     if (NONDEBUG_INSN_P (insn)
     460      7772589 :         && !move_insn_for_shrink_wrap (entry_block, insn, uses, defs,
     461              :                                        &split_p, &debug))
     462              :       {
     463              :         /* Add all defined registers to DEFs.  */
     464     86015769 :         FOR_EACH_INSN_DEF (def, insn)
     465              :           {
     466     78332921 :             x = DF_REF_REG (def);
     467     78332921 :             if (REG_P (x) && HARD_REGISTER_P (x))
     468    156665842 :               for (i = REGNO (x); i < END_REGNO (x); i++)
     469     78332921 :                 SET_HARD_REG_BIT (defs, i);
     470              :           }
     471              : 
     472              :         /* Add all used registers to USESs.  */
     473     17634832 :         FOR_EACH_INSN_USE (use, insn)
     474              :           {
     475      9951984 :             x = DF_REF_REG (use);
     476      9951984 :             if (REG_P (x) && HARD_REGISTER_P (x))
     477     19903968 :               for (i = REGNO (x); i < END_REGNO (x); i++)
     478      9951984 :                 SET_HARD_REG_BIT (uses, i);
     479              :           }
     480              :       }
     481              : 
     482       651879 :   dead_debug_local_finish (&debug, NULL);
     483       651879 : }
     484              : 
     485              : /* Return whether basic block PRO can get the prologue.  It cannot if it
     486              :    has incoming complex edges that need a prologue inserted (we make a new
     487              :    block for the prologue, so those edges would need to be redirected, which
     488              :    does not work).  It also cannot if there exist registers live on entry
     489              :    to PRO that are clobbered by the prologue.  */
     490              : 
     491              : static bool
     492        77458 : can_get_prologue (basic_block pro, HARD_REG_SET prologue_clobbered)
     493              : {
     494        77458 :   edge e;
     495        77458 :   edge_iterator ei;
     496       192375 :   FOR_EACH_EDGE (e, ei, pro->preds)
     497       114934 :     if (e->flags & EDGE_COMPLEX
     498       114934 :         && !dominated_by_p (CDI_DOMINATORS, e->src, pro))
     499              :       return false;
     500              : 
     501              :   HARD_REG_SET live;
     502        77441 :   REG_SET_TO_HARD_REG_SET (live, df_get_live_in (pro));
     503       154882 :   if (hard_reg_set_intersect_p (live, prologue_clobbered))
     504              :     return false;
     505              : 
     506              :   return true;
     507              : }
     508              : 
     509              : /* Return whether we can duplicate basic block BB for shrink wrapping.  We
     510              :    cannot if the block cannot be duplicated at all, or if any of its incoming
     511              :    edges are complex and come from a block that does not require a prologue
     512              :    (we cannot redirect such edges), or if the block is too big to copy.
     513              :    PRO is the basic block before which we would put the prologue, MAX_SIZE is
     514              :    the maximum size block we allow to be copied.  */
     515              : 
     516              : static bool
     517       574800 : can_dup_for_shrink_wrapping (basic_block bb, basic_block pro, unsigned max_size)
     518              : {
     519       574800 :   if (!can_duplicate_block_p (bb))
     520              :     return false;
     521              : 
     522       574489 :   edge e;
     523       574489 :   edge_iterator ei;
     524      1449706 :   FOR_EACH_EDGE (e, ei, bb->preds)
     525       884668 :     if (e->flags & (EDGE_COMPLEX | EDGE_CROSSING)
     526       884668 :         && !dominated_by_p (CDI_DOMINATORS, e->src, pro))
     527              :       return false;
     528              : 
     529       565038 :   unsigned size = 0;
     530              : 
     531       565038 :   rtx_insn *insn;
     532      5193471 :   FOR_BB_INSNS (bb, insn)
     533      4820417 :     if (NONDEBUG_INSN_P (insn))
     534              :       {
     535      1628976 :         size += get_attr_min_length (insn);
     536      1628976 :         if (size > max_size)
     537              :           return false;
     538              :       }
     539              : 
     540              :   return true;
     541              : }
     542              : 
     543              : /* Do whatever needs to be done for exits that run without prologue.
     544              :    Sibcalls need nothing done.  Normal exits get a simple_return inserted.  */
     545              : 
     546              : static void
     547        58829 : handle_simple_exit (edge e)
     548              : {
     549              : 
     550        58829 :   if (e->flags & EDGE_SIBCALL)
     551              :     {
     552              :       /* Tell function.cc to take no further action on this edge.  */
     553         5072 :       e->flags |= EDGE_IGNORE;
     554              : 
     555         5072 :       e->flags &= ~EDGE_FALLTHRU;
     556         5072 :       emit_barrier_after_bb (e->src);
     557         5072 :       return;
     558              :     }
     559              : 
     560              :   /* If the basic block the edge comes from has multiple successors,
     561              :      split the edge.  */
     562        53757 :   if (EDGE_COUNT (e->src->succs) > 1)
     563              :     {
     564         1475 :       basic_block old_bb = e->src;
     565         1475 :       rtx_insn *end = BB_END (old_bb);
     566         1475 :       rtx_note *note = emit_note_after (NOTE_INSN_DELETED, end);
     567         1475 :       basic_block new_bb = create_basic_block (note, note, old_bb);
     568         1475 :       BB_COPY_PARTITION (new_bb, old_bb);
     569         1475 :       BB_END (old_bb) = end;
     570              : 
     571         1475 :       redirect_edge_succ (e, new_bb);
     572         1475 :       new_bb->count = e->count ();
     573         1475 :       e->flags |= EDGE_FALLTHRU;
     574              : 
     575         1475 :       e = make_single_succ_edge (new_bb, EXIT_BLOCK_PTR_FOR_FN (cfun), 0);
     576              :     }
     577              : 
     578        53757 :   e->flags &= ~EDGE_FALLTHRU;
     579        53757 :   rtx_jump_insn *ret = emit_jump_insn_after (targetm.gen_simple_return (),
     580        53757 :                                              BB_END (e->src));
     581        53757 :   JUMP_LABEL (ret) = simple_return_rtx;
     582        53757 :   emit_barrier_after_bb (e->src);
     583              : 
     584        53757 :   if (dump_file)
     585           23 :     fprintf (dump_file, "Made simple_return with UID %d in bb %d\n",
     586           23 :              INSN_UID (ret), e->src->index);
     587              : }
     588              : 
     589              : /* Try to perform a kind of shrink-wrapping, making sure the
     590              :    prologue/epilogue is emitted only around those parts of the
     591              :    function that require it.
     592              : 
     593              :    There will be exactly one prologue, and it will be executed either
     594              :    zero or one time, on any path.  Depending on where the prologue is
     595              :    placed, some of the basic blocks can be reached via both paths with
     596              :    and without a prologue.  Such blocks will be duplicated here, and the
     597              :    edges changed to match.
     598              : 
     599              :    Paths that go to the exit without going through the prologue will use
     600              :    a simple_return instead of the epilogue.  We maximize the number of
     601              :    those, making sure to only duplicate blocks that can be duplicated.
     602              :    If the prologue can then still be placed in multiple locations, we
     603              :    place it as early as possible.
     604              : 
     605              :    An example, where we duplicate blocks with control flow (legend:
     606              :    _B_egin, _R_eturn and _S_imple_return; edges without arrowhead should
     607              :    be taken to point down or to the right, to simplify the diagram; here,
     608              :    block 3 needs a prologue, the rest does not):
     609              : 
     610              : 
     611              :        B                 B
     612              :        |                 |
     613              :        2                 2
     614              :        |\                |\
     615              :        | 3    becomes    | 3
     616              :        |/                |  \
     617              :        4                 7   4
     618              :        |\                |\  |\
     619              :        | 5               | 8 | 5
     620              :        |/                |/  |/
     621              :        6                 9   6
     622              :        |                 |   |
     623              :        R                 S   R
     624              : 
     625              : 
     626              :    (bb 4 is duplicated to 7, and so on; the prologue is inserted on the
     627              :    edge 2->3).
     628              : 
     629              :    Another example, where part of a loop is duplicated (again, bb 3 is
     630              :    the only block that needs a prologue):
     631              : 
     632              : 
     633              :        B   3<--              B       ->3<--
     634              :        |   |   |             |      |  |   |
     635              :        |   v   |   becomes   |      |  v   |
     636              :        2---4---              2---5--   4---
     637              :            |                     |     |
     638              :            R                     S     R
     639              : 
     640              : 
     641              :    (bb 4 is duplicated to 5; the prologue is inserted on the edge 5->3).
     642              : 
     643              :    ENTRY_EDGE is the edge where the prologue will be placed, possibly
     644              :    changed by this function.  PROLOGUE_SEQ is the prologue we will insert.  */
     645              : 
     646              : void
     647      1480948 : try_shrink_wrapping (edge *entry_edge, rtx_insn *prologue_seq)
     648              : {
     649              :   /* If we cannot shrink-wrap, are told not to shrink-wrap, or it makes
     650              :      no sense to shrink-wrap: then do not shrink-wrap!  */
     651              : 
     652      1480948 :   if (!SHRINK_WRAPPING_ENABLED)
     653      1424283 :     return;
     654              : 
     655      1039982 :   if (crtl->profile && !targetm.profile_before_prologue ())
     656              :     return;
     657              : 
     658      1039982 :   if (crtl->calls_eh_return)
     659              :     return;
     660              : 
     661      1428035 :   bool empty_prologue = true;
     662      1428035 :   for (rtx_insn *insn = prologue_seq; insn; insn = NEXT_INSN (insn))
     663      1039957 :     if (!(NOTE_P (insn) && NOTE_KIND (insn) == NOTE_INSN_PROLOGUE_END))
     664              :       {
     665              :         empty_prologue = false;
     666              :         break;
     667              :       }
     668      1039957 :   if (empty_prologue)
     669              :     return;
     670              : 
     671              :   /* Move some code down to expose more shrink-wrapping opportunities.  */
     672              : 
     673       651879 :   basic_block entry = (*entry_edge)->dest;
     674       651879 :   prepare_shrink_wrap (entry);
     675              : 
     676       651879 :   if (dump_file)
     677           51 :     fprintf (dump_file, "Attempting shrink-wrapping optimization.\n");
     678              : 
     679              :   /* Compute the registers set and used in the prologue.  */
     680              : 
     681              :   HARD_REG_SET prologue_clobbered, prologue_used;
     682      1955637 :   CLEAR_HARD_REG_SET (prologue_clobbered);
     683      3222428 :   CLEAR_HARD_REG_SET (prologue_used);
     684      3222428 :   for (rtx_insn *insn = prologue_seq; insn; insn = NEXT_INSN (insn))
     685      2570549 :     if (NONDEBUG_INSN_P (insn))
     686              :       {
     687              :         HARD_REG_SET this_used;
     688      1918670 :         CLEAR_HARD_REG_SET (this_used);
     689      1918670 :         note_uses (&PATTERN (insn), record_hard_reg_uses, &this_used);
     690      1918670 :         this_used &= ~prologue_clobbered;
     691      1918670 :         prologue_used |= this_used;
     692      1918670 :         note_stores (insn, record_hard_reg_sets, &prologue_clobbered);
     693              :       }
     694       651879 :   CLEAR_HARD_REG_BIT (prologue_clobbered, STACK_POINTER_REGNUM);
     695       651879 :   if (frame_pointer_needed)
     696        43323 :     CLEAR_HARD_REG_BIT (prologue_clobbered, HARD_FRAME_POINTER_REGNUM);
     697              : 
     698              :   /* Find out what registers are set up by the prologue; any use of these
     699              :      cannot happen before the prologue.  */
     700              : 
     701              :   struct hard_reg_set_container set_up_by_prologue;
     702       651879 :   CLEAR_HARD_REG_SET (set_up_by_prologue.set);
     703       764947 :   add_to_hard_reg_set (&set_up_by_prologue.set, Pmode, STACK_POINTER_REGNUM);
     704       651879 :   add_to_hard_reg_set (&set_up_by_prologue.set, Pmode, ARG_POINTER_REGNUM);
     705       651879 :   if (frame_pointer_needed)
     706        43323 :     add_to_hard_reg_set (&set_up_by_prologue.set, Pmode,
     707              :                          HARD_FRAME_POINTER_REGNUM);
     708       651879 :   if (pic_offset_table_rtx
     709       651879 :       && (unsigned) PIC_OFFSET_TABLE_REGNUM != INVALID_REGNUM)
     710            0 :     add_to_hard_reg_set (&set_up_by_prologue.set, Pmode,
     711            0 :                          PIC_OFFSET_TABLE_REGNUM);
     712       651879 :   if (crtl->drap_reg)
     713         5366 :     add_to_hard_reg_set (&set_up_by_prologue.set,
     714         5366 :                          GET_MODE (crtl->drap_reg),
     715         5366 :                          REGNO (crtl->drap_reg));
     716       651879 :   if (targetm.set_up_by_prologue)
     717            0 :     targetm.set_up_by_prologue (&set_up_by_prologue);
     718              : 
     719              :   /* We will insert the prologue before the basic block PRO.  PRO should
     720              :      dominate all basic blocks that need the prologue to be executed
     721              :      before them.  First, make PRO the "tightest wrap" possible.  */
     722              : 
     723       651879 :   calculate_dominance_info (CDI_DOMINATORS);
     724              : 
     725       651879 :   basic_block pro = 0;
     726              : 
     727       651879 :   basic_block bb;
     728       651879 :   edge e;
     729       651879 :   edge_iterator ei;
     730     10660493 :   FOR_EACH_BB_FN (bb, cfun)
     731              :     {
     732     10008614 :       rtx_insn *insn;
     733     69865586 :       FOR_BB_INSNS (bb, insn)
     734     65528125 :         if (NONDEBUG_INSN_P (insn)
     735     65528125 :             && requires_stack_frame_p (insn, prologue_used,
     736              :                                        set_up_by_prologue.set))
     737              :           {
     738      5671153 :             if (dump_file)
     739              :               {
     740          153 :                 fprintf (dump_file, "Block %d needs prologue due to insn %d:\n",
     741          153 :                          bb->index, INSN_UID (insn));
     742          153 :                 print_rtl_single (dump_file, insn);
     743              :               }
     744      5671153 :             pro = nearest_common_dominator (CDI_DOMINATORS, pro, bb);
     745      5671153 :             break;
     746              :           }
     747              :     }
     748              : 
     749              :   /* If nothing needs a prologue, just put it at the start.  This really
     750              :      shouldn't happen, but we cannot fix it here.  */
     751              : 
     752       651879 :   if (pro == 0)
     753              :     {
     754           70 :       if (dump_file)
     755            0 :         fprintf(dump_file, "Nothing needs a prologue, but it isn't empty; "
     756              :                            "putting it at the start.\n");
     757           70 :       pro = entry;
     758              :     }
     759              : 
     760       651879 :   if (dump_file)
     761           51 :     fprintf (dump_file, "After wrapping required blocks, PRO is now %d\n",
     762              :              pro->index);
     763              : 
     764              :   /* Now see if we can put the prologue at the start of PRO.  Putting it
     765              :      there might require duplicating a block that cannot be duplicated,
     766              :      or in some cases we cannot insert the prologue there at all.  If PRO
     767              :      wont't do, try again with the immediate dominator of PRO, and so on.
     768              : 
     769              :      The blocks that need duplicating are those reachable from PRO but
     770              :      not dominated by it.  We keep in BB_WITH a bitmap of the blocks
     771              :      reachable from PRO that we already found, and in VEC a stack of
     772              :      those we still need to consider (to find successors).  */
     773              : 
     774       651879 :   auto_bitmap bb_with;
     775       651879 :   bitmap_set_bit (bb_with, pro->index);
     776              : 
     777       651879 :   vec<basic_block> vec;
     778       651879 :   vec.create (n_basic_blocks_for_fn (cfun));
     779       651879 :   vec.quick_push (pro);
     780              : 
     781       651879 :   unsigned max_grow_size = get_uncond_jump_length ();
     782       651879 :   max_grow_size *= param_max_grow_copy_bb_insns;
     783              : 
     784       651879 :   basic_block checked_pro = NULL;
     785              : 
     786      1226679 :   while (pro != entry)
     787              :     {
     788       638799 :       if (pro != checked_pro)
     789              :         {
     790        66621 :           while (pro != entry && !can_get_prologue (pro, prologue_clobbered))
     791              :             {
     792          139 :               pro = get_immediate_dominator (CDI_DOMINATORS, pro);
     793              : 
     794          139 :               if (bitmap_set_bit (bb_with, pro->index))
     795          138 :                 vec.quick_push (pro);
     796              :             }
     797              :           checked_pro = pro;
     798              :         }
     799              : 
     800       638799 :       if (vec.is_empty ())
     801              :         break;
     802              : 
     803       574800 :       basic_block bb = vec.pop ();
     804       574800 :       if (!can_dup_for_shrink_wrapping (bb, pro, max_grow_size))
     805       205110 :         while (!dominated_by_p (CDI_DOMINATORS, bb, pro))
     806              :           {
     807         3364 :             gcc_assert (pro != entry);
     808              : 
     809         3364 :             pro = get_immediate_dominator (CDI_DOMINATORS, pro);
     810              : 
     811         3364 :             if (bitmap_set_bit (bb_with, pro->index))
     812         3132 :               vec.quick_push (pro);
     813              :           }
     814              : 
     815      1379667 :       FOR_EACH_EDGE (e, ei, bb->succs)
     816       804867 :         if (e->dest != EXIT_BLOCK_PTR_FOR_FN (cfun)
     817       804867 :             && bitmap_set_bit (bb_with, e->dest->index))
     818       511536 :           vec.quick_push (e->dest);
     819              :     }
     820              : 
     821       651879 :   if (dump_file)
     822           51 :     fprintf (dump_file, "Avoiding non-duplicatable blocks, PRO is now %d\n",
     823              :              pro->index);
     824              : 
     825              :   /* If we can move PRO back without having to duplicate more blocks, do so.
     826              :      We do this because putting the prologue earlier is better for scheduling.
     827              : 
     828              :      We can move back to a block PRE if every path from PRE will eventually
     829              :      need a prologue, that is, PRO is a post-dominator of PRE.  PRE needs
     830              :      to dominate every block reachable from itself.  We keep in BB_TMP a
     831              :      bitmap of the blocks reachable from PRE that we already found, and in
     832              :      VEC a stack of those we still need to consider.
     833              : 
     834              :      Any block reachable from PRE is also reachable from all predecessors
     835              :      of PRE, so if we find we need to move PRE back further we can leave
     836              :      everything not considered so far on the stack.  Any block dominated
     837              :      by PRE is also dominated by all other dominators of PRE, so anything
     838              :      found good for some PRE does not need to be reconsidered later.
     839              : 
     840              :      We don't need to update BB_WITH because none of the new blocks found
     841              :      can jump to a block that does not need the prologue.  */
     842              : 
     843       651879 :   if (pro != entry)
     844              :     {
     845        63999 :       calculate_dominance_info (CDI_POST_DOMINATORS);
     846              : 
     847        63999 :       auto_bitmap bb_tmp;
     848        63999 :       bitmap_copy (bb_tmp, bb_with);
     849        63999 :       basic_block last_ok = pro;
     850        63999 :       vec.truncate (0);
     851              : 
     852       139308 :       while (pro != entry)
     853              :         {
     854        67975 :           basic_block pre = get_immediate_dominator (CDI_DOMINATORS, pro);
     855        67975 :           if (!dominated_by_p (CDI_POST_DOMINATORS, pre, pro))
     856              :             break;
     857              : 
     858        11310 :           if (bitmap_set_bit (bb_tmp, pre->index))
     859        10903 :             vec.quick_push (pre);
     860              : 
     861        26824 :           bool ok = true;
     862        26824 :           while (!vec.is_empty ())
     863              :             {
     864        15878 :               if (!dominated_by_p (CDI_DOMINATORS, vec.last (), pre))
     865              :                 {
     866              :                   ok = false;
     867              :                   break;
     868              :                 }
     869              : 
     870        15514 :               basic_block bb = vec.pop ();
     871        35532 :               FOR_EACH_EDGE (e, ei, bb->succs)
     872        20018 :                 if (bitmap_set_bit (bb_tmp, e->dest->index))
     873         4611 :                   vec.quick_push (e->dest);
     874              :             }
     875              : 
     876        11310 :           if (ok && can_get_prologue (pre, prologue_clobbered))
     877              :             last_ok = pre;
     878              : 
     879        11310 :           pro = pre;
     880              :         }
     881              : 
     882        63999 :       pro = last_ok;
     883              : 
     884        63999 :       free_dominance_info (CDI_POST_DOMINATORS);
     885        63999 :     }
     886              : 
     887       651879 :   vec.release ();
     888              : 
     889       651879 :   if (dump_file)
     890           51 :     fprintf (dump_file, "Bumping back to anticipatable blocks, PRO is now %d\n",
     891              :              pro->index);
     892              : 
     893       651879 :   if (pro == entry)
     894              :     {
     895       595214 :       free_dominance_info (CDI_DOMINATORS);
     896       595214 :       return;
     897              :     }
     898              : 
     899              :   /* Compute what fraction of the frequency and count of the blocks that run
     900              :      both with and without prologue are for running with prologue.  This gives
     901              :      the correct answer for reducible flow graphs; for irreducible flow graphs
     902              :      our profile is messed up beyond repair anyway.  */
     903              : 
     904        56665 :   profile_count num = profile_count::zero ();
     905        56665 :   profile_count den = profile_count::zero ();
     906              : 
     907       141091 :   FOR_EACH_EDGE (e, ei, pro->preds)
     908        84426 :     if (!dominated_by_p (CDI_DOMINATORS, e->src, pro))
     909              :       {
     910        84183 :         if (e->count ().initialized_p ())
     911        84155 :           num += e->count ();
     912        84183 :         if (e->src->count.initialized_p ())
     913        84163 :           den += e->src->count;
     914              :       }
     915              : 
     916              :   /* All is okay, so do it.  */
     917              : 
     918        56665 :   crtl->shrink_wrapped = true;
     919        56665 :   if (dump_file)
     920           23 :     fprintf (dump_file, "Performing shrink-wrapping.\n");
     921              : 
     922              :   /* Copy the blocks that can run both with and without prologue.  The
     923              :      originals run with prologue, the copies without.  Store a pointer to
     924              :      the copy in the ->aux field of the original.  */
     925              : 
     926       769294 :   FOR_EACH_BB_FN (bb, cfun)
     927       712629 :     if (bitmap_bit_p (bb_with, bb->index)
     928       712629 :         && !dominated_by_p (CDI_DOMINATORS, bb, pro))
     929              :       {
     930        40400 :         basic_block dup = duplicate_block (bb, 0, 0);
     931              : 
     932        40400 :         bb->aux = dup;
     933              : 
     934        40400 :         if (JUMP_P (BB_END (dup)) && !any_condjump_p (BB_END (dup)))
     935         4854 :           emit_barrier_after_bb (dup);
     936              : 
     937        40400 :         if (EDGE_COUNT (dup->succs) == 0)
     938            7 :           emit_barrier_after_bb (dup);
     939              : 
     940        40400 :         if (dump_file)
     941           21 :           fprintf (dump_file, "Duplicated %d to %d\n", bb->index, dup->index);
     942              : 
     943        40436 :         if (num == profile_count::zero () || den.nonzero_p ())
     944        40391 :           bb->count = bb->count.apply_scale (num, den);
     945        40400 :         dup->count -= bb->count;
     946              :       }
     947              : 
     948              :   /* Now change the edges to point to the copies, where appropriate.  */
     949              : 
     950       775066 :   FOR_EACH_BB_FN (bb, cfun)
     951       718401 :     if (!dominated_by_p (CDI_DOMINATORS, bb, pro))
     952              :       {
     953       323399 :         basic_block src = bb;
     954       323399 :         if (bitmap_bit_p (bb_with, bb->index))
     955        40400 :           src = (basic_block) bb->aux;
     956              : 
     957       800284 :         FOR_EACH_EDGE (e, ei, src->succs)
     958              :           {
     959       476885 :             if (e->dest == EXIT_BLOCK_PTR_FOR_FN (cfun))
     960        89755 :               continue;
     961              : 
     962       387130 :             if (bitmap_bit_p (bb_with, e->dest->index)
     963       387130 :                 && !dominated_by_p (CDI_DOMINATORS, e->dest, pro))
     964              :               {
     965        56834 :                 if (dump_file)
     966           22 :                   fprintf (dump_file, "Redirecting edge %d->%d to %d\n",
     967           22 :                            e->src->index, e->dest->index,
     968           22 :                            ((basic_block) e->dest->aux)->index);
     969        56834 :                 redirect_edge_and_branch_force (e, (basic_block) e->dest->aux);
     970              :               }
     971       330296 :             else if (e->flags & EDGE_FALLTHRU
     972       330296 :                      && bitmap_bit_p (bb_with, bb->index))
     973          202 :               force_nonfallthru (e);
     974              :           }
     975              :       }
     976              : 
     977              :   /* Also redirect the function entry edge if necessary.  */
     978              : 
     979       113330 :   FOR_EACH_EDGE (e, ei, ENTRY_BLOCK_PTR_FOR_FN (cfun)->succs)
     980        56665 :     if (bitmap_bit_p (bb_with, e->dest->index)
     981        56665 :         && !dominated_by_p (CDI_DOMINATORS, e->dest, pro))
     982              :       {
     983           11 :         basic_block split_bb = split_edge (e);
     984           11 :         e = single_succ_edge (split_bb);
     985           11 :         redirect_edge_and_branch_force (e, (basic_block) e->dest->aux);
     986              :       }
     987              : 
     988              :   /* Make a simple_return for those exits that run without prologue.  */
     989              : 
     990       775077 :   FOR_EACH_BB_REVERSE_FN (bb, cfun)
     991       718412 :     if (!bitmap_bit_p (bb_with, bb->index))
     992       720594 :       FOR_EACH_EDGE (e, ei, bb->succs)
     993       435812 :         if (e->dest == EXIT_BLOCK_PTR_FOR_FN (cfun))
     994        58829 :           handle_simple_exit (e);
     995              : 
     996              :   /* Finally, we want a single edge to put the prologue on.  Make a new
     997              :      block before the PRO block; the edge beteen them is the edge we want.
     998              :      Then redirect those edges into PRO that come from blocks without the
     999              :      prologue, to point to the new block instead.  The new prologue block
    1000              :      is put at the end of the insn chain.  */
    1001              : 
    1002        56665 :   basic_block new_bb = create_empty_bb (EXIT_BLOCK_PTR_FOR_FN (cfun)->prev_bb);
    1003        56665 :   BB_COPY_PARTITION (new_bb, pro);
    1004        56665 :   new_bb->count = profile_count::zero ();
    1005        56665 :   if (dump_file)
    1006           23 :     fprintf (dump_file, "Made prologue block %d\n", new_bb->index);
    1007              : 
    1008       142007 :   for (ei = ei_start (pro->preds); (e = ei_safe_edge (ei)); )
    1009              :     {
    1010        85342 :       if (bitmap_bit_p (bb_with, e->src->index)
    1011        85342 :           || dominated_by_p (CDI_DOMINATORS, e->src, pro))
    1012              :         {
    1013         1159 :           ei_next (&ei);
    1014         1159 :           continue;
    1015              :         }
    1016              : 
    1017        84183 :       new_bb->count += e->count ();
    1018              : 
    1019        84183 :       redirect_edge_and_branch_force (e, new_bb);
    1020        84183 :       if (dump_file)
    1021           26 :         fprintf (dump_file, "Redirected edge from %d\n", e->src->index);
    1022              :     }
    1023              : 
    1024        56665 :   *entry_edge = make_single_succ_edge (new_bb, pro, EDGE_FALLTHRU);
    1025        56665 :   force_nonfallthru (*entry_edge);
    1026              : 
    1027        56665 :   free_dominance_info (CDI_DOMINATORS);
    1028       651879 : }
    1029              : 
    1030              : /* Separate shrink-wrapping
    1031              : 
    1032              :    Instead of putting all of the prologue and epilogue in one spot, we
    1033              :    can put parts of it in places where those components are executed less
    1034              :    frequently.  The following code does this, for prologue and epilogue
    1035              :    components that can be put in more than one location, and where those
    1036              :    components can be executed more than once (the epilogue component will
    1037              :    always be executed before the prologue component is executed a second
    1038              :    time).
    1039              : 
    1040              :    What exactly is a component is target-dependent.  The more usual
    1041              :    components are simple saves/restores to/from the frame of callee-saved
    1042              :    registers.  This code treats components abstractly (as an sbitmap),
    1043              :    letting the target handle all details.
    1044              : 
    1045              :    Prologue components are placed in such a way that for every component
    1046              :    the prologue is executed as infrequently as possible.  We do this by
    1047              :    walking the dominator tree, comparing the cost of placing a prologue
    1048              :    component before a block to the sum of costs determined for all subtrees
    1049              :    of that block.
    1050              : 
    1051              :    From this placement, we then determine for each component all blocks
    1052              :    where at least one of this block's dominators (including itself) will
    1053              :    get a prologue inserted.  That then is how the components are placed.
    1054              :    We could place the epilogue components a bit smarter (we can save a
    1055              :    bit of code size sometimes); this is a possible future improvement.
    1056              : 
    1057              :    Prologues and epilogues are preferably placed into a block, either at
    1058              :    the beginning or end of it, if it is needed for all predecessor resp.
    1059              :    successor edges; or placed on the edge otherwise.
    1060              : 
    1061              :    If the placement of any prologue/epilogue leads to a situation we cannot
    1062              :    handle (for example, an abnormal edge would need to be split, or some
    1063              :    targets want to use some specific registers that may not be available
    1064              :    where we want to put them), separate shrink-wrapping for the components
    1065              :    in that prologue/epilogue is aborted.  */
    1066              : 
    1067              : 
    1068              : /* Print the sbitmap COMPONENTS to the DUMP_FILE if not empty, with the
    1069              :    label LABEL.  */
    1070              : static void
    1071         1671 : dump_components (const char *label, sbitmap components)
    1072              : {
    1073         1671 :   if (bitmap_empty_p (components))
    1074              :     return;
    1075              : 
    1076         1024 :   fprintf (dump_file, " [%s", label);
    1077              : 
    1078        95232 :   for (unsigned int j = 0; j < components->n_bits; j++)
    1079        94208 :     if (bitmap_bit_p (components, j))
    1080         3530 :       fprintf (dump_file, " %u", j);
    1081              : 
    1082         1024 :   fprintf (dump_file, "]");
    1083              : }
    1084              : 
    1085              : /* The data we collect for each bb.  */
    1086              : struct sw {
    1087              :   /* What components does this BB need?  */
    1088              :   sbitmap needs_components;
    1089              : 
    1090              :   /* What components does this BB have?  This is the main decision this
    1091              :      pass makes.  */
    1092              :   sbitmap has_components;
    1093              : 
    1094              :   /* The components for which we placed code at the start of the BB (instead
    1095              :      of on all incoming edges).  */
    1096              :   sbitmap head_components;
    1097              : 
    1098              :   /* The components for which we placed code at the end of the BB (instead
    1099              :      of on all outgoing edges).  */
    1100              :   sbitmap tail_components;
    1101              : 
    1102              :   /* The frequency of executing the prologue for this BB, if a prologue is
    1103              :      placed on this BB.  This is a pessimistic estimate (no prologue is
    1104              :      needed for edges from blocks that have the component under consideration
    1105              :      active already).  */
    1106              :   gcov_type own_cost;
    1107              : 
    1108              :   /* The frequency of executing the prologue for this BB and all BBs
    1109              :      dominated by it.  */
    1110              :   gcov_type total_cost;
    1111              : };
    1112              : 
    1113              : /* A helper function for accessing the pass-specific info.  */
    1114              : static inline struct sw *
    1115    414498622 : SW (basic_block bb)
    1116              : {
    1117    414498622 :   gcc_assert (bb->aux);
    1118    414498622 :   return (struct sw *) bb->aux;
    1119              : }
    1120              : 
    1121              : /* Create the pass-specific data structures for separately shrink-wrapping
    1122              :    with components COMPONENTS.  */
    1123              : static void
    1124       732301 : init_separate_shrink_wrap (sbitmap components)
    1125              : {
    1126       732301 :   basic_block bb;
    1127     10133832 :   FOR_ALL_BB_FN (bb, cfun)
    1128              :     {
    1129      9401531 :       bb->aux = xcalloc (1, sizeof (struct sw));
    1130              : 
    1131      9401531 :       SW (bb)->needs_components = targetm.shrink_wrap.components_for_bb (bb);
    1132              : 
    1133              :       /* Mark all basic blocks without successor as needing all components.
    1134              :          This avoids problems in at least cfgcleanup, sel-sched, and
    1135              :          regrename (largely to do with all paths to such a block still
    1136              :          needing the same dwarf CFI info).  */
    1137      9401531 :       if (EDGE_COUNT (bb->succs) == 0)
    1138      1137450 :         bitmap_copy (SW (bb)->needs_components, components);
    1139              : 
    1140      9401531 :       if (dump_file)
    1141              :         {
    1142          630 :           fprintf (dump_file, "bb %d components:", bb->index);
    1143          630 :           dump_components ("has", SW (bb)->needs_components);
    1144          630 :           fprintf (dump_file, "\n");
    1145              :         }
    1146              : 
    1147      9401531 :       SW (bb)->has_components = sbitmap_alloc (SBITMAP_SIZE (components));
    1148      9401531 :       SW (bb)->head_components = sbitmap_alloc (SBITMAP_SIZE (components));
    1149      9401531 :       SW (bb)->tail_components = sbitmap_alloc (SBITMAP_SIZE (components));
    1150      9401531 :       bitmap_clear (SW (bb)->has_components);
    1151              :     }
    1152       732301 : }
    1153              : 
    1154              : /* Destroy the pass-specific data.  */
    1155              : static void
    1156       732301 : fini_separate_shrink_wrap (void)
    1157              : {
    1158       732301 :   basic_block bb;
    1159     10213139 :   FOR_ALL_BB_FN (bb, cfun)
    1160      9480838 :     if (bb->aux)
    1161              :       {
    1162      9401531 :         sbitmap_free (SW (bb)->needs_components);
    1163      9401531 :         sbitmap_free (SW (bb)->has_components);
    1164      9401531 :         sbitmap_free (SW (bb)->head_components);
    1165      9401531 :         sbitmap_free (SW (bb)->tail_components);
    1166      9401531 :         free (bb->aux);
    1167      9401531 :         bb->aux = 0;
    1168              :       }
    1169       732301 : }
    1170              : 
    1171              : /* Place the prologue for component WHICH, in the basic blocks dominated
    1172              :    by HEAD.  Do a DFS over the dominator tree, and set bit WHICH in the
    1173              :    HAS_COMPONENTS of a block if either the block has that bit set in
    1174              :    NEEDS_COMPONENTS, or it is cheaper to place the prologue here than in all
    1175              :    dominator subtrees separately.  */
    1176              : static void
    1177       587482 : place_prologue_for_one_component (unsigned int which, basic_block head)
    1178              : {
    1179              :   /* The block we are currently dealing with.  */
    1180       587482 :   basic_block bb = head;
    1181              :   /* Is this the first time we visit this block, i.e. have we just gone
    1182              :      down the tree.  */
    1183       587482 :   bool first_visit = true;
    1184              : 
    1185              :   /* Walk the dominator tree, visit one block per iteration of this loop.
    1186              :      Each basic block is visited twice: once before visiting any children
    1187              :      of the block, and once after visiting all of them (leaf nodes are
    1188              :      visited only once).  As an optimization, we do not visit subtrees
    1189              :      that can no longer influence the prologue placement.  */
    1190      5481433 :   for (;;)
    1191              :     {
    1192              :       /* First visit of a block: set the (children) cost accumulator to zero;
    1193              :          if the block does not have the component itself, walk down.  */
    1194      5481433 :       if (first_visit)
    1195              :         {
    1196              :           /* Initialize the cost.  The cost is the block execution frequency
    1197              :              that does not come from backedges.  Calculating this by simply
    1198              :              adding the cost of all edges that aren't backedges does not
    1199              :              work: this does not always add up to the block frequency at
    1200              :              all, and even if it does, rounding error makes for bad
    1201              :              decisions.  */
    1202      3725997 :           SW (bb)->own_cost = bb->count.to_frequency (cfun);
    1203              : 
    1204      3725997 :           edge e;
    1205      3725997 :           edge_iterator ei;
    1206      9503327 :           FOR_EACH_EDGE (e, ei, bb->preds)
    1207      5777330 :             if (dominated_by_p (CDI_DOMINATORS, e->src, bb))
    1208              :               {
    1209       219975 :                 if (SW (bb)->own_cost > EDGE_FREQUENCY (e))
    1210       204180 :                   SW (bb)->own_cost -= EDGE_FREQUENCY (e);
    1211              :                 else
    1212        15795 :                   SW (bb)->own_cost = 0;
    1213              :               }
    1214              : 
    1215      3725997 :           SW (bb)->total_cost = 0;
    1216              : 
    1217      3725997 :           if (!bitmap_bit_p (SW (bb)->needs_components, which)
    1218      3725997 :               && first_dom_son (CDI_DOMINATORS, bb))
    1219              :             {
    1220      1755436 :               bb = first_dom_son (CDI_DOMINATORS, bb);
    1221      1755436 :               continue;
    1222              :             }
    1223              :         }
    1224              : 
    1225              :       /* If this block does need the component itself, or it is cheaper to
    1226              :          put the prologue here than in all the descendants that need it,
    1227              :          mark it so.  If this block's immediate post-dominator is dominated
    1228              :          by this block, and that needs the prologue, we can put it on this
    1229              :          block as well (earlier is better).  */
    1230      3725997 :       if (bitmap_bit_p (SW (bb)->needs_components, which)
    1231      3725997 :           || SW (bb)->total_cost > SW (bb)->own_cost)
    1232              :         {
    1233      1056000 :           SW (bb)->total_cost = SW (bb)->own_cost;
    1234      1056000 :           bitmap_set_bit (SW (bb)->has_components, which);
    1235              :         }
    1236              :       else
    1237              :         {
    1238      2669997 :           basic_block kid = get_immediate_dominator (CDI_POST_DOMINATORS, bb);
    1239      2669997 :           if (dominated_by_p (CDI_DOMINATORS, kid, bb)
    1240      2669997 :               && bitmap_bit_p (SW (kid)->has_components, which))
    1241              :             {
    1242       164499 :               SW (bb)->total_cost = SW (bb)->own_cost;
    1243       164499 :               bitmap_set_bit (SW (bb)->has_components, which);
    1244              :             }
    1245              :         }
    1246              : 
    1247              :       /* We are back where we started, so we are done now.  */
    1248      3725997 :       if (bb == head)
    1249       587482 :         return;
    1250              : 
    1251              :       /* We now know the cost of the subtree rooted at the current block.
    1252              :          Accumulate this cost in the parent.  */
    1253      3138515 :       basic_block parent = get_immediate_dominator (CDI_DOMINATORS, bb);
    1254      3138515 :       SW (parent)->total_cost += SW (bb)->total_cost;
    1255              : 
    1256              :       /* Don't walk the tree down unless necessary.  */
    1257      3138515 :       if (next_dom_son (CDI_DOMINATORS, bb)
    1258      3138515 :           && SW (parent)->total_cost <= SW (parent)->own_cost)
    1259              :         {
    1260      1383079 :           bb = next_dom_son (CDI_DOMINATORS, bb);
    1261      1383079 :           first_visit = true;
    1262              :         }
    1263              :       else
    1264              :         {
    1265              :           bb = parent;
    1266              :           first_visit = false;
    1267              :         }
    1268              :     }
    1269              : }
    1270              : 
    1271              : /* Set HAS_COMPONENTS in every block to the maximum it can be set to without
    1272              :    setting it on any path from entry to exit where it was not already set
    1273              :    somewhere (or, for blocks that have no path to the exit, consider only
    1274              :    paths from the entry to the block itself).  Return whether any changes
    1275              :    were made to some HAS_COMPONENTS.  */
    1276              : static bool
    1277       912680 : spread_components (sbitmap components)
    1278              : {
    1279       912680 :   basic_block entry_block = ENTRY_BLOCK_PTR_FOR_FN (cfun);
    1280       912680 :   basic_block exit_block = EXIT_BLOCK_PTR_FOR_FN (cfun);
    1281              : 
    1282              :   /* A stack of all blocks left to consider, and a bitmap of all blocks
    1283              :      on that stack.  */
    1284       912680 :   vec<basic_block> todo;
    1285       912680 :   todo.create (n_basic_blocks_for_fn (cfun));
    1286       912680 :   auto_bitmap seen;
    1287              : 
    1288       912680 :   auto_sbitmap old (SBITMAP_SIZE (components));
    1289              : 
    1290              :   /* Find for every block the components that are *not* needed on some path
    1291              :      from the entry to that block.  Do this with a flood fill from the entry
    1292              :      block.  Every block can be visited at most as often as the number of
    1293              :      components (plus one), and usually much less often.  */
    1294              : 
    1295       912680 :   if (dump_file)
    1296           79 :     fprintf (dump_file, "Spreading down...\n");
    1297              : 
    1298       912680 :   basic_block bb;
    1299     18457884 :   FOR_ALL_BB_FN (bb, cfun)
    1300     17545204 :     bitmap_clear (SW (bb)->head_components);
    1301              : 
    1302       912680 :   bitmap_copy (SW (entry_block)->head_components, components);
    1303              : 
    1304       912680 :   edge e;
    1305       912680 :   edge_iterator ei;
    1306              : 
    1307       912680 :   todo.quick_push (single_succ (entry_block));
    1308       912680 :   bitmap_set_bit (seen, single_succ (entry_block)->index);
    1309      6131553 :   while (!todo.is_empty ())
    1310              :     {
    1311      4306193 :       bb = todo.pop ();
    1312              : 
    1313      4306193 :       bitmap_copy (old, SW (bb)->head_components);
    1314              : 
    1315     14700982 :       FOR_EACH_EDGE (e, ei, bb->preds)
    1316     10394789 :         bitmap_ior (SW (bb)->head_components, SW (bb)->head_components,
    1317     10394789 :                     SW (e->src)->head_components);
    1318              : 
    1319      8612386 :       bitmap_and_compl (SW (bb)->head_components, SW (bb)->head_components,
    1320      4306193 :                         SW (bb)->has_components);
    1321              : 
    1322      4306193 :       if (!bitmap_equal_p (old, SW (bb)->head_components))
    1323      6190945 :         FOR_EACH_EDGE (e, ei, bb->succs)
    1324      3692936 :           if (bitmap_set_bit (seen, e->dest->index))
    1325      3393513 :             todo.quick_push (e->dest);
    1326              : 
    1327      4306193 :       bitmap_clear_bit (seen, bb->index);
    1328              :     }
    1329              : 
    1330              :   /* Find for every block the components that are *not* needed on some reverse
    1331              :      path from the exit to that block.  */
    1332              : 
    1333       912680 :   if (dump_file)
    1334           79 :     fprintf (dump_file, "Spreading up...\n");
    1335              : 
    1336              :   /* First, mark all blocks not reachable from the exit block as not needing
    1337              :      any component on any path to the exit.  Mark everything, and then clear
    1338              :      again by a flood fill.  */
    1339              : 
    1340     18457884 :   FOR_ALL_BB_FN (bb, cfun)
    1341     17545204 :     bitmap_copy (SW (bb)->tail_components, components);
    1342              : 
    1343      1913127 :   FOR_EACH_EDGE (e, ei, exit_block->preds)
    1344              :     {
    1345      1000447 :       todo.quick_push (e->src);
    1346      1000447 :       bitmap_set_bit (seen, e->src->index);
    1347              :     }
    1348              : 
    1349     16025707 :   while (!todo.is_empty ())
    1350              :     {
    1351     15113027 :       bb = todo.pop ();
    1352              : 
    1353     15113027 :       if (!bitmap_empty_p (SW (bb)->tail_components))
    1354     26731439 :         FOR_EACH_EDGE (e, ei, bb->preds)
    1355     15394233 :           if (bitmap_set_bit (seen, e->src->index))
    1356     14112580 :             todo.quick_push (e->src);
    1357              : 
    1358     15113027 :       bitmap_clear (SW (bb)->tail_components);
    1359              : 
    1360     15113027 :       bitmap_clear_bit (seen, bb->index);
    1361              :     }
    1362              : 
    1363              :   /* And then, flood fill backwards to find for every block the components
    1364              :      not needed on some path to the exit.  */
    1365              : 
    1366       912680 :   bitmap_copy (SW (exit_block)->tail_components, components);
    1367              : 
    1368      1913127 :   FOR_EACH_EDGE (e, ei, exit_block->preds)
    1369              :     {
    1370      1000447 :       todo.quick_push (e->src);
    1371      1000447 :       bitmap_set_bit (seen, e->src->index);
    1372              :     }
    1373              : 
    1374      9094898 :   while (!todo.is_empty ())
    1375              :     {
    1376      8182218 :       bb = todo.pop ();
    1377              : 
    1378      8182218 :       bitmap_copy (old, SW (bb)->tail_components);
    1379              : 
    1380     21812900 :       FOR_EACH_EDGE (e, ei, bb->succs)
    1381     13630682 :         bitmap_ior (SW (bb)->tail_components, SW (bb)->tail_components,
    1382     13630682 :                     SW (e->dest)->tail_components);
    1383              : 
    1384     16364436 :       bitmap_and_compl (SW (bb)->tail_components, SW (bb)->tail_components,
    1385      8182218 :                         SW (bb)->has_components);
    1386              : 
    1387      8182218 :       if (!bitmap_equal_p (old, SW (bb)->tail_components))
    1388     13457858 :         FOR_EACH_EDGE (e, ei, bb->preds)
    1389      7835093 :           if (bitmap_set_bit (seen, e->src->index))
    1390      7181771 :             todo.quick_push (e->src);
    1391              : 
    1392      8182218 :       bitmap_clear_bit (seen, bb->index);
    1393              :     }
    1394              : 
    1395       912680 :   todo.release ();
    1396              : 
    1397              :   /* Finally, mark everything not needed both forwards and backwards.  */
    1398              : 
    1399       912680 :   bool did_changes = false;
    1400              : 
    1401     16632524 :   FOR_EACH_BB_FN (bb, cfun)
    1402              :     {
    1403     15719844 :       bitmap_copy (old, SW (bb)->has_components);
    1404              : 
    1405     31439688 :       bitmap_and (SW (bb)->head_components, SW (bb)->head_components,
    1406     15719844 :                   SW (bb)->tail_components);
    1407     31439688 :       bitmap_and_compl (SW (bb)->has_components, components,
    1408     15719844 :                         SW (bb)->head_components);
    1409              : 
    1410     15719844 :       if (!did_changes && !bitmap_equal_p (old, SW (bb)->has_components))
    1411              :         did_changes = true;
    1412              :     }
    1413              : 
    1414     18457884 :   FOR_ALL_BB_FN (bb, cfun)
    1415              :     {
    1416     17545204 :       if (dump_file)
    1417              :         {
    1418          942 :           fprintf (dump_file, "bb %d components:", bb->index);
    1419          942 :           dump_components ("has", SW (bb)->has_components);
    1420          942 :           fprintf (dump_file, "\n");
    1421              :         }
    1422              :     }
    1423              : 
    1424       912680 :   return did_changes;
    1425       912680 : }
    1426              : 
    1427              : /* If we cannot handle placing some component's prologues or epilogues where
    1428              :    we decided we should place them, unmark that component in COMPONENTS so
    1429              :    that it is not wrapped separately.  */
    1430              : static void
    1431       732301 : disqualify_problematic_components (sbitmap components)
    1432              : {
    1433       732301 :   auto_sbitmap pro (SBITMAP_SIZE (components));
    1434       732301 :   auto_sbitmap epi (SBITMAP_SIZE (components));
    1435              : 
    1436       732301 :   basic_block bb;
    1437      8669230 :   FOR_EACH_BB_FN (bb, cfun)
    1438              :     {
    1439      7936929 :       edge e;
    1440      7936929 :       edge_iterator ei;
    1441     19367786 :       FOR_EACH_EDGE (e, ei, bb->succs)
    1442              :         {
    1443              :           /* Find which components we want pro/epilogues for here.  */
    1444     11430857 :           bitmap_and_compl (epi, SW (e->src)->has_components,
    1445     11430857 :                             SW (e->dest)->has_components);
    1446     11430857 :           bitmap_and_compl (pro, SW (e->dest)->has_components,
    1447     11430857 :                             SW (e->src)->has_components);
    1448              : 
    1449              :           /* Ask the target what it thinks about things.  */
    1450     11430857 :           if (!bitmap_empty_p (epi))
    1451       304114 :             targetm.shrink_wrap.disqualify_components (components, e, epi,
    1452              :                                                        false);
    1453     11430857 :           if (!bitmap_empty_p (pro))
    1454       175182 :             targetm.shrink_wrap.disqualify_components (components, e, pro,
    1455              :                                                        true);
    1456              : 
    1457              :           /* If this edge doesn't need splitting, we're fine.  */
    1458     11430857 :           if (single_pred_p (e->dest)
    1459     11430857 :               && e->dest != EXIT_BLOCK_PTR_FOR_FN (cfun))
    1460      5299103 :             continue;
    1461              : 
    1462              :           /* If the edge can be split, that is fine too.  */
    1463      6131754 :           if ((e->flags & EDGE_ABNORMAL) == 0)
    1464      5808891 :             continue;
    1465              : 
    1466              :           /* We also can handle sibcalls.  */
    1467       322863 :           if (e->dest == EXIT_BLOCK_PTR_FOR_FN (cfun))
    1468              :             {
    1469       120165 :               gcc_assert (e->flags & EDGE_SIBCALL);
    1470       120165 :               continue;
    1471              :             }
    1472              : 
    1473              :           /* Remove from consideration those components we would need
    1474              :              pro/epilogues for on edges where we cannot insert them.  */
    1475       202698 :           bitmap_and_compl (components, components, epi);
    1476       202698 :           bitmap_and_compl (components, components, pro);
    1477              : 
    1478       202698 :           if (dump_file && !bitmap_subset_p (epi, components))
    1479              :             {
    1480            0 :               fprintf (dump_file, "  BAD epi %d->%d", e->src->index,
    1481            0 :                        e->dest->index);
    1482            0 :               if (e->flags & EDGE_EH)
    1483            0 :                 fprintf (dump_file, " for EH");
    1484            0 :               dump_components ("epi", epi);
    1485            0 :               fprintf (dump_file, "\n");
    1486              :             }
    1487              : 
    1488       202698 :           if (dump_file && !bitmap_subset_p (pro, components))
    1489              :             {
    1490            0 :               fprintf (dump_file, "  BAD pro %d->%d", e->src->index,
    1491            0 :                        e->dest->index);
    1492            0 :               if (e->flags & EDGE_EH)
    1493            0 :                 fprintf (dump_file, " for EH");
    1494            0 :               dump_components ("pro", pro);
    1495            0 :               fprintf (dump_file, "\n");
    1496              :             }
    1497              :         }
    1498              :     }
    1499       732301 : }
    1500              : 
    1501              : /* Place code for prologues and epilogues for COMPONENTS where we can put
    1502              :    that code at the start of basic blocks.  */
    1503              : static void
    1504        44943 : emit_common_heads_for_components (sbitmap components)
    1505              : {
    1506        44943 :   auto_sbitmap pro (SBITMAP_SIZE (components));
    1507        44943 :   auto_sbitmap epi (SBITMAP_SIZE (components));
    1508        44943 :   auto_sbitmap tmp (SBITMAP_SIZE (components));
    1509              : 
    1510        44943 :   basic_block bb;
    1511      1851126 :   FOR_ALL_BB_FN (bb, cfun)
    1512      1806183 :     bitmap_clear (SW (bb)->head_components);
    1513              : 
    1514      1761240 :   FOR_EACH_BB_FN (bb, cfun)
    1515              :     {
    1516              :       /* Find which prologue resp. epilogue components are needed for all
    1517              :          predecessor edges to this block.  */
    1518              : 
    1519              :       /* First, select all possible components.  */
    1520      1716297 :       bitmap_copy (epi, components);
    1521      1716297 :       bitmap_copy (pro, components);
    1522              : 
    1523      1716297 :       edge e;
    1524      1716297 :       edge_iterator ei;
    1525      4199246 :       FOR_EACH_EDGE (e, ei, bb->preds)
    1526              :         {
    1527      2514418 :           if (e->flags & EDGE_ABNORMAL)
    1528              :             {
    1529        31469 :               bitmap_clear (epi);
    1530        31469 :               bitmap_clear (pro);
    1531        31469 :               break;
    1532              :             }
    1533              : 
    1534              :           /* Deselect those epilogue components that should not be inserted
    1535              :              for this edge.  */
    1536      2482949 :           bitmap_and_compl (tmp, SW (e->src)->has_components,
    1537      2482949 :                             SW (e->dest)->has_components);
    1538      2482949 :           bitmap_and (epi, epi, tmp);
    1539              : 
    1540              :           /* Similar, for the prologue.  */
    1541      2482949 :           bitmap_and_compl (tmp, SW (e->dest)->has_components,
    1542      2482949 :                             SW (e->src)->has_components);
    1543      2482949 :           bitmap_and (pro, pro, tmp);
    1544              :         }
    1545              : 
    1546      1716297 :       if (dump_file && !(bitmap_empty_p (epi) && bitmap_empty_p (pro)))
    1547            6 :         fprintf (dump_file, "  bb %d", bb->index);
    1548              : 
    1549      1716297 :       if (dump_file && !bitmap_empty_p (epi))
    1550            0 :         dump_components ("epi", epi);
    1551      1716297 :       if (dump_file && !bitmap_empty_p (pro))
    1552            6 :         dump_components ("pro", pro);
    1553              : 
    1554      1716297 :       if (dump_file && !(bitmap_empty_p (epi) && bitmap_empty_p (pro)))
    1555            6 :         fprintf (dump_file, "\n");
    1556              : 
    1557              :       /* Place code after the BB note.  */
    1558      1716297 :       if (!bitmap_empty_p (pro))
    1559              :         {
    1560        78323 :           start_sequence ();
    1561        78323 :           targetm.shrink_wrap.emit_prologue_components (pro);
    1562        78323 :           rtx_insn *seq = end_sequence ();
    1563        78323 :           record_prologue_seq (seq);
    1564              : 
    1565        78323 :           emit_insn_after (seq, bb_note (bb));
    1566              : 
    1567        78323 :           bitmap_ior (SW (bb)->head_components, SW (bb)->head_components, pro);
    1568              :         }
    1569              : 
    1570      1716297 :       if (!bitmap_empty_p (epi))
    1571              :         {
    1572            0 :           start_sequence ();
    1573            0 :           targetm.shrink_wrap.emit_epilogue_components (epi);
    1574            0 :           rtx_insn *seq = end_sequence ();
    1575            0 :           record_epilogue_seq (seq);
    1576              : 
    1577            0 :           emit_insn_after (seq, bb_note (bb));
    1578              : 
    1579            0 :           bitmap_ior (SW (bb)->head_components, SW (bb)->head_components, epi);
    1580              :         }
    1581              :     }
    1582        44943 : }
    1583              : 
    1584              : /* Place code for prologues and epilogues for COMPONENTS where we can put
    1585              :    that code at the end of basic blocks.  */
    1586              : static void
    1587        44943 : emit_common_tails_for_components (sbitmap components)
    1588              : {
    1589        44943 :   auto_sbitmap pro (SBITMAP_SIZE (components));
    1590        44943 :   auto_sbitmap epi (SBITMAP_SIZE (components));
    1591        44943 :   auto_sbitmap tmp (SBITMAP_SIZE (components));
    1592              : 
    1593        44943 :   basic_block bb;
    1594      1851126 :   FOR_ALL_BB_FN (bb, cfun)
    1595      1806183 :     bitmap_clear (SW (bb)->tail_components);
    1596              : 
    1597      1761240 :   FOR_EACH_BB_FN (bb, cfun)
    1598              :     {
    1599              :       /* Find which prologue resp. epilogue components are needed for all
    1600              :          successor edges from this block.  */
    1601      1716297 :       if (EDGE_COUNT (bb->succs) == 0)
    1602        58626 :         continue;
    1603              : 
    1604              :       /* First, select all possible components.  */
    1605      1657671 :       bitmap_copy (epi, components);
    1606      1657671 :       bitmap_copy (pro, components);
    1607              : 
    1608      1657671 :       edge e;
    1609      1657671 :       edge_iterator ei;
    1610      4113703 :       FOR_EACH_EDGE (e, ei, bb->succs)
    1611              :         {
    1612      2516150 :           if (e->flags & EDGE_ABNORMAL)
    1613              :             {
    1614        60118 :               bitmap_clear (epi);
    1615        60118 :               bitmap_clear (pro);
    1616        60118 :               break;
    1617              :             }
    1618              : 
    1619              :           /* Deselect those epilogue components that should not be inserted
    1620              :              for this edge, and also those that are already put at the head
    1621              :              of the successor block.  */
    1622      2456032 :           bitmap_and_compl (tmp, SW (e->src)->has_components,
    1623      2456032 :                             SW (e->dest)->has_components);
    1624      2456032 :           bitmap_and_compl (tmp, tmp, SW (e->dest)->head_components);
    1625      2456032 :           bitmap_and (epi, epi, tmp);
    1626              : 
    1627              :           /* Similarly, for the prologue.  */
    1628      2456032 :           bitmap_and_compl (tmp, SW (e->dest)->has_components,
    1629      2456032 :                             SW (e->src)->has_components);
    1630      2456032 :           bitmap_and_compl (tmp, tmp, SW (e->dest)->head_components);
    1631      2456032 :           bitmap_and (pro, pro, tmp);
    1632              :         }
    1633              : 
    1634              :       /* If the last insn of this block is a control flow insn we cannot
    1635              :          put anything after it.  We can put our code before it instead,
    1636              :          but only if that jump insn is a simple jump.  */
    1637      1657671 :       rtx_insn *last_insn = BB_END (bb);
    1638      1657671 :       if (control_flow_insn_p (last_insn) && !simplejump_p (last_insn))
    1639              :         {
    1640       887639 :           bitmap_clear (epi);
    1641       887639 :           bitmap_clear (pro);
    1642              :         }
    1643              : 
    1644      1657671 :       if (dump_file && !(bitmap_empty_p (epi) && bitmap_empty_p (pro)))
    1645            7 :         fprintf (dump_file, "  bb %d", bb->index);
    1646              : 
    1647      1657671 :       if (dump_file && !bitmap_empty_p (epi))
    1648            7 :         dump_components ("epi", epi);
    1649      1657671 :       if (dump_file && !bitmap_empty_p (pro))
    1650            0 :         dump_components ("pro", pro);
    1651              : 
    1652      1657671 :       if (dump_file && !(bitmap_empty_p (epi) && bitmap_empty_p (pro)))
    1653            7 :         fprintf (dump_file, "\n");
    1654              : 
    1655              :       /* Put the code at the end of the BB, but before any final jump.  */
    1656      1657671 :       if (!bitmap_empty_p (epi))
    1657              :         {
    1658        60940 :           start_sequence ();
    1659        60940 :           targetm.shrink_wrap.emit_epilogue_components (epi);
    1660        60940 :           rtx_insn *seq = end_sequence ();
    1661        60940 :           record_epilogue_seq (seq);
    1662              : 
    1663        60940 :           if (control_flow_insn_p (last_insn))
    1664        41837 :             emit_insn_before (seq, last_insn);
    1665              :           else
    1666        19103 :             emit_insn_after (seq, last_insn);
    1667              : 
    1668        60940 :           bitmap_ior (SW (bb)->tail_components, SW (bb)->tail_components, epi);
    1669              :         }
    1670              : 
    1671      1657671 :       if (!bitmap_empty_p (pro))
    1672              :         {
    1673         1788 :           start_sequence ();
    1674         1788 :           targetm.shrink_wrap.emit_prologue_components (pro);
    1675         1788 :           rtx_insn *seq = end_sequence ();
    1676         1788 :           record_prologue_seq (seq);
    1677              : 
    1678         1788 :           if (control_flow_insn_p (last_insn))
    1679         1485 :             emit_insn_before (seq, last_insn);
    1680              :           else
    1681          303 :             emit_insn_after (seq, last_insn);
    1682              : 
    1683         1788 :           bitmap_ior (SW (bb)->tail_components, SW (bb)->tail_components, pro);
    1684              :         }
    1685              :     }
    1686        44943 : }
    1687              : 
    1688              : /* Place prologues and epilogues for COMPONENTS on edges, if we haven't already
    1689              :    placed them inside blocks directly.  */
    1690              : static void
    1691        44943 : insert_prologue_epilogue_for_components (sbitmap components)
    1692              : {
    1693        44943 :   auto_sbitmap pro (SBITMAP_SIZE (components));
    1694        44943 :   auto_sbitmap epi (SBITMAP_SIZE (components));
    1695              : 
    1696        44943 :   basic_block bb;
    1697      1761265 :   FOR_EACH_BB_FN (bb, cfun)
    1698              :     {
    1699      1716322 :       if (!bb->aux)
    1700           25 :         continue;
    1701              : 
    1702      1716297 :       edge e;
    1703      1716297 :       edge_iterator ei;
    1704      4260346 :       FOR_EACH_EDGE (e, ei, bb->succs)
    1705              :         {
    1706              :           /* Find which pro/epilogue components are needed on this edge.  */
    1707      2544049 :           bitmap_and_compl (epi, SW (e->src)->has_components,
    1708      2544049 :                             SW (e->dest)->has_components);
    1709      2544049 :           bitmap_and_compl (pro, SW (e->dest)->has_components,
    1710      2544049 :                             SW (e->src)->has_components);
    1711      2544049 :           bitmap_and (epi, epi, components);
    1712      2544049 :           bitmap_and (pro, pro, components);
    1713              : 
    1714              :           /* Deselect those we already have put at the head or tail of the
    1715              :              edge's dest resp. src.  */
    1716      2544049 :           bitmap_and_compl (epi, epi, SW (e->dest)->head_components);
    1717      2544049 :           bitmap_and_compl (pro, pro, SW (e->dest)->head_components);
    1718      2544049 :           bitmap_and_compl (epi, epi, SW (e->src)->tail_components);
    1719      2544049 :           bitmap_and_compl (pro, pro, SW (e->src)->tail_components);
    1720              : 
    1721      2544049 :           if (!bitmap_empty_p (epi) || !bitmap_empty_p (pro))
    1722              :             {
    1723        90291 :               if (dump_file)
    1724              :                 {
    1725           39 :                   fprintf (dump_file, "  %d->%d", e->src->index,
    1726           39 :                            e->dest->index);
    1727           39 :                   dump_components ("epi", epi);
    1728           39 :                   dump_components ("pro", pro);
    1729           39 :                   if (e->flags & EDGE_SIBCALL)
    1730            0 :                     fprintf (dump_file, "  (SIBCALL)");
    1731           39 :                   else if (e->flags & EDGE_ABNORMAL)
    1732            0 :                     fprintf (dump_file, "  (ABNORMAL)");
    1733           39 :                   fprintf (dump_file, "\n");
    1734              :                 }
    1735              : 
    1736              :               /* Put the epilogue components in place.  */
    1737        90291 :               start_sequence ();
    1738        90291 :               targetm.shrink_wrap.emit_epilogue_components (epi);
    1739        90291 :               rtx_insn *seq = end_sequence ();
    1740        90291 :               record_epilogue_seq (seq);
    1741              : 
    1742        90291 :               if (e->flags & EDGE_SIBCALL)
    1743              :                 {
    1744         2984 :                   gcc_assert (e->dest == EXIT_BLOCK_PTR_FOR_FN (cfun));
    1745              : 
    1746         2984 :                   rtx_insn *insn = BB_END (e->src);
    1747         2984 :                   gcc_assert (CALL_P (insn) && SIBLING_CALL_P (insn));
    1748         2984 :                   emit_insn_before (seq, insn);
    1749              :                 }
    1750        87307 :               else if (e->dest == EXIT_BLOCK_PTR_FOR_FN (cfun))
    1751              :                 {
    1752           25 :                   gcc_assert (e->flags & EDGE_FALLTHRU);
    1753           25 :                   basic_block new_bb = split_edge (e);
    1754           25 :                   emit_insn_after (seq, BB_END (new_bb));
    1755              :                 }
    1756              :               else
    1757        87282 :                 insert_insn_on_edge (seq, e);
    1758              : 
    1759              :               /* Put the prologue components in place.  */
    1760        90291 :               start_sequence ();
    1761        90291 :               targetm.shrink_wrap.emit_prologue_components (pro);
    1762        90291 :               seq = end_sequence ();
    1763        90291 :               record_prologue_seq (seq);
    1764              : 
    1765        90291 :               insert_insn_on_edge (seq, e);
    1766              :             }
    1767              :         }
    1768              :     }
    1769              : 
    1770        44943 :   commit_edge_insertions ();
    1771        44943 : }
    1772              : 
    1773              : bool
    1774      1480948 : use_shrink_wrapping_separate (void)
    1775              : {
    1776      1480948 :   if (!(SHRINK_WRAPPING_ENABLED && flag_shrink_wrap_separate
    1777      1039981 :         && optimize_function_for_speed_p (cfun)
    1778       975530 :         && targetm.shrink_wrap.get_separate_components))
    1779       505418 :     return false;
    1780              : 
    1781              :   /* We don't handle "strange" functions.  */
    1782       975530 :   if (cfun->calls_alloca
    1783       965946 :       || cfun->calls_setjmp
    1784       965150 :       || cfun->can_throw_non_call_exceptions
    1785       733116 :       || crtl->calls_eh_return
    1786       733093 :       || crtl->has_nonlocal_goto
    1787       732793 :       || crtl->saves_all_registers)
    1788              :     return false;
    1789              : 
    1790              :   return true;
    1791              : }
    1792              : 
    1793              : /* The main entry point to this subpass.  FIRST_BB is where the prologue
    1794              :    would be normally put.  */
    1795              : void
    1796      1480948 : try_shrink_wrapping_separate (basic_block first_bb)
    1797              : {
    1798      1480948 :   if (!use_shrink_wrapping_separate ())
    1799       748647 :     return;
    1800              : 
    1801              :   /* Ask the target what components there are.  If it returns NULL, don't
    1802              :      do anything.  */
    1803       732301 :   sbitmap components = targetm.shrink_wrap.get_separate_components ();
    1804       732301 :   if (!components)
    1805              :     return;
    1806              : 
    1807              :   /* We need LIVE info, not defining anything in the entry block and not
    1808              :      using anything in the exit block.  A block then needs a component if
    1809              :      the register for that component is in the IN or GEN or KILL set for
    1810              :      that block.  */
    1811       732301 :   df_scan->local_flags |= DF_SCAN_EMPTY_ENTRY_EXIT;
    1812       732301 :   df_update_entry_exit_and_calls ();
    1813       732301 :   df_live_add_problem ();
    1814       732301 :   df_live_set_all_dirty ();
    1815       732301 :   df_analyze ();
    1816              : 
    1817       732301 :   calculate_dominance_info (CDI_DOMINATORS);
    1818       732301 :   calculate_dominance_info (CDI_POST_DOMINATORS);
    1819              : 
    1820       732301 :   init_separate_shrink_wrap (components);
    1821              : 
    1822       732301 :   sbitmap_iterator sbi;
    1823       732301 :   unsigned int j;
    1824      2052084 :   EXECUTE_IF_SET_IN_BITMAP (components, 0, j, sbi)
    1825       587482 :     place_prologue_for_one_component (j, first_bb);
    1826              : 
    1827              :   /* Try to minimize the number of saves and restores.  Do this as long as
    1828              :      it changes anything.  This does not iterate more than a few times.  */
    1829              :   int spread_times = 0;
    1830       912680 :   while (spread_components (components))
    1831              :     {
    1832       180379 :       spread_times++;
    1833              : 
    1834       180379 :       if (dump_file)
    1835           14 :         fprintf (dump_file, "Now spread %d times.\n", spread_times);
    1836              :     }
    1837              : 
    1838       732301 :   disqualify_problematic_components (components);
    1839              : 
    1840              :   /* Don't separately shrink-wrap anything where the "main" prologue will
    1841              :      go; the target code can often optimize things if it is presented with
    1842              :      all components together (say, if it generates store-multiple insns).  */
    1843       732301 :   bitmap_and_compl (components, components, SW (first_bb)->has_components);
    1844              : 
    1845       732301 :   if (bitmap_empty_p (components))
    1846              :     {
    1847       687358 :       if (dump_file)
    1848           57 :         fprintf (dump_file, "Not wrapping anything separately.\n");
    1849              :     }
    1850              :   else
    1851              :     {
    1852        44943 :       if (dump_file)
    1853              :         {
    1854            8 :           fprintf (dump_file, "The components we wrap separately are");
    1855            8 :           dump_components ("sep", components);
    1856            8 :           fprintf (dump_file, "\n");
    1857              : 
    1858            8 :           fprintf (dump_file, "... Inserting common heads...\n");
    1859              :         }
    1860              : 
    1861        44943 :       emit_common_heads_for_components (components);
    1862              : 
    1863        44943 :       if (dump_file)
    1864            8 :         fprintf (dump_file, "... Inserting common tails...\n");
    1865              : 
    1866        44943 :       emit_common_tails_for_components (components);
    1867              : 
    1868        44943 :       if (dump_file)
    1869            8 :         fprintf (dump_file, "... Inserting the more difficult ones...\n");
    1870              : 
    1871        44943 :       insert_prologue_epilogue_for_components (components);
    1872              : 
    1873        44943 :       if (dump_file)
    1874            8 :         fprintf (dump_file, "... Done.\n");
    1875              : 
    1876        44943 :       targetm.shrink_wrap.set_handled_components (components);
    1877              : 
    1878        44943 :       crtl->shrink_wrapped_separate = true;
    1879              :     }
    1880              : 
    1881       732301 :   fini_separate_shrink_wrap ();
    1882              : 
    1883       732301 :   sbitmap_free (components);
    1884       732301 :   free_dominance_info (CDI_DOMINATORS);
    1885       732301 :   free_dominance_info (CDI_POST_DOMINATORS);
    1886              : 
    1887              :   /* All done.  */
    1888       732301 :   df_scan->local_flags &= ~DF_SCAN_EMPTY_ENTRY_EXIT;
    1889       732301 :   df_update_entry_exit_and_calls ();
    1890       732301 :   df_live_set_all_dirty ();
    1891       732301 :   df_analyze ();
    1892              : }
        

Generated by: LCOV version 2.4-beta

LCOV profile is generated on x86_64 machine using following configure options: configure --disable-bootstrap --enable-coverage=opt --enable-languages=c,c++,fortran,go,jit,lto,rust,m2 --enable-host-shared. GCC test suite is run with the built compiler.