LCOV - code coverage report
Current view: top level - gcc - ext-dce.cc (source / functions) Coverage Total Hit
Test: gcc.info Lines: 66.0 % 715 472
Test Date: 2026-05-30 15:37:04 Functions: 91.7 % 24 22
Legend: Lines:     hit not hit

            Line data    Source code
       1              : /* RTL dead zero/sign extension (code) elimination.
       2              :    Copyright (C) 2000-2026 Free Software Foundation, Inc.
       3              : 
       4              : This file is part of GCC.
       5              : 
       6              : GCC is free software; you can redistribute it and/or modify it under
       7              : the terms of the GNU General Public License as published by the Free
       8              : Software Foundation; either version 3, or (at your option) any later
       9              : version.
      10              : 
      11              : GCC is distributed in the hope that it will be useful, but WITHOUT ANY
      12              : WARRANTY; without even the implied warranty of MERCHANTABILITY or
      13              : FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
      14              : for more details.
      15              : 
      16              : You should have received a copy of the GNU General Public License
      17              : along with GCC; see the file COPYING3.  If not see
      18              : <http://www.gnu.org/licenses/>.  */
      19              : 
      20              : #include "config.h"
      21              : #include "system.h"
      22              : #include "coretypes.h"
      23              : #include "backend.h"
      24              : #include "rtl.h"
      25              : #include "tree.h"
      26              : #include "memmodel.h"
      27              : #include "insn-config.h"
      28              : #include "emit-rtl.h"
      29              : #include "expr.h"
      30              : #include "recog.h"
      31              : #include "cfganal.h"
      32              : #include "tree-pass.h"
      33              : #include "cfgrtl.h"
      34              : #include "rtl-iter.h"
      35              : #include "df.h"
      36              : #include "print-rtl.h"
      37              : #include "dbgcnt.h"
      38              : #include "diagnostic-core.h"
      39              : #include "target.h"
      40              : 
      41              : /* These should probably move into a C++ class.  */
      42              : static vec<bitmap_head> livein;
      43              : static bitmap all_blocks;
      44              : static bitmap livenow;
      45              : static bitmap changed_pseudos;
      46              : static bool modify;
      47              : 
      48              : /* Chain detection for promotion: we defer promotions and only apply them
      49              :    when they form chains (one candidate's result feeds another's operand).
      50              :    Standalone promotions are skipped as they cause regressions on targets
      51              :    with free sign extension (e.g., RISC-V W-suffix instructions).  */
      52              : struct promotion_candidate_info {
      53              :   rtx_insn *insn;
      54              :   rtx set;
      55              : };
      56              : 
      57              : static vec<promotion_candidate_info> promotion_candidates;
      58              : static bitmap promotable_dests;
      59              : static bitmap consumed_by_candidate;
      60              : 
      61              : /* Copy pairs seen during the reverse scan (from optimized extensions).
      62              :    Used to propagate chain info transitively.  */
      63              : struct copy_info {
      64              :   unsigned int dest_regno;
      65              :   unsigned int src_regno;
      66              : };
      67              : static vec<copy_info> promotion_copies;
      68              : 
      69              : /* We consider four bit groups for liveness:
      70              :    bit 0..7   (least significant byte)
      71              :    bit 8..15  (second least significant byte)
      72              :    bit 16..31
      73              :    bit 32..BITS_PER_WORD-1  */
      74              : 
      75              : /* For the given REG, return the number of bit groups implied by the
      76              :    size of the REG's mode, up to a maximum of 4 (number of bit groups
      77              :    tracked by this pass).
      78              : 
      79              :    For partial integer and variable sized modes also return 4.  This
      80              :    could possibly be refined for something like PSI mode, but it
      81              :    does not seem worth the effort.  */
      82              : 
      83              : static int
      84    235487052 : group_limit (const_rtx reg)
      85              : {
      86    235487052 :   machine_mode mode = GET_MODE (reg);
      87              : 
      88    235487052 :   if (!GET_MODE_BITSIZE (mode).is_constant ())
      89              :     return 4;
      90              : 
      91    235487052 :   int size = GET_MODE_SIZE (mode).to_constant ();
      92              : 
      93    235487052 :   size = exact_log2 (size);
      94              : 
      95    235392966 :   if (size < 0)
      96              :     return 4;
      97              : 
      98    235392966 :   size++;
      99    235392966 :   return (size > 4 ? 4 : size);
     100              : }
     101              : 
     102              : /* Make all bit groups live for REGNO in bitmap BMAP.  For hard regs,
     103              :    we assume all groups are live.  For a pseudo we consider the size
     104              :    of the pseudo to avoid creating unnecessarily live chunks of data.  */
     105              : 
     106              : static void
     107      4709955 : make_reg_live (bitmap bmap, int regno)
     108              : {
     109      4709955 :   int limit;
     110              : 
     111              :   /* For pseudos we can use the mode to limit how many bit groups
     112              :      are marked as live since a pseudo only has one mode.  Hard
     113              :      registers have to be handled more conservatively.  */
     114      4709955 :   if (regno > FIRST_PSEUDO_REGISTER)
     115              :     {
     116       884006 :       rtx reg = regno_reg_rtx[regno];
     117       884006 :       limit = group_limit (reg);
     118              :     }
     119              :   else
     120              :     limit = 4;
     121              : 
     122     23220131 :   for (int i = 0; i < limit; i++)
     123     18510176 :     bitmap_set_bit (bmap, regno * 4 + i);
     124      4709955 : }
     125              : 
     126              : /* Note this pass could be used to narrow memory loads too.  It's
     127              :    not clear if that's profitable or not in general.  */
     128              : 
     129              : #define UNSPEC_P(X) (GET_CODE (X) == UNSPEC || GET_CODE (X) == UNSPEC_VOLATILE)
     130              : 
     131              : /* If we know the destination of CODE only uses some low bits
     132              :    (say just the QI bits of an SI operation), then return true
     133              :    if we can propagate the need for just the subset of bits
     134              :    from the destination to the sources.
     135              : 
     136              :    FIXME: This is safe for operands 1 and 2 of an IF_THEN_ELSE, but not
     137              :    operand 0.  Thus is likely would need some special casing to handle.  */
     138              : 
     139              : static bool
     140    142032701 : safe_for_live_propagation (rtx_code code)
     141              : {
     142              :   /* First handle rtx classes which as a whole are known to
     143              :      be either safe or unsafe.  */
     144    142032701 :   switch (GET_RTX_CLASS (code))
     145              :     {
     146              :       case RTX_OBJ:
     147              :       case RTX_CONST_OBJ:
     148              :         return true;
     149              : 
     150              :       case RTX_COMPARE:
     151              :       case RTX_COMM_COMPARE:
     152              :       case RTX_TERNARY:
     153              :         return false;
     154              : 
     155     73777040 :       default:
     156     73777040 :         break;
     157              :     }
     158              : 
     159              :   /* What's left are specific codes.  We only need to identify those
     160              :      which are safe.   */
     161     73777040 :   switch (code)
     162              :     {
     163              :     /* These are trivially safe.  */
     164              :     case SUBREG:
     165              :     case NOT:
     166              :     case ZERO_EXTEND:
     167              :     case SIGN_EXTEND:
     168              :     case TRUNCATE:
     169              :     case PLUS:
     170              :     case MINUS:
     171              :     case MULT:
     172              :     case SMUL_HIGHPART:
     173              :     case UMUL_HIGHPART:
     174              :     case AND:
     175              :     case IOR:
     176              :     case XOR:
     177              :       return true;
     178              : 
     179              :     /* We can propagate for the shifted operand, but not the shift
     180              :        count.  The count is handled specially.  */
     181              :     case ASHIFT:
     182              :     case LSHIFTRT:
     183              :     case ASHIFTRT:
     184              :     case SS_ASHIFT:
     185              :     case US_ASHIFT:
     186              :       return true;
     187              : 
     188              :     /* There may be other safe codes.  If so they can be added
     189              :        individually when discovered.  */
     190              :     default:
     191              :       return false;
     192              :     }
     193              : }
     194              : 
     195              : /* Clear bits in LIVENOW and set bits in LIVE_TMP for objects
     196              :    set/clobbered by OBJ contained in INSN.
     197              : 
     198              :    Conceptually it is always safe to ignore a particular destination
     199              :    here as that will result in more chunks of data being considered
     200              :    live.  That's what happens when we "continue" the main loop when
     201              :    we see something we don't know how to handle such as a vector
     202              :    mode destination.
     203              : 
     204              :    The more accurate we are in identifying what objects (and chunks
     205              :    within an object) are set by INSN, the more aggressive the
     206              :    optimization phase during use handling will be.  */
     207              : 
     208              : static bool
     209    137778816 : ext_dce_process_sets (rtx_insn *insn, rtx obj, bitmap live_tmp)
     210              : {
     211    137778816 :   bool skipped_dest = false;
     212              : 
     213    137778816 :   subrtx_iterator::array_type array;
     214    396399474 :   FOR_EACH_SUBRTX (iter, array, obj, NONCONST)
     215              :     {
     216    258620658 :       const_rtx x = *iter;
     217              : 
     218              :       /* An EXPR_LIST (from call fusage) ends in NULL_RTX.  */
     219    258620658 :       if (x == NULL_RTX)
     220      9524957 :         continue;
     221              : 
     222    249095701 :       if (UNSPEC_P (x))
     223       572241 :         continue;
     224              : 
     225    248523460 :       if (GET_CODE (x) == SET || GET_CODE (x) == CLOBBER)
     226              :         {
     227    143419915 :           unsigned bit = 0;
     228    143419915 :           x = SET_DEST (x);
     229              : 
     230              :           /* We don't support vector destinations or destinations
     231              :              wider than DImode.  */
     232    143419915 :           scalar_mode outer_mode;
     233    147303554 :           if (!is_a <scalar_mode> (GET_MODE (x), &outer_mode)
     234     90749471 :               || GET_MODE_BITSIZE (outer_mode) > HOST_BITS_PER_WIDE_INT)
     235              :             {
     236              :               /* Skip the subrtxs of this destination.  There is
     237              :                  little value in iterating into the subobjects, so
     238              :                  just skip them for a bit of efficiency.  */
     239     56554083 :               skipped_dest = true;
     240     56554083 :               iter.skip_subrtxes ();
     241    315174741 :               continue;
     242              :             }
     243              : 
     244              :           /* We could have (strict_low_part (subreg ...)).  We can not just
     245              :              strip the STRICT_LOW_PART as that would result in clearing
     246              :              some bits in LIVENOW that are still live.  So process the
     247              :              STRICT_LOW_PART specially.  */
     248     86865832 :           if (GET_CODE (x) == STRICT_LOW_PART)
     249              :             {
     250            0 :               x = XEXP (x, 0);
     251              : 
     252              :               /* The only valid operand of a STRICT_LOW_PART is a non
     253              :                  paradoxical SUBREG.  */
     254            0 :               gcc_assert (SUBREG_P (x)
     255              :                           && !paradoxical_subreg_p (x)
     256              :                           && SUBREG_BYTE (x).is_constant ());
     257              : 
     258              :               /* I think we should always see a REG here.  But let's
     259              :                  be sure.  */
     260            0 :               gcc_assert (REG_P (SUBREG_REG (x)));
     261              : 
     262              :               /* The inner mode might be larger, just punt for
     263              :                  that case.  Remember, we can not just continue to process
     264              :                  the inner RTXs due to the STRICT_LOW_PART.  */
     265            0 :               if (!is_a <scalar_mode> (GET_MODE (SUBREG_REG (x)), &outer_mode)
     266            0 :                   || GET_MODE_BITSIZE (outer_mode) > HOST_BITS_PER_WIDE_INT)
     267              :                 {
     268              :                   /* Skip the subrtxs of the STRICT_LOW_PART.  We can't
     269              :                      process them because it'll set objects as no longer
     270              :                      live when they are in fact still live.  */
     271            0 :                   skipped_dest = true;
     272            0 :                   iter.skip_subrtxes ();
     273            0 :                   continue;
     274              :                 }
     275              : 
     276              :               /* LIVE_TMP contains the set groups that are live-out and set in
     277              :                  this insn.  It is used to narrow the groups live-in for the
     278              :                  inputs of this insn.
     279              : 
     280              :                  The simple thing to do is mark all the groups as live, but
     281              :                  that will significantly inhibit optimization.
     282              : 
     283              :                  We also need to be careful in the case where we have an in-out
     284              :                  operand.  If we're not careful we'd clear LIVE_TMP
     285              :                  incorrectly.  */
     286            0 :               HOST_WIDE_INT rn = REGNO (SUBREG_REG (x));
     287            0 :               int limit = group_limit (SUBREG_REG (x));
     288            0 :               for (HOST_WIDE_INT i = 4 * rn; i < 4 * rn + limit; i++)
     289            0 :                 if (bitmap_bit_p (livenow, i))
     290            0 :                   bitmap_set_bit (live_tmp, i);
     291              : 
     292            0 :               if (bitmap_empty_p (live_tmp))
     293            0 :                 make_reg_live (live_tmp, rn);
     294              : 
     295              :               /* The mode of the SUBREG tells us how many bits we can
     296              :                  clear.  */
     297            0 :               machine_mode mode = GET_MODE (x);
     298            0 :               HOST_WIDE_INT size
     299            0 :                 = exact_log2 (GET_MODE_SIZE (mode).to_constant ()) + 1;
     300            0 :               bitmap_clear_range (livenow, 4 * rn, size);
     301              : 
     302              :               /* We have fully processed this destination.  */
     303            0 :               iter.skip_subrtxes ();
     304            0 :               continue;
     305            0 :             }
     306              : 
     307              :           /* Phase one of destination handling.  First remove any wrapper
     308              :              such as SUBREG or ZERO_EXTRACT.  */
     309     86865832 :           unsigned HOST_WIDE_INT mask
     310     86865832 :             = GET_MODE_MASK (GET_MODE_INNER (GET_MODE (x)));
     311     86865832 :           if (SUBREG_P (x))
     312              :             {
     313              :               /* If we have a SUBREG destination that is too wide, just
     314              :                  skip the destination rather than continuing this iterator.
     315              :                  While continuing would be better, we'd need to strip the
     316              :                  subreg and restart within the SET processing rather than
     317              :                  the top of the loop which just complicates the flow even
     318              :                  more.  */
     319       654497 :               if (!is_a <scalar_mode> (GET_MODE (SUBREG_REG (x)), &outer_mode)
     320       543350 :                   || GET_MODE_BITSIZE (outer_mode) > HOST_BITS_PER_WIDE_INT)
     321              :                 {
     322       111147 :                   skipped_dest = true;
     323       111147 :                   iter.skip_subrtxes ();
     324       111147 :                   continue;
     325              :                 }
     326              : 
     327              :               /* We can safely strip a paradoxical subreg.  The inner mode will
     328              :                  be narrower than the outer mode.  We'll clear fewer bits in
     329              :                  LIVENOW than we'd like, but that's always safe.  */
     330       432691 :               if (paradoxical_subreg_p (x))
     331              :                 x = XEXP (x, 0);
     332       422952 :               else if (SUBREG_BYTE (x).is_constant ())
     333              :                 {
     334       422952 :                   bit = subreg_lsb (x).to_constant ();
     335       422952 :                   mask = GET_MODE_MASK (GET_MODE (SUBREG_REG (x))) << bit;
     336       422952 :                   gcc_assert (mask);
     337              :                   x = SUBREG_REG (x);
     338              :                 }
     339              :               else
     340              :                 gcc_unreachable ();
     341              :             }
     342              : 
     343     86754685 :           if (GET_CODE (x) == ZERO_EXTRACT)
     344              :             {
     345              :               /* Unlike a SUBREG destination, a set of a ZERO_EXTRACT only
     346              :                  modifies the bits referenced in the ZERO_EXTRACT, the rest
     347              :                  remain the same.  Thus we can not continue here, we must
     348              :                  either figure out what part of the destination is modified
     349              :                  or skip the sub-rtxs.  */
     350         3530 :               skipped_dest = true;
     351         3530 :               iter.skip_subrtxes ();
     352         3530 :               continue;
     353              :             }
     354              : 
     355              :           /* BIT >= 64 indicates something went horribly wrong.  */
     356     86751155 :           gcc_assert (bit <= HOST_BITS_PER_WIDE_INT - 1);
     357              : 
     358              :           /* Now handle the actual object that was changed.  */
     359     86751155 :           if (REG_P (x))
     360              :             {
     361              :               /* LIVE_TMP contains the set groups that are live-out and set in
     362              :                  this insn.  It is used to narrow the groups live-in for the
     363              :                  inputs of this insn.
     364              : 
     365              :                  The simple thing to do is mark all the groups as live, but
     366              :                  that will significantly inhibit optimization.
     367              : 
     368              :                  We also need to be careful in the case where we have an in-out
     369              :                  operand.  If we're not careful we'd clear LIVE_TMP
     370              :                  incorrectly.  */
     371     72764154 :               HOST_WIDE_INT rn = REGNO (x);
     372     72764154 :               int limit = group_limit (x);
     373    325498713 :               for (HOST_WIDE_INT i = 4 * rn; i < 4 * rn + limit; i++)
     374    252734559 :                 if (bitmap_bit_p (livenow, i))
     375    244941926 :                   bitmap_set_bit (live_tmp, i);
     376              : 
     377     72764154 :               if (bitmap_empty_p (live_tmp))
     378      1231352 :                 make_reg_live (live_tmp, rn);
     379              : 
     380              :               /* Now clear the bits known written by this instruction.
     381              :                  Note that BIT need not be a power of two, consider a
     382              :                  ZERO_EXTRACT destination.  */
     383     72764154 :               int start = (bit < 8 ? 0 : bit < 16 ? 1 : bit < 32 ? 2 : 3);
     384     77517969 :               int end = ((mask & ~HOST_WIDE_INT_UC (0xffffffff)) ? 4
     385     28007653 :                          : (mask & HOST_WIDE_INT_UC (0xffff0000)) ? 3
     386      5546900 :                          : (mask & 0xff00) ? 2 : 1);
     387     72764154 :               bitmap_clear_range (livenow, 4 * rn + start, end - start);
     388              :             }
     389              :           /* Some ports generate (clobber (const_int)).  */
     390     13987001 :           else if (CONST_INT_P (x))
     391            0 :             continue;
     392              :           else
     393     13987001 :             gcc_assert (CALL_P (insn)
     394              :                         || MEM_P (x)
     395              :                         || x == pc_rtx
     396              :                         || GET_CODE (x) == SCRATCH);
     397              : 
     398     86751155 :           iter.skip_subrtxes ();
     399     86751155 :         }
     400    105103545 :       else if (GET_CODE (x) == COND_EXEC)
     401              :         {
     402              :           /* This isn't ideal, but may not be so bad in practice.  */
     403            0 :           skipped_dest = true;
     404            0 :           iter.skip_subrtxes ();
     405              :         }
     406              :     }
     407    137778816 :   return skipped_dest;
     408    137778816 : }
     409              : 
     410              : /* INSN is a right shift and the second insn in a shift pair that is a
     411              :    sign or zero extension (SET is the single set associated with INSN).  
     412              : 
     413              :    Replace the source of SET with NEW_SRC which is a source register
     414              :    from NEW_SRC_INSN (the left shift in the pair).  This is effectively
     415              :    the same as the replacement we do for ZERO/SIGN extends on targets
     416              :    that support those insns.  */
     417              : static void
     418            0 : ext_dce_try_optimize_rshift (rtx_insn *insn, rtx set, rtx new_src, rtx_insn *new_src_insn)
     419              : {
     420              :   /* If the modes are not the same or one is a hard register, then
     421              :      conservatively do nothing.  */
     422            0 :   if (GET_MODE (SET_SRC (set)) != GET_MODE (new_src)
     423            0 :       || !REG_P (XEXP (SET_SRC (set), 0))
     424            0 :       || !REG_P (new_src)
     425            0 :       || REGNO (XEXP (SET_SRC (set), 0)) < FIRST_PSEUDO_REGISTER
     426            0 :       || REGNO (new_src) < FIRST_PSEUDO_REGISTER)
     427              :     return;
     428              : 
     429            0 :   if (dump_file)
     430              :     {
     431            0 :       fprintf (dump_file, "Processing insn:\n");
     432            0 :       dump_insn_slim (dump_file, insn);
     433            0 :       fprintf (dump_file, "Trying to simplify pattern:\n");
     434            0 :       print_rtl_single (dump_file, SET_SRC (set));
     435              :     }
     436              : 
     437              :   /* We decided to turn do the optimization but allow it to be rejected for
     438              :      bisection purposes.  */
     439            0 :   if (!dbg_cnt (::ext_dce))
     440              :     {
     441            0 :       if (dump_file)
     442            0 :         fprintf (dump_file, "Rejected due to debug counter.\n");
     443            0 :       return;
     444              :     }
     445              : 
     446              :   /* We're going to generate a fresh insn for the move, so put it
     447              :      into a sequence that we can emit after the current insn.   */
     448            0 :   start_sequence ();
     449            0 :   emit_move_insn (SET_DEST (set), new_src);
     450            0 :   rtx_insn *seq = end_sequence (); 
     451            0 :   emit_insn_after (seq, insn);
     452              : 
     453              :   /* Mark the destination as changed.  */
     454            0 :   rtx x = SET_DEST (set);
     455            0 :   while (SUBREG_P (x) || GET_CODE (x) == ZERO_EXTRACT)
     456            0 :     x = XEXP (x, 0);
     457            0 :   gcc_assert (REG_P (x));
     458            0 :   bitmap_set_bit (changed_pseudos, REGNO (x));
     459              : 
     460            0 :   if (dump_file)
     461              :     {
     462            0 :       fprintf (dump_file, "Successfully transformed to:\n");
     463            0 :       print_rtl_single (dump_file, PATTERN (seq));
     464            0 :       fprintf (dump_file, "\n");
     465              :     }
     466              : 
     467            0 :   delete_insn (insn);
     468              : 
     469              :   /* If NEW_SRC died in its prior location, then we need to remove the
     470              :      death note and move it to the new location.  */
     471            0 :   rtx note = find_regno_note (new_src_insn, REG_DEAD, REGNO (new_src));
     472            0 :   if (note)
     473              :     {
     474            0 :       remove_note (new_src_insn, note);
     475            0 :       add_reg_note (insn, REG_DEAD, new_src);
     476              :     }
     477              : }
     478              : 
     479              : 
     480              : /* INSN has a sign/zero extended source inside SET that we will
     481              :    try to turn into a SUBREG.  If NEW_SRC is non-null, use that
     482              :    for the new source of INSN's set.  That scenario only happens
     483              :    when we're optimizing a shift pair.  */
     484              : static void
     485         8581 : ext_dce_try_optimize_extension (rtx_insn *insn, rtx set)
     486              : {
     487         8581 :   rtx src = SET_SRC (set);
     488         8581 :   rtx inner = XEXP (src, 0);
     489              : 
     490              :   /* For sign-extending loads from memory, try to replace with a
     491              :      zero-extending load when the upper bits are dead.  E.g. on RISC-V
     492              :      this turns lh+zext.h into just lhu.  */
     493         8581 :   if (MEM_P (inner) && GET_CODE (src) == SIGN_EXTEND)
     494              :     {
     495           28 :       if (dump_file)
     496              :         {
     497            0 :           fprintf (dump_file, "Processing insn:\n");
     498            0 :           dump_insn_slim (dump_file, insn);
     499            0 :           fprintf (dump_file, "Trying to narrow sign_extend to zero_extend:\n");
     500            0 :           print_rtl_single (dump_file, SET_SRC (set));
     501              :         }
     502              : 
     503           28 :       if (!dbg_cnt (::ext_dce))
     504              :         {
     505            0 :           if (dump_file)
     506            0 :             fprintf (dump_file, "Rejected due to debug counter.\n");
     507            0 :           return;
     508              :         }
     509              : 
     510           28 :       rtx new_pattern = gen_rtx_ZERO_EXTEND (GET_MODE (src), inner);
     511           28 :       int ok = validate_change (insn, &SET_SRC (set), new_pattern, false);
     512              : 
     513           28 :       rtx x = SET_DEST (set);
     514           28 :       while (SUBREG_P (x) || GET_CODE (x) == ZERO_EXTRACT)
     515            0 :         x = XEXP (x, 0);
     516              : 
     517           28 :       gcc_assert (REG_P (x));
     518           28 :       if (ok)
     519              :         {
     520           28 :           bitmap_set_bit (changed_pseudos, REGNO (x));
     521           28 :           remove_reg_equal_equiv_notes (insn, false);
     522              :         }
     523              : 
     524           28 :       if (dump_file)
     525              :         {
     526            0 :           if (ok)
     527            0 :             fprintf (dump_file, "Successfully transformed to:\n");
     528              :           else
     529            0 :             fprintf (dump_file, "Failed transformation to:\n");
     530            0 :           print_rtl_single (dump_file, new_pattern);
     531            0 :           fprintf (dump_file, "\n");
     532              :         }
     533           28 :       return;
     534              :     }
     535              : 
     536              :   /* Avoid (subreg (mem)) and other constructs which may be valid RTL, but
     537              :      not useful for this optimization.  */
     538         8553 :   if (!(REG_P (inner) || (SUBREG_P (inner) && REG_P (SUBREG_REG (inner)))))
     539              :     return;
     540              : 
     541         5828 :   rtx new_pattern;
     542         5828 :   if (dump_file)
     543              :     {
     544            0 :       fprintf (dump_file, "Processing insn:\n");
     545            0 :       dump_insn_slim (dump_file, insn);
     546            0 :       fprintf (dump_file, "Trying to simplify pattern:\n");
     547            0 :       print_rtl_single (dump_file, SET_SRC (set));
     548              :     }
     549              : 
     550              :   /* We decided to turn do the optimization but allow it to be rejected for
     551              :      bisection purposes.  */
     552         5828 :   if (!dbg_cnt (::ext_dce))
     553              :     {
     554            0 :       if (dump_file)
     555            0 :         fprintf (dump_file, "Rejected due to debug counter.\n");
     556            0 :       return;
     557              :     }
     558              : 
     559        11656 :   new_pattern = simplify_gen_subreg (GET_MODE (src), inner,
     560         5828 :                                      GET_MODE (inner), 0);
     561              :   /* simplify_gen_subreg may fail in which case NEW_PATTERN will be NULL.
     562              :      We must not pass that as a replacement pattern to validate_change.  */
     563         5828 :   if (new_pattern)
     564              :     {
     565         5828 :       int ok = validate_change (insn, &SET_SRC (set), new_pattern, false);
     566              : 
     567         5828 :       rtx x = SET_DEST (set);
     568         5828 :       while (SUBREG_P (x) || GET_CODE (x) == ZERO_EXTRACT)
     569            0 :         x = XEXP (x, 0);
     570              : 
     571         5828 :       gcc_assert (REG_P (x));
     572         5828 :       if (ok)
     573         5828 :         bitmap_set_bit (changed_pseudos, REGNO (x));
     574              : 
     575         5828 :       if (dump_file)
     576              :         {
     577            0 :           if (ok)
     578            0 :             fprintf (dump_file, "Successfully transformed to:\n");
     579              :           else
     580            0 :             fprintf (dump_file, "Failed transformation to:\n");
     581              : 
     582            0 :           print_rtl_single (dump_file, new_pattern);
     583            0 :           fprintf (dump_file, "\n");
     584              :         }
     585              : 
     586              :       /* INSN may have a REG_EQUAL note indicating that the value was
     587              :          sign or zero extended.  That note is no longer valid since we've
     588              :          just removed the extension.  Just wipe the notes.  */
     589         5828 :       if (ok)
     590         5828 :         remove_reg_equal_equiv_notes (insn, false);
     591              :     }
     592              :   else
     593              :     {
     594            0 :       if (dump_file)
     595            0 :         fprintf (dump_file, "Unable to generate valid SUBREG expression.\n");
     596              :     }
     597              : }
     598              : 
     599              : /* Try to promote a narrow-mode operation wrapped in a sign/zero extension
     600              :    to the wider mode when the extended bits are dead.  For example,
     601              :    (sign_extend:DI (plus:SI (x) (y))) -> (plus:DI (x') (y'))
     602              :    where x' and y' are the operands promoted to DI mode.
     603              : 
     604              :    This enables the combine pass to match wider-mode target patterns
     605              :    (e.g., sh2add on RISC-V) that cannot match the narrow-mode operation.  */
     606              : 
     607              : static void
     608            0 : ext_dce_try_promote_operation (rtx_insn *insn, rtx set)
     609              : {
     610            0 :   rtx src = SET_SRC (set);
     611              : 
     612              :   /* If the extension was already optimized away, nothing to do.  */
     613            0 :   if (GET_CODE (src) != SIGN_EXTEND && GET_CODE (src) != ZERO_EXTEND)
     614            0 :     return;
     615              : 
     616            0 :   machine_mode outer_mode = GET_MODE (src);
     617            0 :   rtx inner = XEXP (src, 0);
     618              : 
     619              :   /* Only handle binary and unary arithmetic/logic operations.  */
     620            0 :   if (!BINARY_P (inner) && !UNARY_P (inner))
     621              :     return;
     622              : 
     623            0 :   rtx_code inner_code = GET_CODE (inner);
     624              : 
     625              :   /* Restrict to operations whose result in the low bits is identical
     626              :      regardless of input width (i.e., no high-bit dependencies).  */
     627            0 :   switch (inner_code)
     628              :     {
     629            0 :     case PLUS:
     630            0 :     case MINUS:
     631            0 :     case MULT:
     632            0 :     case NEG:
     633            0 :     case AND:
     634            0 :     case IOR:
     635            0 :     case XOR:
     636            0 :     case NOT:
     637            0 :     case ASHIFT:
     638            0 :       break;
     639              :     default:
     640              :       return;
     641              :     }
     642              : 
     643              :   /* Promote each operand to the outer mode.  */
     644            0 :   int nops = BINARY_P (inner) ? 2 : 1;
     645            0 :   rtx new_ops[2];
     646              : 
     647            0 :   for (int i = 0; i < nops; i++)
     648              :     {
     649            0 :       rtx op = XEXP (inner, i);
     650              : 
     651            0 :       if (CONST_INT_P (op))
     652            0 :         new_ops[i] = op;
     653            0 :       else if (REG_P (op))
     654              :         {
     655            0 :           new_ops[i] = simplify_gen_subreg (outer_mode, op,
     656            0 :                                             GET_MODE (op), 0);
     657            0 :           if (!new_ops[i])
     658              :             return;
     659              :         }
     660            0 :       else if (SUBREG_P (op) && REG_P (SUBREG_REG (op)))
     661              :         {
     662              :           /* The inner register may already be in the target mode
     663              :              (e.g., subreg:SI (reg:DI ...) 0).  Extract it directly
     664              :              rather than creating a paradoxical subreg of a subreg,
     665              :              which simplify_gen_subreg rejects.  */
     666            0 :           rtx inner_reg = SUBREG_REG (op);
     667            0 :           if (GET_MODE (inner_reg) == outer_mode)
     668            0 :             new_ops[i] = inner_reg;
     669              :           else
     670              :             {
     671            0 :               new_ops[i] = simplify_gen_subreg (outer_mode, inner_reg,
     672              :                                                 GET_MODE (inner_reg), 0);
     673            0 :               if (!new_ops[i])
     674              :                 return;
     675              :             }
     676              :         }
     677              :       else
     678              :         return;
     679              :     }
     680              : 
     681              :   /* Build the promoted operation.  */
     682            0 :   rtx new_src;
     683            0 :   if (BINARY_P (inner))
     684            0 :     new_src = gen_rtx_fmt_ee (inner_code, outer_mode,
     685              :                               new_ops[0], new_ops[1]);
     686              :   else
     687            0 :     new_src = gen_rtx_fmt_e (inner_code, outer_mode, new_ops[0]);
     688              : 
     689            0 :   if (dump_file)
     690              :     {
     691            0 :       fprintf (dump_file, "Processing insn:\n");
     692            0 :       dump_insn_slim (dump_file, insn);
     693            0 :       fprintf (dump_file, "Trying to promote to wider mode:\n");
     694            0 :       print_rtl_single (dump_file, new_src);
     695              :     }
     696              : 
     697              :   /* We decided to try the promotion but allow it to be rejected for
     698              :      bisection purposes.  */
     699            0 :   if (!dbg_cnt (::ext_dce))
     700              :     {
     701            0 :       if (dump_file)
     702            0 :         fprintf (dump_file, "Rejected due to debug counter.\n");
     703            0 :       return;
     704              :     }
     705              : 
     706            0 :   int ok = validate_change (insn, &SET_SRC (set), new_src, false);
     707              : 
     708            0 :   rtx x = SET_DEST (set);
     709            0 :   while (SUBREG_P (x) || GET_CODE (x) == ZERO_EXTRACT)
     710            0 :     x = XEXP (x, 0);
     711              : 
     712            0 :   gcc_assert (REG_P (x));
     713            0 :   if (ok)
     714            0 :     bitmap_set_bit (changed_pseudos, REGNO (x));
     715              : 
     716            0 :   if (dump_file)
     717              :     {
     718            0 :       if (ok)
     719            0 :         fprintf (dump_file, "Successfully promoted to:\n");
     720              :       else
     721            0 :         fprintf (dump_file, "Failed promotion to:\n");
     722            0 :       print_rtl_single (dump_file, new_src);
     723            0 :       fprintf (dump_file, "\n");
     724              :     }
     725              : 
     726            0 :   if (ok)
     727            0 :     remove_reg_equal_equiv_notes (insn, false);
     728              : }
     729              : 
     730              : /* Record INSN as a promotion candidate if it passes the same validity
     731              :    checks as ext_dce_try_promote_operation.  We defer actual promotion
     732              :    until we can determine whether the candidate is part of a chain.  */
     733              : 
     734              : static void
     735         2753 : ext_dce_record_promotion_candidate (rtx_insn *insn, rtx set)
     736              : {
     737         2753 :   rtx src = SET_SRC (set);
     738              : 
     739         2753 :   if (GET_CODE (src) != SIGN_EXTEND && GET_CODE (src) != ZERO_EXTEND)
     740              :     return;
     741              : 
     742         2753 :   machine_mode outer_mode = GET_MODE (src);
     743         2753 :   rtx inner = XEXP (src, 0);
     744              : 
     745         2753 :   if (!BINARY_P (inner) && !UNARY_P (inner))
     746              :     return;
     747              : 
     748         1701 :   rtx_code inner_code = GET_CODE (inner);
     749              : 
     750         1701 :   switch (inner_code)
     751              :     {
     752            0 :     case PLUS:
     753            0 :     case MINUS:
     754            0 :     case MULT:
     755            0 :     case NEG:
     756            0 :     case AND:
     757            0 :     case IOR:
     758            0 :     case XOR:
     759            0 :     case NOT:
     760            0 :     case ASHIFT:
     761            0 :       break;
     762              :     default:
     763              :       return;
     764              :     }
     765              : 
     766              :   /* Dry-run: check that all operands can be promoted.  */
     767            0 :   int nops = BINARY_P (inner) ? 2 : 1;
     768            0 :   for (int i = 0; i < nops; i++)
     769              :     {
     770            0 :       rtx op = XEXP (inner, i);
     771            0 :       if (CONST_INT_P (op))
     772            0 :         continue;
     773            0 :       else if (REG_P (op))
     774              :         {
     775            0 :           if (!simplify_gen_subreg (outer_mode, op, GET_MODE (op), 0))
     776              :             return;
     777              :         }
     778            0 :       else if (SUBREG_P (op) && REG_P (SUBREG_REG (op)))
     779              :         {
     780            0 :           rtx inner_reg = SUBREG_REG (op);
     781            0 :           if (GET_MODE (inner_reg) != outer_mode
     782            0 :               && !simplify_gen_subreg (outer_mode, inner_reg,
     783              :                                        GET_MODE (inner_reg), 0))
     784              :             return;
     785              :         }
     786              :       else
     787              :         return;
     788              :     }
     789              : 
     790              :   /* Find the destination register.  */
     791            0 :   rtx dest = SET_DEST (set);
     792            0 :   while (SUBREG_P (dest) || GET_CODE (dest) == ZERO_EXTRACT)
     793            0 :     dest = XEXP (dest, 0);
     794            0 :   if (!REG_P (dest))
     795              :     return;
     796            0 :   unsigned int dest_regno = REGNO (dest);
     797              : 
     798              :   /* Record the candidate.  */
     799            0 :   promotion_candidates.safe_push ({insn, set});
     800            0 :   bitmap_set_bit (promotable_dests, dest_regno);
     801              : 
     802              :   /* Mark register operands as consumed by a candidate.  */
     803            0 :   for (int i = 0; i < nops; i++)
     804              :     {
     805            0 :       rtx op = XEXP (inner, i);
     806            0 :       if (REG_P (op))
     807            0 :         bitmap_set_bit (consumed_by_candidate, REGNO (op));
     808            0 :       else if (SUBREG_P (op) && REG_P (SUBREG_REG (op)))
     809            0 :         bitmap_set_bit (consumed_by_candidate, REGNO (SUBREG_REG (op)));
     810              :     }
     811              : }
     812              : 
     813              : /* Promote candidates that form chains: a candidate whose result feeds
     814              :    into another candidate's operand, or whose operand comes from another
     815              :    candidate's result.  Skip standalone (isolated) promotions.  */
     816              : 
     817              : static void
     818      9758674 : ext_dce_promote_chained_candidates (void)
     819              : {
     820              :   /* Propagate chain info through copies recorded during the reverse scan.
     821              :      Since copies are recorded in reverse order, iterate forward to propagate
     822              :      promotable_dests (which was set late in the scan) through copies that
     823              :      were seen earlier.  */
     824      9758674 :   unsigned cix;
     825      9758674 :   copy_info *cp;
     826      9764502 :   FOR_EACH_VEC_ELT (promotion_copies, cix, cp)
     827              :     {
     828         5828 :       if (bitmap_bit_p (promotable_dests, cp->src_regno))
     829            0 :         bitmap_set_bit (promotable_dests, cp->dest_regno);
     830         5828 :       if (bitmap_bit_p (consumed_by_candidate, cp->dest_regno))
     831            0 :         bitmap_set_bit (consumed_by_candidate, cp->src_regno);
     832              :     }
     833              : 
     834              :   unsigned ix;
     835              :   promotion_candidate_info *cand;
     836              : 
     837      9758674 :   FOR_EACH_VEC_ELT (promotion_candidates, ix, cand)
     838              :     {
     839              :       /* Find destination register.  */
     840            0 :       rtx dest = SET_DEST (cand->set);
     841            0 :       while (SUBREG_P (dest) || GET_CODE (dest) == ZERO_EXTRACT)
     842            0 :         dest = XEXP (dest, 0);
     843            0 :       unsigned int dest_regno = REGNO (dest);
     844              : 
     845              :       /* Check if this candidate's result feeds into another candidate.  */
     846            0 :       bool is_chained = bitmap_bit_p (consumed_by_candidate, dest_regno);
     847              : 
     848              :       /* Check if any operand comes from another candidate's result.  */
     849            0 :       if (!is_chained)
     850              :         {
     851            0 :           rtx inner = XEXP (SET_SRC (cand->set), 0);
     852            0 :           int nops = BINARY_P (inner) ? 2 : 1;
     853            0 :           for (int i = 0; i < nops && !is_chained; i++)
     854              :             {
     855            0 :               rtx op = XEXP (inner, i);
     856            0 :               if (REG_P (op))
     857            0 :                 is_chained = bitmap_bit_p (promotable_dests, REGNO (op));
     858            0 :               else if (SUBREG_P (op) && REG_P (SUBREG_REG (op)))
     859            0 :                 is_chained = bitmap_bit_p (promotable_dests,
     860            0 :                                            REGNO (SUBREG_REG (op)));
     861              :             }
     862              :         }
     863              : 
     864            0 :       if (is_chained)
     865            0 :         ext_dce_try_promote_operation (cand->insn, cand->set);
     866            0 :       else if (dump_file)
     867              :         {
     868            0 :           fprintf (dump_file, "Skipping standalone promotion for insn:\n");
     869            0 :           dump_insn_slim (dump_file, cand->insn);
     870            0 :           fprintf (dump_file, "\n");
     871              :         }
     872              :     }
     873              : 
     874      9758674 :   promotion_candidates.truncate (0);
     875      9758674 :   promotion_copies.truncate (0);
     876      9758674 :   bitmap_clear (promotable_dests);
     877      9758674 :   bitmap_clear (consumed_by_candidate);
     878      9758674 : }
     879              : 
     880              : /* Some operators imply that their second operand is fully live,
     881              :    regardless of how many bits in the output are live.  An example
     882              :    would be the shift count on a target without SHIFT_COUNT_TRUNCATED
     883              :    defined.
     884              : 
     885              :    Return TRUE if CODE is such an operator.  FALSE otherwise.  */
     886              : 
     887              : static bool
     888     76637193 : binop_implies_op2_fully_live (rtx_code code)
     889              : {
     890            0 :   switch (code)
     891              :     {
     892              :     case ASHIFT:
     893              :     case LSHIFTRT:
     894              :     case ASHIFTRT:
     895              :     case ROTATE:
     896              :     case ROTATERT:
     897              :     case SS_ASHIFT:
     898              :     case US_ASHIFT:
     899              :       return !SHIFT_COUNT_TRUNCATED;
     900              : 
     901            0 :     default:
     902            0 :       return false;
     903              :     }
     904              : }
     905              : 
     906              : /* X, with code CODE, is an operation for which safe_for_live_propagation
     907              :    holds true, and bits set in MASK are live in the result.  Compute a
     908              :    mask of (potentially) live bits in the non-constant inputs.  In case of
     909              :    binop_implies_op2_fully_live (e.g. shifts), the computed mask may
     910              :    exclusively pertain to the first operand.
     911              : 
     912              :    This looks wrong as we may have some important operations embedded as
     913              :    operands of another operation.  For example, we might have an extension
     914              :    wrapping a shift.  It really feels like this needs to be recursing down
     915              :    into operands much more often.  */
     916              : 
     917              : unsigned HOST_WIDE_INT
     918     71483689 : carry_backpropagate (unsigned HOST_WIDE_INT mask, enum rtx_code code, rtx x)
     919              : {
     920     73188300 :   if (mask == 0)
     921              :     return 0;
     922              : 
     923     73188258 :   enum machine_mode mode = GET_MODE_INNER (GET_MODE (x));
     924     73188258 :   unsigned HOST_WIDE_INT mmask = GET_MODE_MASK (mode);
     925              : 
     926              :   /* While we don't try to optimize operations on types larger
     927              :      than 64 bits, we do want to make sure not to invoke undefined
     928              :      behavior when presented with such operations during use
     929              :      processing.  The safe thing to do is to just return mmask
     930              :      for that scenario indicating every possible chunk is life.  */
     931     73188258 :   scalar_int_mode smode;
     932     73188258 :   if (!is_a <scalar_int_mode> (mode, &smode)
     933     61031141 :       || GET_MODE_BITSIZE (smode) > HOST_BITS_PER_WIDE_INT)
     934              :     return mmask;
     935              : 
     936     58963978 :   switch (code)
     937              :     {
     938     16532405 :     case PLUS:
     939     16532405 :     case MINUS:
     940     16532405 :     case MULT:
     941     16532405 :       return (HOST_WIDE_INT_UC (2) << floor_log2 (mask)) - 1;
     942              : 
     943              :     /* We propagate for the shifted operand, but not the shift
     944              :        count.  The count is handled specially.  */
     945      1412936 :     case ASHIFT:
     946      1412936 :       if (CONST_INT_P (XEXP (x, 1))
     947      2754617 :           && UINTVAL (XEXP (x, 1)) < GET_MODE_BITSIZE (smode))
     948      1341645 :         return (HOST_WIDE_INT) mask >> INTVAL (XEXP (x, 1));
     949        71291 :       return (HOST_WIDE_INT_UC (2) << floor_log2 (mask)) - 1;
     950              : 
     951              :     /* We propagate for the shifted operand, but not the shift
     952              :        count.  The count is handled specially.  */
     953       638442 :     case LSHIFTRT:
     954       638442 :       if (CONST_INT_P (XEXP (x, 1))
     955      1244954 :           && UINTVAL (XEXP (x, 1)) < GET_MODE_BITSIZE (smode))
     956       606484 :         return mmask & (mask << INTVAL (XEXP (x, 1)));
     957              :       return mmask;
     958              : 
     959              :     /* We propagate for the shifted operand, but not the shift
     960              :        count.  The count is handled specially.  */
     961       306371 :     case ASHIFTRT:
     962       306371 :       if (CONST_INT_P (XEXP (x, 1))
     963       598831 :           && UINTVAL (XEXP (x, 1)) < GET_MODE_BITSIZE (smode))
     964              :         {
     965       292416 :           HOST_WIDE_INT sign = 0;
     966       292416 :           if (HOST_BITS_PER_WIDE_INT - clz_hwi (mask) + INTVAL (XEXP (x, 1))
     967       292416 :               > GET_MODE_BITSIZE (smode))
     968       584832 :             sign = HOST_WIDE_INT_1U << (GET_MODE_BITSIZE (smode) - 1);
     969       292416 :           return sign | (mmask & (mask << INTVAL (XEXP (x, 1))));
     970              :         }
     971              :       return mmask;
     972              : 
     973        37549 :     case SMUL_HIGHPART:
     974        37549 :     case UMUL_HIGHPART:
     975        37549 :       if (XEXP (x, 1) == const0_rtx)
     976              :         return 0;
     977        37549 :       if (XEXP (x, 1) == const1_rtx)
     978              :         return mmask;
     979        37549 :       if (CONST_INT_P (XEXP (x, 1)))
     980              :         {
     981            0 :           if (pow2p_hwi (INTVAL (XEXP (x, 1))))
     982            0 :             return mmask & (mask << (GET_MODE_BITSIZE (smode)
     983            0 :                                      - exact_log2 (INTVAL (XEXP (x, 1)))));
     984              : 
     985            0 :           int bits = (HOST_BITS_PER_WIDE_INT + GET_MODE_BITSIZE (smode)
     986            0 :                       - clz_hwi (mask) - ctz_hwi (INTVAL (XEXP (x, 1))));
     987            0 :           if (bits < GET_MODE_BITSIZE (smode))
     988            0 :             return (HOST_WIDE_INT_1U << bits) - 1;
     989              :         }
     990              :       return mmask;
     991              : 
     992       561291 :     case SIGN_EXTEND:
     993       561291 :       if (!GET_MODE_BITSIZE (GET_MODE (x)).is_constant ()
     994       561291 :           || !GET_MODE_BITSIZE (GET_MODE (XEXP (x, 0))).is_constant ())
     995              :         return -1;
     996              : 
     997              :       /* We want the mode of the inner object.  We need to ensure its
     998              :          sign bit is on in MASK.  */
     999       561291 :       mode = GET_MODE_INNER (GET_MODE (XEXP (x, 0)));
    1000       561291 :       if (mask & ~GET_MODE_MASK (mode))
    1001       560762 :         mask |= HOST_WIDE_INT_1U << (GET_MODE_BITSIZE (mode).to_constant ()
    1002       560762 :                                      - 1);
    1003              : 
    1004              :       /* Recurse into the operand.  */
    1005       561291 :       return carry_backpropagate (mask, GET_CODE (XEXP (x, 0)), XEXP (x, 0));
    1006              : 
    1007      1143320 :     case ZERO_EXTEND:
    1008      1143320 :       if (!GET_MODE_BITSIZE (GET_MODE (x)).is_constant ()
    1009      1143320 :           || !GET_MODE_BITSIZE (GET_MODE (XEXP (x, 0))).is_constant ())
    1010              :         return -1;
    1011              : 
    1012              :       /* Recurse into the operand.  */
    1013      1143320 :       return carry_backpropagate (mask, GET_CODE (XEXP (x, 0)), XEXP (x, 0));
    1014              : 
    1015              :     /* If an AND/IOR unconditionally clears/sets bits in the output, then
    1016              :        we do not care about the liveness of those bits in the inputs.
    1017              : 
    1018              :        So AND MASK with the constant for AND and with the complemented constant
    1019              :        for IOR.  */
    1020      1542138 :     case AND:
    1021      1542138 :     case IOR:
    1022      1542138 :       if (CONST_INT_P (XEXP (x, 1)))
    1023       859320 :         mask &= (code == AND
    1024       859320 :                  ? UINTVAL (XEXP (x, 1))
    1025        95710 :                  : ~UINTVAL (XEXP (x, 1)));
    1026              :       return mask;
    1027              : 
    1028              :     /* We propagate for the shifted operand, but not the shift
    1029              :        count.  The count is handled specially.  */
    1030            0 :     case SS_ASHIFT:
    1031            0 :     case US_ASHIFT:
    1032            0 :       if (CONST_INT_P (XEXP (x, 1))
    1033            0 :           && UINTVAL (XEXP (x, 1)) < GET_MODE_BITSIZE (smode))
    1034              :         {
    1035            0 :           return ((mmask & ~((unsigned HOST_WIDE_INT) mmask
    1036            0 :                              >> (INTVAL (XEXP (x, 1))
    1037            0 :                                  + (XEXP (x, 1) != const0_rtx
    1038            0 :                                     && code == SS_ASHIFT))))
    1039            0 :                   | ((HOST_WIDE_INT) mask >> INTVAL (XEXP (x, 1))));
    1040              :         }
    1041              :       return mmask;
    1042              : 
    1043              :     default:
    1044              :       return mask;
    1045              :     }
    1046              : }
    1047              : 
    1048              : /* Process uses in INSN contained in OBJ.  Set appropriate bits in LIVENOW
    1049              :    for any chunks of pseudos that become live, potentially filtering using
    1050              :    bits from LIVE_TMP.
    1051              : 
    1052              :    If MODIFY is true, then optimize sign/zero extensions to SUBREGs when
    1053              :    the extended bits are never read and mark pseudos which had extensions
    1054              :    eliminated in CHANGED_PSEUDOS.  */
    1055              : 
    1056              : static void
    1057    137778816 : ext_dce_process_uses (rtx_insn *insn, rtx obj,
    1058              :                       bitmap live_tmp, bool skipped_dest)
    1059              : {
    1060    137778816 :   subrtx_var_iterator::array_type array_var;
    1061    757036665 :   FOR_EACH_SUBRTX_VAR (iter, array_var, obj, NONCONST)
    1062              :     {
    1063              :       /* An EXPR_LIST (from call fusage) ends in NULL_RTX.  */
    1064    619257849 :       rtx x = *iter;
    1065    619257849 :       if (x == NULL_RTX)
    1066      9524957 :         continue;
    1067              : 
    1068              :       /* So the basic idea in this FOR_EACH_SUBRTX_VAR loop is to
    1069              :          handle SETs explicitly, possibly propagating live information
    1070              :          into the uses.
    1071              : 
    1072              :          We may continue the loop at various points which will cause
    1073              :          iteration into the next level of RTL.  Breaking from the loop
    1074              :          is never safe as it can lead us to fail to process some of the
    1075              :          RTL and thus not make objects live when necessary.  */
    1076    609732892 :       enum rtx_code xcode = GET_CODE (x);
    1077    609732892 :       if (xcode == SET)
    1078              :         {
    1079    121793322 :           const_rtx dst = SET_DEST (x);
    1080    121793322 :           rtx src = SET_SRC (x);
    1081    121793322 :           const_rtx y;
    1082    121793322 :           unsigned HOST_WIDE_INT bit = 0;
    1083              : 
    1084              :           /* The code of the RHS of a SET.  */
    1085    121793322 :           enum rtx_code code = GET_CODE (src);
    1086              : 
    1087              :           /* ?!? How much of this should mirror SET handling, potentially
    1088              :              being shared?   */
    1089    121793322 :           if (SUBREG_P (dst) && subreg_lsb (dst).is_constant (&bit))
    1090              :             {
    1091       587756 :               if (bit >= HOST_BITS_PER_WIDE_INT)
    1092              :                 bit = HOST_BITS_PER_WIDE_INT - 1;
    1093       587756 :               dst = SUBREG_REG (dst);
    1094              :             }
    1095    121205566 :           else if (GET_CODE (dst) == STRICT_LOW_PART)
    1096        13308 :             dst = XEXP (dst, 0);
    1097              : 
    1098              :           /* Main processing of the uses.  Two major goals here.
    1099              : 
    1100              :              First, we want to try and propagate liveness (or the lack
    1101              :              thereof) from the destination register to the source
    1102              :              register(s).
    1103              : 
    1104              :              Second, if the source is an extension, try to optimize
    1105              :              it into a SUBREG.  The SUBREG form indicates we don't
    1106              :              care about the upper bits and will usually be copy
    1107              :              propagated away.
    1108              : 
    1109              :              If we fail to handle something in here, the expectation
    1110              :              is the iterator will dive into the sub-components and
    1111              :              mark all the chunks in any found REGs as live.  */
    1112    121793322 :           if (REG_P (dst) && safe_for_live_propagation (code))
    1113              :             {
    1114              :               /* Create a mask representing the bits of this output
    1115              :                  operand that are live after this insn.  We can use
    1116              :                  this information to refine the live in state of
    1117              :                  inputs to this insn in many cases.
    1118              : 
    1119              :                  We have to do this on a per SET basis, we might have
    1120              :                  an INSN with multiple SETS, some of which can narrow
    1121              :                  the source operand liveness, some of which may not.  */
    1122     71483689 :               unsigned HOST_WIDE_INT dst_mask = 0;
    1123     71483689 :               HOST_WIDE_INT rn = REGNO (dst);
    1124     71483689 :               unsigned HOST_WIDE_INT mask_array[]
    1125              :                 = { 0xff, 0xff00, HOST_WIDE_INT_UC (0xffff0000),
    1126              :                     -HOST_WIDE_INT_UC (0x100000000) };
    1127    357418445 :               for (int i = 0; i < 4; i++)
    1128    285934756 :                 if (bitmap_bit_p (live_tmp, 4 * rn + i))
    1129    228887869 :                   dst_mask |= mask_array[i];
    1130     71483689 :               dst_mask >>= bit;
    1131              : 
    1132              :               /* If we ignored a destination during set processing, then
    1133              :                  consider all the bits live.  */
    1134     71483689 :               if (skipped_dest)
    1135     25220965 :                 dst_mask = -1;
    1136              : 
    1137     71483689 :               dst_mask = carry_backpropagate (dst_mask, code, src);
    1138              : 
    1139              :               /* ??? Could also handle ZERO_EXTRACT / SIGN_EXTRACT
    1140              :                  of the source specially to improve optimization.  */
    1141     71483689 :               if (code == SIGN_EXTEND || code == ZERO_EXTEND)
    1142              :                 {
    1143      1716370 :                   rtx inner = XEXP (src, 0);
    1144      1716370 :                   unsigned HOST_WIDE_INT src_mask
    1145      1716370 :                     = GET_MODE_MASK (GET_MODE_INNER (GET_MODE (inner)));
    1146              : 
    1147              :                   /* DST_MASK could be zero if we had something in the SET
    1148              :                      that we couldn't handle.  */
    1149      1716370 :                   if (modify && !skipped_dest && (dst_mask & ~src_mask) == 0)
    1150              :                     {
    1151         8581 :                       ext_dce_try_optimize_extension (insn, x);
    1152              : 
    1153              :                       /* If the extension was optimized to a copy, propagate
    1154              :                          chain info through it: if the dest is consumed by a
    1155              :                          promotion candidate (seen later in reverse scan),
    1156              :                          the source register is transitively consumed too.  */
    1157         8581 :                       rtx opt_src = SET_SRC (x);
    1158         8581 :                       if (GET_CODE (opt_src) != SIGN_EXTEND
    1159         8581 :                           && GET_CODE (opt_src) != ZERO_EXTEND)
    1160              :                         {
    1161         5828 :                           rtx copy_dest = SET_DEST (x);
    1162         5828 :                           while (SUBREG_P (copy_dest)
    1163         5828 :                                  || GET_CODE (copy_dest) == ZERO_EXTRACT)
    1164            0 :                             copy_dest = XEXP (copy_dest, 0);
    1165              : 
    1166         5828 :                           rtx copy_src = opt_src;
    1167         5828 :                           if (SUBREG_P (copy_src))
    1168         5522 :                             copy_src = SUBREG_REG (copy_src);
    1169              : 
    1170         5828 :                           if (REG_P (copy_dest) && REG_P (copy_src))
    1171              :                             {
    1172         5828 :                               if (bitmap_bit_p (consumed_by_candidate,
    1173         5828 :                                                 REGNO (copy_dest)))
    1174            0 :                                 bitmap_set_bit (consumed_by_candidate,
    1175            0 :                                                 REGNO (copy_src));
    1176         5828 :                               if (bitmap_bit_p (promotable_dests,
    1177         5828 :                                                 REGNO (copy_src)))
    1178            0 :                                 bitmap_set_bit (promotable_dests,
    1179            0 :                                                 REGNO (copy_dest));
    1180         5828 :                               promotion_copies.safe_push (
    1181         5828 :                                 {REGNO (copy_dest), REGNO (copy_src)});
    1182              :                             }
    1183              :                         }
    1184              :                       else
    1185         2753 :                         ext_dce_record_promotion_candidate (insn, x);
    1186              :                     }
    1187              : 
    1188              :                   /* Stripping the extension here just seems wrong on multiple
    1189              :                      levels.  It's source side handling, so it seems like it
    1190              :                      belongs in the loop below.  Stripping here also makes it
    1191              :                      harder than necessary to properly handle live bit groups
    1192              :                      for (ANY_EXTEND (SUBREG)) where the SUBREG has
    1193              :                      SUBREG_PROMOTED state.  */
    1194      1716370 :                   dst_mask &= src_mask;
    1195      1716370 :                   src = XEXP (src, 0);
    1196      1716370 :                   code = GET_CODE (src);
    1197              :                 }
    1198              : 
    1199              :               /* Special case for (sub)targets that do not have extension
    1200              :                  insns (and thus use shifts).  We want to detect when we have
    1201              :                  a shift pair and treat the pair as-if was an extension.
    1202              : 
    1203              :                  Key on the right shift and use (for now) simplistic tests
    1204              :                  to find the corresponding left shift.  */
    1205     71483689 :               scalar_mode outer_mode;
    1206     71483689 :               if ((code == LSHIFTRT || code == ASHIFTRT)
    1207      1038030 :                   && CONST_INT_P (XEXP (src, 1))
    1208      1143340 :                   && (INTVAL (XEXP (src, 1)) == BITS_PER_WORD - 8
    1209      1139126 :                       || INTVAL (XEXP (src, 1)) == BITS_PER_WORD - 16
    1210       978339 :                       || INTVAL (XEXP (src, 1)) == BITS_PER_WORD - 32)
    1211     71549543 :                   && is_a <scalar_mode> (GET_MODE (src), &outer_mode)
    1212     71549543 :                   && GET_MODE_BITSIZE (outer_mode) <= HOST_BITS_PER_WIDE_INT)
    1213              :                 {
    1214              :                   /* So we have a right shift that could correspond to
    1215              :                      the second in a pair impementing QI, HI or SI -> DI
    1216              :                      extension.  See if we can find the left shift.  For
    1217              :                      now, just look one real instruction back.  */
    1218        65710 :                   rtx_insn *prev_insn = prev_nonnote_nondebug_insn_bb (insn);
    1219              : 
    1220              :                   /* The previous insn must be a left shift by the same
    1221              :                      amount.  */
    1222        65710 :                   rtx prev_set;
    1223        65710 :                   if (prev_insn
    1224        62890 :                       && (prev_set = single_set (prev_insn))
    1225              :                       /* The destination of the left shift must be the
    1226              :                          source of the right shift.  */
    1227        62809 :                       && SET_DEST (prev_set) == XEXP (src, 0)
    1228        30907 :                       && GET_CODE (SET_SRC (prev_set)) == ASHIFT
    1229          811 :                       && CONST_INT_P (XEXP (SET_SRC (prev_set), 1))
    1230              :                       /* The counts must match.  */
    1231        65710 :                       && (INTVAL (XEXP (src, 1))
    1232          795 :                           == INTVAL (XEXP (SET_SRC (prev_set), 1))))
    1233              :                     {
    1234           15 :                       unsigned HOST_WIDE_INT src_mask = GET_MODE_BITSIZE (GET_MODE (src)).to_constant ();
    1235           15 :                       src_mask -= INTVAL (XEXP (src, 1));
    1236           15 :                       src_mask = (HOST_WIDE_INT_1U << src_mask) - 1;
    1237              : 
    1238              :                       /* DST_MASK has been adjusted for INSN.  We need its original value.  */
    1239           15 :                       unsigned HOST_WIDE_INT tmp_mask = 0;
    1240           75 :                       for (int i = 0; i < 4; i++)
    1241           60 :                         if (bitmap_bit_p (live_tmp, 4 * rn + i))
    1242           15 :                           tmp_mask |= mask_array[i];
    1243           15 :                       tmp_mask >>= bit;
    1244              : 
    1245           15 :                       if (modify && !skipped_dest && (tmp_mask & ~src_mask) == 0)
    1246              :                         {
    1247            0 :                           ext_dce_try_optimize_rshift (insn, x, XEXP (SET_SRC (prev_set), 0), prev_insn);
    1248              : 
    1249              :                           /* These may not strictly be necessary, but we might as well try and be
    1250              :                              as accurate as possible.  The RHS is now a simple REG.  */
    1251            0 :                           dst_mask = src_mask;
    1252            0 :                           src = XEXP (SET_SRC (prev_set), 0);
    1253            0 :                           code = GET_CODE (src);
    1254              :                         }
    1255              :                     }
    1256              :                 }
    1257              : 
    1258              :               /* Optimization is done at this point.  We just want to make
    1259              :                  sure everything that should get marked as live is marked
    1260              :                  from here onward.  */
    1261              : 
    1262              :               /* We will handle the other operand of a binary operator
    1263              :                  at the bottom of the loop by resetting Y.  */
    1264     71483689 :               if (BINARY_P (src))
    1265     22510355 :                 y = XEXP (src, 0);
    1266              :               else
    1267              :                 y = src;
    1268              : 
    1269              :               /* We're inside a SET and want to process the source operands
    1270              :                  making things live.  Breaking from this loop will cause
    1271              :                  the iterator to work on sub-rtxs, so it is safe to break
    1272              :                  if we see something we don't know how to handle.
    1273              : 
    1274              :                  This code is just hokey as it really just handles trivial
    1275              :                  unary and binary cases.  Otherwise the loop exits and we
    1276              :                  continue iterating on sub-rtxs, but outside the set context.  */
    1277              :               unsigned HOST_WIDE_INT save_mask = dst_mask;
    1278    115525199 :               for (;;)
    1279              :                 {
    1280              :                   /* In general we want to restore DST_MASK before each loop
    1281              :                      iteration.  The exception is when the opcode implies that
    1282              :                      the other operand is fully live.  That's handled by
    1283              :                      changing SAVE_MASK below.  */
    1284     93504444 :                   dst_mask = save_mask;
    1285              :                   /* Strip an outer paradoxical subreg.  The bits outside
    1286              :                      the inner mode are don't cares.  So we can just strip
    1287              :                      and process the inner object.  */
    1288     93504444 :                   if (paradoxical_subreg_p (y))
    1289              :                     y = XEXP (y, 0);
    1290     93402515 :                   else if (SUBREG_P (y) && subreg_lsb (y).is_constant (&bit))
    1291              :                     {
    1292              :                       /* If !TRULY_NOOP_TRUNCATION_MODES_P, the mode
    1293              :                          change performed by Y would normally need to be a
    1294              :                          TRUNCATE rather than a SUBREG.  It is probably the
    1295              :                          guarantee provided by SUBREG_PROMOTED_VAR_P that
    1296              :                          allows the SUBREG in Y as an exception.  We must
    1297              :                          therefore preserve that guarantee and treat the
    1298              :                          upper bits of the inner register as live
    1299              :                          regardless of the outer code.  See PR 120050.  */
    1300      1921278 :                       if (!REG_P (SUBREG_REG (y))
    1301      1921278 :                           || (SUBREG_PROMOTED_VAR_P (y)
    1302        13482 :                               && (!TRULY_NOOP_TRUNCATION_MODES_P (
    1303              :                                     GET_MODE (y),
    1304              :                                     GET_MODE (SUBREG_REG (y))))))
    1305              :                         break;
    1306              : 
    1307              :                       /* If this is a wide object (more bits than we can fit
    1308              :                          in a HOST_WIDE_INT), then just break from the SET
    1309              :                          context.   That will cause the iterator to walk down
    1310              :                          into the subrtx and if we land on a REG we'll mark
    1311              :                          the whole think live.  */
    1312      1920230 :                       if (bit >= HOST_BITS_PER_WIDE_INT)
    1313              :                         break;
    1314              : 
    1315              :                       /* The SUBREG's mode determines the live width.  */
    1316      1712694 :                       if (dst_mask)
    1317              :                         {
    1318      1712534 :                           dst_mask <<= bit;
    1319      1712534 :                           if (!dst_mask)
    1320            0 :                             dst_mask = -HOST_WIDE_INT_UC (0x100000000);
    1321              :                         }
    1322      1712694 :                       y = SUBREG_REG (y);
    1323              :                     }
    1324              : 
    1325     93295860 :                   if (REG_P (y))
    1326              :                     {
    1327              :                       /* We have found the use of a register.  We need to mark
    1328              :                          the appropriate chunks of the register live.  The mode
    1329              :                          of the REG is a starting point.  We may refine that
    1330              :                          based on what chunks in the output were live.  */
    1331     49651359 :                       rn = 4 * REGNO (y);
    1332     49651359 :                       unsigned HOST_WIDE_INT tmp_mask = dst_mask;
    1333              : 
    1334              :                       /* If the RTX code for the SET_SRC is not one we can
    1335              :                          propagate destination liveness through, then just
    1336              :                          set the mask to the mode's mask.  */
    1337     49651359 :                       if (!safe_for_live_propagation (code))
    1338        33025 :                         tmp_mask
    1339        66050 :                           = GET_MODE_MASK (GET_MODE_INNER (GET_MODE (y)));
    1340              : 
    1341     49651359 :                       if (tmp_mask & 0xff)
    1342     49164345 :                         bitmap_set_bit (livenow, rn);
    1343     49651359 :                       if (tmp_mask & 0xff00)
    1344     47269601 :                         bitmap_set_bit (livenow, rn + 1);
    1345     49651359 :                       if (tmp_mask & HOST_WIDE_INT_UC (0xffff0000))
    1346     46971184 :                         bitmap_set_bit (livenow, rn + 2);
    1347     49651359 :                       if (tmp_mask & -HOST_WIDE_INT_UC (0x100000000))
    1348     40424673 :                         bitmap_set_bit (livenow, rn + 3);
    1349              :                     }
    1350     43644501 :                   else if (!CONSTANT_P (y))
    1351              :                     break;
    1352              : 
    1353              :                   /* We might have (ashift (const_int 1) (reg...))
    1354              :                      By setting dst_mask we can continue iterating on the
    1355              :                      the next operand and it will be considered fully live.
    1356              : 
    1357              :                      Note that since we restore DST_MASK from SAVE_MASK at the
    1358              :                      top of the loop, we have to change SAVE_MASK to get the
    1359              :                      semantics we want.  */
    1360     76637193 :                   if (binop_implies_op2_fully_live (GET_CODE (src)))
    1361      2463738 :                     save_mask = -1;
    1362              : 
    1363              :                   /* If this was anything but a binary operand, break the inner
    1364              :                      loop.  This is conservatively correct as it will cause the
    1365              :                      iterator to look at the sub-rtxs outside the SET context.  */
    1366     76637193 :                   if (!BINARY_P (src))
    1367              :                     break;
    1368              : 
    1369              :                   /* We processed the first operand of a binary operator.  Now
    1370              :                      handle the second.  */
    1371     22020755 :                   y = XEXP (src, 1), src = pc_rtx;
    1372     22020755 :                 }
    1373              : 
    1374              :               /* These are leaf nodes, no need to iterate down into them.  */
    1375     71483689 :               if (REG_P (y) || CONSTANT_P (y))
    1376     54616438 :                 iter.skip_subrtxes ();
    1377              :             }
    1378              :         }
    1379              :       /* If we are reading the low part of a SUBREG, then we can
    1380              :          refine liveness of the input register, otherwise let the
    1381              :          iterator continue into SUBREG_REG.  */
    1382    487939570 :       else if (SUBREG_P (x)
    1383      1315066 :                && REG_P (SUBREG_REG (x))
    1384      1313202 :                && !paradoxical_subreg_p (x)
    1385      1289393 :                && subreg_lowpart_p (x)
    1386      1011775 :                && GET_MODE_BITSIZE (GET_MODE (x)).is_constant ()
    1387    489963120 :                && GET_MODE_BITSIZE (GET_MODE (x)).to_constant () <= 32)
    1388              :         {
    1389       498907 :           HOST_WIDE_INT size = GET_MODE_BITSIZE (GET_MODE (x)).to_constant ();
    1390       498907 :           HOST_WIDE_INT rn = 4 * REGNO (SUBREG_REG (x));
    1391              : 
    1392              :           /* If this is a promoted subreg, then more of it may be live than
    1393              :              is otherwise obvious.  */
    1394       498907 :           if (SUBREG_PROMOTED_VAR_P (x))
    1395         4432 :             size = GET_MODE_BITSIZE (GET_MODE (SUBREG_REG (x))).to_constant ();
    1396              : 
    1397       498907 :           bitmap_set_bit (livenow, rn);
    1398       498907 :           if (size > 8)
    1399       324850 :             bitmap_set_bit (livenow, rn + 1);
    1400       324850 :           if (size > 16)
    1401       283673 :             bitmap_set_bit (livenow, rn + 2);
    1402       283673 :           if (size > 32)
    1403           74 :             bitmap_set_bit (livenow, rn + 3);
    1404       498907 :           iter.skip_subrtxes ();
    1405              :         }
    1406              :       /* If we have a register reference that is not otherwise handled,
    1407              :          just assume all the chunks are live.  */
    1408    487440663 :       else if (REG_P (x))
    1409    161838892 :         bitmap_set_range (livenow, REGNO (x) * 4, group_limit (x));
    1410              :     }
    1411    137778816 : }
    1412              : 
    1413              : /* Process a single basic block BB with current liveness information
    1414              :    in LIVENOW, returning updated liveness information.
    1415              : 
    1416              :    If MODIFY is true, then this is the last pass and unnecessary
    1417              :    extensions should be eliminated when possible.  If an extension
    1418              :    is removed, the source pseudo is marked in CHANGED_PSEUDOS.  */
    1419              : 
    1420              : static void
    1421     22861547 : ext_dce_process_bb (basic_block bb)
    1422              : {
    1423     22861547 :   rtx_insn *insn;
    1424              : 
    1425    303895741 :   FOR_BB_INSNS_REVERSE (bb, insn)
    1426              :     {
    1427    281034194 :       if (!NONDEBUG_INSN_P (insn))
    1428    152780335 :         continue;
    1429              : 
    1430              :       /* Live-out state of the destination of this insn.  We can
    1431              :          use this to refine the live-in state of the sources of
    1432              :          this insn in many cases.  */
    1433    128253859 :       bitmap live_tmp = BITMAP_ALLOC (NULL);
    1434              : 
    1435              :       /* First process any sets/clobbers in INSN.  */
    1436    128253859 :       bool skipped_dest = ext_dce_process_sets (insn, PATTERN (insn), live_tmp);
    1437              : 
    1438              :       /* CALL_INSNs need processing their fusage data.  */
    1439    128253859 :       if (CALL_P (insn))
    1440      9524957 :         skipped_dest |= ext_dce_process_sets (insn,
    1441              :                                               CALL_INSN_FUNCTION_USAGE (insn),
    1442              :                                               live_tmp);
    1443              : 
    1444              :       /* And now uses, optimizing away SIGN/ZERO extensions as we go.  */
    1445    128253859 :       ext_dce_process_uses (insn, PATTERN (insn), live_tmp, skipped_dest);
    1446              : 
    1447              :       /* A nonlocal goto implicitly uses the frame pointer.  */
    1448    128253859 :       if (JUMP_P (insn) && find_reg_note (insn, REG_NON_LOCAL_GOTO, NULL_RTX))
    1449              :         {
    1450         1130 :           bitmap_set_range (livenow, FRAME_POINTER_REGNUM * 4, 4);
    1451         1130 :           if (!HARD_FRAME_POINTER_IS_FRAME_POINTER)
    1452         1130 :             bitmap_set_range (livenow, HARD_FRAME_POINTER_REGNUM * 4, 4);
    1453              :         }
    1454              : 
    1455              :       /* And process fusage data for the use as well.  */
    1456    128253859 :       if (CALL_P (insn))
    1457              :         {
    1458      9524957 :           if (!FAKE_CALL_P (insn))
    1459      9524897 :             bitmap_set_range (livenow, STACK_POINTER_REGNUM * 4, 4);
    1460              : 
    1461              :           /* If this is not a call to a const fucntion, then assume it
    1462              :              can read any global register.  */
    1463      9524957 :           if (!RTL_CONST_CALL_P (insn))
    1464    855648639 :             for (unsigned i = 0; i < FIRST_PSEUDO_REGISTER; i++)
    1465    846448116 :               if (global_regs[i])
    1466          230 :                 bitmap_set_range (livenow, i * 4, 4);
    1467              : 
    1468      9524957 :           ext_dce_process_uses (insn, CALL_INSN_FUNCTION_USAGE (insn), live_tmp, false);
    1469              :         }
    1470              : 
    1471    128253859 :       BITMAP_FREE (live_tmp);
    1472              :     }
    1473              : 
    1474     22861547 :   if (modify)
    1475      9758674 :     ext_dce_promote_chained_candidates ();
    1476     22861547 : }
    1477              : 
    1478              : /* SUBREG_PROMOTED_VAR_P is set by the gimple->rtl optimizers and
    1479              :    is usually helpful.  However, in some cases setting the value when
    1480              :    it not strictly needed can cause this pass to miss optimizations.
    1481              : 
    1482              :    Specifically consider (set (mem) (subreg (reg))).  If set in that
    1483              :    case it will cause more bit groups to be live for REG than would
    1484              :    be strictly necessary which in turn can inhibit extension removal.
    1485              : 
    1486              :    So do a pass over the IL wiping the SUBREG_PROMOTED_VAR_P when it
    1487              :    is obviously not needed.  */
    1488              : 
    1489              : static void
    1490       966050 : maybe_clear_subreg_promoted_p (void)
    1491              : {
    1492    120258508 :   for (rtx_insn *insn = get_insns(); insn; insn = NEXT_INSN (insn))
    1493              :     {
    1494    119292458 :       if (!NONDEBUG_INSN_P (insn))
    1495     64457056 :         continue;
    1496              : 
    1497     54835402 :       rtx set = single_set (insn);
    1498     54835402 :       if (!set)
    1499      3682349 :         continue;
    1500              : 
    1501              :       /* There may be other cases where we should clear, but for
    1502              :          now, this is the only known case where it causes problems.  */
    1503     51153053 :       if (MEM_P (SET_DEST (set)) && SUBREG_P (SET_SRC (set))
    1504        71105 :         && GET_MODE (SET_DEST (set)) <= GET_MODE (SUBREG_REG (SET_SRC (set))))
    1505        61291 :         SUBREG_PROMOTED_VAR_P (SET_SRC (set)) = 0;
    1506              :     }
    1507       966050 : }
    1508              : 
    1509              : /* Walk the IL and build the transitive closure of all the REGs tied
    1510              :    together by copies where either the source or destination is
    1511              :    marked in CHANGED_PSEUDOS.  */
    1512              : 
    1513              : static void
    1514       966050 : expand_changed_pseudos (void)
    1515              : {
    1516              :   /* Build a vector of registers related by a copy.  This is meant to
    1517              :      speed up the next step by avoiding full IL walks.  */
    1518       966050 :   struct copy_pair { rtx first; rtx second; };
    1519       966050 :   auto_vec<copy_pair> pairs;
    1520    120258508 :   for (rtx_insn *insn = get_insns(); insn; insn = NEXT_INSN (insn))
    1521              :     {
    1522    119292458 :       if (!NONDEBUG_INSN_P (insn))
    1523     64457056 :         continue;
    1524              : 
    1525     54835402 :       rtx pat = PATTERN (insn);
    1526              : 
    1527              :       /* Simple copies to a REG from another REG or SUBREG of a REG.  */
    1528     54835402 :       if (GET_CODE (pat) == SET
    1529     43225743 :           && REG_P (SET_DEST (pat))
    1530     30738844 :           && (REG_P (SET_SRC (pat))
    1531     22265183 :               || (SUBREG_P (SET_SRC (pat))
    1532       375749 :                   && REG_P (SUBREG_REG (SET_SRC (pat))))))
    1533              :         {
    1534       375295 :           rtx src = (REG_P (SET_SRC (pat))
    1535      8848956 :                      ? SET_SRC (pat)
    1536              :                      : SUBREG_REG (SET_SRC (pat)));
    1537      8848956 :           pairs.safe_push ({ SET_DEST (pat), src });
    1538              :         }
    1539              : 
    1540              :       /* Simple copies to a REG from another REG or SUBREG of a REG
    1541              :          held inside a PARALLEL.  */
    1542     54835402 :       if (GET_CODE (pat) == PARALLEL)
    1543              :         {
    1544     25002867 :           for (int i = XVECLEN (pat, 0) - 1; i >= 0; i--)
    1545              :             {
    1546     16785137 :               rtx elem = XVECEXP (pat, 0, i);
    1547              : 
    1548     16785137 :               if (GET_CODE (elem) == SET
    1549      8437929 :                   && REG_P (SET_DEST (elem))
    1550      8287962 :                   && (REG_P (SET_SRC (elem))
    1551      8287962 :                       || (SUBREG_P (SET_SRC (elem))
    1552            0 :                           && REG_P (SUBREG_REG (SET_SRC (elem))))))
    1553              :                 {
    1554            0 :                   rtx src = (REG_P (SET_SRC (elem))
    1555            0 :                              ? SET_SRC (elem)
    1556              :                              : SUBREG_REG (SET_SRC (elem)));
    1557            0 :                   pairs.safe_push ({ SET_DEST (elem), src });
    1558              :                 }
    1559              :             }
    1560      8217730 :           continue;
    1561      8217730 :         }
    1562              :     }
    1563              : 
    1564              :   /* Now we have a vector with copy pairs.  Iterate over that list
    1565              :      updating CHANGED_PSEUDOS as we go.  Eliminate copies from the
    1566              :      list as we go as they don't need further processing.  */
    1567              :   bool changed = true;
    1568      1932200 :   while (changed)
    1569              :     {
    1570              :       changed = false;
    1571              :       unsigned int i;
    1572              :       copy_pair *p;
    1573     10782586 :       FOR_EACH_VEC_ELT (pairs, i, p)
    1574              :         {
    1575      8850386 :           if (bitmap_bit_p (changed_pseudos, REGNO (p->second))
    1576      8850386 :               && bitmap_set_bit (changed_pseudos, REGNO (p->first)))
    1577              :             {
    1578          116 :               pairs.unordered_remove (i);
    1579          116 :               changed = true;
    1580              :             }
    1581              :         }
    1582              :     }
    1583       966050 : }
    1584              : 
    1585              : /* We optimize away sign/zero extensions in this pass and replace
    1586              :    them with SUBREGs indicating certain bits are don't cares.
    1587              : 
    1588              :    This changes the SUBREG_PROMOTED_VAR_P state of the object.
    1589              :    It is fairly painful to fix this on the fly, so we have
    1590              :    recorded which pseudos are affected and we look for SUBREGs
    1591              :    of those pseudos and fix them up.  */
    1592              : 
    1593              : static void
    1594       966050 : reset_subreg_promoted_p (void)
    1595              : {
    1596              :   /* This pass eliminates zero/sign extensions on pseudo regs found
    1597              :      in CHANGED_PSEUDOS.  Elimination of those extensions changes if
    1598              :      the pseudos are known to hold values extended to wider modes
    1599              :      via SUBREG_PROMOTED_VAR.  So we wipe the SUBREG_PROMOTED_VAR
    1600              :      state on all affected pseudos.
    1601              : 
    1602              :      But that is insufficient.  We might have a copy from one REG
    1603              :      to another (possibly with the source register wrapped with a
    1604              :      SUBREG).  We need to wipe SUBREG_PROMOTED_VAR on the transitive
    1605              :      closure of the original CHANGED_PSEUDOS and registers they're
    1606              :      connected to via copies.  So expand the set.  */
    1607       966050 :   expand_changed_pseudos ();
    1608              :     
    1609              :   /* If we removed an extension, that changed the promoted state
    1610              :      of the destination of that extension.  Thus we need to go
    1611              :      find any SUBREGs that reference that pseudo and adjust their
    1612              :      SUBREG_PROMOTED_P state.  */
    1613    120258508 :   for (rtx_insn *insn = get_insns(); insn; insn = NEXT_INSN (insn))
    1614              :     {
    1615    119292458 :       if (!NONDEBUG_INSN_P (insn))
    1616     64457056 :         continue;
    1617              : 
    1618     54835402 :       rtx pat = PATTERN (insn);
    1619     54835402 :       subrtx_var_iterator::array_type array;
    1620    349998515 :       FOR_EACH_SUBRTX_VAR (iter, array, pat, NONCONST)
    1621              :         {
    1622    295163113 :           rtx sub = *iter;
    1623              : 
    1624              :           /* We only care about SUBREGs.  */
    1625    295163113 :           if (GET_CODE (sub) != SUBREG)
    1626    293626509 :             continue;
    1627              : 
    1628      1536604 :           const_rtx x = SUBREG_REG (sub);
    1629              : 
    1630              :           /* We only care if the inner object is a REG.  */
    1631      1536604 :           if (!REG_P (x))
    1632          787 :             continue;
    1633              : 
    1634              :           /* And only if the SUBREG is a promoted var.  */
    1635      1535817 :           if (!SUBREG_PROMOTED_VAR_P (sub))
    1636      1530431 :             continue;
    1637              : 
    1638         5386 :           if (bitmap_bit_p (changed_pseudos, REGNO (x)))
    1639            0 :             SUBREG_PROMOTED_VAR_P (sub) = 0;
    1640              :         }
    1641     54835402 :     }
    1642       966050 : }
    1643              : 
    1644              : /* Initialization of the ext-dce pass.  Primarily this means
    1645              :    setting up the various bitmaps we utilize.  */
    1646              : 
    1647              : static void
    1648       966050 : ext_dce_init (void)
    1649              : {
    1650       966050 :   livein.create (last_basic_block_for_fn (cfun));
    1651       966050 :   livein.quick_grow_cleared (last_basic_block_for_fn (cfun));
    1652     12665360 :   for (int i = 0; i < last_basic_block_for_fn (cfun); i++)
    1653     11699310 :     bitmap_initialize (&livein[i], &bitmap_default_obstack);
    1654              : 
    1655       966050 :   auto_bitmap refs (&bitmap_default_obstack);
    1656       966050 :   df_get_exit_block_use_set (refs);
    1657              : 
    1658       966050 :   unsigned i;
    1659       966050 :   bitmap_iterator bi;
    1660      4444653 :   EXECUTE_IF_SET_IN_BITMAP (refs, 0, i, bi)
    1661      3478603 :     make_reg_live (&livein[EXIT_BLOCK], i);
    1662              : 
    1663       966050 :   livenow = BITMAP_ALLOC (NULL);
    1664       966050 :   all_blocks = BITMAP_ALLOC (NULL);
    1665       966050 :   changed_pseudos = BITMAP_ALLOC (NULL);
    1666       966050 :   promotable_dests = BITMAP_ALLOC (NULL);
    1667       966050 :   consumed_by_candidate = BITMAP_ALLOC (NULL);
    1668              : 
    1669     12665360 :   for (int i = 0; i < last_basic_block_for_fn (cfun); i++)
    1670     11699310 :     if (i != ENTRY_BLOCK && i != EXIT_BLOCK)
    1671      9767210 :       bitmap_set_bit (all_blocks, i);
    1672              : 
    1673       966050 :   modify = false;
    1674       966050 : }
    1675              : 
    1676              : /* Finalization of the ext-dce pass.  Primarily this means
    1677              :    releasing up the various bitmaps we utilize.  */
    1678              : 
    1679              : static void
    1680       966050 : ext_dce_finish (void)
    1681              : {
    1682     12665360 :   for (unsigned i = 0; i < livein.length (); i++)
    1683     11699310 :     bitmap_clear (&livein[i]);
    1684       966050 :   livein.release ();
    1685              : 
    1686       966050 :   BITMAP_FREE (livenow);
    1687       966050 :   BITMAP_FREE (changed_pseudos);
    1688       966050 :   BITMAP_FREE (all_blocks);
    1689       966050 :   BITMAP_FREE (promotable_dests);
    1690       966050 :   BITMAP_FREE (consumed_by_candidate);
    1691       966050 :   promotion_candidates.release ();
    1692       966050 :   promotion_copies.release ();
    1693       966050 : }
    1694              : 
    1695              : /* Process block number BB_INDEX as part of the backward
    1696              :    simple dataflow analysis.  Return TRUE if something in
    1697              :    this block changed or FALSE otherwise.  */
    1698              : 
    1699              : static bool
    1700     26725747 : ext_dce_rd_transfer_n (int bb_index)
    1701              : {
    1702              :   /* The ENTRY/EXIT blocks never change.  */
    1703     26725747 :   if (bb_index == ENTRY_BLOCK || bb_index == EXIT_BLOCK)
    1704              :     return false;
    1705              : 
    1706     22861547 :   basic_block bb = BASIC_BLOCK_FOR_FN (cfun, bb_index);
    1707              : 
    1708              :   /* Make everything live that's live in the successors.  */
    1709     22861547 :   bitmap_clear (livenow);
    1710     22861547 :   edge_iterator ei;
    1711     22861547 :   edge e;
    1712              : 
    1713     57499331 :   FOR_EACH_EDGE (e, ei, bb->succs)
    1714     34637784 :     bitmap_ior_into (livenow, &livein[e->dest->index]);
    1715              : 
    1716     22861547 :   ext_dce_process_bb (bb);
    1717              : 
    1718              :   /* We only allow widening the set of objects live at the start
    1719              :      of a block.  Otherwise we run the risk of not converging.  */
    1720     22861547 :   return bitmap_ior_into (&livein[bb_index], livenow);
    1721              : }
    1722              : 
    1723              : /* Dummy function for the df_simple_dataflow API.  */
    1724     33280793 : static bool ext_dce_rd_confluence_n (edge) { return true; }
    1725              : 
    1726              : /* Use lifetime analyis to identify extensions that set bits that
    1727              :    are never read.  Turn such extensions into SUBREGs instead which
    1728              :    can often be propagated away.  */
    1729              : 
    1730              : void
    1731       966050 : ext_dce_execute (void)
    1732              : {
    1733              :   /* Limit the amount of memory we use for livein, with 4 bits per
    1734              :      reg per basic-block including overhead that maps to one byte
    1735              :      per reg per basic-block.  */
    1736       966050 :   uint64_t memory_request
    1737       966050 :     = (uint64_t)n_basic_blocks_for_fn (cfun) * max_reg_num ();
    1738       966050 :   if (memory_request / 1024 > (uint64_t)param_max_gcse_memory)
    1739              :     {
    1740            0 :       warning (OPT_Wdisabled_optimization,
    1741              :                "ext-dce disabled: %d basic blocks and %d registers; "
    1742              :                "increase %<--param max-gcse-memory%> above %wu",
    1743            0 :                n_basic_blocks_for_fn (cfun), max_reg_num (),
    1744              :                memory_request / 1024);
    1745            0 :       return;
    1746              :     }
    1747              : 
    1748              :   /* Some settings of SUBREG_PROMOTED_VAR_P are actively harmful
    1749              :      to this pass.  Clear it for those cases.  */
    1750       966050 :   maybe_clear_subreg_promoted_p ();
    1751       966050 :   df_analyze ();
    1752       966050 :   ext_dce_init ();
    1753              : 
    1754      3864200 :   do
    1755              :     {
    1756      1932100 :       df_simple_dataflow (DF_BACKWARD, NULL, NULL,
    1757              :                           ext_dce_rd_confluence_n, ext_dce_rd_transfer_n,
    1758              :                           all_blocks, df_get_postorder (DF_BACKWARD),
    1759              :                           df_get_n_blocks (DF_BACKWARD));
    1760      1932100 :       modify = !modify;
    1761              :     }
    1762              :   while (modify);
    1763              : 
    1764       966050 :   reset_subreg_promoted_p ();
    1765              : 
    1766       966050 :   ext_dce_finish ();
    1767              : }
    1768              : 
    1769              : 
    1770              : namespace {
    1771              : 
    1772              : const pass_data pass_data_ext_dce =
    1773              : {
    1774              :   RTL_PASS, /* type */
    1775              :   "ext_dce", /* name */
    1776              :   OPTGROUP_NONE, /* optinfo_flags */
    1777              :   TV_EXT_DCE, /* tv_id */
    1778              :   PROP_cfglayout, /* properties_required */
    1779              :   0, /* properties_provided */
    1780              :   0, /* properties_destroyed */
    1781              :   0, /* todo_flags_start */
    1782              :   TODO_df_finish, /* todo_flags_finish */
    1783              : };
    1784              : 
    1785              : class pass_ext_dce : public rtl_opt_pass
    1786              : {
    1787              : public:
    1788       288767 :   pass_ext_dce (gcc::context *ctxt)
    1789       577534 :     : rtl_opt_pass (pass_data_ext_dce, ctxt)
    1790              :   {}
    1791              : 
    1792              :   /* opt_pass methods: */
    1793      1481491 :   virtual bool gate (function *) { return flag_ext_dce && optimize > 0; }
    1794       966050 :   virtual unsigned int execute (function *)
    1795              :     {
    1796       966050 :       ext_dce_execute ();
    1797       966050 :       return 0;
    1798              :     }
    1799              : 
    1800              : }; // class pass_combine
    1801              : 
    1802              : } // anon namespace
    1803              : 
    1804              : rtl_opt_pass *
    1805       288767 : make_pass_ext_dce (gcc::context *ctxt)
    1806              : {
    1807       288767 :   return new pass_ext_dce (ctxt);
    1808              : }
        

Generated by: LCOV version 2.4-beta

LCOV profile is generated on x86_64 machine using following configure options: configure --disable-bootstrap --enable-coverage=opt --enable-languages=c,c++,fortran,go,jit,lto,rust,m2 --enable-host-shared. GCC test suite is run with the built compiler.