LCOV - code coverage report
Current view: top level - gcc - ext-dce.cc (source / functions) Coverage Total Hit
Test: gcc.info Lines: 66.3 % 683 453
Test Date: 2026-05-11 19:44:49 Functions: 91.7 % 24 22
Legend: Lines:     hit not hit

            Line data    Source code
       1              : /* RTL dead zero/sign extension (code) elimination.
       2              :    Copyright (C) 2000-2026 Free Software Foundation, Inc.
       3              : 
       4              : This file is part of GCC.
       5              : 
       6              : GCC is free software; you can redistribute it and/or modify it under
       7              : the terms of the GNU General Public License as published by the Free
       8              : Software Foundation; either version 3, or (at your option) any later
       9              : version.
      10              : 
      11              : GCC is distributed in the hope that it will be useful, but WITHOUT ANY
      12              : WARRANTY; without even the implied warranty of MERCHANTABILITY or
      13              : FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
      14              : for more details.
      15              : 
      16              : You should have received a copy of the GNU General Public License
      17              : along with GCC; see the file COPYING3.  If not see
      18              : <http://www.gnu.org/licenses/>.  */
      19              : 
      20              : #include "config.h"
      21              : #include "system.h"
      22              : #include "coretypes.h"
      23              : #include "backend.h"
      24              : #include "rtl.h"
      25              : #include "tree.h"
      26              : #include "memmodel.h"
      27              : #include "insn-config.h"
      28              : #include "emit-rtl.h"
      29              : #include "expr.h"
      30              : #include "recog.h"
      31              : #include "cfganal.h"
      32              : #include "tree-pass.h"
      33              : #include "cfgrtl.h"
      34              : #include "rtl-iter.h"
      35              : #include "df.h"
      36              : #include "print-rtl.h"
      37              : #include "dbgcnt.h"
      38              : #include "diagnostic-core.h"
      39              : #include "target.h"
      40              : 
      41              : /* These should probably move into a C++ class.  */
      42              : static vec<bitmap_head> livein;
      43              : static bitmap all_blocks;
      44              : static bitmap livenow;
      45              : static bitmap changed_pseudos;
      46              : static bool modify;
      47              : 
      48              : /* Chain detection for promotion: we defer promotions and only apply them
      49              :    when they form chains (one candidate's result feeds another's operand).
      50              :    Standalone promotions are skipped as they cause regressions on targets
      51              :    with free sign extension (e.g., RISC-V W-suffix instructions).  */
      52              : struct promotion_candidate_info {
      53              :   rtx_insn *insn;
      54              :   rtx set;
      55              : };
      56              : 
      57              : static vec<promotion_candidate_info> promotion_candidates;
      58              : static bitmap promotable_dests;
      59              : static bitmap consumed_by_candidate;
      60              : 
      61              : /* Copy pairs seen during the reverse scan (from optimized extensions).
      62              :    Used to propagate chain info transitively.  */
      63              : struct copy_info {
      64              :   unsigned int dest_regno;
      65              :   unsigned int src_regno;
      66              : };
      67              : static vec<copy_info> promotion_copies;
      68              : 
      69              : /* We consider four bit groups for liveness:
      70              :    bit 0..7   (least significant byte)
      71              :    bit 8..15  (second least significant byte)
      72              :    bit 16..31
      73              :    bit 32..BITS_PER_WORD-1  */
      74              : 
      75              : /* For the given REG, return the number of bit groups implied by the
      76              :    size of the REG's mode, up to a maximum of 4 (number of bit groups
      77              :    tracked by this pass).
      78              : 
      79              :    For partial integer and variable sized modes also return 4.  This
      80              :    could possibly be refined for something like PSI mode, but it
      81              :    does not seem worth the effort.  */
      82              : 
      83              : static int
      84    234236263 : group_limit (const_rtx reg)
      85              : {
      86    234236263 :   machine_mode mode = GET_MODE (reg);
      87              : 
      88    234236263 :   if (!GET_MODE_BITSIZE (mode).is_constant ())
      89              :     return 4;
      90              : 
      91    234236263 :   int size = GET_MODE_SIZE (mode).to_constant ();
      92              : 
      93    234236263 :   size = exact_log2 (size);
      94              : 
      95    234142387 :   if (size < 0)
      96              :     return 4;
      97              : 
      98    234142387 :   size++;
      99    234142387 :   return (size > 4 ? 4 : size);
     100              : }
     101              : 
     102              : /* Make all bit groups live for REGNO in bitmap BMAP.  For hard regs,
     103              :    we assume all groups are live.  For a pseudo we consider the size
     104              :    of the pseudo to avoid creating unnecessarily live chunks of data.  */
     105              : 
     106              : static void
     107      4688783 : make_reg_live (bitmap bmap, int regno)
     108              : {
     109      4688783 :   int limit;
     110              : 
     111              :   /* For pseudos we can use the mode to limit how many bit groups
     112              :      are marked as live since a pseudo only has one mode.  Hard
     113              :      registers have to be handled more conservatively.  */
     114      4688783 :   if (regno > FIRST_PSEUDO_REGISTER)
     115              :     {
     116       881705 :       rtx reg = regno_reg_rtx[regno];
     117       881705 :       limit = group_limit (reg);
     118              :     }
     119              :   else
     120              :     limit = 4;
     121              : 
     122     23114505 :   for (int i = 0; i < limit; i++)
     123     18425722 :     bitmap_set_bit (bmap, regno * 4 + i);
     124      4688783 : }
     125              : 
     126              : /* Note this pass could be used to narrow memory loads too.  It's
     127              :    not clear if that's profitable or not in general.  */
     128              : 
     129              : #define UNSPEC_P(X) (GET_CODE (X) == UNSPEC || GET_CODE (X) == UNSPEC_VOLATILE)
     130              : 
     131              : /* If we know the destination of CODE only uses some low bits
     132              :    (say just the QI bits of an SI operation), then return true
     133              :    if we can propagate the need for just the subset of bits
     134              :    from the destination to the sources.
     135              : 
     136              :    FIXME: This is safe for operands 1 and 2 of an IF_THEN_ELSE, but not
     137              :    operand 0.  Thus is likely would need some special casing to handle.  */
     138              : 
     139              : static bool
     140    141254345 : safe_for_live_propagation (rtx_code code)
     141              : {
     142              :   /* First handle rtx classes which as a whole are known to
     143              :      be either safe or unsafe.  */
     144    141254345 :   switch (GET_RTX_CLASS (code))
     145              :     {
     146              :       case RTX_OBJ:
     147              :       case RTX_CONST_OBJ:
     148              :         return true;
     149              : 
     150              :       case RTX_COMPARE:
     151              :       case RTX_COMM_COMPARE:
     152              :       case RTX_TERNARY:
     153              :         return false;
     154              : 
     155     73379139 :       default:
     156     73379139 :         break;
     157              :     }
     158              : 
     159              :   /* What's left are specific codes.  We only need to identify those
     160              :      which are safe.   */
     161     73379139 :   switch (code)
     162              :     {
     163              :     /* These are trivially safe.  */
     164              :     case SUBREG:
     165              :     case NOT:
     166              :     case ZERO_EXTEND:
     167              :     case SIGN_EXTEND:
     168              :     case TRUNCATE:
     169              :     case PLUS:
     170              :     case MINUS:
     171              :     case MULT:
     172              :     case SMUL_HIGHPART:
     173              :     case UMUL_HIGHPART:
     174              :     case AND:
     175              :     case IOR:
     176              :     case XOR:
     177              :       return true;
     178              : 
     179              :     /* We can propagate for the shifted operand, but not the shift
     180              :        count.  The count is handled specially.  */
     181              :     case ASHIFT:
     182              :     case LSHIFTRT:
     183              :     case ASHIFTRT:
     184              :     case SS_ASHIFT:
     185              :     case US_ASHIFT:
     186              :       return true;
     187              : 
     188              :     /* There may be other safe codes.  If so they can be added
     189              :        individually when discovered.  */
     190              :     default:
     191              :       return false;
     192              :     }
     193              : }
     194              : 
     195              : /* Clear bits in LIVENOW and set bits in LIVE_TMP for objects
     196              :    set/clobbered by OBJ contained in INSN.
     197              : 
     198              :    Conceptually it is always safe to ignore a particular destination
     199              :    here as that will result in more chunks of data being considered
     200              :    live.  That's what happens when we "continue" the main loop when
     201              :    we see something we don't know how to handle such as a vector
     202              :    mode destination.
     203              : 
     204              :    The more accurate we are in identifying what objects (and chunks
     205              :    within an object) are set by INSN, the more aggressive the
     206              :    optimization phase during use handling will be.  */
     207              : 
     208              : static bool
     209    137109191 : ext_dce_process_sets (rtx_insn *insn, rtx obj, bitmap live_tmp)
     210              : {
     211    137109191 :   bool skipped_dest = false;
     212              : 
     213    137109191 :   subrtx_iterator::array_type array;
     214    394111964 :   FOR_EACH_SUBRTX (iter, array, obj, NONCONST)
     215              :     {
     216    257002773 :       const_rtx x = *iter;
     217              : 
     218              :       /* An EXPR_LIST (from call fusage) ends in NULL_RTX.  */
     219    257002773 :       if (x == NULL_RTX)
     220      9452414 :         continue;
     221              : 
     222    247550359 :       if (UNSPEC_P (x))
     223       572017 :         continue;
     224              : 
     225    246978342 :       if (GET_CODE (x) == SET || GET_CODE (x) == CLOBBER)
     226              :         {
     227    142734977 :           unsigned bit = 0;
     228    142734977 :           x = SET_DEST (x);
     229              : 
     230              :           /* We don't support vector destinations or destinations
     231              :              wider than DImode.  */
     232    142734977 :           scalar_mode outer_mode;
     233    146608655 :           if (!is_a <scalar_mode> (GET_MODE (x), &outer_mode)
     234     90227606 :               || GET_MODE_BITSIZE (outer_mode) > HOST_BITS_PER_WIDE_INT)
     235              :             {
     236              :               /* Skip the subrtxs of this destination.  There is
     237              :                  little value in iterating into the subobjects, so
     238              :                  just skip them for a bit of efficiency.  */
     239     56381049 :               skipped_dest = true;
     240     56381049 :               iter.skip_subrtxes ();
     241    313383822 :               continue;
     242              :             }
     243              : 
     244              :           /* We could have (strict_low_part (subreg ...)).  We can not just
     245              :              strip the STRICT_LOW_PART as that would result in clearing
     246              :              some bits in LIVENOW that are still live.  So process the
     247              :              STRICT_LOW_PART specially.  */
     248     86353928 :           if (GET_CODE (x) == STRICT_LOW_PART)
     249              :             {
     250            0 :               x = XEXP (x, 0);
     251              : 
     252              :               /* The only valid operand of a STRICT_LOW_PART is a non
     253              :                  paradoxical SUBREG.  */
     254            0 :               gcc_assert (SUBREG_P (x)
     255              :                           && !paradoxical_subreg_p (x)
     256              :                           && SUBREG_BYTE (x).is_constant ());
     257              : 
     258              :               /* I think we should always see a REG here.  But let's
     259              :                  be sure.  */
     260            0 :               gcc_assert (REG_P (SUBREG_REG (x)));
     261              : 
     262              :               /* The inner mode might be larger, just punt for
     263              :                  that case.  Remember, we can not just continue to process
     264              :                  the inner RTXs due to the STRICT_LOW_PART.  */
     265            0 :               if (!is_a <scalar_mode> (GET_MODE (SUBREG_REG (x)), &outer_mode)
     266            0 :                   || GET_MODE_BITSIZE (outer_mode) > HOST_BITS_PER_WIDE_INT)
     267              :                 {
     268              :                   /* Skip the subrtxs of the STRICT_LOW_PART.  We can't
     269              :                      process them because it'll set objects as no longer
     270              :                      live when they are in fact still live.  */
     271            0 :                   skipped_dest = true;
     272            0 :                   iter.skip_subrtxes ();
     273            0 :                   continue;
     274              :                 }
     275              : 
     276              :               /* LIVE_TMP contains the set groups that are live-out and set in
     277              :                  this insn.  It is used to narrow the groups live-in for the
     278              :                  inputs of this insn.
     279              : 
     280              :                  The simple thing to do is mark all the groups as live, but
     281              :                  that will significantly inhibit optimization.
     282              : 
     283              :                  We also need to be careful in the case where we have an in-out
     284              :                  operand.  If we're not careful we'd clear LIVE_TMP
     285              :                  incorrectly.  */
     286            0 :               HOST_WIDE_INT rn = REGNO (SUBREG_REG (x));
     287            0 :               int limit = group_limit (SUBREG_REG (x));
     288            0 :               for (HOST_WIDE_INT i = 4 * rn; i < 4 * rn + limit; i++)
     289            0 :                 if (bitmap_bit_p (livenow, i))
     290            0 :                   bitmap_set_bit (live_tmp, i);
     291              : 
     292            0 :               if (bitmap_empty_p (live_tmp))
     293            0 :                 make_reg_live (live_tmp, rn);
     294              : 
     295              :               /* The mode of the SUBREG tells us how many bits we can
     296              :                  clear.  */
     297            0 :               machine_mode mode = GET_MODE (x);
     298            0 :               HOST_WIDE_INT size
     299            0 :                 = exact_log2 (GET_MODE_SIZE (mode).to_constant ()) + 1;
     300            0 :               bitmap_clear_range (livenow, 4 * rn, size);
     301              : 
     302              :               /* We have fully processed this destination.  */
     303            0 :               iter.skip_subrtxes ();
     304            0 :               continue;
     305            0 :             }
     306              : 
     307              :           /* Phase one of destination handling.  First remove any wrapper
     308              :              such as SUBREG or ZERO_EXTRACT.  */
     309     86353928 :           unsigned HOST_WIDE_INT mask
     310     86353928 :             = GET_MODE_MASK (GET_MODE_INNER (GET_MODE (x)));
     311     86353928 :           if (SUBREG_P (x))
     312              :             {
     313              :               /* If we have a SUBREG destination that is too wide, just
     314              :                  skip the destination rather than continuing this iterator.
     315              :                  While continuing would be better, we'd need to strip the
     316              :                  subreg and restart within the SET processing rather than
     317              :                  the top of the loop which just complicates the flow even
     318              :                  more.  */
     319       654680 :               if (!is_a <scalar_mode> (GET_MODE (SUBREG_REG (x)), &outer_mode)
     320       543453 :                   || GET_MODE_BITSIZE (outer_mode) > HOST_BITS_PER_WIDE_INT)
     321              :                 {
     322       111227 :                   skipped_dest = true;
     323       111227 :                   iter.skip_subrtxes ();
     324       111227 :                   continue;
     325              :                 }
     326              : 
     327              :               /* We can safely strip a paradoxical subreg.  The inner mode will
     328              :                  be narrower than the outer mode.  We'll clear fewer bits in
     329              :                  LIVENOW than we'd like, but that's always safe.  */
     330       432714 :               if (paradoxical_subreg_p (x))
     331              :                 x = XEXP (x, 0);
     332       422878 :               else if (SUBREG_BYTE (x).is_constant ())
     333              :                 {
     334       422878 :                   bit = subreg_lsb (x).to_constant ();
     335       422878 :                   mask = GET_MODE_MASK (GET_MODE (SUBREG_REG (x))) << bit;
     336       422878 :                   gcc_assert (mask);
     337              :                   x = SUBREG_REG (x);
     338              :                 }
     339              :               else
     340              :                 gcc_unreachable ();
     341              :             }
     342              : 
     343     86242701 :           if (GET_CODE (x) == ZERO_EXTRACT)
     344              :             {
     345              :               /* Unlike a SUBREG destination, a set of a ZERO_EXTRACT only
     346              :                  modifies the bits referenced in the ZERO_EXTRACT, the rest
     347              :                  remain the same.  Thus we can not continue here, we must
     348              :                  either figure out what part of the destination is modified
     349              :                  or skip the sub-rtxs.  */
     350         3542 :               skipped_dest = true;
     351         3542 :               iter.skip_subrtxes ();
     352         3542 :               continue;
     353              :             }
     354              : 
     355              :           /* BIT >= 64 indicates something went horribly wrong.  */
     356     86239159 :           gcc_assert (bit <= HOST_BITS_PER_WIDE_INT - 1);
     357              : 
     358              :           /* Now handle the actual object that was changed.  */
     359     86239159 :           if (REG_P (x))
     360              :             {
     361              :               /* LIVE_TMP contains the set groups that are live-out and set in
     362              :                  this insn.  It is used to narrow the groups live-in for the
     363              :                  inputs of this insn.
     364              : 
     365              :                  The simple thing to do is mark all the groups as live, but
     366              :                  that will significantly inhibit optimization.
     367              : 
     368              :                  We also need to be careful in the case where we have an in-out
     369              :                  operand.  If we're not careful we'd clear LIVE_TMP
     370              :                  incorrectly.  */
     371     72303469 :               HOST_WIDE_INT rn = REGNO (x);
     372     72303469 :               int limit = group_limit (x);
     373    323351437 :               for (HOST_WIDE_INT i = 4 * rn; i < 4 * rn + limit; i++)
     374    251047968 :                 if (bitmap_bit_p (livenow, i))
     375    243925589 :                   bitmap_set_bit (live_tmp, i);
     376              : 
     377     72303469 :               if (bitmap_empty_p (live_tmp))
     378      1226104 :                 make_reg_live (live_tmp, rn);
     379              : 
     380              :               /* Now clear the bits known written by this instruction.
     381              :                  Note that BIT need not be a power of two, consider a
     382              :                  ZERO_EXTRACT destination.  */
     383     72303469 :               int start = (bit < 8 ? 0 : bit < 16 ? 1 : bit < 32 ? 2 : 3);
     384     77018393 :               int end = ((mask & ~HOST_WIDE_INT_UC (0xffffffff)) ? 4
     385     27921109 :                          : (mask & HOST_WIDE_INT_UC (0xffff0000)) ? 3
     386      5516069 :                          : (mask & 0xff00) ? 2 : 1);
     387     72303469 :               bitmap_clear_range (livenow, 4 * rn + start, end - start);
     388              :             }
     389              :           /* Some ports generate (clobber (const_int)).  */
     390     13935690 :           else if (CONST_INT_P (x))
     391            0 :             continue;
     392              :           else
     393     13935690 :             gcc_assert (CALL_P (insn)
     394              :                         || MEM_P (x)
     395              :                         || x == pc_rtx
     396              :                         || GET_CODE (x) == SCRATCH);
     397              : 
     398     86239159 :           iter.skip_subrtxes ();
     399     86239159 :         }
     400    104243365 :       else if (GET_CODE (x) == COND_EXEC)
     401              :         {
     402              :           /* This isn't ideal, but may not be so bad in practice.  */
     403            0 :           skipped_dest = true;
     404            0 :           iter.skip_subrtxes ();
     405              :         }
     406              :     }
     407    137109191 :   return skipped_dest;
     408    137109191 : }
     409              : 
     410              : /* INSN is a right shift and the second insn in a shift pair that is a
     411              :    sign or zero extension (SET is the single set associated with INSN).  
     412              : 
     413              :    Replace the source of SET with NEW_SRC which is a source register
     414              :    from NEW_SRC_INSN (the left shift in the pair).  This is effectively
     415              :    the same as the replacement we do for ZERO/SIGN extends on targets
     416              :    that support those insns.  */
     417              : static void
     418            0 : ext_dce_try_optimize_rshift (rtx_insn *insn, rtx set, rtx new_src, rtx_insn *new_src_insn)
     419              : {
     420              :   /* If the modes are not the same or one is a hard register, then
     421              :      conservatively do nothing.  */
     422            0 :   if (GET_MODE (SET_SRC (set)) != GET_MODE (new_src)
     423            0 :       || !REG_P (XEXP (SET_SRC (set), 0))
     424            0 :       || !REG_P (new_src)
     425            0 :       || REGNO (XEXP (SET_SRC (set), 0)) < FIRST_PSEUDO_REGISTER
     426            0 :       || REGNO (new_src) < FIRST_PSEUDO_REGISTER)
     427              :     return;
     428              : 
     429            0 :   if (dump_file)
     430              :     {
     431            0 :       fprintf (dump_file, "Processing insn:\n");
     432            0 :       dump_insn_slim (dump_file, insn);
     433            0 :       fprintf (dump_file, "Trying to simplify pattern:\n");
     434            0 :       print_rtl_single (dump_file, SET_SRC (set));
     435              :     }
     436              : 
     437              :   /* We decided to turn do the optimization but allow it to be rejected for
     438              :      bisection purposes.  */
     439            0 :   if (!dbg_cnt (::ext_dce))
     440              :     {
     441            0 :       if (dump_file)
     442            0 :         fprintf (dump_file, "Rejected due to debug counter.\n");
     443            0 :       return;
     444              :     }
     445              : 
     446              :   /* We're going to generate a fresh insn for the move, so put it
     447              :      into a sequence that we can emit after the current insn.   */
     448            0 :   start_sequence ();
     449            0 :   emit_move_insn (SET_DEST (set), new_src);
     450            0 :   rtx_insn *seq = end_sequence (); 
     451            0 :   emit_insn_after (seq, insn);
     452              : 
     453              :   /* Mark the destination as changed.  */
     454            0 :   rtx x = SET_DEST (set);
     455            0 :   while (SUBREG_P (x) || GET_CODE (x) == ZERO_EXTRACT)
     456            0 :     x = XEXP (x, 0);
     457            0 :   gcc_assert (REG_P (x));
     458            0 :   bitmap_set_bit (changed_pseudos, REGNO (x));
     459              : 
     460            0 :   if (dump_file)
     461              :     {
     462            0 :       fprintf (dump_file, "Successfully transformed to:\n");
     463            0 :       print_rtl_single (dump_file, PATTERN (seq));
     464            0 :       fprintf (dump_file, "\n");
     465              :     }
     466              : 
     467            0 :   delete_insn (insn);
     468              : 
     469              :   /* If NEW_SRC died in its prior location, then we need to remove the
     470              :      death note and move it to the new location.  */
     471            0 :   rtx note = find_regno_note (new_src_insn, REG_DEAD, REGNO (new_src));
     472            0 :   if (note)
     473              :     {
     474            0 :       remove_note (new_src_insn, note);
     475            0 :       add_reg_note (insn, REG_DEAD, new_src);
     476              :     }
     477              : }
     478              : 
     479              : 
     480              : /* INSN has a sign/zero extended source inside SET that we will
     481              :    try to turn into a SUBREG.  If NEW_SRC is non-null, use that
     482              :    for the new source of INSN's set.  That scenario only happens
     483              :    when we're optimizing a shift pair.  */
     484              : static void
     485         4847 : ext_dce_try_optimize_extension (rtx_insn *insn, rtx set)
     486              : {
     487         4847 :   rtx src = SET_SRC (set);
     488         4847 :   rtx inner = XEXP (src, 0);
     489              : 
     490              :   /* Avoid (subreg (mem)) and other constructs which may be valid RTL, but
     491              :      not useful for this optimization.  */
     492         4847 :   if (!(REG_P (inner) || (SUBREG_P (inner) && REG_P (SUBREG_REG (inner)))))
     493              :     return;
     494              : 
     495         2273 :   rtx new_pattern;
     496         2273 :   if (dump_file)
     497              :     {
     498            0 :       fprintf (dump_file, "Processing insn:\n");
     499            0 :       dump_insn_slim (dump_file, insn);
     500            0 :       fprintf (dump_file, "Trying to simplify pattern:\n");
     501            0 :       print_rtl_single (dump_file, SET_SRC (set));
     502              :     }
     503              : 
     504              :   /* We decided to turn do the optimization but allow it to be rejected for
     505              :      bisection purposes.  */
     506         2273 :   if (!dbg_cnt (::ext_dce))
     507              :     {
     508            0 :       if (dump_file)
     509            0 :         fprintf (dump_file, "Rejected due to debug counter.\n");
     510            0 :       return;
     511              :     }
     512              : 
     513         4546 :   new_pattern = simplify_gen_subreg (GET_MODE (src), inner,
     514         2273 :                                      GET_MODE (inner), 0);
     515              :   /* simplify_gen_subreg may fail in which case NEW_PATTERN will be NULL.
     516              :      We must not pass that as a replacement pattern to validate_change.  */
     517         2273 :   if (new_pattern)
     518              :     {
     519         2273 :       int ok = validate_change (insn, &SET_SRC (set), new_pattern, false);
     520              : 
     521         2273 :       rtx x = SET_DEST (set);
     522         2273 :       while (SUBREG_P (x) || GET_CODE (x) == ZERO_EXTRACT)
     523            0 :         x = XEXP (x, 0);
     524              : 
     525         2273 :       gcc_assert (REG_P (x));
     526         2273 :       if (ok)
     527         2273 :         bitmap_set_bit (changed_pseudos, REGNO (x));
     528              : 
     529         2273 :       if (dump_file)
     530              :         {
     531            0 :           if (ok)
     532            0 :             fprintf (dump_file, "Successfully transformed to:\n");
     533              :           else
     534            0 :             fprintf (dump_file, "Failed transformation to:\n");
     535              : 
     536            0 :           print_rtl_single (dump_file, new_pattern);
     537            0 :           fprintf (dump_file, "\n");
     538              :         }
     539              : 
     540              :       /* INSN may have a REG_EQUAL note indicating that the value was
     541              :          sign or zero extended.  That note is no longer valid since we've
     542              :          just removed the extension.  Just wipe the notes.  */
     543         2273 :       if (ok)
     544         2273 :         remove_reg_equal_equiv_notes (insn, false);
     545              :     }
     546              :   else
     547              :     {
     548            0 :       if (dump_file)
     549            0 :         fprintf (dump_file, "Unable to generate valid SUBREG expression.\n");
     550              :     }
     551              : }
     552              : 
     553              : /* Try to promote a narrow-mode operation wrapped in a sign/zero extension
     554              :    to the wider mode when the extended bits are dead.  For example,
     555              :    (sign_extend:DI (plus:SI (x) (y))) -> (plus:DI (x') (y'))
     556              :    where x' and y' are the operands promoted to DI mode.
     557              : 
     558              :    This enables the combine pass to match wider-mode target patterns
     559              :    (e.g., sh2add on RISC-V) that cannot match the narrow-mode operation.  */
     560              : 
     561              : static void
     562            0 : ext_dce_try_promote_operation (rtx_insn *insn, rtx set)
     563              : {
     564            0 :   rtx src = SET_SRC (set);
     565              : 
     566              :   /* If the extension was already optimized away, nothing to do.  */
     567            0 :   if (GET_CODE (src) != SIGN_EXTEND && GET_CODE (src) != ZERO_EXTEND)
     568            0 :     return;
     569              : 
     570            0 :   machine_mode outer_mode = GET_MODE (src);
     571            0 :   rtx inner = XEXP (src, 0);
     572              : 
     573              :   /* Only handle binary and unary arithmetic/logic operations.  */
     574            0 :   if (!BINARY_P (inner) && !UNARY_P (inner))
     575              :     return;
     576              : 
     577            0 :   rtx_code inner_code = GET_CODE (inner);
     578              : 
     579              :   /* Restrict to operations whose result in the low bits is identical
     580              :      regardless of input width (i.e., no high-bit dependencies).  */
     581            0 :   switch (inner_code)
     582              :     {
     583            0 :     case PLUS:
     584            0 :     case MINUS:
     585            0 :     case MULT:
     586            0 :     case NEG:
     587            0 :     case AND:
     588            0 :     case IOR:
     589            0 :     case XOR:
     590            0 :     case NOT:
     591            0 :     case ASHIFT:
     592            0 :       break;
     593              :     default:
     594              :       return;
     595              :     }
     596              : 
     597              :   /* Promote each operand to the outer mode.  */
     598            0 :   int nops = BINARY_P (inner) ? 2 : 1;
     599            0 :   rtx new_ops[2];
     600              : 
     601            0 :   for (int i = 0; i < nops; i++)
     602              :     {
     603            0 :       rtx op = XEXP (inner, i);
     604              : 
     605            0 :       if (CONST_INT_P (op))
     606            0 :         new_ops[i] = op;
     607            0 :       else if (REG_P (op))
     608              :         {
     609            0 :           new_ops[i] = simplify_gen_subreg (outer_mode, op,
     610            0 :                                             GET_MODE (op), 0);
     611            0 :           if (!new_ops[i])
     612              :             return;
     613              :         }
     614            0 :       else if (SUBREG_P (op) && REG_P (SUBREG_REG (op)))
     615              :         {
     616              :           /* The inner register may already be in the target mode
     617              :              (e.g., subreg:SI (reg:DI ...) 0).  Extract it directly
     618              :              rather than creating a paradoxical subreg of a subreg,
     619              :              which simplify_gen_subreg rejects.  */
     620            0 :           rtx inner_reg = SUBREG_REG (op);
     621            0 :           if (GET_MODE (inner_reg) == outer_mode)
     622            0 :             new_ops[i] = inner_reg;
     623              :           else
     624              :             {
     625            0 :               new_ops[i] = simplify_gen_subreg (outer_mode, inner_reg,
     626              :                                                 GET_MODE (inner_reg), 0);
     627            0 :               if (!new_ops[i])
     628              :                 return;
     629              :             }
     630              :         }
     631              :       else
     632              :         return;
     633              :     }
     634              : 
     635              :   /* Build the promoted operation.  */
     636            0 :   rtx new_src;
     637            0 :   if (BINARY_P (inner))
     638            0 :     new_src = gen_rtx_fmt_ee (inner_code, outer_mode,
     639              :                               new_ops[0], new_ops[1]);
     640              :   else
     641            0 :     new_src = gen_rtx_fmt_e (inner_code, outer_mode, new_ops[0]);
     642              : 
     643            0 :   if (dump_file)
     644              :     {
     645            0 :       fprintf (dump_file, "Processing insn:\n");
     646            0 :       dump_insn_slim (dump_file, insn);
     647            0 :       fprintf (dump_file, "Trying to promote to wider mode:\n");
     648            0 :       print_rtl_single (dump_file, new_src);
     649              :     }
     650              : 
     651              :   /* We decided to try the promotion but allow it to be rejected for
     652              :      bisection purposes.  */
     653            0 :   if (!dbg_cnt (::ext_dce))
     654              :     {
     655            0 :       if (dump_file)
     656            0 :         fprintf (dump_file, "Rejected due to debug counter.\n");
     657            0 :       return;
     658              :     }
     659              : 
     660            0 :   int ok = validate_change (insn, &SET_SRC (set), new_src, false);
     661              : 
     662            0 :   rtx x = SET_DEST (set);
     663            0 :   while (SUBREG_P (x) || GET_CODE (x) == ZERO_EXTRACT)
     664            0 :     x = XEXP (x, 0);
     665              : 
     666            0 :   gcc_assert (REG_P (x));
     667            0 :   if (ok)
     668            0 :     bitmap_set_bit (changed_pseudos, REGNO (x));
     669              : 
     670            0 :   if (dump_file)
     671              :     {
     672            0 :       if (ok)
     673            0 :         fprintf (dump_file, "Successfully promoted to:\n");
     674              :       else
     675            0 :         fprintf (dump_file, "Failed promotion to:\n");
     676            0 :       print_rtl_single (dump_file, new_src);
     677            0 :       fprintf (dump_file, "\n");
     678              :     }
     679              : 
     680            0 :   if (ok)
     681            0 :     remove_reg_equal_equiv_notes (insn, false);
     682              : }
     683              : 
     684              : /* Record INSN as a promotion candidate if it passes the same validity
     685              :    checks as ext_dce_try_promote_operation.  We defer actual promotion
     686              :    until we can determine whether the candidate is part of a chain.  */
     687              : 
     688              : static void
     689         2574 : ext_dce_record_promotion_candidate (rtx_insn *insn, rtx set)
     690              : {
     691         2574 :   rtx src = SET_SRC (set);
     692              : 
     693         2574 :   if (GET_CODE (src) != SIGN_EXTEND && GET_CODE (src) != ZERO_EXTEND)
     694              :     return;
     695              : 
     696         2574 :   machine_mode outer_mode = GET_MODE (src);
     697         2574 :   rtx inner = XEXP (src, 0);
     698              : 
     699         2574 :   if (!BINARY_P (inner) && !UNARY_P (inner))
     700              :     return;
     701              : 
     702         1670 :   rtx_code inner_code = GET_CODE (inner);
     703              : 
     704         1670 :   switch (inner_code)
     705              :     {
     706            0 :     case PLUS:
     707            0 :     case MINUS:
     708            0 :     case MULT:
     709            0 :     case NEG:
     710            0 :     case AND:
     711            0 :     case IOR:
     712            0 :     case XOR:
     713            0 :     case NOT:
     714            0 :     case ASHIFT:
     715            0 :       break;
     716              :     default:
     717              :       return;
     718              :     }
     719              : 
     720              :   /* Dry-run: check that all operands can be promoted.  */
     721            0 :   int nops = BINARY_P (inner) ? 2 : 1;
     722            0 :   for (int i = 0; i < nops; i++)
     723              :     {
     724            0 :       rtx op = XEXP (inner, i);
     725            0 :       if (CONST_INT_P (op))
     726            0 :         continue;
     727            0 :       else if (REG_P (op))
     728              :         {
     729            0 :           if (!simplify_gen_subreg (outer_mode, op, GET_MODE (op), 0))
     730              :             return;
     731              :         }
     732            0 :       else if (SUBREG_P (op) && REG_P (SUBREG_REG (op)))
     733              :         {
     734            0 :           rtx inner_reg = SUBREG_REG (op);
     735            0 :           if (GET_MODE (inner_reg) != outer_mode
     736            0 :               && !simplify_gen_subreg (outer_mode, inner_reg,
     737              :                                        GET_MODE (inner_reg), 0))
     738              :             return;
     739              :         }
     740              :       else
     741              :         return;
     742              :     }
     743              : 
     744              :   /* Find the destination register.  */
     745            0 :   rtx dest = SET_DEST (set);
     746            0 :   while (SUBREG_P (dest) || GET_CODE (dest) == ZERO_EXTRACT)
     747            0 :     dest = XEXP (dest, 0);
     748            0 :   if (!REG_P (dest))
     749              :     return;
     750            0 :   unsigned int dest_regno = REGNO (dest);
     751              : 
     752              :   /* Record the candidate.  */
     753            0 :   promotion_candidates.safe_push ({insn, set});
     754            0 :   bitmap_set_bit (promotable_dests, dest_regno);
     755              : 
     756              :   /* Mark register operands as consumed by a candidate.  */
     757            0 :   for (int i = 0; i < nops; i++)
     758              :     {
     759            0 :       rtx op = XEXP (inner, i);
     760            0 :       if (REG_P (op))
     761            0 :         bitmap_set_bit (consumed_by_candidate, REGNO (op));
     762            0 :       else if (SUBREG_P (op) && REG_P (SUBREG_REG (op)))
     763            0 :         bitmap_set_bit (consumed_by_candidate, REGNO (SUBREG_REG (op)));
     764              :     }
     765              : }
     766              : 
     767              : /* Promote candidates that form chains: a candidate whose result feeds
     768              :    into another candidate's operand, or whose operand comes from another
     769              :    candidate's result.  Skip standalone (isolated) promotions.  */
     770              : 
     771              : static void
     772      9729754 : ext_dce_promote_chained_candidates (void)
     773              : {
     774              :   /* Propagate chain info through copies recorded during the reverse scan.
     775              :      Since copies are recorded in reverse order, iterate forward to propagate
     776              :      promotable_dests (which was set late in the scan) through copies that
     777              :      were seen earlier.  */
     778      9729754 :   unsigned cix;
     779      9729754 :   copy_info *cp;
     780      9732027 :   FOR_EACH_VEC_ELT (promotion_copies, cix, cp)
     781              :     {
     782         2273 :       if (bitmap_bit_p (promotable_dests, cp->src_regno))
     783            0 :         bitmap_set_bit (promotable_dests, cp->dest_regno);
     784         2273 :       if (bitmap_bit_p (consumed_by_candidate, cp->dest_regno))
     785            0 :         bitmap_set_bit (consumed_by_candidate, cp->src_regno);
     786              :     }
     787              : 
     788              :   unsigned ix;
     789              :   promotion_candidate_info *cand;
     790              : 
     791      9729754 :   FOR_EACH_VEC_ELT (promotion_candidates, ix, cand)
     792              :     {
     793              :       /* Find destination register.  */
     794            0 :       rtx dest = SET_DEST (cand->set);
     795            0 :       while (SUBREG_P (dest) || GET_CODE (dest) == ZERO_EXTRACT)
     796            0 :         dest = XEXP (dest, 0);
     797            0 :       unsigned int dest_regno = REGNO (dest);
     798              : 
     799              :       /* Check if this candidate's result feeds into another candidate.  */
     800            0 :       bool is_chained = bitmap_bit_p (consumed_by_candidate, dest_regno);
     801              : 
     802              :       /* Check if any operand comes from another candidate's result.  */
     803            0 :       if (!is_chained)
     804              :         {
     805            0 :           rtx inner = XEXP (SET_SRC (cand->set), 0);
     806            0 :           int nops = BINARY_P (inner) ? 2 : 1;
     807            0 :           for (int i = 0; i < nops && !is_chained; i++)
     808              :             {
     809            0 :               rtx op = XEXP (inner, i);
     810            0 :               if (REG_P (op))
     811            0 :                 is_chained = bitmap_bit_p (promotable_dests, REGNO (op));
     812            0 :               else if (SUBREG_P (op) && REG_P (SUBREG_REG (op)))
     813            0 :                 is_chained = bitmap_bit_p (promotable_dests,
     814            0 :                                            REGNO (SUBREG_REG (op)));
     815              :             }
     816              :         }
     817              : 
     818            0 :       if (is_chained)
     819            0 :         ext_dce_try_promote_operation (cand->insn, cand->set);
     820            0 :       else if (dump_file)
     821              :         {
     822            0 :           fprintf (dump_file, "Skipping standalone promotion for insn:\n");
     823            0 :           dump_insn_slim (dump_file, cand->insn);
     824            0 :           fprintf (dump_file, "\n");
     825              :         }
     826              :     }
     827              : 
     828      9729754 :   promotion_candidates.truncate (0);
     829      9729754 :   promotion_copies.truncate (0);
     830      9729754 :   bitmap_clear (promotable_dests);
     831      9729754 :   bitmap_clear (consumed_by_candidate);
     832      9729754 : }
     833              : 
     834              : /* Some operators imply that their second operand is fully live,
     835              :    regardless of how many bits in the output are live.  An example
     836              :    would be the shift count on a target without SHIFT_COUNT_TRUNCATED
     837              :    defined.
     838              : 
     839              :    Return TRUE if CODE is such an operator.  FALSE otherwise.  */
     840              : 
     841              : static bool
     842     76209628 : binop_implies_op2_fully_live (rtx_code code)
     843              : {
     844            0 :   switch (code)
     845              :     {
     846              :     case ASHIFT:
     847              :     case LSHIFTRT:
     848              :     case ASHIFTRT:
     849              :     case ROTATE:
     850              :     case ROTATERT:
     851              :     case SS_ASHIFT:
     852              :     case US_ASHIFT:
     853              :       return !SHIFT_COUNT_TRUNCATED;
     854              : 
     855            0 :     default:
     856            0 :       return false;
     857              :     }
     858              : }
     859              : 
     860              : /* X, with code CODE, is an operation for which safe_for_live_propagation
     861              :    holds true, and bits set in MASK are live in the result.  Compute a
     862              :    mask of (potentially) live bits in the non-constant inputs.  In case of
     863              :    binop_implies_op2_fully_live (e.g. shifts), the computed mask may
     864              :    exclusively pertain to the first operand.
     865              : 
     866              :    This looks wrong as we may have some important operations embedded as
     867              :    operands of another operation.  For example, we might have an extension
     868              :    wrapping a shift.  It really feels like this needs to be recursing down
     869              :    into operands much more often.  */
     870              : 
     871              : unsigned HOST_WIDE_INT
     872     71102720 : carry_backpropagate (unsigned HOST_WIDE_INT mask, enum rtx_code code, rtx x)
     873              : {
     874     72808859 :   if (mask == 0)
     875              :     return 0;
     876              : 
     877     72808833 :   enum machine_mode mode = GET_MODE_INNER (GET_MODE (x));
     878     72808833 :   unsigned HOST_WIDE_INT mmask = GET_MODE_MASK (mode);
     879              : 
     880              :   /* While we don't try to optimize operations on types larger
     881              :      than 64 bits, we do want to make sure not to invoke undefined
     882              :      behavior when presented with such operations during use
     883              :      processing.  The safe thing to do is to just return mmask
     884              :      for that scenario indicating every possible chunk is life.  */
     885     72808833 :   scalar_int_mode smode;
     886     72808833 :   if (!is_a <scalar_int_mode> (mode, &smode)
     887     60726460 :       || GET_MODE_BITSIZE (smode) > HOST_BITS_PER_WIDE_INT)
     888              :     return mmask;
     889              : 
     890     58649492 :   switch (code)
     891              :     {
     892     16437728 :     case PLUS:
     893     16437728 :     case MINUS:
     894     16437728 :     case MULT:
     895     16437728 :       return (HOST_WIDE_INT_UC (2) << floor_log2 (mask)) - 1;
     896              : 
     897              :     /* We propagate for the shifted operand, but not the shift
     898              :        count.  The count is handled specially.  */
     899      1401452 :     case ASHIFT:
     900      1401452 :       if (CONST_INT_P (XEXP (x, 1))
     901      2731606 :           && UINTVAL (XEXP (x, 1)) < GET_MODE_BITSIZE (smode))
     902      1330118 :         return (HOST_WIDE_INT) mask >> INTVAL (XEXP (x, 1));
     903        71334 :       return (HOST_WIDE_INT_UC (2) << floor_log2 (mask)) - 1;
     904              : 
     905              :     /* We propagate for the shifted operand, but not the shift
     906              :        count.  The count is handled specially.  */
     907       642001 :     case LSHIFTRT:
     908       642001 :       if (CONST_INT_P (XEXP (x, 1))
     909      1251901 :           && UINTVAL (XEXP (x, 1)) < GET_MODE_BITSIZE (smode))
     910       609872 :         return mmask & (mask << INTVAL (XEXP (x, 1)));
     911              :       return mmask;
     912              : 
     913              :     /* We propagate for the shifted operand, but not the shift
     914              :        count.  The count is handled specially.  */
     915       293138 :     case ASHIFTRT:
     916       293138 :       if (CONST_INT_P (XEXP (x, 1))
     917       572304 :           && UINTVAL (XEXP (x, 1)) < GET_MODE_BITSIZE (smode))
     918              :         {
     919       279122 :           HOST_WIDE_INT sign = 0;
     920       279122 :           if (HOST_BITS_PER_WIDE_INT - clz_hwi (mask) + INTVAL (XEXP (x, 1))
     921       279122 :               > GET_MODE_BITSIZE (smode))
     922       558244 :             sign = HOST_WIDE_INT_1U << (GET_MODE_BITSIZE (smode) - 1);
     923       279122 :           return sign | (mmask & (mask << INTVAL (XEXP (x, 1))));
     924              :         }
     925              :       return mmask;
     926              : 
     927        37519 :     case SMUL_HIGHPART:
     928        37519 :     case UMUL_HIGHPART:
     929        37519 :       if (XEXP (x, 1) == const0_rtx)
     930              :         return 0;
     931        37519 :       if (XEXP (x, 1) == const1_rtx)
     932              :         return mmask;
     933        37519 :       if (CONST_INT_P (XEXP (x, 1)))
     934              :         {
     935            0 :           if (pow2p_hwi (INTVAL (XEXP (x, 1))))
     936            0 :             return mmask & (mask << (GET_MODE_BITSIZE (smode)
     937            0 :                                      - exact_log2 (INTVAL (XEXP (x, 1)))));
     938              : 
     939            0 :           int bits = (HOST_BITS_PER_WIDE_INT + GET_MODE_BITSIZE (smode)
     940            0 :                       - clz_hwi (mask) - ctz_hwi (INTVAL (XEXP (x, 1))));
     941            0 :           if (bits < GET_MODE_BITSIZE (smode))
     942            0 :             return (HOST_WIDE_INT_1U << bits) - 1;
     943              :         }
     944              :       return mmask;
     945              : 
     946       558687 :     case SIGN_EXTEND:
     947       558687 :       if (!GET_MODE_BITSIZE (GET_MODE (x)).is_constant ()
     948       558687 :           || !GET_MODE_BITSIZE (GET_MODE (XEXP (x, 0))).is_constant ())
     949              :         return -1;
     950              : 
     951              :       /* We want the mode of the inner object.  We need to ensure its
     952              :          sign bit is on in MASK.  */
     953       558687 :       mode = GET_MODE_INNER (GET_MODE (XEXP (x, 0)));
     954       558687 :       if (mask & ~GET_MODE_MASK (mode))
     955       558350 :         mask |= HOST_WIDE_INT_1U << (GET_MODE_BITSIZE (mode).to_constant ()
     956       558350 :                                      - 1);
     957              : 
     958              :       /* Recurse into the operand.  */
     959       558687 :       return carry_backpropagate (mask, GET_CODE (XEXP (x, 0)), XEXP (x, 0));
     960              : 
     961      1147452 :     case ZERO_EXTEND:
     962      1147452 :       if (!GET_MODE_BITSIZE (GET_MODE (x)).is_constant ()
     963      1147452 :           || !GET_MODE_BITSIZE (GET_MODE (XEXP (x, 0))).is_constant ())
     964              :         return -1;
     965              : 
     966              :       /* Recurse into the operand.  */
     967      1147452 :       return carry_backpropagate (mask, GET_CODE (XEXP (x, 0)), XEXP (x, 0));
     968              : 
     969              :     /* We propagate for the shifted operand, but not the shift
     970              :        count.  The count is handled specially.  */
     971            0 :     case SS_ASHIFT:
     972            0 :     case US_ASHIFT:
     973            0 :       if (CONST_INT_P (XEXP (x, 1))
     974            0 :           && UINTVAL (XEXP (x, 1)) < GET_MODE_BITSIZE (smode))
     975              :         {
     976            0 :           return ((mmask & ~((unsigned HOST_WIDE_INT) mmask
     977            0 :                              >> (INTVAL (XEXP (x, 1))
     978            0 :                                  + (XEXP (x, 1) != const0_rtx
     979            0 :                                     && code == SS_ASHIFT))))
     980            0 :                   | ((HOST_WIDE_INT) mask >> INTVAL (XEXP (x, 1))));
     981              :         }
     982              :       return mmask;
     983              : 
     984              :     default:
     985              :       return mask;
     986              :     }
     987              : }
     988              : 
     989              : /* Process uses in INSN contained in OBJ.  Set appropriate bits in LIVENOW
     990              :    for any chunks of pseudos that become live, potentially filtering using
     991              :    bits from LIVE_TMP.
     992              : 
     993              :    If MODIFY is true, then optimize sign/zero extensions to SUBREGs when
     994              :    the extended bits are never read and mark pseudos which had extensions
     995              :    eliminated in CHANGED_PSEUDOS.  */
     996              : 
     997              : static void
     998    137109191 : ext_dce_process_uses (rtx_insn *insn, rtx obj,
     999              :                       bitmap live_tmp, bool skipped_dest)
    1000              : {
    1001    137109191 :   subrtx_var_iterator::array_type array_var;
    1002    753455382 :   FOR_EACH_SUBRTX_VAR (iter, array_var, obj, NONCONST)
    1003              :     {
    1004              :       /* An EXPR_LIST (from call fusage) ends in NULL_RTX.  */
    1005    616346191 :       rtx x = *iter;
    1006    616346191 :       if (x == NULL_RTX)
    1007      9452414 :         continue;
    1008              : 
    1009              :       /* So the basic idea in this FOR_EACH_SUBRTX_VAR loop is to
    1010              :          handle SETs explicitly, possibly propagating live information
    1011              :          into the uses.
    1012              : 
    1013              :          We may continue the loop at various points which will cause
    1014              :          iteration into the next level of RTL.  Breaking from the loop
    1015              :          is never safe as it can lead us to fail to process some of the
    1016              :          RTL and thus not make objects live when necessary.  */
    1017    606893777 :       enum rtx_code xcode = GET_CODE (x);
    1018    606893777 :       if (xcode == SET)
    1019              :         {
    1020    121252155 :           const_rtx dst = SET_DEST (x);
    1021    121252155 :           rtx src = SET_SRC (x);
    1022    121252155 :           const_rtx y;
    1023    121252155 :           unsigned HOST_WIDE_INT bit = 0;
    1024              : 
    1025              :           /* The code of the RHS of a SET.  */
    1026    121252155 :           enum rtx_code code = GET_CODE (src);
    1027              : 
    1028              :           /* ?!? How much of this should mirror SET handling, potentially
    1029              :              being shared?   */
    1030    121252155 :           if (SUBREG_P (dst) && subreg_lsb (dst).is_constant (&bit))
    1031              :             {
    1032       587847 :               if (bit >= HOST_BITS_PER_WIDE_INT)
    1033              :                 bit = HOST_BITS_PER_WIDE_INT - 1;
    1034       587847 :               dst = SUBREG_REG (dst);
    1035              :             }
    1036    120664308 :           else if (GET_CODE (dst) == STRICT_LOW_PART)
    1037        13316 :             dst = XEXP (dst, 0);
    1038              : 
    1039              :           /* Main processing of the uses.  Two major goals here.
    1040              : 
    1041              :              First, we want to try and propagate liveness (or the lack
    1042              :              thereof) from the destination register to the source
    1043              :              register(s).
    1044              : 
    1045              :              Second, if the source is an extension, try to optimize
    1046              :              it into a SUBREG.  The SUBREG form indicates we don't
    1047              :              care about the upper bits and will usually be copy
    1048              :              propagated away.
    1049              : 
    1050              :              If we fail to handle something in here, the expectation
    1051              :              is the iterator will dive into the sub-components and
    1052              :              mark all the chunks in any found REGs as live.  */
    1053    121252155 :           if (REG_P (dst) && safe_for_live_propagation (code))
    1054              :             {
    1055              :               /* Create a mask representing the bits of this output
    1056              :                  operand that are live after this insn.  We can use
    1057              :                  this information to refine the live in state of
    1058              :                  inputs to this insn in many cases.
    1059              : 
    1060              :                  We have to do this on a per SET basis, we might have
    1061              :                  an INSN with multiple SETS, some of which can narrow
    1062              :                  the source operand liveness, some of which may not.  */
    1063     71102720 :               unsigned HOST_WIDE_INT dst_mask = 0;
    1064     71102720 :               HOST_WIDE_INT rn = REGNO (dst);
    1065     71102720 :               unsigned HOST_WIDE_INT mask_array[]
    1066              :                 = { 0xff, 0xff00, HOST_WIDE_INT_UC (0xffff0000),
    1067              :                     -HOST_WIDE_INT_UC (0x100000000) };
    1068    355513600 :               for (int i = 0; i < 4; i++)
    1069    284410880 :                 if (bitmap_bit_p (live_tmp, 4 * rn + i))
    1070    227946870 :                   dst_mask |= mask_array[i];
    1071     71102720 :               dst_mask >>= bit;
    1072              : 
    1073              :               /* If we ignored a destination during set processing, then
    1074              :                  consider all the bits live.  */
    1075     71102720 :               if (skipped_dest)
    1076     25125095 :                 dst_mask = -1;
    1077              : 
    1078     71102720 :               dst_mask = carry_backpropagate (dst_mask, code, src);
    1079              : 
    1080              :               /* ??? Could also handle ZERO_EXTRACT / SIGN_EXTRACT
    1081              :                  of the source specially to improve optimization.  */
    1082     71102720 :               if (code == SIGN_EXTEND || code == ZERO_EXTEND)
    1083              :                 {
    1084      1717796 :                   rtx inner = XEXP (src, 0);
    1085      1717796 :                   unsigned HOST_WIDE_INT src_mask
    1086      1717796 :                     = GET_MODE_MASK (GET_MODE_INNER (GET_MODE (inner)));
    1087              : 
    1088              :                   /* DST_MASK could be zero if we had something in the SET
    1089              :                      that we couldn't handle.  */
    1090      1717796 :                   if (modify && !skipped_dest && (dst_mask & ~src_mask) == 0)
    1091              :                     {
    1092         4847 :                       ext_dce_try_optimize_extension (insn, x);
    1093              : 
    1094              :                       /* If the extension was optimized to a copy, propagate
    1095              :                          chain info through it: if the dest is consumed by a
    1096              :                          promotion candidate (seen later in reverse scan),
    1097              :                          the source register is transitively consumed too.  */
    1098         4847 :                       rtx opt_src = SET_SRC (x);
    1099         4847 :                       if (GET_CODE (opt_src) != SIGN_EXTEND
    1100         4847 :                           && GET_CODE (opt_src) != ZERO_EXTEND)
    1101              :                         {
    1102         2273 :                           rtx copy_dest = SET_DEST (x);
    1103         2273 :                           while (SUBREG_P (copy_dest)
    1104         2273 :                                  || GET_CODE (copy_dest) == ZERO_EXTRACT)
    1105            0 :                             copy_dest = XEXP (copy_dest, 0);
    1106              : 
    1107         2273 :                           rtx copy_src = opt_src;
    1108         2273 :                           if (SUBREG_P (copy_src))
    1109         2016 :                             copy_src = SUBREG_REG (copy_src);
    1110              : 
    1111         2273 :                           if (REG_P (copy_dest) && REG_P (copy_src))
    1112              :                             {
    1113         2273 :                               if (bitmap_bit_p (consumed_by_candidate,
    1114         2273 :                                                 REGNO (copy_dest)))
    1115            0 :                                 bitmap_set_bit (consumed_by_candidate,
    1116            0 :                                                 REGNO (copy_src));
    1117         2273 :                               if (bitmap_bit_p (promotable_dests,
    1118         2273 :                                                 REGNO (copy_src)))
    1119            0 :                                 bitmap_set_bit (promotable_dests,
    1120            0 :                                                 REGNO (copy_dest));
    1121         2273 :                               promotion_copies.safe_push (
    1122         2273 :                                 {REGNO (copy_dest), REGNO (copy_src)});
    1123              :                             }
    1124              :                         }
    1125              :                       else
    1126         2574 :                         ext_dce_record_promotion_candidate (insn, x);
    1127              :                     }
    1128              : 
    1129              :                   /* Stripping the extension here just seems wrong on multiple
    1130              :                      levels.  It's source side handling, so it seems like it
    1131              :                      belongs in the loop below.  Stripping here also makes it
    1132              :                      harder than necessary to properly handle live bit groups
    1133              :                      for (ANY_EXTEND (SUBREG)) where the SUBREG has
    1134              :                      SUBREG_PROMOTED state.  */
    1135      1717796 :                   dst_mask &= src_mask;
    1136      1717796 :                   src = XEXP (src, 0);
    1137      1717796 :                   code = GET_CODE (src);
    1138              :                 }
    1139              : 
    1140              :               /* Special case for (sub)targets that do not have extension
    1141              :                  insns (and thus use shifts).  We want to detect when we have
    1142              :                  a shift pair and treat the pair as-if was an extension.
    1143              : 
    1144              :                  Key on the right shift and use (for now) simplistic tests
    1145              :                  to find the corresponding left shift.  */
    1146     71102720 :               scalar_mode outer_mode;
    1147     71102720 :               if ((code == LSHIFTRT || code == ASHIFTRT)
    1148      1028285 :                   && CONST_INT_P (XEXP (src, 1))
    1149      1135440 :                   && (INTVAL (XEXP (src, 1)) == BITS_PER_WORD - 8
    1150      1131045 :                       || INTVAL (XEXP (src, 1)) == BITS_PER_WORD - 16
    1151       968158 :                       || INTVAL (XEXP (src, 1)) == BITS_PER_WORD - 32)
    1152     71168573 :                   && is_a <scalar_mode> (GET_MODE (src), &outer_mode)
    1153     71168573 :                   && GET_MODE_BITSIZE (outer_mode) <= HOST_BITS_PER_WIDE_INT)
    1154              :                 {
    1155              :                   /* So we have a right shift that could correspond to
    1156              :                      the second in a pair impementing QI, HI or SI -> DI
    1157              :                      extension.  See if we can find the left shift.  For
    1158              :                      now, just look one real instruction back.  */
    1159        65709 :                   rtx_insn *prev_insn = prev_nonnote_nondebug_insn_bb (insn);
    1160              : 
    1161              :                   /* The previous insn must be a left shift by the same
    1162              :                      amount.  */
    1163        65709 :                   rtx prev_set;
    1164        65709 :                   if (prev_insn
    1165        62884 :                       && (prev_set = single_set (prev_insn))
    1166              :                       /* The destination of the left shift must be the
    1167              :                          source of the right shift.  */
    1168        62803 :                       && SET_DEST (prev_set) == XEXP (src, 0)
    1169        30926 :                       && GET_CODE (SET_SRC (prev_set)) == ASHIFT
    1170          811 :                       && CONST_INT_P (XEXP (SET_SRC (prev_set), 1))
    1171              :                       /* The counts must match.  */
    1172        65709 :                       && (INTVAL (XEXP (src, 1))
    1173          795 :                           == INTVAL (XEXP (SET_SRC (prev_set), 1))))
    1174              :                     {
    1175           15 :                       unsigned HOST_WIDE_INT src_mask = GET_MODE_BITSIZE (GET_MODE (src)).to_constant ();
    1176           15 :                       src_mask -= INTVAL (XEXP (src, 1));
    1177           15 :                       src_mask = (HOST_WIDE_INT_1U << src_mask) - 1;
    1178              : 
    1179              :                       /* DST_MASK has been adjusted for INSN.  We need its original value.  */
    1180           15 :                       unsigned HOST_WIDE_INT tmp_mask = 0;
    1181           75 :                       for (int i = 0; i < 4; i++)
    1182           60 :                         if (bitmap_bit_p (live_tmp, 4 * rn + i))
    1183           15 :                           tmp_mask |= mask_array[i];
    1184           15 :                       tmp_mask >>= bit;
    1185              : 
    1186           15 :                       if (modify && !skipped_dest && (tmp_mask & ~src_mask) == 0)
    1187              :                         {
    1188            0 :                           ext_dce_try_optimize_rshift (insn, x, XEXP (SET_SRC (prev_set), 0), prev_insn);
    1189              : 
    1190              :                           /* These may not strictly be necessary, but we might as well try and be
    1191              :                              as accurate as possible.  The RHS is now a simple REG.  */
    1192            0 :                           dst_mask = src_mask;
    1193            0 :                           src = XEXP (SET_SRC (prev_set), 0);
    1194            0 :                           code = GET_CODE (src);
    1195              :                         }
    1196              :                     }
    1197              :                 }
    1198              : 
    1199              :               /* Optimization is done at this point.  We just want to make
    1200              :                  sure everything that should get marked as live is marked
    1201              :                  from here onward.  */
    1202              : 
    1203              :               /* We will handle the other operand of a binary operator
    1204              :                  at the bottom of the loop by resetting Y.  */
    1205     71102720 :               if (BINARY_P (src))
    1206     22385088 :                 y = XEXP (src, 0);
    1207              :               else
    1208              :                 y = src;
    1209              : 
    1210              :               /* We're inside a SET and want to process the source operands
    1211              :                  making things live.  Breaking from this loop will cause
    1212              :                  the iterator to work on sub-rtxs, so it is safe to break
    1213              :                  if we see something we don't know how to handle.
    1214              : 
    1215              :                  This code is just hokey as it really just handles trivial
    1216              :                  unary and binary cases.  Otherwise the loop exits and we
    1217              :                  continue iterating on sub-rtxs, but outside the set context.  */
    1218              :               unsigned HOST_WIDE_INT save_mask = dst_mask;
    1219    114900646 :               for (;;)
    1220              :                 {
    1221              :                   /* In general we want to restore DST_MASK before each loop
    1222              :                      iteration.  The exception is when the opcode implies that
    1223              :                      the other operand is fully live.  That's handled by
    1224              :                      changing SAVE_MASK below.  */
    1225     93001683 :                   dst_mask = save_mask;
    1226              :                   /* Strip an outer paradoxical subreg.  The bits outside
    1227              :                      the inner mode are don't cares.  So we can just strip
    1228              :                      and process the inner object.  */
    1229     93001683 :                   if (paradoxical_subreg_p (y))
    1230              :                     y = XEXP (y, 0);
    1231     92899782 :                   else if (SUBREG_P (y) && subreg_lsb (y).is_constant (&bit))
    1232              :                     {
    1233              :                       /* If !TRULY_NOOP_TRUNCATION_MODES_P, the mode
    1234              :                          change performed by Y would normally need to be a
    1235              :                          TRUNCATE rather than a SUBREG.  It is probably the
    1236              :                          guarantee provided by SUBREG_PROMOTED_VAR_P that
    1237              :                          allows the SUBREG in Y as an exception.  We must
    1238              :                          therefore preserve that guarantee and treat the
    1239              :                          upper bits of the inner register as live
    1240              :                          regardless of the outer code.  See PR 120050.  */
    1241      1918129 :                       if (!REG_P (SUBREG_REG (y))
    1242      1918129 :                           || (SUBREG_PROMOTED_VAR_P (y)
    1243        13408 :                               && (!TRULY_NOOP_TRUNCATION_MODES_P (
    1244              :                                     GET_MODE (y),
    1245              :                                     GET_MODE (SUBREG_REG (y))))))
    1246              :                         break;
    1247              : 
    1248              :                       /* If this is a wide object (more bits than we can fit
    1249              :                          in a HOST_WIDE_INT), then just break from the SET
    1250              :                          context.   That will cause the iterator to walk down
    1251              :                          into the subrtx and if we land on a REG we'll mark
    1252              :                          the whole think live.  */
    1253      1917155 :                       if (bit >= HOST_BITS_PER_WIDE_INT)
    1254              :                         break;
    1255              : 
    1256              :                       /* The SUBREG's mode determines the live width.  */
    1257      1714586 :                       if (dst_mask)
    1258              :                         {
    1259      1714586 :                           dst_mask <<= bit;
    1260      1714586 :                           if (!dst_mask)
    1261            0 :                             dst_mask = -HOST_WIDE_INT_UC (0x100000000);
    1262              :                         }
    1263      1714586 :                       y = SUBREG_REG (y);
    1264              :                     }
    1265              : 
    1266     92798140 :                   if (REG_P (y))
    1267              :                     {
    1268              :                       /* We have found the use of a register.  We need to mark
    1269              :                          the appropriate chunks of the register live.  The mode
    1270              :                          of the REG is a starting point.  We may refine that
    1271              :                          based on what chunks in the output were live.  */
    1272     49361130 :                       rn = 4 * REGNO (y);
    1273     49361130 :                       unsigned HOST_WIDE_INT tmp_mask = dst_mask;
    1274              : 
    1275              :                       /* If the RTX code for the SET_SRC is not one we can
    1276              :                          propagate destination liveness through, then just
    1277              :                          set the mask to the mode's mask.  */
    1278     49361130 :                       if (!safe_for_live_propagation (code))
    1279        33113 :                         tmp_mask
    1280        66226 :                           = GET_MODE_MASK (GET_MODE_INNER (GET_MODE (y)));
    1281              : 
    1282     49361130 :                       if (tmp_mask & 0xff)
    1283     48945432 :                         bitmap_set_bit (livenow, rn);
    1284     49361130 :                       if (tmp_mask & 0xff00)
    1285     47533771 :                         bitmap_set_bit (livenow, rn + 1);
    1286     49361130 :                       if (tmp_mask & HOST_WIDE_INT_UC (0xffff0000))
    1287     47284717 :                         bitmap_set_bit (livenow, rn + 2);
    1288     49361130 :                       if (tmp_mask & -HOST_WIDE_INT_UC (0x100000000))
    1289     40796008 :                         bitmap_set_bit (livenow, rn + 3);
    1290              :                     }
    1291     43437010 :                   else if (!CONSTANT_P (y))
    1292              :                     break;
    1293              : 
    1294              :                   /* We might have (ashift (const_int 1) (reg...))
    1295              :                      By setting dst_mask we can continue iterating on the
    1296              :                      the next operand and it will be considered fully live.
    1297              : 
    1298              :                      Note that since we restore DST_MASK from SAVE_MASK at the
    1299              :                      top of the loop, we have to change SAVE_MASK to get the
    1300              :                      semantics we want.  */
    1301     76209628 :                   if (binop_implies_op2_fully_live (GET_CODE (src)))
    1302      2442492 :                     save_mask = -1;
    1303              : 
    1304              :                   /* If this was anything but a binary operand, break the inner
    1305              :                      loop.  This is conservatively correct as it will cause the
    1306              :                      iterator to look at the sub-rtxs outside the SET context.  */
    1307     76209628 :                   if (!BINARY_P (src))
    1308              :                     break;
    1309              : 
    1310              :                   /* We processed the first operand of a binary operator.  Now
    1311              :                      handle the second.  */
    1312     21898963 :                   y = XEXP (src, 1), src = pc_rtx;
    1313     21898963 :                 }
    1314              : 
    1315              :               /* These are leaf nodes, no need to iterate down into them.  */
    1316     71102720 :               if (REG_P (y) || CONSTANT_P (y))
    1317     54310665 :                 iter.skip_subrtxes ();
    1318              :             }
    1319              :         }
    1320              :       /* If we are reading the low part of a SUBREG, then we can
    1321              :          refine liveness of the input register, otherwise let the
    1322              :          iterator continue into SUBREG_REG.  */
    1323    485641622 :       else if (SUBREG_P (x)
    1324      1314519 :                && REG_P (SUBREG_REG (x))
    1325      1312705 :                && !paradoxical_subreg_p (x)
    1326      1288737 :                && subreg_lowpart_p (x)
    1327      1016151 :                && GET_MODE_BITSIZE (GET_MODE (x)).is_constant ()
    1328    487673924 :                && GET_MODE_BITSIZE (GET_MODE (x)).to_constant () <= 32)
    1329              :         {
    1330       503587 :           HOST_WIDE_INT size = GET_MODE_BITSIZE (GET_MODE (x)).to_constant ();
    1331       503587 :           HOST_WIDE_INT rn = 4 * REGNO (SUBREG_REG (x));
    1332              : 
    1333              :           /* If this is a promoted subreg, then more of it may be live than
    1334              :              is otherwise obvious.  */
    1335       503587 :           if (SUBREG_PROMOTED_VAR_P (x))
    1336         4432 :             size = GET_MODE_BITSIZE (GET_MODE (SUBREG_REG (x))).to_constant ();
    1337              : 
    1338       503587 :           bitmap_set_bit (livenow, rn);
    1339       503587 :           if (size > 8)
    1340       328958 :             bitmap_set_bit (livenow, rn + 1);
    1341       328958 :           if (size > 16)
    1342       286607 :             bitmap_set_bit (livenow, rn + 2);
    1343       286607 :           if (size >= 32)
    1344       286607 :             bitmap_set_bit (livenow, rn + 3);
    1345       503587 :           iter.skip_subrtxes ();
    1346              :         }
    1347              :       /* If we have a register reference that is not otherwise handled,
    1348              :          just assume all the chunks are live.  */
    1349    485138035 :       else if (REG_P (x))
    1350    161051089 :         bitmap_set_range (livenow, REGNO (x) * 4, group_limit (x));
    1351              :     }
    1352    137109191 : }
    1353              : 
    1354              : /* Process a single basic block BB with current liveness information
    1355              :    in LIVENOW, returning updated liveness information.
    1356              : 
    1357              :    If MODIFY is true, then this is the last pass and unnecessary
    1358              :    extensions should be eliminated when possible.  If an extension
    1359              :    is removed, the source pseudo is marked in CHANGED_PSEUDOS.  */
    1360              : 
    1361              : static void
    1362     22781109 : ext_dce_process_bb (basic_block bb)
    1363              : {
    1364     22781109 :   rtx_insn *insn;
    1365              : 
    1366    300557375 :   FOR_BB_INSNS_REVERSE (bb, insn)
    1367              :     {
    1368    277776266 :       if (!NONDEBUG_INSN_P (insn))
    1369    150119489 :         continue;
    1370              : 
    1371              :       /* Live-out state of the destination of this insn.  We can
    1372              :          use this to refine the live-in state of the sources of
    1373              :          this insn in many cases.  */
    1374    127656777 :       bitmap live_tmp = BITMAP_ALLOC (NULL);
    1375              : 
    1376              :       /* First process any sets/clobbers in INSN.  */
    1377    127656777 :       bool skipped_dest = ext_dce_process_sets (insn, PATTERN (insn), live_tmp);
    1378              : 
    1379              :       /* CALL_INSNs need processing their fusage data.  */
    1380    127656777 :       if (CALL_P (insn))
    1381      9452414 :         skipped_dest |= ext_dce_process_sets (insn,
    1382              :                                               CALL_INSN_FUNCTION_USAGE (insn),
    1383              :                                               live_tmp);
    1384              : 
    1385              :       /* And now uses, optimizing away SIGN/ZERO extensions as we go.  */
    1386    127656777 :       ext_dce_process_uses (insn, PATTERN (insn), live_tmp, skipped_dest);
    1387              : 
    1388              :       /* A nonlocal goto implicitly uses the frame pointer.  */
    1389    127656777 :       if (JUMP_P (insn) && find_reg_note (insn, REG_NON_LOCAL_GOTO, NULL_RTX))
    1390              :         {
    1391         1130 :           bitmap_set_range (livenow, FRAME_POINTER_REGNUM * 4, 4);
    1392         1130 :           if (!HARD_FRAME_POINTER_IS_FRAME_POINTER)
    1393         1130 :             bitmap_set_range (livenow, HARD_FRAME_POINTER_REGNUM * 4, 4);
    1394              :         }
    1395              : 
    1396              :       /* And process fusage data for the use as well.  */
    1397    127656777 :       if (CALL_P (insn))
    1398              :         {
    1399      9452414 :           if (!FAKE_CALL_P (insn))
    1400      9452354 :             bitmap_set_range (livenow, STACK_POINTER_REGNUM * 4, 4);
    1401              : 
    1402              :           /* If this is not a call to a const fucntion, then assume it
    1403              :              can read any global register.  */
    1404      9452414 :           if (!RTL_CONST_CALL_P (insn))
    1405    849089814 :             for (unsigned i = 0; i < FIRST_PSEUDO_REGISTER; i++)
    1406    839959816 :               if (global_regs[i])
    1407          230 :                 bitmap_set_range (livenow, i * 4, 4);
    1408              : 
    1409      9452414 :           ext_dce_process_uses (insn, CALL_INSN_FUNCTION_USAGE (insn), live_tmp, false);
    1410              :         }
    1411              : 
    1412    127656777 :       BITMAP_FREE (live_tmp);
    1413              :     }
    1414              : 
    1415     22781109 :   if (modify)
    1416      9729754 :     ext_dce_promote_chained_candidates ();
    1417     22781109 : }
    1418              : 
    1419              : /* SUBREG_PROMOTED_VAR_P is set by the gimple->rtl optimizers and
    1420              :    is usually helpful.  However, in some cases setting the value when
    1421              :    it not strictly needed can cause this pass to miss optimizations.
    1422              : 
    1423              :    Specifically consider (set (mem) (subreg (reg))).  If set in that
    1424              :    case it will cause more bit groups to be live for REG than would
    1425              :    be strictly necessary which in turn can inhibit extension removal.
    1426              : 
    1427              :    So do a pass over the IL wiping the SUBREG_PROMOTED_VAR_P when it
    1428              :    is obviously not needed.  */
    1429              : 
    1430              : static void
    1431       961546 : maybe_clear_subreg_promoted_p (void)
    1432              : {
    1433    119020217 :   for (rtx_insn *insn = get_insns(); insn; insn = NEXT_INSN (insn))
    1434              :     {
    1435    118058671 :       if (!NONDEBUG_INSN_P (insn))
    1436     63461524 :         continue;
    1437              : 
    1438     54597147 :       rtx set = single_set (insn);
    1439     54597147 :       if (!set)
    1440      3652697 :         continue;
    1441              : 
    1442              :       /* There may be other cases where we should clear, but for
    1443              :          now, this is the only known case where it causes problems.  */
    1444     50944450 :       if (MEM_P (SET_DEST (set)) && SUBREG_P (SET_SRC (set))
    1445        70804 :         && GET_MODE (SET_DEST (set)) <= GET_MODE (SUBREG_REG (SET_SRC (set))))
    1446        61223 :         SUBREG_PROMOTED_VAR_P (SET_SRC (set)) = 0;
    1447              :     }
    1448       961546 : }
    1449              : 
    1450              : /* Walk the IL and build the transitive closure of all the REGs tied
    1451              :    together by copies where either the source or destination is
    1452              :    marked in CHANGED_PSEUDOS.  */
    1453              : 
    1454              : static void
    1455       961546 : expand_changed_pseudos (void)
    1456              : {
    1457              :   /* Build a vector of registers related by a copy.  This is meant to
    1458              :      speed up the next step by avoiding full IL walks.  */
    1459       961546 :   struct copy_pair { rtx first; rtx second; };
    1460       961546 :   auto_vec<copy_pair> pairs;
    1461    119020217 :   for (rtx_insn *insn = get_insns(); insn; insn = NEXT_INSN (insn))
    1462              :     {
    1463    118058671 :       if (!NONDEBUG_INSN_P (insn))
    1464     63461524 :         continue;
    1465              : 
    1466     54597147 :       rtx pat = PATTERN (insn);
    1467              : 
    1468              :       /* Simple copies to a REG from another REG or SUBREG of a REG.  */
    1469     54597147 :       if (GET_CODE (pat) == SET
    1470     43075497 :           && REG_P (SET_DEST (pat))
    1471     30610351 :           && (REG_P (SET_SRC (pat))
    1472     22183221 :               || (SUBREG_P (SET_SRC (pat))
    1473       368518 :                   && REG_P (SUBREG_REG (SET_SRC (pat))))))
    1474              :         {
    1475       368081 :           rtx src = (REG_P (SET_SRC (pat))
    1476      8795211 :                      ? SET_SRC (pat)
    1477              :                      : SUBREG_REG (SET_SRC (pat)));
    1478      8795211 :           pairs.safe_push ({ SET_DEST (pat), src });
    1479              :         }
    1480              : 
    1481              :       /* Simple copies to a REG from another REG or SUBREG of a REG
    1482              :          held inside a PARALLEL.  */
    1483     54597147 :       if (GET_CODE (pat) == PARALLEL)
    1484              :         {
    1485     24823290 :           for (int i = XVECLEN (pat, 0) - 1; i >= 0; i--)
    1486              :             {
    1487     16664273 :               rtx elem = XVECEXP (pat, 0, i);
    1488              : 
    1489     16664273 :               if (GET_CODE (elem) == SET
    1490      8378385 :                   && REG_P (SET_DEST (elem))
    1491      8228531 :                   && (REG_P (SET_SRC (elem))
    1492      8228531 :                       || (SUBREG_P (SET_SRC (elem))
    1493            0 :                           && REG_P (SUBREG_REG (SET_SRC (elem))))))
    1494              :                 {
    1495            0 :                   rtx src = (REG_P (SET_SRC (elem))
    1496            0 :                              ? SET_SRC (elem)
    1497              :                              : SUBREG_REG (SET_SRC (elem)));
    1498            0 :                   pairs.safe_push ({ SET_DEST (elem), src });
    1499              :                 }
    1500              :             }
    1501      8159017 :           continue;
    1502      8159017 :         }
    1503              :     }
    1504              : 
    1505              :   /* Now we have a vector with copy pairs.  Iterate over that list
    1506              :      updating CHANGED_PSEUDOS as we go.  Eliminate copies from the
    1507              :      list as we go as they don't need further processing.  */
    1508              :   bool changed = true;
    1509      1923151 :   while (changed)
    1510              :     {
    1511              :       changed = false;
    1512              :       unsigned int i;
    1513              :       copy_pair *p;
    1514     10719646 :       FOR_EACH_VEC_ELT (pairs, i, p)
    1515              :         {
    1516      8796495 :           if (bitmap_bit_p (changed_pseudos, REGNO (p->second))
    1517      8796495 :               && bitmap_set_bit (changed_pseudos, REGNO (p->first)))
    1518              :             {
    1519           75 :               pairs.unordered_remove (i);
    1520           75 :               changed = true;
    1521              :             }
    1522              :         }
    1523              :     }
    1524       961546 : }
    1525              : 
    1526              : /* We optimize away sign/zero extensions in this pass and replace
    1527              :    them with SUBREGs indicating certain bits are don't cares.
    1528              : 
    1529              :    This changes the SUBREG_PROMOTED_VAR_P state of the object.
    1530              :    It is fairly painful to fix this on the fly, so we have
    1531              :    recorded which pseudos are affected and we look for SUBREGs
    1532              :    of those pseudos and fix them up.  */
    1533              : 
    1534              : static void
    1535       961546 : reset_subreg_promoted_p (void)
    1536              : {
    1537              :   /* This pass eliminates zero/sign extensions on pseudo regs found
    1538              :      in CHANGED_PSEUDOS.  Elimination of those extensions changes if
    1539              :      the pseudos are known to hold values extended to wider modes
    1540              :      via SUBREG_PROMOTED_VAR.  So we wipe the SUBREG_PROMOTED_VAR
    1541              :      state on all affected pseudos.
    1542              : 
    1543              :      But that is insufficient.  We might have a copy from one REG
    1544              :      to another (possibly with the source register wrapped with a
    1545              :      SUBREG).  We need to wipe SUBREG_PROMOTED_VAR on the transitive
    1546              :      closure of the original CHANGED_PSEUDOS and registers they're
    1547              :      connected to via copies.  So expand the set.  */
    1548       961546 :   expand_changed_pseudos ();
    1549              :     
    1550              :   /* If we removed an extension, that changed the promoted state
    1551              :      of the destination of that extension.  Thus we need to go
    1552              :      find any SUBREGs that reference that pseudo and adjust their
    1553              :      SUBREG_PROMOTED_P state.  */
    1554    119020217 :   for (rtx_insn *insn = get_insns(); insn; insn = NEXT_INSN (insn))
    1555              :     {
    1556    118058671 :       if (!NONDEBUG_INSN_P (insn))
    1557     63461524 :         continue;
    1558              : 
    1559     54597147 :       rtx pat = PATTERN (insn);
    1560     54597147 :       subrtx_var_iterator::array_type array;
    1561    348480935 :       FOR_EACH_SUBRTX_VAR (iter, array, pat, NONCONST)
    1562              :         {
    1563    293883788 :           rtx sub = *iter;
    1564              : 
    1565              :           /* We only care about SUBREGs.  */
    1566    293883788 :           if (GET_CODE (sub) != SUBREG)
    1567    292348141 :             continue;
    1568              : 
    1569      1535647 :           const_rtx x = SUBREG_REG (sub);
    1570              : 
    1571              :           /* We only care if the inner object is a REG.  */
    1572      1535647 :           if (!REG_P (x))
    1573          758 :             continue;
    1574              : 
    1575              :           /* And only if the SUBREG is a promoted var.  */
    1576      1534889 :           if (!SUBREG_PROMOTED_VAR_P (sub))
    1577      1529503 :             continue;
    1578              : 
    1579         5386 :           if (bitmap_bit_p (changed_pseudos, REGNO (x)))
    1580            0 :             SUBREG_PROMOTED_VAR_P (sub) = 0;
    1581              :         }
    1582     54597147 :     }
    1583       961546 : }
    1584              : 
    1585              : /* Initialization of the ext-dce pass.  Primarily this means
    1586              :    setting up the various bitmaps we utilize.  */
    1587              : 
    1588              : static void
    1589       961546 : ext_dce_init (void)
    1590              : {
    1591       961546 :   livein.create (last_basic_block_for_fn (cfun));
    1592       961546 :   livein.quick_grow_cleared (last_basic_block_for_fn (cfun));
    1593     12622634 :   for (int i = 0; i < last_basic_block_for_fn (cfun); i++)
    1594     11661088 :     bitmap_initialize (&livein[i], &bitmap_default_obstack);
    1595              : 
    1596       961546 :   auto_bitmap refs (&bitmap_default_obstack);
    1597       961546 :   df_get_exit_block_use_set (refs);
    1598              : 
    1599       961546 :   unsigned i;
    1600       961546 :   bitmap_iterator bi;
    1601      4424225 :   EXECUTE_IF_SET_IN_BITMAP (refs, 0, i, bi)
    1602      3462679 :     make_reg_live (&livein[EXIT_BLOCK], i);
    1603              : 
    1604       961546 :   livenow = BITMAP_ALLOC (NULL);
    1605       961546 :   all_blocks = BITMAP_ALLOC (NULL);
    1606       961546 :   changed_pseudos = BITMAP_ALLOC (NULL);
    1607       961546 :   promotable_dests = BITMAP_ALLOC (NULL);
    1608       961546 :   consumed_by_candidate = BITMAP_ALLOC (NULL);
    1609              : 
    1610     12622634 :   for (int i = 0; i < last_basic_block_for_fn (cfun); i++)
    1611     11661088 :     if (i != ENTRY_BLOCK && i != EXIT_BLOCK)
    1612      9737996 :       bitmap_set_bit (all_blocks, i);
    1613              : 
    1614       961546 :   modify = false;
    1615       961546 : }
    1616              : 
    1617              : /* Finalization of the ext-dce pass.  Primarily this means
    1618              :    releasing up the various bitmaps we utilize.  */
    1619              : 
    1620              : static void
    1621       961546 : ext_dce_finish (void)
    1622              : {
    1623     12622634 :   for (unsigned i = 0; i < livein.length (); i++)
    1624     11661088 :     bitmap_clear (&livein[i]);
    1625       961546 :   livein.release ();
    1626              : 
    1627       961546 :   BITMAP_FREE (livenow);
    1628       961546 :   BITMAP_FREE (changed_pseudos);
    1629       961546 :   BITMAP_FREE (all_blocks);
    1630       961546 :   BITMAP_FREE (promotable_dests);
    1631       961546 :   BITMAP_FREE (consumed_by_candidate);
    1632       961546 :   promotion_candidates.release ();
    1633       961546 :   promotion_copies.release ();
    1634       961546 : }
    1635              : 
    1636              : /* Process block number BB_INDEX as part of the backward
    1637              :    simple dataflow analysis.  Return TRUE if something in
    1638              :    this block changed or FALSE otherwise.  */
    1639              : 
    1640              : static bool
    1641     26627293 : ext_dce_rd_transfer_n (int bb_index)
    1642              : {
    1643              :   /* The ENTRY/EXIT blocks never change.  */
    1644     26627293 :   if (bb_index == ENTRY_BLOCK || bb_index == EXIT_BLOCK)
    1645              :     return false;
    1646              : 
    1647     22781109 :   basic_block bb = BASIC_BLOCK_FOR_FN (cfun, bb_index);
    1648              : 
    1649              :   /* Make everything live that's live in the successors.  */
    1650     22781109 :   bitmap_clear (livenow);
    1651     22781109 :   edge_iterator ei;
    1652     22781109 :   edge e;
    1653              : 
    1654     57313177 :   FOR_EACH_EDGE (e, ei, bb->succs)
    1655     34532068 :     bitmap_ior_into (livenow, &livein[e->dest->index]);
    1656              : 
    1657     22781109 :   ext_dce_process_bb (bb);
    1658              : 
    1659              :   /* We only allow widening the set of objects live at the start
    1660              :      of a block.  Otherwise we run the risk of not converging.  */
    1661     22781109 :   return bitmap_ior_into (&livein[bb_index], livenow);
    1662              : }
    1663              : 
    1664              : /* Dummy function for the df_simple_dataflow API.  */
    1665     33185314 : static bool ext_dce_rd_confluence_n (edge) { return true; }
    1666              : 
    1667              : /* Use lifetime analyis to identify extensions that set bits that
    1668              :    are never read.  Turn such extensions into SUBREGs instead which
    1669              :    can often be propagated away.  */
    1670              : 
    1671              : void
    1672       961546 : ext_dce_execute (void)
    1673              : {
    1674              :   /* Limit the amount of memory we use for livein, with 4 bits per
    1675              :      reg per basic-block including overhead that maps to one byte
    1676              :      per reg per basic-block.  */
    1677       961546 :   uint64_t memory_request
    1678       961546 :     = (uint64_t)n_basic_blocks_for_fn (cfun) * max_reg_num ();
    1679       961546 :   if (memory_request / 1024 > (uint64_t)param_max_gcse_memory)
    1680              :     {
    1681            0 :       warning (OPT_Wdisabled_optimization,
    1682              :                "ext-dce disabled: %d basic blocks and %d registers; "
    1683              :                "increase %<--param max-gcse-memory%> above %wu",
    1684            0 :                n_basic_blocks_for_fn (cfun), max_reg_num (),
    1685              :                memory_request / 1024);
    1686            0 :       return;
    1687              :     }
    1688              : 
    1689              :   /* Some settings of SUBREG_PROMOTED_VAR_P are actively harmful
    1690              :      to this pass.  Clear it for those cases.  */
    1691       961546 :   maybe_clear_subreg_promoted_p ();
    1692       961546 :   df_analyze ();
    1693       961546 :   ext_dce_init ();
    1694              : 
    1695      3846184 :   do
    1696              :     {
    1697      1923092 :       df_simple_dataflow (DF_BACKWARD, NULL, NULL,
    1698              :                           ext_dce_rd_confluence_n, ext_dce_rd_transfer_n,
    1699              :                           all_blocks, df_get_postorder (DF_BACKWARD),
    1700              :                           df_get_n_blocks (DF_BACKWARD));
    1701      1923092 :       modify = !modify;
    1702              :     }
    1703              :   while (modify);
    1704              : 
    1705       961546 :   reset_subreg_promoted_p ();
    1706              : 
    1707       961546 :   ext_dce_finish ();
    1708              : }
    1709              : 
    1710              : 
    1711              : namespace {
    1712              : 
    1713              : const pass_data pass_data_ext_dce =
    1714              : {
    1715              :   RTL_PASS, /* type */
    1716              :   "ext_dce", /* name */
    1717              :   OPTGROUP_NONE, /* optinfo_flags */
    1718              :   TV_EXT_DCE, /* tv_id */
    1719              :   PROP_cfglayout, /* properties_required */
    1720              :   0, /* properties_provided */
    1721              :   0, /* properties_destroyed */
    1722              :   0, /* todo_flags_start */
    1723              :   TODO_df_finish, /* todo_flags_finish */
    1724              : };
    1725              : 
    1726              : class pass_ext_dce : public rtl_opt_pass
    1727              : {
    1728              : public:
    1729       288047 :   pass_ext_dce (gcc::context *ctxt)
    1730       576094 :     : rtl_opt_pass (pass_data_ext_dce, ctxt)
    1731              :   {}
    1732              : 
    1733              :   /* opt_pass methods: */
    1734      1474422 :   virtual bool gate (function *) { return flag_ext_dce && optimize > 0; }
    1735       961546 :   virtual unsigned int execute (function *)
    1736              :     {
    1737       961546 :       ext_dce_execute ();
    1738       961546 :       return 0;
    1739              :     }
    1740              : 
    1741              : }; // class pass_combine
    1742              : 
    1743              : } // anon namespace
    1744              : 
    1745              : rtl_opt_pass *
    1746       288047 : make_pass_ext_dce (gcc::context *ctxt)
    1747              : {
    1748       288047 :   return new pass_ext_dce (ctxt);
    1749              : }
        

Generated by: LCOV version 2.4-beta

LCOV profile is generated on x86_64 machine using following configure options: configure --disable-bootstrap --enable-coverage=opt --enable-languages=c,c++,fortran,go,jit,lto,rust,m2 --enable-host-shared. GCC test suite is run with the built compiler.