LCOV - code coverage report
Current view: top level - gcc - ext-dce.cc (source / functions) Coverage Total Hit
Test: gcc.info Lines: 79.5 % 512 407
Test Date: 2026-02-28 14:20:25 Functions: 95.2 % 21 20
Legend: Lines:     hit not hit

            Line data    Source code
       1              : /* RTL dead zero/sign extension (code) elimination.
       2              :    Copyright (C) 2000-2026 Free Software Foundation, Inc.
       3              : 
       4              : This file is part of GCC.
       5              : 
       6              : GCC is free software; you can redistribute it and/or modify it under
       7              : the terms of the GNU General Public License as published by the Free
       8              : Software Foundation; either version 3, or (at your option) any later
       9              : version.
      10              : 
      11              : GCC is distributed in the hope that it will be useful, but WITHOUT ANY
      12              : WARRANTY; without even the implied warranty of MERCHANTABILITY or
      13              : FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
      14              : for more details.
      15              : 
      16              : You should have received a copy of the GNU General Public License
      17              : along with GCC; see the file COPYING3.  If not see
      18              : <http://www.gnu.org/licenses/>.  */
      19              : 
      20              : #include "config.h"
      21              : #include "system.h"
      22              : #include "coretypes.h"
      23              : #include "backend.h"
      24              : #include "rtl.h"
      25              : #include "tree.h"
      26              : #include "memmodel.h"
      27              : #include "insn-config.h"
      28              : #include "emit-rtl.h"
      29              : #include "expr.h"
      30              : #include "recog.h"
      31              : #include "cfganal.h"
      32              : #include "tree-pass.h"
      33              : #include "cfgrtl.h"
      34              : #include "rtl-iter.h"
      35              : #include "df.h"
      36              : #include "print-rtl.h"
      37              : #include "dbgcnt.h"
      38              : #include "diagnostic-core.h"
      39              : #include "target.h"
      40              : 
      41              : /* These should probably move into a C++ class.  */
      42              : static vec<bitmap_head> livein;
      43              : static bitmap all_blocks;
      44              : static bitmap livenow;
      45              : static bitmap changed_pseudos;
      46              : static bool modify;
      47              : 
      48              : /* We consider four bit groups for liveness:
      49              :    bit 0..7   (least significant byte)
      50              :    bit 8..15  (second least significant byte)
      51              :    bit 16..31
      52              :    bit 32..BITS_PER_WORD-1  */
      53              : 
      54              : /* For the given REG, return the number of bit groups implied by the
      55              :    size of the REG's mode, up to a maximum of 4 (number of bit groups
      56              :    tracked by this pass).
      57              : 
      58              :    For partial integer and variable sized modes also return 4.  This
      59              :    could possibly be refined for something like PSI mode, but it
      60              :    does not seem worth the effort.  */
      61              : 
      62              : static int
      63    236158975 : group_limit (const_rtx reg)
      64              : {
      65    236158975 :   machine_mode mode = GET_MODE (reg);
      66              : 
      67    236158975 :   if (!GET_MODE_BITSIZE (mode).is_constant ())
      68              :     return 4;
      69              : 
      70    236158975 :   int size = GET_MODE_SIZE (mode).to_constant ();
      71              : 
      72    236158975 :   size = exact_log2 (size);
      73              : 
      74    236069420 :   if (size < 0)
      75              :     return 4;
      76              : 
      77    236069420 :   size++;
      78    236069420 :   return (size > 4 ? 4 : size);
      79              : }
      80              : 
      81              : /* Make all bit groups live for REGNO in bitmap BMAP.  For hard regs,
      82              :    we assume all groups are live.  For a pseudo we consider the size
      83              :    of the pseudo to avoid creating unnecessarily live chunks of data.  */
      84              : 
      85              : static void
      86      4691535 : make_reg_live (bitmap bmap, int regno)
      87              : {
      88      4691535 :   int limit;
      89              : 
      90              :   /* For pseudos we can use the mode to limit how many bit groups
      91              :      are marked as live since a pseudo only has one mode.  Hard
      92              :      registers have to be handled more conservatively.  */
      93      4691535 :   if (regno > FIRST_PSEUDO_REGISTER)
      94              :     {
      95       877789 :       rtx reg = regno_reg_rtx[regno];
      96       877789 :       limit = group_limit (reg);
      97              :     }
      98              :   else
      99              :     limit = 4;
     100              : 
     101     23126587 :   for (int i = 0; i < limit; i++)
     102     18435052 :     bitmap_set_bit (bmap, regno * 4 + i);
     103      4691535 : }
     104              : 
     105              : /* Note this pass could be used to narrow memory loads too.  It's
     106              :    not clear if that's profitable or not in general.  */
     107              : 
     108              : #define UNSPEC_P(X) (GET_CODE (X) == UNSPEC || GET_CODE (X) == UNSPEC_VOLATILE)
     109              : 
     110              : /* If we know the destination of CODE only uses some low bits
     111              :    (say just the QI bits of an SI operation), then return true
     112              :    if we can propagate the need for just the subset of bits
     113              :    from the destination to the sources.
     114              : 
     115              :    FIXME: This is safe for operands 1 and 2 of an IF_THEN_ELSE, but not
     116              :    operand 0.  Thus is likely would need some special casing to handle.  */
     117              : 
     118              : static bool
     119    142514375 : safe_for_live_propagation (rtx_code code)
     120              : {
     121              :   /* First handle rtx classes which as a whole are known to
     122              :      be either safe or unsafe.  */
     123    142514375 :   switch (GET_RTX_CLASS (code))
     124              :     {
     125              :       case RTX_OBJ:
     126              :       case RTX_CONST_OBJ:
     127              :         return true;
     128              : 
     129              :       case RTX_COMPARE:
     130              :       case RTX_COMM_COMPARE:
     131              :       case RTX_TERNARY:
     132              :         return false;
     133              : 
     134     73964051 :       default:
     135     73964051 :         break;
     136              :     }
     137              : 
     138              :   /* What's left are specific codes.  We only need to identify those
     139              :      which are safe.   */
     140     73964051 :   switch (code)
     141              :     {
     142              :     /* These are trivially safe.  */
     143              :     case SUBREG:
     144              :     case NOT:
     145              :     case ZERO_EXTEND:
     146              :     case SIGN_EXTEND:
     147              :     case TRUNCATE:
     148              :     case PLUS:
     149              :     case MINUS:
     150              :     case MULT:
     151              :     case SMUL_HIGHPART:
     152              :     case UMUL_HIGHPART:
     153              :     case AND:
     154              :     case IOR:
     155              :     case XOR:
     156              :       return true;
     157              : 
     158              :     /* We can propagate for the shifted operand, but not the shift
     159              :        count.  The count is handled specially.  */
     160              :     case ASHIFT:
     161              :     case LSHIFTRT:
     162              :     case ASHIFTRT:
     163              :     case SS_ASHIFT:
     164              :     case US_ASHIFT:
     165              :       return true;
     166              : 
     167              :     /* There may be other safe codes.  If so they can be added
     168              :        individually when discovered.  */
     169              :     default:
     170              :       return false;
     171              :     }
     172              : }
     173              : 
     174              : /* Clear bits in LIVENOW and set bits in LIVE_TMP for objects
     175              :    set/clobbered by OBJ contained in INSN.
     176              : 
     177              :    Conceptually it is always safe to ignore a particular destination
     178              :    here as that will result in more chunks of data being considered
     179              :    live.  That's what happens when we "continue" the main loop when
     180              :    we see something we don't know how to handle such as a vector
     181              :    mode destination.
     182              : 
     183              :    The more accurate we are in identifying what objects (and chunks
     184              :    within an object) are set by INSN, the more aggressive the
     185              :    optimization phase during use handling will be.  */
     186              : 
     187              : static bool
     188    138134916 : ext_dce_process_sets (rtx_insn *insn, rtx obj, bitmap live_tmp)
     189              : {
     190    138134916 :   bool skipped_dest = false;
     191              : 
     192    138134916 :   subrtx_iterator::array_type array;
     193    396884616 :   FOR_EACH_SUBRTX (iter, array, obj, NONCONST)
     194              :     {
     195    258749700 :       const_rtx x = *iter;
     196              : 
     197              :       /* An EXPR_LIST (from call fusage) ends in NULL_RTX.  */
     198    258749700 :       if (x == NULL_RTX)
     199      9475204 :         continue;
     200              : 
     201    249274496 :       if (UNSPEC_P (x))
     202       571969 :         continue;
     203              : 
     204    248702527 :       if (GET_CODE (x) == SET || GET_CODE (x) == CLOBBER)
     205              :         {
     206    143909654 :           unsigned bit = 0;
     207    143909654 :           x = SET_DEST (x);
     208              : 
     209              :           /* We don't support vector destinations or destinations
     210              :              wider than DImode.  */
     211    143909654 :           scalar_mode outer_mode;
     212    147818840 :           if (!is_a <scalar_mode> (GET_MODE (x), &outer_mode)
     213     90977576 :               || GET_MODE_BITSIZE (outer_mode) > HOST_BITS_PER_WIDE_INT)
     214              :             {
     215              :               /* Skip the subrtxs of this destination.  There is
     216              :                  little value in iterating into the subobjects, so
     217              :                  just skip them for a bit of efficiency.  */
     218     56841264 :               skipped_dest = true;
     219     56841264 :               iter.skip_subrtxes ();
     220    315590964 :               continue;
     221              :             }
     222              : 
     223              :           /* We could have (strict_low_part (subreg ...)).  We can not just
     224              :              strip the STRICT_LOW_PART as that would result in clearing
     225              :              some bits in LIVENOW that are still live.  So process the
     226              :              STRICT_LOW_PART specially.  */
     227     87068390 :           if (GET_CODE (x) == STRICT_LOW_PART)
     228              :             {
     229            0 :               x = XEXP (x, 0);
     230              : 
     231              :               /* The only valid operand of a STRICT_LOW_PART is a non
     232              :                  paradoxical SUBREG.  */
     233            0 :               gcc_assert (SUBREG_P (x)
     234              :                           && !paradoxical_subreg_p (x)
     235              :                           && SUBREG_BYTE (x).is_constant ());
     236              : 
     237              :               /* I think we should always see a REG here.  But let's
     238              :                  be sure.  */
     239            0 :               gcc_assert (REG_P (SUBREG_REG (x)));
     240              : 
     241              :               /* The inner mode might be larger, just punt for
     242              :                  that case.  Remember, we can not just continue to process
     243              :                  the inner RTXs due to the STRICT_LOW_PART.  */
     244            0 :               if (!is_a <scalar_mode> (GET_MODE (SUBREG_REG (x)), &outer_mode)
     245            0 :                   || GET_MODE_BITSIZE (outer_mode) > HOST_BITS_PER_WIDE_INT)
     246              :                 {
     247              :                   /* Skip the subrtxs of the STRICT_LOW_PART.  We can't
     248              :                      process them because it'll set objects as no longer
     249              :                      live when they are in fact still live.  */
     250            0 :                   skipped_dest = true;
     251            0 :                   iter.skip_subrtxes ();
     252            0 :                   continue;
     253              :                 }
     254              : 
     255              :               /* LIVE_TMP contains the set groups that are live-out and set in
     256              :                  this insn.  It is used to narrow the groups live-in for the
     257              :                  inputs of this insn.
     258              : 
     259              :                  The simple thing to do is mark all the groups as live, but
     260              :                  that will significantly inhibit optimization.
     261              : 
     262              :                  We also need to be careful in the case where we have an in-out
     263              :                  operand.  If we're not careful we'd clear LIVE_TMP
     264              :                  incorrectly.  */
     265            0 :               HOST_WIDE_INT rn = REGNO (SUBREG_REG (x));
     266            0 :               int limit = group_limit (SUBREG_REG (x));
     267            0 :               for (HOST_WIDE_INT i = 4 * rn; i < 4 * rn + limit; i++)
     268            0 :                 if (bitmap_bit_p (livenow, i))
     269            0 :                   bitmap_set_bit (live_tmp, i);
     270              : 
     271            0 :               if (bitmap_empty_p (live_tmp))
     272            0 :                 make_reg_live (live_tmp, rn);
     273              : 
     274              :               /* The mode of the SUBREG tells us how many bits we can
     275              :                  clear.  */
     276            0 :               machine_mode mode = GET_MODE (x);
     277            0 :               HOST_WIDE_INT size
     278            0 :                 = exact_log2 (GET_MODE_SIZE (mode).to_constant ()) + 1;
     279            0 :               bitmap_clear_range (livenow, 4 * rn, size);
     280              : 
     281              :               /* We have fully processed this destination.  */
     282            0 :               iter.skip_subrtxes ();
     283            0 :               continue;
     284            0 :             }
     285              : 
     286              :           /* Phase one of destination handling.  First remove any wrapper
     287              :              such as SUBREG or ZERO_EXTRACT.  */
     288     87068390 :           unsigned HOST_WIDE_INT mask
     289     87068390 :             = GET_MODE_MASK (GET_MODE_INNER (GET_MODE (x)));
     290     87068390 :           if (SUBREG_P (x))
     291              :             {
     292              :               /* If we have a SUBREG destination that is too wide, just
     293              :                  skip the destination rather than continuing this iterator.
     294              :                  While continuing would be better, we'd need to strip the
     295              :                  subreg and restart within the SET processing rather than
     296              :                  the top of the loop which just complicates the flow even
     297              :                  more.  */
     298       648649 :               if (!is_a <scalar_mode> (GET_MODE (SUBREG_REG (x)), &outer_mode)
     299       538707 :                   || GET_MODE_BITSIZE (outer_mode) > HOST_BITS_PER_WIDE_INT)
     300              :                 {
     301       109942 :                   skipped_dest = true;
     302       109942 :                   iter.skip_subrtxes ();
     303       109942 :                   continue;
     304              :                 }
     305              : 
     306              :               /* We can safely strip a paradoxical subreg.  The inner mode will
     307              :                  be narrower than the outer mode.  We'll clear fewer bits in
     308              :                  LIVENOW than we'd like, but that's always safe.  */
     309       429201 :               if (paradoxical_subreg_p (x))
     310              :                 x = XEXP (x, 0);
     311       419351 :               else if (SUBREG_BYTE (x).is_constant ())
     312              :                 {
     313       419351 :                   bit = subreg_lsb (x).to_constant ();
     314       419351 :                   mask = GET_MODE_MASK (GET_MODE (SUBREG_REG (x))) << bit;
     315       419351 :                   gcc_assert (mask);
     316              :                   x = SUBREG_REG (x);
     317              :                 }
     318              :               else
     319              :                 gcc_unreachable ();
     320              :             }
     321              : 
     322     86958448 :           if (GET_CODE (x) == ZERO_EXTRACT)
     323              :             {
     324              :               /* Unlike a SUBREG destination, a set of a ZERO_EXTRACT only
     325              :                  modifies the bits referenced in the ZERO_EXTRACT, the rest
     326              :                  remain the same.  Thus we can not continue here, we must
     327              :                  either figure out what part of the destination is modified
     328              :                  or skip the sub-rtxs.  */
     329         3503 :               skipped_dest = true;
     330         3503 :               iter.skip_subrtxes ();
     331         3503 :               continue;
     332              :             }
     333              : 
     334              :           /* BIT >= 64 indicates something went horribly wrong.  */
     335     86954945 :           gcc_assert (bit <= HOST_BITS_PER_WIDE_INT - 1);
     336              : 
     337              :           /* Now handle the actual object that was changed.  */
     338     86954945 :           if (REG_P (x))
     339              :             {
     340              :               /* LIVE_TMP contains the set groups that are live-out and set in
     341              :                  this insn.  It is used to narrow the groups live-in for the
     342              :                  inputs of this insn.
     343              : 
     344              :                  The simple thing to do is mark all the groups as live, but
     345              :                  that will significantly inhibit optimization.
     346              : 
     347              :                  We also need to be careful in the case where we have an in-out
     348              :                  operand.  If we're not careful we'd clear LIVE_TMP
     349              :                  incorrectly.  */
     350     72977998 :               HOST_WIDE_INT rn = REGNO (x);
     351     72977998 :               int limit = group_limit (x);
     352    326216644 :               for (HOST_WIDE_INT i = 4 * rn; i < 4 * rn + limit; i++)
     353    253238646 :                 if (bitmap_bit_p (livenow, i))
     354    246153719 :                   bitmap_set_bit (live_tmp, i);
     355              : 
     356     72977998 :               if (bitmap_empty_p (live_tmp))
     357      1220531 :                 make_reg_live (live_tmp, rn);
     358              : 
     359              :               /* Now clear the bits known written by this instruction.
     360              :                  Note that BIT need not be a power of two, consider a
     361              :                  ZERO_EXTRACT destination.  */
     362     72977998 :               int start = (bit < 8 ? 0 : bit < 16 ? 1 : bit < 32 ? 2 : 3);
     363     77835163 :               int end = ((mask & ~HOST_WIDE_INT_UC (0xffffffff)) ? 4
     364     28142908 :                          : (mask & HOST_WIDE_INT_UC (0xffff0000)) ? 3
     365      5659421 :                          : (mask & 0xff00) ? 2 : 1);
     366     72977998 :               bitmap_clear_range (livenow, 4 * rn + start, end - start);
     367              :             }
     368              :           /* Some ports generate (clobber (const_int)).  */
     369     13976947 :           else if (CONST_INT_P (x))
     370            0 :             continue;
     371              :           else
     372     13976947 :             gcc_assert (CALL_P (insn)
     373              :                         || MEM_P (x)
     374              :                         || x == pc_rtx
     375              :                         || GET_CODE (x) == SCRATCH);
     376              : 
     377     86954945 :           iter.skip_subrtxes ();
     378     86954945 :         }
     379    104792873 :       else if (GET_CODE (x) == COND_EXEC)
     380              :         {
     381              :           /* This isn't ideal, but may not be so bad in practice.  */
     382            0 :           skipped_dest = true;
     383            0 :           iter.skip_subrtxes ();
     384              :         }
     385              :     }
     386    138134916 :   return skipped_dest;
     387    138134916 : }
     388              : 
     389              : /* INSN is a right shift and the second insn in a shift pair that is a
     390              :    sign or zero extension (SET is the single set associated with INSN).  
     391              : 
     392              :    Replace the source of SET with NEW_SRC which is a source register
     393              :    from NEW_SRC_INSN (the left shift in the pair).  This is effectively
     394              :    the same as the replacement we do for ZERO/SIGN extends on targets
     395              :    that support those insns.  */
     396              : static void
     397            0 : ext_dce_try_optimize_rshift (rtx_insn *insn, rtx set, rtx new_src, rtx_insn *new_src_insn)
     398              : {
     399              :   /* If the modes are not the same or one is a hard register, then
     400              :      conservatively do nothing.  */
     401            0 :   if (GET_MODE (SET_SRC (set)) != GET_MODE (new_src)
     402            0 :       || !REG_P (XEXP (SET_SRC (set), 0))
     403            0 :       || !REG_P (new_src)
     404            0 :       || REGNO (XEXP (SET_SRC (set), 0)) < FIRST_PSEUDO_REGISTER
     405            0 :       || REGNO (new_src) < FIRST_PSEUDO_REGISTER)
     406              :     return;
     407              : 
     408            0 :   if (dump_file)
     409              :     {
     410            0 :       fprintf (dump_file, "Processing insn:\n");
     411            0 :       dump_insn_slim (dump_file, insn);
     412            0 :       fprintf (dump_file, "Trying to simplify pattern:\n");
     413            0 :       print_rtl_single (dump_file, SET_SRC (set));
     414              :     }
     415              : 
     416              :   /* We decided to turn do the optimization but allow it to be rejected for
     417              :      bisection purposes.  */
     418            0 :   if (!dbg_cnt (::ext_dce))
     419              :     {
     420            0 :       if (dump_file)
     421            0 :         fprintf (dump_file, "Rejected due to debug counter.\n");
     422            0 :       return;
     423              :     }
     424              : 
     425              :   /* We're going to generate a fresh insn for the move, so put it
     426              :      into a sequence that we can emit after the current insn.   */
     427            0 :   start_sequence ();
     428            0 :   emit_move_insn (SET_DEST (set), new_src);
     429            0 :   rtx_insn *seq = end_sequence (); 
     430            0 :   emit_insn_after (seq, insn);
     431              : 
     432              :   /* Mark the destination as changed.  */
     433            0 :   rtx x = SET_DEST (set);
     434            0 :   while (SUBREG_P (x) || GET_CODE (x) == ZERO_EXTRACT)
     435            0 :     x = XEXP (x, 0);
     436            0 :   gcc_assert (REG_P (x));
     437            0 :   bitmap_set_bit (changed_pseudos, REGNO (x));
     438              : 
     439            0 :   if (dump_file)
     440              :     {
     441            0 :       fprintf (dump_file, "Successfully transformed to:\n");
     442            0 :       print_rtl_single (dump_file, PATTERN (seq));
     443            0 :       fprintf (dump_file, "\n");
     444              :     }
     445              : 
     446            0 :   delete_insn (insn);
     447              : 
     448              :   /* If NEW_SRC died in its prior location, then we need to remove the
     449              :      death note and move it to the new location.  */
     450            0 :   rtx note = find_regno_note (new_src_insn, REG_DEAD, REGNO (new_src));
     451            0 :   if (note)
     452              :     {
     453            0 :       remove_note (new_src_insn, note);
     454            0 :       add_reg_note (insn, REG_DEAD, new_src);
     455              :     }
     456              : }
     457              : 
     458              : 
     459              : /* INSN has a sign/zero extended source inside SET that we will
     460              :    try to turn into a SUBREG.  If NEW_SRC is non-null, use that
     461              :    for the new source of INSN's set.  That scenario only happens
     462              :    when we're optimizing a shift pair.  */
     463              : static void
     464         4694 : ext_dce_try_optimize_extension (rtx_insn *insn, rtx set)
     465              : {
     466         4694 :   rtx src = SET_SRC (set);
     467         4694 :   rtx inner = XEXP (src, 0);
     468              : 
     469              :   /* Avoid (subreg (mem)) and other constructs which may be valid RTL, but
     470              :      not useful for this optimization.  */
     471         4694 :   if (!(REG_P (inner) || (SUBREG_P (inner) && REG_P (SUBREG_REG (inner)))))
     472              :     return;
     473              : 
     474         2248 :   rtx new_pattern;
     475         2248 :   if (dump_file)
     476              :     {
     477            0 :       fprintf (dump_file, "Processing insn:\n");
     478            0 :       dump_insn_slim (dump_file, insn);
     479            0 :       fprintf (dump_file, "Trying to simplify pattern:\n");
     480            0 :       print_rtl_single (dump_file, SET_SRC (set));
     481              :     }
     482              : 
     483              :   /* We decided to turn do the optimization but allow it to be rejected for
     484              :      bisection purposes.  */
     485         2248 :   if (!dbg_cnt (::ext_dce))
     486              :     {
     487            0 :       if (dump_file)
     488            0 :         fprintf (dump_file, "Rejected due to debug counter.\n");
     489            0 :       return;
     490              :     }
     491              : 
     492         4496 :   new_pattern = simplify_gen_subreg (GET_MODE (src), inner,
     493         2248 :                                      GET_MODE (inner), 0);
     494              :   /* simplify_gen_subreg may fail in which case NEW_PATTERN will be NULL.
     495              :      We must not pass that as a replacement pattern to validate_change.  */
     496         2248 :   if (new_pattern)
     497              :     {
     498         2248 :       int ok = validate_change (insn, &SET_SRC (set), new_pattern, false);
     499              : 
     500         2248 :       rtx x = SET_DEST (set);
     501         2248 :       while (SUBREG_P (x) || GET_CODE (x) == ZERO_EXTRACT)
     502            0 :         x = XEXP (x, 0);
     503              : 
     504         2248 :       gcc_assert (REG_P (x));
     505         2248 :       if (ok)
     506         2248 :         bitmap_set_bit (changed_pseudos, REGNO (x));
     507              : 
     508         2248 :       if (dump_file)
     509              :         {
     510            0 :           if (ok)
     511            0 :             fprintf (dump_file, "Successfully transformed to:\n");
     512              :           else
     513            0 :             fprintf (dump_file, "Failed transformation to:\n");
     514              : 
     515            0 :           print_rtl_single (dump_file, new_pattern);
     516            0 :           fprintf (dump_file, "\n");
     517              :         }
     518              : 
     519              :       /* INSN may have a REG_EQUAL note indicating that the value was
     520              :          sign or zero extended.  That note is no longer valid since we've
     521              :          just removed the extension.  Just wipe the notes.  */
     522         2248 :       remove_reg_equal_equiv_notes (insn, false);
     523              :     }
     524              :   else
     525              :     {
     526            0 :       if (dump_file)
     527            0 :         fprintf (dump_file, "Unable to generate valid SUBREG expression.\n");
     528              :     }
     529              : }
     530              : 
     531              : /* Some operators imply that their second operand is fully live,
     532              :    regardless of how many bits in the output are live.  An example
     533              :    would be the shift count on a target without SHIFT_COUNT_TRUNCATED
     534              :    defined.
     535              : 
     536              :    Return TRUE if CODE is such an operator.  FALSE otherwise.  */
     537              : 
     538              : static bool
     539     76898622 : binop_implies_op2_fully_live (rtx_code code)
     540              : {
     541            0 :   switch (code)
     542              :     {
     543              :     case ASHIFT:
     544              :     case LSHIFTRT:
     545              :     case ASHIFTRT:
     546              :     case ROTATE:
     547              :     case ROTATERT:
     548              :     case SS_ASHIFT:
     549              :     case US_ASHIFT:
     550              :       return !SHIFT_COUNT_TRUNCATED;
     551              : 
     552            0 :     default:
     553            0 :       return false;
     554              :     }
     555              : }
     556              : 
     557              : /* X, with code CODE, is an operation for which safe_for_live_propagation
     558              :    holds true, and bits set in MASK are live in the result.  Compute a
     559              :    mask of (potentially) live bits in the non-constant inputs.  In case of
     560              :    binop_implies_op2_fully_live (e.g. shifts), the computed mask may
     561              :    exclusively pertain to the first operand.
     562              : 
     563              :    This looks wrong as we may have some important operations embedded as
     564              :    operands of another operation.  For example, we might have an extension
     565              :    wrapping a shift.  It really feels like this needs to be recursing down
     566              :    into operands much more often.  */
     567              : 
     568              : unsigned HOST_WIDE_INT
     569     71759026 : carry_backpropagate (unsigned HOST_WIDE_INT mask, enum rtx_code code, rtx x)
     570              : {
     571     73501529 :   if (mask == 0)
     572              :     return 0;
     573              : 
     574     73501503 :   enum machine_mode mode = GET_MODE_INNER (GET_MODE (x));
     575     73501503 :   unsigned HOST_WIDE_INT mmask = GET_MODE_MASK (mode);
     576              : 
     577              :   /* While we don't try to optimize operations on types larger
     578              :      than 64 bits, we do want to make sure not to invoke undefined
     579              :      behavior when presented with such operations during use
     580              :      processing.  The safe thing to do is to just return mmask
     581              :      for that scenario indicating every possible chunk is life.  */
     582     73501503 :   scalar_int_mode smode;
     583     73501503 :   if (!is_a <scalar_int_mode> (mode, &smode)
     584     61261654 :       || GET_MODE_BITSIZE (smode) > HOST_BITS_PER_WIDE_INT)
     585              :     return mmask;
     586              : 
     587     59153359 :   switch (code)
     588              :     {
     589     16547587 :     case PLUS:
     590     16547587 :     case MINUS:
     591     16547587 :     case MULT:
     592     16547587 :       return (HOST_WIDE_INT_UC (2) << floor_log2 (mask)) - 1;
     593              : 
     594              :     /* We propagate for the shifted operand, but not the shift
     595              :        count.  The count is handled specially.  */
     596      1409087 :     case ASHIFT:
     597      1409087 :       if (CONST_INT_P (XEXP (x, 1))
     598      2748189 :           && UINTVAL (XEXP (x, 1)) < GET_MODE_BITSIZE (smode))
     599      1339066 :         return (HOST_WIDE_INT) mask >> INTVAL (XEXP (x, 1));
     600        70021 :       return (HOST_WIDE_INT_UC (2) << floor_log2 (mask)) - 1;
     601              : 
     602              :     /* We propagate for the shifted operand, but not the shift
     603              :        count.  The count is handled specially.  */
     604       657985 :     case LSHIFTRT:
     605       657985 :       if (CONST_INT_P (XEXP (x, 1))
     606      1283584 :           && UINTVAL (XEXP (x, 1)) < GET_MODE_BITSIZE (smode))
     607       625571 :         return mmask & (mask << INTVAL (XEXP (x, 1)));
     608              :       return mmask;
     609              : 
     610              :     /* We propagate for the shifted operand, but not the shift
     611              :        count.  The count is handled specially.  */
     612       292912 :     case ASHIFTRT:
     613       292912 :       if (CONST_INT_P (XEXP (x, 1))
     614       574036 :           && UINTVAL (XEXP (x, 1)) < GET_MODE_BITSIZE (smode))
     615              :         {
     616       281116 :           HOST_WIDE_INT sign = 0;
     617       281116 :           if (HOST_BITS_PER_WIDE_INT - clz_hwi (mask) + INTVAL (XEXP (x, 1))
     618       281116 :               > GET_MODE_BITSIZE (smode))
     619       562232 :             sign = HOST_WIDE_INT_1U << (GET_MODE_BITSIZE (smode) - 1);
     620       281116 :           return sign | (mmask & (mask << INTVAL (XEXP (x, 1))));
     621              :         }
     622              :       return mmask;
     623              : 
     624        38878 :     case SMUL_HIGHPART:
     625        38878 :     case UMUL_HIGHPART:
     626        38878 :       if (XEXP (x, 1) == const0_rtx)
     627              :         return 0;
     628        38878 :       if (XEXP (x, 1) == const1_rtx)
     629              :         return mmask;
     630        38878 :       if (CONST_INT_P (XEXP (x, 1)))
     631              :         {
     632            0 :           if (pow2p_hwi (INTVAL (XEXP (x, 1))))
     633            0 :             return mmask & (mask << (GET_MODE_BITSIZE (smode)
     634            0 :                                      - exact_log2 (INTVAL (XEXP (x, 1)))));
     635              : 
     636            0 :           int bits = (HOST_BITS_PER_WIDE_INT + GET_MODE_BITSIZE (smode)
     637            0 :                       - clz_hwi (mask) - ctz_hwi (INTVAL (XEXP (x, 1))));
     638            0 :           if (bits < GET_MODE_BITSIZE (smode))
     639            0 :             return (HOST_WIDE_INT_1U << bits) - 1;
     640              :         }
     641              :       return mmask;
     642              : 
     643       652566 :     case SIGN_EXTEND:
     644       652566 :       if (!GET_MODE_BITSIZE (GET_MODE (x)).is_constant ()
     645       652566 :           || !GET_MODE_BITSIZE (GET_MODE (XEXP (x, 0))).is_constant ())
     646              :         return -1;
     647              : 
     648              :       /* We want the mode of the inner object.  We need to ensure its
     649              :          sign bit is on in MASK.  */
     650       652566 :       mode = GET_MODE_INNER (GET_MODE (XEXP (x, 0)));
     651       652566 :       if (mask & ~GET_MODE_MASK (mode))
     652       652231 :         mask |= HOST_WIDE_INT_1U << (GET_MODE_BITSIZE (mode).to_constant ()
     653       652231 :                                      - 1);
     654              : 
     655              :       /* Recurse into the operand.  */
     656       652566 :       return carry_backpropagate (mask, GET_CODE (XEXP (x, 0)), XEXP (x, 0));
     657              : 
     658      1089937 :     case ZERO_EXTEND:
     659      1089937 :       if (!GET_MODE_BITSIZE (GET_MODE (x)).is_constant ()
     660      1089937 :           || !GET_MODE_BITSIZE (GET_MODE (XEXP (x, 0))).is_constant ())
     661              :         return -1;
     662              : 
     663              :       /* Recurse into the operand.  */
     664      1089937 :       return carry_backpropagate (mask, GET_CODE (XEXP (x, 0)), XEXP (x, 0));
     665              : 
     666              :     /* We propagate for the shifted operand, but not the shift
     667              :        count.  The count is handled specially.  */
     668            0 :     case SS_ASHIFT:
     669            0 :     case US_ASHIFT:
     670            0 :       if (CONST_INT_P (XEXP (x, 1))
     671            0 :           && UINTVAL (XEXP (x, 1)) < GET_MODE_BITSIZE (smode))
     672              :         {
     673            0 :           return ((mmask & ~((unsigned HOST_WIDE_INT) mmask
     674            0 :                              >> (INTVAL (XEXP (x, 1))
     675            0 :                                  + (XEXP (x, 1) != const0_rtx
     676            0 :                                     && code == SS_ASHIFT))))
     677            0 :                   | ((HOST_WIDE_INT) mask >> INTVAL (XEXP (x, 1))));
     678              :         }
     679              :       return mmask;
     680              : 
     681              :     default:
     682              :       return mask;
     683              :     }
     684              : }
     685              : 
     686              : /* Process uses in INSN contained in OBJ.  Set appropriate bits in LIVENOW
     687              :    for any chunks of pseudos that become live, potentially filtering using
     688              :    bits from LIVE_TMP.
     689              : 
     690              :    If MODIFY is true, then optimize sign/zero extensions to SUBREGs when
     691              :    the extended bits are never read and mark pseudos which had extensions
     692              :    eliminated in CHANGED_PSEUDOS.  */
     693              : 
     694              : static void
     695    138134916 : ext_dce_process_uses (rtx_insn *insn, rtx obj,
     696              :                       bitmap live_tmp, bool skipped_dest)
     697              : {
     698    138134916 :   subrtx_var_iterator::array_type array_var;
     699    759051725 :   FOR_EACH_SUBRTX_VAR (iter, array_var, obj, NONCONST)
     700              :     {
     701              :       /* An EXPR_LIST (from call fusage) ends in NULL_RTX.  */
     702    620916809 :       rtx x = *iter;
     703    620916809 :       if (x == NULL_RTX)
     704      9475204 :         continue;
     705              : 
     706              :       /* So the basic idea in this FOR_EACH_SUBRTX_VAR loop is to
     707              :          handle SETs explicitly, possibly propagating live information
     708              :          into the uses.
     709              : 
     710              :          We may continue the loop at various points which will cause
     711              :          iteration into the next level of RTL.  Breaking from the loop
     712              :          is never safe as it can lead us to fail to process some of the
     713              :          RTL and thus not make objects live when necessary.  */
     714    611441605 :       enum rtx_code xcode = GET_CODE (x);
     715    611441605 :       if (xcode == SET)
     716              :         {
     717    122235635 :           const_rtx dst = SET_DEST (x);
     718    122235635 :           rtx src = SET_SRC (x);
     719    122235635 :           const_rtx y;
     720    122235635 :           unsigned HOST_WIDE_INT bit = 0;
     721              : 
     722              :           /* The code of the RHS of a SET.  */
     723    122235635 :           enum rtx_code code = GET_CODE (src);
     724              : 
     725              :           /* ?!? How much of this should mirror SET handling, potentially
     726              :              being shared?   */
     727    122235635 :           if (SUBREG_P (dst) && subreg_lsb (dst).is_constant (&bit))
     728              :             {
     729       582925 :               if (bit >= HOST_BITS_PER_WIDE_INT)
     730              :                 bit = HOST_BITS_PER_WIDE_INT - 1;
     731       582925 :               dst = SUBREG_REG (dst);
     732              :             }
     733    121652710 :           else if (GET_CODE (dst) == STRICT_LOW_PART)
     734        13271 :             dst = XEXP (dst, 0);
     735              : 
     736              :           /* Main processing of the uses.  Two major goals here.
     737              : 
     738              :              First, we want to try and propagate liveness (or the lack
     739              :              thereof) from the destination register to the source
     740              :              register(s).
     741              : 
     742              :              Second, if the source is an extension, try to optimize
     743              :              it into a SUBREG.  The SUBREG form indicates we don't
     744              :              care about the upper bits and will usually be copy
     745              :              propagated away.
     746              : 
     747              :              If we fail to handle something in here, the expectation
     748              :              is the iterator will dive into the sub-components and
     749              :              mark all the chunks in any found REGs as live.  */
     750    122235635 :           if (REG_P (dst) && safe_for_live_propagation (code))
     751              :             {
     752              :               /* Create a mask representing the bits of this output
     753              :                  operand that are live after this insn.  We can use
     754              :                  this information to refine the live in state of
     755              :                  inputs to this insn in many cases.
     756              : 
     757              :                  We have to do this on a per SET basis, we might have
     758              :                  an INSN with multiple SETS, some of which can narrow
     759              :                  the source operand liveness, some of which may not.  */
     760     71759026 :               unsigned HOST_WIDE_INT dst_mask = 0;
     761     71759026 :               HOST_WIDE_INT rn = REGNO (dst);
     762     71759026 :               unsigned HOST_WIDE_INT mask_array[]
     763              :                 = { 0xff, 0xff00, HOST_WIDE_INT_UC (0xffff0000),
     764              :                     -HOST_WIDE_INT_UC (0x100000000) };
     765    358795130 :               for (int i = 0; i < 4; i++)
     766    287036104 :                 if (bitmap_bit_p (live_tmp, 4 * rn + i))
     767    230138918 :                   dst_mask |= mask_array[i];
     768     71759026 :               dst_mask >>= bit;
     769              : 
     770              :               /* If we ignored a destination during set processing, then
     771              :                  consider all the bits live.  */
     772     71759026 :               if (skipped_dest)
     773     25285852 :                 dst_mask = -1;
     774              : 
     775     71759026 :               dst_mask = carry_backpropagate (dst_mask, code, src);
     776              : 
     777              :               /* ??? Could also handle ZERO_EXTRACT / SIGN_EXTRACT
     778              :                  of the source specially to improve optimization.  */
     779     71759026 :               if (code == SIGN_EXTEND || code == ZERO_EXTEND)
     780              :                 {
     781      1754356 :                   rtx inner = XEXP (src, 0);
     782      1754356 :                   unsigned HOST_WIDE_INT src_mask
     783      1754356 :                     = GET_MODE_MASK (GET_MODE_INNER (GET_MODE (inner)));
     784              : 
     785              :                   /* DST_MASK could be zero if we had something in the SET
     786              :                      that we couldn't handle.  */
     787      1754356 :                   if (modify && !skipped_dest && (dst_mask & ~src_mask) == 0)
     788         4694 :                     ext_dce_try_optimize_extension (insn, x);
     789              : 
     790              :                   /* Stripping the extension here just seems wrong on multiple
     791              :                      levels.  It's source side handling, so it seems like it
     792              :                      belongs in the loop below.  Stripping here also makes it
     793              :                      harder than necessary to properly handle live bit groups
     794              :                      for (ANY_EXTEND (SUBREG)) where the SUBREG has
     795              :                      SUBREG_PROMOTED state.  */
     796      1754356 :                   dst_mask &= src_mask;
     797      1754356 :                   src = XEXP (src, 0);
     798      1754356 :                   code = GET_CODE (src);
     799              :                 }
     800              : 
     801              :               /* Special case for (sub)targets that do not have extension
     802              :                  insns (and thus use shifts).  We want to detect when we have
     803              :                  a shift pair and treat the pair as-if was an extension.
     804              : 
     805              :                  Key on the right shift and use (for now) simplistic tests
     806              :                  to find the corresponding left shift.  */
     807     71759026 :               scalar_mode outer_mode;
     808     71759026 :               if ((code == LSHIFTRT || code == ASHIFTRT)
     809      1047172 :                   && CONST_INT_P (XEXP (src, 1))
     810      1155681 :                   && (INTVAL (XEXP (src, 1)) == BITS_PER_WORD - 8
     811      1151162 :                       || INTVAL (XEXP (src, 1)) == BITS_PER_WORD - 16
     812       989014 :                       || INTVAL (XEXP (src, 1)) == BITS_PER_WORD - 32)
     813     71826313 :                   && is_a <scalar_mode> (GET_MODE (src), &outer_mode)
     814     71826313 :                   && GET_MODE_BITSIZE (outer_mode) <= HOST_BITS_PER_WIDE_INT)
     815              :                 {
     816              :                   /* So we have a right shift that could correspond to
     817              :                      the second in a pair impementing QI, HI or SI -> DI
     818              :                      extension.  See if we can find the left shift.  For
     819              :                      now, just look one real instruction back.  */
     820        67143 :                   rtx_insn *prev_insn = prev_nonnote_nondebug_insn_bb (insn);
     821              : 
     822              :                   /* The previous insn must be a left shift by the same
     823              :                      amount.  */
     824        67143 :                   rtx prev_set;
     825        67143 :                   if (prev_insn
     826        64319 :                       && (prev_set = single_set (prev_insn))
     827              :                       /* The destination of the left shift must be the
     828              :                          source of the right shift.  */
     829        64220 :                       && SET_DEST (prev_set) == XEXP (src, 0)
     830        32248 :                       && GET_CODE (SET_SRC (prev_set)) == ASHIFT
     831          811 :                       && CONST_INT_P (XEXP (SET_SRC (prev_set), 1))
     832              :                       /* The counts must match.  */
     833        67143 :                       && (INTVAL (XEXP (src, 1))
     834          795 :                           == INTVAL (XEXP (SET_SRC (prev_set), 1))))
     835              :                     {
     836           15 :                       unsigned HOST_WIDE_INT src_mask = GET_MODE_BITSIZE (GET_MODE (src)).to_constant ();
     837           15 :                       src_mask -= INTVAL (XEXP (src, 1));
     838           15 :                       src_mask = (HOST_WIDE_INT_1U << src_mask) - 1;
     839              : 
     840              :                       /* DST_MASK has been adjusted for INSN.  We need its original value.  */
     841           15 :                       unsigned HOST_WIDE_INT tmp_mask = 0;
     842           75 :                       for (int i = 0; i < 4; i++)
     843           60 :                         if (bitmap_bit_p (live_tmp, 4 * rn + i))
     844           15 :                           tmp_mask |= mask_array[i];
     845           15 :                       tmp_mask >>= bit;
     846              : 
     847           15 :                       if (modify && !skipped_dest && (tmp_mask & ~src_mask) == 0)
     848              :                         {
     849            0 :                           ext_dce_try_optimize_rshift (insn, x, XEXP (SET_SRC (prev_set), 0), prev_insn);
     850              : 
     851              :                           /* These may not strictly be necessary, but we might as well try and be
     852              :                              as accurate as possible.  The RHS is now a simple REG.  */
     853            0 :                           dst_mask = src_mask;
     854            0 :                           src = XEXP (SET_SRC (prev_set), 0);
     855            0 :                           code = GET_CODE (src);
     856              :                         }
     857              :                     }
     858              :                 }
     859              : 
     860              :               /* Optimization is done at this point.  We just want to make
     861              :                  sure everything that should get marked as live is marked
     862              :                  from here onward.  */
     863              : 
     864              :               /* We will handle the other operand of a binary operator
     865              :                  at the bottom of the loop by resetting Y.  */
     866     71759026 :               if (BINARY_P (src))
     867     22564860 :                 y = XEXP (src, 0);
     868              :               else
     869              :                 y = src;
     870              : 
     871              :               /* We're inside a SET and want to process the source operands
     872              :                  making things live.  Breaking from this loop will cause
     873              :                  the iterator to work on sub-rtxs, so it is safe to break
     874              :                  if we see something we don't know how to handle.
     875              : 
     876              :                  This code is just hokey as it really just handles trivial
     877              :                  unary and binary cases.  Otherwise the loop exits and we
     878              :                  continue iterating on sub-rtxs, but outside the set context.  */
     879              :               unsigned HOST_WIDE_INT save_mask = dst_mask;
     880    115913692 :               for (;;)
     881              :                 {
     882              :                   /* In general we want to restore DST_MASK before each loop
     883              :                      iteration.  The exception is when the opcode implies that
     884              :                      the other operand is fully live.  That's handled by
     885              :                      changing SAVE_MASK below.  */
     886     93836359 :                   dst_mask = save_mask;
     887              :                   /* Strip an outer paradoxical subreg.  The bits outside
     888              :                      the inner mode are don't cares.  So we can just strip
     889              :                      and process the inner object.  */
     890     93836359 :                   if (paradoxical_subreg_p (y))
     891              :                     y = XEXP (y, 0);
     892     93734740 :                   else if (SUBREG_P (y) && subreg_lsb (y).is_constant (&bit))
     893              :                     {
     894              :                       /* If !TRULY_NOOP_TRUNCATION_MODES_P, the mode
     895              :                          change performed by Y would normally need to be a
     896              :                          TRUNCATE rather than a SUBREG.  It is probably the
     897              :                          guarantee provided by SUBREG_PROMOTED_VAR_P that
     898              :                          allows the SUBREG in Y as an exception.  We must
     899              :                          therefore preserve that guarantee and treat the
     900              :                          upper bits of the inner register as live
     901              :                          regardless of the outer code.  See PR 120050.  */
     902      1925257 :                       if (!REG_P (SUBREG_REG (y))
     903      1925257 :                           || (SUBREG_PROMOTED_VAR_P (y)
     904        13482 :                               && (!TRULY_NOOP_TRUNCATION_MODES_P (
     905              :                                     GET_MODE (y),
     906              :                                     GET_MODE (SUBREG_REG (y))))))
     907              :                         break;
     908              : 
     909              :                       /* If this is a wide object (more bits than we can fit
     910              :                          in a HOST_WIDE_INT), then just break from the SET
     911              :                          context.   That will cause the iterator to walk down
     912              :                          into the subrtx and if we land on a REG we'll mark
     913              :                          the whole think live.  */
     914      1924285 :                       if (bit >= HOST_BITS_PER_WIDE_INT)
     915              :                         break;
     916              : 
     917              :                       /* The SUBREG's mode determines the live width.  */
     918      1714487 :                       if (dst_mask)
     919              :                         {
     920      1714487 :                           dst_mask <<= bit;
     921      1714487 :                           if (!dst_mask)
     922            0 :                             dst_mask = -HOST_WIDE_INT_UC (0x100000000);
     923              :                         }
     924      1714487 :                       y = SUBREG_REG (y);
     925              :                     }
     926              : 
     927     93625589 :                   if (REG_P (y))
     928              :                     {
     929              :                       /* We have found the use of a register.  We need to mark
     930              :                          the appropriate chunks of the register live.  The mode
     931              :                          of the REG is a starting point.  We may refine that
     932              :                          based on what chunks in the output were live.  */
     933     49788925 :                       rn = 4 * REGNO (y);
     934     49788925 :                       unsigned HOST_WIDE_INT tmp_mask = dst_mask;
     935              : 
     936              :                       /* If the RTX code for the SET_SRC is not one we can
     937              :                          propagate destination liveness through, then just
     938              :                          set the mask to the mode's mask.  */
     939     49788925 :                       if (!safe_for_live_propagation (code))
     940        32834 :                         tmp_mask
     941        65668 :                           = GET_MODE_MASK (GET_MODE_INNER (GET_MODE (y)));
     942              : 
     943     49788925 :                       if (tmp_mask & 0xff)
     944     49370983 :                         bitmap_set_bit (livenow, rn);
     945     49788925 :                       if (tmp_mask & 0xff00)
     946     47936192 :                         bitmap_set_bit (livenow, rn + 1);
     947     49788925 :                       if (tmp_mask & HOST_WIDE_INT_UC (0xffff0000))
     948     47687604 :                         bitmap_set_bit (livenow, rn + 2);
     949     49788925 :                       if (tmp_mask & -HOST_WIDE_INT_UC (0x100000000))
     950     41184022 :                         bitmap_set_bit (livenow, rn + 3);
     951              :                     }
     952     43836664 :                   else if (!CONSTANT_P (y))
     953              :                     break;
     954              : 
     955              :                   /* We might have (ashift (const_int 1) (reg...))
     956              :                      By setting dst_mask we can continue iterating on the
     957              :                      the next operand and it will be considered fully live.
     958              : 
     959              :                      Note that since we restore DST_MASK from SAVE_MASK at the
     960              :                      top of the loop, we have to change SAVE_MASK to get the
     961              :                      semantics we want.  */
     962     76898622 :                   if (binop_implies_op2_fully_live (GET_CODE (src)))
     963      2468888 :                     save_mask = -1;
     964              : 
     965              :                   /* If this was anything but a binary operand, break the inner
     966              :                      loop.  This is conservatively correct as it will cause the
     967              :                      iterator to look at the sub-rtxs outside the SET context.  */
     968     76898622 :                   if (!BINARY_P (src))
     969              :                     break;
     970              : 
     971              :                   /* We processed the first operand of a binary operator.  Now
     972              :                      handle the second.  */
     973     22077333 :                   y = XEXP (src, 1), src = pc_rtx;
     974     22077333 :                 }
     975              : 
     976              :               /* These are leaf nodes, no need to iterate down into them.  */
     977     71759026 :               if (REG_P (y) || CONSTANT_P (y))
     978     54821289 :                 iter.skip_subrtxes ();
     979              :             }
     980              :         }
     981              :       /* If we are reading the low part of a SUBREG, then we can
     982              :          refine liveness of the input register, otherwise let the
     983              :          iterator continue into SUBREG_REG.  */
     984    489205970 :       else if (SUBREG_P (x)
     985      1328249 :                && REG_P (SUBREG_REG (x))
     986      1326437 :                && !paradoxical_subreg_p (x)
     987      1302920 :                && subreg_lowpart_p (x)
     988      1020268 :                && GET_MODE_BITSIZE (GET_MODE (x)).is_constant ()
     989    491246506 :                && GET_MODE_BITSIZE (GET_MODE (x)).to_constant () <= 32)
     990              :         {
     991       508039 :           HOST_WIDE_INT size = GET_MODE_BITSIZE (GET_MODE (x)).to_constant ();
     992       508039 :           HOST_WIDE_INT rn = 4 * REGNO (SUBREG_REG (x));
     993              : 
     994              :           /* If this is a promoted subreg, then more of it may be live than
     995              :              is otherwise obvious.  */
     996       508039 :           if (SUBREG_PROMOTED_VAR_P (x))
     997         4296 :             size = GET_MODE_BITSIZE (GET_MODE (SUBREG_REG (x))).to_constant ();
     998              : 
     999       508039 :           bitmap_set_bit (livenow, rn);
    1000       508039 :           if (size > 8)
    1001       331405 :             bitmap_set_bit (livenow, rn + 1);
    1002       331405 :           if (size > 16)
    1003       289398 :             bitmap_set_bit (livenow, rn + 2);
    1004       289398 :           if (size >= 32)
    1005       289398 :             bitmap_set_bit (livenow, rn + 3);
    1006       508039 :           iter.skip_subrtxes ();
    1007              :         }
    1008              :       /* If we have a register reference that is not otherwise handled,
    1009              :          just assume all the chunks are live.  */
    1010    488697931 :       else if (REG_P (x))
    1011    162303188 :         bitmap_set_range (livenow, REGNO (x) * 4, group_limit (x));
    1012              :     }
    1013    138134916 : }
    1014              : 
    1015              : /* Process a single basic block BB with current liveness information
    1016              :    in LIVENOW, returning updated liveness information.
    1017              : 
    1018              :    If MODIFY is true, then this is the last pass and unnecessary
    1019              :    extensions should be eliminated when possible.  If an extension
    1020              :    is removed, the source pseudo is marked in CHANGED_PSEUDOS.  */
    1021              : 
    1022              : static void
    1023     23026832 : ext_dce_process_bb (basic_block bb)
    1024              : {
    1025     23026832 :   rtx_insn *insn;
    1026              : 
    1027    304087629 :   FOR_BB_INSNS_REVERSE (bb, insn)
    1028              :     {
    1029    433461882 :       if (!NONDEBUG_INSN_P (insn))
    1030    152401085 :         continue;
    1031              : 
    1032              :       /* Live-out state of the destination of this insn.  We can
    1033              :          use this to refine the live-in state of the sources of
    1034              :          this insn in many cases.  */
    1035    128659712 :       bitmap live_tmp = BITMAP_ALLOC (NULL);
    1036              : 
    1037              :       /* First process any sets/clobbers in INSN.  */
    1038    128659712 :       bool skipped_dest = ext_dce_process_sets (insn, PATTERN (insn), live_tmp);
    1039              : 
    1040              :       /* CALL_INSNs need processing their fusage data.  */
    1041    128659712 :       if (CALL_P (insn))
    1042      9475204 :         skipped_dest |= ext_dce_process_sets (insn,
    1043              :                                               CALL_INSN_FUNCTION_USAGE (insn),
    1044              :                                               live_tmp);
    1045              : 
    1046              :       /* And now uses, optimizing away SIGN/ZERO extensions as we go.  */
    1047    128659712 :       ext_dce_process_uses (insn, PATTERN (insn), live_tmp, skipped_dest);
    1048              : 
    1049              :       /* A nonlocal goto implicitly uses the frame pointer.  */
    1050    128659712 :       if (JUMP_P (insn) && find_reg_note (insn, REG_NON_LOCAL_GOTO, NULL_RTX))
    1051              :         {
    1052         1130 :           bitmap_set_range (livenow, FRAME_POINTER_REGNUM * 4, 4);
    1053         1130 :           if (!HARD_FRAME_POINTER_IS_FRAME_POINTER)
    1054         1130 :             bitmap_set_range (livenow, HARD_FRAME_POINTER_REGNUM * 4, 4);
    1055              :         }
    1056              : 
    1057              :       /* And process fusage data for the use as well.  */
    1058    128659712 :       if (CALL_P (insn))
    1059              :         {
    1060      9475204 :           if (!FAKE_CALL_P (insn))
    1061      9475144 :             bitmap_set_range (livenow, STACK_POINTER_REGNUM * 4, 4);
    1062              : 
    1063              :           /* If this is not a call to a const fucntion, then assume it
    1064              :              can read any global register.  */
    1065      9475204 :           if (!RTL_CONST_CALL_P (insn))
    1066    851058717 :             for (unsigned i = 0; i < FIRST_PSEUDO_REGISTER; i++)
    1067    841907548 :               if (global_regs[i])
    1068          230 :                 bitmap_set_range (livenow, i * 4, 4);
    1069              : 
    1070      9475204 :           ext_dce_process_uses (insn, CALL_INSN_FUNCTION_USAGE (insn), live_tmp, false);
    1071              :         }
    1072              : 
    1073    128659712 :       BITMAP_FREE (live_tmp);
    1074              :     }
    1075     23026832 : }
    1076              : 
    1077              : /* SUBREG_PROMOTED_VAR_P is set by the gimple->rtl optimizers and
    1078              :    is usually helpful.  However, in some cases setting the value when
    1079              :    it not strictly needed can cause this pass to miss optimizations.
    1080              : 
    1081              :    Specifically consider (set (mem) (subreg (reg))).  If set in that
    1082              :    case it will cause more bit groups to be live for REG than would
    1083              :    be strictly necessary which in turn can inhibit extension removal.
    1084              : 
    1085              :    So do a pass over the IL wiping the SUBREG_PROMOTED_VAR_P when it
    1086              :    is obviously not needed.  */
    1087              : 
    1088              : static void
    1089       963981 : maybe_clear_subreg_promoted_p (void)
    1090              : {
    1091    120322219 :   for (rtx_insn *insn = get_insns(); insn; insn = NEXT_INSN (insn))
    1092              :     {
    1093    119358238 :       if (!NONDEBUG_INSN_P (insn))
    1094     64364377 :         continue;
    1095              : 
    1096     54993861 :       rtx set = single_set (insn);
    1097     54993861 :       if (!set)
    1098      3657489 :         continue;
    1099              : 
    1100              :       /* There may be other cases where we should clear, but for
    1101              :          now, this is the only known case where it causes problems.  */
    1102     51336372 :       if (MEM_P (SET_DEST (set)) && SUBREG_P (SET_SRC (set))
    1103        69773 :         && GET_MODE (SET_DEST (set)) <= GET_MODE (SUBREG_REG (SET_SRC (set))))
    1104        60518 :         SUBREG_PROMOTED_VAR_P (SET_SRC (set)) = 0;
    1105              :     }
    1106       963981 : }
    1107              : 
    1108              : /* Walk the IL and build the transitive closure of all the REGs tied
    1109              :    together by copies where either the source or destination is
    1110              :    marked in CHANGED_PSEUDOS.  */
    1111              : 
    1112              : static void
    1113       963981 : expand_changed_pseudos (void)
    1114              : {
    1115              :   /* Build a vector of registers related by a copy.  This is meant to
    1116              :      speed up the next step by avoiding full IL walks.  */
    1117       963981 :   struct copy_pair { rtx first; rtx second; };
    1118       963981 :   auto_vec<copy_pair> pairs;
    1119    120322219 :   for (rtx_insn *insn = get_insns(); insn; insn = NEXT_INSN (insn))
    1120              :     {
    1121    119358238 :       if (!NONDEBUG_INSN_P (insn))
    1122     64364377 :         continue;
    1123              : 
    1124     54993861 :       rtx pat = PATTERN (insn);
    1125              : 
    1126              :       /* Simple copies to a REG from another REG or SUBREG of a REG.  */
    1127     54993861 :       if (GET_CODE (pat) == SET
    1128     43389105 :           && REG_P (SET_DEST (pat))
    1129     30876850 :           && (REG_P (SET_SRC (pat))
    1130     22368508 :               || (SUBREG_P (SET_SRC (pat))
    1131       374574 :                   && REG_P (SUBREG_REG (SET_SRC (pat))))))
    1132              :         {
    1133       374137 :           rtx src = (REG_P (SET_SRC (pat))
    1134      8882479 :                      ? SET_SRC (pat)
    1135              :                      : SUBREG_REG (SET_SRC (pat)));
    1136      8882479 :           pairs.safe_push ({ SET_DEST (pat), src });
    1137              :         }
    1138              : 
    1139              :       /* Simple copies to a REG from another REG or SUBREG of a REG
    1140              :          held inside a PARALLEL.  */
    1141     54993861 :       if (GET_CODE (pat) == PARALLEL)
    1142              :         {
    1143     25049722 :           for (int i = XVECLEN (pat, 0) - 1; i >= 0; i--)
    1144              :             {
    1145     16813423 :               rtx elem = XVECEXP (pat, 0, i);
    1146              : 
    1147     16813423 :               if (GET_CODE (elem) == SET
    1148      8451547 :                   && REG_P (SET_DEST (elem))
    1149      8301403 :                   && (REG_P (SET_SRC (elem))
    1150      8301403 :                       || (SUBREG_P (SET_SRC (elem))
    1151            0 :                           && REG_P (SUBREG_REG (SET_SRC (elem))))))
    1152              :                 {
    1153            0 :                   rtx src = (REG_P (SET_SRC (elem))
    1154            0 :                              ? SET_SRC (elem)
    1155              :                              : SUBREG_REG (SET_SRC (elem)));
    1156            0 :                   pairs.safe_push ({ SET_DEST (elem), src });
    1157              :                 }
    1158              :             }
    1159      8236299 :           continue;
    1160      8236299 :         }
    1161              :     }
    1162              : 
    1163              :   /* Now we have a vector with copy pairs.  Iterate over that list
    1164              :      updating CHANGED_PSEUDOS as we go.  Eliminate copies from the
    1165              :      list as we go as they don't need further processing.  */
    1166              :   bool changed = true;
    1167      1928002 :   while (changed)
    1168              :     {
    1169              :       changed = false;
    1170              :       unsigned int i;
    1171              :       copy_pair *p;
    1172     10810836 :       FOR_EACH_VEC_ELT (pairs, i, p)
    1173              :         {
    1174      8882834 :           if (bitmap_bit_p (changed_pseudos, REGNO (p->second))
    1175      8882834 :               && bitmap_set_bit (changed_pseudos, REGNO (p->first)))
    1176              :             {
    1177           52 :               pairs.unordered_remove (i);
    1178           52 :               changed = true;
    1179              :             }
    1180              :         }
    1181              :     }
    1182       963981 : }
    1183              : 
    1184              : /* We optimize away sign/zero extensions in this pass and replace
    1185              :    them with SUBREGs indicating certain bits are don't cares.
    1186              : 
    1187              :    This changes the SUBREG_PROMOTED_VAR_P state of the object.
    1188              :    It is fairly painful to fix this on the fly, so we have
    1189              :    recorded which pseudos are affected and we look for SUBREGs
    1190              :    of those pseudos and fix them up.  */
    1191              : 
    1192              : static void
    1193       963981 : reset_subreg_promoted_p (void)
    1194              : {
    1195              :   /* This pass eliminates zero/sign extensions on pseudo regs found
    1196              :      in CHANGED_PSEUDOS.  Elimination of those extensions changes if
    1197              :      the pseudos are known to hold values extended to wider modes
    1198              :      via SUBREG_PROMOTED_VAR.  So we wipe the SUBREG_PROMOTED_VAR
    1199              :      state on all affected pseudos.
    1200              : 
    1201              :      But that is insufficient.  We might have a copy from one REG
    1202              :      to another (possibly with the source register wrapped with a
    1203              :      SUBREG).  We need to wipe SUBREG_PROMOTED_VAR on the transitive
    1204              :      closure of the original CHANGED_PSEUDOS and registers they're
    1205              :      connected to via copies.  So expand the set.  */
    1206       963981 :   expand_changed_pseudos ();
    1207              :     
    1208              :   /* If we removed an extension, that changed the promoted state
    1209              :      of the destination of that extension.  Thus we need to go
    1210              :      find any SUBREGs that reference that pseudo and adjust their
    1211              :      SUBREG_PROMOTED_P state.  */
    1212    120322219 :   for (rtx_insn *insn = get_insns(); insn; insn = NEXT_INSN (insn))
    1213              :     {
    1214    119358238 :       if (!NONDEBUG_INSN_P (insn))
    1215     64364377 :         continue;
    1216              : 
    1217     54993861 :       rtx pat = PATTERN (insn);
    1218     54993861 :       subrtx_var_iterator::array_type array;
    1219    351032729 :       FOR_EACH_SUBRTX_VAR (iter, array, pat, NONCONST)
    1220              :         {
    1221    296038868 :           rtx sub = *iter;
    1222              : 
    1223              :           /* We only care about SUBREGs.  */
    1224    296038868 :           if (GET_CODE (sub) != SUBREG)
    1225    294504717 :             continue;
    1226              : 
    1227      1534151 :           const_rtx x = SUBREG_REG (sub);
    1228              : 
    1229              :           /* We only care if the inner object is a REG.  */
    1230      1534151 :           if (!REG_P (x))
    1231          758 :             continue;
    1232              : 
    1233              :           /* And only if the SUBREG is a promoted var.  */
    1234      1533393 :           if (!SUBREG_PROMOTED_VAR_P (sub))
    1235      1528063 :             continue;
    1236              : 
    1237         5330 :           if (bitmap_bit_p (changed_pseudos, REGNO (x)))
    1238            0 :             SUBREG_PROMOTED_VAR_P (sub) = 0;
    1239              :         }
    1240     54993861 :     }
    1241       963981 : }
    1242              : 
    1243              : /* Initialization of the ext-dce pass.  Primarily this means
    1244              :    setting up the various bitmaps we utilize.  */
    1245              : 
    1246              : static void
    1247       963981 : ext_dce_init (void)
    1248              : {
    1249       963981 :   livein.create (last_basic_block_for_fn (cfun));
    1250       963981 :   livein.quick_grow_cleared (last_basic_block_for_fn (cfun));
    1251     12732532 :   for (int i = 0; i < last_basic_block_for_fn (cfun); i++)
    1252     11768551 :     bitmap_initialize (&livein[i], &bitmap_default_obstack);
    1253              : 
    1254       963981 :   auto_bitmap refs (&bitmap_default_obstack);
    1255       963981 :   df_get_exit_block_use_set (refs);
    1256              : 
    1257       963981 :   unsigned i;
    1258       963981 :   bitmap_iterator bi;
    1259      4434985 :   EXECUTE_IF_SET_IN_BITMAP (refs, 0, i, bi)
    1260      3471004 :     make_reg_live (&livein[EXIT_BLOCK], i);
    1261              : 
    1262       963981 :   livenow = BITMAP_ALLOC (NULL);
    1263       963981 :   all_blocks = BITMAP_ALLOC (NULL);
    1264       963981 :   changed_pseudos = BITMAP_ALLOC (NULL);
    1265              : 
    1266     12732532 :   for (int i = 0; i < last_basic_block_for_fn (cfun); i++)
    1267     11768551 :     if (i != ENTRY_BLOCK && i != EXIT_BLOCK)
    1268      9840589 :       bitmap_set_bit (all_blocks, i);
    1269              : 
    1270       963981 :   modify = false;
    1271       963981 : }
    1272              : 
    1273              : /* Finalization of the ext-dce pass.  Primarily this means
    1274              :    releasing up the various bitmaps we utilize.  */
    1275              : 
    1276              : static void
    1277       963981 : ext_dce_finish (void)
    1278              : {
    1279     12732532 :   for (unsigned i = 0; i < livein.length (); i++)
    1280     11768551 :     bitmap_clear (&livein[i]);
    1281       963981 :   livein.release ();
    1282              : 
    1283       963981 :   BITMAP_FREE (livenow);
    1284       963981 :   BITMAP_FREE (changed_pseudos);
    1285       963981 :   BITMAP_FREE (all_blocks);
    1286       963981 : }
    1287              : 
    1288              : /* Process block number BB_INDEX as part of the backward
    1289              :    simple dataflow analysis.  Return TRUE if something in
    1290              :    this block changed or FALSE otherwise.  */
    1291              : 
    1292              : static bool
    1293     26882756 : ext_dce_rd_transfer_n (int bb_index)
    1294              : {
    1295              :   /* The ENTRY/EXIT blocks never change.  */
    1296     26882756 :   if (bb_index == ENTRY_BLOCK || bb_index == EXIT_BLOCK)
    1297              :     return false;
    1298              : 
    1299     23026832 :   basic_block bb = BASIC_BLOCK_FOR_FN (cfun, bb_index);
    1300              : 
    1301              :   /* Make everything live that's live in the successors.  */
    1302     23026832 :   bitmap_clear (livenow);
    1303     23026832 :   edge_iterator ei;
    1304     23026832 :   edge e;
    1305              : 
    1306     57963954 :   FOR_EACH_EDGE (e, ei, bb->succs)
    1307     34937122 :     bitmap_ior_into (livenow, &livein[e->dest->index]);
    1308              : 
    1309     23026832 :   ext_dce_process_bb (bb);
    1310              : 
    1311              :   /* We only allow widening the set of objects live at the start
    1312              :      of a block.  Otherwise we run the risk of not converging.  */
    1313     23026832 :   return bitmap_ior_into (&livein[bb_index], livenow);
    1314              : }
    1315              : 
    1316              : /* Dummy function for the df_simple_dataflow API.  */
    1317     33560402 : static bool ext_dce_rd_confluence_n (edge) { return true; }
    1318              : 
    1319              : /* Use lifetime analyis to identify extensions that set bits that
    1320              :    are never read.  Turn such extensions into SUBREGs instead which
    1321              :    can often be propagated away.  */
    1322              : 
    1323              : void
    1324       963981 : ext_dce_execute (void)
    1325              : {
    1326              :   /* Limit the amount of memory we use for livein, with 4 bits per
    1327              :      reg per basic-block including overhead that maps to one byte
    1328              :      per reg per basic-block.  */
    1329       963981 :   uint64_t memory_request
    1330       963981 :     = (uint64_t)n_basic_blocks_for_fn (cfun) * max_reg_num ();
    1331       963981 :   if (memory_request / 1024 > (uint64_t)param_max_gcse_memory)
    1332              :     {
    1333            0 :       warning (OPT_Wdisabled_optimization,
    1334              :                "ext-dce disabled: %d basic blocks and %d registers; "
    1335              :                "increase %<--param max-gcse-memory%> above %wu",
    1336            0 :                n_basic_blocks_for_fn (cfun), max_reg_num (),
    1337              :                memory_request / 1024);
    1338            0 :       return;
    1339              :     }
    1340              : 
    1341              :   /* Some settings of SUBREG_PROMOTED_VAR_P are actively harmful
    1342              :      to this pass.  Clear it for those cases.  */
    1343       963981 :   maybe_clear_subreg_promoted_p ();
    1344       963981 :   df_analyze ();
    1345       963981 :   ext_dce_init ();
    1346              : 
    1347      3855924 :   do
    1348              :     {
    1349      1927962 :       df_simple_dataflow (DF_BACKWARD, NULL, NULL,
    1350              :                           ext_dce_rd_confluence_n, ext_dce_rd_transfer_n,
    1351              :                           all_blocks, df_get_postorder (DF_BACKWARD),
    1352              :                           df_get_n_blocks (DF_BACKWARD));
    1353      1927962 :       modify = !modify;
    1354              :     }
    1355              :   while (modify);
    1356              : 
    1357       963981 :   reset_subreg_promoted_p ();
    1358              : 
    1359       963981 :   ext_dce_finish ();
    1360              : }
    1361              : 
    1362              : 
    1363              : namespace {
    1364              : 
    1365              : const pass_data pass_data_ext_dce =
    1366              : {
    1367              :   RTL_PASS, /* type */
    1368              :   "ext_dce", /* name */
    1369              :   OPTGROUP_NONE, /* optinfo_flags */
    1370              :   TV_EXT_DCE, /* tv_id */
    1371              :   PROP_cfglayout, /* properties_required */
    1372              :   0, /* properties_provided */
    1373              :   0, /* properties_destroyed */
    1374              :   0, /* todo_flags_start */
    1375              :   TODO_df_finish, /* todo_flags_finish */
    1376              : };
    1377              : 
    1378              : class pass_ext_dce : public rtl_opt_pass
    1379              : {
    1380              : public:
    1381       285722 :   pass_ext_dce (gcc::context *ctxt)
    1382       571444 :     : rtl_opt_pass (pass_data_ext_dce, ctxt)
    1383              :   {}
    1384              : 
    1385              :   /* opt_pass methods: */
    1386      1471370 :   virtual bool gate (function *) { return flag_ext_dce && optimize > 0; }
    1387       963981 :   virtual unsigned int execute (function *)
    1388              :     {
    1389       963981 :       ext_dce_execute ();
    1390       963981 :       return 0;
    1391              :     }
    1392              : 
    1393              : }; // class pass_combine
    1394              : 
    1395              : } // anon namespace
    1396              : 
    1397              : rtl_opt_pass *
    1398       285722 : make_pass_ext_dce (gcc::context *ctxt)
    1399              : {
    1400       285722 :   return new pass_ext_dce (ctxt);
    1401              : }
        

Generated by: LCOV version 2.4-beta

LCOV profile is generated on x86_64 machine using following configure options: configure --disable-bootstrap --enable-coverage=opt --enable-languages=c,c++,fortran,go,jit,lto,rust,m2 --enable-host-shared. GCC test suite is run with the built compiler.