LCOV - code coverage report
Current view: top level - gcc - ext-dce.cc (source / functions) Coverage Total Hit
Test: gcc.info Lines: 84.4 % 418 353
Test Date: 2025-06-21 16:26:05 Functions: 94.7 % 19 18
Legend: Lines: hit not hit | Branches: + taken - not taken # not executed Branches: - 0 0

             Branch data     Line data    Source code
       1                 :             : /* RTL dead zero/sign extension (code) elimination.
       2                 :             :    Copyright (C) 2000-2025 Free Software Foundation, Inc.
       3                 :             : 
       4                 :             : This file is part of GCC.
       5                 :             : 
       6                 :             : GCC is free software; you can redistribute it and/or modify it under
       7                 :             : the terms of the GNU General Public License as published by the Free
       8                 :             : Software Foundation; either version 3, or (at your option) any later
       9                 :             : version.
      10                 :             : 
      11                 :             : GCC is distributed in the hope that it will be useful, but WITHOUT ANY
      12                 :             : WARRANTY; without even the implied warranty of MERCHANTABILITY or
      13                 :             : FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
      14                 :             : for more details.
      15                 :             : 
      16                 :             : You should have received a copy of the GNU General Public License
      17                 :             : along with GCC; see the file COPYING3.  If not see
      18                 :             : <http://www.gnu.org/licenses/>.  */
      19                 :             : 
      20                 :             : #include "config.h"
      21                 :             : #include "system.h"
      22                 :             : #include "coretypes.h"
      23                 :             : #include "backend.h"
      24                 :             : #include "rtl.h"
      25                 :             : #include "tree.h"
      26                 :             : #include "memmodel.h"
      27                 :             : #include "insn-config.h"
      28                 :             : #include "emit-rtl.h"
      29                 :             : #include "recog.h"
      30                 :             : #include "cfganal.h"
      31                 :             : #include "tree-pass.h"
      32                 :             : #include "cfgrtl.h"
      33                 :             : #include "rtl-iter.h"
      34                 :             : #include "df.h"
      35                 :             : #include "print-rtl.h"
      36                 :             : #include "dbgcnt.h"
      37                 :             : #include "diagnostic-core.h"
      38                 :             : #include "target.h"
      39                 :             : 
      40                 :             : /* These should probably move into a C++ class.  */
      41                 :             : static vec<bitmap_head> livein;
      42                 :             : static bitmap all_blocks;
      43                 :             : static bitmap livenow;
      44                 :             : static bitmap changed_pseudos;
      45                 :             : static bool modify;
      46                 :             : 
      47                 :             : /* We consider four bit groups for liveness:
      48                 :             :    bit 0..7   (least significant byte)
      49                 :             :    bit 8..15  (second least significant byte)
      50                 :             :    bit 16..31
      51                 :             :    bit 32..BITS_PER_WORD-1  */
      52                 :             : 
      53                 :             : /* For the given REG, return the number of bit groups implied by the
      54                 :             :    size of the REG's mode, up to a maximum of 4 (number of bit groups
      55                 :             :    tracked by this pass).
      56                 :             : 
      57                 :             :    For partial integer and variable sized modes also return 4.  This
      58                 :             :    could possibly be refined for something like PSI mode, but it
      59                 :             :    does not seem worth the effort.  */
      60                 :             : 
      61                 :             : static int
      62                 :   232950710 : group_limit (const_rtx reg)
      63                 :             : {
      64                 :   232950710 :   machine_mode mode = GET_MODE (reg);
      65                 :             : 
      66                 :   232950710 :   if (!GET_MODE_BITSIZE (mode).is_constant ())
      67                 :             :     return 4;
      68                 :             : 
      69                 :   232950710 :   int size = GET_MODE_SIZE (mode).to_constant ();
      70                 :             : 
      71                 :   232950710 :   size = exact_log2 (size);
      72                 :             : 
      73                 :   232862952 :   if (size < 0)
      74                 :             :     return 4;
      75                 :             : 
      76                 :   232862952 :   size++;
      77                 :   232862952 :   return (size > 4 ? 4 : size);
      78                 :             : }
      79                 :             : 
      80                 :             : /* Make all bit groups live for REGNO in bitmap BMAP.  For hard regs,
      81                 :             :    we assume all groups are live.  For a pseudo we consider the size
      82                 :             :    of the pseudo to avoid creating unnecessarily live chunks of data.  */
      83                 :             : 
      84                 :             : static void
      85                 :     4667768 : make_reg_live (bitmap bmap, int regno)
      86                 :             : {
      87                 :     4667768 :   int limit;
      88                 :             : 
      89                 :             :   /* For pseudos we can use the mode to limit how many bit groups
      90                 :             :      are marked as live since a pseudo only has one mode.  Hard
      91                 :             :      registers have to be handled more conservatively.  */
      92                 :     4667768 :   if (regno > FIRST_PSEUDO_REGISTER)
      93                 :             :     {
      94                 :      909455 :       rtx reg = regno_reg_rtx[regno];
      95                 :      909455 :       limit = group_limit (reg);
      96                 :             :     }
      97                 :             :   else
      98                 :             :     limit = 4;
      99                 :             : 
     100                 :    23006816 :   for (int i = 0; i < limit; i++)
     101                 :    18339048 :     bitmap_set_bit (bmap, regno * 4 + i);
     102                 :     4667768 : }
     103                 :             : 
     104                 :             : /* Note this pass could be used to narrow memory loads too.  It's
     105                 :             :    not clear if that's profitable or not in general.  */
     106                 :             : 
     107                 :             : #define UNSPEC_P(X) (GET_CODE (X) == UNSPEC || GET_CODE (X) == UNSPEC_VOLATILE)
     108                 :             : 
     109                 :             : /* If we know the destination of CODE only uses some low bits
     110                 :             :    (say just the QI bits of an SI operation), then return true
     111                 :             :    if we can propagate the need for just the subset of bits
     112                 :             :    from the destination to the sources.
     113                 :             : 
     114                 :             :    FIXME: This is safe for operands 1 and 2 of an IF_THEN_ELSE, but not
     115                 :             :    operand 0.  Thus it would likely need some special casing to handle.  */
     116                 :             : 
     117                 :             : static bool
     118                 :   142678689 : safe_for_live_propagation (rtx_code code)
     119                 :             : {
     120                 :             :   /* First handle rtx classes which as a whole are known to
     121                 :             :      be either safe or unsafe.  */
     122                 :   142678689 :   switch (GET_RTX_CLASS (code))
     123                 :             :     {
     124                 :             :       case RTX_OBJ:
     125                 :             :       case RTX_CONST_OBJ:
     126                 :             :         return true;
     127                 :             : 
     128                 :             :       case RTX_COMPARE:
     129                 :             :       case RTX_COMM_COMPARE:
     130                 :             :       case RTX_TERNARY:
     131                 :             :         return false;
     132                 :             : 
     133                 :    73649162 :       default:
     134                 :    73649162 :         break;
     135                 :             :     }
     136                 :             : 
     137                 :             :   /* What's left are specific codes.  We only need to identify those
     138                 :             :      which are safe.   */
     139                 :    73649162 :   switch (code)
     140                 :             :     {
     141                 :             :     /* These are trivially safe.  */
     142                 :             :     case SUBREG:
     143                 :             :     case NOT:
     144                 :             :     case ZERO_EXTEND:
     145                 :             :     case SIGN_EXTEND:
     146                 :             :     case TRUNCATE:
     147                 :             :     case PLUS:
     148                 :             :     case MINUS:
     149                 :             :     case MULT:
     150                 :             :     case SMUL_HIGHPART:
     151                 :             :     case UMUL_HIGHPART:
     152                 :             :     case AND:
     153                 :             :     case IOR:
     154                 :             :     case XOR:
     155                 :             :       return true;
     156                 :             : 
     157                 :             :     /* We can propagate for the shifted operand, but not the shift
     158                 :             :        count.  The count is handled specially.  */
     159                 :             :     case ASHIFT:
     160                 :             :     case LSHIFTRT:
     161                 :             :     case ASHIFTRT:
     162                 :             :     case SS_ASHIFT:
     163                 :             :     case US_ASHIFT:
     164                 :             :       return true;
     165                 :             : 
     166                 :             :     /* There may be other safe codes.  If so they can be added
     167                 :             :        individually when discovered.  */
     168                 :             :     default:
     169                 :             :       return false;
     170                 :             :     }
     171                 :             : }
     172                 :             : 
     173                 :             : /* Clear bits in LIVENOW and set bits in LIVE_TMP for objects
     174                 :             :    set/clobbered by OBJ contained in INSN.
     175                 :             : 
     176                 :             :    Conceptually it is always safe to ignore a particular destination
     177                 :             :    here as that will result in more chunks of data being considered
     178                 :             :    live.  That's what happens when we "continue" the main loop when
     179                 :             :    we see something we don't know how to handle such as a vector
     180                 :             :    mode destination.
     181                 :             : 
     182                 :             :    The more accurate we are in identifying what objects (and chunks
     183                 :             :    within an object) are set by INSN, the more aggressive the
     184                 :             :    optimization phase during use handling will be.  */
     185                 :             : 
     186                 :             : static bool
     187                 :   137513902 : ext_dce_process_sets (rtx_insn *insn, rtx obj, bitmap live_tmp)
     188                 :             : {
     189                 :   137513902 :   bool skipped_dest = false;
     190                 :             : 
     191                 :   137513902 :   subrtx_iterator::array_type array;
     192                 :   389329448 :   FOR_EACH_SUBRTX (iter, array, obj, NONCONST)
     193                 :             :     {
     194                 :   251815546 :       const_rtx x = *iter;
     195                 :             : 
     196                 :             :       /* An EXPR_LIST (from call fusage) ends in NULL_RTX.  */
     197                 :   251815546 :       if (x == NULL_RTX)
     198                 :     9509248 :         continue;
     199                 :             : 
     200                 :   242306298 :       if (UNSPEC_P (x))
     201                 :      566401 :         continue;
     202                 :             : 
     203                 :   241739897 :       if (GET_CODE (x) == SET || GET_CODE (x) == CLOBBER)
     204                 :             :         {
     205                 :   143315165 :           unsigned bit = 0;
     206                 :   143315165 :           x = SET_DEST (x);
     207                 :             : 
     208                 :             :           /* We don't support vector destinations or destinations
     209                 :             :              wider than DImode.  */
     210                 :   143315165 :           scalar_mode outer_mode;
     211                 :   147335739 :           if (!is_a <scalar_mode> (GET_MODE (x), &outer_mode)
     212                 :    91812448 :               || GET_MODE_BITSIZE (outer_mode) > HOST_BITS_PER_WIDE_INT)
     213                 :             :             {
     214                 :             :               /* Skip the subrtxs of this destination.  There is
     215                 :             :                  little value in iterating into the subobjects, so
     216                 :             :                  just skip them for a bit of efficiency.  */
     217                 :    55523291 :               skipped_dest = true;
     218                 :    55523291 :               iter.skip_subrtxes ();
     219                 :   307338837 :               continue;
     220                 :             :             }
     221                 :             : 
     222                 :             :           /* We could have (strict_low_part (subreg ...)).  We can not just
     223                 :             :              strip the STRICT_LOW_PART as that would result in clearing
     224                 :             :              some bits in LIVENOW that are still live.  So process the
     225                 :             :              STRICT_LOW_PART specially.  */
     226                 :    87791874 :           if (GET_CODE (x) == STRICT_LOW_PART)
     227                 :             :             {
     228                 :           0 :               x = XEXP (x, 0);
     229                 :             : 
     230                 :             :               /* The only valid operand of a STRICT_LOW_PART is a non
     231                 :             :                  paradoxical SUBREG.  */
     232                 :           0 :               gcc_assert (SUBREG_P (x)
     233                 :             :                           && !paradoxical_subreg_p (x)
     234                 :             :                           && SUBREG_BYTE (x).is_constant ());
     235                 :             : 
     236                 :             :               /* I think we should always see a REG here.  But let's
     237                 :             :                  be sure.  */
     238                 :           0 :               gcc_assert (REG_P (SUBREG_REG (x)));
     239                 :             : 
     240                 :             :               /* The inner mode might be larger, just punt for
     241                 :             :                  that case.  Remember, we can not just continue to process
     242                 :             :                  the inner RTXs due to the STRICT_LOW_PART.  */
     243                 :           0 :               if (!is_a <scalar_mode> (GET_MODE (SUBREG_REG (x)), &outer_mode)
     244                 :           0 :                   || GET_MODE_BITSIZE (outer_mode) > HOST_BITS_PER_WIDE_INT)
     245                 :             :                 {
     246                 :             :                   /* Skip the subrtxs of the STRICT_LOW_PART.  We can't
     247                 :             :                      process them because it'll set objects as no longer
     248                 :             :                      live when they are in fact still live.  */
     249                 :           0 :                   skipped_dest = true;
     250                 :           0 :                   iter.skip_subrtxes ();
     251                 :           0 :                   continue;
     252                 :             :                 }
     253                 :             : 
     254                 :             :               /* LIVE_TMP contains the set groups that are live-out and set in
     255                 :             :                  this insn.  It is used to narrow the groups live-in for the
     256                 :             :                  inputs of this insn.
     257                 :             : 
     258                 :             :                  The simple thing to do is mark all the groups as live, but
     259                 :             :                  that will significantly inhibit optimization.
     260                 :             : 
     261                 :             :                  We also need to be careful in the case where we have an in-out
     262                 :             :                  operand.  If we're not careful we'd clear LIVE_TMP
     263                 :             :                  incorrectly.  */
     264                 :           0 :               HOST_WIDE_INT rn = REGNO (SUBREG_REG (x));
     265                 :           0 :               int limit = group_limit (SUBREG_REG (x));
     266                 :           0 :               for (HOST_WIDE_INT i = 4 * rn; i < 4 * rn + limit; i++)
     267                 :           0 :                 if (bitmap_bit_p (livenow, i))
     268                 :           0 :                   bitmap_set_bit (live_tmp, i);
     269                 :             : 
     270                 :           0 :               if (bitmap_empty_p (live_tmp))
     271                 :           0 :                 make_reg_live (live_tmp, rn);
     272                 :             : 
     273                 :             :               /* The mode of the SUBREG tells us how many bits we can
     274                 :             :                  clear.  */
     275                 :           0 :               machine_mode mode = GET_MODE (x);
     276                 :           0 :               HOST_WIDE_INT size
     277                 :           0 :                 = exact_log2 (GET_MODE_SIZE (mode).to_constant ()) + 1;
     278                 :           0 :               bitmap_clear_range (livenow, 4 * rn, size);
     279                 :             : 
     280                 :             :               /* We have fully processed this destination.  */
     281                 :           0 :               iter.skip_subrtxes ();
     282                 :           0 :               continue;
     283                 :           0 :             }
     284                 :             : 
     285                 :             :           /* Phase one of destination handling.  First remove any wrapper
     286                 :             :              such as SUBREG or ZERO_EXTRACT.  */
     287                 :    87791874 :           unsigned HOST_WIDE_INT mask
     288                 :    87791874 :             = GET_MODE_MASK (GET_MODE_INNER (GET_MODE (x)));
     289                 :    87791874 :           if (SUBREG_P (x))
     290                 :             :             {
     291                 :             :               /* If we have a SUBREG destination that is too wide, just
     292                 :             :                  skip the destination rather than continuing this iterator.
     293                 :             :                  While continuing would be better, we'd need to strip the
     294                 :             :                  subreg and restart within the SET processing rather than
     295                 :             :                  the top of the loop which just complicates the flow even
     296                 :             :                  more.  */
     297                 :      664979 :               if (!is_a <scalar_mode> (GET_MODE (SUBREG_REG (x)), &outer_mode)
     298                 :      547376 :                   || GET_MODE_BITSIZE (outer_mode) > HOST_BITS_PER_WIDE_INT)
     299                 :             :                 {
     300                 :      117603 :                   skipped_dest = true;
     301                 :      117603 :                   iter.skip_subrtxes ();
     302                 :      117603 :                   continue;
     303                 :             :                 }
     304                 :             : 
     305                 :             :               /* We can safely strip a paradoxical subreg.  The inner mode will
     306                 :             :                  be narrower than the outer mode.  We'll clear fewer bits in
     307                 :             :                  LIVENOW than we'd like, but that's always safe.  */
     308                 :      430203 :               if (paradoxical_subreg_p (x))
     309                 :             :                 x = XEXP (x, 0);
     310                 :      422761 :               else if (SUBREG_BYTE (x).is_constant ())
     311                 :             :                 {
     312                 :      422761 :                   bit = subreg_lsb (x).to_constant ();
     313                 :      422761 :                   mask = GET_MODE_MASK (GET_MODE (SUBREG_REG (x))) << bit;
     314                 :      422761 :                   gcc_assert (mask);
     315                 :             :                   x = SUBREG_REG (x);
     316                 :             :                 }
     317                 :             :               else
     318                 :             :                 gcc_unreachable ();
     319                 :             :             }
     320                 :             : 
     321                 :    87674271 :           if (GET_CODE (x) == ZERO_EXTRACT)
     322                 :             :             {
     323                 :             :               /* Unlike a SUBREG destination, a set of a ZERO_EXTRACT only
     324                 :             :                  modifies the bits referenced in the ZERO_EXTRACT, the rest
     325                 :             :                  remain the same.  Thus we can not continue here, we must
     326                 :             :                  either figure out what part of the destination is modified
     327                 :             :                  or skip the sub-rtxs.  */
     328                 :        3446 :               skipped_dest = true;
     329                 :        3446 :               iter.skip_subrtxes ();
     330                 :        3446 :               continue;
     331                 :             :             }
     332                 :             : 
     333                 :             :           /* BIT >= 64 indicates something went horribly wrong.  */
     334                 :    87670825 :           gcc_assert (bit <= HOST_BITS_PER_WIDE_INT - 1);
     335                 :             : 
     336                 :             :           /* Now handle the actual object that was changed.  */
     337                 :    87670825 :           if (REG_P (x))
     338                 :             :             {
     339                 :             :               /* LIVE_TMP contains the set groups that are live-out and set in
     340                 :             :                  this insn.  It is used to narrow the groups live-in for the
     341                 :             :                  inputs of this insn.
     342                 :             : 
     343                 :             :                  The simple thing to do is mark all the groups as live, but
     344                 :             :                  that will significantly inhibit optimization.
     345                 :             : 
     346                 :             :                  We also need to be careful in the case where we have an in-out
     347                 :             :                  operand.  If we're not careful we'd clear LIVE_TMP
     348                 :             :                  incorrectly.  */
     349                 :    73717586 :               HOST_WIDE_INT rn = REGNO (x);
     350                 :    73717586 :               int limit = group_limit (x);
     351                 :   329299357 :               for (HOST_WIDE_INT i = 4 * rn; i < 4 * rn + limit; i++)
     352                 :   255581771 :                 if (bitmap_bit_p (livenow, i))
     353                 :   248197777 :                   bitmap_set_bit (live_tmp, i);
     354                 :             : 
     355                 :    73717586 :               if (bitmap_empty_p (live_tmp))
     356                 :     1251110 :                 make_reg_live (live_tmp, rn);
     357                 :             : 
     358                 :             :               /* Now clear the bits known written by this instruction.
     359                 :             :                  Note that BIT need not be a power of two, consider a
     360                 :             :                  ZERO_EXTRACT destination.  */
     361                 :    73717586 :               int start = (bit < 8 ? 0 : bit < 16 ? 1 : bit < 32 ? 2 : 3);
     362                 :    78774945 :               int end = ((mask & ~HOST_WIDE_INT_UC (0xffffffff)) ? 4
     363                 :    28390816 :                          : (mask & HOST_WIDE_INT_UC (0xffff0000)) ? 3
     364                 :     5828554 :                          : (mask & 0xff00) ? 2 : 1);
     365                 :    73717586 :               bitmap_clear_range (livenow, 4 * rn + start, end - start);
     366                 :             :             }
     367                 :             :           /* Some ports generate (clobber (const_int)).  */
     368                 :    13953239 :           else if (CONST_INT_P (x))
     369                 :           0 :             continue;
     370                 :             :           else
     371                 :    13953239 :             gcc_assert (CALL_P (insn)
     372                 :             :                         || MEM_P (x)
     373                 :             :                         || x == pc_rtx
     374                 :             :                         || GET_CODE (x) == SCRATCH);
     375                 :             : 
     376                 :    87670825 :           iter.skip_subrtxes ();
     377                 :    87670825 :         }
     378                 :    98424732 :       else if (GET_CODE (x) == COND_EXEC)
     379                 :             :         {
     380                 :             :           /* This isn't ideal, but may not be so bad in practice.  */
     381                 :           0 :           skipped_dest = true;
     382                 :           0 :           iter.skip_subrtxes ();
     383                 :             :         }
     384                 :             :     }
     385                 :   137513902 :   return skipped_dest;
     386                 :   137513902 : }
     387                 :             : 
     388                 :             : /* INSN has a sign/zero extended source inside SET that we will
     389                 :             :    try to turn into a SUBREG.  */
     390                 :             : static void
     391                 :        4796 : ext_dce_try_optimize_insn (rtx_insn *insn, rtx set)
     392                 :             : {
     393                 :        4796 :   rtx src = SET_SRC (set);
     394                 :        4796 :   rtx inner = XEXP (src, 0);
     395                 :             : 
     396                 :             :   /* Avoid (subreg (mem)) and other constructs which may be valid RTL, but
     397                 :             :      not useful for this optimization.  */
     398                 :        4796 :   if (!(REG_P (inner) || (SUBREG_P (inner) && REG_P (SUBREG_REG (inner)))))
     399                 :             :     return;
     400                 :             : 
     401                 :        2307 :   rtx new_pattern;
     402                 :        2307 :   if (dump_file)
     403                 :             :     {
     404                 :           0 :       fprintf (dump_file, "Processing insn:\n");
     405                 :           0 :       dump_insn_slim (dump_file, insn);
     406                 :           0 :       fprintf (dump_file, "Trying to simplify pattern:\n");
     407                 :           0 :       print_rtl_single (dump_file, SET_SRC (set));
     408                 :             :     }
     409                 :             : 
     410                 :             :   /* We decided to do the optimization, but allow it to be rejected for
     411                 :             :      bisection purposes.  */
     412                 :        2307 :   if (!dbg_cnt (::ext_dce))
     413                 :             :     {
     414                 :           0 :       if (dump_file)
     415                 :           0 :         fprintf (dump_file, "Rejected due to debug counter.\n");
     416                 :           0 :       return;
     417                 :             :     }
     418                 :             : 
     419                 :        4614 :   new_pattern = simplify_gen_subreg (GET_MODE (src), inner,
     420                 :        2307 :                                      GET_MODE (inner), 0);
     421                 :             :   /* simplify_gen_subreg may fail in which case NEW_PATTERN will be NULL.
     422                 :             :      We must not pass that as a replacement pattern to validate_change.  */
     423                 :        2307 :   if (new_pattern)
     424                 :             :     {
     425                 :        2307 :       int ok = validate_change (insn, &SET_SRC (set), new_pattern, false);
     426                 :             : 
     427                 :        2307 :       rtx x = SET_DEST (set);
     428                 :        2307 :       while (SUBREG_P (x) || GET_CODE (x) == ZERO_EXTRACT)
     429                 :           0 :         x = XEXP (x, 0);
     430                 :             : 
     431                 :        2307 :       gcc_assert (REG_P (x));
     432                 :        2307 :       if (ok)
     433                 :        2307 :         bitmap_set_bit (changed_pseudos, REGNO (x));
     434                 :             : 
     435                 :        2307 :       if (dump_file)
     436                 :             :         {
     437                 :           0 :           if (ok)
     438                 :           0 :             fprintf (dump_file, "Successfully transformed to:\n");
     439                 :             :           else
     440                 :           0 :             fprintf (dump_file, "Failed transformation to:\n");
     441                 :             : 
     442                 :           0 :           print_rtl_single (dump_file, new_pattern);
     443                 :           0 :           fprintf (dump_file, "\n");
     444                 :             :         }
     445                 :             :     }
     446                 :             :   else
     447                 :             :     {
     448                 :           0 :       if (dump_file)
     449                 :           0 :         fprintf (dump_file, "Unable to generate valid SUBREG expression.\n");
     450                 :             :     }
     451                 :             : }
     452                 :             : 
     453                 :             : /* Some operators imply that their second operand is fully live,
     454                 :             :    regardless of how many bits in the output are live.  An example
     455                 :             :    would be the shift count on a target without SHIFT_COUNT_TRUNCATED
     456                 :             :    defined.
     457                 :             : 
     458                 :             :    Return TRUE if CODE is such an operator.  FALSE otherwise.  */
     459                 :             : 
     460                 :             : static bool
     461                 :    77334397 : binop_implies_op2_fully_live (rtx_code code)
     462                 :             : {
     463                 :           0 :   switch (code)
     464                 :             :     {
     465                 :             :     case ASHIFT:
     466                 :             :     case LSHIFTRT:
     467                 :             :     case ASHIFTRT:
     468                 :             :     case ROTATE:
     469                 :             :     case ROTATERT:
     470                 :             :     case SS_ASHIFT:
     471                 :             :     case US_ASHIFT:
     472                 :             :       return !SHIFT_COUNT_TRUNCATED;
     473                 :             : 
     474                 :           0 :     default:
     475                 :           0 :       return false;
     476                 :             :     }
     477                 :             : }
     478                 :             : 
     479                 :             : /* X, with code CODE, is an operation for which safe_for_live_propagation
     480                 :             :    holds true, and bits set in MASK are live in the result.  Compute a
     481                 :             :    mask of (potentially) live bits in the non-constant inputs.  In case of
     482                 :             :    binop_implies_op2_fully_live (e.g. shifts), the computed mask may
     483                 :             :    exclusively pertain to the first operand.
     484                 :             : 
     485                 :             :    This looks wrong as we may have some important operations embedded as
     486                 :             :    operands of another operation.  For example, we might have an extension
     487                 :             :    wrapping a shift.  It really feels like this needs to be recursing down
     488                 :             :    into operands much more often.  */
     489                 :             : 
     490                 :             : unsigned HOST_WIDE_INT
     491                 :    72080781 : carry_backpropagate (unsigned HOST_WIDE_INT mask, enum rtx_code code, rtx x)
     492                 :             : {
     493                 :    73884163 :   if (mask == 0)
     494                 :             :     return 0;
     495                 :             : 
     496                 :    73884137 :   enum machine_mode mode = GET_MODE_INNER (GET_MODE (x));
     497                 :    73884137 :   unsigned HOST_WIDE_INT mmask = GET_MODE_MASK (mode);
     498                 :             : 
     499                 :             :   /* While we don't try to optimize operations on types larger
     500                 :             :      than 64 bits, we do want to make sure not to invoke undefined
     501                 :             :      behavior when presented with such operations during use
     502                 :             :      processing.  The safe thing to do is to just return mmask
     503                 :             :      for that scenario indicating every possible chunk is live.  */
     504                 :    73884137 :   scalar_int_mode smode;
     505                 :    73884137 :   if (!is_a <scalar_int_mode> (mode, &smode)
     506                 :    61452239 :       || GET_MODE_BITSIZE (smode) > HOST_BITS_PER_WIDE_INT)
     507                 :             :     return mmask;
     508                 :             : 
     509                 :    59340759 :   switch (code)
     510                 :             :     {
     511                 :    16372900 :     case PLUS:
     512                 :    16372900 :     case MINUS:
     513                 :    16372900 :     case MULT:
     514                 :    16372900 :       return (HOST_WIDE_INT_UC (2) << floor_log2 (mask)) - 1;
     515                 :             : 
     516                 :             :     /* We propagate for the shifted operand, but not the shift
     517                 :             :        count.  The count is handled specially.  */
     518                 :     1367382 :     case ASHIFT:
     519                 :     1367382 :       if (CONST_INT_P (XEXP (x, 1))
     520                 :     2660970 :           && UINTVAL (XEXP (x, 1)) < GET_MODE_BITSIZE (smode))
     521                 :     1293552 :         return (HOST_WIDE_INT) mask >> INTVAL (XEXP (x, 1));
     522                 :       73830 :       return (HOST_WIDE_INT_UC (2) << floor_log2 (mask)) - 1;
     523                 :             : 
     524                 :             :     /* We propagate for the shifted operand, but not the shift
     525                 :             :        count.  The count is handled specially.  */
     526                 :      728757 :     case LSHIFTRT:
     527                 :      728757 :       if (CONST_INT_P (XEXP (x, 1))
     528                 :     1423900 :           && UINTVAL (XEXP (x, 1)) < GET_MODE_BITSIZE (smode))
     529                 :      695115 :         return mmask & (mask << INTVAL (XEXP (x, 1)));
     530                 :             :       return mmask;
     531                 :             : 
     532                 :             :     /* We propagate for the shifted operand, but not the shift
     533                 :             :        count.  The count is handled specially.  */
     534                 :      318381 :     case ASHIFTRT:
     535                 :      318381 :       if (CONST_INT_P (XEXP (x, 1))
     536                 :      624924 :           && UINTVAL (XEXP (x, 1)) < GET_MODE_BITSIZE (smode))
     537                 :             :         {
     538                 :      306535 :           HOST_WIDE_INT sign = 0;
     539                 :      306535 :           if (HOST_BITS_PER_WIDE_INT - clz_hwi (mask) + INTVAL (XEXP (x, 1))
     540                 :      306535 :               > GET_MODE_BITSIZE (smode))
     541                 :      613070 :             sign = HOST_WIDE_INT_1U << (GET_MODE_BITSIZE (smode) - 1);
     542                 :      306535 :           return sign | (mmask & (mask << INTVAL (XEXP (x, 1))));
     543                 :             :         }
     544                 :             :       return mmask;
     545                 :             : 
     546                 :       61045 :     case SMUL_HIGHPART:
     547                 :       61045 :     case UMUL_HIGHPART:
     548                 :       61045 :       if (XEXP (x, 1) == const0_rtx)
     549                 :             :         return 0;
     550                 :       61045 :       if (XEXP (x, 1) == const1_rtx)
     551                 :             :         return mmask;
     552                 :       61045 :       if (CONST_INT_P (XEXP (x, 1)))
     553                 :             :         {
     554                 :           0 :           if (pow2p_hwi (INTVAL (XEXP (x, 1))))
     555                 :           0 :             return mmask & (mask << (GET_MODE_BITSIZE (smode)
     556                 :           0 :                                      - exact_log2 (INTVAL (XEXP (x, 1)))));
     557                 :             : 
     558                 :           0 :           int bits = (HOST_BITS_PER_WIDE_INT + GET_MODE_BITSIZE (smode)
     559                 :           0 :                       - clz_hwi (mask) - ctz_hwi (INTVAL (XEXP (x, 1))));
     560                 :           0 :           if (bits < GET_MODE_BITSIZE (smode))
     561                 :           0 :             return (HOST_WIDE_INT_1U << bits) - 1;
     562                 :             :         }
     563                 :             :       return mmask;
     564                 :             : 
     565                 :      677280 :     case SIGN_EXTEND:
     566                 :      677280 :       if (!GET_MODE_BITSIZE (GET_MODE (x)).is_constant ()
     567                 :      677280 :           || !GET_MODE_BITSIZE (GET_MODE (XEXP (x, 0))).is_constant ())
     568                 :             :         return -1;
     569                 :             : 
     570                 :             :       /* We want the mode of the inner object.  We need to ensure its
     571                 :             :          sign bit is on in MASK.  */
     572                 :      677280 :       mode = GET_MODE_INNER (GET_MODE (XEXP (x, 0)));
     573                 :      677280 :       if (mask & ~GET_MODE_MASK (mode))
     574                 :      676823 :         mask |= HOST_WIDE_INT_1U << (GET_MODE_BITSIZE (mode).to_constant ()
     575                 :      676823 :                                      - 1);
     576                 :             : 
     577                 :             :       /* Recurse into the operand.  */
     578                 :      677280 :       return carry_backpropagate (mask, GET_CODE (XEXP (x, 0)), XEXP (x, 0));
     579                 :             : 
     580                 :     1126102 :     case ZERO_EXTEND:
     581                 :     1126102 :       if (!GET_MODE_BITSIZE (GET_MODE (x)).is_constant ()
     582                 :     1126102 :           || !GET_MODE_BITSIZE (GET_MODE (XEXP (x, 0))).is_constant ())
     583                 :             :         return -1;
     584                 :             : 
     585                 :             :       /* Recurse into the operand.  */
     586                 :     1126102 :       return carry_backpropagate (mask, GET_CODE (XEXP (x, 0)), XEXP (x, 0));
     587                 :             : 
     588                 :             :     /* We propagate for the shifted operand, but not the shift
     589                 :             :        count.  The count is handled specially.  */
     590                 :           0 :     case SS_ASHIFT:
     591                 :           0 :     case US_ASHIFT:
     592                 :           0 :       if (CONST_INT_P (XEXP (x, 1))
     593                 :           0 :           && UINTVAL (XEXP (x, 1)) < GET_MODE_BITSIZE (smode))
     594                 :             :         {
     595                 :           0 :           return ((mmask & ~((unsigned HOST_WIDE_INT) mmask
     596                 :           0 :                              >> (INTVAL (XEXP (x, 1))
     597                 :           0 :                                  + (XEXP (x, 1) != const0_rtx
     598                 :           0 :                                     && code == SS_ASHIFT))))
     599                 :           0 :                   | ((HOST_WIDE_INT) mask >> INTVAL (XEXP (x, 1))));
     600                 :             :         }
     601                 :             :       return mmask;
     602                 :             : 
     603                 :             :     default:
     604                 :             :       return mask;
     605                 :             :     }
     606                 :             : }
     607                 :             : 
     608                 :             : /* Process uses in INSN contained in OBJ.  Set appropriate bits in LIVENOW
     609                 :             :    for any chunks of pseudos that become live, potentially filtering using
     610                 :             :    bits from LIVE_TMP.
     611                 :             : 
     612                 :             :    If MODIFY is true, then optimize sign/zero extensions to SUBREGs when
     613                 :             :    the extended bits are never read and mark pseudos which had extensions
     614                 :             :    eliminated in CHANGED_PSEUDOS.  */
     615                 :             : 
     616                 :             : static void
     617                 :   137513902 : ext_dce_process_uses (rtx_insn *insn, rtx obj,
     618                 :             :                       bitmap live_tmp, bool skipped_dest)
     619                 :             : {
     620                 :   137513902 :   subrtx_var_iterator::array_type array_var;
     621                 :   745237773 :   FOR_EACH_SUBRTX_VAR (iter, array_var, obj, NONCONST)
     622                 :             :     {
     623                 :             :       /* An EXPR_LIST (from call fusage) ends in NULL_RTX.  */
     624                 :   607723871 :       rtx x = *iter;
     625                 :   607723871 :       if (x == NULL_RTX)
     626                 :     9509248 :         continue;
     627                 :             : 
     628                 :             :       /* So the basic idea in this FOR_EACH_SUBRTX_VAR loop is to
     629                 :             :          handle SETs explicitly, possibly propagating live information
     630                 :             :          into the uses.
     631                 :             : 
     632                 :             :          We may continue the loop at various points which will cause
     633                 :             :          iteration into the next level of RTL.  Breaking from the loop
     634                 :             :          is never safe as it can lead us to fail to process some of the
     635                 :             :          RTL and thus not make objects live when necessary.  */
     636                 :   598214623 :       enum rtx_code xcode = GET_CODE (x);
     637                 :   598214623 :       if (xcode == SET)
     638                 :             :         {
     639                 :   121607594 :           const_rtx dst = SET_DEST (x);
     640                 :   121607594 :           rtx src = SET_SRC (x);
     641                 :   121607594 :           const_rtx y;
     642                 :   121607594 :           unsigned HOST_WIDE_INT bit = 0;
     643                 :             : 
     644                 :             :           /* The code of the RHS of a SET.  */
     645                 :   121607594 :           enum rtx_code code = GET_CODE (src);
     646                 :             : 
     647                 :             :           /* ?!? How much of this should mirror SET handling, potentially
     648                 :             :              being shared?   */
     649                 :   121607594 :           if (SUBREG_P (dst) && SUBREG_BYTE (dst).is_constant ())
     650                 :             :             {
     651                 :      590972 :               bit = subreg_lsb (dst).to_constant ();
     652                 :      590972 :               if (bit >= HOST_BITS_PER_WIDE_INT)
     653                 :             :                 bit = HOST_BITS_PER_WIDE_INT - 1;
     654                 :      590972 :               dst = SUBREG_REG (dst);
     655                 :             :             }
     656                 :   121016622 :           else if (GET_CODE (dst) == STRICT_LOW_PART)
     657                 :       10222 :             dst = XEXP (dst, 0);
     658                 :             : 
     659                 :             :           /* Main processing of the uses.  Two major goals here.
     660                 :             : 
     661                 :             :              First, we want to try and propagate liveness (or the lack
     662                 :             :              thereof) from the destination register to the source
     663                 :             :              register(s).
     664                 :             : 
     665                 :             :              Second, if the source is an extension, try to optimize
     666                 :             :              it into a SUBREG.  The SUBREG form indicates we don't
     667                 :             :              care about the upper bits and will usually be copy
     668                 :             :              propagated away.
     669                 :             : 
     670                 :             :              If we fail to handle something in here, the expectation
     671                 :             :              is the iterator will dive into the sub-components and
     672                 :             :              mark all the chunks in any found REGs as live.  */
     673                 :   121607594 :           if (REG_P (dst) && safe_for_live_propagation (code))
     674                 :             :             {
     675                 :             :               /* Create a mask representing the bits of this output
     676                 :             :                  operand that are live after this insn.  We can use
     677                 :             :                  this information to refine the live in state of
     678                 :             :                  inputs to this insn in many cases.
     679                 :             : 
     680                 :             :                  We have to do this on a per SET basis, we might have
     681                 :             :                  an INSN with multiple SETS, some of which can narrow
     682                 :             :                  the source operand liveness, some of which may not.  */
     683                 :    72080781 :               unsigned HOST_WIDE_INT dst_mask = 0;
     684                 :    72080781 :               HOST_WIDE_INT rn = REGNO (dst);
     685                 :    72080781 :               unsigned HOST_WIDE_INT mask_array[]
     686                 :             :                 = { 0xff, 0xff00, HOST_WIDE_INT_UC (0xffff0000),
     687                 :             :                     -HOST_WIDE_INT_UC (0x100000000) };
     688                 :   360403905 :               for (int i = 0; i < 4; i++)
     689                 :   288323124 :                 if (bitmap_bit_p (live_tmp, 4 * rn + i))
     690                 :   232264904 :                   dst_mask |= mask_array[i];
     691                 :    72080781 :               dst_mask >>= bit;
     692                 :             : 
     693                 :             :               /* If we ignored a destination during set processing, then
     694                 :             :                  consider all the bits live.  */
     695                 :    72080781 :               if (skipped_dest)
     696                 :    25011336 :                 dst_mask = -1;
     697                 :             : 
     698                 :    72080781 :               dst_mask = carry_backpropagate (dst_mask, code, src);
     699                 :             : 
     700                 :             :               /* ??? Could also handle ZERO_EXTRACT / SIGN_EXTRACT
     701                 :             :                  of the source specially to improve optimization.  */
     702                 :    72080781 :               if (code == SIGN_EXTEND || code == ZERO_EXTEND)
     703                 :             :                 {
     704                 :     1816041 :                   rtx inner = XEXP (src, 0);
     705                 :     1816041 :                   unsigned HOST_WIDE_INT src_mask
     706                 :     1816041 :                     = GET_MODE_MASK (GET_MODE_INNER (GET_MODE (inner)));
     707                 :             : 
     708                 :             :                   /* DST_MASK could be zero if we had something in the SET
     709                 :             :                      that we couldn't handle.  */
     710                 :     1816041 :                   if (modify && !skipped_dest && (dst_mask & ~src_mask) == 0)
     711                 :        4796 :                     ext_dce_try_optimize_insn (insn, x);
     712                 :             : 
     713                 :             :                   /* Stripping the extension here just seems wrong on multiple
     714                 :             :                      levels.  It's source side handling, so it seems like it
     715                 :             :                      belongs in the loop below.  Stripping here also makes it
     716                 :             :                      harder than necessary to properly handle live bit groups
     717                 :             :                      for (ANY_EXTEND (SUBREG)) where the SUBREG has
     718                 :             :                      SUBREG_PROMOTED state.  */
     719                 :     1816041 :                   dst_mask &= src_mask;
     720                 :     1816041 :                   src = XEXP (src, 0);
     721                 :     1816041 :                   code = GET_CODE (src);
     722                 :             :                 }
     723                 :             : 
     724                 :             :               /* Optimization is done at this point.  We just want to make
     725                 :             :                  sure everything that should get marked as live is marked
     726                 :             :                  from here onward.  */
     727                 :             : 
     728                 :             :               /* We will handle the other operand of a binary operator
     729                 :             :                  at the bottom of the loop by resetting Y.  */
     730                 :    72080781 :               if (BINARY_P (src))
     731                 :    22612423 :                 y = XEXP (src, 0);
     732                 :             :               else
     733                 :             :                 y = src;
     734                 :             : 
     735                 :             :               /* We're inside a SET and want to process the source operands
     736                 :             :                  making things live.  Breaking from this loop will cause
     737                 :             :                  the iterator to work on sub-rtxs, so it is safe to break
     738                 :             :                  if we see something we don't know how to handle.
     739                 :             : 
     740                 :             :                  This code is just hokey as it really just handles trivial
     741                 :             :                  unary and binary cases.  Otherwise the loop exits and we
     742                 :             :                  continue iterating on sub-rtxs, but outside the set context.  */
     743                 :             :               unsigned HOST_WIDE_INT save_mask = dst_mask;
     744                 :   116144977 :               for (;;)
     745                 :             :                 {
     746                 :             :                   /* In general we want to restore DST_MASK before each loop
     747                 :             :                      iteration.  The exception is when the opcode implies that
     748                 :             :                      the other operand is fully live.  That's handled by
     749                 :             :                      changing SAVE_MASK below.  */
     750                 :    94112879 :                   dst_mask = save_mask;
     751                 :             :                   /* Strip an outer paradoxical subreg.  The bits outside
     752                 :             :                      the inner mode are don't cares.  So we can just strip
     753                 :             :                      and process the inner object.  */
     754                 :    94112879 :                   if (paradoxical_subreg_p (y))
     755                 :             :                     y = XEXP (y, 0);
     756                 :    94028452 :                   else if (SUBREG_P (y) && SUBREG_BYTE (y).is_constant ())
     757                 :             :                     {
     758                 :             :                       /* We really want to know the outer code here, i.e. do we
     759                 :             :                          have (ANY_EXTEND (SUBREG ...)) as we need to know if
     760                 :             :                          the extension matches the SUBREG_PROMOTED state.  In
     761                 :             :                          that case optimizers can turn the extension into a
     762                 :             :                          simple copy.  Which means that bits outside the
     763                 :             :                          SUBREG's mode are actually live.
     764                 :             : 
     765                 :             :                          We don't want to mark those bits live unnecessarily
     766                 :             :                          as that inhibits extension elimination in important
     767                 :             :                          cases such as those in Coremark.  So we need that
     768                 :             :                          outer code.
     769                 :             : 
     770                 :             :                          But if !TRULY_NOOP_TRUNCATION_MODES_P, the mode
     771                 :             :                          change performed by Y would normally need to be a
     772                 :             :                          TRUNCATE rather than a SUBREG.  It is probably the
     773                 :             :                          guarantee provided by SUBREG_PROMOTED_VAR_P that
     774                 :             :                          allows the SUBREG in Y as an exception.  We must
     775                 :             :                          therefore preserve that guarantee and treat the
     776                 :             :                          upper bits of the inner register as live
     777                 :             :                          regardless of the outer code.  See PR 120050.  */
     778                 :     1979411 :                       if (!REG_P (SUBREG_REG (y))
     779                 :     1979411 :                           || (SUBREG_PROMOTED_VAR_P (y)
     780                 :       11204 :                               && ((GET_CODE (SET_SRC (x)) == SIGN_EXTEND
     781                 :        1159 :                                    && SUBREG_PROMOTED_SIGNED_P (y))
     782                 :       11204 :                                   || (GET_CODE (SET_SRC (x)) == ZERO_EXTEND
     783                 :           0 :                                       && SUBREG_PROMOTED_UNSIGNED_P (y))
     784                 :       12150 :                                   || !TRULY_NOOP_TRUNCATION_MODES_P (
     785                 :             :                                         GET_MODE (y),
     786                 :             :                                         GET_MODE (SUBREG_REG (y))))))
     787                 :             :                         break;
     788                 :             : 
     789                 :     1978465 :                       bit = subreg_lsb (y).to_constant ();
     790                 :             : 
     791                 :             :                       /* If this is a wide object (more bits than we can fit
     792                 :             :                          in a HOST_WIDE_INT), then just break from the SET
     793                 :             :                          context.   That will cause the iterator to walk down
     794                 :             :                          into the subrtx and if we land on a REG we'll mark
     795                 :             :                          the whole thing live.  */
     796                 :     1978465 :                       if (bit >= HOST_BITS_PER_WIDE_INT)
     797                 :             :                         break;
     798                 :             : 
     799                 :             :                       /* The SUBREG's mode determines the live width.  */
     800                 :     1738231 :                       if (dst_mask)
     801                 :             :                         {
     802                 :     1738231 :                           dst_mask <<= bit;
     803                 :     1738231 :                           if (!dst_mask)
     804                 :           0 :                             dst_mask = -HOST_WIDE_INT_UC (0x100000000);
     805                 :             :                         }
     806                 :     1738231 :                       y = SUBREG_REG (y);
     807                 :             :                     }
     808                 :             : 
     809                 :    93871699 :                   if (REG_P (y))
     810                 :             :                     {
     811                 :             :                       /* We have found the use of a register.  We need to mark
     812                 :             :                          the appropriate chunks of the register live.  The mode
     813                 :             :                          of the REG is a starting point.  We may refine that
     814                 :             :                          based on what chunks in the output were live.  */
     815                 :    49855752 :                       rn = 4 * REGNO (y);
     816                 :    49855752 :                       unsigned HOST_WIDE_INT tmp_mask = dst_mask;
     817                 :             : 
     818                 :             :                       /* If the RTX code for the SET_SRC is not one we can
     819                 :             :                          propagate destination liveness through, then just
     820                 :             :                          set the mask to the mode's mask.  */
     821                 :    49855752 :                       if (!safe_for_live_propagation (code))
     822                 :       29708 :                         tmp_mask
     823                 :       59416 :                           = GET_MODE_MASK (GET_MODE_INNER (GET_MODE (y)));
     824                 :             : 
     825                 :    49855752 :                       if (tmp_mask & 0xff)
     826                 :    49373349 :                         bitmap_set_bit (livenow, rn);
     827                 :    49855752 :                       if (tmp_mask & 0xff00)
     828                 :    47883899 :                         bitmap_set_bit (livenow, rn + 1);
     829                 :    49855752 :                       if (tmp_mask & HOST_WIDE_INT_UC (0xffff0000))
     830                 :    47641759 :                         bitmap_set_bit (livenow, rn + 2);
     831                 :    49855752 :                       if (tmp_mask & -HOST_WIDE_INT_UC (0x100000000))
     832                 :    41221206 :                         bitmap_set_bit (livenow, rn + 3);
     833                 :             :                     }
     834                 :    44015947 :                   else if (!CONSTANT_P (y))
     835                 :             :                     break;
     836                 :             : 
     837                 :             :                   /* We might have (ashift (const_int 1) (reg...))
     838                 :             :                      By setting dst_mask we can continue iterating on the
     839                 :             :                      next operand and it will be considered fully live.
     840                 :             : 
     841                 :             :                      Note that since we restore DST_MASK from SAVE_MASK at the
     842                 :             :                      top of the loop, we have to change SAVE_MASK to get the
     843                 :             :                      semantics we want.  */
     844                 :    77334397 :                   if (binop_implies_op2_fully_live (GET_CODE (src)))
     845                 :     2521407 :                     save_mask = -1;
     846                 :             : 
     847                 :             :                   /* If this was anything but a binary operand, break the inner
     848                 :             :                      loop.  This is conservatively correct as it will cause the
     849                 :             :                      iterator to look at the sub-rtxs outside the SET context.  */
     850                 :    77334397 :                   if (!BINARY_P (src))
     851                 :             :                     break;
     852                 :             : 
     853                 :             :                   /* We processed the first operand of a binary operator.  Now
     854                 :             :                      handle the second.  */
     855                 :    22032098 :                   y = XEXP (src, 1), src = pc_rtx;
     856                 :    22032098 :                 }
     857                 :             : 
     858                 :             :               /* These are leaf nodes, no need to iterate down into them.  */
     859                 :    72080781 :               if (REG_P (y) || CONSTANT_P (y))
     860                 :    55302299 :                 iter.skip_subrtxes ();
     861                 :             :             }
     862                 :             :         }
     863                 :             :       /* If we are reading the low part of a SUBREG, then we can
     864                 :             :          refine liveness of the input register, otherwise let the
     865                 :             :          iterator continue into SUBREG_REG.  */
     866                 :   476607029 :       else if (SUBREG_P (x)
     867                 :     1334384 :                && REG_P (SUBREG_REG (x))
     868                 :     1332627 :                && !paradoxical_subreg_p (x)
     869                 :     1310995 :                && subreg_lowpart_p (x)
     870                 :      997655 :                && GET_MODE_BITSIZE (GET_MODE (x)).is_constant ()
     871                 :   478602339 :                && GET_MODE_BITSIZE (GET_MODE (x)).to_constant () <= 32)
     872                 :             :         {
     873                 :      513638 :           HOST_WIDE_INT size = GET_MODE_BITSIZE (GET_MODE (x)).to_constant ();
     874                 :      513638 :           HOST_WIDE_INT rn = 4 * REGNO (SUBREG_REG (x));
     875                 :             : 
     876                 :             :           /* If this is a promoted subreg, then more of it may be live than
     877                 :             :              is otherwise obvious.  */
     878                 :      513638 :           if (SUBREG_PROMOTED_VAR_P (x))
     879                 :        3978 :             size = GET_MODE_BITSIZE (GET_MODE (SUBREG_REG (x))).to_constant ();
     880                 :             : 
     881                 :      513638 :           bitmap_set_bit (livenow, rn);
     882                 :      513638 :           if (size > 8)
     883                 :      335438 :             bitmap_set_bit (livenow, rn + 1);
     884                 :      335438 :           if (size > 16)
     885                 :      293973 :             bitmap_set_bit (livenow, rn + 2);
     886                 :      293973 :           if (size >= 32)
     887                 :      293973 :             bitmap_set_bit (livenow, rn + 3);
     888                 :      513638 :           iter.skip_subrtxes ();
     889                 :             :         }
     890                 :             :       /* If we have a register reference that is not otherwise handled,
     891                 :             :          just assume all the chunks are live.  */
     892                 :   476093391 :       else if (REG_P (x))
     893                 :   158323669 :         bitmap_set_range (livenow, REGNO (x) * 4, group_limit (x));
     894                 :             :     }
     895                 :   137513902 : }
     896                 :             : 
     897                 :             : /* Process a single basic block BB with current liveness information
     898                 :             :    in LIVENOW; LIVENOW is updated in place.
     899                 :             : 
     900                 :             :    If MODIFY is true, then this is the last pass and unnecessary
     901                 :             :    extensions should be eliminated when possible.  If an extension
     902                 :             :    is removed, the source pseudo is marked in CHANGED_PSEUDOS.  */
     903                 :             : 
     904                 :             : static void
     905                 :    22785259 : ext_dce_process_bb (basic_block bb)
     906                 :             : {
     907                 :    22785259 :   rtx_insn *insn;
     908                 :             : 
     909                 :   300891510 :   FOR_BB_INSNS_REVERSE (bb, insn)
     910                 :             :     {
     911                 :   428207848 :       if (!NONDEBUG_INSN_P (insn))
     912                 :   150101597 :         continue;
     913                 :             : 
     914                 :             :       /* Live-out state of the destination of this insn.  We can
     915                 :             :          use this to refine the live-in state of the sources of
     916                 :             :          this insn in many cases.  */
     917                 :   128004654 :       bitmap live_tmp = BITMAP_ALLOC (NULL);
     918                 :             : 
     919                 :             :       /* First process any sets/clobbers in INSN.  */
     920                 :   128004654 :       bool skipped_dest = ext_dce_process_sets (insn, PATTERN (insn), live_tmp);
     921                 :             : 
     922                 :             :       /* CALL_INSNs need processing their fusage data.  */
     923                 :   128004654 :       if (CALL_P (insn))
     924                 :     9509248 :         skipped_dest |= ext_dce_process_sets (insn,
     925                 :             :                                               CALL_INSN_FUNCTION_USAGE (insn),
     926                 :             :                                               live_tmp);
     927                 :             : 
     928                 :             :       /* And now uses, optimizing away SIGN/ZERO extensions as we go.  */
     929                 :   128004654 :       ext_dce_process_uses (insn, PATTERN (insn), live_tmp, skipped_dest);
     930                 :             : 
     931                 :             :       /* A nonlocal goto implicitly uses the frame pointer.  */
     932                 :   128004654 :       if (JUMP_P (insn) && find_reg_note (insn, REG_NON_LOCAL_GOTO, NULL_RTX))
     933                 :             :         {
     934                 :        1130 :           bitmap_set_range (livenow, FRAME_POINTER_REGNUM * 4, 4);
     935                 :        1130 :           if (!HARD_FRAME_POINTER_IS_FRAME_POINTER)
     936                 :        1130 :             bitmap_set_range (livenow, HARD_FRAME_POINTER_REGNUM * 4, 4);
     937                 :             :         }
     938                 :             : 
     939                 :             :       /* And process fusage data for the use as well.  */
     940                 :   128004654 :       if (CALL_P (insn))
     941                 :             :         {
     942                 :     9509248 :           if (!FAKE_CALL_P (insn))
     943                 :     9509188 :             bitmap_set_range (livenow, STACK_POINTER_REGNUM * 4, 4);
     944                 :             : 
     945                 :             :           /* If this is not a call to a const function, then assume it
     946                 :             :              can read any global register.  */
     947                 :     9509248 :           if (!RTL_CONST_CALL_P (insn))
     948                 :   853864248 :             for (unsigned i = 0; i < FIRST_PSEUDO_REGISTER; i++)
     949                 :   844682912 :               if (global_regs[i])
     950                 :         230 :                 bitmap_set_range (livenow, i * 4, 4);
     951                 :             : 
     952                 :     9509248 :           ext_dce_process_uses (insn, CALL_INSN_FUNCTION_USAGE (insn), live_tmp, false);
     953                 :             :         }
     954                 :             : 
     955                 :   128004654 :       BITMAP_FREE (live_tmp);
     956                 :             :     }
     957                 :    22785259 : }
     958                 :             : 
     959                 :             : /* SUBREG_PROMOTED_VAR_P is set by the gimple->rtl optimizers and
     960                 :             :    is usually helpful.  However, in some cases setting the value when
     961                 :             :    it is not strictly needed can cause this pass to miss optimizations.
     962                 :             : 
     963                 :             :    Specifically consider (set (mem) (subreg (reg))).  If set in that
     964                 :             :    case it will cause more bit groups to be live for REG than would
     965                 :             :    be strictly necessary which in turn can inhibit extension removal.
     966                 :             : 
     967                 :             :    So do a pass over the IL wiping the SUBREG_PROMOTED_VAR_P when it
     968                 :             :    is obviously not needed.  */
     969                 :             : 
     970                 :             : static void
     971                 :      947921 : maybe_clear_subreg_promoted_p (void)
     972                 :             : {
     973                 :   118830534 :   for (rtx_insn *insn = get_insns(); insn; insn = NEXT_INSN (insn))
     974                 :             :     {
     975                 :   117882613 :       if (!NONDEBUG_INSN_P (insn))
     976                 :    63092171 :         continue;
     977                 :             : 
     978                 :    54790442 :       rtx set = single_set (insn);
     979                 :    54790442 :       if (!set)
     980                 :     3638247 :         continue;
     981                 :             : 
     982                 :             :       /* There may be other cases where we should clear, but for
     983                 :             :          now, this is the only known case where it causes problems.  */
     984                 :    51152195 :       if (MEM_P (SET_DEST (set)) && SUBREG_P (SET_SRC (set))
     985                 :       70729 :         && GET_MODE (SET_DEST (set)) <= GET_MODE (SUBREG_REG (SET_SRC (set))))
     986                 :       61859 :         SUBREG_PROMOTED_VAR_P (SET_SRC (set)) = 0;
     987                 :             :     }
     988                 :      947921 : }
     989                 :             : 
     990                 :             : 
     991                 :             : /* We optimize away sign/zero extensions in this pass and replace
     992                 :             :    them with SUBREGs indicating certain bits are don't cares.
     993                 :             : 
     994                 :             :    This changes the SUBREG_PROMOTED_VAR_P state of the object.
     995                 :             :    It is fairly painful to fix this on the fly, so we have
     996                 :             :    recorded which pseudos are affected and we look for SUBREGs
     997                 :             :    of those pseudos and fix them up.  */
     998                 :             : 
     999                 :             : static void
    1000                 :      947921 : reset_subreg_promoted_p (void)
    1001                 :             : {
    1002                 :             :   /* If we removed an extension, that changed the promoted state
    1003                 :             :      of the destination of that extension.  Thus we need to go
    1004                 :             :      find any SUBREGs that reference that pseudo and adjust their
    1005                 :             :      SUBREG_PROMOTED_VAR_P state.  */
    1006                 :   118830534 :   for (rtx_insn *insn = get_insns(); insn; insn = NEXT_INSN (insn))
    1007                 :             :     {
    1008                 :   117882613 :       if (!NONDEBUG_INSN_P (insn))
    1009                 :    63092171 :         continue;
    1010                 :             : 
    1011                 :    54790442 :       rtx pat = PATTERN (insn);
    1012                 :    54790442 :       subrtx_var_iterator::array_type array;
    1013                 :   349185454 :       FOR_EACH_SUBRTX_VAR (iter, array, pat, NONCONST)
    1014                 :             :         {
    1015                 :   294395012 :           rtx sub = *iter;
    1016                 :             : 
    1017                 :             :           /* We only care about SUBREGs.  */
    1018                 :   294395012 :           if (GET_CODE (sub) != SUBREG)
    1019                 :   292853152 :             continue;
    1020                 :             : 
    1021                 :     1541860 :           const_rtx x = SUBREG_REG (sub);
    1022                 :             : 
    1023                 :             :           /* We only care if the inner object is a REG.  */
    1024                 :     1541860 :           if (!REG_P (x))
    1025                 :         732 :             continue;
    1026                 :             : 
    1027                 :             :           /* And only if the SUBREG is a promoted var.  */
    1028                 :     1541128 :           if (!SUBREG_PROMOTED_VAR_P (sub))
    1029                 :     1535981 :             continue;
    1030                 :             : 
    1031                 :        5147 :           if (bitmap_bit_p (changed_pseudos, REGNO (x)))
    1032                 :           0 :             SUBREG_PROMOTED_VAR_P (sub) = 0;
    1033                 :             :         }
    1034                 :    54790442 :     }
    1035                 :      947921 : }
    1036                 :             : 
    1037                 :             : /* Initialization of the ext-dce pass.  Primarily this means
    1038                 :             :    setting up the various bitmaps we utilize.  */
    1039                 :             : 
    1040                 :             : static void
    1041                 :      947921 : ext_dce_init (void)
    1042                 :             : {
    1043                 :      947921 :   livein.create (last_basic_block_for_fn (cfun));
    1044                 :      947921 :   livein.quick_grow_cleared (last_basic_block_for_fn (cfun));
    1045                 :    12593493 :   for (int i = 0; i < last_basic_block_for_fn (cfun); i++)
    1046                 :    11645572 :     bitmap_initialize (&livein[i], &bitmap_default_obstack);
    1047                 :             : 
    1048                 :      947921 :   auto_bitmap refs (&bitmap_default_obstack);
    1049                 :      947921 :   df_get_exit_block_use_set (refs);
    1050                 :             : 
    1051                 :      947921 :   unsigned i;
    1052                 :      947921 :   bitmap_iterator bi;
    1053                 :     4364579 :   EXECUTE_IF_SET_IN_BITMAP (refs, 0, i, bi)
    1054                 :     3416658 :     make_reg_live (&livein[EXIT_BLOCK], i);
    1055                 :             : 
    1056                 :      947921 :   livenow = BITMAP_ALLOC (NULL);
    1057                 :      947921 :   all_blocks = BITMAP_ALLOC (NULL);
    1058                 :      947921 :   changed_pseudos = BITMAP_ALLOC (NULL);
    1059                 :             : 
    1060                 :    12593493 :   for (int i = 0; i < last_basic_block_for_fn (cfun); i++)
    1061                 :    11645572 :     if (i != ENTRY_BLOCK && i != EXIT_BLOCK)
    1062                 :     9749730 :       bitmap_set_bit (all_blocks, i);
    1063                 :             : 
    1064                 :      947921 :   modify = false;
    1065                 :      947921 : }
    1066                 :             : 
    1067                 :             : /* Finalization of the ext-dce pass.  Primarily this means
    1068                 :             :    releasing the various bitmaps we utilize.  */
    1069                 :             : 
    1070                 :             : static void
    1071                 :      947921 : ext_dce_finish (void)
    1072                 :             : {
    1073                 :    12593493 :   for (unsigned i = 0; i < livein.length (); i++)
    1074                 :    11645572 :     bitmap_clear (&livein[i]);
    1075                 :      947921 :   livein.release ();
    1076                 :             : 
    1077                 :      947921 :   BITMAP_FREE (livenow);
    1078                 :      947921 :   BITMAP_FREE (changed_pseudos);
    1079                 :      947921 :   BITMAP_FREE (all_blocks);
    1080                 :      947921 : }
    1081                 :             : 
    1082                 :             : /* Process block number BB_INDEX as part of the backward
    1083                 :             :    simple dataflow analysis.  Return TRUE if something in
    1084                 :             :    this block changed or FALSE otherwise.  */
    1085                 :             : 
    1086                 :             : static bool
    1087                 :    26576943 : ext_dce_rd_transfer_n (int bb_index)
    1088                 :             : {
    1089                 :             :   /* The ENTRY/EXIT blocks never change.  */
    1090                 :    26576943 :   if (bb_index == ENTRY_BLOCK || bb_index == EXIT_BLOCK)
    1091                 :             :     return false;
    1092                 :             : 
    1093                 :    22785259 :   basic_block bb = BASIC_BLOCK_FOR_FN (cfun, bb_index);
    1094                 :             : 
    1095                 :             :   /* Make everything live that's live in the successors.  */
    1096                 :    22785259 :   bitmap_clear (livenow);
    1097                 :    22785259 :   edge_iterator ei;
    1098                 :    22785259 :   edge e;
    1099                 :             : 
    1100                 :    57332212 :   FOR_EACH_EDGE (e, ei, bb->succs)
    1101                 :    34546953 :     bitmap_ior_into (livenow, &livein[e->dest->index]);
    1102                 :             : 
    1103                 :    22785259 :   ext_dce_process_bb (bb);
    1104                 :             : 
    1105                 :             :   /* We only allow widening the set of objects live at the start
    1106                 :             :      of a block.  Otherwise we run the risk of not converging.  */
    1107                 :    22785259 :   return bitmap_ior_into (&livein[bb_index], livenow);
    1108                 :             : }
    1109                 :             : 
    1110                 :             : /* Dummy function for the df_simple_dataflow API.  */
    1111                 :    33164354 : static bool ext_dce_rd_confluence_n (edge) { return true; }
    1112                 :             : 
    1113                 :             : /* Use lifetime analysis to identify extensions that set bits that
    1114                 :             :    are never read.  Turn such extensions into SUBREGs instead which
    1115                 :             :    can often be propagated away.  */
    1116                 :             : 
    1117                 :             : void
    1118                 :      947921 : ext_dce_execute (void)
    1119                 :             : {
    1120                 :             :   /* Limit the amount of memory we use for livein, with 4 bits per
    1121                 :             :      reg per basic-block including overhead that maps to one byte
    1122                 :             :      per reg per basic-block.  */
    1123                 :      947921 :   uint64_t memory_request
    1124                 :      947921 :     = (uint64_t)n_basic_blocks_for_fn (cfun) * max_reg_num ();
    1125                 :      947921 :   if (memory_request / 1024 > (uint64_t)param_max_gcse_memory)
    1126                 :             :     {
    1127                 :           0 :       warning (OPT_Wdisabled_optimization,
    1128                 :             :                "ext-dce disabled: %d basic blocks and %d registers; "
    1129                 :             :                "increase %<--param max-gcse-memory%> above %wu",
    1130                 :           0 :                n_basic_blocks_for_fn (cfun), max_reg_num (),
    1131                 :             :                memory_request / 1024);
    1132                 :           0 :       return;
    1133                 :             :     }
    1134                 :             : 
    1135                 :             :   /* Some settings of SUBREG_PROMOTED_VAR_P are actively harmful
    1136                 :             :      to this pass.  Clear it for those cases.  */
    1137                 :      947921 :   maybe_clear_subreg_promoted_p ();
    1138                 :      947921 :   df_analyze ();
    1139                 :      947921 :   ext_dce_init ();
    1140                 :             : 
    1141                 :     3791684 :   do
    1142                 :             :     {
    1143                 :     1895842 :       df_simple_dataflow (DF_BACKWARD, NULL, NULL,
    1144                 :             :                           ext_dce_rd_confluence_n, ext_dce_rd_transfer_n,
    1145                 :             :                           all_blocks, df_get_postorder (DF_BACKWARD),
    1146                 :             :                           df_get_n_blocks (DF_BACKWARD));
    1147                 :     1895842 :       modify = !modify;
    1148                 :             :     }
    1149                 :             :   while (modify);
    1150                 :             : 
    1151                 :      947921 :   reset_subreg_promoted_p ();
    1152                 :             : 
    1153                 :      947921 :   ext_dce_finish ();
    1154                 :             : }
    1155                 :             : 
    1156                 :             : 
    1157                 :             : namespace {
    1158                 :             : 
    1159                 :             : const pass_data pass_data_ext_dce =
    1160                 :             : {
    1161                 :             :   RTL_PASS, /* type */
    1162                 :             :   "ext_dce", /* name */
    1163                 :             :   OPTGROUP_NONE, /* optinfo_flags */
    1164                 :             :   TV_EXT_DCE, /* tv_id */
    1165                 :             :   PROP_cfglayout, /* properties_required */
    1166                 :             :   0, /* properties_provided */
    1167                 :             :   0, /* properties_destroyed */
    1168                 :             :   0, /* todo_flags_start */
    1169                 :             :   TODO_df_finish, /* todo_flags_finish */
    1170                 :             : };
    1171                 :             : 
    1172                 :             : class pass_ext_dce : public rtl_opt_pass
    1173                 :             : {
    1174                 :             : public:
    1175                 :      285081 :   pass_ext_dce (gcc::context *ctxt)
    1176                 :      570162 :     : rtl_opt_pass (pass_data_ext_dce, ctxt)
    1177                 :             :   {}
    1178                 :             : 
    1179                 :             :   /* opt_pass methods: */
    1180                 :     1449863 :   virtual bool gate (function *) { return flag_ext_dce && optimize > 0; }
    1181                 :      947921 :   virtual unsigned int execute (function *)
    1182                 :             :     {
    1183                 :      947921 :       ext_dce_execute ();
    1184                 :      947921 :       return 0;
    1185                 :             :     }
    1186                 :             : 
    1187                 :             : }; // class pass_ext_dce
    1188                 :             : 
    1189                 :             : } // anon namespace
    1190                 :             : 
    1191                 :             : rtl_opt_pass *
    1192                 :      285081 : make_pass_ext_dce (gcc::context *ctxt)
    1193                 :             : {
    1194                 :      285081 :   return new pass_ext_dce (ctxt);
    1195                 :             : }
        

Generated by: LCOV version 2.1-beta

The LCOV profile is generated on an x86_64 machine using the following configure options: configure --disable-bootstrap --enable-coverage=opt --enable-languages=c,c++,fortran,go,jit,lto,rust,m2 --enable-host-shared. The GCC test suite is run with the built compiler.