LCOV - code coverage report
Current view: top level - gcc - ext-dce.cc (source / functions) Coverage Total Hit
Test: gcc.info Lines: 84.9 % 451 383
Test Date: 2025-07-05 13:26:22 Functions: 95.0 % 20 19
Legend: Lines: hit not hit | Branches: + taken - not taken # not executed Branches: - 0 0

             Branch data     Line data    Source code
       1                 :             : /* RTL dead zero/sign extension (code) elimination.
       2                 :             :    Copyright (C) 2000-2025 Free Software Foundation, Inc.
       3                 :             : 
       4                 :             : This file is part of GCC.
       5                 :             : 
       6                 :             : GCC is free software; you can redistribute it and/or modify it under
       7                 :             : the terms of the GNU General Public License as published by the Free
       8                 :             : Software Foundation; either version 3, or (at your option) any later
       9                 :             : version.
      10                 :             : 
      11                 :             : GCC is distributed in the hope that it will be useful, but WITHOUT ANY
      12                 :             : WARRANTY; without even the implied warranty of MERCHANTABILITY or
      13                 :             : FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
      14                 :             : for more details.
      15                 :             : 
      16                 :             : You should have received a copy of the GNU General Public License
      17                 :             : along with GCC; see the file COPYING3.  If not see
      18                 :             : <http://www.gnu.org/licenses/>.  */
      19                 :             : 
      20                 :             : #include "config.h"
      21                 :             : #include "system.h"
      22                 :             : #include "coretypes.h"
      23                 :             : #include "backend.h"
      24                 :             : #include "rtl.h"
      25                 :             : #include "tree.h"
      26                 :             : #include "memmodel.h"
      27                 :             : #include "insn-config.h"
      28                 :             : #include "emit-rtl.h"
      29                 :             : #include "recog.h"
      30                 :             : #include "cfganal.h"
      31                 :             : #include "tree-pass.h"
      32                 :             : #include "cfgrtl.h"
      33                 :             : #include "rtl-iter.h"
      34                 :             : #include "df.h"
      35                 :             : #include "print-rtl.h"
      36                 :             : #include "dbgcnt.h"
      37                 :             : #include "diagnostic-core.h"
      38                 :             : #include "target.h"
      39                 :             : 
      40                 :             : /* These should probably move into a C++ class.  */
      41                 :             : static vec<bitmap_head> livein;
      42                 :             : static bitmap all_blocks;
      43                 :             : static bitmap livenow;
      44                 :             : static bitmap changed_pseudos;
      45                 :             : static bool modify;
      46                 :             : 
      47                 :             : /* We consider four bit groups for liveness:
      48                 :             :    bit 0..7   (least significant byte)
      49                 :             :    bit 8..15  (second least significant byte)
      50                 :             :    bit 16..31
      51                 :             :    bit 32..BITS_PER_WORD-1  */
      52                 :             : 
      53                 :             : /* For the given REG, return the number of bit groups implied by the
      54                 :             :    size of the REG's mode, up to a maximum of 4 (number of bit groups
      55                 :             :    tracked by this pass).
      56                 :             : 
      57                 :             :    For partial integer and variable sized modes also return 4.  This
      58                 :             :    could possibly be refined for something like PSI mode, but it
      59                 :             :    does not seem worth the effort.  */
      60                 :             : 
      61                 :             : static int
      62                 :   229871246 : group_limit (const_rtx reg)
      63                 :             : {
      64                 :   229871246 :   machine_mode mode = GET_MODE (reg);
      65                 :             : 
      66                 :   229871246 :   if (!GET_MODE_BITSIZE (mode).is_constant ())
      67                 :             :     return 4;
      68                 :             : 
      69                 :   229871246 :   int size = GET_MODE_SIZE (mode).to_constant ();
      70                 :             : 
      71                 :   229871246 :   size = exact_log2 (size);
      72                 :             : 
      73                 :   229783488 :   if (size < 0)
      74                 :             :     return 4;
      75                 :             : 
      76                 :   229783488 :   size++;
      77                 :   229783488 :   return (size > 4 ? 4 : size);
      78                 :             : }
      79                 :             : 
      80                 :             : /* Make all bit groups live for REGNO in bitmap BMAP.  For hard regs,
      81                 :             :    we assume all groups are live.  For a pseudo we consider the size
      82                 :             :    of the pseudo to avoid creating unnecessarily live chunks of data.  */
      83                 :             : 
      84                 :             : static void
      85                 :     4626776 : make_reg_live (bitmap bmap, int regno)
      86                 :             : {
      87                 :     4626776 :   int limit;
      88                 :             : 
      89                 :             :   /* For pseudos we can use the mode to limit how many bit groups
      90                 :             :      are marked as live since a pseudo only has one mode.  Hard
      91                 :             :      registers have to be handled more conservatively.  */
      92                 :     4626776 :   if (regno > FIRST_PSEUDO_REGISTER)
      93                 :             :     {
      94                 :      864919 :       rtx reg = regno_reg_rtx[regno];
      95                 :      864919 :       limit = group_limit (reg);
      96                 :             :     }
      97                 :             :   else
      98                 :             :     limit = 4;
      99                 :             : 
     100                 :    22815015 :   for (int i = 0; i < limit; i++)
     101                 :    18188239 :     bitmap_set_bit (bmap, regno * 4 + i);
     102                 :     4626776 : }
     103                 :             : 
     104                 :             : /* Note this pass could be used to narrow memory loads too.  It's
     105                 :             :    not clear if that's profitable or not in general.  */
     106                 :             : 
     107                 :             : #define UNSPEC_P(X) (GET_CODE (X) == UNSPEC || GET_CODE (X) == UNSPEC_VOLATILE)
     108                 :             : 
     109                 :             : /* If we know the destination of CODE only uses some low bits
     110                 :             :    (say just the QI bits of an SI operation), then return true
     111                 :             :    if we can propagate the need for just the subset of bits
     112                 :             :    from the destination to the sources.
     113                 :             : 
     114                 :             :    FIXME: This is safe for operands 1 and 2 of an IF_THEN_ELSE, but not
     115                 :             :    operand 0.  Thus is likely would need some special casing to handle.  */
     116                 :             : 
     117                 :             : static bool
     118                 :   140065463 : safe_for_live_propagation (rtx_code code)
     119                 :             : {
     120                 :             :   /* First handle rtx classes which as a whole are known to
     121                 :             :      be either safe or unsafe.  */
     122                 :   140065463 :   switch (GET_RTX_CLASS (code))
     123                 :             :     {
     124                 :             :       case RTX_OBJ:
     125                 :             :       case RTX_CONST_OBJ:
     126                 :             :         return true;
     127                 :             : 
     128                 :             :       case RTX_COMPARE:
     129                 :             :       case RTX_COMM_COMPARE:
     130                 :             :       case RTX_TERNARY:
     131                 :             :         return false;
     132                 :             : 
     133                 :    71956463 :       default:
     134                 :    71956463 :         break;
     135                 :             :     }
     136                 :             : 
     137                 :             :   /* What's left are specific codes.  We only need to identify those
     138                 :             :      which are safe.   */
     139                 :    71956463 :   switch (code)
     140                 :             :     {
     141                 :             :     /* These are trivially safe.  */
     142                 :             :     case SUBREG:
     143                 :             :     case NOT:
     144                 :             :     case ZERO_EXTEND:
     145                 :             :     case SIGN_EXTEND:
     146                 :             :     case TRUNCATE:
     147                 :             :     case PLUS:
     148                 :             :     case MINUS:
     149                 :             :     case MULT:
     150                 :             :     case SMUL_HIGHPART:
     151                 :             :     case UMUL_HIGHPART:
     152                 :             :     case AND:
     153                 :             :     case IOR:
     154                 :             :     case XOR:
     155                 :             :       return true;
     156                 :             : 
     157                 :             :     /* We can propagate for the shifted operand, but not the shift
     158                 :             :        count.  The count is handled specially.  */
     159                 :             :     case ASHIFT:
     160                 :             :     case LSHIFTRT:
     161                 :             :     case ASHIFTRT:
     162                 :             :     case SS_ASHIFT:
     163                 :             :     case US_ASHIFT:
     164                 :             :       return true;
     165                 :             : 
     166                 :             :     /* There may be other safe codes.  If so they can be added
     167                 :             :        individually when discovered.  */
     168                 :             :     default:
     169                 :             :       return false;
     170                 :             :     }
     171                 :             : }
     172                 :             : 
     173                 :             : /* Clear bits in LIVENOW and set bits in LIVE_TMP for objects
     174                 :             :    set/clobbered by OBJ contained in INSN.
     175                 :             : 
     176                 :             :    Conceptually it is always safe to ignore a particular destination
     177                 :             :    here as that will result in more chunks of data being considered
     178                 :             :    live.  That's what happens when we "continue" the main loop when
     179                 :             :    we see something we don't know how to handle such as a vector
     180                 :             :    mode destination.
     181                 :             : 
     182                 :             :    The more accurate we are in identifying what objects (and chunks
     183                 :             :    within an object) are set by INSN, the more aggressive the
     184                 :             :    optimization phase during use handling will be.  */
     185                 :             : 
     186                 :             : static bool
     187                 :   135832638 : ext_dce_process_sets (rtx_insn *insn, rtx obj, bitmap live_tmp)
     188                 :             : {
     189                 :   135832638 :   bool skipped_dest = false;
     190                 :             : 
     191                 :   135832638 :   subrtx_iterator::array_type array;
     192                 :   383711604 :   FOR_EACH_SUBRTX (iter, array, obj, NONCONST)
     193                 :             :     {
     194                 :   247878966 :       const_rtx x = *iter;
     195                 :             : 
     196                 :             :       /* An EXPR_LIST (from call fusage) ends in NULL_RTX.  */
     197                 :   247878966 :       if (x == NULL_RTX)
     198                 :     9400395 :         continue;
     199                 :             : 
     200                 :   238478571 :       if (UNSPEC_P (x))
     201                 :      566360 :         continue;
     202                 :             : 
     203                 :   237912211 :       if (GET_CODE (x) == SET || GET_CODE (x) == CLOBBER)
     204                 :             :         {
     205                 :   141122308 :           unsigned bit = 0;
     206                 :   141122308 :           x = SET_DEST (x);
     207                 :             : 
     208                 :             :           /* We don't support vector destinations or destinations
     209                 :             :              wider than DImode.  */
     210                 :   141122308 :           scalar_mode outer_mode;
     211                 :   145129563 :           if (!is_a <scalar_mode> (GET_MODE (x), &outer_mode)
     212                 :    90316134 :               || GET_MODE_BITSIZE (outer_mode) > HOST_BITS_PER_WIDE_INT)
     213                 :             :             {
     214                 :             :               /* Skip the subrtxs of this destination.  There is
     215                 :             :                  little value in iterating into the subobjects, so
     216                 :             :                  just skip them for a bit of efficiency.  */
     217                 :    54813429 :               skipped_dest = true;
     218                 :    54813429 :               iter.skip_subrtxes ();
     219                 :   302692395 :               continue;
     220                 :             :             }
     221                 :             : 
     222                 :             :           /* We could have (strict_low_part (subreg ...)).  We can not just
     223                 :             :              strip the STRICT_LOW_PART as that would result in clearing
     224                 :             :              some bits in LIVENOW that are still live.  So process the
     225                 :             :              STRICT_LOW_PART specially.  */
     226                 :    86308879 :           if (GET_CODE (x) == STRICT_LOW_PART)
     227                 :             :             {
     228                 :           0 :               x = XEXP (x, 0);
     229                 :             : 
     230                 :             :               /* The only valid operand of a STRICT_LOW_PART is a non
     231                 :             :                  paradoxical SUBREG.  */
     232                 :           0 :               gcc_assert (SUBREG_P (x)
     233                 :             :                           && !paradoxical_subreg_p (x)
     234                 :             :                           && SUBREG_BYTE (x).is_constant ());
     235                 :             : 
     236                 :             :               /* I think we should always see a REG here.  But let's
     237                 :             :                  be sure.  */
     238                 :           0 :               gcc_assert (REG_P (SUBREG_REG (x)));
     239                 :             : 
     240                 :             :               /* The inner mode might be larger, just punt for
     241                 :             :                  that case.  Remember, we can not just continue to process
     242                 :             :                  the inner RTXs due to the STRICT_LOW_PART.  */
     243                 :           0 :               if (!is_a <scalar_mode> (GET_MODE (SUBREG_REG (x)), &outer_mode)
     244                 :           0 :                   || GET_MODE_BITSIZE (outer_mode) > HOST_BITS_PER_WIDE_INT)
     245                 :             :                 {
     246                 :             :                   /* Skip the subrtxs of the STRICT_LOW_PART.  We can't
     247                 :             :                      process them because it'll set objects as no longer
     248                 :             :                      live when they are in fact still live.  */
     249                 :           0 :                   skipped_dest = true;
     250                 :           0 :                   iter.skip_subrtxes ();
     251                 :           0 :                   continue;
     252                 :             :                 }
     253                 :             : 
     254                 :             :               /* LIVE_TMP contains the set groups that are live-out and set in
     255                 :             :                  this insn.  It is used to narrow the groups live-in for the
     256                 :             :                  inputs of this insn.
     257                 :             : 
     258                 :             :                  The simple thing to do is mark all the groups as live, but
     259                 :             :                  that will significantly inhibit optimization.
     260                 :             : 
     261                 :             :                  We also need to be careful in the case where we have an in-out
     262                 :             :                  operand.  If we're not careful we'd clear LIVE_TMP
     263                 :             :                  incorrectly.  */
     264                 :           0 :               HOST_WIDE_INT rn = REGNO (SUBREG_REG (x));
     265                 :           0 :               int limit = group_limit (SUBREG_REG (x));
     266                 :           0 :               for (HOST_WIDE_INT i = 4 * rn; i < 4 * rn + limit; i++)
     267                 :           0 :                 if (bitmap_bit_p (livenow, i))
     268                 :           0 :                   bitmap_set_bit (live_tmp, i);
     269                 :             : 
     270                 :           0 :               if (bitmap_empty_p (live_tmp))
     271                 :           0 :                 make_reg_live (live_tmp, rn);
     272                 :             : 
     273                 :             :               /* The mode of the SUBREG tells us how many bits we can
     274                 :             :                  clear.  */
     275                 :           0 :               machine_mode mode = GET_MODE (x);
     276                 :           0 :               HOST_WIDE_INT size
     277                 :           0 :                 = exact_log2 (GET_MODE_SIZE (mode).to_constant ()) + 1;
     278                 :           0 :               bitmap_clear_range (livenow, 4 * rn, size);
     279                 :             : 
     280                 :             :               /* We have fully processed this destination.  */
     281                 :           0 :               iter.skip_subrtxes ();
     282                 :           0 :               continue;
     283                 :           0 :             }
     284                 :             : 
     285                 :             :           /* Phase one of destination handling.  First remove any wrapper
     286                 :             :              such as SUBREG or ZERO_EXTRACT.  */
     287                 :    86308879 :           unsigned HOST_WIDE_INT mask
     288                 :    86308879 :             = GET_MODE_MASK (GET_MODE_INNER (GET_MODE (x)));
     289                 :    86308879 :           if (SUBREG_P (x))
     290                 :             :             {
     291                 :             :               /* If we have a SUBREG destination that is too wide, just
     292                 :             :                  skip the destination rather than continuing this iterator.
     293                 :             :                  While continuing would be better, we'd need to strip the
     294                 :             :                  subreg and restart within the SET processing rather than
     295                 :             :                  the top of the loop which just complicates the flow even
     296                 :             :                  more.  */
     297                 :      653159 :               if (!is_a <scalar_mode> (GET_MODE (SUBREG_REG (x)), &outer_mode)
     298                 :      541524 :                   || GET_MODE_BITSIZE (outer_mode) > HOST_BITS_PER_WIDE_INT)
     299                 :             :                 {
     300                 :      111635 :                   skipped_dest = true;
     301                 :      111635 :                   iter.skip_subrtxes ();
     302                 :      111635 :                   continue;
     303                 :             :                 }
     304                 :             : 
     305                 :             :               /* We can safely strip a paradoxical subreg.  The inner mode will
     306                 :             :                  be narrower than the outer mode.  We'll clear fewer bits in
     307                 :             :                  LIVENOW than we'd like, but that's always safe.  */
     308                 :      430319 :               if (paradoxical_subreg_p (x))
     309                 :             :                 x = XEXP (x, 0);
     310                 :      422890 :               else if (SUBREG_BYTE (x).is_constant ())
     311                 :             :                 {
     312                 :      422890 :                   bit = subreg_lsb (x).to_constant ();
     313                 :      422890 :                   mask = GET_MODE_MASK (GET_MODE (SUBREG_REG (x))) << bit;
     314                 :      422890 :                   gcc_assert (mask);
     315                 :             :                   x = SUBREG_REG (x);
     316                 :             :                 }
     317                 :             :               else
     318                 :             :                 gcc_unreachable ();
     319                 :             :             }
     320                 :             : 
     321                 :    86197244 :           if (GET_CODE (x) == ZERO_EXTRACT)
     322                 :             :             {
     323                 :             :               /* Unlike a SUBREG destination, a set of a ZERO_EXTRACT only
     324                 :             :                  modifies the bits referenced in the ZERO_EXTRACT, the rest
     325                 :             :                  remain the same.  Thus we can not continue here, we must
     326                 :             :                  either figure out what part of the destination is modified
     327                 :             :                  or skip the sub-rtxs.  */
     328                 :        3404 :               skipped_dest = true;
     329                 :        3404 :               iter.skip_subrtxes ();
     330                 :        3404 :               continue;
     331                 :             :             }
     332                 :             : 
     333                 :             :           /* BIT >= 64 indicates something went horribly wrong.  */
     334                 :    86193840 :           gcc_assert (bit <= HOST_BITS_PER_WIDE_INT - 1);
     335                 :             : 
     336                 :             :           /* Now handle the actual object that was changed.  */
     337                 :    86193840 :           if (REG_P (x))
     338                 :             :             {
     339                 :             :               /* LIVE_TMP contains the set groups that are live-out and set in
     340                 :             :                  this insn.  It is used to narrow the groups live-in for the
     341                 :             :                  inputs of this insn.
     342                 :             : 
     343                 :             :                  The simple thing to do is mark all the groups as live, but
     344                 :             :                  that will significantly inhibit optimization.
     345                 :             : 
     346                 :             :                  We also need to be careful in the case where we have an in-out
     347                 :             :                  operand.  If we're not careful we'd clear LIVE_TMP
     348                 :             :                  incorrectly.  */
     349                 :    72324968 :               HOST_WIDE_INT rn = REGNO (x);
     350                 :    72324968 :               int limit = group_limit (x);
     351                 :   322696063 :               for (HOST_WIDE_INT i = 4 * rn; i < 4 * rn + limit; i++)
     352                 :   250371095 :                 if (bitmap_bit_p (livenow, i))
     353                 :   243395743 :                   bitmap_set_bit (live_tmp, i);
     354                 :             : 
     355                 :    72324968 :               if (bitmap_empty_p (live_tmp))
     356                 :     1212616 :                 make_reg_live (live_tmp, rn);
     357                 :             : 
     358                 :             :               /* Now clear the bits known written by this instruction.
     359                 :             :                  Note that BIT need not be a power of two, consider a
     360                 :             :                  ZERO_EXTRACT destination.  */
     361                 :    72324968 :               int start = (bit < 8 ? 0 : bit < 16 ? 1 : bit < 32 ? 2 : 3);
     362                 :    77352806 :               int end = ((mask & ~HOST_WIDE_INT_UC (0xffffffff)) ? 4
     363                 :    28077398 :                          : (mask & HOST_WIDE_INT_UC (0xffff0000)) ? 3
     364                 :     5812304 :                          : (mask & 0xff00) ? 2 : 1);
     365                 :    72324968 :               bitmap_clear_range (livenow, 4 * rn + start, end - start);
     366                 :             :             }
     367                 :             :           /* Some ports generate (clobber (const_int)).  */
     368                 :    13868872 :           else if (CONST_INT_P (x))
     369                 :           0 :             continue;
     370                 :             :           else
     371                 :    13868872 :             gcc_assert (CALL_P (insn)
     372                 :             :                         || MEM_P (x)
     373                 :             :                         || x == pc_rtx
     374                 :             :                         || GET_CODE (x) == SCRATCH);
     375                 :             : 
     376                 :    86193840 :           iter.skip_subrtxes ();
     377                 :    86193840 :         }
     378                 :    96789903 :       else if (GET_CODE (x) == COND_EXEC)
     379                 :             :         {
     380                 :             :           /* This isn't ideal, but may not be so bad in practice.  */
     381                 :           0 :           skipped_dest = true;
     382                 :           0 :           iter.skip_subrtxes ();
     383                 :             :         }
     384                 :             :     }
     385                 :   135832638 :   return skipped_dest;
     386                 :   135832638 : }
     387                 :             : 
     388                 :             : /* INSN has a sign/zero extended source inside SET that we will
     389                 :             :    try to turn into a SUBREG.  */
     390                 :             : static void
     391                 :        4796 : ext_dce_try_optimize_insn (rtx_insn *insn, rtx set)
     392                 :             : {
     393                 :        4796 :   rtx src = SET_SRC (set);
     394                 :        4796 :   rtx inner = XEXP (src, 0);
     395                 :             : 
     396                 :             :   /* Avoid (subreg (mem)) and other constructs which may be valid RTL, but
     397                 :             :      not useful for this optimization.  */
     398                 :        4796 :   if (!(REG_P (inner) || (SUBREG_P (inner) && REG_P (SUBREG_REG (inner)))))
     399                 :             :     return;
     400                 :             : 
     401                 :        2307 :   rtx new_pattern;
     402                 :        2307 :   if (dump_file)
     403                 :             :     {
     404                 :           0 :       fprintf (dump_file, "Processing insn:\n");
     405                 :           0 :       dump_insn_slim (dump_file, insn);
     406                 :           0 :       fprintf (dump_file, "Trying to simplify pattern:\n");
     407                 :           0 :       print_rtl_single (dump_file, SET_SRC (set));
     408                 :             :     }
     409                 :             : 
     410                 :             :   /* We decided to turn do the optimization but allow it to be rejected for
     411                 :             :      bisection purposes.  */
     412                 :        2307 :   if (!dbg_cnt (::ext_dce))
     413                 :             :     {
     414                 :           0 :       if (dump_file)
     415                 :           0 :         fprintf (dump_file, "Rejected due to debug counter.\n");
     416                 :           0 :       return;
     417                 :             :     }
     418                 :             : 
     419                 :        4614 :   new_pattern = simplify_gen_subreg (GET_MODE (src), inner,
     420                 :        2307 :                                      GET_MODE (inner), 0);
     421                 :             :   /* simplify_gen_subreg may fail in which case NEW_PATTERN will be NULL.
     422                 :             :      We must not pass that as a replacement pattern to validate_change.  */
     423                 :        2307 :   if (new_pattern)
     424                 :             :     {
     425                 :        2307 :       int ok = validate_change (insn, &SET_SRC (set), new_pattern, false);
     426                 :             : 
     427                 :        2307 :       rtx x = SET_DEST (set);
     428                 :        2307 :       while (SUBREG_P (x) || GET_CODE (x) == ZERO_EXTRACT)
     429                 :           0 :         x = XEXP (x, 0);
     430                 :             : 
     431                 :        2307 :       gcc_assert (REG_P (x));
     432                 :        2307 :       if (ok)
     433                 :        2307 :         bitmap_set_bit (changed_pseudos, REGNO (x));
     434                 :             : 
     435                 :        2307 :       if (dump_file)
     436                 :             :         {
     437                 :           0 :           if (ok)
     438                 :           0 :             fprintf (dump_file, "Successfully transformed to:\n");
     439                 :             :           else
     440                 :           0 :             fprintf (dump_file, "Failed transformation to:\n");
     441                 :             : 
     442                 :           0 :           print_rtl_single (dump_file, new_pattern);
     443                 :           0 :           fprintf (dump_file, "\n");
     444                 :             :         }
     445                 :             : 
     446                 :             :       /* INSN may have a REG_EQUAL note indicating that the value was
     447                 :             :          sign or zero extended.  That note is no longer valid since we've
     448                 :             :          just removed the extension.  Just wipe the notes.  */
     449                 :        2307 :       remove_reg_equal_equiv_notes (insn, false);
     450                 :             :     }
     451                 :             :   else
     452                 :             :     {
     453                 :           0 :       if (dump_file)
     454                 :           0 :         fprintf (dump_file, "Unable to generate valid SUBREG expression.\n");
     455                 :             :     }
     456                 :             : }
     457                 :             : 
     458                 :             : /* Some operators imply that their second operand is fully live,
     459                 :             :    regardless of how many bits in the output are live.  An example
     460                 :             :    would be the shift count on a target without SHIFT_COUNT_TRUNCATED
     461                 :             :    defined.
     462                 :             : 
     463                 :             :    Return TRUE if CODE is such an operator.  FALSE otherwise.  */
     464                 :             : 
     465                 :             : static bool
     466                 :    75706551 : binop_implies_op2_fully_live (rtx_code code)
     467                 :             : {
     468                 :           0 :   switch (code)
     469                 :             :     {
     470                 :             :     case ASHIFT:
     471                 :             :     case LSHIFTRT:
     472                 :             :     case ASHIFTRT:
     473                 :             :     case ROTATE:
     474                 :             :     case ROTATERT:
     475                 :             :     case SS_ASHIFT:
     476                 :             :     case US_ASHIFT:
     477                 :             :       return !SHIFT_COUNT_TRUNCATED;
     478                 :             : 
     479                 :           0 :     default:
     480                 :           0 :       return false;
     481                 :             :     }
     482                 :             : }
     483                 :             : 
     484                 :             : /* X, with code CODE, is an operation for which safe_for_live_propagation
     485                 :             :    holds true, and bits set in MASK are live in the result.  Compute a
     486                 :             :    mask of (potentially) live bits in the non-constant inputs.  In case of
     487                 :             :    binop_implies_op2_fully_live (e.g. shifts), the computed mask may
     488                 :             :    exclusively pertain to the first operand.
     489                 :             : 
     490                 :             :    This looks wrong as we may have some important operations embedded as
     491                 :             :    operands of another operation.  For example, we might have an extension
     492                 :             :    wrapping a shift.  It really feels like this needs to be recursing down
     493                 :             :    into operands much more often.  */
     494                 :             : 
     495                 :             : unsigned HOST_WIDE_INT
     496                 :    70778587 : carry_backpropagate (unsigned HOST_WIDE_INT mask, enum rtx_code code, rtx x)
     497                 :             : {
     498                 :    72494033 :   if (mask == 0)
     499                 :             :     return 0;
     500                 :             : 
     501                 :    72494007 :   enum machine_mode mode = GET_MODE_INNER (GET_MODE (x));
     502                 :    72494007 :   unsigned HOST_WIDE_INT mmask = GET_MODE_MASK (mode);
     503                 :             : 
     504                 :             :   /* While we don't try to optimize operations on types larger
     505                 :             :      than 64 bits, we do want to make sure not to invoke undefined
     506                 :             :      behavior when presented with such operations during use
     507                 :             :      processing.  The safe thing to do is to just return mmask
     508                 :             :      for that scenario indicating every possible chunk is life.  */
     509                 :    72494007 :   scalar_int_mode smode;
     510                 :    72494007 :   if (!is_a <scalar_int_mode> (mode, &smode)
     511                 :    60190526 :       || GET_MODE_BITSIZE (smode) > HOST_BITS_PER_WIDE_INT)
     512                 :             :     return mmask;
     513                 :             : 
     514                 :    58082977 :   switch (code)
     515                 :             :     {
     516                 :    15990518 :     case PLUS:
     517                 :    15990518 :     case MINUS:
     518                 :    15990518 :     case MULT:
     519                 :    15990518 :       return (HOST_WIDE_INT_UC (2) << floor_log2 (mask)) - 1;
     520                 :             : 
     521                 :             :     /* We propagate for the shifted operand, but not the shift
     522                 :             :        count.  The count is handled specially.  */
     523                 :     1371131 :     case ASHIFT:
     524                 :     1371131 :       if (CONST_INT_P (XEXP (x, 1))
     525                 :     2671079 :           && UINTVAL (XEXP (x, 1)) < GET_MODE_BITSIZE (smode))
     526                 :     1299912 :         return (HOST_WIDE_INT) mask >> INTVAL (XEXP (x, 1));
     527                 :       71219 :       return (HOST_WIDE_INT_UC (2) << floor_log2 (mask)) - 1;
     528                 :             : 
     529                 :             :     /* We propagate for the shifted operand, but not the shift
     530                 :             :        count.  The count is handled specially.  */
     531                 :      670624 :     case LSHIFTRT:
     532                 :      670624 :       if (CONST_INT_P (XEXP (x, 1))
     533                 :     1307514 :           && UINTVAL (XEXP (x, 1)) < GET_MODE_BITSIZE (smode))
     534                 :      636862 :         return mmask & (mask << INTVAL (XEXP (x, 1)));
     535                 :             :       return mmask;
     536                 :             : 
     537                 :             :     /* We propagate for the shifted operand, but not the shift
     538                 :             :        count.  The count is handled specially.  */
     539                 :      295266 :     case ASHIFTRT:
     540                 :      295266 :       if (CONST_INT_P (XEXP (x, 1))
     541                 :      578694 :           && UINTVAL (XEXP (x, 1)) < GET_MODE_BITSIZE (smode))
     542                 :             :         {
     543                 :      283420 :           HOST_WIDE_INT sign = 0;
     544                 :      283420 :           if (HOST_BITS_PER_WIDE_INT - clz_hwi (mask) + INTVAL (XEXP (x, 1))
     545                 :      283420 :               > GET_MODE_BITSIZE (smode))
     546                 :      566840 :             sign = HOST_WIDE_INT_1U << (GET_MODE_BITSIZE (smode) - 1);
     547                 :      283420 :           return sign | (mmask & (mask << INTVAL (XEXP (x, 1))));
     548                 :             :         }
     549                 :             :       return mmask;
     550                 :             : 
     551                 :       42678 :     case SMUL_HIGHPART:
     552                 :       42678 :     case UMUL_HIGHPART:
     553                 :       42678 :       if (XEXP (x, 1) == const0_rtx)
     554                 :             :         return 0;
     555                 :       42678 :       if (XEXP (x, 1) == const1_rtx)
     556                 :             :         return mmask;
     557                 :       42678 :       if (CONST_INT_P (XEXP (x, 1)))
     558                 :             :         {
     559                 :           0 :           if (pow2p_hwi (INTVAL (XEXP (x, 1))))
     560                 :           0 :             return mmask & (mask << (GET_MODE_BITSIZE (smode)
     561                 :           0 :                                      - exact_log2 (INTVAL (XEXP (x, 1)))));
     562                 :             : 
     563                 :           0 :           int bits = (HOST_BITS_PER_WIDE_INT + GET_MODE_BITSIZE (smode)
     564                 :           0 :                       - clz_hwi (mask) - ctz_hwi (INTVAL (XEXP (x, 1))));
     565                 :           0 :           if (bits < GET_MODE_BITSIZE (smode))
     566                 :           0 :             return (HOST_WIDE_INT_1U << bits) - 1;
     567                 :             :         }
     568                 :             :       return mmask;
     569                 :             : 
     570                 :      658101 :     case SIGN_EXTEND:
     571                 :      658101 :       if (!GET_MODE_BITSIZE (GET_MODE (x)).is_constant ()
     572                 :      658101 :           || !GET_MODE_BITSIZE (GET_MODE (XEXP (x, 0))).is_constant ())
     573                 :             :         return -1;
     574                 :             : 
     575                 :             :       /* We want the mode of the inner object.  We need to ensure its
     576                 :             :          sign bit is on in MASK.  */
     577                 :      658101 :       mode = GET_MODE_INNER (GET_MODE (XEXP (x, 0)));
     578                 :      658101 :       if (mask & ~GET_MODE_MASK (mode))
     579                 :      657644 :         mask |= HOST_WIDE_INT_1U << (GET_MODE_BITSIZE (mode).to_constant ()
     580                 :      657644 :                                      - 1);
     581                 :             : 
     582                 :             :       /* Recurse into the operand.  */
     583                 :      658101 :       return carry_backpropagate (mask, GET_CODE (XEXP (x, 0)), XEXP (x, 0));
     584                 :             : 
     585                 :     1057345 :     case ZERO_EXTEND:
     586                 :     1057345 :       if (!GET_MODE_BITSIZE (GET_MODE (x)).is_constant ()
     587                 :     1057345 :           || !GET_MODE_BITSIZE (GET_MODE (XEXP (x, 0))).is_constant ())
     588                 :             :         return -1;
     589                 :             : 
     590                 :             :       /* Recurse into the operand.  */
     591                 :     1057345 :       return carry_backpropagate (mask, GET_CODE (XEXP (x, 0)), XEXP (x, 0));
     592                 :             : 
     593                 :             :     /* We propagate for the shifted operand, but not the shift
     594                 :             :        count.  The count is handled specially.  */
     595                 :           0 :     case SS_ASHIFT:
     596                 :           0 :     case US_ASHIFT:
     597                 :           0 :       if (CONST_INT_P (XEXP (x, 1))
     598                 :           0 :           && UINTVAL (XEXP (x, 1)) < GET_MODE_BITSIZE (smode))
     599                 :             :         {
     600                 :           0 :           return ((mmask & ~((unsigned HOST_WIDE_INT) mmask
     601                 :           0 :                              >> (INTVAL (XEXP (x, 1))
     602                 :           0 :                                  + (XEXP (x, 1) != const0_rtx
     603                 :           0 :                                     && code == SS_ASHIFT))))
     604                 :           0 :                   | ((HOST_WIDE_INT) mask >> INTVAL (XEXP (x, 1))));
     605                 :             :         }
     606                 :             :       return mmask;
     607                 :             : 
     608                 :             :     default:
     609                 :             :       return mask;
     610                 :             :     }
     611                 :             : }
     612                 :             : 
     613                 :             : /* Process uses in INSN contained in OBJ.  Set appropriate bits in LIVENOW
     614                 :             :    for any chunks of pseudos that become live, potentially filtering using
     615                 :             :    bits from LIVE_TMP.
     616                 :             : 
     617                 :             :    If MODIFY is true, then optimize sign/zero extensions to SUBREGs when
     618                 :             :    the extended bits are never read and mark pseudos which had extensions
     619                 :             :    eliminated in CHANGED_PSEUDOS.  */
     620                 :             : 
     621                 :             : static void
     622                 :   135832638 : ext_dce_process_uses (rtx_insn *insn, rtx obj,
     623                 :             :                       bitmap live_tmp, bool skipped_dest)
     624                 :             : {
     625                 :   135832638 :   subrtx_var_iterator::array_type array_var;
     626                 :   736653478 :   FOR_EACH_SUBRTX_VAR (iter, array_var, obj, NONCONST)
     627                 :             :     {
     628                 :             :       /* An EXPR_LIST (from call fusage) ends in NULL_RTX.  */
     629                 :   600820840 :       rtx x = *iter;
     630                 :   600820840 :       if (x == NULL_RTX)
     631                 :     9400395 :         continue;
     632                 :             : 
     633                 :             :       /* So the basic idea in this FOR_EACH_SUBRTX_VAR loop is to
     634                 :             :          handle SETs explicitly, possibly propagating live information
     635                 :             :          into the uses.
     636                 :             : 
     637                 :             :          We may continue the loop at various points which will cause
     638                 :             :          iteration into the next level of RTL.  Breaking from the loop
     639                 :             :          is never safe as it can lead us to fail to process some of the
     640                 :             :          RTL and thus not make objects live when necessary.  */
     641                 :   591420445 :       enum rtx_code xcode = GET_CODE (x);
     642                 :   591420445 :       if (xcode == SET)
     643                 :             :         {
     644                 :   120015590 :           const_rtx dst = SET_DEST (x);
     645                 :   120015590 :           rtx src = SET_SRC (x);
     646                 :   120015590 :           const_rtx y;
     647                 :   120015590 :           unsigned HOST_WIDE_INT bit = 0;
     648                 :             : 
     649                 :             :           /* The code of the RHS of a SET.  */
     650                 :   120015590 :           enum rtx_code code = GET_CODE (src);
     651                 :             : 
     652                 :             :           /* ?!? How much of this should mirror SET handling, potentially
     653                 :             :              being shared?   */
     654                 :   120015590 :           if (SUBREG_P (dst) && SUBREG_BYTE (dst).is_constant ())
     655                 :             :             {
     656                 :      585120 :               bit = subreg_lsb (dst).to_constant ();
     657                 :      585120 :               if (bit >= HOST_BITS_PER_WIDE_INT)
     658                 :             :                 bit = HOST_BITS_PER_WIDE_INT - 1;
     659                 :      585120 :               dst = SUBREG_REG (dst);
     660                 :             :             }
     661                 :   119430470 :           else if (GET_CODE (dst) == STRICT_LOW_PART)
     662                 :       10240 :             dst = XEXP (dst, 0);
     663                 :             : 
     664                 :             :           /* Main processing of the uses.  Two major goals here.
     665                 :             : 
     666                 :             :              First, we want to try and propagate liveness (or the lack
     667                 :             :              thereof) from the destination register to the source
     668                 :             :              register(s).
     669                 :             : 
     670                 :             :              Second, if the source is an extension, try to optimize
     671                 :             :              it into a SUBREG.  The SUBREG form indicates we don't
     672                 :             :              care about the upper bits and will usually be copy
     673                 :             :              propagated away.
     674                 :             : 
     675                 :             :              If we fail to handle something in here, the expectation
     676                 :             :              is the iterator will dive into the sub-components and
     677                 :             :              mark all the chunks in any found REGs as live.  */
     678                 :   120015590 :           if (REG_P (dst) && safe_for_live_propagation (code))
     679                 :             :             {
     680                 :             :               /* Create a mask representing the bits of this output
     681                 :             :                  operand that are live after this insn.  We can use
     682                 :             :                  this information to refine the live in state of
     683                 :             :                  inputs to this insn in many cases.
     684                 :             : 
     685                 :             :                  We have to do this on a per SET basis, we might have
     686                 :             :                  an INSN with multiple SETS, some of which can narrow
     687                 :             :                  the source operand liveness, some of which may not.  */
     688                 :    70778587 :               unsigned HOST_WIDE_INT dst_mask = 0;
     689                 :    70778587 :               HOST_WIDE_INT rn = REGNO (dst);
     690                 :    70778587 :               unsigned HOST_WIDE_INT mask_array[]
     691                 :             :                 = { 0xff, 0xff00, HOST_WIDE_INT_UC (0xffff0000),
     692                 :             :                     -HOST_WIDE_INT_UC (0x100000000) };
     693                 :   353892935 :               for (int i = 0; i < 4; i++)
     694                 :   283114348 :                 if (bitmap_bit_p (live_tmp, 4 * rn + i))
     695                 :   227573926 :                   dst_mask |= mask_array[i];
     696                 :    70778587 :               dst_mask >>= bit;
     697                 :             : 
     698                 :             :               /* If we ignored a destination during set processing, then
     699                 :             :                  consider all the bits live.  */
     700                 :    70778587 :               if (skipped_dest)
     701                 :    24342908 :                 dst_mask = -1;
     702                 :             : 
     703                 :    70778587 :               dst_mask = carry_backpropagate (dst_mask, code, src);
     704                 :             : 
     705                 :             :               /* ??? Could also handle ZERO_EXTRACT / SIGN_EXTRACT
     706                 :             :                  of the source specially to improve optimization.  */
     707                 :    70778587 :               if (code == SIGN_EXTEND || code == ZERO_EXTEND)
     708                 :             :                 {
     709                 :     1728250 :                   rtx inner = XEXP (src, 0);
     710                 :     1728250 :                   unsigned HOST_WIDE_INT src_mask
     711                 :     1728250 :                     = GET_MODE_MASK (GET_MODE_INNER (GET_MODE (inner)));
     712                 :             : 
     713                 :             :                   /* DST_MASK could be zero if we had something in the SET
     714                 :             :                      that we couldn't handle.  */
     715                 :     1728250 :                   if (modify && !skipped_dest && (dst_mask & ~src_mask) == 0)
     716                 :        4796 :                     ext_dce_try_optimize_insn (insn, x);
     717                 :             : 
     718                 :             :                   /* Stripping the extension here just seems wrong on multiple
     719                 :             :                      levels.  It's source side handling, so it seems like it
     720                 :             :                      belongs in the loop below.  Stripping here also makes it
     721                 :             :                      harder than necessary to properly handle live bit groups
     722                 :             :                      for (ANY_EXTEND (SUBREG)) where the SUBREG has
     723                 :             :                      SUBREG_PROMOTED state.  */
     724                 :     1728250 :                   dst_mask &= src_mask;
     725                 :     1728250 :                   src = XEXP (src, 0);
     726                 :     1728250 :                   code = GET_CODE (src);
     727                 :             :                 }
     728                 :             : 
     729                 :             :               /* Optimization is done at this point.  We just want to make
     730                 :             :                  sure everything that should get marked as live is marked
     731                 :             :                  from here onward.  */
     732                 :             : 
     733                 :             :               /* We will handle the other operand of a binary operator
     734                 :             :                  at the bottom of the loop by resetting Y.  */
     735                 :    70778587 :               if (BINARY_P (src))
     736                 :    21977370 :                 y = XEXP (src, 0);
     737                 :             :               else
     738                 :             :                 y = src;
     739                 :             : 
     740                 :             :               /* We're inside a SET and want to process the source operands
     741                 :             :                  making things live.  Breaking from this loop will cause
     742                 :             :                  the iterator to work on sub-rtxs, so it is safe to break
     743                 :             :                  if we see something we don't know how to handle.
     744                 :             : 
     745                 :             :                  This code is just hokey as it really just handles trivial
     746                 :             :                  unary and binary cases.  Otherwise the loop exits and we
     747                 :             :                  continue iterating on sub-rtxs, but outside the set context.  */
     748                 :             :               unsigned HOST_WIDE_INT save_mask = dst_mask;
     749                 :   113742545 :               for (;;)
     750                 :             :                 {
     751                 :             :                   /* In general we want to restore DST_MASK before each loop
     752                 :             :                      iteration.  The exception is when the opcode implies that
     753                 :             :                      the other operand is fully live.  That's handled by
     754                 :             :                      changing SAVE_MASK below.  */
     755                 :    92260566 :                   dst_mask = save_mask;
     756                 :             :                   /* Strip an outer paradoxical subreg.  The bits outside
     757                 :             :                      the inner mode are don't cares.  So we can just strip
     758                 :             :                      and process the inner object.  */
     759                 :    92260566 :                   if (paradoxical_subreg_p (y))
     760                 :             :                     y = XEXP (y, 0);
     761                 :    92175966 :                   else if (SUBREG_P (y) && SUBREG_BYTE (y).is_constant ())
     762                 :             :                     {
     763                 :             :                       /* If !TRULY_NOOP_TRUNCATION_MODES_P, the mode
     764                 :             :                          change performed by Y would normally need to be a
     765                 :             :                          TRUNCATE rather than a SUBREG.  It is probably the
     766                 :             :                          guarantee provided by SUBREG_PROMOTED_VAR_P that
     767                 :             :                          allows the SUBREG in Y as an exception.  We must
     768                 :             :                          therefore preserve that guarantee and treat the
     769                 :             :                          upper bits of the inner register as live
     770                 :             :                          regardless of the outer code.  See PR 120050.  */
     771                 :     1845374 :                       if (!REG_P (SUBREG_REG (y))
     772                 :     1845374 :                           || (SUBREG_PROMOTED_VAR_P (y)
     773                 :       12162 :                               && (!TRULY_NOOP_TRUNCATION_MODES_P (
     774                 :             :                                     GET_MODE (y),
     775                 :             :                                     GET_MODE (SUBREG_REG (y))))))
     776                 :             :                         break;
     777                 :             : 
     778                 :     1844428 :                       bit = subreg_lsb (y).to_constant ();
     779                 :             : 
     780                 :             :                       /* If this is a wide object (more bits than we can fit
     781                 :             :                          in a HOST_WIDE_INT), then just break from the SET
     782                 :             :                          context.   That will cause the iterator to walk down
     783                 :             :                          into the subrtx and if we land on a REG we'll mark
     784                 :             :                          the whole think live.  */
     785                 :     1844428 :                       if (bit >= HOST_BITS_PER_WIDE_INT)
     786                 :             :                         break;
     787                 :             : 
     788                 :             :                       /* The SUBREG's mode determines the live width.  */
     789                 :     1614471 :                       if (dst_mask)
     790                 :             :                         {
     791                 :     1614471 :                           dst_mask <<= bit;
     792                 :     1614471 :                           if (!dst_mask)
     793                 :           0 :                             dst_mask = -HOST_WIDE_INT_UC (0x100000000);
     794                 :             :                         }
     795                 :     1614471 :                       y = SUBREG_REG (y);
     796                 :             :                     }
     797                 :             : 
     798                 :    92029663 :                   if (REG_P (y))
     799                 :             :                     {
     800                 :             :                       /* We have found the use of a register.  We need to mark
     801                 :             :                          the appropriate chunks of the register live.  The mode
     802                 :             :                          of the REG is a starting point.  We may refine that
     803                 :             :                          based on what chunks in the output were live.  */
     804                 :    48740891 :                       rn = 4 * REGNO (y);
     805                 :    48740891 :                       unsigned HOST_WIDE_INT tmp_mask = dst_mask;
     806                 :             : 
     807                 :             :                       /* If the RTX code for the SET_SRC is not one we can
     808                 :             :                          propagate destination liveness through, then just
     809                 :             :                          set the mask to the mode's mask.  */
     810                 :    48740891 :                       if (!safe_for_live_propagation (code))
     811                 :       29712 :                         tmp_mask
     812                 :       59424 :                           = GET_MODE_MASK (GET_MODE_INNER (GET_MODE (y)));
     813                 :             : 
     814                 :    48740891 :                       if (tmp_mask & 0xff)
     815                 :    48314597 :                         bitmap_set_bit (livenow, rn);
     816                 :    48740891 :                       if (tmp_mask & 0xff00)
     817                 :    46855708 :                         bitmap_set_bit (livenow, rn + 1);
     818                 :    48740891 :                       if (tmp_mask & HOST_WIDE_INT_UC (0xffff0000))
     819                 :    46605680 :                         bitmap_set_bit (livenow, rn + 2);
     820                 :    48740891 :                       if (tmp_mask & -HOST_WIDE_INT_UC (0x100000000))
     821                 :    40244820 :                         bitmap_set_bit (livenow, rn + 3);
     822                 :             :                     }
     823                 :    43288772 :                   else if (!CONSTANT_P (y))
     824                 :             :                     break;
     825                 :             : 
     826                 :             :                   /* We might have (ashift (const_int 1) (reg...))
     827                 :             :                      By setting dst_mask we can continue iterating on the
     828                 :             :                      the next operand and it will be considered fully live.
     829                 :             : 
     830                 :             :                      Note that since we restore DST_MASK from SAVE_MASK at the
     831                 :             :                      top of the loop, we have to change SAVE_MASK to get the
     832                 :             :                      semantics we want.  */
     833                 :    75706551 :                   if (binop_implies_op2_fully_live (GET_CODE (src)))
     834                 :     2443538 :                     save_mask = -1;
     835                 :             : 
     836                 :             :                   /* If this was anything but a binary operand, break the inner
     837                 :             :                      loop.  This is conservatively correct as it will cause the
     838                 :             :                      iterator to look at the sub-rtxs outside the SET context.  */
     839                 :    75706551 :                   if (!BINARY_P (src))
     840                 :             :                     break;
     841                 :             : 
     842                 :             :                   /* We processed the first operand of a binary operator.  Now
     843                 :             :                      handle the second.  */
     844                 :    21481979 :                   y = XEXP (src, 1), src = pc_rtx;
     845                 :    21481979 :                 }
     846                 :             : 
     847                 :             :               /* These are leaf nodes, no need to iterate down into them.  */
     848                 :    70778587 :               if (REG_P (y) || CONSTANT_P (y))
     849                 :    54224572 :                 iter.skip_subrtxes ();
     850                 :             :             }
     851                 :             :         }
     852                 :             :       /* If we are reading the low part of a SUBREG, then we can
     853                 :             :          refine liveness of the input register, otherwise let the
     854                 :             :          iterator continue into SUBREG_REG.  */
     855                 :   471404855 :       else if (SUBREG_P (x)
     856                 :     1298130 :                && REG_P (SUBREG_REG (x))
     857                 :     1296373 :                && !paradoxical_subreg_p (x)
     858                 :     1274744 :                && subreg_lowpart_p (x)
     859                 :      968054 :                && GET_MODE_BITSIZE (GET_MODE (x)).is_constant ()
     860                 :   473340963 :                && GET_MODE_BITSIZE (GET_MODE (x)).to_constant () <= 32)
     861                 :             :         {
     862                 :      478841 :           HOST_WIDE_INT size = GET_MODE_BITSIZE (GET_MODE (x)).to_constant ();
     863                 :      478841 :           HOST_WIDE_INT rn = 4 * REGNO (SUBREG_REG (x));
     864                 :             : 
     865                 :             :           /* If this is a promoted subreg, then more of it may be live than
     866                 :             :              is otherwise obvious.  */
     867                 :      478841 :           if (SUBREG_PROMOTED_VAR_P (x))
     868                 :        3994 :             size = GET_MODE_BITSIZE (GET_MODE (SUBREG_REG (x))).to_constant ();
     869                 :             : 
     870                 :      478841 :           bitmap_set_bit (livenow, rn);
     871                 :      478841 :           if (size > 8)
     872                 :      297027 :             bitmap_set_bit (livenow, rn + 1);
     873                 :      297027 :           if (size > 16)
     874                 :      260903 :             bitmap_set_bit (livenow, rn + 2);
     875                 :      260903 :           if (size >= 32)
     876                 :      260903 :             bitmap_set_bit (livenow, rn + 3);
     877                 :      478841 :           iter.skip_subrtxes ();
     878                 :             :         }
     879                 :             :       /* If we have a register reference that is not otherwise handled,
     880                 :             :          just assume all the chunks are live.  */
     881                 :   470926014 :       else if (REG_P (x))
     882                 :   156681359 :         bitmap_set_range (livenow, REGNO (x) * 4, group_limit (x));
     883                 :             :     }
     884                 :   135832638 : }
     885                 :             : 
     886                 :             : /* Process a single basic block BB with current liveness information
     887                 :             :    in LIVENOW, returning updated liveness information.
     888                 :             : 
     889                 :             :    If MODIFY is true, then this is the last pass and unnecessary
     890                 :             :    extensions should be eliminated when possible.  If an extension
     891                 :             :    is removed, the source pseudo is marked in CHANGED_PSEUDOS.  */
     892                 :             : 
     893                 :             : static void
     894                 :    22523555 : ext_dce_process_bb (basic_block bb)
     895                 :             : {
     896                 :    22523555 :   rtx_insn *insn;
     897                 :             : 
     898                 :   295321209 :   FOR_BB_INSNS_REVERSE (bb, insn)
     899                 :             :     {
     900                 :   419163065 :       if (!NONDEBUG_INSN_P (insn))
     901                 :   146365411 :         continue;
     902                 :             : 
     903                 :             :       /* Live-out state of the destination of this insn.  We can
     904                 :             :          use this to refine the live-in state of the sources of
     905                 :             :          this insn in many cases.  */
     906                 :   126432243 :       bitmap live_tmp = BITMAP_ALLOC (NULL);
     907                 :             : 
     908                 :             :       /* First process any sets/clobbers in INSN.  */
     909                 :   126432243 :       bool skipped_dest = ext_dce_process_sets (insn, PATTERN (insn), live_tmp);
     910                 :             : 
     911                 :             :       /* CALL_INSNs need processing their fusage data.  */
     912                 :   126432243 :       if (CALL_P (insn))
     913                 :     9400395 :         skipped_dest |= ext_dce_process_sets (insn,
     914                 :             :                                               CALL_INSN_FUNCTION_USAGE (insn),
     915                 :             :                                               live_tmp);
     916                 :             : 
     917                 :             :       /* And now uses, optimizing away SIGN/ZERO extensions as we go.  */
     918                 :   126432243 :       ext_dce_process_uses (insn, PATTERN (insn), live_tmp, skipped_dest);
     919                 :             : 
     920                 :             :       /* A nonlocal goto implicitly uses the frame pointer.  */
     921                 :   126432243 :       if (JUMP_P (insn) && find_reg_note (insn, REG_NON_LOCAL_GOTO, NULL_RTX))
     922                 :             :         {
     923                 :        1130 :           bitmap_set_range (livenow, FRAME_POINTER_REGNUM * 4, 4);
     924                 :        1130 :           if (!HARD_FRAME_POINTER_IS_FRAME_POINTER)
     925                 :        1130 :             bitmap_set_range (livenow, HARD_FRAME_POINTER_REGNUM * 4, 4);
     926                 :             :         }
     927                 :             : 
     928                 :             :       /* And process fusage data for the use as well.  */
     929                 :   126432243 :       if (CALL_P (insn))
     930                 :             :         {
     931                 :     9400395 :           if (!FAKE_CALL_P (insn))
     932                 :     9400335 :             bitmap_set_range (livenow, STACK_POINTER_REGNUM * 4, 4);
     933                 :             : 
     934                 :             :           /* If this is not a call to a const fucntion, then assume it
     935                 :             :              can read any global register.  */
     936                 :     9400395 :           if (!RTL_CONST_CALL_P (insn))
     937                 :   844032474 :             for (unsigned i = 0; i < FIRST_PSEUDO_REGISTER; i++)
     938                 :   834956856 :               if (global_regs[i])
     939                 :         230 :                 bitmap_set_range (livenow, i * 4, 4);
     940                 :             : 
     941                 :     9400395 :           ext_dce_process_uses (insn, CALL_INSN_FUNCTION_USAGE (insn), live_tmp, false);
     942                 :             :         }
     943                 :             : 
     944                 :   126432243 :       BITMAP_FREE (live_tmp);
     945                 :             :     }
     946                 :    22523555 : }
     947                 :             : 
     948                 :             : /* SUBREG_PROMOTED_VAR_P is set by the gimple->rtl optimizers and
     949                 :             :    is usually helpful.  However, in some cases setting the value when
     950                 :             :    it not strictly needed can cause this pass to miss optimizations.
     951                 :             : 
     952                 :             :    Specifically consider (set (mem) (subreg (reg))).  If set in that
     953                 :             :    case it will cause more bit groups to be live for REG than would
     954                 :             :    be strictly necessary which in turn can inhibit extension removal.
     955                 :             : 
     956                 :             :    So do a pass over the IL wiping the SUBREG_PROMOTED_VAR_P when it
     957                 :             :    is obviously not needed.  */
     958                 :             : 
     959                 :             : static void
     960                 :      947488 : maybe_clear_subreg_promoted_p (void)
     961                 :             : {
     962                 :   117007026 :   for (rtx_insn *insn = get_insns(); insn; insn = NEXT_INSN (insn))
     963                 :             :     {
     964                 :   116059538 :       if (!NONDEBUG_INSN_P (insn))
     965                 :    61851339 :         continue;
     966                 :             : 
     967                 :    54208199 :       rtx set = single_set (insn);
     968                 :    54208199 :       if (!set)
     969                 :     3605385 :         continue;
     970                 :             : 
     971                 :             :       /* There may be other cases where we should clear, but for
     972                 :             :          now, this is the only known case where it causes problems.  */
     973                 :    50602814 :       if (MEM_P (SET_DEST (set)) && SUBREG_P (SET_SRC (set))
     974                 :       69390 :         && GET_MODE (SET_DEST (set)) <= GET_MODE (SUBREG_REG (SET_SRC (set))))
     975                 :       60496 :         SUBREG_PROMOTED_VAR_P (SET_SRC (set)) = 0;
     976                 :             :     }
     977                 :      947488 : }
     978                 :             : 
     979                 :             : /* Walk the IL and build the transitive closure of all the REGs tied
     980                 :             :    together by copies where either the source or destination is
     981                 :             :    marked in CHANGED_PSEUDOS.  */
     982                 :             : 
     983                 :             : static void
     984                 :      947488 : expand_changed_pseudos (void)
     985                 :             : {
     986                 :             :   /* Build a vector of registers related by a copy.  This is meant to
     987                 :             :      speed up the next step by avoiding full IL walks.  */
     988                 :      947488 :   struct copy_pair { rtx first; rtx second; };
     989                 :      947488 :   auto_vec<copy_pair> pairs;
     990                 :   117007026 :   for (rtx_insn *insn = get_insns(); insn; insn = NEXT_INSN (insn))
     991                 :             :     {
     992                 :   116059538 :       if (!NONDEBUG_INSN_P (insn))
     993                 :    61851339 :         continue;
     994                 :             : 
     995                 :    54208199 :       rtx pat = PATTERN (insn);
     996                 :             : 
     997                 :             :       /* Simple copies to a REG from another REG or SUBREG of a REG.  */
     998                 :    54208199 :       if (GET_CODE (pat) == SET
     999                 :    42820278 :           && REG_P (SET_DEST (pat))
    1000                 :    30621575 :           && (REG_P (SET_SRC (pat))
    1001                 :    22140259 :               || (SUBREG_P (SET_SRC (pat))
    1002                 :      386440 :                   && REG_P (SUBREG_REG (SET_SRC (pat))))))
    1003                 :             :         {
    1004                 :      386003 :           rtx src = (REG_P (SET_SRC (pat))
    1005                 :     8867319 :                      ? SET_SRC (pat)
    1006                 :             :                      : SUBREG_REG (SET_SRC (pat)));
    1007                 :     8867319 :           pairs.safe_push ({ SET_DEST (pat), src });
    1008                 :             :         }
    1009                 :             : 
    1010                 :             :       /* Simple copies to a REG from another REG or SUBREG of a REG
    1011                 :             :          held inside a PARALLEL.  */
    1012                 :    54208199 :       if (GET_CODE (pat) == PARALLEL)
    1013                 :             :         {
    1014                 :    24466859 :           for (int i = XVECLEN (pat, 0) - 1; i >= 0; i--)
    1015                 :             :             {
    1016                 :    16420043 :               rtx elem = XVECEXP (pat, 0, i);
    1017                 :             : 
    1018                 :    16420043 :               if (GET_CODE (elem) == SET
    1019                 :     8252614 :                   && REG_P (SET_DEST (elem))
    1020                 :     8107409 :                   && (REG_P (SET_SRC (elem))
    1021                 :     8107409 :                       || (SUBREG_P (SET_SRC (elem))
    1022                 :           0 :                           && REG_P (SUBREG_REG (SET_SRC (elem))))))
    1023                 :             :                 {
    1024                 :           0 :                   rtx src = (REG_P (SET_SRC (elem))
    1025                 :           0 :                              ? SET_SRC (elem)
    1026                 :             :                              : SUBREG_REG (SET_SRC (elem)));
    1027                 :           0 :                   pairs.safe_push ({ SET_DEST (elem), src });
    1028                 :             :                 }
    1029                 :             :             }
    1030                 :     8046816 :           continue;
    1031                 :     8046816 :         }
    1032                 :             :     }
    1033                 :             : 
    1034                 :             :   /* Now we have a vector with copy pairs.  Iterate over that list
    1035                 :             :      updating CHANGED_PSEUDOS as we go.  Eliminate copies from the
    1036                 :             :      list as we go as they don't need further processing.  */
    1037                 :             :   bool changed = true;
    1038                 :     1895012 :   while (changed)
    1039                 :             :     {
    1040                 :             :       changed = false;
    1041                 :             :       unsigned int i;
    1042                 :             :       copy_pair *p;
    1043                 :    10762869 :       FOR_EACH_VEC_ELT (pairs, i, p)
    1044                 :             :         {
    1045                 :     8867857 :           if (bitmap_bit_p (changed_pseudos, REGNO (p->second))
    1046                 :     8867857 :               && bitmap_set_bit (changed_pseudos, REGNO (p->first)))
    1047                 :             :             {
    1048                 :          48 :               pairs.unordered_remove (i);
    1049                 :          48 :               changed = true;
    1050                 :             :             }
    1051                 :             :         }
    1052                 :             :     }
    1053                 :      947488 : }
    1054                 :             : 
    1055                 :             : /* We optimize away sign/zero extensions in this pass and replace
    1056                 :             :    them with SUBREGs indicating certain bits are don't cares.
    1057                 :             : 
    1058                 :             :    This changes the SUBREG_PROMOTED_VAR_P state of the object.
    1059                 :             :    It is fairly painful to fix this on the fly, so we have
    1060                 :             :    recorded which pseudos are affected and we look for SUBREGs
    1061                 :             :    of those pseudos and fix them up.  */
    1062                 :             : 
    1063                 :             : static void
    1064                 :      947488 : reset_subreg_promoted_p (void)
    1065                 :             : {
    1066                 :             :   /* This pass eliminates zero/sign extensions on pseudo regs found
    1067                 :             :      in CHANGED_PSEUDOS.  Elimination of those extensions changes if
    1068                 :             :      the pseudos are known to hold values extended to wider modes
    1069                 :             :      via SUBREG_PROMOTED_VAR.  So we wipe the SUBREG_PROMOTED_VAR
    1070                 :             :      state on all affected pseudos.
    1071                 :             : 
    1072                 :             :      But that is insufficient.  We might have a copy from one REG
    1073                 :             :      to another (possibly with the source register wrapped with a
    1074                 :             :      SUBREG).  We need to wipe SUBREG_PROMOTED_VAR on the transitive
    1075                 :             :      closure of the original CHANGED_PSEUDOS and registers they're
    1076                 :             :      connected to via copies.  So expand the set.  */
    1077                 :      947488 :   expand_changed_pseudos ();
    1078                 :             :     
    1079                 :             :   /* If we removed an extension, that changed the promoted state
    1080                 :             :      of the destination of that extension.  Thus we need to go
    1081                 :             :      find any SUBREGs that reference that pseudo and adjust their
    1082                 :             :      SUBREG_PROMOTED_P state.  */
    1083                 :   117007026 :   for (rtx_insn *insn = get_insns(); insn; insn = NEXT_INSN (insn))
    1084                 :             :     {
    1085                 :   116059538 :       if (!NONDEBUG_INSN_P (insn))
    1086                 :    61851339 :         continue;
    1087                 :             : 
    1088                 :    54208199 :       rtx pat = PATTERN (insn);
    1089                 :    54208199 :       subrtx_var_iterator::array_type array;
    1090                 :   344879878 :       FOR_EACH_SUBRTX_VAR (iter, array, pat, NONCONST)
    1091                 :             :         {
    1092                 :   290671679 :           rtx sub = *iter;
    1093                 :             : 
    1094                 :             :           /* We only care about SUBREGs.  */
    1095                 :   290671679 :           if (GET_CODE (sub) != SUBREG)
    1096                 :   289196250 :             continue;
    1097                 :             : 
    1098                 :     1475429 :           const_rtx x = SUBREG_REG (sub);
    1099                 :             : 
    1100                 :             :           /* We only care if the inner object is a REG.  */
    1101                 :     1475429 :           if (!REG_P (x))
    1102                 :         732 :             continue;
    1103                 :             : 
    1104                 :             :           /* And only if the SUBREG is a promoted var.  */
    1105                 :     1474697 :           if (!SUBREG_PROMOTED_VAR_P (sub))
    1106                 :     1469540 :             continue;
    1107                 :             : 
    1108                 :        5157 :           if (bitmap_bit_p (changed_pseudos, REGNO (x)))
    1109                 :           0 :             SUBREG_PROMOTED_VAR_P (sub) = 0;
    1110                 :             :         }
    1111                 :    54208199 :     }
    1112                 :      947488 : }
    1113                 :             : 
    1114                 :             : /* Initialization of the ext-dce pass.  Primarily this means
    1115                 :             :    setting up the various bitmaps we utilize.  */
    1116                 :             : 
    1117                 :             : static void
    1118                 :      947488 : ext_dce_init (void)
    1119                 :             : {
    1120                 :      947488 :   livein.create (last_basic_block_for_fn (cfun));
    1121                 :      947488 :   livein.quick_grow_cleared (last_basic_block_for_fn (cfun));
    1122                 :    12494841 :   for (int i = 0; i < last_basic_block_for_fn (cfun); i++)
    1123                 :    11547353 :     bitmap_initialize (&livein[i], &bitmap_default_obstack);
    1124                 :             : 
    1125                 :      947488 :   auto_bitmap refs (&bitmap_default_obstack);
    1126                 :      947488 :   df_get_exit_block_use_set (refs);
    1127                 :             : 
    1128                 :      947488 :   unsigned i;
    1129                 :      947488 :   bitmap_iterator bi;
    1130                 :     4361648 :   EXECUTE_IF_SET_IN_BITMAP (refs, 0, i, bi)
    1131                 :     3414160 :     make_reg_live (&livein[EXIT_BLOCK], i);
    1132                 :             : 
    1133                 :      947488 :   livenow = BITMAP_ALLOC (NULL);
    1134                 :      947488 :   all_blocks = BITMAP_ALLOC (NULL);
    1135                 :      947488 :   changed_pseudos = BITMAP_ALLOC (NULL);
    1136                 :             : 
    1137                 :    12494841 :   for (int i = 0; i < last_basic_block_for_fn (cfun); i++)
    1138                 :    11547353 :     if (i != ENTRY_BLOCK && i != EXIT_BLOCK)
    1139                 :     9652377 :       bitmap_set_bit (all_blocks, i);
    1140                 :             : 
    1141                 :      947488 :   modify = false;
    1142                 :      947488 : }
    1143                 :             : 
    1144                 :             : /* Finalization of the ext-dce pass.  Primarily this means
    1145                 :             :    releasing up the various bitmaps we utilize.  */
    1146                 :             : 
    1147                 :             : static void
    1148                 :      947488 : ext_dce_finish (void)
    1149                 :             : {
    1150                 :    12494841 :   for (unsigned i = 0; i < livein.length (); i++)
    1151                 :    11547353 :     bitmap_clear (&livein[i]);
    1152                 :      947488 :   livein.release ();
    1153                 :             : 
    1154                 :      947488 :   BITMAP_FREE (livenow);
    1155                 :      947488 :   BITMAP_FREE (changed_pseudos);
    1156                 :      947488 :   BITMAP_FREE (all_blocks);
    1157                 :      947488 : }
    1158                 :             : 
    1159                 :             : /* Process block number BB_INDEX as part of the backward
    1160                 :             :    simple dataflow analysis.  Return TRUE if something in
    1161                 :             :    this block changed or FALSE otherwise.  */
    1162                 :             : 
    1163                 :             : static bool
    1164                 :    26313507 : ext_dce_rd_transfer_n (int bb_index)
    1165                 :             : {
    1166                 :             :   /* The ENTRY/EXIT blocks never change.  */
    1167                 :    26313507 :   if (bb_index == ENTRY_BLOCK || bb_index == EXIT_BLOCK)
    1168                 :             :     return false;
    1169                 :             : 
    1170                 :    22523555 :   basic_block bb = BASIC_BLOCK_FOR_FN (cfun, bb_index);
    1171                 :             : 
    1172                 :             :   /* Make everything live that's live in the successors.  */
    1173                 :    22523555 :   bitmap_clear (livenow);
    1174                 :    22523555 :   edge_iterator ei;
    1175                 :    22523555 :   edge e;
    1176                 :             : 
    1177                 :    56672031 :   FOR_EACH_EDGE (e, ei, bb->succs)
    1178                 :    34148476 :     bitmap_ior_into (livenow, &livein[e->dest->index]);
    1179                 :             : 
    1180                 :    22523555 :   ext_dce_process_bb (bb);
    1181                 :             : 
    1182                 :             :   /* We only allow widening the set of objects live at the start
    1183                 :             :      of a block.  Otherwise we run the risk of not converging.  */
    1184                 :    22523555 :   return bitmap_ior_into (&livein[bb_index], livenow);
    1185                 :             : }
    1186                 :             : 
    1187                 :             : /* Dummy function for the df_simple_dataflow API.  */
    1188                 :    32779025 : static bool ext_dce_rd_confluence_n (edge) { return true; }
    1189                 :             : 
    1190                 :             : /* Use lifetime analyis to identify extensions that set bits that
    1191                 :             :    are never read.  Turn such extensions into SUBREGs instead which
    1192                 :             :    can often be propagated away.  */
    1193                 :             : 
    1194                 :             : void
    1195                 :      947488 : ext_dce_execute (void)
    1196                 :             : {
    1197                 :             :   /* Limit the amount of memory we use for livein, with 4 bits per
    1198                 :             :      reg per basic-block including overhead that maps to one byte
    1199                 :             :      per reg per basic-block.  */
    1200                 :      947488 :   uint64_t memory_request
    1201                 :      947488 :     = (uint64_t)n_basic_blocks_for_fn (cfun) * max_reg_num ();
    1202                 :      947488 :   if (memory_request / 1024 > (uint64_t)param_max_gcse_memory)
    1203                 :             :     {
    1204                 :           0 :       warning (OPT_Wdisabled_optimization,
    1205                 :             :                "ext-dce disabled: %d basic blocks and %d registers; "
    1206                 :             :                "increase %<--param max-gcse-memory%> above %wu",
    1207                 :           0 :                n_basic_blocks_for_fn (cfun), max_reg_num (),
    1208                 :             :                memory_request / 1024);
    1209                 :           0 :       return;
    1210                 :             :     }
    1211                 :             : 
    1212                 :             :   /* Some settings of SUBREG_PROMOTED_VAR_P are actively harmful
    1213                 :             :      to this pass.  Clear it for those cases.  */
    1214                 :      947488 :   maybe_clear_subreg_promoted_p ();
    1215                 :      947488 :   df_analyze ();
    1216                 :      947488 :   ext_dce_init ();
    1217                 :             : 
    1218                 :     3789952 :   do
    1219                 :             :     {
    1220                 :     1894976 :       df_simple_dataflow (DF_BACKWARD, NULL, NULL,
    1221                 :             :                           ext_dce_rd_confluence_n, ext_dce_rd_transfer_n,
    1222                 :             :                           all_blocks, df_get_postorder (DF_BACKWARD),
    1223                 :             :                           df_get_n_blocks (DF_BACKWARD));
    1224                 :     1894976 :       modify = !modify;
    1225                 :             :     }
    1226                 :             :   while (modify);
    1227                 :             : 
    1228                 :      947488 :   reset_subreg_promoted_p ();
    1229                 :             : 
    1230                 :      947488 :   ext_dce_finish ();
    1231                 :             : }
    1232                 :             : 
    1233                 :             : 
    1234                 :             : namespace {
    1235                 :             : 
    1236                 :             : const pass_data pass_data_ext_dce =
    1237                 :             : {
    1238                 :             :   RTL_PASS, /* type */
    1239                 :             :   "ext_dce", /* name */
    1240                 :             :   OPTGROUP_NONE, /* optinfo_flags */
    1241                 :             :   TV_EXT_DCE, /* tv_id */
    1242                 :             :   PROP_cfglayout, /* properties_required */
    1243                 :             :   0, /* properties_provided */
    1244                 :             :   0, /* properties_destroyed */
    1245                 :             :   0, /* todo_flags_start */
    1246                 :             :   TODO_df_finish, /* todo_flags_finish */
    1247                 :             : };
    1248                 :             : 
    1249                 :             : class pass_ext_dce : public rtl_opt_pass
    1250                 :             : {
    1251                 :             : public:
    1252                 :      286682 :   pass_ext_dce (gcc::context *ctxt)
    1253                 :      573364 :     : rtl_opt_pass (pass_data_ext_dce, ctxt)
    1254                 :             :   {}
    1255                 :             : 
    1256                 :             :   /* opt_pass methods: */
    1257                 :     1455336 :   virtual bool gate (function *) { return flag_ext_dce && optimize > 0; }
    1258                 :      947488 :   virtual unsigned int execute (function *)
    1259                 :             :     {
    1260                 :      947488 :       ext_dce_execute ();
    1261                 :      947488 :       return 0;
    1262                 :             :     }
    1263                 :             : 
    1264                 :             : }; // class pass_combine
    1265                 :             : 
    1266                 :             : } // anon namespace
    1267                 :             : 
    1268                 :             : rtl_opt_pass *
    1269                 :      286682 : make_pass_ext_dce (gcc::context *ctxt)
    1270                 :             : {
    1271                 :      286682 :   return new pass_ext_dce (ctxt);
    1272                 :             : }
        

Generated by: LCOV version 2.1-beta

LCOV profile is generated on x86_64 machine using following configure options: configure --disable-bootstrap --enable-coverage=opt --enable-languages=c,c++,fortran,go,jit,lto,rust,m2 --enable-host-shared. GCC test suite is run with the built compiler.