LCOV - code coverage report
Current view: top level - gcc - ext-dce.cc (source / functions) Coverage Total Hit
Test: gcc.info Lines: 85.1 % 415 353
Test Date: 2024-11-30 13:30:02 Functions: 94.7 % 19 18
Legend: Lines: hit not hit | Branches: + taken - not taken # not executed Branches: - 0 0

             Branch data     Line data    Source code
       1                 :             : /* RTL dead zero/sign extension (code) elimination.
       2                 :             :    Copyright (C) 2000-2022 Free Software Foundation, Inc.
       3                 :             : 
       4                 :             : This file is part of GCC.
       5                 :             : 
       6                 :             : GCC is free software; you can redistribute it and/or modify it under
       7                 :             : the terms of the GNU General Public License as published by the Free
       8                 :             : Software Foundation; either version 3, or (at your option) any later
       9                 :             : version.
      10                 :             : 
      11                 :             : GCC is distributed in the hope that it will be useful, but WITHOUT ANY
      12                 :             : WARRANTY; without even the implied warranty of MERCHANTABILITY or
      13                 :             : FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
      14                 :             : for more details.
      15                 :             : 
      16                 :             : You should have received a copy of the GNU General Public License
      17                 :             : along with GCC; see the file COPYING3.  If not see
      18                 :             : <http://www.gnu.org/licenses/>.  */
      19                 :             : 
      20                 :             : #include "config.h"
      21                 :             : #include "system.h"
      22                 :             : #include "coretypes.h"
      23                 :             : #include "backend.h"
      24                 :             : #include "rtl.h"
      25                 :             : #include "tree.h"
      26                 :             : #include "memmodel.h"
      27                 :             : #include "insn-config.h"
      28                 :             : #include "emit-rtl.h"
      29                 :             : #include "recog.h"
      30                 :             : #include "cfganal.h"
      31                 :             : #include "tree-pass.h"
      32                 :             : #include "cfgrtl.h"
      33                 :             : #include "rtl-iter.h"
      34                 :             : #include "df.h"
      35                 :             : #include "print-rtl.h"
      36                 :             : #include "dbgcnt.h"
      37                 :             : 
/* These should probably move into a C++ class.  */

/* Not referenced in this part of the file; presumably the per-basic-block
   live-in sets -- TODO confirm against the dataflow code.  */
static vec<bitmap_head> livein;

/* Not referenced in this part of the file; presumably the set of blocks
   the pass operates on -- TODO confirm.  */
static bitmap all_blocks;

/* Bit groups live at the point currently being processed.  Group I of
   register REGNO is bit REGNO * 4 + I.  ext_dce_process_sets clears the
   groups written by an insn.  */
static bitmap livenow;

/* Register numbers of destinations whose extension source was
   successfully replaced by ext_dce_try_optimize_insn.  */
static bitmap changed_pseudos;

/* Not referenced in this part of the file; presumably selects between
   analysis-only and transforming mode -- TODO confirm.  */
static bool modify;
      44                 :             : 
      45                 :             : /* We consider four bit groups for liveness:
      46                 :             :    bit 0..7   (least significant byte)
      47                 :             :    bit 8..15  (second least significant byte)
      48                 :             :    bit 16..31
      49                 :             :    bit 32..BITS_PER_WORD-1  */
      50                 :             : 
      51                 :             : /* For the given REG, return the number of bit groups implied by the
      52                 :             :    size of the REG's mode, up to a maximum of 4 (number of bit groups
      53                 :             :    tracked by this pass).
      54                 :             : 
      55                 :             :    For partial integer and variable sized modes also return 4.  This
      56                 :             :    could possibly be refined for something like PSI mode, but it
      57                 :             :    does not seem worth the effort.  */
      58                 :             : 
      59                 :             : static int
      60                 :   219256017 : group_limit (const_rtx reg)
      61                 :             : {
      62                 :   219256017 :   machine_mode mode = GET_MODE (reg);
      63                 :             : 
      64                 :   219256017 :   if (!GET_MODE_BITSIZE (mode).is_constant ())
      65                 :             :     return 4;
      66                 :             : 
      67                 :   219256017 :   int size = GET_MODE_SIZE (mode).to_constant ();
      68                 :             : 
      69                 :   219256017 :   size = exact_log2 (size);
      70                 :             : 
      71                 :   219162159 :   if (size < 0)
      72                 :             :     return 4;
      73                 :             : 
      74                 :   219162159 :   size++;
      75                 :   219162159 :   return (size > 4 ? 4 : size);
      76                 :             : }
      77                 :             : 
      78                 :             : /* Make all bit groups live for REGNO in bitmap BMAP.  For hard regs,
      79                 :             :    we assume all groups are live.  For a pseudo we consider the size
      80                 :             :    of the pseudo to avoid creating unnecessarily live chunks of data.  */
      81                 :             : 
      82                 :             : static void
      83                 :     4681794 : make_reg_live (bitmap bmap, int regno)
      84                 :             : {
      85                 :     4681794 :   int limit;
      86                 :             : 
      87                 :             :   /* For pseudos we can use the mode to limit how many bit groups
      88                 :             :      are marked as live since a pseudo only has one mode.  Hard
      89                 :             :      registers have to be handled more conservatively.  */
      90                 :     4681794 :   if (regno > FIRST_PSEUDO_REGISTER)
      91                 :             :     {
      92                 :      883816 :       rtx reg = regno_reg_rtx[regno];
      93                 :      883816 :       limit = group_limit (reg);
      94                 :             :     }
      95                 :             :   else
      96                 :             :     limit = 4;
      97                 :             : 
      98                 :    23088644 :   for (int i = 0; i < limit; i++)
      99                 :    18406850 :     bitmap_set_bit (bmap, regno * 4 + i);
     100                 :     4681794 : }
     101                 :             : 
     102                 :             : /* Note this pass could be used to narrow memory loads too.  It's
     103                 :             :    not clear if that's profitable or not in general.  */
     104                 :             : 
/* True if the code of rtx X is UNSPEC or UNSPEC_VOLATILE.  X is
   evaluated twice, so it must not have side effects.  */
#define UNSPEC_P(X) (GET_CODE (X) == UNSPEC || GET_CODE (X) == UNSPEC_VOLATILE)
     106                 :             : 
     107                 :             : /* If we know the destination of CODE only uses some low bits
     108                 :             :    (say just the QI bits of an SI operation), then return true
     109                 :             :    if we can propagate the need for just the subset of bits
     110                 :             :    from the destination to the sources.
     111                 :             : 
     112                 :             :    FIXME: This is safe for operands 1 and 2 of an IF_THEN_ELSE, but not
     113                 :             :    operand 0.  Thus is likely would need some special casing to handle.  */
     114                 :             : 
     115                 :             : static bool
     116                 :   136097016 : safe_for_live_propagation (rtx_code code)
     117                 :             : {
     118                 :             :   /* First handle rtx classes which as a whole are known to
     119                 :             :      be either safe or unsafe.  */
     120                 :   136097016 :   switch (GET_RTX_CLASS (code))
     121                 :             :     {
     122                 :             :       case RTX_OBJ:
     123                 :             :       case RTX_CONST_OBJ:
     124                 :             :         return true;
     125                 :             : 
     126                 :             :       case RTX_COMPARE:
     127                 :             :       case RTX_COMM_COMPARE:
     128                 :             :       case RTX_TERNARY:
     129                 :             :         return false;
     130                 :             : 
     131                 :    70825329 :       default:
     132                 :    70825329 :         break;
     133                 :             :     }
     134                 :             : 
     135                 :             :   /* What's left are specific codes.  We only need to identify those
     136                 :             :      which are safe.   */
     137                 :    70825329 :   switch (code)
     138                 :             :     {
     139                 :             :     /* These are trivially safe.  */
     140                 :             :     case SUBREG:
     141                 :             :     case NOT:
     142                 :             :     case ZERO_EXTEND:
     143                 :             :     case SIGN_EXTEND:
     144                 :             :     case TRUNCATE:
     145                 :             :     case PLUS:
     146                 :             :     case MINUS:
     147                 :             :     case MULT:
     148                 :             :     case SMUL_HIGHPART:
     149                 :             :     case UMUL_HIGHPART:
     150                 :             :     case AND:
     151                 :             :     case IOR:
     152                 :             :     case XOR:
     153                 :             :       return true;
     154                 :             : 
     155                 :             :     /* We can propagate for the shifted operand, but not the shift
     156                 :             :        count.  The count is handled specially.  */
     157                 :             :     case ASHIFT:
     158                 :             :     case LSHIFTRT:
     159                 :             :     case ASHIFTRT:
     160                 :             :     case SS_ASHIFT:
     161                 :             :     case US_ASHIFT:
     162                 :             :       return true;
     163                 :             : 
     164                 :             :     /* There may be other safe codes.  If so they can be added
     165                 :             :        individually when discovered.  */
     166                 :             :     default:
     167                 :             :       return false;
     168                 :             :     }
     169                 :             : }
     170                 :             : 
     171                 :             : /* Clear bits in LIVENOW and set bits in LIVE_TMP for objects
     172                 :             :    set/clobbered by OBJ contained in INSN.
     173                 :             : 
     174                 :             :    Conceptually it is always safe to ignore a particular destination
     175                 :             :    here as that will result in more chunks of data being considered
     176                 :             :    live.  That's what happens when we "continue" the main loop when
     177                 :             :    we see something we don't know how to handle such as a vector
     178                 :             :    mode destination.
     179                 :             : 
     180                 :             :    The more accurate we are in identifying what objects (and chunks
     181                 :             :    within an object) are set by INSN, the more aggressive the
     182                 :             :    optimization phase during use handling will be.  */
     183                 :             : 
     184                 :             : static bool
     185                 :   132563480 : ext_dce_process_sets (rtx_insn *insn, rtx obj, bitmap live_tmp)
     186                 :             : {
     187                 :   132563480 :   bool skipped_dest = false;
     188                 :             : 
     189                 :   132563480 :   subrtx_iterator::array_type array;
     190                 :   373671293 :   FOR_EACH_SUBRTX (iter, array, obj, NONCONST)
     191                 :             :     {
     192                 :   241107813 :       const_rtx x = *iter;
     193                 :             : 
     194                 :             :       /* An EXPR_LIST (from call fusage) ends in NULL_RTX.  */
     195                 :   241107813 :       if (x == NULL_RTX)
     196                 :     9176496 :         continue;
     197                 :             : 
     198                 :   231931317 :       if (UNSPEC_P (x))
     199                 :      566300 :         continue;
     200                 :             : 
     201                 :   231365017 :       if (GET_CODE (x) == SET || GET_CODE (x) == CLOBBER)
     202                 :             :         {
     203                 :   137759811 :           unsigned bit = 0;
     204                 :   137759811 :           x = SET_DEST (x);
     205                 :             : 
     206                 :             :           /* We don't support vector destinations or destinations
     207                 :             :              wider than DImode.  */
     208                 :   137759811 :           scalar_int_mode outer_mode;
     209                 :   140371508 :           if (!is_a <scalar_int_mode> (GET_MODE (x), &outer_mode)
     210                 :    81352222 :               || GET_MODE_BITSIZE (outer_mode) > HOST_BITS_PER_WIDE_INT)
     211                 :             :             {
     212                 :             :               /* Skip the subrtxs of this destination.  There is
     213                 :             :                  little value in iterating into the subobjects, so
     214                 :             :                  just skip them for a bit of efficiency.  */
     215                 :    59019286 :               skipped_dest = true;
     216                 :    59019286 :               iter.skip_subrtxes ();
     217                 :   300127099 :               continue;
     218                 :             :             }
     219                 :             : 
     220                 :             :           /* We could have (strict_low_part (subreg ...)).  We can not just
     221                 :             :              strip the STRICT_LOW_PART as that would result in clearing
     222                 :             :              some bits in LIVENOW that are still live.  So process the
     223                 :             :              STRICT_LOW_PART specially.  */
     224                 :    78740525 :           if (GET_CODE (x) == STRICT_LOW_PART)
     225                 :             :             {
     226                 :           0 :               x = XEXP (x, 0);
     227                 :             : 
     228                 :             :               /* The only valid operand of a STRICT_LOW_PART is a non
     229                 :             :                  paradoxical SUBREG.  */
     230                 :           0 :               gcc_assert (SUBREG_P (x)
     231                 :             :                           && !paradoxical_subreg_p (x)
     232                 :             :                           && SUBREG_BYTE (x).is_constant ());
     233                 :             : 
     234                 :             :               /* I think we should always see a REG here.  But let's
     235                 :             :                  be sure.  */
     236                 :           0 :               gcc_assert (REG_P (SUBREG_REG (x)));
     237                 :             : 
     238                 :             :               /* The inner mode might be larger, just punt for
     239                 :             :                  that case.  Remember, we can not just continue to process
     240                 :             :                  the inner RTXs due to the STRICT_LOW_PART.  */
     241                 :           0 :               if (!is_a <scalar_int_mode> (GET_MODE (SUBREG_REG (x)), &outer_mode)
     242                 :           0 :                   || GET_MODE_BITSIZE (outer_mode) > HOST_BITS_PER_WIDE_INT)
     243                 :             :                 {
     244                 :             :                   /* Skip the subrtxs of the STRICT_LOW_PART.  We can't
     245                 :             :                      process them because it'll set objects as no longer
     246                 :             :                      live when they are in fact still live.  */
     247                 :           0 :                   skipped_dest = true;
     248                 :           0 :                   iter.skip_subrtxes ();
     249                 :           0 :                   continue;
     250                 :             :                 }
     251                 :             : 
     252                 :             :               /* LIVE_TMP contains the set groups that are live-out and set in
     253                 :             :                  this insn.  It is used to narrow the groups live-in for the
     254                 :             :                  inputs of this insn.
     255                 :             : 
     256                 :             :                  The simple thing to do is mark all the groups as live, but
     257                 :             :                  that will significantly inhibit optimization.
     258                 :             : 
     259                 :             :                  We also need to be careful in the case where we have an in-out
     260                 :             :                  operand.  If we're not careful we'd clear LIVE_TMP
     261                 :             :                  incorrectly.  */
     262                 :           0 :               HOST_WIDE_INT rn = REGNO (SUBREG_REG (x));
     263                 :           0 :               int limit = group_limit (SUBREG_REG (x));
     264                 :           0 :               for (HOST_WIDE_INT i = 4 * rn; i < 4 * rn + limit; i++)
     265                 :           0 :                 if (bitmap_bit_p (livenow, i))
     266                 :           0 :                   bitmap_set_bit (live_tmp, i);
     267                 :             : 
     268                 :           0 :               if (bitmap_empty_p (live_tmp))
     269                 :           0 :                 make_reg_live (live_tmp, rn);
     270                 :             : 
     271                 :             :               /* The mode of the SUBREG tells us how many bits we can
     272                 :             :                  clear.  */
     273                 :           0 :               machine_mode mode = GET_MODE (x);
     274                 :           0 :               HOST_WIDE_INT size
     275                 :           0 :                 = exact_log2 (GET_MODE_SIZE (mode).to_constant ()) + 1;
     276                 :           0 :               bitmap_clear_range (livenow, 4 * rn, size);
     277                 :             : 
     278                 :             :               /* We have fully processed this destination.  */
     279                 :           0 :               iter.skip_subrtxes ();
     280                 :           0 :               continue;
     281                 :           0 :             }
     282                 :             : 
     283                 :             :           /* Phase one of destination handling.  First remove any wrapper
     284                 :             :              such as SUBREG or ZERO_EXTRACT.  */
     285                 :    78740525 :           unsigned HOST_WIDE_INT mask
     286                 :    78740525 :             = GET_MODE_MASK (GET_MODE_INNER (GET_MODE (x)));
     287                 :    78740525 :           if (SUBREG_P (x))
     288                 :             :             {
     289                 :             :               /* If we have a SUBREG destination that is too wide, just
     290                 :             :                  skip the destination rather than continuing this iterator.
     291                 :             :                  While continuing would be better, we'd need to strip the
     292                 :             :                  subreg and restart within the SET processing rather than
     293                 :             :                  the top of the loop which just complicates the flow even
     294                 :             :                  more.  */
     295                 :      669703 :               if (!is_a <scalar_int_mode> (GET_MODE (SUBREG_REG (x)), &outer_mode)
     296                 :      547025 :                   || GET_MODE_BITSIZE (outer_mode) > HOST_BITS_PER_WIDE_INT)
     297                 :             :                 {
     298                 :      122678 :                   skipped_dest = true;
     299                 :      122678 :                   iter.skip_subrtxes ();
     300                 :      122678 :                   continue;
     301                 :             :                 }
     302                 :             : 
     303                 :             :               /* We can safely strip a paradoxical subreg.  The inner mode will
     304                 :             :                  be narrower than the outer mode.  We'll clear fewer bits in
     305                 :             :                  LIVENOW than we'd like, but that's always safe.  */
     306                 :      433473 :               if (paradoxical_subreg_p (x))
     307                 :             :                 x = XEXP (x, 0);
     308                 :      426189 :               else if (SUBREG_BYTE (x).is_constant ())
     309                 :             :                 {
     310                 :      426189 :                   bit = subreg_lsb (x).to_constant ();
     311                 :      426189 :                   mask = GET_MODE_MASK (GET_MODE (SUBREG_REG (x))) << bit;
     312                 :      426189 :                   gcc_assert (mask);
     313                 :             :                   x = SUBREG_REG (x);
     314                 :             :                 }
     315                 :             :               else
     316                 :             :                 gcc_unreachable ();
     317                 :             :             }
     318                 :             : 
     319                 :    78617847 :           if (GET_CODE (x) == ZERO_EXTRACT)
     320                 :             :             {
     321                 :             :               /* Unlike a SUBREG destination, a set of a ZERO_EXTRACT only
     322                 :             :                  modifies the bits referenced in the ZERO_EXTRACT, the rest
     323                 :             :                  remain the same.  Thus we can not continue here, we must
     324                 :             :                  either figure out what part of the destination is modified
     325                 :             :                  or skip the sub-rtxs.  */
     326                 :        4206 :               skipped_dest = true;
     327                 :        4206 :               iter.skip_subrtxes ();
     328                 :        4206 :               continue;
     329                 :             :             }
     330                 :             : 
     331                 :             :           /* BIT >= 64 indicates something went horribly wrong.  */
     332                 :    78613641 :           gcc_assert (bit <= HOST_BITS_PER_WIDE_INT - 1);
     333                 :             : 
     334                 :             :           /* Now handle the actual object that was changed.  */
     335                 :    78613641 :           if (REG_P (x))
     336                 :             :             {
     337                 :             :               /* LIVE_TMP contains the set groups that are live-out and set in
     338                 :             :                  this insn.  It is used to narrow the groups live-in for the
     339                 :             :                  inputs of this insn.
     340                 :             : 
     341                 :             :                  The simple thing to do is mark all the groups as live, but
     342                 :             :                  that will significantly inhibit optimization.
     343                 :             : 
     344                 :             :                  We also need to be careful in the case where we have an in-out
     345                 :             :                  operand.  If we're not careful we'd clear LIVE_TMP
     346                 :             :                  incorrectly.  */
     347                 :    66142570 :               HOST_WIDE_INT rn = REGNO (x);
     348                 :    66142570 :               int limit = group_limit (x);
     349                 :   294782497 :               for (HOST_WIDE_INT i = 4 * rn; i < 4 * rn + limit; i++)
     350                 :   228639927 :                 if (bitmap_bit_p (livenow, i))
     351                 :   221265436 :                   bitmap_set_bit (live_tmp, i);
     352                 :             : 
     353                 :    66142570 :               if (bitmap_empty_p (live_tmp))
     354                 :     1213641 :                 make_reg_live (live_tmp, rn);
     355                 :             : 
     356                 :             :               /* Now clear the bits known written by this instruction.
     357                 :             :                  Note that BIT need not be a power of two, consider a
     358                 :             :                  ZERO_EXTRACT destination.  */
     359                 :    66142570 :               int start = (bit < 8 ? 0 : bit < 16 ? 1 : bit < 32 ? 2 : 3);
     360                 :    66142570 :               int end = ((mask & ~HOST_WIDE_INT_UC (0xffffffff)) ? 4
     361                 :    25477627 :                          : (mask & HOST_WIDE_INT_UC (0xffff0000)) ? 3
     362                 :     5542621 :                          : (mask & 0xff00) ? 2 : 1);
     363                 :    66142570 :               bitmap_clear_range (livenow, 4 * rn + start, end - start);
     364                 :             :             }
     365                 :             :           /* Some ports generate (clobber (const_int)).  */
     366                 :    12471071 :           else if (CONST_INT_P (x))
     367                 :           0 :             continue;
     368                 :             :           else
     369                 :    12471071 :             gcc_assert (CALL_P (insn)
     370                 :             :                         || MEM_P (x)
     371                 :             :                         || x == pc_rtx
     372                 :             :                         || GET_CODE (x) == SCRATCH);
     373                 :             : 
     374                 :    78613641 :           iter.skip_subrtxes ();
     375                 :    78613641 :         }
     376                 :    93605206 :       else if (GET_CODE (x) == COND_EXEC)
     377                 :             :         {
     378                 :             :           /* This isn't ideal, but may not be so bad in practice.  */
     379                 :           0 :           skipped_dest = true;
     380                 :           0 :           iter.skip_subrtxes ();
     381                 :             :         }
     382                 :             :     }
     383                 :   132563480 :   return skipped_dest;
     384                 :   132563480 : }
     385                 :             : 
     386                 :             : /* INSN has a sign/zero extended source inside SET that we will
     387                 :             :    try to turn into a SUBREG.  */
     388                 :             : static void
     389                 :       28664 : ext_dce_try_optimize_insn (rtx_insn *insn, rtx set)
     390                 :             : {
     391                 :       28664 :   rtx src = SET_SRC (set);
     392                 :       28664 :   rtx inner = XEXP (src, 0);
     393                 :             : 
     394                 :             :   /* Avoid (subreg (mem)) and other constructs which may be valid RTL, but
     395                 :             :      not useful for this optimization.  */
     396                 :       28664 :   if (!(REG_P (inner) || (SUBREG_P (inner) && REG_P (SUBREG_REG (inner)))))
     397                 :             :     return;
     398                 :             : 
     399                 :       25915 :   rtx new_pattern;
     400                 :       25915 :   if (dump_file)
     401                 :             :     {
     402                 :           0 :       fprintf (dump_file, "Processing insn:\n");
     403                 :           0 :       dump_insn_slim (dump_file, insn);
     404                 :           0 :       fprintf (dump_file, "Trying to simplify pattern:\n");
     405                 :           0 :       print_rtl_single (dump_file, SET_SRC (set));
     406                 :             :     }
     407                 :             : 
     408                 :             :   /* We decided to turn do the optimization but allow it to be rejected for
     409                 :             :      bisection purposes.  */
     410                 :       25915 :   if (!dbg_cnt (::ext_dce))
     411                 :             :     {
     412                 :           0 :       if (dump_file)
     413                 :           0 :         fprintf (dump_file, "Rejected due to debug counter.\n");
     414                 :           0 :       return;
     415                 :             :     }
     416                 :             : 
     417                 :       51830 :   new_pattern = simplify_gen_subreg (GET_MODE (src), inner,
     418                 :       25915 :                                      GET_MODE (inner), 0);
     419                 :             :   /* simplify_gen_subreg may fail in which case NEW_PATTERN will be NULL.
     420                 :             :      We must not pass that as a replacement pattern to validate_change.  */
     421                 :       25915 :   if (new_pattern)
     422                 :             :     {
     423                 :       25915 :       int ok = validate_change (insn, &SET_SRC (set), new_pattern, false);
     424                 :             : 
     425                 :       25915 :       rtx x = SET_DEST (set);
     426                 :       25915 :       while (SUBREG_P (x) || GET_CODE (x) == ZERO_EXTRACT)
     427                 :           0 :         x = XEXP (x, 0);
     428                 :             : 
     429                 :       25915 :       gcc_assert (REG_P (x));
     430                 :       25915 :       if (ok)
     431                 :       25915 :         bitmap_set_bit (changed_pseudos, REGNO (x));
     432                 :             : 
     433                 :       25915 :       if (dump_file)
     434                 :             :         {
     435                 :           0 :           if (ok)
     436                 :           0 :             fprintf (dump_file, "Successfully transformed to:\n");
     437                 :             :           else
     438                 :           0 :             fprintf (dump_file, "Failed transformation to:\n");
     439                 :             : 
     440                 :           0 :           print_rtl_single (dump_file, new_pattern);
     441                 :           0 :           fprintf (dump_file, "\n");
     442                 :             :         }
     443                 :             :     }
     444                 :             :   else
     445                 :             :     {
     446                 :           0 :       if (dump_file)
     447                 :           0 :         fprintf (dump_file, "Unable to generate valid SUBREG expression.\n");
     448                 :             :     }
     449                 :             : }
     450                 :             : 
     451                 :             : /* Some operators imply that their second operand is fully live,
     452                 :             :    regardless of how many bits in the output are live.  An example
     453                 :             :    would be the shift count on a target where SHIFT_COUNT_TRUNCATED
     454                 :             :    is zero.
     455                 :             : 
     456                 :             :    Return TRUE if CODE is such an operator, FALSE otherwise.  */
     457                 :             : 
     458                 :             : static bool
     459                 :    73495691 : binop_implies_op2_fully_live (rtx_code code)
     460                 :             : {
     461                 :           0 :   switch (code)
     462                 :             :     {
     463                 :             :     case ASHIFT:
     464                 :             :     case LSHIFTRT:
     465                 :             :     case ASHIFTRT:
     466                 :             :     case ROTATE:
     467                 :             :     case ROTATERT:
     468                 :             :     case SS_ASHIFT:
     469                 :             :     case US_ASHIFT:
     470                 :             :       return !SHIFT_COUNT_TRUNCATED;
     471                 :             : 
     472                 :           0 :     default:
     473                 :           0 :       return false;
     474                 :             :     }
     475                 :             : }
     476                 :             : 
     477                 :             : /* X, with code CODE, is an operation for which safe_for_live_propagation
     478                 :             :    holds true, and bits set in MASK are live in the result.  Compute a
     479                 :             :    mask of (potentially) live bits in the non-constant inputs.  In case of
     480                 :             :    binop_implies_op2_fully_live (e.g. shifts), the computed mask may
     481                 :             :    exclusively pertain to the first operand.  A zero MASK yields zero.
     482                 :             : 
     483                 :             :    ??? This looks wrong as we may have some important operations embedded
     484                 :             :    as operands of another operation.  For example, we might have an
     485                 :             :    extension wrapping a shift.  It really feels like this needs to be
     486                 :             :    recursing down into operands much more often.  */
     487                 :             : 
     488                 :             : unsigned HOST_WIDE_INT
     489                 :    68713469 : carry_backpropagate (unsigned HOST_WIDE_INT mask, enum rtx_code code, rtx x)
     490                 :             : {
     491                 :    70304077 :   if (mask == 0)
     492                 :             :     return 0;
     493                 :             : 
     494                 :    70304053 :   enum machine_mode mode = GET_MODE_INNER (GET_MODE (x));
     495                 :    70304053 :   unsigned HOST_WIDE_INT mmask = GET_MODE_MASK (mode);
     496                 :             : 
     497                 :             :   /* While we don't try to optimize operations on types larger
     498                 :             :      than 64 bits, we do want to make sure not to invoke undefined
     499                 :             :      behavior when presented with such operations during use
     500                 :             :      processing.  The safe thing to do is to just return mmask
     501                 :             :      for that scenario indicating every possible chunk is live.  */
     502                 :    70304053 :   scalar_int_mode smode;
     503                 :    70304053 :   if (!is_a <scalar_int_mode> (mode, &smode)
     504                 :    58303359 :       || GET_MODE_BITSIZE (smode) > HOST_BITS_PER_WIDE_INT)
     505                 :             :     return mmask;
     506                 :             : 
     507                 :    56133196 :   switch (code)
     508                 :             :     {
     509                 :    15880999 :     case PLUS:
     510                 :    15880999 :     case MINUS:
     511                 :    15880999 :     case MULT:
     512                 :    15880999 :       return (HOST_WIDE_INT_UC (2) << floor_log2 (mask)) - 1;
     513                 :             : 
     514                 :             :     /* Left shift: a known in-range count moves MASK right onto the
     515                 :             :        shifted operand.  The count itself is handled specially.  */
     516                 :     1224137 :     case ASHIFT:
     517                 :     1224137 :       if (CONST_INT_P (XEXP (x, 1))
     518                 :     2379667 :           && UINTVAL (XEXP (x, 1)) < GET_MODE_BITSIZE (smode))
     519                 :     1155502 :         return (HOST_WIDE_INT) mask >> INTVAL (XEXP (x, 1));
     520                 :       68635 :       return (HOST_WIDE_INT_UC (2) << floor_log2 (mask)) - 1;
     521                 :             : 
     522                 :             :     /* Logical right shift: a known in-range count moves MASK left,
     523                 :             :        clipped to the mode.  The count itself is handled specially.  */
     524                 :      612570 :     case LSHIFTRT:
     525                 :      612570 :       if (CONST_INT_P (XEXP (x, 1))
     526                 :     1193911 :           && UINTVAL (XEXP (x, 1)) < GET_MODE_BITSIZE (smode))
     527                 :      581313 :         return mmask & (mask << INTVAL (XEXP (x, 1)));
     528                 :             :       return mmask;
     529                 :             : 
     530                 :             :     /* Arithmetic right shift: as LSHIFTRT, plus the sign bit when live
     531                 :             :        bits come from it.  The count itself is handled specially.  */
     532                 :      241792 :     case ASHIFTRT:
     533                 :      241792 :       if (CONST_INT_P (XEXP (x, 1))
     534                 :      471862 :           && UINTVAL (XEXP (x, 1)) < GET_MODE_BITSIZE (smode))
     535                 :             :         {
     536                 :      230062 :           HOST_WIDE_INT sign = 0;
     537                 :      230062 :           if (HOST_BITS_PER_WIDE_INT - clz_hwi (mask) + INTVAL (XEXP (x, 1))
     538                 :      230062 :               > GET_MODE_BITSIZE (smode))
     539                 :      460124 :             sign = HOST_WIDE_INT_1U << (GET_MODE_BITSIZE (smode) - 1);
     540                 :      230062 :           return sign | (mmask & (mask << INTVAL (XEXP (x, 1))));
     541                 :             :         }
     542                 :             :       return mmask;
     543                 :             : 
     544                 :       45123 :     case SMUL_HIGHPART:
     545                 :       45123 :     case UMUL_HIGHPART:
     546                 :       45123 :       if (XEXP (x, 1) == const0_rtx)
     547                 :             :         return 0;
     548                 :       45123 :       if (XEXP (x, 1) == const1_rtx)
     549                 :             :         return mmask;
     550                 :       45123 :       if (CONST_INT_P (XEXP (x, 1)))
     551                 :             :         {
     552                 :           0 :           if (pow2p_hwi (INTVAL (XEXP (x, 1))))
     553                 :           0 :             return mmask & (mask << (GET_MODE_BITSIZE (smode)
     554                 :           0 :                                      - exact_log2 (INTVAL (XEXP (x, 1)))));
     555                 :             : 
     556                 :           0 :           int bits = (HOST_BITS_PER_WIDE_INT + GET_MODE_BITSIZE (smode)
     557                 :           0 :                       - clz_hwi (mask) - ctz_hwi (INTVAL (XEXP (x, 1))));
     558                 :           0 :           if (bits < GET_MODE_BITSIZE (smode))
     559                 :           0 :             return (HOST_WIDE_INT_1U << bits) - 1;
     560                 :             :         }
     561                 :             :       return mmask;
     562                 :             : 
     563                 :      648796 :     case SIGN_EXTEND:
     564                 :      648796 :       if (!GET_MODE_BITSIZE (GET_MODE (x)).is_constant ()
     565                 :      648796 :           || !GET_MODE_BITSIZE (GET_MODE (XEXP (x, 0))).is_constant ())
     566                 :             :         return -1;
     567                 :             : 
     568                 :             :       /* We want the mode of the inner object.  If any bit above that mode
     569                 :             :          is live, its sign bit must be on in MASK.  */
     570                 :      648796 :       mode = GET_MODE_INNER (GET_MODE (XEXP (x, 0)));
     571                 :      648796 :       if (mask & ~GET_MODE_MASK (mode))
     572                 :      644339 :         mask |= HOST_WIDE_INT_1U << (GET_MODE_BITSIZE (mode).to_constant ()
     573                 :      644339 :                                      - 1);
     574                 :             : 
     575                 :             :       /* Recurse into the operand.  */
     576                 :      648796 :       return carry_backpropagate (mask, GET_CODE (XEXP (x, 0)), XEXP (x, 0));
     577                 :             : 
     578                 :      941812 :     case ZERO_EXTEND:
     579                 :      941812 :       if (!GET_MODE_BITSIZE (GET_MODE (x)).is_constant ()
     580                 :      941812 :           || !GET_MODE_BITSIZE (GET_MODE (XEXP (x, 0))).is_constant ())
     581                 :             :         return -1;
     582                 :             : 
     583                 :             :       /* Recurse into the operand.  */
     584                 :      941812 :       return carry_backpropagate (mask, GET_CODE (XEXP (x, 0)), XEXP (x, 0));
     585                 :             : 
     586                 :             :     /* We propagate for the shifted operand, but not the shift
     587                 :             :        count.  The count is handled specially.  */
     588                 :           0 :     case SS_ASHIFT:
     589                 :           0 :     case US_ASHIFT:
     590                 :           0 :       if (CONST_INT_P (XEXP (x, 1))
     591                 :           0 :           && UINTVAL (XEXP (x, 1)) < GET_MODE_BITSIZE (smode))
     592                 :             :         {
     593                 :           0 :           return ((mmask & ~((unsigned HOST_WIDE_INT) mmask
     594                 :           0 :                              >> (INTVAL (XEXP (x, 1))
     595                 :           0 :                                  + (XEXP (x, 1) != const0_rtx
     596                 :           0 :                                     && code == SS_ASHIFT))))
     597                 :           0 :                   | ((HOST_WIDE_INT) mask >> INTVAL (XEXP (x, 1))));
     598                 :             :         }
     599                 :             :       return mmask;
     600                 :             : 
     601                 :             :     default:
     602                 :             :       return mask;
     603                 :             :     }
     604                 :             : }
     605                 :             : 
     606                 :             : /* Process uses in INSN contained in OBJ.  Set appropriate bits in LIVENOW
     607                 :             :    for any chunks of pseudos that become live, potentially filtering using
     608                 :             :    bits from LIVE_TMP.
     609                 :             : 
     610                 :             :    If MODIFY is true, then optimize sign/zero extensions to SUBREGs when
     611                 :             :    the extended bits are never read and mark pseudos which had extensions
     612                 :             :    eliminated in CHANGED_PSEUDOS.  */
     613                 :             : 
     614                 :             : static void
     615                 :   132563480 : ext_dce_process_uses (rtx_insn *insn, rtx obj,
     616                 :             :                       bitmap live_tmp, bool skipped_dest)
     617                 :             : {
     618                 :   132563480 :   subrtx_var_iterator::array_type array_var;
     619                 :   720209793 :   FOR_EACH_SUBRTX_VAR (iter, array_var, obj, NONCONST)
     620                 :             :     {
     621                 :             :       /* An EXPR_LIST (from call fusage) ends in NULL_RTX.  */
     622                 :   587646313 :       rtx x = *iter;
     623                 :   587646313 :       if (x == NULL_RTX)
     624                 :     9176496 :         continue;
     625                 :             : 
     626                 :             :       /* So the basic idea in this FOR_EACH_SUBRTX_VAR loop is to
     627                 :             :          handle SETs explicitly, possibly propagating live information
     628                 :             :          into the uses.
     629                 :             : 
     630                 :             :          We may continue the loop at various points which will cause
     631                 :             :          iteration into the next level of RTL.  Breaking from the loop
     632                 :             :          is never safe as it can lead us to fail to process some of the
     633                 :             :          RTL and thus not make objects live when necessary.  */
     634                 :   578469817 :       enum rtx_code xcode = GET_CODE (x);
     635                 :   578469817 :       if (xcode == SET)
     636                 :             :         {
     637                 :   116942395 :           const_rtx dst = SET_DEST (x);
     638                 :   116942395 :           rtx src = SET_SRC (x);
     639                 :   116942395 :           const_rtx y;
     640                 :   116942395 :           unsigned HOST_WIDE_INT bit = 0;
     641                 :             : 
     642                 :             :           /* The code of the RHS of a SET.  */
     643                 :   116942395 :           enum rtx_code code = GET_CODE (src);
     644                 :             : 
     645                 :             :           /* ?!? How much of this should mirror SET handling, potentially
     646                 :             :              being shared?   */
     647                 :   116942395 :           if (SUBREG_P (dst) && SUBREG_BYTE (dst).is_constant ())
     648                 :             :             {
     649                 :      601500 :               bit = subreg_lsb (dst).to_constant ();
     650                 :      601500 :               if (bit >= HOST_BITS_PER_WIDE_INT)
     651                 :             :                 bit = HOST_BITS_PER_WIDE_INT - 1;
     652                 :      601500 :               dst = SUBREG_REG (dst);
     653                 :             :             }
     654                 :   116340895 :           else if (GET_CODE (dst) == STRICT_LOW_PART)
     655                 :       11308 :             dst = XEXP (dst, 0);
     656                 :             : 
     657                 :             :           /* Main processing of the uses.  Two major goals here.
     658                 :             : 
     659                 :             :              First, we want to try and propagate liveness (or the lack
     660                 :             :              thereof) from the destination register to the source
     661                 :             :              register(s).
     662                 :             : 
     663                 :             :              Second, if the source is an extension, try to optimize
     664                 :             :              it into a SUBREG.  The SUBREG form indicates we don't
     665                 :             :              care about the upper bits and will usually be copy
     666                 :             :              propagated away.
     667                 :             : 
     668                 :             :              If we fail to handle something in here, the expectation
     669                 :             :              is the iterator will dive into the sub-components and
     670                 :             :              mark all the chunks in any found REGs as live.  */
     671                 :   116942395 :           if (REG_P (dst) && safe_for_live_propagation (code))
     672                 :             :             {
     673                 :             :               /* Create a mask representing the bits of this output
     674                 :             :                  operand that are live after this insn.  We can use
     675                 :             :                  this information to refine the live in state of
     676                 :             :                  inputs to this insn in many cases.
     677                 :             : 
     678                 :             :                  We have to do this on a per SET basis, we might have
     679                 :             :                  an INSN with multiple SETS, some of which can narrow
     680                 :             :                  the source operand liveness, some of which may not.  */
     681                 :    68713469 :               unsigned HOST_WIDE_INT dst_mask = 0;
     682                 :    68713469 :               HOST_WIDE_INT rn = REGNO (dst);
     683                 :    68713469 :               unsigned HOST_WIDE_INT mask_array[]
     684                 :             :                 = { 0xff, 0xff00, HOST_WIDE_INT_UC (0xffff0000),
     685                 :             :                     -HOST_WIDE_INT_UC (0x100000000) };
     686                 :   343567345 :               for (int i = 0; i < 4; i++)
     687                 :   274853876 :                 if (bitmap_bit_p (live_tmp, 4 * rn + i))
     688                 :   208881444 :                   dst_mask |= mask_array[i];
     689                 :    68713469 :               dst_mask >>= bit;
     690                 :             : 
     691                 :             :               /* If we ignored a destination during set processing, then
     692                 :             :                  consider all the bits live.  */
     693                 :    68713469 :               if (skipped_dest)
     694                 :    27102935 :                 dst_mask = -1;
     695                 :             : 
     696                 :    68713469 :               dst_mask = carry_backpropagate (dst_mask, code, src);
     697                 :             : 
     698                 :             :               /* ??? Could also handle ZERO_EXTRACT / SIGN_EXTRACT
     699                 :             :                  of the source specially to improve optimization.  */
     700                 :    68713469 :               if (code == SIGN_EXTEND || code == ZERO_EXTEND)
     701                 :             :                 {
     702                 :     1603045 :                   rtx inner = XEXP (src, 0);
     703                 :     1603045 :                   unsigned HOST_WIDE_INT src_mask
     704                 :     1603045 :                     = GET_MODE_MASK (GET_MODE_INNER (GET_MODE (inner)));
     705                 :             : 
     706                 :             :                   /* DST_MASK could be zero if we had something in the SET
     707                 :             :                      that we couldn't handle.  */
     708                 :     1603045 :                   if (modify && !skipped_dest && (dst_mask & ~src_mask) == 0)
     709                 :       28664 :                     ext_dce_try_optimize_insn (insn, x);
     710                 :             : 
     711                 :             :                   /* Stripping the extension here just seems wrong on multiple
     712                 :             :                      levels.  It's source side handling, so it seems like it
     713                 :             :                      belongs in the loop below.  Stripping here also makes it
     714                 :             :                      harder than necessary to properly handle live bit groups
     715                 :             :                      for (ANY_EXTEND (SUBREG)) where the SUBREG has
     716                 :             :                      SUBREG_PROMOTED state.  */
     717                 :     1603045 :                   dst_mask &= src_mask;
     718                 :     1603045 :                   src = XEXP (src, 0);
     719                 :     1603045 :                   code = GET_CODE (src);
     720                 :             :                 }
     721                 :             : 
     722                 :             :               /* Optimization is done at this point.  We just want to make
     723                 :             :                  sure everything that should get marked as live is marked
     724                 :             :                  from here onward.  */
     725                 :             : 
     726                 :             :               /* We will handle the other operand of a binary operator
     727                 :             :                  at the bottom of the loop by resetting Y.  */
     728                 :    68713469 :               if (BINARY_P (src))
     729                 :    21600432 :                 y = XEXP (src, 0);
     730                 :             :               else
     731                 :             :                 y = src;
     732                 :             : 
     733                 :             :               /* We're inside a SET and want to process the source operands
     734                 :             :                  making things live.  Breaking from this loop will cause
     735                 :             :                  the iterator to work on sub-rtxs, so it is safe to break
     736                 :             :                  if we see something we don't know how to handle.
     737                 :             : 
     738                 :             :                  This code is just hokey as it really just handles trivial
     739                 :             :                  unary and binary cases.  Otherwise the loop exits and we
     740                 :             :                  continue iterating on sub-rtxs, but outside the set context.  */
     741                 :             :               unsigned HOST_WIDE_INT save_mask = dst_mask;
     742                 :   110764467 :               for (;;)
     743                 :             :                 {
     744                 :             :                   /* In general we want to restore DST_MASK before each loop
     745                 :             :                      iteration.  The exception is when the opcode implies that
     746                 :             :                      the other operand is fully live.  That's handled by
     747                 :             :                      changing SAVE_MASK below.  */
     748                 :    89738968 :                   dst_mask = save_mask;
     749                 :             :                   /* Strip an outer paradoxical subreg.  The bits outside
     750                 :             :                      the inner mode are don't cares.  So we can just strip
     751                 :             :                      and process the inner object.  */
     752                 :    89738968 :                   if (paradoxical_subreg_p (y))
     753                 :       79661 :                     y = XEXP (y, 0);
     754                 :    89659307 :                   else if (SUBREG_P (y) && SUBREG_BYTE (y).is_constant ())
     755                 :             :                     {
     756                 :             :                       /* We really want to know the outer code here, ie do we
     757                 :             :                          have (ANY_EXTEND (SUBREG ...)) as we need to know if
     758                 :             :                          the extension matches the SUBREG_PROMOTED state.  In
     759                 :             :                          that case optimizers can turn the extension into a
     760                 :             :                          simple copy.  Which means that bits outside the
     761                 :             :                          SUBREG's mode are actually live.
     762                 :             : 
     763                 :             :                          We don't want to mark those bits live unnecessarily
     764                 :             :                          as that inhibits extension elimination in important
     765                 :             :                          cases such as those in Coremark.  So we need that
     766                 :             :                          outer code.  */
     767                 :     1973982 :                       if (!REG_P (SUBREG_REG (y))
     768                 :     1973982 :                           || (SUBREG_PROMOTED_VAR_P (y)
     769                 :       11834 :                               && ((GET_CODE (SET_SRC (x)) == SIGN_EXTEND
     770                 :        1160 :                                    && SUBREG_PROMOTED_SIGNED_P (y))
     771                 :       11834 :                                   || (GET_CODE (SET_SRC (x)) == ZERO_EXTEND
     772                 :           0 :                                       && SUBREG_PROMOTED_UNSIGNED_P (y)))))
     773                 :             :                         break;
     774                 :             : 
     775                 :     1973036 :                       bit = subreg_lsb (y).to_constant ();
     776                 :             : 
     777                 :             :                       /* If this is a wide object (more bits than we can fit
     778                 :             :                          in a HOST_WIDE_INT), then just break from the SET
     779                 :             :                          context.   That will cause the iterator to walk down
     780                 :             :                          into the subrtx and if we land on a REG we'll mark
     781                 :             :                          the whole thing live.  */
     782                 :     1973036 :                       if (bit >= HOST_BITS_PER_WIDE_INT)
     783                 :             :                         break;
     784                 :             : 
     785                 :             :                       /* The SUBREG's mode determines the live width.  */
     786                 :     1749727 :                       if (dst_mask)
     787                 :             :                         {
     788                 :     1749727 :                           dst_mask <<= bit;
     789                 :     1749727 :                           if (!dst_mask)
     790                 :           0 :                             dst_mask = -HOST_WIDE_INT_UC (0x100000000);
     791                 :             :                         }
     792                 :     1749727 :                       y = SUBREG_REG (y);
     793                 :             :                     }
     794                 :             : 
     795                 :    89514713 :                   if (REG_P (y))
     796                 :             :                     {
     797                 :             :                       /* We have found the use of a register.  We need to mark
     798                 :             :                          the appropriate chunks of the register live.  The mode
     799                 :             :                          of the REG is a starting point.  We may refine that
     800                 :             :                          based on what chunks in the output were live.  */
     801                 :    47057137 :                       rn = 4 * REGNO (y);
     802                 :    47057137 :                       unsigned HOST_WIDE_INT tmp_mask = dst_mask;
     803                 :             : 
     804                 :             :                       /* If the RTX code for the SET_SRC is not one we can
     805                 :             :                          propagate destination liveness through, then just
     806                 :             :                          set the mask to the mode's mask.  */
     807                 :    47057137 :                       if (!safe_for_live_propagation (code))
     808                 :       32360 :                         tmp_mask
     809                 :       64720 :                           = GET_MODE_MASK (GET_MODE_INNER (GET_MODE (y)));
     810                 :             : 
     811                 :    47057137 :                       if (tmp_mask & 0xff)
     812                 :    46638100 :                         bitmap_set_bit (livenow, rn);
     813                 :    47057137 :                       if (tmp_mask & 0xff00)
     814                 :    45223953 :                         bitmap_set_bit (livenow, rn + 1);
     815                 :    47057137 :                       if (tmp_mask & HOST_WIDE_INT_UC (0xffff0000))
     816                 :    44959856 :                         bitmap_set_bit (livenow, rn + 2);
     817                 :    47057137 :                       if (tmp_mask & -HOST_WIDE_INT_UC (0x100000000))
     818                 :    38738496 :                         bitmap_set_bit (livenow, rn + 3);
     819                 :             :                     }
     820                 :    42457576 :                   else if (!CONSTANT_P (y))
     821                 :             :                     break;
     822                 :             : 
     823                 :             :                   /* We might have (ashift (const_int 1) (reg...))
     824                 :             :                      By setting dst_mask we can continue iterating on the
     825                 :             :                      next operand and it will be considered fully live.
     826                 :             : 
     827                 :             :                      Note that since we restore DST_MASK from SAVE_MASK at the
     828                 :             :                      top of the loop, we have to change SAVE_MASK to get the
     829                 :             :                      semantics we want.  */
     830                 :    73495691 :                   if (binop_implies_op2_fully_live (GET_CODE (src)))
     831                 :     2183900 :                     save_mask = -1;
     832                 :             : 
     833                 :             :                   /* If this was anything but a binary operand, break the inner
     834                 :             :                      loop.  This is conservatively correct as it will cause the
     835                 :             :                      iterator to look at the sub-rtxs outside the SET context.  */
     836                 :    73495691 :                   if (!BINARY_P (src))
     837                 :             :                     break;
     838                 :             : 
     839                 :             :                   /* We processed the first operand of a binary operator.  Now
     840                 :             :                      handle the second.  */
     841                 :    21025499 :                   y = XEXP (src, 1), src = pc_rtx;
     842                 :    21025499 :                 }
     843                 :             : 
     844                 :             :               /* These are leaf nodes, no need to iterate down into them.  */
     845                 :    68713469 :               if (REG_P (y) || CONSTANT_P (y))
     846                 :    52470192 :                 iter.skip_subrtxes ();
     847                 :             :             }
     848                 :             :         }
     849                 :             :       /* If we are reading the low part of a SUBREG, then we can
     850                 :             :          refine liveness of the input register, otherwise let the
     851                 :             :          iterator continue into SUBREG_REG.  */
     852                 :   461527422 :       else if (SUBREG_P (x)
     853                 :     1340408 :                && REG_P (SUBREG_REG (x))
     854                 :     1338654 :                && !paradoxical_subreg_p (x)
     855                 :     1314115 :                && subreg_lowpart_p (x)
     856                 :     1021303 :                && GET_MODE_BITSIZE (GET_MODE (x)).is_constant ()
     857                 :   463570028 :                && GET_MODE_BITSIZE (GET_MODE (x)).to_constant () <= 32)
     858                 :             :         {
     859                 :      547447 :           HOST_WIDE_INT size = GET_MODE_BITSIZE (GET_MODE (x)).to_constant ();
     860                 :      547447 :           HOST_WIDE_INT rn = 4 * REGNO (SUBREG_REG (x));
     861                 :             : 
     862                 :             :           /* If this is a promoted subreg, then more of it may be live than
     863                 :             :              is otherwise obvious.  */
     864                 :      547447 :           if (SUBREG_PROMOTED_VAR_P (x))
     865                 :        3674 :             size = GET_MODE_BITSIZE (GET_MODE (SUBREG_REG (x))).to_constant ();
     866                 :             : 
     867                 :      547447 :           bitmap_set_bit (livenow, rn);
     868                 :      547447 :           if (size > 8)
     869                 :      312972 :             bitmap_set_bit (livenow, rn + 1);
     870                 :      312972 :           if (size > 16)
     871                 :      255755 :             bitmap_set_bit (livenow, rn + 2);
     872                 :      255755 :           if (size >= 32)
     873                 :      255755 :             bitmap_set_bit (livenow, rn + 3);
     874                 :      547447 :           iter.skip_subrtxes ();
     875                 :             :         }
     876                 :             :       /* If we have a register reference that is not otherwise handled,
     877                 :             :          just assume all the chunks are live.  */
     878                 :   460979975 :       else if (REG_P (x))
     879                 :   152229631 :         bitmap_set_range (livenow, REGNO (x) * 4, group_limit (x));
     880                 :             :     }
     881                 :   132563480 : }
     882                 :             : 
     883                 :             : /* Process a single basic block BB with current liveness information
     884                 :             :    in LIVENOW, updating LIVENOW in place.
     885                 :             : 
     886                 :             :    If MODIFY is true, then this is the last pass and unnecessary
     887                 :             :    extensions should be eliminated when possible.  If an extension
     888                 :             :    is removed, the source pseudo is marked in CHANGED_PSEUDOS.  */
     889                 :             : 
     890                 :             : static void
     891                 :    22436947 : ext_dce_process_bb (basic_block bb)
     892                 :             : {
     893                 :    22436947 :   rtx_insn *insn;
     894                 :             : 
     895                 :   274680628 :   FOR_BB_INSNS_REVERSE (bb, insn)
     896                 :             :     {
     897                 :   381100378 :       if (!NONDEBUG_INSN_P (insn))
     898                 :   128856697 :         continue;
     899                 :             : 
     900                 :             :       /* Live-out state of the destination of this insn.  We can
     901                 :             :          use this to refine the live-in state of the sources of
     902                 :             :          this insn in many cases.  */
     903                 :   123386984 :       bitmap live_tmp = BITMAP_ALLOC (NULL);
     904                 :             : 
     905                 :             :       /* First process any sets/clobbers in INSN.  */
     906                 :   123386984 :       bool skipped_dest = ext_dce_process_sets (insn, PATTERN (insn), live_tmp);
     907                 :             : 
     908                 :             :       /* CALL_INSNs need processing their fusage data.  */
     909                 :   123386984 :       if (CALL_P (insn))
     910                 :     9176496 :         skipped_dest |= ext_dce_process_sets (insn,
     911                 :             :                                               CALL_INSN_FUNCTION_USAGE (insn),
     912                 :             :                                               live_tmp);
     913                 :             : 
     914                 :             :       /* And now uses, optimizing away SIGN/ZERO extensions as we go.  */
     915                 :   123386984 :       ext_dce_process_uses (insn, PATTERN (insn), live_tmp, skipped_dest);
     916                 :             : 
     917                 :             :       /* A nonlocal goto implicitly uses the frame pointer.  */
     918                 :   123386984 :       if (JUMP_P (insn) && find_reg_note (insn, REG_NON_LOCAL_GOTO, NULL_RTX))
     919                 :             :         {
     920                 :        1130 :           bitmap_set_range (livenow, FRAME_POINTER_REGNUM * 4, 4);
     921                 :        1130 :           if (!HARD_FRAME_POINTER_IS_FRAME_POINTER)
     922                 :        1130 :             bitmap_set_range (livenow, HARD_FRAME_POINTER_REGNUM * 4, 4);
     923                 :             :         }
     924                 :             : 
     925                 :             :       /* And process fusage data for the use as well.  */
     926                 :   123386984 :       if (CALL_P (insn))
     927                 :             :         {
     928                 :     9176496 :           if (!FAKE_CALL_P (insn))
     929                 :     9176436 :             bitmap_set_range (livenow, STACK_POINTER_REGNUM * 4, 4);
     930                 :             : 
     931                 :             :           /* If this is not a call to a const function, then assume it
     932                 :             :              can read any global register.  */
     933                 :     9176496 :           if (!RTL_CONST_CALL_P (insn))
     934                 :   824983470 :             for (unsigned i = 0; i < FIRST_PSEUDO_REGISTER; i++)
     935                 :   816112680 :               if (global_regs[i])
     936                 :         229 :                 bitmap_set_range (livenow, i * 4, 4);
     937                 :             : 
     938                 :     9176496 :           ext_dce_process_uses (insn, CALL_INSN_FUNCTION_USAGE (insn), live_tmp, false);
     939                 :             :         }
     940                 :             : 
     941                 :   123386984 :       BITMAP_FREE (live_tmp);
     942                 :             :     }
     943                 :    22436947 : }
     944                 :             : 
     945                 :             : /* SUBREG_PROMOTED_VAR_P is set by the gimple->rtl optimizers and
     946                 :             :    is usually helpful.  However, in some cases setting the value when
     947                 :             :    it is not strictly needed can cause this pass to miss optimizations.
     948                 :             : 
     949                 :             :    Specifically consider (set (mem) (subreg (reg))).  If set in that
     950                 :             :    case it will cause more bit groups to be live for REG than would
     951                 :             :    be strictly necessary which in turn can inhibit extension removal.
     952                 :             : 
     953                 :             :    So do a pass over the IL wiping the SUBREG_PROMOTED_VAR_P when it
     954                 :             :    is obviously not needed.  */
     955                 :             : 
     956                 :             : static void
     957                 :      968946 : maybe_clear_subreg_promoted_p (void)
     958                 :             : {
     959                 :   108623150 :   for (rtx_insn *insn = get_insns(); insn; insn = NEXT_INSN (insn))
     960                 :             :     {
     961                 :   107654204 :       if (!NONDEBUG_INSN_P (insn))
     962                 :    54783569 :         continue;
     963                 :             : 
     964                 :    52870635 :       rtx set = single_set (insn);
     965                 :    52870635 :       if (!set)
     966                 :     3661124 :         continue;
     967                 :             : 
     968                 :             :       /* There may be other cases where we should clear, but for
     969                 :             :          now, this is the only known case where it causes problems.  */
     970                 :    49209511 :       if (MEM_P (SET_DEST (set)) && SUBREG_P (SET_SRC (set))
     971                 :       66330 :         && GET_MODE (SET_DEST (set)) <= GET_MODE (SUBREG_REG (SET_SRC (set))))
     972                 :       57874 :         SUBREG_PROMOTED_VAR_P (SET_SRC (set)) = 0;
     973                 :             :     }
     974                 :      968946 : }
     975                 :             : 
     976                 :             : 
     977                 :             : /* We optimize away sign/zero extensions in this pass and replace
     978                 :             :    them with SUBREGs indicating certain bits are don't cares.
     979                 :             : 
     980                 :             :    This changes the SUBREG_PROMOTED_VAR_P state of the object.
     981                 :             :    It is fairly painful to fix this on the fly, so we have
     982                 :             :    recorded which pseudos are affected and we look for SUBREGs
     983                 :             :    of those pseudos and fix them up.  */
     984                 :             : 
     985                 :             : static void
     986                 :      968946 : reset_subreg_promoted_p (void)
     987                 :             : {
     988                 :             :   /* If we removed an extension, that changed the promoted state
     989                 :             :      of the destination of that extension.  Thus we need to go
     990                 :             :      find any SUBREGs that reference that pseudo and adjust their
     991                 :             :      SUBREG_PROMOTED_VAR_P state.  */
     992                 :   108623150 :   for (rtx_insn *insn = get_insns(); insn; insn = NEXT_INSN (insn))
     993                 :             :     {
     994                 :   107654204 :       if (!NONDEBUG_INSN_P (insn))
     995                 :    54783569 :         continue;
     996                 :             : 
     997                 :    52870635 :       rtx pat = PATTERN (insn);
     998                 :    52870635 :       subrtx_var_iterator::array_type array;
     999                 :   337424828 :       FOR_EACH_SUBRTX_VAR (iter, array, pat, NONCONST)
    1000                 :             :         {
    1001                 :   284554193 :           rtx sub = *iter;
    1002                 :             : 
    1003                 :             :           /* We only care about SUBREGs.  */
    1004                 :   284554193 :           if (GET_CODE (sub) != SUBREG)
    1005                 :   283017495 :             continue;
    1006                 :             : 
    1007                 :     1536698 :           const_rtx x = SUBREG_REG (sub);
    1008                 :             : 
    1009                 :             :           /* We only care if the inner object is a REG.  */
    1010                 :     1536698 :           if (!REG_P (x))
    1011                 :         729 :             continue;
    1012                 :             : 
    1013                 :             :           /* And only if the SUBREG is a promoted var.  */
    1014                 :     1535969 :           if (!SUBREG_PROMOTED_VAR_P (sub))
    1015                 :     1531002 :             continue;
    1016                 :             : 
    1017                 :        4967 :           if (bitmap_bit_p (changed_pseudos, REGNO (x)))
    1018                 :           0 :             SUBREG_PROMOTED_VAR_P (sub) = 0;
    1019                 :             :         }
    1020                 :    52870635 :     }
    1021                 :      968946 : }
    1022                 :             : 
    1023                 :             : /* Initialization of the ext-dce pass.  Primarily this means
    1024                 :             :    setting up the various bitmaps we utilize.  */
    1025                 :             : 
    1026                 :             : static void
    1027                 :      968946 : ext_dce_init (void)
    1028                 :             : {
    1029                 :      968946 :   livein.create (last_basic_block_for_fn (cfun));
    1030                 :      968946 :   livein.quick_grow_cleared (last_basic_block_for_fn (cfun));
    1031                 :    12538255 :   for (int i = 0; i < last_basic_block_for_fn (cfun); i++)
    1032                 :    11569309 :     bitmap_initialize (&livein[i], &bitmap_default_obstack);
    1033                 :             : 
    1034                 :      968946 :   auto_bitmap refs (&bitmap_default_obstack);
    1035                 :      968946 :   df_get_exit_block_use_set (refs);
    1036                 :             : 
    1037                 :      968946 :   unsigned i;
    1038                 :      968946 :   bitmap_iterator bi;
    1039                 :     4437099 :   EXECUTE_IF_SET_IN_BITMAP (refs, 0, i, bi)
    1040                 :     3468153 :     make_reg_live (&livein[EXIT_BLOCK], i);
    1041                 :             : 
    1042                 :      968946 :   livenow = BITMAP_ALLOC (NULL);
    1043                 :      968946 :   all_blocks = BITMAP_ALLOC (NULL);
    1044                 :      968946 :   changed_pseudos = BITMAP_ALLOC (NULL);
    1045                 :             : 
    1046                 :    12538255 :   for (int i = 0; i < last_basic_block_for_fn (cfun); i++)
    1047                 :    11569309 :     if (i != ENTRY_BLOCK && i != EXIT_BLOCK)
    1048                 :     9631417 :       bitmap_set_bit (all_blocks, i);
    1049                 :             : 
    1050                 :      968946 :   modify = false;
    1051                 :      968946 : }
    1052                 :             : 
    1053                 :             : /* Finalization of the ext-dce pass.  Primarily this means
    1054                 :             :    releasing the various bitmaps we utilize.  */
    1055                 :             : 
    1056                 :             : static void
    1057                 :      968946 : ext_dce_finish (void)
    1058                 :             : {
    1059                 :    12538255 :   for (unsigned i = 0; i < livein.length (); i++)
    1060                 :    11569309 :     bitmap_clear (&livein[i]);
    1061                 :      968946 :   livein.release ();
    1062                 :             : 
    1063                 :      968946 :   BITMAP_FREE (livenow);
    1064                 :      968946 :   BITMAP_FREE (changed_pseudos);
    1065                 :      968946 :   BITMAP_FREE (all_blocks);
    1066                 :      968946 : }
    1067                 :             : 
    1068                 :             : /* Process block number BB_INDEX as part of the backward
    1069                 :             :    simple dataflow analysis.  Return TRUE if something in
    1070                 :             :    this block changed or FALSE otherwise.  */
    1071                 :             : 
    1072                 :             : static bool
    1073                 :    26312731 : ext_dce_rd_transfer_n (int bb_index)
    1074                 :             : {
    1075                 :             :   /* The ENTRY/EXIT blocks never change.  */
    1076                 :    26312731 :   if (bb_index == ENTRY_BLOCK || bb_index == EXIT_BLOCK)
    1077                 :             :     return false;
    1078                 :             : 
    1079                 :    22436947 :   basic_block bb = BASIC_BLOCK_FOR_FN (cfun, bb_index);
    1080                 :             : 
    1081                 :             :   /* Make everything live that's live in the successors.  */
    1082                 :    22436947 :   bitmap_clear (livenow);
    1083                 :    22436947 :   edge_iterator ei;
    1084                 :    22436947 :   edge e;
    1085                 :             : 
    1086                 :    56547825 :   FOR_EACH_EDGE (e, ei, bb->succs)
    1087                 :    34110878 :     bitmap_ior_into (livenow, &livein[e->dest->index]);
    1088                 :             : 
    1089                 :    22436947 :   ext_dce_process_bb (bb);
    1090                 :             : 
    1091                 :             :   /* We may have narrowed the set of live objects at the start
    1092                 :             :      of this block.  If so, update the bitmaps and indicate to
    1093                 :             :      the generic dataflow code that something changed.  */
    1094                 :    22436947 :   if (!bitmap_equal_p (&livein[bb_index], livenow))
    1095                 :             :     {
    1096                 :    12060563 :       bitmap_copy (&livein[bb_index], livenow);
    1097                 :    12060563 :       return true;
    1098                 :             :     }
    1099                 :             : 
    1100                 :             :   return false;
    1101                 :             : }
    1102                 :             : 
    1103                 :             : /* Dummy function for the df_simple_dataflow API.  */
    1104                 :    32648179 : static bool ext_dce_rd_confluence_n (edge) { return true; }
    1105                 :             : 
    1106                 :             : /* Use lifetime analysis to identify extensions that set bits that
    1107                 :             :    are never read.  Turn such extensions into SUBREGs instead which
    1108                 :             :    can often be propagated away.  */
    1109                 :             : 
    1110                 :             : void
    1111                 :      968946 : ext_dce_execute (void)
    1112                 :             : {
    1113                 :             :   /* Some settings of SUBREG_PROMOTED_VAR_P are actively harmful
    1114                 :             :      to this pass.  Clear it for those cases.  */
    1115                 :      968946 :   maybe_clear_subreg_promoted_p ();
    1116                 :      968946 :   df_analyze ();
    1117                 :      968946 :   ext_dce_init ();
    1118                 :             : 
    1119                 :     3875784 :   do
    1120                 :             :     {
    1121                 :     1937892 :       df_simple_dataflow (DF_BACKWARD, NULL, NULL,
    1122                 :             :                           ext_dce_rd_confluence_n, ext_dce_rd_transfer_n,
    1123                 :             :                           all_blocks, df_get_postorder (DF_BACKWARD),
    1124                 :             :                           df_get_n_blocks (DF_BACKWARD));
    1125                 :     1937892 :       modify = !modify;
    1126                 :             :     }
    1127                 :             :   while (modify);
    1128                 :             : 
    1129                 :      968946 :   reset_subreg_promoted_p ();
    1130                 :             : 
    1131                 :      968946 :   ext_dce_finish ();
    1132                 :      968946 : }
    1133                 :             : 
    1134                 :             : 
    1135                 :             : namespace {
    1136                 :             : 
    1137                 :             : const pass_data pass_data_ext_dce =
    1138                 :             : {
    1139                 :             :   RTL_PASS, /* type */
    1140                 :             :   "ext_dce", /* name */
    1141                 :             :   OPTGROUP_NONE, /* optinfo_flags */
    1142                 :             :   TV_EXT_DCE, /* tv_id */
    1143                 :             :   PROP_cfglayout, /* properties_required */
    1144                 :             :   0, /* properties_provided */
    1145                 :             :   0, /* properties_destroyed */
    1146                 :             :   0, /* todo_flags_start */
    1147                 :             :   TODO_df_finish, /* todo_flags_finish */
    1148                 :             : };
    1149                 :             : 
    1150                 :             : class pass_ext_dce : public rtl_opt_pass
    1151                 :             : {
    1152                 :             : public:
    1153                 :      281608 :   pass_ext_dce (gcc::context *ctxt)
    1154                 :      563216 :     : rtl_opt_pass (pass_data_ext_dce, ctxt)
    1155                 :             :   {}
    1156                 :             : 
    1157                 :             :   /* opt_pass methods: */
    1158                 :     1486709 :   virtual bool gate (function *) { return flag_ext_dce && optimize > 0; }
    1159                 :      968946 :   virtual unsigned int execute (function *)
    1160                 :             :     {
    1161                 :      968946 :       ext_dce_execute ();
    1162                 :      968946 :       return 0;
    1163                 :             :     }
    1164                 :             : 
    1165                 :             : }; // class pass_ext_dce
    1166                 :             : 
    1167                 :             : } // anon namespace
    1168                 :             : 
    1169                 :             : rtl_opt_pass *
    1170                 :      281608 : make_pass_ext_dce (gcc::context *ctxt)
    1171                 :             : {
    1172                 :      281608 :   return new pass_ext_dce (ctxt);
    1173                 :             : }
        

Generated by: LCOV version 2.1-beta

The LCOV profile is generated on an x86_64 machine using the following configure options: configure --disable-bootstrap --enable-coverage=opt --enable-languages=c,c++,fortran,go,jit,lto,rust,m2 --enable-host-shared. The GCC test suite is run with the built compiler.