LCOV - code coverage report
Current view: top level - gcc - gimple-ssa-store-merging.cc (source / functions) Coverage Total Hit
Test: gcc.info Lines: 95.7 % 2676 2561
Test Date: 2026-03-28 14:25:54 Functions: 100.0 % 71 71
Legend: Lines:     hit not hit

            Line data    Source code
       1              : /* GIMPLE store merging and byte swapping passes.
       2              :    Copyright (C) 2009-2026 Free Software Foundation, Inc.
       3              :    Contributed by ARM Ltd.
       4              : 
       5              :    This file is part of GCC.
       6              : 
       7              :    GCC is free software; you can redistribute it and/or modify it
       8              :    under the terms of the GNU General Public License as published by
       9              :    the Free Software Foundation; either version 3, or (at your option)
      10              :    any later version.
      11              : 
      12              :    GCC is distributed in the hope that it will be useful, but
      13              :    WITHOUT ANY WARRANTY; without even the implied warranty of
      14              :    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
      15              :    General Public License for more details.
      16              : 
      17              :    You should have received a copy of the GNU General Public License
      18              :    along with GCC; see the file COPYING3.  If not see
      19              :    <http://www.gnu.org/licenses/>.  */
      20              : 
      21              : /* The purpose of the store merging pass is to combine multiple memory stores
      22              :    of constant values, values loaded from memory, bitwise operations on those,
      23              :    or bit-field values, to consecutive locations, into fewer wider stores.
      24              : 
      25              :    For example, if we have a sequence peforming four byte stores to
      26              :    consecutive memory locations:
      27              :    [p     ] := imm1;
      28              :    [p + 1B] := imm2;
      29              :    [p + 2B] := imm3;
      30              :    [p + 3B] := imm4;
      31              :    we can transform this into a single 4-byte store if the target supports it:
      32              :    [p] := imm1:imm2:imm3:imm4 concatenated according to endianness.
      33              : 
      34              :    Or:
      35              :    [p     ] := [q     ];
      36              :    [p + 1B] := [q + 1B];
      37              :    [p + 2B] := [q + 2B];
      38              :    [p + 3B] := [q + 3B];
      39              :    if there is no overlap can be transformed into a single 4-byte
      40              :    load followed by single 4-byte store.
      41              : 
      42              :    Or:
      43              :    [p     ] := [q     ] ^ imm1;
      44              :    [p + 1B] := [q + 1B] ^ imm2;
      45              :    [p + 2B] := [q + 2B] ^ imm3;
      46              :    [p + 3B] := [q + 3B] ^ imm4;
      47              :    if there is no overlap can be transformed into a single 4-byte
      48              :    load, xored with imm1:imm2:imm3:imm4 and stored using a single 4-byte store.
      49              : 
      50              :    Or:
      51              :    [p:1 ] := imm;
      52              :    [p:31] := val & 0x7FFFFFFF;
      53              :    we can transform this into a single 4-byte store if the target supports it:
      54              :    [p] := imm:(val & 0x7FFFFFFF) concatenated according to endianness.
      55              : 
      56              :    The algorithm is applied to each basic block in three phases:
      57              : 
      58              :    1) Scan through the basic block and record assignments to destinations
      59              :    that can be expressed as a store to memory of a certain size at a certain
      60              :    bit offset from base expressions we can handle.  For bit-fields we also
      61              :    record the surrounding bit region, i.e. bits that could be stored in
      62              :    a read-modify-write operation when storing the bit-field.  Record store
      63              :    chains to different bases in a hash_map (m_stores) and make sure to
      64              :    terminate such chains when appropriate (for example when the stored
      65              :    values get used subsequently).
      66              :    These stores can be a result of structure element initializers, array stores
      67              :    etc.  A store_immediate_info object is recorded for every such store.
      68              :    Record as many such assignments to a single base as possible until a
      69              :    statement that interferes with the store sequence is encountered.
      70              :    Each store has up to 2 operands, which can be a either constant, a memory
      71              :    load or an SSA name, from which the value to be stored can be computed.
      72              :    At most one of the operands can be a constant.  The operands are recorded
      73              :    in store_operand_info struct.
      74              : 
      75              :    2) Analyze the chains of stores recorded in phase 1) (i.e. the vector of
      76              :    store_immediate_info objects) and coalesce contiguous stores into
      77              :    merged_store_group objects.  For bit-field stores, we don't need to
      78              :    require the stores to be contiguous, just their surrounding bit regions
      79              :    have to be contiguous.  If the expression being stored is different
      80              :    between adjacent stores, such as one store storing a constant and
      81              :    following storing a value loaded from memory, or if the loaded memory
      82              :    objects are not adjacent, a new merged_store_group is created as well.
      83              : 
      84              :    For example, given the stores:
      85              :    [p     ] := 0;
      86              :    [p + 1B] := 1;
      87              :    [p + 3B] := 0;
      88              :    [p + 4B] := 1;
      89              :    [p + 5B] := 0;
      90              :    [p + 6B] := 0;
      91              :    This phase would produce two merged_store_group objects, one recording the
      92              :    two bytes stored in the memory region [p : p + 1] and another
      93              :    recording the four bytes stored in the memory region [p + 3 : p + 6].
      94              : 
      95              :    3) The merged_store_group objects produced in phase 2) are processed
      96              :    to generate the sequence of wider stores that set the contiguous memory
      97              :    regions to the sequence of bytes that correspond to it.  This may emit
      98              :    multiple stores per store group to handle contiguous stores that are not
      99              :    of a size that is a power of 2.  For example it can try to emit a 40-bit
     100              :    store as a 32-bit store followed by an 8-bit store.
     101              :    We try to emit as wide stores as we can while respecting STRICT_ALIGNMENT
     102              :    or TARGET_SLOW_UNALIGNED_ACCESS settings.
     103              : 
     104              :    Note on endianness and example:
     105              :    Consider 2 contiguous 16-bit stores followed by 2 contiguous 8-bit stores:
     106              :    [p     ] := 0x1234;
     107              :    [p + 2B] := 0x5678;
     108              :    [p + 4B] := 0xab;
     109              :    [p + 5B] := 0xcd;
     110              : 
     111              :    The memory layout for little-endian (LE) and big-endian (BE) must be:
     112              :   p |LE|BE|
     113              :   ---------
     114              :   0 |34|12|
     115              :   1 |12|34|
     116              :   2 |78|56|
     117              :   3 |56|78|
     118              :   4 |ab|ab|
     119              :   5 |cd|cd|
     120              : 
     121              :   To merge these into a single 48-bit merged value 'val' in phase 2)
     122              :   on little-endian we insert stores to higher (consecutive) bitpositions
     123              :   into the most significant bits of the merged value.
     124              :   The final merged value would be: 0xcdab56781234
     125              : 
     126              :   For big-endian we insert stores to higher bitpositions into the least
     127              :   significant bits of the merged value.
     128              :   The final merged value would be: 0x12345678abcd
     129              : 
     130              :   Then, in phase 3), we want to emit this 48-bit value as a 32-bit store
     131              :   followed by a 16-bit store.  Again, we must consider endianness when
     132              :   breaking down the 48-bit value 'val' computed above.
     133              :   For little endian we emit:
     134              :   [p]      (32-bit) := 0x56781234; // val & 0x0000ffffffff;
     135              :   [p + 4B] (16-bit) := 0xcdab;    // (val & 0xffff00000000) >> 32;
     136              : 
     137              :   Whereas for big-endian we emit:
     138              :   [p]      (32-bit) := 0x12345678; // (val & 0xffffffff0000) >> 16;
     139              :   [p + 4B] (16-bit) := 0xabcd;     //  val & 0x00000000ffff;  */
     140              : 
     141              : #include "config.h"
     142              : #include "system.h"
     143              : #include "coretypes.h"
     144              : #include "backend.h"
     145              : #include "tree.h"
     146              : #include "gimple.h"
     147              : #include "builtins.h"
     148              : #include "fold-const.h"
     149              : #include "tree-pass.h"
     150              : #include "ssa.h"
     151              : #include "gimple-pretty-print.h"
     152              : #include "alias.h"
     153              : #include "fold-const.h"
     154              : #include "print-tree.h"
     155              : #include "tree-hash-traits.h"
     156              : #include "gimple-iterator.h"
     157              : #include "gimplify.h"
     158              : #include "gimple-fold.h"
     159              : #include "stor-layout.h"
     160              : #include "timevar.h"
     161              : #include "cfganal.h"
     162              : #include "cfgcleanup.h"
     163              : #include "tree-cfg.h"
     164              : #include "except.h"
     165              : #include "tree-eh.h"
     166              : #include "target.h"
     167              : #include "gimplify-me.h"
     168              : #include "rtl.h"
     169              : #include "expr.h"     /* For get_bit_range.  */
     170              : #include "optabs-tree.h"
     171              : #include "dbgcnt.h"
     172              : #include "selftest.h"
     173              : 
     174              : /* The maximum size (in bits) of the stores this pass should generate.  */
     175              : #define MAX_STORE_BITSIZE (BITS_PER_WORD)
     176              : #define MAX_STORE_BYTES (MAX_STORE_BITSIZE / BITS_PER_UNIT)
     177              : 
     178              : /* Limit to bound the number of aliasing checks for loads with the same
     179              :    vuse as the corresponding store.  */
     180              : #define MAX_STORE_ALIAS_CHECKS 64
     181              : 
     182              : namespace {
     183              : 
     184              : struct bswap_stat
     185              : {
     186              :   /* Number of hand-written 16-bit nop / bswaps found.  */
     187              :   int found_16bit;
     188              : 
     189              :   /* Number of hand-written 32-bit nop / bswaps found.  */
     190              :   int found_32bit;
     191              : 
     192              :   /* Number of hand-written 64-bit nop / bswaps found.  */
     193              :   int found_64bit;
     194              : } nop_stats, bswap_stats;
     195              : 
     196              : /* A symbolic number structure is used to detect byte permutation and selection
     197              :    patterns of a source.  To achieve that, its field N contains an artificial
     198              :    number consisting of BITS_PER_MARKER sized markers tracking where does each
     199              :    byte come from in the source:
     200              : 
     201              :    0       - target byte has the value 0
     202              :    FF      - target byte has an unknown value (eg. due to sign extension)
     203              :    1..size - marker value is the byte index in the source (0 for lsb).
     204              : 
     205              :    To detect permutations on memory sources (arrays and structures), a symbolic
     206              :    number is also associated:
     207              :    - a base address BASE_ADDR and an OFFSET giving the address of the source;
     208              :    - a range which gives the difference between the highest and lowest accessed
     209              :      memory location to make such a symbolic number;
     210              :    - the address SRC of the source element of lowest address as a convenience
     211              :      to easily get BASE_ADDR + offset + lowest bytepos;
     212              :    - number of expressions N_OPS bitwise ored together to represent
     213              :      approximate cost of the computation.
     214              : 
     215              :    Note 1: the range is different from size as size reflects the size of the
     216              :    type of the current expression.  For instance, for an array char a[],
     217              :    (short) a[0] | (short) a[3] would have a size of 2 but a range of 4 while
     218              :    (short) a[0] | ((short) a[0] << 1) would still have a size of 2 but this
     219              :    time a range of 1.
     220              : 
     221              :    Note 2: for non-memory sources, range holds the same value as size.
     222              : 
     223              :    Note 3: SRC points to the SSA_NAME in case of non-memory source.  */
     224              : 
     225              : struct symbolic_number {
     226              :   uint64_t n;
     227              :   tree type;
     228              :   tree base_addr;
     229              :   tree offset;
     230              :   poly_int64 bytepos;
     231              :   tree src;
     232              :   tree alias_set;
     233              :   tree vuse;
     234              :   unsigned HOST_WIDE_INT range;
     235              :   int n_ops;
     236              : };
     237              : 
     238              : #define BITS_PER_MARKER 8
     239              : #define MARKER_MASK ((1 << BITS_PER_MARKER) - 1)
     240              : #define MARKER_BYTE_UNKNOWN MARKER_MASK
     241              : #define HEAD_MARKER(n, size) \
     242              :   ((n) & ((uint64_t) MARKER_MASK << (((size) - 1) * BITS_PER_MARKER)))
     243              : 
     244              : /* The number which the find_bswap_or_nop_1 result should match in
     245              :    order to have a nop.  The number is masked according to the size of
     246              :    the symbolic number before using it.  */
     247              : #define CMPNOP (sizeof (int64_t) < 8 ? 0 : \
     248              :   (uint64_t)0x08070605 << 32 | 0x04030201)
     249              : 
     250              : /* The number which the find_bswap_or_nop_1 result should match in
     251              :    order to have a byte swap.  The number is masked according to the
     252              :    size of the symbolic number before using it.  */
     253              : #define CMPXCHG (sizeof (int64_t) < 8 ? 0 : \
     254              :   (uint64_t)0x01020304 << 32 | 0x05060708)
     255              : 
     256              : /* Perform a SHIFT or ROTATE operation by COUNT bits on symbolic
     257              :    number N.  Return false if the requested operation is not permitted
     258              :    on a symbolic number.  */
     259              : 
     260              : inline bool
     261       191549 : do_shift_rotate (enum tree_code code,
     262              :                  struct symbolic_number *n,
     263              :                  int count)
     264              : {
     265       191549 :   int i, size = TYPE_PRECISION (n->type) / BITS_PER_UNIT;
     266       191549 :   uint64_t head_marker;
     267              : 
     268       191549 :   if (count < 0
     269       191549 :       || count >= TYPE_PRECISION (n->type)
     270       383098 :       || count % BITS_PER_UNIT != 0)
     271              :     return false;
     272       149938 :   count = (count / BITS_PER_UNIT) * BITS_PER_MARKER;
     273              : 
     274              :   /* Zero out the extra bits of N in order to avoid them being shifted
     275              :      into the significant bits.  */
     276       149938 :   if (size < 64 / BITS_PER_MARKER)
     277        43222 :     n->n &= ((uint64_t) 1 << (size * BITS_PER_MARKER)) - 1;
     278              : 
     279       149938 :   switch (code)
     280              :     {
     281       121017 :     case LSHIFT_EXPR:
     282       121017 :       n->n <<= count;
     283       121017 :       break;
     284        26065 :     case RSHIFT_EXPR:
     285        26065 :       head_marker = HEAD_MARKER (n->n, size);
     286        26065 :       n->n >>= count;
     287              :       /* Arithmetic shift of signed type: result is dependent on the value.  */
     288        26065 :       if (!TYPE_UNSIGNED (n->type) && head_marker)
     289         2925 :         for (i = 0; i < count / BITS_PER_MARKER; i++)
     290         1862 :           n->n |= (uint64_t) MARKER_BYTE_UNKNOWN
     291         1862 :                   << ((size - 1 - i) * BITS_PER_MARKER);
     292              :       break;
     293           25 :     case LROTATE_EXPR:
     294           25 :       n->n = (n->n << count) | (n->n >> ((size * BITS_PER_MARKER) - count));
     295           25 :       break;
     296         2831 :     case RROTATE_EXPR:
     297         2831 :       n->n = (n->n >> count) | (n->n << ((size * BITS_PER_MARKER) - count));
     298         2831 :       break;
     299              :     default:
     300              :       return false;
     301              :     }
     302              :   /* Zero unused bits for size.  */
     303       149938 :   if (size < 64 / BITS_PER_MARKER)
     304        43222 :     n->n &= ((uint64_t) 1 << (size * BITS_PER_MARKER)) - 1;
     305              :   return true;
     306              : }
     307              : 
     308              : /* Perform sanity checking for the symbolic number N and the gimple
     309              :    statement STMT.  */
     310              : 
     311              : inline bool
     312       342122 : verify_symbolic_number_p (struct symbolic_number *n, gimple *stmt)
     313              : {
     314       342122 :   tree lhs_type;
     315              : 
     316       342122 :   lhs_type = TREE_TYPE (gimple_get_lhs (stmt));
     317              : 
     318       342122 :   if (TREE_CODE (lhs_type) != INTEGER_TYPE
     319       342122 :       && TREE_CODE (lhs_type) != ENUMERAL_TYPE)
     320              :     return false;
     321              : 
     322       329659 :   if (TYPE_PRECISION (lhs_type) != TYPE_PRECISION (n->type))
     323            0 :     return false;
     324              : 
     325              :   return true;
     326              : }
     327              : 
     328              : /* Initialize the symbolic number N for the bswap pass from the base element
     329              :    SRC manipulated by the bitwise OR expression.  */
     330              : 
     331              : bool
     332      1134538 : init_symbolic_number (struct symbolic_number *n, tree src)
     333              : {
     334      1134538 :   int size;
     335              : 
     336      1134538 :   if (!INTEGRAL_TYPE_P (TREE_TYPE (src)) && !POINTER_TYPE_P (TREE_TYPE (src)))
     337              :     return false;
     338              : 
     339       983523 :   n->base_addr = n->offset = n->alias_set = n->vuse = NULL_TREE;
     340       983523 :   n->src = src;
     341              : 
     342              :   /* Set up the symbolic number N by setting each byte to a value between 1 and
     343              :      the byte size of rhs1.  The highest order byte is set to n->size and the
     344              :      lowest order byte to 1.  */
     345       983523 :   n->type = TREE_TYPE (src);
     346       983523 :   size = TYPE_PRECISION (n->type);
     347       983523 :   if (size % BITS_PER_UNIT != 0)
     348              :     return false;
     349       968780 :   size /= BITS_PER_UNIT;
     350       968780 :   if (size > 64 / BITS_PER_MARKER)
     351              :     return false;
     352       967936 :   n->range = size;
     353       967936 :   n->n = CMPNOP;
     354       967936 :   n->n_ops = 1;
     355              : 
     356       967936 :   if (size < 64 / BITS_PER_MARKER)
     357       448945 :     n->n &= ((uint64_t) 1 << (size * BITS_PER_MARKER)) - 1;
     358              : 
     359              :   return true;
     360              : }
     361              : 
     362              : /* Check if STMT might be a byte swap or a nop from a memory source and returns
     363              :    the answer. If so, REF is that memory source and the base of the memory area
     364              :    accessed and the offset of the access from that base are recorded in N.  */
     365              : 
     366              : static bool
     367      5237059 : find_bswap_or_nop_load (gimple *stmt, tree ref, struct symbolic_number *n)
     368              : {
     369              :   /* Leaf node is an array or component ref. Memorize its base and
     370              :      offset from base to compare to other such leaf node.  */
     371      5237059 :   poly_int64 bitsize, bitpos, bytepos;
     372      5237059 :   machine_mode mode;
     373      5237059 :   int unsignedp, reversep, volatilep;
     374      5237059 :   tree offset, base_addr;
     375              : 
     376              :   /* Not prepared to handle PDP endian.  */
     377      5237059 :   if (BYTES_BIG_ENDIAN != WORDS_BIG_ENDIAN)
     378              :     return false;
     379              : 
     380      6142914 :   if (!gimple_assign_load_p (stmt) || gimple_has_volatile_ops (stmt))
     381              :     return false;
     382              : 
     383       900640 :   base_addr = get_inner_reference (ref, &bitsize, &bitpos, &offset, &mode,
     384              :                                    &unsignedp, &reversep, &volatilep);
     385              : 
     386       900640 :   if (TREE_CODE (base_addr) == TARGET_MEM_REF)
     387              :     /* Do not rewrite TARGET_MEM_REF.  */
     388              :     return false;
     389       853468 :   else if (TREE_CODE (base_addr) == MEM_REF)
     390              :     {
     391       469844 :       poly_offset_int bit_offset = 0;
     392       469844 :       tree off = TREE_OPERAND (base_addr, 1);
     393              : 
     394       469844 :       if (!integer_zerop (off))
     395              :         {
     396       104410 :           poly_offset_int boff = mem_ref_offset (base_addr);
     397       104410 :           boff <<= LOG2_BITS_PER_UNIT;
     398       104410 :           bit_offset += boff;
     399              :         }
     400              : 
     401       469844 :       base_addr = TREE_OPERAND (base_addr, 0);
     402              : 
     403              :       /* Avoid returning a negative bitpos as this may wreak havoc later.  */
     404       469844 :       if (maybe_lt (bit_offset, 0))
     405              :         {
     406         4374 :           tree byte_offset = wide_int_to_tree
     407         4374 :             (sizetype, bits_to_bytes_round_down (bit_offset));
     408         4374 :           bit_offset = num_trailing_bits (bit_offset);
     409         4374 :           if (offset)
     410            0 :             offset = size_binop (PLUS_EXPR, offset, byte_offset);
     411              :           else
     412         4374 :             offset = byte_offset;
     413              :         }
     414              : 
     415       469844 :       bitpos += bit_offset.force_shwi ();
     416              :     }
     417              :   else
     418       383624 :     base_addr = build_fold_addr_expr (base_addr);
     419              : 
     420      5374817 :   if (!multiple_p (bitpos, BITS_PER_UNIT, &bytepos))
     421              :     return false;
     422       852508 :   if (!multiple_p (bitsize, BITS_PER_UNIT))
     423              :     return false;
     424       851226 :   if (reversep)
     425              :     return false;
     426              : 
     427       851217 :   if (!init_symbolic_number (n, ref))
     428              :     return false;
     429       714750 :   n->base_addr = base_addr;
     430       714750 :   n->offset = offset;
     431       714750 :   n->bytepos = bytepos;
     432       714750 :   n->alias_set = reference_alias_ptr_type (ref);
     433       714750 :   n->vuse = gimple_vuse (stmt);
     434       714750 :   return true;
     435              : }
     436              : 
     437              : /* Compute the symbolic number N representing the result of a bitwise OR,
     438              :    bitwise XOR or plus on 2 symbolic number N1 and N2 whose source statements
     439              :    are respectively SOURCE_STMT1 and SOURCE_STMT2.  CODE is the operation.  */
     440              : 
     441              : gimple *
     442       158612 : perform_symbolic_merge (gimple *source_stmt1, struct symbolic_number *n1,
     443              :                         gimple *source_stmt2, struct symbolic_number *n2,
     444              :                         struct symbolic_number *n, enum tree_code code)
     445              : {
     446       158612 :   int i, size;
     447       158612 :   uint64_t mask;
     448       158612 :   gimple *source_stmt;
     449       158612 :   struct symbolic_number *n_start;
     450              : 
     451       158612 :   tree rhs1 = gimple_assign_rhs1 (source_stmt1);
     452       158612 :   if (TREE_CODE (rhs1) == BIT_FIELD_REF
     453       158612 :       && TREE_CODE (TREE_OPERAND (rhs1, 0)) == SSA_NAME)
     454          385 :     rhs1 = TREE_OPERAND (rhs1, 0);
     455       158612 :   tree rhs2 = gimple_assign_rhs1 (source_stmt2);
     456       158612 :   if (TREE_CODE (rhs2) == BIT_FIELD_REF
     457       158612 :       && TREE_CODE (TREE_OPERAND (rhs2, 0)) == SSA_NAME)
     458          380 :     rhs2 = TREE_OPERAND (rhs2, 0);
     459              : 
     460              :   /* Sources are different, cancel bswap if they are not memory location with
     461              :      the same base (array, structure, ...).  */
     462       158612 :   if (rhs1 != rhs2)
     463              :     {
     464       150964 :       uint64_t inc;
     465       150964 :       HOST_WIDE_INT start1, start2, start_sub, end_sub, end1, end2, end;
     466       150964 :       struct symbolic_number *toinc_n_ptr, *n_end;
     467       150964 :       basic_block bb1, bb2;
     468              : 
     469       123896 :       if (!n1->base_addr || !n2->base_addr
     470       274850 :           || !operand_equal_p (n1->base_addr, n2->base_addr, 0))
     471        86010 :         return NULL;
     472              : 
     473        64954 :       if (!n1->offset != !n2->offset
     474        64954 :           || (n1->offset && !operand_equal_p (n1->offset, n2->offset, 0)))
     475         3370 :         return NULL;
     476              : 
     477        61584 :       start1 = 0;
     478        61584 :       if (!(n2->bytepos - n1->bytepos).is_constant (&start2))
     479              :         return NULL;
     480              : 
     481        61584 :       if (start1 < start2)
     482              :         {
     483              :           n_start = n1;
     484              :           start_sub = start2 - start1;
     485              :         }
     486              :       else
     487              :         {
     488        14540 :           n_start = n2;
     489        14540 :           start_sub = start1 - start2;
     490              :         }
     491              : 
     492        61584 :       bb1 = gimple_bb (source_stmt1);
     493        61584 :       bb2 = gimple_bb (source_stmt2);
     494        61584 :       if (dominated_by_p (CDI_DOMINATORS, bb1, bb2))
     495              :         source_stmt = source_stmt1;
     496              :       else
     497         4613 :         source_stmt = source_stmt2;
     498              : 
     499              :       /* Find the highest address at which a load is performed and
     500              :          compute related info.  */
     501        61584 :       end1 = start1 + (n1->range - 1);
     502        61584 :       end2 = start2 + (n2->range - 1);
     503        61584 :       if (end1 < end2)
     504              :         {
     505        61584 :           end = end2;
     506              :           end_sub = end2 - end1;
     507              :         }
     508              :       else
     509              :         {
     510              :           end = end1;
     511              :           end_sub = end1 - end2;
     512              :         }
     513        61584 :       n_end = (end2 > end1) ? n2 : n1;
     514              : 
     515              :       /* Find symbolic number whose lsb is the most significant.  */
     516        61584 :       if (BYTES_BIG_ENDIAN)
     517              :         toinc_n_ptr = (n_end == n1) ? n2 : n1;
     518              :       else
     519        61584 :         toinc_n_ptr = (n_start == n1) ? n2 : n1;
     520              : 
     521        61584 :       n->range = end - MIN (start1, start2) + 1;
     522              : 
     523              :       /* Check that the range of memory covered can be represented by
     524              :          a symbolic number.  */
     525        61584 :       if (n->range > 64 / BITS_PER_MARKER)
     526              :         return NULL;
     527              : 
     528              :       /* Reinterpret byte marks in symbolic number holding the value of
     529              :          bigger weight according to target endianness.  */
     530        47362 :       inc = BYTES_BIG_ENDIAN ? end_sub : start_sub;
     531        47362 :       size = TYPE_PRECISION (n1->type) / BITS_PER_UNIT;
     532       371967 :       for (i = 0; i < size; i++, inc <<= BITS_PER_MARKER)
     533              :         {
     534       324605 :           unsigned marker
     535       324605 :             = (toinc_n_ptr->n >> (i * BITS_PER_MARKER)) & MARKER_MASK;
     536       324605 :           if (marker && marker != MARKER_BYTE_UNKNOWN)
     537       123240 :             toinc_n_ptr->n += inc;
     538              :         }
     539              :     }
     540              :   else
     541              :     {
     542         7648 :       n->range = n1->range;
     543         7648 :       n_start = n1;
     544         7648 :       source_stmt = source_stmt1;
     545              :     }
     546              : 
     547        55010 :   if (!n1->alias_set
     548        55010 :       || alias_ptr_types_compatible_p (n1->alias_set, n2->alias_set))
     549        47330 :     n->alias_set = n1->alias_set;
     550              :   else
     551         7680 :     n->alias_set = ptr_type_node;
     552        55010 :   n->vuse = n_start->vuse;
     553        55010 :   n->base_addr = n_start->base_addr;
     554        55010 :   n->offset = n_start->offset;
     555        55010 :   n->src = n_start->src;
     556        55010 :   n->bytepos = n_start->bytepos;
     557        55010 :   n->type = n_start->type;
     558        55010 :   size = TYPE_PRECISION (n->type) / BITS_PER_UNIT;
     559        55010 :   uint64_t res_n = n1->n | n2->n;
     560              : 
     561       425736 :   for (i = 0, mask = MARKER_MASK; i < size; i++, mask <<= BITS_PER_MARKER)
     562              :     {
     563       371973 :       uint64_t masked1, masked2;
     564              : 
     565       371973 :       masked1 = n1->n & mask;
     566       371973 :       masked2 = n2->n & mask;
     567              :       /* If at least one byte is 0, all of 0 | x == 0 ^ x == 0 + x == x.  */
     568       371973 :       if (masked1 && masked2)
     569              :         {
     570              :           /* + can carry into upper bits, just punt.  */
     571         6413 :           if (code == PLUS_EXPR)
     572              :             return NULL;
     573              :           /* x | x is still x.  */
     574         5166 :           if (code == BIT_IOR_EXPR && masked1 == masked2)
     575          220 :             continue;
     576         4946 :           if (code == BIT_XOR_EXPR)
     577              :             {
     578              :               /* x ^ x is 0, but MARKER_BYTE_UNKNOWN stands for
     579              :                  unknown values and unknown ^ unknown is unknown.  */
     580          926 :               if (masked1 == masked2
     581          196 :                   && masked1 != ((uint64_t) MARKER_BYTE_UNKNOWN
     582          134 :                                  << i * BITS_PER_MARKER))
     583              :                 {
     584           62 :                   res_n &= ~mask;
     585           62 :                   continue;
     586              :                 }
     587              :             }
     588              :           /* Otherwise set the byte to unknown, it might still be
     589              :              later masked off.  */
     590         4884 :           res_n |= mask;
     591              :         }
     592              :     }
     593        53763 :   n->n = res_n;
     594        53763 :   n->n_ops = n1->n_ops + n2->n_ops;
     595              : 
     596        53763 :   return source_stmt;
     597              : }
     598              : 
     599              : /* find_bswap_or_nop_1 invokes itself recursively with N and tries to perform
     600              :    the operation given by the rhs of STMT on the result.  If the operation
     601              :    could successfully be executed the function returns a gimple stmt whose
     602              :    rhs's first tree is the expression of the source operand and NULL
     603              :    otherwise.  */
     604              : 
     605              : gimple *
     606      6227638 : find_bswap_or_nop_1 (gimple *stmt, struct symbolic_number *n, int limit)
     607              : {
     608      6227638 :   enum tree_code code;
     609      6227638 :   tree rhs1, rhs2 = NULL;
     610      6227638 :   gimple *rhs1_stmt, *rhs2_stmt, *source_stmt1;
     611      6227638 :   enum gimple_rhs_class rhs_class;
     612              : 
     613      6227638 :   if (!limit
     614      6176013 :       || !is_gimple_assign (stmt)
     615     11471721 :       || stmt_can_throw_internal (cfun, stmt))
     616       990579 :     return NULL;
     617              : 
     618      5237059 :   rhs1 = gimple_assign_rhs1 (stmt);
     619              : 
     620      5237059 :   if (find_bswap_or_nop_load (stmt, rhs1, n))
     621              :     return stmt;
     622              : 
     623              :   /* Handle BIT_FIELD_REF.  */
     624      4522309 :   if (TREE_CODE (rhs1) == BIT_FIELD_REF
     625      4522309 :       && TREE_CODE (TREE_OPERAND (rhs1, 0)) == SSA_NAME)
     626              :     {
     627        12540 :       if (!tree_fits_uhwi_p (TREE_OPERAND (rhs1, 1))
     628        12540 :           || !tree_fits_uhwi_p (TREE_OPERAND (rhs1, 2)))
     629              :         return NULL;
     630              : 
     631        12540 :       unsigned HOST_WIDE_INT bitsize = tree_to_uhwi (TREE_OPERAND (rhs1, 1));
     632        12540 :       unsigned HOST_WIDE_INT bitpos = tree_to_uhwi (TREE_OPERAND (rhs1, 2));
     633        12540 :       if (bitpos % BITS_PER_UNIT == 0
     634        12540 :           && bitsize % BITS_PER_UNIT == 0
     635        25080 :           && init_symbolic_number (n, TREE_OPERAND (rhs1, 0)))
     636              :         {
     637              :           /* Handle big-endian bit numbering in BIT_FIELD_REF.  */
     638          546 :           if (BYTES_BIG_ENDIAN)
     639              :             bitpos = TYPE_PRECISION (n->type) - bitpos - bitsize;
     640              : 
     641              :           /* Shift.  */
     642          546 :           if (!do_shift_rotate (RSHIFT_EXPR, n, bitpos))
     643              :             return NULL;
     644              : 
     645              :           /* Mask.  */
     646              :           uint64_t mask = 0;
     647         1357 :           for (unsigned i = 0; i < bitsize / BITS_PER_UNIT; i++)
     648          811 :             mask |= (uint64_t) MARKER_MASK << (i * BITS_PER_MARKER);
     649          546 :           n->n &= mask;
     650              : 
     651              :           /* Convert.  */
     652          546 :           n->type = TREE_TYPE (rhs1);
     653          546 :           if (!verify_symbolic_number_p (n, stmt))
     654              :             return NULL;
     655              : 
     656          534 :           if (!n->base_addr)
     657          534 :             n->range = TYPE_PRECISION (n->type) / BITS_PER_UNIT;
     658              : 
     659          534 :           return stmt;
     660              :         }
     661              : 
     662        11994 :       return NULL;
     663              :     }
     664              : 
     665      4509769 :   if (TREE_CODE (rhs1) != SSA_NAME)
     666              :     return NULL;
     667              : 
     668      4208216 :   code = gimple_assign_rhs_code (stmt);
     669      4208216 :   rhs_class = gimple_assign_rhs_class (stmt);
     670      4208216 :   rhs1_stmt = SSA_NAME_DEF_STMT (rhs1);
     671              : 
     672      4208216 :   if (rhs_class == GIMPLE_BINARY_RHS)
     673      3896107 :     rhs2 = gimple_assign_rhs2 (stmt);
     674              : 
     675              :   /* Handle unary rhs and binary rhs with integer constants as second
     676              :      operand.  */
     677              : 
     678      4208216 :   if (rhs_class == GIMPLE_UNARY_RHS
     679      3899536 :       || (rhs_class == GIMPLE_BINARY_RHS
     680      3896107 :           && TREE_CODE (rhs2) == INTEGER_CST))
     681              :     {
     682      1968663 :       if (code != BIT_AND_EXPR
     683      1968663 :           && code != LSHIFT_EXPR
     684              :           && code != RSHIFT_EXPR
     685      1893376 :           && code != LROTATE_EXPR
     686      1837506 :           && code != RROTATE_EXPR
     687      1831851 :           && !CONVERT_EXPR_CODE_P (code))
     688              :         return NULL;
     689              : 
     690       406025 :       source_stmt1 = find_bswap_or_nop_1 (rhs1_stmt, n, limit - 1);
     691              : 
     692              :       /* If find_bswap_or_nop_1 returned NULL, STMT is a leaf node and
     693              :          we have to initialize the symbolic number.  */
     694       406025 :       if (!source_stmt1)
     695              :         {
     696       270781 :           if (gimple_assign_load_p (stmt)
     697       270781 :               || !init_symbolic_number (n, rhs1))
     698        18141 :             return NULL;
     699              :           source_stmt1 = stmt;
     700              :         }
     701              : 
     702       387884 :       switch (code)
     703              :         {
     704        40100 :         case BIT_AND_EXPR:
     705        40100 :           {
     706        40100 :             int i, size = TYPE_PRECISION (n->type) / BITS_PER_UNIT;
     707        40100 :             uint64_t val = int_cst_value (rhs2), mask = 0;
     708        40100 :             uint64_t tmp = (1 << BITS_PER_UNIT) - 1;
     709              : 
     710              :             /* Only constants masking full bytes are allowed.  */
     711       226940 :             for (i = 0; i < size; i++, tmp <<= BITS_PER_UNIT)
     712       206273 :               if ((val & tmp) != 0 && (val & tmp) != tmp)
     713              :                 return NULL;
     714       186840 :               else if (val & tmp)
     715        96122 :                 mask |= (uint64_t) MARKER_MASK << (i * BITS_PER_MARKER);
     716              : 
     717        20667 :             n->n &= mask;
     718              :           }
     719        20667 :           break;
     720        96220 :         case LSHIFT_EXPR:
     721        96220 :         case RSHIFT_EXPR:
     722        96220 :         case LROTATE_EXPR:
     723        96220 :         case RROTATE_EXPR:
     724        96220 :           if (!do_shift_rotate (code, n, (int) TREE_INT_CST_LOW (rhs2)))
     725              :             return NULL;
     726              :           break;
     727       251564 :         CASE_CONVERT:
     728       251564 :           {
     729       251564 :             int i, type_size, old_type_size;
     730       251564 :             tree type;
     731              : 
     732       251564 :             type = TREE_TYPE (gimple_assign_lhs (stmt));
     733       251564 :             type_size = TYPE_PRECISION (type);
     734       251564 :             if (type_size % BITS_PER_UNIT != 0)
     735              :               return NULL;
     736       248831 :             type_size /= BITS_PER_UNIT;
     737       248831 :             if (type_size > 64 / BITS_PER_MARKER)
     738              :               return NULL;
     739              : 
     740              :             /* Sign extension: result is dependent on the value.  */
     741       248278 :             old_type_size = TYPE_PRECISION (n->type) / BITS_PER_UNIT;
     742       352198 :             if (!TYPE_UNSIGNED (n->type) && type_size > old_type_size
     743       286117 :                 && HEAD_MARKER (n->n, old_type_size))
     744       181479 :               for (i = 0; i < type_size - old_type_size; i++)
     745       143783 :                 n->n |= (uint64_t) MARKER_BYTE_UNKNOWN
     746       143783 :                         << ((type_size - 1 - i) * BITS_PER_MARKER);
     747              : 
     748       248278 :             if (type_size < 64 / BITS_PER_MARKER)
     749              :               {
     750              :                 /* If STMT casts to a smaller type mask out the bits not
     751              :                    belonging to the target type.  */
     752       121699 :                 n->n &= ((uint64_t) 1 << (type_size * BITS_PER_MARKER)) - 1;
     753              :               }
     754       248278 :             n->type = type;
     755       248278 :             if (!n->base_addr)
     756       167569 :               n->range = type_size;
     757              :           }
     758              :           break;
     759              :         default:
     760              :           return NULL;
     761       323554 :         };
     762       323554 :       return verify_symbolic_number_p (n, stmt) ? source_stmt1 : NULL;
     763              :     }
     764              : 
     765              :   /* Handle binary rhs.  */
     766              : 
     767      2236124 :   if (rhs_class == GIMPLE_BINARY_RHS)
     768              :     {
     769      2236124 :       struct symbolic_number n1, n2;
     770      2236124 :       gimple *source_stmt, *source_stmt2;
     771              : 
     772      2236124 :       if (!rhs2 || TREE_CODE (rhs2) != SSA_NAME)
     773              :         return NULL;
     774              : 
     775      2177472 :       rhs2_stmt = SSA_NAME_DEF_STMT (rhs2);
     776              : 
     777      2177472 :       switch (code)
     778              :         {
     779      1941491 :         case BIT_IOR_EXPR:
     780      1941491 :         case BIT_XOR_EXPR:
     781      1941491 :         case PLUS_EXPR:
     782      1941491 :           source_stmt1 = find_bswap_or_nop_1 (rhs1_stmt, &n1, limit - 1);
     783              : 
     784      1941491 :           if (!source_stmt1)
     785              :             return NULL;
     786              : 
     787       266166 :           source_stmt2 = find_bswap_or_nop_1 (rhs2_stmt, &n2, limit - 1);
     788              : 
     789       266166 :           if (!source_stmt2)
     790              :             return NULL;
     791              : 
     792       136322 :           if (TYPE_PRECISION (n1.type) != TYPE_PRECISION (n2.type))
     793              :             return NULL;
     794              : 
     795       136322 :           if (n1.vuse != n2.vuse)
     796              :             return NULL;
     797              : 
     798       108081 :           source_stmt
     799       108081 :             = perform_symbolic_merge (source_stmt1, &n1, source_stmt2, &n2, n,
     800              :                                       code);
     801              : 
     802       108081 :           if (!source_stmt)
     803              :             return NULL;
     804              : 
     805        18022 :           if (!verify_symbolic_number_p (n, stmt))
     806              :             return NULL;
     807              : 
     808              :           break;
     809              :         default:
     810              :           return NULL;
     811              :         }
     812              :       return source_stmt;
     813              :     }
     814              :   return NULL;
     815              : }
     816              : 
     817              : /* Like find_bswap_or_nop_1, but also handles CONSTRUCTOR and
     818              :    VEC_PACK_TRUNC_EXPR.  */
     819              : 
     820              : gimple *
     821      2106243 : find_bswap_or_nop_2 (gimple *stmt, struct symbolic_number *n, int limit)
     822              : {
     823      2106243 :   if (gimple *ret = find_bswap_or_nop_1 (stmt, n, limit))
     824              :     return ret;
     825              : 
     826      2098115 :   if (!limit
     827      2098115 :       || !is_gimple_assign (stmt)
     828      2098115 :       || BYTES_BIG_ENDIAN != WORDS_BIG_ENDIAN
     829      4196222 :       || stmt_can_throw_internal (cfun, stmt))
     830           42 :     return NULL;
     831              : 
     832      2098073 :   tree rhs1 = gimple_assign_rhs1 (stmt);
     833      2098073 :   if (gimple_assign_rhs_code (stmt) == VEC_PACK_TRUNC_EXPR
     834      2098073 :       && TREE_CODE (rhs1) == SSA_NAME)
     835              :     {
     836         1187 :       if (BYTES_BIG_ENDIAN)
     837              :         return NULL; /* For now... */
     838         1187 :       gimple *rhs1_stmt = SSA_NAME_DEF_STMT (rhs1);
     839         1187 :       tree rhs2 = gimple_assign_rhs2 (stmt);
     840         1187 :       if (TREE_CODE (rhs2) != SSA_NAME)
     841              :         return NULL;
     842         1187 :       gimple *rhs2_stmt = SSA_NAME_DEF_STMT (rhs2);
     843         1187 :       struct symbolic_number n1, n2;
     844              : 
     845         1187 :       gimple *source_stmt1 = find_bswap_or_nop_2 (rhs1_stmt, &n1, limit - 1);
     846         1187 :       if (!source_stmt1)
     847              :         return NULL;
     848           59 :       gimple *source_stmt2 = find_bswap_or_nop_2 (rhs2_stmt, &n2, limit - 1);
     849           59 :       if (!source_stmt2)
     850              :         return NULL;
     851              : 
     852           59 :       if (TYPE_PRECISION (n1.type) != TYPE_PRECISION (n2.type))
     853              :         return NULL;
     854           59 :       if (n1.vuse != n2.vuse)
     855              :         return NULL;
     856           59 :       auto nn2 = n2.n;
     857           59 :       n2.n = 0;
     858              :       /* We need to take into account number of elements of each vector,
     859              :          which perform_symbolic_merge doesn't know.  So, handle it as
     860              :          two separate BIT_IOR_EXPR merges, each time with one operand
     861              :          with changed mastk to all 0s, and then merge here.  */
     862           59 :       gimple *source_stmt
     863           59 :         = perform_symbolic_merge (source_stmt1, &n1, source_stmt2, &n2, n,
     864              :                                   BIT_IOR_EXPR);
     865           59 :       if (!source_stmt)
     866              :         return NULL;
     867           59 :       n2.n = nn2;
     868           59 :       auto nn1 = n->n;
     869           59 :       n1.n = 0;
     870           59 :       gimple *source_stmt3
     871           59 :         = perform_symbolic_merge (source_stmt1, &n1, source_stmt2, &n2, n,
     872              :                                   BIT_IOR_EXPR);
     873           59 :       gcc_assert (source_stmt == source_stmt3);
     874           59 :       nn2 = n->n;
     875           59 :       tree lhs = gimple_assign_lhs (stmt);
     876           59 :       int eltsize
     877           59 :         = TYPE_PRECISION (TREE_TYPE (TREE_TYPE (lhs))) / BITS_PER_UNIT;
     878           59 :       int nelts = TYPE_VECTOR_SUBPARTS (TREE_TYPE (lhs)).to_constant ();
     879           59 :       int size = eltsize * nelts;
     880           59 :       int hsize = size / 2;
     881           59 :       n->n = 0;
     882           59 :       if (!BYTES_BIG_ENDIAN)
     883          407 :         for (int i = 0; i < size; i++)
     884          348 :           n->n |= ((((i < hsize ? nn1 : nn2)
     885          348 :                      >> (((i % hsize) / eltsize * 2 * eltsize
     886          348 :                           + (i % eltsize))) * BITS_PER_MARKER) & MARKER_MASK)
     887          348 :                    << (i * BITS_PER_MARKER));
     888              :       else
     889              :         gcc_unreachable ();
     890              :       return source_stmt;
     891              :     }
     892              : 
     893      2096886 :   if (gimple_assign_rhs_code (stmt) != CONSTRUCTOR)
     894              :     return NULL;
     895              : 
     896        26335 :   tree type_size = TYPE_SIZE_UNIT (TREE_TYPE (gimple_get_lhs (stmt)));
     897        26335 :   unsigned HOST_WIDE_INT sz = tree_to_uhwi (type_size) * BITS_PER_UNIT;
     898        26335 :   if (sz != 16 && sz != 32 && sz != 64)
     899              :     return NULL;
     900      2115184 :   if (CONSTRUCTOR_NELTS (rhs1) == 0)
     901              :     return NULL;
     902        21978 :   tree eltype = TREE_TYPE (TREE_TYPE (rhs1));
     903        21978 :   unsigned HOST_WIDE_INT eltsz = int_size_in_bytes (eltype) * BITS_PER_UNIT;
     904        21978 :   if (TYPE_PRECISION (eltype) != eltsz)
     905              :     return NULL;
     906        21838 :   constructor_elt *elt;
     907        21838 :   unsigned int i;
     908        21838 :   tree type = build_nonstandard_integer_type (sz, 1);
     909        21838 :   gimple *ins_stmt = NULL;
     910        38434 :   FOR_EACH_VEC_SAFE_ELT (CONSTRUCTOR_ELTS (rhs1), i, elt)
     911              :     {
     912        34712 :       if (TREE_CODE (elt->value) != SSA_NAME
     913        34712 :           || !INTEGRAL_TYPE_P (TREE_TYPE (elt->value)))
     914        18116 :         return NULL;
     915        34028 :       struct symbolic_number n1;
     916        34028 :       gimple *source_stmt
     917        34028 :         = find_bswap_or_nop_1 (SSA_NAME_DEF_STMT (elt->value), &n1, limit - 1);
     918              : 
     919        34028 :       if (!source_stmt)
     920              :         return NULL;
     921              : 
     922        23713 :       n1.type = type;
     923        23713 :       if (!n1.base_addr)
     924         3787 :         n1.range = sz / BITS_PER_UNIT;
     925              : 
     926        23713 :       if (i == 0)
     927              :         {
     928        11390 :           ins_stmt = source_stmt;
     929        11390 :           *n = n1;
     930              :         }
     931              :       else
     932              :         {
     933        12323 :           if (n->vuse != n1.vuse)
     934         7117 :             return NULL;
     935              : 
     936         7534 :           struct symbolic_number n0 = *n;
     937              : 
     938         7534 :           if (!BYTES_BIG_ENDIAN)
     939              :             {
     940         7534 :               if (!do_shift_rotate (LSHIFT_EXPR, &n1, i * eltsz))
     941              :                 return NULL;
     942              :             }
     943              :           else if (!do_shift_rotate (LSHIFT_EXPR, &n0, eltsz))
     944              :             return NULL;
     945         7534 :           ins_stmt
     946         7534 :             = perform_symbolic_merge (ins_stmt, &n0, source_stmt,
     947              :                                       &n1, n, BIT_IOR_EXPR);
     948              : 
     949         7534 :           if (!ins_stmt)
     950              :             return NULL;
     951              :         }
     952              :     }
     953              :   return ins_stmt;
     954              : }
     955              : 
     956              : /* Helper for find_bswap_or_nop and try_coalesce_bswap to compute
     957              :    *CMPXCHG, *CMPNOP and adjust *N.  */
     958              : 
     959              : void
     960        35625 : find_bswap_or_nop_finalize (struct symbolic_number *n, uint64_t *cmpxchg,
     961              :                             uint64_t *cmpnop, bool *cast64_to_32)
     962              : {
     963        35625 :   unsigned rsize;
     964        35625 :   uint64_t tmpn, mask;
     965              : 
     966              :   /* The number which the find_bswap_or_nop_1 result should match in order
     967              :      to have a full byte swap.  The number is shifted to the right
     968              :      according to the size of the symbolic number before using it.  */
     969        35625 :   *cmpxchg = CMPXCHG;
     970        35625 :   *cmpnop = CMPNOP;
     971        35625 :   *cast64_to_32 = false;
     972              : 
     973              :   /* Find real size of result (highest non-zero byte).  */
     974        35625 :   if (n->base_addr)
     975       253117 :     for (tmpn = n->n, rsize = 0; tmpn; tmpn >>= BITS_PER_MARKER, rsize++);
     976              :   else
     977         1824 :     rsize = n->range;
     978              : 
     979              :   /* Zero out the bits corresponding to untouched bytes in original gimple
     980              :      expression.  */
     981        35625 :   if (n->range < (int) sizeof (int64_t))
     982              :     {
     983        11484 :       mask = ((uint64_t) 1 << (n->range * BITS_PER_MARKER)) - 1;
     984        11484 :       if (n->base_addr == NULL
     985          999 :           && n->range == 4
     986        12198 :           && int_size_in_bytes (TREE_TYPE (n->src)) == 8)
     987              :         {
     988              :           /* If all bytes in n->n are either 0 or in [5..8] range, this
     989              :              might be a candidate for (unsigned) __builtin_bswap64 (src).
     990              :              It is not worth it for (unsigned short) __builtin_bswap64 (src)
     991              :              or (unsigned short) __builtin_bswap32 (src).  */
     992          129 :           *cast64_to_32 = true;
     993          269 :           for (tmpn = n->n; tmpn; tmpn >>= BITS_PER_MARKER)
     994          232 :             if ((tmpn & MARKER_MASK)
     995          232 :                 && ((tmpn & MARKER_MASK) <= 4 || (tmpn & MARKER_MASK) > 8))
     996              :               {
     997           92 :                 *cast64_to_32 = false;
     998           92 :                 break;
     999              :               }
    1000              :         }
    1001        11484 :       if (*cast64_to_32)
    1002           37 :         *cmpxchg &= mask;
    1003              :       else
    1004        11447 :         *cmpxchg >>= (64 / BITS_PER_MARKER - n->range) * BITS_PER_MARKER;
    1005        11484 :       *cmpnop &= mask;
    1006              :     }
    1007              : 
    1008              :   /* Zero out the bits corresponding to unused bytes in the result of the
    1009              :      gimple expression.  */
    1010        35625 :   if (rsize < n->range)
    1011              :     {
    1012         2556 :       if (BYTES_BIG_ENDIAN)
    1013              :         {
    1014              :           mask = ((uint64_t) 1 << (rsize * BITS_PER_MARKER)) - 1;
    1015              :           *cmpxchg &= mask;
    1016              :           if (n->range - rsize == sizeof (int64_t))
    1017              :             *cmpnop = 0;
    1018              :           else
    1019              :             *cmpnop >>= (n->range - rsize) * BITS_PER_MARKER;
    1020              :         }
    1021              :       else
    1022              :         {
    1023         2556 :           mask = ((uint64_t) 1 << (rsize * BITS_PER_MARKER)) - 1;
    1024         2556 :           if (n->range - rsize == sizeof (int64_t))
    1025            6 :             *cmpxchg = 0;
    1026              :           else
    1027         2550 :             *cmpxchg >>= (n->range - rsize) * BITS_PER_MARKER;
    1028         2556 :           *cmpnop &= mask;
    1029              :         }
    1030         2556 :       n->range = rsize;
    1031              :     }
    1032              : 
    1033        35625 :   if (*cast64_to_32)
    1034           37 :     n->range = 8;
    1035        35625 :   n->range *= BITS_PER_UNIT;
    1036        35625 : }
    1037              : 
    1038              : /* Helper function for find_bswap_or_nop,
    1039              :    Return true if N is a swap or nop with MASK.  */
    1040              : static bool
    1041        13126 : is_bswap_or_nop_p (uint64_t n, uint64_t cmpxchg,
    1042              :                    uint64_t cmpnop, uint64_t* mask,
    1043              :                    bool* bswap)
    1044              : {
    1045        13126 :   *mask = ~(uint64_t) 0;
    1046        13126 :   if (n == cmpnop)
    1047         5219 :     *bswap = false;
    1048         7907 :   else if (n == cmpxchg)
    1049         2282 :     *bswap = true;
    1050              :   else
    1051              :     {
    1052              :       int set = 0;
    1053        12867 :       for (uint64_t msk = MARKER_MASK; msk; msk <<= BITS_PER_MARKER)
    1054        12293 :         if ((n & msk) == 0)
    1055         4732 :           *mask &= ~msk;
    1056         7561 :         else if ((n & msk) == (cmpxchg & msk))
    1057         2510 :           set++;
    1058              :         else
    1059              :           return false;
    1060              : 
    1061          574 :       if (set < 2)
    1062              :         return false;
    1063          573 :       *bswap = true;
    1064              :     }
    1065              :   return true;
    1066              : }
    1067              : 
    1068              : 
    1069              : /* Check if STMT completes a bswap implementation or a read in a given
    1070              :    endianness consisting of ORs, SHIFTs and ANDs and sets *BSWAP
    1071              :    accordingly.  It also sets N to represent the kind of operations
    1072              :    performed: size of the resulting expression and whether it works on
    1073              :    a memory source, and if so alias-set and vuse.  At last, the
    1074              :    function returns a stmt whose rhs's first tree is the source
    1075              :    expression.  */
    1076              : 
    1077              : gimple *
    1078      2104997 : find_bswap_or_nop (gimple *stmt, struct symbolic_number *n, bool *bswap,
    1079              :                    bool *cast64_to_32, uint64_t *mask, uint64_t* l_rotate)
    1080              : {
    1081      2104997 :   tree type_size = TYPE_SIZE_UNIT (TREE_TYPE (gimple_get_lhs (stmt)));
    1082      2104997 :   if (!tree_fits_uhwi_p (type_size))
    1083              :     return NULL;
    1084              : 
    1085              :   /* The last parameter determines the depth search limit.  It usually
    1086              :      correlates directly to the number n of bytes to be touched.  We
    1087              :      increase that number by 2 * (log2(n) + 1) here in order to also
    1088              :      cover signed -> unsigned conversions of the src operand as can be seen
    1089              :      in libgcc, and for initial shift/and operation of the src operand.  */
    1090      2104997 :   int limit = tree_to_uhwi (type_size);
    1091      2104997 :   limit += 2 * (1 + (int) ceil_log2 ((unsigned HOST_WIDE_INT) limit));
    1092      2104997 :   gimple *ins_stmt = find_bswap_or_nop_2 (stmt, n, limit);
    1093      2104997 :   if (!ins_stmt)
    1094              :     return NULL;
    1095              : 
    1096        11791 :   uint64_t cmpxchg, cmpnop;
    1097        11791 :   uint64_t orig_range = n->range * BITS_PER_UNIT;
    1098        11791 :   find_bswap_or_nop_finalize (n, &cmpxchg, &cmpnop, cast64_to_32);
    1099              : 
    1100              :   /* A complete byte swap should make the symbolic number to start with
    1101              :      the largest digit in the highest order byte. Unchanged symbolic
    1102              :      number indicates a read with same endianness as target architecture.  */
    1103        11791 :   *l_rotate = 0;
    1104        11791 :   uint64_t tmp_n = n->n;
    1105        11791 :   if (!is_bswap_or_nop_p (tmp_n, cmpxchg, cmpnop, mask, bswap))
    1106              :     {
    1107              :       /* Try bswap + lrotate.  */
    1108              :       /* TODO, handle cast64_to_32 and big/litte_endian memory
    1109              :          source when rsize < range.  */
    1110         3839 :       if (n->range == orig_range
    1111              :           /* There're case like 0x300000200 for uint32->uint64 cast,
    1112              :              Don't handle this.  */
    1113         2936 :           && n->range == TYPE_PRECISION (n->type)
    1114         1681 :           && ((orig_range == 32
    1115          576 :                && optab_handler (rotl_optab, SImode) != CODE_FOR_nothing)
    1116         1105 :               || (orig_range == 64
    1117         1086 :                   && optab_handler (rotl_optab, DImode) != CODE_FOR_nothing))
    1118         5501 :           && (tmp_n & MARKER_MASK) < orig_range / BITS_PER_UNIT)
    1119              :         {
    1120         1339 :           uint64_t range = (orig_range / BITS_PER_UNIT) * BITS_PER_MARKER;
    1121         1339 :           uint64_t count = (tmp_n & MARKER_MASK) * BITS_PER_MARKER;
    1122              :           /* .i.e. handle 0x203040506070800 when lower byte is zero.  */
    1123         1339 :           if (!count)
    1124              :             {
    1125           60 :               for (uint64_t i = 1; i != range / BITS_PER_MARKER; i++)
    1126              :                 {
    1127           60 :                   count = (tmp_n >> i * BITS_PER_MARKER) & MARKER_MASK;
    1128           60 :                   if (count)
    1129              :                     {
    1130              :                       /* Count should be meaningful not 0xff.  */
    1131           31 :                       if (count <= range / BITS_PER_MARKER)
    1132              :                         {
    1133           31 :                           count = (count + i) * BITS_PER_MARKER % range;
    1134           31 :                           if (!count)
    1135              :                             return NULL;
    1136              :                           break;
    1137              :                         }
    1138              :                       else
    1139              :                         return NULL;
    1140              :                     }
    1141              :                 }
    1142              :             }
    1143         1335 :           tmp_n = tmp_n >> count | tmp_n << (range - count);
    1144         1335 :           if (orig_range == 32)
    1145          366 :             tmp_n &= (1ULL << 32) - 1;
    1146         1335 :           if (!is_bswap_or_nop_p (tmp_n, cmpxchg, cmpnop, mask, bswap))
    1147              :             return NULL;
    1148          122 :           *l_rotate = count / BITS_PER_MARKER * BITS_PER_UNIT;
    1149          122 :           gcc_assert (*bswap);
    1150              :         }
    1151              :       else
    1152         2500 :         return NULL;
    1153              :     }
    1154              : 
    1155              :   /* Useless bit manipulation performed by code.  */
    1156         8074 :   if (!n->base_addr && n->n == cmpnop && n->n_ops == 1)
    1157              :     return NULL;
    1158              : 
    1159              :   return ins_stmt;
    1160              : }
    1161              : 
    1162              : const pass_data pass_data_optimize_bswap =
    1163              : {
    1164              :   GIMPLE_PASS, /* type */
    1165              :   "bswap", /* name */
    1166              :   OPTGROUP_NONE, /* optinfo_flags */
    1167              :   TV_NONE, /* tv_id */
    1168              :   PROP_ssa, /* properties_required */
    1169              :   0, /* properties_provided */
    1170              :   0, /* properties_destroyed */
    1171              :   0, /* todo_flags_start */
    1172              :   0, /* todo_flags_finish */
    1173              : };
    1174              : 
    1175              : class pass_optimize_bswap : public gimple_opt_pass
    1176              : {
    1177              : public:
    1178       287872 :   pass_optimize_bswap (gcc::context *ctxt)
    1179       575744 :     : gimple_opt_pass (pass_data_optimize_bswap, ctxt)
    1180              :   {}
    1181              : 
    1182              :   /* opt_pass methods: */
    1183      1038027 :   bool gate (function *) final override
    1184              :     {
    1185      1038027 :       return flag_expensive_optimizations && optimize && BITS_PER_UNIT == 8;
    1186              :     }
    1187              : 
    1188              :   unsigned int execute (function *) final override;
    1189              : 
    1190              : }; // class pass_optimize_bswap
    1191              : 
    1192              : /* Helper function for bswap_replace.  Build VIEW_CONVERT_EXPR from
    1193              :    VAL to TYPE.  If VAL has different type size, emit a NOP_EXPR cast
    1194              :    first.  */
    1195              : 
    1196              : static tree
    1197         2250 : bswap_view_convert (gimple_stmt_iterator *gsi, tree type, tree val,
    1198              :                     bool before)
    1199              : {
    1200         2250 :   gcc_assert (INTEGRAL_TYPE_P (TREE_TYPE (val))
    1201              :               || POINTER_TYPE_P (TREE_TYPE (val)));
    1202         2250 :   if (TYPE_SIZE (type) != TYPE_SIZE (TREE_TYPE (val)))
    1203              :     {
    1204            1 :       HOST_WIDE_INT prec = TREE_INT_CST_LOW (TYPE_SIZE (type));
    1205            1 :       if (POINTER_TYPE_P (TREE_TYPE (val)))
    1206              :         {
    1207            0 :           gimple *g
    1208            0 :             = gimple_build_assign (make_ssa_name (pointer_sized_int_node),
    1209              :                                    NOP_EXPR, val);
    1210            0 :           if (before)
    1211            0 :             gsi_insert_before (gsi, g, GSI_SAME_STMT);
    1212              :           else
    1213            0 :             gsi_insert_after (gsi, g, GSI_NEW_STMT);
    1214            0 :           val = gimple_assign_lhs (g);
    1215              :         }
    1216            1 :       tree itype = build_nonstandard_integer_type (prec, 1);
    1217            1 :       gimple *g = gimple_build_assign (make_ssa_name (itype), NOP_EXPR, val);
    1218            1 :       if (before)
    1219            0 :         gsi_insert_before (gsi, g, GSI_SAME_STMT);
    1220              :       else
    1221            1 :         gsi_insert_after (gsi, g, GSI_NEW_STMT);
    1222            1 :       val = gimple_assign_lhs (g);
    1223              :     }
    1224         2250 :   return build1 (VIEW_CONVERT_EXPR, type, val);
    1225              : }
    1226              : 
    1227              : /* Perform the bswap optimization: replace the expression computed in the rhs
    1228              :    of gsi_stmt (GSI) (or if NULL add instead of replace) by an equivalent
    1229              :    bswap, load or load + bswap expression.
    1230              :    Which of these alternatives replace the rhs is given by N->base_addr (non
    1231              :    null if a load is needed) and BSWAP.  The type, VUSE and set-alias of the
    1232              :    load to perform are also given in N while the builtin bswap invoke is given
    1233              :    in FNDEL.  Finally, if a load is involved, INS_STMT refers to one of the
    1234              :    load statements involved to construct the rhs in gsi_stmt (GSI) and
    1235              :    N->range gives the size of the rhs expression for maintaining some
    1236              :    statistics.
    1237              : 
    1238              :    Note that if the replacement involve a load and if gsi_stmt (GSI) is
    1239              :    non-NULL, that stmt is moved just after INS_STMT to do the load with the
    1240              :    same VUSE which can lead to gsi_stmt (GSI) changing of basic block.  */
    1241              : 
    1242              : tree
    1243         6581 : bswap_replace (gimple_stmt_iterator gsi, gimple *ins_stmt, tree fndecl,
    1244              :                tree bswap_type, tree load_type, struct symbolic_number *n,
    1245              :                bool bswap, uint64_t mask, uint64_t l_rotate)
    1246              : {
    1247         6581 :   tree src, tmp, tgt = NULL_TREE;
    1248         6581 :   gimple *bswap_stmt, *mask_stmt = NULL, *rotl_stmt = NULL;
    1249         6581 :   tree_code conv_code = NOP_EXPR;
    1250              : 
    1251         6581 :   gimple *cur_stmt = gsi_stmt (gsi);
    1252         6581 :   src = n->src;
    1253         6581 :   if (cur_stmt)
    1254              :     {
    1255         5747 :       tgt = gimple_assign_lhs (cur_stmt);
    1256         5747 :       if ((gimple_assign_rhs_code (cur_stmt) == CONSTRUCTOR
    1257         3528 :            || gimple_assign_rhs_code (cur_stmt) == VEC_PACK_TRUNC_EXPR)
    1258         2250 :           && tgt
    1259         7997 :           && VECTOR_TYPE_P (TREE_TYPE (tgt)))
    1260              :         conv_code = VIEW_CONVERT_EXPR;
    1261              :     }
    1262              : 
    1263              :   /* Need to load the value from memory first.  */
    1264         6581 :   if (n->base_addr)
    1265              :     {
    1266         5872 :       gimple_stmt_iterator gsi_ins = gsi;
    1267         5872 :       if (ins_stmt)
    1268         5799 :         gsi_ins = gsi_for_stmt (ins_stmt);
    1269         5872 :       tree addr_expr, addr_tmp, val_expr, val_tmp;
    1270         5872 :       tree load_offset_ptr, aligned_load_type;
    1271         5872 :       gimple *load_stmt;
    1272         5872 :       unsigned align = get_object_alignment (src);
    1273         5872 :       poly_int64 load_offset = 0;
    1274              : 
    1275         5872 :       if (cur_stmt)
    1276              :         {
    1277         5451 :           basic_block ins_bb = gimple_bb (ins_stmt);
    1278         5451 :           basic_block cur_bb = gimple_bb (cur_stmt);
    1279         5451 :           if (!dominated_by_p (CDI_DOMINATORS, cur_bb, ins_bb))
    1280         4117 :             return NULL_TREE;
    1281              : 
    1282              :           /* Move cur_stmt just before one of the load of the original
    1283              :              to ensure it has the same VUSE.  See PR61517 for what could
    1284              :              go wrong.  */
    1285         5451 :           if (gimple_bb (cur_stmt) != gimple_bb (ins_stmt))
    1286         1015 :             reset_flow_sensitive_info (gimple_assign_lhs (cur_stmt));
    1287         5451 :           gsi_move_before (&gsi, &gsi_ins);
    1288         5451 :           gsi = gsi_for_stmt (cur_stmt);
    1289              :         }
    1290              :       else
    1291          421 :         gsi = gsi_ins;
    1292              : 
    1293              :       /* Compute address to load from and cast according to the size
    1294              :          of the load.  */
    1295         5872 :       addr_expr = build_fold_addr_expr (src);
    1296         5872 :       if (is_gimple_mem_ref_addr (addr_expr))
    1297          460 :         addr_tmp = unshare_expr (addr_expr);
    1298              :       else
    1299              :         {
    1300         5412 :           addr_tmp = unshare_expr (n->base_addr);
    1301         5412 :           if (!is_gimple_mem_ref_addr (addr_tmp))
    1302            0 :             addr_tmp = force_gimple_operand_gsi_1 (&gsi, addr_tmp,
    1303              :                                                    is_gimple_mem_ref_addr,
    1304              :                                                    NULL_TREE, true,
    1305              :                                                    GSI_SAME_STMT);
    1306         5412 :           load_offset = n->bytepos;
    1307         5412 :           if (n->offset)
    1308              :             {
    1309            0 :               tree off
    1310            0 :                 = force_gimple_operand_gsi (&gsi, unshare_expr (n->offset),
    1311              :                                             true, NULL_TREE, true,
    1312              :                                             GSI_SAME_STMT);
    1313            0 :               gimple *stmt
    1314            0 :                 = gimple_build_assign (make_ssa_name (TREE_TYPE (addr_tmp)),
    1315              :                                        POINTER_PLUS_EXPR, addr_tmp, off);
    1316            0 :               gsi_insert_before (&gsi, stmt, GSI_SAME_STMT);
    1317            0 :               addr_tmp = gimple_assign_lhs (stmt);
    1318              :             }
    1319              :         }
    1320              : 
    1321              :       /* Perform the load.  */
    1322         5872 :       aligned_load_type = load_type;
    1323         5872 :       if (align < TYPE_ALIGN (load_type))
    1324         5133 :         aligned_load_type = build_aligned_type (load_type, align);
    1325         5872 :       load_offset_ptr = build_int_cst (n->alias_set, load_offset);
    1326         5872 :       val_expr = fold_build2 (MEM_REF, aligned_load_type, addr_tmp,
    1327              :                               load_offset_ptr);
    1328              : 
    1329         5872 :       if (!bswap)
    1330              :         {
    1331         4117 :           if (n->range == 16)
    1332         1184 :             nop_stats.found_16bit++;
    1333         2933 :           else if (n->range == 32)
    1334          625 :             nop_stats.found_32bit++;
    1335              :           else
    1336              :             {
    1337         2308 :               gcc_assert (n->range == 64);
    1338         2308 :               nop_stats.found_64bit++;
    1339              :             }
    1340              : 
    1341              :           /* Convert the result of load if necessary.  */
    1342         4117 :           if (tgt && !useless_type_conversion_p (TREE_TYPE (tgt), load_type))
    1343              :             {
    1344         3586 :               val_tmp = make_temp_ssa_name (aligned_load_type, NULL,
    1345              :                                             "load_dst");
    1346         3586 :               load_stmt = gimple_build_assign (val_tmp, val_expr);
    1347         3586 :               gimple_set_vuse (load_stmt, n->vuse);
    1348         3586 :               gsi_insert_before (&gsi, load_stmt, GSI_SAME_STMT);
    1349         3586 :               if (conv_code == VIEW_CONVERT_EXPR)
    1350         2110 :                 val_tmp = bswap_view_convert (&gsi, TREE_TYPE (tgt), val_tmp,
    1351              :                                               true);
    1352         3586 :               gimple_assign_set_rhs_with_ops (&gsi, conv_code, val_tmp);
    1353         3586 :               update_stmt (cur_stmt);
    1354              :             }
    1355          531 :           else if (cur_stmt)
    1356              :             {
    1357          444 :               gimple_assign_set_rhs_with_ops (&gsi, MEM_REF, val_expr);
    1358          444 :               gimple_set_vuse (cur_stmt, n->vuse);
    1359          444 :               update_stmt (cur_stmt);
    1360              :             }
    1361              :           else
    1362              :             {
    1363           87 :               tgt = make_ssa_name (load_type);
    1364           87 :               cur_stmt = gimple_build_assign (tgt, MEM_REF, val_expr);
    1365           87 :               gimple_set_vuse (cur_stmt, n->vuse);
    1366           87 :               gsi_insert_before (&gsi, cur_stmt, GSI_SAME_STMT);
    1367              :             }
    1368              : 
    1369         4117 :           if (dump_file)
    1370              :             {
    1371           31 :               fprintf (dump_file,
    1372              :                        "%d bit load in target endianness found at: ",
    1373           31 :                        (int) n->range);
    1374           31 :               print_gimple_stmt (dump_file, cur_stmt, 0);
    1375              :             }
    1376         4117 :           return tgt;
    1377              :         }
    1378              :       else
    1379              :         {
    1380         1755 :           val_tmp = make_temp_ssa_name (aligned_load_type, NULL, "load_dst");
    1381         1755 :           load_stmt = gimple_build_assign (val_tmp, val_expr);
    1382         1755 :           gimple_set_vuse (load_stmt, n->vuse);
    1383         1755 :           gsi_insert_before (&gsi, load_stmt, GSI_SAME_STMT);
    1384              :         }
    1385         1755 :       src = val_tmp;
    1386              :     }
    1387          709 :   else if (!bswap)
    1388              :     {
    1389          249 :       gimple *g = NULL;
    1390          249 :       if (tgt && !useless_type_conversion_p (TREE_TYPE (tgt), TREE_TYPE (src)))
    1391              :         {
    1392            2 :           if (!is_gimple_val (src))
    1393              :             return NULL_TREE;
    1394            2 :           if (conv_code == VIEW_CONVERT_EXPR)
    1395            2 :             src = bswap_view_convert (&gsi, TREE_TYPE (tgt), src, true);
    1396            2 :           g = gimple_build_assign (tgt, conv_code, src);
    1397              :         }
    1398          247 :       else if (cur_stmt)
    1399            7 :         g = gimple_build_assign (tgt, src);
    1400              :       else
    1401              :         tgt = src;
    1402          249 :       if (n->range == 16)
    1403           67 :         nop_stats.found_16bit++;
    1404          182 :       else if (n->range == 32)
    1405          100 :         nop_stats.found_32bit++;
    1406              :       else
    1407              :         {
    1408           82 :           gcc_assert (n->range == 64);
    1409           82 :           nop_stats.found_64bit++;
    1410              :         }
    1411          249 :       if (dump_file)
    1412              :         {
    1413            1 :           fprintf (dump_file,
    1414              :                    "%d bit reshuffle in target endianness found at: ",
    1415              :                    (int) n->range);
    1416            1 :           if (cur_stmt)
    1417            0 :             print_gimple_stmt (dump_file, cur_stmt, 0);
    1418              :           else
    1419              :             {
    1420            1 :               print_generic_expr (dump_file, tgt, TDF_NONE);
    1421            1 :               fprintf (dump_file, "\n");
    1422              :             }
    1423              :         }
    1424          249 :       if (cur_stmt)
    1425            9 :         gsi_replace (&gsi, g, true);
    1426          249 :       return tgt;
    1427              :     }
    1428          460 :   else if (TREE_CODE (src) == BIT_FIELD_REF)
    1429            0 :     src = TREE_OPERAND (src, 0);
    1430              : 
    1431         2215 :   if (n->range == 16)
    1432          993 :     bswap_stats.found_16bit++;
    1433         1222 :   else if (n->range == 32)
    1434          811 :     bswap_stats.found_32bit++;
    1435              :   else
    1436              :     {
    1437          411 :       gcc_assert (n->range == 64);
    1438          411 :       bswap_stats.found_64bit++;
    1439              :     }
    1440              : 
    1441         2215 :   tmp = src;
    1442              : 
    1443              :   /* Convert the src expression if necessary.  */
    1444         2215 :   if (!useless_type_conversion_p (TREE_TYPE (tmp), bswap_type))
    1445              :     {
    1446          166 :       gimple *convert_stmt;
    1447              : 
    1448          166 :       tmp = make_temp_ssa_name (bswap_type, NULL, "bswapsrc");
    1449          166 :       convert_stmt = gimple_build_assign (tmp, NOP_EXPR, src);
    1450          166 :       gsi_insert_before (&gsi, convert_stmt, GSI_SAME_STMT);
    1451              :     }
    1452              : 
    1453              :   /* Canonical form for 16 bit bswap is a rotate expression.  Only 16bit values
    1454              :      are considered as rotation of 2N bit values by N bits is generally not
    1455              :      equivalent to a bswap.  Consider for instance 0x01020304 r>> 16 which
    1456              :      gives 0x03040102 while a bswap for that value is 0x04030201.  */
    1457         2215 :   if (bswap && n->range == 16)
    1458              :     {
    1459          993 :       tree count = build_int_cst (NULL, BITS_PER_UNIT);
    1460          993 :       src = fold_build2 (LROTATE_EXPR, bswap_type, tmp, count);
    1461          993 :       bswap_stmt = gimple_build_assign (NULL, src);
    1462          993 :     }
    1463              :   else
    1464         1222 :     bswap_stmt = gimple_build_call (fndecl, 1, tmp);
    1465              : 
    1466         2215 :   if (tgt == NULL_TREE)
    1467          507 :     tgt = make_ssa_name (bswap_type);
    1468         2215 :   tmp = tgt;
    1469              : 
    1470         2215 :   if (mask != ~(uint64_t) 0)
    1471              :     {
    1472          507 :       tree m = build_int_cst (bswap_type, mask);
    1473          507 :       tmp = make_temp_ssa_name (bswap_type, NULL, "bswapdst");
    1474          507 :       gimple_set_lhs (bswap_stmt, tmp);
    1475          507 :       mask_stmt = gimple_build_assign (tgt, BIT_AND_EXPR, tmp, m);
    1476          507 :       tmp = tgt;
    1477              :     }
    1478              : 
    1479         2215 :   if (l_rotate)
    1480              :     {
    1481          122 :       tree m = build_int_cst (bswap_type, l_rotate);
    1482          177 :       tmp = make_temp_ssa_name (bswap_type, NULL,
    1483              :                                 mask_stmt ? "bswapmaskdst" : "bswapdst");
    1484          177 :       gimple_set_lhs (mask_stmt ? mask_stmt : bswap_stmt, tmp);
    1485          122 :       rotl_stmt = gimple_build_assign (tgt, LROTATE_EXPR, tmp, m);
    1486          122 :       tmp = tgt;
    1487              :     }
    1488              : 
    1489              :   /* Convert the result if necessary.  */
    1490         2215 :   if (!useless_type_conversion_p (TREE_TYPE (tgt), bswap_type))
    1491              :     {
    1492          756 :       tmp = make_temp_ssa_name (bswap_type, NULL, "bswapdst");
    1493          756 :       tree atmp = tmp;
    1494          756 :       gimple_stmt_iterator gsi2 = gsi;
    1495          756 :       if (conv_code == VIEW_CONVERT_EXPR)
    1496          138 :         atmp = bswap_view_convert (&gsi2, TREE_TYPE (tgt), tmp, false);
    1497          756 :       gimple *convert_stmt = gimple_build_assign (tgt, conv_code, atmp);
    1498          756 :       gsi_insert_after (&gsi2, convert_stmt, GSI_SAME_STMT);
    1499              :     }
    1500              : 
    1501         4308 :   gimple_set_lhs (rotl_stmt ? rotl_stmt
    1502         2093 :                   : mask_stmt ? mask_stmt : bswap_stmt, tmp);
    1503              : 
    1504         2215 :   if (dump_file)
    1505              :     {
    1506           37 :       fprintf (dump_file, "%d bit bswap implementation found at: ",
    1507           37 :                (int) n->range);
    1508           37 :       if (cur_stmt)
    1509           28 :         print_gimple_stmt (dump_file, cur_stmt, 0);
    1510              :       else
    1511              :         {
    1512            9 :           print_generic_expr (dump_file, tgt, TDF_NONE);
    1513            9 :           fprintf (dump_file, "\n");
    1514              :         }
    1515              :     }
    1516              : 
    1517         2215 :   if (cur_stmt)
    1518              :     {
    1519         1708 :       if (rotl_stmt)
    1520          122 :         gsi_insert_after (&gsi, rotl_stmt, GSI_SAME_STMT);
    1521         1708 :       if (mask_stmt)
    1522          507 :         gsi_insert_after (&gsi, mask_stmt, GSI_SAME_STMT);
    1523         1708 :       gsi_insert_after (&gsi, bswap_stmt, GSI_SAME_STMT);
    1524         1708 :       gsi_remove (&gsi, true);
    1525              :     }
    1526              :   else
    1527              :     {
    1528          507 :       gsi_insert_before (&gsi, bswap_stmt, GSI_SAME_STMT);
    1529          507 :       if (mask_stmt)
    1530            0 :         gsi_insert_before (&gsi, mask_stmt, GSI_SAME_STMT);
    1531          507 :       if (rotl_stmt)
    1532            0 :         gsi_insert_after (&gsi, rotl_stmt, GSI_SAME_STMT);
    1533              :     }
    1534              :   return tgt;
    1535              : }
    1536              : 
    1537              : /* Try to optimize an assignment CUR_STMT with CONSTRUCTOR/VEC_PACK_TRUNC_EXPR
    1538              :    on the rhs using bswap optimizations.  CDI_DOMINATORS need to be
    1539              :    computed on entry.  Return true if it has been optimized and
    1540              :    TODO_update_ssa is needed.  */
    1541              : 
    1542              : static bool
    1543      1029504 : maybe_optimize_vector_constructor (gimple *cur_stmt)
    1544              : {
    1545      1029504 :   tree fndecl = NULL_TREE, bswap_type = NULL_TREE, load_type;
    1546      1029504 :   struct symbolic_number n;
    1547      1029504 :   bool bswap;
    1548              : 
    1549      1029504 :   gcc_assert (is_gimple_assign (cur_stmt));
    1550      1029504 :   switch (gimple_assign_rhs_code (cur_stmt))
    1551              :     {
    1552      1029504 :     case CONSTRUCTOR:
    1553      1029504 :     case VEC_PACK_TRUNC_EXPR:
    1554      1029504 :       break;
    1555            0 :     default:
    1556            0 :       gcc_unreachable ();
    1557              :     }
    1558              : 
    1559      1029504 :   tree rhs = gimple_assign_rhs1 (cur_stmt);
    1560      1029504 :   if (!VECTOR_TYPE_P (TREE_TYPE (rhs))
    1561        89378 :       || !INTEGRAL_TYPE_P (TREE_TYPE (TREE_TYPE (rhs)))
    1562      1115587 :       || gimple_assign_lhs (cur_stmt) == NULL_TREE)
    1563              :     return false;
    1564              : 
    1565        86083 :   HOST_WIDE_INT sz = int_size_in_bytes (TREE_TYPE (rhs)) * BITS_PER_UNIT;
    1566        86083 :   switch (sz)
    1567              :     {
    1568          944 :     case 16:
    1569          944 :       load_type = bswap_type = uint16_type_node;
    1570          944 :       break;
    1571          685 :     case 32:
    1572          685 :       if (builtin_decl_explicit_p (BUILT_IN_BSWAP32)
    1573         1364 :           && optab_handler (bswap_optab, SImode) != CODE_FOR_nothing)
    1574              :         {
    1575          679 :           load_type = uint32_type_node;
    1576          679 :           fndecl = builtin_decl_explicit (BUILT_IN_BSWAP32);
    1577          679 :           bswap_type = TREE_VALUE (TYPE_ARG_TYPES (TREE_TYPE (fndecl)));
    1578              :         }
    1579              :       else
    1580            6 :         return false;
    1581          679 :       break;
    1582        21815 :     case 64:
    1583        21815 :       if (builtin_decl_explicit_p (BUILT_IN_BSWAP64)
    1584        42685 :           && (optab_handler (bswap_optab, DImode) != CODE_FOR_nothing
    1585        17565 :               || (word_mode == SImode
    1586        17565 :                   && builtin_decl_explicit_p (BUILT_IN_BSWAP32)
    1587        17565 :                   && optab_handler (bswap_optab, SImode) != CODE_FOR_nothing)))
    1588              :         {
    1589        20870 :           load_type = uint64_type_node;
    1590        20870 :           fndecl = builtin_decl_explicit (BUILT_IN_BSWAP64);
    1591        20870 :           bswap_type = TREE_VALUE (TYPE_ARG_TYPES (TREE_TYPE (fndecl)));
    1592              :         }
    1593              :       else
    1594          945 :         return false;
    1595        20870 :       break;
    1596              :     default:
    1597              :       return false;
    1598              :     }
    1599              : 
    1600        22493 :   bool cast64_to_32;
    1601        22493 :   uint64_t mask, l_rotate;
    1602        22493 :   gimple *ins_stmt = find_bswap_or_nop (cur_stmt, &n, &bswap,
    1603              :                                         &cast64_to_32, &mask, &l_rotate);
    1604        22493 :   if (!ins_stmt
    1605         2213 :       || n.range != (unsigned HOST_WIDE_INT) sz
    1606         2213 :       || cast64_to_32
    1607         2213 :       || mask != ~(uint64_t) 0)
    1608              :     return false;
    1609              : 
    1610         2213 :   if (bswap && !fndecl && n.range != 16)
    1611              :     return false;
    1612              : 
    1613         2213 :   memset (&nop_stats, 0, sizeof (nop_stats));
    1614         2213 :   memset (&bswap_stats, 0, sizeof (bswap_stats));
    1615         2213 :   return bswap_replace (gsi_for_stmt (cur_stmt), ins_stmt, fndecl,
    1616              :                         bswap_type, load_type, &n, bswap, mask,
    1617         2213 :                         l_rotate) != NULL_TREE;
    1618              : }
    1619              : 
    1620              : /* Find manual byte swap implementations as well as load in a given
    1621              :    endianness.  Byte swaps are turned into a bswap builtin invocation
    1622              :    while endian loads are converted to bswap builtin invocation or
    1623              :    simple load according to the target endianness.  */
    1624              : 
    1625              : unsigned int
    1626       960532 : pass_optimize_bswap::execute (function *fun)
    1627              : {
    1628       960532 :   basic_block bb;
    1629       960532 :   bool bswap32_p, bswap64_p;
    1630       960532 :   bool changed = false;
    1631       960532 :   tree bswap32_type = NULL_TREE, bswap64_type = NULL_TREE;
    1632              : 
    1633       960532 :   bswap32_p = (builtin_decl_explicit_p (BUILT_IN_BSWAP32)
    1634      1823694 :                && optab_handler (bswap_optab, SImode) != CODE_FOR_nothing);
    1635       960532 :   bswap64_p = (builtin_decl_explicit_p (BUILT_IN_BSWAP64)
    1636      1823694 :                && (optab_handler (bswap_optab, DImode) != CODE_FOR_nothing
    1637       120899 :                    || (bswap32_p && word_mode == SImode)));
    1638              : 
    1639              :   /* Determine the argument type of the builtins.  The code later on
    1640              :      assumes that the return and argument type are the same.  */
    1641       960532 :   if (bswap32_p)
    1642              :     {
    1643       863162 :       tree fndecl = builtin_decl_explicit (BUILT_IN_BSWAP32);
    1644       863162 :       bswap32_type = TREE_VALUE (TYPE_ARG_TYPES (TREE_TYPE (fndecl)));
    1645              :     }
    1646              : 
    1647       960532 :   if (bswap64_p)
    1648              :     {
    1649       863162 :       tree fndecl = builtin_decl_explicit (BUILT_IN_BSWAP64);
    1650       863162 :       bswap64_type = TREE_VALUE (TYPE_ARG_TYPES (TREE_TYPE (fndecl)));
    1651              :     }
    1652              : 
    1653       960532 :   memset (&nop_stats, 0, sizeof (nop_stats));
    1654       960532 :   memset (&bswap_stats, 0, sizeof (bswap_stats));
    1655       960532 :   calculate_dominance_info (CDI_DOMINATORS);
    1656              : 
    1657      9836129 :   FOR_EACH_BB_FN (bb, fun)
    1658              :     {
    1659      8875597 :       gimple_stmt_iterator gsi;
    1660              : 
    1661              :       /* We do a reverse scan for bswap patterns to make sure we get the
    1662              :          widest match. As bswap pattern matching doesn't handle previously
    1663              :          inserted smaller bswap replacements as sub-patterns, the wider
    1664              :          variant wouldn't be detected.  */
    1665     97051868 :       for (gsi = gsi_last_bb (bb); !gsi_end_p (gsi);)
    1666              :         {
    1667     79300674 :           gimple *ins_stmt, *cur_stmt = gsi_stmt (gsi);
    1668     79300674 :           tree fndecl = NULL_TREE, bswap_type = NULL_TREE, load_type;
    1669     79300674 :           enum tree_code code;
    1670     79300674 :           struct symbolic_number n;
    1671     79300674 :           bool bswap, cast64_to_32;
    1672     79300674 :           uint64_t mask, l_rotate;
    1673              : 
    1674              :           /* This gsi_prev (&gsi) is not part of the for loop because cur_stmt
    1675              :              might be moved to a different basic block by bswap_replace and gsi
    1676              :              must not points to it if that's the case.  Moving the gsi_prev
    1677              :              there make sure that gsi points to the statement previous to
    1678              :              cur_stmt while still making sure that all statements are
    1679              :              considered in this basic block.  */
    1680     79300674 :           gsi_prev (&gsi);
    1681              : 
    1682     79300674 :           if (!is_gimple_assign (cur_stmt))
    1683     79297140 :             continue;
    1684              : 
    1685     20373643 :           code = gimple_assign_rhs_code (cur_stmt);
    1686     20373643 :           switch (code)
    1687              :             {
    1688         6602 :             case LROTATE_EXPR:
    1689         6602 :             case RROTATE_EXPR:
    1690         6602 :               if (!tree_fits_uhwi_p (gimple_assign_rhs2 (cur_stmt))
    1691         6602 :                   || tree_to_uhwi (gimple_assign_rhs2 (cur_stmt))
    1692         4066 :                      % BITS_PER_UNIT)
    1693         5557 :                 continue;
    1694              :               /* Fall through.  */
    1695              :             case BIT_IOR_EXPR:
    1696              :             case BIT_XOR_EXPR:
    1697              :             case PLUS_EXPR:
    1698              :               break;
    1699      1262480 :             case CONSTRUCTOR:
    1700      1262480 :             case VEC_PACK_TRUNC_EXPR:
    1701      1262480 :               {
    1702      1262480 :                 tree rhs = gimple_assign_rhs1 (cur_stmt);
    1703      1262480 :                 if (VECTOR_TYPE_P (TREE_TYPE (rhs))
    1704      1262480 :                     && INTEGRAL_TYPE_P (TREE_TYPE (TREE_TYPE (rhs))))
    1705              :                   break;
    1706              :               }
    1707      1257913 :               continue;
    1708     17027669 :             default:
    1709     17027669 :               continue;
    1710     18285582 :             }
    1711              : 
    1712      2082504 :           ins_stmt = find_bswap_or_nop (cur_stmt, &n, &bswap,
    1713              :                                         &cast64_to_32, &mask, &l_rotate);
    1714              : 
    1715      2082504 :           if (!ins_stmt)
    1716      2076643 :             continue;
    1717              : 
    1718         5861 :           switch (n.range)
    1719              :             {
    1720         2286 :             case 16:
    1721              :               /* Already in canonical form, nothing to do.  */
    1722         2286 :               if (code == LROTATE_EXPR || code == RROTATE_EXPR)
    1723          625 :                 continue;
    1724         1661 :               load_type = bswap_type = uint16_type_node;
    1725         1661 :               break;
    1726         1278 :             case 32:
    1727         1278 :               load_type = uint32_type_node;
    1728         1278 :               if (bswap32_p)
    1729              :                 {
    1730         1278 :                   fndecl = builtin_decl_explicit (BUILT_IN_BSWAP32);
    1731         1278 :                   bswap_type = bswap32_type;
    1732              :                 }
    1733              :               break;
    1734          595 :             case 64:
    1735          595 :               load_type = uint64_type_node;
    1736          595 :               if (bswap64_p)
    1737              :                 {
    1738          595 :                   fndecl = builtin_decl_explicit (BUILT_IN_BSWAP64);
    1739          595 :                   bswap_type = bswap64_type;
    1740              :                 }
    1741              :               break;
    1742         1702 :             default:
    1743         1702 :               continue;
    1744              :             }
    1745              : 
    1746         3534 :           if (bswap && !fndecl && n.range != 16)
    1747            0 :             continue;
    1748              : 
    1749         3534 :           if (bswap_replace (gsi_for_stmt (cur_stmt), ins_stmt, fndecl,
    1750              :                              bswap_type, load_type, &n, bswap, mask,
    1751              :                              l_rotate))
    1752         3534 :             changed = true;
    1753              :         }
    1754              :     }
    1755              : 
    1756       960532 :   statistics_counter_event (fun, "16-bit nop implementations found",
    1757              :                             nop_stats.found_16bit);
    1758       960532 :   statistics_counter_event (fun, "32-bit nop implementations found",
    1759              :                             nop_stats.found_32bit);
    1760       960532 :   statistics_counter_event (fun, "64-bit nop implementations found",
    1761              :                             nop_stats.found_64bit);
    1762       960532 :   statistics_counter_event (fun, "16-bit bswap implementations found",
    1763              :                             bswap_stats.found_16bit);
    1764       960532 :   statistics_counter_event (fun, "32-bit bswap implementations found",
    1765              :                             bswap_stats.found_32bit);
    1766       960532 :   statistics_counter_event (fun, "64-bit bswap implementations found",
    1767              :                             bswap_stats.found_64bit);
    1768              : 
    1769       960532 :   return (changed ? TODO_update_ssa : 0);
    1770              : }
    1771              : 
    1772              : } // anon namespace
    1773              : 
    1774              : gimple_opt_pass *
    1775       287872 : make_pass_optimize_bswap (gcc::context *ctxt)
    1776              : {
    1777       287872 :   return new pass_optimize_bswap (ctxt);
    1778              : }
    1779              : 
    1780              : namespace {
    1781              : 
    1782              : /* Struct recording one operand for the store, which is either a constant,
    1783              :    then VAL represents the constant and all the other fields are zero, or
    1784              :    a memory load, then VAL represents the reference, BASE_ADDR is non-NULL
    1785              :    and the other fields also reflect the memory load, or an SSA name, then
    1786              :    VAL represents the SSA name and all the other fields are zero.  */
    1787              : 
    1788              : class store_operand_info
    1789              : {
    1790              : public:
    1791              :   tree val;
    1792              :   tree base_addr;
    1793              :   poly_uint64 bitsize;
    1794              :   poly_uint64 bitpos;
    1795              :   poly_uint64 bitregion_start;
    1796              :   poly_uint64 bitregion_end;
    1797              :   gimple *stmt;
    1798              :   bool bit_not_p;
    1799              :   store_operand_info ();
    1800              : };
    1801              : 
    1802     10141578 : store_operand_info::store_operand_info ()
    1803     10141578 :   : val (NULL_TREE), base_addr (NULL_TREE), bitsize (0), bitpos (0),
    1804     10141578 :     bitregion_start (0), bitregion_end (0), stmt (NULL), bit_not_p (false)
    1805              : {
    1806            0 : }
    1807              : 
    1808              : /* Struct recording the information about a single store of an immediate
    1809              :    to memory.  These are created in the first phase and coalesced into
    1810              :    merged_store_group objects in the second phase.  */
    1811              : 
    1812              : class store_immediate_info
    1813              : {
    1814              : public:
    1815              :   unsigned HOST_WIDE_INT bitsize;
    1816              :   unsigned HOST_WIDE_INT bitpos;
    1817              :   unsigned HOST_WIDE_INT bitregion_start;
    1818              :   /* This is one past the last bit of the bit region.  */
    1819              :   unsigned HOST_WIDE_INT bitregion_end;
    1820              :   gimple *stmt;
    1821              :   unsigned int order;
    1822              :   /* INTEGER_CST for constant store, STRING_CST for string store,
    1823              :      MEM_REF for memory copy, BIT_*_EXPR for logical bitwise operation,
    1824              :      BIT_INSERT_EXPR for bit insertion.
    1825              :      LROTATE_EXPR if it can be only bswap optimized and
    1826              :      ops are not really meaningful.
    1827              :      NOP_EXPR if bswap optimization detected identity, ops
    1828              :      are not meaningful.  */
    1829              :   enum tree_code rhs_code;
    1830              :   /* Two fields for bswap optimization purposes.  */
    1831              :   struct symbolic_number n;
    1832              :   gimple *ins_stmt;
    1833              :   /* True if BIT_{AND,IOR,XOR}_EXPR result is inverted before storing.  */
    1834              :   bool bit_not_p;
    1835              :   /* True if ops have been swapped and thus ops[1] represents
    1836              :      rhs1 of BIT_{AND,IOR,XOR}_EXPR and ops[0] represents rhs2.  */
    1837              :   bool ops_swapped_p;
    1838              :   /* The index number of the landing pad, or 0 if there is none.  */
    1839              :   int lp_nr;
    1840              :   /* Operands.  For BIT_*_EXPR rhs_code both operands are used, otherwise
    1841              :      just the first one.  */
    1842              :   store_operand_info ops[2];
    1843              :   store_immediate_info (unsigned HOST_WIDE_INT, unsigned HOST_WIDE_INT,
    1844              :                         unsigned HOST_WIDE_INT, unsigned HOST_WIDE_INT,
    1845              :                         gimple *, unsigned int, enum tree_code,
    1846              :                         struct symbolic_number &, gimple *, bool, int,
    1847              :                         const store_operand_info &,
    1848              :                         const store_operand_info &);
    1849              : };
    1850              : 
    1851      3050692 : store_immediate_info::store_immediate_info (unsigned HOST_WIDE_INT bs,
    1852              :                                             unsigned HOST_WIDE_INT bp,
    1853              :                                             unsigned HOST_WIDE_INT brs,
    1854              :                                             unsigned HOST_WIDE_INT bre,
    1855              :                                             gimple *st,
    1856              :                                             unsigned int ord,
    1857              :                                             enum tree_code rhscode,
    1858              :                                             struct symbolic_number &nr,
    1859              :                                             gimple *ins_stmtp,
    1860              :                                             bool bitnotp,
    1861              :                                             int nr2,
    1862              :                                             const store_operand_info &op0r,
    1863      3050692 :                                             const store_operand_info &op1r)
    1864      3050692 :   : bitsize (bs), bitpos (bp), bitregion_start (brs), bitregion_end (bre),
    1865      3050692 :     stmt (st), order (ord), rhs_code (rhscode), n (nr),
    1866      3050692 :     ins_stmt (ins_stmtp), bit_not_p (bitnotp), ops_swapped_p (false),
    1867      3050692 :     lp_nr (nr2), ops { op0r, op1r }
    1868              : {
    1869            0 : }
    1870              : 
    1871              : /* Struct representing a group of stores to contiguous memory locations.
    1872              :    These are produced by the second phase (coalescing) and consumed in the
    1873              :    third phase that outputs the widened stores.  */
    1874              : 
    1875              : class merged_store_group
    1876              : {
    1877              : public:
    1878              :   unsigned HOST_WIDE_INT start;
    1879              :   unsigned HOST_WIDE_INT width;
    1880              :   unsigned HOST_WIDE_INT bitregion_start;
    1881              :   unsigned HOST_WIDE_INT bitregion_end;
    1882              :   /* The size of the allocated memory for val and mask.  */
    1883              :   unsigned HOST_WIDE_INT buf_size;
    1884              :   unsigned HOST_WIDE_INT align_base;
    1885              :   poly_uint64 load_align_base[2];
    1886              : 
    1887              :   unsigned int align;
    1888              :   unsigned int load_align[2];
    1889              :   unsigned int first_order;
    1890              :   unsigned int last_order;
    1891              :   bool bit_insertion;
    1892              :   bool string_concatenation;
    1893              :   bool only_constants;
    1894              :   bool consecutive;
    1895              :   unsigned int first_nonmergeable_order;
    1896              :   int lp_nr;
    1897              : 
    1898              :   auto_vec<store_immediate_info *> stores;
    1899              :   /* We record the first and last original statements in the sequence because
    1900              :      we'll need their vuse/vdef and replacement position.  It's easier to keep
    1901              :      track of them separately as 'stores' is reordered by apply_stores.  */
    1902              :   gimple *last_stmt;
    1903              :   gimple *first_stmt;
    1904              :   unsigned char *val;
    1905              :   unsigned char *mask;
    1906              : 
    1907              :   merged_store_group (store_immediate_info *);
    1908              :   ~merged_store_group ();
    1909              :   bool can_be_merged_into (store_immediate_info *);
    1910              :   void merge_into (store_immediate_info *);
    1911              :   void merge_overlapping (store_immediate_info *);
    1912              :   bool apply_stores ();
    1913              : private:
    1914              :   void do_merge (store_immediate_info *);
    1915              : };
    1916              : 
    1917              : /* Debug helper.  Dump LEN elements of byte array PTR to FD in hex.  */
    1918              : 
    1919              : static void
    1920          444 : dump_char_array (FILE *fd, unsigned char *ptr, unsigned int len)
    1921              : {
    1922          444 :   if (!fd)
    1923              :     return;
    1924              : 
    1925        22044 :   for (unsigned int i = 0; i < len; i++)
    1926        21600 :     fprintf (fd, "%02x ", ptr[i]);
    1927          444 :   fprintf (fd, "\n");
    1928              : }
    1929              : 
    1930              : /* Clear out LEN bits starting from bit START in the byte array
    1931              :    PTR.  This clears the bits to the *right* from START.
    1932              :    START must be within [0, BITS_PER_UNIT) and counts starting from
    1933              :    the least significant bit.  */
    1934              : 
    1935              : static void
    1936           12 : clear_bit_region_be (unsigned char *ptr, unsigned int start,
    1937              :                      unsigned int len)
    1938              : {
    1939           20 :   if (len == 0)
    1940              :     return;
    1941              :   /* Clear len bits to the right of start.  */
    1942           20 :   else if (len <= start + 1)
    1943              :     {
    1944            8 :       unsigned char mask = (~(~0U << len));
    1945            8 :       mask = mask << (start + 1U - len);
    1946            8 :       ptr[0] &= ~mask;
    1947              :     }
    1948           12 :   else if (start != BITS_PER_UNIT - 1)
    1949              :     {
    1950            4 :       clear_bit_region_be (ptr, start, (start % BITS_PER_UNIT) + 1);
    1951            4 :       clear_bit_region_be (ptr + 1, BITS_PER_UNIT - 1,
    1952            4 :                            len - (start % BITS_PER_UNIT) - 1);
    1953              :     }
    1954            8 :   else if (start == BITS_PER_UNIT - 1
    1955              :            && len > BITS_PER_UNIT)
    1956              :     {
    1957            8 :       unsigned int nbytes = len / BITS_PER_UNIT;
    1958            8 :       memset (ptr, 0, nbytes);
    1959            8 :       if (len % BITS_PER_UNIT != 0)
    1960            4 :         clear_bit_region_be (ptr + nbytes, BITS_PER_UNIT - 1,
    1961              :                              len % BITS_PER_UNIT);
    1962              :     }
    1963              :   else
    1964            0 :     gcc_unreachable ();
    1965              : }
    1966              : 
    1967              : /* In the byte array PTR clear the bit region starting at bit
    1968              :    START and is LEN bits wide.
    1969              :    For regions spanning multiple bytes do this recursively until we reach
    1970              :    zero LEN or a region contained within a single byte.  */
    1971              : 
    1972              : static void
    1973      1292473 : clear_bit_region (unsigned char *ptr, unsigned int start,
    1974              :                   unsigned int len)
    1975              : {
    1976              :   /* Degenerate base case.  */
    1977      1388425 :   if (len == 0)
    1978              :     return;
    1979      1388425 :   else if (start >= BITS_PER_UNIT)
    1980        46352 :     clear_bit_region (ptr + 1, start - BITS_PER_UNIT, len);
    1981              :   /* Second base case.  */
    1982      1342073 :   else if ((start + len) <= BITS_PER_UNIT)
    1983              :     {
    1984       218232 :       unsigned char mask = (~0U) << (unsigned char) (BITS_PER_UNIT - len);
    1985       218232 :       mask >>= BITS_PER_UNIT - (start + len);
    1986              : 
    1987       218232 :       ptr[0] &= ~mask;
    1988              : 
    1989       218232 :       return;
    1990              :     }
    1991              :   /* Clear most significant bits in a byte and proceed with the next byte.  */
    1992      1123841 :   else if (start != 0)
    1993              :     {
    1994        46732 :       clear_bit_region (ptr, start, BITS_PER_UNIT - start);
    1995        46732 :       clear_bit_region (ptr + 1, 0, len - (BITS_PER_UNIT - start));
    1996              :     }
    1997              :   /* Whole bytes need to be cleared.  */
    1998      1077109 :   else if (start == 0 && len > BITS_PER_UNIT)
    1999              :     {
    2000      1077109 :       unsigned int nbytes = len / BITS_PER_UNIT;
    2001              :       /* We could recurse on each byte but we clear whole bytes, so a simple
    2002              :          memset will do.  */
    2003      1077109 :       memset (ptr, '\0', nbytes);
    2004              :       /* Clear the remaining sub-byte region if there is one.  */
    2005      1077109 :       if (len % BITS_PER_UNIT != 0)
    2006         2868 :         clear_bit_region (ptr + nbytes, 0, len % BITS_PER_UNIT);
    2007              :     }
    2008              :   else
    2009            0 :     gcc_unreachable ();
    2010              : }
    2011              : 
    2012              : /* Write BITLEN bits of EXPR to the byte array PTR at
    2013              :    bit position BITPOS.  PTR should contain TOTAL_BYTES elements.
    2014              :    Return true if the operation succeeded.  */
    2015              : 
    2016              : static bool
    2017      1000999 : encode_tree_to_bitpos (tree expr, unsigned char *ptr, int bitlen, int bitpos,
    2018              :                        unsigned int total_bytes)
    2019              : {
    2020      1000999 :   unsigned int first_byte = bitpos / BITS_PER_UNIT;
    2021      1000999 :   bool empty_ctor_p
    2022      1000999 :     = (TREE_CODE (expr) == CONSTRUCTOR
    2023       217400 :        && CONSTRUCTOR_NELTS (expr) == 0
    2024       217400 :        && TYPE_SIZE_UNIT (TREE_TYPE (expr))
    2025      1218399 :        && tree_fits_uhwi_p (TYPE_SIZE_UNIT (TREE_TYPE (expr))));
    2026      1000999 :   bool sub_byte_op_p = ((bitlen % BITS_PER_UNIT)
    2027       972904 :                         || (bitpos % BITS_PER_UNIT)
    2028      1973356 :                         || (!int_mode_for_size (bitlen, 0).exists ()
    2029        60576 :                             && !empty_ctor_p));
    2030              : 
    2031       972065 :   if (!sub_byte_op_p)
    2032              :     {
    2033       972065 :       if (first_byte >= total_bytes)
    2034              :         return false;
    2035       972065 :       total_bytes -= first_byte;
    2036       972065 :       if (empty_ctor_p)
    2037              :         {
    2038       217400 :           unsigned HOST_WIDE_INT rhs_bytes
    2039       217400 :             = tree_to_uhwi (TYPE_SIZE_UNIT (TREE_TYPE (expr)));
    2040       217400 :           if (rhs_bytes > total_bytes)
    2041              :             return false;
    2042       217400 :           memset (ptr + first_byte, '\0', rhs_bytes);
    2043       217400 :           return true;
    2044              :         }
    2045       754665 :       return native_encode_expr (expr, ptr + first_byte, total_bytes) != 0;
    2046              :     }
    2047              : 
    2048              :   /* LITTLE-ENDIAN
    2049              :      We are writing a non byte-sized quantity or at a position that is not
    2050              :      at a byte boundary.
    2051              :      |--------|--------|--------| ptr + first_byte
    2052              :            ^              ^
    2053              :            xxx xxxxxxxx xxx< bp>
    2054              :            |______EXPR____|
    2055              : 
    2056              :      First native_encode_expr EXPR into a temporary buffer and shift each
    2057              :      byte in the buffer by 'bp' (carrying the bits over as necessary).
    2058              :      |00000000|00xxxxxx|xxxxxxxx| << bp = |000xxxxx|xxxxxxxx|xxx00000|
    2059              :                                               <------bitlen---->< bp>
    2060              :     Then we clear the destination bits:
    2061              :     |---00000|00000000|000-----| ptr + first_byte
    2062              :         <-------bitlen--->< bp>
    2063              : 
    2064              :     Finally we ORR the bytes of the shifted EXPR into the cleared region:
    2065              :     |---xxxxx||xxxxxxxx||xxx-----| ptr + first_byte.
    2066              : 
    2067              :    BIG-ENDIAN
    2068              :    We are writing a non byte-sized quantity or at a position that is not
    2069              :    at a byte boundary.
    2070              :      ptr + first_byte |--------|--------|--------|
    2071              :                             ^              ^
    2072              :                        <bp >xxx xxxxxxxx xxx
    2073              :                             |_____EXPR_____|
    2074              : 
    2075              :      First native_encode_expr EXPR into a temporary buffer and shift each
    2076              :      byte in the buffer to the right by (carrying the bits over as necessary).
    2077              :      We shift by as much as needed to align the most significant bit of EXPR
    2078              :      with bitpos:
    2079              :      |00xxxxxx|xxxxxxxx| >> 3 = |00000xxx|xxxxxxxx|xxxxx000|
    2080              :         <---bitlen---->          <bp ><-----bitlen----->
    2081              :     Then we clear the destination bits:
    2082              :     ptr + first_byte |-----000||00000000||00000---|
    2083              :                       <bp ><-------bitlen----->
    2084              : 
    2085              :     Finally we ORR the bytes of the shifted EXPR into the cleared region:
    2086              :     ptr + first_byte |---xxxxx||xxxxxxxx||xxx-----|.
    2087              :     The awkwardness comes from the fact that bitpos is counted from the
    2088              :     most significant bit of a byte.  */
    2089              : 
    2090              :   /* We must be dealing with fixed-size data at this point, since the
    2091              :      total size is also fixed.  */
    2092        28934 :   unsigned int byte_size;
    2093        28934 :   if (empty_ctor_p)
    2094              :     {
    2095            0 :       unsigned HOST_WIDE_INT rhs_bytes
    2096            0 :         = tree_to_uhwi (TYPE_SIZE_UNIT (TREE_TYPE (expr)));
    2097            0 :       if (rhs_bytes > total_bytes)
    2098              :         return false;
    2099            0 :       byte_size = rhs_bytes;
    2100              :     }
    2101              :   else
    2102              :     {
    2103        28934 :       fixed_size_mode mode
    2104        28934 :         = as_a <fixed_size_mode> (TYPE_MODE (TREE_TYPE (expr)));
    2105        28934 :       byte_size
    2106        28934 :         = mode == BLKmode
    2107          198 :         ? tree_to_uhwi (TYPE_SIZE_UNIT (TREE_TYPE (expr)))
    2108        28736 :         : GET_MODE_SIZE (mode);
    2109              :     }
    2110              :   /* Allocate an extra byte so that we have space to shift into.  */
    2111        28934 :   byte_size++;
    2112        28934 :   unsigned char *tmpbuf = XALLOCAVEC (unsigned char, byte_size);
    2113        28934 :   memset (tmpbuf, '\0', byte_size);
    2114              :   /* The store detection code should only have allowed constants that are
    2115              :      accepted by native_encode_expr or empty ctors.  */
    2116        28934 :   if (!empty_ctor_p
    2117        28934 :       && native_encode_expr (expr, tmpbuf, byte_size - 1) == 0)
    2118            0 :     gcc_unreachable ();
    2119              : 
    2120              :   /* The native_encode_expr machinery uses TYPE_MODE to determine how many
    2121              :      bytes to write.  This means it can write more than
    2122              :      ROUND_UP (bitlen, BITS_PER_UNIT) / BITS_PER_UNIT bytes (for example
    2123              :      write 8 bytes for a bitlen of 40).  Skip the bytes that are not within
    2124              :      bitlen and zero out the bits that are not relevant as well (that may
    2125              :      contain a sign bit due to sign-extension).  */
    2126        28934 :   unsigned int padding
    2127        28934 :     = byte_size - ROUND_UP (bitlen, BITS_PER_UNIT) / BITS_PER_UNIT - 1;
    2128              :   /* On big-endian the padding is at the 'front' so just skip the initial
    2129              :      bytes.  */
    2130        28934 :   if (BYTES_BIG_ENDIAN)
    2131              :     tmpbuf += padding;
    2132              : 
    2133        28934 :   byte_size -= padding;
    2134              : 
    2135        28934 :   if (bitlen % BITS_PER_UNIT != 0)
    2136              :     {
    2137        28095 :       if (BYTES_BIG_ENDIAN)
    2138              :         clear_bit_region_be (tmpbuf, BITS_PER_UNIT - 1,
    2139              :                              BITS_PER_UNIT - (bitlen % BITS_PER_UNIT));
    2140              :       else
    2141        28095 :         clear_bit_region (tmpbuf, bitlen,
    2142        28095 :                           byte_size * BITS_PER_UNIT - bitlen);
    2143              :     }
    2144              :   /* Left shifting relies on the last byte being clear if bitlen is
    2145              :      a multiple of BITS_PER_UNIT, which might not be clear if
    2146              :      there are padding bytes.  */
    2147          839 :   else if (!BYTES_BIG_ENDIAN)
    2148          839 :     tmpbuf[byte_size - 1] = '\0';
    2149              : 
    2150              :   /* Clear the bit region in PTR where the bits from TMPBUF will be
    2151              :      inserted into.  */
    2152        28934 :   if (BYTES_BIG_ENDIAN)
    2153              :     clear_bit_region_be (ptr + first_byte,
    2154              :                          BITS_PER_UNIT - 1 - (bitpos % BITS_PER_UNIT), bitlen);
    2155              :   else
    2156        28934 :     clear_bit_region (ptr + first_byte, bitpos % BITS_PER_UNIT, bitlen);
    2157              : 
    2158        28934 :   int shift_amnt;
    2159        28934 :   int bitlen_mod = bitlen % BITS_PER_UNIT;
    2160        28934 :   int bitpos_mod = bitpos % BITS_PER_UNIT;
    2161              : 
    2162        28934 :   bool skip_byte = false;
    2163        28934 :   if (BYTES_BIG_ENDIAN)
    2164              :     {
    2165              :       /* BITPOS and BITLEN are exactly aligned and no shifting
    2166              :          is necessary.  */
    2167              :       if (bitpos_mod + bitlen_mod == BITS_PER_UNIT
    2168              :           || (bitpos_mod == 0 && bitlen_mod == 0))
    2169              :         shift_amnt = 0;
    2170              :       /* |. . . . . . . .|
    2171              :           <bp >   <blen >.
    2172              :          We always shift right for BYTES_BIG_ENDIAN so shift the beginning
    2173              :          of the value until it aligns with 'bp' in the next byte over.  */
    2174              :       else if (bitpos_mod + bitlen_mod < BITS_PER_UNIT)
    2175              :         {
    2176              :           shift_amnt = bitlen_mod + bitpos_mod;
    2177              :           skip_byte = bitlen_mod != 0;
    2178              :         }
    2179              :       /* |. . . . . . . .|
    2180              :           <----bp--->
    2181              :             <---blen---->.
    2182              :          Shift the value right within the same byte so it aligns with 'bp'.  */
    2183              :       else
    2184              :         shift_amnt = bitlen_mod + bitpos_mod - BITS_PER_UNIT;
    2185              :     }
    2186              :   else
    2187        28934 :     shift_amnt = bitpos % BITS_PER_UNIT;
    2188              : 
    2189              :   /* Create the shifted version of EXPR.  */
    2190        28934 :   if (!BYTES_BIG_ENDIAN)
    2191              :     {
    2192        28934 :       shift_bytes_in_array_left (tmpbuf, byte_size, shift_amnt);
    2193        28934 :       if (shift_amnt == 0)
    2194        12926 :         byte_size--;
    2195              :     }
    2196              :   else
    2197              :     {
    2198              :       gcc_assert (BYTES_BIG_ENDIAN);
    2199              :       shift_bytes_in_array_right (tmpbuf, byte_size, shift_amnt);
    2200              :       /* If shifting right forced us to move into the next byte skip the now
    2201              :          empty byte.  */
    2202              :       if (skip_byte)
    2203              :         {
    2204              :           tmpbuf++;
    2205              :           byte_size--;
    2206              :         }
    2207              :     }
    2208              : 
    2209              :   /* Insert the bits from TMPBUF.  */
    2210       122229 :   for (unsigned int i = 0; i < byte_size; i++)
    2211        93295 :     ptr[first_byte + i] |= tmpbuf[i];
    2212              : 
    2213              :   return true;
    2214              : }
    2215              : 
    2216              : /* Sorting function for store_immediate_info objects.
    2217              :    Sorts them by bitposition.  */
    2218              : 
    2219              : static int
    2220     17711964 : sort_by_bitpos (const void *x, const void *y)
    2221              : {
    2222     17711964 :   store_immediate_info *const *tmp = (store_immediate_info * const *) x;
    2223     17711964 :   store_immediate_info *const *tmp2 = (store_immediate_info * const *) y;
    2224              : 
    2225     17711964 :   if ((*tmp)->bitpos < (*tmp2)->bitpos)
    2226              :     return -1;
    2227      9079027 :   else if ((*tmp)->bitpos > (*tmp2)->bitpos)
    2228              :     return 1;
    2229              :   else
    2230              :     /* If they are the same let's use the order which is guaranteed to
    2231              :        be different.  */
    2232       737783 :     return (*tmp)->order - (*tmp2)->order;
    2233              : }
    2234              : 
    2235              : /* Sorting function for store_immediate_info objects.
    2236              :    Sorts them by the order field.  */
    2237              : 
    2238              : static int
    2239      6744143 : sort_by_order (const void *x, const void *y)
    2240              : {
    2241      6744143 :   store_immediate_info *const *tmp = (store_immediate_info * const *) x;
    2242      6744143 :   store_immediate_info *const *tmp2 = (store_immediate_info * const *) y;
    2243              : 
    2244      6744143 :   if ((*tmp)->order < (*tmp2)->order)
    2245              :     return -1;
    2246      3264066 :   else if ((*tmp)->order > (*tmp2)->order)
    2247              :     return 1;
    2248              : 
    2249            0 :   gcc_unreachable ();
    2250              : }
    2251              : 
    2252              : /* Initialize a merged_store_group object from a store_immediate_info
    2253              :    object.  */
    2254              : 
    2255       837497 : merged_store_group::merged_store_group (store_immediate_info *info)
    2256              : {
    2257       837497 :   start = info->bitpos;
    2258       837497 :   width = info->bitsize;
    2259       837497 :   bitregion_start = info->bitregion_start;
    2260       837497 :   bitregion_end = info->bitregion_end;
    2261              :   /* VAL has memory allocated for it in apply_stores once the group
    2262              :      width has been finalized.  */
    2263       837497 :   val = NULL;
    2264       837497 :   mask = NULL;
    2265       837497 :   bit_insertion = info->rhs_code == BIT_INSERT_EXPR;
    2266       837497 :   string_concatenation = info->rhs_code == STRING_CST;
    2267       837497 :   only_constants = info->rhs_code == INTEGER_CST;
    2268       837497 :   consecutive = true;
    2269       837497 :   first_nonmergeable_order = ~0U;
    2270       837497 :   lp_nr = info->lp_nr;
    2271       837497 :   unsigned HOST_WIDE_INT align_bitpos = 0;
    2272       837497 :   get_object_alignment_1 (gimple_assign_lhs (info->stmt),
    2273              :                           &align, &align_bitpos);
    2274       837497 :   align_base = start - align_bitpos;
    2275      2512491 :   for (int i = 0; i < 2; ++i)
    2276              :     {
    2277      1674994 :       store_operand_info &op = info->ops[i];
    2278      1674994 :       if (op.base_addr == NULL_TREE)
    2279              :         {
    2280      1486614 :           load_align[i] = 0;
    2281      1486614 :           load_align_base[i] = 0;
    2282              :         }
    2283              :       else
    2284              :         {
    2285       188380 :           get_object_alignment_1 (op.val, &load_align[i], &align_bitpos);
    2286       188380 :           load_align_base[i] = op.bitpos - align_bitpos;
    2287              :         }
    2288              :     }
    2289       837497 :   stores.create (1);
    2290       837497 :   stores.safe_push (info);
    2291       837497 :   last_stmt = info->stmt;
    2292       837497 :   last_order = info->order;
    2293       837497 :   first_stmt = last_stmt;
    2294       837497 :   first_order = last_order;
    2295       837497 :   buf_size = 0;
    2296       837497 : }
    2297              : 
    2298       837497 : merged_store_group::~merged_store_group ()
    2299              : {
    2300       837497 :   if (val)
    2301       399120 :     XDELETEVEC (val);
    2302       837497 : }
    2303              : 
    2304              : /* Return true if the store described by INFO can be merged into the group.  */
    2305              : 
    2306              : bool
    2307       691671 : merged_store_group::can_be_merged_into (store_immediate_info *info)
    2308              : {
    2309              :   /* Do not merge bswap patterns.  */
    2310       691671 :   if (info->rhs_code == LROTATE_EXPR)
    2311              :     return false;
    2312              : 
    2313       678105 :   if (info->lp_nr != lp_nr)
    2314              :     return false;
    2315              : 
    2316              :   /* The canonical case.  */
    2317       678105 :   if (info->rhs_code == stores[0]->rhs_code)
    2318              :     return true;
    2319              : 
    2320              :   /* BIT_INSERT_EXPR is compatible with INTEGER_CST if no STRING_CST.  */
    2321        57160 :   if (info->rhs_code == BIT_INSERT_EXPR && stores[0]->rhs_code == INTEGER_CST)
    2322         2243 :     return !string_concatenation;
    2323              : 
    2324        54917 :   if (stores[0]->rhs_code == BIT_INSERT_EXPR && info->rhs_code == INTEGER_CST)
    2325         6011 :     return !string_concatenation;
    2326              : 
    2327              :   /* We can turn MEM_REF into BIT_INSERT_EXPR for bit-field stores, but do it
    2328              :      only for small regions since this can generate a lot of instructions.  */
    2329        48906 :   if (info->rhs_code == MEM_REF
    2330        21517 :       && (stores[0]->rhs_code == INTEGER_CST
    2331         3083 :           || stores[0]->rhs_code == BIT_INSERT_EXPR)
    2332        18906 :       && info->bitregion_start == stores[0]->bitregion_start
    2333          763 :       && info->bitregion_end == stores[0]->bitregion_end
    2334        50432 :       && info->bitregion_end - info->bitregion_start <= MAX_FIXED_MODE_SIZE)
    2335          761 :     return !string_concatenation;
    2336              : 
    2337        48145 :   if (stores[0]->rhs_code == MEM_REF
    2338        21693 :       && (info->rhs_code == INTEGER_CST
    2339        21693 :           || info->rhs_code == BIT_INSERT_EXPR)
    2340        21414 :       && info->bitregion_start == stores[0]->bitregion_start
    2341            4 :       && info->bitregion_end == stores[0]->bitregion_end
    2342        48149 :       && info->bitregion_end - info->bitregion_start <= MAX_FIXED_MODE_SIZE)
    2343            2 :     return !string_concatenation;
    2344              : 
    2345              :   /* STRING_CST is compatible with INTEGER_CST if no BIT_INSERT_EXPR.  */
    2346        48143 :   if (info->rhs_code == STRING_CST
    2347          128 :       && stores[0]->rhs_code == INTEGER_CST
    2348        48271 :       && stores[0]->bitsize == CHAR_BIT)
    2349           66 :     return !bit_insertion;
    2350              : 
    2351        48077 :   if (stores[0]->rhs_code == STRING_CST
    2352          153 :       && info->rhs_code == INTEGER_CST
    2353        48230 :       && info->bitsize == CHAR_BIT)
    2354           14 :     return !bit_insertion;
    2355              : 
    2356              :   return false;
    2357              : }
    2358              : 
    2359              : /* Helper method for merge_into and merge_overlapping to do
    2360              :    the common part.  */
    2361              : 
    2362              : void
    2363       791741 : merged_store_group::do_merge (store_immediate_info *info)
    2364              : {
    2365       791741 :   bitregion_start = MIN (bitregion_start, info->bitregion_start);
    2366       791741 :   bitregion_end = MAX (bitregion_end, info->bitregion_end);
    2367              : 
    2368       791741 :   unsigned int this_align;
    2369       791741 :   unsigned HOST_WIDE_INT align_bitpos = 0;
    2370       791741 :   get_object_alignment_1 (gimple_assign_lhs (info->stmt),
    2371              :                           &this_align, &align_bitpos);
    2372       791741 :   if (this_align > align)
    2373              :     {
    2374         1097 :       align = this_align;
    2375         1097 :       align_base = info->bitpos - align_bitpos;
    2376              :     }
    2377      2375223 :   for (int i = 0; i < 2; ++i)
    2378              :     {
    2379      1583482 :       store_operand_info &op = info->ops[i];
    2380      1583482 :       if (!op.base_addr)
    2381      1480783 :         continue;
    2382              : 
    2383       102699 :       get_object_alignment_1 (op.val, &this_align, &align_bitpos);
    2384       102699 :       if (this_align > load_align[i])
    2385              :         {
    2386          130 :           load_align[i] = this_align;
    2387          130 :           load_align_base[i] = op.bitpos - align_bitpos;
    2388              :         }
    2389              :     }
    2390              : 
    2391       791741 :   gimple *stmt = info->stmt;
    2392       791741 :   stores.safe_push (info);
    2393       791741 :   if (info->order > last_order)
    2394              :     {
    2395       532735 :       last_order = info->order;
    2396       532735 :       last_stmt = stmt;
    2397              :     }
    2398       259006 :   else if (info->order < first_order)
    2399              :     {
    2400       110172 :       first_order = info->order;
    2401       110172 :       first_stmt = stmt;
    2402              :     }
    2403              : 
    2404       791741 :   if (info->bitpos != start + width)
    2405       187344 :     consecutive = false;
    2406              : 
    2407              :   /* We need to use extraction if there is any bit-field.  */
    2408       791741 :   if (info->rhs_code == BIT_INSERT_EXPR)
    2409              :     {
    2410         9452 :       bit_insertion = true;
    2411         9452 :       gcc_assert (!string_concatenation);
    2412              :     }
    2413              : 
    2414              :   /* We want to use concatenation if there is any string.  */
    2415       791741 :   if (info->rhs_code == STRING_CST)
    2416              :     {
    2417          460 :       string_concatenation = true;
    2418          460 :       gcc_assert (!bit_insertion);
    2419              :     }
    2420              : 
    2421              :   /* But we cannot use it if we don't have consecutive stores.  */
    2422       791741 :   if (!consecutive)
    2423       298633 :     string_concatenation = false;
    2424              : 
    2425       791741 :   if (info->rhs_code != INTEGER_CST)
    2426       113171 :     only_constants = false;
    2427       791741 : }
    2428              : 
    2429              : /* Merge a store recorded by INFO into this merged store.
    2430              :    The store is not overlapping with the existing recorded
    2431              :    stores.  */
    2432              : 
    2433              : void
    2434       119483 : merged_store_group::merge_into (store_immediate_info *info)
    2435              : {
    2436       119483 :   do_merge (info);
    2437              : 
    2438              :   /* Make sure we're inserting in the position we think we're inserting.  */
    2439       119483 :   gcc_assert (info->bitpos >= start + width
    2440              :               && info->bitregion_start <= bitregion_end);
    2441              : 
    2442       119483 :   width = info->bitpos + info->bitsize - start;
    2443       119483 : }
    2444              : 
    2445              : /* Merge a store described by INFO into this merged store.
    2446              :    INFO overlaps in some way with the current store (i.e. it's not contiguous
    2447              :    which is handled by merged_store_group::merge_into).  */
    2448              : 
    2449              : void
    2450       672258 : merged_store_group::merge_overlapping (store_immediate_info *info)
    2451              : {
    2452       672258 :   do_merge (info);
    2453              : 
    2454              :   /* If the store extends the size of the group, extend the width.  */
    2455       672258 :   if (info->bitpos + info->bitsize > start + width)
    2456       491609 :     width = info->bitpos + info->bitsize - start;
    2457       672258 : }
    2458              : 
    2459              : /* Go through all the recorded stores in this group in program order and
    2460              :    apply their values to the VAL byte array to create the final merged
    2461              :    value.  Return true if the operation succeeded.  */
    2462              : 
    2463              : bool
    2464       836663 : merged_store_group::apply_stores ()
    2465              : {
    2466       836663 :   store_immediate_info *info;
    2467       836663 :   unsigned int i;
    2468              : 
    2469              :   /* Make sure we have more than one store in the group, otherwise we cannot
    2470              :      merge anything.  */
    2471       836663 :   if (bitregion_start % BITS_PER_UNIT != 0
    2472       836663 :       || bitregion_end % BITS_PER_UNIT != 0
    2473      1673326 :       || stores.length () == 1)
    2474              :     return false;
    2475              : 
    2476       399120 :   buf_size = (bitregion_end - bitregion_start) / BITS_PER_UNIT;
    2477              : 
    2478              :   /* Really do string concatenation for large strings only.  */
    2479       399120 :   if (buf_size <= MOVE_MAX)
    2480       198357 :     string_concatenation = false;
    2481              : 
    2482              :   /* String concatenation only works for byte aligned start and end.  */
    2483       399120 :   if (start % BITS_PER_UNIT != 0 || width % BITS_PER_UNIT != 0)
    2484         4846 :     string_concatenation = false;
    2485              : 
    2486              :   /* Create a power-of-2-sized buffer for native_encode_expr.  */
    2487       399120 :   if (!string_concatenation)
    2488       798106 :     buf_size = 1 << ceil_log2 (buf_size);
    2489              : 
    2490       399120 :   val = XNEWVEC (unsigned char, 2 * buf_size);
    2491       399120 :   mask = val + buf_size;
    2492       399120 :   memset (val, 0, buf_size);
    2493       399120 :   memset (mask, ~0U, buf_size);
    2494              : 
    2495       399120 :   stores.qsort (sort_by_order);
    2496              : 
    2497      1587771 :   FOR_EACH_VEC_ELT (stores, i, info)
    2498              :     {
    2499      1188651 :       unsigned int pos_in_buffer = info->bitpos - bitregion_start;
    2500      1188651 :       tree cst;
    2501      1188651 :       if (info->ops[0].val && info->ops[0].base_addr == NULL_TREE)
    2502              :         cst = info->ops[0].val;
    2503       174580 :       else if (info->ops[1].val && info->ops[1].base_addr == NULL_TREE)
    2504              :         cst = info->ops[1].val;
    2505              :       else
    2506              :         cst = NULL_TREE;
    2507      1014374 :       bool ret = true;
    2508      1014374 :       if (cst && info->rhs_code != BIT_INSERT_EXPR)
    2509      1000999 :         ret = encode_tree_to_bitpos (cst, val, info->bitsize, pos_in_buffer,
    2510      1000999 :                                      buf_size);
    2511      1188651 :       unsigned char *m = mask + (pos_in_buffer / BITS_PER_UNIT);
    2512      1188651 :       if (BYTES_BIG_ENDIAN)
    2513              :         clear_bit_region_be (m, (BITS_PER_UNIT - 1
    2514              :                                  - (pos_in_buffer % BITS_PER_UNIT)),
    2515              :                              info->bitsize);
    2516              :       else
    2517      1188651 :         clear_bit_region (m, pos_in_buffer % BITS_PER_UNIT, info->bitsize);
    2518      1188651 :       if (cst && dump_file && (dump_flags & TDF_DETAILS))
    2519              :         {
    2520          222 :           if (ret)
    2521              :             {
    2522          222 :               fputs ("After writing ", dump_file);
    2523          222 :               print_generic_expr (dump_file, cst, TDF_NONE);
    2524          222 :               fprintf (dump_file, " of size " HOST_WIDE_INT_PRINT_DEC
    2525              :                        " at position %d\n", info->bitsize, pos_in_buffer);
    2526          222 :               fputs ("  the merged value contains ", dump_file);
    2527          222 :               dump_char_array (dump_file, val, buf_size);
    2528          222 :               fputs ("  the merged mask contains  ", dump_file);
    2529          222 :               dump_char_array (dump_file, mask, buf_size);
    2530          222 :               if (bit_insertion)
    2531            0 :                 fputs ("  bit insertion is required\n", dump_file);
    2532          222 :               if (string_concatenation)
    2533            0 :                 fputs ("  string concatenation is required\n", dump_file);
    2534              :             }
    2535              :           else
    2536            0 :             fprintf (dump_file, "Failed to merge stores\n");
    2537              :         }
    2538      1188651 :       if (!ret)
    2539              :         return false;
    2540              :     }
    2541       399120 :   stores.qsort (sort_by_bitpos);
    2542              :   return true;
    2543              : }
    2544              : 
    2545              : /* Structure describing the store chain.  */
    2546              : 
    2547              : class imm_store_chain_info
    2548              : {
    2549              : public:
    2550              :   /* Doubly-linked list that imposes an order on chain processing.
    2551              :      PNXP (prev's next pointer) points to the head of a list, or to
    2552              :      the next field in the previous chain in the list.
    2553              :      See pass_store_merging::m_stores_head for more rationale.  */
    2554              :   imm_store_chain_info *next, **pnxp;
    2555              :   tree base_addr;
    2556              :   auto_vec<store_immediate_info *> m_store_info;
    2557              :   auto_vec<merged_store_group *> m_merged_store_groups;
    2558              : 
    2559      1915352 :   imm_store_chain_info (imm_store_chain_info *&inspt, tree b_a)
    2560      1915352 :   : next (inspt), pnxp (&inspt), base_addr (b_a)
    2561              :   {
    2562      1915352 :     inspt = this;
    2563      1915352 :     if (next)
    2564              :       {
    2565       637595 :         gcc_checking_assert (pnxp == next->pnxp);
    2566       637595 :         next->pnxp = &next;
    2567              :       }
    2568      1915352 :   }
    2569      1915352 :   ~imm_store_chain_info ()
    2570              :   {
    2571      1915352 :     *pnxp = next;
    2572      1915352 :     if (next)
    2573              :       {
    2574       612807 :         gcc_checking_assert (&next == next->pnxp);
    2575       612807 :         next->pnxp = pnxp;
    2576              :       }
    2577      1915352 :   }
    2578              :   bool terminate_and_process_chain ();
    2579              :   bool try_coalesce_bswap (merged_store_group *, unsigned int, unsigned int,
    2580              :                            unsigned int);
    2581              :   bool coalesce_immediate_stores ();
    2582              :   bool output_merged_store (merged_store_group *);
    2583              :   bool output_merged_stores ();
    2584              : };
    2585              : 
    2586              : const pass_data pass_data_tree_store_merging = {
    2587              :   GIMPLE_PASS,     /* type */
    2588              :   "store-merging", /* name */
    2589              :   OPTGROUP_NONE,   /* optinfo_flags */
    2590              :   TV_GIMPLE_STORE_MERGING,       /* tv_id */
    2591              :   PROP_ssa,     /* properties_required */
    2592              :   0,               /* properties_provided */
    2593              :   0,               /* properties_destroyed */
    2594              :   0,               /* todo_flags_start */
    2595              :   TODO_update_ssa, /* todo_flags_finish */
    2596              : };
    2597              : 
    2598              : class pass_store_merging : public gimple_opt_pass
    2599              : {
    2600              : public:
    2601       287872 :   pass_store_merging (gcc::context *ctxt)
    2602       575744 :     : gimple_opt_pass (pass_data_tree_store_merging, ctxt), m_stores_head (),
    2603       287872 :       m_n_chains (0), m_n_stores (0)
    2604              :   {
    2605       287872 :   }
    2606              : 
    2607              :   /* Pass not supported for PDP-endian, nor for insane hosts or
    2608              :      target character sizes where native_{encode,interpret}_expr
    2609              :      doesn't work properly.  */
    2610              :   bool
    2611      1038027 :   gate (function *) final override
    2612              :   {
    2613      1038027 :     return flag_store_merging
    2614              :            && BYTES_BIG_ENDIAN == WORDS_BIG_ENDIAN
    2615              :            && CHAR_BIT == 8
    2616      1038027 :            && BITS_PER_UNIT == 8;
    2617              :   }
    2618              : 
    2619              :   unsigned int execute (function *) final override;
    2620              : 
    2621              : private:
    2622              :   hash_map<tree_operand_hash, class imm_store_chain_info *> m_stores;
    2623              : 
    2624              :   /* Form a doubly-linked stack of the elements of m_stores, so that
    2625              :      we can iterate over them in a predictable way.  Using this order
    2626              :      avoids extraneous differences in the compiler output just because
    2627              :      of tree pointer variations (e.g. different chains end up in
    2628              :      different positions of m_stores, so they are handled in different
    2629              :      orders, so they allocate or release SSA names in different
    2630              :      orders, and when they get reused, subsequent passes end up
    2631              :      getting different SSA names, which may ultimately change
    2632              :      decisions when going out of SSA).  */
    2633              :   imm_store_chain_info *m_stores_head;
    2634              : 
    2635              :   /* The number of store chains currently tracked.  */
    2636              :   unsigned m_n_chains;
    2637              :   /* The number of stores currently tracked.  */
    2638              :   unsigned m_n_stores;
    2639              : 
    2640              :   bool process_store (gimple *);
    2641              :   bool terminate_and_process_chain (imm_store_chain_info *);
    2642              :   bool terminate_all_aliasing_chains (imm_store_chain_info **, gimple *);
    2643              :   bool terminate_and_process_all_chains ();
    2644              : }; // class pass_store_merging
    2645              : 
    2646              : /* Terminate and process all recorded chains.  Return true if any changes
    2647              :    were made.  */
    2648              : 
    2649              : bool
    2650      1163709 : pass_store_merging::terminate_and_process_all_chains ()
    2651              : {
    2652      1163709 :   bool ret = false;
    2653      2070613 :   while (m_stores_head)
    2654       906904 :     ret |= terminate_and_process_chain (m_stores_head);
    2655      1163709 :   gcc_assert (m_stores.is_empty ());
    2656      1163709 :   return ret;
    2657              : }
    2658              : 
    2659              : /* Terminate all chains that are affected by the statement STMT.
    2660              :    CHAIN_INFO is the chain we should ignore from the checks if
    2661              :    non-NULL.  Return true if any changes were made.  */
    2662              : 
    2663              : bool
    2664      9857853 : pass_store_merging::terminate_all_aliasing_chains (imm_store_chain_info
    2665              :                                                      **chain_info,
    2666              :                                                    gimple *stmt)
    2667              : {
    2668      9857853 :   bool ret = false;
    2669              : 
    2670              :   /* If the statement doesn't touch memory it can't alias.  */
    2671     19255192 :   if (!gimple_vuse (stmt))
    2672              :     return false;
    2673              : 
    2674      7855418 :   tree store_lhs = gimple_store_p (stmt) ? gimple_get_lhs (stmt) : NULL_TREE;
    2675      7855418 :   ao_ref store_lhs_ref;
    2676      7855418 :   ao_ref_init (&store_lhs_ref, store_lhs);
    2677      7855418 :   for (imm_store_chain_info *next = m_stores_head, *cur = next; cur; cur = next)
    2678              :     {
    2679      8170053 :       next = cur->next;
    2680              : 
    2681              :       /* We already checked all the stores in chain_info and terminated the
    2682              :          chain if necessary.  Skip it here.  */
    2683      8170053 :       if (chain_info && *chain_info == cur)
    2684      1135340 :         continue;
    2685              : 
    2686              :       store_immediate_info *info;
    2687              :       unsigned int i;
    2688     33571648 :       FOR_EACH_VEC_ELT (cur->m_store_info, i, info)
    2689              :         {
    2690     11517429 :           tree lhs = gimple_assign_lhs (info->stmt);
    2691     11517429 :           ao_ref lhs_ref;
    2692     11517429 :           ao_ref_init (&lhs_ref, lhs);
    2693     11517429 :           if (ref_maybe_used_by_stmt_p (stmt, &lhs_ref)
    2694     10685930 :               || stmt_may_clobber_ref_p_1 (stmt, &lhs_ref)
    2695     22062789 :               || (store_lhs && refs_may_alias_p_1 (&store_lhs_ref,
    2696              :                                                    &lhs_ref, false)))
    2697              :             {
    2698      1005965 :               if (dump_file && (dump_flags & TDF_DETAILS))
    2699              :                 {
    2700           24 :                   fprintf (dump_file, "stmt causes chain termination:\n");
    2701           24 :                   print_gimple_stmt (dump_file, stmt, 0);
    2702              :                 }
    2703      1005965 :               ret |= terminate_and_process_chain (cur);
    2704      1005965 :               break;
    2705              :             }
    2706              :         }
    2707              :     }
    2708              : 
    2709              :   return ret;
    2710              : }
    2711              : 
    2712              : /* Helper function.  Terminate the recorded chain storing to base object
    2713              :    BASE.  Return true if the merging and output was successful.  The m_stores
    2714              :    entry is removed after the processing in any case.  */
    2715              : 
    2716              : bool
    2717      1915352 : pass_store_merging::terminate_and_process_chain (imm_store_chain_info *chain_info)
    2718              : {
    2719      1915352 :   m_n_stores -= chain_info->m_store_info.length ();
    2720      1915352 :   m_n_chains--;
    2721      1915352 :   bool ret = chain_info->terminate_and_process_chain ();
    2722      1915352 :   m_stores.remove (chain_info->base_addr);
    2723      1915352 :   delete chain_info;
    2724      1915352 :   return ret;
    2725              : }
    2726              : 
    2727              : /* Return true if stmts in between FIRST (inclusive) and LAST (exclusive)
    2728              :    may clobber REF.  FIRST and LAST must have non-NULL vdef.  We want to
    2729              :    be able to sink load of REF across stores between FIRST and LAST, up
    2730              :    to right before LAST.  */
    2731              : 
    2732              : bool
    2733        32216 : stmts_may_clobber_ref_p (gimple *first, gimple *last, tree ref)
    2734              : {
    2735        32216 :   ao_ref r;
    2736        32216 :   ao_ref_init (&r, ref);
    2737        32216 :   unsigned int count = 0;
    2738        32216 :   tree vop = gimple_vdef (last);
    2739        32216 :   gimple *stmt;
    2740              : 
    2741              :   /* Return true conservatively if the basic blocks are different.  */
    2742        32216 :   if (gimple_bb (first) != gimple_bb (last))
    2743              :     return true;
    2744              : 
    2745        83124 :   do
    2746              :     {
    2747        83124 :       stmt = SSA_NAME_DEF_STMT (vop);
    2748        83124 :       if (stmt_may_clobber_ref_p_1 (stmt, &r))
    2749              :         return true;
    2750        82565 :       if (gimple_store_p (stmt)
    2751        82565 :           && refs_anti_dependent_p (ref, gimple_get_lhs (stmt)))
    2752              :         return true;
    2753              :       /* Avoid quadratic compile time by bounding the number of checks
    2754              :          we perform.  */
    2755        82287 :       if (++count > MAX_STORE_ALIAS_CHECKS)
    2756              :         return true;
    2757        82287 :       vop = gimple_vuse (stmt);
    2758              :     }
    2759        82287 :   while (stmt != first);
    2760              : 
    2761              :   return false;
    2762              : }
    2763              : 
    2764              : /* Return true if INFO->ops[IDX] is mergeable with the
    2765              :    corresponding loads already in MERGED_STORE group.
    2766              :    BASE_ADDR is the base address of the whole store group.  */
    2767              : 
    2768              : bool
    2769       123957 : compatible_load_p (merged_store_group *merged_store,
    2770              :                    store_immediate_info *info,
    2771              :                    tree base_addr, int idx)
    2772              : {
    2773       123957 :   store_immediate_info *infof = merged_store->stores[0];
    2774       123957 :   if (!info->ops[idx].base_addr
    2775       123957 :       || maybe_ne (info->ops[idx].bitpos - infof->ops[idx].bitpos,
    2776       123957 :                    info->bitpos - infof->bitpos)
    2777       231623 :       || !operand_equal_p (info->ops[idx].base_addr,
    2778       107666 :                            infof->ops[idx].base_addr, 0))
    2779        17804 :     return false;
    2780              : 
    2781       106153 :   store_immediate_info *infol = merged_store->stores.last ();
    2782       106153 :   tree load_vuse = gimple_vuse (info->ops[idx].stmt);
    2783              :   /* In this case all vuses should be the same, e.g.
    2784              :      _1 = s.a; _2 = s.b; _3 = _1 | 1; t.a = _3; _4 = _2 | 2; t.b = _4;
    2785              :      or
    2786              :      _1 = s.a; _2 = s.b; t.a = _1; t.b = _2;
    2787              :      and we can emit the coalesced load next to any of those loads.  */
    2788       106153 :   if (gimple_vuse (infof->ops[idx].stmt) == load_vuse
    2789       202082 :       && gimple_vuse (infol->ops[idx].stmt) == load_vuse)
    2790              :     return true;
    2791              : 
    2792              :   /* Otherwise, at least for now require that the load has the same
    2793              :      vuse as the store.  See following examples.  */
    2794        20448 :   if (gimple_vuse (info->stmt) != load_vuse)
    2795              :     return false;
    2796              : 
    2797        16300 :   if (gimple_vuse (infof->stmt) != gimple_vuse (infof->ops[idx].stmt)
    2798         8150 :       || (infof != infol
    2799        10050 :           && gimple_vuse (infol->stmt) != gimple_vuse (infol->ops[idx].stmt)))
    2800              :     return false;
    2801              : 
    2802              :   /* If the load is from the same location as the store, already
    2803              :      the construction of the immediate chain info guarantees no intervening
    2804              :      stores, so no further checks are needed.  Example:
    2805              :      _1 = s.a; _2 = _1 & -7; s.a = _2; _3 = s.b; _4 = _3 & -7; s.b = _4;  */
    2806         7629 :   if (known_eq (info->ops[idx].bitpos, info->bitpos)
    2807         7629 :       && operand_equal_p (info->ops[idx].base_addr, base_addr, 0))
    2808              :     return true;
    2809              : 
    2810              :   /* Otherwise, we need to punt if any of the loads can be clobbered by any
    2811              :      of the stores in the group, or any other stores in between those.
    2812              :      Previous calls to compatible_load_p ensured that for all the
    2813              :      merged_store->stores IDX loads, no stmts starting with
    2814              :      merged_store->first_stmt and ending right before merged_store->last_stmt
    2815              :      clobbers those loads.  */
    2816         7506 :   gimple *first = merged_store->first_stmt;
    2817         7506 :   gimple *last = merged_store->last_stmt;
    2818              :   /* The stores are sorted by increasing store bitpos, so if info->stmt store
    2819              :      comes before the so far first load, we'll be changing
    2820              :      merged_store->first_stmt.  In that case we need to give up if
    2821              :      any of the earlier processed loads clobber with the stmts in the new
    2822              :      range.  */
    2823         7506 :   if (info->order < merged_store->first_order)
    2824              :     {
    2825         1167 :       for (store_immediate_info *infoc : merged_store->stores)
    2826          329 :         if (stmts_may_clobber_ref_p (info->stmt, first, infoc->ops[idx].val))
    2827              :           return false;
    2828          180 :       first = info->stmt;
    2829              :     }
    2830              :   /* Similarly, we could change merged_store->last_stmt, so ensure
    2831              :      in that case no stmts in the new range clobber any of the earlier
    2832              :      processed loads.  */
    2833         7177 :   else if (info->order > merged_store->last_order)
    2834              :     {
    2835        45881 :       for (store_immediate_info *infoc : merged_store->stores)
    2836        25023 :         if (stmts_may_clobber_ref_p (last, info->stmt, infoc->ops[idx].val))
    2837              :           return false;
    2838         6504 :       last = info->stmt;
    2839              :     }
    2840              :   /* And finally, we'd be adding a new load to the set, ensure it isn't
    2841              :      clobbered in the new range.  */
    2842         6684 :   if (stmts_may_clobber_ref_p (first, last, info->ops[idx].val))
    2843              :     return false;
    2844              : 
    2845              :   /* Otherwise, we are looking for:
    2846              :      _1 = s.a; _2 = _1 ^ 15; t.a = _2; _3 = s.b; _4 = _3 ^ 15; t.b = _4;
    2847              :      or
    2848              :      _1 = s.a; t.a = _1; _2 = s.b; t.b = _2;  */
    2849              :   return true;
    2850              : }
    2851              : 
    2852              : /* Add all refs loaded to compute VAL to REFS vector.  */
    2853              : 
    2854              : void
    2855          231 : gather_bswap_load_refs (vec<tree> *refs, tree val)
    2856              : {
    2857          244 :   if (TREE_CODE (val) != SSA_NAME)
    2858              :     return;
    2859              : 
    2860          242 :   gimple *stmt = SSA_NAME_DEF_STMT (val);
    2861          242 :   if (!is_gimple_assign (stmt))
    2862              :     return;
    2863              : 
    2864          242 :   if (gimple_assign_load_p (stmt))
    2865              :     {
    2866          229 :       refs->safe_push (gimple_assign_rhs1 (stmt));
    2867          229 :       return;
    2868              :     }
    2869              : 
    2870           13 :   switch (gimple_assign_rhs_class (stmt))
    2871              :     {
    2872            2 :     case GIMPLE_BINARY_RHS:
    2873            2 :       gather_bswap_load_refs (refs, gimple_assign_rhs2 (stmt));
    2874              :       /* FALLTHRU */
    2875           13 :     case GIMPLE_UNARY_RHS:
    2876           13 :       gather_bswap_load_refs (refs, gimple_assign_rhs1 (stmt));
    2877           13 :       break;
    2878            0 :     default:
    2879            0 :       gcc_unreachable ();
    2880              :     }
    2881              : }
    2882              : 
    2883              : /* Check if there are any stores in M_STORE_INFO after index I
    2884              :    (where M_STORE_INFO must be sorted by sort_by_bitpos) that overlap
    2885              :    a potential group ending with END that have their order
    2886              :    smaller than LAST_ORDER.  ALL_INTEGER_CST_P is true if
    2887              :    all the stores already merged and the one under consideration
    2888              :    have rhs_code of INTEGER_CST.  Return true if there are no such stores.
    2889              :    Consider:
    2890              :      MEM[(long long int *)p_28] = 0;
    2891              :      MEM[(long long int *)p_28 + 8B] = 0;
    2892              :      MEM[(long long int *)p_28 + 16B] = 0;
    2893              :      MEM[(long long int *)p_28 + 24B] = 0;
    2894              :      _129 = (int) _130;
    2895              :      MEM[(int *)p_28 + 8B] = _129;
    2896              :      MEM[(int *)p_28].a = -1;
    2897              :    We already have
    2898              :      MEM[(long long int *)p_28] = 0;
    2899              :      MEM[(int *)p_28].a = -1;
    2900              :    stmts in the current group and need to consider if it is safe to
    2901              :    add MEM[(long long int *)p_28 + 8B] = 0; store into the same group.
    2902              :    There is an overlap between that store and the MEM[(int *)p_28 + 8B] = _129;
    2903              :    store though, so if we add the MEM[(long long int *)p_28 + 8B] = 0;
    2904              :    into the group and merging of those 3 stores is successful, merged
    2905              :    stmts will be emitted at the latest store from that group, i.e.
    2906              :    LAST_ORDER, which is the MEM[(int *)p_28].a = -1; store.
    2907              :    The MEM[(int *)p_28 + 8B] = _129; store that originally follows
    2908              :    the MEM[(long long int *)p_28 + 8B] = 0; would now be before it,
    2909              :    so we need to refuse merging MEM[(long long int *)p_28 + 8B] = 0;
    2910              :    into the group.  That way it will be its own store group and will
    2911              :    not be touched.  If ALL_INTEGER_CST_P and there are overlapping
    2912              :    INTEGER_CST stores, those are mergeable using merge_overlapping,
    2913              :    so don't return false for those.
    2914              : 
    2915              :    Similarly, check stores from FIRST_EARLIER (inclusive) to END_EARLIER
    2916              :    (exclusive), whether they don't overlap the bitrange START to END
    2917              :    and have order in between FIRST_ORDER and LAST_ORDER.  This is to
    2918              :    prevent merging in cases like:
    2919              :      MEM <char[12]> [&b + 8B] = {};
    2920              :      MEM[(short *) &b] = 5;
    2921              :      _5 = *x_4(D);
    2922              :      MEM <long long unsigned int> [&b + 2B] = _5;
    2923              :      MEM[(char *)&b + 16B] = 88;
    2924              :      MEM[(int *)&b + 20B] = 1;
    2925              :    The = {} store comes in sort_by_bitpos before the = 88 store, and can't
    2926              :    be merged with it, because the = _5 store overlaps these and is in between
    2927              :    them in sort_by_order ordering.  If it was merged, the merged store would
    2928              :    go after the = _5 store and thus change behavior.  */
    2929              : 
    2930              : static bool
    2931       801433 : check_no_overlap (const vec<store_immediate_info *> &m_store_info,
    2932              :                   unsigned int i,
    2933              :                   bool all_integer_cst_p, unsigned int first_order,
    2934              :                   unsigned int last_order, unsigned HOST_WIDE_INT start,
    2935              :                   unsigned HOST_WIDE_INT end, unsigned int first_earlier,
    2936              :                   unsigned end_earlier)
    2937              : {
    2938       801433 :   unsigned int len = m_store_info.length ();
    2939       813153 :   for (unsigned int j = first_earlier; j < end_earlier; j++)
    2940              :     {
    2941        11729 :       store_immediate_info *info = m_store_info[j];
    2942        11729 :       if (info->order > first_order
    2943           39 :           && info->order < last_order
    2944           11 :           && info->bitpos + info->bitsize > start)
    2945              :         return false;
    2946              :     }
    2947       884583 :   for (++i; i < len; ++i)
    2948              :     {
    2949       464058 :       store_immediate_info *info = m_store_info[i];
    2950       464058 :       if (info->bitpos >= end)
    2951              :         break;
    2952        83178 :       if (info->order < last_order
    2953        36579 :           && (!all_integer_cst_p || info->rhs_code != INTEGER_CST))
    2954              :         return false;
    2955              :     }
    2956              :   return true;
    2957              : }
    2958              : 
    2959              : /* Return true if m_store_info[first] and at least one following store
    2960              :    form a group which store try_size bitsize value which is byte swapped
    2961              :    from a memory load or some value, or identity from some value.
    2962              :    This uses the bswap pass APIs.  */
    2963              : 
    2964              : bool
    2965       231795 : imm_store_chain_info::try_coalesce_bswap (merged_store_group *merged_store,
    2966              :                                           unsigned int first,
    2967              :                                           unsigned int try_size,
    2968              :                                           unsigned int first_earlier)
    2969              : {
    2970       231795 :   unsigned int len = m_store_info.length (), last = first;
    2971       231795 :   unsigned HOST_WIDE_INT width = m_store_info[first]->bitsize;
    2972       231795 :   if (width >= try_size)
    2973              :     return false;
    2974        85077 :   for (unsigned int i = first + 1; i < len; ++i)
    2975              :     {
    2976        77653 :       if (m_store_info[i]->bitpos != m_store_info[first]->bitpos + width
    2977        76935 :           || m_store_info[i]->lp_nr != merged_store->lp_nr
    2978       154588 :           || m_store_info[i]->ins_stmt == NULL)
    2979              :         return false;
    2980        75952 :       width += m_store_info[i]->bitsize;
    2981        75952 :       if (width >= try_size)
    2982              :         {
    2983              :           last = i;
    2984              :           break;
    2985              :         }
    2986              :     }
    2987        48298 :   if (width != try_size)
    2988              :     return false;
    2989              : 
    2990        40333 :   bool allow_unaligned
    2991        40333 :     = !STRICT_ALIGNMENT && param_store_merging_allow_unaligned;
    2992              :   /* Punt if the combined store would not be aligned and we need alignment.  */
    2993        40333 :   if (!allow_unaligned)
    2994              :     {
    2995            0 :       unsigned int align = merged_store->align;
    2996            0 :       unsigned HOST_WIDE_INT align_base = merged_store->align_base;
    2997            0 :       for (unsigned int i = first + 1; i <= last; ++i)
    2998              :         {
    2999            0 :           unsigned int this_align;
    3000            0 :           unsigned HOST_WIDE_INT align_bitpos = 0;
    3001            0 :           get_object_alignment_1 (gimple_assign_lhs (m_store_info[i]->stmt),
    3002              :                                   &this_align, &align_bitpos);
    3003            0 :           if (this_align > align)
    3004              :             {
    3005            0 :               align = this_align;
    3006            0 :               align_base = m_store_info[i]->bitpos - align_bitpos;
    3007              :             }
    3008              :         }
    3009            0 :       unsigned HOST_WIDE_INT align_bitpos
    3010            0 :         = (m_store_info[first]->bitpos - align_base) & (align - 1);
    3011            0 :       if (align_bitpos)
    3012            0 :         align = least_bit_hwi (align_bitpos);
    3013            0 :       if (align < try_size)
    3014              :         return false;
    3015              :     }
    3016              : 
    3017        40333 :   tree type;
    3018        40333 :   switch (try_size)
    3019              :     {
    3020         6180 :     case 16: type = uint16_type_node; break;
    3021         5048 :     case 32: type = uint32_type_node; break;
    3022        29105 :     case 64: type = uint64_type_node; break;
    3023            0 :     default: gcc_unreachable ();
    3024              :     }
    3025        40333 :   struct symbolic_number n;
    3026        40333 :   gimple *ins_stmt = NULL;
    3027        40333 :   int vuse_store = -1;
    3028        40333 :   unsigned int first_order = merged_store->first_order;
    3029        40333 :   unsigned int last_order = merged_store->last_order;
    3030        40333 :   gimple *first_stmt = merged_store->first_stmt;
    3031        40333 :   gimple *last_stmt = merged_store->last_stmt;
    3032        40333 :   unsigned HOST_WIDE_INT end = merged_store->start + merged_store->width;
    3033        40333 :   store_immediate_info *infof = m_store_info[first];
    3034              : 
    3035       111083 :   for (unsigned int i = first; i <= last; ++i)
    3036              :     {
    3037        87249 :       store_immediate_info *info = m_store_info[i];
    3038        87249 :       struct symbolic_number this_n = info->n;
    3039        87249 :       this_n.type = type;
    3040        87249 :       if (!this_n.base_addr)
    3041        12052 :         this_n.range = try_size / BITS_PER_UNIT;
    3042              :       else
    3043              :         /* Update vuse in case it has changed by output_merged_stores.  */
    3044       150394 :         this_n.vuse = gimple_vuse (info->ins_stmt);
    3045        87249 :       unsigned int bitpos = info->bitpos - infof->bitpos;
    3046        87249 :       if (!do_shift_rotate (LSHIFT_EXPR, &this_n,
    3047              :                             BYTES_BIG_ENDIAN
    3048              :                             ? try_size - info->bitsize - bitpos
    3049              :                             : bitpos))
    3050        16499 :         return false;
    3051        87249 :       if (this_n.base_addr && vuse_store)
    3052              :         {
    3053              :           unsigned int j;
    3054       118668 :           for (j = first; j <= last; ++j)
    3055       184250 :             if (this_n.vuse == gimple_vuse (m_store_info[j]->stmt))
    3056              :               break;
    3057        45381 :           if (j > last)
    3058              :             {
    3059        26543 :               if (vuse_store == 1)
    3060              :                 return false;
    3061              :               vuse_store = 0;
    3062              :             }
    3063              :         }
    3064        87249 :       if (i == first)
    3065              :         {
    3066        40333 :           n = this_n;
    3067        40333 :           ins_stmt = info->ins_stmt;
    3068              :         }
    3069              :       else
    3070              :         {
    3071        46916 :           if (n.base_addr && n.vuse != this_n.vuse)
    3072              :             {
    3073         7138 :               if (vuse_store == 0)
    3074              :                 return false;
    3075              :               vuse_store = 1;
    3076              :             }
    3077        42879 :           if (info->order > last_order)
    3078              :             {
    3079        40210 :               last_order = info->order;
    3080        40210 :               last_stmt = info->stmt;
    3081              :             }
    3082         2669 :           else if (info->order < first_order)
    3083              :             {
    3084         2646 :               first_order = info->order;
    3085         2646 :               first_stmt = info->stmt;
    3086              :             }
    3087        42879 :           end = MAX (end, info->bitpos + info->bitsize);
    3088              : 
    3089        42879 :           ins_stmt = perform_symbolic_merge (ins_stmt, &n, info->ins_stmt,
    3090              :                                              &this_n, &n, BIT_IOR_EXPR);
    3091        42879 :           if (ins_stmt == NULL)
    3092              :             return false;
    3093              :         }
    3094              :     }
    3095              : 
    3096        23834 :   uint64_t cmpxchg, cmpnop;
    3097        23834 :   bool cast64_to_32;
    3098        23834 :   find_bswap_or_nop_finalize (&n, &cmpxchg, &cmpnop, &cast64_to_32);
    3099              : 
    3100              :   /* A complete byte swap should make the symbolic number to start with
    3101              :      the largest digit in the highest order byte.  Unchanged symbolic
    3102              :      number indicates a read with same endianness as target architecture.  */
    3103        23834 :   if (n.n != cmpnop && n.n != cmpxchg)
    3104              :     return false;
    3105              : 
    3106              :   /* For now.  */
    3107        21163 :   if (cast64_to_32)
    3108              :     return false;
    3109              : 
    3110        21163 :   if (n.base_addr == NULL_TREE && !is_gimple_val (n.src))
    3111              :     return false;
    3112              : 
    3113        21163 :   if (!check_no_overlap (m_store_info, last, false, first_order, last_order,
    3114              :                          merged_store->start, end, first_earlier, first))
    3115              :     return false;
    3116              : 
    3117              :   /* Don't handle memory copy this way if normal non-bswap processing
    3118              :      would handle it too.  */
    3119        21161 :   if (n.n == cmpnop && (unsigned) n.n_ops == last - first + 1)
    3120              :     {
    3121              :       unsigned int i;
    3122        61340 :       for (i = first; i <= last; ++i)
    3123        41132 :         if (m_store_info[i]->rhs_code != MEM_REF)
    3124              :           break;
    3125        20535 :       if (i == last + 1)
    3126              :         return false;
    3127              :     }
    3128              : 
    3129          953 :   if (n.n == cmpxchg)
    3130          626 :     switch (try_size)
    3131              :       {
    3132              :       case 16:
    3133              :         /* Will emit LROTATE_EXPR.  */
    3134              :         break;
    3135          182 :       case 32:
    3136          182 :         if (builtin_decl_explicit_p (BUILT_IN_BSWAP32)
    3137          306 :             && optab_handler (bswap_optab, SImode) != CODE_FOR_nothing)
    3138              :           break;
    3139           58 :         return false;
    3140          114 :       case 64:
    3141          114 :         if (builtin_decl_explicit_p (BUILT_IN_BSWAP64)
    3142          186 :             && (optab_handler (bswap_optab, DImode) != CODE_FOR_nothing
    3143           31 :                 || (word_mode == SImode
    3144           31 :                     && builtin_decl_explicit_p (BUILT_IN_BSWAP32)
    3145           31 :                     && optab_handler (bswap_optab, SImode) != CODE_FOR_nothing)))
    3146              :           break;
    3147           42 :         return false;
    3148            0 :       default:
    3149            0 :         gcc_unreachable ();
    3150              :       }
    3151              : 
    3152          853 :   if (!allow_unaligned && n.base_addr)
    3153              :     {
    3154            0 :       unsigned int align = get_object_alignment (n.src);
    3155            0 :       if (align < try_size)
    3156              :         return false;
    3157              :     }
    3158              : 
    3159              :   /* If each load has vuse of the corresponding store, need to verify
    3160              :      the loads can be sunk right before the last store.  */
    3161          853 :   if (vuse_store == 1)
    3162              :     {
    3163           92 :       auto_vec<tree, 64> refs;
    3164          321 :       for (unsigned int i = first; i <= last; ++i)
    3165          229 :         gather_bswap_load_refs (&refs,
    3166          229 :                                 gimple_assign_rhs1 (m_store_info[i]->stmt));
    3167              : 
    3168          437 :       for (tree ref : refs)
    3169          180 :         if (stmts_may_clobber_ref_p (first_stmt, last_stmt, ref))
    3170           19 :           return false;
    3171           73 :       n.vuse = NULL_TREE;
    3172           92 :     }
    3173              : 
    3174          834 :   infof->n = n;
    3175          834 :   infof->ins_stmt = ins_stmt;
    3176         3878 :   for (unsigned int i = first; i <= last; ++i)
    3177              :     {
    3178         4426 :       m_store_info[i]->rhs_code = n.n == cmpxchg ? LROTATE_EXPR : NOP_EXPR;
    3179         3044 :       m_store_info[i]->ops[0].base_addr = NULL_TREE;
    3180         3044 :       m_store_info[i]->ops[1].base_addr = NULL_TREE;
    3181         3044 :       if (i != first)
    3182         2210 :         merged_store->merge_into (m_store_info[i]);
    3183              :     }
    3184              : 
    3185              :   return true;
    3186              : }
    3187              : 
    3188              : /* Go through the candidate stores recorded in m_store_info and merge them
    3189              :    into merged_store_group objects recorded into m_merged_store_groups
    3190              :    representing the widened stores.  Return true if coalescing was successful
    3191              :    and the number of widened stores is fewer than the original number
    3192              :    of stores.  */
    3193              : 
    3194              : bool
    3195       493956 : imm_store_chain_info::coalesce_immediate_stores ()
    3196              : {
    3197              :   /* Anything less can't be processed.  */
    3198       620330 :   if (m_store_info.length () < 2)
    3199              :     return false;
    3200              : 
    3201       493956 :   if (dump_file && (dump_flags & TDF_DETAILS))
    3202           25 :     fprintf (dump_file, "Attempting to coalesce %u stores in chain\n",
    3203              :              m_store_info.length ());
    3204              : 
    3205       493956 :   store_immediate_info *info;
    3206       493956 :   unsigned int i, ignore = 0;
    3207       493956 :   unsigned int first_earlier = 0;
    3208       493956 :   unsigned int end_earlier = 0;
    3209              : 
    3210              :   /* Order the stores by the bitposition they write to.  */
    3211       493956 :   m_store_info.qsort (sort_by_bitpos);
    3212              : 
    3213       493956 :   info = m_store_info[0];
    3214       493956 :   merged_store_group *merged_store = new merged_store_group (info);
    3215       493956 :   if (dump_file && (dump_flags & TDF_DETAILS))
    3216           25 :     fputs ("New store group\n", dump_file);
    3217              : 
    3218      2123252 :   FOR_EACH_VEC_ELT (m_store_info, i, info)
    3219              :     {
    3220      1629296 :       unsigned HOST_WIDE_INT new_bitregion_start, new_bitregion_end;
    3221              : 
    3222      1629296 :       if (i <= ignore)
    3223       575978 :         goto done;
    3224              : 
    3225              :       while (first_earlier < end_earlier
    3226      1310321 :              && (m_store_info[first_earlier]->bitpos
    3227       267590 :                  + m_store_info[first_earlier]->bitsize
    3228       267590 :                  <= merged_store->start))
    3229       257003 :         first_earlier++;
    3230              : 
    3231              :       /* First try to handle group of stores like:
    3232              :          p[0] = data >> 24;
    3233              :          p[1] = data >> 16;
    3234              :          p[2] = data >> 8;
    3235              :          p[3] = data;
    3236              :          using the bswap framework.  */
    3237      1053318 :       if (info->bitpos == merged_store->start + merged_store->width
    3238       685742 :           && merged_store->stores.length () == 1
    3239       370711 :           && merged_store->stores[0]->ins_stmt != NULL
    3240       115614 :           && info->lp_nr == merged_store->lp_nr
    3241      1168932 :           && info->ins_stmt != NULL)
    3242              :         {
    3243              :           unsigned int try_size;
    3244       308429 :           for (try_size = 64; try_size >= 16; try_size >>= 1)
    3245       231795 :             if (try_coalesce_bswap (merged_store, i - 1, try_size,
    3246              :                                     first_earlier))
    3247              :               break;
    3248              : 
    3249        77468 :           if (try_size >= 16)
    3250              :             {
    3251          834 :               ignore = i + merged_store->stores.length () - 1;
    3252          834 :               m_merged_store_groups.safe_push (merged_store);
    3253          834 :               if (ignore < m_store_info.length ())
    3254              :                 {
    3255          324 :                   merged_store = new merged_store_group (m_store_info[ignore]);
    3256          324 :                   end_earlier = ignore;
    3257              :                 }
    3258              :               else
    3259          510 :                 merged_store = NULL;
    3260          834 :               goto done;
    3261              :             }
    3262              :         }
    3263              : 
    3264      1052484 :       new_bitregion_start
    3265      1052484 :         = MIN (merged_store->bitregion_start, info->bitregion_start);
    3266      1052484 :       new_bitregion_end
    3267      1052484 :         = MAX (merged_store->bitregion_end, info->bitregion_end);
    3268              : 
    3269      1052484 :       if (info->order >= merged_store->first_nonmergeable_order
    3270      1050671 :           || (((new_bitregion_end - new_bitregion_start + 1) / BITS_PER_UNIT)
    3271      1050671 :               > (unsigned) param_store_merging_max_size))
    3272              :         ;
    3273              : 
    3274              :       /* |---store 1---|
    3275              :                |---store 2---|
    3276              :          Overlapping stores.  */
    3277      1050591 :       else if (IN_RANGE (info->bitpos, merged_store->start,
    3278              :                          merged_store->start + merged_store->width - 1)
    3279              :                /* |---store 1---||---store 2---|
    3280              :                   Handle also the consecutive INTEGER_CST stores case here,
    3281              :                   as we have here the code to deal with overlaps.  */
    3282      1050591 :                || (info->bitregion_start <= merged_store->bitregion_end
    3283       691671 :                    && info->rhs_code == INTEGER_CST
    3284       524446 :                    && merged_store->only_constants
    3285       491515 :                    && merged_store->can_be_merged_into (info)))
    3286              :         {
    3287              :           /* Only allow overlapping stores of constants.  */
    3288       599665 :           if (info->rhs_code == INTEGER_CST
    3289       592108 :               && merged_store->only_constants
    3290       592033 :               && info->lp_nr == merged_store->lp_nr)
    3291              :             {
    3292       592021 :               unsigned int first_order
    3293       592021 :                 = MIN (merged_store->first_order, info->order);
    3294       592021 :               unsigned int last_order
    3295       592021 :                 = MAX (merged_store->last_order, info->order);
    3296       592021 :               unsigned HOST_WIDE_INT end
    3297       592021 :                 = MAX (merged_store->start + merged_store->width,
    3298              :                        info->bitpos + info->bitsize);
    3299       592021 :               if (check_no_overlap (m_store_info, i, true, first_order,
    3300              :                                     last_order, merged_store->start, end,
    3301              :                                     first_earlier, end_earlier))
    3302              :                 {
    3303              :                   /* check_no_overlap call above made sure there are no
    3304              :                      overlapping stores with non-INTEGER_CST rhs_code
    3305              :                      in between the first and last of the stores we've
    3306              :                      just merged.  If there are any INTEGER_CST rhs_code
    3307              :                      stores in between, we need to merge_overlapping them
    3308              :                      even if in the sort_by_bitpos order there are other
    3309              :                      overlapping stores in between.  Keep those stores as is.
    3310              :                      Example:
    3311              :                         MEM[(int *)p_28] = 0;
    3312              :                         MEM[(char *)p_28 + 3B] = 1;
    3313              :                         MEM[(char *)p_28 + 1B] = 2;
    3314              :                         MEM[(char *)p_28 + 2B] = MEM[(char *)p_28 + 6B];
    3315              :                      We can't merge the zero store with the store of two and
    3316              :                      not merge anything else, because the store of one is
    3317              :                      in the original order in between those two, but in
    3318              :                      store_by_bitpos order it comes after the last store that
    3319              :                      we can't merge with them.  We can merge the first 3 stores
    3320              :                      and keep the last store as is though.  */
    3321       591997 :                   unsigned int len = m_store_info.length ();
    3322       591997 :                   unsigned int try_order = last_order;
    3323       591997 :                   unsigned int first_nonmergeable_order;
    3324       591997 :                   unsigned int k;
    3325       591997 :                   bool last_iter = false;
    3326       591997 :                   int attempts = 0;
    3327       614662 :                   do
    3328              :                     {
    3329       614662 :                       unsigned int max_order = 0;
    3330       614662 :                       unsigned int min_order = first_order;
    3331       614662 :                       unsigned first_nonmergeable_int_order = ~0U;
    3332       614662 :                       unsigned HOST_WIDE_INT this_end = end;
    3333       614662 :                       unsigned HOST_WIDE_INT this_bitregion_start
    3334              :                         = new_bitregion_start;
    3335       614662 :                       unsigned HOST_WIDE_INT this_bitregion_end
    3336              :                         = new_bitregion_end;
    3337       614662 :                       k = i;
    3338       614662 :                       first_nonmergeable_order = ~0U;
    3339       743881 :                       for (unsigned int j = i + 1; j < len; ++j)
    3340              :                         {
    3341       456115 :                           store_immediate_info *info2 = m_store_info[j];
    3342       456115 :                           if (info2->bitpos >= this_end)
    3343              :                             break;
    3344       129222 :                           if (info2->order < try_order)
    3345              :                             {
    3346        81223 :                               if (info2->rhs_code != INTEGER_CST
    3347        81220 :                                   || info2->lp_nr != merged_store->lp_nr)
    3348              :                                 {
    3349              :                                   /* Normally check_no_overlap makes sure this
    3350              :                                      doesn't happen, but if end grows below,
    3351              :                                      then we need to process more stores than
    3352              :                                      check_no_overlap verified.  Example:
    3353              :                                       MEM[(int *)p_5] = 0;
    3354              :                                       MEM[(short *)p_5 + 3B] = 1;
    3355              :                                       MEM[(char *)p_5 + 4B] = _9;
    3356              :                                       MEM[(char *)p_5 + 2B] = 2;  */
    3357              :                                   k = 0;
    3358              :                                   break;
    3359              :                                 }
    3360        81220 :                               if (info2->bitregion_start
    3361              :                                   < this_bitregion_start)
    3362              :                                 this_bitregion_start = info2->bitregion_start;
    3363        81220 :                               if (info2->bitregion_end
    3364              :                                   > this_bitregion_end)
    3365              :                                 this_bitregion_end = info2->bitregion_end;
    3366        81220 :                               if (((this_bitregion_end - this_bitregion_start
    3367        81220 :                                     + 1) / BITS_PER_UNIT)
    3368              :                                   > (unsigned) param_store_merging_max_size)
    3369              :                                 {
    3370              :                                   k = 0;
    3371              :                                   break;
    3372              :                                 }
    3373        81220 :                               k = j;
    3374        81220 :                               min_order = MIN (min_order, info2->order);
    3375        81220 :                               this_end = MAX (this_end,
    3376              :                                               info2->bitpos + info2->bitsize);
    3377              :                             }
    3378        47999 :                           else if (info2->rhs_code == INTEGER_CST
    3379        45191 :                                    && info2->lp_nr == merged_store->lp_nr
    3380        45191 :                                    && !last_iter)
    3381              :                             {
    3382        45191 :                               max_order = MAX (max_order, info2->order + 1);
    3383        45191 :                               first_nonmergeable_int_order
    3384        45191 :                                 = MIN (first_nonmergeable_int_order,
    3385              :                                        info2->order);
    3386              :                             }
    3387              :                           else
    3388         2808 :                             first_nonmergeable_order
    3389         2808 :                               = MIN (first_nonmergeable_order, info2->order);
    3390              :                         }
    3391       614662 :                       if (k > i
    3392       614662 :                           && !check_no_overlap (m_store_info, len - 1, true,
    3393              :                                                 min_order, try_order,
    3394              :                                                 merged_store->start, this_end,
    3395              :                                                 first_earlier, end_earlier))
    3396              :                         k = 0;
    3397       614662 :                       if (k == 0)
    3398              :                         {
    3399            3 :                           if (last_order == try_order)
    3400              :                             break;
    3401              :                           /* If this failed, but only because we grew
    3402              :                              try_order, retry with the last working one,
    3403              :                              so that we merge at least something.  */
    3404            0 :                           try_order = last_order;
    3405            0 :                           last_iter = true;
    3406            0 :                           continue;
    3407              :                         }
    3408       614659 :                       last_order = try_order;
    3409              :                       /* Retry with a larger try_order to see if we could
    3410              :                          merge some further INTEGER_CST stores.  */
    3411       614659 :                       if (max_order
    3412       614659 :                           && (first_nonmergeable_int_order
    3413       614659 :                               < first_nonmergeable_order))
    3414              :                         {
    3415        22666 :                           try_order = MIN (max_order,
    3416              :                                            first_nonmergeable_order);
    3417        22666 :                           try_order
    3418        22666 :                             = MIN (try_order,
    3419              :                                    merged_store->first_nonmergeable_order);
    3420        22666 :                           if (try_order > last_order && ++attempts < 16)
    3421        22665 :                             continue;
    3422              :                         }
    3423       591994 :                       first_nonmergeable_order
    3424       591994 :                         = MIN (first_nonmergeable_order,
    3425              :                                first_nonmergeable_int_order);
    3426              :                       end = this_end;
    3427              :                       break;
    3428              :                     }
    3429              :                   while (1);
    3430              : 
    3431       591997 :                   if (k != 0)
    3432              :                     {
    3433       591994 :                       merged_store->merge_overlapping (info);
    3434              : 
    3435       591994 :                       merged_store->first_nonmergeable_order
    3436       591994 :                         = MIN (merged_store->first_nonmergeable_order,
    3437              :                                first_nonmergeable_order);
    3438              : 
    3439       672316 :                       for (unsigned int j = i + 1; j <= k; j++)
    3440              :                         {
    3441        80322 :                           store_immediate_info *info2 = m_store_info[j];
    3442        80322 :                           gcc_assert (info2->bitpos < end);
    3443        80322 :                           if (info2->order < last_order)
    3444              :                             {
    3445        80264 :                               gcc_assert (info2->rhs_code == INTEGER_CST);
    3446        80264 :                               if (info != info2)
    3447        80264 :                                 merged_store->merge_overlapping (info2);
    3448              :                             }
    3449              :                           /* Other stores are kept and not merged in any
    3450              :                              way.  */
    3451              :                         }
    3452       591994 :                       ignore = k;
    3453       591994 :                       goto done;
    3454              :                     }
    3455              :                 }
    3456              :             }
    3457              :         }
    3458              :       /* |---store 1---||---store 2---|
    3459              :          This store is consecutive to the previous one.
    3460              :          Merge it into the current store group.  There can be gaps in between
    3461              :          the stores, but there can't be gaps in between bitregions.  */
    3462       450926 :       else if (info->bitregion_start <= merged_store->bitregion_end
    3463       450926 :                && merged_store->can_be_merged_into (info))
    3464              :         {
    3465       138527 :           store_immediate_info *infof = merged_store->stores[0];
    3466              : 
    3467              :           /* All the rhs_code ops that take 2 operands are commutative,
    3468              :              swap the operands if it could make the operands compatible.  */
    3469       138527 :           if (infof->ops[0].base_addr
    3470       122305 :               && infof->ops[1].base_addr
    3471         1656 :               && info->ops[0].base_addr
    3472         1656 :               && info->ops[1].base_addr
    3473         1656 :               && known_eq (info->ops[1].bitpos - infof->ops[0].bitpos,
    3474              :                            info->bitpos - infof->bitpos)
    3475       139689 :               && operand_equal_p (info->ops[1].base_addr,
    3476              :                                   infof->ops[0].base_addr, 0))
    3477              :             {
    3478          956 :               std::swap (info->ops[0], info->ops[1]);
    3479          956 :               info->ops_swapped_p = true;
    3480              :             }
    3481       138527 :           if (check_no_overlap (m_store_info, i, false,
    3482       138527 :                                 MIN (merged_store->first_order, info->order),
    3483       138527 :                                 MAX (merged_store->last_order, info->order),
    3484              :                                 merged_store->start,
    3485       138527 :                                 MAX (merged_store->start + merged_store->width,
    3486              :                                      info->bitpos + info->bitsize),
    3487              :                                 first_earlier, end_earlier))
    3488              :             {
    3489              :               /* Turn MEM_REF into BIT_INSERT_EXPR for bit-field stores.  */
    3490       138525 :               if (info->rhs_code == MEM_REF && infof->rhs_code != MEM_REF)
    3491              :                 {
    3492          761 :                   info->rhs_code = BIT_INSERT_EXPR;
    3493          761 :                   info->ops[0].val = gimple_assign_rhs1 (info->stmt);
    3494          761 :                   info->ops[0].base_addr = NULL_TREE;
    3495              :                 }
    3496       137764 :               else if (infof->rhs_code == MEM_REF && info->rhs_code != MEM_REF)
    3497              :                 {
    3498            8 :                   for (store_immediate_info *infoj : merged_store->stores)
    3499              :                     {
    3500            2 :                       infoj->rhs_code = BIT_INSERT_EXPR;
    3501            2 :                       infoj->ops[0].val = gimple_assign_rhs1 (infoj->stmt);
    3502            2 :                       infoj->ops[0].base_addr = NULL_TREE;
    3503              :                     }
    3504            2 :                   merged_store->bit_insertion = true;
    3505              :                 }
    3506       138525 :               if ((infof->ops[0].base_addr
    3507       122301 :                    ? compatible_load_p (merged_store, info, base_addr, 0)
    3508        16224 :                    : !info->ops[0].base_addr)
    3509       278706 :                   && (infof->ops[1].base_addr
    3510         1656 :                       ? compatible_load_p (merged_store, info, base_addr, 1)
    3511       115624 :                       : !info->ops[1].base_addr))
    3512              :                 {
    3513       117273 :                   merged_store->merge_into (info);
    3514       117273 :                   goto done;
    3515              :                 }
    3516              :             }
    3517              :         }
    3518              : 
    3519              :       /* |---store 1---| <gap> |---store 2---|.
    3520              :          Gap between stores or the rhs not compatible.  Start a new group.  */
    3521              : 
    3522              :       /* Try to apply all the stores recorded for the group to determine
    3523              :          the bitpattern they write and discard it if that fails.
    3524              :          This will also reject single-store groups.  */
    3525       343217 :       if (merged_store->apply_stores ())
    3526        58080 :         m_merged_store_groups.safe_push (merged_store);
    3527              :       else
    3528       285137 :         delete merged_store;
    3529              : 
    3530       343217 :       merged_store = new merged_store_group (info);
    3531       343217 :       end_earlier = i;
    3532       343217 :       if (dump_file && (dump_flags & TDF_DETAILS))
    3533            1 :         fputs ("New store group\n", dump_file);
    3534              : 
    3535      1629296 :     done:
    3536      1629296 :       if (dump_file && (dump_flags & TDF_DETAILS))
    3537              :         {
    3538          227 :           fprintf (dump_file, "Store %u:\nbitsize:" HOST_WIDE_INT_PRINT_DEC
    3539              :                               " bitpos:" HOST_WIDE_INT_PRINT_DEC " val:",
    3540              :                    i, info->bitsize, info->bitpos);
    3541          227 :           print_generic_expr (dump_file, gimple_assign_rhs1 (info->stmt));
    3542          227 :           fputc ('\n', dump_file);
    3543              :         }
    3544              :     }
    3545              : 
    3546              :   /* Record or discard the last store group.  */
    3547       493956 :   if (merged_store)
    3548              :     {
    3549       493446 :       if (merged_store->apply_stores ())
    3550       341040 :         m_merged_store_groups.safe_push (merged_store);
    3551              :       else
    3552       152406 :         delete merged_store;
    3553              :     }
    3554              : 
    3555      1355494 :   gcc_assert (m_merged_store_groups.length () <= m_store_info.length ());
    3556              : 
    3557       493956 :   bool success
    3558       493956 :     = !m_merged_store_groups.is_empty ()
    3559       367582 :       && m_merged_store_groups.length () < m_store_info.length ();
    3560              : 
    3561       367582 :   if (success && dump_file)
    3562           94 :     fprintf (dump_file, "Coalescing successful!\nMerged into %u stores\n",
    3563              :              m_merged_store_groups.length ());
    3564              : 
    3565              :   return success;
    3566              : }
    3567              : 
    3568              : /* Return the type to use for the merged stores or loads described by STMTS.
    3569              :    This is needed to get the alias sets right.  If IS_LOAD, look for rhs,
    3570              :    otherwise lhs.  Additionally set *CLIQUEP and *BASEP to MR_DEPENDENCE_*
    3571              :    of the MEM_REFs if any.  */
    3572              : 
    3573              : static tree
    3574        93985 : get_alias_type_for_stmts (vec<gimple *> &stmts, bool is_load,
    3575              :                           unsigned short *cliquep, unsigned short *basep)
    3576              : {
    3577        93985 :   gimple *stmt;
    3578        93985 :   unsigned int i;
    3579        93985 :   tree type = NULL_TREE;
    3580        93985 :   tree ret = NULL_TREE;
    3581        93985 :   *cliquep = 0;
    3582        93985 :   *basep = 0;
    3583              : 
    3584       324891 :   FOR_EACH_VEC_ELT (stmts, i, stmt)
    3585              :     {
    3586       230906 :       tree ref = is_load ? gimple_assign_rhs1 (stmt)
    3587       224036 :                          : gimple_assign_lhs (stmt);
    3588       230906 :       tree type1 = reference_alias_ptr_type (ref);
    3589       230906 :       tree base = get_base_address (ref);
    3590              : 
    3591       230906 :       if (i == 0)
    3592              :         {
    3593        93985 :           if (TREE_CODE (base) == MEM_REF)
    3594              :             {
    3595        13924 :               *cliquep = MR_DEPENDENCE_CLIQUE (base);
    3596        13924 :               *basep = MR_DEPENDENCE_BASE (base);
    3597              :             }
    3598        93985 :           ret = type = type1;
    3599        93985 :           continue;
    3600              :         }
    3601       136921 :       if (!alias_ptr_types_compatible_p (type, type1))
    3602        82662 :         ret = ptr_type_node;
    3603       136921 :       if (TREE_CODE (base) != MEM_REF
    3604        19869 :           || *cliquep != MR_DEPENDENCE_CLIQUE (base)
    3605       155349 :           || *basep != MR_DEPENDENCE_BASE (base))
    3606              :         {
    3607       118493 :           *cliquep = 0;
    3608       118493 :           *basep = 0;
    3609              :         }
    3610              :     }
    3611        93985 :   return ret;
    3612              : }
    3613              : 
    3614              : /* Return the location_t information we can find among the statements
    3615              :    in STMTS.  */
    3616              : 
    3617              : static location_t
    3618        94022 : get_location_for_stmts (vec<gimple *> &stmts)
    3619              : {
    3620       286248 :   for (gimple *stmt : stmts)
    3621        96383 :     if (gimple_has_location (stmt))
    3622        92201 :       return gimple_location (stmt);
    3623              : 
    3624              :   return UNKNOWN_LOCATION;
    3625              : }
    3626              : 
    3627              : /* Used to decribe a store resulting from splitting a wide store in smaller
    3628              :    regularly-sized stores in split_group.  */
    3629              : 
    3630       909303 : class split_store
    3631              : {
    3632              : public:
    3633              :   unsigned HOST_WIDE_INT bytepos;
    3634              :   unsigned HOST_WIDE_INT size;
    3635              :   unsigned HOST_WIDE_INT align;
    3636              :   auto_vec<store_immediate_info *> orig_stores;
    3637              :   /* True if there is a single orig stmt covering the whole split store.  */
    3638              :   bool orig;
    3639              :   split_store (unsigned HOST_WIDE_INT, unsigned HOST_WIDE_INT,
    3640              :                unsigned HOST_WIDE_INT);
    3641              : };
    3642              : 
    3643              : /* Simple constructor.  */
    3644              : 
    3645       909303 : split_store::split_store (unsigned HOST_WIDE_INT bp,
    3646              :                           unsigned HOST_WIDE_INT sz,
    3647       909303 :                           unsigned HOST_WIDE_INT al)
    3648       909303 :                           : bytepos (bp), size (sz), align (al), orig (false)
    3649              : {
    3650       909303 :   orig_stores.create (0);
    3651            0 : }
    3652              : 
    3653              : /* Record all stores in GROUP that write to the region starting at BITPOS and
    3654              :    is of size BITSIZE.  Record infos for such statements in STORES if
    3655              :    non-NULL.  The stores in GROUP must be sorted by bitposition.  Return INFO
    3656              :    if there is exactly one original store in the range (in that case ignore
    3657              :    clobber stmts, unless there are only clobber stmts).  */
    3658              : 
    3659              : static store_immediate_info *
    3660      7204725 : find_constituent_stores (class merged_store_group *group,
    3661              :                          vec<store_immediate_info *> *stores,
    3662              :                          unsigned int *first,
    3663              :                          unsigned HOST_WIDE_INT bitpos,
    3664              :                          unsigned HOST_WIDE_INT bitsize)
    3665              : {
    3666      7204725 :   store_immediate_info *info, *ret = NULL;
    3667      7204725 :   unsigned int i;
    3668      7204725 :   bool second = false;
    3669      7204725 :   bool update_first = true;
    3670      7204725 :   unsigned HOST_WIDE_INT end = bitpos + bitsize;
    3671     18877558 :   for (i = *first; group->stores.iterate (i, &info); ++i)
    3672              :     {
    3673     15819975 :       unsigned HOST_WIDE_INT stmt_start = info->bitpos;
    3674     15819975 :       unsigned HOST_WIDE_INT stmt_end = stmt_start + info->bitsize;
    3675     15819975 :       if (stmt_end <= bitpos)
    3676              :         {
    3677              :           /* BITPOS passed to this function never decreases from within the
    3678              :              same split_group call, so optimize and don't scan info records
    3679              :              which are known to end before or at BITPOS next time.
    3680              :              Only do it if all stores before this one also pass this.  */
    3681      4437347 :           if (update_first)
    3682      2055882 :             *first = i + 1;
    3683      4437347 :           continue;
    3684              :         }
    3685              :       else
    3686     11382628 :         update_first = false;
    3687              : 
    3688              :       /* The stores in GROUP are ordered by bitposition so if we're past
    3689              :          the region for this group return early.  */
    3690     11382628 :       if (stmt_start >= end)
    3691              :         return ret;
    3692              : 
    3693      7608752 :       if (gimple_clobber_p (info->stmt))
    3694              :         {
    3695       702997 :           if (stores)
    3696        34874 :             stores->safe_push (info);
    3697       702997 :           if (ret == NULL)
    3698       680537 :             ret = info;
    3699       702997 :           continue;
    3700              :         }
    3701      6905755 :       if (stores)
    3702              :         {
    3703      1046934 :           stores->safe_push (info);
    3704      1046934 :           if (ret && !gimple_clobber_p (ret->stmt))
    3705              :             {
    3706              :               ret = NULL;
    3707              :               second = true;
    3708              :             }
    3709              :         }
    3710      5858821 :       else if (ret && !gimple_clobber_p (ret->stmt))
    3711              :         return NULL;
    3712      6433141 :       if (!second)
    3713      6393103 :         ret = info;
    3714              :     }
    3715              :   return ret;
    3716              : }
    3717              : 
    3718              : /* Return how many SSA_NAMEs used to compute value to store in the INFO
    3719              :    store have multiple uses.  If any SSA_NAME has multiple uses, also
    3720              :    count statements needed to compute it.  */
    3721              : 
    3722              : static unsigned
    3723      1141020 : count_multiple_uses (store_immediate_info *info)
    3724              : {
    3725      1141020 :   gimple *stmt = info->stmt;
    3726      1141020 :   unsigned ret = 0;
    3727      1141020 :   switch (info->rhs_code)
    3728              :     {
    3729              :     case INTEGER_CST:
    3730              :     case STRING_CST:
    3731              :       return 0;
    3732         6233 :     case BIT_AND_EXPR:
    3733         6233 :     case BIT_IOR_EXPR:
    3734         6233 :     case BIT_XOR_EXPR:
    3735         6233 :       if (info->bit_not_p)
    3736              :         {
    3737           65 :           if (!has_single_use (gimple_assign_rhs1 (stmt)))
    3738              :             ret = 1; /* Fall through below to return
    3739              :                         the BIT_NOT_EXPR stmt and then
    3740              :                         BIT_{AND,IOR,XOR}_EXPR and anything it
    3741              :                         uses.  */
    3742              :           else
    3743              :             /* stmt is after this the BIT_NOT_EXPR.  */
    3744           65 :             stmt = SSA_NAME_DEF_STMT (gimple_assign_rhs1 (stmt));
    3745              :         }
    3746         6233 :       if (!has_single_use (gimple_assign_rhs1 (stmt)))
    3747              :         {
    3748           26 :           ret += 1 + info->ops[0].bit_not_p;
    3749           26 :           if (info->ops[1].base_addr)
    3750           26 :             ret += 1 + info->ops[1].bit_not_p;
    3751           26 :           return ret + 1;
    3752              :         }
    3753         6207 :       stmt = SSA_NAME_DEF_STMT (gimple_assign_rhs1 (stmt));
    3754              :       /* stmt is now the BIT_*_EXPR.  */
    3755         6207 :       if (!has_single_use (gimple_assign_rhs1 (stmt)))
    3756         3766 :         ret += 1 + info->ops[info->ops_swapped_p].bit_not_p;
    3757         2441 :       else if (info->ops[info->ops_swapped_p].bit_not_p)
    3758              :         {
    3759          171 :           gimple *stmt2 = SSA_NAME_DEF_STMT (gimple_assign_rhs1 (stmt));
    3760          171 :           if (!has_single_use (gimple_assign_rhs1 (stmt2)))
    3761            0 :             ++ret;
    3762              :         }
    3763         6207 :       if (info->ops[1].base_addr == NULL_TREE)
    3764              :         {
    3765          317 :           gcc_checking_assert (!info->ops_swapped_p);
    3766              :           return ret;
    3767              :         }
    3768         5890 :       if (!has_single_use (gimple_assign_rhs2 (stmt)))
    3769         1772 :         ret += 1 + info->ops[1 - info->ops_swapped_p].bit_not_p;
    3770         4118 :       else if (info->ops[1 - info->ops_swapped_p].bit_not_p)
    3771              :         {
    3772           19 :           gimple *stmt2 = SSA_NAME_DEF_STMT (gimple_assign_rhs2 (stmt));
    3773           19 :           if (!has_single_use (gimple_assign_rhs1 (stmt2)))
    3774            0 :             ++ret;
    3775              :         }
    3776              :       return ret;
    3777       262484 :     case MEM_REF:
    3778       262484 :       if (!has_single_use (gimple_assign_rhs1 (stmt)))
    3779       166995 :         return 1 + info->ops[0].bit_not_p;
    3780        95489 :       else if (info->ops[0].bit_not_p)
    3781              :         {
    3782          142 :           stmt = SSA_NAME_DEF_STMT (gimple_assign_rhs1 (stmt));
    3783          142 :           if (!has_single_use (gimple_assign_rhs1 (stmt)))
    3784              :             return 1;
    3785              :         }
    3786              :       return 0;
    3787        13442 :     case BIT_INSERT_EXPR:
    3788        13442 :       return has_single_use (gimple_assign_rhs1 (stmt)) ? 0 : 1;
    3789            0 :     default:
    3790            0 :       gcc_unreachable ();
    3791              :     }
    3792              : }
    3793              : 
    3794              : /* Split a merged store described by GROUP by populating the SPLIT_STORES
    3795              :    vector (if non-NULL) with split_store structs describing the byte offset
    3796              :    (from the base), the bit size and alignment of each store as well as the
    3797              :    original statements involved in each such split group.
    3798              :    This is to separate the splitting strategy from the statement
    3799              :    building/emission/linking done in output_merged_store.
    3800              :    Return number of new stores.
    3801              :    If ALLOW_UNALIGNED_STORE is false, then all stores must be aligned.
    3802              :    If ALLOW_UNALIGNED_LOAD is false, then all loads must be aligned.
    3803              :    BZERO_FIRST may be true only when the first store covers the whole group
    3804              :    and clears it; if BZERO_FIRST is true, keep that first store in the set
    3805              :    unmodified and emit further stores for the overrides only.
    3806              :    If SPLIT_STORES is NULL, it is just a dry run to count number of
    3807              :    new stores.  */
    3808              : 
    3809              : static unsigned int
    3810      1009330 : split_group (merged_store_group *group, bool allow_unaligned_store,
    3811              :              bool allow_unaligned_load, bool bzero_first,
    3812              :              vec<split_store *> *split_stores,
    3813              :              unsigned *total_orig,
    3814              :              unsigned *total_new)
    3815              : {
    3816      1009330 :   unsigned HOST_WIDE_INT pos = group->bitregion_start;
    3817      1009330 :   unsigned HOST_WIDE_INT size = group->bitregion_end - pos;
    3818      1009330 :   unsigned HOST_WIDE_INT bytepos = pos / BITS_PER_UNIT;
    3819      1009330 :   unsigned HOST_WIDE_INT group_align = group->align;
    3820      1009330 :   unsigned HOST_WIDE_INT align_base = group->align_base;
    3821      1009330 :   unsigned HOST_WIDE_INT group_load_align = group_align;
    3822      1009330 :   bool any_orig = false;
    3823              : 
    3824      1009330 :   gcc_assert ((size % BITS_PER_UNIT == 0) && (pos % BITS_PER_UNIT == 0));
    3825              : 
    3826              :   /* For bswap framework using sets of stores, all the checking has been done
    3827              :      earlier in try_coalesce_bswap and the result always needs to be emitted
    3828              :      as a single store.  Likewise for string concatenation.  */
    3829      1009330 :   if (group->stores[0]->rhs_code == LROTATE_EXPR
    3830      1007809 :       || group->stores[0]->rhs_code == NOP_EXPR
    3831      2016158 :       || group->string_concatenation)
    3832              :     {
    3833         2703 :       gcc_assert (!bzero_first);
    3834         2703 :       if (total_orig)
    3835              :         {
    3836              :           /* Avoid the old/new stmt count heuristics.  It should be
    3837              :              always beneficial.  */
    3838          901 :           total_new[0] = 1;
    3839          901 :           total_orig[0] = 2;
    3840              :         }
    3841              : 
    3842         2703 :       if (split_stores)
    3843              :         {
    3844          901 :           unsigned HOST_WIDE_INT align_bitpos
    3845          901 :             = (group->start - align_base) & (group_align - 1);
    3846          901 :           unsigned HOST_WIDE_INT align = group_align;
    3847          901 :           if (align_bitpos)
    3848          120 :             align = least_bit_hwi (align_bitpos);
    3849          901 :           bytepos = group->start / BITS_PER_UNIT;
    3850          901 :           split_store *store
    3851          901 :             = new split_store (bytepos, group->width, align);
    3852          901 :           unsigned int first = 0;
    3853          901 :           find_constituent_stores (group, &store->orig_stores,
    3854              :                                    &first, group->start, group->width);
    3855          901 :           split_stores->safe_push (store);
    3856              :         }
    3857              : 
    3858         2703 :       return 1;
    3859              :     }
    3860              : 
    3861      1006627 :   unsigned int ret = 0, first = 0;
    3862      1006627 :   unsigned HOST_WIDE_INT try_pos = bytepos;
    3863              : 
    3864      1006627 :   if (total_orig)
    3865              :     {
    3866       328321 :       unsigned int i;
    3867       328321 :       store_immediate_info *info = group->stores[0];
    3868              : 
    3869       328321 :       total_new[0] = 0;
    3870       328321 :       total_orig[0] = 1; /* The orig store.  */
    3871       328321 :       info = group->stores[0];
    3872       328321 :       if (info->ops[0].base_addr)
    3873        73531 :         total_orig[0]++;
    3874       328321 :       if (info->ops[1].base_addr)
    3875         1492 :         total_orig[0]++;
    3876       328321 :       switch (info->rhs_code)
    3877              :         {
    3878         1587 :         case BIT_AND_EXPR:
    3879         1587 :         case BIT_IOR_EXPR:
    3880         1587 :         case BIT_XOR_EXPR:
    3881         1587 :           total_orig[0]++; /* The orig BIT_*_EXPR stmt.  */
    3882         1587 :           break;
    3883              :         default:
    3884              :           break;
    3885              :         }
    3886       328321 :       total_orig[0] *= group->stores.length ();
    3887              : 
    3888      1374984 :       FOR_EACH_VEC_ELT (group->stores, i, info)
    3889              :         {
    3890      1046663 :           total_new[0] += count_multiple_uses (info);
    3891      1046663 :           total_orig[0] += (info->bit_not_p
    3892      1046663 :                             + info->ops[0].bit_not_p
    3893      1046663 :                             + info->ops[1].bit_not_p);
    3894              :         }
    3895              :     }
    3896              : 
    3897      1006627 :   if (!allow_unaligned_load)
    3898            0 :     for (int i = 0; i < 2; ++i)
    3899            0 :       if (group->load_align[i])
    3900            0 :         group_load_align = MIN (group_load_align, group->load_align[i]);
    3901              : 
    3902      1006627 :   if (bzero_first)
    3903              :     {
    3904              :       store_immediate_info *gstore;
    3905        24649 :       FOR_EACH_VEC_ELT (group->stores, first, gstore)
    3906        24649 :         if (!gimple_clobber_p (gstore->stmt))
    3907              :           break;
    3908        23415 :       ++first;
    3909        23415 :       ret = 1;
    3910        23415 :       if (split_stores)
    3911              :         {
    3912         1751 :           split_store *store
    3913         1751 :             = new split_store (bytepos, gstore->bitsize, align_base);
    3914         1751 :           store->orig_stores.safe_push (gstore);
    3915         1751 :           store->orig = true;
    3916         1751 :           any_orig = true;
    3917         1751 :           split_stores->safe_push (store);
    3918              :         }
    3919              :     }
    3920              : 
    3921      5119293 :   while (size > 0)
    3922              :     {
    3923      4112666 :       if ((allow_unaligned_store || group_align <= BITS_PER_UNIT)
    3924      1792752 :           && (group->mask[try_pos - bytepos] == (unsigned char) ~0U
    3925      1776124 :               || (bzero_first && group->val[try_pos - bytepos] == 0)))
    3926              :         {
    3927              :           /* Skip padding bytes.  */
    3928       611263 :           ++try_pos;
    3929       611263 :           size -= BITS_PER_UNIT;
    3930       611263 :           continue;
    3931              :         }
    3932              : 
    3933      3501403 :       unsigned HOST_WIDE_INT try_bitpos = try_pos * BITS_PER_UNIT;
    3934      3501403 :       unsigned int try_size = MAX_STORE_BITSIZE, nonmasked;
    3935      3501403 :       unsigned HOST_WIDE_INT align_bitpos
    3936      3501403 :         = (try_bitpos - align_base) & (group_align - 1);
    3937      3501403 :       unsigned HOST_WIDE_INT align = group_align;
    3938      3501403 :       bool found_orig = false;
    3939      3501403 :       if (align_bitpos)
    3940       930793 :         align = least_bit_hwi (align_bitpos);
    3941      3501403 :       if (!allow_unaligned_store)
    3942      2359452 :         try_size = MIN (try_size, align);
    3943      3501403 :       if (!allow_unaligned_load)
    3944              :         {
    3945              :           /* If we can't do or don't want to do unaligned stores
    3946              :              as well as loads, we need to take the loads into account
    3947              :              as well.  */
    3948            0 :           unsigned HOST_WIDE_INT load_align = group_load_align;
    3949            0 :           align_bitpos = (try_bitpos - align_base) & (load_align - 1);
    3950            0 :           if (align_bitpos)
    3951            0 :             load_align = least_bit_hwi (align_bitpos);
    3952            0 :           for (int i = 0; i < 2; ++i)
    3953            0 :             if (group->load_align[i])
    3954              :               {
    3955            0 :                 align_bitpos
    3956            0 :                   = known_alignment (try_bitpos
    3957            0 :                                      - group->stores[0]->bitpos
    3958            0 :                                      + group->stores[0]->ops[i].bitpos
    3959            0 :                                      - group->load_align_base[i]);
    3960            0 :                 if (align_bitpos & (group_load_align - 1))
    3961              :                   {
    3962            0 :                     unsigned HOST_WIDE_INT a = least_bit_hwi (align_bitpos);
    3963            0 :                     load_align = MIN (load_align, a);
    3964              :                   }
    3965              :               }
    3966            0 :           try_size = MIN (try_size, load_align);
    3967              :         }
    3968      3501403 :       store_immediate_info *info
    3969      3501403 :         = find_constituent_stores (group, NULL, &first, try_bitpos, try_size);
    3970      3501403 :       if (info && !gimple_clobber_p (info->stmt))
    3971              :         {
    3972              :           /* If there is just one original statement for the range, see if
    3973              :              we can just reuse the original store which could be even larger
    3974              :              than try_size.  */
    3975      2575257 :           unsigned HOST_WIDE_INT stmt_end
    3976      2575257 :             = ROUND_UP (info->bitpos + info->bitsize, BITS_PER_UNIT);
    3977      2575257 :           info = find_constituent_stores (group, NULL, &first, try_bitpos,
    3978              :                                           stmt_end - try_bitpos);
    3979      2575257 :           if (info && info->bitpos >= try_bitpos)
    3980              :             {
    3981      2390125 :               store_immediate_info *info2 = NULL;
    3982      2390125 :               unsigned int first_copy = first;
    3983      2390125 :               if (info->bitpos > try_bitpos
    3984         6239 :                   && stmt_end - try_bitpos <= try_size)
    3985              :                 {
    3986         5997 :                   info2 = find_constituent_stores (group, NULL, &first_copy,
    3987              :                                                    try_bitpos,
    3988              :                                                    info->bitpos - try_bitpos);
    3989         5997 :                   gcc_assert (info2 == NULL || gimple_clobber_p (info2->stmt));
    3990              :                 }
    3991      2389711 :               if (info2 == NULL && stmt_end - try_bitpos < try_size)
    3992              :                 {
    3993       429032 :                   info2 = find_constituent_stores (group, NULL, &first_copy,
    3994              :                                                    stmt_end,
    3995       214516 :                                                    (try_bitpos + try_size)
    3996              :                                                    - stmt_end);
    3997       214516 :                   gcc_assert (info2 == NULL || gimple_clobber_p (info2->stmt));
    3998              :                 }
    3999              :               if (info2 == NULL)
    4000              :                 {
    4001      2369551 :                   try_size = stmt_end - try_bitpos;
    4002      2369551 :                   found_orig = true;
    4003      2369551 :                   goto found;
    4004              :                 }
    4005              :             }
    4006              :         }
    4007              : 
    4008              :       /* Approximate store bitsize for the case when there are no padding
    4009              :          bits.  */
    4010      1217181 :       while (try_size > size)
    4011        85329 :         try_size /= 2;
    4012              :       /* Now look for whole padding bytes at the end of that bitsize.  */
    4013      2199251 :       for (nonmasked = try_size / BITS_PER_UNIT; nonmasked > 0; --nonmasked)
    4014      2031906 :         if (group->mask[try_pos - bytepos + nonmasked - 1]
    4015              :             != (unsigned char) ~0U
    4016      1983280 :             && (!bzero_first
    4017      1026842 :                 || group->val[try_pos - bytepos + nonmasked - 1] != 0))
    4018              :           break;
    4019      1131852 :       if (nonmasked == 0 || (info && gimple_clobber_p (info->stmt)))
    4020              :         {
    4021              :           /* If entire try_size range is padding, skip it.  */
    4022       591393 :           try_pos += try_size / BITS_PER_UNIT;
    4023       591393 :           size -= try_size;
    4024       591393 :           continue;
    4025              :         }
    4026              :       /* Otherwise try to decrease try_size if second half, last 3 quarters
    4027              :          etc. are padding.  */
    4028       540459 :       nonmasked *= BITS_PER_UNIT;
    4029       551111 :       while (nonmasked <= try_size / 2)
    4030              :         try_size /= 2;
    4031       540459 :       if (!allow_unaligned_store && group_align > BITS_PER_UNIT)
    4032              :         {
    4033              :           /* Now look for whole padding bytes at the start of that bitsize.  */
    4034       309891 :           unsigned int try_bytesize = try_size / BITS_PER_UNIT, masked;
    4035       320005 :           for (masked = 0; masked < try_bytesize; ++masked)
    4036       320005 :             if (group->mask[try_pos - bytepos + masked] != (unsigned char) ~0U
    4037       317237 :                 && (!bzero_first
    4038        11255 :                     || group->val[try_pos - bytepos + masked] != 0))
    4039              :               break;
    4040       309891 :           masked *= BITS_PER_UNIT;
    4041       309891 :           gcc_assert (masked < try_size);
    4042       309891 :           if (masked >= try_size / 2)
    4043              :             {
    4044         5170 :               while (masked >= try_size / 2)
    4045              :                 {
    4046         2689 :                   try_size /= 2;
    4047         2689 :                   try_pos += try_size / BITS_PER_UNIT;
    4048         2689 :                   size -= try_size;
    4049         2689 :                   masked -= try_size;
    4050              :                 }
    4051              :               /* Need to recompute the alignment, so just retry at the new
    4052              :                  position.  */
    4053         2481 :               continue;
    4054              :             }
    4055              :         }
    4056              : 
    4057       230568 :     found:
    4058      2907529 :       ++ret;
    4059              : 
    4060      2907529 :       if (split_stores)
    4061              :         {
    4062       906651 :           split_store *store
    4063       906651 :             = new split_store (try_pos, try_size, align);
    4064       906651 :           info = find_constituent_stores (group, &store->orig_stores,
    4065              :                                           &first, try_bitpos, try_size);
    4066       906651 :           if (info
    4067       808207 :               && !gimple_clobber_p (info->stmt)
    4068       808200 :               && info->bitpos >= try_bitpos
    4069       798815 :               && info->bitpos + info->bitsize <= try_bitpos + try_size
    4070      1703792 :               && (store->orig_stores.length () == 1
    4071        31215 :                   || found_orig
    4072         6765 :                   || (info->bitpos == try_bitpos
    4073         6667 :                       && (info->bitpos + info->bitsize
    4074              :                           == try_bitpos + try_size))))
    4075              :             {
    4076       790376 :               store->orig = true;
    4077       790376 :               any_orig = true;
    4078              :             }
    4079       906651 :           split_stores->safe_push (store);
    4080              :         }
    4081              : 
    4082      2907529 :       try_pos += try_size / BITS_PER_UNIT;
    4083      2907529 :       size -= try_size;
    4084              :     }
    4085              : 
    4086      1006627 :   if (total_orig)
    4087              :     {
    4088       328321 :       unsigned int i;
    4089       328321 :       split_store *store;
    4090              :       /* If we are reusing some original stores and any of the
    4091              :          original SSA_NAMEs had multiple uses, we need to subtract
    4092              :          those now before we add the new ones.  */
    4093       328321 :       if (total_new[0] && any_orig)
    4094              :         {
    4095       138206 :           FOR_EACH_VEC_ELT (*split_stores, i, store)
    4096        95345 :             if (store->orig)
    4097        94357 :               total_new[0] -= count_multiple_uses (store->orig_stores[0]);
    4098              :         }
    4099       328321 :       total_new[0] += ret; /* The new store.  */
    4100       328321 :       store_immediate_info *info = group->stores[0];
    4101       328321 :       if (info->ops[0].base_addr)
    4102        73531 :         total_new[0] += ret;
    4103       328321 :       if (info->ops[1].base_addr)
    4104         1492 :         total_new[0] += ret;
    4105       328321 :       switch (info->rhs_code)
    4106              :         {
    4107         1587 :         case BIT_AND_EXPR:
    4108         1587 :         case BIT_IOR_EXPR:
    4109         1587 :         case BIT_XOR_EXPR:
    4110         1587 :           total_new[0] += ret; /* The new BIT_*_EXPR stmt.  */
    4111         1587 :           break;
    4112              :         default:
    4113              :           break;
    4114              :         }
    4115      1236723 :       FOR_EACH_VEC_ELT (*split_stores, i, store)
    4116              :         {
    4117       908402 :           unsigned int j;
    4118       908402 :           bool bit_not_p[3] = { false, false, false };
    4119              :           /* If all orig_stores have certain bit_not_p set, then
    4120              :              we'd use a BIT_NOT_EXPR stmt and need to account for it.
    4121              :              If some orig_stores have certain bit_not_p set, then
    4122              :              we'd use a BIT_XOR_EXPR with a mask and need to account for
    4123              :              it.  */
    4124      1988493 :           FOR_EACH_VEC_ELT (store->orig_stores, j, info)
    4125              :             {
    4126      1080091 :               if (info->ops[0].bit_not_p)
    4127          355 :                 bit_not_p[0] = true;
    4128      1080091 :               if (info->ops[1].bit_not_p)
    4129           17 :                 bit_not_p[1] = true;
    4130      1080091 :               if (info->bit_not_p)
    4131           65 :                 bit_not_p[2] = true;
    4132              :             }
    4133       908402 :           total_new[0] += bit_not_p[0] + bit_not_p[1] + bit_not_p[2];
    4134              :         }
    4135              : 
    4136              :     }
    4137              : 
    4138              :   return ret;
    4139              : }
    4140              : 
    4141              : /* Return the operation through which the operand IDX (if < 2) or
    4142              :    result (IDX == 2) should be inverted.  If NOP_EXPR, no inversion
    4143              :    is done, if BIT_NOT_EXPR, all bits are inverted, if BIT_XOR_EXPR,
    4144              :    the bits should be xored with mask.  */
    4145              : 
    4146              : static enum tree_code
    4147         3450 : invert_op (split_store *split_store, int idx, tree int_type, tree &mask)
    4148              : {
    4149         3450 :   unsigned int i;
    4150         3450 :   store_immediate_info *info;
    4151         3450 :   unsigned int cnt = 0;
    4152         3450 :   bool any_paddings = false;
    4153        13512 :   FOR_EACH_VEC_ELT (split_store->orig_stores, i, info)
    4154              :     {
    4155        10062 :       bool bit_not_p = idx < 2 ? info->ops[idx].bit_not_p : info->bit_not_p;
    4156        10062 :       if (bit_not_p)
    4157              :         {
    4158           89 :           ++cnt;
    4159           89 :           tree lhs = gimple_assign_lhs (info->stmt);
    4160          178 :           if (INTEGRAL_TYPE_P (TREE_TYPE (lhs))
    4161          178 :               && TYPE_PRECISION (TREE_TYPE (lhs)) < info->bitsize)
    4162              :             any_paddings = true;
    4163              :         }
    4164              :     }
    4165         3450 :   mask = NULL_TREE;
    4166         3450 :   if (cnt == 0)
    4167              :     return NOP_EXPR;
    4168           66 :   if (cnt == split_store->orig_stores.length () && !any_paddings)
    4169              :     return BIT_NOT_EXPR;
    4170              : 
    4171           26 :   unsigned HOST_WIDE_INT try_bitpos = split_store->bytepos * BITS_PER_UNIT;
    4172           26 :   unsigned buf_size = split_store->size / BITS_PER_UNIT;
    4173           26 :   unsigned char *buf
    4174           26 :     = XALLOCAVEC (unsigned char, buf_size);
    4175           26 :   memset (buf, ~0U, buf_size);
    4176          106 :   FOR_EACH_VEC_ELT (split_store->orig_stores, i, info)
    4177              :     {
    4178           80 :       bool bit_not_p = idx < 2 ? info->ops[idx].bit_not_p : info->bit_not_p;
    4179           80 :       if (!bit_not_p)
    4180           27 :         continue;
    4181              :       /* Clear regions with bit_not_p and invert afterwards, rather than
    4182              :          clear regions with !bit_not_p, so that gaps in between stores aren't
    4183              :          set in the mask.  */
    4184           53 :       unsigned HOST_WIDE_INT bitsize = info->bitsize;
    4185           53 :       unsigned HOST_WIDE_INT prec = bitsize;
    4186           53 :       unsigned int pos_in_buffer = 0;
    4187           53 :       if (any_paddings)
    4188              :         {
    4189           36 :           tree lhs = gimple_assign_lhs (info->stmt);
    4190           72 :           if (INTEGRAL_TYPE_P (TREE_TYPE (lhs))
    4191           72 :               && TYPE_PRECISION (TREE_TYPE (lhs)) < bitsize)
    4192           36 :             prec = TYPE_PRECISION (TREE_TYPE (lhs));
    4193              :         }
    4194           53 :       if (info->bitpos < try_bitpos)
    4195              :         {
    4196            0 :           gcc_assert (info->bitpos + bitsize > try_bitpos);
    4197            0 :           if (!BYTES_BIG_ENDIAN)
    4198              :             {
    4199            0 :               if (prec <= try_bitpos - info->bitpos)
    4200            0 :                 continue;
    4201            0 :               prec -= try_bitpos - info->bitpos;
    4202              :             }
    4203            0 :           bitsize -= try_bitpos - info->bitpos;
    4204            0 :           if (BYTES_BIG_ENDIAN && prec > bitsize)
    4205              :             prec = bitsize;
    4206              :         }
    4207              :       else
    4208           53 :         pos_in_buffer = info->bitpos - try_bitpos;
    4209           53 :       if (prec < bitsize)
    4210              :         {
    4211              :           /* If this is a bool inversion, invert just the least significant
    4212              :              prec bits rather than all bits of it.  */
    4213              :           if (BYTES_BIG_ENDIAN)
    4214              :             {
    4215              :               pos_in_buffer += bitsize - prec;
    4216              :               if (pos_in_buffer >= split_store->size)
    4217              :                 continue;
    4218              :             }
    4219              :           bitsize = prec;
    4220              :         }
    4221           53 :       if (pos_in_buffer + bitsize > split_store->size)
    4222            0 :         bitsize = split_store->size - pos_in_buffer;
    4223           53 :       unsigned char *p = buf + (pos_in_buffer / BITS_PER_UNIT);
    4224           53 :       if (BYTES_BIG_ENDIAN)
    4225              :         clear_bit_region_be (p, (BITS_PER_UNIT - 1
    4226              :                                  - (pos_in_buffer % BITS_PER_UNIT)), bitsize);
    4227              :       else
    4228           53 :         clear_bit_region (p, pos_in_buffer % BITS_PER_UNIT, bitsize);
    4229              :     }
    4230          182 :   for (unsigned int i = 0; i < buf_size; ++i)
    4231          156 :     buf[i] = ~buf[i];
    4232           26 :   mask = native_interpret_expr (int_type, buf, buf_size);
    4233           26 :   return BIT_XOR_EXPR;
    4234              : }
    4235              : 
    4236              : /* Given a merged store group GROUP output the widened version of it.
    4237              :    The store chain is against the base object BASE.
    4238              :    Try store sizes of at most MAX_STORE_BITSIZE bits wide and don't output
    4239              :    unaligned stores for STRICT_ALIGNMENT targets or if it's too expensive.
    4240              :    Make sure that the number of statements output is less than the number of
    4241              :    original statements.  If a better sequence is possible emit it and
    4242              :    return true.  */
    4243              : 
    4244              : bool
    4245       399954 : imm_store_chain_info::output_merged_store (merged_store_group *group)
    4246              : {
    4247       399954 :   const unsigned HOST_WIDE_INT start_byte_pos
    4248       399954 :     = group->bitregion_start / BITS_PER_UNIT;
    4249       470686 :   unsigned int orig_num_stmts = group->stores.length ();
    4250       399954 :   if (orig_num_stmts < 2)
    4251              :     return false;
    4252              : 
    4253       399954 :   bool allow_unaligned_store
    4254       399954 :     = !STRICT_ALIGNMENT && param_store_merging_allow_unaligned;
    4255       399954 :   bool allow_unaligned_load = allow_unaligned_store;
    4256       399954 :   bool bzero_first = false;
    4257       399954 :   store_immediate_info *store;
    4258       399954 :   unsigned int num_clobber_stmts = 0;
    4259       399954 :   if (group->stores[0]->rhs_code == INTEGER_CST)
    4260              :     {
    4261              :       unsigned int i;
    4262       481697 :       FOR_EACH_VEC_ELT (group->stores, i, store)
    4263       410965 :         if (gimple_clobber_p (store->stmt))
    4264       160150 :           num_clobber_stmts++;
    4265       250815 :         else if (TREE_CODE (gimple_assign_rhs1 (store->stmt)) == CONSTRUCTOR
    4266        15373 :                  && CONSTRUCTOR_NELTS (gimple_assign_rhs1 (store->stmt)) == 0
    4267        15373 :                  && group->start == store->bitpos
    4268        14949 :                  && group->width == store->bitsize
    4269        10832 :                  && (group->start % BITS_PER_UNIT) == 0
    4270       261647 :                  && (group->width % BITS_PER_UNIT) == 0)
    4271              :           {
    4272              :             bzero_first = true;
    4273              :             break;
    4274              :           }
    4275              :         else
    4276              :           break;
    4277      1158088 :       FOR_EACH_VEC_ELT_FROM (group->stores, i, store, i)
    4278       836541 :         if (gimple_clobber_p (store->stmt))
    4279         1877 :           num_clobber_stmts++;
    4280       321547 :       if (num_clobber_stmts == orig_num_stmts)
    4281              :         return false;
    4282       250815 :       orig_num_stmts -= num_clobber_stmts;
    4283              :     }
    4284       329222 :   if (allow_unaligned_store || bzero_first)
    4285              :     {
    4286              :       /* If unaligned stores are allowed, see how many stores we'd emit
    4287              :          for unaligned and how many stores we'd emit for aligned stores.
    4288              :          Only use unaligned stores if it allows fewer stores than aligned.
    4289              :          Similarly, if there is a whole region clear first, prefer expanding
    4290              :          it together compared to expanding clear first followed by merged
    4291              :          further stores.  */
    4292       329222 :       unsigned cnt[4] = { ~0U, ~0U, ~0U, ~0U };
    4293       329222 :       int pass_min = 0;
    4294      1646110 :       for (int pass = 0; pass < 4; ++pass)
    4295              :         {
    4296      1316888 :           if (!allow_unaligned_store && (pass & 1) != 0)
    4297            0 :             continue;
    4298      1316888 :           if (!bzero_first && (pass & 2) != 0)
    4299       636780 :             continue;
    4300      1360216 :           cnt[pass] = split_group (group, (pass & 1) != 0,
    4301       680108 :                                    allow_unaligned_load, (pass & 2) != 0,
    4302              :                                    NULL, NULL, NULL);
    4303       680108 :           if (cnt[pass] < cnt[pass_min])
    4304      1316888 :             pass_min = pass;
    4305              :         }
    4306       329222 :       if ((pass_min & 1) == 0)
    4307       321898 :         allow_unaligned_store = false;
    4308       329222 :       if ((pass_min & 2) == 0)
    4309       327471 :         bzero_first = false;
    4310              :     }
    4311              : 
    4312       329222 :   auto_vec<class split_store *, 32> split_stores;
    4313       329222 :   split_store *split_store;
    4314       329222 :   unsigned total_orig, total_new, i;
    4315       329222 :   split_group (group, allow_unaligned_store, allow_unaligned_load, bzero_first,
    4316              :                &split_stores, &total_orig, &total_new);
    4317              : 
    4318              :   /* Determine if there is a clobber covering the whole group at the start,
    4319              :      followed by proposed split stores that cover the whole group.  In that
    4320              :      case, prefer the transformation even if
    4321              :      split_stores.length () == orig_num_stmts.  */
    4322       329222 :   bool clobber_first = false;
    4323       329222 :   if (num_clobber_stmts
    4324        18867 :       && gimple_clobber_p (group->stores[0]->stmt)
    4325        18381 :       && group->start == group->stores[0]->bitpos
    4326        18381 :       && group->width == group->stores[0]->bitsize
    4327        18191 :       && (group->start % BITS_PER_UNIT) == 0
    4328       347413 :       && (group->width % BITS_PER_UNIT) == 0)
    4329              :     {
    4330        18191 :       clobber_first = true;
    4331        18191 :       unsigned HOST_WIDE_INT pos = group->start / BITS_PER_UNIT;
    4332        45613 :       FOR_EACH_VEC_ELT (split_stores, i, split_store)
    4333        32751 :         if (split_store->bytepos != pos)
    4334              :           {
    4335              :             clobber_first = false;
    4336              :             break;
    4337              :           }
    4338              :         else
    4339        27422 :           pos += split_store->size / BITS_PER_UNIT;
    4340        18191 :       if (pos != (group->start + group->width) / BITS_PER_UNIT)
    4341       324234 :         clobber_first = false;
    4342              :     }
    4343              : 
    4344       658444 :   if (split_stores.length () >= orig_num_stmts + clobber_first)
    4345              :     {
    4346              : 
    4347              :       /* We didn't manage to reduce the number of statements.  Bail out.  */
    4348       242788 :       if (dump_file && (dump_flags & TDF_DETAILS))
    4349            2 :         fprintf (dump_file, "Exceeded original number of stmts (%u)."
    4350              :                             "  Not profitable to emit new sequence.\n",
    4351              :                  orig_num_stmts);
    4352       847746 :       FOR_EACH_VEC_ELT (split_stores, i, split_store)
    4353      1209916 :         delete split_store;
    4354              :       return false;
    4355              :     }
    4356        86434 :   if (total_orig <= total_new)
    4357              :     {
    4358              :       /* If number of estimated new statements is above estimated original
    4359              :          statements, bail out too.  */
    4360         1741 :       if (dump_file && (dump_flags & TDF_DETAILS))
    4361            0 :         fprintf (dump_file, "Estimated number of original stmts (%u)"
    4362              :                             " not larger than estimated number of new"
    4363              :                             " stmts (%u).\n",
    4364              :                  total_orig, total_new);
    4365         3676 :       FOR_EACH_VEC_ELT (split_stores, i, split_store)
    4366         3870 :         delete split_store;
    4367              :       return false;
    4368              :     }
    4369        84693 :   if (group->stores[0]->rhs_code == INTEGER_CST)
    4370              :     {
    4371       163519 :       bool all_orig = true;
    4372       163519 :       FOR_EACH_VEC_ELT (split_stores, i, split_store)
    4373       159792 :         if (!split_store->orig)
    4374              :           {
    4375              :             all_orig = false;
    4376              :             break;
    4377              :           }
    4378        78483 :       if (all_orig)
    4379              :         {
    4380              :           unsigned int cnt = split_stores.length ();
    4381              :           store_immediate_info *store;
    4382        15244 :           FOR_EACH_VEC_ELT (group->stores, i, store)
    4383        11517 :             if (gimple_clobber_p (store->stmt))
    4384         3745 :               ++cnt;
    4385              :           /* Punt if we wouldn't make any real changes, i.e. keep all
    4386              :              orig stmts + all clobbers.  */
    4387         3727 :           if (cnt == group->stores.length ())
    4388              :             {
    4389         3717 :               if (dump_file && (dump_flags & TDF_DETAILS))
    4390            0 :                 fprintf (dump_file, "Exceeded original number of stmts (%u)."
    4391              :                                     "  Not profitable to emit new sequence.\n",
    4392              :                          orig_num_stmts);
    4393        11468 :               FOR_EACH_VEC_ELT (split_stores, i, split_store)
    4394        15502 :                 delete split_store;
    4395       248246 :               return false;
    4396              :             }
    4397              :         }
    4398              :     }
    4399              : 
    4400        80976 :   gimple_stmt_iterator last_gsi = gsi_for_stmt (group->last_stmt);
    4401        80976 :   gimple_seq seq = NULL;
    4402        80976 :   tree last_vdef, new_vuse;
    4403        80976 :   last_vdef = gimple_vdef (group->last_stmt);
    4404        80976 :   new_vuse = gimple_vuse (group->last_stmt);
    4405        80976 :   tree bswap_res = NULL_TREE;
    4406              : 
    4407              :   /* Clobbers are not removed.  */
    4408        80976 :   if (gimple_clobber_p (group->last_stmt))
    4409              :     {
    4410            7 :       new_vuse = make_ssa_name (gimple_vop (cfun), group->last_stmt);
    4411            7 :       gimple_set_vdef (group->last_stmt, new_vuse);
    4412              :     }
    4413              : 
    4414        80976 :   if (group->stores[0]->rhs_code == LROTATE_EXPR
    4415        80976 :       || group->stores[0]->rhs_code == NOP_EXPR)
    4416              :     {
    4417          834 :       tree fndecl = NULL_TREE, bswap_type = NULL_TREE, load_type;
    4418          834 :       gimple *ins_stmt = group->stores[0]->ins_stmt;
    4419          834 :       struct symbolic_number *n = &group->stores[0]->n;
    4420          834 :       bool bswap = group->stores[0]->rhs_code == LROTATE_EXPR;
    4421              : 
    4422          834 :       switch (n->range)
    4423              :         {
    4424          420 :         case 16:
    4425          420 :           load_type = bswap_type = uint16_type_node;
    4426          420 :           break;
    4427          219 :         case 32:
    4428          219 :           load_type = uint32_type_node;
    4429          219 :           if (bswap)
    4430              :             {
    4431          109 :               fndecl = builtin_decl_explicit (BUILT_IN_BSWAP32);
    4432          109 :               bswap_type = TREE_VALUE (TYPE_ARG_TYPES (TREE_TYPE (fndecl)));
    4433              :             }
    4434              :           break;
    4435          195 :         case 64:
    4436          195 :           load_type = uint64_type_node;
    4437          195 :           if (bswap)
    4438              :             {
    4439           72 :               fndecl = builtin_decl_explicit (BUILT_IN_BSWAP64);
    4440           72 :               bswap_type = TREE_VALUE (TYPE_ARG_TYPES (TREE_TYPE (fndecl)));
    4441              :             }
    4442              :           break;
    4443            0 :         default:
    4444            0 :           gcc_unreachable ();
    4445              :         }
    4446              : 
    4447              :       /* If the loads have each vuse of the corresponding store,
    4448              :          we've checked the aliasing already in try_coalesce_bswap and
    4449              :          we want to sink the need load into seq.  So need to use new_vuse
    4450              :          on the load.  */
    4451          834 :       if (n->base_addr)
    4452              :         {
    4453          421 :           if (n->vuse == NULL)
    4454              :             {
    4455           73 :               n->vuse = new_vuse;
    4456           73 :               ins_stmt = NULL;
    4457              :             }
    4458              :           else
    4459              :             /* Update vuse in case it has changed by output_merged_stores.  */
    4460          696 :             n->vuse = gimple_vuse (ins_stmt);
    4461              :         }
    4462          834 :       bswap_res = bswap_replace (gsi_start (seq), ins_stmt, fndecl,
    4463              :                                  bswap_type, load_type, n, bswap,
    4464              :                                  ~(uint64_t) 0, 0);
    4465          834 :       gcc_assert (bswap_res);
    4466              :     }
    4467              : 
    4468        80976 :   gimple *stmt = NULL;
    4469       161952 :   auto_vec<gimple *, 32> orig_stmts;
    4470        80976 :   gimple_seq this_seq;
    4471        80976 :   tree addr = force_gimple_operand_1 (unshare_expr (base_addr), &this_seq,
    4472              :                                       is_gimple_mem_ref_addr, NULL_TREE);
    4473        80976 :   gimple_seq_add_seq_without_update (&seq, this_seq);
    4474              : 
    4475        80976 :   tree load_addr[2] = { NULL_TREE, NULL_TREE };
    4476        80976 :   gimple_seq load_seq[2] = { NULL, NULL };
    4477        80976 :   gimple_stmt_iterator load_gsi[2] = { gsi_none (), gsi_none () };
    4478       242928 :   for (int j = 0; j < 2; ++j)
    4479              :     {
    4480       161952 :       store_operand_info &op = group->stores[0]->ops[j];
    4481       161952 :       if (op.base_addr == NULL_TREE)
    4482       159749 :         continue;
    4483              : 
    4484         2203 :       store_immediate_info *infol = group->stores.last ();
    4485         6609 :       if (gimple_vuse (op.stmt) == gimple_vuse (infol->ops[j].stmt))
    4486              :         {
    4487              :           /* We can't pick the location randomly; while we've verified
    4488              :              all the loads have the same vuse, they can be still in different
    4489              :              basic blocks and we need to pick the one from the last bb:
    4490              :              int x = q[0];
    4491              :              if (x == N) return;
    4492              :              int y = q[1];
    4493              :              p[0] = x;
    4494              :              p[1] = y;
    4495              :              otherwise if we put the wider load at the q[0] load, we might
    4496              :              segfault if q[1] is not mapped.  */
    4497         1302 :           basic_block bb = gimple_bb (op.stmt);
    4498         1302 :           gimple *ostmt = op.stmt;
    4499         1302 :           store_immediate_info *info;
    4500         9456 :           FOR_EACH_VEC_ELT (group->stores, i, info)
    4501              :             {
    4502         8154 :               gimple *tstmt = info->ops[j].stmt;
    4503         8154 :               basic_block tbb = gimple_bb (tstmt);
    4504         8154 :               if (dominated_by_p (CDI_DOMINATORS, tbb, bb))
    4505              :                 {
    4506         8134 :                   ostmt = tstmt;
    4507         8134 :                   bb = tbb;
    4508              :                 }
    4509              :             }
    4510         1302 :           load_gsi[j] = gsi_for_stmt (ostmt);
    4511         1302 :           load_addr[j]
    4512         1302 :             = force_gimple_operand_1 (unshare_expr (op.base_addr),
    4513              :                                       &load_seq[j], is_gimple_mem_ref_addr,
    4514              :                                       NULL_TREE);
    4515              :         }
    4516          901 :       else if (operand_equal_p (base_addr, op.base_addr, 0))
    4517           40 :         load_addr[j] = addr;
    4518              :       else
    4519              :         {
    4520          861 :           load_addr[j]
    4521          861 :             = force_gimple_operand_1 (unshare_expr (op.base_addr),
    4522              :                                       &this_seq, is_gimple_mem_ref_addr,
    4523              :                                       NULL_TREE);
    4524          861 :           gimple_seq_add_seq_without_update (&seq, this_seq);
    4525              :         }
    4526              :     }
    4527              : 
    4528       375635 :   FOR_EACH_VEC_ELT (split_stores, i, split_store)
    4529              :     {
    4530       294659 :       const unsigned HOST_WIDE_INT try_size = split_store->size;
    4531       294659 :       const unsigned HOST_WIDE_INT try_pos = split_store->bytepos;
    4532       294659 :       const unsigned HOST_WIDE_INT try_bitpos = try_pos * BITS_PER_UNIT;
    4533       294659 :       const unsigned HOST_WIDE_INT try_align = split_store->align;
    4534       294659 :       const unsigned HOST_WIDE_INT try_offset = try_pos - start_byte_pos;
    4535       294659 :       tree dest, src;
    4536       294659 :       location_t loc;
    4537              : 
    4538       294659 :       if (split_store->orig)
    4539              :         {
    4540              :           /* If there is just a single non-clobber constituent store
    4541              :              which covers the whole area, just reuse the lhs and rhs.  */
    4542       209132 :           gimple *orig_stmt = NULL;
    4543              :           store_immediate_info *store;
    4544              :           unsigned int j;
    4545       209132 :           FOR_EACH_VEC_ELT (split_store->orig_stores, j, store)
    4546       209132 :             if (!gimple_clobber_p (store->stmt))
    4547              :               {
    4548              :                 orig_stmt = store->stmt;
    4549              :                 break;
    4550              :               }
    4551       203253 :           dest = gimple_assign_lhs (orig_stmt);
    4552       203253 :           src = gimple_assign_rhs1 (orig_stmt);
    4553       203253 :           loc = gimple_location (orig_stmt);
    4554              :         }
    4555              :       else
    4556              :         {
    4557              :           store_immediate_info *info;
    4558              :           unsigned short clique, base;
    4559              :           unsigned int k;
    4560       315442 :           FOR_EACH_VEC_ELT (split_store->orig_stores, k, info)
    4561       224036 :             orig_stmts.safe_push (info->stmt);
    4562        91406 :           tree offset_type
    4563        91406 :             = get_alias_type_for_stmts (orig_stmts, false, &clique, &base);
    4564        91406 :           tree dest_type;
    4565        91406 :           loc = get_location_for_stmts (orig_stmts);
    4566        91406 :           orig_stmts.truncate (0);
    4567              : 
    4568        91406 :           if (group->string_concatenation)
    4569           67 :             dest_type
    4570           67 :               = build_array_type_nelts (char_type_node,
    4571           67 :                                         try_size / BITS_PER_UNIT);
    4572              :           else
    4573              :             {
    4574        91339 :               dest_type = build_nonstandard_integer_type (try_size, UNSIGNED);
    4575        91339 :               dest_type = build_aligned_type (dest_type, try_align);
    4576              :             }
    4577        91406 :           dest = fold_build2 (MEM_REF, dest_type, addr,
    4578              :                               build_int_cst (offset_type, try_pos));
    4579        91406 :           if (TREE_CODE (dest) == MEM_REF)
    4580              :             {
    4581        91406 :               MR_DEPENDENCE_CLIQUE (dest) = clique;
    4582        91406 :               MR_DEPENDENCE_BASE (dest) = base;
    4583              :             }
    4584              : 
    4585        91406 :           tree mask;
    4586        91406 :           if (bswap_res || group->string_concatenation)
    4587          901 :             mask = integer_zero_node;
    4588              :           else
    4589        90505 :             mask = native_interpret_expr (dest_type,
    4590        90505 :                                           group->mask + try_offset,
    4591        90505 :                                           group->buf_size);
    4592              : 
    4593        91406 :           tree ops[2];
    4594       182849 :           for (int j = 0;
    4595       365587 :                j < 1 + (split_store->orig_stores[0]->ops[1].val != NULL_TREE);
    4596              :                ++j)
    4597              :             {
    4598        91443 :               store_operand_info &op = split_store->orig_stores[0]->ops[j];
    4599        91443 :               if (bswap_res)
    4600          834 :                 ops[j] = bswap_res;
    4601        90609 :               else if (group->string_concatenation)
    4602              :                 {
    4603          134 :                   ops[j] = build_string (try_size / BITS_PER_UNIT,
    4604           67 :                                          (const char *) group->val + try_offset);
    4605           67 :                   TREE_TYPE (ops[j]) = dest_type;
    4606              :                 }
    4607        90542 :               else if (op.base_addr)
    4608              :                 {
    4609         9449 :                   FOR_EACH_VEC_ELT (split_store->orig_stores, k, info)
    4610         6870 :                     orig_stmts.safe_push (info->ops[j].stmt);
    4611              : 
    4612         2579 :                   offset_type = get_alias_type_for_stmts (orig_stmts, true,
    4613              :                                                           &clique, &base);
    4614         2579 :                   location_t load_loc = get_location_for_stmts (orig_stmts);
    4615         2579 :                   orig_stmts.truncate (0);
    4616              : 
    4617         2579 :                   unsigned HOST_WIDE_INT load_align = group->load_align[j];
    4618         2579 :                   unsigned HOST_WIDE_INT align_bitpos
    4619         2579 :                     = known_alignment (try_bitpos
    4620         2579 :                                        - split_store->orig_stores[0]->bitpos
    4621         2579 :                                        + op.bitpos);
    4622         2579 :                   if (align_bitpos & (load_align - 1))
    4623          746 :                     load_align = least_bit_hwi (align_bitpos);
    4624              : 
    4625         2579 :                   tree load_int_type
    4626         2579 :                     = build_nonstandard_integer_type (try_size, UNSIGNED);
    4627         2579 :                   load_int_type
    4628         2579 :                     = build_aligned_type (load_int_type, load_align);
    4629              : 
    4630         2579 :                   poly_uint64 load_pos
    4631         2579 :                     = exact_div (try_bitpos
    4632         2579 :                                  - split_store->orig_stores[0]->bitpos
    4633         2579 :                                  + op.bitpos,
    4634              :                                  BITS_PER_UNIT);
    4635         2579 :                   ops[j] = fold_build2 (MEM_REF, load_int_type, load_addr[j],
    4636              :                                         build_int_cst (offset_type, load_pos));
    4637         2579 :                   if (TREE_CODE (ops[j]) == MEM_REF)
    4638              :                     {
    4639         2579 :                       MR_DEPENDENCE_CLIQUE (ops[j]) = clique;
    4640         2579 :                       MR_DEPENDENCE_BASE (ops[j]) = base;
    4641              :                     }
    4642         2579 :                   if (!integer_zerop (mask))
    4643              :                     {
    4644              :                       /* The load might load some bits (that will be masked
    4645              :                          off later on) uninitialized, avoid -W*uninitialized
    4646              :                          warnings in that case.  */
    4647          181 :                       suppress_warning (ops[j], OPT_Wuninitialized);
    4648              :                     }
    4649              : 
    4650         2579 :                   stmt = gimple_build_assign (make_ssa_name (dest_type), ops[j]);
    4651         2579 :                   gimple_set_location (stmt, load_loc);
    4652         2579 :                   if (gsi_bb (load_gsi[j]))
    4653              :                     {
    4654         3124 :                       gimple_set_vuse (stmt, gimple_vuse (op.stmt));
    4655         1562 :                       gimple_seq_add_stmt_without_update (&load_seq[j], stmt);
    4656              :                     }
    4657              :                   else
    4658              :                     {
    4659         1017 :                       gimple_set_vuse (stmt, new_vuse);
    4660         1017 :                       gimple_seq_add_stmt_without_update (&seq, stmt);
    4661              :                     }
    4662         2579 :                   ops[j] = gimple_assign_lhs (stmt);
    4663         2579 :                   tree xor_mask;
    4664         2579 :                   enum tree_code inv_op
    4665         2579 :                     = invert_op (split_store, j, dest_type, xor_mask);
    4666         2579 :                   if (inv_op != NOP_EXPR)
    4667              :                     {
    4668           29 :                       stmt = gimple_build_assign (make_ssa_name (dest_type),
    4669              :                                                   inv_op, ops[j], xor_mask);
    4670           29 :                       gimple_set_location (stmt, load_loc);
    4671           29 :                       ops[j] = gimple_assign_lhs (stmt);
    4672              : 
    4673           29 :                       if (gsi_bb (load_gsi[j]))
    4674            7 :                         gimple_seq_add_stmt_without_update (&load_seq[j],
    4675              :                                                             stmt);
    4676              :                       else
    4677           22 :                         gimple_seq_add_stmt_without_update (&seq, stmt);
    4678              :                     }
    4679              :                 }
    4680              :               else
    4681        87963 :                 ops[j] = native_interpret_expr (dest_type,
    4682        87963 :                                                 group->val + try_offset,
    4683        87963 :                                                 group->buf_size);
    4684              :             }
    4685              : 
    4686        91406 :           switch (split_store->orig_stores[0]->rhs_code)
    4687              :             {
    4688              :             case BIT_AND_EXPR:
    4689              :             case BIT_IOR_EXPR:
    4690              :             case BIT_XOR_EXPR:
    4691          185 :               FOR_EACH_VEC_ELT (split_store->orig_stores, k, info)
    4692              :                 {
    4693          148 :                   tree rhs1 = gimple_assign_rhs1 (info->stmt);
    4694          148 :                   orig_stmts.safe_push (SSA_NAME_DEF_STMT (rhs1));
    4695              :                 }
    4696           37 :               location_t bit_loc;
    4697           37 :               bit_loc = get_location_for_stmts (orig_stmts);
    4698           37 :               orig_stmts.truncate (0);
    4699              : 
    4700           37 :               stmt
    4701           37 :                 = gimple_build_assign (make_ssa_name (dest_type),
    4702           37 :                                        split_store->orig_stores[0]->rhs_code,
    4703              :                                        ops[0], ops[1]);
    4704           37 :               gimple_set_location (stmt, bit_loc);
    4705              :               /* If there is just one load and there is a separate
    4706              :                  load_seq[0], emit the bitwise op right after it.  */
    4707           37 :               if (load_addr[1] == NULL_TREE && gsi_bb (load_gsi[0]))
    4708            0 :                 gimple_seq_add_stmt_without_update (&load_seq[0], stmt);
    4709              :               /* Otherwise, if at least one load is in seq, we need to
    4710              :                  emit the bitwise op right before the store.  If there
    4711              :                  are two loads and are emitted somewhere else, it would
    4712              :                  be better to emit the bitwise op as early as possible;
    4713              :                  we don't track where that would be possible right now
    4714              :                  though.  */
    4715              :               else
    4716           37 :                 gimple_seq_add_stmt_without_update (&seq, stmt);
    4717           37 :               src = gimple_assign_lhs (stmt);
    4718           37 :               tree xor_mask;
    4719           37 :               enum tree_code inv_op;
    4720           37 :               inv_op = invert_op (split_store, 2, dest_type, xor_mask);
    4721           37 :               if (inv_op != NOP_EXPR)
    4722              :                 {
    4723            2 :                   stmt = gimple_build_assign (make_ssa_name (dest_type),
    4724              :                                               inv_op, src, xor_mask);
    4725            2 :                   gimple_set_location (stmt, bit_loc);
    4726            2 :                   if (load_addr[1] == NULL_TREE && gsi_bb (load_gsi[0]))
    4727            0 :                     gimple_seq_add_stmt_without_update (&load_seq[0], stmt);
    4728              :                   else
    4729            2 :                     gimple_seq_add_stmt_without_update (&seq, stmt);
    4730            2 :                   src = gimple_assign_lhs (stmt);
    4731              :                 }
    4732              :               break;
    4733          834 :             case LROTATE_EXPR:
    4734          834 :             case NOP_EXPR:
    4735          834 :               src = ops[0];
    4736          834 :               if (!is_gimple_val (src))
    4737              :                 {
    4738            0 :                   stmt = gimple_build_assign (make_ssa_name (TREE_TYPE (src)),
    4739              :                                               src);
    4740            0 :                   gimple_seq_add_stmt_without_update (&seq, stmt);
    4741            0 :                   src = gimple_assign_lhs (stmt);
    4742              :                 }
    4743          834 :               if (!useless_type_conversion_p (dest_type, TREE_TYPE (src)))
    4744              :                 {
    4745           86 :                   stmt = gimple_build_assign (make_ssa_name (dest_type),
    4746              :                                               NOP_EXPR, src);
    4747           86 :                   gimple_seq_add_stmt_without_update (&seq, stmt);
    4748           86 :                   src = gimple_assign_lhs (stmt);
    4749              :                 }
    4750          834 :               inv_op = invert_op (split_store, 2, dest_type, xor_mask);
    4751          834 :               if (inv_op != NOP_EXPR)
    4752              :                 {
    4753            2 :                   stmt = gimple_build_assign (make_ssa_name (dest_type),
    4754              :                                               inv_op, src, xor_mask);
    4755            2 :                   gimple_set_location (stmt, loc);
    4756            2 :                   gimple_seq_add_stmt_without_update (&seq, stmt);
    4757            2 :                   src = gimple_assign_lhs (stmt);
    4758              :                 }
    4759              :               break;
    4760        90535 :             default:
    4761        90535 :               src = ops[0];
    4762        90535 :               break;
    4763              :             }
    4764              : 
    4765              :           /* If bit insertion is required, we use the source as an accumulator
    4766              :              into which the successive bit-field values are manually inserted.
    4767              :              FIXME: perhaps use BIT_INSERT_EXPR instead in some cases?  */
    4768        91406 :           if (group->bit_insertion)
    4769        24889 :             FOR_EACH_VEC_ELT (split_store->orig_stores, k, info)
    4770        19856 :               if (info->rhs_code == BIT_INSERT_EXPR
    4771        10915 :                   && info->bitpos < try_bitpos + try_size
    4772        10915 :                   && info->bitpos + info->bitsize > try_bitpos)
    4773              :                 {
    4774              :                   /* Mask, truncate, convert to final type, shift and ior into
    4775              :                      the accumulator.  Note that every step can be a no-op.  */
    4776        10915 :                   const HOST_WIDE_INT start_gap = info->bitpos - try_bitpos;
    4777        10915 :                   const HOST_WIDE_INT end_gap
    4778        10915 :                     = (try_bitpos + try_size) - (info->bitpos + info->bitsize);
    4779        10915 :                   tree tem = info->ops[0].val;
    4780        10915 :                   if (!INTEGRAL_TYPE_P (TREE_TYPE (tem)))
    4781              :                     {
    4782            0 :                       const unsigned HOST_WIDE_INT size
    4783            0 :                         = tree_to_uhwi (TYPE_SIZE (TREE_TYPE (tem)));
    4784            0 :                       tree integer_type
    4785            0 :                         = build_nonstandard_integer_type (size, UNSIGNED);
    4786            0 :                       tem = gimple_build (&seq, loc, VIEW_CONVERT_EXPR,
    4787              :                                           integer_type, tem);
    4788              :                     }
    4789        10915 :                   if (TYPE_PRECISION (TREE_TYPE (tem)) <= info->bitsize)
    4790              :                     {
    4791         7282 :                       tree bitfield_type
    4792         7282 :                         = build_nonstandard_integer_type (info->bitsize,
    4793              :                                                           UNSIGNED);
    4794         7282 :                       tem = gimple_convert (&seq, loc, bitfield_type, tem);
    4795              :                     }
    4796         3633 :                   else if ((BYTES_BIG_ENDIAN ? start_gap : end_gap) > 0)
    4797              :                     {
    4798         2583 :                       wide_int imask
    4799              :                         = wi::mask (info->bitsize, false,
    4800         2583 :                                     TYPE_PRECISION (TREE_TYPE (tem)));
    4801         7749 :                       tem = gimple_build (&seq, loc,
    4802         2583 :                                           BIT_AND_EXPR, TREE_TYPE (tem), tem,
    4803         2583 :                                           wide_int_to_tree (TREE_TYPE (tem),
    4804         5166 :                                                             imask));
    4805         2583 :                     }
    4806        10915 :                   const HOST_WIDE_INT shift
    4807              :                     = (BYTES_BIG_ENDIAN ? end_gap : start_gap);
    4808        10915 :                   if (shift < 0)
    4809            4 :                     tem = gimple_build (&seq, loc,
    4810            4 :                                         RSHIFT_EXPR, TREE_TYPE (tem), tem,
    4811            4 :                                         build_int_cst (NULL_TREE, -shift));
    4812        10915 :                   tem = gimple_convert (&seq, loc, dest_type, tem);
    4813        10915 :                   if (shift > 0)
    4814         8512 :                     tem = gimple_build (&seq, loc,
    4815              :                                         LSHIFT_EXPR, dest_type, tem,
    4816         8512 :                                         build_int_cst (NULL_TREE, shift));
    4817        10915 :                   src = gimple_build (&seq, loc,
    4818              :                                       BIT_IOR_EXPR, dest_type, tem, src);
    4819              :                 }
    4820              : 
    4821        91406 :           if (!integer_zerop (mask))
    4822              :             {
    4823         3979 :               tree tem = make_ssa_name (dest_type);
    4824         3979 :               tree load_src = unshare_expr (dest);
    4825              :               /* The load might load some or all bits uninitialized,
    4826              :                  avoid -W*uninitialized warnings in that case.
    4827              :                  As optimization, it would be nice if all the bits are
    4828              :                  provably uninitialized (no stores at all yet or previous
    4829              :                  store a CLOBBER) we'd optimize away the load and replace
    4830              :                  it e.g. with 0.  */
    4831         3979 :               suppress_warning (load_src, OPT_Wuninitialized);
    4832         3979 :               stmt = gimple_build_assign (tem, load_src);
    4833         3979 :               gimple_set_location (stmt, loc);
    4834         3979 :               gimple_set_vuse (stmt, new_vuse);
    4835         3979 :               gimple_seq_add_stmt_without_update (&seq, stmt);
    4836              : 
    4837              :               /* FIXME: If there is a single chunk of zero bits in mask,
    4838              :                  perhaps use BIT_INSERT_EXPR instead?  */
    4839         3979 :               stmt = gimple_build_assign (make_ssa_name (dest_type),
    4840              :                                           BIT_AND_EXPR, tem, mask);
    4841         3979 :               gimple_set_location (stmt, loc);
    4842         3979 :               gimple_seq_add_stmt_without_update (&seq, stmt);
    4843         3979 :               tem = gimple_assign_lhs (stmt);
    4844              : 
    4845         3979 :               if (TREE_CODE (src) == INTEGER_CST)
    4846         1080 :                 src = wide_int_to_tree (dest_type,
    4847         1080 :                                         wi::bit_and_not (wi::to_wide (src),
    4848         2160 :                                                          wi::to_wide (mask)));
    4849              :               else
    4850              :                 {
    4851         2899 :                   tree nmask
    4852         2899 :                     = wide_int_to_tree (dest_type,
    4853         2899 :                                         wi::bit_not (wi::to_wide (mask)));
    4854         2899 :                   stmt = gimple_build_assign (make_ssa_name (dest_type),
    4855              :                                               BIT_AND_EXPR, src, nmask);
    4856         2899 :                   gimple_set_location (stmt, loc);
    4857         2899 :                   gimple_seq_add_stmt_without_update (&seq, stmt);
    4858         2899 :                   src = gimple_assign_lhs (stmt);
    4859              :                 }
    4860         3979 :               stmt = gimple_build_assign (make_ssa_name (dest_type),
    4861              :                                           BIT_IOR_EXPR, tem, src);
    4862         3979 :               gimple_set_location (stmt, loc);
    4863         3979 :               gimple_seq_add_stmt_without_update (&seq, stmt);
    4864         3979 :               src = gimple_assign_lhs (stmt);
    4865              :             }
    4866              :         }
    4867              : 
    4868       294659 :       stmt = gimple_build_assign (dest, src);
    4869       294659 :       gimple_set_location (stmt, loc);
    4870       294659 :       gimple_set_vuse (stmt, new_vuse);
    4871       294659 :       gimple_seq_add_stmt_without_update (&seq, stmt);
    4872              : 
    4873       294659 :       if (group->lp_nr && stmt_could_throw_p (cfun, stmt))
    4874           53 :         add_stmt_to_eh_lp (stmt, group->lp_nr);
    4875              : 
    4876       294659 :       tree new_vdef;
    4877       589318 :       if (i < split_stores.length () - 1)
    4878       213683 :         new_vdef = make_ssa_name (gimple_vop (cfun), stmt);
    4879              :       else
    4880              :         new_vdef = last_vdef;
    4881              : 
    4882       294659 :       gimple_set_vdef (stmt, new_vdef);
    4883       294659 :       SSA_NAME_DEF_STMT (new_vdef) = stmt;
    4884       294659 :       new_vuse = new_vdef;
    4885              :     }
    4886              : 
    4887       375635 :   FOR_EACH_VEC_ELT (split_stores, i, split_store)
    4888       589318 :     delete split_store;
    4889              : 
    4890        80976 :   gcc_assert (seq);
    4891        80976 :   if (dump_file)
    4892              :     {
    4893          184 :       fprintf (dump_file,
    4894              :                "New sequence of %u stores to replace old one of %u stores\n",
    4895              :                split_stores.length (), orig_num_stmts);
    4896           92 :       if (dump_flags & TDF_DETAILS)
    4897           23 :         print_gimple_seq (dump_file, seq, 0, TDF_VOPS | TDF_MEMSYMS);
    4898              :     }
    4899              : 
    4900        80976 :   if (gimple_clobber_p (group->last_stmt))
    4901            7 :     update_stmt (group->last_stmt);
    4902              : 
    4903        80976 :   if (group->lp_nr > 0)
    4904              :     {
    4905              :       /* We're going to insert a sequence of (potentially) throwing stores
    4906              :          into an active EH region.  This means that we're going to create
    4907              :          new basic blocks with EH edges pointing to the post landing pad
    4908              :          and, therefore, to have to update its PHI nodes, if any.  For the
    4909              :          virtual PHI node, we're going to use the VDEFs created above, but
    4910              :          for the other nodes, we need to record the original reaching defs.  */
    4911           46 :       eh_landing_pad lp = get_eh_landing_pad_from_number (group->lp_nr);
    4912           46 :       basic_block lp_bb = label_to_block (cfun, lp->post_landing_pad);
    4913           46 :       basic_block last_bb = gimple_bb (group->last_stmt);
    4914           46 :       edge last_edge = find_edge (last_bb, lp_bb);
    4915           46 :       auto_vec<tree, 16> last_defs;
    4916           46 :       gphi_iterator gpi;
    4917           98 :       for (gpi = gsi_start_phis (lp_bb); !gsi_end_p (gpi); gsi_next (&gpi))
    4918              :         {
    4919           52 :           gphi *phi = gpi.phi ();
    4920           52 :           tree last_def;
    4921          104 :           if (virtual_operand_p (gimple_phi_result (phi)))
    4922           46 :             last_def = NULL_TREE;
    4923              :           else
    4924            6 :             last_def = gimple_phi_arg_def (phi, last_edge->dest_idx);
    4925           52 :           last_defs.safe_push (last_def);
    4926              :         }
    4927              : 
    4928              :       /* Do the insertion.  Then, if new basic blocks have been created in the
    4929              :          process, rewind the chain of VDEFs create above to walk the new basic
    4930              :          blocks and update the corresponding arguments of the PHI nodes.  */
    4931           46 :       update_modified_stmts (seq);
    4932           46 :       if (gimple_find_sub_bbs (seq, &last_gsi))
    4933           99 :         while (last_vdef != gimple_vuse (group->last_stmt))
    4934              :           {
    4935           53 :             gimple *stmt = SSA_NAME_DEF_STMT (last_vdef);
    4936           53 :             if (stmt_could_throw_p (cfun, stmt))
    4937              :               {
    4938           53 :                 edge new_edge = find_edge (gimple_bb (stmt), lp_bb);
    4939           53 :                 unsigned int i;
    4940           53 :                 for (gpi = gsi_start_phis (lp_bb), i = 0;
    4941          112 :                      !gsi_end_p (gpi);
    4942           59 :                      gsi_next (&gpi), i++)
    4943              :                   {
    4944           59 :                     gphi *phi = gpi.phi ();
    4945           59 :                     tree new_def;
    4946          118 :                     if (virtual_operand_p (gimple_phi_result (phi)))
    4947              :                       new_def = last_vdef;
    4948              :                     else
    4949            6 :                       new_def = last_defs[i];
    4950           59 :                     add_phi_arg (phi, new_def, new_edge, UNKNOWN_LOCATION);
    4951              :                   }
    4952              :               }
    4953          152 :             last_vdef = gimple_vuse (stmt);
    4954              :           }
    4955           46 :     }
    4956              :   else
    4957        80930 :     gsi_insert_seq_after (&last_gsi, seq, GSI_SAME_STMT);
    4958              : 
    4959       242928 :   for (int j = 0; j < 2; ++j)
    4960       161952 :     if (load_seq[j])
    4961         1302 :       gsi_insert_seq_after (&load_gsi[j], load_seq[j], GSI_SAME_STMT);
    4962              : 
    4963        80976 :   return true;
    4964       329222 : }
    4965              : 
    4966              : /* Process the merged_store_group objects created in the coalescing phase.
    4967              :    The stores are all against the base object BASE.
    4968              :    Try to output the widened stores and delete the original statements if
    4969              :    successful.  Return true iff any changes were made.  */
    4970              : 
    4971              : bool
    4972       367582 : imm_store_chain_info::output_merged_stores ()
    4973              : {
    4974       367582 :   unsigned int i;
    4975       367582 :   merged_store_group *merged_store;
    4976       367582 :   bool ret = false;
    4977       767536 :   FOR_EACH_VEC_ELT (m_merged_store_groups, i, merged_store)
    4978              :     {
    4979       399954 :       if (dbg_cnt (store_merging)
    4980       399954 :           && output_merged_store (merged_store))
    4981              :         {
    4982              :           unsigned int j;
    4983              :           store_immediate_info *store;
    4984       901602 :           FOR_EACH_VEC_ELT (merged_store->stores, j, store)
    4985              :             {
    4986       420672 :               gimple *stmt = store->stmt;
    4987       420672 :               gimple_stmt_iterator gsi = gsi_for_stmt (stmt);
    4988              :               /* Don't remove clobbers, they are still useful even if
    4989              :                  everything is overwritten afterwards.  */
    4990       420672 :               if (gimple_clobber_p (stmt))
    4991         2211 :                 continue;
    4992       418461 :               gsi_remove (&gsi, true);
    4993       418461 :               if (store->lp_nr)
    4994          165 :                 remove_stmt_from_eh_lp (stmt);
    4995       418461 :               if (stmt != merged_store->last_stmt)
    4996              :                 {
    4997       337492 :                   unlink_stmt_vdef (stmt);
    4998       337492 :                   release_defs (stmt);
    4999              :                 }
    5000              :             }
    5001              :           ret = true;
    5002              :         }
    5003              :     }
    5004       367582 :   if (ret && dump_file)
    5005           92 :     fprintf (dump_file, "Merging successful!\n");
    5006              : 
    5007       367582 :   return ret;
    5008              : }
    5009              : 
    5010              : /* Coalesce the store_immediate_info objects recorded against the base object
    5011              :    BASE in the first phase and output them.
    5012              :    Delete the allocated structures.
    5013              :    Return true if any changes were made.  */
    5014              : 
    5015              : bool
    5016      1915352 : imm_store_chain_info::terminate_and_process_chain ()
    5017              : {
    5018      1915352 :   if (dump_file && (dump_flags & TDF_DETAILS))
    5019           98 :     fprintf (dump_file, "Terminating chain with %u stores\n",
    5020              :              m_store_info.length ());
    5021              :   /* Process store chain.  */
    5022      1915352 :   bool ret = false;
    5023      1915352 :   if (m_store_info.length () > 1)
    5024              :     {
    5025       493956 :       ret = coalesce_immediate_stores ();
    5026       493956 :       if (ret)
    5027       367582 :         ret = output_merged_stores ();
    5028              :     }
    5029              : 
    5030              :   /* Delete all the entries we allocated ourselves.  */
    5031      1915352 :   store_immediate_info *info;
    5032      1915352 :   unsigned int i;
    5033      4966044 :   FOR_EACH_VEC_ELT (m_store_info, i, info)
    5034      3050692 :     delete info;
    5035              : 
    5036              :   merged_store_group *merged_info;
    5037      2315306 :   FOR_EACH_VEC_ELT (m_merged_store_groups, i, merged_info)
    5038       399954 :     delete merged_info;
    5039              : 
    5040      1915352 :   return ret;
    5041              : }
    5042              : 
    5043              : /* Return true iff LHS is a destination potentially interesting for
    5044              :    store merging.  In practice these are the codes that get_inner_reference
    5045              :    can process.  */
    5046              : 
    5047              : static bool
    5048      9419340 : lhs_valid_for_store_merging_p (tree lhs)
    5049              : {
    5050      9419340 :   if (DECL_P (lhs))
    5051              :     return true;
    5052              : 
    5053      7234831 :   switch (TREE_CODE (lhs))
    5054              :     {
    5055              :     case ARRAY_REF:
    5056              :     case ARRAY_RANGE_REF:
    5057              :     case BIT_FIELD_REF:
    5058              :     case COMPONENT_REF:
    5059              :     case MEM_REF:
    5060              :     case VIEW_CONVERT_EXPR:
    5061              :       return true;
    5062              :     default:
    5063              :       return false;
    5064              :     }
    5065              : }
    5066              : 
    5067              : /* Return true if the tree RHS is a constant we want to consider
    5068              :    during store merging.  In practice accept all codes that
    5069              :    native_encode_expr accepts.  */
    5070              : 
    5071              : static bool
    5072      5053350 : rhs_valid_for_store_merging_p (tree rhs)
    5073              : {
    5074      5053350 :   unsigned HOST_WIDE_INT size;
    5075      5053350 :   if (TREE_CODE (rhs) == CONSTRUCTOR
    5076       938189 :       && CONSTRUCTOR_NELTS (rhs) == 0
    5077       938189 :       && TYPE_SIZE_UNIT (TREE_TYPE (rhs))
    5078      5991539 :       && tree_fits_uhwi_p (TYPE_SIZE_UNIT (TREE_TYPE (rhs))))
    5079              :     return true;
    5080      8230322 :   return (GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (rhs))).is_constant (&size)
    5081      4115161 :           && native_encode_expr (rhs, NULL, size) != 0);
    5082              : }
    5083              : 
    5084              : /* Adjust *PBITPOS, *PBITREGION_START and *PBITREGION_END by BYTE_OFF bytes
    5085              :    and return true on success or false on failure.  */
    5086              : 
    5087              : static bool
    5088      1068705 : adjust_bit_pos (poly_offset_int byte_off,
    5089              :                 poly_int64  *pbitpos,
    5090              :                 poly_uint64 *pbitregion_start,
    5091              :                 poly_uint64 *pbitregion_end)
    5092              : {
    5093      1068705 :   poly_offset_int bit_off = byte_off << LOG2_BITS_PER_UNIT;
    5094      1068705 :   bit_off += *pbitpos;
    5095              : 
    5096      1068705 :   if (known_ge (bit_off, 0) && bit_off.to_shwi (pbitpos))
    5097              :     {
    5098      1057477 :       if (maybe_ne (*pbitregion_end, 0U))
    5099              :         {
    5100         7384 :           bit_off = byte_off << LOG2_BITS_PER_UNIT;
    5101         7384 :           bit_off += *pbitregion_start;
    5102         7384 :           if (bit_off.to_uhwi (pbitregion_start))
    5103              :             {
    5104         7384 :               bit_off = byte_off << LOG2_BITS_PER_UNIT;
    5105         7384 :               bit_off += *pbitregion_end;
    5106         7384 :               if (!bit_off.to_uhwi (pbitregion_end))
    5107            0 :                 *pbitregion_end = 0;
    5108              :             }
    5109              :           else
    5110            0 :             *pbitregion_end = 0;
    5111              :         }
    5112      1057477 :       return true;
    5113              :     }
    5114              :   else
    5115        11228 :     return false;
    5116              : }
    5117              : 
    5118              : /* If MEM is a memory reference usable for store merging (either as
    5119              :    store destination or for loads), return the non-NULL base_addr
    5120              :    and set *PBITSIZE, *PBITPOS, *PBITREGION_START and *PBITREGION_END.
    5121              :    Otherwise return NULL, *PBITPOS should be still valid even for that
    5122              :    case.  */
    5123              : 
    5124              : static tree
    5125      5611502 : mem_valid_for_store_merging (tree mem, poly_uint64 *pbitsize,
    5126              :                              poly_uint64 *pbitpos,
    5127              :                              poly_uint64 *pbitregion_start,
    5128              :                              poly_uint64 *pbitregion_end)
    5129              : {
    5130      5611502 :   poly_int64 bitsize, bitpos;
    5131      5611502 :   poly_uint64 bitregion_start = 0, bitregion_end = 0;
    5132      5611502 :   machine_mode mode;
    5133      5611502 :   int unsignedp = 0, reversep = 0, volatilep = 0;
    5134      5611502 :   tree offset;
    5135      5611502 :   tree base_addr = get_inner_reference (mem, &bitsize, &bitpos, &offset, &mode,
    5136              :                                         &unsignedp, &reversep, &volatilep);
    5137      5611502 :   *pbitsize = bitsize;
    5138      5611502 :   if (known_le (bitsize, 0))
    5139              :     return NULL_TREE;
    5140              : 
    5141      5610306 :   if (TREE_CODE (mem) == COMPONENT_REF
    5142      5610306 :       && DECL_BIT_FIELD_TYPE (TREE_OPERAND (mem, 1)))
    5143              :     {
    5144        52916 :       get_bit_range (&bitregion_start, &bitregion_end, mem, &bitpos, &offset);
    5145        52916 :       if (maybe_ne (bitregion_end, 0U))
    5146        52916 :         bitregion_end += 1;
    5147              :     }
    5148              : 
    5149      5610306 :   if (reversep)
    5150              :     return NULL_TREE;
    5151              : 
    5152              :   /* We do not want to rewrite TARGET_MEM_REFs.  */
    5153      5609812 :   if (TREE_CODE (base_addr) == TARGET_MEM_REF)
    5154              :     return NULL_TREE;
    5155              :   /* In some cases get_inner_reference may return a
    5156              :      MEM_REF [ptr + byteoffset].  For the purposes of this pass
    5157              :      canonicalize the base_addr to MEM_REF [ptr] and take
    5158              :      byteoffset into account in the bitpos.  This occurs in
    5159              :      PR 23684 and this way we can catch more chains.  */
    5160      5561150 :   else if (TREE_CODE (base_addr) == MEM_REF)
    5161              :     {
    5162      1065211 :       if (!adjust_bit_pos (mem_ref_offset (base_addr), &bitpos,
    5163              :                            &bitregion_start, &bitregion_end))
    5164              :         return NULL_TREE;
    5165      1053983 :       base_addr = TREE_OPERAND (base_addr, 0);
    5166              :     }
    5167              :   /* get_inner_reference returns the base object, get at its
    5168              :      address now.  */
    5169              :   else
    5170              :     {
    5171      4495939 :       if (maybe_lt (bitpos, 0))
    5172              :         return NULL_TREE;
    5173      4495786 :       base_addr = build_fold_addr_expr (base_addr);
    5174              :     }
    5175              : 
    5176      5549769 :   if (offset)
    5177              :     {
    5178              :       /* If the access is variable offset then a base decl has to be
    5179              :          address-taken to be able to emit pointer-based stores to it.
    5180              :          ???  We might be able to get away with re-using the original
    5181              :          base up to the first variable part and then wrapping that inside
    5182              :          a BIT_FIELD_REF.  */
    5183        33710 :       tree base = get_base_address (base_addr);
    5184        33710 :       if (!base || (DECL_P (base) && !TREE_ADDRESSABLE (base)))
    5185              :         return NULL_TREE;
    5186              : 
    5187              :       /* Similarly to above for the base, remove constant from the offset.  */
    5188        33710 :       if (TREE_CODE (offset) == PLUS_EXPR
    5189         3550 :           && TREE_CODE (TREE_OPERAND (offset, 1)) == INTEGER_CST
    5190        37260 :           && adjust_bit_pos (wi::to_poly_offset (TREE_OPERAND (offset, 1)),
    5191              :                              &bitpos, &bitregion_start, &bitregion_end))
    5192         3494 :         offset = TREE_OPERAND (offset, 0);
    5193              : 
    5194        33710 :       base_addr = build2 (POINTER_PLUS_EXPR, TREE_TYPE (base_addr),
    5195              :                           base_addr, offset);
    5196              :     }
    5197              : 
    5198      5549769 :   if (known_eq (bitregion_end, 0U))
    5199              :     {
    5200      5497278 :       bitregion_start = round_down_to_byte_boundary (bitpos);
    5201      5497278 :       bitregion_end = round_up_to_byte_boundary (bitpos + bitsize);
    5202              :     }
    5203              : 
    5204      5549769 :   *pbitsize = bitsize;
    5205      5549769 :   *pbitpos = bitpos;
    5206      5549769 :   *pbitregion_start = bitregion_start;
    5207      5549769 :   *pbitregion_end = bitregion_end;
    5208      5549769 :   return base_addr;
    5209              : }
    5210              : 
    5211              : /* Return true if STMT is a load that can be used for store merging.
    5212              :    In that case fill in *OP.  BITSIZE, BITPOS, BITREGION_START and
    5213              :    BITREGION_END are properties of the corresponding store.  */
    5214              : 
    5215              : static bool
    5216      1036666 : handled_load (gimple *stmt, store_operand_info *op,
    5217              :               poly_uint64 bitsize, poly_uint64 bitpos,
    5218              :               poly_uint64 bitregion_start, poly_uint64 bitregion_end)
    5219              : {
    5220      1036666 :   if (!is_gimple_assign (stmt))
    5221              :     return false;
    5222      1036562 :   if (gimple_assign_rhs_code (stmt) == BIT_NOT_EXPR)
    5223              :     {
    5224         1510 :       tree rhs1 = gimple_assign_rhs1 (stmt);
    5225         1510 :       if (TREE_CODE (rhs1) == SSA_NAME
    5226         1510 :           && handled_load (SSA_NAME_DEF_STMT (rhs1), op, bitsize, bitpos,
    5227              :                            bitregion_start, bitregion_end))
    5228              :         {
    5229              :           /* Don't allow _1 = load; _2 = ~1; _3 = ~_2; which should have
    5230              :              been optimized earlier, but if allowed here, would confuse the
    5231              :              multiple uses counting.  */
    5232          782 :           if (op->bit_not_p)
    5233              :             return false;
    5234          782 :           op->bit_not_p = !op->bit_not_p;
    5235          782 :           return true;
    5236              :         }
    5237          728 :       return false;
    5238              :     }
    5239      1035052 :   if (gimple_vuse (stmt)
    5240       543163 :       && gimple_assign_load_p (stmt)
    5241       543130 :       && !stmt_can_throw_internal (cfun, stmt)
    5242      1574873 :       && !gimple_has_volatile_ops (stmt))
    5243              :     {
    5244       539523 :       tree mem = gimple_assign_rhs1 (stmt);
    5245       539523 :       op->base_addr
    5246       539523 :         = mem_valid_for_store_merging (mem, &op->bitsize, &op->bitpos,
    5247              :                                        &op->bitregion_start,
    5248              :                                        &op->bitregion_end);
    5249       539523 :       if (op->base_addr != NULL_TREE
    5250       487343 :           && known_eq (op->bitsize, bitsize)
    5251       974659 :           && multiple_p (op->bitpos - bitpos, BITS_PER_UNIT)
    5252       487326 :           && known_ge (op->bitpos - op->bitregion_start,
    5253              :                        bitpos - bitregion_start)
    5254      1026615 :           && known_ge (op->bitregion_end - op->bitpos,
    5255              :                        bitregion_end - bitpos))
    5256              :         {
    5257       487037 :           op->stmt = stmt;
    5258       487037 :           op->val = mem;
    5259       487037 :           op->bit_not_p = false;
    5260       487037 :           return true;
    5261              :         }
    5262              :     }
    5263              :   return false;
    5264              : }
    5265              : 
    5266              : /* Return the index number of the landing pad for STMT, if any.  */
    5267              : 
    5268              : static int
    5269      3050692 : lp_nr_for_store (gimple *stmt)
    5270              : {
    5271      3050692 :   if (!cfun->can_throw_non_call_exceptions || !cfun->eh)
    5272              :     return 0;
    5273              : 
    5274       800503 :   if (!stmt_could_throw_p (cfun, stmt))
    5275              :     return 0;
    5276              : 
    5277        78144 :   return lookup_stmt_eh_lp (stmt);
    5278              : }
    5279              : 
    5280              : /* Record the store STMT for store merging optimization if it can be
    5281              :    optimized.  Return true if any changes were made.  */
    5282              : 
    5283              : bool
    5284      5071979 : pass_store_merging::process_store (gimple *stmt)
    5285              : {
    5286      5071979 :   tree lhs = gimple_assign_lhs (stmt);
    5287      5071979 :   tree rhs = gimple_assign_rhs1 (stmt);
    5288      5071979 :   poly_uint64 bitsize, bitpos = 0;
    5289      5071979 :   poly_uint64 bitregion_start = 0, bitregion_end = 0;
    5290      5071979 :   tree base_addr
    5291      5071979 :     = mem_valid_for_store_merging (lhs, &bitsize, &bitpos,
    5292      5071979 :                                    &bitregion_start, &bitregion_end);
    5293      5071979 :   if (known_eq (bitsize, 0U))
    5294              :     return false;
    5295              : 
    5296      5070789 :   bool invalid = (base_addr == NULL_TREE
    5297      5070789 :                   || (maybe_gt (bitsize,
    5298              :                                 (unsigned int) MAX_BITSIZE_MODE_ANY_INT)
    5299       156936 :                       && TREE_CODE (rhs) != INTEGER_CST
    5300       156936 :                       && (TREE_CODE (rhs) != CONSTRUCTOR
    5301       143737 :                           || CONSTRUCTOR_NELTS (rhs) != 0)));
    5302      5070789 :   enum tree_code rhs_code = ERROR_MARK;
    5303      5070789 :   bool bit_not_p = false;
    5304      5070789 :   struct symbolic_number n;
    5305      5070789 :   gimple *ins_stmt = NULL;
    5306     15212367 :   store_operand_info ops[2];
    5307      5070789 :   if (invalid)
    5308              :     ;
    5309      5049227 :   else if (TREE_CODE (rhs) == STRING_CST)
    5310              :     {
    5311         4701 :       rhs_code = STRING_CST;
    5312         4701 :       ops[0].val = rhs;
    5313              :     }
    5314      5044526 :   else if (rhs_valid_for_store_merging_p (rhs))
    5315              :     {
    5316      2484116 :       rhs_code = INTEGER_CST;
    5317      2484116 :       ops[0].val = rhs;
    5318              :     }
    5319      2560410 :   else if (TREE_CODE (rhs) == SSA_NAME)
    5320              :     {
    5321      1660040 :       gimple *def_stmt = SSA_NAME_DEF_STMT (rhs), *def_stmt1, *def_stmt2;
    5322      1660040 :       if (!is_gimple_assign (def_stmt))
    5323              :         invalid = true;
    5324      1015093 :       else if (handled_load (def_stmt, &ops[0], bitsize, bitpos,
    5325              :                              bitregion_start, bitregion_end))
    5326              :         rhs_code = MEM_REF;
    5327       540348 :       else if (gimple_assign_rhs_code (def_stmt) == BIT_NOT_EXPR)
    5328              :         {
    5329          606 :           tree rhs1 = gimple_assign_rhs1 (def_stmt);
    5330          606 :           if (TREE_CODE (rhs1) == SSA_NAME
    5331          606 :               && is_gimple_assign (SSA_NAME_DEF_STMT (rhs1)))
    5332              :             {
    5333              :               bit_not_p = true;
    5334              :               def_stmt = SSA_NAME_DEF_STMT (rhs1);
    5335              :             }
    5336              :         }
    5337              : 
    5338      1660040 :       if (rhs_code == ERROR_MARK && !invalid)
    5339       540348 :         switch ((rhs_code = gimple_assign_rhs_code (def_stmt)))
    5340              :           {
    5341        16417 :           case BIT_AND_EXPR:
    5342        16417 :           case BIT_IOR_EXPR:
    5343        16417 :           case BIT_XOR_EXPR:
    5344        16417 :             tree rhs1, rhs2;
    5345        16417 :             rhs1 = gimple_assign_rhs1 (def_stmt);
    5346        16417 :             rhs2 = gimple_assign_rhs2 (def_stmt);
    5347        16417 :             invalid = true;
    5348        16417 :             if (TREE_CODE (rhs1) != SSA_NAME)
    5349              :               break;
    5350        16417 :             def_stmt1 = SSA_NAME_DEF_STMT (rhs1);
    5351        16417 :             if (!is_gimple_assign (def_stmt1)
    5352        16417 :                 || !handled_load (def_stmt1, &ops[0], bitsize, bitpos,
    5353              :                                   bitregion_start, bitregion_end))
    5354              :               break;
    5355         8824 :             if (rhs_valid_for_store_merging_p (rhs2))
    5356         4130 :               ops[1].val = rhs2;
    5357         4694 :             else if (TREE_CODE (rhs2) != SSA_NAME)
    5358              :               break;
    5359              :             else
    5360              :               {
    5361         4694 :                 def_stmt2 = SSA_NAME_DEF_STMT (rhs2);
    5362         4694 :                 if (!is_gimple_assign (def_stmt2))
    5363              :                   break;
    5364         4587 :                 else if (!handled_load (def_stmt2, &ops[1], bitsize, bitpos,
    5365              :                                         bitregion_start, bitregion_end))
    5366              :                   break;
    5367              :               }
    5368              :             invalid = false;
    5369              :             break;
    5370              :           default:
    5371              :             invalid = true;
    5372              :             break;
    5373              :           }
    5374              : 
    5375      1660040 :       unsigned HOST_WIDE_INT const_bitsize;
    5376      1660040 :       if (bitsize.is_constant (&const_bitsize)
    5377      1660040 :           && (const_bitsize % BITS_PER_UNIT) == 0
    5378      1643013 :           && const_bitsize <= 64
    5379      1473953 :           && multiple_p (bitpos, BITS_PER_UNIT))
    5380              :         {
    5381      1473685 :           ins_stmt = find_bswap_or_nop_1 (def_stmt, &n, 12);
    5382      1473685 :           if (ins_stmt)
    5383              :             {
    5384       474836 :               uint64_t nn = n.n;
    5385       474836 :               for (unsigned HOST_WIDE_INT i = 0;
    5386      3352523 :                    i < const_bitsize;
    5387      2877687 :                    i += BITS_PER_UNIT, nn >>= BITS_PER_MARKER)
    5388      2889276 :                 if ((nn & MARKER_MASK) == 0
    5389      2889276 :                     || (nn & MARKER_MASK) == MARKER_BYTE_UNKNOWN)
    5390              :                   {
    5391              :                     ins_stmt = NULL;
    5392              :                     break;
    5393              :                   }
    5394       474836 :               if (ins_stmt)
    5395              :                 {
    5396       463247 :                   if (invalid)
    5397              :                     {
    5398        63883 :                       rhs_code = LROTATE_EXPR;
    5399        63883 :                       ops[0].base_addr = NULL_TREE;
    5400        63883 :                       ops[1].base_addr = NULL_TREE;
    5401              :                     }
    5402              :                   invalid = false;
    5403              :                 }
    5404              :             }
    5405              :         }
    5406              : 
    5407      1196793 :       if (invalid
    5408      1113814 :           && bitsize.is_constant (&const_bitsize)
    5409      1113814 :           && ((const_bitsize % BITS_PER_UNIT) != 0
    5410      1098119 :               || !multiple_p (bitpos, BITS_PER_UNIT))
    5411      1228717 :           && const_bitsize <= MAX_FIXED_MODE_SIZE)
    5412              :         {
    5413              :           /* Bypass a conversion to the bit-field type.  */
    5414        15962 :           if (!bit_not_p
    5415        15923 :               && is_gimple_assign (def_stmt)
    5416        24174 :               && CONVERT_EXPR_CODE_P (rhs_code))
    5417              :             {
    5418         6045 :               tree rhs1 = gimple_assign_rhs1 (def_stmt);
    5419         6045 :               if (TREE_CODE (rhs1) == SSA_NAME
    5420         6045 :                   && INTEGRAL_TYPE_P (TREE_TYPE (rhs1)))
    5421              :                 rhs = rhs1;
    5422              :             }
    5423        15962 :           rhs_code = BIT_INSERT_EXPR;
    5424        15962 :           bit_not_p = false;
    5425        15962 :           ops[0].val = rhs;
    5426        15962 :           ops[0].base_addr = NULL_TREE;
    5427        15962 :           ops[1].base_addr = NULL_TREE;
    5428        15962 :           invalid = false;
    5429              :         }
    5430              :     }
    5431              :   else
    5432              :     invalid = true;
    5433              : 
    5434      4148857 :   unsigned HOST_WIDE_INT const_bitsize, const_bitpos;
    5435      4148857 :   unsigned HOST_WIDE_INT const_bitregion_start, const_bitregion_end;
    5436        15962 :   if (invalid
    5437      3051005 :       || !bitsize.is_constant (&const_bitsize)
    5438      3051005 :       || !bitpos.is_constant (&const_bitpos)
    5439      3051005 :       || !bitregion_start.is_constant (&const_bitregion_start)
    5440      3051005 :       || !bitregion_end.is_constant (&const_bitregion_end)
    5441      4132895 :       || ((const_bitregion_end - const_bitregion_start + 1) / BITS_PER_UNIT
    5442      3051005 :           > (unsigned) param_store_merging_max_size))
    5443      2020097 :     return terminate_all_aliasing_chains (NULL, stmt);
    5444              : 
    5445      3050692 :   if (!ins_stmt)
    5446      2587454 :     memset (&n, 0, sizeof (n));
    5447              : 
    5448      3050692 :   class imm_store_chain_info **chain_info = NULL;
    5449      3050692 :   bool ret = false;
    5450      3050692 :   if (base_addr)
    5451      3050692 :     chain_info = m_stores.get (base_addr);
    5452              : 
    5453      3050692 :   store_immediate_info *info;
    5454      3050692 :   if (chain_info)
    5455              :     {
    5456      1135340 :       unsigned int ord = (*chain_info)->m_store_info.length ();
    5457      2270680 :       info = new store_immediate_info (const_bitsize, const_bitpos,
    5458              :                                        const_bitregion_start,
    5459              :                                        const_bitregion_end,
    5460              :                                        stmt, ord, rhs_code, n, ins_stmt,
    5461              :                                        bit_not_p, lp_nr_for_store (stmt),
    5462      1135340 :                                        ops[0], ops[1]);
    5463      1135340 :       if (dump_file && (dump_flags & TDF_DETAILS))
    5464              :         {
    5465          202 :           fprintf (dump_file, "Recording immediate store from stmt:\n");
    5466          202 :           print_gimple_stmt (dump_file, stmt, 0);
    5467              :         }
    5468      1135340 :       (*chain_info)->m_store_info.safe_push (info);
    5469      1135340 :       m_n_stores++;
    5470      1135340 :       ret |= terminate_all_aliasing_chains (chain_info, stmt);
    5471              :       /* If we reach the limit of stores to merge in a chain terminate and
    5472              :          process the chain now.  */
    5473      1135340 :       if ((*chain_info)->m_store_info.length ()
    5474      1135340 :           == (unsigned int) param_max_stores_to_merge)
    5475              :         {
    5476          458 :           if (dump_file && (dump_flags & TDF_DETAILS))
    5477            0 :             fprintf (dump_file,
    5478              :                      "Reached maximum number of statements to merge:\n");
    5479          458 :           ret |= terminate_and_process_chain (*chain_info);
    5480              :         }
    5481              :     }
    5482              :   else
    5483              :     {
    5484              :       /* Store aliases any existing chain?  */
    5485      1915352 :       ret |= terminate_all_aliasing_chains (NULL, stmt);
    5486              : 
    5487              :       /* Start a new chain.  */
    5488      1915352 :       class imm_store_chain_info *new_chain
    5489      1915352 :           = new imm_store_chain_info (m_stores_head, base_addr);
    5490      3830704 :       info = new store_immediate_info (const_bitsize, const_bitpos,
    5491              :                                        const_bitregion_start,
    5492              :                                        const_bitregion_end,
    5493              :                                        stmt, 0, rhs_code, n, ins_stmt,
    5494              :                                        bit_not_p, lp_nr_for_store (stmt),
    5495      1915352 :                                        ops[0], ops[1]);
    5496      1915352 :       new_chain->m_store_info.safe_push (info);
    5497      1915352 :       m_n_stores++;
    5498      1915352 :       m_stores.put (base_addr, new_chain);
    5499      1915352 :       m_n_chains++;
    5500      1915352 :       if (dump_file && (dump_flags & TDF_DETAILS))
    5501              :         {
    5502           49 :           fprintf (dump_file, "Starting active chain number %u with statement:\n",
    5503              :                    m_n_chains);
    5504           49 :           print_gimple_stmt (dump_file, stmt, 0);
    5505           49 :           fprintf (dump_file, "The base object is:\n");
    5506           49 :           print_generic_expr (dump_file, base_addr);
    5507           49 :           fprintf (dump_file, "\n");
    5508              :         }
    5509              :     }
    5510              : 
    5511              :   /* Prune oldest chains so that after adding the chain or store above
    5512              :      we're again within the limits set by the params.  */
    5513      3050692 :   if (m_n_chains > (unsigned)param_max_store_chains_to_track
    5514      3048671 :       || m_n_stores > (unsigned)param_max_stores_to_track)
    5515              :     {
    5516         2025 :       if (dump_file && (dump_flags & TDF_DETAILS))
    5517            0 :         fprintf (dump_file, "Too many chains (%u > %d) or stores (%u > %d), "
    5518              :                  "terminating oldest chain(s).\n", m_n_chains,
    5519              :                  param_max_store_chains_to_track, m_n_stores,
    5520              :                  param_max_stores_to_track);
    5521         2025 :       imm_store_chain_info **e = &m_stores_head;
    5522         2025 :       unsigned idx = 0;
    5523         2025 :       unsigned n_stores = 0;
    5524       133144 :       while (*e)
    5525              :         {
    5526       131119 :           if (idx >= (unsigned)param_max_store_chains_to_track
    5527       131119 :               || (n_stores + (*e)->m_store_info.length ()
    5528       129098 :                   > (unsigned)param_max_stores_to_track))
    5529         2025 :             ret |= terminate_and_process_chain (*e);
    5530              :           else
    5531              :             {
    5532       129094 :               n_stores += (*e)->m_store_info.length ();
    5533       129094 :               e = &(*e)->next;
    5534       129094 :               ++idx;
    5535              :             }
    5536              :         }
    5537              :     }
    5538              : 
    5539              :   return ret;
    5540              : }
    5541              : 
    5542              : /* Return true if STMT is a store valid for store merging.  */
    5543              : 
    5544              : static bool
    5545     36025323 : store_valid_for_store_merging_p (gimple *stmt)
    5546              : {
    5547     36025323 :   return gimple_assign_single_p (stmt)
    5548     16450198 :          && gimple_vdef (stmt)
    5549      9419340 :          && lhs_valid_for_store_merging_p (gimple_assign_lhs (stmt))
    5550     45178055 :          && (!gimple_has_volatile_ops (stmt) || gimple_clobber_p (stmt));
    5551              : }
    5552              : 
    5553              : enum basic_block_status { BB_INVALID, BB_VALID, BB_EXTENDED_VALID };
    5554              : 
    5555              : /* Return the status of basic block BB wrt store merging.  */
    5556              : 
    5557              : static enum basic_block_status
    5558      8944513 : get_status_for_store_merging (basic_block bb)
    5559              : {
    5560      8944513 :   unsigned int num_statements = 0;
    5561      8944513 :   unsigned int num_constructors = 0;
    5562      8944513 :   gimple_stmt_iterator gsi;
    5563      8944513 :   edge e;
    5564      8944513 :   gimple *last_stmt = NULL;
    5565              : 
    5566     73055390 :   for (gsi = gsi_after_labels (bb); !gsi_end_p (gsi); gsi_next (&gsi))
    5567              :     {
    5568     65236510 :       gimple *stmt = gsi_stmt (gsi);
    5569              : 
    5570     65236510 :       if (is_gimple_debug (stmt))
    5571     39942772 :         continue;
    5572              : 
    5573     25293738 :       last_stmt = stmt;
    5574              : 
    5575     25293738 :       if (store_valid_for_store_merging_p (stmt) && ++num_statements >= 2)
    5576              :         break;
    5577              : 
    5578     24185295 :       if (is_gimple_assign (stmt)
    5579     24185295 :           && (gimple_assign_rhs_code (stmt) == CONSTRUCTOR
    5580     15015589 :               || gimple_assign_rhs_code (stmt) == VEC_PACK_TRUNC_EXPR))
    5581              :         {
    5582       629665 :           tree rhs = gimple_assign_rhs1 (stmt);
    5583       629665 :           if (VECTOR_TYPE_P (TREE_TYPE (rhs))
    5584       125123 :               && INTEGRAL_TYPE_P (TREE_TYPE (TREE_TYPE (rhs)))
    5585       742986 :               && gimple_assign_lhs (stmt) != NULL_TREE)
    5586              :             {
    5587       113321 :               HOST_WIDE_INT sz
    5588       113321 :                 = int_size_in_bytes (TREE_TYPE (rhs)) * BITS_PER_UNIT;
    5589       113321 :               if (sz == 16 || sz == 32 || sz == 64)
    5590              :                 {
    5591              :                   num_constructors = 1;
    5592              :                   break;
    5593              :                 }
    5594              :             }
    5595              :         }
    5596              :     }
    5597              : 
    5598      8944513 :   if (num_statements == 0 && num_constructors == 0)
    5599              :     return BB_INVALID;
    5600              : 
    5601       872542 :   if (cfun->can_throw_non_call_exceptions && cfun->eh
    5602       872542 :       && store_valid_for_store_merging_p (last_stmt)
    5603       668270 :       && (e = find_fallthru_edge (bb->succs))
    5604      2624031 :       && e->dest == bb->next_bb)
    5605              :     return BB_EXTENDED_VALID;
    5606              : 
    5607      2073316 :   return (num_statements >= 2 || num_constructors) ? BB_VALID : BB_INVALID;
    5608              : }
    5609              : 
    5610              : /* Entry point for the pass.  Go over each basic block recording chains of
    5611              :    immediate stores.  Upon encountering a terminating statement (as defined
    5612              :    by stmt_terminates_chain_p) process the recorded stores and emit the widened
    5613              :    variants.  */
    5614              : 
    5615              : unsigned int
    5616       960612 : pass_store_merging::execute (function *fun)
    5617              : {
    5618       960612 :   basic_block bb;
    5619       960612 :   hash_set<gimple *> orig_stmts;
    5620       960612 :   bool changed = false, open_chains = false;
    5621              : 
    5622              :   /* If the function can throw and catch non-call exceptions, we'll be trying
    5623              :      to merge stores across different basic blocks so we need to first unsplit
    5624              :      the EH edges in order to streamline the CFG of the function.  */
    5625       960612 :   if (cfun->can_throw_non_call_exceptions && cfun->eh)
    5626       240956 :     unsplit_eh_edges ();
    5627              : 
    5628       960612 :   calculate_dominance_info (CDI_DOMINATORS);
    5629              : 
    5630      9905125 :   FOR_EACH_BB_FN (bb, fun)
    5631              :     {
    5632      8944513 :       const basic_block_status bb_status = get_status_for_store_merging (bb);
    5633      8944513 :       gimple_stmt_iterator gsi;
    5634              : 
    5635      8983806 :       if (open_chains && (bb_status == BB_INVALID || !single_pred_p (bb)))
    5636              :         {
    5637       119879 :           changed |= terminate_and_process_all_chains ();
    5638       119879 :           open_chains = false;
    5639              :         }
    5640              : 
    5641      8944513 :       if (bb_status == BB_INVALID)
    5642      7772673 :         continue;
    5643              : 
    5644      1171840 :       if (dump_file && (dump_flags & TDF_DETAILS))
    5645           23 :         fprintf (dump_file, "Processing basic block <%d>:\n", bb->index);
    5646              : 
    5647     29652378 :       for (gsi = gsi_after_labels (bb); !gsi_end_p (gsi); )
    5648              :         {
    5649     28480538 :           gimple *stmt = gsi_stmt (gsi);
    5650     28480538 :           gsi_next (&gsi);
    5651              : 
    5652     28480538 :           if (is_gimple_debug (stmt))
    5653     18609784 :             continue;
    5654              : 
    5655     19280994 :           if (gimple_has_volatile_ops (stmt) && !gimple_clobber_p (stmt))
    5656              :             {
    5657              :               /* Terminate all chains.  */
    5658         9498 :               if (dump_file && (dump_flags & TDF_DETAILS))
    5659            1 :                 fprintf (dump_file, "Volatile access terminates "
    5660              :                                     "all chains\n");
    5661         9498 :               changed |= terminate_and_process_all_chains ();
    5662         9498 :               open_chains = false;
    5663         9498 :               continue;
    5664              :             }
    5665              : 
    5666      9861256 :           if (is_gimple_assign (stmt)
    5667      7943678 :               && (gimple_assign_rhs_code (stmt) == CONSTRUCTOR
    5668      6916896 :                   || gimple_assign_rhs_code (stmt) == VEC_PACK_TRUNC_EXPR)
    5669     10890760 :               && maybe_optimize_vector_constructor (stmt))
    5670         2213 :             continue;
    5671              : 
    5672      9859043 :           if (store_valid_for_store_merging_p (stmt))
    5673      5071979 :             changed |= process_store (stmt);
    5674              :           else
    5675      4787064 :             changed |= terminate_all_aliasing_chains (NULL, stmt);
    5676              :         }
    5677              : 
    5678      1171840 :       if (bb_status == BB_EXTENDED_VALID)
    5679              :         open_chains = true;
    5680              :       else
    5681              :         {
    5682      1034332 :           changed |= terminate_and_process_all_chains ();
    5683      1034332 :           open_chains = false;
    5684              :         }
    5685              :     }
    5686              : 
    5687       960612 :   if (open_chains)
    5688            0 :     changed |= terminate_and_process_all_chains ();
    5689              : 
    5690              :   /* If the function can throw and catch non-call exceptions and something
    5691              :      changed during the pass, then the CFG has (very likely) changed too.  */
    5692       960612 :   if (cfun->can_throw_non_call_exceptions && cfun->eh && changed)
    5693              :     {
    5694         1019 :       free_dominance_info (CDI_DOMINATORS);
    5695         1019 :       return TODO_cleanup_cfg;
    5696              :     }
    5697              : 
    5698              :   return 0;
    5699       960612 : }
    5700              : 
    5701              : } // anon namespace
    5702              : 
    5703              : /* Construct and return a store merging pass object.  */
    5704              : 
    5705              : gimple_opt_pass *
    5706       287872 : make_pass_store_merging (gcc::context *ctxt)
    5707              : {
    5708       287872 :   return new pass_store_merging (ctxt);
    5709              : }
    5710              : 
    5711              : #if CHECKING_P
    5712              : 
    5713              : namespace selftest {
    5714              : 
    5715              : /* Selftests for store merging helpers.  */
    5716              : 
    5717              : /* Assert that all elements of the byte arrays X and Y, both of length N
    5718              :    are equal.  */
    5719              : 
    5720              : static void
    5721           32 : verify_array_eq (unsigned char *x, unsigned char *y, unsigned int n)
    5722              : {
    5723          112 :   for (unsigned int i = 0; i < n; i++)
    5724              :     {
    5725           80 :       if (x[i] != y[i])
    5726              :         {
    5727            0 :           fprintf (stderr, "Arrays do not match.  X:\n");
    5728            0 :           dump_char_array (stderr, x, n);
    5729            0 :           fprintf (stderr, "Y:\n");
    5730            0 :           dump_char_array (stderr, y, n);
    5731              :         }
    5732           80 :       ASSERT_EQ (x[i], y[i]);
    5733              :     }
    5734           32 : }
    5735              : 
    5736              : /* Test shift_bytes_in_array_left and that it carries bits across between
    5737              :    bytes correctly.  */
    5738              : 
    5739              : static void
    5740            4 : verify_shift_bytes_in_array_left (void)
    5741              : {
    5742              :    /* byte 1   | byte 0
    5743              :       00011111 | 11100000.  */
    5744            4 :   unsigned char orig[2] = { 0xe0, 0x1f };
    5745            4 :   unsigned char in[2];
    5746            4 :   memcpy (in, orig, sizeof orig);
    5747              : 
    5748            4 :   unsigned char expected[2] = { 0x80, 0x7f };
    5749            4 :   shift_bytes_in_array_left (in, sizeof (in), 2);
    5750            4 :   verify_array_eq (in, expected, sizeof (in));
    5751              : 
    5752            4 :   memcpy (in, orig, sizeof orig);
    5753            4 :   memcpy (expected, orig, sizeof orig);
    5754              :   /* Check that shifting by zero doesn't change anything.  */
    5755            4 :   shift_bytes_in_array_left (in, sizeof (in), 0);
    5756            4 :   verify_array_eq (in, expected, sizeof (in));
    5757              : 
    5758            4 : }
    5759              : 
    5760              : /* Test shift_bytes_in_array_right and that it carries bits across between
    5761              :    bytes correctly.  */
    5762              : 
    5763              : static void
    5764            4 : verify_shift_bytes_in_array_right (void)
    5765              : {
    5766              :    /* byte 1   | byte 0
    5767              :       00011111 | 11100000.  */
    5768            4 :   unsigned char orig[2] = { 0x1f, 0xe0};
    5769            4 :   unsigned char in[2];
    5770            4 :   memcpy (in, orig, sizeof orig);
    5771            4 :   unsigned char expected[2] = { 0x07, 0xf8};
    5772            4 :   shift_bytes_in_array_right (in, sizeof (in), 2);
    5773            4 :   verify_array_eq (in, expected, sizeof (in));
    5774              : 
    5775            4 :   memcpy (in, orig, sizeof orig);
    5776            4 :   memcpy (expected, orig, sizeof orig);
    5777              :   /* Check that shifting by zero doesn't change anything.  */
    5778            4 :   shift_bytes_in_array_right (in, sizeof (in), 0);
    5779            4 :   verify_array_eq (in, expected, sizeof (in));
    5780            4 : }
    5781              : 
    5782              : /* Test clear_bit_region that it clears exactly the bits asked and
    5783              :    nothing more.  */
    5784              : 
    5785              : static void
    5786            4 : verify_clear_bit_region (void)
    5787              : {
    5788              :   /* Start with all bits set and test clearing various patterns in them.  */
    5789            4 :   unsigned char orig[3] = { 0xff, 0xff, 0xff};
    5790            4 :   unsigned char in[3];
    5791            4 :   unsigned char expected[3];
    5792            4 :   memcpy (in, orig, sizeof in);
    5793              : 
    5794              :   /* Check zeroing out all the bits.  */
    5795            4 :   clear_bit_region (in, 0, 3 * BITS_PER_UNIT);
    5796            4 :   expected[0] = expected[1] = expected[2] = 0;
    5797            4 :   verify_array_eq (in, expected, sizeof in);
    5798              : 
    5799            4 :   memcpy (in, orig, sizeof in);
    5800              :   /* Leave the first and last bits intact.  */
    5801            4 :   clear_bit_region (in, 1, 3 * BITS_PER_UNIT - 2);
    5802            4 :   expected[0] = 0x1;
    5803            4 :   expected[1] = 0;
    5804            4 :   expected[2] = 0x80;
    5805            4 :   verify_array_eq (in, expected, sizeof in);
    5806            4 : }
    5807              : 
    5808              : /* Test clear_bit_region_be that it clears exactly the bits asked and
    5809              :    nothing more.  */
    5810              : 
    5811              : static void
    5812            4 : verify_clear_bit_region_be (void)
    5813              : {
    5814              :   /* Start with all bits set and test clearing various patterns in them.  */
    5815            4 :   unsigned char orig[3] = { 0xff, 0xff, 0xff};
    5816            4 :   unsigned char in[3];
    5817            4 :   unsigned char expected[3];
    5818            4 :   memcpy (in, orig, sizeof in);
    5819              : 
    5820              :   /* Check zeroing out all the bits.  */
    5821            4 :   clear_bit_region_be (in, BITS_PER_UNIT - 1, 3 * BITS_PER_UNIT);
    5822            4 :   expected[0] = expected[1] = expected[2] = 0;
    5823            4 :   verify_array_eq (in, expected, sizeof in);
    5824              : 
    5825            4 :   memcpy (in, orig, sizeof in);
    5826              :   /* Leave the first and last bits intact.  */
    5827            4 :   clear_bit_region_be (in, BITS_PER_UNIT - 2, 3 * BITS_PER_UNIT - 2);
    5828            4 :   expected[0] = 0x80;
    5829            4 :   expected[1] = 0;
    5830            4 :   expected[2] = 0x1;
    5831            4 :   verify_array_eq (in, expected, sizeof in);
    5832            4 : }
    5833              : 
    5834              : 
    5835              : /* Run all of the selftests within this file.  */
    5836              : 
    5837              : void
    5838            4 : store_merging_cc_tests (void)
    5839              : {
    5840            4 :   verify_shift_bytes_in_array_left ();
    5841            4 :   verify_shift_bytes_in_array_right ();
    5842            4 :   verify_clear_bit_region ();
    5843            4 :   verify_clear_bit_region_be ();
    5844            4 : }
    5845              : 
    5846              : } // namespace selftest
    5847              : #endif /* CHECKING_P.  */
        

Generated by: LCOV version 2.4-beta

LCOV profile is generated on x86_64 machine using following configure options: configure --disable-bootstrap --enable-coverage=opt --enable-languages=c,c++,fortran,go,jit,lto,rust,m2 --enable-host-shared. GCC test suite is run with the built compiler.