#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "backend.h"
#include "tree.h"
#include "gimple.h"
#include "builtins.h"
#include "fold-const.h"
#include "tree-pass.h"
#include "ssa.h"
#include "gimple-pretty-print.h"
#include "alias.h"
#include "print-tree.h"
#include "tree-hash-traits.h"
#include "gimple-iterator.h"
#include "gimplify.h"
#include "gimple-fold.h"
#include "stor-layout.h"
#include "timevar.h"
#include "cfganal.h"
#include "cfgcleanup.h"
#include "tree-cfg.h"
#include "except.h"
#include "tree-eh.h"
#include "target.h"
#include "gimplify-me.h"
#include "rtl.h"
#include "expr.h"
#include "optabs-tree.h"
#include "dbgcnt.h"
#include "selftest.h"

Include dependency graph for gimple-ssa-store-merging.cc:

Macros
#define	MAX_STORE_BITSIZE (BITS_PER_WORD)

#define	MAX_STORE_BYTES (MAX_STORE_BITSIZE / BITS_PER_UNIT)

#define	MAX_STORE_ALIAS_CHECKS 64

#define	BITS_PER_MARKER 8

#define	MARKER_MASK ((1 << BITS_PER_MARKER) - 1)

#define	MARKER_BYTE_UNKNOWN MARKER_MASK

#define	HEAD_MARKER(n, size) ((n) & ((uint64_t) MARKER_MASK << (((size) - 1) * BITS_PER_MARKER)))

#define	CMPNOP

#define	CMPXCHG

Functions
gimple_opt_pass *	make_pass_optimize_bswap (gcc::context *ctxt)

gimple_opt_pass *	make_pass_store_merging (gcc::context *ctxt)

Macro Definition Documentation

◆ BITS_PER_MARKER

#define BITS_PER_MARKER 8

◆ CMPNOP

#define CMPNOP

Value:

(sizeof (int64_t) < 8 ? 0 : \

(uint64_t)0x08070605 << 32 | 0x04030201)

ggc_alloc

T * ggc_alloc(ALONE_CXX_MEM_STAT_INFO)

Definition ggc.h:184

The number which the find_bswap_or_nop_1 result should match in
order to have a nop.  The number is masked according to the size of
the symbolic number before using it.

◆ CMPXCHG

#define CMPXCHG

Value:

(sizeof (int64_t) < 8 ? 0 : \

(uint64_t)0x01020304 << 32 | 0x05060708)

The number which the find_bswap_or_nop_1 result should match in
order to have a byte swap.  The number is masked according to the
size of the symbolic number before using it.

◆ HEAD_MARKER

#define HEAD_MARKER	(		n,
			size ) ((n) & ((uint64_t) MARKER_MASK << (((size) - 1) * BITS_PER_MARKER)))

◆ MARKER_BYTE_UNKNOWN

#define MARKER_BYTE_UNKNOWN MARKER_MASK

◆ MARKER_MASK

#define MARKER_MASK ((1 << BITS_PER_MARKER) - 1)

◆ MAX_STORE_ALIAS_CHECKS

#define MAX_STORE_ALIAS_CHECKS 64

Limit to bound the number of aliasing checks for loads with the same
vuse as the corresponding store.

◆ MAX_STORE_BITSIZE

#define MAX_STORE_BITSIZE (BITS_PER_WORD)

GIMPLE store merging and byte swapping passes.
Copyright (C) 2009-2024 Free Software Foundation, Inc.
Contributed by ARM Ltd.

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it
under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 3, or (at your option)
any later version.

GCC is distributed in the hope that it will be useful, but
WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
General Public License for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.

The purpose of the store merging pass is to combine multiple memory stores
 of constant values, values loaded from memory, bitwise operations on those,
 or bit-field values, to consecutive locations, into fewer wider stores.

 For example, if we have a sequence peforming four byte stores to
 consecutive memory locations:
 [p     ] := imm1;
 [p + 1B] := imm2;
 [p + 2B] := imm3;
 [p + 3B] := imm4;
 we can transform this into a single 4-byte store if the target supports it:
 [p] := imm1:imm2:imm3:imm4 concatenated according to endianness.

 Or:
 [p     ] := [q     ];
 [p + 1B] := [q + 1B];
 [p + 2B] := [q + 2B];
 [p + 3B] := [q + 3B];
 if there is no overlap can be transformed into a single 4-byte
 load followed by single 4-byte store.

 Or:
 [p     ] := [q     ] ^ imm1;
 [p + 1B] := [q + 1B] ^ imm2;
 [p + 2B] := [q + 2B] ^ imm3;
 [p + 3B] := [q + 3B] ^ imm4;
 if there is no overlap can be transformed into a single 4-byte
 load, xored with imm1:imm2:imm3:imm4 and stored using a single 4-byte store.

 Or:
 [p:1 ] := imm;
 [p:31] := val & 0x7FFFFFFF;
 we can transform this into a single 4-byte store if the target supports it:
 [p] := imm:(val & 0x7FFFFFFF) concatenated according to endianness.

 The algorithm is applied to each basic block in three phases:

 1) Scan through the basic block and record assignments to destinations
 that can be expressed as a store to memory of a certain size at a certain
 bit offset from base expressions we can handle.  For bit-fields we also
 record the surrounding bit region, i.e. bits that could be stored in
 a read-modify-write operation when storing the bit-field.  Record store
 chains to different bases in a hash_map (m_stores) and make sure to
 terminate such chains when appropriate (for example when the stored
 values get used subsequently).
 These stores can be a result of structure element initializers, array stores
 etc.  A store_immediate_info object is recorded for every such store.
 Record as many such assignments to a single base as possible until a
 statement that interferes with the store sequence is encountered.
 Each store has up to 2 operands, which can be a either constant, a memory
 load or an SSA name, from which the value to be stored can be computed.
 At most one of the operands can be a constant.  The operands are recorded
 in store_operand_info struct.

 2) Analyze the chains of stores recorded in phase 1) (i.e. the vector of
 store_immediate_info objects) and coalesce contiguous stores into
 merged_store_group objects.  For bit-field stores, we don't need to
 require the stores to be contiguous, just their surrounding bit regions
 have to be contiguous.  If the expression being stored is different
 between adjacent stores, such as one store storing a constant and
 following storing a value loaded from memory, or if the loaded memory
 objects are not adjacent, a new merged_store_group is created as well.

 For example, given the stores:
 [p     ] := 0;
 [p + 1B] := 1;
 [p + 3B] := 0;
 [p + 4B] := 1;
 [p + 5B] := 0;
 [p + 6B] := 0;
 This phase would produce two merged_store_group objects, one recording the
 two bytes stored in the memory region [p : p + 1] and another
 recording the four bytes stored in the memory region [p + 3 : p + 6].

 3) The merged_store_group objects produced in phase 2) are processed
 to generate the sequence of wider stores that set the contiguous memory
 regions to the sequence of bytes that correspond to it.  This may emit
 multiple stores per store group to handle contiguous stores that are not
 of a size that is a power of 2.  For example it can try to emit a 40-bit
 store as a 32-bit store followed by an 8-bit store.
 We try to emit as wide stores as we can while respecting STRICT_ALIGNMENT
 or TARGET_SLOW_UNALIGNED_ACCESS settings.

 Note on endianness and example:
 Consider 2 contiguous 16-bit stores followed by 2 contiguous 8-bit stores:
 [p     ] := 0x1234;
 [p + 2B] := 0x5678;
 [p + 4B] := 0xab;
 [p + 5B] := 0xcd;

 The memory layout for little-endian (LE) and big-endian (BE) must be:
p |LE|BE|
---------
0 |34|12|
1 |12|34|
2 |78|56|
3 |56|78|
4 |ab|ab|
5 |cd|cd|

To merge these into a single 48-bit merged value 'val' in phase 2)
on little-endian we insert stores to higher (consecutive) bitpositions
into the most significant bits of the merged value.
The final merged value would be: 0xcdab56781234

For big-endian we insert stores to higher bitpositions into the least
significant bits of the merged value.
The final merged value would be: 0x12345678abcd

Then, in phase 3), we want to emit this 48-bit value as a 32-bit store
followed by a 16-bit store.  Again, we must consider endianness when
breaking down the 48-bit value 'val' computed above.
For little endian we emit:
[p]      (32-bit) := 0x56781234; // val & 0x0000ffffffff;
[p + 4B] (16-bit) := 0xcdab;    // (val & 0xffff00000000) >> 32;

Whereas for big-endian we emit:
[p]      (32-bit) := 0x12345678; // (val & 0xffffffff0000) >> 16;
[p + 4B] (16-bit) := 0xabcd;     //  val & 0x00000000ffff;

The maximum size (in bits) of the stores this pass should generate.

◆ MAX_STORE_BYTES

#define MAX_STORE_BYTES (MAX_STORE_BITSIZE / BITS_PER_UNIT)

Function Documentation

◆ make_pass_optimize_bswap()

gimple_opt_pass * make_pass_optimize_bswap ( gcc::context * ctxt )

References ggc_alloc().

◆ make_pass_store_merging()

gimple_opt_pass * make_pass_store_merging ( gcc::context * ctxt )

Construct and return a store merging pass object.