GCC Middle and Back End API Reference
tree-loop-distribution.cc File Reference
#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "backend.h"
#include "tree.h"
#include "gimple.h"
#include "cfghooks.h"
#include "tree-pass.h"
#include "ssa.h"
#include "gimple-pretty-print.h"
#include "fold-const.h"
#include "cfganal.h"
#include "gimple-iterator.h"
#include "gimplify-me.h"
#include "stor-layout.h"
#include "tree-cfg.h"
#include "tree-ssa-loop-manip.h"
#include "tree-ssa-loop-ivopts.h"
#include "tree-ssa-loop.h"
#include "tree-into-ssa.h"
#include "tree-ssa.h"
#include "cfgloop.h"
#include "tree-scalar-evolution.h"
#include "tree-vectorizer.h"
#include "tree-eh.h"
#include "gimple-fold.h"
#include "tree-affine.h"
#include "intl.h"
#include "rtl.h"
#include "memmodel.h"
#include "optabs.h"
#include "tree-ssa-loop-niter.h"
Include dependency graph for tree-loop-distribution.cc:

Data Structures

struct  ddr_hasher
 
struct  rdg_vertex
 
struct  rdg_edge
 
struct  builtin_info
 
struct  partition
 
class  loop_distribution
 
struct  pg_vdata
 
struct  pg_edata
 
struct  pg_edge_callback_data
 

Macros

#define MAX_DATAREFS_NUM    ((unsigned) param_loop_max_datarefs_for_datadeps)
 
#define NUM_PARTITION_THRESHOLD   (4)
 
#define DR_INDEX(dr)   ((uintptr_t) (dr)->aux)
 
#define RDGV_STMT(V)   ((struct rdg_vertex *) ((V)->data))->stmt
 
#define RDGV_DATAREFS(V)   ((struct rdg_vertex *) ((V)->data))->datarefs
 
#define RDGV_HAS_MEM_WRITE(V)   ((struct rdg_vertex *) ((V)->data))->has_mem_write
 
#define RDGV_HAS_MEM_READS(V)   ((struct rdg_vertex *) ((V)->data))->has_mem_reads
 
#define RDG_STMT(RDG, I)   RDGV_STMT (&(RDG->vertices[I]))
 
#define RDG_DATAREFS(RDG, I)   RDGV_DATAREFS (&(RDG->vertices[I]))
 
#define RDG_MEM_WRITE_STMT(RDG, I)   RDGV_HAS_MEM_WRITE (&(RDG->vertices[I]))
 
#define RDG_MEM_READS_STMT(RDG, I)   RDGV_HAS_MEM_READS (&(RDG->vertices[I]))
 
#define RDGE_TYPE(E)   ((struct rdg_edge *) ((E)->data))->type
 

Enumerations

enum  rdg_dep_type { flow_dd = 'f' , control_dd = 'c' }
 
enum  partition_kind {
  PKIND_NORMAL , PKIND_PARTIAL_MEMSET , PKIND_MEMSET , PKIND_MEMCPY ,
  PKIND_MEMMOVE
}
 
enum  partition_type { PTYPE_PARALLEL = 0 , PTYPE_SEQUENTIAL }
 
enum  fuse_type {
  FUSE_NON_BUILTIN = 0 , FUSE_REDUCTION = 1 , FUSE_SHARE_REF = 2 , FUSE_SAME_SCC = 3 ,
  FUSE_FINALIZE = 4
}
 

Functions

static void dump_rdg_vertex (FILE *file, struct graph *rdg, int i)
 
DEBUG_FUNCTION void debug_rdg_vertex (struct graph *rdg, int i)
 
static void dump_rdg (FILE *file, struct graph *rdg)
 
DEBUG_FUNCTION void debug_rdg (struct graph *rdg)
 
static void dot_rdg_1 (FILE *file, struct graph *rdg)
 
DEBUG_FUNCTION void dot_rdg (struct graph *rdg)
 
static int rdg_vertex_for_stmt (struct graph *rdg, gimple *stmt)
 
static void create_rdg_edges_for_scalar (struct graph *rdg, tree def, int idef)
 
static void create_edge_for_control_dependence (struct graph *rdg, basic_block bb, int v, control_dependences *cd)
 
static void create_rdg_flow_edges (struct graph *rdg)
 
static void create_rdg_cd_edges (struct graph *rdg, control_dependences *cd, loop_p loop)
 
static int bb_top_order_cmp_r (const void *x, const void *y, void *loop)
 
static void free_rdg (struct graph *rdg, loop_p loop)
 
static partitionpartition_alloc (void)
 
static void partition_free (partition *partition)
 
static bool partition_builtin_p (partition *partition)
 
static bool partition_reduction_p (partition *partition)
 
static bool ssa_name_has_uses_outside_loop_p (tree def, loop_p loop)
 
static bool stmt_has_scalar_dependences_outside_loop (loop_p loop, gimple *stmt)
 
static class loopcopy_loop_before (class loop *loop, bool redirect_lc_phi_defs)
 
static void create_bb_after_loop (class loop *loop)
 
static void generate_loops_for_partition (class loop *loop, partition *partition, bool copy_p, bool keep_lc_phis_p)
 
static int const_with_all_bytes_same (tree val)
 
static void generate_memset_builtin (class loop *loop, partition *partition)
 
static void generate_memcpy_builtin (class loop *loop, partition *partition)
 
static void destroy_loop (class loop *loop)
 
static bool generate_code_for_partition (class loop *loop, partition *partition, bool copy_p, bool keep_lc_phis_p)
 
static bool find_single_drs (class loop *loop, struct graph *rdg, const bitmap &partition_stmts, data_reference_p *dst_dr, data_reference_p *src_dr)
 
static int compute_access_range (loop_p loop_nest, data_reference_p dr, tree *base, tree *size, vec< tree > *steps=NULL)
 
static struct builtin_infoalloc_builtin (data_reference_p dst_dr, data_reference_p src_dr, tree dst_base, tree src_base, tree size)
 
static void classify_builtin_st (loop_p loop, partition *partition, data_reference_p dr)
 
static void dump_rdg_partitions (FILE *file, const vec< partition * > &partitions)
 
void debug_rdg_partitions (const vec< partition * > &)
 
static int number_of_rw_in_rdg (struct graph *rdg)
 
static int number_of_rw_in_partition (struct graph *rdg, partition *partition)
 
static bool partition_contains_all_rw (struct graph *rdg, const vec< partition * > &partitions)
 
static int pgcmp (const void *v1_, const void *v2_)
 
static void init_partition_graph_vertices (struct graph *pg, vec< struct partition * > *partitions)
 
static void add_partition_graph_edge (struct graph *pg, int i, int j, vec< ddr_p > *ddrs)
 
static bool pg_skip_alias_edge (struct graph_edge *e)
 
static void free_partition_graph_edata_cb (struct graph *, struct graph_edge *e, void *)
 
static void free_partition_graph_vdata (struct graph *pg)
 
static void sort_partitions_by_post_order (struct graph *pg, vec< struct partition * > *partitions)
 
static void pg_collect_alias_ddrs (struct graph *g, struct graph_edge *e, void *data)
 
static void pg_unmark_merged_alias_ddrs (struct graph *, struct graph_edge *e, void *data)
 
static tree data_ref_segment_size (struct data_reference *dr, tree niters)
 
static bool latch_dominated_by_data_ref (class loop *loop, data_reference *dr)
 
static void compute_alias_check_pairs (class loop *loop, vec< ddr_p > *alias_ddrs, vec< dr_with_seg_len_pair_t > *comp_alias_pairs)
 
static void version_loop_by_alias_check (vec< struct partition * > *partitions, class loop *loop, vec< ddr_p > *alias_ddrs)
 
static bool version_for_distribution_p (vec< struct partition * > *partitions, vec< ddr_p > *alias_ddrs)
 
static int offset_cmp (const void *vp1, const void *vp2)
 
static void fuse_memset_builtins (vec< struct partition * > *partitions)
 
static bool find_seed_stmts_for_distribution (class loop *loop, vec< gimple * > *work_list)
 
static void generate_reduction_builtin_1 (loop_p loop, gimple_seq &seq, tree reduction_var_old, tree reduction_var_new, const char *info, machine_mode load_mode)
 
static void generate_rawmemchr_builtin (loop_p loop, tree reduction_var, data_reference_p store_dr, tree base, tree pattern, location_t loc)
 
static void generate_strlen_builtin_1 (loop_p loop, gimple_seq &seq, tree reduction_var_old, tree reduction_var_new, machine_mode mode, tree start_len)
 
static void generate_strlen_builtin (loop_p loop, tree reduction_var, tree base, tree start_len, location_t loc)
 
static void generate_strlen_builtin_using_rawmemchr (loop_p loop, tree reduction_var, tree base, tree load_type, tree start_len, location_t loc)
 
static bool reduction_var_overflows_first (tree reduction_var_type, tree load_type)
 
static gimpledetermine_reduction_stmt_1 (const loop_p loop, const basic_block *bbs)
 
static gimpledetermine_reduction_stmt (const loop_p loop)
 
static class loopprepare_perfect_loop_nest (class loop *loop)
 
gimple_opt_passmake_pass_loop_distribution (gcc::context *ctxt)
 

Variables

static const charfuse_message []
 

Macro Definition Documentation

◆ DR_INDEX

#define DR_INDEX ( dr)    ((uintptr_t) (dr)->aux)

◆ MAX_DATAREFS_NUM

#define MAX_DATAREFS_NUM    ((unsigned) param_loop_max_datarefs_for_datadeps)
Loop distribution.
   Copyright (C) 2006-2024 Free Software Foundation, Inc.
   Contributed by Georges-Andre Silber <Georges-Andre.Silber@ensmp.fr>
   and Sebastian Pop <sebastian.pop@amd.com>.

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it
under the terms of the GNU General Public License as published by the
Free Software Foundation; either version 3, or (at your option) any
later version.

GCC is distributed in the hope that it will be useful, but WITHOUT
ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.   
This pass performs loop distribution: for example, the loop

|DO I = 2, N
|    A(I) = B(I) + C
|    D(I) = A(I-1)*E
|ENDDO

is transformed to

|DOALL I = 2, N
|   A(I) = B(I) + C
|ENDDO
|
|DOALL I = 2, N
|   D(I) = A(I-1)*E
|ENDDO

Loop distribution is the dual of loop fusion.  It separates statements
of a loop (or loop nest) into multiple loops (or loop nests) with the
same loop header.  The major goal is to separate statements which may
be vectorized from those that can't.  This pass implements distribution
in the following steps:

  1) Seed partitions with specific type statements.  For now we support
     two types seed statements: statement defining variable used outside
     of loop; statement storing to memory.
  2) Build reduced dependence graph (RDG) for loop to be distributed.
     The vertices (RDG:V) model all statements in the loop and the edges
     (RDG:E) model flow and control dependencies between statements.
  3) Apart from RDG, compute data dependencies between memory references.
  4) Starting from seed statement, build up partition by adding depended
     statements according to RDG's dependence information.  Partition is
     classified as parallel type if it can be executed paralleled; or as
     sequential type if it can't.  Parallel type partition is further
     classified as different builtin kinds if it can be implemented as
     builtin function calls.
  5) Build partition dependence graph (PG) based on data dependencies.
     The vertices (PG:V) model all partitions and the edges (PG:E) model
     all data dependencies between every partitions pair.  In general,
     data dependence is either compilation time known or unknown.  In C
     family languages, there exists quite amount compilation time unknown
     dependencies because of possible alias relation of data references.
     We categorize PG's edge to two types: "true" edge that represents
     compilation time known data dependencies; "alias" edge for all other
     data dependencies.
  6) Traverse subgraph of PG as if all "alias" edges don't exist.  Merge
     partitions in each strong connected component (SCC) correspondingly.
     Build new PG for merged partitions.
  7) Traverse PG again and this time with both "true" and "alias" edges
     included.  We try to break SCCs by removing some edges.  Because
     SCCs by "true" edges are all fused in step 6), we can break SCCs
     by removing some "alias" edges.  It's NP-hard to choose optimal
     edge set, fortunately simple approximation is good enough for us
     given the small problem scale.
  8) Collect all data dependencies of the removed "alias" edges.  Create
     runtime alias checks for collected data dependencies.
  9) Version loop under the condition of runtime alias checks.  Given
     loop distribution generally introduces additional overhead, it is
     only useful if vectorization is achieved in distributed loop.  We
     version loop with internal function call IFN_LOOP_DIST_ALIAS.  If
     no distributed loop can be vectorized, we simply remove distributed
     loops and recover to the original one.

TODO:
  1) We only distribute innermost two-level loop nest now.  We should
     extend it for arbitrary loop nests in the future.
  2) We only fuse partitions in SCC now.  A better fusion algorithm is
     desired to minimize loop overhead, maximize parallelism and maximize
     data reuse.   

Referenced by loop_distribution::distribute_loop().

◆ NUM_PARTITION_THRESHOLD

#define NUM_PARTITION_THRESHOLD   (4)
Threshold controlling number of distributed partitions.  Given it may
be unnecessary if a memory stream cost model is invented in the future,
we define it as a temporary macro, rather than a parameter.   

Referenced by loop_distribution::finalize_partitions().

◆ RDG_DATAREFS

#define RDG_DATAREFS ( RDG,
I )   RDGV_DATAREFS (&(RDG->vertices[I]))

◆ RDG_MEM_READS_STMT

◆ RDG_MEM_WRITE_STMT

◆ RDG_STMT

◆ RDGE_TYPE

#define RDGE_TYPE ( E)    ((struct rdg_edge *) ((E)->data))->type

◆ RDGV_DATAREFS

#define RDGV_DATAREFS ( V)    ((struct rdg_vertex *) ((V)->data))->datarefs

◆ RDGV_HAS_MEM_READS

#define RDGV_HAS_MEM_READS ( V)    ((struct rdg_vertex *) ((V)->data))->has_mem_reads

◆ RDGV_HAS_MEM_WRITE

#define RDGV_HAS_MEM_WRITE ( V)    ((struct rdg_vertex *) ((V)->data))->has_mem_write

◆ RDGV_STMT

#define RDGV_STMT ( V)    ((struct rdg_vertex *) ((V)->data))->stmt

Enumeration Type Documentation

◆ fuse_type

Partitions are fused because of different reasons.   
Enumerator
FUSE_NON_BUILTIN 
FUSE_REDUCTION 
FUSE_SHARE_REF 
FUSE_SAME_SCC 
FUSE_FINALIZE 

◆ partition_kind

Kind of distributed loop.   
Enumerator
PKIND_NORMAL 
PKIND_PARTIAL_MEMSET 
PKIND_MEMSET 
PKIND_MEMCPY 
PKIND_MEMMOVE 

◆ partition_type

Type of distributed loop.   
Enumerator
PTYPE_PARALLEL 
PTYPE_SEQUENTIAL 

◆ rdg_dep_type

Data dependence type.   
Enumerator
flow_dd 
control_dd 

Function Documentation

◆ add_partition_graph_edge()

static void add_partition_graph_edge ( struct graph * pg,
int i,
int j,
vec< ddr_p > * ddrs )
static
Add edge <I, J> to partition dependence graph PG.  Attach vector of data
dependence relations to the EDGE if DDRS isn't NULL.   

References add_edge(), graph_edge::data, gcc_assert, ggc_alloc(), i, NULL, and vNULL.

Referenced by loop_distribution::build_partition_graph().

◆ alloc_builtin()

static struct builtin_info * alloc_builtin ( data_reference_p dst_dr,
data_reference_p src_dr,
tree dst_base,
tree src_base,
tree size )
static
Allocate and return builtin struct.  Record information like DST_DR,
SRC_DR, DST_BASE, SRC_BASE and SIZE in the allocated struct.   

References builtin_info::dst_base, builtin_info::dst_dr, ggc_alloc(), builtin_info::size, builtin_info::src_base, and builtin_info::src_dr.

Referenced by loop_distribution::classify_builtin_ldst(), classify_builtin_st(), and matching_alloc_calls_p().

◆ bb_top_order_cmp_r()

static int bb_top_order_cmp_r ( const void * x,
const void * y,
void * loop )
static
If X has a smaller topological sort number than Y, returns -1;
if greater, returns 1.   

References gcc_assert, ggc_alloc(), and y.

Referenced by loop_distribution::stmts_from_loop().

◆ classify_builtin_st()

◆ compute_access_range()

static int compute_access_range ( loop_p loop_nest,
data_reference_p dr,
tree * base,
tree * size,
vec< tree > * steps = NULL )
static
Given data reference DR in LOOP_NEST, this function checks the enclosing
loops from inner to outer to see if loop's step equals to access size at
each level of loop.  Return 2 if we can prove this at all level loops;
record access base and size in BASE and SIZE; save loop's step at each
level of loop in STEPS if it is not null.  For example:

  int arr[100][100][100];
  for (i = 0; i < 100; i++)       ;steps[2] = 40000
    for (j = 100; j > 0; j--)     ;steps[1] = -400
      for (k = 0; k < 100; k++)   ;steps[0] = 4
        arr[i][j - 1][k] = 0;     ;base = &arr, size = 4000000

Return 1 if we can prove the equality at the innermost loop, but not all
level loops.  In this case, no information is recorded.

Return 0 if no equality can be proven at any level loops.   

References analyze_scalar_evolution(), build_fold_addr_expr, CDI_DOMINATORS, CHREC_LEFT, CHREC_RIGHT, dominated_by_p(), DR_REF, DR_STMT, EV_DIR_DECREASES, EV_DIR_UNKNOWN, fold_build1_loc(), fold_build2_loc(), fold_build_pointer_plus_loc(), fold_convert_loc(), ggc_alloc(), gimple_bb(), gimple_location(), basic_block_def::loop_father, loop_outer(), NULL, number_of_latch_executions(), operand_equal_p(), scev_direction(), single_exit(), size_binop_loc(), size_one_node, sizetype, TREE_CODE, tree_contains_chrecs(), TREE_TYPE, and TYPE_SIZE_UNIT.

Referenced by loop_distribution::classify_builtin_ldst(), and classify_builtin_st().

◆ compute_alias_check_pairs()

◆ const_with_all_bytes_same()

static int const_with_all_bytes_same ( tree val)
static
If VAL memory representation contains the same value in all bytes,
return that value, otherwise return -1.
E.g. for 0x24242424 return 0x24, for IEEE double
747708026454360457216.0 return 0x44, etc.   

References CHAR_BIT, const_with_all_bytes_same(), CONSTRUCTOR_NELTS, count, ggc_alloc(), i, integer_zerop(), native_encode_expr(), real_isneg(), real_zerop(), TREE_CLOBBER_P, TREE_CODE, TREE_IMAGPART, TREE_REAL_CST_PTR, TREE_REALPART, VECTOR_CST_ENCODED_ELT, and vector_cst_encoded_nelts().

Referenced by classify_builtin_st(), const_with_all_bytes_same(), fuse_memset_builtins(), and generate_memset_builtin().

◆ copy_loop_before()

◆ create_bb_after_loop()

static void create_bb_after_loop ( class loop * loop)
static
Creates an empty basic block after LOOP.   

References single_exit(), and split_edge().

Referenced by generate_loops_for_partition().

◆ create_edge_for_control_dependence()

◆ create_rdg_cd_edges()

static void create_rdg_cd_edges ( struct graph * rdg,
control_dependences * cd,
loop_p loop )
static
Creates the edges of the reduced dependence graph RDG.   

References cd, create_edge_for_control_dependence(), flow_bb_inside_loop_p(), FOR_EACH_EDGE, ggc_alloc(), gimple_bb(), i, and RDG_STMT.

Referenced by loop_distribution::build_rdg().

◆ create_rdg_edges_for_scalar()

static void create_rdg_edges_for_scalar ( struct graph * rdg,
tree def,
int idef )
static
Creates dependence edges in RDG for all the uses of DEF.  IDEF is
the index of DEF in RDG.   

References add_edge(), graph_edge::data, flow_dd, FOR_EACH_IMM_USE_FAST, ggc_alloc(), rdg_vertex_for_stmt(), RDGE_TYPE, and USE_STMT.

Referenced by create_rdg_flow_edges().

◆ create_rdg_flow_edges()

static void create_rdg_flow_edges ( struct graph * rdg)
static
Creates the edges of the reduced dependence graph RDG.   

References create_rdg_edges_for_scalar(), DEF_FROM_PTR, FOR_EACH_PHI_OR_STMT_DEF, ggc_alloc(), i, RDG_STMT, and SSA_OP_DEF.

Referenced by loop_distribution::build_rdg().

◆ data_ref_segment_size()

static tree data_ref_segment_size ( struct data_reference * dr,
tree niters )
static
Compute and return an expression whose value is the segment length which
will be accessed by DR in NITERS iterations.   

References DR_STEP, fold_convert, ggc_alloc(), size_binop, size_one_node, and sizetype.

Referenced by compute_alias_check_pairs().

◆ debug_rdg()

DEBUG_FUNCTION void debug_rdg ( struct graph * rdg)
Call dump_rdg on stderr.   

References dump_rdg(), and ggc_alloc().

◆ debug_rdg_partitions()

DEBUG_FUNCTION void debug_rdg_partitions ( const vec< partition * > & partitions)
extern
Debug PARTITIONS.   

References dump_rdg_partitions(), and ggc_alloc().

◆ debug_rdg_vertex()

DEBUG_FUNCTION void debug_rdg_vertex ( struct graph * rdg,
int i )
Call dump_rdg_vertex on stderr.   

References dump_rdg_vertex(), ggc_alloc(), and i.

◆ destroy_loop()

◆ determine_reduction_stmt()

static gimple * determine_reduction_stmt ( const loop_p loop)
static
If LOOP has a single non-volatile reduction statement, then return a pointer
to it.  Otherwise return NULL.   

References determine_reduction_stmt_1(), get_loop_body(), and ggc_alloc().

Referenced by loop_distribution::transform_reduction_loop().

◆ determine_reduction_stmt_1()

◆ dot_rdg()

DEBUG_FUNCTION void dot_rdg ( struct graph * rdg)
Display the Reduced Dependence Graph using dotty.   

References dot_rdg_1(), and ggc_alloc().

◆ dot_rdg_1()

◆ dump_rdg()

static void dump_rdg ( FILE * file,
struct graph * rdg )
static
Dump the reduced dependence graph RDG to FILE.   

References dump_rdg_vertex(), ggc_alloc(), and i.

Referenced by debug_rdg(), and loop_distribution::distribute_loop().

◆ dump_rdg_partitions()

static void dump_rdg_partitions ( FILE * file,
const vec< partition * > & partitions )
static

◆ dump_rdg_vertex()

◆ find_seed_stmts_for_distribution()

static bool find_seed_stmts_for_distribution ( class loop * loop,
vec< gimple * > * work_list )
static

◆ find_single_drs()

static bool find_single_drs ( class loop * loop,
struct graph * rdg,
const bitmap & partition_stmts,
data_reference_p * dst_dr,
data_reference_p * src_dr )
static
Given PARTITION of LOOP and RDG, record single load/store data references
for builtin partition in SRC_DR/DST_DR, return false if there is no such
data references.   

References ADDR_SPACE_GENERIC_P, CDI_DOMINATORS, DECL_BIT_FIELD, dominated_by_p(), DR_IS_READ, DR_REF, DR_STMT, EXECUTE_IF_SET_IN_BITMAP, ggc_alloc(), gimple_assign_single_p(), gimple_bb(), gimple_vuse(), loop::header, i, loop::latch, NULL, RDG_DATAREFS, RDG_STMT, single_exit(), TREE_CODE, TREE_OPERAND, TREE_TYPE, and TYPE_ADDR_SPACE.

Referenced by loop_distribution::classify_partition(), and loop_distribution::transform_reduction_loop().

◆ free_partition_graph_edata_cb()

static void free_partition_graph_edata_cb ( struct graph * ,
struct graph_edge * e,
void *  )
static
Callback function freeing data attached to edge E of graph.   

References graph_edge::data, and NULL.

Referenced by loop_distribution::break_alias_scc_partitions(), and loop_distribution::merge_dep_scc_partitions().

◆ free_partition_graph_vdata()

static void free_partition_graph_vdata ( struct graph * pg)
static
Free data attached to vertice of partition dependence graph PG.   

References ggc_alloc(), and i.

Referenced by loop_distribution::break_alias_scc_partitions(), and loop_distribution::merge_dep_scc_partitions().

◆ free_rdg()

◆ fuse_memset_builtins()

static void fuse_memset_builtins ( vec< struct partition * > * partitions)
static
Fuse adjacent memset builtin PARTITIONS if possible.  This is a special
case optimization transforming below code:

  __builtin_memset (&obj, 0, 100);
  _1 = &obj + 100;
  __builtin_memset (_1, 0, 200);
  _2 = &obj + 300;
  __builtin_memset (_2, 0, 100);

into:

  __builtin_memset (&obj, 0, 400);

Note we don't have dependence information between different partitions
at this point, as a result, we can't handle nonadjacent memset builtin
partitions since dependence might be broken.   

References wi::add(), partition::builtin, const_with_all_bytes_same(), DR_STMT, builtin_info::dst_base_base, builtin_info::dst_base_offset, builtin_info::dst_dr, fold_build2, gcc_stablesort(), ggc_alloc(), gimple_assign_rhs1(), i, partition::kind, wi::ne_p(), offset_cmp(), operand_equal_p(), partition_free(), PKIND_MEMSET, builtin_info::size, sizetype, wi::to_wide(), and TREE_CODE.

Referenced by loop_distribution::finalize_partitions().

◆ generate_code_for_partition()

static bool generate_code_for_partition ( class loop * loop,
partition * partition,
bool copy_p,
bool keep_lc_phis_p )
static

◆ generate_loops_for_partition()

static void generate_loops_for_partition ( class loop * loop,
partition * partition,
bool copy_p,
bool keep_lc_phis_p )
static

◆ generate_memcpy_builtin()

◆ generate_memset_builtin()

◆ generate_rawmemchr_builtin()

static void generate_rawmemchr_builtin ( loop_p loop,
tree reduction_var,
data_reference_p store_dr,
tree base,
tree pattern,
location_t loc )
static
Generate a call to rawmemchr and place it before LOOP.  REDUCTION_VAR is
replaced with a fresh SSA name representing the result of the call.   

References copy_ssa_name(), DR_REF, force_gimple_operand(), g, generate_reduction_builtin_1(), ggc_alloc(), gimple_build_assign(), gimple_build_call_internal(), gimple_call_set_lhs(), gimple_seq_add_stmt(), gimple_set_location(), partition::loc, NULL, NULL_TREE, TREE_TYPE, and TYPE_MODE.

Referenced by loop_distribution::transform_reduction_loop().

◆ generate_reduction_builtin_1()

static void generate_reduction_builtin_1 ( loop_p loop,
gimple_seq & seq,
tree reduction_var_old,
tree reduction_var_new,
const char * info,
machine_mode load_mode )
static
A helper function for generate_{rawmemchr,strlen}_builtin functions in order
to place new statements SEQ before LOOP and replace the old reduction
variable with the new one.   

References dump_file, dump_flags, FOR_EACH_IMM_USE_ON_STMT, FOR_EACH_IMM_USE_STMT, gcc_assert, GET_MODE_NAME, ggc_alloc(), GSI_CONTINUE_LINKING, gsi_insert_seq_after(), gsi_last_bb(), loop_preheader_edge(), SET_USE, TDF_DETAILS, and update_stmt().

Referenced by generate_rawmemchr_builtin(), and generate_strlen_builtin_1().

◆ generate_strlen_builtin()

static void generate_strlen_builtin ( loop_p loop,
tree reduction_var,
tree base,
tree start_len,
location_t loc )
static
Generate a call to strlen and place it before LOOP.  REDUCTION_VAR is
replaced with a fresh SSA name representing the result of the call.   

References build_fold_addr_expr, builtin_decl_implicit(), force_gimple_operand(), generate_strlen_builtin_1(), ggc_alloc(), gimple_build_call(), gimple_call_set_lhs(), gimple_seq_add_stmt(), gimple_set_location(), partition::loc, make_ssa_name(), NULL, NULL_TREE, and size_type_node.

Referenced by loop_distribution::transform_reduction_loop().

◆ generate_strlen_builtin_1()

static void generate_strlen_builtin_1 ( loop_p loop,
gimple_seq & seq,
tree reduction_var_old,
tree reduction_var_new,
machine_mode mode,
tree start_len )
static

◆ generate_strlen_builtin_using_rawmemchr()

static void generate_strlen_builtin_using_rawmemchr ( loop_p loop,
tree reduction_var,
tree base,
tree load_type,
tree start_len,
location_t loc )
static
Generate code in order to mimic the behaviour of strlen but this time over
an array of elements with mode different than QI.  REDUCTION_VAR is replaced
with a fresh SSA name representing the result, i.e., the length.   

References build_zero_cst(), count, end(), force_gimple_operand(), generate_strlen_builtin_1(), ggc_alloc(), gimple_build_assign(), gimple_build_call_internal(), gimple_call_set_lhs(), gimple_convert(), gimple_seq_add_stmt(), gimple_set_location(), partition::loc, make_ssa_name(), NULL, NULL_TREE, ptrdiff_type_node, TREE_TYPE, TYPE_MODE, and TYPE_SIZE_UNIT.

Referenced by loop_distribution::transform_reduction_loop().

◆ init_partition_graph_vertices()

static void init_partition_graph_vertices ( struct graph * pg,
vec< struct partition * > * partitions )
static
Initialize vertice's data for partition dependence graph PG with
PARTITIONS.   

References ggc_alloc(), i, and pg_vdata::partition.

Referenced by loop_distribution::build_partition_graph().

◆ latch_dominated_by_data_ref()

static bool latch_dominated_by_data_ref ( class loop * loop,
data_reference * dr )
inlinestatic
Return true if LOOP's latch is dominated by statement for data reference
DR.   

References CDI_DOMINATORS, dominated_by_p(), DR_STMT, gimple_bb(), and single_exit().

Referenced by compute_alias_check_pairs().

◆ make_pass_loop_distribution()

gimple_opt_pass * make_pass_loop_distribution ( gcc::context * ctxt)

References ggc_alloc().

◆ number_of_rw_in_partition()

static int number_of_rw_in_partition ( struct graph * rdg,
partition * partition )
static
Returns the number of read and write operations in a PARTITION of
the RDG.   

References EXECUTE_IF_SET_IN_BITMAP, ggc_alloc(), i, RDG_MEM_READS_STMT, RDG_MEM_WRITE_STMT, and partition::stmts.

Referenced by partition_contains_all_rw().

◆ number_of_rw_in_rdg()

static int number_of_rw_in_rdg ( struct graph * rdg)
static
Returns the number of read and write operations in the RDG.   

References ggc_alloc(), i, RDG_MEM_READS_STMT, and RDG_MEM_WRITE_STMT.

Referenced by partition_contains_all_rw().

◆ offset_cmp()

static int offset_cmp ( const void * vp1,
const void * vp2 )
static
Compare base offset of builtin mem* partitions P1 and P2.   

References ggc_alloc().

Referenced by fuse_memset_builtins().

◆ partition_alloc()

◆ partition_builtin_p()

static bool partition_builtin_p ( partition * partition)
static

◆ partition_contains_all_rw()

static bool partition_contains_all_rw ( struct graph * rdg,
const vec< partition * > & partitions )
static
Returns true when one of the PARTITIONS contains all the read or
write operations of RDG.   

References FOR_EACH_VEC_ELT, ggc_alloc(), i, number_of_rw_in_partition(), and number_of_rw_in_rdg().

Referenced by loop_distribution::distribute_loop().

◆ partition_free()

◆ partition_reduction_p()

◆ pg_collect_alias_ddrs()

static void pg_collect_alias_ddrs ( struct graph * g,
struct graph_edge * e,
void * data )
static
Callback function for traversing edge E in graph G.  DATA is private
callback data.   

References bitmap_bit_p, graph_edge::data, graph_edge::dest, g, ggc_alloc(), i, NULL, and graph_edge::src.

Referenced by loop_distribution::break_alias_scc_partitions().

◆ pg_skip_alias_edge()

static bool pg_skip_alias_edge ( struct graph_edge * e)
static
Callback function for graph travesal algorithm.  It returns true
if edge E should skipped when traversing the graph.   

References graph_edge::data, and NULL.

Referenced by loop_distribution::break_alias_scc_partitions().

◆ pg_unmark_merged_alias_ddrs()

static void pg_unmark_merged_alias_ddrs ( struct graph * ,
struct graph_edge * e,
void * data )
static
Callback function for traversing edge E.  DATA is private
callback data.   

References bitmap_bit_p, graph_edge::data, graph_edge::dest, ggc_alloc(), i, NULL, and graph_edge::src.

Referenced by loop_distribution::break_alias_scc_partitions().

◆ pgcmp()

static int pgcmp ( const void * v1_,
const void * v2_ )
static
Compare postorder number of the partition graph vertices V1 and V2.   

References ggc_alloc().

Referenced by sort_partitions_by_post_order().

◆ prepare_perfect_loop_nest()

static class loop * prepare_perfect_loop_nest ( class loop * loop)
static
Given innermost LOOP, return the outermost enclosing loop that forms a
perfect loop nest.   

References chrec_contains_symbols_defined_in_loop(), chrec_dont_know, ggc_alloc(), loop::inner, loop_outer(), loop::next, NULL, NULL_TREE, loop::num, number_of_latch_executions(), and single_exit().

Referenced by loop_distribution::execute().

◆ rdg_vertex_for_stmt()

◆ reduction_var_overflows_first()

static bool reduction_var_overflows_first ( tree reduction_var_type,
tree load_type )
static
Return true if we can count at least as many characters by taking pointer
difference as we can count via reduction_var without an overflow.  Thus
compute 2^n < (2^(m-1) / s) where n = TYPE_PRECISION (reduction_var_type),
m = TYPE_PRECISION (ptrdiff_type_node), and s = size of each character.   

References ggc_alloc(), wi::lshift(), wi::ltu_p(), ptrdiff_type_node, wi::to_widest(), TYPE_PRECISION, TYPE_SIZE_UNIT, and wi::udiv_trunc().

Referenced by loop_distribution::transform_reduction_loop().

◆ sort_partitions_by_post_order()

static void sort_partitions_by_post_order ( struct graph * pg,
vec< struct partition * > * partitions )
static
Sort partitions in PG in descending post order and store them in
PARTITIONS.   

References ggc_alloc(), i, pgcmp(), and qsort.

Referenced by loop_distribution::break_alias_scc_partitions(), and loop_distribution::merge_dep_scc_partitions().

◆ ssa_name_has_uses_outside_loop_p()

static bool ssa_name_has_uses_outside_loop_p ( tree def,
loop_p loop )
static
Returns true when DEF is an SSA_NAME defined in LOOP and used after
the LOOP.   

References flow_bb_inside_loop_p(), FOR_EACH_IMM_USE_FAST, ggc_alloc(), gimple_bb(), is_gimple_debug(), and USE_STMT.

Referenced by stmt_has_scalar_dependences_outside_loop().

◆ stmt_has_scalar_dependences_outside_loop()

static bool stmt_has_scalar_dependences_outside_loop ( loop_p loop,
gimple * stmt )
static

◆ version_for_distribution_p()

static bool version_for_distribution_p ( vec< struct partition * > * partitions,
vec< ddr_p > * alias_ddrs )
inlinestatic
Return true if loop versioning is needed to distrubute PARTITIONS.
ALIAS_DDRS are data dependence relations for runtime alias check.   

References ggc_alloc().

Referenced by loop_distribution::distribute_loop().

◆ version_loop_by_alias_check()

Variable Documentation

◆ fuse_message

const char* fuse_message[]
static
Initial value:
= {
"they are non-builtins",
"they have reductions",
"they have shared memory refs",
"they are in the same dependence scc",
"there is no point to distribute loop"}
Description on different fusing reason.   

Referenced by loop_distribution::partition_merge_into().