GCC Middle and Back End API Reference
tree-parloops.cc File Reference
#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "backend.h"
#include "tree.h"
#include "gimple.h"
#include "cfghooks.h"
#include "tree-pass.h"
#include "ssa.h"
#include "cgraph.h"
#include "gimple-pretty-print.h"
#include "fold-const.h"
#include "gimplify.h"
#include "gimple-iterator.h"
#include "gimplify-me.h"
#include "gimple-walk.h"
#include "stor-layout.h"
#include "tree-nested.h"
#include "tree-cfg.h"
#include "tree-ssa-loop-ivopts.h"
#include "tree-ssa-loop-manip.h"
#include "tree-ssa-loop-niter.h"
#include "tree-ssa-loop.h"
#include "tree-into-ssa.h"
#include "cfgloop.h"
#include "tree-scalar-evolution.h"
#include "langhooks.h"
#include "tree-vectorizer.h"
#include "tree-hasher.h"
#include "tree-parloops.h"
#include "omp-general.h"
#include "omp-low.h"
#include "tree-ssa.h"
#include "tree-ssa-alias.h"
#include "tree-eh.h"
#include "gomp-constants.h"
#include "tree-dfa.h"
#include "stringpool.h"
#include "attribs.h"
Include dependency graph for tree-parloops.cc:

Data Structures

struct  reduction_info
 
struct  reduction_hasher
 
struct  name_to_copy_elt
 
struct  name_to_copy_hasher
 
struct  lambda_trans_matrix_s
 
struct  elv_data
 
struct  clsn_data
 

Macros

#define MIN_PER_THREAD   param_parloops_min_per_thread
 
#define LTM_MATRIX(T)   ((T)->matrix)
 
#define LTM_ROWSIZE(T)   ((T)->rowsize)
 
#define LTM_COLSIZE(T)   ((T)->colsize)
 
#define LTM_DENOMINATOR(T)   ((T)->denominator)
 

Typedefs

typedef hash_table< reduction_hasherreduction_info_table_type
 
typedef hash_table< name_to_copy_hashername_to_copy_table_type
 
typedef struct lambda_trans_matrix_slambda_trans_matrix
 

Functions

static void report_ploop_op (dump_flags_t msg_type, gimple *stmt, const char *msg)
 
static bool parloops_valid_reduction_input_p (stmt_vec_info def_stmt_info)
 
static bool parloops_is_slp_reduction (loop_vec_info loop_info, gimple *phi, gimple *first_stmt)
 
static bool parloops_needs_fold_left_reduction_p (tree type, tree_code code, bool need_wrapping_integral_overflow)
 
static stmt_vec_info parloops_is_simple_reduction (loop_vec_info loop_info, stmt_vec_info phi_info, bool *double_reduc, bool need_wrapping_integral_overflow, enum vect_reduction_type *v_reduc_type)
 
stmt_vec_info parloops_force_simple_reduction (loop_vec_info loop_info, stmt_vec_info phi_info, bool *double_reduc, bool need_wrapping_integral_overflow)
 
static struct reduction_inforeduction_phi (reduction_info_table_type *reduction_list, gimple *phi)
 
static lambda_trans_matrix lambda_trans_matrix_new (int colsize, int rowsize, struct obstack *lambda_obstack)
 
static void lambda_matrix_vector_mult (lambda_matrix matrix, int m, int n, lambda_vector vec, lambda_vector dest)
 
static bool lambda_transform_legal_p (lambda_trans_matrix trans, int nb_loops, vec< ddr_p > dependence_relations)
 
static bool loop_parallel_p (class loop *loop, struct obstack *parloop_obstack)
 
static bool loop_has_blocks_with_irreducible_flag (class loop *loop)
 
static tree take_address_of (tree obj, tree type, edge entry, int_tree_htab_type *decl_address, gimple_stmt_iterator *gsi)
 
static tree reduc_stmt_res (gimple *stmt)
 
int initialize_reductions (reduction_info **slot, class loop *loop)
 
static tree eliminate_local_variables_1 (tree *tp, int *walk_subtrees, void *data)
 
static void eliminate_local_variables_stmt (edge entry, gimple_stmt_iterator *gsi, int_tree_htab_type *decl_address)
 
static void eliminate_local_variables (edge entry, edge exit)
 
static bool expr_invariant_in_region_p (edge entry, edge exit, tree expr)
 
static tree separate_decls_in_region_name (tree name, name_to_copy_table_type *name_copies, int_tree_htab_type *decl_copies, bool copy_name_p)
 
static void separate_decls_in_region_stmt (edge entry, edge exit, gimple *stmt, name_to_copy_table_type *name_copies, int_tree_htab_type *decl_copies)
 
static bool separate_decls_in_region_debug (gimple *stmt, name_to_copy_table_type *name_copies, int_tree_htab_type *decl_copies)
 
int add_field_for_reduction (reduction_info **slot, tree type)
 
int add_field_for_name (name_to_copy_elt **slot, tree type)
 
int create_phi_for_local_result (reduction_info **slot, class loop *loop)
 
int create_call_for_reduction_1 (reduction_info **slot, struct clsn_data *clsn_data)
 
static void create_call_for_reduction (class loop *loop, reduction_info_table_type *reduction_list, struct clsn_data *ld_st_data)
 
int create_loads_for_reductions (reduction_info **slot, struct clsn_data *clsn_data)
 
static void create_final_loads_for_reduction (reduction_info_table_type *reduction_list, struct clsn_data *ld_st_data)
 
int create_stores_for_reduction (reduction_info **slot, struct clsn_data *clsn_data)
 
int create_loads_and_stores_for_name (name_to_copy_elt **slot, struct clsn_data *clsn_data)
 
static void separate_decls_in_region (edge entry, edge exit, reduction_info_table_type *reduction_list, tree *arg_struct, tree *new_arg_struct, struct clsn_data *ld_st_data)
 
bool parallelized_function_p (tree fndecl)
 
static tree create_loop_fn (location_t loc)
 
static void replace_uses_in_bb_by (tree name, tree val, basic_block bb)
 
static void transform_to_exit_first_loop_alt (class loop *loop, reduction_info_table_type *reduction_list, tree bound)
 
static bool try_transform_to_exit_first_loop_alt (class loop *loop, reduction_info_table_type *reduction_list, tree nit)
 
static void transform_to_exit_first_loop (class loop *loop, reduction_info_table_type *reduction_list, tree nit)
 
static void create_parallel_loop (class loop *loop, tree loop_fn, tree data, tree new_data, unsigned n_threads, location_t loc, bool oacc_kernels_p)
 
static unsigned int num_phis (basic_block bb, bool count_virtual_p)
 
static void gen_parallel_loop (class loop *loop, reduction_info_table_type *reduction_list, unsigned n_threads, class tree_niter_desc *niter, bool oacc_kernels_p)
 
static bool loop_has_vector_phi_nodes (class loop *loop)
 
static void build_new_reduction (reduction_info_table_type *reduction_list, gimple *reduc_stmt, gphi *phi)
 
int set_reduc_phi_uids (reduction_info **slot, void *data)
 
static bool valid_reduction_p (stmt_vec_info stmt_info)
 
static void gather_scalar_reductions (loop_p loop, reduction_info_table_type *reduction_list)
 
static bool try_get_loop_niter (loop_p loop, class tree_niter_desc *niter)
 
static tree get_omp_data_i_param (void)
 
static tree find_reduc_addr (class loop *loop, gphi *phi)
 
static bool try_create_reduction_list (loop_p loop, reduction_info_table_type *reduction_list, bool oacc_kernels_p)
 
static bool loop_has_phi_with_address_arg (class loop *loop)
 
static bool ref_conflicts_with_region (gimple_stmt_iterator gsi, ao_ref *ref, bool ref_is_store, vec< basic_block > region_bbs, unsigned int i, gimple *skip_stmt)
 
static bool oacc_entry_exit_ok_1 (bitmap in_loop_bbs, const vec< basic_block > &region_bbs, reduction_info_table_type *reduction_list, bitmap reduction_stores)
 
static bool oacc_entry_exit_single_gang (bitmap in_loop_bbs, const vec< basic_block > &region_bbs, bitmap reduction_stores)
 
static bool oacc_entry_exit_ok (class loop *loop, reduction_info_table_type *reduction_list)
 
static bool parallelize_loops (bool oacc_kernels_p)
 
gimple_opt_passmake_pass_parallelize_loops (gcc::context *ctxt)
 

Macro Definition Documentation

◆ LTM_COLSIZE

#define LTM_COLSIZE ( T)    ((T)->colsize)

◆ LTM_DENOMINATOR

#define LTM_DENOMINATOR ( T)    ((T)->denominator)

Referenced by lambda_trans_matrix_new().

◆ LTM_MATRIX

#define LTM_MATRIX ( T)    ((T)->matrix)

◆ LTM_ROWSIZE

#define LTM_ROWSIZE ( T)    ((T)->rowsize)

◆ MIN_PER_THREAD

#define MIN_PER_THREAD   param_parloops_min_per_thread
Minimal number of iterations of a loop that should be executed in each
thread.   

Referenced by gen_parallel_loop(), and parallelize_loops().

Typedef Documentation

◆ lambda_trans_matrix

A transformation matrix, which is a self-contained ROWSIZE x COLSIZE
matrix.  Rather than use floats, we simply keep a single DENOMINATOR that
represents the denominator for every element in the matrix.   

◆ name_to_copy_table_type

◆ reduction_info_table_type

Function Documentation

◆ add_field_for_name()

int add_field_for_name ( name_to_copy_elt ** slot,
tree type )
Callback for htab_traverse.  Adds a field corresponding to a ssa name
described in SLOT. The type is passed in DATA.   

References build_decl(), name_to_copy_elt::field, ggc_alloc(), insert_field_into_struct(), ssa_name, SSA_NAME_IDENTIFIER, TREE_TYPE, UNKNOWN_LOCATION, and name_to_copy_elt::version.

Referenced by separate_decls_in_region().

◆ add_field_for_reduction()

int add_field_for_reduction ( reduction_info ** slot,
tree type )
Callback for htab_traverse.  Adds a field corresponding to the reduction
specified in SLOT. The type is passed in DATA.   

References build_decl(), reduction_info::field, ggc_alloc(), gimple_location(), insert_field_into_struct(), reduc_stmt_res(), SSA_NAME_IDENTIFIER, and TREE_TYPE.

Referenced by separate_decls_in_region().

◆ build_new_reduction()

static void build_new_reduction ( reduction_info_table_type * reduction_list,
gimple * reduc_stmt,
gphi * phi )
static
Create a reduction_info struct, initialize it with REDUC_STMT
and PHI, insert it to the REDUCTION_LIST.   

References dump_file, dump_flags, gcc_assert, ggc_alloc(), gimple_assign_rhs_code(), gimple_phi_result(), PHI_ARG_DEF, print_gimple_stmt(), reduction_info::reduc_stmt, reduction_info::reduction_code, SSA_NAME_DEF_STMT, SSA_NAME_VERSION, and TDF_DETAILS.

Referenced by gather_scalar_reductions().

◆ create_call_for_reduction()

static void create_call_for_reduction ( class loop * loop,
reduction_info_table_type * reduction_list,
struct clsn_data * ld_st_data )
static
Create the atomic operation at the join point of the threads.
REDUCTION_LIST describes the reductions in the LOOP.
LD_ST_DATA describes the shared data structure where
shared data is stored in and loaded from.   

References create_call_for_reduction_1(), create_phi_for_local_result(), FALLTHRU_EDGE, ggc_alloc(), loop::latch, and single_pred().

Referenced by gen_parallel_loop().

◆ create_call_for_reduction_1()

int create_call_for_reduction_1 ( reduction_info ** slot,
struct clsn_data * clsn_data )

◆ create_final_loads_for_reduction()

static void create_final_loads_for_reduction ( reduction_info_table_type * reduction_list,
struct clsn_data * ld_st_data )
static
Load the reduction result that was stored in LD_ST_DATA.
REDUCTION_LIST describes the list of reductions that the
loads should be generated for.   

References build_fold_addr_expr, create_loads_for_reductions(), ggc_alloc(), gimple_build_assign(), gsi_after_labels(), gsi_insert_before(), and GSI_NEW_STMT.

Referenced by separate_decls_in_region().

◆ create_loads_and_stores_for_name()

int create_loads_and_stores_for_name ( name_to_copy_elt ** slot,
struct clsn_data * clsn_data )
Callback for htab_traverse.  Creates loads to a field of LOAD in LOAD_BB and
store to a field of STORE in STORE_BB for the ssa name and its duplicate
specified in SLOT.   

References build3(), build_simple_mem_ref, name_to_copy_elt::field, ggc_alloc(), gimple_build_assign(), gsi_insert_after(), gsi_last_bb(), GSI_NEW_STMT, clsn_data::load, clsn_data::load_bb, name_to_copy_elt::new_name, NULL_TREE, ssa_name, clsn_data::store, clsn_data::store_bb, TREE_TYPE, and name_to_copy_elt::version.

Referenced by separate_decls_in_region().

◆ create_loads_for_reductions()

int create_loads_for_reductions ( reduction_info ** slot,
struct clsn_data * clsn_data )

◆ create_loop_fn()

◆ create_parallel_loop()

static void create_parallel_loop ( class loop * loop,
tree loop_fn,
tree data,
tree new_data,
unsigned n_threads,
location_t loc,
bool oacc_kernels_p )
static
Create the parallel constructs for LOOP as described in gen_parallel_loop.
LOOP_FN and DATA are the arguments of GIMPLE_OMP_PARALLEL.
NEW_DATA is the variable that should be initialized from the argument
of LOOP_FN.  N_THREADS is the requested number of threads, which can be 0 if
that number is to be determined later.   

References add_phi_arg(), build2(), build_fold_addr_expr, build_int_cst(), build_omp_clause(), calculate_dominance_info(), CDI_DOMINATORS, cfun, copy_ssa_name(), DECL_ARGUMENTS, DECL_ATTRIBUTES, end(), extract_true_false_edges_from_block(), fold_convert, free_dominance_info(), gcc_assert, gcc_checking_assert, gcc_unreachable, get_identifier(), GF_OMP_FOR_KIND_FOR, GF_OMP_FOR_KIND_OACC_LOOP, ggc_alloc(), gimple_bb(), gimple_build_assign(), gimple_build_omp_continue(), gimple_build_omp_for(), gimple_build_omp_parallel(), gimple_build_omp_return(), gimple_cond_code(), gimple_cond_lhs(), gimple_cond_rhs(), gimple_cond_set_lhs(), gimple_omp_for_set_cond(), gimple_omp_for_set_final(), gimple_omp_for_set_incr(), gimple_omp_for_set_index(), gimple_omp_for_set_initial(), gimple_phi_arg_location_from_edge(), gimple_set_location(), gsi_after_labels(), gsi_end_p(), gsi_insert_after(), gsi_insert_before(), gsi_last_bb(), gsi_last_nondebug_bb(), GSI_NEW_STMT, gsi_next(), gsi_remove(), GSI_SAME_STMT, gsi_start_phis(), gsi_stmt(), profile_probability::guessed_never(), loop::header, integer_type_node, loop::latch, lookup_attribute(), loop_latch_edge(), loop_preheader_edge(), make_edge(), make_single_succ_edge(), make_ssa_name(), NULL, NULL_TREE, OMP_CLAUSE_GANG, OMP_CLAUSE_NUM_THREADS, OMP_CLAUSE_NUM_THREADS_EXPR, OMP_CLAUSE_SCHEDULE, OMP_CLAUSE_SCHEDULE_AUTO, OMP_CLAUSE_SCHEDULE_CHUNK_EXPR, OMP_CLAUSE_SCHEDULE_DYNAMIC, OMP_CLAUSE_SCHEDULE_GUIDED, OMP_CLAUSE_SCHEDULE_KIND, OMP_CLAUSE_SCHEDULE_RUNTIME, OMP_CLAUSE_SCHEDULE_STATIC, PARLOOPS_SCHEDULE_AUTO, PARLOOPS_SCHEDULE_DYNAMIC, PARLOOPS_SCHEDULE_GUIDED, PARLOOPS_SCHEDULE_RUNTIME, PARLOOPS_SCHEDULE_STATIC, PENDING_STMT, PHI_ARG_DEF_FROM_EDGE, PHI_ARG_DEF_PTR_FROM_EDGE, redirect_edge_and_branch(), rescan_loop_exit(), SET_USE, single_dom_exit(), single_pred(), single_pred_edge(), single_succ_edge(), split_edge(), split_loop_exit_edge(), SSA_NAME_DEF_STMT, SSA_NAME_VAR, tree_cons(), TREE_TYPE, and type().

Referenced by gen_parallel_loop().

◆ create_phi_for_local_result()

int create_phi_for_local_result ( reduction_info ** slot,
class loop * loop )
Callback for htab_traverse.  A local result is the intermediate result
computed by a single
thread, or the initial value in case no iteration was executed.
This function creates a phi node reflecting these values.
The phi's result will be stored in NEW_PHI field of the
reduction's data structure.   

References add_phi_arg(), copy_ssa_name(), create_phi_node(), EDGE_PRED, FALLTHRU_EDGE, ggc_alloc(), gimple_location(), loop::latch, reduction_info::new_phi, reduc_stmt_res(), and single_pred().

Referenced by create_call_for_reduction().

◆ create_stores_for_reduction()

int create_stores_for_reduction ( reduction_info ** slot,
struct clsn_data * clsn_data )
Callback for htab_traverse.  Store the neutral value for the
particular reduction's operation, e.g. 0 for PLUS_EXPR,
1 for MULT_EXPR, etc. into the reduction field.
The reduction is specified in SLOT. The store information is
passed in DATA.   

References build3(), ggc_alloc(), gimple_build_assign(), gsi_insert_after(), gsi_last_bb(), GSI_NEW_STMT, NULL_TREE, reduc_stmt_res(), clsn_data::store, clsn_data::store_bb, and TREE_TYPE.

Referenced by separate_decls_in_region().

◆ eliminate_local_variables()

static void eliminate_local_variables ( edge entry,
edge exit )
static
Eliminates the references to local variables from the single entry
single exit region between the ENTRY and EXIT edges.

This includes:
1) Taking address of a local variable -- these are moved out of the
region (and temporary variable is created to hold the address if
necessary).

2) Dereferencing a local variable -- these are replaced with indirect
references.   

References elv_data::decl_address, eliminate_local_variables_stmt(), elv_data::entry, FOR_EACH_VEC_ELT, gather_blocks_in_sese_region(), ggc_alloc(), gimple_debug_bind_p(), elv_data::gsi, gsi_end_p(), gsi_next(), gsi_start_bb(), gsi_stmt(), i, and is_gimple_debug().

Referenced by gen_parallel_loop().

◆ eliminate_local_variables_1()

static tree eliminate_local_variables_1 ( tree * tp,
int * walk_subtrees,
void * data )
static
Eliminates references to local variables in *TP out of the single
entry single exit region starting at DTA->ENTRY.
DECL_ADDRESS contains addresses of the references that had their
address taken already.  If the expression is changed, CHANGED is
set to true.  Callback for walk_tree.   

References build_pointer_type(), build_simple_mem_ref, DECL_EXTERNAL, DECL_P, EXPR_P, get_base_address(), ggc_alloc(), is_gimple_val(), NULL, NULL_TREE, SSA_VAR_P, take_address_of(), TREE_CODE, TREE_OPERAND, TREE_TYPE, and type().

Referenced by eliminate_local_variables_stmt().

◆ eliminate_local_variables_stmt()

static void eliminate_local_variables_stmt ( edge entry,
gimple_stmt_iterator * gsi,
int_tree_htab_type * decl_address )
static
Moves the references to local variables in STMT at *GSI out of the single
entry single exit region starting at ENTRY.  DECL_ADDRESS contains
addresses of the references that had their address taken
already.   

References elv_data::decl_address, eliminate_local_variables_1(), elv_data::entry, ggc_alloc(), gimple_build_nop(), gimple_clobber_p(), gimple_debug_bind_get_value_ptr(), gimple_debug_bind_p(), gimple_debug_bind_reset_value(), elv_data::gsi, gsi_replace(), gsi_stmt(), NULL, unlink_stmt_vdef(), update_stmt(), walk_gimple_op(), and walk_tree.

Referenced by eliminate_local_variables().

◆ expr_invariant_in_region_p()

static bool expr_invariant_in_region_p ( edge entry,
edge exit,
tree expr )
static
Returns true if expression EXPR is not defined between ENTRY and
EXIT, i.e. if all its operands are defined outside of the region.   

References CDI_DOMINATORS, dominated_by_p(), elv_data::entry, ggc_alloc(), gimple_bb(), is_gimple_min_invariant(), SSA_NAME_DEF_STMT, and TREE_CODE.

Referenced by separate_decls_in_region_stmt().

◆ find_reduc_addr()

static tree find_reduc_addr ( class loop * loop,
gphi * phi )
static
For PHI in loop header of LOOP, look for pattern:

<bb preheader>
.omp_data_i = &.omp_data_arr;
addr = .omp_data_i->sum;
sum_a = *addr;

<bb header>:
sum_b = PHI <sum_a (preheader), sum_c (latch)>

and return addr.  Otherwise, return NULL_TREE.   

References get_omp_data_i_param(), ggc_alloc(), gimple_assign_rhs1(), gimple_assign_single_p(), loop_preheader_edge(), NULL_TREE, PHI_ARG_DEF_FROM_EDGE, SSA_NAME_DEF_STMT, TREE_CODE, and TREE_OPERAND.

Referenced by try_create_reduction_list().

◆ gather_scalar_reductions()

◆ gen_parallel_loop()

static void gen_parallel_loop ( class loop * loop,
reduction_info_table_type * reduction_list,
unsigned n_threads,
class tree_niter_desc * niter,
bool oacc_kernels_p )
static

◆ get_omp_data_i_param()

static tree get_omp_data_i_param ( void )
static
Return the default def of the first function argument.   

References cfun, DECL_ARGUMENTS, DECL_CHAIN, gcc_assert, NULL_TREE, and ssa_default_def().

Referenced by find_reduc_addr(), and oacc_entry_exit_ok_1().

◆ initialize_reductions()

int initialize_reductions ( reduction_info ** slot,
class loop * loop )
Callback for htab_traverse.  Create the initialization statement
for reduction described in SLOT, and place it at the preheader of
the loop described in DATA.   

References ggc_alloc(), gimple_location(), reduction_info::init, loop_preheader_edge(), omp_reduction_init_op(), PHI_ARG_DEF_FROM_EDGE, PHI_ARG_DEF_PTR_FROM_EDGE, PHI_RESULT, SET_USE, TREE_TYPE, and type().

Referenced by gen_parallel_loop().

◆ lambda_matrix_vector_mult()

static void lambda_matrix_vector_mult ( lambda_matrix matrix,
int m,
int n,
lambda_vector vec,
lambda_vector dest )
static
Multiply a vector VEC by a matrix MAT.
MAT is an M*N matrix, and VEC is a vector with length N.  The result
is stored in DEST which must be a vector of length M.   

References ggc_alloc(), i, and lambda_vector_clear().

Referenced by lambda_transform_legal_p().

◆ lambda_trans_matrix_new()

static lambda_trans_matrix lambda_trans_matrix_new ( int colsize,
int rowsize,
struct obstack * lambda_obstack )
static
Allocate a new transformation matrix.   

References ggc_alloc(), lambda_matrix_new(), LTM_COLSIZE, LTM_DENOMINATOR, LTM_MATRIX, and LTM_ROWSIZE.

Referenced by loop_parallel_p().

◆ lambda_transform_legal_p()

static bool lambda_transform_legal_p ( lambda_trans_matrix trans,
int nb_loops,
vec< ddr_p > dependence_relations )
static
Return true if TRANS is a legal transformation matrix that respects
the dependence vectors in DISTS and DIRS.  The conservative answer
is false.

"Wolfe proves that a unimodular transformation represented by the
matrix T is legal when applied to a loop nest with a set of
lexicographically non-negative distance vectors RDG if and only if
for each vector d in RDG, (T.d >= 0) is lexicographically positive.
i.e.: if and only if it transforms the lexicographically positive
distance vectors to lexicographically positive vectors.  Note that
a unimodular matrix must transform the zero vector (and only it) to
the zero vector." S.Muchnick.   

References chrec_dont_know, chrec_known, DDR_A, DDR_ARE_DEPENDENT, DDR_B, DDR_DIST_VECT, DDR_NUM_DIST_VECTS, DR_IS_READ, FOR_EACH_VEC_ELT, gcc_assert, ggc_alloc(), i, lambda_matrix_vector_mult(), lambda_vector_lexico_pos(), lambda_vector_new(), LTM_COLSIZE, LTM_MATRIX, LTM_ROWSIZE, and NULL.

Referenced by loop_parallel_p().

◆ loop_has_blocks_with_irreducible_flag()

static bool loop_has_blocks_with_irreducible_flag ( class loop * loop)
inlinestatic
Return true when LOOP contains basic blocks marked with the
BB_IRREDUCIBLE_LOOP flag.   

References end(), free(), get_loop_body_in_dom_order(), ggc_alloc(), i, and loop::num_nodes.

Referenced by parallelize_loops().

◆ loop_has_phi_with_address_arg()

static bool loop_has_phi_with_address_arg ( class loop * loop)
static

◆ loop_has_vector_phi_nodes()

static bool loop_has_vector_phi_nodes ( class loop * loop)
static

◆ loop_parallel_p()

static bool loop_parallel_p ( class loop * loop,
struct obstack * parloop_obstack )
static
Data dependency analysis. Returns true if the iterations of LOOP
are independent on each other (that is, if we can execute them
in parallel).   

References compute_data_dependences_for_loop(), dump_data_dependence_relations(), dump_file, dump_flags, end(), free_data_refs(), free_dependence_relations(), ggc_alloc(), loop::inner, lambda_trans_matrix_new(), lambda_transform_legal_p(), data_dependence_relation::loop_nest, LTM_MATRIX, loop::num, and TDF_DETAILS.

Referenced by parallelize_loops().

◆ make_pass_parallelize_loops()

gimple_opt_pass * make_pass_parallelize_loops ( gcc::context * ctxt)

References ggc_alloc().

◆ num_phis()

static unsigned int num_phis ( basic_block bb,
bool count_virtual_p )
static
Return number of phis in bb.  If COUNT_VIRTUAL_P is false, don't count the
virtual phi.   

References ggc_alloc(), gsi_end_p(), gsi_next(), gsi_start_phis(), gphi_iterator::phi(), PHI_RESULT, and virtual_operand_p().

Referenced by gen_parallel_loop(), vect_find_reusable_accumulator(), and vect_transform_cycle_phi().

◆ oacc_entry_exit_ok()

static bool oacc_entry_exit_ok ( class loop * loop,
reduction_info_table_type * reduction_list )
static
Return true if the statements before and after the LOOP can be executed in
parallel with the function containing the loop.  Resolve conflicting stores
outside LOOP by guarding them such that only a single gang executes them.   

References BITMAP_ALLOC, bitmap_clear(), BITMAP_FREE, bitmap_set_bit, calculate_dominance_info(), CDI_DOMINATORS, cfun, changed, ENTRY_BLOCK_PTR_FOR_FN, free(), free_dominance_info(), get_all_dominated_blocks(), get_loop_body_in_dom_order(), ggc_alloc(), i, NULL, loop::num_nodes, oacc_entry_exit_ok_1(), and oacc_entry_exit_single_gang().

Referenced by parallelize_loops().

◆ oacc_entry_exit_ok_1()

◆ oacc_entry_exit_single_gang()

◆ parallelize_loops()

◆ parallelized_function_p()

bool parallelized_function_p ( tree fndecl)
Returns true if FN was created to run in parallel.   

References gcc_assert, cgraph_node::get(), NULL, and cgraph_node::parallelized_function.

Referenced by parallelize_loops().

◆ parloops_force_simple_reduction()

stmt_vec_info parloops_force_simple_reduction ( loop_vec_info loop_info,
stmt_vec_info phi_info,
bool * double_reduc,
bool need_wrapping_integral_overflow )
Wrapper around vect_is_simple_reduction, which will modify code
in-place if it enables detection of more reductions.  Arguments
as there.   

References ggc_alloc(), parloops_is_simple_reduction(), STMT_VINFO_REDUC_DEF, and STMT_VINFO_REDUC_TYPE.

Referenced by gather_scalar_reductions().

◆ parloops_is_simple_reduction()

static stmt_vec_info parloops_is_simple_reduction ( loop_vec_info loop_info,
stmt_vec_info phi_info,
bool * double_reduc,
bool need_wrapping_integral_overflow,
enum vect_reduction_type * v_reduc_type )
static
Function parloops_is_simple_reduction

  (1) Detect a cross-iteration def-use cycle that represents a simple
  reduction computation.  We look for the following pattern:

  loop_header:
    a1 = phi < a0, a2 >
    a3 = ...
    a2 = operation (a3, a1)

  or

  a3 = ...
  loop_header:
    a1 = phi < a0, a2 >
    a2 = operation (a3, a1)

  such that:
  1. operation is commutative and associative and it is safe to
     change the order of the computation
  2. no uses for a2 in the loop (a2 is used out of the loop)
  3. no uses of a1 in the loop besides the reduction operation
  4. no uses of a1 outside the loop.

  Conditions 1,4 are tested here.
  Conditions 2,3 are tested in vect_mark_stmts_to_be_vectorized.

  (2) Detect a cross-iteration def-use cycle in nested loops, i.e.,
  nested cycles.

  (3) Detect cycles of phi nodes in outer-loop vectorization, i.e., double
  reductions:

    a1 = phi < a0, a2 >
    inner loop (def of a3)
    a2 = phi < a3 >

  (4) Detect condition expressions, ie:
    for (int i = 0; i < N; i++)
      if (a[i] < val)
       ret_val = a[i];

References associative_tree_code(), check_reduction_path(), commutative_tree_code(), COMPARISON_CLASS_P, COND_REDUCTION, dump_enabled_p(), dump_printf(), dump_printf_loc(), flow_bb_inside_loop_p(), flow_loop_nested_p(), FOLD_LEFT_REDUCTION, FOR_EACH_IMM_USE_FAST, FOR_EACH_VEC_ELT, get_gimple_rhs_class(), ggc_alloc(), gimple_assign_lhs(), gimple_assign_rhs1(), gimple_assign_rhs1_ptr(), gimple_assign_rhs2(), gimple_assign_rhs2_ptr(), gimple_assign_rhs3(), gimple_assign_rhs3_ptr(), gimple_assign_rhs_code(), gimple_bb(), GIMPLE_BINARY_RHS, gimple_phi_num_args(), gimple_phi_result(), GIMPLE_TERNARY_RHS, has_zero_uses(), HONOR_NANS(), i, loop::inner, invert_tree_comparison(), is_gimple_assign(), is_gimple_debug(), loop_latch_edge(), LOOP_VINFO_LOOP, MSG_MISSED_OPTIMIZATION, MSG_NOTE, NULL, NULL_TREE, parloops_is_slp_reduction(), parloops_needs_fold_left_reduction_p(), parloops_valid_reduction_input_p(), PHI_ARG_DEF, PHI_ARG_DEF_FROM_EDGE, PHI_RESULT, report_ploop_op(), SSA_NAME_DEF_STMT, STMT_VINFO_DEF_TYPE, swap_ssa_operands(), tcc_comparison, TREE_CODE, TREE_CODE_CLASS, TREE_CODE_REDUCTION, TREE_OPERAND, TREE_SET_CODE, TREE_TYPE, type(), types_compatible_p(), USE_STMT, vect_double_reduction_def, and vect_location.

Referenced by parloops_force_simple_reduction().

◆ parloops_is_slp_reduction()

static bool parloops_is_slp_reduction ( loop_vec_info loop_info,
gimple * phi,
gimple * first_stmt )
static
Detect SLP reduction of the form:

#a1 = phi <a5, a0>
a2 = operation (a1)
a3 = operation (a2)
a4 = operation (a3)
a5 = operation (a4)

#a = phi <a5>

PHI is the reduction phi node (#a1 = phi <a5, a0> above)
FIRST_STMT is the first reduction stmt in the chain
(a2 = operation (a1)).

Return TRUE if a reduction chain was detected.   

References dump_enabled_p(), dump_printf_loc(), first_stmt(), flow_bb_inside_loop_p(), FOR_EACH_IMM_USE_FAST, ggc_alloc(), gimple_assign_lhs(), gimple_assign_rhs1(), gimple_assign_rhs1_ptr(), gimple_assign_rhs2(), gimple_assign_rhs2_ptr(), gimple_assign_rhs_code(), gimple_bb(), i, is_gimple_assign(), is_gimple_debug(), LOOP_VINFO_LOOP, LOOP_VINFO_REDUCTION_CHAINS, MSG_NOTE, NULL, parloops_valid_reduction_input_p(), PHI_RESULT, REDUC_GROUP_FIRST_ELEMENT, REDUC_GROUP_NEXT_ELEMENT, REDUC_GROUP_SIZE, swap_ssa_operands(), update_stmt(), USE_STMT, and vect_location.

Referenced by parloops_is_simple_reduction().

◆ parloops_needs_fold_left_reduction_p()

static bool parloops_needs_fold_left_reduction_p ( tree type,
tree_code code,
bool need_wrapping_integral_overflow )
static
Return true if we need an in-order reduction for operation CODE
on type TYPE.  NEED_WRAPPING_INTEGRAL_OVERFLOW is true if integer
overflow must wrap.   

References ggc_alloc(), INTEGRAL_TYPE_P, operation_can_overflow(), operation_no_trapping_overflow(), SAT_FIXED_POINT_TYPE_P, SCALAR_FLOAT_TYPE_P, and TYPE_OVERFLOW_WRAPS.

Referenced by parloops_is_simple_reduction().

◆ parloops_valid_reduction_input_p()

static bool parloops_valid_reduction_input_p ( stmt_vec_info def_stmt_info)
static
DEF_STMT_INFO occurs in a loop that contains a potential reduction
operation.  Return true if the results of DEF_STMT_INFO are something
that can be accumulated by such a reduction.   

References ggc_alloc(), gimple_bb(), is_gimple_assign(), is_gimple_call(), is_loop_header_bb_p(), STMT_VINFO_DEF_TYPE, vect_induction_def, and vect_internal_def.

Referenced by parloops_is_simple_reduction(), and parloops_is_slp_reduction().

◆ reduc_stmt_res()

◆ reduction_phi()

◆ ref_conflicts_with_region()

static bool ref_conflicts_with_region ( gimple_stmt_iterator gsi,
ao_ref * ref,
bool ref_is_store,
vec< basic_block > region_bbs,
unsigned int i,
gimple * skip_stmt )
static
Return true if memory ref REF (corresponding to the stmt at GSI in
REGIONS_BB[I]) conflicts with the statements in REGIONS_BB[I] after gsi,
or the statements in REGIONS_BB[I + n].  REF_IS_STORE indicates if REF is a
store.  Ignore conflicts with SKIP_STMT.   

References dump_file, ggc_alloc(), gimple_vdef(), gimple_vuse(), gsi_end_p(), gsi_next(), gsi_start_bb(), gsi_stmt(), i, print_gimple_stmt(), ref_maybe_used_by_stmt_p(), and stmt_may_clobber_ref_p_1().

Referenced by oacc_entry_exit_ok_1().

◆ replace_uses_in_bb_by()

static void replace_uses_in_bb_by ( tree name,
tree val,
basic_block bb )
static
Replace uses of NAME by VAL in block BB.   

References FOR_EACH_IMM_USE_ON_STMT, FOR_EACH_IMM_USE_STMT, ggc_alloc(), gimple_bb(), and SET_USE.

Referenced by transform_to_exit_first_loop_alt().

◆ report_ploop_op()

static void report_ploop_op ( dump_flags_t msg_type,
gimple * stmt,
const char * msg )
static
Loop autoparallelization.
   Copyright (C) 2006-2024 Free Software Foundation, Inc.
   Contributed by Sebastian Pop <pop@cri.ensmp.fr> 
   Zdenek Dvorak <dvorakz@suse.cz> and Razya Ladelsky <razya@il.ibm.com>.

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free
Software Foundation; either version 3, or (at your option) any later
version.

GCC is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.   
This pass tries to distribute iterations of loops into several threads.
The implementation is straightforward -- for each loop we test whether its
iterations are independent, and if it is the case (and some additional
conditions regarding profitability and correctness are satisfied), we
add GIMPLE_OMP_PARALLEL and GIMPLE_OMP_FOR codes and let omp expansion
machinery do its job.

The most of the complexity is in bringing the code into shape expected
by the omp expanders:
-- for GIMPLE_OMP_FOR, ensuring that the loop has only one induction
   variable and that the exit test is at the start of the loop body
-- for GIMPLE_OMP_PARALLEL, replacing the references to local addressable
   variables by accesses through pointers, and breaking up ssa chains
   by storing the values incoming to the parallelized loop to a structure
   passed to the new function as an argument (something similar is done
   in omp gimplification, unfortunately only a small part of the code
   can be shared).

TODO:
-- if there are several parallelizable loops in a function, it may be
   possible to generate the threads just once (using synchronization to
   ensure that cross-loop dependences are obeyed).
-- handling of common reduction patterns for outer loops.  
 
More info can also be found at http://gcc.gnu.org/wiki/AutoParInGCC   
Error reporting helper for parloops_is_simple_reduction below.  GIMPLE
statement STMT is printed with a message MSG.  

References dump_printf_loc(), ggc_alloc(), msg, and vect_location.

Referenced by parloops_is_simple_reduction().

◆ separate_decls_in_region()

static void separate_decls_in_region ( edge entry,
edge exit,
reduction_info_table_type * reduction_list,
tree * arg_struct,
tree * new_arg_struct,
struct clsn_data * ld_st_data )
static
Moves all the variables used in LOOP and defined outside of it (including
the initial values of loop phi nodes, and *PER_THREAD if it is a ssa
name) to a structure created for this purpose.  The code

while (1)
  {
    use (a);
    use (b);
  }

is transformed this way:

bb0:
old.a = a;
old.b = b;

bb1:
a' = new->a;
b' = new->b;
while (1)
  {
    use (a');
    use (b');
  }

`old' is stored to *ARG_STRUCT and `new' is stored to NEW_ARG_STRUCT.  The
pointer `new' is intentionally not initialized (the loop will be split to a
separate function later, and `new' will be initialized from its arguments).
LD_ST_DATA holds information about the shared data structure used to pass
information among the threads.  It is initialized here, and
gen_parallel_loop will pass it to create_call_for_reduction that
needs this information.  REDUCTION_LIST describes the reductions
in LOOP.   

References add_field_for_name(), add_field_for_reduction(), build_decl(), build_pointer_type(), create_final_loads_for_reduction(), create_loads_and_stores_for_name(), create_stores_for_reduction(), create_tmp_var, create_tmp_var_name(), FOR_EACH_VEC_ELT, gather_blocks_in_sese_region(), ggc_alloc(), gsi_end_p(), gsi_next(), gsi_remove(), gsi_start_bb(), gsi_start_phis(), gsi_stmt(), i, is_gimple_debug(), layout_type(), clsn_data::load, clsn_data::load_bb, make_ssa_name(), lang_hooks_for_types::make_type, NULL, separate_decls_in_region_debug(), separate_decls_in_region_stmt(), single_pred(), single_succ_edge(), split_edge(), clsn_data::store, type(), TYPE_NAME, lang_hooks::types, and UNKNOWN_LOCATION.

Referenced by gen_parallel_loop().

◆ separate_decls_in_region_debug()

static bool separate_decls_in_region_debug ( gimple * stmt,
name_to_copy_table_type * name_copies,
int_tree_htab_type * decl_copies )
static
Finds the ssa names used in STMT that are defined outside the
region between ENTRY and EXIT and replaces such ssa names with
their duplicates.  The duplicates are stored to NAME_COPIES.  Base
decls of all ssa names used in STMT (including those defined in
LOOP) are replaced with the new temporary variables; the
replacement decls are stored in DECL_COPIES.   

References DECL_P, DECL_UID, FOR_EACH_PHI_OR_STMT_USE, gcc_assert, ggc_alloc(), gimple_debug_bind_get_var(), gimple_debug_bind_p(), gimple_debug_bind_reset_value(), gimple_debug_bind_set_var(), gimple_debug_source_bind_get_var(), gimple_debug_source_bind_p(), gimple_debug_source_bind_set_var(), SET_USE, SSA_NAME_VERSION, SSA_OP_USE, SSA_VAR_P, TREE_CODE, update_stmt(), USE_FROM_PTR, and name_to_copy_elt::version.

Referenced by separate_decls_in_region().

◆ separate_decls_in_region_name()

static tree separate_decls_in_region_name ( tree name,
name_to_copy_table_type * name_copies,
int_tree_htab_type * decl_copies,
bool copy_name_p )
static
If COPY_NAME_P is true, creates and returns a duplicate of NAME.
The copies are stored to NAME_COPIES, if NAME was already duplicated,
its duplicate stored in NAME_COPIES is returned.

Regardless of COPY_NAME_P, the decl used as a base of the ssa name is also
duplicated, storing the copies in DECL_COPIES.   

References create_tmp_var, DECL_NOT_GIMPLE_REG_P, DECL_UID, duplicate_ssa_name(), name_to_copy_elt::field, gcc_assert, get_name(), ggc_alloc(), name_to_copy_elt::new_name, NULL, NULL_TREE, replace_ssa_name_symbol(), SSA_NAME_VAR, SSA_NAME_VERSION, TREE_CODE, TREE_TYPE, and name_to_copy_elt::version.

Referenced by separate_decls_in_region_stmt().

◆ separate_decls_in_region_stmt()

static void separate_decls_in_region_stmt ( edge entry,
edge exit,
gimple * stmt,
name_to_copy_table_type * name_copies,
int_tree_htab_type * decl_copies )
static
Finds the ssa names used in STMT that are defined outside the
region between ENTRY and EXIT and replaces such ssa names with
their duplicates.  The duplicates are stored to NAME_COPIES.  Base
decls of all ssa names used in STMT (including those defined in
LOOP) are replaced with the new temporary variables; the
replacement decls are stored in DECL_COPIES.   

References DEF_FROM_PTR, expr_invariant_in_region_p(), FOR_EACH_PHI_OR_STMT_DEF, FOR_EACH_PHI_OR_STMT_USE, gcc_assert, ggc_alloc(), separate_decls_in_region_name(), SET_USE, SSA_OP_DEF, SSA_OP_USE, TREE_CODE, and USE_FROM_PTR.

Referenced by separate_decls_in_region().

◆ set_reduc_phi_uids()

int set_reduc_phi_uids ( reduction_info ** slot,
void * data )
Callback for htab_traverse.  Sets gimple_uid of reduc_phi stmts.   

References ggc_alloc(), and gimple_set_uid().

Referenced by gather_scalar_reductions().

◆ take_address_of()

static tree take_address_of ( tree obj,
tree type,
edge entry,
int_tree_htab_type * decl_address,
gimple_stmt_iterator * gsi )
static
Assigns the address of OBJ in TYPE to an ssa name, and returns this name.
The assignment statement is placed on edge ENTRY.  DECL_ADDRESS maps decls
to their addresses that can be reused.  The address of OBJ is known to
be invariant in the whole function.  Other needed statements are placed
right before GSI.   

References build_addr(), build_fold_addr_expr, build_fold_addr_expr_with_type, build_simple_mem_ref, DECL_P, DECL_UID, hash_table< Descriptor, Lazy, Allocator >::find_slot(), fold_convert, force_gimple_operand(), get_name(), ggc_alloc(), gimple_build_assign(), gimple_seq_empty_p(), gsi_insert_on_edge_immediate(), gsi_insert_seq_before(), GSI_SAME_STMT, handled_component_p(), make_ssa_name(), make_temp_ssa_name(), NULL, NULL_TREE, TREE_OPERAND, TREE_TYPE, int_tree_map::uid, unshare_expr(), and useless_type_conversion_p().

Referenced by eliminate_local_variables_1().

◆ transform_to_exit_first_loop()

◆ transform_to_exit_first_loop_alt()

static void transform_to_exit_first_loop_alt ( class loop * loop,
reduction_info_table_type * reduction_list,
tree bound )
static
Do transformation from:

     <bb preheader>:
     ...
     goto <bb header>

     <bb header>:
     ivtmp_a = PHI <ivtmp_init (preheader), ivtmp_b (latch)>
     sum_a = PHI <sum_init (preheader), sum_b (latch)>
     ...
     use (ivtmp_a)
     ...
     sum_b = sum_a + sum_update
     ...
     if (ivtmp_a < n)
       goto <bb latch>;
     else
       goto <bb exit>;

     <bb latch>:
     ivtmp_b = ivtmp_a + 1;
     goto <bb header>

     <bb exit>:
     sum_z = PHI <sum_b (cond[1]), ...>

     [1] Where <bb cond> is single_pred (bb latch); In the simplest case,
         that's <bb header>.

   to:

     <bb preheader>:
     ...
     goto <bb newheader>

     <bb header>:
     ivtmp_a = PHI <ivtmp_c (latch)>
     sum_a = PHI <sum_c (latch)>
     ...
     use (ivtmp_a)
     ...
     sum_b = sum_a + sum_update
     ...
     goto <bb latch>;

     <bb newheader>:
     ivtmp_c = PHI <ivtmp_init (preheader), ivtmp_b (latch)>
     sum_c = PHI <sum_init (preheader), sum_b (latch)>
     if (ivtmp_c < n + 1)
       goto <bb header>;
     else
       goto <bb newexit>;

     <bb latch>:
     ivtmp_b = ivtmp_a + 1;
     goto <bb newheader>

     <bb newexit>:
     sum_y = PHI <sum_c (newheader)>

     <bb exit>:
     sum_z = PHI <sum_y (newexit), ...>


   In unified diff format:

      <bb preheader>:
      ...
-     goto <bb header>
+     goto <bb newheader>

      <bb header>:
-     ivtmp_a = PHI <ivtmp_init (preheader), ivtmp_b (latch)>
-     sum_a = PHI <sum_init (preheader), sum_b (latch)>
+     ivtmp_a = PHI <ivtmp_c (latch)>
+     sum_a = PHI <sum_c (latch)>
      ...
      use (ivtmp_a)
      ...
      sum_b = sum_a + sum_update
      ...
-     if (ivtmp_a < n)
-       goto <bb latch>;
+     goto <bb latch>;
+
+     <bb newheader>:
+     ivtmp_c = PHI <ivtmp_init (preheader), ivtmp_b (latch)>
+     sum_c = PHI <sum_init (preheader), sum_b (latch)>
+     if (ivtmp_c < n + 1)
+       goto <bb header>;
      else
        goto <bb exit>;

      <bb latch>:
      ivtmp_b = ivtmp_a + 1;
-     goto <bb header>
+     goto <bb newheader>

+    <bb newexit>:
+    sum_y = PHI <sum_c (newheader)>

      <bb exit>:
-     sum_z = PHI <sum_b (cond[1]), ...>
+     sum_z = PHI <sum_y (newexit), ...>

   Note: the example does not show any virtual phis, but these are handled more
   or less as reductions.


   Moves the exit condition of LOOP to the beginning of its header.
   REDUCTION_LIST describes the reductions in LOOP.  BOUND is the new loop
   bound.   

References add_phi_arg(), calculate_dominance_info(), CDI_DOMINATORS, copy_ssa_name(), create_phi_node(), flush_pending_stmts(), free_dominance_info(), gcc_assert, ggc_alloc(), gimple_cond_lhs(), gimple_cond_set_rhs(), gimple_set_uid(), gsi_end_p(), gsi_last_bb(), gsi_next(), gsi_start_phis(), loop::header, i, loop::latch, loop_preheader_edge(), NULL, gphi_iterator::phi(), PHI_ARG_DEF_FROM_EDGE, PHI_RESULT, redirect_edge_and_branch(), redirect_edge_var_map_def(), redirect_edge_var_map_vector(), reduction_info::reduc_phi, reduction_phi(), replace_uses_in_bb_by(), single_dom_exit(), single_pred_edge(), single_pred_p(), single_succ_edge(), split_block_before_cond_jump(), split_edge(), SSA_NAME_DEF_STMT, UNKNOWN_LOCATION, update_stmt(), and virtual_operand_p().

Referenced by try_transform_to_exit_first_loop_alt().

◆ try_create_reduction_list()

◆ try_get_loop_niter()

static bool try_get_loop_niter ( loop_p loop,
class tree_niter_desc * niter )
static
Try to initialize NITER for code generation part.   

References dump_file, dump_flags, gcc_assert, ggc_alloc(), number_of_iterations_exit(), single_dom_exit(), and TDF_DETAILS.

Referenced by parallelize_loops().

◆ try_transform_to_exit_first_loop_alt()

◆ valid_reduction_p()

static bool valid_reduction_p ( stmt_vec_info stmt_info)
static
Return true if the type of reduction performed by STMT_INFO is suitable
for this pass.   

References FOLD_LEFT_REDUCTION, and STMT_VINFO_REDUC_TYPE.

Referenced by gather_scalar_reductions().