GCC Middle and Back End API Reference
omp-oacc-neuter-broadcast.cc File Reference
#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "backend.h"
#include "rtl.h"
#include "tree.h"
#include "gimple.h"
#include "tree-pass.h"
#include "ssa.h"
#include "cgraph.h"
#include "pretty-print.h"
#include "fold-const.h"
#include "gimplify.h"
#include "gimple-iterator.h"
#include "gimple-walk.h"
#include "tree-inline.h"
#include "langhooks.h"
#include "omp-general.h"
#include "omp-low.h"
#include "gimple-pretty-print.h"
#include "cfghooks.h"
#include "insn-config.h"
#include "recog.h"
#include "internal-fn.h"
#include "bitmap.h"
#include "tree-nested.h"
#include "stor-layout.h"
#include "tree-ssa-threadupdate.h"
#include "tree-into-ssa.h"
#include "splay-tree.h"
#include "target.h"
#include "cfgloop.h"
#include "tree-cfg.h"
#include "omp-offload.h"
#include "attribs.h"
#include "targhooks.h"
#include "diagnostic-core.h"
Include dependency graph for omp-oacc-neuter-broadcast.cc:

Data Structures

struct  parallel_g
 
class  addr_range
 

Macros

#define GOMP_DIM_SEQ   GOMP_DIM_MAX
 

Typedefs

typedef hash_map< basic_block, gimple * > bb_stmt_map_t
 
typedef hash_map< tree, tree > field_map_t
 
typedef hash_map< tree, field_map_t > record_field_map_t
 
typedef hash_set< tree > propagation_set
 
typedef hash_map< basic_block, std::pair< unsigned HOST_WIDE_INT, bool > > blk_offset_map_t
 
typedef std::pair< int, tree > idx_decl_pair_t
 
typedef auto_vec< splay_tree > used_range_vec_t
 

Functions

static bool local_var_based_p (tree decl)
 
static bool omp_sese_active_worker_call (gcall *call)
 
static void omp_sese_split_blocks (bb_stmt_map_t *map)
 
static const char * mask_name (unsigned mask)
 
static void omp_sese_dump_pars (parallel_g *par, unsigned depth)
 
static parallel_g * omp_sese_find_par (bb_stmt_map_t *map, parallel_g *par, basic_block block)
 
static parallel_g * omp_sese_discover_pars (bb_stmt_map_t *map)
 
static void populate_single_mode_bitmaps (parallel_g *par, bitmap worker_single, bitmap vector_single, unsigned outer_mask, int depth)
 
static void install_var_field (tree var, tree record_type, field_map_t *fields)
 
static void find_ssa_names_to_propagate (parallel_g *par, unsigned outer_mask, bitmap worker_single, bitmap vector_single, vec< propagation_set * > *prop_set)
 
static tree find_partitioned_var_uses_1 (tree *node, int *, void *data)
 
static void find_partitioned_var_uses (parallel_g *par, unsigned outer_mask, hash_set< tree > *partitioned_var_uses)
 
static void find_gang_private_vars (hash_set< tree > *gang_private_vars)
 
static void find_local_vars_to_propagate (parallel_g *par, unsigned outer_mask, hash_set< tree > *partitioned_var_uses, hash_set< tree > *gang_private_vars, bitmap writes_gang_private, vec< propagation_set * > *prop_set)
 
static void worker_single_simple (basic_block from, basic_block to, hash_set< tree > *def_escapes_block)
 
static tree build_receiver_ref (tree var, tree receiver_decl, field_map_t *fields)
 
static tree build_sender_ref (tree var, tree sender_decl, field_map_t *fields)
 
static int sort_by_ssa_version_or_uid (const void *p1, const void *p2)
 
static int sort_by_size_then_ssa_version_or_uid (const void *p1, const void *p2)
 
static void worker_single_copy (basic_block from, basic_block to, hash_set< tree > *def_escapes_block, hash_set< tree > *worker_partitioned_uses, tree record_type, record_field_map_t *record_field_map, unsigned HOST_WIDE_INT placement, bool isolate_broadcasts, bool has_gang_private_write)
 
static void neuter_worker_single (parallel_g *par, unsigned outer_mask, bitmap worker_single, bitmap vector_single, vec< propagation_set * > *prop_set, hash_set< tree > *partitioned_var_uses, record_field_map_t *record_field_map, blk_offset_map_t *blk_offset_map, bitmap writes_gang_private)
 
static void dfs_broadcast_reachable_1 (basic_block bb, sbitmap reachable)
 
static int sort_size_descending (const void *a, const void *b)
 
static int splay_tree_compare_addr_range (splay_tree_key a, splay_tree_key b)
 
static void splay_tree_free_key (splay_tree_key k)
 
static addr_range first_fit_range (splay_tree s, unsigned HOST_WIDE_INT size, unsigned HOST_WIDE_INT align, addr_range *bounds)
 
static int merge_ranges_1 (splay_tree_node n, void *ptr)
 
static void merge_ranges (splay_tree accum, splay_tree sp)
 
static void oacc_do_neutering (unsigned HOST_WIDE_INT bounds_lo, unsigned HOST_WIDE_INT bounds_hi)
 
static int execute_omp_oacc_neuter_broadcast ()
 
gimple_opt_pass * make_pass_omp_oacc_neuter_broadcast (gcc::context *ctxt)
 

Macro Definition Documentation

◆ GOMP_DIM_SEQ

#define GOMP_DIM_SEQ   GOMP_DIM_MAX

Typedef Documentation

◆ bb_stmt_map_t

Map of basic blocks to gimple stmts.   

◆ blk_offset_map_t

◆ field_map_t

A map from SSA names or var decls to record fields.   

◆ idx_decl_pair_t

typedef std::pair<int, tree> idx_decl_pair_t

◆ propagation_set

Sets of SSA_NAMES or VAR_DECLs to propagate.   

◆ record_field_map_t

For each propagation record type, this is a map from SSA names or var decls
to propagate, to the field in the record type that should be used for
transmission and reception.   

◆ used_range_vec_t

Function Documentation

◆ build_receiver_ref()

static tree build_receiver_ref ( tree var,
tree receiver_decl,
field_map_t * fields )
static

◆ build_sender_ref()

static tree build_sender_ref ( tree var,
tree sender_decl,
field_map_t * fields )
static

◆ dfs_broadcast_reachable_1()

◆ execute_omp_oacc_neuter_broadcast()

◆ find_gang_private_vars()

static void find_gang_private_vars ( hash_set< tree > * gang_private_vars)
static
Gang-private variables (typically placed in a GPU's shared memory) do not
need to be processed by the worker-propagation mechanism.  Populate the
GANG_PRIVATE_VARS set with any such variables found in the current
function.   

References cfun, FOR_EACH_BB_FN, gcc_assert, ggc_alloc(), gimple_call_arg(), gimple_call_internal_p(), gimple_call_num_args(), gsi_end_p(), gsi_next(), gsi_start_bb(), gsi_stmt(), i, TREE_CODE, TREE_INT_CST_LOW, and TREE_OPERAND.

Referenced by oacc_do_neutering().

◆ find_local_vars_to_propagate()

◆ find_partitioned_var_uses()

static void find_partitioned_var_uses ( parallel_g * par,
unsigned outer_mask,
hash_set< tree > * partitioned_var_uses )
static

◆ find_partitioned_var_uses_1()

static tree find_partitioned_var_uses_1 ( tree * node,
int * ,
void * data )
static
Callback for walk_gimple_stmt to find RHS VAR_DECLs (uses) in a
statement.   

References ggc_alloc(), NULL_TREE, and VAR_P.

Referenced by find_partitioned_var_uses().

◆ find_ssa_names_to_propagate()

◆ first_fit_range()

static addr_range first_fit_range ( splay_tree s,
unsigned HOST_WIDE_INT size,
unsigned HOST_WIDE_INT align,
addr_range * bounds )
static

References ggc_alloc().

Referenced by oacc_do_neutering().

◆ install_var_field()

◆ local_var_based_p()

static bool local_var_based_p ( tree decl)
static

◆ make_pass_omp_oacc_neuter_broadcast()

gimple_opt_pass * make_pass_omp_oacc_neuter_broadcast ( gcc::context * ctxt)

References ggc_alloc().

◆ mask_name()

static const char * mask_name ( unsigned mask)
static

◆ merge_ranges()

static void merge_ranges ( splay_tree accum,
splay_tree sp )
static

References ggc_alloc(), and merge_ranges_1().

Referenced by oacc_do_neutering().

◆ merge_ranges_1()

static int merge_ranges_1 ( splay_tree_node n,
void * ptr )
static

References ggc_alloc(), MAX, MIN, and splay_tree_node< T >::value().

Referenced by merge_ranges().

◆ neuter_worker_single()

◆ oacc_do_neutering()

◆ omp_sese_active_worker_call()

static bool omp_sese_active_worker_call ( gcall * call)
static
Calls to OpenACC routines are made by all workers/wavefronts/warps, since
the routine likely contains partitioned loops (else will do its own
neutering and variable propagation). Return TRUE if a function call CALL
should be made in (worker) single mode instead, rather than redundant
mode.   

References gimple_call_fndecl(), GOMP_DIM_SEQ, oacc_fn_attrib_level(), and oacc_get_fn_attrib().

Referenced by neuter_worker_single(), and omp_sese_split_blocks().

◆ omp_sese_discover_pars()

static parallel_g * omp_sese_discover_pars ( bb_stmt_map_t * map)
static
DFS walk the CFG looking for fork & join markers.  Construct
loop structures as we go.  MAP is a mapping of basic blocks
to head & tail markers, discovered when splitting blocks.  This
speeds up the discovery.  We rely on the BB visited flag having
been cleared when splitting blocks.   
Adapted from 'gcc/config/nvptx/nvptx.cc:nvptx_discover_pars'.   

References BB_VISITED, cfun, dump_file, ENTRY_BLOCK_PTR_FOR_FN, EXIT_BLOCK_PTR_FOR_FN, basic_block_def::flags, ggc_alloc(), map, omp_sese_dump_pars(), and omp_sese_find_par().

Referenced by oacc_do_neutering().

◆ omp_sese_dump_pars()

static void omp_sese_dump_pars ( parallel_g * par,
unsigned depth )
static
Dump this parallel and all its inner parallels.   
Adapted from 'gcc/config/nvptx/nvptx.cc:nvptx_dump_pars'.   

References dump_file, ggc_alloc(), basic_block_def::index, mask_name(), and omp_sese_dump_pars().

Referenced by omp_sese_discover_pars(), and omp_sese_dump_pars().

◆ omp_sese_find_par()

static parallel_g * omp_sese_find_par ( bb_stmt_map_t * map,
parallel_g * par,
basic_block block )
static
If BLOCK contains a fork/join marker, process it to create or
terminate a loop structure.  Add this block to the current loop,
and then walk successor blocks.    
Adapted from 'gcc/config/nvptx/nvptx.cc:nvptx_find_par'.   

References BB_VISITED, basic_block_def::flags, FOR_EACH_EDGE, gcc_assert, gcc_unreachable, ggc_alloc(), gimple_call_arg(), gimple_call_internal_p(), gimple_nop_p(), gsi_last_bb(), gsi_stmt(), is_gimple_assign(), map, omp_sese_find_par(), single_pred(), basic_block_def::succs, and TREE_INT_CST_LOW.

Referenced by omp_sese_discover_pars(), and omp_sese_find_par().

◆ omp_sese_split_blocks()

static void omp_sese_split_blocks ( bb_stmt_map_t * map)
static
Split basic blocks such that the fork and join markers are each at
the start of their basic blocks.  Thus afterwards each block will
have a single partitioning mode.  We also do the same for return
insns, as they are executed by every thread.  Return the
partitioning mode of the function as a whole.  Populate MAP with
head and tail blocks.  We also clear the BB visited flag, which is
used when finding partitions.   
Adapted from 'gcc/config/nvptx/nvptx.cc:nvptx_split_blocks'.   

References boolean_false_node, boolean_type_node, cfun, basic_block_def::flags, FOR_ALL_BB_FN, gcc_assert, ggc_alloc(), gimple_assign_lhs(), gimple_bb(), gimple_build_assign(), gimple_build_cond(), gimple_build_nop(), gimple_call_arg(), gimple_call_internal_p(), gimple_cond_false_label(), gimple_cond_lhs(), gimple_cond_rhs(), gimple_cond_true_label(), gimple_expr_code(), gsi_end_p(), gsi_for_stmt(), gsi_insert_before(), gsi_next(), gsi_one_before_end_p(), gsi_prev(), gsi_replace(), GSI_SAME_STMT, gsi_start_bb(), gsi_stmt(), is_gimple_assign(), local_var_based_p(), make_ssa_name(), map, omp_sese_active_worker_call(), single_succ(), split_block(), TREE_CODE, TREE_INT_CST_LOW, TREE_OPERAND, and worklist.

Referenced by oacc_do_neutering().

◆ populate_single_mode_bitmaps()

static void populate_single_mode_bitmaps ( parallel_g * par,
bitmap worker_single,
bitmap vector_single,
unsigned outer_mask,
int depth )
static

◆ sort_by_size_then_ssa_version_or_uid()

static int sort_by_size_then_ssa_version_or_uid ( const void * p1,
const void * p2 )
static

◆ sort_by_ssa_version_or_uid()

static int sort_by_ssa_version_or_uid ( const void * p1,
const void * p2 )
static

◆ sort_size_descending()

static int sort_size_descending ( const void * a,
const void * b )
static

References a, b, ggc_alloc(), tree_to_uhwi(), and TYPE_SIZE_UNIT.

Referenced by oacc_do_neutering().

◆ splay_tree_compare_addr_range()

static int splay_tree_compare_addr_range ( splay_tree_key a,
splay_tree_key b )
static

References a, b, and ggc_alloc().

Referenced by oacc_do_neutering().

◆ splay_tree_free_key()

static void splay_tree_free_key ( splay_tree_key k)
static

References ggc_alloc().

Referenced by oacc_do_neutering().

◆ worker_single_copy()

◆ worker_single_simple()

static void worker_single_simple ( basic_block from,
basic_block to,
hash_set< tree > * def_escapes_block )
static
Transform basic blocks FROM, TO (which may be the same block) into:
  if (GOACC_single_start ())
    BLOCK;
  GOACC_barrier ();
                             \  |  /
                             +----+
                             |    |        (new) predicate block
                             +----+--
  \  |  /   \  |  /            |t    \
  +----+    +----+           +----+  |
  |    |    |    |     ===>  |    |  | f   (old) from block
  +----+    +----+           +----+  |
    |       t/  \f             |    /
                             +----+/
 (split  (split before       |    |        skip block
 at end)   condition)        +----+
                             t/  \f

References add_phi_arg(), boolean_true_node, build_zero_cst(), builtin_decl_explicit(), copy_ssa_name(), create_new_def_for(), create_phi_node(), create_tmp_var, EDGE_COUNT, fold_convert_loc(), FOR_EACH_SSA_TREE_OPERAND, gcc_assert, ggc_alloc(), gimple_build_assign(), gimple_build_call(), gimple_build_cond(), gimple_call_set_lhs(), gimple_phi_result_ptr(), gsi_after_labels(), GSI_CONTINUE_LINKING, gsi_end_p(), gsi_insert_after(), gsi_insert_before(), gsi_last_bb(), GSI_NEW_STMT, gsi_next(), gsi_prev(), gsi_start_bb(), gsi_stmt(), profile_probability::likely(), make_edge(), NULL, NULL_TREE, single_succ_edge(), split_block(), split_edge(), SSA_OP_DEF, basic_block_def::succs, TREE_TYPE, UNKNOWN_LOCATION, and update_stmt().

Referenced by neuter_worker_single().