GCC Middle and Back End API Reference
omp-oacc-kernels-decompose.cc File Reference
#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "backend.h"
#include "target.h"
#include "tree.h"
#include "langhooks.h"
#include "gimple.h"
#include "tree-pass.h"
#include "cgraph.h"
#include "fold-const.h"
#include "gimplify.h"
#include "gimple-iterator.h"
#include "gimple-walk.h"
#include "gomp-constants.h"
#include "omp-general.h"
#include "diagnostic-core.h"
Include dependency graph for omp-oacc-kernels-decompose.cc:

Data Structures

struct  adjust_nested_loop_clauses_wi_info
 
class  control_flow_regions
 

Functions

static gimple * top_level_omp_for_in_stmt (gimple *stmt)
 
static tree adjust_region_code_walk_stmt_fn (gimple_stmt_iterator *gsi_p, bool *handled_ops_p, struct walk_stmt_info *wi)
 
static void adjust_region_code (gimple_seq gs, int *region_code)
 
static tree visit_loops_in_gang_single_region (gimple_stmt_iterator *gsi_p, bool *handled_ops_p, struct walk_stmt_info *)
 
static void make_loops_gang_single (gimple_seq gs)
 
static gimple * make_region_seq (location_t loc, gimple_seq stmts, tree num_gangs_clause, tree num_workers_clause, tree vector_length_clause, tree clauses)
 
static tree add_parent_or_loop_num_clause (tree parent_clause, tree loop_clause, omp_clause_code clause_code, tree clauses)
 
static tree adjust_nested_loop_clauses (gimple_stmt_iterator *gsi_p, bool *, struct walk_stmt_info *wi)
 
static tree transform_kernels_loop_clauses (gimple *omp_for, tree num_gangs_clause, tree num_workers_clause, tree vector_length_clause, tree clauses)
 
static gimple * make_region_loop_nest (gimple *omp_for, gimple_seq stmts, tree num_gangs_clause, tree num_workers_clause, tree vector_length_clause, tree clauses)
 
static tree flatten_binds (gbind *bind, bool include_toplevel_vars=false)
 
static gimple * make_data_region_try_statement (location_t loc, gimple *body)
 
static gimple * maybe_build_inner_data_region (location_t loc, gimple *body, tree inner_bind_vars, gimple *inner_cleanup)
 
static void add_wait (location_t loc, gimple_seq *region_body)
 
static void add_async_clauses_and_wait (location_t loc, gimple_seq *region_body)
 
static gimple * decompose_kernels_region_body (gimple *kernels_region, tree kernels_clauses)
 
static gimple * omp_oacc_kernels_decompose_1 (gimple *kernels_stmt)
 
static tree omp_oacc_kernels_decompose_callback_stmt (gimple_stmt_iterator *gsi_p, bool *handled_ops_p, struct walk_stmt_info *)
 
static unsigned int omp_oacc_kernels_decompose (void)
 
gimple_opt_pass * make_pass_omp_oacc_kernels_decompose (gcc::context *ctxt)
 

Function Documentation

◆ add_async_clauses_and_wait()

static void add_async_clauses_and_wait ( location_t loc,
gimple_seq * region_body )
static
Helper function of decompose_kernels_region_body.  The statements in
REGION_BODY are expected to be decomposed parts; add an 'async' clause to
each.  Also add a 'wait' directive at the end of the sequence.   

References add_wait(), as_a(), build_int_cst(), build_omp_clause(), gimple_omp_target_clauses(), gimple_omp_target_set_clauses(), gsi_end_p(), gsi_next(), gsi_start(), gsi_stmt(), integer_type_node, OMP_CLAUSE_ASYNC, OMP_CLAUSE_CHAIN, and OMP_CLAUSE_OPERAND.

Referenced by decompose_kernels_region_body().

◆ add_parent_or_loop_num_clause()

static tree add_parent_or_loop_num_clause ( tree parent_clause,
tree loop_clause,
omp_clause_code clause_code,
tree clauses )
static
Helper function for make_region_loop_nest.  Adds a 'num_gangs'
('num_workers', 'vector_length') clause to the given CLAUSES, either the one
from the parent compute construct (PARENT_CLAUSE) or a new one based on the
loop's own LOOP_CLAUSE ('gang (num: N)' or similar for 'worker' or 'vector'
clauses) with the given CLAUSE_CODE.  Does nothing if neither PARENT_CLAUSE
nor LOOP_CLAUSE exist.  Returns the new clauses.   

References build_omp_clause(), NULL, OMP_CLAUSE_CHAIN, OMP_CLAUSE_LOCATION, OMP_CLAUSE_OPERAND, and unshare_expr().

Referenced by make_region_loop_nest(), and transform_kernels_loop_clauses().

◆ add_wait()

◆ adjust_nested_loop_clauses()

◆ adjust_region_code()

static void adjust_region_code ( gimple_seq gs,
int * region_code )
static
Adjust the REGION_CODE for the region in GS.   

References adjust_region_code_walk_stmt_fn(), NULL, and walk_gimple_seq().

Referenced by make_region_loop_nest(), and make_region_seq().

◆ adjust_region_code_walk_stmt_fn()

static tree adjust_region_code_walk_stmt_fn ( gimple_stmt_iterator * gsi_p,
bool * handled_ops_p,
struct walk_stmt_info * wi )
static

◆ decompose_kernels_region_body()

◆ flatten_binds()

static tree flatten_binds ( gbind * bind,
bool include_toplevel_vars = false )
static
Eliminate any binds directly inside BIND by adding their statements to
BIND (i.e., modifying it in place), excluding binds that hold only an
OMP_FOR loop and associated setup/cleanup code.  Recurse into binds but
not other statements.  Return a chain of the local variables of eliminated
binds, i.e., the local variables found in nested binds.  If
INCLUDE_TOPLEVEL_VARS is true, this also includes the variables belonging
to BIND itself.  

References as_a(), flatten_binds(), gcc_assert, gimple_bind_body(), gimple_bind_set_body(), gimple_bind_vars(), gimple_seq_add_seq(), gimple_seq_add_stmt(), gsi_end_p(), gsi_next(), gsi_start(), gsi_stmt(), NULL, top_level_omp_for_in_stmt(), and TREE_CHAIN.

Referenced by decompose_kernels_region_body(), and flatten_binds().

◆ make_data_region_try_statement()

static gimple * make_data_region_try_statement ( location_t loc,
gimple * body )
static
Helper function for places where we construct data regions.  Wraps the BODY
inside a try-finally construct at LOC that calls __builtin_GOACC_data_end
in its cleanup block.  Returns this try statement.   

References builtin_decl_explicit(), gimple_build_call(), gimple_build_try(), gimple_seq_add_stmt(), gimple_set_location(), GIMPLE_TRY_FINALLY, and NULL.

Referenced by maybe_build_inner_data_region(), and omp_oacc_kernels_decompose_1().

◆ make_loops_gang_single()

static void make_loops_gang_single ( gimple_seq gs)
static
Visit all nested OpenACC loops in the sequence indicated by GS.  This
sequence is expected to be inside a gang-single region.  Issue a warning
for any loops inside it that have gang clauses and remove the clauses.   

References NULL, visit_loops_in_gang_single_region(), and walk_gimple_seq().

Referenced by make_region_seq().

◆ make_pass_omp_oacc_kernels_decompose()

gimple_opt_pass * make_pass_omp_oacc_kernels_decompose ( gcc::context * ctxt)

◆ make_region_loop_nest()

static gimple * make_region_loop_nest ( gimple * omp_for,
gimple_seq stmts,
tree num_gangs_clause,
tree num_workers_clause,
tree vector_length_clause,
tree clauses )
static
Construct a possibly gang-parallel compute construct containing the STMTS,
which must be identical to, or a bind containing, the loop OMP_FOR.

The NUM_GANGS_CLAUSE, NUM_WORKERS_CLAUSE, and VECTOR_LENGTH_CLAUSE are
optional clauses from the original kernels region and must not be contained
in the other CLAUSES. The newly created compute construct is annotated with
the optional NUM_GANGS_CLAUSE as well as the other CLAUSES.  If there is no
NUM_GANGS_CLAUSE but the loop has a 'gang (num: N)' clause, that is
converted to a 'num_gangs (N)' clause on the new compute construct, and
similarly for 'worker' and 'vector' clauses.

The outermost loop gets an 'auto' clause unless there already is a
'seq'/'independent'/'auto' clause.  Nested loops inside OMP_FOR are treated
similarly by the adjust_nested_loop_clauses function.   

References add_parent_or_loop_num_clause(), adjust_region_code(), dump_enabled_p(), dump_printf_loc(), gcc_unreachable, GF_OMP_TARGET_KIND_OACC_KERNELS, GF_OMP_TARGET_KIND_OACC_PARALLEL_KERNELS_PARALLELIZED, gimple_build_bind(), gimple_build_omp_target(), gimple_location(), gimple_set_location(), make_node(), MSG_NOTE, NULL, OMP_CLAUSE_NUM_GANGS, OMP_CLAUSE_NUM_WORKERS, OMP_CLAUSE_VECTOR_LENGTH, transform_kernels_loop_clauses(), and unshare_expr().

Referenced by decompose_kernels_region_body().

◆ make_region_seq()

static gimple * make_region_seq ( location_t loc,
gimple_seq stmts,
tree num_gangs_clause,
tree num_workers_clause,
tree vector_length_clause,
tree clauses )
static
Construct a "gang-single" compute construct at LOC containing the STMTS.
Annotate with CLAUSES, which must not contain a 'num_gangs' clause, and an
additional 'num_gangs (1)' clause to force "gang-single" execution.   

References adjust_region_code(), build_omp_clause(), dump_enabled_p(), dump_printf_loc(), gcc_unreachable, GF_OMP_TARGET_KIND_OACC_KERNELS, GF_OMP_TARGET_KIND_OACC_PARALLEL_KERNELS_GANG_SINGLE, gimple_build_bind(), gimple_build_omp_target(), gimple_omp_set_body(), gimple_seq_first(), gimple_set_location(), integer_one_node, make_loops_gang_single(), make_node(), MSG_NOTE, NULL, OMP_CLAUSE_CHAIN, OMP_CLAUSE_NUM_GANGS, OMP_CLAUSE_OPERAND, and unshare_expr().

Referenced by decompose_kernels_region_body().

◆ maybe_build_inner_data_region()

static gimple * maybe_build_inner_data_region ( location_t loc,
gimple * body,
tree inner_bind_vars,
gimple * inner_cleanup )
static
If INNER_BIND_VARS holds variables, build an OpenACC data region with
location LOC containing BODY and having 'create (var)' clauses for each
variable (as a side effect, such variables also get TREE_ADDRESSABLE set).
If INNER_CLEANUP is present, add a try-finally statement with
this cleanup code in the finally block.  Return the new data region, or
the original BODY if no data region was needed.   

References build_omp_clause(), current_function_decl, DECL_ARTIFICIAL, DECL_SIZE_UNIT, lang_hooks::decls, dump_enabled_p(), dump_printf_loc(), dump_user_location_t::from_location_t(), gcc_assert, lang_hooks_for_decls::get_generic_function_decl, GF_OMP_TARGET_KIND_OACC_DATA_KERNELS, gimple_build_bind(), gimple_build_omp_target(), gimple_build_try(), gimple_omp_set_body(), gimple_set_location(), GIMPLE_TRY_FINALLY, make_data_region_try_statement(), make_node(), MSG_NOTE, NULL, OMP_CLAUSE_CHAIN, OMP_CLAUSE_DECL, OMP_CLAUSE_MAP, OMP_CLAUSE_MAP_DECL_MAKE_ADDRESSABLE, OMP_CLAUSE_SET_MAP_KIND, OMP_CLAUSE_SIZE, TREE_ADDRESSABLE, TREE_CHAIN, and TREE_CODE.

Referenced by decompose_kernels_region_body().

◆ omp_oacc_kernels_decompose()

static unsigned int omp_oacc_kernels_decompose ( void )
static

◆ omp_oacc_kernels_decompose_1()

◆ omp_oacc_kernels_decompose_callback_stmt()

static tree omp_oacc_kernels_decompose_callback_stmt ( gimple_stmt_iterator * gsi_p,
bool * handled_ops_p,
struct walk_stmt_info *  )
static
Decompose OpenACC 'kernels' constructs in the current function.   

References GF_OMP_TARGET_KIND_OACC_KERNELS, gimple_omp_target_kind(), gsi_replace(), gsi_stmt(), NULL, and omp_oacc_kernels_decompose_1().

Referenced by omp_oacc_kernels_decompose().

◆ top_level_omp_for_in_stmt()

static gimple * top_level_omp_for_in_stmt ( gimple * stmt)
static
Decompose OpenACC 'kernels' constructs into parts, a sequence of compute
   constructs

   Copyright (C) 2020-2024 Free Software Foundation, Inc.

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free
Software Foundation; either version 3, or (at your option) any later
version.

GCC is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.   
This preprocessing pass is run immediately before lower_omp.  It decomposes
  OpenACC 'kernels' constructs into parts, a sequence of compute constructs.

  The translation is as follows:
    - The entire 'kernels' region is turned into a 'data' region with clauses
      taken from the 'kernels' region.  New 'create' clauses are added for all
      variables declared at the top level in the kernels region.
    - Any loop nests annotated with an OpenACC 'loop' directive are wrapped in
      a new compute construct.
        - 'loop' directives without an explicit 'independent' or 'seq' clause
          get an 'auto' clause added; other clauses are preserved on the loop
          or moved to the new surrounding compute construct, as applicable.
    - Any sequences of other code (non-loops, non-OpenACC 'loop's) are wrapped
      in a new "gang-single" compute construct: 'worker'/'vector' parallelism is
      preserved, but 'num_gangs (1)' is enforced.
    - Both points above only apply at the topmost level in the region, that
      is, the transformation does not introduce new compute constructs inside
      nested statement bodies.  In particular, this means that a
      gang-parallelizable loop inside an 'if' statement is made "gang-single".
    - In order to make the host wait only once for the whole region instead
      of once per device kernel launch, the new compute constructs are
      annotated 'async'.  Unless the original 'kernels' construct already was
      marked 'async', the entire region ends with a 'wait' directive.  If the
      original 'kernels' construct was marked 'async', the synthesized 'async'
      clauses use the original 'kernels' construct's 'async' argument
      (possibly implicit).
Helper function for decompose_kernels_region_body.  If STMT contains a
"top-level" OMP_FOR statement, returns a pointer to that statement;
returns NULL otherwise.

A "top-level" OMP_FOR statement is one that is possibly accompanied by
small snippets of setup code.  Specifically, this function accepts an
OMP_FOR possibly wrapped in a singleton bind and a singleton try
statement to allow for a local loop variable, but not an OMP_FOR
statement nested in any other constructs.  Alternatively, it accepts a
non-singleton bind containing only assignments and then an OMP_FOR
statement at the very end.  The former style can be generated by the C
frontend, the latter by the Fortran frontend.   

References as_a(), gimple_bind_body(), gimple_seq_first_stmt(), gimple_seq_singleton_p(), gimple_try_eval(), gsi_end_p(), gsi_next(), gsi_one_before_end_p(), gsi_start(), gsi_stmt(), and NULL.

Referenced by control_flow_regions::compute_regions(), decompose_kernels_region_body(), and flatten_binds().

◆ transform_kernels_loop_clauses()

◆ visit_loops_in_gang_single_region()

static tree visit_loops_in_gang_single_region ( gimple_stmt_iterator * gsi_p,
bool * handled_ops_p,
struct walk_stmt_info *  )
static
Helper function for make_loops_gang_single for walking the tree.  If the
statement indicated by GSI_P is an OpenACC for loop with a gang clause,
issue a warning and remove the clause.   

References gcc_unreachable, gimple_location(), gimple_omp_for_clauses(), gimple_omp_for_set_clauses(), gsi_stmt(), internal_error(), NULL, OMP_CLAUSE_CHAIN, OMP_CLAUSE_CODE, OMP_CLAUSE_GANG, sorry(), walk_stmt_info::stmt, and warning_at().

Referenced by make_loops_gang_single().