LCOV - code coverage report
Current view: top level - gcc - omp-offload.cc (source / functions) Coverage Total Hit
Test: gcc.info Lines: 76.2 % 1298 989
Test Date: 2026-02-28 14:20:25 Functions: 83.3 % 60 50
Legend: Lines:     hit not hit

            Line data    Source code
       1              : /* Bits of OpenMP and OpenACC handling that is specific to device offloading
       2              :    and a lowering pass for OpenACC device directives.
       3              : 
       4              :    Copyright (C) 2005-2026 Free Software Foundation, Inc.
       5              : 
       6              : This file is part of GCC.
       7              : 
       8              : GCC is free software; you can redistribute it and/or modify it under
       9              : the terms of the GNU General Public License as published by the Free
      10              : Software Foundation; either version 3, or (at your option) any later
      11              : version.
      12              : 
      13              : GCC is distributed in the hope that it will be useful, but WITHOUT ANY
      14              : WARRANTY; without even the implied warranty of MERCHANTABILITY or
      15              : FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
      16              : for more details.
      17              : 
      18              : You should have received a copy of the GNU General Public License
      19              : along with GCC; see the file COPYING3.  If not see
      20              : <http://www.gnu.org/licenses/>.  */
      21              : 
      22              : #include "config.h"
      23              : #include "system.h"
      24              : #include "coretypes.h"
      25              : #include "backend.h"
      26              : #include "target.h"
      27              : #include "tree.h"
      28              : #include "gimple.h"
      29              : #include "tree-pass.h"
      30              : #include "ssa.h"
      31              : #include "cgraph.h"
      32              : #include "pretty-print.h"
      33              : #include "diagnostic-core.h"
      34              : #include "fold-const.h"
      35              : #include "internal-fn.h"
      36              : #include "langhooks.h"
      37              : #include "gimplify.h"
      38              : #include "gimple-iterator.h"
      39              : #include "gimplify-me.h"
      40              : #include "gimple-walk.h"
      41              : #include "tree-cfg.h"
      42              : #include "tree-into-ssa.h"
      43              : #include "tree-nested.h"
      44              : #include "stor-layout.h"
      45              : #include "common/common-target.h"
      46              : #include "omp-general.h"
      47              : #include "omp-offload.h"
      48              : #include "lto-section-names.h"
      49              : #include "gomp-constants.h"
      50              : #include "gimple-pretty-print.h"
      51              : #include "intl.h"
      52              : #include "stringpool.h"
      53              : #include "attribs.h"
      54              : #include "cfgloop.h"
      55              : #include "context.h"
      56              : #include "convert.h"
      57              : #include "opts.h"
      58              : 
      59              : /* Describe the OpenACC looping structure of a function.  Loops are linked
      60              :    into a tree via PARENT, CHILD and SIBLING.  The entire function is held in a 'NULL' loop.  */
      61              : 
      62              : struct oacc_loop
      63              : {
      64              :   oacc_loop *parent; /* Containing loop.  */
      65              : 
      66              :   oacc_loop *child; /* First inner loop.  */
      67              : 
      68              :   oacc_loop *sibling; /* Next loop within same parent.  */
      69              : 
      70              :   location_t loc; /* Location of the loop start.  */
      71              : 
      72              :   gcall *marker; /* Initial head marker.  */
      73              : 
      74              :   gcall *heads[GOMP_DIM_MAX];  /* Head marker functions (GOMP_DIM_MAX slots).  */
      75              :   gcall *tails[GOMP_DIM_MAX];  /* Tail marker functions (GOMP_DIM_MAX slots).  */
      76              : 
      77              :   tree routine;  /* Pseudo-loop enclosing a routine.  */
      78              : 
      79              :   unsigned mask;   /* Partitioning mask.  */
      80              :   unsigned e_mask; /* Partitioning of element loops (when tiling).  */
      81              :   unsigned inner;  /* Partitioning of inner loops.  */
      82              :   unsigned flags;  /* Partitioning flags.  */
      83              :   vec<gcall *> ifns;  /* Contained loop abstraction functions (calls).  */
      84              :   tree chunk_size; /* Chunk size.  */
      85              :   gcall *head_end; /* Final marker of head sequence.  */
      86              : };
      87              : 
      88              : /* Holds offload tables with decls: functions, variables and indirect functions.  */
      89              : vec<tree, va_gc> *offload_funcs, *offload_vars, *offload_ind_funcs;
      90              : 
      91              : /* Return level at which oacc routine may spawn a partitioned loop, or
      92              :    -1 if it is not a routine (i.e. is an offload fn).  The level is read from the chain in TREE_VALUE (ATTR).  */
      93              : 
      94              : int
      95        11139 : oacc_fn_attrib_level (tree attr)
      96              : {
      97        11139 :   tree pos = TREE_VALUE (attr);
      98              : 
      99        11139 :   if (!TREE_PURPOSE (pos))  /* First entry has no purpose: not a routine.  */
     100              :     return -1;
     101              : 
     102              :   int ix = 0;
     103         5370 :   for (ix = 0; ix != GOMP_DIM_MAX;
     104         3615 :        ix++, pos = TREE_CHAIN (pos))
     105         4401 :     if (!integer_zerop (TREE_PURPOSE (pos)))  /* First nonzero entry gives the level.  */
     106              :       break;
     107              : 
     108              :   return ix;  /* GOMP_DIM_MAX when every entry was zero.  */
     109              : }
     110              : 
     111              : /* Helper function for omp_finish_file routine.  Takes decls from V_DECLS and
     112              :    adds their addresses and, for variables only, their sizes to constructor-vector V_CTOR.  */
     113              : 
     114              : static void
     115           87 : add_decls_addresses_to_decl_constructor (vec<tree, va_gc> *v_decls,
     116              :                                          vec<constructor_elt, va_gc> *v_ctor)
     117              : {
     118           87 :   unsigned len = vec_safe_length (v_decls);
     119          162 :   for (unsigned i = 0; i < len; i++)
     120              :     {
     121           75 :       tree it = (*v_decls)[i];
     122           75 :       bool is_var = VAR_P (it);
     123           75 :       bool is_link_var
     124              :         = is_var
     125              : #ifdef ACCEL_COMPILER
     126              :           && DECL_HAS_VALUE_EXPR_P (it)
     127              : #endif
     128           75 :           && lookup_attribute ("omp declare target link", DECL_ATTRIBUTES (it));
     129              : 
     130              :       /* See also omp_finish_file and output_offload_tables in lto-cgraph.cc.  */
     131           75 :       if (!in_lto_p && !symtab_node::get (it))  /* Outside LTO, skip decls without a symtab node.  */
     132            0 :         continue;
     133              : 
     134           75 :       tree size = NULL_TREE;
     135           75 :       if (is_var)
     136            0 :         size = fold_convert (const_ptr_type_node, DECL_SIZE_UNIT (it));  /* Only variables get a size.  */
     137              : 
     138           75 :       tree addr;
     139           75 :       if (!is_link_var)
     140           75 :         addr = build_fold_addr_expr (it);
     141              :       else
     142              :         {
     143              : #ifdef ACCEL_COMPILER
     144              :           /* For "omp declare target link" vars add address of the pointer to
     145              :              the target table, instead of address of the var.  */
     146              :           tree value_expr = DECL_VALUE_EXPR (it);
     147              :           tree link_ptr_decl = TREE_OPERAND (value_expr, 0);
     148              :           varpool_node::finalize_decl (link_ptr_decl);
     149              :           addr = build_fold_addr_expr (link_ptr_decl);
     150              : #else
     151            0 :           addr = build_fold_addr_expr (it);
     152              : #endif
     153              : 
     154              :           /* Most significant bit of the size marks "omp declare target link"
     155              :              vars in host and target tables.  */
     156            0 :           unsigned HOST_WIDE_INT isize = tree_to_uhwi (size);
     157            0 :           isize |= 1ULL << (int_size_in_bytes (const_ptr_type_node)
     158            0 :                             * BITS_PER_UNIT - 1);
     159            0 :           size = wide_int_to_tree (const_ptr_type_node, isize);
     160              :         }
     161              : 
     162           75 :       CONSTRUCTOR_APPEND_ELT (v_ctor, NULL_TREE, addr);  /* Every decl contributes its address...  */
     163           75 :       if (is_var)
     164            0 :         CONSTRUCTOR_APPEND_ELT (v_ctor, NULL_TREE, size);  /* ...and a variable also its size.  */
     165              :     }
     166           87 : }
     167              : 
     168              : /* Return true if DECL is a function for which its references should be
     169              :    analyzed: a FUNCTION_DECL marked "omp declare target" but not "omp declare target host".  */
     170              : 
     171              : static bool
     172       198040 : omp_declare_target_fn_p (tree decl)
     173              : {
     174       198040 :   return (TREE_CODE (decl) == FUNCTION_DECL
     175       198040 :           && lookup_attribute ("omp declare target", DECL_ATTRIBUTES (decl))
     176        24335 :           && !lookup_attribute ("omp declare target host",
     177        24335 :                                 DECL_ATTRIBUTES (decl))
     178       222331 :           && (!flag_openacc
     179           45 :               || oacc_get_fn_attrib (decl) == NULL_TREE));  /* With flag_openacc set, oacc_get_fn_attrib must also find nothing.  */
     180              : }
     181              : 
     182              : /* Return true if DECL is a variable for which its initializer references
     183              :    should be analyzed: a variable marked "omp declare target" but not "omp declare target link".  */
     184              : 
     185              : static bool
     186       110381 : omp_declare_target_var_p (tree decl)
     187              : {
     188       110381 :   return (VAR_P (decl)
     189       110381 :           && lookup_attribute ("omp declare target", DECL_ATTRIBUTES (decl))
     190       110866 :           && !lookup_attribute ("omp declare target link",
     191          485 :                                 DECL_ATTRIBUTES (decl)));
     192              : }
     193              : 
     194              : /* Helper function for omp_discover_implicit_declare_target, called through
     195              :    walk_tree.  Mark referenced FUNCTION_DECLs implicitly as
     196              :    declare target to.  DATA is a vec<tree> * that receives newly marked functions still to be scanned.  */
     197              : 
     198              : static tree
     199       968720 : omp_discover_declare_target_tgt_fn_r (tree *tp, int *walk_subtrees, void *data)
     200              : {
     201       968720 :   if (TREE_CODE (*tp) == CALL_EXPR
     202        25563 :       && CALL_EXPR_FN (*tp)
     203        25512 :       && TREE_CODE (CALL_EXPR_FN (*tp)) == ADDR_EXPR
     204        25463 :       && TREE_CODE (TREE_OPERAND (CALL_EXPR_FN (*tp), 0)) == FUNCTION_DECL
     205       994183 :       && lookup_attribute ("omp declare variant base",
     206        25463 :                            DECL_ATTRIBUTES (TREE_OPERAND (CALL_EXPR_FN (*tp),
     207              :                                                           0))))
     208              :     {  /* Direct call to a function carrying "omp declare variant base".  */
     209           83 :       tree fn = TREE_OPERAND (CALL_EXPR_FN (*tp), 0);
     210          196 :       for (tree attr = DECL_ATTRIBUTES (fn); attr; attr = TREE_CHAIN (attr))  /* Visit each such attribute.  */
     211              :         {
     212          114 :           attr = lookup_attribute ("omp declare variant base", attr);
     213          114 :           if (attr == NULL_TREE)
     214              :             break;
     215          113 :           tree purpose = TREE_PURPOSE (TREE_VALUE (attr));  /* NOTE(review): presumably the variant decl -- confirm.  */
     216          113 :           if (TREE_CODE (purpose) == FUNCTION_DECL)
     217          113 :             omp_discover_declare_target_tgt_fn_r (&purpose, walk_subtrees, data);  /* Treat it as referenced too.  */
     218              :         }
     219              :     }
     220       968637 :   else if (TREE_CODE (*tp) == FUNCTION_DECL)
     221              :     {
     222        20415 :       tree decl = *tp;
     223        20415 :       tree id = get_identifier ("omp declare target");
     224        20415 :       symtab_node *node = symtab_node::get (*tp);
     225        20415 :       if (node != NULL)
     226              :         {
     227        13613 :           while (node->alias_target  /* Follow alias_target links that are FUNCTION_DECLs, marking each alias.  */
     228        13613 :                  && TREE_CODE (node->alias_target) == FUNCTION_DECL)
     229              :             {
     230            4 :               if (!omp_declare_target_fn_p (node->decl)
     231            8 :                   && !lookup_attribute ("omp declare target host",
     232            4 :                                         DECL_ATTRIBUTES (node->decl)))
     233              :                 {
     234            4 :                   node->offloadable = 1;
     235            4 :                   DECL_ATTRIBUTES (node->decl)
     236            8 :                     = tree_cons (id, NULL_TREE, DECL_ATTRIBUTES (node->decl));
     237              :                 }
     238            4 :               node = symtab_node::get (node->alias_target);
     239              :             }
     240        13609 :           symtab_node *new_node = node->ultimate_alias_target ();
     241        13609 :           decl = new_node->decl;  /* From here on DECL is the ultimate alias target's decl.  */
     242        15324 :           while (node != new_node)  /* Mark every alias on the way to the ultimate target.  */
     243              :             {
     244         1715 :               if (!omp_declare_target_fn_p (node->decl)
     245         2597 :                   && !lookup_attribute ("omp declare target host",
     246          882 :                                         DECL_ATTRIBUTES (node->decl)))
     247              :                 {
     248          882 :                   node->offloadable = 1;
     249          882 :                   DECL_ATTRIBUTES (node->decl)
     250         1764 :                     = tree_cons (id, NULL_TREE, DECL_ATTRIBUTES (node->decl));
     251              :                 }
     252         1715 :               gcc_assert (node->alias && node->analyzed);
     253         1715 :               node = node->get_alias_target ();
     254              :             }
     255        13609 :           node->offloadable = 1;
     256        13609 :           if (ENABLE_OFFLOADING)
     257              :             g->have_offload = true;
     258              :         }
     259        20415 :       if (omp_declare_target_fn_p (decl)
     260        29022 :           || lookup_attribute ("omp declare target host",
     261         8607 :                                DECL_ATTRIBUTES (decl)))
     262        11808 :         return NULL_TREE;  /* Already declare target, or marked host: nothing to add.  */
     263              : 
     264         8607 :       if (DECL_SAVED_TREE (decl)
     265         8607 :           && (!DECL_EXTERNAL (decl) || DECL_DECLARED_INLINE_P (decl)))
     266         6557 :         ((vec<tree> *) data)->safe_push (decl);  /* Queue the function on DATA.  */
     267         8607 :       DECL_ATTRIBUTES (decl) = tree_cons (id, NULL_TREE,
     268         8607 :                                           DECL_ATTRIBUTES (decl));
     269              :     }
     270       948222 :   else if (TYPE_P (*tp))
     271           44 :     *walk_subtrees = 0;  /* Do not descend into types.  */
     272       948178 :   else if (TREE_CODE (*tp) == OMP_TARGET)
     273              :     {
     274         1672 :       tree c = omp_find_clause (OMP_CLAUSES (*tp), OMP_CLAUSE_DEVICE);
     275         1672 :       if (c && OMP_CLAUSE_DEVICE_ANCESTOR (c))
     276           43 :         *walk_subtrees = 0;  /* Do not descend into target regions with a device ancestor clause.  */
     277              :     }
     278              :   return NULL_TREE;
     279              : }
     280              : 
     281              : /* Like omp_discover_declare_target_tgt_fn_r, but ignore references outside of OMP_TARGET regions.  */
     282              : 
     283              : static tree
     284       664106 : omp_discover_declare_target_fn_r (tree *tp, int *walk_subtrees, void *data)
     285              : {
     286       664106 :   if (TREE_CODE (*tp) == OMP_TARGET)
     287              :     {
     288        11606 :       tree c = omp_find_clause (OMP_CLAUSES (*tp), OMP_CLAUSE_DEVICE);
     289        11606 :       if (!c || !OMP_CLAUSE_DEVICE_ANCESTOR (c))  /* No device ancestor clause: scan the target body.  */
     290        11548 :         walk_tree_without_duplicates (&OMP_TARGET_BODY (*tp),
     291              :                                       omp_discover_declare_target_tgt_fn_r,
     292              :                                       data);
     293        11606 :       *walk_subtrees = 0;  /* This walk itself never descends into a target region.  */
     294              :     }
     295       652500 :   else if (TYPE_P (*tp))
     296          299 :     *walk_subtrees = 0;  /* Do not descend into types.  */
     297       664106 :   return NULL_TREE;
     298              : }
     299              : 
     300              : /* Helper function for omp_discover_implicit_declare_target, called through
     301              :    walk_tree.  Mark referenced FUNCTION_DECLs and global VAR_DECLs implicitly as
     302              :    declare target to.  DATA is a vec<tree> * that receives decls still to be scanned.  */
     303              : 
     304              : static tree
     305          514 : omp_discover_declare_target_var_r (tree *tp, int *walk_subtrees, void *data)
     306              : {
     307          514 :   if (TREE_CODE (*tp) == FUNCTION_DECL)
     308           24 :     return omp_discover_declare_target_tgt_fn_r (tp, walk_subtrees, data);  /* Functions use the function callback.  */
     309          490 :   else if (VAR_P (*tp)
     310           54 :            && is_global_var (*tp)
     311          533 :            && !omp_declare_target_var_p (*tp))
     312              :     {
     313           15 :       tree id = get_identifier ("omp declare target");
     314           15 :       if (lookup_attribute ("omp declare target link", DECL_ATTRIBUTES (*tp)))
     315              :         {
     316            0 :           error_at (DECL_SOURCE_LOCATION (*tp),
     317              :                     "%qD specified both in declare target %<link%> and "
     318              :                     "implicitly in %<to%> clauses", *tp);
     319            0 :           DECL_ATTRIBUTES (*tp)  /* After the error, drop the link attribute.  */
     320            0 :             = remove_attribute ("omp declare target link", DECL_ATTRIBUTES (*tp));
     321              :         }
     322           15 :       if (TREE_STATIC (*tp) && lang_hooks.decls.omp_get_decl_init (*tp))
     323           15 :         ((vec<tree> *) data)->safe_push (*tp);  /* Queue static variables that have an initializer.  */
     324           15 :       DECL_ATTRIBUTES (*tp) = tree_cons (id, NULL_TREE, DECL_ATTRIBUTES (*tp));
     325           15 :       symtab_node *node = symtab_node::get (*tp);
     326           15 :       if (node != NULL && !node->offloadable)
     327              :         {
     328           15 :           node->offloadable = 1;
     329           15 :           if (ENABLE_OFFLOADING)
     330              :             {
     331              :               g->have_offload = true;
     332              :               if (is_a <varpool_node *> (node))
     333              :                 vec_safe_push (offload_vars, node->decl);  /* Record the variable in the offload table.  */
     334              :             }
     335              :         }
     336              :     }
     337          475 :   else if (TYPE_P (*tp))
     338            0 :     *walk_subtrees = 0;  /* Do not descend into types.  */
     339              :   return NULL_TREE;
     340              : }
     341              : 
     342              : /* Perform the OpenMP implicit declare target to discovery, driven by a worklist of decls.  */
     343              : 
     344              : void
     345         9360 : omp_discover_implicit_declare_target (void)
     346              : {
     347         9360 :   cgraph_node *node;
     348         9360 :   varpool_node *vnode;
     349         9360 :   auto_vec<tree> worklist;
     350              : 
     351       167559 :   FOR_EACH_DEFINED_FUNCTION (node)
     352       158199 :     if (DECL_SAVED_TREE (node->decl))
     353              :       {
     354       157743 :         struct cgraph_node *cgn;
     355       157743 :         if (lookup_attribute ("omp declare target indirect",
     356       157743 :                               DECL_ATTRIBUTES (node->decl)))
     357          123 :           vec_safe_push (offload_ind_funcs, node->decl);  /* Record in the indirect function table.  */
     358       157743 :         if (omp_declare_target_fn_p (node->decl))
     359         2504 :           worklist.safe_push (node->decl);  /* Seed: declare target functions.  */
     360       155239 :         else if (DECL_STRUCT_FUNCTION (node->decl)
     361       155239 :                  && DECL_STRUCT_FUNCTION (node->decl)->has_omp_target)
     362         6610 :           worklist.safe_push (node->decl);  /* Seed: functions with has_omp_target set.  */
     363       159793 :         for (cgn = first_nested_function (node);  /* Apply the same two tests to nested functions.  */
     364       159793 :              cgn; cgn = next_nested_function (cgn))
     365         2050 :           if (omp_declare_target_fn_p (cgn->decl))
     366           33 :             worklist.safe_push (cgn->decl);
     367         2017 :           else if (DECL_STRUCT_FUNCTION (cgn->decl)
     368         2017 :                    && DECL_STRUCT_FUNCTION (cgn->decl)->has_omp_target)
     369          409 :             worklist.safe_push (cgn->decl);
     370              :       }
     371       127748 :   FOR_EACH_VARIABLE (vnode)
     372       118388 :     if (lang_hooks.decls.omp_get_decl_init (vnode->decl)
     373       118388 :         && omp_declare_target_var_p (vnode->decl))
     374          457 :       worklist.safe_push (vnode->decl);  /* Seed: declare target variables with an initializer.  */
     375        25945 :   while (!worklist.is_empty ())  /* The walkers below may push further decls.  */
     376              :     {
     377        16585 :       tree decl = worklist.pop ();
     378        16585 :       if (VAR_P (decl))
     379          472 :         walk_tree_without_duplicates (lang_hooks.decls.omp_get_decl_init (decl),
     380              :                                       omp_discover_declare_target_var_r,
     381              :                                       &worklist);
     382        16113 :       else if (omp_declare_target_fn_p (decl))
     383         9094 :         walk_tree_without_duplicates (&DECL_SAVED_TREE (decl),  /* Declare target function: scan the whole body.  */
     384              :                                       omp_discover_declare_target_tgt_fn_r,
     385              :                                       &worklist);
     386              :       else
     387         7019 :         walk_tree_without_duplicates (&DECL_SAVED_TREE (decl),  /* Otherwise only its OMP_TARGET regions count.  */
     388              :                                       omp_discover_declare_target_fn_r,
     389              :                                       &worklist);
     390              :     }
     391              : 
     392         9360 :   lang_hooks.decls.omp_finish_decl_inits ();
     393         9360 : }
     394              : 
     395              : 
     396              : /* Create new symbols containing (address, size) pairs for global variables,
     397              :    marked with "omp declare target" attribute, as well as addresses for the
     398              :    functions, which are outlined offloading regions.  Indirect functions get a table of their own.  */
     399              : void
     400       230133 : omp_finish_file (void)
     401              : {
     402       230133 :   unsigned num_funcs = vec_safe_length (offload_funcs);
     403       230133 :   unsigned num_vars = vec_safe_length (offload_vars);
     404       230133 :   unsigned num_ind_funcs = vec_safe_length (offload_ind_funcs);
     405              : 
     406       230133 :   if (num_funcs == 0 && num_vars == 0 && num_ind_funcs == 0)
     407       230133 :     return;  /* All three tables are empty.  */
     408              : 
     409           29 :   if (targetm_common.have_named_sections)
     410              :     {
     411           29 :       vec<constructor_elt, va_gc> *v_f, *v_v, *v_if;
     412           29 :       vec_alloc (v_f, num_funcs);
     413           29 :       vec_alloc (v_v, num_vars * 2);  /* Two elements (address, size) per variable.  */
     414           29 :       vec_alloc (v_if, num_ind_funcs);
     415              : 
     416           29 :       add_decls_addresses_to_decl_constructor (offload_funcs, v_f);
     417           29 :       add_decls_addresses_to_decl_constructor (offload_vars, v_v);
     418           29 :       add_decls_addresses_to_decl_constructor (offload_ind_funcs, v_if);
     419              : 
     420           29 :       tree vars_decl_type = build_array_type_nelts (pointer_sized_int_node,
     421           29 :                                                     vec_safe_length (v_v));
     422           29 :       tree funcs_decl_type = build_array_type_nelts (pointer_sized_int_node,
     423           29 :                                                      num_funcs);
     424           29 :       tree ind_funcs_decl_type = build_array_type_nelts (pointer_sized_int_node,
     425           29 :                                                          num_ind_funcs);
     426              : 
     427           29 :       SET_TYPE_ALIGN (vars_decl_type, TYPE_ALIGN (pointer_sized_int_node));
     428           29 :       SET_TYPE_ALIGN (funcs_decl_type, TYPE_ALIGN (pointer_sized_int_node));
     429           29 :       SET_TYPE_ALIGN (ind_funcs_decl_type, TYPE_ALIGN (pointer_sized_int_node));
     430           29 :       tree ctor_v = build_constructor (vars_decl_type, v_v);
     431           29 :       tree ctor_f = build_constructor (funcs_decl_type, v_f);
     432           29 :       tree ctor_if = build_constructor (ind_funcs_decl_type, v_if);
     433           29 :       TREE_CONSTANT (ctor_v) = TREE_CONSTANT (ctor_f) = TREE_CONSTANT (ctor_if) = 1;
     434           29 :       TREE_STATIC (ctor_v) = TREE_STATIC (ctor_f) = TREE_STATIC (ctor_if) = 1;
     435           29 :       tree funcs_decl = build_decl (UNKNOWN_LOCATION, VAR_DECL,
     436              :                                     get_identifier (".offload_func_table"),
     437              :                                     funcs_decl_type);
     438           29 :       tree vars_decl = build_decl (UNKNOWN_LOCATION, VAR_DECL,
     439              :                                    get_identifier (".offload_var_table"),
     440              :                                    vars_decl_type);
     441           29 :       tree ind_funcs_decl = build_decl (UNKNOWN_LOCATION, VAR_DECL,
     442              :                                         get_identifier (".offload_ind_func_table"),
     443              :                                         ind_funcs_decl_type);
     444           29 :       TREE_STATIC (funcs_decl) = TREE_STATIC (ind_funcs_decl) = 1;
     445           29 :       TREE_STATIC (vars_decl) = 1;
     446              :       /* Do not align tables more than TYPE_ALIGN (pointer_sized_int_node),
     447              :          otherwise a joint table in a binary will contain padding between
     448              :          tables from multiple object files.  */
     449           29 :       DECL_USER_ALIGN (funcs_decl) = DECL_USER_ALIGN (ind_funcs_decl) = 1;
     450           29 :       DECL_USER_ALIGN (vars_decl) = 1;
     451           29 :       SET_DECL_ALIGN (funcs_decl, TYPE_ALIGN (funcs_decl_type));
     452           29 :       SET_DECL_ALIGN (vars_decl, TYPE_ALIGN (vars_decl_type));
     453           29 :       SET_DECL_ALIGN (ind_funcs_decl, TYPE_ALIGN (ind_funcs_decl_type));
     454           29 :       DECL_INITIAL (funcs_decl) = ctor_f;
     455           29 :       DECL_INITIAL (vars_decl) = ctor_v;
     456           29 :       DECL_INITIAL (ind_funcs_decl) = ctor_if;
     457           29 :       set_decl_section_name (funcs_decl, OFFLOAD_FUNC_TABLE_SECTION_NAME);  /* Each table goes in its own named section.  */
     458           29 :       set_decl_section_name (vars_decl, OFFLOAD_VAR_TABLE_SECTION_NAME);
     459           29 :       set_decl_section_name (ind_funcs_decl,
     460              :                              OFFLOAD_IND_FUNC_TABLE_SECTION_NAME);
     461           29 :       varpool_node::finalize_decl (vars_decl);
     462           29 :       varpool_node::finalize_decl (funcs_decl);
     463           29 :       varpool_node::finalize_decl (ind_funcs_decl);
     464              :     }
     465              :   else  /* No named sections: pass each symbol to targetm.record_offload_symbol instead.  */
     466              :     {
     467            0 :       for (unsigned i = 0; i < num_funcs; i++)
     468              :         {
     469            0 :           tree it = (*offload_funcs)[i];
     470              :           /* See also add_decls_addresses_to_decl_constructor
     471              :              and output_offload_tables in lto-cgraph.cc.  */
     472            0 :           if (!in_lto_p && !symtab_node::get (it))
     473            0 :             continue;
     474            0 :           targetm.record_offload_symbol (it);
     475              :         }
     476            0 :       for (unsigned i = 0; i < num_vars; i++)
     477              :         {
     478            0 :           tree it = (*offload_vars)[i];
     479            0 :           if (!in_lto_p && !symtab_node::get (it))  /* Same skip rule as for the functions above.  */
     480            0 :             continue;
     481              : #ifdef ACCEL_COMPILER
     482              :           if (DECL_HAS_VALUE_EXPR_P (it)
     483              :               && lookup_attribute ("omp declare target link",
     484              :                                    DECL_ATTRIBUTES (it)))
     485              :             {
     486              :               tree value_expr = DECL_VALUE_EXPR (it);
     487              :               tree link_ptr_decl = TREE_OPERAND (value_expr, 0);
     488              :               targetm.record_offload_symbol (link_ptr_decl);  /* Record the link pointer rather than the variable.  */
     489              :               varpool_node::finalize_decl (link_ptr_decl);
     490              :             }
     491              :           else
     492              : #endif
     493            0 :             targetm.record_offload_symbol (it);
     494              :         }
     495            0 :       for (unsigned i = 0; i < num_ind_funcs; i++)
     496              :         {
     497            0 :           tree it = (*offload_ind_funcs)[i];
     498              :           /* See also add_decls_addresses_to_decl_constructor
     499              :              and output_offload_tables in lto-cgraph.cc.  */
     500            0 :           if (!in_lto_p && !symtab_node::get (it))
     501            0 :             continue;
     502            0 :           targetm.record_offload_symbol (it);
     503              :         }
     504              :     }
     505              : }
     506              : 
     507              : /* Call dim_pos (POS == true) or dim_size (POS == false) builtins for
     508              :    axis DIM.  Append the call to SEQ and return a tmp var holding the result.  */
     509              : 
     510              : static tree
     511        30709 : oacc_dim_call (bool pos, int dim, gimple_seq *seq)
     512              : {
     513        30709 :   tree arg = build_int_cst (unsigned_type_node, dim);
     514        30709 :   tree size = create_tmp_var (integer_type_node);  /* Holds the result for either POS value, despite the name.  */
     515        30709 :   enum internal_fn fn = pos ? IFN_GOACC_DIM_POS : IFN_GOACC_DIM_SIZE;
     516        30709 :   gimple *call = gimple_build_call_internal (fn, 1, arg);
     517              : 
     518        30709 :   gimple_call_set_lhs (call, size);
     519        30709 :   gimple_seq_add_stmt (seq, call);
     520              : 
     521        30709 :   return size;
     522              : }
     523              : 
     524              : /* Find the number of threads (POS = false), or thread number (POS =
     525              :    true) for an OpenACC region partitioned as MASK.  Setup code
     526              :    required for the calculation is added to SEQ.  */
     527              : 
     528              : static tree
     529        23606 : oacc_thread_numbers (bool pos, int mask, gimple_seq *seq)
     530              : {
     531        23606 :   tree res = pos ? NULL_TREE : build_int_cst (unsigned_type_node, 1);
     532        23606 :   unsigned ix;
     533              : 
     534              :   /* Start at gang level, and examine relevant dimension indices.  */
     535        94424 :   for (ix = GOMP_DIM_GANG; ix != GOMP_DIM_MAX; ix++)
     536        70818 :     if (GOMP_DIM_MASK (ix) & mask)
     537              :       {
     538        26569 :         if (res)
     539              :           {
     540              :             /* We had an outer index, so scale that by the size of
     541              :                this dimension.  */
     542        17369 :             tree n = oacc_dim_call (false, ix, seq);
     543        17369 :             res = fold_build2 (MULT_EXPR, integer_type_node, res, n);
     544              :           }
     545        26569 :         if (pos)
     546              :           {
     547              :             /* Determine index in this dimension.  */
     548        13340 :             tree id = oacc_dim_call (true, ix, seq);
     549        13340 :             if (res)
     550         4140 :               res = fold_build2 (PLUS_EXPR, integer_type_node, res, id);
     551              :             else
     552              :               res = id;
     553              :           }
     554              :       }
     555              : 
     556        23606 :   if (res == NULL_TREE)
     557         2655 :     res = integer_zero_node;
     558              : 
     559        23606 :   return res;
     560              : }
     561              : 
     562              : /* Transform IFN_GOACC_LOOP calls to actual code.  See
     563              :    expand_oacc_for for where these are generated.  At the vector
     564              :    level, we stride loops, such that each member of a warp will
     565              :    operate on adjacent iterations.  At the worker and gang level,
     566              :    each gang/warp executes a set of contiguous iterations.  Chunking
     567              :    can override this such that each iteration engine executes a
     568              :    contiguous chunk, and then moves on to stride to the next chunk.  */
     569              : 
     570              : static void
     571        46694 : oacc_xform_loop (gcall *call)
     572              : {
     573        46694 :   gimple_stmt_iterator gsi = gsi_for_stmt (call);
     574        46694 :   enum ifn_goacc_loop_kind code
     575        46694 :     = (enum ifn_goacc_loop_kind) TREE_INT_CST_LOW (gimple_call_arg (call, 0));
     576        46694 :   tree dir = gimple_call_arg (call, 1);
     577        46694 :   tree range = gimple_call_arg (call, 2);
     578        46694 :   tree step = gimple_call_arg (call, 3);
     579        46694 :   tree chunk_size = NULL_TREE;
     580        46694 :   unsigned mask = (unsigned) TREE_INT_CST_LOW (gimple_call_arg (call, 5));
     581        46694 :   tree lhs = gimple_call_lhs (call);
     582        46694 :   tree type = NULL_TREE;
     583        46694 :   tree diff_type = TREE_TYPE (range);
     584        46694 :   tree r = NULL_TREE;
     585        46694 :   gimple_seq seq = NULL;
     586        46694 :   bool chunking = false, striding = true;
     587        46694 :   unsigned outer_mask = mask & (~mask + 1); // Outermost partitioning
     588        46694 :   unsigned inner_mask = mask & ~outer_mask; // Inner partitioning (if any)
     589              : 
     590              :   /* Skip lowering if return value of IFN_GOACC_LOOP call is not used.  */
     591        46694 :   if (!lhs)
     592              :     {
     593            8 :       gsi_replace_with_seq (&gsi, seq, true);
     594            8 :       return;
     595              :     }
     596              : 
     597        46686 :   type = TREE_TYPE (lhs);
     598              : 
     599              : #ifdef ACCEL_COMPILER
     600              :   chunk_size = gimple_call_arg (call, 4);
     601              :   if (integer_minus_onep (chunk_size)  /* Force static allocation.  */
     602              :       || integer_zerop (chunk_size))   /* Default (also static).  */
     603              :     {
     604              :       /* If we're at the gang level, we want each to execute a
     605              :          contiguous run of iterations.  Otherwise we want each element
     606              :          to stride.  */
     607              :       striding = !(outer_mask & GOMP_DIM_MASK (GOMP_DIM_GANG));
     608              :       chunking = false;
     609              :     }
     610              :   else
     611              :     {
     612              :       /* Chunk of size 1 is striding.  */
     613              :       striding = integer_onep (chunk_size);
     614              :       chunking = !striding;
     615              :     }
     616              : #endif
     617              : 
     618              :   /* striding=true, chunking=true
     619              :        -> invalid.
     620              :      striding=true, chunking=false
     621              :        -> chunks=1
     622              :      striding=false,chunking=true
     623              :        -> chunks=ceil (range/(chunksize*threads*step))
     624              :      striding=false,chunking=false
     625              :        -> chunk_size=ceil(range/(threads*step)),chunks=1  */
     626        46686 :   push_gimplify_context (true);
     627              : 
     628        46686 :   switch (code)
     629              :     {
     630            0 :     default: gcc_unreachable ();
     631              : 
     632        11229 :     case IFN_GOACC_LOOP_CHUNKS:
     633        11229 :       if (!chunking)
     634        11229 :         r = build_int_cst (type, 1);
     635              :       else
     636              :         {
     637              :           /* chunk_max = (range - dir + per) / per,
     638              :              where per = num_threads * chunk_size * step  */
     639              :           tree per = oacc_thread_numbers (false, mask, &seq);
     640              :           per = fold_convert (type, per);
     641              :           chunk_size = fold_convert (type, chunk_size);
     642              :           per = fold_build2 (MULT_EXPR, type, per, chunk_size);
     643              :           per = fold_build2 (MULT_EXPR, type, per, step);
     644              :           r = build2 (MINUS_EXPR, type, range, dir);
     645              :           r = build2 (PLUS_EXPR, type, r, per);
     646              :           r = build2 (TRUNC_DIV_EXPR, type, r, per);
     647              :         }
     648              :       break;
     649              : 
     650        11751 :     case IFN_GOACC_LOOP_STEP:
     651        11751 :       {
     652              :         /* If striding, step by the entire compute volume, otherwise
     653              :            step by the inner volume.  */
     654        11751 :         unsigned volume = striding ? mask : inner_mask;
     655              : 
     656        11751 :         r = oacc_thread_numbers (false, volume, &seq);
     657        11751 :         r = build2 (MULT_EXPR, type, fold_convert (type, r), step);
     658              :       }
     659        11751 :       break;
     660              : 
     661        11855 :     case IFN_GOACC_LOOP_OFFSET:
     662              :       /* Enable vectorization on non-SIMT targets.  */
     663        11855 :       if (!targetm.simt.vf
     664        11855 :           && outer_mask == GOMP_DIM_MASK (GOMP_DIM_VECTOR)
     665              :           /* If not -fno-tree-loop-vectorize, hint that we want to vectorize
     666              :              the loop.  */
     667         1951 :           && (flag_tree_loop_vectorize
     668         1463 :               || !OPTION_SET_P (flag_tree_loop_vectorize)))
     669              :         {
     670         1951 :           basic_block bb = gsi_bb (gsi);
     671         1951 :           class loop *parent = bb->loop_father;
     672         1951 :           class loop *body = parent->inner;
     673              : 
     674         1951 :           parent->force_vectorize = true;
     675         1951 :           parent->safelen = INT_MAX;
     676              : 
     677              :           /* "Chunking loops" may have inner loops.  */
     678         1951 :           if (parent->inner)
     679              :             {
     680         1939 :               body->force_vectorize = true;
     681         1939 :               body->safelen = INT_MAX;
     682              :             }
     683              : 
     684         1951 :           cfun->has_force_vectorize_loops = true;
     685              :         }
     686        11855 :       if (striding)
     687              :         {
     688        11855 :           r = oacc_thread_numbers (true, mask, &seq);
     689        11855 :           r = fold_convert (diff_type, r);
     690              :         }
     691              :       else
     692              :         {
     693              :           tree inner_size = oacc_thread_numbers (false, inner_mask, &seq);
     694              :           tree outer_size = oacc_thread_numbers (false, outer_mask, &seq);
     695              :           tree volume = fold_build2 (MULT_EXPR, TREE_TYPE (inner_size),
     696              :                                      inner_size, outer_size);
     697              : 
     698              :           volume = fold_convert (diff_type, volume);
     699              :           if (chunking)
     700              :             chunk_size = fold_convert (diff_type, chunk_size);
     701              :           else
     702              :             {
     703              :               tree per = fold_build2 (MULT_EXPR, diff_type, volume, step);
     704              : 
     705              :               chunk_size = build2 (MINUS_EXPR, diff_type, range, dir);
     706              :               chunk_size = build2 (PLUS_EXPR, diff_type, chunk_size, per);
     707              :               chunk_size = build2 (TRUNC_DIV_EXPR, diff_type, chunk_size, per);
     708              :             }
     709              : 
     710              :           tree span = build2 (MULT_EXPR, diff_type, chunk_size,
     711              :                               fold_convert (diff_type, inner_size));
     712              :           r = oacc_thread_numbers (true, outer_mask, &seq);
     713              :           r = fold_convert (diff_type, r);
     714              :           r = build2 (MULT_EXPR, diff_type, r, span);
     715              : 
     716              :           tree inner = oacc_thread_numbers (true, inner_mask, &seq);
     717              :           inner = fold_convert (diff_type, inner);
     718              :           r = fold_build2 (PLUS_EXPR, diff_type, r, inner);
     719              : 
     720              :           if (chunking)
     721              :             {
     722              :               tree chunk = fold_convert (diff_type, gimple_call_arg (call, 6));
     723              :               tree per
     724              :                 = fold_build2 (MULT_EXPR, diff_type, volume, chunk_size);
     725              :               per = build2 (MULT_EXPR, diff_type, per, chunk);
     726              : 
     727              :               r = build2 (PLUS_EXPR, diff_type, r, per);
     728              :             }
     729              :         }
     730        11855 :       r = fold_build2 (MULT_EXPR, diff_type, r, step);
     731        11855 :       if (type != diff_type)
     732          178 :         r = fold_convert (type, r);
     733              :       break;
     734              : 
     735        11851 :     case IFN_GOACC_LOOP_BOUND:
     736        11851 :       if (striding)
     737        11851 :         r = range;
     738              :       else
     739              :         {
     740              :           tree inner_size = oacc_thread_numbers (false, inner_mask, &seq);
     741              :           tree outer_size = oacc_thread_numbers (false, outer_mask, &seq);
     742              :           tree volume = fold_build2 (MULT_EXPR, TREE_TYPE (inner_size),
     743              :                                      inner_size, outer_size);
     744              : 
     745              :           volume = fold_convert (diff_type, volume);
     746              :           if (chunking)
     747              :             chunk_size = fold_convert (diff_type, chunk_size);
     748              :           else
     749              :             {
     750              :               tree per = fold_build2 (MULT_EXPR, diff_type, volume, step);
     751              : 
     752              :               chunk_size = build2 (MINUS_EXPR, diff_type, range, dir);
     753              :               chunk_size = build2 (PLUS_EXPR, diff_type, chunk_size, per);
     754              :               chunk_size = build2 (TRUNC_DIV_EXPR, diff_type, chunk_size, per);
     755              :             }
     756              : 
     757              :           tree span = build2 (MULT_EXPR, diff_type, chunk_size,
     758              :                               fold_convert (diff_type, inner_size));
     759              : 
     760              :           r = fold_build2 (MULT_EXPR, diff_type, span, step);
     761              : 
     762              :           tree offset = gimple_call_arg (call, 6);
     763              :           r = build2 (PLUS_EXPR, diff_type, r,
     764              :                       fold_convert (diff_type, offset));
     765              :           r = build2 (integer_onep (dir) ? MIN_EXPR : MAX_EXPR,
     766              :                       diff_type, r, range);
     767              :         }
     768        11851 :       if (diff_type != type)
     769          178 :         r = fold_convert (type, r);
     770              :       break;
     771              :     }
     772              : 
     773        46686 :   gimplify_assign (lhs, r, &seq);
     774              : 
     775        46686 :   pop_gimplify_context (NULL);
     776              : 
     777        46686 :   gsi_replace_with_seq (&gsi, seq, true);
     778              : }
     779              : 
     780              : /* Transform a GOACC_TILE call.  Determines the element loop span for
     781              :    the specified loop of the nest.  This is 1 if we're not tiling.
     782              : 
     783              :    GOACC_TILE (collapse_count, loop_no, tile_arg, gwv_tile, gwv_element);  */
     784              : 
     785              : static void
     786          284 : oacc_xform_tile (gcall *call)
     787              : {
     788          284 :   gimple_stmt_iterator gsi = gsi_for_stmt (call);
     789          284 :   unsigned collapse = tree_to_uhwi (gimple_call_arg (call, 0));
     790              :   /* Inner loops have higher loop_nos.  */
     791          284 :   unsigned loop_no = tree_to_uhwi (gimple_call_arg (call, 1));
     792          284 :   tree tile_size = gimple_call_arg (call, 2);
     793          284 :   unsigned e_mask = tree_to_uhwi (gimple_call_arg (call, 4));
     794          284 :   tree lhs = gimple_call_lhs (call);
     795          284 :   tree type = TREE_TYPE (lhs);
     796          284 :   gimple_seq seq = NULL;
     797          284 :   tree span = build_int_cst (type, 1);
     798              : 
     799          284 :   gcc_assert (!(e_mask
     800              :                 & ~(GOMP_DIM_MASK (GOMP_DIM_VECTOR)
     801              :                     | GOMP_DIM_MASK (GOMP_DIM_WORKER))));
     802          284 :   push_gimplify_context (!seen_error ());
     803              : 
     804              : #ifndef ACCEL_COMPILER
     805              :   /* Partitioning disabled on host compilers.  */
     806          284 :   e_mask = 0;
     807              : #endif
     808          284 :   if (!e_mask)
     809              :     /* Not partitioning.  */
     810          284 :     span = integer_one_node;
     811              :   else if (!integer_zerop (tile_size))
     812              :     /* User explicitly specified size.  */
     813              :     span = tile_size;
     814              :   else
     815              :     {
     816              :       /* Pick a size based on the partitioning of the element loop and
     817              :          the number of loop nests.  */
     818              :       tree first_size = NULL_TREE;
     819              :       tree second_size = NULL_TREE;
     820              : 
     821              :       if (e_mask & GOMP_DIM_MASK (GOMP_DIM_VECTOR))
     822              :         first_size = oacc_dim_call (false, GOMP_DIM_VECTOR, &seq);
     823              :       if (e_mask & GOMP_DIM_MASK (GOMP_DIM_WORKER))
     824              :         second_size = oacc_dim_call (false, GOMP_DIM_WORKER, &seq);
     825              : 
     826              :       if (!first_size)
     827              :         {
     828              :           first_size = second_size;
     829              :           second_size = NULL_TREE;
     830              :         }
     831              : 
     832              :       if (loop_no + 1 == collapse)
     833              :         {
     834              :           span = first_size;
     835              :           if (!loop_no && second_size)
     836              :             span = fold_build2 (MULT_EXPR, TREE_TYPE (span),
     837              :                                 span, second_size);
     838              :         }
     839              :       else if (loop_no + 2 == collapse)
     840              :         span = second_size;
     841              :       else
     842              :         span = NULL_TREE;
     843              : 
     844              :       if (!span)
     845              :         /* There's no obvious element size for this loop.  Options
     846              :            are 1, first_size or some non-unity constant (32 is my
     847              :            favourite).   We should gather some statistics.  */
     848              :         span = first_size;
     849              :     }
     850              : 
     851          284 :   span = fold_convert (type, span);
     852          284 :   gimplify_assign (lhs, span, &seq);
     853              : 
     854          284 :   pop_gimplify_context (NULL);
     855              : 
     856          284 :   gsi_replace_with_seq (&gsi, seq, true);
     857          284 : }
     858              : 
     859              : /* Default partitioned and minimum partitioned dimensions.  */
     860              : 
     861              : static int oacc_default_dims[GOMP_DIM_MAX];
     862              : static int oacc_min_dims[GOMP_DIM_MAX];
     863              : 
     864              : int
     865            0 : oacc_get_default_dim (int dim)
     866              : {
     867            0 :   gcc_assert (0 <= dim && dim < GOMP_DIM_MAX);
     868            0 :   return oacc_default_dims[dim];
     869              : }
     870              : 
     871              : int
     872            0 : oacc_get_min_dim (int dim)
     873              : {
     874            0 :   gcc_assert (0 <= dim && dim < GOMP_DIM_MAX);
     875            0 :   return oacc_min_dims[dim];
     876              : }
     877              : 
     878              : /* Parse the default dimension parameter.  This is a set of
     879              :    :-separated optional compute dimensions.  Each specified dimension
     880              :    is a positive integer.  When device type support is added, it is
     881              :    planned to be a comma separated list of such compute dimensions,
     882              :    with all but the first prefixed by the colon-terminated device
     883              :    type.  */
     884              : 
     885              : static void
     886         2279 : oacc_parse_default_dims (const char *dims)
     887              : {
     888         2279 :   int ix;
     889              : 
     890         9116 :   for (ix = GOMP_DIM_MAX; ix--;)
     891              :     {
     892         6837 :       oacc_default_dims[ix] = -1;
     893         6837 :       oacc_min_dims[ix] = 1;
     894              :     }
     895              : 
     896              : #ifndef ACCEL_COMPILER
     897              :   /* Cannot be overridden on the host.  */
     898         2279 :   dims = NULL;
     899              : #endif
     900         2279 :   if (dims)
     901              :     {
     902              :       const char *pos = dims;
     903              : 
     904              :       for (ix = 0; *pos && ix != GOMP_DIM_MAX; ix++)
     905              :         {
     906              :           if (ix)
     907              :             {
     908              :               if (*pos != ':')
     909              :                 goto malformed;
     910              :               pos++;
     911              :             }
     912              : 
     913              :           if (*pos != ':')
     914              :             {
     915              :               long val;
     916              :               const char *eptr;
     917              : 
     918              :               errno = 0;
     919              :               val = strtol (pos, const_cast<char **> (&eptr), 10);
     920              :               if (errno || val <= 0 || (int) val != val)
     921              :                 goto malformed;
     922              :               pos = eptr;
     923              :               oacc_default_dims[ix] = (int) val;
     924              :             }
     925              :         }
     926              :       if (*pos)
     927              :         {
     928              :         malformed:
     929              :           error_at (UNKNOWN_LOCATION,
     930              :                     "%<-fopenacc-dim%> operand is malformed at %qs", pos);
     931              :         }
     932              :     }
     933              : 
     934              :   /* Allow the backend to validate the dimensions.  */
     935         2279 :   targetm.goacc.validate_dims (NULL_TREE, oacc_default_dims, -1, 0);
     936         2279 :   targetm.goacc.validate_dims (NULL_TREE, oacc_min_dims, -2, 0);
     937         2279 : }
     938              : 
     939              : /* Validate and update the dimensions for offloaded FN.  ATTRS is the
     940              :    raw attribute.  DIMS is an array of dimensions, which is filled in.
     941              :    LEVEL is the partitioning level of a routine, or -1 for an offload
     942              :    region itself.  USED is the mask of partitioned execution in the
     943              :    function.  */
     944              : 
     945              : static void
     946         9876 : oacc_validate_dims (tree fn, tree attrs, int *dims, int level, unsigned used)
     947              : {
     948         9876 :   tree purpose[GOMP_DIM_MAX];
     949         9876 :   unsigned ix;
     950         9876 :   tree pos = TREE_VALUE (attrs);
     951              : 
     952              :   /* Make sure the attribute creator attached the dimension
     953              :      information.  */
     954         9876 :   gcc_assert (pos);
     955              : 
     956        39504 :   for (ix = 0; ix != GOMP_DIM_MAX; ix++)
     957              :     {
     958        29628 :       purpose[ix] = TREE_PURPOSE (pos);
     959        29628 :       tree val = TREE_VALUE (pos);
     960        29628 :       dims[ix] = val ? TREE_INT_CST_LOW (val) : -1;
     961        29628 :       pos = TREE_CHAIN (pos);
     962              :     }
     963              : 
     964         9876 :   bool check = true;
     965              : #ifdef ACCEL_COMPILER
     966              :   check = false;
     967              : #endif
     968         9876 :   if (check
     969         9876 :       && warn_openacc_parallelism
     970         1371 :       && !lookup_attribute ("oacc kernels", DECL_ATTRIBUTES (fn)))
     971              :     {
     972         1268 :       static char const *const axes[] =
     973              :       /* Must be kept in sync with GOMP_DIM enumeration.  */
     974              :         { "gang", "worker", "vector" };
     975         4775 :       for (ix = level >= 0 ? level : 0; ix != GOMP_DIM_MAX; ix++)
     976         3507 :         if (dims[ix] < 0)
     977              :           ; /* Defaulting axis.  */
     978         1970 :         else if ((used & GOMP_DIM_MASK (ix)) && dims[ix] == 1)
     979              :           /* There is partitioned execution, but the user requested a
     980              :              dimension size of 1.  They're probably confused.  */
     981           94 :           warning_at (DECL_SOURCE_LOCATION (fn), OPT_Wopenacc_parallelism,
     982              :                       "region contains %s partitioned code but"
     983           94 :                       " is not %s partitioned", axes[ix], axes[ix]);
     984         1876 :         else if (!(used & GOMP_DIM_MASK (ix)) && dims[ix] != 1)
     985              :           /* The dimension is explicitly partitioned to non-unity, but
     986              :              no use is made within the region.  */
     987          500 :           warning_at (DECL_SOURCE_LOCATION (fn), OPT_Wopenacc_parallelism,
     988              :                       "region is %s partitioned but"
     989              :                       " does not contain %s partitioned code",
     990          500 :                       axes[ix], axes[ix]);
     991              :     }
     992              : 
     993         9876 :   bool changed = targetm.goacc.validate_dims (fn, dims, level, used);
     994              : 
     995              :   /* Default anything left to 1 or a partitioned default.  */
     996        49380 :   for (ix = 0; ix != GOMP_DIM_MAX; ix++)
     997        29628 :     if (dims[ix] < 0)
     998              :       {
     999              :         /* The OpenACC spec says 'If the [num_gangs] clause is not
    1000              :            specified, an implementation-defined default will be used;
    1001              :            the default may depend on the code within the construct.'
    1002              :            (2.5.6).  Thus an implementation is free to choose
    1003              :            non-unity default for a parallel region that doesn't have
    1004              :            any gang-partitioned loops.  However, it appears that there
    1005              :            is a sufficient body of user code that expects non-gang
    1006              :            partitioned regions to not execute in gang-redundant mode.
    1007              :            So we (a) don't warn about the non-portability and (b) pick
    1008              :            the minimum permissible dimension size when there is no
    1009              :            partitioned execution.  Otherwise we pick the global
    1010              :            default for the dimension, which the user can control.  The
    1011              :            same wording and logic applies to num_workers and
    1012              :            vector_length, however the worker- or vector- single
    1013              :            execution doesn't have the same impact as gang-redundant
    1014              :            execution.  (If the minimum gang-level partitioning is not 1,
    1015              :            the target is probably too confusing.)  */
    1016            0 :         dims[ix] = (used & GOMP_DIM_MASK (ix)
    1017            0 :                     ? oacc_default_dims[ix] : oacc_min_dims[ix]);
    1018            0 :         changed = true;
    1019              :       }
    1020              : 
    1021         9876 :   if (changed)
    1022              :     {
    1023              :       /* Replace the attribute with new values.  */
    1024              :       pos = NULL_TREE;
    1025        35412 :       for (ix = GOMP_DIM_MAX; ix--;)
    1026        26559 :         pos = tree_cons (purpose[ix],
    1027        26559 :                          build_int_cst (integer_type_node, dims[ix]), pos);
    1028         8853 :       oacc_replace_fn_attrib (fn, pos);
    1029              :     }
    1030         9876 : }
    1031              : 
    1032              : /* Create an empty OpenACC loop structure at LOC, linked into PARENT if any.  */
    1033              : 
    1034              : static oacc_loop *
    1035        21344 : new_oacc_loop_raw (oacc_loop *parent, location_t loc)
    1036              : {
    1037        10835 :   oacc_loop *loop = XCNEW (oacc_loop);
    1038              : 
    1039        21344 :   loop->parent = parent;
    1040              : 
    1041        10835 :   if (parent)
    1042              :     {
    1043        10835 :       loop->sibling = parent->child;
    1044        10835 :       parent->child = loop;
    1045              :     }
    1046              : 
    1047        21344 :   loop->loc = loc;
    1048        21344 :   return loop;
    1049              : }
    1050              : 
    1051              : /* Create an outermost, dummy OpenACC loop for offloaded function
    1052              :    DECL.  */
    1053              : 
    1054              : static oacc_loop *
    1055         9876 : new_oacc_loop_outer (tree decl)
    1056              : {
    1057         9876 :   return new_oacc_loop_raw (NULL, DECL_SOURCE_LOCATION (decl));
    1058              : }
    1059              : 
    1060              : /* Start a new OpenACC loop structure beginning at head marker MARKER.
    1061              :    Link into PARENT loop.  Return the new loop.  */
    1062              : 
    1063              : static oacc_loop *
    1064         9634 : new_oacc_loop (oacc_loop *parent, gcall *marker)
    1065              : {
    1066         9634 :   oacc_loop *loop = new_oacc_loop_raw (parent, gimple_location (marker));
    1067              : 
    1068         9634 :   loop->marker = marker;
    1069              : 
    1070              :   /* TODO: This is where device_type flattening would occur for the loop
    1071              :      flags.  */
    1072              : 
    1073         9634 :   loop->flags = TREE_INT_CST_LOW (gimple_call_arg (marker, 3));
    1074              : 
    1075         9634 :   tree chunk_size = integer_zero_node;
    1076         9634 :   if (loop->flags & OLF_GANG_STATIC)
    1077          146 :     chunk_size = gimple_call_arg (marker, 4);
    1078         9634 :   loop->chunk_size = chunk_size;
    1079              : 
    1080         9634 :   return loop;
    1081              : }
    1082              : 
    1083              : /* Create a dummy loop encompassing a call to an OpenACC routine.
    1084              :    Extract the routine's partitioning requirements.  */
    1085              : 
    1086              : static void
    1087         1201 : new_oacc_loop_routine (oacc_loop *parent, gcall *call, tree decl, tree attrs)
    1088              : {
    1089         1201 :   oacc_loop *loop = new_oacc_loop_raw (parent, gimple_location (call));
    1090         1201 :   int level = oacc_fn_attrib_level (attrs);
    1091              : 
    1092         1201 :   gcc_assert (level >= 0);
    1093              : 
    1094         1201 :   loop->marker = call;
    1095         1201 :   loop->routine = decl;
    1096         1201 :   loop->mask = ((GOMP_DIM_MASK (GOMP_DIM_MAX) - 1)
    1097         1201 :                 ^ (GOMP_DIM_MASK (level) - 1));
    1098         1201 : }
    1099              : 
    1100              : /* Finish off the current OpenACC loop LOOP, whose tail marker has been
    1101              :    reached.  Return the parent loop.  */
    1102              : 
    1103              : static oacc_loop *
    1104         9634 : finish_oacc_loop (oacc_loop *loop)
    1105              : {
    1106              :   /* If the loop has been collapsed, don't partition it.  */
    1107            0 :   if (loop->ifns.is_empty ())
    1108            0 :     loop->mask = loop->flags = 0;
    1109         9634 :   return loop->parent;
    1110              : }
    1111              : 
    1112              : /* Free all OpenACC loop structures within LOOP (inclusive).  */
    1113              : 
    1114              : static void
    1115        21344 : free_oacc_loop (oacc_loop *loop)
    1116              : {
    1117        21344 :   if (loop->sibling)
    1118         2194 :     free_oacc_loop (loop->sibling);
    1119        21344 :   if (loop->child)
    1120         8641 :     free_oacc_loop (loop->child);
    1121              : 
    1122        21344 :   loop->ifns.release ();
    1123        21344 :   free (loop);
    1124        21344 : }
    1125              : 
    1126              : /* Dump out the OpenACC loop head or tail beginning at FROM.  */
    1127              : 
    1128              : static void
    1129          238 : dump_oacc_loop_part (FILE *file, gcall *from, int depth,
    1130              :                      const char *title, int level)
    1131              : {
    1132          238 :   enum ifn_unique_kind kind
    1133          238 :     = (enum ifn_unique_kind) TREE_INT_CST_LOW (gimple_call_arg (from, 0));
    1134              : 
    1135          238 :   fprintf (file, "%*s%s-%d:\n", depth * 2, "", title, level);
    1136          238 :   for (gimple_stmt_iterator gsi = gsi_for_stmt (from);;)
    1137              :     {
    1138          719 :       gimple *stmt = gsi_stmt (gsi);
    1139              : 
    1140          719 :       if (gimple_call_internal_p (stmt, IFN_UNIQUE))
    1141              :         {
    1142          719 :           enum ifn_unique_kind k
    1143          719 :             = ((enum ifn_unique_kind) TREE_INT_CST_LOW
    1144          719 :                (gimple_call_arg (stmt, 0)));
    1145              : 
    1146          719 :           if (k == kind && stmt != from)
    1147              :             break;
    1148              :         }
    1149          481 :       print_gimple_stmt (file, stmt, depth * 2 + 2);
    1150              : 
    1151          481 :       gsi_next (&gsi);
    1152          962 :       while (gsi_end_p (gsi))
    1153          962 :         gsi = gsi_start_bb (single_succ (gsi_bb (gsi)));
    1154              :     }
    1155          238 : }
    1156              : 
    1157              : /* Dump OpenACC loop LOOP, its children, and its siblings.  */
    1158              : 
    1159              : static void
    1160          183 : dump_oacc_loop (FILE *file, oacc_loop *loop, int depth)
    1161              : {
    1162          222 :   int ix;
    1163              : 
    1164          222 :   fprintf (file, "%*sLoop %x(%x) %s:%u\n", depth * 2, "",
    1165              :            loop->flags, loop->mask,
    1166          222 :            LOCATION_FILE (loop->loc), LOCATION_LINE (loop->loc));
    1167              : 
    1168          222 :   if (loop->marker)
    1169          108 :     print_gimple_stmt (file, loop->marker, depth * 2);
    1170              : 
    1171          222 :   if (loop->routine)
    1172           48 :     fprintf (file, "%*sRoutine %s:%u:%s\n",
    1173           48 :              depth * 2, "", DECL_SOURCE_FILE (loop->routine),
    1174           96 :              DECL_SOURCE_LINE (loop->routine),
    1175           48 :              IDENTIFIER_POINTER (DECL_NAME (loop->routine)));
    1176              : 
    1177          888 :   for (ix = GOMP_DIM_GANG; ix != GOMP_DIM_MAX; ix++)
    1178          666 :     if (loop->heads[ix])
    1179          119 :       dump_oacc_loop_part (file, loop->heads[ix], depth, "Head", ix);
    1180          888 :   for (ix = GOMP_DIM_MAX; ix--;)
    1181          666 :     if (loop->tails[ix])
    1182          119 :       dump_oacc_loop_part (file, loop->tails[ix], depth, "Tail", ix);
    1183              : 
    1184          222 :   if (loop->child)
    1185           69 :     dump_oacc_loop (file, loop->child, depth + 1);
    1186          222 :   if (loop->sibling)
    1187              :     dump_oacc_loop (file, loop->sibling, depth);
    1188          183 : }
    1189              : 
    1190              : void debug_oacc_loop (oacc_loop *);
    1191              : 
    1192              : /* Dump loops to stderr.  */
    1193              : 
    1194              : DEBUG_FUNCTION void
    1195            0 : debug_oacc_loop (oacc_loop *loop)
    1196              : {
    1197            0 :   dump_oacc_loop (stderr, loop, 0);
    1198            0 : }
    1199              : 
    1200              : /* Provide diagnostics on OpenACC loop LOOP, its children, and its
    1201              :    siblings.  */
    1202              : 
    1203              : static void
    1204         2741 : inform_oacc_loop (const oacc_loop *loop)
    1205              : {
    1206         1796 :   const char *gang
    1207         2741 :     = loop->mask & GOMP_DIM_MASK (GOMP_DIM_GANG) ? " gang" : "";
    1208         2226 :   const char *worker
    1209         2741 :     = loop->mask & GOMP_DIM_MASK (GOMP_DIM_WORKER) ? " worker" : "";
    1210         1747 :   const char *vector
    1211         2741 :     = loop->mask & GOMP_DIM_MASK (GOMP_DIM_VECTOR) ? " vector" : "";
    1212         2741 :   const char *seq = loop->mask == 0 ? " seq" : "";
    1213         2741 :   const dump_user_location_t loc
    1214         2741 :     = dump_user_location_t::from_location_t (loop->loc);
    1215         2741 :   dump_printf_loc (MSG_OPTIMIZED_LOCATIONS, loc,
    1216              :                    "assigned OpenACC%s%s%s%s loop parallelism\n", gang, worker,
    1217              :                    vector, seq);
    1218              : 
    1219         2741 :   if (loop->child)
    1220          698 :     inform_oacc_loop (loop->child);
    1221         2741 :   if (loop->sibling)
    1222          280 :     inform_oacc_loop (loop->sibling);
    1223         2741 : }
    1224              : 
    1225              : /* DFS walk of basic blocks BB onwards, creating OpenACC loop
    1226              :    structures as we go.  By construction these loops are properly
    1227              :    nested.  */
    1228              : 
    1229              : static void
    1230       176084 : oacc_loop_discover_walk (oacc_loop *loop, basic_block bb)
    1231              : {
    1232       176084 :   int marker = 0;
    1233       176084 :   int remaining = 0;
    1234              : 
    1235       176084 :   if (bb->flags & BB_VISITED)
    1236        40096 :     return;
    1237              : 
    1238       135988 :  follow:
    1239       202173 :   bb->flags |= BB_VISITED;
    1240              : 
    1241              :   /* Scan for loop markers.  */
    1242       789432 :   for (gimple_stmt_iterator gsi = gsi_start_bb (bb); !gsi_end_p (gsi);
    1243       385086 :        gsi_next (&gsi))
    1244              :     {
    1245       385086 :       gimple *stmt = gsi_stmt (gsi);
    1246              : 
    1247       385086 :       if (!is_gimple_call (stmt))
    1248       220759 :         continue;
    1249              : 
    1250       169106 :       gcall *call = as_a <gcall *> (stmt);
    1251              : 
    1252              :       /* If this is a routine, make a dummy loop for it.  */
    1253       169106 :       if (tree decl = gimple_call_fndecl (call))
    1254         4777 :         if (tree attrs = oacc_get_fn_attrib (decl))
    1255              :           {
    1256         1201 :             gcc_assert (!marker);
    1257         1201 :             new_oacc_loop_routine (loop, call, decl, attrs);
    1258              :           }
    1259              : 
    1260       169106 :       if (!gimple_call_internal_p (call))
    1261         4779 :         continue;
    1262              : 
    1263       164327 :       switch (gimple_call_internal_fn (call))
    1264              :         {
    1265              :         default:
    1266              :           break;
    1267              : 
    1268        46978 :         case IFN_GOACC_LOOP:
    1269        46978 :         case IFN_GOACC_TILE:
    1270              :           /* Record the abstraction function, so we can manipulate it
    1271              :              later.  */
    1272        46978 :           loop->ifns.safe_push (call);
    1273        46978 :           break;
    1274              : 
    1275        85483 :         case IFN_UNIQUE:
    1276        85483 :           enum ifn_unique_kind kind
    1277        85483 :             = (enum ifn_unique_kind) (TREE_INT_CST_LOW
    1278        85483 :                                       (gimple_call_arg (call, 0)));
    1279        85483 :           if (kind == IFN_UNIQUE_OACC_HEAD_MARK
    1280        85483 :               || kind == IFN_UNIQUE_OACC_TAIL_MARK)
    1281              :             {
    1282        52246 :               if (gimple_call_num_args (call) == 2)
    1283              :                 {
    1284        19268 :                   gcc_assert (marker && !remaining);
    1285        19268 :                   marker = 0;
    1286        19268 :                   if (kind == IFN_UNIQUE_OACC_TAIL_MARK)
    1287        19268 :                     loop = finish_oacc_loop (loop);
    1288              :                   else
    1289         9634 :                     loop->head_end = call;
    1290              :                 }
    1291              :               else
    1292              :                 {
    1293        32978 :                   int count = TREE_INT_CST_LOW (gimple_call_arg (call, 2));
    1294              : 
    1295        32978 :                   if (!marker)
    1296              :                     {
    1297        19268 :                       if (kind == IFN_UNIQUE_OACC_HEAD_MARK)
    1298         9634 :                         loop = new_oacc_loop (loop, call);
    1299              :                       remaining = count;
    1300              :                     }
    1301        32978 :                   gcc_assert (count == remaining);
    1302        32978 :                   if (remaining)
    1303              :                     {
    1304        32978 :                       remaining--;
    1305        32978 :                       if (kind == IFN_UNIQUE_OACC_HEAD_MARK)
    1306        16489 :                         loop->heads[marker] = call;
    1307              :                       else
    1308        16489 :                         loop->tails[remaining] = call;
    1309              :                     }
    1310        32978 :                   marker++;
    1311              :                 }
    1312              :             }
    1313              :         }
    1314              :     }
    1315       202173 :   if (remaining || marker)
    1316              :     {
    1317        66185 :       bb = single_succ (bb);
    1318        66185 :       gcc_assert (single_pred_p (bb) && !(bb->flags & BB_VISITED));
    1319        66185 :       goto follow;
    1320              :     }
    1321              : 
    1322              :   /* Walk successor blocks.  */
    1323       135988 :   edge e;
    1324       135988 :   edge_iterator ei;
    1325              : 
    1326       302196 :   FOR_EACH_EDGE (e, ei, bb->succs)
    1327       166208 :     oacc_loop_discover_walk (loop, e->dest);
    1328              : }
    1329              : 
    1330              : /* LOOP is the first sibling.  Reverse the order in place and return
    1331              :    the new first sibling.  Recurse to child loops.  */
    1332              : 
    1333              : static oacc_loop *
    1334        18517 : oacc_loop_sibling_nreverse (oacc_loop *loop)
    1335              : {
    1336        18517 :   oacc_loop *last = NULL;
    1337        20711 :   do
    1338              :     {
    1339        20711 :       if (loop->child)
    1340         8641 :         loop->child = oacc_loop_sibling_nreverse (loop->child);
    1341              : 
    1342        20711 :       oacc_loop *next = loop->sibling;
    1343        20711 :       loop->sibling = last;
    1344        20711 :       last = loop;
    1345        20711 :       loop = next;
    1346              :     }
    1347        20711 :   while (loop);
    1348              : 
    1349        18517 :   return last;
    1350              : }
    1351              : 
    1352              : /* Discover the OpenACC loops marked up by HEAD and TAIL markers for
    1353              :    the current function.  */
    1354              : 
    1355              : static oacc_loop *
    1356         9876 : oacc_loop_discovery ()
    1357              : {
    1358              :   /* Clear basic block flags, in particular BB_VISITED which we're going to use
    1359              :      in the following.  */
    1360         9876 :   clear_bb_flags ();
    1361              : 
    1362         9876 :   oacc_loop *top = new_oacc_loop_outer (current_function_decl);
    1363         9876 :   oacc_loop_discover_walk (top, ENTRY_BLOCK_PTR_FOR_FN (cfun));
    1364              : 
    1365              :   /* The siblings were constructed in reverse order, reverse them so
    1366              :      that diagnostics come out in an unsurprising order.  */
    1367         9876 :   top = oacc_loop_sibling_nreverse (top);
    1368              : 
    1369         9876 :   return top;
    1370              : }
    1371              : 
    1372              : /* Transform the abstract internal function markers starting at FROM
    1373              :    to be for partitioning level LEVEL.  Stop when we meet another HEAD
    1374              :    or TAIL marker.  */
    1375              : 
    1376              : static void
    1377        25614 : oacc_loop_xform_head_tail (gcall *from, int level)
    1378              : {
    1379        25614 :   enum ifn_unique_kind kind
    1380        25614 :     = (enum ifn_unique_kind) TREE_INT_CST_LOW (gimple_call_arg (from, 0));
    1381        25614 :   tree replacement = build_int_cst (unsigned_type_node, level);
    1382              : 
    1383        25614 :   for (gimple_stmt_iterator gsi = gsi_for_stmt (from);;)
    1384              :     {
    1385       107435 :       gimple *stmt = gsi_stmt (gsi);
    1386              : 
    1387       107435 :       if (gimple_call_internal_p (stmt, IFN_UNIQUE))
    1388              :         {
    1389        77068 :           enum ifn_unique_kind k
    1390              :             = ((enum ifn_unique_kind)
    1391        77068 :                TREE_INT_CST_LOW (gimple_call_arg (stmt, 0)));
    1392              : 
    1393        77068 :           if (k == IFN_UNIQUE_OACC_FORK
    1394        77068 :               || k == IFN_UNIQUE_OACC_JOIN
    1395        77068 :               || k == IFN_UNIQUE_OACC_PRIVATE)
    1396        25840 :             *gimple_call_arg_ptr (stmt, 2) = replacement;
    1397        51228 :           else if (k == kind && stmt != from)
    1398              :             break;
    1399              :         }
    1400        30367 :       else if (gimple_call_internal_p (stmt, IFN_GOACC_REDUCTION))
    1401        19228 :         *gimple_call_arg_ptr (stmt, 3) = replacement;
    1402        81821 :       update_stmt (stmt);
    1403              : 
    1404        81821 :       gsi_next (&gsi);
    1405       133275 :       while (gsi_end_p (gsi))
    1406       102908 :         gsi = gsi_start_bb (single_succ (gsi_bb (gsi)));
    1407              :     }
    1408        25614 : }
    1409              : 
    1410              : /* Process the discovered OpenACC loops, setting the correct
    1411              :    partitioning level etc.  */
    1412              : 
    1413              : static void
    1414        20711 : oacc_loop_process (oacc_loop *loop, int fn_level)
    1415              : {
    1416        20711 :   if (loop->child)
    1417         8641 :     oacc_loop_process (loop->child, fn_level);
    1418              : 
    1419        20711 :   if (loop->mask && !loop->routine)
    1420              :     {
    1421         8645 :       int ix;
    1422         8645 :       tree mask_arg = build_int_cst (unsigned_type_node, loop->mask);
    1423         8645 :       tree e_mask_arg = build_int_cst (unsigned_type_node, loop->e_mask);
    1424         8645 :       tree chunk_arg = loop->chunk_size;
    1425         8645 :       gcall *call;
    1426              : 
    1427        43708 :       for (ix = 0; loop->ifns.iterate (ix, &call); ix++)
    1428              :         {
    1429        35063 :           switch (gimple_call_internal_fn (call))
    1430              :             {
    1431        34857 :             case IFN_GOACC_LOOP:
    1432        34857 :               {
    1433        34857 :                 bool is_e = gimple_call_arg (call, 5) == integer_minus_one_node;
    1434        69335 :                 gimple_call_set_arg (call, 5, is_e ? e_mask_arg : mask_arg);
    1435        34857 :                 if (!is_e)
    1436        34478 :                   gimple_call_set_arg (call, 4, chunk_arg);
    1437              :               }
    1438              :               break;
    1439              : 
    1440          206 :             case IFN_GOACC_TILE:
    1441          206 :               gimple_call_set_arg (call, 3, mask_arg);
    1442          206 :               gimple_call_set_arg (call, 4, e_mask_arg);
    1443          206 :               break;
    1444              : 
    1445            0 :             default:
    1446            0 :               gcc_unreachable ();
    1447              :             }
    1448        35063 :           update_stmt (call);
    1449              :         }
    1450              : 
    1451         8645 :       unsigned dim = GOMP_DIM_GANG;
    1452         8645 :       unsigned mask = loop->mask | loop->e_mask;
    1453        21452 :       for (ix = 0; ix != GOMP_DIM_MAX && mask; ix++)
    1454              :         {
    1455        25113 :           while (!(GOMP_DIM_MASK (dim) & mask))
    1456        12306 :             dim++;
    1457              : 
    1458        12807 :           oacc_loop_xform_head_tail (loop->heads[ix], dim);
    1459        12807 :           oacc_loop_xform_head_tail (loop->tails[ix], dim);
    1460              : 
    1461        12807 :           mask ^= GOMP_DIM_MASK (dim);
    1462              :         }
    1463              :     }
    1464              : 
    1465        20711 :   if (loop->sibling)
    1466         2194 :     oacc_loop_process (loop->sibling, fn_level);
    1467              : 
    1468              : 
    1469              :   /* OpenACC 2.6, 2.9.11. "reduction clause" places a restriction such that
    1470              :      "The 'reduction' clause may not be specified on an orphaned 'loop'
    1471              :      construct with the 'gang' clause, or on an orphaned 'loop' construct that
    1472              :      will generate gang parallelism in a procedure that is compiled with the
    1473              :      'routine gang' clause."  */
    1474        20711 :   if (fn_level == GOMP_DIM_GANG
    1475          624 :       && (loop->mask & GOMP_DIM_MASK (GOMP_DIM_GANG))
    1476          209 :       && (loop->flags & OLF_REDUCTION))
    1477          106 :     error_at (loop->loc,
    1478              :               "gang reduction on an orphan loop");
    1479        20711 : }
    1480              : 
    1481              : /* Walk the OpenACC loop hierarchy checking and assigning the
    1482              :    programmer-specified partitionings.  OUTER_MASK is the partitioning
    1483              :    this loop is contained within.  Return mask of partitioning
    1484              :    encountered.  If any auto loops are discovered, set GOMP_DIM_MAX
    1485              :    bit.  */
    1486              : 
    1487              : static unsigned
    1488        20711 : oacc_loop_fixed_partitions (oacc_loop *loop, unsigned outer_mask)
    1489              : {
    1490        20711 :   unsigned this_mask = loop->mask;
    1491        20711 :   unsigned mask_all = 0;
    1492        20711 :   bool noisy = true;
    1493              : 
    1494              : #ifdef ACCEL_COMPILER
    1495              :   /* When device_type is supported, we want the device compiler to be
    1496              :      noisy, if the loop parameters are device_type-specific.  */
    1497              :   noisy = false;
    1498              : #endif
    1499              : 
    1500        20711 :   if (!loop->routine)
    1501              :     {
    1502        19510 :       bool auto_par = (loop->flags & OLF_AUTO) != 0;
    1503        19510 :       bool seq_par = (loop->flags & OLF_SEQ) != 0;
    1504        19510 :       bool tiling = (loop->flags & OLF_TILE) != 0;
    1505              : 
    1506        19510 :       this_mask = ((loop->flags >> OLF_DIM_BASE)
    1507              :                    & (GOMP_DIM_MASK (GOMP_DIM_MAX) - 1));
    1508              : 
    1509              :       /* Apply auto partitioning if this is a non-partitioned regular
    1510              :          loop, or (no more than) single axis tiled loop.  */
    1511        39020 :       bool maybe_auto
    1512        19510 :         = !seq_par && this_mask == (tiling ? this_mask & -this_mask : 0);
    1513              : 
    1514        19510 :       if ((this_mask != 0) + auto_par + seq_par > 1)
    1515              :         {
    1516          170 :           if (noisy)
    1517          250 :             error_at (loop->loc,
    1518              :                       seq_par
    1519              :                       ? G_("%<seq%> overrides other OpenACC loop specifiers")
    1520              :                       : G_("%<auto%> conflicts with other OpenACC loop "
    1521              :                            "specifiers"));
    1522          170 :           maybe_auto = false;
    1523          170 :           loop->flags &= ~OLF_AUTO;
    1524          170 :           if (seq_par)
    1525              :             {
    1526           90 :               loop->flags
    1527           90 :                 &= ~((GOMP_DIM_MASK (GOMP_DIM_MAX) - 1) << OLF_DIM_BASE);
    1528           90 :               this_mask = 0;
    1529              :             }
    1530              :         }
    1531              : 
    1532        19430 :       if (maybe_auto && (loop->flags & OLF_INDEPENDENT))
    1533              :         {
    1534         5699 :           loop->flags |= OLF_AUTO;
    1535         5699 :           mask_all |= GOMP_DIM_MASK (GOMP_DIM_MAX);
    1536              :         }
    1537              :     }
    1538              : 
    1539        20711 :   if (this_mask & outer_mask)
    1540              :     {
    1541          248 :       const oacc_loop *outer;
    1542          350 :       for (outer = loop->parent; outer; outer = outer->parent)
    1543          248 :         if ((outer->mask | outer->e_mask) & this_mask)
    1544              :           break;
    1545              : 
    1546          248 :       if (noisy)
    1547              :         {
    1548          248 :           if (outer)
    1549              :             {
    1550          146 :               error_at (loop->loc,
    1551          146 :                         loop->routine
    1552              :                         ? G_("routine call uses same OpenACC parallelism"
    1553              :                              " as containing loop")
    1554              :                         : G_("inner loop uses same OpenACC parallelism"
    1555              :                              " as containing loop"));
    1556          146 :               inform (outer->loc, "containing loop here");
    1557              :             }
    1558              :           else
    1559          102 :             error_at (loop->loc,
    1560          102 :                       loop->routine
    1561              :                       ? G_("routine call uses OpenACC parallelism disallowed"
    1562              :                            " by containing routine")
    1563              :                       : G_("loop uses OpenACC parallelism disallowed"
    1564              :                            " by containing routine"));
    1565              : 
    1566          248 :           if (loop->routine)
    1567          154 :             inform (DECL_SOURCE_LOCATION (loop->routine),
    1568              :                     "routine %qD declared here", loop->routine);
    1569              :         }
    1570          248 :       this_mask &= ~outer_mask;
    1571              :     }
    1572              :   else
    1573              :     {
    1574        20463 :       unsigned outermost = least_bit_hwi (this_mask);
    1575              : 
    1576        20463 :       if (outermost && outermost <= outer_mask)
    1577              :         {
    1578           40 :           if (noisy)
    1579              :             {
    1580           40 :               error_at (loop->loc,
    1581              :                         "incorrectly nested OpenACC loop parallelism");
    1582              : 
    1583           40 :               const oacc_loop *outer;
    1584           40 :               for (outer = loop->parent;
    1585           40 :                    outer->flags && outer->flags < outermost;
    1586            0 :                    outer = outer->parent)
    1587            0 :                 continue;
    1588           40 :               inform (outer->loc, "containing loop here");
    1589            0 :             }
    1590              : 
    1591           40 :           this_mask &= ~outermost;
    1592              :         }
    1593              :     }
    1594              : 
    1595        20711 :   mask_all |= this_mask;
    1596              : 
    1597        20711 :   if (loop->flags & OLF_TILE)
    1598              :     {
    1599              :       /* When tiling, vector goes to the element loop, and failing
    1600              :          that we put worker there.  The std doesn't contemplate
    1601              :          specifying all three.  We choose to put worker and vector on
    1602              :          the element loops in that case.  */
    1603          136 :       unsigned this_e_mask = this_mask & GOMP_DIM_MASK (GOMP_DIM_VECTOR);
    1604          136 :       if (!this_e_mask || this_mask & GOMP_DIM_MASK (GOMP_DIM_GANG))
    1605          120 :         this_e_mask |= this_mask & GOMP_DIM_MASK (GOMP_DIM_WORKER);
    1606              : 
    1607          136 :       loop->e_mask = this_e_mask;
    1608          136 :       this_mask ^= this_e_mask;
    1609              :     }
    1610              : 
    1611        20711 :   loop->mask = this_mask;
    1612              : 
    1613        20711 :   if (dump_file)
    1614          222 :     fprintf (dump_file, "Loop %s:%d user specified %d & %d\n",
    1615          444 :              LOCATION_FILE (loop->loc), LOCATION_LINE (loop->loc),
    1616              :              loop->mask, loop->e_mask);
    1617              : 
    1618        20711 :   if (loop->child)
    1619              :     {
    1620         8641 :       unsigned tmp_mask = outer_mask | this_mask | loop->e_mask;
    1621         8641 :       loop->inner = oacc_loop_fixed_partitions (loop->child, tmp_mask);
    1622         8641 :       mask_all |= loop->inner;
    1623              :     }
    1624              : 
    1625        20711 :   if (loop->sibling)
    1626         2194 :     mask_all |= oacc_loop_fixed_partitions (loop->sibling, outer_mask);
    1627              : 
    1628        20711 :   return mask_all;
    1629              : }
    1630              : 
    1631              : /* Walk the OpenACC loop hierarchy to assign auto-partitioned loops.
    1632              :    OUTER_MASK is the partitioning this loop is contained within.
    1633              :    OUTER_ASSIGN is true if an outer loop is being auto-partitioned.
    1634              :    Return the cumulative partitioning used by this loop, siblings and
    1635              :    children.  */
    1636              : 
    1637              : static unsigned
    1638        10192 : oacc_loop_auto_partitions (oacc_loop *loop, unsigned outer_mask,
    1639              :                            bool outer_assign)
    1640              : {
    1641        10192 :   bool assign = (loop->flags & OLF_AUTO) && (loop->flags & OLF_INDEPENDENT);
    1642        10192 :   bool noisy = true;
    1643        10192 :   bool tiling = loop->flags & OLF_TILE;
    1644              : 
    1645              : #ifdef ACCEL_COMPILER
    1646              :   /* When device_type is supported, we want the device compiler to be
    1647              :      noisy, if the loop parameters are device_type-specific.  */
    1648              :   noisy = false;
    1649              : #endif
    1650              : 
    1651        10192 :   if (assign && (!outer_assign || loop->inner))
    1652              :     {
    1653              :       /* Allocate outermost and non-innermost loops at the outermost
    1654              :          non-innermost available level.  */
    1655              :       unsigned this_mask = GOMP_DIM_MASK (GOMP_DIM_GANG);
    1656              : 
    1657              :       /* Find the first outermost available partition. */
    1658         6659 :       while (this_mask <= outer_mask)
    1659         1923 :         this_mask <<= 1;
    1660              : 
    1661              :       /* Grab two axes if tiling, and we've not assigned anything.  */
    1662         4736 :       if (tiling && !(loop->mask | loop->e_mask))
    1663           94 :         this_mask |= this_mask << 1;
    1664              : 
    1665              :       /* Prohibit the innermost partitioning at the moment.  */
    1666         4736 :       this_mask &= GOMP_DIM_MASK (GOMP_DIM_MAX - 1) - 1;
    1667              : 
    1668              :       /* Don't use any dimension explicitly claimed by an inner loop. */
    1669         4736 :       this_mask &= ~loop->inner;
    1670              : 
    1671         4736 :       if (tiling && !loop->e_mask)
    1672              :         {
    1673              :           /* If we got two axes, allocate the inner one to the element
    1674              :              loop.  */
    1675           98 :           loop->e_mask = this_mask & (this_mask << 1);
    1676           98 :           this_mask ^= loop->e_mask;
    1677              :         }
    1678              : 
    1679         4736 :       loop->mask |= this_mask;
    1680              :     }
    1681              : 
    1682        10192 :   if (loop->child)
    1683              :     {
    1684         5360 :       unsigned tmp_mask = outer_mask | loop->mask | loop->e_mask;
    1685         5360 :       loop->inner = oacc_loop_auto_partitions (loop->child, tmp_mask,
    1686         5360 :                                                outer_assign | assign);
    1687              :     }
    1688              : 
    1689        10192 :   if (assign && (!loop->mask || (tiling && !loop->e_mask) || !outer_assign))
    1690              :     {
    1691              :       /* Allocate the loop at the innermost available level.  Note
    1692              :          that we do this even if we already assigned this loop the
    1693              :          outermost available level above.  That way we'll partition
    1694              :          this along 2 axes, if they are available.  */
    1695         5044 :       unsigned this_mask = 0;
    1696              : 
    1697              :       /* Determine the outermost partitioning used within this loop.  */
    1698         5044 :       this_mask = loop->inner | GOMP_DIM_MASK (GOMP_DIM_MAX);
    1699         5044 :       this_mask = least_bit_hwi (this_mask);
    1700              : 
    1701              :       /* Pick the partitioning just inside that one.  */
    1702         5044 :       this_mask >>= 1;
    1703              : 
    1704              :       /* And avoid picking one used by an outer loop.  */
    1705         5044 :       this_mask &= ~outer_mask;
    1706              : 
    1707              :       /* If tiling and we failed completely above, grab the next one
    1708              :          too.  Making sure it doesn't hit an outer loop.  */
    1709         5044 :       if (tiling)
    1710              :         {
    1711          110 :           this_mask &= ~(loop->e_mask | loop->mask);
    1712          110 :           unsigned tile_mask = ((this_mask >> 1)
    1713          110 :                                 & ~(outer_mask | loop->e_mask | loop->mask));
    1714              : 
    1715          110 :           if (tile_mask || loop->mask)
    1716              :             {
    1717          102 :               loop->e_mask |= this_mask;
    1718          102 :               this_mask = tile_mask;
    1719              :             }
    1720          110 :           if (!loop->e_mask && noisy)
    1721            8 :             warning_at (loop->loc, 0,
    1722              :                         "insufficient partitioning available"
    1723              :                         " to parallelize element loop");
    1724              :         }
    1725              : 
    1726         5044 :       loop->mask |= this_mask;
    1727         5044 :       if (!loop->mask && noisy)
    1728         1078 :         warning_at (loop->loc, 0,
    1729              :                     tiling
    1730              :                     ? G_("insufficient partitioning available"
    1731              :                          " to parallelize tile loop")
    1732              :                     : G_("insufficient partitioning available"
    1733              :                          " to parallelize loop"));
    1734              :     }
    1735              : 
    1736         5699 :   if (assign && dump_file)
    1737           41 :     fprintf (dump_file, "Auto loop %s:%d assigned %d & %d\n",
    1738           82 :              LOCATION_FILE (loop->loc), LOCATION_LINE (loop->loc),
    1739              :              loop->mask, loop->e_mask);
    1740              : 
    1741        10192 :   unsigned inner_mask = 0;
    1742              : 
    1743        10192 :   if (loop->sibling)
    1744         1714 :     inner_mask |= oacc_loop_auto_partitions (loop->sibling,
    1745              :                                              outer_mask, outer_assign);
    1746              : 
    1747        10192 :   inner_mask |= loop->inner | loop->mask | loop->e_mask;
    1748              : 
    1749        10192 :   return inner_mask;
    1750              : }
    1751              : 
    1752              : /* Walk the OpenACC loop hierarchy to check and assign partitioning
    1753              :    axes.  OUTER_MASK is handed to both partitioners.  Return mask of partitioning.  */
    1754              : 
    1755              : static unsigned
    1756         9876 : oacc_loop_partition (oacc_loop *loop, unsigned outer_mask)
    1757              : {
    1758         9876 :   unsigned mask_all = oacc_loop_fixed_partitions (loop, outer_mask);
    1759              :   /* GOMP_DIM_MAX is not a real axis; its bit presumably flags that auto partitioning is wanted (TODO confirm).  Drop it and merge the auto result.  */
    1760         9876 :   if (mask_all & GOMP_DIM_MASK (GOMP_DIM_MAX))
    1761              :     {
    1762         3118 :       mask_all ^= GOMP_DIM_MASK (GOMP_DIM_MAX);
    1763         3118 :       mask_all |= oacc_loop_auto_partitions (loop, outer_mask, false);
    1764              :     }
    1765         9876 :   return mask_all;
    1766              : }
    1767              : 
    1768              : /* Default fork/join early expander.  Return false, so that the caller
    1769              :    deletes CALL, if there is no RTL expander for the fork (IS_FORK) or join.  */
    1770              : 
    1771              : bool
    1772        25614 : default_goacc_fork_join (gcall *ARG_UNUSED (call),
    1773              :                          const int *ARG_UNUSED (dims), bool is_fork)
    1774              : {
    1775        25614 :   if (is_fork)
    1776        12807 :     return targetm.have_oacc_fork ();
    1777              :   else
    1778        12807 :     return targetm.have_oacc_join ();
    1779              : }
    1780              : 
    1781              : /* Default goacc.reduction early expander.
    1782              :    CALL is replaced in place by the statements emitted (possibly none).
    1783              :    LHS-opt = IFN_REDUCTION (KIND, RES_PTR, VAR, LEVEL, OP, OFFSET)
    1784              :    If RES_PTR is not integer-zerop:
    1785              :        SETUP - emit 'LHS = *RES_PTR', LHS = NULL
    1786              :        TEARDOWN - emit '*RES_PTR = VAR'
    1787              :    If LHS is not NULL
    1788              :        emit 'LHS = VAR'   */
    1789              : 
    1790              : void
    1791        30884 : default_goacc_reduction (gcall *call)
    1792              : {
    1793        30884 :   unsigned code = (unsigned)TREE_INT_CST_LOW (gimple_call_arg (call, 0));
    1794        30884 :   gimple_stmt_iterator gsi = gsi_for_stmt (call);
    1795        30884 :   tree lhs = gimple_call_lhs (call);
    1796        30884 :   tree var = gimple_call_arg (call, 2);
    1797        30884 :   gimple_seq seq = NULL;
    1798              : 
    1799        30884 :   if (code == IFN_GOACC_REDUCTION_SETUP
    1800        30884 :       || code == IFN_GOACC_REDUCTION_TEARDOWN)
    1801              :     {
    1802              :       /* Setup and Teardown need to copy from/to the receiver object,
    1803              :          if there is one.  */
    1804        15442 :       tree ref_to_res = gimple_call_arg (call, 1);
    1805              : 
    1806        15442 :       if (!integer_zerop (ref_to_res))
    1807              :         {
    1808         5086 :           tree dst = build_simple_mem_ref (ref_to_res);
    1809         5086 :           tree src = var;
    1810              : 
    1811         5086 :           if (code == IFN_GOACC_REDUCTION_SETUP)
    1812              :             {
    1813         2543 :               src = dst;
    1814         2543 :               dst = lhs;
    1815         2543 :               lhs = NULL;
    1816              :             }
    1817         5086 :           gimple_seq_add_stmt (&seq, gimple_build_assign (dst, src));
    1818              :         }
    1819              :     }
    1820              : 
    1821              :   /* Copy VAR to LHS, if there is an LHS (SETUP with a receiver cleared it).  */
    1822        30884 :   if (lhs)
    1823        26734 :     gimple_seq_add_stmt (&seq, gimple_build_assign (lhs, var));
    1824              : 
    1825        30884 :   gsi_replace_with_seq (&gsi, seq, true);
    1826        30884 : }
    1827              : 
    1828              : struct var_decl_rewrite_info  /* State for oacc_rewrite_var_decl, reached through walk_stmt_info's INFO field.  */
    1829              : {
    1830              :   gimple *stmt;  /* Statement before which any new statements are inserted.  */
    1831              :   hash_map<tree, tree> *adjusted_vars;  /* Maps an original decl to its replacement.  */
    1832              :   bool avoid_pointer_conversion;  /* If set, '&VAR' is replaced directly, without convert_to_pointer.  */
    1833              :   bool modified;  /* Set to true whenever something was rewritten.  */
    1834              : };
    1835              : 
    1836              : /* Helper function for execute_oacc_device_lower.  Rewrite VAR_DECLs (by
    1837              :    themselves, under an ADDR_EXPR, or as the base of a COMPONENT_REF/ARRAY_REF
    1838              :    chain) according to ADJUSTED_VARS in the var_decl_rewrite_info that DATA, a
    1839              :    walk_stmt_info, points to via its INFO field; MODIFIED is set on any change.
    1840              :    Used as part of coercing gang-private variables in OpenACC offload regions to reside in GPU shared memory.  */
    1841              : 
    1842              : static tree
    1843            0 : oacc_rewrite_var_decl (tree *tp, int *walk_subtrees, void *data)
    1844              : {
    1845            0 :   walk_stmt_info *wi = (walk_stmt_info *) data;
    1846            0 :   var_decl_rewrite_info *info = (var_decl_rewrite_info *) wi->info;
    1847              :   /* Case 1: '&VAR' -- substitute the address of the replacement, if VAR has one.  */
    1848            0 :   if (TREE_CODE (*tp) == ADDR_EXPR)
    1849              :     {
    1850            0 :       tree arg = TREE_OPERAND (*tp, 0);
    1851            0 :       tree *new_arg = info->adjusted_vars->get (arg);
    1852              : 
    1853            0 :       if (new_arg)
    1854              :         {
    1855            0 :           if (info->avoid_pointer_conversion)
    1856              :             {
    1857            0 :               *tp = build_fold_addr_expr (*new_arg);
    1858            0 :               info->modified = true;
    1859            0 :               *walk_subtrees = 0;
    1860              :             }
    1861              :           else  /* Take the new address into an SSA name, convert that to the original pointer type, and use the converted SSA name.  */
    1862              :             {
    1863            0 :               gimple_stmt_iterator gsi = gsi_for_stmt (info->stmt);
    1864            0 :               tree repl = build_fold_addr_expr (*new_arg);
    1865            0 :               gimple *stmt1
    1866            0 :                 = gimple_build_assign (make_ssa_name (TREE_TYPE (repl)), repl);
    1867            0 :               tree conv = convert_to_pointer (TREE_TYPE (*tp),
    1868              :                                               gimple_assign_lhs (stmt1));
    1869            0 :               gimple *stmt2
    1870            0 :                 = gimple_build_assign (make_ssa_name (TREE_TYPE (*tp)), conv);
    1871            0 :               gsi_insert_before (&gsi, stmt1, GSI_SAME_STMT);
    1872            0 :               gsi_insert_before (&gsi, stmt2, GSI_SAME_STMT);
    1873            0 :               *tp = gimple_assign_lhs (stmt2);
    1874            0 :               info->modified = true;
    1875            0 :               *walk_subtrees = 0;
    1876              :             }
    1877              :         }
    1878              :     }
    1879            0 :   else if (TREE_CODE (*tp) == COMPONENT_REF || TREE_CODE (*tp) == ARRAY_REF)
    1880              :     {
    1881            0 :       tree *base = &TREE_OPERAND (*tp, 0);
    1882              :       /* Case 2: strip nested component/array references to reach the base object.  */
    1883            0 :       while (TREE_CODE (*base) == COMPONENT_REF
    1884            0 :              || TREE_CODE (*base) == ARRAY_REF)
    1885            0 :         base = &TREE_OPERAND (*base, 0);
    1886              :       /* Only a VAR_DECL base that has a replacement recorded is rewritten.  */
    1887            0 :       if (TREE_CODE (*base) != VAR_DECL)
    1888              :         return NULL;
    1889              : 
    1890            0 :       tree *new_decl = info->adjusted_vars->get (*base);
    1891            0 :       if (!new_decl)
    1892              :         return NULL;
    1893              :       /* The type qualifiers of the replacement decl get merged into the types below.  */
    1894            0 :       int base_quals = TYPE_QUALS (TREE_TYPE (*new_decl));
    1895            0 :       tree field = TREE_OPERAND (*tp, 1);
    1896              : 
    1897              :       /* Adjust the type of the field.  This writes through the FIELD_DECL (or, for array-typed fields, the innermost element type slot) in place.  */
    1898            0 :       int field_quals = TYPE_QUALS (TREE_TYPE (field));
    1899            0 :       if (TREE_CODE (field) == FIELD_DECL && field_quals != base_quals)
    1900              :         {
    1901            0 :           tree *field_type = &TREE_TYPE (field);
    1902            0 :           while (TREE_CODE (*field_type) == ARRAY_TYPE)
    1903            0 :             field_type = &TREE_TYPE (*field_type);
    1904            0 :           field_quals |= base_quals;
    1905            0 :           *field_type = build_qualified_type (*field_type, field_quals);
    1906              :         }
    1907              : 
    1908              :       /* Adjust the type of the component ref itself.  */
    1909            0 :       tree comp_type = TREE_TYPE (*tp);
    1910            0 :       int comp_quals = TYPE_QUALS (comp_type);
    1911            0 :       if (TREE_CODE (*tp) == COMPONENT_REF && comp_quals != base_quals)
    1912              :         {
    1913            0 :           comp_quals |= base_quals;
    1914            0 :           TREE_TYPE (*tp)
    1915            0 :             = build_qualified_type (comp_type, comp_quals);
    1916              :         }
    1917              :       /* Finally swap the base decl for its replacement.  */
    1918            0 :       *base = *new_decl;
    1919            0 :       info->modified = true;
    1920            0 :     }
    1921            0 :   else if (VAR_P (*tp))  /* Case 3: a bare variable.  */
    1922              :     {
    1923            0 :       tree *new_decl = info->adjusted_vars->get (*tp);
    1924            0 :       if (new_decl)
    1925              :         {
    1926            0 :           *tp = *new_decl;
    1927            0 :           info->modified = true;
    1928              :         }
    1929              :     }
    1930              : 
    1931              :   return NULL_TREE;
    1932              : }
    1933              : 
    1934              : /* Return TRUE if CALL is a call to a builtin atomic/sync operation.  */
    1935              : 
    1936              : static bool
    1937            0 : is_sync_builtin_call (gcall *call)
    1938              : {
    1939            0 :   tree callee = gimple_call_fndecl (call);
    1940              :   /* Including sync-builtins.def expands to one 'case' label per DEF_SYNC_BUILTIN entry; they all share the 'return true' below.  */
    1941            0 :   if (callee != NULL_TREE
    1942            0 :       && gimple_call_builtin_p (call, BUILT_IN_NORMAL))
    1943            0 :     switch (DECL_FUNCTION_CODE (callee))
    1944              :       {
    1945              : #undef DEF_SYNC_BUILTIN
    1946              : #define DEF_SYNC_BUILTIN(ENUM, NAME, TYPE, ATTRS) case ENUM:
    1947              : #include "sync-builtins.def"
    1948              : #undef DEF_SYNC_BUILTIN
    1949              :         return true;
    1950              : 
    1951              :       default:
    1952              :         ;
    1953              :       }
    1954              : 
    1955              :   return false;
    1956              : }
    1957              : 
    1958              : /* Main entry point for oacc transformations which run on the device
    1959              :    compiler after LTO, so we know what the target device is at this
    1960              :    point (including the host fallback).  Returns TODO_discard_function for a discarded 'nohost' routine, else 0.  */
    1961              : 
    1962              : static unsigned int
    1963        15267 : execute_oacc_loop_designation ()
    1964              : {
    1965        15267 :   tree attrs = oacc_get_fn_attrib (current_function_decl);
    1966              : 
    1967        15267 :   if (!attrs)
    1968              :     /* Not an offloaded function.  */
    1969              :     return 0;
    1970              : 
    1971              :   /* Parse the default dim argument exactly once; afterwards FLAG_OPENACC_DIMS points at itself as the "already parsed" marker.  */
    1972         9938 :   if ((const void *)flag_openacc_dims != &flag_openacc_dims)
    1973              :     {
    1974         2279 :       oacc_parse_default_dims (flag_openacc_dims);
    1975         2279 :       flag_openacc_dims = (char *)&flag_openacc_dims;
    1976              :     }
    1977              : 
    1978         9938 :   bool is_oacc_parallel
    1979         9938 :     = (lookup_attribute ("oacc parallel",
    1980         9938 :                          DECL_ATTRIBUTES (current_function_decl)) != NULL);
    1981         9938 :   bool is_oacc_kernels
    1982         9938 :     = (lookup_attribute ("oacc kernels",
    1983         9938 :                          DECL_ATTRIBUTES (current_function_decl)) != NULL);
    1984         9938 :   bool is_oacc_serial
    1985         9938 :     = (lookup_attribute ("oacc serial",
    1986         9938 :                          DECL_ATTRIBUTES (current_function_decl)) != NULL);
    1987         9938 :   bool is_oacc_parallel_kernels_parallelized
    1988         9938 :     = (lookup_attribute ("oacc parallel_kernels_parallelized",
    1989         9938 :                          DECL_ATTRIBUTES (current_function_decl)) != NULL);
    1990         9938 :   bool is_oacc_parallel_kernels_gang_single
    1991         9938 :     = (lookup_attribute ("oacc parallel_kernels_gang_single",
    1992         9938 :                          DECL_ATTRIBUTES (current_function_decl)) != NULL);
    1993         9938 :   int fn_level = oacc_fn_attrib_level (attrs);
    1994         9938 :   bool is_oacc_routine = (fn_level >= 0);
    1995         9938 :   gcc_checking_assert (is_oacc_parallel
    1996              :                        + is_oacc_kernels
    1997              :                        + is_oacc_serial
    1998              :                        + is_oacc_parallel_kernels_parallelized
    1999              :                        + is_oacc_parallel_kernels_gang_single
    2000              :                        + is_oacc_routine
    2001              :                        == 1);
    2002              :   /* The "oacc kernels parallelized" attribute is only expected together with "oacc kernels".  */
    2003         9938 :   bool is_oacc_kernels_parallelized
    2004         9938 :     = (lookup_attribute ("oacc kernels parallelized",
    2005         9938 :                          DECL_ATTRIBUTES (current_function_decl)) != NULL);
    2006         9938 :   if (is_oacc_kernels_parallelized)
    2007          386 :     gcc_checking_assert (is_oacc_kernels);
    2008              : 
    2009         9938 :   if (dump_file)
    2010              :     {
    2011          154 :       if (is_oacc_parallel)
    2012           34 :         fprintf (dump_file, "Function is OpenACC parallel offload\n");
    2013          120 :       else if (is_oacc_kernels)
    2014           66 :         fprintf (dump_file, "Function is %s OpenACC kernels offload\n",
    2015              :                  (is_oacc_kernels_parallelized
    2016              :                   ? "parallelized" : "unparallelized"));
    2017           82 :       else if (is_oacc_serial)
    2018           26 :         fprintf (dump_file, "Function is OpenACC serial offload\n");
    2019           56 :       else if (is_oacc_parallel_kernels_parallelized)
    2020            0 :         fprintf (dump_file, "Function is %s OpenACC kernels offload\n",
    2021              :                  "parallel_kernels_parallelized");
    2022           56 :       else if (is_oacc_parallel_kernels_gang_single)
    2023            0 :         fprintf (dump_file, "Function is %s OpenACC kernels offload\n",
    2024              :                  "parallel_kernels_gang_single");
    2025           56 :       else if (is_oacc_routine)
    2026           56 :         fprintf (dump_file, "Function is OpenACC routine level %d\n",
    2027              :                  fn_level);
    2028              :       else
    2029            0 :         gcc_unreachable ();
    2030              :     }
    2031              : 
    2032              :   /* This doesn't belong into 'pass_oacc_loop_designation' conceptually, but
    2033              :      it's a convenient place, so...  */
    2034         9938 :   if (is_oacc_routine)
    2035              :     {
    2036          554 :       tree attr = lookup_attribute ("omp declare target",
    2037          554 :                                     DECL_ATTRIBUTES (current_function_decl));
    2038          554 :       gcc_checking_assert (attr);
    2039          554 :       tree clauses = TREE_VALUE (attr);
    2040          554 :       gcc_checking_assert (clauses);
    2041              : 
    2042              :       /* Should this OpenACC routine be discarded?  */
    2043          554 :       bool discard = false;
    2044              : 
    2045          554 :       tree clause_nohost = omp_find_clause (clauses, OMP_CLAUSE_NOHOST);
    2046          554 :       if (dump_file)
    2047           56 :         fprintf (dump_file,
    2048              :                  "OpenACC routine '%s' %s '%s' clause.\n",
    2049           56 :                  lang_hooks.decl_printable_name (current_function_decl, 2),
    2050              :                  clause_nohost ? "has" : "doesn't have",
    2051           56 :                  omp_clause_code_name[OMP_CLAUSE_NOHOST]);
    2052              :       /* Host compiler, 'nohost' clause?  */
    2053              : #ifndef ACCEL_COMPILER
    2054          554 :       if (clause_nohost)
    2055           62 :         discard = true;
    2056              : #endif
    2057              : 
    2058          554 :       if (dump_file)
    2059          112 :         fprintf (dump_file,
    2060              :                  "OpenACC routine '%s' %sdiscarded.\n",
    2061           56 :                  lang_hooks.decl_printable_name (current_function_decl, 2),
    2062              :                  discard ? "" : "not ");
    2063          554 :       if (discard)
    2064              :         {
    2065           62 :           TREE_ASM_WRITTEN (current_function_decl) = 1;
    2066           62 :           return TODO_discard_function;
    2067              :         }
    2068              :     }
    2069              : 
    2070              :   /* Unparallelized OpenACC kernels constructs must get launched as 1 x 1 x 1
    2071              :      kernels, so remove the parallelism dimensions function attributes
    2072              :      potentially set earlier on.  */
    2073         9876 :   if (is_oacc_kernels && !is_oacc_kernels_parallelized)
    2074              :     {
    2075         1261 :       oacc_set_fn_attrib (current_function_decl, NULL, NULL);
    2076         1261 :       attrs = oacc_get_fn_attrib (current_function_decl);
    2077              :     }
    2078              : 
    2079              :   /* Discover, partition and process the loops.  */
    2080         9876 :   oacc_loop *loops = oacc_loop_discovery ();
    2081              :   /* For a routine, the axes below FN_LEVEL are passed on as already-used outer axes.  */
    2082         9876 :   unsigned outer_mask = 0;
    2083         9876 :   if (is_oacc_routine)
    2084          492 :     outer_mask = GOMP_DIM_MASK (fn_level) - 1;
    2085         9876 :   unsigned used_mask = oacc_loop_partition (loops, outer_mask);
    2086              :   /* OpenACC kernels constructs are special: they currently don't use the
    2087              :      generic oacc_loop infrastructure and attribute/dimension processing.  */
    2088         9876 :   if (is_oacc_kernels && is_oacc_kernels_parallelized)
    2089              :     {
    2090              :       /* Parallelized OpenACC kernels constructs use gang parallelism.  See
    2091              :          also tree-parloops.cc:create_parallel_loop.  */
    2092          386 :       used_mask |= GOMP_DIM_MASK (GOMP_DIM_GANG);
    2093              :     }
    2094              : 
    2095         9876 :   int dims[GOMP_DIM_MAX];
    2096         9876 :   oacc_validate_dims (current_function_decl, attrs, dims, fn_level, used_mask);
    2097              : 
    2098         9876 :   if (dump_file)
    2099              :     {
    2100              :       const char *comma = "Compute dimensions [";
    2101          456 :       for (int ix = 0; ix != GOMP_DIM_MAX; ix++, comma = ", ")
    2102          342 :         fprintf (dump_file, "%s%d", comma, dims[ix]);
    2103          114 :       fprintf (dump_file, "]\n");
    2104              :     }
    2105              : 
    2106              :   /* Verify that for OpenACC 'kernels' decomposed "gang-single" parts we launch
    2107              :      a single gang only.  */
    2108         9876 :   if (is_oacc_parallel_kernels_gang_single)
    2109          109 :     gcc_checking_assert (dims[GOMP_DIM_GANG] == 1);
    2110              : 
    2111         9876 :   oacc_loop_process (loops, fn_level);
    2112         9876 :   if (dump_file)
    2113              :     {
    2114          114 :       fprintf (dump_file, "OpenACC loops\n");
    2115          114 :       dump_oacc_loop (dump_file, loops, 0);
    2116          114 :       fprintf (dump_file, "\n");
    2117              :     }
    2118         9876 :   if (dump_enabled_p ())
    2119              :     {
    2120         2280 :       oacc_loop *l = loops;
    2121              :       /* OpenACC kernels constructs are special: they currently don't use the
    2122              :          generic oacc_loop infrastructure.  */
    2123         2280 :       if (is_oacc_kernels)
    2124              :         {
    2125              :           /* Create a fake oacc_loop for diagnostic purposes.  */
    2126          633 :           l = new_oacc_loop_raw (NULL,
    2127          633 :                                  DECL_SOURCE_LOCATION (current_function_decl));
    2128          633 :           l->mask = used_mask;
    2129              :         }
    2130              :       else
    2131              :         {
    2132              :           /* Skip the outermost, dummy OpenACC loop  */
    2133         1647 :           l = l->child;
    2134              :         }
    2135         2280 :       if (l)
    2136         1763 :         inform_oacc_loop (l);
    2137         2280 :       if (is_oacc_kernels)
    2138          633 :         free_oacc_loop (l);
    2139              :     }
    2140              : 
    2141         9876 :   free_oacc_loop (loops);
    2142              : 
    2143         9876 :   return 0;
    2144              : }
    2145              : 
    2146              : static unsigned int
    2147        15205 : execute_oacc_device_lower ()
    2148              : {
    2149        15205 :   tree attrs = oacc_get_fn_attrib (current_function_decl);
    2150              : 
    2151        15205 :   if (!attrs)
    2152              :     /* Not an offloaded function.  */
    2153              :     return 0;
    2154              : 
    2155              :   int dims[GOMP_DIM_MAX];
    2156        39504 :   for (unsigned i = 0; i < GOMP_DIM_MAX; i++)
    2157        29628 :     dims[i] = oacc_get_fn_dim_size (current_function_decl, i);
    2158              : 
    2159         9876 :   hash_map<tree, tree> adjusted_vars;
    2160              : 
    2161              :   /* Now lower internal loop functions to target-specific code
    2162              :      sequences.  */
    2163         9876 :   basic_block bb;
    2164       185146 :   FOR_ALL_BB_FN (bb, cfun)
    2165       934104 :     for (gimple_stmt_iterator gsi = gsi_start_bb (bb); !gsi_end_p (gsi);)
    2166              :       {
    2167       583564 :         gimple *stmt = gsi_stmt (gsi);
    2168       583564 :         if (!is_gimple_call (stmt))
    2169              :           {
    2170       383749 :             gsi_next (&gsi);
    2171       383749 :             continue;
    2172              :           }
    2173              : 
    2174       199815 :         gcall *call = as_a <gcall *> (stmt);
    2175       199815 :         if (!gimple_call_internal_p (call))
    2176              :           {
    2177         4779 :             gsi_next (&gsi);
    2178         4779 :             continue;
    2179              :           }
    2180              : 
    2181              :         /* Rewind to allow rescan.  */
    2182       195036 :         gsi_prev (&gsi);
    2183       195036 :         bool rescan = false, remove = false;
    2184       195036 :         enum  internal_fn ifn_code = gimple_call_internal_fn (call);
    2185              : 
    2186       195036 :         switch (ifn_code)
    2187              :           {
    2188              :           default: break;
    2189              : 
    2190          284 :           case IFN_GOACC_TILE:
    2191          284 :             oacc_xform_tile (call);
    2192          284 :             rescan = true;
    2193          284 :             break;
    2194              : 
    2195        46694 :           case IFN_GOACC_LOOP:
    2196        46694 :             oacc_xform_loop (call);
    2197        46694 :             rescan = true;
    2198        46694 :             break;
    2199              : 
    2200        30884 :           case IFN_GOACC_REDUCTION:
    2201              :             /* Mark the function for SSA renaming.  */
    2202        30884 :             mark_virtual_operands_for_renaming (cfun);
    2203              : 
    2204              :             /* If the level is -1, this ended up being an unused
    2205              :                axis.  Handle as a default.  */
    2206        30884 :             if (integer_minus_onep (gimple_call_arg (call, 3)))
    2207         8528 :               default_goacc_reduction (call);
    2208              :             else
    2209        22356 :               targetm.goacc.reduction (call);
    2210              :             rescan = true;
    2211              :             break;
    2212              : 
    2213        85483 :           case IFN_UNIQUE:
    2214        85483 :             {
    2215        85483 :               enum ifn_unique_kind kind
    2216              :                 = ((enum ifn_unique_kind)
    2217        85483 :                    TREE_INT_CST_LOW (gimple_call_arg (call, 0)));
    2218              : 
    2219        85483 :               switch (kind)
    2220              :                 {
    2221              :                 default:
    2222              :                   break;
    2223              : 
    2224        32978 :                 case IFN_UNIQUE_OACC_FORK:
    2225        32978 :                 case IFN_UNIQUE_OACC_JOIN:
    2226        32978 :                   if (integer_minus_onep (gimple_call_arg (call, 2)))
    2227              :                     remove = true;
    2228        25614 :                   else if (!targetm.goacc.fork_join
    2229        25614 :                            (call, dims, kind == IFN_UNIQUE_OACC_FORK))
    2230        85483 :                     remove = true;
    2231              :                   break;
    2232              : 
    2233              :                 case IFN_UNIQUE_OACC_HEAD_MARK:
    2234              :                 case IFN_UNIQUE_OACC_TAIL_MARK:
    2235        85483 :                   remove = true;
    2236              :                   break;
    2237              : 
    2238          259 :                 case IFN_UNIQUE_OACC_PRIVATE:
    2239          259 :                   {
    2240          259 :                     dump_flags_t l_dump_flags
    2241          259 :                       = get_openacc_privatization_dump_flags ();
    2242              : 
    2243          259 :                     location_t loc = gimple_location (stmt);
    2244          259 :                     if (LOCATION_LOCUS (loc) == UNKNOWN_LOCATION)
    2245           30 :                       loc = DECL_SOURCE_LOCATION (current_function_decl);
    2246          259 :                     const dump_user_location_t d_u_loc
    2247          259 :                       = dump_user_location_t::from_location_t (loc);
    2248              : 
    2249          259 :                     HOST_WIDE_INT level
    2250          259 :                       = TREE_INT_CST_LOW (gimple_call_arg (call, 2));
    2251          259 :                     gcc_checking_assert (level == -1
    2252              :                                          || (level >= 0
    2253              :                                              && level < GOMP_DIM_MAX));
    2254          339 :                     for (unsigned i = 3;
    2255          598 :                          i < gimple_call_num_args (call);
    2256              :                          i++)
    2257              :                       {
    2258          339 :                         static char const *const axes[] =
    2259              :                         /* Must be kept in sync with GOMP_DIM enumeration.  */
    2260              :                           { "gang", "worker", "vector" };
    2261              : 
    2262          339 :                         tree arg = gimple_call_arg (call, i);
    2263          339 :                         gcc_checking_assert (TREE_CODE (arg) == ADDR_EXPR);
    2264          339 :                         tree decl = TREE_OPERAND (arg, 0);
    2265          339 :                         if (dump_enabled_p ())
    2266              : /* PR100695 "Format decoder, quoting in 'dump_printf' etc." */
    2267              : #if __GNUC__ >= 10
    2268          318 : # pragma GCC diagnostic push
    2269          318 : # pragma GCC diagnostic ignored "-Wformat"
    2270              : #endif
    2271          318 :                           dump_printf_loc (l_dump_flags, d_u_loc,
    2272              :                                            "variable %<%T%> ought to be"
    2273              :                                            " adjusted for OpenACC"
    2274              :                                            " privatization level: %qs\n",
    2275              :                                            decl,
    2276              :                                            (level == -1
    2277              :                                             ? "UNKNOWN" : axes[level]));
    2278              : #if __GNUC__ >= 10
    2279          339 : # pragma GCC diagnostic pop
    2280              : #endif
    2281          339 :                         bool adjusted;
    2282          339 :                         if (level == -1)
    2283              :                           adjusted = false;
    2284          336 :                         else if (!targetm.goacc.adjust_private_decl)
    2285              :                           adjusted = false;
    2286            0 :                         else if (level == GOMP_DIM_VECTOR)
    2287              :                           {
    2288              :                             /* That's the default behavior.  */
    2289              :                             adjusted = true;
    2290              :                           }
    2291              :                         else
    2292              :                           {
    2293            0 :                             tree oldtype = TREE_TYPE (decl);
    2294            0 :                             tree newdecl
    2295            0 :                               = targetm.goacc.adjust_private_decl (loc, decl,
    2296            0 :                                                                    level);
    2297            0 :                             adjusted = (TREE_TYPE (newdecl) != oldtype
    2298            0 :                                         || newdecl != decl);
    2299            0 :                             if (adjusted)
    2300            0 :                               adjusted_vars.put (decl, newdecl);
    2301              :                           }
    2302            0 :                         if (adjusted
    2303            0 :                             && dump_enabled_p ())
    2304              : /* PR100695 "Format decoder, quoting in 'dump_printf' etc." */
    2305              : #if __GNUC__ >= 10
    2306            0 : # pragma GCC diagnostic push
    2307            0 : # pragma GCC diagnostic ignored "-Wformat"
    2308              : #endif
    2309            0 :                           dump_printf_loc (l_dump_flags, d_u_loc,
    2310              :                                            "variable %<%T%> adjusted for"
    2311              :                                            " OpenACC privatization level:"
    2312              :                                            " %qs\n",
    2313            0 :                                            decl, axes[level]);
    2314              : #if __GNUC__ >= 10
    2315          339 : # pragma GCC diagnostic pop
    2316              : #endif
    2317              :                       }
    2318          259 :                     remove = true;
    2319              :                   }
    2320          259 :                   break;
    2321              :                 }
    2322              :               break;
    2323              :             }
    2324              :           }
    2325              : 
    2326       195036 :         if (gsi_end_p (gsi))
    2327              :           /* We rewound past the beginning of the BB.  */
    2328       188824 :           gsi = gsi_start_bb (bb);
    2329              :         else
    2330              :           /* Undo the rewind.  */
    2331       100624 :           gsi_next (&gsi);
    2332              : 
    2333       195036 :         if (remove)
    2334              :           {
    2335       170966 :             if (gimple_vdef (call))
    2336        85483 :               replace_uses_by (gimple_vdef (call), gimple_vuse (call));
    2337        85483 :             if (gimple_call_lhs (call))
    2338              :               {
    2339              :                 /* Propagate the data dependency var.  */
    2340        79856 :                 gimple *ass = gimple_build_assign (gimple_call_lhs (call),
    2341              :                                                    gimple_call_arg (call, 1));
    2342        79856 :                 gsi_replace (&gsi, ass,  false);
    2343              :               }
    2344              :             else
    2345         5627 :               gsi_remove (&gsi, true);
    2346              :           }
    2347       109553 :         else if (!rescan)
    2348              :           /* If not rescanning, advance over the call.  */
    2349        31691 :           gsi_next (&gsi);
    2350              :       }
    2351              : 
    2352              :   /* Regarding the OpenACC privatization level, we're currently only looking at
    2353              :      making the gang-private level work.  Regarding that, we have the following
    2354              :      configurations:
    2355              : 
    2356              :        - GCN offloading: 'targetm.goacc.adjust_private_decl' does the work (in
    2357              :          particular, change 'TREE_TYPE', etc.) and there is no
    2358              :          'targetm.goacc.expand_var_decl'.
    2359              : 
    2360              :        - nvptx offloading: 'targetm.goacc.adjust_private_decl' only sets a
    2361              :          marker and then 'targetm.goacc.expand_var_decl' does the work.
    2362              : 
    2363              :      Eventually (in particular, for worker-private level?), both
    2364              :      'targetm.goacc.adjust_private_decl' and 'targetm.goacc.expand_var_decl'
    2365              :      may need to do things, but that's currently not meant to be addressed, and
    2366              :      thus not fully worked out and implemented, and thus untested.  Hence,
    2367              :      'assert' what currently is implemented/tested, only.  */
    2368              : 
    2369         9876 :   if (targetm.goacc.expand_var_decl)
    2370            0 :     gcc_assert (adjusted_vars.is_empty ());
    2371              : 
    2372              :   /* Make adjustments to gang-private local variables if required by the
    2373              :      target, e.g. forcing them into a particular address space.  Afterwards,
    2374              :      ADDR_EXPR nodes which have adjusted variables as their argument need to
    2375              :      be modified in one of two ways:
    2376              : 
    2377              :        1. They can be recreated, making a pointer to the variable in the new
    2378              :           address space, or
    2379              : 
    2380              :        2. The address of the variable in the new address space can be taken,
    2381              :           converted to the default (original) address space, and the result of
    2382              :           that conversion substituted in place of the original ADDR_EXPR node.
    2383              : 
    2384              :      Which of these is done depends on the gimple statement being processed.
    2385              :      At present atomic operations and inline asms use (1), and everything else
    2386              :      uses (2).  At least on AMD GCN, there are atomic operations that work
    2387              :      directly in the LDS address space.
    2388              : 
    2389              :      COMPONENT_REFS, ARRAY_REFS and plain VAR_DECLs are also rewritten to use
    2390              :      the new decl, adjusting types of appropriate tree nodes as necessary.  */
    2391              : 
    2392         9876 :   if (targetm.goacc.adjust_private_decl
    2393         9876 :       && !adjusted_vars.is_empty ())
    2394              :     {
    2395            0 :       FOR_ALL_BB_FN (bb, cfun)
    2396            0 :         for (gimple_stmt_iterator gsi = gsi_start_bb (bb);
    2397            0 :              !gsi_end_p (gsi);
    2398            0 :              gsi_next (&gsi))
    2399              :           {
    2400            0 :             gimple *stmt = gsi_stmt (gsi);
    2401            0 :             walk_stmt_info wi;
    2402            0 :             var_decl_rewrite_info info;
    2403              : 
    2404            0 :             info.avoid_pointer_conversion
    2405            0 :               = (is_gimple_call (stmt)
    2406            0 :                  && is_sync_builtin_call (as_a <gcall *> (stmt)))
    2407            0 :                 || gimple_code (stmt) == GIMPLE_ASM;
    2408            0 :             info.stmt = stmt;
    2409            0 :             info.modified = false;
    2410            0 :             info.adjusted_vars = &adjusted_vars;
    2411              : 
    2412            0 :             memset (&wi, 0, sizeof (wi));
    2413            0 :             wi.info = &info;
    2414              : 
    2415            0 :             walk_gimple_op (stmt, oacc_rewrite_var_decl, &wi);
    2416              : 
    2417            0 :             if (info.modified)
    2418            0 :               update_stmt (stmt);
    2419              :           }
    2420              :     }
    2421              : 
    2422         9876 :   return 0;
    2423         9876 : }
    2424              : 
    2425              : /* Default launch dimension validator.  Force every entry of DIMS to 1
    2426              :    and return true iff at least one entry had to be changed.  A backend
    2427              :    that wants to provide larger dimensions must override this hook.  */
    2428              : 
    2429              : bool
    2430        14434 : default_goacc_validate_dims (tree ARG_UNUSED (decl), int *dims,
    2431              :                              int ARG_UNUSED (fn_level),
    2432              :                              unsigned ARG_UNUSED (used))
    2433              : {
    2434        14434 :   bool changed = false;
    2435              :   /* Visit all GOMP_DIM_MAX axes, remembering whether any was overwritten.  */
    2436        57736 :   for (unsigned ix = 0; ix != GOMP_DIM_MAX; ix++)
    2437              :     {
    2438        43302 :       if (dims[ix] != 1)
    2439              :         {
    2440        33036 :           dims[ix] = 1;
    2441        33036 :           changed = true;
    2442              :         }
    2443              :     }
    2444              : 
    2445        14434 :   return changed;
    2446              : }
    2447              : 
    2448              : /* Default dimension bound for any AXIS: 0 (unknown) on accelerator, 1 on host.  */
    2449              : 
    2450              : int
    2451            0 : default_goacc_dim_limit (int ARG_UNUSED (axis))
    2452              : {
    2453              : #ifdef ACCEL_COMPILER
    2454              :   return 0;
    2455              : #else
    2456            0 :   return 1;
    2457              : #endif
    2458              : }
    2459              : 
    2460              : namespace {
    2461              : /* Descriptors and pass classes for the "oaccloops" and "oaccdevlow" passes.  */
    2462              : const pass_data pass_data_oacc_loop_designation =
    2463              : {
    2464              :   GIMPLE_PASS, /* type */
    2465              :   "oaccloops", /* name */
    2466              :   OPTGROUP_OMP, /* optinfo_flags */
    2467              :   TV_NONE, /* tv_id */
    2468              :   PROP_cfg, /* properties_required */
    2469              :   0 /* Possibly PROP_gimple_eomp.  */, /* properties_provided */
    2470              :   0, /* properties_destroyed */
    2471              :   0, /* todo_flags_start */
    2472              :   TODO_update_ssa | TODO_cleanup_cfg, /* todo_flags_finish */
    2473              : };
    2474              : 
    2475              : class pass_oacc_loop_designation : public gimple_opt_pass
    2476              : {
    2477              : public:
    2478       285722 :   pass_oacc_loop_designation (gcc::context *ctxt)
    2479       571444 :     : gimple_opt_pass (pass_data_oacc_loop_designation, ctxt)
    2480              :   {}
    2481              : 
    2482              :   /* opt_pass methods: the gate is flag_openacc; execute forwards to execute_oacc_loop_designation.  */
    2483      1472320 :   bool gate (function *) final override { return flag_openacc; };
    2484              : 
    2485        15267 :   unsigned int execute (function *) final override
    2486              :     {
    2487        15267 :       return execute_oacc_loop_designation ();
    2488              :     }
    2489              : 
    2490              : }; // class pass_oacc_loop_designation
    2491              : 
    2492              : const pass_data pass_data_oacc_device_lower =
    2493              : {
    2494              :   GIMPLE_PASS, /* type */
    2495              :   "oaccdevlow", /* name */
    2496              :   OPTGROUP_OMP, /* optinfo_flags */
    2497              :   TV_NONE, /* tv_id */
    2498              :   PROP_cfg, /* properties_required */
    2499              :   0 /* Possibly PROP_gimple_eomp.  */, /* properties_provided */
    2500              :   0, /* properties_destroyed */
    2501              :   0, /* todo_flags_start */
    2502              :   TODO_update_ssa | TODO_cleanup_cfg, /* todo_flags_finish */
    2503              : };
    2504              : 
    2505              : class pass_oacc_device_lower : public gimple_opt_pass
    2506              : {
    2507              : public:
    2508       285722 :   pass_oacc_device_lower (gcc::context *ctxt)
    2509       571444 :     : gimple_opt_pass (pass_data_oacc_device_lower, ctxt)
    2510              :   {}
    2511              : 
    2512              :   /* opt_pass methods: the gate is flag_openacc; execute forwards to execute_oacc_device_lower.  */
    2513      1472258 :   bool gate (function *) final override { return flag_openacc; };
    2514              : 
    2515        15205 :   unsigned int execute (function *) final override
    2516              :     {
    2517        15205 :       return execute_oacc_device_lower ();
    2518              :     }
    2519              : 
    2520              : }; // class pass_oacc_device_lower
    2521              : 
    2522              : } // anon namespace
    2523              : /* Return a newly allocated pass_oacc_loop_designation constructed with CTXT.  */
    2524              : gimple_opt_pass *
    2525       285722 : make_pass_oacc_loop_designation (gcc::context *ctxt)
    2526              : {
    2527       285722 :   return new pass_oacc_loop_designation (ctxt);
    2528              : }
    2529              : /* Return a newly allocated pass_oacc_device_lower constructed with CTXT.  */
    2530              : gimple_opt_pass *
    2531       285722 : make_pass_oacc_device_lower (gcc::context *ctxt)
    2532              : {
    2533       285722 :   return new pass_oacc_device_lower (ctxt);
    2534              : }
    2535              : 
    2536              : 
    2537              : /* Rewrite GOMP_SIMT_ENTER_ALLOC call given by GSI and replace the preceding
    2538              :    GOMP_SIMT_ENTER call identifying the privatized variables by a plain copy;
    2539              :    the variables become structure fields and receive a DECL_VALUE_EXPR.
    2540              :    Set *REGIMPLIFY to true, except if no privatized variables were seen.  */
    2541              : 
    2542              : static void
    2543            0 : ompdevlow_adjust_simt_enter (gimple_stmt_iterator *gsi, bool *regimplify)
    2544              : {
    2545            0 :   gimple *alloc_stmt = gsi_stmt (*gsi);
    2546            0 :   tree simtrec = gimple_call_lhs (alloc_stmt);
    2547            0 :   tree simduid = gimple_call_arg (alloc_stmt, 0);
    2548            0 :   gimple *enter_stmt = SSA_NAME_DEF_STMT (simduid);
    2549            0 :   gcc_assert (gimple_call_internal_p (enter_stmt, IFN_GOMP_SIMT_ENTER));
    2550            0 :   tree rectype = lang_hooks.types.make_type (RECORD_TYPE);
    2551            0 :   TYPE_ARTIFICIAL (rectype) = TYPE_NAMELESS (rectype) = 1;
    2552            0 :   TREE_ADDRESSABLE (rectype) = 1;
    2553            0 :   TREE_TYPE (simtrec) = build_pointer_type (rectype);
    2554            0 :   for (unsigned i = 1; i < gimple_call_num_args (enter_stmt); i++)
    2555              :     {
    2556            0 :       tree *argp = gimple_call_arg_ptr (enter_stmt, i);
    2557            0 :       if (*argp == null_pointer_node)
    2558            0 :         continue;
    2559            0 :       gcc_assert (TREE_CODE (*argp) == ADDR_EXPR
    2560              :                   && VAR_P (TREE_OPERAND (*argp, 0)));
    2561            0 :       tree var = TREE_OPERAND (*argp, 0);
    2562              :       /* Mirror VAR as a field: same location, name, type, alignment, volatility.  */
    2563            0 :       tree field = build_decl (DECL_SOURCE_LOCATION (var), FIELD_DECL,
    2564            0 :                                DECL_NAME (var), TREE_TYPE (var));
    2565            0 :       SET_DECL_ALIGN (field, DECL_ALIGN (var));
    2566            0 :       DECL_USER_ALIGN (field) = DECL_USER_ALIGN (var);
    2567            0 :       TREE_THIS_VOLATILE (field) = TREE_THIS_VOLATILE (var);
    2568              : 
    2569            0 :       insert_field_into_struct (rectype, field);
    2570              :       /* Make VAR stand for the FIELD component of *SIMTREC via its value expr.  */
    2571            0 :       tree t = build_simple_mem_ref (simtrec);
    2572            0 :       t = build3 (COMPONENT_REF, TREE_TYPE (var), t, field, NULL);
    2573            0 :       TREE_THIS_VOLATILE (t) = TREE_THIS_VOLATILE (var);
    2574            0 :       SET_DECL_VALUE_EXPR (var, t);
    2575            0 :       DECL_HAS_VALUE_EXPR_P (var) = 1;
    2576            0 :       *regimplify = true;
    2577              :     }
    2578            0 :   layout_type (rectype);
    2579            0 :   tree size = TYPE_SIZE_UNIT (rectype);
    2580            0 :   tree align = build_int_cst (TREE_TYPE (size), TYPE_ALIGN_UNIT (rectype));
    2581              :   /* Rebuild the ALLOC call with the record's size and alignment, keeping SIMTREC as lhs.  */
    2582            0 :   alloc_stmt
    2583            0 :     = gimple_build_call_internal (IFN_GOMP_SIMT_ENTER_ALLOC, 2, size, align);
    2584            0 :   gimple_call_set_lhs (alloc_stmt, simtrec);
    2585            0 :   gsi_replace (gsi, alloc_stmt, false);
    2586            0 :   gimple_stmt_iterator enter_gsi = gsi_for_stmt (enter_stmt);
    2587            0 :   enter_stmt = gimple_build_assign (simduid, gimple_call_arg (enter_stmt, 0));
    2588            0 :   gsi_replace (&enter_gsi, enter_stmt, false);
    2589              :   /* A single use of SIMTREC must be GOMP_SIMT_EXIT: clobber the record just before it.  */
    2590            0 :   use_operand_p use;
    2591            0 :   gimple *exit_stmt;
    2592            0 :   if (single_imm_use (simtrec, &use, &exit_stmt))
    2593              :     {
    2594            0 :       gcc_assert (gimple_call_internal_p (exit_stmt, IFN_GOMP_SIMT_EXIT));
    2595            0 :       gimple_stmt_iterator exit_gsi = gsi_for_stmt (exit_stmt);
    2596            0 :       tree clobber = build_clobber (rectype);
    2597            0 :       exit_stmt = gimple_build_assign (build_simple_mem_ref (simtrec), clobber);
    2598            0 :       gsi_insert_before (&exit_gsi, exit_stmt, GSI_SAME_STMT);
    2599              :     }
    2600              :   else
    2601            0 :     gcc_checking_assert (has_zero_uses (simtrec));
    2602            0 : }
    2603              : 
    2604              : /* Callback for walk_gimple_stmt: return *TP if it is a SIMT-privatized variable, else NULL_TREE.  */
    2605              : 
    2606              : static tree
    2607            0 : find_simtpriv_var_op (tree *tp, int *walk_subtrees, void *)
    2608              : {
    2609            0 :   tree t = *tp;
    2610              :   /* Match only variables with a value expr and the "omp simt private" attribute.  */
    2611            0 :   if (VAR_P (t)
    2612            0 :       && DECL_HAS_VALUE_EXPR_P (t)
    2613            0 :       && lookup_attribute ("omp simt private", DECL_ATTRIBUTES (t)))
    2614              :     {
    2615            0 :       *walk_subtrees = 0;
    2616            0 :       return t;
    2617              :     }
    2618              :   return NULL_TREE;
    2619              : }
    2620              : 
    2621              : /* Helper function for execute_omp_device_lower, invoked via walk_gimple_op.
    2622              :    Replace any OMP_TARGET_DEVICE_MATCHES or OMP_NEXT_VARIANT expr at *TP by
    2623              :    its resolved value; DATA is unused and NULL_TREE is always returned.  */
    2624              : static tree
    2625        13877 : resolve_omp_variant_cookies (tree *tp, int *walk_subtrees,
    2626              :                              void *data ATTRIBUTE_UNUSED)
    2627              : {
    2628        13877 :   if (TREE_CODE (*tp) == OMP_TARGET_DEVICE_MATCHES)
    2629              :     {
    2630            0 :       *tp = resolve_omp_target_device_matches (*tp);
    2631            0 :       *walk_subtrees = 0;
    2632            0 :       return NULL_TREE;
    2633              :     }
    2634              :   /* Any other tree code except OMP_NEXT_VARIANT is left untouched.  */
    2635        13877 :   if (TREE_CODE (*tp) != OMP_NEXT_VARIANT)
    2636              :     return NULL_TREE;
    2637          324 :   tree index = OMP_NEXT_VARIANT_INDEX (*tp);
    2638          324 :   tree state = OMP_NEXT_VARIANT_STATE (*tp);
    2639              : 
    2640              :   /* State is a triplet of (result-vector, construct_context, selector_vec).
    2641              :      If result-vector has already been computed, just use it.  Otherwise we
    2642              :      must resolve the variant and fill in that part of the state object.
    2643              :      All OMP_NEXT_VARIANT exprs for the same variant construct are supposed
    2644              :      to share the same state object, but if something bad happens and we end
    2645              :      up with copies, that is OK, it will just cause the result-vector to be
    2646              :      computed multiple times.  */
    2647          324 :   tree result_vector = TREE_PURPOSE (state);
    2648          324 :   if (!result_vector)
    2649              :     {
    2650          304 :       tree construct_context = TREE_VALUE (state);
    2651          304 :       tree selectors = TREE_CHAIN (state);
    2652              :       /* NOTE(review): CANDIDATES is never released here -- confirm whether that leaks.  */
    2653          304 :       vec<struct omp_variant> candidates
    2654          304 :         = omp_resolve_variant_construct (construct_context, selectors);
    2655          304 :       int n = TREE_VEC_LENGTH (selectors);
    2656          304 :       TREE_PURPOSE (state) = result_vector = make_tree_vec (n + 1);
    2657              :       /* The result vector maps the index of each element of the original
    2658              :          selectors vector onto the index of the next element of the filtered/
    2659              :          sorted candidates vector.  Since some of the original variants may
    2660              :          have been discarded as non-matching in candidates, initialize the
    2661              :          whole array to zero so that we have a placeholder "next" value for
    2662              :          those elements.  Hopefully dead code elimination will take care of
    2663              :          subsequently discarding the unreachable cases in the already-generated
    2664              :          switch statement.  */
    2665         2108 :       for (int i = 1; i <= n; i++)
    2666         1804 :         TREE_VEC_ELT (result_vector, i) = integer_zero_node;
    2667              :       /* Element 0 is the case label of the first variant in the sorted
    2668              :          list; each later slot J, indexed by the previous candidate's
    2669              :          alternative, holds the alternative of the candidate after it.  */
    2670            0 :         fprintf (dump_file, "Computing case map for variant directive\n");
    2671              :       int j = 0;
    2672         1588 :       for (unsigned int i = 0; i < candidates.length(); i++)
    2673              :         {
    2674         1284 :           if (dump_file)
    2675            0 :             fprintf (dump_file, "  %d -> case %d\n",
    2676            0 :                      j, (int) tree_to_shwi (candidates[i].alternative));
    2677         1284 :           TREE_VEC_ELT (result_vector, j) = candidates[i].alternative;
    2678         1284 :           j = (int) tree_to_shwi (candidates[i].alternative);
    2679              :         }
    2680              :     }
    2681              : 
    2682              :   /* Now just grab the value out of the precomputed array.  */
    2683          324 :   gcc_assert (TREE_CODE (index) == INTEGER_CST);
    2684          324 :   int indexval = (int) tree_to_shwi (index);
    2685          324 :   *tp = TREE_VEC_ELT (result_vector, indexval);
    2686          324 :   *walk_subtrees = 0;
    2687          324 :   return NULL_TREE;
    2688              : }
    2689              : 
    2690              : 
    2691              : /* Cleanup uses of SIMT placeholder internal functions: on non-SIMT targets,
    2692              :    VF is 1 and LANE is 0; on SIMT targets, VF is folded to a constant, and
    2693              :    LANE is kept to be expanded to RTL later on.  Also cleanup all other SIMT
    2694              :    internal functions on non-SIMT targets, and likewise some SIMD internal
    2695              :    functions on SIMT targets.  */
    2696              : 
    2697              : static unsigned int
    2698        20565 : execute_omp_device_lower ()
    2699              : {
    2700        20565 :   int vf = targetm.simt.vf ? targetm.simt.vf () : 1;
    2701        20565 :   bool regimplify = false;
    2702        20565 :   basic_block bb;
    2703        20565 :   gimple_stmt_iterator gsi;
    2704              : #ifdef ACCEL_COMPILER
    2705              :   bool omp_redirect_indirect_calls = vec_safe_length (offload_ind_funcs) > 0;
    2706              :   tree map_ptr_fn
    2707              :     = builtin_decl_explicit (BUILT_IN_GOMP_TARGET_MAP_INDIRECT_PTR);
    2708              : #endif
    2709              : 
    2710              :   /* Handle expansion of magic cookies for variant constructs first.  */
    2711        20565 :   if (cgraph_node::get (cfun->decl)->has_omp_variant_constructs)
    2712         1822 :     FOR_EACH_BB_FN (bb, cfun)
    2713              :       {
    2714         6873 :         for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
    2715         3409 :           walk_gimple_op (gsi_stmt (gsi), resolve_omp_variant_cookies, NULL);
    2716         2510 :         for (gsi = gsi_start_phis (bb); !gsi_end_p (gsi); gsi_next (&gsi))
    2717          778 :           walk_gimple_op (gsi_stmt (gsi), resolve_omp_variant_cookies, NULL);
    2718              :       }
    2719              : 
    2720        55539 :   FOR_EACH_BB_FN (bb, cfun)
    2721       239245 :     for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
    2722              :       {
    2723       169297 :         gimple *stmt = gsi_stmt (gsi);
    2724       169297 :         if (!is_gimple_call (stmt))
    2725       152561 :           continue;
    2726        16736 :         if (!gimple_call_internal_p (stmt))
    2727              :           {
    2728              : #ifdef ACCEL_COMPILER
    2729              :             if (omp_redirect_indirect_calls
    2730              :                 && gimple_call_fndecl (stmt) == NULL_TREE)
    2731              :               {
    2732              :                 gcall *orig_call = dyn_cast <gcall *> (stmt);
    2733              :                 tree call_fn = gimple_call_fn (stmt);
    2734              :                 tree fn_ty = TREE_TYPE (call_fn);
    2735              : 
    2736              :                 if (TREE_CODE (call_fn) == OBJ_TYPE_REF)
    2737              :                   {
    2738              :                     tree obj_ref = create_tmp_reg (TREE_TYPE (call_fn),
    2739              :                                                    ".ind_fn_objref");
    2740              :                     gimple *gassign = gimple_build_assign (obj_ref, call_fn);
    2741              :                     gsi_insert_before  (&gsi, gassign, GSI_SAME_STMT);
    2742              :                     call_fn = obj_ref;
    2743              :                   }
    2744              :                 tree mapped_fn = create_tmp_reg (fn_ty, ".ind_fn");
    2745              :                 gimple *gcall =
    2746              :                     gimple_build_call (map_ptr_fn, 1, call_fn);
    2747              :                 gimple_set_location (gcall, gimple_location (stmt));
    2748              :                 gimple_call_set_lhs (gcall, mapped_fn);
    2749              :                 gsi_insert_before (&gsi, gcall, GSI_SAME_STMT);
    2750              : 
    2751              :                 gimple_call_set_fn (orig_call, mapped_fn);
    2752              :                 update_stmt (orig_call);
    2753              :               }
    2754              : #endif
    2755        16538 :             continue;
    2756              :           }
    2757          198 :         tree lhs = gimple_call_lhs (stmt), rhs = NULL_TREE;
    2758          198 :         tree type = lhs ? TREE_TYPE (lhs) : integer_type_node;
    2759          198 :         switch (gimple_call_internal_fn (stmt))
    2760              :           {
    2761            0 :           case IFN_GOMP_TARGET_REV:
    2762            0 :             {
    2763              : #ifndef ACCEL_COMPILER
    2764            0 :               gimple_stmt_iterator gsi2 = gsi;
    2765            0 :               gsi_next (&gsi2);
    2766            0 :               gcc_assert (!gsi_end_p (gsi2));
    2767            0 :               gcc_assert (gimple_call_builtin_p (gsi_stmt (gsi2),
    2768              :                                                  BUILT_IN_GOMP_TARGET));
    2769            0 :               tree old_decl
    2770            0 :                 = TREE_OPERAND (gimple_call_arg (gsi_stmt (gsi2), 1), 0);
    2771            0 :               tree new_decl = gimple_call_arg (gsi_stmt (gsi), 0);
    2772            0 :               gimple_call_set_arg (gsi_stmt (gsi2), 1, new_decl);
    2773            0 :               update_stmt (gsi_stmt (gsi2));
    2774            0 :               new_decl = TREE_OPERAND (new_decl, 0);
    2775            0 :               unsigned i;
    2776            0 :               unsigned num_funcs = vec_safe_length (offload_funcs);
    2777            0 :               for (i = 0; i < num_funcs; i++)
    2778              :                 {
    2779            0 :                   if ((*offload_funcs)[i] == old_decl)
    2780              :                     {
    2781            0 :                       (*offload_funcs)[i] = new_decl;
    2782            0 :                       break;
    2783              :                     }
    2784            0 :                   else if ((*offload_funcs)[i] == new_decl)
    2785              :                     break;  /* This can happen due to inlining.  */
    2786              :                 }
    2787            0 :               gcc_assert (i < num_funcs);
    2788              : #else
    2789              :               tree old_decl = TREE_OPERAND (gimple_call_arg (gsi_stmt (gsi), 0),
    2790              :                                             0);
    2791              : #endif
    2792              :               /* FIXME: Find a way to actually prevent outputting the empty-body
    2793              :                  old_decl as debug symbol + function in the assembly file.  */
    2794            0 :               cgraph_node *node = cgraph_node::get (old_decl);
    2795            0 :               node->address_taken = false;
    2796            0 :               node->need_lto_streaming = false;
    2797            0 :               node->offloadable = false;
    2798              : 
    2799            0 :               unlink_stmt_vdef (stmt);
    2800              :             }
    2801            0 :             break;
    2802            0 :           case IFN_GOMP_USE_SIMT:
    2803            0 :             rhs = vf == 1 ? integer_zero_node : integer_one_node;
    2804              :             break;
    2805            0 :           case IFN_GOMP_SIMT_ENTER:
    2806            0 :             rhs = vf == 1 ? gimple_call_arg (stmt, 0) : NULL_TREE;
    2807            0 :             goto simtreg_enter_exit;
    2808            0 :           case IFN_GOMP_SIMT_ENTER_ALLOC:
    2809            0 :             if (vf != 1)
    2810            0 :               ompdevlow_adjust_simt_enter (&gsi, &regimplify);
    2811            0 :             rhs = vf == 1 ? null_pointer_node : NULL_TREE;
    2812            0 :             goto simtreg_enter_exit;
    2813            0 :           case IFN_GOMP_SIMT_EXIT:
    2814            0 :           simtreg_enter_exit:
    2815            0 :             if (vf != 1)
    2816            0 :               continue;
    2817            0 :             unlink_stmt_vdef (stmt);
    2818            0 :             break;
    2819            0 :           case IFN_GOMP_SIMT_LANE:
    2820            0 :           case IFN_GOMP_SIMT_LAST_LANE:
    2821            0 :             rhs = vf == 1 ? build_zero_cst (type) : NULL_TREE;
    2822              :             break;
    2823            0 :           case IFN_GOMP_SIMT_VF:
    2824            0 :             rhs = build_int_cst (type, vf);
    2825            0 :             break;
    2826            2 :           case IFN_GOMP_MAX_VF:
    2827            2 :             rhs = build_int_cst (type, omp_max_vf (false));
    2828            2 :             break;
    2829            0 :           case IFN_GOMP_SIMT_ORDERED_PRED:
    2830            0 :             rhs = vf == 1 ? integer_zero_node : NULL_TREE;
    2831            0 :             if (rhs || !lhs)
    2832            0 :               unlink_stmt_vdef (stmt);
    2833              :             break;
    2834            0 :           case IFN_GOMP_SIMT_VOTE_ANY:
    2835            0 :           case IFN_GOMP_SIMT_XCHG_BFLY:
    2836            0 :           case IFN_GOMP_SIMT_XCHG_IDX:
    2837            0 :             rhs = vf == 1 ? gimple_call_arg (stmt, 0) : NULL_TREE;
    2838              :             break;
    2839            0 :           case IFN_GOMP_SIMD_LANE:
    2840            0 :           case IFN_GOMP_SIMD_LAST_LANE:
    2841            0 :             rhs = vf != 1 ? build_zero_cst (type) : NULL_TREE;
    2842              :             break;
    2843            0 :           case IFN_GOMP_SIMD_VF:
    2844            0 :             rhs = vf != 1 ? build_one_cst (type) : NULL_TREE;
    2845              :             break;
    2846          196 :           default:
    2847          196 :             continue;
    2848          196 :           }
    2849            2 :         if (lhs && !rhs)
    2850            0 :           continue;
    2851            2 :         stmt = lhs ? gimple_build_assign (lhs, rhs) : gimple_build_nop ();
    2852            2 :         gsi_replace (&gsi, stmt, false);
    2853              :       }
    2854        20565 :   if (regimplify)
    2855            0 :     FOR_EACH_BB_REVERSE_FN (bb, cfun)
    2856            0 :       for (gsi = gsi_last_bb (bb); !gsi_end_p (gsi); gsi_prev (&gsi))
    2857            0 :         if (walk_gimple_stmt (&gsi, NULL, find_simtpriv_var_op, NULL))
    2858              :           {
    2859            0 :             if (gimple_clobber_p (gsi_stmt (gsi)))
    2860            0 :               gsi_remove (&gsi, true);
    2861              :             else
    2862            0 :               gimple_regimplify_operands (gsi_stmt (gsi), &gsi);
    2863              :           }
    2864        20565 :   if (vf != 1)
    2865            0 :     cfun->has_force_vectorize_loops = false;
    2866        20565 :   return 0;
    2867              : }
    2868              : 
    2869              : namespace {
    2870              : 
                        /* Static descriptor for the OpenMP device lowering pass implemented by
                           pass_omp_device_lower.  It declares a GIMPLE pass named "ompdevlow" that
                           requires PROP_cfg, provides PROP_gimple_lomp_dev and requests
                           TODO_update_ssa when it finishes.  */
    2871              : const pass_data pass_data_omp_device_lower =
    2872              : {
    2873              :   GIMPLE_PASS, /* type */
    2874              :   "ompdevlow", /* name */
    2875              :   OPTGROUP_OMP, /* optinfo_flags */
    2876              :   TV_NONE, /* tv_id */
    2877              :   PROP_cfg, /* properties_required */
    2878              :   PROP_gimple_lomp_dev, /* properties_provided */
    2879              :   0, /* properties_destroyed */
    2880              :   0, /* todo_flags_start */
    2881              :   TODO_update_ssa, /* todo_flags_finish */
    2882              : };
    2883              : 
                        /* Pass object for the "ompdevlow" pass.  It is constructed from
                           pass_data_omp_device_lower; its execute method forwards to
                           execute_omp_device_lower.  */
    2884              : class pass_omp_device_lower : public gimple_opt_pass
    2885              : {
    2886              : public:
    2887       285722 :   pass_omp_device_lower (gcc::context *ctxt)
    2888       571444 :     : gimple_opt_pass (pass_data_omp_device_lower, ctxt)
    2889              :   {}
    2890              : 
    2891              :   /* opt_pass methods: */
                          /* Run the pass on FUN when FUN does not yet have PROP_gimple_lomp_dev
                             among its current properties, or when -fopenmp is in effect
                             (flag_openmp) and either the function's cgraph node has
                             has_omp_variant_constructs set or, in an ACCEL_COMPILER build only,
                             offload_ind_funcs is non-empty.  */
    2892      1472258 :   bool gate (function *fun) final override
    2893              :     {
    2894      1472258 :       cgraph_node *node = cgraph_node::get (fun->decl);
    2895              : #ifdef ACCEL_COMPILER
    2896              :       bool offload_ind_funcs_p = vec_safe_length (offload_ind_funcs) > 0;
    2897              : #else
                              /* Without ACCEL_COMPILER this term never enables the pass.  */
    2898      1472258 :       bool offload_ind_funcs_p = false;
    2899              : #endif
                              /* NODE is only dereferenced when the property test fails and
                                 flag_openmp is set.  NOTE(review): there is no null check on
                                 NODE; presumably every function reaching this gate has a cgraph
                                 node -- confirm.  */
    2900      1472258 :       return (!(fun->curr_properties & PROP_gimple_lomp_dev)
    2901      1472258 :               || (flag_openmp
    2902        64557 :                   && (node->has_omp_variant_constructs || offload_ind_funcs_p)));
    2903              :     }
                          /* Perform the lowering; the function argument is unused and the result
                             of execute_omp_device_lower is returned unchanged.  */
    2904        20565 :   unsigned int execute (function *) final override
    2905              :     {
    2906        20565 :       return execute_omp_device_lower ();
    2907              :     }
    2908              : 
    2909              : }; // class pass_omp_device_lower
    2910              : 
    2911              : } // anon namespace
    2912              : 
                        /* Factory for the "ompdevlow" pass: return a newly allocated
                           pass_omp_device_lower constructed with CTXT.  */
    2913              : gimple_opt_pass *
    2914       285722 : make_pass_omp_device_lower (gcc::context *ctxt)
    2915              : {
    2916       285722 :   return new pass_omp_device_lower (ctxt);
    2917              : }
    2918              : 
    2919              : /* "omp declare target link" handling pass.  */
    2920              : 
    2921              : namespace {
    2922              : 
                        /* Static descriptor for the pass implemented by pass_omp_target_link.
                           It declares a GIMPLE pass named "omptargetlink" that requires PROP_ssa,
                           provides and destroys no properties, and requests TODO_update_ssa when
                           it finishes.  */
    2923              : const pass_data pass_data_omp_target_link =
    2924              : {
    2925              :   GIMPLE_PASS,                  /* type */
    2926              :   "omptargetlink",            /* name */
    2927              :   OPTGROUP_OMP,                 /* optinfo_flags */
    2928              :   TV_NONE,                      /* tv_id */
    2929              :   PROP_ssa,                     /* properties_required */
    2930              :   0,                            /* properties_provided */
    2931              :   0,                            /* properties_destroyed */
    2932              :   0,                            /* todo_flags_start */
    2933              :   TODO_update_ssa,              /* todo_flags_finish */
    2934              : };
    2935              : 
                        /* Pass object for the "omptargetlink" pass.  It is constructed from
                           pass_data_omp_target_link; execute is defined out of line as
                           pass_omp_target_link::execute.  */
    2936              : class pass_omp_target_link : public gimple_opt_pass
    2937              : {
    2938              : public:
    2939       285722 :   pass_omp_target_link (gcc::context *ctxt)
    2940       571444 :     : gimple_opt_pass (pass_data_omp_target_link, ctxt)
    2941              :   {}
    2942              : 
    2943              :   /* opt_pass methods: */
                          /* In an ACCEL_COMPILER build, run the pass exactly when
                             offloading_function_p holds for FUN's decl.  In any other build the
                             pass is never run.  */
    2944      1472258 :   bool gate (function *fun) final override
    2945              :     {
    2946              : #ifdef ACCEL_COMPILER
    2947              :       return offloading_function_p (fun->decl);
    2948              : #else
                              /* FUN is otherwise unused in this configuration.  */
    2949      1472258 :       (void) fun;
    2950      1472258 :       return false;
    2951              : #endif
    2952              :     }
    2953              : 
    2954              :   unsigned execute (function *) final override;
    2955              : };
    2956              : 
    2957              : /* Callback for walk_gimple_stmt used to scan for link var operands.
                           TP points to the operand being visited and DATA is the walk_stmt_info
                           of the walk.  When *TP is a global variable that has a DECL_VALUE_EXPR
                           and carries the "omp declare target link" attribute, *TP is replaced
                           by an unshared copy of that value expression, the replacement is also
                           recorded in the walk_stmt_info's info field, and *WALK_SUBTREES is
                           cleared so the replacement is not walked.  Always returns NULL_TREE,
                           so this callback never stops the walk.  */
    2958              : 
    2959              : static tree
    2960            0 : process_link_var_op (tree *tp, int *walk_subtrees, void *data)
    2961              : {
    2962            0 :   struct walk_stmt_info *wi = (struct walk_stmt_info *) data;
    2963            0 :   tree t = *tp;
    2964              : 
    2965            0 :   if (VAR_P (t)
    2966            0 :       && DECL_HAS_VALUE_EXPR_P (t)
    2967            0 :       && is_global_var (t)
    2968            0 :       && lookup_attribute ("omp declare target link", DECL_ATTRIBUTES (t)))
    2969              :     {
                              /* A non-null info field is what tells the caller that an operand
                                 was rewritten.  */
    2970            0 :       wi->info = *tp = unshare_expr (DECL_VALUE_EXPR (t));
    2971            0 :       *walk_subtrees = 0;
    2972            0 :       return NULL_TREE;
    2973              :     }
    2974              : 
    2975              :   return NULL_TREE;
    2976              : }
    2977              : 
                        /* Body of the "omptargetlink" pass.  Visit every statement of every
                           basic block of FUN and do two things:
                           - for a call to BUILT_IN_GOMP_TARGET, replace its second argument with
                             a null pointer, unless its first argument is the constant
                             GOMP_DEVICE_HOST_FALLBACK and the function named by the second
                             argument has the "omp target device_ancestor_nohost" attribute;
                           - rewrite operands via process_link_var_op and regimplify the
                             statement if any operand was replaced.
                           Always returns 0.  */
    2978              : unsigned
    2979            0 : pass_omp_target_link::execute (function *fun)
    2980              : {
    2981            0 :   basic_block bb;
    2982            0 :   FOR_EACH_BB_FN (bb, fun)
    2983              :     {
    2984            0 :       gimple_stmt_iterator gsi;
    2985            0 :       for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
    2986              :         {
    2987            0 :           if (gimple_call_builtin_p (gsi_stmt (gsi), BUILT_IN_GOMP_TARGET))
    2988              :             {
    2989            0 :               tree dev = gimple_call_arg (gsi_stmt (gsi), 0);
    2990            0 :               tree fn = gimple_call_arg (gsi_stmt (gsi), 1);
                                      /* Strip one level to reach the function decl; presumably
                                         FN is an ADDR_EXPR here -- TODO confirm.  */
    2991            0 :               if (POINTER_TYPE_P (TREE_TYPE (fn)))
    2992            0 :                 fn = TREE_OPERAND (fn, 0);
                                      /* Note that this 'continue' leaves the call untouched and
                                         also skips the link-variable operand walk below for this
                                         statement.  */
    2993            0 :               if (TREE_CODE (dev) == INTEGER_CST
    2994            0 :                   && wi::to_wide (dev) == GOMP_DEVICE_HOST_FALLBACK
    2995            0 :                   && lookup_attribute ("omp target device_ancestor_nohost",
    2996            0 :                                        DECL_ATTRIBUTES (fn)) != NULL_TREE)
    2997            0 :                 continue;  /* ancestor:1  */
    2998              :               /* Nullify the second argument of __builtin_GOMP_target_ext.  */
    2999            0 :               gimple_call_set_arg (gsi_stmt (gsi), 1, null_pointer_node);
    3000            0 :               update_stmt (gsi_stmt (gsi));
    3001              :             }
                                  /* Start each statement with a zeroed walk_stmt_info so that a
                                     non-null info field afterwards means process_link_var_op
                                     replaced an operand of this statement.  */
    3002            0 :           struct walk_stmt_info wi;
    3003            0 :           memset (&wi, 0, sizeof (wi));
    3004            0 :           walk_gimple_stmt (&gsi, NULL, process_link_var_op, &wi);
    3005            0 :           if (wi.info)
    3006            0 :             gimple_regimplify_operands (gsi_stmt (gsi), &gsi);
    3007              :         }
    3008              :     }
    3009              : 
    3010            0 :   return 0;
    3011              : }
    3012              : 
    3013              : } // anon namespace
    3014              : 
                        /* Factory for the "omptargetlink" pass: return a newly allocated
                           pass_omp_target_link constructed with CTXT.  */
    3015              : gimple_opt_pass *
    3016       285722 : make_pass_omp_target_link (gcc::context *ctxt)
    3017              : {
    3018       285722 :   return new pass_omp_target_link (ctxt);
    3019              : }
        

Generated by: LCOV version 2.4-beta

The LCOV profile is generated on an x86_64 machine using the following configure options: configure --disable-bootstrap --enable-coverage=opt --enable-languages=c,c++,fortran,go,jit,lto,rust,m2 --enable-host-shared. The GCC test suite is run with the built compiler.