LCOV - code coverage report
Current view: top level - gcc - omp-oacc-neuter-broadcast.cc (source / functions) Coverage Total Hit
Test: gcc.info Lines: 0.7 % 941 7
Test Date: 2026-02-28 14:20:25 Functions: 5.7 % 35 2
Legend: Lines:     hit not hit

            Line data    Source code
       1              : /* OpenACC worker partitioning via middle end neutering/broadcasting scheme
       2              : 
       3              :    Copyright (C) 2015-2026 Free Software Foundation, Inc.
       4              : 
       5              :    This file is part of GCC.
       6              : 
       7              :    GCC is free software; you can redistribute it and/or modify it
       8              :    under the terms of the GNU General Public License as published
       9              :    by the Free Software Foundation; either version 3, or (at your
      10              :    option) any later version.
      11              : 
      12              :    GCC is distributed in the hope that it will be useful, but WITHOUT
      13              :    ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
      14              :    or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public
      15              :    License for more details.
      16              : 
      17              :    You should have received a copy of the GNU General Public License
      18              :    along with GCC; see the file COPYING3.  If not see
      19              :    <http://www.gnu.org/licenses/>.  */
      20              : 
      21              : #include "config.h"
      22              : #include "system.h"
      23              : #include "coretypes.h"
      24              : #include "backend.h"
      25              : #include "rtl.h"
      26              : #include "tree.h"
      27              : #include "gimple.h"
      28              : #include "tree-pass.h"
      29              : #include "ssa.h"
      30              : #include "cgraph.h"
      31              : #include "pretty-print.h"
      32              : #include "fold-const.h"
      33              : #include "gimplify.h"
      34              : #include "gimple-iterator.h"
      35              : #include "gimple-walk.h"
      36              : #include "tree-inline.h"
      37              : #include "langhooks.h"
      38              : #include "omp-general.h"
      39              : #include "omp-low.h"
      40              : #include "gimple-pretty-print.h"
      41              : #include "cfghooks.h"
      42              : #include "insn-config.h"
      43              : #include "recog.h"
      44              : #include "internal-fn.h"
      45              : #include "bitmap.h"
      46              : #include "tree-nested.h"
      47              : #include "stor-layout.h"
      48              : #include "tree-ssa-threadupdate.h"
      49              : #include "tree-into-ssa.h"
      50              : #include "splay-tree.h"
      51              : #include "target.h"
      52              : #include "cfgloop.h"
      53              : #include "tree-cfg.h"
      54              : #include "omp-offload.h"
      55              : #include "attribs.h"
      56              : #include "targhooks.h"
      57              : #include "diagnostic-core.h"
      58              : 
/* Loop structure of the function.  The entire function is described as
   a NULL loop.  */
/* Adapted from 'gcc/config/nvptx/nvptx.cc:struct parallel'.  */

struct parallel_g
{
  /* Parent parallel.  */
  parallel_g *parent;

  /* Next sibling parallel.  */
  parallel_g *next;

  /* First child parallel.  */
  parallel_g *inner;

  /* Partitioning mask of the parallel.  */
  unsigned mask;

  /* Partitioning used within inner parallels. */
  unsigned inner_mask;

  /* Location of parallel forked and join.  The forked is the first
     block in the parallel and the join is the first block after the
     partition.  */
  basic_block forked_block;
  basic_block join_block;

  /* Statements marking the boundaries of this parallel; filled in
     while discovering parallels (see omp_sese_find_par).  */
  gimple *forked_stmt;
  gimple *join_stmt;

  gimple *fork_stmt;
  gimple *joining_stmt;

  /* Basic blocks in this parallel, but not in child parallels.  The
     FORKED and JOINING blocks are in the partition.  The FORK and JOIN
     blocks are not.  */
  auto_vec<basic_block> blocks;

  /* Broadcast machinery: record type used to marshal propagated values,
     and the sender/receiver decls of that type (NULL_TREE until set).  */
  tree record_type;
  tree sender_decl;
  tree receiver_decl;

public:
  parallel_g (parallel_g *parent, unsigned mode);
  ~parallel_g ();
};
     105              : 
     106              : /* Constructor links the new parallel into it's parent's chain of
     107              :    children.  */
     108              : 
     109            0 : parallel_g::parallel_g (parallel_g *parent_, unsigned mask_)
     110            0 :   :parent (parent_), next (0), inner (0), mask (mask_), inner_mask (0)
     111              : {
     112            0 :   forked_block = join_block = 0;
     113            0 :   forked_stmt = join_stmt = NULL;
     114            0 :   fork_stmt = joining_stmt = NULL;
     115              : 
     116            0 :   record_type = NULL_TREE;
     117            0 :   sender_decl = NULL_TREE;
     118            0 :   receiver_decl = NULL_TREE;
     119              : 
     120            0 :   if (parent)
     121              :     {
     122            0 :       next = parent->inner;
     123            0 :       parent->inner = this;
     124              :     }
     125            0 : }
     126              : 
     127            0 : parallel_g::~parallel_g ()
     128              : {
     129            0 :   delete inner;
     130            0 :   delete next;
     131            0 : }
     132              : 
     133              : static bool
     134            0 : local_var_based_p (tree decl)
     135              : {
     136            0 :   switch (TREE_CODE (decl))
     137              :     {
     138            0 :     case VAR_DECL:
     139            0 :       return !is_global_var (decl);
     140              : 
     141            0 :     case COMPONENT_REF:
     142            0 :     case BIT_FIELD_REF:
     143            0 :     case ARRAY_REF:
     144            0 :       return local_var_based_p (TREE_OPERAND (decl, 0));
     145              : 
     146              :     default:
     147              :       return false;
     148              :     }
     149              : }
     150              : 
     151              : /* Map of basic blocks to gimple stmts.  */
     152              : typedef hash_map<basic_block, gimple *> bb_stmt_map_t;
     153              : 
     154              : /* Calls to OpenACC routines are made by all workers/wavefronts/warps, since
     155              :    the routine likely contains partitioned loops (else will do its own
     156              :    neutering and variable propagation). Return TRUE if a function call CALL
     157              :    should be made in (worker) single mode instead, rather than redundant
     158              :    mode.  */
     159              : 
     160              : static bool
     161            0 : omp_sese_active_worker_call (gcall *call)
     162              : {
     163              : #define GOMP_DIM_SEQ GOMP_DIM_MAX
     164            0 :   tree fndecl = gimple_call_fndecl (call);
     165              : 
     166            0 :   if (!fndecl)
     167              :     return true;
     168              : 
     169            0 :   tree attrs = oacc_get_fn_attrib (fndecl);
     170              : 
     171            0 :   if (!attrs)
     172              :     return true;
     173              : 
     174            0 :   int level = oacc_fn_attrib_level (attrs);
     175              : 
     176              :   /* Neither regular functions nor "seq" routines should be run by all threads
     177              :      in worker-single mode.  */
     178            0 :   return level == -1 || level == GOMP_DIM_SEQ;
     179              : #undef GOMP_DIM_SEQ
     180              : }
     181              : 
/* Split basic blocks such that each forked and join unspecs are at
   the start of their basic blocks.  Thus afterwards each block will
   have a single partitioning mode.  We also do the same for return
   insns, as they are executed by every thread.  Return the
   partitioning mode of the function as a whole.  Populate MAP with
   head and tail blocks.  We also clear the BB visited flag, which is
   used when finding partitions.  */
/* Adapted from 'gcc/config/nvptx/nvptx.cc:nvptx_split_blocks'.  */

static void
omp_sese_split_blocks (bb_stmt_map_t *map)
{
  auto_vec<gimple *> worklist;
  basic_block block;

  /* Locate all the reorg instructions of interest.  */
  FOR_ALL_BB_FN (block, cfun)
    {
      /* Clear visited flag, for use by parallel locator  */
      block->flags &= ~BB_VISITED;

      for (gimple_stmt_iterator gsi = gsi_start_bb (block);
           !gsi_end_p (gsi);
           gsi_next (&gsi))
        {
          gimple *stmt = gsi_stmt (gsi);

          if (gimple_call_internal_p (stmt, IFN_UNIQUE))
            {
              enum ifn_unique_kind k = ((enum ifn_unique_kind)
                TREE_INT_CST_LOW (gimple_call_arg (stmt, 0)));

              if (k == IFN_UNIQUE_OACC_JOIN)
                worklist.safe_push (stmt);
              else if (k == IFN_UNIQUE_OACC_FORK)
                {
                  /* The fork is expected to end its block; the forked
                     region starts in the single successor.  */
                  gcc_assert (gsi_one_before_end_p (gsi));
                  basic_block forked_block = single_succ (block);
                  gimple_stmt_iterator gsi2 = gsi_start_bb (forked_block);

                  /* We push a NOP as a placeholder for the "forked" stmt.
                     This is then recognized in omp_sese_find_par.  */
                  gimple *nop = gimple_build_nop ();
                  gsi_insert_before (&gsi2, nop, GSI_SAME_STMT);

                  worklist.safe_push (nop);
                }
            }
          /* Returns, control-flow stmts and non-internal calls that must
             not run in worker-single mode also force block splits.  */
          else if (gimple_code (stmt) == GIMPLE_RETURN
                   || gimple_code (stmt) == GIMPLE_COND
                   || gimple_code (stmt) == GIMPLE_SWITCH
                   || (gimple_code (stmt) == GIMPLE_CALL
                       && !gimple_call_internal_p (stmt)
                       && !omp_sese_active_worker_call (as_a <gcall *> (stmt))))
            worklist.safe_push (stmt);
          else if (is_gimple_assign (stmt))
            {
              tree lhs = gimple_assign_lhs (stmt);

              /* Force assignments to components/fields/elements of local
                 aggregates into fully-partitioned (redundant) mode.  This
                 avoids having to broadcast the whole aggregate.  The RHS of
                 the assignment will be propagated using the normal
                 mechanism.  */

              switch (TREE_CODE (lhs))
                {
                case COMPONENT_REF:
                case BIT_FIELD_REF:
                case ARRAY_REF:
                  {
                    tree aggr = TREE_OPERAND (lhs, 0);

                    if (local_var_based_p (aggr))
                      worklist.safe_push (stmt);
                  }
                  break;

                default:
                  ;
                }
            }
        }
    }

  /* Split blocks on the worklist.  */
  unsigned ix;
  gimple *stmt;

  for (ix = 0; worklist.iterate (ix, &stmt); ix++)
    {
      basic_block block = gimple_bb (stmt);

      if (gimple_code (stmt) == GIMPLE_COND)
        {
          /* Rewrite "if (a OP b)" as "pred = a OP b; if (pred != false)"
             so the comparison lands in its own (splittable) block and
             only the boolean needs to reach the branch.  */
          gcond *orig_cond = as_a <gcond *> (stmt);
          tree_code code = gimple_expr_code (orig_cond);
          tree pred = make_ssa_name (boolean_type_node);
          gimple *asgn = gimple_build_assign (pred, code,
                           gimple_cond_lhs (orig_cond),
                           gimple_cond_rhs (orig_cond));
          gcond *new_cond
            = gimple_build_cond (NE_EXPR, pred, boolean_false_node,
                                 gimple_cond_true_label (orig_cond),
                                 gimple_cond_false_label (orig_cond));

          gimple_stmt_iterator gsi = gsi_for_stmt (stmt);
          gsi_insert_before (&gsi, asgn, GSI_SAME_STMT);
          gsi_replace (&gsi, new_cond, true);

          edge e = split_block (block, asgn);
          block = e->dest;
          map->get_or_insert (block) = new_cond;
        }
      else if ((gimple_code (stmt) == GIMPLE_CALL
                && !gimple_call_internal_p (stmt))
               || is_gimple_assign (stmt))
        {
          /* Isolate the call/assignment in a block of its own: split
             before the stmt and again immediately after it.  */
          gimple_stmt_iterator gsi = gsi_for_stmt (stmt);
          gsi_prev (&gsi);

          edge call = split_block (block, gsi_stmt (gsi));

          gimple *call_stmt = gsi_stmt (gsi_start_bb (call->dest));

          edge call_to_ret = split_block (call->dest, call_stmt);

          map->get_or_insert (call_to_ret->src) = call_stmt;
        }
      else
        {
          gimple_stmt_iterator gsi = gsi_for_stmt (stmt);
          gsi_prev (&gsi);

          /* Stmt already heads its block: record it without splitting.  */
          if (gsi_end_p (gsi))
            map->get_or_insert (block) = stmt;
          else
            {
              /* Split block before insn. The insn is in the new block.  */
              edge e = split_block (block, gsi_stmt (gsi));

              block = e->dest;
              map->get_or_insert (block) = stmt;
            }
        }
    }
}
     329              : 
     330              : static const char *
     331            0 : mask_name (unsigned mask)
     332              : {
     333            0 :   switch (mask)
     334              :     {
     335              :     case 0: return "gang redundant";
     336            0 :     case 1: return "gang partitioned";
     337            0 :     case 2: return "worker partitioned";
     338            0 :     case 3: return "gang+worker partitioned";
     339            0 :     case 4: return "vector partitioned";
     340            0 :     case 5: return "gang+vector partitioned";
     341            0 :     case 6: return "worker+vector partitioned";
     342            0 :     case 7: return "fully partitioned";
     343            0 :     default: return "<illegal>";
     344              :     }
     345              : }
     346              : 
     347              : /* Dump this parallel and all its inner parallels.  */
     348              : /* Adapted from 'gcc/config/nvptx/nvptx.cc:nvptx_dump_pars'.  */
     349              : 
     350              : static void
     351            0 : omp_sese_dump_pars (parallel_g *par, unsigned depth)
     352              : {
     353            0 :   fprintf (dump_file, "%u: mask %d (%s) head=%d, tail=%d\n",
     354              :            depth, par->mask, mask_name (par->mask),
     355            0 :            par->forked_block ? par->forked_block->index : -1,
     356            0 :            par->join_block ? par->join_block->index : -1);
     357              : 
     358            0 :   fprintf (dump_file, "    blocks:");
     359              : 
     360            0 :   basic_block block;
     361            0 :   for (unsigned ix = 0; par->blocks.iterate (ix, &block); ix++)
     362            0 :     fprintf (dump_file, " %d", block->index);
     363            0 :   fprintf (dump_file, "\n");
     364            0 :   if (par->inner)
     365            0 :     omp_sese_dump_pars (par->inner, depth + 1);
     366              : 
     367            0 :   if (par->next)
     368              :     omp_sese_dump_pars (par->next, depth);
     369            0 : }
     370              : 
/* If BLOCK contains a fork/join marker, process it to create or
   terminate a loop structure.  Add this block to the current loop,
   and then walk successor blocks.   */
/* Adapted from 'gcc/config/nvptx/nvptx.cc:nvptx_find_par'.  */

static parallel_g *
omp_sese_find_par (bb_stmt_map_t *map, parallel_g *par, basic_block block)
{
  if (block->flags & BB_VISITED)
    return par;
  block->flags |= BB_VISITED;

  if (gimple **stmtp = map->get (block))
    {
      gimple *stmt = *stmtp;

      if (gimple_code (stmt) == GIMPLE_COND
          || gimple_code (stmt) == GIMPLE_SWITCH
          || gimple_code (stmt) == GIMPLE_RETURN
          || (gimple_code (stmt) == GIMPLE_CALL
              && !gimple_call_internal_p (stmt))
          || is_gimple_assign (stmt))
        {
          /* A single block that is forced to be at the maximum partition
             level.  Make a singleton par for it.  */
          par = new parallel_g (par, GOMP_DIM_MASK (GOMP_DIM_GANG)
                                   | GOMP_DIM_MASK (GOMP_DIM_WORKER)
                                   | GOMP_DIM_MASK (GOMP_DIM_VECTOR));
          par->forked_block = block;
          par->forked_stmt = stmt;
          par->blocks.safe_push (block);
          par = par->parent;
          goto walk_successors;
        }
      else if (gimple_nop_p (stmt))
        {
          /* This is the NOP placeholder inserted by omp_sese_split_blocks:
             the real fork is the IFN_UNIQUE call ending the (single)
             predecessor block.  */
          basic_block pred = single_pred (block);
          gcc_assert (pred);
          gimple_stmt_iterator gsi = gsi_last_bb (pred);
          gimple *final_stmt = gsi_stmt (gsi);

          if (gimple_call_internal_p (final_stmt, IFN_UNIQUE))
            {
              gcall *call = as_a <gcall *> (final_stmt);
              enum ifn_unique_kind k = ((enum ifn_unique_kind)
                TREE_INT_CST_LOW (gimple_call_arg (call, 0)));

              if (k == IFN_UNIQUE_OACC_FORK)
                {
                  /* Open a new child parallel, partitioned on the axis
                     given by argument 2 of the fork marker.  */
                  HOST_WIDE_INT dim
                    = TREE_INT_CST_LOW (gimple_call_arg (call, 2));
                  unsigned mask = (dim >= 0) ? GOMP_DIM_MASK (dim) : 0;

                  par = new parallel_g (par, mask);
                  par->forked_block = block;
                  par->forked_stmt = final_stmt;
                  par->fork_stmt = stmt;
                }
              else
                gcc_unreachable ();
            }
          else
            gcc_unreachable ();
        }
      else if (gimple_call_internal_p (stmt, IFN_UNIQUE))
        {
          gcall *call = as_a <gcall *> (stmt);
          enum ifn_unique_kind k = ((enum ifn_unique_kind)
            TREE_INT_CST_LOW (gimple_call_arg (call, 0)));
          if (k == IFN_UNIQUE_OACC_JOIN)
            {
              /* Close the current parallel; its mask must match the
                 join marker's partitioning axis.  */
              HOST_WIDE_INT dim = TREE_INT_CST_LOW (gimple_call_arg (stmt, 2));
              unsigned mask = (dim >= 0) ? GOMP_DIM_MASK (dim) : 0;

              gcc_assert (par->mask == mask);
              par->join_block = block;
              par->join_stmt = stmt;
              par = par->parent;
            }
          else
            gcc_unreachable ();
        }
      else
        gcc_unreachable ();
    }

  if (par)
    /* Add this block onto the current loop's list of blocks.  */
    par->blocks.safe_push (block);
  else
    /* This must be the entry block.  Create a NULL parallel.  */
    par = new parallel_g (0, 0);

walk_successors:
  /* Walk successor blocks.  */
  edge e;
  edge_iterator ei;

  FOR_EACH_EDGE (e, ei, block->succs)
    omp_sese_find_par (map, par, e->dest);

  return par;
}
     474              : 
     475              : /* DFS walk the CFG looking for fork & join markers.  Construct
     476              :    loop structures as we go.  MAP is a mapping of basic blocks
     477              :    to head & tail markers, discovered when splitting blocks.  This
     478              :    speeds up the discovery.  We rely on the BB visited flag having
     479              :    been cleared when splitting blocks.  */
     480              : /* Adapted from 'gcc/config/nvptx/nvptx.cc:nvptx_discover_pars'.  */
     481              : 
     482              : static parallel_g *
     483            0 : omp_sese_discover_pars (bb_stmt_map_t *map)
     484              : {
     485            0 :   basic_block block;
     486              : 
     487              :   /* Mark exit blocks as visited.  */
     488            0 :   block = EXIT_BLOCK_PTR_FOR_FN (cfun);
     489            0 :   block->flags |= BB_VISITED;
     490              : 
     491              :   /* And entry block as not.  */
     492            0 :   block = ENTRY_BLOCK_PTR_FOR_FN (cfun);
     493            0 :   block->flags &= ~BB_VISITED;
     494              : 
     495            0 :   parallel_g *par = omp_sese_find_par (map, 0, block);
     496              : 
     497            0 :   if (dump_file)
     498              :     {
     499            0 :       fprintf (dump_file, "\nLoops\n");
     500            0 :       omp_sese_dump_pars (par, 0);
     501            0 :       fprintf (dump_file, "\n");
     502              :     }
     503              : 
     504            0 :   return par;
     505              : }
     506              : 
     507              : static void
     508            0 : populate_single_mode_bitmaps (parallel_g *par, bitmap worker_single,
     509              :                               bitmap vector_single, unsigned outer_mask,
     510              :                               int depth)
     511              : {
     512            0 :   unsigned mask = outer_mask | par->mask;
     513              : 
     514            0 :   basic_block block;
     515              : 
     516            0 :   for (unsigned i = 0; par->blocks.iterate (i, &block); i++)
     517              :     {
     518            0 :       if ((mask & GOMP_DIM_MASK (GOMP_DIM_WORKER)) == 0)
     519            0 :         bitmap_set_bit (worker_single, block->index);
     520              : 
     521            0 :       if ((mask & GOMP_DIM_MASK (GOMP_DIM_VECTOR)) == 0)
     522            0 :         bitmap_set_bit (vector_single, block->index);
     523              :     }
     524              : 
     525            0 :   if (par->inner)
     526            0 :     populate_single_mode_bitmaps (par->inner, worker_single, vector_single,
     527              :                                   mask, depth + 1);
     528            0 :   if (par->next)
     529              :     populate_single_mode_bitmaps (par->next, worker_single, vector_single,
     530              :                                   outer_mask, depth);
     531            0 : }
     532              : 
     533              : /* A map from SSA names or var decls to record fields.  */
     534              : 
     535              : typedef hash_map<tree, tree> field_map_t;
     536              : 
     537              : /* For each propagation record type, this is a map from SSA names or var decls
     538              :    to propagate, to the field in the record type that should be used for
     539              :    transmission and reception.  */
     540              : 
     541              : typedef hash_map<tree, field_map_t> record_field_map_t;
     542              : 
     543              : static void
     544            0 : install_var_field (tree var, tree record_type, field_map_t *fields)
     545              : {
     546            0 :   tree name;
     547            0 :   char tmp[20];
     548              : 
     549            0 :   if (TREE_CODE (var) == SSA_NAME)
     550              :     {
     551            0 :       name = SSA_NAME_IDENTIFIER (var);
     552            0 :       if (!name)
     553              :         {
     554            0 :           sprintf (tmp, "_%u", (unsigned) SSA_NAME_VERSION (var));
     555            0 :           name = get_identifier (tmp);
     556              :         }
     557              :     }
     558            0 :   else if (VAR_P (var))
     559              :     {
     560            0 :       name = DECL_NAME (var);
     561            0 :       if (!name)
     562              :         {
     563            0 :           sprintf (tmp, "D_%u", (unsigned) DECL_UID (var));
     564            0 :           name = get_identifier (tmp);
     565              :         }
     566              :     }
     567              :   else
     568            0 :     gcc_unreachable ();
     569              : 
     570            0 :   gcc_assert (!fields->get (var));
     571              : 
     572            0 :   tree type = TREE_TYPE (var);
     573              : 
     574            0 :   if (POINTER_TYPE_P (type)
     575            0 :       && TYPE_RESTRICT (type))
     576            0 :     type = build_qualified_type (type, TYPE_QUALS (type) & ~TYPE_QUAL_RESTRICT);
     577              : 
     578            0 :   tree field = build_decl (BUILTINS_LOCATION, FIELD_DECL, name, type);
     579              : 
     580            0 :   if (VAR_P (var) && type == TREE_TYPE (var))
     581              :     {
     582            0 :       SET_DECL_ALIGN (field, DECL_ALIGN (var));
     583            0 :       DECL_USER_ALIGN (field) = DECL_USER_ALIGN (var);
     584            0 :       TREE_THIS_VOLATILE (field) = TREE_THIS_VOLATILE (var);
     585              :     }
     586              :   else
     587            0 :     SET_DECL_ALIGN (field, TYPE_ALIGN (type));
     588              : 
     589            0 :   fields->put (var, field);
     590              : 
     591            0 :   insert_field_into_struct (record_type, field);
     592            0 : }
     593              : 
/* Sets of SSA_NAMEs or VAR_DECLs whose values must be broadcast from a
   worker-single block to the other workers; one set per basic block.  */
typedef hash_set<tree> propagation_set;
     596              : 
     597              : static void
     598            0 : find_ssa_names_to_propagate (parallel_g *par, unsigned outer_mask,
     599              :                              bitmap worker_single, bitmap vector_single,
     600              :                              vec<propagation_set *> *prop_set)
     601              : {
     602            0 :   unsigned mask = outer_mask | par->mask;
     603              : 
     604            0 :   if (par->inner)
     605            0 :     find_ssa_names_to_propagate (par->inner, mask, worker_single,
     606              :                                  vector_single, prop_set);
     607            0 :   if (par->next)
     608            0 :     find_ssa_names_to_propagate (par->next, outer_mask, worker_single,
     609              :                                  vector_single, prop_set);
     610              : 
     611            0 :   if (mask & GOMP_DIM_MASK (GOMP_DIM_WORKER))
     612              :     {
     613              :       basic_block block;
     614              :       int ix;
     615              : 
     616            0 :       for (ix = 0; par->blocks.iterate (ix, &block); ix++)
     617              :         {
     618            0 :           for (gphi_iterator psi = gsi_start_phis (block);
     619            0 :                !gsi_end_p (psi); gsi_next (&psi))
     620              :             {
     621            0 :               gphi *phi = psi.phi ();
     622            0 :               use_operand_p use;
     623            0 :               ssa_op_iter iter;
     624              : 
     625            0 :               FOR_EACH_PHI_ARG (use, phi, iter, SSA_OP_USE)
     626              :                 {
     627            0 :                   tree var = USE_FROM_PTR (use);
     628              : 
     629            0 :                   if (TREE_CODE (var) != SSA_NAME)
     630            0 :                     continue;
     631              : 
     632            0 :                   gimple *def_stmt = SSA_NAME_DEF_STMT (var);
     633              : 
     634            0 :                   if (gimple_nop_p (def_stmt))
     635            0 :                     continue;
     636              : 
     637            0 :                   basic_block def_bb = gimple_bb (def_stmt);
     638              : 
     639            0 :                   if (bitmap_bit_p (worker_single, def_bb->index))
     640              :                     {
     641            0 :                       if (!(*prop_set)[def_bb->index])
     642            0 :                         (*prop_set)[def_bb->index] = new propagation_set;
     643              : 
     644            0 :                       propagation_set *ws_prop = (*prop_set)[def_bb->index];
     645              : 
     646            0 :                       ws_prop->add (var);
     647              :                     }
     648              :                 }
     649              :             }
     650              : 
     651            0 :           for (gimple_stmt_iterator gsi = gsi_start_bb (block);
     652            0 :                !gsi_end_p (gsi); gsi_next (&gsi))
     653              :             {
     654            0 :               use_operand_p use;
     655            0 :               ssa_op_iter iter;
     656            0 :               gimple *stmt = gsi_stmt (gsi);
     657              : 
     658            0 :               FOR_EACH_SSA_USE_OPERAND (use, stmt, iter, SSA_OP_USE)
     659              :                 {
     660            0 :                   tree var = USE_FROM_PTR (use);
     661              : 
     662            0 :                   gimple *def_stmt = SSA_NAME_DEF_STMT (var);
     663              : 
     664            0 :                   if (gimple_nop_p (def_stmt))
     665            0 :                     continue;
     666              : 
     667            0 :                   basic_block def_bb = gimple_bb (def_stmt);
     668              : 
     669            0 :                   if (bitmap_bit_p (worker_single, def_bb->index))
     670              :                     {
     671            0 :                       if (!(*prop_set)[def_bb->index])
     672            0 :                         (*prop_set)[def_bb->index] = new propagation_set;
     673              : 
     674            0 :                       propagation_set *ws_prop = (*prop_set)[def_bb->index];
     675              : 
     676            0 :                       ws_prop->add (var);
     677              :                     }
     678              :                 }
     679              :             }
     680              :         }
     681              :     }
     682            0 : }
     683              : 
     684              : /* Callback for walk_gimple_stmt to find RHS VAR_DECLs (uses) in a
     685              :    statement.  */
     686              : 
     687              : static tree
     688            0 : find_partitioned_var_uses_1 (tree *node, int *, void *data)
     689              : {
     690            0 :   walk_stmt_info *wi = (walk_stmt_info *) data;
     691            0 :   hash_set<tree> *partitioned_var_uses = (hash_set<tree> *) wi->info;
     692              : 
     693            0 :   if (!wi->is_lhs && VAR_P (*node))
     694            0 :     partitioned_var_uses->add (*node);
     695              : 
     696            0 :   return NULL_TREE;
     697              : }
     698              : 
     699              : static void
     700            0 : find_partitioned_var_uses (parallel_g *par, unsigned outer_mask,
     701              :                            hash_set<tree> *partitioned_var_uses)
     702              : {
     703            0 :   unsigned mask = outer_mask | par->mask;
     704              : 
     705            0 :   if (par->inner)
     706            0 :     find_partitioned_var_uses (par->inner, mask, partitioned_var_uses);
     707            0 :   if (par->next)
     708            0 :     find_partitioned_var_uses (par->next, outer_mask, partitioned_var_uses);
     709              : 
     710            0 :   if (mask & GOMP_DIM_MASK (GOMP_DIM_WORKER))
     711              :     {
     712              :       basic_block block;
     713              :       int ix;
     714              : 
     715            0 :       for (ix = 0; par->blocks.iterate (ix, &block); ix++)
     716            0 :         for (gimple_stmt_iterator gsi = gsi_start_bb (block);
     717            0 :              !gsi_end_p (gsi); gsi_next (&gsi))
     718              :           {
     719            0 :             walk_stmt_info wi;
     720            0 :             memset (&wi, 0, sizeof (wi));
     721            0 :             wi.info = (void *) partitioned_var_uses;
     722            0 :             walk_gimple_stmt (&gsi, NULL, find_partitioned_var_uses_1, &wi);
     723              :           }
     724              :     }
     725            0 : }
     726              : 
     727              : /* Gang-private variables (typically placed in a GPU's shared memory) do not
     728              :    need to be processed by the worker-propagation mechanism.  Populate the
     729              :    GANG_PRIVATE_VARS set with any such variables found in the current
     730              :    function.  */
     731              : 
     732              : static void
     733            0 : find_gang_private_vars (hash_set<tree> *gang_private_vars)
     734              : {
     735            0 :   basic_block block;
     736              : 
     737            0 :   FOR_EACH_BB_FN (block, cfun)
     738              :     {
     739            0 :       for (gimple_stmt_iterator gsi = gsi_start_bb (block);
     740            0 :            !gsi_end_p (gsi);
     741            0 :            gsi_next (&gsi))
     742              :         {
     743            0 :           gimple *stmt = gsi_stmt (gsi);
     744              : 
     745            0 :           if (gimple_call_internal_p (stmt, IFN_UNIQUE))
     746              :             {
     747            0 :               enum ifn_unique_kind k = ((enum ifn_unique_kind)
     748            0 :                 TREE_INT_CST_LOW (gimple_call_arg (stmt, 0)));
     749            0 :               if (k == IFN_UNIQUE_OACC_PRIVATE)
     750              :                 {
     751            0 :                   HOST_WIDE_INT level
     752            0 :                     = TREE_INT_CST_LOW (gimple_call_arg (stmt, 2));
     753            0 :                   if (level != GOMP_DIM_GANG)
     754            0 :                     continue;
     755            0 :                   for (unsigned i = 3; i < gimple_call_num_args (stmt); i++)
     756              :                     {
     757            0 :                       tree arg = gimple_call_arg (stmt, i);
     758            0 :                       gcc_assert (TREE_CODE (arg) == ADDR_EXPR);
     759            0 :                       tree decl = TREE_OPERAND (arg, 0);
     760            0 :                       gang_private_vars->add (decl);
     761              :                     }
     762              :                 }
     763              :             }
     764              :         }
     765              :     }
     766            0 : }
     767              : 
/* Walk the region tree rooted at PAR and examine its worker-single blocks
   (those NOT worker-partitioned): any local VAR_DECL that a statement there
   may clobber, and that is also used in worker-partitioned code (per
   PARTITIONED_VAR_USES), must have its value broadcast to the other
   workers, so add it to the clobbering block's set in PROP_SET.  Writes to
   gang-private variables (GANG_PRIVATE_VARS) are exempt from broadcasting
   but are noted in WRITES_GANG_PRIVATE so a barrier can be emitted for the
   block instead.  OUTER_MASK is the partitioning mask inherited from
   enclosing regions.  */

static void
find_local_vars_to_propagate (parallel_g *par, unsigned outer_mask,
                              hash_set<tree> *partitioned_var_uses,
                              hash_set<tree> *gang_private_vars,
                              bitmap writes_gang_private,
                              vec<propagation_set *> *prop_set)
{
  unsigned mask = outer_mask | par->mask;

  if (par->inner)
    find_local_vars_to_propagate (par->inner, mask, partitioned_var_uses,
                                  gang_private_vars, writes_gang_private,
                                  prop_set);
  if (par->next)
    find_local_vars_to_propagate (par->next, outer_mask, partitioned_var_uses,
                                  gang_private_vars, writes_gang_private,
                                  prop_set);

  /* Note the inverted test: we only care about blocks that are NOT
     worker-partitioned, i.e. execute in worker-single mode.  */
  if (!(mask & GOMP_DIM_MASK (GOMP_DIM_WORKER)))
    {
      basic_block block;
      int ix;

      for (ix = 0; par->blocks.iterate (ix, &block); ix++)
        {
          for (gimple_stmt_iterator gsi = gsi_start_bb (block);
               !gsi_end_p (gsi); gsi_next (&gsi))
            {
              gimple *stmt = gsi_stmt (gsi);
              tree var;
              unsigned i;

              FOR_EACH_LOCAL_DECL (cfun, i, var)
                {
                  /* Only scalar locals that worker-partitioned code
                     actually reads are candidates for broadcasting.  */
                  if (!VAR_P (var)
                      || is_global_var (var)
                      || AGGREGATE_TYPE_P (TREE_TYPE (var))
                      || !partitioned_var_uses->contains (var))
                    continue;

                  if (stmt_may_clobber_ref_p (stmt, var))
                    {
                      if (dump_file)
                        {
                          fprintf (dump_file, "bb %u: local variable may be "
                                   "clobbered in %s mode: ", block->index,
                                   mask_name (mask));
                          print_generic_expr (dump_file, var, TDF_SLIM);
                          fprintf (dump_file, "\n");
                        }

                      if (gang_private_vars->contains (var))
                        {
                          /* If we write a gang-private variable, we want a
                             barrier at the end of the block.  */
                          bitmap_set_bit (writes_gang_private, block->index);
                          continue;
                        }

                      /* Lazily allocate this block's propagation set and
                         record the variable for broadcasting.  */
                      if (!(*prop_set)[block->index])
                        (*prop_set)[block->index] = new propagation_set;

                      propagation_set *ws_prop
                        = (*prop_set)[block->index];

                      ws_prop->add (var);
                    }
                }
            }
        }
    }
}
     840              : 
     841              : /* Transform basic blocks FROM, TO (which may be the same block) into:
     842              :    if (GOACC_single_start ())
     843              :      BLOCK;
     844              :    GOACC_barrier ();
     845              :                               \  |  /
     846              :                               +----+
     847              :                               |    |        (new) predicate block
     848              :                               +----+--
     849              :    \  |  /   \  |  /            |t    \
     850              :    +----+    +----+           +----+  |
     851              :    |    |    |    |     ===>  |    |  | f   (old) from block
     852              :    +----+    +----+           +----+  |
     853              :      |       t/  \f             |    /
     854              :                               +----+/
     855              :   (split  (split before       |    |        skip block
     856              :   at end)   condition)        +----+
     857              :                               t/  \f
     858              : */
     859              : 
static void
worker_single_simple (basic_block from, basic_block to,
                      hash_set<tree> *def_escapes_block)
{
  gimple *call, *cond;
  tree lhs, decl;
  basic_block skip_block;

  /* Split TO so that everything after it becomes the "skip" block where
     the neutered (idle) workers rejoin.  If TO ends in a control statement,
     split before it so the condition stays with the guarded code.  */
  gimple_stmt_iterator gsi = gsi_last_bb (to);
  if (EDGE_COUNT (to->succs) > 1)
    {
      gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_COND);
      gsi_prev (&gsi);
    }
  edge e = split_block (to, gsi_stmt (gsi));
  skip_block = e->dest;

  gimple_stmt_iterator start = gsi_after_labels (from);

  /* Emit "lhs = GOACC_single_start ()" at the top of FROM; per the diagram
     above, the guarded region runs only when this returns true.  */
  decl = builtin_decl_explicit (BUILT_IN_GOACC_SINGLE_START);
  lhs = create_tmp_var (TREE_TYPE (TREE_TYPE (decl)));
  call = gimple_build_call (decl, 0);
  gimple_call_set_lhs (call, lhs);
  gsi_insert_before (&start, call, GSI_NEW_STMT);
  update_stmt (call);

  /* Branch on "lhs == true".  */
  cond = gimple_build_cond (EQ_EXPR, lhs,
                            fold_convert_loc (UNKNOWN_LOCATION,
                                              TREE_TYPE (lhs),
                                              boolean_true_node),
                            NULL_TREE, NULL_TREE);
  gsi_insert_after (&start, cond, GSI_NEW_STMT);
  update_stmt (cond);

  /* Turn FROM's fallthrough into the true ("do the work") edge...  */
  edge et = split_block (from, cond);
  et->flags &= ~EDGE_FALLTHRU;
  et->flags |= EDGE_TRUE_VALUE;
  /* Make the active worker the more probable path so we prefer fallthrough
     (letting the idle workers jump around more).  */
  et->probability = profile_probability::likely ();

  /* ...and add a false edge that bypasses the work entirely.  */
  edge ef = make_edge (from, skip_block, EDGE_FALSE_VALUE);
  ef->probability = et->probability.invert ();

  /* Give the neutered path its own block so we can place dummy
     definitions on it below.  */
  basic_block neutered = split_edge (ef);
  gimple_stmt_iterator neut_gsi = gsi_last_bb (neutered);

  /* Every SSA definition made in the guarded block that is live beyond it
     needs a PHI in SKIP_BLOCK merging the real value (work path) with a
     placeholder (neutered path), to keep the function in valid SSA form.  */
  for (gsi = gsi_start_bb (et->dest); !gsi_end_p (gsi); gsi_next (&gsi))
    {
      gimple *stmt = gsi_stmt (gsi);
      ssa_op_iter iter;
      tree var;

      FOR_EACH_SSA_TREE_OPERAND (var, stmt, iter, SSA_OP_DEF)
        {
          if (def_escapes_block->contains (var))
            {
              gphi *join_phi = create_phi_node (NULL_TREE, skip_block);
              create_new_def_for (var, join_phi,
                                  gimple_phi_result_ptr (join_phi));
              add_phi_arg (join_phi, var, e, UNKNOWN_LOCATION);

              tree neutered_def = copy_ssa_name (var, NULL);
              /* We really want "don't care" or some value representing
                 undefined here, but optimizers will probably get rid of the
                 zero-assignments anyway.  */
              gassign *zero = gimple_build_assign (neutered_def,
                                build_zero_cst (TREE_TYPE (neutered_def)));

              gsi_insert_after (&neut_gsi, zero, GSI_CONTINUE_LINKING);
              update_stmt (zero);

              add_phi_arg (join_phi, neutered_def, single_succ_edge (neutered),
                           UNKNOWN_LOCATION);
              update_stmt (join_phi);
            }
        }
    }
}
     939              : 
     940              : static tree
     941            0 : build_receiver_ref (tree var, tree receiver_decl, field_map_t *fields)
     942              : {
     943            0 :   tree x = build_simple_mem_ref (receiver_decl);
     944            0 :   tree field = *fields->get (var);
     945            0 :   TREE_THIS_NOTRAP (x) = 1;
     946            0 :   x = omp_build_component_ref (x, field);
     947            0 :   return x;
     948              : }
     949              : 
     950              : static tree
     951            0 : build_sender_ref (tree var, tree sender_decl, field_map_t *fields)
     952              : {
     953            0 :   if (POINTER_TYPE_P (TREE_TYPE (sender_decl)))
     954            0 :     sender_decl = build_simple_mem_ref (sender_decl);
     955            0 :   tree field = *fields->get (var);
     956            0 :   return omp_build_component_ref (sender_decl, field);
     957              : }
     958              : 
     959              : static int
     960            0 : sort_by_ssa_version_or_uid (const void *p1, const void *p2)
     961              : {
     962            0 :   const tree t1 = *(const tree *)p1;
     963            0 :   const tree t2 = *(const tree *)p2;
     964              : 
     965            0 :   if (TREE_CODE (t1) == SSA_NAME && TREE_CODE (t2) == SSA_NAME)
     966            0 :     return SSA_NAME_VERSION (t1) - SSA_NAME_VERSION (t2);
     967            0 :   else if (TREE_CODE (t1) == SSA_NAME && TREE_CODE (t2) != SSA_NAME)
     968              :     return -1;
     969            0 :   else if (TREE_CODE (t1) != SSA_NAME && TREE_CODE (t2) == SSA_NAME)
     970              :     return 1;
     971              :   else
     972            0 :     return DECL_UID (t1) - DECL_UID (t2);
     973              : }
     974              : 
     975              : static int
     976            0 : sort_by_size_then_ssa_version_or_uid (const void *p1, const void *p2)
     977              : {
     978            0 :   const tree t1 = *(const tree *)p1;
     979            0 :   const tree t2 = *(const tree *)p2;
     980            0 :   unsigned HOST_WIDE_INT s1 = tree_to_uhwi (TYPE_SIZE (TREE_TYPE (t1)));
     981            0 :   unsigned HOST_WIDE_INT s2 = tree_to_uhwi (TYPE_SIZE (TREE_TYPE (t2)));
     982            0 :   if (s1 != s2)
     983            0 :     return s2 - s1;
     984              :   else
     985            0 :     return sort_by_ssa_version_or_uid (p1, p2);
     986              : }
     987              : 
     988              : static void
     989            0 : worker_single_copy (basic_block from, basic_block to,
     990              :                     hash_set<tree> *def_escapes_block,
     991              :                     hash_set<tree> *worker_partitioned_uses,
     992              :                     tree record_type, record_field_map_t *record_field_map,
     993              :                     unsigned HOST_WIDE_INT placement,
     994              :                     bool isolate_broadcasts, bool has_gang_private_write)
     995              : {
     996              :   /* If we only have virtual defs, we'll have no record type, but we still want
     997              :      to emit single_copy_start and (particularly) single_copy_end to act as
     998              :      a vdef source on the neutered edge representing memory writes on the
     999              :      non-neutered edge.  */
    1000            0 :   if (!record_type)
    1001            0 :     record_type = char_type_node;
    1002              : 
    1003            0 :   tree sender_decl
    1004            0 :     = targetm.goacc.create_worker_broadcast_record (record_type, true,
    1005              :                                                     ".oacc_worker_o",
    1006              :                                                     placement);
    1007            0 :   tree receiver_decl
    1008            0 :     = targetm.goacc.create_worker_broadcast_record (record_type, false,
    1009              :                                                     ".oacc_worker_i",
    1010              :                                                     placement);
    1011              : 
    1012            0 :   gimple_stmt_iterator gsi = gsi_last_bb (to);
    1013            0 :   if (EDGE_COUNT (to->succs) > 1)
    1014            0 :     gsi_prev (&gsi);
    1015            0 :   edge e = split_block (to, gsi_stmt (gsi));
    1016            0 :   basic_block barrier_block = e->dest;
    1017              : 
    1018            0 :   gimple_stmt_iterator start = gsi_after_labels (from);
    1019              : 
    1020            0 :   tree decl = builtin_decl_explicit (BUILT_IN_GOACC_SINGLE_COPY_START);
    1021              : 
    1022            0 :   tree lhs = create_tmp_var (TREE_TYPE (TREE_TYPE (decl)));
    1023              : 
    1024            0 :   gimple *call
    1025            0 :     = gimple_build_call (decl, 1,
    1026            0 :                          POINTER_TYPE_P (TREE_TYPE (sender_decl))
    1027            0 :                          ? sender_decl : build_fold_addr_expr (sender_decl));
    1028            0 :   gimple_call_set_lhs (call, lhs);
    1029            0 :   gsi_insert_before (&start, call, GSI_NEW_STMT);
    1030            0 :   update_stmt (call);
    1031              : 
    1032              :   /* The shared-memory range for this block overflowed.  Add a barrier before
    1033              :      the GOACC_single_copy_start call.  */
    1034            0 :   if (isolate_broadcasts)
    1035              :     {
    1036            0 :       decl = builtin_decl_explicit (BUILT_IN_GOACC_BARRIER);
    1037            0 :       gimple *acc_bar = gimple_build_call (decl, 0);
    1038            0 :       gsi_insert_before (&start, acc_bar, GSI_SAME_STMT);
    1039              :     }
    1040              : 
    1041            0 :   tree conv_tmp = make_ssa_name (TREE_TYPE (receiver_decl));
    1042              : 
    1043            0 :   gimple *conv = gimple_build_assign (conv_tmp,
    1044            0 :                                       fold_convert (TREE_TYPE (receiver_decl),
    1045              :                                                     lhs));
    1046            0 :   update_stmt (conv);
    1047            0 :   gsi_insert_after (&start, conv, GSI_NEW_STMT);
    1048            0 :   gimple *asgn = gimple_build_assign (receiver_decl, conv_tmp);
    1049            0 :   gsi_insert_after (&start, asgn, GSI_NEW_STMT);
    1050            0 :   update_stmt (asgn);
    1051              : 
    1052            0 :   tree zero_ptr = build_int_cst (TREE_TYPE (receiver_decl), 0);
    1053              : 
    1054            0 :   tree recv_tmp = make_ssa_name (TREE_TYPE (receiver_decl));
    1055            0 :   asgn = gimple_build_assign (recv_tmp, receiver_decl);
    1056            0 :   gsi_insert_after (&start, asgn, GSI_NEW_STMT);
    1057            0 :   update_stmt (asgn);
    1058              : 
    1059            0 :   gimple *cond = gimple_build_cond (EQ_EXPR, recv_tmp, zero_ptr, NULL_TREE,
    1060              :                                     NULL_TREE);
    1061            0 :   update_stmt (cond);
    1062              : 
    1063            0 :   gsi_insert_after (&start, cond, GSI_NEW_STMT);
    1064              : 
    1065            0 :   edge et = split_block (from, cond);
    1066            0 :   et->flags &= ~EDGE_FALLTHRU;
    1067            0 :   et->flags |= EDGE_TRUE_VALUE;
    1068              :   /* Make the active worker the more probable path so we prefer fallthrough
    1069              :      (letting the idle workers jump around more).  */
    1070            0 :   et->probability = profile_probability::likely ();
    1071              : 
    1072            0 :   basic_block body = et->dest;
    1073              : 
    1074            0 :   edge ef = make_edge (from, barrier_block, EDGE_FALSE_VALUE);
    1075            0 :   ef->probability = et->probability.invert ();
    1076              : 
    1077            0 :   gimple_stmt_iterator bar_gsi = gsi_start_bb (barrier_block);
    1078            0 :   cond = gimple_build_cond (NE_EXPR, recv_tmp, zero_ptr, NULL_TREE, NULL_TREE);
    1079              : 
    1080            0 :   if (record_type != char_type_node || has_gang_private_write)
    1081              :     {
    1082            0 :       decl = builtin_decl_explicit (BUILT_IN_GOACC_BARRIER);
    1083            0 :       gimple *acc_bar = gimple_build_call (decl, 0);
    1084              : 
    1085            0 :       gsi_insert_before (&bar_gsi, acc_bar, GSI_NEW_STMT);
    1086            0 :       gsi_insert_after (&bar_gsi, cond, GSI_NEW_STMT);
    1087            0 :     }
    1088              :   else
    1089            0 :     gsi_insert_before (&bar_gsi, cond, GSI_NEW_STMT);
    1090              : 
    1091            0 :   edge et2 = split_block (barrier_block, cond);
    1092            0 :   et2->flags &= ~EDGE_FALLTHRU;
    1093            0 :   et2->flags |= EDGE_TRUE_VALUE;
    1094            0 :   et2->probability = profile_probability::unlikely ();
    1095              : 
    1096            0 :   basic_block exit_block = et2->dest;
    1097              : 
    1098            0 :   basic_block copyout_block = split_edge (et2);
    1099            0 :   edge ef2 = make_edge (barrier_block, exit_block, EDGE_FALSE_VALUE);
    1100            0 :   ef2->probability = et2->probability.invert ();
    1101              : 
    1102            0 :   gimple_stmt_iterator copyout_gsi = gsi_start_bb (copyout_block);
    1103              : 
    1104            0 :   edge copyout_to_exit = single_succ_edge (copyout_block);
    1105              : 
    1106            0 :   gimple_seq sender_seq = NULL;
    1107              : 
    1108              :   /* Make sure we iterate over definitions in a stable order.  */
    1109            0 :   auto_vec<tree> escape_vec (def_escapes_block->elements ());
    1110            0 :   for (hash_set<tree>::iterator it = def_escapes_block->begin ();
    1111            0 :        it != def_escapes_block->end (); ++it)
    1112            0 :     escape_vec.quick_push (*it);
    1113            0 :   escape_vec.qsort (sort_by_ssa_version_or_uid);
    1114              : 
    1115            0 :   for (unsigned i = 0; i < escape_vec.length (); i++)
    1116              :     {
    1117            0 :       tree var = escape_vec[i];
    1118              : 
    1119            0 :       if (TREE_CODE (var) == SSA_NAME && SSA_NAME_IS_VIRTUAL_OPERAND (var))
    1120            0 :         continue;
    1121              : 
    1122            0 :       tree barrier_def = 0;
    1123              : 
    1124            0 :       if (TREE_CODE (var) == SSA_NAME)
    1125              :         {
    1126            0 :           gimple *def_stmt = SSA_NAME_DEF_STMT (var);
    1127              : 
    1128            0 :           if (gimple_nop_p (def_stmt))
    1129            0 :             continue;
    1130              : 
    1131              :           /* The barrier phi takes one result from the actual work of the
    1132              :              block we're neutering, and the other result is constant zero of
    1133              :              the same type.  */
    1134              : 
    1135            0 :           gphi *barrier_phi = create_phi_node (NULL_TREE, barrier_block);
    1136            0 :           barrier_def = create_new_def_for (var, barrier_phi,
    1137              :                           gimple_phi_result_ptr (barrier_phi));
    1138              : 
    1139            0 :           add_phi_arg (barrier_phi, var, e, UNKNOWN_LOCATION);
    1140            0 :           add_phi_arg (barrier_phi, build_zero_cst (TREE_TYPE (var)), ef,
    1141              :                        UNKNOWN_LOCATION);
    1142              : 
    1143            0 :           update_stmt (barrier_phi);
    1144              :         }
    1145              :       else
    1146            0 :         gcc_assert (VAR_P (var));
    1147              : 
    1148              :       /* If we had no record type, we will have no fields map.  */
    1149            0 :       field_map_t *fields = record_field_map->get (record_type);
    1150              : 
    1151            0 :       if (worker_partitioned_uses->contains (var)
    1152            0 :           && fields
    1153            0 :           && fields->get (var))
    1154              :         {
    1155            0 :           tree neutered_def = make_ssa_name (TREE_TYPE (var));
    1156              : 
    1157              :           /* Receive definition from shared memory block.  */
    1158              : 
    1159            0 :           tree receiver_ref = build_receiver_ref (var, receiver_decl, fields);
    1160            0 :           gassign *recv = gimple_build_assign (neutered_def,
    1161              :                                                receiver_ref);
    1162            0 :           gsi_insert_after (&copyout_gsi, recv, GSI_CONTINUE_LINKING);
    1163            0 :           update_stmt (recv);
    1164              : 
    1165            0 :           if (VAR_P (var))
    1166              :             {
    1167              :               /* If it's a VAR_DECL, we only copied to an SSA temporary.  Copy
    1168              :                  to the final location now.  */
    1169            0 :               gassign *asgn = gimple_build_assign (var, neutered_def);
    1170            0 :               gsi_insert_after (&copyout_gsi, asgn, GSI_CONTINUE_LINKING);
    1171            0 :               update_stmt (asgn);
    1172              :             }
    1173              :           else
    1174              :             {
    1175              :               /* If it's an SSA name, create a new phi at the join node to
    1176              :                  represent either the output from the active worker (the
    1177              :                  barrier) or the inactive workers (the copyout block).  */
    1178            0 :               gphi *join_phi = create_phi_node (NULL_TREE, exit_block);
    1179            0 :               create_new_def_for (barrier_def, join_phi,
    1180              :                                   gimple_phi_result_ptr (join_phi));
    1181            0 :               add_phi_arg (join_phi, barrier_def, ef2, UNKNOWN_LOCATION);
    1182            0 :               add_phi_arg (join_phi, neutered_def, copyout_to_exit,
    1183              :                            UNKNOWN_LOCATION);
    1184            0 :               update_stmt (join_phi);
    1185              :             }
    1186              : 
    1187              :           /* Send definition to shared memory block.  */
    1188              : 
    1189            0 :           tree sender_ref = build_sender_ref (var, sender_decl, fields);
    1190              : 
    1191            0 :           if (TREE_CODE (var) == SSA_NAME)
    1192              :             {
    1193            0 :               gassign *send = gimple_build_assign (sender_ref, var);
    1194            0 :               gimple_seq_add_stmt (&sender_seq, send);
    1195            0 :               update_stmt (send);
    1196              :             }
    1197            0 :           else if (VAR_P (var))
    1198              :             {
    1199            0 :               tree tmp = make_ssa_name (TREE_TYPE (var));
    1200            0 :               gassign *send = gimple_build_assign (tmp, var);
    1201            0 :               gimple_seq_add_stmt (&sender_seq, send);
    1202            0 :               update_stmt (send);
    1203            0 :               send = gimple_build_assign (sender_ref, tmp);
    1204            0 :               gimple_seq_add_stmt (&sender_seq, send);
    1205            0 :               update_stmt (send);
    1206              :             }
    1207              :           else
    1208            0 :             gcc_unreachable ();
    1209              :         }
    1210              :     }
    1211              : 
    1212              :   /* The shared-memory range for this block overflowed.  Add a barrier at the
    1213              :      end.  */
    1214            0 :   if (isolate_broadcasts)
    1215              :     {
    1216            0 :       gsi = gsi_start_bb (exit_block);
    1217            0 :       decl = builtin_decl_explicit (BUILT_IN_GOACC_BARRIER);
    1218            0 :       gimple *acc_bar = gimple_build_call (decl, 0);
    1219            0 :       gsi_insert_before (&gsi, acc_bar, GSI_SAME_STMT);
    1220              :     }
    1221              : 
    1222              :   /* It's possible for the ET->DEST block (the work done by the active thread)
    1223              :      to finish with a control-flow insn, e.g. a UNIQUE function call.  Split
    1224              :      the block and add SENDER_SEQ in the latter part to avoid having control
    1225              :      flow in the middle of a BB.  */
    1226              : 
    1227            0 :   decl = builtin_decl_explicit (BUILT_IN_GOACC_SINGLE_COPY_END);
    1228            0 :   call = gimple_build_call (decl, 1,
    1229            0 :                             POINTER_TYPE_P (TREE_TYPE (sender_decl))
    1230              :                             ? sender_decl
    1231            0 :                             : build_fold_addr_expr (sender_decl));
    1232            0 :   gimple_seq_add_stmt (&sender_seq, call);
    1233              : 
    1234            0 :   gsi = gsi_last_bb (body);
    1235            0 :   gimple *last = gsi_stmt (gsi);
    1236            0 :   basic_block sender_block = split_block (body, last)->dest;
    1237            0 :   gsi = gsi_last_bb (sender_block);
    1238            0 :   gsi_insert_seq_after (&gsi, sender_seq, GSI_CONTINUE_LINKING);
    1239            0 : }
    1240              : 
    1241              : typedef hash_map<basic_block, std::pair<unsigned HOST_WIDE_INT, bool> >
    1242              :   blk_offset_map_t;
    1243              : 
    1244              : static void
    1245            0 : neuter_worker_single (parallel_g *par, unsigned outer_mask,
    1246              :                       bitmap worker_single, bitmap vector_single,
    1247              :                       vec<propagation_set *> *prop_set,
    1248              :                       hash_set<tree> *partitioned_var_uses,
    1249              :                       record_field_map_t *record_field_map,
    1250              :                       blk_offset_map_t *blk_offset_map,
    1251              :                       bitmap writes_gang_private)
    1252              : {
    1253            0 :   unsigned mask = outer_mask | par->mask;
    1254              : 
    1255            0 :   if ((mask & GOMP_DIM_MASK (GOMP_DIM_WORKER)) == 0)
    1256              :     {
    1257              :       basic_block block;
    1258              : 
    1259            0 :       for (unsigned i = 0; par->blocks.iterate (i, &block); i++)
    1260              :         {
    1261            0 :           bool has_defs = false;
    1262            0 :           hash_set<tree> def_escapes_block;
    1263            0 :           hash_set<tree> worker_partitioned_uses;
    1264            0 :           unsigned j;
    1265            0 :           tree var;
    1266              : 
    1267            0 :           FOR_EACH_SSA_NAME (j, var, cfun)
    1268              :             {
    1269            0 :               if (SSA_NAME_IS_VIRTUAL_OPERAND (var))
    1270              :                 {
    1271            0 :                   has_defs = true;
    1272            0 :                   continue;
    1273              :                 }
    1274              : 
    1275            0 :               gimple *def_stmt = SSA_NAME_DEF_STMT (var);
    1276              : 
    1277            0 :               if (gimple_nop_p (def_stmt))
    1278            0 :                 continue;
    1279              : 
    1280            0 :               if (gimple_bb (def_stmt)->index != block->index)
    1281            0 :                 continue;
    1282              : 
    1283            0 :               gimple *use_stmt;
    1284            0 :               imm_use_iterator use_iter;
    1285            0 :               bool uses_outside_block = false;
    1286            0 :               bool worker_partitioned_use = false;
    1287              : 
    1288            0 :               FOR_EACH_IMM_USE_STMT (use_stmt, use_iter, var)
    1289              :                 {
    1290            0 :                   int blocknum = gimple_bb (use_stmt)->index;
    1291              : 
    1292              :                   /* Don't propagate SSA names that are only used in the
    1293              :                      current block, unless the usage is in a phi node: that
    1294              :                      means the name left the block, then came back in at the
    1295              :                      top.  */
    1296            0 :                   if (blocknum != block->index
    1297            0 :                       || gimple_code (use_stmt) == GIMPLE_PHI)
    1298              :                     uses_outside_block = true;
    1299            0 :                   if (!bitmap_bit_p (worker_single, blocknum))
    1300            0 :                     worker_partitioned_use = true;
    1301            0 :                 }
    1302              : 
    1303            0 :               if (uses_outside_block)
    1304            0 :                 def_escapes_block.add (var);
    1305              : 
    1306            0 :               if (worker_partitioned_use)
    1307              :                 {
    1308            0 :                   worker_partitioned_uses.add (var);
    1309            0 :                   has_defs = true;
    1310              :                 }
    1311              :             }
    1312              : 
    1313            0 :           propagation_set *ws_prop = (*prop_set)[block->index];
    1314              : 
    1315            0 :           if (ws_prop)
    1316              :             {
    1317            0 :               for (propagation_set::iterator it = ws_prop->begin ();
    1318            0 :                    it != ws_prop->end ();
    1319            0 :                    ++it)
    1320              :                 {
    1321            0 :                   tree var = *it;
    1322            0 :                   if (TREE_CODE (var) == VAR_DECL)
    1323              :                     {
    1324            0 :                       def_escapes_block.add (var);
    1325            0 :                       if (partitioned_var_uses->contains (var))
    1326              :                         {
    1327            0 :                           worker_partitioned_uses.add (var);
    1328            0 :                           has_defs = true;
    1329              :                         }
    1330              :                     }
    1331              :                 }
    1332              : 
    1333            0 :               delete ws_prop;
    1334            0 :               (*prop_set)[block->index] = 0;
    1335              :             }
    1336              : 
    1337            0 :           bool only_marker_fns = true;
    1338            0 :           bool join_block = false;
    1339              : 
    1340            0 :           for (gimple_stmt_iterator gsi = gsi_start_bb (block);
    1341            0 :                !gsi_end_p (gsi);
    1342            0 :                gsi_next (&gsi))
    1343              :             {
    1344            0 :               gimple *stmt = gsi_stmt (gsi);
    1345            0 :               if (gimple_code (stmt) == GIMPLE_CALL
    1346            0 :                   && gimple_call_internal_p (stmt, IFN_UNIQUE))
    1347              :                 {
    1348            0 :                   enum ifn_unique_kind k = ((enum ifn_unique_kind)
    1349            0 :                     TREE_INT_CST_LOW (gimple_call_arg (stmt, 0)));
    1350            0 :                   if (k != IFN_UNIQUE_OACC_PRIVATE
    1351            0 :                       && k != IFN_UNIQUE_OACC_JOIN
    1352            0 :                       && k != IFN_UNIQUE_OACC_FORK
    1353            0 :                       && k != IFN_UNIQUE_OACC_HEAD_MARK
    1354            0 :                       && k != IFN_UNIQUE_OACC_TAIL_MARK)
    1355              :                     only_marker_fns = false;
    1356            0 :                   else if (k == IFN_UNIQUE_OACC_JOIN)
    1357              :                     /* The JOIN marker is special in that it *cannot* be
    1358              :                        predicated for worker zero, because it may be lowered
    1359              :                        to a barrier instruction and all workers must typically
    1360              :                        execute that barrier.  We shouldn't be doing any
    1361              :                        broadcasts from the join block anyway.  */
    1362            0 :                     join_block = true;
    1363              :                 }
    1364            0 :               else if (gimple_code (stmt) == GIMPLE_CALL
    1365            0 :                        && gimple_call_internal_p (stmt, IFN_GOACC_LOOP))
    1366              :                 /* Empty.  */;
    1367            0 :               else if (gimple_nop_p (stmt))
    1368              :                 /* Empty.  */;
    1369              :               else
    1370            0 :                 only_marker_fns = false;
    1371              :             }
    1372              : 
    1373              :           /* We can skip predicating this block for worker zero if the only
    1374              :              thing it contains is marker functions that will be removed in the
    1375              :              oaccdevlow pass anyway.
    1376              :              Don't do this if the block has (any) phi nodes, because those
    1377              :              might define SSA names that need broadcasting.
    1378              :              TODO: We might be able to skip transforming blocks that only
    1379              :              contain some other trivial statements too.  */
    1380            0 :           if (only_marker_fns && !phi_nodes (block))
    1381            0 :             continue;
    1382              : 
    1383            0 :           gcc_assert (!join_block);
    1384              : 
    1385            0 :           if (has_defs)
    1386              :             {
    1387            0 :               tree record_type = (tree) block->aux;
    1388            0 :               std::pair<unsigned HOST_WIDE_INT, bool> *off_rngalloc
    1389            0 :                 = blk_offset_map->get (block);
    1390            0 :               gcc_assert (!record_type || off_rngalloc);
    1391            0 :               unsigned HOST_WIDE_INT offset
    1392            0 :                 = off_rngalloc ? off_rngalloc->first : 0;
    1393            0 :               bool range_allocated
    1394            0 :                 = off_rngalloc ? off_rngalloc->second : true;
    1395            0 :               bool has_gang_private_write
    1396            0 :                 = bitmap_bit_p (writes_gang_private, block->index);
    1397            0 :               worker_single_copy (block, block, &def_escapes_block,
    1398              :                                   &worker_partitioned_uses, record_type,
    1399              :                                   record_field_map,
    1400            0 :                                   offset, !range_allocated,
    1401              :                                   has_gang_private_write);
    1402              :             }
    1403              :           else
    1404            0 :             worker_single_simple (block, block, &def_escapes_block);
    1405            0 :         }
    1406              :     }
    1407              : 
    1408            0 :   if ((outer_mask & GOMP_DIM_MASK (GOMP_DIM_WORKER)) == 0)
    1409              :     {
    1410              :       basic_block block;
    1411              : 
    1412            0 :       for (unsigned i = 0; par->blocks.iterate (i, &block); i++)
    1413            0 :         for (gimple_stmt_iterator gsi = gsi_start_bb (block);
    1414            0 :              !gsi_end_p (gsi);
    1415            0 :              gsi_next (&gsi))
    1416              :           {
    1417            0 :             gimple *stmt = gsi_stmt (gsi);
    1418              : 
    1419            0 :             if (gimple_code (stmt) == GIMPLE_CALL
    1420            0 :                 && !gimple_call_internal_p (stmt)
    1421            0 :                 && !omp_sese_active_worker_call (as_a <gcall *> (stmt)))
    1422              :               {
    1423              :                 /* If we have an OpenACC routine call in worker-single mode,
    1424              :                    place barriers before and afterwards to prevent
    1425              :                    clobbering re-used shared memory regions (as are used
    1426              :                    for AMDGCN at present, for example).  */
    1427            0 :                 tree decl = builtin_decl_explicit (BUILT_IN_GOACC_BARRIER);
    1428            0 :                 gsi_insert_before (&gsi, gimple_build_call (decl, 0),
    1429              :                                    GSI_SAME_STMT);
    1430            0 :                 gsi_insert_after (&gsi, gimple_build_call (decl, 0),
    1431              :                                   GSI_NEW_STMT);
    1432              :               }
    1433              :           }
    1434              :     }
    1435              : 
    1436            0 :   if (par->inner)
    1437            0 :     neuter_worker_single (par->inner, mask, worker_single, vector_single,
    1438              :                           prop_set, partitioned_var_uses, record_field_map,
    1439              :                           blk_offset_map, writes_gang_private);
    1440            0 :   if (par->next)
    1441              :     neuter_worker_single (par->next, outer_mask, worker_single, vector_single,
    1442              :                           prop_set, partitioned_var_uses, record_field_map,
    1443              :                           blk_offset_map, writes_gang_private);
    1444            0 : }
    1445              : 
    1446              : static void
    1447            0 : dfs_broadcast_reachable_1 (basic_block bb, sbitmap reachable)
    1448              : {
    1449            0 :   if (bb->flags & BB_VISITED)
    1450              :     return;
    1451              : 
    1452            0 :   bb->flags |= BB_VISITED;
    1453              : 
    1454            0 :   if (bb->succs)
    1455              :     {
    1456            0 :       edge e;
    1457            0 :       edge_iterator ei;
    1458            0 :       FOR_EACH_EDGE (e, ei, bb->succs)
    1459              :         {
    1460            0 :           basic_block dest = e->dest;
    1461            0 :           if (dest->aux)
    1462            0 :             bitmap_set_bit (reachable, dest->index);
    1463              :           else
    1464            0 :             dfs_broadcast_reachable_1 (dest, reachable);
    1465              :         }
    1466              :     }
    1467              : }
    1468              : 
    1469              : typedef std::pair<int, tree> idx_decl_pair_t;
    1470              : 
    1471              : typedef auto_vec<splay_tree> used_range_vec_t;
    1472              : 
    1473              : static int
    1474            0 : sort_size_descending (const void *a, const void *b)
    1475              : {
    1476            0 :   const idx_decl_pair_t *pa = (const idx_decl_pair_t *) a;
    1477            0 :   const idx_decl_pair_t *pb = (const idx_decl_pair_t *) b;
    1478            0 :   unsigned HOST_WIDE_INT asize = tree_to_uhwi (TYPE_SIZE_UNIT (pa->second));
    1479            0 :   unsigned HOST_WIDE_INT bsize = tree_to_uhwi (TYPE_SIZE_UNIT (pb->second));
    1480            0 :   return bsize - asize;
    1481              : }
    1482              : 
    1483              : class addr_range
    1484              : {
    1485              : public:
    1486            0 :   addr_range (unsigned HOST_WIDE_INT addr_lo, unsigned HOST_WIDE_INT addr_hi)
    1487            0 :     : lo (addr_lo), hi (addr_hi)
    1488            0 :     { }
    1489            0 :   addr_range (const addr_range &ar) : lo (ar.lo), hi (ar.hi)
    1490              :     { }
    1491            0 :   addr_range () : lo (0), hi (0)
    1492            0 :     { }
    1493              : 
    1494            0 :   bool invalid () { return lo == 0 && hi == 0; }
    1495              : 
    1496              :   unsigned HOST_WIDE_INT lo;
    1497              :   unsigned HOST_WIDE_INT hi;
    1498              : };
    1499              : 
    1500              : static int
    1501            0 : splay_tree_compare_addr_range (splay_tree_key a, splay_tree_key b)
    1502              : {
    1503            0 :   addr_range *ar = (addr_range *) a;
    1504            0 :   addr_range *br = (addr_range *) b;
    1505            0 :   if (ar->lo == br->lo && ar->hi == br->hi)
    1506              :     return 0;
    1507            0 :   if (ar->hi <= br->lo)
    1508              :     return -1;
    1509            0 :   else if (ar->lo >= br->hi)
    1510            0 :     return 1;
    1511              :   return 0;
    1512              : }
    1513              : 
    1514              : static void
    1515            0 : splay_tree_free_key (splay_tree_key k)
    1516              : {
    1517            0 :   addr_range *ar = (addr_range *) k;
    1518            0 :   delete ar;
    1519            0 : }
    1520              : 
    1521              : static addr_range
    1522            0 : first_fit_range (splay_tree s, unsigned HOST_WIDE_INT size,
    1523              :                  unsigned HOST_WIDE_INT align, addr_range *bounds)
    1524              : {
    1525            0 :   splay_tree_node min = splay_tree_min (s);
    1526            0 :   if (min)
    1527              :     {
    1528              :       splay_tree_node next;
    1529            0 :       while ((next = splay_tree_successor (s, min->key)))
    1530              :         {
    1531            0 :           unsigned HOST_WIDE_INT lo = ((addr_range *) min->key)->hi;
    1532            0 :           unsigned HOST_WIDE_INT hi = ((addr_range *) next->key)->lo;
    1533            0 :           unsigned HOST_WIDE_INT base = (lo + align - 1) & ~(align - 1);
    1534            0 :           if (base + size <= hi)
    1535            0 :             return addr_range (base, base + size);
    1536              :           min = next;
    1537              :         }
    1538              : 
    1539            0 :       unsigned HOST_WIDE_INT base = ((addr_range *)min->key)->hi;
    1540            0 :       base = (base + align - 1) & ~(align - 1);
    1541            0 :       if (base + size <= bounds->hi)
    1542            0 :         return addr_range (base, base + size);
    1543              :       else
    1544            0 :         return addr_range ();
    1545              :     }
    1546              :   else
    1547              :     {
    1548            0 :       unsigned HOST_WIDE_INT lo = bounds->lo;
    1549            0 :       lo = (lo + align - 1) & ~(align - 1);
    1550            0 :       if (lo + size <= bounds->hi)
    1551            0 :         return addr_range (lo, lo + size);
    1552              :       else
    1553            0 :         return addr_range ();
    1554              :     }
    1555              : }
    1556              : 
    1557              : static int
    1558            0 : merge_ranges_1 (splay_tree_node n, void *ptr)
    1559              : {
    1560            0 :   splay_tree accum = (splay_tree) ptr;
    1561            0 :   addr_range ar = *(addr_range *) n->key;
    1562              : 
    1563            0 :   splay_tree_node old = splay_tree_lookup (accum, n->key);
    1564              : 
    1565              :   /* We might have an overlap.  Create a new range covering the
    1566              :      overlapping parts.  */
    1567            0 :   if (old)
    1568              :     {
    1569            0 :       addr_range *old_ar = (addr_range *) old->key;
    1570            0 :       ar.lo = MIN (old_ar->lo, ar.lo);
    1571            0 :       ar.hi = MAX (old_ar->hi, ar.hi);
    1572            0 :       splay_tree_remove (accum, old->key);
    1573              :     }
    1574              : 
    1575            0 :   addr_range *new_ar = new addr_range (ar);
    1576              : 
    1577            0 :   splay_tree_insert (accum, (splay_tree_key) new_ar, n->value);
    1578              : 
    1579            0 :   return 0;
    1580              : }
    1581              : 
    1582              : static void
    1583            0 : merge_ranges (splay_tree accum, splay_tree sp)
    1584              : {
    1585            0 :   splay_tree_foreach (sp, merge_ranges_1, (void *) accum);
    1586            0 : }
    1587              : 
    1588              : static void
    1589            0 : oacc_do_neutering (unsigned HOST_WIDE_INT bounds_lo,
    1590              :                    unsigned HOST_WIDE_INT bounds_hi)
    1591              : {
    1592            0 :   bb_stmt_map_t bb_stmt_map;
    1593            0 :   auto_bitmap worker_single, vector_single;
    1594              : 
    1595            0 :   omp_sese_split_blocks (&bb_stmt_map);
    1596              : 
    1597            0 :   if (dump_file)
    1598              :     {
    1599            0 :       fprintf (dump_file, "\n\nAfter splitting:\n\n");
    1600            0 :       dump_function_to_file (current_function_decl, dump_file, dump_flags);
    1601              :     }
    1602              : 
    1603            0 :   unsigned mask = 0;
    1604              : 
    1605              :   /* If this is a routine, calculate MASK as if the outer levels are already
    1606              :      partitioned.  */
    1607            0 :   {
    1608            0 :     tree attr = oacc_get_fn_attrib (current_function_decl);
    1609            0 :     tree dims = TREE_VALUE (attr);
    1610            0 :     unsigned ix;
    1611            0 :     for (ix = 0; ix != GOMP_DIM_MAX; ix++, dims = TREE_CHAIN (dims))
    1612              :       {
    1613            0 :         tree allowed = TREE_PURPOSE (dims);
    1614            0 :         if (allowed && integer_zerop (allowed))
    1615            0 :           mask |= GOMP_DIM_MASK (ix);
    1616              :       }
    1617              :   }
    1618              : 
    1619            0 :   parallel_g *par = omp_sese_discover_pars (&bb_stmt_map);
    1620            0 :   populate_single_mode_bitmaps (par, worker_single, vector_single, mask, 0);
    1621              : 
    1622            0 :   basic_block bb;
    1623            0 :   FOR_ALL_BB_FN (bb, cfun)
    1624            0 :     bb->aux = NULL;
    1625              : 
    1626            0 :   vec<propagation_set *> prop_set (vNULL);
    1627            0 :   prop_set.safe_grow_cleared (last_basic_block_for_fn (cfun), true);
    1628              : 
    1629            0 :   find_ssa_names_to_propagate (par, mask, worker_single, vector_single,
    1630              :                                &prop_set);
    1631              : 
    1632            0 :   hash_set<tree> partitioned_var_uses;
    1633            0 :   hash_set<tree> gang_private_vars;
    1634            0 :   auto_bitmap writes_gang_private;
    1635              : 
    1636            0 :   find_gang_private_vars (&gang_private_vars);
    1637            0 :   find_partitioned_var_uses (par, mask, &partitioned_var_uses);
    1638            0 :   find_local_vars_to_propagate (par, mask, &partitioned_var_uses,
    1639              :                                 &gang_private_vars, writes_gang_private,
    1640              :                                 &prop_set);
    1641              : 
    1642            0 :   record_field_map_t record_field_map;
    1643              : 
    1644            0 :   FOR_ALL_BB_FN (bb, cfun)
    1645              :     {
    1646            0 :       propagation_set *ws_prop = prop_set[bb->index];
    1647            0 :       if (ws_prop)
    1648              :         {
    1649            0 :           tree record_type = lang_hooks.types.make_type (RECORD_TYPE);
    1650            0 :           tree name = create_tmp_var_name (".oacc_ws_data_s");
    1651            0 :           name = build_decl (UNKNOWN_LOCATION, TYPE_DECL, name, record_type);
    1652            0 :           DECL_ARTIFICIAL (name) = 1;
    1653            0 :           DECL_NAMELESS (name) = 1;
    1654            0 :           TYPE_NAME (record_type) = name;
    1655            0 :           TYPE_ARTIFICIAL (record_type) = 1;
    1656              : 
    1657            0 :           auto_vec<tree> field_vec (ws_prop->elements ());
    1658            0 :           for (hash_set<tree>::iterator it = ws_prop->begin ();
    1659            0 :                it != ws_prop->end (); ++it)
    1660            0 :             field_vec.quick_push (*it);
    1661              : 
    1662            0 :           field_vec.qsort (sort_by_size_then_ssa_version_or_uid);
    1663              : 
    1664            0 :           bool existed;
    1665            0 :           field_map_t *fields
    1666            0 :             = &record_field_map.get_or_insert (record_type, &existed);
    1667            0 :           gcc_checking_assert (!existed);
    1668              : 
    1669              :           /* Insert var fields in reverse order, so the last inserted element
    1670              :              is the first in the structure.  */
    1671            0 :           for (int i = field_vec.length () - 1; i >= 0; i--)
    1672            0 :             install_var_field (field_vec[i], record_type, fields);
    1673              : 
    1674            0 :           layout_type (record_type);
    1675              : 
    1676            0 :           bb->aux = (tree) record_type;
    1677            0 :         }
    1678              :     }
    1679              : 
    1680            0 :   sbitmap *reachable
    1681            0 :     = sbitmap_vector_alloc (last_basic_block_for_fn (cfun),
    1682            0 :                             last_basic_block_for_fn (cfun));
    1683              : 
    1684            0 :   bitmap_vector_clear (reachable, last_basic_block_for_fn (cfun));
    1685              : 
    1686            0 :   auto_vec<std::pair<int, tree> > priority;
    1687              : 
    1688            0 :   FOR_ALL_BB_FN (bb, cfun)
    1689              :     {
    1690            0 :       if (bb->aux)
    1691              :         {
    1692            0 :           tree record_type = (tree) bb->aux;
    1693              : 
    1694            0 :           basic_block bb2;
    1695            0 :           FOR_ALL_BB_FN (bb2, cfun)
    1696            0 :             bb2->flags &= ~BB_VISITED;
    1697              : 
    1698            0 :           priority.safe_push (std::make_pair (bb->index, record_type));
    1699            0 :           dfs_broadcast_reachable_1 (bb, reachable[bb->index]);
    1700              :         }
    1701              :     }
    1702              : 
    1703            0 :   sbitmap *inverted
    1704            0 :     = sbitmap_vector_alloc (last_basic_block_for_fn (cfun),
    1705            0 :                             last_basic_block_for_fn (cfun));
    1706              : 
    1707            0 :   bitmap_vector_clear (inverted, last_basic_block_for_fn (cfun));
    1708              : 
    1709            0 :   for (int i = 0; i < last_basic_block_for_fn (cfun); i++)
    1710              :     {
    1711            0 :       sbitmap_iterator bi;
    1712            0 :       unsigned int j;
    1713            0 :       EXECUTE_IF_SET_IN_BITMAP (reachable[i], 0, j, bi)
    1714            0 :         bitmap_set_bit (inverted[j], i);
    1715              :     }
    1716              : 
    1717            0 :   for (int i = 0; i < last_basic_block_for_fn (cfun); i++)
    1718            0 :     bitmap_ior (reachable[i], reachable[i], inverted[i]);
    1719              : 
    1720            0 :   sbitmap_vector_free (inverted);
    1721              : 
    1722            0 :   used_range_vec_t used_ranges;
    1723              : 
    1724            0 :   used_ranges.safe_grow_cleared (last_basic_block_for_fn (cfun));
    1725              : 
    1726            0 :   blk_offset_map_t blk_offset_map;
    1727              : 
    1728            0 :   addr_range worker_shm_bounds (bounds_lo, bounds_hi);
    1729              : 
    1730            0 :   priority.qsort (sort_size_descending);
    1731            0 :   for (unsigned int i = 0; i < priority.length (); i++)
    1732              :     {
    1733            0 :       idx_decl_pair_t p = priority[i];
    1734            0 :       int blkno = p.first;
    1735            0 :       tree record_type = p.second;
    1736            0 :       HOST_WIDE_INT size = tree_to_uhwi (TYPE_SIZE_UNIT (record_type));
    1737            0 :       HOST_WIDE_INT align = TYPE_ALIGN_UNIT (record_type);
    1738              : 
    1739            0 :       splay_tree conflicts = splay_tree_new (splay_tree_compare_addr_range,
    1740              :                                              splay_tree_free_key, NULL);
    1741              : 
    1742            0 :       if (!used_ranges[blkno])
    1743            0 :         used_ranges[blkno] = splay_tree_new (splay_tree_compare_addr_range,
    1744              :                                              splay_tree_free_key, NULL);
    1745              :       else
    1746            0 :         merge_ranges (conflicts, used_ranges[blkno]);
    1747              : 
    1748            0 :       sbitmap_iterator bi;
    1749            0 :       unsigned int j;
    1750            0 :       EXECUTE_IF_SET_IN_BITMAP (reachable[blkno], 0, j, bi)
    1751            0 :         if (used_ranges[j])
    1752            0 :           merge_ranges (conflicts, used_ranges[j]);
    1753              : 
    1754            0 :       addr_range ar
    1755            0 :         = first_fit_range (conflicts, size, align, &worker_shm_bounds);
    1756              : 
    1757            0 :       splay_tree_delete (conflicts);
    1758              : 
    1759            0 :       if (ar.invalid ())
    1760              :         {
    1761            0 :           unsigned HOST_WIDE_INT base
    1762            0 :             = (bounds_lo + align - 1) & ~(align - 1);
    1763            0 :           if (base + size > bounds_hi)
    1764            0 :             error_at (UNKNOWN_LOCATION, "shared-memory region overflow");
    1765            0 :           std::pair<unsigned HOST_WIDE_INT, bool> base_inrng
    1766            0 :             = std::make_pair (base, false);
    1767            0 :           blk_offset_map.put (BASIC_BLOCK_FOR_FN (cfun, blkno), base_inrng);
    1768              :         }
    1769              :       else
    1770              :         {
    1771            0 :           splay_tree_node old = splay_tree_lookup (used_ranges[blkno],
    1772              :                                                    (splay_tree_key) &ar);
    1773            0 :           if (old)
    1774              :             {
    1775            0 :               fprintf (stderr, "trying to map [%d..%d] but [%d..%d] is "
    1776            0 :                        "already mapped in block %d\n", (int) ar.lo,
    1777            0 :                        (int) ar.hi, (int) ((addr_range *) old->key)->lo,
    1778            0 :                        (int) ((addr_range *) old->key)->hi, blkno);
    1779            0 :               abort ();
    1780              :             }
    1781              : 
    1782            0 :           addr_range *arp = new addr_range (ar);
    1783            0 :           splay_tree_insert (used_ranges[blkno], (splay_tree_key) arp,
    1784              :                              (splay_tree_value) blkno);
    1785            0 :           std::pair<unsigned HOST_WIDE_INT, bool> base_inrng
    1786            0 :             = std::make_pair (ar.lo, true);
    1787            0 :           blk_offset_map.put (BASIC_BLOCK_FOR_FN (cfun, blkno), base_inrng);
    1788              :         }
    1789              :     }
    1790              : 
    1791            0 :   sbitmap_vector_free (reachable);
    1792              : 
    1793            0 :   neuter_worker_single (par, mask, worker_single, vector_single, &prop_set,
    1794              :                         &partitioned_var_uses, &record_field_map,
    1795              :                         &blk_offset_map, writes_gang_private);
    1796              : 
    1797            0 :   record_field_map.empty ();
    1798              : 
    1799              :   /* These are supposed to have been 'delete'd by 'neuter_worker_single'.  */
    1800            0 :   for (auto it : prop_set)
    1801            0 :     gcc_checking_assert (!it);
    1802            0 :   prop_set.release ();
    1803              : 
    1804            0 :   delete par;
    1805              : 
    1806              :   /* This doesn't seem to make a difference.  */
    1807            0 :   loops_state_clear (LOOP_CLOSED_SSA);
    1808              : 
    1809              :   /* Neutering worker-single neutered blocks will invalidate dominance info.
    1810              :      It may be possible to incrementally update just the affected blocks, but
    1811              :      obliterate everything for now.  */
    1812            0 :   free_dominance_info (CDI_DOMINATORS);
    1813            0 :   free_dominance_info (CDI_POST_DOMINATORS);
    1814              : 
    1815            0 :   if (dump_file)
    1816              :     {
    1817            0 :       fprintf (dump_file, "\n\nAfter neutering:\n\n");
    1818            0 :       dump_function_to_file (current_function_decl, dump_file, dump_flags);
    1819              :     }
    1820            0 : }
    1821              : 
/* Pass entry point.  Scan the current (offloaded) function to size the
   shared-memory regions needed per partitioning dimension, query the target
   for the usable shared-memory bounds, and then perform worker neutering/
   broadcasting unless the function is known to run with a single worker.
   Always returns 0 (no TODO flags beyond those declared in pass_data).  */

static int
execute_omp_oacc_neuter_broadcast ()
{
  /* Per-dimension (GOMP_DIM_*) byte sizes required for reduction temporaries
     and for gang-private variables, respectively.  */
  unsigned HOST_WIDE_INT reduction_size[GOMP_DIM_MAX];
  unsigned HOST_WIDE_INT private_size[GOMP_DIM_MAX];

  for (unsigned i = 0; i < GOMP_DIM_MAX; i++)
    {
      reduction_size[i] = 0;
      private_size[i] = 0;
    }

  /* Calculate shared memory size required for reduction variables and
     gang-private memory for this offloaded function.  */
  basic_block bb;
  FOR_ALL_BB_FN (bb, cfun)
    {
      for (gimple_stmt_iterator gsi = gsi_start_bb (bb);
           !gsi_end_p (gsi);
           gsi_next (&gsi))
        {
          gimple *stmt = gsi_stmt (gsi);
          /* Only internal-function calls are of interest here.  */
          if (!is_gimple_call (stmt))
            continue;
          gcall *call = as_a <gcall *> (stmt);
          if (!gimple_call_internal_p (call))
            continue;
          enum internal_fn ifn_code = gimple_call_internal_fn (call);
          switch (ifn_code)
            {
            default: break;
            case IFN_GOACC_REDUCTION:
              /* Arg 3 is read as the partitioning level below; -1 presumably
                 marks a call with no associated level, which is skipped
                 (this 'continue' resumes the statement loop).  */
              if (integer_minus_onep (gimple_call_arg (call, 3)))
                continue;
              else
                {
                  unsigned code = TREE_INT_CST_LOW (gimple_call_arg (call, 0));
                  /* Only count reduction variables once: the choice to pick
                     the setup call is fairly arbitrary.  */
                  if (code == IFN_GOACC_REDUCTION_SETUP)
                    {
                      int level = TREE_INT_CST_LOW (gimple_call_arg (call, 3));
                      tree var = gimple_call_arg (call, 2);
                      tree offset = gimple_call_arg (call, 5);
                      tree var_type = TREE_TYPE (var);
                      /* The reduction region for LEVEL must extend at least to
                         this variable's offset plus its size; keep the max.  */
                      unsigned HOST_WIDE_INT limit
                        = (tree_to_uhwi (offset)
                           + tree_to_uhwi (TYPE_SIZE_UNIT (var_type)));
                      reduction_size[level]
                        = MAX (reduction_size[level], limit);
                    }
                }
              break;
            case IFN_UNIQUE:
              {
                enum ifn_unique_kind kind
                  = ((enum ifn_unique_kind)
                     TREE_INT_CST_LOW (gimple_call_arg (call, 0)));

                if (kind == IFN_UNIQUE_OACC_PRIVATE)
                  {
                    HOST_WIDE_INT level
                      = TREE_INT_CST_LOW (gimple_call_arg (call, 2));
                    /* Level -1 carries no private storage to account for.  */
                    if (level == -1)
                      break;
                    /* Remaining args (from index 3) are ADDR_EXPRs of the
                       privatized decls; accumulate their aligned sizes.  */
                    for (unsigned i = 3;
                         i < gimple_call_num_args (call);
                         i++)
                      {
                        tree arg = gimple_call_arg (call, i);
                        gcc_assert (TREE_CODE (arg) == ADDR_EXPR);
                        tree decl = TREE_OPERAND (arg, 0);
                        unsigned HOST_WIDE_INT align = DECL_ALIGN_UNIT (decl);
                        /* Round the running size up to DECL's alignment
                           (ALIGN is assumed to be a power of two).  */
                        private_size[level] = ((private_size[level] + align - 1)
                                               & ~(align - 1));
                        unsigned HOST_WIDE_INT decl_size
                          = tree_to_uhwi (TYPE_SIZE_UNIT (TREE_TYPE (decl)));
                        private_size[level] += decl_size;
                      }
                  }
              }
              break;
            }
        }
    }

  /* Launch dimensions recorded for this offloaded function.  */
  int dims[GOMP_DIM_MAX];
  for (unsigned i = 0; i < GOMP_DIM_MAX; i++)
    dims[i] = oacc_get_fn_dim_size (current_function_decl, i);

  /* Find bounds of shared-memory buffer space we can use.  */
  unsigned HOST_WIDE_INT bounds_lo = 0, bounds_hi = 0;
  if (targetm.goacc.shared_mem_layout)
    targetm.goacc.shared_mem_layout (&bounds_lo, &bounds_hi, dims,
                                     private_size, reduction_size);

  /* Perform worker partitioning unless we know 'num_workers(1)'.  */
  if (dims[GOMP_DIM_WORKER] != 1)
    oacc_do_neutering (bounds_lo, bounds_hi);

  return 0;
}
    1924              : 
    1925              : namespace {
    1926              : 
    1927              : const pass_data pass_data_omp_oacc_neuter_broadcast =
    1928              : {
    1929              :   GIMPLE_PASS, /* type */
    1930              :   "omp_oacc_neuter_broadcast", /* name */
    1931              :   OPTGROUP_OMP, /* optinfo_flags */
    1932              :   TV_NONE, /* tv_id */
    1933              :   PROP_cfg, /* properties_required */
    1934              :   0, /* properties_provided */
    1935              :   0, /* properties_destroyed */
    1936              :   0, /* todo_flags_start */
    1937              :   TODO_update_ssa | TODO_cleanup_cfg, /* todo_flags_finish */
    1938              : };
    1939              : 
    1940              : class pass_omp_oacc_neuter_broadcast : public gimple_opt_pass
    1941              : {
    1942              : public:
    1943       285722 :   pass_omp_oacc_neuter_broadcast (gcc::context *ctxt)
    1944       571444 :     : gimple_opt_pass (pass_data_omp_oacc_neuter_broadcast, ctxt)
    1945              :   {}
    1946              : 
    1947              :   /* opt_pass methods: */
    1948      1472258 :   bool gate (function *fun) final override
    1949              :   {
    1950      1472258 :     if (!flag_openacc)
    1951              :       return false;
    1952              : 
    1953        15205 :     if (!targetm.goacc.create_worker_broadcast_record)
    1954              :       return false;
    1955              : 
    1956              :     /* Only relevant for OpenACC offloaded functions.  */
    1957            0 :     tree attr = oacc_get_fn_attrib (fun->decl);
    1958            0 :     if (!attr)
    1959              :       return false;
    1960              : 
    1961              :     return true;
    1962              :   }
    1963              : 
    1964            0 :   unsigned int execute (function *) final override
    1965              :     {
    1966            0 :       return execute_omp_oacc_neuter_broadcast ();
    1967              :     }
    1968              : 
    1969              : }; // class pass_omp_oacc_neuter_broadcast
    1970              : 
    1971              : } // anon namespace
    1972              : 
    1973              : gimple_opt_pass *
    1974       285722 : make_pass_omp_oacc_neuter_broadcast (gcc::context *ctxt)
    1975              : {
    1976       285722 :   return new pass_omp_oacc_neuter_broadcast (ctxt);
    1977              : }
        

Generated by: LCOV version 2.4-beta

The LCOV profile was generated on an x86_64 machine using the following configure options: configure --disable-bootstrap --enable-coverage=opt --enable-languages=c,c++,fortran,go,jit,lto,rust,m2 --enable-host-shared. The GCC test suite was run with the built compiler.