Line data Source code
1 : /* OpenACC worker partitioning via middle end neutering/broadcasting scheme
2 :
3 : Copyright (C) 2015-2026 Free Software Foundation, Inc.
4 :
5 : This file is part of GCC.
6 :
7 : GCC is free software; you can redistribute it and/or modify it
8 : under the terms of the GNU General Public License as published
9 : by the Free Software Foundation; either version 3, or (at your
10 : option) any later version.
11 :
12 : GCC is distributed in the hope that it will be useful, but WITHOUT
13 : ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
14 : or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public
15 : License for more details.
16 :
17 : You should have received a copy of the GNU General Public License
18 : along with GCC; see the file COPYING3. If not see
19 : <http://www.gnu.org/licenses/>. */
20 :
21 : #include "config.h"
22 : #include "system.h"
23 : #include "coretypes.h"
24 : #include "backend.h"
25 : #include "rtl.h"
26 : #include "tree.h"
27 : #include "gimple.h"
28 : #include "tree-pass.h"
29 : #include "ssa.h"
30 : #include "cgraph.h"
31 : #include "pretty-print.h"
32 : #include "fold-const.h"
33 : #include "gimplify.h"
34 : #include "gimple-iterator.h"
35 : #include "gimple-walk.h"
36 : #include "tree-inline.h"
37 : #include "langhooks.h"
38 : #include "omp-general.h"
39 : #include "omp-low.h"
40 : #include "gimple-pretty-print.h"
41 : #include "cfghooks.h"
42 : #include "insn-config.h"
43 : #include "recog.h"
44 : #include "internal-fn.h"
45 : #include "bitmap.h"
46 : #include "tree-nested.h"
47 : #include "stor-layout.h"
48 : #include "tree-ssa-threadupdate.h"
49 : #include "tree-into-ssa.h"
50 : #include "splay-tree.h"
51 : #include "target.h"
52 : #include "cfgloop.h"
53 : #include "tree-cfg.h"
54 : #include "omp-offload.h"
55 : #include "attribs.h"
56 : #include "targhooks.h"
57 : #include "diagnostic-core.h"
58 :
59 : /* Loop structure of the function. The entire function is described as
60 : a NULL loop. */
61 : /* Adapted from 'gcc/config/nvptx/nvptx.cc:struct parallel'. */
62 :
struct parallel_g
{
  /* Parent parallel.  */
  parallel_g *parent;

  /* Next sibling parallel.  */
  parallel_g *next;

  /* First child parallel.  */
  parallel_g *inner;

  /* Partitioning mask of the parallel.  */
  unsigned mask;

  /* Partitioning used within inner parallels.  */
  unsigned inner_mask;

  /* Location of parallel forked and join.  The forked is the first
     block in the parallel and the join is the first block after of
     the partition.  */
  basic_block forked_block;
  basic_block join_block;

  /* The IFN_UNIQUE_OACC_FORK call (for real forks) or the statement that
     forces maximal partitioning (see omp_sese_find_par), and the
     IFN_UNIQUE_OACC_JOIN call terminating the partition.  */
  gimple *forked_stmt;
  gimple *join_stmt;

  /* FORK_STMT is the NOP placeholder inserted by omp_sese_split_blocks at
     the head of the forked block.  JOINING_STMT is not assigned in the code
     visible here -- presumably set elsewhere in this pass; confirm.  */
  gimple *fork_stmt;
  gimple *joining_stmt;

  /* Basic blocks in this parallel, but not in child parallels.  The
     FORKED and JOINING blocks are in the partition.  The FORK and JOIN
     blocks are not.  */
  auto_vec<basic_block> blocks;

  /* Record type and sender/receiver decls used to broadcast values from
     the single active worker -- NOTE(review): appears to be filled in by
     the broadcast machinery (cf. worker_single_copy); verify against the
     rest of the file.  */
  tree record_type;
  tree sender_decl;
  tree receiver_decl;

public:
  parallel_g (parallel_g *parent, unsigned mode);
  ~parallel_g ();
};
105 :
/* Constructor links the new parallel into its parent's chain of
   children.  */
108 :
109 0 : parallel_g::parallel_g (parallel_g *parent_, unsigned mask_)
110 0 : :parent (parent_), next (0), inner (0), mask (mask_), inner_mask (0)
111 : {
112 0 : forked_block = join_block = 0;
113 0 : forked_stmt = join_stmt = NULL;
114 0 : fork_stmt = joining_stmt = NULL;
115 :
116 0 : record_type = NULL_TREE;
117 0 : sender_decl = NULL_TREE;
118 0 : receiver_decl = NULL_TREE;
119 :
120 0 : if (parent)
121 : {
122 0 : next = parent->inner;
123 0 : parent->inner = this;
124 : }
125 0 : }
126 :
127 0 : parallel_g::~parallel_g ()
128 : {
129 0 : delete inner;
130 0 : delete next;
131 0 : }
132 :
133 : static bool
134 0 : local_var_based_p (tree decl)
135 : {
136 0 : switch (TREE_CODE (decl))
137 : {
138 0 : case VAR_DECL:
139 0 : return !is_global_var (decl);
140 :
141 0 : case COMPONENT_REF:
142 0 : case BIT_FIELD_REF:
143 0 : case ARRAY_REF:
144 0 : return local_var_based_p (TREE_OPERAND (decl, 0));
145 :
146 : default:
147 : return false;
148 : }
149 : }
150 :
151 : /* Map of basic blocks to gimple stmts. */
152 : typedef hash_map<basic_block, gimple *> bb_stmt_map_t;
153 :
154 : /* Calls to OpenACC routines are made by all workers/wavefronts/warps, since
155 : the routine likely contains partitioned loops (else will do its own
156 : neutering and variable propagation). Return TRUE if a function call CALL
157 : should be made in (worker) single mode instead, rather than redundant
158 : mode. */
159 :
160 : static bool
161 0 : omp_sese_active_worker_call (gcall *call)
162 : {
163 : #define GOMP_DIM_SEQ GOMP_DIM_MAX
164 0 : tree fndecl = gimple_call_fndecl (call);
165 :
166 0 : if (!fndecl)
167 : return true;
168 :
169 0 : tree attrs = oacc_get_fn_attrib (fndecl);
170 :
171 0 : if (!attrs)
172 : return true;
173 :
174 0 : int level = oacc_fn_attrib_level (attrs);
175 :
176 : /* Neither regular functions nor "seq" routines should be run by all threads
177 : in worker-single mode. */
178 0 : return level == -1 || level == GOMP_DIM_SEQ;
179 : #undef GOMP_DIM_SEQ
180 : }
181 :
182 : /* Split basic blocks such that each forked and join unspecs are at
183 : the start of their basic blocks. Thus afterwards each block will
184 : have a single partitioning mode. We also do the same for return
185 : insns, as they are executed by every thread. Return the
186 : partitioning mode of the function as a whole. Populate MAP with
187 : head and tail blocks. We also clear the BB visited flag, which is
188 : used when finding partitions. */
189 : /* Adapted from 'gcc/config/nvptx/nvptx.cc:nvptx_split_blocks'. */
190 :
static void
omp_sese_split_blocks (bb_stmt_map_t *map)
{
  auto_vec<gimple *> worklist;
  basic_block block;

  /* Locate all the reorg instructions of interest.  */
  FOR_ALL_BB_FN (block, cfun)
    {
      /* Clear visited flag, for use by parallel locator  */
      block->flags &= ~BB_VISITED;

      for (gimple_stmt_iterator gsi = gsi_start_bb (block);
	   !gsi_end_p (gsi);
	   gsi_next (&gsi))
	{
	  gimple *stmt = gsi_stmt (gsi);

	  if (gimple_call_internal_p (stmt, IFN_UNIQUE))
	    {
	      enum ifn_unique_kind k = ((enum ifn_unique_kind)
		TREE_INT_CST_LOW (gimple_call_arg (stmt, 0)));

	      if (k == IFN_UNIQUE_OACC_JOIN)
		worklist.safe_push (stmt);
	      else if (k == IFN_UNIQUE_OACC_FORK)
		{
		  /* The fork call is expected to be the last statement of
		     its block; the partitioned region begins in the single
		     successor.  */
		  gcc_assert (gsi_one_before_end_p (gsi));
		  basic_block forked_block = single_succ (block);
		  gimple_stmt_iterator gsi2 = gsi_start_bb (forked_block);

		  /* We push a NOP as a placeholder for the "forked" stmt.
		     This is then recognized in omp_sese_find_par.  */
		  gimple *nop = gimple_build_nop ();
		  gsi_insert_before (&gsi2, nop, GSI_SAME_STMT);

		  worklist.safe_push (nop);
		}
	    }
	  else if (gimple_code (stmt) == GIMPLE_RETURN
		   || gimple_code (stmt) == GIMPLE_COND
		   || gimple_code (stmt) == GIMPLE_SWITCH
		   || (gimple_code (stmt) == GIMPLE_CALL
		       && !gimple_call_internal_p (stmt)
		       && !omp_sese_active_worker_call (as_a <gcall *> (stmt))))
	    /* Returns, control-flow statements and calls that must run in
	       every thread each get a block of their own (split below).  */
	    worklist.safe_push (stmt);
	  else if (is_gimple_assign (stmt))
	    {
	      tree lhs = gimple_assign_lhs (stmt);

	      /* Force assignments to components/fields/elements of local
		 aggregates into fully-partitioned (redundant) mode.  This
		 avoids having to broadcast the whole aggregate.  The RHS of
		 the assignment will be propagated using the normal
		 mechanism.  */

	      switch (TREE_CODE (lhs))
		{
		case COMPONENT_REF:
		case BIT_FIELD_REF:
		case ARRAY_REF:
		  {
		    tree aggr = TREE_OPERAND (lhs, 0);

		    if (local_var_based_p (aggr))
		      worklist.safe_push (stmt);
		  }
		  break;

		default:
		  ;
		}
	    }
	}
    }

  /* Split blocks on the worklist.  */
  unsigned ix;
  gimple *stmt;

  for (ix = 0; worklist.iterate (ix, &stmt); ix++)
    {
      basic_block block = gimple_bb (stmt);

      if (gimple_code (stmt) == GIMPLE_COND)
	{
	  /* Rewrite "if (a OP b)" as "pred = a OP b; if (pred != false)"
	     so the computation of the predicate and the branch on it can
	     live in separate blocks; then split before the new condition
	     and record it in MAP as the head statement of the new block.  */
	  gcond *orig_cond = as_a <gcond *> (stmt);
	  tree_code code = gimple_expr_code (orig_cond);
	  tree pred = make_ssa_name (boolean_type_node);
	  gimple *asgn = gimple_build_assign (pred, code,
			   gimple_cond_lhs (orig_cond),
			   gimple_cond_rhs (orig_cond));
	  gcond *new_cond
	    = gimple_build_cond (NE_EXPR, pred, boolean_false_node,
				 gimple_cond_true_label (orig_cond),
				 gimple_cond_false_label (orig_cond));

	  gimple_stmt_iterator gsi = gsi_for_stmt (stmt);
	  gsi_insert_before (&gsi, asgn, GSI_SAME_STMT);
	  gsi_replace (&gsi, new_cond, true);

	  edge e = split_block (block, asgn);
	  block = e->dest;
	  map->get_or_insert (block) = new_cond;
	}
      else if ((gimple_code (stmt) == GIMPLE_CALL
		&& !gimple_call_internal_p (stmt))
	       || is_gimple_assign (stmt))
	{
	  /* Isolate the call/assignment in a block of its own: split
	     before and after it, and map the one-statement block.  */
	  gimple_stmt_iterator gsi = gsi_for_stmt (stmt);
	  gsi_prev (&gsi);

	  edge call = split_block (block, gsi_stmt (gsi));

	  gimple *call_stmt = gsi_stmt (gsi_start_bb (call->dest));

	  edge call_to_ret = split_block (call->dest, call_stmt);

	  map->get_or_insert (call_to_ret->src) = call_stmt;
	}
      else
	{
	  gimple_stmt_iterator gsi = gsi_for_stmt (stmt);
	  gsi_prev (&gsi);

	  if (gsi_end_p (gsi))
	    /* Already at the start of its block; just record it.  */
	    map->get_or_insert (block) = stmt;
	  else
	    {
	      /* Split block before insn.  The insn is in the new block.  */
	      edge e = split_block (block, gsi_stmt (gsi));

	      block = e->dest;
	      map->get_or_insert (block) = stmt;
	    }
	}
    }
}
329 :
/* Return a human-readable description of partitioning MASK, where bit 0
   is gang, bit 1 is worker and bit 2 is vector partitioning.  Values
   outside the 3-bit range yield "<illegal>".  */

static const char *
mask_name (unsigned mask)
{
  static const char *const names[8] =
    {
      "gang redundant",
      "gang partitioned",
      "worker partitioned",
      "gang+worker partitioned",
      "vector partitioned",
      "gang+vector partitioned",
      "worker+vector partitioned",
      "fully partitioned",
    };

  return mask < 8 ? names[mask] : "<illegal>";
}
346 :
347 : /* Dump this parallel and all its inner parallels. */
348 : /* Adapted from 'gcc/config/nvptx/nvptx.cc:nvptx_dump_pars'. */
349 :
350 : static void
351 0 : omp_sese_dump_pars (parallel_g *par, unsigned depth)
352 : {
353 0 : fprintf (dump_file, "%u: mask %d (%s) head=%d, tail=%d\n",
354 : depth, par->mask, mask_name (par->mask),
355 0 : par->forked_block ? par->forked_block->index : -1,
356 0 : par->join_block ? par->join_block->index : -1);
357 :
358 0 : fprintf (dump_file, " blocks:");
359 :
360 0 : basic_block block;
361 0 : for (unsigned ix = 0; par->blocks.iterate (ix, &block); ix++)
362 0 : fprintf (dump_file, " %d", block->index);
363 0 : fprintf (dump_file, "\n");
364 0 : if (par->inner)
365 0 : omp_sese_dump_pars (par->inner, depth + 1);
366 :
367 0 : if (par->next)
368 : omp_sese_dump_pars (par->next, depth);
369 0 : }
370 :
371 : /* If BLOCK contains a fork/join marker, process it to create or
372 : terminate a loop structure. Add this block to the current loop,
373 : and then walk successor blocks. */
374 : /* Adapted from 'gcc/config/nvptx/nvptx.cc:nvptx_find_par'. */
375 :
static parallel_g *
omp_sese_find_par (bb_stmt_map_t *map, parallel_g *par, basic_block block)
{
  /* Each block is processed at most once; the flag was cleared by
     omp_sese_split_blocks.  */
  if (block->flags & BB_VISITED)
    return par;
  block->flags |= BB_VISITED;

  if (gimple **stmtp = map->get (block))
    {
      gimple *stmt = *stmtp;

      if (gimple_code (stmt) == GIMPLE_COND
	  || gimple_code (stmt) == GIMPLE_SWITCH
	  || gimple_code (stmt) == GIMPLE_RETURN
	  || (gimple_code (stmt) == GIMPLE_CALL
	      && !gimple_call_internal_p (stmt))
	  || is_gimple_assign (stmt))
	{
	  /* A single block that is forced to be at the maximum partition
	     level.  Make a singleton par for it.  */
	  par = new parallel_g (par, GOMP_DIM_MASK (GOMP_DIM_GANG)
				     | GOMP_DIM_MASK (GOMP_DIM_WORKER)
				     | GOMP_DIM_MASK (GOMP_DIM_VECTOR));
	  par->forked_block = block;
	  par->forked_stmt = stmt;
	  par->blocks.safe_push (block);
	  /* The singleton is complete; pop back to the enclosing par.  */
	  par = par->parent;
	  goto walk_successors;
	}
      else if (gimple_nop_p (stmt))
	{
	  /* The NOP placeholder inserted by omp_sese_split_blocks marks
	     the first block of a partitioned region; the real
	     IFN_UNIQUE_OACC_FORK call is the last statement of the single
	     predecessor block.  */
	  basic_block pred = single_pred (block);
	  gcc_assert (pred);
	  gimple_stmt_iterator gsi = gsi_last_bb (pred);
	  gimple *final_stmt = gsi_stmt (gsi);

	  if (gimple_call_internal_p (final_stmt, IFN_UNIQUE))
	    {
	      gcall *call = as_a <gcall *> (final_stmt);
	      enum ifn_unique_kind k = ((enum ifn_unique_kind)
		TREE_INT_CST_LOW (gimple_call_arg (call, 0)));

	      if (k == IFN_UNIQUE_OACC_FORK)
		{
		  /* Argument 2 is the partitioned dimension (or -1).  */
		  HOST_WIDE_INT dim
		    = TREE_INT_CST_LOW (gimple_call_arg (call, 2));
		  unsigned mask = (dim >= 0) ? GOMP_DIM_MASK (dim) : 0;

		  /* Open a new (child) parallel.  */
		  par = new parallel_g (par, mask);
		  par->forked_block = block;
		  par->forked_stmt = final_stmt;
		  par->fork_stmt = stmt;
		}
	      else
		gcc_unreachable ();
	    }
	  else
	    gcc_unreachable ();
	}
      else if (gimple_call_internal_p (stmt, IFN_UNIQUE))
	{
	  gcall *call = as_a <gcall *> (stmt);
	  enum ifn_unique_kind k = ((enum ifn_unique_kind)
	    TREE_INT_CST_LOW (gimple_call_arg (call, 0)));
	  if (k == IFN_UNIQUE_OACC_JOIN)
	    {
	      /* The join must match the mask of the parallel being
	         closed; record it and pop back to the parent.  */
	      HOST_WIDE_INT dim = TREE_INT_CST_LOW (gimple_call_arg (stmt, 2));
	      unsigned mask = (dim >= 0) ? GOMP_DIM_MASK (dim) : 0;

	      gcc_assert (par->mask == mask);
	      par->join_block = block;
	      par->join_stmt = stmt;
	      par = par->parent;
	    }
	  else
	    gcc_unreachable ();
	}
      else
	gcc_unreachable ();
    }

  if (par)
    /* Add this block onto the current loop's list of blocks.  */
    par->blocks.safe_push (block);
  else
    /* This must be the entry block.  Create a NULL parallel.  */
    par = new parallel_g (0, 0);

 walk_successors:
  /* Walk successor blocks.  */
  edge e;
  edge_iterator ei;

  FOR_EACH_EDGE (e, ei, block->succs)
    omp_sese_find_par (map, par, e->dest);

  return par;
}
474 :
475 : /* DFS walk the CFG looking for fork & join markers. Construct
476 : loop structures as we go. MAP is a mapping of basic blocks
477 : to head & tail markers, discovered when splitting blocks. This
478 : speeds up the discovery. We rely on the BB visited flag having
479 : been cleared when splitting blocks. */
480 : /* Adapted from 'gcc/config/nvptx/nvptx.cc:nvptx_discover_pars'. */
481 :
482 : static parallel_g *
483 0 : omp_sese_discover_pars (bb_stmt_map_t *map)
484 : {
485 0 : basic_block block;
486 :
487 : /* Mark exit blocks as visited. */
488 0 : block = EXIT_BLOCK_PTR_FOR_FN (cfun);
489 0 : block->flags |= BB_VISITED;
490 :
491 : /* And entry block as not. */
492 0 : block = ENTRY_BLOCK_PTR_FOR_FN (cfun);
493 0 : block->flags &= ~BB_VISITED;
494 :
495 0 : parallel_g *par = omp_sese_find_par (map, 0, block);
496 :
497 0 : if (dump_file)
498 : {
499 0 : fprintf (dump_file, "\nLoops\n");
500 0 : omp_sese_dump_pars (par, 0);
501 0 : fprintf (dump_file, "\n");
502 : }
503 :
504 0 : return par;
505 : }
506 :
507 : static void
508 0 : populate_single_mode_bitmaps (parallel_g *par, bitmap worker_single,
509 : bitmap vector_single, unsigned outer_mask,
510 : int depth)
511 : {
512 0 : unsigned mask = outer_mask | par->mask;
513 :
514 0 : basic_block block;
515 :
516 0 : for (unsigned i = 0; par->blocks.iterate (i, &block); i++)
517 : {
518 0 : if ((mask & GOMP_DIM_MASK (GOMP_DIM_WORKER)) == 0)
519 0 : bitmap_set_bit (worker_single, block->index);
520 :
521 0 : if ((mask & GOMP_DIM_MASK (GOMP_DIM_VECTOR)) == 0)
522 0 : bitmap_set_bit (vector_single, block->index);
523 : }
524 :
525 0 : if (par->inner)
526 0 : populate_single_mode_bitmaps (par->inner, worker_single, vector_single,
527 : mask, depth + 1);
528 0 : if (par->next)
529 : populate_single_mode_bitmaps (par->next, worker_single, vector_single,
530 : outer_mask, depth);
531 0 : }
532 :
533 : /* A map from SSA names or var decls to record fields. */
534 :
535 : typedef hash_map<tree, tree> field_map_t;
536 :
537 : /* For each propagation record type, this is a map from SSA names or var decls
538 : to propagate, to the field in the record type that should be used for
539 : transmission and reception. */
540 :
541 : typedef hash_map<tree, field_map_t> record_field_map_t;
542 :
543 : static void
544 0 : install_var_field (tree var, tree record_type, field_map_t *fields)
545 : {
546 0 : tree name;
547 0 : char tmp[20];
548 :
549 0 : if (TREE_CODE (var) == SSA_NAME)
550 : {
551 0 : name = SSA_NAME_IDENTIFIER (var);
552 0 : if (!name)
553 : {
554 0 : sprintf (tmp, "_%u", (unsigned) SSA_NAME_VERSION (var));
555 0 : name = get_identifier (tmp);
556 : }
557 : }
558 0 : else if (VAR_P (var))
559 : {
560 0 : name = DECL_NAME (var);
561 0 : if (!name)
562 : {
563 0 : sprintf (tmp, "D_%u", (unsigned) DECL_UID (var));
564 0 : name = get_identifier (tmp);
565 : }
566 : }
567 : else
568 0 : gcc_unreachable ();
569 :
570 0 : gcc_assert (!fields->get (var));
571 :
572 0 : tree type = TREE_TYPE (var);
573 :
574 0 : if (POINTER_TYPE_P (type)
575 0 : && TYPE_RESTRICT (type))
576 0 : type = build_qualified_type (type, TYPE_QUALS (type) & ~TYPE_QUAL_RESTRICT);
577 :
578 0 : tree field = build_decl (BUILTINS_LOCATION, FIELD_DECL, name, type);
579 :
580 0 : if (VAR_P (var) && type == TREE_TYPE (var))
581 : {
582 0 : SET_DECL_ALIGN (field, DECL_ALIGN (var));
583 0 : DECL_USER_ALIGN (field) = DECL_USER_ALIGN (var);
584 0 : TREE_THIS_VOLATILE (field) = TREE_THIS_VOLATILE (var);
585 : }
586 : else
587 0 : SET_DECL_ALIGN (field, TYPE_ALIGN (type));
588 :
589 0 : fields->put (var, field);
590 :
591 0 : insert_field_into_struct (record_type, field);
592 0 : }
593 :
594 : /* Sets of SSA_NAMES or VAR_DECLs to propagate. */
595 : typedef hash_set<tree> propagation_set;
596 :
597 : static void
598 0 : find_ssa_names_to_propagate (parallel_g *par, unsigned outer_mask,
599 : bitmap worker_single, bitmap vector_single,
600 : vec<propagation_set *> *prop_set)
601 : {
602 0 : unsigned mask = outer_mask | par->mask;
603 :
604 0 : if (par->inner)
605 0 : find_ssa_names_to_propagate (par->inner, mask, worker_single,
606 : vector_single, prop_set);
607 0 : if (par->next)
608 0 : find_ssa_names_to_propagate (par->next, outer_mask, worker_single,
609 : vector_single, prop_set);
610 :
611 0 : if (mask & GOMP_DIM_MASK (GOMP_DIM_WORKER))
612 : {
613 : basic_block block;
614 : int ix;
615 :
616 0 : for (ix = 0; par->blocks.iterate (ix, &block); ix++)
617 : {
618 0 : for (gphi_iterator psi = gsi_start_phis (block);
619 0 : !gsi_end_p (psi); gsi_next (&psi))
620 : {
621 0 : gphi *phi = psi.phi ();
622 0 : use_operand_p use;
623 0 : ssa_op_iter iter;
624 :
625 0 : FOR_EACH_PHI_ARG (use, phi, iter, SSA_OP_USE)
626 : {
627 0 : tree var = USE_FROM_PTR (use);
628 :
629 0 : if (TREE_CODE (var) != SSA_NAME)
630 0 : continue;
631 :
632 0 : gimple *def_stmt = SSA_NAME_DEF_STMT (var);
633 :
634 0 : if (gimple_nop_p (def_stmt))
635 0 : continue;
636 :
637 0 : basic_block def_bb = gimple_bb (def_stmt);
638 :
639 0 : if (bitmap_bit_p (worker_single, def_bb->index))
640 : {
641 0 : if (!(*prop_set)[def_bb->index])
642 0 : (*prop_set)[def_bb->index] = new propagation_set;
643 :
644 0 : propagation_set *ws_prop = (*prop_set)[def_bb->index];
645 :
646 0 : ws_prop->add (var);
647 : }
648 : }
649 : }
650 :
651 0 : for (gimple_stmt_iterator gsi = gsi_start_bb (block);
652 0 : !gsi_end_p (gsi); gsi_next (&gsi))
653 : {
654 0 : use_operand_p use;
655 0 : ssa_op_iter iter;
656 0 : gimple *stmt = gsi_stmt (gsi);
657 :
658 0 : FOR_EACH_SSA_USE_OPERAND (use, stmt, iter, SSA_OP_USE)
659 : {
660 0 : tree var = USE_FROM_PTR (use);
661 :
662 0 : gimple *def_stmt = SSA_NAME_DEF_STMT (var);
663 :
664 0 : if (gimple_nop_p (def_stmt))
665 0 : continue;
666 :
667 0 : basic_block def_bb = gimple_bb (def_stmt);
668 :
669 0 : if (bitmap_bit_p (worker_single, def_bb->index))
670 : {
671 0 : if (!(*prop_set)[def_bb->index])
672 0 : (*prop_set)[def_bb->index] = new propagation_set;
673 :
674 0 : propagation_set *ws_prop = (*prop_set)[def_bb->index];
675 :
676 0 : ws_prop->add (var);
677 : }
678 : }
679 : }
680 : }
681 : }
682 0 : }
683 :
684 : /* Callback for walk_gimple_stmt to find RHS VAR_DECLs (uses) in a
685 : statement. */
686 :
687 : static tree
688 0 : find_partitioned_var_uses_1 (tree *node, int *, void *data)
689 : {
690 0 : walk_stmt_info *wi = (walk_stmt_info *) data;
691 0 : hash_set<tree> *partitioned_var_uses = (hash_set<tree> *) wi->info;
692 :
693 0 : if (!wi->is_lhs && VAR_P (*node))
694 0 : partitioned_var_uses->add (*node);
695 :
696 0 : return NULL_TREE;
697 : }
698 :
699 : static void
700 0 : find_partitioned_var_uses (parallel_g *par, unsigned outer_mask,
701 : hash_set<tree> *partitioned_var_uses)
702 : {
703 0 : unsigned mask = outer_mask | par->mask;
704 :
705 0 : if (par->inner)
706 0 : find_partitioned_var_uses (par->inner, mask, partitioned_var_uses);
707 0 : if (par->next)
708 0 : find_partitioned_var_uses (par->next, outer_mask, partitioned_var_uses);
709 :
710 0 : if (mask & GOMP_DIM_MASK (GOMP_DIM_WORKER))
711 : {
712 : basic_block block;
713 : int ix;
714 :
715 0 : for (ix = 0; par->blocks.iterate (ix, &block); ix++)
716 0 : for (gimple_stmt_iterator gsi = gsi_start_bb (block);
717 0 : !gsi_end_p (gsi); gsi_next (&gsi))
718 : {
719 0 : walk_stmt_info wi;
720 0 : memset (&wi, 0, sizeof (wi));
721 0 : wi.info = (void *) partitioned_var_uses;
722 0 : walk_gimple_stmt (&gsi, NULL, find_partitioned_var_uses_1, &wi);
723 : }
724 : }
725 0 : }
726 :
727 : /* Gang-private variables (typically placed in a GPU's shared memory) do not
728 : need to be processed by the worker-propagation mechanism. Populate the
729 : GANG_PRIVATE_VARS set with any such variables found in the current
730 : function. */
731 :
732 : static void
733 0 : find_gang_private_vars (hash_set<tree> *gang_private_vars)
734 : {
735 0 : basic_block block;
736 :
737 0 : FOR_EACH_BB_FN (block, cfun)
738 : {
739 0 : for (gimple_stmt_iterator gsi = gsi_start_bb (block);
740 0 : !gsi_end_p (gsi);
741 0 : gsi_next (&gsi))
742 : {
743 0 : gimple *stmt = gsi_stmt (gsi);
744 :
745 0 : if (gimple_call_internal_p (stmt, IFN_UNIQUE))
746 : {
747 0 : enum ifn_unique_kind k = ((enum ifn_unique_kind)
748 0 : TREE_INT_CST_LOW (gimple_call_arg (stmt, 0)));
749 0 : if (k == IFN_UNIQUE_OACC_PRIVATE)
750 : {
751 0 : HOST_WIDE_INT level
752 0 : = TREE_INT_CST_LOW (gimple_call_arg (stmt, 2));
753 0 : if (level != GOMP_DIM_GANG)
754 0 : continue;
755 0 : for (unsigned i = 3; i < gimple_call_num_args (stmt); i++)
756 : {
757 0 : tree arg = gimple_call_arg (stmt, i);
758 0 : gcc_assert (TREE_CODE (arg) == ADDR_EXPR);
759 0 : tree decl = TREE_OPERAND (arg, 0);
760 0 : gang_private_vars->add (decl);
761 : }
762 : }
763 : }
764 : }
765 : }
766 0 : }
767 :
/* Walk PAR (and recursively its inner and sibling parallels).  In blocks
   that execute in worker-single mode, find local non-aggregate variables
   that may be written (per the alias oracle) and that are also used in
   worker-partitioned context (PARTITIONED_VAR_USES); record them in the
   block's propagation set in PROP_SET.  Writes to gang-private variables
   (GANG_PRIVATE_VARS) are instead noted in WRITES_GANG_PRIVATE so a
   barrier can be placed after the block.  OUTER_MASK is the inherited
   partitioning.  */

static void
find_local_vars_to_propagate (parallel_g *par, unsigned outer_mask,
			      hash_set<tree> *partitioned_var_uses,
			      hash_set<tree> *gang_private_vars,
			      bitmap writes_gang_private,
			      vec<propagation_set *> *prop_set)
{
  unsigned mask = outer_mask | par->mask;

  if (par->inner)
    find_local_vars_to_propagate (par->inner, mask, partitioned_var_uses,
				  gang_private_vars, writes_gang_private,
				  prop_set);
  if (par->next)
    find_local_vars_to_propagate (par->next, outer_mask, partitioned_var_uses,
				  gang_private_vars, writes_gang_private,
				  prop_set);

  /* Only worker-single blocks need their writes propagated.  */
  if (!(mask & GOMP_DIM_MASK (GOMP_DIM_WORKER)))
    {
      basic_block block;
      int ix;

      for (ix = 0; par->blocks.iterate (ix, &block); ix++)
	{
	  for (gimple_stmt_iterator gsi = gsi_start_bb (block);
	       !gsi_end_p (gsi); gsi_next (&gsi))
	    {
	      gimple *stmt = gsi_stmt (gsi);
	      tree var;
	      unsigned i;

	      /* Check every local decl against this statement.  */
	      FOR_EACH_LOCAL_DECL (cfun, i, var)
		{
		  /* Skip globals, aggregates (handled by forcing their
		     writes into redundant mode elsewhere) and variables
		     never used in partitioned context.  */
		  if (!VAR_P (var)
		      || is_global_var (var)
		      || AGGREGATE_TYPE_P (TREE_TYPE (var))
		      || !partitioned_var_uses->contains (var))
		    continue;

		  if (stmt_may_clobber_ref_p (stmt, var))
		    {
		      if (dump_file)
			{
			  fprintf (dump_file, "bb %u: local variable may be "
				   "clobbered in %s mode: ", block->index,
				   mask_name (mask));
			  print_generic_expr (dump_file, var, TDF_SLIM);
			  fprintf (dump_file, "\n");
			}

		      if (gang_private_vars->contains (var))
			{
			  /* If we write a gang-private variable, we want a
			     barrier at the end of the block.  */
			  bitmap_set_bit (writes_gang_private, block->index);
			  continue;
			}

		      if (!(*prop_set)[block->index])
			(*prop_set)[block->index] = new propagation_set;

		      propagation_set *ws_prop
			= (*prop_set)[block->index];

		      ws_prop->add (var);
		    }
		}
	    }
	}
    }
}
840 :
841 : /* Transform basic blocks FROM, TO (which may be the same block) into:
842 : if (GOACC_single_start ())
843 : BLOCK;
844 : GOACC_barrier ();
845 : \ | /
846 : +----+
847 : | | (new) predicate block
848 : +----+--
849 : \ | / \ | / |t \
850 : +----+ +----+ +----+ |
851 : | | | | ===> | | | f (old) from block
852 : +----+ +----+ +----+ |
853 : | t/ \f | /
854 : +----+/
855 : (split (split before | | skip block
856 : at end) condition) +----+
857 : t/ \f
858 : */
859 :
static void
worker_single_simple (basic_block from, basic_block to,
		      hash_set<tree> *def_escapes_block)
{
  gimple *call, *cond;
  tree lhs, decl;
  basic_block skip_block;

  /* Split TO after its last statement (or before a trailing GIMPLE_COND,
     which must stay with its block); the new block E->dest is the join
     point that both the active and the neutered paths reach.  */
  gimple_stmt_iterator gsi = gsi_last_bb (to);
  if (EDGE_COUNT (to->succs) > 1)
    {
      gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_COND);
      gsi_prev (&gsi);
    }
  edge e = split_block (to, gsi_stmt (gsi));
  skip_block = e->dest;

  gimple_stmt_iterator start = gsi_after_labels (from);

  /* Emit "lhs = GOACC_single_start ()" at the head of FROM.  */
  decl = builtin_decl_explicit (BUILT_IN_GOACC_SINGLE_START);
  lhs = create_tmp_var (TREE_TYPE (TREE_TYPE (decl)));
  call = gimple_build_call (decl, 0);
  gimple_call_set_lhs (call, lhs);
  gsi_insert_before (&start, call, GSI_NEW_STMT);
  update_stmt (call);

  /* Branch on the result: only the single selected worker takes the
     true path through the original code.  */
  cond = gimple_build_cond (EQ_EXPR, lhs,
			    fold_convert_loc (UNKNOWN_LOCATION,
					      TREE_TYPE (lhs),
					      boolean_true_node),
			    NULL_TREE, NULL_TREE);
  gsi_insert_after (&start, cond, GSI_NEW_STMT);
  update_stmt (cond);

  edge et = split_block (from, cond);
  et->flags &= ~EDGE_FALLTHRU;
  et->flags |= EDGE_TRUE_VALUE;
  /* Make the active worker the more probable path so we prefer fallthrough
     (letting the idle workers jump around more).  */
  et->probability = profile_probability::likely ();

  /* The false (neutered) edge jumps straight to the join point.  */
  edge ef = make_edge (from, skip_block, EDGE_FALSE_VALUE);
  ef->probability = et->probability.invert ();

  basic_block neutered = split_edge (ef);
  gimple_stmt_iterator neut_gsi = gsi_last_bb (neutered);

  /* Any SSA definition made on the active path that escapes the region
     needs a PHI at the join point; the neutered path contributes a
     placeholder zero value.  */
  for (gsi = gsi_start_bb (et->dest); !gsi_end_p (gsi); gsi_next (&gsi))
    {
      gimple *stmt = gsi_stmt (gsi);
      ssa_op_iter iter;
      tree var;

      FOR_EACH_SSA_TREE_OPERAND (var, stmt, iter, SSA_OP_DEF)
	{
	  if (def_escapes_block->contains (var))
	    {
	      gphi *join_phi = create_phi_node (NULL_TREE, skip_block);
	      create_new_def_for (var, join_phi,
				  gimple_phi_result_ptr (join_phi));
	      add_phi_arg (join_phi, var, e, UNKNOWN_LOCATION);

	      tree neutered_def = copy_ssa_name (var, NULL);
	      /* We really want "don't care" or some value representing
		 undefined here, but optimizers will probably get rid of the
		 zero-assignments anyway.  */
	      gassign *zero = gimple_build_assign (neutered_def,
				build_zero_cst (TREE_TYPE (neutered_def)));

	      gsi_insert_after (&neut_gsi, zero, GSI_CONTINUE_LINKING);
	      update_stmt (zero);

	      add_phi_arg (join_phi, neutered_def, single_succ_edge (neutered),
			   UNKNOWN_LOCATION);
	      update_stmt (join_phi);
	    }
	}
    }
}
939 :
940 : static tree
941 0 : build_receiver_ref (tree var, tree receiver_decl, field_map_t *fields)
942 : {
943 0 : tree x = build_simple_mem_ref (receiver_decl);
944 0 : tree field = *fields->get (var);
945 0 : TREE_THIS_NOTRAP (x) = 1;
946 0 : x = omp_build_component_ref (x, field);
947 0 : return x;
948 : }
949 :
950 : static tree
951 0 : build_sender_ref (tree var, tree sender_decl, field_map_t *fields)
952 : {
953 0 : if (POINTER_TYPE_P (TREE_TYPE (sender_decl)))
954 0 : sender_decl = build_simple_mem_ref (sender_decl);
955 0 : tree field = *fields->get (var);
956 0 : return omp_build_component_ref (sender_decl, field);
957 : }
958 :
959 : static int
960 0 : sort_by_ssa_version_or_uid (const void *p1, const void *p2)
961 : {
962 0 : const tree t1 = *(const tree *)p1;
963 0 : const tree t2 = *(const tree *)p2;
964 :
965 0 : if (TREE_CODE (t1) == SSA_NAME && TREE_CODE (t2) == SSA_NAME)
966 0 : return SSA_NAME_VERSION (t1) - SSA_NAME_VERSION (t2);
967 0 : else if (TREE_CODE (t1) == SSA_NAME && TREE_CODE (t2) != SSA_NAME)
968 : return -1;
969 0 : else if (TREE_CODE (t1) != SSA_NAME && TREE_CODE (t2) == SSA_NAME)
970 : return 1;
971 : else
972 0 : return DECL_UID (t1) - DECL_UID (t2);
973 : }
974 :
975 : static int
976 0 : sort_by_size_then_ssa_version_or_uid (const void *p1, const void *p2)
977 : {
978 0 : const tree t1 = *(const tree *)p1;
979 0 : const tree t2 = *(const tree *)p2;
980 0 : unsigned HOST_WIDE_INT s1 = tree_to_uhwi (TYPE_SIZE (TREE_TYPE (t1)));
981 0 : unsigned HOST_WIDE_INT s2 = tree_to_uhwi (TYPE_SIZE (TREE_TYPE (t2)));
982 0 : if (s1 != s2)
983 0 : return s2 - s1;
984 : else
985 0 : return sort_by_ssa_version_or_uid (p1, p2);
986 : }
987 :
/* Neuter the single-block region FROM..TO for worker-single execution using
   the broadcasting scheme: only one worker executes the region's work, and
   its live-out definitions are copied to the other workers through a
   shared-memory record of type RECORD_TYPE placed at offset PLACEMENT
   (fields looked up via RECORD_FIELD_MAP).  DEF_ESCAPES_BLOCK holds the
   definitions live beyond the block; WORKER_PARTITIONED_USES those also used
   in worker-partitioned code.  If ISOLATE_BROADCASTS, emit extra barriers
   around the broadcast because the shared-memory range for this block
   overflowed.  HAS_GANG_PRIVATE_WRITE forces a barrier before copy-out.  */

static void
worker_single_copy (basic_block from, basic_block to,
                    hash_set<tree> *def_escapes_block,
                    hash_set<tree> *worker_partitioned_uses,
                    tree record_type, record_field_map_t *record_field_map,
                    unsigned HOST_WIDE_INT placement,
                    bool isolate_broadcasts, bool has_gang_private_write)
{
  /* If we only have virtual defs, we'll have no record type, but we still want
     to emit single_copy_start and (particularly) single_copy_end to act as
     a vdef source on the neutered edge representing memory writes on the
     non-neutered edge.  */
  if (!record_type)
    record_type = char_type_node;

  /* Target-specific sender/receiver views of the broadcast buffer.  */
  tree sender_decl
    = targetm.goacc.create_worker_broadcast_record (record_type, true,
                                                    ".oacc_worker_o",
                                                    placement);
  tree receiver_decl
    = targetm.goacc.create_worker_broadcast_record (record_type, false,
                                                    ".oacc_worker_i",
                                                    placement);

  /* Split off the tail of TO (keeping a final control-flow statement, if any,
     in the first half) to form the block that will hold the barrier.  */
  gimple_stmt_iterator gsi = gsi_last_bb (to);
  if (EDGE_COUNT (to->succs) > 1)
    gsi_prev (&gsi);
  edge e = split_block (to, gsi_stmt (gsi));
  basic_block barrier_block = e->dest;

  gimple_stmt_iterator start = gsi_after_labels (from);

  /* Call GOACC_single_copy_start at the top of FROM; its result selects the
     active worker below.  */
  tree decl = builtin_decl_explicit (BUILT_IN_GOACC_SINGLE_COPY_START);

  tree lhs = create_tmp_var (TREE_TYPE (TREE_TYPE (decl)));

  gimple *call
    = gimple_build_call (decl, 1,
                         POINTER_TYPE_P (TREE_TYPE (sender_decl))
                         ? sender_decl : build_fold_addr_expr (sender_decl));
  gimple_call_set_lhs (call, lhs);
  gsi_insert_before (&start, call, GSI_NEW_STMT);
  update_stmt (call);

  /* The shared-memory range for this block overflowed.  Add a barrier before
     the GOACC_single_copy_start call.  */
  if (isolate_broadcasts)
    {
      decl = builtin_decl_explicit (BUILT_IN_GOACC_BARRIER);
      gimple *acc_bar = gimple_build_call (decl, 0);
      gsi_insert_before (&start, acc_bar, GSI_SAME_STMT);
    }

  /* Convert the call result to the receiver pointer type and store it.  */
  tree conv_tmp = make_ssa_name (TREE_TYPE (receiver_decl));

  gimple *conv = gimple_build_assign (conv_tmp,
                                      fold_convert (TREE_TYPE (receiver_decl),
                                                    lhs));
  update_stmt (conv);
  gsi_insert_after (&start, conv, GSI_NEW_STMT);
  gimple *asgn = gimple_build_assign (receiver_decl, conv_tmp);
  gsi_insert_after (&start, asgn, GSI_NEW_STMT);
  update_stmt (asgn);

  tree zero_ptr = build_int_cst (TREE_TYPE (receiver_decl), 0);

  tree recv_tmp = make_ssa_name (TREE_TYPE (receiver_decl));
  asgn = gimple_build_assign (recv_tmp, receiver_decl);
  gsi_insert_after (&start, asgn, GSI_NEW_STMT);
  update_stmt (asgn);

  /* A NULL receiver pointer apparently marks the active worker: the
     EQ-to-zero (true) edge leads to the region body below.  */
  gimple *cond = gimple_build_cond (EQ_EXPR, recv_tmp, zero_ptr, NULL_TREE,
                                    NULL_TREE);
  update_stmt (cond);

  gsi_insert_after (&start, cond, GSI_NEW_STMT);

  edge et = split_block (from, cond);
  et->flags &= ~EDGE_FALLTHRU;
  et->flags |= EDGE_TRUE_VALUE;
  /* Make the active worker the more probable path so we prefer fallthrough
     (letting the idle workers jump around more).  */
  et->probability = profile_probability::likely ();

  basic_block body = et->dest;

  /* Neutered workers skip straight to the barrier block.  */
  edge ef = make_edge (from, barrier_block, EDGE_FALSE_VALUE);
  ef->probability = et->probability.invert ();

  gimple_stmt_iterator bar_gsi = gsi_start_bb (barrier_block);
  cond = gimple_build_cond (NE_EXPR, recv_tmp, zero_ptr, NULL_TREE, NULL_TREE);

  /* When data is actually broadcast (or gang-private memory was written),
     all workers must synchronize before the copy-out test.  */
  if (record_type != char_type_node || has_gang_private_write)
    {
      decl = builtin_decl_explicit (BUILT_IN_GOACC_BARRIER);
      gimple *acc_bar = gimple_build_call (decl, 0);

      gsi_insert_before (&bar_gsi, acc_bar, GSI_NEW_STMT);
      gsi_insert_after (&bar_gsi, cond, GSI_NEW_STMT);
    }
  else
    gsi_insert_before (&bar_gsi, cond, GSI_NEW_STMT);

  /* Split again: the NE (true) edge is the neutered workers' path through
     the copy-out block; the false edge goes straight to the exit block.  */
  edge et2 = split_block (barrier_block, cond);
  et2->flags &= ~EDGE_FALLTHRU;
  et2->flags |= EDGE_TRUE_VALUE;
  et2->probability = profile_probability::unlikely ();

  basic_block exit_block = et2->dest;

  basic_block copyout_block = split_edge (et2);
  edge ef2 = make_edge (barrier_block, exit_block, EDGE_FALSE_VALUE);
  ef2->probability = et2->probability.invert ();

  gimple_stmt_iterator copyout_gsi = gsi_start_bb (copyout_block);

  edge copyout_to_exit = single_succ_edge (copyout_block);

  /* Statements storing the active worker's definitions into the sender
     record, accumulated here and emitted at the end of the body.  */
  gimple_seq sender_seq = NULL;

  /* Make sure we iterate over definitions in a stable order.  */
  auto_vec<tree> escape_vec (def_escapes_block->elements ());
  for (hash_set<tree>::iterator it = def_escapes_block->begin ();
       it != def_escapes_block->end (); ++it)
    escape_vec.quick_push (*it);
  escape_vec.qsort (sort_by_ssa_version_or_uid);

  for (unsigned i = 0; i < escape_vec.length (); i++)
    {
      tree var = escape_vec[i];

      if (TREE_CODE (var) == SSA_NAME && SSA_NAME_IS_VIRTUAL_OPERAND (var))
        continue;

      tree barrier_def = 0;

      if (TREE_CODE (var) == SSA_NAME)
        {
          gimple *def_stmt = SSA_NAME_DEF_STMT (var);

          if (gimple_nop_p (def_stmt))
            continue;

          /* The barrier phi takes one result from the actual work of the
             block we're neutering, and the other result is constant zero of
             the same type.  */

          gphi *barrier_phi = create_phi_node (NULL_TREE, barrier_block);
          barrier_def = create_new_def_for (var, barrier_phi,
                                            gimple_phi_result_ptr (barrier_phi));

          add_phi_arg (barrier_phi, var, e, UNKNOWN_LOCATION);
          add_phi_arg (barrier_phi, build_zero_cst (TREE_TYPE (var)), ef,
                       UNKNOWN_LOCATION);

          update_stmt (barrier_phi);
        }
      else
        gcc_assert (VAR_P (var));

      /* If we had no record type, we will have no fields map.  */
      field_map_t *fields = record_field_map->get (record_type);

      if (worker_partitioned_uses->contains (var)
          && fields
          && fields->get (var))
        {
          tree neutered_def = make_ssa_name (TREE_TYPE (var));

          /* Receive definition from shared memory block.  */

          tree receiver_ref = build_receiver_ref (var, receiver_decl, fields);
          gassign *recv = gimple_build_assign (neutered_def,
                                               receiver_ref);
          gsi_insert_after (&copyout_gsi, recv, GSI_CONTINUE_LINKING);
          update_stmt (recv);

          if (VAR_P (var))
            {
              /* If it's a VAR_DECL, we only copied to an SSA temporary.  Copy
                 to the final location now.  */
              gassign *asgn = gimple_build_assign (var, neutered_def);
              gsi_insert_after (&copyout_gsi, asgn, GSI_CONTINUE_LINKING);
              update_stmt (asgn);
            }
          else
            {
              /* If it's an SSA name, create a new phi at the join node to
                 represent either the output from the active worker (the
                 barrier) or the inactive workers (the copyout block).  */
              gphi *join_phi = create_phi_node (NULL_TREE, exit_block);
              create_new_def_for (barrier_def, join_phi,
                                  gimple_phi_result_ptr (join_phi));
              add_phi_arg (join_phi, barrier_def, ef2, UNKNOWN_LOCATION);
              add_phi_arg (join_phi, neutered_def, copyout_to_exit,
                           UNKNOWN_LOCATION);
              update_stmt (join_phi);
            }

          /* Send definition to shared memory block.  */

          tree sender_ref = build_sender_ref (var, sender_decl, fields);

          if (TREE_CODE (var) == SSA_NAME)
            {
              gassign *send = gimple_build_assign (sender_ref, var);
              gimple_seq_add_stmt (&sender_seq, send);
              update_stmt (send);
            }
          else if (VAR_P (var))
            {
              /* A VAR_DECL must be copied through an SSA temporary.  */
              tree tmp = make_ssa_name (TREE_TYPE (var));
              gassign *send = gimple_build_assign (tmp, var);
              gimple_seq_add_stmt (&sender_seq, send);
              update_stmt (send);
              send = gimple_build_assign (sender_ref, tmp);
              gimple_seq_add_stmt (&sender_seq, send);
              update_stmt (send);
            }
          else
            gcc_unreachable ();
        }
    }

  /* The shared-memory range for this block overflowed.  Add a barrier at the
     end.  */
  if (isolate_broadcasts)
    {
      gsi = gsi_start_bb (exit_block);
      decl = builtin_decl_explicit (BUILT_IN_GOACC_BARRIER);
      gimple *acc_bar = gimple_build_call (decl, 0);
      gsi_insert_before (&gsi, acc_bar, GSI_SAME_STMT);
    }

  /* It's possible for the ET->DEST block (the work done by the active thread)
     to finish with a control-flow insn, e.g. a UNIQUE function call.  Split
     the block and add SENDER_SEQ in the latter part to avoid having control
     flow in the middle of a BB.  */

  decl = builtin_decl_explicit (BUILT_IN_GOACC_SINGLE_COPY_END);
  call = gimple_build_call (decl, 1,
                            POINTER_TYPE_P (TREE_TYPE (sender_decl))
                            ? sender_decl
                            : build_fold_addr_expr (sender_decl));
  gimple_seq_add_stmt (&sender_seq, call);

  gsi = gsi_last_bb (body);
  gimple *last = gsi_stmt (gsi);
  basic_block sender_block = split_block (body, last)->dest;
  gsi = gsi_last_bb (sender_block);
  gsi_insert_seq_after (&gsi, sender_seq, GSI_CONTINUE_LINKING);
}
1240 :
/* Map from a basic block to the shared-memory offset chosen for its
   broadcast record, paired with a flag saying whether a range was actually
   allocated (false means the range overflowed and barriers must isolate the
   broadcast).  */
typedef hash_map<basic_block, std::pair<unsigned HOST_WIDE_INT, bool> >
  blk_offset_map_t;
1243 :
/* Walk the parallel-region tree PAR (recursing into inner and sibling
   regions) and neuter each basic block that executes in worker-single mode:
   either via the broadcasting scheme (worker_single_copy) when definitions
   must be propagated, or the simple predication scheme otherwise.  Also
   wraps non-internal calls in worker-single blocks with barriers.  See the
   parameters of oacc_do_neutering for the meaning of the map arguments.  */

static void
neuter_worker_single (parallel_g *par, unsigned outer_mask,
                      bitmap worker_single, bitmap vector_single,
                      vec<propagation_set *> *prop_set,
                      hash_set<tree> *partitioned_var_uses,
                      record_field_map_t *record_field_map,
                      blk_offset_map_t *blk_offset_map,
                      bitmap writes_gang_private)
{
  unsigned mask = outer_mask | par->mask;

  /* Only regions not themselves worker-partitioned need neutering.  */
  if ((mask & GOMP_DIM_MASK (GOMP_DIM_WORKER)) == 0)
    {
      basic_block block;

      for (unsigned i = 0; par->blocks.iterate (i, &block); i++)
        {
          bool has_defs = false;
          hash_set<tree> def_escapes_block;
          hash_set<tree> worker_partitioned_uses;
          unsigned j;
          tree var;

          /* Collect SSA names defined in BLOCK that are used outside it or
             in worker-partitioned blocks.  */
          FOR_EACH_SSA_NAME (j, var, cfun)
            {
              if (SSA_NAME_IS_VIRTUAL_OPERAND (var))
                {
                  has_defs = true;
                  continue;
                }

              gimple *def_stmt = SSA_NAME_DEF_STMT (var);

              if (gimple_nop_p (def_stmt))
                continue;

              if (gimple_bb (def_stmt)->index != block->index)
                continue;

              gimple *use_stmt;
              imm_use_iterator use_iter;
              bool uses_outside_block = false;
              bool worker_partitioned_use = false;

              FOR_EACH_IMM_USE_STMT (use_stmt, use_iter, var)
                {
                  int blocknum = gimple_bb (use_stmt)->index;

                  /* Don't propagate SSA names that are only used in the
                     current block, unless the usage is in a phi node: that
                     means the name left the block, then came back in at the
                     top.  */
                  if (blocknum != block->index
                      || gimple_code (use_stmt) == GIMPLE_PHI)
                    uses_outside_block = true;
                  if (!bitmap_bit_p (worker_single, blocknum))
                    worker_partitioned_use = true;
                }

              if (uses_outside_block)
                def_escapes_block.add (var);

              if (worker_partitioned_use)
                {
                  worker_partitioned_uses.add (var);
                  has_defs = true;
                }
            }

          /* Fold in the local variables recorded for this block; the set is
             consumed (deleted) here.  */
          propagation_set *ws_prop = (*prop_set)[block->index];

          if (ws_prop)
            {
              for (propagation_set::iterator it = ws_prop->begin ();
                   it != ws_prop->end ();
                   ++it)
                {
                  tree var = *it;
                  if (TREE_CODE (var) == VAR_DECL)
                    {
                      def_escapes_block.add (var);
                      if (partitioned_var_uses->contains (var))
                        {
                          worker_partitioned_uses.add (var);
                          has_defs = true;
                        }
                    }
                }

              delete ws_prop;
              (*prop_set)[block->index] = 0;
            }

          /* Check whether the block contains only IFN markers that will be
             removed later anyway, so predication can be skipped.  */
          bool only_marker_fns = true;
          bool join_block = false;

          for (gimple_stmt_iterator gsi = gsi_start_bb (block);
               !gsi_end_p (gsi);
               gsi_next (&gsi))
            {
              gimple *stmt = gsi_stmt (gsi);
              if (gimple_code (stmt) == GIMPLE_CALL
                  && gimple_call_internal_p (stmt, IFN_UNIQUE))
                {
                  enum ifn_unique_kind k = ((enum ifn_unique_kind)
                    TREE_INT_CST_LOW (gimple_call_arg (stmt, 0)));
                  if (k != IFN_UNIQUE_OACC_PRIVATE
                      && k != IFN_UNIQUE_OACC_JOIN
                      && k != IFN_UNIQUE_OACC_FORK
                      && k != IFN_UNIQUE_OACC_HEAD_MARK
                      && k != IFN_UNIQUE_OACC_TAIL_MARK)
                    only_marker_fns = false;
                  else if (k == IFN_UNIQUE_OACC_JOIN)
                    /* The JOIN marker is special in that it *cannot* be
                       predicated for worker zero, because it may be lowered
                       to a barrier instruction and all workers must typically
                       execute that barrier.  We shouldn't be doing any
                       broadcasts from the join block anyway.  */
                    join_block = true;
                }
              else if (gimple_code (stmt) == GIMPLE_CALL
                       && gimple_call_internal_p (stmt, IFN_GOACC_LOOP))
                /* Empty.  */;
              else if (gimple_nop_p (stmt))
                /* Empty.  */;
              else
                only_marker_fns = false;
            }

          /* We can skip predicating this block for worker zero if the only
             thing it contains is marker functions that will be removed in the
             oaccdevlow pass anyway.
             Don't do this if the block has (any) phi nodes, because those
             might define SSA names that need broadcasting.
             TODO: We might be able to skip transforming blocks that only
             contain some other trivial statements too.  */
          if (only_marker_fns && !phi_nodes (block))
            continue;

          gcc_assert (!join_block);

          if (has_defs)
            {
              /* Broadcast scheme: look up the record type and the
                 shared-memory placement chosen for this block.  */
              tree record_type = (tree) block->aux;
              std::pair<unsigned HOST_WIDE_INT, bool> *off_rngalloc
                = blk_offset_map->get (block);
              gcc_assert (!record_type || off_rngalloc);
              unsigned HOST_WIDE_INT offset
                = off_rngalloc ? off_rngalloc->first : 0;
              bool range_allocated
                = off_rngalloc ? off_rngalloc->second : true;
              bool has_gang_private_write
                = bitmap_bit_p (writes_gang_private, block->index);
              worker_single_copy (block, block, &def_escapes_block,
                                  &worker_partitioned_uses, record_type,
                                  record_field_map,
                                  offset, !range_allocated,
                                  has_gang_private_write);
            }
          else
            worker_single_simple (block, block, &def_escapes_block);
        }
    }

  if ((outer_mask & GOMP_DIM_MASK (GOMP_DIM_WORKER)) == 0)
    {
      basic_block block;

      for (unsigned i = 0; par->blocks.iterate (i, &block); i++)
        for (gimple_stmt_iterator gsi = gsi_start_bb (block);
             !gsi_end_p (gsi);
             gsi_next (&gsi))
          {
            gimple *stmt = gsi_stmt (gsi);

            if (gimple_code (stmt) == GIMPLE_CALL
                && !gimple_call_internal_p (stmt)
                && !omp_sese_active_worker_call (as_a <gcall *> (stmt)))
              {
                /* If we have an OpenACC routine call in worker-single mode,
                   place barriers before and afterwards to prevent
                   clobbering re-used shared memory regions (as are used
                   for AMDGCN at present, for example).  */
                tree decl = builtin_decl_explicit (BUILT_IN_GOACC_BARRIER);
                gsi_insert_before (&gsi, gimple_build_call (decl, 0),
                                   GSI_SAME_STMT);
                gsi_insert_after (&gsi, gimple_build_call (decl, 0),
                                  GSI_NEW_STMT);
              }
          }
    }

  /* Recurse into nested regions (with the accumulated mask) and siblings
     (with the original outer mask).  */
  if (par->inner)
    neuter_worker_single (par->inner, mask, worker_single, vector_single,
                          prop_set, partitioned_var_uses, record_field_map,
                          blk_offset_map, writes_gang_private);
  if (par->next)
    neuter_worker_single (par->next, outer_mask, worker_single, vector_single,
                          prop_set, partitioned_var_uses, record_field_map,
                          blk_offset_map, writes_gang_private);
}
1445 :
1446 : static void
1447 0 : dfs_broadcast_reachable_1 (basic_block bb, sbitmap reachable)
1448 : {
1449 0 : if (bb->flags & BB_VISITED)
1450 : return;
1451 :
1452 0 : bb->flags |= BB_VISITED;
1453 :
1454 0 : if (bb->succs)
1455 : {
1456 0 : edge e;
1457 0 : edge_iterator ei;
1458 0 : FOR_EACH_EDGE (e, ei, bb->succs)
1459 : {
1460 0 : basic_block dest = e->dest;
1461 0 : if (dest->aux)
1462 0 : bitmap_set_bit (reachable, dest->index);
1463 : else
1464 0 : dfs_broadcast_reachable_1 (dest, reachable);
1465 : }
1466 : }
1467 : }
1468 :
/* Pair of (basic block index, broadcast record type for that block).  */
typedef std::pair<int, tree> idx_decl_pair_t;

/* Per-block-index splay trees of the address ranges already allocated.  */
typedef auto_vec<splay_tree> used_range_vec_t;
1472 :
1473 : static int
1474 0 : sort_size_descending (const void *a, const void *b)
1475 : {
1476 0 : const idx_decl_pair_t *pa = (const idx_decl_pair_t *) a;
1477 0 : const idx_decl_pair_t *pb = (const idx_decl_pair_t *) b;
1478 0 : unsigned HOST_WIDE_INT asize = tree_to_uhwi (TYPE_SIZE_UNIT (pa->second));
1479 0 : unsigned HOST_WIDE_INT bsize = tree_to_uhwi (TYPE_SIZE_UNIT (pb->second));
1480 0 : return bsize - asize;
1481 : }
1482 :
1483 : class addr_range
1484 : {
1485 : public:
1486 0 : addr_range (unsigned HOST_WIDE_INT addr_lo, unsigned HOST_WIDE_INT addr_hi)
1487 0 : : lo (addr_lo), hi (addr_hi)
1488 0 : { }
1489 0 : addr_range (const addr_range &ar) : lo (ar.lo), hi (ar.hi)
1490 : { }
1491 0 : addr_range () : lo (0), hi (0)
1492 0 : { }
1493 :
1494 0 : bool invalid () { return lo == 0 && hi == 0; }
1495 :
1496 : unsigned HOST_WIDE_INT lo;
1497 : unsigned HOST_WIDE_INT hi;
1498 : };
1499 :
1500 : static int
1501 0 : splay_tree_compare_addr_range (splay_tree_key a, splay_tree_key b)
1502 : {
1503 0 : addr_range *ar = (addr_range *) a;
1504 0 : addr_range *br = (addr_range *) b;
1505 0 : if (ar->lo == br->lo && ar->hi == br->hi)
1506 : return 0;
1507 0 : if (ar->hi <= br->lo)
1508 : return -1;
1509 0 : else if (ar->lo >= br->hi)
1510 0 : return 1;
1511 : return 0;
1512 : }
1513 :
1514 : static void
1515 0 : splay_tree_free_key (splay_tree_key k)
1516 : {
1517 0 : addr_range *ar = (addr_range *) k;
1518 0 : delete ar;
1519 0 : }
1520 :
/* First-fit allocator: find an ALIGN-aligned gap of SIZE bytes between the
   ranges already recorded in splay tree S, within BOUNDS.  Returns the new
   range, or the invalid range (see addr_range::invalid) if nothing fits.  */

static addr_range
first_fit_range (splay_tree s, unsigned HOST_WIDE_INT size,
                 unsigned HOST_WIDE_INT align, addr_range *bounds)
{
  splay_tree_node min = splay_tree_min (s);
  if (min)
    {
      /* Scan the gaps between consecutive allocated ranges.  */
      splay_tree_node next;
      while ((next = splay_tree_successor (s, min->key)))
        {
          unsigned HOST_WIDE_INT lo = ((addr_range *) min->key)->hi;
          unsigned HOST_WIDE_INT hi = ((addr_range *) next->key)->lo;
          unsigned HOST_WIDE_INT base = (lo + align - 1) & ~(align - 1);
          if (base + size <= hi)
            return addr_range (base, base + size);
          min = next;
        }

      /* No interior gap fits; try after the last allocated range.  */
      unsigned HOST_WIDE_INT base = ((addr_range *)min->key)->hi;
      base = (base + align - 1) & ~(align - 1);
      if (base + size <= bounds->hi)
        return addr_range (base, base + size);
      else
        return addr_range ();
    }
  else
    {
      /* Tree is empty: allocate at the (aligned) start of BOUNDS.  */
      unsigned HOST_WIDE_INT lo = bounds->lo;
      lo = (lo + align - 1) & ~(align - 1);
      if (lo + size <= bounds->hi)
        return addr_range (lo, lo + size);
      else
        return addr_range ();
    }
}
1556 :
/* splay_tree_foreach callback for merge_ranges: insert node N's range into
   the accumulator tree PTR, unioning it with any overlapping range already
   present (the comparator treats overlapping ranges as equal).  */

static int
merge_ranges_1 (splay_tree_node n, void *ptr)
{
  splay_tree accum = (splay_tree) ptr;
  addr_range ar = *(addr_range *) n->key;

  splay_tree_node old = splay_tree_lookup (accum, n->key);

  /* We might have an overlap.  Create a new range covering the
     overlapping parts.  */
  if (old)
    {
      addr_range *old_ar = (addr_range *) old->key;
      ar.lo = MIN (old_ar->lo, ar.lo);
      ar.hi = MAX (old_ar->hi, ar.hi);
      splay_tree_remove (accum, old->key);
    }

  /* Keys are owned by the tree; see splay_tree_free_key.  */
  addr_range *new_ar = new addr_range (ar);

  splay_tree_insert (accum, (splay_tree_key) new_ar, n->value);

  return 0;
}
1581 :
/* Fold every address range in SP into ACCUM, unioning overlapping ranges
   (see merge_ranges_1).  */

static void
merge_ranges (splay_tree accum, splay_tree sp)
{
  splay_tree_foreach (sp, merge_ranges_1, (void *) accum);
}
1587 :
/* Main driver for worker neutering/broadcasting of the current function.
   BOUNDS_LO/BOUNDS_HI delimit the shared-memory region available for
   broadcast buffers.  Splits blocks, discovers the parallel-region
   structure, computes which SSA names and variables must be broadcast,
   builds a record type per broadcasting block, assigns each record a
   shared-memory range (first-fit, avoiding conflicts with other blocks
   whose broadcasts may be live at the same time), and finally rewrites the
   worker-single blocks.  */

static void
oacc_do_neutering (unsigned HOST_WIDE_INT bounds_lo,
                   unsigned HOST_WIDE_INT bounds_hi)
{
  bb_stmt_map_t bb_stmt_map;
  auto_bitmap worker_single, vector_single;

  omp_sese_split_blocks (&bb_stmt_map);

  if (dump_file)
    {
      fprintf (dump_file, "\n\nAfter splitting:\n\n");
      dump_function_to_file (current_function_decl, dump_file, dump_flags);
    }

  unsigned mask = 0;

  /* If this is a routine, calculate MASK as if the outer levels are already
     partitioned.  */
  {
    tree attr = oacc_get_fn_attrib (current_function_decl);
    tree dims = TREE_VALUE (attr);
    unsigned ix;
    for (ix = 0; ix != GOMP_DIM_MAX; ix++, dims = TREE_CHAIN (dims))
      {
        tree allowed = TREE_PURPOSE (dims);
        if (allowed && integer_zerop (allowed))
          mask |= GOMP_DIM_MASK (ix);
      }
  }

  parallel_g *par = omp_sese_discover_pars (&bb_stmt_map);
  populate_single_mode_bitmaps (par, worker_single, vector_single, mask, 0);

  basic_block bb;
  FOR_ALL_BB_FN (bb, cfun)
    bb->aux = NULL;

  /* Per-block sets of names that need propagating; consumed (and deleted)
     by neuter_worker_single below.  */
  vec<propagation_set *> prop_set (vNULL);
  prop_set.safe_grow_cleared (last_basic_block_for_fn (cfun), true);

  find_ssa_names_to_propagate (par, mask, worker_single, vector_single,
                               &prop_set);

  hash_set<tree> partitioned_var_uses;
  hash_set<tree> gang_private_vars;
  auto_bitmap writes_gang_private;

  find_gang_private_vars (&gang_private_vars);
  find_partitioned_var_uses (par, mask, &partitioned_var_uses);
  find_local_vars_to_propagate (par, mask, &partitioned_var_uses,
                                &gang_private_vars, writes_gang_private,
                                &prop_set);

  record_field_map_t record_field_map;

  /* Build a record type per broadcasting block, with one field per
     propagated name; the type is stashed in the block's ->aux.  */
  FOR_ALL_BB_FN (bb, cfun)
    {
      propagation_set *ws_prop = prop_set[bb->index];
      if (ws_prop)
        {
          tree record_type = lang_hooks.types.make_type (RECORD_TYPE);
          tree name = create_tmp_var_name (".oacc_ws_data_s");
          name = build_decl (UNKNOWN_LOCATION, TYPE_DECL, name, record_type);
          DECL_ARTIFICIAL (name) = 1;
          DECL_NAMELESS (name) = 1;
          TYPE_NAME (record_type) = name;
          TYPE_ARTIFICIAL (record_type) = 1;

          auto_vec<tree> field_vec (ws_prop->elements ());
          for (hash_set<tree>::iterator it = ws_prop->begin ();
               it != ws_prop->end (); ++it)
            field_vec.quick_push (*it);

          field_vec.qsort (sort_by_size_then_ssa_version_or_uid);

          bool existed;
          field_map_t *fields
            = &record_field_map.get_or_insert (record_type, &existed);
          gcc_checking_assert (!existed);

          /* Insert var fields in reverse order, so the last inserted element
             is the first in the structure.  */
          for (int i = field_vec.length () - 1; i >= 0; i--)
            install_var_field (field_vec[i], record_type, fields);

          layout_type (record_type);

          bb->aux = (tree) record_type;
        }
    }

  /* For each broadcasting block, compute the set of other broadcasting
     blocks reachable from it without passing through a third one.  */
  sbitmap *reachable
    = sbitmap_vector_alloc (last_basic_block_for_fn (cfun),
                            last_basic_block_for_fn (cfun));

  bitmap_vector_clear (reachable, last_basic_block_for_fn (cfun));

  auto_vec<std::pair<int, tree> > priority;

  FOR_ALL_BB_FN (bb, cfun)
    {
      if (bb->aux)
        {
          tree record_type = (tree) bb->aux;

          basic_block bb2;
          FOR_ALL_BB_FN (bb2, cfun)
            bb2->flags &= ~BB_VISITED;

          priority.safe_push (std::make_pair (bb->index, record_type));
          dfs_broadcast_reachable_1 (bb, reachable[bb->index]);
        }
    }

  /* Symmetrize the reachability relation: conflicts go both ways.  */
  sbitmap *inverted
    = sbitmap_vector_alloc (last_basic_block_for_fn (cfun),
                            last_basic_block_for_fn (cfun));

  bitmap_vector_clear (inverted, last_basic_block_for_fn (cfun));

  for (int i = 0; i < last_basic_block_for_fn (cfun); i++)
    {
      sbitmap_iterator bi;
      unsigned int j;
      EXECUTE_IF_SET_IN_BITMAP (reachable[i], 0, j, bi)
        bitmap_set_bit (inverted[j], i);
    }

  for (int i = 0; i < last_basic_block_for_fn (cfun); i++)
    bitmap_ior (reachable[i], reachable[i], inverted[i]);

  sbitmap_vector_free (inverted);

  used_range_vec_t used_ranges;

  used_ranges.safe_grow_cleared (last_basic_block_for_fn (cfun));

  blk_offset_map_t blk_offset_map;

  addr_range worker_shm_bounds (bounds_lo, bounds_hi);

  /* Allocate shared memory to the largest records first.  */
  priority.qsort (sort_size_descending);
  for (unsigned int i = 0; i < priority.length (); i++)
    {
      idx_decl_pair_t p = priority[i];
      int blkno = p.first;
      tree record_type = p.second;
      HOST_WIDE_INT size = tree_to_uhwi (TYPE_SIZE_UNIT (record_type));
      HOST_WIDE_INT align = TYPE_ALIGN_UNIT (record_type);

      /* Union of the ranges used by this block and by every conflicting
         (mutually reachable) block.  */
      splay_tree conflicts = splay_tree_new (splay_tree_compare_addr_range,
                                             splay_tree_free_key, NULL);

      if (!used_ranges[blkno])
        used_ranges[blkno] = splay_tree_new (splay_tree_compare_addr_range,
                                             splay_tree_free_key, NULL);
      else
        merge_ranges (conflicts, used_ranges[blkno]);

      sbitmap_iterator bi;
      unsigned int j;
      EXECUTE_IF_SET_IN_BITMAP (reachable[blkno], 0, j, bi)
        if (used_ranges[j])
          merge_ranges (conflicts, used_ranges[j]);

      addr_range ar
        = first_fit_range (conflicts, size, align, &worker_shm_bounds);

      splay_tree_delete (conflicts);

      if (ar.invalid ())
        {
          /* No free range: fall back to the region base and record that no
             range was allocated, so the broadcast is isolated by barriers
             (see worker_single_copy).  */
          unsigned HOST_WIDE_INT base
            = (bounds_lo + align - 1) & ~(align - 1);
          if (base + size > bounds_hi)
            error_at (UNKNOWN_LOCATION, "shared-memory region overflow");
          std::pair<unsigned HOST_WIDE_INT, bool> base_inrng
            = std::make_pair (base, false);
          blk_offset_map.put (BASIC_BLOCK_FOR_FN (cfun, blkno), base_inrng);
        }
      else
        {
          splay_tree_node old = splay_tree_lookup (used_ranges[blkno],
                                                   (splay_tree_key) &ar);
          if (old)
            {
              fprintf (stderr, "trying to map [%d..%d] but [%d..%d] is "
                       "already mapped in block %d\n", (int) ar.lo,
                       (int) ar.hi, (int) ((addr_range *) old->key)->lo,
                       (int) ((addr_range *) old->key)->hi, blkno);
              abort ();
            }

          addr_range *arp = new addr_range (ar);
          splay_tree_insert (used_ranges[blkno], (splay_tree_key) arp,
                             (splay_tree_value) blkno);
          std::pair<unsigned HOST_WIDE_INT, bool> base_inrng
            = std::make_pair (ar.lo, true);
          blk_offset_map.put (BASIC_BLOCK_FOR_FN (cfun, blkno), base_inrng);
        }
    }

  sbitmap_vector_free (reachable);

  neuter_worker_single (par, mask, worker_single, vector_single, &prop_set,
                        &partitioned_var_uses, &record_field_map,
                        &blk_offset_map, writes_gang_private);

  record_field_map.empty ();

  /* These are supposed to have been 'delete'd by 'neuter_worker_single'.  */
  for (auto it : prop_set)
    gcc_checking_assert (!it);
  prop_set.release ();

  delete par;

  /* This doesn't seem to make a difference.  */
  loops_state_clear (LOOP_CLOSED_SSA);

  /* Neutering worker-single neutered blocks will invalidate dominance info.
     It may be possible to incrementally update just the affected blocks, but
     obliterate everything for now.  */
  free_dominance_info (CDI_DOMINATORS);
  free_dominance_info (CDI_POST_DOMINATORS);

  if (dump_file)
    {
      fprintf (dump_file, "\n\nAfter neutering:\n\n");
      dump_function_to_file (current_function_decl, dump_file, dump_flags);
    }
}
1821 :
/* Pass entry point.  Scan the function for IFN_GOACC_REDUCTION and
   IFN_UNIQUE (OACC_PRIVATE) calls to size the shared memory needed per
   partitioning level for reductions and gang-private variables, ask the
   target for the usable shared-memory bounds, and run the neutering
   transformation unless the function is known to use a single worker.
   Always returns 0 (no extra TODO flags).  */

static int
execute_omp_oacc_neuter_broadcast ()
{
  unsigned HOST_WIDE_INT reduction_size[GOMP_DIM_MAX];
  unsigned HOST_WIDE_INT private_size[GOMP_DIM_MAX];

  for (unsigned i = 0; i < GOMP_DIM_MAX; i++)
    {
      reduction_size[i] = 0;
      private_size[i] = 0;
    }

  /* Calculate shared memory size required for reduction variables and
     gang-private memory for this offloaded function.  */
  basic_block bb;
  FOR_ALL_BB_FN (bb, cfun)
    {
      for (gimple_stmt_iterator gsi = gsi_start_bb (bb);
           !gsi_end_p (gsi);
           gsi_next (&gsi))
        {
          gimple *stmt = gsi_stmt (gsi);
          if (!is_gimple_call (stmt))
            continue;
          gcall *call = as_a <gcall *> (stmt);
          if (!gimple_call_internal_p (call))
            continue;
          enum internal_fn ifn_code = gimple_call_internal_fn (call);
          switch (ifn_code)
            {
            default: break;
            case IFN_GOACC_REDUCTION:
              /* Argument 3 is the partitioning level; -1 means none.  */
              if (integer_minus_onep (gimple_call_arg (call, 3)))
                continue;
              else
                {
                  unsigned code = TREE_INT_CST_LOW (gimple_call_arg (call, 0));
                  /* Only count reduction variables once: the choice to pick
                     the setup call is fairly arbitrary.  */
                  if (code == IFN_GOACC_REDUCTION_SETUP)
                    {
                      int level = TREE_INT_CST_LOW (gimple_call_arg (call, 3));
                      tree var = gimple_call_arg (call, 2);
                      tree offset = gimple_call_arg (call, 5);
                      tree var_type = TREE_TYPE (var);
                      /* The reduction buffer must extend at least to the end
                         of this variable at its given offset.  */
                      unsigned HOST_WIDE_INT limit
                        = (tree_to_uhwi (offset)
                           + tree_to_uhwi (TYPE_SIZE_UNIT (var_type)));
                      reduction_size[level]
                        = MAX (reduction_size[level], limit);
                    }
                }
              break;
            case IFN_UNIQUE:
              {
                enum ifn_unique_kind kind
                  = ((enum ifn_unique_kind)
                     TREE_INT_CST_LOW (gimple_call_arg (call, 0)));

                if (kind == IFN_UNIQUE_OACC_PRIVATE)
                  {
                    HOST_WIDE_INT level
                      = TREE_INT_CST_LOW (gimple_call_arg (call, 2));
                    if (level == -1)
                      break;
                    /* Arguments from index 3 onwards are the addresses of
                       the privatized declarations; accumulate their sizes
                       with each decl aligned to its natural alignment.  */
                    for (unsigned i = 3;
                         i < gimple_call_num_args (call);
                         i++)
                      {
                        tree arg = gimple_call_arg (call, i);
                        gcc_assert (TREE_CODE (arg) == ADDR_EXPR);
                        tree decl = TREE_OPERAND (arg, 0);
                        unsigned HOST_WIDE_INT align = DECL_ALIGN_UNIT (decl);
                        private_size[level] = ((private_size[level] + align - 1)
                                               & ~(align - 1));
                        unsigned HOST_WIDE_INT decl_size
                          = tree_to_uhwi (TYPE_SIZE_UNIT (TREE_TYPE (decl)));
                        private_size[level] += decl_size;
                      }
                  }
              }
              break;
            }
        }
    }

  int dims[GOMP_DIM_MAX];
  for (unsigned i = 0; i < GOMP_DIM_MAX; i++)
    dims[i] = oacc_get_fn_dim_size (current_function_decl, i);

  /* Find bounds of shared-memory buffer space we can use.  */
  unsigned HOST_WIDE_INT bounds_lo = 0, bounds_hi = 0;
  if (targetm.goacc.shared_mem_layout)
    targetm.goacc.shared_mem_layout (&bounds_lo, &bounds_hi, dims,
                                     private_size, reduction_size);

  /* Perform worker partitioning unless we know 'num_workers(1)'.  */
  if (dims[GOMP_DIM_WORKER] != 1)
    oacc_do_neutering (bounds_lo, bounds_hi);

  return 0;
}
1924 :
namespace {

/* Pass descriptor: a GIMPLE pass requiring a CFG; SSA is updated and the
   CFG cleaned up after it runs.  */
const pass_data pass_data_omp_oacc_neuter_broadcast =
{
  GIMPLE_PASS, /* type */
  "omp_oacc_neuter_broadcast", /* name */
  OPTGROUP_OMP, /* optinfo_flags */
  TV_NONE, /* tv_id */
  PROP_cfg, /* properties_required */
  0, /* properties_provided */
  0, /* properties_destroyed */
  0, /* todo_flags_start */
  TODO_update_ssa | TODO_cleanup_cfg, /* todo_flags_finish */
};

/* Pass wrapper around execute_omp_oacc_neuter_broadcast.  */
class pass_omp_oacc_neuter_broadcast : public gimple_opt_pass
{
public:
  pass_omp_oacc_neuter_broadcast (gcc::context *ctxt)
    : gimple_opt_pass (pass_data_omp_oacc_neuter_broadcast, ctxt)
  {}

  /* opt_pass methods: */
  /* Run only for OpenACC, on targets providing the broadcast-record hook,
     and only on OpenACC offloaded functions.  */
  bool gate (function *fun) final override
  {
    if (!flag_openacc)
      return false;

    if (!targetm.goacc.create_worker_broadcast_record)
      return false;

    /* Only relevant for OpenACC offloaded functions.  */
    tree attr = oacc_get_fn_attrib (fun->decl);
    if (!attr)
      return false;

    return true;
  }

  unsigned int execute (function *) final override
  {
    return execute_omp_oacc_neuter_broadcast ();
  }

}; // class pass_omp_oacc_neuter_broadcast

} // anon namespace
1972 :
/* Factory for the pass manager: create a new instance of the
   omp_oacc_neuter_broadcast pass in context CTXT.  */
gimple_opt_pass *
make_pass_omp_oacc_neuter_broadcast (gcc::context *ctxt)
{
  return new pass_omp_oacc_neuter_broadcast (ctxt);
}
|