Line data Source code
1 : /* Bits of OpenMP and OpenACC handling that is specific to device offloading
2 : and a lowering pass for OpenACC device directives.
3 :
4 : Copyright (C) 2005-2026 Free Software Foundation, Inc.
5 :
6 : This file is part of GCC.
7 :
8 : GCC is free software; you can redistribute it and/or modify it under
9 : the terms of the GNU General Public License as published by the Free
10 : Software Foundation; either version 3, or (at your option) any later
11 : version.
12 :
13 : GCC is distributed in the hope that it will be useful, but WITHOUT ANY
14 : WARRANTY; without even the implied warranty of MERCHANTABILITY or
15 : FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
16 : for more details.
17 :
18 : You should have received a copy of the GNU General Public License
19 : along with GCC; see the file COPYING3. If not see
20 : <http://www.gnu.org/licenses/>. */
21 :
22 : #include "config.h"
23 : #include "system.h"
24 : #include "coretypes.h"
25 : #include "backend.h"
26 : #include "target.h"
27 : #include "tree.h"
28 : #include "gimple.h"
29 : #include "tree-pass.h"
30 : #include "ssa.h"
31 : #include "cgraph.h"
32 : #include "pretty-print.h"
33 : #include "diagnostic-core.h"
34 : #include "fold-const.h"
35 : #include "internal-fn.h"
36 : #include "langhooks.h"
37 : #include "gimplify.h"
38 : #include "gimple-iterator.h"
39 : #include "gimplify-me.h"
40 : #include "gimple-walk.h"
41 : #include "tree-cfg.h"
42 : #include "tree-into-ssa.h"
43 : #include "tree-nested.h"
44 : #include "stor-layout.h"
45 : #include "common/common-target.h"
46 : #include "omp-general.h"
47 : #include "omp-offload.h"
48 : #include "lto-section-names.h"
49 : #include "gomp-constants.h"
50 : #include "gimple-pretty-print.h"
51 : #include "intl.h"
52 : #include "stringpool.h"
53 : #include "attribs.h"
54 : #include "cfgloop.h"
55 : #include "context.h"
56 : #include "convert.h"
57 : #include "opts.h"
58 :
59 : /* Describe the OpenACC looping structure of a function as a tree of loops
60 : linked through PARENT, CHILD and SIBLING. The entire function is held in a 'NULL' loop. */
61 :
62 : struct oacc_loop
63 : {
64 : oacc_loop *parent; /* Containing loop. */
65 :
66 : oacc_loop *child; /* First inner loop; further inner loops follow via SIBLING. */
67 :
68 : oacc_loop *sibling; /* Next loop within same parent. */
69 :
70 : location_t loc; /* Location of the loop start. */
71 :
72 : gcall *marker; /* Initial head marker. */
73 :
74 : gcall *heads[GOMP_DIM_MAX]; /* Head marker functions (GOMP_DIM_MAX slots). */
75 : gcall *tails[GOMP_DIM_MAX]; /* Tail marker functions (GOMP_DIM_MAX slots). */
76 :
77 : tree routine; /* Pseudo-loop enclosing a routine. */
78 :
79 : unsigned mask; /* Partitioning mask (presumably GOMP_DIM_MASK bits -- confirm). */
80 : unsigned e_mask; /* Partitioning of element loops (when tiling). */
81 : unsigned inner; /* Partitioning of inner loops. */
82 : unsigned flags; /* Partitioning flags. */
83 : vec<gcall *> ifns; /* Contained loop abstraction functions. */
84 : tree chunk_size; /* Chunk size. */
85 : gcall *head_end; /* Final marker of head sequence. */
86 : };
87 :
88 : /* Offload tables holding the decls of functions, variables and "omp declare target indirect" functions whose addresses omp_finish_file emits. */
89 : vec<tree, va_gc> *offload_funcs, *offload_vars, *offload_ind_funcs;
90 :
91 : /* Return level at which oacc routine may spawn a partitioned loop, or
92 : -1 if it is not a routine (i.e. is an offload fn). ATTR is the attribute whose TREE_VALUE list is scanned: the result is the index of the first entry with a non-zero TREE_PURPOSE, or GOMP_DIM_MAX if none of the first GOMP_DIM_MAX entries has one. */
93 :
94 : int
95 11139 : oacc_fn_attrib_level (tree attr)
96 : {
97 11139 : tree pos = TREE_VALUE (attr);
98 :
99 11139 : if (!TREE_PURPOSE (pos)) /* No purpose on the first entry: not a routine. */
100 : return -1;
101 :
102 : int ix = 0;
103 5370 : for (ix = 0; ix != GOMP_DIM_MAX;
104 3615 : ix++, pos = TREE_CHAIN (pos))
105 4401 : if (!integer_zerop (TREE_PURPOSE (pos))) /* First non-zero entry gives the level. */
106 : break;
107 :
108 : return ix;
109 : }
110 :
110 : /* Helper function for omp_finish_file routine. Takes decls from V_DECLS and
111 : adds their addresses and sizes to constructor-vector V_CTOR. Every decl contributes its address; variables additionally contribute their size (DECL_SIZE_UNIT converted to const_ptr_type_node), so a variable adds two elements and a function one. V_DECLS may be NULL (vec_safe_length is used). */
112 :
113 : static void
114 87 : add_decls_addresses_to_decl_constructor (vec<tree, va_gc> *v_decls,
115 : vec<constructor_elt, va_gc> *v_ctor)
116 : {
117 87 : unsigned len = vec_safe_length (v_decls);
118 162 : for (unsigned i = 0; i < len; i++)
119 : {
120 75 : tree it = (*v_decls)[i];
121 75 : bool is_var = VAR_P (it);
122 75 : bool is_link_var
123 : = is_var
124 : #ifdef ACCEL_COMPILER
125 : && DECL_HAS_VALUE_EXPR_P (it)
126 : #endif
127 75 : && lookup_attribute ("omp declare target link", DECL_ATTRIBUTES (it));
128 :
129 : /* Outside of LTO, skip decls that have no symbol table node. See also omp_finish_file and output_offload_tables in lto-cgraph.cc. */
130 75 : if (!in_lto_p && !symtab_node::get (it))
131 0 : continue;
132 :
133 75 : tree size = NULL_TREE; /* Only set (and only emitted) for variables. */
134 75 : if (is_var)
135 0 : size = fold_convert (const_ptr_type_node, DECL_SIZE_UNIT (it));
136 :
137 75 : tree addr;
138 75 : if (!is_link_var)
139 75 : addr = build_fold_addr_expr (it);
140 : else
141 : {
142 : #ifdef ACCEL_COMPILER
143 : /* For "omp declare target link" vars add address of the pointer to
144 : the target table, instead of address of the var. The pointer decl is operand 0 of the var's DECL_VALUE_EXPR. */
145 : tree value_expr = DECL_VALUE_EXPR (it);
146 : tree link_ptr_decl = TREE_OPERAND (value_expr, 0);
147 : varpool_node::finalize_decl (link_ptr_decl);
148 : addr = build_fold_addr_expr (link_ptr_decl);
149 : #else
150 0 : addr = build_fold_addr_expr (it);
151 : #endif
152 :
153 : /* Most significant bit of the size marks "omp declare target link"
154 : vars in host and target tables. The bit position is derived from the byte size of const_ptr_type_node. */
155 0 : unsigned HOST_WIDE_INT isize = tree_to_uhwi (size);
156 0 : isize |= 1ULL << (int_size_in_bytes (const_ptr_type_node)
157 0 : * BITS_PER_UNIT - 1);
158 0 : size = wide_int_to_tree (const_ptr_type_node, isize);
159 : }
160 :
161 75 : CONSTRUCTOR_APPEND_ELT (v_ctor, NULL_TREE, addr);
162 75 : if (is_var)
163 0 : CONSTRUCTOR_APPEND_ELT (v_ctor, NULL_TREE, size);
164 : }
165 87 : }
167 :
168 : /* Return true if DECL is a function for which its references should be
169 : analyzed: a FUNCTION_DECL with the "omp declare target" attribute, without "omp declare target host", and, when flag_openacc is set, for which oacc_get_fn_attrib returns NULL_TREE. */
170 :
171 : static bool
172 198040 : omp_declare_target_fn_p (tree decl)
173 : {
174 198040 : return (TREE_CODE (decl) == FUNCTION_DECL
175 198040 : && lookup_attribute ("omp declare target", DECL_ATTRIBUTES (decl))
176 24335 : && !lookup_attribute ("omp declare target host",
177 24335 : DECL_ATTRIBUTES (decl))
178 222331 : && (!flag_openacc
179 45 : || oacc_get_fn_attrib (decl) == NULL_TREE));
180 : }
181 :
182 : /* Return true if DECL is a variable for which its initializer references
183 : should be analyzed: a VAR_P decl with the "omp declare target" attribute and without "omp declare target link". */
184 :
185 : static bool
186 110381 : omp_declare_target_var_p (tree decl)
187 : {
188 110381 : return (VAR_P (decl)
189 110381 : && lookup_attribute ("omp declare target", DECL_ATTRIBUTES (decl))
190 110866 : && !lookup_attribute ("omp declare target link",
191 485 : DECL_ATTRIBUTES (decl)));
192 : }
193 :
194 : /* Helper function for omp_discover_implicit_declare_target, called through
195 : walk_tree with DATA pointing to the vec<tree> worklist. Mark referenced FUNCTION_DECLs implicitly as
196 : declare target to, pushing those that have a saved body onto the worklist. Calls to an "omp declare variant base" function also process the FUNCTION_DECLs recorded in its attributes. Always returns NULL_TREE. */
197 :
198 : static tree
199 968720 : omp_discover_declare_target_tgt_fn_r (tree *tp, int *walk_subtrees, void *data)
200 : {
201 968720 : if (TREE_CODE (*tp) == CALL_EXPR
202 25563 : && CALL_EXPR_FN (*tp)
203 25512 : && TREE_CODE (CALL_EXPR_FN (*tp)) == ADDR_EXPR
204 25463 : && TREE_CODE (TREE_OPERAND (CALL_EXPR_FN (*tp), 0)) == FUNCTION_DECL
205 994183 : && lookup_attribute ("omp declare variant base",
206 25463 : DECL_ATTRIBUTES (TREE_OPERAND (CALL_EXPR_FN (*tp),
207 : 0))))
208 : {
209 83 : tree fn = TREE_OPERAND (CALL_EXPR_FN (*tp), 0); /* The directly called function. */
210 196 : for (tree attr = DECL_ATTRIBUTES (fn); attr; attr = TREE_CHAIN (attr))
211 : {
212 114 : attr = lookup_attribute ("omp declare variant base", attr); /* Jump to the next such attribute. */
213 114 : if (attr == NULL_TREE)
214 : break;
215 113 : tree purpose = TREE_PURPOSE (TREE_VALUE (attr)); /* Presumably the variant decl -- confirm. */
216 113 : if (TREE_CODE (purpose) == FUNCTION_DECL)
217 113 : omp_discover_declare_target_tgt_fn_r (&purpose, walk_subtrees, data);
218 : }
219 : }
220 968637 : else if (TREE_CODE (*tp) == FUNCTION_DECL)
221 : {
222 20415 : tree decl = *tp;
223 20415 : tree id = get_identifier ("omp declare target");
224 20415 : symtab_node *node = symtab_node::get (*tp);
225 20415 : if (node != NULL)
226 : {
227 13613 : while (node->alias_target /* Follow alias_target links, marking each alias on the way. */
228 13613 : && TREE_CODE (node->alias_target) == FUNCTION_DECL)
229 : {
230 4 : if (!omp_declare_target_fn_p (node->decl)
231 8 : && !lookup_attribute ("omp declare target host",
232 4 : DECL_ATTRIBUTES (node->decl)))
233 : {
234 4 : node->offloadable = 1;
235 4 : DECL_ATTRIBUTES (node->decl)
236 8 : = tree_cons (id, NULL_TREE, DECL_ATTRIBUTES (node->decl));
237 : }
238 4 : node = symtab_node::get (node->alias_target);
239 : }
240 13609 : symtab_node *new_node = node->ultimate_alias_target ();
241 13609 : decl = new_node->decl; /* From here on DECL is the ultimate alias target. */
242 15324 : while (node != new_node) /* Mark every alias between NODE and the ultimate target. */
243 : {
244 1715 : if (!omp_declare_target_fn_p (node->decl)
245 2597 : && !lookup_attribute ("omp declare target host",
246 882 : DECL_ATTRIBUTES (node->decl)))
247 : {
248 882 : node->offloadable = 1;
249 882 : DECL_ATTRIBUTES (node->decl)
250 1764 : = tree_cons (id, NULL_TREE, DECL_ATTRIBUTES (node->decl));
251 : }
252 1715 : gcc_assert (node->alias && node->analyzed);
253 1715 : node = node->get_alias_target ();
254 : }
255 13609 : node->offloadable = 1;
256 13609 : if (ENABLE_OFFLOADING)
257 : g->have_offload = true;
258 : }
259 20415 : if (omp_declare_target_fn_p (decl)
260 29022 : || lookup_attribute ("omp declare target host",
261 8607 : DECL_ATTRIBUTES (decl)))
262 11808 : return NULL_TREE; /* Already declare target, or marked host: nothing to add. */
263 :
264 8607 : if (DECL_SAVED_TREE (decl)
265 8607 : && (!DECL_EXTERNAL (decl) || DECL_DECLARED_INLINE_P (decl)))
266 6557 : ((vec<tree> *) data)->safe_push (decl); /* Queue the body for a later walk. */
267 8607 : DECL_ATTRIBUTES (decl) = tree_cons (id, NULL_TREE,
268 8607 : DECL_ATTRIBUTES (decl));
269 : }
270 948222 : else if (TYPE_P (*tp))
271 44 : *walk_subtrees = 0; /* Do not walk into types. */
272 948178 : else if (TREE_CODE (*tp) == OMP_TARGET)
273 : {
274 1672 : tree c = omp_find_clause (OMP_CLAUSES (*tp), OMP_CLAUSE_DEVICE);
275 1672 : if (c && OMP_CLAUSE_DEVICE_ANCESTOR (c))
276 43 : *walk_subtrees = 0; /* Skip target regions whose device clause has OMP_CLAUSE_DEVICE_ANCESTOR set. */
277 : }
278 : return NULL_TREE;
279 : }
280 :
281 : /* Similarly, but ignore references outside of OMP_TARGET regions. Only the body of an OMP_TARGET whose device clause (if any) does not have OMP_CLAUSE_DEVICE_ANCESTOR set is walked, using omp_discover_declare_target_tgt_fn_r with DATA passed through. Always returns NULL_TREE. */
282 :
283 : static tree
284 664106 : omp_discover_declare_target_fn_r (tree *tp, int *walk_subtrees, void *data)
285 : {
286 664106 : if (TREE_CODE (*tp) == OMP_TARGET)
287 : {
288 11606 : tree c = omp_find_clause (OMP_CLAUSES (*tp), OMP_CLAUSE_DEVICE);
289 11606 : if (!c || !OMP_CLAUSE_DEVICE_ANCESTOR (c))
290 11548 : walk_tree_without_duplicates (&OMP_TARGET_BODY (*tp),
291 : omp_discover_declare_target_tgt_fn_r,
292 : data);
293 11606 : *walk_subtrees = 0; /* The region was handled (or skipped) above; do not descend again. */
294 : }
295 652500 : else if (TYPE_P (*tp))
296 299 : *walk_subtrees = 0; /* Do not walk into types. */
297 664106 : return NULL_TREE;
298 : }
299 :
300 : /* Helper function for omp_discover_implicit_declare_target, called through
301 : walk_tree on variable initializers with DATA pointing to the vec<tree> worklist. FUNCTION_DECLs are handed to omp_discover_declare_target_tgt_fn_r; referenced global variables for which
302 : omp_declare_target_var_p is false are implicitly marked as declare target to. Always returns NULL_TREE for non-function trees. */
303 :
304 : static tree
305 514 : omp_discover_declare_target_var_r (tree *tp, int *walk_subtrees, void *data)
306 : {
307 514 : if (TREE_CODE (*tp) == FUNCTION_DECL)
308 24 : return omp_discover_declare_target_tgt_fn_r (tp, walk_subtrees, data);
309 490 : else if (VAR_P (*tp)
310 54 : && is_global_var (*tp)
311 533 : && !omp_declare_target_var_p (*tp))
312 : {
313 15 : tree id = get_identifier ("omp declare target");
314 15 : if (lookup_attribute ("omp declare target link", DECL_ATTRIBUTES (*tp))) /* Conflicts with the implicit marking: diagnose and drop the link attribute. */
315 : {
316 0 : error_at (DECL_SOURCE_LOCATION (*tp),
317 : "%qD specified both in declare target %<link%> and "
318 : "implicitly in %<to%> clauses", *tp);
319 0 : DECL_ATTRIBUTES (*tp)
320 0 : = remove_attribute ("omp declare target link", DECL_ATTRIBUTES (*tp));
321 : }
322 15 : if (TREE_STATIC (*tp) && lang_hooks.decls.omp_get_decl_init (*tp))
323 15 : ((vec<tree> *) data)->safe_push (*tp); /* Queue the initializer for a later walk. */
324 15 : DECL_ATTRIBUTES (*tp) = tree_cons (id, NULL_TREE, DECL_ATTRIBUTES (*tp));
325 15 : symtab_node *node = symtab_node::get (*tp);
326 15 : if (node != NULL && !node->offloadable)
327 : {
328 15 : node->offloadable = 1;
329 15 : if (ENABLE_OFFLOADING)
330 : {
331 : g->have_offload = true;
332 : if (is_a <varpool_node *> (node))
333 : vec_safe_push (offload_vars, node->decl); /* Record the variable in the offload variable table. */
334 : }
335 : }
336 : }
337 475 : else if (TYPE_P (*tp))
338 0 : *walk_subtrees = 0; /* Do not walk into types. */
339 : return NULL_TREE;
340 : }
341 :
342 : /* Perform the OpenMP implicit declare target to discovery. A worklist is seeded with defined functions (and their nested functions) that satisfy omp_declare_target_fn_p or have has_omp_target set, plus variables with an initializer that satisfy omp_declare_target_var_p; each entry is then walked, which may push further decls, until the worklist is empty. Functions with "omp declare target indirect" are also recorded in offload_ind_funcs. */
343 :
344 : void
345 9360 : omp_discover_implicit_declare_target (void)
346 : {
347 9360 : cgraph_node *node;
348 9360 : varpool_node *vnode;
349 9360 : auto_vec<tree> worklist;
350 :
351 167559 : FOR_EACH_DEFINED_FUNCTION (node)
352 158199 : if (DECL_SAVED_TREE (node->decl))
353 : {
354 157743 : struct cgraph_node *cgn;
355 157743 : if (lookup_attribute ("omp declare target indirect",
356 157743 : DECL_ATTRIBUTES (node->decl)))
357 123 : vec_safe_push (offload_ind_funcs, node->decl);
358 157743 : if (omp_declare_target_fn_p (node->decl))
359 2504 : worklist.safe_push (node->decl);
360 155239 : else if (DECL_STRUCT_FUNCTION (node->decl)
361 155239 : && DECL_STRUCT_FUNCTION (node->decl)->has_omp_target)
362 6610 : worklist.safe_push (node->decl);
363 159793 : for (cgn = first_nested_function (node); /* Apply the same two tests to each nested function. */
364 159793 : cgn; cgn = next_nested_function (cgn))
365 2050 : if (omp_declare_target_fn_p (cgn->decl))
366 33 : worklist.safe_push (cgn->decl);
367 2017 : else if (DECL_STRUCT_FUNCTION (cgn->decl)
368 2017 : && DECL_STRUCT_FUNCTION (cgn->decl)->has_omp_target)
369 409 : worklist.safe_push (cgn->decl);
370 : }
371 127748 : FOR_EACH_VARIABLE (vnode)
372 118388 : if (lang_hooks.decls.omp_get_decl_init (vnode->decl)
373 118388 : && omp_declare_target_var_p (vnode->decl))
374 457 : worklist.safe_push (vnode->decl);
375 25945 : while (!worklist.is_empty ()) /* The walkers below may push more decls onto the worklist. */
376 : {
377 16585 : tree decl = worklist.pop ();
378 16585 : if (VAR_P (decl)) /* Variables: walk the initializer. */
379 472 : walk_tree_without_duplicates (lang_hooks.decls.omp_get_decl_init (decl),
380 : omp_discover_declare_target_var_r,
381 : &worklist);
382 16113 : else if (omp_declare_target_fn_p (decl)) /* Declare target functions: every reference counts. */
383 9094 : walk_tree_without_duplicates (&DECL_SAVED_TREE (decl),
384 : omp_discover_declare_target_tgt_fn_r,
385 : &worklist);
386 : else /* Other functions: only references inside OMP_TARGET regions count. */
387 7019 : walk_tree_without_duplicates (&DECL_SAVED_TREE (decl),
388 : omp_discover_declare_target_fn_r,
389 : &worklist);
390 : }
391 :
392 9360 : lang_hooks.decls.omp_finish_decl_inits ();
393 9360 : }
394 :
395 :
396 : /* Create new symbols containing (address, size) pairs for global variables,
397 : marked with "omp declare target" attribute, as well as addresses for the
398 : functions, which are outlined offloading regions. Does nothing when offload_funcs, offload_vars and offload_ind_funcs are all empty. With named sections three table variables are built and finalized; otherwise each decl is passed to targetm.record_offload_symbol. */
399 : void
400 230133 : omp_finish_file (void)
401 : {
402 230133 : unsigned num_funcs = vec_safe_length (offload_funcs);
403 230133 : unsigned num_vars = vec_safe_length (offload_vars);
404 230133 : unsigned num_ind_funcs = vec_safe_length (offload_ind_funcs);
405 :
406 230133 : if (num_funcs == 0 && num_vars == 0 && num_ind_funcs == 0)
407 230133 : return;
408 :
409 29 : if (targetm_common.have_named_sections)
410 : {
411 29 : vec<constructor_elt, va_gc> *v_f, *v_v, *v_if;
412 29 : vec_alloc (v_f, num_funcs);
413 29 : vec_alloc (v_v, num_vars * 2); /* Two elements (address, size) per variable. */
414 29 : vec_alloc (v_if, num_ind_funcs);
415 :
416 29 : add_decls_addresses_to_decl_constructor (offload_funcs, v_f);
417 29 : add_decls_addresses_to_decl_constructor (offload_vars, v_v);
418 29 : add_decls_addresses_to_decl_constructor (offload_ind_funcs, v_if);
419 :
420 29 : tree vars_decl_type = build_array_type_nelts (pointer_sized_int_node,
421 29 : vec_safe_length (v_v));
422 29 : tree funcs_decl_type = build_array_type_nelts (pointer_sized_int_node,
423 29 : num_funcs);
424 29 : tree ind_funcs_decl_type = build_array_type_nelts (pointer_sized_int_node,
425 29 : num_ind_funcs);
426 :
427 29 : SET_TYPE_ALIGN (vars_decl_type, TYPE_ALIGN (pointer_sized_int_node));
428 29 : SET_TYPE_ALIGN (funcs_decl_type, TYPE_ALIGN (pointer_sized_int_node));
429 29 : SET_TYPE_ALIGN (ind_funcs_decl_type, TYPE_ALIGN (pointer_sized_int_node));
430 29 : tree ctor_v = build_constructor (vars_decl_type, v_v);
431 29 : tree ctor_f = build_constructor (funcs_decl_type, v_f);
432 29 : tree ctor_if = build_constructor (ind_funcs_decl_type, v_if);
433 29 : TREE_CONSTANT (ctor_v) = TREE_CONSTANT (ctor_f) = TREE_CONSTANT (ctor_if) = 1;
434 29 : TREE_STATIC (ctor_v) = TREE_STATIC (ctor_f) = TREE_STATIC (ctor_if) = 1;
435 29 : tree funcs_decl = build_decl (UNKNOWN_LOCATION, VAR_DECL,
436 : get_identifier (".offload_func_table"),
437 : funcs_decl_type);
438 29 : tree vars_decl = build_decl (UNKNOWN_LOCATION, VAR_DECL,
439 : get_identifier (".offload_var_table"),
440 : vars_decl_type);
441 29 : tree ind_funcs_decl = build_decl (UNKNOWN_LOCATION, VAR_DECL,
442 : get_identifier (".offload_ind_func_table"),
443 : ind_funcs_decl_type);
444 29 : TREE_STATIC (funcs_decl) = TREE_STATIC (ind_funcs_decl) = 1;
445 29 : TREE_STATIC (vars_decl) = 1;
446 : /* Do not align tables more than TYPE_ALIGN (pointer_sized_int_node),
447 : otherwise a joint table in a binary will contain padding between
448 : tables from multiple object files. */
449 29 : DECL_USER_ALIGN (funcs_decl) = DECL_USER_ALIGN (ind_funcs_decl) = 1;
450 29 : DECL_USER_ALIGN (vars_decl) = 1;
451 29 : SET_DECL_ALIGN (funcs_decl, TYPE_ALIGN (funcs_decl_type));
452 29 : SET_DECL_ALIGN (vars_decl, TYPE_ALIGN (vars_decl_type));
453 29 : SET_DECL_ALIGN (ind_funcs_decl, TYPE_ALIGN (ind_funcs_decl_type));
454 29 : DECL_INITIAL (funcs_decl) = ctor_f;
455 29 : DECL_INITIAL (vars_decl) = ctor_v;
456 29 : DECL_INITIAL (ind_funcs_decl) = ctor_if;
457 29 : set_decl_section_name (funcs_decl, OFFLOAD_FUNC_TABLE_SECTION_NAME);
458 29 : set_decl_section_name (vars_decl, OFFLOAD_VAR_TABLE_SECTION_NAME);
459 29 : set_decl_section_name (ind_funcs_decl,
460 : OFFLOAD_IND_FUNC_TABLE_SECTION_NAME);
461 29 : varpool_node::finalize_decl (vars_decl);
462 29 : varpool_node::finalize_decl (funcs_decl);
463 29 : varpool_node::finalize_decl (ind_funcs_decl);
464 : }
465 : else
466 : {
467 0 : for (unsigned i = 0; i < num_funcs; i++)
468 : {
469 0 : tree it = (*offload_funcs)[i];
470 : /* Outside of LTO, skip decls that have no symbol table node. See also add_decls_addresses_to_decl_constructor
471 : and output_offload_tables in lto-cgraph.cc. */
472 0 : if (!in_lto_p && !symtab_node::get (it))
473 0 : continue;
474 0 : targetm.record_offload_symbol (it);
475 : }
476 0 : for (unsigned i = 0; i < num_vars; i++)
477 : {
478 0 : tree it = (*offload_vars)[i];
479 0 : if (!in_lto_p && !symtab_node::get (it))
480 0 : continue;
481 : #ifdef ACCEL_COMPILER
482 : if (DECL_HAS_VALUE_EXPR_P (it)
483 : && lookup_attribute ("omp declare target link",
484 : DECL_ATTRIBUTES (it)))
485 : {
486 : tree value_expr = DECL_VALUE_EXPR (it);
487 : tree link_ptr_decl = TREE_OPERAND (value_expr, 0); /* Record the link pointer instead of the variable itself. */
488 : targetm.record_offload_symbol (link_ptr_decl);
489 : varpool_node::finalize_decl (link_ptr_decl);
490 : }
491 : else
492 : #endif
493 0 : targetm.record_offload_symbol (it);
494 : }
495 0 : for (unsigned i = 0; i < num_ind_funcs; i++)
496 : {
497 0 : tree it = (*offload_ind_funcs)[i];
498 : /* Outside of LTO, skip decls that have no symbol table node. See also add_decls_addresses_to_decl_constructor
499 : and output_offload_tables in lto-cgraph.cc. */
500 0 : if (!in_lto_p && !symtab_node::get (it))
501 0 : continue;
502 0 : targetm.record_offload_symbol (it);
503 : }
504 : }
505 : }
506 :
507 : /* Call dim_pos (POS == true) or dim_size (POS == false) builtins for
508 : axis DIM. Return a tmp var holding the result. The internal call (IFN_GOACC_DIM_POS or IFN_GOACC_DIM_SIZE, with DIM as its only argument) is appended to SEQ. */
509 :
510 : static tree
511 30709 : oacc_dim_call (bool pos, int dim, gimple_seq *seq)
512 : {
513 30709 : tree arg = build_int_cst (unsigned_type_node, dim);
514 30709 : tree size = create_tmp_var (integer_type_node); /* Receives the call result, whether position or size. */
515 30709 : enum internal_fn fn = pos ? IFN_GOACC_DIM_POS : IFN_GOACC_DIM_SIZE;
516 30709 : gimple *call = gimple_build_call_internal (fn, 1, arg);
517 :
518 30709 : gimple_call_set_lhs (call, size);
519 30709 : gimple_seq_add_stmt (seq, call);
520 :
521 30709 : return size;
522 : }
523 :
524 : /* Find the number of threads (POS = false), or thread number (POS =
525 : true) for an OpenACC region partitioned as MASK. Setup code
526 : required for the calculation is added to SEQ. When MASK selects no dimension the result is the constant 1 for POS = false and integer_zero_node for POS = true. */
527 :
528 : static tree
529 23606 : oacc_thread_numbers (bool pos, int mask, gimple_seq *seq)
530 : {
531 23606 : tree res = pos ? NULL_TREE : build_int_cst (unsigned_type_node, 1); /* NULL means no outer index seen yet. */
532 23606 : unsigned ix;
533 :
534 : /* Start at gang level, and examine relevant dimension indices. */
535 94424 : for (ix = GOMP_DIM_GANG; ix != GOMP_DIM_MAX; ix++)
536 70818 : if (GOMP_DIM_MASK (ix) & mask)
537 : {
538 26569 : if (res)
539 : {
540 : /* We had an outer index, so scale that by the size of
541 : this dimension. */
542 17369 : tree n = oacc_dim_call (false, ix, seq);
543 17369 : res = fold_build2 (MULT_EXPR, integer_type_node, res, n);
544 : }
545 26569 : if (pos)
546 : {
547 : /* Determine index in this dimension. */
548 13340 : tree id = oacc_dim_call (true, ix, seq);
549 13340 : if (res)
550 4140 : res = fold_build2 (PLUS_EXPR, integer_type_node, res, id);
551 : else
552 : res = id;
553 : }
554 : }
555 :
556 23606 : if (res == NULL_TREE) /* Only possible for POS = true with no dimension in MASK. */
557 2655 : res = integer_zero_node;
558 :
559 23606 : return res;
560 : }
561 :
562 : /* Transform IFN_GOACC_LOOP calls to actual code. See
563 : expand_oacc_for for where these are generated. At the vector
564 : level, we stride loops, such that each member of a warp will
565 : operate on adjacent iterations. At the worker and gang level,
566 : each gang/warp executes a set of contiguous iterations. Chunking
567 : can override this such that each iteration engine executes a
568 : contiguous chunk, and then moves on to stride to the next chunk. CALL's arguments as read here: 0 loop kind code, 1 direction, 2 range, 3 step, 4 chunk size (accelerator compilers only), 5 partitioning mask, 6 chunk number (OFFSET) or offset (BOUND). CALL is replaced by a sequence assigning the computed value to its lhs. */
569 :
570 : static void
571 46694 : oacc_xform_loop (gcall *call)
572 : {
573 46694 : gimple_stmt_iterator gsi = gsi_for_stmt (call);
574 46694 : enum ifn_goacc_loop_kind code
575 46694 : = (enum ifn_goacc_loop_kind) TREE_INT_CST_LOW (gimple_call_arg (call, 0));
576 46694 : tree dir = gimple_call_arg (call, 1);
577 46694 : tree range = gimple_call_arg (call, 2);
578 46694 : tree step = gimple_call_arg (call, 3);
579 46694 : tree chunk_size = NULL_TREE;
580 46694 : unsigned mask = (unsigned) TREE_INT_CST_LOW (gimple_call_arg (call, 5));
581 46694 : tree lhs = gimple_call_lhs (call);
582 46694 : tree type = NULL_TREE;
583 46694 : tree diff_type = TREE_TYPE (range);
584 46694 : tree r = NULL_TREE;
585 46694 : gimple_seq seq = NULL;
586 46694 : bool chunking = false, striding = true;
587 46694 : unsigned outer_mask = mask & (~mask + 1); // Outermost partitioning: lowest set bit of MASK
588 46694 : unsigned inner_mask = mask & ~outer_mask; // Inner partitioning (if any): the remaining bits
589 :
590 : /* Skip lowering if return value of IFN_GOACC_LOOP call is not used. The call is replaced by the (empty) SEQ. */
591 46694 : if (!lhs)
592 : {
593 8 : gsi_replace_with_seq (&gsi, seq, true);
594 8 : return;
595 : }
596 :
597 46686 : type = TREE_TYPE (lhs);
598 :
599 : #ifdef ACCEL_COMPILER
600 : chunk_size = gimple_call_arg (call, 4);
601 : if (integer_minus_onep (chunk_size) /* Force static allocation. */
602 : || integer_zerop (chunk_size)) /* Default (also static). */
603 : {
604 : /* If we're at the gang level, we want each to execute a
605 : contiguous run of iterations. Otherwise we want each element
606 : to stride. */
607 : striding = !(outer_mask & GOMP_DIM_MASK (GOMP_DIM_GANG));
608 : chunking = false;
609 : }
610 : else
611 : {
612 : /* Chunk of size 1 is striding. */
613 : striding = integer_onep (chunk_size);
614 : chunking = !striding;
615 : }
616 : #endif
617 :
618 : /* striding=true, chunking=true
619 : -> invalid.
620 : striding=true, chunking=false
621 : -> chunks=1
622 : striding=false,chunking=true
623 : -> chunks=ceil (range/(chunksize*threads*step))
624 : striding=false,chunking=false
625 : -> chunk_size=ceil(range/(threads*step)),chunks=1 */
626 46686 : push_gimplify_context (true);
627 :
628 46686 : switch (code)
629 : {
630 0 : default: gcc_unreachable ();
631 :
632 11229 : case IFN_GOACC_LOOP_CHUNKS:
633 11229 : if (!chunking)
634 11229 : r = build_int_cst (type, 1);
635 : else
636 : {
637 : /* chunk_max
638 : = (range - dir + per) / per, where per = num_threads * chunk_size * step */
639 : tree per = oacc_thread_numbers (false, mask, &seq);
640 : per = fold_convert (type, per);
641 : chunk_size = fold_convert (type, chunk_size);
642 : per = fold_build2 (MULT_EXPR, type, per, chunk_size);
643 : per = fold_build2 (MULT_EXPR, type, per, step);
644 : r = build2 (MINUS_EXPR, type, range, dir);
645 : r = build2 (PLUS_EXPR, type, r, per);
646 : r = build2 (TRUNC_DIV_EXPR, type, r, per);
647 : }
648 : break;
649 :
650 11751 : case IFN_GOACC_LOOP_STEP:
651 11751 : {
652 : /* If striding, step by the entire compute volume, otherwise
653 : step by the inner volume. */
654 11751 : unsigned volume = striding ? mask : inner_mask;
655 :
656 11751 : r = oacc_thread_numbers (false, volume, &seq);
657 11751 : r = build2 (MULT_EXPR, type, fold_convert (type, r), step);
658 : }
659 11751 : break;
660 :
661 11855 : case IFN_GOACC_LOOP_OFFSET:
662 : /* Enable vectorization on non-SIMT targets. */
663 11855 : if (!targetm.simt.vf
664 11855 : && outer_mask == GOMP_DIM_MASK (GOMP_DIM_VECTOR)
665 : /* If not -fno-tree-loop-vectorize, hint that we want to vectorize
666 : the loop. */
667 1951 : && (flag_tree_loop_vectorize
668 1463 : || !OPTION_SET_P (flag_tree_loop_vectorize)))
669 : {
670 1951 : basic_block bb = gsi_bb (gsi);
671 1951 : class loop *parent = bb->loop_father;
672 1951 : class loop *body = parent->inner;
673 :
674 1951 : parent->force_vectorize = true;
675 1951 : parent->safelen = INT_MAX;
676 :
677 : /* "Chunking loops" may have inner loops. */
678 1951 : if (parent->inner)
679 : {
680 1939 : body->force_vectorize = true;
681 1939 : body->safelen = INT_MAX;
682 : }
683 :
684 1951 : cfun->has_force_vectorize_loops = true;
685 : }
686 11855 : if (striding)
687 : {
688 11855 : r = oacc_thread_numbers (true, mask, &seq); /* Offset is this thread's number within MASK. */
689 11855 : r = fold_convert (diff_type, r);
690 : }
691 : else
692 : {
693 : tree inner_size = oacc_thread_numbers (false, inner_mask, &seq);
694 : tree outer_size = oacc_thread_numbers (false, outer_mask, &seq);
695 : tree volume = fold_build2 (MULT_EXPR, TREE_TYPE (inner_size),
696 : inner_size, outer_size);
697 :
698 : volume = fold_convert (diff_type, volume);
699 : if (chunking)
700 : chunk_size = fold_convert (diff_type, chunk_size);
701 : else
702 : {
703 : tree per = fold_build2 (MULT_EXPR, diff_type, volume, step);
704 :
705 : chunk_size = build2 (MINUS_EXPR, diff_type, range, dir); /* chunk_size = (range - dir + per) / per. */
706 : chunk_size = build2 (PLUS_EXPR, diff_type, chunk_size, per);
707 : chunk_size = build2 (TRUNC_DIV_EXPR, diff_type, chunk_size, per);
708 : }
709 :
710 : tree span = build2 (MULT_EXPR, diff_type, chunk_size,
711 : fold_convert (diff_type, inner_size));
712 : r = oacc_thread_numbers (true, outer_mask, &seq);
713 : r = fold_convert (diff_type, r);
714 : r = build2 (MULT_EXPR, diff_type, r, span);
715 :
716 : tree inner = oacc_thread_numbers (true, inner_mask, &seq);
717 : inner = fold_convert (diff_type, inner);
718 : r = fold_build2 (PLUS_EXPR, diff_type, r, inner);
719 :
720 : if (chunking)
721 : {
722 : tree chunk = fold_convert (diff_type, gimple_call_arg (call, 6)); /* Argument 6 is the chunk number here. */
723 : tree per
724 : = fold_build2 (MULT_EXPR, diff_type, volume, chunk_size);
725 : per = build2 (MULT_EXPR, diff_type, per, chunk);
726 :
727 : r = build2 (PLUS_EXPR, diff_type, r, per);
728 : }
729 : }
730 11855 : r = fold_build2 (MULT_EXPR, diff_type, r, step);
731 11855 : if (type != diff_type)
732 178 : r = fold_convert (type, r);
733 : break;
734 :
735 11851 : case IFN_GOACC_LOOP_BOUND:
736 11851 : if (striding)
737 11851 : r = range; /* When striding the bound is simply the full range. */
738 : else
739 : {
740 : tree inner_size = oacc_thread_numbers (false, inner_mask, &seq);
741 : tree outer_size = oacc_thread_numbers (false, outer_mask, &seq);
742 : tree volume = fold_build2 (MULT_EXPR, TREE_TYPE (inner_size),
743 : inner_size, outer_size);
744 :
745 : volume = fold_convert (diff_type, volume);
746 : if (chunking)
747 : chunk_size = fold_convert (diff_type, chunk_size);
748 : else
749 : {
750 : tree per = fold_build2 (MULT_EXPR, diff_type, volume, step);
751 :
752 : chunk_size = build2 (MINUS_EXPR, diff_type, range, dir); /* chunk_size = (range - dir + per) / per. */
753 : chunk_size = build2 (PLUS_EXPR, diff_type, chunk_size, per);
754 : chunk_size = build2 (TRUNC_DIV_EXPR, diff_type, chunk_size, per);
755 : }
756 :
757 : tree span = build2 (MULT_EXPR, diff_type, chunk_size,
758 : fold_convert (diff_type, inner_size));
759 :
760 : r = fold_build2 (MULT_EXPR, diff_type, span, step);
761 :
762 : tree offset = gimple_call_arg (call, 6); /* Argument 6 is the offset here. */
763 : r = build2 (PLUS_EXPR, diff_type, r,
764 : fold_convert (diff_type, offset));
765 : r = build2 (integer_onep (dir) ? MIN_EXPR : MAX_EXPR, /* Clamp to RANGE in the direction of iteration. */
766 : diff_type, r, range);
767 : }
768 11851 : if (diff_type != type)
769 178 : r = fold_convert (type, r);
770 : break;
771 : }
772 :
773 46686 : gimplify_assign (lhs, r, &seq);
774 :
775 46686 : pop_gimplify_context (NULL);
776 :
777 46686 : gsi_replace_with_seq (&gsi, seq, true);
778 : }
779 :
780 : /* Transform a GOACC_TILE call. Determines the element loop span for
781 : the specified loop of the nest. This is 1 if we're not tiling. CALL is replaced by a sequence assigning the span, converted to the type of its lhs, to the lhs. Argument 3 (gwv_tile) is not read here.
782 :
783 : GOACC_TILE (collapse_count, loop_no, tile_arg, gwv_tile, gwv_element); */
784 :
785 : static void
786 284 : oacc_xform_tile (gcall *call)
787 : {
788 284 : gimple_stmt_iterator gsi = gsi_for_stmt (call);
789 284 : unsigned collapse = tree_to_uhwi (gimple_call_arg (call, 0));
790 : /* Inner loops have higher loop_nos. */
791 284 : unsigned loop_no = tree_to_uhwi (gimple_call_arg (call, 1));
792 284 : tree tile_size = gimple_call_arg (call, 2);
793 284 : unsigned e_mask = tree_to_uhwi (gimple_call_arg (call, 4));
794 284 : tree lhs = gimple_call_lhs (call);
795 284 : tree type = TREE_TYPE (lhs);
796 284 : gimple_seq seq = NULL;
797 284 : tree span = build_int_cst (type, 1);
798 :
799 284 : gcc_assert (!(e_mask /* Element loops may only be vector and/or worker partitioned. */
800 : & ~(GOMP_DIM_MASK (GOMP_DIM_VECTOR)
801 : | GOMP_DIM_MASK (GOMP_DIM_WORKER))));
802 284 : push_gimplify_context (!seen_error ());
803 :
804 : #ifndef ACCEL_COMPILER
805 : /* Partitioning disabled on host compilers. */
806 284 : e_mask = 0;
807 : #endif
808 284 : if (!e_mask)
809 : /* Not partitioning. */
810 284 : span = integer_one_node;
811 : else if (!integer_zerop (tile_size))
812 : /* User explicitly specified size. */
813 : span = tile_size;
814 : else
815 : {
816 : /* Pick a size based on the partitioning of the element loop and
817 : the number of loop nests. */
818 : tree first_size = NULL_TREE;
819 : tree second_size = NULL_TREE;
820 :
821 : if (e_mask & GOMP_DIM_MASK (GOMP_DIM_VECTOR))
822 : first_size = oacc_dim_call (false, GOMP_DIM_VECTOR, &seq);
823 : if (e_mask & GOMP_DIM_MASK (GOMP_DIM_WORKER))
824 : second_size = oacc_dim_call (false, GOMP_DIM_WORKER, &seq);
825 :
826 : if (!first_size) /* Worker-only partitioning: treat the worker size as the first size. */
827 : {
828 : first_size = second_size;
829 : second_size = NULL_TREE;
830 : }
831 :
832 : if (loop_no + 1 == collapse) /* Innermost loop of the nest. */
833 : {
834 : span = first_size;
835 : if (!loop_no && second_size)
836 : span = fold_build2 (MULT_EXPR, TREE_TYPE (span),
837 : span, second_size);
838 : }
839 : else if (loop_no + 2 == collapse) /* Second innermost loop of the nest. */
840 : span = second_size;
841 : else
842 : span = NULL_TREE;
843 :
844 : if (!span)
845 : /* There's no obvious element size for this loop. Options
846 : are 1, first_size or some non-unity constant (32 is my
847 : favourite). We should gather some statistics. */
848 : span = first_size;
849 : }
850 :
851 284 : span = fold_convert (type, span);
852 284 : gimplify_assign (lhs, span, &seq);
853 :
854 284 : pop_gimplify_context (NULL);
855 :
856 284 : gsi_replace_with_seq (&gsi, seq, true);
857 284 : }
858 :
859 : /* Default partitioned and minimum partitioned dimensions. oacc_parse_default_dims initializes them to -1 and 1 respectively before applying any overrides and the target validation hook. */
860 :
861 : static int oacc_default_dims[GOMP_DIM_MAX];
862 : static int oacc_min_dims[GOMP_DIM_MAX];
863 : /* Return the default partitioned dimension recorded for axis DIM, which must be in the range [0, GOMP_DIM_MAX). */
864 : int
865 0 : oacc_get_default_dim (int dim)
866 : {
867 0 : gcc_assert (0 <= dim && dim < GOMP_DIM_MAX);
868 0 : return oacc_default_dims[dim];
869 : }
870 : /* Return the minimum partitioned dimension recorded for axis DIM, which must be in the range [0, GOMP_DIM_MAX). */
871 : int
872 0 : oacc_get_min_dim (int dim)
873 : {
874 0 : gcc_assert (0 <= dim && dim < GOMP_DIM_MAX);
875 0 : return oacc_min_dims[dim];
876 : }
877 :
878 : /* Parse the default dimension parameter. This is a set of
879 : :-separated optional compute dimensions. Each specified dimension
880 : is a positive integer. When device type support is added, it is
881 : planned to be a comma separated list of such compute dimensions,
882 : with all but the first prefixed by the colon-terminated device
883 : type. DIMS may be NULL and is ignored on host compilers. All defaults are first reset to -1 and all minimums to 1; malformed input is reported with error_at; finally the target validate_dims hook is run on both arrays. */
884 :
885 : static void
886 2279 : oacc_parse_default_dims (const char *dims)
887 : {
888 2279 : int ix;
889 :
890 9116 : for (ix = GOMP_DIM_MAX; ix--;)
891 : {
892 6837 : oacc_default_dims[ix] = -1;
893 6837 : oacc_min_dims[ix] = 1;
894 : }
895 :
896 : #ifndef ACCEL_COMPILER
897 : /* Cannot be overridden on the host. */
898 2279 : dims = NULL;
899 : #endif
900 2279 : if (dims)
901 : {
902 : const char *pos = dims;
903 :
904 : for (ix = 0; *pos && ix != GOMP_DIM_MAX; ix++)
905 : {
906 : if (ix) /* Every dimension after the first must be preceded by a colon. */
907 : {
908 : if (*pos != ':')
909 : goto malformed;
910 : pos++;
911 : }
912 :
913 : if (*pos != ':') /* An empty field leaves the default for this dimension untouched. */
914 : {
915 : long val;
916 : const char *eptr;
917 :
918 : errno = 0;
919 : val = strtol (pos, const_cast<char **> (&eptr), 10);
920 : if (errno || val <= 0 || (int) val != val) /* Reject conversion errors, non-positive values and values that do not fit in int. */
921 : goto malformed;
922 : pos = eptr;
923 : oacc_default_dims[ix] = (int) val;
924 : }
925 : }
926 : if (*pos) /* Trailing text after the last accepted dimension. */
927 : {
928 : malformed:
929 : error_at (UNKNOWN_LOCATION,
930 : "%<-fopenacc-dim%> operand is malformed at %qs", pos);
931 : }
932 : }
933 :
934 : /* Allow the backend to validate the dimensions. */
935 2279 : targetm.goacc.validate_dims (NULL_TREE, oacc_default_dims, -1, 0);
936 2279 : targetm.goacc.validate_dims (NULL_TREE, oacc_min_dims, -2, 0);
937 2279 : }
938 :
939 : /* Validate and update the dimensions for offloaded FN. ATTRS is the
940 : raw attribute. DIMS is an array of dimensions, which is filled in.
941 : LEVEL is the partitioning level of a routine, or -1 for an offload
942 : region itself. USED is the mask of partitioned execution in the
943 : function. */
944 :
945 : static void
946 9876 : oacc_validate_dims (tree fn, tree attrs, int *dims, int level, unsigned used)
947 : {
948 9876 : tree purpose[GOMP_DIM_MAX];
949 9876 : unsigned ix;
950 9876 : tree pos = TREE_VALUE (attrs);
951 :
952 : /* Make sure the attribute creator attached the dimension
953 : information. */
954 9876 : gcc_assert (pos);
955 :
956 39504 : for (ix = 0; ix != GOMP_DIM_MAX; ix++)
957 : {
958 29628 : purpose[ix] = TREE_PURPOSE (pos);
959 29628 : tree val = TREE_VALUE (pos);
960 29628 : dims[ix] = val ? TREE_INT_CST_LOW (val) : -1;
961 29628 : pos = TREE_CHAIN (pos);
962 : }
963 :
964 9876 : bool check = true;
965 : #ifdef ACCEL_COMPILER
966 : check = false;
967 : #endif
968 9876 : if (check
969 9876 : && warn_openacc_parallelism
970 1371 : && !lookup_attribute ("oacc kernels", DECL_ATTRIBUTES (fn)))
971 : {
972 1268 : static char const *const axes[] =
973 : /* Must be kept in sync with GOMP_DIM enumeration. */
974 : { "gang", "worker", "vector" };
975 4775 : for (ix = level >= 0 ? level : 0; ix != GOMP_DIM_MAX; ix++)
976 3507 : if (dims[ix] < 0)
977 : ; /* Defaulting axis. */
978 1970 : else if ((used & GOMP_DIM_MASK (ix)) && dims[ix] == 1)
979 : /* There is partitioned execution, but the user requested a
980 : dimension size of 1. They're probably confused. */
981 94 : warning_at (DECL_SOURCE_LOCATION (fn), OPT_Wopenacc_parallelism,
982 : "region contains %s partitioned code but"
983 94 : " is not %s partitioned", axes[ix], axes[ix]);
984 1876 : else if (!(used & GOMP_DIM_MASK (ix)) && dims[ix] != 1)
985 : /* The dimension is explicitly partitioned to non-unity, but
986 : no use is made within the region. */
987 500 : warning_at (DECL_SOURCE_LOCATION (fn), OPT_Wopenacc_parallelism,
988 : "region is %s partitioned but"
989 : " does not contain %s partitioned code",
990 500 : axes[ix], axes[ix]);
991 : }
992 :
993 9876 : bool changed = targetm.goacc.validate_dims (fn, dims, level, used);
994 :
995 : /* Default anything left to 1 or a partitioned default. */
996 49380 : for (ix = 0; ix != GOMP_DIM_MAX; ix++)
997 29628 : if (dims[ix] < 0)
998 : {
999 : /* The OpenACC spec says 'If the [num_gangs] clause is not
1000 : specified, an implementation-defined default will be used;
1001 : the default may depend on the code within the construct.'
1002 : (2.5.6). Thus an implementation is free to choose
1003 : non-unity default for a parallel region that doesn't have
1004 : any gang-partitioned loops. However, it appears that there
1005 : is a sufficient body of user code that expects non-gang
1006 : partitioned regions to not execute in gang-redundant mode.
1007 : So we (a) don't warn about the non-portability and (b) pick
1008 : the minimum permissible dimension size when there is no
1009 : partitioned execution. Otherwise we pick the global
1010 : default for the dimension, which the user can control. The
1011 : same wording and logic applies to num_workers and
1012 : vector_length, however the worker- or vector- single
1013 : execution doesn't have the same impact as gang-redundant
1014 : execution. (If the minimum gang-level partioning is not 1,
1015 : the target is probably too confusing.) */
1016 0 : dims[ix] = (used & GOMP_DIM_MASK (ix)
1017 0 : ? oacc_default_dims[ix] : oacc_min_dims[ix]);
1018 0 : changed = true;
1019 : }
1020 :
1021 9876 : if (changed)
1022 : {
1023 : /* Replace the attribute with new values. */
1024 : pos = NULL_TREE;
1025 35412 : for (ix = GOMP_DIM_MAX; ix--;)
1026 26559 : pos = tree_cons (purpose[ix],
1027 26559 : build_int_cst (integer_type_node, dims[ix]), pos);
1028 8853 : oacc_replace_fn_attrib (fn, pos);
1029 : }
1030 9876 : }
1031 :
1032 : /* Create an empty OpenACC loop structure at LOC. */
1033 :
1034 : static oacc_loop *
1035 21344 : new_oacc_loop_raw (oacc_loop *parent, location_t loc)
1036 : {
1037 10835 : oacc_loop *loop = XCNEW (oacc_loop);
1038 :
1039 21344 : loop->parent = parent;
1040 :
1041 10835 : if (parent)
1042 : {
1043 10835 : loop->sibling = parent->child;
1044 10835 : parent->child = loop;
1045 : }
1046 :
1047 21344 : loop->loc = loc;
1048 21344 : return loop;
1049 : }
1050 :
1051 : /* Create an outermost, dummy OpenACC loop for offloaded function
1052 : DECL. */
1053 :
1054 : static oacc_loop *
1055 9876 : new_oacc_loop_outer (tree decl)
1056 : {
1057 9876 : return new_oacc_loop_raw (NULL, DECL_SOURCE_LOCATION (decl));
1058 : }
1059 :
1060 : /* Start a new OpenACC loop structure beginning at head marker HEAD.
1061 : Link into PARENT loop. Return the new loop. */
1062 :
1063 : static oacc_loop *
1064 9634 : new_oacc_loop (oacc_loop *parent, gcall *marker)
1065 : {
1066 9634 : oacc_loop *loop = new_oacc_loop_raw (parent, gimple_location (marker));
1067 :
1068 9634 : loop->marker = marker;
1069 :
1070 : /* TODO: This is where device_type flattening would occur for the loop
1071 : flags. */
1072 :
1073 9634 : loop->flags = TREE_INT_CST_LOW (gimple_call_arg (marker, 3));
1074 :
1075 9634 : tree chunk_size = integer_zero_node;
1076 9634 : if (loop->flags & OLF_GANG_STATIC)
1077 146 : chunk_size = gimple_call_arg (marker, 4);
1078 9634 : loop->chunk_size = chunk_size;
1079 :
1080 9634 : return loop;
1081 : }
1082 :
1083 : /* Create a dummy loop encompassing a call to a openACC routine.
1084 : Extract the routine's partitioning requirements. */
1085 :
1086 : static void
1087 1201 : new_oacc_loop_routine (oacc_loop *parent, gcall *call, tree decl, tree attrs)
1088 : {
1089 1201 : oacc_loop *loop = new_oacc_loop_raw (parent, gimple_location (call));
1090 1201 : int level = oacc_fn_attrib_level (attrs);
1091 :
1092 1201 : gcc_assert (level >= 0);
1093 :
1094 1201 : loop->marker = call;
1095 1201 : loop->routine = decl;
1096 1201 : loop->mask = ((GOMP_DIM_MASK (GOMP_DIM_MAX) - 1)
1097 1201 : ^ (GOMP_DIM_MASK (level) - 1));
1098 1201 : }
1099 :
1100 : /* Finish off the current OpenACC loop ending at tail marker TAIL.
1101 : Return the parent loop. */
1102 :
1103 : static oacc_loop *
1104 9634 : finish_oacc_loop (oacc_loop *loop)
1105 : {
1106 : /* If the loop has been collapsed, don't partition it. */
1107 0 : if (loop->ifns.is_empty ())
1108 0 : loop->mask = loop->flags = 0;
1109 9634 : return loop->parent;
1110 : }
1111 :
1112 : /* Free all OpenACC loop structures within LOOP (inclusive). */
1113 :
1114 : static void
1115 21344 : free_oacc_loop (oacc_loop *loop)
1116 : {
1117 21344 : if (loop->sibling)
1118 2194 : free_oacc_loop (loop->sibling);
1119 21344 : if (loop->child)
1120 8641 : free_oacc_loop (loop->child);
1121 :
1122 21344 : loop->ifns.release ();
1123 21344 : free (loop);
1124 21344 : }
1125 :
1126 : /* Dump out the OpenACC loop head or tail beginning at FROM. */
1127 :
1128 : static void
1129 238 : dump_oacc_loop_part (FILE *file, gcall *from, int depth,
1130 : const char *title, int level)
1131 : {
1132 238 : enum ifn_unique_kind kind
1133 238 : = (enum ifn_unique_kind) TREE_INT_CST_LOW (gimple_call_arg (from, 0));
1134 :
1135 238 : fprintf (file, "%*s%s-%d:\n", depth * 2, "", title, level);
1136 238 : for (gimple_stmt_iterator gsi = gsi_for_stmt (from);;)
1137 : {
1138 719 : gimple *stmt = gsi_stmt (gsi);
1139 :
1140 719 : if (gimple_call_internal_p (stmt, IFN_UNIQUE))
1141 : {
1142 719 : enum ifn_unique_kind k
1143 719 : = ((enum ifn_unique_kind) TREE_INT_CST_LOW
1144 719 : (gimple_call_arg (stmt, 0)));
1145 :
1146 719 : if (k == kind && stmt != from)
1147 : break;
1148 : }
1149 481 : print_gimple_stmt (file, stmt, depth * 2 + 2);
1150 :
1151 481 : gsi_next (&gsi);
1152 962 : while (gsi_end_p (gsi))
1153 962 : gsi = gsi_start_bb (single_succ (gsi_bb (gsi)));
1154 : }
1155 238 : }
1156 :
1157 : /* Dump OpenACC loop LOOP, its children, and its siblings. */
1158 :
1159 : static void
1160 183 : dump_oacc_loop (FILE *file, oacc_loop *loop, int depth)
1161 : {
1162 222 : int ix;
1163 :
1164 222 : fprintf (file, "%*sLoop %x(%x) %s:%u\n", depth * 2, "",
1165 : loop->flags, loop->mask,
1166 222 : LOCATION_FILE (loop->loc), LOCATION_LINE (loop->loc));
1167 :
1168 222 : if (loop->marker)
1169 108 : print_gimple_stmt (file, loop->marker, depth * 2);
1170 :
1171 222 : if (loop->routine)
1172 48 : fprintf (file, "%*sRoutine %s:%u:%s\n",
1173 48 : depth * 2, "", DECL_SOURCE_FILE (loop->routine),
1174 96 : DECL_SOURCE_LINE (loop->routine),
1175 48 : IDENTIFIER_POINTER (DECL_NAME (loop->routine)));
1176 :
1177 888 : for (ix = GOMP_DIM_GANG; ix != GOMP_DIM_MAX; ix++)
1178 666 : if (loop->heads[ix])
1179 119 : dump_oacc_loop_part (file, loop->heads[ix], depth, "Head", ix);
1180 888 : for (ix = GOMP_DIM_MAX; ix--;)
1181 666 : if (loop->tails[ix])
1182 119 : dump_oacc_loop_part (file, loop->tails[ix], depth, "Tail", ix);
1183 :
1184 222 : if (loop->child)
1185 69 : dump_oacc_loop (file, loop->child, depth + 1);
1186 222 : if (loop->sibling)
1187 : dump_oacc_loop (file, loop->sibling, depth);
1188 183 : }
1189 :
1190 : void debug_oacc_loop (oacc_loop *);
1191 :
1192 : /* Dump loops to stderr. */
1193 :
1194 : DEBUG_FUNCTION void
1195 0 : debug_oacc_loop (oacc_loop *loop)
1196 : {
1197 0 : dump_oacc_loop (stderr, loop, 0);
1198 0 : }
1199 :
1200 : /* Provide diagnostics on OpenACC loop LOOP, its children, and its
1201 : siblings. */
1202 :
1203 : static void
1204 2741 : inform_oacc_loop (const oacc_loop *loop)
1205 : {
1206 1796 : const char *gang
1207 2741 : = loop->mask & GOMP_DIM_MASK (GOMP_DIM_GANG) ? " gang" : "";
1208 2226 : const char *worker
1209 2741 : = loop->mask & GOMP_DIM_MASK (GOMP_DIM_WORKER) ? " worker" : "";
1210 1747 : const char *vector
1211 2741 : = loop->mask & GOMP_DIM_MASK (GOMP_DIM_VECTOR) ? " vector" : "";
1212 2741 : const char *seq = loop->mask == 0 ? " seq" : "";
1213 2741 : const dump_user_location_t loc
1214 2741 : = dump_user_location_t::from_location_t (loop->loc);
1215 2741 : dump_printf_loc (MSG_OPTIMIZED_LOCATIONS, loc,
1216 : "assigned OpenACC%s%s%s%s loop parallelism\n", gang, worker,
1217 : vector, seq);
1218 :
1219 2741 : if (loop->child)
1220 698 : inform_oacc_loop (loop->child);
1221 2741 : if (loop->sibling)
1222 280 : inform_oacc_loop (loop->sibling);
1223 2741 : }
1224 :
1225 : /* DFS walk of basic blocks BB onwards, creating OpenACC loop
1226 : structures as we go. By construction these loops are properly
1227 : nested.

   LOOP is the loop structure that newly found loops, routine calls and
   IFN_GOACC_LOOP/IFN_GOACC_TILE calls are attached to; it is replaced
   by the new loop at a head marker and by the parent at the end of a
   tail marker sequence.  Blocks are flagged BB_VISITED so each one is
   scanned only once.  */
1228 :
1229 : static void
1230 176084 : oacc_loop_discover_walk (oacc_loop *loop, basic_block bb)
1231 : {
/* MARKER counts the head/tail marker calls seen so far in the marker
   sequence being scanned; REMAINING is how many more the sequence still
   expects.  Both are zero when not inside a sequence.  */
1232 176084 : int marker = 0;
1233 176084 : int remaining = 0;
1234 :
1235 176084 : if (bb->flags & BB_VISITED)
1236 40096 : return;
1237 :
1238 135988 : follow:
1239 202173 : bb->flags |= BB_VISITED;
1240 :
1241 : /* Scan for loop markers. */
1242 789432 : for (gimple_stmt_iterator gsi = gsi_start_bb (bb); !gsi_end_p (gsi);
1243 385086 : gsi_next (&gsi))
1244 : {
1245 385086 : gimple *stmt = gsi_stmt (gsi);
1246 :
1247 385086 : if (!is_gimple_call (stmt))
1248 220759 : continue;
1249 :
1250 169106 : gcall *call = as_a <gcall *> (stmt);
1251 :
1252 : /* If this is a routine, make a dummy loop for it. */
1253 169106 : if (tree decl = gimple_call_fndecl (call))
1254 4777 : if (tree attrs = oacc_get_fn_attrib (decl))
1255 : {
/* Routine calls are not expected inside a marker sequence.  */
1256 1201 : gcc_assert (!marker);
1257 1201 : new_oacc_loop_routine (loop, call, decl, attrs);
1258 : }
1259 :
1260 169106 : if (!gimple_call_internal_p (call))
1261 4779 : continue;
1262 :
1263 164327 : switch (gimple_call_internal_fn (call))
1264 : {
1265 : default:
1266 : break;
1267 :
1268 46978 : case IFN_GOACC_LOOP:
1269 46978 : case IFN_GOACC_TILE:
1270 : /* Record the abstraction function, so we can manipulate it
1271 : later. */
1272 46978 : loop->ifns.safe_push (call);
1273 46978 : break;
1274 :
1275 85483 : case IFN_UNIQUE:
1276 85483 : enum ifn_unique_kind kind
1277 85483 : = (enum ifn_unique_kind) (TREE_INT_CST_LOW
1278 85483 : (gimple_call_arg (call, 0)));
1279 85483 : if (kind == IFN_UNIQUE_OACC_HEAD_MARK
1280 85483 : || kind == IFN_UNIQUE_OACC_TAIL_MARK)
1281 : {
/* A two-argument marker terminates the current head or tail
   sequence.  */
1282 52246 : if (gimple_call_num_args (call) == 2)
1283 : {
1284 19268 : gcc_assert (marker && !remaining);
1285 19268 : marker = 0;
1286 19268 : if (kind == IFN_UNIQUE_OACC_TAIL_MARK)
1287 19268 : loop = finish_oacc_loop (loop);
1288 : else
1289 9634 : loop->head_end = call;
1290 : }
1291 : else
1292 : {
/* Argument 2 holds the number of markers still to come, this one
   included.  */
1293 32978 : int count = TREE_INT_CST_LOW (gimple_call_arg (call, 2));
1294 :
/* First marker of a sequence: a head sequence opens a new loop.  */
1295 32978 : if (!marker)
1296 : {
1297 19268 : if (kind == IFN_UNIQUE_OACC_HEAD_MARK)
1298 9634 : loop = new_oacc_loop (loop, call);
1299 : remaining = count;
1300 : }
1301 32978 : gcc_assert (count == remaining);
1302 32978 : if (remaining)
1303 : {
1304 32978 : remaining--;
/* Heads are stored in the order met; tails are indexed by the
   count still remaining, i.e. in the opposite order.  */
1305 32978 : if (kind == IFN_UNIQUE_OACC_HEAD_MARK)
1306 16489 : loop->heads[marker] = call;
1307 : else
1308 16489 : loop->tails[remaining] = call;
1309 : }
1310 32978 : marker++;
1311 : }
1312 : }
1313 : }
1314 : }
/* Still inside a marker sequence: it continues in the single successor
   block, which is scanned with the current state rather than by
   recursing.  */
1315 202173 : if (remaining || marker)
1316 : {
1317 66185 : bb = single_succ (bb);
1318 66185 : gcc_assert (single_pred_p (bb) && !(bb->flags & BB_VISITED));
1319 66185 : goto follow;
1320 : }
1321 :
1322 : /* Walk successor blocks. */
1323 135988 : edge e;
1324 135988 : edge_iterator ei;
1325 :
1326 302196 : FOR_EACH_EDGE (e, ei, bb->succs)
1327 166208 : oacc_loop_discover_walk (loop, e->dest);
1328 : }
1329 :
1330 : /* LOOP is the first sibling. Reverse the order in place and return
1331 : the new first sibling. Recurse to child loops. */
1332 :
1333 : static oacc_loop *
1334 18517 : oacc_loop_sibling_nreverse (oacc_loop *loop)
1335 : {
1336 18517 : oacc_loop *last = NULL;
1337 20711 : do
1338 : {
1339 20711 : if (loop->child)
1340 8641 : loop->child = oacc_loop_sibling_nreverse (loop->child);
1341 :
1342 20711 : oacc_loop *next = loop->sibling;
1343 20711 : loop->sibling = last;
1344 20711 : last = loop;
1345 20711 : loop = next;
1346 : }
1347 20711 : while (loop);
1348 :
1349 18517 : return last;
1350 : }
1351 :
1352 : /* Discover the OpenACC loops marked up by HEAD and TAIL markers for
1353 : the current function. */
1354 :
1355 : static oacc_loop *
1356 9876 : oacc_loop_discovery ()
1357 : {
1358 : /* Clear basic block flags, in particular BB_VISITED which we're going to use
1359 : in the following. */
1360 9876 : clear_bb_flags ();
1361 :
1362 9876 : oacc_loop *top = new_oacc_loop_outer (current_function_decl);
1363 9876 : oacc_loop_discover_walk (top, ENTRY_BLOCK_PTR_FOR_FN (cfun));
1364 :
1365 : /* The siblings were constructed in reverse order, reverse them so
1366 : that diagnostics come out in an unsurprising order. */
1367 9876 : top = oacc_loop_sibling_nreverse (top);
1368 :
1369 9876 : return top;
1370 : }
1371 :
1372 : /* Transform the abstract internal function markers starting at FROM
1373 : to be for partitioning level LEVEL. Stop when we meet another HEAD
1374 : or TAIL marker. */
1375 :
1376 : static void
1377 25614 : oacc_loop_xform_head_tail (gcall *from, int level)
1378 : {
1379 25614 : enum ifn_unique_kind kind
1380 25614 : = (enum ifn_unique_kind) TREE_INT_CST_LOW (gimple_call_arg (from, 0));
1381 25614 : tree replacement = build_int_cst (unsigned_type_node, level);
1382 :
1383 25614 : for (gimple_stmt_iterator gsi = gsi_for_stmt (from);;)
1384 : {
1385 107435 : gimple *stmt = gsi_stmt (gsi);
1386 :
1387 107435 : if (gimple_call_internal_p (stmt, IFN_UNIQUE))
1388 : {
1389 77068 : enum ifn_unique_kind k
1390 : = ((enum ifn_unique_kind)
1391 77068 : TREE_INT_CST_LOW (gimple_call_arg (stmt, 0)));
1392 :
1393 77068 : if (k == IFN_UNIQUE_OACC_FORK
1394 77068 : || k == IFN_UNIQUE_OACC_JOIN
1395 77068 : || k == IFN_UNIQUE_OACC_PRIVATE)
1396 25840 : *gimple_call_arg_ptr (stmt, 2) = replacement;
1397 51228 : else if (k == kind && stmt != from)
1398 : break;
1399 : }
1400 30367 : else if (gimple_call_internal_p (stmt, IFN_GOACC_REDUCTION))
1401 19228 : *gimple_call_arg_ptr (stmt, 3) = replacement;
1402 81821 : update_stmt (stmt);
1403 :
1404 81821 : gsi_next (&gsi);
1405 133275 : while (gsi_end_p (gsi))
1406 102908 : gsi = gsi_start_bb (single_succ (gsi_bb (gsi)));
1407 : }
1408 25614 : }
1409 :
1410 : /* Process the discovered OpenACC loops, setting the correct
1411 : partitioning level etc. */
1412 :
1413 : static void
1414 20711 : oacc_loop_process (oacc_loop *loop, int fn_level)
1415 : {
1416 20711 : if (loop->child)
1417 8641 : oacc_loop_process (loop->child, fn_level);
1418 :
1419 20711 : if (loop->mask && !loop->routine)
1420 : {
1421 8645 : int ix;
1422 8645 : tree mask_arg = build_int_cst (unsigned_type_node, loop->mask);
1423 8645 : tree e_mask_arg = build_int_cst (unsigned_type_node, loop->e_mask);
1424 8645 : tree chunk_arg = loop->chunk_size;
1425 8645 : gcall *call;
1426 :
1427 43708 : for (ix = 0; loop->ifns.iterate (ix, &call); ix++)
1428 : {
1429 35063 : switch (gimple_call_internal_fn (call))
1430 : {
1431 34857 : case IFN_GOACC_LOOP:
1432 34857 : {
1433 34857 : bool is_e = gimple_call_arg (call, 5) == integer_minus_one_node;
1434 69335 : gimple_call_set_arg (call, 5, is_e ? e_mask_arg : mask_arg);
1435 34857 : if (!is_e)
1436 34478 : gimple_call_set_arg (call, 4, chunk_arg);
1437 : }
1438 : break;
1439 :
1440 206 : case IFN_GOACC_TILE:
1441 206 : gimple_call_set_arg (call, 3, mask_arg);
1442 206 : gimple_call_set_arg (call, 4, e_mask_arg);
1443 206 : break;
1444 :
1445 0 : default:
1446 0 : gcc_unreachable ();
1447 : }
1448 35063 : update_stmt (call);
1449 : }
1450 :
1451 8645 : unsigned dim = GOMP_DIM_GANG;
1452 8645 : unsigned mask = loop->mask | loop->e_mask;
1453 21452 : for (ix = 0; ix != GOMP_DIM_MAX && mask; ix++)
1454 : {
1455 25113 : while (!(GOMP_DIM_MASK (dim) & mask))
1456 12306 : dim++;
1457 :
1458 12807 : oacc_loop_xform_head_tail (loop->heads[ix], dim);
1459 12807 : oacc_loop_xform_head_tail (loop->tails[ix], dim);
1460 :
1461 12807 : mask ^= GOMP_DIM_MASK (dim);
1462 : }
1463 : }
1464 :
1465 20711 : if (loop->sibling)
1466 2194 : oacc_loop_process (loop->sibling, fn_level);
1467 :
1468 :
1469 : /* OpenACC 2.6, 2.9.11. "reduction clause" places a restriction such that
1470 : "The 'reduction' clause may not be specified on an orphaned 'loop'
1471 : construct with the 'gang' clause, or on an orphaned 'loop' construct that
1472 : will generate gang parallelism in a procedure that is compiled with the
1473 : 'routine gang' clause." */
1474 20711 : if (fn_level == GOMP_DIM_GANG
1475 624 : && (loop->mask & GOMP_DIM_MASK (GOMP_DIM_GANG))
1476 209 : && (loop->flags & OLF_REDUCTION))
1477 106 : error_at (loop->loc,
1478 : "gang reduction on an orphan loop");
1479 20711 : }
1480 :
1481 : /* Walk the OpenACC loop hierarchy checking and assigning the
1482 : programmer-specified partitionings. OUTER_MASK is the partitioning
1483 : this loop is contained within. Return mask of partitioning
1484 : encountered. If any auto loops are discovered, set GOMP_DIM_MAX
1485 : bit.

   LOOP's mask and, for tiled loops, e_mask are set here, and the mask
   returned for its children is stored in its inner field.  Siblings of
   LOOP are handled too, with the same OUTER_MASK.  */
1486 :
1487 : static unsigned
1488 20711 : oacc_loop_fixed_partitions (oacc_loop *loop, unsigned outer_mask)
1489 : {
1490 20711 : unsigned this_mask = loop->mask;
1491 20711 : unsigned mask_all = 0;
1492 20711 : bool noisy = true;
1493 :
1494 : #ifdef ACCEL_COMPILER
1495 : /* When device_type is supported, we want the device compiler to be
1496 : noisy, if the loop parameters are device_type-specific. */
1497 : noisy = false;
1498 : #endif
1499 :
/* Routine calls keep the mask they were created with; real loops take
   theirs from the flags.  */
1500 20711 : if (!loop->routine)
1501 : {
1502 19510 : bool auto_par = (loop->flags & OLF_AUTO) != 0;
1503 19510 : bool seq_par = (loop->flags & OLF_SEQ) != 0;
1504 19510 : bool tiling = (loop->flags & OLF_TILE) != 0;
1505 :
1506 19510 : this_mask = ((loop->flags >> OLF_DIM_BASE)
1507 : & (GOMP_DIM_MASK (GOMP_DIM_MAX) - 1));
1508 :
1509 : /* Apply auto partitioning if this is a non-partitioned regular
1510 : loop, or (no more than) single axis tiled loop. */
1511 39020 : bool maybe_auto
1512 19510 : = !seq_par && this_mask == (tiling ? this_mask & -this_mask : 0);
1513 :
/* At most one of an explicit axis list, 'auto' and 'seq' may be
   given.  */
1514 19510 : if ((this_mask != 0) + auto_par + seq_par > 1)
1515 : {
1516 170 : if (noisy)
1517 250 : error_at (loop->loc,
1518 : seq_par
1519 : ? G_("%<seq%> overrides other OpenACC loop specifiers")
1520 : : G_("%<auto%> conflicts with other OpenACC loop "
1521 : "specifiers"));
1522 170 : maybe_auto = false;
1523 170 : loop->flags &= ~OLF_AUTO;
1524 170 : if (seq_par)
1525 : {
1526 90 : loop->flags
1527 90 : &= ~((GOMP_DIM_MASK (GOMP_DIM_MAX) - 1) << OLF_DIM_BASE);
1528 90 : this_mask = 0;
1529 : }
1530 : }
1531 :
/* Independent loops that may be auto-partitioned are marked as such,
   and the GOMP_DIM_MAX bit tells the caller that an auto pass is
   needed.  */
1532 19430 : if (maybe_auto && (loop->flags & OLF_INDEPENDENT))
1533 : {
1534 5699 : loop->flags |= OLF_AUTO;
1535 5699 : mask_all |= GOMP_DIM_MASK (GOMP_DIM_MAX);
1536 : }
1537 : }
1538 :
/* The loop asks for an axis that is already part of OUTER_MASK.  */
1539 20711 : if (this_mask & outer_mask)
1540 : {
/* Look for the nearest containing loop using one of the requested
   axes; if there is none, OUTER is null and the diagnostic blames the
   containing routine instead.  */
1541 248 : const oacc_loop *outer;
1542 350 : for (outer = loop->parent; outer; outer = outer->parent)
1543 248 : if ((outer->mask | outer->e_mask) & this_mask)
1544 : break;
1545 :
1546 248 : if (noisy)
1547 : {
1548 248 : if (outer)
1549 : {
1550 146 : error_at (loop->loc,
1551 146 : loop->routine
1552 : ? G_("routine call uses same OpenACC parallelism"
1553 : " as containing loop")
1554 : : G_("inner loop uses same OpenACC parallelism"
1555 : " as containing loop"));
1556 146 : inform (outer->loc, "containing loop here");
1557 : }
1558 : else
1559 102 : error_at (loop->loc,
1560 102 : loop->routine
1561 : ? G_("routine call uses OpenACC parallelism disallowed"
1562 : " by containing routine")
1563 : : G_("loop uses OpenACC parallelism disallowed"
1564 : " by containing routine"));
1565 :
1566 248 : if (loop->routine)
1567 154 : inform (DECL_SOURCE_LOCATION (loop->routine),
1568 : "routine %qD declared here", loop->routine);
1569 : }
/* Drop the conflicting axes and carry on.  */
1570 248 : this_mask &= ~outer_mask;
1571 : }
1572 : else
1573 : {
/* Lowest set bit of THIS_MASK, or zero if no axis is requested.  */
1574 20463 : unsigned outermost = least_bit_hwi (this_mask);
1575 :
1576 20463 : if (outermost && outermost <= outer_mask)
1577 : {
1578 40 : if (noisy)
1579 : {
1580 40 : error_at (loop->loc,
1581 : "incorrectly nested OpenACC loop parallelism");
1582 :
1583 40 : const oacc_loop *outer;
1584 40 : for (outer = loop->parent;
1585 40 : outer->flags && outer->flags < outermost;
1586 0 : outer = outer->parent)
1587 0 : continue;
1588 40 : inform (outer->loc, "containing loop here");
1589 0 : }
1590 :
1591 40 : this_mask &= ~outermost;
1592 : }
1593 : }
1594 :
1595 20711 : mask_all |= this_mask;
1596 :
1597 20711 : if (loop->flags & OLF_TILE)
1598 : {
1599 : /* When tiling, vector goes to the element loop, and failing
1600 : that we put worker there. The std doesn't contemplate
1601 : specifying all three. We choose to put worker and vector on
1602 : the element loops in that case. */
1603 136 : unsigned this_e_mask = this_mask & GOMP_DIM_MASK (GOMP_DIM_VECTOR);
1604 136 : if (!this_e_mask || this_mask & GOMP_DIM_MASK (GOMP_DIM_GANG))
1605 120 : this_e_mask |= this_mask & GOMP_DIM_MASK (GOMP_DIM_WORKER);
1606 :
1607 136 : loop->e_mask = this_e_mask;
1608 136 : this_mask ^= this_e_mask;
1609 : }
1610 :
1611 20711 : loop->mask = this_mask;
1612 :
1613 20711 : if (dump_file)
1614 222 : fprintf (dump_file, "Loop %s:%d user specified %d & %d\n",
1615 444 : LOCATION_FILE (loop->loc), LOCATION_LINE (loop->loc),
1616 : loop->mask, loop->e_mask);
1617 :
/* Children are checked against every axis in use so far, element loop
   axes included.  */
1618 20711 : if (loop->child)
1619 : {
1620 8641 : unsigned tmp_mask = outer_mask | this_mask | loop->e_mask;
1621 8641 : loop->inner = oacc_loop_fixed_partitions (loop->child, tmp_mask);
1622 8641 : mask_all |= loop->inner;
1623 : }
1624 :
1625 20711 : if (loop->sibling)
1626 2194 : mask_all |= oacc_loop_fixed_partitions (loop->sibling, outer_mask);
1627 :
1628 20711 : return mask_all;
1629 : }
1630 :
1631 : /* Walk the OpenACC loop hierarchy to assign auto-partitioned loops.
1632 : OUTER_MASK is the partitioning this loop is contained within.
1633 : OUTER_ASSIGN is true if an outer loop is being auto-partitioned.
1634 : Return the cumulative partitioning used by this loop, siblings and
1635 : children.

   Only loops flagged both OLF_AUTO and OLF_INDEPENDENT have axes
   assigned.  A first attempt is made before visiting the children and
   a second one afterwards, once the children's usage is known.  */
1636 :
1637 : static unsigned
1638 10192 : oacc_loop_auto_partitions (oacc_loop *loop, unsigned outer_mask,
1639 : bool outer_assign)
1640 : {
1641 10192 : bool assign = (loop->flags & OLF_AUTO) && (loop->flags & OLF_INDEPENDENT);
1642 10192 : bool noisy = true;
1643 10192 : bool tiling = loop->flags & OLF_TILE;
1644 :
1645 : #ifdef ACCEL_COMPILER
1646 : /* When device_type is supported, we want the device compiler to be
1647 : noisy, if the loop parameters are device_type-specific. */
1648 : noisy = false;
1649 : #endif
1650 :
1651 10192 : if (assign && (!outer_assign || loop->inner))
1652 : {
1653 : /* Allocate outermost and non-innermost loops at the outermost
1654 : non-innermost available level. */
1655 : unsigned this_mask = GOMP_DIM_MASK (GOMP_DIM_GANG);
1656 :
1657 : /* Find the first outermost available partition. */
1658 6659 : while (this_mask <= outer_mask)
1659 1923 : this_mask <<= 1;
1660 :
1661 : /* Grab two axes if tiling, and we've not assigned anything */
1662 4736 : if (tiling && !(loop->mask | loop->e_mask))
1663 94 : this_mask |= this_mask << 1;
1664 :
1665 : /* Prohibit the innermost partitioning at the moment. */
1666 4736 : this_mask &= GOMP_DIM_MASK (GOMP_DIM_MAX - 1) - 1;
1667 :
1668 : /* Don't use any dimension explicitly claimed by an inner loop. */
1669 4736 : this_mask &= ~loop->inner;
1670 :
1671 4736 : if (tiling && !loop->e_mask)
1672 : {
1673 : /* If we got two axes, allocate the inner one to the element
1674 : loop. */
1675 98 : loop->e_mask = this_mask & (this_mask << 1);
1676 98 : this_mask ^= loop->e_mask;
1677 : }
1678 :
1679 4736 : loop->mask |= this_mask;
1680 : }
1681 :
/* Visit the children with everything claimed so far, and record what
   they use so the second attempt below can avoid it.  */
1682 10192 : if (loop->child)
1683 : {
1684 5360 : unsigned tmp_mask = outer_mask | loop->mask | loop->e_mask;
1685 5360 : loop->inner = oacc_loop_auto_partitions (loop->child, tmp_mask,
1686 5360 : outer_assign | assign);
1687 : }
1688 :
1689 10192 : if (assign && (!loop->mask || (tiling && !loop->e_mask) || !outer_assign))
1690 : {
1691 : /* Allocate the loop at the innermost available level. Note
1692 : that we do this even if we already assigned this loop the
1693 : outermost available level above. That way we'll partition
1694 : this along 2 axes, if they are available. */
1695 5044 : unsigned this_mask = 0;
1696 :
1697 : /* Determine the outermost partitioning used within this loop. */
1698 5044 : this_mask = loop->inner | GOMP_DIM_MASK (GOMP_DIM_MAX);
1699 5044 : this_mask = least_bit_hwi (this_mask);
1700 :
1701 : /* Pick the partitioning just inside that one. */
1702 5044 : this_mask >>= 1;
1703 :
1704 : /* And avoid picking one used by an outer loop. */
1705 5044 : this_mask &= ~outer_mask;
1706 :
1707 : /* If tiling and we failed completely above, grab the next one
1708 : too. Making sure it doesn't hit an outer loop. */
1709 5044 : if (tiling)
1710 : {
1711 110 : this_mask &= ~(loop->e_mask | loop->mask);
1712 110 : unsigned tile_mask = ((this_mask >> 1)
1713 110 : & ~(outer_mask | loop->e_mask | loop->mask));
1714 :
/* The axis found above goes to the element loop; the tile loop gets
   the next one out, if any.  */
1715 110 : if (tile_mask || loop->mask)
1716 : {
1717 102 : loop->e_mask |= this_mask;
1718 102 : this_mask = tile_mask;
1719 : }
1720 110 : if (!loop->e_mask && noisy)
1721 8 : warning_at (loop->loc, 0,
1722 : "insufficient partitioning available"
1723 : " to parallelize element loop");
1724 : }
1725 :
1726 5044 : loop->mask |= this_mask;
1727 5044 : if (!loop->mask && noisy)
1728 1078 : warning_at (loop->loc, 0,
1729 : tiling
1730 : ? G_("insufficient partitioning available"
1731 : " to parallelize tile loop")
1732 : : G_("insufficient partitioning available"
1733 : " to parallelize loop"));
1734 : }
1735 :
1736 5699 : if (assign && dump_file)
1737 41 : fprintf (dump_file, "Auto loop %s:%d assigned %d & %d\n",
1738 82 : LOCATION_FILE (loop->loc), LOCATION_LINE (loop->loc),
1739 : loop->mask, loop->e_mask);
1740 :
1741 10192 : unsigned inner_mask = 0;
1742 :
/* Siblings are assigned independently, against the same OUTER_MASK.  */
1743 10192 : if (loop->sibling)
1744 1714 : inner_mask |= oacc_loop_auto_partitions (loop->sibling,
1745 : outer_mask, outer_assign);
1746 :
1747 10192 : inner_mask |= loop->inner | loop->mask | loop->e_mask;
1748 :
1749 10192 : return inner_mask;
1750 : }
1751 :
1752 : /* Walk the OpenACC loop heirarchy to check and assign partitioning
1753 : axes. Return mask of partitioning. */
1754 :
1755 : static unsigned
1756 9876 : oacc_loop_partition (oacc_loop *loop, unsigned outer_mask)
1757 : {
1758 9876 : unsigned mask_all = oacc_loop_fixed_partitions (loop, outer_mask);
1759 :
1760 9876 : if (mask_all & GOMP_DIM_MASK (GOMP_DIM_MAX))
1761 : {
1762 3118 : mask_all ^= GOMP_DIM_MASK (GOMP_DIM_MAX);
1763 3118 : mask_all |= oacc_loop_auto_partitions (loop, outer_mask, false);
1764 : }
1765 9876 : return mask_all;
1766 : }
1767 :
1768 : /* Default fork/join early expander. Delete the function calls if
1769 : there is no RTL expander. */
1770 :
1771 : bool
1772 25614 : default_goacc_fork_join (gcall *ARG_UNUSED (call),
1773 : const int *ARG_UNUSED (dims), bool is_fork)
1774 : {
1775 25614 : if (is_fork)
1776 12807 : return targetm.have_oacc_fork ();
1777 : else
1778 12807 : return targetm.have_oacc_join ();
1779 : }
1780 :
1781 : /* Default goacc.reduction early expander.
1782 :
1783 : LHS-opt = IFN_REDUCTION (KIND, RES_PTR, VAR, LEVEL, OP, OFFSET)
1784 : If RES_PTR is not integer-zerop:
1785 : SETUP - emit 'LHS = *RES_PTR', LHS = NULL
1786 : TEARDOWN - emit '*RES_PTR = VAR'
1787 : If LHS is not NULL
1788 : emit 'LHS = VAR' */
1789 :
1790 : void
1791 30884 : default_goacc_reduction (gcall *call)
1792 : {
1793 30884 : unsigned code = (unsigned)TREE_INT_CST_LOW (gimple_call_arg (call, 0));
1794 30884 : gimple_stmt_iterator gsi = gsi_for_stmt (call);
1795 30884 : tree lhs = gimple_call_lhs (call);
1796 30884 : tree var = gimple_call_arg (call, 2);
1797 30884 : gimple_seq seq = NULL;
1798 :
1799 30884 : if (code == IFN_GOACC_REDUCTION_SETUP
1800 30884 : || code == IFN_GOACC_REDUCTION_TEARDOWN)
1801 : {
1802 : /* Setup and Teardown need to copy from/to the receiver object,
1803 : if there is one. */
1804 15442 : tree ref_to_res = gimple_call_arg (call, 1);
1805 :
1806 15442 : if (!integer_zerop (ref_to_res))
1807 : {
1808 5086 : tree dst = build_simple_mem_ref (ref_to_res);
1809 5086 : tree src = var;
1810 :
1811 5086 : if (code == IFN_GOACC_REDUCTION_SETUP)
1812 : {
1813 2543 : src = dst;
1814 2543 : dst = lhs;
1815 2543 : lhs = NULL;
1816 : }
1817 5086 : gimple_seq_add_stmt (&seq, gimple_build_assign (dst, src));
1818 : }
1819 : }
1820 :
1821 : /* Copy VAR to LHS, if there is an LHS. */
1822 30884 : if (lhs)
1823 26734 : gimple_seq_add_stmt (&seq, gimple_build_assign (lhs, var));
1824 :
1825 30884 : gsi_replace_with_seq (&gsi, seq, true);
1826 30884 : }
1827 :
/* State shared between execute_oacc_device_lower and its operand walker
   oacc_rewrite_var_decl; it is passed through walk_stmt_info's 'info'
   field and set up afresh for each statement that is walked.  */
1828 : struct var_decl_rewrite_info
1829 : {
/* The statement whose operands are being walked; new statements are
   inserted before it.  */
1830 : gimple *stmt;
/* Map from an original decl to its adjusted replacement.  */
1831 : hash_map<tree, tree> *adjusted_vars;
/* If true, an ADDR_EXPR of an adjusted variable is rebuilt directly on the
   new decl, without inserting a pointer conversion.  */
1832 : bool avoid_pointer_conversion;
/* Set by the walker whenever it rewrites an operand; the caller then calls
   update_stmt.  */
1833 : bool modified;
1834 : };
1835 :
1836 : /* Helper function for execute_oacc_device_lower. Rewrite VAR_DECLs (by
1837 : themselves or wrapped in various other nodes) according to ADJUSTED_VARS in
1838 : the var_decl_rewrite_info pointed to via DATA. Used as part of coercing
1839 : gang-private variables in OpenACC offload regions to reside in GPU shared
1840 : memory. */
/* TP points at the operand being visited.  DATA is a walk_stmt_info whose
   'info' field holds the var_decl_rewrite_info.  Three operand shapes are
   handled: ADDR_EXPR, COMPONENT_REF/ARRAY_REF chains, and plain variables.
   Every path returns NULL/NULL_TREE; a successful rewrite is reported by
   setting INFO->modified.  */
1841 :
1842 : static tree
1843 0 : oacc_rewrite_var_decl (tree *tp, int *walk_subtrees, void *data)
1844 : {
1845 0 : walk_stmt_info *wi = (walk_stmt_info *) data;
1846 0 : var_decl_rewrite_info *info = (var_decl_rewrite_info *) wi->info;
1847 :
1848 0 : if (TREE_CODE (*tp) == ADDR_EXPR)
1849 : {
/* Case 1: the address of something.  Only acts if the operand of the
   ADDR_EXPR itself has an entry in the adjusted-variables map.  */
1850 0 : tree arg = TREE_OPERAND (*tp, 0);
1851 0 : tree *new_arg = info->adjusted_vars->get (arg);
1852 :
1853 0 : if (new_arg)
1854 : {
1855 0 : if (info->avoid_pointer_conversion)
1856 : {
/* Take the address of the replacement decl directly and stop the walker
   from descending into the new expression.  */
1857 0 : *tp = build_fold_addr_expr (*new_arg);
1858 0 : info->modified = true;
1859 0 : *walk_subtrees = 0;
1860 : }
1861 : else
1862 : {
/* Insert two statements before INFO->stmt: STMT1 takes the address of the
   replacement decl into a fresh SSA name, STMT2 converts that to the type
   of the original ADDR_EXPR.  The operand is then replaced by STMT2's
   LHS.  */
1863 0 : gimple_stmt_iterator gsi = gsi_for_stmt (info->stmt);
1864 0 : tree repl = build_fold_addr_expr (*new_arg);
1865 0 : gimple *stmt1
1866 0 : = gimple_build_assign (make_ssa_name (TREE_TYPE (repl)), repl);
1867 0 : tree conv = convert_to_pointer (TREE_TYPE (*tp),
1868 : gimple_assign_lhs (stmt1));
1869 0 : gimple *stmt2
1870 0 : = gimple_build_assign (make_ssa_name (TREE_TYPE (*tp)), conv);
1871 0 : gsi_insert_before (&gsi, stmt1, GSI_SAME_STMT);
1872 0 : gsi_insert_before (&gsi, stmt2, GSI_SAME_STMT);
1873 0 : *tp = gimple_assign_lhs (stmt2);
1874 0 : info->modified = true;
1875 0 : *walk_subtrees = 0;
1876 : }
1877 : }
1878 : }
1879 0 : else if (TREE_CODE (*tp) == COMPONENT_REF || TREE_CODE (*tp) == ARRAY_REF)
1880 : {
/* Case 2: a reference chain.  Strip nested COMPONENT_REF/ARRAY_REF nodes
   to find the innermost base operand.  */
1881 0 : tree *base = &TREE_OPERAND (*tp, 0);
1882 :
1883 0 : while (TREE_CODE (*base) == COMPONENT_REF
1884 0 : || TREE_CODE (*base) == ARRAY_REF)
1885 0 : base = &TREE_OPERAND (*base, 0);
1886 :
/* Nothing to do unless the base is a VAR_DECL present in the map.  */
1887 0 : if (TREE_CODE (*base) != VAR_DECL)
1888 : return NULL;
1889 :
1890 0 : tree *new_decl = info->adjusted_vars->get (*base);
1891 0 : if (!new_decl)
1892 : return NULL;
1893 :
/* Operand 1 is read for both reference kinds; the FIELD_DECL check below
   restricts the field type adjustment to the cases where it is a field.  */
1894 0 : int base_quals = TYPE_QUALS (TREE_TYPE (*new_decl));
1895 0 : tree field = TREE_OPERAND (*tp, 1);
1896 :
1897 : /* Adjust the type of the field. */
1898 0 : int field_quals = TYPE_QUALS (TREE_TYPE (field));
1899 0 : if (TREE_CODE (field) == FIELD_DECL && field_quals != base_quals)
1900 : {
/* Look through array types to the element type, then store a variant of
   it that also carries the base's qualifiers.  Note that this updates the
   FIELD_DECL's type in place.  */
1901 0 : tree *field_type = &TREE_TYPE (field);
1902 0 : while (TREE_CODE (*field_type) == ARRAY_TYPE)
1903 0 : field_type = &TREE_TYPE (*field_type);
1904 0 : field_quals |= base_quals;
1905 0 : *field_type = build_qualified_type (*field_type, field_quals);
1906 : }
1907 :
1908 : /* Adjust the type of the component ref itself. */
1909 0 : tree comp_type = TREE_TYPE (*tp);
1910 0 : int comp_quals = TYPE_QUALS (comp_type);
1911 0 : if (TREE_CODE (*tp) == COMPONENT_REF && comp_quals != base_quals)
1912 : {
1913 0 : comp_quals |= base_quals;
1914 0 : TREE_TYPE (*tp)
1915 0 : = build_qualified_type (comp_type, comp_quals);
1916 : }
1917 :
/* Finally swap the base of the reference chain for the replacement
   decl.  */
1918 0 : *base = *new_decl;
1919 0 : info->modified = true;
1920 0 : }
1921 0 : else if (VAR_P (*tp))
1922 : {
/* Case 3: a bare variable; replace it if it has an entry in the map.  */
1923 0 : tree *new_decl = info->adjusted_vars->get (*tp);
1924 0 : if (new_decl)
1925 : {
1926 0 : *tp = *new_decl;
1927 0 : info->modified = true;
1928 : }
1929 : }
1930 :
1931 : return NULL_TREE;
1932 : }
1933 :
1934 : /* Return TRUE if CALL is a call to a builtin atomic/sync operation. */
/* The set of matching builtins is taken from sync-builtins.def.  Calls
   without a known callee decl, or that are not BUILT_IN_NORMAL builtins,
   yield false.  */
1935 :
1936 : static bool
1937 0 : is_sync_builtin_call (gcall *call)
1938 : {
1939 0 : tree callee = gimple_call_fndecl (call);
1940 :
1941 0 : if (callee != NULL_TREE
1942 0 : && gimple_call_builtin_p (call, BUILT_IN_NORMAL))
1943 0 : switch (DECL_FUNCTION_CODE (callee))
1944 : {
/* Each DEF_SYNC_BUILTIN entry of the included .def file expands to a
   'case ENUM:' label, so all of them share the 'return true' below.  */
1945 : #undef DEF_SYNC_BUILTIN
1946 : #define DEF_SYNC_BUILTIN(ENUM, NAME, TYPE, ATTRS) case ENUM:
1947 : #include "sync-builtins.def"
1948 : #undef DEF_SYNC_BUILTIN
1949 : return true;
1950 :
1951 : default:
1952 : ;
1953 : }
1954 :
1955 : return false;
1956 : }
1957 :
1958 : /* Main entry point for oacc transformations which run on the device
1959 : compiler after LTO, so we know what the target device is at this
1960 : point (including the host fallback). */
/* Works on current_function_decl.  Steps: classify the function by its
   OpenACC attributes, optionally discard a 'nohost' routine, discover and
   partition the OpenACC loops, validate the launch dimensions, process the
   loops and emit dump output.  Returns 0, or TODO_discard_function when the
   routine is discarded.  */
1961 :
1962 : static unsigned int
1963 15267 : execute_oacc_loop_designation ()
1964 : {
1965 15267 : tree attrs = oacc_get_fn_attrib (current_function_decl);
1966 :
1967 15267 : if (!attrs)
1968 : /* Not an offloaded function. */
1969 : return 0;
1970 :
1971 : /* Parse the default dim argument exactly once. */
/* Once parsed, flag_openacc_dims is made to point at itself; that
   self-reference is the "already parsed" marker tested here.  */
1972 9938 : if ((const void *)flag_openacc_dims != &flag_openacc_dims)
1973 : {
1974 2279 : oacc_parse_default_dims (flag_openacc_dims);
1975 2279 : flag_openacc_dims = (char *)&flag_openacc_dims;
1976 : }
1977 :
/* Classify the function by the attribute it carries.  */
1978 9938 : bool is_oacc_parallel
1979 9938 : = (lookup_attribute ("oacc parallel",
1980 9938 : DECL_ATTRIBUTES (current_function_decl)) != NULL);
1981 9938 : bool is_oacc_kernels
1982 9938 : = (lookup_attribute ("oacc kernels",
1983 9938 : DECL_ATTRIBUTES (current_function_decl)) != NULL);
1984 9938 : bool is_oacc_serial
1985 9938 : = (lookup_attribute ("oacc serial",
1986 9938 : DECL_ATTRIBUTES (current_function_decl)) != NULL);
1987 9938 : bool is_oacc_parallel_kernels_parallelized
1988 9938 : = (lookup_attribute ("oacc parallel_kernels_parallelized",
1989 9938 : DECL_ATTRIBUTES (current_function_decl)) != NULL);
1990 9938 : bool is_oacc_parallel_kernels_gang_single
1991 9938 : = (lookup_attribute ("oacc parallel_kernels_gang_single",
1992 9938 : DECL_ATTRIBUTES (current_function_decl)) != NULL);
/* A non-negative level identifies an OpenACC routine.  */
1993 9938 : int fn_level = oacc_fn_attrib_level (attrs);
1994 9938 : bool is_oacc_routine = (fn_level >= 0);
/* Exactly one of the six classifications must apply.  */
1995 9938 : gcc_checking_assert (is_oacc_parallel
1996 : + is_oacc_kernels
1997 : + is_oacc_serial
1998 : + is_oacc_parallel_kernels_parallelized
1999 : + is_oacc_parallel_kernels_gang_single
2000 : + is_oacc_routine
2001 : == 1);
2002 :
/* "oacc kernels parallelized" is a refinement that is only expected
   together with "oacc kernels".  */
2003 9938 : bool is_oacc_kernels_parallelized
2004 9938 : = (lookup_attribute ("oacc kernels parallelized",
2005 9938 : DECL_ATTRIBUTES (current_function_decl)) != NULL);
2006 9938 : if (is_oacc_kernels_parallelized)
2007 386 : gcc_checking_assert (is_oacc_kernels);
2008 :
2009 9938 : if (dump_file)
2010 : {
2011 154 : if (is_oacc_parallel)
2012 34 : fprintf (dump_file, "Function is OpenACC parallel offload\n");
2013 120 : else if (is_oacc_kernels)
2014 66 : fprintf (dump_file, "Function is %s OpenACC kernels offload\n",
2015 : (is_oacc_kernels_parallelized
2016 : ? "parallelized" : "unparallelized"));
2017 82 : else if (is_oacc_serial)
2018 26 : fprintf (dump_file, "Function is OpenACC serial offload\n");
2019 56 : else if (is_oacc_parallel_kernels_parallelized)
2020 0 : fprintf (dump_file, "Function is %s OpenACC kernels offload\n",
2021 : "parallel_kernels_parallelized");
2022 56 : else if (is_oacc_parallel_kernels_gang_single)
2023 0 : fprintf (dump_file, "Function is %s OpenACC kernels offload\n",
2024 : "parallel_kernels_gang_single");
2025 56 : else if (is_oacc_routine)
2026 56 : fprintf (dump_file, "Function is OpenACC routine level %d\n",
2027 : fn_level);
2028 : else
2029 0 : gcc_unreachable ();
2030 : }
2031 :
2032 : /* This doesn't belong into 'pass_oacc_loop_designation' conceptually, but
2033 : it's a convenient place, so... */
2034 9938 : if (is_oacc_routine)
2035 : {
/* The routine's clauses are the value of its "omp declare target"
   attribute; both are asserted to be present.  */
2036 554 : tree attr = lookup_attribute ("omp declare target",
2037 554 : DECL_ATTRIBUTES (current_function_decl));
2038 554 : gcc_checking_assert (attr);
2039 554 : tree clauses = TREE_VALUE (attr);
2040 554 : gcc_checking_assert (clauses);
2041 :
2042 : /* Should this OpenACC routine be discarded? */
2043 554 : bool discard = false;
2044 :
2045 554 : tree clause_nohost = omp_find_clause (clauses, OMP_CLAUSE_NOHOST);
2046 554 : if (dump_file)
2047 56 : fprintf (dump_file,
2048 : "OpenACC routine '%s' %s '%s' clause.\n",
2049 56 : lang_hooks.decl_printable_name (current_function_decl, 2),
2050 : clause_nohost ? "has" : "doesn't have",
2051 56 : omp_clause_code_name[OMP_CLAUSE_NOHOST]);
2052 : /* Host compiler, 'nohost' clause? */
/* The discard decision is only compiled in when ACCEL_COMPILER is not
   defined.  */
2053 : #ifndef ACCEL_COMPILER
2054 554 : if (clause_nohost)
2055 62 : discard = true;
2056 : #endif
2057 :
2058 554 : if (dump_file)
2059 112 : fprintf (dump_file,
2060 : "OpenACC routine '%s' %sdiscarded.\n",
2061 56 : lang_hooks.decl_printable_name (current_function_decl, 2),
2062 : discard ? "" : "not ");
2063 554 : if (discard)
2064 : {
/* Flag the decl as already written and hand TODO_discard_function back to
   the caller.  */
2065 62 : TREE_ASM_WRITTEN (current_function_decl) = 1;
2066 62 : return TODO_discard_function;
2067 : }
2068 : }
2069 :
2070 : /* Unparallelized OpenACC kernels constructs must get launched as 1 x 1 x 1
2071 : kernels, so remove the parallelism dimensions function attributes
2072 : potentially set earlier on. */
2073 9876 : if (is_oacc_kernels && !is_oacc_kernels_parallelized)
2074 : {
/* ATTRS is re-read because the attribute was just replaced.  */
2075 1261 : oacc_set_fn_attrib (current_function_decl, NULL, NULL);
2076 1261 : attrs = oacc_get_fn_attrib (current_function_decl);
2077 : }
2078 :
2079 : /* Discover, partition and process the loops. */
2080 9876 : oacc_loop *loops = oacc_loop_discovery ();
2081 :
/* NOTE(review): presumably GOMP_DIM_MASK (X) is the single bit for axis X,
   so the expression below yields the mask of all axes below FN_LEVEL --
   confirm against gomp-constants.h.  */
2082 9876 : unsigned outer_mask = 0;
2083 9876 : if (is_oacc_routine)
2084 492 : outer_mask = GOMP_DIM_MASK (fn_level) - 1;
2085 9876 : unsigned used_mask = oacc_loop_partition (loops, outer_mask);
2086 : /* OpenACC kernels constructs are special: they currently don't use the
2087 : generic oacc_loop infrastructure and attribute/dimension processing. */
2088 9876 : if (is_oacc_kernels && is_oacc_kernels_parallelized)
2089 : {
2090 : /* Parallelized OpenACC kernels constructs use gang parallelism. See
2091 : also tree-parloops.cc:create_parallel_loop. */
2092 386 : used_mask |= GOMP_DIM_MASK (GOMP_DIM_GANG);
2093 : }
2094 :
/* DIMS is an output of the validation call; it is only read afterwards.  */
2095 9876 : int dims[GOMP_DIM_MAX];
2096 9876 : oacc_validate_dims (current_function_decl, attrs, dims, fn_level, used_mask);
2097 :
2098 9876 : if (dump_file)
2099 : {
/* COMMA doubles as the prefix of the first element and the separator of
   the following ones.  */
2100 : const char *comma = "Compute dimensions [";
2101 456 : for (int ix = 0; ix != GOMP_DIM_MAX; ix++, comma = ", ")
2102 342 : fprintf (dump_file, "%s%d", comma, dims[ix]);
2103 114 : fprintf (dump_file, "]\n");
2104 : }
2105 :
2106 : /* Verify that for OpenACC 'kernels' decomposed "gang-single" parts we launch
2107 : a single gang only. */
2108 9876 : if (is_oacc_parallel_kernels_gang_single)
2109 109 : gcc_checking_assert (dims[GOMP_DIM_GANG] == 1);
2110 :
2111 9876 : oacc_loop_process (loops, fn_level);
2112 9876 : if (dump_file)
2113 : {
2114 114 : fprintf (dump_file, "OpenACC loops\n");
2115 114 : dump_oacc_loop (dump_file, loops, 0);
2116 114 : fprintf (dump_file, "\n");
2117 : }
2118 9876 : if (dump_enabled_p ())
2119 : {
2120 2280 : oacc_loop *l = loops;
2121 : /* OpenACC kernels constructs are special: they currently don't use the
2122 : generic oacc_loop infrastructure. */
2123 2280 : if (is_oacc_kernels)
2124 : {
2125 : /* Create a fake oacc_loop for diagnostic purposes. */
2126 633 : l = new_oacc_loop_raw (NULL,
2127 633 : DECL_SOURCE_LOCATION (current_function_decl));
2128 633 : l->mask = used_mask;
2129 : }
2130 : else
2131 : {
2132 : /* Skip the outermost, dummy OpenACC loop */
2133 1647 : l = l->child;
2134 : }
2135 2280 : if (l)
2136 1763 : inform_oacc_loop (l);
/* Only the fake loop created above is freed here; in the other case L
   points into LOOPS, which is freed below.  */
2137 2280 : if (is_oacc_kernels)
2138 633 : free_oacc_loop (l);
2139 : }
2140 :
2141 9876 : free_oacc_loop (loops);
2142 :
2143 9876 : return 0;
2144 : }
2145 :
/* Lower the OpenACC internal function calls of the current function:
   IFN_GOACC_TILE, IFN_GOACC_LOOP, IFN_GOACC_REDUCTION and the OpenACC kinds
   of IFN_UNIQUE (fork/join, head/tail marks, privatization markers).
   Afterwards, rewrite references to any privatized variables that the
   target's adjust_private_decl hook replaced.  Returns 0.  */
2146 : static unsigned int
2147 15205 : execute_oacc_device_lower ()
2148 : {
2149 15205 : tree attrs = oacc_get_fn_attrib (current_function_decl);
2150 :
2151 15205 : if (!attrs)
2152 : /* Not an offloaded function. */
2153 : return 0;
2154 :
/* Fetch the size of every dimension; DIMS is passed to the fork_join
   hook below.  */
2155 : int dims[GOMP_DIM_MAX];
2156 39504 : for (unsigned i = 0; i < GOMP_DIM_MAX; i++)
2157 29628 : dims[i] = oacc_get_fn_dim_size (current_function_decl, i);
2158 :
/* Filled in while handling IFN_UNIQUE_OACC_PRIVATE: original decl to the
   decl returned by the target hook.  */
2159 9876 : hash_map<tree, tree> adjusted_vars;
2160 :
2161 : /* Now lower internal loop functions to target-specific code
2162 : sequences. */
/* The loop header has no increment: every path through the body advances
   or repositions GSI explicitly.  */
2163 9876 : basic_block bb;
2164 185146 : FOR_ALL_BB_FN (bb, cfun)
2165 934104 : for (gimple_stmt_iterator gsi = gsi_start_bb (bb); !gsi_end_p (gsi);)
2166 : {
2167 583564 : gimple *stmt = gsi_stmt (gsi);
2168 583564 : if (!is_gimple_call (stmt))
2169 : {
2170 383749 : gsi_next (&gsi);
2171 383749 : continue;
2172 : }
2173 :
/* Only internal function calls are of interest.  */
2174 199815 : gcall *call = as_a <gcall *> (stmt);
2175 199815 : if (!gimple_call_internal_p (call))
2176 : {
2177 4779 : gsi_next (&gsi);
2178 4779 : continue;
2179 : }
2180 :
2181 : /* Rewind to allow rescan. */
/* RESCAN: the call was expanded in place, so do not step over what is now
   at its position.  REMOVE: the call is to be deleted, or replaced by a
   copy of its data dependency argument.  */
2182 195036 : gsi_prev (&gsi);
2183 195036 : bool rescan = false, remove = false;
2184 195036 : enum internal_fn ifn_code = gimple_call_internal_fn (call);
2185 :
2186 195036 : switch (ifn_code)
2187 : {
2188 : default: break;
2189 :
2190 284 : case IFN_GOACC_TILE:
2191 284 : oacc_xform_tile (call);
2192 284 : rescan = true;
2193 284 : break;
2194 :
2195 46694 : case IFN_GOACC_LOOP:
2196 46694 : oacc_xform_loop (call);
2197 46694 : rescan = true;
2198 46694 : break;
2199 :
2200 30884 : case IFN_GOACC_REDUCTION:
2201 : /* Mark the function for SSA renaming. */
2202 30884 : mark_virtual_operands_for_renaming (cfun);
2203 :
2204 : /* If the level is -1, this ended up being an unused
2205 : axis. Handle as a default. */
2206 30884 : if (integer_minus_onep (gimple_call_arg (call, 3)))
2207 8528 : default_goacc_reduction (call);
2208 : else
2209 22356 : targetm.goacc.reduction (call);
2210 : rescan = true;
2211 : break;
2212 :
2213 85483 : case IFN_UNIQUE:
2214 85483 : {
/* Argument 0 of an IFN_UNIQUE call selects its kind.  */
2215 85483 : enum ifn_unique_kind kind
2216 : = ((enum ifn_unique_kind)
2217 85483 : TREE_INT_CST_LOW (gimple_call_arg (call, 0)));
2218 :
2219 85483 : switch (kind)
2220 : {
2221 : default:
2222 : break;
2223 :
/* Fork/join markers are dropped when argument 2 is -1, or when the target
   hook returns false for them.  */
2224 32978 : case IFN_UNIQUE_OACC_FORK:
2225 32978 : case IFN_UNIQUE_OACC_JOIN:
2226 32978 : if (integer_minus_onep (gimple_call_arg (call, 2)))
2227 : remove = true;
2228 25614 : else if (!targetm.goacc.fork_join
2229 25614 : (call, dims, kind == IFN_UNIQUE_OACC_FORK))
2230 85483 : remove = true;
2231 : break;
2232 :
/* Head/tail marks are always dropped here.  */
2233 : case IFN_UNIQUE_OACC_HEAD_MARK:
2234 : case IFN_UNIQUE_OACC_TAIL_MARK:
2235 85483 : remove = true;
2236 : break;
2237 :
2238 259 : case IFN_UNIQUE_OACC_PRIVATE:
2239 259 : {
2240 259 : dump_flags_t l_dump_flags
2241 259 : = get_openacc_privatization_dump_flags ();
2242 :
/* Fall back to the function's own location if the statement has none.  */
2243 259 : location_t loc = gimple_location (stmt);
2244 259 : if (LOCATION_LOCUS (loc) == UNKNOWN_LOCATION)
2245 30 : loc = DECL_SOURCE_LOCATION (current_function_decl);
2246 259 : const dump_user_location_t d_u_loc
2247 259 : = dump_user_location_t::from_location_t (loc);
2248 :
/* Argument 2 is the privatization level: -1 or a valid GOMP_DIM index.  */
2249 259 : HOST_WIDE_INT level
2250 259 : = TREE_INT_CST_LOW (gimple_call_arg (call, 2));
2251 259 : gcc_checking_assert (level == -1
2252 : || (level >= 0
2253 : && level < GOMP_DIM_MAX));
/* Arguments from index 3 onwards are the addresses of the privatized
   decls.  */
2254 339 : for (unsigned i = 3;
2255 598 : i < gimple_call_num_args (call);
2256 : i++)
2257 : {
2258 339 : static char const *const axes[] =
2259 : /* Must be kept in sync with GOMP_DIM enumeration. */
2260 : { "gang", "worker", "vector" };
2261 :
2262 339 : tree arg = gimple_call_arg (call, i);
2263 339 : gcc_checking_assert (TREE_CODE (arg) == ADDR_EXPR);
2264 339 : tree decl = TREE_OPERAND (arg, 0);
2265 339 : if (dump_enabled_p ())
2266 : /* PR100695 "Format decoder, quoting in 'dump_printf' etc." */
2267 : #if __GNUC__ >= 10
2268 318 : # pragma GCC diagnostic push
2269 318 : # pragma GCC diagnostic ignored "-Wformat"
2270 : #endif
2271 318 : dump_printf_loc (l_dump_flags, d_u_loc,
2272 : "variable %<%T%> ought to be"
2273 : " adjusted for OpenACC"
2274 : " privatization level: %qs\n",
2275 : decl,
2276 : (level == -1
2277 : ? "UNKNOWN" : axes[level]));
2278 : #if __GNUC__ >= 10
2279 339 : # pragma GCC diagnostic pop
2280 : #endif
/* Decide whether DECL counts as adjusted.  It does not for an unknown
   level or when the target has no adjust_private_decl hook.  Vector level
   is treated as adjusted without calling the hook.  For the remaining
   levels the hook is called, and a changed type or a different decl is
   recorded in ADJUSTED_VARS.  */
2281 339 : bool adjusted;
2282 339 : if (level == -1)
2283 : adjusted = false;
2284 336 : else if (!targetm.goacc.adjust_private_decl)
2285 : adjusted = false;
2286 0 : else if (level == GOMP_DIM_VECTOR)
2287 : {
2288 : /* That's the default behavior. */
2289 : adjusted = true;
2290 : }
2291 : else
2292 : {
2293 0 : tree oldtype = TREE_TYPE (decl);
2294 0 : tree newdecl
2295 0 : = targetm.goacc.adjust_private_decl (loc, decl,
2296 0 : level);
2297 0 : adjusted = (TREE_TYPE (newdecl) != oldtype
2298 0 : || newdecl != decl);
2299 0 : if (adjusted)
2300 0 : adjusted_vars.put (decl, newdecl);
2301 : }
2302 0 : if (adjusted
2303 0 : && dump_enabled_p ())
2304 : /* PR100695 "Format decoder, quoting in 'dump_printf' etc." */
2305 : #if __GNUC__ >= 10
2306 0 : # pragma GCC diagnostic push
2307 0 : # pragma GCC diagnostic ignored "-Wformat"
2308 : #endif
2309 0 : dump_printf_loc (l_dump_flags, d_u_loc,
2310 : "variable %<%T%> adjusted for"
2311 : " OpenACC privatization level:"
2312 : " %qs\n",
2313 0 : decl, axes[level]);
2314 : #if __GNUC__ >= 10
2315 339 : # pragma GCC diagnostic pop
2316 : #endif
2317 : }
/* The privatization marker itself is always dropped.  */
2318 259 : remove = true;
2319 : }
2320 259 : break;
2321 : }
2322 : break;
2323 : }
2324 : }
2325 :
2326 195036 : if (gsi_end_p (gsi))
2327 : /* We rewound past the beginning of the BB. */
2328 188824 : gsi = gsi_start_bb (bb);
2329 : else
2330 : /* Undo the rewind. */
2331 100624 : gsi_next (&gsi);
2332 :
2333 195036 : if (remove)
2334 : {
/* If the call has a virtual definition, redirect its uses to the call's
   virtual use before the call goes away.  */
2335 170966 : if (gimple_vdef (call))
2336 85483 : replace_uses_by (gimple_vdef (call), gimple_vuse (call));
2337 85483 : if (gimple_call_lhs (call))
2338 : {
2339 : /* Propagate the data dependency var. */
2340 79856 : gimple *ass = gimple_build_assign (gimple_call_lhs (call),
2341 : gimple_call_arg (call, 1));
2342 79856 : gsi_replace (&gsi, ass, false);
2343 : }
2344 : else
2345 5627 : gsi_remove (&gsi, true);
2346 : }
2347 109553 : else if (!rescan)
2348 : /* If not rescanning, advance over the call. */
2349 31691 : gsi_next (&gsi);
2350 : }
2351 :
2352 : /* Regarding the OpenACC privatization level, we're currently only looking at
2353 : making the gang-private level work. Regarding that, we have the following
2354 : configurations:
2355 :
2356 : - GCN offloading: 'targetm.goacc.adjust_private_decl' does the work (in
2357 : particular, change 'TREE_TYPE', etc.) and there is no
2358 : 'targetm.goacc.expand_var_decl'.
2359 :
2360 : - nvptx offloading: 'targetm.goacc.adjust_private_decl' only sets a
2361 : marker and then 'targetm.goacc.expand_var_decl' does the work.
2362 :
2363 : Eventually (in particular, for worker-private level?), both
2364 : 'targetm.goacc.adjust_private_decl' and 'targetm.goacc.expand_var_decl'
2365 : may need to do things, but that's currently not meant to be addressed, and
2366 : thus not fully worked out and implemented, and thus untested. Hence,
2367 : 'assert' what currently is implemented/tested, only. */
2368 :
2369 9876 : if (targetm.goacc.expand_var_decl)
2370 0 : gcc_assert (adjusted_vars.is_empty ());
2371 :
2372 : /* Make adjustments to gang-private local variables if required by the
2373 : target, e.g. forcing them into a particular address space. Afterwards,
2374 : ADDR_EXPR nodes which have adjusted variables as their argument need to
2375 : be modified in one of two ways:
2376 :
2377 : 1. They can be recreated, making a pointer to the variable in the new
2378 : address space, or
2379 :
2380 : 2. The address of the variable in the new address space can be taken,
2381 : converted to the default (original) address space, and the result of
2382 : that conversion substituted in place of the original ADDR_EXPR node.
2383 :
2384 : Which of these is done depends on the gimple statement being processed.
2385 : At present atomic operations and inline asms use (1), and everything else
2386 : uses (2). At least on AMD GCN, there are atomic operations that work
2387 : directly in the LDS address space.
2388 :
2389 : COMPONENT_REFS, ARRAY_REFS and plain VAR_DECLs are also rewritten to use
2390 : the new decl, adjusting types of appropriate tree nodes as necessary. */
2391 :
2392 9876 : if (targetm.goacc.adjust_private_decl
2393 9876 : && !adjusted_vars.is_empty ())
2394 : {
2395 0 : FOR_ALL_BB_FN (bb, cfun)
2396 0 : for (gimple_stmt_iterator gsi = gsi_start_bb (bb);
2397 0 : !gsi_end_p (gsi);
2398 0 : gsi_next (&gsi))
2399 : {
2400 0 : gimple *stmt = gsi_stmt (gsi);
2401 0 : walk_stmt_info wi;
2402 0 : var_decl_rewrite_info info;
2403 :
/* Variant (1) above is selected for sync/atomic builtin calls and for
   inline asm statements.  */
2404 0 : info.avoid_pointer_conversion
2405 0 : = (is_gimple_call (stmt)
2406 0 : && is_sync_builtin_call (as_a <gcall *> (stmt)))
2407 0 : || gimple_code (stmt) == GIMPLE_ASM;
2408 0 : info.stmt = stmt;
2409 0 : info.modified = false;
2410 0 : info.adjusted_vars = &adjusted_vars;
2411 :
2412 0 : memset (&wi, 0, sizeof (wi));
2413 0 : wi.info = &info;
2414 :
2415 0 : walk_gimple_op (stmt, oacc_rewrite_var_decl, &wi);
2416 :
/* Refresh the statement only if the walker changed an operand.  */
2417 0 : if (info.modified)
2418 0 : update_stmt (stmt);
2419 : }
2420 : }
2421 :
2422 9876 : return 0;
2423 9876 : }
2424 :
2425 : /* Default launch dimension validator. Force everything to 1. A
2426 : backend that wants to provide larger dimensions must override this
2427 : hook. */
/* DIMS has GOMP_DIM_MAX entries and is updated in place.  DECL, FN_LEVEL
   and USED are ignored.  Returns true if at least one entry had to be
   changed.  */
2428 :
2429 : bool
2430 14434 : default_goacc_validate_dims (tree ARG_UNUSED (decl), int *dims,
2431 : int ARG_UNUSED (fn_level),
2432 : unsigned ARG_UNUSED (used))
2433 : {
2434 14434 : bool changed = false;
2435 :
2436 57736 : for (unsigned ix = 0; ix != GOMP_DIM_MAX; ix++)
2437 : {
2438 43302 : if (dims[ix] != 1)
2439 : {
2440 33036 : dims[ix] = 1;
2441 33036 : changed = true;
2442 : }
2443 : }
2444 :
2445 14434 : return changed;
2446 : }
2447 :
2448 : /* Default dimension bound is unknown on accelerator and 1 on host. */
/* AXIS is ignored.  The value is fixed at build time: 0 when ACCEL_COMPILER
   is defined, which per the comment above stands for "unknown", and 1
   otherwise.  */
2449 :
2450 : int
2451 0 : default_goacc_dim_limit (int ARG_UNUSED (axis))
2452 : {
2453 : #ifdef ACCEL_COMPILER
2454 : return 0;
2455 : #else
2456 0 : return 1;
2457 : #endif
2458 : }
2459 :
2460 : namespace {
2461 :
/* Descriptor of the "oaccloops" pass: a GIMPLE pass that requires PROP_cfg
   and finishes with TODO_update_ssa | TODO_cleanup_cfg.  */
2462 : const pass_data pass_data_oacc_loop_designation =
2463 : {
2464 : GIMPLE_PASS, /* type */
2465 : "oaccloops", /* name */
2466 : OPTGROUP_OMP, /* optinfo_flags */
2467 : TV_NONE, /* tv_id */
2468 : PROP_cfg, /* properties_required */
2469 : 0 /* Possibly PROP_gimple_eomp. */, /* properties_provided */
2470 : 0, /* properties_destroyed */
2471 : 0, /* todo_flags_start */
2472 : TODO_update_ssa | TODO_cleanup_cfg, /* todo_flags_finish */
2473 : };
2474 :
/* Pass wrapper around execute_oacc_loop_designation.  */
2475 : class pass_oacc_loop_designation : public gimple_opt_pass
2476 : {
2477 : public:
2478 285722 : pass_oacc_loop_designation (gcc::context *ctxt)
2479 571444 : : gimple_opt_pass (pass_data_oacc_loop_designation, ctxt)
2480 : {}
2481 :
2482 : /* opt_pass methods: */
/* The gate result is simply flag_openacc.  */
2483 1472320 : bool gate (function *) final override { return flag_openacc; };
2484 :
/* The function argument is unused; the worker operates on
   current_function_decl.  */
2485 15267 : unsigned int execute (function *) final override
2486 : {
2487 15267 : return execute_oacc_loop_designation ();
2488 : }
2489 :
2490 : }; // class pass_oacc_loop_designation
2491 :
/* Descriptor of the "oaccdevlow" pass; apart from the name it has the same
   settings as the "oaccloops" descriptor.  */
2492 : const pass_data pass_data_oacc_device_lower =
2493 : {
2494 : GIMPLE_PASS, /* type */
2495 : "oaccdevlow", /* name */
2496 : OPTGROUP_OMP, /* optinfo_flags */
2497 : TV_NONE, /* tv_id */
2498 : PROP_cfg, /* properties_required */
2499 : 0 /* Possibly PROP_gimple_eomp. */, /* properties_provided */
2500 : 0, /* properties_destroyed */
2501 : 0, /* todo_flags_start */
2502 : TODO_update_ssa | TODO_cleanup_cfg, /* todo_flags_finish */
2503 : };
2504 :
/* Pass wrapper around execute_oacc_device_lower.  */
2505 : class pass_oacc_device_lower : public gimple_opt_pass
2506 : {
2507 : public:
2508 285722 : pass_oacc_device_lower (gcc::context *ctxt)
2509 571444 : : gimple_opt_pass (pass_data_oacc_device_lower, ctxt)
2510 : {}
2511 :
2512 : /* opt_pass methods: */
/* The gate result is simply flag_openacc.  */
2513 1472258 : bool gate (function *) final override { return flag_openacc; };
2514 :
2515 15205 : unsigned int execute (function *) final override
2516 : {
2517 15205 : return execute_oacc_device_lower ();
2518 : }
2519 :
2520 : }; // class pass_oacc_device_lower
2521 :
2522 : } // anon namespace
2523 :
/* Factory for the "oaccloops" pass: returns a newly allocated
   pass_oacc_loop_designation constructed with CTXT.  */
2524 : gimple_opt_pass *
2525 285722 : make_pass_oacc_loop_designation (gcc::context *ctxt)
2526 : {
2527 285722 : return new pass_oacc_loop_designation (ctxt);
2528 : }
2529 :
/* Factory for the "oaccdevlow" pass: returns a newly allocated
   pass_oacc_device_lower constructed with CTXT.  */
2530 : gimple_opt_pass *
2531 285722 : make_pass_oacc_device_lower (gcc::context *ctxt)
2532 : {
2533 285722 : return new pass_oacc_device_lower (ctxt);
2534 : }
2535 :
2536 :
2537 : /* Rewrite GOMP_SIMT_ENTER_ALLOC call given by GSI and remove the preceding
2538 : GOMP_SIMT_ENTER call identifying the privatized variables, which are
2539 : turned to structure fields and receive a DECL_VALUE_EXPR accordingly.
2540 : Set *REGIMPLIFY to true, except if no privatized variables were seen. */
2541 :
2542 : static void
2543 0 : ompdevlow_adjust_simt_enter (gimple_stmt_iterator *gsi, bool *regimplify)
2544 : {
/* SIMTREC is the LHS of the alloc call.  Argument 0 of the alloc call is
   the SIMDUID, whose defining statement must be the GOMP_SIMT_ENTER
   call.  */
2545 0 : gimple *alloc_stmt = gsi_stmt (*gsi);
2546 0 : tree simtrec = gimple_call_lhs (alloc_stmt);
2547 0 : tree simduid = gimple_call_arg (alloc_stmt, 0);
2548 0 : gimple *enter_stmt = SSA_NAME_DEF_STMT (simduid);
2549 0 : gcc_assert (gimple_call_internal_p (enter_stmt, IFN_GOMP_SIMT_ENTER));
/* Create a fresh artificial, nameless, addressable record type and make
   SIMTREC a pointer to it.  */
2550 0 : tree rectype = lang_hooks.types.make_type (RECORD_TYPE);
2551 0 : TYPE_ARTIFICIAL (rectype) = TYPE_NAMELESS (rectype) = 1;
2552 0 : TREE_ADDRESSABLE (rectype) = 1;
2553 0 : TREE_TYPE (simtrec) = build_pointer_type (rectype);
/* Arguments from index 1 onwards of the enter call are either null
   pointers, which are skipped, or addresses of the privatized
   variables.  */
2554 0 : for (unsigned i = 1; i < gimple_call_num_args (enter_stmt); i++)
2555 : {
2556 0 : tree *argp = gimple_call_arg_ptr (enter_stmt, i);
2557 0 : if (*argp == null_pointer_node)
2558 0 : continue;
2559 0 : gcc_assert (TREE_CODE (*argp) == ADDR_EXPR
2560 : && VAR_P (TREE_OPERAND (*argp, 0)));
2561 0 : tree var = TREE_OPERAND (*argp, 0);
2562 :
/* Build a field mirroring VAR: same location, name, type, alignment,
   user-alignment flag and volatility.  */
2563 0 : tree field = build_decl (DECL_SOURCE_LOCATION (var), FIELD_DECL,
2564 0 : DECL_NAME (var), TREE_TYPE (var));
2565 0 : SET_DECL_ALIGN (field, DECL_ALIGN (var));
2566 0 : DECL_USER_ALIGN (field) = DECL_USER_ALIGN (var);
2567 0 : TREE_THIS_VOLATILE (field) = TREE_THIS_VOLATILE (var);
2568 :
2569 0 : insert_field_into_struct (rectype, field);
2570 :
/* Give VAR the value expression '(*SIMTREC).FIELD'.  */
2571 0 : tree t = build_simple_mem_ref (simtrec);
2572 0 : t = build3 (COMPONENT_REF, TREE_TYPE (var), t, field, NULL);
2573 0 : TREE_THIS_VOLATILE (t) = TREE_THIS_VOLATILE (var);
2574 0 : SET_DECL_VALUE_EXPR (var, t);
2575 0 : DECL_HAS_VALUE_EXPR_P (var) = 1;
2576 0 : *regimplify = true;
2577 : }
/* With all fields added, lay the record out and read back its size and
   alignment in bytes.  */
2578 0 : layout_type (rectype);
2579 0 : tree size = TYPE_SIZE_UNIT (rectype);
2580 0 : tree align = build_int_cst (TREE_TYPE (size), TYPE_ALIGN_UNIT (rectype));
2581 :
/* Replace the alloc call with a new two-argument one (size, align) that
   keeps SIMTREC as its LHS.  */
2582 0 : alloc_stmt
2583 0 : = gimple_build_call_internal (IFN_GOMP_SIMT_ENTER_ALLOC, 2, size, align);
2584 0 : gimple_call_set_lhs (alloc_stmt, simtrec);
2585 0 : gsi_replace (gsi, alloc_stmt, false);
/* Replace the GOMP_SIMT_ENTER call with a plain copy of its argument 0
   into SIMDUID.  */
2586 0 : gimple_stmt_iterator enter_gsi = gsi_for_stmt (enter_stmt);
2587 0 : enter_stmt = gimple_build_assign (simduid, gimple_call_arg (enter_stmt, 0));
2588 0 : gsi_replace (&enter_gsi, enter_stmt, false);
2589 :
/* If SIMTREC has a single immediate use, it must be the GOMP_SIMT_EXIT
   call; insert a clobber of '*SIMTREC' before it.  Otherwise SIMTREC is
   expected to have no uses at all.  */
2590 0 : use_operand_p use;
2591 0 : gimple *exit_stmt;
2592 0 : if (single_imm_use (simtrec, &use, &exit_stmt))
2593 : {
2594 0 : gcc_assert (gimple_call_internal_p (exit_stmt, IFN_GOMP_SIMT_EXIT));
2595 0 : gimple_stmt_iterator exit_gsi = gsi_for_stmt (exit_stmt);
2596 0 : tree clobber = build_clobber (rectype);
2597 0 : exit_stmt = gimple_build_assign (build_simple_mem_ref (simtrec), clobber);
2598 0 : gsi_insert_before (&exit_gsi, exit_stmt, GSI_SAME_STMT);
2599 : }
2600 : else
2601 0 : gcc_checking_assert (has_zero_uses (simtrec));
2602 0 : }
2603 :
2604 : /* Callback for walk_gimple_stmt used to scan for SIMT-privatized variables. */
/* A match is a variable that has a DECL_VALUE_EXPR and carries the
   "omp simt private" attribute.  The matching tree is returned and
   *WALK_SUBTREES is cleared; for anything else NULL_TREE is returned.  The
   third (data) argument is unused.  */
2605 :
2606 : static tree
2607 0 : find_simtpriv_var_op (tree *tp, int *walk_subtrees, void *)
2608 : {
2609 0 : tree t = *tp;
2610 :
2611 0 : if (VAR_P (t)
2612 0 : && DECL_HAS_VALUE_EXPR_P (t)
2613 0 : && lookup_attribute ("omp simt private", DECL_ATTRIBUTES (t)))
2614 : {
2615 0 : *walk_subtrees = 0;
2616 0 : return t;
2617 : }
2618 : return NULL_TREE;
2619 : }
2620 :
2621 : /* Helper function for execute_omp_device_lower, invoked via walk_gimple_op.
2622 : Resolve any OMP_TARGET_DEVICE_MATCHES and OMP_NEXT_VARIANT exprs to
2623 : constants. */
/* TP points at the operand being visited and is overwritten in place when
   it is one of the two cookie kinds.  DATA is unused.  Always returns
   NULL_TREE.  */
2624 : static tree
2625 13877 : resolve_omp_variant_cookies (tree *tp, int *walk_subtrees,
2626 : void *data ATTRIBUTE_UNUSED)
2627 : {
2628 13877 : if (TREE_CODE (*tp) == OMP_TARGET_DEVICE_MATCHES)
2629 : {
/* Replace the cookie by whatever the resolver returns and do not walk
   into the replacement.  */
2630 0 : *tp = resolve_omp_target_device_matches (*tp);
2631 0 : *walk_subtrees = 0;
2632 0 : return NULL_TREE;
2633 : }
2634 :
/* Every other tree code except OMP_NEXT_VARIANT is left alone.  */
2635 13877 : if (TREE_CODE (*tp) != OMP_NEXT_VARIANT)
2636 : return NULL_TREE;
2637 324 : tree index = OMP_NEXT_VARIANT_INDEX (*tp);
2638 324 : tree state = OMP_NEXT_VARIANT_STATE (*tp);
2639 :
2640 : /* State is a triplet of (result-vector, construct_context, selector_vec).
2641 : If result-vector has already been computed, just use it. Otherwise we
2642 : must resolve the variant and fill in that part of the state object.
2643 : All OMP_NEXT_VARIANT exprs for the same variant construct are supposed
2644 : to share the same state object, but if something bad happens and we end
2645 : up with copies, that is OK, it will just cause the result-vector to be
2646 : computed multiple times. */
/* The triplet is stored in a single list node: TREE_PURPOSE is the result
   vector, TREE_VALUE the construct context, TREE_CHAIN the selectors.  */
2647 324 : tree result_vector = TREE_PURPOSE (state);
2648 324 : if (!result_vector)
2649 : {
2650 304 : tree construct_context = TREE_VALUE (state);
2651 304 : tree selectors = TREE_CHAIN (state);
2652 :
2653 304 : vec<struct omp_variant> candidates
2654 304 : = omp_resolve_variant_construct (construct_context, selectors);
/* The result vector has one more element than the selectors vector and is
   cached in STATE for later cookies.  */
2655 304 : int n = TREE_VEC_LENGTH (selectors);
2656 304 : TREE_PURPOSE (state) = result_vector = make_tree_vec (n + 1);
2657 : /* The result vector maps the index of each element of the original
2658 : selectors vector onto the index of the next element of the filtered/
2659 : sorted candidates vector. Since some of the original variants may
2660 : have been discarded as non-matching in candidates, initialize the
2661 : whole array to zero so that we have a placeholder "next" value for
2662 : those elements. Hopefully dead code elimination will take care of
2663 : subsequently discarding the unreachable cases in the already-generated
2664 : switch statement. */
2665 2108 : for (int i = 1; i <= n; i++)
2666 1804 : TREE_VEC_ELT (result_vector, i) = integer_zero_node;
2667 : /* Element 0 is the case label of the first variant in the sorted
2668 : list. */
2669 304 : if (dump_file)
2670 0 : fprintf (dump_file, "Computing case map for variant directive\n");
/* Chain the candidates: slot J receives the alternative of the current
   candidate, and J then becomes that alternative's number, so each slot
   names the candidate that follows it.  */
2671 : int j = 0;
2672 1588 : for (unsigned int i = 0; i < candidates.length(); i++)
2673 : {
2674 1284 : if (dump_file)
2675 0 : fprintf (dump_file, " %d -> case %d\n",
2676 0 : j, (int) tree_to_shwi (candidates[i].alternative));
2677 1284 : TREE_VEC_ELT (result_vector, j) = candidates[i].alternative;
2678 1284 : j = (int) tree_to_shwi (candidates[i].alternative);
2679 : }
2680 : }
2681 :
2682 : /* Now just grab the value out of the precomputed array. */
/* The index must be an integer constant.  The cookie is replaced by the
   corresponding vector element and is not walked further.  */
2683 324 : gcc_assert (TREE_CODE (index) == INTEGER_CST);
2684 324 : int indexval = (int) tree_to_shwi (index);
2685 324 : *tp = TREE_VEC_ELT (result_vector, indexval);
2686 324 : *walk_subtrees = 0;
2687 324 : return NULL_TREE;
2688 : }
2689 :
2690 :
2691 : /* Cleanup uses of SIMT placeholder internal functions: on non-SIMT targets,
2692 : VF is 1 and LANE is 0; on SIMT targets, VF is folded to a constant, and
2693 : LANE is kept to be expanded to RTL later on. Also cleanup all other SIMT
2694 : internal functions on non-SIMT targets, and likewise some SIMD internal
2695 : functions on SIMT targets. */
/* Besides the SIMT/SIMD cleanup described above, this function first
   resolves the variant-construct cookies when the current function's cgraph
   node has has_omp_variant_constructs set and, in the accelerator compiler
   only, rewrites indirect calls so that they go through
   BUILT_IN_GOMP_TARGET_MAP_INDIRECT_PTR when offload_ind_funcs is non-empty.
   Operates on cfun and always returns 0.  */
2696 :
2697 : static unsigned int
2698 20565 : execute_omp_device_lower ()
2699 : {
/* Use the target's SIMT width when the hook is provided; without the hook
   the target is handled as non-SIMT, i.e. with a VF of 1.  */
2700 20565 : int vf = targetm.simt.vf ? targetm.simt.vf () : 1;
/* Set by ompdevlow_adjust_simt_enter; requests the regimplification walk
   at the end of this function.  */
2701 20565 : bool regimplify = false;
2702 20565 : basic_block bb;
2703 20565 : gimple_stmt_iterator gsi;
2704 : #ifdef ACCEL_COMPILER
2705 : bool omp_redirect_indirect_calls = vec_safe_length (offload_ind_funcs) > 0;
2706 : tree map_ptr_fn
2707 : = builtin_decl_explicit (BUILT_IN_GOMP_TARGET_MAP_INDIRECT_PTR);
2708 : #endif
2709 :
2710 : /* Handle expansion of magic cookies for variant constructs first. */
/* Both the ordinary statements and the PHI nodes of every block are
   walked with the same callback.  */
2711 20565 : if (cgraph_node::get (cfun->decl)->has_omp_variant_constructs)
2712 1822 : FOR_EACH_BB_FN (bb, cfun)
2713 : {
2714 6873 : for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
2715 3409 : walk_gimple_op (gsi_stmt (gsi), resolve_omp_variant_cookies, NULL);
2716 2510 : for (gsi = gsi_start_phis (bb); !gsi_end_p (gsi); gsi_next (&gsi))
2717 778 : walk_gimple_op (gsi_stmt (gsi), resolve_omp_variant_cookies, NULL);
2718 : }
2719 :
/* Main walk: only call statements are of interest.  Calls that are not
   internal functions are left alone, except for the indirect-call
   redirection done in the accelerator compiler.  */
2720 55539 : FOR_EACH_BB_FN (bb, cfun)
2721 239245 : for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
2722 : {
2723 169297 : gimple *stmt = gsi_stmt (gsi);
2724 169297 : if (!is_gimple_call (stmt))
2725 152561 : continue;
2726 16736 : if (!gimple_call_internal_p (stmt))
2727 : {
2728 : #ifdef ACCEL_COMPILER
/* A call without a known fndecl is an indirect call: obtain the callee
   by passing the original function pointer to MAP_PTR_FN and call the
   returned pointer instead.  */
2729 : if (omp_redirect_indirect_calls
2730 : && gimple_call_fndecl (stmt) == NULL_TREE)
2731 : {
2732 : gcall *orig_call = dyn_cast <gcall *> (stmt);
2733 : tree call_fn = gimple_call_fn (stmt);
2734 : tree fn_ty = TREE_TYPE (call_fn);
2735 :
/* An OBJ_TYPE_REF callee is first copied into a temporary so that the
   temporary is what gets passed to MAP_PTR_FN.  */
2736 : if (TREE_CODE (call_fn) == OBJ_TYPE_REF)
2737 : {
2738 : tree obj_ref = create_tmp_reg (TREE_TYPE (call_fn),
2739 : ".ind_fn_objref");
2740 : gimple *gassign = gimple_build_assign (obj_ref, call_fn);
2741 : gsi_insert_before (&gsi, gassign, GSI_SAME_STMT);
2742 : call_fn = obj_ref;
2743 : }
2744 : tree mapped_fn = create_tmp_reg (fn_ty, ".ind_fn");
2745 : gimple *gcall =
2746 : gimple_build_call (map_ptr_fn, 1, call_fn);
2747 : gimple_set_location (gcall, gimple_location (stmt));
2748 : gimple_call_set_lhs (gcall, mapped_fn);
2749 : gsi_insert_before (&gsi, gcall, GSI_SAME_STMT);
2750 :
2751 : gimple_call_set_fn (orig_call, mapped_fn);
2752 : update_stmt (orig_call);
2753 : }
2754 : #endif
2755 16538 : continue;
2756 : }
/* Internal function call.  RHS, when set by the switch below, is the
   value that replaces the call.  A `continue' inside the switch keeps
   the call statement as it is; a `break' falls through to the
   replacement code after the switch.  TYPE falls back to
   integer_type_node when the call has no LHS.  */
2757 198 : tree lhs = gimple_call_lhs (stmt), rhs = NULL_TREE;
2758 198 : tree type = lhs ? TREE_TYPE (lhs) : integer_type_node;
2759 198 : switch (gimple_call_internal_fn (stmt))
2760 : {
2761 0 : case IFN_GOMP_TARGET_REV:
2762 0 : {
2763 : #ifndef ACCEL_COMPILER
/* Host compiler: the statement following this one must be the
   BUILT_IN_GOMP_TARGET call.  Its argument 1 is replaced by argument 0
   of this internal call, and the matching offload_funcs entry is
   switched from the old decl to the new one.  */
2764 0 : gimple_stmt_iterator gsi2 = gsi;
2765 0 : gsi_next (&gsi2);
2766 0 : gcc_assert (!gsi_end_p (gsi2));
2767 0 : gcc_assert (gimple_call_builtin_p (gsi_stmt (gsi2),
2768 : BUILT_IN_GOMP_TARGET));
2769 0 : tree old_decl
2770 0 : = TREE_OPERAND (gimple_call_arg (gsi_stmt (gsi2), 1), 0);
2771 0 : tree new_decl = gimple_call_arg (gsi_stmt (gsi), 0);
2772 0 : gimple_call_set_arg (gsi_stmt (gsi2), 1, new_decl);
2773 0 : update_stmt (gsi_stmt (gsi2));
2774 0 : new_decl = TREE_OPERAND (new_decl, 0);
2775 0 : unsigned i;
2776 0 : unsigned num_funcs = vec_safe_length (offload_funcs);
2777 0 : for (i = 0; i < num_funcs; i++)
2778 : {
2779 0 : if ((*offload_funcs)[i] == old_decl)
2780 : {
2781 0 : (*offload_funcs)[i] = new_decl;
2782 0 : break;
2783 : }
2784 0 : else if ((*offload_funcs)[i] == new_decl)
2785 : break; /* This can happen due to inlining. */
2786 : }
2787 0 : gcc_assert (i < num_funcs);
2788 : #else
2789 : tree old_decl = TREE_OPERAND (gimple_call_arg (gsi_stmt (gsi), 0),
2790 : 0);
2791 : #endif
2792 : /* FIXME: Find a way to actually prevent outputting the empty-body
2793 : old_decl as debug symbol + function in the assembly file. */
2794 0 : cgraph_node *node = cgraph_node::get (old_decl);
2795 0 : node->address_taken = false;
2796 0 : node->need_lto_streaming = false;
2797 0 : node->offloadable = false;
2798 :
2799 0 : unlink_stmt_vdef (stmt);
2800 : }
2801 0 : break;
2802 0 : case IFN_GOMP_USE_SIMT:
2803 0 : rhs = vf == 1 ? integer_zero_node : integer_one_node;
2804 : break;
2805 0 : case IFN_GOMP_SIMT_ENTER:
2806 0 : rhs = vf == 1 ? gimple_call_arg (stmt, 0) : NULL_TREE;
2807 0 : goto simtreg_enter_exit;
2808 0 : case IFN_GOMP_SIMT_ENTER_ALLOC:
2809 0 : if (vf != 1)
2810 0 : ompdevlow_adjust_simt_enter (&gsi, &regimplify);
2811 0 : rhs = vf == 1 ? null_pointer_node : NULL_TREE;
2812 0 : goto simtreg_enter_exit;
2813 0 : case IFN_GOMP_SIMT_EXIT:
/* Shared tail of the SIMT enter/exit cases: with VF != 1 the call is
   kept; with VF == 1 it is replaced after the switch.  */
2814 0 : simtreg_enter_exit:
2815 0 : if (vf != 1)
2816 0 : continue;
2817 0 : unlink_stmt_vdef (stmt);
2818 0 : break;
2819 0 : case IFN_GOMP_SIMT_LANE:
2820 0 : case IFN_GOMP_SIMT_LAST_LANE:
2821 0 : rhs = vf == 1 ? build_zero_cst (type) : NULL_TREE;
2822 : break;
2823 0 : case IFN_GOMP_SIMT_VF:
2824 0 : rhs = build_int_cst (type, vf);
2825 0 : break;
2826 2 : case IFN_GOMP_MAX_VF:
2827 2 : rhs = build_int_cst (type, omp_max_vf (false));
2828 2 : break;
2829 0 : case IFN_GOMP_SIMT_ORDERED_PRED:
2830 0 : rhs = vf == 1 ? integer_zero_node : NULL_TREE;
2831 0 : if (rhs || !lhs)
2832 0 : unlink_stmt_vdef (stmt);
2833 : break;
2834 0 : case IFN_GOMP_SIMT_VOTE_ANY:
2835 0 : case IFN_GOMP_SIMT_XCHG_BFLY:
2836 0 : case IFN_GOMP_SIMT_XCHG_IDX:
2837 0 : rhs = vf == 1 ? gimple_call_arg (stmt, 0) : NULL_TREE;
2838 : break;
/* The SIMD internal functions below are folded only when VF != 1.  */
2839 0 : case IFN_GOMP_SIMD_LANE:
2840 0 : case IFN_GOMP_SIMD_LAST_LANE:
2841 0 : rhs = vf != 1 ? build_zero_cst (type) : NULL_TREE;
2842 : break;
2843 0 : case IFN_GOMP_SIMD_VF:
2844 0 : rhs = vf != 1 ? build_one_cst (type) : NULL_TREE;
2845 : break;
2846 196 : default:
2847 196 : continue;
2848 196 : }
/* A call whose result is used but for which no replacement value was
   computed stays in place.  Otherwise the call becomes an assignment of
   RHS to LHS, or a nop when it has no LHS.  */
2849 2 : if (lhs && !rhs)
2850 0 : continue;
2851 2 : stmt = lhs ? gimple_build_assign (lhs, rhs) : gimple_build_nop ();
2852 2 : gsi_replace (&gsi, stmt, false);
2853 : }
/* Walk all statements backwards; for every statement on which the
   find_simtpriv_var_op walk returns non-NULL, clobbers are removed and
   any other statement has its operands regimplified.  */
2854 20565 : if (regimplify)
2855 0 : FOR_EACH_BB_REVERSE_FN (bb, cfun)
2856 0 : for (gsi = gsi_last_bb (bb); !gsi_end_p (gsi); gsi_prev (&gsi))
2857 0 : if (walk_gimple_stmt (&gsi, NULL, find_simtpriv_var_op, NULL))
2858 : {
2859 0 : if (gimple_clobber_p (gsi_stmt (gsi)))
2860 0 : gsi_remove (&gsi, true);
2861 : else
2862 0 : gimple_regimplify_operands (gsi_stmt (gsi), &gsi);
2863 : }
2864 20565 : if (vf != 1)
2865 0 : cfun->has_force_vectorize_loops = false;
2866 20565 : return 0;
2867 : }
2868 :
2869 : namespace {
2870 :
/* Static description of the "ompdevlow" GIMPLE pass; each field is
   labelled by the comment next to it.  */
2871 : const pass_data pass_data_omp_device_lower =
2872 : {
2873 : GIMPLE_PASS, /* type */
2874 : "ompdevlow", /* name */
2875 : OPTGROUP_OMP, /* optinfo_flags */
2876 : TV_NONE, /* tv_id */
2877 : PROP_cfg, /* properties_required */
2878 : PROP_gimple_lomp_dev, /* properties_provided */
2879 : 0, /* properties_destroyed */
2880 : 0, /* todo_flags_start */
2881 : TODO_update_ssa, /* todo_flags_finish */
2882 : };
2883 :
/* Pass object wrapping execute_omp_device_lower.  */
2884 : class pass_omp_device_lower : public gimple_opt_pass
2885 : {
2886 : public:
2887 285722 : pass_omp_device_lower (gcc::context *ctxt)
2888 571444 : : gimple_opt_pass (pass_data_omp_device_lower, ctxt)
2889 : {}
2890 :
2891 : /* opt_pass methods: */
/* Run the pass on FUN when FUN does not yet have the
   PROP_gimple_lomp_dev property, or when -fopenmp is in effect and
   either FUN's cgraph node has has_omp_variant_constructs set or (in the
   accelerator compiler only) offload_ind_funcs is non-empty.  */
2892 1472258 : bool gate (function *fun) final override
2893 : {
2894 1472258 : cgraph_node *node = cgraph_node::get (fun->decl);
2895 : #ifdef ACCEL_COMPILER
2896 : bool offload_ind_funcs_p = vec_safe_length (offload_ind_funcs) > 0;
2897 : #else
2898 1472258 : bool offload_ind_funcs_p = false;
2899 : #endif
2900 1472258 : return (!(fun->curr_properties & PROP_gimple_lomp_dev)
2901 1472258 : || (flag_openmp
2902 64557 : && (node->has_omp_variant_constructs || offload_ind_funcs_p)));
2903 : }
/* The function argument is unused; execute_omp_device_lower works on
   cfun.  */
2904 20565 : unsigned int execute (function *) final override
2905 : {
2906 20565 : return execute_omp_device_lower ();
2907 : }
2908 :
2909 : }; // class pass_omp_device_lower
2910 :
2911 : } // anon namespace
2912 :
/* Allocate and return a new pass_omp_device_lower instance for context
   CTXT.  */
2913 : gimple_opt_pass *
2914 285722 : make_pass_omp_device_lower (gcc::context *ctxt)
2915 : {
2916 285722 : return new pass_omp_device_lower (ctxt);
2917 : }
2918 :
2919 : /* "omp declare target link" handling pass. */
2920 :
2921 : namespace {
2922 :
/* Static description of the "omptargetlink" GIMPLE pass; each field is
   labelled by the comment next to it.  */
2923 : const pass_data pass_data_omp_target_link =
2924 : {
2925 : GIMPLE_PASS, /* type */
2926 : "omptargetlink", /* name */
2927 : OPTGROUP_OMP, /* optinfo_flags */
2928 : TV_NONE, /* tv_id */
2929 : PROP_ssa, /* properties_required */
2930 : 0, /* properties_provided */
2931 : 0, /* properties_destroyed */
2932 : 0, /* todo_flags_start */
2933 : TODO_update_ssa, /* todo_flags_finish */
2934 : };
2935 :
/* Pass object for the "omp declare target link" handling; execute is
   defined out of line further down in this namespace.  */
2936 : class pass_omp_target_link : public gimple_opt_pass
2937 : {
2938 : public:
2939 285722 : pass_omp_target_link (gcc::context *ctxt)
2940 571444 : : gimple_opt_pass (pass_data_omp_target_link, ctxt)
2941 : {}
2942 :
2943 : /* opt_pass methods: */
/* In the accelerator compiler the pass runs for functions for which
   offloading_function_p is true; in any other build it never runs.  */
2944 1472258 : bool gate (function *fun) final override
2945 : {
2946 : #ifdef ACCEL_COMPILER
2947 : return offloading_function_p (fun->decl);
2948 : #else
2949 1472258 : (void) fun;
2950 1472258 : return false;
2951 : #endif
2952 : }
2953 :
2954 : unsigned execute (function *) final override;
2955 : };
2956 :
2957 : /* Callback for walk_gimple_stmt used to scan for link var operands. */
/* If *TP is a global variable that has a DECL_VALUE_EXPR and carries the
   "omp declare target link" attribute, replace *TP with an unshared copy
   of that value expression and store the replacement in the info field
   of the walk_stmt_info passed as DATA, which is how the caller detects
   that a replacement happened.  Always returns NULL_TREE, so the walk is
   never cut short by this callback.  */
2958 :
2959 : static tree
2960 0 : process_link_var_op (tree *tp, int *walk_subtrees, void *data)
2961 : {
2962 0 : struct walk_stmt_info *wi = (struct walk_stmt_info *) data;
2963 0 : tree t = *tp;
2964 :
2965 0 : if (VAR_P (t)
2966 0 : && DECL_HAS_VALUE_EXPR_P (t)
2967 0 : && is_global_var (t)
2968 0 : && lookup_attribute ("omp declare target link", DECL_ATTRIBUTES (t)))
2969 : {
2970 0 : wi->info = *tp = unshare_expr (DECL_VALUE_EXPR (t));
2971 0 : *walk_subtrees = 0;
2972 0 : return NULL_TREE;
2973 : }
2974 :
2975 : return NULL_TREE;
2976 : }
2977 :
/* Walk every statement of FUN.  Calls to BUILT_IN_GOMP_TARGET get their
   argument 1 replaced by a null pointer, unless the exception below
   applies; afterwards each statement is scanned with process_link_var_op
   and its operands are regimplified if something was replaced.  Always
   returns 0.  */
2978 : unsigned
2979 0 : pass_omp_target_link::execute (function *fun)
2980 : {
2981 0 : basic_block bb;
2982 0 : FOR_EACH_BB_FN (bb, fun)
2983 : {
2984 0 : gimple_stmt_iterator gsi;
2985 0 : for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
2986 : {
2987 0 : if (gimple_call_builtin_p (gsi_stmt (gsi), BUILT_IN_GOMP_TARGET))
2988 : {
2989 0 : tree dev = gimple_call_arg (gsi_stmt (gsi), 0);
2990 0 : tree fn = gimple_call_arg (gsi_stmt (gsi), 1);
2991 0 : if (POINTER_TYPE_P (TREE_TYPE (fn)))
2992 0 : fn = TREE_OPERAND (fn, 0);
/* When the device argument is the constant GOMP_DEVICE_HOST_FALLBACK and
   FN carries the "omp target device_ancestor_nohost" attribute, the call
   is left as is; note that the `continue' also skips the link-variable
   scan below for this statement.  */
2993 0 : if (TREE_CODE (dev) == INTEGER_CST
2994 0 : && wi::to_wide (dev) == GOMP_DEVICE_HOST_FALLBACK
2995 0 : && lookup_attribute ("omp target device_ancestor_nohost",
2996 0 : DECL_ATTRIBUTES (fn)) != NULL_TREE)
2997 0 : continue; /* ancestor:1 */
2998 : /* Nullify the second argument of __builtin_GOMP_target_ext. */
2999 0 : gimple_call_set_arg (gsi_stmt (gsi), 1, null_pointer_node);
3000 0 : update_stmt (gsi_stmt (gsi));
3001 : }
/* wi.info is cleared by the memset and only set by process_link_var_op
   when it replaced an operand.  */
3002 0 : struct walk_stmt_info wi;
3003 0 : memset (&wi, 0, sizeof (wi));
3004 0 : walk_gimple_stmt (&gsi, NULL, process_link_var_op, &wi);
3005 0 : if (wi.info)
3006 0 : gimple_regimplify_operands (gsi_stmt (gsi), &gsi);
3007 : }
3008 : }
3009 :
3010 0 : return 0;
3011 : }
3012 :
3013 : } // anon namespace
3014 :
/* Allocate and return a new pass_omp_target_link instance for context
   CTXT.  */
3015 : gimple_opt_pass *
3016 285722 : make_pass_omp_target_link (gcc::context *ctxt)
3017 : {
3018 285722 : return new pass_omp_target_link (ctxt);
3019 : }
|