Branch data Line data Source code
1 : : /* Decompose OpenACC 'kernels' constructs into parts, a sequence of compute
2 : : constructs
3 : :
4 : : Copyright (C) 2020-2025 Free Software Foundation, Inc.
5 : :
6 : : This file is part of GCC.
7 : :
8 : : GCC is free software; you can redistribute it and/or modify it under
9 : : the terms of the GNU General Public License as published by the Free
10 : : Software Foundation; either version 3, or (at your option) any later
11 : : version.
12 : :
13 : : GCC is distributed in the hope that it will be useful, but WITHOUT ANY
14 : : WARRANTY; without even the implied warranty of MERCHANTABILITY or
15 : : FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
16 : : for more details.
17 : :
18 : : You should have received a copy of the GNU General Public License
19 : : along with GCC; see the file COPYING3. If not see
20 : : <http://www.gnu.org/licenses/>. */
21 : :
22 : : #include "config.h"
23 : : #include "system.h"
24 : : #include "coretypes.h"
25 : : #include "backend.h"
26 : : #include "target.h"
27 : : #include "tree.h"
28 : : #include "langhooks.h"
29 : : #include "gimple.h"
30 : : #include "tree-pass.h"
31 : : #include "cgraph.h"
32 : : #include "fold-const.h"
33 : : #include "gimplify.h"
34 : : #include "gimple-iterator.h"
35 : : #include "gimple-walk.h"
36 : : #include "gomp-constants.h"
37 : : #include "omp-general.h"
38 : : #include "diagnostic-core.h"
39 : :
40 : :
41 : : /* This preprocessing pass is run immediately before lower_omp. It decomposes
42 : : OpenACC 'kernels' constructs into parts, a sequence of compute constructs.
43 : :
44 : : The translation is as follows:
45 : : - The entire 'kernels' region is turned into a 'data' region with clauses
46 : : taken from the 'kernels' region. New 'create' clauses are added for all
47 : : variables declared at the top level in the kernels region.
48 : : - Any loop nests annotated with an OpenACC 'loop' directive are wrapped in
49 : : a new compute construct.
50 : : - 'loop' directives without an explicit 'independent' or 'seq' clause
51 : : get an 'auto' clause added; other clauses are preserved on the loop
52 : : or moved to the new surrounding compute construct, as applicable.
53 : : - Any sequences of other code (non-loops, non-OpenACC 'loop's) are wrapped
54 : : in new "gang-single" compute construct: 'worker'/'vector' parallelism is
55 : : preserved, but 'num_gangs (1)' is enforced.
56 : : - Both points above only apply at the topmost level in the region, that
57 : : is, the transformation does not introduce new compute constructs inside
58 : : nested statement bodies. In particular, this means that a
59 : : gang-parallelizable loop inside an 'if' statement is made "gang-single".
60 : : - In order to make the host wait only once for the whole region instead
61 : : of once per device kernel launch, the new compute constructs are
62 : : annotated 'async'. Unless the original 'kernels' construct already was
63 : : marked 'async', the entire region ends with a 'wait' directive. If the
64 : : original 'kernels' construct was marked 'async', the synthesized 'async'
65 : : clauses use the original 'kernels' construct's 'async' argument
66 : : (possibly implicit).
67 : : */
68 : :
69 : :
70 : : /*TODO Things are conceptually wrong here: 'loop' clauses may be hidden behind
71 : : 'device_type', so we have to defer a lot of processing until we're in the
72 : : offloading compilation. "Fortunately", GCC doesn't support the OpenACC
73 : : 'device_type' clause yet, so we get away that. */
74 : :
75 : :
76 : : /* Helper function for decompose_kernels_region_body. If STMT contains a
77 : : "top-level" OMP_FOR statement, returns a pointer to that statement;
78 : : returns NULL otherwise.
79 : :
80 : : A "top-level" OMP_FOR statement is one that is possibly accompanied by
81 : : small snippets of setup code. Specifically, this function accepts an
82 : : OMP_FOR possibly wrapped in a singleton bind and a singleton try
83 : : statement to allow for a local loop variable, but not an OMP_FOR
84 : : statement nested in any other constructs. Alternatively, it accepts a
85 : : non-singleton bind containing only assignments and then an OMP_FOR
86 : : statement at the very end. The former style can be generated by the C
87 : : frontend, the latter by the Fortran frontend. */
88 : :
89 : : static gimple *
90 : 18282 : top_level_omp_for_in_stmt (gimple *stmt)
91 : : {
92 : 18282 : if (gimple_code (stmt) == GIMPLE_OMP_FOR)
93 : : return stmt;
94 : :
95 : 17999 : if (gimple_code (stmt) == GIMPLE_BIND)
96 : : {
97 : 1634 : gimple_seq body = gimple_bind_body (as_a <gbind *> (stmt));
98 : 1634 : if (gimple_seq_singleton_p (body))
99 : : {
100 : : /* Accept an OMP_FOR statement, or a try statement containing only
101 : : a single OMP_FOR. */
102 : 915 : gimple *maybe_for_or_try = gimple_seq_first_stmt (body);
103 : 915 : if (gimple_code (maybe_for_or_try) == GIMPLE_OMP_FOR)
104 : 1597 : return maybe_for_or_try;
105 : 55 : else if (gimple_code (maybe_for_or_try) == GIMPLE_TRY)
106 : : {
107 : 25 : gimple_seq try_body = gimple_try_eval (maybe_for_or_try);
108 : 714 : if (!gimple_seq_singleton_p (try_body))
109 : : return NULL;
110 : 0 : gimple *maybe_omp_for_stmt = gimple_seq_first_stmt (try_body);
111 : 0 : if (gimple_code (maybe_omp_for_stmt) == GIMPLE_OMP_FOR)
112 : : return maybe_omp_for_stmt;
113 : : }
114 : : }
115 : : else
116 : : {
117 : : gimple_stmt_iterator gsi;
118 : : /* Accept only a block of optional assignments followed by an
119 : : OMP_FOR at the end. No other kinds of statements allowed. */
120 : 1368 : for (gsi = gsi_start (body); !gsi_end_p (gsi); gsi_next (&gsi))
121 : : {
122 : 1361 : gimple *body_stmt = gsi_stmt (gsi);
123 : 1361 : if (gimple_code (body_stmt) == GIMPLE_ASSIGN)
124 : 649 : continue;
125 : 712 : else if (gimple_code (body_stmt) == GIMPLE_OMP_FOR
126 : 712 : && gsi_one_before_end_p (gsi))
127 : 1597 : return body_stmt;
128 : : else
129 : 664 : return NULL;
130 : : }
131 : : }
132 : : }
133 : :
134 : : return NULL;
135 : : }
136 : :
137 : : /* Helper for adjust_region_code: evaluate the statement at GSI_P. */
138 : :
139 : : static tree
140 : 2346 : adjust_region_code_walk_stmt_fn (gimple_stmt_iterator *gsi_p,
141 : : bool *handled_ops_p,
142 : : struct walk_stmt_info *wi)
143 : : {
144 : 2346 : int *region_code = (int *) wi->info;
145 : :
146 : 2346 : gimple *stmt = gsi_stmt (*gsi_p);
147 : 2346 : switch (gimple_code (stmt))
148 : : {
149 : 428 : case GIMPLE_OMP_FOR:
150 : 428 : {
151 : 428 : tree clauses = gimple_omp_for_clauses (stmt);
152 : 428 : if (omp_find_clause (clauses, OMP_CLAUSE_INDEPENDENT))
153 : : {
154 : : /* Explicit 'independent' clause. */
155 : : /* Keep going; recurse into loop body. */
156 : : break;
157 : : }
158 : 361 : else if (omp_find_clause (clauses, OMP_CLAUSE_SEQ))
159 : : {
160 : : /* Explicit 'seq' clause. */
161 : : /* We'll "parallelize" if at some level a loop construct has been
162 : : marked up by the user as unparallelizable ('seq' clause; we'll
163 : : respect that in the later processing). Given that the user has
164 : : explicitly marked it up, this loop construct cannot be
165 : : performance-critical, and in this case it's also fine to
166 : : "parallelize" instead of "gang-single", because any outer or
167 : : inner loops may still exploit the available parallelism. */
168 : : /* Keep going; recurse into loop body. */
169 : : break;
170 : : }
171 : : else
172 : : {
173 : : /* Explicit or implicit 'auto' clause. */
174 : : /* The user would like this loop analyzed ('auto' clause) and
175 : : typically parallelized, but we don't have available yet the
176 : : compiler logic to analyze this, so can't parallelize it here, so
177 : : we'd very likely be running into a performance problem if we
178 : : were to execute this unparallelized, thus forward the whole loop
179 : : nest to 'parloops'. */
180 : 350 : *region_code = GF_OMP_TARGET_KIND_OACC_KERNELS;
181 : : /* Terminate: final decision for this region. */
182 : 350 : *handled_ops_p = true;
183 : 350 : return integer_zero_node;
184 : : }
185 : : gcc_unreachable ();
186 : : }
187 : :
188 : 266 : case GIMPLE_COND:
189 : 266 : case GIMPLE_GOTO:
190 : 266 : case GIMPLE_SWITCH:
191 : 266 : case GIMPLE_ASM:
192 : 266 : case GIMPLE_ASSUME:
193 : 266 : case GIMPLE_TRANSACTION:
194 : 266 : case GIMPLE_RETURN:
195 : : /* Statement that might constitute some looping/control flow pattern. */
196 : : /* The user would like this code analyzed (implicit inside a 'kernels'
197 : : region) and typically parallelized, but we don't have available yet
198 : : the compiler logic to analyze this, so can't parallelize it here, so
199 : : we'd very likely be running into a performance problem if we were to
200 : : execute this unparallelized, thus forward the whole thing to
201 : : 'parloops'. */
202 : 266 : *region_code = GF_OMP_TARGET_KIND_OACC_KERNELS;
203 : : /* Terminate: final decision for this region. */
204 : 266 : *handled_ops_p = true;
205 : 266 : return integer_zero_node;
206 : :
207 : : default:
208 : : /* Keep going. */
209 : : break;
210 : : }
211 : :
212 : : return NULL;
213 : : }
214 : :
215 : : /* Adjust the REGION_CODE for the region in GS. */
216 : :
217 : : static void
218 : 812 : adjust_region_code (gimple_seq gs, int *region_code)
219 : : {
220 : 812 : struct walk_stmt_info wi;
221 : 812 : memset (&wi, 0, sizeof (wi));
222 : 812 : wi.info = region_code;
223 : 812 : walk_gimple_seq (gs, adjust_region_code_walk_stmt_fn, NULL, &wi);
224 : 812 : }
225 : :
226 : : /* Helper function for make_loops_gang_single for walking the tree. If the
227 : : statement indicated by GSI_P is an OpenACC for loop with a gang clause,
228 : : issue a warning and remove the clause. */
229 : :
230 : : static tree
231 : 310 : visit_loops_in_gang_single_region (gimple_stmt_iterator *gsi_p,
232 : : bool *handled_ops_p,
233 : : struct walk_stmt_info *)
234 : : {
235 : 310 : *handled_ops_p = false;
236 : :
237 : 310 : gimple *stmt = gsi_stmt (*gsi_p);
238 : 310 : switch (gimple_code (stmt))
239 : : {
240 : 6 : case GIMPLE_OMP_FOR:
241 : : /*TODO Given the current 'adjust_region_code' algorithm, this is
242 : : actually... */
243 : : #if 0
244 : : gcc_unreachable ();
245 : : #else
246 : : /* ..., but due to bugs (PR100400), we may actually come here.
247 : : Reliably catch this, regardless of checking level. */
248 : 6 : internal_error ("PR100400");
249 : : #endif
250 : :
251 : : {
252 : : tree clauses = gimple_omp_for_clauses (stmt);
253 : : tree prev_clause = NULL;
254 : : for (tree clause = clauses; clause; clause = OMP_CLAUSE_CHAIN (clause))
255 : : {
256 : : if (OMP_CLAUSE_CODE (clause) == OMP_CLAUSE_GANG)
257 : : {
258 : : /* It makes no sense to have a 'gang' clause in a "gang-single"
259 : : region, so warn and remove it. */
260 : : warning_at (gimple_location (stmt), 0,
261 : : "conditionally executed loop in %<kernels%> region"
262 : : " will be executed by a single gang;"
263 : : " ignoring %<gang%> clause");
264 : : if (prev_clause != NULL)
265 : : OMP_CLAUSE_CHAIN (prev_clause) = OMP_CLAUSE_CHAIN (clause);
266 : : else
267 : : clauses = OMP_CLAUSE_CHAIN (clause);
268 : :
269 : : break;
270 : : }
271 : : prev_clause = clause;
272 : : }
273 : : gimple_omp_for_set_clauses (stmt, clauses);
274 : : }
275 : : /* No need to recurse into nested statements; no loop nested inside
276 : : this loop can be gang-partitioned. */
277 : : sorry ("%<gang%> loop in %<gang-single%> region");
278 : : *handled_ops_p = true;
279 : : break;
280 : :
281 : 304 : default:
282 : 304 : break;
283 : : }
284 : :
285 : 304 : return NULL;
286 : : }
287 : :
288 : : /* Visit all nested OpenACC loops in the sequence indicated by GS. This
289 : : statement is expected to be inside a gang-single region. Issue a warning
290 : : for any loops inside it that have gang clauses and remove the clauses. */
291 : :
292 : : static void
293 : 134 : make_loops_gang_single (gimple_seq gs)
294 : : {
295 : 134 : struct walk_stmt_info wi;
296 : 134 : memset (&wi, 0, sizeof (wi));
297 : 134 : walk_gimple_seq (gs, visit_loops_in_gang_single_region, NULL, &wi);
298 : 128 : }
299 : :
300 : : /* Construct a "gang-single" compute construct at LOC containing the STMTS.
301 : : Annotate with CLAUSES, which must not contain a 'num_gangs' clause, and an
302 : : additional 'num_gangs (1)' clause to force "gang-single" execution. */
303 : :
304 : : static gimple *
305 : 400 : make_region_seq (location_t loc, gimple_seq stmts,
306 : : tree num_gangs_clause,
307 : : tree num_workers_clause,
308 : : tree vector_length_clause,
309 : : tree clauses)
310 : : {
311 : : /* This correctly unshares the entire clause chain rooted here. */
312 : 400 : clauses = unshare_expr (clauses);
313 : :
314 : 400 : dump_user_location_t loc_stmts_first = gimple_seq_first (stmts);
315 : :
316 : : /* Figure out the region code for this region. */
317 : : /* Optimistic default: assume "setup code", no looping; thus not
318 : : performance-critical. */
319 : 400 : int region_code = GF_OMP_TARGET_KIND_OACC_PARALLEL_KERNELS_GANG_SINGLE;
320 : 400 : adjust_region_code (stmts, ®ion_code);
321 : :
322 : 400 : if (region_code == GF_OMP_TARGET_KIND_OACC_PARALLEL_KERNELS_GANG_SINGLE)
323 : : {
324 : 134 : if (dump_enabled_p ())
325 : : /*TODO MSG_MISSED_OPTIMIZATION? */
326 : 134 : dump_printf_loc (MSG_NOTE, loc_stmts_first,
327 : : "beginning %<gang-single%> part"
328 : : " in OpenACC %<kernels%> region\n");
329 : :
330 : : /* Synthesize a 'num_gangs (1)' clause. */
331 : 134 : tree gang_single_clause = build_omp_clause (loc, OMP_CLAUSE_NUM_GANGS);
332 : 134 : OMP_CLAUSE_OPERAND (gang_single_clause, 0) = integer_one_node;
333 : 134 : OMP_CLAUSE_CHAIN (gang_single_clause) = clauses;
334 : 134 : clauses = gang_single_clause;
335 : :
336 : : /* Remove and issue warnings about gang clauses on any OpenACC
337 : : loops nested inside this sequentially executed statement. */
338 : 134 : make_loops_gang_single (stmts);
339 : : }
340 : 266 : else if (region_code == GF_OMP_TARGET_KIND_OACC_KERNELS)
341 : : {
342 : 266 : if (dump_enabled_p ())
343 : 254 : dump_printf_loc (MSG_NOTE, loc_stmts_first,
344 : : "beginning %<parloops%> part"
345 : : " in OpenACC %<kernels%> region\n");
346 : :
347 : : /* As we're transforming a 'GF_OMP_TARGET_KIND_OACC_KERNELS' into another
348 : : 'GF_OMP_TARGET_KIND_OACC_KERNELS', this isn't doing any of the clauses
349 : : mangling that 'make_region_loop_nest' is doing. */
350 : : /* Re-assemble the clauses stripped off earlier. */
351 : 266 : if (num_gangs_clause != NULL)
352 : : {
353 : 4 : tree c = unshare_expr (num_gangs_clause);
354 : 4 : OMP_CLAUSE_CHAIN (c) = clauses;
355 : 4 : clauses = c;
356 : : }
357 : 266 : if (num_workers_clause != NULL)
358 : : {
359 : 4 : tree c = unshare_expr (num_workers_clause);
360 : 4 : OMP_CLAUSE_CHAIN (c) = clauses;
361 : 4 : clauses = c;
362 : : }
363 : 266 : if (vector_length_clause != NULL)
364 : : {
365 : 4 : tree c = unshare_expr (vector_length_clause);
366 : 4 : OMP_CLAUSE_CHAIN (c) = clauses;
367 : 4 : clauses = c;
368 : : }
369 : : }
370 : : else
371 : 0 : gcc_unreachable ();
372 : :
373 : : /* Build the gang-single region. */
374 : 394 : gimple *single_region = gimple_build_omp_target (NULL, region_code, clauses);
375 : 394 : gimple_set_location (single_region, loc);
376 : 394 : gbind *single_body = gimple_build_bind (NULL, stmts, make_node (BLOCK));
377 : 394 : gimple_omp_set_body (single_region, single_body);
378 : :
379 : 394 : return single_region;
380 : : }
381 : :
382 : : /* Helper function for make_region_loop_nest. Adds a 'num_gangs'
383 : : ('num_workers', 'vector_length') clause to the given CLAUSES, either the one
384 : : from the parent compute construct (PARENT_CLAUSE) or a new one based on the
385 : : loop's own LOOP_CLAUSE ('gang (num: N)' or similar for 'worker' or 'vector'
386 : : clauses) with the given CLAUSE_CODE. Does nothing if neither PARENT_CLAUSE
387 : : nor LOOP_CLAUSE exist. Returns the new clauses. */
388 : :
389 : : static tree
390 : 1236 : add_parent_or_loop_num_clause (tree parent_clause, tree loop_clause,
391 : : omp_clause_code clause_code, tree clauses)
392 : : {
393 : 1236 : if (parent_clause != NULL)
394 : : {
395 : 6 : tree num_clause = unshare_expr (parent_clause);
396 : 6 : OMP_CLAUSE_CHAIN (num_clause) = clauses;
397 : 6 : clauses = num_clause;
398 : : }
399 : 1230 : else if (loop_clause != NULL)
400 : : {
401 : : /* The kernels region does not have a 'num_gangs' clause, but the loop
402 : : itself had a 'gang (num: N)' clause. Honor it by adding a
403 : : 'num_gangs (N)' clause on the compute construct. */
404 : 0 : tree num = OMP_CLAUSE_OPERAND (loop_clause, 0);
405 : 0 : tree new_num_clause
406 : 0 : = build_omp_clause (OMP_CLAUSE_LOCATION (loop_clause), clause_code);
407 : 0 : OMP_CLAUSE_OPERAND (new_num_clause, 0) = num;
408 : 0 : OMP_CLAUSE_CHAIN (new_num_clause) = clauses;
409 : 0 : clauses = new_num_clause;
410 : : }
411 : 1236 : return clauses;
412 : : }
413 : :
414 : : /* Helper for make_region_loop_nest, looking for 'worker (num: N)' or 'vector
415 : : (length: N)' clauses in nested loops. Removes the argument, transferring it
416 : : to the enclosing compute construct (via WI->INFO). If arguments within the
417 : : same loop nest conflict, emits a warning.
418 : :
419 : : This function also decides whether to add an 'auto' clause on each of these
420 : : nested loops. */
421 : :
422 : : struct adjust_nested_loop_clauses_wi_info
423 : : {
424 : : tree *loop_gang_clause_ptr;
425 : : tree *loop_worker_clause_ptr;
426 : : tree *loop_vector_clause_ptr;
427 : : };
428 : :
429 : : static tree
430 : 315 : adjust_nested_loop_clauses (gimple_stmt_iterator *gsi_p, bool *,
431 : : struct walk_stmt_info *wi)
432 : : {
433 : 315 : struct adjust_nested_loop_clauses_wi_info *wi_info
434 : : = (struct adjust_nested_loop_clauses_wi_info *) wi->info;
435 : 315 : gimple *stmt = gsi_stmt (*gsi_p);
436 : :
437 : 315 : if (gimple_code (stmt) == GIMPLE_OMP_FOR)
438 : : {
439 : 10 : bool add_auto_clause = true;
440 : 10 : tree loop_clauses = gimple_omp_for_clauses (stmt);
441 : 10 : tree loop_clause = loop_clauses;
442 : 30 : for (; loop_clause; loop_clause = OMP_CLAUSE_CHAIN (loop_clause))
443 : : {
444 : 20 : tree *outer_clause_ptr = NULL;
445 : 20 : switch (OMP_CLAUSE_CODE (loop_clause))
446 : : {
447 : 0 : case OMP_CLAUSE_GANG:
448 : 0 : outer_clause_ptr = wi_info->loop_gang_clause_ptr;
449 : 0 : break;
450 : 0 : case OMP_CLAUSE_WORKER:
451 : 0 : outer_clause_ptr = wi_info->loop_worker_clause_ptr;
452 : 0 : break;
453 : 0 : case OMP_CLAUSE_VECTOR:
454 : 0 : outer_clause_ptr = wi_info->loop_vector_clause_ptr;
455 : 0 : break;
456 : : case OMP_CLAUSE_SEQ:
457 : : case OMP_CLAUSE_INDEPENDENT:
458 : : case OMP_CLAUSE_AUTO:
459 : : add_auto_clause = false;
460 : : default:
461 : : break;
462 : : }
463 : 0 : if (outer_clause_ptr != NULL)
464 : : {
465 : 0 : if (OMP_CLAUSE_OPERAND (loop_clause, 0) != NULL
466 : 0 : && *outer_clause_ptr == NULL)
467 : : {
468 : : /* Transfer the clause to the enclosing compute construct and
469 : : remove the numerical argument from the 'loop'. */
470 : 0 : *outer_clause_ptr = unshare_expr (loop_clause);
471 : 0 : OMP_CLAUSE_OPERAND (loop_clause, 0) = NULL;
472 : : }
473 : 0 : else if (OMP_CLAUSE_OPERAND (loop_clause, 0) != NULL &&
474 : 0 : OMP_CLAUSE_OPERAND (*outer_clause_ptr, 0) != NULL)
475 : : {
476 : : /* See if both of these are the same constant. If they
477 : : aren't, emit a warning. */
478 : 0 : tree old_op = OMP_CLAUSE_OPERAND (*outer_clause_ptr, 0);
479 : 0 : tree new_op = OMP_CLAUSE_OPERAND (loop_clause, 0);
480 : 0 : if (!(cst_and_fits_in_hwi (old_op) &&
481 : 0 : cst_and_fits_in_hwi (new_op) &&
482 : 0 : int_cst_value (old_op) == int_cst_value (new_op)))
483 : : {
484 : 0 : const char *clause_name
485 : 0 : = omp_clause_code_name[OMP_CLAUSE_CODE (loop_clause)];
486 : 0 : error_at (gimple_location (stmt),
487 : : "cannot honor conflicting %qs clause",
488 : : clause_name);
489 : 0 : inform (OMP_CLAUSE_LOCATION (*outer_clause_ptr),
490 : : "location of the previous clause"
491 : : " in the same loop nest");
492 : : }
493 : 0 : OMP_CLAUSE_OPERAND (loop_clause, 0) = NULL;
494 : : }
495 : : }
496 : : }
497 : 10 : if (add_auto_clause)
498 : : {
499 : 0 : tree auto_clause
500 : 0 : = build_omp_clause (gimple_location (stmt), OMP_CLAUSE_AUTO);
501 : 0 : OMP_CLAUSE_CHAIN (auto_clause) = loop_clauses;
502 : 0 : gimple_omp_for_set_clauses (stmt, auto_clause);
503 : : }
504 : : }
505 : :
506 : 315 : return NULL;
507 : : }
508 : :
509 : : /* Helper for make_region_loop_nest. Transform OpenACC 'kernels'/'loop'
510 : : construct clauses into OpenACC 'parallel'/'loop' construct ones. */
511 : :
512 : : static tree
513 : 62 : transform_kernels_loop_clauses (gimple *omp_for,
514 : : tree num_gangs_clause,
515 : : tree num_workers_clause,
516 : : tree vector_length_clause,
517 : : tree clauses)
518 : : {
519 : : /* If this loop in a kernels region does not have an explicit 'seq',
520 : : 'independent', or 'auto' clause, we must give it an explicit 'auto'
521 : : clause.
522 : : We also check for 'gang (num: N)' clauses. These must not appear in
523 : : kernels regions that have their own 'num_gangs' clause. Otherwise, they
524 : : must be converted and put on the region; similarly for 'worker' and
525 : : 'vector' clauses. */
526 : 62 : bool add_auto_clause = true;
527 : 62 : tree loop_gang_clause = NULL, loop_worker_clause = NULL,
528 : 62 : loop_vector_clause = NULL;
529 : 62 : tree loop_clauses = gimple_omp_for_clauses (omp_for);
530 : 205 : for (tree loop_clause = loop_clauses;
531 : 205 : loop_clause;
532 : 143 : loop_clause = OMP_CLAUSE_CHAIN (loop_clause))
533 : : {
534 : 143 : bool found_num_clause = false;
535 : 143 : tree *clause_ptr, clause_to_check;
536 : 143 : switch (OMP_CLAUSE_CODE (loop_clause))
537 : : {
538 : : case OMP_CLAUSE_GANG:
539 : : found_num_clause = true;
540 : : clause_ptr = &loop_gang_clause;
541 : : clause_to_check = num_gangs_clause;
542 : : break;
543 : 0 : case OMP_CLAUSE_WORKER:
544 : 0 : found_num_clause = true;
545 : 0 : clause_ptr = &loop_worker_clause;
546 : 0 : clause_to_check = num_workers_clause;
547 : 0 : break;
548 : 0 : case OMP_CLAUSE_VECTOR:
549 : 0 : found_num_clause = true;
550 : 0 : clause_ptr = &loop_vector_clause;
551 : 0 : clause_to_check = vector_length_clause;
552 : 0 : break;
553 : 62 : case OMP_CLAUSE_INDEPENDENT:
554 : 62 : case OMP_CLAUSE_SEQ:
555 : 62 : case OMP_CLAUSE_AUTO:
556 : 62 : add_auto_clause = false;
557 : : default:
558 : : break;
559 : : }
560 : 64 : if (found_num_clause && OMP_CLAUSE_OPERAND (loop_clause, 0) != NULL)
561 : : {
562 : 0 : if (clause_to_check)
563 : : {
564 : 0 : const char *clause_name
565 : 0 : = omp_clause_code_name[OMP_CLAUSE_CODE (loop_clause)];
566 : 0 : const char *parent_clause_name
567 : 0 : = omp_clause_code_name[OMP_CLAUSE_CODE (clause_to_check)];
568 : 0 : error_at (OMP_CLAUSE_LOCATION (loop_clause),
569 : : "argument not permitted on %qs clause"
570 : : " in OpenACC %<kernels%> region with a %qs clause",
571 : : clause_name, parent_clause_name);
572 : 0 : inform (OMP_CLAUSE_LOCATION (clause_to_check),
573 : : "location of OpenACC %<kernels%>");
574 : : }
575 : : /* Copy the 'gang (N)'/'worker (N)'/'vector (N)' clause to the
576 : : enclosing compute construct. */
577 : 0 : *clause_ptr = unshare_expr (loop_clause);
578 : 0 : OMP_CLAUSE_CHAIN (*clause_ptr) = NULL;
579 : : /* Leave a 'gang'/'worker'/'vector' clause on the 'loop', but without
580 : : argument. */
581 : 0 : OMP_CLAUSE_OPERAND (loop_clause, 0) = NULL;
582 : : }
583 : : }
584 : 62 : if (add_auto_clause)
585 : : {
586 : 0 : tree auto_clause = build_omp_clause (gimple_location (omp_for),
587 : : OMP_CLAUSE_AUTO);
588 : 0 : OMP_CLAUSE_CHAIN (auto_clause) = loop_clauses;
589 : 0 : loop_clauses = auto_clause;
590 : : }
591 : 62 : gimple_omp_for_set_clauses (omp_for, loop_clauses);
592 : : /* We must also recurse into the loop; it might contain nested loops having
593 : : their own 'worker (num: W)' or 'vector (length: V)' clauses. Turn these
594 : : into 'worker'/'vector' clauses on the compute construct. */
595 : 62 : struct walk_stmt_info wi;
596 : 62 : memset (&wi, 0, sizeof (wi));
597 : 62 : struct adjust_nested_loop_clauses_wi_info wi_info;
598 : 62 : wi_info.loop_gang_clause_ptr = &loop_gang_clause;
599 : 62 : wi_info.loop_worker_clause_ptr = &loop_worker_clause;
600 : 62 : wi_info.loop_vector_clause_ptr = &loop_vector_clause;
601 : 62 : wi.info = &wi_info;
602 : 62 : gimple *body = gimple_omp_body (omp_for);
603 : 62 : walk_gimple_seq (body, adjust_nested_loop_clauses, NULL, &wi);
604 : : /* Check if there were conflicting numbers of workers or vector length. */
605 : 62 : if (loop_gang_clause != NULL &&
606 : 0 : OMP_CLAUSE_OPERAND (loop_gang_clause, 0) == NULL)
607 : 0 : loop_gang_clause = NULL;
608 : 62 : if (loop_worker_clause != NULL &&
609 : 0 : OMP_CLAUSE_OPERAND (loop_worker_clause, 0) == NULL)
610 : 0 : loop_worker_clause = NULL;
611 : 62 : if (loop_vector_clause != NULL &&
612 : 0 : OMP_CLAUSE_OPERAND (loop_vector_clause, 0) == NULL)
613 : : vector_length_clause = NULL;
614 : :
615 : : /* If the kernels region had 'num_gangs', 'num_worker', 'vector_length'
616 : : clauses, add these to this new compute construct. */
617 : 62 : clauses
618 : 62 : = add_parent_or_loop_num_clause (num_gangs_clause, loop_gang_clause,
619 : : OMP_CLAUSE_NUM_GANGS, clauses);
620 : 62 : clauses
621 : 62 : = add_parent_or_loop_num_clause (num_workers_clause, loop_worker_clause,
622 : : OMP_CLAUSE_NUM_WORKERS, clauses);
623 : 62 : clauses
624 : 62 : = add_parent_or_loop_num_clause (vector_length_clause, loop_vector_clause,
625 : : OMP_CLAUSE_VECTOR_LENGTH, clauses);
626 : :
627 : 62 : return clauses;
628 : : }
629 : :
630 : : /* Construct a possibly gang-parallel compute construct containing the STMT,
631 : : which must be identical to, or a bind containing, the loop OMP_FOR.
632 : :
633 : : The NUM_GANGS_CLAUSE, NUM_WORKERS_CLAUSE, and VECTOR_LENGTH_CLAUSE are
634 : : optional clauses from the original kernels region and must not be contained
635 : : in the other CLAUSES. The newly created compute construct is annotated with
636 : : the optional NUM_GANGS_CLAUSE as well as the other CLAUSES. If there is no
637 : : NUM_GANGS_CLAUSE but the loop has a 'gang (num: N)' clause, that is
638 : : converted to a 'num_gangs (N)' clause on the new compute construct, and
639 : : similarly for 'worker' and 'vector' clauses.
640 : :
641 : : The outermost loop gets an 'auto' clause unless there already is an
642 : : 'seq'/'independent'/'auto' clause. Nested loops inside OMP_FOR are treated
643 : : similarly by the adjust_nested_loop_clauses function. */
644 : :
645 : : static gimple *
646 : 412 : make_region_loop_nest (gimple *omp_for, gimple_seq stmts,
647 : : tree num_gangs_clause,
648 : : tree num_workers_clause,
649 : : tree vector_length_clause,
650 : : tree clauses)
651 : : {
652 : : /* This correctly unshares the entire clause chain rooted here. */
653 : 412 : clauses = unshare_expr (clauses);
654 : :
655 : : /* Figure out the region code for this region. */
656 : : /* Optimistic default: assume that the loop nest is parallelizable
657 : : (essentially, no GIMPLE_OMP_FOR with (explicit or implicit) 'auto' clause,
658 : : and no un-annotated loops). */
659 : 412 : int region_code = GF_OMP_TARGET_KIND_OACC_PARALLEL_KERNELS_PARALLELIZED;
660 : 412 : adjust_region_code (stmts, ®ion_code);
661 : :
662 : 412 : if (region_code == GF_OMP_TARGET_KIND_OACC_PARALLEL_KERNELS_PARALLELIZED)
663 : : {
664 : 62 : if (dump_enabled_p ())
665 : : /* This is not MSG_OPTIMIZED_LOCATIONS, as we're just doing what the
666 : : user asked us to. */
667 : 62 : dump_printf_loc (MSG_NOTE, omp_for,
668 : : "parallelized loop nest"
669 : : " in OpenACC %<kernels%> region\n");
670 : :
671 : 62 : clauses = transform_kernels_loop_clauses (omp_for,
672 : : num_gangs_clause,
673 : : num_workers_clause,
674 : : vector_length_clause,
675 : : clauses);
676 : : }
677 : 350 : else if (region_code == GF_OMP_TARGET_KIND_OACC_KERNELS)
678 : : {
679 : 350 : if (dump_enabled_p ())
680 : 350 : dump_printf_loc (MSG_NOTE, omp_for,
681 : : "forwarded loop nest"
682 : : " in OpenACC %<kernels%> region"
683 : : " to %<parloops%> for analysis\n");
684 : :
685 : : /* We're transforming one 'GF_OMP_TARGET_KIND_OACC_KERNELS' into another
686 : : 'GF_OMP_TARGET_KIND_OACC_KERNELS', so don't have to
687 : : 'transform_kernels_loop_clauses'. */
688 : : /* Re-assemble the clauses stripped off earlier. */
689 : 350 : clauses
690 : 350 : = add_parent_or_loop_num_clause (num_gangs_clause, NULL,
691 : : OMP_CLAUSE_NUM_GANGS, clauses);
692 : 350 : clauses
693 : 350 : = add_parent_or_loop_num_clause (num_workers_clause, NULL,
694 : : OMP_CLAUSE_NUM_WORKERS, clauses);
695 : 350 : clauses
696 : 350 : = add_parent_or_loop_num_clause (vector_length_clause, NULL,
697 : : OMP_CLAUSE_VECTOR_LENGTH, clauses);
698 : : }
699 : : else
700 : 0 : gcc_unreachable ();
701 : :
702 : 412 : gimple *parallel_body_bind
703 : 412 : = gimple_build_bind (NULL, stmts, make_node (BLOCK));
704 : 412 : gimple *parallel_region
705 : 412 : = gimple_build_omp_target (parallel_body_bind, region_code, clauses);
706 : 412 : gimple_set_location (parallel_region, gimple_location (omp_for));
707 : :
708 : 412 : return parallel_region;
709 : : }
710 : :
711 : : /* Eliminate any binds directly inside BIND by adding their statements to
712 : : BIND (i.e., modifying it in place), excluding binds that hold only an
713 : : OMP_FOR loop and associated setup/cleanup code. Recurse into binds but
714 : : not other statements. Return a chain of the local variables of eliminated
715 : : binds, i.e., the local variables found in nested binds. If
716 : : INCLUDE_TOPLEVEL_VARS is true, this also includes the variables belonging
717 : : to BIND itself. */
718 : :
719 : : static tree
720 : 1406 : flatten_binds (gbind *bind, bool include_toplevel_vars = false)
721 : : {
722 : 1406 : tree vars = NULL, last_var = NULL;
723 : :
724 : 1406 : if (include_toplevel_vars)
725 : : {
726 : 726 : vars = gimple_bind_vars (bind);
727 : 726 : last_var = vars;
728 : : }
729 : :
730 : 1406 : gimple_seq new_body = NULL;
731 : 1406 : gimple_seq body_sequence = gimple_bind_body (bind);
732 : 1406 : gimple_stmt_iterator gsi, gsi_n;
733 : 10720 : for (gsi = gsi_start (body_sequence); !gsi_end_p (gsi); gsi = gsi_n)
734 : : {
735 : : /* Advance the iterator here because otherwise it would be invalidated
736 : : by moving statements below. */
737 : 9314 : gsi_n = gsi;
738 : 9314 : gsi_next (&gsi_n);
739 : :
740 : 9314 : gimple *stmt = gsi_stmt (gsi);
741 : : /* Flatten bind statements, except the ones that contain only an
742 : : OpenACC for loop. */
743 : 9314 : if (gimple_code (stmt) == GIMPLE_BIND
744 : 9314 : && !top_level_omp_for_in_stmt (stmt))
745 : : {
746 : 726 : gbind *inner_bind = as_a <gbind *> (stmt);
747 : : /* Flatten recursively, and collect all variables. */
748 : 726 : tree inner_vars = flatten_binds (inner_bind, true);
749 : 726 : gimple_seq inner_sequence = gimple_bind_body (inner_bind);
750 : 726 : if (flag_checking)
751 : : {
752 : : for (gimple_stmt_iterator inner_gsi = gsi_start (inner_sequence);
753 : 19662 : !gsi_end_p (inner_gsi);
754 : 18936 : gsi_next (&inner_gsi))
755 : : {
756 : 18936 : gimple *inner_stmt = gsi_stmt (inner_gsi);
757 : 18936 : gcc_assert (gimple_code (inner_stmt) != GIMPLE_BIND
758 : : || top_level_omp_for_in_stmt (inner_stmt));
759 : : }
760 : : }
761 : 726 : gimple_seq_add_seq (&new_body, inner_sequence);
762 : : /* Find the last variable; we will append others to it. */
763 : 1452 : while (last_var != NULL && TREE_CHAIN (last_var) != NULL)
764 : 0 : last_var = TREE_CHAIN (last_var);
765 : 726 : if (last_var != NULL)
766 : : {
767 : 160 : TREE_CHAIN (last_var) = inner_vars;
768 : 160 : last_var = inner_vars;
769 : : }
770 : : else
771 : : {
772 : : vars = inner_vars;
773 : : last_var = vars;
774 : : }
775 : : }
776 : : else
777 : 8588 : gimple_seq_add_stmt (&new_body, stmt);
778 : : }
779 : :
780 : : /* Put the possibly transformed body back into the bind. */
781 : 1406 : gimple_bind_set_body (bind, new_body);
782 : 1406 : return vars;
783 : : }
784 : :
785 : : /* Helper function for places where we construct data regions. Wraps the BODY
786 : : inside a try-finally construct at LOC that calls __builtin_GOACC_data_end
787 : : in its cleanup block. Returns this try statement. */
788 : :
789 : : static gimple *
790 : 770 : make_data_region_try_statement (location_t loc, gimple *body)
791 : : {
792 : 770 : tree data_end_fn = builtin_decl_explicit (BUILT_IN_GOACC_DATA_END);
793 : 770 : gimple *call = gimple_build_call (data_end_fn, 0);
794 : 770 : gimple_seq cleanup = NULL;
795 : 770 : gimple_seq_add_stmt (&cleanup, call);
796 : 770 : gimple *try_stmt = gimple_build_try (body, cleanup, GIMPLE_TRY_FINALLY);
797 : 770 : gimple_set_location (body, loc);
798 : 770 : return try_stmt;
799 : : }
800 : :
801 : : /* If INNER_BIND_VARS holds variables, build an OpenACC data region with
802 : : location LOC containing BODY and having 'create (var)' clauses for each
803 : : variable (as a side effect, such variables also get TREE_ADDRESSABLE set).
804 : : If INNER_CLEANUP is present, add a try-finally statement with
805 : : this cleanup code in the finally block. Return the new data region, or
806 : : the original BODY if no data region was needed. */
807 : :
808 : : static gimple *
809 : 670 : maybe_build_inner_data_region (location_t loc, gimple *body,
810 : : tree inner_bind_vars, gimple *inner_cleanup)
811 : : {
812 : : /* Is this an instantiation of a template? (In this case, we don't care what
813 : : the generic decl is - just whether the function decl has one.) */
814 : 670 : bool generic_inst_p
815 : 670 : = (lang_hooks.decls.get_generic_function_decl (current_function_decl)
816 : 670 : != NULL);
817 : :
818 : : /* Build data 'create (var)' clauses for these local variables.
819 : : Below we will add these to a data region enclosing the entire body
820 : : of the decomposed kernels region. */
821 : 670 : tree prev_mapped_var = NULL, next = NULL, artificial_vars = NULL,
822 : 670 : inner_data_clauses = NULL;
823 : 1132 : for (tree v = inner_bind_vars; v; v = next)
824 : : {
825 : 462 : next = TREE_CHAIN (v);
826 : 462 : if (DECL_ARTIFICIAL (v)
827 : 260 : || TREE_CODE (v) == CONST_DECL
828 : 566 : || generic_inst_p)
829 : : {
830 : : /* If this is an artificial temporary, it need not be mapped. We
831 : : move its declaration into the bind inside the data region.
832 : : Also avoid mapping variables if we are inside a template
833 : : instantiation; the code does not contain all the copies to
834 : : temporaries that would make this legal. */
835 : 358 : TREE_CHAIN (v) = artificial_vars;
836 : 358 : artificial_vars = v;
837 : 358 : if (prev_mapped_var != NULL)
838 : 0 : TREE_CHAIN (prev_mapped_var) = next;
839 : : else
840 : : inner_bind_vars = next;
841 : : }
842 : : else
843 : : {
844 : : /* Otherwise, build the map clause. */
845 : 104 : tree new_clause = build_omp_clause (loc, OMP_CLAUSE_MAP);
846 : 104 : OMP_CLAUSE_SET_MAP_KIND (new_clause, GOMP_MAP_ALLOC);
847 : 104 : OMP_CLAUSE_DECL (new_clause) = v;
848 : 104 : OMP_CLAUSE_SIZE (new_clause) = DECL_SIZE_UNIT (v);
849 : 104 : OMP_CLAUSE_CHAIN (new_clause) = inner_data_clauses;
850 : 104 : inner_data_clauses = new_clause;
851 : :
852 : 104 : prev_mapped_var = v;
853 : :
854 : : /* See <https://gcc.gnu.org/PR100280>. */
855 : 104 : if (!TREE_ADDRESSABLE (v))
856 : : {
857 : : /* Request that OMP lowering make 'v' addressable. */
858 : 89 : OMP_CLAUSE_MAP_DECL_MAKE_ADDRESSABLE (new_clause) = 1;
859 : :
860 : 89 : if (dump_enabled_p ())
861 : : {
862 : 77 : const dump_user_location_t d_u_loc
863 : 77 : = dump_user_location_t::from_location_t (loc);
864 : : /* PR100695 "Format decoder, quoting in 'dump_printf' etc." */
865 : : #if __GNUC__ >= 10
866 : 77 : # pragma GCC diagnostic push
867 : 77 : # pragma GCC diagnostic ignored "-Wformat"
868 : : #endif
869 : 77 : dump_printf_loc (MSG_NOTE, d_u_loc,
870 : : "OpenACC %<kernels%> decomposition:"
871 : : " variable %<%T%> declared in block"
872 : : " requested to be made addressable\n",
873 : : v);
874 : : #if __GNUC__ >= 10
875 : 77 : # pragma GCC diagnostic pop
876 : : #endif
877 : : }
878 : : }
879 : : }
880 : : }
881 : :
882 : 670 : if (artificial_vars)
883 : 202 : body = gimple_build_bind (artificial_vars, body, make_node (BLOCK));
884 : :
885 : : /* If we determined above that there are variables that need to be created
886 : : on the device, construct a data region for them and wrap the body
887 : : inside that. */
888 : 670 : if (inner_data_clauses != NULL)
889 : : {
890 : 100 : gcc_assert (inner_bind_vars != NULL);
891 : 100 : gimple *inner_data_region
892 : 100 : = gimple_build_omp_target (NULL, GF_OMP_TARGET_KIND_OACC_DATA_KERNELS,
893 : : inner_data_clauses);
894 : 100 : gimple_set_location (inner_data_region, loc);
895 : : /* Make sure __builtin_GOACC_data_end is called at the end. */
896 : 100 : gimple *try_stmt = make_data_region_try_statement (loc, body);
897 : 100 : gimple_omp_set_body (inner_data_region, try_stmt);
898 : 100 : gimple *bind_body;
899 : 100 : if (inner_cleanup != NULL)
900 : : /* Clobber all the inner variables that need to be clobbered. */
901 : 15 : bind_body = gimple_build_try (inner_data_region, inner_cleanup,
902 : : GIMPLE_TRY_FINALLY);
903 : : else
904 : : bind_body = inner_data_region;
905 : 100 : body = gimple_build_bind (inner_bind_vars, bind_body, make_node (BLOCK));
906 : : }
907 : :
908 : 670 : return body;
909 : : }
910 : :
911 : : static void
912 : 670 : add_wait (location_t loc, gimple_seq *region_body)
913 : : {
914 : : /* A "#pragma acc wait" is just a call GOACC_wait (acc_async_sync, 0). */
915 : 670 : tree wait_fn = builtin_decl_explicit (BUILT_IN_GOACC_WAIT);
916 : 670 : tree sync_arg = build_int_cst (integer_type_node, GOMP_ASYNC_SYNC);
917 : 670 : gimple *wait_call = gimple_build_call (wait_fn, 2,
918 : : sync_arg, integer_zero_node);
919 : 670 : gimple_set_location (wait_call, loc);
920 : 670 : gimple_seq_add_stmt (region_body, wait_call);
921 : 670 : }
922 : :
923 : : /* Helper function of decompose_kernels_region_body. The statements in
924 : : REGION_BODY are expected to be decomposed parts; add an 'async' clause to
925 : : each. Also add a 'wait' directive at the end of the sequence. */
926 : :
927 : : static void
928 : 557 : add_async_clauses_and_wait (location_t loc, gimple_seq *region_body)
929 : : {
930 : 557 : tree default_async_queue
931 : 557 : = build_int_cst (integer_type_node, GOMP_ASYNC_NOVAL);
932 : 557 : for (gimple_stmt_iterator gsi = gsi_start (*region_body);
933 : 1250 : !gsi_end_p (gsi);
934 : 693 : gsi_next (&gsi))
935 : : {
936 : 693 : gimple *stmt = gsi_stmt (gsi);
937 : 693 : tree target_clauses = gimple_omp_target_clauses (stmt);
938 : 693 : tree new_async_clause = build_omp_clause (loc, OMP_CLAUSE_ASYNC);
939 : 693 : OMP_CLAUSE_OPERAND (new_async_clause, 0) = default_async_queue;
940 : 693 : OMP_CLAUSE_CHAIN (new_async_clause) = target_clauses;
941 : 693 : target_clauses = new_async_clause;
942 : 693 : gimple_omp_target_set_clauses (as_a <gomp_target *> (stmt),
943 : : target_clauses);
944 : : }
945 : 557 : add_wait (loc, region_body);
946 : 557 : }
947 : :
948 : : /* Auxiliary analysis of the body of a kernels region, to determine for each
949 : : OpenACC loop whether it is control-dependent (i.e., not necessarily
950 : : executed every time the kernels region is entered) or not.
951 : : We say that a loop is control-dependent if there is some cond, switch, or
952 : : goto statement that jumps over it, forwards or backwards. For example,
953 : : if the loop is controlled by an if statement, then a jump to the true
954 : : block, the false block, or from one of those blocks to the control flow
955 : : join point will necessarily jump over the loop.
956 : : This analysis implements an ad-hoc union-find data structure classifying
957 : : statements into "control-flow regions" as follows: Most statements are in
958 : : the same region as their predecessor, except that each OpenACC loop is in
959 : : a region of its own, and each OpenACC loop's successor starts a new
960 : : region. We then unite the regions of any statements linked by jumps,
961 : : placing any cond, switch, or goto statement in the same region as its
962 : : target label(s).
963 : : In the end, control dependence of OpenACC loops can be determined by
964 : : comparing their immediate predecessor and successor statements' regions.
965 : : A jump crosses the loop if and only if the predecessor and successor are
966 : : in the same region. (If there is no predecessor or successor, the loop
967 : : is executed unconditionally.)
968 : : The methods in this class identify statements by their index in the
969 : : kernels region's body. */
970 : :
971 : : class control_flow_regions
972 : : {
973 : : public:
974 : : /* Initialize an instance and pre-compute the control-flow region
975 : : information for the statement sequence SEQ. */
976 : : control_flow_regions (gimple_seq seq);
977 : :
978 : : /* Return true if the statement with the given index IDX in the analyzed
979 : : statement sequence is an unconditionally executed OpenACC loop. */
980 : : bool is_unconditional_oacc_for_loop (size_t idx);
981 : :
982 : : private:
983 : : /* Find the region representative for the statement identified by index
984 : : STMT_IDX. */
985 : : size_t find_rep (size_t stmt_idx);
986 : :
987 : : /* Union the regions containing the statements represented by
988 : : representatives A and B. */
989 : : void union_reps (size_t a, size_t b);
990 : :
991 : : /* Helper for the constructor. Performs the actual computation of the
992 : : control-flow regions in the statement sequence SEQ. */
993 : : void compute_regions (gimple_seq seq);
994 : :
995 : : /* The mapping from statement indices to region representatives. */
996 : : vec <size_t> representatives;
997 : :
998 : : /* A cache mapping statement indices to a flag indicating whether the
999 : : statement is a top level OpenACC for loop. */
1000 : : vec <bool> omp_for_loops;
1001 : : };
1002 : :
1003 : 680 : control_flow_regions::control_flow_regions (gimple_seq seq)
1004 : : {
1005 : 680 : representatives.create (1);
1006 : 680 : omp_for_loops.create (1);
1007 : 680 : compute_regions (seq);
1008 : 680 : }
1009 : :
1010 : : bool
1011 : 422 : control_flow_regions::is_unconditional_oacc_for_loop (size_t idx)
1012 : : {
1013 : 541 : if (idx == 0 || idx == representatives.length () - 1)
1014 : : /* The first or last statement in the kernels region. This means that
1015 : : there is no room before or after it for a jump or a label. Thus
1016 : : there cannot be a jump across it, so it is unconditional. */
1017 : : return true;
1018 : : /* Otherwise, the loop is unconditional if the statements before and after
1019 : : it are in different control flow regions. Scan forward and backward,
1020 : : skipping over neighboring OpenACC for loops, to find these preceding
1021 : : statements. */
1022 : 44 : size_t prev_index = idx - 1;
1023 : 50 : while (prev_index > 0 && omp_for_loops [prev_index] == true)
1024 : 6 : prev_index--;
1025 : : /* If all preceding statements are also OpenACC loops, all of these are
1026 : : unconditional. */
1027 : 44 : if (prev_index == 0)
1028 : : return true;
1029 : 32 : size_t succ_index = idx + 1;
1030 : 32 : while (succ_index < omp_for_loops.length ()
1031 : 43 : && omp_for_loops [succ_index] == true)
1032 : 11 : succ_index++;
1033 : : /* If all following statements are also OpenACC loops, all of these are
1034 : : unconditional. */
1035 : 64 : if (succ_index == omp_for_loops.length ())
1036 : : return true;
1037 : 27 : return (find_rep (prev_index) != find_rep (succ_index));
1038 : : }
1039 : :
1040 : : size_t
1041 : 2574 : control_flow_regions::find_rep (size_t stmt_idx)
1042 : : {
1043 : 2574 : size_t rep = stmt_idx, aux = stmt_idx;
1044 : : /* Find the root representative of this statement. */
1045 : 2605 : while (representatives[rep] != rep)
1046 : : rep = representatives[rep];
1047 : : /* Compress the path from the original statement to the representative. */
1048 : 2574 : while (representatives[aux] != rep)
1049 : : {
1050 : 0 : size_t tmp = representatives[aux];
1051 : 0 : representatives[aux] = rep;
1052 : 0 : aux = tmp;
1053 : : }
1054 : 2574 : return rep;
1055 : : }
1056 : :
1057 : : void
1058 : 1260 : control_flow_regions::union_reps (size_t a, size_t b)
1059 : : {
1060 : 1260 : a = find_rep (a);
1061 : 1260 : b = find_rep (b);
1062 : 1260 : representatives[b] = a;
1063 : 1260 : }
1064 : :
1065 : : void
1066 : 680 : control_flow_regions::compute_regions (gimple_seq seq)
1067 : : {
1068 : 680 : hash_map <gimple *, size_t> control_flow_reps;
1069 : 680 : hash_map <tree, size_t> label_reps;
1070 : 680 : size_t current_region = 0, idx = 0;
1071 : :
1072 : : /* In a first pass, assign an initial region to each statement. Except in
1073 : : the case of OpenACC loops, each statement simply gets the same region
1074 : : representative as its predecessor. */
1075 : 680 : for (gimple_stmt_iterator gsi = gsi_start (seq);
1076 : 9290 : !gsi_end_p (gsi);
1077 : 8610 : gsi_next (&gsi))
1078 : : {
1079 : 8610 : gimple *stmt = gsi_stmt (gsi);
1080 : 8610 : gimple *omp_for = top_level_omp_for_in_stmt (stmt);
1081 : 8610 : omp_for_loops.safe_push (omp_for != NULL);
1082 : 8610 : if (omp_for != NULL)
1083 : : {
1084 : : /* Assign a new region to this loop and to its successor. */
1085 : 423 : current_region = idx;
1086 : 423 : representatives.safe_push (current_region);
1087 : 423 : current_region++;
1088 : : }
1089 : : else
1090 : : {
1091 : 8187 : representatives.safe_push (current_region);
1092 : : /* Remember any jumps and labels for the second pass below. */
1093 : 8187 : if (gimple_code (stmt) == GIMPLE_COND
1094 : 7765 : || gimple_code (stmt) == GIMPLE_SWITCH
1095 : 15952 : || gimple_code (stmt) == GIMPLE_GOTO)
1096 : 838 : control_flow_reps.put (stmt, current_region);
1097 : 7349 : else if (gimple_code (stmt) == GIMPLE_LABEL)
1098 : 1620 : label_reps.put (gimple_label_label (as_a <glabel *> (stmt)),
1099 : : current_region);
1100 : : }
1101 : 8610 : idx++;
1102 : : }
1103 : 2040 : gcc_assert (representatives.length () == omp_for_loops.length ());
1104 : :
1105 : : /* Revisit all the control flow statements and union the region of each
1106 : : cond, switch, or goto statement with the target labels' regions. */
1107 : 1518 : for (hash_map <gimple *, size_t>::iterator it = control_flow_reps.begin ();
1108 : 2198 : it != control_flow_reps.end ();
1109 : 838 : ++it)
1110 : : {
1111 : 838 : gimple *stmt = (*it).first;
1112 : 838 : size_t stmt_rep = (*it).second;
1113 : 838 : switch (gimple_code (stmt))
1114 : : {
1115 : 422 : tree label;
1116 : 422 : unsigned int n;
1117 : :
1118 : 422 : case GIMPLE_COND:
1119 : 422 : label = gimple_cond_true_label (as_a <gcond *> (stmt));
1120 : 422 : union_reps (stmt_rep, *label_reps.get (label));
1121 : 422 : label = gimple_cond_false_label (as_a <gcond *> (stmt));
1122 : 422 : union_reps (stmt_rep, *label_reps.get (label));
1123 : 422 : break;
1124 : :
1125 : 0 : case GIMPLE_SWITCH:
1126 : 0 : n = gimple_switch_num_labels (as_a <gswitch *> (stmt));
1127 : 0 : for (unsigned int i = 0; i < n; i++)
1128 : : {
1129 : 0 : tree switch_case
1130 : 0 : = gimple_switch_label (as_a <gswitch *> (stmt), i);
1131 : 0 : label = CASE_LABEL (switch_case);
1132 : 0 : union_reps (stmt_rep, *label_reps.get (label));
1133 : : }
1134 : : break;
1135 : :
1136 : 416 : case GIMPLE_GOTO:
1137 : 416 : label = gimple_goto_dest (stmt);
1138 : 416 : union_reps (stmt_rep, *label_reps.get (label));
1139 : 416 : break;
1140 : :
1141 : 0 : default:
1142 : 0 : gcc_unreachable ();
1143 : : }
1144 : : }
1145 : 680 : }
1146 : :
1147 : : /* Decompose the body of the KERNELS_REGION, which was originally annotated
1148 : : with the KERNELS_CLAUSES, into a series of compute constructs. */
1149 : :
1150 : : static gimple *
1151 : 680 : decompose_kernels_region_body (gimple *kernels_region, tree kernels_clauses)
1152 : : {
1153 : 680 : location_t loc = gimple_location (kernels_region);
1154 : :
1155 : : /* The kernels clauses will be propagated to the child clauses unmodified,
1156 : : except that the 'num_gangs', 'num_workers', and 'vector_length' clauses
1157 : : will only be added to loop regions. The other regions are "gang-single"
1158 : : and get an explicit 'num_gangs (1)' clause. So separate out the
1159 : : 'num_gangs', 'num_workers', and 'vector_length' clauses here.
1160 : : Also check for the presence of an 'async' clause but do not remove it from
1161 : : the 'kernels' clauses. */
1162 : 680 : tree num_gangs_clause = NULL, num_workers_clause = NULL,
1163 : 680 : vector_length_clause = NULL;
1164 : 680 : tree async_clause = NULL;
1165 : 680 : tree prev_clause = NULL, next_clause = NULL;
1166 : 680 : tree parallel_clauses = kernels_clauses;
1167 : 3636 : for (tree c = parallel_clauses; c; c = next_clause)
1168 : : {
1169 : : /* Preserve this here, as we might NULL it later. */
1170 : 2956 : next_clause = OMP_CLAUSE_CHAIN (c);
1171 : :
1172 : 2956 : if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_NUM_GANGS
1173 : 2949 : || OMP_CLAUSE_CODE (c) == OMP_CLAUSE_NUM_WORKERS
1174 : 5898 : || OMP_CLAUSE_CODE (c) == OMP_CLAUSE_VECTOR_LENGTH)
1175 : : {
1176 : : /* Cut this clause out of the chain. */
1177 : 21 : if (prev_clause != NULL)
1178 : 3 : OMP_CLAUSE_CHAIN (prev_clause) = OMP_CLAUSE_CHAIN (c);
1179 : : else
1180 : 18 : kernels_clauses = OMP_CLAUSE_CHAIN (c);
1181 : 21 : OMP_CLAUSE_CHAIN (c) = NULL;
1182 : 21 : switch (OMP_CLAUSE_CODE (c))
1183 : : {
1184 : : case OMP_CLAUSE_NUM_GANGS:
1185 : : num_gangs_clause = c;
1186 : : break;
1187 : 7 : case OMP_CLAUSE_NUM_WORKERS:
1188 : 7 : num_workers_clause = c;
1189 : 7 : break;
1190 : 7 : case OMP_CLAUSE_VECTOR_LENGTH:
1191 : 7 : vector_length_clause = c;
1192 : 7 : break;
1193 : 0 : default:
1194 : 0 : gcc_unreachable ();
1195 : : }
1196 : : }
1197 : : else
1198 : : prev_clause = c;
1199 : 2956 : if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_ASYNC)
1200 : 113 : async_clause = c;
1201 : : }
1202 : :
1203 : 680 : gimple *kernels_body = gimple_omp_body (kernels_region);
1204 : 680 : gbind *kernels_bind = as_a <gbind *> (kernels_body);
1205 : :
1206 : : /* The body of the region may contain other nested binds declaring inner
1207 : : local variables. Collapse all these binds into one to ensure that we
1208 : : have a single sequence of statements to iterate over; also, collect all
1209 : : inner variables. */
1210 : 680 : tree inner_bind_vars = flatten_binds (kernels_bind);
1211 : 680 : gimple_seq body_sequence = gimple_bind_body (kernels_bind);
1212 : :
1213 : : /* All these inner variables will get allocated on the device (below, by
1214 : : calling maybe_build_inner_data_region). Here we create 'present'
1215 : : clauses for them and add these clauses to the list of clauses to be
1216 : : attached to each inner compute construct. */
1217 : 680 : tree present_clauses = kernels_clauses;
1218 : 1152 : for (tree var = inner_bind_vars; var; var = TREE_CHAIN (var))
1219 : : {
1220 : 472 : if (!DECL_ARTIFICIAL (var) && TREE_CODE (var) != CONST_DECL)
1221 : : {
1222 : 114 : tree present_clause = build_omp_clause (loc, OMP_CLAUSE_MAP);
1223 : 114 : OMP_CLAUSE_SET_MAP_KIND (present_clause, GOMP_MAP_FORCE_PRESENT);
1224 : 114 : OMP_CLAUSE_DECL (present_clause) = var;
1225 : 114 : OMP_CLAUSE_SIZE (present_clause) = DECL_SIZE_UNIT (var);
1226 : 114 : OMP_CLAUSE_CHAIN (present_clause) = present_clauses;
1227 : 114 : present_clauses = present_clause;
1228 : : }
1229 : : }
1230 : 680 : kernels_clauses = present_clauses;
1231 : :
1232 : : /* In addition to nested binds, the "real" body of the region may be
1233 : : nested inside a try-finally block. Find its cleanup block, which
1234 : : contains code to clobber the local variables that must be clobbered. */
1235 : 680 : gimple *inner_cleanup = NULL;
1236 : 680 : if (body_sequence != NULL && gimple_code (body_sequence) == GIMPLE_TRY)
1237 : : {
1238 : 16 : if (gimple_seq_singleton_p (body_sequence))
1239 : : {
1240 : : /* The try statement is the only thing inside the bind. */
1241 : 16 : inner_cleanup = gimple_try_cleanup (body_sequence);
1242 : 16 : body_sequence = gimple_try_eval (body_sequence);
1243 : : }
1244 : : else
1245 : : {
1246 : : /* The bind's body starts with a try statement, but it is followed
1247 : : by other things. */
1248 : 0 : gimple_stmt_iterator gsi = gsi_start (body_sequence);
1249 : 0 : gimple *try_stmt = gsi_stmt (gsi);
1250 : 0 : inner_cleanup = gimple_try_cleanup (try_stmt);
1251 : 0 : gimple *try_body = gimple_try_eval (try_stmt);
1252 : :
1253 : 0 : gsi_remove (&gsi, false);
1254 : : /* Now gsi indicates the sequence of statements after the try
1255 : : statement in the bind. Append the statement in the try body and
1256 : : the trailing statements from gsi. */
1257 : 0 : gsi_insert_seq_before (&gsi, try_body, GSI_CONTINUE_LINKING);
1258 : 0 : body_sequence = gsi_stmt (gsi);
1259 : : }
1260 : : }
1261 : :
1262 : : /* This sequence will collect all the top-level statements in the body of
1263 : : the data region we are about to construct. */
1264 : 680 : gimple_seq region_body = NULL;
1265 : : /* This sequence will collect consecutive statements to be put into a
1266 : : gang-single region. */
1267 : 680 : gimple_seq gang_single_seq = NULL;
1268 : : /* Flag recording whether the gang_single_seq only contains copies to
1269 : : local variables. These may be loop setup code that should not be
1270 : : separated from the loop. */
1271 : 680 : bool only_simple_assignments = true;
1272 : :
1273 : : /* Precompute the control flow region information to determine whether an
1274 : : OpenACC loop is executed conditionally or unconditionally. */
1275 : 680 : control_flow_regions cf_regions (body_sequence);
1276 : :
1277 : : /* Iterate over the statements in the kernels region's body. */
1278 : 680 : size_t idx = 0;
1279 : 680 : gimple_stmt_iterator gsi, gsi_n;
1280 : 9280 : for (gsi = gsi_start (body_sequence); !gsi_end_p (gsi); gsi = gsi_n, idx++)
1281 : : {
1282 : : /* Advance the iterator here because otherwise it would be invalidated
1283 : : by moving statements below. */
1284 : 8604 : gsi_n = gsi;
1285 : 8604 : gsi_next (&gsi_n);
1286 : :
1287 : 8604 : gimple *stmt = gsi_stmt (gsi);
1288 : 8604 : if (gimple_code (stmt) == GIMPLE_DEBUG)
1289 : : {
1290 : 92 : if (flag_compare_debug_opt || flag_compare_debug)
1291 : : /* Let the usual '-fcompare-debug' analysis bail out, as
1292 : : necessary. */
1293 : : ;
1294 : : else
1295 : 20 : sorry_at (loc, "%qs not yet supported",
1296 : 20 : gimple_code_name[gimple_code (stmt)]);
1297 : : }
1298 : 8600 : gimple *omp_for = top_level_omp_for_in_stmt (stmt);
1299 : 8600 : bool is_unconditional_oacc_for_loop = false;
1300 : 8600 : if (omp_for != NULL)
1301 : 422 : is_unconditional_oacc_for_loop
1302 : 422 : = cf_regions.is_unconditional_oacc_for_loop (idx);
1303 : 8600 : if (omp_for != NULL
1304 : 8600 : && is_unconditional_oacc_for_loop)
1305 : : {
1306 : : /* This is an OMP for statement, put it into a separate region.
1307 : : But first, construct a gang-single region containing any
1308 : : complex sequential statements we may have seen. */
1309 : 412 : if (gang_single_seq != NULL && !only_simple_assignments)
1310 : : {
1311 : 69 : gimple *single_region
1312 : 69 : = make_region_seq (loc, gang_single_seq,
1313 : : num_gangs_clause,
1314 : : num_workers_clause,
1315 : : vector_length_clause,
1316 : : kernels_clauses);
1317 : 69 : gimple_seq_add_stmt (®ion_body, single_region);
1318 : 69 : }
1319 : 343 : else if (gang_single_seq != NULL && only_simple_assignments)
1320 : : {
1321 : : /* There is a sequence of sequential statements preceding this
1322 : : loop, but they are all simple assignments. This is
1323 : : probably setup code for the loop; in particular, Fortran DO
1324 : : loops are preceded by code to copy the loop limit variable
1325 : : to a temporary. Group this code together with the loop
1326 : : itself. */
1327 : 8 : gimple_seq_add_stmt (&gang_single_seq, stmt);
1328 : 8 : stmt = gimple_build_bind (NULL, gang_single_seq,
1329 : : make_node (BLOCK));
1330 : : }
1331 : 412 : gang_single_seq = NULL;
1332 : 412 : only_simple_assignments = true;
1333 : :
1334 : 412 : gimple_seq parallel_seq = NULL;
1335 : 412 : gimple_seq_add_stmt (¶llel_seq, stmt);
1336 : 412 : gimple *parallel_region
1337 : 412 : = make_region_loop_nest (omp_for, parallel_seq,
1338 : : num_gangs_clause,
1339 : : num_workers_clause,
1340 : : vector_length_clause,
1341 : : kernels_clauses);
1342 : 412 : gimple_seq_add_stmt (®ion_body, parallel_region);
1343 : : }
1344 : : else
1345 : : {
1346 : 8188 : if (omp_for != NULL)
1347 : : {
1348 : 10 : gcc_checking_assert (!is_unconditional_oacc_for_loop);
1349 : 10 : if (dump_enabled_p ())
1350 : 10 : dump_printf_loc (MSG_MISSED_OPTIMIZATION, omp_for,
1351 : : "unparallelized loop nest"
1352 : : " in OpenACC %<kernels%> region:"
1353 : : " it's executed conditionally\n");
1354 : : }
1355 : :
1356 : : /* This is not an unconditional OMP for statement, so it will be
1357 : : put into a gang-single region. */
1358 : 8188 : gimple_seq_add_stmt (&gang_single_seq, stmt);
1359 : : /* Is this a simple assignment? We call it simple if it is an
1360 : : assignment to an artificial local variable. This captures
1361 : : Fortran loop setup code computing loop bounds and offsets. */
1362 : 8188 : bool is_simple_assignment
1363 : 8188 : = (gimple_code (stmt) == GIMPLE_ASSIGN
1364 : 5437 : && TREE_CODE (gimple_assign_lhs (stmt)) == VAR_DECL
1365 : 13189 : && DECL_ARTIFICIAL (gimple_assign_lhs (stmt)));
1366 : : if (!is_simple_assignment)
1367 : : only_simple_assignments = false;
1368 : : }
1369 : : }
1370 : :
1371 : : /* If we did not emit a new region, and are not going to emit one now
1372 : : (that is, the original region was empty), prepare to emit a dummy so as
1373 : : to preserve the original construct, which other processing (at least
1374 : : test cases) depend on. */
1375 : 676 : if (region_body == NULL && gang_single_seq == NULL)
1376 : : {
1377 : 18 : gimple *stmt = gimple_build_nop ();
1378 : 18 : gimple_set_location (stmt, loc);
1379 : 18 : gimple_seq_add_stmt (&gang_single_seq, stmt);
1380 : : }
1381 : :
1382 : : /* Gather up any remaining gang-single statements. */
1383 : 676 : if (gang_single_seq != NULL)
1384 : : {
1385 : 331 : gimple *single_region
1386 : 331 : = make_region_seq (loc, gang_single_seq,
1387 : : num_gangs_clause,
1388 : : num_workers_clause,
1389 : : vector_length_clause,
1390 : : kernels_clauses);
1391 : 325 : gimple_seq_add_stmt (®ion_body, single_region);
1392 : : }
1393 : :
1394 : : /* We want to launch these kernels asynchronously. If the original
1395 : : kernels region had an async clause, this is done automatically because
1396 : : that async clause was copied to the individual regions we created.
1397 : : Otherwise, add an async clause to each newly created region, as well as
1398 : : a wait directive at the end. */
1399 : 670 : if (async_clause == NULL)
1400 : 557 : add_async_clauses_and_wait (loc, ®ion_body);
1401 : : else
1402 : : /* !!! If we have asynchronous parallel blocks inside a (synchronous) data
1403 : : region, then target memory will get unmapped at the point the data
1404 : : region ends, even if the inner asynchronous parallels have not yet
1405 : : completed. For kernels marked "async", we might want to use "enter data
1406 : : async(...)" and "exit data async(...)" instead, or asynchronous data
1407 : : regions (see also <https://gcc.gnu.org/PR97390>
1408 : : "[OpenACC] 'async' clause on 'data' construct",
1409 : : which is to share the same implementation).
1410 : : For now, insert a (synchronous) wait at the end of the block. */
1411 : 113 : add_wait (loc, ®ion_body);
1412 : :
1413 : 670 : tree kernels_locals = gimple_bind_vars (as_a <gbind *> (kernels_body));
1414 : 670 : gimple *body = gimple_build_bind (kernels_locals, region_body,
1415 : : make_node (BLOCK));
1416 : :
1417 : : /* If we found variables declared in nested scopes, build a data region to
1418 : : map them to the device. */
1419 : 670 : body = maybe_build_inner_data_region (loc, body, inner_bind_vars,
1420 : : inner_cleanup);
1421 : :
1422 : 670 : return body;
1423 : : }
1424 : :
1425 : : /* Decompose one OpenACC 'kernels' construct into an OpenACC 'data' construct
1426 : : containing the original OpenACC 'kernels' construct's region cut up into a
1427 : : sequence of compute constructs. */
1428 : :
1429 : : static gimple *
1430 : 680 : omp_oacc_kernels_decompose_1 (gimple *kernels_stmt)
1431 : : {
1432 : 680 : gcc_checking_assert (gimple_omp_target_kind (kernels_stmt)
1433 : : == GF_OMP_TARGET_KIND_OACC_KERNELS);
1434 : 680 : location_t loc = gimple_location (kernels_stmt);
1435 : :
1436 : : /* Collect the data clauses of the OpenACC 'kernels' directive and create a
1437 : : new OpenACC 'data' construct with those clauses. */
1438 : 680 : tree kernels_clauses = gimple_omp_target_clauses (kernels_stmt);
1439 : 680 : tree data_clauses = NULL;
1440 : 3636 : for (tree c = kernels_clauses; c; c = OMP_CLAUSE_CHAIN (c))
1441 : : {
1442 : : /* Certain clauses are copied to the enclosing OpenACC 'data'. Other
1443 : : clauses remain on the OpenACC 'kernels'. */
1444 : 2956 : if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_MAP)
1445 : : {
1446 : 2612 : tree decl = OMP_CLAUSE_DECL (c);
1447 : 2612 : HOST_WIDE_INT map_kind = OMP_CLAUSE_MAP_KIND (c);
1448 : 2612 : switch (map_kind)
1449 : : {
1450 : 1357 : default:
1451 : 1357 : if (map_kind == GOMP_MAP_ALLOC
1452 : 1357 : && integer_zerop (OMP_CLAUSE_SIZE (c)))
1453 : : /* ??? This is an alloc clause for mapping a pointer whose
1454 : : target is already mapped. We leave these on the inner
1455 : : compute constructs because moving them to the outer data
1456 : : region causes runtime errors. */
1457 : : break;
1458 : :
1459 : : /* For non-artificial variables, and for non-declaration
1460 : : expressions like A[0:n], copy the clause to the data
1461 : : region. */
1462 : 648 : if ((DECL_P (decl) && !DECL_ARTIFICIAL (decl))
1463 : 1353 : || !DECL_P (decl))
1464 : : {
1465 : 1285 : tree new_clause = build_omp_clause (OMP_CLAUSE_LOCATION (c),
1466 : : OMP_CLAUSE_MAP);
1467 : 1285 : OMP_CLAUSE_SET_MAP_KIND (new_clause, map_kind);
1468 : : /* This must be unshared here to avoid "incorrect sharing
1469 : : of tree nodes" errors from verify_gimple. */
1470 : 1285 : OMP_CLAUSE_DECL (new_clause) = unshare_expr (decl);
1471 : 1285 : OMP_CLAUSE_SIZE (new_clause) = OMP_CLAUSE_SIZE (c);
1472 : 1285 : OMP_CLAUSE_CHAIN (new_clause) = data_clauses;
1473 : 1285 : data_clauses = new_clause;
1474 : :
1475 : : /* Now that this data is mapped, turn the data clause on the
1476 : : inner OpenACC 'kernels' into a 'present' clause. */
1477 : 1285 : OMP_CLAUSE_SET_MAP_KIND (c, GOMP_MAP_FORCE_PRESENT);
1478 : :
1479 : : /* See <https://gcc.gnu.org/PR100280>,
1480 : : <https://gcc.gnu.org/PR104086>. */
1481 : 1285 : if (DECL_P (decl)
1482 : 614 : && !TREE_ADDRESSABLE (decl))
1483 : : {
1484 : : /* Request that OMP lowering make 'decl' addressable. */
1485 : 381 : OMP_CLAUSE_MAP_DECL_MAKE_ADDRESSABLE (new_clause) = 1;
1486 : :
1487 : 381 : if (dump_enabled_p ())
1488 : : {
1489 : 381 : location_t loc = OMP_CLAUSE_LOCATION (new_clause);
1490 : 381 : const dump_user_location_t d_u_loc
1491 : 381 : = dump_user_location_t::from_location_t (loc);
1492 : : /* PR100695 "Format decoder, quoting in 'dump_printf'
1493 : : etc." */
1494 : : #if __GNUC__ >= 10
1495 : 381 : # pragma GCC diagnostic push
1496 : 381 : # pragma GCC diagnostic ignored "-Wformat"
1497 : : #endif
1498 : 381 : dump_printf_loc
1499 : 381 : (MSG_NOTE, d_u_loc,
1500 : : "OpenACC %<kernels%> decomposition:"
1501 : : " variable %<%T%> in %qs clause"
1502 : : " requested to be made addressable\n",
1503 : : decl,
1504 : : user_omp_clause_code_name (new_clause, true));
1505 : : #if __GNUC__ >= 10
1506 : 381 : # pragma GCC diagnostic pop
1507 : : #endif
1508 : : }
1509 : : }
1510 : : }
1511 : : break;
1512 : :
1513 : : case GOMP_MAP_POINTER:
1514 : : case GOMP_MAP_TO_PSET:
1515 : : case GOMP_MAP_FIRSTPRIVATE_POINTER:
1516 : : case GOMP_MAP_FIRSTPRIVATE_REFERENCE:
1517 : : /* ??? Copying these map kinds leads to internal compiler
1518 : : errors in later passes. */
1519 : : break;
1520 : : }
1521 : : }
1522 : 344 : else if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_IF
1523 : 344 : || OMP_CLAUSE_CODE (c) == OMP_CLAUSE_SELF)
1524 : : {
1525 : : /* If there is an 'if' or 'self' clause, it must be duplicated to the
1526 : : enclosing data region. Temporarily remove its chain to avoid
1527 : : copying it. */
1528 : 170 : tree saved_chain = OMP_CLAUSE_CHAIN (c);
1529 : 170 : OMP_CLAUSE_CHAIN (c) = NULL;
1530 : 170 : tree new_clause = unshare_expr (c);
1531 : 170 : OMP_CLAUSE_CHAIN (c) = saved_chain;
1532 : 170 : OMP_CLAUSE_CHAIN (new_clause) = data_clauses;
1533 : 170 : data_clauses = new_clause;
1534 : : }
1535 : : }
1536 : : /* Restore the original order of the clauses. */
1537 : 680 : data_clauses = nreverse (data_clauses);
1538 : :
1539 : 680 : gimple *data_region
1540 : 680 : = gimple_build_omp_target (NULL, GF_OMP_TARGET_KIND_OACC_DATA_KERNELS,
1541 : : data_clauses);
1542 : 680 : gimple_set_location (data_region, loc);
1543 : :
1544 : : /* Transform the body of the kernels region into a sequence of compute
1545 : : constructs. */
1546 : 680 : gimple *body = decompose_kernels_region_body (kernels_stmt,
1547 : : kernels_clauses);
1548 : :
1549 : : /* Put the transformed pieces together. The entire body of the region is
1550 : : wrapped in a try-finally statement that calls __builtin_GOACC_data_end
1551 : : for cleanup. */
1552 : 670 : gimple *try_stmt = make_data_region_try_statement (loc, body);
1553 : 670 : gimple_omp_set_body (data_region, try_stmt);
1554 : :
1555 : 670 : return data_region;
1556 : : }
1557 : :
1558 : :
1559 : : /* Decompose OpenACC 'kernels' constructs in the current function. */
1560 : :
1561 : : static tree
1562 : 105687 : omp_oacc_kernels_decompose_callback_stmt (gimple_stmt_iterator *gsi_p,
1563 : : bool *handled_ops_p,
1564 : : struct walk_stmt_info *)
1565 : : {
1566 : 105687 : gimple *stmt = gsi_stmt (*gsi_p);
1567 : :
1568 : 105687 : if ((gimple_code (stmt) == GIMPLE_OMP_TARGET)
1569 : 105687 : && gimple_omp_target_kind (stmt) == GF_OMP_TARGET_KIND_OACC_KERNELS)
1570 : : {
1571 : 680 : gimple *stmt_new = omp_oacc_kernels_decompose_1 (stmt);
1572 : 670 : gsi_replace (gsi_p, stmt_new, false);
1573 : 670 : *handled_ops_p = true;
1574 : : }
1575 : : else
1576 : 105007 : *handled_ops_p = false;
1577 : :
1578 : 105677 : return NULL;
1579 : : }
1580 : :
1581 : : static unsigned int
1582 : 517 : omp_oacc_kernels_decompose (void)
1583 : : {
1584 : 517 : gimple_seq body = gimple_body (current_function_decl);
1585 : :
1586 : 517 : struct walk_stmt_info wi;
1587 : 517 : memset (&wi, 0, sizeof (wi));
1588 : 517 : walk_gimple_seq_mod (&body, omp_oacc_kernels_decompose_callback_stmt, NULL,
1589 : : &wi);
1590 : :
1591 : 507 : gimple_set_body (current_function_decl, body);
1592 : :
1593 : 507 : return 0;
1594 : : }
1595 : :
1596 : :
1597 : : namespace {
1598 : :
1599 : : const pass_data pass_data_omp_oacc_kernels_decompose =
1600 : : {
1601 : : GIMPLE_PASS, /* type */
1602 : : "omp_oacc_kernels_decompose", /* name */
1603 : : OPTGROUP_OMP, /* optinfo_flags */
1604 : : TV_NONE, /* tv_id */
1605 : : PROP_gimple_any, /* properties_required */
1606 : : 0, /* properties_provided */
1607 : : 0, /* properties_destroyed */
1608 : : 0, /* todo_flags_start */
1609 : : 0, /* todo_flags_finish */
1610 : : };
1611 : :
1612 : : class pass_omp_oacc_kernels_decompose : public gimple_opt_pass
1613 : : {
1614 : : public:
1615 : 282866 : pass_omp_oacc_kernels_decompose (gcc::context *ctxt)
1616 : 565732 : : gimple_opt_pass (pass_data_omp_oacc_kernels_decompose, ctxt)
1617 : : {}
1618 : :
1619 : : /* opt_pass methods: */
1620 : 2711012 : bool gate (function *) final override
1621 : : {
1622 : 2711012 : return (flag_openacc
1623 : 2711012 : && param_openacc_kernels == OPENACC_KERNELS_DECOMPOSE);
1624 : : }
1625 : 517 : unsigned int execute (function *) final override
1626 : : {
1627 : 517 : return omp_oacc_kernels_decompose ();
1628 : : }
1629 : :
1630 : : }; // class pass_omp_oacc_kernels_decompose
1631 : :
1632 : : } // anon namespace
1633 : :
1634 : : gimple_opt_pass *
1635 : 282866 : make_pass_omp_oacc_kernels_decompose (gcc::context *ctxt)
1636 : : {
1637 : 282866 : return new pass_omp_oacc_kernels_decompose (ctxt);
1638 : : }
|