Line data Source code
1 : /* Expansion pass for OMP directives. Outlines regions of certain OMP
2 : directives to separate functions, converts others into explicit calls to the
3 : runtime library (libgomp) and so forth
4 :
5 : Copyright (C) 2005-2026 Free Software Foundation, Inc.
6 :
7 : This file is part of GCC.
8 :
9 : GCC is free software; you can redistribute it and/or modify it under
10 : the terms of the GNU General Public License as published by the Free
11 : Software Foundation; either version 3, or (at your option) any later
12 : version.
13 :
14 : GCC is distributed in the hope that it will be useful, but WITHOUT ANY
15 : WARRANTY; without even the implied warranty of MERCHANTABILITY or
16 : FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
17 : for more details.
18 :
19 : You should have received a copy of the GNU General Public License
20 : along with GCC; see the file COPYING3. If not see
21 : <http://www.gnu.org/licenses/>. */
22 :
23 : #include "config.h"
24 : #include "system.h"
25 : #include "coretypes.h"
26 : #include "memmodel.h"
27 : #include "backend.h"
28 : #include "target.h"
29 : #include "rtl.h"
30 : #include "tree.h"
31 : #include "gimple.h"
32 : #include "cfghooks.h"
33 : #include "tree-pass.h"
34 : #include "ssa.h"
35 : #include "optabs.h"
36 : #include "cgraph.h"
37 : #include "pretty-print.h"
38 : #include "diagnostic-core.h"
39 : #include "fold-const.h"
40 : #include "stor-layout.h"
41 : #include "cfganal.h"
42 : #include "internal-fn.h"
43 : #include "gimplify.h"
44 : #include "gimple-iterator.h"
45 : #include "gimplify-me.h"
46 : #include "gimple-walk.h"
47 : #include "tree-cfg.h"
48 : #include "tree-into-ssa.h"
49 : #include "tree-ssa.h"
50 : #include "splay-tree.h"
51 : #include "cfgloop.h"
52 : #include "omp-general.h"
53 : #include "omp-offload.h"
54 : #include "tree-cfgcleanup.h"
55 : #include "alloc-pool.h"
56 : #include "symbol-summary.h"
57 : #include "gomp-constants.h"
58 : #include "gimple-pretty-print.h"
59 : #include "stringpool.h"
60 : #include "attribs.h"
61 : #include "tree-eh.h"
62 : #include "opts.h"
63 :
64 : /* OMP region information. Every parallel and workshare
65 : directive is enclosed between two markers, the OMP_* directive
66 : and a corresponding GIMPLE_OMP_RETURN statement. */
67 :
struct omp_region
{
  /* The enclosing region, or NULL for a toplevel region.  */
  struct omp_region *outer;

  /* First child region, or NULL if this region has no children.  */
  struct omp_region *inner;

  /* Next peer region in the parent's child list (or in the toplevel
     list rooted at root_omp_region).  */
  struct omp_region *next;

  /* Block containing the omp directive as its last stmt.  */
  basic_block entry;

  /* Block containing the GIMPLE_OMP_RETURN as its last stmt.
     May be NULL while the region tree is being built.  */
  basic_block exit;

  /* Block containing the GIMPLE_OMP_CONTINUE as its last stmt.
     NULL for region types that have no continue statement.  */
  basic_block cont;

  /* If this is a combined parallel+workshare region, this is a list
     of additional arguments needed by the combined parallel+workshare
     library call.  Filled in by get_ws_args_for.  */
  vec<tree, va_gc> *ws_args;

  /* The code for the omp directive of this region.  */
  enum gimple_code type;

  /* Schedule kind, only used for GIMPLE_OMP_FOR type regions.  */
  enum omp_clause_schedule_kind sched_kind;

  /* Schedule modifiers (OMP_CLAUSE_SCHEDULE_{MONOTONIC,NONMONOTONIC} bits).  */
  unsigned char sched_modifiers;

  /* True if this is a combined parallel+workshare region.  */
  bool is_combined_parallel;

  /* Copy of fd.lastprivate_conditional != 0.  */
  bool has_lastprivate_conditional;

  /* The ordered stmt if type is GIMPLE_OMP_ORDERED and it has
     a depend clause.  */
  gomp_ordered *ord_stmt;
};
112 :
113 : static struct omp_region *root_omp_region;
114 : static bool omp_any_child_fn_dumped;
115 :
116 : static void expand_omp_build_assign (gimple_stmt_iterator *, tree, tree,
117 : bool = false);
118 : static gphi *find_phi_with_arg_on_edge (tree, edge);
119 : static void expand_omp (struct omp_region *region);
120 :
121 : /* Return true if REGION is a combined parallel+workshare region. */
122 :
123 : static inline bool
124 43233 : is_combined_parallel (struct omp_region *region)
125 : {
126 43233 : return region->is_combined_parallel;
127 : }
128 :
/* Return true if REGION is or is contained within an offload region.  */
130 :
131 : static bool
132 11149 : is_in_offload_region (struct omp_region *region)
133 : {
134 30004 : gimple *entry_stmt = last_nondebug_stmt (region->entry);
135 30004 : if (is_gimple_omp (entry_stmt)
136 29048 : && is_gimple_omp_offloaded (entry_stmt))
137 : return true;
138 25694 : else if (region->outer)
139 : return is_in_offload_region (region->outer);
140 : else
141 6839 : return (lookup_attribute ("omp declare target",
142 6839 : DECL_ATTRIBUTES (current_function_decl))
143 6839 : != NULL);
144 : }
145 :
146 : /* Given two blocks PAR_ENTRY_BB and WS_ENTRY_BB such that WS_ENTRY_BB
147 : is the immediate dominator of PAR_ENTRY_BB, return true if there
148 : are no data dependencies that would prevent expanding the parallel
149 : directive at PAR_ENTRY_BB as a combined parallel+workshare region.
150 :
151 : When expanding a combined parallel+workshare region, the call to
152 : the child function may need additional arguments in the case of
153 : GIMPLE_OMP_FOR regions. In some cases, these arguments are
154 : computed out of variables passed in from the parent to the child
155 : via 'struct .omp_data_s'. For instance:
156 :
157 : #pragma omp parallel for schedule (guided, i * 4)
158 : for (j ...)
159 :
160 : Is lowered into:
161 :
162 : # BLOCK 2 (PAR_ENTRY_BB)
163 : .omp_data_o.i = i;
164 : #pragma omp parallel [child fn: bar.omp_fn.0 ( ..., D.1598)
165 :
166 : # BLOCK 3 (WS_ENTRY_BB)
167 : .omp_data_i = &.omp_data_o;
168 : D.1667 = .omp_data_i->i;
169 : D.1598 = D.1667 * 4;
170 : #pragma omp for schedule (guided, D.1598)
171 :
172 : When we outline the parallel region, the call to the child function
173 : 'bar.omp_fn.0' will need the value D.1598 in its argument list, but
174 : that value is computed *after* the call site. So, in principle we
175 : cannot do the transformation.
176 :
177 : To see whether the code in WS_ENTRY_BB blocks the combined
178 : parallel+workshare call, we collect all the variables used in the
179 : GIMPLE_OMP_FOR header check whether they appear on the LHS of any
180 : statement in WS_ENTRY_BB. If so, then we cannot emit the combined
181 : call.
182 :
183 : FIXME. If we had the SSA form built at this point, we could merely
184 : hoist the code in block 3 into block 2 and be done with it. But at
185 : this point we don't have dataflow information and though we could
186 : hack something up here, it is really not worth the aggravation. */
187 :
188 : static bool
189 9094 : workshare_safe_to_combine_p (basic_block ws_entry_bb)
190 : {
191 9094 : struct omp_for_data fd;
192 9094 : gimple *ws_stmt = last_nondebug_stmt (ws_entry_bb);
193 :
194 9094 : if (gimple_code (ws_stmt) == GIMPLE_OMP_SECTIONS)
195 : return true;
196 :
197 8975 : gcc_assert (gimple_code (ws_stmt) == GIMPLE_OMP_FOR);
198 8975 : if (gimple_omp_for_kind (ws_stmt) != GF_OMP_FOR_KIND_FOR)
199 : return false;
200 :
201 8958 : omp_extract_for_data (as_a <gomp_for *> (ws_stmt), &fd, NULL);
202 :
203 8958 : if (fd.collapse > 1 && TREE_CODE (fd.loop.n2) != INTEGER_CST)
204 : return false;
205 7179 : if (fd.iter_type != long_integer_type_node)
206 : return false;
207 :
208 : /* FIXME. We give up too easily here. If any of these arguments
209 : are not constants, they will likely involve variables that have
210 : been mapped into fields of .omp_data_s for sharing with the child
211 : function. With appropriate data flow, it would be possible to
212 : see through this. */
213 1508 : if (!is_gimple_min_invariant (fd.loop.n1)
214 1278 : || !is_gimple_min_invariant (fd.loop.n2)
215 1149 : || !is_gimple_min_invariant (fd.loop.step)
216 2642 : || (fd.chunk_size && !is_gimple_min_invariant (fd.chunk_size)))
217 376 : return false;
218 :
219 : return true;
220 : }
221 :
222 : /* Adjust CHUNK_SIZE from SCHEDULE clause, depending on simd modifier
223 : presence (SIMD_SCHEDULE). */
224 :
225 : static tree
226 8326 : omp_adjust_chunk_size (tree chunk_size, bool simd_schedule, bool offload)
227 : {
228 8326 : if (!simd_schedule || integer_zerop (chunk_size))
229 8292 : return chunk_size;
230 :
231 34 : tree vf;
232 34 : tree type = TREE_TYPE (chunk_size);
233 :
234 34 : if (offload)
235 : {
236 2 : cfun->curr_properties &= ~PROP_gimple_lomp_dev;
237 2 : vf = build_call_expr_internal_loc (UNKNOWN_LOCATION, IFN_GOMP_MAX_VF,
238 : unsigned_type_node, 0);
239 2 : vf = fold_convert (type, vf);
240 : }
241 : else
242 : {
243 32 : poly_uint64 vf_num = omp_max_vf (false);
244 32 : if (known_eq (vf_num, 1U))
245 8293 : return chunk_size;
246 31 : vf = build_int_cst (type, vf_num);
247 : }
248 :
249 33 : tree vf_minus_one = fold_build2 (MINUS_EXPR, type, vf,
250 : build_int_cst (type, 1));
251 33 : tree negative_vf = fold_build1 (NEGATE_EXPR, type, vf);
252 33 : chunk_size = fold_build2 (PLUS_EXPR, type, chunk_size, vf_minus_one);
253 33 : return fold_build2 (BIT_AND_EXPR, type, chunk_size, negative_vf);
254 : }
255 :
256 : /* Collect additional arguments needed to emit a combined
257 : parallel+workshare call. WS_STMT is the workshare directive being
258 : expanded. */
259 :
260 : static vec<tree, va_gc> *
261 1196 : get_ws_args_for (gimple *par_stmt, gimple *ws_stmt, bool offload)
262 : {
263 1196 : tree t;
264 1196 : location_t loc = gimple_location (ws_stmt);
265 1196 : vec<tree, va_gc> *ws_args;
266 :
267 1196 : if (gomp_for *for_stmt = dyn_cast <gomp_for *> (ws_stmt))
268 : {
269 1084 : struct omp_for_data fd;
270 1084 : tree n1, n2;
271 :
272 1084 : omp_extract_for_data (for_stmt, &fd, NULL);
273 1084 : n1 = fd.loop.n1;
274 1084 : n2 = fd.loop.n2;
275 :
276 1084 : if (gimple_omp_for_combined_into_p (for_stmt))
277 : {
278 769 : tree innerc
279 769 : = omp_find_clause (gimple_omp_parallel_clauses (par_stmt),
280 : OMP_CLAUSE__LOOPTEMP_);
281 769 : gcc_assert (innerc);
282 769 : n1 = OMP_CLAUSE_DECL (innerc);
283 769 : innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
284 : OMP_CLAUSE__LOOPTEMP_);
285 769 : gcc_assert (innerc);
286 769 : n2 = OMP_CLAUSE_DECL (innerc);
287 : }
288 :
289 1084 : vec_alloc (ws_args, 3 + (fd.chunk_size != 0));
290 :
291 1084 : t = fold_convert_loc (loc, long_integer_type_node, n1);
292 1084 : ws_args->quick_push (t);
293 :
294 1084 : t = fold_convert_loc (loc, long_integer_type_node, n2);
295 1084 : ws_args->quick_push (t);
296 :
297 1084 : t = fold_convert_loc (loc, long_integer_type_node, fd.loop.step);
298 1084 : ws_args->quick_push (t);
299 :
300 1084 : if (fd.chunk_size)
301 : {
302 538 : t = fold_convert_loc (loc, long_integer_type_node, fd.chunk_size);
303 538 : t = omp_adjust_chunk_size (t, fd.simd_schedule, offload);
304 538 : ws_args->quick_push (t);
305 : }
306 :
307 1084 : return ws_args;
308 : }
309 112 : else if (gimple_code (ws_stmt) == GIMPLE_OMP_SECTIONS)
310 : {
311 : /* Number of sections is equal to the number of edges from the
312 : GIMPLE_OMP_SECTIONS_SWITCH statement, except for the one to
313 : the exit of the sections region. */
314 112 : basic_block bb = single_succ (gimple_bb (ws_stmt));
315 224 : t = build_int_cst (unsigned_type_node, EDGE_COUNT (bb->succs) - 1);
316 112 : vec_alloc (ws_args, 1);
317 112 : ws_args->quick_push (t);
318 112 : return ws_args;
319 : }
320 :
321 0 : gcc_unreachable ();
322 : }
323 :
324 : /* Discover whether REGION is a combined parallel+workshare region. */
325 :
static void
determine_parallel_type (struct omp_region *region)
{
  basic_block par_entry_bb, par_exit_bb;
  basic_block ws_entry_bb, ws_exit_bb;

  /* The region must be fully formed: a parallel with a nested
     workshare that has both its exit and continue blocks recorded.  */
  if (region == NULL || region->inner == NULL
      || region->exit == NULL || region->inner->exit == NULL
      || region->inner->cont == NULL)
    return;

  /* We only support parallel+for and parallel+sections.  */
  if (region->type != GIMPLE_OMP_PARALLEL
      || (region->inner->type != GIMPLE_OMP_FOR
	  && region->inner->type != GIMPLE_OMP_SECTIONS))
    return;

  /* Check for perfect nesting PAR_ENTRY_BB -> WS_ENTRY_BB and
     WS_EXIT_BB -> PAR_EXIT_BB.  */
  par_entry_bb = region->entry;
  par_exit_bb = region->exit;
  ws_entry_bb = region->inner->entry;
  ws_exit_bb = region->inner->exit;

  /* Give up for task reductions on the parallel, while it is implementable,
     adding another big set of APIs or slowing down the normal paths is
     not acceptable.  */
  tree pclauses
    = gimple_omp_parallel_clauses (last_nondebug_stmt (par_entry_bb));
  if (omp_find_clause (pclauses, OMP_CLAUSE__REDUCTEMP_))
    return;

  /* Combine only when the workshare immediately follows the parallel
     entry and immediately precedes the parallel exit, and either the
     front end marked the parallel as combined or the connecting blocks
     contain nothing but the directives themselves.  */
  if (single_succ (par_entry_bb) == ws_entry_bb
      && single_succ (ws_exit_bb) == par_exit_bb
      && workshare_safe_to_combine_p (ws_entry_bb)
      && (gimple_omp_parallel_combined_p (last_nondebug_stmt (par_entry_bb))
	  || (last_and_only_stmt (ws_entry_bb)
	      && last_and_only_stmt (par_exit_bb))))
    {
      gimple *par_stmt = last_nondebug_stmt (par_entry_bb);
      gimple *ws_stmt = last_nondebug_stmt (ws_entry_bb);

      if (region->inner->type == GIMPLE_OMP_FOR)
	{
	  /* If this is a combined parallel loop, we need to determine
	     whether or not to use the combined library calls.  There
	     are two cases where we do not apply the transformation:
	     static loops and any kind of ordered loop.  In the first
	     case, we already open code the loop so there is no need
	     to do anything else.  In the latter case, the combined
	     parallel loop call would still need extra synchronization
	     to implement ordered semantics, so there would not be any
	     gain in using the combined call.  */
	  tree clauses = gimple_omp_for_clauses (ws_stmt);
	  tree c = omp_find_clause (clauses, OMP_CLAUSE_SCHEDULE);
	  /* Also reject task reductions and pointer-typed conditional
	     lastprivate temporaries (note the embedded reassignment of
	     C in the last disjunct).  */
	  if (c == NULL
	      || ((OMP_CLAUSE_SCHEDULE_KIND (c) & OMP_CLAUSE_SCHEDULE_MASK)
		  == OMP_CLAUSE_SCHEDULE_STATIC)
	      || omp_find_clause (clauses, OMP_CLAUSE_ORDERED)
	      || omp_find_clause (clauses, OMP_CLAUSE__REDUCTEMP_)
	      || ((c = omp_find_clause (clauses, OMP_CLAUSE__CONDTEMP_))
		  && POINTER_TYPE_P (TREE_TYPE (OMP_CLAUSE_DECL (c)))))
	    return;
	}
      else if (region->inner->type == GIMPLE_OMP_SECTIONS
	       && (omp_find_clause (gimple_omp_sections_clauses (ws_stmt),
				    OMP_CLAUSE__REDUCTEMP_)
		   || omp_find_clause (gimple_omp_sections_clauses (ws_stmt),
				       OMP_CLAUSE__CONDTEMP_)))
	return;

      /* Mark both regions combined and record the extra library-call
	 arguments on the parallel region.  */
      region->is_combined_parallel = true;
      region->inner->is_combined_parallel = true;
      region->ws_args = get_ws_args_for (par_stmt, ws_stmt,
					 is_in_offload_region (region));
    }
}
403 :
404 : /* Debugging dumps for parallel regions. */
405 : void dump_omp_region (FILE *, struct omp_region *, int);
406 : void debug_omp_region (struct omp_region *);
407 : void debug_all_omp_regions (void);
408 :
409 : /* Dump the parallel region tree rooted at REGION. */
410 :
411 : void
412 203 : dump_omp_region (FILE *file, struct omp_region *region, int indent)
413 : {
414 248 : fprintf (file, "%*sbb %d: %s\n", indent, "", region->entry->index,
415 248 : gimple_code_name[region->type]);
416 :
417 248 : if (region->inner)
418 39 : dump_omp_region (file, region->inner, indent + 4);
419 :
420 248 : if (region->cont)
421 : {
422 99 : fprintf (file, "%*sbb %d: GIMPLE_OMP_CONTINUE\n", indent, "",
423 : region->cont->index);
424 : }
425 :
426 248 : if (region->exit)
427 225 : fprintf (file, "%*sbb %d: GIMPLE_OMP_RETURN\n", indent, "",
428 : region->exit->index);
429 : else
430 23 : fprintf (file, "%*s[no exit marker]\n", indent, "");
431 :
432 248 : if (region->next)
433 : dump_omp_region (file, region->next, indent);
434 203 : }
435 :
436 : DEBUG_FUNCTION void
437 0 : debug_omp_region (struct omp_region *region)
438 : {
439 0 : dump_omp_region (stderr, region, 0);
440 0 : }
441 :
442 : DEBUG_FUNCTION void
443 0 : debug_all_omp_regions (void)
444 : {
445 0 : dump_omp_region (stderr, root_omp_region, 0);
446 0 : }
447 :
448 : /* Create a new parallel region starting at STMT inside region PARENT. */
449 :
450 : static struct omp_region *
451 245295 : new_omp_region (basic_block bb, enum gimple_code type,
452 : struct omp_region *parent)
453 : {
454 245295 : struct omp_region *region = XCNEW (struct omp_region);
455 :
456 245295 : region->outer = parent;
457 245295 : region->entry = bb;
458 245295 : region->type = type;
459 :
460 245295 : if (parent)
461 : {
462 : /* This is a nested region. Add it to the list of inner
463 : regions in PARENT. */
464 127424 : region->next = parent->inner;
465 127424 : parent->inner = region;
466 : }
467 : else
468 : {
469 : /* This is a toplevel region. Add it to the list of toplevel
470 : regions in ROOT_OMP_REGION. */
471 117871 : region->next = root_omp_region;
472 117871 : root_omp_region = region;
473 : }
474 :
475 245295 : return region;
476 : }
477 :
478 : /* Release the memory associated with the region tree rooted at REGION. */
479 :
480 : static void
481 245295 : free_omp_region_1 (struct omp_region *region)
482 : {
483 245295 : struct omp_region *i, *n;
484 :
485 372719 : for (i = region->inner; i ; i = n)
486 : {
487 127424 : n = i->next;
488 127424 : free_omp_region_1 (i);
489 : }
490 :
491 245295 : free (region);
492 245295 : }
493 :
494 : /* Release the memory for the entire omp region tree. */
495 :
496 : void
497 2923257 : omp_free_regions (void)
498 : {
499 2923257 : struct omp_region *r, *n;
500 3041128 : for (r = root_omp_region; r ; r = n)
501 : {
502 117871 : n = r->next;
503 117871 : free_omp_region_1 (r);
504 : }
505 2923257 : root_omp_region = NULL;
506 2923257 : }
507 :
508 : /* A convenience function to build an empty GIMPLE_COND with just the
509 : condition. */
510 :
511 : static gcond *
512 112723 : gimple_build_cond_empty (tree cond)
513 : {
514 112723 : enum tree_code pred_code;
515 112723 : tree lhs, rhs;
516 :
517 112723 : gimple_cond_get_ops_from_tree (cond, &pred_code, &lhs, &rhs);
518 112723 : return gimple_build_cond (pred_code, lhs, rhs, NULL_TREE, NULL_TREE);
519 : }
520 :
521 : /* Change DECL_CONTEXT of CHILD_FNDECL to that of the parent function.
522 : Add CHILD_FNDECL to decl chain of the supercontext of the block
523 : ENTRY_BLOCK - this is the block which originally contained the
524 : code from which CHILD_FNDECL was created.
525 :
526 : Together, these actions ensure that the debug info for the outlined
527 : function will be emitted with the correct lexical scope. */
528 :
529 : static void
530 43252 : adjust_context_and_scope (struct omp_region *region, tree entry_block,
531 : tree child_fndecl)
532 : {
533 43252 : tree parent_fndecl = NULL_TREE;
534 43252 : gimple *entry_stmt;
535 : /* OMP expansion expands inner regions before outer ones, so if
536 : we e.g. have explicit task region nested in parallel region, when
537 : expanding the task region current_function_decl will be the original
538 : source function, but we actually want to use as context the child
539 : function of the parallel. */
540 43252 : for (region = region->outer;
541 64334 : region && parent_fndecl == NULL_TREE; region = region->outer)
542 21082 : switch (region->type)
543 : {
544 6832 : case GIMPLE_OMP_PARALLEL:
545 6832 : case GIMPLE_OMP_TASK:
546 6832 : case GIMPLE_OMP_TEAMS:
547 6832 : entry_stmt = last_nondebug_stmt (region->entry);
548 6832 : parent_fndecl = gimple_omp_taskreg_child_fn (entry_stmt);
549 6832 : break;
550 4538 : case GIMPLE_OMP_TARGET:
551 4538 : entry_stmt = last_nondebug_stmt (region->entry);
552 4538 : parent_fndecl
553 4538 : = gimple_omp_target_child_fn (as_a <gomp_target *> (entry_stmt));
554 4538 : break;
555 : default:
556 : break;
557 : }
558 :
559 43252 : if (parent_fndecl == NULL_TREE)
560 34988 : parent_fndecl = current_function_decl;
561 43252 : DECL_CONTEXT (child_fndecl) = parent_fndecl;
562 :
563 43252 : if (entry_block != NULL_TREE && TREE_CODE (entry_block) == BLOCK)
564 : {
565 43218 : tree b = BLOCK_SUPERCONTEXT (entry_block);
566 43218 : if (TREE_CODE (b) == BLOCK)
567 : {
568 43083 : DECL_CHAIN (child_fndecl) = BLOCK_VARS (b);
569 43083 : BLOCK_VARS (b) = child_fndecl;
570 : }
571 : }
572 43252 : }
573 :
574 : /* Build the function calls to GOMP_parallel etc to actually
575 : generate the parallel operation. REGION is the parallel region
576 : being expanded. BB is the block where to insert the code. WS_ARGS
577 : will be set if this is a call to a combined parallel+workshare
578 : construct, it contains the list of additional arguments needed by
579 : the workshare construct. */
580 :
static void
expand_parallel_call (struct omp_region *region, basic_block bb,
		      gomp_parallel *entry_stmt,
		      vec<tree, va_gc> *ws_args)
{
  tree t, t1, t2, val, cond, c, clauses, flags;
  gimple_stmt_iterator gsi;
  gimple *stmt;
  enum built_in_function start_ix;
  int start_ix2;
  location_t clause_loc;
  vec<tree, va_gc> *args;

  clauses = gimple_omp_parallel_clauses (entry_stmt);

  /* Determine what flavor of GOMP_parallel we will be
     emitting.  */
  start_ix = BUILT_IN_GOMP_PARALLEL;
  tree rtmp = omp_find_clause (clauses, OMP_CLAUSE__REDUCTEMP_);
  if (rtmp)
    start_ix = BUILT_IN_GOMP_PARALLEL_REDUCTIONS;
  else if (is_combined_parallel (region))
    {
      switch (region->inner->type)
	{
	case GIMPLE_OMP_FOR:
	  gcc_assert (region->inner->sched_kind != OMP_CLAUSE_SCHEDULE_AUTO);
	  /* START_IX2 is an offset added to
	     BUILT_IN_GOMP_PARALLEL_LOOP_STATIC below, selecting among
	     the GOMP_parallel_loop_* builtin entry points.  */
	  switch (region->inner->sched_kind)
	    {
	    case OMP_CLAUSE_SCHEDULE_RUNTIME:
	      /* For lastprivate(conditional:), our implementation
		 requires monotonic behavior.  */
	      if (region->inner->has_lastprivate_conditional != 0)
		start_ix2 = 3;
	      else if ((region->inner->sched_modifiers
			& OMP_CLAUSE_SCHEDULE_NONMONOTONIC) != 0)
		start_ix2 = 6;
	      else if ((region->inner->sched_modifiers
			& OMP_CLAUSE_SCHEDULE_MONOTONIC) == 0)
		start_ix2 = 7;
	      else
		start_ix2 = 3;
	      break;
	    case OMP_CLAUSE_SCHEDULE_DYNAMIC:
	    case OMP_CLAUSE_SCHEDULE_GUIDED:
	      /* Without an explicit monotonic modifier and without
		 conditional lastprivate, use the nonmonotonic variant
		 (offset by 3).  */
	      if ((region->inner->sched_modifiers
		   & OMP_CLAUSE_SCHEDULE_MONOTONIC) == 0
		  && !region->inner->has_lastprivate_conditional)
		{
		  start_ix2 = 3 + region->inner->sched_kind;
		  break;
		}
	      /* FALLTHRU */
	    default:
	      start_ix2 = region->inner->sched_kind;
	      break;
	    }
	  start_ix2 += (int) BUILT_IN_GOMP_PARALLEL_LOOP_STATIC;
	  start_ix = (enum built_in_function) start_ix2;
	  break;
	case GIMPLE_OMP_SECTIONS:
	  start_ix = BUILT_IN_GOMP_PARALLEL_SECTIONS;
	  break;
	default:
	  gcc_unreachable ();
	}
    }

  /* By default, the value of NUM_THREADS is zero (selected at run time)
     and there is no conditional.  */
  cond = NULL_TREE;
  val = build_int_cst (unsigned_type_node, 0);
  flags = build_int_cst (unsigned_type_node, 0);

  c = omp_find_clause (clauses, OMP_CLAUSE_IF);
  if (c)
    cond = OMP_CLAUSE_IF_EXPR (c);

  c = omp_find_clause (clauses, OMP_CLAUSE_NUM_THREADS);
  if (c)
    {
      val = OMP_CLAUSE_NUM_THREADS_EXPR (c);
      clause_loc = OMP_CLAUSE_LOCATION (c);
    }
  else
    clause_loc = gimple_location (entry_stmt);

  c = omp_find_clause (clauses, OMP_CLAUSE_PROC_BIND);
  if (c)
    flags = build_int_cst (unsigned_type_node, OMP_CLAUSE_PROC_BIND_KIND (c));

  /* Ensure 'val' is of the correct type.  */
  val = fold_convert_loc (clause_loc, unsigned_type_node, val);

  /* If we found the clause 'if (cond)', build either
     (cond != 0) or (cond ? val : 1u).  */
  if (cond)
    {
      cond = gimple_boolify (cond);

      if (integer_zerop (val))
	/* num_threads was zero: collapse to (cond == 0), i.e. pass 1
	   thread when the condition is false, runtime choice when true.  */
	val = fold_build2_loc (clause_loc,
			       EQ_EXPR, unsigned_type_node, cond,
			       build_int_cst (TREE_TYPE (cond), 0));
      else
	{
	  /* Build a small CFG diamond computing
	     val = cond ? num_threads : 1.  */
	  basic_block cond_bb, then_bb, else_bb;
	  edge e, e_then, e_else;
	  tree tmp_then, tmp_else, tmp_join, tmp_var;

	  tmp_var = create_tmp_var (TREE_TYPE (val));
	  if (gimple_in_ssa_p (cfun))
	    {
	      tmp_then = make_ssa_name (tmp_var);
	      tmp_else = make_ssa_name (tmp_var);
	      tmp_join = make_ssa_name (tmp_var);
	    }
	  else
	    {
	      tmp_then = tmp_var;
	      tmp_else = tmp_var;
	      tmp_join = tmp_var;
	    }

	  /* Split BB after its labels; the first half becomes the
	     condition block, the second half receives the join.  */
	  e = split_block_after_labels (bb);
	  cond_bb = e->src;
	  bb = e->dest;
	  remove_edge (e);

	  then_bb = create_empty_bb (cond_bb);
	  else_bb = create_empty_bb (then_bb);
	  set_immediate_dominator (CDI_DOMINATORS, then_bb, cond_bb);
	  set_immediate_dominator (CDI_DOMINATORS, else_bb, cond_bb);

	  stmt = gimple_build_cond_empty (cond);
	  gsi = gsi_start_bb (cond_bb);
	  gsi_insert_after (&gsi, stmt, GSI_CONTINUE_LINKING);

	  gsi = gsi_start_bb (then_bb);
	  expand_omp_build_assign (&gsi, tmp_then, val, true);

	  gsi = gsi_start_bb (else_bb);
	  expand_omp_build_assign (&gsi, tmp_else,
				   build_int_cst (unsigned_type_node, 1),
				   true);

	  make_edge (cond_bb, then_bb, EDGE_TRUE_VALUE);
	  make_edge (cond_bb, else_bb, EDGE_FALSE_VALUE);
	  add_bb_to_loop (then_bb, cond_bb->loop_father);
	  add_bb_to_loop (else_bb, cond_bb->loop_father);
	  e_then = make_edge (then_bb, bb, EDGE_FALLTHRU);
	  e_else = make_edge (else_bb, bb, EDGE_FALLTHRU);

	  if (gimple_in_ssa_p (cfun))
	    {
	      gphi *phi = create_phi_node (tmp_join, bb);
	      add_phi_arg (phi, tmp_then, e_then, UNKNOWN_LOCATION);
	      add_phi_arg (phi, tmp_else, e_else, UNKNOWN_LOCATION);
	    }

	  val = tmp_join;
	}

      gsi = gsi_start_bb (bb);
      val = force_gimple_operand_gsi (&gsi, val, true, NULL_TREE,
				      false, GSI_CONTINUE_LINKING);
    }

  /* Assemble the argument list for the GOMP_parallel* call:
     child fn, data pointer, num_threads, [ws args,] flags.  */
  gsi = gsi_last_nondebug_bb (bb);
  t = gimple_omp_parallel_data_arg (entry_stmt);
  if (t == NULL)
    t1 = null_pointer_node;
  else
    t1 = build_fold_addr_expr (t);
  tree child_fndecl = gimple_omp_parallel_child_fn (entry_stmt);
  t2 = build_fold_addr_expr (child_fndecl);

  vec_alloc (args, 4 + vec_safe_length (ws_args));
  args->quick_push (t2);
  args->quick_push (t1);
  args->quick_push (val);
  if (ws_args)
    args->splice (*ws_args);
  args->quick_push (flags);

  t = build_call_expr_loc_vec (UNKNOWN_LOCATION,
			       builtin_decl_explicit (start_ix), args);

  if (rtmp)
    {
      /* GOMP_parallel_reductions returns a value; store it into the
	 _reductemp_ decl, going through pointer_sized_int_node.  */
      tree type = TREE_TYPE (OMP_CLAUSE_DECL (rtmp));
      t = build2 (MODIFY_EXPR, type, OMP_CLAUSE_DECL (rtmp),
		  fold_convert (type,
				fold_convert (pointer_sized_int_node, t)));
    }
  force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
			    false, GSI_CONTINUE_LINKING);
}
779 :
780 : /* Build the function call to GOMP_task to actually
781 : generate the task operation. BB is the block where to insert the code. */
782 :
static void
expand_task_call (struct omp_region *region, basic_block bb,
		  gomp_task *entry_stmt)
{
  tree t1, t2, t3;
  gimple_stmt_iterator gsi;
  location_t loc = gimple_location (entry_stmt);

  tree clauses = gimple_omp_task_clauses (entry_stmt);

  /* Collect the clauses that map onto GOMP_TASK_FLAG_* bits or extra
     call arguments.  */
  tree ifc = omp_find_clause (clauses, OMP_CLAUSE_IF);
  tree untied = omp_find_clause (clauses, OMP_CLAUSE_UNTIED);
  tree mergeable = omp_find_clause (clauses, OMP_CLAUSE_MERGEABLE);
  tree depend = omp_find_clause (clauses, OMP_CLAUSE_DEPEND);
  tree finalc = omp_find_clause (clauses, OMP_CLAUSE_FINAL);
  tree priority = omp_find_clause (clauses, OMP_CLAUSE_PRIORITY);
  tree detach = omp_find_clause (clauses, OMP_CLAUSE_DETACH);

  unsigned int iflags
    = (untied ? GOMP_TASK_FLAG_UNTIED : 0)
      | (mergeable ? GOMP_TASK_FLAG_MERGEABLE : 0)
      | (depend ? GOMP_TASK_FLAG_DEPEND : 0);

  bool taskloop_p = gimple_omp_task_taskloop_p (entry_stmt);
  tree startvar = NULL_TREE, endvar = NULL_TREE, step = NULL_TREE;
  tree num_tasks = NULL_TREE;
  bool ull = false;
  if (taskloop_p)
    {
      /* For a taskloop, the loop bounds/step come from the enclosing
	 GIMPLE_OMP_FOR region and its _looptemp_ clauses.  */
      gimple *g = last_nondebug_stmt (region->outer->entry);
      gcc_assert (gimple_code (g) == GIMPLE_OMP_FOR
		  && gimple_omp_for_kind (g) == GF_OMP_FOR_KIND_TASKLOOP);
      struct omp_for_data fd;
      omp_extract_for_data (as_a <gomp_for *> (g), &fd, NULL);
      startvar = omp_find_clause (clauses, OMP_CLAUSE__LOOPTEMP_);
      endvar = omp_find_clause (OMP_CLAUSE_CHAIN (startvar),
				OMP_CLAUSE__LOOPTEMP_);
      startvar = OMP_CLAUSE_DECL (startvar);
      endvar = OMP_CLAUSE_DECL (endvar);
      step = fold_convert_loc (loc, fd.iter_type, fd.loop.step);
      if (fd.loop.cond_code == LT_EXPR)
	iflags |= GOMP_TASK_FLAG_UP;
      tree tclauses = gimple_omp_for_clauses (g);
      /* NUM_TASKS and GRAINSIZE share the same library argument;
	 GOMP_TASK_FLAG_GRAINSIZE says which one was given.  */
      num_tasks = omp_find_clause (tclauses, OMP_CLAUSE_NUM_TASKS);
      if (num_tasks)
	{
	  if (OMP_CLAUSE_NUM_TASKS_STRICT (num_tasks))
	    iflags |= GOMP_TASK_FLAG_STRICT;
	  num_tasks = OMP_CLAUSE_NUM_TASKS_EXPR (num_tasks);
	}
      else
	{
	  num_tasks = omp_find_clause (tclauses, OMP_CLAUSE_GRAINSIZE);
	  if (num_tasks)
	    {
	      iflags |= GOMP_TASK_FLAG_GRAINSIZE;
	      if (OMP_CLAUSE_GRAINSIZE_STRICT (num_tasks))
		iflags |= GOMP_TASK_FLAG_STRICT;
	      num_tasks = OMP_CLAUSE_GRAINSIZE_EXPR (num_tasks);
	    }
	  else
	    num_tasks = integer_zero_node;
	}
      num_tasks = fold_convert_loc (loc, long_integer_type_node, num_tasks);
      /* Without an if clause the taskloop is unconditional.  */
      if (ifc == NULL_TREE)
	iflags |= GOMP_TASK_FLAG_IF;
      if (omp_find_clause (tclauses, OMP_CLAUSE_NOGROUP))
	iflags |= GOMP_TASK_FLAG_NOGROUP;
      ull = fd.iter_type == long_long_unsigned_type_node;
      if (omp_find_clause (clauses, OMP_CLAUSE_REDUCTION))
	iflags |= GOMP_TASK_FLAG_REDUCTION;
    }
  else
    {
      if (priority)
	iflags |= GOMP_TASK_FLAG_PRIORITY;
      if (detach)
	iflags |= GOMP_TASK_FLAG_DETACH;
    }

  tree flags = build_int_cst (unsigned_type_node, iflags);

  tree cond = boolean_true_node;
  if (ifc)
    {
      if (taskloop_p)
	{
	  /* GOMP_taskloop* has no separate cond argument; fold the if
	     clause into the GOMP_TASK_FLAG_IF bit at run time.  */
	  tree t = gimple_boolify (OMP_CLAUSE_IF_EXPR (ifc));
	  t = fold_build3_loc (loc, COND_EXPR, unsigned_type_node, t,
			       build_int_cst (unsigned_type_node,
					      GOMP_TASK_FLAG_IF),
			       build_int_cst (unsigned_type_node, 0));
	  flags = fold_build2_loc (loc, PLUS_EXPR, unsigned_type_node,
				   flags, t);
	}
      else
	cond = gimple_boolify (OMP_CLAUSE_IF_EXPR (ifc));
    }

  if (finalc)
    {
      /* Likewise fold the final clause into GOMP_TASK_FLAG_FINAL.  */
      tree t = gimple_boolify (OMP_CLAUSE_FINAL_EXPR (finalc));
      t = fold_build3_loc (loc, COND_EXPR, unsigned_type_node, t,
			   build_int_cst (unsigned_type_node,
					  GOMP_TASK_FLAG_FINAL),
			   build_int_cst (unsigned_type_node, 0));
      flags = fold_build2_loc (loc, PLUS_EXPR, unsigned_type_node, flags, t);
    }
  if (depend)
    depend = OMP_CLAUSE_DECL (depend);
  else
    depend = build_int_cst (ptr_type_node, 0);
  if (priority)
    priority = fold_convert (integer_type_node,
			     OMP_CLAUSE_PRIORITY_EXPR (priority));
  else
    priority = integer_zero_node;

  gsi = gsi_last_nondebug_bb (bb);

  detach = (detach
	    ? build_fold_addr_expr (OMP_CLAUSE_DECL (detach))
	    : null_pointer_node);

  /* T1 = child fn, T2 = data pointer (or NULL), T3 = copy fn (or NULL).  */
  tree t = gimple_omp_task_data_arg (entry_stmt);
  if (t == NULL)
    t2 = null_pointer_node;
  else
    t2 = build_fold_addr_expr_loc (loc, t);
  t1 = build_fold_addr_expr_loc (loc, gimple_omp_task_child_fn (entry_stmt));
  t = gimple_omp_task_copy_fn (entry_stmt);
  if (t == NULL)
    t3 = null_pointer_node;
  else
    t3 = build_fold_addr_expr_loc (loc, t);

  if (taskloop_p)
    t = build_call_expr (ull
			 ? builtin_decl_explicit (BUILT_IN_GOMP_TASKLOOP_ULL)
			 : builtin_decl_explicit (BUILT_IN_GOMP_TASKLOOP),
			 11, t1, t2, t3,
			 gimple_omp_task_arg_size (entry_stmt),
			 gimple_omp_task_arg_align (entry_stmt), flags,
			 num_tasks, priority, startvar, endvar, step);
  else
    t = build_call_expr (builtin_decl_explicit (BUILT_IN_GOMP_TASK),
			 10, t1, t2, t3,
			 gimple_omp_task_arg_size (entry_stmt),
			 gimple_omp_task_arg_align (entry_stmt), cond, flags,
			 depend, priority, detach);

  force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
			    false, GSI_CONTINUE_LINKING);
}
937 :
 938 : /* Build the function call to GOMP_taskwait_depend to actually
 939 : generate the taskwait operation. BB is the block where to insert the
 940 : code. */
 941 :
 942 : static void
 943 83 : expand_taskwait_call (basic_block bb, gomp_task *entry_stmt)
 944 : {
 945 83 : tree clauses = gimple_omp_task_clauses (entry_stmt);
 946 83 : tree depend = omp_find_clause (clauses, OMP_CLAUSE_DEPEND);
     : /* A taskwait without a depend clause needs no runtime call here.  */
 947 83 : if (depend == NULL_TREE)
 948 0 : return;
 949 :
 950 83 : depend = OMP_CLAUSE_DECL (depend);
 951 :
     : /* The nowait clause selects the non-blocking libgomp entry point.  */
 952 83 : bool nowait = omp_find_clause (clauses, OMP_CLAUSE_NOWAIT) != NULL_TREE;
 953 83 : gimple_stmt_iterator gsi = gsi_last_nondebug_bb (bb);
 954 50 : enum built_in_function f = (nowait
 955 83 : ? BUILT_IN_GOMP_TASKWAIT_DEPEND_NOWAIT
 956 : : BUILT_IN_GOMP_TASKWAIT_DEPEND);
     : /* Single argument: the dependence array from the depend clause.  */
 957 83 : tree t = build_call_expr (builtin_decl_explicit (f), 1, depend);
 958 :
 959 83 : force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
 960 : false, GSI_CONTINUE_LINKING);
 961 : }
962 :
 963 : /* Build the function call to GOMP_teams_reg to actually
 964 : generate the host teams operation. REGION is the teams region
 965 : being expanded. BB is the block where to insert the code. */
 966 :
 967 : static void
 968 2496 : expand_teams_call (basic_block bb, gomp_teams *entry_stmt)
 969 : {
 970 2496 : tree clauses = gimple_omp_teams_clauses (entry_stmt);
 971 2496 : tree num_teams = omp_find_clause (clauses, OMP_CLAUSE_NUM_TEAMS);
     : /* Absent clauses are passed as 0 — presumably "runtime picks the
     : default"; confirm against libgomp's GOMP_teams_reg.  */
 972 2496 : if (num_teams == NULL_TREE)
 973 2230 : num_teams = build_int_cst (unsigned_type_node, 0);
 974 : else
 975 : {
 976 266 : num_teams = OMP_CLAUSE_NUM_TEAMS_UPPER_EXPR (num_teams);
 977 266 : num_teams = fold_convert (unsigned_type_node, num_teams);
 978 : }
 979 2496 : tree thread_limit = omp_find_clause (clauses, OMP_CLAUSE_THREAD_LIMIT);
 980 2496 : if (thread_limit == NULL_TREE)
 981 2355 : thread_limit = build_int_cst (unsigned_type_node, 0);
 982 : else
 983 : {
 984 141 : thread_limit = OMP_CLAUSE_THREAD_LIMIT_EXPR (thread_limit);
 985 141 : thread_limit = fold_convert (unsigned_type_node, thread_limit);
 986 : }
 987 :
 988 2496 : gimple_stmt_iterator gsi = gsi_last_nondebug_bb (bb);
     : /* T1 is the address of the outlined data block (or NULL if the region
     : captures nothing), T2 the address of the outlined child function.  */
 989 2496 : tree t = gimple_omp_teams_data_arg (entry_stmt), t1;
 990 2496 : if (t == NULL)
 991 1374 : t1 = null_pointer_node;
 992 : else
 993 1122 : t1 = build_fold_addr_expr (t);
 994 2496 : tree child_fndecl = gimple_omp_teams_child_fn (entry_stmt);
 995 2496 : tree t2 = build_fold_addr_expr (child_fndecl);
 996 :
 997 2496 : vec<tree, va_gc> *args;
 998 2496 : vec_alloc (args, 5);
 999 2496 : args->quick_push (t2);
1000 2496 : args->quick_push (t1);
1001 2496 : args->quick_push (num_teams);
1002 2496 : args->quick_push (thread_limit);
1003 : /* For future extensibility. */
1004 2496 : args->quick_push (build_zero_cst (unsigned_type_node));
1005 :
1006 2496 : t = build_call_expr_loc_vec (UNKNOWN_LOCATION,
1007 : builtin_decl_explicit (BUILT_IN_GOMP_TEAMS_REG),
1008 : args);
1009 :
1010 2496 : force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
1011 : false, GSI_CONTINUE_LINKING);
1012 2496 : }
1013 :
1014 : /* Chain all the DECLs in LIST by their TREE_CHAIN fields. */
1015 :
1016 : static tree
1017 43252 : vec2chain (vec<tree, va_gc> *v)
1018 : {
1019 43252 : tree chain = NULL_TREE, t;
1020 43252 : unsigned ix;
1021 :
     : /* Iterate in reverse and prepend, so the resulting chain preserves
     : the vector's original order.  */
1022 441253 : FOR_EACH_VEC_SAFE_ELT_REVERSE (v, ix, t)
1023 : {
1024 360205 : DECL_CHAIN (t) = chain;
1025 360205 : chain = t;
1026 : }
1027 :
1028 43252 : return chain;
1029 : }
1030 :
1031 : /* Remove barriers in REGION->EXIT's block. Note that this is only
1032 : valid for GIMPLE_OMP_PARALLEL regions. Since the end of a parallel region
1033 : is an implicit barrier, any workshare inside the GIMPLE_OMP_PARALLEL that
1034 : left a barrier at the end of the GIMPLE_OMP_PARALLEL region can now be
1035 : removed. */
1036 :
1037 : static void
1038 11811 : remove_exit_barrier (struct omp_region *region)
1039 : {
1040 11811 : gimple_stmt_iterator gsi;
1041 11811 : basic_block exit_bb;
1042 11811 : edge_iterator ei;
1043 11811 : edge e;
1044 11811 : gimple *stmt;
     : /* Tri-state cache: -1 not yet computed, 0 none found, 1 found.
     : Computed once and reused for every predecessor edge.  */
1045 11811 : int any_addressable_vars = -1;
1046 :
1047 11811 : exit_bb = region->exit;
1048 :
1049 : /* If the parallel region doesn't return, we don't have REGION->EXIT
1050 : block at all. */
1051 11811 : if (! exit_bb)
1052 1196 : return;
1053 :
1054 : /* The last insn in the block will be the parallel's GIMPLE_OMP_RETURN. The
1055 : workshare's GIMPLE_OMP_RETURN will be in a preceding block. The kinds of
1056 : statements that can appear in between are extremely limited -- no
1057 : memory operations at all. Here, we allow nothing at all, so the
1058 : only thing we allow to precede this GIMPLE_OMP_RETURN is a label. */
1059 11784 : gsi = gsi_last_nondebug_bb (exit_bb);
1060 11784 : gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_RETURN);
1061 11784 : gsi_prev_nondebug (&gsi);
1062 11784 : if (!gsi_end_p (gsi) && gimple_code (gsi_stmt (gsi)) != GIMPLE_LABEL)
1063 : return;
1064 :
     : /* Look at each workshare GIMPLE_OMP_RETURN feeding the exit block and
     : mark it nowait (dropping its barrier) when that is provably safe.  */
1065 21593 : FOR_EACH_EDGE (e, ei, exit_bb->preds)
1066 : {
1067 10978 : gsi = gsi_last_nondebug_bb (e->src);
1068 10978 : if (gsi_end_p (gsi))
1069 43 : continue;
1070 10935 : stmt = gsi_stmt (gsi);
1071 10935 : if (gimple_code (stmt) == GIMPLE_OMP_RETURN
1072 10935 : && !gimple_omp_return_nowait_p (stmt))
1073 : {
1074 : /* OpenMP 3.0 tasks unfortunately prevent this optimization
1075 : in many cases. If there could be tasks queued, the barrier
1076 : might be needed to let the tasks run before some local
1077 : variable of the parallel that the task uses as shared
1078 : runs out of scope. The task can be spawned either
1079 : from within current function (this would be easy to check)
1080 : or from some function it calls and gets passed an address
1081 : of such a variable. */
1082 1040 : if (any_addressable_vars < 0)
1083 : {
1084 1040 : gomp_parallel *parallel_stmt
1085 1040 : = as_a <gomp_parallel *> (last_nondebug_stmt (region->entry));
1086 1040 : tree child_fun = gimple_omp_parallel_child_fn (parallel_stmt);
1087 1040 : tree local_decls, block, decl;
1088 1040 : unsigned ix;
1089 :
1090 1040 : any_addressable_vars = 0;
     : /* First scan the outlined child function's local decls ...  */
1091 8490 : FOR_EACH_LOCAL_DECL (DECL_STRUCT_FUNCTION (child_fun), ix, decl)
1092 7020 : if (TREE_ADDRESSABLE (decl))
1093 : {
1094 : any_addressable_vars = 1;
1095 : break;
1096 : }
     : /* ... then walk BLOCK_VARS from the workshare's block up to
     : (and including) the parallel's own block.  */
1097 1041 : for (block = gimple_block (stmt);
1098 1041 : !any_addressable_vars
1099 1041 : && block
1100 1041 : && TREE_CODE (block) == BLOCK;
1101 1 : block = BLOCK_SUPERCONTEXT (block))
1102 : {
1103 2 : for (local_decls = BLOCK_VARS (block);
1104 2 : local_decls;
1105 0 : local_decls = DECL_CHAIN (local_decls))
1106 0 : if (TREE_ADDRESSABLE (local_decls))
1107 : {
1108 : any_addressable_vars = 1;
1109 : break;
1110 : }
1111 2 : if (block == gimple_block (parallel_stmt))
1112 : break;
1113 : }
1114 : }
1115 1040 : if (!any_addressable_vars)
1116 512 : gimple_omp_return_set_nowait (stmt);
1117 : }
1118 : }
1119 : }
1120 :
1121 : static void
1122 68423 : remove_exit_barriers (struct omp_region *region)
1123 : {
1124 68423 : if (region->type == GIMPLE_OMP_PARALLEL)
1125 11811 : remove_exit_barrier (region);
1126 :
1127 68423 : if (region->inner)
1128 : {
1129 37837 : region = region->inner;
1130 37837 : remove_exit_barriers (region);
1131 81943 : while (region->next)
1132 : {
1133 6269 : region = region->next;
1134 6269 : remove_exit_barriers (region);
1135 : }
1136 : }
1137 68423 : }
1138 :
1139 : /* Optimize omp_get_thread_num () and omp_get_num_threads ()
1140 : calls. These can't be declared as const functions, but
1141 : within one parallel body they are constant, so they can be
1142 : transformed there into __builtin_omp_get_{thread_num,num_threads} ()
1143 : which are declared const. Similarly for task body, except
1144 : that in untied task omp_get_thread_num () can change at any task
1145 : scheduling point. */
1146 :
1147 : static void
1148 16899 : optimize_omp_library_calls (gimple *entry_stmt)
1149 : {
1150 16899 : basic_block bb;
1151 16899 : gimple_stmt_iterator gsi;
1152 16899 : tree thr_num_tree = builtin_decl_explicit (BUILT_IN_OMP_GET_THREAD_NUM);
1153 16899 : tree thr_num_id = DECL_ASSEMBLER_NAME (thr_num_tree);
1154 16899 : tree num_thr_tree = builtin_decl_explicit (BUILT_IN_OMP_GET_NUM_THREADS);
1155 16899 : tree num_thr_id = DECL_ASSEMBLER_NAME (num_thr_tree);
1156 16899 : bool untied_task = (gimple_code (entry_stmt) == GIMPLE_OMP_TASK
1157 19375 : && omp_find_clause (gimple_omp_task_clauses (entry_stmt),
1158 16899 : OMP_CLAUSE_UNTIED) != NULL);
1159 :
     : /* Scan every call in the current (outlined child) function.  */
1160 294475 : FOR_EACH_BB_FN (bb, cfun)
1161 1544850 : for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
1162 : {
1163 989698 : gimple *call = gsi_stmt (gsi);
1164 989698 : tree decl;
1165 :
     : /* Only consider calls to external public functions without a
     : body, i.e. plausible references to the libgomp entry points.  */
1166 989698 : if (is_gimple_call (call)
1167 83547 : && (decl = gimple_call_fndecl (call))
1168 76222 : && DECL_EXTERNAL (decl)
1169 64490 : && TREE_PUBLIC (decl)
1170 1054188 : && DECL_INITIAL (decl) == NULL)
1171 : {
1172 64489 : tree built_in;
1173 :
1174 64489 : if (DECL_NAME (decl) == thr_num_id)
1175 : {
1176 : /* In #pragma omp task untied omp_get_thread_num () can change
1177 : during the execution of the task region. */
1178 1308 : if (untied_task)
1179 0 : continue;
1180 1308 : built_in = builtin_decl_explicit (BUILT_IN_OMP_GET_THREAD_NUM);
1181 : }
1182 63181 : else if (DECL_NAME (decl) == num_thr_id)
1183 409 : built_in = builtin_decl_explicit (BUILT_IN_OMP_GET_NUM_THREADS);
1184 : else
1185 62772 : continue;
1186 :
     : /* Require an exact assembler-name match and a zero-argument
     : call before substituting the const builtin.  */
1187 1717 : if (DECL_ASSEMBLER_NAME (decl) != DECL_ASSEMBLER_NAME (built_in)
1188 1717 : || gimple_call_num_args (call) != 0)
1189 1188 : continue;
1190 :
1191 529 : if (flag_exceptions && !TREE_NOTHROW (decl))
1192 0 : continue;
1193 :
1194 529 : if (TREE_CODE (TREE_TYPE (decl)) != FUNCTION_TYPE
1195 1058 : || !types_compatible_p (TREE_TYPE (TREE_TYPE (decl)),
1196 529 : TREE_TYPE (TREE_TYPE (built_in))))
1197 0 : continue;
1198 :
1199 529 : gimple_call_set_fndecl (call, built_in);
1200 : }
1201 : }
1202 16899 : }
1203 :
1204 : /* Callback for expand_omp_build_assign. Return non-NULL if *tp needs to be
1205 : regimplified. */
1206 :
1207 : static tree
1208 263243 : expand_omp_regimplify_p (tree *tp, int *walk_subtrees, void *)
1209 : {
1210 263243 : tree t = *tp;
1211 :
1212 : /* Any variable with DECL_VALUE_EXPR needs to be regimplified. */
1213 263243 : if (VAR_P (t) && DECL_HAS_VALUE_EXPR_P (t))
1214 : return t;
1215 :
     : /* Side effect: keep TREE_INVARIANT/TREE_CONSTANT bits of ADDR_EXPRs
     : up to date while walking.  */
1216 263147 : if (TREE_CODE (t) == ADDR_EXPR)
1217 1763 : recompute_tree_invariant_for_addr_expr (t);
1218 :
     : /* Don't descend into types or declarations.  */
1219 263147 : *walk_subtrees = !TYPE_P (t) && !DECL_P (t);
1220 263147 : return NULL_TREE;
1221 : }
1222 :
1223 : /* Prepend or append TO = FROM assignment before or after *GSI_P. */
1224 :
1225 : static void
1226 64787 : expand_omp_build_assign (gimple_stmt_iterator *gsi_p, tree to, tree from,
1227 : bool after)
1228 : {
     : /* If TO is an addressable decl, FROM must be gimplified all the way
     : to an is_gimple_val operand.  */
1229 64787 : bool simple_p = DECL_P (to) && TREE_ADDRESSABLE (to);
1230 64787 : from = force_gimple_operand_gsi (gsi_p, from, simple_p, NULL_TREE,
1231 64787 : !after, after ? GSI_CONTINUE_LINKING
1232 : : GSI_SAME_STMT);
1233 64787 : gimple *stmt = gimple_build_assign (to, from);
1234 64787 : if (after)
1235 3493 : gsi_insert_after (gsi_p, stmt, GSI_CONTINUE_LINKING);
1236 : else
1237 61294 : gsi_insert_before (gsi_p, stmt, GSI_SAME_STMT);
     : /* Operands mentioning DECL_VALUE_EXPR vars must be regimplified;
     : see expand_omp_regimplify_p.  */
1238 64787 : if (walk_tree (&from, expand_omp_regimplify_p, NULL, NULL)
1239 64787 : || walk_tree (&to, expand_omp_regimplify_p, NULL, NULL))
1240 : {
1241 80 : gimple_stmt_iterator gsi = gsi_for_stmt (stmt);
1242 80 : gimple_regimplify_operands (stmt, &gsi);
1243 : }
1244 64787 : }
1245 :
1246 : /* Prepend or append LHS CODE RHS condition before or after *GSI_P. */
1247 :
1248 : static gcond *
1249 8434 : expand_omp_build_cond (gimple_stmt_iterator *gsi_p, enum tree_code code,
1250 : tree lhs, tree rhs, bool after = false)
1251 : {
1252 8434 : gcond *cond_stmt = gimple_build_cond (code, lhs, rhs, NULL_TREE, NULL_TREE);
1253 8434 : if (after)
1254 236 : gsi_insert_after (gsi_p, cond_stmt, GSI_CONTINUE_LINKING);
1255 : else
1256 8198 : gsi_insert_before (gsi_p, cond_stmt, GSI_SAME_STMT);
     : /* As in expand_omp_build_assign: regimplify operands referencing
     : DECL_VALUE_EXPR variables.  */
1257 8434 : if (walk_tree (gimple_cond_lhs_ptr (cond_stmt), expand_omp_regimplify_p,
1258 : NULL, NULL)
1259 8434 : || walk_tree (gimple_cond_rhs_ptr (cond_stmt), expand_omp_regimplify_p,
1260 : NULL, NULL))
1261 : {
1262 6 : gimple_stmt_iterator gsi = gsi_for_stmt (cond_stmt);
1263 6 : gimple_regimplify_operands (cond_stmt, &gsi);
1264 : }
1265 8434 : return cond_stmt;
1266 : }
1267 :
1268 : /* Expand the OpenMP parallel or task directive starting at REGION. */
1269 :
1270 : static void
1271 22619 : expand_omp_taskreg (struct omp_region *region)
1272 : {
1273 22619 : basic_block entry_bb, exit_bb, new_bb;
1274 22619 : struct function *child_cfun;
1275 22619 : tree child_fn, block, t;
1276 22619 : gimple_stmt_iterator gsi;
1277 22619 : gimple *entry_stmt, *stmt;
1278 22619 : edge e;
1279 22619 : vec<tree, va_gc> *ws_args;
1280 :
1281 22619 : entry_stmt = last_nondebug_stmt (region->entry);
     : /* A GIMPLE_OMP_TASK with taskwait_p set represents a stand-alone
     : taskwait-with-dependences; it has no body to outline.  */
1282 22619 : if (gimple_code (entry_stmt) == GIMPLE_OMP_TASK
1283 22619 : && gimple_omp_task_taskwait_p (entry_stmt))
1284 : {
1285 83 : new_bb = region->entry;
1286 83 : gsi = gsi_last_nondebug_bb (region->entry);
1287 83 : gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_TASK)
1288 83 : gsi_remove (&gsi, true);
1289 83 : expand_taskwait_call (new_bb, as_a <gomp_task *> (entry_stmt));
1290 83 : return;
1291 : }
1292 :
1293 22536 : child_fn = gimple_omp_taskreg_child_fn (entry_stmt);
1294 22536 : child_cfun = DECL_STRUCT_FUNCTION (child_fn);
1295 :
1296 22536 : entry_bb = region->entry;
1297 22536 : if (gimple_code (entry_stmt) == GIMPLE_OMP_TASK)
1298 3777 : exit_bb = region->cont;
1299 : else
1300 18759 : exit_bb = region->exit;
1301 :
1302 22536 : if (is_combined_parallel (region))
1303 1196 : ws_args = region->ws_args;
1304 : else
1305 : ws_args = NULL;
1306 :
1307 22536 : if (child_cfun->cfg)
1308 : {
1309 : /* Due to inlining, it may happen that we have already outlined
1310 : the region, in which case all we need to do is make the
1311 : sub-graph unreachable and emit the parallel call. */
1312 0 : edge entry_succ_e, exit_succ_e;
1313 :
1314 0 : entry_succ_e = single_succ_edge (entry_bb);
1315 :
1316 0 : gsi = gsi_last_nondebug_bb (entry_bb);
1317 0 : gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_PARALLEL
1318 : || gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_TASK
1319 : || gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_TEAMS);
1320 0 : gsi_remove (&gsi, true);
1321 :
1322 0 : new_bb = entry_bb;
1323 0 : if (exit_bb)
1324 : {
1325 0 : exit_succ_e = single_succ_edge (exit_bb);
1326 0 : make_edge (new_bb, exit_succ_e->dest, EDGE_FALLTHRU);
1327 : }
1328 0 : remove_edge_and_dominated_blocks (entry_succ_e);
1329 : }
1330 : else
1331 : {
1332 22536 : unsigned srcidx, dstidx, num;
1333 :
1334 : /* If the parallel region needs data sent from the parent
1335 : function, then the very first statement (except possible
1336 : tree profile counter updates) of the parallel body
1337 : is a copy assignment .OMP_DATA_I = &.OMP_DATA_O. Since
1338 : &.OMP_DATA_O is passed as an argument to the child function,
1339 : we need to replace it with the argument as seen by the child
1340 : function.
1341 :
1342 : In most cases, this will end up being the identity assignment
1343 : .OMP_DATA_I = .OMP_DATA_I. However, if the parallel body had
1344 : a function call that has been inlined, the original PARM_DECL
1345 : .OMP_DATA_I may have been converted into a different local
1346 : variable. In which case, we need to keep the assignment. */
1347 22536 : if (gimple_omp_taskreg_data_arg (entry_stmt))
1348 : {
1349 18076 : basic_block entry_succ_bb
1350 33061 : = single_succ_p (entry_bb) ? single_succ (entry_bb)
1351 3091 : : FALLTHRU_EDGE (entry_bb)->dest;
1352 18076 : tree arg;
1353 18076 : gimple *parcopy_stmt = NULL;
1354 :
     : /* Find the .OMP_DATA_I = &.OMP_DATA_O copy statement.  */
1355 36152 : for (gsi = gsi_start_bb (entry_succ_bb); ; gsi_next (&gsi))
1356 : {
1357 18076 : gimple *stmt;
1358 :
1359 18076 : gcc_assert (!gsi_end_p (gsi));
1360 18076 : stmt = gsi_stmt (gsi);
1361 18076 : if (gimple_code (stmt) != GIMPLE_ASSIGN)
1362 0 : continue;
1363 :
1364 18076 : if (gimple_num_ops (stmt) == 2)
1365 : {
1366 18076 : tree arg = gimple_assign_rhs1 (stmt);
1367 :
1368 : /* We're ignore the subcode because we're
1369 : effectively doing a STRIP_NOPS. */
1370 :
1371 18076 : if (TREE_CODE (arg) == ADDR_EXPR
1372 18076 : && (TREE_OPERAND (arg, 0)
1373 18076 : == gimple_omp_taskreg_data_arg (entry_stmt)))
1374 : {
1375 18076 : parcopy_stmt = stmt;
1376 18076 : break;
1377 : }
1378 : }
1379 : }
1380 :
1381 18076 : gcc_assert (parcopy_stmt != NULL);
1382 18076 : arg = DECL_ARGUMENTS (child_fn);
1383 :
1384 18076 : if (!gimple_in_ssa_p (cfun))
1385 : {
1386 17892 : if (gimple_assign_lhs (parcopy_stmt) == arg)
1387 17892 : gsi_remove (&gsi, true);
1388 : else
1389 : {
1390 : /* ?? Is setting the subcode really necessary ?? */
1391 0 : gimple_omp_set_subcode (parcopy_stmt, TREE_CODE (arg));
1392 0 : gimple_assign_set_rhs1 (parcopy_stmt, arg);
1393 : }
1394 : }
1395 : else
1396 : {
1397 184 : tree lhs = gimple_assign_lhs (parcopy_stmt);
1398 184 : gcc_assert (SSA_NAME_VAR (lhs) == arg);
1399 : /* We'd like to set the rhs to the default def in the child_fn,
1400 : but it's too early to create ssa names in the child_fn.
1401 : Instead, we set the rhs to the parm. In
1402 : move_sese_region_to_fn, we introduce a default def for the
1403 : parm, map the parm to it's default def, and once we encounter
1404 : this stmt, replace the parm with the default def. */
1405 184 : gimple_assign_set_rhs1 (parcopy_stmt, arg);
1406 184 : update_stmt (parcopy_stmt);
1407 : }
1408 : }
1409 :
1410 : /* Declare local variables needed in CHILD_CFUN. */
1411 22536 : block = DECL_INITIAL (child_fn);
1412 22536 : BLOCK_VARS (block) = vec2chain (child_cfun->local_decls);
1413 : /* The gimplifier could record temporaries in parallel/task block
1414 : rather than in containing function's local_decls chain,
1415 : which would mean cgraph missed finalizing them. Do it now. */
1416 252762 : for (t = BLOCK_VARS (block); t; t = DECL_CHAIN (t))
1417 230226 : if (VAR_P (t) && TREE_STATIC (t) && !DECL_EXTERNAL (t))
1418 0 : varpool_node::finalize_decl (t);
1419 22536 : DECL_SAVED_TREE (child_fn) = NULL;
1420 : /* We'll create a CFG for child_fn, so no gimple body is needed. */
1421 22536 : gimple_set_body (child_fn, NULL);
1422 22536 : TREE_USED (block) = 1;
1423 :
1424 : /* Reset DECL_CONTEXT on function arguments. */
1425 45072 : for (t = DECL_ARGUMENTS (child_fn); t; t = DECL_CHAIN (t))
1426 22536 : DECL_CONTEXT (t) = child_fn;
1427 :
1428 : /* Split ENTRY_BB at GIMPLE_OMP_PARALLEL or GIMPLE_OMP_TASK,
1429 : so that it can be moved to the child function. */
1430 22536 : gsi = gsi_last_nondebug_bb (entry_bb);
1431 22536 : stmt = gsi_stmt (gsi);
1432 22536 : gcc_assert (stmt && (gimple_code (stmt) == GIMPLE_OMP_PARALLEL
1433 : || gimple_code (stmt) == GIMPLE_OMP_TASK
1434 : || gimple_code (stmt) == GIMPLE_OMP_TEAMS));
1435 22536 : e = split_block (entry_bb, stmt);
1436 22536 : gsi_remove (&gsi, true);
1437 22536 : entry_bb = e->dest;
1438 22536 : edge e2 = NULL;
1439 22536 : if (gimple_code (entry_stmt) != GIMPLE_OMP_TASK)
1440 18759 : single_succ_edge (entry_bb)->flags = EDGE_FALLTHRU;
1441 : else
1442 : {
     : /* For tasks, redirect control past the task body (the body runs
     : in the child fn) and drop the region's GIMPLE_OMP_RETURN.  */
1443 3777 : e2 = make_edge (e->src, BRANCH_EDGE (entry_bb)->dest, EDGE_ABNORMAL);
1444 3777 : gcc_assert (e2->dest == region->exit);
1445 3777 : remove_edge (BRANCH_EDGE (entry_bb));
1446 3777 : set_immediate_dominator (CDI_DOMINATORS, e2->dest, e->src);
1447 3777 : gsi = gsi_last_nondebug_bb (region->exit);
1448 3777 : gcc_assert (!gsi_end_p (gsi)
1449 : && gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_RETURN);
1450 3777 : gsi_remove (&gsi, true);
1451 : }
1452 :
1453 : /* Convert GIMPLE_OMP_{RETURN,CONTINUE} into a RETURN_EXPR. */
1454 22536 : if (exit_bb)
1455 : {
1456 22492 : gsi = gsi_last_nondebug_bb (exit_bb);
1457 41224 : gcc_assert (!gsi_end_p (gsi)
1458 : && (gimple_code (gsi_stmt (gsi))
1459 : == (e2 ? GIMPLE_OMP_CONTINUE : GIMPLE_OMP_RETURN)));
1460 22492 : stmt = gimple_build_return (NULL);
1461 22492 : gsi_insert_after (&gsi, stmt, GSI_SAME_STMT);
1462 22492 : gsi_remove (&gsi, true);
1463 : }
1464 :
1465 : /* Move the parallel region into CHILD_CFUN. */
1466 :
1467 22536 : if (gimple_in_ssa_p (cfun))
1468 : {
1469 196 : init_tree_ssa (child_cfun);
1470 196 : init_ssa_operands (child_cfun);
1471 196 : child_cfun->gimple_df->in_ssa_p = true;
1472 196 : block = NULL_TREE;
1473 : }
1474 : else
1475 22340 : block = gimple_block (entry_stmt);
1476 :
1477 22536 : new_bb = move_sese_region_to_fn (child_cfun, entry_bb, exit_bb, block);
1478 22536 : if (exit_bb)
1479 22492 : single_succ_edge (new_bb)->flags = EDGE_FALLTHRU;
1480 22536 : if (e2)
1481 : {
1482 3777 : basic_block dest_bb = e2->dest;
1483 3777 : if (!exit_bb)
1484 17 : make_edge (new_bb, dest_bb, EDGE_FALLTHRU);
1485 3777 : remove_edge (e2);
1486 3777 : set_immediate_dominator (CDI_DOMINATORS, dest_bb, new_bb);
1487 : }
1488 : /* When the OMP expansion process cannot guarantee an up-to-date
1489 : loop tree arrange for the child function to fixup loops. */
1490 22536 : if (loops_state_satisfies_p (LOOPS_NEED_FIXUP))
1491 22340 : child_cfun->x_current_loops->state |= LOOPS_NEED_FIXUP;
1492 :
1493 : /* Remove non-local VAR_DECLs from child_cfun->local_decls list. */
1494 22536 : num = vec_safe_length (child_cfun->local_decls);
1495 1054858 : for (srcidx = 0, dstidx = 0; srcidx < num; srcidx++)
1496 : {
1497 1032322 : t = (*child_cfun->local_decls)[srcidx];
1498 1032322 : if (DECL_CONTEXT (t) == cfun->decl)
1499 230226 : continue;
1500 802096 : if (srcidx != dstidx)
1501 798455 : (*child_cfun->local_decls)[dstidx] = t;
1502 802096 : dstidx++;
1503 : }
1504 22536 : if (dstidx != num)
1505 20387 : vec_safe_truncate (child_cfun->local_decls, dstidx);
1506 :
1507 : /* Inform the callgraph about the new function. */
1508 22536 : child_cfun->curr_properties = cfun->curr_properties;
1509 22536 : child_cfun->has_simduid_loops |= cfun->has_simduid_loops;
1510 22536 : child_cfun->has_force_vectorize_loops |= cfun->has_force_vectorize_loops;
1511 22536 : cgraph_node *node = cgraph_node::get_create (child_fn);
1512 22536 : node->parallelized_function = 1;
1513 45072 : node->has_omp_variant_constructs
1514 22536 : |= cgraph_node::get (cfun->decl)->has_omp_variant_constructs;
1515 22536 : cgraph_node::add_new_function (child_fn, true);
1516 :
1517 22536 : bool need_asm = DECL_ASSEMBLER_NAME_SET_P (current_function_decl)
1518 22536 : && !DECL_ASSEMBLER_NAME_SET_P (child_fn);
1519 :
1520 : /* Fix the callgraph edges for child_cfun. Those for cfun will be
1521 : fixed in a following pass. */
1522 22536 : push_cfun (child_cfun);
1523 22536 : if (need_asm)
1524 22340 : assign_assembler_name_if_needed (child_fn);
1525 :
1526 22536 : if (optimize)
1527 16899 : optimize_omp_library_calls (entry_stmt);
1528 22536 : update_max_bb_count ();
1529 22536 : cgraph_edge::rebuild_edges ();
1530 :
1531 : /* Some EH regions might become dead, see PR34608. If
1532 : pass_cleanup_cfg isn't the first pass to happen with the
1533 : new child, these dead EH edges might cause problems.
1534 : Clean them up now. */
1535 22536 : if (flag_exceptions)
1536 : {
1537 10102 : basic_block bb;
1538 10102 : bool changed = false;
1539 :
1540 134610 : FOR_EACH_BB_FN (bb, cfun)
1541 124508 : changed |= gimple_purge_dead_eh_edges (bb);
1542 10102 : if (changed)
1543 0 : cleanup_tree_cfg ();
1544 : }
1545 22536 : if (gimple_in_ssa_p (cfun))
1546 196 : update_ssa (TODO_update_ssa);
1547 22536 : if (flag_checking && !loops_state_satisfies_p (LOOPS_NEED_FIXUP))
1548 196 : verify_loop_structure ();
1549 22536 : pop_cfun ();
1550 :
1551 22536 : if (dump_file && !gimple_in_ssa_p (cfun))
1552 : {
1553 20 : omp_any_child_fn_dumped = true;
1554 20 : dump_function_header (dump_file, child_fn, dump_flags);
1555 20 : dump_function_to_file (child_fn, dump_file, dump_flags);
1556 : }
1557 : }
1558 :
1559 22536 : adjust_context_and_scope (region, gimple_block (entry_stmt), child_fn);
1560 :
     : /* Finally emit the runtime call that launches the outlined body.  */
1561 22536 : if (gimple_code (entry_stmt) == GIMPLE_OMP_PARALLEL)
1562 16263 : expand_parallel_call (region, new_bb,
1563 : as_a <gomp_parallel *> (entry_stmt), ws_args);
1564 6273 : else if (gimple_code (entry_stmt) == GIMPLE_OMP_TEAMS)
1565 2496 : expand_teams_call (new_bb, as_a <gomp_teams *> (entry_stmt));
1566 : else
1567 3777 : expand_task_call (region, new_bb, as_a <gomp_task *> (entry_stmt));
1568 : }
1569 :
1570 : /* Information about members of an OpenACC collapsed loop nest.
     : One entry per collapsed loop; filled in by expand_oacc_collapse_init
     : and consumed by expand_oacc_collapse_vars.  */
1571 :
1572 : struct oacc_collapse
1573 : {
1574 : tree base; /* Base value. */
1575 : tree iters; /* Number of steps. */
1576 : tree step; /* Step size. */
1577 : tree tile; /* Tile increment (if tiled). */
1578 : tree outer; /* Tile iterator var. */
1579 : };
1580 :
1581 : /* Helper for expand_oacc_for. Determine collapsed loop information.
1582 : Fill in COUNTS array. Emit any initialization code before GSI.
1583 : Return the calculated outer loop bound of BOUND_TYPE. */
1584 :
1585 : static tree
1586 594 : expand_oacc_collapse_init (const struct omp_for_data *fd,
1587 : gimple_stmt_iterator *gsi,
1588 : oacc_collapse *counts, tree diff_type,
1589 : tree bound_type, location_t loc)
1590 : {
1591 594 : tree tiling = fd->tiling;
     : /* Running product of per-loop iteration counts; this becomes the
     : single collapsed loop's bound.  */
1592 594 : tree total = build_int_cst (bound_type, 1);
1593 594 : int ix;
1594 :
1595 594 : gcc_assert (integer_onep (fd->loop.step));
1596 594 : gcc_assert (integer_zerop (fd->loop.n1));
1597 :
1598 : /* When tiling, the first operand of the tile clause applies to the
1599 : innermost loop, and we work outwards from there. Seems
1600 : backwards, but whatever. */
1601 1890 : for (ix = fd->collapse; ix--;)
1602 : {
1603 1296 : const omp_for_data_loop *loop = &fd->loops[ix];
1604 :
1605 1296 : tree iter_type = TREE_TYPE (loop->v);
1606 1296 : tree plus_type = iter_type;
1607 :
1608 1296 : gcc_assert (loop->cond_code == LT_EXPR || loop->cond_code == GT_EXPR);
1609 :
1610 1296 : if (POINTER_TYPE_P (iter_type))
1611 0 : plus_type = sizetype;
1612 :
1613 1296 : if (tiling)
1614 : {
     : /* Emit IFN_GOACC_TILE to obtain this loop's tile increment; a
     : fresh .outer var will carry the tile-iteration value.  */
1615 284 : tree num = build_int_cst (integer_type_node, fd->collapse);
1616 284 : tree loop_no = build_int_cst (integer_type_node, ix);
1617 284 : tree tile = TREE_VALUE (tiling);
1618 284 : gcall *call
1619 284 : = gimple_build_call_internal (IFN_GOACC_TILE, 5, num, loop_no, tile,
1620 : /* gwv-outer=*/integer_zero_node,
1621 : /* gwv-inner=*/integer_zero_node);
1622 :
1623 284 : counts[ix].outer = create_tmp_var (iter_type, ".outer");
1624 284 : counts[ix].tile = create_tmp_var (diff_type, ".tile");
1625 284 : gimple_call_set_lhs (call, counts[ix].tile);
1626 284 : gimple_set_location (call, loc);
1627 284 : gsi_insert_before (gsi, call, GSI_SAME_STMT);
1628 :
1629 284 : tiling = TREE_CHAIN (tiling);
1630 : }
1631 : else
1632 : {
1633 1012 : counts[ix].tile = NULL;
1634 1012 : counts[ix].outer = loop->v;
1635 : }
1636 :
1637 1296 : tree b = loop->n1;
1638 1296 : tree e = loop->n2;
1639 1296 : tree s = loop->step;
1640 1296 : bool up = loop->cond_code == LT_EXPR;
1641 1344 : tree dir = build_int_cst (diff_type, up ? +1 : -1);
1642 1296 : bool negating;
1643 1296 : tree expr;
1644 :
1645 1296 : b = force_gimple_operand_gsi (gsi, b, true, NULL_TREE,
1646 : true, GSI_SAME_STMT);
1647 1296 : e = force_gimple_operand_gsi (gsi, e, true, NULL_TREE,
1648 : true, GSI_SAME_STMT);
1649 :
1650 : /* Convert the step, avoiding possible unsigned->signed overflow. */
1651 1296 : negating = !up && TYPE_UNSIGNED (TREE_TYPE (s));
1652 0 : if (negating)
1653 0 : s = fold_build1 (NEGATE_EXPR, TREE_TYPE (s), s);
1654 1296 : s = fold_convert (diff_type, s);
1655 1296 : if (negating)
1656 0 : s = fold_build1 (NEGATE_EXPR, diff_type, s);
1657 1296 : s = force_gimple_operand_gsi (gsi, s, true, NULL_TREE,
1658 : true, GSI_SAME_STMT);
1659 :
1660 : /* Determine the range, avoiding possible unsigned->signed overflow. */
1661 1296 : negating = !up && TYPE_UNSIGNED (iter_type);
1662 2592 : expr = fold_build2 (MINUS_EXPR, plus_type,
1663 : fold_convert (plus_type, negating ? b : e),
1664 : fold_convert (plus_type, negating ? e : b));
1665 1296 : expr = fold_convert (diff_type, expr);
1666 1296 : if (negating)
1667 0 : expr = fold_build1 (NEGATE_EXPR, diff_type, expr);
1668 1296 : tree range = force_gimple_operand_gsi
1669 1296 : (gsi, expr, true, NULL_TREE, true, GSI_SAME_STMT);
1670 :
1671 : /* Determine number of iterations. */
     : /* iters = (range - dir + step) / step, i.e. a round-toward-the
     : -iteration-direction division.  */
1672 1296 : expr = fold_build2 (MINUS_EXPR, diff_type, range, dir);
1673 1296 : expr = fold_build2 (PLUS_EXPR, diff_type, expr, s);
1674 1296 : expr = fold_build2 (TRUNC_DIV_EXPR, diff_type, expr, s);
1675 :
1676 1296 : tree iters = force_gimple_operand_gsi (gsi, expr, true, NULL_TREE,
1677 : true, GSI_SAME_STMT);
1678 :
1679 1296 : counts[ix].base = b;
1680 1296 : counts[ix].iters = iters;
1681 1296 : counts[ix].step = s;
1682 :
1683 1296 : total = fold_build2 (MULT_EXPR, bound_type, total,
1684 : fold_convert (bound_type, iters));
1685 : }
1686 :
1687 594 : return total;
1688 : }
1689 :
1690 : /* Emit initializers for collapsed loop members. INNER is true if
1691 : this is for the element loop of a TILE. IVAR is the outer
1692 : loop iteration variable, from which collapsed loop iteration values
1693 : are calculated. COUNTS array has been initialized by
1694 : expand_oacc_collapse_inits. */
1695 :
1696 : static void
1697 771 : expand_oacc_collapse_vars (const struct omp_for_data *fd, bool inner,
1698 : gimple_stmt_iterator *gsi,
1699 : const oacc_collapse *counts, tree ivar,
1700 : tree diff_type)
1701 : {
1702 771 : tree ivar_type = TREE_TYPE (ivar);
1703 :
1704 : /* The most rapidly changing iteration variable is the innermost
1705 : one. */
1706 2351 : for (int ix = fd->collapse; ix--;)
1707 : {
1708 1580 : const omp_for_data_loop *loop = &fd->loops[ix];
1709 1580 : const oacc_collapse *collapse = &counts[ix];
1710 1580 : tree v = inner ? loop->v : collapse->outer;
1711 1580 : tree iter_type = TREE_TYPE (v);
1712 1580 : tree plus_type = iter_type;
1713 1580 : enum tree_code plus_code = PLUS_EXPR;
1714 1580 : tree expr;
1715 :
1716 1580 : if (POINTER_TYPE_P (iter_type))
1717 : {
1718 0 : plus_code = POINTER_PLUS_EXPR;
1719 0 : plus_type = sizetype;
1720 : }
1721 :
     : /* Decompose the flat index: this loop's component is
     : IVAR % iters, and IVAR / iters is carried to the next outer
     : loop.  */
1722 1580 : expr = ivar;
1723 1580 : if (ix)
1724 : {
1725 809 : tree mod = fold_convert (ivar_type, collapse->iters);
1726 809 : ivar = fold_build2 (TRUNC_DIV_EXPR, ivar_type, expr, mod);
1727 809 : expr = fold_build2 (TRUNC_MOD_EXPR, ivar_type, expr, mod);
1728 809 : ivar = force_gimple_operand_gsi (gsi, ivar, true, NULL_TREE,
1729 : true, GSI_SAME_STMT);
1730 : }
1731 :
     : /* v = base + component * step (relative to the tile iterator var
     : when INNER).  */
1732 1580 : expr = fold_build2 (MULT_EXPR, diff_type, fold_convert (diff_type, expr),
1733 : fold_convert (diff_type, collapse->step));
1734 1580 : expr = fold_build2 (plus_code, iter_type,
1735 : inner ? collapse->outer : collapse->base,
1736 : fold_convert (plus_type, expr));
1737 1580 : expr = force_gimple_operand_gsi (gsi, expr, false, NULL_TREE,
1738 : true, GSI_SAME_STMT);
1739 1580 : gassign *ass = gimple_build_assign (v, expr);
1740 1580 : gsi_insert_before (gsi, ass, GSI_SAME_STMT);
1741 : }
1742 771 : }
1743 :
1744 : /* Helper function for expand_omp_{for_*,simd}. If this is the outermost
1745 : of the combined collapse > 1 loop constructs, generate code like:
1746 : if (__builtin_expect (N32 cond3 N31, 0)) goto ZERO_ITER_BB;
1747 : if (cond3 is <)
1748 : adj = STEP3 - 1;
1749 : else
1750 : adj = STEP3 + 1;
1751 : count3 = (adj + N32 - N31) / STEP3;
1752 : if (__builtin_expect (N22 cond2 N21, 0)) goto ZERO_ITER_BB;
1753 : if (cond2 is <)
1754 : adj = STEP2 - 1;
1755 : else
1756 : adj = STEP2 + 1;
1757 : count2 = (adj + N22 - N21) / STEP2;
1758 : if (__builtin_expect (N12 cond1 N11, 0)) goto ZERO_ITER_BB;
1759 : if (cond1 is <)
1760 : adj = STEP1 - 1;
1761 : else
1762 : adj = STEP1 + 1;
1763 : count1 = (adj + N12 - N11) / STEP1;
1764 : count = count1 * count2 * count3;
1765 : Furthermore, if ZERO_ITER_BB is NULL, create a BB which does:
1766 : count = 0;
1767 : and set ZERO_ITER_BB to that bb. If this isn't the outermost
1768 : of the combined loop constructs, just initialize COUNTS array
1769 : from the _looptemp_ clauses. For loop nests with non-rectangular
1770 : loops, do this only for the rectangular loops. Then pick
1771 : the loops which reference outer vars in their bound expressions
1772 : and the loops which they refer to and for this sub-nest compute
1773 : number of iterations. For triangular loops use Faulhaber's formula,
1774 : otherwise as a fallback, compute by iterating the loops.
1775 : If e.g. the sub-nest is
1776 : for (I = N11; I COND1 N12; I += STEP1)
1777 : for (J = M21 * I + N21; J COND2 M22 * I + N22; J += STEP2)
1778 : for (K = M31 * J + N31; K COND3 M32 * J + N32; K += STEP3)
1779 : do:
1780 : COUNT = 0;
1781 : for (tmpi = N11; tmpi COND1 N12; tmpi += STEP1)
1782 : for (tmpj = M21 * tmpi + N21;
1783 : tmpj COND2 M22 * tmpi + N22; tmpj += STEP2)
1784 : {
1785 : int tmpk1 = M31 * tmpj + N31;
1786 : int tmpk2 = M32 * tmpj + N32;
1787 : if (tmpk1 COND3 tmpk2)
1788 : {
1789 : if (COND3 is <)
1790 : adj = STEP3 - 1;
1791 : else
1792 : adj = STEP3 + 1;
1793 : COUNT += (adj + tmpk2 - tmpk1) / STEP3;
1794 : }
1795 : }
1796 : and finally multiply the counts of the rectangular loops not
1797 : in the sub-nest with COUNT. Also, as counts[fd->last_nonrect]
1798 : store number of iterations of the loops from fd->first_nonrect
1799 : to fd->last_nonrect inclusive, i.e. the above COUNT multiplied
1800 : by the counts of rectangular loops not referenced in any non-rectangular
1801 : loops sandwiched in between those. */
1802 :
1803 : /* NOTE: It *could* be better to moosh all of the BBs together,
1804 : creating one larger BB with all the computation and the unexpected
1805 : jump at the end. I.e.
1806 :
1807 : bool zero3, zero2, zero1, zero;
1808 :
1809 : zero3 = N32 c3 N31;
1810 : count3 = (N32 - N31) /[cl] STEP3;
1811 : zero2 = N22 c2 N21;
1812 : count2 = (N22 - N21) /[cl] STEP2;
1813 : zero1 = N12 c1 N11;
1814 : count1 = (N12 - N11) /[cl] STEP1;
1815 : zero = zero3 || zero2 || zero1;
1816 : count = count1 * count2 * count3;
1817 : if (__builtin_expect(zero, false)) goto zero_iter_bb;
1818 :
1819 : After all, we expect the zero=false, and thus we expect to have to
1820 : evaluate all of the comparison expressions, so short-circuiting
1821 : oughtn't be a win. Since the condition isn't protecting a
1822 : denominator, we're not concerned about divide-by-zero, so we can
1823 : fully evaluate count even if a numerator turned out to be wrong.
1824 :
1825 : It seems like putting this all together would create much better
1826 : scheduling opportunities, and less pressure on the chip's branch
1827 : predictor. */
1828 :
1829 : static void
1830 10461 : expand_omp_for_init_counts (struct omp_for_data *fd, gimple_stmt_iterator *gsi,
1831 : basic_block &entry_bb, tree *counts,
1832 : basic_block &zero_iter1_bb, int &first_zero_iter1,
1833 : basic_block &zero_iter2_bb, int &first_zero_iter2,
1834 : basic_block &l2_dom_bb)
1835 : {
1836 10461 : tree t, type = TREE_TYPE (fd->loop.v);
1837 10461 : edge e, ne;
1838 10461 : int i;
1839 :
1840 : /* Collapsed loops need work for expansion into SSA form. */
1841 10461 : gcc_assert (!gimple_in_ssa_p (cfun));
1842 :
1843 10461 : if (gimple_omp_for_combined_into_p (fd->for_stmt)
1844 10461 : && TREE_CODE (fd->loop.n2) != INTEGER_CST)
1845 : {
1846 2692 : gcc_assert (fd->ordered == 0);
1847 : /* First two _looptemp_ clauses are for istart/iend, counts[0]
1848 : isn't supposed to be handled, as the inner loop doesn't
1849 : use it. */
1850 2692 : tree innerc = omp_find_clause (gimple_omp_for_clauses (fd->for_stmt),
1851 : OMP_CLAUSE__LOOPTEMP_);
1852 2692 : gcc_assert (innerc);
1853 10553 : for (i = 0; i < fd->collapse; i++)
1854 : {
1855 7861 : innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
1856 : OMP_CLAUSE__LOOPTEMP_);
1857 7861 : gcc_assert (innerc);
1858 7861 : if (i)
1859 5169 : counts[i] = OMP_CLAUSE_DECL (innerc);
1860 : else
1861 2692 : counts[0] = NULL_TREE;
1862 : }
1863 2692 : if (fd->non_rect
1864 112 : && fd->last_nonrect == fd->first_nonrect + 1
1865 2756 : && !TYPE_UNSIGNED (TREE_TYPE (fd->loops[fd->last_nonrect].v)))
1866 : {
1867 : tree c[4];
1868 300 : for (i = 0; i < 4; i++)
1869 : {
1870 240 : innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
1871 : OMP_CLAUSE__LOOPTEMP_);
1872 240 : gcc_assert (innerc);
1873 240 : c[i] = OMP_CLAUSE_DECL (innerc);
1874 : }
1875 60 : counts[0] = c[0];
1876 60 : fd->first_inner_iterations = c[1];
1877 60 : fd->factor = c[2];
1878 60 : fd->adjn1 = c[3];
1879 : }
1880 2692 : return;
1881 : }
1882 :
1883 8662 : for (i = fd->collapse; i < fd->ordered; i++)
1884 : {
1885 893 : tree itype = TREE_TYPE (fd->loops[i].v);
1886 893 : counts[i] = NULL_TREE;
1887 893 : t = fold_binary (fd->loops[i].cond_code, boolean_type_node,
1888 : fold_convert (itype, fd->loops[i].n1),
1889 : fold_convert (itype, fd->loops[i].n2));
1890 893 : if (t && integer_zerop (t))
1891 : {
1892 0 : for (i = fd->collapse; i < fd->ordered; i++)
1893 0 : counts[i] = build_int_cst (type, 0);
1894 : break;
1895 : }
1896 : }
1897 7769 : bool rect_count_seen = false;
1898 7769 : bool init_n2 = SSA_VAR_P (fd->loop.n2) && zero_iter1_bb;
1899 30153 : for (i = 0; i < (fd->ordered ? fd->ordered : fd->collapse); i++)
1900 : {
1901 22384 : tree itype = TREE_TYPE (fd->loops[i].v);
1902 :
1903 22384 : if (i >= fd->collapse && counts[i])
1904 0 : continue;
1905 22384 : if (fd->non_rect)
1906 : {
1907 : /* Skip loops that use outer iterators in their expressions
1908 : during this phase. */
1909 1050 : if (fd->loops[i].m1 || fd->loops[i].m2)
1910 : {
1911 432 : counts[i] = build_zero_cst (type);
1912 432 : continue;
1913 : }
1914 : }
1915 21952 : if ((SSA_VAR_P (fd->loop.n2) || i >= fd->collapse)
1916 9514 : && ((t = fold_binary (fd->loops[i].cond_code, boolean_type_node,
1917 : fold_convert (itype, fd->loops[i].n1),
1918 : fold_convert (itype, fd->loops[i].n2)))
1919 5909 : == NULL_TREE || !integer_onep (t)))
1920 : {
1921 5789 : gcond *cond_stmt;
1922 5789 : tree n1, n2;
1923 5789 : if (init_n2 && i < fd->collapse && !rect_count_seen)
1924 : {
1925 : /* When called with non-NULL zero_iter1_bb, we won't clear
1926 : fd->loop.n2 in the if (zero_iter_bb == NULL) code below
1927 : and if it is prior to storing fd->loop.n2 where
1928 : rect_count_seen is set, it could be used uninitialized.
1929 : As zero_iter1_bb in that case can be reached also if there
1930 : are non-zero iterations, the clearing can't be emitted
1931 : to the zero_iter1_bb, but needs to be done before the
1932 : condition. */
1933 1505 : gassign *assign_stmt
1934 1505 : = gimple_build_assign (fd->loop.n2, build_zero_cst (type));
1935 1505 : gsi_insert_before (gsi, assign_stmt, GSI_SAME_STMT);
1936 1505 : init_n2 = false;
1937 : }
1938 5789 : n1 = fold_convert (itype, unshare_expr (fd->loops[i].n1));
1939 5789 : n1 = force_gimple_operand_gsi (gsi, n1, true, NULL_TREE,
1940 : true, GSI_SAME_STMT);
1941 5789 : n2 = fold_convert (itype, unshare_expr (fd->loops[i].n2));
1942 5789 : n2 = force_gimple_operand_gsi (gsi, n2, true, NULL_TREE,
1943 : true, GSI_SAME_STMT);
1944 5789 : cond_stmt = expand_omp_build_cond (gsi, fd->loops[i].cond_code,
1945 : n1, n2);
1946 5789 : e = split_block (entry_bb, cond_stmt);
1947 216 : basic_block &zero_iter_bb
1948 5789 : = i < fd->collapse ? zero_iter1_bb : zero_iter2_bb;
1949 216 : int &first_zero_iter
1950 5789 : = i < fd->collapse ? first_zero_iter1 : first_zero_iter2;
1951 5789 : if (zero_iter_bb == NULL)
1952 : {
1953 626 : gassign *assign_stmt;
1954 626 : first_zero_iter = i;
1955 626 : zero_iter_bb = create_empty_bb (entry_bb);
1956 626 : add_bb_to_loop (zero_iter_bb, entry_bb->loop_father);
1957 626 : *gsi = gsi_after_labels (zero_iter_bb);
1958 626 : if (i < fd->collapse)
1959 508 : assign_stmt = gimple_build_assign (fd->loop.n2,
1960 : build_zero_cst (type));
1961 : else
1962 : {
1963 118 : counts[i] = create_tmp_reg (type, ".count");
1964 118 : assign_stmt
1965 118 : = gimple_build_assign (counts[i], build_zero_cst (type));
1966 : }
1967 626 : gsi_insert_before (gsi, assign_stmt, GSI_SAME_STMT);
1968 626 : set_immediate_dominator (CDI_DOMINATORS, zero_iter_bb,
1969 : entry_bb);
1970 : }
1971 5789 : ne = make_edge (entry_bb, zero_iter_bb, EDGE_FALSE_VALUE);
1972 5789 : ne->probability = profile_probability::very_unlikely ();
1973 5789 : e->flags = EDGE_TRUE_VALUE;
1974 5789 : e->probability = ne->probability.invert ();
1975 5789 : if (l2_dom_bb == NULL)
1976 2591 : l2_dom_bb = entry_bb;
1977 5789 : entry_bb = e->dest;
1978 5789 : *gsi = gsi_last_nondebug_bb (entry_bb);
1979 : }
1980 :
1981 21952 : if (POINTER_TYPE_P (itype))
1982 1661 : itype = signed_type_for (itype);
1983 21952 : t = build_int_cst (itype, (fd->loops[i].cond_code == LT_EXPR
1984 27056 : ? -1 : 1));
1985 21952 : t = fold_build2 (PLUS_EXPR, itype,
1986 : fold_convert (itype, fd->loops[i].step), t);
1987 21952 : t = fold_build2 (PLUS_EXPR, itype, t,
1988 : fold_convert (itype, fd->loops[i].n2));
1989 21952 : t = fold_build2 (MINUS_EXPR, itype, t,
1990 : fold_convert (itype, fd->loops[i].n1));
1991 : /* ?? We could probably use CEIL_DIV_EXPR instead of
1992 : TRUNC_DIV_EXPR and adjusting by hand. Unless we can't
1993 : generate the same code in the end because generically we
1994 : don't know that the values involved must be negative for
1995 : GT?? */
1996 21952 : if (TYPE_UNSIGNED (itype) && fd->loops[i].cond_code == GT_EXPR)
1997 2248 : t = fold_build2 (TRUNC_DIV_EXPR, itype,
1998 : fold_build1 (NEGATE_EXPR, itype, t),
1999 : fold_build1 (NEGATE_EXPR, itype,
2000 : fold_convert (itype,
2001 : fd->loops[i].step)));
2002 : else
2003 19704 : t = fold_build2 (TRUNC_DIV_EXPR, itype, t,
2004 : fold_convert (itype, fd->loops[i].step));
2005 21952 : t = fold_convert (type, t);
2006 21952 : if (TREE_CODE (t) == INTEGER_CST)
2007 16093 : counts[i] = t;
2008 : else
2009 : {
2010 5859 : if (i < fd->collapse || i != first_zero_iter2)
2011 5741 : counts[i] = create_tmp_reg (type, ".count");
2012 5859 : expand_omp_build_assign (gsi, counts[i], t);
2013 : }
2014 21952 : if (SSA_VAR_P (fd->loop.n2) && i < fd->collapse)
2015 : {
2016 8621 : if (fd->non_rect && i >= fd->first_nonrect && i <= fd->last_nonrect)
2017 376 : continue;
2018 8245 : if (!rect_count_seen)
2019 : {
2020 2995 : t = counts[i];
2021 2995 : rect_count_seen = true;
2022 : }
2023 : else
2024 5250 : t = fold_build2 (MULT_EXPR, type, fd->loop.n2, counts[i]);
2025 8245 : expand_omp_build_assign (gsi, fd->loop.n2, t);
2026 : }
2027 : }
2028 7769 : if (fd->non_rect && SSA_VAR_P (fd->loop.n2))
2029 : {
2030 284 : gcc_assert (fd->last_nonrect != -1);
2031 :
2032 284 : counts[fd->last_nonrect] = create_tmp_reg (type, ".count");
2033 284 : expand_omp_build_assign (gsi, counts[fd->last_nonrect],
2034 : build_zero_cst (type));
2035 366 : for (i = fd->first_nonrect + 1; i < fd->last_nonrect; i++)
2036 113 : if (fd->loops[i].m1
2037 83 : || fd->loops[i].m2
2038 82 : || fd->loops[i].non_rect_referenced)
2039 : break;
2040 284 : if (i == fd->last_nonrect
2041 253 : && fd->loops[i].outer == fd->last_nonrect - fd->first_nonrect
2042 253 : && !POINTER_TYPE_P (TREE_TYPE (fd->loops[i].v))
2043 511 : && !TYPE_UNSIGNED (TREE_TYPE (fd->loops[i].v)))
2044 : {
2045 225 : int o = fd->first_nonrect;
2046 225 : tree itype = TREE_TYPE (fd->loops[o].v);
2047 225 : tree n1o = create_tmp_reg (itype, ".n1o");
2048 225 : t = fold_convert (itype, unshare_expr (fd->loops[o].n1));
2049 225 : expand_omp_build_assign (gsi, n1o, t);
2050 225 : tree n2o = create_tmp_reg (itype, ".n2o");
2051 225 : t = fold_convert (itype, unshare_expr (fd->loops[o].n2));
2052 225 : expand_omp_build_assign (gsi, n2o, t);
2053 225 : if (fd->loops[i].m1 && fd->loops[i].m2)
2054 43 : t = fold_build2 (MINUS_EXPR, itype, unshare_expr (fd->loops[i].m2),
2055 : unshare_expr (fd->loops[i].m1));
2056 182 : else if (fd->loops[i].m1)
2057 162 : t = fold_build1 (NEGATE_EXPR, itype,
2058 : unshare_expr (fd->loops[i].m1));
2059 : else
2060 20 : t = unshare_expr (fd->loops[i].m2);
2061 225 : tree m2minusm1
2062 225 : = force_gimple_operand_gsi (gsi, t, true, NULL_TREE,
2063 : true, GSI_SAME_STMT);
2064 :
2065 225 : gimple_stmt_iterator gsi2 = *gsi;
2066 225 : gsi_prev (&gsi2);
2067 225 : e = split_block (entry_bb, gsi_stmt (gsi2));
2068 225 : e = split_block (e->dest, (gimple *) NULL);
2069 225 : basic_block bb1 = e->src;
2070 225 : entry_bb = e->dest;
2071 225 : *gsi = gsi_after_labels (entry_bb);
2072 :
2073 225 : gsi2 = gsi_after_labels (bb1);
2074 225 : tree ostep = fold_convert (itype, fd->loops[o].step);
2075 225 : t = build_int_cst (itype, (fd->loops[o].cond_code
2076 241 : == LT_EXPR ? -1 : 1));
2077 225 : t = fold_build2 (PLUS_EXPR, itype, ostep, t);
2078 225 : t = fold_build2 (PLUS_EXPR, itype, t, n2o);
2079 225 : t = fold_build2 (MINUS_EXPR, itype, t, n1o);
2080 225 : if (TYPE_UNSIGNED (itype)
2081 225 : && fd->loops[o].cond_code == GT_EXPR)
2082 0 : t = fold_build2 (TRUNC_DIV_EXPR, itype,
2083 : fold_build1 (NEGATE_EXPR, itype, t),
2084 : fold_build1 (NEGATE_EXPR, itype, ostep));
2085 : else
2086 225 : t = fold_build2 (TRUNC_DIV_EXPR, itype, t, ostep);
2087 225 : tree outer_niters
2088 225 : = force_gimple_operand_gsi (&gsi2, t, true, NULL_TREE,
2089 : true, GSI_SAME_STMT);
2090 225 : t = fold_build2 (MINUS_EXPR, itype, outer_niters,
2091 : build_one_cst (itype));
2092 225 : t = fold_build2 (MULT_EXPR, itype, t, ostep);
2093 225 : t = fold_build2 (PLUS_EXPR, itype, n1o, t);
2094 225 : tree last = force_gimple_operand_gsi (&gsi2, t, true, NULL_TREE,
2095 : true, GSI_SAME_STMT);
2096 225 : tree n1, n2, n1e, n2e;
2097 225 : t = fold_convert (itype, unshare_expr (fd->loops[i].n1));
2098 225 : if (fd->loops[i].m1)
2099 : {
2100 205 : n1 = fold_convert (itype, unshare_expr (fd->loops[i].m1));
2101 205 : n1 = fold_build2 (MULT_EXPR, itype, n1o, n1);
2102 205 : n1 = fold_build2 (PLUS_EXPR, itype, n1, t);
2103 : }
2104 : else
2105 : n1 = t;
2106 225 : n1 = force_gimple_operand_gsi (&gsi2, n1, true, NULL_TREE,
2107 : true, GSI_SAME_STMT);
2108 225 : t = fold_convert (itype, unshare_expr (fd->loops[i].n2));
2109 225 : if (fd->loops[i].m2)
2110 : {
2111 63 : n2 = fold_convert (itype, unshare_expr (fd->loops[i].m2));
2112 63 : n2 = fold_build2 (MULT_EXPR, itype, n1o, n2);
2113 63 : n2 = fold_build2 (PLUS_EXPR, itype, n2, t);
2114 : }
2115 : else
2116 : n2 = t;
2117 225 : n2 = force_gimple_operand_gsi (&gsi2, n2, true, NULL_TREE,
2118 : true, GSI_SAME_STMT);
2119 225 : t = fold_convert (itype, unshare_expr (fd->loops[i].n1));
2120 225 : if (fd->loops[i].m1)
2121 : {
2122 205 : n1e = fold_convert (itype, unshare_expr (fd->loops[i].m1));
2123 205 : n1e = fold_build2 (MULT_EXPR, itype, last, n1e);
2124 205 : n1e = fold_build2 (PLUS_EXPR, itype, n1e, t);
2125 : }
2126 : else
2127 : n1e = t;
2128 225 : n1e = force_gimple_operand_gsi (&gsi2, n1e, true, NULL_TREE,
2129 : true, GSI_SAME_STMT);
2130 225 : t = fold_convert (itype, unshare_expr (fd->loops[i].n2));
2131 225 : if (fd->loops[i].m2)
2132 : {
2133 63 : n2e = fold_convert (itype, unshare_expr (fd->loops[i].m2));
2134 63 : n2e = fold_build2 (MULT_EXPR, itype, last, n2e);
2135 63 : n2e = fold_build2 (PLUS_EXPR, itype, n2e, t);
2136 : }
2137 : else
2138 : n2e = t;
2139 225 : n2e = force_gimple_operand_gsi (&gsi2, n2e, true, NULL_TREE,
2140 : true, GSI_SAME_STMT);
2141 225 : gcond *cond_stmt
2142 225 : = expand_omp_build_cond (&gsi2, fd->loops[i].cond_code,
2143 : n1, n2);
2144 225 : e = split_block (bb1, cond_stmt);
2145 225 : e->flags = EDGE_TRUE_VALUE;
2146 225 : e->probability = profile_probability::likely ().guessed ();
2147 225 : basic_block bb2 = e->dest;
2148 225 : gsi2 = gsi_after_labels (bb2);
2149 :
2150 225 : cond_stmt = expand_omp_build_cond (&gsi2, fd->loops[i].cond_code,
2151 : n1e, n2e);
2152 225 : e = split_block (bb2, cond_stmt);
2153 225 : e->flags = EDGE_TRUE_VALUE;
2154 225 : e->probability = profile_probability::likely ().guessed ();
2155 225 : gsi2 = gsi_after_labels (e->dest);
2156 :
2157 225 : tree step = fold_convert (itype, fd->loops[i].step);
2158 225 : t = build_int_cst (itype, (fd->loops[i].cond_code
2159 241 : == LT_EXPR ? -1 : 1));
2160 225 : t = fold_build2 (PLUS_EXPR, itype, step, t);
2161 225 : t = fold_build2 (PLUS_EXPR, itype, t, n2);
2162 225 : t = fold_build2 (MINUS_EXPR, itype, t, n1);
2163 225 : if (TYPE_UNSIGNED (itype)
2164 225 : && fd->loops[i].cond_code == GT_EXPR)
2165 0 : t = fold_build2 (TRUNC_DIV_EXPR, itype,
2166 : fold_build1 (NEGATE_EXPR, itype, t),
2167 : fold_build1 (NEGATE_EXPR, itype, step));
2168 : else
2169 225 : t = fold_build2 (TRUNC_DIV_EXPR, itype, t, step);
2170 225 : tree first_inner_iterations
2171 225 : = force_gimple_operand_gsi (&gsi2, t, true, NULL_TREE,
2172 : true, GSI_SAME_STMT);
2173 225 : t = fold_build2 (MULT_EXPR, itype, m2minusm1, ostep);
2174 225 : if (TYPE_UNSIGNED (itype)
2175 225 : && fd->loops[i].cond_code == GT_EXPR)
2176 0 : t = fold_build2 (TRUNC_DIV_EXPR, itype,
2177 : fold_build1 (NEGATE_EXPR, itype, t),
2178 : fold_build1 (NEGATE_EXPR, itype, step));
2179 : else
2180 225 : t = fold_build2 (TRUNC_DIV_EXPR, itype, t, step);
2181 225 : tree factor
2182 225 : = force_gimple_operand_gsi (&gsi2, t, true, NULL_TREE,
2183 : true, GSI_SAME_STMT);
2184 225 : t = fold_build2 (MINUS_EXPR, itype, outer_niters,
2185 : build_one_cst (itype));
2186 225 : t = fold_build2 (MULT_EXPR, itype, t, outer_niters);
2187 225 : t = fold_build2 (RSHIFT_EXPR, itype, t, integer_one_node);
2188 225 : t = fold_build2 (MULT_EXPR, itype, factor, t);
2189 225 : t = fold_build2 (PLUS_EXPR, itype,
2190 : fold_build2 (MULT_EXPR, itype, outer_niters,
2191 : first_inner_iterations), t);
2192 225 : expand_omp_build_assign (&gsi2, counts[fd->last_nonrect],
2193 : fold_convert (type, t));
2194 :
2195 225 : basic_block bb3 = create_empty_bb (bb1);
2196 225 : add_bb_to_loop (bb3, bb1->loop_father);
2197 :
2198 225 : e = make_edge (bb1, bb3, EDGE_FALSE_VALUE);
2199 225 : e->probability = profile_probability::unlikely ().guessed ();
2200 :
2201 225 : gsi2 = gsi_after_labels (bb3);
2202 225 : cond_stmt = expand_omp_build_cond (&gsi2, fd->loops[i].cond_code,
2203 : n1e, n2e);
2204 225 : e = split_block (bb3, cond_stmt);
2205 225 : e->flags = EDGE_TRUE_VALUE;
2206 225 : e->probability = profile_probability::likely ().guessed ();
2207 225 : basic_block bb4 = e->dest;
2208 :
2209 225 : ne = make_edge (bb3, entry_bb, EDGE_FALSE_VALUE);
2210 225 : ne->probability = e->probability.invert ();
2211 :
2212 225 : basic_block bb5 = create_empty_bb (bb2);
2213 225 : add_bb_to_loop (bb5, bb2->loop_father);
2214 :
2215 225 : ne = make_edge (bb2, bb5, EDGE_FALSE_VALUE);
2216 225 : ne->probability = profile_probability::unlikely ().guessed ();
2217 :
2218 675 : for (int j = 0; j < 2; j++)
2219 : {
2220 450 : gsi2 = gsi_after_labels (j ? bb5 : bb4);
2221 450 : t = fold_build2 (MINUS_EXPR, itype,
2222 : unshare_expr (fd->loops[i].n1),
2223 : unshare_expr (fd->loops[i].n2));
2224 450 : t = fold_build2 (TRUNC_DIV_EXPR, itype, t, m2minusm1);
2225 450 : tree tem
2226 450 : = force_gimple_operand_gsi (&gsi2, t, true, NULL_TREE,
2227 : true, GSI_SAME_STMT);
2228 450 : t = fold_build2 (MINUS_EXPR, itype, tem, n1o);
2229 450 : t = fold_build2 (TRUNC_MOD_EXPR, itype, t, ostep);
2230 450 : t = fold_build2 (MINUS_EXPR, itype, tem, t);
2231 450 : tem = force_gimple_operand_gsi (&gsi2, t, true, NULL_TREE,
2232 : true, GSI_SAME_STMT);
2233 450 : t = fold_convert (itype, unshare_expr (fd->loops[i].n1));
2234 450 : if (fd->loops[i].m1)
2235 : {
2236 410 : n1 = fold_convert (itype, unshare_expr (fd->loops[i].m1));
2237 410 : n1 = fold_build2 (MULT_EXPR, itype, tem, n1);
2238 410 : n1 = fold_build2 (PLUS_EXPR, itype, n1, t);
2239 : }
2240 : else
2241 : n1 = t;
2242 450 : n1 = force_gimple_operand_gsi (&gsi2, n1, true, NULL_TREE,
2243 : true, GSI_SAME_STMT);
2244 450 : t = fold_convert (itype, unshare_expr (fd->loops[i].n2));
2245 450 : if (fd->loops[i].m2)
2246 : {
2247 126 : n2 = fold_convert (itype, unshare_expr (fd->loops[i].m2));
2248 126 : n2 = fold_build2 (MULT_EXPR, itype, tem, n2);
2249 126 : n2 = fold_build2 (PLUS_EXPR, itype, n2, t);
2250 : }
2251 : else
2252 : n2 = t;
2253 450 : n2 = force_gimple_operand_gsi (&gsi2, n2, true, NULL_TREE,
2254 : true, GSI_SAME_STMT);
2255 675 : expand_omp_build_assign (&gsi2, j ? n2o : n1o, tem);
2256 :
2257 450 : cond_stmt = expand_omp_build_cond (&gsi2, fd->loops[i].cond_code,
2258 : n1, n2);
2259 450 : e = split_block (gsi_bb (gsi2), cond_stmt);
2260 450 : e->flags = j ? EDGE_TRUE_VALUE : EDGE_FALSE_VALUE;
2261 450 : e->probability = profile_probability::unlikely ().guessed ();
2262 675 : ne = make_edge (e->src, bb1,
2263 : j ? EDGE_FALSE_VALUE : EDGE_TRUE_VALUE);
2264 450 : ne->probability = e->probability.invert ();
2265 450 : gsi2 = gsi_after_labels (e->dest);
2266 :
2267 450 : t = fold_build2 (PLUS_EXPR, itype, tem, ostep);
2268 450 : expand_omp_build_assign (&gsi2, j ? n2o : n1o, t);
2269 :
2270 450 : make_edge (e->dest, bb1, EDGE_FALLTHRU);
2271 : }
2272 :
2273 225 : set_immediate_dominator (CDI_DOMINATORS, bb3, bb1);
2274 225 : set_immediate_dominator (CDI_DOMINATORS, bb5, bb2);
2275 225 : set_immediate_dominator (CDI_DOMINATORS, entry_bb, bb1);
2276 :
2277 225 : if (fd->first_nonrect + 1 == fd->last_nonrect)
2278 : {
2279 153 : fd->first_inner_iterations = first_inner_iterations;
2280 153 : fd->factor = factor;
2281 153 : fd->adjn1 = n1o;
2282 : }
2283 : }
2284 : else
2285 : {
2286 : /* Fallback implementation. Evaluate the loops with m1/m2
2287 : non-NULL as well as their outer loops at runtime using temporaries
2288 : instead of the original iteration variables, and in the
2289 : body just bump the counter. */
2290 59 : gimple_stmt_iterator gsi2 = *gsi;
2291 59 : gsi_prev (&gsi2);
2292 59 : e = split_block (entry_bb, gsi_stmt (gsi2));
2293 59 : e = split_block (e->dest, (gimple *) NULL);
2294 59 : basic_block cur_bb = e->src;
2295 59 : basic_block next_bb = e->dest;
2296 59 : entry_bb = e->dest;
2297 59 : *gsi = gsi_after_labels (entry_bb);
2298 :
2299 59 : tree *vs = XALLOCAVEC (tree, fd->last_nonrect);
2300 59 : memset (vs, 0, fd->last_nonrect * sizeof (tree));
2301 :
2302 189 : for (i = 0; i <= fd->last_nonrect; i++)
2303 : {
2304 189 : if (fd->loops[i].m1 == NULL_TREE
2305 108 : && fd->loops[i].m2 == NULL_TREE
2306 99 : && !fd->loops[i].non_rect_referenced)
2307 40 : continue;
2308 :
2309 149 : tree itype = TREE_TYPE (fd->loops[i].v);
2310 :
2311 149 : gsi2 = gsi_after_labels (cur_bb);
2312 149 : tree n1, n2;
2313 149 : t = fold_convert (itype, unshare_expr (fd->loops[i].n1));
2314 149 : if (fd->loops[i].m1 == NULL_TREE)
2315 : n1 = t;
2316 81 : else if (POINTER_TYPE_P (itype))
2317 : {
2318 30 : gcc_assert (integer_onep (fd->loops[i].m1));
2319 30 : t = unshare_expr (fd->loops[i].n1);
2320 30 : n1 = fold_build_pointer_plus (vs[i - fd->loops[i].outer], t);
2321 : }
2322 : else
2323 : {
2324 51 : n1 = fold_convert (itype, unshare_expr (fd->loops[i].m1));
2325 51 : n1 = fold_build2 (MULT_EXPR, itype,
2326 : vs[i - fd->loops[i].outer], n1);
2327 51 : n1 = fold_build2 (PLUS_EXPR, itype, n1, t);
2328 : }
2329 149 : n1 = force_gimple_operand_gsi (&gsi2, n1, true, NULL_TREE,
2330 : true, GSI_SAME_STMT);
2331 149 : if (i < fd->last_nonrect)
2332 : {
2333 90 : vs[i] = create_tmp_reg (itype, ".it");
2334 90 : expand_omp_build_assign (&gsi2, vs[i], n1);
2335 : }
2336 149 : t = fold_convert (itype, unshare_expr (fd->loops[i].n2));
2337 149 : if (fd->loops[i].m2 == NULL_TREE)
2338 : n2 = t;
2339 85 : else if (POINTER_TYPE_P (itype))
2340 : {
2341 34 : gcc_assert (integer_onep (fd->loops[i].m2));
2342 34 : t = unshare_expr (fd->loops[i].n2);
2343 34 : n2 = fold_build_pointer_plus (vs[i - fd->loops[i].outer], t);
2344 : }
2345 : else
2346 : {
2347 51 : n2 = fold_convert (itype, unshare_expr (fd->loops[i].m2));
2348 51 : n2 = fold_build2 (MULT_EXPR, itype,
2349 : vs[i - fd->loops[i].outer], n2);
2350 51 : n2 = fold_build2 (PLUS_EXPR, itype, n2, t);
2351 : }
2352 149 : n2 = force_gimple_operand_gsi (&gsi2, n2, true, NULL_TREE,
2353 : true, GSI_SAME_STMT);
2354 149 : if (POINTER_TYPE_P (itype))
2355 70 : itype = signed_type_for (itype);
2356 149 : if (i == fd->last_nonrect)
2357 : {
2358 59 : gcond *cond_stmt
2359 59 : = expand_omp_build_cond (&gsi2, fd->loops[i].cond_code,
2360 : n1, n2);
2361 59 : e = split_block (cur_bb, cond_stmt);
2362 59 : e->flags = EDGE_TRUE_VALUE;
2363 59 : ne = make_edge (cur_bb, next_bb, EDGE_FALSE_VALUE);
2364 59 : e->probability = profile_probability::likely ().guessed ();
2365 59 : ne->probability = e->probability.invert ();
2366 59 : gsi2 = gsi_after_labels (e->dest);
2367 :
2368 59 : t = build_int_cst (itype, (fd->loops[i].cond_code == LT_EXPR
2369 79 : ? -1 : 1));
2370 59 : t = fold_build2 (PLUS_EXPR, itype,
2371 : fold_convert (itype, fd->loops[i].step), t);
2372 59 : t = fold_build2 (PLUS_EXPR, itype, t,
2373 : fold_convert (itype, n2));
2374 59 : t = fold_build2 (MINUS_EXPR, itype, t,
2375 : fold_convert (itype, n1));
2376 59 : tree step = fold_convert (itype, fd->loops[i].step);
2377 59 : if (TYPE_UNSIGNED (itype)
2378 59 : && fd->loops[i].cond_code == GT_EXPR)
2379 0 : t = fold_build2 (TRUNC_DIV_EXPR, itype,
2380 : fold_build1 (NEGATE_EXPR, itype, t),
2381 : fold_build1 (NEGATE_EXPR, itype, step));
2382 : else
2383 59 : t = fold_build2 (TRUNC_DIV_EXPR, itype, t, step);
2384 59 : t = fold_convert (type, t);
2385 59 : t = fold_build2 (PLUS_EXPR, type,
2386 : counts[fd->last_nonrect], t);
2387 59 : t = force_gimple_operand_gsi (&gsi2, t, true, NULL_TREE,
2388 : true, GSI_SAME_STMT);
2389 59 : expand_omp_build_assign (&gsi2, counts[fd->last_nonrect], t);
2390 59 : e = make_edge (e->dest, next_bb, EDGE_FALLTHRU);
2391 59 : set_immediate_dominator (CDI_DOMINATORS, next_bb, cur_bb);
2392 59 : break;
2393 : }
2394 90 : e = split_block (cur_bb, last_nondebug_stmt (cur_bb));
2395 :
2396 90 : basic_block new_cur_bb = create_empty_bb (cur_bb);
2397 90 : add_bb_to_loop (new_cur_bb, cur_bb->loop_father);
2398 :
2399 90 : gsi2 = gsi_after_labels (e->dest);
2400 90 : tree step = fold_convert (itype,
2401 : unshare_expr (fd->loops[i].step));
2402 90 : if (POINTER_TYPE_P (TREE_TYPE (vs[i])))
2403 38 : t = fold_build_pointer_plus (vs[i], step);
2404 : else
2405 52 : t = fold_build2 (PLUS_EXPR, itype, vs[i], step);
2406 90 : t = force_gimple_operand_gsi (&gsi2, t, true, NULL_TREE,
2407 : true, GSI_SAME_STMT);
2408 90 : expand_omp_build_assign (&gsi2, vs[i], t);
2409 :
2410 90 : ne = split_block (e->dest, last_nondebug_stmt (e->dest));
2411 90 : gsi2 = gsi_after_labels (ne->dest);
2412 :
2413 90 : expand_omp_build_cond (&gsi2, fd->loops[i].cond_code, vs[i], n2);
2414 90 : edge e3, e4;
2415 90 : if (next_bb == entry_bb)
2416 : {
2417 59 : e3 = find_edge (ne->dest, next_bb);
2418 59 : e3->flags = EDGE_FALSE_VALUE;
2419 : }
2420 : else
2421 31 : e3 = make_edge (ne->dest, next_bb, EDGE_FALSE_VALUE);
2422 90 : e4 = make_edge (ne->dest, new_cur_bb, EDGE_TRUE_VALUE);
2423 90 : e4->probability = profile_probability::likely ().guessed ();
2424 90 : e3->probability = e4->probability.invert ();
2425 90 : basic_block esrc = e->src;
2426 90 : make_edge (e->src, ne->dest, EDGE_FALLTHRU);
2427 90 : cur_bb = new_cur_bb;
2428 90 : basic_block latch_bb = next_bb;
2429 90 : next_bb = e->dest;
2430 90 : remove_edge (e);
2431 90 : set_immediate_dominator (CDI_DOMINATORS, ne->dest, esrc);
2432 90 : set_immediate_dominator (CDI_DOMINATORS, latch_bb, ne->dest);
2433 90 : set_immediate_dominator (CDI_DOMINATORS, cur_bb, ne->dest);
2434 : }
2435 : }
2436 284 : t = NULL_TREE;
2437 691 : for (i = fd->first_nonrect; i < fd->last_nonrect; i++)
2438 407 : if (!fd->loops[i].non_rect_referenced
2439 92 : && fd->loops[i].m1 == NULL_TREE
2440 92 : && fd->loops[i].m2 == NULL_TREE)
2441 : {
2442 92 : if (t == NULL_TREE)
2443 82 : t = counts[i];
2444 : else
2445 10 : t = fold_build2 (MULT_EXPR, type, t, counts[i]);
2446 : }
2447 284 : if (t)
2448 : {
2449 82 : t = fold_build2 (MULT_EXPR, type, counts[fd->last_nonrect], t);
2450 82 : expand_omp_build_assign (gsi, counts[fd->last_nonrect], t);
2451 : }
2452 284 : if (!rect_count_seen)
2453 186 : t = counts[fd->last_nonrect];
2454 : else
2455 98 : t = fold_build2 (MULT_EXPR, type, fd->loop.n2,
2456 : counts[fd->last_nonrect]);
2457 284 : expand_omp_build_assign (gsi, fd->loop.n2, t);
2458 284 : }
2459 7485 : else if (fd->non_rect)
2460 : {
2461 117 : tree t = fd->loop.n2;
2462 117 : gcc_assert (TREE_CODE (t) == INTEGER_CST);
2463 : int non_rect_referenced = 0, non_rect = 0;
2464 358 : for (i = 0; i < fd->collapse; i++)
2465 : {
2466 240 : if ((i < fd->first_nonrect || i > fd->last_nonrect)
2467 246 : && !integer_zerop (counts[i]))
2468 6 : t = fold_build2 (TRUNC_DIV_EXPR, type, t, counts[i]);
2469 241 : if (fd->loops[i].non_rect_referenced)
2470 117 : non_rect_referenced++;
2471 241 : if (fd->loops[i].m1 || fd->loops[i].m2)
2472 117 : non_rect++;
2473 : }
2474 117 : gcc_assert (non_rect == 1 && non_rect_referenced == 1);
2475 117 : counts[fd->last_nonrect] = t;
2476 : }
2477 : }
2478 :
2479 : /* Helper function for expand_omp_{for_*,simd}.  Generate code like:
2480 : T = V;
2481 : V3 = N31 + (T % count3) * STEP3;
2482 : T = T / count3;
2483 : V2 = N21 + (T % count2) * STEP2;
2484 : T = T / count2;
2485 : V1 = N11 + T * STEP1;
2486 : if this loop doesn't have an inner loop construct combined with it.
2487 : If it does have an inner loop construct combined with it and the
2488 : iteration count isn't known constant, store values from counts array
2489 : into its _looptemp_ temporaries instead.
2490 : For non-rectangular loops (between fd->first_nonrect and fd->last_nonrect
2491 : inclusive), use the count of all those loops together, and either
2492 : find quadratic etc. equation roots, or as a fallback, do:
2493 : COUNT = 0;
2494 : for (tmpi = N11; tmpi COND1 N12; tmpi += STEP1)
2495 : for (tmpj = M21 * tmpi + N21;
2496 : tmpj COND2 M22 * tmpi + N22; tmpj += STEP2)
2497 : {
2498 : int tmpk1 = M31 * tmpj + N31;
2499 : int tmpk2 = M32 * tmpj + N32;
2500 : if (tmpk1 COND3 tmpk2)
2501 : {
2502 : if (COND3 is <)
2503 : adj = STEP3 - 1;
2504 : else
2505 : adj = STEP3 + 1;
2506 : int temp = (adj + tmpk2 - tmpk1) / STEP3;
2507 : if (COUNT + temp > T)
2508 : {
2509 : V1 = tmpi;
2510 : V2 = tmpj;
2511 : V3 = tmpk1 + (T - COUNT) * STEP3;
2512 : goto done;
2513 : }
2514 : else
2515 : COUNT += temp;
2516 : }
2517 : }
2518 : done:;
2519 : but for optional innermost or outermost rectangular loops that aren't
2520 : referenced by other loop expressions keep doing the division/modulo. */
2521 :
2522 : static void
2523 10273 : expand_omp_for_init_vars (struct omp_for_data *fd, gimple_stmt_iterator *gsi,
2524 : tree *counts, tree *nonrect_bounds,
2525 : gimple *inner_stmt, tree startvar)
2526 : {
2527 10273 : int i;
     :   /* Combined constructs (e.g. parallel for) don't recompute the
     :      iteration variables here; instead the counts are forwarded to the
     :      inner construct through its _looptemp_ clauses.  */
2528 10273 : if (gimple_omp_for_combined_p (fd->for_stmt))
2529 : {
2530 : /* If fd->loop.n2 is constant, then no propagation of the counts
2531 : is needed, they are constant. */
2532 4867 : if (TREE_CODE (fd->loop.n2) == INTEGER_CST)
2533 : return;
2534 :
2535 2692 : tree clauses = gimple_code (inner_stmt) != GIMPLE_OMP_FOR
2536 4158 : ? gimple_omp_taskreg_clauses (inner_stmt)
2537 1226 : : gimple_omp_for_clauses (inner_stmt);
2538 : /* First two _looptemp_ clauses are for istart/iend, counts[0]
2539 : isn't supposed to be handled, as the inner loop doesn't
2540 : use it. */
2541 2692 : tree innerc = omp_find_clause (clauses, OMP_CLAUSE__LOOPTEMP_);
2542 2692 : gcc_assert (innerc);
2543 2692 : int count = 0;
     :   /* For a signed non-rect doubly nested loop four extra temporaries
     :      (counts[0], first_inner_iterations, factor, adjn1) are passed
     :      through as well.  */
2544 2692 : if (fd->non_rect
2545 112 : && fd->last_nonrect == fd->first_nonrect + 1
2546 2756 : && !TYPE_UNSIGNED (TREE_TYPE (fd->loops[fd->last_nonrect].v)))
2547 : count = 4;
2548 10793 : for (i = 0; i < fd->collapse + count; i++)
2549 : {
2550 8101 : innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
2551 : OMP_CLAUSE__LOOPTEMP_);
2552 8101 : gcc_assert (innerc);
2553 8101 : if (i)
2554 : {
2555 5409 : tree tem = OMP_CLAUSE_DECL (innerc);
2556 5409 : tree t;
2557 5409 : if (i < fd->collapse)
2558 5169 : t = counts[i];
2559 : else
2560 240 : switch (i - fd->collapse)
2561 : {
2562 60 : case 0: t = counts[0]; break;
2563 60 : case 1: t = fd->first_inner_iterations; break;
2564 60 : case 2: t = fd->factor; break;
2565 60 : case 3: t = fd->adjn1; break;
2566 0 : default: gcc_unreachable ();
2567 : }
2568 5409 : t = fold_convert (TREE_TYPE (tem), t);
2569 5409 : t = force_gimple_operand_gsi (gsi, t, false, NULL_TREE,
2570 : false, GSI_CONTINUE_LINKING);
2571 5409 : gassign *stmt = gimple_build_assign (tem, t);
2572 5409 : gsi_insert_after (gsi, stmt, GSI_CONTINUE_LINKING);
2573 : }
2574 : }
2575 : return;
2576 : }
2577 :
     :   /* Non-combined case: decompose the single logical iteration number
     :      STARTVAR into the per-loop iteration variables, innermost first,
     :      via the division/modulo scheme from the function comment.  */
2578 5406 : tree type = TREE_TYPE (fd->loop.v);
2579 5406 : tree tem = create_tmp_reg (type, ".tem");
2580 5406 : gassign *stmt = gimple_build_assign (tem, startvar);
2581 5406 : gsi_insert_after (gsi, stmt, GSI_CONTINUE_LINKING);
2582 :
2583 20106 : for (i = fd->collapse - 1; i >= 0; i--)
2584 : {
2585 14700 : tree vtype = TREE_TYPE (fd->loops[i].v), itype, t;
2586 14700 : itype = vtype;
2587 14700 : if (POINTER_TYPE_P (vtype))
2588 1649 : itype = signed_type_for (vtype);
2589 14700 : if (i != 0 && (i != fd->last_nonrect || fd->first_nonrect))
2590 9294 : t = fold_build2 (TRUNC_MOD_EXPR, type, tem, counts[i]);
2591 : else
2592 : t = tem;
2593 14700 : if (i == fd->last_nonrect)
2594 : {
     :   /* The non-rectangular sub-nest is handled as one unit; T is the
     :      remaining iteration number within that whole sub-nest.  */
2595 376 : t = force_gimple_operand_gsi (gsi, t, true, NULL_TREE,
2596 : false, GSI_CONTINUE_LINKING);
2597 376 : tree stopval = t;
2598 376 : tree idx = create_tmp_reg (type, ".count");
2599 376 : expand_omp_build_assign (gsi, idx,
2600 : build_zero_cst (type), true);
2601 376 : basic_block bb_triang = NULL, bb_triang_dom = NULL;
2602 376 : if (fd->first_nonrect + 1 == fd->last_nonrect
2603 272 : && (TREE_CODE (fd->loop.n2) == INTEGER_CST
2604 181 : || fd->first_inner_iterations)
2605 488 : && (optab_handler (sqrt_optab, TYPE_MODE (double_type_node))
2606 : != CODE_FOR_nothing)
2607 620 : && !integer_zerop (fd->loop.n2))
2608 : {
     :   /* Fast path for a two-loop triangular nest: the cumulative
     :      iteration count is quadratic in the outer index, so solve for
     :      the outer index with the quadratic formula (via sqrt) instead
     :      of looping at runtime.  Guarded at runtime by factor != 0.  */
2609 236 : tree outer_n1 = fd->adjn1 ? fd->adjn1 : fd->loops[i - 1].n1;
2610 236 : tree itype = TREE_TYPE (fd->loops[i].v);
2611 236 : tree first_inner_iterations = fd->first_inner_iterations;
2612 236 : tree factor = fd->factor;
2613 236 : gcond *cond_stmt
2614 236 : = expand_omp_build_cond (gsi, NE_EXPR, factor,
2615 236 : build_zero_cst (TREE_TYPE (factor)),
2616 : true);
2617 236 : edge e = split_block (gsi_bb (*gsi), cond_stmt);
2618 236 : basic_block bb0 = e->src;
2619 236 : e->flags = EDGE_TRUE_VALUE;
2620 236 : e->probability = profile_probability::likely ();
2621 236 : bb_triang_dom = bb0;
2622 236 : *gsi = gsi_after_labels (e->dest);
2623 236 : tree slltype = long_long_integer_type_node;
2624 236 : tree ulltype = long_long_unsigned_type_node;
2625 236 : tree stopvalull = fold_convert (ulltype, stopval);
2626 236 : stopvalull
2627 236 : = force_gimple_operand_gsi (gsi, stopvalull, true, NULL_TREE,
2628 : false, GSI_CONTINUE_LINKING);
2629 236 : first_inner_iterations
2630 236 : = fold_convert (slltype, first_inner_iterations);
2631 236 : first_inner_iterations
2632 236 : = force_gimple_operand_gsi (gsi, first_inner_iterations, true,
2633 : NULL_TREE, false,
2634 : GSI_CONTINUE_LINKING);
2635 236 : factor = fold_convert (slltype, factor);
2636 236 : factor
2637 236 : = force_gimple_operand_gsi (gsi, factor, true, NULL_TREE,
2638 : false, GSI_CONTINUE_LINKING);
2639 236 : tree first_inner_iterationsd
2640 236 : = fold_build1 (FLOAT_EXPR, double_type_node,
2641 : first_inner_iterations);
2642 236 : first_inner_iterationsd
2643 236 : = force_gimple_operand_gsi (gsi, first_inner_iterationsd, true,
2644 : NULL_TREE, false,
2645 : GSI_CONTINUE_LINKING);
2646 236 : tree factord = fold_build1 (FLOAT_EXPR, double_type_node,
2647 : factor);
2648 236 : factord = force_gimple_operand_gsi (gsi, factord, true,
2649 : NULL_TREE, false,
2650 : GSI_CONTINUE_LINKING);
2651 236 : tree stopvald = fold_build1 (FLOAT_EXPR, double_type_node,
2652 : stopvalull);
2653 236 : stopvald = force_gimple_operand_gsi (gsi, stopvald, true,
2654 : NULL_TREE, false,
2655 : GSI_CONTINUE_LINKING);
2656 : /* Temporarily disable flag_rounding_math, values will be
2657 : decimal numbers divided by 2 and worst case imprecisions
2658 : due to too large values ought to be caught later by the
2659 : checks for fallback. */
2660 236 : int save_flag_rounding_math = flag_rounding_math;
2661 236 : flag_rounding_math = 0;
2662 236 : t = fold_build2 (RDIV_EXPR, double_type_node, factord,
2663 : build_real (double_type_node, dconst2));
2664 236 : tree t3 = fold_build2 (MINUS_EXPR, double_type_node,
2665 : first_inner_iterationsd, t);
2666 236 : t3 = force_gimple_operand_gsi (gsi, t3, true, NULL_TREE, false,
2667 : GSI_CONTINUE_LINKING);
2668 236 : t = fold_build2 (MULT_EXPR, double_type_node, factord,
2669 : build_real (double_type_node, dconst2));
2670 236 : t = fold_build2 (MULT_EXPR, double_type_node, t, stopvald);
2671 236 : t = fold_build2 (PLUS_EXPR, double_type_node, t,
2672 : fold_build2 (MULT_EXPR, double_type_node,
2673 : t3, t3));
2674 236 : flag_rounding_math = save_flag_rounding_math;
2675 236 : t = force_gimple_operand_gsi (gsi, t, true, NULL_TREE, false,
2676 : GSI_CONTINUE_LINKING);
     :   /* A negative discriminant means the closed-form solution does not
     :      apply; branch to the runtime fallback below.  */
2677 236 : if (flag_exceptions
2678 34 : && cfun->can_throw_non_call_exceptions
2679 244 : && operation_could_trap_p (LT_EXPR, true, false, NULL_TREE))
2680 : {
2681 8 : tree tem = fold_build2 (LT_EXPR, boolean_type_node, t,
2682 : build_zero_cst (double_type_node));
2683 8 : tem = force_gimple_operand_gsi (gsi, tem, true, NULL_TREE,
2684 : false, GSI_CONTINUE_LINKING);
2685 8 : cond_stmt = gimple_build_cond (NE_EXPR, tem,
2686 : boolean_false_node,
2687 : NULL_TREE, NULL_TREE);
2688 : }
2689 : else
2690 228 : cond_stmt
2691 228 : = gimple_build_cond (LT_EXPR, t,
2692 : build_zero_cst (double_type_node),
2693 : NULL_TREE, NULL_TREE);
2694 236 : gsi_insert_after (gsi, cond_stmt, GSI_CONTINUE_LINKING);
2695 236 : e = split_block (gsi_bb (*gsi), cond_stmt);
2696 236 : basic_block bb1 = e->src;
2697 236 : e->flags = EDGE_FALSE_VALUE;
2698 236 : e->probability = profile_probability::very_likely ();
2699 236 : *gsi = gsi_after_labels (e->dest);
2700 236 : gcall *call = gimple_build_call_internal (IFN_SQRT, 1, t);
2701 236 : tree sqrtr = create_tmp_var (double_type_node);
2702 236 : gimple_call_set_lhs (call, sqrtr);
2703 236 : gsi_insert_after (gsi, call, GSI_CONTINUE_LINKING);
2704 236 : t = fold_build2 (MINUS_EXPR, double_type_node, sqrtr, t3);
2705 236 : t = fold_build2 (RDIV_EXPR, double_type_node, t, factord);
2706 236 : t = fold_build1 (FIX_TRUNC_EXPR, ulltype, t);
2707 236 : tree c = create_tmp_var (ulltype);
2708 236 : tree d = create_tmp_var (ulltype);
2709 236 : expand_omp_build_assign (gsi, c, t, true);
     :   /* Recompute in integers the iteration count D that precedes outer
     :      index C, and verify stopval lies in [D, D + inner count); if not,
     :      floating-point imprecision hit and we take the fallback path.  */
2710 236 : t = fold_build2 (MINUS_EXPR, ulltype, c,
2711 : build_one_cst (ulltype));
2712 236 : t = fold_build2 (MULT_EXPR, ulltype, c, t);
2713 236 : t = fold_build2 (RSHIFT_EXPR, ulltype, t, integer_one_node);
2714 236 : t = fold_build2 (MULT_EXPR, ulltype,
2715 : fold_convert (ulltype, fd->factor), t);
2716 236 : tree t2
2717 236 : = fold_build2 (MULT_EXPR, ulltype, c,
2718 : fold_convert (ulltype,
2719 : fd->first_inner_iterations));
2720 236 : t = fold_build2 (PLUS_EXPR, ulltype, t, t2);
2721 236 : expand_omp_build_assign (gsi, d, t, true);
2722 236 : t = fold_build2 (MULT_EXPR, ulltype,
2723 : fold_convert (ulltype, fd->factor), c);
2724 236 : t = fold_build2 (PLUS_EXPR, ulltype,
2725 : t, fold_convert (ulltype,
2726 : fd->first_inner_iterations));
2727 236 : t2 = force_gimple_operand_gsi (gsi, t, true, NULL_TREE, false,
2728 : GSI_CONTINUE_LINKING);
2729 236 : cond_stmt = gimple_build_cond (GE_EXPR, stopvalull, d,
2730 : NULL_TREE, NULL_TREE);
2731 236 : gsi_insert_after (gsi, cond_stmt, GSI_CONTINUE_LINKING);
2732 236 : e = split_block (gsi_bb (*gsi), cond_stmt);
2733 236 : basic_block bb2 = e->src;
2734 236 : e->flags = EDGE_TRUE_VALUE;
2735 236 : e->probability = profile_probability::very_likely ();
2736 236 : *gsi = gsi_after_labels (e->dest);
2737 236 : t = fold_build2 (PLUS_EXPR, ulltype, d, t2);
2738 236 : t = force_gimple_operand_gsi (gsi, t, true, NULL_TREE, false,
2739 : GSI_CONTINUE_LINKING);
2740 236 : cond_stmt = gimple_build_cond (GE_EXPR, stopvalull, t,
2741 : NULL_TREE, NULL_TREE);
2742 236 : gsi_insert_after (gsi, cond_stmt, GSI_CONTINUE_LINKING);
2743 236 : e = split_block (gsi_bb (*gsi), cond_stmt);
2744 236 : basic_block bb3 = e->src;
2745 236 : e->flags = EDGE_FALSE_VALUE;
2746 236 : e->probability = profile_probability::very_likely ();
2747 236 : *gsi = gsi_after_labels (e->dest);
2748 236 : t = fold_convert (itype, c);
2749 236 : t = fold_build2 (MULT_EXPR, itype, t, fd->loops[i - 1].step);
2750 236 : t = fold_build2 (PLUS_EXPR, itype, outer_n1, t);
2751 236 : t = force_gimple_operand_gsi (gsi, t, true, NULL_TREE, false,
2752 : GSI_CONTINUE_LINKING);
2753 236 : expand_omp_build_assign (gsi, fd->loops[i - 1].v, t, true);
2754 236 : t2 = fold_build2 (MINUS_EXPR, ulltype, stopvalull, d);
2755 236 : t2 = fold_convert (itype, t2);
2756 236 : t2 = fold_build2 (MULT_EXPR, itype, t2, fd->loops[i].step);
2757 236 : t2 = fold_build2 (PLUS_EXPR, itype, t2, fd->loops[i].n1);
2758 236 : if (fd->loops[i].m1)
2759 : {
2760 195 : t = fold_build2 (MULT_EXPR, itype, t, fd->loops[i].m1);
2761 195 : t2 = fold_build2 (PLUS_EXPR, itype, t2, t);
2762 : }
2763 236 : expand_omp_build_assign (gsi, fd->loops[i].v, t2, true);
2764 236 : e = split_block (gsi_bb (*gsi), gsi_stmt (*gsi));
2765 236 : bb_triang = e->src;
2766 236 : *gsi = gsi_after_labels (e->dest);
2767 236 : remove_edge (e);
2768 236 : e = make_edge (bb1, gsi_bb (*gsi), EDGE_TRUE_VALUE);
2769 236 : e->probability = profile_probability::very_unlikely ();
2770 236 : e = make_edge (bb2, gsi_bb (*gsi), EDGE_FALSE_VALUE);
2771 236 : e->probability = profile_probability::very_unlikely ();
2772 236 : e = make_edge (bb3, gsi_bb (*gsi), EDGE_TRUE_VALUE);
2773 236 : e->probability = profile_probability::very_unlikely ();
2774 :
     :   /* bb4 handles the factor == 0 case (the nest degenerates to a
     :      rectangular one), where plain division/modulo suffices.  */
2775 236 : basic_block bb4 = create_empty_bb (bb0);
2776 236 : add_bb_to_loop (bb4, bb0->loop_father);
2777 236 : e = make_edge (bb0, bb4, EDGE_FALSE_VALUE);
2778 236 : e->probability = profile_probability::unlikely ();
2779 236 : make_edge (bb4, gsi_bb (*gsi), EDGE_FALLTHRU);
2780 236 : set_immediate_dominator (CDI_DOMINATORS, bb4, bb0);
2781 236 : set_immediate_dominator (CDI_DOMINATORS, gsi_bb (*gsi), bb0);
2782 236 : gimple_stmt_iterator gsi2 = gsi_after_labels (bb4);
2783 236 : t2 = fold_build2 (TRUNC_DIV_EXPR, type,
2784 : counts[i], counts[i - 1]);
2785 236 : t2 = force_gimple_operand_gsi (&gsi2, t2, true, NULL_TREE, false,
2786 : GSI_CONTINUE_LINKING);
2787 236 : t = fold_build2 (TRUNC_MOD_EXPR, type, stopval, t2);
2788 236 : t2 = fold_build2 (TRUNC_DIV_EXPR, type, stopval, t2);
2789 236 : t = fold_convert (itype, t);
2790 236 : t2 = fold_convert (itype, t2);
2791 236 : t = fold_build2 (MULT_EXPR, itype, t,
2792 : fold_convert (itype, fd->loops[i].step));
2793 236 : t = fold_build2 (PLUS_EXPR, itype, fd->loops[i].n1, t);
2794 236 : t2 = fold_build2 (MULT_EXPR, itype, t2,
2795 : fold_convert (itype, fd->loops[i - 1].step));
2796 236 : t2 = fold_build2 (PLUS_EXPR, itype, fd->loops[i - 1].n1, t2);
2797 236 : t2 = force_gimple_operand_gsi (&gsi2, t2, false, NULL_TREE,
2798 : false, GSI_CONTINUE_LINKING);
2799 236 : stmt = gimple_build_assign (fd->loops[i - 1].v, t2);
2800 236 : gsi_insert_after (&gsi2, stmt, GSI_CONTINUE_LINKING);
2801 236 : if (fd->loops[i].m1)
2802 : {
2803 195 : t2 = fold_build2 (MULT_EXPR, itype, fd->loops[i].m1,
2804 : fd->loops[i - 1].v);
2805 195 : t = fold_build2 (PLUS_EXPR, itype, t, t2);
2806 : }
2807 236 : t = force_gimple_operand_gsi (&gsi2, t, false, NULL_TREE,
2808 : false, GSI_CONTINUE_LINKING);
2809 236 : stmt = gimple_build_assign (fd->loops[i].v, t);
2810 236 : gsi_insert_after (&gsi2, stmt, GSI_CONTINUE_LINKING);
2811 : }
2812 : /* Fallback implementation. Evaluate the loops in between
2813 : (inclusive) fd->first_nonrect and fd->last_nonrect at
2814 : runtime unsing temporaries instead of the original iteration
2815 : variables, in the body just bump the counter and compare
2816 : with the desired value. */
2817 376 : gimple_stmt_iterator gsi2 = *gsi;
2818 376 : basic_block entry_bb = gsi_bb (gsi2);
2819 376 : edge e = split_block (entry_bb, gsi_stmt (gsi2));
2820 376 : e = split_block (e->dest, (gimple *) NULL);
2821 376 : basic_block dom_bb = NULL;
2822 376 : basic_block cur_bb = e->src;
2823 376 : basic_block next_bb = e->dest;
2824 376 : entry_bb = e->dest;
2825 376 : *gsi = gsi_after_labels (entry_bb);
2826 :
2827 376 : tree *vs = XALLOCAVEC (tree, fd->last_nonrect);
2828 376 : tree n1 = NULL_TREE, n2 = NULL_TREE;
2829 376 : memset (vs, 0, fd->last_nonrect * sizeof (tree));
2830 :
     :   /* Build one BB per loop of the non-rect sub-nest, computing its
     :      runtime bounds N1/N2 (possibly dependent on an outer temporary
     :      vs[]) and chaining the nest together with explicit edges.  */
2831 876 : for (int j = fd->first_nonrect; j <= fd->last_nonrect; j++)
2832 : {
2833 876 : tree itype = TREE_TYPE (fd->loops[j].v);
2834 1752 : bool rect_p = (fd->loops[j].m1 == NULL_TREE
2835 527 : && fd->loops[j].m2 == NULL_TREE
2836 1345 : && !fd->loops[j].non_rect_referenced);
2837 876 : gsi2 = gsi_after_labels (cur_bb);
2838 876 : t = fold_convert (itype, unshare_expr (fd->loops[j].n1));
2839 876 : if (fd->loops[j].m1 == NULL_TREE)
2840 527 : n1 = rect_p ? build_zero_cst (type) : t;
2841 349 : else if (POINTER_TYPE_P (itype))
2842 : {
2843 30 : gcc_assert (integer_onep (fd->loops[j].m1));
2844 30 : t = unshare_expr (fd->loops[j].n1);
2845 30 : n1 = fold_build_pointer_plus (vs[j - fd->loops[j].outer], t);
2846 : }
2847 : else
2848 : {
2849 319 : n1 = fold_convert (itype, unshare_expr (fd->loops[j].m1));
2850 319 : n1 = fold_build2 (MULT_EXPR, itype,
2851 : vs[j - fd->loops[j].outer], n1);
2852 319 : n1 = fold_build2 (PLUS_EXPR, itype, n1, t);
2853 : }
2854 876 : n1 = force_gimple_operand_gsi (&gsi2, n1, true, NULL_TREE,
2855 : true, GSI_SAME_STMT);
2856 876 : if (j < fd->last_nonrect)
2857 : {
2858 907 : vs[j] = create_tmp_reg (rect_p ? type : itype, ".it");
2859 500 : expand_omp_build_assign (&gsi2, vs[j], n1);
2860 : }
2861 876 : t = fold_convert (itype, unshare_expr (fd->loops[j].n2));
2862 876 : if (fd->loops[j].m2 == NULL_TREE)
2863 646 : n2 = rect_p ? counts[j] : t;
2864 230 : else if (POINTER_TYPE_P (itype))
2865 : {
2866 34 : gcc_assert (integer_onep (fd->loops[j].m2));
2867 34 : t = unshare_expr (fd->loops[j].n2);
2868 34 : n2 = fold_build_pointer_plus (vs[j - fd->loops[j].outer], t);
2869 : }
2870 : else
2871 : {
2872 196 : n2 = fold_convert (itype, unshare_expr (fd->loops[j].m2));
2873 196 : n2 = fold_build2 (MULT_EXPR, itype,
2874 : vs[j - fd->loops[j].outer], n2);
2875 196 : n2 = fold_build2 (PLUS_EXPR, itype, n2, t);
2876 : }
2877 876 : n2 = force_gimple_operand_gsi (&gsi2, n2, true, NULL_TREE,
2878 : true, GSI_SAME_STMT);
2879 876 : if (POINTER_TYPE_P (itype))
2880 74 : itype = signed_type_for (itype);
2881 876 : if (j == fd->last_nonrect)
2882 : {
     :   /* Innermost loop of the sub-nest: compute its trip count,
     :      accumulate into IDX and stop once IDX would pass STOPVAL.  */
2883 376 : gcond *cond_stmt
2884 376 : = expand_omp_build_cond (&gsi2, fd->loops[i].cond_code,
2885 : n1, n2);
2886 376 : e = split_block (cur_bb, cond_stmt);
2887 376 : e->flags = EDGE_TRUE_VALUE;
2888 376 : edge ne = make_edge (cur_bb, next_bb, EDGE_FALSE_VALUE);
2889 376 : e->probability = profile_probability::likely ().guessed ();
2890 376 : ne->probability = e->probability.invert ();
2891 376 : gsi2 = gsi_after_labels (e->dest);
2892 :
2893 376 : t = build_int_cst (itype, (fd->loops[j].cond_code == LT_EXPR
2894 428 : ? -1 : 1));
2895 376 : t = fold_build2 (PLUS_EXPR, itype,
2896 : fold_convert (itype, fd->loops[j].step), t);
2897 376 : t = fold_build2 (PLUS_EXPR, itype, t,
2898 : fold_convert (itype, n2));
2899 376 : t = fold_build2 (MINUS_EXPR, itype, t,
2900 : fold_convert (itype, n1));
2901 376 : tree step = fold_convert (itype, fd->loops[j].step);
2902 376 : if (TYPE_UNSIGNED (itype)
2903 376 : && fd->loops[j].cond_code == GT_EXPR)
2904 0 : t = fold_build2 (TRUNC_DIV_EXPR, itype,
2905 : fold_build1 (NEGATE_EXPR, itype, t),
2906 : fold_build1 (NEGATE_EXPR, itype, step));
2907 : else
2908 376 : t = fold_build2 (TRUNC_DIV_EXPR, itype, t, step);
2909 376 : t = fold_convert (type, t);
2910 376 : t = fold_build2 (PLUS_EXPR, type, idx, t);
2911 376 : t = force_gimple_operand_gsi (&gsi2, t, true, NULL_TREE,
2912 : true, GSI_SAME_STMT);
2913 376 : e = make_edge (e->dest, next_bb, EDGE_FALLTHRU);
2914 376 : set_immediate_dominator (CDI_DOMINATORS, next_bb, cur_bb);
2915 376 : cond_stmt
2916 376 : = gimple_build_cond (LE_EXPR, t, stopval, NULL_TREE,
2917 : NULL_TREE);
2918 376 : gsi_insert_before (&gsi2, cond_stmt, GSI_SAME_STMT);
2919 376 : e = split_block (gsi_bb (gsi2), cond_stmt);
2920 376 : e->flags = EDGE_TRUE_VALUE;
2921 376 : e->probability = profile_probability::likely ().guessed ();
2922 376 : ne = make_edge (e->src, entry_bb, EDGE_FALSE_VALUE);
2923 376 : ne->probability = e->probability.invert ();
2924 376 : gsi2 = gsi_after_labels (e->dest);
2925 376 : expand_omp_build_assign (&gsi2, idx, t);
2926 376 : set_immediate_dominator (CDI_DOMINATORS, entry_bb, dom_bb);
2927 376 : break;
2928 : }
2929 500 : e = split_block (cur_bb, last_nondebug_stmt (cur_bb));
2930 :
2931 500 : basic_block new_cur_bb = create_empty_bb (cur_bb);
2932 500 : add_bb_to_loop (new_cur_bb, cur_bb->loop_father);
2933 :
2934 500 : gsi2 = gsi_after_labels (e->dest);
2935 500 : if (rect_p)
2936 93 : t = fold_build2 (PLUS_EXPR, type, vs[j],
2937 : build_one_cst (type));
2938 : else
2939 : {
2940 407 : tree step
2941 407 : = fold_convert (itype, unshare_expr (fd->loops[j].step));
2942 407 : if (POINTER_TYPE_P (vtype))
2943 38 : t = fold_build_pointer_plus (vs[j], step);
2944 : else
2945 369 : t = fold_build2 (PLUS_EXPR, itype, vs[j], step);
2946 : }
2947 500 : t = force_gimple_operand_gsi (&gsi2, t, true, NULL_TREE,
2948 : true, GSI_SAME_STMT);
2949 500 : expand_omp_build_assign (&gsi2, vs[j], t);
2950 :
2951 500 : edge ne = split_block (e->dest, last_nondebug_stmt (e->dest));
2952 500 : gsi2 = gsi_after_labels (ne->dest);
2953 :
2954 500 : gcond *cond_stmt;
2955 500 : if (next_bb == entry_bb)
2956 : /* No need to actually check the outermost condition. */
2957 376 : cond_stmt
2958 376 : = gimple_build_cond (EQ_EXPR, boolean_true_node,
2959 : boolean_true_node,
2960 : NULL_TREE, NULL_TREE)
2961 : else
2962 124 : cond_stmt
2963 155 : = gimple_build_cond (rect_p ? LT_EXPR
2964 31 : : fd->loops[j].cond_code,
2965 : vs[j], n2, NULL_TREE, NULL_TREE);
2966 500 : gsi_insert_before (&gsi2, cond_stmt, GSI_SAME_STMT);
2967 500 : edge e3, e4;
2968 500 : if (next_bb == entry_bb)
2969 : {
2970 376 : e3 = find_edge (ne->dest, next_bb);
2971 376 : e3->flags = EDGE_FALSE_VALUE;
2972 376 : dom_bb = ne->dest;
2973 : }
2974 : else
2975 124 : e3 = make_edge (ne->dest, next_bb, EDGE_FALSE_VALUE);
2976 500 : e4 = make_edge (ne->dest, new_cur_bb, EDGE_TRUE_VALUE);
2977 500 : e4->probability = profile_probability::likely ().guessed ();
2978 500 : e3->probability = e4->probability.invert ();
2979 500 : basic_block esrc = e->src;
2980 500 : make_edge (e->src, ne->dest, EDGE_FALLTHRU);
2981 500 : cur_bb = new_cur_bb;
2982 500 : basic_block latch_bb = next_bb;
2983 500 : next_bb = e->dest;
2984 500 : remove_edge (e);
2985 500 : set_immediate_dominator (CDI_DOMINATORS, ne->dest, esrc);
2986 500 : set_immediate_dominator (CDI_DOMINATORS, latch_bb, ne->dest);
2987 500 : set_immediate_dominator (CDI_DOMINATORS, cur_bb, ne->dest);
2988 : }
     :   /* Copy the final temporaries into the user iteration variables;
     :      the innermost one additionally advances by the leftover
     :      (STOPVAL - IDX) iterations.  */
2989 1252 : for (int j = fd->last_nonrect; j >= fd->first_nonrect; j--)
2990 : {
2991 876 : tree vtype = TREE_TYPE (fd->loops[j].v);
2992 876 : tree itype = vtype;
2993 876 : if (POINTER_TYPE_P (itype))
2994 74 : itype = signed_type_for (itype);
2995 1752 : bool rect_p = (fd->loops[j].m1 == NULL_TREE
2996 527 : && fd->loops[j].m2 == NULL_TREE
2997 1345 : && !fd->loops[j].non_rect_referenced);
2998 876 : if (j == fd->last_nonrect)
2999 : {
3000 376 : t = fold_build2 (MINUS_EXPR, type, stopval, idx);
3001 376 : t = fold_convert (itype, t);
3002 376 : tree t2
3003 376 : = fold_convert (itype, unshare_expr (fd->loops[j].step));
3004 376 : t = fold_build2 (MULT_EXPR, itype, t, t2);
3005 376 : if (POINTER_TYPE_P (vtype))
3006 32 : t = fold_build_pointer_plus (n1, t);
3007 : else
3008 344 : t = fold_build2 (PLUS_EXPR, itype, n1, t);
3009 : }
3010 500 : else if (rect_p)
3011 : {
3012 93 : t = fold_convert (itype, vs[j]);
3013 93 : t = fold_build2 (MULT_EXPR, itype, t,
3014 : fold_convert (itype, fd->loops[j].step));
3015 93 : if (POINTER_TYPE_P (vtype))
3016 4 : t = fold_build_pointer_plus (fd->loops[j].n1, t);
3017 : else
3018 89 : t = fold_build2 (PLUS_EXPR, itype, fd->loops[j].n1, t);
3019 : }
3020 : else
3021 407 : t = vs[j];
3022 876 : t = force_gimple_operand_gsi (gsi, t, false,
3023 : NULL_TREE, true,
3024 : GSI_SAME_STMT);
3025 876 : stmt = gimple_build_assign (fd->loops[j].v, t);
3026 876 : gsi_insert_before (gsi, stmt, GSI_SAME_STMT);
3027 : }
3028 376 : if (gsi_end_p (*gsi))
3029 752 : *gsi = gsi_last_bb (gsi_bb (*gsi));
3030 : else
3031 0 : gsi_prev (gsi);
3032 376 : if (bb_triang)
3033 : {
3034 236 : e = split_block (gsi_bb (*gsi), gsi_stmt (*gsi));
3035 236 : make_edge (bb_triang, e->dest, EDGE_FALLTHRU);
3036 236 : *gsi = gsi_after_labels (e->dest);
3037 236 : if (!gsi_end_p (*gsi))
3038 0 : gsi_insert_before (gsi, gimple_build_nop (), GSI_NEW_STMT);
3039 236 : set_immediate_dominator (CDI_DOMINATORS, e->dest, bb_triang_dom);
3040 : }
3041 : }
3042 : else
3043 : {
3044 14324 : t = fold_convert (itype, t);
3045 14324 : t = fold_build2 (MULT_EXPR, itype, t,
3046 : fold_convert (itype, fd->loops[i].step));
3047 14324 : if (POINTER_TYPE_P (vtype))
3048 1617 : t = fold_build_pointer_plus (fd->loops[i].n1, t);
3049 : else
3050 12707 : t = fold_build2 (PLUS_EXPR, itype, fd->loops[i].n1, t);
3051 14324 : t = force_gimple_operand_gsi (gsi, t,
3052 14324 : DECL_P (fd->loops[i].v)
3053 14324 : && TREE_ADDRESSABLE (fd->loops[i].v),
3054 : NULL_TREE, false,
3055 : GSI_CONTINUE_LINKING);
3056 14324 : stmt = gimple_build_assign (fd->loops[i].v, t);
3057 14324 : gsi_insert_after (gsi, stmt, GSI_CONTINUE_LINKING);
3058 : }
3059 14700 : if (i != 0 && (i != fd->last_nonrect || fd->first_nonrect))
3060 : {
3061 9294 : t = fold_build2 (TRUNC_DIV_EXPR, type, tem, counts[i]);
3062 9294 : t = force_gimple_operand_gsi (gsi, t, false, NULL_TREE,
3063 : false, GSI_CONTINUE_LINKING);
3064 9294 : stmt = gimple_build_assign (tem, t);
3065 9294 : gsi_insert_after (gsi, stmt, GSI_CONTINUE_LINKING);
3066 : }
3067 14700 : if (i == fd->last_nonrect)
3068 376 : i = fd->first_nonrect;
3069 : }
     :   /* Finally materialize the nonrect_bounds[] temporaries holding the
     :      runtime upper bounds N2 + M2 * outer_v of loops with an m2
     :      multiplier, for use by extract_omp_for_update_vars.  */
3070 5406 : if (fd->non_rect)
3071 1351 : for (i = 0; i <= fd->last_nonrect; i++)
3072 975 : if (fd->loops[i].m2)
3073 : {
3074 230 : tree itype = TREE_TYPE (fd->loops[i].v);
3075 :
3076 230 : tree t;
3077 230 : if (POINTER_TYPE_P (itype))
3078 : {
3079 34 : gcc_assert (integer_onep (fd->loops[i].m2));
3080 34 : t = fold_build_pointer_plus (fd->loops[i - fd->loops[i].outer].v,
3081 : unshare_expr (fd->loops[i].n2));
3082 : }
3083 : else
3084 : {
3085 196 : t = fold_convert (itype, unshare_expr (fd->loops[i].m2));
3086 196 : t = fold_build2 (MULT_EXPR, itype,
3087 : fd->loops[i - fd->loops[i].outer].v, t);
3088 196 : t = fold_build2 (PLUS_EXPR, itype, t,
3089 : fold_convert (itype,
3090 : unshare_expr (fd->loops[i].n2)));
3091 : }
3092 230 : nonrect_bounds[i] = create_tmp_reg (itype, ".bound");
3093 230 : t = force_gimple_operand_gsi (gsi, t, false,
3094 : NULL_TREE, false,
3095 : GSI_CONTINUE_LINKING);
3096 230 : stmt = gimple_build_assign (nonrect_bounds[i], t);
3097 230 : gsi_insert_after (gsi, stmt, GSI_CONTINUE_LINKING);
3098 : }
3099 : }
3100 :
3101 : /* Helper function for expand_omp_for_*.  Generate code like:
3102 : L10:
3103 : V3 += STEP3;
3104 : if (V3 cond3 N32) goto BODY_BB; else goto L11;
3105 : L11:
3106 : V3 = N31;
3107 : V2 += STEP2;
3108 : if (V2 cond2 N22) goto BODY_BB; else goto L12;
3109 : L12:
3110 : V2 = N21;
3111 : V1 += STEP1;
3112 : goto BODY_BB;
3113 : For non-rectangular loops, use temporaries stored in nonrect_bounds
3114 : for the upper bounds if M?2 multiplier is present.  Given e.g.
3115 : for (V1 = N11; V1 cond1 N12; V1 += STEP1)
3116 : for (V2 = N21; V2 cond2 N22; V2 += STEP2)
3117 : for (V3 = N31; V3 cond3 N32; V3 += STEP3)
3118 : for (V4 = N41 + M41 * V2; V4 cond4 N42 + M42 * V2; V4 += STEP4)
3119 : do:
3120 : L10:
3121 : V4 += STEP4;
3122 : if (V4 cond4 NONRECT_BOUND4) goto BODY_BB; else goto L11;
3123 : L11:
3124 : V4 = N41 + M41 * V2; // This can be left out if the loop
3125 : // refers to the immediate parent loop
3126 : V3 += STEP3;
3127 : if (V3 cond3 N32) goto BODY_BB; else goto L12;
3128 : L12:
3129 : V3 = N31;
3130 : V2 += STEP2;
3131 : if (V2 cond2 N22) goto L120; else goto L13;
3132 : L120:
3133 : V4 = N41 + M41 * V2;
3134 : NONRECT_BOUND4 = N42 + M42 * V2;
3135 : if (V4 cond4 NONRECT_BOUND4) goto BODY_BB; else goto L12;
3136 : L13:
3137 : V2 = N21;
3138 : V1 += STEP1;
3139 : goto L120;  */
3140 :
3141 : static basic_block
3142 2593 : extract_omp_for_update_vars (struct omp_for_data *fd, tree *nonrect_bounds,
3143 : basic_block cont_bb, basic_block body_bb)
3144 : {
3145 2593 : basic_block last_bb, bb, collapse_bb = NULL;
3146 2593 : int i;
3147 2593 : gimple_stmt_iterator gsi;
3148 2593 : edge e;
3149 2593 : tree t;
3150 2593 : gimple *stmt;
3151 :
     :   /* Walk the collapsed nest innermost to outermost, emitting one BB
     :      per level that re-initializes the inner variable, bumps this
     :      level's variable and tests whether to re-enter the body.  */
3152 2593 : last_bb = cont_bb;
3153 9736 : for (i = fd->collapse - 1; i >= 0; i--)
3154 : {
3155 7143 : tree vtype = TREE_TYPE (fd->loops[i].v);
3156 :
3157 7143 : bb = create_empty_bb (last_bb);
3158 7143 : add_bb_to_loop (bb, last_bb->loop_father);
3159 7143 : gsi = gsi_start_bb (bb);
3160 :
3161 7143 : if (i < fd->collapse - 1)
3162 : {
3163 4550 : e = make_edge (last_bb, bb, EDGE_FALSE_VALUE);
3164 4550 : e->probability = profile_probability::guessed_always () / 8;
3165 :
     :   /* Reset the next-inner variable to its start value, unless it
     :      depends on its immediate parent (outer == 1 && m1), in which
     :      case the non_rect_referenced block below re-initializes it.  */
3166 4550 : struct omp_for_data_loop *l = &fd->loops[i + 1];
3167 4550 : if (l->m1 == NULL_TREE || l->outer != 1)
3168 : {
3169 4342 : t = l->n1;
3170 4342 : if (l->m1)
3171 : {
3172 56 : if (POINTER_TYPE_P (TREE_TYPE (l->v)))
3173 3 : t = fold_build_pointer_plus (fd->loops[i + 1 - l->outer].v,
3174 : t);
3175 : else
3176 : {
3177 53 : tree t2
3178 53 : = fold_build2 (MULT_EXPR, TREE_TYPE (t),
3179 : fd->loops[i + 1 - l->outer].v, l->m1);
3180 53 : t = fold_build2 (PLUS_EXPR, TREE_TYPE (t), t2, t);
3181 : }
3182 : }
3183 4342 : t = force_gimple_operand_gsi (&gsi, t,
3184 4342 : DECL_P (l->v)
3185 4342 : && TREE_ADDRESSABLE (l->v),
3186 : NULL_TREE, false,
3187 : GSI_CONTINUE_LINKING);
3188 4342 : stmt = gimple_build_assign (l->v, t);
3189 4342 : gsi_insert_after (&gsi, stmt, GSI_CONTINUE_LINKING);
3190 : }
3191 : }
3192 : else
3193 : collapse_bb = bb;
3194 :
3195 7143 : set_immediate_dominator (CDI_DOMINATORS, bb, last_bb);
3196 :
3197 7143 : if (POINTER_TYPE_P (vtype))
3198 902 : t = fold_build_pointer_plus (fd->loops[i].v, fd->loops[i].step);
3199 : else
3200 6241 : t = fold_build2 (PLUS_EXPR, vtype, fd->loops[i].v, fd->loops[i].step);
3201 7143 : t = force_gimple_operand_gsi (&gsi, t,
3202 7143 : DECL_P (fd->loops[i].v)
3203 7143 : && TREE_ADDRESSABLE (fd->loops[i].v),
3204 : NULL_TREE, false, GSI_CONTINUE_LINKING);
3205 7143 : stmt = gimple_build_assign (fd->loops[i].v, t);
3206 7143 : gsi_insert_after (&gsi, stmt, GSI_CONTINUE_LINKING);
3207 :
     :   /* If inner loops derive their bounds from this variable, emit extra
     :      BBs that recompute those loops' start values and runtime upper
     :      bounds (nonrect_bounds[]) and retest their conditions (the L120
     :      block in the function comment).  */
3208 7143 : if (fd->loops[i].non_rect_referenced)
3209 : {
3210 308 : basic_block update_bb = NULL, prev_bb = NULL;
3211 714 : for (int j = i + 1; j <= fd->last_nonrect; j++)
3212 406 : if (j - fd->loops[j].outer == i)
3213 : {
3214 308 : tree n1, n2;
3215 308 : struct omp_for_data_loop *l = &fd->loops[j];
3216 308 : basic_block this_bb = create_empty_bb (last_bb);
3217 308 : add_bb_to_loop (this_bb, last_bb->loop_father);
3218 308 : gimple_stmt_iterator gsi2 = gsi_start_bb (this_bb);
3219 308 : if (prev_bb)
3220 : {
3221 0 : e = make_edge (prev_bb, this_bb, EDGE_TRUE_VALUE);
3222 0 : e->probability
3223 0 : = profile_probability::guessed_always ().apply_scale (7,
3224 : 8);
3225 0 : set_immediate_dominator (CDI_DOMINATORS, this_bb, prev_bb);
3226 : }
3227 308 : if (l->m1)
3228 : {
3229 264 : if (POINTER_TYPE_P (TREE_TYPE (l->v)))
3230 26 : t = fold_build_pointer_plus (fd->loops[i].v, l->n1);
3231 : else
3232 : {
3233 238 : t = fold_build2 (MULT_EXPR, TREE_TYPE (l->m1), l->m1,
3234 : fd->loops[i].v);
3235 238 : t = fold_build2 (PLUS_EXPR, TREE_TYPE (l->v),
3236 : t, l->n1);
3237 : }
3238 264 : n1 = force_gimple_operand_gsi (&gsi2, t, true, NULL_TREE,
3239 : false,
3240 : GSI_CONTINUE_LINKING);
3241 264 : stmt = gimple_build_assign (l->v, n1);
3242 264 : gsi_insert_after (&gsi2, stmt, GSI_CONTINUE_LINKING);
3243 264 : n1 = l->v;
3244 : }
3245 : else
3246 44 : n1 = force_gimple_operand_gsi (&gsi2, l->n1, true,
3247 : NULL_TREE, false,
3248 : GSI_CONTINUE_LINKING);
3249 308 : if (l->m2)
3250 : {
3251 204 : if (POINTER_TYPE_P (TREE_TYPE (l->v)))
3252 30 : t = fold_build_pointer_plus (fd->loops[i].v, l->n2);
3253 : else
3254 : {
3255 174 : t = fold_build2 (MULT_EXPR, TREE_TYPE (l->m2), l->m2,
3256 : fd->loops[i].v);
3257 174 : t = fold_build2 (PLUS_EXPR,
3258 : TREE_TYPE (nonrect_bounds[j]),
3259 : t, unshare_expr (l->n2));
3260 : }
3261 204 : n2 = force_gimple_operand_gsi (&gsi2, t, true, NULL_TREE,
3262 : false,
3263 : GSI_CONTINUE_LINKING);
3264 204 : stmt = gimple_build_assign (nonrect_bounds[j], n2);
3265 204 : gsi_insert_after (&gsi2, stmt, GSI_CONTINUE_LINKING);
3266 204 : n2 = nonrect_bounds[j];
3267 : }
3268 : else
3269 104 : n2 = force_gimple_operand_gsi (&gsi2, unshare_expr (l->n2),
3270 : true, NULL_TREE, false,
3271 : GSI_CONTINUE_LINKING);
3272 308 : gcond *cond_stmt
3273 308 : = gimple_build_cond (l->cond_code, n1, n2,
3274 : NULL_TREE, NULL_TREE);
3275 308 : gsi_insert_after (&gsi2, cond_stmt, GSI_CONTINUE_LINKING);
3276 308 : if (update_bb == NULL)
3277 308 : update_bb = this_bb;
3278 308 : e = make_edge (this_bb, bb, EDGE_FALSE_VALUE);
3279 308 : e->probability = profile_probability::guessed_always () / 8;
3280 308 : if (prev_bb == NULL)
3281 308 : set_immediate_dominator (CDI_DOMINATORS, this_bb, bb);
3282 308 : prev_bb = this_bb;
3283 : }
3284 308 : e = make_edge (prev_bb, body_bb, EDGE_TRUE_VALUE);
3285 308 : e->probability
3286 308 : = profile_probability::guessed_always ().apply_scale (7, 8);
     :   /* Outer levels must branch to the recompute block, not directly
     :      to the body.  */
3287 308 : body_bb = update_bb;
3288 : }
3289 :
3290 7143 : if (i > 0)
3291 : {
3292 4550 : if (fd->loops[i].m2)
3293 204 : t = nonrect_bounds[i];
3294 : else
3295 4346 : t = unshare_expr (fd->loops[i].n2);
3296 4550 : t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
3297 : false, GSI_CONTINUE_LINKING);
3298 4550 : tree v = fd->loops[i].v;
3299 4550 : if (DECL_P (v) && TREE_ADDRESSABLE (v))
3300 0 : v = force_gimple_operand_gsi (&gsi, v, true, NULL_TREE,
3301 : false, GSI_CONTINUE_LINKING);
3302 4550 : t = build2 (fd->loops[i].cond_code, boolean_type_node, v, t);
3303 4550 : stmt = gimple_build_cond_empty (t);
3304 4550 : gsi_insert_after (&gsi, stmt, GSI_CONTINUE_LINKING);
3305 4550 : if (walk_tree (gimple_cond_lhs_ptr (as_a <gcond *> (stmt)),
3306 : expand_omp_regimplify_p, NULL, NULL)
3307 4550 : || walk_tree (gimple_cond_rhs_ptr (as_a <gcond *> (stmt)),
3308 : expand_omp_regimplify_p, NULL, NULL))
3309 4 : gimple_regimplify_operands (stmt, &gsi);
3310 4550 : e = make_edge (bb, body_bb, EDGE_TRUE_VALUE);
3311 4550 : e->probability = profile_probability::guessed_always ().apply_scale (7, 8);
3312 : }
3313 : else
     :   /* Outermost level: fall through, the caller emitted the exit test.  */
3314 2593 : make_edge (bb, body_bb, EDGE_FALLTHRU);
3315 7143 : set_immediate_dominator (CDI_DOMINATORS, bb, last_bb);
3316 7143 : last_bb = bb;
3317 : }
3318 :
3319 2593 : return collapse_bb;
3320 : }
3321 :
3322 : /* Expand #pragma omp ordered depend(source). */
3323 :
3324 : static void
3325 335 : expand_omp_ordered_source (gimple_stmt_iterator *gsi, struct omp_for_data *fd,
3326 : tree *counts, location_t loc)
3327 : {
3328 56 : enum built_in_function source_ix
3329 335 : = fd->iter_type == long_integer_type_node
3330 335 : ? BUILT_IN_GOMP_DOACROSS_POST : BUILT_IN_GOMP_DOACROSS_ULL_POST;
3331 335 : gimple *g
3332 335 : = gimple_build_call (builtin_decl_explicit (source_ix), 1,
3333 335 : build_fold_addr_expr (counts[fd->ordered]));
3334 335 : gimple_set_location (g, loc);
3335 335 : gsi_insert_before (gsi, g, GSI_SAME_STMT);
3336 335 : }
3337 :
3338 : /* Expand a single depend from #pragma omp ordered depend(sink:...). */
3339 :
static void
expand_omp_ordered_sink (gimple_stmt_iterator *gsi, struct omp_for_data *fd,
			 tree *counts, tree c, location_t loc,
			 basic_block cont_bb)
{
  /* ARGS accumulates the iteration vector passed to the
     GOMP_doacross_wait / GOMP_doacross_ull_wait builtin.  */
  auto_vec<tree, 10> args;
  /* Choose the long or unsigned long long variant of the wait builtin
     based on the loop's iteration type.  */
  enum built_in_function sink_ix
    = fd->iter_type == long_integer_type_node
      ? BUILT_IN_GOMP_DOACROSS_WAIT : BUILT_IN_GOMP_DOACROSS_ULL_WAIT;
  tree t, off, coff = NULL_TREE, deps = OMP_CLAUSE_DECL (c), cond = NULL_TREE;
  int i;
  gimple_stmt_iterator gsi2 = *gsi;
  /* Emit the "never in the iteration space" warning at most once.  */
  bool warned_step = false;

  if (deps == NULL)
    {
      /* Handle doacross(sink: omp_cur_iteration - 1).  */
      gsi_prev (&gsi2);
      edge e1 = split_block (gsi_bb (gsi2), gsi_stmt (gsi2));
      edge e2 = split_block_after_labels (e1->dest);
      gsi2 = gsi_after_labels (e1->dest);
      *gsi = gsi_last_bb (e1->src);
      gimple_stmt_iterator gsi3 = *gsi;

      /* Compute the current iteration number of the (possibly
	 collapsed) outermost dimension into T.  */
      if (counts[fd->collapse - 1])
	{
	  gcc_assert (fd->collapse == 1);
	  t = counts[fd->collapse - 1];
	}
      else if (fd->collapse > 1)
	t = fd->loop.v;
      else
	{
	  t = fold_build2 (MINUS_EXPR, TREE_TYPE (fd->loops[0].v),
			   fd->loops[0].v, fd->loops[0].n1);
	  t = fold_convert (fd->iter_type, t);
	}

      /* Guard the wait call: only wait when the current iteration is
	 not the very first one (t != 0).  */
      t = force_gimple_operand_gsi (gsi, t, true, NULL_TREE,
				    false, GSI_CONTINUE_LINKING);
      gsi_insert_after (gsi, gimple_build_cond (NE_EXPR, t,
						build_zero_cst (TREE_TYPE (t)),
						NULL_TREE, NULL_TREE),
			GSI_NEW_STMT);

      /* First wait argument: current iteration minus one.  */
      t = fold_build2 (PLUS_EXPR, TREE_TYPE (t), t,
		       build_minus_one_cst (TREE_TYPE (t)));
      t = force_gimple_operand_gsi (&gsi2, t, true, NULL_TREE,
				    true, GSI_SAME_STMT);
      args.safe_push (t);
      /* Remaining arguments: the non-collapsed ordered dimensions,
	 each also decremented by one.  */
      for (i = fd->collapse; i < fd->ordered; i++)
	{
	  t = counts[fd->ordered + 2 + (i - fd->collapse)];
	  t = fold_build2 (PLUS_EXPR, TREE_TYPE (t), t,
			   build_minus_one_cst (TREE_TYPE (t)));
	  t = fold_convert (fd->iter_type, t);
	  t = force_gimple_operand_gsi (&gsi2, t, true, NULL_TREE,
					true, GSI_SAME_STMT);
	  args.safe_push (t);
	}

      gimple *g = gimple_build_call_vec (builtin_decl_explicit (sink_ix),
					 args);
      gimple_set_location (g, loc);
      gsi_insert_before (&gsi2, g, GSI_SAME_STMT);

      /* Wire up the skip edge around the wait call for the t == 0
	 case; falling into the call is the likely path (7/8).  */
      edge e3 = make_edge (e1->src, e2->dest, EDGE_FALSE_VALUE);
      e3->probability = profile_probability::guessed_always () / 8;
      e1->probability = e3->probability.invert ();
      e1->flags = EDGE_TRUE_VALUE;
      set_immediate_dominator (CDI_DOMINATORS, e2->dest, e1->src);

      if (fd->ordered > fd->collapse && cont_bb)
	{
	  /* Lazily create the ".first" flag tracking whether we are in
	     the first iteration of the non-collapsed ordered loops.  */
	  if (counts[fd->ordered + 1] == NULL_TREE)
	    counts[fd->ordered + 1]
	      = create_tmp_var (boolean_type_node, ".first");

	  edge e4;
	  if (gsi_end_p (gsi3))
	    e4 = split_block_after_labels (e1->src);
	  else
	    {
	      gsi_prev (&gsi3);
	      e4 = split_block (gsi_bb (gsi3), gsi_stmt (gsi3));
	    }
	  gsi3 = gsi_last_bb (e4->src);

	  /* Also skip the wait when .first is false.  */
	  gsi_insert_after (&gsi3,
			    gimple_build_cond (NE_EXPR,
					       counts[fd->ordered + 1],
					       boolean_false_node,
					       NULL_TREE, NULL_TREE),
			    GSI_NEW_STMT);

	  edge e5 = make_edge (e4->src, e2->dest, EDGE_FALSE_VALUE);
	  e4->probability = profile_probability::guessed_always () / 8;
	  e5->probability = e4->probability.invert ();
	  e4->flags = EDGE_TRUE_VALUE;
	  set_immediate_dominator (CDI_DOMINATORS, e2->dest, e4->src);
	}

      *gsi = gsi_after_labels (e2->dest);
      return;
    }
  /* First pass over the sink offset vector: find the first dimension
     with a non-zero offset and diagnose a sink that names a lexically
     later iteration.  */
  for (i = 0; i < fd->ordered; i++)
    {
      tree step = NULL_TREE;
      off = TREE_PURPOSE (deps);
      /* A TRUNC_DIV_EXPR purpose encodes offset / step for non-simple
	 Fortran DO loops.  */
      if (TREE_CODE (off) == TRUNC_DIV_EXPR)
	{
	  step = TREE_OPERAND (off, 1);
	  off = TREE_OPERAND (off, 0);
	}
      if (!integer_zerop (off))
	{
	  gcc_assert (fd->loops[i].cond_code == LT_EXPR
		      || fd->loops[i].cond_code == GT_EXPR);
	  bool forward = fd->loops[i].cond_code == LT_EXPR;
	  if (step)
	    {
	      /* Non-simple Fortran DO loops.  If step is variable,
		 we don't know at compile even the direction, so can't
		 warn.  */
	      if (TREE_CODE (step) != INTEGER_CST)
		break;
	      forward = tree_int_cst_sgn (step) != -1;
	    }
	  if (forward ^ OMP_CLAUSE_DOACROSS_SINK_NEGATIVE (deps))
	    warning_at (loc, OPT_Wopenmp,
			"%qs clause with %<sink%> modifier "
			"waiting for lexically later iteration",
			OMP_CLAUSE_DOACROSS_DEPEND (c)
			? "depend" : "doacross");
	  break;
	}
      deps = TREE_CHAIN (deps);
    }
  /* If all offsets corresponding to the collapsed loops are zero,
     this depend clause can be ignored.  FIXME: but there is still a
     flush needed.  We need to emit one __sync_synchronize () for it
     though (perhaps conditionally)?  Solve this together with the
     conservative dependence folding optimization.
  if (i >= fd->collapse)
    return;  */

  /* Second pass: build both the runtime validity condition COND (is
     the sink iteration inside the iteration space?) and the argument
     vector for the wait builtin.  The wait call goes into a new block
     guarded by COND.  */
  deps = OMP_CLAUSE_DECL (c);
  gsi_prev (&gsi2);
  edge e1 = split_block (gsi_bb (gsi2), gsi_stmt (gsi2));
  edge e2 = split_block_after_labels (e1->dest);

  gsi2 = gsi_after_labels (e1->dest);
  *gsi = gsi_last_bb (e1->src);
  for (i = 0; i < fd->ordered; i++)
    {
      tree itype = TREE_TYPE (fd->loops[i].v);
      tree step = NULL_TREE;
      tree orig_off = NULL_TREE;
      /* Pointer iterators do their offset arithmetic in sizetype.  */
      if (POINTER_TYPE_P (itype))
	itype = sizetype;
      if (i)
	deps = TREE_CHAIN (deps);
      off = TREE_PURPOSE (deps);
      if (TREE_CODE (off) == TRUNC_DIV_EXPR)
	{
	  step = TREE_OPERAND (off, 1);
	  off = TREE_OPERAND (off, 0);
	  gcc_assert (fd->loops[i].cond_code == LT_EXPR
		      && integer_onep (fd->loops[i].step)
		      && !POINTER_TYPE_P (TREE_TYPE (fd->loops[i].v)));
	}
      tree s = fold_convert_loc (loc, itype, step ? step : fd->loops[i].step);
      if (step)
	{
	  /* Keep the undivided offset around for the modulo check
	     below; divide OFF by the step now.  */
	  off = fold_convert_loc (loc, itype, off);
	  orig_off = off;
	  off = fold_build2_loc (loc, TRUNC_DIV_EXPR, itype, off, s);
	}

      /* T becomes "sink iteration for dimension I is in range".  */
      if (integer_zerop (off))
	t = boolean_true_node;
      else
	{
	  /* A = current iteration variable adjusted by the offset.  */
	  tree a;
	  tree co = fold_convert_loc (loc, itype, off);
	  if (POINTER_TYPE_P (TREE_TYPE (fd->loops[i].v)))
	    {
	      if (OMP_CLAUSE_DOACROSS_SINK_NEGATIVE (deps))
		co = fold_build1_loc (loc, NEGATE_EXPR, itype, co);
	      a = fold_build2_loc (loc, POINTER_PLUS_EXPR,
				   TREE_TYPE (fd->loops[i].v), fd->loops[i].v,
				   co);
	    }
	  else if (OMP_CLAUSE_DOACROSS_SINK_NEGATIVE (deps))
	    a = fold_build2_loc (loc, MINUS_EXPR, TREE_TYPE (fd->loops[i].v),
				 fd->loops[i].v, co);
	  else
	    a = fold_build2_loc (loc, PLUS_EXPR, TREE_TYPE (fd->loops[i].v),
				 fd->loops[i].v, co);
	  if (step)
	    {
	      /* With a runtime step the loop direction isn't known at
		 compile time; select between the two bound checks with
		 a COND_EXPR on step < 0.  */
	      tree t1, t2;
	      if (OMP_CLAUSE_DOACROSS_SINK_NEGATIVE (deps))
		t1 = fold_build2_loc (loc, GE_EXPR, boolean_type_node, a,
				      fd->loops[i].n1);
	      else
		t1 = fold_build2_loc (loc, LT_EXPR, boolean_type_node, a,
				      fd->loops[i].n2);
	      if (OMP_CLAUSE_DOACROSS_SINK_NEGATIVE (deps))
		t2 = fold_build2_loc (loc, LT_EXPR, boolean_type_node, a,
				      fd->loops[i].n2);
	      else
		t2 = fold_build2_loc (loc, GE_EXPR, boolean_type_node, a,
				      fd->loops[i].n1);
	      t = fold_build2_loc (loc, LT_EXPR, boolean_type_node,
				   step, build_int_cst (TREE_TYPE (step), 0));
	      if (TREE_CODE (step) != INTEGER_CST)
		{
		  t1 = unshare_expr (t1);
		  t1 = force_gimple_operand_gsi (gsi, t1, true, NULL_TREE,
						 false, GSI_CONTINUE_LINKING);
		  t2 = unshare_expr (t2);
		  t2 = force_gimple_operand_gsi (gsi, t2, true, NULL_TREE,
						 false, GSI_CONTINUE_LINKING);
		}
	      t = fold_build3_loc (loc, COND_EXPR, boolean_type_node,
				   t, t2, t1);
	    }
	  else if (fd->loops[i].cond_code == LT_EXPR)
	    {
	      if (OMP_CLAUSE_DOACROSS_SINK_NEGATIVE (deps))
		t = fold_build2_loc (loc, GE_EXPR, boolean_type_node, a,
				     fd->loops[i].n1);
	      else
		t = fold_build2_loc (loc, LT_EXPR, boolean_type_node, a,
				     fd->loops[i].n2);
	    }
	  else if (OMP_CLAUSE_DOACROSS_SINK_NEGATIVE (deps))
	    t = fold_build2_loc (loc, GT_EXPR, boolean_type_node, a,
				 fd->loops[i].n2);
	  else
	    t = fold_build2_loc (loc, LE_EXPR, boolean_type_node, a,
				 fd->loops[i].n1);
	}
      /* AND this dimension's check into the overall condition.  */
      if (cond)
	cond = fold_build2_loc (loc, BIT_AND_EXPR, boolean_type_node, cond, t);
      else
	cond = t;

      off = fold_convert_loc (loc, itype, off);

      /* For non-unit steps, also require that the offset is an exact
	 multiple of the step, otherwise the sink iteration never
	 occurs.  */
      if (step
	  || (fd->loops[i].cond_code == LT_EXPR
	      ? !integer_onep (fd->loops[i].step)
	      : !integer_minus_onep (fd->loops[i].step)))
	{
	  if (step == NULL_TREE
	      && TYPE_UNSIGNED (itype)
	      && fd->loops[i].cond_code == GT_EXPR)
	    t = fold_build2_loc (loc, TRUNC_MOD_EXPR, itype, off,
				 fold_build1_loc (loc, NEGATE_EXPR, itype,
						  s));
	  else
	    t = fold_build2_loc (loc, TRUNC_MOD_EXPR, itype,
				 orig_off ? orig_off : off, s);
	  t = fold_build2_loc (loc, EQ_EXPR, boolean_type_node, t,
			       build_int_cst (itype, 0));
	  /* Folding to constant false means the offset provably never
	     names a valid iteration; warn once.  */
	  if (integer_zerop (t) && !warned_step)
	    {
	      warning_at (loc, OPT_Wopenmp,
			  "%qs clause with %<sink%> modifier refers to "
			  "iteration never in the iteration space",
			  OMP_CLAUSE_DOACROSS_DEPEND (c)
			  ? "depend" : "doacross");
	      warned_step = true;
	    }
	  cond = fold_build2_loc (loc, BIT_AND_EXPR, boolean_type_node,
				  cond, t);
	}

      /* T = the current logical iteration number of dimension I.  */
      if (i <= fd->collapse - 1 && fd->collapse > 1)
	t = fd->loop.v;
      else if (counts[i])
	t = counts[i];
      else
	{
	  t = fold_build2_loc (loc, MINUS_EXPR, TREE_TYPE (fd->loops[i].v),
			       fd->loops[i].v, fd->loops[i].n1);
	  t = fold_convert_loc (loc, fd->iter_type, t);
	}
      if (step)
	/* We have divided off by step already earlier.  */;
      else if (TYPE_UNSIGNED (itype) && fd->loops[i].cond_code == GT_EXPR)
	off = fold_build2_loc (loc, TRUNC_DIV_EXPR, itype, off,
			       fold_build1_loc (loc, NEGATE_EXPR, itype,
						s));
      else
	off = fold_build2_loc (loc, TRUNC_DIV_EXPR, itype, off, s);
      if (OMP_CLAUSE_DOACROSS_SINK_NEGATIVE (deps))
	off = fold_build1_loc (loc, NEGATE_EXPR, itype, off);
      off = fold_convert_loc (loc, fd->iter_type, off);
      /* For collapsed dimensions, fold the per-dimension offsets into
	 a single linearized offset via COFF; only the final collapsed
	 dimension pushes an argument.  */
      if (i <= fd->collapse - 1 && fd->collapse > 1)
	{
	  if (i)
	    off = fold_build2_loc (loc, PLUS_EXPR, fd->iter_type, coff,
				   off);
	  if (i < fd->collapse - 1)
	    {
	      coff = fold_build2_loc (loc, MULT_EXPR, fd->iter_type, off,
				      counts[i]);
	      continue;
	    }
	}
      off = unshare_expr (off);
      t = fold_build2_loc (loc, PLUS_EXPR, fd->iter_type, t, off);
      t = force_gimple_operand_gsi (&gsi2, t, true, NULL_TREE,
				    true, GSI_SAME_STMT);
      args.safe_push (t);
    }
  /* Emit the wait call in the guarded block.  */
  gimple *g = gimple_build_call_vec (builtin_decl_explicit (sink_ix), args);
  gimple_set_location (g, loc);
  gsi_insert_before (&gsi2, g, GSI_SAME_STMT);

  /* Branch around the wait call when COND is false; taking the call
     is the likely path (7/8).  */
  cond = unshare_expr (cond);
  cond = force_gimple_operand_gsi (gsi, cond, true, NULL_TREE, false,
				   GSI_CONTINUE_LINKING);
  gsi_insert_after (gsi, gimple_build_cond_empty (cond), GSI_NEW_STMT);
  edge e3 = make_edge (e1->src, e2->dest, EDGE_FALSE_VALUE);
  e3->probability = profile_probability::guessed_always () / 8;
  e1->probability = e3->probability.invert ();
  e1->flags = EDGE_TRUE_VALUE;
  set_immediate_dominator (CDI_DOMINATORS, e2->dest, e1->src);

  *gsi = gsi_after_labels (e2->dest);
}
3675 :
3676 : /* Expand all #pragma omp ordered depend(source) and
3677 : #pragma omp ordered depend(sink:...) constructs in the current
3678 : #pragma omp for ordered(n) region. */
3679 :
3680 : static void
3681 335 : expand_omp_ordered_source_sink (struct omp_region *region,
3682 : struct omp_for_data *fd, tree *counts,
3683 : basic_block cont_bb)
3684 : {
3685 335 : struct omp_region *inner;
3686 335 : int i;
3687 1563 : for (i = fd->collapse - 1; i < fd->ordered; i++)
3688 1228 : if (i == fd->collapse - 1 && fd->collapse > 1)
3689 147 : counts[i] = NULL_TREE;
3690 1081 : else if (i >= fd->collapse && !cont_bb)
3691 0 : counts[i] = build_zero_cst (fd->iter_type);
3692 2154 : else if (!POINTER_TYPE_P (TREE_TYPE (fd->loops[i].v))
3693 2154 : && integer_onep (fd->loops[i].step))
3694 987 : counts[i] = NULL_TREE;
3695 : else
3696 94 : counts[i] = create_tmp_var (fd->iter_type, ".orditer");
3697 335 : tree atype
3698 335 : = build_array_type_nelts (fd->iter_type, fd->ordered - fd->collapse + 1);
3699 335 : counts[fd->ordered] = create_tmp_var (atype, ".orditera");
3700 335 : TREE_ADDRESSABLE (counts[fd->ordered]) = 1;
3701 335 : counts[fd->ordered + 1] = NULL_TREE;
3702 :
3703 1615 : for (inner = region->inner; inner; inner = inner->next)
3704 1280 : if (inner->type == GIMPLE_OMP_ORDERED)
3705 : {
3706 709 : gomp_ordered *ord_stmt = inner->ord_stmt;
3707 709 : gimple_stmt_iterator gsi = gsi_for_stmt (ord_stmt);
3708 709 : location_t loc = gimple_location (ord_stmt);
3709 709 : tree c;
3710 1175 : for (c = gimple_omp_ordered_clauses (ord_stmt);
3711 1175 : c; c = OMP_CLAUSE_CHAIN (c))
3712 801 : if (OMP_CLAUSE_DOACROSS_KIND (c) == OMP_CLAUSE_DOACROSS_SOURCE)
3713 : break;
3714 709 : if (c)
3715 335 : expand_omp_ordered_source (&gsi, fd, counts, loc);
3716 1510 : for (c = gimple_omp_ordered_clauses (ord_stmt);
3717 1510 : c; c = OMP_CLAUSE_CHAIN (c))
3718 801 : if (OMP_CLAUSE_DOACROSS_KIND (c) == OMP_CLAUSE_DOACROSS_SINK)
3719 466 : expand_omp_ordered_sink (&gsi, fd, counts, c, loc, cont_bb);
3720 709 : gsi_remove (&gsi, true);
3721 : }
3722 335 : }
3723 :
3724 : /* Wrap the body into fd->ordered - fd->collapse loops that aren't
3725 : collapsed. */
3726 :
static basic_block
expand_omp_for_ordered_loops (struct omp_for_data *fd, tree *counts,
			      basic_block cont_bb, basic_block body_bb,
			      basic_block l0_bb, bool ordered_lastprivate)
{
  /* Nothing to wrap if every ordered dimension is already collapsed.  */
  if (fd->ordered == fd->collapse)
    return cont_bb;

  if (!cont_bb)
    {
      /* Broken loop (no continue block): just initialize the loop
	 variables and zero the runtime counter array slots; no actual
	 loop structure is needed.  */
      gimple_stmt_iterator gsi = gsi_after_labels (body_bb);
      for (int i = fd->collapse; i < fd->ordered; i++)
	{
	  tree type = TREE_TYPE (fd->loops[i].v);
	  tree n1 = fold_convert (type, fd->loops[i].n1);
	  expand_omp_build_assign (&gsi, fd->loops[i].v, n1);
	  tree aref = build4 (ARRAY_REF, fd->iter_type, counts[fd->ordered],
			      size_int (i - fd->collapse + 1),
			      NULL_TREE, NULL_TREE);
	  expand_omp_build_assign (&gsi, aref, build_zero_cst (fd->iter_type));
	}
      return NULL;
    }

  /* Build one loop per non-collapsed ordered dimension, innermost
     first, wrapping the current body between BODY_BB and CONT_BB.  */
  for (int i = fd->ordered - 1; i >= fd->collapse; i--)
    {
      tree t, type = TREE_TYPE (fd->loops[i].v);
      gimple_stmt_iterator gsi = gsi_after_labels (body_bb);
      /* Set the ".first" flag on entry of the outermost generated
	 loop (i == fd->collapse is processed last).  */
      if (counts[fd->ordered + 1] && i == fd->collapse)
	expand_omp_build_assign (&gsi, counts[fd->ordered + 1],
				 boolean_true_node);
      /* Loop preheader: V = N1; counter = 0; array slot = 0.  */
      expand_omp_build_assign (&gsi, fd->loops[i].v,
			       fold_convert (type, fd->loops[i].n1));
      if (counts[i])
	expand_omp_build_assign (&gsi, counts[i],
				 build_zero_cst (fd->iter_type));
      tree aref = build4 (ARRAY_REF, fd->iter_type, counts[fd->ordered],
			  size_int (i - fd->collapse + 1),
			  NULL_TREE, NULL_TREE);
      expand_omp_build_assign (&gsi, aref, build_zero_cst (fd->iter_type));
      if (!gsi_end_p (gsi))
	gsi_prev (&gsi);
      else
	gsi = gsi_last_bb (body_bb);
      /* Split off the loop body after the initializations.  */
      edge e1 = split_block (body_bb, gsi_stmt (gsi));
      basic_block new_body = e1->dest;
      if (body_bb == cont_bb)
	cont_bb = new_body;
      edge e2 = NULL;
      basic_block new_header;
      if (EDGE_COUNT (cont_bb->preds) > 0)
	{
	  /* Latch: V += STEP, update the iteration number and publish
	     it in the runtime counter array.  */
	  gsi = gsi_last_bb (cont_bb);
	  if (POINTER_TYPE_P (type))
	    t = fold_build_pointer_plus (fd->loops[i].v, fd->loops[i].step);
	  else
	    t = fold_build2 (PLUS_EXPR, type, fd->loops[i].v,
			     fold_convert (type, fd->loops[i].step));
	  expand_omp_build_assign (&gsi, fd->loops[i].v, t);
	  if (counts[i])
	    {
	      t = fold_build2 (PLUS_EXPR, fd->iter_type, counts[i],
			       build_int_cst (fd->iter_type, 1));
	      expand_omp_build_assign (&gsi, counts[i], t);
	      t = counts[i];
	    }
	  else
	    {
	      /* Unit-step loops derive the iteration number directly
		 as V - N1.  */
	      t = fold_build2 (MINUS_EXPR, TREE_TYPE (fd->loops[i].v),
			       fd->loops[i].v, fd->loops[i].n1);
	      t = fold_convert (fd->iter_type, t);
	      t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
					    true, GSI_SAME_STMT);
	    }
	  aref = build4 (ARRAY_REF, fd->iter_type, counts[fd->ordered],
			 size_int (i - fd->collapse + 1),
			 NULL_TREE, NULL_TREE);
	  expand_omp_build_assign (&gsi, aref, t);
	  /* Clear ".first" once the innermost generated loop has
	     iterated.  */
	  if (counts[fd->ordered + 1] && i == fd->ordered - 1)
	    expand_omp_build_assign (&gsi, counts[fd->ordered + 1],
				     boolean_false_node);
	  gsi_prev (&gsi);
	  e2 = split_block (cont_bb, gsi_stmt (gsi));
	  new_header = e2->dest;
	}
      else
	new_header = cont_bb;
      /* Header: if (V cond N2) continue looping, else exit.  */
      gsi = gsi_after_labels (new_header);
      tree v = force_gimple_operand_gsi (&gsi, fd->loops[i].v, true, NULL_TREE,
					 true, GSI_SAME_STMT);
      tree n2
	= force_gimple_operand_gsi (&gsi, fold_convert (type, fd->loops[i].n2),
				    true, NULL_TREE, true, GSI_SAME_STMT);
      t = build2 (fd->loops[i].cond_code, boolean_type_node, v, n2);
      gsi_insert_before (&gsi, gimple_build_cond_empty (t), GSI_NEW_STMT);
      edge e3 = split_block (new_header, gsi_stmt (gsi));
      cont_bb = e3->dest;
      remove_edge (e1);
      make_edge (body_bb, new_header, EDGE_FALLTHRU);
      e3->flags = EDGE_FALSE_VALUE;
      e3->probability = profile_probability::guessed_always () / 8;
      e1 = make_edge (new_header, new_body, EDGE_TRUE_VALUE);
      e1->probability = e3->probability.invert ();

      set_immediate_dominator (CDI_DOMINATORS, new_header, body_bb);
      set_immediate_dominator (CDI_DOMINATORS, new_body, new_header);

      /* Register the new natural loop with the loop tree when a latch
	 block was created.  */
      if (e2)
	{
	  class loop *loop = alloc_loop ();
	  loop->header = new_header;
	  loop->latch = e2->src;
	  add_loop (loop, l0_bb->loop_father);
	}
    }

  /* If there are any lastprivate clauses and it is possible some loops
     might have zero iterations, ensure all the decls are initialized,
     otherwise we could crash evaluating C++ class iterators with lastprivate
     clauses.  */
  bool need_inits = false;
  for (int i = fd->collapse; ordered_lastprivate && i < fd->ordered; i++)
    if (need_inits)
      {
	tree type = TREE_TYPE (fd->loops[i].v);
	gimple_stmt_iterator gsi = gsi_after_labels (body_bb);
	expand_omp_build_assign (&gsi, fd->loops[i].v,
				 fold_convert (type, fd->loops[i].n1));
      }
    else
      {
	/* Once any loop's entry condition cannot be proven always
	   true, every more deeply nested variable needs an explicit
	   initialization.  */
	tree type = TREE_TYPE (fd->loops[i].v);
	tree this_cond = fold_build2 (fd->loops[i].cond_code,
				      boolean_type_node,
				      fold_convert (type, fd->loops[i].n1),
				      fold_convert (type, fd->loops[i].n2));
	if (!integer_onep (this_cond))
	  need_inits = true;
      }

  return cont_bb;
}
3869 :
3870 : /* A subroutine of expand_omp_for. Generate code for a parallel
3871 : loop with any schedule. Given parameters:
3872 :
3873 : for (V = N1; V cond N2; V += STEP) BODY;
3874 :
3875 : where COND is "<" or ">", we generate pseudocode
3876 :
3877 : more = GOMP_loop_foo_start (N1, N2, STEP, CHUNK, &istart0, &iend0);
3878 : if (more) goto L0; else goto L3;
3879 : L0:
3880 : V = istart0;
3881 : iend = iend0;
3882 : L1:
3883 : BODY;
3884 : V += STEP;
3885 : if (V cond iend) goto L1; else goto L2;
3886 : L2:
3887 : if (GOMP_loop_foo_next (&istart0, &iend0)) goto L0; else goto L3;
3888 : L3:
3889 :
3890 : If this is a combined omp parallel loop, instead of the call to
3891 : GOMP_loop_foo_start, we call GOMP_loop_foo_next.
3892 : If this is gimple_omp_for_combined_p loop, then instead of assigning
3893 : V and iend in L0 we assign the first two _looptemp_ clause decls of the
3894 : inner GIMPLE_OMP_FOR and V += STEP; and
3895 : if (V cond iend) goto L1; else goto L2; are removed.
3896 :
3897 : For collapsed loops, given parameters:
3898 : collapse(3)
3899 : for (V1 = N11; V1 cond1 N12; V1 += STEP1)
3900 : for (V2 = N21; V2 cond2 N22; V2 += STEP2)
3901 : for (V3 = N31; V3 cond3 N32; V3 += STEP3)
3902 : BODY;
3903 :
3904 : we generate pseudocode
3905 :
3906 : if (__builtin_expect (N32 cond3 N31, 0)) goto Z0;
3907 : if (cond3 is <)
3908 : adj = STEP3 - 1;
3909 : else
3910 : adj = STEP3 + 1;
3911 : count3 = (adj + N32 - N31) / STEP3;
3912 : if (__builtin_expect (N22 cond2 N21, 0)) goto Z0;
3913 : if (cond2 is <)
3914 : adj = STEP2 - 1;
3915 : else
3916 : adj = STEP2 + 1;
3917 : count2 = (adj + N22 - N21) / STEP2;
3918 : if (__builtin_expect (N12 cond1 N11, 0)) goto Z0;
3919 : if (cond1 is <)
3920 : adj = STEP1 - 1;
3921 : else
3922 : adj = STEP1 + 1;
3923 : count1 = (adj + N12 - N11) / STEP1;
3924 : count = count1 * count2 * count3;
3925 : goto Z1;
3926 : Z0:
3927 : count = 0;
3928 : Z1:
3929 : more = GOMP_loop_foo_start (0, count, 1, CHUNK, &istart0, &iend0);
3930 : if (more) goto L0; else goto L3;
3931 : L0:
3932 : V = istart0;
3933 : T = V;
3934 : V3 = N31 + (T % count3) * STEP3;
3935 : T = T / count3;
3936 : V2 = N21 + (T % count2) * STEP2;
3937 : T = T / count2;
3938 : V1 = N11 + T * STEP1;
3939 : iend = iend0;
3940 : L1:
3941 : BODY;
3942 : V += 1;
3943 : if (V < iend) goto L10; else goto L2;
3944 : L10:
3945 : V3 += STEP3;
3946 : if (V3 cond3 N32) goto L1; else goto L11;
3947 : L11:
3948 : V3 = N31;
3949 : V2 += STEP2;
3950 : if (V2 cond2 N22) goto L1; else goto L12;
3951 : L12:
3952 : V2 = N21;
3953 : V1 += STEP1;
3954 : goto L1;
3955 : L2:
3956 : if (GOMP_loop_foo_next (&istart0, &iend0)) goto L0; else goto L3;
3957 : L3:
3958 :
3959 : */
3960 :
3961 : static void
3962 4134 : expand_omp_for_generic (struct omp_region *region,
3963 : struct omp_for_data *fd,
3964 : enum built_in_function start_fn,
3965 : enum built_in_function next_fn,
3966 : tree sched_arg,
3967 : gimple *inner_stmt)
3968 : {
3969 4134 : tree type, istart0, iend0, iend;
3970 4134 : tree t, vmain, vback, bias = NULL_TREE;
3971 4134 : basic_block entry_bb, cont_bb, exit_bb, l0_bb, l1_bb, collapse_bb;
3972 4134 : basic_block l2_bb = NULL, l3_bb = NULL;
3973 4134 : gimple_stmt_iterator gsi;
3974 4134 : gassign *assign_stmt;
3975 4134 : bool in_combined_parallel = is_combined_parallel (region);
3976 4134 : bool broken_loop = region->cont == NULL;
3977 4134 : edge e, ne;
3978 4134 : tree *counts = NULL;
3979 4134 : int i;
3980 4134 : bool ordered_lastprivate = false;
3981 4134 : bool offload = is_in_offload_region (region);
3982 :
3983 4134 : gcc_assert (!broken_loop || !in_combined_parallel);
3984 4134 : gcc_assert (fd->iter_type == long_integer_type_node
3985 : || !in_combined_parallel);
3986 :
3987 4134 : entry_bb = region->entry;
3988 4134 : cont_bb = region->cont;
3989 4134 : collapse_bb = NULL;
3990 4134 : gcc_assert (EDGE_COUNT (entry_bb->succs) == 2);
3991 4134 : gcc_assert (broken_loop
3992 : || BRANCH_EDGE (entry_bb)->dest == FALLTHRU_EDGE (cont_bb)->dest);
3993 4134 : l0_bb = split_edge (FALLTHRU_EDGE (entry_bb));
3994 4134 : l1_bb = single_succ (l0_bb);
3995 4134 : if (!broken_loop)
3996 : {
3997 3727 : l2_bb = create_empty_bb (cont_bb);
3998 3727 : gcc_assert (BRANCH_EDGE (cont_bb)->dest == l1_bb
3999 : || (single_succ_edge (BRANCH_EDGE (cont_bb)->dest)->dest
4000 : == l1_bb));
4001 3727 : gcc_assert (EDGE_COUNT (cont_bb->succs) == 2);
4002 : }
4003 : else
4004 : l2_bb = NULL;
4005 4134 : l3_bb = BRANCH_EDGE (entry_bb)->dest;
4006 4134 : exit_bb = region->exit;
4007 :
4008 4134 : gsi = gsi_last_nondebug_bb (entry_bb);
4009 :
4010 4134 : gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_FOR);
4011 4134 : if (fd->ordered
4012 4134 : && omp_find_clause (gimple_omp_for_clauses (fd->for_stmt),
4013 : OMP_CLAUSE_LASTPRIVATE))
4014 4134 : ordered_lastprivate = false;
4015 4134 : tree reductions = NULL_TREE;
4016 4134 : tree mem = NULL_TREE, cond_var = NULL_TREE, condtemp = NULL_TREE;
4017 4134 : tree memv = NULL_TREE;
4018 4134 : if (fd->lastprivate_conditional)
4019 : {
4020 46 : tree c = omp_find_clause (gimple_omp_for_clauses (fd->for_stmt),
4021 : OMP_CLAUSE__CONDTEMP_);
4022 46 : if (fd->have_pointer_condtemp)
4023 28 : condtemp = OMP_CLAUSE_DECL (c);
4024 46 : c = omp_find_clause (OMP_CLAUSE_CHAIN (c), OMP_CLAUSE__CONDTEMP_);
4025 46 : cond_var = OMP_CLAUSE_DECL (c);
4026 : }
4027 4134 : if (sched_arg)
4028 : {
4029 169 : if (fd->have_reductemp)
4030 : {
4031 149 : tree c = omp_find_clause (gimple_omp_for_clauses (fd->for_stmt),
4032 : OMP_CLAUSE__REDUCTEMP_);
4033 149 : reductions = OMP_CLAUSE_DECL (c);
4034 149 : gcc_assert (TREE_CODE (reductions) == SSA_NAME);
4035 149 : gimple *g = SSA_NAME_DEF_STMT (reductions);
4036 149 : reductions = gimple_assign_rhs1 (g);
4037 149 : OMP_CLAUSE_DECL (c) = reductions;
4038 149 : entry_bb = gimple_bb (g);
4039 149 : edge e = split_block (entry_bb, g);
4040 149 : if (region->entry == entry_bb)
4041 8 : region->entry = e->dest;
4042 298 : gsi = gsi_last_bb (entry_bb);
4043 : }
4044 : else
4045 20 : reductions = null_pointer_node;
4046 169 : if (fd->have_pointer_condtemp)
4047 : {
4048 28 : tree type = TREE_TYPE (condtemp);
4049 28 : memv = create_tmp_var (type);
4050 28 : TREE_ADDRESSABLE (memv) = 1;
4051 28 : unsigned HOST_WIDE_INT sz
4052 28 : = tree_to_uhwi (TYPE_SIZE_UNIT (TREE_TYPE (type)));
4053 28 : sz *= fd->lastprivate_conditional;
4054 28 : expand_omp_build_assign (&gsi, memv, build_int_cst (type, sz),
4055 : false);
4056 28 : mem = build_fold_addr_expr (memv);
4057 : }
4058 : else
4059 141 : mem = null_pointer_node;
4060 : }
4061 4134 : if (fd->collapse > 1 || fd->ordered)
4062 : {
4063 1669 : int first_zero_iter1 = -1, first_zero_iter2 = -1;
4064 1669 : basic_block zero_iter1_bb = NULL, zero_iter2_bb = NULL, l2_dom_bb = NULL;
4065 :
4066 1669 : counts = XALLOCAVEC (tree, fd->ordered
4067 : ? fd->ordered + 2
4068 : + (fd->ordered - fd->collapse)
4069 : : fd->collapse);
4070 1669 : expand_omp_for_init_counts (fd, &gsi, entry_bb, counts,
4071 : zero_iter1_bb, first_zero_iter1,
4072 : zero_iter2_bb, first_zero_iter2, l2_dom_bb);
4073 :
4074 1669 : if (zero_iter1_bb)
4075 : {
4076 : /* Some counts[i] vars might be uninitialized if
4077 : some loop has zero iterations. But the body shouldn't
4078 : be executed in that case, so just avoid uninit warnings. */
4079 1913 : for (i = first_zero_iter1;
4080 1913 : i < (fd->ordered ? fd->ordered : fd->collapse); i++)
4081 1502 : if (SSA_VAR_P (counts[i]))
4082 950 : suppress_warning (counts[i], OPT_Wuninitialized);
4083 411 : gsi_prev (&gsi);
4084 411 : e = split_block (entry_bb, gsi_stmt (gsi));
4085 411 : entry_bb = e->dest;
4086 411 : make_edge (zero_iter1_bb, entry_bb, EDGE_FALLTHRU);
4087 411 : gsi = gsi_last_nondebug_bb (entry_bb);
4088 411 : set_immediate_dominator (CDI_DOMINATORS, entry_bb,
4089 : get_immediate_dominator (CDI_DOMINATORS,
4090 : zero_iter1_bb));
4091 : }
4092 1669 : if (zero_iter2_bb)
4093 : {
4094 : /* Some counts[i] vars might be uninitialized if
4095 : some loop has zero iterations. But the body shouldn't
4096 : be executed in that case, so just avoid uninit warnings. */
4097 394 : for (i = first_zero_iter2; i < fd->ordered; i++)
4098 276 : if (SSA_VAR_P (counts[i]))
4099 216 : suppress_warning (counts[i], OPT_Wuninitialized);
4100 118 : if (zero_iter1_bb)
4101 103 : make_edge (zero_iter2_bb, entry_bb, EDGE_FALLTHRU);
4102 : else
4103 : {
4104 15 : gsi_prev (&gsi);
4105 15 : e = split_block (entry_bb, gsi_stmt (gsi));
4106 15 : entry_bb = e->dest;
4107 15 : make_edge (zero_iter2_bb, entry_bb, EDGE_FALLTHRU);
4108 15 : gsi = gsi_last_nondebug_bb (entry_bb);
4109 15 : set_immediate_dominator (CDI_DOMINATORS, entry_bb,
4110 : get_immediate_dominator
4111 : (CDI_DOMINATORS, zero_iter2_bb));
4112 : }
4113 : }
4114 1669 : if (fd->collapse == 1)
4115 : {
4116 188 : counts[0] = fd->loop.n2;
4117 188 : fd->loop = fd->loops[0];
4118 : }
4119 : }
4120 :
4121 4134 : type = TREE_TYPE (fd->loop.v);
4122 4134 : istart0 = create_tmp_var (fd->iter_type, ".istart0");
4123 4134 : iend0 = create_tmp_var (fd->iter_type, ".iend0");
4124 4134 : TREE_ADDRESSABLE (istart0) = 1;
4125 4134 : TREE_ADDRESSABLE (iend0) = 1;
4126 :
4127 : /* See if we need to bias by LLONG_MIN. */
4128 4134 : if (fd->iter_type == long_long_unsigned_type_node
4129 778 : && (TREE_CODE (type) == INTEGER_TYPE || TREE_CODE (type) == BITINT_TYPE)
4130 530 : && !TYPE_UNSIGNED (type)
4131 4134 : && fd->ordered == 0)
4132 : {
4133 0 : tree n1, n2;
4134 :
4135 0 : if (fd->loop.cond_code == LT_EXPR)
4136 : {
4137 0 : n1 = fd->loop.n1;
4138 0 : n2 = fold_build2 (PLUS_EXPR, type, fd->loop.n2, fd->loop.step);
4139 : }
4140 : else
4141 : {
4142 0 : n1 = fold_build2 (MINUS_EXPR, type, fd->loop.n2, fd->loop.step);
4143 0 : n2 = fd->loop.n1;
4144 : }
4145 0 : if (TREE_CODE (n1) != INTEGER_CST
4146 0 : || TREE_CODE (n2) != INTEGER_CST
4147 0 : || ((tree_int_cst_sgn (n1) < 0) ^ (tree_int_cst_sgn (n2) < 0)))
4148 0 : bias = fold_convert (fd->iter_type, TYPE_MIN_VALUE (type));
4149 : }
4150 :
4151 4134 : gimple_stmt_iterator gsif = gsi;
4152 4134 : gsi_prev (&gsif);
4153 :
4154 4134 : tree arr = NULL_TREE;
4155 4134 : if (in_combined_parallel)
4156 : {
4157 1084 : gcc_assert (fd->ordered == 0);
4158 : /* In a combined parallel loop, emit a call to
4159 : GOMP_loop_foo_next. */
4160 1084 : t = build_call_expr (builtin_decl_explicit (next_fn), 2,
4161 : build_fold_addr_expr (istart0),
4162 : build_fold_addr_expr (iend0));
4163 : }
4164 : else
4165 : {
4166 3050 : tree t0, t1, t2, t3, t4;
4167 : /* If this is not a combined parallel loop, emit a call to
4168 : GOMP_loop_foo_start in ENTRY_BB. */
4169 3050 : t4 = build_fold_addr_expr (iend0);
4170 3050 : t3 = build_fold_addr_expr (istart0);
4171 3050 : if (fd->ordered)
4172 : {
4173 670 : t0 = build_int_cst (unsigned_type_node,
4174 335 : fd->ordered - fd->collapse + 1);
4175 335 : arr = create_tmp_var (build_array_type_nelts (fd->iter_type,
4176 335 : fd->ordered
4177 335 : - fd->collapse + 1),
4178 : ".omp_counts");
4179 335 : DECL_NAMELESS (arr) = 1;
4180 335 : TREE_ADDRESSABLE (arr) = 1;
4181 335 : TREE_STATIC (arr) = 1;
4182 335 : vec<constructor_elt, va_gc> *v;
4183 335 : vec_alloc (v, fd->ordered - fd->collapse + 1);
4184 335 : int idx;
4185 :
4186 1563 : for (idx = 0; idx < fd->ordered - fd->collapse + 1; idx++)
4187 : {
4188 1228 : tree c;
4189 1228 : if (idx == 0 && fd->collapse > 1)
4190 147 : c = fd->loop.n2;
4191 : else
4192 1081 : c = counts[idx + fd->collapse - 1];
4193 1228 : tree purpose = size_int (idx);
4194 1228 : CONSTRUCTOR_APPEND_ELT (v, purpose, c);
4195 1228 : if (TREE_CODE (c) != INTEGER_CST)
4196 433 : TREE_STATIC (arr) = 0;
4197 : }
4198 :
4199 335 : DECL_INITIAL (arr) = build_constructor (TREE_TYPE (arr), v);
4200 335 : if (!TREE_STATIC (arr))
4201 232 : force_gimple_operand_gsi (&gsi, build1 (DECL_EXPR,
4202 : void_type_node, arr),
4203 : true, NULL_TREE, true, GSI_SAME_STMT);
4204 335 : t1 = build_fold_addr_expr (arr);
4205 335 : t2 = NULL_TREE;
4206 : }
4207 : else
4208 : {
4209 2715 : t2 = fold_convert (fd->iter_type, fd->loop.step);
4210 2715 : t1 = fd->loop.n2;
4211 2715 : t0 = fd->loop.n1;
4212 2715 : if (gimple_omp_for_combined_into_p (fd->for_stmt))
4213 : {
4214 1152 : tree innerc
4215 1152 : = omp_find_clause (gimple_omp_for_clauses (fd->for_stmt),
4216 : OMP_CLAUSE__LOOPTEMP_);
4217 1152 : gcc_assert (innerc);
4218 1152 : t0 = OMP_CLAUSE_DECL (innerc);
4219 1152 : innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
4220 : OMP_CLAUSE__LOOPTEMP_);
4221 1152 : gcc_assert (innerc);
4222 1152 : t1 = OMP_CLAUSE_DECL (innerc);
4223 : }
4224 5318 : if (POINTER_TYPE_P (TREE_TYPE (t0))
4225 2715 : && TYPE_PRECISION (TREE_TYPE (t0))
4226 112 : != TYPE_PRECISION (fd->iter_type))
4227 : {
4228 : /* Avoid casting pointers to integer of a different size. */
4229 0 : tree itype = signed_type_for (type);
4230 0 : t1 = fold_convert (fd->iter_type, fold_convert (itype, t1));
4231 0 : t0 = fold_convert (fd->iter_type, fold_convert (itype, t0));
4232 : }
4233 : else
4234 : {
4235 2715 : t1 = fold_convert (fd->iter_type, t1);
4236 2715 : t0 = fold_convert (fd->iter_type, t0);
4237 : }
4238 2715 : if (bias)
4239 : {
4240 0 : t1 = fold_build2 (PLUS_EXPR, fd->iter_type, t1, bias);
4241 0 : t0 = fold_build2 (PLUS_EXPR, fd->iter_type, t0, bias);
4242 : }
4243 : }
4244 3050 : if (fd->iter_type == long_integer_type_node || fd->ordered)
4245 : {
4246 2328 : if (fd->chunk_size)
4247 : {
4248 1579 : t = fold_convert (fd->iter_type, fd->chunk_size);
4249 1579 : t = omp_adjust_chunk_size (t, fd->simd_schedule, offload);
4250 1579 : if (sched_arg)
4251 : {
4252 135 : if (fd->ordered)
4253 36 : t = build_call_expr (builtin_decl_explicit (start_fn),
4254 : 8, t0, t1, sched_arg, t, t3, t4,
4255 : reductions, mem);
4256 : else
4257 99 : t = build_call_expr (builtin_decl_explicit (start_fn),
4258 : 9, t0, t1, t2, sched_arg, t, t3, t4,
4259 : reductions, mem);
4260 : }
4261 1444 : else if (fd->ordered)
4262 299 : t = build_call_expr (builtin_decl_explicit (start_fn),
4263 : 5, t0, t1, t, t3, t4);
4264 : else
4265 1145 : t = build_call_expr (builtin_decl_explicit (start_fn),
4266 : 6, t0, t1, t2, t, t3, t4);
4267 : }
4268 749 : else if (fd->ordered)
4269 0 : t = build_call_expr (builtin_decl_explicit (start_fn),
4270 : 4, t0, t1, t3, t4);
4271 : else
4272 749 : t = build_call_expr (builtin_decl_explicit (start_fn),
4273 : 5, t0, t1, t2, t3, t4);
4274 : }
4275 : else
4276 : {
4277 722 : tree t5;
4278 722 : tree c_bool_type;
4279 722 : tree bfn_decl;
4280 :
4281 : /* The GOMP_loop_ull_*start functions have additional boolean
4282 : argument, true for < loops and false for > loops.
4283 : In Fortran, the C bool type can be different from
4284 : boolean_type_node. */
4285 722 : bfn_decl = builtin_decl_explicit (start_fn);
4286 722 : c_bool_type = TREE_TYPE (TREE_TYPE (bfn_decl));
4287 722 : t5 = build_int_cst (c_bool_type,
4288 942 : fd->loop.cond_code == LT_EXPR ? 1 : 0);
4289 722 : if (fd->chunk_size)
4290 : {
4291 390 : tree bfn_decl = builtin_decl_explicit (start_fn);
4292 390 : t = fold_convert (fd->iter_type, fd->chunk_size);
4293 390 : t = omp_adjust_chunk_size (t, fd->simd_schedule, offload);
4294 390 : if (sched_arg)
4295 34 : t = build_call_expr (bfn_decl, 10, t5, t0, t1, t2, sched_arg,
4296 : t, t3, t4, reductions, mem);
4297 : else
4298 356 : t = build_call_expr (bfn_decl, 7, t5, t0, t1, t2, t, t3, t4);
4299 : }
4300 : else
4301 332 : t = build_call_expr (builtin_decl_explicit (start_fn),
4302 : 6, t5, t0, t1, t2, t3, t4);
4303 : }
4304 : }
4305 4134 : if (TREE_TYPE (t) != boolean_type_node)
4306 0 : t = fold_build2 (NE_EXPR, boolean_type_node,
4307 : t, build_int_cst (TREE_TYPE (t), 0));
4308 4134 : t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
4309 : true, GSI_SAME_STMT);
4310 4134 : if (arr && !TREE_STATIC (arr))
4311 : {
4312 232 : tree clobber = build_clobber (TREE_TYPE (arr));
4313 232 : gsi_insert_before (&gsi, gimple_build_assign (arr, clobber),
4314 : GSI_SAME_STMT);
4315 : }
4316 4134 : if (fd->have_pointer_condtemp)
4317 28 : expand_omp_build_assign (&gsi, condtemp, memv, false);
4318 4134 : if (fd->have_reductemp)
4319 : {
4320 149 : gimple *g = gsi_stmt (gsi);
4321 149 : gsi_remove (&gsi, true);
4322 149 : release_ssa_name (gimple_assign_lhs (g));
4323 :
4324 149 : entry_bb = region->entry;
4325 149 : gsi = gsi_last_nondebug_bb (entry_bb);
4326 :
4327 149 : gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_FOR);
4328 : }
4329 4134 : gsi_insert_after (&gsi, gimple_build_cond_empty (t), GSI_SAME_STMT);
4330 :
4331 : /* Remove the GIMPLE_OMP_FOR statement. */
4332 4134 : gsi_remove (&gsi, true);
4333 :
4334 4134 : if (gsi_end_p (gsif))
4335 1203 : gsif = gsi_after_labels (gsi_bb (gsif));
4336 4134 : gsi_next (&gsif);
4337 :
4338 : /* Iteration setup for sequential loop goes in L0_BB. */
4339 4134 : tree startvar = fd->loop.v;
4340 4134 : tree endvar = NULL_TREE;
4341 :
4342 4134 : if (gimple_omp_for_combined_p (fd->for_stmt))
4343 : {
4344 1389 : gcc_assert (gimple_code (inner_stmt) == GIMPLE_OMP_FOR
4345 : && gimple_omp_for_kind (inner_stmt)
4346 : == GF_OMP_FOR_KIND_SIMD);
4347 1389 : tree innerc = omp_find_clause (gimple_omp_for_clauses (inner_stmt),
4348 : OMP_CLAUSE__LOOPTEMP_);
4349 1389 : gcc_assert (innerc);
4350 1389 : startvar = OMP_CLAUSE_DECL (innerc);
4351 1389 : innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
4352 : OMP_CLAUSE__LOOPTEMP_);
4353 1389 : gcc_assert (innerc);
4354 1389 : endvar = OMP_CLAUSE_DECL (innerc);
4355 : }
4356 :
4357 4134 : gsi = gsi_start_bb (l0_bb);
4358 4134 : t = istart0;
4359 4134 : if (fd->ordered && fd->collapse == 1)
4360 188 : t = fold_build2 (MULT_EXPR, fd->iter_type, t,
4361 : fold_convert (fd->iter_type, fd->loop.step));
4362 3946 : else if (bias)
4363 0 : t = fold_build2 (MINUS_EXPR, fd->iter_type, t, bias);
4364 4134 : if (fd->ordered && fd->collapse == 1)
4365 : {
4366 188 : if (POINTER_TYPE_P (TREE_TYPE (startvar)))
4367 8 : t = fold_build2 (POINTER_PLUS_EXPR, TREE_TYPE (startvar),
4368 : fd->loop.n1, fold_convert (sizetype, t));
4369 : else
4370 : {
4371 180 : t = fold_convert (TREE_TYPE (startvar), t);
4372 180 : t = fold_build2 (PLUS_EXPR, TREE_TYPE (startvar),
4373 : fd->loop.n1, t);
4374 : }
4375 : }
4376 : else
4377 : {
4378 3946 : if (POINTER_TYPE_P (TREE_TYPE (startvar)))
4379 240 : t = fold_convert (signed_type_for (TREE_TYPE (startvar)), t);
4380 3946 : t = fold_convert (TREE_TYPE (startvar), t);
4381 : }
4382 4134 : t = force_gimple_operand_gsi (&gsi, t,
4383 4134 : DECL_P (startvar)
4384 4134 : && TREE_ADDRESSABLE (startvar),
4385 : NULL_TREE, false, GSI_CONTINUE_LINKING);
4386 4134 : assign_stmt = gimple_build_assign (startvar, t);
4387 4134 : gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
4388 4134 : if (cond_var)
4389 : {
4390 46 : tree itype = TREE_TYPE (cond_var);
4391 : /* For lastprivate(conditional:) itervar, we need some iteration
4392 : counter that starts at unsigned non-zero and increases.
4393 : Prefer as few IVs as possible, so if we can use startvar
4394 : itself, use that, or startvar + constant (those would be
4395 : incremented with step), and as last resort use the s0 + 1
4396 : incremented by 1. */
4397 46 : if ((fd->ordered && fd->collapse == 1)
4398 46 : || bias
4399 46 : || POINTER_TYPE_P (type)
4400 46 : || TREE_CODE (fd->loop.n1) != INTEGER_CST
4401 40 : || fd->loop.cond_code != LT_EXPR)
4402 6 : t = fold_build2 (PLUS_EXPR, itype, fold_convert (itype, istart0),
4403 : build_int_cst (itype, 1));
4404 40 : else if (tree_int_cst_sgn (fd->loop.n1) == 1)
4405 9 : t = fold_convert (itype, t);
4406 : else
4407 : {
4408 31 : tree c = fold_convert (itype, fd->loop.n1);
4409 31 : c = fold_build2 (MINUS_EXPR, itype, build_int_cst (itype, 1), c);
4410 31 : t = fold_build2 (PLUS_EXPR, itype, fold_convert (itype, t), c);
4411 : }
4412 46 : t = force_gimple_operand_gsi (&gsi, t, false,
4413 : NULL_TREE, false, GSI_CONTINUE_LINKING);
4414 46 : assign_stmt = gimple_build_assign (cond_var, t);
4415 46 : gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
4416 : }
4417 :
4418 4134 : t = iend0;
4419 4134 : if (fd->ordered && fd->collapse == 1)
4420 188 : t = fold_build2 (MULT_EXPR, fd->iter_type, t,
4421 : fold_convert (fd->iter_type, fd->loop.step));
4422 3946 : else if (bias)
4423 0 : t = fold_build2 (MINUS_EXPR, fd->iter_type, t, bias);
4424 4134 : if (fd->ordered && fd->collapse == 1)
4425 : {
4426 188 : if (POINTER_TYPE_P (TREE_TYPE (startvar)))
4427 8 : t = fold_build2 (POINTER_PLUS_EXPR, TREE_TYPE (startvar),
4428 : fd->loop.n1, fold_convert (sizetype, t));
4429 : else
4430 : {
4431 180 : t = fold_convert (TREE_TYPE (startvar), t);
4432 180 : t = fold_build2 (PLUS_EXPR, TREE_TYPE (startvar),
4433 : fd->loop.n1, t);
4434 : }
4435 : }
4436 : else
4437 : {
4438 3946 : if (POINTER_TYPE_P (TREE_TYPE (startvar)))
4439 240 : t = fold_convert (signed_type_for (TREE_TYPE (startvar)), t);
4440 3946 : t = fold_convert (TREE_TYPE (startvar), t);
4441 : }
4442 4134 : iend = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
4443 : false, GSI_CONTINUE_LINKING);
4444 4134 : if (endvar)
4445 : {
4446 1389 : assign_stmt = gimple_build_assign (endvar, iend);
4447 1389 : gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
4448 1389 : if (useless_type_conversion_p (TREE_TYPE (fd->loop.v), TREE_TYPE (iend)))
4449 857 : assign_stmt = gimple_build_assign (fd->loop.v, iend);
4450 : else
4451 532 : assign_stmt = gimple_build_assign (fd->loop.v, NOP_EXPR, iend);
4452 1389 : gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
4453 : }
4454 : /* Handle linear clause adjustments. */
4455 4134 : tree itercnt = NULL_TREE;
4456 4134 : if (gimple_omp_for_kind (fd->for_stmt) == GF_OMP_FOR_KIND_FOR)
4457 27021 : for (tree c = gimple_omp_for_clauses (fd->for_stmt);
4458 27021 : c; c = OMP_CLAUSE_CHAIN (c))
4459 22887 : if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_LINEAR
4460 22887 : && !OMP_CLAUSE_LINEAR_NO_COPYIN (c))
4461 : {
4462 72 : tree d = OMP_CLAUSE_DECL (c);
4463 72 : tree t = d, a, dest;
4464 72 : if (omp_privatize_by_reference (t))
4465 2 : t = build_simple_mem_ref_loc (OMP_CLAUSE_LOCATION (c), t);
4466 72 : tree type = TREE_TYPE (t);
4467 72 : if (POINTER_TYPE_P (type))
4468 0 : type = sizetype;
4469 72 : dest = unshare_expr (t);
4470 72 : tree v = create_tmp_var (TREE_TYPE (t), NULL);
4471 72 : expand_omp_build_assign (&gsif, v, t);
4472 72 : if (itercnt == NULL_TREE)
4473 : {
4474 72 : itercnt = startvar;
4475 72 : tree n1 = fd->loop.n1;
4476 72 : if (POINTER_TYPE_P (TREE_TYPE (itercnt)))
4477 : {
4478 0 : itercnt
4479 0 : = fold_convert (signed_type_for (TREE_TYPE (itercnt)),
4480 : itercnt);
4481 0 : n1 = fold_convert (TREE_TYPE (itercnt), n1);
4482 : }
4483 72 : itercnt = fold_build2 (MINUS_EXPR, TREE_TYPE (itercnt),
4484 : itercnt, n1);
4485 72 : itercnt = fold_build2 (EXACT_DIV_EXPR, TREE_TYPE (itercnt),
4486 : itercnt, fd->loop.step);
4487 72 : itercnt = force_gimple_operand_gsi (&gsi, itercnt, true,
4488 : NULL_TREE, false,
4489 : GSI_CONTINUE_LINKING);
4490 : }
4491 72 : a = fold_build2 (MULT_EXPR, type,
4492 : fold_convert (type, itercnt),
4493 : fold_convert (type, OMP_CLAUSE_LINEAR_STEP (c)));
4494 72 : t = fold_build2 (type == TREE_TYPE (t) ? PLUS_EXPR
4495 : : POINTER_PLUS_EXPR, TREE_TYPE (t), v, a);
4496 72 : t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
4497 : false, GSI_CONTINUE_LINKING);
4498 72 : expand_omp_build_assign (&gsi, dest, t, true);
4499 : }
4500 4134 : if (fd->collapse > 1)
4501 1481 : expand_omp_for_init_vars (fd, &gsi, counts, NULL, inner_stmt, startvar);
4502 :
4503 4134 : if (fd->ordered)
4504 : {
4505 : /* Until now, counts array contained number of iterations or
4506 : variable containing it for ith loop. From now on, we usually need
4507 : those counts only for collapsed loops, and only for the 2nd
4508 : till the last collapsed one. Move those one element earlier,
4509 : we'll use counts[fd->collapse - 1] for the first source/sink
4510 : iteration counter and so on and counts[fd->ordered]
4511 : as the array holding the current counter values for
4512 : depend(source). For doacross(sink:omp_cur_iteration - 1) we need
4513 : the counts from fd->collapse to fd->ordered - 1; make a copy of
4514 : those to counts[fd->ordered + 2] and onwards.
4515 : counts[fd->ordered + 1] can be a flag whether it is the first
4516 : iteration with a new collapsed counter (used only if
4517 : fd->ordered > fd->collapse). */
4518 335 : if (fd->ordered > fd->collapse)
4519 171 : memcpy (counts + fd->ordered + 2, counts + fd->collapse,
4520 171 : (fd->ordered - fd->collapse) * sizeof (counts[0]));
4521 335 : if (fd->collapse > 1)
4522 147 : memmove (counts, counts + 1, (fd->collapse - 1) * sizeof (counts[0]));
4523 335 : if (broken_loop)
4524 : {
4525 : int i;
4526 41 : for (i = fd->collapse; i < fd->ordered; i++)
4527 : {
4528 33 : tree type = TREE_TYPE (fd->loops[i].v);
4529 33 : tree this_cond
4530 33 : = fold_build2 (fd->loops[i].cond_code, boolean_type_node,
4531 : fold_convert (type, fd->loops[i].n1),
4532 : fold_convert (type, fd->loops[i].n2));
4533 33 : if (!integer_onep (this_cond))
4534 : break;
4535 : }
4536 37 : if (i < fd->ordered)
4537 : {
4538 29 : if (entry_bb->loop_father != l0_bb->loop_father)
4539 : {
4540 4 : remove_bb_from_loops (l0_bb);
4541 4 : add_bb_to_loop (l0_bb, entry_bb->loop_father);
4542 4 : gcc_assert (single_succ (l0_bb) == l1_bb);
4543 : }
4544 29 : cont_bb
4545 29 : = create_empty_bb (EXIT_BLOCK_PTR_FOR_FN (cfun)->prev_bb);
4546 29 : add_bb_to_loop (cont_bb, l0_bb->loop_father);
4547 29 : gimple_stmt_iterator gsi = gsi_after_labels (cont_bb);
4548 29 : gimple *g = gimple_build_omp_continue (fd->loop.v, fd->loop.v);
4549 29 : gsi_insert_before (&gsi, g, GSI_SAME_STMT);
4550 29 : make_edge (cont_bb, l3_bb, EDGE_FALLTHRU);
4551 29 : make_edge (cont_bb, l1_bb, 0);
4552 29 : l2_bb = create_empty_bb (cont_bb);
4553 29 : broken_loop = false;
4554 : }
4555 : }
4556 335 : expand_omp_ordered_source_sink (region, fd, counts, cont_bb);
4557 335 : cont_bb = expand_omp_for_ordered_loops (fd, counts, cont_bb, l1_bb,
4558 : l0_bb, ordered_lastprivate);
4559 335 : if (counts[fd->collapse - 1])
4560 : {
4561 37 : gcc_assert (fd->collapse == 1);
4562 37 : gsi = gsi_last_bb (l0_bb);
4563 37 : expand_omp_build_assign (&gsi, counts[fd->collapse - 1],
4564 : istart0, true);
4565 37 : if (cont_bb)
4566 : {
4567 33 : gsi = gsi_last_bb (cont_bb);
4568 33 : t = fold_build2 (PLUS_EXPR, fd->iter_type,
4569 : counts[fd->collapse - 1],
4570 : build_int_cst (fd->iter_type, 1));
4571 33 : expand_omp_build_assign (&gsi, counts[fd->collapse - 1], t);
4572 66 : tree aref = build4 (ARRAY_REF, fd->iter_type,
4573 33 : counts[fd->ordered], size_zero_node,
4574 : NULL_TREE, NULL_TREE);
4575 33 : expand_omp_build_assign (&gsi, aref, counts[fd->collapse - 1]);
4576 : }
4577 37 : t = counts[fd->collapse - 1];
4578 : }
4579 298 : else if (fd->collapse > 1)
4580 147 : t = fd->loop.v;
4581 : else
4582 : {
4583 151 : t = fold_build2 (MINUS_EXPR, TREE_TYPE (fd->loops[0].v),
4584 : fd->loops[0].v, fd->loops[0].n1);
4585 151 : t = fold_convert (fd->iter_type, t);
4586 : }
4587 335 : gsi = gsi_last_bb (l0_bb);
4588 335 : tree aref = build4 (ARRAY_REF, fd->iter_type, counts[fd->ordered],
4589 : size_zero_node, NULL_TREE, NULL_TREE);
4590 335 : t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
4591 : false, GSI_CONTINUE_LINKING);
4592 335 : expand_omp_build_assign (&gsi, aref, t, true);
4593 : }
4594 :
4595 4134 : if (!broken_loop)
4596 : {
4597 : /* Code to control the increment and predicate for the sequential
4598 : loop goes in the CONT_BB. */
4599 3756 : gsi = gsi_last_nondebug_bb (cont_bb);
4600 3756 : gomp_continue *cont_stmt = as_a <gomp_continue *> (gsi_stmt (gsi));
4601 3756 : gcc_assert (gimple_code (cont_stmt) == GIMPLE_OMP_CONTINUE);
4602 3756 : vmain = gimple_omp_continue_control_use (cont_stmt);
4603 3756 : vback = gimple_omp_continue_control_def (cont_stmt);
4604 :
4605 3756 : if (cond_var)
4606 : {
4607 46 : tree itype = TREE_TYPE (cond_var);
4608 46 : tree t2;
4609 46 : if ((fd->ordered && fd->collapse == 1)
4610 46 : || bias
4611 46 : || POINTER_TYPE_P (type)
4612 46 : || TREE_CODE (fd->loop.n1) != INTEGER_CST
4613 40 : || fd->loop.cond_code != LT_EXPR)
4614 6 : t2 = build_int_cst (itype, 1);
4615 : else
4616 40 : t2 = fold_convert (itype, fd->loop.step);
4617 46 : t2 = fold_build2 (PLUS_EXPR, itype, cond_var, t2);
4618 46 : t2 = force_gimple_operand_gsi (&gsi, t2, false,
4619 : NULL_TREE, true, GSI_SAME_STMT);
4620 46 : assign_stmt = gimple_build_assign (cond_var, t2);
4621 46 : gsi_insert_before (&gsi, assign_stmt, GSI_SAME_STMT);
4622 : }
4623 :
4624 3756 : if (!gimple_omp_for_combined_p (fd->for_stmt))
4625 : {
4626 2367 : if (POINTER_TYPE_P (type))
4627 160 : t = fold_build_pointer_plus (vmain, fd->loop.step);
4628 : else
4629 2207 : t = fold_build2 (PLUS_EXPR, type, vmain, fd->loop.step);
4630 2367 : t = force_gimple_operand_gsi (&gsi, t,
4631 2367 : DECL_P (vback)
4632 2367 : && TREE_ADDRESSABLE (vback),
4633 : NULL_TREE, true, GSI_SAME_STMT);
4634 2367 : assign_stmt = gimple_build_assign (vback, t);
4635 2367 : gsi_insert_before (&gsi, assign_stmt, GSI_SAME_STMT);
4636 :
4637 2367 : if (fd->ordered && counts[fd->collapse - 1] == NULL_TREE)
4638 : {
4639 294 : tree tem;
4640 294 : if (fd->collapse > 1)
4641 143 : tem = fd->loop.v;
4642 : else
4643 : {
4644 151 : tem = fold_build2 (MINUS_EXPR, TREE_TYPE (fd->loops[0].v),
4645 : fd->loops[0].v, fd->loops[0].n1);
4646 151 : tem = fold_convert (fd->iter_type, tem);
4647 : }
4648 588 : tree aref = build4 (ARRAY_REF, fd->iter_type,
4649 294 : counts[fd->ordered], size_zero_node,
4650 : NULL_TREE, NULL_TREE);
4651 294 : tem = force_gimple_operand_gsi (&gsi, tem, true, NULL_TREE,
4652 : true, GSI_SAME_STMT);
4653 294 : expand_omp_build_assign (&gsi, aref, tem);
4654 : }
4655 :
4656 2367 : t = build2 (fd->loop.cond_code, boolean_type_node,
4657 2367 : DECL_P (vback) && TREE_ADDRESSABLE (vback) ? t : vback,
4658 : iend);
4659 2367 : gcond *cond_stmt = gimple_build_cond_empty (t);
4660 2367 : gsi_insert_before (&gsi, cond_stmt, GSI_SAME_STMT);
4661 : }
4662 :
4663 : /* Remove GIMPLE_OMP_CONTINUE. */
4664 3756 : gsi_remove (&gsi, true);
4665 :
4666 3756 : if (fd->collapse > 1 && !gimple_omp_for_combined_p (fd->for_stmt))
4667 681 : collapse_bb = extract_omp_for_update_vars (fd, NULL, cont_bb, l1_bb);
4668 :
4669 : /* Emit code to get the next parallel iteration in L2_BB. */
4670 3756 : gsi = gsi_start_bb (l2_bb);
4671 :
4672 3756 : t = build_call_expr (builtin_decl_explicit (next_fn), 2,
4673 : build_fold_addr_expr (istart0),
4674 : build_fold_addr_expr (iend0));
4675 3756 : t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
4676 : false, GSI_CONTINUE_LINKING);
4677 3756 : if (TREE_TYPE (t) != boolean_type_node)
4678 0 : t = fold_build2 (NE_EXPR, boolean_type_node,
4679 : t, build_int_cst (TREE_TYPE (t), 0));
4680 3756 : gcond *cond_stmt = gimple_build_cond_empty (t);
4681 3756 : gsi_insert_after (&gsi, cond_stmt, GSI_CONTINUE_LINKING);
4682 : }
4683 :
4684 : /* Add the loop cleanup function. */
4685 4134 : gsi = gsi_last_nondebug_bb (exit_bb);
4686 4134 : if (gimple_omp_return_nowait_p (gsi_stmt (gsi)))
4687 3047 : t = builtin_decl_explicit (BUILT_IN_GOMP_LOOP_END_NOWAIT);
4688 1087 : else if (gimple_omp_return_lhs (gsi_stmt (gsi)))
4689 2 : t = builtin_decl_explicit (BUILT_IN_GOMP_LOOP_END_CANCEL);
4690 : else
4691 1085 : t = builtin_decl_explicit (BUILT_IN_GOMP_LOOP_END);
4692 4134 : gcall *call_stmt = gimple_build_call (t, 0);
4693 4134 : if (fd->ordered)
4694 : {
4695 335 : tree arr = counts[fd->ordered];
4696 335 : tree clobber = build_clobber (TREE_TYPE (arr));
4697 335 : gsi_insert_after (&gsi, gimple_build_assign (arr, clobber),
4698 : GSI_SAME_STMT);
4699 : }
4700 4134 : if (gimple_omp_return_lhs (gsi_stmt (gsi)))
4701 : {
4702 2 : gimple_call_set_lhs (call_stmt, gimple_omp_return_lhs (gsi_stmt (gsi)));
4703 2 : if (fd->have_reductemp)
4704 : {
4705 0 : gimple *g = gimple_build_assign (reductions, NOP_EXPR,
4706 : gimple_call_lhs (call_stmt));
4707 0 : gsi_insert_after (&gsi, g, GSI_SAME_STMT);
4708 : }
4709 : }
4710 4134 : gsi_insert_after (&gsi, call_stmt, GSI_SAME_STMT);
4711 4134 : gsi_remove (&gsi, true);
4712 :
4713 : /* Connect the new blocks. */
4714 4134 : find_edge (entry_bb, l0_bb)->flags = EDGE_TRUE_VALUE;
4715 4134 : find_edge (entry_bb, l3_bb)->flags = EDGE_FALSE_VALUE;
4716 :
4717 4134 : if (!broken_loop)
4718 : {
4719 3756 : gimple_seq phis;
4720 :
4721 3756 : e = find_edge (cont_bb, l3_bb);
4722 3756 : ne = make_edge (l2_bb, l3_bb, EDGE_FALSE_VALUE);
4723 :
4724 3756 : phis = phi_nodes (l3_bb);
4725 3778 : for (gsi = gsi_start (phis); !gsi_end_p (gsi); gsi_next (&gsi))
4726 : {
4727 11 : gimple *phi = gsi_stmt (gsi);
4728 11 : SET_USE (PHI_ARG_DEF_PTR_FROM_EDGE (phi, ne),
4729 : PHI_ARG_DEF_FROM_EDGE (phi, e));
4730 : }
4731 3756 : remove_edge (e);
4732 :
4733 3756 : make_edge (cont_bb, l2_bb, EDGE_FALSE_VALUE);
4734 3756 : e = find_edge (cont_bb, l1_bb);
4735 3756 : if (e == NULL)
4736 : {
4737 13 : e = BRANCH_EDGE (cont_bb);
4738 13 : gcc_assert (single_succ (e->dest) == l1_bb);
4739 : }
4740 3756 : if (gimple_omp_for_combined_p (fd->for_stmt))
4741 : {
4742 1389 : remove_edge (e);
4743 1389 : e = NULL;
4744 : }
4745 2367 : else if (fd->collapse > 1)
4746 : {
4747 681 : remove_edge (e);
4748 681 : e = make_edge (cont_bb, collapse_bb, EDGE_TRUE_VALUE);
4749 : }
4750 : else
4751 1686 : e->flags = EDGE_TRUE_VALUE;
4752 3756 : if (e)
4753 : {
4754 2367 : e->probability = profile_probability::guessed_always ().apply_scale (7, 8);
4755 2367 : find_edge (cont_bb, l2_bb)->probability = e->probability.invert ();
4756 : }
4757 : else
4758 : {
4759 1389 : e = find_edge (cont_bb, l2_bb);
4760 1389 : e->flags = EDGE_FALLTHRU;
4761 : }
4762 3756 : make_edge (l2_bb, l0_bb, EDGE_TRUE_VALUE);
4763 :
4764 3756 : if (gimple_in_ssa_p (cfun))
4765 : {
4766 : /* Add phis to the outer loop that connect to the phis in the inner,
4767 : original loop, and move the loop entry value of the inner phi to
4768 : the loop entry value of the outer phi. */
4769 13 : gphi_iterator psi;
4770 24 : for (psi = gsi_start_phis (l3_bb); !gsi_end_p (psi); gsi_next (&psi))
4771 : {
4772 11 : location_t locus;
4773 11 : gphi *nphi;
4774 11 : gphi *exit_phi = psi.phi ();
4775 :
4776 22 : if (virtual_operand_p (gimple_phi_result (exit_phi)))
4777 6 : continue;
4778 :
4779 5 : edge l2_to_l3 = find_edge (l2_bb, l3_bb);
4780 5 : tree exit_res = PHI_ARG_DEF_FROM_EDGE (exit_phi, l2_to_l3);
4781 :
4782 5 : basic_block latch = BRANCH_EDGE (cont_bb)->dest;
4783 5 : edge latch_to_l1 = find_edge (latch, l1_bb);
4784 5 : gphi *inner_phi
4785 5 : = find_phi_with_arg_on_edge (exit_res, latch_to_l1);
4786 :
4787 5 : tree t = gimple_phi_result (exit_phi);
4788 5 : tree new_res = copy_ssa_name (t, NULL);
4789 5 : nphi = create_phi_node (new_res, l0_bb);
4790 :
4791 5 : edge l0_to_l1 = find_edge (l0_bb, l1_bb);
4792 5 : t = PHI_ARG_DEF_FROM_EDGE (inner_phi, l0_to_l1);
4793 5 : locus = gimple_phi_arg_location_from_edge (inner_phi, l0_to_l1);
4794 5 : edge entry_to_l0 = find_edge (entry_bb, l0_bb);
4795 5 : add_phi_arg (nphi, t, entry_to_l0, locus);
4796 :
4797 5 : edge l2_to_l0 = find_edge (l2_bb, l0_bb);
4798 5 : add_phi_arg (nphi, exit_res, l2_to_l0, UNKNOWN_LOCATION);
4799 :
4800 5 : add_phi_arg (inner_phi, new_res, l0_to_l1, UNKNOWN_LOCATION);
4801 : }
4802 : }
4803 :
4804 3756 : set_immediate_dominator (CDI_DOMINATORS, l2_bb,
4805 : recompute_dominator (CDI_DOMINATORS, l2_bb));
4806 3756 : set_immediate_dominator (CDI_DOMINATORS, l3_bb,
4807 : recompute_dominator (CDI_DOMINATORS, l3_bb));
4808 3756 : set_immediate_dominator (CDI_DOMINATORS, l0_bb,
4809 : recompute_dominator (CDI_DOMINATORS, l0_bb));
4810 3756 : set_immediate_dominator (CDI_DOMINATORS, l1_bb,
4811 : recompute_dominator (CDI_DOMINATORS, l1_bb));
4812 :
4813 : /* We enter expand_omp_for_generic with a loop. This original loop may
4814 : have its own loop struct, or it may be part of an outer loop struct
4815 : (which may be the fake loop). */
4816 3756 : class loop *outer_loop = entry_bb->loop_father;
4817 3756 : bool orig_loop_has_loop_struct = l1_bb->loop_father != outer_loop;
4818 :
4819 3756 : add_bb_to_loop (l2_bb, outer_loop);
4820 :
4821 : /* We've added a new loop around the original loop. Allocate the
4822 : corresponding loop struct. */
4823 3756 : class loop *new_loop = alloc_loop ();
4824 3756 : new_loop->header = l0_bb;
4825 3756 : new_loop->latch = l2_bb;
4826 3756 : add_loop (new_loop, outer_loop);
4827 :
4828 : /* Allocate a loop structure for the original loop unless we already
4829 : had one. */
4830 3756 : if (!orig_loop_has_loop_struct
4831 3756 : && !gimple_omp_for_combined_p (fd->for_stmt))
4832 : {
4833 2350 : class loop *orig_loop = alloc_loop ();
4834 2350 : orig_loop->header = l1_bb;
4835 : /* The loop may have multiple latches. */
4836 2350 : add_loop (orig_loop, new_loop);
4837 : }
4838 : }
4839 4134 : }
4840 :
4841 : /* Helper function for expand_omp_for_static_nochunk. If PTR is NULL,
4842 : compute the needed allocation size.  If !ALLOC, this is for the
4843 : team allocations; if ALLOC, for the per-thread allocation.  SZ is
4844 : the initial needed size for other purposes, ALLOC_ALIGN the
4845 : guaranteed alignment of the allocation in bytes,
4845 : CNT number of elements of each array, for !ALLOC this is
4846 : omp_get_num_threads (), for ALLOC number of iterations handled by the
4847 : current thread. If PTR is non-NULL, it is the start of the allocation
4848 : and this routine shall assign to OMP_CLAUSE_DECL (c) of those _scantemp_
4849 : clauses pointers to the corresponding arrays. */
4850 :
4851 : static tree
4852 692 : expand_omp_scantemp_alloc (tree clauses, tree ptr, unsigned HOST_WIDE_INT sz,
4853 : unsigned HOST_WIDE_INT alloc_align, tree cnt,
4854 : gimple_stmt_iterator *gsi, bool alloc)
4855 : {
 : /* ELTSZ accumulates the per-element byte size of all matching arrays
 : (used only in the sizing pass, i.e. when PTR is NULL).  */
4856 692 : tree eltsz = NULL_TREE;
 : /* PREVAL is the alignment in bytes known to hold for the running
 : offset/pointer after the previously processed array; 0 until the
 : first array has been handled, so the first adjustment can still be
 : folded into the compile-time constant SZ.  */
4857 692 : unsigned HOST_WIDE_INT preval = 0;
 : /* Skip over the SZ bytes reserved for other purposes at the start
 : of the allocation.  */
4858 692 : if (ptr && sz)
4859 5 : ptr = fold_build2 (POINTER_PLUS_EXPR, TREE_TYPE (ptr),
4860 : ptr, size_int (sz));
 : /* Walk all _scantemp_ clauses, handling those that are not control
 : clauses and whose team/thread allocation kind matches ALLOC.  */
4861 5720 : for (tree c = clauses; c; c = OMP_CLAUSE_CHAIN (c))
4862 5028 : if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE__SCANTEMP_
4863 3024 : && !OMP_CLAUSE__SCANTEMP__CONTROL (c)
4864 6668 : && (!OMP_CLAUSE__SCANTEMP__ALLOC (c)) != alloc)
4865 : {
4866 820 : tree pointee_type = TREE_TYPE (TREE_TYPE (OMP_CLAUSE_DECL (c)));
 : /* AL is the alignment this array actually needs: the pointee's
 : alignment, but no more than the lowest set bit of its size
 : (an array of such elements can't be misaligned worse than
 : that bit once the first element is aligned).  */
4867 820 : unsigned HOST_WIDE_INT al = TYPE_ALIGN_UNIT (pointee_type);
4868 820 : if (tree_fits_uhwi_p (TYPE_SIZE_UNIT (pointee_type)))
4869 : {
4870 820 : unsigned HOST_WIDE_INT szl
4871 820 : = tree_to_uhwi (TYPE_SIZE_UNIT (pointee_type));
4872 820 : szl = least_bit_hwi (szl);
4873 820 : if (szl)
4874 820 : al = MIN (al, szl);
4875 : }
 : /* Sizing pass: sum up the per-iteration element sizes.  */
4876 820 : if (ptr == NULL_TREE)
4877 : {
4878 410 : if (eltsz == NULL_TREE)
4879 346 : eltsz = TYPE_SIZE_UNIT (pointee_type);
4880 : else
4881 64 : eltsz = size_binop (PLUS_EXPR, eltsz,
4882 : TYPE_SIZE_UNIT (pointee_type));
4883 : }
 : /* While the offset is still a compile-time constant (PREVAL == 0)
 : and the allocation itself guarantees AL, just round SZ up.  */
4884 820 : if (preval == 0 && al <= alloc_align)
4885 : {
4886 692 : unsigned HOST_WIDE_INT diff = ROUND_UP (sz, al) - sz;
4887 692 : sz += diff;
4888 692 : if (diff && ptr)
4889 0 : ptr = fold_build2 (POINTER_PLUS_EXPR, TREE_TYPE (ptr),
4890 : ptr, size_int (diff));
4891 : }
 : /* Otherwise align dynamically: round PTR up to a multiple of AL
 : via integer arithmetic, or, in the sizing pass, conservatively
 : reserve AL - 1 extra padding bytes.  */
4892 128 : else if (al > preval)
4893 : {
4894 32 : if (ptr)
4895 : {
4896 16 : ptr = fold_convert (pointer_sized_int_node, ptr);
4897 16 : ptr = fold_build2 (PLUS_EXPR, pointer_sized_int_node, ptr,
4898 : build_int_cst (pointer_sized_int_node,
4899 : al - 1));
4900 16 : ptr = fold_build2 (BIT_AND_EXPR, pointer_sized_int_node, ptr,
4901 : build_int_cst (pointer_sized_int_node,
4902 : -(HOST_WIDE_INT) al));
4903 16 : ptr = fold_convert (ptr_type_node, ptr);
4904 : }
4905 : else
4906 16 : sz += al - 1;
4907 : }
 : /* After this array, AL is the alignment known for the running
 : offset when the element size is a known constant; otherwise
 : only byte alignment can be assumed.  */
4908 820 : if (tree_fits_uhwi_p (TYPE_SIZE_UNIT (pointee_type)))
4909 : preval = al;
4910 : else
4911 0 : preval = 1;
 : /* Assignment pass: record the array's start in the clause decl
 : and advance PTR past its CNT elements.  */
4912 820 : if (ptr)
4913 : {
4914 410 : expand_omp_build_assign (gsi, OMP_CLAUSE_DECL (c), ptr, false);
4915 410 : ptr = OMP_CLAUSE_DECL (c);
4916 410 : ptr = fold_build2 (POINTER_PLUS_EXPR, TREE_TYPE (ptr), ptr,
4917 : size_binop (MULT_EXPR, cnt,
4918 : TYPE_SIZE_UNIT (pointee_type)));
4919 : }
4920 : }
4921 :
 : /* Sizing pass result: ELTSZ * CNT plus the initial SZ bytes
 : (which by now include any alignment padding added above).  */
4922 692 : if (ptr == NULL_TREE)
4923 : {
4924 346 : eltsz = size_binop (MULT_EXPR, eltsz, cnt);
4925 346 : if (sz)
4926 13 : eltsz = size_binop (PLUS_EXPR, eltsz, size_int (sz));
4927 346 : return eltsz;
4928 : }
4929 : else
4930 : return ptr;
4931 : }
4932 :
4933 : /* Return the last _looptemp_ clause if one has been created for
4934 : lastprivate on distribute parallel for{, simd} or taskloop.
4935 : FD is the loop data and INNERC should be the second _looptemp_
4936 : clause (the one holding the end of the range).
4937 : This is followed by collapse - 1 _looptemp_ clauses for the
4938 : counts[1] and up, and for triangular loops followed by 4
4939 : further _looptemp_ clauses (one for counts[0], one first_inner_iterations,
4940 : one factor and one adjn1). After this there is optionally one
4941 : _looptemp_ clause that this function returns. */
4942 :
4943 : static tree
4944 1634 : find_lastprivate_looptemp (struct omp_for_data *fd, tree innerc)
4945 : {
4946 1634 : gcc_assert (innerc);
4947 1634 : int count = fd->collapse - 1;
4948 1634 : if (fd->non_rect
4949 24 : && fd->last_nonrect == fd->first_nonrect + 1
4950 1646 : && !TYPE_UNSIGNED (TREE_TYPE (fd->loops[fd->last_nonrect].v)))
4951 12 : count += 4;
4952 4811 : for (int i = 0; i < count; i++)
4953 : {
4954 3177 : innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
4955 : OMP_CLAUSE__LOOPTEMP_);
4956 3177 : gcc_assert (innerc);
4957 : }
4958 1634 : return omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
4959 1634 : OMP_CLAUSE__LOOPTEMP_);
4960 : }
4961 :
/* A subroutine of expand_omp_for.  Generate code for a parallel
   loop with static schedule and no specified chunk size.  Given
   parameters:

	for (V = N1; V cond N2; V += STEP) BODY;

   where COND is "<" or ">", we generate pseudocode

	if ((__typeof (V)) -1 > 0 && N2 cond N1) goto L2;
	if (cond is <)
	  adj = STEP - 1;
	else
	  adj = STEP + 1;
	if ((__typeof (V)) -1 > 0 && cond is >)
	  n = -(adj + N2 - N1) / -STEP;
	else
	  n = (adj + N2 - N1) / STEP;
	q = n / nthreads;
	tt = n % nthreads;
	if (threadid < tt) goto L3; else goto L4;
    L3:
	tt = 0;
	q = q + 1;
    L4:
	s0 = q * threadid + tt;
	e0 = s0 + q;
	V = s0 * STEP + N1;
	if (s0 >= e0) goto L2; else goto L0;
    L0:
	e = e0 * STEP + N1;
    L1:
	BODY;
	V += STEP;
	if (V cond e) goto L1;
    L2:
  */

static void
expand_omp_for_static_nochunk (struct omp_region *region,
			       struct omp_for_data *fd,
			       gimple *inner_stmt)
{
  tree n, q, s0, e0, e, t, tt, nthreads = NULL_TREE, threadid;
  tree type, itype, vmain, vback;
  basic_block entry_bb, second_bb, third_bb, exit_bb, seq_start_bb;
  basic_block body_bb, cont_bb, collapse_bb = NULL;
  basic_block fin_bb, fourth_bb = NULL, fifth_bb = NULL, sixth_bb = NULL;
  basic_block exit1_bb = NULL, exit2_bb = NULL, exit3_bb = NULL;
  gimple_stmt_iterator gsi, gsip;
  edge ep;
  /* A "broken" loop has no GIMPLE_OMP_CONTINUE region, i.e. the body
     never branches back (e.g. the body ends in a noreturn call).  */
  bool broken_loop = region->cont == NULL;
  tree *counts = NULL;
  tree n1, n2, step;
  tree reductions = NULL_TREE;
  tree cond_var = NULL_TREE, condtemp = NULL_TREE;

  /* ITYPE is the (signed) integer type all index arithmetic is done in;
     for pointer-typed IVs use the corresponding signed integer type.  */
  itype = type = TREE_TYPE (fd->loop.v);
  if (POINTER_TYPE_P (type))
    itype = signed_type_for (type);

  entry_bb = region->entry;
  cont_bb = region->cont;
  gcc_assert (EDGE_COUNT (entry_bb->succs) == 2);
  fin_bb = BRANCH_EDGE (entry_bb)->dest;
  gcc_assert (broken_loop
	      || (fin_bb == FALLTHRU_EDGE (cont_bb)->dest));
  seq_start_bb = split_edge (FALLTHRU_EDGE (entry_bb));
  body_bb = single_succ (seq_start_bb);
  if (!broken_loop)
    {
      gcc_assert (BRANCH_EDGE (cont_bb)->dest == body_bb
		  || single_succ (BRANCH_EDGE (cont_bb)->dest) == body_bb);
      gcc_assert (EDGE_COUNT (cont_bb->succs) == 2);
    }
  exit_bb = region->exit;

  /* Iteration space partitioning goes in ENTRY_BB.  */
  gsi = gsi_last_nondebug_bb (entry_bb);
  gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_FOR);
  /* GSIP points just before the GIMPLE_OMP_FOR; used later as an
     insertion point for the GOMP_loop_start call.  */
  gsip = gsi;
  gsi_prev (&gsip);

  /* T becomes a compile-time "loop surely executes" predicate (or
     NULL_TREE when unknown), used below to decide whether a runtime
     N1 cond N2 pre-check is needed for unsigned IVs.  */
  if (fd->collapse > 1)
    {
      int first_zero_iter = -1, dummy = -1;
      basic_block l2_dom_bb = NULL, dummy_bb = NULL;

      counts = XALLOCAVEC (tree, fd->collapse);
      expand_omp_for_init_counts (fd, &gsi, entry_bb, counts,
				  fin_bb, first_zero_iter,
				  dummy_bb, dummy, l2_dom_bb);
      t = NULL_TREE;
    }
  else if (gimple_omp_for_combined_into_p (fd->for_stmt))
    t = integer_one_node;
  else
    t = fold_binary (fd->loop.cond_code, boolean_type_node,
		     fold_convert (type, fd->loop.n1),
		     fold_convert (type, fd->loop.n2));
  if (fd->collapse == 1
      && TYPE_UNSIGNED (type)
      && (t == NULL_TREE || !integer_onep (t)))
    {
      /* Emit the "if ((__typeof (V)) -1 > 0 && N2 cond N1) goto L2;"
	 pre-check from the pseudocode: unsigned iteration count math
	 below would wrap for an empty loop, so skip straight to FIN_BB.  */
      n1 = fold_convert (type, unshare_expr (fd->loop.n1));
      n1 = force_gimple_operand_gsi (&gsi, n1, true, NULL_TREE,
				     true, GSI_SAME_STMT);
      n2 = fold_convert (type, unshare_expr (fd->loop.n2));
      n2 = force_gimple_operand_gsi (&gsi, n2, true, NULL_TREE,
				     true, GSI_SAME_STMT);
      gcond *cond_stmt = expand_omp_build_cond (&gsi, fd->loop.cond_code,
						n1, n2);
      ep = split_block (entry_bb, cond_stmt);
      ep->flags = EDGE_TRUE_VALUE;
      entry_bb = ep->dest;
      ep->probability = profile_probability::very_likely ();
      ep = make_edge (ep->src, fin_bb, EDGE_FALSE_VALUE);
      ep->probability = profile_probability::very_unlikely ();
      if (gimple_in_ssa_p (cfun))
	{
	  /* The new skip edge into FIN_BB needs PHI arguments; reuse
	     the values already flowing in on the existing edge.  */
	  int dest_idx = find_edge (entry_bb, fin_bb)->dest_idx;
	  for (gphi_iterator gpi = gsi_start_phis (fin_bb);
	       !gsi_end_p (gpi); gsi_next (&gpi))
	    {
	      gphi *phi = gpi.phi ();
	      add_phi_arg (phi, gimple_phi_arg_def (phi, dest_idx),
			   ep, UNKNOWN_LOCATION);
	    }
	}
      gsi = gsi_last_bb (entry_bb);
    }

  if (fd->lastprivate_conditional)
    {
      /* First _condtemp_ clause is the (optional) pointer buffer, the
	 second is the per-thread iteration counter variable.  */
      tree clauses = gimple_omp_for_clauses (fd->for_stmt);
      tree c = omp_find_clause (clauses, OMP_CLAUSE__CONDTEMP_);
      if (fd->have_pointer_condtemp)
	condtemp = OMP_CLAUSE_DECL (c);
      c = omp_find_clause (OMP_CLAUSE_CHAIN (c), OMP_CLAUSE__CONDTEMP_);
      cond_var = OMP_CLAUSE_DECL (c);
    }
  if (fd->have_reductemp
      /* For scan, we don't want to reinitialize condtemp before the
	 second loop.  */
      || (fd->have_pointer_condtemp && !fd->have_scantemp)
      || fd->have_nonctrl_scantemp)
    {
      /* Even though the schedule is static, call GOMP_loop_start so the
	 runtime can coordinate reduction temporaries and/or a shared
	 condtemp/scantemp buffer across the team.  T1/T2/T3 are dummy
	 start/end/incr/chunk arguments for the runtime call.  */
      tree t1 = build_int_cst (long_integer_type_node, 0);
      tree t2 = build_int_cst (long_integer_type_node, 1);
      tree t3 = build_int_cstu (long_integer_type_node,
				(HOST_WIDE_INT_1U << 31) + 1);
      tree clauses = gimple_omp_for_clauses (fd->for_stmt);
      gimple_stmt_iterator gsi2 = gsi_none ();
      gimple *g = NULL;
      tree mem = null_pointer_node, memv = NULL_TREE;
      unsigned HOST_WIDE_INT condtemp_sz = 0;
      unsigned HOST_WIDE_INT alloc_align = 0;
      if (fd->have_reductemp)
	{
	  gcc_assert (!fd->have_nonctrl_scantemp);
	  tree c = omp_find_clause (clauses, OMP_CLAUSE__REDUCTEMP_);
	  reductions = OMP_CLAUSE_DECL (c);
	  gcc_assert (TREE_CODE (reductions) == SSA_NAME);
	  g = SSA_NAME_DEF_STMT (reductions);
	  reductions = gimple_assign_rhs1 (g);
	  OMP_CLAUSE_DECL (c) = reductions;
	  gsi2 = gsi_for_stmt (g);
	}
      else
	{
	  if (gsi_end_p (gsip))
	    gsi2 = gsi_after_labels (region->entry);
	  else
	    gsi2 = gsip;
	  reductions = null_pointer_node;
	}
      if (fd->have_pointer_condtemp || fd->have_nonctrl_scantemp)
	{
	  tree type;
	  if (fd->have_pointer_condtemp)
	    type = TREE_TYPE (condtemp);
	  else
	    type = ptr_type_node;
	  memv = create_tmp_var (type);
	  TREE_ADDRESSABLE (memv) = 1;
	  unsigned HOST_WIDE_INT sz = 0;
	  tree size = NULL_TREE;
	  if (fd->have_pointer_condtemp)
	    {
	      sz = tree_to_uhwi (TYPE_SIZE_UNIT (TREE_TYPE (type)));
	      sz *= fd->lastprivate_conditional;
	      condtemp_sz = sz;
	    }
	  if (fd->have_nonctrl_scantemp)
	    {
	      nthreads = builtin_decl_explicit (BUILT_IN_OMP_GET_NUM_THREADS);
	      gimple *g = gimple_build_call (nthreads, 0);
	      nthreads = create_tmp_var (integer_type_node);
	      gimple_call_set_lhs (g, nthreads);
	      gsi_insert_before (&gsi2, g, GSI_SAME_STMT);
	      nthreads = fold_convert (sizetype, nthreads);
	      alloc_align = TYPE_ALIGN_UNIT (long_long_integer_type_node);
	      size = expand_omp_scantemp_alloc (clauses, NULL_TREE, sz,
						alloc_align, nthreads, NULL,
						false);
	      size = fold_convert (type, size);
	    }
	  else
	    size = build_int_cst (type, sz);
	  expand_omp_build_assign (&gsi2, memv, size, false);
	  mem = build_fold_addr_expr (memv);
	}
      tree t
	= build_call_expr (builtin_decl_explicit (BUILT_IN_GOMP_LOOP_START),
			   9, t1, t2, t2, t3, t1, null_pointer_node,
			   null_pointer_node, reductions, mem);
      force_gimple_operand_gsi (&gsi2, t, true, NULL_TREE,
				true, GSI_SAME_STMT);
      if (fd->have_pointer_condtemp)
	expand_omp_build_assign (&gsi2, condtemp, memv, false);
      if (fd->have_nonctrl_scantemp)
	{
	  tree ptr = fd->have_pointer_condtemp ? condtemp : memv;
	  expand_omp_scantemp_alloc (clauses, ptr, condtemp_sz,
				     alloc_align, nthreads, &gsi2, false);
	}
      if (fd->have_reductemp)
	{
	  /* The original SSA definition of the reductemp is now dead;
	     GOMP_loop_start supplied its value instead.  */
	  gsi_remove (&gsi2, true);
	  release_ssa_name (gimple_assign_lhs (g));
	}
    }
  /* Pick the builtins that report team size and member id: threads for
     worksharing "for", teams for "distribute".  */
  switch (gimple_omp_for_kind (fd->for_stmt))
    {
    case GF_OMP_FOR_KIND_FOR:
      nthreads = builtin_decl_explicit (BUILT_IN_OMP_GET_NUM_THREADS);
      threadid = builtin_decl_explicit (BUILT_IN_OMP_GET_THREAD_NUM);
      break;
    case GF_OMP_FOR_KIND_DISTRIBUTE:
      nthreads = builtin_decl_explicit (BUILT_IN_OMP_GET_NUM_TEAMS);
      threadid = builtin_decl_explicit (BUILT_IN_OMP_GET_TEAM_NUM);
      break;
    default:
      gcc_unreachable ();
    }
  nthreads = build_call_expr (nthreads, 0);
  nthreads = fold_convert (itype, nthreads);
  nthreads = force_gimple_operand_gsi (&gsi, nthreads, true, NULL_TREE,
				       true, GSI_SAME_STMT);
  threadid = build_call_expr (threadid, 0);
  threadid = fold_convert (itype, threadid);
  threadid = force_gimple_operand_gsi (&gsi, threadid, true, NULL_TREE,
				       true, GSI_SAME_STMT);

  /* For combined constructs the real bounds live in _looptemp_ clauses
     computed by the enclosing construct.  */
  n1 = fd->loop.n1;
  n2 = fd->loop.n2;
  step = fd->loop.step;
  if (gimple_omp_for_combined_into_p (fd->for_stmt))
    {
      tree innerc = omp_find_clause (gimple_omp_for_clauses (fd->for_stmt),
				     OMP_CLAUSE__LOOPTEMP_);
      gcc_assert (innerc);
      n1 = OMP_CLAUSE_DECL (innerc);
      innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
				OMP_CLAUSE__LOOPTEMP_);
      gcc_assert (innerc);
      n2 = OMP_CLAUSE_DECL (innerc);
    }
  n1 = force_gimple_operand_gsi (&gsi, fold_convert (type, n1),
				 true, NULL_TREE, true, GSI_SAME_STMT);
  n2 = force_gimple_operand_gsi (&gsi, fold_convert (itype, n2),
				 true, NULL_TREE, true, GSI_SAME_STMT);
  step = force_gimple_operand_gsi (&gsi, fold_convert (itype, step),
				   true, NULL_TREE, true, GSI_SAME_STMT);

  /* Compute the iteration count n = (adj + N2 - N1) / STEP, with the
     negated form for unsigned descending loops (see pseudocode above).  */
  t = build_int_cst (itype, (fd->loop.cond_code == LT_EXPR ? -1 : 1));
  t = fold_build2 (PLUS_EXPR, itype, step, t);
  t = fold_build2 (PLUS_EXPR, itype, t, n2);
  t = fold_build2 (MINUS_EXPR, itype, t, fold_convert (itype, n1));
  if (TYPE_UNSIGNED (itype) && fd->loop.cond_code == GT_EXPR)
    t = fold_build2 (TRUNC_DIV_EXPR, itype,
		     fold_build1 (NEGATE_EXPR, itype, t),
		     fold_build1 (NEGATE_EXPR, itype, step));
  else
    t = fold_build2 (TRUNC_DIV_EXPR, itype, t, step);
  t = fold_convert (itype, t);
  n = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE, true, GSI_SAME_STMT);

  /* q = n / nthreads; tt = n % nthreads.  The first TT threads each
     take one extra iteration (q += 1 in SECOND_BB below).  */
  q = create_tmp_reg (itype, "q");
  t = fold_build2 (TRUNC_DIV_EXPR, itype, n, nthreads);
  t = force_gimple_operand_gsi (&gsi, t, false, NULL_TREE, true, GSI_SAME_STMT);
  gsi_insert_before (&gsi, gimple_build_assign (q, t), GSI_SAME_STMT);

  tt = create_tmp_reg (itype, "tt");
  t = fold_build2 (TRUNC_MOD_EXPR, itype, n, nthreads);
  t = force_gimple_operand_gsi (&gsi, t, false, NULL_TREE, true, GSI_SAME_STMT);
  gsi_insert_before (&gsi, gimple_build_assign (tt, t), GSI_SAME_STMT);

  t = build2 (LT_EXPR, boolean_type_node, threadid, tt);
  gcond *cond_stmt = gimple_build_cond_empty (t);
  gsi_insert_before (&gsi, cond_stmt, GSI_SAME_STMT);

  /* SECOND_BB is the L3 block from the pseudocode: tt = 0; q = q + 1.  */
  second_bb = split_block (entry_bb, cond_stmt)->dest;
  gsi = gsi_last_nondebug_bb (second_bb);
  gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_FOR);

  gsi_insert_before (&gsi, gimple_build_assign (tt, build_int_cst (itype, 0)),
		     GSI_SAME_STMT);
  gassign *assign_stmt
    = gimple_build_assign (q, PLUS_EXPR, q, build_int_cst (itype, 1));
  gsi_insert_before (&gsi, assign_stmt, GSI_SAME_STMT);

  /* THIRD_BB is L4: compute s0/e0 (and possibly scantemp allocation).  */
  third_bb = split_block (second_bb, assign_stmt)->dest;
  gsi = gsi_last_nondebug_bb (third_bb);
  gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_FOR);

  if (fd->have_nonctrl_scantemp)
    {
      /* Allocate the per-thread scan temporaries: heap (malloc) when the
	 computed size exceeds 16K, otherwise alloca after a stack save.
	 CONTROLB records which path was taken so the exit code can
	 free/restore accordingly.  */
      tree clauses = gimple_omp_for_clauses (fd->for_stmt);
      tree controlp = NULL_TREE, controlb = NULL_TREE;
      for (tree c = clauses; c; c = OMP_CLAUSE_CHAIN (c))
	if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE__SCANTEMP_
	    && OMP_CLAUSE__SCANTEMP__CONTROL (c))
	  {
	    if (TREE_TYPE (OMP_CLAUSE_DECL (c)) == boolean_type_node)
	      controlb = OMP_CLAUSE_DECL (c);
	    else
	      controlp = OMP_CLAUSE_DECL (c);
	    if (controlb && controlp)
	      break;
	  }
      gcc_assert (controlp && controlb);
      tree cnt = create_tmp_var (sizetype);
      gimple *g = gimple_build_assign (cnt, NOP_EXPR, q);
      gsi_insert_before (&gsi, g, GSI_SAME_STMT);
      unsigned HOST_WIDE_INT alloc_align = TYPE_ALIGN_UNIT (ptr_type_node);
      tree sz = expand_omp_scantemp_alloc (clauses, NULL_TREE, 0,
					   alloc_align, cnt, NULL, true);
      tree size = create_tmp_var (sizetype);
      expand_omp_build_assign (&gsi, size, sz, false);
      tree cmp = fold_build2 (GT_EXPR, boolean_type_node,
			      size, size_int (16384));
      expand_omp_build_assign (&gsi, controlb, cmp);
      g = gimple_build_cond (NE_EXPR, controlb, boolean_false_node,
			     NULL_TREE, NULL_TREE);
      gsi_insert_before (&gsi, g, GSI_SAME_STMT);
      /* FOURTH_BB: heap allocation path.  */
      fourth_bb = split_block (third_bb, g)->dest;
      gsi = gsi_last_nondebug_bb (fourth_bb);
      /* FIXME: Once we have allocators, this should use allocator.  */
      g = gimple_build_call (builtin_decl_explicit (BUILT_IN_MALLOC), 1, size);
      gimple_call_set_lhs (g, controlp);
      gsi_insert_before (&gsi, g, GSI_SAME_STMT);
      expand_omp_scantemp_alloc (clauses, controlp, 0, alloc_align, cnt,
				 &gsi, true);
      gsi_prev (&gsi);
      g = gsi_stmt (gsi);
      /* FIFTH_BB: stack allocation path (stack save + allocas).  */
      fifth_bb = split_block (fourth_bb, g)->dest;
      gsi = gsi_last_nondebug_bb (fifth_bb);

      g = gimple_build_call (builtin_decl_implicit (BUILT_IN_STACK_SAVE), 0);
      gimple_call_set_lhs (g, controlp);
      gsi_insert_before (&gsi, g, GSI_SAME_STMT);
      tree alloca_decl = builtin_decl_explicit (BUILT_IN_ALLOCA_WITH_ALIGN);
      for (tree c = clauses; c; c = OMP_CLAUSE_CHAIN (c))
	if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE__SCANTEMP_
	    && OMP_CLAUSE__SCANTEMP__ALLOC (c))
	  {
	    tree tmp = create_tmp_var (sizetype);
	    tree pointee_type = TREE_TYPE (TREE_TYPE (OMP_CLAUSE_DECL (c)));
	    g = gimple_build_assign (tmp, MULT_EXPR, cnt,
				     TYPE_SIZE_UNIT (pointee_type));
	    gsi_insert_before (&gsi, g, GSI_SAME_STMT);
	    g = gimple_build_call (alloca_decl, 2, tmp,
				   size_int (TYPE_ALIGN (pointee_type)));
	    gimple_call_set_lhs (g, OMP_CLAUSE_DECL (c));
	    gsi_insert_before (&gsi, g, GSI_SAME_STMT);
	  }

      /* SIXTH_BB: join of the two allocation paths.  */
      sixth_bb = split_block (fifth_bb, g)->dest;
      gsi = gsi_last_nondebug_bb (sixth_bb);
    }

  /* s0 = q * threadid + tt; e0 = s0 + q; skip to L2 if s0 >= e0.  */
  t = build2 (MULT_EXPR, itype, q, threadid);
  t = build2 (PLUS_EXPR, itype, t, tt);
  s0 = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE, true, GSI_SAME_STMT);

  t = fold_build2 (PLUS_EXPR, itype, s0, q);
  e0 = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE, true, GSI_SAME_STMT);

  t = build2 (GE_EXPR, boolean_type_node, s0, e0);
  gsi_insert_before (&gsi, gimple_build_cond_empty (t), GSI_SAME_STMT);

  /* Remove the GIMPLE_OMP_FOR statement.  */
  gsi_remove (&gsi, true);

  /* Setup code for sequential iteration goes in SEQ_START_BB.  */
  gsi = gsi_start_bb (seq_start_bb);

  tree startvar = fd->loop.v;
  tree endvar = NULL_TREE;

  if (gimple_omp_for_combined_p (fd->for_stmt))
    {
      /* Pass the start/end of this thread's range down to the inner
	 construct through its _looptemp_ clauses.  */
      tree clauses = gimple_code (inner_stmt) == GIMPLE_OMP_PARALLEL
		     ? gimple_omp_parallel_clauses (inner_stmt)
		     : gimple_omp_for_clauses (inner_stmt);
      tree innerc = omp_find_clause (clauses, OMP_CLAUSE__LOOPTEMP_);
      gcc_assert (innerc);
      startvar = OMP_CLAUSE_DECL (innerc);
      innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
				OMP_CLAUSE__LOOPTEMP_);
      gcc_assert (innerc);
      endvar = OMP_CLAUSE_DECL (innerc);
      if (fd->collapse > 1 && TREE_CODE (fd->loop.n2) != INTEGER_CST
	  && gimple_omp_for_kind (fd->for_stmt) == GF_OMP_FOR_KIND_DISTRIBUTE)
	{
	  innerc = find_lastprivate_looptemp (fd, innerc);
	  if (innerc)
	    {
	      /* If needed (distribute parallel for with lastprivate),
		 propagate down the total number of iterations.  */
	      tree t = fold_convert (TREE_TYPE (OMP_CLAUSE_DECL (innerc)),
				     fd->loop.n2);
	      t = force_gimple_operand_gsi (&gsi, t, false, NULL_TREE, false,
					    GSI_CONTINUE_LINKING);
	      assign_stmt = gimple_build_assign (OMP_CLAUSE_DECL (innerc), t);
	      gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
	    }
	}
    }
  /* V = s0 * STEP + N1.  */
  t = fold_convert (itype, s0);
  t = fold_build2 (MULT_EXPR, itype, t, step);
  if (POINTER_TYPE_P (type))
    {
      t = fold_build_pointer_plus (n1, t);
      if (!POINTER_TYPE_P (TREE_TYPE (startvar))
	  && TYPE_PRECISION (TREE_TYPE (startvar)) > TYPE_PRECISION (type))
	t = fold_convert (signed_type_for (type), t);
    }
  else
    t = fold_build2 (PLUS_EXPR, type, t, n1);
  t = fold_convert (TREE_TYPE (startvar), t);
  t = force_gimple_operand_gsi (&gsi, t,
				DECL_P (startvar)
				&& TREE_ADDRESSABLE (startvar),
				NULL_TREE, false, GSI_CONTINUE_LINKING);
  assign_stmt = gimple_build_assign (startvar, t);
  gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
  if (cond_var)
    {
      tree itype = TREE_TYPE (cond_var);
      /* For lastprivate(conditional:) itervar, we need some iteration
	 counter that starts at unsigned non-zero and increases.
	 Prefer as few IVs as possible, so if we can use startvar
	 itself, use that, or startvar + constant (those would be
	 incremented with step), and as last resort use the s0 + 1
	 incremented by 1.  */
      if (POINTER_TYPE_P (type)
	  || TREE_CODE (n1) != INTEGER_CST
	  || fd->loop.cond_code != LT_EXPR)
	t = fold_build2 (PLUS_EXPR, itype, fold_convert (itype, s0),
			 build_int_cst (itype, 1));
      else if (tree_int_cst_sgn (n1) == 1)
	t = fold_convert (itype, t);
      else
	{
	  tree c = fold_convert (itype, n1);
	  c = fold_build2 (MINUS_EXPR, itype, build_int_cst (itype, 1), c);
	  t = fold_build2 (PLUS_EXPR, itype, fold_convert (itype, t), c);
	}
      t = force_gimple_operand_gsi (&gsi, t, false,
				    NULL_TREE, false, GSI_CONTINUE_LINKING);
      assign_stmt = gimple_build_assign (cond_var, t);
      gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
    }

  /* e = e0 * STEP + N1.  */
  t = fold_convert (itype, e0);
  t = fold_build2 (MULT_EXPR, itype, t, step);
  if (POINTER_TYPE_P (type))
    {
      t = fold_build_pointer_plus (n1, t);
      if (!POINTER_TYPE_P (TREE_TYPE (startvar))
	  && TYPE_PRECISION (TREE_TYPE (startvar)) > TYPE_PRECISION (type))
	t = fold_convert (signed_type_for (type), t);
    }
  else
    t = fold_build2 (PLUS_EXPR, type, t, n1);
  t = fold_convert (TREE_TYPE (startvar), t);
  e = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
				false, GSI_CONTINUE_LINKING);
  if (endvar)
    {
      assign_stmt = gimple_build_assign (endvar, e);
      gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
      if (useless_type_conversion_p (TREE_TYPE (fd->loop.v), TREE_TYPE (e)))
	assign_stmt = gimple_build_assign (fd->loop.v, e);
      else
	assign_stmt = gimple_build_assign (fd->loop.v, NOP_EXPR, e);
      gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
    }
  /* Handle linear clause adjustments.  */
  tree itercnt = NULL_TREE;
  tree *nonrect_bounds = NULL;
  if (gimple_omp_for_kind (fd->for_stmt) == GF_OMP_FOR_KIND_FOR)
    for (tree c = gimple_omp_for_clauses (fd->for_stmt);
	 c; c = OMP_CLAUSE_CHAIN (c))
      if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_LINEAR
	  && !OMP_CLAUSE_LINEAR_NO_COPYIN (c))
	{
	  tree d = OMP_CLAUSE_DECL (c);
	  tree t = d, a, dest;
	  if (omp_privatize_by_reference (t))
	    t = build_simple_mem_ref_loc (OMP_CLAUSE_LOCATION (c), t);
	  if (itercnt == NULL_TREE)
	    {
	      if (gimple_omp_for_combined_into_p (fd->for_stmt))
		{
		  /* Absolute iteration number: s0 plus the offset of
		     this chunk's N1 from the original loop's N1.  */
		  itercnt = fold_build2 (MINUS_EXPR, itype,
					 fold_convert (itype, n1),
					 fold_convert (itype, fd->loop.n1));
		  itercnt = fold_build2 (EXACT_DIV_EXPR, itype, itercnt, step);
		  itercnt = fold_build2 (PLUS_EXPR, itype, itercnt, s0);
		  itercnt = force_gimple_operand_gsi (&gsi, itercnt, true,
						      NULL_TREE, false,
						      GSI_CONTINUE_LINKING);
		}
	      else
		itercnt = s0;
	    }
	  tree type = TREE_TYPE (t);
	  if (POINTER_TYPE_P (type))
	    type = sizetype;
	  a = fold_build2 (MULT_EXPR, type,
			   fold_convert (type, itercnt),
			   fold_convert (type, OMP_CLAUSE_LINEAR_STEP (c)));
	  dest = unshare_expr (t);
	  t = fold_build2 (type == TREE_TYPE (t) ? PLUS_EXPR
			   : POINTER_PLUS_EXPR, TREE_TYPE (t), t, a);
	  t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
					false, GSI_CONTINUE_LINKING);
	  expand_omp_build_assign (&gsi, dest, t, true);
	}
  if (fd->collapse > 1)
    {
      if (fd->non_rect)
	{
	  nonrect_bounds = XALLOCAVEC (tree, fd->last_nonrect + 1);
	  memset (nonrect_bounds, 0, sizeof (tree) * (fd->last_nonrect + 1));
	}
      expand_omp_for_init_vars (fd, &gsi, counts, nonrect_bounds, inner_stmt,
				startvar);
    }

  if (!broken_loop)
    {
      /* The code controlling the sequential loop replaces the
	 GIMPLE_OMP_CONTINUE.  */
      gsi = gsi_last_nondebug_bb (cont_bb);
      gomp_continue *cont_stmt = as_a <gomp_continue *> (gsi_stmt (gsi));
      gcc_assert (gimple_code (cont_stmt) == GIMPLE_OMP_CONTINUE);
      vmain = gimple_omp_continue_control_use (cont_stmt);
      vback = gimple_omp_continue_control_def (cont_stmt);

      if (cond_var)
	{
	  /* Advance the conditional-lastprivate counter by the same
	     amount the IV advances (see the seq_start_bb init above).  */
	  tree itype = TREE_TYPE (cond_var);
	  tree t2;
	  if (POINTER_TYPE_P (type)
	      || TREE_CODE (n1) != INTEGER_CST
	      || fd->loop.cond_code != LT_EXPR)
	    t2 = build_int_cst (itype, 1);
	  else
	    t2 = fold_convert (itype, step);
	  t2 = fold_build2 (PLUS_EXPR, itype, cond_var, t2);
	  t2 = force_gimple_operand_gsi (&gsi, t2, false,
					 NULL_TREE, true, GSI_SAME_STMT);
	  assign_stmt = gimple_build_assign (cond_var, t2);
	  gsi_insert_before (&gsi, assign_stmt, GSI_SAME_STMT);
	}

      if (!gimple_omp_for_combined_p (fd->for_stmt))
	{
	  /* V += STEP; if (V cond e) goto L1;  */
	  if (POINTER_TYPE_P (type))
	    t = fold_build_pointer_plus (vmain, step);
	  else
	    t = fold_build2 (PLUS_EXPR, type, vmain, step);
	  t = force_gimple_operand_gsi (&gsi, t,
					DECL_P (vback)
					&& TREE_ADDRESSABLE (vback),
					NULL_TREE, true, GSI_SAME_STMT);
	  assign_stmt = gimple_build_assign (vback, t);
	  gsi_insert_before (&gsi, assign_stmt, GSI_SAME_STMT);

	  t = build2 (fd->loop.cond_code, boolean_type_node,
		      DECL_P (vback) && TREE_ADDRESSABLE (vback)
		      ? t : vback, e);
	  gsi_insert_before (&gsi, gimple_build_cond_empty (t), GSI_SAME_STMT);
	}

      /* Remove the GIMPLE_OMP_CONTINUE statement.  */
      gsi_remove (&gsi, true);

      if (fd->collapse > 1 && !gimple_omp_for_combined_p (fd->for_stmt))
	collapse_bb = extract_omp_for_update_vars (fd, nonrect_bounds,
						   cont_bb, body_bb);
    }

  /* Replace the GIMPLE_OMP_RETURN with a barrier, or nothing.  */
  gsi = gsi_last_nondebug_bb (exit_bb);
  if (!gimple_omp_return_nowait_p (gsi_stmt (gsi)))
    {
      t = gimple_omp_return_lhs (gsi_stmt (gsi));
      if (fd->have_reductemp
	  || ((fd->have_pointer_condtemp || fd->have_scantemp)
	      && !fd->have_nonctrl_scantemp))
	{
	  /* When GOMP_loop_start was used, pair it with GOMP_loop_end
	     (or its cancellable variant when the return has a lhs).  */
	  tree fn;
	  if (t)
	    fn = builtin_decl_explicit (BUILT_IN_GOMP_LOOP_END_CANCEL);
	  else
	    fn = builtin_decl_explicit (BUILT_IN_GOMP_LOOP_END);
	  gcall *g = gimple_build_call (fn, 0);
	  if (t)
	    {
	      gimple_call_set_lhs (g, t);
	      if (fd->have_reductemp)
		gsi_insert_after (&gsi, gimple_build_assign (reductions,
							     NOP_EXPR, t),
				  GSI_SAME_STMT);
	    }
	  gsi_insert_after (&gsi, g, GSI_SAME_STMT);
	}
      else
	gsi_insert_after (&gsi, omp_build_barrier (t), GSI_SAME_STMT);
    }
  else if ((fd->have_pointer_condtemp || fd->have_scantemp)
	   && !fd->have_nonctrl_scantemp)
    {
      tree fn = builtin_decl_explicit (BUILT_IN_GOMP_LOOP_END_NOWAIT);
      gcall *g = gimple_build_call (fn, 0);
      gsi_insert_after (&gsi, g, GSI_SAME_STMT);
    }
  if (fd->have_scantemp && !fd->have_nonctrl_scantemp)
    {
      /* Tear down the scan temporaries: free() if they were malloced
	 (CONTROLB true), otherwise restore the saved stack pointer.
	 Mirrors the allocation code in THIRD_BB above.  */
      tree clauses = gimple_omp_for_clauses (fd->for_stmt);
      tree controlp = NULL_TREE, controlb = NULL_TREE;
      for (tree c = clauses; c; c = OMP_CLAUSE_CHAIN (c))
	if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE__SCANTEMP_
	    && OMP_CLAUSE__SCANTEMP__CONTROL (c))
	  {
	    if (TREE_TYPE (OMP_CLAUSE_DECL (c)) == boolean_type_node)
	      controlb = OMP_CLAUSE_DECL (c);
	    else
	      controlp = OMP_CLAUSE_DECL (c);
	    if (controlb && controlp)
	      break;
	  }
      gcc_assert (controlp && controlb);
      gimple *g = gimple_build_cond (NE_EXPR, controlb, boolean_false_node,
				     NULL_TREE, NULL_TREE);
      gsi_insert_before (&gsi, g, GSI_SAME_STMT);
      exit1_bb = split_block (exit_bb, g)->dest;
      gsi = gsi_after_labels (exit1_bb);
      g = gimple_build_call (builtin_decl_explicit (BUILT_IN_FREE), 1,
			     controlp);
      gsi_insert_before (&gsi, g, GSI_SAME_STMT);
      exit2_bb = split_block (exit1_bb, g)->dest;
      gsi = gsi_after_labels (exit2_bb);
      g = gimple_build_call (builtin_decl_implicit (BUILT_IN_STACK_RESTORE), 1,
			     controlp);
      gsi_insert_before (&gsi, g, GSI_SAME_STMT);
      exit3_bb = split_block (exit2_bb, g)->dest;
      gsi = gsi_after_labels (exit3_bb);
    }
  gsi_remove (&gsi, true);

  /* Connect all the blocks.  */
  ep = make_edge (entry_bb, third_bb, EDGE_FALSE_VALUE);
  ep->probability = profile_probability::guessed_always ().apply_scale (3, 4);
  ep = find_edge (entry_bb, second_bb);
  ep->flags = EDGE_TRUE_VALUE;
  ep->probability = profile_probability::guessed_always () / 4;
  if (fourth_bb)
    {
      ep = make_edge (third_bb, fifth_bb, EDGE_FALSE_VALUE);
      ep->probability = profile_probability::guessed_always () / 2;
      ep = find_edge (third_bb, fourth_bb);
      ep->flags = EDGE_TRUE_VALUE;
      ep->probability = profile_probability::guessed_always () / 2;
      ep = find_edge (fourth_bb, fifth_bb);
      redirect_edge_and_branch (ep, sixth_bb);
    }
  else
    sixth_bb = third_bb;
  find_edge (sixth_bb, seq_start_bb)->flags = EDGE_FALSE_VALUE;
  find_edge (sixth_bb, fin_bb)->flags = EDGE_TRUE_VALUE;
  if (exit1_bb)
    {
      ep = make_edge (exit_bb, exit2_bb, EDGE_FALSE_VALUE);
      ep->probability = profile_probability::guessed_always () / 2;
      ep = find_edge (exit_bb, exit1_bb);
      ep->flags = EDGE_TRUE_VALUE;
      ep->probability = profile_probability::guessed_always () / 2;
      ep = find_edge (exit1_bb, exit2_bb);
      redirect_edge_and_branch (ep, exit3_bb);
    }

  if (!broken_loop)
    {
      ep = find_edge (cont_bb, body_bb);
      if (ep == NULL)
	{
	  ep = BRANCH_EDGE (cont_bb);
	  gcc_assert (single_succ (ep->dest) == body_bb);
	}
      if (gimple_omp_for_combined_p (fd->for_stmt))
	{
	  remove_edge (ep);
	  ep = NULL;
	}
      else if (fd->collapse > 1)
	{
	  remove_edge (ep);
	  ep = make_edge (cont_bb, collapse_bb, EDGE_TRUE_VALUE);
	}
      else
	ep->flags = EDGE_TRUE_VALUE;
      find_edge (cont_bb, fin_bb)->flags
	= ep ? EDGE_FALSE_VALUE : EDGE_FALLTHRU;
    }

  /* Fix up the dominator tree for all the blocks created above.  */
  set_immediate_dominator (CDI_DOMINATORS, second_bb, entry_bb);
  set_immediate_dominator (CDI_DOMINATORS, third_bb, entry_bb);
  if (fourth_bb)
    {
      set_immediate_dominator (CDI_DOMINATORS, fifth_bb, third_bb);
      set_immediate_dominator (CDI_DOMINATORS, sixth_bb, third_bb);
    }
  set_immediate_dominator (CDI_DOMINATORS, seq_start_bb, sixth_bb);

  set_immediate_dominator (CDI_DOMINATORS, body_bb,
			   recompute_dominator (CDI_DOMINATORS, body_bb));
  set_immediate_dominator (CDI_DOMINATORS, fin_bb,
			   recompute_dominator (CDI_DOMINATORS, fin_bb));
  if (exit1_bb)
    {
      set_immediate_dominator (CDI_DOMINATORS, exit2_bb, exit_bb);
      set_immediate_dominator (CDI_DOMINATORS, exit3_bb, exit_bb);
    }

  class loop *loop = body_bb->loop_father;
  if (loop != entry_bb->loop_father)
    {
      gcc_assert (broken_loop || loop->header == body_bb);
      gcc_assert (broken_loop
		  || loop->latch == region->cont
		  || single_pred (loop->latch) == region->cont);
      return;
    }

  /* Register the sequential loop in the loop tree unless the inner
     construct owns it (combined case).  */
  if (!broken_loop && !gimple_omp_for_combined_p (fd->for_stmt))
    {
      loop = alloc_loop ();
      loop->header = body_bb;
      if (collapse_bb == NULL)
	loop->latch = cont_bb;
      add_loop (loop, body_bb->loop_father);
    }
}
5730 :
5731 : /* Return phi in E->DEST with ARG on edge E. */
5732 :
5733 : static gphi *
5734 14 : find_phi_with_arg_on_edge (tree arg, edge e)
5735 : {
5736 14 : basic_block bb = e->dest;
5737 :
5738 14 : for (gphi_iterator gpi = gsi_start_phis (bb);
5739 14 : !gsi_end_p (gpi);
5740 0 : gsi_next (&gpi))
5741 : {
5742 14 : gphi *phi = gpi.phi ();
5743 14 : if (PHI_ARG_DEF_FROM_EDGE (phi, e) == arg)
5744 14 : return phi;
5745 : }
5746 :
5747 0 : return NULL;
5748 : }
5749 :
/* A subroutine of expand_omp_for.  Generate code for a parallel
   loop with static schedule and a specified chunk size.  Given
   parameters:

	for (V = N1; V cond N2; V += STEP) BODY;

   where COND is "<" or ">", we generate pseudocode

	if ((__typeof (V)) -1 > 0 && N2 cond N1) goto L2;
	if (cond is <)
	  adj = STEP - 1;
	else
	  adj = STEP + 1;
	if ((__typeof (V)) -1 > 0 && cond is >)
	  n = -(adj + N2 - N1) / -STEP;
	else
	  n = (adj + N2 - N1) / STEP;
	trip = 0;
	V = threadid * CHUNK * STEP + N1;  -- this extra definition of V is
					      here so that V is defined
					      if the loop is not entered
    L0:
	s0 = (trip * nthreads + threadid) * CHUNK;
	e0 = min (s0 + CHUNK, n);
	if (s0 < n) goto L1; else goto L4;
    L1:
	V = s0 * STEP + N1;
	e = e0 * STEP + N1;
    L2:
	BODY;
	V += STEP;
	if (V cond e) goto L2; else goto L3;
    L3:
	trip += 1;
	goto L0;
    L4:
    */

static void
expand_omp_for_static_chunk (struct omp_region *region,
			     struct omp_for_data *fd, gimple *inner_stmt)
{
  tree n, s0, e0, e, t;
  tree trip_var, trip_init, trip_main, trip_back, nthreads, threadid;
  tree type, itype, vmain, vback, vextra;
  basic_block entry_bb, exit_bb, body_bb, seq_start_bb, iter_part_bb;
  basic_block trip_update_bb = NULL, cont_bb, collapse_bb = NULL, fin_bb;
  gimple_stmt_iterator gsi, gsip;
  edge se;
  bool broken_loop = region->cont == NULL;
  tree *counts = NULL;
  tree n1, n2, step;
  tree reductions = NULL_TREE;
  tree cond_var = NULL_TREE, condtemp = NULL_TREE;

  /* All chunk/trip arithmetic is done in ITYPE; for pointer iteration
     variables that is the corresponding signed integer type.  */
  itype = type = TREE_TYPE (fd->loop.v);
  if (POINTER_TYPE_P (type))
    itype = signed_type_for (type);

  /* Carve the region into the basic blocks of the pseudocode above:
     ENTRY_BB holds the setup before L0, ITER_PART_BB is L0 (the chunk
     partitioning test), SEQ_START_BB is L1, BODY_BB is L2, TRIP_UPDATE_BB
     is L3 and FIN_BB is L4.  */
  entry_bb = region->entry;
  se = split_block (entry_bb, last_nondebug_stmt (entry_bb));
  entry_bb = se->src;
  iter_part_bb = se->dest;
  cont_bb = region->cont;
  gcc_assert (EDGE_COUNT (iter_part_bb->succs) == 2);
  fin_bb = BRANCH_EDGE (iter_part_bb)->dest;
  gcc_assert (broken_loop
	      || fin_bb == FALLTHRU_EDGE (cont_bb)->dest);
  seq_start_bb = split_edge (FALLTHRU_EDGE (iter_part_bb));
  body_bb = single_succ (seq_start_bb);
  if (!broken_loop)
    {
      gcc_assert (BRANCH_EDGE (cont_bb)->dest == body_bb
		  || single_succ (BRANCH_EDGE (cont_bb)->dest) == body_bb);
      gcc_assert (EDGE_COUNT (cont_bb->succs) == 2);
      trip_update_bb = split_edge (FALLTHRU_EDGE (cont_bb));
    }
  exit_bb = region->exit;

  /* Trip and adjustment setup goes in ENTRY_BB.  */
  gsi = gsi_last_nondebug_bb (entry_bb);
  gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_FOR);
  /* GSIP remembers the statement just before the GIMPLE_OMP_FOR, used
     below as an insertion point for the GOMP_loop_start call.  */
  gsip = gsi;
  gsi_prev (&gsip);

  /* Try to prove at compile time that the loop is entered; T ends up
     as integer_one_node when it provably is, NULL_TREE when the
     collapsed-loop counts take care of it, or NULL/non-one when a
     run-time check is needed.  */
  if (fd->collapse > 1)
    {
      int first_zero_iter = -1, dummy = -1;
      basic_block l2_dom_bb = NULL, dummy_bb = NULL;

      counts = XALLOCAVEC (tree, fd->collapse);
      expand_omp_for_init_counts (fd, &gsi, entry_bb, counts,
				  fin_bb, first_zero_iter,
				  dummy_bb, dummy, l2_dom_bb);
      t = NULL_TREE;
    }
  else if (gimple_omp_for_combined_into_p (fd->for_stmt))
    t = integer_one_node;
  else
    t = fold_binary (fd->loop.cond_code, boolean_type_node,
		     fold_convert (type, fd->loop.n1),
		     fold_convert (type, fd->loop.n2));
  /* For an unsigned single loop whose entry we could not prove, emit
     the explicit "if (N2 cond N1) goto L4" guard from the pseudocode,
     branching straight to FIN_BB when there are no iterations.  */
  if (fd->collapse == 1
      && TYPE_UNSIGNED (type)
      && (t == NULL_TREE || !integer_onep (t)))
    {
      n1 = fold_convert (type, unshare_expr (fd->loop.n1));
      n1 = force_gimple_operand_gsi (&gsi, n1, true, NULL_TREE,
				     true, GSI_SAME_STMT);
      n2 = fold_convert (type, unshare_expr (fd->loop.n2));
      n2 = force_gimple_operand_gsi (&gsi, n2, true, NULL_TREE,
				     true, GSI_SAME_STMT);
      gcond *cond_stmt = expand_omp_build_cond (&gsi, fd->loop.cond_code,
						n1, n2);
      se = split_block (entry_bb, cond_stmt);
      se->flags = EDGE_TRUE_VALUE;
      entry_bb = se->dest;
      se->probability = profile_probability::very_likely ();
      se = make_edge (se->src, fin_bb, EDGE_FALSE_VALUE);
      se->probability = profile_probability::very_unlikely ();
      if (gimple_in_ssa_p (cfun))
	{
	  /* The new skip edge into FIN_BB needs PHI arguments; reuse
	     the values coming in from ITER_PART_BB.  */
	  int dest_idx = find_edge (iter_part_bb, fin_bb)->dest_idx;
	  for (gphi_iterator gpi = gsi_start_phis (fin_bb);
	       !gsi_end_p (gpi); gsi_next (&gpi))
	    {
	      gphi *phi = gpi.phi ();
	      add_phi_arg (phi, gimple_phi_arg_def (phi, dest_idx),
			   se, UNKNOWN_LOCATION);
	    }
	}
      gsi = gsi_last_bb (entry_bb);
    }

  /* Pick up the two _condtemp_ clauses for lastprivate(conditional:):
     the first (pointer) one is the shared buffer, the second is the
     per-thread iteration counter variable.  */
  if (fd->lastprivate_conditional)
    {
      tree clauses = gimple_omp_for_clauses (fd->for_stmt);
      tree c = omp_find_clause (clauses, OMP_CLAUSE__CONDTEMP_);
      if (fd->have_pointer_condtemp)
	condtemp = OMP_CLAUSE_DECL (c);
      c = omp_find_clause (OMP_CLAUSE_CHAIN (c), OMP_CLAUSE__CONDTEMP_);
      cond_var = OMP_CLAUSE_DECL (c);
    }
  if (fd->have_reductemp || fd->have_pointer_condtemp)
    {
      /* Emit a GOMP_loop_start call before the region so the runtime
	 can set up the reduction temporary and/or the conditional
	 lastprivate buffer.  NOTE(review): the bounds passed (start 0,
	 end 1, chunk (1 << 31) + 1) appear to be placeholder values --
	 the call seems to be needed only for its REDUCTIONS/MEM side
	 effects here; confirm against libgomp's GOMP_loop_start.  */
      tree t1 = build_int_cst (long_integer_type_node, 0);
      tree t2 = build_int_cst (long_integer_type_node, 1);
      tree t3 = build_int_cstu (long_integer_type_node,
				(HOST_WIDE_INT_1U << 31) + 1);
      tree clauses = gimple_omp_for_clauses (fd->for_stmt);
      gimple_stmt_iterator gsi2 = gsi_none ();
      gimple *g = NULL;
      tree mem = null_pointer_node, memv = NULL_TREE;
      if (fd->have_reductemp)
	{
	  tree c = omp_find_clause (clauses, OMP_CLAUSE__REDUCTEMP_);
	  reductions = OMP_CLAUSE_DECL (c);
	  gcc_assert (TREE_CODE (reductions) == SSA_NAME);
	  g = SSA_NAME_DEF_STMT (reductions);
	  /* Replace the SSA name by its defining value; the dummy
	     definition statement is removed again below.  */
	  reductions = gimple_assign_rhs1 (g);
	  OMP_CLAUSE_DECL (c) = reductions;
	  gsi2 = gsi_for_stmt (g);
	}
      else
	{
	  if (gsi_end_p (gsip))
	    gsi2 = gsi_after_labels (region->entry);
	  else
	    gsi2 = gsip;
	  reductions = null_pointer_node;
	}
      if (fd->have_pointer_condtemp)
	{
	  /* MEMV holds the byte size of the conditional lastprivate
	     buffer (element size times number of such clauses); its
	     address is passed to GOMP_loop_start, which stores the
	     allocated buffer back through it.  */
	  tree type = TREE_TYPE (condtemp);
	  memv = create_tmp_var (type);
	  TREE_ADDRESSABLE (memv) = 1;
	  unsigned HOST_WIDE_INT sz
	    = tree_to_uhwi (TYPE_SIZE_UNIT (TREE_TYPE (type)));
	  sz *= fd->lastprivate_conditional;
	  expand_omp_build_assign (&gsi2, memv, build_int_cst (type, sz),
				   false);
	  mem = build_fold_addr_expr (memv);
	}
      tree t
	= build_call_expr (builtin_decl_explicit (BUILT_IN_GOMP_LOOP_START),
			   9, t1, t2, t2, t3, t1, null_pointer_node,
			   null_pointer_node, reductions, mem);
      force_gimple_operand_gsi (&gsi2, t, true, NULL_TREE,
				true, GSI_SAME_STMT);
      if (fd->have_pointer_condtemp)
	expand_omp_build_assign (&gsi2, condtemp, memv, false);
      if (fd->have_reductemp)
	{
	  gsi_remove (&gsi2, true);
	  release_ssa_name (gimple_assign_lhs (g));
	}
    }
  /* For worksharing loops partition among threads; for distribute
     constructs partition among teams instead.  */
  switch (gimple_omp_for_kind (fd->for_stmt))
    {
    case GF_OMP_FOR_KIND_FOR:
      nthreads = builtin_decl_explicit (BUILT_IN_OMP_GET_NUM_THREADS);
      threadid = builtin_decl_explicit (BUILT_IN_OMP_GET_THREAD_NUM);
      break;
    case GF_OMP_FOR_KIND_DISTRIBUTE:
      nthreads = builtin_decl_explicit (BUILT_IN_OMP_GET_NUM_TEAMS);
      threadid = builtin_decl_explicit (BUILT_IN_OMP_GET_TEAM_NUM);
      break;
    default:
      gcc_unreachable ();
    }
  nthreads = build_call_expr (nthreads, 0);
  nthreads = fold_convert (itype, nthreads);
  nthreads = force_gimple_operand_gsi (&gsi, nthreads, true, NULL_TREE,
				       true, GSI_SAME_STMT);
  threadid = build_call_expr (threadid, 0);
  threadid = fold_convert (itype, threadid);
  threadid = force_gimple_operand_gsi (&gsi, threadid, true, NULL_TREE,
				       true, GSI_SAME_STMT);

  /* For a loop combined into an outer construct the actual bounds live
     in _looptemp_ clauses, not in fd->loop.  */
  n1 = fd->loop.n1;
  n2 = fd->loop.n2;
  step = fd->loop.step;
  if (gimple_omp_for_combined_into_p (fd->for_stmt))
    {
      tree innerc = omp_find_clause (gimple_omp_for_clauses (fd->for_stmt),
				     OMP_CLAUSE__LOOPTEMP_);
      gcc_assert (innerc);
      n1 = OMP_CLAUSE_DECL (innerc);
      innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
				OMP_CLAUSE__LOOPTEMP_);
      gcc_assert (innerc);
      n2 = OMP_CLAUSE_DECL (innerc);
    }
  n1 = force_gimple_operand_gsi (&gsi, fold_convert (type, n1),
				 true, NULL_TREE, true, GSI_SAME_STMT);
  n2 = force_gimple_operand_gsi (&gsi, fold_convert (itype, n2),
				 true, NULL_TREE, true, GSI_SAME_STMT);
  step = force_gimple_operand_gsi (&gsi, fold_convert (itype, step),
				   true, NULL_TREE, true, GSI_SAME_STMT);
  tree chunk_size = fold_convert (itype, fd->chunk_size);
  chunk_size = omp_adjust_chunk_size (chunk_size, fd->simd_schedule,
				      is_in_offload_region (region));
  chunk_size
    = force_gimple_operand_gsi (&gsi, chunk_size, true, NULL_TREE, true,
				GSI_SAME_STMT);

  /* Compute the total iteration count N = (adj + N2 - N1) / STEP as in
     the pseudocode above; for unsigned types counting down this is done
     as -(adj + N2 - N1) / -STEP.  */
  t = build_int_cst (itype, (fd->loop.cond_code == LT_EXPR ? -1 : 1));
  t = fold_build2 (PLUS_EXPR, itype, step, t);
  t = fold_build2 (PLUS_EXPR, itype, t, n2);
  t = fold_build2 (MINUS_EXPR, itype, t, fold_convert (itype, n1));
  if (TYPE_UNSIGNED (itype) && fd->loop.cond_code == GT_EXPR)
    t = fold_build2 (TRUNC_DIV_EXPR, itype,
		     fold_build1 (NEGATE_EXPR, itype, t),
		     fold_build1 (NEGATE_EXPR, itype, step));
  else
    t = fold_build2 (TRUNC_DIV_EXPR, itype, t, step);
  t = fold_convert (itype, t);
  n = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
				true, GSI_SAME_STMT);

  /* TRIP counts the chunks this thread has completed.  In SSA form it
     needs three names: the initial zero (ENTRY_BB), the PHI result in
     ITER_PART_BB, and the incremented value from TRIP_UPDATE_BB; the
     PHI tying them together is built at the end of this function.  */
  trip_var = create_tmp_reg (itype, ".trip");
  if (gimple_in_ssa_p (cfun))
    {
      trip_init = make_ssa_name (trip_var);
      trip_main = make_ssa_name (trip_var);
      trip_back = make_ssa_name (trip_var);
    }
  else
    {
      trip_init = trip_var;
      trip_main = trip_var;
      trip_back = trip_var;
    }

  gassign *assign_stmt
    = gimple_build_assign (trip_init, build_int_cst (itype, 0));
  gsi_insert_before (&gsi, assign_stmt, GSI_SAME_STMT);

  /* The pseudocode's "V = threadid * CHUNK * STEP + N1": VEXTRA keeps V
     defined even when the loop body is never entered.  */
  t = fold_build2 (MULT_EXPR, itype, threadid, chunk_size);
  t = fold_build2 (MULT_EXPR, itype, t, step);
  if (POINTER_TYPE_P (type))
    t = fold_build_pointer_plus (n1, t);
  else
    t = fold_build2 (PLUS_EXPR, type, t, n1);
  vextra = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
				     true, GSI_SAME_STMT);

  /* Remove the GIMPLE_OMP_FOR.  */
  gsi_remove (&gsi, true);

  gimple_stmt_iterator gsif = gsi;

  /* Iteration space partitioning goes in ITER_PART_BB.  This is the
     pseudocode's L0: compute S0 and E0 and test whether this chunk has
     any work.  */
  gsi = gsi_last_bb (iter_part_bb);

  t = fold_build2 (MULT_EXPR, itype, trip_main, nthreads);
  t = fold_build2 (PLUS_EXPR, itype, t, threadid);
  t = fold_build2 (MULT_EXPR, itype, t, chunk_size);
  s0 = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
				 false, GSI_CONTINUE_LINKING);

  t = fold_build2 (PLUS_EXPR, itype, s0, chunk_size);
  t = fold_build2 (MIN_EXPR, itype, t, n);
  e0 = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
				 false, GSI_CONTINUE_LINKING);

  t = build2 (LT_EXPR, boolean_type_node, s0, n);
  gsi_insert_after (&gsi, gimple_build_cond_empty (t), GSI_CONTINUE_LINKING);

  /* Setup code for sequential iteration goes in SEQ_START_BB (the
     pseudocode's L1: V = s0 * STEP + N1; e = e0 * STEP + N1).  */
  gsi = gsi_start_bb (seq_start_bb);

  tree startvar = fd->loop.v;
  tree endvar = NULL_TREE;

  if (gimple_omp_for_combined_p (fd->for_stmt))
    {
      /* For a combined construct the chunk bounds are communicated to
	 the nested construct through its _looptemp_ clauses.  */
      tree clauses = gimple_code (inner_stmt) == GIMPLE_OMP_PARALLEL
		     ? gimple_omp_parallel_clauses (inner_stmt)
		     : gimple_omp_for_clauses (inner_stmt);
      tree innerc = omp_find_clause (clauses, OMP_CLAUSE__LOOPTEMP_);
      gcc_assert (innerc);
      startvar = OMP_CLAUSE_DECL (innerc);
      innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
				OMP_CLAUSE__LOOPTEMP_);
      gcc_assert (innerc);
      endvar = OMP_CLAUSE_DECL (innerc);
      if (fd->collapse > 1 && TREE_CODE (fd->loop.n2) != INTEGER_CST
	  && gimple_omp_for_kind (fd->for_stmt) == GF_OMP_FOR_KIND_DISTRIBUTE)
	{
	  innerc = find_lastprivate_looptemp (fd, innerc);
	  if (innerc)
	    {
	      /* If needed (distribute parallel for with lastprivate),
		 propagate down the total number of iterations.  */
	      tree t = fold_convert (TREE_TYPE (OMP_CLAUSE_DECL (innerc)),
				     fd->loop.n2);
	      t = force_gimple_operand_gsi (&gsi, t, false, NULL_TREE, false,
					    GSI_CONTINUE_LINKING);
	      assign_stmt = gimple_build_assign (OMP_CLAUSE_DECL (innerc), t);
	      gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
	    }
	}
    }

  /* STARTVAR = s0 * STEP + N1.  */
  t = fold_convert (itype, s0);
  t = fold_build2 (MULT_EXPR, itype, t, step);
  if (POINTER_TYPE_P (type))
    {
      t = fold_build_pointer_plus (n1, t);
      if (!POINTER_TYPE_P (TREE_TYPE (startvar))
	  && TYPE_PRECISION (TREE_TYPE (startvar)) > TYPE_PRECISION (type))
	t = fold_convert (signed_type_for (type), t);
    }
  else
    t = fold_build2 (PLUS_EXPR, type, t, n1);
  t = fold_convert (TREE_TYPE (startvar), t);
  t = force_gimple_operand_gsi (&gsi, t,
				DECL_P (startvar)
				&& TREE_ADDRESSABLE (startvar),
				NULL_TREE, false, GSI_CONTINUE_LINKING);
  assign_stmt = gimple_build_assign (startvar, t);
  gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
  if (cond_var)
    {
      tree itype = TREE_TYPE (cond_var);
      /* For lastprivate(conditional:) itervar, we need some iteration
	 counter that starts at unsigned non-zero and increases.
	 Prefer as few IVs as possible, so if we can use startvar
	 itself, use that, or startvar + constant (those would be
	 incremented with step), and as last resort use the s0 + 1
	 incremented by 1.  */
      if (POINTER_TYPE_P (type)
	  || TREE_CODE (n1) != INTEGER_CST
	  || fd->loop.cond_code != LT_EXPR)
	t = fold_build2 (PLUS_EXPR, itype, fold_convert (itype, s0),
			 build_int_cst (itype, 1));
      else if (tree_int_cst_sgn (n1) == 1)
	t = fold_convert (itype, t);
      else
	{
	  tree c = fold_convert (itype, n1);
	  c = fold_build2 (MINUS_EXPR, itype, build_int_cst (itype, 1), c);
	  t = fold_build2 (PLUS_EXPR, itype, fold_convert (itype, t), c);
	}
      t = force_gimple_operand_gsi (&gsi, t, false,
				    NULL_TREE, false, GSI_CONTINUE_LINKING);
      assign_stmt = gimple_build_assign (cond_var, t);
      gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
    }

  /* E = e0 * STEP + N1, the chunk-local upper bound.  */
  t = fold_convert (itype, e0);
  t = fold_build2 (MULT_EXPR, itype, t, step);
  if (POINTER_TYPE_P (type))
    {
      t = fold_build_pointer_plus (n1, t);
      if (!POINTER_TYPE_P (TREE_TYPE (startvar))
	  && TYPE_PRECISION (TREE_TYPE (startvar)) > TYPE_PRECISION (type))
	t = fold_convert (signed_type_for (type), t);
    }
  else
    t = fold_build2 (PLUS_EXPR, type, t, n1);
  t = fold_convert (TREE_TYPE (startvar), t);
  e = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
				false, GSI_CONTINUE_LINKING);
  if (endvar)
    {
      assign_stmt = gimple_build_assign (endvar, e);
      gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
      if (useless_type_conversion_p (TREE_TYPE (fd->loop.v), TREE_TYPE (e)))
	assign_stmt = gimple_build_assign (fd->loop.v, e);
      else
	assign_stmt = gimple_build_assign (fd->loop.v, NOP_EXPR, e);
      gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
    }
  /* Handle linear clause adjustments.  */
  tree itercnt = NULL_TREE, itercntbias = NULL_TREE;
  if (gimple_omp_for_kind (fd->for_stmt) == GF_OMP_FOR_KIND_FOR)
    for (tree c = gimple_omp_for_clauses (fd->for_stmt);
	 c; c = OMP_CLAUSE_CHAIN (c))
      if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_LINEAR
	  && !OMP_CLAUSE_LINEAR_NO_COPYIN (c))
	{
	  /* Set the linear variable to its original value plus the
	     number of iterations executed so far times its linear
	     step.  The original value is captured in V before the
	     OMP_FOR (via GSIF); the adjusted value is stored at the
	     chunk start.  */
	  tree d = OMP_CLAUSE_DECL (c);
	  tree t = d, a, dest;
	  if (omp_privatize_by_reference (t))
	    t = build_simple_mem_ref_loc (OMP_CLAUSE_LOCATION (c), t);
	  tree type = TREE_TYPE (t);
	  if (POINTER_TYPE_P (type))
	    type = sizetype;
	  dest = unshare_expr (t);
	  tree v = create_tmp_var (TREE_TYPE (t), NULL);
	  expand_omp_build_assign (&gsif, v, t);
	  if (itercnt == NULL_TREE)
	    {
	      if (gimple_omp_for_combined_into_p (fd->for_stmt))
		{
		  /* The bias accounts for iterations already assigned
		     to other threads/teams by the enclosing construct:
		     (N1 - fd->loop.n1) / STEP.  */
		  itercntbias
		    = fold_build2 (MINUS_EXPR, itype, fold_convert (itype, n1),
				   fold_convert (itype, fd->loop.n1));
		  itercntbias = fold_build2 (EXACT_DIV_EXPR, itype,
					     itercntbias, step);
		  itercntbias
		    = force_gimple_operand_gsi (&gsif, itercntbias, true,
						NULL_TREE, true,
						GSI_SAME_STMT);
		  itercnt = fold_build2 (PLUS_EXPR, itype, itercntbias, s0);
		  itercnt = force_gimple_operand_gsi (&gsi, itercnt, true,
						      NULL_TREE, false,
						      GSI_CONTINUE_LINKING);
		}
	      else
		itercnt = s0;
	    }
	  a = fold_build2 (MULT_EXPR, type,
			   fold_convert (type, itercnt),
			   fold_convert (type, OMP_CLAUSE_LINEAR_STEP (c)));
	  t = fold_build2 (type == TREE_TYPE (t) ? PLUS_EXPR
			   : POINTER_PLUS_EXPR, TREE_TYPE (t), v, a);
	  t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
					false, GSI_CONTINUE_LINKING);
	  expand_omp_build_assign (&gsi, dest, t, true);
	}
  if (fd->collapse > 1)
    expand_omp_for_init_vars (fd, &gsi, counts, NULL, inner_stmt, startvar);

  if (!broken_loop)
    {
      /* The code controlling the sequential loop goes in CONT_BB,
	 replacing the GIMPLE_OMP_CONTINUE.  */
      gsi = gsi_last_nondebug_bb (cont_bb);
      gomp_continue *cont_stmt = as_a <gomp_continue *> (gsi_stmt (gsi));
      vmain = gimple_omp_continue_control_use (cont_stmt);
      vback = gimple_omp_continue_control_def (cont_stmt);

      if (cond_var)
	{
	  /* Advance the conditional lastprivate counter each iteration,
	     mirroring the increment choice made at the chunk start.  */
	  tree itype = TREE_TYPE (cond_var);
	  tree t2;
	  if (POINTER_TYPE_P (type)
	      || TREE_CODE (n1) != INTEGER_CST
	      || fd->loop.cond_code != LT_EXPR)
	    t2 = build_int_cst (itype, 1);
	  else
	    t2 = fold_convert (itype, step);
	  t2 = fold_build2 (PLUS_EXPR, itype, cond_var, t2);
	  t2 = force_gimple_operand_gsi (&gsi, t2, false,
					 NULL_TREE, true, GSI_SAME_STMT);
	  assign_stmt = gimple_build_assign (cond_var, t2);
	  gsi_insert_before (&gsi, assign_stmt, GSI_SAME_STMT);
	}

      if (!gimple_omp_for_combined_p (fd->for_stmt))
	{
	  /* V += STEP; if (V cond e) goto L2; -- the pseudocode's
	     sequential back edge test.  */
	  if (POINTER_TYPE_P (type))
	    t = fold_build_pointer_plus (vmain, step);
	  else
	    t = fold_build2 (PLUS_EXPR, type, vmain, step);
	  t = force_gimple_operand_gsi (&gsi, t,
					DECL_P (vback)
					&& TREE_ADDRESSABLE (vback), NULL_TREE,
					true, GSI_SAME_STMT);
	  assign_stmt = gimple_build_assign (vback, t);
	  gsi_insert_before (&gsi, assign_stmt, GSI_SAME_STMT);

	  /* With a chunk size of 1 each chunk has exactly one iteration,
	     so build a never-true (0 == 1) condition: the back edge is
	     never taken.  */
	  if (tree_int_cst_equal (fd->chunk_size, integer_one_node))
	    t = build2 (EQ_EXPR, boolean_type_node,
			build_int_cst (itype, 0),
			build_int_cst (itype, 1));
	  else
	    t = build2 (fd->loop.cond_code, boolean_type_node,
			DECL_P (vback) && TREE_ADDRESSABLE (vback)
			? t : vback, e);
	  gsi_insert_before (&gsi, gimple_build_cond_empty (t), GSI_SAME_STMT);
	}

      /* Remove GIMPLE_OMP_CONTINUE.  */
      gsi_remove (&gsi, true);

      if (fd->collapse > 1 && !gimple_omp_for_combined_p (fd->for_stmt))
	collapse_bb = extract_omp_for_update_vars (fd, NULL, cont_bb, body_bb);

      /* Trip update code goes into TRIP_UPDATE_BB (the pseudocode's L3:
	 trip += 1).  */
      gsi = gsi_start_bb (trip_update_bb);

      t = build_int_cst (itype, 1);
      t = build2 (PLUS_EXPR, itype, trip_main, t);
      assign_stmt = gimple_build_assign (trip_back, t);
      gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
    }

  /* Replace the GIMPLE_OMP_RETURN with a barrier, or nothing.  */
  gsi = gsi_last_nondebug_bb (exit_bb);
  if (!gimple_omp_return_nowait_p (gsi_stmt (gsi)))
    {
      t = gimple_omp_return_lhs (gsi_stmt (gsi));
      if (fd->have_reductemp || fd->have_pointer_condtemp)
	{
	  /* A GOMP_loop_start above must be paired with GOMP_loop_end
	     (or the _cancel variant when the return has a lhs).  */
	  tree fn;
	  if (t)
	    fn = builtin_decl_explicit (BUILT_IN_GOMP_LOOP_END_CANCEL);
	  else
	    fn = builtin_decl_explicit (BUILT_IN_GOMP_LOOP_END);
	  gcall *g = gimple_build_call (fn, 0);
	  if (t)
	    {
	      gimple_call_set_lhs (g, t);
	      if (fd->have_reductemp)
		gsi_insert_after (&gsi, gimple_build_assign (reductions,
							     NOP_EXPR, t),
				  GSI_SAME_STMT);
	    }
	  gsi_insert_after (&gsi, g, GSI_SAME_STMT);
	}
      else
	gsi_insert_after (&gsi, omp_build_barrier (t), GSI_SAME_STMT);
    }
  else if (fd->have_pointer_condtemp)
    {
      tree fn = builtin_decl_explicit (BUILT_IN_GOMP_LOOP_END_NOWAIT);
      gcall *g = gimple_build_call (fn, 0);
      gsi_insert_after (&gsi, g, GSI_SAME_STMT);
    }
  gsi_remove (&gsi, true);

  /* Connect the new blocks.  */
  find_edge (iter_part_bb, seq_start_bb)->flags = EDGE_TRUE_VALUE;
  find_edge (iter_part_bb, fin_bb)->flags = EDGE_FALSE_VALUE;

  if (!broken_loop)
    {
      se = find_edge (cont_bb, body_bb);
      if (se == NULL)
	{
	  se = BRANCH_EDGE (cont_bb);
	  gcc_assert (single_succ (se->dest) == body_bb);
	}
      if (gimple_omp_for_combined_p (fd->for_stmt))
	{
	  /* The sequential inner loop is expanded by the nested
	     construct; drop the back edge here.  */
	  remove_edge (se);
	  se = NULL;
	}
      else if (fd->collapse > 1)
	{
	  /* The back edge goes through the block updating the collapsed
	     iteration variables.  */
	  remove_edge (se);
	  se = make_edge (cont_bb, collapse_bb, EDGE_TRUE_VALUE);
	}
      else
	se->flags = EDGE_TRUE_VALUE;
      find_edge (cont_bb, trip_update_bb)->flags
	= se ? EDGE_FALSE_VALUE : EDGE_FALLTHRU;

      redirect_edge_and_branch (single_succ_edge (trip_update_bb),
				iter_part_bb);
    }

  if (gimple_in_ssa_p (cfun))
    {
      gphi_iterator psi;
      gphi *phi;
      edge re, ene;
      edge_var_map *vm;
      size_t i;

      gcc_assert (fd->collapse == 1 && !broken_loop);

      /* When we redirect the edge from trip_update_bb to iter_part_bb, we
	 remove arguments of the phi nodes in fin_bb.  We need to create
	 appropriate phi nodes in iter_part_bb instead.  */
      se = find_edge (iter_part_bb, fin_bb);
      re = single_succ_edge (trip_update_bb);
      vec<edge_var_map> *head = redirect_edge_var_map_vector (re);
      ene = single_succ_edge (entry_bb);

      psi = gsi_start_phis (fin_bb);
      for (i = 0; !gsi_end_p (psi) && head->iterate (i, &vm);
	   gsi_next (&psi), ++i)
	{
	  gphi *nphi;
	  location_t locus;

	  phi = psi.phi ();
	  if (operand_equal_p (gimple_phi_arg_def (phi, 0),
			       redirect_edge_var_map_def (vm), 0))
	    continue;

	  t = gimple_phi_result (phi);
	  gcc_assert (t == redirect_edge_var_map_result (vm));

	  if (!single_pred_p (fin_bb))
	    t = copy_ssa_name (t, phi);

	  nphi = create_phi_node (t, iter_part_bb);

	  t = PHI_ARG_DEF_FROM_EDGE (phi, se);
	  locus = gimple_phi_arg_location_from_edge (phi, se);

	  /* A special case -- fd->loop.v is not yet computed in
	     iter_part_bb, we need to use vextra instead.  */
	  if (t == fd->loop.v)
	    t = vextra;
	  add_phi_arg (nphi, t, ene, locus);
	  locus = redirect_edge_var_map_location (vm);
	  tree back_arg = redirect_edge_var_map_def (vm);
	  add_phi_arg (nphi, back_arg, re, locus);
	  /* Find the edge into the inner loop body so the corresponding
	     PHI there can pick up the new ITER_PART_BB value on chunk
	     entry.  */
	  edge ce = find_edge (cont_bb, body_bb);
	  if (ce == NULL)
	    {
	      ce = BRANCH_EDGE (cont_bb);
	      gcc_assert (single_succ (ce->dest) == body_bb);
	      ce = single_succ_edge (ce->dest);
	    }
	  gphi *inner_loop_phi = find_phi_with_arg_on_edge (back_arg, ce);
	  gcc_assert (inner_loop_phi != NULL);
	  add_phi_arg (inner_loop_phi, gimple_phi_result (nphi),
		       find_edge (seq_start_bb, body_bb), locus);

	  if (!single_pred_p (fin_bb))
	    add_phi_arg (phi, gimple_phi_result (nphi), se, locus);
	}
      gcc_assert (gsi_end_p (psi) && (head == NULL || i == head->length ()));
      redirect_edge_var_map_clear (re);
      /* With a single predecessor the PHIs in FIN_BB are now redundant;
	 remove them.  */
      if (single_pred_p (fin_bb))
	while (1)
	  {
	    psi = gsi_start_phis (fin_bb);
	    if (gsi_end_p (psi))
	      break;
	    remove_phi_node (&psi, false);
	  }

      /* Make phi node for trip.  */
      phi = create_phi_node (trip_main, iter_part_bb);
      add_phi_arg (phi, trip_back, single_succ_edge (trip_update_bb),
		   UNKNOWN_LOCATION);
      add_phi_arg (phi, trip_init, single_succ_edge (entry_bb),
		   UNKNOWN_LOCATION);
    }

  /* Recompute dominance information invalidated by the CFG surgery.  */
  if (!broken_loop)
    set_immediate_dominator (CDI_DOMINATORS, trip_update_bb, cont_bb);
  set_immediate_dominator (CDI_DOMINATORS, iter_part_bb,
			   recompute_dominator (CDI_DOMINATORS, iter_part_bb));
  set_immediate_dominator (CDI_DOMINATORS, fin_bb,
			   recompute_dominator (CDI_DOMINATORS, fin_bb));
  set_immediate_dominator (CDI_DOMINATORS, seq_start_bb,
			   recompute_dominator (CDI_DOMINATORS, seq_start_bb));
  set_immediate_dominator (CDI_DOMINATORS, body_bb,
			   recompute_dominator (CDI_DOMINATORS, body_bb));

  if (!broken_loop)
    {
      /* Register the loop structure: TRIP_LOOP is the outer chunking
	 loop (header ITER_PART_BB, latch TRIP_UPDATE_BB); the sequential
	 per-chunk loop, when present, becomes its inner loop.  */
      class loop *loop = body_bb->loop_father;
      class loop *trip_loop = alloc_loop ();
      trip_loop->header = iter_part_bb;
      trip_loop->latch = trip_update_bb;
      add_loop (trip_loop, iter_part_bb->loop_father);

      if (loop != entry_bb->loop_father)
	{
	  gcc_assert (loop->header == body_bb);
	  gcc_assert (loop->latch == region->cont
		      || single_pred (loop->latch) == region->cont);
	  trip_loop->inner = loop;
	  return;
	}

      if (!gimple_omp_for_combined_p (fd->for_stmt))
	{
	  loop = alloc_loop ();
	  loop->header = body_bb;
	  /* With collapse > 1 the latch is not unique here; leave it
	     NULL for the loop code to cope with.  */
	  if (collapse_bb == NULL)
	    loop->latch = cont_bb;
	  add_loop (loop, trip_loop);
	}
    }
}
6467 :
6468 : /* A subroutine of expand_omp_for. Generate code for a simd non-worksharing
6469 : loop. Given parameters:
6470 :
6471 : for (V = N1; V cond N2; V += STEP) BODY;
6472 :
6473 : where COND is "<" or ">", we generate pseudocode
6474 :
6475 : V = N1;
6476 : goto L1;
6477 : L0:
6478 : BODY;
6479 : V += STEP;
6480 : L1:
6481 : if (V cond N2) goto L0; else goto L2;
6482 : L2:
6483 :
6484 : For collapsed loops, emit the outer loops as scalar
6485 : and only try to vectorize the innermost loop. */
6486 :
6487 : static void
6488 9355 : expand_omp_simd (struct omp_region *region, struct omp_for_data *fd)
6489 : {
6490 9355 : tree type, t;
6491 9355 : basic_block entry_bb, cont_bb, exit_bb, l0_bb, l1_bb, l2_bb, l2_dom_bb;
6492 9355 : gimple_stmt_iterator gsi;
6493 9355 : gimple *stmt;
6494 9355 : gcond *cond_stmt;
6495 9355 : bool broken_loop = region->cont == NULL;
6496 9355 : edge e, ne;
6497 9355 : tree *counts = NULL;
6498 9355 : int i;
6499 9355 : int safelen_int = INT_MAX;
6500 9355 : bool dont_vectorize = false;
6501 9355 : tree safelen = omp_find_clause (gimple_omp_for_clauses (fd->for_stmt),
6502 : OMP_CLAUSE_SAFELEN);
6503 9355 : tree simduid = omp_find_clause (gimple_omp_for_clauses (fd->for_stmt),
6504 : OMP_CLAUSE__SIMDUID_);
6505 9355 : tree ifc = omp_find_clause (gimple_omp_for_clauses (fd->for_stmt),
6506 : OMP_CLAUSE_IF);
6507 9355 : tree simdlen = omp_find_clause (gimple_omp_for_clauses (fd->for_stmt),
6508 : OMP_CLAUSE_SIMDLEN);
6509 9355 : tree condtemp = omp_find_clause (gimple_omp_for_clauses (fd->for_stmt),
6510 : OMP_CLAUSE__CONDTEMP_);
6511 9355 : tree n1, n2;
6512 9432 : tree cond_var = condtemp ? OMP_CLAUSE_DECL (condtemp) : NULL_TREE;
6513 :
6514 9355 : if (safelen)
6515 : {
6516 5351 : poly_uint64 val;
6517 5351 : safelen = OMP_CLAUSE_SAFELEN_EXPR (safelen);
6518 5351 : if (!poly_int_tree_p (safelen, &val))
6519 : safelen_int = 0;
6520 : else
6521 5336 : safelen_int = MIN (constant_lower_bound (val), INT_MAX);
6522 5336 : if (safelen_int == 1)
6523 1782 : safelen_int = 0;
6524 : }
6525 759 : if ((ifc && integer_zerop (OMP_CLAUSE_IF_EXPR (ifc)))
6526 9990 : || (simdlen && integer_onep (OMP_CLAUSE_SIMDLEN_EXPR (simdlen))))
6527 : {
6528 : safelen_int = 0;
6529 : dont_vectorize = true;
6530 : }
6531 9355 : type = TREE_TYPE (fd->loop.v);
6532 9355 : entry_bb = region->entry;
6533 9355 : cont_bb = region->cont;
6534 9355 : gcc_assert (EDGE_COUNT (entry_bb->succs) == 2);
6535 9355 : gcc_assert (broken_loop
6536 : || BRANCH_EDGE (entry_bb)->dest == FALLTHRU_EDGE (cont_bb)->dest);
6537 9355 : l0_bb = FALLTHRU_EDGE (entry_bb)->dest;
6538 9355 : if (!broken_loop)
6539 : {
6540 8283 : gcc_assert (BRANCH_EDGE (cont_bb)->dest == l0_bb);
6541 8283 : gcc_assert (EDGE_COUNT (cont_bb->succs) == 2);
6542 8283 : l1_bb = split_block (cont_bb, last_nondebug_stmt (cont_bb))->dest;
6543 8283 : l2_bb = BRANCH_EDGE (entry_bb)->dest;
6544 : }
6545 : else
6546 : {
6547 1072 : BRANCH_EDGE (entry_bb)->flags &= ~EDGE_ABNORMAL;
6548 1072 : l1_bb = split_edge (BRANCH_EDGE (entry_bb));
6549 1072 : l2_bb = single_succ (l1_bb);
6550 : }
6551 9355 : exit_bb = region->exit;
6552 9355 : l2_dom_bb = NULL;
6553 :
6554 9355 : gsi = gsi_last_nondebug_bb (entry_bb);
6555 :
6556 9355 : gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_FOR);
6557 : /* Not needed in SSA form right now. */
6558 9355 : gcc_assert (!gimple_in_ssa_p (cfun));
6559 9355 : if (fd->collapse > 1
6560 9355 : && (gimple_omp_for_combined_into_p (fd->for_stmt)
6561 496 : || broken_loop))
6562 : {
6563 2277 : int first_zero_iter = -1, dummy = -1;
6564 2277 : basic_block zero_iter_bb = l2_bb, dummy_bb = NULL;
6565 :
6566 2277 : counts = XALLOCAVEC (tree, fd->collapse);
6567 2277 : expand_omp_for_init_counts (fd, &gsi, entry_bb, counts,
6568 : zero_iter_bb, first_zero_iter,
6569 : dummy_bb, dummy, l2_dom_bb);
6570 : }
6571 9355 : if (l2_dom_bb == NULL)
6572 9347 : l2_dom_bb = l1_bb;
6573 :
6574 9355 : n1 = fd->loop.n1;
6575 9355 : n2 = fd->loop.n2;
6576 9355 : if (gimple_omp_for_combined_into_p (fd->for_stmt))
6577 : {
6578 7004 : tree innerc = omp_find_clause (gimple_omp_for_clauses (fd->for_stmt),
6579 : OMP_CLAUSE__LOOPTEMP_);
6580 7004 : gcc_assert (innerc);
6581 7004 : n1 = OMP_CLAUSE_DECL (innerc);
6582 7004 : innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
6583 : OMP_CLAUSE__LOOPTEMP_);
6584 7004 : gcc_assert (innerc);
6585 7004 : n2 = OMP_CLAUSE_DECL (innerc);
6586 : }
6587 9355 : tree step = fd->loop.step;
6588 9355 : tree orig_step = step; /* May be different from step if is_simt. */
6589 :
6590 9355 : bool is_simt = omp_find_clause (gimple_omp_for_clauses (fd->for_stmt),
6591 9355 : OMP_CLAUSE__SIMT_);
6592 9355 : if (is_simt)
6593 : {
6594 0 : cfun->curr_properties &= ~PROP_gimple_lomp_dev;
6595 0 : is_simt = safelen_int > 1;
6596 : }
6597 9355 : tree simt_lane = NULL_TREE, simt_maxlane = NULL_TREE;
6598 9355 : if (is_simt)
6599 : {
6600 0 : simt_lane = create_tmp_var (unsigned_type_node);
6601 0 : gimple *g = gimple_build_call_internal (IFN_GOMP_SIMT_LANE, 0);
6602 0 : gimple_call_set_lhs (g, simt_lane);
6603 0 : gsi_insert_before (&gsi, g, GSI_SAME_STMT);
6604 0 : tree offset = fold_build2 (MULT_EXPR, TREE_TYPE (step), step,
6605 : fold_convert (TREE_TYPE (step), simt_lane));
6606 0 : n1 = fold_convert (type, n1);
6607 0 : if (POINTER_TYPE_P (type))
6608 0 : n1 = fold_build_pointer_plus (n1, offset);
6609 : else
6610 0 : n1 = fold_build2 (PLUS_EXPR, type, n1, fold_convert (type, offset));
6611 :
6612 : /* Collapsed loops not handled for SIMT yet: limit to one lane only. */
6613 0 : if (fd->collapse > 1)
6614 0 : simt_maxlane = build_one_cst (unsigned_type_node);
6615 0 : else if (safelen_int < omp_max_simt_vf ())
6616 0 : simt_maxlane = build_int_cst (unsigned_type_node, safelen_int);
6617 0 : tree vf
6618 0 : = build_call_expr_internal_loc (UNKNOWN_LOCATION, IFN_GOMP_SIMT_VF,
6619 : unsigned_type_node, 0);
6620 0 : if (simt_maxlane)
6621 0 : vf = fold_build2 (MIN_EXPR, unsigned_type_node, vf, simt_maxlane);
6622 0 : vf = fold_convert (TREE_TYPE (step), vf);
6623 0 : step = fold_build2 (MULT_EXPR, TREE_TYPE (step), step, vf);
6624 : }
6625 :
6626 9355 : tree n2var = NULL_TREE;
6627 9355 : tree n2v = NULL_TREE;
6628 9355 : tree *nonrect_bounds = NULL;
6629 9355 : tree min_arg1 = NULL_TREE, min_arg2 = NULL_TREE;
6630 9355 : if (fd->collapse > 1)
6631 : {
6632 2753 : if (broken_loop || gimple_omp_for_combined_into_p (fd->for_stmt))
6633 : {
6634 2277 : if (fd->non_rect)
6635 : {
6636 99 : nonrect_bounds = XALLOCAVEC (tree, fd->last_nonrect + 1);
6637 99 : memset (nonrect_bounds, 0,
6638 : sizeof (tree) * (fd->last_nonrect + 1));
6639 : }
6640 2277 : expand_omp_build_assign (&gsi, fd->loop.v, fold_convert (type, n1));
6641 2277 : gcc_assert (entry_bb == gsi_bb (gsi));
6642 2277 : gcc_assert (fd->for_stmt == gsi_stmt (gsi));
6643 2277 : gsi_prev (&gsi);
6644 2277 : entry_bb = split_block (entry_bb, gsi_stmt (gsi))->dest;
6645 2277 : expand_omp_for_init_vars (fd, &gsi, counts, nonrect_bounds,
6646 : NULL, n1);
6647 2277 : gsi = gsi_for_stmt (fd->for_stmt);
6648 : }
6649 2753 : if (broken_loop)
6650 : ;
6651 2217 : else if (gimple_omp_for_combined_into_p (fd->for_stmt))
6652 : {
6653 : /* Compute in n2var the limit for the first innermost loop,
6654 : i.e. fd->loop.v + MIN (n2 - fd->loop.v, cnt)
6655 : where cnt is how many iterations would the loop have if
6656 : all further iterations were assigned to the current task. */
6657 1741 : n2var = create_tmp_var (type);
6658 1741 : i = fd->collapse - 1;
6659 1741 : tree itype = TREE_TYPE (fd->loops[i].v);
6660 1741 : if (POINTER_TYPE_P (itype))
6661 266 : itype = signed_type_for (itype);
6662 1741 : t = build_int_cst (itype, (fd->loops[i].cond_code == LT_EXPR
6663 2511 : ? -1 : 1));
6664 1741 : t = fold_build2 (PLUS_EXPR, itype,
6665 : fold_convert (itype, fd->loops[i].step), t);
6666 1741 : t = fold_build2 (PLUS_EXPR, itype, t,
6667 : fold_convert (itype, fd->loops[i].n2));
6668 1741 : if (fd->loops[i].m2)
6669 : {
6670 26 : tree t2 = fold_convert (itype,
6671 : fd->loops[i - fd->loops[i].outer].v);
6672 26 : tree t3 = fold_convert (itype, fd->loops[i].m2);
6673 26 : t2 = fold_build2 (MULT_EXPR, TREE_TYPE (t), t2, t3);
6674 26 : t = fold_build2 (PLUS_EXPR, itype, t, t2);
6675 : }
6676 1741 : t = fold_build2 (MINUS_EXPR, itype, t,
6677 : fold_convert (itype, fd->loops[i].v));
6678 1741 : if (TYPE_UNSIGNED (itype) && fd->loops[i].cond_code == GT_EXPR)
6679 256 : t = fold_build2 (TRUNC_DIV_EXPR, itype,
6680 : fold_build1 (NEGATE_EXPR, itype, t),
6681 : fold_build1 (NEGATE_EXPR, itype,
6682 : fold_convert (itype,
6683 : fd->loops[i].step)));
6684 : else
6685 1485 : t = fold_build2 (TRUNC_DIV_EXPR, itype, t,
6686 : fold_convert (itype, fd->loops[i].step));
6687 1741 : t = fold_convert (type, t);
6688 1741 : tree t2 = fold_build2 (MINUS_EXPR, type, n2, n1);
6689 1741 : min_arg1 = create_tmp_var (type);
6690 1741 : expand_omp_build_assign (&gsi, min_arg1, t2);
6691 1741 : min_arg2 = create_tmp_var (type);
6692 1741 : expand_omp_build_assign (&gsi, min_arg2, t);
6693 : }
6694 : else
6695 : {
6696 476 : if (TREE_CODE (n2) == INTEGER_CST)
6697 : {
6698 : /* Indicate for lastprivate handling that at least one iteration
6699 : has been performed, without wasting runtime. */
6700 138 : if (integer_nonzerop (n2))
6701 130 : expand_omp_build_assign (&gsi, fd->loop.v,
6702 : fold_convert (type, n2));
6703 : else
6704 : /* Indicate that no iteration has been performed. */
6705 8 : expand_omp_build_assign (&gsi, fd->loop.v,
6706 : build_one_cst (type));
6707 : }
6708 : else
6709 : {
6710 338 : expand_omp_build_assign (&gsi, fd->loop.v,
6711 : build_zero_cst (type));
6712 338 : expand_omp_build_assign (&gsi, n2, build_one_cst (type));
6713 : }
6714 476 : for (i = 0; i < fd->collapse; i++)
6715 : {
6716 476 : t = fold_convert (TREE_TYPE (fd->loops[i].v), fd->loops[i].n1);
6717 476 : if (fd->loops[i].m1)
6718 : {
6719 0 : tree t2
6720 0 : = fold_convert (TREE_TYPE (t),
6721 : fd->loops[i - fd->loops[i].outer].v);
6722 0 : tree t3 = fold_convert (TREE_TYPE (t), fd->loops[i].m1);
6723 0 : t2 = fold_build2 (MULT_EXPR, TREE_TYPE (t), t2, t3);
6724 0 : t = fold_build2 (PLUS_EXPR, TREE_TYPE (t), t, t2);
6725 : }
6726 476 : expand_omp_build_assign (&gsi, fd->loops[i].v, t);
6727 : /* For normal non-combined collapsed loops just initialize
6728 : the outermost iterator in the entry_bb. */
6729 476 : if (!broken_loop)
6730 : break;
6731 : }
6732 : }
6733 : }
6734 : else
6735 6602 : expand_omp_build_assign (&gsi, fd->loop.v, fold_convert (type, n1));
6736 9355 : tree altv = NULL_TREE, altn2 = NULL_TREE;
6737 9355 : if (fd->collapse == 1
6738 6602 : && !broken_loop
6739 6066 : && TREE_CODE (orig_step) != INTEGER_CST)
6740 : {
6741 : /* The vectorizer currently punts on loops with non-constant steps
6742 : for the main IV (can't compute number of iterations and gives up
6743 : because of that). As for OpenMP loops it is always possible to
6744 : compute the number of iterations upfront, use an alternate IV
6745 : as the loop iterator:
6746 : altn2 = n1 < n2 ? (n2 - n1 + step - 1) / step : 0;
6747 : for (i = n1, altv = 0; altv < altn2; altv++, i += step) */
6748 176 : altv = create_tmp_var (unsigned_type_for (TREE_TYPE (fd->loops[0].v)));
6749 176 : expand_omp_build_assign (&gsi, altv, build_zero_cst (TREE_TYPE (altv)));
6750 176 : tree itype = TREE_TYPE (fd->loop.v);
6751 176 : if (POINTER_TYPE_P (itype))
6752 0 : itype = signed_type_for (itype);
6753 334 : t = build_int_cst (itype, (fd->loop.cond_code == LT_EXPR ? -1 : 1));
6754 176 : t = fold_build2 (PLUS_EXPR, itype,
6755 : fold_convert (itype, step), t);
6756 176 : t = fold_build2 (PLUS_EXPR, itype, t, fold_convert (itype, n2));
6757 176 : t = fold_build2 (MINUS_EXPR, itype, t,
6758 : fold_convert (itype, fd->loop.v));
6759 176 : if (TYPE_UNSIGNED (itype) && fd->loop.cond_code == GT_EXPR)
6760 0 : t = fold_build2 (TRUNC_DIV_EXPR, itype,
6761 : fold_build1 (NEGATE_EXPR, itype, t),
6762 : fold_build1 (NEGATE_EXPR, itype,
6763 : fold_convert (itype, step)));
6764 : else
6765 176 : t = fold_build2 (TRUNC_DIV_EXPR, itype, t,
6766 : fold_convert (itype, step));
6767 176 : t = fold_convert (TREE_TYPE (altv), t);
6768 176 : altn2 = create_tmp_var (TREE_TYPE (altv));
6769 176 : expand_omp_build_assign (&gsi, altn2, t);
6770 176 : tree t2 = fold_convert (TREE_TYPE (fd->loop.v), n2);
6771 176 : t2 = fold_build2 (fd->loop.cond_code, boolean_type_node, fd->loop.v, t2);
6772 176 : t2 = force_gimple_operand_gsi (&gsi, t2, true, NULL_TREE,
6773 : true, GSI_SAME_STMT);
6774 176 : gassign *g = gimple_build_assign (altn2, COND_EXPR, t2, altn2,
6775 176 : build_zero_cst (TREE_TYPE (altv)));
6776 176 : gsi_insert_before (&gsi, g, GSI_SAME_STMT);
6777 176 : }
6778 9179 : else if (fd->collapse > 1
6779 2753 : && !broken_loop
6780 2217 : && !gimple_omp_for_combined_into_p (fd->for_stmt)
6781 9655 : && TREE_CODE (fd->loops[fd->collapse - 1].step) != INTEGER_CST)
6782 : {
6783 22 : altv = create_tmp_var (unsigned_type_for (TREE_TYPE (fd->loops[0].v)));
6784 22 : altn2 = create_tmp_var (TREE_TYPE (altv));
6785 : }
6786 9355 : if (cond_var)
6787 : {
6788 77 : if (POINTER_TYPE_P (type)
6789 77 : || TREE_CODE (n1) != INTEGER_CST
6790 15 : || fd->loop.cond_code != LT_EXPR
6791 92 : || tree_int_cst_sgn (n1) != 1)
6792 72 : expand_omp_build_assign (&gsi, cond_var,
6793 72 : build_one_cst (TREE_TYPE (cond_var)));
6794 : else
6795 5 : expand_omp_build_assign (&gsi, cond_var,
6796 5 : fold_convert (TREE_TYPE (cond_var), n1));
6797 : }
6798 :
6799 : /* Remove the GIMPLE_OMP_FOR statement. */
6800 9355 : gsi_remove (&gsi, true);
6801 :
6802 9355 : if (!broken_loop)
6803 : {
6804 : /* Code to control the increment goes in the CONT_BB. */
6805 8283 : gsi = gsi_last_nondebug_bb (cont_bb);
6806 8283 : stmt = gsi_stmt (gsi);
6807 8283 : gcc_assert (gimple_code (stmt) == GIMPLE_OMP_CONTINUE);
6808 :
6809 8283 : if (fd->collapse == 1
6810 8283 : || gimple_omp_for_combined_into_p (fd->for_stmt))
6811 : {
6812 7807 : if (POINTER_TYPE_P (type))
6813 286 : t = fold_build_pointer_plus (fd->loop.v, step);
6814 : else
6815 7521 : t = fold_build2 (PLUS_EXPR, type, fd->loop.v, step);
6816 7807 : expand_omp_build_assign (&gsi, fd->loop.v, t);
6817 : }
6818 476 : else if (TREE_CODE (n2) != INTEGER_CST)
6819 338 : expand_omp_build_assign (&gsi, fd->loop.v, build_one_cst (type));
6820 8283 : if (altv)
6821 : {
6822 198 : t = fold_build2 (PLUS_EXPR, TREE_TYPE (altv), altv,
6823 : build_one_cst (TREE_TYPE (altv)));
6824 198 : expand_omp_build_assign (&gsi, altv, t);
6825 : }
6826 :
6827 8283 : if (fd->collapse > 1)
6828 : {
6829 2217 : i = fd->collapse - 1;
6830 2217 : if (POINTER_TYPE_P (TREE_TYPE (fd->loops[i].v)))
6831 280 : t = fold_build_pointer_plus (fd->loops[i].v, fd->loops[i].step);
6832 : else
6833 : {
6834 1937 : t = fold_convert (TREE_TYPE (fd->loops[i].v),
6835 : fd->loops[i].step);
6836 1937 : t = fold_build2 (PLUS_EXPR, TREE_TYPE (fd->loops[i].v),
6837 : fd->loops[i].v, t);
6838 : }
6839 2217 : expand_omp_build_assign (&gsi, fd->loops[i].v, t);
6840 : }
6841 8283 : if (cond_var)
6842 : {
6843 77 : if (POINTER_TYPE_P (type)
6844 77 : || TREE_CODE (n1) != INTEGER_CST
6845 15 : || fd->loop.cond_code != LT_EXPR
6846 92 : || tree_int_cst_sgn (n1) != 1)
6847 72 : t = fold_build2 (PLUS_EXPR, TREE_TYPE (cond_var), cond_var,
6848 : build_one_cst (TREE_TYPE (cond_var)));
6849 : else
6850 5 : t = fold_build2 (PLUS_EXPR, TREE_TYPE (cond_var), cond_var,
6851 : fold_convert (TREE_TYPE (cond_var), step));
6852 77 : expand_omp_build_assign (&gsi, cond_var, t);
6853 : }
6854 :
6855 : /* Remove GIMPLE_OMP_CONTINUE. */
6856 8283 : gsi_remove (&gsi, true);
6857 : }
6858 :
6859 : /* Emit the condition in L1_BB. */
6860 9355 : gsi = gsi_start_bb (l1_bb);
6861 :
6862 9355 : if (altv)
6863 198 : t = build2 (LT_EXPR, boolean_type_node, altv, altn2);
6864 9157 : else if (fd->collapse > 1
6865 2731 : && !gimple_omp_for_combined_into_p (fd->for_stmt)
6866 9631 : && !broken_loop)
6867 : {
6868 454 : i = fd->collapse - 1;
6869 454 : tree itype = TREE_TYPE (fd->loops[i].v);
6870 454 : if (fd->loops[i].m2)
6871 203 : t = n2v = create_tmp_var (itype);
6872 : else
6873 251 : t = fold_convert (itype, unshare_expr (fd->loops[i].n2));
6874 454 : t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
6875 : false, GSI_CONTINUE_LINKING);
6876 454 : tree v = fd->loops[i].v;
6877 454 : if (DECL_P (v) && TREE_ADDRESSABLE (v))
6878 0 : v = force_gimple_operand_gsi (&gsi, v, true, NULL_TREE,
6879 : false, GSI_CONTINUE_LINKING);
6880 454 : t = build2 (fd->loops[i].cond_code, boolean_type_node, v, t);
6881 : }
6882 : else
6883 : {
6884 8703 : if (fd->collapse > 1 && !broken_loop)
6885 : t = n2var;
6886 : else
6887 6962 : t = fold_convert (type, unshare_expr (n2));
6888 8703 : t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
6889 : false, GSI_CONTINUE_LINKING);
6890 8703 : tree v = fd->loop.v;
6891 8703 : if (DECL_P (v) && TREE_ADDRESSABLE (v))
6892 0 : v = force_gimple_operand_gsi (&gsi, v, true, NULL_TREE,
6893 : false, GSI_CONTINUE_LINKING);
6894 8703 : t = build2 (fd->loop.cond_code, boolean_type_node, v, t);
6895 : }
6896 9355 : cond_stmt = gimple_build_cond_empty (t);
6897 9355 : gsi_insert_after (&gsi, cond_stmt, GSI_CONTINUE_LINKING);
6898 9355 : if (walk_tree (gimple_cond_lhs_ptr (cond_stmt), expand_omp_regimplify_p,
6899 : NULL, NULL)
6900 9355 : || walk_tree (gimple_cond_rhs_ptr (cond_stmt), expand_omp_regimplify_p,
6901 : NULL, NULL))
6902 : {
6903 0 : gsi = gsi_for_stmt (cond_stmt);
6904 0 : gimple_regimplify_operands (cond_stmt, &gsi);
6905 : }
6906 :
6907 : /* Add 'V -= STEP * (SIMT_VF - 1)' after the loop. */
6908 9355 : if (is_simt)
6909 : {
6910 0 : gsi = gsi_start_bb (l2_bb);
6911 0 : step = fold_build2 (MINUS_EXPR, TREE_TYPE (step), orig_step, step);
6912 0 : if (POINTER_TYPE_P (type))
6913 0 : t = fold_build_pointer_plus (fd->loop.v, step);
6914 : else
6915 0 : t = fold_build2 (PLUS_EXPR, type, fd->loop.v, step);
6916 0 : expand_omp_build_assign (&gsi, fd->loop.v, t);
6917 : }
6918 :
6919 : /* Remove GIMPLE_OMP_RETURN. */
6920 9355 : gsi = gsi_last_nondebug_bb (exit_bb);
6921 9355 : gsi_remove (&gsi, true);
6922 :
6923 : /* Connect the new blocks. */
6924 9355 : remove_edge (FALLTHRU_EDGE (entry_bb));
6925 :
6926 9355 : if (!broken_loop)
6927 : {
6928 8283 : remove_edge (BRANCH_EDGE (entry_bb));
6929 8283 : make_edge (entry_bb, l1_bb, EDGE_FALLTHRU);
6930 :
6931 8283 : e = BRANCH_EDGE (l1_bb);
6932 8283 : ne = FALLTHRU_EDGE (l1_bb);
6933 8283 : e->flags = EDGE_TRUE_VALUE;
6934 : }
6935 : else
6936 : {
6937 1072 : single_succ_edge (entry_bb)->flags = EDGE_FALLTHRU;
6938 :
6939 1072 : ne = single_succ_edge (l1_bb);
6940 1072 : e = make_edge (l1_bb, l0_bb, EDGE_TRUE_VALUE);
6941 :
6942 : }
6943 9355 : ne->flags = EDGE_FALSE_VALUE;
6944 9355 : e->probability = profile_probability::guessed_always ().apply_scale (7, 8);
6945 9355 : ne->probability = e->probability.invert ();
6946 :
6947 9355 : set_immediate_dominator (CDI_DOMINATORS, l1_bb, entry_bb);
6948 9355 : set_immediate_dominator (CDI_DOMINATORS, l0_bb, l1_bb);
6949 :
6950 9355 : if (simt_maxlane)
6951 : {
6952 0 : cond_stmt = gimple_build_cond (LT_EXPR, simt_lane, simt_maxlane,
6953 : NULL_TREE, NULL_TREE);
6954 0 : gsi = gsi_last_bb (entry_bb);
6955 0 : gsi_insert_after (&gsi, cond_stmt, GSI_NEW_STMT);
6956 0 : make_edge (entry_bb, l2_bb, EDGE_FALSE_VALUE);
6957 0 : FALLTHRU_EDGE (entry_bb)->flags = EDGE_TRUE_VALUE;
6958 0 : FALLTHRU_EDGE (entry_bb)->probability
6959 0 : = profile_probability::guessed_always ().apply_scale (7, 8);
6960 0 : BRANCH_EDGE (entry_bb)->probability
6961 0 : = FALLTHRU_EDGE (entry_bb)->probability.invert ();
6962 0 : l2_dom_bb = entry_bb;
6963 : }
6964 9355 : set_immediate_dominator (CDI_DOMINATORS, l2_bb, l2_dom_bb);
6965 :
6966 9355 : if (!broken_loop && fd->collapse > 1)
6967 : {
6968 2217 : basic_block last_bb = l1_bb;
6969 2217 : basic_block init_bb = NULL;
6970 6108 : for (i = fd->collapse - 2; i >= 0; i--)
6971 : {
6972 3891 : tree nextn2v = NULL_TREE;
6973 3891 : if (EDGE_SUCC (last_bb, 0)->flags & EDGE_FALSE_VALUE)
6974 : e = EDGE_SUCC (last_bb, 0);
6975 : else
6976 2217 : e = EDGE_SUCC (last_bb, 1);
6977 3891 : basic_block bb = split_edge (e);
6978 3891 : if (POINTER_TYPE_P (TREE_TYPE (fd->loops[i].v)))
6979 553 : t = fold_build_pointer_plus (fd->loops[i].v, fd->loops[i].step);
6980 : else
6981 : {
6982 3338 : t = fold_convert (TREE_TYPE (fd->loops[i].v),
6983 : fd->loops[i].step);
6984 3338 : t = fold_build2 (PLUS_EXPR, TREE_TYPE (fd->loops[i].v),
6985 : fd->loops[i].v, t);
6986 : }
6987 3891 : gsi = gsi_after_labels (bb);
6988 3891 : expand_omp_build_assign (&gsi, fd->loops[i].v, t);
6989 :
6990 3891 : bb = split_block (bb, last_nondebug_stmt (bb))->dest;
6991 3891 : gsi = gsi_start_bb (bb);
6992 3891 : tree itype = TREE_TYPE (fd->loops[i].v);
6993 3891 : if (fd->loops[i].m2)
6994 0 : t = nextn2v = create_tmp_var (itype);
6995 : else
6996 3891 : t = fold_convert (itype, unshare_expr (fd->loops[i].n2));
6997 3891 : t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
6998 : false, GSI_CONTINUE_LINKING);
6999 3891 : tree v = fd->loops[i].v;
7000 3891 : if (DECL_P (v) && TREE_ADDRESSABLE (v))
7001 0 : v = force_gimple_operand_gsi (&gsi, v, true, NULL_TREE,
7002 : false, GSI_CONTINUE_LINKING);
7003 3891 : t = build2 (fd->loops[i].cond_code, boolean_type_node, v, t);
7004 3891 : cond_stmt = gimple_build_cond_empty (t);
7005 3891 : gsi_insert_after (&gsi, cond_stmt, GSI_CONTINUE_LINKING);
7006 3891 : if (walk_tree (gimple_cond_lhs_ptr (cond_stmt),
7007 : expand_omp_regimplify_p, NULL, NULL)
7008 3891 : || walk_tree (gimple_cond_rhs_ptr (cond_stmt),
7009 : expand_omp_regimplify_p, NULL, NULL))
7010 : {
7011 6 : gsi = gsi_for_stmt (cond_stmt);
7012 6 : gimple_regimplify_operands (cond_stmt, &gsi);
7013 : }
7014 3891 : ne = single_succ_edge (bb);
7015 3891 : ne->flags = EDGE_FALSE_VALUE;
7016 :
7017 3891 : init_bb = create_empty_bb (bb);
7018 3891 : set_immediate_dominator (CDI_DOMINATORS, init_bb, bb);
7019 3891 : add_bb_to_loop (init_bb, bb->loop_father);
7020 3891 : e = make_edge (bb, init_bb, EDGE_TRUE_VALUE);
7021 3891 : e->probability
7022 3891 : = profile_probability::guessed_always ().apply_scale (7, 8);
7023 3891 : ne->probability = e->probability.invert ();
7024 :
7025 3891 : gsi = gsi_after_labels (init_bb);
7026 3891 : if (fd->loops[i + 1].m1)
7027 : {
7028 242 : tree t2 = fold_convert (TREE_TYPE (fd->loops[i + 1].v),
7029 : fd->loops[i + 1
7030 : - fd->loops[i + 1].outer].v);
7031 242 : if (POINTER_TYPE_P (TREE_TYPE (t2)))
7032 6 : t = fold_build_pointer_plus (t2, fd->loops[i + 1].n1);
7033 : else
7034 : {
7035 236 : t = fold_convert (TREE_TYPE (fd->loops[i + 1].v),
7036 : fd->loops[i + 1].n1);
7037 236 : tree t3 = fold_convert (TREE_TYPE (t), fd->loops[i + 1].m1);
7038 236 : t2 = fold_build2 (MULT_EXPR, TREE_TYPE (t), t2, t3);
7039 236 : t = fold_build2 (PLUS_EXPR, TREE_TYPE (t), t, t2);
7040 : }
7041 : }
7042 : else
7043 3649 : t = fold_convert (TREE_TYPE (fd->loops[i + 1].v),
7044 : fd->loops[i + 1].n1);
7045 3891 : expand_omp_build_assign (&gsi, fd->loops[i + 1].v, t);
7046 3891 : if (fd->loops[i + 1].m2)
7047 : {
7048 241 : if (i + 2 == fd->collapse && (n2var || altv))
7049 : {
7050 38 : gcc_assert (n2v == NULL_TREE);
7051 38 : n2v = create_tmp_var (TREE_TYPE (fd->loops[i + 1].v));
7052 : }
7053 241 : tree t2 = fold_convert (TREE_TYPE (fd->loops[i + 1].v),
7054 : fd->loops[i + 1
7055 : - fd->loops[i + 1].outer].v);
7056 241 : if (POINTER_TYPE_P (TREE_TYPE (t2)))
7057 6 : t = fold_build_pointer_plus (t2, fd->loops[i + 1].n2);
7058 : else
7059 : {
7060 235 : t = fold_convert (TREE_TYPE (fd->loops[i + 1].v),
7061 : fd->loops[i + 1].n2);
7062 235 : tree t3 = fold_convert (TREE_TYPE (t), fd->loops[i + 1].m2);
7063 235 : t2 = fold_build2 (MULT_EXPR, TREE_TYPE (t), t2, t3);
7064 235 : t = fold_build2 (PLUS_EXPR, TREE_TYPE (t), t, t2);
7065 : }
7066 241 : expand_omp_build_assign (&gsi, n2v, t);
7067 : }
7068 3891 : if (i + 2 == fd->collapse && n2var)
7069 : {
7070 : /* For composite simd, n2 is the first iteration the current
7071 : task shouldn't already handle, so we effectively want to use
7072 : for (V3 = N31; V < N2 && V3 < N32; V++, V3 += STEP3)
7073 : as the vectorized loop. Except the vectorizer will not
7074 : vectorize that, so instead compute N2VAR as
7075 : N2VAR = V + MIN (N2 - V, COUNTS3) and use
7076 : for (V3 = N31; V < N2VAR; V++, V3 += STEP3)
7077 : as the loop to vectorize. */
7078 1741 : tree t2 = fold_build2 (MINUS_EXPR, type, n2, fd->loop.v);
7079 1741 : if (fd->loops[i + 1].m1 || fd->loops[i + 1].m2)
7080 : {
7081 99 : tree itype = TREE_TYPE (fd->loops[i].v);
7082 99 : if (POINTER_TYPE_P (itype))
7083 4 : itype = signed_type_for (itype);
7084 99 : t = build_int_cst (itype, (fd->loops[i + 1].cond_code
7085 99 : == LT_EXPR ? -1 : 1));
7086 99 : t = fold_build2 (PLUS_EXPR, itype,
7087 : fold_convert (itype,
7088 : fd->loops[i + 1].step), t);
7089 99 : if (fd->loops[i + 1].m2 == NULL_TREE)
7090 73 : t = fold_build2 (PLUS_EXPR, itype, t,
7091 : fold_convert (itype,
7092 : fd->loops[i + 1].n2));
7093 26 : else if (POINTER_TYPE_P (TREE_TYPE (n2v)))
7094 : {
7095 4 : t = fold_build_pointer_plus (n2v, t);
7096 4 : t = fold_convert (itype, t);
7097 : }
7098 : else
7099 22 : t = fold_build2 (PLUS_EXPR, itype, t, n2v);
7100 99 : t = fold_build2 (MINUS_EXPR, itype, t,
7101 : fold_convert (itype, fd->loops[i + 1].v));
7102 99 : tree step = fold_convert (itype, fd->loops[i + 1].step);
7103 99 : if (TYPE_UNSIGNED (itype)
7104 99 : && fd->loops[i + 1].cond_code == GT_EXPR)
7105 0 : t = fold_build2 (TRUNC_DIV_EXPR, itype,
7106 : fold_build1 (NEGATE_EXPR, itype, t),
7107 : fold_build1 (NEGATE_EXPR, itype, step));
7108 : else
7109 99 : t = fold_build2 (TRUNC_DIV_EXPR, itype, t, step);
7110 99 : t = fold_convert (type, t);
7111 99 : }
7112 : else
7113 1642 : t = counts[i + 1];
7114 1741 : expand_omp_build_assign (&gsi, min_arg1, t2);
7115 1741 : expand_omp_build_assign (&gsi, min_arg2, t);
7116 1741 : e = split_block (init_bb, last_nondebug_stmt (init_bb));
7117 1741 : gsi = gsi_after_labels (e->dest);
7118 1741 : init_bb = e->dest;
7119 1741 : remove_edge (FALLTHRU_EDGE (entry_bb));
7120 1741 : make_edge (entry_bb, init_bb, EDGE_FALLTHRU);
7121 1741 : set_immediate_dominator (CDI_DOMINATORS, init_bb, entry_bb);
7122 1741 : set_immediate_dominator (CDI_DOMINATORS, l1_bb, init_bb);
7123 1741 : t = fold_build2 (MIN_EXPR, type, min_arg1, min_arg2);
7124 1741 : t = fold_build2 (PLUS_EXPR, type, fd->loop.v, t);
7125 1741 : expand_omp_build_assign (&gsi, n2var, t);
7126 : }
7127 3891 : if (i + 2 == fd->collapse && altv)
7128 : {
7129 : /* The vectorizer currently punts on loops with non-constant
7130 : steps for the main IV (can't compute number of iterations
7131 : and gives up because of that). As for OpenMP loops it is
7132 : always possible to compute the number of iterations upfront,
7133 : use an alternate IV as the loop iterator. */
7134 22 : expand_omp_build_assign (&gsi, altv,
7135 22 : build_zero_cst (TREE_TYPE (altv)));
7136 22 : tree itype = TREE_TYPE (fd->loops[i + 1].v);
7137 22 : if (POINTER_TYPE_P (itype))
7138 0 : itype = signed_type_for (itype);
7139 22 : t = build_int_cst (itype, (fd->loops[i + 1].cond_code == LT_EXPR
7140 26 : ? -1 : 1));
7141 22 : t = fold_build2 (PLUS_EXPR, itype,
7142 : fold_convert (itype, fd->loops[i + 1].step), t);
7143 22 : t = fold_build2 (PLUS_EXPR, itype, t,
7144 : fold_convert (itype,
7145 : fd->loops[i + 1].m2
7146 : ? n2v : fd->loops[i + 1].n2));
7147 22 : t = fold_build2 (MINUS_EXPR, itype, t,
7148 : fold_convert (itype, fd->loops[i + 1].v));
7149 22 : tree step = fold_convert (itype, fd->loops[i + 1].step);
7150 22 : if (TYPE_UNSIGNED (itype)
7151 22 : && fd->loops[i + 1].cond_code == GT_EXPR)
7152 0 : t = fold_build2 (TRUNC_DIV_EXPR, itype,
7153 : fold_build1 (NEGATE_EXPR, itype, t),
7154 : fold_build1 (NEGATE_EXPR, itype, step));
7155 : else
7156 22 : t = fold_build2 (TRUNC_DIV_EXPR, itype, t, step);
7157 22 : t = fold_convert (TREE_TYPE (altv), t);
7158 22 : expand_omp_build_assign (&gsi, altn2, t);
7159 22 : tree t2 = fold_convert (TREE_TYPE (fd->loops[i + 1].v),
7160 : fd->loops[i + 1].m2
7161 : ? n2v : fd->loops[i + 1].n2);
7162 22 : t2 = fold_build2 (fd->loops[i + 1].cond_code, boolean_type_node,
7163 : fd->loops[i + 1].v, t2);
7164 22 : t2 = force_gimple_operand_gsi (&gsi, t2, true, NULL_TREE,
7165 : true, GSI_SAME_STMT);
7166 22 : gassign *g
7167 22 : = gimple_build_assign (altn2, COND_EXPR, t2, altn2,
7168 22 : build_zero_cst (TREE_TYPE (altv)));
7169 22 : gsi_insert_before (&gsi, g, GSI_SAME_STMT);
7170 : }
7171 3891 : n2v = nextn2v;
7172 :
7173 3891 : make_edge (init_bb, last_bb, EDGE_FALLTHRU);
7174 3891 : if (!gimple_omp_for_combined_into_p (fd->for_stmt))
7175 : {
7176 787 : e = find_edge (entry_bb, last_bb);
7177 787 : redirect_edge_succ (e, bb);
7178 787 : set_immediate_dominator (CDI_DOMINATORS, bb, entry_bb);
7179 787 : set_immediate_dominator (CDI_DOMINATORS, last_bb, init_bb);
7180 : }
7181 :
7182 3891 : last_bb = bb;
7183 : }
7184 : }
7185 9355 : if (!broken_loop)
7186 : {
7187 8283 : class loop *loop = alloc_loop ();
7188 8283 : loop->header = l1_bb;
7189 8283 : loop->latch = cont_bb;
7190 8283 : add_loop (loop, l1_bb->loop_father);
7191 8283 : loop->safelen = safelen_int;
7192 8283 : if (simduid)
7193 : {
7194 3478 : loop->simduid = OMP_CLAUSE__SIMDUID__DECL (simduid);
7195 3478 : cfun->has_simduid_loops = true;
7196 : }
7197 : /* If not -fno-tree-loop-vectorize, hint that we want to vectorize
7198 : the loop. */
7199 8283 : if ((flag_tree_loop_vectorize
7200 3286 : || !OPTION_SET_P (flag_tree_loop_vectorize))
7201 8283 : && flag_tree_loop_optimize
7202 8282 : && loop->safelen > 1)
7203 : {
7204 6500 : loop->force_vectorize = true;
7205 6500 : if (simdlen && tree_fits_uhwi_p (OMP_CLAUSE_SIMDLEN_EXPR (simdlen)))
7206 : {
7207 60 : unsigned HOST_WIDE_INT v
7208 60 : = tree_to_uhwi (OMP_CLAUSE_SIMDLEN_EXPR (simdlen));
7209 60 : if (v < INT_MAX && v <= (unsigned HOST_WIDE_INT) loop->safelen)
7210 60 : loop->simdlen = v;
7211 : }
7212 6500 : cfun->has_force_vectorize_loops = true;
7213 6500 : }
7214 1783 : else if (dont_vectorize)
7215 234 : loop->dont_vectorize = true;
7216 : }
7217 1072 : else if (simduid)
7218 532 : cfun->has_simduid_loops = true;
7219 9355 : }
7220 :
7221 : /* Taskloop construct is represented after gimplification with
7222 : two GIMPLE_OMP_FOR constructs with GIMPLE_OMP_TASK sandwiched
7223 : in between them. This routine expands the outer GIMPLE_OMP_FOR,
7224 : which should just compute all the needed loop temporaries
7225 : for GIMPLE_OMP_TASK. */
7226 :
7227 : static void
7228 1330 : expand_omp_taskloop_for_outer (struct omp_region *region,
7229 : struct omp_for_data *fd,
7230 : gimple *inner_stmt)
7231 : {
7232 1330 : tree type, bias = NULL_TREE;
7233 1330 : basic_block entry_bb, cont_bb, exit_bb;
7234 1330 : gimple_stmt_iterator gsi;
7235 1330 : gassign *assign_stmt;
7236 1330 : tree *counts = NULL;
7237 1330 : int i;
7238 :
7239 1330 : gcc_assert (inner_stmt);
7240 1330 : gcc_assert (region->cont);
7241 1330 : gcc_assert (gimple_code (inner_stmt) == GIMPLE_OMP_TASK
7242 : && gimple_omp_task_taskloop_p (inner_stmt));
7243 1330 : type = TREE_TYPE (fd->loop.v);
7244 :
7245 : /* See if we need to bias by LLONG_MIN. */
7246 1330 : if (fd->iter_type == long_long_unsigned_type_node
7247 42 : && (TREE_CODE (type) == INTEGER_TYPE || TREE_CODE (type) == BITINT_TYPE)
7248 1358 : && !TYPE_UNSIGNED (type))
7249 : {
7250 0 : tree n1, n2;
7251 :
7252 0 : if (fd->loop.cond_code == LT_EXPR)
7253 : {
7254 0 : n1 = fd->loop.n1;
7255 0 : n2 = fold_build2 (PLUS_EXPR, type, fd->loop.n2, fd->loop.step);
7256 : }
7257 : else
7258 : {
7259 0 : n1 = fold_build2 (MINUS_EXPR, type, fd->loop.n2, fd->loop.step);
7260 0 : n2 = fd->loop.n1;
7261 : }
7262 0 : if (TREE_CODE (n1) != INTEGER_CST
7263 0 : || TREE_CODE (n2) != INTEGER_CST
7264 0 : || ((tree_int_cst_sgn (n1) < 0) ^ (tree_int_cst_sgn (n2) < 0)))
7265 0 : bias = fold_convert (fd->iter_type, TYPE_MIN_VALUE (type));
7266 : }
7267 :
 : /* Locate the region's CFG pieces. ENTRY_BB must end in the outer
 : GIMPLE_OMP_FOR and its branch edge must meet the block the
 : continue block falls through to (checked by the asserts). */
7268 1330 : entry_bb = region->entry;
7269 1330 : cont_bb = region->cont;
7270 1330 : gcc_assert (EDGE_COUNT (entry_bb->succs) == 2);
7271 1330 : gcc_assert (BRANCH_EDGE (entry_bb)->dest == FALLTHRU_EDGE (cont_bb)->dest);
7272 1330 : exit_bb = region->exit;
7273 :
7274 1330 : gsi = gsi_last_nondebug_bb (entry_bb);
7275 1330 : gimple *for_stmt = gsi_stmt (gsi);
7276 1330 : gcc_assert (gimple_code (for_stmt) == GIMPLE_OMP_FOR);
7277 1330 : if (fd->collapse > 1)
7278 : {
7279 181 : int first_zero_iter = -1, dummy = -1;
7280 181 : basic_block zero_iter_bb = NULL, dummy_bb = NULL, l2_dom_bb = NULL;
7281 :
 : /* For collapsed loops compute the per-loop iteration counts
 : first; expand_omp_for_init_counts may create ZERO_ITER_BB for
 : the case where some loop has no iterations. */
7282 181 : counts = XALLOCAVEC (tree, fd->collapse);
7283 181 : expand_omp_for_init_counts (fd, &gsi, entry_bb, counts,
7284 : zero_iter_bb, first_zero_iter,
7285 : dummy_bb, dummy, l2_dom_bb);
7286 :
7287 181 : if (zero_iter_bb)
7288 : {
7289 : /* Some counts[i] vars might be uninitialized if
7290 : some loop has zero iterations. But the body shouldn't
7291 : be executed in that case, so just avoid uninit warnings. */
7292 336 : for (i = first_zero_iter; i < fd->collapse; i++)
7293 239 : if (SSA_VAR_P (counts[i]))
7294 226 : suppress_warning (counts[i], OPT_Wuninitialized);
7295 97 : gsi_prev (&gsi);
7296 97 : edge e = split_block (entry_bb, gsi_stmt (gsi));
7297 97 : entry_bb = e->dest;
7298 97 : make_edge (zero_iter_bb, entry_bb, EDGE_FALLTHRU);
7299 97 : gsi = gsi_last_bb (entry_bb);
7300 97 : set_immediate_dominator (CDI_DOMINATORS, entry_bb,
7301 : get_immediate_dominator (CDI_DOMINATORS,
7302 : zero_iter_bb));
7303 : }
7304 : }
7305 :
 : /* Convert the loop bounds N1/N2 into FD->iter_type (going through
 : a signed type for pointers of different precision) and apply the
 : LLONG_MIN bias computed above, if any. */
7306 1330 : tree t0, t1;
7307 1330 : t1 = fd->loop.n2;
7308 1330 : t0 = fd->loop.n1;
7309 2646 : if (POINTER_TYPE_P (TREE_TYPE (t0))
7310 1330 : && TYPE_PRECISION (TREE_TYPE (t0))
7311 14 : != TYPE_PRECISION (fd->iter_type))
7312 : {
7313 : /* Avoid casting pointers to integer of a different size. */
7314 0 : tree itype = signed_type_for (type);
7315 0 : t1 = fold_convert (fd->iter_type, fold_convert (itype, t1));
7316 0 : t0 = fold_convert (fd->iter_type, fold_convert (itype, t0));
7317 : }
7318 : else
7319 : {
7320 1330 : t1 = fold_convert (fd->iter_type, t1);
7321 1330 : t0 = fold_convert (fd->iter_type, t0);
7322 : }
7323 1330 : if (bias)
7324 : {
7325 0 : t1 = fold_build2 (PLUS_EXPR, fd->iter_type, t1, bias);
7326 0 : t0 = fold_build2 (PLUS_EXPR, fd->iter_type, t0, bias);
7327 : }
7328 :
 : /* The first two _LOOPTEMP_ clauses on the inner GIMPLE_OMP_TASK are
 : the temporaries that receive the start and end of the iteration
 : space; they are assigned T0 and T1 below. */
7329 1330 : tree innerc = omp_find_clause (gimple_omp_task_clauses (inner_stmt),
7330 : OMP_CLAUSE__LOOPTEMP_);
7331 1330 : gcc_assert (innerc);
7332 1330 : tree startvar = OMP_CLAUSE_DECL (innerc);
7333 1330 : innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc), OMP_CLAUSE__LOOPTEMP_);
7334 1330 : gcc_assert (innerc);
7335 1330 : tree endvar = OMP_CLAUSE_DECL (innerc);
7336 1330 : if (fd->collapse > 1 && TREE_CODE (fd->loop.n2) != INTEGER_CST)
7337 : {
7338 101 : innerc = find_lastprivate_looptemp (fd, innerc);
7339 101 : if (innerc)
7340 : {
7341 : /* If needed (inner taskloop has lastprivate clause), propagate
7342 : down the total number of iterations. */
7343 31 : tree t = force_gimple_operand_gsi (&gsi, fd->loop.n2, false,
7344 : NULL_TREE, false,
7345 : GSI_CONTINUE_LINKING);
7346 31 : assign_stmt = gimple_build_assign (OMP_CLAUSE_DECL (innerc), t);
7347 31 : gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
7348 : }
7349 : }
7350 :
7351 1330 : t0 = force_gimple_operand_gsi (&gsi, t0, false, NULL_TREE, false,
7352 : GSI_CONTINUE_LINKING);
7353 1330 : assign_stmt = gimple_build_assign (startvar, t0);
7354 1330 : gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
7355 :
7356 1330 : t1 = force_gimple_operand_gsi (&gsi, t1, false, NULL_TREE, false,
7357 : GSI_CONTINUE_LINKING);
7358 1330 : assign_stmt = gimple_build_assign (endvar, t1);
7359 1330 : gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
7360 1330 : if (fd->collapse > 1)
7361 181 : expand_omp_for_init_vars (fd, &gsi, counts, NULL, inner_stmt, startvar);
7362 :
7363 : /* Remove the GIMPLE_OMP_FOR statement. */
7364 1330 : gsi = gsi_for_stmt (for_stmt);
7365 1330 : gsi_remove (&gsi, true);
7366 :
 : /* Likewise remove the GIMPLE_OMP_CONTINUE and GIMPLE_OMP_RETURN at
 : the end of CONT_BB and EXIT_BB. */
7367 1330 : gsi = gsi_last_nondebug_bb (cont_bb);
7368 1330 : gsi_remove (&gsi, true);
7369 :
7370 1330 : gsi = gsi_last_nondebug_bb (exit_bb);
7371 1330 : gsi_remove (&gsi, true);
7372 :
 : /* With the OMP statements gone the region is straight-line code:
 : drop the now-dead branch edges and recompute dominators. */
7373 1330 : FALLTHRU_EDGE (entry_bb)->probability = profile_probability::always ();
7374 1330 : remove_edge (BRANCH_EDGE (entry_bb));
7375 1330 : FALLTHRU_EDGE (cont_bb)->probability = profile_probability::always ();
7376 1330 : remove_edge (BRANCH_EDGE (cont_bb));
7377 1330 : set_immediate_dominator (CDI_DOMINATORS, exit_bb, cont_bb);
7378 1330 : set_immediate_dominator (CDI_DOMINATORS, region->entry,
7379 : recompute_dominator (CDI_DOMINATORS, region->entry));
7380 1330 : }
7381 :
7382 : /* Taskloop construct is represented after gimplification with
7383 : two GIMPLE_OMP_FOR constructs with GIMPLE_OMP_TASK sandwiched
7384 : in between them. This routine expands the inner GIMPLE_OMP_FOR.
7385 : GOMP_taskloop{,_ull} function arranges for each task to be given just
7386 : a single range of iterations. */
7387 :
static void
expand_omp_taskloop_for_inner (struct omp_region *region,
			       struct omp_for_data *fd,
			       gimple *inner_stmt)
{
  tree e, t, type, itype, vmain, vback, bias = NULL_TREE;
  basic_block entry_bb, exit_bb, body_bb, cont_bb, collapse_bb = NULL;
  basic_block fin_bb;
  gimple_stmt_iterator gsi;
  edge ep;
  /* A "broken" loop has no GIMPLE_OMP_CONTINUE region (region->cont is
     NULL), e.g. when the body never reaches the loop latch.  */
  bool broken_loop = region->cont == NULL;
  tree *counts = NULL;
  tree n1, n2, step;

  /* Compute in ITYPE, a signed variant of TYPE when the iteration
     variable is a pointer.  */
  itype = type = TREE_TYPE (fd->loop.v);
  if (POINTER_TYPE_P (type))
    itype = signed_type_for (type);

  /* See if we need to bias by LLONG_MIN.  */
  if (fd->iter_type == long_long_unsigned_type_node
      && (TREE_CODE (type) == INTEGER_TYPE || TREE_CODE (type) == BITINT_TYPE)
      && !TYPE_UNSIGNED (type))
    {
      tree n1, n2;

      if (fd->loop.cond_code == LT_EXPR)
	{
	  n1 = fd->loop.n1;
	  n2 = fold_build2 (PLUS_EXPR, type, fd->loop.n2, fd->loop.step);
	}
      else
	{
	  n1 = fold_build2 (MINUS_EXPR, type, fd->loop.n2, fd->loop.step);
	  n2 = fd->loop.n1;
	}
      /* Bias whenever the bounds are not both known constants of the
	 same sign, so that the unsigned iteration type can represent
	 the whole signed range.  */
      if (TREE_CODE (n1) != INTEGER_CST
	  || TREE_CODE (n2) != INTEGER_CST
	  || ((tree_int_cst_sgn (n1) < 0) ^ (tree_int_cst_sgn (n2) < 0)))
	bias = fold_convert (fd->iter_type, TYPE_MIN_VALUE (type));
    }

  /* Identify the blocks of the region: ENTRY_BB ends in the
     GIMPLE_OMP_FOR, CONT_BB (if any) ends in the GIMPLE_OMP_CONTINUE,
     EXIT_BB ends in the GIMPLE_OMP_RETURN.  FIN_BB is where control
     goes when the loop is skipped or finished.  */
  entry_bb = region->entry;
  cont_bb = region->cont;
  gcc_assert (EDGE_COUNT (entry_bb->succs) == 2);
  fin_bb = BRANCH_EDGE (entry_bb)->dest;
  gcc_assert (broken_loop
	      || (fin_bb == FALLTHRU_EDGE (cont_bb)->dest));
  body_bb = FALLTHRU_EDGE (entry_bb)->dest;
  if (!broken_loop)
    {
      gcc_assert (BRANCH_EDGE (cont_bb)->dest == body_bb);
      gcc_assert (EDGE_COUNT (cont_bb->succs) == 2);
    }
  exit_bb = region->exit;

  /* Iteration space partitioning goes in ENTRY_BB.  */
  gsi = gsi_last_nondebug_bb (entry_bb);
  gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_FOR);

  /* For collapsed loops, emit the per-dimension iteration counts into
     COUNTS so the flattened index can be decomposed later.  */
  if (fd->collapse > 1)
    {
      int first_zero_iter = -1, dummy = -1;
      basic_block l2_dom_bb = NULL, dummy_bb = NULL;

      counts = XALLOCAVEC (tree, fd->collapse);
      expand_omp_for_init_counts (fd, &gsi, entry_bb, counts,
				  fin_bb, first_zero_iter,
				  dummy_bb, dummy, l2_dom_bb);
      t = NULL_TREE;
    }
  else
    /* NOTE(review): this value of T appears to be overwritten below
       before any use — confirm whether the assignment is vestigial.  */
    t = integer_one_node;

  /* N1/N2 are the bounds of the single iteration range this task was
     given; GOMP_taskloop{,_ull} communicates them through the two
     _LOOPTEMP_ clauses on the GIMPLE_OMP_FOR.  */
  step = fd->loop.step;
  tree innerc = omp_find_clause (gimple_omp_for_clauses (fd->for_stmt),
				 OMP_CLAUSE__LOOPTEMP_);
  gcc_assert (innerc);
  n1 = OMP_CLAUSE_DECL (innerc);
  innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc), OMP_CLAUSE__LOOPTEMP_);
  gcc_assert (innerc);
  n2 = OMP_CLAUSE_DECL (innerc);
  /* Undo the LLONG_MIN bias applied when the bounds were encoded.  */
  if (bias)
    {
      n1 = fold_build2 (PLUS_EXPR, fd->iter_type, n1, bias);
      n2 = fold_build2 (PLUS_EXPR, fd->iter_type, n2, bias);
    }
  n1 = force_gimple_operand_gsi (&gsi, fold_convert (type, n1),
				 true, NULL_TREE, true, GSI_SAME_STMT);
  n2 = force_gimple_operand_gsi (&gsi, fold_convert (itype, n2),
				 true, NULL_TREE, true, GSI_SAME_STMT);
  step = force_gimple_operand_gsi (&gsi, fold_convert (itype, step),
				   true, NULL_TREE, true, GSI_SAME_STMT);

  tree startvar = fd->loop.v;
  tree endvar = NULL_TREE;

  /* If this GIMPLE_OMP_FOR is combined with an inner construct, the
     bounds are instead propagated into the inner statement's
     _LOOPTEMP_ temporaries.  */
  if (gimple_omp_for_combined_p (fd->for_stmt))
    {
      tree clauses = gimple_omp_for_clauses (inner_stmt);
      tree innerc = omp_find_clause (clauses, OMP_CLAUSE__LOOPTEMP_);
      gcc_assert (innerc);
      startvar = OMP_CLAUSE_DECL (innerc);
      innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
				OMP_CLAUSE__LOOPTEMP_);
      gcc_assert (innerc);
      endvar = OMP_CLAUSE_DECL (innerc);
    }
  /* STARTVAR = N1.  Force to a temporary first if STARTVAR is
     addressable, so the gimplified operand stays simple.  */
  t = fold_convert (TREE_TYPE (startvar), n1);
  t = force_gimple_operand_gsi (&gsi, t,
				DECL_P (startvar)
				&& TREE_ADDRESSABLE (startvar),
				NULL_TREE, false, GSI_CONTINUE_LINKING);
  gimple *assign_stmt = gimple_build_assign (startvar, t);
  gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);

  /* E = N2, the loop bound tested against below.  */
  t = fold_convert (TREE_TYPE (startvar), n2);
  e = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
				false, GSI_CONTINUE_LINKING);
  if (endvar)
    {
      /* Also store the bound into ENDVAR and mirror it into the
	 original iteration variable, converting if the types differ.  */
      assign_stmt = gimple_build_assign (endvar, e);
      gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
      if (useless_type_conversion_p (TREE_TYPE (fd->loop.v), TREE_TYPE (e)))
	assign_stmt = gimple_build_assign (fd->loop.v, e);
      else
	assign_stmt = gimple_build_assign (fd->loop.v, NOP_EXPR, e);
      gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
    }

  /* For non-rectangular collapsed loops, track the per-dimension
     bounds needed to recover the individual iteration variables.  */
  tree *nonrect_bounds = NULL;
  if (fd->collapse > 1)
    {
      if (fd->non_rect)
	{
	  nonrect_bounds = XALLOCAVEC (tree, fd->last_nonrect + 1);
	  memset (nonrect_bounds, 0, sizeof (tree) * (fd->last_nonrect + 1));
	}
      gcc_assert (gsi_bb (gsi) == entry_bb);
      expand_omp_for_init_vars (fd, &gsi, counts, nonrect_bounds, inner_stmt,
				startvar);
      /* expand_omp_for_init_vars may have split blocks; re-read the
	 block the iterator now points into.  */
      entry_bb = gsi_bb (gsi);
    }

  if (!broken_loop)
    {
      /* The code controlling the sequential loop replaces the
	 GIMPLE_OMP_CONTINUE.  */
      gsi = gsi_last_nondebug_bb (cont_bb);
      gomp_continue *cont_stmt = as_a <gomp_continue *> (gsi_stmt (gsi));
      gcc_assert (gimple_code (cont_stmt) == GIMPLE_OMP_CONTINUE);
      vmain = gimple_omp_continue_control_use (cont_stmt);
      vback = gimple_omp_continue_control_def (cont_stmt);

      if (!gimple_omp_for_combined_p (fd->for_stmt))
	{
	  /* VBACK = VMAIN + STEP; then branch back while
	     VBACK cond_code E.  */
	  if (POINTER_TYPE_P (type))
	    t = fold_build_pointer_plus (vmain, step);
	  else
	    t = fold_build2 (PLUS_EXPR, type, vmain, step);
	  t = force_gimple_operand_gsi (&gsi, t,
					DECL_P (vback)
					&& TREE_ADDRESSABLE (vback),
					NULL_TREE, true, GSI_SAME_STMT);
	  assign_stmt = gimple_build_assign (vback, t);
	  gsi_insert_before (&gsi, assign_stmt, GSI_SAME_STMT);

	  /* Test the temporary rather than VBACK when VBACK is
	     addressable, to avoid reloading it.  */
	  t = build2 (fd->loop.cond_code, boolean_type_node,
		      DECL_P (vback) && TREE_ADDRESSABLE (vback)
		      ? t : vback, e);
	  gsi_insert_before (&gsi, gimple_build_cond_empty (t), GSI_SAME_STMT);
	}

      /* Remove the GIMPLE_OMP_CONTINUE statement.  */
      gsi_remove (&gsi, true);

      /* For collapsed loops, emit the code updating the individual
	 iteration variables into a new COLLAPSE_BB.  */
      if (fd->collapse > 1 && !gimple_omp_for_combined_p (fd->for_stmt))
	collapse_bb = extract_omp_for_update_vars (fd, nonrect_bounds,
						   cont_bb, body_bb);
    }

  /* Remove the GIMPLE_OMP_FOR statement.  */
  gsi = gsi_for_stmt (fd->for_stmt);
  gsi_remove (&gsi, true);

  /* Remove the GIMPLE_OMP_RETURN statement.  */
  gsi = gsi_last_nondebug_bb (exit_bb);
  gsi_remove (&gsi, true);

  /* Entry now falls through unconditionally into the body.  */
  FALLTHRU_EDGE (entry_bb)->probability = profile_probability::always ();
  if (!broken_loop)
    remove_edge (BRANCH_EDGE (entry_bb));
  else
    {
      /* The latch side is unreachable; drop it and everything it
	 dominated, and clear the stale cont pointer in the outer
	 region.  */
      remove_edge_and_dominated_blocks (BRANCH_EDGE (entry_bb));
      region->outer->cont = NULL;
    }

  /* Connect all the blocks.  */
  if (!broken_loop)
    {
      ep = find_edge (cont_bb, body_bb);
      if (gimple_omp_for_combined_p (fd->for_stmt))
	{
	  /* Combined loops have no latch edge here at all.  */
	  remove_edge (ep);
	  ep = NULL;
	}
      else if (fd->collapse > 1)
	{
	  /* Route the latch through the variable-update block.  */
	  remove_edge (ep);
	  ep = make_edge (cont_bb, collapse_bb, EDGE_TRUE_VALUE);
	}
      else
	ep->flags = EDGE_TRUE_VALUE;
      /* The exit edge is the false arm of the condition, or a plain
	 fallthrough when the latch edge was removed.  */
      find_edge (cont_bb, fin_bb)->flags
	= ep ? EDGE_FALSE_VALUE : EDGE_FALLTHRU;
    }

  /* Dominator info was invalidated by the edge surgery above.  */
  set_immediate_dominator (CDI_DOMINATORS, body_bb,
			   recompute_dominator (CDI_DOMINATORS, body_bb));
  if (!broken_loop)
    set_immediate_dominator (CDI_DOMINATORS, fin_bb,
			     recompute_dominator (CDI_DOMINATORS, fin_bb));

  /* Register the new natural loop in the loop tree; the latch is only
     known when there is no separate collapse block.  */
  if (!broken_loop && !gimple_omp_for_combined_p (fd->for_stmt))
    {
      class loop *loop = alloc_loop ();
      loop->header = body_bb;
      if (collapse_bb == NULL)
	loop->latch = cont_bb;
      add_loop (loop, body_bb->loop_father);
    }
}
7620 :
7621 : /* A subroutine of expand_omp_for. Generate code for an OpenACC
7622 : partitioned loop. The lowering here is abstracted, in that the
7623 : loop parameters are passed through internal functions, which are
7624 : further lowered by oacc_device_lower, once we get to the target
7625 : compiler. The loop is of the form:
7626 :
7627 : for (V = B; V LTGT E; V += S) {BODY}
7628 :
7629 : where LTGT is < or >. We may have a specified chunking size, CHUNKING
7630 : (constant 0 for no chunking) and we will have a GWV partitioning
7631 : mask, specifying dimensions over which the loop is to be
7632 : partitioned (see note below). We generate code that looks like
7633 : (this ignores tiling):
7634 :
7635 : <entry_bb> [incoming FALL->body, BRANCH->exit]
7636 : typedef signedintify (typeof (V)) T; // underlying signed integral type
7637 : T range = E - B;
7638 : T chunk_no = 0;
7639 : T DIR = LTGT == '<' ? +1 : -1;
7640 : T chunk_max = GOACC_LOOP_CHUNK (dir, range, S, CHUNK_SIZE, GWV);
7641 : T step = GOACC_LOOP_STEP (dir, range, S, CHUNK_SIZE, GWV);
7642 :
7643 : <head_bb> [created by splitting end of entry_bb]
7644 : T offset = GOACC_LOOP_OFFSET (dir, range, S, CHUNK_SIZE, GWV, chunk_no);
7645 : T bound = GOACC_LOOP_BOUND (dir, range, S, CHUNK_SIZE, GWV, offset);
7646 : if (!(offset LTGT bound)) goto bottom_bb;
7647 :
7648 : <body_bb> [incoming]
7649 : V = B + offset;
7650 : {BODY}
7651 :
7652 : <cont_bb> [incoming, may == body_bb FALL->exit_bb, BRANCH->body_bb]
7653 : offset += step;
7654 : if (offset LTGT bound) goto body_bb; [*]
7655 :
7656 : <bottom_bb> [created by splitting start of exit_bb] insert BRANCH->head_bb
7657 : chunk_no++;
7658 : if (chunk < chunk_max) goto head_bb;
7659 :
7660 : <exit_bb> [incoming]
7661 : V = B + ((range -/+ 1) / S +/- 1) * S [*]
7662 :
7663 : [*] Needed if V live at end of loop. */
7664 :
7665 : static void
7666 11711 : expand_oacc_for (struct omp_region *region, struct omp_for_data *fd)
7667 : {
7668 11711 : bool is_oacc_kernels_parallelized
7669 11711 : = (lookup_attribute ("oacc kernels parallelized",
7670 11711 : DECL_ATTRIBUTES (current_function_decl)) != NULL);
7671 11711 : {
7672 11711 : bool is_oacc_kernels
7673 11711 : = (lookup_attribute ("oacc kernels",
7674 11711 : DECL_ATTRIBUTES (current_function_decl)) != NULL);
7675 11711 : if (is_oacc_kernels_parallelized)
7676 386 : gcc_checking_assert (is_oacc_kernels);
7677 : }
7678 23422 : gcc_assert (gimple_in_ssa_p (cfun) == is_oacc_kernels_parallelized);
7679 : /* In the following, some of the 'gimple_in_ssa_p (cfun)' conditionals are
7680 : for SSA specifics, and some are for 'parloops' OpenACC
7681 : 'kernels'-parallelized specifics. */
7682 :
7683 11711 : tree v = fd->loop.v;
7684 11711 : enum tree_code cond_code = fd->loop.cond_code;
7685 11711 : enum tree_code plus_code = PLUS_EXPR;
7686 :
7687 11711 : tree chunk_size = integer_minus_one_node;
7688 11711 : tree gwv = integer_zero_node;
7689 11711 : tree iter_type = TREE_TYPE (v);
7690 11711 : tree diff_type = iter_type;
7691 11711 : tree plus_type = iter_type;
7692 11711 : struct oacc_collapse *counts = NULL;
7693 :
7694 11711 : gcc_checking_assert (gimple_omp_for_kind (fd->for_stmt)
7695 : == GF_OMP_FOR_KIND_OACC_LOOP);
7696 11711 : gcc_assert (!gimple_omp_for_combined_into_p (fd->for_stmt));
7697 11711 : gcc_assert (cond_code == LT_EXPR || cond_code == GT_EXPR);
7698 :
7699 11711 : if (POINTER_TYPE_P (iter_type))
7700 : {
7701 52 : plus_code = POINTER_PLUS_EXPR;
7702 52 : plus_type = sizetype;
7703 : }
7704 24124 : for (int ix = fd->collapse; ix--;)
7705 : {
7706 12413 : tree diff_type2 = TREE_TYPE (fd->loops[ix].step);
7707 12413 : if (TYPE_PRECISION (diff_type) < TYPE_PRECISION (diff_type2))
7708 0 : diff_type = diff_type2;
7709 : }
7710 11711 : if (POINTER_TYPE_P (diff_type) || TYPE_UNSIGNED (diff_type))
7711 1002 : diff_type = signed_type_for (diff_type);
7712 11711 : if (TYPE_PRECISION (diff_type) < TYPE_PRECISION (integer_type_node))
7713 23 : diff_type = integer_type_node;
7714 :
7715 11711 : basic_block entry_bb = region->entry; /* BB ending in OMP_FOR */
7716 11711 : basic_block exit_bb = region->exit; /* BB ending in OMP_RETURN */
7717 11711 : basic_block cont_bb = region->cont; /* BB ending in OMP_CONTINUE */
7718 11711 : basic_block bottom_bb = NULL;
7719 :
7720 : /* entry_bb has two successors; the branch edge is to the exit
7721 : block (or to finalization blocks preceding it), fallthrough edge
7722 : to body. */
7723 11711 : gcc_assert (EDGE_COUNT (entry_bb->succs) == 2);
7724 :
7725 : /* If cont_bb non-NULL, it has 2 successors. The branch successor is
7726 : body_bb, or to a block whose only successor is the body_bb. Its
7727 : fallthrough successor is the final block (same as the branch
7728 : successor of the entry_bb), possibly via finalization blocks. */
7729 11711 : if (cont_bb)
7730 : {
7731 11672 : basic_block body_bb = FALLTHRU_EDGE (entry_bb)->dest;
7732 11672 : basic_block bed = BRANCH_EDGE (cont_bb)->dest;
7733 :
7734 11672 : gcc_assert (EDGE_COUNT (cont_bb->succs) == 2);
7735 11672 : gcc_assert (bed == body_bb || single_succ_edge (bed)->dest == body_bb);
7736 : }
7737 : else
7738 39 : gcc_assert (!gimple_in_ssa_p (cfun));
7739 :
7740 11711 : tree chunk_no;
7741 11711 : tree chunk_max = NULL_TREE;
7742 11711 : tree bound, offset;
7743 11711 : tree step = create_tmp_var (diff_type, ".step");
7744 11711 : bool up = cond_code == LT_EXPR;
7745 11871 : tree dir = build_int_cst (diff_type, up ? +1 : -1);
7746 11711 : bool chunking = !gimple_in_ssa_p (cfun);
7747 11711 : bool negating;
7748 :
7749 : /* Tiling vars. */
7750 11711 : tree tile_size = NULL_TREE;
7751 11711 : tree element_s = NULL_TREE;
7752 11711 : tree e_bound = NULL_TREE, e_offset = NULL_TREE, e_step = NULL_TREE;
7753 11711 : basic_block elem_body_bb = NULL;
7754 11711 : basic_block elem_cont_bb = NULL;
7755 :
7756 : /* SSA instances. */
7757 11711 : tree offset_incr = NULL_TREE;
7758 11711 : tree offset_init = NULL_TREE;
7759 :
7760 11711 : gimple_stmt_iterator gsi;
7761 11711 : gassign *ass;
7762 11711 : gcall *call;
7763 11711 : gimple *stmt;
7764 11711 : tree expr;
7765 11711 : location_t loc;
7766 11711 : edge split, be, fte;
7767 :
7768 : /* Split the end of entry_bb to create head_bb. */
7769 11711 : split = split_block (entry_bb, last_nondebug_stmt (entry_bb));
7770 11711 : basic_block head_bb = split->dest;
7771 11711 : entry_bb = split->src;
7772 :
7773 : /* Chunk setup goes at end of entry_bb, replacing the omp_for. */
7774 11711 : gsi = gsi_last_nondebug_bb (entry_bb);
7775 11711 : gomp_for *for_stmt = as_a <gomp_for *> (gsi_stmt (gsi));
7776 11711 : loc = gimple_location (for_stmt);
7777 :
7778 11711 : if (gimple_in_ssa_p (cfun))
7779 : {
7780 386 : offset_init = gimple_omp_for_index (for_stmt, 0);
7781 386 : gcc_assert (integer_zerop (fd->loop.n1));
7782 : /* The SSA parallelizer does gang parallelism. */
7783 386 : gwv = build_int_cst (integer_type_node, GOMP_DIM_MASK (GOMP_DIM_GANG));
7784 : }
7785 :
7786 11711 : if (fd->collapse > 1 || fd->tiling)
7787 : {
7788 1188 : gcc_assert (!gimple_in_ssa_p (cfun) && up);
7789 594 : counts = XALLOCAVEC (struct oacc_collapse, fd->collapse);
7790 594 : tree total = expand_oacc_collapse_init (fd, &gsi, counts, diff_type,
7791 594 : TREE_TYPE (fd->loop.n2), loc);
7792 :
7793 594 : if (SSA_VAR_P (fd->loop.n2))
7794 : {
7795 107 : total = force_gimple_operand_gsi (&gsi, total, false, NULL_TREE,
7796 : true, GSI_SAME_STMT);
7797 107 : ass = gimple_build_assign (fd->loop.n2, total);
7798 107 : gsi_insert_before (&gsi, ass, GSI_SAME_STMT);
7799 : }
7800 : }
7801 :
7802 11711 : tree b = fd->loop.n1;
7803 11711 : tree e = fd->loop.n2;
7804 11711 : tree s = fd->loop.step;
7805 :
7806 11711 : b = force_gimple_operand_gsi (&gsi, b, true, NULL_TREE, true, GSI_SAME_STMT);
7807 11711 : e = force_gimple_operand_gsi (&gsi, e, true, NULL_TREE, true, GSI_SAME_STMT);
7808 :
7809 : /* Convert the step, avoiding possible unsigned->signed overflow. */
7810 11711 : negating = !up && TYPE_UNSIGNED (TREE_TYPE (s));
7811 32 : if (negating)
7812 32 : s = fold_build1 (NEGATE_EXPR, TREE_TYPE (s), s);
7813 11711 : s = fold_convert (diff_type, s);
7814 11711 : if (negating)
7815 32 : s = fold_build1 (NEGATE_EXPR, diff_type, s);
7816 11711 : s = force_gimple_operand_gsi (&gsi, s, true, NULL_TREE, true, GSI_SAME_STMT);
7817 :
7818 11711 : if (!chunking)
7819 386 : chunk_size = integer_zero_node;
7820 11711 : expr = fold_convert (diff_type, chunk_size);
7821 11711 : chunk_size = force_gimple_operand_gsi (&gsi, expr, true,
7822 : NULL_TREE, true, GSI_SAME_STMT);
7823 :
7824 11711 : if (fd->tiling)
7825 : {
7826 : /* Determine the tile size and element step,
7827 : modify the outer loop step size. */
7828 177 : tile_size = create_tmp_var (diff_type, ".tile_size");
7829 177 : expr = build_int_cst (diff_type, 1);
7830 461 : for (int ix = 0; ix < fd->collapse; ix++)
7831 284 : expr = fold_build2 (MULT_EXPR, diff_type, counts[ix].tile, expr);
7832 177 : expr = force_gimple_operand_gsi (&gsi, expr, true,
7833 : NULL_TREE, true, GSI_SAME_STMT);
7834 177 : ass = gimple_build_assign (tile_size, expr);
7835 177 : gsi_insert_before (&gsi, ass, GSI_SAME_STMT);
7836 :
7837 177 : element_s = create_tmp_var (diff_type, ".element_s");
7838 177 : ass = gimple_build_assign (element_s, s);
7839 177 : gsi_insert_before (&gsi, ass, GSI_SAME_STMT);
7840 :
7841 177 : expr = fold_build2 (MULT_EXPR, diff_type, s, tile_size);
7842 177 : s = force_gimple_operand_gsi (&gsi, expr, true,
7843 : NULL_TREE, true, GSI_SAME_STMT);
7844 : }
7845 :
7846 : /* Determine the range, avoiding possible unsigned->signed overflow. */
7847 11711 : negating = !up && TYPE_UNSIGNED (iter_type);
7848 23374 : expr = fold_build2 (MINUS_EXPR, plus_type,
7849 : fold_convert (plus_type, negating ? b : e),
7850 : fold_convert (plus_type, negating ? e : b));
7851 11711 : expr = fold_convert (diff_type, expr);
7852 11711 : if (negating)
7853 48 : expr = fold_build1 (NEGATE_EXPR, diff_type, expr);
7854 11711 : tree range = force_gimple_operand_gsi (&gsi, expr, true,
7855 : NULL_TREE, true, GSI_SAME_STMT);
7856 :
7857 11711 : chunk_no = build_int_cst (diff_type, 0);
7858 11711 : if (chunking)
7859 : {
7860 11325 : gcc_assert (!gimple_in_ssa_p (cfun));
7861 :
7862 11325 : expr = chunk_no;
7863 11325 : chunk_max = create_tmp_var (diff_type, ".chunk_max");
7864 11325 : chunk_no = create_tmp_var (diff_type, ".chunk_no");
7865 :
7866 11325 : ass = gimple_build_assign (chunk_no, expr);
7867 11325 : gsi_insert_before (&gsi, ass, GSI_SAME_STMT);
7868 :
7869 11325 : call = gimple_build_call_internal (IFN_GOACC_LOOP, 6,
7870 : build_int_cst (integer_type_node,
7871 : IFN_GOACC_LOOP_CHUNKS),
7872 : dir, range, s, chunk_size, gwv);
7873 11325 : gimple_call_set_lhs (call, chunk_max);
7874 11325 : gimple_set_location (call, loc);
7875 11325 : gsi_insert_before (&gsi, call, GSI_SAME_STMT);
7876 : }
7877 : else
7878 : chunk_size = chunk_no;
7879 :
7880 11711 : call = gimple_build_call_internal (IFN_GOACC_LOOP, 6,
7881 : build_int_cst (integer_type_node,
7882 : IFN_GOACC_LOOP_STEP),
7883 : dir, range, s, chunk_size, gwv);
7884 11711 : gimple_call_set_lhs (call, step);
7885 11711 : gimple_set_location (call, loc);
7886 11711 : gsi_insert_before (&gsi, call, GSI_SAME_STMT);
7887 :
7888 : /* Remove the GIMPLE_OMP_FOR. */
7889 11711 : gsi_remove (&gsi, true);
7890 :
7891 : /* Fixup edges from head_bb. */
7892 11711 : be = BRANCH_EDGE (head_bb);
7893 11711 : fte = FALLTHRU_EDGE (head_bb);
7894 11711 : be->flags |= EDGE_FALSE_VALUE;
7895 11711 : fte->flags ^= EDGE_FALLTHRU | EDGE_TRUE_VALUE;
7896 :
7897 11711 : basic_block body_bb = fte->dest;
7898 :
7899 11711 : if (gimple_in_ssa_p (cfun))
7900 : {
7901 386 : gsi = gsi_last_nondebug_bb (cont_bb);
7902 386 : gomp_continue *cont_stmt = as_a <gomp_continue *> (gsi_stmt (gsi));
7903 :
7904 386 : offset = gimple_omp_continue_control_use (cont_stmt);
7905 386 : offset_incr = gimple_omp_continue_control_def (cont_stmt);
7906 : }
7907 : else
7908 : {
7909 11325 : offset = create_tmp_var (diff_type, ".offset");
7910 11325 : offset_init = offset_incr = offset;
7911 : }
7912 11711 : bound = create_tmp_var (TREE_TYPE (offset), ".bound");
7913 :
7914 : /* Loop offset & bound go into head_bb. */
7915 11711 : gsi = gsi_start_bb (head_bb);
7916 :
7917 11711 : call = gimple_build_call_internal (IFN_GOACC_LOOP, 7,
7918 : build_int_cst (integer_type_node,
7919 : IFN_GOACC_LOOP_OFFSET),
7920 : dir, range, s,
7921 : chunk_size, gwv, chunk_no);
7922 11711 : gimple_call_set_lhs (call, offset_init);
7923 11711 : gimple_set_location (call, loc);
7924 11711 : gsi_insert_after (&gsi, call, GSI_CONTINUE_LINKING);
7925 :
7926 11711 : call = gimple_build_call_internal (IFN_GOACC_LOOP, 7,
7927 : build_int_cst (integer_type_node,
7928 : IFN_GOACC_LOOP_BOUND),
7929 : dir, range, s,
7930 : chunk_size, gwv, offset_init);
7931 11711 : gimple_call_set_lhs (call, bound);
7932 11711 : gimple_set_location (call, loc);
7933 11711 : gsi_insert_after (&gsi, call, GSI_CONTINUE_LINKING);
7934 :
7935 11711 : expr = build2 (cond_code, boolean_type_node, offset_init, bound);
7936 11711 : gsi_insert_after (&gsi, gimple_build_cond_empty (expr),
7937 : GSI_CONTINUE_LINKING);
7938 :
7939 : /* V assignment goes into body_bb. */
7940 11711 : if (!gimple_in_ssa_p (cfun))
7941 : {
7942 11325 : gsi = gsi_start_bb (body_bb);
7943 :
7944 11325 : expr = build2 (plus_code, iter_type, b,
7945 : fold_convert (plus_type, offset));
7946 11325 : expr = force_gimple_operand_gsi (&gsi, expr, false, NULL_TREE,
7947 : true, GSI_SAME_STMT);
7948 11325 : ass = gimple_build_assign (v, expr);
7949 11325 : gsi_insert_before (&gsi, ass, GSI_SAME_STMT);
7950 :
7951 11325 : if (fd->collapse > 1 || fd->tiling)
7952 594 : expand_oacc_collapse_vars (fd, false, &gsi, counts, v, diff_type);
7953 :
7954 11325 : if (fd->tiling)
7955 : {
7956 : /* Determine the range of the element loop -- usually simply
7957 : the tile_size, but could be smaller if the final
7958 : iteration of the outer loop is a partial tile. */
7959 177 : tree e_range = create_tmp_var (diff_type, ".e_range");
7960 :
7961 177 : expr = build2 (MIN_EXPR, diff_type,
7962 : build2 (MINUS_EXPR, diff_type, bound, offset),
7963 : build2 (MULT_EXPR, diff_type, tile_size,
7964 : element_s));
7965 177 : expr = force_gimple_operand_gsi (&gsi, expr, false, NULL_TREE,
7966 : true, GSI_SAME_STMT);
7967 177 : ass = gimple_build_assign (e_range, expr);
7968 177 : gsi_insert_before (&gsi, ass, GSI_SAME_STMT);
7969 :
7970 : /* Determine bound, offset & step of inner loop. */
7971 177 : e_bound = create_tmp_var (diff_type, ".e_bound");
7972 177 : e_offset = create_tmp_var (diff_type, ".e_offset");
7973 177 : e_step = create_tmp_var (diff_type, ".e_step");
7974 :
7975 : /* Mark these as element loops. */
7976 177 : tree t, e_gwv = integer_minus_one_node;
7977 177 : tree chunk = build_int_cst (diff_type, 0); /* Never chunked. */
7978 :
7979 177 : t = build_int_cst (integer_type_node, IFN_GOACC_LOOP_OFFSET);
7980 177 : call = gimple_build_call_internal (IFN_GOACC_LOOP, 7, t, dir, e_range,
7981 : element_s, chunk, e_gwv, chunk);
7982 177 : gimple_call_set_lhs (call, e_offset);
7983 177 : gimple_set_location (call, loc);
7984 177 : gsi_insert_before (&gsi, call, GSI_SAME_STMT);
7985 :
7986 177 : t = build_int_cst (integer_type_node, IFN_GOACC_LOOP_BOUND);
7987 177 : call = gimple_build_call_internal (IFN_GOACC_LOOP, 7, t, dir, e_range,
7988 : element_s, chunk, e_gwv, e_offset);
7989 177 : gimple_call_set_lhs (call, e_bound);
7990 177 : gimple_set_location (call, loc);
7991 177 : gsi_insert_before (&gsi, call, GSI_SAME_STMT);
7992 :
7993 177 : t = build_int_cst (integer_type_node, IFN_GOACC_LOOP_STEP);
7994 177 : call = gimple_build_call_internal (IFN_GOACC_LOOP, 6, t, dir, e_range,
7995 : element_s, chunk, e_gwv);
7996 177 : gimple_call_set_lhs (call, e_step);
7997 177 : gimple_set_location (call, loc);
7998 177 : gsi_insert_before (&gsi, call, GSI_SAME_STMT);
7999 :
8000 : /* Add test and split block. */
8001 177 : expr = build2 (cond_code, boolean_type_node, e_offset, e_bound);
8002 177 : stmt = gimple_build_cond_empty (expr);
8003 177 : gsi_insert_before (&gsi, stmt, GSI_SAME_STMT);
8004 177 : split = split_block (body_bb, stmt);
8005 177 : elem_body_bb = split->dest;
8006 177 : if (cont_bb == body_bb)
8007 146 : cont_bb = elem_body_bb;
8008 177 : body_bb = split->src;
8009 :
8010 177 : split->flags ^= EDGE_FALLTHRU | EDGE_TRUE_VALUE;
8011 :
8012 : /* Add a dummy exit for the tiled block when cont_bb is missing. */
8013 177 : if (cont_bb == NULL)
8014 : {
8015 5 : edge e = make_edge (body_bb, exit_bb, EDGE_FALSE_VALUE);
8016 5 : e->probability = profile_probability::even ();
8017 5 : split->probability = profile_probability::even ();
8018 : }
8019 :
8020 : /* Initialize the user's loop vars. */
8021 177 : gsi = gsi_start_bb (elem_body_bb);
8022 177 : expand_oacc_collapse_vars (fd, true, &gsi, counts, e_offset,
8023 : diff_type);
8024 : }
8025 : }
8026 :
8027 : /* Loop increment goes into cont_bb. If this is not a loop, we
8028 : will have spawned threads as if it was, and each one will
8029 : execute one iteration. The specification is not explicit about
8030 : whether such constructs are ill-formed or not, and they can
8031 : occur, especially when noreturn routines are involved. */
8032 11711 : if (cont_bb)
8033 : {
8034 11672 : gsi = gsi_last_nondebug_bb (cont_bb);
8035 11672 : gomp_continue *cont_stmt = as_a <gomp_continue *> (gsi_stmt (gsi));
8036 11672 : loc = gimple_location (cont_stmt);
8037 :
8038 11672 : if (fd->tiling)
8039 : {
8040 : /* Insert element loop increment and test. */
8041 172 : expr = build2 (PLUS_EXPR, diff_type, e_offset, e_step);
8042 172 : expr = force_gimple_operand_gsi (&gsi, expr, false, NULL_TREE,
8043 : true, GSI_SAME_STMT);
8044 172 : ass = gimple_build_assign (e_offset, expr);
8045 172 : gsi_insert_before (&gsi, ass, GSI_SAME_STMT);
8046 172 : expr = build2 (cond_code, boolean_type_node, e_offset, e_bound);
8047 :
8048 172 : stmt = gimple_build_cond_empty (expr);
8049 172 : gsi_insert_before (&gsi, stmt, GSI_SAME_STMT);
8050 172 : split = split_block (cont_bb, stmt);
8051 172 : elem_cont_bb = split->src;
8052 172 : cont_bb = split->dest;
8053 :
8054 172 : split->flags ^= EDGE_FALLTHRU | EDGE_FALSE_VALUE;
8055 172 : split->probability = profile_probability::unlikely ().guessed ();
8056 172 : edge latch_edge
8057 172 : = make_edge (elem_cont_bb, elem_body_bb, EDGE_TRUE_VALUE);
8058 172 : latch_edge->probability = profile_probability::likely ().guessed ();
8059 :
8060 172 : edge skip_edge = make_edge (body_bb, cont_bb, EDGE_FALSE_VALUE);
8061 172 : skip_edge->probability = profile_probability::unlikely ().guessed ();
8062 172 : edge loop_entry_edge = EDGE_SUCC (body_bb, 1 - skip_edge->dest_idx);
8063 172 : loop_entry_edge->probability
8064 172 : = profile_probability::likely ().guessed ();
8065 :
8066 172 : gsi = gsi_for_stmt (cont_stmt);
8067 : }
8068 :
8069 : /* Increment offset. */
8070 11672 : if (gimple_in_ssa_p (cfun))
8071 386 : expr = build2 (plus_code, iter_type, offset,
8072 : fold_convert (plus_type, step));
8073 : else
8074 11286 : expr = build2 (PLUS_EXPR, diff_type, offset, step);
8075 11672 : expr = force_gimple_operand_gsi (&gsi, expr, false, NULL_TREE,
8076 : true, GSI_SAME_STMT);
8077 11672 : ass = gimple_build_assign (offset_incr, expr);
8078 11672 : gsi_insert_before (&gsi, ass, GSI_SAME_STMT);
8079 11672 : expr = build2 (cond_code, boolean_type_node, offset_incr, bound);
8080 11672 : gsi_insert_before (&gsi, gimple_build_cond_empty (expr), GSI_SAME_STMT);
8081 :
8082 : /* Remove the GIMPLE_OMP_CONTINUE. */
8083 11672 : gsi_remove (&gsi, true);
8084 :
8085 : /* Fixup edges from cont_bb. */
8086 11672 : be = BRANCH_EDGE (cont_bb);
8087 11672 : fte = FALLTHRU_EDGE (cont_bb);
8088 11672 : be->flags |= EDGE_TRUE_VALUE;
8089 11672 : fte->flags ^= EDGE_FALLTHRU | EDGE_FALSE_VALUE;
8090 :
8091 11672 : if (chunking)
8092 : {
8093 : /* Split the beginning of exit_bb to make bottom_bb. We
8094 : need to insert a nop at the start, because splitting is
8095 : after a stmt, not before. */
8096 11286 : gsi = gsi_start_bb (exit_bb);
8097 11286 : stmt = gimple_build_nop ();
8098 11286 : gsi_insert_before (&gsi, stmt, GSI_SAME_STMT);
8099 11286 : split = split_block (exit_bb, stmt);
8100 11286 : bottom_bb = split->src;
8101 11286 : exit_bb = split->dest;
8102 11286 : gsi = gsi_last_bb (bottom_bb);
8103 :
8104 : /* Chunk increment and test goes into bottom_bb. */
8105 11286 : expr = build2 (PLUS_EXPR, diff_type, chunk_no,
8106 : build_int_cst (diff_type, 1));
8107 11286 : ass = gimple_build_assign (chunk_no, expr);
8108 11286 : gsi_insert_after (&gsi, ass, GSI_CONTINUE_LINKING);
8109 :
8110 : /* Chunk test at end of bottom_bb. */
8111 11286 : expr = build2 (LT_EXPR, boolean_type_node, chunk_no, chunk_max);
8112 11286 : gsi_insert_after (&gsi, gimple_build_cond_empty (expr),
8113 : GSI_CONTINUE_LINKING);
8114 :
8115 : /* Fixup edges from bottom_bb. */
8116 11286 : split->flags ^= EDGE_FALLTHRU | EDGE_FALSE_VALUE;
8117 11286 : split->probability = profile_probability::unlikely ().guessed ();
8118 11286 : edge latch_edge = make_edge (bottom_bb, head_bb, EDGE_TRUE_VALUE);
8119 11286 : latch_edge->probability = profile_probability::likely ().guessed ();
8120 : }
8121 : }
8122 :
8123 11711 : gsi = gsi_last_nondebug_bb (exit_bb);
8124 11711 : gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_RETURN);
8125 11711 : loc = gimple_location (gsi_stmt (gsi));
8126 :
8127 11711 : if (!gimple_in_ssa_p (cfun))
8128 : {
8129 : /* Insert the final value of V, in case it is live. This is the
8130 : value for the only thread that survives past the join. */
8131 11325 : expr = fold_build2 (MINUS_EXPR, diff_type, range, dir);
8132 11325 : expr = fold_build2 (PLUS_EXPR, diff_type, expr, s);
8133 11325 : expr = fold_build2 (TRUNC_DIV_EXPR, diff_type, expr, s);
8134 11325 : expr = fold_build2 (MULT_EXPR, diff_type, expr, s);
8135 11325 : expr = build2 (plus_code, iter_type, b, fold_convert (plus_type, expr));
8136 11325 : expr = force_gimple_operand_gsi (&gsi, expr, false, NULL_TREE,
8137 : true, GSI_SAME_STMT);
8138 11325 : ass = gimple_build_assign (v, expr);
8139 11325 : gsi_insert_before (&gsi, ass, GSI_SAME_STMT);
8140 : }
8141 :
8142 : /* Remove the OMP_RETURN. */
8143 11711 : gsi_remove (&gsi, true);
8144 :
8145 11711 : if (cont_bb)
8146 : {
8147 : /* We now have one, two or three nested loops. Update the loop
8148 : structures. */
8149 11672 : class loop *parent = entry_bb->loop_father;
8150 11672 : class loop *body = body_bb->loop_father;
8151 :
8152 11672 : if (chunking)
8153 : {
8154 11286 : class loop *chunk_loop = alloc_loop ();
8155 11286 : chunk_loop->header = head_bb;
8156 11286 : chunk_loop->latch = bottom_bb;
8157 11286 : add_loop (chunk_loop, parent);
8158 11286 : parent = chunk_loop;
8159 : }
8160 386 : else if (parent != body)
8161 : {
8162 386 : gcc_assert (body->header == body_bb);
8163 386 : gcc_assert (body->latch == cont_bb
8164 : || single_pred (body->latch) == cont_bb);
8165 : parent = NULL;
8166 : }
8167 :
8168 11286 : if (parent)
8169 : {
8170 11286 : class loop *body_loop = alloc_loop ();
8171 11286 : body_loop->header = body_bb;
8172 11286 : body_loop->latch = cont_bb;
8173 11286 : add_loop (body_loop, parent);
8174 :
8175 11286 : if (fd->tiling)
8176 : {
8177 : /* Insert tiling's element loop. */
8178 172 : class loop *inner_loop = alloc_loop ();
8179 172 : inner_loop->header = elem_body_bb;
8180 172 : inner_loop->latch = elem_cont_bb;
8181 172 : add_loop (inner_loop, body_loop);
8182 : }
8183 : }
8184 : }
8185 11711 : }
8186 :
/* Expand the OMP loop defined by REGION.  INNER_STMT, if non-NULL, is the
   statement of an inner construct this loop has been combined with (passed
   through to the worksharing expanders).  Extracts the loop description
   from the GIMPLE_OMP_FOR at the region entry, diagnoses invalid
   non-rectangular loop steps, clears the abnormal edge flags left over
   from lowering, and dispatches to the appropriate expansion routine.  */

static void
expand_omp_for (struct omp_region *region, gimple *inner_stmt)
{
  struct omp_for_data fd;
  struct omp_for_data_loop *loops;

  /* One omp_for_data_loop entry per member of the collapsed loop nest.  */
  loops = XALLOCAVEC (struct omp_for_data_loop,
		      gimple_omp_for_collapse
			(last_nondebug_stmt (region->entry)));
  omp_extract_for_data (as_a <gomp_for *> (last_nondebug_stmt (region->entry)),
			&fd, loops);
  region->sched_kind = fd.sched_kind;
  region->sched_modifiers = fd.sched_modifiers;
  region->has_lastprivate_conditional = fd.lastprivate_conditional != 0;
  if (fd.non_rect && !gimple_omp_for_combined_into_p (fd.for_stmt))
    {
      /* For non-rectangular loops with compile-time constant multipliers
	 and steps, verify the OpenMP restriction that the span
	 (m2 - m1) * outer_step is a multiple of this loop's step, and
	 diagnose a violation here rather than producing wrong code.  */
      for (int i = fd.first_nonrect; i <= fd.last_nonrect; i++)
	if ((loops[i].m1 || loops[i].m2)
	    && (loops[i].m1 == NULL_TREE
		|| TREE_CODE (loops[i].m1) == INTEGER_CST)
	    && (loops[i].m2 == NULL_TREE
		|| TREE_CODE (loops[i].m2) == INTEGER_CST)
	    && TREE_CODE (loops[i].step) == INTEGER_CST
	    && TREE_CODE (loops[i - loops[i].outer].step) == INTEGER_CST)
	  {
	    tree t;
	    tree itype = TREE_TYPE (loops[i].v);
	    if (loops[i].m1 && loops[i].m2)
	      t = fold_build2 (MINUS_EXPR, itype, loops[i].m2, loops[i].m1);
	    else if (loops[i].m1)
	      t = fold_build1 (NEGATE_EXPR, itype, loops[i].m1);
	    else
	      t = loops[i].m2;
	    t = fold_build2 (MULT_EXPR, itype, t,
			     fold_convert (itype,
					   loops[i - loops[i].outer].step));
	    /* For unsigned iteration with a downward loop negate both
	       operands so TRUNC_MOD_EXPR computes the intended result.  */
	    if (TYPE_UNSIGNED (itype) && loops[i].cond_code == GT_EXPR)
	      t = fold_build2 (TRUNC_MOD_EXPR, itype,
			       fold_build1 (NEGATE_EXPR, itype, t),
			       fold_build1 (NEGATE_EXPR, itype,
					    fold_convert (itype,
							  loops[i].step)));
	    else
	      t = fold_build2 (TRUNC_MOD_EXPR, itype, t,
			       fold_convert (itype, loops[i].step));
	    if (integer_nonzerop (t))
	      error_at (gimple_location (fd.for_stmt),
			"invalid OpenMP non-rectangular loop step; "
			"%<(%E - %E) * %E%> is not a multiple of loop %d "
			"step %qE",
			loops[i].m2 ? loops[i].m2 : integer_zero_node,
			loops[i].m1 ? loops[i].m1 : integer_zero_node,
			loops[i - loops[i].outer].step, i + 1,
			loops[i].step);
	  }
    }

  /* Lowering marked these edges abnormal to keep the region intact;
     make them ordinary control flow again before expansion.  */
  gcc_assert (EDGE_COUNT (region->entry->succs) == 2);
  BRANCH_EDGE (region->entry)->flags &= ~EDGE_ABNORMAL;
  FALLTHRU_EDGE (region->entry)->flags &= ~EDGE_ABNORMAL;
  if (region->cont)
    {
      gcc_assert (EDGE_COUNT (region->cont->succs) == 2);
      BRANCH_EDGE (region->cont)->flags &= ~EDGE_ABNORMAL;
      FALLTHRU_EDGE (region->cont)->flags &= ~EDGE_ABNORMAL;
    }
  else
    /* If there isn't a continue then this is a degenerate case where
       the introduction of abnormal edges during lowering will prevent
       original loops from being detected.  Fix that up.  */
    loops_state_set (LOOPS_NEED_FIXUP);

  /* Dispatch on the loop kind / schedule.  */
  if (gimple_omp_for_kind (fd.for_stmt) == GF_OMP_FOR_KIND_SIMD)
    expand_omp_simd (region, &fd);
  else if (gimple_omp_for_kind (fd.for_stmt) == GF_OMP_FOR_KIND_OACC_LOOP)
    {
      gcc_assert (!inner_stmt && !fd.non_rect);
      expand_oacc_for (region, &fd);
    }
  else if (gimple_omp_for_kind (fd.for_stmt) == GF_OMP_FOR_KIND_TASKLOOP)
    {
      if (gimple_omp_for_combined_into_p (fd.for_stmt))
	expand_omp_taskloop_for_inner (region, &fd, inner_stmt);
      else
	expand_omp_taskloop_for_outer (region, &fd, inner_stmt);
    }
  else if (fd.sched_kind == OMP_CLAUSE_SCHEDULE_STATIC
	   && !fd.have_ordered)
    {
      /* Static schedules without ordered need no libgomp loop calls.  */
      if (fd.chunk_size == NULL)
	expand_omp_for_static_nochunk (region, &fd, inner_stmt);
      else
	expand_omp_for_static_chunk (region, &fd, inner_stmt);
    }
  else
    {
      /* Generic path: pick the GOMP_loop_*_start / _next builtin pair by
	 computing an index relative to the STATIC variants.  SCHED encodes
	 the schedule for the combined GOMP_loop_start entry points; bit 31
	 is set for the monotonic variants (presumably matching libgomp's
	 GFS_* flags — confirm against libgomp.h).  */
      int fn_index, start_ix, next_ix;
      unsigned HOST_WIDE_INT sched = 0;
      tree sched_arg = NULL_TREE;

      gcc_assert (gimple_omp_for_kind (fd.for_stmt)
		  == GF_OMP_FOR_KIND_FOR && !fd.non_rect);
      if (fd.chunk_size == NULL
	  && fd.sched_kind == OMP_CLAUSE_SCHEDULE_STATIC)
	fd.chunk_size = integer_zero_node;
      switch (fd.sched_kind)
	{
	case OMP_CLAUSE_SCHEDULE_RUNTIME:
	  if ((fd.sched_modifiers & OMP_CLAUSE_SCHEDULE_NONMONOTONIC) != 0
	      && fd.lastprivate_conditional == 0)
	    {
	      gcc_assert (!fd.have_ordered);
	      fn_index = 6;
	      sched = 4;
	    }
	  else if ((fd.sched_modifiers & OMP_CLAUSE_SCHEDULE_MONOTONIC) == 0
		   && !fd.have_ordered
		   && fd.lastprivate_conditional == 0)
	    fn_index = 7;
	  else
	    {
	      fn_index = 3;
	      sched = (HOST_WIDE_INT_1U << 31);
	    }
	  break;
	case OMP_CLAUSE_SCHEDULE_DYNAMIC:
	case OMP_CLAUSE_SCHEDULE_GUIDED:
	  if ((fd.sched_modifiers & OMP_CLAUSE_SCHEDULE_MONOTONIC) == 0
	      && !fd.have_ordered
	      && fd.lastprivate_conditional == 0)
	    {
	      /* Nonmonotonic dynamic/guided variants.  */
	      fn_index = 3 + fd.sched_kind;
	      sched = (fd.sched_kind == OMP_CLAUSE_SCHEDULE_GUIDED) + 2;
	      break;
	    }
	  fn_index = fd.sched_kind;
	  sched = (fd.sched_kind == OMP_CLAUSE_SCHEDULE_GUIDED) + 2;
	  sched += (HOST_WIDE_INT_1U << 31);
	  break;
	case OMP_CLAUSE_SCHEDULE_STATIC:
	  /* Static here only happens together with ordered;
	     the unordered case was handled above.  */
	  gcc_assert (fd.have_ordered);
	  fn_index = 0;
	  sched = (HOST_WIDE_INT_1U << 31) + 1;
	  break;
	default:
	  gcc_unreachable ();
	}
      if (!fd.ordered)
	fn_index += fd.have_ordered * 8;
      if (fd.ordered)
	start_ix = ((int)BUILT_IN_GOMP_LOOP_DOACROSS_STATIC_START) + fn_index;
      else
	start_ix = ((int)BUILT_IN_GOMP_LOOP_STATIC_START) + fn_index;
      next_ix = ((int)BUILT_IN_GOMP_LOOP_STATIC_NEXT) + fn_index;
      if (fd.have_reductemp || fd.have_pointer_condtemp)
	{
	  /* Task reductions / conditional lastprivate need the combined
	     GOMP_loop_*start entry points that take the schedule as an
	     explicit argument.  */
	  if (fd.ordered)
	    start_ix = (int)BUILT_IN_GOMP_LOOP_DOACROSS_START;
	  else if (fd.have_ordered)
	    start_ix = (int)BUILT_IN_GOMP_LOOP_ORDERED_START;
	  else
	    start_ix = (int)BUILT_IN_GOMP_LOOP_START;
	  sched_arg = build_int_cstu (long_integer_type_node, sched);
	  if (!fd.chunk_size)
	    fd.chunk_size = integer_zero_node;
	}
      /* Shift to the unsigned long long variants when needed.  */
      if (fd.iter_type == long_long_unsigned_type_node)
	{
	  start_ix += ((int)BUILT_IN_GOMP_LOOP_ULL_STATIC_START
		       - (int)BUILT_IN_GOMP_LOOP_STATIC_START);
	  next_ix += ((int)BUILT_IN_GOMP_LOOP_ULL_STATIC_NEXT
		      - (int)BUILT_IN_GOMP_LOOP_STATIC_NEXT);
	}
      expand_omp_for_generic (region, &fd, (enum built_in_function) start_ix,
			      (enum built_in_function) next_ix, sched_arg,
			      inner_stmt);
    }
}
8367 :
8368 : /* Expand code for an OpenMP sections directive. In pseudo code, we generate
8369 :
8370 : v = GOMP_sections_start (n);
8371 : L0:
8372 : switch (v)
8373 : {
8374 : case 0:
8375 : goto L2;
8376 : case 1:
8377 : section 1;
8378 : goto L1;
8379 : case 2:
8380 : ...
8381 : case n:
8382 : ...
8383 : default:
8384 : abort ();
8385 : }
8386 : L1:
8387 : v = GOMP_sections_next ();
8388 : goto L0;
8389 : L2:
8390 : reduction;
8391 :
8392 : If this is a combined parallel sections, replace the call to
8393 : GOMP_sections_start with call to GOMP_sections_next. */
8394 :
8395 : static void
8396 378 : expand_omp_sections (struct omp_region *region)
8397 : {
8398 378 : tree t, u, vin = NULL, vmain, vnext, l2;
8399 378 : unsigned len;
8400 378 : basic_block entry_bb, l0_bb, l1_bb, l2_bb, default_bb;
8401 378 : gimple_stmt_iterator si, switch_si;
8402 378 : gomp_sections *sections_stmt;
8403 378 : gimple *stmt;
8404 378 : gomp_continue *cont;
8405 378 : edge_iterator ei;
8406 378 : edge e;
8407 378 : struct omp_region *inner;
8408 378 : unsigned i, casei;
8409 378 : bool exit_reachable = region->cont != NULL;
8410 :
8411 378 : gcc_assert (region->exit != NULL);
8412 378 : entry_bb = region->entry;
8413 378 : l0_bb = single_succ (entry_bb);
8414 378 : l1_bb = region->cont;
8415 378 : l2_bb = region->exit;
8416 719 : if (single_pred_p (l2_bb) && single_pred (l2_bb) == l0_bb)
8417 314 : l2 = gimple_block_label (l2_bb);
8418 : else
8419 : {
8420 : /* This can happen if there are reductions. */
8421 64 : len = EDGE_COUNT (l0_bb->succs);
8422 64 : gcc_assert (len > 0);
8423 64 : e = EDGE_SUCC (l0_bb, len - 1);
8424 64 : si = gsi_last_nondebug_bb (e->dest);
8425 64 : l2 = NULL_TREE;
8426 64 : if (gsi_end_p (si)
8427 64 : || gimple_code (gsi_stmt (si)) != GIMPLE_OMP_SECTION)
8428 64 : l2 = gimple_block_label (e->dest);
8429 : else
8430 0 : FOR_EACH_EDGE (e, ei, l0_bb->succs)
8431 : {
8432 0 : si = gsi_last_nondebug_bb (e->dest);
8433 0 : if (gsi_end_p (si)
8434 0 : || gimple_code (gsi_stmt (si)) != GIMPLE_OMP_SECTION)
8435 : {
8436 0 : l2 = gimple_block_label (e->dest);
8437 0 : break;
8438 : }
8439 : }
8440 : }
8441 378 : if (exit_reachable)
8442 332 : default_bb = create_empty_bb (l1_bb->prev_bb);
8443 : else
8444 46 : default_bb = create_empty_bb (l0_bb);
8445 :
8446 : /* We will build a switch() with enough cases for all the
8447 : GIMPLE_OMP_SECTION regions, a '0' case to handle the end of more work
8448 : and a default case to abort if something goes wrong. */
8449 378 : len = EDGE_COUNT (l0_bb->succs);
8450 :
8451 : /* Use vec::quick_push on label_vec throughout, since we know the size
8452 : in advance. */
8453 378 : auto_vec<tree> label_vec (len);
8454 :
8455 : /* The call to GOMP_sections_start goes in ENTRY_BB, replacing the
8456 : GIMPLE_OMP_SECTIONS statement. */
8457 378 : si = gsi_last_nondebug_bb (entry_bb);
8458 378 : sections_stmt = as_a <gomp_sections *> (gsi_stmt (si));
8459 378 : gcc_assert (gimple_code (sections_stmt) == GIMPLE_OMP_SECTIONS);
8460 378 : vin = gimple_omp_sections_control (sections_stmt);
8461 378 : tree clauses = gimple_omp_sections_clauses (sections_stmt);
8462 378 : tree reductmp = omp_find_clause (clauses, OMP_CLAUSE__REDUCTEMP_);
8463 378 : tree condtmp = omp_find_clause (clauses, OMP_CLAUSE__CONDTEMP_);
8464 378 : tree cond_var = NULL_TREE;
8465 378 : if (reductmp || condtmp)
8466 : {
8467 18 : tree reductions = null_pointer_node, mem = null_pointer_node;
8468 18 : tree memv = NULL_TREE, condtemp = NULL_TREE;
8469 18 : gimple_stmt_iterator gsi = gsi_none ();
8470 18 : gimple *g = NULL;
8471 18 : if (reductmp)
8472 : {
8473 8 : reductions = OMP_CLAUSE_DECL (reductmp);
8474 8 : gcc_assert (TREE_CODE (reductions) == SSA_NAME);
8475 8 : g = SSA_NAME_DEF_STMT (reductions);
8476 8 : reductions = gimple_assign_rhs1 (g);
8477 8 : OMP_CLAUSE_DECL (reductmp) = reductions;
8478 8 : gsi = gsi_for_stmt (g);
8479 : }
8480 : else
8481 10 : gsi = si;
8482 18 : if (condtmp)
8483 : {
8484 12 : condtemp = OMP_CLAUSE_DECL (condtmp);
8485 12 : tree c = omp_find_clause (OMP_CLAUSE_CHAIN (condtmp),
8486 : OMP_CLAUSE__CONDTEMP_);
8487 12 : cond_var = OMP_CLAUSE_DECL (c);
8488 12 : tree type = TREE_TYPE (condtemp);
8489 12 : memv = create_tmp_var (type);
8490 12 : TREE_ADDRESSABLE (memv) = 1;
8491 12 : unsigned cnt = 0;
8492 74 : for (c = clauses; c; c = OMP_CLAUSE_CHAIN (c))
8493 62 : if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_LASTPRIVATE
8494 62 : && OMP_CLAUSE_LASTPRIVATE_CONDITIONAL (c))
8495 24 : ++cnt;
8496 12 : unsigned HOST_WIDE_INT sz
8497 12 : = tree_to_uhwi (TYPE_SIZE_UNIT (TREE_TYPE (type))) * cnt;
8498 12 : expand_omp_build_assign (&gsi, memv, build_int_cst (type, sz),
8499 : false);
8500 12 : mem = build_fold_addr_expr (memv);
8501 : }
8502 18 : t = build_int_cst (unsigned_type_node, len - 1);
8503 18 : u = builtin_decl_explicit (BUILT_IN_GOMP_SECTIONS2_START);
8504 18 : stmt = gimple_build_call (u, 3, t, reductions, mem);
8505 18 : gimple_call_set_lhs (stmt, vin);
8506 18 : gsi_insert_before (&gsi, stmt, GSI_SAME_STMT);
8507 18 : if (condtmp)
8508 : {
8509 12 : expand_omp_build_assign (&gsi, condtemp, memv, false);
8510 12 : tree t = build2 (PLUS_EXPR, TREE_TYPE (cond_var),
8511 12 : vin, build_one_cst (TREE_TYPE (cond_var)));
8512 12 : expand_omp_build_assign (&gsi, cond_var, t, false);
8513 : }
8514 18 : if (reductmp)
8515 : {
8516 8 : gsi_remove (&gsi, true);
8517 8 : release_ssa_name (gimple_assign_lhs (g));
8518 : }
8519 : }
8520 360 : else if (!is_combined_parallel (region))
8521 : {
8522 : /* If we are not inside a combined parallel+sections region,
8523 : call GOMP_sections_start. */
8524 248 : t = build_int_cst (unsigned_type_node, len - 1);
8525 248 : u = builtin_decl_explicit (BUILT_IN_GOMP_SECTIONS_START);
8526 248 : stmt = gimple_build_call (u, 1, t);
8527 : }
8528 : else
8529 : {
8530 : /* Otherwise, call GOMP_sections_next. */
8531 112 : u = builtin_decl_explicit (BUILT_IN_GOMP_SECTIONS_NEXT);
8532 112 : stmt = gimple_build_call (u, 0);
8533 : }
8534 378 : if (!reductmp && !condtmp)
8535 : {
8536 360 : gimple_call_set_lhs (stmt, vin);
8537 360 : gsi_insert_after (&si, stmt, GSI_SAME_STMT);
8538 : }
8539 378 : gsi_remove (&si, true);
8540 :
8541 : /* The switch() statement replacing GIMPLE_OMP_SECTIONS_SWITCH goes in
8542 : L0_BB. */
8543 378 : switch_si = gsi_last_nondebug_bb (l0_bb);
8544 378 : gcc_assert (gimple_code (gsi_stmt (switch_si)) == GIMPLE_OMP_SECTIONS_SWITCH);
8545 378 : if (exit_reachable)
8546 : {
8547 332 : cont = as_a <gomp_continue *> (last_nondebug_stmt (l1_bb));
8548 332 : gcc_assert (gimple_code (cont) == GIMPLE_OMP_CONTINUE);
8549 332 : vmain = gimple_omp_continue_control_use (cont);
8550 332 : vnext = gimple_omp_continue_control_def (cont);
8551 : }
8552 : else
8553 : {
8554 : vmain = vin;
8555 : vnext = NULL_TREE;
8556 : }
8557 :
8558 378 : t = build_case_label (build_int_cst (unsigned_type_node, 0), NULL, l2);
8559 378 : label_vec.quick_push (t);
8560 378 : i = 1;
8561 :
8562 : /* Convert each GIMPLE_OMP_SECTION into a CASE_LABEL_EXPR. */
8563 378 : for (inner = region->inner, casei = 1;
8564 1260 : inner;
8565 882 : inner = inner->next, i++, casei++)
8566 : {
8567 882 : basic_block s_entry_bb, s_exit_bb;
8568 :
8569 : /* Skip optional reduction region. */
8570 882 : if (inner->type == GIMPLE_OMP_ATOMIC_LOAD)
8571 : {
8572 27 : --i;
8573 27 : --casei;
8574 27 : continue;
8575 : }
8576 :
8577 855 : s_entry_bb = inner->entry;
8578 855 : s_exit_bb = inner->exit;
8579 :
8580 855 : t = gimple_block_label (s_entry_bb);
8581 855 : u = build_int_cst (unsigned_type_node, casei);
8582 855 : u = build_case_label (u, NULL, t);
8583 855 : label_vec.quick_push (u);
8584 :
8585 855 : si = gsi_last_nondebug_bb (s_entry_bb);
8586 855 : gcc_assert (gimple_code (gsi_stmt (si)) == GIMPLE_OMP_SECTION);
8587 855 : gcc_assert (i < len || gimple_omp_section_last_p (gsi_stmt (si)));
8588 855 : gsi_remove (&si, true);
8589 855 : single_succ_edge (s_entry_bb)->flags = EDGE_FALLTHRU;
8590 :
8591 855 : if (s_exit_bb == NULL)
8592 122 : continue;
8593 :
8594 733 : si = gsi_last_nondebug_bb (s_exit_bb);
8595 733 : gcc_assert (gimple_code (gsi_stmt (si)) == GIMPLE_OMP_RETURN);
8596 733 : gsi_remove (&si, true);
8597 :
8598 733 : single_succ_edge (s_exit_bb)->flags = EDGE_FALLTHRU;
8599 : }
8600 :
8601 : /* Error handling code goes in DEFAULT_BB. */
8602 378 : t = gimple_block_label (default_bb);
8603 378 : u = build_case_label (NULL, NULL, t);
8604 378 : make_edge (l0_bb, default_bb, 0);
8605 378 : add_bb_to_loop (default_bb, current_loops->tree_root);
8606 :
8607 378 : stmt = gimple_build_switch (vmain, u, label_vec);
8608 378 : gsi_insert_after (&switch_si, stmt, GSI_SAME_STMT);
8609 378 : gsi_remove (&switch_si, true);
8610 :
8611 378 : si = gsi_start_bb (default_bb);
8612 378 : stmt = gimple_build_call (builtin_decl_explicit (BUILT_IN_TRAP), 0);
8613 378 : gsi_insert_after (&si, stmt, GSI_CONTINUE_LINKING);
8614 :
8615 378 : if (exit_reachable)
8616 : {
8617 332 : tree bfn_decl;
8618 :
8619 : /* Code to get the next section goes in L1_BB. */
8620 332 : si = gsi_last_nondebug_bb (l1_bb);
8621 332 : gcc_assert (gimple_code (gsi_stmt (si)) == GIMPLE_OMP_CONTINUE);
8622 :
8623 332 : bfn_decl = builtin_decl_explicit (BUILT_IN_GOMP_SECTIONS_NEXT);
8624 332 : stmt = gimple_build_call (bfn_decl, 0);
8625 332 : gimple_call_set_lhs (stmt, vnext);
8626 332 : gsi_insert_before (&si, stmt, GSI_SAME_STMT);
8627 332 : if (cond_var)
8628 : {
8629 12 : tree t = build2 (PLUS_EXPR, TREE_TYPE (cond_var),
8630 12 : vnext, build_one_cst (TREE_TYPE (cond_var)));
8631 12 : expand_omp_build_assign (&si, cond_var, t, false);
8632 : }
8633 332 : gsi_remove (&si, true);
8634 :
8635 332 : single_succ_edge (l1_bb)->flags = EDGE_FALLTHRU;
8636 : }
8637 :
8638 : /* Cleanup function replaces GIMPLE_OMP_RETURN in EXIT_BB. */
8639 378 : si = gsi_last_nondebug_bb (l2_bb);
8640 378 : if (gimple_omp_return_nowait_p (gsi_stmt (si)))
8641 251 : t = builtin_decl_explicit (BUILT_IN_GOMP_SECTIONS_END_NOWAIT);
8642 127 : else if (gimple_omp_return_lhs (gsi_stmt (si)))
8643 0 : t = builtin_decl_explicit (BUILT_IN_GOMP_SECTIONS_END_CANCEL);
8644 : else
8645 127 : t = builtin_decl_explicit (BUILT_IN_GOMP_SECTIONS_END);
8646 378 : stmt = gimple_build_call (t, 0);
8647 378 : if (gimple_omp_return_lhs (gsi_stmt (si)))
8648 0 : gimple_call_set_lhs (stmt, gimple_omp_return_lhs (gsi_stmt (si)));
8649 378 : gsi_insert_after (&si, stmt, GSI_SAME_STMT);
8650 378 : gsi_remove (&si, true);
8651 :
8652 378 : set_immediate_dominator (CDI_DOMINATORS, default_bb, l0_bb);
8653 378 : }
8654 :
8655 : /* Expand code for an OpenMP single or scope directive. We've already expanded
8656 : much of the code, here we simply place the GOMP_barrier call. */
8657 :
8658 : static void
8659 1252 : expand_omp_single (struct omp_region *region)
8660 : {
8661 1252 : basic_block entry_bb, exit_bb;
8662 1252 : gimple_stmt_iterator si;
8663 :
8664 1252 : entry_bb = region->entry;
8665 1252 : exit_bb = region->exit;
8666 :
8667 1252 : si = gsi_last_nondebug_bb (entry_bb);
8668 1252 : enum gimple_code code = gimple_code (gsi_stmt (si));
8669 1252 : gcc_assert (code == GIMPLE_OMP_SINGLE || code == GIMPLE_OMP_SCOPE);
8670 1252 : gsi_remove (&si, true);
8671 1252 : single_succ_edge (entry_bb)->flags = EDGE_FALLTHRU;
8672 :
8673 1252 : if (exit_bb == NULL)
8674 : {
8675 8 : gcc_assert (code == GIMPLE_OMP_SCOPE);
8676 8 : return;
8677 : }
8678 :
8679 1244 : si = gsi_last_nondebug_bb (exit_bb);
8680 1244 : if (!gimple_omp_return_nowait_p (gsi_stmt (si)))
8681 : {
8682 973 : tree t = gimple_omp_return_lhs (gsi_stmt (si));
8683 973 : gsi_insert_after (&si, omp_build_barrier (t), GSI_SAME_STMT);
8684 : }
8685 1244 : gsi_remove (&si, true);
8686 1244 : single_succ_edge (exit_bb)->flags = EDGE_FALLTHRU;
8687 : }
8688 :
8689 : /* Generic expansion for OpenMP synchronization directives: master,
8690 : ordered and critical. All we need to do here is remove the entry
8691 : and exit markers for REGION. */
8692 :
8693 : static void
8694 10779 : expand_omp_synch (struct omp_region *region)
8695 : {
8696 10779 : basic_block entry_bb, exit_bb;
8697 10779 : gimple_stmt_iterator si;
8698 :
8699 10779 : entry_bb = region->entry;
8700 10779 : exit_bb = region->exit;
8701 :
8702 10779 : si = gsi_last_nondebug_bb (entry_bb);
8703 10779 : gcc_assert (gimple_code (gsi_stmt (si)) == GIMPLE_OMP_SINGLE
8704 : || gimple_code (gsi_stmt (si)) == GIMPLE_OMP_MASTER
8705 : || gimple_code (gsi_stmt (si)) == GIMPLE_OMP_MASKED
8706 : || gimple_code (gsi_stmt (si)) == GIMPLE_OMP_TASKGROUP
8707 : || gimple_code (gsi_stmt (si)) == GIMPLE_OMP_ORDERED
8708 : || gimple_code (gsi_stmt (si)) == GIMPLE_OMP_CRITICAL
8709 : || gimple_code (gsi_stmt (si)) == GIMPLE_OMP_TEAMS);
8710 10779 : if (gimple_code (gsi_stmt (si)) == GIMPLE_OMP_TEAMS
8711 10779 : && gimple_omp_teams_host (as_a <gomp_teams *> (gsi_stmt (si))))
8712 : {
8713 2496 : expand_omp_taskreg (region);
8714 2496 : return;
8715 : }
8716 8283 : gsi_remove (&si, true);
8717 8283 : single_succ_edge (entry_bb)->flags = EDGE_FALLTHRU;
8718 :
8719 8283 : if (exit_bb)
8720 : {
8721 7725 : si = gsi_last_nondebug_bb (exit_bb);
8722 7725 : gcc_assert (gimple_code (gsi_stmt (si)) == GIMPLE_OMP_RETURN);
8723 7725 : gsi_remove (&si, true);
8724 7725 : single_succ_edge (exit_bb)->flags = EDGE_FALLTHRU;
8725 : }
8726 : }
8727 :
8728 : /* Translate enum omp_memory_order to enum memmodel for the embedded
8729 : fail clause in there. */
8730 :
8731 : static enum memmodel
8732 2936 : omp_memory_order_to_fail_memmodel (enum omp_memory_order mo)
8733 : {
8734 2936 : switch (mo & OMP_FAIL_MEMORY_ORDER_MASK)
8735 : {
8736 2665 : case OMP_FAIL_MEMORY_ORDER_UNSPECIFIED:
8737 2665 : switch (mo & OMP_MEMORY_ORDER_MASK)
8738 : {
8739 : case OMP_MEMORY_ORDER_RELAXED: return MEMMODEL_RELAXED;
8740 : case OMP_MEMORY_ORDER_ACQUIRE: return MEMMODEL_ACQUIRE;
8741 : case OMP_MEMORY_ORDER_RELEASE: return MEMMODEL_RELAXED;
8742 : case OMP_MEMORY_ORDER_ACQ_REL: return MEMMODEL_ACQUIRE;
8743 : case OMP_MEMORY_ORDER_SEQ_CST: return MEMMODEL_SEQ_CST;
8744 0 : default: break;
8745 : }
8746 0 : gcc_unreachable ();
8747 : case OMP_FAIL_MEMORY_ORDER_RELAXED: return MEMMODEL_RELAXED;
8748 : case OMP_FAIL_MEMORY_ORDER_ACQUIRE: return MEMMODEL_ACQUIRE;
8749 : case OMP_FAIL_MEMORY_ORDER_SEQ_CST: return MEMMODEL_SEQ_CST;
8750 0 : default: gcc_unreachable ();
8751 : }
8752 : }
8753 :
8754 : /* Translate enum omp_memory_order to enum memmodel. The two enums
8755 : are using different numbers so that OMP_MEMORY_ORDER_UNSPECIFIED
8756 : is 0 and omp_memory_order has the fail mode encoded in it too. */
8757 :
8758 : static enum memmodel
8759 10045 : omp_memory_order_to_memmodel (enum omp_memory_order mo)
8760 : {
8761 10045 : enum memmodel ret, fail_ret;
8762 10045 : switch (mo & OMP_MEMORY_ORDER_MASK)
8763 : {
8764 : case OMP_MEMORY_ORDER_RELAXED: ret = MEMMODEL_RELAXED; break;
8765 : case OMP_MEMORY_ORDER_ACQUIRE: ret = MEMMODEL_ACQUIRE; break;
8766 : case OMP_MEMORY_ORDER_RELEASE: ret = MEMMODEL_RELEASE; break;
8767 : case OMP_MEMORY_ORDER_ACQ_REL: ret = MEMMODEL_ACQ_REL; break;
8768 : case OMP_MEMORY_ORDER_SEQ_CST: ret = MEMMODEL_SEQ_CST; break;
8769 0 : default: gcc_unreachable ();
8770 : }
8771 : /* If we drop the -Winvalid-memory-model warning for C++17 P0418R2,
8772 : we can just return ret here unconditionally. Otherwise, work around
8773 : it here and make sure fail memmodel is not stronger. */
8774 10045 : if ((mo & OMP_FAIL_MEMORY_ORDER_MASK) == OMP_FAIL_MEMORY_ORDER_UNSPECIFIED)
8775 : return ret;
8776 142 : fail_ret = omp_memory_order_to_fail_memmodel (mo);
8777 142 : if (fail_ret > ret)
8778 13 : return fail_ret;
8779 : return ret;
8780 : }
8781 :
/* A subroutine of expand_omp_atomic.  Attempt to implement the atomic
   operation as a normal volatile load.  LOAD_BB holds the
   GIMPLE_OMP_ATOMIC_LOAD; ADDR is the address of the atomic location,
   LOADED_VAL the variable receiving the value, and INDEX is log2 of the
   access size, used to pick the __atomic_load_N builtin.  Returns false
   if the target provides no suitable builtin.  */

static bool
expand_omp_atomic_load (basic_block load_bb, tree addr,
			tree loaded_val, int index)
{
  enum built_in_function tmpbase;
  gimple_stmt_iterator gsi;
  basic_block store_bb;
  location_t loc;
  gimple *stmt;
  tree decl, type, itype;

  gsi = gsi_last_nondebug_bb (load_bb);
  stmt = gsi_stmt (gsi);
  gcc_assert (gimple_code (stmt) == GIMPLE_OMP_ATOMIC_LOAD);
  loc = gimple_location (stmt);

  /* ??? If the target does not implement atomic_load_optab[mode], and mode
     is smaller than word size, then expand_atomic_load assumes that the load
     is atomic.  We could avoid the builtin entirely in this case.  */

  /* BUILT_IN_ATOMIC_LOAD_N + 1 is the 1-byte variant; INDEX selects the
     size-specific builtin.  */
  tmpbase = (enum built_in_function) (BUILT_IN_ATOMIC_LOAD_N + index + 1);
  decl = builtin_decl_explicit (tmpbase);
  if (decl == NULL_TREE)
    return false;

  type = TREE_TYPE (loaded_val);
  itype = TREE_TYPE (TREE_TYPE (decl));

  /* Build __atomic_load_N (addr, memmodel) with the memory order taken
     from the OMP atomic statement.  */
  enum omp_memory_order omo = gimple_omp_atomic_memory_order (stmt);
  tree mo = build_int_cst (integer_type_node,
			   omp_memory_order_to_memmodel (omo));
  gcall *call = gimple_build_call (decl, 2, addr, mo);
  gimple_set_location (call, loc);
  gimple_set_vuse (call, gimple_vuse (stmt));
  gimple *repl;
  if (!useless_type_conversion_p (type, itype))
    {
      /* The builtin returns the builtin's integer type; VIEW_CONVERT it
	 back to the type of LOADED_VAL.  */
      tree lhs = make_ssa_name (itype);
      gimple_call_set_lhs (call, lhs);
      gsi_insert_before (&gsi, call, GSI_SAME_STMT);
      repl = gimple_build_assign (loaded_val,
				  build1 (VIEW_CONVERT_EXPR, type, lhs));
      gimple_set_location (repl, loc);
    }
  else
    {
      gimple_call_set_lhs (call, loaded_val);
      repl = call;
    }
  gsi_replace (&gsi, repl, true);

  /* The matching GIMPLE_OMP_ATOMIC_STORE in the successor block is now
     redundant; remove it.  */
  store_bb = single_succ (load_bb);
  gsi = gsi_last_nondebug_bb (store_bb);
  gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_ATOMIC_STORE);
  gsi_remove (&gsi, true);

  return true;
}
8843 :
/* A subroutine of expand_omp_atomic.  Attempt to implement the atomic
   operation as a normal volatile store.  LOAD_BB holds the
   GIMPLE_OMP_ATOMIC_LOAD, whose single successor holds the
   GIMPLE_OMP_ATOMIC_STORE.  ADDR is the address of the atomic location,
   STORED_VAL the value to store, LOADED_VAL the receiver of the old
   value when the construct needs it (exchange), and INDEX is log2 of
   the access size.  Returns false if no suitable builtin exists or an
   exchange is not supported for the mode.  */

static bool
expand_omp_atomic_store (basic_block load_bb, tree addr,
			 tree loaded_val, tree stored_val, int index)
{
  enum built_in_function tmpbase;
  gimple_stmt_iterator gsi;
  basic_block store_bb = single_succ (load_bb);
  location_t loc;
  gimple *stmt;
  tree decl, type, itype;
  machine_mode imode;
  bool exchange;

  gsi = gsi_last_nondebug_bb (load_bb);
  stmt = gsi_stmt (gsi);
  gcc_assert (gimple_code (stmt) == GIMPLE_OMP_ATOMIC_LOAD);

  /* If the load value is needed, then this isn't a store but an exchange.  */
  exchange = gimple_omp_atomic_need_value_p (stmt);

  gsi = gsi_last_nondebug_bb (store_bb);
  stmt = gsi_stmt (gsi);
  gcc_assert (gimple_code (stmt) == GIMPLE_OMP_ATOMIC_STORE);
  loc = gimple_location (stmt);

  /* ??? If the target does not implement atomic_store_optab[mode], and mode
     is smaller than word size, then expand_atomic_store assumes that the store
     is atomic.  We could avoid the builtin entirely in this case.  */

  /* Pick __atomic_exchange_N or __atomic_store_N by size.  */
  tmpbase = (exchange ? BUILT_IN_ATOMIC_EXCHANGE_N : BUILT_IN_ATOMIC_STORE_N);
  tmpbase = (enum built_in_function) ((int) tmpbase + index + 1);
  decl = builtin_decl_explicit (tmpbase);
  if (decl == NULL_TREE)
    return false;

  type = TREE_TYPE (stored_val);

  /* Dig out the type of the function's second argument.  */
  itype = TREE_TYPE (decl);
  itype = TYPE_ARG_TYPES (itype);
  itype = TREE_CHAIN (itype);
  itype = TREE_VALUE (itype);
  imode = TYPE_MODE (itype);

  if (exchange && !can_atomic_exchange_p (imode, true))
    return false;

  /* Reinterpret STORED_VAL in the builtin's integer type if needed.  */
  if (!useless_type_conversion_p (itype, type))
    stored_val = fold_build1_loc (loc, VIEW_CONVERT_EXPR, itype, stored_val);
  enum omp_memory_order omo = gimple_omp_atomic_memory_order (stmt);
  tree mo = build_int_cst (integer_type_node,
			   omp_memory_order_to_memmodel (omo));
  stored_val = force_gimple_operand_gsi (&gsi, stored_val, true, NULL_TREE,
					 true, GSI_SAME_STMT);
  gcall *call = gimple_build_call (decl, 3, addr, stored_val, mo);
  gimple_set_location (call, loc);
  /* Carry over the virtual operands of the GIMPLE_OMP_ATOMIC_STORE.  */
  gimple_set_vuse (call, gimple_vuse (stmt));
  gimple_set_vdef (call, gimple_vdef (stmt));

  gimple *repl = call;
  if (exchange)
    {
      /* For an exchange, route the builtin's return value (the old
	 contents) into LOADED_VAL, converting types if needed.  */
      if (!useless_type_conversion_p (type, itype))
	{
	  tree lhs = make_ssa_name (itype);
	  gimple_call_set_lhs (call, lhs);
	  gsi_insert_before (&gsi, call, GSI_SAME_STMT);
	  repl = gimple_build_assign (loaded_val,
				      build1 (VIEW_CONVERT_EXPR, type, lhs));
	  gimple_set_location (repl, loc);
	}
      else
	gimple_call_set_lhs (call, loaded_val);
    }
  gsi_replace (&gsi, repl, true);

  /* Remove the GIMPLE_OMP_ATOMIC_LOAD that we verified above.  */
  gsi = gsi_last_nondebug_bb (load_bb);
  gsi_remove (&gsi, true);

  return true;
}
8929 :
/* A subroutine of expand_omp_atomic.  Attempt to implement the atomic
   operation as a __atomic_fetch_op builtin.  INDEX is log2 of the
   size of the data type, and thus usable to find the index of the builtin
   decl.  Returns false if the expression is not of the proper form.  */

static bool
expand_omp_atomic_fetch_op (basic_block load_bb,
			    tree addr, tree loaded_val,
			    tree stored_val, int index)
{
  enum built_in_function oldbase, newbase, tmpbase;
  tree decl, itype, call;
  tree lhs, rhs;
  basic_block store_bb = single_succ (load_bb);
  gimple_stmt_iterator gsi;
  gimple *stmt;
  location_t loc;
  enum tree_code code;
  bool need_old, need_new;
  machine_mode imode;

  /* We expect to find the following sequences:

   load_bb:
       GIMPLE_OMP_ATOMIC_LOAD (tmp, mem)

   store_bb:
       val = tmp OP something; (or: something OP tmp)
       GIMPLE_OMP_STORE (val)

  ???FIXME: Allow a more flexible sequence.
  Perhaps use data flow to pick the statements.

  */

  /* Find the single non-debug assignment in STORE_BB; anything more than
     that single statement followed by the GIMPLE_OMP_ATOMIC_STORE does not
     match the fetch-op pattern.  */
  gsi = gsi_after_labels (store_bb);
  stmt = gsi_stmt (gsi);
  if (is_gimple_debug (stmt))
    {
      gsi_next_nondebug (&gsi);
      if (gsi_end_p (gsi))
	return false;
      stmt = gsi_stmt (gsi);
    }
  loc = gimple_location (stmt);
  if (!is_gimple_assign (stmt))
    return false;
  gsi_next_nondebug (&gsi);
  if (gimple_code (gsi_stmt (gsi)) != GIMPLE_OMP_ATOMIC_STORE)
    return false;
  /* NEED_NEW: the caller wants the post-operation value ("v = x op= e");
     NEED_OLD: the caller wants the pre-operation value.  They select between
     the OP_FETCH and FETCH_OP builtin families below.  */
  need_new = gimple_omp_atomic_need_value_p (gsi_stmt (gsi));
  need_old = gimple_omp_atomic_need_value_p (last_nondebug_stmt (load_bb));
  enum omp_memory_order omo
    = gimple_omp_atomic_memory_order (last_nondebug_stmt (load_bb));
  enum memmodel mo = omp_memory_order_to_memmodel (omo);
  /* An atomic capture can request at most one of the two values.  */
  gcc_checking_assert (!need_old || !need_new);

  if (!operand_equal_p (gimple_assign_lhs (stmt), stored_val, 0))
    return false;

  /* Check for one of the supported fetch-op operations.  */
  code = gimple_assign_rhs_code (stmt);
  switch (code)
    {
    case PLUS_EXPR:
    case POINTER_PLUS_EXPR:
      oldbase = BUILT_IN_ATOMIC_FETCH_ADD_N;
      newbase = BUILT_IN_ATOMIC_ADD_FETCH_N;
      break;
    case MINUS_EXPR:
      oldbase = BUILT_IN_ATOMIC_FETCH_SUB_N;
      newbase = BUILT_IN_ATOMIC_SUB_FETCH_N;
      break;
    case BIT_AND_EXPR:
      oldbase = BUILT_IN_ATOMIC_FETCH_AND_N;
      newbase = BUILT_IN_ATOMIC_AND_FETCH_N;
      break;
    case BIT_IOR_EXPR:
      oldbase = BUILT_IN_ATOMIC_FETCH_OR_N;
      newbase = BUILT_IN_ATOMIC_OR_FETCH_N;
      break;
    case BIT_XOR_EXPR:
      oldbase = BUILT_IN_ATOMIC_FETCH_XOR_N;
      newbase = BUILT_IN_ATOMIC_XOR_FETCH_N;
      break;
    default:
      return false;
    }

  /* Make sure the expression is of the proper form: the loaded value may
     appear on either side of a commutative operation, but only on the
     left of a non-commutative one (e.g. MINUS_EXPR).  */
  if (operand_equal_p (gimple_assign_rhs1 (stmt), loaded_val, 0))
    rhs = gimple_assign_rhs2 (stmt);
  else if (commutative_tree_code (gimple_assign_rhs_code (stmt))
	   && operand_equal_p (gimple_assign_rhs2 (stmt), loaded_val, 0))
    rhs = gimple_assign_rhs1 (stmt);
  else
    return false;

  /* The sized builtins are laid out right after the _N entry, ordered by
     log2 of the access size, hence "+ index + 1".  */
  tmpbase = ((enum built_in_function)
	     ((need_new ? newbase : oldbase) + index + 1));
  decl = builtin_decl_explicit (tmpbase);
  if (decl == NULL_TREE)
    return false;
  itype = TREE_TYPE (TREE_TYPE (decl));
  imode = TYPE_MODE (itype);

  /* We could test all of the various optabs involved, but the fact of the
     matter is that (with the exception of i486 vs i586 and xadd) all targets
     that support any atomic operation optab also implement compare-and-swap.
     Let optabs.cc take care of expanding any compare-and-swap loop.  */
  if (!can_compare_and_swap_p (imode, true) || !can_atomic_load_p (imode))
    return false;

  gsi = gsi_last_nondebug_bb (load_bb);
  gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_ATOMIC_LOAD);

  /* OpenMP does not imply any barrier-like semantics on its atomic ops.
     It only requires that the operation happen atomically.  Thus we can
     use the RELAXED memory model.  (NOTE(review): the call below actually
     passes MO derived from the directive's memory-order clause, which
     defaults to relaxed but may be stronger.)  */
  call = build_call_expr_loc (loc, decl, 3, addr,
			      fold_convert_loc (loc, itype, rhs),
			      build_int_cst (NULL, mo));

  if (need_old || need_new)
    {
      /* Capture the requested value into the variable the directive
	 designated for it.  */
      lhs = need_old ? loaded_val : stored_val;
      call = fold_convert_loc (loc, TREE_TYPE (lhs), call);
      call = build2_loc (loc, MODIFY_EXPR, void_type_node, lhs, call);
    }
  else
    call = fold_convert_loc (loc, void_type_node, call);
  force_gimple_operand_gsi (&gsi, call, true, NULL_TREE, true, GSI_SAME_STMT);
  /* Drop the GIMPLE_OMP_ATOMIC_LOAD that the builtin call replaces.  */
  gsi_remove (&gsi, true);

  /* Drop the GIMPLE_OMP_ATOMIC_STORE and the feeding assignment; both are
     subsumed by the single builtin call.  */
  gsi = gsi_last_nondebug_bb (store_bb);
  gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_ATOMIC_STORE);
  gsi_remove (&gsi, true);
  gsi = gsi_last_nondebug_bb (store_bb);
  stmt = gsi_stmt (gsi);
  gsi_remove (&gsi, true);

  if (gimple_in_ssa_p (cfun))
    release_defs (stmt);

  return true;
}
9076 :
/* A subroutine of expand_omp_atomic.  Attempt to implement the atomic
   compare and exchange as an ATOMIC_COMPARE_EXCHANGE internal function.
   Returns false if the expression is not of the proper form.  */

static bool
expand_omp_atomic_cas (basic_block load_bb, tree addr,
		       tree loaded_val, tree stored_val, int index)
{
  /* We expect to find the following sequences:

   load_bb:
       GIMPLE_OMP_ATOMIC_LOAD (tmp, mem)

   store_bb:
       val = tmp == e ? d : tmp;
       GIMPLE_OMP_ATOMIC_STORE (val)

   or in store_bb instead:
       tmp2 = tmp == e;
       val = tmp2 ? d : tmp;
       GIMPLE_OMP_ATOMIC_STORE (val)

   or:
       tmp3 = VIEW_CONVERT_EXPR<integral_type>(tmp);
       val = e == tmp3 ? d : tmp;
       GIMPLE_OMP_ATOMIC_STORE (val)

   etc.  */


  /* Walk STORE_BB backwards from the GIMPLE_OMP_ATOMIC_STORE, matching
     the COND_EXPR, the optional EQ_EXPR feeding it, and the optional
     VIEW_CONVERT_EXPR used for floating-point comparisons.  */
  basic_block store_bb = single_succ (load_bb);
  gimple_stmt_iterator gsi = gsi_last_nondebug_bb (store_bb);
  gimple *store_stmt = gsi_stmt (gsi);
  if (!store_stmt || gimple_code (store_stmt) != GIMPLE_OMP_ATOMIC_STORE)
    return false;
  gsi_prev_nondebug (&gsi);
  if (gsi_end_p (gsi))
    return false;
  gimple *condexpr_stmt = gsi_stmt (gsi);
  if (!is_gimple_assign (condexpr_stmt)
      || gimple_assign_rhs_code (condexpr_stmt) != COND_EXPR)
    return false;
  if (!operand_equal_p (gimple_assign_lhs (condexpr_stmt), stored_val, 0))
    return false;
  gimple *cond_stmt = NULL;
  gimple *vce_stmt = NULL;
  gsi_prev_nondebug (&gsi);
  if (!gsi_end_p (gsi))
    {
      cond_stmt = gsi_stmt (gsi);
      if (!is_gimple_assign (cond_stmt))
	return false;
      if (gimple_assign_rhs_code (cond_stmt) == EQ_EXPR)
	{
	  /* There may be a VIEW_CONVERT_EXPR before the comparison when
	     the value is floating point (the compare is done on the
	     integral reinterpretation).  */
	  gsi_prev_nondebug (&gsi);
	  if (!gsi_end_p (gsi))
	    {
	      vce_stmt = gsi_stmt (gsi);
	      if (!is_gimple_assign (vce_stmt)
		  || gimple_assign_rhs_code (vce_stmt) != VIEW_CONVERT_EXPR)
		return false;
	    }
	}
      else if (gimple_assign_rhs_code (cond_stmt) == VIEW_CONVERT_EXPR)
	/* Only a VIEW_CONVERT_EXPR precedes the COND_EXPR; there is no
	   separate comparison statement.  */
	std::swap (vce_stmt, cond_stmt);
      else
	return false;
      if (vce_stmt)
	{
	  /* The conversion must reinterpret LOADED_VAL as a same-sized
	     integral type; anything else is not the pattern we handle.  */
	  tree vce_rhs = gimple_assign_rhs1 (vce_stmt);
	  if (TREE_CODE (vce_rhs) != VIEW_CONVERT_EXPR
	      || !operand_equal_p (TREE_OPERAND (vce_rhs, 0), loaded_val))
	    return false;
	  if (!INTEGRAL_TYPE_P (TREE_TYPE (vce_rhs))
	      || !SCALAR_FLOAT_TYPE_P (TREE_TYPE (loaded_val))
	      || !tree_int_cst_equal (TYPE_SIZE (TREE_TYPE (vce_rhs)),
				      TYPE_SIZE (TREE_TYPE (loaded_val))))
	    return false;
	  /* Nothing else may precede the VIEW_CONVERT_EXPR in STORE_BB.  */
	  gsi_prev_nondebug (&gsi);
	  if (!gsi_end_p (gsi))
	    return false;
	}
    }
  /* Extract the comparison operands, either from the separate comparison
     statement or from a comparison embedded in the COND_EXPR itself.  */
  tree cond = gimple_assign_rhs1 (condexpr_stmt);
  tree cond_op1, cond_op2;
  if (cond_stmt)
    {
      /* We should now always get a separate cond_stmt.  */
      if (!operand_equal_p (cond, gimple_assign_lhs (cond_stmt)))
	return false;
      cond_op1 = gimple_assign_rhs1 (cond_stmt);
      cond_op2 = gimple_assign_rhs2 (cond_stmt);
    }
  else if (TREE_CODE (cond) != EQ_EXPR && TREE_CODE (cond) != NE_EXPR)
    return false;
  else
    {
      cond_op1 = TREE_OPERAND (cond, 0);
      cond_op2 = TREE_OPERAND (cond, 1);
    }
  /* D is the desired/replacement value; the COND_EXPR arm order depends
     on whether the comparison is EQ or NE.  */
  tree d;
  if (TREE_CODE (cond) == NE_EXPR)
    {
      if (!operand_equal_p (gimple_assign_rhs2 (condexpr_stmt), loaded_val))
	return false;
      d = gimple_assign_rhs3 (condexpr_stmt);
    }
  else if (!operand_equal_p (gimple_assign_rhs3 (condexpr_stmt), loaded_val))
    return false;
  else
    d = gimple_assign_rhs2 (condexpr_stmt);
  /* E is the expected value: the comparison operand that is not the
     (possibly view-converted) loaded value.  */
  tree e = vce_stmt ? gimple_assign_lhs (vce_stmt) : loaded_val;
  if (operand_equal_p (e, cond_op1))
    e = cond_op2;
  else if (operand_equal_p (e, cond_op2))
    e = cond_op1;
  else
    return false;

  location_t loc = gimple_location (store_stmt);
  gimple *load_stmt = last_nondebug_stmt (load_bb);
  bool need_new = gimple_omp_atomic_need_value_p (store_stmt);
  bool need_old = gimple_omp_atomic_need_value_p (load_stmt);
  bool weak = gimple_omp_atomic_weak_p (load_stmt);
  enum omp_memory_order omo = gimple_omp_atomic_memory_order (load_stmt);
  tree mo = build_int_cst (NULL, omp_memory_order_to_memmodel (omo));
  tree fmo = build_int_cst (NULL, omp_memory_order_to_fail_memmodel (omo));
  gcc_checking_assert (!need_old || !need_new);

  /* Sized compare-and-swap builtins follow the _N entry in log2-size
     order, hence "+ index + 1".  Only used here to derive ITYPE.  */
  enum built_in_function fncode
    = (enum built_in_function) ((int) BUILT_IN_SYNC_VAL_COMPARE_AND_SWAP_N
				+ index + 1);
  tree cmpxchg = builtin_decl_explicit (fncode);
  if (cmpxchg == NULL_TREE)
    return false;
  tree itype = TREE_TYPE (TREE_TYPE (cmpxchg));

  if (!can_compare_and_swap_p (TYPE_MODE (itype), true)
      || !can_atomic_load_p (TYPE_MODE (itype)))
    return false;

  /* Floating-point values are only handled through the VIEW_CONVERT_EXPR
     form; a direct FP comparison is not expanded here.  */
  tree type = TYPE_MAIN_VARIANT (TREE_TYPE (loaded_val));
  if (SCALAR_FLOAT_TYPE_P (type) && !vce_stmt)
    return false;

  /* Convert the expected (E) and desired (D) values to ITYPE as needed,
     inserting the conversions before the store.  */
  gsi = gsi_for_stmt (store_stmt);
  if (!useless_type_conversion_p (itype, TREE_TYPE (e)))
    {
      tree ne = create_tmp_reg (itype);
      gimple *g = gimple_build_assign (ne, NOP_EXPR, e);
      gimple_set_location (g, loc);
      gsi_insert_before (&gsi, g, GSI_SAME_STMT);
      e = ne;
    }
  if (!useless_type_conversion_p (itype, TREE_TYPE (d)))
    {
      tree nd = create_tmp_reg (itype);
      enum tree_code code;
      if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (d)))
	{
	  /* Reinterpret FP bits rather than value-convert them.  */
	  code = VIEW_CONVERT_EXPR;
	  d = build1 (VIEW_CONVERT_EXPR, itype, d);
	}
      else
	code = NOP_EXPR;
      gimple *g = gimple_build_assign (nd, code, d);
      gimple_set_location (g, loc);
      gsi_insert_before (&gsi, g, GSI_SAME_STMT);
      d = nd;
    }

  /* Emit the IFN_ATOMIC_COMPARE_EXCHANGE call.  Its result is a complex
     value: REALPART is the old memory contents, IMAGPART the success
     flag.  Bit 8 of FLAG requests a weak CAS.  */
  tree ctype = build_complex_type (itype);
  int flag = int_size_in_bytes (itype) + (weak ? 256 : 0);
  gimple *g
    = gimple_build_call_internal (IFN_ATOMIC_COMPARE_EXCHANGE, 6, addr, e, d,
				  build_int_cst (integer_type_node, flag),
				  mo, fmo);
  tree cres = create_tmp_reg (ctype);
  gimple_call_set_lhs (g, cres);
  gimple_set_location (g, loc);
  gsi_insert_before (&gsi, g, GSI_SAME_STMT);

  if (cond_stmt || need_old || need_new)
    {
      /* Extract the success flag (imaginary part).  */
      tree im = create_tmp_reg (itype);
      g = gimple_build_assign (im, IMAGPART_EXPR,
			       build1 (IMAGPART_EXPR, itype, cres));
      gimple_set_location (g, loc);
      gsi_insert_before (&gsi, g, GSI_SAME_STMT);

      /* Extract the old value (real part) when a capture needs it.  */
      tree re = NULL_TREE;
      if (need_old || need_new)
	{
	  re = create_tmp_reg (itype);
	  g = gimple_build_assign (re, REALPART_EXPR,
				   build1 (REALPART_EXPR, itype, cres));
	  gimple_set_location (g, loc);
	  gsi_insert_before (&gsi, g, GSI_SAME_STMT);
	}

      if (cond_stmt)
	{
	  /* Keep COND live for any other users of the comparison result.  */
	  g = gimple_build_assign (cond, NOP_EXPR, im);
	  gimple_set_location (g, loc);
	  gsi_insert_before (&gsi, g, GSI_SAME_STMT);
	}

      if (need_new)
	{
	  /* The new value is D on success, otherwise the old contents.  */
	  g = gimple_build_assign (create_tmp_reg (itype), COND_EXPR,
				   cond_stmt
				   ? cond : build2 (NE_EXPR, boolean_type_node,
						    im, build_zero_cst (itype)),
				   d, re);
	  gimple_set_location (g, loc);
	  gsi_insert_before (&gsi, g, GSI_SAME_STMT);
	  re = gimple_assign_lhs (g);
	}

      if (need_old || need_new)
	{
	  /* Store the captured value, converting back to the user type
	     (bit-reinterpreting for floating point).  */
	  tree v = need_old ? loaded_val : stored_val;
	  enum tree_code code;
	  if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (v)))
	    {
	      code = VIEW_CONVERT_EXPR;
	      re = build1 (VIEW_CONVERT_EXPR, TREE_TYPE (v), re);
	    }
	  else if (!useless_type_conversion_p (TREE_TYPE (v), itype))
	    code = NOP_EXPR;
	  else
	    code = TREE_CODE (re);
	  g = gimple_build_assign (v, code, re);
	  gimple_set_location (g, loc);
	  gsi_insert_before (&gsi, g, GSI_SAME_STMT);
	}
    }

  /* Remove the matched statements: the store, the load, the COND_EXPR,
     and the optional comparison and view-convert statements.  */
  gsi_remove (&gsi, true);
  gsi = gsi_for_stmt (load_stmt);
  gsi_remove (&gsi, true);
  gsi = gsi_for_stmt (condexpr_stmt);
  gsi_remove (&gsi, true);
  if (cond_stmt)
    {
      gsi = gsi_for_stmt (cond_stmt);
      gsi_remove (&gsi, true);
    }
  if (vce_stmt)
    {
      gsi = gsi_for_stmt (vce_stmt);
      gsi_remove (&gsi, true);
    }

  return true;
}
9333 :
/* A subroutine of expand_omp_atomic.  Implement the atomic operation as:

      oldval = *addr;
      repeat:
	newval = rhs;	 // with oldval replacing *addr in rhs
	oldval = __sync_val_compare_and_swap (addr, oldval, newval);
	if (oldval != newval)
	  goto repeat;

   INDEX is log2 of the size of the data type, and thus usable to find the
   index of the builtin decl.  */

static bool
expand_omp_atomic_pipeline (basic_block load_bb, basic_block store_bb,
			    tree addr, tree loaded_val, tree stored_val,
			    int index)
{
  tree loadedi, storedi, initial, new_storedi, old_vali;
  tree type, itype, cmpxchg, iaddr, atype;
  gimple_stmt_iterator si;
  basic_block loop_header = single_succ (load_bb);
  gimple *phi, *stmt;
  edge e;
  enum built_in_function fncode;

  /* Sized compare-and-swap builtins follow the _N entry in log2-size
     order, hence "+ index + 1".  Only used here to derive ITYPE.  */
  fncode = (enum built_in_function)((int)BUILT_IN_SYNC_VAL_COMPARE_AND_SWAP_N
				    + index + 1);
  cmpxchg = builtin_decl_explicit (fncode);
  if (cmpxchg == NULL_TREE)
    return false;
  type = TYPE_MAIN_VARIANT (TREE_TYPE (loaded_val));
  atype = type;
  itype = TREE_TYPE (TREE_TYPE (cmpxchg));

  if (!can_compare_and_swap_p (TYPE_MODE (itype), true)
      || !can_atomic_load_p (TYPE_MODE (itype)))
    return false;

  /* Load the initial value, replacing the GIMPLE_OMP_ATOMIC_LOAD.  */
  si = gsi_last_nondebug_bb (load_bb);
  gcc_assert (gimple_code (gsi_stmt (si)) == GIMPLE_OMP_ATOMIC_LOAD);
  location_t loc = gimple_location (gsi_stmt (si));
  enum omp_memory_order omo = gimple_omp_atomic_memory_order (gsi_stmt (si));
  tree mo = build_int_cst (NULL, omp_memory_order_to_memmodel (omo));
  tree fmo = build_int_cst (NULL, omp_memory_order_to_fail_memmodel (omo));

  /* For floating-point values, we'll need to view-convert them to integers
     so that we can perform the atomic compare and swap.  Simplify the
     following code by always setting up the "i"ntegral variables.  */
  if (!INTEGRAL_TYPE_P (type) && !POINTER_TYPE_P (type))
    {
      tree iaddr_val;

      iaddr = create_tmp_reg (build_pointer_type_for_mode (itype, ptr_mode,
							   true));
      atype = itype;
      iaddr_val
	= force_gimple_operand_gsi (&si,
				    fold_convert (TREE_TYPE (iaddr), addr),
				    false, NULL_TREE, true, GSI_SAME_STMT);
      stmt = gimple_build_assign (iaddr, iaddr_val);
      gsi_insert_before (&si, stmt, GSI_SAME_STMT);
      loadedi = create_tmp_var (itype);
      if (gimple_in_ssa_p (cfun))
	loadedi = make_ssa_name (loadedi);
    }
  else
    {
      /* Integral/pointer values are CASed directly.  */
      iaddr = addr;
      loadedi = loaded_val;
    }

  /* Emit the initial atomic (relaxed) load of *IADDR.  */
  fncode = (enum built_in_function) (BUILT_IN_ATOMIC_LOAD_N + index + 1);
  tree loaddecl = builtin_decl_explicit (fncode);
  if (loaddecl)
    initial
      = fold_convert (atype,
		      build_call_expr (loaddecl, 2, iaddr,
				       build_int_cst (NULL_TREE,
						      MEMMODEL_RELAXED)));
  else
    {
      /* No atomic load builtin; fall back to a plain memory reference.  */
      tree off
	= build_int_cst (build_pointer_type_for_mode (atype, ptr_mode,
						      true), 0);
      initial = build2 (MEM_REF, atype, iaddr, off);
    }

  initial
    = force_gimple_operand_gsi (&si, initial, true, NULL_TREE, true,
				GSI_SAME_STMT);

  /* Move the value to the LOADEDI temporary.  */
  if (gimple_in_ssa_p (cfun))
    {
      /* In SSA, LOADEDI becomes a PHI merging the initial load with the
	 CAS result coming around the loop back edge (set up below).  */
      gcc_assert (gimple_seq_empty_p (phi_nodes (loop_header)));
      phi = create_phi_node (loadedi, loop_header);
      SET_USE (PHI_ARG_DEF_PTR_FROM_EDGE (phi, single_succ_edge (load_bb)),
	       initial);
    }
  else
    gsi_insert_before (&si,
		       gimple_build_assign (loadedi, initial),
		       GSI_SAME_STMT);
  if (loadedi != loaded_val)
    {
      /* Keep LOADED_VAL (user type) in sync with LOADEDI (integral
	 view) at the top of the loop.  */
      gimple_stmt_iterator gsi2;
      tree x;

      x = build1 (VIEW_CONVERT_EXPR, type, loadedi);
      gsi2 = gsi_start_bb (loop_header);
      if (gimple_in_ssa_p (cfun))
	{
	  gassign *stmt;
	  x = force_gimple_operand_gsi (&gsi2, x, true, NULL_TREE,
					true, GSI_SAME_STMT);
	  stmt = gimple_build_assign (loaded_val, x);
	  gsi_insert_before (&gsi2, stmt, GSI_SAME_STMT);
	}
      else
	{
	  x = build2 (MODIFY_EXPR, TREE_TYPE (loaded_val), loaded_val, x);
	  force_gimple_operand_gsi (&gsi2, x, true, NULL_TREE,
				    true, GSI_SAME_STMT);
	}
    }
  gsi_remove (&si, true);

  si = gsi_last_nondebug_bb (store_bb);
  gcc_assert (gimple_code (gsi_stmt (si)) == GIMPLE_OMP_ATOMIC_STORE);

  if (iaddr == addr)
    storedi = stored_val;
  else
    storedi
      = force_gimple_operand_gsi (&si,
				  build1 (VIEW_CONVERT_EXPR, itype,
					  stored_val), true, NULL_TREE, true,
				  GSI_SAME_STMT);

  /* Build the compare&swap statement.  The IFN returns a complex value:
     REALPART is the old memory contents, IMAGPART the success flag; only
     the old contents are needed for the loop exit test.  */
  tree ctype = build_complex_type (itype);
  int flag = int_size_in_bytes (itype);
  new_storedi = build_call_expr_internal_loc (loc, IFN_ATOMIC_COMPARE_EXCHANGE,
					      ctype, 6, iaddr, loadedi,
					      storedi,
					      build_int_cst (integer_type_node,
							     flag),
					      mo, fmo);
  new_storedi = build1 (REALPART_EXPR, itype, new_storedi);
  new_storedi = force_gimple_operand_gsi (&si,
					  fold_convert (TREE_TYPE (loadedi),
							new_storedi),
					  true, NULL_TREE,
					  true, GSI_SAME_STMT);

  if (gimple_in_ssa_p (cfun))
    old_vali = loadedi;
  else
    {
      /* Outside SSA, remember the previous value and update LOADEDI for
	 the next iteration before emitting the exit test.  */
      old_vali = create_tmp_var (TREE_TYPE (loadedi));
      stmt = gimple_build_assign (old_vali, loadedi);
      gsi_insert_before (&si, stmt, GSI_SAME_STMT);

      stmt = gimple_build_assign (loadedi, new_storedi);
      gsi_insert_before (&si, stmt, GSI_SAME_STMT);
    }

  /* Note that we always perform the comparison as an integer, even for
     floating point.  This allows the atomic operation to properly
     succeed even with NaNs and -0.0.  */
  tree ne = build2 (NE_EXPR, boolean_type_node, new_storedi, old_vali);
  stmt = gimple_build_cond_empty (ne);
  gsi_insert_before (&si, stmt, GSI_SAME_STMT);

  /* Update cfg.  */
  e = single_succ_edge (store_bb);
  e->flags &= ~EDGE_FALLTHRU;
  e->flags |= EDGE_FALSE_VALUE;
  /* Expect no looping.  */
  e->probability = profile_probability::guessed_always ();

  e = make_edge (store_bb, loop_header, EDGE_TRUE_VALUE);
  e->probability = profile_probability::guessed_never ();

  /* Copy the new value to loadedi (we already did that before the condition
     if we are not in SSA).  */
  if (gimple_in_ssa_p (cfun))
    {
      phi = gimple_seq_first_stmt (phi_nodes (loop_header));
      SET_USE (PHI_ARG_DEF_PTR_FROM_EDGE (phi, e), new_storedi);
    }

  /* Remove GIMPLE_OMP_ATOMIC_STORE.  */
  stmt = gsi_stmt (si);
  gsi_remove (&si, true);
  if (gimple_in_ssa_p (cfun))
    release_defs (stmt);

  /* Register the retry loop in the loop tree.  */
  class loop *loop = alloc_loop ();
  loop->header = loop_header;
  loop->latch = store_bb;
  add_loop (loop, loop_header->loop_father);

  return true;
}
9540 :
/* A subroutine of expand_omp_atomic.  Implement the atomic operation as:

				  GOMP_atomic_start ();
				  *addr = rhs;
				  GOMP_atomic_end ();

   The result is not globally atomic, but works so long as all parallel
   references are within #pragma omp atomic directives.  According to
   responses received from omp@openmp.org, appears to be within spec.
   Which makes sense, since that's how several other compilers handle
   this situation as well.
   LOADED_VAL and ADDR are the operands of GIMPLE_OMP_ATOMIC_LOAD we're
   expanding.  STORED_VAL is the operand of the matching
   GIMPLE_OMP_ATOMIC_STORE.

   We replace
   GIMPLE_OMP_ATOMIC_LOAD (loaded_val, addr) with
   loaded_val = *addr;

   and replace
   GIMPLE_OMP_ATOMIC_STORE (stored_val) with
   *addr = stored_val;
*/

static bool
expand_omp_atomic_mutex (basic_block load_bb, basic_block store_bb,
			 tree addr, tree loaded_val, tree stored_val)
{
  gimple_stmt_iterator si;
  gassign *stmt;
  tree t;

  si = gsi_last_nondebug_bb (load_bb);
  gcc_assert (gimple_code (gsi_stmt (si)) == GIMPLE_OMP_ATOMIC_LOAD);

  /* Emit GOMP_atomic_start () before the load.  */
  t = builtin_decl_explicit (BUILT_IN_GOMP_ATOMIC_START);
  t = build_call_expr (t, 0);
  force_gimple_operand_gsi (&si, t, true, NULL_TREE, true, GSI_SAME_STMT);

  /* Build *ADDR with the type of LOADED_VAL; adjust the MEM_REF offset
     type accordingly so the reference is well formed.  */
  tree mem = build_simple_mem_ref (addr);
  TREE_TYPE (mem) = TREE_TYPE (loaded_val);
  TREE_OPERAND (mem, 1)
    = fold_convert (build_pointer_type_for_mode (TREE_TYPE (mem), ptr_mode,
						 true),
		    TREE_OPERAND (mem, 1));
  /* Replace the GIMPLE_OMP_ATOMIC_LOAD with a plain load.  */
  stmt = gimple_build_assign (loaded_val, mem);
  gsi_insert_before (&si, stmt, GSI_SAME_STMT);
  gsi_remove (&si, true);

  si = gsi_last_nondebug_bb (store_bb);
  gcc_assert (gimple_code (gsi_stmt (si)) == GIMPLE_OMP_ATOMIC_STORE);

  /* Replace the GIMPLE_OMP_ATOMIC_STORE with a plain store, transferring
     its virtual operands to the new statement.  */
  stmt = gimple_build_assign (unshare_expr (mem), stored_val);
  gimple_set_vuse (stmt, gimple_vuse (gsi_stmt (si)));
  gimple_set_vdef (stmt, gimple_vdef (gsi_stmt (si)));
  gsi_insert_before (&si, stmt, GSI_SAME_STMT);

  /* Emit GOMP_atomic_end () after the store.  */
  t = builtin_decl_explicit (BUILT_IN_GOMP_ATOMIC_END);
  t = build_call_expr (t, 0);
  force_gimple_operand_gsi (&si, t, true, NULL_TREE, true, GSI_SAME_STMT);
  gsi_remove (&si, true);
  return true;
}
9604 :
/* Expand a GIMPLE_OMP_ATOMIC statement.  We try to expand
   using expand_omp_atomic_fetch_op.  If it failed, we try to
   call expand_omp_atomic_pipeline, and if it fails too, the
   ultimate fallback is wrapping the operation in a mutex
   (expand_omp_atomic_mutex).  REGION is the atomic region built
   by build_omp_regions_1().  */

static void
expand_omp_atomic (struct omp_region *region)
{
  basic_block load_bb = region->entry, store_bb = region->exit;
  gomp_atomic_load *load
    = as_a <gomp_atomic_load *> (last_nondebug_stmt (load_bb));
  gomp_atomic_store *store
    = as_a <gomp_atomic_store *> (last_nondebug_stmt (store_bb));
  tree loaded_val = gimple_omp_atomic_load_lhs (load);
  tree addr = gimple_omp_atomic_load_rhs (load);
  tree stored_val = gimple_omp_atomic_store_val (store);
  tree type = TYPE_MAIN_VARIANT (TREE_TYPE (loaded_val));
  HOST_WIDE_INT index;

  /* Make sure the type is one of the supported sizes.  INDEX becomes the
     log2 of the access size (0..4, i.e. 1 to 16 bytes), which the helpers
     use to pick the sized builtin.  */
  index = tree_to_uhwi (TYPE_SIZE_UNIT (type));
  index = exact_log2 (index);
  if (index >= 0 && index <= 4)
    {
      unsigned int align = TYPE_ALIGN_UNIT (type);

      /* __sync builtins require strict data alignment.  */
      if (exact_log2 (align) >= index)
	{
	  /* Atomic load.  A load-only region has LOADED_VAL == STORED_VAL.  */
	  scalar_mode smode;
	  if (loaded_val == stored_val
	      && (is_int_mode (TYPE_MODE (type), &smode)
		  || is_float_mode (TYPE_MODE (type), &smode))
	      && GET_MODE_BITSIZE (smode) <= BITS_PER_WORD
	      && expand_omp_atomic_load (load_bb, addr, loaded_val, index))
	    return;

	  /* Atomic store.  The store must be the only statement between
	     the load and the end of the region.  */
	  if ((is_int_mode (TYPE_MODE (type), &smode)
	       || is_float_mode (TYPE_MODE (type), &smode))
	      && GET_MODE_BITSIZE (smode) <= BITS_PER_WORD
	      && store_bb == single_succ (load_bb)
	      && first_stmt (store_bb) == store
	      && expand_omp_atomic_store (load_bb, addr, loaded_val,
					  stored_val, index))
	    return;

	  /* When possible, use specialized atomic update functions.  */
	  if ((INTEGRAL_TYPE_P (type) || POINTER_TYPE_P (type))
	      && store_bb == single_succ (load_bb)
	      && expand_omp_atomic_fetch_op (load_bb, addr,
					     loaded_val, stored_val, index))
	    return;

	  /* When possible, use ATOMIC_COMPARE_EXCHANGE ifn without a loop.  */
	  if (store_bb == single_succ (load_bb)
	      && !gimple_in_ssa_p (cfun)
	      && expand_omp_atomic_cas (load_bb, addr, loaded_val, stored_val,
					index))
	    return;

	  /* If we don't have specialized __sync builtins, try and implement
	     as a compare and swap loop.  */
	  if (expand_omp_atomic_pipeline (load_bb, store_bb, addr,
					  loaded_val, stored_val, index))
	    return;
	}
    }

  /* The ultimate fallback is wrapping the operation in a mutex.  */
  expand_omp_atomic_mutex (load_bb, store_bb, addr, loaded_val, stored_val);
}
9680 :
/* Mark the loops inside the kernels region starting at REGION_ENTRY and ending
   at REGION_EXIT.  */

static void
mark_loops_in_oacc_kernels_region (basic_block region_entry,
				   basic_block region_exit)
{
  class loop *outer = region_entry->loop_father;
  gcc_assert (region_exit == NULL || outer == region_exit->loop_father);

  /* Don't parallelize the kernels region if it contains more than one outer
     loop.  */
  unsigned int nr_outer_loops = 0;
  class loop *single_outer = NULL;
  for (class loop *loop = outer->inner; loop != NULL; loop = loop->next)
    {
      gcc_assert (loop_outer (loop) == outer);

      /* A loop belongs to the region iff its header is dominated by the
	 region entry but not by the region exit.  */
      if (!dominated_by_p (CDI_DOMINATORS, loop->header, region_entry))
	continue;

      if (region_exit != NULL
	  && dominated_by_p (CDI_DOMINATORS, loop->header, region_exit))
	continue;

      nr_outer_loops++;
      single_outer = loop;
    }
  if (nr_outer_loops != 1)
    return;

  /* Require a single chain of nested loops: any level with siblings
     disqualifies the region.  */
  for (class loop *loop = single_outer->inner;
       loop != NULL;
       loop = loop->inner)
    if (loop->next)
      return;

  /* Mark the loops in the region.  */
  for (class loop *loop = single_outer; loop != NULL; loop = loop->inner)
    loop->in_oacc_kernels_region = true;
}
9722 :
9723 : /* Build target argument identifier from the DEVICE identifier, value
9724 : identifier ID and whether the element also has a SUBSEQUENT_PARAM. */
9725 :
9726 : static tree
9727 22642 : get_target_argument_identifier_1 (int device, bool subseqent_param, int id)
9728 : {
9729 22642 : tree t = build_int_cst (integer_type_node, device);
9730 22642 : if (subseqent_param)
9731 703 : t = fold_build2 (BIT_IOR_EXPR, integer_type_node, t,
9732 : build_int_cst (integer_type_node,
9733 : GOMP_TARGET_ARG_SUBSEQUENT_PARAM));
9734 22642 : t = fold_build2 (BIT_IOR_EXPR, integer_type_node, t,
9735 : build_int_cst (integer_type_node, id));
9736 22642 : return t;
9737 : }
9738 :
9739 : /* Like above but return it in type that can be directly stored as an element
9740 : of the argument array. */
9741 :
9742 : static tree
9743 703 : get_target_argument_identifier (int device, bool subseqent_param, int id)
9744 : {
9745 703 : tree t = get_target_argument_identifier_1 (device, subseqent_param, id);
9746 703 : return fold_convert (ptr_type_node, t);
9747 : }
9748 :
9749 : /* Return a target argument consisting of DEVICE identifier, value identifier
9750 : ID, and the actual VALUE. */
9751 :
9752 : static tree
9753 21939 : get_target_argument_value (gimple_stmt_iterator *gsi, int device, int id,
9754 : tree value)
9755 : {
9756 21939 : tree t = fold_build2 (LSHIFT_EXPR, integer_type_node,
9757 : fold_convert (integer_type_node, value),
9758 : build_int_cst (unsigned_type_node,
9759 : GOMP_TARGET_ARG_VALUE_SHIFT));
9760 21939 : t = fold_build2 (BIT_IOR_EXPR, integer_type_node, t,
9761 : get_target_argument_identifier_1 (device, false, id));
9762 21939 : t = fold_convert (ptr_type_node, t);
9763 21939 : return force_gimple_operand_gsi (gsi, t, true, NULL, true, GSI_SAME_STMT);
9764 : }
9765 :
9766 : /* If VALUE is an integer constant greater than -2^15 and smaller than 2^15,
9767 : push one argument to ARGS with both the DEVICE, ID and VALUE embedded in it,
9768 : otherwise push an identifier (with DEVICE and ID) and the VALUE in two
9769 : arguments. */
9770 :
9771 : static void
9772 22642 : push_target_argument_according_to_value (gimple_stmt_iterator *gsi, int device,
9773 : int id, tree value, vec <tree> *args)
9774 : {
9775 22642 : if (tree_fits_shwi_p (value)
9776 21939 : && tree_to_shwi (value) > -(1 << 15)
9777 21939 : && tree_to_shwi (value) < (1 << 15))
9778 21939 : args->quick_push (get_target_argument_value (gsi, device, id, value));
9779 : else
9780 : {
9781 703 : args->quick_push (get_target_argument_identifier (device, true, id));
9782 703 : value = fold_convert (ptr_type_node, value);
9783 703 : value = force_gimple_operand_gsi (gsi, value, true, NULL, true,
9784 : GSI_SAME_STMT);
9785 703 : args->quick_push (value);
9786 : }
9787 22642 : }
9788 :
/* Create an array of arguments that is then passed to GOMP_target.  */

static tree
get_target_arguments (gimple_stmt_iterator *gsi, gomp_target *tgt_stmt)
{
  auto_vec <tree, 6> args;
  tree clauses = gimple_omp_target_clauses (tgt_stmt);
  /* num_teams: use the clause's upper bound, or -1 for "unspecified".  */
  tree t, c = omp_find_clause (clauses, OMP_CLAUSE_NUM_TEAMS);
  if (c)
    t = OMP_CLAUSE_NUM_TEAMS_UPPER_EXPR (c);
  else
    t = integer_minus_one_node;
  push_target_argument_according_to_value (gsi, GOMP_TARGET_ARG_DEVICE_ALL,
					   GOMP_TARGET_ARG_NUM_TEAMS, t, &args);

  /* thread_limit: likewise, -1 when the clause is absent.  */
  c = omp_find_clause (clauses, OMP_CLAUSE_THREAD_LIMIT);
  if (c)
    t = OMP_CLAUSE_THREAD_LIMIT_EXPR (c);
  else
    t = integer_minus_one_node;
  push_target_argument_according_to_value (gsi, GOMP_TARGET_ARG_DEVICE_ALL,
					   GOMP_TARGET_ARG_THREAD_LIMIT, t,
					   &args);

  /* Produce more, perhaps device specific, arguments here.  */

  /* Materialize the collected arguments into a NULL-terminated array of
     pointers, storing each element before *GSI.  */
  tree argarray = create_tmp_var (build_array_type_nelts (ptr_type_node,
							  args.length () + 1),
				  ".omp_target_args");
  for (unsigned i = 0; i < args.length (); i++)
    {
      tree ref = build4 (ARRAY_REF, ptr_type_node, argarray,
			 build_int_cst (integer_type_node, i),
			 NULL_TREE, NULL_TREE);
      gsi_insert_before (gsi, gimple_build_assign (ref, args[i]),
			 GSI_SAME_STMT);
    }
  /* Terminating NULL sentinel.  */
  tree ref = build4 (ARRAY_REF, ptr_type_node, argarray,
		     build_int_cst (integer_type_node, args.length ()),
		     NULL_TREE, NULL_TREE);
  gsi_insert_before (gsi, gimple_build_assign (ref, null_pointer_node),
		     GSI_SAME_STMT);
  TREE_ADDRESSABLE (argarray) = 1;
  /* GOMP_target receives the array by address.  */
  return build_fold_addr_expr (argarray);
}
9834 :
9835 : /* Expand the GIMPLE_OMP_TARGET starting at REGION. */
9836 :
9837 : static void
9838 36447 : expand_omp_target (struct omp_region *region)
9839 : {
9840 36447 : basic_block entry_bb, exit_bb, new_bb;
9841 36447 : struct function *child_cfun;
9842 36447 : tree child_fn, child_fn2, block, t, c;
9843 36447 : gimple_stmt_iterator gsi;
9844 36447 : gomp_target *entry_stmt;
9845 36447 : gimple *stmt;
9846 36447 : edge e;
9847 36447 : bool offloaded;
9848 36447 : int target_kind;
9849 :
9850 36447 : entry_stmt = as_a <gomp_target *> (last_nondebug_stmt (region->entry));
9851 36447 : target_kind = gimple_omp_target_kind (entry_stmt);
9852 36447 : new_bb = region->entry;
9853 :
9854 36447 : offloaded = is_gimple_omp_offloaded (entry_stmt);
9855 36447 : switch (target_kind)
9856 : {
9857 36447 : case GF_OMP_TARGET_KIND_REGION:
9858 36447 : case GF_OMP_TARGET_KIND_UPDATE:
9859 36447 : case GF_OMP_TARGET_KIND_ENTER_DATA:
9860 36447 : case GF_OMP_TARGET_KIND_EXIT_DATA:
9861 36447 : case GF_OMP_TARGET_KIND_OACC_PARALLEL:
9862 36447 : case GF_OMP_TARGET_KIND_OACC_KERNELS:
9863 36447 : case GF_OMP_TARGET_KIND_OACC_SERIAL:
9864 36447 : case GF_OMP_TARGET_KIND_OACC_UPDATE:
9865 36447 : case GF_OMP_TARGET_KIND_OACC_ENTER_DATA:
9866 36447 : case GF_OMP_TARGET_KIND_OACC_EXIT_DATA:
9867 36447 : case GF_OMP_TARGET_KIND_OACC_DECLARE:
9868 36447 : case GF_OMP_TARGET_KIND_OACC_PARALLEL_KERNELS_PARALLELIZED:
9869 36447 : case GF_OMP_TARGET_KIND_OACC_PARALLEL_KERNELS_GANG_SINGLE:
9870 36447 : case GF_OMP_TARGET_KIND_DATA:
9871 36447 : case GF_OMP_TARGET_KIND_OACC_DATA:
9872 36447 : case GF_OMP_TARGET_KIND_OACC_HOST_DATA:
9873 36447 : case GF_OMP_TARGET_KIND_OACC_DATA_KERNELS:
9874 36447 : break;
9875 0 : default:
9876 0 : gcc_unreachable ();
9877 : }
9878 :
9879 36447 : tree clauses = gimple_omp_target_clauses (entry_stmt);
9880 :
9881 36447 : bool is_ancestor = false;
9882 36447 : child_fn = child_fn2 = NULL_TREE;
9883 36447 : child_cfun = NULL;
9884 36447 : if (offloaded)
9885 : {
9886 20716 : c = omp_find_clause (clauses, OMP_CLAUSE_DEVICE);
9887 20716 : if (ENABLE_OFFLOADING && c)
9888 : is_ancestor = OMP_CLAUSE_DEVICE_ANCESTOR (c);
9889 20716 : child_fn = gimple_omp_target_child_fn (entry_stmt);
9890 20716 : child_cfun = DECL_STRUCT_FUNCTION (child_fn);
9891 : }
9892 :
9893 : /* Supported by expand_omp_taskreg, but not here. */
9894 20716 : if (child_cfun != NULL)
9895 20716 : gcc_checking_assert (!child_cfun->cfg);
9896 36447 : gcc_checking_assert (!gimple_in_ssa_p (cfun));
9897 :
9898 36447 : entry_bb = region->entry;
9899 36447 : exit_bb = region->exit;
9900 :
9901 36447 : if (target_kind == GF_OMP_TARGET_KIND_OACC_KERNELS)
9902 1651 : mark_loops_in_oacc_kernels_region (region->entry, region->exit);
9903 :
9904 : /* Going on, all OpenACC compute constructs are mapped to
9905 : 'BUILT_IN_GOACC_PARALLEL', and get their compute regions outlined.
9906 : To distinguish between them, we attach attributes. */
9907 36447 : switch (target_kind)
9908 : {
9909 6825 : case GF_OMP_TARGET_KIND_OACC_PARALLEL:
9910 6825 : DECL_ATTRIBUTES (child_fn)
9911 6825 : = tree_cons (get_identifier ("oacc parallel"),
9912 6825 : NULL_TREE, DECL_ATTRIBUTES (child_fn));
9913 6825 : break;
9914 1651 : case GF_OMP_TARGET_KIND_OACC_KERNELS:
9915 1651 : DECL_ATTRIBUTES (child_fn)
9916 1651 : = tree_cons (get_identifier ("oacc kernels"),
9917 1651 : NULL_TREE, DECL_ATTRIBUTES (child_fn));
9918 1651 : break;
9919 756 : case GF_OMP_TARGET_KIND_OACC_SERIAL:
9920 756 : DECL_ATTRIBUTES (child_fn)
9921 756 : = tree_cons (get_identifier ("oacc serial"),
9922 756 : NULL_TREE, DECL_ATTRIBUTES (child_fn));
9923 756 : break;
9924 54 : case GF_OMP_TARGET_KIND_OACC_PARALLEL_KERNELS_PARALLELIZED:
9925 54 : DECL_ATTRIBUTES (child_fn)
9926 54 : = tree_cons (get_identifier ("oacc parallel_kernels_parallelized"),
9927 54 : NULL_TREE, DECL_ATTRIBUTES (child_fn));
9928 54 : break;
9929 109 : case GF_OMP_TARGET_KIND_OACC_PARALLEL_KERNELS_GANG_SINGLE:
9930 109 : DECL_ATTRIBUTES (child_fn)
9931 109 : = tree_cons (get_identifier ("oacc parallel_kernels_gang_single"),
9932 109 : NULL_TREE, DECL_ATTRIBUTES (child_fn));
9933 109 : break;
9934 27052 : default:
9935 : /* Make sure we don't miss any. */
9936 27052 : gcc_checking_assert (!(is_gimple_omp_oacc (entry_stmt)
9937 : && is_gimple_omp_offloaded (entry_stmt)));
9938 : break;
9939 : }
9940 :
9941 36447 : if (offloaded)
9942 : {
9943 20716 : unsigned srcidx, dstidx, num;
9944 :
9945 : /* If the offloading region needs data sent from the parent
9946 : function, then the very first statement (except possible
9947 : tree profile counter updates) of the offloading body
9948 : is a copy assignment .OMP_DATA_I = &.OMP_DATA_O. Since
9949 : &.OMP_DATA_O is passed as an argument to the child function,
9950 : we need to replace it with the argument as seen by the child
9951 : function.
9952 :
9953 : In most cases, this will end up being the identity assignment
9954 : .OMP_DATA_I = .OMP_DATA_I. However, if the offloading body had
9955 : a function call that has been inlined, the original PARM_DECL
9956 : .OMP_DATA_I may have been converted into a different local
9957 : variable. In which case, we need to keep the assignment. */
9958 20716 : tree data_arg = gimple_omp_target_data_arg (entry_stmt);
9959 20716 : if (data_arg)
9960 : {
9961 16101 : basic_block entry_succ_bb = single_succ (entry_bb);
9962 16101 : gimple_stmt_iterator gsi;
9963 16101 : tree arg;
9964 16101 : gimple *tgtcopy_stmt = NULL;
9965 16101 : tree sender = TREE_VEC_ELT (data_arg, 0);
9966 :
9967 32202 : for (gsi = gsi_start_bb (entry_succ_bb); ; gsi_next (&gsi))
9968 : {
9969 16101 : gcc_assert (!gsi_end_p (gsi));
9970 16101 : stmt = gsi_stmt (gsi);
9971 16101 : if (gimple_code (stmt) != GIMPLE_ASSIGN)
9972 0 : continue;
9973 :
9974 16101 : if (gimple_num_ops (stmt) == 2)
9975 : {
9976 16101 : tree arg = gimple_assign_rhs1 (stmt);
9977 :
9978 : /* We're ignoring the subcode because we're
9979 : effectively doing a STRIP_NOPS. */
9980 :
9981 16101 : if ((TREE_CODE (arg) == ADDR_EXPR
9982 15994 : && TREE_OPERAND (arg, 0) == sender)
9983 16101 : || arg == sender)
9984 : {
9985 16101 : tgtcopy_stmt = stmt;
9986 16101 : break;
9987 : }
9988 : }
9989 : }
9990 :
9991 16101 : gcc_assert (tgtcopy_stmt != NULL);
9992 16101 : arg = DECL_ARGUMENTS (child_fn);
9993 :
9994 16101 : gcc_assert (gimple_assign_lhs (tgtcopy_stmt) == arg);
9995 16101 : gsi_remove (&gsi, true);
9996 : }
9997 :
9998 : /* Declare local variables needed in CHILD_CFUN. */
9999 20716 : block = DECL_INITIAL (child_fn);
10000 20716 : BLOCK_VARS (block) = vec2chain (child_cfun->local_decls);
10001 : /* The gimplifier could record temporaries in the offloading block
10002 : rather than in containing function's local_decls chain,
10003 : which would mean cgraph missed finalizing them. Do it now. */
10004 150695 : for (t = BLOCK_VARS (block); t; t = DECL_CHAIN (t))
10005 129979 : if (VAR_P (t) && TREE_STATIC (t) && !DECL_EXTERNAL (t))
10006 0 : varpool_node::finalize_decl (t);
10007 20716 : DECL_SAVED_TREE (child_fn) = NULL;
10008 : /* We'll create a CFG for child_fn, so no gimple body is needed. */
10009 20716 : gimple_set_body (child_fn, NULL);
10010 20716 : TREE_USED (block) = 1;
10011 :
10012 : /* Reset DECL_CONTEXT on function arguments. */
10013 41432 : for (t = DECL_ARGUMENTS (child_fn); t; t = DECL_CHAIN (t))
10014 20716 : DECL_CONTEXT (t) = child_fn;
10015 :
10016 : /* Split ENTRY_BB at GIMPLE_*,
10017 : so that it can be moved to the child function. */
10018 20716 : gsi = gsi_last_nondebug_bb (entry_bb);
10019 20716 : stmt = gsi_stmt (gsi);
10020 20716 : gcc_assert (stmt
10021 : && gimple_code (stmt) == gimple_code (entry_stmt));
10022 20716 : e = split_block (entry_bb, stmt);
10023 20716 : gsi_remove (&gsi, true);
10024 20716 : entry_bb = e->dest;
10025 20716 : single_succ_edge (entry_bb)->flags = EDGE_FALLTHRU;
10026 :
10027 : /* Convert GIMPLE_OMP_RETURN into a RETURN_EXPR. */
10028 20716 : if (exit_bb)
10029 : {
10030 20640 : gsi = gsi_last_nondebug_bb (exit_bb);
10031 20640 : gcc_assert (!gsi_end_p (gsi)
10032 : && gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_RETURN);
10033 20640 : stmt = gimple_build_return (NULL);
10034 20640 : gsi_insert_after (&gsi, stmt, GSI_SAME_STMT);
10035 20640 : gsi_remove (&gsi, true);
10036 : }
10037 :
10038 : /* Move the offloading region into CHILD_CFUN. */
10039 :
10040 20716 : block = gimple_block (entry_stmt);
10041 :
10042 20716 : new_bb = move_sese_region_to_fn (child_cfun, entry_bb, exit_bb, block);
10043 20716 : if (exit_bb)
10044 20640 : single_succ_edge (new_bb)->flags = EDGE_FALLTHRU;
10045 : /* When the OMP expansion process cannot guarantee an up-to-date
10046 : loop tree arrange for the child function to fixup loops. */
10047 20716 : if (loops_state_satisfies_p (LOOPS_NEED_FIXUP))
10048 20716 : child_cfun->x_current_loops->state |= LOOPS_NEED_FIXUP;
10049 :
10050 : /* Remove non-local VAR_DECLs from child_cfun->local_decls list. */
10051 20716 : num = vec_safe_length (child_cfun->local_decls);
10052 624052 : for (srcidx = 0, dstidx = 0; srcidx < num; srcidx++)
10053 : {
10054 603336 : t = (*child_cfun->local_decls)[srcidx];
10055 603336 : if (DECL_CONTEXT (t) == cfun->decl)
10056 129979 : continue;
10057 473357 : if (srcidx != dstidx)
10058 458389 : (*child_cfun->local_decls)[dstidx] = t;
10059 473357 : dstidx++;
10060 : }
10061 20716 : if (dstidx != num)
10062 17409 : vec_safe_truncate (child_cfun->local_decls, dstidx);
10063 :
10064 : /* Inform the callgraph about the new function. */
10065 20716 : child_cfun->curr_properties = cfun->curr_properties;
10066 20716 : child_cfun->has_simduid_loops |= cfun->has_simduid_loops;
10067 20716 : child_cfun->has_force_vectorize_loops |= cfun->has_force_vectorize_loops;
10068 20716 : cgraph_node *node = cgraph_node::get_create (child_fn);
10069 20716 : node->parallelized_function = 1;
10070 41432 : node->has_omp_variant_constructs
10071 20716 : |= cgraph_node::get (cfun->decl)->has_omp_variant_constructs;
10072 20716 : cgraph_node::add_new_function (child_fn, true);
10073 :
10074 : /* Add the new function to the offload table. */
10075 20716 : if (ENABLE_OFFLOADING)
10076 : {
10077 : if (in_lto_p)
10078 : DECL_PRESERVE_P (child_fn) = 1;
10079 : if (!is_ancestor)
10080 : vec_safe_push (offload_funcs, child_fn);
10081 : }
10082 :
10083 20716 : bool need_asm = DECL_ASSEMBLER_NAME_SET_P (current_function_decl)
10084 20716 : && !DECL_ASSEMBLER_NAME_SET_P (child_fn);
10085 :
10086 : /* Fix the callgraph edges for child_cfun. Those for cfun will be
10087 : fixed in a following pass. */
10088 20716 : push_cfun (child_cfun);
10089 20716 : if (need_asm)
10090 20710 : assign_assembler_name_if_needed (child_fn);
10091 20716 : cgraph_edge::rebuild_edges ();
10092 :
10093 : /* Some EH regions might become dead, see PR34608. If
10094 : pass_cleanup_cfg isn't the first pass to happen with the
10095 : new child, these dead EH edges might cause problems.
10096 : Clean them up now. */
10097 20716 : if (flag_exceptions)
10098 : {
10099 8196 : basic_block bb;
10100 8196 : bool changed = false;
10101 :
10102 118572 : FOR_EACH_BB_FN (bb, cfun)
10103 110376 : changed |= gimple_purge_dead_eh_edges (bb);
10104 8196 : if (changed)
10105 0 : cleanup_tree_cfg ();
10106 : }
10107 20716 : if (flag_checking && !loops_state_satisfies_p (LOOPS_NEED_FIXUP))
10108 0 : verify_loop_structure ();
10109 20716 : pop_cfun ();
10110 :
10111 20716 : if (dump_file && !gimple_in_ssa_p (cfun))
10112 : {
10113 31 : omp_any_child_fn_dumped = true;
10114 31 : dump_function_header (dump_file, child_fn, dump_flags);
10115 31 : dump_function_to_file (child_fn, dump_file, dump_flags);
10116 : }
10117 :
10118 20716 : adjust_context_and_scope (region, gimple_block (entry_stmt), child_fn);
10119 :
10120 : /* Handle the case that an inner ancestor:1 target is called by an outer
10121 : target region. */
10122 20716 : if (is_ancestor)
10123 : {
10124 : cgraph_node *fn2_node;
10125 : child_fn2 = build_decl (DECL_SOURCE_LOCATION (child_fn),
10126 : FUNCTION_DECL,
10127 : clone_function_name (child_fn, "nohost"),
10128 : TREE_TYPE (child_fn));
10129 : if (in_lto_p)
10130 : DECL_PRESERVE_P (child_fn2) = 1;
10131 : TREE_STATIC (child_fn2) = 1;
10132 : DECL_ARTIFICIAL (child_fn2) = 1;
10133 : DECL_IGNORED_P (child_fn2) = 0;
10134 : TREE_PUBLIC (child_fn2) = 0;
10135 : DECL_UNINLINABLE (child_fn2) = 1;
10136 : DECL_EXTERNAL (child_fn2) = 0;
10137 : DECL_CONTEXT (child_fn2) = DECL_CONTEXT (child_fn);
10138 : DECL_INITIAL (child_fn2) = make_node (BLOCK);
10139 : BLOCK_SUPERCONTEXT (DECL_INITIAL (child_fn2)) = child_fn2;
10140 : DECL_ATTRIBUTES (child_fn)
10141 : = remove_attribute ("omp target entrypoint",
10142 : DECL_ATTRIBUTES (child_fn));
10143 : DECL_ATTRIBUTES (child_fn2)
10144 : = tree_cons (get_identifier ("omp target device_ancestor_nohost"),
10145 : NULL_TREE, copy_list (DECL_ATTRIBUTES (child_fn)));
10146 : DECL_ATTRIBUTES (child_fn)
10147 : = tree_cons (get_identifier ("omp target device_ancestor_host"),
10148 : NULL_TREE, DECL_ATTRIBUTES (child_fn));
10149 : DECL_FUNCTION_SPECIFIC_OPTIMIZATION (child_fn2)
10150 : = DECL_FUNCTION_SPECIFIC_OPTIMIZATION (current_function_decl);
10151 : DECL_FUNCTION_SPECIFIC_TARGET (child_fn2)
10152 : = DECL_FUNCTION_SPECIFIC_TARGET (current_function_decl);
10153 : DECL_FUNCTION_VERSIONED (child_fn2)
10154 : = DECL_FUNCTION_VERSIONED (current_function_decl);
10155 :
10156 : fn2_node = cgraph_node::get_create (child_fn2);
10157 : fn2_node->offloadable = 1;
10158 : fn2_node->force_output = 1;
10159 : node->offloadable = 0;
10160 :
10161 : /* Enable pass_omp_device_lower pass. */
10162 : fn2_node = cgraph_node::get (DECL_CONTEXT (child_fn));
10163 : fn2_node->has_omp_variant_constructs = 1;
10164 :
10165 : t = build_decl (DECL_SOURCE_LOCATION (child_fn),
10166 : RESULT_DECL, NULL_TREE, void_type_node);
10167 : DECL_ARTIFICIAL (t) = 1;
10168 : DECL_IGNORED_P (t) = 1;
10169 : DECL_CONTEXT (t) = child_fn2;
10170 : DECL_RESULT (child_fn2) = t;
10171 : DECL_SAVED_TREE (child_fn2) = build1 (RETURN_EXPR,
10172 : void_type_node, NULL);
10173 : tree tmp = DECL_ARGUMENTS (child_fn);
10174 : t = build_decl (DECL_SOURCE_LOCATION (child_fn), PARM_DECL,
10175 : DECL_NAME (tmp), TREE_TYPE (tmp));
10176 : DECL_ARTIFICIAL (t) = 1;
10177 : DECL_NAMELESS (t) = 1;
10178 : DECL_ARG_TYPE (t) = ptr_type_node;
10179 : DECL_CONTEXT (t) = current_function_decl;
10180 : TREE_USED (t) = 1;
10181 : TREE_READONLY (t) = 1;
10182 : DECL_ARGUMENTS (child_fn2) = t;
10183 : gcc_assert (TREE_CHAIN (tmp) == NULL_TREE);
10184 :
10185 : gimplify_function_tree (child_fn2);
10186 : cgraph_node::add_new_function (child_fn2, true);
10187 :
10188 : vec_safe_push (offload_funcs, child_fn2);
10189 : if (dump_file && !gimple_in_ssa_p (cfun))
10190 : {
10191 : dump_function_header (dump_file, child_fn2, dump_flags);
10192 : dump_function_to_file (child_fn2, dump_file, dump_flags);
10193 : }
10194 : }
10195 : }
10196 :
10197 : /* Emit a library call to launch the offloading region, or do data
10198 : transfers. */
10199 36447 : tree t1, t2, t3, t4, depend;
10200 36447 : enum built_in_function start_ix;
10201 36447 : unsigned int flags_i = 0;
10202 :
10203 36447 : switch (gimple_omp_target_kind (entry_stmt))
10204 : {
10205 : case GF_OMP_TARGET_KIND_REGION:
10206 : start_ix = BUILT_IN_GOMP_TARGET;
10207 : break;
10208 : case GF_OMP_TARGET_KIND_DATA:
10209 : start_ix = BUILT_IN_GOMP_TARGET_DATA;
10210 : break;
10211 : case GF_OMP_TARGET_KIND_UPDATE:
10212 : start_ix = BUILT_IN_GOMP_TARGET_UPDATE;
10213 : break;
10214 : case GF_OMP_TARGET_KIND_ENTER_DATA:
10215 : start_ix = BUILT_IN_GOMP_TARGET_ENTER_EXIT_DATA;
10216 : break;
10217 : case GF_OMP_TARGET_KIND_EXIT_DATA:
10218 : start_ix = BUILT_IN_GOMP_TARGET_ENTER_EXIT_DATA;
10219 : flags_i |= GOMP_TARGET_FLAG_EXIT_DATA;
10220 : break;
10221 : case GF_OMP_TARGET_KIND_OACC_PARALLEL:
10222 : case GF_OMP_TARGET_KIND_OACC_KERNELS:
10223 : case GF_OMP_TARGET_KIND_OACC_SERIAL:
10224 : case GF_OMP_TARGET_KIND_OACC_PARALLEL_KERNELS_PARALLELIZED:
10225 : case GF_OMP_TARGET_KIND_OACC_PARALLEL_KERNELS_GANG_SINGLE:
10226 : start_ix = BUILT_IN_GOACC_PARALLEL;
10227 : break;
10228 : case GF_OMP_TARGET_KIND_OACC_DATA:
10229 : case GF_OMP_TARGET_KIND_OACC_HOST_DATA:
10230 : case GF_OMP_TARGET_KIND_OACC_DATA_KERNELS:
10231 : start_ix = BUILT_IN_GOACC_DATA_START;
10232 : break;
10233 : case GF_OMP_TARGET_KIND_OACC_UPDATE:
10234 : start_ix = BUILT_IN_GOACC_UPDATE;
10235 : break;
10236 : case GF_OMP_TARGET_KIND_OACC_ENTER_DATA:
10237 : start_ix = BUILT_IN_GOACC_ENTER_DATA;
10238 : break;
10239 : case GF_OMP_TARGET_KIND_OACC_EXIT_DATA:
10240 : start_ix = BUILT_IN_GOACC_EXIT_DATA;
10241 : break;
10242 : case GF_OMP_TARGET_KIND_OACC_DECLARE:
10243 : start_ix = BUILT_IN_GOACC_DECLARE;
10244 : break;
10245 0 : default:
10246 0 : gcc_unreachable ();
10247 : }
10248 :
10249 36447 : tree device = NULL_TREE;
10250 36447 : location_t device_loc = UNKNOWN_LOCATION;
10251 36447 : tree goacc_flags = NULL_TREE;
10252 36447 : bool need_device_adjustment = false;
10253 36447 : gimple_stmt_iterator adj_gsi;
10254 36447 : if (is_gimple_omp_oacc (entry_stmt))
10255 : {
10256 : /* By default, no GOACC_FLAGs are set. */
10257 14785 : goacc_flags = integer_zero_node;
10258 : }
10259 : else
10260 : {
10261 21662 : c = omp_find_clause (clauses, OMP_CLAUSE_DEVICE);
10262 21662 : if (c)
10263 : {
10264 925 : device = OMP_CLAUSE_DEVICE_ID (c);
10265 : /* Ensure 'device' is of the correct type. */
10266 925 : device = fold_convert_loc (device_loc, integer_type_node, device);
10267 925 : if (TREE_CODE (device) == INTEGER_CST)
10268 : {
10269 54 : if (wi::to_wide (device) == GOMP_DEVICE_ICV)
10270 2 : device = build_int_cst (integer_type_node,
10271 : GOMP_DEVICE_HOST_FALLBACK);
10272 52 : else if (wi::to_wide (device) == GOMP_DEVICE_HOST_FALLBACK)
10273 0 : device = build_int_cst (integer_type_node,
10274 : GOMP_DEVICE_HOST_FALLBACK - 1);
10275 : }
10276 : else
10277 : need_device_adjustment = true;
10278 925 : device_loc = OMP_CLAUSE_LOCATION (c);
10279 925 : if (OMP_CLAUSE_DEVICE_ANCESTOR (c))
10280 41 : device = build_int_cst (integer_type_node,
10281 : GOMP_DEVICE_HOST_FALLBACK);
10282 : }
10283 : else
10284 : {
10285 : /* By default, the value of DEVICE is GOMP_DEVICE_ICV (let runtime
10286 : library choose). */
10287 20737 : device = build_int_cst (integer_type_node, GOMP_DEVICE_ICV);
10288 20737 : device_loc = gimple_location (entry_stmt);
10289 : }
10290 :
10291 21662 : c = omp_find_clause (clauses, OMP_CLAUSE_NOWAIT);
10292 : /* FIXME: in_reduction(...) nowait is unimplemented yet, pretend
10293 : nowait doesn't appear. */
10294 21662 : if (c && omp_find_clause (clauses, OMP_CLAUSE_IN_REDUCTION))
10295 : c = NULL;
10296 21398 : if (c)
10297 137 : flags_i |= GOMP_TARGET_FLAG_NOWAIT;
10298 : }
10299 :
10300 : /* By default, there is no conditional. */
10301 36447 : tree cond = NULL_TREE;
10302 36447 : c = omp_find_clause (clauses, OMP_CLAUSE_IF);
10303 36447 : if (c)
10304 1741 : cond = OMP_CLAUSE_IF_EXPR (c);
10305 : /* If we found the clause 'if (cond)', build:
10306 : OpenACC: goacc_flags = (cond ? goacc_flags
10307 : : goacc_flags | GOACC_FLAG_HOST_FALLBACK)
10308 : OpenMP: device = (cond ? device : GOMP_DEVICE_HOST_FALLBACK) */
10309 1741 : if (cond)
10310 : {
10311 1741 : tree *tp;
10312 1741 : if (is_gimple_omp_oacc (entry_stmt))
10313 : tp = &goacc_flags;
10314 : else
10315 666 : tp = &device;
10316 :
10317 1741 : cond = gimple_boolify (cond);
10318 :
10319 1741 : basic_block cond_bb, then_bb, else_bb;
10320 1741 : edge e;
10321 1741 : tree tmp_var = create_tmp_var (TREE_TYPE (*tp));
10322 1741 : if (offloaded)
10323 806 : e = split_block_after_labels (new_bb);
10324 : else
10325 : {
10326 935 : gsi = gsi_last_nondebug_bb (new_bb);
10327 935 : gsi_prev (&gsi);
10328 935 : e = split_block (new_bb, gsi_stmt (gsi));
10329 : }
10330 1741 : cond_bb = e->src;
10331 1741 : new_bb = e->dest;
10332 1741 : remove_edge (e);
10333 :
10334 1741 : then_bb = create_empty_bb (cond_bb);
10335 1741 : else_bb = create_empty_bb (then_bb);
10336 1741 : set_immediate_dominator (CDI_DOMINATORS, then_bb, cond_bb);
10337 1741 : set_immediate_dominator (CDI_DOMINATORS, else_bb, cond_bb);
10338 :
10339 1741 : stmt = gimple_build_cond_empty (cond);
10340 1741 : gsi = gsi_last_bb (cond_bb);
10341 1741 : gsi_insert_after (&gsi, stmt, GSI_CONTINUE_LINKING);
10342 :
10343 1741 : gsi = gsi_start_bb (then_bb);
10344 1741 : stmt = gimple_build_assign (tmp_var, *tp);
10345 1741 : gsi_insert_after (&gsi, stmt, GSI_CONTINUE_LINKING);
10346 1741 : adj_gsi = gsi;
10347 :
10348 1741 : gsi = gsi_start_bb (else_bb);
10349 1741 : if (is_gimple_omp_oacc (entry_stmt))
10350 1075 : stmt = gimple_build_assign (tmp_var,
10351 : BIT_IOR_EXPR,
10352 : *tp,
10353 : build_int_cst (integer_type_node,
10354 : GOACC_FLAG_HOST_FALLBACK));
10355 : else
10356 666 : stmt = gimple_build_assign (tmp_var,
10357 : build_int_cst (integer_type_node,
10358 : GOMP_DEVICE_HOST_FALLBACK));
10359 1741 : gsi_insert_after (&gsi, stmt, GSI_CONTINUE_LINKING);
10360 :
10361 1741 : make_edge (cond_bb, then_bb, EDGE_TRUE_VALUE);
10362 1741 : make_edge (cond_bb, else_bb, EDGE_FALSE_VALUE);
10363 1741 : add_bb_to_loop (then_bb, cond_bb->loop_father);
10364 1741 : add_bb_to_loop (else_bb, cond_bb->loop_father);
10365 1741 : make_edge (then_bb, new_bb, EDGE_FALLTHRU);
10366 1741 : make_edge (else_bb, new_bb, EDGE_FALLTHRU);
10367 :
10368 1741 : *tp = tmp_var;
10369 :
10370 1741 : gsi = gsi_last_nondebug_bb (new_bb);
10371 : }
10372 : else
10373 : {
10374 34706 : gsi = gsi_last_nondebug_bb (new_bb);
10375 :
10376 34706 : if (device != NULL_TREE)
10377 20996 : device = force_gimple_operand_gsi (&gsi, device, true, NULL_TREE,
10378 : true, GSI_SAME_STMT);
10379 34706 : if (need_device_adjustment)
10380 : {
10381 397 : tree tmp_var = create_tmp_var (TREE_TYPE (device));
10382 397 : stmt = gimple_build_assign (tmp_var, device);
10383 397 : gsi_insert_before (&gsi, stmt, GSI_SAME_STMT);
10384 397 : adj_gsi = gsi_for_stmt (stmt);
10385 397 : device = tmp_var;
10386 : }
10387 : }
10388 :
10389 36447 : if ((c = omp_find_clause (clauses, OMP_CLAUSE_SELF)) != NULL_TREE)
10390 : {
10391 324 : gcc_assert ((is_gimple_omp_oacc (entry_stmt) && offloaded)
10392 : || (gimple_omp_target_kind (entry_stmt)
10393 : == GF_OMP_TARGET_KIND_OACC_DATA_KERNELS));
10394 :
10395 87 : edge e;
10396 87 : if (offloaded)
10397 237 : e = split_block_after_labels (new_bb);
10398 : else
10399 : {
10400 87 : gsi = gsi_last_nondebug_bb (new_bb);
10401 87 : gsi_prev (&gsi);
10402 87 : e = split_block (new_bb, gsi_stmt (gsi));
10403 : }
10404 324 : basic_block cond_bb = e->src;
10405 324 : new_bb = e->dest;
10406 324 : remove_edge (e);
10407 :
10408 324 : basic_block then_bb = create_empty_bb (cond_bb);
10409 324 : basic_block else_bb = create_empty_bb (then_bb);
10410 324 : set_immediate_dominator (CDI_DOMINATORS, then_bb, cond_bb);
10411 324 : set_immediate_dominator (CDI_DOMINATORS, else_bb, cond_bb);
10412 :
10413 324 : tree self_cond = gimple_boolify (OMP_CLAUSE_SELF_EXPR (c));
10414 324 : stmt = gimple_build_cond_empty (self_cond);
10415 324 : gsi = gsi_last_bb (cond_bb);
10416 324 : gsi_insert_after (&gsi, stmt, GSI_CONTINUE_LINKING);
10417 :
10418 324 : tree tmp_var = create_tmp_var (TREE_TYPE (goacc_flags));
10419 324 : stmt = gimple_build_assign (tmp_var, BIT_IOR_EXPR, goacc_flags,
10420 : build_int_cst (integer_type_node,
10421 : GOACC_FLAG_LOCAL_DEVICE));
10422 324 : gsi = gsi_start_bb (then_bb);
10423 324 : gsi_insert_after (&gsi, stmt, GSI_CONTINUE_LINKING);
10424 :
10425 324 : gsi = gsi_start_bb (else_bb);
10426 324 : stmt = gimple_build_assign (tmp_var, goacc_flags);
10427 324 : gsi_insert_after (&gsi, stmt, GSI_CONTINUE_LINKING);
10428 :
10429 324 : make_edge (cond_bb, then_bb, EDGE_TRUE_VALUE);
10430 324 : make_edge (cond_bb, else_bb, EDGE_FALSE_VALUE);
10431 324 : add_bb_to_loop (then_bb, cond_bb->loop_father);
10432 324 : add_bb_to_loop (else_bb, cond_bb->loop_father);
10433 324 : make_edge (then_bb, new_bb, EDGE_FALLTHRU);
10434 324 : make_edge (else_bb, new_bb, EDGE_FALLTHRU);
10435 :
10436 324 : goacc_flags = tmp_var;
10437 324 : gsi = gsi_last_nondebug_bb (new_bb);
10438 : }
10439 :
10440 36447 : if (need_device_adjustment)
10441 : {
10442 871 : tree uns = fold_convert (unsigned_type_node, device);
10443 871 : uns = force_gimple_operand_gsi (&adj_gsi, uns, true, NULL_TREE,
10444 : false, GSI_CONTINUE_LINKING);
10445 871 : edge e = split_block (gsi_bb (adj_gsi), gsi_stmt (adj_gsi));
10446 871 : basic_block cond_bb = e->src;
10447 871 : basic_block else_bb = e->dest;
10448 871 : if (gsi_bb (adj_gsi) == new_bb)
10449 : {
10450 397 : new_bb = else_bb;
10451 397 : gsi = gsi_last_nondebug_bb (new_bb);
10452 : }
10453 :
10454 871 : basic_block then_bb = create_empty_bb (cond_bb);
10455 871 : set_immediate_dominator (CDI_DOMINATORS, then_bb, cond_bb);
10456 :
10457 871 : cond = build2 (GT_EXPR, boolean_type_node, uns,
10458 : build_int_cst (unsigned_type_node,
10459 : GOMP_DEVICE_HOST_FALLBACK - 1));
10460 871 : stmt = gimple_build_cond_empty (cond);
10461 871 : adj_gsi = gsi_last_bb (cond_bb);
10462 871 : gsi_insert_after (&adj_gsi, stmt, GSI_CONTINUE_LINKING);
10463 :
10464 871 : adj_gsi = gsi_start_bb (then_bb);
10465 871 : tree add = build2 (PLUS_EXPR, integer_type_node, device,
10466 : build_int_cst (integer_type_node, -1));
10467 871 : stmt = gimple_build_assign (device, add);
10468 871 : gsi_insert_after (&adj_gsi, stmt, GSI_CONTINUE_LINKING);
10469 :
10470 871 : make_edge (cond_bb, then_bb, EDGE_TRUE_VALUE);
10471 871 : e->flags = EDGE_FALSE_VALUE;
10472 871 : add_bb_to_loop (then_bb, cond_bb->loop_father);
10473 871 : make_edge (then_bb, else_bb, EDGE_FALLTHRU);
10474 : }
10475 :
10476 36447 : t = gimple_omp_target_data_arg (entry_stmt);
10477 36447 : if (t == NULL)
10478 : {
10479 4852 : t1 = size_zero_node;
10480 4852 : t2 = build_zero_cst (ptr_type_node);
10481 4852 : t3 = t2;
10482 4852 : t4 = t2;
10483 : }
10484 31595 : else if (TREE_VEC_LENGTH (t) == 3 || is_gimple_omp_oacc (entry_stmt))
10485 : {
10486 31453 : t1 = TYPE_MAX_VALUE (TYPE_DOMAIN (TREE_TYPE (TREE_VEC_ELT (t, 1))));
10487 31453 : t1 = size_binop (PLUS_EXPR, t1, size_int (1));
10488 31453 : t2 = build_fold_addr_expr (TREE_VEC_ELT (t, 0));
10489 31453 : t3 = build_fold_addr_expr (TREE_VEC_ELT (t, 1));
10490 31453 : t4 = build_fold_addr_expr (TREE_VEC_ELT (t, 2));
10491 : }
10492 : else
10493 : {
10494 142 : t1 = force_gimple_operand_gsi (&gsi, TREE_VEC_ELT (t, 3), true, NULL_TREE,
10495 : true, GSI_SAME_STMT);
10496 142 : t2 = force_gimple_operand_gsi (&gsi, TREE_VEC_ELT (t, 0), true, NULL_TREE,
10497 : true, GSI_SAME_STMT);
10498 142 : t3 = force_gimple_operand_gsi (&gsi, TREE_VEC_ELT (t, 1), true, NULL_TREE,
10499 : true, GSI_SAME_STMT);
10500 142 : t4 = force_gimple_operand_gsi (&gsi, TREE_VEC_ELT (t, 2), true, NULL_TREE,
10501 : true, GSI_SAME_STMT);
10502 : }
10503 :
10504 36447 : gimple *g;
10505 36447 : bool tagging = false;
10506 : /* The maximum number used by any start_ix, without varargs. */
10507 36447 : auto_vec<tree, 11> args;
10508 36447 : if (is_gimple_omp_oacc (entry_stmt))
10509 : {
10510 14785 : tree goacc_flags_m = fold_build1 (GOACC_FLAGS_MARSHAL_OP,
10511 : TREE_TYPE (goacc_flags), goacc_flags);
10512 14785 : goacc_flags_m = force_gimple_operand_gsi (&gsi, goacc_flags_m, true,
10513 : NULL_TREE, true,
10514 : GSI_SAME_STMT);
10515 14785 : args.quick_push (goacc_flags_m);
10516 : }
10517 : else
10518 21662 : args.quick_push (device);
10519 36447 : if (offloaded)
10520 20716 : args.quick_push (build_fold_addr_expr (child_fn2 ? child_fn2 : child_fn));
10521 36447 : args.quick_push (t1);
10522 36447 : args.quick_push (t2);
10523 36447 : args.quick_push (t3);
10524 36447 : args.quick_push (t4);
10525 36447 : switch (start_ix)
10526 : {
10527 : case BUILT_IN_GOACC_DATA_START:
10528 : case BUILT_IN_GOACC_DECLARE:
10529 : case BUILT_IN_GOMP_TARGET_DATA:
10530 : break;
10531 20037 : case BUILT_IN_GOMP_TARGET:
10532 20037 : case BUILT_IN_GOMP_TARGET_UPDATE:
10533 20037 : case BUILT_IN_GOMP_TARGET_ENTER_EXIT_DATA:
10534 20037 : args.quick_push (build_int_cst (unsigned_type_node, flags_i));
10535 20037 : c = omp_find_clause (clauses, OMP_CLAUSE_DEPEND);
10536 20037 : if (c)
10537 329 : depend = OMP_CLAUSE_DECL (c);
10538 : else
10539 19708 : depend = build_int_cst (ptr_type_node, 0);
10540 20037 : args.quick_push (depend);
10541 20037 : if (start_ix == BUILT_IN_GOMP_TARGET)
10542 11321 : args.quick_push (get_target_arguments (&gsi, entry_stmt));
10543 : break;
10544 9395 : case BUILT_IN_GOACC_PARALLEL:
10545 9395 : if (lookup_attribute ("oacc serial", DECL_ATTRIBUTES (child_fn)) != NULL)
10546 : {
10547 : tree dims = NULL_TREE;
10548 : unsigned int ix;
10549 :
10550 : /* For serial constructs we set all dimensions to 1. */
10551 3024 : for (ix = GOMP_DIM_MAX; ix--;)
10552 2268 : dims = tree_cons (NULL_TREE, integer_one_node, dims);
10553 756 : oacc_replace_fn_attrib (child_fn, dims);
10554 : }
10555 : else
10556 8639 : oacc_set_fn_attrib (child_fn, clauses, &args);
10557 : tagging = true;
10558 : /* FALLTHRU */
10559 12136 : case BUILT_IN_GOACC_ENTER_DATA:
10560 12136 : case BUILT_IN_GOACC_EXIT_DATA:
10561 12136 : case BUILT_IN_GOACC_UPDATE:
10562 12136 : {
10563 12136 : tree t_async = NULL_TREE;
10564 :
10565 : /* If present, use the value specified by the respective
10566 : clause, making sure that is of the correct type. */
10567 12136 : c = omp_find_clause (clauses, OMP_CLAUSE_ASYNC);
10568 12136 : if (c)
10569 1681 : t_async = fold_convert_loc (OMP_CLAUSE_LOCATION (c),
10570 : integer_type_node,
10571 1681 : OMP_CLAUSE_ASYNC_EXPR (c));
10572 10455 : else if (!tagging)
10573 : /* Default values for t_async. */
10574 2431 : t_async = fold_convert_loc (gimple_location (entry_stmt),
10575 : integer_type_node,
10576 : build_int_cst (integer_type_node,
10577 : GOMP_ASYNC_SYNC));
10578 12136 : if (tagging && t_async)
10579 : {
10580 1371 : unsigned HOST_WIDE_INT i_async = GOMP_LAUNCH_OP_MAX;
10581 :
10582 1371 : if (TREE_CODE (t_async) == INTEGER_CST)
10583 : {
10584 : /* See if we can pack the async arg in to the tag's
10585 : operand. */
10586 1331 : i_async = TREE_INT_CST_LOW (t_async);
10587 1331 : if (i_async < GOMP_LAUNCH_OP_MAX)
10588 : t_async = NULL_TREE;
10589 : else
10590 825 : i_async = GOMP_LAUNCH_OP_MAX;
10591 : }
10592 1371 : args.safe_push (oacc_launch_pack (GOMP_LAUNCH_ASYNC, NULL_TREE,
10593 : i_async));
10594 : }
10595 12136 : if (t_async)
10596 3566 : args.safe_push (force_gimple_operand_gsi (&gsi, t_async, true,
10597 : NULL_TREE, true,
10598 : GSI_SAME_STMT));
10599 :
10600 : /* Save the argument index, and ... */
10601 12136 : unsigned t_wait_idx = args.length ();
10602 12136 : unsigned num_waits = 0;
10603 12136 : c = omp_find_clause (clauses, OMP_CLAUSE_WAIT);
10604 12136 : if (!tagging || c)
10605 : /* ... push a placeholder. */
10606 2944 : args.safe_push (integer_zero_node);
10607 :
10608 13986 : for (; c; c = OMP_CLAUSE_CHAIN (c))
10609 1850 : if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_WAIT)
10610 : {
10611 422 : tree arg = fold_convert_loc (OMP_CLAUSE_LOCATION (c),
10612 : integer_type_node,
10613 422 : OMP_CLAUSE_WAIT_EXPR (c));
10614 422 : arg = force_gimple_operand_gsi (&gsi, arg, true, NULL_TREE, true,
10615 : GSI_SAME_STMT);
10616 422 : args.safe_push (arg);
10617 422 : num_waits++;
10618 : }
10619 :
10620 12136 : if (!tagging || num_waits)
10621 : {
10622 203 : tree len;
10623 :
10624 : /* Now that we know the number, update the placeholder. */
10625 203 : if (tagging)
10626 203 : len = oacc_launch_pack (GOMP_LAUNCH_WAIT, NULL_TREE, num_waits);
10627 : else
10628 2741 : len = build_int_cst (integer_type_node, num_waits);
10629 2944 : len = fold_convert_loc (gimple_location (entry_stmt),
10630 : unsigned_type_node, len);
10631 2944 : args[t_wait_idx] = len;
10632 : }
10633 : }
10634 12136 : break;
10635 0 : default:
10636 0 : gcc_unreachable ();
10637 : }
10638 23457 : if (tagging)
10639 : /* Push terminal marker - zero. */
10640 9395 : args.safe_push (oacc_launch_pack (0, NULL_TREE, 0));
10641 :
10642 36447 : if (child_fn2)
10643 : {
10644 : g = gimple_build_call_internal (IFN_GOMP_TARGET_REV, 1,
10645 : build_fold_addr_expr (child_fn));
10646 : gimple_set_location (g, gimple_location (entry_stmt));
10647 : gsi_insert_before (&gsi, g, GSI_SAME_STMT);
10648 : }
10649 :
10650 36447 : g = gimple_build_call_vec (builtin_decl_explicit (start_ix), args);
10651 36447 : gimple_set_location (g, gimple_location (entry_stmt));
10652 36447 : gsi_insert_before (&gsi, g, GSI_SAME_STMT);
10653 36447 : if (!offloaded)
10654 : {
10655 15731 : g = gsi_stmt (gsi);
10656 15731 : gcc_assert (g && gimple_code (g) == GIMPLE_OMP_TARGET);
10657 15731 : gsi_remove (&gsi, true);
10658 : }
10659 36447 : }
10660 :
10661 : /* Expand the parallel region tree rooted at REGION. Expansion
10662 : proceeds in depth-first order. Innermost regions are expanded
10663 : first. This way, parallel regions that require a new function to
10664 : be created (e.g., GIMPLE_OMP_PARALLEL) can be expanded without having any
10665 : internal dependencies in their body. */
10666 :
static void
expand_omp (struct omp_region *region)
{
  /* Tracks whether expansion of any region in this forest dumped an
     outlined child function; used below to re-emit the dump header for
     the current function.  */
  omp_any_child_fn_dumped = false;
  /* Iterate over the sibling regions at this nesting level.  */
  while (region)
    {
      location_t saved_location;
      gimple *inner_stmt = NULL;

      /* First, determine whether this is a combined parallel+workshare
	 region.  */
      if (region->type == GIMPLE_OMP_PARALLEL)
	determine_parallel_type (region);

      /* For a combined worksharing loop, grab the nested statement now,
	 before the inner region is expanded away below.  */
      if (region->type == GIMPLE_OMP_FOR
	  && gimple_omp_for_combined_p (last_nondebug_stmt (region->entry)))
	inner_stmt = last_nondebug_stmt (region->inner->entry);

      /* Depth-first: innermost regions are expanded first (see the
	 comment above this function).  */
      if (region->inner)
	expand_omp (region->inner);

      /* Point input_location at the directive while expanding it so any
	 diagnostics refer to the right place; restored below.  */
      saved_location = input_location;
      if (gimple_has_location (last_nondebug_stmt (region->entry)))
	input_location = gimple_location (last_nondebug_stmt (region->entry));

      switch (region->type)
	{
	case GIMPLE_OMP_PARALLEL:
	case GIMPLE_OMP_TASK:
	  expand_omp_taskreg (region);
	  break;

	case GIMPLE_OMP_FOR:
	  expand_omp_for (region, inner_stmt);
	  break;

	case GIMPLE_OMP_SECTIONS:
	  expand_omp_sections (region);
	  break;

	case GIMPLE_OMP_SECTION:
	  /* Individual omp sections are handled together with their
	     parent GIMPLE_OMP_SECTIONS region.  */
	  break;

	case GIMPLE_OMP_STRUCTURED_BLOCK:
	  /* We should have gotten rid of these in gimple lowering.  */
	  gcc_unreachable ();

	case GIMPLE_OMP_SINGLE:
	case GIMPLE_OMP_SCOPE:
	  expand_omp_single (region);
	  break;

	case GIMPLE_OMP_ORDERED:
	  {
	    gomp_ordered *ord_stmt
	      = as_a <gomp_ordered *> (last_nondebug_stmt (region->entry));
	    if (gimple_omp_ordered_standalone_p (ord_stmt))
	      {
		/* We'll expand these when expanding corresponding
		   worksharing region with ordered(n) clause.  */
		gcc_assert (region->outer
			    && region->outer->type == GIMPLE_OMP_FOR);
		region->ord_stmt = ord_stmt;
		break;
	      }
	  }
	  /* Non-standalone ordered regions are plain synchronization
	     constructs and share the expansion path below.  */
	  /* FALLTHRU */
	case GIMPLE_OMP_MASTER:
	case GIMPLE_OMP_MASKED:
	case GIMPLE_OMP_TASKGROUP:
	case GIMPLE_OMP_CRITICAL:
	case GIMPLE_OMP_TEAMS:
	  expand_omp_synch (region);
	  break;

	case GIMPLE_OMP_ATOMIC_LOAD:
	  expand_omp_atomic (region);
	  break;

	case GIMPLE_OMP_TARGET:
	  expand_omp_target (region);
	  break;

	default:
	  gcc_unreachable ();
	}

      input_location = saved_location;
      region = region->next;
    }
  /* If any child function was dumped during expansion, re-emit the dump
     header for the enclosing function so subsequent dump output is
     attributed correctly.  */
  if (omp_any_child_fn_dumped)
    {
      if (dump_file)
	dump_function_header (dump_file, current_function_decl, dump_flags);
      omp_any_child_fn_dumped = false;
    }
}
10766 :
10767 : /* Helper for build_omp_regions. Scan the dominator tree starting at
10768 : block BB. PARENT is the region that contains BB. If SINGLE_TREE is
10769 : true, the function ends once a single tree is built (otherwise, whole
10770 : forest of OMP constructs may be built). */
10771 :
static void
build_omp_regions_1 (basic_block bb, struct omp_region *parent,
		     bool single_tree)
{
  gimple_stmt_iterator gsi;
  gimple *stmt;
  basic_block son;

  /* Only the last (non-debug) statement of a block can open or close an
     OMP region, so that is the only statement inspected.  */
  gsi = gsi_last_nondebug_bb (bb);
  if (!gsi_end_p (gsi) && is_gimple_omp (gsi_stmt (gsi)))
    {
      struct omp_region *region;
      enum gimple_code code;

      stmt = gsi_stmt (gsi);
      code = gimple_code (stmt);
      if (code == GIMPLE_OMP_RETURN)
	{
	  /* STMT is the return point out of region PARENT.  Mark it
	     as the exit point and make PARENT the immediately
	     enclosing region.  */
	  gcc_assert (parent);
	  region = parent;
	  region->exit = bb;
	  parent = parent->outer;
	}
      else if (code == GIMPLE_OMP_ATOMIC_STORE)
	{
	  /* GIMPLE_OMP_ATOMIC_STORE is analogous to
	     GIMPLE_OMP_RETURN, but matches with
	     GIMPLE_OMP_ATOMIC_LOAD.  */
	  gcc_assert (parent);
	  gcc_assert (parent->type == GIMPLE_OMP_ATOMIC_LOAD);
	  region = parent;
	  region->exit = bb;
	  parent = parent->outer;
	}
      else if (code == GIMPLE_OMP_CONTINUE)
	{
	  /* A continue stays inside PARENT; just record its block.  */
	  gcc_assert (parent);
	  parent->cont = bb;
	}
      else if (code == GIMPLE_OMP_SECTIONS_SWITCH)
	{
	  /* GIMPLE_OMP_SECTIONS_SWITCH is part of
	     GIMPLE_OMP_SECTIONS, and we do nothing for it.  */
	}
      else
	{
	  region = new_omp_region (bb, code, parent);
	  /* Otherwise...  */
	  if (code == GIMPLE_OMP_TARGET)
	    {
	      switch (gimple_omp_target_kind (stmt))
		{
		case GF_OMP_TARGET_KIND_REGION:
		case GF_OMP_TARGET_KIND_OACC_PARALLEL:
		case GF_OMP_TARGET_KIND_OACC_KERNELS:
		case GF_OMP_TARGET_KIND_OACC_SERIAL:
		case GF_OMP_TARGET_KIND_OACC_PARALLEL_KERNELS_PARALLELIZED:
		case GF_OMP_TARGET_KIND_OACC_PARALLEL_KERNELS_GANG_SINGLE:
		  break;
		case GF_OMP_TARGET_KIND_UPDATE:
		case GF_OMP_TARGET_KIND_ENTER_DATA:
		case GF_OMP_TARGET_KIND_EXIT_DATA:
		case GF_OMP_TARGET_KIND_DATA:
		case GF_OMP_TARGET_KIND_OACC_DATA:
		case GF_OMP_TARGET_KIND_OACC_HOST_DATA:
		case GF_OMP_TARGET_KIND_OACC_DATA_KERNELS:
		case GF_OMP_TARGET_KIND_OACC_UPDATE:
		case GF_OMP_TARGET_KIND_OACC_ENTER_DATA:
		case GF_OMP_TARGET_KIND_OACC_EXIT_DATA:
		case GF_OMP_TARGET_KIND_OACC_DECLARE:
		  /* ..., other than for those stand-alone directives...
		     To be precise, target data isn't stand-alone, but
		     gimplifier put the end API call into try finally block
		     for it, so omp expansion can treat it as such.  */
		  region = NULL;
		  break;
		default:
		  gcc_unreachable ();
		}
	    }
	  else if (code == GIMPLE_OMP_ORDERED
		   && gimple_omp_ordered_standalone_p (stmt))
	    /* #pragma omp ordered depend is also just a stand-alone
	       directive.  */
	    region = NULL;
	  else if (code == GIMPLE_OMP_TASK
		   && gimple_omp_task_taskwait_p (stmt))
	    /* #pragma omp taskwait depend(...) is a stand-alone directive.  */
	    region = NULL;
	  else if (code == GIMPLE_OMP_TASKGROUP)
	    /* #pragma omp taskgroup isn't a stand-alone directive, but
	       gimplifier put the end API call into try finally block
	       for it, so omp expansion can treat it as such.  */
	    region = NULL;
	  /* ..., this directive becomes the parent for a new region.  */
	  if (region)
	    parent = region;
	}
    }

  /* In single-tree mode, stop as soon as the tree is complete, i.e. once
     PARENT has dropped back to NULL.  */
  if (single_tree && !parent)
    return;

  /* Recurse into the blocks dominated by BB.  */
  for (son = first_dom_son (CDI_DOMINATORS, bb);
       son;
       son = next_dom_son (CDI_DOMINATORS, son))
    build_omp_regions_1 (son, parent, single_tree);
}
10883 :
10884 : /* Builds the tree of OMP regions rooted at ROOT, storing it to
10885 : root_omp_region. */
10886 :
10887 : static void
10888 0 : build_omp_regions_root (basic_block root)
10889 : {
10890 0 : gcc_assert (root_omp_region == NULL);
10891 0 : build_omp_regions_1 (root, NULL, true);
10892 0 : gcc_assert (root_omp_region != NULL);
10893 0 : }
10894 :
10895 : /* Expands omp construct (and its subconstructs) starting in HEAD. */
10896 :
10897 : void
10898 0 : omp_expand_local (basic_block head)
10899 : {
10900 0 : build_omp_regions_root (head);
10901 0 : if (dump_file && (dump_flags & TDF_DETAILS))
10902 : {
10903 0 : fprintf (dump_file, "\nOMP region tree\n\n");
10904 0 : dump_omp_region (dump_file, root_omp_region, 0);
10905 0 : fprintf (dump_file, "\n");
10906 : }
10907 :
10908 0 : remove_exit_barriers (root_omp_region);
10909 0 : expand_omp (root_omp_region);
10910 :
10911 0 : omp_free_regions ();
10912 0 : }
10913 :
10914 : /* Scan the CFG and build a tree of OMP regions. Return the root of
10915 : the OMP region tree. */
10916 :
10917 : static void
10918 76127 : build_omp_regions (void)
10919 : {
10920 76127 : gcc_assert (root_omp_region == NULL);
10921 76127 : calculate_dominance_info (CDI_DOMINATORS);
10922 76127 : build_omp_regions_1 (ENTRY_BLOCK_PTR_FOR_FN (cfun), NULL, false);
10923 76127 : }
10924 :
10925 : /* Main entry point for expanding OMP-GIMPLE into runtime calls. */
10926 :
10927 : static unsigned int
10928 76127 : execute_expand_omp (void)
10929 : {
10930 76127 : build_omp_regions ();
10931 :
10932 76127 : if (!root_omp_region)
10933 : return 0;
10934 :
10935 24317 : if (dump_file)
10936 : {
10937 164 : fprintf (dump_file, "\nOMP region tree\n\n");
10938 164 : dump_omp_region (dump_file, root_omp_region, 0);
10939 164 : fprintf (dump_file, "\n");
10940 : }
10941 :
10942 24317 : remove_exit_barriers (root_omp_region);
10943 :
10944 24317 : expand_omp (root_omp_region);
10945 :
10946 24317 : omp_free_regions ();
10947 :
10948 24317 : return (TODO_cleanup_cfg
10949 48085 : | (gimple_in_ssa_p (cfun) ? TODO_update_ssa_only_virtuals : 0));
10950 : }
10951 :
10952 : /* OMP expansion -- the default pass, run before creation of SSA form. */
10953 :
10954 : namespace {
10955 :
10956 : const pass_data pass_data_expand_omp =
10957 : {
10958 : GIMPLE_PASS, /* type */
10959 : "ompexp", /* name */
10960 : OPTGROUP_OMP, /* optinfo_flags */
10961 : TV_NONE, /* tv_id */
10962 : PROP_gimple_any, /* properties_required */
10963 : PROP_gimple_eomp, /* properties_provided */
10964 : 0, /* properties_destroyed */
10965 : 0, /* todo_flags_start */
10966 : 0, /* todo_flags_finish */
10967 : };
10968 :
10969 : class pass_expand_omp : public gimple_opt_pass
10970 : {
10971 : public:
10972 288775 : pass_expand_omp (gcc::context *ctxt)
10973 577550 : : gimple_opt_pass (pass_data_expand_omp, ctxt)
10974 : {}
10975 :
10976 : /* opt_pass methods: */
10977 2898924 : unsigned int execute (function *) final override
10978 : {
10979 2891750 : bool gate = ((flag_openacc != 0 || flag_openmp != 0
10980 2844751 : || flag_openmp_simd != 0)
10981 2948180 : && !seen_error ());
10982 :
10983 : /* This pass always runs, to provide PROP_gimple_eomp.
10984 : But often, there is nothing to do. */
10985 2898924 : if (!gate)
10986 2846552 : return 0;
10987 :
10988 52372 : return execute_expand_omp ();
10989 : }
10990 :
10991 : }; // class pass_expand_omp
10992 :
10993 : } // anon namespace
10994 :
/* Pass-manager factory: allocate a fresh instance of the default OMP
   expansion pass in context CTXT.  Caller (the pass manager) owns the
   returned object.  */

gimple_opt_pass *
make_pass_expand_omp (gcc::context *ctxt)
{
  return new pass_expand_omp (ctxt);
}
11000 :
11001 : namespace {
11002 :
11003 : const pass_data pass_data_expand_omp_ssa =
11004 : {
11005 : GIMPLE_PASS, /* type */
11006 : "ompexpssa", /* name */
11007 : OPTGROUP_OMP, /* optinfo_flags */
11008 : TV_NONE, /* tv_id */
11009 : PROP_cfg | PROP_ssa, /* properties_required */
11010 : PROP_gimple_eomp, /* properties_provided */
11011 : 0, /* properties_destroyed */
11012 : 0, /* todo_flags_start */
11013 : TODO_cleanup_cfg | TODO_rebuild_alias, /* todo_flags_finish */
11014 : };
11015 :
11016 : class pass_expand_omp_ssa : public gimple_opt_pass
11017 : {
11018 : public:
11019 577550 : pass_expand_omp_ssa (gcc::context *ctxt)
11020 1155100 : : gimple_opt_pass (pass_data_expand_omp_ssa, ctxt)
11021 : {}
11022 :
11023 : /* opt_pass methods: */
11024 241567 : bool gate (function *fun) final override
11025 : {
11026 241567 : return !(fun->curr_properties & PROP_gimple_eomp);
11027 : }
11028 23755 : unsigned int execute (function *) final override
11029 : {
11030 23755 : return execute_expand_omp ();
11031 : }
11032 288775 : opt_pass * clone () final override
11033 : {
11034 288775 : return new pass_expand_omp_ssa (m_ctxt);
11035 : }
11036 :
11037 : }; // class pass_expand_omp_ssa
11038 :
11039 : } // anon namespace
11040 :
/* Pass-manager factory: allocate a fresh instance of the SSA-form OMP
   expansion pass in context CTXT.  Caller (the pass manager) owns the
   returned object.  */

gimple_opt_pass *
make_pass_expand_omp_ssa (gcc::context *ctxt)
{
  return new pass_expand_omp_ssa (ctxt);
}
11046 :
11047 : /* Called from tree-cfg.cc::make_edges to create cfg edges for all relevant
11048 : GIMPLE_* codes. */
11049 :
bool
omp_make_gimple_edges (basic_block bb, struct omp_region **region,
		       int *region_idx)
{
  /* The OMP statement of interest is always the last non-debug statement
     of BB.  *REGION tracks the innermost open region across calls; the
     return value tells the caller whether a fallthru edge to the next
     block is still wanted.  */
  gimple *last = last_nondebug_stmt (bb);
  enum gimple_code code = gimple_code (last);
  struct omp_region *cur_region = *region;
  bool fallthru = false;

  switch (code)
    {
    case GIMPLE_OMP_PARALLEL:
    case GIMPLE_OMP_FOR:
    case GIMPLE_OMP_SINGLE:
    case GIMPLE_OMP_TEAMS:
    case GIMPLE_OMP_MASTER:
    case GIMPLE_OMP_MASKED:
    case GIMPLE_OMP_SCOPE:
    case GIMPLE_OMP_CRITICAL:
    case GIMPLE_OMP_SECTION:
      /* These open a new region that stays open until the matching
	 GIMPLE_OMP_RETURN is seen.  */
      cur_region = new_omp_region (bb, code, cur_region);
      fallthru = true;
      break;

    case GIMPLE_OMP_TASKGROUP:
      /* Region is closed again immediately: the taskgroup end call is
	 inside a try/finally, so no explicit return statement exists.  */
      cur_region = new_omp_region (bb, code, cur_region);
      fallthru = true;
      cur_region = cur_region->outer;
      break;

    case GIMPLE_OMP_TASK:
      cur_region = new_omp_region (bb, code, cur_region);
      fallthru = true;
      /* A taskwait-with-depend is a stand-alone directive; close the
	 region immediately.  */
      if (gimple_omp_task_taskwait_p (last))
	cur_region = cur_region->outer;
      break;

    case GIMPLE_OMP_ORDERED:
      cur_region = new_omp_region (bb, code, cur_region);
      fallthru = true;
      /* Stand-alone ordered depend has no body; close immediately.  */
      if (gimple_omp_ordered_standalone_p (last))
	cur_region = cur_region->outer;
      break;

    case GIMPLE_OMP_TARGET:
      cur_region = new_omp_region (bb, code, cur_region);
      fallthru = true;
      switch (gimple_omp_target_kind (last))
	{
	case GF_OMP_TARGET_KIND_REGION:
	case GF_OMP_TARGET_KIND_OACC_PARALLEL:
	case GF_OMP_TARGET_KIND_OACC_KERNELS:
	case GF_OMP_TARGET_KIND_OACC_SERIAL:
	case GF_OMP_TARGET_KIND_OACC_PARALLEL_KERNELS_PARALLELIZED:
	case GF_OMP_TARGET_KIND_OACC_PARALLEL_KERNELS_GANG_SINGLE:
	  /* These have a body and remain open.  */
	  break;
	case GF_OMP_TARGET_KIND_UPDATE:
	case GF_OMP_TARGET_KIND_ENTER_DATA:
	case GF_OMP_TARGET_KIND_EXIT_DATA:
	case GF_OMP_TARGET_KIND_DATA:
	case GF_OMP_TARGET_KIND_OACC_DATA:
	case GF_OMP_TARGET_KIND_OACC_HOST_DATA:
	case GF_OMP_TARGET_KIND_OACC_DATA_KERNELS:
	case GF_OMP_TARGET_KIND_OACC_UPDATE:
	case GF_OMP_TARGET_KIND_OACC_ENTER_DATA:
	case GF_OMP_TARGET_KIND_OACC_EXIT_DATA:
	case GF_OMP_TARGET_KIND_OACC_DECLARE:
	  /* Stand-alone (or try/finally-wrapped) directives: close the
	     region immediately.  */
	  cur_region = cur_region->outer;
	  break;
	default:
	  gcc_unreachable ();
	}
      break;

    case GIMPLE_OMP_SECTIONS:
      cur_region = new_omp_region (bb, code, cur_region);
      fallthru = true;
      break;

    case GIMPLE_OMP_SECTIONS_SWITCH:
      /* Edges out of the switch block are created when the matching
	 GIMPLE_OMP_CONTINUE of the sections region is reached.  */
      fallthru = false;
      break;

    case GIMPLE_OMP_ATOMIC_LOAD:
    case GIMPLE_OMP_ATOMIC_STORE:
      /* No region manipulation here; the load/store pairing is handled
	 when regions are built (see build_omp_regions_1).  */
      fallthru = true;
      break;

    case GIMPLE_OMP_RETURN:
      /* In the case of a GIMPLE_OMP_SECTION, the edge will go
	 somewhere other than the next block.  This will be
	 created later.  */
      cur_region->exit = bb;
      if (cur_region->type == GIMPLE_OMP_TASK)
	/* Add an edge corresponding to not scheduling the task
	   immediately.  */
	make_edge (cur_region->entry, bb, EDGE_ABNORMAL);
      fallthru = cur_region->type != GIMPLE_OMP_SECTION;
      cur_region = cur_region->outer;
      break;

    case GIMPLE_OMP_CONTINUE:
      cur_region->cont = bb;
      switch (cur_region->type)
	{
	case GIMPLE_OMP_FOR:
	  /* Mark all GIMPLE_OMP_FOR and GIMPLE_OMP_CONTINUE
	     succs edges as abnormal to prevent splitting
	     them.  */
	  single_succ_edge (cur_region->entry)->flags |= EDGE_ABNORMAL;
	  /* Make the loopback edge.  */
	  make_edge (bb, single_succ (cur_region->entry),
		     EDGE_ABNORMAL);

	  /* Create an edge from GIMPLE_OMP_FOR to exit, which
	     corresponds to the case that the body of the loop
	     is not executed at all.  */
	  make_edge (cur_region->entry, bb->next_bb, EDGE_ABNORMAL);
	  make_edge (bb, bb->next_bb, EDGE_FALLTHRU | EDGE_ABNORMAL);
	  fallthru = false;
	  break;

	case GIMPLE_OMP_SECTIONS:
	  /* Wire up the edges into and out of the nested sections.  */
	  {
	    basic_block switch_bb = single_succ (cur_region->entry);

	    struct omp_region *i;
	    for (i = cur_region->inner; i ; i = i->next)
	      {
		gcc_assert (i->type == GIMPLE_OMP_SECTION);
		make_edge (switch_bb, i->entry, 0);
		make_edge (i->exit, bb, EDGE_FALLTHRU);
	      }

	    /* Make the loopback edge to the block with
	       GIMPLE_OMP_SECTIONS_SWITCH.  */
	    make_edge (bb, switch_bb, 0);

	    /* Make the edge from the switch to exit.  */
	    make_edge (switch_bb, bb->next_bb, 0);
	    fallthru = false;
	  }
	  break;

	case GIMPLE_OMP_TASK:
	  fallthru = true;
	  break;

	default:
	  gcc_unreachable ();
	}
      break;

    default:
      gcc_unreachable ();
    }

  /* Report the (possibly changed) innermost region back to the caller,
     along with the index of its entry block (0 when no region is
     open).  */
  if (*region != cur_region)
    {
      *region = cur_region;
      if (cur_region)
	*region_idx = cur_region->entry->index;
      else
	*region_idx = 0;
    }

  return fallthru;
}
|