Line data Source code
1 : /* If-conversion for vectorizer.
2 : Copyright (C) 2004-2026 Free Software Foundation, Inc.
3 : Contributed by Devang Patel <dpatel@apple.com>
4 :
5 : This file is part of GCC.
6 :
7 : GCC is free software; you can redistribute it and/or modify it under
8 : the terms of the GNU General Public License as published by the Free
9 : Software Foundation; either version 3, or (at your option) any later
10 : version.
11 :
12 : GCC is distributed in the hope that it will be useful, but WITHOUT ANY
13 : WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 : FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
15 : for more details.
16 :
17 : You should have received a copy of the GNU General Public License
18 : along with GCC; see the file COPYING3. If not see
19 : <http://www.gnu.org/licenses/>. */
20 :
21 : /* This pass implements a tree level if-conversion of loops. Its
22 : initial goal is to help the vectorizer to vectorize loops with
23 : conditions.
24 :
25 : A short description of if-conversion:
26 :
27 : o Decide if a loop is if-convertible or not.
28 : o Walk all loop basic blocks in breadth first order (BFS order).
29 : o Remove conditional statements (at the end of basic block)
30 : and propagate condition into destination basic blocks'
31 : predicate list.
32 : o Replace modify expression with conditional modify expression
33 : using current basic block's condition.
34 : o Merge all basic blocks
35 : o Replace phi nodes with conditional modify expr
36 : o Merge all basic blocks into header
37 :
38 : Sample transformation:
39 :
40 : INPUT
41 : -----
42 :
43 : # i_23 = PHI <0(0), i_18(10)>;
44 : <L0>:;
45 : j_15 = A[i_23];
46 : if (j_15 > 41) goto <L1>; else goto <L17>;
47 :
48 : <L17>:;
49 : goto <bb 3> (<L3>);
50 :
51 : <L1>:;
52 :
53 : # iftmp.2_4 = PHI <0(8), 42(2)>;
54 : <L3>:;
55 : A[i_23] = iftmp.2_4;
56 : i_18 = i_23 + 1;
57 : if (i_18 <= 15) goto <L19>; else goto <L18>;
58 :
59 : <L19>:;
60 : goto <bb 1> (<L0>);
61 :
62 : <L18>:;
63 :
64 : OUTPUT
65 : ------
66 :
67 : # i_23 = PHI <0(0), i_18(10)>;
68 : <L0>:;
69 : j_15 = A[i_23];
70 :
71 : <L3>:;
72 : iftmp.2_4 = j_15 > 41 ? 42 : 0;
73 : A[i_23] = iftmp.2_4;
74 : i_18 = i_23 + 1;
75 : if (i_18 <= 15) goto <L19>; else goto <L18>;
76 :
77 : <L19>:;
78 : goto <bb 1> (<L0>);
79 :
80 : <L18>:;
81 : */
82 :
83 : #include "config.h"
84 : #include "system.h"
85 : #include "coretypes.h"
86 : #include "backend.h"
87 : #include "rtl.h"
88 : #include "tree.h"
89 : #include "gimple.h"
90 : #include "cfghooks.h"
91 : #include "tree-pass.h"
92 : #include "ssa.h"
93 : #include "expmed.h"
94 : #include "expr.h"
95 : #include "optabs-tree.h"
96 : #include "gimple-pretty-print.h"
97 : #include "alias.h"
98 : #include "fold-const.h"
99 : #include "stor-layout.h"
100 : #include "gimple-iterator.h"
101 : #include "gimple-fold.h"
102 : #include "gimplify.h"
103 : #include "gimplify-me.h"
104 : #include "tree-cfg.h"
105 : #include "tree-into-ssa.h"
106 : #include "tree-ssa.h"
107 : #include "cfgloop.h"
108 : #include "tree-data-ref.h"
109 : #include "tree-scalar-evolution.h"
110 : #include "tree-ssa-loop.h"
111 : #include "tree-ssa-loop-niter.h"
112 : #include "tree-ssa-loop-ivopts.h"
113 : #include "tree-ssa-address.h"
114 : #include "dbgcnt.h"
115 : #include "tree-hash-traits.h"
116 : #include "varasm.h"
117 : #include "builtins.h"
118 : #include "cfganal.h"
119 : #include "internal-fn.h"
120 : #include "fold-const.h"
121 : #include "tree-ssa-sccvn.h"
122 : #include "tree-cfgcleanup.h"
123 : #include "tree-ssa-dse.h"
124 : #include "tree-vectorizer.h"
125 : #include "tree-eh.h"
126 : #include "cgraph.h"
127 :
128 : /* For lang_hooks.types.type_for_mode. */
129 : #include "langhooks.h"
130 :
131 : /* Only handle PHIs with no more arguments unless we are asked to by
132 : simd pragma. */
133 : #define MAX_PHI_ARG_NUM \
134 : ((unsigned) param_max_tree_if_conversion_phi_args)
135 :
136 : /* True if we've converted a statement that was only executed when some
137 : condition C was true, and if for correctness we need to predicate the
138 : statement to ensure that it is a no-op when C is false. See
139 : predicate_statements for the kinds of predication we support. */
140 : static bool need_to_predicate;
141 :
142 : /* True if we have to rewrite stmts that may invoke undefined behavior
143 : when a condition C was false so it doesn't if it is always executed.
144 : See predicate_statements for the kinds of predication we support. */
145 : static bool need_to_rewrite_undefined;
146 :
147 : /* Indicate if there are any complicated PHIs that need to be handled in
148 : if-conversion. Complicated PHI has more than two arguments and can't
149 : be degenerated to two arguments PHI. See more information in comment
150 : before phi_convertible_by_degenerating_args. */
151 : static bool any_complicated_phi;
152 :
153 : /* True if we have bitfield accesses we can lower. */
154 : static bool need_to_lower_bitfields;
155 :
156 : /* True if there is any ifcvting to be done. */
157 : static bool need_to_ifcvt;
158 :
159 : /* Hash for struct innermost_loop_behavior. It depends on the user to
160 : free the memory. */
161 :
162 : struct innermost_loop_behavior_hash : nofree_ptr_hash <innermost_loop_behavior>
163 : {
164 : static inline hashval_t hash (const value_type &);
165 : static inline bool equal (const value_type &,
166 : const compare_type &);
167 : };
168 :
169 : inline hashval_t
170 438534 : innermost_loop_behavior_hash::hash (const value_type &e)
171 : {
172 438534 : hashval_t hash;
173 :
174 438534 : hash = iterative_hash_expr (e->base_address, 0);
175 438534 : hash = iterative_hash_expr (e->offset, hash);
176 438534 : hash = iterative_hash_expr (e->init, hash);
177 438534 : return iterative_hash_expr (e->step, hash);
178 : }
179 :
180 : inline bool
181 312521 : innermost_loop_behavior_hash::equal (const value_type &e1,
182 : const compare_type &e2)
183 : {
184 312521 : if ((e1->base_address && !e2->base_address)
185 312521 : || (!e1->base_address && e2->base_address)
186 312521 : || (!e1->offset && e2->offset)
187 287106 : || (e1->offset && !e2->offset)
188 253366 : || (!e1->init && e2->init)
189 253366 : || (e1->init && !e2->init)
190 253366 : || (!e1->step && e2->step)
191 253366 : || (e1->step && !e2->step))
192 : return false;
193 :
194 253366 : if (e1->base_address && e2->base_address
195 506732 : && !operand_equal_p (e1->base_address, e2->base_address, 0))
196 : return false;
197 45402 : if (e1->offset && e2->offset
198 128423 : && !operand_equal_p (e1->offset, e2->offset, 0))
199 : return false;
200 45171 : if (e1->init && e2->init
201 127961 : && !operand_equal_p (e1->init, e2->init, 0))
202 : return false;
203 17336 : if (e1->step && e2->step
204 72291 : && !operand_equal_p (e1->step, e2->step, 0))
205 : return false;
206 :
207 : return true;
208 : }
209 :
210 : /* List of basic blocks in if-conversion-suitable order. */
211 : static basic_block *ifc_bbs;
212 :
213 : /* Hash table to store <DR's innermost loop behavior, DR> pairs. */
214 : static hash_map<innermost_loop_behavior_hash,
215 : data_reference_p> *innermost_DR_map;
216 :
217 : /* Hash table to store <base reference, DR> pairs. */
218 : static hash_map<tree_operand_hash, data_reference_p> *baseref_DR_map;
219 :
220 : /* List of redundant SSA names: the first should be replaced by the second. */
221 : static vec< std::pair<tree, tree> > redundant_ssa_names;
222 :
223 : /* Structure used to predicate basic blocks. This is attached to the
224 : ->aux field of the BBs in the loop to be if-converted. */
225 : struct bb_predicate {
226 :
227 : /* The condition under which this basic block is executed. */
228 : tree predicate;
229 :
230 : /* PREDICATE is gimplified, and the sequence of statements is
231 : recorded here, in order to avoid the duplication of computations
232 : that occur in previous conditions. See PR44483. */
233 : gimple_seq predicate_gimplified_stmts;
234 :
235 : /* Records the number of statements recorded into
236 : PREDICATE_GIMPLIFIED_STMTS. */
237 : unsigned no_predicate_stmts;
238 : };
239 :
240 : /* Returns true when the basic block BB has a predicate. */
241 :
242 : static inline bool
243 1940694 : bb_has_predicate (basic_block bb)
244 : {
245 1940694 : return bb->aux != NULL;
246 : }
247 :
248 : /* Returns the gimplified predicate for basic block BB. */
249 :
250 : static inline tree
251 991592 : bb_predicate (basic_block bb)
252 : {
253 991592 : return ((struct bb_predicate *) bb->aux)->predicate;
254 : }
255 :
256 : /* Sets the gimplified predicate COND for basic block BB. */
257 :
258 : static inline void
259 531392 : set_bb_predicate (basic_block bb, tree cond)
260 : {
261 531392 : auto aux = (struct bb_predicate *) bb->aux;
262 531392 : gcc_assert ((TREE_CODE (cond) == TRUTH_NOT_EXPR
263 : && is_gimple_val (TREE_OPERAND (cond, 0)))
264 : || is_gimple_val (cond));
265 531392 : aux->predicate = cond;
266 531392 : aux->no_predicate_stmts++;
267 :
268 531392 : if (dump_file && (dump_flags & TDF_DETAILS))
269 222 : fprintf (dump_file, "Recording block %d value %d\n", bb->index,
270 : aux->no_predicate_stmts);
271 531392 : }
272 :
273 : /* Returns the sequence of statements of the gimplification of the
274 : predicate for basic block BB. */
275 :
276 : static inline gimple_seq
277 661496 : bb_predicate_gimplified_stmts (basic_block bb)
278 : {
279 661496 : return ((struct bb_predicate *) bb->aux)->predicate_gimplified_stmts;
280 : }
281 :
282 : /* Sets the sequence of statements STMTS of the gimplification of the
283 : predicate for basic block BB. If PRESERVE_COUNTS then don't clear the predicate
284 : counts. */
285 :
286 : static inline void
287 311120 : set_bb_predicate_gimplified_stmts (basic_block bb, gimple_seq stmts,
288 : bool preserve_counts)
289 : {
290 311120 : ((struct bb_predicate *) bb->aux)->predicate_gimplified_stmts = stmts;
291 311120 : if (stmts == NULL && !preserve_counts)
292 254010 : ((struct bb_predicate *) bb->aux)->no_predicate_stmts = 0;
293 91373 : }
294 :
295 : /* Adds the sequence of statements STMTS to the sequence of statements
296 : of the predicate for basic block BB. */
297 :
298 : static inline void
299 93564 : add_bb_predicate_gimplified_stmts (basic_block bb, gimple_seq stmts)
300 : {
301 : /* We might have updated some stmts in STMTS via force_gimple_operand
302 : calling fold_stmt and that producing multiple stmts. Delink immediate
303 : uses so update_ssa after loop versioning doesn't get confused for
304 : the not yet inserted predicates.
305 : ??? This should go away once we reliably avoid updating stmts
306 : not in any BB. */
307 93564 : for (gimple_stmt_iterator gsi = gsi_start (stmts);
308 226932 : !gsi_end_p (gsi); gsi_next (&gsi))
309 : {
310 133368 : gimple *stmt = gsi_stmt (gsi);
311 133368 : delink_stmt_imm_use (stmt);
312 133368 : gimple_set_modified (stmt, true);
313 133368 : ((struct bb_predicate *) bb->aux)->no_predicate_stmts++;
314 : }
315 93564 : gimple_seq_add_seq_without_update
316 93564 : (&(((struct bb_predicate *) bb->aux)->predicate_gimplified_stmts), stmts);
317 93564 : }
318 :
319 : /* Return the number of statements the predicate of the basic block consists
320 : of. */
321 :
322 : static inline unsigned
323 16346 : get_bb_num_predicate_stmts (basic_block bb)
324 : {
325 16346 : return ((struct bb_predicate *) bb->aux)->no_predicate_stmts;
326 : }
327 :
328 : /* Initializes to TRUE the predicate of basic block BB. */
329 :
330 : static inline void
331 219747 : init_bb_predicate (basic_block bb)
332 : {
333 219747 : bb->aux = XNEW (struct bb_predicate);
334 219747 : set_bb_predicate_gimplified_stmts (bb, NULL, false);
335 219747 : set_bb_predicate (bb, boolean_true_node);
336 219747 : }
337 :
338 : /* Release the SSA_NAMEs associated with the predicate of basic block BB. */
339 :
340 : static inline void
341 432073 : release_bb_predicate (basic_block bb)
342 : {
343 432073 : gimple_seq stmts = bb_predicate_gimplified_stmts (bb);
344 432073 : if (stmts)
345 : {
346 : /* Ensure that these stmts haven't yet been added to a bb. */
347 34263 : if (flag_checking)
348 : for (gimple_stmt_iterator i = gsi_start (stmts);
349 94920 : !gsi_end_p (i); gsi_next (&i))
350 60657 : gcc_assert (! gimple_bb (gsi_stmt (i)));
351 :
352 : /* Discard them. */
353 34263 : gimple_seq_discard (stmts);
354 34263 : set_bb_predicate_gimplified_stmts (bb, NULL, false);
355 : }
356 432073 : }
357 :
358 : /* Free the predicate of basic block BB. */
359 :
360 : static inline void
361 1728368 : free_bb_predicate (basic_block bb)
362 : {
363 1728368 : if (!bb_has_predicate (bb))
364 : return;
365 :
366 219747 : release_bb_predicate (bb);
367 219747 : free (bb->aux);
368 219747 : bb->aux = NULL;
369 : }
370 :
371 : /* Reinitialize predicate of BB with the true predicate. */
372 :
373 : static inline void
374 212326 : reset_bb_predicate (basic_block bb)
375 : {
376 212326 : if (!bb_has_predicate (bb))
377 0 : init_bb_predicate (bb);
378 : else
379 : {
380 212326 : release_bb_predicate (bb);
381 212326 : set_bb_predicate (bb, boolean_true_node);
382 : }
383 212326 : }
384 :
385 : /* Returns a new SSA_NAME of type TYPE that is assigned the value of
386 : the expression EXPR. Inserts the statement created for this
387 : computation before GSI and leaves the iterator GSI at the same
388 : statement. */
389 :
390 : static tree
391 5668 : ifc_temp_var (tree type, tree expr, gimple_stmt_iterator *gsi)
392 : {
393 5668 : tree new_name = make_temp_ssa_name (type, NULL, "_ifc_");
394 5668 : gimple *stmt = gimple_build_assign (new_name, expr);
395 11336 : gimple_set_vuse (stmt, gimple_vuse (gsi_stmt (*gsi)));
396 5668 : gsi_insert_before (gsi, stmt, GSI_SAME_STMT);
397 5668 : return new_name;
398 : }
399 :
400 : /* Return true when COND is a false predicate. */
401 :
402 : static inline bool
403 87960 : is_false_predicate (tree cond)
404 : {
405 87960 : return (cond != NULL_TREE
406 87960 : && (cond == boolean_false_node
407 87960 : || integer_zerop (cond)));
408 : }
409 :
410 : /* Return true when COND is a true predicate. */
411 :
412 : static inline bool
413 1132562 : is_true_predicate (tree cond)
414 : {
415 1132562 : return (cond == NULL_TREE
416 1132562 : || cond == boolean_true_node
417 1619613 : || integer_onep (cond));
418 : }
419 :
420 : /* Returns true when BB has a predicate that is not trivial: true or
421 : NULL_TREE. */
422 :
423 : static inline bool
424 553819 : is_predicated (basic_block bb)
425 : {
426 12450 : return !is_true_predicate (bb_predicate (bb));
427 : }
428 :
429 : /* Parses the predicate COND and returns its comparison code and
430 : operands OP0 and OP1. */
431 :
432 : static enum tree_code
433 466656 : parse_predicate (tree cond, tree *op0, tree *op1)
434 : {
435 466656 : gimple *s;
436 :
437 466656 : if (TREE_CODE (cond) == SSA_NAME
438 466656 : && is_gimple_assign (s = SSA_NAME_DEF_STMT (cond)))
439 : {
440 63410 : if (TREE_CODE_CLASS (gimple_assign_rhs_code (s)) == tcc_comparison)
441 : {
442 31925 : *op0 = gimple_assign_rhs1 (s);
443 31925 : *op1 = gimple_assign_rhs2 (s);
444 31925 : return gimple_assign_rhs_code (s);
445 : }
446 :
447 31485 : else if (gimple_assign_rhs_code (s) == TRUTH_NOT_EXPR)
448 : {
449 0 : tree op = gimple_assign_rhs1 (s);
450 0 : tree type = TREE_TYPE (op);
451 0 : enum tree_code code = parse_predicate (op, op0, op1);
452 :
453 0 : return code == ERROR_MARK ? ERROR_MARK
454 0 : : invert_tree_comparison (code, HONOR_NANS (type));
455 : }
456 :
457 : return ERROR_MARK;
458 : }
459 :
460 403246 : if (COMPARISON_CLASS_P (cond))
461 : {
462 471 : *op0 = TREE_OPERAND (cond, 0);
463 471 : *op1 = TREE_OPERAND (cond, 1);
464 471 : return TREE_CODE (cond);
465 : }
466 :
467 : return ERROR_MARK;
468 : }
469 :
470 : /* Returns the fold of predicate C1 OR C2 at location LOC. */
471 :
472 : static tree
473 233328 : fold_or_predicates (location_t loc, tree c1, tree c2)
474 : {
475 233328 : tree op1a, op1b, op2a, op2b;
476 233328 : enum tree_code code1 = parse_predicate (c1, &op1a, &op1b);
477 233328 : enum tree_code code2 = parse_predicate (c2, &op2a, &op2b);
478 :
479 233328 : if (code1 != ERROR_MARK && code2 != ERROR_MARK)
480 : {
481 2200 : tree t = maybe_fold_or_comparisons (boolean_type_node, code1, op1a, op1b,
482 : code2, op2a, op2b);
483 2200 : if (t)
484 : return t;
485 : }
486 :
487 231223 : return fold_build2_loc (loc, TRUTH_OR_EXPR, boolean_type_node, c1, c2);
488 : }
489 :
490 : /* Returns either a COND_EXPR or the folded expression if the folded
491 : expression is a MIN_EXPR, a MAX_EXPR, an ABS_EXPR,
492 : a constant or a SSA_NAME. */
493 :
494 : static tree
495 54816 : fold_build_cond_expr (tree type, tree cond, tree rhs, tree lhs)
496 : {
497 : /* Short cut the case where both rhs and lhs are the same. */
498 54816 : if (operand_equal_p (rhs, lhs))
499 : return rhs;
500 :
501 : /* If COND is comparison r != 0 and r has boolean type, convert COND
502 : to SSA_NAME to accept by vect bool pattern. */
503 54816 : if (TREE_CODE (cond) == NE_EXPR)
504 : {
505 0 : tree op0 = TREE_OPERAND (cond, 0);
506 0 : tree op1 = TREE_OPERAND (cond, 1);
507 0 : if (TREE_CODE (op0) == SSA_NAME
508 0 : && TREE_CODE (TREE_TYPE (op0)) == BOOLEAN_TYPE
509 0 : && (integer_zerop (op1)))
510 : cond = op0;
511 : }
512 :
513 54816 : gimple_match_op cexpr (gimple_match_cond::UNCOND, COND_EXPR,
514 54816 : type, cond, rhs, lhs);
515 54816 : if (cexpr.resimplify (NULL, follow_all_ssa_edges))
516 : {
517 10616 : if (gimple_simplified_result_is_gimple_val (&cexpr))
518 553 : return cexpr.ops[0];
519 10063 : else if (cexpr.code == ABS_EXPR)
520 2 : return build1 (ABS_EXPR, type, cexpr.ops[0]);
521 10061 : else if (cexpr.code == MIN_EXPR
522 10061 : || cexpr.code == MAX_EXPR)
523 7303 : return build2 ((tree_code)cexpr.code, type, cexpr.ops[0], cexpr.ops[1]);
524 : }
525 :
526 46958 : return build3 (COND_EXPR, type, cond, rhs, lhs);
527 : }
528 :
529 : /* Add condition NC to the predicate list of basic block BB. LOOP is
530 : the loop to be if-converted. Use predicate of cd-equivalent block
531 : for join bb if it exists: we call basic blocks bb1 and bb2
532 : cd-equivalent if they are executed under the same condition. */
533 :
534 : static inline void
535 172617 : add_to_predicate_list (class loop *loop, basic_block bb, tree nc)
536 : {
537 172617 : tree bc, *tp;
538 172617 : basic_block dom_bb;
539 :
540 172617 : if (is_true_predicate (nc))
541 77159 : return;
542 :
543 : /* If dominance tells us this basic block is always executed,
544 : don't record any predicates for it. */
545 172615 : if (dominated_by_p (CDI_DOMINATORS, loop->latch, bb))
546 : return;
547 :
548 99319 : dom_bb = get_immediate_dominator (CDI_DOMINATORS, bb);
549 : /* We use notion of cd equivalence to get simpler predicate for
550 : join block, e.g. if join block has 2 predecessors with predicates
551 : p1 & p2 and p1 & !p2, we'd like to get p1 for it instead of
552 : p1 & p2 | p1 & !p2. */
553 99319 : if (dom_bb != loop->header
554 99319 : && get_immediate_dominator (CDI_POST_DOMINATORS, dom_bb) == bb)
555 : {
556 3861 : gcc_assert (flow_bb_inside_loop_p (loop, dom_bb));
557 3861 : bc = bb_predicate (dom_bb);
558 3861 : if (!is_true_predicate (bc))
559 3861 : set_bb_predicate (bb, bc);
560 : else
561 0 : gcc_assert (is_true_predicate (bb_predicate (bb)));
562 3861 : if (dump_file && (dump_flags & TDF_DETAILS))
563 4 : fprintf (dump_file, "Use predicate of bb#%d for bb#%d\n",
564 : dom_bb->index, bb->index);
565 3861 : return;
566 : }
567 :
568 95458 : if (!is_predicated (bb))
569 91373 : bc = nc;
570 : else
571 : {
572 4085 : bc = bb_predicate (bb);
573 4085 : bc = fold_or_predicates (EXPR_LOCATION (bc), nc, bc);
574 4085 : if (is_true_predicate (bc))
575 : {
576 0 : reset_bb_predicate (bb);
577 0 : return;
578 : }
579 : }
580 :
581 : /* Allow a TRUTH_NOT_EXPR around the main predicate. */
582 95458 : if (TREE_CODE (bc) == TRUTH_NOT_EXPR)
583 33134 : tp = &TREE_OPERAND (bc, 0);
584 : else
585 : tp = &bc;
586 95458 : if (!is_gimple_val (*tp))
587 : {
588 93564 : gimple_seq stmts;
589 93564 : *tp = force_gimple_operand (*tp, &stmts, true, NULL_TREE);
590 93564 : add_bb_predicate_gimplified_stmts (bb, stmts);
591 : }
592 95458 : set_bb_predicate (bb, bc);
593 : }
594 :
595 : /* Add the condition COND to the previous condition PREV_COND, and add
596 : this to the predicate list of the destination of edge E. LOOP is
597 : the loop to be if-converted. */
598 :
599 : static void
600 114058 : add_to_dst_predicate_list (class loop *loop, edge e,
601 : tree prev_cond, tree cond)
602 : {
603 114058 : if (!flow_bb_inside_loop_p (loop, e->dest))
604 : return;
605 :
606 114058 : if (!is_true_predicate (prev_cond))
607 22676 : cond = fold_build2 (TRUTH_AND_EXPR, boolean_type_node,
608 : prev_cond, cond);
609 :
610 114058 : if (!dominated_by_p (CDI_DOMINATORS, loop->latch, e->dest))
611 90685 : add_to_predicate_list (loop, e->dest, cond);
612 : }
613 :
614 : /* Return true if one of the successor edges of BB exits LOOP. */
615 :
616 : static bool
617 3405047 : bb_with_exit_edge_p (const class loop *loop, basic_block bb)
618 : {
619 3405047 : edge e;
620 3405047 : edge_iterator ei;
621 :
622 6894720 : FOR_EACH_EDGE (e, ei, bb->succs)
623 4846739 : if (loop_exit_edge_p (loop, e))
624 : return true;
625 :
626 : return false;
627 : }
628 :
629 : /* Given PHI which has more than two arguments, this function checks if
630 : it's if-convertible by degenerating its arguments. Specifically, if
631 : below two conditions are satisfied:
632 :
633 : 1) Number of PHI arguments with different values equals to 2 and one
634 : argument has the only occurrence.
635 : 2) The edge corresponding to the unique argument isn't critical edge.
636 :
637 : Such PHI can be handled as PHIs have only two arguments. For example,
638 : below PHI:
639 :
640 : res = PHI <A_1(e1), A_1(e2), A_2(e3)>;
641 :
642 : can be transformed into:
643 :
644 : res = (predicate of e3) ? A_2 : A_1;
645 :
646 : Return TRUE if it is the case, FALSE otherwise. */
647 :
648 : static bool
649 5551 : phi_convertible_by_degenerating_args (gphi *phi)
650 : {
651 5551 : edge e;
652 5551 : tree arg, t1 = NULL, t2 = NULL;
653 5551 : unsigned int i, i1 = 0, i2 = 0, n1 = 0, n2 = 0;
654 5551 : unsigned int num_args = gimple_phi_num_args (phi);
655 :
656 5551 : gcc_assert (num_args > 2);
657 :
658 20177 : for (i = 0; i < num_args; i++)
659 : {
660 16849 : arg = gimple_phi_arg_def (phi, i);
661 16849 : if (t1 == NULL || operand_equal_p (t1, arg, 0))
662 : {
663 7603 : n1++;
664 7603 : i1 = i;
665 7603 : t1 = arg;
666 : }
667 9246 : else if (t2 == NULL || operand_equal_p (t2, arg, 0))
668 : {
669 7023 : n2++;
670 7023 : i2 = i;
671 7023 : t2 = arg;
672 : }
673 : else
674 : return false;
675 : }
676 :
677 3328 : if (n1 != 1 && n2 != 1)
678 : return false;
679 :
680 : /* Check if the edge corresponding to the unique arg is critical. */
681 3260 : e = gimple_phi_arg_edge (phi, (n1 == 1) ? i1 : i2);
682 3260 : if (EDGE_COUNT (e->src->succs) > 1)
683 : return false;
684 :
685 : return true;
686 : }
687 :
688 : /* Return true when PHI is if-convertible. PHI is part of loop LOOP
689 : and it belongs to basic block BB. Note at this point, it is sure
690 : that PHI is if-convertible. This function updates global variable
691 : ANY_COMPLICATED_PHI if PHI is complicated. */
692 :
693 : static bool
694 138997 : if_convertible_phi_p (class loop *loop, basic_block bb, gphi *phi)
695 : {
696 138997 : if (dump_file && (dump_flags & TDF_DETAILS))
697 : {
698 67 : fprintf (dump_file, "-------------------------\n");
699 67 : print_gimple_stmt (dump_file, phi, 0, TDF_SLIM);
700 : }
701 :
702 138997 : if (bb != loop->header
703 54942 : && gimple_phi_num_args (phi) > 2
704 144548 : && !phi_convertible_by_degenerating_args (phi))
705 2291 : any_complicated_phi = true;
706 :
707 138997 : return true;
708 : }
709 :
710 : /* Records the status of a data reference. This struct is attached to
711 : each DR->aux field. */
712 :
713 : struct ifc_dr {
714 : bool rw_unconditionally;
715 : bool w_unconditionally;
716 : bool written_at_least_once;
717 :
718 : tree rw_predicate;
719 : tree w_predicate;
720 : tree base_w_predicate;
721 : };
722 :
723 : #define IFC_DR(DR) ((struct ifc_dr *) (DR)->aux)
724 : #define DR_BASE_W_UNCONDITIONALLY(DR) (IFC_DR (DR)->written_at_least_once)
725 : #define DR_RW_UNCONDITIONALLY(DR) (IFC_DR (DR)->rw_unconditionally)
726 : #define DR_W_UNCONDITIONALLY(DR) (IFC_DR (DR)->w_unconditionally)
727 :
728 : /* Iterates over DR's and stores refs, DR and base refs, DR pairs in
729 : HASH tables. While storing them in HASH table, it checks if the
730 : reference is unconditionally read or written and stores that as a flag
731 : information. For base reference it checks if it is written atlest once
732 : unconditionally and stores it as flag information along with DR.
733 : In other words for every data reference A in STMT there exist other
734 : accesses to a data reference with the same base with predicates that
735 : add up (OR-up) to the true predicate: this ensures that the data
736 : reference A is touched (read or written) on every iteration of the
737 : if-converted loop. */
738 : static void
739 138321 : hash_memrefs_baserefs_and_store_DRs_read_written_info (data_reference_p a)
740 : {
741 :
742 138321 : data_reference_p *master_dr, *base_master_dr;
743 138321 : tree base_ref = DR_BASE_OBJECT (a);
744 138321 : innermost_loop_behavior *innermost = &DR_INNERMOST (a);
745 138321 : tree ca = bb_predicate (gimple_bb (DR_STMT (a)));
746 138321 : bool exist1, exist2;
747 :
748 138321 : master_dr = &innermost_DR_map->get_or_insert (innermost, &exist1);
749 138321 : if (!exist1)
750 103671 : *master_dr = a;
751 :
752 138321 : if (DR_IS_WRITE (a))
753 : {
754 45425 : IFC_DR (*master_dr)->w_predicate
755 90850 : = fold_or_predicates (UNKNOWN_LOCATION, ca,
756 45425 : IFC_DR (*master_dr)->w_predicate);
757 45425 : if (is_true_predicate (IFC_DR (*master_dr)->w_predicate))
758 29558 : DR_W_UNCONDITIONALLY (*master_dr) = true;
759 : }
760 138321 : IFC_DR (*master_dr)->rw_predicate
761 276642 : = fold_or_predicates (UNKNOWN_LOCATION, ca,
762 138321 : IFC_DR (*master_dr)->rw_predicate);
763 138321 : if (is_true_predicate (IFC_DR (*master_dr)->rw_predicate))
764 102653 : DR_RW_UNCONDITIONALLY (*master_dr) = true;
765 :
766 138321 : if (DR_IS_WRITE (a))
767 : {
768 45425 : base_master_dr = &baseref_DR_map->get_or_insert (base_ref, &exist2);
769 45425 : if (!exist2)
770 35292 : *base_master_dr = a;
771 45425 : IFC_DR (*base_master_dr)->base_w_predicate
772 90850 : = fold_or_predicates (UNKNOWN_LOCATION, ca,
773 45425 : IFC_DR (*base_master_dr)->base_w_predicate);
774 45425 : if (is_true_predicate (IFC_DR (*base_master_dr)->base_w_predicate))
775 29792 : DR_BASE_W_UNCONDITIONALLY (*base_master_dr) = true;
776 : }
777 138321 : }
778 :
779 : /* Return TRUE if can prove the index IDX of an array reference REF is
780 : within array bound. Return false otherwise. */
781 :
782 : static bool
783 248314 : idx_within_array_bound (tree ref, tree *idx, void *dta)
784 : {
785 248314 : wi::overflow_type overflow;
786 248314 : widest_int niter, valid_niter, delta, wi_step;
787 248314 : tree ev, init, step;
788 248314 : tree low, high;
789 248314 : class loop *loop = (class loop*) dta;
790 :
791 : /* Only support within-bound access for array references. */
792 248314 : if (TREE_CODE (ref) != ARRAY_REF)
793 : return false;
794 :
795 : /* For arrays that might have flexible sizes, it is not guaranteed that they
796 : do not extend over their declared size. */
797 143256 : if (array_ref_flexible_size_p (ref))
798 : return false;
799 :
800 90872 : ev = analyze_scalar_evolution (loop, *idx);
801 90872 : ev = instantiate_parameters (loop, ev);
802 90872 : init = initial_condition (ev);
803 90872 : step = evolution_part_in_loop_num (ev, loop->num);
804 :
805 90872 : if (!init || TREE_CODE (init) != INTEGER_CST
806 80384 : || (step && TREE_CODE (step) != INTEGER_CST))
807 : return false;
808 :
809 80378 : low = array_ref_low_bound (ref);
810 80378 : high = array_ref_up_bound (ref);
811 :
812 : /* The case of nonconstant bounds could be handled, but it would be
813 : complicated. */
814 80378 : if (TREE_CODE (low) != INTEGER_CST
815 80378 : || !high || TREE_CODE (high) != INTEGER_CST)
816 : return false;
817 :
818 : /* Check if the initial idx is within bound. */
819 80310 : if (wi::to_widest (init) < wi::to_widest (low)
820 160612 : || wi::to_widest (init) > wi::to_widest (high))
821 15 : return false;
822 :
823 : /* The idx is always within bound. */
824 80295 : if (!step || integer_zerop (step))
825 1980 : return true;
826 :
827 78315 : if (!max_loop_iterations (loop, &niter))
828 : return false;
829 :
830 78315 : if (wi::to_widest (step) < 0)
831 : {
832 303 : delta = wi::to_widest (init) - wi::to_widest (low);
833 303 : wi_step = -wi::to_widest (step);
834 : }
835 : else
836 : {
837 78012 : delta = wi::to_widest (high) - wi::to_widest (init);
838 78012 : wi_step = wi::to_widest (step);
839 : }
840 :
841 78315 : valid_niter = wi::div_floor (delta, wi_step, SIGNED, &overflow);
842 : /* The iteration space of idx is within array bound. */
843 156630 : if (!overflow && niter <= valid_niter)
844 : return true;
845 :
846 : return false;
847 248314 : }
848 :
849 : /* Return TRUE if ref is a within bound array reference. */
850 :
851 : bool
852 242599 : ref_within_array_bound (gimple *stmt, tree ref)
853 : {
854 242599 : class loop *loop = loop_containing_stmt (stmt);
855 :
856 242599 : gcc_assert (loop != NULL);
857 242599 : return for_each_index (&ref, idx_within_array_bound, loop);
858 : }
859 :
860 :
861 : /* Given a memory reference expression T, return TRUE if base object
862 : it refers to is writable. The base object of a memory reference
863 : is the main object being referenced, which is returned by function
864 : get_base_address. */
865 :
866 : static bool
867 2436 : base_object_writable (tree ref)
868 : {
869 2436 : tree base_tree = get_base_address (ref);
870 :
871 2436 : return (base_tree
872 2436 : && DECL_P (base_tree)
873 1581 : && decl_binds_to_current_def_p (base_tree)
874 4013 : && !TREE_READONLY (base_tree));
875 : }
876 :
877 : /* Return true when the memory references of STMT won't trap in the
878 : if-converted code. There are two things that we have to check for:
879 :
880 : - writes to memory occur to writable memory: if-conversion of
881 : memory writes transforms the conditional memory writes into
882 : unconditional writes, i.e. "if (cond) A[i] = foo" is transformed
883 : into "A[i] = cond ? foo : A[i]", and as the write to memory may not
884 : be executed at all in the original code, it may be a readonly
885 : memory. To check that A is not const-qualified, we check that
886 : there exists at least an unconditional write to A in the current
887 : function.
888 :
889 : - reads or writes to memory are valid memory accesses for every
890 : iteration. To check that the memory accesses are correctly formed
891 : and that we are allowed to read and write in these locations, we
892 : check that the memory accesses to be if-converted occur at every
893 : iteration unconditionally.
894 :
895 : Returns true for the memory reference in STMT, same memory reference
896 : is read or written unconditionally at least once and the base memory
897 : reference is written unconditionally once. This is to check reference
898 : will not write fault. Also returns true if the memory reference is
899 : unconditionally read once then we are conditionally writing to memory
900 : which is defined as read and write and is bound to the definition
901 : we are seeing. */
902 : static bool
903 20129 : ifcvt_memrefs_wont_trap (gimple *stmt, vec<data_reference_p> drs)
904 : {
905 : /* If DR didn't see a reference here we can't use it to tell
906 : whether the ref traps or not. */
907 20129 : if (gimple_uid (stmt) == 0)
908 : return false;
909 :
910 20128 : data_reference_p *master_dr, *base_master_dr;
911 20128 : data_reference_p a = drs[gimple_uid (stmt) - 1];
912 :
913 20128 : tree base = DR_BASE_OBJECT (a);
914 20128 : innermost_loop_behavior *innermost = &DR_INNERMOST (a);
915 :
916 20128 : gcc_assert (DR_STMT (a) == stmt);
917 20128 : gcc_assert (DR_BASE_ADDRESS (a) || DR_OFFSET (a)
918 : || DR_INIT (a) || DR_STEP (a));
919 :
920 20128 : master_dr = innermost_DR_map->get (innermost);
921 20128 : gcc_assert (master_dr != NULL);
922 :
923 20128 : base_master_dr = baseref_DR_map->get (base);
924 :
925 : /* If a is unconditionally written to it doesn't trap. */
926 20128 : if (DR_W_UNCONDITIONALLY (*master_dr))
927 : return true;
928 :
929 : /* If a is unconditionally accessed then ...
930 :
931 : Even a is conditional access, we can treat it as an unconditional
932 : one if it's an array reference and all its index are within array
933 : bound. */
934 18269 : if (DR_RW_UNCONDITIONALLY (*master_dr)
935 18269 : || ref_within_array_bound (stmt, DR_REF (a)))
936 : {
937 : /* an unconditional read won't trap. */
938 6641 : if (DR_IS_READ (a))
939 : return true;
940 :
941 : /* an unconditionally write won't trap if the base is written
942 : to unconditionally. */
943 2473 : if ((base_master_dr
944 2473 : && DR_BASE_W_UNCONDITIONALLY (*base_master_dr))
945 : /* or the base is known to be not readonly. */
946 4909 : || base_object_writable (DR_REF (a)))
947 1614 : return !ref_can_have_store_data_races (base);
948 : }
949 :
950 : return false;
951 : }
952 :
953 : /* Return true if STMT could be converted into a masked load or store
954 : (conditional load or store based on a mask computed from bb predicate). */
955 :
956 : static bool
957 11928 : ifcvt_can_use_mask_load_store (gimple *stmt)
958 : {
959 : /* Check whether this is a load or store. */
960 11928 : tree lhs = gimple_assign_lhs (stmt);
961 11928 : bool is_load;
962 11928 : tree ref;
963 11928 : if (gimple_store_p (stmt))
964 : {
965 2696 : if (!is_gimple_val (gimple_assign_rhs1 (stmt)))
966 : return false;
967 : is_load = false;
968 : ref = lhs;
969 : }
970 9232 : else if (gimple_assign_load_p (stmt))
971 : {
972 9231 : is_load = true;
973 9231 : ref = gimple_assign_rhs1 (stmt);
974 : }
975 : else
976 : return false;
977 :
978 11927 : if (may_be_nonaddressable_p (ref))
979 : return false;
980 :
981 : /* Mask should be integer mode of the same size as the load/store
982 : mode. */
983 11868 : machine_mode mode = TYPE_MODE (TREE_TYPE (lhs));
984 11868 : if (!int_mode_for_mode (mode).exists () || VECTOR_MODE_P (mode))
985 31 : return false;
986 :
987 11837 : if (can_vec_mask_load_store_p (mode, VOIDmode, is_load))
988 : return true;
989 :
990 : return false;
991 : }
992 :
993 : /* Return true if STMT could be converted from an operation that is
994 : unconditional to one that is conditional on a bb predicate mask. */
995 :
996 : static bool
997 13914 : ifcvt_can_predicate (gimple *stmt)
998 : {
999 13914 : basic_block bb = gimple_bb (stmt);
1000 :
1001 320 : if (!(flag_tree_loop_vectorize || bb->loop_father->force_vectorize)
1002 13914 : || bb->loop_father->dont_vectorize
1003 27828 : || gimple_has_volatile_ops (stmt))
1004 : return false;
1005 :
1006 13914 : if (gimple_assign_single_p (stmt))
1007 11928 : return ifcvt_can_use_mask_load_store (stmt);
1008 :
1009 1986 : tree callee;
1010 1986 : if (gimple_call_builtin_p (stmt))
1011 155 : if ((callee = gimple_call_fndecl (stmt))
1012 155 : && fndecl_built_in_p (callee, BUILT_IN_NORMAL))
1013 : {
1014 149 : auto ifn = associated_internal_fn (callee);
1015 149 : auto cond_ifn = get_conditional_internal_fn (ifn);
1016 149 : tree type = TREE_TYPE (gimple_call_fntype (stmt));
1017 149 : return (cond_ifn != IFN_LAST
1018 149 : && vectorized_internal_fn_supported_p (cond_ifn, type));
1019 : }
1020 :
1021 1837 : if (!is_gimple_assign (stmt))
1022 : return false;
1023 :
1024 1831 : tree_code code = gimple_assign_rhs_code (stmt);
1025 1831 : tree lhs_type = TREE_TYPE (gimple_assign_lhs (stmt));
1026 1831 : tree rhs_type = TREE_TYPE (gimple_assign_rhs1 (stmt));
1027 1831 : if (!types_compatible_p (lhs_type, rhs_type))
1028 : return false;
1029 1101 : internal_fn cond_fn = get_conditional_internal_fn (code);
1030 1101 : return (cond_fn != IFN_LAST
1031 1101 : && vectorized_internal_fn_supported_p (cond_fn, lhs_type));
1032 : }
1033 :
1034 : /* Return true when STMT is if-convertible.
1035 :
1036 : GIMPLE_ASSIGN statement is not if-convertible if,
1037 : - it is not movable,
1038 : - it could trap,
1039 : - LHS is not var decl. */
1040 :
1041 : static bool
1042 76516 : if_convertible_gimple_assign_stmt_p (gimple *stmt,
1043 : vec<data_reference_p> refs)
1044 : {
1045 76516 : tree lhs = gimple_assign_lhs (stmt);
1046 :
1047 76516 : if (dump_file && (dump_flags & TDF_DETAILS))
1048 : {
1049 26 : fprintf (dump_file, "-------------------------\n");
1050 26 : print_gimple_stmt (dump_file, stmt, 0, TDF_SLIM);
1051 : }
1052 :
1053 76516 : if (!is_gimple_reg_type (TREE_TYPE (lhs)))
1054 : return false;
1055 :
1056 : /* Some of these constrains might be too conservative. */
1057 76237 : if (stmt_ends_bb_p (stmt)
1058 76237 : || gimple_has_volatile_ops (stmt)
1059 76162 : || (TREE_CODE (lhs) == SSA_NAME
1060 71830 : && SSA_NAME_OCCURS_IN_ABNORMAL_PHI (lhs))
1061 152399 : || gimple_has_side_effects (stmt))
1062 : {
1063 75 : if (dump_file && (dump_flags & TDF_DETAILS))
1064 0 : fprintf (dump_file, "stmt not suitable for ifcvt\n");
1065 75 : return false;
1066 : }
1067 :
1068 : /* tree-into-ssa.cc uses GF_PLF_1, so avoid it, because
1069 : in between if_convertible_loop_p and combine_blocks
1070 : we can perform loop versioning. */
1071 76162 : gimple_set_plf (stmt, GF_PLF_2, false);
1072 :
1073 76162 : if ((! gimple_vuse (stmt)
1074 20129 : || gimple_could_trap_p_1 (stmt, false, false)
1075 20129 : || ! ifcvt_memrefs_wont_trap (stmt, refs))
1076 89086 : && gimple_could_trap_p (stmt))
1077 : {
1078 13759 : if (ifcvt_can_predicate (stmt))
1079 : {
1080 2776 : gimple_set_plf (stmt, GF_PLF_2, true);
1081 2776 : need_to_predicate = true;
1082 2776 : return true;
1083 : }
1084 10983 : if (dump_file && (dump_flags & TDF_DETAILS))
1085 0 : fprintf (dump_file, "tree could trap...\n");
1086 10983 : return false;
1087 : }
1088 62403 : else if (gimple_needing_rewrite_undefined (stmt))
1089 : /* We have to rewrite stmts with undefined overflow. */
1090 27855 : need_to_rewrite_undefined = true;
1091 :
1092 : /* When if-converting stores force versioning, likewise if we
1093 : ended up generating store data races. */
1094 124806 : if (gimple_vdef (stmt))
1095 1636 : need_to_predicate = true;
1096 :
1097 : return true;
1098 : }
1099 :
1100 : /* Return true when SW switch statement is equivalent to cond, that
1101 : all non default labels point to the same label.
1102 :
1103 : Fallthrough is not checked for and could even happen
1104 : with cond (using goto), so is handled.
1105 :
1106 : This is intended for switches created by the if-switch-conversion
1107 : pass, but can handle some programmer supplied cases too. */
1108 :
1109 : static bool
1110 14 : if_convertible_switch_p (gswitch *sw)
1111 : {
1112 14 : if (gimple_switch_num_labels (sw) <= 1)
1113 : return false;
1114 14 : tree label = CASE_LABEL (gimple_switch_label (sw, 1));
1115 43 : for (unsigned i = 1; i < gimple_switch_num_labels (sw); i++)
1116 : {
1117 29 : if (CASE_LABEL (gimple_switch_label (sw, i)) != label)
1118 : return false;
1119 : }
1120 : return true;
1121 : }
1122 :
1123 : /* Return true when STMT is an if-convertible SIMD clone stmts.
1124 :
1125 : A SIMD clone statement is if-convertible if:
1126 : - it is an GIMPLE_CALL,
1127 : - it has a FNDECL,
1128 : - it has SIMD clones,
1129 : - it has at least one inbranch clone. */
1130 : static bool
1131 1593 : if_convertible_simdclone_stmt_p (gimple *stmt)
1132 : {
1133 1593 : if (!is_gimple_call (stmt))
1134 : return false;
1135 :
1136 1593 : tree fndecl = gimple_call_fndecl (stmt);
1137 1593 : if (fndecl)
1138 : {
1139 : /* We can vectorize some builtins and functions with SIMD "inbranch"
1140 : clones. */
1141 1325 : struct cgraph_node *node = cgraph_node::get (fndecl);
1142 1325 : if (node && node->simd_clones != NULL)
1143 : /* Ensure that at least one clone can be "inbranch". */
1144 1962 : for (struct cgraph_node *n = node->simd_clones; n != NULL;
1145 961 : n = n->simdclone->next_clone)
1146 1960 : if (n->simdclone->inbranch)
1147 : return true;
1148 : }
1149 :
1150 : return false;
1151 : }
1152 :
1153 : /* Return true when STMT is if-convertible.
1154 :
1155 : A statement is if-convertible if:
1156 : - it is an if-convertible GIMPLE_ASSIGN,
1157 : - it is a GIMPLE_LABEL or a GIMPLE_COND,
1158 : - it is a switch equivalent to COND
1159 : - it is builtins call,
1160 : - it is a call to a function with a SIMD clone. */
1161 :
1162 : static bool
1163 173633 : if_convertible_stmt_p (gimple *stmt, vec<data_reference_p> refs)
1164 : {
1165 173633 : switch (gimple_code (stmt))
1166 : {
1167 : case GIMPLE_LABEL:
1168 : case GIMPLE_DEBUG:
1169 : case GIMPLE_COND:
1170 : return true;
1171 :
1172 14 : case GIMPLE_SWITCH:
1173 14 : return if_convertible_switch_p (as_a <gswitch *> (stmt));
1174 :
1175 76516 : case GIMPLE_ASSIGN:
1176 76516 : return if_convertible_gimple_assign_stmt_p (stmt, refs);
1177 :
1178 1485 : case GIMPLE_CALL:
1179 1485 : {
1180 : /* Check if stmt is a simd clone first. */
1181 1485 : if (if_convertible_simdclone_stmt_p (stmt))
1182 : {
1183 999 : gimple_set_plf (stmt, GF_PLF_2, true);
1184 999 : need_to_predicate = true;
1185 999 : return true;
1186 : }
1187 :
1188 : /* Check if the call can trap and if so require predication. */
1189 486 : if (gimple_could_trap_p (stmt))
1190 : {
1191 155 : if (ifcvt_can_predicate (stmt))
1192 : {
1193 108 : gimple_set_plf (stmt, GF_PLF_2, true);
1194 108 : need_to_predicate = true;
1195 108 : return true;
1196 : }
1197 : else
1198 : {
1199 47 : if (dump_file && (dump_flags & TDF_DETAILS))
1200 0 : fprintf (dump_file, "stmt could trap...\n");
1201 47 : return false;
1202 : }
1203 : }
1204 :
1205 : /* Check if it's a prefetch. Many ISAs contain vectorized and/or
1206 : conditional prefetches so if-convert should convert them or remove
1207 : them. Mark them as supported. */
1208 331 : if (gimple_call_builtin_p (stmt, BUILT_IN_PREFETCH))
1209 : return true;
1210 :
1211 : /* There are some IFN_s that are used to replace builtins but have the
1212 : same semantics. Even if MASK_CALL cannot handle them vectorable_call
1213 : will insert the proper selection, so do not block conversion. */
1214 329 : int flags = gimple_call_flags (stmt);
1215 329 : if ((flags & ECF_CONST)
1216 329 : && !(flags & ECF_LOOPING_CONST_OR_PURE)
1217 658 : && gimple_call_combined_fn (stmt) != CFN_LAST)
1218 : return true;
1219 :
1220 : return false;
1221 : }
1222 :
1223 0 : default:
1224 : /* Don't know what to do with 'em so don't do anything. */
1225 0 : if (dump_file && (dump_flags & TDF_DETAILS))
1226 : {
1227 0 : fprintf (dump_file, "don't know what to do\n");
1228 0 : print_gimple_stmt (dump_file, stmt, 0, TDF_SLIM);
1229 : }
1230 : return false;
1231 : }
1232 : }
1233 :
1234 : /* Assumes that BB has more than 1 predecessors.
1235 : Returns false if at least one successor is not on critical edge
1236 : and true otherwise. */
1237 :
1238 : static inline bool
1239 81035 : all_preds_critical_p (basic_block bb)
1240 : {
1241 81035 : edge e;
1242 81035 : edge_iterator ei;
1243 :
1244 158052 : FOR_EACH_EDGE (e, ei, bb->preds)
1245 140453 : if (EDGE_COUNT (e->src->succs) == 1)
1246 : return false;
1247 : return true;
1248 : }
1249 :
1250 : /* Return true when BB is if-convertible. This routine does not check
1251 : basic block's statements and phis.
1252 :
1253 : A basic block is not if-convertible if:
1254 : - it is non-empty and it is after the exit block (in BFS order),
1255 : - it is after the exit block but before the latch,
1256 : - its edges are not normal.
1257 :
1258 : EXIT_BB is the basic block containing the exit of the LOOP. BB is
1259 : inside LOOP. */
1260 :
1261 : static bool
1262 229303 : if_convertible_bb_p (class loop *loop, basic_block bb, basic_block exit_bb)
1263 : {
1264 229303 : edge e;
1265 229303 : edge_iterator ei;
1266 :
1267 229303 : if (dump_file && (dump_flags & TDF_DETAILS))
1268 86 : fprintf (dump_file, "----------[%d]-------------\n", bb->index);
1269 :
1270 229303 : if (EDGE_COUNT (bb->succs) > 2)
1271 : return false;
1272 :
1273 458478 : if (gcall *call = safe_dyn_cast <gcall *> (*gsi_last_bb (bb)))
1274 190 : if (gimple_call_ctrl_altering_p (call))
1275 : return false;
1276 :
1277 229239 : if (exit_bb)
1278 : {
1279 42370 : if (bb != loop->latch)
1280 : {
1281 528 : if (dump_file && (dump_flags & TDF_DETAILS))
1282 0 : fprintf (dump_file, "basic block after exit bb but before latch\n");
1283 528 : return false;
1284 : }
1285 41842 : else if (!empty_block_p (bb))
1286 : {
1287 1438 : if (dump_file && (dump_flags & TDF_DETAILS))
1288 0 : fprintf (dump_file, "non empty basic block after exit bb\n");
1289 1438 : return false;
1290 : }
1291 40404 : else if (bb == loop->latch
1292 40404 : && bb != exit_bb
1293 80808 : && !dominated_by_p (CDI_DOMINATORS, bb, exit_bb))
1294 : {
1295 11 : if (dump_file && (dump_flags & TDF_DETAILS))
1296 0 : fprintf (dump_file, "latch is not dominated by exit_block\n");
1297 11 : return false;
1298 : }
1299 : }
1300 :
1301 : /* Be less adventurous and handle only normal edges. */
1302 556335 : FOR_EACH_EDGE (e, ei, bb->succs)
1303 329083 : if (e->flags & (EDGE_EH | EDGE_ABNORMAL | EDGE_IRREDUCIBLE_LOOP))
1304 : {
1305 10 : if (dump_file && (dump_flags & TDF_DETAILS))
1306 0 : fprintf (dump_file, "Difficult to handle edges\n");
1307 10 : return false;
1308 : }
1309 :
1310 : return true;
1311 : }
1312 :
1313 : /* Return true when all predecessor blocks of BB are visited. The
1314 : VISITED bitmap keeps track of the visited blocks. */
1315 :
1316 : static bool
1317 2264357 : pred_blocks_visited_p (basic_block bb, bitmap *visited)
1318 : {
1319 2264357 : edge e;
1320 2264357 : edge_iterator ei;
1321 3909113 : FOR_EACH_EDGE (e, ei, bb->preds)
1322 2567683 : if (!bitmap_bit_p (*visited, e->src->index))
1323 : return false;
1324 :
1325 : return true;
1326 : }
1327 :
1328 : /* Get body of a LOOP in suitable order for if-conversion. It is
1329 : caller's responsibility to deallocate basic block list.
1330 : If-conversion suitable order is, breadth first sort (BFS) order
1331 : with an additional constraint: select a block only if all its
1332 : predecessors are already selected. */
1333 :
1334 : static basic_block *
1335 399544 : get_loop_body_in_if_conv_order (const class loop *loop)
1336 : {
1337 399544 : basic_block *blocks, *blocks_in_bfs_order;
1338 399544 : basic_block bb;
1339 399544 : bitmap visited;
1340 399544 : unsigned int index = 0;
1341 399544 : unsigned int visited_count = 0;
1342 :
1343 399544 : gcc_assert (loop->num_nodes);
1344 399544 : gcc_assert (loop->latch != EXIT_BLOCK_PTR_FOR_FN (cfun));
1345 :
1346 399544 : blocks = XCNEWVEC (basic_block, loop->num_nodes);
1347 399544 : visited = BITMAP_ALLOC (NULL);
1348 :
1349 399544 : blocks_in_bfs_order = get_loop_body_in_bfs_order (loop);
1350 :
1351 399544 : index = 0;
1352 3932307 : while (index < loop->num_nodes)
1353 : {
1354 3133271 : bb = blocks_in_bfs_order [index];
1355 :
1356 3133271 : if (bb->flags & BB_IRREDUCIBLE_LOOP)
1357 : {
1358 52 : free (blocks_in_bfs_order);
1359 52 : BITMAP_FREE (visited);
1360 52 : free (blocks);
1361 52 : return NULL;
1362 : }
1363 :
1364 3133219 : if (!bitmap_bit_p (visited, bb->index))
1365 : {
1366 2264357 : if (pred_blocks_visited_p (bb, &visited)
1367 2264357 : || bb == loop->header)
1368 : {
1369 : /* This block is now visited. */
1370 1740974 : bitmap_set_bit (visited, bb->index);
1371 1740974 : blocks[visited_count++] = bb;
1372 : }
1373 : }
1374 :
1375 3133219 : index++;
1376 :
1377 3133219 : if (index == loop->num_nodes
1378 459352 : && visited_count != loop->num_nodes)
1379 : /* Not done yet. */
1380 3133219 : index = 0;
1381 : }
1382 399492 : free (blocks_in_bfs_order);
1383 399492 : BITMAP_FREE (visited);
1384 :
1385 : /* Go through loop and reject if-conversion or lowering of bitfields if we
1386 : encounter statements we do not believe the vectorizer will be able to
1387 : handle. If adding a new type of statement here, make sure
1388 : 'ifcvt_local_dce' is also able to handle it properly. */
1389 2129177 : for (index = 0; index < loop->num_nodes; index++)
1390 : {
1391 1732217 : basic_block bb = blocks[index];
1392 1732217 : gimple_stmt_iterator gsi;
1393 :
1394 1732217 : bool may_have_nonlocal_labels
1395 1732217 : = bb_with_exit_edge_p (loop, bb) || bb == loop->latch;
1396 15684712 : for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
1397 12222810 : switch (gimple_code (gsi_stmt (gsi)))
1398 : {
1399 40409 : case GIMPLE_LABEL:
1400 40409 : if (!may_have_nonlocal_labels)
1401 : {
1402 6836 : tree label
1403 6836 : = gimple_label_label (as_a <glabel *> (gsi_stmt (gsi)));
1404 13672 : if (DECL_NONLOCAL (label) || FORCED_LABEL (label))
1405 : {
1406 40 : free (blocks);
1407 40 : return NULL;
1408 : }
1409 : }
1410 : /* Fallthru. */
1411 12220278 : case GIMPLE_ASSIGN:
1412 12220278 : case GIMPLE_CALL:
1413 12220278 : case GIMPLE_DEBUG:
1414 12220278 : case GIMPLE_COND:
1415 12220278 : case GIMPLE_SWITCH:
1416 12220278 : gimple_set_uid (gsi_stmt (gsi), 0);
1417 12220278 : break;
1418 2492 : default:
1419 2492 : free (blocks);
1420 2492 : return NULL;
1421 : }
1422 : }
1423 : return blocks;
1424 : }
1425 :
1426 : /* Returns true when the analysis of the predicates for all the basic
1427 : blocks in LOOP succeeded.
1428 :
1429 : predicate_bbs first allocates the predicates of the basic blocks.
1430 : These fields are then initialized with the tree expressions
1431 : representing the predicates under which a basic block is executed
1432 : in the LOOP. As the loop->header is executed at each iteration, it
1433 : has the "true" predicate. Other statements executed under a
1434 : condition are predicated with that condition, for example
1435 :
1436 : | if (x)
1437 : | S1;
1438 : | else
1439 : | S2;
1440 :
1441 : S1 will be predicated with "x", and
1442 : S2 will be predicated with "!x". */
1443 :
1444 : static void
1445 40393 : predicate_bbs (loop_p loop)
1446 : {
1447 40393 : unsigned int i;
1448 :
1449 260140 : for (i = 0; i < loop->num_nodes; i++)
1450 219747 : init_bb_predicate (ifc_bbs[i]);
1451 :
1452 260140 : for (i = 0; i < loop->num_nodes; i++)
1453 : {
1454 219747 : basic_block bb = ifc_bbs[i];
1455 219747 : tree cond;
1456 :
1457 : /* The loop latch and loop exit block are always executed and
1458 : have no extra conditions to be processed: skip them. */
1459 300533 : if (bb == loop->latch
1460 219747 : || bb_with_exit_edge_p (loop, bb))
1461 : {
1462 80786 : reset_bb_predicate (bb);
1463 80786 : continue;
1464 : }
1465 :
1466 138961 : cond = bb_predicate (bb);
1467 277922 : if (gcond *stmt = safe_dyn_cast <gcond *> (*gsi_last_bb (bb)))
1468 : {
1469 56958 : tree c2;
1470 56958 : edge true_edge, false_edge;
1471 56958 : location_t loc = gimple_location (stmt);
1472 56958 : tree c;
1473 : /* gcc.dg/fold-bopcond-1.c shows that despite all forwprop passes
1474 : conditions can remain unfolded because of multiple uses so
1475 : try to re-fold here, especially to get precision changing
1476 : conversions sorted out. Do not simply fold the stmt since
1477 : this is analysis only. When conditions were embedded in
1478 : COND_EXPRs those were folded separately before folding the
1479 : COND_EXPR but as they are now outside we have to make sure
1480 : to fold them. Do it here - another opportunity would be to
1481 : fold predicates as they are inserted. */
1482 56958 : gimple_match_op cexpr (gimple_match_cond::UNCOND,
1483 56958 : gimple_cond_code (stmt),
1484 : boolean_type_node,
1485 : gimple_cond_lhs (stmt),
1486 56958 : gimple_cond_rhs (stmt));
1487 56958 : if (cexpr.resimplify (NULL, follow_all_ssa_edges)
1488 3985 : && cexpr.code.is_tree_code ()
1489 60943 : && TREE_CODE_CLASS ((tree_code)cexpr.code) == tcc_comparison)
1490 577 : c = build2_loc (loc, (tree_code)cexpr.code, boolean_type_node,
1491 : cexpr.ops[0], cexpr.ops[1]);
1492 : else
1493 56381 : c = build2_loc (loc, gimple_cond_code (stmt),
1494 : boolean_type_node,
1495 : gimple_cond_lhs (stmt),
1496 : gimple_cond_rhs (stmt));
1497 :
1498 : /* Add new condition into destination's predicate list. */
1499 56958 : extract_true_false_edges_from_block (gimple_bb (stmt),
1500 : &true_edge, &false_edge);
1501 :
1502 : /* If C is true, then TRUE_EDGE is taken. */
1503 56958 : add_to_dst_predicate_list (loop, true_edge, unshare_expr (cond),
1504 : unshare_expr (c));
1505 :
1506 : /* If C is false, then FALSE_EDGE is taken. */
1507 56958 : c2 = build1_loc (loc, TRUTH_NOT_EXPR, boolean_type_node,
1508 : unshare_expr (c));
1509 56958 : add_to_dst_predicate_list (loop, false_edge,
1510 : unshare_expr (cond), c2);
1511 :
1512 56958 : cond = NULL_TREE;
1513 : }
1514 :
1515 : /* Assumes the limited COND like switches checked for earlier. */
1516 82003 : else if (gswitch *sw = safe_dyn_cast <gswitch *> (*gsi_last_bb (bb)))
1517 : {
1518 71 : location_t loc = gimple_location (*gsi_last_bb (bb));
1519 :
1520 71 : tree default_label = CASE_LABEL (gimple_switch_default_label (sw));
1521 71 : tree cond_label = CASE_LABEL (gimple_switch_label (sw, 1));
1522 :
1523 71 : edge false_edge = find_edge (bb, label_to_block (cfun, default_label));
1524 71 : edge true_edge = find_edge (bb, label_to_block (cfun, cond_label));
1525 :
1526 : /* Create chain of switch tests for each case. */
1527 71 : tree switch_cond = NULL_TREE;
1528 71 : tree index = gimple_switch_index (sw);
1529 272 : for (unsigned i = 1; i < gimple_switch_num_labels (sw); i++)
1530 : {
1531 201 : tree label = gimple_switch_label (sw, i);
1532 201 : tree case_cond;
1533 201 : if (CASE_HIGH (label))
1534 : {
1535 7 : tree low = build2_loc (loc, GE_EXPR,
1536 : boolean_type_node,
1537 7 : index, fold_convert_loc (loc, TREE_TYPE (index),
1538 7 : CASE_LOW (label)));
1539 14 : tree high = build2_loc (loc, LE_EXPR,
1540 : boolean_type_node,
1541 7 : index, fold_convert_loc (loc, TREE_TYPE (index),
1542 7 : CASE_HIGH (label)));
1543 7 : case_cond = build2_loc (loc, TRUTH_AND_EXPR,
1544 : boolean_type_node,
1545 : low, high);
1546 : }
1547 : else
1548 194 : case_cond = build2_loc (loc, EQ_EXPR,
1549 : boolean_type_node,
1550 : index,
1551 194 : fold_convert_loc (loc, TREE_TYPE (index),
1552 194 : CASE_LOW (label)));
1553 201 : if (i > 1)
1554 130 : switch_cond = build2_loc (loc, TRUTH_OR_EXPR,
1555 : boolean_type_node,
1556 : case_cond, switch_cond);
1557 : else
1558 : switch_cond = case_cond;
1559 : }
1560 :
1561 71 : add_to_dst_predicate_list (loop, true_edge, unshare_expr (cond),
1562 : unshare_expr (switch_cond));
1563 71 : switch_cond = build1_loc (loc, TRUTH_NOT_EXPR, boolean_type_node,
1564 : unshare_expr (switch_cond));
1565 71 : add_to_dst_predicate_list (loop, false_edge,
1566 : unshare_expr (cond), switch_cond);
1567 71 : cond = NULL_TREE;
1568 : }
1569 :
1570 : /* If current bb has only one successor, then consider it as an
1571 : unconditional goto. */
1572 301679 : if (single_succ_p (bb))
1573 : {
1574 81932 : basic_block bb_n = single_succ (bb);
1575 :
1576 : /* The successor bb inherits the predicate of its
1577 : predecessor. If there is no predicate in the predecessor
1578 : bb, then consider the successor bb as always executed. */
1579 81932 : if (cond == NULL_TREE)
1580 0 : cond = boolean_true_node;
1581 :
1582 81932 : add_to_predicate_list (loop, bb_n, cond);
1583 : }
1584 : }
1585 :
1586 : /* The loop header is always executed. */
1587 40393 : reset_bb_predicate (loop->header);
1588 40393 : gcc_assert (bb_predicate_gimplified_stmts (loop->header) == NULL
1589 : && bb_predicate_gimplified_stmts (loop->latch) == NULL);
1590 40393 : }
1591 :
1592 : /* Build region by adding loop pre-header and post-header blocks. */
1593 :
1594 : static vec<basic_block>
1595 40393 : build_region (class loop *loop)
1596 : {
1597 40393 : vec<basic_block> region = vNULL;
1598 40393 : basic_block exit_bb = NULL;
1599 :
1600 40393 : gcc_assert (ifc_bbs);
1601 : /* The first element is loop pre-header. */
1602 40393 : region.safe_push (loop_preheader_edge (loop)->src);
1603 :
1604 260140 : for (unsigned int i = 0; i < loop->num_nodes; i++)
1605 : {
1606 219747 : basic_block bb = ifc_bbs[i];
1607 219747 : region.safe_push (bb);
1608 : /* Find loop postheader. */
1609 219747 : edge e;
1610 219747 : edge_iterator ei;
1611 487951 : FOR_EACH_EDGE (e, ei, bb->succs)
1612 308597 : if (loop_exit_edge_p (loop, e))
1613 : {
1614 40393 : exit_bb = e->dest;
1615 40393 : break;
1616 : }
1617 : }
1618 : /* The last element is loop post-header. */
1619 40393 : gcc_assert (exit_bb);
1620 40393 : region.safe_push (exit_bb);
1621 40393 : return region;
1622 : }
1623 :
1624 : /* Return true when LOOP is if-convertible. This is a helper function
1625 : for if_convertible_loop_p. REFS and DDRS are initialized and freed
1626 : in if_convertible_loop_p. */
1627 :
1628 : static bool
1629 42444 : if_convertible_loop_p_1 (class loop *loop, vec<data_reference_p> *refs)
1630 : {
1631 42444 : unsigned int i;
1632 42444 : basic_block exit_bb = NULL;
1633 42444 : vec<basic_block> region;
1634 :
1635 42444 : calculate_dominance_info (CDI_DOMINATORS);
1636 :
1637 269696 : for (i = 0; i < loop->num_nodes; i++)
1638 : {
1639 229303 : basic_block bb = ifc_bbs[i];
1640 :
1641 229303 : if (!if_convertible_bb_p (loop, bb, exit_bb))
1642 : return false;
1643 :
1644 227252 : if (bb_with_exit_edge_p (loop, bb))
1645 42370 : exit_bb = bb;
1646 : }
1647 :
1648 40393 : data_reference_p dr;
1649 :
1650 40393 : innermost_DR_map
1651 40393 : = new hash_map<innermost_loop_behavior_hash, data_reference_p>;
1652 40393 : baseref_DR_map = new hash_map<tree_operand_hash, data_reference_p>;
1653 :
1654 : /* Compute post-dominator tree locally. */
1655 40393 : region = build_region (loop);
1656 40393 : calculate_dominance_info_for_region (CDI_POST_DOMINATORS, region);
1657 :
1658 40393 : predicate_bbs (loop);
1659 :
1660 : /* Free post-dominator tree since it is not used after predication. */
1661 40393 : free_dominance_info_for_region (cfun, CDI_POST_DOMINATORS, region);
1662 40393 : region.release ();
1663 :
1664 178714 : for (i = 0; refs->iterate (i, &dr); i++)
1665 : {
1666 138321 : tree ref = DR_REF (dr);
1667 :
1668 138321 : dr->aux = XNEW (struct ifc_dr);
1669 138321 : DR_BASE_W_UNCONDITIONALLY (dr) = false;
1670 138321 : DR_RW_UNCONDITIONALLY (dr) = false;
1671 138321 : DR_W_UNCONDITIONALLY (dr) = false;
1672 138321 : IFC_DR (dr)->rw_predicate = boolean_false_node;
1673 138321 : IFC_DR (dr)->w_predicate = boolean_false_node;
1674 138321 : IFC_DR (dr)->base_w_predicate = boolean_false_node;
1675 138321 : if (gimple_uid (DR_STMT (dr)) == 0)
1676 137644 : gimple_set_uid (DR_STMT (dr), i + 1);
1677 :
1678 : /* If DR doesn't have innermost loop behavior or it's a compound
1679 : memory reference, we synthesize its innermost loop behavior
1680 : for hashing. */
1681 138321 : if (TREE_CODE (ref) == COMPONENT_REF
1682 : || TREE_CODE (ref) == IMAGPART_EXPR
1683 : || TREE_CODE (ref) == REALPART_EXPR
1684 89931 : || !(DR_BASE_ADDRESS (dr) || DR_OFFSET (dr)
1685 26203 : || DR_INIT (dr) || DR_STEP (dr)))
1686 : {
1687 130882 : while (TREE_CODE (ref) == COMPONENT_REF
1688 75752 : || TREE_CODE (ref) == IMAGPART_EXPR
1689 206056 : || TREE_CODE (ref) == REALPART_EXPR)
1690 56289 : ref = TREE_OPERAND (ref, 0);
1691 :
1692 74593 : memset (&DR_INNERMOST (dr), 0, sizeof (DR_INNERMOST (dr)));
1693 74593 : DR_BASE_ADDRESS (dr) = ref;
1694 : }
1695 138321 : hash_memrefs_baserefs_and_store_DRs_read_written_info (dr);
1696 : }
1697 :
1698 203482 : for (i = 0; i < loop->num_nodes; i++)
1699 : {
1700 174487 : basic_block bb = ifc_bbs[i];
1701 174487 : gimple_stmt_iterator itr;
1702 :
1703 : /* Check the if-convertibility of statements in predicated BBs. */
1704 174487 : if (!dominated_by_p (CDI_DOMINATORS, loop->latch, bb))
1705 305969 : for (itr = gsi_start_bb (bb); !gsi_end_p (itr); gsi_next (&itr))
1706 173633 : if (!if_convertible_stmt_p (gsi_stmt (itr), *refs))
1707 13449 : return false;
1708 : }
1709 :
1710 : /* Checking PHIs needs to be done after stmts, as the fact whether there
1711 : are any masked loads or stores affects the tests. */
1712 177632 : for (i = 0; i < loop->num_nodes; i++)
1713 : {
1714 148637 : basic_block bb = ifc_bbs[i];
1715 148637 : gphi_iterator itr;
1716 :
1717 287634 : for (itr = gsi_start_phis (bb); !gsi_end_p (itr); gsi_next (&itr))
1718 138997 : if (!if_convertible_phi_p (loop, bb, itr.phi ()))
1719 : return false;
1720 : }
1721 :
1722 28995 : if (dump_file)
1723 30 : fprintf (dump_file, "Applying if-conversion\n");
1724 :
1725 : return true;
1726 : }
1727 :
1728 : /* Return true when LOOP is if-convertible.
1729 : LOOP is if-convertible if:
1730 : - it is innermost,
1731 : - it has two or more basic blocks,
1732 : - it has only one exit,
1733 : - loop header is not the exit edge,
1734 : - if its basic blocks and phi nodes are if convertible. */
1735 :
1736 : static bool
1737 42586 : if_convertible_loop_p (class loop *loop, vec<data_reference_p> *refs)
1738 : {
1739 42586 : edge e;
1740 42586 : edge_iterator ei;
1741 42586 : bool res = false;
1742 :
1743 : /* Handle only innermost loop. */
1744 42586 : if (!loop || loop->inner)
1745 : {
1746 0 : if (dump_file && (dump_flags & TDF_DETAILS))
1747 0 : fprintf (dump_file, "not innermost loop\n");
1748 0 : return false;
1749 : }
1750 :
1751 : /* If only one block, no need for if-conversion. */
1752 42586 : if (loop->num_nodes <= 2)
1753 : {
1754 0 : if (dump_file && (dump_flags & TDF_DETAILS))
1755 0 : fprintf (dump_file, "less than 2 basic blocks\n");
1756 0 : return false;
1757 : }
1758 :
1759 : /* If one of the loop header's edge is an exit edge then do not
1760 : apply if-conversion. */
1761 127639 : FOR_EACH_EDGE (e, ei, loop->header->succs)
1762 85195 : if (loop_exit_edge_p (loop, e))
1763 : return false;
1764 :
1765 42444 : res = if_convertible_loop_p_1 (loop, refs);
1766 :
1767 82837 : delete innermost_DR_map;
1768 42444 : innermost_DR_map = NULL;
1769 :
1770 82837 : delete baseref_DR_map;
1771 42444 : baseref_DR_map = NULL;
1772 :
1773 42444 : return res;
1774 : }
1775 :
1776 : /* Return reduc_1 if has_nop.
1777 :
1778 : if (...)
1779 : tmp1 = (unsigned type) reduc_1;
1780 : tmp2 = tmp1 + rhs2;
1781 : reduc_3 = (signed type) tmp2. */
1782 : static tree
1783 11162 : strip_nop_cond_scalar_reduction (bool has_nop, tree op)
1784 : {
1785 11162 : if (!has_nop)
1786 : return op;
1787 :
1788 414 : if (TREE_CODE (op) != SSA_NAME)
1789 : return NULL_TREE;
1790 :
1791 368 : gassign *stmt = safe_dyn_cast <gassign *> (SSA_NAME_DEF_STMT (op));
1792 368 : if (!stmt
1793 368 : || !CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (stmt))
1794 230 : || !tree_nop_conversion_p (TREE_TYPE (op), TREE_TYPE
1795 : (gimple_assign_rhs1 (stmt))))
1796 149 : return NULL_TREE;
1797 :
1798 219 : return gimple_assign_rhs1 (stmt);
1799 : }
1800 :
1801 : /* Returns true if def-stmt for phi argument ARG is simple increment/decrement
1802 : which is in predicated basic block.
1803 : In fact, the following PHI pattern is searching:
1804 : loop-header:
1805 : reduc_1 = PHI <..., reduc_2>
1806 : ...
1807 : if (...)
1808 : reduc_3 = ...
1809 : reduc_2 = PHI <reduc_1, reduc_3>
1810 :
1811 : ARG_0 and ARG_1 are correspondent PHI arguments.
1812 : REDUC, OP0 and OP1 contain reduction stmt and its operands.
1813 : EXTENDED is true if PHI has > 2 arguments. */
1814 :
1815 : static bool
1816 50406 : is_cond_scalar_reduction (gimple *phi, gimple **reduc, tree arg_0, tree arg_1,
1817 : tree *op0, tree *op1, bool extended, bool* has_nop,
1818 : gimple **nop_reduc)
1819 : {
1820 50406 : tree lhs, r_op1, r_op2, r_nop1, r_nop2;
1821 50406 : gimple *stmt;
1822 50406 : gimple *header_phi = NULL;
1823 50406 : enum tree_code reduction_op;
1824 50406 : basic_block bb = gimple_bb (phi);
1825 50406 : class loop *loop = bb->loop_father;
1826 50406 : edge latch_e = loop_latch_edge (loop);
1827 50406 : imm_use_iterator imm_iter;
1828 50406 : use_operand_p use_p;
1829 50406 : edge e;
1830 50406 : edge_iterator ei;
1831 50406 : bool result = *has_nop = false;
1832 50406 : if (TREE_CODE (arg_0) != SSA_NAME || TREE_CODE (arg_1) != SSA_NAME)
1833 : return false;
1834 :
1835 33829 : if (!extended && gimple_code (SSA_NAME_DEF_STMT (arg_0)) == GIMPLE_PHI)
1836 : {
1837 8381 : lhs = arg_1;
1838 8381 : header_phi = SSA_NAME_DEF_STMT (arg_0);
1839 8381 : stmt = SSA_NAME_DEF_STMT (arg_1);
1840 : }
1841 25448 : else if (gimple_code (SSA_NAME_DEF_STMT (arg_1)) == GIMPLE_PHI)
1842 : {
1843 11189 : lhs = arg_0;
1844 11189 : header_phi = SSA_NAME_DEF_STMT (arg_1);
1845 11189 : stmt = SSA_NAME_DEF_STMT (arg_0);
1846 : }
1847 : else
1848 : return false;
1849 19570 : if (gimple_bb (header_phi) != loop->header)
1850 : return false;
1851 :
1852 18680 : if (PHI_ARG_DEF_FROM_EDGE (header_phi, latch_e) != PHI_RESULT (phi))
1853 : return false;
1854 :
1855 12669 : if (gimple_code (stmt) != GIMPLE_ASSIGN
1856 12669 : || gimple_has_volatile_ops (stmt))
1857 : return false;
1858 :
1859 12539 : if (!flow_bb_inside_loop_p (loop, gimple_bb (stmt)))
1860 : return false;
1861 :
1862 12450 : if (!is_predicated (gimple_bb (stmt)))
1863 : return false;
1864 :
1865 : /* Check that stmt-block is predecessor of phi-block. */
1866 10319 : FOR_EACH_EDGE (e, ei, gimple_bb (stmt)->succs)
1867 10255 : if (e->dest == bb)
1868 : {
1869 : result = true;
1870 : break;
1871 : }
1872 10191 : if (!result)
1873 : return false;
1874 :
1875 10127 : if (!has_single_use (lhs))
1876 : return false;
1877 :
1878 10076 : reduction_op = gimple_assign_rhs_code (stmt);
1879 :
1880 : /* Catch something like below
1881 :
1882 : loop-header:
1883 : reduc_1 = PHI <..., reduc_2>
1884 : ...
1885 : if (...)
1886 : tmp1 = (unsigned type) reduc_1;
1887 : tmp2 = tmp1 + rhs2;
1888 : reduc_3 = (signed type) tmp2;
1889 :
1890 : reduc_2 = PHI <reduc_1, reduc_3>
1891 :
1892 : and convert to
1893 :
1894 : reduc_2 = PHI <0, reduc_1>
1895 : tmp1 = (unsigned type)reduc_1;
1896 : ifcvt = cond_expr ? rhs2 : 0
1897 : tmp2 = tmp1 +/- ifcvt;
1898 : reduc_1 = (signed type)tmp2; */
1899 :
1900 10076 : if (CONVERT_EXPR_CODE_P (reduction_op))
1901 : {
1902 413 : lhs = gimple_assign_rhs1 (stmt);
1903 413 : if (TREE_CODE (lhs) != SSA_NAME
1904 413 : || !has_single_use (lhs))
1905 : return false;
1906 :
1907 224 : *nop_reduc = stmt;
1908 224 : stmt = SSA_NAME_DEF_STMT (lhs);
1909 224 : if (gimple_bb (stmt) != gimple_bb (*nop_reduc)
1910 224 : || !is_gimple_assign (stmt))
1911 : return false;
1912 :
1913 221 : *has_nop = true;
1914 221 : reduction_op = gimple_assign_rhs_code (stmt);
1915 : }
1916 :
1917 9884 : if (reduction_op != PLUS_EXPR
1918 : && reduction_op != MINUS_EXPR
1919 9884 : && reduction_op != MULT_EXPR
1920 9884 : && reduction_op != BIT_IOR_EXPR
1921 : && reduction_op != BIT_XOR_EXPR
1922 4417 : && reduction_op != BIT_AND_EXPR)
1923 : return false;
1924 5581 : r_op1 = gimple_assign_rhs1 (stmt);
1925 5581 : r_op2 = gimple_assign_rhs2 (stmt);
1926 :
1927 5581 : r_nop1 = strip_nop_cond_scalar_reduction (*has_nop, r_op1);
1928 5581 : r_nop2 = strip_nop_cond_scalar_reduction (*has_nop, r_op2);
1929 :
1930 : /* Make R_OP1 to hold reduction variable. */
1931 5581 : if (r_nop2 == PHI_RESULT (header_phi)
1932 5581 : && commutative_tree_code (reduction_op))
1933 : {
1934 : std::swap (r_op1, r_op2);
1935 : std::swap (r_nop1, r_nop2);
1936 : }
1937 4476 : else if (r_nop1 != PHI_RESULT (header_phi))
1938 : return false;
1939 :
1940 5266 : if (*has_nop)
1941 : {
1942 : /* Check that R_NOP1 is used in nop_stmt or in PHI only. */
1943 598 : FOR_EACH_IMM_USE_FAST (use_p, imm_iter, r_nop1)
1944 : {
1945 304 : gimple *use_stmt = USE_STMT (use_p);
1946 304 : if (is_gimple_debug (use_stmt))
1947 0 : continue;
1948 304 : if (use_stmt == SSA_NAME_DEF_STMT (r_op1))
1949 126 : continue;
1950 178 : if (use_stmt != phi)
1951 42 : return false;
1952 168 : }
1953 : }
1954 :
1955 : /* Check that R_OP1 is used in reduction stmt or in PHI only. */
1956 21480 : FOR_EACH_IMM_USE_FAST (use_p, imm_iter, r_op1)
1957 : {
1958 11241 : gimple *use_stmt = USE_STMT (use_p);
1959 11241 : if (is_gimple_debug (use_stmt))
1960 29 : continue;
1961 11212 : if (use_stmt == stmt)
1962 5040 : continue;
1963 6172 : if (gimple_code (use_stmt) != GIMPLE_PHI)
1964 209 : return false;
1965 209 : }
1966 :
1967 5015 : *op0 = r_op1; *op1 = r_op2;
1968 5015 : *reduc = stmt;
1969 5015 : return true;
1970 : }
1971 :
1972 : /* Converts conditional scalar reduction into unconditional form, e.g.
1973 : bb_4
1974 : if (_5 != 0) goto bb_5 else goto bb_6
1975 : end_bb_4
1976 : bb_5
1977 : res_6 = res_13 + 1;
1978 : end_bb_5
1979 : bb_6
1980 : # res_2 = PHI <res_13(4), res_6(5)>
1981 : end_bb_6
1982 :
1983 : will be converted into sequence
1984 : _ifc__1 = _5 != 0 ? 1 : 0;
1985 : res_2 = res_13 + _ifc__1;
1986 : Argument SWAP tells that arguments of conditional expression should be
1987 : swapped.
1988 : If LOOP_VERSIONED is true if we assume that we versioned the loop for
1989 : vectorization. In that case we can create a COND_OP.
1990 : Returns rhs of resulting PHI assignment. */
1991 :
1992 : static tree
1993 5015 : convert_scalar_cond_reduction (gimple *reduc, gimple_stmt_iterator *gsi,
1994 : tree cond, tree op0, tree op1, bool swap,
1995 : bool has_nop, gimple* nop_reduc,
1996 : bool loop_versioned)
1997 : {
1998 5015 : gimple_stmt_iterator stmt_it;
1999 5015 : gimple *new_assign;
2000 5015 : tree rhs;
2001 5015 : tree rhs1 = gimple_assign_rhs1 (reduc);
2002 5015 : tree lhs = gimple_assign_lhs (reduc);
2003 5015 : tree tmp = make_temp_ssa_name (TREE_TYPE (rhs1), NULL, "_ifc_");
2004 5015 : tree c;
2005 5015 : enum tree_code reduction_op = gimple_assign_rhs_code (reduc);
2006 5015 : tree op_nochange = neutral_op_for_reduction (TREE_TYPE (rhs1), reduction_op,
2007 : NULL, false);
2008 5015 : gimple_seq stmts = NULL;
2009 :
2010 5015 : if (dump_file && (dump_flags & TDF_DETAILS))
2011 : {
2012 2 : fprintf (dump_file, "Found cond scalar reduction.\n");
2013 2 : print_gimple_stmt (dump_file, reduc, 0, TDF_SLIM);
2014 : }
2015 :
2016 : /* If possible create a COND_OP instead of a COND_EXPR and an OP_EXPR.
2017 : The COND_OP will have a neutral_op else value. */
2018 5015 : internal_fn ifn;
2019 5015 : ifn = get_conditional_internal_fn (reduction_op);
2020 5015 : if (loop_versioned && ifn != IFN_LAST
2021 5013 : && vectorized_internal_fn_supported_p (ifn, TREE_TYPE (lhs))
2022 1363 : && !VECTOR_TYPE_P (TREE_TYPE (lhs))
2023 6378 : && !swap)
2024 : {
2025 1343 : gcall *cond_call = gimple_build_call_internal (ifn, 4,
2026 : unshare_expr (cond),
2027 : op0, op1, op0);
2028 1343 : gsi_insert_before (gsi, cond_call, GSI_SAME_STMT);
2029 1343 : gimple_call_set_lhs (cond_call, tmp);
2030 : rhs = tmp;
2031 : }
2032 : else
2033 : {
2034 : /* Build cond expression using COND and constant operand
2035 : of reduction rhs. */
2036 7088 : c = fold_build_cond_expr (TREE_TYPE (rhs1),
2037 : unshare_expr (cond),
2038 : swap ? op_nochange : op1,
2039 : swap ? op1 : op_nochange);
2040 : /* Create assignment stmt and insert it at GSI. */
2041 3672 : new_assign = gimple_build_assign (tmp, c);
2042 3672 : gsi_insert_before (gsi, new_assign, GSI_SAME_STMT);
2043 : /* Build rhs for unconditional increment/decrement/logic_operation. */
2044 3672 : rhs = gimple_build (&stmts, reduction_op,
2045 3672 : TREE_TYPE (rhs1), op0, tmp);
2046 : }
2047 :
2048 5015 : if (has_nop)
2049 : {
2050 126 : rhs = gimple_convert (&stmts,
2051 126 : TREE_TYPE (gimple_assign_lhs (nop_reduc)), rhs);
2052 126 : stmt_it = gsi_for_stmt (nop_reduc);
2053 126 : gsi_remove (&stmt_it, true);
2054 126 : release_defs (nop_reduc);
2055 : }
2056 5015 : gsi_insert_seq_before (gsi, stmts, GSI_SAME_STMT);
2057 :
2058 : /* Delete original reduction stmt. */
2059 5015 : stmt_it = gsi_for_stmt (reduc);
2060 5015 : gsi_remove (&stmt_it, true);
2061 5015 : release_defs (reduc);
2062 5015 : return rhs;
2063 : }
2064 :
2065 : /* Generate a simplified conditional. */
2066 :
2067 : static tree
2068 56866 : gen_simplified_condition (tree cond, scalar_cond_masked_set_type &cond_set)
2069 : {
2070 : /* Check if the value is already live in a previous branch. This resolves
2071 : nested conditionals from diamond PHI reductions. */
2072 56866 : if (TREE_CODE (cond) == SSA_NAME)
2073 : {
2074 56858 : gimple *stmt = SSA_NAME_DEF_STMT (cond);
2075 56858 : gassign *assign = NULL;
2076 56858 : if ((assign = as_a <gassign *> (stmt))
2077 56858 : && gimple_assign_rhs_code (assign) == BIT_AND_EXPR)
2078 : {
2079 3639 : tree arg1 = gimple_assign_rhs1 (assign);
2080 3639 : tree arg2 = gimple_assign_rhs2 (assign);
2081 3639 : if (cond_set.contains ({ arg1, 1 }))
2082 150 : arg1 = boolean_true_node;
2083 : else
2084 3489 : arg1 = gen_simplified_condition (arg1, cond_set);
2085 :
2086 3639 : if (cond_set.contains ({ arg2, 1 }))
2087 1787 : arg2 = boolean_true_node;
2088 : else
2089 1852 : arg2 = gen_simplified_condition (arg2, cond_set);
2090 :
2091 3639 : cond = fold_build2 (TRUTH_AND_EXPR, boolean_type_node, arg1, arg2);
2092 : }
2093 : }
2094 56866 : return cond;
2095 : }
2096 :
2097 : /* Structure used to track meta-data on PHI arguments used to generate
2098 : most efficient comparison sequence to slatten a PHI node. */
2099 :
2100 : typedef struct ifcvt_arg_entry
2101 : {
2102 : /* The PHI node argument value. */
2103 : tree arg;
2104 :
2105 : /* The number of compares required to reach this PHI node from start of the
2106 : BB being if-converted. */
2107 : unsigned num_compares;
2108 :
2109 : /* The number of times this PHI node argument appears in the current PHI
2110 : node. */
2111 : unsigned occurs;
2112 :
2113 : /* The indices at which this PHI arg occurs inside the PHI node. */
2114 : vec <int> *indexes;
2115 : } ifcvt_arg_entry_t;
2116 :
2117 : /* Produce condition for all occurrences of ARG in PHI node. Set *INVERT
2118 : as to whether the condition is inverted. */
2119 :
2120 : static tree
2121 4265 : gen_phi_arg_condition (gphi *phi, ifcvt_arg_entry_t &arg,
2122 : gimple_stmt_iterator *gsi,
2123 : scalar_cond_masked_set_type &cond_set, bool *invert)
2124 : {
2125 4265 : int len;
2126 4265 : int i;
2127 4265 : tree cond = NULL_TREE;
2128 4265 : tree c;
2129 4265 : edge e;
2130 :
2131 4265 : *invert = false;
2132 4265 : len = arg.indexes->length ();
2133 4265 : gcc_assert (len > 0);
2134 8602 : for (i = 0; i < len; i++)
2135 : {
2136 4337 : e = gimple_phi_arg_edge (phi, (*arg.indexes)[i]);
2137 4337 : c = bb_predicate (e->src);
2138 4337 : if (is_true_predicate (c))
2139 : {
2140 0 : cond = c;
2141 0 : break;
2142 : }
2143 : /* If we have just a single inverted predicate, signal that and
2144 : instead invert the COND_EXPR arms. */
2145 4337 : if (len == 1 && TREE_CODE (c) == TRUTH_NOT_EXPR)
2146 : {
2147 84 : c = TREE_OPERAND (c, 0);
2148 84 : *invert = true;
2149 : }
2150 :
2151 4337 : c = gen_simplified_condition (c, cond_set);
2152 4337 : c = force_gimple_operand_gsi (gsi, unshare_expr (c),
2153 : true, NULL_TREE, true, GSI_SAME_STMT);
2154 4337 : if (cond != NULL_TREE)
2155 : {
2156 : /* Must build OR expression. */
2157 72 : cond = fold_or_predicates (EXPR_LOCATION (c), c, cond);
2158 72 : cond = force_gimple_operand_gsi (gsi, unshare_expr (cond), true,
2159 : NULL_TREE, true, GSI_SAME_STMT);
2160 : }
2161 : else
2162 : cond = c;
2163 :
2164 : /* Register the new possibly simplified conditional. When more than 2
2165 : entries in a phi node we chain entries in the false branch, so the
2166 : inverted condition is active. */
2167 4337 : scalar_cond_masked_key pred_cond ({ cond, 1 });
2168 4337 : if (!*invert)
2169 4253 : pred_cond.inverted_p = !pred_cond.inverted_p;
2170 4337 : cond_set.add (pred_cond);
2171 : }
2172 4265 : gcc_assert (cond != NULL_TREE);
2173 4265 : return cond;
2174 : }
2175 :
2176 : /* Factors out an operation from *ARG0 and *ARG1 and
2177 : create the new statement at GSI. *RES is the
2178 : result of that new statement. Update *ARG0 and *ARG1
2179 : and *RES to the new values if the factoring happened.
2180 : Loops until all of the factoring is completed. */
2181 :
2182 : static void
2183 47188 : factor_out_operators (tree *res, gimple_stmt_iterator *gsi,
2184 : tree *arg0, tree *arg1, gphi *phi)
2185 : {
2186 47188 : gimple_match_op arg0_op, arg1_op;
2187 47188 : bool repeated = false;
2188 :
2189 47882 : again:
2190 47882 : if (TREE_CODE (*arg0) != SSA_NAME || TREE_CODE (*arg1) != SSA_NAME)
2191 : return;
2192 :
2193 32330 : if (operand_equal_p (*arg0, *arg1))
2194 : return;
2195 :
2196 : /* If either args have > 1 use, then this transformation actually
2197 : increases the number of expressions evaluated at runtime. */
2198 32330 : if (repeated
2199 32330 : ? (!has_zero_uses (*arg0) || !has_zero_uses (*arg1))
2200 31658 : : (!has_single_use (*arg0) || !has_single_use (*arg1)))
2201 : return;
2202 :
2203 5567 : gimple *arg0_def_stmt = SSA_NAME_DEF_STMT (*arg0);
2204 5567 : if (!gimple_extract_op (arg0_def_stmt, &arg0_op))
2205 : return;
2206 :
2207 : /* Might pick up abnormals from previous bbs so stop the loop. */
2208 2286 : if (arg0_op.operands_occurs_in_abnormal_phi ())
2209 : return;
2210 :
2211 2284 : gimple *arg1_def_stmt = SSA_NAME_DEF_STMT (*arg1);
2212 2284 : if (!gimple_extract_op (arg1_def_stmt, &arg1_op))
2213 : return;
2214 :
2215 : /* Might pick up abnormals from previous bbs so stop the loop. */
2216 1958 : if (arg1_op.operands_occurs_in_abnormal_phi ())
2217 : return;
2218 :
2219 : /* No factoring can happen if the codes are different
2220 : or the number operands. */
2221 1958 : if (arg1_op.code != arg0_op.code
2222 1958 : || arg1_op.num_ops != arg0_op.num_ops)
2223 : return;
2224 :
2225 982 : tree new_arg0, new_arg1;
2226 982 : int opnum = find_different_opnum (arg0_op, arg1_op, &new_arg0, &new_arg1);
2227 982 : if (opnum == -1)
2228 : return;
2229 :
2230 : /* BIT_FIELD_REF and BIT_INSERT_EXPR can't be factored out for non-0 operands
2231 : as the other operands require constants. */
2232 714 : if ((arg1_op.code == BIT_FIELD_REF
2233 710 : || arg1_op.code == BIT_INSERT_EXPR)
2234 718 : && opnum != 0)
2235 : return;
2236 :
2237 : /* It is not profitability to factor out vec_perm with
2238 : constant masks (operand 2). The target might not support it
2239 : and that might be invalid to do as such. Also with constants
2240 : masks, the number of elements of the mask type does not need
2241 : to match the number of elements of other operands and can be
2242 : arbitrary integral vector type so factoring that out can't work.
2243 : Note in the case where one mask is a constant and the other is not,
2244 : the next check for compatible types will reject the case the
2245 : constant mask has the incompatible type. */
2246 706 : if (arg1_op.code == VEC_PERM_EXPR && opnum == 2
2247 8 : && TREE_CODE (new_arg0) == VECTOR_CST
2248 714 : && TREE_CODE (new_arg1) == VECTOR_CST)
2249 : return;
2250 :
2251 702 : if (!types_compatible_p (TREE_TYPE (new_arg0), TREE_TYPE (new_arg1)))
2252 : return;
2253 694 : tree new_res = make_ssa_name (TREE_TYPE (new_arg0), NULL);
2254 :
2255 : /* Create the operation stmt if possible and insert it. */
2256 :
2257 694 : gimple_match_op new_op = arg0_op;
2258 694 : new_op.ops[opnum] = new_res;
2259 694 : gimple_seq seq = NULL;
2260 694 : tree result = *res;
2261 694 : result = maybe_push_res_to_seq (&new_op, &seq, result);
2262 :
2263 : /* If we can't create the new statement, release the temp name
2264 : and return back. */
2265 694 : if (!result)
2266 : {
2267 0 : release_ssa_name (new_res);
2268 0 : return;
2269 : }
2270 694 : gsi_insert_seq_before (gsi, seq, GSI_CONTINUE_LINKING);
2271 :
2272 694 : if (dump_file && (dump_flags & TDF_DETAILS))
2273 : {
2274 1 : fprintf (dump_file, "PHI ");
2275 1 : print_generic_expr (dump_file, gimple_phi_result (phi));
2276 1 : fprintf (dump_file,
2277 : " changed to factor operation out from COND_EXPR.\n");
2278 1 : fprintf (dump_file, "New stmt with OPERATION that defines ");
2279 1 : print_generic_expr (dump_file, result);
2280 1 : fprintf (dump_file, ".\n");
2281 : }
2282 :
2283 : /* Remove the old operation(s) that has single use. */
2284 694 : gimple_stmt_iterator gsi_for_def;
2285 :
2286 694 : gsi_for_def = gsi_for_stmt (arg0_def_stmt);
2287 694 : gsi_remove (&gsi_for_def, true);
2288 694 : release_defs (arg0_def_stmt);
2289 694 : gsi_for_def = gsi_for_stmt (arg1_def_stmt);
2290 694 : gsi_remove (&gsi_for_def, true);
2291 694 : release_defs (arg1_def_stmt);
2292 :
2293 : /* Update the arguments and try again. */
2294 694 : *arg0 = new_arg0;
2295 694 : *arg1 = new_arg1;
2296 694 : *res = new_res;
2297 :
2298 : /* Update the phi node too. */
2299 694 : gimple_phi_set_result (phi, new_res);
2300 694 : gimple_phi_arg (phi, 0)->def = new_arg0;
2301 694 : gimple_phi_arg (phi, 1)->def = new_arg1;
2302 694 : update_stmt (phi);
2303 :
2304 694 : repeated = true;
2305 694 : goto again;
2306 : }
2307 :
2308 : /* Create the smallest nested conditional possible. On pre-order we record
2309 : which conditionals are live, and on post-order rewrite the chain by removing
2310 : already active conditions.
2311 :
2312 : As an example we simplify:
2313 :
2314 : _7 = a_10 < 0;
2315 : _21 = a_10 >= 0;
2316 : _22 = a_10 < e_11(D);
2317 : _23 = _21 & _22;
2318 : _ifc__42 = _23 ? t_13 : 0;
2319 : t_6 = _7 ? 1 : _ifc__42
2320 :
2321 : into
2322 :
2323 : _7 = a_10 < 0;
2324 : _22 = a_10 < e_11(D);
2325 : _ifc__42 = _22 ? t_13 : 0;
2326 : t_6 = _7 ? 1 : _ifc__42;
2327 :
2328 : which produces better code. */
2329 :
2330 : static tree
2331 6402 : gen_phi_nest_statement (gphi *phi, gimple_stmt_iterator *gsi,
2332 : scalar_cond_masked_set_type &cond_set, tree type,
2333 : gimple **res_stmt, tree lhs0,
2334 : vec<struct ifcvt_arg_entry> &args, unsigned idx)
2335 : {
2336 12804 : if (idx == args.length ())
2337 2137 : return args[idx - 1].arg;
2338 :
2339 4265 : bool invert;
2340 4265 : tree cond = gen_phi_arg_condition (phi, args[idx - 1], gsi, cond_set,
2341 : &invert);
2342 4265 : tree arg1 = gen_phi_nest_statement (phi, gsi, cond_set, type, res_stmt, lhs0,
2343 : args, idx + 1);
2344 :
2345 4265 : unsigned prev = idx;
2346 4265 : unsigned curr = prev - 1;
2347 4265 : tree arg0 = args[curr].arg;
2348 4265 : tree rhs, lhs;
2349 4265 : if (idx > 1)
2350 2128 : lhs = make_temp_ssa_name (type, NULL, "_ifc_");
2351 : else
2352 : lhs = lhs0;
2353 :
2354 4265 : if (invert)
2355 84 : rhs = fold_build_cond_expr (type, unshare_expr (cond),
2356 : arg1, arg0);
2357 : else
2358 4181 : rhs = fold_build_cond_expr (type, unshare_expr (cond),
2359 : arg0, arg1);
2360 4265 : gassign *new_stmt = gimple_build_assign (lhs, rhs);
2361 4265 : gsi_insert_before (gsi, new_stmt, GSI_SAME_STMT);
2362 4265 : update_stmt (new_stmt);
2363 4265 : *res_stmt = new_stmt;
2364 4265 : return lhs;
2365 : }
2366 :
2367 : /* When flattening a PHI node we have a choice of which conditions to test to
2368 : for all the paths from the start of the dominator block of the BB with the
2369 : PHI node. If the PHI node has X arguments we have to only test X - 1
2370 : conditions as the last one is implicit. It does matter which conditions we
2371 : test first. We should test the shortest condition first (distance here is
2372 : measures in the number of logical operators in the condition) and the
2373 : longest one last. This allows us to skip testing the most expensive
2374 : condition. To accomplish this we need to sort the conditions. P1 and P2
2375 : are sorted first based on the number of logical operations (num_compares)
2376 : and then by how often they occur in the PHI node. */
2377 :
2378 : static int
2379 36461 : cmp_arg_entry (const void *p1, const void *p2, void * /* data. */)
2380 : {
2381 36461 : const ifcvt_arg_entry sval1 = *(const ifcvt_arg_entry *)p1;
2382 36461 : const ifcvt_arg_entry sval2 = *(const ifcvt_arg_entry *)p2;
2383 :
2384 36461 : if (sval1.num_compares < sval2.num_compares)
2385 : return -1;
2386 12424 : else if (sval1.num_compares > sval2.num_compares)
2387 : return 1;
2388 :
2389 665 : if (sval1.occurs < sval2.occurs)
2390 : return -1;
2391 665 : else if (sval1.occurs > sval2.occurs)
2392 0 : return 1;
2393 :
2394 : return 0;
2395 : }
2396 :
2397 : /* Replace a scalar PHI node with a COND_EXPR using COND as condition.
2398 : This routine can handle PHI nodes with more than two arguments.
2399 :
2400 : For example,
2401 : S1: A = PHI <x1(1), x2(5)>
2402 : is converted into,
2403 : S2: A = cond ? x1 : x2;
2404 :
2405 : The generated code is inserted at GSI that points to the top of
2406 : basic block's statement list.
2407 : If PHI node has more than two arguments a chain of conditional
2408 : expression is produced.
2409 : LOOP_VERSIONED should be true if we know that the loop was versioned for
2410 : vectorization. */
2411 :
2412 :
2413 : static void
2414 52600 : predicate_scalar_phi (gphi *phi, gimple_stmt_iterator *gsi, bool loop_versioned)
2415 : {
2416 52600 : gimple *new_stmt = NULL, *reduc, *nop_reduc;
2417 52600 : tree rhs, res, arg0, arg1, op0, op1, scev;
2418 52600 : tree cond;
2419 52600 : unsigned int index0;
2420 52600 : edge e;
2421 52600 : basic_block bb;
2422 52600 : unsigned int i;
2423 52600 : bool has_nop;
2424 :
2425 52600 : res = gimple_phi_result (phi);
2426 105200 : if (virtual_operand_p (res))
2427 47245 : return;
2428 :
2429 52600 : if ((rhs = degenerate_phi_result (phi))
2430 52600 : || ((scev = analyze_scalar_evolution (gimple_bb (phi)->loop_father,
2431 : res))
2432 52560 : && !chrec_contains_undetermined (scev)
2433 52560 : && scev != res
2434 17 : && (rhs = gimple_phi_arg_def (phi, 0))))
2435 : {
2436 57 : if (dump_file && (dump_flags & TDF_DETAILS))
2437 : {
2438 0 : fprintf (dump_file, "Degenerate phi!\n");
2439 0 : print_gimple_stmt (dump_file, phi, 0, TDF_SLIM);
2440 : }
2441 57 : new_stmt = gimple_build_assign (res, rhs);
2442 57 : gsi_insert_before (gsi, new_stmt, GSI_SAME_STMT);
2443 57 : update_stmt (new_stmt);
2444 57 : return;
2445 : }
2446 :
2447 52543 : bb = gimple_bb (phi);
2448 : /* Keep track of conditionals already seen. */
2449 52543 : scalar_cond_masked_set_type cond_set;
2450 52543 : if (EDGE_COUNT (bb->preds) == 2)
2451 : {
2452 : /* Predicate ordinary PHI node with 2 arguments. */
2453 47188 : edge first_edge, second_edge;
2454 47188 : basic_block true_bb;
2455 47188 : first_edge = EDGE_PRED (bb, 0);
2456 47188 : second_edge = EDGE_PRED (bb, 1);
2457 47188 : cond = bb_predicate (first_edge->src);
2458 47188 : cond_set.add ({ cond, 1 });
2459 47188 : if (TREE_CODE (cond) == TRUTH_NOT_EXPR)
2460 22592 : std::swap (first_edge, second_edge);
2461 47188 : if (EDGE_COUNT (first_edge->src->succs) > 1)
2462 : {
2463 22762 : cond = bb_predicate (second_edge->src);
2464 22762 : if (TREE_CODE (cond) == TRUTH_NOT_EXPR)
2465 9939 : cond = TREE_OPERAND (cond, 0);
2466 : else
2467 : first_edge = second_edge;
2468 : }
2469 : else
2470 24426 : cond = bb_predicate (first_edge->src);
2471 :
2472 : /* Gimplify the condition to a valid cond-expr conditional operand. */
2473 47188 : cond = gen_simplified_condition (cond, cond_set);
2474 47188 : cond = force_gimple_operand_gsi (gsi, unshare_expr (cond), true,
2475 : NULL_TREE, true, GSI_SAME_STMT);
2476 47188 : true_bb = first_edge->src;
2477 47188 : if (EDGE_PRED (bb, 1)->src == true_bb)
2478 : {
2479 35415 : arg0 = gimple_phi_arg_def (phi, 1);
2480 35415 : arg1 = gimple_phi_arg_def (phi, 0);
2481 : }
2482 : else
2483 : {
2484 11773 : arg0 = gimple_phi_arg_def (phi, 0);
2485 11773 : arg1 = gimple_phi_arg_def (phi, 1);
2486 : }
2487 :
2488 : /* Factor out operand if possible. This can only be done easily
2489 : for PHI with 2 elements. */
2490 47188 : factor_out_operators (&res, gsi, &arg0, &arg1, phi);
2491 :
2492 47188 : if (is_cond_scalar_reduction (phi, &reduc, arg0, arg1,
2493 : &op0, &op1, false, &has_nop,
2494 : &nop_reduc))
2495 : {
2496 : /* Convert reduction stmt into vectorizable form. */
2497 8120 : rhs = convert_scalar_cond_reduction (reduc, gsi, cond, op0, op1,
2498 4060 : true_bb != gimple_bb (reduc),
2499 : has_nop, nop_reduc,
2500 : loop_versioned);
2501 4060 : redundant_ssa_names.safe_push (std::make_pair (res, rhs));
2502 : }
2503 : else
2504 : /* Build new RHS using selected condition and arguments. */
2505 43128 : rhs = fold_build_cond_expr (TREE_TYPE (res), unshare_expr (cond),
2506 : arg0, arg1);
2507 47188 : new_stmt = gimple_build_assign (res, rhs);
2508 47188 : gsi_insert_before (gsi, new_stmt, GSI_SAME_STMT);
2509 47188 : gimple_stmt_iterator new_gsi = gsi_for_stmt (new_stmt);
2510 47188 : if (fold_stmt (&new_gsi, follow_all_ssa_edges))
2511 : {
2512 1516 : new_stmt = gsi_stmt (new_gsi);
2513 1516 : update_stmt (new_stmt);
2514 : }
2515 :
2516 47188 : if (dump_file && (dump_flags & TDF_DETAILS))
2517 : {
2518 16 : fprintf (dump_file, "new phi replacement stmt\n");
2519 16 : print_gimple_stmt (dump_file, new_stmt, 0, TDF_SLIM);
2520 : }
2521 47188 : return;
2522 : }
2523 :
2524 : /* Create hashmap for PHI node which contain vector of argument indexes
2525 : having the same value. */
2526 5355 : bool swap = false;
2527 5355 : hash_map<tree_operand_hash, auto_vec<int> > phi_arg_map;
2528 5355 : unsigned int num_args = gimple_phi_num_args (phi);
2529 : /* Vector of different PHI argument values. */
2530 5355 : auto_vec<ifcvt_arg_entry_t> args;
2531 :
2532 : /* Compute phi_arg_map, determine the list of unique PHI args and the indices
2533 : where they are in the PHI node. The indices will be used to determine
2534 : the conditions to apply and their complexity. */
2535 21701 : for (i = 0; i < num_args; i++)
2536 : {
2537 16346 : tree arg;
2538 :
2539 16346 : arg = gimple_phi_arg_def (phi, i);
2540 16346 : if (!phi_arg_map.get (arg))
2541 12838 : args.safe_push ({ arg, 0, 0, NULL });
2542 16346 : phi_arg_map.get_or_insert (arg).safe_push (i);
2543 : }
2544 :
2545 : /* Determine element with max number of occurrences and complexity. Looking
2546 : at only number of occurrences as a measure for complexity isn't enough as
2547 : all usages can be unique but the comparisons to reach the PHI node differ
2548 : per branch. */
2549 18193 : for (unsigned i = 0; i < args.length (); i++)
2550 : {
2551 12838 : unsigned int len = 0;
2552 12838 : vec<int> *indices = phi_arg_map.get (args[i].arg);
2553 54860 : for (int index : *indices)
2554 : {
2555 16346 : edge e = gimple_phi_arg_edge (phi, index);
2556 16346 : len += get_bb_num_predicate_stmts (e->src);
2557 : }
2558 :
2559 12838 : unsigned occur = indices->length ();
2560 12838 : if (dump_file && (dump_flags & TDF_DETAILS))
2561 7 : fprintf (dump_file, "Ranking %d as len=%d, idx=%d\n", i, len, occur);
2562 12838 : args[i].num_compares = len;
2563 12838 : args[i].occurs = occur;
2564 12838 : args[i].indexes = indices;
2565 : }
2566 :
2567 : /* Sort elements based on rankings ARGS. */
2568 5355 : args.stablesort (cmp_arg_entry, NULL);
2569 :
2570 : /* Handle one special case when number of arguments with different values
2571 : is equal 2 and one argument has the only occurrence. Such PHI can be
2572 : handled as if would have only 2 arguments. */
2573 5355 : if (args.length () == 2
2574 8645 : && args[0].indexes->length () == 1)
2575 : {
2576 3218 : index0 = (*args[0].indexes)[0];
2577 3218 : arg0 = args[0].arg;
2578 3218 : arg1 = args[1].arg;
2579 3218 : e = gimple_phi_arg_edge (phi, index0);
2580 3218 : cond = bb_predicate (e->src);
2581 3218 : if (TREE_CODE (cond) == TRUTH_NOT_EXPR)
2582 : {
2583 39 : swap = true;
2584 39 : cond = TREE_OPERAND (cond, 0);
2585 : }
2586 : /* Gimplify the condition to a valid cond-expr conditional operand. */
2587 3218 : cond = force_gimple_operand_gsi (gsi, unshare_expr (cond), true,
2588 : NULL_TREE, true, GSI_SAME_STMT);
2589 3218 : if (!(is_cond_scalar_reduction (phi, &reduc, arg0 , arg1,
2590 : &op0, &op1, true, &has_nop, &nop_reduc)))
2591 4489 : rhs = fold_build_cond_expr (TREE_TYPE (res), unshare_expr (cond),
2592 : swap ? arg1 : arg0,
2593 : swap ? arg0 : arg1);
2594 : else
2595 : {
2596 : /* Convert reduction stmt into vectorizable form. */
2597 955 : rhs = convert_scalar_cond_reduction (reduc, gsi, cond, op0, op1,
2598 : swap, has_nop, nop_reduc,
2599 : loop_versioned);
2600 955 : redundant_ssa_names.safe_push (std::make_pair (res, rhs));
2601 : }
2602 3218 : new_stmt = gimple_build_assign (res, rhs);
2603 3218 : gsi_insert_before (gsi, new_stmt, GSI_SAME_STMT);
2604 3218 : update_stmt (new_stmt);
2605 : }
2606 : else
2607 : {
2608 : /* Common case. */
2609 2137 : tree type = TREE_TYPE (gimple_phi_result (phi));
2610 2137 : gen_phi_nest_statement (phi, gsi, cond_set, type, &new_stmt, res,
2611 : args, 1);
2612 : }
2613 :
2614 5355 : if (dump_file && (dump_flags & TDF_DETAILS))
2615 : {
2616 3 : fprintf (dump_file, "new extended phi replacement stmt\n");
2617 3 : print_gimple_stmt (dump_file, new_stmt, 0, TDF_SLIM);
2618 : }
2619 52543 : }
2620 :
2621 : /* Replaces in LOOP all the scalar phi nodes other than those in the
2622 : LOOP->header block with conditional modify expressions.
2623 : LOOP_VERSIONED should be true if we know that the loop was versioned for
2624 : vectorization. */
2625 :
2626 : static void
2627 28995 : predicate_all_scalar_phis (class loop *loop, bool loop_versioned)
2628 : {
2629 28995 : basic_block bb;
2630 28995 : unsigned int orig_loop_num_nodes = loop->num_nodes;
2631 28995 : unsigned int i;
2632 :
2633 148637 : for (i = 1; i < orig_loop_num_nodes; i++)
2634 : {
2635 119642 : gphi *phi;
2636 119642 : gimple_stmt_iterator gsi;
2637 119642 : gphi_iterator phi_gsi;
2638 119642 : bb = ifc_bbs[i];
2639 :
2640 119642 : if (bb == loop->header)
2641 85505 : continue;
2642 :
2643 119642 : phi_gsi = gsi_start_phis (bb);
2644 119642 : if (gsi_end_p (phi_gsi))
2645 85505 : continue;
2646 :
2647 34137 : gsi = gsi_after_labels (bb);
2648 89079 : while (!gsi_end_p (phi_gsi))
2649 : {
2650 54942 : phi = phi_gsi.phi ();
2651 109884 : if (virtual_operand_p (gimple_phi_result (phi)))
2652 2342 : gsi_next (&phi_gsi);
2653 : else
2654 : {
2655 52600 : predicate_scalar_phi (phi, &gsi, loop_versioned);
2656 52600 : remove_phi_node (&phi_gsi, false);
2657 : }
2658 : }
2659 : }
2660 28995 : }
2661 :
2662 : /* Insert in each basic block of LOOP the statements produced by the
2663 : gimplification of the predicates. */
2664 :
2665 : static void
2666 28995 : insert_gimplified_predicates (loop_p loop)
2667 : {
2668 28995 : unsigned int i;
2669 :
2670 177632 : for (i = 0; i < loop->num_nodes; i++)
2671 : {
2672 148637 : basic_block bb = ifc_bbs[i];
2673 148637 : gimple_seq stmts;
2674 148637 : if (!is_predicated (bb))
2675 91147 : gcc_assert (bb_predicate_gimplified_stmts (bb) == NULL);
2676 148637 : if (!is_predicated (bb))
2677 : {
2678 : /* Do not insert statements for a basic block that is not
2679 : predicated. Also make sure that the predicate of the
2680 : basic block is set to true. */
2681 91147 : reset_bb_predicate (bb);
2682 91147 : continue;
2683 : }
2684 :
2685 57490 : stmts = bb_predicate_gimplified_stmts (bb);
2686 57490 : if (stmts)
2687 : {
2688 57110 : if (need_to_predicate)
2689 : {
2690 : /* Insert the predicate of the BB just after the label,
2691 : as the if-conversion of memory writes will use this
2692 : predicate. */
2693 5860 : gimple_stmt_iterator gsi = gsi_after_labels (bb);
2694 5860 : gsi_insert_seq_before (&gsi, stmts, GSI_SAME_STMT);
2695 : }
2696 : else
2697 : {
2698 : /* Insert the predicate of the BB at the end of the BB
2699 : as this would reduce the register pressure: the only
2700 : use of this predicate will be in successor BBs. */
2701 51250 : gimple_stmt_iterator gsi = gsi_last_bb (bb);
2702 :
2703 51250 : if (gsi_end_p (gsi)
2704 51250 : || stmt_ends_bb_p (gsi_stmt (gsi)))
2705 29538 : gsi_insert_seq_before (&gsi, stmts, GSI_SAME_STMT);
2706 : else
2707 21712 : gsi_insert_seq_after (&gsi, stmts, GSI_SAME_STMT);
2708 : }
2709 :
2710 : /* Once the sequence is code generated, set it to NULL. */
2711 57110 : set_bb_predicate_gimplified_stmts (bb, NULL, true);
2712 : }
2713 : }
2714 28995 : }
2715 :
2716 : /* Helper function for predicate_statements. Returns index of existent
2717 : mask if it was created for given SIZE and -1 otherwise. */
2718 :
2719 : static int
2720 937 : mask_exists (int size, const vec<int> &vec)
2721 : {
2722 937 : unsigned int ix;
2723 937 : int v;
2724 1026 : FOR_EACH_VEC_ELT (vec, ix, v)
2725 965 : if (v == size)
2726 876 : return (int) ix;
2727 : return -1;
2728 : }
2729 :
2730 : /* Helper function for predicate_statements. STMT is a memory read or
2731 : write and it needs to be predicated by MASK. Return a statement
2732 : that does so. */
2733 :
2734 : static gimple *
2735 2014 : predicate_load_or_store (gimple_stmt_iterator *gsi, gassign *stmt, tree mask)
2736 : {
2737 2014 : gcall *new_stmt;
2738 :
2739 2014 : tree lhs = gimple_assign_lhs (stmt);
2740 2014 : tree rhs = gimple_assign_rhs1 (stmt);
2741 2014 : tree ref = TREE_CODE (lhs) == SSA_NAME ? rhs : lhs;
2742 2014 : mark_addressable (ref);
2743 2014 : tree addr = force_gimple_operand_gsi (gsi, build_fold_addr_expr (ref),
2744 : true, NULL_TREE, true, GSI_SAME_STMT);
2745 2014 : tree ptr = build_int_cst (reference_alias_ptr_type (ref),
2746 2014 : get_object_alignment (ref));
2747 : /* Copy points-to info if possible. */
2748 2014 : if (TREE_CODE (addr) == SSA_NAME && !SSA_NAME_PTR_INFO (addr))
2749 517 : copy_ref_info (build2 (MEM_REF, TREE_TYPE (ref), addr, ptr),
2750 : ref);
2751 2014 : if (TREE_CODE (lhs) == SSA_NAME)
2752 : {
2753 : /* Get a zero else value. This might not be what a target actually uses
2754 : but we cannot be sure about which vector mode the vectorizer will
2755 : choose. Therefore, leave the decision whether we need to force the
2756 : inactive elements to zero to the vectorizer. */
2757 1207 : tree els = vect_get_mask_load_else (MASK_LOAD_ELSE_ZERO,
2758 1207 : TREE_TYPE (lhs));
2759 :
2760 1207 : new_stmt
2761 1207 : = gimple_build_call_internal (IFN_MASK_LOAD, 4, addr,
2762 : ptr, mask, els);
2763 :
2764 1207 : gimple_call_set_lhs (new_stmt, lhs);
2765 2414 : gimple_set_vuse (new_stmt, gimple_vuse (stmt));
2766 : }
2767 : else
2768 : {
2769 807 : new_stmt
2770 807 : = gimple_build_call_internal (IFN_MASK_STORE, 4, addr, ptr,
2771 : mask, rhs);
2772 807 : gimple_move_vops (new_stmt, stmt);
2773 : }
2774 2014 : gimple_call_set_nothrow (new_stmt, true);
2775 2014 : return new_stmt;
2776 : }
2777 :
2778 : /* STMT uses OP_LHS. Check whether it is equivalent to:
2779 :
2780 : ... = OP_MASK ? OP_LHS : X;
2781 :
2782 : Return X if so, otherwise return null. OP_MASK is an SSA_NAME that is
2783 : known to have value OP_COND. */
2784 :
2785 : static tree
2786 820 : check_redundant_cond_expr (gimple *stmt, tree op_mask, tree op_cond,
2787 : tree op_lhs)
2788 : {
2789 1505 : gassign *assign = dyn_cast <gassign *> (stmt);
2790 1028 : if (!assign || gimple_assign_rhs_code (assign) != COND_EXPR)
2791 : return NULL_TREE;
2792 :
2793 203 : tree use_cond = gimple_assign_rhs1 (assign);
2794 203 : tree if_true = gimple_assign_rhs2 (assign);
2795 203 : tree if_false = gimple_assign_rhs3 (assign);
2796 :
2797 72 : if ((use_cond == op_mask || operand_equal_p (use_cond, op_cond, 0))
2798 203 : && if_true == op_lhs)
2799 : return if_false;
2800 :
2801 72 : if (inverse_conditions_p (use_cond, op_cond) && if_false == op_lhs)
2802 : return if_true;
2803 :
2804 : return NULL_TREE;
2805 : }
2806 :
2807 : /* Return true if VALUE is available for use at STMT. SSA_NAMES is
2808 : the set of SSA names defined earlier in STMT's block. */
2809 :
2810 : static bool
2811 131 : value_available_p (gimple *stmt, hash_set<tree_ssa_name_hash> *ssa_names,
2812 : tree value)
2813 : {
2814 131 : if (is_gimple_min_invariant (value))
2815 : return true;
2816 :
2817 95 : if (TREE_CODE (value) == SSA_NAME)
2818 : {
2819 95 : if (SSA_NAME_IS_DEFAULT_DEF (value))
2820 : return true;
2821 :
2822 95 : basic_block def_bb = gimple_bb (SSA_NAME_DEF_STMT (value));
2823 95 : basic_block use_bb = gimple_bb (stmt);
2824 95 : return (def_bb == use_bb
2825 95 : ? ssa_names->contains (value)
2826 95 : : dominated_by_p (CDI_DOMINATORS, use_bb, def_bb));
2827 : }
2828 :
2829 : return false;
2830 : }
2831 :
2832 : /* Helper function for predicate_statements. STMT is a potentially-trapping
2833 : arithmetic operation that needs to be predicated by MASK, an SSA_NAME that
2834 : has value COND. Return a statement that does so. SSA_NAMES is the set of
2835 : SSA names defined earlier in STMT's block. */
2836 :
2837 : static gimple *
2838 615 : predicate_rhs_code (gimple *stmt, tree mask, tree cond,
2839 : hash_set<tree_ssa_name_hash> *ssa_names)
2840 : {
2841 615 : internal_fn cond_fn;
2842 615 : if (is_gimple_assign (stmt))
2843 : {
2844 507 : tree_code code = gimple_assign_rhs_code (stmt);
2845 507 : cond_fn = get_conditional_internal_fn (code);
2846 : }
2847 108 : else if (tree callee = gimple_call_fndecl (stmt))
2848 : {
2849 108 : auto ifn = associated_internal_fn (callee);
2850 108 : cond_fn = get_conditional_internal_fn (ifn);
2851 : }
2852 : else
2853 : return NULL;
2854 :
2855 615 : if (cond_fn == IFN_LAST)
2856 : {
2857 0 : gcc_assert (!gimple_could_trap_p (stmt));
2858 : return NULL;
2859 : }
2860 :
2861 615 : tree lhs = gimple_get_lhs (stmt);
2862 615 : unsigned int nops = gimple_num_args (stmt) + 1;
2863 :
2864 : /* Construct the arguments to the conditional internal function. */
2865 615 : auto_vec<tree, 8> args;
2866 615 : args.safe_grow (nops + 1, true);
2867 615 : args[0] = mask;
2868 1953 : for (unsigned int i = 0; i < nops - 1; ++i)
2869 1338 : args[i+1] = gimple_arg (stmt, i);
2870 615 : args[nops] = NULL_TREE;
2871 :
2872 : /* Look for uses of the result to see whether they are COND_EXPRs that can
2873 : be folded into the conditional call. */
2874 615 : imm_use_iterator imm_iter;
2875 615 : gimple *use_stmt;
2876 2050 : FOR_EACH_IMM_USE_STMT (use_stmt, imm_iter, lhs)
2877 : {
2878 820 : tree new_else = check_redundant_cond_expr (use_stmt, mask, cond, lhs);
2879 820 : if (new_else && value_available_p (stmt, ssa_names, new_else))
2880 : {
2881 110 : if (!args[nops])
2882 110 : args[nops] = new_else;
2883 110 : if (operand_equal_p (new_else, args[nops], 0))
2884 : {
2885 : /* We have:
2886 :
2887 : LHS = IFN_COND (MASK, ..., ELSE);
2888 : X = MASK ? LHS : ELSE;
2889 :
2890 : which makes X equivalent to LHS. */
2891 110 : tree use_lhs = gimple_assign_lhs (use_stmt);
2892 110 : redundant_ssa_names.safe_push (std::make_pair (use_lhs, lhs));
2893 : }
2894 : }
2895 615 : }
2896 615 : if (!args[nops])
2897 1010 : args[nops] = targetm.preferred_else_value (cond_fn, TREE_TYPE (lhs),
2898 505 : nops - 1, &args[1]);
2899 :
2900 : /* Create and insert the call. */
2901 615 : gcall *new_stmt = gimple_build_call_internal_vec (cond_fn, args);
2902 615 : gimple_call_set_lhs (new_stmt, lhs);
2903 615 : gimple_call_set_nothrow (new_stmt, true);
2904 :
2905 615 : return new_stmt;
2906 615 : }
2907 :
2908 : /* Predicate each write to memory in LOOP.
2909 :
2910 : This function transforms control flow constructs containing memory
2911 : writes of the form:
2912 :
2913 : | for (i = 0; i < N; i++)
2914 : | if (cond)
2915 : | A[i] = expr;
2916 :
2917 : into the following form that does not contain control flow:
2918 :
2919 : | for (i = 0; i < N; i++)
2920 : | A[i] = cond ? expr : A[i];
2921 :
2922 : The original CFG looks like this:
2923 :
2924 : | bb_0
2925 : | i = 0
2926 : | end_bb_0
2927 : |
2928 : | bb_1
2929 : | if (i < N) goto bb_5 else goto bb_2
2930 : | end_bb_1
2931 : |
2932 : | bb_2
2933 : | cond = some_computation;
2934 : | if (cond) goto bb_3 else goto bb_4
2935 : | end_bb_2
2936 : |
2937 : | bb_3
2938 : | A[i] = expr;
2939 : | goto bb_4
2940 : | end_bb_3
2941 : |
2942 : | bb_4
2943 : | goto bb_1
2944 : | end_bb_4
2945 :
2946 : insert_gimplified_predicates inserts the computation of the COND
2947 : expression at the beginning of the destination basic block:
2948 :
2949 : | bb_0
2950 : | i = 0
2951 : | end_bb_0
2952 : |
2953 : | bb_1
2954 : | if (i < N) goto bb_5 else goto bb_2
2955 : | end_bb_1
2956 : |
2957 : | bb_2
2958 : | cond = some_computation;
2959 : | if (cond) goto bb_3 else goto bb_4
2960 : | end_bb_2
2961 : |
2962 : | bb_3
2963 : | cond = some_computation;
2964 : | A[i] = expr;
2965 : | goto bb_4
2966 : | end_bb_3
2967 : |
2968 : | bb_4
2969 : | goto bb_1
2970 : | end_bb_4
2971 :
2972 : predicate_statements is then predicating the memory write as follows:
2973 :
2974 : | bb_0
2975 : | i = 0
2976 : | end_bb_0
2977 : |
2978 : | bb_1
2979 : | if (i < N) goto bb_5 else goto bb_2
2980 : | end_bb_1
2981 : |
2982 : | bb_2
2983 : | if (cond) goto bb_3 else goto bb_4
2984 : | end_bb_2
2985 : |
2986 : | bb_3
2987 : | cond = some_computation;
2988 : | A[i] = cond ? expr : A[i];
2989 : | goto bb_4
2990 : | end_bb_3
2991 : |
2992 : | bb_4
2993 : | goto bb_1
2994 : | end_bb_4
2995 :
2996 : and finally combine_blocks removes the basic block boundaries making
2997 : the loop vectorizable:
2998 :
2999 : | bb_0
3000 : | i = 0
3001 : | if (i < N) goto bb_5 else goto bb_1
3002 : | end_bb_0
3003 : |
3004 : | bb_1
3005 : | cond = some_computation;
3006 : | A[i] = cond ? expr : A[i];
3007 : | if (i < N) goto bb_5 else goto bb_4
3008 : | end_bb_1
3009 : |
3010 : | bb_4
3011 : | goto bb_1
3012 : | end_bb_4
3013 : */
3014 :
3015 : static void
3016 11856 : predicate_statements (loop_p loop)
3017 : {
3018 11856 : unsigned int i, orig_loop_num_nodes = loop->num_nodes;
3019 11856 : auto_vec<int, 1> vect_sizes;
3020 11856 : auto_vec<tree, 1> vect_masks;
3021 11856 : hash_set<tree_ssa_name_hash> ssa_names;
3022 :
3023 62470 : for (i = 1; i < orig_loop_num_nodes; i++)
3024 : {
3025 50614 : gimple_stmt_iterator gsi;
3026 50614 : basic_block bb = ifc_bbs[i];
3027 50614 : tree cond = bb_predicate (bb);
3028 50614 : bool swap;
3029 50614 : int index;
3030 :
3031 50614 : if (is_true_predicate (cond))
3032 25051 : continue;
3033 :
3034 25563 : swap = false;
3035 25563 : if (TREE_CODE (cond) == TRUTH_NOT_EXPR)
3036 : {
3037 9623 : swap = true;
3038 9623 : cond = TREE_OPERAND (cond, 0);
3039 : }
3040 :
3041 25563 : vect_sizes.truncate (0);
3042 25563 : vect_masks.truncate (0);
3043 :
3044 197235 : for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi);)
3045 : {
3046 146109 : gimple *stmt = gsi_stmt (gsi);
3047 146109 : if (!is_gimple_assign (stmt)
3048 146109 : && !gimple_call_builtin_p (stmt))
3049 : ;
3050 87960 : else if (is_false_predicate (cond)
3051 87960 : && gimple_vdef (stmt))
3052 : {
3053 0 : unlink_stmt_vdef (stmt);
3054 0 : gsi_remove (&gsi, true);
3055 0 : release_defs (stmt);
3056 2 : continue;
3057 : }
3058 : /* For now, just drop prefetches. Do it now to remove any possible
3059 : aliasing check failures from the address calculations of the
3060 : prefetch. Vect would be too late in that regard. */
3061 87960 : else if (gimple_call_builtin_p (stmt, BUILT_IN_PREFETCH))
3062 : {
3063 2 : unlink_stmt_vdef (stmt);
3064 2 : gsi_remove (&gsi, true);
3065 2 : release_defs (stmt);
3066 2 : continue;
3067 : }
3068 87958 : else if (gimple_plf (stmt, GF_PLF_2)
3069 87958 : && (is_gimple_assign (stmt)
3070 108 : || (gimple_call_builtin_p (stmt)
3071 108 : && !if_convertible_simdclone_stmt_p (stmt))))
3072 : {
3073 2629 : tree lhs = gimple_get_lhs (stmt);
3074 : /* ?? Assume that calls without an LHS are not data processing
3075 : and so no issues with traps. */
3076 2629 : if (!lhs)
3077 0 : continue;
3078 2629 : tree mask;
3079 2629 : gimple *new_stmt;
3080 2629 : gimple_seq stmts = NULL;
3081 2629 : machine_mode mode = TYPE_MODE (TREE_TYPE (lhs));
3082 : /* We checked before setting GF_PLF_2 that an equivalent
3083 : integer mode exists. */
3084 2629 : int bitsize = GET_MODE_BITSIZE (mode).to_constant ();
3085 2629 : if (!vect_sizes.is_empty ()
3086 937 : && (index = mask_exists (bitsize, vect_sizes)) != -1)
3087 : /* Use created mask. */
3088 876 : mask = vect_masks[index];
3089 : else
3090 : {
3091 1753 : if (COMPARISON_CLASS_P (cond))
3092 0 : mask = gimple_build (&stmts, TREE_CODE (cond),
3093 : boolean_type_node,
3094 0 : TREE_OPERAND (cond, 0),
3095 0 : TREE_OPERAND (cond, 1));
3096 : else
3097 1753 : mask = cond;
3098 :
3099 1753 : if (swap)
3100 : {
3101 454 : tree true_val
3102 454 : = constant_boolean_node (true, TREE_TYPE (mask));
3103 454 : mask = gimple_build (&stmts, BIT_XOR_EXPR,
3104 454 : TREE_TYPE (mask), mask, true_val);
3105 : }
3106 1753 : gsi_insert_seq_before (&gsi, stmts, GSI_SAME_STMT);
3107 :
3108 : /* Save mask and its size for further use. */
3109 1753 : vect_sizes.safe_push (bitsize);
3110 1753 : vect_masks.safe_push (mask);
3111 : }
3112 2629 : if (gimple_assign_single_p (stmt))
3113 2014 : new_stmt = predicate_load_or_store (&gsi,
3114 : as_a <gassign *> (stmt),
3115 : mask);
3116 : else
3117 615 : new_stmt = predicate_rhs_code (stmt, mask, cond, &ssa_names);
3118 :
3119 2629 : if (new_stmt)
3120 2629 : gsi_replace (&gsi, new_stmt, true);
3121 : }
3122 85329 : else if (gimple_needing_rewrite_undefined (stmt))
3123 17875 : rewrite_to_defined_unconditional (&gsi);
3124 134908 : else if (gimple_vdef (stmt))
3125 : {
3126 1488 : tree lhs = gimple_assign_lhs (stmt);
3127 1488 : tree rhs = gimple_assign_rhs1 (stmt);
3128 1488 : tree type = TREE_TYPE (lhs);
3129 :
3130 1488 : lhs = ifc_temp_var (type, unshare_expr (lhs), &gsi);
3131 1488 : rhs = ifc_temp_var (type, unshare_expr (rhs), &gsi);
3132 1488 : if (swap)
3133 531 : std::swap (lhs, rhs);
3134 1488 : cond = force_gimple_operand_gsi (&gsi, unshare_expr (cond), true,
3135 : NULL_TREE, true, GSI_SAME_STMT);
3136 1488 : rhs = fold_build_cond_expr (type, unshare_expr (cond), rhs, lhs);
3137 1488 : gimple_assign_set_rhs1 (stmt, ifc_temp_var (type, rhs, &gsi));
3138 1488 : update_stmt (stmt);
3139 : }
3140 :
3141 146107 : if (gimple_plf (gsi_stmt (gsi), GF_PLF_2)
3142 146107 : && is_gimple_call (gsi_stmt (gsi)))
3143 : {
3144 : /* Convert functions that have a SIMD clone to IFN_MASK_CALL.
3145 : This will cause the vectorizer to match the "in branch"
3146 : clone variants, and serves to build the mask vector
3147 : in a natural way. */
3148 999 : tree mask = cond;
3149 999 : gcall *call = dyn_cast <gcall *> (gsi_stmt (gsi));
3150 999 : tree orig_fn = gimple_call_fn (call);
3151 999 : int orig_nargs = gimple_call_num_args (call);
3152 999 : auto_vec<tree> args;
3153 999 : args.safe_push (orig_fn);
3154 2013 : for (int i = 0; i < orig_nargs; i++)
3155 1014 : args.safe_push (gimple_call_arg (call, i));
3156 : /* If `swap', we invert the mask used for the if branch for use
3157 : when masking the function call. */
3158 999 : if (swap)
3159 : {
3160 948 : gimple_seq stmts = NULL;
3161 948 : tree true_val
3162 948 : = constant_boolean_node (true, TREE_TYPE (mask));
3163 948 : mask = gimple_build (&stmts, BIT_XOR_EXPR,
3164 948 : TREE_TYPE (mask), mask, true_val);
3165 948 : gsi_insert_seq_before (&gsi, stmts, GSI_SAME_STMT);
3166 : }
3167 999 : args.safe_push (mask);
3168 :
3169 : /* Replace the call with a IFN_MASK_CALL that has the extra
3170 : condition parameter. */
3171 999 : gcall *new_call = gimple_build_call_internal_vec (IFN_MASK_CALL,
3172 : args);
3173 999 : gimple_call_set_lhs (new_call, gimple_call_lhs (call));
3174 999 : gsi_replace (&gsi, new_call, true);
3175 999 : }
3176 :
3177 146107 : tree lhs = gimple_get_lhs (gsi_stmt (gsi));
3178 146107 : if (lhs && TREE_CODE (lhs) == SSA_NAME)
3179 85764 : ssa_names.add (lhs);
3180 146107 : gsi_next (&gsi);
3181 : }
3182 51099 : ssa_names.empty ();
3183 : }
3184 11856 : }
3185 :
3186 : /* Remove all GIMPLE_CONDs and GIMPLE_LABELs and GIMPLE_SWITCH of all
3187 : the basic blocks other than the exit and latch of the LOOP. Also
3188 : resets the GIMPLE_DEBUG information. */
3189 :
3190 : static void
3191 28995 : remove_conditions_and_labels (loop_p loop)
3192 : {
3193 28995 : gimple_stmt_iterator gsi;
3194 28995 : unsigned int i;
3195 :
3196 177632 : for (i = 0; i < loop->num_nodes; i++)
3197 : {
3198 148637 : basic_block bb = ifc_bbs[i];
3199 :
3200 148637 : if (bb_with_exit_edge_p (loop, bb)
3201 148637 : || bb == loop->latch)
3202 57990 : continue;
3203 :
3204 764687 : for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); )
3205 583393 : switch (gimple_code (gsi_stmt (gsi)))
3206 : {
3207 37141 : case GIMPLE_COND:
3208 37141 : case GIMPLE_LABEL:
3209 37141 : case GIMPLE_SWITCH:
3210 37141 : gsi_remove (&gsi, true);
3211 37141 : break;
3212 :
3213 353554 : case GIMPLE_DEBUG:
3214 : /* ??? Should there be conditional GIMPLE_DEBUG_BINDs? */
3215 353554 : if (gimple_debug_bind_p (gsi_stmt (gsi)))
3216 : {
3217 272731 : gimple_debug_bind_reset_value (gsi_stmt (gsi));
3218 272731 : update_stmt (gsi_stmt (gsi));
3219 : }
3220 353554 : gsi_next (&gsi);
3221 353554 : break;
3222 :
3223 192698 : default:
3224 192698 : gsi_next (&gsi);
3225 : }
3226 : }
3227 28995 : }
3228 :
3229 : /* Combine all the basic blocks from LOOP into one or two super basic
3230 : blocks. Replace PHI nodes with conditional modify expressions.
3231 : LOOP_VERSIONED should be true if we know that the loop was versioned for
3232 : vectorization. */
3233 :
3234 : static void
3235 28995 : combine_blocks (class loop *loop, bool loop_versioned)
3236 : {
3237 28995 : basic_block bb, exit_bb, merge_target_bb;
3238 28995 : unsigned int orig_loop_num_nodes = loop->num_nodes;
3239 28995 : unsigned int i;
3240 28995 : edge e;
3241 28995 : edge_iterator ei;
3242 :
3243 : /* Reset flow-sensitive info before predicating stmts or PHIs we
3244 : might fold. */
3245 177632 : for (i = 0; i < orig_loop_num_nodes; i++)
3246 : {
3247 148637 : bb = ifc_bbs[i];
3248 148637 : if (is_predicated (bb))
3249 : {
3250 57490 : for (auto gsi = gsi_start_phis (bb);
3251 59015 : !gsi_end_p (gsi); gsi_next (&gsi))
3252 1525 : reset_flow_sensitive_info (gimple_phi_result (*gsi));
3253 234030 : for (auto gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
3254 : {
3255 119050 : gimple *stmt = gsi_stmt (gsi);
3256 119050 : ssa_op_iter i;
3257 119050 : tree op;
3258 169219 : FOR_EACH_SSA_TREE_OPERAND (op, stmt, i, SSA_OP_DEF)
3259 50169 : reset_flow_sensitive_info (op);
3260 : }
3261 : }
3262 : }
3263 :
3264 28995 : remove_conditions_and_labels (loop);
3265 28995 : insert_gimplified_predicates (loop);
3266 28995 : predicate_all_scalar_phis (loop, loop_versioned);
3267 :
3268 28995 : if (need_to_predicate || need_to_rewrite_undefined)
3269 11856 : predicate_statements (loop);
3270 :
3271 : /* Merge basic blocks. */
3272 28995 : exit_bb = single_exit (loop)->src;
3273 28995 : gcc_assert (exit_bb != loop->latch);
3274 177632 : for (i = 0; i < orig_loop_num_nodes; i++)
3275 : {
3276 148637 : bb = ifc_bbs[i];
3277 148637 : free_bb_predicate (bb);
3278 : }
3279 :
3280 28995 : merge_target_bb = loop->header;
3281 :
3282 : /* Get at the virtual def valid for uses starting at the first block
3283 : we merge into the header. Without a virtual PHI the loop has the
3284 : same virtual use on all stmts. */
3285 28995 : gphi *vphi = get_virtual_phi (loop->header);
3286 28995 : tree last_vdef = NULL_TREE;
3287 28995 : if (vphi)
3288 : {
3289 17315 : last_vdef = gimple_phi_result (vphi);
3290 34630 : for (gimple_stmt_iterator gsi = gsi_start_bb (loop->header);
3291 279467 : ! gsi_end_p (gsi); gsi_next (&gsi))
3292 359045 : if (gimple_vdef (gsi_stmt (gsi)))
3293 5090 : last_vdef = gimple_vdef (gsi_stmt (gsi));
3294 : }
3295 148637 : for (i = 1; i < orig_loop_num_nodes; i++)
3296 : {
3297 119642 : gimple_stmt_iterator gsi;
3298 119642 : gimple_stmt_iterator last;
3299 :
3300 119642 : bb = ifc_bbs[i];
3301 :
3302 119642 : if (bb == exit_bb || bb == loop->latch)
3303 57990 : continue;
3304 :
3305 : /* We release virtual PHIs late because we have to propagate them
3306 : out using the current VUSE. The def might be the one used
3307 : after the loop. */
3308 61652 : vphi = get_virtual_phi (bb);
3309 61652 : if (vphi)
3310 : {
3311 : /* When there's just loads inside the loop a stray virtual
3312 : PHI merging the uses can appear, update last_vdef from
3313 : it. */
3314 616 : if (!last_vdef)
3315 0 : last_vdef = gimple_phi_arg_def (vphi, 0);
3316 616 : imm_use_iterator iter;
3317 616 : use_operand_p use_p;
3318 616 : gimple *use_stmt;
3319 2729 : FOR_EACH_IMM_USE_STMT (use_stmt, iter, gimple_phi_result (vphi))
3320 : {
3321 4493 : FOR_EACH_IMM_USE_ON_STMT (use_p, iter)
3322 1498 : SET_USE (use_p, last_vdef);
3323 616 : }
3324 616 : if (SSA_NAME_OCCURS_IN_ABNORMAL_PHI (gimple_phi_result (vphi)))
3325 0 : SSA_NAME_OCCURS_IN_ABNORMAL_PHI (last_vdef) = 1;
3326 616 : gsi = gsi_for_stmt (vphi);
3327 616 : remove_phi_node (&gsi, true);
3328 : }
3329 :
3330 : /* Make stmts member of loop->header and clear range info from all stmts
3331 : in BB which is now no longer executed conditional on a predicate we
3332 : could have derived it from. */
3333 388848 : for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
3334 : {
3335 265544 : gimple *stmt = gsi_stmt (gsi);
3336 265544 : gimple_set_bb (stmt, merge_target_bb);
3337 : /* Update virtual operands. */
3338 265544 : if (last_vdef)
3339 : {
3340 206006 : use_operand_p use_p = ssa_vuse_operand (stmt);
3341 14612 : if (use_p
3342 14612 : && USE_FROM_PTR (use_p) != last_vdef)
3343 634 : SET_USE (use_p, last_vdef);
3344 618034 : if (gimple_vdef (stmt))
3345 265544 : last_vdef = gimple_vdef (stmt);
3346 : }
3347 : else
3348 : /* If this is the first load we arrive at update last_vdef
3349 : so we handle stray PHIs correctly. */
3350 305832 : last_vdef = gimple_vuse (stmt);
3351 : }
3352 :
3353 : /* Update stmt list. */
3354 61652 : last = gsi_last_bb (merge_target_bb);
3355 123304 : gsi_insert_seq_after_without_update (&last, bb_seq (bb), GSI_NEW_STMT);
3356 61652 : set_bb_seq (bb, NULL);
3357 : }
3358 :
3359 : /* Fixup virtual operands in the exit block. */
3360 28995 : if (exit_bb
3361 28995 : && exit_bb != loop->header)
3362 : {
3363 : /* We release virtual PHIs late because we have to propagate them
3364 : out using the current VUSE. The def might be the one used
3365 : after the loop. */
3366 28995 : vphi = get_virtual_phi (exit_bb);
3367 28995 : if (vphi)
3368 : {
3369 : /* When there's just loads inside the loop a stray virtual
3370 : PHI merging the uses can appear, update last_vdef from
3371 : it. */
3372 1726 : if (!last_vdef)
3373 0 : last_vdef = gimple_phi_arg_def (vphi, 0);
3374 1726 : imm_use_iterator iter;
3375 1726 : use_operand_p use_p;
3376 1726 : gimple *use_stmt;
3377 6925 : FOR_EACH_IMM_USE_STMT (use_stmt, iter, gimple_phi_result (vphi))
3378 : {
3379 10419 : FOR_EACH_IMM_USE_ON_STMT (use_p, iter)
3380 3473 : SET_USE (use_p, last_vdef);
3381 1726 : }
3382 1726 : if (SSA_NAME_OCCURS_IN_ABNORMAL_PHI (gimple_phi_result (vphi)))
3383 0 : SSA_NAME_OCCURS_IN_ABNORMAL_PHI (last_vdef) = 1;
3384 1726 : gimple_stmt_iterator gsi = gsi_for_stmt (vphi);
3385 1726 : remove_phi_node (&gsi, true);
3386 : }
3387 : }
3388 :
3389 : /* Now remove all the edges in the loop, except for those from the exit
3390 : block and delete the blocks we elided. */
3391 148637 : for (i = 1; i < orig_loop_num_nodes; i++)
3392 : {
3393 119642 : bb = ifc_bbs[i];
3394 :
3395 276050 : for (ei = ei_start (bb->preds); (e = ei_safe_edge (ei));)
3396 : {
3397 156408 : if (e->src == exit_bb)
3398 28995 : ei_next (&ei);
3399 : else
3400 127413 : remove_edge (e);
3401 : }
3402 : }
3403 148637 : for (i = 1; i < orig_loop_num_nodes; i++)
3404 : {
3405 119642 : bb = ifc_bbs[i];
3406 :
3407 119642 : if (bb == exit_bb || bb == loop->latch)
3408 57990 : continue;
3409 :
3410 61652 : delete_basic_block (bb);
3411 : }
3412 :
3413 : /* Re-connect the exit block. */
3414 28995 : if (exit_bb != NULL)
3415 : {
3416 28995 : if (exit_bb != loop->header)
3417 : {
3418 : /* Connect this node to loop header. */
3419 28995 : make_single_succ_edge (loop->header, exit_bb, EDGE_FALLTHRU);
3420 28995 : set_immediate_dominator (CDI_DOMINATORS, exit_bb, loop->header);
3421 : }
3422 :
3423 : /* Redirect non-exit edges to loop->latch. */
3424 86985 : FOR_EACH_EDGE (e, ei, exit_bb->succs)
3425 : {
3426 57990 : if (!loop_exit_edge_p (loop, e))
3427 28995 : redirect_edge_and_branch (e, loop->latch);
3428 : }
3429 28995 : set_immediate_dominator (CDI_DOMINATORS, loop->latch, exit_bb);
3430 : }
3431 : else
3432 : {
3433 : /* If the loop does not have an exit, reconnect header and latch. */
3434 0 : make_edge (loop->header, loop->latch, EDGE_FALLTHRU);
3435 0 : set_immediate_dominator (CDI_DOMINATORS, loop->latch, loop->header);
3436 : }
3437 :
3438 : /* If possible, merge loop header to the block with the exit edge.
3439 : This reduces the number of basic blocks to two, to please the
3440 : vectorizer that handles only loops with two nodes. */
3441 28995 : if (exit_bb
3442 28995 : && exit_bb != loop->header)
3443 : {
3444 28995 : if (can_merge_blocks_p (loop->header, exit_bb))
3445 28993 : merge_blocks (loop->header, exit_bb);
3446 : }
3447 :
3448 28995 : free (ifc_bbs);
3449 28995 : ifc_bbs = NULL;
3450 28995 : }
3451 :
3452 : /* Version LOOP before if-converting it; the original loop
3453 : will be if-converted, the new copy of the loop will not,
3454 : and the LOOP_VECTORIZED internal call will be guarding which
3455 : loop to execute. The vectorizer pass will fold this
3456 : internal call into either true or false.
3457 :
3458 : Note that this function intentionally invalidates profile. Both edges
3459 : out of LOOP_VECTORIZED must have 100% probability so the profile remains
3460 : consistent after the condition is folded in the vectorizer. */
3461 :
3462 : static class loop *
3463 29203 : version_loop_for_if_conversion (class loop *loop, vec<gimple *> *preds)
3464 : {
3465 29203 : basic_block cond_bb;
3466 29203 : tree cond = make_ssa_name (boolean_type_node);
3467 29203 : class loop *new_loop;
3468 29203 : gimple *g;
3469 29203 : gimple_stmt_iterator gsi;
3470 29203 : unsigned int save_length = 0;
3471 :
3472 29203 : g = gimple_build_call_internal (IFN_LOOP_VECTORIZED, 2,
3473 29203 : build_int_cst (integer_type_node, loop->num),
3474 : integer_zero_node);
3475 29203 : gimple_call_set_lhs (g, cond);
3476 :
3477 29203 : void **saved_preds = NULL;
3478 29203 : if (any_complicated_phi || need_to_predicate)
3479 : {
3480 : /* Save BB->aux around loop_version as that uses the same field. */
3481 3490 : save_length = loop->inner ? loop->inner->num_nodes : loop->num_nodes;
3482 3490 : saved_preds = XALLOCAVEC (void *, save_length);
3483 26348 : for (unsigned i = 0; i < save_length; i++)
3484 22858 : saved_preds[i] = ifc_bbs[i]->aux;
3485 : }
3486 :
3487 29203 : initialize_original_copy_tables ();
3488 : /* At this point we invalidate profile consistency until IFN_LOOP_VECTORIZED
3489 : is re-merged in the vectorizer. */
3490 29203 : new_loop = loop_version (loop, cond, &cond_bb,
3491 : profile_probability::always (),
3492 : profile_probability::always (),
3493 : profile_probability::always (),
3494 : profile_probability::always (), true);
3495 29203 : free_original_copy_tables ();
3496 :
3497 29203 : if (any_complicated_phi || need_to_predicate)
3498 26348 : for (unsigned i = 0; i < save_length; i++)
3499 22858 : ifc_bbs[i]->aux = saved_preds[i];
3500 :
3501 29203 : if (new_loop == NULL)
3502 : return NULL;
3503 :
3504 29203 : new_loop->dont_vectorize = true;
3505 29203 : new_loop->force_vectorize = false;
3506 29203 : gsi = gsi_last_bb (cond_bb);
3507 29203 : gimple_call_set_arg (g, 1, build_int_cst (integer_type_node, new_loop->num));
3508 29203 : if (preds)
3509 29203 : preds->safe_push (g);
3510 29203 : gsi_insert_before (&gsi, g, GSI_SAME_STMT);
3511 29203 : update_ssa (TODO_update_ssa_no_phi);
3512 29203 : return new_loop;
3513 : }
3514 :
3515 : /* Return true when LOOP satisfies the follow conditions that will
3516 : allow it to be recognized by the vectorizer for outer-loop
3517 : vectorization:
3518 : - The loop is not the root node of the loop tree.
3519 : - The loop has exactly one inner loop.
3520 : - The loop has a single exit.
3521 : - The loop header has a single successor, which is the inner
3522 : loop header.
3523 : - Each of the inner and outer loop latches have a single
3524 : predecessor.
3525 : - The loop exit block has a single predecessor, which is the
3526 : inner loop's exit block. */
3527 :
3528 : static bool
3529 29203 : versionable_outer_loop_p (class loop *loop)
3530 : {
3531 29203 : if (!loop_outer (loop)
3532 7950 : || loop->dont_vectorize
3533 6967 : || !loop->inner
3534 6967 : || loop->inner->next
3535 2879 : || !single_exit (loop)
3536 2224 : || !single_succ_p (loop->header)
3537 933 : || single_succ (loop->header) != loop->inner->header
3538 933 : || !single_pred_p (loop->latch)
3539 37153 : || !single_pred_p (loop->inner->latch))
3540 28270 : return false;
3541 :
3542 933 : basic_block outer_exit = single_pred (loop->latch);
3543 933 : basic_block inner_exit = single_pred (loop->inner->latch);
3544 :
3545 30127 : if (!single_pred_p (outer_exit) || single_pred (outer_exit) != inner_exit)
3546 : return false;
3547 :
3548 923 : if (dump_file)
3549 0 : fprintf (dump_file, "Found vectorizable outer loop for versioning\n");
3550 :
3551 : return true;
3552 : }
3553 :
3554 : /* Performs splitting of critical edges. Skip splitting and return false
3555 : if LOOP will not be converted because:
3556 :
3557 : - LOOP is not well formed.
3558 : - LOOP has PHI with more than MAX_PHI_ARG_NUM arguments.
3559 :
3560 : Last restriction is valid only if AGGRESSIVE_IF_CONV is false. */
3561 :
3562 : static bool
3563 313236 : ifcvt_split_critical_edges (class loop *loop, bool aggressive_if_conv)
3564 : {
3565 313236 : basic_block *body;
3566 313236 : basic_block bb;
3567 313236 : unsigned int num = loop->num_nodes;
3568 313236 : unsigned int i;
3569 313236 : edge e;
3570 313236 : edge_iterator ei;
3571 313236 : auto_vec<edge> critical_edges;
3572 :
3573 : /* Loop is not well formed. */
3574 313236 : if (loop->inner)
3575 : return false;
3576 :
3577 224919 : body = get_loop_body (loop);
3578 1791083 : for (i = 0; i < num; i++)
3579 : {
3580 1347411 : bb = body[i];
3581 1347411 : if (!aggressive_if_conv
3582 1339089 : && phi_nodes (bb)
3583 1769766 : && EDGE_COUNT (bb->preds) > MAX_PHI_ARG_NUM)
3584 : {
3585 6166 : if (dump_file && (dump_flags & TDF_DETAILS))
3586 0 : fprintf (dump_file,
3587 : "BB %d has complicated PHI with more than %u args.\n",
3588 : bb->index, MAX_PHI_ARG_NUM);
3589 :
3590 6166 : free (body);
3591 6166 : return false;
3592 : }
3593 1341245 : if (bb == loop->latch || bb_with_exit_edge_p (loop, bb))
3594 761436 : continue;
3595 :
3596 : /* Skip basic blocks not ending with conditional branch. */
3597 1387145 : if (!safe_is_a <gcond *> (*gsi_last_bb (bb))
3598 579809 : && !safe_is_a <gswitch *> (*gsi_last_bb (bb)))
3599 322804 : continue;
3600 :
3601 781382 : FOR_EACH_EDGE (e, ei, bb->succs)
3602 646111 : if (EDGE_CRITICAL_P (e) && e->dest->loop_father == loop)
3603 121734 : critical_edges.safe_push (e);
3604 : }
3605 218753 : free (body);
3606 :
3607 431378 : while (critical_edges.length () > 0)
3608 : {
3609 118142 : e = critical_edges.pop ();
3610 : /* Don't split if bb can be predicated along non-critical edge. */
3611 118142 : if (EDGE_COUNT (e->dest->preds) > 2 || all_preds_critical_p (e->dest))
3612 54706 : split_edge (e);
3613 : }
3614 :
3615 : return true;
3616 313236 : }
3617 :
3618 : /* Delete redundant statements produced by predication which prevents
3619 : loop vectorization. */
3620 :
3621 : static void
3622 29252 : ifcvt_local_dce (class loop *loop)
3623 : {
3624 29252 : gimple *stmt;
3625 29252 : gimple *stmt1;
3626 29252 : gimple *phi;
3627 29252 : gimple_stmt_iterator gsi;
3628 29252 : auto_vec<gimple *> worklist;
3629 29252 : enum gimple_code code;
3630 29252 : use_operand_p use_p;
3631 29252 : imm_use_iterator imm_iter;
3632 :
3633 : /* The loop has a single BB only. */
3634 29252 : basic_block bb = loop->header;
3635 29252 : tree latch_vdef = NULL_TREE;
3636 :
3637 29252 : worklist.create (64);
3638 : /* Consider all phi as live statements. */
3639 114073 : for (gsi = gsi_start_phis (bb); !gsi_end_p (gsi); gsi_next (&gsi))
3640 : {
3641 84821 : phi = gsi_stmt (gsi);
3642 84821 : gimple_set_plf (phi, GF_PLF_2, true);
3643 84821 : worklist.safe_push (phi);
3644 187147 : if (virtual_operand_p (gimple_phi_result (phi)))
3645 17505 : latch_vdef = PHI_ARG_DEF_FROM_EDGE (phi, loop_latch_edge (loop));
3646 : }
3647 : /* Consider load/store statements, CALL and COND as live. */
3648 938194 : for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
3649 : {
3650 879690 : stmt = gsi_stmt (gsi);
3651 879690 : if (is_gimple_debug (stmt))
3652 : {
3653 459630 : gimple_set_plf (stmt, GF_PLF_2, true);
3654 459630 : continue;
3655 : }
3656 420060 : if (gimple_store_p (stmt) || gimple_assign_load_p (stmt))
3657 : {
3658 77806 : gimple_set_plf (stmt, GF_PLF_2, true);
3659 77806 : worklist.safe_push (stmt);
3660 77806 : continue;
3661 : }
3662 342254 : code = gimple_code (stmt);
3663 342254 : if (code == GIMPLE_COND || code == GIMPLE_CALL || code == GIMPLE_SWITCH)
3664 : {
3665 34773 : gimple_set_plf (stmt, GF_PLF_2, true);
3666 34773 : worklist.safe_push (stmt);
3667 34773 : continue;
3668 : }
3669 307481 : gimple_set_plf (stmt, GF_PLF_2, false);
3670 :
3671 307481 : if (code == GIMPLE_ASSIGN)
3672 : {
3673 307472 : tree lhs = gimple_assign_lhs (stmt);
3674 1025778 : FOR_EACH_IMM_USE_FAST (use_p, imm_iter, lhs)
3675 : {
3676 430377 : stmt1 = USE_STMT (use_p);
3677 430377 : if (!is_gimple_debug (stmt1) && gimple_bb (stmt1) != bb)
3678 : {
3679 19543 : gimple_set_plf (stmt, GF_PLF_2, true);
3680 19543 : worklist.safe_push (stmt);
3681 19543 : break;
3682 : }
3683 307472 : }
3684 : }
3685 : }
3686 : /* Propagate liveness through arguments of live stmt. */
3687 517757 : while (worklist.length () > 0)
3688 : {
3689 488505 : ssa_op_iter iter;
3690 488505 : use_operand_p use_p;
3691 488505 : tree use;
3692 :
3693 488505 : stmt = worklist.pop ();
3694 1721789 : FOR_EACH_PHI_OR_STMT_USE (use_p, stmt, iter, SSA_OP_USE)
3695 : {
3696 744779 : use = USE_FROM_PTR (use_p);
3697 744779 : if (TREE_CODE (use) != SSA_NAME)
3698 44335 : continue;
3699 700444 : stmt1 = SSA_NAME_DEF_STMT (use);
3700 700444 : if (gimple_bb (stmt1) != bb || gimple_plf (stmt1, GF_PLF_2))
3701 428882 : continue;
3702 271562 : gimple_set_plf (stmt1, GF_PLF_2, true);
3703 271562 : worklist.safe_push (stmt1);
3704 : }
3705 : }
3706 : /* Delete dead statements. */
3707 29252 : gsi = gsi_last_bb (bb);
3708 908942 : while (!gsi_end_p (gsi))
3709 : {
3710 879690 : gimple_stmt_iterator gsiprev = gsi;
3711 879690 : gsi_prev (&gsiprev);
3712 879690 : stmt = gsi_stmt (gsi);
3713 879690 : if (!gimple_has_volatile_ops (stmt)
3714 879549 : && gimple_store_p (stmt)
3715 432549 : && gimple_vdef (stmt))
3716 : {
3717 20873 : tree lhs = gimple_get_lhs (stmt);
3718 20873 : ao_ref write;
3719 20873 : ao_ref_init (&write, lhs);
3720 :
3721 20873 : if (dse_classify_store (&write, stmt, false, NULL, NULL, latch_vdef)
3722 : == DSE_STORE_DEAD)
3723 267 : delete_dead_or_redundant_assignment (&gsi, "dead");
3724 20873 : gsi = gsiprev;
3725 20873 : continue;
3726 20873 : }
3727 :
3728 858817 : if (gimple_plf (stmt, GF_PLF_2))
3729 : {
3730 842441 : gsi = gsiprev;
3731 842441 : continue;
3732 : }
3733 16376 : if (dump_file && (dump_flags & TDF_DETAILS))
3734 : {
3735 16 : fprintf (dump_file, "Delete dead stmt in bb#%d\n", bb->index);
3736 16 : print_gimple_stmt (dump_file, stmt, 0, TDF_SLIM);
3737 : }
3738 16376 : gsi_remove (&gsi, true);
3739 16376 : release_defs (stmt);
3740 16376 : gsi = gsiprev;
3741 : }
3742 29252 : }
3743 :
3744 : /* Return true if VALUE is already available on edge PE. */
3745 :
3746 : static bool
3747 264852 : ifcvt_available_on_edge_p (edge pe, tree value)
3748 : {
3749 264852 : if (is_gimple_min_invariant (value))
3750 : return true;
3751 :
3752 258699 : if (TREE_CODE (value) == SSA_NAME)
3753 : {
3754 257461 : basic_block def_bb = gimple_bb (SSA_NAME_DEF_STMT (value));
3755 257461 : if (!def_bb || dominated_by_p (CDI_DOMINATORS, pe->dest, def_bb))
3756 26795 : return true;
3757 : }
3758 :
3759 : return false;
3760 : }
3761 :
3762 : /* Return true if STMT can be hoisted from if-converted loop LOOP to
3763 : edge PE. */
3764 :
3765 : static bool
3766 863047 : ifcvt_can_hoist (class loop *loop, edge pe, gimple *stmt)
3767 : {
3768 863047 : if (auto *call = dyn_cast<gcall *> (stmt))
3769 : {
3770 5527 : if (gimple_call_internal_p (call)
3771 5527 : && internal_fn_mask_index (gimple_call_internal_fn (call)) >= 0)
3772 : return false;
3773 : }
3774 1227449 : else if (auto *assign = dyn_cast<gassign *> (stmt))
3775 : {
3776 447429 : if (gimple_assign_rhs_code (assign) == COND_EXPR)
3777 : return false;
3778 : }
3779 : else
3780 : return false;
3781 :
3782 325462 : if (gimple_has_side_effects (stmt)
3783 324242 : || gimple_could_trap_p (stmt)
3784 243969 : || stmt_could_throw_p (cfun, stmt)
3785 243967 : || gimple_vdef (stmt)
3786 568745 : || gimple_vuse (stmt))
3787 83260 : return false;
3788 :
3789 242202 : int num_args = gimple_num_args (stmt);
3790 242202 : if (pe != loop_preheader_edge (loop))
3791 : {
3792 268822 : for (int i = 0; i < num_args; ++i)
3793 264852 : if (!ifcvt_available_on_edge_p (pe, gimple_arg (stmt, i)))
3794 : return false;
3795 : }
3796 : else
3797 : {
3798 8059 : for (int i = 0; i < num_args; ++i)
3799 7789 : if (!expr_invariant_in_loop_p (loop, gimple_arg (stmt, i)))
3800 : return false;
3801 : }
3802 :
3803 : return true;
3804 : }
3805 :
3806 : /* Hoist invariant statements from LOOP to edge PE. */
3807 :
3808 : static void
3809 29252 : ifcvt_hoist_invariants (class loop *loop, edge pe)
3810 : {
3811 : /* Only hoist from the now unconditionally executed part of the loop. */
3812 29252 : basic_block bb = loop->header;
3813 29252 : gimple_stmt_iterator hoist_gsi = {};
3814 921551 : for (gimple_stmt_iterator gsi = gsi_start_bb (bb); !gsi_end_p (gsi);)
3815 : {
3816 863047 : gimple *stmt = gsi_stmt (gsi);
3817 863047 : if (ifcvt_can_hoist (loop, pe, stmt))
3818 : {
3819 : /* Once we've hoisted one statement, insert other statements
3820 : after it. */
3821 4240 : gsi_remove (&gsi, false);
3822 4240 : if (hoist_gsi.ptr)
3823 1817 : gsi_insert_after (&hoist_gsi, stmt, GSI_NEW_STMT);
3824 : else
3825 : {
3826 2423 : gsi_insert_on_edge_immediate (pe, stmt);
3827 2423 : hoist_gsi = gsi_for_stmt (stmt);
3828 : }
3829 4240 : continue;
3830 : }
3831 858807 : gsi_next (&gsi);
3832 : }
3833 29252 : }
3834 :
3835 : /* Returns the DECL_FIELD_BIT_OFFSET of the bitfield accesse in stmt iff its
3836 : type mode is not BLKmode. If BITPOS is not NULL it will hold the poly_int64
3837 : value of the DECL_FIELD_BIT_OFFSET of the bitfield access and STRUCT_EXPR,
3838 : if not NULL, will hold the tree representing the base struct of this
3839 : bitfield. */
3840 :
3841 : static tree
3842 1221 : get_bitfield_rep (gassign *stmt, bool write, tree *bitpos,
3843 : tree *struct_expr)
3844 : {
3845 1221 : tree comp_ref = write ? gimple_assign_lhs (stmt)
3846 386 : : gimple_assign_rhs1 (stmt);
3847 :
3848 1221 : tree field_decl = TREE_OPERAND (comp_ref, 1);
3849 1221 : tree ref_offset = component_ref_field_offset (comp_ref);
3850 1221 : tree rep_decl = DECL_BIT_FIELD_REPRESENTATIVE (field_decl);
3851 :
3852 : /* Bail out if the representative is not a suitable type for a scalar
3853 : register variable. */
3854 1221 : if (!is_gimple_reg_type (TREE_TYPE (rep_decl)))
3855 : return NULL_TREE;
3856 :
3857 : /* Bail out if the DECL_SIZE of the field_decl isn't the same as the BF's
3858 : precision. */
3859 1206 : unsigned HOST_WIDE_INT bf_prec
3860 1206 : = TYPE_PRECISION (TREE_TYPE (gimple_assign_lhs (stmt)));
3861 1206 : if (compare_tree_int (DECL_SIZE (field_decl), bf_prec) != 0)
3862 : return NULL_TREE;
3863 :
3864 1206 : if (TREE_CODE (DECL_FIELD_OFFSET (rep_decl)) != INTEGER_CST
3865 1206 : || TREE_CODE (ref_offset) != INTEGER_CST)
3866 : {
3867 2 : if (dump_file && (dump_flags & TDF_DETAILS))
3868 2 : fprintf (dump_file, "\t Bitfield NOT OK to lower,"
3869 : " offset is non-constant.\n");
3870 2 : return NULL_TREE;
3871 : }
3872 :
3873 1204 : if (struct_expr)
3874 602 : *struct_expr = TREE_OPERAND (comp_ref, 0);
3875 :
3876 1204 : if (bitpos)
3877 : {
3878 : /* To calculate the bitposition of the BITFIELD_REF we have to determine
3879 : where our bitfield starts in relation to the container REP_DECL. The
3880 : DECL_FIELD_OFFSET of the original bitfield's member FIELD_DECL tells
3881 : us how many bytes from the start of the structure there are until the
3882 : start of the group of bitfield members the FIELD_DECL belongs to,
3883 : whereas DECL_FIELD_BIT_OFFSET will tell us how many bits from that
3884 : position our actual bitfield member starts. For the container
3885 : REP_DECL adding DECL_FIELD_OFFSET and DECL_FIELD_BIT_OFFSET will tell
3886 : us the distance between the start of the structure and the start of
3887 : the container, though the first is in bytes and the later other in
3888 : bits. With this in mind we calculate the bit position of our new
3889 : BITFIELD_REF by subtracting the number of bits between the start of
3890 : the structure and the container from the number of bits from the start
3891 : of the structure and the actual bitfield member. */
3892 602 : tree bf_pos = fold_build2 (MULT_EXPR, bitsizetype,
3893 : ref_offset,
3894 : build_int_cst (bitsizetype, BITS_PER_UNIT));
3895 602 : bf_pos = fold_build2 (PLUS_EXPR, bitsizetype, bf_pos,
3896 : DECL_FIELD_BIT_OFFSET (field_decl));
3897 602 : tree rep_pos = fold_build2 (MULT_EXPR, bitsizetype,
3898 : DECL_FIELD_OFFSET (rep_decl),
3899 : build_int_cst (bitsizetype, BITS_PER_UNIT));
3900 602 : rep_pos = fold_build2 (PLUS_EXPR, bitsizetype, rep_pos,
3901 : DECL_FIELD_BIT_OFFSET (rep_decl));
3902 :
3903 602 : *bitpos = fold_build2 (MINUS_EXPR, bitsizetype, bf_pos, rep_pos);
3904 : }
3905 :
3906 : return rep_decl;
3907 :
3908 : }
3909 :
3910 : /* Lowers the bitfield described by DATA.
3911 : For a write like:
3912 :
3913 : struct.bf = _1;
3914 :
3915 : lower to:
3916 :
3917 : __ifc_1 = struct.<representative>;
3918 : __ifc_2 = BIT_INSERT_EXPR (__ifc_1, _1, bitpos);
3919 : struct.<representative> = __ifc_2;
3920 :
3921 : For a read:
3922 :
3923 : _1 = struct.bf;
3924 :
3925 : lower to:
3926 :
3927 : __ifc_1 = struct.<representative>;
3928 : _1 = BIT_FIELD_REF (__ifc_1, bitsize, bitpos);
3929 :
3930 : where representative is a legal load that contains the bitfield value,
3931 : bitsize is the size of the bitfield and bitpos the offset to the start of
3932 : the bitfield within the representative. */
3933 :
3934 : static void
3935 602 : lower_bitfield (gassign *stmt, bool write)
3936 : {
3937 602 : tree struct_expr;
3938 602 : tree bitpos;
3939 602 : tree rep_decl = get_bitfield_rep (stmt, write, &bitpos, &struct_expr);
3940 602 : tree rep_type = TREE_TYPE (rep_decl);
3941 602 : tree bf_type = TREE_TYPE (gimple_assign_lhs (stmt));
3942 :
3943 602 : gimple_stmt_iterator gsi = gsi_for_stmt (stmt);
3944 602 : if (dump_file && (dump_flags & TDF_DETAILS))
3945 : {
3946 9 : fprintf (dump_file, "Lowering:\n");
3947 9 : print_gimple_stmt (dump_file, stmt, 0, TDF_SLIM);
3948 9 : fprintf (dump_file, "to:\n");
3949 : }
3950 :
3951 : /* REP_COMP_REF is a COMPONENT_REF for the representative. NEW_VAL is it's
3952 : defining SSA_NAME. */
3953 602 : tree rep_comp_ref = build3 (COMPONENT_REF, rep_type, struct_expr, rep_decl,
3954 : NULL_TREE);
3955 602 : tree new_val = ifc_temp_var (rep_type, rep_comp_ref, &gsi);
3956 :
3957 602 : if (dump_file && (dump_flags & TDF_DETAILS))
3958 9 : print_gimple_stmt (dump_file, SSA_NAME_DEF_STMT (new_val), 0, TDF_SLIM);
3959 :
3960 602 : if (write)
3961 : {
3962 413 : new_val = ifc_temp_var (rep_type,
3963 : build3 (BIT_INSERT_EXPR, rep_type, new_val,
3964 : unshare_expr (gimple_assign_rhs1 (stmt)),
3965 : bitpos), &gsi);
3966 :
3967 413 : if (dump_file && (dump_flags & TDF_DETAILS))
3968 0 : print_gimple_stmt (dump_file, SSA_NAME_DEF_STMT (new_val), 0, TDF_SLIM);
3969 :
3970 413 : gimple *new_stmt = gimple_build_assign (unshare_expr (rep_comp_ref),
3971 : new_val);
3972 413 : gimple_move_vops (new_stmt, stmt);
3973 413 : gsi_insert_before (&gsi, new_stmt, GSI_SAME_STMT);
3974 :
3975 413 : if (dump_file && (dump_flags & TDF_DETAILS))
3976 0 : print_gimple_stmt (dump_file, new_stmt, 0, TDF_SLIM);
3977 : }
3978 : else
3979 : {
3980 189 : tree bfr = build3 (BIT_FIELD_REF, bf_type, new_val,
3981 189 : build_int_cst (bitsizetype, TYPE_PRECISION (bf_type)),
3982 : bitpos);
3983 189 : new_val = ifc_temp_var (bf_type, bfr, &gsi);
3984 :
3985 189 : gimple *new_stmt = gimple_build_assign (gimple_assign_lhs (stmt),
3986 : new_val);
3987 189 : gimple_move_vops (new_stmt, stmt);
3988 189 : gsi_insert_before (&gsi, new_stmt, GSI_SAME_STMT);
3989 :
3990 189 : if (dump_file && (dump_flags & TDF_DETAILS))
3991 9 : print_gimple_stmt (dump_file, new_stmt, 0, TDF_SLIM);
3992 : }
3993 :
3994 602 : gsi_remove (&gsi, true);
3995 602 : }
3996 :
3997 : /* Return TRUE if there are bitfields to lower in this LOOP. Fill TO_LOWER
3998 : with data structures representing these bitfields. */
3999 :
4000 : static bool
4001 235929 : bitfields_to_lower_p (class loop *loop,
4002 : vec <gassign *> &reads_to_lower,
4003 : vec <gassign *> &writes_to_lower)
4004 : {
4005 235929 : gimple_stmt_iterator gsi;
4006 :
4007 235929 : if (dump_file && (dump_flags & TDF_DETAILS))
4008 : {
4009 28 : fprintf (dump_file, "Analyzing loop %d for bitfields:\n", loop->num);
4010 : }
4011 :
4012 1003044 : for (unsigned i = 0; i < loop->num_nodes; ++i)
4013 : {
4014 767136 : basic_block bb = ifc_bbs[i];
4015 6513059 : for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
4016 : {
4017 4978808 : gassign *stmt = dyn_cast<gassign*> (gsi_stmt (gsi));
4018 4978808 : if (!stmt)
4019 4852760 : continue;
4020 :
4021 2014647 : tree op = gimple_assign_lhs (stmt);
4022 2014647 : bool write = TREE_CODE (op) == COMPONENT_REF;
4023 :
4024 2014647 : if (!write)
4025 1983232 : op = gimple_assign_rhs1 (stmt);
4026 :
4027 2014647 : if (TREE_CODE (op) != COMPONENT_REF)
4028 1888599 : continue;
4029 :
4030 126048 : if (DECL_BIT_FIELD_TYPE (TREE_OPERAND (op, 1)))
4031 : {
4032 623 : if (dump_file && (dump_flags & TDF_DETAILS))
4033 11 : print_gimple_stmt (dump_file, stmt, 0, TDF_SLIM);
4034 :
4035 623 : if (TREE_THIS_VOLATILE (op))
4036 : {
4037 4 : if (dump_file && (dump_flags & TDF_DETAILS))
4038 0 : fprintf (dump_file, "\t Bitfield NO OK to lower,"
4039 : " the access is volatile.\n");
4040 21 : return false;
4041 : }
4042 :
4043 619 : if (!INTEGRAL_TYPE_P (TREE_TYPE (op)))
4044 : {
4045 0 : if (dump_file && (dump_flags & TDF_DETAILS))
4046 0 : fprintf (dump_file, "\t Bitfield NO OK to lower,"
4047 : " field type is not Integral.\n");
4048 0 : return false;
4049 : }
4050 :
4051 619 : if (!get_bitfield_rep (stmt, write, NULL, NULL))
4052 : {
4053 17 : if (dump_file && (dump_flags & TDF_DETAILS))
4054 2 : fprintf (dump_file, "\t Bitfield NOT OK to lower,"
4055 : " representative is BLKmode.\n");
4056 17 : return false;
4057 : }
4058 :
4059 602 : if (dump_file && (dump_flags & TDF_DETAILS))
4060 9 : fprintf (dump_file, "\tBitfield OK to lower.\n");
4061 602 : if (write)
4062 413 : writes_to_lower.safe_push (stmt);
4063 : else
4064 189 : reads_to_lower.safe_push (stmt);
4065 : }
4066 : }
4067 : }
4068 471680 : return !reads_to_lower.is_empty () || !writes_to_lower.is_empty ();
4069 : }
4070 :
4071 :
4072 : /* If-convert LOOP when it is legal. For the moment this pass has no
4073 : profitability analysis. Returns non-zero todo flags when something
4074 : changed. */
4075 :
4076 : unsigned int
4077 493104 : tree_if_conversion (class loop *loop, vec<gimple *> *preds)
4078 : {
4079 493104 : unsigned int todo = 0;
4080 493104 : bool aggressive_if_conv;
4081 493104 : class loop *rloop;
4082 493104 : auto_vec <gassign *, 4> reads_to_lower;
4083 493104 : auto_vec <gassign *, 4> writes_to_lower;
4084 493104 : bitmap exit_bbs;
4085 493104 : edge pe;
4086 493104 : auto_vec<data_reference_p, 10> refs;
4087 494027 : bool loop_versioned;
4088 :
4089 494027 : again:
4090 494027 : rloop = NULL;
4091 494027 : ifc_bbs = NULL;
4092 494027 : need_to_lower_bitfields = false;
4093 494027 : need_to_ifcvt = false;
4094 494027 : need_to_predicate = false;
4095 494027 : need_to_rewrite_undefined = false;
4096 494027 : any_complicated_phi = false;
4097 494027 : loop_versioned = false;
4098 :
4099 : /* Apply more aggressive if-conversion when loop or its outer loop were
4100 : marked with simd pragma. When that's the case, we try to if-convert
4101 : loop containing PHIs with more than MAX_PHI_ARG_NUM arguments. */
4102 494027 : aggressive_if_conv = loop->force_vectorize;
4103 494027 : if (!aggressive_if_conv)
4104 : {
4105 485665 : class loop *outer_loop = loop_outer (loop);
4106 485665 : if (outer_loop && outer_loop->force_vectorize)
4107 8646 : aggressive_if_conv = true;
4108 : }
4109 :
4110 : /* If there are more than two BBs in the loop then there is at least one if
4111 : to convert. */
4112 494027 : if (loop->num_nodes > 2
4113 494027 : && !ifcvt_split_critical_edges (loop, aggressive_if_conv))
4114 94483 : goto cleanup;
4115 :
4116 399544 : ifc_bbs = get_loop_body_in_if_conv_order (loop);
4117 399544 : if (!ifc_bbs)
4118 : {
4119 2584 : if (dump_file && (dump_flags & TDF_DETAILS))
4120 3 : fprintf (dump_file, "Irreducible loop\n");
4121 2584 : goto cleanup;
4122 : }
4123 :
4124 396960 : if (find_data_references_in_loop (loop, &refs) == chrec_dont_know)
4125 147387 : goto cleanup;
4126 :
4127 249573 : if (loop->num_nodes > 2)
4128 : {
4129 : /* More than one loop exit is too much to handle. */
4130 136749 : if (!single_exit (loop))
4131 : {
4132 94163 : if (dump_file && (dump_flags & TDF_DETAILS))
4133 10 : fprintf (dump_file, "Can not ifcvt due to multiple exits\n");
4134 : }
4135 : else
4136 : {
4137 42586 : need_to_ifcvt = true;
4138 :
4139 42586 : if (!if_convertible_loop_p (loop, &refs)
4140 42586 : || !dbg_cnt (if_conversion_tree))
4141 13591 : goto cleanup;
4142 :
4143 28995 : if ((need_to_predicate || any_complicated_phi)
4144 3490 : && ((!flag_tree_loop_vectorize && !loop->force_vectorize)
4145 3490 : || loop->dont_vectorize))
4146 0 : goto cleanup;
4147 : }
4148 : }
4149 :
4150 235982 : if ((flag_tree_loop_vectorize || loop->force_vectorize)
4151 235929 : && !loop->dont_vectorize)
4152 235929 : need_to_lower_bitfields = bitfields_to_lower_p (loop, reads_to_lower,
4153 : writes_to_lower);
4154 :
4155 235982 : if (!need_to_ifcvt && !need_to_lower_bitfields)
4156 206730 : goto cleanup;
4157 :
4158 : /* The edge to insert invariant stmts on. */
4159 29252 : pe = loop_preheader_edge (loop);
4160 :
4161 : /* Since we have no cost model, always version loops unless the user
4162 : specified -ftree-loop-if-convert or unless versioning is required.
4163 : Either version this loop, or if the pattern is right for outer-loop
4164 : vectorization, version the outer loop. In the latter case we will
4165 : still if-convert the original inner loop. */
4166 29252 : if (need_to_lower_bitfields
4167 28993 : || need_to_predicate
4168 26894 : || any_complicated_phi
4169 25505 : || flag_tree_loop_if_convert != 1)
4170 : {
4171 29203 : class loop *vloop
4172 29203 : = (versionable_outer_loop_p (loop_outer (loop))
4173 29203 : ? loop_outer (loop) : loop);
4174 29203 : class loop *nloop = version_loop_for_if_conversion (vloop, preds);
4175 29203 : if (nloop == NULL)
4176 0 : goto cleanup;
4177 29203 : if (vloop != loop)
4178 : {
4179 : /* If versionable_outer_loop_p decided to version the
4180 : outer loop, version also the inner loop of the non-vectorized
4181 : loop copy. So we transform:
4182 : loop1
4183 : loop2
4184 : into:
4185 : if (LOOP_VECTORIZED (1, 3))
4186 : {
4187 : loop1
4188 : loop2
4189 : }
4190 : else
4191 : loop3 (copy of loop1)
4192 : if (LOOP_VECTORIZED (4, 5))
4193 : loop4 (copy of loop2)
4194 : else
4195 : loop5 (copy of loop4) */
4196 923 : gcc_assert (nloop->inner && nloop->inner->next == NULL);
4197 : rloop = nloop->inner;
4198 : }
4199 : else
4200 : /* If we versioned loop then make sure to insert invariant
4201 : stmts before the .LOOP_VECTORIZED check since the vectorizer
4202 : will re-use that for things like runtime alias versioning
4203 : whose condition can end up using those invariants. */
4204 28280 : pe = single_pred_edge (gimple_bb (preds->last ()));
4205 :
4206 : loop_versioned = true;
4207 : }
4208 :
4209 29252 : if (need_to_lower_bitfields)
4210 : {
4211 259 : if (dump_file && (dump_flags & TDF_DETAILS))
4212 : {
4213 9 : fprintf (dump_file, "-------------------------\n");
4214 9 : fprintf (dump_file, "Start lowering bitfields\n");
4215 : }
4216 448 : while (!reads_to_lower.is_empty ())
4217 189 : lower_bitfield (reads_to_lower.pop (), false);
4218 672 : while (!writes_to_lower.is_empty ())
4219 413 : lower_bitfield (writes_to_lower.pop (), true);
4220 :
4221 259 : if (dump_file && (dump_flags & TDF_DETAILS))
4222 : {
4223 9 : fprintf (dump_file, "Done lowering bitfields\n");
4224 9 : fprintf (dump_file, "-------------------------\n");
4225 : }
4226 : }
4227 29252 : if (need_to_ifcvt)
4228 : {
4229 : /* Before we rewrite edges we'll record their original position in the
4230 : edge map such that we can map the edges between the ifcvt and the
4231 : non-ifcvt loop during peeling. */
4232 28995 : uintptr_t idx = 0;
4233 115980 : for (edge exit : get_loop_exit_edges (loop))
4234 28995 : exit->aux = (void*)idx++;
4235 :
4236 : /* Now all statements are if-convertible. Combine all the basic
4237 : blocks into one huge basic block doing the if-conversion
4238 : on-the-fly. */
4239 28995 : combine_blocks (loop, loop_versioned);
4240 : }
4241 :
4242 : std::pair <tree, tree> *name_pair;
4243 : unsigned ssa_names_idx;
4244 34377 : FOR_EACH_VEC_ELT (redundant_ssa_names, ssa_names_idx, name_pair)
4245 5125 : replace_uses_by (name_pair->first, name_pair->second);
4246 29252 : redundant_ssa_names.release ();
4247 :
4248 : /* Perform local CSE, this esp. helps the vectorizer analysis if loads
4249 : and stores are involved. CSE only the loop body, not the entry
4250 : PHIs, those are to be kept in sync with the non-if-converted copy.
4251 : ??? We'll still keep dead stores though. */
4252 29252 : exit_bbs = BITMAP_ALLOC (NULL);
4253 117186 : for (edge exit : get_loop_exit_edges (loop))
4254 29448 : bitmap_set_bit (exit_bbs, exit->dest->index);
4255 29252 : todo |= do_rpo_vn (cfun, loop_preheader_edge (loop), exit_bbs,
4256 : false, true, true);
4257 :
4258 : /* Delete dead predicate computations. */
4259 29252 : ifcvt_local_dce (loop);
4260 29252 : BITMAP_FREE (exit_bbs);
4261 :
4262 29252 : ifcvt_hoist_invariants (loop, pe);
4263 :
4264 29252 : todo |= TODO_cleanup_cfg;
4265 :
4266 494027 : cleanup:
4267 494027 : data_reference_p dr;
4268 494027 : unsigned int i;
4269 1635985 : for (i = 0; refs.iterate (i, &dr); i++)
4270 : {
4271 1141958 : free (dr->aux);
4272 1141958 : free_data_ref (dr);
4273 : }
4274 494027 : refs.truncate (0);
4275 :
4276 494027 : if (ifc_bbs)
4277 : {
4278 : unsigned int i;
4279 :
4280 1947696 : for (i = 0; i < loop->num_nodes; i++)
4281 1579731 : free_bb_predicate (ifc_bbs[i]);
4282 :
4283 367965 : free (ifc_bbs);
4284 367965 : ifc_bbs = NULL;
4285 : }
4286 494027 : if (rloop != NULL)
4287 : {
4288 923 : loop = rloop;
4289 923 : reads_to_lower.truncate (0);
4290 923 : writes_to_lower.truncate (0);
4291 923 : goto again;
4292 : }
4293 :
4294 493104 : return todo;
4295 493104 : }
4296 :
4297 : /* Tree if-conversion pass management. */
4298 :
4299 : namespace {
4300 :
4301 : const pass_data pass_data_if_conversion =
4302 : {
4303 : GIMPLE_PASS, /* type */
4304 : "ifcvt", /* name */
4305 : OPTGROUP_NONE, /* optinfo_flags */
4306 : TV_TREE_LOOP_IFCVT, /* tv_id */
4307 : ( PROP_cfg | PROP_ssa ), /* properties_required */
4308 : 0, /* properties_provided */
4309 : 0, /* properties_destroyed */
4310 : 0, /* todo_flags_start */
4311 : 0, /* todo_flags_finish */
4312 : };
4313 :
4314 : class pass_if_conversion : public gimple_opt_pass
4315 : {
4316 : public:
4317 298828 : pass_if_conversion (gcc::context *ctxt)
4318 597656 : : gimple_opt_pass (pass_data_if_conversion, ctxt)
4319 : {}
4320 :
4321 : /* opt_pass methods: */
4322 : bool gate (function *) final override;
4323 : unsigned int execute (function *) final override;
4324 :
4325 : }; // class pass_if_conversion
4326 :
4327 : bool
4328 240924 : pass_if_conversion::gate (function *fun)
4329 : {
4330 35212 : return (((flag_tree_loop_vectorize || fun->has_force_vectorize_loops)
4331 207524 : && flag_tree_loop_if_convert != 0)
4332 274333 : || flag_tree_loop_if_convert == 1);
4333 : }
4334 :
4335 : unsigned int
4336 207536 : pass_if_conversion::execute (function *fun)
4337 : {
4338 207536 : unsigned todo = 0;
4339 :
4340 415072 : if (number_of_loops (fun) <= 1)
4341 : return 0;
4342 :
4343 207536 : auto_vec<gimple *> preds;
4344 1123077 : for (auto loop : loops_list (cfun, 0))
4345 500469 : if (flag_tree_loop_if_convert == 1
4346 500222 : || ((flag_tree_loop_vectorize || loop->force_vectorize)
4347 497464 : && !loop->dont_vectorize))
4348 493104 : todo |= tree_if_conversion (loop, &preds);
4349 :
4350 207536 : if (todo)
4351 : {
4352 18951 : free_numbers_of_iterations_estimates (fun);
4353 18951 : scev_reset ();
4354 : }
4355 :
4356 207536 : if (flag_checking)
4357 : {
4358 207532 : basic_block bb;
4359 6925009 : FOR_EACH_BB_FN (bb, fun)
4360 6717477 : gcc_assert (!bb->aux);
4361 : }
4362 :
4363 : /* Perform IL update now, it might elide some loops. */
4364 207536 : if (todo & TODO_cleanup_cfg)
4365 : {
4366 18951 : cleanup_tree_cfg ();
4367 18951 : if (need_ssa_update_p (fun))
4368 0 : todo |= TODO_update_ssa;
4369 : }
4370 207536 : if (todo & TODO_update_ssa_any)
4371 0 : update_ssa (todo & TODO_update_ssa_any);
4372 :
4373 : /* If if-conversion elided the loop fall back to the original one. Likewise
4374 : if the loops are not nested in the same outer loop. */
4375 236739 : for (unsigned i = 0; i < preds.length (); ++i)
4376 : {
4377 29203 : gimple *g = preds[i];
4378 29203 : if (!gimple_bb (g))
4379 0 : continue;
4380 29203 : auto ifcvt_loop = get_loop (fun, tree_to_uhwi (gimple_call_arg (g, 0)));
4381 29203 : auto orig_loop = get_loop (fun, tree_to_uhwi (gimple_call_arg (g, 1)));
4382 29203 : if (!ifcvt_loop || !orig_loop)
4383 : {
4384 2 : if (dump_file)
4385 0 : fprintf (dump_file, "If-converted loop vanished\n");
4386 2 : fold_loop_internal_call (g, boolean_false_node);
4387 : }
4388 29201 : else if (loop_outer (ifcvt_loop) != loop_outer (orig_loop))
4389 : {
4390 0 : if (dump_file)
4391 0 : fprintf (dump_file, "If-converted loop in different outer loop\n");
4392 0 : fold_loop_internal_call (g, boolean_false_node);
4393 : }
4394 : }
4395 :
4396 207536 : return 0;
4397 207536 : }
4398 :
4399 : } // anon namespace
4400 :
4401 : gimple_opt_pass *
4402 298828 : make_pass_if_conversion (gcc::context *ctxt)
4403 : {
4404 298828 : return new pass_if_conversion (ctxt);
4405 : }
|