Line data Source code
1 : /* If-conversion for vectorizer.
2 : Copyright (C) 2004-2026 Free Software Foundation, Inc.
3 : Contributed by Devang Patel <dpatel@apple.com>
4 :
5 : This file is part of GCC.
6 :
7 : GCC is free software; you can redistribute it and/or modify it under
8 : the terms of the GNU General Public License as published by the Free
9 : Software Foundation; either version 3, or (at your option) any later
10 : version.
11 :
12 : GCC is distributed in the hope that it will be useful, but WITHOUT ANY
13 : WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 : FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
15 : for more details.
16 :
17 : You should have received a copy of the GNU General Public License
18 : along with GCC; see the file COPYING3. If not see
19 : <http://www.gnu.org/licenses/>. */
20 :
21 : /* This pass implements a tree level if-conversion of loops. Its
22 : initial goal is to help the vectorizer to vectorize loops with
23 : conditions.
24 :
25 : A short description of if-conversion:
26 :
27 : o Decide if a loop is if-convertible or not.
28 : o Walk all loop basic blocks in breadth first order (BFS order).
29 : o Remove conditional statements (at the end of basic block)
30 : and propagate condition into destination basic blocks'
31 : predicate list.
32 : o Replace modify expression with conditional modify expression
33 : using current basic block's condition.
34 : o Merge all basic blocks
35 : o Replace phi nodes with conditional modify expr
36 : o Merge all basic blocks into header
37 :
38 : Sample transformation:
39 :
40 : INPUT
41 : -----
42 :
43 : # i_23 = PHI <0(0), i_18(10)>;
44 : <L0>:;
45 : j_15 = A[i_23];
46 : if (j_15 > 41) goto <L1>; else goto <L17>;
47 :
48 : <L17>:;
49 : goto <bb 3> (<L3>);
50 :
51 : <L1>:;
52 :
53 : # iftmp.2_4 = PHI <0(8), 42(2)>;
54 : <L3>:;
55 : A[i_23] = iftmp.2_4;
56 : i_18 = i_23 + 1;
57 : if (i_18 <= 15) goto <L19>; else goto <L18>;
58 :
59 : <L19>:;
60 : goto <bb 1> (<L0>);
61 :
62 : <L18>:;
63 :
64 : OUTPUT
65 : ------
66 :
67 : # i_23 = PHI <0(0), i_18(10)>;
68 : <L0>:;
69 : j_15 = A[i_23];
70 :
71 : <L3>:;
72 : iftmp.2_4 = j_15 > 41 ? 42 : 0;
73 : A[i_23] = iftmp.2_4;
74 : i_18 = i_23 + 1;
75 : if (i_18 <= 15) goto <L19>; else goto <L18>;
76 :
77 : <L19>:;
78 : goto <bb 1> (<L0>);
79 :
80 : <L18>:;
81 : */
82 :
83 : #include "config.h"
84 : #include "system.h"
85 : #include "coretypes.h"
86 : #include "backend.h"
87 : #include "rtl.h"
88 : #include "tree.h"
89 : #include "gimple.h"
90 : #include "cfghooks.h"
91 : #include "tree-pass.h"
92 : #include "ssa.h"
93 : #include "expmed.h"
94 : #include "expr.h"
95 : #include "optabs-tree.h"
96 : #include "gimple-pretty-print.h"
97 : #include "alias.h"
98 : #include "fold-const.h"
99 : #include "stor-layout.h"
100 : #include "gimple-iterator.h"
101 : #include "gimple-fold.h"
102 : #include "gimplify.h"
103 : #include "gimplify-me.h"
104 : #include "tree-cfg.h"
105 : #include "tree-into-ssa.h"
106 : #include "tree-ssa.h"
107 : #include "cfgloop.h"
108 : #include "tree-data-ref.h"
109 : #include "tree-scalar-evolution.h"
110 : #include "tree-ssa-loop.h"
111 : #include "tree-ssa-loop-niter.h"
112 : #include "tree-ssa-loop-ivopts.h"
113 : #include "tree-ssa-address.h"
114 : #include "dbgcnt.h"
115 : #include "tree-hash-traits.h"
116 : #include "varasm.h"
117 : #include "builtins.h"
118 : #include "cfganal.h"
119 : #include "internal-fn.h"
120 : #include "fold-const.h"
121 : #include "tree-ssa-sccvn.h"
122 : #include "tree-cfgcleanup.h"
123 : #include "tree-ssa-dse.h"
124 : #include "tree-vectorizer.h"
125 : #include "tree-eh.h"
126 : #include "cgraph.h"
127 :
128 : /* For lang_hooks.types.type_for_mode. */
129 : #include "langhooks.h"
130 :
131 : /* Only handle PHIs with no more arguments unless we are asked to by
132 : simd pragma. */
#define MAX_PHI_ARG_NUM \
  ((unsigned) param_max_tree_if_conversion_phi_args)

/* True if we've converted a statement that was only executed when some
   condition C was true, and if for correctness we need to predicate the
   statement to ensure that it is a no-op when C is false.  See
   predicate_statements for the kinds of predication we support.  */
static bool need_to_predicate;

/* True if we have to rewrite stmts that may invoke undefined behavior
   when a condition C was false so it doesn't if it is always executed.
   See predicate_statements for the kinds of predication we support.  */
static bool need_to_rewrite_undefined;

/* Indicate if there are any complicated PHIs that need to be handled in
   if-conversion.  Complicated PHI has more than two arguments and can't
   be degenerated to two arguments PHI.  Set by if_convertible_phi_p.
   See more information in comment before
   phi_convertible_by_degenerating_args.  */
static bool any_complicated_phi;

/* True if we have bitfield accesses we can lower.  */
static bool need_to_lower_bitfields;

/* True if there is any ifcvting to be done.  */
static bool need_to_ifcvt;
158 :
159 : /* Hash for struct innermost_loop_behavior. It depends on the user to
160 : free the memory. */
161 :
struct innermost_loop_behavior_hash : nofree_ptr_hash <innermost_loop_behavior>
{
  /* Hash the base address, offset, init and step of the behavior.  */
  static inline hashval_t hash (const value_type &);
  /* Compare two behaviors component-wise with operand_equal_p.  */
  static inline bool equal (const value_type &,
			    const compare_type &);
};
168 :
169 : inline hashval_t
170 449209 : innermost_loop_behavior_hash::hash (const value_type &e)
171 : {
172 449209 : hashval_t hash;
173 :
174 449209 : hash = iterative_hash_expr (e->base_address, 0);
175 449209 : hash = iterative_hash_expr (e->offset, hash);
176 449209 : hash = iterative_hash_expr (e->init, hash);
177 449209 : return iterative_hash_expr (e->step, hash);
178 : }
179 :
inline bool
innermost_loop_behavior_hash::equal (const value_type &e1,
				     const compare_type &e2)
{
  /* The behaviors can only be equal when each component is either
     present in both or absent from both.  */
  if ((e1->base_address && !e2->base_address)
      || (!e1->base_address && e2->base_address)
      || (!e1->offset && e2->offset)
      || (e1->offset && !e2->offset)
      || (!e1->init && e2->init)
      || (e1->init && !e2->init)
      || (!e1->step && e2->step)
      || (e1->step && !e2->step))
    return false;

  /* Compare each component that is present in both behaviors.  */
  if (e1->base_address && e2->base_address
      && !operand_equal_p (e1->base_address, e2->base_address, 0))
    return false;
  if (e1->offset && e2->offset
      && !operand_equal_p (e1->offset, e2->offset, 0))
    return false;
  if (e1->init && e2->init
      && !operand_equal_p (e1->init, e2->init, 0))
    return false;
  if (e1->step && e2->step
      && !operand_equal_p (e1->step, e2->step, 0))
    return false;

  return true;
}
209 :
/* List of basic blocks in if-conversion-suitable order.  */
static basic_block *ifc_bbs;

/* Hash table to store <DR's innermost loop behavior, DR> pairs.  Filled
   by hash_memrefs_baserefs_and_store_DRs_read_written_info and queried
   by ifcvt_memrefs_wont_trap.  */
static hash_map<innermost_loop_behavior_hash,
		data_reference_p> *innermost_DR_map;

/* Hash table to store <base reference, DR> pairs.  Filled and queried
   alongside INNERMOST_DR_MAP.  */
static hash_map<tree_operand_hash, data_reference_p> *baseref_DR_map;

/* List of redundant SSA names: the first should be replaced by the second.  */
static vec< std::pair<tree, tree> > redundant_ssa_names;
222 :
223 : /* Structure used to predicate basic blocks. This is attached to the
224 : ->aux field of the BBs in the loop to be if-converted. */
struct bb_predicate {

  /* The condition under which this basic block is executed.  */
  tree predicate;

  /* PREDICATE is gimplified, and the sequence of statements is
     recorded here, in order to avoid the duplication of computations
     that occur in previous conditions.  See PR44483.  */
  gimple_seq predicate_gimplified_stmts;

  /* Records the number of statements recorded into
     PREDICATE_GIMPLIFIED_STMTS.  Queried through
     get_bb_num_predicate_stmts.  */
  unsigned no_predicate_stmts;
};
239 :
240 : /* Returns true when the basic block BB has a predicate. */
241 :
242 : static inline bool
243 1976203 : bb_has_predicate (basic_block bb)
244 : {
245 1976203 : return bb->aux != NULL;
246 : }
247 :
248 : /* Returns the gimplified predicate for basic block BB. */
249 :
250 : static inline tree
251 992351 : bb_predicate (basic_block bb)
252 : {
253 992351 : return ((struct bb_predicate *) bb->aux)->predicate;
254 : }
255 :
256 : /* Sets the gimplified predicate COND for basic block BB. */
257 :
258 : static inline void
259 533381 : set_bb_predicate (basic_block bb, tree cond)
260 : {
261 533381 : auto aux = (struct bb_predicate *) bb->aux;
262 533381 : gcc_assert ((TREE_CODE (cond) == TRUTH_NOT_EXPR
263 : && is_gimple_val (TREE_OPERAND (cond, 0)))
264 : || is_gimple_val (cond));
265 533381 : aux->predicate = cond;
266 533381 : aux->no_predicate_stmts++;
267 :
268 533381 : if (dump_file && (dump_flags & TDF_DETAILS))
269 263 : fprintf (dump_file, "Recording block %d value %d\n", bb->index,
270 : aux->no_predicate_stmts);
271 533381 : }
272 :
273 : /* Returns the sequence of statements of the gimplification of the
274 : predicate for basic block BB. */
275 :
276 : static inline gimple_seq
277 664083 : bb_predicate_gimplified_stmts (basic_block bb)
278 : {
279 664083 : return ((struct bb_predicate *) bb->aux)->predicate_gimplified_stmts;
280 : }
281 :
282 : /* Sets the sequence of statements STMTS of the gimplification of the
283 : predicate for basic block BB. If PRESERVE_COUNTS then don't clear the predicate
284 : counts. */
285 :
286 : static inline void
287 312368 : set_bb_predicate_gimplified_stmts (basic_block bb, gimple_seq stmts,
288 : bool preserve_counts)
289 : {
290 312368 : ((struct bb_predicate *) bb->aux)->predicate_gimplified_stmts = stmts;
291 312368 : if (stmts == NULL && !preserve_counts)
292 255487 : ((struct bb_predicate *) bb->aux)->no_predicate_stmts = 0;
293 91500 : }
294 :
295 : /* Adds the sequence of statements STMTS to the sequence of statements
296 : of the predicate for basic block BB. */
297 :
static inline void
add_bb_predicate_gimplified_stmts (basic_block bb, gimple_seq stmts)
{
  /* We might have updated some stmts in STMTS via force_gimple_operand
     calling fold_stmt and that producing multiple stmts.  Delink immediate
     uses so update_ssa after loop versioning doesn't get confused for
     the not yet inserted predicates.
     ??? This should go away once we reliably avoid updating stmts
     not in any BB.  */
  for (gimple_stmt_iterator gsi = gsi_start (stmts);
       !gsi_end_p (gsi); gsi_next (&gsi))
    {
      gimple *stmt = gsi_stmt (gsi);
      delink_stmt_imm_use (stmt);
      gimple_set_modified (stmt, true);
      /* Keep the per-block predicate statement count in sync.  */
      ((struct bb_predicate *) bb->aux)->no_predicate_stmts++;
    }
  /* Append STMTS to BB's pending predicate statements.  */
  gimple_seq_add_seq_without_update
    (&(((struct bb_predicate *) bb->aux)->predicate_gimplified_stmts), stmts);
}
318 :
319 : /* Return the number of statements the predicate of the basic block consists
320 : of. */
321 :
322 : static inline unsigned
323 16282 : get_bb_num_predicate_stmts (basic_block bb)
324 : {
325 16282 : return ((struct bb_predicate *) bb->aux)->no_predicate_stmts;
326 : }
327 :
328 : /* Initializes to TRUE the predicate of basic block BB. */
329 :
330 : static inline void
331 220868 : init_bb_predicate (basic_block bb)
332 : {
333 220868 : bb->aux = XNEW (struct bb_predicate);
334 220868 : set_bb_predicate_gimplified_stmts (bb, NULL, false);
335 220868 : set_bb_predicate (bb, boolean_true_node);
336 220868 : }
337 :
338 : /* Release the SSA_NAMEs associated with the predicate of basic block BB. */
339 :
static inline void
release_bb_predicate (basic_block bb)
{
  gimple_seq stmts = bb_predicate_gimplified_stmts (bb);
  if (stmts)
    {
      /* Ensure that these stmts haven't yet been added to a bb.  */
      if (flag_checking)
	for (gimple_stmt_iterator i = gsi_start (stmts);
	     !gsi_end_p (i); gsi_next (&i))
	  gcc_assert (! gimple_bb (gsi_stmt (i)));

      /* Discard them.  This also resets the statement count since
	 PRESERVE_COUNTS is false.  */
      gimple_seq_discard (stmts);
      set_bb_predicate_gimplified_stmts (bb, NULL, false);
    }
}
357 :
358 : /* Free the predicate of basic block BB. */
359 :
360 : static inline void
361 1762850 : free_bb_predicate (basic_block bb)
362 : {
363 1762850 : if (!bb_has_predicate (bb))
364 : return;
365 :
366 220868 : release_bb_predicate (bb);
367 220868 : free (bb->aux);
368 220868 : bb->aux = NULL;
369 : }
370 :
371 : /* Reinitialize predicate of BB with the true predicate. */
372 :
373 : static inline void
374 213353 : reset_bb_predicate (basic_block bb)
375 : {
376 213353 : if (!bb_has_predicate (bb))
377 0 : init_bb_predicate (bb);
378 : else
379 : {
380 213353 : release_bb_predicate (bb);
381 213353 : set_bb_predicate (bb, boolean_true_node);
382 : }
383 213353 : }
384 :
385 : /* Returns a new SSA_NAME of type TYPE that is assigned the value of
386 : the expression EXPR. Inserts the statement created for this
387 : computation before GSI and leaves the iterator GSI at the same
388 : statement. */
389 :
static tree
ifc_temp_var (tree type, tree expr, gimple_stmt_iterator *gsi)
{
  tree new_name = make_temp_ssa_name (type, NULL, "_ifc_");
  gimple *stmt = gimple_build_assign (new_name, expr);
  /* Copy the virtual use of the statement at GSI onto the new
     assignment — presumably because EXPR may reference memory; done
     before insertion so the vuse read refers to the original stmt.  */
  gimple_set_vuse (stmt, gimple_vuse (gsi_stmt (*gsi)));
  gsi_insert_before (gsi, stmt, GSI_SAME_STMT);
  return new_name;
}
399 :
400 : /* Return true when COND is a false predicate. */
401 :
402 : static inline bool
403 87409 : is_false_predicate (tree cond)
404 : {
405 87409 : return (cond != NULL_TREE
406 87409 : && (cond == boolean_false_node
407 87409 : || integer_zerop (cond)));
408 : }
409 :
410 : /* Return true when COND is a true predicate. */
411 :
412 : static inline bool
413 1137158 : is_true_predicate (tree cond)
414 : {
415 1137158 : return (cond == NULL_TREE
416 1137158 : || cond == boolean_true_node
417 1623936 : || integer_onep (cond));
418 : }
419 :
420 : /* Returns true when BB has a predicate that is not trivial: true or
421 : NULL_TREE. */
422 :
423 : static inline bool
424 552917 : is_predicated (basic_block bb)
425 : {
426 12386 : return !is_true_predicate (bb_predicate (bb));
427 : }
428 :
429 : /* Parses the predicate COND and returns its comparison code and
430 : operands OP0 and OP1. */
431 :
static enum tree_code
parse_predicate (tree cond, tree *op0, tree *op1)
{
  gimple *s;

  if (TREE_CODE (cond) == SSA_NAME
      && is_gimple_assign (s = SSA_NAME_DEF_STMT (cond)))
    {
      /* The SSA name is defined by a comparison: return its code and
	 operands.  */
      if (TREE_CODE_CLASS (gimple_assign_rhs_code (s)) == tcc_comparison)
	{
	  *op0 = gimple_assign_rhs1 (s);
	  *op1 = gimple_assign_rhs2 (s);
	  return gimple_assign_rhs_code (s);
	}

      /* Defined by a negation: parse the negated operand recursively
	 and invert the resulting comparison code.  */
      else if (gimple_assign_rhs_code (s) == TRUTH_NOT_EXPR)
	{
	  tree op = gimple_assign_rhs1 (s);
	  tree type = TREE_TYPE (op);
	  enum tree_code code = parse_predicate (op, op0, op1);

	  return code == ERROR_MARK ? ERROR_MARK
	    : invert_tree_comparison (code, HONOR_NANS (type));
	}

      return ERROR_MARK;
    }

  /* A bare comparison tree.  */
  if (COMPARISON_CLASS_P (cond))
    {
      *op0 = TREE_OPERAND (cond, 0);
      *op1 = TREE_OPERAND (cond, 1);
      return TREE_CODE (cond);
    }

  /* COND is not a comparison we can decompose.  */
  return ERROR_MARK;
}
469 :
470 : /* Returns the fold of predicate C1 OR C2 at location LOC. */
471 :
472 : static tree
473 238093 : fold_or_predicates (location_t loc, tree c1, tree c2)
474 : {
475 238093 : tree op1a, op1b, op2a, op2b;
476 238093 : enum tree_code code1 = parse_predicate (c1, &op1a, &op1b);
477 238093 : enum tree_code code2 = parse_predicate (c2, &op2a, &op2b);
478 :
479 238093 : if (code1 != ERROR_MARK && code2 != ERROR_MARK)
480 : {
481 2213 : tree t = maybe_fold_or_comparisons (boolean_type_node, code1, op1a, op1b,
482 : code2, op2a, op2b);
483 2213 : if (t)
484 : return t;
485 : }
486 :
487 235975 : return fold_build2_loc (loc, TRUTH_OR_EXPR, boolean_type_node, c1, c2);
488 : }
489 :
490 : /* Returns either a COND_EXPR or the folded expression if the folded
491 : expression is a MIN_EXPR, a MAX_EXPR, an ABS_EXPR,
492 : a constant or a SSA_NAME. */
493 :
static tree
fold_build_cond_expr (tree type, tree cond, tree rhs, tree lhs)
{
  /* Short cut the case where both rhs and lhs are the same.  */
  if (operand_equal_p (rhs, lhs))
    return rhs;

  /* If COND is comparison r != 0 and r has boolean type, convert COND
     to SSA_NAME to accept by vect bool pattern.  */
  if (TREE_CODE (cond) == NE_EXPR)
    {
      tree op0 = TREE_OPERAND (cond, 0);
      tree op1 = TREE_OPERAND (cond, 1);
      if (TREE_CODE (op0) == SSA_NAME
	  && TREE_CODE (TREE_TYPE (op0)) == BOOLEAN_TYPE
	  && (integer_zerop (op1)))
	cond = op0;
    }

  /* Try match-and-simplify on COND ? RHS : LHS; only simplifications
     to a gimple value, ABS_EXPR, MIN_EXPR or MAX_EXPR are used.  */
  gimple_match_op cexpr (gimple_match_cond::UNCOND, COND_EXPR,
			 type, cond, rhs, lhs);
  if (cexpr.resimplify (NULL, follow_all_ssa_edges))
    {
      if (gimple_simplified_result_is_gimple_val (&cexpr))
	return cexpr.ops[0];
      else if (cexpr.code == ABS_EXPR)
	return build1 (ABS_EXPR, type, cexpr.ops[0]);
      else if (cexpr.code == MIN_EXPR
	       || cexpr.code == MAX_EXPR)
	return build2 ((tree_code)cexpr.code, type, cexpr.ops[0], cexpr.ops[1]);
    }

  /* Fall back to a plain COND_EXPR.  */
  return build3 (COND_EXPR, type, cond, rhs, lhs);
}
528 :
529 : /* Add condition NC to the predicate list of basic block BB. LOOP is
530 : the loop to be if-converted. Use predicate of cd-equivalent block
531 : for join bb if it exists: we call basic blocks bb1 and bb2
532 : cd-equivalent if they are executed under the same condition. */
533 :
static inline void
add_to_predicate_list (class loop *loop, basic_block bb, tree nc)
{
  tree bc, *tp;
  basic_block dom_bb;

  /* ORing in a true condition leaves the predicate unchanged.  */
  if (is_true_predicate (nc))
    return;

  /* If dominance tells us this basic block is always executed,
     don't record any predicates for it.  */
  if (dominated_by_p (CDI_DOMINATORS, loop->latch, bb))
    return;

  dom_bb = get_immediate_dominator (CDI_DOMINATORS, bb);
  /* We use notion of cd equivalence to get simpler predicate for
     join block, e.g. if join block has 2 predecessors with predicates
     p1 & p2 and p1 & !p2, we'd like to get p1 for it instead of
     p1 & p2 | p1 & !p2.  */
  if (dom_bb != loop->header
      && get_immediate_dominator (CDI_POST_DOMINATORS, dom_bb) == bb)
    {
      gcc_assert (flow_bb_inside_loop_p (loop, dom_bb));
      bc = bb_predicate (dom_bb);
      if (!is_true_predicate (bc))
	set_bb_predicate (bb, bc);
      else
	gcc_assert (is_true_predicate (bb_predicate (bb)));
      if (dump_file && (dump_flags & TDF_DETAILS))
	fprintf (dump_file, "Use predicate of bb#%d for bb#%d\n",
		 dom_bb->index, bb->index);
      return;
    }

  /* First condition seen for BB: the predicate is just NC; otherwise
     OR NC into the existing predicate.  */
  if (!is_predicated (bb))
    bc = nc;
  else
    {
      bc = bb_predicate (bb);
      bc = fold_or_predicates (EXPR_LOCATION (bc), nc, bc);
      if (is_true_predicate (bc))
	{
	  reset_bb_predicate (bb);
	  return;
	}
    }

  /* Allow a TRUTH_NOT_EXPR around the main predicate.  */
  if (TREE_CODE (bc) == TRUTH_NOT_EXPR)
    tp = &TREE_OPERAND (bc, 0);
  else
    tp = &bc;
  /* Gimplify the (possibly negated) predicate and queue the produced
     statements on BB for later insertion.  */
  if (!is_gimple_val (*tp))
    {
      gimple_seq stmts;
      *tp = force_gimple_operand (*tp, &stmts, true, NULL_TREE);
      add_bb_predicate_gimplified_stmts (bb, stmts);
    }
  set_bb_predicate (bb, bc);
}
594 :
595 : /* Add the condition COND to the previous condition PREV_COND, and add
596 : this to the predicate list of the destination of edge E. LOOP is
597 : the loop to be if-converted. */
598 :
599 : static void
600 114362 : add_to_dst_predicate_list (class loop *loop, edge e,
601 : tree prev_cond, tree cond)
602 : {
603 114362 : if (!flow_bb_inside_loop_p (loop, e->dest))
604 : return;
605 :
606 114362 : if (!is_true_predicate (prev_cond))
607 22276 : cond = fold_build2 (TRUTH_AND_EXPR, boolean_type_node,
608 : prev_cond, cond);
609 :
610 114362 : if (!dominated_by_p (CDI_DOMINATORS, loop->latch, e->dest))
611 90880 : add_to_predicate_list (loop, e->dest, cond);
612 : }
613 :
614 : /* Return true if one of the successor edges of BB exits LOOP. */
615 :
616 : static bool
617 3465507 : bb_with_exit_edge_p (const class loop *loop, basic_block bb)
618 : {
619 3465507 : edge e;
620 3465507 : edge_iterator ei;
621 :
622 7010569 : FOR_EACH_EDGE (e, ei, bb->succs)
623 4922976 : if (loop_exit_edge_p (loop, e))
624 : return true;
625 :
626 : return false;
627 : }
628 :
629 : /* Given PHI which has more than two arguments, this function checks if
630 : it's if-convertible by degenerating its arguments. Specifically, if
631 : below two conditions are satisfied:
632 :
633 : 1) Number of PHI arguments with different values equals to 2 and one
634 : argument has the only occurrence.
635 : 2) The edge corresponding to the unique argument isn't critical edge.
636 :
637 : Such PHI can be handled as PHIs have only two arguments. For example,
638 : below PHI:
639 :
640 : res = PHI <A_1(e1), A_1(e2), A_2(e3)>;
641 :
642 : can be transformed into:
643 :
644 : res = (predicate of e3) ? A_2 : A_1;
645 :
646 : Return TRUE if it is the case, FALSE otherwise. */
647 :
static bool
phi_convertible_by_degenerating_args (gphi *phi)
{
  edge e;
  tree arg, t1 = NULL, t2 = NULL;
  unsigned int i, i1 = 0, i2 = 0, n1 = 0, n2 = 0;
  unsigned int num_args = gimple_phi_num_args (phi);

  gcc_assert (num_args > 2);

  /* Bucket the arguments into at most two distinct values T1 and T2,
     counting occurrences in N1/N2 and recording the last argument
     index of each value in I1/I2.  */
  for (i = 0; i < num_args; i++)
    {
      arg = gimple_phi_arg_def (phi, i);
      if (t1 == NULL || operand_equal_p (t1, arg, 0))
	{
	  n1++;
	  i1 = i;
	  t1 = arg;
	}
      else if (t2 == NULL || operand_equal_p (t2, arg, 0))
	{
	  n2++;
	  i2 = i;
	  t2 = arg;
	}
      else
	/* More than two distinct argument values.  */
	return false;
    }

  /* One of the two values must have a unique occurrence.  */
  if (n1 != 1 && n2 != 1)
    return false;

  /* Check if the edge corresponding to the unique arg is critical.  */
  e = gimple_phi_arg_edge (phi, (n1 == 1) ? i1 : i2);
  if (EDGE_COUNT (e->src->succs) > 1)
    return false;

  return true;
}
687 :
688 : /* Return true when PHI is if-convertible. PHI is part of loop LOOP
689 : and it belongs to basic block BB. Note at this point, it is sure
690 : that PHI is if-convertible. This function updates global variable
691 : ANY_COMPLICATED_PHI if PHI is complicated. */
692 :
693 : static bool
694 138420 : if_convertible_phi_p (class loop *loop, basic_block bb, gphi *phi)
695 : {
696 138420 : if (dump_file && (dump_flags & TDF_DETAILS))
697 : {
698 79 : fprintf (dump_file, "-------------------------\n");
699 79 : print_gimple_stmt (dump_file, phi, 0, TDF_SLIM);
700 : }
701 :
702 138420 : if (bb != loop->header
703 54619 : && gimple_phi_num_args (phi) > 2
704 143947 : && !phi_convertible_by_degenerating_args (phi))
705 2273 : any_complicated_phi = true;
706 :
707 138420 : return true;
708 : }
709 :
710 : /* Records the status of a data reference. This struct is attached to
711 : each DR->aux field. */
712 :
struct ifc_dr {
  /* True when the reference is read or written on every iteration of
     the if-converted loop (its rw_predicate folded to true).  */
  bool rw_unconditionally;
  /* True when the reference is written on every iteration.  */
  bool w_unconditionally;
  /* True when the base object is written at least once
     unconditionally.  */
  bool written_at_least_once;

  /* OR of the predicates under which this reference is accessed.  */
  tree rw_predicate;
  /* OR of the predicates under which this reference is written.  */
  tree w_predicate;
  /* OR of the predicates under which the base object is written.  */
  tree base_w_predicate;
};

/* Access the ifc_dr record attached to DR's aux field.  */
#define IFC_DR(DR) ((struct ifc_dr *) (DR)->aux)
#define DR_BASE_W_UNCONDITIONALLY(DR) (IFC_DR (DR)->written_at_least_once)
#define DR_RW_UNCONDITIONALLY(DR) (IFC_DR (DR)->rw_unconditionally)
#define DR_W_UNCONDITIONALLY(DR) (IFC_DR (DR)->w_unconditionally)
727 :
728 : /* Iterates over DR's and stores refs, DR and base refs, DR pairs in
729 : HASH tables. While storing them in HASH table, it checks if the
730 : reference is unconditionally read or written and stores that as a flag
731 : information. For base reference it checks if it is written atlest once
732 : unconditionally and stores it as flag information along with DR.
733 : In other words for every data reference A in STMT there exist other
734 : accesses to a data reference with the same base with predicates that
735 : add up (OR-up) to the true predicate: this ensures that the data
736 : reference A is touched (read or written) on every iteration of the
737 : if-converted loop. */
static void
hash_memrefs_baserefs_and_store_DRs_read_written_info (data_reference_p a)
{

  data_reference_p *master_dr, *base_master_dr;
  tree base_ref = DR_BASE_OBJECT (a);
  innermost_loop_behavior *innermost = &DR_INNERMOST (a);
  tree ca = bb_predicate (gimple_bb (DR_STMT (a)));
  bool exist1, exist2;

  /* Find or create the "master" DR keyed by the innermost loop
     behavior; all DRs with the same behavior accumulate their block
     predicates into it.  */
  master_dr = &innermost_DR_map->get_or_insert (innermost, &exist1);
  if (!exist1)
    *master_dr = a;

  if (DR_IS_WRITE (a))
    {
      /* OR this block's predicate into the write predicate; if the
	 result is true the ref is written on every iteration.  */
      IFC_DR (*master_dr)->w_predicate
	= fold_or_predicates (UNKNOWN_LOCATION, ca,
			      IFC_DR (*master_dr)->w_predicate);
      if (is_true_predicate (IFC_DR (*master_dr)->w_predicate))
	DR_W_UNCONDITIONALLY (*master_dr) = true;
    }
  /* Likewise for the combined read-or-write predicate.  */
  IFC_DR (*master_dr)->rw_predicate
    = fold_or_predicates (UNKNOWN_LOCATION, ca,
			  IFC_DR (*master_dr)->rw_predicate);
  if (is_true_predicate (IFC_DR (*master_dr)->rw_predicate))
    DR_RW_UNCONDITIONALLY (*master_dr) = true;

  if (DR_IS_WRITE (a))
    {
      /* The same accumulation, keyed by the base object this time.  */
      base_master_dr = &baseref_DR_map->get_or_insert (base_ref, &exist2);
      if (!exist2)
	*base_master_dr = a;
      IFC_DR (*base_master_dr)->base_w_predicate
	= fold_or_predicates (UNKNOWN_LOCATION, ca,
			      IFC_DR (*base_master_dr)->base_w_predicate);
      if (is_true_predicate (IFC_DR (*base_master_dr)->base_w_predicate))
	DR_BASE_W_UNCONDITIONALLY (*base_master_dr) = true;
    }
}
779 : /* Return TRUE if can prove the index IDX of an array reference REF is
780 : within array bound. Return false otherwise. */
781 :
static bool
idx_within_array_bound (tree ref, tree *idx, void *dta)
{
  wi::overflow_type overflow;
  widest_int niter, valid_niter, delta, wi_step;
  tree ev, init, step;
  tree low, high;
  class loop *loop = (class loop*) dta;

  /* Only support within-bound access for array references.  */
  if (TREE_CODE (ref) != ARRAY_REF)
    return false;

  /* For arrays that might have flexible sizes, it is not guaranteed that they
     do not extend over their declared size.  */
  if (array_ref_flexible_size_p (ref))
    return false;

  /* Analyze the index as a scalar evolution: INIT plus STEP per
     iteration of LOOP.  */
  ev = analyze_scalar_evolution (loop, *idx);
  ev = instantiate_parameters (loop, ev);
  init = initial_condition (ev);
  step = evolution_part_in_loop_num (ev, loop->num);

  /* Both the initial value and the step must be compile-time
     constants.  */
  if (!init || TREE_CODE (init) != INTEGER_CST
      || (step && TREE_CODE (step) != INTEGER_CST))
    return false;

  low = array_ref_low_bound (ref);
  high = array_ref_up_bound (ref);

  /* The case of nonconstant bounds could be handled, but it would be
     complicated.  */
  if (TREE_CODE (low) != INTEGER_CST
      || !high || TREE_CODE (high) != INTEGER_CST)
    return false;

  /* Check if the intial idx is within bound.  */
  if (wi::to_widest (init) < wi::to_widest (low)
      || wi::to_widest (init) > wi::to_widest (high))
    return false;

  /* The idx is always within bound.  */
  if (!step || integer_zerop (step))
    return true;

  if (!max_loop_iterations (loop, &niter))
    return false;

  /* Compute how far the index can travel before leaving [low, high],
     depending on the direction of the step.  */
  if (wi::to_widest (step) < 0)
    {
      delta = wi::to_widest (init) - wi::to_widest (low);
      wi_step = -wi::to_widest (step);
    }
  else
    {
      delta = wi::to_widest (high) - wi::to_widest (init);
      wi_step = wi::to_widest (step);
    }

  valid_niter = wi::div_floor (delta, wi_step, SIGNED, &overflow);
  /* The iteration space of idx is within array bound.  */
  if (!overflow && niter <= valid_niter)
    return true;

  return false;
}
848 :
849 : /* Return TRUE if ref is a within bound array reference. */
850 :
851 : bool
852 237423 : ref_within_array_bound (gimple *stmt, tree ref)
853 : {
854 237423 : class loop *loop = loop_containing_stmt (stmt);
855 :
856 237423 : gcc_assert (loop != NULL);
857 237423 : return for_each_index (&ref, idx_within_array_bound, loop);
858 : }
859 :
860 :
861 : /* Given a memory reference expression T, return TRUE if base object
862 : it refers to is writable. The base object of a memory reference
863 : is the main object being referenced, which is returned by function
864 : get_base_address. */
865 :
866 : static bool
867 2505 : base_object_writable (tree ref)
868 : {
869 2505 : tree base_tree = get_base_address (ref);
870 :
871 2505 : return (base_tree
872 2505 : && DECL_P (base_tree)
873 1659 : && decl_binds_to_current_def_p (base_tree)
874 4160 : && !TREE_READONLY (base_tree));
875 : }
876 :
877 : /* Return true when the memory references of STMT won't trap in the
878 : if-converted code. There are two things that we have to check for:
879 :
880 : - writes to memory occur to writable memory: if-conversion of
881 : memory writes transforms the conditional memory writes into
882 : unconditional writes, i.e. "if (cond) A[i] = foo" is transformed
883 : into "A[i] = cond ? foo : A[i]", and as the write to memory may not
884 : be executed at all in the original code, it may be a readonly
885 : memory. To check that A is not const-qualified, we check that
886 : there exists at least an unconditional write to A in the current
887 : function.
888 :
889 : - reads or writes to memory are valid memory accesses for every
890 : iteration. To check that the memory accesses are correctly formed
891 : and that we are allowed to read and write in these locations, we
892 : check that the memory accesses to be if-converted occur at every
893 : iteration unconditionally.
894 :
895 : Returns true for the memory reference in STMT, same memory reference
896 : is read or written unconditionally atleast once and the base memory
897 : reference is written unconditionally once. This is to check reference
898 : will not write fault. Also retuns true if the memory reference is
899 : unconditionally read once then we are conditionally writing to memory
900 : which is defined as read and write and is bound to the definition
901 : we are seeing. */
static bool
ifcvt_memrefs_wont_trap (gimple *stmt, vec<data_reference_p> drs)
{
  /* If DR didn't see a reference here we can't use it to tell
     whether the ref traps or not.  */
  if (gimple_uid (stmt) == 0)
    return false;

  /* The stmt uid is a 1-based index into DRS.  */
  data_reference_p *master_dr, *base_master_dr;
  data_reference_p a = drs[gimple_uid (stmt) - 1];

  tree base = DR_BASE_OBJECT (a);
  innermost_loop_behavior *innermost = &DR_INNERMOST (a);

  gcc_assert (DR_STMT (a) == stmt);
  gcc_assert (DR_BASE_ADDRESS (a) || DR_OFFSET (a)
              || DR_INIT (a) || DR_STEP (a));

  master_dr = innermost_DR_map->get (innermost);
  gcc_assert (master_dr != NULL);

  base_master_dr = baseref_DR_map->get (base);

  /* If a is unconditionally written to it doesn't trap.  */
  if (DR_W_UNCONDITIONALLY (*master_dr))
    return true;

  /* If a is unconditionally accessed then ...

     Even a is conditional access, we can treat it as an unconditional
     one if it's an array reference and all its index are within array
     bound.  */
  if (DR_RW_UNCONDITIONALLY (*master_dr)
      || ref_within_array_bound (stmt, DR_REF (a)))
    {
      /* an unconditional read won't trap.  */
      if (DR_IS_READ (a))
	return true;

      /* an unconditionaly write won't trap if the base is written
         to unconditionally.  */
      if ((base_master_dr
	   && DR_BASE_W_UNCONDITIONALLY (*base_master_dr))
	  /* or the base is known to be not readonly.  */
	  || base_object_writable (DR_REF (a)))
	return !ref_can_have_store_data_races (base);
    }

  return false;
}
952 :
953 : /* Return true if STMT could be converted into a masked load or store
954 : (conditional load or store based on a mask computed from bb predicate). */
955 :
956 : static bool
957 12027 : ifcvt_can_use_mask_load_store (gimple *stmt)
958 : {
959 : /* Check whether this is a load or store. */
960 12027 : tree lhs = gimple_assign_lhs (stmt);
961 12027 : bool is_load;
962 12027 : tree ref;
963 12027 : if (gimple_store_p (stmt))
964 : {
965 2709 : if (!is_gimple_val (gimple_assign_rhs1 (stmt)))
966 : return false;
967 : is_load = false;
968 : ref = lhs;
969 : }
970 9318 : else if (gimple_assign_load_p (stmt))
971 : {
972 9317 : is_load = true;
973 9317 : ref = gimple_assign_rhs1 (stmt);
974 : }
975 : else
976 : return false;
977 :
978 12026 : if (may_be_nonaddressable_p (ref))
979 : return false;
980 :
981 : /* Mask should be integer mode of the same size as the load/store
982 : mode. */
983 11967 : machine_mode mode = TYPE_MODE (TREE_TYPE (lhs));
984 11967 : if (!int_mode_for_mode (mode).exists () || VECTOR_MODE_P (mode))
985 31 : return false;
986 :
987 11936 : if (can_vec_mask_load_store_p (mode, VOIDmode, is_load))
988 : return true;
989 :
990 : return false;
991 : }
992 :
/* Return true if STMT could be converted from an operation that is
   unconditional to one that is conditional on a bb predicate mask.  */

static bool
ifcvt_can_predicate (gimple *stmt)
{
  basic_block bb = gimple_bb (stmt);

  /* Predication is only worthwhile when vectorization of this loop is
     possible, and never valid on statements with volatile operands.  */
  if (!(flag_tree_loop_vectorize || bb->loop_father->force_vectorize)
      || bb->loop_father->dont_vectorize
      || gimple_has_volatile_ops (stmt))
    return false;

  /* Plain loads and stores can become masked loads/stores.  */
  if (gimple_assign_single_p (stmt))
    return ifcvt_can_use_mask_load_store (stmt);

  /* A normal builtin call can be predicated when its associated
     internal function has a conditional (IFN_COND_*) variant the
     target supports in vectorized form.  Note: a builtin call that
     fails the fndecl test falls through to the is_gimple_assign check
     below, which rejects it.  */
  tree callee;
  if (gimple_call_builtin_p (stmt))
    if ((callee = gimple_call_fndecl (stmt))
	&& fndecl_built_in_p (callee, BUILT_IN_NORMAL))
      {
	auto ifn = associated_internal_fn (callee);
	auto cond_ifn = get_conditional_internal_fn (ifn);
	tree type = TREE_TYPE (gimple_call_fntype (stmt));
	return (cond_ifn != IFN_LAST
		&& vectorized_internal_fn_supported_p (cond_ifn, type));
      }

  if (!is_gimple_assign (stmt))
    return false;

  /* Likewise for an assignment whose rhs code has a conditional
     internal function, provided the operation is type-preserving.  */
  tree_code code = gimple_assign_rhs_code (stmt);
  tree lhs_type = TREE_TYPE (gimple_assign_lhs (stmt));
  tree rhs_type = TREE_TYPE (gimple_assign_rhs1 (stmt));
  if (!types_compatible_p (lhs_type, rhs_type))
    return false;
  internal_fn cond_fn = get_conditional_internal_fn (code);
  return (cond_fn != IFN_LAST
	  && vectorized_internal_fn_supported_p (cond_fn, lhs_type));
}
1033 :
/* Return true when STMT is if-convertible.

   GIMPLE_ASSIGN statement is not if-convertible if,
   - it is not movable,
   - it could trap,
   - LHS is not var decl.  */

static bool
if_convertible_gimple_assign_stmt_p (gimple *stmt,
				     vec<data_reference_p> refs)
{
  tree lhs = gimple_assign_lhs (stmt);

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      fprintf (dump_file, "-------------------------\n");
      print_gimple_stmt (dump_file, stmt, 0, TDF_SLIM);
    }

  /* Aggregate assignments cannot be expressed as COND_EXPRs.  */
  if (!is_gimple_reg_type (TREE_TYPE (lhs)))
    return false;

  /* Some of these constrains might be too conservative.  */
  if (stmt_ends_bb_p (stmt)
      || gimple_has_volatile_ops (stmt)
      || (TREE_CODE (lhs) == SSA_NAME
	  && SSA_NAME_OCCURS_IN_ABNORMAL_PHI (lhs))
      || gimple_has_side_effects (stmt))
    {
      if (dump_file && (dump_flags & TDF_DETAILS))
	fprintf (dump_file, "stmt not suitable for ifcvt\n");
      return false;
    }

  /* tree-into-ssa.cc uses GF_PLF_1, so avoid it, because
     in between if_convertible_loop_p and combine_blocks
     we can perform loop versioning.  */
  gimple_set_plf (stmt, GF_PLF_2, false);

  /* A potentially trapping statement is acceptable only if it can be
     rewritten into a predicated (masked) form; GF_PLF_2 marks it for
     predication later in combine_blocks.  */
  if ((! gimple_vuse (stmt)
       || gimple_could_trap_p_1 (stmt, false, false)
       || ! ifcvt_memrefs_wont_trap (stmt, refs))
      && gimple_could_trap_p (stmt))
    {
      if (ifcvt_can_predicate (stmt))
	{
	  gimple_set_plf (stmt, GF_PLF_2, true);
	  need_to_predicate = true;
	  return true;
	}
      if (dump_file && (dump_flags & TDF_DETAILS))
	fprintf (dump_file, "tree could trap...\n");
      return false;
    }
  else if (gimple_needing_rewrite_undefined (stmt))
    /* We have to rewrite stmts with undefined overflow.  */
    need_to_rewrite_undefined = true;

  /* When if-converting stores force versioning, likewise if we
     ended up generating store data races.  */
  if (gimple_vdef (stmt))
    need_to_predicate = true;

  return true;
}
1099 :
1100 : /* Return true when SW switch statement is equivalent to cond, that
1101 : all non default labels point to the same label.
1102 :
1103 : Fallthrough is not checked for and could even happen
1104 : with cond (using goto), so is handled.
1105 :
1106 : This is intended for switches created by the if-switch-conversion
1107 : pass, but can handle some programmer supplied cases too. */
1108 :
1109 : static bool
1110 14 : if_convertible_switch_p (gswitch *sw)
1111 : {
1112 14 : if (gimple_switch_num_labels (sw) <= 1)
1113 : return false;
1114 14 : tree label = CASE_LABEL (gimple_switch_label (sw, 1));
1115 43 : for (unsigned i = 1; i < gimple_switch_num_labels (sw); i++)
1116 : {
1117 29 : if (CASE_LABEL (gimple_switch_label (sw, i)) != label)
1118 : return false;
1119 : }
1120 : return true;
1121 : }
1122 :
1123 : /* Return true when STMT is an if-convertible SIMD clone stmts.
1124 :
1125 : A SIMD clone statement is if-convertible if:
1126 : - it is an GIMPLE_CALL,
1127 : - it has a FNDECL,
1128 : - it has SIMD clones,
1129 : - it has at least one inbranch clone. */
1130 : static bool
1131 1565 : if_convertible_simdclone_stmt_p (gimple *stmt)
1132 : {
1133 1565 : if (!is_gimple_call (stmt))
1134 : return false;
1135 :
1136 1565 : tree fndecl = gimple_call_fndecl (stmt);
1137 1565 : if (fndecl)
1138 : {
1139 : /* We can vectorize some builtins and functions with SIMD "inbranch"
1140 : clones. */
1141 1323 : struct cgraph_node *node = cgraph_node::get (fndecl);
1142 1323 : if (node && node->simd_clones != NULL)
1143 : /* Ensure that at least one clone can be "inbranch". */
1144 1962 : for (struct cgraph_node *n = node->simd_clones; n != NULL;
1145 961 : n = n->simdclone->next_clone)
1146 1960 : if (n->simdclone->inbranch)
1147 : return true;
1148 : }
1149 :
1150 : return false;
1151 : }
1152 :
/* Return true when STMT is if-convertible.

   A statement is if-convertible if:
   - it is an if-convertible GIMPLE_ASSIGN,
   - it is a GIMPLE_LABEL or a GIMPLE_COND,
   - it is a switch equivalent to COND
   - it is builtins call,
   - it is a call to a function with a SIMD clone.  */

static bool
if_convertible_stmt_p (gimple *stmt, vec<data_reference_p> refs)
{
  switch (gimple_code (stmt))
    {
    case GIMPLE_LABEL:
    case GIMPLE_DEBUG:
    case GIMPLE_COND:
      return true;

    case GIMPLE_SWITCH:
      return if_convertible_switch_p (as_a <gswitch *> (stmt));

    case GIMPLE_ASSIGN:
      return if_convertible_gimple_assign_stmt_p (stmt, refs);

    case GIMPLE_CALL:
      {
	/* Check if stmt is a simd clone first.  GF_PLF_2 marks the
	   stmt as needing predication in combine_blocks.  */
	if (if_convertible_simdclone_stmt_p (stmt))
	  {
	    gimple_set_plf (stmt, GF_PLF_2, true);
	    need_to_predicate = true;
	    return true;
	  }

	/* Check if the call can trap and if so require predication.  */
	if (gimple_could_trap_p (stmt))
	  {
	    if (ifcvt_can_predicate (stmt))
	      {
		gimple_set_plf (stmt, GF_PLF_2, true);
		need_to_predicate = true;
		return true;
	      }
	    else
	      {
		if (dump_file && (dump_flags & TDF_DETAILS))
		  fprintf (dump_file, "stmt could trap...\n");
		return false;
	      }
	  }

	/* There are some IFN_s that are used to replace builtins but have the
	   same semantics.  Even if MASK_CALL cannot handle them vectorable_call
	   will insert the proper selection, so do not block conversion.  */
	int flags = gimple_call_flags (stmt);
	if ((flags & ECF_CONST)
	    && !(flags & ECF_LOOPING_CONST_OR_PURE)
	    && gimple_call_combined_fn (stmt) != CFN_LAST)
	  return true;

	return false;
      }

    default:
      /* Don't know what to do with 'em so don't do anything.  */
      if (dump_file && (dump_flags & TDF_DETAILS))
	{
	  fprintf (dump_file, "don't know what to do\n");
	  print_gimple_stmt (dump_file, stmt, 0, TDF_SLIM);
	}
      return false;
    }
}
1227 :
1228 : /* Assumes that BB has more than 1 predecessors.
1229 : Returns false if at least one successor is not on critical edge
1230 : and true otherwise. */
1231 :
1232 : static inline bool
1233 78655 : all_preds_critical_p (basic_block bb)
1234 : {
1235 78655 : edge e;
1236 78655 : edge_iterator ei;
1237 :
1238 153538 : FOR_EACH_EDGE (e, ei, bb->preds)
1239 136402 : if (EDGE_COUNT (e->src->succs) == 1)
1240 : return false;
1241 : return true;
1242 : }
1243 :
/* Return true when BB is if-convertible.  This routine does not check
   basic block's statements and phis.

   A basic block is not if-convertible if:
   - it is non-empty and it is after the exit block (in BFS order),
   - it is after the exit block but before the latch,
   - its edges are not normal.

   EXIT_BB is the basic block containing the exit of the LOOP.  BB is
   inside LOOP.  */

static bool
if_convertible_bb_p (class loop *loop, basic_block bb, basic_block exit_bb)
{
  edge e;
  edge_iterator ei;

  if (dump_file && (dump_flags & TDF_DETAILS))
    fprintf (dump_file, "----------[%d]-------------\n", bb->index);

  /* More than two successors means more than a plain condition.  */
  if (EDGE_COUNT (bb->succs) > 2)
    return false;

  /* A control-altering call ending the block cannot be predicated.  */
  if (gcall *call = safe_dyn_cast <gcall *> (*gsi_last_bb (bb)))
    if (gimple_call_ctrl_altering_p (call))
      return false;

  /* Once the exit block has been seen (EXIT_BB non-NULL), the only
     acceptable remaining block is an empty latch dominated by it.  */
  if (exit_bb)
    {
      if (bb != loop->latch)
	{
	  if (dump_file && (dump_flags & TDF_DETAILS))
	    fprintf (dump_file, "basic block after exit bb but before latch\n");
	  return false;
	}
      else if (!empty_block_p (bb))
	{
	  if (dump_file && (dump_flags & TDF_DETAILS))
	    fprintf (dump_file, "non empty basic block after exit bb\n");
	  return false;
	}
      else if (bb == loop->latch
	       && bb != exit_bb
	       && !dominated_by_p (CDI_DOMINATORS, bb, exit_bb))
	{
	  if (dump_file && (dump_flags & TDF_DETAILS))
	    fprintf (dump_file, "latch is not dominated by exit_block\n");
	  return false;
	}
    }

  /* Be less adventurous and handle only normal edges.  */
  FOR_EACH_EDGE (e, ei, bb->succs)
    if (e->flags & (EDGE_EH | EDGE_ABNORMAL | EDGE_IRREDUCIBLE_LOOP))
      {
	if (dump_file && (dump_flags & TDF_DETAILS))
	  fprintf (dump_file, "Difficult to handle edges\n");
	return false;
      }

  return true;
}
1306 :
1307 : /* Return true when all predecessor blocks of BB are visited. The
1308 : VISITED bitmap keeps track of the visited blocks. */
1309 :
1310 : static bool
1311 2486890 : pred_blocks_visited_p (basic_block bb, bitmap *visited)
1312 : {
1313 2486890 : edge e;
1314 2486890 : edge_iterator ei;
1315 4170941 : FOR_EACH_EDGE (e, ei, bb->preds)
1316 2801073 : if (!bitmap_bit_p (*visited, e->src->index))
1317 : return false;
1318 :
1319 : return true;
1320 : }
1321 :
/* Get body of a LOOP in suitable order for if-conversion.  It is
   caller's responsibility to deallocate basic block list.
   If-conversion suitable order is, breadth first sort (BFS) order
   with an additional constraint: select a block only if all its
   predecessors are already selected.  */

static basic_block *
get_loop_body_in_if_conv_order (const class loop *loop)
{
  basic_block *blocks, *blocks_in_bfs_order;
  basic_block bb;
  bitmap visited;
  unsigned int index = 0;
  unsigned int visited_count = 0;

  gcc_assert (loop->num_nodes);
  gcc_assert (loop->latch != EXIT_BLOCK_PTR_FOR_FN (cfun));

  blocks = XCNEWVEC (basic_block, loop->num_nodes);
  visited = BITMAP_ALLOC (NULL);

  blocks_in_bfs_order = get_loop_body_in_bfs_order (loop);

  /* Repeatedly sweep the BFS order, appending each block whose
     predecessors have all been selected, restarting from the front
     until every block is placed.  Irreducible regions would make this
     loop endless, so give up on them.  */
  index = 0;
  while (index < loop->num_nodes)
    {
      bb = blocks_in_bfs_order [index];

      if (bb->flags & BB_IRREDUCIBLE_LOOP)
	{
	  free (blocks_in_bfs_order);
	  BITMAP_FREE (visited);
	  free (blocks);
	  return NULL;
	}

      if (!bitmap_bit_p (visited, bb->index))
	{
	  if (pred_blocks_visited_p (bb, &visited)
	      || bb == loop->header)
	    {
	      /* This block is now visited.  */
	      bitmap_set_bit (visited, bb->index);
	      blocks[visited_count++] = bb;
	    }
	}

      index++;

      if (index == loop->num_nodes
	  && visited_count != loop->num_nodes)
	/* Not done yet.  */
	index = 0;
    }
  free (blocks_in_bfs_order);
  BITMAP_FREE (visited);

  /* Go through loop and reject if-conversion or lowering of bitfields if we
     encounter statements we do not believe the vectorizer will be able to
     handle.  If adding a new type of statement here, make sure
     'ifcvt_local_dce' is also able to handle it properly.  */
  for (index = 0; index < loop->num_nodes; index++)
    {
      basic_block bb = blocks[index];
      gimple_stmt_iterator gsi;

      bool may_have_nonlocal_labels
	= bb_with_exit_edge_p (loop, bb) || bb == loop->latch;
      for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
	switch (gimple_code (gsi_stmt (gsi)))
	  {
	  case GIMPLE_LABEL:
	    if (!may_have_nonlocal_labels)
	      {
		tree label
		  = gimple_label_label (as_a <glabel *> (gsi_stmt (gsi)));
		if (DECL_NONLOCAL (label) || FORCED_LABEL (label))
		  {
		    free (blocks);
		    return NULL;
		  }
	      }
	    /* Fallthru.  */
	  case GIMPLE_ASSIGN:
	  case GIMPLE_CALL:
	  case GIMPLE_DEBUG:
	  case GIMPLE_COND:
	  case GIMPLE_SWITCH:
	    /* Reset the uid; DR analysis later uses it to map stmts to
	       their data references.  */
	    gimple_set_uid (gsi_stmt (gsi), 0);
	    break;
	  default:
	    free (blocks);
	    return NULL;
	  }
    }
  return blocks;
}
1419 :
/* Compute the predicates for all the basic blocks in LOOP.

   predicate_bbs first allocates the predicates of the basic blocks.
   These fields are then initialized with the tree expressions
   representing the predicates under which a basic block is executed
   in the LOOP.  As the loop->header is executed at each iteration, it
   has the "true" predicate.  Other statements executed under a
   condition are predicated with that condition, for example

   | if (x)
   |   S1;
   | else
   |   S2;

   S1 will be predicated with "x", and
   S2 will be predicated with "!x".  */

static void
predicate_bbs (loop_p loop)
{
  unsigned int i;

  for (i = 0; i < loop->num_nodes; i++)
    init_bb_predicate (ifc_bbs[i]);

  for (i = 0; i < loop->num_nodes; i++)
    {
      basic_block bb = ifc_bbs[i];
      tree cond;

      /* The loop latch and loop exit block are always executed and
	 have no extra conditions to be processed: skip them.  */
      if (bb == loop->latch
	  || bb_with_exit_edge_p (loop, bb))
	{
	  reset_bb_predicate (bb);
	  continue;
	}

      cond = bb_predicate (bb);
      if (gcond *stmt = safe_dyn_cast <gcond *> (*gsi_last_bb (bb)))
	{
	  tree c2;
	  edge true_edge, false_edge;
	  location_t loc = gimple_location (stmt);
	  tree c;
	  /* gcc.dg/fold-bopcond-1.c shows that despite all forwprop passes
	     conditions can remain unfolded because of multiple uses so
	     try to re-fold here, especially to get precision changing
	     conversions sorted out.  Do not simply fold the stmt since
	     this is analysis only.  When conditions were embedded in
	     COND_EXPRs those were folded separately before folding the
	     COND_EXPR but as they are now outside we have to make sure
	     to fold them.  Do it here - another opportunity would be to
	     fold predicates as they are inserted.  */
	  gimple_match_op cexpr (gimple_match_cond::UNCOND,
				 gimple_cond_code (stmt),
				 boolean_type_node,
				 gimple_cond_lhs (stmt),
				 gimple_cond_rhs (stmt));
	  /* Only use the simplified form when it is still a plain
	     comparison we can rebuild as a GENERIC tree.  */
	  if (cexpr.resimplify (NULL, follow_all_ssa_edges)
	      && cexpr.code.is_tree_code ()
	      && TREE_CODE_CLASS ((tree_code)cexpr.code) == tcc_comparison)
	    c = build2_loc (loc, (tree_code)cexpr.code, boolean_type_node,
			    cexpr.ops[0], cexpr.ops[1]);
	  else
	    c = build2_loc (loc, gimple_cond_code (stmt),
			    boolean_type_node,
			    gimple_cond_lhs (stmt),
			    gimple_cond_rhs (stmt));

	  /* Add new condition into destination's predicate list.  */
	  extract_true_false_edges_from_block (gimple_bb (stmt),
					       &true_edge, &false_edge);

	  /* If C is true, then TRUE_EDGE is taken.  */
	  add_to_dst_predicate_list (loop, true_edge, unshare_expr (cond),
				     unshare_expr (c));

	  /* If C is false, then FALSE_EDGE is taken.  */
	  c2 = build1_loc (loc, TRUTH_NOT_EXPR, boolean_type_node,
			   unshare_expr (c));
	  add_to_dst_predicate_list (loop, false_edge,
				     unshare_expr (cond), c2);

	  cond = NULL_TREE;
	}

      /* Assumes the limited COND like switches checked for earlier.  */
      else if (gswitch *sw = safe_dyn_cast <gswitch *> (*gsi_last_bb (bb)))
	{
	  location_t loc = gimple_location (*gsi_last_bb (bb));

	  tree default_label = CASE_LABEL (gimple_switch_default_label (sw));
	  tree cond_label = CASE_LABEL (gimple_switch_label (sw, 1));

	  edge false_edge = find_edge (bb, label_to_block (cfun, default_label));
	  edge true_edge = find_edge (bb, label_to_block (cfun, cond_label));

	  /* Create chain of switch tests for each case.  */
	  tree switch_cond = NULL_TREE;
	  tree index = gimple_switch_index (sw);
	  for (unsigned i = 1; i < gimple_switch_num_labels (sw); i++)
	    {
	      tree label = gimple_switch_label (sw, i);
	      tree case_cond;
	      /* A case range becomes LOW <= index && index <= HIGH.  */
	      if (CASE_HIGH (label))
		{
		  tree low = build2_loc (loc, GE_EXPR,
					 boolean_type_node,
					 index, fold_convert_loc (loc, TREE_TYPE (index),
								  CASE_LOW (label)));
		  tree high = build2_loc (loc, LE_EXPR,
					  boolean_type_node,
					  index, fold_convert_loc (loc, TREE_TYPE (index),
								   CASE_HIGH (label)));
		  case_cond = build2_loc (loc, TRUTH_AND_EXPR,
					  boolean_type_node,
					  low, high);
		}
	      else
		case_cond = build2_loc (loc, EQ_EXPR,
					boolean_type_node,
					index,
					fold_convert_loc (loc, TREE_TYPE (index),
							  CASE_LOW (label)));
	      /* OR the cases together; all of them lead to TRUE_EDGE.  */
	      if (i > 1)
		switch_cond = build2_loc (loc, TRUTH_OR_EXPR,
					  boolean_type_node,
					  case_cond, switch_cond);
	      else
		switch_cond = case_cond;
	    }

	  add_to_dst_predicate_list (loop, true_edge, unshare_expr (cond),
				     unshare_expr (switch_cond));
	  switch_cond = build1_loc (loc, TRUTH_NOT_EXPR, boolean_type_node,
				    unshare_expr (switch_cond));
	  add_to_dst_predicate_list (loop, false_edge,
				     unshare_expr (cond), switch_cond);
	  cond = NULL_TREE;
	}

      /* If current bb has only one successor, then consider it as an
	 unconditional goto.  */
      if (single_succ_p (bb))
	{
	  basic_block bb_n = single_succ (bb);

	  /* The successor bb inherits the predicate of its
	     predecessor.  If there is no predicate in the predecessor
	     bb, then consider the successor bb as always executed.  */
	  if (cond == NULL_TREE)
	    cond = boolean_true_node;

	  add_to_predicate_list (loop, bb_n, cond);
	}
    }

  /* The loop header is always executed.  */
  reset_bb_predicate (loop->header);
  gcc_assert (bb_predicate_gimplified_stmts (loop->header) == NULL
	      && bb_predicate_gimplified_stmts (loop->latch) == NULL);
}
1585 :
1586 : /* Build region by adding loop pre-header and post-header blocks. */
1587 :
1588 : static vec<basic_block>
1589 40752 : build_region (class loop *loop)
1590 : {
1591 40752 : vec<basic_block> region = vNULL;
1592 40752 : basic_block exit_bb = NULL;
1593 :
1594 40752 : gcc_assert (ifc_bbs);
1595 : /* The first element is loop pre-header. */
1596 40752 : region.safe_push (loop_preheader_edge (loop)->src);
1597 :
1598 261620 : for (unsigned int i = 0; i < loop->num_nodes; i++)
1599 : {
1600 220868 : basic_block bb = ifc_bbs[i];
1601 220868 : region.safe_push (bb);
1602 : /* Find loop postheader. */
1603 220868 : edge e;
1604 220868 : edge_iterator ei;
1605 490403 : FOR_EACH_EDGE (e, ei, bb->succs)
1606 310287 : if (loop_exit_edge_p (loop, e))
1607 : {
1608 40752 : exit_bb = e->dest;
1609 40752 : break;
1610 : }
1611 : }
1612 : /* The last element is loop post-header. */
1613 40752 : gcc_assert (exit_bb);
1614 40752 : region.safe_push (exit_bb);
1615 40752 : return region;
1616 : }
1617 :
/* Return true when LOOP is if-convertible.  This is a helper function
   for if_convertible_loop_p.  REFS and DDRS are initialized and freed
   in if_convertible_loop_p.  */

static bool
if_convertible_loop_p_1 (class loop *loop, vec<data_reference_p> *refs)
{
  unsigned int i;
  basic_block exit_bb = NULL;
  vec<basic_block> region;

  calculate_dominance_info (CDI_DOMINATORS);

  /* Phase 1: check CFG-level convertibility of every block.  */
  for (i = 0; i < loop->num_nodes; i++)
    {
      basic_block bb = ifc_bbs[i];

      if (!if_convertible_bb_p (loop, bb, exit_bb))
	return false;

      if (bb_with_exit_edge_p (loop, bb))
	exit_bb = bb;
    }

  data_reference_p dr;

  innermost_DR_map
	  = new hash_map<innermost_loop_behavior_hash, data_reference_p>;
  baseref_DR_map = new hash_map<tree_operand_hash, data_reference_p>;

  /* Compute post-dominator tree locally.  */
  region = build_region (loop);
  calculate_dominance_info_for_region (CDI_POST_DOMINATORS, region);

  predicate_bbs (loop);

  /* Free post-dominator tree since it is not used after predication.  */
  free_dominance_info_for_region (cfun, CDI_POST_DOMINATORS, region);
  region.release ();

  /* Phase 2: record per-DR unconditional read/write information used
     by ifcvt_memrefs_wont_trap.  The stmt uid links each stmt to its
     DR (1-based; 0 means "no DR seen").  */
  for (i = 0; refs->iterate (i, &dr); i++)
    {
      tree ref = DR_REF (dr);

      dr->aux = XNEW (struct ifc_dr);
      DR_BASE_W_UNCONDITIONALLY (dr) = false;
      DR_RW_UNCONDITIONALLY (dr) = false;
      DR_W_UNCONDITIONALLY (dr) = false;
      IFC_DR (dr)->rw_predicate = boolean_false_node;
      IFC_DR (dr)->w_predicate = boolean_false_node;
      IFC_DR (dr)->base_w_predicate = boolean_false_node;
      if (gimple_uid (DR_STMT (dr)) == 0)
	gimple_set_uid (DR_STMT (dr), i + 1);

      /* If DR doesn't have innermost loop behavior or it's a compound
	 memory reference, we synthesize its innermost loop behavior
	 for hashing.  */
      if (TREE_CODE (ref) == COMPONENT_REF
	  || TREE_CODE (ref) == IMAGPART_EXPR
	  || TREE_CODE (ref) == REALPART_EXPR
	  || !(DR_BASE_ADDRESS (dr) || DR_OFFSET (dr)
	       || DR_INIT (dr) || DR_STEP (dr)))
	{
	  while (TREE_CODE (ref) == COMPONENT_REF
		 || TREE_CODE (ref) == IMAGPART_EXPR
		 || TREE_CODE (ref) == REALPART_EXPR)
	    ref = TREE_OPERAND (ref, 0);

	  memset (&DR_INNERMOST (dr), 0, sizeof (DR_INNERMOST (dr)));
	  DR_BASE_ADDRESS (dr) = ref;
	}
      hash_memrefs_baserefs_and_store_DRs_read_written_info (dr);
    }

  /* Phase 3: check the if-convertibility of statements in predicated
     BBs (blocks not dominating the latch execute conditionally).  */
  for (i = 0; i < loop->num_nodes; i++)
    {
      basic_block bb = ifc_bbs[i];
      gimple_stmt_iterator itr;

      /* Check the if-convertibility of statements in predicated BBs.  */
      if (!dominated_by_p (CDI_DOMINATORS, loop->latch, bb))
	for (itr = gsi_start_bb (bb); !gsi_end_p (itr); gsi_next (&itr))
	  if (!if_convertible_stmt_p (gsi_stmt (itr), *refs))
	    return false;
    }

  /* Checking PHIs needs to be done after stmts, as the fact whether there
     are any masked loads or stores affects the tests.  */
  for (i = 0; i < loop->num_nodes; i++)
    {
      basic_block bb = ifc_bbs[i];
      gphi_iterator itr;

      for (itr = gsi_start_phis (bb); !gsi_end_p (itr); gsi_next (&itr))
	if (!if_convertible_phi_p (loop, bb, itr.phi ()))
	  return false;
    }

  if (dump_file)
    fprintf (dump_file, "Applying if-conversion\n");

  return true;
}
1721 :
1722 : /* Return true when LOOP is if-convertible.
1723 : LOOP is if-convertible if:
1724 : - it is innermost,
1725 : - it has two or more basic blocks,
1726 : - it has only one exit,
1727 : - loop header is not the exit edge,
1728 : - if its basic blocks and phi nodes are if convertible. */
1729 :
1730 : static bool
1731 42920 : if_convertible_loop_p (class loop *loop, vec<data_reference_p> *refs)
1732 : {
1733 42920 : edge e;
1734 42920 : edge_iterator ei;
1735 42920 : bool res = false;
1736 :
1737 : /* Handle only innermost loop. */
1738 42920 : if (!loop || loop->inner)
1739 : {
1740 0 : if (dump_file && (dump_flags & TDF_DETAILS))
1741 0 : fprintf (dump_file, "not innermost loop\n");
1742 0 : return false;
1743 : }
1744 :
1745 : /* If only one block, no need for if-conversion. */
1746 42920 : if (loop->num_nodes <= 2)
1747 : {
1748 0 : if (dump_file && (dump_flags & TDF_DETAILS))
1749 0 : fprintf (dump_file, "less than 2 basic blocks\n");
1750 0 : return false;
1751 : }
1752 :
1753 : /* If one of the loop header's edge is an exit edge then do not
1754 : apply if-conversion. */
1755 128638 : FOR_EACH_EDGE (e, ei, loop->header->succs)
1756 85857 : if (loop_exit_edge_p (loop, e))
1757 : return false;
1758 :
1759 42781 : res = if_convertible_loop_p_1 (loop, refs);
1760 :
1761 83533 : delete innermost_DR_map;
1762 42781 : innermost_DR_map = NULL;
1763 :
1764 83533 : delete baseref_DR_map;
1765 42781 : baseref_DR_map = NULL;
1766 :
1767 42781 : return res;
1768 : }
1769 :
1770 : /* Return reduc_1 if has_nop.
1771 :
1772 : if (...)
1773 : tmp1 = (unsigned type) reduc_1;
1774 : tmp2 = tmp1 + rhs2;
1775 : reduc_3 = (signed type) tmp2. */
1776 : static tree
1777 11182 : strip_nop_cond_scalar_reduction (bool has_nop, tree op)
1778 : {
1779 11182 : if (!has_nop)
1780 : return op;
1781 :
1782 414 : if (TREE_CODE (op) != SSA_NAME)
1783 : return NULL_TREE;
1784 :
1785 368 : gassign *stmt = safe_dyn_cast <gassign *> (SSA_NAME_DEF_STMT (op));
1786 368 : if (!stmt
1787 368 : || !CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (stmt))
1788 230 : || !tree_nop_conversion_p (TREE_TYPE (op), TREE_TYPE
1789 : (gimple_assign_rhs1 (stmt))))
1790 149 : return NULL_TREE;
1791 :
1792 219 : return gimple_assign_rhs1 (stmt);
1793 : }
1794 :
1795 : /* Returns true if def-stmt for phi argument ARG is simple increment/decrement
1796 : which is in predicated basic block.
1797 : In fact, the following PHI pattern is searching:
1798 : loop-header:
1799 : reduc_1 = PHI <..., reduc_2>
1800 : ...
1801 : if (...)
1802 : reduc_3 = ...
1803 : reduc_2 = PHI <reduc_1, reduc_3>
1804 :
1805 : ARG_0 and ARG_1 are correspondent PHI arguments.
1806 : REDUC, OP0 and OP1 contain reduction stmt and its operands.
1807 : EXTENDED is true if PHI has > 2 arguments. */
1808 :
1809 : static bool
1810 50108 : is_cond_scalar_reduction (gimple *phi, gimple **reduc, tree arg_0, tree arg_1,
1811 : tree *op0, tree *op1, bool extended, bool* has_nop,
1812 : gimple **nop_reduc)
1813 : {
1814 50108 : tree lhs, r_op1, r_op2, r_nop1, r_nop2;
1815 50108 : gimple *stmt;
1816 50108 : gimple *header_phi = NULL;
1817 50108 : enum tree_code reduction_op;
1818 50108 : basic_block bb = gimple_bb (phi);
1819 50108 : class loop *loop = bb->loop_father;
1820 50108 : edge latch_e = loop_latch_edge (loop);
1821 50108 : imm_use_iterator imm_iter;
1822 50108 : use_operand_p use_p;
1823 50108 : edge e;
1824 50108 : edge_iterator ei;
1825 50108 : bool result = *has_nop = false;
1826 50108 : if (TREE_CODE (arg_0) != SSA_NAME || TREE_CODE (arg_1) != SSA_NAME)
1827 : return false;
1828 :
1829 33611 : if (!extended && gimple_code (SSA_NAME_DEF_STMT (arg_0)) == GIMPLE_PHI)
1830 : {
1831 8302 : lhs = arg_1;
1832 8302 : header_phi = SSA_NAME_DEF_STMT (arg_0);
1833 8302 : stmt = SSA_NAME_DEF_STMT (arg_1);
1834 : }
1835 25309 : else if (gimple_code (SSA_NAME_DEF_STMT (arg_1)) == GIMPLE_PHI)
1836 : {
1837 11170 : lhs = arg_0;
1838 11170 : header_phi = SSA_NAME_DEF_STMT (arg_1);
1839 11170 : stmt = SSA_NAME_DEF_STMT (arg_0);
1840 : }
1841 : else
1842 : return false;
1843 19472 : if (gimple_bb (header_phi) != loop->header)
1844 : return false;
1845 :
1846 18582 : if (PHI_ARG_DEF_FROM_EDGE (header_phi, latch_e) != PHI_RESULT (phi))
1847 : return false;
1848 :
1849 12603 : if (gimple_code (stmt) != GIMPLE_ASSIGN
1850 12603 : || gimple_has_volatile_ops (stmt))
1851 : return false;
1852 :
1853 12473 : if (!flow_bb_inside_loop_p (loop, gimple_bb (stmt)))
1854 : return false;
1855 :
1856 12386 : if (!is_predicated (gimple_bb (stmt)))
1857 : return false;
1858 :
1859 : /* Check that stmt-block is predecessor of phi-block. */
1860 10270 : FOR_EACH_EDGE (e, ei, gimple_bb (stmt)->succs)
1861 10210 : if (e->dest == bb)
1862 : {
1863 : result = true;
1864 : break;
1865 : }
1866 10150 : if (!result)
1867 : return false;
1868 :
1869 10090 : if (!has_single_use (lhs))
1870 : return false;
1871 :
1872 10047 : reduction_op = gimple_assign_rhs_code (stmt);
1873 :
1874 : /* Catch something like below
1875 :
1876 : loop-header:
1877 : reduc_1 = PHI <..., reduc_2>
1878 : ...
1879 : if (...)
1880 : tmp1 = (unsigned type) reduc_1;
1881 : tmp2 = tmp1 + rhs2;
1882 : reduc_3 = (signed type) tmp2;
1883 :
1884 : reduc_2 = PHI <reduc_1, reduc_3>
1885 :
1886 : and convert to
1887 :
1888 : reduc_2 = PHI <0, reduc_1>
1889 : tmp1 = (unsigned type)reduc_1;
1890 : ifcvt = cond_expr ? rhs2 : 0
1891 : tmp2 = tmp1 +/- ifcvt;
1892 : reduc_1 = (signed type)tmp2; */
1893 :
1894 10047 : if (CONVERT_EXPR_CODE_P (reduction_op))
1895 : {
1896 411 : lhs = gimple_assign_rhs1 (stmt);
1897 411 : if (TREE_CODE (lhs) != SSA_NAME
1898 411 : || !has_single_use (lhs))
1899 : return false;
1900 :
1901 222 : *nop_reduc = stmt;
1902 222 : stmt = SSA_NAME_DEF_STMT (lhs);
1903 222 : if (gimple_bb (stmt) != gimple_bb (*nop_reduc)
1904 222 : || !is_gimple_assign (stmt))
1905 : return false;
1906 :
1907 219 : *has_nop = true;
1908 219 : reduction_op = gimple_assign_rhs_code (stmt);
1909 : }
1910 :
1911 9855 : if (reduction_op != PLUS_EXPR
1912 : && reduction_op != MINUS_EXPR
1913 9855 : && reduction_op != MULT_EXPR
1914 9855 : && reduction_op != BIT_IOR_EXPR
1915 : && reduction_op != BIT_XOR_EXPR
1916 4378 : && reduction_op != BIT_AND_EXPR)
1917 : return false;
1918 5591 : r_op1 = gimple_assign_rhs1 (stmt);
1919 5591 : r_op2 = gimple_assign_rhs2 (stmt);
1920 :
1921 5591 : r_nop1 = strip_nop_cond_scalar_reduction (*has_nop, r_op1);
1922 5591 : r_nop2 = strip_nop_cond_scalar_reduction (*has_nop, r_op2);
1923 :
1924 : /* Make R_OP1 to hold reduction variable. */
1925 5591 : if (r_nop2 == PHI_RESULT (header_phi)
1926 5591 : && commutative_tree_code (reduction_op))
1927 : {
1928 : std::swap (r_op1, r_op2);
1929 : std::swap (r_nop1, r_nop2);
1930 : }
1931 4483 : else if (r_nop1 != PHI_RESULT (header_phi))
1932 : return false;
1933 :
1934 5280 : if (*has_nop)
1935 : {
1936 : /* Check that R_NOP1 is used in nop_stmt or in PHI only. */
1937 598 : FOR_EACH_IMM_USE_FAST (use_p, imm_iter, r_nop1)
1938 : {
1939 304 : gimple *use_stmt = USE_STMT (use_p);
1940 304 : if (is_gimple_debug (use_stmt))
1941 0 : continue;
1942 304 : if (use_stmt == SSA_NAME_DEF_STMT (r_op1))
1943 126 : continue;
1944 178 : if (use_stmt != phi)
1945 42 : return false;
1946 168 : }
1947 : }
1948 :
1949 : /* Check that R_OP1 is used in reduction stmt or in PHI only. */
1950 21536 : FOR_EACH_IMM_USE_FAST (use_p, imm_iter, r_op1)
1951 : {
1952 11269 : gimple *use_stmt = USE_STMT (use_p);
1953 11269 : if (is_gimple_debug (use_stmt))
1954 29 : continue;
1955 11240 : if (use_stmt == stmt)
1956 5054 : continue;
1957 6186 : if (gimple_code (use_stmt) != GIMPLE_PHI)
1958 209 : return false;
1959 209 : }
1960 :
1961 5029 : *op0 = r_op1; *op1 = r_op2;
1962 5029 : *reduc = stmt;
1963 5029 : return true;
1964 : }
1965 :
1966 : /* Converts conditional scalar reduction into unconditional form, e.g.
1967 : bb_4
1968 : if (_5 != 0) goto bb_5 else goto bb_6
1969 : end_bb_4
1970 : bb_5
1971 : res_6 = res_13 + 1;
1972 : end_bb_5
1973 : bb_6
1974 : # res_2 = PHI <res_13(4), res_6(5)>
1975 : end_bb_6
1976 :
1977 : will be converted into sequence
1978 : _ifc__1 = _5 != 0 ? 1 : 0;
1979 : res_2 = res_13 + _ifc__1;
1980 : Argument SWAP tells that arguments of conditional expression should be
1981 : swapped.
1982 : If LOOP_VERSIONED is true if we assume that we versioned the loop for
1983 : vectorization. In that case we can create a COND_OP.
1984 : Returns rhs of resulting PHI assignment. */
1985 :
1986 : static tree
1987 5029 : convert_scalar_cond_reduction (gimple *reduc, gimple_stmt_iterator *gsi,
1988 : tree cond, tree op0, tree op1, bool swap,
1989 : bool has_nop, gimple* nop_reduc,
1990 : bool loop_versioned)
1991 : {
1992 5029 : gimple_stmt_iterator stmt_it;
1993 5029 : gimple *new_assign;
1994 5029 : tree rhs;
1995 5029 : tree rhs1 = gimple_assign_rhs1 (reduc);
1996 5029 : tree lhs = gimple_assign_lhs (reduc);
1997 5029 : tree tmp = make_temp_ssa_name (TREE_TYPE (rhs1), NULL, "_ifc_");
1998 5029 : tree c;
1999 5029 : enum tree_code reduction_op = gimple_assign_rhs_code (reduc);
2000 5029 : tree op_nochange = neutral_op_for_reduction (TREE_TYPE (rhs1), reduction_op,
2001 : NULL, false);
2002 5029 : gimple_seq stmts = NULL;
2003 :
2004 5029 : if (dump_file && (dump_flags & TDF_DETAILS))
2005 : {
2006 2 : fprintf (dump_file, "Found cond scalar reduction.\n");
2007 2 : print_gimple_stmt (dump_file, reduc, 0, TDF_SLIM);
2008 : }
2009 :
2010 : /* If possible create a COND_OP instead of a COND_EXPR and an OP_EXPR.
2011 : The COND_OP will have a neutral_op else value. */
2012 5029 : internal_fn ifn;
2013 5029 : ifn = get_conditional_internal_fn (reduction_op);
2014 5029 : if (loop_versioned && ifn != IFN_LAST
2015 5027 : && vectorized_internal_fn_supported_p (ifn, TREE_TYPE (lhs))
2016 1362 : && !VECTOR_TYPE_P (TREE_TYPE (lhs))
2017 6391 : && !swap)
2018 : {
2019 1342 : gcall *cond_call = gimple_build_call_internal (ifn, 4,
2020 : unshare_expr (cond),
2021 : op0, op1, op0);
2022 1342 : gsi_insert_before (gsi, cond_call, GSI_SAME_STMT);
2023 1342 : gimple_call_set_lhs (cond_call, tmp);
2024 : rhs = tmp;
2025 : }
2026 : else
2027 : {
2028 : /* Build cond expression using COND and constant operand
2029 : of reduction rhs. */
2030 7119 : c = fold_build_cond_expr (TREE_TYPE (rhs1),
2031 : unshare_expr (cond),
2032 : swap ? op_nochange : op1,
2033 : swap ? op1 : op_nochange);
2034 : /* Create assignment stmt and insert it at GSI. */
2035 3687 : new_assign = gimple_build_assign (tmp, c);
2036 3687 : gsi_insert_before (gsi, new_assign, GSI_SAME_STMT);
2037 : /* Build rhs for unconditional increment/decrement/logic_operation. */
2038 3687 : rhs = gimple_build (&stmts, reduction_op,
2039 3687 : TREE_TYPE (rhs1), op0, tmp);
2040 : }
2041 :
2042 5029 : if (has_nop)
2043 : {
2044 126 : rhs = gimple_convert (&stmts,
2045 126 : TREE_TYPE (gimple_assign_lhs (nop_reduc)), rhs);
2046 126 : stmt_it = gsi_for_stmt (nop_reduc);
2047 126 : gsi_remove (&stmt_it, true);
2048 126 : release_defs (nop_reduc);
2049 : }
2050 5029 : gsi_insert_seq_before (gsi, stmts, GSI_SAME_STMT);
2051 :
2052 : /* Delete original reduction stmt. */
2053 5029 : stmt_it = gsi_for_stmt (reduc);
2054 5029 : gsi_remove (&stmt_it, true);
2055 5029 : release_defs (reduc);
2056 5029 : return rhs;
2057 : }
2058 :
2059 : /* Generate a simplified conditional. */
2060 :
2061 : static tree
2062 56314 : gen_simplified_condition (tree cond, scalar_cond_masked_set_type &cond_set)
2063 : {
2064 : /* Check if the value is already live in a previous branch. This resolves
2065 : nested conditionals from diamond PHI reductions. */
2066 56314 : if (TREE_CODE (cond) == SSA_NAME)
2067 : {
2068 56306 : gimple *stmt = SSA_NAME_DEF_STMT (cond);
2069 56306 : gassign *assign = NULL;
2070 56306 : if ((assign = as_a <gassign *> (stmt))
2071 56306 : && gimple_assign_rhs_code (assign) == BIT_AND_EXPR)
2072 : {
2073 3519 : tree arg1 = gimple_assign_rhs1 (assign);
2074 3519 : tree arg2 = gimple_assign_rhs2 (assign);
2075 3519 : if (cond_set.contains ({ arg1, 1 }))
2076 150 : arg1 = boolean_true_node;
2077 : else
2078 3369 : arg1 = gen_simplified_condition (arg1, cond_set);
2079 :
2080 3519 : if (cond_set.contains ({ arg2, 1 }))
2081 1787 : arg2 = boolean_true_node;
2082 : else
2083 1732 : arg2 = gen_simplified_condition (arg2, cond_set);
2084 :
2085 3519 : cond = fold_build2 (TRUTH_AND_EXPR, boolean_type_node, arg1, arg2);
2086 : }
2087 : }
2088 56314 : return cond;
2089 : }
2090 :
2091 : /* Structure used to track meta-data on PHI arguments used to generate
2092 : most efficient comparison sequence to flatten a PHI node. */
2093 :
2094 : typedef struct ifcvt_arg_entry
2095 : {
2096 : /* The PHI node argument value. */
2097 : tree arg;
2098 :
2099 : /* The number of compares required to reach this PHI node from start of the
2100 : BB being if-converted. */
2101 : unsigned num_compares;
2102 :
2103 : /* The number of times this PHI node argument appears in the current PHI
2104 : node. */
2105 : unsigned occurs;
2106 :
2107 : /* The indices at which this PHI arg occurs inside the PHI node. */
2108 : vec <int> *indexes;
2109 : } ifcvt_arg_entry_t;
2110 :
2111 : /* Produce condition for all occurrences of ARG in PHI node. Set *INVERT
2112 : as to whether the condition is inverted. */
2113 :
2114 : static tree
2115 4245 : gen_phi_arg_condition (gphi *phi, ifcvt_arg_entry_t &arg,
2116 : gimple_stmt_iterator *gsi,
2117 : scalar_cond_masked_set_type &cond_set, bool *invert)
2118 : {
2119 4245 : int len;
2120 4245 : int i;
2121 4245 : tree cond = NULL_TREE;
2122 4245 : tree c;
2123 4245 : edge e;
2124 :
2125 4245 : *invert = false;
2126 4245 : len = arg.indexes->length ();
2127 4245 : gcc_assert (len > 0);
2128 8562 : for (i = 0; i < len; i++)
2129 : {
2130 4317 : e = gimple_phi_arg_edge (phi, (*arg.indexes)[i]);
2131 4317 : c = bb_predicate (e->src);
2132 4317 : if (is_true_predicate (c))
2133 : {
2134 0 : cond = c;
2135 0 : break;
2136 : }
2137 : /* If we have just a single inverted predicate, signal that and
2138 : instead invert the COND_EXPR arms. */
2139 4317 : if (len == 1 && TREE_CODE (c) == TRUTH_NOT_EXPR)
2140 : {
2141 84 : c = TREE_OPERAND (c, 0);
2142 84 : *invert = true;
2143 : }
2144 :
2145 4317 : c = gen_simplified_condition (c, cond_set);
2146 4317 : c = force_gimple_operand_gsi (gsi, unshare_expr (c),
2147 : true, NULL_TREE, true, GSI_SAME_STMT);
2148 4317 : if (cond != NULL_TREE)
2149 : {
2150 : /* Must build OR expression. */
2151 72 : cond = fold_or_predicates (EXPR_LOCATION (c), c, cond);
2152 72 : cond = force_gimple_operand_gsi (gsi, unshare_expr (cond), true,
2153 : NULL_TREE, true, GSI_SAME_STMT);
2154 : }
2155 : else
2156 : cond = c;
2157 :
2158 : /* Register the new possibly simplified conditional. When more than 2
2159 : entries in a phi node we chain entries in the false branch, so the
2160 : inverted condition is active. */
2161 4317 : scalar_cond_masked_key pred_cond ({ cond, 1 });
2162 4317 : if (!*invert)
2163 4233 : pred_cond.inverted_p = !pred_cond.inverted_p;
2164 4317 : cond_set.add (pred_cond);
2165 : }
2166 4245 : gcc_assert (cond != NULL_TREE);
2167 4245 : return cond;
2168 : }
2169 :
2170 : /* Find the operand which is different between ARG0_OP and ARG1_OP.
2171 : Returns the operand num where the difference is.
2172 : Set NEWARG0 and NEWARG1 from the different argument.
2173 : Returns -1 if none is found.
2174 : If ARG0_OP/ARG1_OP is commutative also try swapping the
2175 : two commutative operands and return the operand number where
2176 : the difference happens in ARG0_OP. */
2177 :
2178 : static int
2179 1111 : find_different_opnum (const gimple_match_op &arg0_op,
2180 : const gimple_match_op &arg1_op,
2181 : tree *new_arg0, tree *new_arg1)
2182 : {
2183 1111 : unsigned opnum = -1;
2184 1111 : unsigned first;
2185 1111 : first = first_commutative_argument (arg1_op.code, arg1_op.type);
2186 3022 : for (unsigned i = 0; i < arg0_op.num_ops; i++)
2187 : {
2188 2232 : if (!operand_equal_for_phi_arg_p (arg0_op.ops[i],
2189 2232 : arg1_op.ops[i]))
2190 : {
2191 : /* Can handle only one non equal operand. */
2192 1432 : if (opnum != -1u)
2193 : {
2194 : /* Though if opnum is right before i and opnum is equal
2195 : to the first commutative argument, handle commutative
2196 : specially. */
2197 321 : if (i == opnum + 1 && opnum == first)
2198 272 : goto commutative;
2199 : return -1;
2200 : }
2201 : opnum = i;
2202 : }
2203 : }
2204 : /* If all operands are equal only do this if there was a single
2205 : operand. */
2206 790 : if (opnum == -1u)
2207 : {
2208 0 : if (arg0_op.num_ops != 1)
2209 : return -1;
2210 : opnum = 0;
2211 : }
2212 790 : *new_arg0 = arg0_op.ops[opnum];
2213 790 : *new_arg1 = arg1_op.ops[opnum];
2214 790 : return opnum;
2215 :
2216 : /* Handle commutative operations. */
2217 272 : commutative:
2218 272 : gcc_assert (first != -1u);
2219 :
2220 : /* Check the rest of the arguments to make sure they are the same. */
2221 272 : for (unsigned i = first + 2; i < arg0_op.num_ops; i++)
2222 0 : if (!operand_equal_for_phi_arg_p (arg0_op.ops[i],
2223 0 : arg1_op.ops[i]))
2224 : return -1;
2225 :
2226 : /* If the arg0[first+1] and arg1[first] are the same
2227 : then the one which is different is arg0[first] and arg1[first+1]
2228 : return first since this is based on arg0. */
2229 272 : if (operand_equal_for_phi_arg_p (arg0_op.ops[first + 1],
2230 272 : arg1_op.ops[first]))
2231 : {
2232 33 : *new_arg0 = arg0_op.ops[first];
2233 33 : *new_arg1 = arg1_op.ops[first + 1];
2234 33 : return first;
2235 : }
2236 : /* If the arg0[first] and arg1[first+1] are the same
2237 : then the one which is different is arg0[first+1] and arg1[first]
2238 : return first+1 since this is based on arg0. */
2239 239 : if (operand_equal_for_phi_arg_p (arg0_op.ops[first],
2240 239 : arg1_op.ops[first + 1]))
2241 : {
2242 14 : *new_arg0 = arg0_op.ops[first + 1];
2243 14 : *new_arg1 = arg1_op.ops[first];
2244 14 : return first + 1;
2245 : }
2246 : return -1;
2247 : }
2248 :
2249 : /* Factors out an operation from *ARG0 and *ARG1 and
2250 : create the new statement at GSI. *RES is the
2251 : result of that new statement. Update *ARG0 and *ARG1
2252 : and *RES to the new values if the factoring happened.
2253 : Loops until all of the factoring is completed. */
2254 :
2255 : static void
2256 46896 : factor_out_operators (tree *res, gimple_stmt_iterator *gsi,
2257 : tree *arg0, tree *arg1, gphi *phi)
2258 : {
2259 46896 : gimple_match_op arg0_op, arg1_op;
2260 46896 : bool repeated = false;
2261 :
2262 47713 : again:
2263 47713 : if (TREE_CODE (*arg0) != SSA_NAME || TREE_CODE (*arg1) != SSA_NAME)
2264 : return;
2265 :
2266 32243 : if (operand_equal_p (*arg0, *arg1))
2267 : return;
2268 :
2269 : /* If either args have > 1 use, then this transformation actually
2270 : increases the number of expressions evaluated at runtime. */
2271 32243 : if (repeated
2272 32243 : ? (!has_zero_uses (*arg0) || !has_zero_uses (*arg1))
2273 31463 : : (!has_single_use (*arg0) || !has_single_use (*arg1)))
2274 : return;
2275 :
2276 5668 : gimple *arg0_def_stmt = SSA_NAME_DEF_STMT (*arg0);
2277 5668 : if (!gimple_extract_op (arg0_def_stmt, &arg0_op))
2278 : return;
2279 :
2280 : /* At this point there should be no ssa names occurring in abnormals. */
2281 2389 : gcc_assert (!arg0_op.operands_occurs_in_abnormal_phi ());
2282 :
2283 2389 : gimple *arg1_def_stmt = SSA_NAME_DEF_STMT (*arg1);
2284 2389 : if (!gimple_extract_op (arg1_def_stmt, &arg1_op))
2285 : return;
2286 :
2287 : /* At this point there should be no ssa names occurring in abnormals. */
2288 2063 : gcc_assert (!arg1_op.operands_occurs_in_abnormal_phi ());
2289 :
2290 : /* No factoring can happen if the codes are different
2291 : or the number operands. */
2292 2063 : if (arg1_op.code != arg0_op.code
2293 2063 : || arg1_op.num_ops != arg0_op.num_ops)
2294 : return;
2295 :
2296 1111 : tree new_arg0, new_arg1;
2297 1111 : int opnum = find_different_opnum (arg0_op, arg1_op, &new_arg0, &new_arg1);
2298 1111 : if (opnum == -1)
2299 : return;
2300 :
2301 : /* BIT_FIELD_REF and BIT_INSERT_EXPR can't be factored out for non-0 operands
2302 : as the other operands require constants. */
2303 837 : if ((arg1_op.code == BIT_FIELD_REF
2304 828 : || arg1_op.code == BIT_INSERT_EXPR)
2305 846 : && opnum != 0)
2306 : return;
2307 :
2308 : /* It is not profitable to factor out vec_perm with
2309 : constant masks (operand 2). The target might not support it
2310 : and that might be invalid to do as such. Also with constants
2311 : masks, the number of elements of the mask type does not need
2312 : to match the number of elements of other operands and can be
2313 : arbitrary integral vector type so factoring that out can't work.
2314 : Note in the case where one mask is a constant and the other is not,
2315 : the next check for compatible types will reject the case where the
2316 : constant mask has the incompatible type. */
2317 829 : if (arg1_op.code == VEC_PERM_EXPR && opnum == 2
2318 8 : && TREE_CODE (new_arg0) == VECTOR_CST
2319 837 : && TREE_CODE (new_arg1) == VECTOR_CST)
2320 : return;
2321 :
2322 825 : if (!types_compatible_p (TREE_TYPE (new_arg0), TREE_TYPE (new_arg1)))
2323 : return;
2324 817 : tree new_res = make_ssa_name (TREE_TYPE (new_arg0), NULL);
2325 :
2326 : /* Create the operation stmt if possible and insert it. */
2327 :
2328 817 : gimple_match_op new_op = arg0_op;
2329 817 : new_op.ops[opnum] = new_res;
2330 817 : gimple_seq seq = NULL;
2331 817 : tree result = *res;
2332 817 : result = maybe_push_res_to_seq (&new_op, &seq, result);
2333 :
2334 : /* If we can't create the new statement, release the temp name
2335 : and return back. */
2336 817 : if (!result)
2337 : {
2338 0 : release_ssa_name (new_res);
2339 0 : return;
2340 : }
2341 817 : gsi_insert_seq_before (gsi, seq, GSI_CONTINUE_LINKING);
2342 :
2343 817 : if (dump_file && (dump_flags & TDF_DETAILS))
2344 : {
2345 9 : fprintf (dump_file, "PHI ");
2346 9 : print_generic_expr (dump_file, gimple_phi_result (phi));
2347 9 : fprintf (dump_file,
2348 : " changed to factor operation out from COND_EXPR.\n");
2349 9 : fprintf (dump_file, "New stmt with OPERATION that defines ");
2350 9 : print_generic_expr (dump_file, result);
2351 9 : fprintf (dump_file, ".\n");
2352 : }
2353 :
2354 : /* Remove the old operation(s) that has single use. */
2355 817 : gimple_stmt_iterator gsi_for_def;
2356 :
2357 817 : gsi_for_def = gsi_for_stmt (arg0_def_stmt);
2358 817 : gsi_remove (&gsi_for_def, true);
2359 817 : release_defs (arg0_def_stmt);
2360 817 : gsi_for_def = gsi_for_stmt (arg1_def_stmt);
2361 817 : gsi_remove (&gsi_for_def, true);
2362 817 : release_defs (arg1_def_stmt);
2363 :
2364 : /* Update the arguments and try again. */
2365 817 : *arg0 = new_arg0;
2366 817 : *arg1 = new_arg1;
2367 817 : *res = new_res;
2368 :
2369 : /* Update the phi node too. */
2370 817 : gimple_phi_set_result (phi, new_res);
2371 817 : gimple_phi_arg (phi, 0)->def = new_arg0;
2372 817 : gimple_phi_arg (phi, 1)->def = new_arg1;
2373 817 : update_stmt (phi);
2374 :
2375 817 : repeated = true;
2376 817 : goto again;
2377 : }
2378 :
2379 : /* Create the smallest nested conditional possible. On pre-order we record
2380 : which conditionals are live, and on post-order rewrite the chain by removing
2381 : already active conditions.
2382 :
2383 : As an example we simplify:
2384 :
2385 : _7 = a_10 < 0;
2386 : _21 = a_10 >= 0;
2387 : _22 = a_10 < e_11(D);
2388 : _23 = _21 & _22;
2389 : _ifc__42 = _23 ? t_13 : 0;
2390 : t_6 = _7 ? 1 : _ifc__42
2391 :
2392 : into
2393 :
2394 : _7 = a_10 < 0;
2395 : _22 = a_10 < e_11(D);
2396 : _ifc__42 = _22 ? t_13 : 0;
2397 : t_6 = _7 ? 1 : _ifc__42;
2398 :
2399 : which produces better code. */
2400 :
2401 : static tree
2402 6372 : gen_phi_nest_statement (gphi *phi, gimple_stmt_iterator *gsi,
2403 : scalar_cond_masked_set_type &cond_set, tree type,
2404 : gimple **res_stmt, tree lhs0,
2405 : vec<struct ifcvt_arg_entry> &args, unsigned idx)
2406 : {
2407 12744 : if (idx == args.length ())
2408 2127 : return args[idx - 1].arg;
2409 :
2410 4245 : bool invert;
2411 4245 : tree cond = gen_phi_arg_condition (phi, args[idx - 1], gsi, cond_set,
2412 : &invert);
2413 4245 : tree arg1 = gen_phi_nest_statement (phi, gsi, cond_set, type, res_stmt, lhs0,
2414 : args, idx + 1);
2415 :
2416 4245 : unsigned prev = idx;
2417 4245 : unsigned curr = prev - 1;
2418 4245 : tree arg0 = args[curr].arg;
2419 4245 : tree rhs, lhs;
2420 4245 : if (idx > 1)
2421 2118 : lhs = make_temp_ssa_name (type, NULL, "_ifc_");
2422 : else
2423 : lhs = lhs0;
2424 :
2425 4245 : if (invert)
2426 84 : rhs = fold_build_cond_expr (type, unshare_expr (cond),
2427 : arg1, arg0);
2428 : else
2429 4161 : rhs = fold_build_cond_expr (type, unshare_expr (cond),
2430 : arg0, arg1);
2431 4245 : gassign *new_stmt = gimple_build_assign (lhs, rhs);
2432 4245 : gsi_insert_before (gsi, new_stmt, GSI_SAME_STMT);
2433 4245 : update_stmt (new_stmt);
2434 4245 : *res_stmt = new_stmt;
2435 4245 : return lhs;
2436 : }
2437 :
2438 : /* When flattening a PHI node we have a choice of which conditions to test to
2439 : for all the paths from the start of the dominator block of the BB with the
2440 : PHI node. If the PHI node has X arguments we have to only test X - 1
2441 : conditions as the last one is implicit. It does matter which conditions we
2442 : test first. We should test the shortest condition first (distance here is
2443 : measures in the number of logical operators in the condition) and the
2444 : longest one last. This allows us to skip testing the most expensive
2445 : condition. To accomplish this we need to sort the conditions. P1 and P2
2446 : are sorted first based on the number of logical operations (num_compares)
2447 : and then by how often they occur in the PHI node. */
2448 :
2449 : static int
2450 36337 : cmp_arg_entry (const void *p1, const void *p2, void * /* data. */)
2451 : {
2452 36337 : const ifcvt_arg_entry sval1 = *(const ifcvt_arg_entry *)p1;
2453 36337 : const ifcvt_arg_entry sval2 = *(const ifcvt_arg_entry *)p2;
2454 :
2455 36337 : if (sval1.num_compares < sval2.num_compares)
2456 : return -1;
2457 12342 : else if (sval1.num_compares > sval2.num_compares)
2458 : return 1;
2459 :
2460 625 : if (sval1.occurs < sval2.occurs)
2461 : return -1;
2462 625 : else if (sval1.occurs > sval2.occurs)
2463 0 : return 1;
2464 :
2465 : return 0;
2466 : }
2467 :
2468 : /* Replace a scalar PHI node with a COND_EXPR using COND as condition.
2469 : This routine can handle PHI nodes with more than two arguments.
2470 :
2471 : For example,
2472 : S1: A = PHI <x1(1), x2(5)>
2473 : is converted into,
2474 : S2: A = cond ? x1 : x2;
2475 :
2476 : The generated code is inserted at GSI that points to the top of
2477 : basic block's statement list.
2478 : If PHI node has more than two arguments a chain of conditional
2479 : expression is produced.
2480 : LOOP_VERSIONED should be true if we know that the loop was versioned for
2481 : vectorization. */
2482 :
2483 :
2484 : static void
2485 52286 : predicate_scalar_phi (gphi *phi, gimple_stmt_iterator *gsi, bool loop_versioned)
2486 : {
2487 52286 : gimple *new_stmt = NULL, *reduc, *nop_reduc;
2488 52286 : tree rhs, res, arg0, arg1, op0, op1, scev;
2489 52286 : tree cond;
2490 52286 : unsigned int index0;
2491 52286 : edge e;
2492 52286 : basic_block bb;
2493 52286 : unsigned int i;
2494 52286 : bool has_nop;
2495 :
2496 52286 : res = gimple_phi_result (phi);
2497 104572 : if (virtual_operand_p (res))
2498 46947 : return;
2499 :
2500 52286 : if ((rhs = degenerate_phi_result (phi))
2501 52286 : || ((scev = analyze_scalar_evolution (gimple_bb (phi)->loop_father,
2502 : res))
2503 52246 : && !chrec_contains_undetermined (scev)
2504 52246 : && scev != res
2505 11 : && (rhs = gimple_phi_arg_def (phi, 0))))
2506 : {
2507 51 : if (dump_file && (dump_flags & TDF_DETAILS))
2508 : {
2509 0 : fprintf (dump_file, "Degenerate phi!\n");
2510 0 : print_gimple_stmt (dump_file, phi, 0, TDF_SLIM);
2511 : }
2512 51 : new_stmt = gimple_build_assign (res, rhs);
2513 51 : gsi_insert_before (gsi, new_stmt, GSI_SAME_STMT);
2514 51 : update_stmt (new_stmt);
2515 51 : return;
2516 : }
2517 :
2518 52235 : bb = gimple_bb (phi);
2519 : /* Keep track of conditionals already seen. */
2520 52235 : scalar_cond_masked_set_type cond_set;
2521 52235 : if (EDGE_COUNT (bb->preds) == 2)
2522 : {
2523 : /* Predicate ordinary PHI node with 2 arguments. */
2524 46896 : edge first_edge, second_edge;
2525 46896 : basic_block true_bb;
2526 46896 : first_edge = EDGE_PRED (bb, 0);
2527 46896 : second_edge = EDGE_PRED (bb, 1);
2528 46896 : cond = bb_predicate (first_edge->src);
2529 46896 : cond_set.add ({ cond, 1 });
2530 46896 : if (TREE_CODE (cond) == TRUTH_NOT_EXPR)
2531 22308 : std::swap (first_edge, second_edge);
2532 46896 : if (EDGE_COUNT (first_edge->src->succs) > 1)
2533 : {
2534 22589 : cond = bb_predicate (second_edge->src);
2535 22589 : if (TREE_CODE (cond) == TRUTH_NOT_EXPR)
2536 9748 : cond = TREE_OPERAND (cond, 0);
2537 : else
2538 : first_edge = second_edge;
2539 : }
2540 : else
2541 24307 : cond = bb_predicate (first_edge->src);
2542 :
2543 : /* Gimplify the condition to a valid cond-expr conditional operand. */
2544 46896 : cond = gen_simplified_condition (cond, cond_set);
2545 46896 : cond = force_gimple_operand_gsi (gsi, unshare_expr (cond), true,
2546 : NULL_TREE, true, GSI_SAME_STMT);
2547 46896 : true_bb = first_edge->src;
2548 46896 : if (EDGE_PRED (bb, 1)->src == true_bb)
2549 : {
2550 35149 : arg0 = gimple_phi_arg_def (phi, 1);
2551 35149 : arg1 = gimple_phi_arg_def (phi, 0);
2552 : }
2553 : else
2554 : {
2555 11747 : arg0 = gimple_phi_arg_def (phi, 0);
2556 11747 : arg1 = gimple_phi_arg_def (phi, 1);
2557 : }
2558 :
2559 : /* Factor out operand if possible. This can only be done easily
2560 : for PHI with 2 elements. */
2561 46896 : factor_out_operators (&res, gsi, &arg0, &arg1, phi);
2562 :
2563 46896 : if (is_cond_scalar_reduction (phi, &reduc, arg0, arg1,
2564 : &op0, &op1, false, &has_nop,
2565 : &nop_reduc))
2566 : {
2567 : /* Convert reduction stmt into vectorizable form. */
2568 8148 : rhs = convert_scalar_cond_reduction (reduc, gsi, cond, op0, op1,
2569 4074 : true_bb != gimple_bb (reduc),
2570 : has_nop, nop_reduc,
2571 : loop_versioned);
2572 4074 : redundant_ssa_names.safe_push (std::make_pair (res, rhs));
2573 : }
2574 : else
2575 : /* Build new RHS using selected condition and arguments. */
2576 42822 : rhs = fold_build_cond_expr (TREE_TYPE (res), unshare_expr (cond),
2577 : arg0, arg1);
2578 46896 : new_stmt = gimple_build_assign (res, rhs);
2579 46896 : gsi_insert_before (gsi, new_stmt, GSI_SAME_STMT);
2580 46896 : gimple_stmt_iterator new_gsi = gsi_for_stmt (new_stmt);
2581 46896 : if (fold_stmt (&new_gsi, follow_all_ssa_edges))
2582 : {
2583 1481 : new_stmt = gsi_stmt (new_gsi);
2584 1481 : update_stmt (new_stmt);
2585 : }
2586 :
2587 46896 : if (dump_file && (dump_flags & TDF_DETAILS))
2588 : {
2589 19 : fprintf (dump_file, "new phi replacement stmt\n");
2590 19 : print_gimple_stmt (dump_file, new_stmt, 0, TDF_SLIM);
2591 : }
2592 46896 : return;
2593 : }
2594 :
2595 : /* Create hashmap for PHI node which contain vector of argument indexes
2596 : having the same value. */
2597 5339 : bool swap = false;
2598 5339 : hash_map<tree_operand_hash, auto_vec<int> > phi_arg_map;
2599 5339 : unsigned int num_args = gimple_phi_num_args (phi);
2600 : /* Vector of different PHI argument values. */
2601 5339 : auto_vec<ifcvt_arg_entry_t> args;
2602 :
2603 : /* Compute phi_arg_map, determine the list of unique PHI args and the indices
2604 : where they are in the PHI node. The indices will be used to determine
2605 : the conditions to apply and their complexity. */
2606 21621 : for (i = 0; i < num_args; i++)
2607 : {
2608 16282 : tree arg;
2609 :
2610 16282 : arg = gimple_phi_arg_def (phi, i);
2611 16282 : if (!phi_arg_map.get (arg))
2612 12796 : args.safe_push ({ arg, 0, 0, NULL });
2613 16282 : phi_arg_map.get_or_insert (arg).safe_push (i);
2614 : }
2615 :
2616 : /* Determine element with max number of occurrences and complexity. Looking
2617 : at only number of occurrences as a measure for complexity isn't enough as
2618 : all usages can be unique but the comparisons to reach the PHI node differ
2619 : per branch. */
2620 18135 : for (unsigned i = 0; i < args.length (); i++)
2621 : {
2622 12796 : unsigned int len = 0;
2623 12796 : vec<int> *indices = phi_arg_map.get (args[i].arg);
2624 54670 : for (int index : *indices)
2625 : {
2626 16282 : edge e = gimple_phi_arg_edge (phi, index);
2627 16282 : len += get_bb_num_predicate_stmts (e->src);
2628 : }
2629 :
2630 12796 : unsigned occur = indices->length ();
2631 12796 : if (dump_file && (dump_flags & TDF_DETAILS))
2632 7 : fprintf (dump_file, "Ranking %d as len=%d, idx=%d\n", i, len, occur);
2633 12796 : args[i].num_compares = len;
2634 12796 : args[i].occurs = occur;
2635 12796 : args[i].indexes = indices;
2636 : }
2637 :
2638 : /* Sort elements based on rankings ARGS. */
2639 5339 : args.stablesort (cmp_arg_entry, NULL);
2640 :
2641 : /* Handle one special case when number of arguments with different values
2642 : is equal 2 and one argument has the only occurrence. Such PHI can be
2643 : handled as if would have only 2 arguments. */
2644 5339 : if (args.length () == 2
2645 8623 : && args[0].indexes->length () == 1)
2646 : {
2647 3212 : index0 = (*args[0].indexes)[0];
2648 3212 : arg0 = args[0].arg;
2649 3212 : arg1 = args[1].arg;
2650 3212 : e = gimple_phi_arg_edge (phi, index0);
2651 3212 : cond = bb_predicate (e->src);
2652 3212 : if (TREE_CODE (cond) == TRUTH_NOT_EXPR)
2653 : {
2654 39 : swap = true;
2655 39 : cond = TREE_OPERAND (cond, 0);
2656 : }
2657 : /* Gimplify the condition to a valid cond-expr conditional operand. */
2658 3212 : cond = force_gimple_operand_gsi (gsi, unshare_expr (cond), true,
2659 : NULL_TREE, true, GSI_SAME_STMT);
2660 3212 : if (!(is_cond_scalar_reduction (phi, &reduc, arg0 , arg1,
2661 : &op0, &op1, true, &has_nop, &nop_reduc)))
2662 4477 : rhs = fold_build_cond_expr (TREE_TYPE (res), unshare_expr (cond),
2663 : swap ? arg1 : arg0,
2664 : swap ? arg0 : arg1);
2665 : else
2666 : {
2667 : /* Convert reduction stmt into vectorizable form. */
2668 955 : rhs = convert_scalar_cond_reduction (reduc, gsi, cond, op0, op1,
2669 : swap, has_nop, nop_reduc,
2670 : loop_versioned);
2671 955 : redundant_ssa_names.safe_push (std::make_pair (res, rhs));
2672 : }
2673 3212 : new_stmt = gimple_build_assign (res, rhs);
2674 3212 : gsi_insert_before (gsi, new_stmt, GSI_SAME_STMT);
2675 3212 : update_stmt (new_stmt);
2676 : }
2677 : else
2678 : {
2679 : /* Common case. */
2680 2127 : tree type = TREE_TYPE (gimple_phi_result (phi));
2681 2127 : gen_phi_nest_statement (phi, gsi, cond_set, type, &new_stmt, res,
2682 : args, 1);
2683 : }
2684 :
2685 5339 : if (dump_file && (dump_flags & TDF_DETAILS))
2686 : {
2687 3 : fprintf (dump_file, "new extended phi replacement stmt\n");
2688 3 : print_gimple_stmt (dump_file, new_stmt, 0, TDF_SLIM);
2689 : }
2690 52235 : }
2691 :
2692 : /* Replaces in LOOP all the scalar phi nodes other than those in the
2693 : LOOP->header block with conditional modify expressions.
2694 : LOOP_VERSIONED should be true if we know that the loop was versioned for
2695 : vectorization. */
2696 :
 2697                 : static void
 2698           28988 : predicate_all_scalar_phis (class loop *loop, bool loop_versioned)
 2699                 : {
 2700           28988 :   basic_block bb;
 2701           28988 :   unsigned int orig_loop_num_nodes = loop->num_nodes;
 2702           28988 :   unsigned int i;
 2703                 :
 2704          148358 :   for (i = 1; i < orig_loop_num_nodes; i++)
 2705                 :     {
 2706          119370 :       gphi *phi;
 2707          119370 :       gimple_stmt_iterator gsi;
 2708          119370 :       gphi_iterator phi_gsi;
 2709          119370 :       bb = ifc_bbs[i];
 2710                 :
 2711          119370 :       if (bb == loop->header)
 2712           85319 :         continue;
 2713                 :
 2714          119370 :       phi_gsi = gsi_start_phis (bb);
 2715          119370 :       if (gsi_end_p (phi_gsi))
 2716           85319 :         continue;
 2717                 :
 2718           34051 :       gsi = gsi_after_labels (bb);
 2719           88670 :       while (!gsi_end_p (phi_gsi))
 2720                 :         {
 2721           54619 :           phi = phi_gsi.phi ();
 2722          109238 :           if (virtual_operand_p (gimple_phi_result (phi)))
 2723            2333 :             gsi_next (&phi_gsi);
 2724                 :           else
 2725                 :             {
 2726           52286 :               predicate_scalar_phi (phi, &gsi, loop_versioned);
                      :               /* remove_phi_node leaves PHI_GSI at the following PHI,
                      :                  so the iterator is only stepped explicitly in the
                      :                  virtual-operand case above.  */
 2727           52286 :               remove_phi_node (&phi_gsi, false);
 2728                 :             }
 2729                 :         }
 2730                 :     }
 2731           28988 : }
2732 :
2733 : /* Insert in each basic block of LOOP the statements produced by the
2734 : gimplification of the predicates. */
2735 :
 2736                 : static void
 2737           28988 : insert_gimplified_predicates (loop_p loop)
 2738                 : {
 2739           28988 :   unsigned int i;
 2740                 :
 2741          177346 :   for (i = 0; i < loop->num_nodes; i++)
 2742                 :     {
 2743          148358 :       basic_block bb = ifc_bbs[i];
 2744          148358 :       gimple_seq stmts;
                      :       /* A non-predicated block must not have accumulated any
                      :          gimplified predicate statements; the condition is tested
                      :          twice so the assert stands on its own.  */
 2745          148358 :       if (!is_predicated (bb))
 2746           91097 :         gcc_assert (bb_predicate_gimplified_stmts (bb) == NULL);
 2747          148358 :       if (!is_predicated (bb))
 2748                 :         {
 2749                 :           /* Do not insert statements for a basic block that is not
 2750                 :              predicated.  Also make sure that the predicate of the
 2751                 :              basic block is set to true.  */
 2752           91097 :           reset_bb_predicate (bb);
 2753           91097 :           continue;
 2754                 :         }
 2755                 :
 2756           57261 :       stmts = bb_predicate_gimplified_stmts (bb);
 2757           57261 :       if (stmts)
 2758                 :         {
 2759           56881 :           if (need_to_predicate)
 2760                 :             {
 2761                 :               /* Insert the predicate of the BB just after the label,
 2762                 :                  as the if-conversion of memory writes will use this
 2763                 :                  predicate.  */
 2764            5840 :               gimple_stmt_iterator gsi = gsi_after_labels (bb);
 2765            5840 :               gsi_insert_seq_before (&gsi, stmts, GSI_SAME_STMT);
 2766                 :             }
 2767                 :           else
 2768                 :             {
 2769                 :               /* Insert the predicate of the BB at the end of the BB
 2770                 :                  as this would reduce the register pressure: the only
 2771                 :                  use of this predicate will be in successor BBs.  */
 2772           51041 :               gimple_stmt_iterator gsi = gsi_last_bb (bb);
 2773                 :
 2774           51041 :               if (gsi_end_p (gsi)
 2775           51041 :                   || stmt_ends_bb_p (gsi_stmt (gsi)))
 2776           29395 :                 gsi_insert_seq_before (&gsi, stmts, GSI_SAME_STMT);
 2777                 :               else
 2778           21646 :                 gsi_insert_seq_after (&gsi, stmts, GSI_SAME_STMT);
 2779                 :             }
 2780                 :
 2781                 :           /* Once the sequence is code generated, set it to NULL.  */
 2782           56881 :           set_bb_predicate_gimplified_stmts (bb, NULL, true);
 2783                 :         }
 2784                 :     }
 2785           28988 : }
2786 :
2787 : /* Helper function for predicate_statements. Returns index of existent
2788 : mask if it was created for given SIZE and -1 otherwise. */
2789 :
 2790                 : static int
 2791             937 : mask_exists (int size, const vec<int> &vec)
 2792                 : {
 2793             937 :   unsigned int ix;
 2794             937 :   int v;
                      :   /* Linear scan; VEC holds one entry per distinct mask size seen
                      :      so far in the current block, so it is small in practice.  */
 2795            1026 :   FOR_EACH_VEC_ELT (vec, ix, v)
 2796             965 :     if (v == size)
 2797             876 :       return (int) ix;
 2798                 :   return -1;
 2799                 : }
2801 : /* Helper function for predicate_statements. STMT is a memory read or
2802 : write and it needs to be predicated by MASK. Return a statement
2803 : that does so. */
2804 :
 2805                 : static gimple *
 2806            2015 : predicate_load_or_store (gimple_stmt_iterator *gsi, gassign *stmt, tree mask)
 2807                 : {
 2808            2015 :   gcall *new_stmt;
 2809                 :
 2810            2015 :   tree lhs = gimple_assign_lhs (stmt);
 2811            2015 :   tree rhs = gimple_assign_rhs1 (stmt);
                      :   /* An SSA-name LHS means STMT is a load (the memory reference is
                      :      on the RHS); otherwise it is a store.  */
 2812            2015 :   tree ref = TREE_CODE (lhs) == SSA_NAME ? rhs : lhs;
 2813            2015 :   mark_addressable (ref);
 2814            2015 :   tree addr = force_gimple_operand_gsi (gsi, build_fold_addr_expr (ref),
 2815                 :                                         true, NULL_TREE, true, GSI_SAME_STMT);
                      :   /* The second operand of IFN_MASK_LOAD/STORE carries the alias
                      :      pointer type as its type and the object alignment as its
                      :      value.  */
 2816            2015 :   tree ptr = build_int_cst (reference_alias_ptr_type (ref),
 2817            2015 :                             get_object_alignment (ref));
 2818                 :   /* Copy points-to info if possible.  */
 2819            2015 :   if (TREE_CODE (addr) == SSA_NAME && !SSA_NAME_PTR_INFO (addr))
 2820             519 :     copy_ref_info (build2 (MEM_REF, TREE_TYPE (ref), addr, ptr),
 2821                 :                    ref);
 2822            2015 :   if (TREE_CODE (lhs) == SSA_NAME)
 2823                 :     {
 2824                 :       /* Get a zero else value.  This might not be what a target actually uses
 2825                 :          but we cannot be sure about which vector mode the vectorizer will
 2826                 :          choose.  Therefore, leave the decision whether we need to force the
 2827                 :          inactive elements to zero to the vectorizer.  */
 2828            1206 :       tree els = vect_get_mask_load_else (MASK_LOAD_ELSE_ZERO,
 2829            1206 :                                           TREE_TYPE (lhs));
 2830                 :
 2831            1206 :       new_stmt
 2832            1206 :         = gimple_build_call_internal (IFN_MASK_LOAD, 4, addr,
 2833                 :                                       ptr, mask, els);
 2834                 :
 2835            1206 :       gimple_call_set_lhs (new_stmt, lhs);
 2836            2412 :       gimple_set_vuse (new_stmt, gimple_vuse (stmt));
 2837                 :     }
 2838                 :   else
 2839                 :     {
 2840             809 :       new_stmt
 2841             809 :         = gimple_build_call_internal (IFN_MASK_STORE, 4, addr, ptr,
 2842                 :                                       mask, rhs);
 2843             809 :       gimple_move_vops (new_stmt, stmt);
 2844                 :     }
 2845            2015 :   gimple_call_set_nothrow (new_stmt, true);
 2846            2015 :   return new_stmt;
 2847                 : }
2848 :
2849 : /* STMT uses OP_LHS. Check whether it is equivalent to:
2850 :
2851 : ... = OP_MASK ? OP_LHS : X;
2852 :
2853 : Return X if so, otherwise return null. OP_MASK is an SSA_NAME that is
2854 : known to have value OP_COND. */
2855 :
 2856                 : static tree
 2857             820 : check_redundant_cond_expr (gimple *stmt, tree op_mask, tree op_cond,
 2858                 :                            tree op_lhs)
 2859                 : {
 2860            1505 :   gassign *assign = dyn_cast <gassign *> (stmt);
 2861            1028 :   if (!assign || gimple_assign_rhs_code (assign) != COND_EXPR)
 2862                 :     return NULL_TREE;
 2863                 :
 2864             203 :   tree use_cond = gimple_assign_rhs1 (assign);
 2865             203 :   tree if_true = gimple_assign_rhs2 (assign);
 2866             203 :   tree if_false = gimple_assign_rhs3 (assign);
 2867                 :
 2868              72 :   if ((use_cond == op_mask || operand_equal_p (use_cond, op_cond, 0))
 2869             203 :       && if_true == op_lhs)
 2870                 :     return if_false;
 2871                 :
                      :   /* Also accept the inverted condition with the arms swapped.  */
 2872              72 :   if (inverse_conditions_p (use_cond, op_cond) && if_false == op_lhs)
 2873                 :     return if_true;
 2874                 :
 2875                 :   return NULL_TREE;
 2876                 : }
2877 :
2878 : /* Return true if VALUE is available for use at STMT. SSA_NAMES is
2879 : the set of SSA names defined earlier in STMT's block. */
2880 :
 2881                 : static bool
 2882             131 : value_available_p (gimple *stmt, hash_set<tree_ssa_name_hash> *ssa_names,
 2883                 :                    tree value)
 2884                 : {
 2885             131 :   if (is_gimple_min_invariant (value))
 2886                 :     return true;
 2887                 :
 2888              95 :   if (TREE_CODE (value) == SSA_NAME)
 2889                 :     {
 2890              95 :       if (SSA_NAME_IS_DEFAULT_DEF (value))
 2891                 :         return true;
 2892                 :
                      :       /* Within STMT's own block rely on SSA_NAMES (the defs already
                      :          seen before STMT); across blocks use dominance.  */
 2893              95 :       basic_block def_bb = gimple_bb (SSA_NAME_DEF_STMT (value));
 2894              95 :       basic_block use_bb = gimple_bb (stmt);
 2895              95 :       return (def_bb == use_bb
 2896              95 :               ? ssa_names->contains (value)
 2897              95 :               : dominated_by_p (CDI_DOMINATORS, use_bb, def_bb));
 2898                 :     }
 2899                 :
 2900                 :   return false;
 2901                 : }
2902 :
2903 : /* Helper function for predicate_statements. STMT is a potentially-trapping
2904 : arithmetic operation that needs to be predicated by MASK, an SSA_NAME that
2905 : has value COND. Return a statement that does so. SSA_NAMES is the set of
2906 : SSA names defined earlier in STMT's block. */
2907 :
 2908                 : static gimple *
 2909             615 : predicate_rhs_code (gimple *stmt, tree mask, tree cond,
 2910                 :                     hash_set<tree_ssa_name_hash> *ssa_names)
 2911                 : {
 2912             615 :   internal_fn cond_fn;
 2913             615 :   if (is_gimple_assign (stmt))
 2914                 :     {
 2915             507 :       tree_code code = gimple_assign_rhs_code (stmt);
 2916             507 :       cond_fn = get_conditional_internal_fn (code);
 2917                 :     }
 2918             108 :   else if (tree callee = gimple_call_fndecl (stmt))
 2919                 :     {
 2920             108 :       auto ifn = associated_internal_fn (callee);
 2921             108 :       cond_fn = get_conditional_internal_fn (ifn);
 2922                 :     }
 2923                 :   else
 2924                 :     return NULL;
 2925                 :
 2926             615 :   if (cond_fn == IFN_LAST)
 2927                 :     {
 2928               0 :       gcc_assert (!gimple_could_trap_p (stmt));
 2929                 :       return NULL;
 2930                 :     }
 2931                 :
 2932             615 :   tree lhs = gimple_get_lhs (stmt);
 2933             615 :   unsigned int nops = gimple_num_args (stmt) + 1;
 2934                 :
 2935                 :   /* Construct the arguments to the conditional internal function.  */
 2936             615 :   auto_vec<tree, 8> args;
 2937             615 :   args.safe_grow (nops + 1, true);
 2938             615 :   args[0] = mask;
 2939            1953 :   for (unsigned int i = 0; i < nops - 1; ++i)
 2940            1338 :     args[i+1] = gimple_arg (stmt, i);
                      :   /* args[nops] is the "else" value; filled in below.  */
 2941             615 :   args[nops] = NULL_TREE;
 2942                 :
 2943                 :   /* Look for uses of the result to see whether they are COND_EXPRs that can
 2944                 :      be folded into the conditional call.  */
 2945             615 :   imm_use_iterator imm_iter;
 2946             615 :   gimple *use_stmt;
 2947            2050 :   FOR_EACH_IMM_USE_STMT (use_stmt, imm_iter, lhs)
 2948                 :     {
 2949             820 :       tree new_else = check_redundant_cond_expr (use_stmt, mask, cond, lhs);
 2950             820 :       if (new_else && value_available_p (stmt, ssa_names, new_else))
 2951                 :         {
 2952             110 :           if (!args[nops])
 2953             110 :             args[nops] = new_else;
 2954             110 :           if (operand_equal_p (new_else, args[nops], 0))
 2955                 :             {
 2956                 :               /* We have:
 2957                 :
 2958                 :                    LHS = IFN_COND (MASK, ..., ELSE);
 2959                 :                    X = MASK ? LHS : ELSE;
 2960                 :
 2961                 :                  which makes X equivalent to LHS.  */
 2962             110 :               tree use_lhs = gimple_assign_lhs (use_stmt);
 2963             110 :               redundant_ssa_names.safe_push (std::make_pair (use_lhs, lhs));
 2964                 :             }
 2965                 :         }
 2966             615 :     }
                      :   /* No foldable COND_EXPR use found: ask the target for its
                      :      preferred "else" value.  */
 2967             615 :   if (!args[nops])
 2968            1010 :     args[nops] = targetm.preferred_else_value (cond_fn, TREE_TYPE (lhs),
 2969             505 :                                                nops - 1, &args[1]);
 2970                 :
 2971                 :   /* Create and insert the call.  */
 2972             615 :   gcall *new_stmt = gimple_build_call_internal_vec (cond_fn, args);
 2973             615 :   gimple_call_set_lhs (new_stmt, lhs);
 2974             615 :   gimple_call_set_nothrow (new_stmt, true);
 2975                 :
 2976             615 :   return new_stmt;
 2977             615 : }
2978 :
2979 : /* Predicate each write to memory in LOOP.
2980 :
2981 : This function transforms control flow constructs containing memory
2982 : writes of the form:
2983 :
2984 : | for (i = 0; i < N; i++)
2985 : | if (cond)
2986 : | A[i] = expr;
2987 :
2988 : into the following form that does not contain control flow:
2989 :
2990 : | for (i = 0; i < N; i++)
2991 : | A[i] = cond ? expr : A[i];
2992 :
2993 : The original CFG looks like this:
2994 :
2995 : | bb_0
2996 : | i = 0
2997 : | end_bb_0
2998 : |
2999 : | bb_1
3000 : | if (i < N) goto bb_5 else goto bb_2
3001 : | end_bb_1
3002 : |
3003 : | bb_2
3004 : | cond = some_computation;
3005 : | if (cond) goto bb_3 else goto bb_4
3006 : | end_bb_2
3007 : |
3008 : | bb_3
3009 : | A[i] = expr;
3010 : | goto bb_4
3011 : | end_bb_3
3012 : |
3013 : | bb_4
3014 : | goto bb_1
3015 : | end_bb_4
3016 :
3017 : insert_gimplified_predicates inserts the computation of the COND
3018 : expression at the beginning of the destination basic block:
3019 :
3020 : | bb_0
3021 : | i = 0
3022 : | end_bb_0
3023 : |
3024 : | bb_1
3025 : | if (i < N) goto bb_5 else goto bb_2
3026 : | end_bb_1
3027 : |
3028 : | bb_2
3029 : | cond = some_computation;
3030 : | if (cond) goto bb_3 else goto bb_4
3031 : | end_bb_2
3032 : |
3033 : | bb_3
3034 : | cond = some_computation;
3035 : | A[i] = expr;
3036 : | goto bb_4
3037 : | end_bb_3
3038 : |
3039 : | bb_4
3040 : | goto bb_1
3041 : | end_bb_4
3042 :
3043 : predicate_statements is then predicating the memory write as follows:
3044 :
3045 : | bb_0
3046 : | i = 0
3047 : | end_bb_0
3048 : |
3049 : | bb_1
3050 : | if (i < N) goto bb_5 else goto bb_2
3051 : | end_bb_1
3052 : |
3053 : | bb_2
3054 : | if (cond) goto bb_3 else goto bb_4
3055 : | end_bb_2
3056 : |
3057 : | bb_3
3058 : | cond = some_computation;
3059 : | A[i] = cond ? expr : A[i];
3060 : | goto bb_4
3061 : | end_bb_3
3062 : |
3063 : | bb_4
3064 : | goto bb_1
3065 : | end_bb_4
3066 :
3067 : and finally combine_blocks removes the basic block boundaries making
3068 : the loop vectorizable:
3069 :
3070 : | bb_0
3071 : | i = 0
3072 : | if (i < N) goto bb_5 else goto bb_1
3073 : | end_bb_0
3074 : |
3075 : | bb_1
3076 : | cond = some_computation;
3077 : | A[i] = cond ? expr : A[i];
3078 : | if (i < N) goto bb_5 else goto bb_4
3079 : | end_bb_1
3080 : |
3081 : | bb_4
3082 : | goto bb_1
3083 : | end_bb_4
3084 : */
3085 :
 3086                 : static void
 3087           11960 : predicate_statements (loop_p loop)
 3088                 : {
 3089           11960 :   unsigned int i, orig_loop_num_nodes = loop->num_nodes;
 3090           11960 :   auto_vec<int, 1> vect_sizes;
 3091           11960 :   auto_vec<tree, 1> vect_masks;
 3092           11960 :   hash_set<tree_ssa_name_hash> ssa_names;
 3093                 :
 3094           62735 :   for (i = 1; i < orig_loop_num_nodes; i++)
 3095                 :     {
 3096           50775 :       gimple_stmt_iterator gsi;
 3097           50775 :       basic_block bb = ifc_bbs[i];
 3098           50775 :       tree cond = bb_predicate (bb);
 3099           50775 :       bool swap;
 3100           50775 :       int index;
 3101                 :
 3102           50775 :       if (is_true_predicate (cond))
 3103           25241 :         continue;
 3104                 :
                      :       /* A TRUTH_NOT_EXPR predicate is handled by remembering the
                      :          negation in SWAP and working with the inner condition.  */
 3105           25534 :       swap = false;
 3106           25534 :       if (TREE_CODE (cond) == TRUTH_NOT_EXPR)
 3107                 :         {
 3108            9580 :           swap = true;
 3109            9580 :           cond = TREE_OPERAND (cond, 0);
 3110                 :         }
 3111                 :
 3112           25534 :       vect_sizes.truncate (0);
 3113           25534 :       vect_masks.truncate (0);
 3114                 :
 3115          187824 :       for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi);)
 3116                 :         {
 3117          136756 :           gimple *stmt = gsi_stmt (gsi);
 3118          136756 :           if (!is_gimple_assign (stmt)
 3119          136756 :               && !gimple_call_builtin_p (stmt))
 3120                 :             ;
 3121           87409 :           else if (is_false_predicate (cond)
 3122           87409 :                    && gimple_vdef (stmt))
 3123                 :             {
 3124               0 :               unlink_stmt_vdef (stmt);
 3125               0 :               gsi_remove (&gsi, true);
 3126               0 :               release_defs (stmt);
 3127               0 :               continue;
 3128                 :             }
                      :           /* GF_PLF_2 marks statements that need masking (loads,
                      :              stores and trapping ops); simd-clone calls are handled
                      :              separately below.  */
 3129           87409 :           else if (gimple_plf (stmt, GF_PLF_2)
 3130           87409 :                    && (is_gimple_assign (stmt)
 3131             108 :                        || (gimple_call_builtin_p (stmt)
 3132             108 :                            && !if_convertible_simdclone_stmt_p (stmt))))
 3133                 :             {
 3134            2630 :               tree lhs = gimple_get_lhs (stmt);
 3135                 :               /* ?? Assume that calls without an LHS are not data processing
 3136                 :                  and so no issues with traps.  */
 3137            2630 :               if (!lhs)
 3138               0 :                 continue;
 3139            2630 :               tree mask;
 3140            2630 :               gimple *new_stmt;
 3141            2630 :               gimple_seq stmts = NULL;
 3142            2630 :               machine_mode mode = TYPE_MODE (TREE_TYPE (lhs));
 3143                 :               /* We checked before setting GF_PLF_2 that an equivalent
 3144                 :                  integer mode exists.  */
 3145            2630 :               int bitsize = GET_MODE_BITSIZE (mode).to_constant ();
 3146            2630 :               if (!vect_sizes.is_empty ()
 3147             937 :                   && (index = mask_exists (bitsize, vect_sizes)) != -1)
 3148                 :                 /* Use created mask.  */
 3149             876 :                 mask = vect_masks[index];
 3150                 :               else
 3151                 :                 {
 3152            1754 :                   if (COMPARISON_CLASS_P (cond))
 3153               0 :                     mask = gimple_build (&stmts, TREE_CODE (cond),
 3154                 :                                          boolean_type_node,
 3155               0 :                                          TREE_OPERAND (cond, 0),
 3156               0 :                                          TREE_OPERAND (cond, 1));
 3157                 :                   else
 3158            1754 :                     mask = cond;
 3159                 :
 3160            1754 :                   if (swap)
 3161                 :                     {
 3162             455 :                       tree true_val
 3163             455 :                         = constant_boolean_node (true, TREE_TYPE (mask));
                      :                       /* XOR with true inverts the mask.  */
 3164             455 :                       mask = gimple_build (&stmts, BIT_XOR_EXPR,
 3165             455 :                                            TREE_TYPE (mask), mask, true_val);
 3166                 :                     }
 3167            1754 :                   gsi_insert_seq_before (&gsi, stmts, GSI_SAME_STMT);
 3168                 :
 3169                 :                   /* Save mask and its size for further use.  */
 3170            1754 :                   vect_sizes.safe_push (bitsize);
 3171            1754 :                   vect_masks.safe_push (mask);
 3172                 :                 }
 3173            2630 :               if (gimple_assign_single_p (stmt))
 3174            2015 :                 new_stmt = predicate_load_or_store (&gsi,
 3175                 :                                                     as_a <gassign *> (stmt),
 3176                 :                                                     mask);
 3177                 :               else
 3178             615 :                 new_stmt = predicate_rhs_code (stmt, mask, cond, &ssa_names);
 3179                 :
 3180            2630 :               if (new_stmt)
 3181            2630 :                 gsi_replace (&gsi, new_stmt, true);
 3182                 :             }
 3183           84779 :           else if (gimple_needing_rewrite_undefined (stmt))
 3184           17922 :             rewrite_to_defined_unconditional (&gsi);
 3185          133714 :           else if (gimple_vdef (stmt))
 3186                 :             {
 3187            1564 :               tree lhs = gimple_assign_lhs (stmt);
 3188            1564 :               tree rhs = gimple_assign_rhs1 (stmt);
 3189            1564 :               tree type = TREE_TYPE (lhs);
 3190                 :
                      :               /* Turn the store into LHS = COND ? RHS : LHS so it is
                      :                  unconditional (see the function comment).  */
 3191            1564 :               lhs = ifc_temp_var (type, unshare_expr (lhs), &gsi);
 3192            1564 :               rhs = ifc_temp_var (type, unshare_expr (rhs), &gsi);
 3193            1564 :               if (swap)
 3194             559 :                 std::swap (lhs, rhs);
 3195            1564 :               cond = force_gimple_operand_gsi (&gsi, unshare_expr (cond), true,
 3196                 :                                                NULL_TREE, true, GSI_SAME_STMT);
 3197            1564 :               rhs = fold_build_cond_expr (type, unshare_expr (cond), rhs, lhs);
 3198            1564 :               gimple_assign_set_rhs1 (stmt, ifc_temp_var (type, rhs, &gsi));
 3199            1564 :               update_stmt (stmt);
 3200                 :             }
 3201                 :
 3202          136756 :           if (gimple_plf (gsi_stmt (gsi), GF_PLF_2)
 3203          136756 :               && is_gimple_call (gsi_stmt (gsi)))
 3204                 :             {
 3205                 :               /* Convert functions that have a SIMD clone to IFN_MASK_CALL.
 3206                 :                  This will cause the vectorizer to match the "in branch"
 3207                 :                  clone variants, and serves to build the mask vector
 3208                 :                  in a natural way.  */
 3209             999 :               tree mask = cond;
 3210             999 :               gcall *call = dyn_cast <gcall *> (gsi_stmt (gsi));
 3211             999 :               tree orig_fn = gimple_call_fn (call);
 3212             999 :               int orig_nargs = gimple_call_num_args (call);
 3213             999 :               auto_vec<tree> args;
 3214             999 :               args.safe_push (orig_fn);
 3215            2013 :               for (int i = 0; i < orig_nargs; i++)
 3216            1014 :                 args.safe_push (gimple_call_arg (call, i));
 3217                 :               /* If `swap', we invert the mask used for the if branch for use
 3218                 :                  when masking the function call.  */
 3219             999 :               if (swap)
 3220                 :                 {
 3221             948 :                   gimple_seq stmts = NULL;
 3222             948 :                   tree true_val
 3223             948 :                     = constant_boolean_node (true, TREE_TYPE (mask));
 3224             948 :                   mask = gimple_build (&stmts, BIT_XOR_EXPR,
 3225             948 :                                        TREE_TYPE (mask), mask, true_val);
 3226             948 :                   gsi_insert_seq_before (&gsi, stmts, GSI_SAME_STMT);
 3227                 :                 }
 3228             999 :               args.safe_push (mask);
 3229                 :
 3230                 :               /* Replace the call with a IFN_MASK_CALL that has the extra
 3231                 :                  condition parameter.  */
 3232             999 :               gcall *new_call = gimple_build_call_internal_vec (IFN_MASK_CALL,
 3233                 :                                                                 args);
 3234             999 :               gimple_call_set_lhs (new_call, gimple_call_lhs (call));
 3235             999 :               gsi_replace (&gsi, new_call, true);
 3236             999 :             }
 3237                 :
                      :           /* Record this block's defs for value_available_p.  */
 3238          136756 :           tree lhs = gimple_get_lhs (gsi_stmt (gsi));
 3239          136756 :           if (lhs && TREE_CODE (lhs) == SSA_NAME)
 3240           85115 :             ssa_names.add (lhs);
 3241          136756 :           gsi_next (&gsi);
 3242                 :         }
 3243           51041 :       ssa_names.empty ();
 3244                 :     }
 3245           11960 : }
3246 :
3247 : /* Remove all GIMPLE_CONDs and GIMPLE_LABELs and GIMPLE_SWITCH of all
3248 : the basic blocks other than the exit and latch of the LOOP. Also
3249 : resets the GIMPLE_DEBUG information. */
3250 :
 3251                 : static void
 3252           28988 : remove_conditions_and_labels (loop_p loop)
 3253                 : {
 3254           28988 :   gimple_stmt_iterator gsi;
 3255           28988 :   unsigned int i;
 3256                 :
 3257          177346 :   for (i = 0; i < loop->num_nodes; i++)
 3258                 :     {
 3259          148358 :       basic_block bb = ifc_bbs[i];
 3260                 :
                      :       /* Exit blocks and the latch keep their control statements.  */
 3261          148358 :       if (bb_with_exit_edge_p (loop, bb)
 3262          148358 :           || bb == loop->latch)
 3263           57976 :       continue;
 3264                 :
 3265          730785 :       for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); )
 3266          550021 :         switch (gimple_code (gsi_stmt (gsi)))
 3267                 :           {
 3268           37072 :           case GIMPLE_COND:
 3269           37072 :           case GIMPLE_LABEL:
 3270           37072 :           case GIMPLE_SWITCH:
 3271           37072 :             gsi_remove (&gsi, true);
 3272           37072 :             break;
 3273                 :
 3274          320850 :           case GIMPLE_DEBUG:
 3275                 :             /* ??? Should there be conditional GIMPLE_DEBUG_BINDs?  */
 3276          320850 :             if (gimple_debug_bind_p (gsi_stmt (gsi)))
 3277                 :               {
 3278          247107 :                 gimple_debug_bind_reset_value (gsi_stmt (gsi));
 3279          247107 :                 update_stmt (gsi_stmt (gsi));
 3280                 :               }
 3281          320850 :             gsi_next (&gsi);
 3282          320850 :             break;
 3283                 :
 3284          192099 :           default:
 3285          192099 :             gsi_next (&gsi);
 3286                 :           }
 3287                 :     }
 3288           28988 : }
3289 :
3290 : /* Combine all the basic blocks from LOOP into one or two super basic
3291 : blocks. Replace PHI nodes with conditional modify expressions.
3292 : LOOP_VERSIONED should be true if we know that the loop was versioned for
3293 : vectorization. */
3294 :
 3295                 : static void
 3296           28988 : combine_blocks (class loop *loop, bool loop_versioned)
 3297                 : {
 3298           28988 :   basic_block bb, exit_bb, merge_target_bb;
 3299           28988 :   unsigned int orig_loop_num_nodes = loop->num_nodes;
 3300           28988 :   unsigned int i;
 3301           28988 :   edge e;
 3302           28988 :   edge_iterator ei;
 3303                 :
 3304                 :   /* Reset flow-sensitive info before predicating stmts or PHIs we
 3305                 :      might fold.  */
 3306          177346 :   for (i = 0; i < orig_loop_num_nodes; i++)
 3307                 :     {
 3308          148358 :       bb = ifc_bbs[i];
 3309          148358 :       if (is_predicated (bb))
 3310                 :         {
 3311           57261 :           for (auto gsi = gsi_start_phis (bb);
 3312           58660 :                !gsi_end_p (gsi); gsi_next (&gsi))
 3313            1399 :             reset_flow_sensitive_info (gimple_phi_result (*gsi));
 3314          224468 :           for (auto gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
 3315                 :             {
 3316          109946 :               gimple *stmt = gsi_stmt (gsi);
 3317          109946 :               ssa_op_iter i;
 3318          109946 :               tree op;
 3319          159806 :               FOR_EACH_SSA_TREE_OPERAND (op, stmt, i, SSA_OP_DEF)
 3320           49860 :                 reset_flow_sensitive_info (op);
 3321                 :             }
 3322                 :         }
 3323                 :     }
 3324                 :
 3325           28988 :   remove_conditions_and_labels (loop);
 3326           28988 :   insert_gimplified_predicates (loop);
 3327           28988 :   predicate_all_scalar_phis (loop, loop_versioned);
 3328                 :
 3329           28988 :   if (need_to_predicate || need_to_rewrite_undefined)
 3330           11960 :     predicate_statements (loop);
 3331                 :
 3332                 :   /* Merge basic blocks.  */
 3333           28988 :   exit_bb = single_exit (loop)->src;
 3334           28988 :   gcc_assert (exit_bb != loop->latch);
 3335          177346 :   for (i = 0; i < orig_loop_num_nodes; i++)
 3336                 :     {
 3337          148358 :       bb = ifc_bbs[i];
 3338          148358 :       free_bb_predicate (bb);
 3339                 :     }
 3340                 :
 3341           28988 :   merge_target_bb = loop->header;
 3342                 :
 3343                 :   /* Get at the virtual def valid for uses starting at the first block
 3344                 :      we merge into the header.  Without a virtual PHI the loop has the
 3345                 :      same virtual use on all stmts.  */
 3346           28988 :   gphi *vphi = get_virtual_phi (loop->header);
 3347           28988 :   tree last_vdef = NULL_TREE;
 3348           28988 :   if (vphi)
 3349                 :     {
 3350           17280 :       last_vdef = gimple_phi_result (vphi);
 3351           34560 :       for (gimple_stmt_iterator gsi = gsi_start_bb (loop->header);
 3352          253721 :            ! gsi_end_p (gsi); gsi_next (&gsi))
 3353          333296 :         if (gimple_vdef (gsi_stmt (gsi)))
 3354            5076 :           last_vdef = gimple_vdef (gsi_stmt (gsi));
 3355                 :     }
 3356          148358 :   for (i = 1; i < orig_loop_num_nodes; i++)
 3357                 :     {
 3358          119370 :       gimple_stmt_iterator gsi;
 3359          119370 :       gimple_stmt_iterator last;
 3360                 :
 3361          119370 :       bb = ifc_bbs[i];
 3362                 :
                      :       /* The exit block and the latch survive the merge.  */
 3363          119370 :       if (bb == exit_bb || bb == loop->latch)
 3364           57976 :         continue;
 3365                 :
 3366                 :       /* We release virtual PHIs late because we have to propagate them
 3367                 :          out using the current VUSE.  The def might be the one used
 3368                 :          after the loop.  */
 3369           61394 :       vphi = get_virtual_phi (bb);
 3370           61394 :       if (vphi)
 3371                 :         {
 3372                 :           /* When there's just loads inside the loop a stray virtual
 3373                 :              PHI merging the uses can appear, update last_vdef from
 3374                 :              it.  */
 3375             564 :           if (!last_vdef)
 3376               0 :             last_vdef = gimple_phi_arg_def (vphi, 0);
 3377             564 :           imm_use_iterator iter;
 3378             564 :           use_operand_p use_p;
 3379             564 :           gimple *use_stmt;
 3380            2537 :           FOR_EACH_IMM_USE_STMT (use_stmt, iter, gimple_phi_result (vphi))
 3381                 :             {
 3382            4229 :               FOR_EACH_IMM_USE_ON_STMT (use_p, iter)
 3383            1410 :                 SET_USE (use_p, last_vdef);
 3384             564 :             }
 3385             564 :           if (SSA_NAME_OCCURS_IN_ABNORMAL_PHI (gimple_phi_result (vphi)))
 3386               0 :             SSA_NAME_OCCURS_IN_ABNORMAL_PHI (last_vdef) = 1;
 3387             564 :           gsi = gsi_for_stmt (vphi);
 3388             564 :           remove_phi_node (&gsi, true);
 3389                 :         }
 3390                 :
 3391                 :       /* Make stmts member of loop->header and clear range info from all stmts
 3392                 :          in BB which is now no longer executed conditional on a predicate we
 3393                 :          could have derived it from.  */
 3394          378350 :       for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
 3395                 :         {
 3396          255562 :           gimple *stmt = gsi_stmt (gsi);
 3397          255562 :           gimple_set_bb (stmt, merge_target_bb);
 3398                 :           /* Update virtual operands.  */
 3399          255562 :           if (last_vdef)
 3400                 :             {
 3401          196078 :               use_operand_p use_p = ssa_vuse_operand (stmt);
 3402           14612 :               if (use_p
 3403           14612 :                   && USE_FROM_PTR (use_p) != last_vdef)
 3404             719 :                 SET_USE (use_p, last_vdef);
 3405          597029 :               if (gimple_vdef (stmt))
 3406          255562 :                 last_vdef = gimple_vdef (stmt);
 3407                 :             }
 3408                 :           else
 3409                 :             /* If this is the first load we arrive at update last_vdef
 3410                 :                so we handle stray PHIs correctly.  */
 3411          295860 :             last_vdef = gimple_vuse (stmt);
 3412                 :         }
 3413                 :
 3414                 :       /* Update stmt list.  */
 3415           61394 :       last = gsi_last_bb (merge_target_bb);
 3416          122788 :       gsi_insert_seq_after_without_update (&last, bb_seq (bb), GSI_NEW_STMT);
 3417           61394 :       set_bb_seq (bb, NULL);
 3418                 :     }
 3419                 :
 3420                 :   /* Fixup virtual operands in the exit block.  */
 3421           28988 :   if (exit_bb
 3422           28988 :       && exit_bb != loop->header)
 3423                 :     {
 3424                 :       /* We release virtual PHIs late because we have to propagate them
 3425                 :          out using the current VUSE.  The def might be the one used
 3426                 :          after the loop.  */
 3427           28988 :       vphi = get_virtual_phi (exit_bb);
 3428           28988 :       if (vphi)
 3429                 :         {
 3430                 :           /* When there's just loads inside the loop a stray virtual
 3431                 :              PHI merging the uses can appear, update last_vdef from
 3432                 :              it.  */
 3433            1769 :           if (!last_vdef)
 3434               0 :             last_vdef = gimple_phi_arg_def (vphi, 0);
 3435            1769 :           imm_use_iterator iter;
 3436            1769 :           use_operand_p use_p;
 3437            1769 :           gimple *use_stmt;
 3438            7097 :           FOR_EACH_IMM_USE_STMT (use_stmt, iter, gimple_phi_result (vphi))
 3439                 :             {
 3440           10677 :               FOR_EACH_IMM_USE_ON_STMT (use_p, iter)
 3441            3559 :                 SET_USE (use_p, last_vdef);
 3442            1769 :             }
 3443            1769 :           if (SSA_NAME_OCCURS_IN_ABNORMAL_PHI (gimple_phi_result (vphi)))
 3444               0 :             SSA_NAME_OCCURS_IN_ABNORMAL_PHI (last_vdef) = 1;
 3445            1769 :           gimple_stmt_iterator gsi = gsi_for_stmt (vphi);
 3446            1769 :           remove_phi_node (&gsi, true);
 3447                 :         }
 3448                 :     }
 3449                 :
 3450                 :   /* Now remove all the edges in the loop, except for those from the exit
 3451                 :      block and delete the blocks we elided.  */
 3452          148358 :   for (i = 1; i < orig_loop_num_nodes; i++)
 3453                 :     {
 3454          119370 :       bb = ifc_bbs[i];
 3455                 :
 3456          275437 :       for (ei = ei_start (bb->preds); (e = ei_safe_edge (ei));)
 3457                 :         {
 3458          156067 :           if (e->src == exit_bb)
 3459           28988 :             ei_next (&ei);
 3460                 :           else
 3461          127079 :             remove_edge (e);
 3462                 :         }
 3463                 :     }
 3464          148358 :   for (i = 1; i < orig_loop_num_nodes; i++)
 3465                 :     {
 3466          119370 :       bb = ifc_bbs[i];
 3467                 :
 3468          119370 :       if (bb == exit_bb || bb == loop->latch)
 3469           57976 :         continue;
 3470                 :
 3471           61394 :       delete_basic_block (bb);
 3472                 :     }
 3473                 :
 3474                 :   /* Re-connect the exit block.  */
 3475           28988 :   if (exit_bb != NULL)
 3476                 :     {
 3477           28988 :       if (exit_bb != loop->header)
 3478                 :         {
 3479                 :           /* Connect this node to loop header.  */
 3480           28988 :           make_single_succ_edge (loop->header, exit_bb, EDGE_FALLTHRU);
 3481           28988 :           set_immediate_dominator (CDI_DOMINATORS, exit_bb, loop->header);
 3482                 :         }
 3483                 :
 3484                 :       /* Redirect non-exit edges to loop->latch.  */
 3485           86964 :       FOR_EACH_EDGE (e, ei, exit_bb->succs)
 3486                 :         {
 3487           57976 :           if (!loop_exit_edge_p (loop, e))
 3488           28988 :             redirect_edge_and_branch (e, loop->latch);
 3489                 :         }
 3490           28988 :       set_immediate_dominator (CDI_DOMINATORS, loop->latch, exit_bb);
 3491                 :     }
 3492                 :   else
 3493                 :     {
 3494                 :       /* If the loop does not have an exit, reconnect header and latch.  */
 3495               0 :       make_edge (loop->header, loop->latch, EDGE_FALLTHRU);
 3496               0 :       set_immediate_dominator (CDI_DOMINATORS, loop->latch, loop->header);
 3497                 :     }
 3498                 :
 3499                 :   /* If possible, merge loop header to the block with the exit edge.
 3500                 :      This reduces the number of basic blocks to two, to please the
 3501                 :      vectorizer that handles only loops with two nodes.  */
 3502           28988 :   if (exit_bb
 3503           28988 :       && exit_bb != loop->header)
 3504                 :     {
 3505           28988 :       if (can_merge_blocks_p (loop->header, exit_bb))
 3506           28986 :         merge_blocks (loop->header, exit_bb);
 3507                 :     }
 3508                 :
 3509           28988 :   free (ifc_bbs);
 3510           28988 :   ifc_bbs = NULL;
 3511           28988 : }
3512 :
3513 : /* Version LOOP before if-converting it; the original loop
3514 : will be if-converted, the new copy of the loop will not,
3515 : and the LOOP_VECTORIZED internal call will be guarding which
3516 : loop to execute. The vectorizer pass will fold this
3517 : internal call into either true or false.
3518 :
3519 : Note that this function intentionally invalidates profile. Both edges
3520 : out of LOOP_VECTORIZED must have 100% probability so the profile remains
3521 : consistent after the condition is folded in the vectorizer. */
3522 :
 3523                 : static class loop *
 3524           29194 : version_loop_for_if_conversion (class loop *loop, vec<gimple *> *preds)
 3525                 : {
 3526           29194 :   basic_block cond_bb;
 3527           29194 :   tree cond = make_ssa_name (boolean_type_node);
 3528           29194 :   class loop *new_loop;
 3529           29194 :   gimple *g;
 3530           29194 :   gimple_stmt_iterator gsi;
 3531           29194 :   unsigned int save_length = 0;
 3532                 :
 3533           29194 :   g = gimple_build_call_internal (IFN_LOOP_VECTORIZED, 2,
 3534           29194 :                                   build_int_cst (integer_type_node, loop->num),
 3535                 :                                   integer_zero_node);
 3536           29194 :   gimple_call_set_lhs (g, cond);
 3537                 :
 3538           29194 :   void **saved_preds = NULL;
 3539           29194 :   if (any_complicated_phi || need_to_predicate)
 3540                 :     {
 3541                 :       /* Save BB->aux around loop_version as that uses the same field.  */
 3542            3534 :       save_length = loop->inner ? loop->inner->num_nodes : loop->num_nodes;
 3543            3534 :       saved_preds = XALLOCAVEC (void *, save_length);
 3544           26512 :       for (unsigned i = 0; i < save_length; i++)
 3545           22978 :         saved_preds[i] = ifc_bbs[i]->aux;
 3546                 :     }
 3547                 :
 3548           29194 :   initialize_original_copy_tables ();
 3549                 :   /* At this point we invalidate profile consistency until IFN_LOOP_VECTORIZED
 3550                 :      is re-merged in the vectorizer.  */
 3551           29194 :   new_loop = loop_version (loop, cond, &cond_bb,
 3552                 :                            profile_probability::always (),
 3553                 :                            profile_probability::always (),
 3554                 :                            profile_probability::always (),
 3555                 :                            profile_probability::always (), true);
 3556           29194 :   free_original_copy_tables ();
 3557                 :
                      :   /* Restore the predicates stashed in BB->aux above.  */
 3558           29194 :   if (any_complicated_phi || need_to_predicate)
 3559           26512 :     for (unsigned i = 0; i < save_length; i++)
 3560           22978 :       ifc_bbs[i]->aux = saved_preds[i];
 3561                 :
 3562           29194 :   if (new_loop == NULL)
 3563                 :     return NULL;
 3564                 :
 3565           29194 :   new_loop->dont_vectorize = true;
 3566           29194 :   new_loop->force_vectorize = false;
 3567           29194 :   gsi = gsi_last_bb (cond_bb);
 3568           29194 :   gimple_call_set_arg (g, 1, build_int_cst (integer_type_node, new_loop->num));
 3569           29194 :   if (preds)
 3570           29194 :     preds->safe_push (g);
 3571           29194 :   gsi_insert_before (&gsi, g, GSI_SAME_STMT);
 3572           29194 :   update_ssa (TODO_update_ssa_no_phi);
 3573           29194 :   return new_loop;
 3574                 : }
3575 :
3576 : /* Return true when LOOP satisfies the follow conditions that will
3577 : allow it to be recognized by the vectorizer for outer-loop
3578 : vectorization:
3579 : - The loop is not the root node of the loop tree.
3580 : - The loop has exactly one inner loop.
3581 : - The loop has a single exit.
3582 : - The loop header has a single successor, which is the inner
3583 : loop header.
3584 : - Each of the inner and outer loop latches have a single
3585 : predecessor.
3586 : - The loop exit block has a single predecessor, which is the
3587 : inner loop's exit block. */
3588 :
 3589                 : static bool
 3590           29194 : versionable_outer_loop_p (class loop *loop)
 3591                 : {
                      :   /* Checks are ordered cheapest-first; see the function comment for
                      :      the full list of requirements.  */
 3592           29194 :   if (!loop_outer (loop)
 3593            8110 :       || loop->dont_vectorize
 3594            7141 :       || !loop->inner
 3595            7141 :       || loop->inner->next
 3596            2889 :       || !single_exit (loop)
 3597            2228 :       || !single_succ_p (loop->header)
 3598             929 :       || single_succ (loop->header) != loop->inner->header
 3599             929 :       || !single_pred_p (loop->latch)
 3600           37304 :       || !single_pred_p (loop->inner->latch))
 3601           28265 :     return false;
 3602                 :
 3603             929 :   basic_block outer_exit = single_pred (loop->latch);
 3604             929 :   basic_block inner_exit = single_pred (loop->inner->latch);
 3605                 :
 3606           30116 :   if (!single_pred_p (outer_exit) || single_pred (outer_exit) != inner_exit)
 3607                 :     return false;
 3608                 :
 3609             921 :   if (dump_file)
 3610               0 :     fprintf (dump_file, "Found vectorizable outer loop for versioning\n");
 3611                 :
 3612                 :   return true;
 3613                 : }
3614 :
3615 : /* Performs splitting of critical edges. Skip splitting and return false
3616 : if LOOP will not be converted because:
3617 :
3618 : - LOOP is not well formed.
3619 : - LOOP has PHI with more than MAX_PHI_ARG_NUM arguments.
3620 :
3621 : Last restriction is valid only if AGGRESSIVE_IF_CONV is false. */
3622 :
 3623                 : static bool
 3624          315967 : ifcvt_split_critical_edges (class loop *loop, bool aggressive_if_conv)
 3625                 : {
 3626          315967 :   basic_block *body;
 3627          315967 :   basic_block bb;
 3628          315967 :   unsigned int num = loop->num_nodes;
 3629          315967 :   unsigned int i;
 3630          315967 :   edge e;
 3631          315967 :   edge_iterator ei;
 3632          315967 :   auto_vec<edge> critical_edges;
 3633                 :
 3634                 :   /* Loop is not well formed.  */
 3635          315967 :   if (loop->inner)
 3636                 :     return false;
 3637                 :
 3638          227181 :   body = get_loop_body (loop);
 3639         1822332 :   for (i = 0; i < num; i++)
 3640                 :     {
 3641         1373178 :       bb = body[i];
 3642         1373178 :       if (!aggressive_if_conv
 3643         1364936 :           && phi_nodes (bb)
 3644         1798727 :           && EDGE_COUNT (bb->preds) > MAX_PHI_ARG_NUM)
 3645                 :         {
 3646            5208 :           if (dump_file && (dump_flags & TDF_DETAILS))
 3647               0 :             fprintf (dump_file,
 3648                 :                      "BB %d has complicated PHI with more than %u args.\n",
 3649                 :                      bb->index, MAX_PHI_ARG_NUM);
 3650                 :
 3651            5208 :           free (body);
 3652            5208 :           return false;
 3653                 :         }
 3654         1367970 :       if (bb == loop->latch || bb_with_exit_edge_p (loop, bb))
 3655          772603 :         continue;
 3656                 :
 3657                 :       /* Skip basic blocks not ending with conditional branch.  */
 3658         1420572 :       if (!safe_is_a <gcond *> (*gsi_last_bb (bb))
 3659          595367 :           && !safe_is_a <gswitch *> (*gsi_last_bb (bb)))
 3660          333498 :         continue;
 3661                 :
                      :       /* Only collect critical edges that stay within LOOP.  */
 3662          787289 :       FOR_EACH_EDGE (e, ei, bb->succs)
 3663          644792 :         if (EDGE_CRITICAL_P (e) && e->dest->loop_father == loop)
 3664          119372 :           critical_edges.safe_push (e);
 3665                 :     }
 3666          221973 :   free (body);
 3667                 :
 3668          432846 :   while (critical_edges.length () > 0)
 3669                 :     {
 3670          116879 :       e = critical_edges.pop ();
 3671                 :       /* Don't split if bb can be predicated along non-critical edge.  */
 3672          116879 :       if (EDGE_COUNT (e->dest->preds) > 2 || all_preds_critical_p (e->dest))
 3673           55360 :         split_edge (e);
 3674                 :     }
 3675                 :
 3676                 :   return true;
 3677          315967 : }
3678 :
3679 : /* Delete redundant statements produced by predication which prevents
3680 : loop vectorization. */
3681 :
3682 : static void
3683 29243 : ifcvt_local_dce (class loop *loop)
3684 : {
3685 29243 : gimple *stmt;
3686 29243 : gimple *stmt1;
3687 29243 : gimple *phi;
3688 29243 : gimple_stmt_iterator gsi;
3689 29243 : auto_vec<gimple *> worklist;
3690 29243 : enum gimple_code code;
3691 29243 : use_operand_p use_p;
3692 29243 : imm_use_iterator imm_iter;
3693 :
3694 : /* The loop has a single BB only. */
         : /* By the time this runs, combine_blocks has merged the loop into its
         : header, so liveness only needs to be tracked within that one block.
         : The GF_PLF_2 pass-local flag marks a statement as "live". */
3695 29243 : basic_block bb = loop->header;
3696 29243 : tree latch_vdef = NULL_TREE;
3697 :
3698 29243 : worklist.create (64);
3699 : /* Consider all phi as live statements. */
3700 113792 : for (gsi = gsi_start_phis (bb); !gsi_end_p (gsi); gsi_next (&gsi))
3701 : {
3702 84549 : phi = gsi_stmt (gsi);
3703 84549 : gimple_set_plf (phi, GF_PLF_2, true);
3704 84549 : worklist.safe_push (phi);
         : /* Remember the VDEF reaching the latch; dse_classify_store below
         : uses it to decide whether a store is dead across the back
         : edge. */
3705 186566 : if (virtual_operand_p (gimple_phi_result (phi)))
3706 17468 : latch_vdef = PHI_ARG_DEF_FROM_EDGE (phi, loop_latch_edge (loop));
3707 : }
3708 : /* Consider load/store statements, CALL and COND as live. */
3709 900024 : for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
3710 : {
3711 841538 : stmt = gsi_stmt (gsi);
         : /* Debug stmts are marked live but never drive propagation. */
3712 841538 : if (is_gimple_debug (stmt))
3713 : {
3714 422989 : gimple_set_plf (stmt, GF_PLF_2, true);
3715 422989 : continue;
3716 : }
3717 418549 : if (gimple_store_p (stmt) || gimple_assign_load_p (stmt))
3718 : {
3719 77432 : gimple_set_plf (stmt, GF_PLF_2, true);
3720 77432 : worklist.safe_push (stmt);
3721 77432 : continue;
3722 : }
3723 341117 : code = gimple_code (stmt);
3724 341117 : if (code == GIMPLE_COND || code == GIMPLE_CALL || code == GIMPLE_SWITCH)
3725 : {
3726 34723 : gimple_set_plf (stmt, GF_PLF_2, true);
3727 34723 : worklist.safe_push (stmt);
3728 34723 : continue;
3729 : }
         : /* Everything else starts out dead ... */
3730 306394 : gimple_set_plf (stmt, GF_PLF_2, false);
3731 :
         : /* ... unless its result is used outside this block (a real use
         : escaping the loop body keeps the def alive). */
3732 306394 : if (code == GIMPLE_ASSIGN)
3733 : {
3734 306385 : tree lhs = gimple_assign_lhs (stmt);
3735 1022349 : FOR_EACH_IMM_USE_FAST (use_p, imm_iter, lhs)
3736 : {
3737 428980 : stmt1 = USE_STMT (use_p);
3738 428980 : if (!is_gimple_debug (stmt1) && gimple_bb (stmt1) != bb)
3739 : {
3740 19401 : gimple_set_plf (stmt, GF_PLF_2, true);
3741 19401 : worklist.safe_push (stmt);
3742 19401 : break;
3743 : }
3744 306385 : }
3745 : }
3746 : }
3747 : /* Propagate liveness through arguments of live stmt. */
         : /* Standard mark-phase worklist: any in-block def feeding a live
         : statement becomes live itself, transitively. */
3748 515953 : while (worklist.length () > 0)
3749 : {
3750 486710 : ssa_op_iter iter;
3751 486710 : use_operand_p use_p;
3752 486710 : tree use;
3753 :
3754 486710 : stmt = worklist.pop ();
3755 1715352 : FOR_EACH_PHI_OR_STMT_USE (use_p, stmt, iter, SSA_OP_USE)
3756 : {
3757 741932 : use = USE_FROM_PTR (use_p);
3758 741932 : if (TREE_CODE (use) != SSA_NAME)
3759 44512 : continue;
3760 697420 : stmt1 = SSA_NAME_DEF_STMT (use);
3761 697420 : if (gimple_bb (stmt1) != bb || gimple_plf (stmt1, GF_PLF_2))
3762 426815 : continue;
3763 270605 : gimple_set_plf (stmt1, GF_PLF_2, true);
3764 270605 : worklist.safe_push (stmt1);
3765 : }
3766 : }
3767 : /* Delete dead statements. */
         : /* Sweep backwards so the predecessor iterator (taken before any
         : removal) stays valid when the current statement is deleted. */
3768 29243 : gsi = gsi_last_bb (bb);
3769 870781 : while (!gsi_end_p (gsi))
3770 : {
3771 841538 : gimple_stmt_iterator gsiprev = gsi;
3772 841538 : gsi_prev (&gsiprev);
3773 841538 : stmt = gsi_stmt (gsi);
         : /* Non-volatile stores get a dedicated dead-store check relative
         : to the latch VDEF instead of the plain liveness mark. */
3774 841538 : if (!gimple_has_volatile_ops (stmt)
3775 841397 : && gimple_store_p (stmt)
3776 430815 : && gimple_vdef (stmt))
3777 : {
3778 20757 : tree lhs = gimple_get_lhs (stmt);
3779 20757 : ao_ref write;
3780 20757 : ao_ref_init (&write, lhs);
3781 :
3782 20757 : if (dse_classify_store (&write, stmt, false, NULL, NULL, latch_vdef)
3783 : == DSE_STORE_DEAD)
3784 343 : delete_dead_or_redundant_assignment (&gsi, "dead");
3785 20757 : gsi = gsiprev;
3786 20757 : continue;
3787 20757 : }
3788 :
3789 820781 : if (gimple_plf (stmt, GF_PLF_2))
3790 : {
3791 804393 : gsi = gsiprev;
3792 804393 : continue;
3793 : }
3794 16388 : if (dump_file && (dump_flags & TDF_DETAILS))
3795 : {
3796 16 : fprintf (dump_file, "Delete dead stmt in bb#%d\n", bb->index);
3797 16 : print_gimple_stmt (dump_file, stmt, 0, TDF_SLIM);
3798 : }
3799 16388 : gsi_remove (&gsi, true);
3800 16388 : release_defs (stmt);
3801 16388 : gsi = gsiprev;
3802 : }
3803 29243 : }
3804 :
3805 : /* Return true if VALUE is already available on edge PE. */
3806 :
3807 : static bool
3808 263677 : ifcvt_available_on_edge_p (edge pe, tree value)
3809 : {
         : /* Constants are available everywhere. */
3810 263677 : if (is_gimple_min_invariant (value))
3811 : return true;
3812 :
3813 257509 : if (TREE_CODE (value) == SSA_NAME)
3814 : {
         : /* An SSA name is available if its definition dominates PE's
         : destination. A NULL def_bb presumably means a default
         : definition (available from function entry) -- TODO confirm. */
3815 256356 : basic_block def_bb = gimple_bb (SSA_NAME_DEF_STMT (value));
3816 256356 : if (!def_bb || dominated_by_p (CDI_DOMINATORS, pe->dest, def_bb))
3817 26942 : return true;
3818 : }
3819 :
3820 : return false;
3821 : }
3822 :
3823 : /* Return true if STMT can be hoisted from if-converted loop LOOP to
3824 : edge PE. */
3825 :
3826 : static bool
3827 824807 : ifcvt_can_hoist (class loop *loop, edge pe, gimple *stmt)
3828 : {
         : /* Only plain calls and assignments are considered. Masked internal
         : calls and COND_EXPRs encode the if-converted predicate and so must
         : stay inside the loop. */
3829 824807 : if (auto *call = dyn_cast<gcall *> (stmt))
3830 : {
3831 5486 : if (gimple_call_internal_p (call)
3832 5486 : && internal_fn_mask_index (gimple_call_internal_fn (call)) >= 0)
3833 : return false;
3834 : }
3835 1187649 : else if (auto *assign = dyn_cast<gassign *> (stmt))
3836 : {
3837 445345 : if (gimple_assign_rhs_code (assign) == COND_EXPR)
3838 : return false;
3839 : }
3840 : else
3841 : return false;
3842 :
         : /* Refuse side effects, anything trapping/throwing, and any memory
         : access (no vdef/vuse): hoisting must not change observable
         : behavior when the loop body would not have executed. */
3843 324044 : if (gimple_has_side_effects (stmt)
3844 322824 : || gimple_could_trap_p (stmt)
3845 242545 : || stmt_could_throw_p (cfun, stmt)
3846 242543 : || gimple_vdef (stmt)
3847 565927 : || gimple_vuse (stmt))
3848 83212 : return false;
3849 :
         : /* Operand availability test depends on the insertion point: for a
         : non-preheader edge use dominance, for the preheader use loop
         : invariance. */
3850 240832 : int num_args = gimple_num_args (stmt);
3851 240832 : if (pe != loop_preheader_edge (loop))
3852 : {
3853 267658 : for (int i = 0; i < num_args; ++i)
3854 263677 : if (!ifcvt_available_on_edge_p (pe, gimple_arg (stmt, i)))
3855 : return false;
3856 : }
3857 : else
3858 : {
3859 8007 : for (int i = 0; i < num_args; ++i)
3860 7737 : if (!expr_invariant_in_loop_p (loop, gimple_arg (stmt, i)))
3861 : return false;
3862 : }
3863 :
3864 : return true;
3865 : }
3866 :
3867 : /* Hoist invariant statements from LOOP to edge PE. */
3868 :
3869 : static void
3870 29243 : ifcvt_hoist_invariants (class loop *loop, edge pe)
3871 : {
3872 : /* Only hoist from the now unconditionally executed part of the loop. */
3873 29243 : basic_block bb = loop->header;
         : /* HOIST_GSI tracks the last hoisted statement so later hoists keep
         : original order; a null .ptr means nothing hoisted yet. */
3874 29243 : gimple_stmt_iterator hoist_gsi = {};
3875 883293 : for (gimple_stmt_iterator gsi = gsi_start_bb (bb); !gsi_end_p (gsi);)
3876 : {
3877 824807 : gimple *stmt = gsi_stmt (gsi);
3878 824807 : if (ifcvt_can_hoist (loop, pe, stmt))
3879 : {
3880 : /* Once we've hoisted one statement, insert other statements
3881 : after it. */
         : /* gsi_remove with false: the statement is re-inserted, not
         : freed. */
3882 4251 : gsi_remove (&gsi, false);
3883 4251 : if (hoist_gsi.ptr)
3884 1831 : gsi_insert_after (&hoist_gsi, stmt, GSI_NEW_STMT);
3885 : else
3886 : {
3887 2420 : gsi_insert_on_edge_immediate (pe, stmt);
3888 2420 : hoist_gsi = gsi_for_stmt (stmt);
3889 : }
         : /* gsi already advanced by the removal; do not step again. */
3890 4251 : continue;
3891 : }
3892 820556 : gsi_next (&gsi);
3893 : }
3894 29243 : }
3895 :
3896 : /* Returns the DECL_FIELD_BIT_OFFSET of the bitfield accesse in stmt iff its
3897 : type mode is not BLKmode. If BITPOS is not NULL it will hold the poly_int64
3898 : value of the DECL_FIELD_BIT_OFFSET of the bitfield access and STRUCT_EXPR,
3899 : if not NULL, will hold the tree representing the base struct of this
3900 : bitfield. */
3901 :
3902 : static tree
3903 1205 : get_bitfield_rep (gassign *stmt, bool write, tree *bitpos,
3904 : tree *struct_expr)
3905 : {
         : /* For a write the COMPONENT_REF is the LHS; for a read it is the
         : first RHS operand. */
3906 1205 : tree comp_ref = write ? gimple_assign_lhs (stmt)
3907 386 : : gimple_assign_rhs1 (stmt);
3908 :
3909 1205 : tree field_decl = TREE_OPERAND (comp_ref, 1);
3910 1205 : tree ref_offset = component_ref_field_offset (comp_ref);
3911 1205 : tree rep_decl = DECL_BIT_FIELD_REPRESENTATIVE (field_decl);
3912 :
3913 : /* Bail out if the representative is not a suitable type for a scalar
3914 : register variable. */
3915 1205 : if (!is_gimple_reg_type (TREE_TYPE (rep_decl)))
3916 : return NULL_TREE;
3917 :
3918 : /* Bail out if the DECL_SIZE of the field_decl isn't the same as the BF's
3919 : precision. */
3920 1190 : unsigned HOST_WIDE_INT bf_prec
3921 1190 : = TYPE_PRECISION (TREE_TYPE (gimple_assign_lhs (stmt)));
3922 1190 : if (compare_tree_int (DECL_SIZE (field_decl), bf_prec) != 0)
3923 : return NULL_TREE;
3924 :
         : /* Variable offsets (e.g. from variably-modified types) cannot be
         : folded into a constant bit position below. */
3925 1190 : if (TREE_CODE (DECL_FIELD_OFFSET (rep_decl)) != INTEGER_CST
3926 1190 : || TREE_CODE (ref_offset) != INTEGER_CST)
3927 : {
3928 2 : if (dump_file && (dump_flags & TDF_DETAILS))
3929 2 : fprintf (dump_file, "\t Bitfield NOT OK to lower,"
3930 : " offset is non-constant.\n");
3931 2 : return NULL_TREE;
3932 : }
3933 :
3934 1188 : if (struct_expr)
3935 594 : *struct_expr = TREE_OPERAND (comp_ref, 0);
3936 :
3937 1188 : if (bitpos)
3938 : {
3939 : /* To calculate the bitposition of the BITFIELD_REF we have to determine
3940 : where our bitfield starts in relation to the container REP_DECL. The
3941 : DECL_FIELD_OFFSET of the original bitfield's member FIELD_DECL tells
3942 : us how many bytes from the start of the structure there are until the
3943 : start of the group of bitfield members the FIELD_DECL belongs to,
3944 : whereas DECL_FIELD_BIT_OFFSET will tell us how many bits from that
3945 : position our actual bitfield member starts. For the container
3946 : REP_DECL adding DECL_FIELD_OFFSET and DECL_FIELD_BIT_OFFSET will tell
3947 : us the distance between the start of the structure and the start of
3948 : the container, though the first is in bytes and the later other in
3949 : bits. With this in mind we calculate the bit position of our new
3950 : BITFIELD_REF by subtracting the number of bits between the start of
3951 : the structure and the container from the number of bits from the start
3952 : of the structure and the actual bitfield member. */
3953 594 : tree bf_pos = fold_build2 (MULT_EXPR, bitsizetype,
3954 : ref_offset,
3955 : build_int_cst (bitsizetype, BITS_PER_UNIT));
3956 594 : bf_pos = fold_build2 (PLUS_EXPR, bitsizetype, bf_pos,
3957 : DECL_FIELD_BIT_OFFSET (field_decl));
3958 594 : tree rep_pos = fold_build2 (MULT_EXPR, bitsizetype,
3959 : DECL_FIELD_OFFSET (rep_decl),
3960 : build_int_cst (bitsizetype, BITS_PER_UNIT));
3961 594 : rep_pos = fold_build2 (PLUS_EXPR, bitsizetype, rep_pos,
3962 : DECL_FIELD_BIT_OFFSET (rep_decl));
3963 :
3964 594 : *bitpos = fold_build2 (MINUS_EXPR, bitsizetype, bf_pos, rep_pos);
3965 : }
3966 :
3967 : return rep_decl;
3968 :
3969 : }
3970 :
3971 : /* Lowers the bitfield described by DATA.
3972 : For a write like:
3973 :
3974 : struct.bf = _1;
3975 :
3976 : lower to:
3977 :
3978 : __ifc_1 = struct.<representative>;
3979 : __ifc_2 = BIT_INSERT_EXPR (__ifc_1, _1, bitpos);
3980 : struct.<representative> = __ifc_2;
3981 :
3982 : For a read:
3983 :
3984 : _1 = struct.bf;
3985 :
3986 : lower to:
3987 :
3988 : __ifc_1 = struct.<representative>;
3989 : _1 = BIT_FIELD_REF (__ifc_1, bitsize, bitpos);
3990 :
3991 : where representative is a legal load that contains the bitfield value,
3992 : bitsize is the size of the bitfield and bitpos the offset to the start of
3993 : the bitfield within the representative. */
3994 :
3995 : static void
3996 594 : lower_bitfield (gassign *stmt, bool write)
3997 : {
3998 594 : tree struct_expr;
3999 594 : tree bitpos;
         : /* STMT was pre-screened by bitfields_to_lower_p, so get_bitfield_rep
         : is expected to succeed here -- note the result is not re-checked. */
4000 594 : tree rep_decl = get_bitfield_rep (stmt, write, &bitpos, &struct_expr);
4001 594 : tree rep_type = TREE_TYPE (rep_decl);
4002 594 : tree bf_type = TREE_TYPE (gimple_assign_lhs (stmt));
4003 :
4004 594 : gimple_stmt_iterator gsi = gsi_for_stmt (stmt);
4005 594 : if (dump_file && (dump_flags & TDF_DETAILS))
4006 : {
4007 9 : fprintf (dump_file, "Lowering:\n");
4008 9 : print_gimple_stmt (dump_file, stmt, 0, TDF_SLIM);
4009 9 : fprintf (dump_file, "to:\n");
4010 : }
4011 :
4012 : /* REP_COMP_REF is a COMPONENT_REF for the representative. NEW_VAL is it's
4013 : defining SSA_NAME. */
4014 594 : tree rep_comp_ref = build3 (COMPONENT_REF, rep_type, struct_expr, rep_decl,
4015 : NULL_TREE);
4016 594 : tree new_val = ifc_temp_var (rep_type, rep_comp_ref, &gsi);
4017 :
4018 594 : if (dump_file && (dump_flags & TDF_DETAILS))
4019 9 : print_gimple_stmt (dump_file, SSA_NAME_DEF_STMT (new_val), 0, TDF_SLIM);
4020 :
4021 594 : if (write)
4022 : {
         : /* Write: load representative, BIT_INSERT the new value at BITPOS,
         : store the representative back; virtual operands move to the new
         : store. */
4023 405 : new_val = ifc_temp_var (rep_type,
4024 : build3 (BIT_INSERT_EXPR, rep_type, new_val,
4025 : unshare_expr (gimple_assign_rhs1 (stmt)),
4026 : bitpos), &gsi);
4027 :
4028 405 : if (dump_file && (dump_flags & TDF_DETAILS))
4029 0 : print_gimple_stmt (dump_file, SSA_NAME_DEF_STMT (new_val), 0, TDF_SLIM);
4030 :
4031 405 : gimple *new_stmt = gimple_build_assign (unshare_expr (rep_comp_ref),
4032 : new_val);
4033 405 : gimple_move_vops (new_stmt, stmt);
4034 405 : gsi_insert_before (&gsi, new_stmt, GSI_SAME_STMT);
4035 :
4036 405 : if (dump_file && (dump_flags & TDF_DETAILS))
4037 0 : print_gimple_stmt (dump_file, new_stmt, 0, TDF_SLIM);
4038 : }
4039 : else
4040 : {
         : /* Read: extract TYPE_PRECISION (bf_type) bits at BITPOS from the
         : loaded representative into the original LHS. */
4041 189 : tree bfr = build3 (BIT_FIELD_REF, bf_type, new_val,
4042 189 : build_int_cst (bitsizetype, TYPE_PRECISION (bf_type)),
4043 : bitpos);
4044 189 : new_val = ifc_temp_var (bf_type, bfr, &gsi);
4045 :
4046 189 : gimple *new_stmt = gimple_build_assign (gimple_assign_lhs (stmt),
4047 : new_val);
4048 189 : gimple_move_vops (new_stmt, stmt);
4049 189 : gsi_insert_before (&gsi, new_stmt, GSI_SAME_STMT);
4050 :
4051 189 : if (dump_file && (dump_flags & TDF_DETAILS))
4052 9 : print_gimple_stmt (dump_file, new_stmt, 0, TDF_SLIM);
4053 : }
4054 :
         : /* The original bitfield access is now fully replaced. */
4055 594 : gsi_remove (&gsi, true);
4056 594 : }
4057 :
4058 : /* Return TRUE if there are bitfields to lower in this LOOP. Fill TO_LOWER
4059 : with data structures representing these bitfields. */
4060 :
4061 : static bool
4062 236763 : bitfields_to_lower_p (class loop *loop,
4063 : vec <gassign *> &reads_to_lower,
4064 : vec <gassign *> &writes_to_lower,
4065 : {
4066 236763 : gimple_stmt_iterator gsi;
4067 :
4068 236763 : if (dump_file && (dump_flags & TDF_DETAILS))
4069 : {
4070 31 : fprintf (dump_file, "Analyzing loop %d for bitfields:\n", loop->num);
4071 : }
4072 :
         : /* Scan every statement of every loop block (ifc_bbs was populated by
         : the caller) for COMPONENT_REF bitfield reads/writes. Any single
         : non-lowerable bitfield access makes the whole loop ineligible. */
4073 1006017 : for (unsigned i = 0; i < loop->num_nodes; ++i)
4074 : {
4075 769275 : basic_block bb = ifc_bbs[i];
4076 6358200 : for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
4077 : {
4078 4819671 : gassign *stmt = dyn_cast<gassign*> (gsi_stmt (gsi));
4079 4819671 : if (!stmt)
4080 4693982 : continue;
4081 :
         : /* A COMPONENT_REF on the LHS is a write; otherwise look at
         : the RHS for a read. */
4082 2019422 : tree op = gimple_assign_lhs (stmt);
4083 2019422 : bool write = TREE_CODE (op) == COMPONENT_REF;
4084 :
4085 2019422 : if (!write)
4086 1988021 : op = gimple_assign_rhs1 (stmt);
4087 :
4088 2019422 : if (TREE_CODE (op) != COMPONENT_REF)
4089 1893733 : continue;
4090 :
4091 125689 : if (DECL_BIT_FIELD_TYPE (TREE_OPERAND (op, 1)))
4092 : {
4093 615 : if (dump_file && (dump_flags & TDF_DETAILS))
4094 11 : print_gimple_stmt (dump_file, stmt, 0, TDF_SLIM);
4095 :
4096 615 : if (TREE_THIS_VOLATILE (op))
4097 : {
4098 4 : if (dump_file && (dump_flags & TDF_DETAILS))
4099 0 : fprintf (dump_file, "\t Bitfield NO OK to lower,"
4100 : " the access is volatile.\n");
4101 21 : return false;
4102 : }
4103 :
4104 611 : if (!INTEGRAL_TYPE_P (TREE_TYPE (op)))
4105 : {
4106 0 : if (dump_file && (dump_flags & TDF_DETAILS))
4107 0 : fprintf (dump_file, "\t Bitfield NO OK to lower,"
4108 : " field type is not Integral.\n");
4109 0 : return false;
4110 : }
4111 :
         : /* Probe only (NULL bitpos/struct_expr); lower_bitfield
         : redoes the full computation later. */
4112 611 : if (!get_bitfield_rep (stmt, write, NULL, NULL))
4113 : {
4114 17 : if (dump_file && (dump_flags & TDF_DETAILS))
4115 2 : fprintf (dump_file, "\t Bitfield NOT OK to lower,"
4116 : " representative is BLKmode.\n");
4117 17 : return false;
4118 : }
4119 :
4120 594 : if (dump_file && (dump_flags & TDF_DETAILS))
4121 9 : fprintf (dump_file, "\tBitfield OK to lower.\n");
4122 594 : if (write)
4123 405 : writes_to_lower.safe_push (stmt);
4124 : else
4125 189 : reads_to_lower.safe_push (stmt);
4126 : }
4127 : }
4128 : }
4129 473348 : return !reads_to_lower.is_empty () || !writes_to_lower.is_empty ();
4130 : }
4131 :
4132 :
4133 : /* If-convert LOOP when it is legal. For the moment this pass has no
4134 : profitability analysis. Returns non-zero todo flags when something
4135 : changed. */
4136 :
4137 : unsigned int
4138 498028 : tree_if_conversion (class loop *loop, vec<gimple *> *preds)
4139 : {
4140 498028 : unsigned int todo = 0;
4141 498028 : bool aggressive_if_conv;
4142 498028 : class loop *rloop;
4143 498028 : auto_vec <gassign *, 4> reads_to_lower;
4144 498028 : auto_vec <gassign *, 4> writes_to_lower;
4145 498028 : bitmap exit_bbs;
4146 498028 : edge pe;
4147 498028 : auto_vec<data_reference_p, 10> refs;
4148 498949 : bool loop_versioned;
4149 :
         : /* "again" is re-entered once when outer-loop versioning replaced
         : LOOP with the inner loop of the scalar copy (see RLOOP below);
         : global per-loop state is reset here each time. */
4150 498949 : again:
4151 498949 : rloop = NULL;
4152 498949 : ifc_bbs = NULL;
4153 498949 : need_to_lower_bitfields = false;
4154 498949 : need_to_ifcvt = false;
4155 498949 : need_to_predicate = false;
4156 498949 : need_to_rewrite_undefined = false;
4157 498949 : any_complicated_phi = false;
4158 498949 : loop_versioned = false;
4159 :
4160 : /* Apply more aggressive if-conversion when loop or its outer loop were
4161 : marked with simd pragma. When that's the case, we try to if-convert
4162 : loop containing PHIs with more than MAX_PHI_ARG_NUM arguments. */
4163 498949 : aggressive_if_conv = loop->force_vectorize;
4164 498949 : if (!aggressive_if_conv)
4165 : {
4166 490597 : class loop *outer_loop = loop_outer (loop);
4167 490597 : if (outer_loop && outer_loop->force_vectorize)
4168 8668 : aggressive_if_conv = true;
4169 : }
4170 :
4171 : /* If there are more than two BBs in the loop then there is at least one if
4172 : to convert. */
4173 498949 : if (loop->num_nodes > 2
4174 498949 : && !ifcvt_split_critical_edges (loop, aggressive_if_conv))
4175 93994 : goto cleanup;
4176 :
4177 404955 : ifc_bbs = get_loop_body_in_if_conv_order (loop);
4178 404955 : if (!ifc_bbs)
4179 : {
4180 2570 : if (dump_file && (dump_flags & TDF_DETAILS))
4181 3 : fprintf (dump_file, "Irreducible loop\n");
4182 2570 : goto cleanup;
4183 : }
4184 :
4185 402385 : if (find_data_references_in_loop (loop, &refs) == chrec_dont_know)
4186 151637 : goto cleanup;
4187 :
4188 250748 : if (loop->num_nodes > 2)
4189 : {
4190 : /* More than one loop exit is too much to handle. */
4191 137390 : if (!single_exit (loop))
4192 : {
4193 94470 : if (dump_file && (dump_flags & TDF_DETAILS))
4194 10 : fprintf (dump_file, "Can not ifcvt due to multiple exits\n");
4195 : }
4196 : else
4197 : {
4198 42920 : need_to_ifcvt = true;
4199 :
4200 42920 : if (!if_convertible_loop_p (loop, &refs)
4201 42920 : || !dbg_cnt (if_conversion_tree))
4202 13932 : goto cleanup;
4203 :
         : /* Predication / complicated PHIs are only worthwhile when
         : vectorization of this loop is actually on the table. */
4204 28988 : if ((need_to_predicate || any_complicated_phi)
4205 3534 : && ((!flag_tree_loop_vectorize && !loop->force_vectorize)
4206 3534 : || loop->dont_vectorize)
4207 0 : goto cleanup;
4208 : }
4209 : }
4210 :
4211 236816 : if ((flag_tree_loop_vectorize || loop->force_vectorize)
4212 236763 : && !loop->dont_vectorize)
4213 236763 : need_to_lower_bitfields = bitfields_to_lower_p (loop, reads_to_lower,
4214 : writes_to_lower);
4215 :
4216 236816 : if (!need_to_ifcvt && !need_to_lower_bitfields)
4217 207573 : goto cleanup;
4218 :
4219 : /* The edge to insert invariant stmts on. */
4220 29243 : pe = loop_preheader_edge (loop);
4221 :
4222 : /* Since we have no cost model, always version loops unless the user
4223 : specified -ftree-loop-if-convert or unless versioning is required.
4224 : Either version this loop, or if the pattern is right for outer-loop
4225 : vectorization, version the outer loop. In the latter case we will
4226 : still if-convert the original inner loop. */
4227 29243 : if (need_to_lower_bitfields
4228 28986 : || need_to_predicate
4229 26843 : || any_complicated_phi
4230 25454 : || flag_tree_loop_if_convert != 1)
4231 : {
4232 29194 : class loop *vloop
4233 29194 : = (versionable_outer_loop_p (loop_outer (loop))
4234 29194 : ? loop_outer (loop) : loop);
4235 29194 : class loop *nloop = version_loop_for_if_conversion (vloop, preds);
4236 29194 : if (nloop == NULL)
4237 0 : goto cleanup;
4238 29194 : if (vloop != loop)
4239 : {
4240 : /* If versionable_outer_loop_p decided to version the
4241 : outer loop, version also the inner loop of the non-vectorized
4242 : loop copy. So we transform:
4243 : loop1
4244 : loop2
4245 : into:
4246 : if (LOOP_VECTORIZED (1, 3))
4247 : {
4248 : loop1
4249 : loop2
4250 : }
4251 : else
4252 : loop3 (copy of loop1)
4253 : if (LOOP_VECTORIZED (4, 5))
4254 : loop4 (copy of loop2)
4255 : else
4256 : loop5 (copy of loop4) */
4257 921 : gcc_assert (nloop->inner && nloop->inner->next == NULL);
4258 : rloop = nloop->inner;
4259 : }
4260 : else
4261 : /* If we versioned loop then make sure to insert invariant
4262 : stmts before the .LOOP_VECTORIZED check since the vectorizer
4263 : will re-use that for things like runtime alias versioning
4264 : whose condition can end up using those invariants. */
4265 28273 : pe = single_pred_edge (gimple_bb (preds->last ()));
4266 :
4267 : loop_versioned = true;
4268 : }
4269 :
4270 29243 : if (need_to_lower_bitfields)
4271 : {
4272 257 : if (dump_file && (dump_flags & TDF_DETAILS))
4273 : {
4274 9 : fprintf (dump_file, "-------------------------\n");
4275 9 : fprintf (dump_file, "Start lowering bitfields\n");
4276 : }
4277 446 : while (!reads_to_lower.is_empty ())
4278 189 : lower_bitfield (reads_to_lower.pop (), false);
4279 662 : while (!writes_to_lower.is_empty ())
4280 405 : lower_bitfield (writes_to_lower.pop (), true);
4281 :
4282 257 : if (dump_file && (dump_flags & TDF_DETAILS))
4283 : {
4284 9 : fprintf (dump_file, "Done lowering bitfields\n");
4285 9 : fprintf (dump_file, "-------------------------\n");
4286 : }
4287 : }
4288 29243 : if (need_to_ifcvt)
4289 : {
4290 : /* Before we rewrite edges we'll record their original position in the
4291 : edge map such that we can map the edges between the ifcvt and the
4292 : non-ifcvt loop during peeling. */
4293 28988 : uintptr_t idx = 0;
4294 115952 : for (edge exit : get_loop_exit_edges (loop))
4295 28988 : exit->aux = (void*)idx++;
4296 :
4297 : /* Now all statements are if-convertible. Combine all the basic
4298 : blocks into one huge basic block doing the if-conversion
4299 : on-the-fly. */
4300 28988 : combine_blocks (loop, loop_versioned);
4301 : }
4302 :
         : /* Propagate out SSA copies recorded as redundant during combining. */
4303 : std::pair <tree, tree> *name_pair;
4304 : unsigned ssa_names_idx;
4305 34382 : FOR_EACH_VEC_ELT (redundant_ssa_names, ssa_names_idx, name_pair)
4306 5139 : replace_uses_by (name_pair->first, name_pair->second);
4307 29243 : redundant_ssa_names.release ();
4308 :
4309 : /* Perform local CSE, this esp. helps the vectorizer analysis if loads
4310 : and stores are involved. CSE only the loop body, not the entry
4311 : PHIs, those are to be kept in sync with the non-if-converted copy.
4312 : ??? We'll still keep dead stores though. */
4313 29243 : exit_bbs = BITMAP_ALLOC (NULL);
4314 117148 : for (edge exit : get_loop_exit_edges (loop))
4315 29437 : bitmap_set_bit (exit_bbs, exit->dest->index);
4316 29243 : todo |= do_rpo_vn (cfun, loop_preheader_edge (loop), exit_bbs,
4317 : false, true, true);
4318 :
4319 : /* Delete dead predicate computations. */
4320 29243 : ifcvt_local_dce (loop);
4321 29243 : BITMAP_FREE (exit_bbs);
4322 :
4323 29243 : ifcvt_hoist_invariants (loop, pe);
4324 :
4325 29243 : todo |= TODO_cleanup_cfg;
4326 :
         : /* Shared cleanup for all exit paths: free data references and the
         : per-loop BB array / predicates. */
4327 498949 : cleanup:
4328 498949 : data_reference_p dr;
4329 498949 : unsigned int i;
4330 1646278 : for (i = 0; refs.iterate (i, &dr); i++)
4331 : {
4332 1147329 : free (dr->aux);
4333 1147329 : free_data_ref (dr);
4334 : }
4335 498949 : refs.truncate (0);
4336 :
4337 498949 : if (ifc_bbs)
4338 : {
4339 : unsigned int i;
4340 :
4341 1987889 : for (i = 0; i < loop->num_nodes; i++)
4342 1614492 : free_bb_predicate (ifc_bbs[i]);
4343 :
4344 373397 : free (ifc_bbs);
4345 373397 : ifc_bbs = NULL;
4346 : }
         : /* Outer-loop versioning case: now process the inner loop of the
         : scalar copy from the top. */
4347 498949 : if (rloop != NULL)
4348 : {
4349 921 : loop = rloop;
4350 921 : reads_to_lower.truncate (0);
4351 921 : writes_to_lower.truncate (0);
4352 921 : goto again;
4353 : }
4354 :
4355 498028 : return todo;
4356 498028 : }
4357 :
4358 : /* Tree if-conversion pass management. */
4359 :
4360 : namespace {
4361 :
4362 :
         : /* Pass descriptor: a GIMPLE pass named "ifcvt" timed under
         : TV_TREE_LOOP_IFCVT; requires CFG and SSA, requests no TODOs
         : itself (execute returns its own todo flags). */
4363 : const pass_data pass_data_if_conversion =
4364 : {
4365 : GIMPLE_PASS, /* type */
4366 : "ifcvt", /* name */
4367 : OPTGROUP_NONE, /* optinfo_flags */
4368 : TV_TREE_LOOP_IFCVT, /* tv_id */
4369 : ( PROP_cfg | PROP_ssa ), /* properties_required */
4370 : 0, /* properties_provided */
4371 : 0, /* properties_destroyed */
4372 : 0, /* todo_flags_start */
4373 : 0, /* todo_flags_finish */
4374 : };
4374 :
         : /* Pass wrapper: all behavior lives in the out-of-line gate and
         : execute overrides below. */
4375 : class pass_if_conversion : public gimple_opt_pass
4376 : {
4377 : public:
4378 285722 : pass_if_conversion (gcc::context *ctxt)
4379 571444 : : gimple_opt_pass (pass_data_if_conversion, ctxt)
4380 : {}
4381 :
4382 : /* opt_pass methods: */
4383 : bool gate (function *) final override;
4384 : unsigned int execute (function *) final override;
4385 :
4386 : }; // class pass_if_conversion
4387 :
         : /* Run when vectorization (global flag or per-function force) is
         : enabled and if-conversion isn't disabled, or when the user asked
         : for if-conversion explicitly (-ftree-loop-if-convert). */
4388 : bool
4389 241458 : pass_if_conversion::gate (function *fun)
4390 : {
4391 34739 : return (((flag_tree_loop_vectorize || fun->has_force_vectorize_loops)
4392 208531 : && flag_tree_loop_if_convert != 0)
4393 274394 : || flag_tree_loop_if_convert == 1);
4394 : }
4395 :
         : /* Pass driver: run tree_if_conversion on each candidate loop, then
         : update the IL and discard .LOOP_VECTORIZED guards whose
         : if-converted loop no longer exists. */
4396 : unsigned int
4397 208543 : pass_if_conversion::execute (function *fun)
4398 : {
4399 208543 : unsigned todo = 0;
4400 :
4401 417086 : if (number_of_loops (fun) <= 1)
4402 : return 0;
4403 :
         : /* PREDS collects the .LOOP_VECTORIZED internal calls created by
         : versioning, to be validated after cleanup below. */
4404 208543 : auto_vec<gimple *> preds;
4405 1131026 : for (auto loop : loops_list (cfun, 0))
4406 505397 : if (flag_tree_loop_if_convert == 1
4407 505150 : || ((flag_tree_loop_vectorize || loop->force_vectorize)
4408 502382 : && !loop->dont_vectorize))
4409 498028 : todo |= tree_if_conversion (loop, &preds);
4410 :
         : /* Conversion invalidates niter estimates and SCEV caches. */
4411 208543 : if (todo)
4412 : {
4413 18869 : free_numbers_of_iterations_estimates (fun);
4414 18869 : scev_reset ();
4415 : }
4416 :
         : /* tree_if_conversion must have cleared any bb->aux it set. */
4417 208543 : if (flag_checking)
4418 : {
4419 208539 : basic_block bb;
4420 7076522 : FOR_EACH_BB_FN (bb, fun)
4421 6867983 : gcc_assert (!bb->aux);
4422 : }
4423 :
4424 : /* Perform IL update now, it might elide some loops. */
4425 208543 : if (todo & TODO_cleanup_cfg)
4426 : {
4427 18869 : cleanup_tree_cfg ();
4428 18869 : if (need_ssa_update_p (fun))
4429 0 : todo |= TODO_update_ssa;
4430 : }
4431 208543 : if (todo & TODO_update_ssa_any)
4432 0 : update_ssa (todo & TODO_update_ssa_any);
4433 :
4434 : /* If if-conversion elided the loop fall back to the original one. Likewise
4435 : if the loops are not nested in the same outer loop. */
4436 237737 : for (unsigned i = 0; i < preds.length (); ++i)
4437 : {
4438 29194 : gimple *g = preds[i];
4439 29194 : if (!gimple_bb (g))
4440 0 : continue;
         : /* Args 0/1 of .LOOP_VECTORIZED are the loop numbers of the
         : if-converted and original copies. */
4441 29194 : auto ifcvt_loop = get_loop (fun, tree_to_uhwi (gimple_call_arg (g, 0)));
4442 29194 : auto orig_loop = get_loop (fun, tree_to_uhwi (gimple_call_arg (g, 1)));
4443 29194 : if (!ifcvt_loop || !orig_loop)
4444 : {
4445 2 : if (dump_file)
4446 0 : fprintf (dump_file, "If-converted loop vanished\n");
         : /* Resolve the guard to false: keep the scalar copy. */
4447 2 : fold_loop_internal_call (g, boolean_false_node);
4448 : }
4449 29192 : else if (loop_outer (ifcvt_loop) != loop_outer (orig_loop))
4450 : {
4451 0 : if (dump_file)
4452 0 : fprintf (dump_file, "If-converted loop in different outer loop\n");
4453 0 : fold_loop_internal_call (g, boolean_false_node);
4454 : }
4455 : }
4456 :
4457 208543 : return 0;
4458 208543 : }
4459 :
4460 : } // anon namespace
4461 :
         : /* Standard pass factory; ownership transfers to the pass manager. */
4462 : gimple_opt_pass *
4463 285722 : make_pass_if_conversion (gcc::context *ctxt)
4464 : {
4465 285722 : return new pass_if_conversion (ctxt);
4466 : }
|