Line data Source code
1 : /* If-conversion for vectorizer.
2 : Copyright (C) 2004-2026 Free Software Foundation, Inc.
3 : Contributed by Devang Patel <dpatel@apple.com>
4 :
5 : This file is part of GCC.
6 :
7 : GCC is free software; you can redistribute it and/or modify it under
8 : the terms of the GNU General Public License as published by the Free
9 : Software Foundation; either version 3, or (at your option) any later
10 : version.
11 :
12 : GCC is distributed in the hope that it will be useful, but WITHOUT ANY
13 : WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 : FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
15 : for more details.
16 :
17 : You should have received a copy of the GNU General Public License
18 : along with GCC; see the file COPYING3. If not see
19 : <http://www.gnu.org/licenses/>. */
20 :
21 : /* This pass implements a tree level if-conversion of loops. Its
22 : initial goal is to help the vectorizer to vectorize loops with
23 : conditions.
24 :
25 : A short description of if-conversion:
26 :
27 : o Decide if a loop is if-convertible or not.
28 : o Walk all loop basic blocks in breadth first order (BFS order).
29 : o Remove conditional statements (at the end of basic block)
30 : and propagate condition into destination basic blocks'
31 : predicate list.
32 : o Replace modify expression with conditional modify expression
33 : using current basic block's condition.
34 : o Merge all basic blocks
35 : o Replace phi nodes with conditional modify expr
36 : o Merge all basic blocks into header
37 :
38 : Sample transformation:
39 :
40 : INPUT
41 : -----
42 :
43 : # i_23 = PHI <0(0), i_18(10)>;
44 : <L0>:;
45 : j_15 = A[i_23];
46 : if (j_15 > 41) goto <L1>; else goto <L17>;
47 :
48 : <L17>:;
49 : goto <bb 3> (<L3>);
50 :
51 : <L1>:;
52 :
53 : # iftmp.2_4 = PHI <0(8), 42(2)>;
54 : <L3>:;
55 : A[i_23] = iftmp.2_4;
56 : i_18 = i_23 + 1;
57 : if (i_18 <= 15) goto <L19>; else goto <L18>;
58 :
59 : <L19>:;
60 : goto <bb 1> (<L0>);
61 :
62 : <L18>:;
63 :
64 : OUTPUT
65 : ------
66 :
67 : # i_23 = PHI <0(0), i_18(10)>;
68 : <L0>:;
69 : j_15 = A[i_23];
70 :
71 : <L3>:;
72 : iftmp.2_4 = j_15 > 41 ? 42 : 0;
73 : A[i_23] = iftmp.2_4;
74 : i_18 = i_23 + 1;
75 : if (i_18 <= 15) goto <L19>; else goto <L18>;
76 :
77 : <L19>:;
78 : goto <bb 1> (<L0>);
79 :
80 : <L18>:;
81 : */
82 :
83 : #include "config.h"
84 : #include "system.h"
85 : #include "coretypes.h"
86 : #include "backend.h"
87 : #include "rtl.h"
88 : #include "tree.h"
89 : #include "gimple.h"
90 : #include "cfghooks.h"
91 : #include "tree-pass.h"
92 : #include "ssa.h"
93 : #include "expmed.h"
94 : #include "expr.h"
95 : #include "optabs-tree.h"
96 : #include "gimple-pretty-print.h"
97 : #include "alias.h"
98 : #include "fold-const.h"
99 : #include "stor-layout.h"
100 : #include "gimple-iterator.h"
101 : #include "gimple-fold.h"
102 : #include "gimplify.h"
103 : #include "gimplify-me.h"
104 : #include "tree-cfg.h"
105 : #include "tree-into-ssa.h"
106 : #include "tree-ssa.h"
107 : #include "cfgloop.h"
108 : #include "tree-data-ref.h"
109 : #include "tree-scalar-evolution.h"
110 : #include "tree-ssa-loop.h"
111 : #include "tree-ssa-loop-niter.h"
112 : #include "tree-ssa-loop-ivopts.h"
113 : #include "tree-ssa-address.h"
114 : #include "dbgcnt.h"
115 : #include "tree-hash-traits.h"
116 : #include "varasm.h"
117 : #include "builtins.h"
118 : #include "cfganal.h"
119 : #include "internal-fn.h"
120 : #include "fold-const.h"
121 : #include "tree-ssa-sccvn.h"
122 : #include "tree-cfgcleanup.h"
123 : #include "tree-ssa-dse.h"
124 : #include "tree-vectorizer.h"
125 : #include "tree-eh.h"
126 : #include "cgraph.h"
127 :
128 : /* For lang_hooks.types.type_for_mode. */
129 : #include "langhooks.h"
130 :
131 : /* Only handle PHIs with no more arguments unless we are asked to by
132 : simd pragma. */
133 : #define MAX_PHI_ARG_NUM \
134 : ((unsigned) param_max_tree_if_conversion_phi_args)
135 :
136 : /* True if we've converted a statement that was only executed when some
137 : condition C was true, and if for correctness we need to predicate the
138 : statement to ensure that it is a no-op when C is false. See
139 : predicate_statements for the kinds of predication we support. */
140 : static bool need_to_predicate;
141 :
142 : /* True if we have to rewrite stmts that may invoke undefined behavior
143 : when a condition C was false so it doesn't if it is always executed.
144 : See predicate_statements for the kinds of predication we support. */
145 : static bool need_to_rewrite_undefined;
146 :
147 : /* Indicate if there are any complicated PHIs that need to be handled in
148 : if-conversion. Complicated PHI has more than two arguments and can't
149 : be degenerated to two arguments PHI. See more information in comment
150 : before phi_convertible_by_degenerating_args. */
151 : static bool any_complicated_phi;
152 :
153 : /* True if we have bitfield accesses we can lower. */
154 : static bool need_to_lower_bitfields;
155 :
156 : /* True if there is any ifcvting to be done. */
157 : static bool need_to_ifcvt;
158 :
159 : /* Hash for struct innermost_loop_behavior. It depends on the user to
160 : free the memory. */
161 :
162 : struct innermost_loop_behavior_hash : nofree_ptr_hash <innermost_loop_behavior>
163 : {
164 : static inline hashval_t hash (const value_type &);
165 : static inline bool equal (const value_type &,
166 : const compare_type &);
167 : };
168 :
169 : inline hashval_t
170 441532 : innermost_loop_behavior_hash::hash (const value_type &e)
171 : {
172 441532 : hashval_t hash;
173 :
174 441532 : hash = iterative_hash_expr (e->base_address, 0);
175 441532 : hash = iterative_hash_expr (e->offset, hash);
176 441532 : hash = iterative_hash_expr (e->init, hash);
177 441532 : return iterative_hash_expr (e->step, hash);
178 : }
179 :
180 : inline bool
181 315273 : innermost_loop_behavior_hash::equal (const value_type &e1,
182 : const compare_type &e2)
183 : {
184 315273 : if ((e1->base_address && !e2->base_address)
185 315273 : || (!e1->base_address && e2->base_address)
186 315273 : || (!e1->offset && e2->offset)
187 289584 : || (e1->offset && !e2->offset)
188 255978 : || (!e1->init && e2->init)
189 255978 : || (e1->init && !e2->init)
190 255978 : || (!e1->step && e2->step)
191 255978 : || (e1->step && !e2->step))
192 : return false;
193 :
194 255978 : if (e1->base_address && e2->base_address
195 511956 : && !operand_equal_p (e1->base_address, e2->base_address, 0))
196 : return false;
197 45604 : if (e1->offset && e2->offset
198 129066 : && !operand_equal_p (e1->offset, e2->offset, 0))
199 : return false;
200 45375 : if (e1->init && e2->init
201 128608 : && !operand_equal_p (e1->init, e2->init, 0))
202 : return false;
203 17568 : if (e1->step && e2->step
204 72994 : && !operand_equal_p (e1->step, e2->step, 0))
205 : return false;
206 :
207 : return true;
208 : }
209 :
210 : /* List of basic blocks in if-conversion-suitable order. */
211 : static basic_block *ifc_bbs;
212 :
213 : /* Hash table to store <DR's innermost loop behavior, DR> pairs. */
214 : static hash_map<innermost_loop_behavior_hash,
215 : data_reference_p> *innermost_DR_map;
216 :
217 : /* Hash table to store <base reference, DR> pairs. */
218 : static hash_map<tree_operand_hash, data_reference_p> *baseref_DR_map;
219 :
220 : /* List of redundant SSA names: the first should be replaced by the second. */
221 : static vec< std::pair<tree, tree> > redundant_ssa_names;
222 :
223 : /* Structure used to predicate basic blocks. This is attached to the
224 : ->aux field of the BBs in the loop to be if-converted. */
225 : struct bb_predicate {
226 :
227 : /* The condition under which this basic block is executed. */
228 : tree predicate;
229 :
230 : /* PREDICATE is gimplified, and the sequence of statements is
231 : recorded here, in order to avoid the duplication of computations
232 : that occur in previous conditions. See PR44483. */
233 : gimple_seq predicate_gimplified_stmts;
234 :
235 : /* Records the number of statements recorded into
236 : PREDICATE_GIMPLIFIED_STMTS. */
237 : unsigned no_predicate_stmts;
238 : };
239 :
240 : /* Returns true when the basic block BB has a predicate. */
241 :
242 : static inline bool
243 1963824 : bb_has_predicate (basic_block bb)
244 : {
245 1963824 : return bb->aux != NULL;
246 : }
247 :
248 : /* Returns the gimplified predicate for basic block BB. */
249 :
250 : static inline tree
251 992162 : bb_predicate (basic_block bb)
252 : {
253 992162 : return ((struct bb_predicate *) bb->aux)->predicate;
254 : }
255 :
256 : /* Sets the gimplified predicate COND for basic block BB. */
257 :
258 : static inline void
259 531949 : set_bb_predicate (basic_block bb, tree cond)
260 : {
261 531949 : auto aux = (struct bb_predicate *) bb->aux;
262 531949 : gcc_assert ((TREE_CODE (cond) == TRUTH_NOT_EXPR
263 : && is_gimple_val (TREE_OPERAND (cond, 0)))
264 : || is_gimple_val (cond));
265 531949 : aux->predicate = cond;
266 531949 : aux->no_predicate_stmts++;
267 :
268 531949 : if (dump_file && (dump_flags & TDF_DETAILS))
269 263 : fprintf (dump_file, "Recording block %d value %d\n", bb->index,
270 : aux->no_predicate_stmts);
271 531949 : }
272 :
273 : /* Returns the sequence of statements of the gimplification of the
274 : predicate for basic block BB. */
275 :
276 : static inline gimple_seq
277 661677 : bb_predicate_gimplified_stmts (basic_block bb)
278 : {
279 661677 : return ((struct bb_predicate *) bb->aux)->predicate_gimplified_stmts;
280 : }
281 :
282 : /* Sets the sequence of statements STMTS of the gimplification of the
283 : predicate for basic block BB. If PRESERVE_COUNTS then don't clear the predicate
284 : counts. */
285 :
286 : static inline void
287 311712 : set_bb_predicate_gimplified_stmts (basic_block bb, gimple_seq stmts,
288 : bool preserve_counts)
289 : {
290 311712 : ((struct bb_predicate *) bb->aux)->predicate_gimplified_stmts = stmts;
291 311712 : if (stmts == NULL && !preserve_counts)
292 254633 : ((struct bb_predicate *) bb->aux)->no_predicate_stmts = 0;
293 91624 : }
294 :
295 : /* Adds the sequence of statements STMTS to the sequence of statements
296 : of the predicate for basic block BB. */
297 :
298 : static inline void
299 93815 : add_bb_predicate_gimplified_stmts (basic_block bb, gimple_seq stmts)
300 : {
301 : /* We might have updated some stmts in STMTS via force_gimple_operand
302 : calling fold_stmt and that producing multiple stmts. Delink immediate
303 : uses so update_ssa after loop versioning doesn't get confused for
304 : the not yet inserted predicates.
305 : ??? This should go away once we reliably avoid updating stmts
306 : not in any BB. */
307 93815 : for (gimple_stmt_iterator gsi = gsi_start (stmts);
308 227583 : !gsi_end_p (gsi); gsi_next (&gsi))
309 : {
310 133768 : gimple *stmt = gsi_stmt (gsi);
311 133768 : delink_stmt_imm_use (stmt);
312 133768 : gimple_set_modified (stmt, true);
313 133768 : ((struct bb_predicate *) bb->aux)->no_predicate_stmts++;
314 : }
315 93815 : gimple_seq_add_seq_without_update
316 93815 : (&(((struct bb_predicate *) bb->aux)->predicate_gimplified_stmts), stmts);
317 93815 : }
318 :
319 : /* Return the number of statements the predicate of the basic block consists
320 : of. */
321 :
322 : static inline unsigned
323 16382 : get_bb_num_predicate_stmts (basic_block bb)
324 : {
325 16382 : return ((struct bb_predicate *) bb->aux)->no_predicate_stmts;
326 : }
327 :
328 : /* Initializes to TRUE the predicate of basic block BB. */
329 :
330 : static inline void
331 220088 : init_bb_predicate (basic_block bb)
332 : {
333 220088 : bb->aux = XNEW (struct bb_predicate);
334 220088 : set_bb_predicate_gimplified_stmts (bb, NULL, false);
335 220088 : set_bb_predicate (bb, boolean_true_node);
336 220088 : }
337 :
338 : /* Release the SSA_NAMEs associated with the predicate of basic block BB. */
339 :
340 : static inline void
341 432365 : release_bb_predicate (basic_block bb)
342 : {
343 432365 : gimple_seq stmts = bb_predicate_gimplified_stmts (bb);
344 432365 : if (stmts)
345 : {
346 : /* Ensure that these stmts haven't yet been added to a bb. */
347 34545 : if (flag_checking)
348 : for (gimple_stmt_iterator i = gsi_start (stmts);
349 95589 : !gsi_end_p (i); gsi_next (&i))
350 61044 : gcc_assert (! gimple_bb (gsi_stmt (i)));
351 :
352 : /* Discard them. */
353 34545 : gimple_seq_discard (stmts);
354 34545 : set_bb_predicate_gimplified_stmts (bb, NULL, false);
355 : }
356 432365 : }
357 :
358 : /* Free the predicate of basic block BB. */
359 :
360 : static inline void
361 1751547 : free_bb_predicate (basic_block bb)
362 : {
363 1751547 : if (!bb_has_predicate (bb))
364 : return;
365 :
366 220088 : release_bb_predicate (bb);
367 220088 : free (bb->aux);
368 220088 : bb->aux = NULL;
369 : }
370 :
371 : /* Reinitialize predicate of BB with the true predicate. */
372 :
373 : static inline void
374 212277 : reset_bb_predicate (basic_block bb)
375 : {
376 212277 : if (!bb_has_predicate (bb))
377 0 : init_bb_predicate (bb);
378 : else
379 : {
380 212277 : release_bb_predicate (bb);
381 212277 : set_bb_predicate (bb, boolean_true_node);
382 : }
383 212277 : }
384 :
385 : /* Returns a new SSA_NAME of type TYPE that is assigned the value of
386 : the expression EXPR. Inserts the statement created for this
387 : computation before GSI and leaves the iterator GSI at the same
388 : statement. */
389 :
390 : static tree
391 6048 : ifc_temp_var (tree type, tree expr, gimple_stmt_iterator *gsi)
392 : {
393 6048 : tree new_name = make_temp_ssa_name (type, NULL, "_ifc_");
394 6048 : gimple *stmt = gimple_build_assign (new_name, expr);
395 12096 : gimple_set_vuse (stmt, gimple_vuse (gsi_stmt (*gsi)));
396 6048 : gsi_insert_before (gsi, stmt, GSI_SAME_STMT);
397 6048 : return new_name;
398 : }
399 :
400 : /* Return true when COND is a false predicate. */
401 :
402 : static inline bool
403 88433 : is_false_predicate (tree cond)
404 : {
405 88433 : return (cond != NULL_TREE
406 88433 : && (cond == boolean_false_node
407 88433 : || integer_zerop (cond)));
408 : }
409 :
410 : /* Return true when COND is a true predicate. */
411 :
412 : static inline bool
413 1134371 : is_true_predicate (tree cond)
414 : {
415 1134371 : return (cond == NULL_TREE
416 1134371 : || cond == boolean_true_node
417 1623262 : || integer_onep (cond));
418 : }
419 :
420 : /* Returns true when BB has a predicate that is not trivial: true or
421 : NULL_TREE. */
422 :
423 : static inline bool
424 553568 : is_predicated (basic_block bb)
425 : {
426 12461 : return !is_true_predicate (bb_predicate (bb));
427 : }
428 :
429 : /* Parses the predicate COND and returns its comparison code and
430 : operands OP0 and OP1. */
431 :
432 : static enum tree_code
433 468928 : parse_predicate (tree cond, tree *op0, tree *op1)
434 : {
435 468928 : gimple *s;
436 :
437 468928 : if (TREE_CODE (cond) == SSA_NAME
438 468928 : && is_gimple_assign (s = SSA_NAME_DEF_STMT (cond)))
439 : {
440 64532 : if (TREE_CODE_CLASS (gimple_assign_rhs_code (s)) == tcc_comparison)
441 : {
442 32880 : *op0 = gimple_assign_rhs1 (s);
443 32880 : *op1 = gimple_assign_rhs2 (s);
444 32880 : return gimple_assign_rhs_code (s);
445 : }
446 :
447 31652 : else if (gimple_assign_rhs_code (s) == TRUTH_NOT_EXPR)
448 : {
449 0 : tree op = gimple_assign_rhs1 (s);
450 0 : tree type = TREE_TYPE (op);
451 0 : enum tree_code code = parse_predicate (op, op0, op1);
452 :
453 0 : return code == ERROR_MARK ? ERROR_MARK
454 0 : : invert_tree_comparison (code, HONOR_NANS (type));
455 : }
456 :
457 : return ERROR_MARK;
458 : }
459 :
460 404396 : if (COMPARISON_CLASS_P (cond))
461 : {
462 471 : *op0 = TREE_OPERAND (cond, 0);
463 471 : *op1 = TREE_OPERAND (cond, 1);
464 471 : return TREE_CODE (cond);
465 : }
466 :
467 : return ERROR_MARK;
468 : }
469 :
470 : /* Returns the fold of predicate C1 OR C2 at location LOC. */
471 :
472 : static tree
473 234464 : fold_or_predicates (location_t loc, tree c1, tree c2)
474 : {
475 234464 : tree op1a, op1b, op2a, op2b;
476 234464 : enum tree_code code1 = parse_predicate (c1, &op1a, &op1b);
477 234464 : enum tree_code code2 = parse_predicate (c2, &op2a, &op2b);
478 :
479 234464 : if (code1 != ERROR_MARK && code2 != ERROR_MARK)
480 : {
481 2237 : tree t = maybe_fold_or_comparisons (boolean_type_node, code1, op1a, op1b,
482 : code2, op2a, op2b);
483 2237 : if (t)
484 : return t;
485 : }
486 :
487 232322 : return fold_build2_loc (loc, TRUTH_OR_EXPR, boolean_type_node, c1, c2);
488 : }
489 :
490 : /* Returns either a COND_EXPR or the folded expression if the folded
491 : expression is a MIN_EXPR, a MAX_EXPR, an ABS_EXPR,
492 : a constant or a SSA_NAME. */
493 :
494 : static tree
495 54778 : fold_build_cond_expr (tree type, tree cond, tree rhs, tree lhs)
496 : {
497 : /* Short cut the case where both rhs and lhs are the same. */
498 54778 : if (operand_equal_p (rhs, lhs))
499 : return rhs;
500 :
501 : /* If COND is comparison r != 0 and r has boolean type, convert COND
502 : to SSA_NAME to accept by vect bool pattern. */
503 54778 : if (TREE_CODE (cond) == NE_EXPR)
504 : {
505 0 : tree op0 = TREE_OPERAND (cond, 0);
506 0 : tree op1 = TREE_OPERAND (cond, 1);
507 0 : if (TREE_CODE (op0) == SSA_NAME
508 0 : && TREE_CODE (TREE_TYPE (op0)) == BOOLEAN_TYPE
509 0 : && (integer_zerop (op1)))
510 : cond = op0;
511 : }
512 :
513 54778 : gimple_match_op cexpr (gimple_match_cond::UNCOND, COND_EXPR,
514 54778 : type, cond, rhs, lhs);
515 54778 : if (cexpr.resimplify (NULL, follow_all_ssa_edges))
516 : {
517 10629 : if (gimple_simplified_result_is_gimple_val (&cexpr))
518 553 : return cexpr.ops[0];
519 10076 : else if (cexpr.code == ABS_EXPR)
520 2 : return build1 (ABS_EXPR, type, cexpr.ops[0]);
521 10074 : else if (cexpr.code == MIN_EXPR
522 10074 : || cexpr.code == MAX_EXPR)
523 7299 : return build2 ((tree_code)cexpr.code, type, cexpr.ops[0], cexpr.ops[1]);
524 : }
525 :
526 46924 : return build3 (COND_EXPR, type, cond, rhs, lhs);
527 : }
528 :
529 : /* Add condition NC to the predicate list of basic block BB. LOOP is
530 : the loop to be if-converted. Use predicate of cd-equivalent block
531 : for join bb if it exists: we call basic blocks bb1 and bb2
532 : cd-equivalent if they are executed under the same condition. */
533 :
534 : static inline void
535 173069 : add_to_predicate_list (class loop *loop, basic_block bb, tree nc)
536 : {
537 173069 : tree bc, *tp;
538 173069 : basic_block dom_bb;
539 :
540 173069 : if (is_true_predicate (nc))
541 77354 : return;
542 :
543 : /* If dominance tells us this basic block is always executed,
544 : don't record any predicates for it. */
545 173067 : if (dominated_by_p (CDI_DOMINATORS, loop->latch, bb))
546 : return;
547 :
548 99584 : dom_bb = get_immediate_dominator (CDI_DOMINATORS, bb);
549 : /* We use notion of cd equivalence to get simpler predicate for
550 : join block, e.g. if join block has 2 predecessors with predicates
551 : p1 & p2 and p1 & !p2, we'd like to get p1 for it instead of
552 : p1 & p2 | p1 & !p2. */
553 99584 : if (dom_bb != loop->header
554 99584 : && get_immediate_dominator (CDI_POST_DOMINATORS, dom_bb) == bb)
555 : {
556 3869 : gcc_assert (flow_bb_inside_loop_p (loop, dom_bb));
557 3869 : bc = bb_predicate (dom_bb);
558 3869 : if (!is_true_predicate (bc))
559 3869 : set_bb_predicate (bb, bc);
560 : else
561 0 : gcc_assert (is_true_predicate (bb_predicate (bb)));
562 3869 : if (dump_file && (dump_flags & TDF_DETAILS))
563 4 : fprintf (dump_file, "Use predicate of bb#%d for bb#%d\n",
564 : dom_bb->index, bb->index);
565 3869 : return;
566 : }
567 :
568 95715 : if (!is_predicated (bb))
569 91624 : bc = nc;
570 : else
571 : {
572 4091 : bc = bb_predicate (bb);
573 4091 : bc = fold_or_predicates (EXPR_LOCATION (bc), nc, bc);
574 4091 : if (is_true_predicate (bc))
575 : {
576 0 : reset_bb_predicate (bb);
577 0 : return;
578 : }
579 : }
580 :
581 : /* Allow a TRUTH_NOT_EXPR around the main predicate. */
582 95715 : if (TREE_CODE (bc) == TRUTH_NOT_EXPR)
583 33153 : tp = &TREE_OPERAND (bc, 0);
584 : else
585 : tp = &bc;
586 95715 : if (!is_gimple_val (*tp))
587 : {
588 93815 : gimple_seq stmts;
589 93815 : *tp = force_gimple_operand (*tp, &stmts, true, NULL_TREE);
590 93815 : add_bb_predicate_gimplified_stmts (bb, stmts);
591 : }
592 95715 : set_bb_predicate (bb, bc);
593 : }
594 :
595 : /* Add the condition COND to the previous condition PREV_COND, and add
596 : this to the predicate list of the destination of edge E. LOOP is
597 : the loop to be if-converted. */
598 :
599 : static void
600 114202 : add_to_dst_predicate_list (class loop *loop, edge e,
601 : tree prev_cond, tree cond)
602 : {
603 114202 : if (!flow_bb_inside_loop_p (loop, e->dest))
604 : return;
605 :
606 114202 : if (!is_true_predicate (prev_cond))
607 22774 : cond = fold_build2 (TRUTH_AND_EXPR, boolean_type_node,
608 : prev_cond, cond);
609 :
610 114202 : if (!dominated_by_p (CDI_DOMINATORS, loop->latch, e->dest))
611 90930 : add_to_predicate_list (loop, e->dest, cond);
612 : }
613 :
614 : /* Return true if one of the successor edges of BB exits LOOP. */
615 :
616 : static bool
617 3446504 : bb_with_exit_edge_p (const class loop *loop, basic_block bb)
618 : {
619 3446504 : edge e;
620 3446504 : edge_iterator ei;
621 :
622 6967249 : FOR_EACH_EDGE (e, ei, bb->succs)
623 4896960 : if (loop_exit_edge_p (loop, e))
624 : return true;
625 :
626 : return false;
627 : }
628 :
629 : /* Given PHI which has more than two arguments, this function checks if
630 : it's if-convertible by degenerating its arguments. Specifically, if
631 : below two conditions are satisfied:
632 :
633 : 1) Number of PHI arguments with different values equals to 2 and one
634 : argument has the only occurrence.
635 : 2) The edge corresponding to the unique argument isn't critical edge.
636 :
637 : Such PHI can be handled as PHIs have only two arguments. For example,
638 : below PHI:
639 :
640 : res = PHI <A_1(e1), A_1(e2), A_2(e3)>;
641 :
642 : can be transformed into:
643 :
644 : res = (predicate of e3) ? A_2 : A_1;
645 :
646 : Return TRUE if it is the case, FALSE otherwise. */
647 :
648 : static bool
649 5563 : phi_convertible_by_degenerating_args (gphi *phi)
650 : {
651 5563 : edge e;
652 5563 : tree arg, t1 = NULL, t2 = NULL;
653 5563 : unsigned int i, i1 = 0, i2 = 0, n1 = 0, n2 = 0;
654 5563 : unsigned int num_args = gimple_phi_num_args (phi);
655 :
656 5563 : gcc_assert (num_args > 2);
657 :
658 20225 : for (i = 0; i < num_args; i++)
659 : {
660 16885 : arg = gimple_phi_arg_def (phi, i);
661 16885 : if (t1 == NULL || operand_equal_p (t1, arg, 0))
662 : {
663 7623 : n1++;
664 7623 : i1 = i;
665 7623 : t1 = arg;
666 : }
667 9262 : else if (t2 == NULL || operand_equal_p (t2, arg, 0))
668 : {
669 7039 : n2++;
670 7039 : i2 = i;
671 7039 : t2 = arg;
672 : }
673 : else
674 : return false;
675 : }
676 :
677 3340 : if (n1 != 1 && n2 != 1)
678 : return false;
679 :
680 : /* Check if the edge corresponding to the unique arg is critical. */
681 3272 : e = gimple_phi_arg_edge (phi, (n1 == 1) ? i1 : i2);
682 3272 : if (EDGE_COUNT (e->src->succs) > 1)
683 : return false;
684 :
685 : return true;
686 : }
687 :
688 : /* Return true when PHI is if-convertible. PHI is part of loop LOOP
689 : and it belongs to basic block BB. Note at this point, it is sure
690 : that PHI is if-convertible. This function updates global variable
691 : ANY_COMPLICATED_PHI if PHI is complicated. */
692 :
693 : static bool
694 138704 : if_convertible_phi_p (class loop *loop, basic_block bb, gphi *phi)
695 : {
696 138704 : if (dump_file && (dump_flags & TDF_DETAILS))
697 : {
698 79 : fprintf (dump_file, "-------------------------\n");
699 79 : print_gimple_stmt (dump_file, phi, 0, TDF_SLIM);
700 : }
701 :
702 138704 : if (bb != loop->header
703 54821 : && gimple_phi_num_args (phi) > 2
704 144267 : && !phi_convertible_by_degenerating_args (phi))
705 2291 : any_complicated_phi = true;
706 :
707 138704 : return true;
708 : }
709 :
710 : /* Records the status of a data reference. This struct is attached to
711 : each DR->aux field. */
712 :
713 : struct ifc_dr {
714 : bool rw_unconditionally;
715 : bool w_unconditionally;
716 : bool written_at_least_once;
717 :
718 : tree rw_predicate;
719 : tree w_predicate;
720 : tree base_w_predicate;
721 : };
722 :
723 : #define IFC_DR(DR) ((struct ifc_dr *) (DR)->aux)
724 : #define DR_BASE_W_UNCONDITIONALLY(DR) (IFC_DR (DR)->written_at_least_once)
725 : #define DR_RW_UNCONDITIONALLY(DR) (IFC_DR (DR)->rw_unconditionally)
726 : #define DR_W_UNCONDITIONALLY(DR) (IFC_DR (DR)->w_unconditionally)
727 :
728 : /* Iterates over DR's and stores refs, DR and base refs, DR pairs in
729 : HASH tables. While storing them in HASH table, it checks if the
730 : reference is unconditionally read or written and stores that as a flag
731 : information. For base reference it checks if it is written atlest once
732 : unconditionally and stores it as flag information along with DR.
733 : In other words for every data reference A in STMT there exist other
734 : accesses to a data reference with the same base with predicates that
735 : add up (OR-up) to the true predicate: this ensures that the data
736 : reference A is touched (read or written) on every iteration of the
737 : if-converted loop. */
738 : static void
739 138865 : hash_memrefs_baserefs_and_store_DRs_read_written_info (data_reference_p a)
740 : {
741 :
742 138865 : data_reference_p *master_dr, *base_master_dr;
743 138865 : tree base_ref = DR_BASE_OBJECT (a);
744 138865 : innermost_loop_behavior *innermost = &DR_INNERMOST (a);
745 138865 : tree ca = bb_predicate (gimple_bb (DR_STMT (a)));
746 138865 : bool exist1, exist2;
747 :
748 138865 : master_dr = &innermost_DR_map->get_or_insert (innermost, &exist1);
749 138865 : if (!exist1)
750 103926 : *master_dr = a;
751 :
752 138865 : if (DR_IS_WRITE (a))
753 : {
754 45718 : IFC_DR (*master_dr)->w_predicate
755 91436 : = fold_or_predicates (UNKNOWN_LOCATION, ca,
756 45718 : IFC_DR (*master_dr)->w_predicate);
757 45718 : if (is_true_predicate (IFC_DR (*master_dr)->w_predicate))
758 29460 : DR_W_UNCONDITIONALLY (*master_dr) = true;
759 : }
760 138865 : IFC_DR (*master_dr)->rw_predicate
761 277730 : = fold_or_predicates (UNKNOWN_LOCATION, ca,
762 138865 : IFC_DR (*master_dr)->rw_predicate);
763 138865 : if (is_true_predicate (IFC_DR (*master_dr)->rw_predicate))
764 102819 : DR_RW_UNCONDITIONALLY (*master_dr) = true;
765 :
766 138865 : if (DR_IS_WRITE (a))
767 : {
768 45718 : base_master_dr = &baseref_DR_map->get_or_insert (base_ref, &exist2);
769 45718 : if (!exist2)
770 35440 : *base_master_dr = a;
771 45718 : IFC_DR (*base_master_dr)->base_w_predicate
772 91436 : = fold_or_predicates (UNKNOWN_LOCATION, ca,
773 45718 : IFC_DR (*base_master_dr)->base_w_predicate);
774 45718 : if (is_true_predicate (IFC_DR (*base_master_dr)->base_w_predicate))
775 29694 : DR_BASE_W_UNCONDITIONALLY (*base_master_dr) = true;
776 : }
777 138865 : }
778 :
779 : /* Return TRUE if can prove the index IDX of an array reference REF is
780 : within array bound. Return false otherwise. */
781 :
782 : static bool
783 248328 : idx_within_array_bound (tree ref, tree *idx, void *dta)
784 : {
785 248328 : wi::overflow_type overflow;
786 248328 : widest_int niter, valid_niter, delta, wi_step;
787 248328 : tree ev, init, step;
788 248328 : tree low, high;
789 248328 : class loop *loop = (class loop*) dta;
790 :
791 : /* Only support within-bound access for array references. */
792 248328 : if (TREE_CODE (ref) != ARRAY_REF)
793 : return false;
794 :
795 : /* For arrays that might have flexible sizes, it is not guaranteed that they
796 : do not extend over their declared size. */
797 143003 : if (array_ref_flexible_size_p (ref))
798 : return false;
799 :
800 90710 : ev = analyze_scalar_evolution (loop, *idx);
801 90710 : ev = instantiate_parameters (loop, ev);
802 90710 : init = initial_condition (ev);
803 90710 : step = evolution_part_in_loop_num (ev, loop->num);
804 :
805 90710 : if (!init || TREE_CODE (init) != INTEGER_CST
806 80183 : || (step && TREE_CODE (step) != INTEGER_CST))
807 : return false;
808 :
809 80177 : low = array_ref_low_bound (ref);
810 80177 : high = array_ref_up_bound (ref);
811 :
812 : /* The case of nonconstant bounds could be handled, but it would be
813 : complicated. */
814 80177 : if (TREE_CODE (low) != INTEGER_CST
815 80177 : || !high || TREE_CODE (high) != INTEGER_CST)
816 : return false;
817 :
818 : /* Check if the intial idx is within bound. */
819 80109 : if (wi::to_widest (init) < wi::to_widest (low)
820 160210 : || wi::to_widest (init) > wi::to_widest (high))
821 15 : return false;
822 :
823 : /* The idx is always within bound. */
824 80094 : if (!step || integer_zerop (step))
825 1980 : return true;
826 :
827 78114 : if (!max_loop_iterations (loop, &niter))
828 : return false;
829 :
830 78114 : if (wi::to_widest (step) < 0)
831 : {
832 303 : delta = wi::to_widest (init) - wi::to_widest (low);
833 303 : wi_step = -wi::to_widest (step);
834 : }
835 : else
836 : {
837 77811 : delta = wi::to_widest (high) - wi::to_widest (init);
838 77811 : wi_step = wi::to_widest (step);
839 : }
840 :
841 78114 : valid_niter = wi::div_floor (delta, wi_step, SIGNED, &overflow);
842 : /* The iteration space of idx is within array bound. */
843 156228 : if (!overflow && niter <= valid_niter)
844 : return true;
845 :
846 : return false;
847 248328 : }
848 :
849 : /* Return TRUE if ref is a within bound array reference. */
850 :
851 : bool
852 242775 : ref_within_array_bound (gimple *stmt, tree ref)
853 : {
854 242775 : class loop *loop = loop_containing_stmt (stmt);
855 :
856 242775 : gcc_assert (loop != NULL);
857 242775 : return for_each_index (&ref, idx_within_array_bound, loop);
858 : }
859 :
860 :
861 : /* Given a memory reference expression T, return TRUE if base object
862 : it refers to is writable. The base object of a memory reference
863 : is the main object being referenced, which is returned by function
864 : get_base_address. */
865 :
866 : static bool
867 2519 : base_object_writable (tree ref)
868 : {
869 2519 : tree base_tree = get_base_address (ref);
870 :
871 2519 : return (base_tree
872 2519 : && DECL_P (base_tree)
873 1691 : && decl_binds_to_current_def_p (base_tree)
874 4206 : && !TREE_READONLY (base_tree));
875 : }
876 :
877 : /* Return true when the memory references of STMT won't trap in the
878 : if-converted code. There are two things that we have to check for:
879 :
880 : - writes to memory occur to writable memory: if-conversion of
881 : memory writes transforms the conditional memory writes into
882 : unconditional writes, i.e. "if (cond) A[i] = foo" is transformed
883 : into "A[i] = cond ? foo : A[i]", and as the write to memory may not
884 : be executed at all in the original code, it may be a readonly
885 : memory. To check that A is not const-qualified, we check that
886 : there exists at least an unconditional write to A in the current
887 : function.
888 :
889 : - reads or writes to memory are valid memory accesses for every
890 : iteration. To check that the memory accesses are correctly formed
891 : and that we are allowed to read and write in these locations, we
892 : check that the memory accesses to be if-converted occur at every
893 : iteration unconditionally.
894 :
895 : Returns true for the memory reference in STMT, same memory reference
896 : is read or written unconditionally atleast once and the base memory
897 : reference is written unconditionally once. This is to check reference
898 : will not write fault. Also retuns true if the memory reference is
899 : unconditionally read once then we are conditionally writing to memory
900 : which is defined as read and write and is bound to the definition
901 : we are seeing. */
902 : static bool
903 20313 : ifcvt_memrefs_wont_trap (gimple *stmt, vec<data_reference_p> drs)
904 : {
905 : /* If DR didn't see a reference here we can't use it to tell
906 : whether the ref traps or not. */
907 20313 : if (gimple_uid (stmt) == 0)
908 : return false;
909 :
910 20312 : data_reference_p *master_dr, *base_master_dr;
911 20312 : data_reference_p a = drs[gimple_uid (stmt) - 1];
912 :
913 20312 : tree base = DR_BASE_OBJECT (a);
914 20312 : innermost_loop_behavior *innermost = &DR_INNERMOST (a);
915 :
916 20312 : gcc_assert (DR_STMT (a) == stmt);
917 20312 : gcc_assert (DR_BASE_ADDRESS (a) || DR_OFFSET (a)
918 : || DR_INIT (a) || DR_STEP (a));
919 :
920 20312 : master_dr = innermost_DR_map->get (innermost);
921 20312 : gcc_assert (master_dr != NULL);
922 :
923 20312 : base_master_dr = baseref_DR_map->get (base);
924 :
925 : /* If a is unconditionally written to it doesn't trap. */
926 20312 : if (DR_W_UNCONDITIONALLY (*master_dr))
927 : return true;
928 :
929 : /* If a is unconditionally accessed then ...
930 :
931 : Even a is conditional access, we can treat it as an unconditional
932 : one if it's an array reference and all its index are within array
933 : bound. */
934 18437 : if (DR_RW_UNCONDITIONALLY (*master_dr)
935 18437 : || ref_within_array_bound (stmt, DR_REF (a)))
936 : {
937 : /* an unconditional read won't trap. */
938 6718 : if (DR_IS_READ (a))
939 : return true;
940 :
941 : /* an unconditionaly write won't trap if the base is written
942 : to unconditionally. */
943 2556 : if ((base_master_dr
944 2556 : && DR_BASE_W_UNCONDITIONALLY (*base_master_dr))
945 : /* or the base is known to be not readonly. */
946 5075 : || base_object_writable (DR_REF (a)))
947 1724 : return !ref_can_have_store_data_races (base);
948 : }
949 :
950 : return false;
951 : }
952 :
953 : /* Return true if STMT could be converted into a masked load or store
954 : (conditional load or store based on a mask computed from bb predicate). */
955 :
956 : static bool
957 11992 : ifcvt_can_use_mask_load_store (gimple *stmt)
958 : {
959 : /* Check whether this is a load or store. */
960 11992 : tree lhs = gimple_assign_lhs (stmt);
961 11992 : bool is_load;
962 11992 : tree ref;
963 11992 : if (gimple_store_p (stmt))
964 : {
965 2722 : if (!is_gimple_val (gimple_assign_rhs1 (stmt)))
966 : return false;
967 : is_load = false;
968 : ref = lhs;
969 : }
970 9270 : else if (gimple_assign_load_p (stmt))
971 : {
972 9269 : is_load = true;
973 9269 : ref = gimple_assign_rhs1 (stmt);
974 : }
975 : else
976 : return false;
977 :
978 11991 : if (may_be_nonaddressable_p (ref))
979 : return false;
980 :
981 : /* Mask should be integer mode of the same size as the load/store
982 : mode. */
983 11932 : machine_mode mode = TYPE_MODE (TREE_TYPE (lhs));
984 11932 : if (!int_mode_for_mode (mode).exists () || VECTOR_MODE_P (mode))
985 31 : return false;
986 :
987 11901 : if (can_vec_mask_load_store_p (mode, VOIDmode, is_load))
988 : return true;
989 :
990 : return false;
991 : }
992 :
993 : /* Return true if STMT could be converted from an operation that is
994 : unconditional to one that is conditional on a bb predicate mask. */
995 :
996 : static bool
997 13991 : ifcvt_can_predicate (gimple *stmt)
998 : {
999 13991 : basic_block bb = gimple_bb (stmt);
1000 :
1001 320 : if (!(flag_tree_loop_vectorize || bb->loop_father->force_vectorize)
1002 13991 : || bb->loop_father->dont_vectorize
1003 27982 : || gimple_has_volatile_ops (stmt))
1004 : return false;
1005 :
1006 13991 : if (gimple_assign_single_p (stmt))
1007 11992 : return ifcvt_can_use_mask_load_store (stmt);
1008 :
1009 1999 : tree callee;
1010 1999 : if (gimple_call_builtin_p (stmt))
1011 155 : if ((callee = gimple_call_fndecl (stmt))
1012 155 : && fndecl_built_in_p (callee, BUILT_IN_NORMAL))
1013 : {
1014 149 : auto ifn = associated_internal_fn (callee);
1015 149 : auto cond_ifn = get_conditional_internal_fn (ifn);
1016 149 : tree type = TREE_TYPE (gimple_call_fntype (stmt));
1017 149 : return (cond_ifn != IFN_LAST
1018 149 : && vectorized_internal_fn_supported_p (cond_ifn, type));
1019 : }
1020 :
1021 1850 : if (!is_gimple_assign (stmt))
1022 : return false;
1023 :
1024 1844 : tree_code code = gimple_assign_rhs_code (stmt);
1025 1844 : tree lhs_type = TREE_TYPE (gimple_assign_lhs (stmt));
1026 1844 : tree rhs_type = TREE_TYPE (gimple_assign_rhs1 (stmt));
1027 1844 : if (!types_compatible_p (lhs_type, rhs_type))
1028 : return false;
1029 1102 : internal_fn cond_fn = get_conditional_internal_fn (code);
1030 1102 : return (cond_fn != IFN_LAST
1031 1102 : && vectorized_internal_fn_supported_p (cond_fn, lhs_type));
1032 : }
1033 :
1034 : /* Return true when STMT is if-convertible.
1035 :
1036 : GIMPLE_ASSIGN statement is not if-convertible if,
1037 : - it is not movable,
1038 : - it could trap,
1039 : - LHS is not var decl. */
1040 :
1041 : static bool
1042 77151 : if_convertible_gimple_assign_stmt_p (gimple *stmt,
1043 : vec<data_reference_p> refs)
1044 : {
1045 77151 : tree lhs = gimple_assign_lhs (stmt);
1046 :
1047 77151 : if (dump_file && (dump_flags & TDF_DETAILS))
1048 : {
1049 42 : fprintf (dump_file, "-------------------------\n");
1050 42 : print_gimple_stmt (dump_file, stmt, 0, TDF_SLIM);
1051 : }
1052 :
1053 77151 : if (!is_gimple_reg_type (TREE_TYPE (lhs)))
1054 : return false;
1055 :
1056 : /* Some of these constrains might be too conservative. */
1057 76872 : if (stmt_ends_bb_p (stmt)
1058 76872 : || gimple_has_volatile_ops (stmt)
1059 76797 : || (TREE_CODE (lhs) == SSA_NAME
1060 72313 : && SSA_NAME_OCCURS_IN_ABNORMAL_PHI (lhs))
1061 153669 : || gimple_has_side_effects (stmt))
1062 : {
1063 75 : if (dump_file && (dump_flags & TDF_DETAILS))
1064 0 : fprintf (dump_file, "stmt not suitable for ifcvt\n");
1065 75 : return false;
1066 : }
1067 :
1068 : /* tree-into-ssa.cc uses GF_PLF_1, so avoid it, because
1069 : in between if_convertible_loop_p and combine_blocks
1070 : we can perform loop versioning. */
1071 76797 : gimple_set_plf (stmt, GF_PLF_2, false);
1072 :
1073 76797 : if ((! gimple_vuse (stmt)
1074 20313 : || gimple_could_trap_p_1 (stmt, false, false)
1075 20313 : || ! ifcvt_memrefs_wont_trap (stmt, refs))
1076 89787 : && gimple_could_trap_p (stmt))
1077 : {
1078 13836 : if (ifcvt_can_predicate (stmt))
1079 : {
1080 2778 : gimple_set_plf (stmt, GF_PLF_2, true);
1081 2778 : need_to_predicate = true;
1082 2778 : return true;
1083 : }
1084 11058 : if (dump_file && (dump_flags & TDF_DETAILS))
1085 0 : fprintf (dump_file, "tree could trap...\n");
1086 11058 : return false;
1087 : }
1088 62961 : else if (gimple_needing_rewrite_undefined (stmt))
1089 : /* We have to rewrite stmts with undefined overflow. */
1090 28008 : need_to_rewrite_undefined = true;
1091 :
1092 : /* When if-converting stores force versioning, likewise if we
1093 : ended up generating store data races. */
1094 125922 : if (gimple_vdef (stmt))
1095 1762 : need_to_predicate = true;
1096 :
1097 : return true;
1098 : }
1099 :
1100 : /* Return true when SW switch statement is equivalent to cond, that
1101 : all non default labels point to the same label.
1102 :
1103 : Fallthrough is not checked for and could even happen
1104 : with cond (using goto), so is handled.
1105 :
1106 : This is intended for switches created by the if-switch-conversion
1107 : pass, but can handle some programmer supplied cases too. */
1108 :
1109 : static bool
1110 14 : if_convertible_switch_p (gswitch *sw)
1111 : {
1112 14 : if (gimple_switch_num_labels (sw) <= 1)
1113 : return false;
1114 14 : tree label = CASE_LABEL (gimple_switch_label (sw, 1));
1115 43 : for (unsigned i = 1; i < gimple_switch_num_labels (sw); i++)
1116 : {
1117 29 : if (CASE_LABEL (gimple_switch_label (sw, i)) != label)
1118 : return false;
1119 : }
1120 : return true;
1121 : }
1122 :
1123 : /* Return true when STMT is an if-convertible SIMD clone stmts.
1124 :
1125 : A SIMD clone statement is if-convertible if:
1126 : - it is an GIMPLE_CALL,
1127 : - it has a FNDECL,
1128 : - it has SIMD clones,
1129 : - it has at least one inbranch clone. */
1130 : static bool
1131 1593 : if_convertible_simdclone_stmt_p (gimple *stmt)
1132 : {
1133 1593 : if (!is_gimple_call (stmt))
1134 : return false;
1135 :
1136 1593 : tree fndecl = gimple_call_fndecl (stmt);
1137 1593 : if (fndecl)
1138 : {
1139 : /* We can vectorize some builtins and functions with SIMD "inbranch"
1140 : clones. */
1141 1325 : struct cgraph_node *node = cgraph_node::get (fndecl);
1142 1325 : if (node && node->simd_clones != NULL)
1143 : /* Ensure that at least one clone can be "inbranch". */
1144 1962 : for (struct cgraph_node *n = node->simd_clones; n != NULL;
1145 961 : n = n->simdclone->next_clone)
1146 1960 : if (n->simdclone->inbranch)
1147 : return true;
1148 : }
1149 :
1150 : return false;
1151 : }
1152 :
1153 : /* Return true when STMT is if-convertible.
1154 :
1155 : A statement is if-convertible if:
1156 : - it is an if-convertible GIMPLE_ASSIGN,
1157 : - it is a GIMPLE_LABEL or a GIMPLE_COND,
1158 : - it is a switch equivalent to COND
1159 : - it is builtins call,
1160 : - it is a call to a function with a SIMD clone. */
1161 :
1162 : static bool
1163 174819 : if_convertible_stmt_p (gimple *stmt, vec<data_reference_p> refs)
1164 : {
1165 174819 : switch (gimple_code (stmt))
1166 : {
1167 : case GIMPLE_LABEL:
1168 : case GIMPLE_DEBUG:
1169 : case GIMPLE_COND:
1170 : return true;
1171 :
1172 14 : case GIMPLE_SWITCH:
1173 14 : return if_convertible_switch_p (as_a <gswitch *> (stmt));
1174 :
1175 77151 : case GIMPLE_ASSIGN:
1176 77151 : return if_convertible_gimple_assign_stmt_p (stmt, refs);
1177 :
1178 1485 : case GIMPLE_CALL:
1179 1485 : {
1180 : /* Check if stmt is a simd clone first. */
1181 1485 : if (if_convertible_simdclone_stmt_p (stmt))
1182 : {
1183 999 : gimple_set_plf (stmt, GF_PLF_2, true);
1184 999 : need_to_predicate = true;
1185 999 : return true;
1186 : }
1187 :
1188 : /* Check if the call can trap and if so require predication. */
1189 486 : if (gimple_could_trap_p (stmt))
1190 : {
1191 155 : if (ifcvt_can_predicate (stmt))
1192 : {
1193 108 : gimple_set_plf (stmt, GF_PLF_2, true);
1194 108 : need_to_predicate = true;
1195 108 : return true;
1196 : }
1197 : else
1198 : {
1199 47 : if (dump_file && (dump_flags & TDF_DETAILS))
1200 0 : fprintf (dump_file, "stmt could trap...\n");
1201 47 : return false;
1202 : }
1203 : }
1204 :
1205 : /* Check if it's a prefetch. Many ISAs contain vectorized and/or
1206 : conditional prefetches so if-convert should convert them or remove
1207 : them. Mark them as supported. */
1208 331 : if (gimple_call_builtin_p (stmt, BUILT_IN_PREFETCH))
1209 : return true;
1210 :
1211 : /* There are some IFN_s that are used to replace builtins but have the
1212 : same semantics. Even if MASK_CALL cannot handle them vectorable_call
1213 : will insert the proper selection, so do not block conversion. */
1214 329 : int flags = gimple_call_flags (stmt);
1215 329 : if ((flags & ECF_CONST)
1216 329 : && !(flags & ECF_LOOPING_CONST_OR_PURE)
1217 658 : && gimple_call_combined_fn (stmt) != CFN_LAST)
1218 : return true;
1219 :
1220 : return false;
1221 : }
1222 :
1223 0 : default:
1224 : /* Don't know what to do with 'em so don't do anything. */
1225 0 : if (dump_file && (dump_flags & TDF_DETAILS))
1226 : {
1227 0 : fprintf (dump_file, "don't know what to do\n");
1228 0 : print_gimple_stmt (dump_file, stmt, 0, TDF_SLIM);
1229 : }
1230 : return false;
1231 : }
1232 : }
1233 :
1234 : /* Assumes that BB has more than 1 predecessors.
1235 : Returns false if at least one successor is not on critical edge
1236 : and true otherwise. */
1237 :
1238 : static inline bool
1239 81310 : all_preds_critical_p (basic_block bb)
1240 : {
1241 81310 : edge e;
1242 81310 : edge_iterator ei;
1243 :
1244 158861 : FOR_EACH_EDGE (e, ei, bb->preds)
1245 140992 : if (EDGE_COUNT (e->src->succs) == 1)
1246 : return false;
1247 : return true;
1248 : }
1249 :
1250 : /* Return true when BB is if-convertible. This routine does not check
1251 : basic block's statements and phis.
1252 :
1253 : A basic block is not if-convertible if:
1254 : - it is non-empty and it is after the exit block (in BFS order),
1255 : - it is after the exit block but before the latch,
1256 : - its edges are not normal.
1257 :
1258 : EXIT_BB is the basic block containing the exit of the LOOP. BB is
1259 : inside LOOP. */
1260 :
1261 : static bool
1262 229634 : if_convertible_bb_p (class loop *loop, basic_block bb, basic_block exit_bb)
1263 : {
1264 229634 : edge e;
1265 229634 : edge_iterator ei;
1266 :
1267 229634 : if (dump_file && (dump_flags & TDF_DETAILS))
1268 102 : fprintf (dump_file, "----------[%d]-------------\n", bb->index);
1269 :
1270 229634 : if (EDGE_COUNT (bb->succs) > 2)
1271 : return false;
1272 :
1273 459140 : if (gcall *call = safe_dyn_cast <gcall *> (*gsi_last_bb (bb)))
1274 190 : if (gimple_call_ctrl_altering_p (call))
1275 : return false;
1276 :
1277 229570 : if (exit_bb)
1278 : {
1279 42399 : if (bb != loop->latch)
1280 : {
1281 528 : if (dump_file && (dump_flags & TDF_DETAILS))
1282 0 : fprintf (dump_file, "basic block after exit bb but before latch\n");
1283 528 : return false;
1284 : }
1285 41871 : else if (!empty_block_p (bb))
1286 : {
1287 1436 : if (dump_file && (dump_flags & TDF_DETAILS))
1288 0 : fprintf (dump_file, "non empty basic block after exit bb\n");
1289 1436 : return false;
1290 : }
1291 40435 : else if (bb == loop->latch
1292 40435 : && bb != exit_bb
1293 80870 : && !dominated_by_p (CDI_DOMINATORS, bb, exit_bb))
1294 : {
1295 11 : if (dump_file && (dump_flags & TDF_DETAILS))
1296 0 : fprintf (dump_file, "latch is not dominated by exit_block\n");
1297 11 : return false;
1298 : }
1299 : }
1300 :
1301 : /* Be less adventurous and handle only normal edges. */
1302 557100 : FOR_EACH_EDGE (e, ei, bb->succs)
1303 329515 : if (e->flags & (EDGE_EH | EDGE_ABNORMAL | EDGE_IRREDUCIBLE_LOOP))
1304 : {
1305 10 : if (dump_file && (dump_flags & TDF_DETAILS))
1306 0 : fprintf (dump_file, "Difficult to handle edges\n");
1307 10 : return false;
1308 : }
1309 :
1310 : return true;
1311 : }
1312 :
1313 : /* Return true when all predecessor blocks of BB are visited. The
1314 : VISITED bitmap keeps track of the visited blocks. */
1315 :
1316 : static bool
1317 2309646 : pred_blocks_visited_p (basic_block bb, bitmap *visited)
1318 : {
1319 2309646 : edge e;
1320 2309646 : edge_iterator ei;
1321 3980298 : FOR_EACH_EDGE (e, ei, bb->preds)
1322 2618441 : if (!bitmap_bit_p (*visited, e->src->index))
1323 : return false;
1324 :
1325 : return true;
1326 : }
1327 :
1328 : /* Get body of a LOOP in suitable order for if-conversion. It is
1329 : caller's responsibility to deallocate basic block list.
1330 : If-conversion suitable order is, breadth first sort (BFS) order
1331 : with an additional constraint: select a block only if all its
1332 : predecessors are already selected. */
1333 :
1334 : static basic_block *
1335 402272 : get_loop_body_in_if_conv_order (const class loop *loop)
1336 : {
1337 402272 : basic_block *blocks, *blocks_in_bfs_order;
1338 402272 : basic_block bb;
1339 402272 : bitmap visited;
1340 402272 : unsigned int index = 0;
1341 402272 : unsigned int visited_count = 0;
1342 :
1343 402272 : gcc_assert (loop->num_nodes);
1344 402272 : gcc_assert (loop->latch != EXIT_BLOCK_PTR_FOR_FN (cfun));
1345 :
1346 402272 : blocks = XCNEWVEC (basic_block, loop->num_nodes);
1347 402272 : visited = BITMAP_ALLOC (NULL);
1348 :
1349 402272 : blocks_in_bfs_order = get_loop_body_in_bfs_order (loop);
1350 :
1351 402272 : index = 0;
1352 4006822 : while (index < loop->num_nodes)
1353 : {
1354 3202330 : bb = blocks_in_bfs_order [index];
1355 :
1356 3202330 : if (bb->flags & BB_IRREDUCIBLE_LOOP)
1357 : {
1358 52 : free (blocks_in_bfs_order);
1359 52 : BITMAP_FREE (visited);
1360 52 : free (blocks);
1361 52 : return NULL;
1362 : }
1363 :
1364 3202278 : if (!bitmap_bit_p (visited, bb->index))
1365 : {
1366 2309646 : if (pred_blocks_visited_p (bb, &visited)
1367 2309646 : || bb == loop->header)
1368 : {
1369 : /* This block is now visited. */
1370 1764129 : bitmap_set_bit (visited, bb->index);
1371 1764129 : blocks[visited_count++] = bb;
1372 : }
1373 : }
1374 :
1375 3202278 : index++;
1376 :
1377 3202278 : if (index == loop->num_nodes
1378 462481 : && visited_count != loop->num_nodes)
1379 : /* Not done yet. */
1380 3202278 : index = 0;
1381 : }
1382 402220 : free (blocks_in_bfs_order);
1383 402220 : BITMAP_FREE (visited);
1384 :
1385 : /* Go through loop and reject if-conversion or lowering of bitfields if we
1386 : encounter statements we do not believe the vectorizer will be able to
1387 : handle. If adding a new type of statement here, make sure
1388 : 'ifcvt_local_dce' is also able to handle it propertly. */
1389 2155084 : for (index = 0; index < loop->num_nodes; index++)
1390 : {
1391 1755389 : basic_block bb = blocks[index];
1392 1755389 : gimple_stmt_iterator gsi;
1393 :
1394 1755389 : bool may_have_nonlocal_labels
1395 1755389 : = bb_with_exit_edge_p (loop, bb) || bb == loop->latch;
1396 16055219 : for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
1397 12546966 : switch (gimple_code (gsi_stmt (gsi)))
1398 : {
1399 40337 : case GIMPLE_LABEL:
1400 40337 : if (!may_have_nonlocal_labels)
1401 : {
1402 6782 : tree label
1403 6782 : = gimple_label_label (as_a <glabel *> (gsi_stmt (gsi)));
1404 13564 : if (DECL_NONLOCAL (label) || FORCED_LABEL (label))
1405 : {
1406 40 : free (blocks);
1407 40 : return NULL;
1408 : }
1409 : }
1410 : /* Fallthru. */
1411 12544441 : case GIMPLE_ASSIGN:
1412 12544441 : case GIMPLE_CALL:
1413 12544441 : case GIMPLE_DEBUG:
1414 12544441 : case GIMPLE_COND:
1415 12544441 : case GIMPLE_SWITCH:
1416 12544441 : gimple_set_uid (gsi_stmt (gsi), 0);
1417 12544441 : break;
1418 2485 : default:
1419 2485 : free (blocks);
1420 2485 : return NULL;
1421 : }
1422 : }
1423 : return blocks;
1424 : }
1425 :
1426 : /* Returns true when the analysis of the predicates for all the basic
1427 : blocks in LOOP succeeded.
1428 :
1429 : predicate_bbs first allocates the predicates of the basic blocks.
1430 : These fields are then initialized with the tree expressions
1431 : representing the predicates under which a basic block is executed
1432 : in the LOOP. As the loop->header is executed at each iteration, it
1433 : has the "true" predicate. Other statements executed under a
1434 : condition are predicated with that condition, for example
1435 :
1436 : | if (x)
1437 : | S1;
1438 : | else
1439 : | S2;
1440 :
1441 : S1 will be predicated with "x", and
1442 : S2 will be predicated with "!x". */
1443 :
1444 : static void
1445 40424 : predicate_bbs (loop_p loop)
1446 : {
1447 40424 : unsigned int i;
1448 :
1449 260512 : for (i = 0; i < loop->num_nodes; i++)
1450 220088 : init_bb_predicate (ifc_bbs[i]);
1451 :
1452 260512 : for (i = 0; i < loop->num_nodes; i++)
1453 : {
1454 220088 : basic_block bb = ifc_bbs[i];
1455 220088 : tree cond;
1456 :
1457 : /* The loop latch and loop exit block are always executed and
1458 : have no extra conditions to be processed: skip them. */
1459 300936 : if (bb == loop->latch
1460 220088 : || bb_with_exit_edge_p (loop, bb))
1461 : {
1462 80848 : reset_bb_predicate (bb);
1463 80848 : continue;
1464 : }
1465 :
1466 139240 : cond = bb_predicate (bb);
1467 278480 : if (gcond *stmt = safe_dyn_cast <gcond *> (*gsi_last_bb (bb)))
1468 : {
1469 57030 : tree c2;
1470 57030 : edge true_edge, false_edge;
1471 57030 : location_t loc = gimple_location (stmt);
1472 57030 : tree c;
1473 : /* gcc.dg/fold-bopcond-1.c shows that despite all forwprop passes
1474 : conditions can remain unfolded because of multiple uses so
1475 : try to re-fold here, especially to get precision changing
1476 : conversions sorted out. Do not simply fold the stmt since
1477 : this is analysis only. When conditions were embedded in
1478 : COND_EXPRs those were folded separately before folding the
1479 : COND_EXPR but as they are now outside we have to make sure
1480 : to fold them. Do it here - another opportunity would be to
1481 : fold predicates as they are inserted. */
1482 57030 : gimple_match_op cexpr (gimple_match_cond::UNCOND,
1483 57030 : gimple_cond_code (stmt),
1484 : boolean_type_node,
1485 : gimple_cond_lhs (stmt),
1486 57030 : gimple_cond_rhs (stmt));
1487 57030 : if (cexpr.resimplify (NULL, follow_all_ssa_edges)
1488 3972 : && cexpr.code.is_tree_code ()
1489 61002 : && TREE_CODE_CLASS ((tree_code)cexpr.code) == tcc_comparison)
1490 575 : c = build2_loc (loc, (tree_code)cexpr.code, boolean_type_node,
1491 : cexpr.ops[0], cexpr.ops[1]);
1492 : else
1493 56455 : c = build2_loc (loc, gimple_cond_code (stmt),
1494 : boolean_type_node,
1495 : gimple_cond_lhs (stmt),
1496 : gimple_cond_rhs (stmt));
1497 :
1498 : /* Add new condition into destination's predicate list. */
1499 57030 : extract_true_false_edges_from_block (gimple_bb (stmt),
1500 : &true_edge, &false_edge);
1501 :
1502 : /* If C is true, then TRUE_EDGE is taken. */
1503 57030 : add_to_dst_predicate_list (loop, true_edge, unshare_expr (cond),
1504 : unshare_expr (c));
1505 :
1506 : /* If C is false, then FALSE_EDGE is taken. */
1507 57030 : c2 = build1_loc (loc, TRUTH_NOT_EXPR, boolean_type_node,
1508 : unshare_expr (c));
1509 57030 : add_to_dst_predicate_list (loop, false_edge,
1510 : unshare_expr (cond), c2);
1511 :
1512 57030 : cond = NULL_TREE;
1513 : }
1514 :
1515 : /* Assumes the limited COND like switches checked for earlier. */
1516 82210 : else if (gswitch *sw = safe_dyn_cast <gswitch *> (*gsi_last_bb (bb)))
1517 : {
1518 71 : location_t loc = gimple_location (*gsi_last_bb (bb));
1519 :
1520 71 : tree default_label = CASE_LABEL (gimple_switch_default_label (sw));
1521 71 : tree cond_label = CASE_LABEL (gimple_switch_label (sw, 1));
1522 :
1523 71 : edge false_edge = find_edge (bb, label_to_block (cfun, default_label));
1524 71 : edge true_edge = find_edge (bb, label_to_block (cfun, cond_label));
1525 :
1526 : /* Create chain of switch tests for each case. */
1527 71 : tree switch_cond = NULL_TREE;
1528 71 : tree index = gimple_switch_index (sw);
1529 272 : for (unsigned i = 1; i < gimple_switch_num_labels (sw); i++)
1530 : {
1531 201 : tree label = gimple_switch_label (sw, i);
1532 201 : tree case_cond;
1533 201 : if (CASE_HIGH (label))
1534 : {
1535 7 : tree low = build2_loc (loc, GE_EXPR,
1536 : boolean_type_node,
1537 7 : index, fold_convert_loc (loc, TREE_TYPE (index),
1538 7 : CASE_LOW (label)));
1539 14 : tree high = build2_loc (loc, LE_EXPR,
1540 : boolean_type_node,
1541 7 : index, fold_convert_loc (loc, TREE_TYPE (index),
1542 7 : CASE_HIGH (label)));
1543 7 : case_cond = build2_loc (loc, TRUTH_AND_EXPR,
1544 : boolean_type_node,
1545 : low, high);
1546 : }
1547 : else
1548 194 : case_cond = build2_loc (loc, EQ_EXPR,
1549 : boolean_type_node,
1550 : index,
1551 194 : fold_convert_loc (loc, TREE_TYPE (index),
1552 194 : CASE_LOW (label)));
1553 201 : if (i > 1)
1554 130 : switch_cond = build2_loc (loc, TRUTH_OR_EXPR,
1555 : boolean_type_node,
1556 : case_cond, switch_cond);
1557 : else
1558 : switch_cond = case_cond;
1559 : }
1560 :
1561 71 : add_to_dst_predicate_list (loop, true_edge, unshare_expr (cond),
1562 : unshare_expr (switch_cond));
1563 71 : switch_cond = build1_loc (loc, TRUTH_NOT_EXPR, boolean_type_node,
1564 : unshare_expr (switch_cond));
1565 71 : add_to_dst_predicate_list (loop, false_edge,
1566 : unshare_expr (cond), switch_cond);
1567 71 : cond = NULL_TREE;
1568 : }
1569 :
1570 : /* If current bb has only one successor, then consider it as an
1571 : unconditional goto. */
1572 302227 : if (single_succ_p (bb))
1573 : {
1574 82139 : basic_block bb_n = single_succ (bb);
1575 :
1576 : /* The successor bb inherits the predicate of its
1577 : predecessor. If there is no predicate in the predecessor
1578 : bb, then consider the successor bb as always executed. */
1579 82139 : if (cond == NULL_TREE)
1580 0 : cond = boolean_true_node;
1581 :
1582 82139 : add_to_predicate_list (loop, bb_n, cond);
1583 : }
1584 : }
1585 :
1586 : /* The loop header is always executed. */
1587 40424 : reset_bb_predicate (loop->header);
1588 40424 : gcc_assert (bb_predicate_gimplified_stmts (loop->header) == NULL
1589 : && bb_predicate_gimplified_stmts (loop->latch) == NULL);
1590 40424 : }
1591 :
1592 : /* Build region by adding loop pre-header and post-header blocks. */
1593 :
1594 : static vec<basic_block>
1595 40424 : build_region (class loop *loop)
1596 : {
1597 40424 : vec<basic_block> region = vNULL;
1598 40424 : basic_block exit_bb = NULL;
1599 :
1600 40424 : gcc_assert (ifc_bbs);
1601 : /* The first element is loop pre-header. */
1602 40424 : region.safe_push (loop_preheader_edge (loop)->src);
1603 :
1604 260512 : for (unsigned int i = 0; i < loop->num_nodes; i++)
1605 : {
1606 220088 : basic_block bb = ifc_bbs[i];
1607 220088 : region.safe_push (bb);
1608 : /* Find loop postheader. */
1609 220088 : edge e;
1610 220088 : edge_iterator ei;
1611 488703 : FOR_EACH_EDGE (e, ei, bb->succs)
1612 309039 : if (loop_exit_edge_p (loop, e))
1613 : {
1614 40424 : exit_bb = e->dest;
1615 40424 : break;
1616 : }
1617 : }
1618 : /* The last element is loop post-header. */
1619 40424 : gcc_assert (exit_bb);
1620 40424 : region.safe_push (exit_bb);
1621 40424 : return region;
1622 : }
1623 :
1624 : /* Return true when LOOP is if-convertible. This is a helper function
1625 : for if_convertible_loop_p. REFS and DDRS are initialized and freed
1626 : in if_convertible_loop_p. */
1627 :
1628 : static bool
1629 42473 : if_convertible_loop_p_1 (class loop *loop, vec<data_reference_p> *refs)
1630 : {
1631 42473 : unsigned int i;
1632 42473 : basic_block exit_bb = NULL;
1633 42473 : vec<basic_block> region;
1634 :
1635 42473 : calculate_dominance_info (CDI_DOMINATORS);
1636 :
1637 270058 : for (i = 0; i < loop->num_nodes; i++)
1638 : {
1639 229634 : basic_block bb = ifc_bbs[i];
1640 :
1641 229634 : if (!if_convertible_bb_p (loop, bb, exit_bb))
1642 : return false;
1643 :
1644 227585 : if (bb_with_exit_edge_p (loop, bb))
1645 42399 : exit_bb = bb;
1646 : }
1647 :
1648 40424 : data_reference_p dr;
1649 :
1650 40424 : innermost_DR_map
1651 40424 : = new hash_map<innermost_loop_behavior_hash, data_reference_p>;
1652 40424 : baseref_DR_map = new hash_map<tree_operand_hash, data_reference_p>;
1653 :
1654 : /* Compute post-dominator tree locally. */
1655 40424 : region = build_region (loop);
1656 40424 : calculate_dominance_info_for_region (CDI_POST_DOMINATORS, region);
1657 :
1658 40424 : predicate_bbs (loop);
1659 :
1660 : /* Free post-dominator tree since it is not used after predication. */
1661 40424 : free_dominance_info_for_region (cfun, CDI_POST_DOMINATORS, region);
1662 40424 : region.release ();
1663 :
1664 179289 : for (i = 0; refs->iterate (i, &dr); i++)
1665 : {
1666 138865 : tree ref = DR_REF (dr);
1667 :
1668 138865 : dr->aux = XNEW (struct ifc_dr);
1669 138865 : DR_BASE_W_UNCONDITIONALLY (dr) = false;
1670 138865 : DR_RW_UNCONDITIONALLY (dr) = false;
1671 138865 : DR_W_UNCONDITIONALLY (dr) = false;
1672 138865 : IFC_DR (dr)->rw_predicate = boolean_false_node;
1673 138865 : IFC_DR (dr)->w_predicate = boolean_false_node;
1674 138865 : IFC_DR (dr)->base_w_predicate = boolean_false_node;
1675 138865 : if (gimple_uid (DR_STMT (dr)) == 0)
1676 138188 : gimple_set_uid (DR_STMT (dr), i + 1);
1677 :
1678 : /* If DR doesn't have innermost loop behavior or it's a compound
1679 : memory reference, we synthesize its innermost loop behavior
1680 : for hashing. */
1681 138865 : if (TREE_CODE (ref) == COMPONENT_REF
1682 : || TREE_CODE (ref) == IMAGPART_EXPR
1683 : || TREE_CODE (ref) == REALPART_EXPR
1684 90147 : || !(DR_BASE_ADDRESS (dr) || DR_OFFSET (dr)
1685 26353 : || DR_INIT (dr) || DR_STEP (dr)))
1686 : {
1687 131948 : while (TREE_CODE (ref) == COMPONENT_REF
1688 76230 : || TREE_CODE (ref) == IMAGPART_EXPR
1689 207600 : || TREE_CODE (ref) == REALPART_EXPR)
1690 56877 : ref = TREE_OPERAND (ref, 0);
1691 :
1692 75071 : memset (&DR_INNERMOST (dr), 0, sizeof (DR_INNERMOST (dr)));
1693 75071 : DR_BASE_ADDRESS (dr) = ref;
1694 : }
1695 138865 : hash_memrefs_baserefs_and_store_DRs_read_written_info (dr);
1696 : }
1697 :
1698 203479 : for (i = 0; i < loop->num_nodes; i++)
1699 : {
1700 174528 : basic_block bb = ifc_bbs[i];
1701 174528 : gimple_stmt_iterator itr;
1702 :
1703 : /* Check the if-convertibility of statements in predicated BBs. */
1704 174528 : if (!dominated_by_p (CDI_DOMINATORS, loop->latch, bb))
1705 307292 : for (itr = gsi_start_bb (bb); !gsi_end_p (itr); gsi_next (&itr))
1706 174819 : if (!if_convertible_stmt_p (gsi_stmt (itr), *refs))
1707 13522 : return false;
1708 : }
1709 :
1710 : /* Checking PHIs needs to be done after stmts, as the fact whether there
1711 : are any masked loads or stores affects the tests. */
1712 177415 : for (i = 0; i < loop->num_nodes; i++)
1713 : {
1714 148464 : basic_block bb = ifc_bbs[i];
1715 148464 : gphi_iterator itr;
1716 :
1717 287168 : for (itr = gsi_start_phis (bb); !gsi_end_p (itr); gsi_next (&itr))
1718 138704 : if (!if_convertible_phi_p (loop, bb, itr.phi ()))
1719 : return false;
1720 : }
1721 :
1722 28951 : if (dump_file)
1723 33 : fprintf (dump_file, "Applying if-conversion\n");
1724 :
1725 : return true;
1726 : }
1727 :
1728 : /* Return true when LOOP is if-convertible.
1729 : LOOP is if-convertible if:
1730 : - it is innermost,
1731 : - it has two or more basic blocks,
1732 : - it has only one exit,
1733 : - loop header is not the exit edge,
1734 : - if its basic blocks and phi nodes are if convertible. */
1735 :
1736 : static bool
1737 42614 : if_convertible_loop_p (class loop *loop, vec<data_reference_p> *refs)
1738 : {
1739 42614 : edge e;
1740 42614 : edge_iterator ei;
1741 42614 : bool res = false;
1742 :
1743 : /* Handle only innermost loop. */
1744 42614 : if (!loop || loop->inner)
1745 : {
1746 0 : if (dump_file && (dump_flags & TDF_DETAILS))
1747 0 : fprintf (dump_file, "not innermost loop\n");
1748 0 : return false;
1749 : }
1750 :
1751 : /* If only one block, no need for if-conversion. */
1752 42614 : if (loop->num_nodes <= 2)
1753 : {
1754 0 : if (dump_file && (dump_flags & TDF_DETAILS))
1755 0 : fprintf (dump_file, "less than 2 basic blocks\n");
1756 0 : return false;
1757 : }
1758 :
1759 : /* If one of the loop header's edge is an exit edge then do not
1760 : apply if-conversion. */
1761 127724 : FOR_EACH_EDGE (e, ei, loop->header->succs)
1762 85251 : if (loop_exit_edge_p (loop, e))
1763 : return false;
1764 :
1765 42473 : res = if_convertible_loop_p_1 (loop, refs);
1766 :
1767 82897 : delete innermost_DR_map;
1768 42473 : innermost_DR_map = NULL;
1769 :
1770 82897 : delete baseref_DR_map;
1771 42473 : baseref_DR_map = NULL;
1772 :
1773 42473 : return res;
1774 : }
1775 :
1776 : /* Return reduc_1 if has_nop.
1777 :
1778 : if (...)
1779 : tmp1 = (unsigned type) reduc_1;
1780 : tmp2 = tmp1 + rhs2;
1781 : reduc_3 = (signed type) tmp2. */
1782 : static tree
1783 11188 : strip_nop_cond_scalar_reduction (bool has_nop, tree op)
1784 : {
1785 11188 : if (!has_nop)
1786 : return op;
1787 :
1788 414 : if (TREE_CODE (op) != SSA_NAME)
1789 : return NULL_TREE;
1790 :
1791 368 : gassign *stmt = safe_dyn_cast <gassign *> (SSA_NAME_DEF_STMT (op));
1792 368 : if (!stmt
1793 368 : || !CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (stmt))
1794 230 : || !tree_nop_conversion_p (TREE_TYPE (op), TREE_TYPE
1795 : (gimple_assign_rhs1 (stmt))))
1796 149 : return NULL_TREE;
1797 :
1798 219 : return gimple_assign_rhs1 (stmt);
1799 : }
1800 :
1801 : /* Returns true if def-stmt for phi argument ARG is simple increment/decrement
1802 : which is in predicated basic block.
1803 : In fact, the following PHI pattern is searching:
1804 : loop-header:
1805 : reduc_1 = PHI <..., reduc_2>
1806 : ...
1807 : if (...)
1808 : reduc_3 = ...
1809 : reduc_2 = PHI <reduc_1, reduc_3>
1810 :
1811 : ARG_0 and ARG_1 are correspondent PHI arguments.
1812 : REDUC, OP0 and OP1 contain reduction stmt and its operands.
1813 : EXTENDED is true if PHI has > 2 arguments. */
1814 :
1815 : static bool
1816 50244 : is_cond_scalar_reduction (gimple *phi, gimple **reduc, tree arg_0, tree arg_1,
1817 : tree *op0, tree *op1, bool extended, bool* has_nop,
1818 : gimple **nop_reduc)
1819 : {
1820 50244 : tree lhs, r_op1, r_op2, r_nop1, r_nop2;
1821 50244 : gimple *stmt;
1822 50244 : gimple *header_phi = NULL;
1823 50244 : enum tree_code reduction_op;
1824 50244 : basic_block bb = gimple_bb (phi);
1825 50244 : class loop *loop = bb->loop_father;
1826 50244 : edge latch_e = loop_latch_edge (loop);
1827 50244 : imm_use_iterator imm_iter;
1828 50244 : use_operand_p use_p;
1829 50244 : edge e;
1830 50244 : edge_iterator ei;
1831 50244 : bool result = *has_nop = false;
1832 50244 : if (TREE_CODE (arg_0) != SSA_NAME || TREE_CODE (arg_1) != SSA_NAME)
1833 : return false;
1834 :
1835 33711 : if (!extended && gimple_code (SSA_NAME_DEF_STMT (arg_0)) == GIMPLE_PHI)
1836 : {
1837 8390 : lhs = arg_1;
1838 8390 : header_phi = SSA_NAME_DEF_STMT (arg_0);
1839 8390 : stmt = SSA_NAME_DEF_STMT (arg_1);
1840 : }
1841 25321 : else if (gimple_code (SSA_NAME_DEF_STMT (arg_1)) == GIMPLE_PHI)
1842 : {
1843 11191 : lhs = arg_0;
1844 11191 : header_phi = SSA_NAME_DEF_STMT (arg_1);
1845 11191 : stmt = SSA_NAME_DEF_STMT (arg_0);
1846 : }
1847 : else
1848 : return false;
1849 19581 : if (gimple_bb (header_phi) != loop->header)
1850 : return false;
1851 :
1852 18691 : if (PHI_ARG_DEF_FROM_EDGE (header_phi, latch_e) != PHI_RESULT (phi))
1853 : return false;
1854 :
1855 12680 : if (gimple_code (stmt) != GIMPLE_ASSIGN
1856 12680 : || gimple_has_volatile_ops (stmt))
1857 : return false;
1858 :
1859 12550 : if (!flow_bb_inside_loop_p (loop, gimple_bb (stmt)))
1860 : return false;
1861 :
1862 12461 : if (!is_predicated (gimple_bb (stmt)))
1863 : return false;
1864 :
1865 : /* Check that stmt-block is predecessor of phi-block. */
1866 10334 : FOR_EACH_EDGE (e, ei, gimple_bb (stmt)->succs)
1867 10270 : if (e->dest == bb)
1868 : {
1869 : result = true;
1870 : break;
1871 : }
1872 10206 : if (!result)
1873 : return false;
1874 :
1875 10142 : if (!has_single_use (lhs))
1876 : return false;
1877 :
1878 10091 : reduction_op = gimple_assign_rhs_code (stmt);
1879 :
1880 : /* Catch something like below
1881 :
1882 : loop-header:
1883 : reduc_1 = PHI <..., reduc_2>
1884 : ...
1885 : if (...)
1886 : tmp1 = (unsigned type) reduc_1;
1887 : tmp2 = tmp1 + rhs2;
1888 : reduc_3 = (signed type) tmp2;
1889 :
1890 : reduc_2 = PHI <reduc_1, reduc_3>
1891 :
1892 : and convert to
1893 :
1894 : reduc_2 = PHI <0, reduc_1>
1895 : tmp1 = (unsigned type)reduc_1;
1896 : ifcvt = cond_expr ? rhs2 : 0
1897 : tmp2 = tmp1 +/- ifcvt;
1898 : reduc_1 = (signed type)tmp2; */
1899 :
1900 10091 : if (CONVERT_EXPR_CODE_P (reduction_op))
1901 : {
1902 413 : lhs = gimple_assign_rhs1 (stmt);
1903 413 : if (TREE_CODE (lhs) != SSA_NAME
1904 413 : || !has_single_use (lhs))
1905 : return false;
1906 :
1907 224 : *nop_reduc = stmt;
1908 224 : stmt = SSA_NAME_DEF_STMT (lhs);
1909 224 : if (gimple_bb (stmt) != gimple_bb (*nop_reduc)
1910 224 : || !is_gimple_assign (stmt))
1911 : return false;
1912 :
1913 221 : *has_nop = true;
1914 221 : reduction_op = gimple_assign_rhs_code (stmt);
1915 : }
1916 :
1917 9899 : if (reduction_op != PLUS_EXPR
1918 : && reduction_op != MINUS_EXPR
1919 9899 : && reduction_op != MULT_EXPR
1920 9899 : && reduction_op != BIT_IOR_EXPR
1921 : && reduction_op != BIT_XOR_EXPR
1922 4419 : && reduction_op != BIT_AND_EXPR)
1923 : return false;
1924 5594 : r_op1 = gimple_assign_rhs1 (stmt);
1925 5594 : r_op2 = gimple_assign_rhs2 (stmt);
1926 :
1927 5594 : r_nop1 = strip_nop_cond_scalar_reduction (*has_nop, r_op1);
1928 5594 : r_nop2 = strip_nop_cond_scalar_reduction (*has_nop, r_op2);
1929 :
1930 : /* Make R_OP1 to hold reduction variable. */
1931 5594 : if (r_nop2 == PHI_RESULT (header_phi)
1932 5594 : && commutative_tree_code (reduction_op))
1933 : {
1934 : std::swap (r_op1, r_op2);
1935 : std::swap (r_nop1, r_nop2);
1936 : }
1937 4487 : else if (r_nop1 != PHI_RESULT (header_phi))
1938 : return false;
1939 :
1940 5283 : if (*has_nop)
1941 : {
1942 : /* Check that R_NOP1 is used in nop_stmt or in PHI only. */
1943 598 : FOR_EACH_IMM_USE_FAST (use_p, imm_iter, r_nop1)
1944 : {
1945 304 : gimple *use_stmt = USE_STMT (use_p);
1946 304 : if (is_gimple_debug (use_stmt))
1947 0 : continue;
1948 304 : if (use_stmt == SSA_NAME_DEF_STMT (r_op1))
1949 126 : continue;
1950 178 : if (use_stmt != phi)
1951 42 : return false;
1952 168 : }
1953 : }
1954 :
1955 : /* Check that R_OP1 is used in reduction stmt or in PHI only. */
1956 21548 : FOR_EACH_IMM_USE_FAST (use_p, imm_iter, r_op1)
1957 : {
1958 11275 : gimple *use_stmt = USE_STMT (use_p);
1959 11275 : if (is_gimple_debug (use_stmt))
1960 29 : continue;
1961 11246 : if (use_stmt == stmt)
1962 5057 : continue;
1963 6189 : if (gimple_code (use_stmt) != GIMPLE_PHI)
1964 209 : return false;
1965 209 : }
1966 :
1967 5032 : *op0 = r_op1; *op1 = r_op2;
1968 5032 : *reduc = stmt;
1969 5032 : return true;
1970 : }
1971 :
1972 : /* Converts conditional scalar reduction into unconditional form, e.g.
1973 : bb_4
1974 : if (_5 != 0) goto bb_5 else goto bb_6
1975 : end_bb_4
1976 : bb_5
1977 : res_6 = res_13 + 1;
1978 : end_bb_5
1979 : bb_6
1980 : # res_2 = PHI <res_13(4), res_6(5)>
1981 : end_bb_6
1982 :
1983 : will be converted into sequence
1984 : _ifc__1 = _5 != 0 ? 1 : 0;
1985 : res_2 = res_13 + _ifc__1;
1986 : Argument SWAP tells that arguments of conditional expression should be
1987 : swapped.
1988 : If LOOP_VERSIONED is true if we assume that we versioned the loop for
1989 : vectorization. In that case we can create a COND_OP.
1990 : Returns rhs of resulting PHI assignment. */
1991 :
1992 : static tree
1993 5032 : convert_scalar_cond_reduction (gimple *reduc, gimple_stmt_iterator *gsi,
1994 : tree cond, tree op0, tree op1, bool swap,
1995 : bool has_nop, gimple* nop_reduc,
1996 : bool loop_versioned)
1997 : {
1998 5032 : gimple_stmt_iterator stmt_it;
1999 5032 : gimple *new_assign;
2000 5032 : tree rhs;
2001 5032 : tree rhs1 = gimple_assign_rhs1 (reduc);
2002 5032 : tree lhs = gimple_assign_lhs (reduc);
2003 5032 : tree tmp = make_temp_ssa_name (TREE_TYPE (rhs1), NULL, "_ifc_");
2004 5032 : tree c;
2005 5032 : enum tree_code reduction_op = gimple_assign_rhs_code (reduc);
2006 5032 : tree op_nochange = neutral_op_for_reduction (TREE_TYPE (rhs1), reduction_op,
2007 : NULL, false);
2008 5032 : gimple_seq stmts = NULL;
2009 :
2010 5032 : if (dump_file && (dump_flags & TDF_DETAILS))
2011 : {
2012 2 : fprintf (dump_file, "Found cond scalar reduction.\n");
2013 2 : print_gimple_stmt (dump_file, reduc, 0, TDF_SLIM);
2014 : }
2015 :
2016 : /* If possible create a COND_OP instead of a COND_EXPR and an OP_EXPR.
2017 : The COND_OP will have a neutral_op else value. */
2018 5032 : internal_fn ifn;
2019 5032 : ifn = get_conditional_internal_fn (reduction_op);
2020 5032 : if (loop_versioned && ifn != IFN_LAST
2021 5030 : && vectorized_internal_fn_supported_p (ifn, TREE_TYPE (lhs))
2022 1363 : && !VECTOR_TYPE_P (TREE_TYPE (lhs))
2023 6395 : && !swap)
2024 : {
2025 1343 : gcall *cond_call = gimple_build_call_internal (ifn, 4,
2026 : unshare_expr (cond),
2027 : op0, op1, op0);
2028 1343 : gsi_insert_before (gsi, cond_call, GSI_SAME_STMT);
2029 1343 : gimple_call_set_lhs (cond_call, tmp);
2030 : rhs = tmp;
2031 : }
2032 : else
2033 : {
2034 : /* Build cond expression using COND and constant operand
2035 : of reduction rhs. */
2036 7121 : c = fold_build_cond_expr (TREE_TYPE (rhs1),
2037 : unshare_expr (cond),
2038 : swap ? op_nochange : op1,
2039 : swap ? op1 : op_nochange);
2040 : /* Create assignment stmt and insert it at GSI. */
2041 3689 : new_assign = gimple_build_assign (tmp, c);
2042 3689 : gsi_insert_before (gsi, new_assign, GSI_SAME_STMT);
2043 : /* Build rhs for unconditional increment/decrement/logic_operation. */
2044 3689 : rhs = gimple_build (&stmts, reduction_op,
2045 3689 : TREE_TYPE (rhs1), op0, tmp);
2046 : }
2047 :
2048 5032 : if (has_nop)
2049 : {
2050 126 : rhs = gimple_convert (&stmts,
2051 126 : TREE_TYPE (gimple_assign_lhs (nop_reduc)), rhs);
2052 126 : stmt_it = gsi_for_stmt (nop_reduc);
2053 126 : gsi_remove (&stmt_it, true);
2054 126 : release_defs (nop_reduc);
2055 : }
2056 5032 : gsi_insert_seq_before (gsi, stmts, GSI_SAME_STMT);
2057 :
2058 : /* Delete original reduction stmt. */
2059 5032 : stmt_it = gsi_for_stmt (reduc);
2060 5032 : gsi_remove (&stmt_it, true);
2061 5032 : release_defs (reduc);
2062 5032 : return rhs;
2063 : }
2064 :
2065 : /* Generate a simplified conditional. */
2066 :
2067 : static tree
2068 56700 : gen_simplified_condition (tree cond, scalar_cond_masked_set_type &cond_set)
2069 : {
2070 : /* Check if the value is already live in a previous branch. This resolves
2071 : nested conditionals from diamond PHI reductions. */
2072 56700 : if (TREE_CODE (cond) == SSA_NAME)
2073 : {
2074 56692 : gimple *stmt = SSA_NAME_DEF_STMT (cond);
2075 56692 : gassign *assign = NULL;
2076 56692 : if ((assign = as_a <gassign *> (stmt))
2077 56692 : && gimple_assign_rhs_code (assign) == BIT_AND_EXPR)
2078 : {
2079 3643 : tree arg1 = gimple_assign_rhs1 (assign);
2080 3643 : tree arg2 = gimple_assign_rhs2 (assign);
2081 3643 : if (cond_set.contains ({ arg1, 1 }))
2082 150 : arg1 = boolean_true_node;
2083 : else
2084 3493 : arg1 = gen_simplified_condition (arg1, cond_set);
2085 :
2086 3643 : if (cond_set.contains ({ arg2, 1 }))
2087 1787 : arg2 = boolean_true_node;
2088 : else
2089 1856 : arg2 = gen_simplified_condition (arg2, cond_set);
2090 :
2091 3643 : cond = fold_build2 (TRUTH_AND_EXPR, boolean_type_node, arg1, arg2);
2092 : }
2093 : }
2094 56700 : return cond;
2095 : }
2096 :
2097 : /* Structure used to track meta-data on PHI arguments used to generate
2098 : most efficient comparison sequence to slatten a PHI node. */
2099 :
2100 : typedef struct ifcvt_arg_entry
2101 : {
2102 : /* The PHI node argument value. */
2103 : tree arg;
2104 :
2105 : /* The number of compares required to reach this PHI node from start of the
2106 : BB being if-converted. */
2107 : unsigned num_compares;
2108 :
2109 : /* The number of times this PHI node argument appears in the current PHI
2110 : node. */
2111 : unsigned occurs;
2112 :
2113 : /* The indices at which this PHI arg occurs inside the PHI node. */
2114 : vec <int> *indexes;
2115 : } ifcvt_arg_entry_t;
2116 :
2117 : /* Produce condition for all occurrences of ARG in PHI node. Set *INVERT
2118 : as to whether the condition is inverted. */
2119 :
2120 : static tree
2121 4265 : gen_phi_arg_condition (gphi *phi, ifcvt_arg_entry_t &arg,
2122 : gimple_stmt_iterator *gsi,
2123 : scalar_cond_masked_set_type &cond_set, bool *invert)
2124 : {
2125 4265 : int len;
2126 4265 : int i;
2127 4265 : tree cond = NULL_TREE;
2128 4265 : tree c;
2129 4265 : edge e;
2130 :
2131 4265 : *invert = false;
2132 4265 : len = arg.indexes->length ();
2133 4265 : gcc_assert (len > 0);
2134 8602 : for (i = 0; i < len; i++)
2135 : {
2136 4337 : e = gimple_phi_arg_edge (phi, (*arg.indexes)[i]);
2137 4337 : c = bb_predicate (e->src);
2138 4337 : if (is_true_predicate (c))
2139 : {
2140 0 : cond = c;
2141 0 : break;
2142 : }
2143 : /* If we have just a single inverted predicate, signal that and
2144 : instead invert the COND_EXPR arms. */
2145 4337 : if (len == 1 && TREE_CODE (c) == TRUTH_NOT_EXPR)
2146 : {
2147 84 : c = TREE_OPERAND (c, 0);
2148 84 : *invert = true;
2149 : }
2150 :
2151 4337 : c = gen_simplified_condition (c, cond_set);
2152 4337 : c = force_gimple_operand_gsi (gsi, unshare_expr (c),
2153 : true, NULL_TREE, true, GSI_SAME_STMT);
2154 4337 : if (cond != NULL_TREE)
2155 : {
2156 : /* Must build OR expression. */
2157 72 : cond = fold_or_predicates (EXPR_LOCATION (c), c, cond);
2158 72 : cond = force_gimple_operand_gsi (gsi, unshare_expr (cond), true,
2159 : NULL_TREE, true, GSI_SAME_STMT);
2160 : }
2161 : else
2162 : cond = c;
2163 :
2164 : /* Register the new possibly simplified conditional. When more than 2
2165 : entries in a phi node we chain entries in the false branch, so the
2166 : inverted condition is active. */
2167 4337 : scalar_cond_masked_key pred_cond ({ cond, 1 });
2168 4337 : if (!*invert)
2169 4253 : pred_cond.inverted_p = !pred_cond.inverted_p;
2170 4337 : cond_set.add (pred_cond);
2171 : }
2172 4265 : gcc_assert (cond != NULL_TREE);
2173 4265 : return cond;
2174 : }
2175 :
2176 : /* Find the operand which is different between ARG0_OP and ARG1_OP.
2177 : Returns the operand num where the difference is.
2178 : Set NEWARG0 and NEWARG1 from the different argument.
2179 : Returns -1 if none is found.
2180 : If ARG0_OP/ARG1_OP is commutative also try swapping the
2181 : two commutative operands and return the operand number where
2182 : the difference happens in ARG0_OP. */
2183 :
2184 : static int
2185 1121 : find_different_opnum (const gimple_match_op &arg0_op,
2186 : const gimple_match_op &arg1_op,
2187 : tree *new_arg0, tree *new_arg1)
2188 : {
2189 1121 : unsigned opnum = -1;
2190 1121 : unsigned first;
2191 1121 : first = first_commutative_argument (arg1_op.code, arg1_op.type);
2192 3048 : for (unsigned i = 0; i < arg0_op.num_ops; i++)
2193 : {
2194 2254 : if (!operand_equal_for_phi_arg_p (arg0_op.ops[i],
2195 2254 : arg1_op.ops[i]))
2196 : {
2197 : /* Can handle only one non equal operand. */
2198 1448 : if (opnum != -1u)
2199 : {
2200 : /* Though if opnum is right before i and opnum is equal
2201 : to the first communtative argument, handle communtative
2202 : specially. */
2203 327 : if (i == opnum + 1 && opnum == first)
2204 272 : goto commutative;
2205 : return -1;
2206 : }
2207 : opnum = i;
2208 : }
2209 : }
2210 : /* If all operands are equal only do this is there was single
2211 : operand. */
2212 794 : if (opnum == -1u)
2213 : {
2214 0 : if (arg0_op.num_ops != 1)
2215 : return -1;
2216 : opnum = 0;
2217 : }
2218 794 : *new_arg0 = arg0_op.ops[opnum];
2219 794 : *new_arg1 = arg1_op.ops[opnum];
2220 794 : return opnum;
2221 :
2222 : /* Handle commutative operations. */
2223 272 : commutative:
2224 272 : gcc_assert (first != -1u);
2225 :
2226 : /* Check the rest of the arguments to make sure they are the same. */
2227 272 : for (unsigned i = first + 2; i < arg0_op.num_ops; i++)
2228 0 : if (!operand_equal_for_phi_arg_p (arg0_op.ops[i],
2229 0 : arg1_op.ops[i]))
2230 : return -1;
2231 :
2232 : /* If the arg0[first+1] and arg1[first] are the same
2233 : then the one which is different is arg0[first] and arg1[first+1]
2234 : return first since this is based on arg0. */
2235 272 : if (operand_equal_for_phi_arg_p (arg0_op.ops[first + 1],
2236 272 : arg1_op.ops[first]))
2237 : {
2238 33 : *new_arg0 = arg0_op.ops[first];
2239 33 : *new_arg1 = arg1_op.ops[first + 1];
2240 33 : return first;
2241 : }
2242 : /* If the arg0[first] and arg1[first+1] are the same
2243 : then the one which is different is arg0[first+1] and arg1[first]
2244 : return first+1 since this is based on arg0. */
2245 239 : if (operand_equal_for_phi_arg_p (arg0_op.ops[first],
2246 239 : arg1_op.ops[first + 1]))
2247 : {
2248 14 : *new_arg0 = arg0_op.ops[first + 1];
2249 14 : *new_arg1 = arg1_op.ops[first];
2250 14 : return first + 1;
2251 : }
2252 : return -1;
2253 : }
2254 :
2255 : /* Factors out an operation from *ARG0 and *ARG1 and
2256 : create the new statement at GSI. *RES is the
2257 : result of that new statement. Update *ARG0 and *ARG1
2258 : and *RES to the new values if the factoring happened.
2259 : Loops until all of the factoring is completed. */
2260 :
2261 : static void
2262 47014 : factor_out_operators (tree *res, gimple_stmt_iterator *gsi,
2263 : tree *arg0, tree *arg1, gphi *phi)
2264 : {
2265 47014 : gimple_match_op arg0_op, arg1_op;
2266 47014 : bool repeated = false;
2267 :
2268 47835 : again:
2269 47835 : if (TREE_CODE (*arg0) != SSA_NAME || TREE_CODE (*arg1) != SSA_NAME)
2270 : return;
2271 :
2272 32327 : if (operand_equal_p (*arg0, *arg1))
2273 : return;
2274 :
2275 : /* If either args have > 1 use, then this transformation actually
2276 : increases the number of expressions evaluated at runtime. */
2277 32327 : if (repeated
2278 32327 : ? (!has_zero_uses (*arg0) || !has_zero_uses (*arg1))
2279 31543 : : (!has_single_use (*arg0) || !has_single_use (*arg1)))
2280 : return;
2281 :
2282 5698 : gimple *arg0_def_stmt = SSA_NAME_DEF_STMT (*arg0);
2283 5698 : if (!gimple_extract_op (arg0_def_stmt, &arg0_op))
2284 : return;
2285 :
2286 : /* Might pick up abnormals from previous bbs so stop the loop. */
2287 2417 : if (arg0_op.operands_occurs_in_abnormal_phi ())
2288 : return;
2289 :
2290 2415 : gimple *arg1_def_stmt = SSA_NAME_DEF_STMT (*arg1);
2291 2415 : if (!gimple_extract_op (arg1_def_stmt, &arg1_op))
2292 : return;
2293 :
2294 : /* Might pick up abnormals from previous bbs so stop the loop. */
2295 2089 : if (arg1_op.operands_occurs_in_abnormal_phi ())
2296 : return;
2297 :
2298 : /* No factoring can happen if the codes are different
2299 : or the number operands. */
2300 2089 : if (arg1_op.code != arg0_op.code
2301 2089 : || arg1_op.num_ops != arg0_op.num_ops)
2302 : return;
2303 :
2304 1121 : tree new_arg0, new_arg1;
2305 1121 : int opnum = find_different_opnum (arg0_op, arg1_op, &new_arg0, &new_arg1);
2306 1121 : if (opnum == -1)
2307 : return;
2308 :
2309 : /* BIT_FIELD_REF and BIT_INSERT_EXPR can't be factored out for non-0 operands
2310 : as the other operands require constants. */
2311 841 : if ((arg1_op.code == BIT_FIELD_REF
2312 832 : || arg1_op.code == BIT_INSERT_EXPR)
2313 850 : && opnum != 0)
2314 : return;
2315 :
2316 : /* It is not profitability to factor out vec_perm with
2317 : constant masks (operand 2). The target might not support it
2318 : and that might be invalid to do as such. Also with constants
2319 : masks, the number of elements of the mask type does not need
2320 : to match tne number of elements of other operands and can be
2321 : arbitrary integral vector type so factoring that out can't work.
2322 : Note in the case where one mask is a constant and the other is not,
2323 : the next check for compatiable types will reject the case the
2324 : constant mask has the incompatible type. */
2325 833 : if (arg1_op.code == VEC_PERM_EXPR && opnum == 2
2326 8 : && TREE_CODE (new_arg0) == VECTOR_CST
2327 841 : && TREE_CODE (new_arg1) == VECTOR_CST)
2328 : return;
2329 :
2330 829 : if (!types_compatible_p (TREE_TYPE (new_arg0), TREE_TYPE (new_arg1)))
2331 : return;
2332 821 : tree new_res = make_ssa_name (TREE_TYPE (new_arg0), NULL);
2333 :
2334 : /* Create the operation stmt if possible and insert it. */
2335 :
2336 821 : gimple_match_op new_op = arg0_op;
2337 821 : new_op.ops[opnum] = new_res;
2338 821 : gimple_seq seq = NULL;
2339 821 : tree result = *res;
2340 821 : result = maybe_push_res_to_seq (&new_op, &seq, result);
2341 :
2342 : /* If we can't create the new statement, release the temp name
2343 : and return back. */
2344 821 : if (!result)
2345 : {
2346 0 : release_ssa_name (new_res);
2347 0 : return;
2348 : }
2349 821 : gsi_insert_seq_before (gsi, seq, GSI_CONTINUE_LINKING);
2350 :
2351 821 : if (dump_file && (dump_flags & TDF_DETAILS))
2352 : {
2353 9 : fprintf (dump_file, "PHI ");
2354 9 : print_generic_expr (dump_file, gimple_phi_result (phi));
2355 9 : fprintf (dump_file,
2356 : " changed to factor operation out from COND_EXPR.\n");
2357 9 : fprintf (dump_file, "New stmt with OPERATION that defines ");
2358 9 : print_generic_expr (dump_file, result);
2359 9 : fprintf (dump_file, ".\n");
2360 : }
2361 :
2362 : /* Remove the old operation(s) that has single use. */
2363 821 : gimple_stmt_iterator gsi_for_def;
2364 :
2365 821 : gsi_for_def = gsi_for_stmt (arg0_def_stmt);
2366 821 : gsi_remove (&gsi_for_def, true);
2367 821 : release_defs (arg0_def_stmt);
2368 821 : gsi_for_def = gsi_for_stmt (arg1_def_stmt);
2369 821 : gsi_remove (&gsi_for_def, true);
2370 821 : release_defs (arg1_def_stmt);
2371 :
2372 : /* Update the arguments and try again. */
2373 821 : *arg0 = new_arg0;
2374 821 : *arg1 = new_arg1;
2375 821 : *res = new_res;
2376 :
2377 : /* Update the phi node too. */
2378 821 : gimple_phi_set_result (phi, new_res);
2379 821 : gimple_phi_arg (phi, 0)->def = new_arg0;
2380 821 : gimple_phi_arg (phi, 1)->def = new_arg1;
2381 821 : update_stmt (phi);
2382 :
2383 821 : repeated = true;
2384 821 : goto again;
2385 : }
2386 :
2387 : /* Create the smallest nested conditional possible. On pre-order we record
2388 : which conditionals are live, and on post-order rewrite the chain by removing
2389 : already active conditions.
2390 :
2391 : As an example we simplify:
2392 :
2393 : _7 = a_10 < 0;
2394 : _21 = a_10 >= 0;
2395 : _22 = a_10 < e_11(D);
2396 : _23 = _21 & _22;
2397 : _ifc__42 = _23 ? t_13 : 0;
2398 : t_6 = _7 ? 1 : _ifc__42
2399 :
2400 : into
2401 :
2402 : _7 = a_10 < 0;
2403 : _22 = a_10 < e_11(D);
2404 : _ifc__42 = _22 ? t_13 : 0;
2405 : t_6 = _7 ? 1 : _ifc__42;
2406 :
2407 : which produces better code. */
2408 :
2409 : static tree
2410 6402 : gen_phi_nest_statement (gphi *phi, gimple_stmt_iterator *gsi,
2411 : scalar_cond_masked_set_type &cond_set, tree type,
2412 : gimple **res_stmt, tree lhs0,
2413 : vec<struct ifcvt_arg_entry> &args, unsigned idx)
2414 : {
2415 12804 : if (idx == args.length ())
2416 2137 : return args[idx - 1].arg;
2417 :
2418 4265 : bool invert;
2419 4265 : tree cond = gen_phi_arg_condition (phi, args[idx - 1], gsi, cond_set,
2420 : &invert);
2421 4265 : tree arg1 = gen_phi_nest_statement (phi, gsi, cond_set, type, res_stmt, lhs0,
2422 : args, idx + 1);
2423 :
2424 4265 : unsigned prev = idx;
2425 4265 : unsigned curr = prev - 1;
2426 4265 : tree arg0 = args[curr].arg;
2427 4265 : tree rhs, lhs;
2428 4265 : if (idx > 1)
2429 2128 : lhs = make_temp_ssa_name (type, NULL, "_ifc_");
2430 : else
2431 : lhs = lhs0;
2432 :
2433 4265 : if (invert)
2434 84 : rhs = fold_build_cond_expr (type, unshare_expr (cond),
2435 : arg1, arg0);
2436 : else
2437 4181 : rhs = fold_build_cond_expr (type, unshare_expr (cond),
2438 : arg0, arg1);
2439 4265 : gassign *new_stmt = gimple_build_assign (lhs, rhs);
2440 4265 : gsi_insert_before (gsi, new_stmt, GSI_SAME_STMT);
2441 4265 : update_stmt (new_stmt);
2442 4265 : *res_stmt = new_stmt;
2443 4265 : return lhs;
2444 : }
2445 :
2446 : /* When flattening a PHI node we have a choice of which conditions to test to
2447 : for all the paths from the start of the dominator block of the BB with the
2448 : PHI node. If the PHI node has X arguments we have to only test X - 1
2449 : conditions as the last one is implicit. It does matter which conditions we
2450 : test first. We should test the shortest condition first (distance here is
2451 : measures in the number of logical operators in the condition) and the
2452 : longest one last. This allows us to skip testing the most expensive
2453 : condition. To accomplish this we need to sort the conditions. P1 and P2
2454 : are sorted first based on the number of logical operations (num_compares)
2455 : and then by how often they occur in the PHI node. */
2456 :
2457 : static int
2458 36509 : cmp_arg_entry (const void *p1, const void *p2, void * /* data. */)
2459 : {
2460 36509 : const ifcvt_arg_entry sval1 = *(const ifcvt_arg_entry *)p1;
2461 36509 : const ifcvt_arg_entry sval2 = *(const ifcvt_arg_entry *)p2;
2462 :
2463 36509 : if (sval1.num_compares < sval2.num_compares)
2464 : return -1;
2465 12428 : else if (sval1.num_compares > sval2.num_compares)
2466 : return 1;
2467 :
2468 665 : if (sval1.occurs < sval2.occurs)
2469 : return -1;
2470 665 : else if (sval1.occurs > sval2.occurs)
2471 0 : return 1;
2472 :
2473 : return 0;
2474 : }
2475 :
2476 : /* Replace a scalar PHI node with a COND_EXPR using COND as condition.
2477 : This routine can handle PHI nodes with more than two arguments.
2478 :
2479 : For example,
2480 : S1: A = PHI <x1(1), x2(5)>
2481 : is converted into,
2482 : S2: A = cond ? x1 : x2;
2483 :
2484 : The generated code is inserted at GSI that points to the top of
2485 : basic block's statement list.
2486 : If PHI node has more than two arguments a chain of conditional
2487 : expression is produced.
2488 : LOOP_VERSIONED should be true if we know that the loop was versioned for
2489 : vectorization. */
2490 :
2491 :
2492 : static void
2493 52438 : predicate_scalar_phi (gphi *phi, gimple_stmt_iterator *gsi, bool loop_versioned)
2494 : {
2495 52438 : gimple *new_stmt = NULL, *reduc, *nop_reduc;
2496 52438 : tree rhs, res, arg0, arg1, op0, op1, scev;
2497 52438 : tree cond;
2498 52438 : unsigned int index0;
2499 52438 : edge e;
2500 52438 : basic_block bb;
2501 52438 : unsigned int i;
2502 52438 : bool has_nop;
2503 :
2504 52438 : res = gimple_phi_result (phi);
2505 104876 : if (virtual_operand_p (res))
2506 47071 : return;
2507 :
2508 52438 : if ((rhs = degenerate_phi_result (phi))
2509 52438 : || ((scev = analyze_scalar_evolution (gimple_bb (phi)->loop_father,
2510 : res))
2511 52398 : && !chrec_contains_undetermined (scev)
2512 52398 : && scev != res
2513 17 : && (rhs = gimple_phi_arg_def (phi, 0))))
2514 : {
2515 57 : if (dump_file && (dump_flags & TDF_DETAILS))
2516 : {
2517 0 : fprintf (dump_file, "Degenerate phi!\n");
2518 0 : print_gimple_stmt (dump_file, phi, 0, TDF_SLIM);
2519 : }
2520 57 : new_stmt = gimple_build_assign (res, rhs);
2521 57 : gsi_insert_before (gsi, new_stmt, GSI_SAME_STMT);
2522 57 : update_stmt (new_stmt);
2523 57 : return;
2524 : }
2525 :
2526 52381 : bb = gimple_bb (phi);
2527 : /* Keep track of conditionals already seen. */
2528 52381 : scalar_cond_masked_set_type cond_set;
2529 52381 : if (EDGE_COUNT (bb->preds) == 2)
2530 : {
2531 : /* Predicate ordinary PHI node with 2 arguments. */
2532 47014 : edge first_edge, second_edge;
2533 47014 : basic_block true_bb;
2534 47014 : first_edge = EDGE_PRED (bb, 0);
2535 47014 : second_edge = EDGE_PRED (bb, 1);
2536 47014 : cond = bb_predicate (first_edge->src);
2537 47014 : cond_set.add ({ cond, 1 });
2538 47014 : if (TREE_CODE (cond) == TRUTH_NOT_EXPR)
2539 22384 : std::swap (first_edge, second_edge);
2540 47014 : if (EDGE_COUNT (first_edge->src->succs) > 1)
2541 : {
2542 22687 : cond = bb_predicate (second_edge->src);
2543 22687 : if (TREE_CODE (cond) == TRUTH_NOT_EXPR)
2544 9841 : cond = TREE_OPERAND (cond, 0);
2545 : else
2546 : first_edge = second_edge;
2547 : }
2548 : else
2549 24327 : cond = bb_predicate (first_edge->src);
2550 :
2551 : /* Gimplify the condition to a valid cond-expr conditonal operand. */
2552 47014 : cond = gen_simplified_condition (cond, cond_set);
2553 47014 : cond = force_gimple_operand_gsi (gsi, unshare_expr (cond), true,
2554 : NULL_TREE, true, GSI_SAME_STMT);
2555 47014 : true_bb = first_edge->src;
2556 47014 : if (EDGE_PRED (bb, 1)->src == true_bb)
2557 : {
2558 35230 : arg0 = gimple_phi_arg_def (phi, 1);
2559 35230 : arg1 = gimple_phi_arg_def (phi, 0);
2560 : }
2561 : else
2562 : {
2563 11784 : arg0 = gimple_phi_arg_def (phi, 0);
2564 11784 : arg1 = gimple_phi_arg_def (phi, 1);
2565 : }
2566 :
2567 : /* Factor out operand if possible. This can only be done easily
2568 : for PHI with 2 elements. */
2569 47014 : factor_out_operators (&res, gsi, &arg0, &arg1, phi);
2570 :
2571 47014 : if (is_cond_scalar_reduction (phi, &reduc, arg0, arg1,
2572 : &op0, &op1, false, &has_nop,
2573 : &nop_reduc))
2574 : {
2575 : /* Convert reduction stmt into vectorizable form. */
2576 8154 : rhs = convert_scalar_cond_reduction (reduc, gsi, cond, op0, op1,
2577 4077 : true_bb != gimple_bb (reduc),
2578 : has_nop, nop_reduc,
2579 : loop_versioned);
2580 4077 : redundant_ssa_names.safe_push (std::make_pair (res, rhs));
2581 : }
2582 : else
2583 : /* Build new RHS using selected condition and arguments. */
2584 42937 : rhs = fold_build_cond_expr (TREE_TYPE (res), unshare_expr (cond),
2585 : arg0, arg1);
2586 47014 : new_stmt = gimple_build_assign (res, rhs);
2587 47014 : gsi_insert_before (gsi, new_stmt, GSI_SAME_STMT);
2588 47014 : gimple_stmt_iterator new_gsi = gsi_for_stmt (new_stmt);
2589 47014 : if (fold_stmt (&new_gsi, follow_all_ssa_edges))
2590 : {
2591 1490 : new_stmt = gsi_stmt (new_gsi);
2592 1490 : update_stmt (new_stmt);
2593 : }
2594 :
2595 47014 : if (dump_file && (dump_flags & TDF_DETAILS))
2596 : {
2597 19 : fprintf (dump_file, "new phi replacement stmt\n");
2598 19 : print_gimple_stmt (dump_file, new_stmt, 0, TDF_SLIM);
2599 : }
2600 47014 : return;
2601 : }
2602 :
2603 : /* Create hashmap for PHI node which contain vector of argument indexes
2604 : having the same value. */
2605 5367 : bool swap = false;
2606 5367 : hash_map<tree_operand_hash, auto_vec<int> > phi_arg_map;
2607 5367 : unsigned int num_args = gimple_phi_num_args (phi);
2608 : /* Vector of different PHI argument values. */
2609 5367 : auto_vec<ifcvt_arg_entry_t> args;
2610 :
2611 : /* Compute phi_arg_map, determine the list of unique PHI args and the indices
2612 : where they are in the PHI node. The indices will be used to determine
2613 : the conditions to apply and their complexity. */
2614 21749 : for (i = 0; i < num_args; i++)
2615 : {
2616 16382 : tree arg;
2617 :
2618 16382 : arg = gimple_phi_arg_def (phi, i);
2619 16382 : if (!phi_arg_map.get (arg))
2620 12862 : args.safe_push ({ arg, 0, 0, NULL });
2621 16382 : phi_arg_map.get_or_insert (arg).safe_push (i);
2622 : }
2623 :
2624 : /* Determine element with max number of occurrences and complexity. Looking
2625 : at only number of occurrences as a measure for complexity isn't enough as
2626 : all usages can be unique but the comparisons to reach the PHI node differ
2627 : per branch. */
2628 18229 : for (unsigned i = 0; i < args.length (); i++)
2629 : {
2630 12862 : unsigned int len = 0;
2631 12862 : vec<int> *indices = phi_arg_map.get (args[i].arg);
2632 54968 : for (int index : *indices)
2633 : {
2634 16382 : edge e = gimple_phi_arg_edge (phi, index);
2635 16382 : len += get_bb_num_predicate_stmts (e->src);
2636 : }
2637 :
2638 12862 : unsigned occur = indices->length ();
2639 12862 : if (dump_file && (dump_flags & TDF_DETAILS))
2640 7 : fprintf (dump_file, "Ranking %d as len=%d, idx=%d\n", i, len, occur);
2641 12862 : args[i].num_compares = len;
2642 12862 : args[i].occurs = occur;
2643 12862 : args[i].indexes = indices;
2644 : }
2645 :
2646 : /* Sort elements based on rankings ARGS. */
2647 5367 : args.stablesort (cmp_arg_entry, NULL);
2648 :
2649 : /* Handle one special case when number of arguments with different values
2650 : is equal 2 and one argument has the only occurrence. Such PHI can be
2651 : handled as if would have only 2 arguments. */
2652 5367 : if (args.length () == 2
2653 8669 : && args[0].indexes->length () == 1)
2654 : {
2655 3230 : index0 = (*args[0].indexes)[0];
2656 3230 : arg0 = args[0].arg;
2657 3230 : arg1 = args[1].arg;
2658 3230 : e = gimple_phi_arg_edge (phi, index0);
2659 3230 : cond = bb_predicate (e->src);
2660 3230 : if (TREE_CODE (cond) == TRUTH_NOT_EXPR)
2661 : {
2662 39 : swap = true;
2663 39 : cond = TREE_OPERAND (cond, 0);
2664 : }
2665 : /* Gimplify the condition to a valid cond-expr conditonal operand. */
2666 3230 : cond = force_gimple_operand_gsi (gsi, unshare_expr (cond), true,
2667 : NULL_TREE, true, GSI_SAME_STMT);
2668 3230 : if (!(is_cond_scalar_reduction (phi, &reduc, arg0 , arg1,
2669 : &op0, &op1, true, &has_nop, &nop_reduc)))
2670 4513 : rhs = fold_build_cond_expr (TREE_TYPE (res), unshare_expr (cond),
2671 : swap ? arg1 : arg0,
2672 : swap ? arg0 : arg1);
2673 : else
2674 : {
2675 : /* Convert reduction stmt into vectorizable form. */
2676 955 : rhs = convert_scalar_cond_reduction (reduc, gsi, cond, op0, op1,
2677 : swap, has_nop, nop_reduc,
2678 : loop_versioned);
2679 955 : redundant_ssa_names.safe_push (std::make_pair (res, rhs));
2680 : }
2681 3230 : new_stmt = gimple_build_assign (res, rhs);
2682 3230 : gsi_insert_before (gsi, new_stmt, GSI_SAME_STMT);
2683 3230 : update_stmt (new_stmt);
2684 : }
2685 : else
2686 : {
2687 : /* Common case. */
2688 2137 : tree type = TREE_TYPE (gimple_phi_result (phi));
2689 2137 : gen_phi_nest_statement (phi, gsi, cond_set, type, &new_stmt, res,
2690 : args, 1);
2691 : }
2692 :
2693 5367 : if (dump_file && (dump_flags & TDF_DETAILS))
2694 : {
2695 3 : fprintf (dump_file, "new extended phi replacement stmt\n");
2696 3 : print_gimple_stmt (dump_file, new_stmt, 0, TDF_SLIM);
2697 : }
2698 52381 : }
2699 :
2700 : /* Replaces in LOOP all the scalar phi nodes other than those in the
2701 : LOOP->header block with conditional modify expressions.
2702 : LOOP_VERSIONED should be true if we know that the loop was versioned for
2703 : vectorization. */
2704 :
2705 : static void
2706 28951 : predicate_all_scalar_phis (class loop *loop, bool loop_versioned)
2707 : {
2708 28951 : basic_block bb;
2709 28951 : unsigned int orig_loop_num_nodes = loop->num_nodes;
2710 28951 : unsigned int i;
2711 :
2712 148464 : for (i = 1; i < orig_loop_num_nodes; i++)
2713 : {
2714 119513 : gphi *phi;
2715 119513 : gimple_stmt_iterator gsi;
2716 119513 : gphi_iterator phi_gsi;
2717 119513 : bb = ifc_bbs[i];
2718 :
2719 119513 : if (bb == loop->header)
2720 85450 : continue;
2721 :
2722 119513 : phi_gsi = gsi_start_phis (bb);
2723 119513 : if (gsi_end_p (phi_gsi))
2724 85450 : continue;
2725 :
2726 34063 : gsi = gsi_after_labels (bb);
2727 88884 : while (!gsi_end_p (phi_gsi))
2728 : {
2729 54821 : phi = phi_gsi.phi ();
2730 109642 : if (virtual_operand_p (gimple_phi_result (phi)))
2731 2383 : gsi_next (&phi_gsi);
2732 : else
2733 : {
2734 52438 : predicate_scalar_phi (phi, &gsi, loop_versioned);
2735 52438 : remove_phi_node (&phi_gsi, false);
2736 : }
2737 : }
2738 : }
2739 28951 : }
2740 :
2741 : /* Insert in each basic block of LOOP the statements produced by the
2742 : gimplification of the predicates. */
2743 :
2744 : static void
2745 28951 : insert_gimplified_predicates (loop_p loop)
2746 : {
2747 28951 : unsigned int i;
2748 :
2749 177415 : for (i = 0; i < loop->num_nodes; i++)
2750 : {
2751 148464 : basic_block bb = ifc_bbs[i];
2752 148464 : gimple_seq stmts;
2753 148464 : if (!is_predicated (bb))
2754 91005 : gcc_assert (bb_predicate_gimplified_stmts (bb) == NULL);
2755 148464 : if (!is_predicated (bb))
2756 : {
2757 : /* Do not insert statements for a basic block that is not
2758 : predicated. Also make sure that the predicate of the
2759 : basic block is set to true. */
2760 91005 : reset_bb_predicate (bb);
2761 91005 : continue;
2762 : }
2763 :
2764 57459 : stmts = bb_predicate_gimplified_stmts (bb);
2765 57459 : if (stmts)
2766 : {
2767 57079 : if (need_to_predicate)
2768 : {
2769 : /* Insert the predicate of the BB just after the label,
2770 : as the if-conversion of memory writes will use this
2771 : predicate. */
2772 5986 : gimple_stmt_iterator gsi = gsi_after_labels (bb);
2773 5986 : gsi_insert_seq_before (&gsi, stmts, GSI_SAME_STMT);
2774 : }
2775 : else
2776 : {
2777 : /* Insert the predicate of the BB at the end of the BB
2778 : as this would reduce the register pressure: the only
2779 : use of this predicate will be in successor BBs. */
2780 51093 : gimple_stmt_iterator gsi = gsi_last_bb (bb);
2781 :
2782 51093 : if (gsi_end_p (gsi)
2783 51093 : || stmt_ends_bb_p (gsi_stmt (gsi)))
2784 29449 : gsi_insert_seq_before (&gsi, stmts, GSI_SAME_STMT);
2785 : else
2786 21644 : gsi_insert_seq_after (&gsi, stmts, GSI_SAME_STMT);
2787 : }
2788 :
2789 : /* Once the sequence is code generated, set it to NULL. */
2790 57079 : set_bb_predicate_gimplified_stmts (bb, NULL, true);
2791 : }
2792 : }
2793 28951 : }
2794 :
2795 : /* Helper function for predicate_statements. Returns index of existent
2796 : mask if it was created for given SIZE and -1 otherwise. */
2797 :
2798 : static int
2799 937 : mask_exists (int size, const vec<int> &vec)
2800 : {
2801 937 : unsigned int ix;
2802 937 : int v;
2803 1026 : FOR_EACH_VEC_ELT (vec, ix, v)
2804 965 : if (v == size)
2805 876 : return (int) ix;
2806 : return -1;
2807 : }
2808 :
2809 : /* Helper function for predicate_statements. STMT is a memory read or
2810 : write and it needs to be predicated by MASK. Return a statement
2811 : that does so. */
2812 :
2813 : static gimple *
2814 2016 : predicate_load_or_store (gimple_stmt_iterator *gsi, gassign *stmt, tree mask)
2815 : {
2816 2016 : gcall *new_stmt;
2817 :
2818 2016 : tree lhs = gimple_assign_lhs (stmt);
2819 2016 : tree rhs = gimple_assign_rhs1 (stmt);
2820 2016 : tree ref = TREE_CODE (lhs) == SSA_NAME ? rhs : lhs;
2821 2016 : mark_addressable (ref);
2822 2016 : tree addr = force_gimple_operand_gsi (gsi, build_fold_addr_expr (ref),
2823 : true, NULL_TREE, true, GSI_SAME_STMT);
2824 2016 : tree ptr = build_int_cst (reference_alias_ptr_type (ref),
2825 2016 : get_object_alignment (ref));
2826 : /* Copy points-to info if possible. */
2827 2016 : if (TREE_CODE (addr) == SSA_NAME && !SSA_NAME_PTR_INFO (addr))
2828 519 : copy_ref_info (build2 (MEM_REF, TREE_TYPE (ref), addr, ptr),
2829 : ref);
2830 2016 : if (TREE_CODE (lhs) == SSA_NAME)
2831 : {
2832 : /* Get a zero else value. This might not be what a target actually uses
2833 : but we cannot be sure about which vector mode the vectorizer will
2834 : choose. Therefore, leave the decision whether we need to force the
2835 : inactive elements to zero to the vectorizer. */
2836 1207 : tree els = vect_get_mask_load_else (MASK_LOAD_ELSE_ZERO,
2837 1207 : TREE_TYPE (lhs));
2838 :
2839 1207 : new_stmt
2840 1207 : = gimple_build_call_internal (IFN_MASK_LOAD, 4, addr,
2841 : ptr, mask, els);
2842 :
2843 1207 : gimple_call_set_lhs (new_stmt, lhs);
2844 2414 : gimple_set_vuse (new_stmt, gimple_vuse (stmt));
2845 : }
2846 : else
2847 : {
2848 809 : new_stmt
2849 809 : = gimple_build_call_internal (IFN_MASK_STORE, 4, addr, ptr,
2850 : mask, rhs);
2851 809 : gimple_move_vops (new_stmt, stmt);
2852 : }
2853 2016 : gimple_call_set_nothrow (new_stmt, true);
2854 2016 : return new_stmt;
2855 : }
2856 :
2857 : /* STMT uses OP_LHS. Check whether it is equivalent to:
2858 :
2859 : ... = OP_MASK ? OP_LHS : X;
2860 :
2861 : Return X if so, otherwise return null. OP_MASK is an SSA_NAME that is
2862 : known to have value OP_COND. */
2863 :
2864 : static tree
2865 820 : check_redundant_cond_expr (gimple *stmt, tree op_mask, tree op_cond,
2866 : tree op_lhs)
2867 : {
2868 1505 : gassign *assign = dyn_cast <gassign *> (stmt);
2869 1028 : if (!assign || gimple_assign_rhs_code (assign) != COND_EXPR)
2870 : return NULL_TREE;
2871 :
2872 203 : tree use_cond = gimple_assign_rhs1 (assign);
2873 203 : tree if_true = gimple_assign_rhs2 (assign);
2874 203 : tree if_false = gimple_assign_rhs3 (assign);
2875 :
2876 72 : if ((use_cond == op_mask || operand_equal_p (use_cond, op_cond, 0))
2877 203 : && if_true == op_lhs)
2878 : return if_false;
2879 :
2880 72 : if (inverse_conditions_p (use_cond, op_cond) && if_false == op_lhs)
2881 : return if_true;
2882 :
2883 : return NULL_TREE;
2884 : }
2885 :
2886 : /* Return true if VALUE is available for use at STMT. SSA_NAMES is
2887 : the set of SSA names defined earlier in STMT's block. */
2888 :
2889 : static bool
2890 131 : value_available_p (gimple *stmt, hash_set<tree_ssa_name_hash> *ssa_names,
2891 : tree value)
2892 : {
2893 131 : if (is_gimple_min_invariant (value))
2894 : return true;
2895 :
2896 95 : if (TREE_CODE (value) == SSA_NAME)
2897 : {
2898 95 : if (SSA_NAME_IS_DEFAULT_DEF (value))
2899 : return true;
2900 :
2901 95 : basic_block def_bb = gimple_bb (SSA_NAME_DEF_STMT (value));
2902 95 : basic_block use_bb = gimple_bb (stmt);
2903 95 : return (def_bb == use_bb
2904 95 : ? ssa_names->contains (value)
2905 95 : : dominated_by_p (CDI_DOMINATORS, use_bb, def_bb));
2906 : }
2907 :
2908 : return false;
2909 : }
2910 :
2911 : /* Helper function for predicate_statements. STMT is a potentially-trapping
2912 : arithmetic operation that needs to be predicated by MASK, an SSA_NAME that
2913 : has value COND. Return a statement that does so. SSA_NAMES is the set of
2914 : SSA names defined earlier in STMT's block. */
2915 :
2916 : static gimple *
2917 615 : predicate_rhs_code (gimple *stmt, tree mask, tree cond,
2918 : hash_set<tree_ssa_name_hash> *ssa_names)
2919 : {
2920 615 : internal_fn cond_fn;
2921 615 : if (is_gimple_assign (stmt))
2922 : {
2923 507 : tree_code code = gimple_assign_rhs_code (stmt);
2924 507 : cond_fn = get_conditional_internal_fn (code);
2925 : }
2926 108 : else if (tree callee = gimple_call_fndecl (stmt))
2927 : {
2928 108 : auto ifn = associated_internal_fn (callee);
2929 108 : cond_fn = get_conditional_internal_fn (ifn);
2930 : }
2931 : else
2932 : return NULL;
2933 :
2934 615 : if (cond_fn == IFN_LAST)
2935 : {
2936 0 : gcc_assert (!gimple_could_trap_p (stmt));
2937 : return NULL;
2938 : }
2939 :
2940 615 : tree lhs = gimple_get_lhs (stmt);
2941 615 : unsigned int nops = gimple_num_args (stmt) + 1;
2942 :
2943 : /* Construct the arguments to the conditional internal function. */
2944 615 : auto_vec<tree, 8> args;
2945 615 : args.safe_grow (nops + 1, true);
2946 615 : args[0] = mask;
2947 1953 : for (unsigned int i = 0; i < nops - 1; ++i)
2948 1338 : args[i+1] = gimple_arg (stmt, i);
2949 615 : args[nops] = NULL_TREE;
2950 :
2951 : /* Look for uses of the result to see whether they are COND_EXPRs that can
2952 : be folded into the conditional call. */
2953 615 : imm_use_iterator imm_iter;
2954 615 : gimple *use_stmt;
2955 2050 : FOR_EACH_IMM_USE_STMT (use_stmt, imm_iter, lhs)
2956 : {
2957 820 : tree new_else = check_redundant_cond_expr (use_stmt, mask, cond, lhs);
2958 820 : if (new_else && value_available_p (stmt, ssa_names, new_else))
2959 : {
2960 110 : if (!args[nops])
2961 110 : args[nops] = new_else;
2962 110 : if (operand_equal_p (new_else, args[nops], 0))
2963 : {
2964 : /* We have:
2965 :
2966 : LHS = IFN_COND (MASK, ..., ELSE);
2967 : X = MASK ? LHS : ELSE;
2968 :
2969 : which makes X equivalent to LHS. */
2970 110 : tree use_lhs = gimple_assign_lhs (use_stmt);
2971 110 : redundant_ssa_names.safe_push (std::make_pair (use_lhs, lhs));
2972 : }
2973 : }
2974 615 : }
2975 615 : if (!args[nops])
2976 1010 : args[nops] = targetm.preferred_else_value (cond_fn, TREE_TYPE (lhs),
2977 505 : nops - 1, &args[1]);
2978 :
2979 : /* Create and insert the call. */
2980 615 : gcall *new_stmt = gimple_build_call_internal_vec (cond_fn, args);
2981 615 : gimple_call_set_lhs (new_stmt, lhs);
2982 615 : gimple_call_set_nothrow (new_stmt, true);
2983 :
2984 615 : return new_stmt;
2985 615 : }
2986 :
2987 : /* Predicate each write to memory in LOOP.
2988 :
2989 : This function transforms control flow constructs containing memory
2990 : writes of the form:
2991 :
2992 : | for (i = 0; i < N; i++)
2993 : | if (cond)
2994 : | A[i] = expr;
2995 :
2996 : into the following form that does not contain control flow:
2997 :
2998 : | for (i = 0; i < N; i++)
2999 : | A[i] = cond ? expr : A[i];
3000 :
3001 : The original CFG looks like this:
3002 :
3003 : | bb_0
3004 : | i = 0
3005 : | end_bb_0
3006 : |
3007 : | bb_1
3008 : | if (i < N) goto bb_5 else goto bb_2
3009 : | end_bb_1
3010 : |
3011 : | bb_2
3012 : | cond = some_computation;
3013 : | if (cond) goto bb_3 else goto bb_4
3014 : | end_bb_2
3015 : |
3016 : | bb_3
3017 : | A[i] = expr;
3018 : | goto bb_4
3019 : | end_bb_3
3020 : |
3021 : | bb_4
3022 : | goto bb_1
3023 : | end_bb_4
3024 :
3025 : insert_gimplified_predicates inserts the computation of the COND
3026 : expression at the beginning of the destination basic block:
3027 :
3028 : | bb_0
3029 : | i = 0
3030 : | end_bb_0
3031 : |
3032 : | bb_1
3033 : | if (i < N) goto bb_5 else goto bb_2
3034 : | end_bb_1
3035 : |
3036 : | bb_2
3037 : | cond = some_computation;
3038 : | if (cond) goto bb_3 else goto bb_4
3039 : | end_bb_2
3040 : |
3041 : | bb_3
3042 : | cond = some_computation;
3043 : | A[i] = expr;
3044 : | goto bb_4
3045 : | end_bb_3
3046 : |
3047 : | bb_4
3048 : | goto bb_1
3049 : | end_bb_4
3050 :
3051 : predicate_statements is then predicating the memory write as follows:
3052 :
3053 : | bb_0
3054 : | i = 0
3055 : | end_bb_0
3056 : |
3057 : | bb_1
3058 : | if (i < N) goto bb_5 else goto bb_2
3059 : | end_bb_1
3060 : |
3061 : | bb_2
3062 : | if (cond) goto bb_3 else goto bb_4
3063 : | end_bb_2
3064 : |
3065 : | bb_3
3066 : | cond = some_computation;
3067 : | A[i] = cond ? expr : A[i];
3068 : | goto bb_4
3069 : | end_bb_3
3070 : |
3071 : | bb_4
3072 : | goto bb_1
3073 : | end_bb_4
3074 :
3075 : and finally combine_blocks removes the basic block boundaries making
3076 : the loop vectorizable:
3077 :
3078 : | bb_0
3079 : | i = 0
3080 : | if (i < N) goto bb_5 else goto bb_1
3081 : | end_bb_0
3082 : |
3083 : | bb_1
3084 : | cond = some_computation;
3085 : | A[i] = cond ? expr : A[i];
3086 : | if (i < N) goto bb_5 else goto bb_4
3087 : | end_bb_1
3088 : |
3089 : | bb_4
3090 : | goto bb_1
3091 : | end_bb_4
3092 : */
3093 :
3094 : static void
3095 11927 : predicate_statements (loop_p loop)
3096 : {
3097 11927 : unsigned int i, orig_loop_num_nodes = loop->num_nodes;
3098 11927 : auto_vec<int, 1> vect_sizes;
3099 11927 : auto_vec<tree, 1> vect_masks;
3100 11927 : hash_set<tree_ssa_name_hash> ssa_names;
3101 :
3102 62861 : for (i = 1; i < orig_loop_num_nodes; i++)
3103 : {
3104 50934 : gimple_stmt_iterator gsi;
3105 50934 : basic_block bb = ifc_bbs[i];
3106 50934 : tree cond = bb_predicate (bb);
3107 50934 : bool swap;
3108 50934 : int index;
3109 :
3110 50934 : if (is_true_predicate (cond))
3111 25183 : continue;
3112 :
3113 25751 : swap = false;
3114 25751 : if (TREE_CODE (cond) == TRUTH_NOT_EXPR)
3115 : {
3116 9678 : swap = true;
3117 9678 : cond = TREE_OPERAND (cond, 0);
3118 : }
3119 :
3120 25751 : vect_sizes.truncate (0);
3121 25751 : vect_masks.truncate (0);
3122 :
3123 198331 : for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi);)
3124 : {
3125 146829 : gimple *stmt = gsi_stmt (gsi);
3126 146829 : if (!is_gimple_assign (stmt)
3127 146829 : && !gimple_call_builtin_p (stmt))
3128 : ;
3129 88433 : else if (is_false_predicate (cond)
3130 88433 : && gimple_vdef (stmt))
3131 : {
3132 0 : unlink_stmt_vdef (stmt);
3133 0 : gsi_remove (&gsi, true);
3134 0 : release_defs (stmt);
3135 2 : continue;
3136 : }
3137 : /* For now, just drop prefetches. Do it now to remove any possible
3138 : aliasing check failures from the address calculations of the
3139 : prefetch. Vect would be too late in that regard. */
3140 88433 : else if (gimple_call_builtin_p (stmt, BUILT_IN_PREFETCH))
3141 : {
3142 2 : unlink_stmt_vdef (stmt);
3143 2 : gsi_remove (&gsi, true);
3144 2 : release_defs (stmt);
3145 2 : continue;
3146 : }
3147 88431 : else if (gimple_plf (stmt, GF_PLF_2)
3148 88431 : && (is_gimple_assign (stmt)
3149 108 : || (gimple_call_builtin_p (stmt)
3150 108 : && !if_convertible_simdclone_stmt_p (stmt))))
3151 : {
3152 2631 : tree lhs = gimple_get_lhs (stmt);
3153 : /* ?? Assume that calls without an LHS are not data processing
3154 : and so no issues with traps. */
3155 2631 : if (!lhs)
3156 0 : continue;
3157 2631 : tree mask;
3158 2631 : gimple *new_stmt;
3159 2631 : gimple_seq stmts = NULL;
3160 2631 : machine_mode mode = TYPE_MODE (TREE_TYPE (lhs));
3161 : /* We checked before setting GF_PLF_2 that an equivalent
3162 : integer mode exists. */
3163 2631 : int bitsize = GET_MODE_BITSIZE (mode).to_constant ();
3164 2631 : if (!vect_sizes.is_empty ()
3165 937 : && (index = mask_exists (bitsize, vect_sizes)) != -1)
3166 : /* Use created mask. */
3167 876 : mask = vect_masks[index];
3168 : else
3169 : {
3170 1755 : if (COMPARISON_CLASS_P (cond))
3171 0 : mask = gimple_build (&stmts, TREE_CODE (cond),
3172 : boolean_type_node,
3173 0 : TREE_OPERAND (cond, 0),
3174 0 : TREE_OPERAND (cond, 1));
3175 : else
3176 1755 : mask = cond;
3177 :
3178 1755 : if (swap)
3179 : {
3180 455 : tree true_val
3181 455 : = constant_boolean_node (true, TREE_TYPE (mask));
3182 455 : mask = gimple_build (&stmts, BIT_XOR_EXPR,
3183 455 : TREE_TYPE (mask), mask, true_val);
3184 : }
3185 1755 : gsi_insert_seq_before (&gsi, stmts, GSI_SAME_STMT);
3186 :
3187 : /* Save mask and its size for further use. */
3188 1755 : vect_sizes.safe_push (bitsize);
3189 1755 : vect_masks.safe_push (mask);
3190 : }
3191 2631 : if (gimple_assign_single_p (stmt))
3192 2016 : new_stmt = predicate_load_or_store (&gsi,
3193 : as_a <gassign *> (stmt),
3194 : mask);
3195 : else
3196 615 : new_stmt = predicate_rhs_code (stmt, mask, cond, &ssa_names);
3197 :
3198 2631 : if (new_stmt)
3199 2631 : gsi_replace (&gsi, new_stmt, true);
3200 : }
3201 85800 : else if (gimple_needing_rewrite_undefined (stmt))
3202 17852 : rewrite_to_defined_unconditional (&gsi);
3203 135896 : else if (gimple_vdef (stmt))
3204 : {
3205 1612 : tree lhs = gimple_assign_lhs (stmt);
3206 1612 : tree rhs = gimple_assign_rhs1 (stmt);
3207 1612 : tree type = TREE_TYPE (lhs);
3208 :
3209 1612 : lhs = ifc_temp_var (type, unshare_expr (lhs), &gsi);
3210 1612 : rhs = ifc_temp_var (type, unshare_expr (rhs), &gsi);
3211 1612 : if (swap)
3212 575 : std::swap (lhs, rhs);
3213 1612 : cond = force_gimple_operand_gsi (&gsi, unshare_expr (cond), true,
3214 : NULL_TREE, true, GSI_SAME_STMT);
3215 1612 : rhs = fold_build_cond_expr (type, unshare_expr (cond), rhs, lhs);
3216 1612 : gimple_assign_set_rhs1 (stmt, ifc_temp_var (type, rhs, &gsi));
3217 1612 : update_stmt (stmt);
3218 : }
3219 :
3220 146827 : if (gimple_plf (gsi_stmt (gsi), GF_PLF_2)
3221 146827 : && is_gimple_call (gsi_stmt (gsi)))
3222 : {
3223 : /* Convert functions that have a SIMD clone to IFN_MASK_CALL.
3224 : This will cause the vectorizer to match the "in branch"
3225 : clone variants, and serves to build the mask vector
3226 : in a natural way. */
3227 999 : tree mask = cond;
3228 999 : gcall *call = dyn_cast <gcall *> (gsi_stmt (gsi));
3229 999 : tree orig_fn = gimple_call_fn (call);
3230 999 : int orig_nargs = gimple_call_num_args (call);
3231 999 : auto_vec<tree> args;
3232 999 : args.safe_push (orig_fn);
3233 2013 : for (int i = 0; i < orig_nargs; i++)
3234 1014 : args.safe_push (gimple_call_arg (call, i));
3235 : /* If `swap', we invert the mask used for the if branch for use
3236 : when masking the function call. */
3237 999 : if (swap)
3238 : {
3239 948 : gimple_seq stmts = NULL;
3240 948 : tree true_val
3241 948 : = constant_boolean_node (true, TREE_TYPE (mask));
3242 948 : mask = gimple_build (&stmts, BIT_XOR_EXPR,
3243 948 : TREE_TYPE (mask), mask, true_val);
3244 948 : gsi_insert_seq_before (&gsi, stmts, GSI_SAME_STMT);
3245 : }
3246 999 : args.safe_push (mask);
3247 :
3248 : /* Replace the call with a IFN_MASK_CALL that has the extra
3249 : condition parameter. */
3250 999 : gcall *new_call = gimple_build_call_internal_vec (IFN_MASK_CALL,
3251 : args);
3252 999 : gimple_call_set_lhs (new_call, gimple_call_lhs (call));
3253 999 : gsi_replace (&gsi, new_call, true);
3254 999 : }
3255 :
3256 146827 : tree lhs = gimple_get_lhs (gsi_stmt (gsi));
3257 146827 : if (lhs && TREE_CODE (lhs) == SSA_NAME)
3258 86111 : ssa_names.add (lhs);
3259 146827 : gsi_next (&gsi);
3260 : }
3261 51475 : ssa_names.empty ();
3262 : }
3263 11927 : }
3264 :
3265 : /* Remove all GIMPLE_CONDs and GIMPLE_LABELs and GIMPLE_SWITCH of all
3266 : the basic blocks other than the exit and latch of the LOOP. Also
3267 : resets the GIMPLE_DEBUG information. */
3268 :
3269 : static void
3270 28951 : remove_conditions_and_labels (loop_p loop)
3271 : {
3272 28951 : gimple_stmt_iterator gsi;
3273 28951 : unsigned int i;
3274 :
3275 177415 : for (i = 0; i < loop->num_nodes; i++)
3276 : {
3277 148464 : basic_block bb = ifc_bbs[i];
3278 :
3279 148464 : if (bb_with_exit_edge_p (loop, bb)
3280 148464 : || bb == loop->latch)
3281 57902 : continue;
3282 :
3283 765513 : for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); )
3284 584389 : switch (gimple_code (gsi_stmt (gsi)))
3285 : {
3286 37099 : case GIMPLE_COND:
3287 37099 : case GIMPLE_LABEL:
3288 37099 : case GIMPLE_SWITCH:
3289 37099 : gsi_remove (&gsi, true);
3290 37099 : break;
3291 :
3292 354522 : case GIMPLE_DEBUG:
3293 : /* ??? Should there be conditional GIMPLE_DEBUG_BINDs? */
3294 354522 : if (gimple_debug_bind_p (gsi_stmt (gsi)))
3295 : {
3296 273710 : gimple_debug_bind_reset_value (gsi_stmt (gsi));
3297 273710 : update_stmt (gsi_stmt (gsi));
3298 : }
3299 354522 : gsi_next (&gsi);
3300 354522 : break;
3301 :
3302 192768 : default:
3303 192768 : gsi_next (&gsi);
3304 : }
3305 : }
3306 28951 : }
3307 :
3308 : /* Combine all the basic blocks from LOOP into one or two super basic
3309 : blocks. Replace PHI nodes with conditional modify expressions.
3310 : LOOP_VERSIONED should be true if we know that the loop was versioned for
3311 : vectorization. */
3312 :
3313 : static void
3314 28951 : combine_blocks (class loop *loop, bool loop_versioned)
3315 : {
3316 28951 : basic_block bb, exit_bb, merge_target_bb;
3317 28951 : unsigned int orig_loop_num_nodes = loop->num_nodes;
3318 28951 : unsigned int i;
3319 28951 : edge e;
3320 28951 : edge_iterator ei;
3321 :
3322 : /* Reset flow-sensitive info before predicating stmts or PHIs we
3323 : might fold. */
3324 177415 : for (i = 0; i < orig_loop_num_nodes; i++)
3325 : {
3326 148464 : bb = ifc_bbs[i];
3327 148464 : if (is_predicated (bb))
3328 : {
3329 57459 : for (auto gsi = gsi_start_phis (bb);
3330 58964 : !gsi_end_p (gsi); gsi_next (&gsi))
3331 1505 : reset_flow_sensitive_info (gimple_phi_result (*gsi));
3332 234569 : for (auto gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
3333 : {
3334 119651 : gimple *stmt = gsi_stmt (gsi);
3335 119651 : ssa_op_iter i;
3336 119651 : tree op;
3337 170089 : FOR_EACH_SSA_TREE_OPERAND (op, stmt, i, SSA_OP_DEF)
3338 50438 : reset_flow_sensitive_info (op);
3339 : }
3340 : }
3341 : }
3342 :
3343 28951 : remove_conditions_and_labels (loop);
3344 28951 : insert_gimplified_predicates (loop);
3345 28951 : predicate_all_scalar_phis (loop, loop_versioned);
3346 :
3347 28951 : if (need_to_predicate || need_to_rewrite_undefined)
3348 11927 : predicate_statements (loop);
3349 :
3350 : /* Merge basic blocks. */
3351 28951 : exit_bb = single_exit (loop)->src;
3352 28951 : gcc_assert (exit_bb != loop->latch);
3353 177415 : for (i = 0; i < orig_loop_num_nodes; i++)
3354 : {
3355 148464 : bb = ifc_bbs[i];
3356 148464 : free_bb_predicate (bb);
3357 : }
3358 :
3359 28951 : merge_target_bb = loop->header;
3360 :
3361 : /* Get at the virtual def valid for uses starting at the first block
3362 : we merge into the header. Without a virtual PHI the loop has the
3363 : same virtual use on all stmts. */
3364 28951 : gphi *vphi = get_virtual_phi (loop->header);
3365 28951 : tree last_vdef = NULL_TREE;
3366 28951 : if (vphi)
3367 : {
3368 17262 : last_vdef = gimple_phi_result (vphi);
3369 34524 : for (gimple_stmt_iterator gsi = gsi_start_bb (loop->header);
3370 279140 : ! gsi_end_p (gsi); gsi_next (&gsi))
3371 358557 : if (gimple_vdef (gsi_stmt (gsi)))
3372 5098 : last_vdef = gimple_vdef (gsi_stmt (gsi));
3373 : }
3374 148464 : for (i = 1; i < orig_loop_num_nodes; i++)
3375 : {
3376 119513 : gimple_stmt_iterator gsi;
3377 119513 : gimple_stmt_iterator last;
3378 :
3379 119513 : bb = ifc_bbs[i];
3380 :
3381 119513 : if (bb == exit_bb || bb == loop->latch)
3382 57902 : continue;
3383 :
3384 : /* We release virtual PHIs late because we have to propagate them
3385 : out using the current VUSE. The def might be the one used
3386 : after the loop. */
3387 61611 : vphi = get_virtual_phi (bb);
3388 61611 : if (vphi)
3389 : {
3390 : /* When there's just loads inside the loop a stray virtual
3391 : PHI merging the uses can appear, update last_vdef from
3392 : it. */
3393 596 : if (!last_vdef)
3394 0 : last_vdef = gimple_phi_arg_def (vphi, 0);
3395 596 : imm_use_iterator iter;
3396 596 : use_operand_p use_p;
3397 596 : gimple *use_stmt;
3398 2649 : FOR_EACH_IMM_USE_STMT (use_stmt, iter, gimple_phi_result (vphi))
3399 : {
3400 4373 : FOR_EACH_IMM_USE_ON_STMT (use_p, iter)
3401 1458 : SET_USE (use_p, last_vdef);
3402 596 : }
3403 596 : if (SSA_NAME_OCCURS_IN_ABNORMAL_PHI (gimple_phi_result (vphi)))
3404 0 : SSA_NAME_OCCURS_IN_ABNORMAL_PHI (last_vdef) = 1;
3405 596 : gsi = gsi_for_stmt (vphi);
3406 596 : remove_phi_node (&gsi, true);
3407 : }
3408 :
3409 : /* Make stmts member of loop->header and clear range info from all stmts
3410 : in BB which is now no longer executed conditional on a predicate we
3411 : could have derived it from. */
3412 389282 : for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
3413 : {
3414 266060 : gimple *stmt = gsi_stmt (gsi);
3415 266060 : gimple_set_bb (stmt, merge_target_bb);
3416 : /* Update virtual operands. */
3417 266060 : if (last_vdef)
3418 : {
3419 206597 : use_operand_p use_p = ssa_vuse_operand (stmt);
3420 14852 : if (use_p
3421 14852 : && USE_FROM_PTR (use_p) != last_vdef)
3422 763 : SET_USE (use_p, last_vdef);
3423 619387 : if (gimple_vdef (stmt))
3424 266060 : last_vdef = gimple_vdef (stmt);
3425 : }
3426 : else
3427 : /* If this is the first load we arrive at update last_vdef
3428 : so we handle stray PHIs correctly. */
3429 306344 : last_vdef = gimple_vuse (stmt);
3430 : }
3431 :
3432 : /* Update stmt list. */
3433 61611 : last = gsi_last_bb (merge_target_bb);
3434 123222 : gsi_insert_seq_after_without_update (&last, bb_seq (bb), GSI_NEW_STMT);
3435 61611 : set_bb_seq (bb, NULL);
3436 : }
3437 :
3438 : /* Fixup virtual operands in the exit block. */
3439 28951 : if (exit_bb
3440 28951 : && exit_bb != loop->header)
3441 : {
3442 : /* We release virtual PHIs late because we have to propagate them
3443 : out using the current VUSE. The def might be the one used
3444 : after the loop. */
3445 28951 : vphi = get_virtual_phi (exit_bb);
3446 28951 : if (vphi)
3447 : {
3448 : /* When there's just loads inside the loop a stray virtual
3449 : PHI merging the uses can appear, update last_vdef from
3450 : it. */
3451 1787 : if (!last_vdef)
3452 0 : last_vdef = gimple_phi_arg_def (vphi, 0);
3453 1787 : imm_use_iterator iter;
3454 1787 : use_operand_p use_p;
3455 1787 : gimple *use_stmt;
3456 7169 : FOR_EACH_IMM_USE_STMT (use_stmt, iter, gimple_phi_result (vphi))
3457 : {
3458 10785 : FOR_EACH_IMM_USE_ON_STMT (use_p, iter)
3459 3595 : SET_USE (use_p, last_vdef);
3460 1787 : }
3461 1787 : if (SSA_NAME_OCCURS_IN_ABNORMAL_PHI (gimple_phi_result (vphi)))
3462 0 : SSA_NAME_OCCURS_IN_ABNORMAL_PHI (last_vdef) = 1;
3463 1787 : gimple_stmt_iterator gsi = gsi_for_stmt (vphi);
3464 1787 : remove_phi_node (&gsi, true);
3465 : }
3466 : }
3467 :
3468 : /* Now remove all the edges in the loop, except for those from the exit
3469 : block and delete the blocks we elided. */
3470 148464 : for (i = 1; i < orig_loop_num_nodes; i++)
3471 : {
3472 119513 : bb = ifc_bbs[i];
3473 :
3474 275750 : for (ei = ei_start (bb->preds); (e = ei_safe_edge (ei));)
3475 : {
3476 156237 : if (e->src == exit_bb)
3477 28951 : ei_next (&ei);
3478 : else
3479 127286 : remove_edge (e);
3480 : }
3481 : }
3482 148464 : for (i = 1; i < orig_loop_num_nodes; i++)
3483 : {
3484 119513 : bb = ifc_bbs[i];
3485 :
3486 119513 : if (bb == exit_bb || bb == loop->latch)
3487 57902 : continue;
3488 :
3489 61611 : delete_basic_block (bb);
3490 : }
3491 :
3492 : /* Re-connect the exit block. */
3493 28951 : if (exit_bb != NULL)
3494 : {
3495 28951 : if (exit_bb != loop->header)
3496 : {
3497 : /* Connect this node to loop header. */
3498 28951 : make_single_succ_edge (loop->header, exit_bb, EDGE_FALLTHRU);
3499 28951 : set_immediate_dominator (CDI_DOMINATORS, exit_bb, loop->header);
3500 : }
3501 :
3502 : /* Redirect non-exit edges to loop->latch. */
3503 86853 : FOR_EACH_EDGE (e, ei, exit_bb->succs)
3504 : {
3505 57902 : if (!loop_exit_edge_p (loop, e))
3506 28951 : redirect_edge_and_branch (e, loop->latch);
3507 : }
3508 28951 : set_immediate_dominator (CDI_DOMINATORS, loop->latch, exit_bb);
3509 : }
3510 : else
3511 : {
3512 : /* If the loop does not have an exit, reconnect header and latch. */
3513 0 : make_edge (loop->header, loop->latch, EDGE_FALLTHRU);
3514 0 : set_immediate_dominator (CDI_DOMINATORS, loop->latch, loop->header);
3515 : }
3516 :
3517 : /* If possible, merge loop header to the block with the exit edge.
3518 : This reduces the number of basic blocks to two, to please the
3519 : vectorizer that handles only loops with two nodes. */
3520 28951 : if (exit_bb
3521 28951 : && exit_bb != loop->header)
3522 : {
3523 28951 : if (can_merge_blocks_p (loop->header, exit_bb))
3524 28949 : merge_blocks (loop->header, exit_bb);
3525 : }
3526 :
3527 28951 : free (ifc_bbs);
3528 28951 : ifc_bbs = NULL;
3529 28951 : }
3530 :
3531 : /* Version LOOP before if-converting it; the original loop
3532 : will be if-converted, the new copy of the loop will not,
3533 : and the LOOP_VECTORIZED internal call will be guarding which
3534 : loop to execute. The vectorizer pass will fold this
3535 : internal call into either true or false.
3536 :
3537 : Note that this function intentionally invalidates profile. Both edges
3538 : out of LOOP_VECTORIZED must have 100% probability so the profile remains
3539 : consistent after the condition is folded in the vectorizer. */
3540 :
3541 : static class loop *
3542 29161 : version_loop_for_if_conversion (class loop *loop, vec<gimple *> *preds)
3543 : {
3544 29161 : basic_block cond_bb;
3545 29161 : tree cond = make_ssa_name (boolean_type_node);
3546 29161 : class loop *new_loop;
3547 29161 : gimple *g;
3548 29161 : gimple_stmt_iterator gsi;
3549 29161 : unsigned int save_length = 0;
3550 :
3551 29161 : g = gimple_build_call_internal (IFN_LOOP_VECTORIZED, 2,
3552 29161 : build_int_cst (integer_type_node, loop->num),
3553 : integer_zero_node);
3554 29161 : gimple_call_set_lhs (g, cond);
3555 :
3556 29161 : void **saved_preds = NULL;
3557 29161 : if (any_complicated_phi || need_to_predicate)
3558 : {
3559 : /* Save BB->aux around loop_version as that uses the same field. */
3560 3551 : save_length = loop->inner ? loop->inner->num_nodes : loop->num_nodes;
3561 3551 : saved_preds = XALLOCAVEC (void *, save_length);
3562 26726 : for (unsigned i = 0; i < save_length; i++)
3563 23175 : saved_preds[i] = ifc_bbs[i]->aux;
3564 : }
3565 :
3566 29161 : initialize_original_copy_tables ();
3567 : /* At this point we invalidate profile consistency until IFN_LOOP_VECTORIZED
3568 : is re-merged in the vectorizer. */
3569 29161 : new_loop = loop_version (loop, cond, &cond_bb,
3570 : profile_probability::always (),
3571 : profile_probability::always (),
3572 : profile_probability::always (),
3573 : profile_probability::always (), true);
3574 29161 : free_original_copy_tables ();
3575 :
3576 29161 : if (any_complicated_phi || need_to_predicate)
3577 26726 : for (unsigned i = 0; i < save_length; i++)
3578 23175 : ifc_bbs[i]->aux = saved_preds[i];
3579 :
3580 29161 : if (new_loop == NULL)
3581 : return NULL;
3582 :
3583 29161 : new_loop->dont_vectorize = true;
3584 29161 : new_loop->force_vectorize = false;
3585 29161 : gsi = gsi_last_bb (cond_bb);
3586 29161 : gimple_call_set_arg (g, 1, build_int_cst (integer_type_node, new_loop->num));
3587 29161 : if (preds)
3588 29161 : preds->safe_push (g);
3589 29161 : gsi_insert_before (&gsi, g, GSI_SAME_STMT);
3590 29161 : update_ssa (TODO_update_ssa_no_phi);
3591 29161 : return new_loop;
3592 : }
3593 :
3594 : /* Return true when LOOP satisfies the follow conditions that will
3595 : allow it to be recognized by the vectorizer for outer-loop
3596 : vectorization:
3597 : - The loop is not the root node of the loop tree.
3598 : - The loop has exactly one inner loop.
3599 : - The loop has a single exit.
3600 : - The loop header has a single successor, which is the inner
3601 : loop header.
3602 : - Each of the inner and outer loop latches have a single
3603 : predecessor.
3604 : - The loop exit block has a single predecessor, which is the
3605 : inner loop's exit block. */
3606 :
3607 : static bool
3608 29161 : versionable_outer_loop_p (class loop *loop)
3609 : {
3610 29161 : if (!loop_outer (loop)
3611 7951 : || loop->dont_vectorize
3612 6980 : || !loop->inner
3613 6980 : || loop->inner->next
3614 2876 : || !single_exit (loop)
3615 2222 : || !single_succ_p (loop->header)
3616 931 : || single_succ (loop->header) != loop->inner->header
3617 931 : || !single_pred_p (loop->latch)
3618 37112 : || !single_pred_p (loop->inner->latch))
3619 28230 : return false;
3620 :
3621 931 : basic_block outer_exit = single_pred (loop->latch);
3622 931 : basic_block inner_exit = single_pred (loop->inner->latch);
3623 :
3624 30085 : if (!single_pred_p (outer_exit) || single_pred (outer_exit) != inner_exit)
3625 : return false;
3626 :
3627 923 : if (dump_file)
3628 0 : fprintf (dump_file, "Found vectorizable outer loop for versioning\n");
3629 :
3630 : return true;
3631 : }
3632 :
3633 : /* Performs splitting of critical edges. Skip splitting and return false
3634 : if LOOP will not be converted because:
3635 :
3636 : - LOOP is not well formed.
3637 : - LOOP has PHI with more than MAX_PHI_ARG_NUM arguments.
3638 :
3639 : Last restriction is valid only if AGGRESSIVE_IF_CONV is false. */
3640 :
3641 : static bool
3642 314107 : ifcvt_split_critical_edges (class loop *loop, bool aggressive_if_conv)
3643 : {
3644 314107 : basic_block *body;
3645 314107 : basic_block bb;
3646 314107 : unsigned int num = loop->num_nodes;
3647 314107 : unsigned int i;
3648 314107 : edge e;
3649 314107 : edge_iterator ei;
3650 314107 : auto_vec<edge> critical_edges;
3651 :
3652 : /* Loop is not well formed. */
3653 314107 : if (loop->inner)
3654 : return false;
3655 :
3656 225728 : body = get_loop_body (loop);
3657 1811325 : for (i = 0; i < num; i++)
3658 : {
3659 1365208 : bb = body[i];
3660 1365208 : if (!aggressive_if_conv
3661 1356966 : && phi_nodes (bb)
3662 1791341 : && EDGE_COUNT (bb->preds) > MAX_PHI_ARG_NUM)
3663 : {
3664 5339 : if (dump_file && (dump_flags & TDF_DETAILS))
3665 0 : fprintf (dump_file,
3666 : "BB %d has complicated PHI with more than %u args.\n",
3667 : bb->index, MAX_PHI_ARG_NUM);
3668 :
3669 5339 : free (body);
3670 5339 : return false;
3671 : }
3672 1359869 : if (bb == loop->latch || bb_with_exit_edge_p (loop, bb))
3673 771261 : continue;
3674 :
3675 : /* Skip basic blocks not ending with conditional branch. */
3676 1408553 : if (!safe_is_a <gcond *> (*gsi_last_bb (bb))
3677 588608 : && !safe_is_a <gswitch *> (*gsi_last_bb (bb)))
3678 327887 : continue;
3679 :
3680 785223 : FOR_EACH_EDGE (e, ei, bb->succs)
3681 646154 : if (EDGE_CRITICAL_P (e) && e->dest->loop_father == loop)
3682 121652 : critical_edges.safe_push (e);
3683 : }
3684 220389 : free (body);
3685 :
3686 432977 : while (critical_edges.length () > 0)
3687 : {
3688 118870 : e = critical_edges.pop ();
3689 : /* Don't split if bb can be predicated along non-critical edge. */
3690 118870 : if (EDGE_COUNT (e->dest->preds) > 2 || all_preds_critical_p (e->dest))
3691 55429 : split_edge (e);
3692 : }
3693 :
3694 : return true;
3695 314107 : }
3696 :
3697 : /* Delete redundant statements produced by predication which prevents
3698 : loop vectorization. */
3699 :
3700 : static void
3701 29210 : ifcvt_local_dce (class loop *loop)
3702 : {
3703 29210 : gimple *stmt;
3704 29210 : gimple *stmt1;
3705 29210 : gimple *phi;
3706 29210 : gimple_stmt_iterator gsi;
3707 29210 : auto_vec<gimple *> worklist;
3708 29210 : enum gimple_code code;
3709 29210 : use_operand_p use_p;
3710 29210 : imm_use_iterator imm_iter;
3711 :
3712 : /* The loop has a single BB only. */
3713 29210 : basic_block bb = loop->header;
3714 29210 : tree latch_vdef = NULL_TREE;
3715 :
3716 29210 : worklist.create (64);
3717 : /* Consider all phi as live statements. */
3718 113863 : for (gsi = gsi_start_phis (bb); !gsi_end_p (gsi); gsi_next (&gsi))
3719 : {
3720 84653 : phi = gsi_stmt (gsi);
3721 84653 : gimple_set_plf (phi, GF_PLF_2, true);
3722 84653 : worklist.safe_push (phi);
3723 186760 : if (virtual_operand_p (gimple_phi_result (phi)))
3724 17454 : latch_vdef = PHI_ARG_DEF_FROM_EDGE (phi, loop_latch_edge (loop));
3725 : }
3726 : /* Consider load/store statements, CALL and COND as live. */
3727 938433 : for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
3728 : {
3729 880013 : stmt = gsi_stmt (gsi);
3730 880013 : if (is_gimple_debug (stmt))
3731 : {
3732 460463 : gimple_set_plf (stmt, GF_PLF_2, true);
3733 460463 : continue;
3734 : }
3735 419550 : if (gimple_store_p (stmt) || gimple_assign_load_p (stmt))
3736 : {
3737 77841 : gimple_set_plf (stmt, GF_PLF_2, true);
3738 77841 : worklist.safe_push (stmt);
3739 77841 : continue;
3740 : }
3741 341709 : code = gimple_code (stmt);
3742 341709 : if (code == GIMPLE_COND || code == GIMPLE_CALL || code == GIMPLE_SWITCH)
3743 : {
3744 34733 : gimple_set_plf (stmt, GF_PLF_2, true);
3745 34733 : worklist.safe_push (stmt);
3746 34733 : continue;
3747 : }
3748 306976 : gimple_set_plf (stmt, GF_PLF_2, false);
3749 :
3750 306976 : if (code == GIMPLE_ASSIGN)
3751 : {
3752 306967 : tree lhs = gimple_assign_lhs (stmt);
3753 1024330 : FOR_EACH_IMM_USE_FAST (use_p, imm_iter, lhs)
3754 : {
3755 429906 : stmt1 = USE_STMT (use_p);
3756 429906 : if (!is_gimple_debug (stmt1) && gimple_bb (stmt1) != bb)
3757 : {
3758 19510 : gimple_set_plf (stmt, GF_PLF_2, true);
3759 19510 : worklist.safe_push (stmt);
3760 19510 : break;
3761 : }
3762 306967 : }
3763 : }
3764 : }
3765 : /* Propagate liveness through arguments of live stmt. */
3766 517058 : while (worklist.length () > 0)
3767 : {
3768 487848 : ssa_op_iter iter;
3769 487848 : use_operand_p use_p;
3770 487848 : tree use;
3771 :
3772 487848 : stmt = worklist.pop ();
3773 1719660 : FOR_EACH_PHI_OR_STMT_USE (use_p, stmt, iter, SSA_OP_USE)
3774 : {
3775 743964 : use = USE_FROM_PTR (use_p);
3776 743964 : if (TREE_CODE (use) != SSA_NAME)
3777 44235 : continue;
3778 699729 : stmt1 = SSA_NAME_DEF_STMT (use);
3779 699729 : if (gimple_bb (stmt1) != bb || gimple_plf (stmt1, GF_PLF_2))
3780 428618 : continue;
3781 271111 : gimple_set_plf (stmt1, GF_PLF_2, true);
3782 271111 : worklist.safe_push (stmt1);
3783 : }
3784 : }
3785 : /* Delete dead statements. */
3786 29210 : gsi = gsi_last_bb (bb);
3787 909223 : while (!gsi_end_p (gsi))
3788 : {
3789 880013 : gimple_stmt_iterator gsiprev = gsi;
3790 880013 : gsi_prev (&gsiprev);
3791 880013 : stmt = gsi_stmt (gsi);
3792 880013 : if (!gimple_has_volatile_ops (stmt)
3793 879872 : && gimple_store_p (stmt)
3794 432105 : && gimple_vdef (stmt))
3795 : {
3796 20885 : tree lhs = gimple_get_lhs (stmt);
3797 20885 : ao_ref write;
3798 20885 : ao_ref_init (&write, lhs);
3799 :
3800 20885 : if (dse_classify_store (&write, stmt, false, NULL, NULL, latch_vdef)
3801 : == DSE_STORE_DEAD)
3802 347 : delete_dead_or_redundant_assignment (&gsi, "dead");
3803 20885 : gsi = gsiprev;
3804 20885 : continue;
3805 20885 : }
3806 :
3807 859128 : if (gimple_plf (stmt, GF_PLF_2))
3808 : {
3809 842773 : gsi = gsiprev;
3810 842773 : continue;
3811 : }
3812 16355 : if (dump_file && (dump_flags & TDF_DETAILS))
3813 : {
3814 16 : fprintf (dump_file, "Delete dead stmt in bb#%d\n", bb->index);
3815 16 : print_gimple_stmt (dump_file, stmt, 0, TDF_SLIM);
3816 : }
3817 16355 : gsi_remove (&gsi, true);
3818 16355 : release_defs (stmt);
3819 16355 : gsi = gsiprev;
3820 : }
3821 29210 : }
3822 :
3823 : /* Return true if VALUE is already available on edge PE. */
3824 :
3825 : static bool
3826 264390 : ifcvt_available_on_edge_p (edge pe, tree value)
3827 : {
3828 264390 : if (is_gimple_min_invariant (value))
3829 : return true;
3830 :
3831 258226 : if (TREE_CODE (value) == SSA_NAME)
3832 : {
3833 256987 : basic_block def_bb = gimple_bb (SSA_NAME_DEF_STMT (value));
3834 256987 : if (!def_bb || dominated_by_p (CDI_DOMINATORS, pe->dest, def_bb))
3835 26782 : return true;
3836 : }
3837 :
3838 : return false;
3839 : }
3840 :
3841 : /* Return true if STMT can be hoisted from if-converted loop LOOP to
3842 : edge PE. */
3843 :
3844 : static bool
3845 863311 : ifcvt_can_hoist (class loop *loop, edge pe, gimple *stmt)
3846 : {
3847 863311 : if (auto *call = dyn_cast<gcall *> (stmt))
3848 : {
3849 5529 : if (gimple_call_internal_p (call)
3850 5529 : && internal_fn_mask_index (gimple_call_internal_fn (call)) >= 0)
3851 : return false;
3852 : }
3853 1227174 : else if (auto *assign = dyn_cast<gassign *> (stmt))
3854 : {
3855 446856 : if (gimple_assign_rhs_code (assign) == COND_EXPR)
3856 : return false;
3857 : }
3858 : else
3859 : return false;
3860 :
3861 324937 : if (gimple_has_side_effects (stmt)
3862 323717 : || gimple_could_trap_p (stmt)
3863 243521 : || stmt_could_throw_p (cfun, stmt)
3864 243519 : || gimple_vdef (stmt)
3865 567774 : || gimple_vuse (stmt))
3866 83185 : return false;
3867 :
3868 241752 : int num_args = gimple_num_args (stmt);
3869 241752 : if (pe != loop_preheader_edge (loop))
3870 : {
3871 268370 : for (int i = 0; i < num_args; ++i)
3872 264390 : if (!ifcvt_available_on_edge_p (pe, gimple_arg (stmt, i)))
3873 : return false;
3874 : }
3875 : else
3876 : {
3877 8059 : for (int i = 0; i < num_args; ++i)
3878 7789 : if (!expr_invariant_in_loop_p (loop, gimple_arg (stmt, i)))
3879 : return false;
3880 : }
3881 :
3882 : return true;
3883 : }
3884 :
3885 : /* Hoist invariant statements from LOOP to edge PE. */
3886 :
3887 : static void
3888 29210 : ifcvt_hoist_invariants (class loop *loop, edge pe)
3889 : {
3890 : /* Only hoist from the now unconditionally executed part of the loop. */
3891 29210 : basic_block bb = loop->header;
3892 29210 : gimple_stmt_iterator hoist_gsi = {};
3893 921731 : for (gimple_stmt_iterator gsi = gsi_start_bb (bb); !gsi_end_p (gsi);)
3894 : {
3895 863311 : gimple *stmt = gsi_stmt (gsi);
3896 863311 : if (ifcvt_can_hoist (loop, pe, stmt))
3897 : {
3898 : /* Once we've hoisted one statement, insert other statements
3899 : after it. */
3900 4250 : gsi_remove (&gsi, false);
3901 4250 : if (hoist_gsi.ptr)
3902 1826 : gsi_insert_after (&hoist_gsi, stmt, GSI_NEW_STMT);
3903 : else
3904 : {
3905 2424 : gsi_insert_on_edge_immediate (pe, stmt);
3906 2424 : hoist_gsi = gsi_for_stmt (stmt);
3907 : }
3908 4250 : continue;
3909 : }
3910 859061 : gsi_next (&gsi);
3911 : }
3912 29210 : }
3913 :
3914 : /* Returns the DECL_FIELD_BIT_OFFSET of the bitfield accesse in stmt iff its
3915 : type mode is not BLKmode. If BITPOS is not NULL it will hold the poly_int64
3916 : value of the DECL_FIELD_BIT_OFFSET of the bitfield access and STRUCT_EXPR,
3917 : if not NULL, will hold the tree representing the base struct of this
3918 : bitfield. */
3919 :
3920 : static tree
3921 1229 : get_bitfield_rep (gassign *stmt, bool write, tree *bitpos,
3922 : tree *struct_expr)
3923 : {
3924 1229 : tree comp_ref = write ? gimple_assign_lhs (stmt)
3925 386 : : gimple_assign_rhs1 (stmt);
3926 :
3927 1229 : tree field_decl = TREE_OPERAND (comp_ref, 1);
3928 1229 : tree ref_offset = component_ref_field_offset (comp_ref);
3929 1229 : tree rep_decl = DECL_BIT_FIELD_REPRESENTATIVE (field_decl);
3930 :
3931 : /* Bail out if the representative is not a suitable type for a scalar
3932 : register variable. */
3933 1229 : if (!is_gimple_reg_type (TREE_TYPE (rep_decl)))
3934 : return NULL_TREE;
3935 :
3936 : /* Bail out if the DECL_SIZE of the field_decl isn't the same as the BF's
3937 : precision. */
3938 1214 : unsigned HOST_WIDE_INT bf_prec
3939 1214 : = TYPE_PRECISION (TREE_TYPE (gimple_assign_lhs (stmt)));
3940 1214 : if (compare_tree_int (DECL_SIZE (field_decl), bf_prec) != 0)
3941 : return NULL_TREE;
3942 :
3943 1214 : if (TREE_CODE (DECL_FIELD_OFFSET (rep_decl)) != INTEGER_CST
3944 1214 : || TREE_CODE (ref_offset) != INTEGER_CST)
3945 : {
3946 2 : if (dump_file && (dump_flags & TDF_DETAILS))
3947 2 : fprintf (dump_file, "\t Bitfield NOT OK to lower,"
3948 : " offset is non-constant.\n");
3949 2 : return NULL_TREE;
3950 : }
3951 :
3952 1212 : if (struct_expr)
3953 606 : *struct_expr = TREE_OPERAND (comp_ref, 0);
3954 :
3955 1212 : if (bitpos)
3956 : {
3957 : /* To calculate the bitposition of the BITFIELD_REF we have to determine
3958 : where our bitfield starts in relation to the container REP_DECL. The
3959 : DECL_FIELD_OFFSET of the original bitfield's member FIELD_DECL tells
3960 : us how many bytes from the start of the structure there are until the
3961 : start of the group of bitfield members the FIELD_DECL belongs to,
3962 : whereas DECL_FIELD_BIT_OFFSET will tell us how many bits from that
3963 : position our actual bitfield member starts. For the container
3964 : REP_DECL adding DECL_FIELD_OFFSET and DECL_FIELD_BIT_OFFSET will tell
3965 : us the distance between the start of the structure and the start of
3966 : the container, though the first is in bytes and the later other in
3967 : bits. With this in mind we calculate the bit position of our new
3968 : BITFIELD_REF by subtracting the number of bits between the start of
3969 : the structure and the container from the number of bits from the start
3970 : of the structure and the actual bitfield member. */
3971 606 : tree bf_pos = fold_build2 (MULT_EXPR, bitsizetype,
3972 : ref_offset,
3973 : build_int_cst (bitsizetype, BITS_PER_UNIT));
3974 606 : bf_pos = fold_build2 (PLUS_EXPR, bitsizetype, bf_pos,
3975 : DECL_FIELD_BIT_OFFSET (field_decl));
3976 606 : tree rep_pos = fold_build2 (MULT_EXPR, bitsizetype,
3977 : DECL_FIELD_OFFSET (rep_decl),
3978 : build_int_cst (bitsizetype, BITS_PER_UNIT));
3979 606 : rep_pos = fold_build2 (PLUS_EXPR, bitsizetype, rep_pos,
3980 : DECL_FIELD_BIT_OFFSET (rep_decl));
3981 :
3982 606 : *bitpos = fold_build2 (MINUS_EXPR, bitsizetype, bf_pos, rep_pos);
3983 : }
3984 :
3985 : return rep_decl;
3986 :
3987 : }
3988 :
3989 : /* Lowers the bitfield described by DATA.
3990 : For a write like:
3991 :
3992 : struct.bf = _1;
3993 :
3994 : lower to:
3995 :
3996 : __ifc_1 = struct.<representative>;
3997 : __ifc_2 = BIT_INSERT_EXPR (__ifc_1, _1, bitpos);
3998 : struct.<representative> = __ifc_2;
3999 :
4000 : For a read:
4001 :
4002 : _1 = struct.bf;
4003 :
4004 : lower to:
4005 :
4006 : __ifc_1 = struct.<representative>;
4007 : _1 = BIT_FIELD_REF (__ifc_1, bitsize, bitpos);
4008 :
4009 : where representative is a legal load that contains the bitfield value,
4010 : bitsize is the size of the bitfield and bitpos the offset to the start of
4011 : the bitfield within the representative. */
4012 :
4013 : static void
4014 606 : lower_bitfield (gassign *stmt, bool write)
4015 : {
4016 606 : tree struct_expr;
4017 606 : tree bitpos;
4018 606 : tree rep_decl = get_bitfield_rep (stmt, write, &bitpos, &struct_expr);
4019 606 : tree rep_type = TREE_TYPE (rep_decl);
4020 606 : tree bf_type = TREE_TYPE (gimple_assign_lhs (stmt));
4021 :
4022 606 : gimple_stmt_iterator gsi = gsi_for_stmt (stmt);
4023 606 : if (dump_file && (dump_flags & TDF_DETAILS))
4024 : {
4025 9 : fprintf (dump_file, "Lowering:\n");
4026 9 : print_gimple_stmt (dump_file, stmt, 0, TDF_SLIM);
4027 9 : fprintf (dump_file, "to:\n");
4028 : }
4029 :
4030 : /* REP_COMP_REF is a COMPONENT_REF for the representative. NEW_VAL is it's
4031 : defining SSA_NAME. */
4032 606 : tree rep_comp_ref = build3 (COMPONENT_REF, rep_type, struct_expr, rep_decl,
4033 : NULL_TREE);
4034 606 : tree new_val = ifc_temp_var (rep_type, rep_comp_ref, &gsi);
4035 :
4036 606 : if (dump_file && (dump_flags & TDF_DETAILS))
4037 9 : print_gimple_stmt (dump_file, SSA_NAME_DEF_STMT (new_val), 0, TDF_SLIM);
4038 :
4039 606 : if (write)
4040 : {
4041 417 : new_val = ifc_temp_var (rep_type,
4042 : build3 (BIT_INSERT_EXPR, rep_type, new_val,
4043 : unshare_expr (gimple_assign_rhs1 (stmt)),
4044 : bitpos), &gsi);
4045 :
4046 417 : if (dump_file && (dump_flags & TDF_DETAILS))
4047 0 : print_gimple_stmt (dump_file, SSA_NAME_DEF_STMT (new_val), 0, TDF_SLIM);
4048 :
4049 417 : gimple *new_stmt = gimple_build_assign (unshare_expr (rep_comp_ref),
4050 : new_val);
4051 417 : gimple_move_vops (new_stmt, stmt);
4052 417 : gsi_insert_before (&gsi, new_stmt, GSI_SAME_STMT);
4053 :
4054 417 : if (dump_file && (dump_flags & TDF_DETAILS))
4055 0 : print_gimple_stmt (dump_file, new_stmt, 0, TDF_SLIM);
4056 : }
4057 : else
4058 : {
4059 189 : tree bfr = build3 (BIT_FIELD_REF, bf_type, new_val,
4060 189 : build_int_cst (bitsizetype, TYPE_PRECISION (bf_type)),
4061 : bitpos);
4062 189 : new_val = ifc_temp_var (bf_type, bfr, &gsi);
4063 :
4064 189 : gimple *new_stmt = gimple_build_assign (gimple_assign_lhs (stmt),
4065 : new_val);
4066 189 : gimple_move_vops (new_stmt, stmt);
4067 189 : gsi_insert_before (&gsi, new_stmt, GSI_SAME_STMT);
4068 :
4069 189 : if (dump_file && (dump_flags & TDF_DETAILS))
4070 9 : print_gimple_stmt (dump_file, new_stmt, 0, TDF_SLIM);
4071 : }
4072 :
4073 606 : gsi_remove (&gsi, true);
4074 606 : }
4075 :
4076 : /* Return TRUE if there are bitfields to lower in this LOOP. Fill TO_LOWER
4077 : with data structures representing these bitfields. */
4078 :
4079 : static bool
4080 235753 : bitfields_to_lower_p (class loop *loop,
4081 : vec <gassign *> &reads_to_lower,
4082 : vec <gassign *> &writes_to_lower)
4083 : {
4084 235753 : gimple_stmt_iterator gsi;
4085 :
4086 235753 : if (dump_file && (dump_flags & TDF_DETAILS))
4087 : {
4088 31 : fprintf (dump_file, "Analyzing loop %d for bitfields:\n", loop->num);
4089 : }
4090 :
4091 1001915 : for (unsigned i = 0; i < loop->num_nodes; ++i)
4092 : {
4093 766183 : basic_block bb = ifc_bbs[i];
4094 6510727 : for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
4095 : {
4096 4978382 : gassign *stmt = dyn_cast<gassign*> (gsi_stmt (gsi));
4097 4978382 : if (!stmt)
4098 4852518 : continue;
4099 :
4100 2010771 : tree op = gimple_assign_lhs (stmt);
4101 2010771 : bool write = TREE_CODE (op) == COMPONENT_REF;
4102 :
4103 2010771 : if (!write)
4104 1979364 : op = gimple_assign_rhs1 (stmt);
4105 :
4106 2010771 : if (TREE_CODE (op) != COMPONENT_REF)
4107 1884907 : continue;
4108 :
4109 125864 : if (DECL_BIT_FIELD_TYPE (TREE_OPERAND (op, 1)))
4110 : {
4111 627 : if (dump_file && (dump_flags & TDF_DETAILS))
4112 11 : print_gimple_stmt (dump_file, stmt, 0, TDF_SLIM);
4113 :
4114 627 : if (TREE_THIS_VOLATILE (op))
4115 : {
4116 4 : if (dump_file && (dump_flags & TDF_DETAILS))
4117 0 : fprintf (dump_file, "\t Bitfield NO OK to lower,"
4118 : " the access is volatile.\n");
4119 21 : return false;
4120 : }
4121 :
4122 623 : if (!INTEGRAL_TYPE_P (TREE_TYPE (op)))
4123 : {
4124 0 : if (dump_file && (dump_flags & TDF_DETAILS))
4125 0 : fprintf (dump_file, "\t Bitfield NO OK to lower,"
4126 : " field type is not Integral.\n");
4127 0 : return false;
4128 : }
4129 :
4130 623 : if (!get_bitfield_rep (stmt, write, NULL, NULL))
4131 : {
4132 17 : if (dump_file && (dump_flags & TDF_DETAILS))
4133 2 : fprintf (dump_file, "\t Bitfield NOT OK to lower,"
4134 : " representative is BLKmode.\n");
4135 17 : return false;
4136 : }
4137 :
4138 606 : if (dump_file && (dump_flags & TDF_DETAILS))
4139 9 : fprintf (dump_file, "\tBitfield OK to lower.\n");
4140 606 : if (write)
4141 417 : writes_to_lower.safe_push (stmt);
4142 : else
4143 189 : reads_to_lower.safe_push (stmt);
4144 : }
4145 : }
4146 : }
4147 471328 : return !reads_to_lower.is_empty () || !writes_to_lower.is_empty ();
4148 : }
4149 :
4150 :
4151 : /* If-convert LOOP when it is legal. For the moment this pass has no
4152 : profitability analysis. Returns non-zero todo flags when something
4153 : changed. */
4154 :
4155 : unsigned int
4156 495067 : tree_if_conversion (class loop *loop, vec<gimple *> *preds)
4157 : {
4158 495067 : unsigned int todo = 0;
4159 495067 : bool aggressive_if_conv;
4160 495067 : class loop *rloop;
4161 495067 : auto_vec <gassign *, 4> reads_to_lower;
4162 495067 : auto_vec <gassign *, 4> writes_to_lower;
4163 495067 : bitmap exit_bbs;
4164 495067 : edge pe;
4165 495067 : auto_vec<data_reference_p, 10> refs;
4166 495990 : bool loop_versioned;
4167 :
4168 495990 : again:
4169 495990 : rloop = NULL;
4170 495990 : ifc_bbs = NULL;
4171 495990 : need_to_lower_bitfields = false;
4172 495990 : need_to_ifcvt = false;
4173 495990 : need_to_predicate = false;
4174 495990 : need_to_rewrite_undefined = false;
4175 495990 : any_complicated_phi = false;
4176 495990 : loop_versioned = false;
4177 :
4178 : /* Apply more aggressive if-conversion when loop or its outer loop were
4179 : marked with simd pragma. When that's the case, we try to if-convert
4180 : loop containing PHIs with more than MAX_PHI_ARG_NUM arguments. */
4181 495990 : aggressive_if_conv = loop->force_vectorize;
4182 495990 : if (!aggressive_if_conv)
4183 : {
4184 487638 : class loop *outer_loop = loop_outer (loop);
4185 487638 : if (outer_loop && outer_loop->force_vectorize)
4186 8668 : aggressive_if_conv = true;
4187 : }
4188 :
4189 : /* If there are more than two BBs in the loop then there is at least one if
4190 : to convert. */
4191 495990 : if (loop->num_nodes > 2
4192 495990 : && !ifcvt_split_critical_edges (loop, aggressive_if_conv))
4193 93718 : goto cleanup;
4194 :
4195 402272 : ifc_bbs = get_loop_body_in_if_conv_order (loop);
4196 402272 : if (!ifc_bbs)
4197 : {
4198 2577 : if (dump_file && (dump_flags & TDF_DETAILS))
4199 3 : fprintf (dump_file, "Irreducible loop\n");
4200 2577 : goto cleanup;
4201 : }
4202 :
4203 399695 : if (find_data_references_in_loop (loop, &refs) == chrec_dont_know)
4204 150226 : goto cleanup;
4205 :
4206 249469 : if (loop->num_nodes > 2)
4207 : {
4208 : /* More than one loop exit is too much to handle. */
4209 136710 : if (!single_exit (loop))
4210 : {
4211 94096 : if (dump_file && (dump_flags & TDF_DETAILS))
4212 10 : fprintf (dump_file, "Can not ifcvt due to multiple exits\n");
4213 : }
4214 : else
4215 : {
4216 42614 : need_to_ifcvt = true;
4217 :
4218 42614 : if (!if_convertible_loop_p (loop, &refs)
4219 42614 : || !dbg_cnt (if_conversion_tree))
4220 13663 : goto cleanup;
4221 :
4222 28951 : if ((need_to_predicate || any_complicated_phi)
4223 3551 : && ((!flag_tree_loop_vectorize && !loop->force_vectorize)
4224 3551 : || loop->dont_vectorize))
4225 0 : goto cleanup;
4226 : }
4227 : }
4228 :
4229 235806 : if ((flag_tree_loop_vectorize || loop->force_vectorize)
4230 235753 : && !loop->dont_vectorize)
4231 235753 : need_to_lower_bitfields = bitfields_to_lower_p (loop, reads_to_lower,
4232 : writes_to_lower);
4233 :
4234 235806 : if (!need_to_ifcvt && !need_to_lower_bitfields)
4235 206596 : goto cleanup;
4236 :
4237 : /* The edge to insert invariant stmts on. */
4238 29210 : pe = loop_preheader_edge (loop);
4239 :
4240 : /* Since we have no cost model, always version loops unless the user
4241 : specified -ftree-loop-if-convert or unless versioning is required.
4242 : Either version this loop, or if the pattern is right for outer-loop
4243 : vectorization, version the outer loop. In the latter case we will
4244 : still if-convert the original inner loop. */
4245 29210 : if (need_to_lower_bitfields
4246 28949 : || need_to_predicate
4247 26789 : || any_complicated_phi
4248 25400 : || flag_tree_loop_if_convert != 1)
4249 : {
4250 29161 : class loop *vloop
4251 29161 : = (versionable_outer_loop_p (loop_outer (loop))
4252 29161 : ? loop_outer (loop) : loop);
4253 29161 : class loop *nloop = version_loop_for_if_conversion (vloop, preds);
4254 29161 : if (nloop == NULL)
4255 0 : goto cleanup;
4256 29161 : if (vloop != loop)
4257 : {
4258 : /* If versionable_outer_loop_p decided to version the
4259 : outer loop, version also the inner loop of the non-vectorized
4260 : loop copy. So we transform:
4261 : loop1
4262 : loop2
4263 : into:
4264 : if (LOOP_VECTORIZED (1, 3))
4265 : {
4266 : loop1
4267 : loop2
4268 : }
4269 : else
4270 : loop3 (copy of loop1)
4271 : if (LOOP_VECTORIZED (4, 5))
4272 : loop4 (copy of loop2)
4273 : else
4274 : loop5 (copy of loop4) */
4275 923 : gcc_assert (nloop->inner && nloop->inner->next == NULL);
4276 : rloop = nloop->inner;
4277 : }
4278 : else
4279 : /* If we versioned loop then make sure to insert invariant
4280 : stmts before the .LOOP_VECTORIZED check since the vectorizer
4281 : will re-use that for things like runtime alias versioning
4282 : whose condition can end up using those invariants. */
4283 28238 : pe = single_pred_edge (gimple_bb (preds->last ()));
4284 :
4285 : loop_versioned = true;
4286 : }
4287 :
4288 29210 : if (need_to_lower_bitfields)
4289 : {
4290 261 : if (dump_file && (dump_flags & TDF_DETAILS))
4291 : {
4292 9 : fprintf (dump_file, "-------------------------\n");
4293 9 : fprintf (dump_file, "Start lowering bitfields\n");
4294 : }
4295 450 : while (!reads_to_lower.is_empty ())
4296 189 : lower_bitfield (reads_to_lower.pop (), false);
4297 678 : while (!writes_to_lower.is_empty ())
4298 417 : lower_bitfield (writes_to_lower.pop (), true);
4299 :
4300 261 : if (dump_file && (dump_flags & TDF_DETAILS))
4301 : {
4302 9 : fprintf (dump_file, "Done lowering bitfields\n");
4303 9 : fprintf (dump_file, "-------------------------\n");
4304 : }
4305 : }
4306 29210 : if (need_to_ifcvt)
4307 : {
4308 : /* Before we rewrite edges we'll record their original position in the
4309 : edge map such that we can map the edges between the ifcvt and the
4310 : non-ifcvt loop during peeling. */
4311 28951 : uintptr_t idx = 0;
4312 115804 : for (edge exit : get_loop_exit_edges (loop))
4313 28951 : exit->aux = (void*)idx++;
4314 :
4315 : /* Now all statements are if-convertible. Combine all the basic
4316 : blocks into one huge basic block doing the if-conversion
4317 : on-the-fly. */
4318 28951 : combine_blocks (loop, loop_versioned);
4319 : }
4320 :
4321 : std::pair <tree, tree> *name_pair;
4322 : unsigned ssa_names_idx;
4323 34352 : FOR_EACH_VEC_ELT (redundant_ssa_names, ssa_names_idx, name_pair)
4324 5142 : replace_uses_by (name_pair->first, name_pair->second);
4325 29210 : redundant_ssa_names.release ();
4326 :
4327 : /* Perform local CSE, this esp. helps the vectorizer analysis if loads
4328 : and stores are involved. CSE only the loop body, not the entry
4329 : PHIs, those are to be kept in sync with the non-if-converted copy.
4330 : ??? We'll still keep dead stores though. */
4331 29210 : exit_bbs = BITMAP_ALLOC (NULL);
4332 117020 : for (edge exit : get_loop_exit_edges (loop))
4333 29408 : bitmap_set_bit (exit_bbs, exit->dest->index);
4334 29210 : todo |= do_rpo_vn (cfun, loop_preheader_edge (loop), exit_bbs,
4335 : false, true, true);
4336 :
4337 : /* Delete dead predicate computations. */
4338 29210 : ifcvt_local_dce (loop);
4339 29210 : BITMAP_FREE (exit_bbs);
4340 :
4341 29210 : ifcvt_hoist_invariants (loop, pe);
4342 :
4343 29210 : todo |= TODO_cleanup_cfg;
4344 :
4345 495990 : cleanup:
4346 495990 : data_reference_p dr;
4347 495990 : unsigned int i;
4348 1644556 : for (i = 0; refs.iterate (i, &dr); i++)
4349 : {
4350 1148566 : free (dr->aux);
4351 1148566 : free_data_ref (dr);
4352 : }
4353 495990 : refs.truncate (0);
4354 :
4355 495990 : if (ifc_bbs)
4356 : {
4357 : unsigned int i;
4358 :
4359 1973827 : for (i = 0; i < loop->num_nodes; i++)
4360 1603083 : free_bb_predicate (ifc_bbs[i]);
4361 :
4362 370744 : free (ifc_bbs);
4363 370744 : ifc_bbs = NULL;
4364 : }
4365 495990 : if (rloop != NULL)
4366 : {
4367 923 : loop = rloop;
4368 923 : reads_to_lower.truncate (0);
4369 923 : writes_to_lower.truncate (0);
4370 923 : goto again;
4371 : }
4372 :
4373 495067 : return todo;
4374 495067 : }
4375 :
4376 : /* Tree if-conversion pass management. */
4377 :
4378 : namespace {
4379 :
4380 : const pass_data pass_data_if_conversion =
4381 : {
4382 : GIMPLE_PASS, /* type */
4383 : "ifcvt", /* name */
4384 : OPTGROUP_NONE, /* optinfo_flags */
4385 : TV_TREE_LOOP_IFCVT, /* tv_id */
4386 : ( PROP_cfg | PROP_ssa ), /* properties_required */
4387 : 0, /* properties_provided */
4388 : 0, /* properties_destroyed */
4389 : 0, /* todo_flags_start */
4390 : 0, /* todo_flags_finish */
4391 : };
4392 :
4393 : class pass_if_conversion : public gimple_opt_pass
4394 : {
4395 : public:
4396 288767 : pass_if_conversion (gcc::context *ctxt)
4397 577534 : : gimple_opt_pass (pass_data_if_conversion, ctxt)
4398 : {}
4399 :
4400 : /* opt_pass methods: */
4401 : bool gate (function *) final override;
4402 : unsigned int execute (function *) final override;
4403 :
4404 : }; // class pass_if_conversion
4405 :
4406 : bool
4407 241715 : pass_if_conversion::gate (function *fun)
4408 : {
4409 35112 : return (((flag_tree_loop_vectorize || fun->has_force_vectorize_loops)
4410 208415 : && flag_tree_loop_if_convert != 0)
4411 275024 : || flag_tree_loop_if_convert == 1);
4412 : }
4413 :
4414 : unsigned int
4415 208427 : pass_if_conversion::execute (function *fun)
4416 : {
4417 208427 : unsigned todo = 0;
4418 :
4419 416854 : if (number_of_loops (fun) <= 1)
4420 : return 0;
4421 :
4422 208427 : auto_vec<gimple *> preds;
4423 1127721 : for (auto loop : loops_list (cfun, 0))
4424 502440 : if (flag_tree_loop_if_convert == 1
4425 502193 : || ((flag_tree_loop_vectorize || loop->force_vectorize)
4426 499425 : && !loop->dont_vectorize))
4427 495067 : todo |= tree_if_conversion (loop, &preds);
4428 :
4429 208427 : if (todo)
4430 : {
4431 18904 : free_numbers_of_iterations_estimates (fun);
4432 18904 : scev_reset ();
4433 : }
4434 :
4435 208427 : if (flag_checking)
4436 : {
4437 208423 : basic_block bb;
4438 7000573 : FOR_EACH_BB_FN (bb, fun)
4439 6792150 : gcc_assert (!bb->aux);
4440 : }
4441 :
4442 : /* Perform IL update now, it might elide some loops. */
4443 208427 : if (todo & TODO_cleanup_cfg)
4444 : {
4445 18904 : cleanup_tree_cfg ();
4446 18904 : if (need_ssa_update_p (fun))
4447 0 : todo |= TODO_update_ssa;
4448 : }
4449 208427 : if (todo & TODO_update_ssa_any)
4450 0 : update_ssa (todo & TODO_update_ssa_any);
4451 :
4452 : /* If if-conversion elided the loop fall back to the original one. Likewise
4453 : if the loops are not nested in the same outer loop. */
4454 237588 : for (unsigned i = 0; i < preds.length (); ++i)
4455 : {
4456 29161 : gimple *g = preds[i];
4457 29161 : if (!gimple_bb (g))
4458 0 : continue;
4459 29161 : auto ifcvt_loop = get_loop (fun, tree_to_uhwi (gimple_call_arg (g, 0)));
4460 29161 : auto orig_loop = get_loop (fun, tree_to_uhwi (gimple_call_arg (g, 1)));
4461 29161 : if (!ifcvt_loop || !orig_loop)
4462 : {
4463 2 : if (dump_file)
4464 0 : fprintf (dump_file, "If-converted loop vanished\n");
4465 2 : fold_loop_internal_call (g, boolean_false_node);
4466 : }
4467 29159 : else if (loop_outer (ifcvt_loop) != loop_outer (orig_loop))
4468 : {
4469 0 : if (dump_file)
4470 0 : fprintf (dump_file, "If-converted loop in different outer loop\n");
4471 0 : fold_loop_internal_call (g, boolean_false_node);
4472 : }
4473 : }
4474 :
4475 208427 : return 0;
4476 208427 : }
4477 :
4478 : } // anon namespace
4479 :
4480 : gimple_opt_pass *
4481 288767 : make_pass_if_conversion (gcc::context *ctxt)
4482 : {
4483 288767 : return new pass_if_conversion (ctxt);
4484 : }
|