Line data Source code
1 : /* Analysis Utilities for Loop Vectorization.
2 : Copyright (C) 2006-2026 Free Software Foundation, Inc.
3 : Contributed by Dorit Nuzman <dorit@il.ibm.com>
4 :
5 : This file is part of GCC.
6 :
7 : GCC is free software; you can redistribute it and/or modify it under
8 : the terms of the GNU General Public License as published by the Free
9 : Software Foundation; either version 3, or (at your option) any later
10 : version.
11 :
12 : GCC is distributed in the hope that it will be useful, but WITHOUT ANY
13 : WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 : FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
15 : for more details.
16 :
17 : You should have received a copy of the GNU General Public License
18 : along with GCC; see the file COPYING3. If not see
19 : <http://www.gnu.org/licenses/>. */
20 :
21 : #include "config.h"
22 : #include "system.h"
23 : #include "coretypes.h"
24 : #include "backend.h"
25 : #include "rtl.h"
26 : #include "tree.h"
27 : #include "gimple.h"
28 : #include "gimple-iterator.h"
29 : #include "gimple-fold.h"
30 : #include "ssa.h"
31 : #include "expmed.h"
32 : #include "optabs-tree.h"
33 : #include "insn-config.h"
34 : #include "recog.h" /* FIXME: for insn_data */
35 : #include "fold-const.h"
36 : #include "stor-layout.h"
37 : #include "tree-eh.h"
38 : #include "gimplify.h"
39 : #include "gimple-iterator.h"
40 : #include "gimple-fold.h"
41 : #include "gimplify-me.h"
42 : #include "cfgloop.h"
43 : #include "tree-vectorizer.h"
44 : #include "dumpfile.h"
45 : #include "builtins.h"
46 : #include "internal-fn.h"
47 : #include "case-cfn-macros.h"
48 : #include "fold-const-call.h"
49 : #include "attribs.h"
50 : #include "cgraph.h"
51 : #include "omp-simd-clone.h"
52 : #include "predict.h"
53 : #include "tree-vector-builder.h"
54 : #include "tree-ssa-loop-ivopts.h"
55 : #include "vec-perm-indices.h"
56 : #include "gimple-range.h"
57 : #include "alias.h"
58 :
59 :
60 : /* TODO: Note the vectorizer still builds COND_EXPRs with GENERIC compares
61 : in the first operand. Disentangling this is future work, the
62 : IL is properly transferred to VEC_COND_EXPRs with separate compares. */
63 :
64 :
65 : /* Return true if we have a useful VR_RANGE range for VAR, storing it
66 : in *MIN_VALUE and *MAX_VALUE if so. Note the range in the dump files. */
67 :
68 : bool
69 12266167 : vect_get_range_info (tree var, wide_int *min_value, wide_int *max_value)
70 : {
71 12266167 : int_range_max vr;
72 12266167 : tree vr_min, vr_max;
73 24532334 : get_range_query (cfun)->range_of_expr (vr, var);
74 12266167 : if (vr.undefined_p ())
75 71 : vr.set_varying (TREE_TYPE (var));
76 12266167 : value_range_kind vr_type = get_legacy_range (vr, vr_min, vr_max);
77 12266167 : *min_value = wi::to_wide (vr_min);
78 12266167 : *max_value = wi::to_wide (vr_max);
79 12266167 : wide_int nonzero = get_nonzero_bits (var);
80 12266167 : signop sgn = TYPE_SIGN (TREE_TYPE (var));
81 12266167 : if (intersect_range_with_nonzero_bits (vr_type, min_value, max_value,
82 : nonzero, sgn) == VR_RANGE)
83 : {
84 6091902 : if (dump_enabled_p ())
85 : {
86 88391 : dump_generic_expr_loc (MSG_NOTE, vect_location, TDF_SLIM, var);
87 88391 : dump_printf (MSG_NOTE, " has range [");
88 88391 : dump_hex (MSG_NOTE, *min_value);
89 88391 : dump_printf (MSG_NOTE, ", ");
90 88391 : dump_hex (MSG_NOTE, *max_value);
91 88391 : dump_printf (MSG_NOTE, "]\n");
92 : }
93 6091902 : return true;
94 : }
95 : else
96 : {
97 6174265 : if (dump_enabled_p ())
98 : {
99 67618 : dump_generic_expr_loc (MSG_NOTE, vect_location, TDF_SLIM, var);
100 67618 : dump_printf (MSG_NOTE, " has no range info\n");
101 : }
102 6174265 : return false;
103 : }
104 12266167 : }
105 :
106 : /* Report that we've found an instance of pattern NAME in
107 : statement STMT. */
108 :
109 : static void
110 1257245 : vect_pattern_detected (const char *name, gimple *stmt)
111 : {
112 1257245 : if (dump_enabled_p ())
113 25167 : dump_printf_loc (MSG_NOTE, vect_location, "%s: detected: %G", name, stmt);
114 1257245 : }
115 :
116 : /* Associate pattern statement PATTERN_STMT with ORIG_STMT_INFO and
117 : return the pattern statement's stmt_vec_info. Set its vector type to
118 : VECTYPE if it doesn't have one already. */
119 :
120 : static stmt_vec_info
121 2427841 : vect_init_pattern_stmt (vec_info *vinfo, gimple *pattern_stmt,
122 : stmt_vec_info orig_stmt_info, tree vectype)
123 : {
124 2427841 : stmt_vec_info pattern_stmt_info = vinfo->lookup_stmt (pattern_stmt);
125 2427841 : if (pattern_stmt_info == NULL)
126 1299749 : pattern_stmt_info = vinfo->add_stmt (pattern_stmt);
127 2427841 : gimple_set_bb (pattern_stmt, gimple_bb (orig_stmt_info->stmt));
128 :
129 2427841 : pattern_stmt_info->pattern_stmt_p = true;
130 2427841 : STMT_VINFO_RELATED_STMT (pattern_stmt_info) = orig_stmt_info;
131 2427841 : STMT_VINFO_DEF_TYPE (pattern_stmt_info)
132 2427841 : = STMT_VINFO_DEF_TYPE (orig_stmt_info);
133 2427841 : if (!STMT_VINFO_VECTYPE (pattern_stmt_info))
134 : {
135 2196733 : gcc_assert (!vectype
136 : || is_a <gcond *> (pattern_stmt)
137 : || (VECTOR_BOOLEAN_TYPE_P (vectype)
138 : == vect_use_mask_type_p (orig_stmt_info)));
139 1310464 : STMT_VINFO_VECTYPE (pattern_stmt_info) = vectype;
140 1310464 : pattern_stmt_info->mask_precision = orig_stmt_info->mask_precision;
141 : }
142 2427841 : return pattern_stmt_info;
143 : }
144 :
145 : /* Set the pattern statement of ORIG_STMT_INFO to PATTERN_STMT.
146 : Also set the vector type of PATTERN_STMT to VECTYPE, if it doesn't
147 : have one already. */
148 :
149 : static void
150 1034080 : vect_set_pattern_stmt (vec_info *vinfo, gimple *pattern_stmt,
151 : stmt_vec_info orig_stmt_info, tree vectype)
152 : {
153 1034080 : STMT_VINFO_IN_PATTERN_P (orig_stmt_info) = true;
154 1034080 : STMT_VINFO_RELATED_STMT (orig_stmt_info)
155 0 : = vect_init_pattern_stmt (vinfo, pattern_stmt, orig_stmt_info, vectype);
156 1003092 : }
157 :
158 : /* Add NEW_STMT to STMT_INFO's pattern definition statements. If VECTYPE
159 : is nonnull, record that NEW_STMT's vector type is VECTYPE, which might
160 : be different from the vector type of the final pattern statement.
161 : If VECTYPE is a mask type, SCALAR_TYPE_FOR_MASK is the scalar type
162 : from which it was derived. */
163 :
164 : static inline void
165 1352676 : append_pattern_def_seq (vec_info *vinfo,
166 : stmt_vec_info stmt_info, gimple *new_stmt,
167 : tree vectype = NULL_TREE,
168 : tree scalar_type_for_mask = NULL_TREE)
169 : {
170 2065418 : gcc_assert (!scalar_type_for_mask
171 : == (!vectype || !VECTOR_BOOLEAN_TYPE_P (vectype)));
172 1352676 : if (vectype)
173 : {
174 1117616 : stmt_vec_info new_stmt_info = vinfo->add_stmt (new_stmt);
175 1117616 : STMT_VINFO_VECTYPE (new_stmt_info) = vectype;
176 1117616 : if (scalar_type_for_mask)
177 639934 : new_stmt_info->mask_precision
178 1279868 : = GET_MODE_BITSIZE (SCALAR_TYPE_MODE (scalar_type_for_mask));
179 : }
180 1352676 : gimple_seq_add_stmt_without_update (&STMT_VINFO_PATTERN_DEF_SEQ (stmt_info),
181 : new_stmt);
182 1352676 : }
183 :
184 :
185 : /* Add NEW_STMT to VINFO's invariant pattern definition statements. These
186 : statements are not vectorized but are materialized as scalar in the loop
187 : preheader. */
188 :
189 : static inline void
190 1383 : append_inv_pattern_def_seq (vec_info *vinfo, gimple *new_stmt)
191 : {
192 1383 : gimple_seq_add_stmt_without_update (&vinfo->inv_pattern_def_seq, new_stmt);
193 : }
194 :
195 : /* The caller wants to perform new operations on vect_external variable
196 : VAR, so that the result of the operations would also be vect_external.
197 : Return the edge on which the operations can be performed, if one exists.
198 : Return null if the operations should instead be treated as part of
199 : the pattern that needs them. */
200 :
201 : static edge
202 8608 : vect_get_external_def_edge (vec_info *vinfo, tree var)
203 : {
204 8608 : edge e = NULL;
205 8608 : if (loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo))
206 : {
207 819 : e = loop_preheader_edge (loop_vinfo->loop);
208 819 : if (!SSA_NAME_IS_DEFAULT_DEF (var))
209 : {
210 623 : basic_block bb = gimple_bb (SSA_NAME_DEF_STMT (var));
211 623 : if (bb == NULL
212 623 : || !dominated_by_p (CDI_DOMINATORS, e->dest, bb))
213 : e = NULL;
214 : }
215 : }
216 8608 : return e;
217 : }
218 :
219 : /* Return true if the target supports a vector version of CODE,
220 : where CODE is known to map to a direct optab with the given SUBTYPE.
221 : ITYPE specifies the type of (some of) the scalar inputs and OTYPE
222 : specifies the type of the scalar result.
223 :
224 : If CODE allows the inputs and outputs to have different type
225 : (such as for WIDEN_SUM_EXPR), it is the input mode rather
226 : than the output mode that determines the appropriate target pattern.
227 : Operand 0 of the target pattern then specifies the mode that the output
228 : must have.
229 :
230 : When returning true, set *VECOTYPE_OUT to the vector version of OTYPE.
231 : Also set *VECITYPE_OUT to the vector version of ITYPE if VECITYPE_OUT
232 : is nonnull. */
233 :
234 : static bool
235 813 : vect_supportable_direct_optab_p (vec_info *vinfo, tree otype, tree_code code,
236 : tree itype, tree *vecotype_out,
237 : tree *vecitype_out = NULL,
238 : enum optab_subtype subtype = optab_default)
239 : {
240 813 : tree vecitype = get_vectype_for_scalar_type (vinfo, itype);
241 813 : if (!vecitype)
242 : return false;
243 :
244 813 : tree vecotype = get_vectype_for_scalar_type (vinfo, otype);
245 813 : if (!vecotype)
246 : return false;
247 :
248 813 : optab optab = optab_for_tree_code (code, vecitype, subtype);
249 813 : if (!optab)
250 : return false;
251 :
252 813 : insn_code icode = optab_handler (optab, TYPE_MODE (vecitype));
253 813 : if (icode == CODE_FOR_nothing
254 813 : || insn_data[icode].operand[0].mode != TYPE_MODE (vecotype))
255 394 : return false;
256 :
257 419 : *vecotype_out = vecotype;
258 419 : if (vecitype_out)
259 412 : *vecitype_out = vecitype;
260 : return true;
261 : }
262 :
263 : /* Return true if the target supports a vector version of CODE,
264 : where CODE is known to map to a conversion optab with the given SUBTYPE.
265 : ITYPE specifies the type of (some of) the scalar inputs and OTYPE
266 : specifies the type of the scalar result.
267 :
268 : When returning true, set *VECOTYPE_OUT to the vector version of OTYPE.
269 : Also set *VECITYPE_OUT to the vector version of ITYPE if VECITYPE_OUT
270 : is nonnull. */
271 :
272 : static bool
273 3667 : vect_supportable_conv_optab_p (vec_info *vinfo, tree otype, tree_code code,
274 : tree itype, tree *vecotype_out,
275 : tree *vecitype_out = NULL,
276 : enum optab_subtype subtype = optab_default)
277 : {
278 3667 : tree vecitype = get_vectype_for_scalar_type (vinfo, itype);
279 3667 : tree vecotype = get_vectype_for_scalar_type (vinfo, otype);
280 3667 : if (!vecitype || !vecotype)
281 : return false;
282 :
283 3405 : if (!directly_supported_p (code, vecotype, vecitype, subtype))
284 : return false;
285 :
286 714 : *vecotype_out = vecotype;
287 714 : if (vecitype_out)
288 714 : *vecitype_out = vecitype;
289 : return true;
290 : }
291 :
292 : /* Round bit precision PRECISION up to a full element. */
293 :
294 : static unsigned int
295 3250251 : vect_element_precision (unsigned int precision)
296 : {
297 0 : precision = 1 << ceil_log2 (precision);
298 4874654 : return MAX (precision, BITS_PER_UNIT);
299 : }
300 :
301 : /* If OP is defined by a statement that's being considered for vectorization,
302 : return information about that statement, otherwise return NULL. */
303 :
304 : static stmt_vec_info
305 342487 : vect_get_internal_def (vec_info *vinfo, tree op)
306 : {
307 342487 : stmt_vec_info def_stmt_info = vinfo->lookup_def (op);
308 342487 : if (def_stmt_info
309 329054 : && STMT_VINFO_DEF_TYPE (def_stmt_info) == vect_internal_def)
310 313245 : return vect_stmt_to_vectorize (def_stmt_info);
311 : return NULL;
312 : }
313 :
314 : /* Holds information about an input operand after some sign changes
315 : and type promotions have been peeled away. */
316 : class vect_unpromoted_value {
317 : public:
318 : vect_unpromoted_value ();
319 :
320 : void set_op (tree, vect_def_type, stmt_vec_info = NULL);
321 :
322 : /* The value obtained after peeling away zero or more casts. */
323 : tree op;
324 :
325 : /* The type of OP. */
326 : tree type;
327 :
328 : /* The definition type of OP. */
329 : vect_def_type dt;
330 :
331 : /* If OP is the result of peeling at least one cast, and if the cast
332 : of OP itself is a vectorizable statement, CASTER identifies that
333 : statement, otherwise it is null. */
334 : stmt_vec_info caster;
335 : };
336 :
337 294383156 : inline vect_unpromoted_value::vect_unpromoted_value ()
338 294383156 : : op (NULL_TREE),
339 294383156 : type (NULL_TREE),
340 294383156 : dt (vect_uninitialized_def),
341 3235332 : caster (NULL)
342 : {
343 : }
344 :
345 : /* Set the operand to OP_IN, its definition type to DT_IN, and the
346 : statement that casts it to CASTER_IN. */
347 :
348 : inline void
349 11382745 : vect_unpromoted_value::set_op (tree op_in, vect_def_type dt_in,
350 : stmt_vec_info caster_in)
351 : {
352 11382745 : op = op_in;
353 11382745 : type = TREE_TYPE (op);
354 11382745 : dt = dt_in;
355 11382745 : caster = caster_in;
356 11382745 : }
357 :
358 : /* If OP is a vectorizable SSA name, strip a sequence of integer conversions
359 : to reach some vectorizable inner operand OP', continuing as long as it
360 : is possible to convert OP' back to OP using a possible sign change
361 : followed by a possible promotion P. Return this OP', or null if OP is
362 : not a vectorizable SSA name. If there is a promotion P, describe its
363 : input in UNPROM, otherwise describe OP' in UNPROM. If SINGLE_USE_P
364 : is nonnull, set *SINGLE_USE_P to false if any of the SSA names involved
365 : have more than one user.
366 :
367 : A successful return means that it is possible to go from OP' to OP
368 : via UNPROM. The cast from OP' to UNPROM is at most a sign change,
369 : whereas the cast from UNPROM to OP might be a promotion, a sign
370 : change, or a nop.
371 :
372 : E.g. say we have:
373 :
374 : signed short *ptr = ...;
375 : signed short C = *ptr;
376 : unsigned short B = (unsigned short) C; // sign change
377 : signed int A = (signed int) B; // unsigned promotion
378 : ...possible other uses of A...
379 : unsigned int OP = (unsigned int) A; // sign change
380 :
381 : In this case it's possible to go directly from C to OP using:
382 :
383 : OP = (unsigned int) (unsigned short) C;
384 : +------------+ +--------------+
385 : promotion sign change
386 :
387 : so OP' would be C. The input to the promotion is B, so UNPROM
388 : would describe B. */
389 :
390 : static tree
391 8400130 : vect_look_through_possible_promotion (vec_info *vinfo, tree op,
392 : vect_unpromoted_value *unprom,
393 : bool *single_use_p = NULL)
394 : {
395 8400130 : tree op_type = TREE_TYPE (op);
396 8400130 : if (!INTEGRAL_TYPE_P (op_type))
397 : return NULL_TREE;
398 :
399 8348875 : tree res = NULL_TREE;
400 8348875 : unsigned int orig_precision = TYPE_PRECISION (op_type);
401 8348875 : unsigned int min_precision = orig_precision;
402 8348875 : stmt_vec_info caster = NULL;
403 10010846 : while (TREE_CODE (op) == SSA_NAME && INTEGRAL_TYPE_P (op_type))
404 : {
405 : /* See whether OP is simple enough to vectorize. */
406 9782387 : stmt_vec_info def_stmt_info;
407 9782387 : gimple *def_stmt;
408 9782387 : vect_def_type dt;
409 9782387 : if (!vect_is_simple_use (op, vinfo, &dt, &def_stmt_info, &def_stmt))
410 : break;
411 :
412 : /* If OP is the input of a demotion, skip over it to see whether
413 : OP is itself the result of a promotion. If so, the combined
414 : effect of the promotion and the demotion might fit the required
415 : pattern, otherwise neither operation fits.
416 :
417 : This copes with cases such as the result of an arithmetic
418 : operation being truncated before being stored, and where that
419 : arithmetic operation has been recognized as an over-widened one. */
420 9766201 : if (TYPE_PRECISION (op_type) <= min_precision)
421 : {
422 : /* Use OP as the UNPROM described above if we haven't yet
423 : found a promotion, or if using the new input preserves the
424 : sign of the previous promotion. */
425 9639994 : if (!res
426 1417367 : || TYPE_PRECISION (unprom->type) == orig_precision
427 38752 : || TYPE_SIGN (unprom->type) == TYPE_SIGN (op_type)
428 9675937 : || (TYPE_UNSIGNED (op_type)
429 25429 : && TYPE_PRECISION (op_type) < TYPE_PRECISION (unprom->type)))
430 : {
431 9604481 : unprom->set_op (op, dt, caster);
432 9604481 : min_precision = TYPE_PRECISION (op_type);
433 : }
434 : /* Stop if we've already seen a promotion and if this
435 : conversion does more than change the sign. */
436 35513 : else if (TYPE_PRECISION (op_type)
437 35513 : != TYPE_PRECISION (unprom->type))
438 : break;
439 :
440 : /* The sequence now extends to OP. */
441 : res = op;
442 : }
443 :
444 : /* See whether OP is defined by a cast. Record it as CASTER if
445 : the cast is potentially vectorizable. */
446 9766157 : if (!def_stmt)
447 : break;
448 9565533 : caster = def_stmt_info;
449 :
450 : /* Ignore pattern statements, since we don't link uses for them. */
451 9565533 : if (caster
452 9565533 : && single_use_p
453 1970517 : && !STMT_VINFO_RELATED_STMT (caster)
454 11388383 : && !has_single_use (res))
455 1096012 : *single_use_p = false;
456 :
457 17685949 : gassign *assign = dyn_cast <gassign *> (def_stmt);
458 6014223 : if (!assign || !CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (def_stmt)))
459 : break;
460 :
461 : /* Continue with the input to the cast. */
462 1661971 : op = gimple_assign_rhs1 (def_stmt);
463 1661971 : op_type = TREE_TYPE (op);
464 : }
465 : return res;
466 : }
467 :
468 : /* OP is an integer operand to an operation that returns TYPE, and we
469 : want to treat the operation as a widening one. So far we can treat
470 : it as widening from *COMMON_TYPE.
471 :
472 : Return true if OP is suitable for such a widening operation,
473 : either widening from *COMMON_TYPE or from some supertype of it.
474 : Update *COMMON_TYPE to the supertype in the latter case.
475 :
476 : SHIFT_P is true if OP is a shift amount. */
477 :
478 : static bool
479 300909 : vect_joust_widened_integer (tree type, bool shift_p, tree op,
480 : tree *common_type)
481 : {
482 : /* Calculate the minimum precision required by OP, without changing
483 : the sign of either operand. */
484 300909 : unsigned int precision;
485 300909 : if (shift_p)
486 : {
487 12988 : if (!wi::leu_p (wi::to_widest (op), TYPE_PRECISION (type) / 2))
488 : return false;
489 10419 : precision = TREE_INT_CST_LOW (op);
490 : }
491 : else
492 : {
493 287921 : precision = wi::min_precision (wi::to_widest (op),
494 287921 : TYPE_SIGN (*common_type));
495 287921 : if (precision * 2 > TYPE_PRECISION (type))
496 : return false;
497 : }
498 :
499 : /* If OP requires a wider type, switch to that type. The checks
500 : above ensure that this is still narrower than the result. */
501 284662 : precision = vect_element_precision (precision);
502 284662 : if (TYPE_PRECISION (*common_type) < precision)
503 6441 : *common_type = build_nonstandard_integer_type
504 6441 : (precision, TYPE_UNSIGNED (*common_type));
505 : return true;
506 : }
507 :
508 : /* Return true if the common supertype of NEW_TYPE and *COMMON_TYPE
509 : is narrower than TYPE, storing the supertype in *COMMON_TYPE if so. */
510 :
511 : static bool
512 45127 : vect_joust_widened_type (tree type, tree new_type, tree *common_type)
513 : {
514 45127 : if (types_compatible_p (*common_type, new_type))
515 : return true;
516 :
517 : /* See if *COMMON_TYPE can hold all values of NEW_TYPE. */
518 7760 : if ((TYPE_PRECISION (new_type) < TYPE_PRECISION (*common_type))
519 7760 : && (TYPE_UNSIGNED (new_type) || !TYPE_UNSIGNED (*common_type)))
520 : return true;
521 :
522 : /* See if NEW_TYPE can hold all values of *COMMON_TYPE. */
523 7147 : if (TYPE_PRECISION (*common_type) < TYPE_PRECISION (new_type)
524 7147 : && (TYPE_UNSIGNED (*common_type) || !TYPE_UNSIGNED (new_type)))
525 : {
526 342 : *common_type = new_type;
527 342 : return true;
528 : }
529 :
530 : /* We have mismatched signs, with the signed type being
531 : no wider than the unsigned type. In this case we need
532 : a wider signed type. */
533 6805 : unsigned int precision = MAX (TYPE_PRECISION (*common_type),
534 : TYPE_PRECISION (new_type));
535 6805 : precision *= 2;
536 :
537 6805 : if (precision * 2 > TYPE_PRECISION (type))
538 : return false;
539 :
540 43 : *common_type = build_nonstandard_integer_type (precision, false);
541 43 : return true;
542 : }
543 :
544 : /* Check whether STMT_INFO can be viewed as a tree of integer operations
545 : in which each node either performs CODE or WIDENED_CODE, and where
546 : each leaf operand is narrower than the result of STMT_INFO. MAX_NOPS
547 : specifies the maximum number of leaf operands. SHIFT_P says whether
548 : CODE and WIDENED_CODE are some sort of shift.
549 :
550 : If STMT_INFO is such a tree, return the number of leaf operands
551 : and describe them in UNPROM[0] onwards. Also set *COMMON_TYPE
552 : to a type that (a) is narrower than the result of STMT_INFO and
553 : (b) can hold all leaf operand values.
554 :
555 : If SUBTYPE is nonnull, allow the signs of the operands to
556 : differ, but not their precision. When that happens, *SUBTYPE
557 : is set to optab_vector_mixed_sign.
558 :
559 : Return 0 if STMT_INFO isn't such a tree, or if no such COMMON_TYPE
560 : exists. */
561 :
562 : static unsigned int
563 124081592 : vect_widened_op_tree (vec_info *vinfo, stmt_vec_info stmt_info, tree_code code,
564 : code_helper widened_code, bool shift_p,
565 : unsigned int max_nops,
566 : vect_unpromoted_value *unprom, tree *common_type,
567 : enum optab_subtype *subtype = NULL)
568 : {
569 : /* Check for an integer operation with the right code. */
570 124081592 : gimple* stmt = stmt_info->stmt;
571 124081592 : if (!(is_gimple_assign (stmt) || is_gimple_call (stmt)))
572 : return 0;
573 :
574 100220392 : code_helper rhs_code;
575 100220392 : if (is_gimple_assign (stmt))
576 85924664 : rhs_code = gimple_assign_rhs_code (stmt);
577 14295728 : else if (is_gimple_call (stmt))
578 14295728 : rhs_code = gimple_call_combined_fn (stmt);
579 : else
580 : return 0;
581 :
582 100220392 : if (rhs_code != code
583 100220392 : && rhs_code != widened_code)
584 : return 0;
585 :
586 6323555 : tree lhs = gimple_get_lhs (stmt);
587 6323555 : tree type = TREE_TYPE (lhs);
588 6323555 : if (!INTEGRAL_TYPE_P (type))
589 : return 0;
590 :
591 : /* Assume that both operands will be leaf operands. */
592 5742334 : max_nops -= 2;
593 :
594 : /* Check the operands. */
595 5742334 : unsigned int next_op = 0;
596 6491833 : for (unsigned int i = 0; i < 2; ++i)
597 : {
598 6168383 : vect_unpromoted_value *this_unprom = &unprom[next_op];
599 6168383 : unsigned int nops = 1;
600 6168383 : tree op = gimple_arg (stmt, i);
601 6168383 : if (i == 1 && TREE_CODE (op) == INTEGER_CST)
602 : {
603 : /* We already have a common type from earlier operands.
604 : Update it to account for OP. */
605 300909 : this_unprom->set_op (op, vect_constant_def);
606 300909 : if (!vect_joust_widened_integer (type, shift_p, op, common_type))
607 : return 0;
608 : }
609 : else
610 : {
611 : /* Only allow shifts by constants. */
612 5867474 : if (shift_p && i == 1)
613 : return 0;
614 :
615 5861413 : if (rhs_code != code)
616 : {
617 : /* If rhs_code is widened_code, don't look through further
618 : possible promotions, there is a promotion already embedded
619 : in the WIDEN_*_EXPR. */
620 1662 : if (TREE_CODE (op) != SSA_NAME
621 1662 : || !INTEGRAL_TYPE_P (TREE_TYPE (op)))
622 0 : return 0;
623 :
624 1662 : stmt_vec_info def_stmt_info;
625 1662 : gimple *def_stmt;
626 1662 : vect_def_type dt;
627 1662 : if (!vect_is_simple_use (op, vinfo, &dt, &def_stmt_info,
628 : &def_stmt))
629 : return 0;
630 1662 : this_unprom->set_op (op, dt, NULL);
631 : }
632 5859751 : else if (!vect_look_through_possible_promotion (vinfo, op,
633 : this_unprom))
634 : return 0;
635 :
636 5739273 : if (TYPE_PRECISION (this_unprom->type) == TYPE_PRECISION (type))
637 : {
638 : /* The operand isn't widened. If STMT_INFO has the code
639 : for an unwidened operation, recursively check whether
640 : this operand is a node of the tree. */
641 5263907 : if (rhs_code != code
642 5263907 : || max_nops == 0
643 5264373 : || this_unprom->dt != vect_internal_def)
644 : return 0;
645 :
646 : /* Give back the leaf slot allocated above now that we're
647 : not treating this as a leaf operand. */
648 466 : max_nops += 1;
649 :
650 : /* Recursively process the definition of the operand. */
651 466 : stmt_vec_info def_stmt_info
652 466 : = vect_get_internal_def (vinfo, this_unprom->op);
653 :
654 466 : nops = vect_widened_op_tree (vinfo, def_stmt_info, code,
655 : widened_code, shift_p, max_nops,
656 : this_unprom, common_type,
657 : subtype);
658 466 : if (nops == 0)
659 : return 0;
660 :
661 311 : max_nops -= nops;
662 : }
663 : else
664 : {
665 : /* Make sure that the operand is narrower than the result. */
666 475366 : if (TYPE_PRECISION (this_unprom->type) * 2
667 475366 : > TYPE_PRECISION (type))
668 : return 0;
669 :
670 : /* Update COMMON_TYPE for the new operand. */
671 471003 : if (i == 0)
672 425876 : *common_type = this_unprom->type;
673 45127 : else if (!vect_joust_widened_type (type, this_unprom->type,
674 : common_type))
675 : {
676 6762 : if (subtype)
677 : {
678 : /* See if we can sign extend the smaller type. */
679 285 : if (TYPE_PRECISION (this_unprom->type)
680 285 : > TYPE_PRECISION (*common_type))
681 27 : *common_type = this_unprom->type;
682 285 : *subtype = optab_vector_mixed_sign;
683 : }
684 : else
685 : return 0;
686 : }
687 : }
688 : }
689 749499 : next_op += nops;
690 : }
691 : return next_op;
692 : }
693 :
694 : /* Helper to return a new pattern temporary of type TYPE for STMT. If STMT
695 : is NULL, the caller must set SSA_NAME_DEF_STMT for the returned SSA var. */
696 :
697 : static tree
698 2063422 : vect_recog_temp_ssa_var (tree type, gimple *stmt = NULL)
699 : {
700 0 : return make_temp_ssa_name (type, stmt, "patt");
701 : }
702 :
703 : /* STMT2_INFO describes a type conversion that could be split into STMT1
704 : followed by a version of STMT2_INFO that takes NEW_RHS as its first
705 : input. Try to do this using pattern statements, returning true on
706 : success. */
707 :
708 : static bool
709 31448 : vect_split_statement (vec_info *vinfo, stmt_vec_info stmt2_info, tree new_rhs,
710 : gimple *stmt1, tree vectype)
711 : {
712 31448 : if (is_pattern_stmt_p (stmt2_info))
713 : {
714 : /* STMT2_INFO is part of a pattern. Get the statement to which
715 : the pattern is attached. */
716 460 : stmt_vec_info orig_stmt2_info = STMT_VINFO_RELATED_STMT (stmt2_info);
717 460 : vect_init_pattern_stmt (vinfo, stmt1, orig_stmt2_info, vectype);
718 :
719 460 : if (dump_enabled_p ())
720 19 : dump_printf_loc (MSG_NOTE, vect_location,
721 : "Splitting pattern statement: %G", stmt2_info->stmt);
722 :
723 : /* Since STMT2_INFO is a pattern statement, we can change it
724 : in-situ without worrying about changing the code for the
725 : containing block. */
726 460 : gimple_assign_set_rhs1 (stmt2_info->stmt, new_rhs);
727 :
728 460 : if (dump_enabled_p ())
729 : {
730 19 : dump_printf_loc (MSG_NOTE, vect_location, "into: %G", stmt1);
731 19 : dump_printf_loc (MSG_NOTE, vect_location, "and: %G",
732 : stmt2_info->stmt);
733 : }
734 :
735 460 : gimple_seq *def_seq = &STMT_VINFO_PATTERN_DEF_SEQ (orig_stmt2_info);
736 460 : if (STMT_VINFO_RELATED_STMT (orig_stmt2_info) == stmt2_info)
737 : /* STMT2_INFO is the actual pattern statement. Add STMT1
738 : to the end of the definition sequence. */
739 457 : gimple_seq_add_stmt_without_update (def_seq, stmt1);
740 : else
741 : {
742 : /* STMT2_INFO belongs to the definition sequence. Insert STMT1
743 : before it. */
744 3 : gimple_stmt_iterator gsi = gsi_for_stmt (stmt2_info->stmt, def_seq);
745 3 : gsi_insert_before_without_update (&gsi, stmt1, GSI_SAME_STMT);
746 : }
747 460 : return true;
748 : }
749 : else
750 : {
751 : /* STMT2_INFO doesn't yet have a pattern. Try to create a
752 : two-statement pattern now. */
753 30988 : gcc_assert (!STMT_VINFO_RELATED_STMT (stmt2_info));
754 30988 : tree lhs_type = TREE_TYPE (gimple_get_lhs (stmt2_info->stmt));
755 30988 : tree lhs_vectype = get_vectype_for_scalar_type (vinfo, lhs_type);
756 30988 : if (!lhs_vectype)
757 : return false;
758 :
759 30988 : if (dump_enabled_p ())
760 1927 : dump_printf_loc (MSG_NOTE, vect_location,
761 : "Splitting statement: %G", stmt2_info->stmt);
762 :
763 : /* Add STMT1 as a singleton pattern definition sequence. */
764 30988 : gimple_seq *def_seq = &STMT_VINFO_PATTERN_DEF_SEQ (stmt2_info);
765 30988 : vect_init_pattern_stmt (vinfo, stmt1, stmt2_info, vectype);
766 30988 : gimple_seq_add_stmt_without_update (def_seq, stmt1);
767 :
768 : /* Build the second of the two pattern statements. */
769 30988 : tree new_lhs = vect_recog_temp_ssa_var (lhs_type, NULL);
770 30988 : gassign *new_stmt2 = gimple_build_assign (new_lhs, NOP_EXPR, new_rhs);
771 30988 : vect_set_pattern_stmt (vinfo, new_stmt2, stmt2_info, lhs_vectype);
772 :
773 30988 : if (dump_enabled_p ())
774 : {
775 1927 : dump_printf_loc (MSG_NOTE, vect_location,
776 : "into pattern statements: %G", stmt1);
777 1927 : dump_printf_loc (MSG_NOTE, vect_location, "and: %G",
778 : (gimple *) new_stmt2);
779 : }
780 :
781 30988 : return true;
782 : }
783 : }
784 :
785 : /* Look for the following pattern
786 : X = x[i]
787 : Y = y[i]
788 : DIFF = X - Y
789 : DAD = ABS_EXPR<DIFF>
790 :
791 : ABS_STMT should point to a statement of code ABS_EXPR or ABSU_EXPR.
792 : HALF_TYPE and UNPROM will be set should the statement be found to
793 : be a widened operation.
794 : If DIFF_STMT is nonnull, *DIFF_STMT will be set to the statement
795 : that feeds the ABS_STMT if it is a MINUS_EXPR with undefined overflow.
796 : */
797 : static bool
798 21107533 : vect_recog_absolute_difference (vec_info *vinfo, gassign *abs_stmt,
799 : tree *half_type,
800 : vect_unpromoted_value unprom[2],
801 : gassign **diff_stmt)
802 : {
803 21107533 : if (!abs_stmt)
804 : return false;
805 :
806 : /* FORNOW. Can continue analyzing the def-use chain when this stmt is in a phi
807 : inside the loop (in case we are analyzing an outer-loop). */
808 21107533 : enum tree_code code = gimple_assign_rhs_code (abs_stmt);
809 21107533 : if (code != ABS_EXPR && code != ABSU_EXPR)
810 : return false;
811 :
812 24381 : tree abs_oprnd = gimple_assign_rhs1 (abs_stmt);
813 24381 : tree abs_type = TREE_TYPE (abs_oprnd);
814 24381 : if (!abs_oprnd)
815 : return false;
816 16861 : if (!ANY_INTEGRAL_TYPE_P (abs_type)
817 7816 : || TYPE_OVERFLOW_WRAPS (abs_type)
818 32049 : || TYPE_UNSIGNED (abs_type))
819 : return false;
820 :
821 : /* Peel off conversions from the ABS input. This can involve sign
822 : changes (e.g. from an unsigned subtraction to a signed ABS input)
823 : or signed promotion, but it can't include unsigned promotion.
824 : (Note that ABS of an unsigned promotion should have been folded
825 : away before now anyway.) */
826 7668 : vect_unpromoted_value unprom_diff;
827 7668 : abs_oprnd = vect_look_through_possible_promotion (vinfo, abs_oprnd,
828 : &unprom_diff);
829 7668 : if (!abs_oprnd)
830 : return false;
831 7372 : if (TYPE_PRECISION (unprom_diff.type) != TYPE_PRECISION (abs_type)
832 7372 : && TYPE_UNSIGNED (unprom_diff.type))
833 : return false;
834 :
835 : /* We then detect if the operand of abs_expr is defined by a minus_expr. */
836 7372 : stmt_vec_info diff_stmt_vinfo = vect_get_internal_def (vinfo, abs_oprnd);
837 7372 : if (!diff_stmt_vinfo)
838 : return false;
839 :
840 7201 : gassign *diff = dyn_cast <gassign *> (STMT_VINFO_STMT (diff_stmt_vinfo));
841 7201 : if (diff_stmt && diff
842 5765 : && gimple_assign_rhs_code (diff) == MINUS_EXPR
843 9306 : && TYPE_OVERFLOW_UNDEFINED (TREE_TYPE (abs_oprnd)))
844 275 : *diff_stmt = diff;
845 :
846 : /* FORNOW. Can continue analyzing the def-use chain when this stmt in a phi
847 : inside the loop (in case we are analyzing an outer-loop). */
848 7201 : if (vect_widened_op_tree (vinfo, diff_stmt_vinfo,
849 7201 : MINUS_EXPR, IFN_VEC_WIDEN_MINUS,
850 : false, 2, unprom, half_type))
851 : return true;
852 :
853 : return false;
854 : }
855 :
856 : /* Convert UNPROM to TYPE and return the result, adding new statements
857 : to STMT_INFO's pattern definition statements if no better way is
858 : available. VECTYPE is the vector form of TYPE.
859 :
860 : If SUBTYPE then convert the type based on the subtype. */
861 :
862 : static tree
863 474920 : vect_convert_input (vec_info *vinfo, stmt_vec_info stmt_info, tree type,
864 : vect_unpromoted_value *unprom, tree vectype,
865 : enum optab_subtype subtype = optab_default)
866 : {
867 : /* Update the type if the signs differ. */
868 474920 : if (subtype == optab_vector_mixed_sign)
869 : {
870 322 : gcc_assert (!TYPE_UNSIGNED (type));
871 322 : if (TYPE_UNSIGNED (TREE_TYPE (unprom->op)))
872 : {
873 161 : type = unsigned_type_for (type);
874 161 : vectype = unsigned_type_for (vectype);
875 : }
876 : }
877 :
878 : /* Check for a no-op conversion. */
879 474920 : if (types_compatible_p (type, TREE_TYPE (unprom->op)))
880 163517 : return unprom->op;
881 :
882 : /* Allow the caller to create constant vect_unpromoted_values. */
883 311403 : if (TREE_CODE (unprom->op) == INTEGER_CST)
884 190828 : return wide_int_to_tree (type, wi::to_widest (unprom->op));
885 :
886 120575 : tree input = unprom->op;
887 120575 : if (unprom->caster)
888 : {
889 65635 : tree lhs = gimple_get_lhs (unprom->caster->stmt);
890 65635 : tree lhs_type = TREE_TYPE (lhs);
891 :
892 : /* If the result of the existing cast is the right width, use it
893 : instead of the source of the cast. */
894 65635 : if (TYPE_PRECISION (lhs_type) == TYPE_PRECISION (type))
895 : input = lhs;
896 : /* If the precision we want is between the source and result
897 : precisions of the existing cast, try splitting the cast into
898 : two and tapping into a mid-way point. */
899 63573 : else if (TYPE_PRECISION (lhs_type) > TYPE_PRECISION (type)
900 63573 : && TYPE_PRECISION (type) > TYPE_PRECISION (unprom->type))
901 : {
902 : /* In order to preserve the semantics of the original cast,
903 : give the mid-way point the same signedness as the input value.
904 :
905 : It would be possible to use a signed type here instead if
906 : TYPE is signed and UNPROM->TYPE is unsigned, but that would
907 : make the sign of the midtype sensitive to the order in
908 : which we process the statements, since the signedness of
909 : TYPE is the signedness required by just one of possibly
910 : many users. Also, unsigned promotions are usually as cheap
911 : as or cheaper than signed ones, so it's better to keep an
912 : unsigned promotion. */
913 31448 : tree midtype = build_nonstandard_integer_type
914 31448 : (TYPE_PRECISION (type), TYPE_UNSIGNED (unprom->type));
915 31448 : tree vec_midtype = get_vectype_for_scalar_type (vinfo, midtype);
916 31448 : if (vec_midtype)
917 : {
918 31448 : input = vect_recog_temp_ssa_var (midtype, NULL);
919 31448 : gassign *new_stmt = gimple_build_assign (input, NOP_EXPR,
920 : unprom->op);
921 31448 : if (!vect_split_statement (vinfo, unprom->caster, input, new_stmt,
922 : vec_midtype))
923 0 : append_pattern_def_seq (vinfo, stmt_info,
924 : new_stmt, vec_midtype);
925 : }
926 : }
927 :
928 : /* See if we can reuse an existing result. */
929 65635 : if (types_compatible_p (type, TREE_TYPE (input)))
930 : return input;
931 : }
932 :
933 : /* We need a new conversion statement. */
934 97771 : tree new_op = vect_recog_temp_ssa_var (type, NULL);
935 97771 : gassign *new_stmt = gimple_build_assign (new_op, NOP_EXPR, input);
936 :
937 : /* If OP is an external value, see if we can insert the new statement
938 : on an incoming edge. */
939 97771 : if (input == unprom->op && unprom->dt == vect_external_def)
940 8593 : if (edge e = vect_get_external_def_edge (vinfo, input))
941 : {
942 804 : basic_block new_bb = gsi_insert_on_edge_immediate (e, new_stmt);
943 804 : gcc_assert (!new_bb);
944 : return new_op;
945 : }
946 :
947 : /* As a (common) last resort, add the statement to the pattern itself. */
948 96967 : append_pattern_def_seq (vinfo, stmt_info, new_stmt, vectype);
949 96967 : return new_op;
950 : }
951 :
952 : /* Invoke vect_convert_input for N elements of UNPROM and store the
953 : result in the corresponding elements of RESULT.
954 :
955 : If SUBTYPE then convert the type based on the subtype. */
956 :
957 : static void
958 241353 : vect_convert_inputs (vec_info *vinfo, stmt_vec_info stmt_info, unsigned int n,
959 : tree *result, tree type, vect_unpromoted_value *unprom,
960 : tree vectype, enum optab_subtype subtype = optab_default)
961 : {
962 716244 : for (unsigned int i = 0; i < n; ++i)
963 : {
964 : unsigned int j;
965 707958 : for (j = 0; j < i; ++j)
966 233538 : if (unprom[j].op == unprom[i].op)
967 : break;
968 :
969 474891 : if (j < i)
970 471 : result[i] = result[j];
971 : else
972 474420 : result[i] = vect_convert_input (vinfo, stmt_info,
973 474420 : type, &unprom[i], vectype, subtype);
974 : }
975 241353 : }
976 :
977 : /* The caller has created a (possibly empty) sequence of pattern definition
978 : statements followed by a single statement PATTERN_STMT. Cast the result
979 : of this final statement to TYPE. If a new statement is needed, add
980 : PATTERN_STMT to the end of STMT_INFO's pattern definition statements
981 : and return the new statement, otherwise return PATTERN_STMT as-is.
982 : VECITYPE is the vector form of PATTERN_STMT's result type. */
983 :
984 : static gimple *
985 268082 : vect_convert_output (vec_info *vinfo, stmt_vec_info stmt_info, tree type,
986 : gimple *pattern_stmt, tree vecitype)
987 : {
988 268082 : tree lhs = gimple_get_lhs (pattern_stmt);
989 268082 : if (!types_compatible_p (type, TREE_TYPE (lhs)))
990 : {
991 240628 : append_pattern_def_seq (vinfo, stmt_info, pattern_stmt, vecitype);
992 240628 : tree cast_var = vect_recog_temp_ssa_var (type, NULL);
993 240628 : pattern_stmt = gimple_build_assign (cast_var, NOP_EXPR, lhs);
994 : }
995 268082 : return pattern_stmt;
996 : }
997 :
998 : /* Return true if STMT_VINFO describes a reduction for which reassociation
999 : is allowed. If STMT_INFO is part of a group, assume that it's part of
1000 : a reduction chain and optimistically assume that all statements
1001 : except the last allow reassociation.
1002 : Also require it to have code CODE and to be a reduction
1003 : in the outermost loop. When returning true, store the operands in
1004 : *OP0_OUT and *OP1_OUT. */
1005 :
1006 : static bool
1007 92551743 : vect_reassociating_reduction_p (vec_info *vinfo,
1008 : stmt_vec_info stmt_info, tree_code code,
1009 : tree *op0_out, tree *op1_out)
1010 : {
1011 92551743 : loop_vec_info loop_info = dyn_cast <loop_vec_info> (vinfo);
1012 13424703 : if (!loop_info)
1013 : return false;
1014 :
1015 13424703 : gassign *assign = dyn_cast <gassign *> (stmt_info->stmt);
1016 14660064 : if (!assign || gimple_assign_rhs_code (assign) != code)
1017 : return false;
1018 :
1019 : /* We don't allow changing the order of the computation in the inner-loop
1020 : when doing outer-loop vectorization. */
1021 2626808 : class loop *loop = LOOP_VINFO_LOOP (loop_info);
1022 95019687 : if (loop && nested_in_vect_loop_p (loop, stmt_info))
1023 : return false;
1024 :
1025 2573696 : if (!vect_is_reduction (stmt_info))
1026 : return false;
1027 :
1028 170423 : if (needs_fold_left_reduction_p (TREE_TYPE (gimple_assign_lhs (assign)),
1029 170423 : code))
1030 : return false;
1031 :
1032 158864 : *op0_out = gimple_assign_rhs1 (assign);
1033 158864 : *op1_out = gimple_assign_rhs2 (assign);
1034 158864 : if (commutative_tree_code (code) && STMT_VINFO_REDUC_IDX (stmt_info) == 0)
1035 63702 : std::swap (*op0_out, *op1_out);
1036 : return true;
1037 : }
1038 :
1039 : /* Return true iff the target has a vector optab implementing the operation
1040 : CODE on type VECTYPE with SUBTYPE. */
1041 :
1042 : static bool
1043 842991 : target_has_vecop_for_code (tree_code code, tree vectype,
1044 : enum optab_subtype subtype = optab_vector)
1045 : {
1046 842991 : optab voptab = optab_for_tree_code (code, vectype, subtype);
1047 842991 : return voptab && can_implement_p (voptab, TYPE_MODE (vectype));
1048 : }
1049 :
1050 : /* match.pd function to match
1051 : (cond (cmp@3 a b) (convert@1 c) (convert@2 d))
1052 : with conditions:
1053 : 1) @1, @2, c, d, a, b are all integral type.
1054 : 2) There's single_use for both @1 and @2.
1055 : 3) a, c have same precision.
1056 : 4) c and @1 have different precision.
1057 : 5) c, d are the same type or they can differ in sign when convert is
1058 : truncation.
1059 :
1060 : record a and c and d and @3. */
1061 :
1062 : extern bool gimple_cond_expr_convert_p (tree, tree*, tree (*)(tree));
1063 :
1064 : /* Function vect_recog_cond_expr_convert
1065 :
1066 : Try to find the following pattern:
1067 :
1068 : TYPE_AB A,B;
1069 : TYPE_CD C,D;
1070 : TYPE_E E;
1071 : TYPE_E op_true = (TYPE_E) A;
1072 : TYPE_E op_false = (TYPE_E) B;
1073 :
1074 : E = C cmp D ? op_true : op_false;
1075 :
1076 : where
1077 : TYPE_PRECISION (TYPE_E) != TYPE_PRECISION (TYPE_CD);
1078 : TYPE_PRECISION (TYPE_AB) == TYPE_PRECISION (TYPE_CD);
1079 : single_use of op_true and op_false.
1080 : TYPE_AB could differ in sign when (TYPE_E) A is a truncation.
1081 :
1082 : Input:
1083 :
1084 : * STMT_VINFO: The stmt from which the pattern search begins.
1085 : here it starts with E = c cmp D ? op_true : op_false;
1086 :
1087 : Output:
1088 :
1089 : TYPE1 E' = C cmp D ? A : B;
1090 : TYPE3 E = (TYPE3) E';
1091 :
1092 : There may be an extra nop_convert for A or B to handle differing signedness.
1093 :
1094 : * TYPE_OUT: The vector type of the output of this pattern.
1095 :
1096 : * Return value: A new stmt that will be used to replace the sequence of
1097 : stmts that constitute the pattern. In this case it will be:
1098 : E = (TYPE3)E';
1099 : E' = C cmp D ? A : B; is recorded in pattern definition statements; */
1100 :
1101 : static gimple *
1102 30922576 : vect_recog_cond_expr_convert_pattern (vec_info *vinfo,
1103 : stmt_vec_info stmt_vinfo, tree *type_out)
1104 : {
1105 30922576 : gassign *last_stmt = dyn_cast <gassign *> (stmt_vinfo->stmt);
1106 21199158 : tree lhs, match[4], temp, type, new_lhs, op2, op1;
1107 21199158 : gimple *cond_stmt;
1108 21199158 : gimple *pattern_stmt;
1109 30922547 : enum tree_code code = NOP_EXPR;
1110 :
1111 21199158 : if (!last_stmt)
1112 : return NULL;
1113 :
1114 21199158 : lhs = gimple_assign_lhs (last_stmt);
1115 :
1116 : /* Find E = C cmp D ? (TYPE3) A ? (TYPE3) B;
1117 : TYPE_PRECISION (A) == TYPE_PRECISION (C). */
1118 21199158 : if (!gimple_cond_expr_convert_p (lhs, &match[0], NULL))
1119 : return NULL;
1120 :
1121 29 : if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (lhs)))
1122 20 : code = INTEGRAL_TYPE_P (TREE_TYPE (match[1])) ? FLOAT_EXPR : CONVERT_EXPR;
1123 9 : else if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (match[1])))
1124 0 : code = FIX_TRUNC_EXPR;
1125 :
1126 29 : op1 = match[1];
1127 29 : op2 = match[2];
1128 29 : type = TREE_TYPE (op1);
1129 : /* When op1/op2 is REAL_CST, the conversion must be CONVERT_EXPR from
1130 : SCALAR_FLOAT_TYPE_P which is restricted in gimple_cond_expr_convert_p.
1131 : Otherwise, the conversion could be FLOAT_EXPR, FIX_TRUNC_EXPR
1132 : or CONVERT_EXPR. */
1133 29 : if (TREE_CODE (op1) == REAL_CST)
1134 : {
1135 20 : op1 = const_unop (CONVERT_EXPR, TREE_TYPE (op2), op1);
1136 20 : type = TREE_TYPE (op2);
1137 20 : if (op1 == NULL_TREE)
1138 : return NULL;
1139 : }
1140 9 : else if (TREE_CODE (op2) == REAL_CST)
1141 : {
1142 0 : op2 = const_unop (FLOAT_EXPR, TREE_TYPE (op1), op2);
1143 0 : if (op2 == NULL_TREE)
1144 : return NULL;
1145 : }
1146 9 : else if (code == NOP_EXPR)
1147 : {
1148 9 : if (TYPE_SIGN (type) != TYPE_SIGN (TREE_TYPE (match[2])))
1149 : {
1150 9 : op2 = vect_recog_temp_ssa_var (type, NULL);
1151 9 : gimple* nop_stmt = gimple_build_assign (op2, NOP_EXPR, match[2]);
1152 9 : append_pattern_def_seq (vinfo, stmt_vinfo, nop_stmt);
1153 : }
1154 : }
1155 :
1156 29 : vect_pattern_detected ("vect_recog_cond_expr_convert_pattern", last_stmt);
1157 :
1158 29 : temp = vect_recog_temp_ssa_var (type, NULL);
1159 29 : cond_stmt = gimple_build_assign (temp, build3 (COND_EXPR, type, match[3],
1160 : op1, op2));
1161 29 : append_pattern_def_seq (vinfo, stmt_vinfo, cond_stmt);
1162 29 : new_lhs = vect_recog_temp_ssa_var (TREE_TYPE (lhs), NULL);
1163 29 : pattern_stmt = gimple_build_assign (new_lhs, code, temp);
1164 29 : *type_out = NULL_TREE;
1165 :
1166 29 : if (dump_enabled_p ())
1167 20 : dump_printf_loc (MSG_NOTE, vect_location,
1168 : "created pattern stmt: %G", pattern_stmt);
1169 : return pattern_stmt;
1170 : }
1171 :
1172 : /* Function vect_recog_dot_prod_pattern
1173 :
1174 : Try to find the following pattern:
1175 :
1176 : type1a x_t
1177 : type1b y_t;
1178 : TYPE1 prod;
1179 : TYPE2 sum = init;
1180 : loop:
1181 : sum_0 = phi <init, sum_1>
1182 : S1 x_t = ...
1183 : S2 y_t = ...
1184 : S3 x_T = (TYPE1) x_t;
1185 : S4 y_T = (TYPE1) y_t;
1186 : S5 prod = x_T * y_T;
1187 : [S6 prod = (TYPE2) prod; #optional]
1188 : S7 sum_1 = prod + sum_0;
1189 :
1190 : where 'TYPE1' is exactly double the size of type 'type1a' and 'type1b',
1191 : the sign of 'TYPE1' must be one of 'type1a' or 'type1b' but the sign of
1192 : 'type1a' and 'type1b' can differ.
1193 :
1194 : Input:
1195 :
1196 : * STMT_VINFO: The stmt from which the pattern search begins. In the
1197 : example, when this function is called with S7, the pattern {S3,S4,S5,S6,S7}
1198 : will be detected.
1199 :
1200 : Output:
1201 :
1202 : * TYPE_OUT: The type of the output of this pattern.
1203 :
1204 : * Return value: A new stmt that will be used to replace the sequence of
1205 : stmts that constitute the pattern. In this case it will be:
1206 : WIDEN_DOT_PRODUCT <x_t, y_t, sum_0>
1207 :
1208 : Note: The dot-prod idiom is a widening reduction pattern that is
1209 : vectorized without preserving all the intermediate results. It
1210 : produces only N/2 (widened) results (by summing up pairs of
1211 : intermediate results) rather than all N results. Therefore, we
1212 : cannot allow this pattern when we want to get all the results and in
1213 : the correct order (as is the case when this computation is in an
1214 : inner-loop nested in an outer-loop that is being vectorized). */
1215 :
1216 : static gimple *
1217 30851191 : vect_recog_dot_prod_pattern (vec_info *vinfo,
1218 : stmt_vec_info stmt_vinfo, tree *type_out)
1219 : {
1220 30851191 : tree oprnd0, oprnd1;
1221 30851191 : gimple *last_stmt = stmt_vinfo->stmt;
1222 30851191 : tree type, half_type;
1223 30851191 : gimple *pattern_stmt;
1224 30851191 : tree var;
1225 :
1226 : /* Look for the following pattern
1227 : DX = (TYPE1) X;
1228 : DY = (TYPE1) Y;
1229 : DPROD = DX * DY;
1230 : DDPROD = (TYPE2) DPROD;
1231 : sum_1 = DDPROD + sum_0;
1232 : In which
1233 : - DX is double the size of X
1234 : - DY is double the size of Y
1235 : - DX, DY, DPROD all have the same type but the sign
1236 : between X, Y and DPROD can differ.
1237 : - sum is the same size of DPROD or bigger
1238 : - sum has been recognized as a reduction variable.
1239 :
1240 : This is equivalent to:
1241 : DPROD = X w* Y; #widen mult
1242 : sum_1 = DPROD w+ sum_0; #widen summation
1243 : or
1244 : DPROD = X w* Y; #widen mult
1245 : sum_1 = DPROD + sum_0; #summation
1246 : */
1247 :
1248 : /* Starting from LAST_STMT, follow the defs of its uses in search
1249 : of the above pattern. */
1250 :
1251 30851191 : if (!vect_reassociating_reduction_p (vinfo, stmt_vinfo, PLUS_EXPR,
1252 : &oprnd0, &oprnd1))
1253 : return NULL;
1254 :
1255 53568 : type = TREE_TYPE (gimple_get_lhs (last_stmt));
1256 :
1257 53568 : vect_unpromoted_value unprom_mult;
1258 53568 : oprnd0 = vect_look_through_possible_promotion (vinfo, oprnd0, &unprom_mult);
1259 :
1260 : /* So far so good. Since last_stmt was detected as a (summation) reduction,
1261 : we know that oprnd1 is the reduction variable (defined by a loop-header
1262 : phi), and oprnd0 is an ssa-name defined by a stmt in the loop body.
1263 : Left to check that oprnd0 is defined by a (widen_)mult_expr */
1264 53568 : if (!oprnd0)
1265 : return NULL;
1266 :
1267 36285 : stmt_vec_info mult_vinfo = vect_get_internal_def (vinfo, oprnd0);
1268 36285 : if (!mult_vinfo)
1269 : return NULL;
1270 :
1271 : /* FORNOW. Can continue analyzing the def-use chain when this stmt in a phi
1272 : inside the loop (in case we are analyzing an outer-loop). */
1273 106320 : vect_unpromoted_value unprom0[2];
1274 35440 : enum optab_subtype subtype = optab_vector;
1275 35440 : if (!vect_widened_op_tree (vinfo, mult_vinfo, MULT_EXPR, WIDEN_MULT_EXPR,
1276 : false, 2, unprom0, &half_type, &subtype))
1277 : return NULL;
1278 :
1279 : /* If there are two widening operations, make sure they agree on the sign
1280 : of the extension. The result of an optab_vector_mixed_sign operation
1281 : is signed; otherwise, the result has the same sign as the operands. */
1282 1363 : if (TYPE_PRECISION (unprom_mult.type) != TYPE_PRECISION (type)
1283 2085 : && (subtype == optab_vector_mixed_sign
1284 722 : ? TYPE_UNSIGNED (unprom_mult.type)
1285 519 : : TYPE_SIGN (unprom_mult.type) != TYPE_SIGN (half_type)))
1286 : return NULL;
1287 :
1288 1282 : vect_pattern_detected ("vect_recog_dot_prod_pattern", last_stmt);
1289 :
1290 : /* If the inputs have mixed signs, canonicalize on using the signed
1291 : input type for analysis. This also helps when emulating mixed-sign
1292 : operations using signed operations. */
1293 1282 : if (subtype == optab_vector_mixed_sign)
1294 240 : half_type = signed_type_for (half_type);
1295 :
1296 1282 : tree half_vectype;
1297 1282 : if (!vect_supportable_conv_optab_p (vinfo, type, DOT_PROD_EXPR, half_type,
1298 : type_out, &half_vectype, subtype))
1299 : {
1300 : /* We can emulate a mixed-sign dot-product using a sequence of
1301 : signed dot-products; see vect_emulate_mixed_dot_prod for details. */
1302 583 : if (subtype != optab_vector_mixed_sign
1303 583 : || !vect_supportable_conv_optab_p (vinfo, signed_type_for (type),
1304 : DOT_PROD_EXPR, half_type,
1305 : type_out, &half_vectype,
1306 : optab_vector))
1307 568 : return NULL;
1308 :
1309 15 : *type_out = signed_or_unsigned_type_for (TYPE_UNSIGNED (type),
1310 : *type_out);
1311 : }
1312 :
1313 : /* Get the inputs in the appropriate types. */
1314 714 : tree mult_oprnd[2];
1315 714 : vect_convert_inputs (vinfo, stmt_vinfo, 2, mult_oprnd, half_type,
1316 : unprom0, half_vectype, subtype);
1317 :
1318 714 : var = vect_recog_temp_ssa_var (type, NULL);
1319 714 : pattern_stmt = gimple_build_assign (var, DOT_PROD_EXPR,
1320 : mult_oprnd[0], mult_oprnd[1], oprnd1);
1321 :
1322 714 : return pattern_stmt;
1323 : }
1324 :
1325 :
1326 : /* Function vect_recog_sad_pattern
1327 :
1328 : Try to find the following Sum of Absolute Difference (SAD) pattern:
1329 :
1330 : type x_t, y_t;
1331 : signed TYPE1 diff, abs_diff;
1332 : TYPE2 sum = init;
1333 : loop:
1334 : sum_0 = phi <init, sum_1>
1335 : S1 x_t = ...
1336 : S2 y_t = ...
1337 : S3 x_T = (TYPE1) x_t;
1338 : S4 y_T = (TYPE1) y_t;
1339 : S5 diff = x_T - y_T;
1340 : S6 abs_diff = ABS_EXPR <diff>;
1341 : [S7 abs_diff = (TYPE2) abs_diff; #optional]
1342 : S8 sum_1 = abs_diff + sum_0;
1343 :
1344 : where 'TYPE1' is at least double the size of type 'type', and 'TYPE2' is the
1345 : same size of 'TYPE1' or bigger. This is a special case of a reduction
1346 : computation.
1347 :
1348 : Input:
1349 :
1350 : * STMT_VINFO: The stmt from which the pattern search begins. In the
1351 : example, when this function is called with S8, the pattern
1352 : {S3,S4,S5,S6,S7,S8} will be detected.
1353 :
1354 : Output:
1355 :
1356 : * TYPE_OUT: The type of the output of this pattern.
1357 :
1358 : * Return value: A new stmt that will be used to replace the sequence of
1359 : stmts that constitute the pattern. In this case it will be:
1360 : SAD_EXPR <x_t, y_t, sum_0>
1361 : */
1362 :
1363 : static gimple *
1364 30850482 : vect_recog_sad_pattern (vec_info *vinfo,
1365 : stmt_vec_info stmt_vinfo, tree *type_out)
1366 : {
1367 30850482 : gimple *last_stmt = stmt_vinfo->stmt;
1368 30850482 : tree half_type;
1369 :
1370 : /* Look for the following pattern
1371 : DX = (TYPE1) X;
1372 : DY = (TYPE1) Y;
1373 : DDIFF = DX - DY;
1374 : DAD = ABS_EXPR <DDIFF>;
1375 : DDPROD = (TYPE2) DPROD;
1376 : sum_1 = DAD + sum_0;
1377 : In which
1378 : - DX is at least double the size of X
1379 : - DY is at least double the size of Y
1380 : - DX, DY, DDIFF, DAD all have the same type
1381 : - sum is the same size of DAD or bigger
1382 : - sum has been recognized as a reduction variable.
1383 :
1384 : This is equivalent to:
1385 : DDIFF = X w- Y; #widen sub
1386 : DAD = ABS_EXPR <DDIFF>;
1387 : sum_1 = DAD w+ sum_0; #widen summation
1388 : or
1389 : DDIFF = X w- Y; #widen sub
1390 : DAD = ABS_EXPR <DDIFF>;
1391 : sum_1 = DAD + sum_0; #summation
1392 : */
1393 :
1394 : /* Starting from LAST_STMT, follow the defs of its uses in search
1395 : of the above pattern. */
1396 :
1397 30850482 : tree plus_oprnd0, plus_oprnd1;
1398 30850482 : if (!vect_reassociating_reduction_p (vinfo, stmt_vinfo, PLUS_EXPR,
1399 : &plus_oprnd0, &plus_oprnd1))
1400 : return NULL;
1401 :
1402 52854 : tree sum_type = TREE_TYPE (gimple_get_lhs (last_stmt));
1403 :
1404 : /* Any non-truncating sequence of conversions is OK here, since
1405 : with a successful match, the result of the ABS(U) is known to fit
1406 : within the nonnegative range of the result type. (It cannot be the
1407 : negative of the minimum signed value due to the range of the widening
1408 : MINUS_EXPR.) */
1409 52854 : vect_unpromoted_value unprom_abs;
1410 52854 : plus_oprnd0 = vect_look_through_possible_promotion (vinfo, plus_oprnd0,
1411 : &unprom_abs);
1412 :
1413 : /* So far so good. Since last_stmt was detected as a (summation) reduction,
1414 : we know that plus_oprnd1 is the reduction variable (defined by a loop-header
1415 : phi), and plus_oprnd0 is an ssa-name defined by a stmt in the loop body.
1416 : Then check that plus_oprnd0 is defined by an abs_expr. */
1417 :
1418 52854 : if (!plus_oprnd0)
1419 : return NULL;
1420 :
1421 35571 : stmt_vec_info abs_stmt_vinfo = vect_get_internal_def (vinfo, plus_oprnd0);
1422 35571 : if (!abs_stmt_vinfo)
1423 : return NULL;
1424 :
1425 : /* FORNOW. Can continue analyzing the def-use chain when this stmt in a phi
1426 : inside the loop (in case we are analyzing an outer-loop). */
1427 34726 : gassign *abs_stmt = dyn_cast <gassign *> (abs_stmt_vinfo->stmt);
1428 104178 : vect_unpromoted_value unprom[2];
1429 :
1430 34726 : if (!abs_stmt)
1431 : {
1432 30850372 : gcall *abd_stmt = dyn_cast <gcall *> (abs_stmt_vinfo->stmt);
1433 302 : if (!abd_stmt
1434 302 : || !gimple_call_internal_p (abd_stmt)
1435 0 : || gimple_call_num_args (abd_stmt) != 2)
1436 : return NULL;
1437 :
1438 0 : tree abd_oprnd0 = gimple_call_arg (abd_stmt, 0);
1439 0 : tree abd_oprnd1 = gimple_call_arg (abd_stmt, 1);
1440 :
1441 0 : if (gimple_call_internal_fn (abd_stmt) == IFN_ABD
1442 0 : || gimple_call_internal_fn (abd_stmt) == IFN_VEC_WIDEN_ABD)
1443 : {
1444 0 : unprom[0].op = abd_oprnd0;
1445 0 : unprom[0].type = TREE_TYPE (abd_oprnd0);
1446 0 : unprom[1].op = abd_oprnd1;
1447 0 : unprom[1].type = TREE_TYPE (abd_oprnd1);
1448 : }
1449 : else
1450 : return NULL;
1451 :
1452 0 : half_type = unprom[0].type;
1453 : }
1454 34359 : else if (!vect_recog_absolute_difference (vinfo, abs_stmt, &half_type,
1455 : unprom, NULL))
1456 : return NULL;
1457 :
1458 806 : vect_pattern_detected ("vect_recog_sad_pattern", last_stmt);
1459 :
1460 806 : tree half_vectype;
1461 806 : if (!vect_supportable_direct_optab_p (vinfo, sum_type, SAD_EXPR, half_type,
1462 : type_out, &half_vectype))
1463 : return NULL;
1464 :
1465 : /* Get the inputs to the SAD_EXPR in the appropriate types. */
1466 412 : tree sad_oprnd[2];
1467 412 : vect_convert_inputs (vinfo, stmt_vinfo, 2, sad_oprnd, half_type,
1468 : unprom, half_vectype);
1469 :
1470 412 : tree var = vect_recog_temp_ssa_var (sum_type, NULL);
1471 412 : gimple *pattern_stmt = gimple_build_assign (var, SAD_EXPR, sad_oprnd[0],
1472 : sad_oprnd[1], plus_oprnd1);
1473 :
1474 412 : return pattern_stmt;
1475 : }
1476 :
1477 : /* Function vect_recog_abd_pattern
1478 :
1479 : Try to find the following ABsolute Difference (ABD) or
1480 : widening ABD (WIDEN_ABD) pattern:
1481 :
1482 : TYPE1 x;
1483 : TYPE2 y;
1484 : TYPE3 x_cast = (TYPE3) x; // widening or no-op
1485 : TYPE3 y_cast = (TYPE3) y; // widening or no-op
1486 : TYPE3 diff = x_cast - y_cast;
1487 : TYPE4 diff_cast = (TYPE4) diff; // widening or no-op
1488 : TYPE5 abs = ABS(U)_EXPR <diff_cast>;
1489 :
1490 : WIDEN_ABD exists to optimize the case where TYPE4 is at least
1491 : twice as wide as TYPE3.
1492 :
1493 : Input:
1494 :
1495 : * STMT_VINFO: The stmt from which the pattern search begins
1496 :
1497 : Output:
1498 :
1499 : * TYPE_OUT: The type of the output of this pattern
1500 :
1501 : * Return value: A new stmt that will be used to replace the sequence of
1502 : stmts that constitute the pattern, principally:
1503 : out = IFN_ABD (x, y)
1504 : out = IFN_WIDEN_ABD (x, y)
1505 : */
1506 :
1507 : static gimple *
1508 30796434 : vect_recog_abd_pattern (vec_info *vinfo,
1509 : stmt_vec_info stmt_vinfo, tree *type_out)
1510 : {
1511 51869608 : gassign *last_stmt = dyn_cast <gassign *> (STMT_VINFO_STMT (stmt_vinfo));
1512 21073174 : if (!last_stmt)
1513 : return NULL;
1514 :
1515 21073174 : tree out_type = TREE_TYPE (gimple_assign_lhs (last_stmt));
1516 :
1517 63219522 : vect_unpromoted_value unprom[2];
1518 21073174 : gassign *diff_stmt = NULL;
1519 21073174 : tree abd_in_type;
1520 21073174 : if (!vect_recog_absolute_difference (vinfo, last_stmt, &abd_in_type,
1521 : unprom, &diff_stmt))
1522 : {
1523 : /* We cannot try further without having a non-widening MINUS. */
1524 21071560 : if (!diff_stmt)
1525 : return NULL;
1526 :
1527 275 : unprom[0].op = gimple_assign_rhs1 (diff_stmt);
1528 275 : unprom[1].op = gimple_assign_rhs2 (diff_stmt);
1529 275 : abd_in_type = signed_type_for (out_type);
1530 : }
1531 :
1532 1889 : tree abd_out_type = abd_in_type;
1533 :
1534 1889 : tree vectype_in = get_vectype_for_scalar_type (vinfo, abd_in_type);
1535 1889 : if (!vectype_in)
1536 : return NULL;
1537 :
1538 1872 : internal_fn ifn = IFN_ABD;
1539 1872 : tree vectype_out = vectype_in;
1540 :
1541 1872 : if (TYPE_PRECISION (out_type) >= TYPE_PRECISION (abd_in_type) * 2
1542 1872 : && stmt_vinfo->min_output_precision >= TYPE_PRECISION (abd_in_type) * 2)
1543 : {
1544 1505 : tree mid_type
1545 1505 : = build_nonstandard_integer_type (TYPE_PRECISION (abd_in_type) * 2,
1546 1505 : TYPE_UNSIGNED (abd_in_type));
1547 1505 : tree mid_vectype = get_vectype_for_scalar_type (vinfo, mid_type);
1548 :
1549 1505 : code_helper dummy_code;
1550 1505 : int dummy_int;
1551 1505 : auto_vec<tree> dummy_vec;
1552 1505 : if (mid_vectype
1553 1505 : && supportable_widening_operation (IFN_VEC_WIDEN_ABD,
1554 : mid_vectype, vectype_in, false,
1555 : &dummy_code, &dummy_code,
1556 : &dummy_int, &dummy_vec))
1557 : {
1558 0 : ifn = IFN_VEC_WIDEN_ABD;
1559 0 : abd_out_type = mid_type;
1560 0 : vectype_out = mid_vectype;
1561 : }
1562 1505 : }
1563 :
1564 1505 : if (ifn == IFN_ABD
1565 1872 : && !direct_internal_fn_supported_p (ifn, vectype_in,
1566 : OPTIMIZE_FOR_SPEED))
1567 : return NULL;
1568 :
1569 0 : vect_pattern_detected ("vect_recog_abd_pattern", last_stmt);
1570 :
1571 0 : tree abd_oprnds[2];
1572 0 : vect_convert_inputs (vinfo, stmt_vinfo, 2, abd_oprnds,
1573 : abd_in_type, unprom, vectype_in);
1574 :
1575 0 : *type_out = get_vectype_for_scalar_type (vinfo, out_type);
1576 :
1577 0 : tree abd_result = vect_recog_temp_ssa_var (abd_out_type, NULL);
1578 0 : gcall *abd_stmt = gimple_build_call_internal (ifn, 2,
1579 : abd_oprnds[0], abd_oprnds[1]);
1580 0 : gimple_call_set_lhs (abd_stmt, abd_result);
1581 0 : gimple_set_location (abd_stmt, gimple_location (last_stmt));
1582 :
1583 0 : gimple *stmt = abd_stmt;
1584 0 : if (TYPE_PRECISION (abd_in_type) == TYPE_PRECISION (abd_out_type)
1585 0 : && TYPE_PRECISION (abd_out_type) < TYPE_PRECISION (out_type)
1586 0 : && !TYPE_UNSIGNED (abd_out_type))
1587 : {
1588 0 : tree unsign = unsigned_type_for (abd_out_type);
1589 0 : stmt = vect_convert_output (vinfo, stmt_vinfo, unsign, stmt, vectype_out);
1590 0 : vectype_out = get_vectype_for_scalar_type (vinfo, unsign);
1591 : }
1592 :
1593 0 : return vect_convert_output (vinfo, stmt_vinfo, out_type, stmt, vectype_out);
1594 : }
1595 :
1596 : /* Recognize an operation that performs ORIG_CODE on widened inputs,
1597 : so that it can be treated as though it had the form:
1598 :
1599 : A_TYPE a;
1600 : B_TYPE b;
1601 : HALF_TYPE a_cast = (HALF_TYPE) a; // possible no-op
1602 : HALF_TYPE b_cast = (HALF_TYPE) b; // possible no-op
1603 : | RES_TYPE a_extend = (RES_TYPE) a_cast; // promotion from HALF_TYPE
1604 : | RES_TYPE b_extend = (RES_TYPE) b_cast; // promotion from HALF_TYPE
1605 : | RES_TYPE res = a_extend ORIG_CODE b_extend;
1606 :
1607 : Try to replace the pattern with:
1608 :
1609 : A_TYPE a;
1610 : B_TYPE b;
1611 : HALF_TYPE a_cast = (HALF_TYPE) a; // possible no-op
1612 : HALF_TYPE b_cast = (HALF_TYPE) b; // possible no-op
1613 : | EXT_TYPE ext = a_cast WIDE_CODE b_cast;
1614 : | RES_TYPE res = (EXT_TYPE) ext; // possible no-op
1615 :
1616 : where EXT_TYPE is wider than HALF_TYPE but has the same signedness.
1617 :
1618 : SHIFT_P is true if ORIG_CODE and WIDE_CODE are shifts. NAME is the
1619 : name of the pattern being matched, for dump purposes. */
1620 :
1621 : static gimple *
1622 124008541 : vect_recog_widen_op_pattern (vec_info *vinfo,
1623 : stmt_vec_info last_stmt_info, tree *type_out,
1624 : tree_code orig_code, code_helper wide_code,
1625 : bool shift_p, const char *name)
1626 : {
1627 124008541 : gimple *last_stmt = last_stmt_info->stmt;
1628 :
1629 372025623 : vect_unpromoted_value unprom[2];
1630 124008541 : tree half_type;
1631 124008541 : if (!vect_widened_op_tree (vinfo, last_stmt_info, orig_code, orig_code,
1632 : shift_p, 2, unprom, &half_type))
1633 :
1634 : return NULL;
1635 :
1636 : /* Pattern detected. */
1637 317196 : vect_pattern_detected (name, last_stmt);
1638 :
1639 317196 : tree type = TREE_TYPE (gimple_get_lhs (last_stmt));
1640 317196 : tree itype = type;
1641 317196 : if (TYPE_PRECISION (type) != TYPE_PRECISION (half_type) * 2
1642 317196 : || TYPE_UNSIGNED (type) != TYPE_UNSIGNED (half_type))
1643 223261 : itype = build_nonstandard_integer_type (TYPE_PRECISION (half_type) * 2,
1644 223261 : TYPE_UNSIGNED (half_type));
1645 :
1646 : /* Check target support */
1647 317196 : tree vectype = get_vectype_for_scalar_type (vinfo, half_type);
1648 317196 : tree vecitype = get_vectype_for_scalar_type (vinfo, itype);
1649 317196 : tree ctype = itype;
1650 317196 : tree vecctype = vecitype;
1651 317196 : if (orig_code == MINUS_EXPR
1652 9485 : && TYPE_UNSIGNED (itype)
1653 321692 : && TYPE_PRECISION (type) > TYPE_PRECISION (itype))
1654 : {
1655 : /* Subtraction is special, even if half_type is unsigned and no matter
1656 : whether type is signed or unsigned, if type is wider than itype,
1657 : we need to sign-extend from the widening operation result to the
1658 : result type.
1659 : Consider half_type unsigned char, operand 1 0xfe, operand 2 0xff,
1660 : itype unsigned short and type either int or unsigned int.
1661 : Widened (unsigned short) 0xfe - (unsigned short) 0xff is
1662 : (unsigned short) 0xffff, but for type int we want the result -1
1663 : and for type unsigned int 0xffffffff rather than 0xffff. */
1664 694 : ctype = build_nonstandard_integer_type (TYPE_PRECISION (itype), 0);
1665 694 : vecctype = get_vectype_for_scalar_type (vinfo, ctype);
1666 : }
1667 :
1668 317196 : code_helper dummy_code;
1669 317196 : int dummy_int;
1670 317196 : auto_vec<tree> dummy_vec;
1671 317196 : if (!vectype
1672 317196 : || !vecitype
1673 247046 : || !vecctype
1674 564242 : || !supportable_widening_operation (wide_code, vecitype, vectype, true,
1675 : &dummy_code, &dummy_code,
1676 : &dummy_int, &dummy_vec))
1677 210796 : return NULL;
1678 :
1679 106400 : *type_out = get_vectype_for_scalar_type (vinfo, type);
1680 106400 : if (!*type_out)
1681 : return NULL;
1682 :
1683 106400 : tree oprnd[2];
1684 106400 : vect_convert_inputs (vinfo, last_stmt_info,
1685 : 2, oprnd, half_type, unprom, vectype);
1686 :
1687 106400 : tree var = vect_recog_temp_ssa_var (itype, NULL);
1688 106400 : gimple *pattern_stmt = vect_gimple_build (var, wide_code, oprnd[0], oprnd[1]);
1689 :
1690 106400 : if (vecctype != vecitype)
1691 0 : pattern_stmt = vect_convert_output (vinfo, last_stmt_info, ctype,
1692 : pattern_stmt, vecitype);
1693 :
1694 106400 : return vect_convert_output (vinfo, last_stmt_info,
1695 106400 : type, pattern_stmt, vecctype);
1696 317196 : }
1697 :
1698 : /* Try to detect multiplication on widened inputs, converting MULT_EXPR
1699 : to WIDEN_MULT_EXPR. See vect_recog_widen_op_pattern for details. */
1700 :
1701 : static gimple *
1702 30875925 : vect_recog_widen_mult_pattern (vec_info *vinfo, stmt_vec_info last_stmt_info,
1703 : tree *type_out)
1704 : {
/* Thin wrapper: all the work is done by vect_recog_widen_op_pattern, which
   is asked to turn a MULT_EXPR on widened inputs into WIDEN_MULT_EXPR.
   `false' is the SHIFT_P argument and the string is the pattern name the
   generic recognizer reports on a match.  The result (NULL when nothing
   matched) and *TYPE_OUT come straight from the callee.  */
1705 30875925 : return vect_recog_widen_op_pattern (vinfo, last_stmt_info, type_out,
1706 30875925 : MULT_EXPR, WIDEN_MULT_EXPR, false,
1707 30875925 : "vect_recog_widen_mult_pattern");
1708 : }
1709 :
1710 : /* Try to detect addition on widened inputs, converting PLUS_EXPR
1711 : to IFN_VEC_WIDEN_PLUS. See vect_recog_widen_op_pattern for details. */
1712 :
1713 : static gimple *
1714 31141001 : vect_recog_widen_plus_pattern (vec_info *vinfo, stmt_vec_info last_stmt_info,
1715 : tree *type_out)
1716 : {
/* Thin wrapper: all the work is done by vect_recog_widen_op_pattern, which
   is asked to turn a PLUS_EXPR on widened inputs into the internal function
   IFN_VEC_WIDEN_PLUS.  `false' is the SHIFT_P argument and the string is
   the pattern name the generic recognizer reports on a match.  The result
   (NULL when nothing matched) and *TYPE_OUT come straight from the
   callee.  */
1717 31141001 : return vect_recog_widen_op_pattern (vinfo, last_stmt_info, type_out,
1718 31141001 : PLUS_EXPR, IFN_VEC_WIDEN_PLUS,
1719 31141001 : false, "vect_recog_widen_plus_pattern");
1720 : }
1721 :
1722 : /* Try to detect subtraction on widened inputs, converting MINUS_EXPR
1723 : to IFN_VEC_WIDEN_MINUS. See vect_recog_widen_op_pattern for details. */
1724 : static gimple *
1725 31141001 : vect_recog_widen_minus_pattern (vec_info *vinfo, stmt_vec_info last_stmt_info,
1726 : tree *type_out)
1727 : {
/* Thin wrapper: all the work is done by vect_recog_widen_op_pattern, which
   is asked to turn a MINUS_EXPR on widened inputs into the internal
   function IFN_VEC_WIDEN_MINUS.  `false' is the SHIFT_P argument and the
   string is the pattern name the generic recognizer reports on a match.
   The result (NULL when nothing matched) and *TYPE_OUT come straight from
   the callee.  */
1728 31141001 : return vect_recog_widen_op_pattern (vinfo, last_stmt_info, type_out,
1729 31141001 : MINUS_EXPR, IFN_VEC_WIDEN_MINUS,
1730 31141001 : false, "vect_recog_widen_minus_pattern");
1731 : }
1732 :
1733 : /* Try to detect abd on widened inputs, converting IFN_ABD
1734 : to IFN_VEC_WIDEN_ABD. */
1735 : static gimple *
1736 31141001 : vect_recog_widen_abd_pattern (vec_info *vinfo, stmt_vec_info stmt_vinfo,
1737 : tree *type_out)
1738 : {
/* Match a conversion statement whose operand (after looking through
   possible promotions) is defined by an IFN_ABD internal call, and build
   one IFN_VEC_WIDEN_ABD call on the ABD's two operands in its place.
   Returns the new call, or NULL as soon as any check below fails.  On
   success *TYPE_OUT is the vector type for the conversion's result type.  */

/* Only an assignment whose rhs code is a conversion can start the
   pattern.  */
1739 31141001 : gassign *last_stmt = dyn_cast <gassign *> (STMT_VINFO_STMT (stmt_vinfo));
1740 28845243 : if (!last_stmt || !CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (last_stmt)))
1741 : return NULL;
1742 :
1743 3035471 : tree last_rhs = gimple_assign_rhs1 (last_stmt);
1744 :
/* The conversion must go from an unsigned integral type to an integral type
   of exactly twice the precision.  */
1745 3035471 : tree in_type = TREE_TYPE (last_rhs);
1746 3035471 : tree out_type = TREE_TYPE (gimple_assign_lhs (last_stmt));
1747 3035471 : if (!INTEGRAL_TYPE_P (in_type)
1748 2723614 : || !INTEGRAL_TYPE_P (out_type)
1749 2607087 : || TYPE_PRECISION (in_type) * 2 != TYPE_PRECISION (out_type)
1750 3658679 : || !TYPE_UNSIGNED (in_type))
1751 : return NULL;
1752 :
/* Whatever the look-through returns must still have the precision of
   IN_TYPE, otherwise give up.  */
1753 217073 : vect_unpromoted_value unprom;
1754 217073 : tree op = vect_look_through_possible_promotion (vinfo, last_rhs, &unprom);
1755 217073 : if (!op || TYPE_PRECISION (TREE_TYPE (op)) != TYPE_PRECISION (in_type))
1756 : return NULL;
1757 :
1758 214564 : stmt_vec_info abd_pattern_vinfo = vect_get_internal_def (vinfo, op);
1759 214564 : if (!abd_pattern_vinfo)
1760 : return NULL;
1761 :
/* The defining statement has to be an internal-function call to
   IFN_ABD.  */
1762 31150165 : gcall *abd_stmt = dyn_cast <gcall *> (STMT_VINFO_STMT (abd_pattern_vinfo));
1763 9164 : if (!abd_stmt
1764 9164 : || !gimple_call_internal_p (abd_stmt)
1765 265 : || gimple_call_internal_fn (abd_stmt) != IFN_ABD)
1766 : return NULL;
1767 :
/* NOTE(review): neither vector type is tested for NULL before it is handed
   to supportable_widening_operation, whereas the other
   get_vectype_for_scalar_type callers in this file check the result before
   using it -- confirm the callee copes with a NULL vector type.  */
1768 0 : tree vectype_in = get_vectype_for_scalar_type (vinfo, in_type);
1769 0 : tree vectype_out = get_vectype_for_scalar_type (vinfo, out_type);
1770 :
/* The dummy_* objects only receive outputs that are not looked at here;
   just the yes/no answer matters.  */
1771 0 : code_helper dummy_code;
1772 0 : int dummy_int;
1773 0 : auto_vec<tree> dummy_vec;
1774 0 : if (!supportable_widening_operation (IFN_VEC_WIDEN_ABD, vectype_out,
1775 : vectype_in, false,
1776 : &dummy_code, &dummy_code,
1777 : &dummy_int, &dummy_vec))
1778 : return NULL;
1779 :
1780 0 : vect_pattern_detected ("vect_recog_widen_abd_pattern", last_stmt);
1781 :
1782 0 : *type_out = vectype_out;
1783 :
/* Build OUT_TYPE tmp = .VEC_WIDEN_ABD (abd_op0, abd_op1), reusing the
   arguments of the ABD call and the location of the conversion.  */
1784 0 : tree abd_oprnd0 = gimple_call_arg (abd_stmt, 0);
1785 0 : tree abd_oprnd1 = gimple_call_arg (abd_stmt, 1);
1786 0 : tree widen_abd_result = vect_recog_temp_ssa_var (out_type, NULL);
1787 0 : gcall *widen_abd_stmt = gimple_build_call_internal (IFN_VEC_WIDEN_ABD, 2,
1788 : abd_oprnd0, abd_oprnd1);
1789 0 : gimple_call_set_lhs (widen_abd_stmt, widen_abd_result);
1790 0 : gimple_set_location (widen_abd_stmt, gimple_location (last_stmt));
1791 0 : return widen_abd_stmt;
1792 0 : }
1793 :
1794 : /* Function vect_recog_ctz_ffs_pattern
1795 :
1796 : Try to find the following pattern:
1797 :
1798 : TYPE1 A;
1799 : TYPE1 B;
1800 :
1801 : B = __builtin_ctz{,l,ll} (A);
1802 :
1803 : or
1804 :
1805 : B = __builtin_ffs{,l,ll} (A);
1806 :
1807 : Input:
1808 :
1809 : * STMT_VINFO: The stmt from which the pattern search begins.
1810 : here it starts with B = __builtin_* (A);
1811 :
1812 : Output:
1813 :
1814 : * TYPE_OUT: The vector type of the output of this pattern.
1815 :
1816 : * Return value: A new stmt that will be used to replace the sequence of
1817 : stmts that constitute the pattern, using clz or popcount builtins. */
1818 :
1819 : static gimple *
1820 30850421 : vect_recog_ctz_ffs_pattern (vec_info *vinfo, stmt_vec_info stmt_vinfo,
1821 : tree *type_out)
1822 : {
/* Rewrite a ctz or ffs call in terms of another bit-counting internal
   function (ctz, clz or popcount) that has direct vector support, plus a
   few helper statements appended to STMT_VINFO's pattern def sequence.
   Returns the final pattern statement or NULL; on success *TYPE_OUT is the
   vector type of the call's lhs type.

   Locals:
     ifn / ifnnew          the function being replaced / the replacement.
     defined_at_zero, val  whether the original call has a defined result
			   for a zero argument, and that result.
     defined_at_zero_new, val_new
			   the same for the replacement function.
     val_cmp               compared against VAL at the end to decide whether
			   a zero-input fix-up is needed (appears to be the
			   value the rewritten expression yields for zero).
     sub / add             when nonzero, the final result is SUB - call or
			   call + ADD respectively.  */
1823 30850421 : gimple *call_stmt = stmt_vinfo->stmt;
1824 30850421 : gimple *pattern_stmt;
1825 30850421 : tree rhs_oprnd, rhs_type, lhs_oprnd, lhs_type, vec_type, vec_rhs_type;
1826 30850421 : tree new_var;
1827 30850421 : internal_fn ifn = IFN_LAST, ifnnew = IFN_LAST;
1828 30850421 : bool defined_at_zero = true, defined_at_zero_new = false;
1829 30850421 : int val = 0, val_new = 0, val_cmp = 0;
1830 30850421 : int prec;
1831 30850421 : int sub = 0, add = 0;
1832 30850421 : location_t loc;
1833 :
1834 30850421 : if (!is_gimple_call (call_stmt))
1835 : return NULL;
1836 :
/* Only calls with one or two arguments are considered.  */
1837 3577157 : if (gimple_call_num_args (call_stmt) != 1
1838 3577157 : && gimple_call_num_args (call_stmt) != 2)
1839 : return NULL;
1840 :
/* The call needs an lhs, integral argument and result types, an argument
   type whose precision matches its mode, and an SSA_NAME argument.  */
1841 1995561 : rhs_oprnd = gimple_call_arg (call_stmt, 0);
1842 1995561 : rhs_type = TREE_TYPE (rhs_oprnd);
1843 1995561 : lhs_oprnd = gimple_call_lhs (call_stmt);
1844 1995561 : if (!lhs_oprnd)
1845 : return NULL;
1846 975241 : lhs_type = TREE_TYPE (lhs_oprnd);
1847 975241 : if (!INTEGRAL_TYPE_P (lhs_type)
1848 327291 : || !INTEGRAL_TYPE_P (rhs_type)
1849 44566 : || !type_has_mode_precision_p (rhs_type)
1850 1018220 : || TREE_CODE (rhs_oprnd) != SSA_NAME)
1851 944462 : return NULL;
1852 :
/* Classify the call.  A ctz call is treated as defined at zero only when it
   is an internal call with two arguments, the second one being the value at
   zero.  An ffs call keeps the initial defined_at_zero = true, val = 0.  */
1853 30779 : switch (gimple_call_combined_fn (call_stmt))
1854 : {
1855 1554 : CASE_CFN_CTZ:
1856 1554 : ifn = IFN_CTZ;
1857 1554 : if (!gimple_call_internal_p (call_stmt)
1858 1554 : || gimple_call_num_args (call_stmt) != 2)
1859 : defined_at_zero = false;
1860 : else
1861 121 : val = tree_to_shwi (gimple_call_arg (call_stmt, 1));
1862 : break;
1863 : CASE_CFN_FFS:
1864 : ifn = IFN_FFS;
1865 : break;
1866 : default:
1867 : return NULL;
1868 : }
1869 :
1870 1789 : prec = TYPE_PRECISION (rhs_type);
1871 1789 : loc = gimple_location (call_stmt);
1872 :
1873 1789 : vec_type = get_vectype_for_scalar_type (vinfo, lhs_type);
1874 1789 : if (!vec_type)
1875 : return NULL;
1876 :
1877 1783 : vec_rhs_type = get_vectype_for_scalar_type (vinfo, rhs_type);
1878 1783 : if (!vec_rhs_type)
1879 : return NULL;
1880 :
1881 : /* Do it only if the backend doesn't have ctz<vector_mode>2 or
1882 : ffs<vector_mode>2 pattern but does have clz<vector_mode>2 or
1883 : popcount<vector_mode>2. */
/* The !vec_type test below cannot be true here: a NULL vec_type already
   returned above.  */
1884 1544 : if (!vec_type
1885 1544 : || direct_internal_fn_supported_p (ifn, vec_rhs_type,
1886 : OPTIMIZE_FOR_SPEED))
1887 : return NULL;
1888 :
/* Pick the replacement.  For ffs, ctz is tried first; otherwise (or if ctz
   is unsupported) clz is tried.  popcount is then used when nothing was
   picked, or when the original is defined at zero but the picked function
   is not; for popcount the value at zero is recorded as PREC.  */
1889 1544 : if (ifn == IFN_FFS
1890 1544 : && direct_internal_fn_supported_p (IFN_CTZ, vec_rhs_type,
1891 : OPTIMIZE_FOR_SPEED))
1892 : {
1893 0 : ifnnew = IFN_CTZ;
1894 0 : defined_at_zero_new
1895 0 : = CTZ_DEFINED_VALUE_AT_ZERO (SCALAR_INT_TYPE_MODE (rhs_type),
1896 : val_new) == 2;
1897 : }
1898 1544 : else if (direct_internal_fn_supported_p (IFN_CLZ, vec_rhs_type,
1899 : OPTIMIZE_FOR_SPEED))
1900 : {
1901 160 : ifnnew = IFN_CLZ;
1902 160 : defined_at_zero_new
1903 160 : = CLZ_DEFINED_VALUE_AT_ZERO (SCALAR_INT_TYPE_MODE (rhs_type),
1904 : val_new) == 2;
1905 : }
1906 160 : if ((ifnnew == IFN_LAST
1907 160 : || (defined_at_zero && !defined_at_zero_new))
1908 1384 : && direct_internal_fn_supported_p (IFN_POPCOUNT, vec_rhs_type,
1909 : OPTIMIZE_FOR_SPEED))
1910 : {
1911 : ifnnew = IFN_POPCOUNT;
1912 : defined_at_zero_new = true;
1913 : val_new = prec;
1914 : }
1915 1418 : if (ifnnew == IFN_LAST)
1916 : return NULL;
1917 :
1918 286 : vect_pattern_detected ("vec_recog_ctz_ffs_pattern", call_stmt);
1919 :
/* Build the argument of the replacement call.  The first branch handles
   clz when both functions are defined at zero with value PREC, and popcount
   standing in for ctz; the later branches handle the remaining clz,
   popcount and ctz cases.  The three branches that emit helper statements
   give up when STMT_VINFO is a reduction.  */
1920 286 : val_cmp = val_new;
1921 286 : if ((ifnnew == IFN_CLZ
1922 286 : && defined_at_zero
1923 106 : && defined_at_zero_new
1924 106 : && val == prec
1925 54 : && val_new == prec)
1926 232 : || (ifnnew == IFN_POPCOUNT && ifn == IFN_CTZ))
1927 : {
1928 137 : if (vect_is_reduction (stmt_vinfo))
1929 : return NULL;
1930 :
1931 : /* .CTZ (X) = PREC - .CLZ ((X - 1) & ~X)
1932 : .CTZ (X) = .POPCOUNT ((X - 1) & ~X). */
1933 137 : if (ifnnew == IFN_CLZ)
1934 54 : sub = prec;
1935 137 : val_cmp = prec;
1936 :
/* A signed argument is first converted to the corresponding unsigned type;
   the helper statements below are then built in that type.  */
1937 137 : if (!TYPE_UNSIGNED (rhs_type))
1938 : {
1939 12 : rhs_type = unsigned_type_for (rhs_type);
1940 12 : vec_rhs_type = get_vectype_for_scalar_type (vinfo, rhs_type);
1941 12 : new_var = vect_recog_temp_ssa_var (rhs_type, NULL);
1942 12 : pattern_stmt = gimple_build_assign (new_var, NOP_EXPR, rhs_oprnd);
1943 12 : append_pattern_def_seq (vinfo, stmt_vinfo, pattern_stmt,
1944 : vec_rhs_type);
1945 12 : rhs_oprnd = new_var;
1946 : }
1947 :
/* m1 = X + (-1); new_var = ~X; new_var = m1 & new_var.  */
1948 137 : tree m1 = vect_recog_temp_ssa_var (rhs_type, NULL);
1949 137 : pattern_stmt = gimple_build_assign (m1, PLUS_EXPR, rhs_oprnd,
1950 : build_int_cst (rhs_type, -1));
1951 137 : gimple_set_location (pattern_stmt, loc);
1952 137 : append_pattern_def_seq (vinfo, stmt_vinfo, pattern_stmt, vec_rhs_type);
1953 :
1954 137 : new_var = vect_recog_temp_ssa_var (rhs_type, NULL);
1955 137 : pattern_stmt = gimple_build_assign (new_var, BIT_NOT_EXPR, rhs_oprnd);
1956 137 : gimple_set_location (pattern_stmt, loc);
1957 137 : append_pattern_def_seq (vinfo, stmt_vinfo, pattern_stmt, vec_rhs_type);
1958 137 : rhs_oprnd = new_var;
1959 :
1960 137 : new_var = vect_recog_temp_ssa_var (rhs_type, NULL);
1961 137 : pattern_stmt = gimple_build_assign (new_var, BIT_AND_EXPR,
1962 : m1, rhs_oprnd);
1963 137 : gimple_set_location (pattern_stmt, loc);
1964 137 : append_pattern_def_seq (vinfo, stmt_vinfo, pattern_stmt, vec_rhs_type);
1965 137 : rhs_oprnd = new_var;
1966 137 : }
1967 149 : else if (ifnnew == IFN_CLZ)
1968 : {
1969 106 : if (vect_is_reduction (stmt_vinfo))
1970 : return NULL;
1971 :
1972 : /* .CTZ (X) = (PREC - 1) - .CLZ (X & -X)
1973 : .FFS (X) = PREC - .CLZ (X & -X). */
1974 106 : sub = prec - (ifn == IFN_CTZ);
1975 106 : val_cmp = sub - val_new;
1976 :
/* neg = -X; new_var = X & neg.  */
1977 106 : tree neg = vect_recog_temp_ssa_var (rhs_type, NULL);
1978 106 : pattern_stmt = gimple_build_assign (neg, NEGATE_EXPR, rhs_oprnd);
1979 106 : gimple_set_location (pattern_stmt, loc);
1980 106 : append_pattern_def_seq (vinfo, stmt_vinfo, pattern_stmt, vec_rhs_type);
1981 :
1982 106 : new_var = vect_recog_temp_ssa_var (rhs_type, NULL);
1983 106 : pattern_stmt = gimple_build_assign (new_var, BIT_AND_EXPR,
1984 : rhs_oprnd, neg);
1985 106 : gimple_set_location (pattern_stmt, loc);
1986 106 : append_pattern_def_seq (vinfo, stmt_vinfo, pattern_stmt, vec_rhs_type);
1987 106 : rhs_oprnd = new_var;
1988 : }
1989 43 : else if (ifnnew == IFN_POPCOUNT)
1990 : {
1991 43 : if (vect_is_reduction (stmt_vinfo))
1992 : return NULL;
1993 :
1994 : /* .CTZ (X) = PREC - .POPCOUNT (X | -X)
1995 : .FFS (X) = (PREC + 1) - .POPCOUNT (X | -X). */
1996 43 : sub = prec + (ifn == IFN_FFS);
1997 43 : val_cmp = sub;
1998 :
/* neg = -X; new_var = X | neg.  */
1999 43 : tree neg = vect_recog_temp_ssa_var (rhs_type, NULL);
2000 43 : pattern_stmt = gimple_build_assign (neg, NEGATE_EXPR, rhs_oprnd);
2001 43 : gimple_set_location (pattern_stmt, loc);
2002 43 : append_pattern_def_seq (vinfo, stmt_vinfo, pattern_stmt, vec_rhs_type);
2003 :
2004 43 : new_var = vect_recog_temp_ssa_var (rhs_type, NULL);
2005 43 : pattern_stmt = gimple_build_assign (new_var, BIT_IOR_EXPR,
2006 : rhs_oprnd, neg);
2007 43 : gimple_set_location (pattern_stmt, loc);
2008 43 : append_pattern_def_seq (vinfo, stmt_vinfo, pattern_stmt, vec_rhs_type);
2009 43 : rhs_oprnd = new_var;
2010 : }
2011 0 : else if (ifnnew == IFN_CTZ)
2012 : {
2013 : /* .FFS (X) = .CTZ (X) + 1. */
2014 0 : add = 1;
2015 0 : val_cmp++;
2016 :
/* For a reduction, give up only when the zero-input fix-up at the end of
   the function would be required (same condition as used there).  */
2017 0 : if (vect_is_reduction (stmt_vinfo)
2018 0 : && defined_at_zero
2019 0 : && (!defined_at_zero_new || val != val_cmp))
2020 : return NULL;
2021 : }
2022 :
2023 : /* Create B = .IFNNEW (A). */
/* clz/ctz that are defined at zero get VAL_NEW as an explicit second
   argument; otherwise the call takes a single argument.  */
2024 286 : new_var = vect_recog_temp_ssa_var (lhs_type, NULL);
2025 286 : if ((ifnnew == IFN_CLZ || ifnnew == IFN_CTZ) && defined_at_zero_new)
2026 160 : pattern_stmt
2027 160 : = gimple_build_call_internal (ifnnew, 2, rhs_oprnd,
2028 : build_int_cst (integer_type_node,
2029 160 : val_new));
2030 : else
2031 126 : pattern_stmt = gimple_build_call_internal (ifnnew, 1, rhs_oprnd);
2032 286 : gimple_call_set_lhs (pattern_stmt, new_var);
2033 286 : gimple_set_location (pattern_stmt, loc);
2034 286 : *type_out = vec_type;
2035 :
/* Apply the arithmetic adjustment chosen above: ret = SUB - call, or
   ret = call + ADD.  The call is pushed to the pattern def sequence and the
   adjustment becomes the new pattern statement.  */
2036 286 : if (sub)
2037 : {
2038 203 : append_pattern_def_seq (vinfo, stmt_vinfo, pattern_stmt, vec_type);
2039 203 : tree ret_var = vect_recog_temp_ssa_var (lhs_type, NULL);
2040 203 : pattern_stmt = gimple_build_assign (ret_var, MINUS_EXPR,
2041 203 : build_int_cst (lhs_type, sub),
2042 : new_var);
2043 203 : gimple_set_location (pattern_stmt, loc);
2044 203 : new_var = ret_var;
2045 : }
2046 83 : else if (add)
2047 : {
2048 0 : append_pattern_def_seq (vinfo, stmt_vinfo, pattern_stmt, vec_type);
2049 0 : tree ret_var = vect_recog_temp_ssa_var (lhs_type, NULL);
2050 0 : pattern_stmt = gimple_build_assign (ret_var, PLUS_EXPR, new_var,
2051 0 : build_int_cst (lhs_type, add));
2052 0 : gimple_set_location (pattern_stmt, loc);
2053 0 : new_var = ret_var;
2054 : }
2055 :
/* Zero-input fix-up: when the original call is defined at zero and the
   replacement is either undefined there or VAL differs from VAL_CMP, select
   the result explicitly: ret = (orig_arg != 0) ? new_var : VAL.  The
   comparison uses the call's original first argument, not the possibly
   rewritten rhs_oprnd.  */
2056 286 : if (defined_at_zero
2057 210 : && (!defined_at_zero_new || val != val_cmp))
2058 : {
2059 43 : append_pattern_def_seq (vinfo, stmt_vinfo, pattern_stmt, vec_type);
2060 43 : tree ret_var = vect_recog_temp_ssa_var (lhs_type, NULL);
2061 43 : rhs_oprnd = gimple_call_arg (call_stmt, 0);
2062 43 : rhs_type = TREE_TYPE (rhs_oprnd);
2063 43 : tree cmp = vect_recog_temp_ssa_var (boolean_type_node, NULL);
2064 43 : pattern_stmt = gimple_build_assign (cmp, NE_EXPR, rhs_oprnd,
2065 : build_zero_cst (rhs_type));
2066 43 : append_pattern_def_seq (vinfo, stmt_vinfo, pattern_stmt,
2067 : truth_type_for (vec_type), rhs_type);
2068 43 : pattern_stmt = gimple_build_assign (ret_var, COND_EXPR, cmp,
2069 : new_var,
2070 43 : build_int_cst (lhs_type, val));
2071 : }
2072 :
2073 286 : if (dump_enabled_p ())
2074 36 : dump_printf_loc (MSG_NOTE, vect_location,
2075 : "created pattern stmt: %G", pattern_stmt);
2076 :
2077 : return pattern_stmt;
2078 : }
2079 :
2080 : /* Function vect_recog_popcount_clz_ctz_ffs_pattern
2081 :
2082 : Try to find the following pattern:
2083 :
2084 : UTYPE1 A;
2085 : TYPE1 B;
2086 : UTYPE2 temp_in;
2087 : TYPE3 temp_out;
2088 : temp_in = (UTYPE2)A;
2089 :
2090 : temp_out = __builtin_popcount{,l,ll} (temp_in);
2091 : B = (TYPE1) temp_out;
2092 :
2093 : TYPE2 may or may not be equal to TYPE3.
2094 : i.e. TYPE2 is equal to TYPE3 for __builtin_popcount
2095 : i.e. TYPE2 is not equal to TYPE3 for __builtin_popcountll
2096 :
2097 : Input:
2098 :
2099 : * STMT_VINFO: The stmt from which the pattern search begins.
2100 : here it starts with B = (TYPE1) temp_out;
2101 :
2102 : Output:
2103 :
2104 : * TYPE_OUT: The vector type of the output of this pattern.
2105 :
2106 : * Return value: A new stmt that will be used to replace the sequence of
2107 : stmts that constitute the pattern. In this case it will be:
2108 : B = .POPCOUNT (A);
2109 :
2110 : Similarly for clz, ctz and ffs.
2111 : */
2112 :
2113 : static gimple *
2114 30850055 : vect_recog_popcount_clz_ctz_ffs_pattern (vec_info *vinfo,
2115 : stmt_vec_info stmt_vinfo,
2116 : tree *type_out)
2117 : {
/* Starting from a conversion of a popcount/clz/ctz/ffs call result, build
   the matching internal-function call directly on the unpromoted call
   argument.  Returns the pattern statement or NULL; on success *TYPE_OUT is
   the vector type for the conversion's lhs type (or whatever
   vect_recog_ctz_ffs_pattern sets when the work is delegated to it).

   ADDEND is the constant added to the narrow clz result to compensate for
   counting in a narrower type; it stays 0 for the other functions.  */
2118 30850055 : gassign *last_stmt = dyn_cast <gassign *> (stmt_vinfo->stmt);
2119 21126511 : gimple *call_stmt, *pattern_stmt;
2120 21126511 : tree rhs_oprnd, rhs_origin, lhs_oprnd, lhs_type, vec_type, new_var;
2121 51976248 : internal_fn ifn = IFN_LAST;
2122 30849737 : int addend = 0;
2123 :
2124 : /* Find B = (TYPE1) temp_out. */
2125 21126511 : if (!last_stmt)
2126 : return NULL;
2127 21126511 : tree_code code = gimple_assign_rhs_code (last_stmt);
2128 21126511 : if (!CONVERT_EXPR_CODE_P (code))
2129 : return NULL;
2130 :
2131 2904627 : lhs_oprnd = gimple_assign_lhs (last_stmt);
2132 2904627 : lhs_type = TREE_TYPE (lhs_oprnd);
2133 2904627 : if (!INTEGRAL_TYPE_P (lhs_type))
2134 : return NULL;
2135 :
/* The converted value must be an SSA_NAME with a single use.  */
2136 2721699 : rhs_oprnd = gimple_assign_rhs1 (last_stmt);
2137 2721699 : if (TREE_CODE (rhs_oprnd) != SSA_NAME
2138 2721699 : || !has_single_use (rhs_oprnd))
2139 : return NULL;
2140 1392660 : call_stmt = SSA_NAME_DEF_STMT (rhs_oprnd);
2141 :
2142 : /* Find temp_out = __builtin_popcount{,l,ll} (temp_in); */
2143 1392660 : if (!is_gimple_call (call_stmt))
2144 : return NULL;
/* At this point rhs_oprnd is still the call's result, so the TYPE_UNSIGNED
   tests inside the switch look at the type of the call result.  */
2145 99582 : switch (gimple_call_combined_fn (call_stmt))
2146 : {
2147 : int val;
2148 : CASE_CFN_POPCOUNT:
2149 : ifn = IFN_POPCOUNT;
2150 : break;
2151 2059 : CASE_CFN_CLZ:
2152 2059 : ifn = IFN_CLZ;
2153 : /* Punt if call result is unsigned and defined value at zero
2154 : is negative, as the negative value doesn't extend correctly. */
2155 2059 : if (TYPE_UNSIGNED (TREE_TYPE (rhs_oprnd))
2156 0 : && gimple_call_internal_p (call_stmt)
2157 2059 : && CLZ_DEFINED_VALUE_AT_ZERO
2158 : (SCALAR_INT_TYPE_MODE (TREE_TYPE (rhs_oprnd)), val) == 2
2159 2059 : && val < 0)
2160 : return NULL;
2161 : break;
2162 706 : CASE_CFN_CTZ:
2163 706 : ifn = IFN_CTZ;
2164 : /* Punt if call result is unsigned and defined value at zero
2165 : is negative, as the negative value doesn't extend correctly. */
2166 706 : if (TYPE_UNSIGNED (TREE_TYPE (rhs_oprnd))
2167 0 : && gimple_call_internal_p (call_stmt)
2168 706 : && CTZ_DEFINED_VALUE_AT_ZERO
2169 : (SCALAR_INT_TYPE_MODE (TREE_TYPE (rhs_oprnd)), val) == 2
2170 706 : && val < 0)
2171 : return NULL;
2172 : break;
2173 57 : CASE_CFN_FFS:
2174 57 : ifn = IFN_FFS;
2175 57 : break;
2176 : default:
2177 : return NULL;
2178 : }
2179 :
2180 3124 : if (gimple_call_num_args (call_stmt) != 1
2181 3124 : && gimple_call_num_args (call_stmt) != 2)
2182 : return NULL;
2183 :
/* From here on rhs_oprnd is the call's first argument (temp_in); the
   look-through fills UNPROM_DIFF, whose op and type are used below.  */
2184 3124 : rhs_oprnd = gimple_call_arg (call_stmt, 0);
2185 3124 : vect_unpromoted_value unprom_diff;
2186 3124 : rhs_origin
2187 3124 : = vect_look_through_possible_promotion (vinfo, rhs_oprnd, &unprom_diff);
2188 :
2189 3124 : if (!rhs_origin)
2190 : return NULL;
2191 :
2192 : /* Input and output of .POPCOUNT should be same-precision integer. */
2193 3114 : if (TYPE_PRECISION (unprom_diff.type) != TYPE_PRECISION (lhs_type))
2194 : return NULL;
2195 :
2196 : /* Also A should be unsigned or same precision as temp_in, otherwise
2197 : different builtins/internal functions have different behaviors. */
2198 1607 : if (TYPE_PRECISION (unprom_diff.type)
2199 1607 : != TYPE_PRECISION (TREE_TYPE (rhs_oprnd)))
2200 264 : switch (ifn)
2201 : {
2202 95 : case IFN_POPCOUNT:
2203 : /* For popcount require zero extension, which doesn't add any
2204 : further bits to the count. */
2205 95 : if (!TYPE_UNSIGNED (unprom_diff.type))
2206 : return NULL;
2207 : break;
2208 109 : case IFN_CLZ:
2209 : /* clzll (x) == clz (x) + 32 for unsigned x != 0, so ok
2210 : if it is undefined at zero or if it matches also for the
2211 : defined value there. */
2212 109 : if (!TYPE_UNSIGNED (unprom_diff.type))
2213 : return NULL;
2214 109 : if (!type_has_mode_precision_p (lhs_type)
2215 109 : || !type_has_mode_precision_p (TREE_TYPE (rhs_oprnd)))
2216 0 : return NULL;
/* ADDEND is the precision difference between the call's argument type and
   the narrow result type.  */
2217 109 : addend = (TYPE_PRECISION (TREE_TYPE (rhs_oprnd))
2218 109 : - TYPE_PRECISION (lhs_type));
2219 109 : if (gimple_call_internal_p (call_stmt)
2220 109 : && gimple_call_num_args (call_stmt) == 2)
2221 : {
2222 0 : int val1, val2;
2223 0 : val1 = tree_to_shwi (gimple_call_arg (call_stmt, 1));
2224 0 : int d2
2225 0 : = CLZ_DEFINED_VALUE_AT_ZERO (SCALAR_INT_TYPE_MODE (lhs_type),
2226 : val2);
2227 0 : if (d2 != 2 || val1 != val2 + addend)
2228 : return NULL;
2229 : }
2230 : break;
2231 40 : case IFN_CTZ:
2232 : /* ctzll (x) == ctz (x) for unsigned or signed x != 0, so ok
2233 : if it is undefined at zero or if it matches also for the
2234 : defined value there. */
2235 40 : if (gimple_call_internal_p (call_stmt)
2236 40 : && gimple_call_num_args (call_stmt) == 2)
2237 : {
2238 0 : int val1, val2;
2239 0 : val1 = tree_to_shwi (gimple_call_arg (call_stmt, 1));
2240 0 : int d2
2241 0 : = CTZ_DEFINED_VALUE_AT_ZERO (SCALAR_INT_TYPE_MODE (lhs_type),
2242 : val2);
2243 0 : if (d2 != 2 || val1 != val2)
2244 : return NULL;
2245 : }
2246 : break;
2247 : case IFN_FFS:
2248 : /* ffsll (x) == ffs (x) for unsigned or signed x. */
2249 : break;
2250 0 : default:
2251 0 : gcc_unreachable ();
2252 : }
2253 :
2254 1607 : vec_type = get_vectype_for_scalar_type (vinfo, lhs_type);
2255 : /* Do it only if the backend has popcount<vector_mode>2 etc. pattern. */
2256 1607 : if (!vec_type)
2257 : return NULL;
2258 :
/* Without direct support, popcount and clz give up.  ffs continues if ctz,
   clz or popcount is supported, and ctz continues if clz or popcount is
   supported, since vect_recog_ctz_ffs_pattern can then synthesize them.  */
2259 1482 : bool supported
2260 1482 : = direct_internal_fn_supported_p (ifn, vec_type, OPTIMIZE_FOR_SPEED);
2261 1482 : if (!supported)
2262 1305 : switch (ifn)
2263 : {
2264 : case IFN_POPCOUNT:
2265 : case IFN_CLZ:
2266 : return NULL;
2267 57 : case IFN_FFS:
2268 : /* vect_recog_ctz_ffs_pattern can implement ffs using ctz. */
2269 57 : if (direct_internal_fn_supported_p (IFN_CTZ, vec_type,
2270 : OPTIMIZE_FOR_SPEED))
2271 : break;
2272 : /* FALLTHRU */
2273 515 : case IFN_CTZ:
2274 : /* vect_recog_ctz_ffs_pattern can implement ffs or ctz using
2275 : clz or popcount. */
2276 515 : if (direct_internal_fn_supported_p (IFN_CLZ, vec_type,
2277 : OPTIMIZE_FOR_SPEED))
2278 : break;
2279 455 : if (direct_internal_fn_supported_p (IFN_POPCOUNT, vec_type,
2280 : OPTIMIZE_FOR_SPEED))
2281 : break;
2282 : return NULL;
2283 0 : default:
2284 0 : gcc_unreachable ();
2285 : }
2286 :
2287 318 : vect_pattern_detected ("vec_recog_popcount_clz_ctz_ffs_pattern",
2288 : call_stmt);
2289 :
2290 : /* Create B = .POPCOUNT (A). */
/* For clz/ctz, when the *_DEFINED_VALUE_AT_ZERO macro yields 2 for the mode
   of LHS_TYPE, the value it stores in VAL is passed as an explicit second
   argument of the new internal call.  */
2291 318 : new_var = vect_recog_temp_ssa_var (lhs_type, NULL);
2292 318 : tree arg2 = NULL_TREE;
2293 318 : int val;
2294 318 : if (ifn == IFN_CLZ
2295 368 : && CLZ_DEFINED_VALUE_AT_ZERO (SCALAR_INT_TYPE_MODE (lhs_type),
2296 : val) == 2)
2297 48 : arg2 = build_int_cst (integer_type_node, val);
2298 270 : else if (ifn == IFN_CTZ
2299 363 : && CTZ_DEFINED_VALUE_AT_ZERO (SCALAR_INT_TYPE_MODE (lhs_type),
2300 : val) == 2)
2301 93 : arg2 = build_int_cst (integer_type_node, val);
2302 318 : if (arg2)
2303 141 : pattern_stmt = gimple_build_call_internal (ifn, 2, unprom_diff.op, arg2);
2304 : else
2305 177 : pattern_stmt = gimple_build_call_internal (ifn, 1, unprom_diff.op);
2306 318 : gimple_call_set_lhs (pattern_stmt, new_var);
2307 318 : gimple_set_location (pattern_stmt, gimple_location (last_stmt));
2308 318 : *type_out = vec_type;
2309 :
/* The dump below prints the plain internal call, before the ADDEND
   adjustment or the hand-off to vect_recog_ctz_ffs_pattern.  */
2310 318 : if (dump_enabled_p ())
2311 24 : dump_printf_loc (MSG_NOTE, vect_location,
2312 : "created pattern stmt: %G", pattern_stmt);
2313 :
/* A nonzero ADDEND only arises for clz, which returned above when
   unsupported, hence the assert.  The call moves to the pattern def
   sequence and the result becomes call + ADDEND.  */
2314 318 : if (addend)
2315 : {
2316 12 : gcc_assert (supported);
2317 12 : append_pattern_def_seq (vinfo, stmt_vinfo, pattern_stmt, vec_type);
2318 12 : tree ret_var = vect_recog_temp_ssa_var (lhs_type, NULL);
2319 12 : pattern_stmt = gimple_build_assign (ret_var, PLUS_EXPR, new_var,
2320 12 : build_int_cst (lhs_type, addend));
2321 : }
/* Unsupported ctz/ffs: register the new call as a statement of its own, run
   vect_recog_ctz_ffs_pattern on it, and splice the helper statements that
   it queued on the temporary stmt info into STMT_VINFO's pattern def
   sequence.  */
2322 306 : else if (!supported)
2323 : {
2324 141 : stmt_vec_info new_stmt_info = vinfo->add_stmt (pattern_stmt);
2325 141 : STMT_VINFO_VECTYPE (new_stmt_info) = vec_type;
2326 141 : pattern_stmt
2327 141 : = vect_recog_ctz_ffs_pattern (vinfo, new_stmt_info, type_out);
2328 141 : if (pattern_stmt == NULL)
2329 : return NULL;
2330 141 : if (gimple_seq seq = STMT_VINFO_PATTERN_DEF_SEQ (new_stmt_info))
2331 : {
2332 141 : gimple_seq *pseq = &STMT_VINFO_PATTERN_DEF_SEQ (stmt_vinfo);
2333 141 : gimple_seq_add_seq_without_update (pseq, seq);
2334 : }
2335 : }
2336 : return pattern_stmt;
2337 : }
2338 :
2339 : /* Function vect_recog_pow_pattern
2340 :
2341 : Try to find the following pattern:
2342 :
2343 : x = POW (y, N);
2344 :
2345 : with POW being one of pow, powf, powi, powif and N being
2346 : either 2 or 0.5.
2347 :
2348 : Input:
2349 :
2350 : * STMT_VINFO: The stmt from which the pattern search begins.
2351 :
2352 : Output:
2353 :
2354 : * TYPE_OUT: The type of the output of this pattern.
2355 :
2356 : * Return value: A new stmt that will be used to replace the sequence of
2357 : stmts that constitute the pattern. In this case it will be:
2358 : x = y * y
2359 : or
2360 : x = sqrt (y)
2361 : */
2362 :
2363 : static gimple *
2364 30850070 : vect_recog_pow_pattern (vec_info *vinfo,
2365 : stmt_vec_info stmt_vinfo, tree *type_out)
2366 : {
/* Replace a pow/powi call by something cheaper to vectorize.  Three
   rewrites are attempted:
     - non-constant exponent, constant base: exp (log (base) * exponent),
       under the conditions checked in the first branch below;
     - exponent equal to 2: base * base;
     - exponent equal to 0.5: .SQRT (base).
   Returns the replacement statement, or NULL when none applies.  */
2367 30850070 : gimple *last_stmt = stmt_vinfo->stmt;
2368 30850070 : tree base, exp;
2369 30850070 : gimple *stmt;
2370 30850070 : tree var;
2371 :
/* Only calls whose result is used (non-NULL lhs) are candidates.  */
2372 30850070 : if (!is_gimple_call (last_stmt) || gimple_call_lhs (last_stmt) == NULL)
2373 : return NULL;
2374 :
2375 1500550 : switch (gimple_call_combined_fn (last_stmt))
2376 : {
2377 276 : CASE_CFN_POW:
2378 276 : CASE_CFN_POWI:
2379 276 : break;
2380 :
2381 : default:
2382 : return NULL;
2383 : }
2384 :
2385 276 : base = gimple_call_arg (last_stmt, 0);
2386 276 : exp = gimple_call_arg (last_stmt, 1);
/* Non-constant exponent: every path through this block returns.  The
   rewrite is attempted only with flag_unsafe_math_optimizations, a REAL_CST
   base and a normal builtin call.  */
2387 276 : if (TREE_CODE (exp) != REAL_CST
2388 249 : && TREE_CODE (exp) != INTEGER_CST)
2389 : {
2390 249 : if (flag_unsafe_math_optimizations
2391 37 : && TREE_CODE (base) == REAL_CST
2392 252 : && gimple_call_builtin_p (last_stmt, BUILT_IN_NORMAL))
2393 : {
/* Select the log function to fold at compile time and the exp builtin to
   call, matching the pow variant; other variants are not handled.  */
2394 3 : combined_fn log_cfn;
2395 3 : built_in_function exp_bfn;
2396 3 : switch (DECL_FUNCTION_CODE (gimple_call_fndecl (last_stmt)))
2397 : {
2398 : case BUILT_IN_POW:
2399 : log_cfn = CFN_BUILT_IN_LOG;
2400 : exp_bfn = BUILT_IN_EXP;
2401 : break;
2402 0 : case BUILT_IN_POWF:
2403 0 : log_cfn = CFN_BUILT_IN_LOGF;
2404 0 : exp_bfn = BUILT_IN_EXPF;
2405 0 : break;
2406 0 : case BUILT_IN_POWL:
2407 0 : log_cfn = CFN_BUILT_IN_LOGL;
2408 0 : exp_bfn = BUILT_IN_EXPL;
2409 0 : break;
2410 : default:
2411 : return NULL;
2412 : }
2413 3 : tree logc = fold_const_call (log_cfn, TREE_TYPE (base), base);
2414 3 : tree exp_decl = builtin_decl_implicit (exp_bfn);
2415 : /* Optimize pow (C, x) as exp (log (C) * x). Normally match.pd
2416 : does that, but if C is a power of 2, we want to use
2417 : exp2 (log2 (C) * x) in the non-vectorized version, but for
2418 : vectorization we don't have vectorized exp2. */
/* Requires that log (C) folded to a REAL_CST and that the exp declaration
   carries the "omp declare simd" attribute.  */
2419 3 : if (logc
2420 3 : && TREE_CODE (logc) == REAL_CST
2421 3 : && exp_decl
2422 6 : && lookup_attribute ("omp declare simd",
2423 3 : DECL_ATTRIBUTES (exp_decl)))
2424 : {
/* If the exp node has no simd clones yet, try to create them; this is not
   attempted when the target lacks the compute_vecsize_and_simdlen hook or
   the node has a definition.  */
2425 3 : cgraph_node *node = cgraph_node::get_create (exp_decl);
2426 3 : if (node->simd_clones == NULL)
2427 : {
2428 2 : if (targetm.simd_clone.compute_vecsize_and_simdlen == NULL
2429 2 : || node->definition)
2430 : return NULL;
2431 2 : expand_simd_clones (node);
2432 2 : if (node->simd_clones == NULL)
2433 : return NULL;
2434 : }
2435 3 : *type_out = get_vectype_for_scalar_type (vinfo, TREE_TYPE (base));
2436 3 : if (!*type_out)
2437 : return NULL;
/* def = exponent * log (C) goes to the pattern def sequence; the returned
   pattern statement is res = exp (def).  */
2438 3 : tree def = vect_recog_temp_ssa_var (TREE_TYPE (base), NULL);
2439 3 : gimple *g = gimple_build_assign (def, MULT_EXPR, exp, logc);
2440 3 : append_pattern_def_seq (vinfo, stmt_vinfo, g);
2441 3 : tree res = vect_recog_temp_ssa_var (TREE_TYPE (base), NULL);
2442 3 : g = gimple_build_call (exp_decl, 1, def);
2443 3 : gimple_call_set_lhs (g, res);
2444 3 : return g;
2445 : }
2446 : }
2447 :
2448 246 : return NULL;
2449 : }
2450 :
2451 : /* We now have a pow or powi builtin function call with a constant
2452 : exponent. */
2453 :
2454 : /* Catch squaring. */
/* The exponent 2 is accepted both as an integer constant and as a real
   constant.  */
2455 27 : if ((tree_fits_shwi_p (exp)
2456 0 : && tree_to_shwi (exp) == 2)
2457 27 : || (TREE_CODE (exp) == REAL_CST
2458 27 : && real_equal (&TREE_REAL_CST (exp), &dconst2)))
2459 : {
2460 7 : if (!vect_supportable_direct_optab_p (vinfo, TREE_TYPE (base), MULT_EXPR,
2461 7 : TREE_TYPE (base), type_out))
2462 : return NULL;
2463 :
2464 7 : var = vect_recog_temp_ssa_var (TREE_TYPE (base), NULL);
2465 7 : stmt = gimple_build_assign (var, MULT_EXPR, base, base);
2466 7 : return stmt;
2467 : }
2468 :
2469 : /* Catch square root. */
/* Note that *TYPE_OUT is written here even when the function then returns
   NULL because IFN_SQRT is not supported for that vector type.  */
2470 20 : if (TREE_CODE (exp) == REAL_CST
2471 20 : && real_equal (&TREE_REAL_CST (exp), &dconsthalf))
2472 : {
2473 10 : *type_out = get_vectype_for_scalar_type (vinfo, TREE_TYPE (base));
2474 10 : if (*type_out
2475 10 : && direct_internal_fn_supported_p (IFN_SQRT, *type_out,
2476 : OPTIMIZE_FOR_SPEED))
2477 : {
2478 8 : gcall *stmt = gimple_build_call_internal (IFN_SQRT, 1, base);
2479 8 : var = vect_recog_temp_ssa_var (TREE_TYPE (base), stmt);
2480 8 : gimple_call_set_lhs (stmt, var);
2481 8 : gimple_call_set_nothrow (stmt, true);
2482 8 : return stmt;
2483 : }
2484 : }
2485 :
2486 : return NULL;
2487 : }
2488 :
2489 :
2490 : /* Function vect_recog_widen_sum_pattern
2491 :
2492 : Try to find the following pattern:
2493 :
2494 : type x_t;
2495 : TYPE x_T, sum = init;
2496 : loop:
2497 : sum_0 = phi <init, sum_1>
2498 : S1 x_t = *p;
2499 : S2 x_T = (TYPE) x_t;
2500 : S3 sum_1 = x_T + sum_0;
2501 :
2502 : where type 'TYPE' is at least double the size of type 'type', i.e - we're
2503 : summing elements of type 'type' into an accumulator of type 'TYPE'. This is
2504 : a special case of a reduction computation.
2505 :
2506 : Input:
2507 :
2508 : * STMT_VINFO: The stmt from which the pattern search begins. In the example,
2509 : when this function is called with S3, the pattern {S2,S3} will be detected.
2510 :
2511 : Output:
2512 :
2513 : * TYPE_OUT: The type of the output of this pattern.
2514 :
2515 : * Return value: A new stmt that will be used to replace the sequence of
2516 : stmts that constitute the pattern. In this case it will be:
2517 : WIDEN_SUM <x_t, sum_0>
2518 :
2519 : Note: The widening-sum idiom is a widening reduction pattern that is
2520 : vectorized without preserving all the intermediate results. It
2521 : produces only N/2 (widened) results (by summing up pairs of
2522 : intermediate results) rather than all N results. Therefore, we
2523 : cannot allow this pattern when we want to get all the results and in
2524 : the correct order (as is the case when this computation is in an
2525 : inner-loop nested in an outer-loop that is being vectorized). */
2526 :
2527 : static gimple *
2528 30850070 : vect_recog_widen_sum_pattern (vec_info *vinfo,
2529 : stmt_vec_info stmt_vinfo, tree *type_out)
2530 : {
/* Recognize a summation reduction whose non-accumulator operand is a
   promotion from a type of at most half the accumulator's precision, and
   replace it by WIDEN_SUM_EXPR <unpromoted operand, accumulator>.  Returns
   the new statement or NULL; *TYPE_OUT is filled in by
   vect_supportable_conv_optab_p.  */
2531 30850070 : gimple *last_stmt = stmt_vinfo->stmt;
2532 30850070 : tree oprnd0, oprnd1;
2533 30850070 : tree type;
2534 30850070 : gimple *pattern_stmt;
2535 30850070 : tree var;
2536 :
2537 : /* Look for the following pattern
2538 : DX = (TYPE) X;
2539 : sum_1 = DX + sum_0;
2540 : In which DX is at least double the size of X, and sum_1 has been
2541 : recognized as a reduction variable.
2542 : */
2543 :
2544 : /* Starting from LAST_STMT, follow the defs of its uses in search
2545 : of the above pattern. */
2546 :
/* oprnd0 must additionally be an SSA_NAME for which vinfo->lookup_def
   finds a definition.  */
2547 30850070 : if (!vect_reassociating_reduction_p (vinfo, stmt_vinfo, PLUS_EXPR,
2548 : &oprnd0, &oprnd1)
2549 52442 : || TREE_CODE (oprnd0) != SSA_NAME
2550 30902243 : || !vinfo->lookup_def (oprnd0))
2551 30797962 : return NULL;
2552 :
2553 52108 : type = TREE_TYPE (gimple_get_lhs (last_stmt));
2554 :
2555 : /* So far so good. Since last_stmt was detected as a (summation) reduction,
2556 : we know that oprnd1 is the reduction variable (defined by a loop-header
2557 : phi), and oprnd0 is an ssa-name defined by a stmt in the loop body.
2558 : Left to check that oprnd0 is defined by a cast from type 'type' to type
2559 : 'TYPE'. */
2560 :
/* Reject unless the accumulator type has at least twice the precision of
   the unpromoted operand's type.  */
2561 52108 : vect_unpromoted_value unprom0;
2562 52108 : if (!vect_look_through_possible_promotion (vinfo, oprnd0, &unprom0)
2563 52108 : || TYPE_PRECISION (unprom0.type) * 2 > TYPE_PRECISION (type))
2564 : return NULL;
2565 :
/* The pattern is reported as detected before target support is checked, so
   a detection does not imply that a pattern statement gets built.  */
2566 2291 : vect_pattern_detected ("vect_recog_widen_sum_pattern", last_stmt);
2567 :
2568 2291 : if (!vect_supportable_conv_optab_p (vinfo, type, WIDEN_SUM_EXPR,
2569 : unprom0.type, type_out))
2570 : return NULL;
2571 :
2572 0 : var = vect_recog_temp_ssa_var (type, NULL);
2573 0 : pattern_stmt = gimple_build_assign (var, WIDEN_SUM_EXPR, unprom0.op, oprnd1);
2574 :
2575 0 : return pattern_stmt;
2576 : }
2577 :
2578 : /* Function vect_recog_bitfield_ref_pattern
2579 :
2580 : Try to find the following pattern:
2581 :
2582 : bf_value = BIT_FIELD_REF (container, bitsize, bitpos);
2583 : result = (type_out) bf_value;
2584 :
2585 : or
2586 :
2587 : if (BIT_FIELD_REF (container, bitsize, bitpos) `cmp` <constant>)
2588 :
2589 : where type_out is a non-bitfield type, that is to say, its precision matches
2590 : 2^(TYPE_SIZE(type_out) - (TYPE_UNSIGNED (type_out) ? 1 : 2)).
2591 :
2592 : Input:
2593 :
2594 : * STMT_VINFO: The stmt from which the pattern search begins.
2595 : here it starts with:
2596 : result = (type_out) bf_value;
2597 :
2598 : or
2599 :
2600 : if (BIT_FIELD_REF (container, bitsize, bitpos) `cmp` <constant>)
2601 :
2602 : Output:
2603 :
2604 : * TYPE_OUT: The vector type of the output of this pattern.
2605 :
2606 : * Return value: A new stmt that will be used to replace the sequence of
2607 : stmts that constitute the pattern. If the precision of type_out is bigger
2608 : than the precision type of _1 we perform the widening before the shifting,
2609 : since the new precision will be large enough to shift the value and moving
2610 : widening operations up the statement chain enables the generation of
2611 : widening loads. If we are widening and the operation after the pattern is
2612 : an addition then we mask first and shift later, to enable the generation of
2613 : shifting adds. In the case of narrowing we will always mask first, shift
2614 : last and then perform a narrowing operation. This will enable the
2615 : generation of narrowing shifts.
2616 :
2617 : Widening with mask first, shift later:
2618 : container = (type_out) container;
2619 : masked = container & (((1 << bitsize) - 1) << bitpos);
2620 : result = masked >> bitpos;
2621 :
2622 : Widening with shift first, mask last:
2623 : container = (type_out) container;
2624 : shifted = container >> bitpos;
2625 : result = shifted & ((1 << bitsize) - 1);
2626 :
2627 : Narrowing:
2628 : masked = container & (((1 << bitsize) - 1) << bitpos);
2629 : result = masked >> bitpos;
2630 : result = (type_out) result;
2631 :
2632 : If the bitfield is signed and it's wider than type_out, we need to
2633 : keep the result sign-extended:
2634 : container = (type) container;
2635 : masked = container << (prec - bitsize - bitpos);
2636 : result = (type_out) (masked >> (prec - bitsize));
2637 :
2638 : Here type is the signed variant of the wider of type_out and the type
2639 : of container.
2640 :
2641 : The shifting is always optional depending on whether bitpos != 0.
2642 :
2643 : When the original bitfield was inside a gcond then a new gcond is also
2644 : generated with the new `result` as the operand to the comparison.
2645 :
2646 : */
2647 :
2648 : static gimple *
2649 30792984 : vect_recog_bitfield_ref_pattern (vec_info *vinfo, stmt_vec_info stmt_info,
2650 : tree *type_out)
2651 : {
2652 30792984 : gimple *bf_stmt = NULL;
2653 30792984 : tree lhs = NULL_TREE;
2654 30792984 : tree ret_type = NULL_TREE;
2655 30792984 : gimple *stmt = STMT_VINFO_STMT (stmt_info);
2656 30792984 : if (gcond *cond_stmt = dyn_cast <gcond *> (stmt))
2657 : {
2658 5144197 : tree op = gimple_cond_lhs (cond_stmt);
2659 5144197 : if (TREE_CODE (op) != SSA_NAME)
2660 : return NULL;
2661 5143893 : bf_stmt = dyn_cast <gassign *> (SSA_NAME_DEF_STMT (op));
2662 5143893 : if (TREE_CODE (gimple_cond_rhs (cond_stmt)) != INTEGER_CST)
2663 : return NULL;
2664 : }
2665 25648787 : else if (is_gimple_assign (stmt)
2666 21069121 : && CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (stmt))
2667 28474664 : && TREE_CODE (gimple_assign_rhs1 (stmt)) == SSA_NAME)
2668 : {
2669 2784521 : gimple *second_stmt = SSA_NAME_DEF_STMT (gimple_assign_rhs1 (stmt));
2670 2784521 : bf_stmt = dyn_cast <gassign *> (second_stmt);
2671 2784521 : lhs = gimple_assign_lhs (stmt);
2672 2784521 : ret_type = TREE_TYPE (lhs);
2673 : }
2674 :
2675 6110737 : if (!bf_stmt
2676 6110737 : || gimple_assign_rhs_code (bf_stmt) != BIT_FIELD_REF)
2677 : return NULL;
2678 :
2679 15446 : tree bf_ref = gimple_assign_rhs1 (bf_stmt);
2680 15446 : tree container = TREE_OPERAND (bf_ref, 0);
2681 15446 : ret_type = ret_type ? ret_type : TREE_TYPE (container);
2682 :
2683 15446 : if (!bit_field_offset (bf_ref).is_constant ()
2684 15446 : || !bit_field_size (bf_ref).is_constant ()
2685 15446 : || !tree_fits_uhwi_p (TYPE_SIZE (TREE_TYPE (container))))
2686 : return NULL;
2687 :
2688 30514 : if (!INTEGRAL_TYPE_P (TREE_TYPE (bf_ref))
2689 15444 : || !INTEGRAL_TYPE_P (TREE_TYPE (container))
2690 17603 : || TYPE_MODE (TREE_TYPE (container)) == E_BLKmode)
2691 13289 : return NULL;
2692 :
2693 2157 : gimple *use_stmt, *pattern_stmt;
2694 2157 : use_operand_p use_p;
2695 2157 : bool shift_first = true;
2696 2157 : tree container_type = TREE_TYPE (container);
2697 2157 : tree vectype = get_vectype_for_scalar_type (vinfo, container_type);
2698 :
2699 : /* Calculate shift_n before the adjustments for widening loads, otherwise
2700 : the container may change and we have to consider offset change for
2701 : widening loads on big endianness. The shift_n calculated here can be
2702 : independent of widening. */
2703 2157 : unsigned HOST_WIDE_INT shift_n = bit_field_offset (bf_ref).to_constant ();
2704 2157 : unsigned HOST_WIDE_INT mask_width = bit_field_size (bf_ref).to_constant ();
2705 2157 : unsigned HOST_WIDE_INT prec = tree_to_uhwi (TYPE_SIZE (container_type));
2706 2157 : if (BYTES_BIG_ENDIAN)
2707 : shift_n = prec - shift_n - mask_width;
2708 :
2709 2157 : bool ref_sext = (!TYPE_UNSIGNED (TREE_TYPE (bf_ref)) &&
2710 1394 : TYPE_PRECISION (ret_type) > mask_width);
2711 2157 : bool load_widen = (TYPE_PRECISION (TREE_TYPE (container)) <
2712 2157 : TYPE_PRECISION (ret_type));
2713 :
2714 : /* We move the conversion earlier if the loaded type is smaller than the
2715 : return type to enable the use of widening loads. And if we need a
2716 : sign extension, we need to convert the loaded value early to a signed
2717 : type as well. */
2718 2157 : if (ref_sext || load_widen)
2719 : {
2720 941 : tree type = load_widen ? ret_type : container_type;
2721 941 : if (ref_sext)
2722 902 : type = gimple_signed_type (type);
2723 941 : pattern_stmt = gimple_build_assign (vect_recog_temp_ssa_var (type),
2724 : NOP_EXPR, container);
2725 941 : container = gimple_get_lhs (pattern_stmt);
2726 941 : container_type = TREE_TYPE (container);
2727 941 : prec = tree_to_uhwi (TYPE_SIZE (container_type));
2728 941 : vectype = get_vectype_for_scalar_type (vinfo, container_type);
2729 941 : append_pattern_def_seq (vinfo, stmt_info, pattern_stmt, vectype);
2730 : }
2731 1216 : else if (!useless_type_conversion_p (TREE_TYPE (container), ret_type))
2732 : /* If we are doing the conversion last then also delay the shift as we may
2733 : be able to combine the shift and conversion in certain cases. */
2734 : shift_first = false;
2735 :
2736 : /* If the only use of the result of this BIT_FIELD_REF + CONVERT is a
2737 : PLUS_EXPR then do the shift last as some targets can combine the shift and
2738 : add into a single instruction. */
2739 1416 : if (lhs && !is_pattern_stmt_p (stmt_info)
2740 3573 : && single_imm_use (lhs, &use_p, &use_stmt))
2741 : {
2742 1049 : if (gimple_code (use_stmt) == GIMPLE_ASSIGN
2743 1049 : && gimple_assign_rhs_code (use_stmt) == PLUS_EXPR)
2744 : shift_first = false;
2745 : }
2746 :
2747 : /* If we don't have to shift we only generate the mask, so just fix the
2748 : code-path to shift_first. */
2749 2157 : if (shift_n == 0)
2750 756 : shift_first = true;
2751 :
2752 2157 : tree result;
2753 2157 : if (shift_first && !ref_sext)
2754 : {
2755 503 : tree shifted = container;
2756 503 : if (shift_n)
2757 : {
2758 59 : pattern_stmt
2759 59 : = gimple_build_assign (vect_recog_temp_ssa_var (container_type),
2760 : RSHIFT_EXPR, container,
2761 59 : build_int_cst (sizetype, shift_n));
2762 59 : shifted = gimple_assign_lhs (pattern_stmt);
2763 59 : append_pattern_def_seq (vinfo, stmt_info, pattern_stmt, vectype);
2764 : }
2765 :
2766 503 : tree mask = wide_int_to_tree (container_type,
2767 503 : wi::mask (mask_width, false, prec));
2768 :
2769 503 : pattern_stmt
2770 503 : = gimple_build_assign (vect_recog_temp_ssa_var (container_type),
2771 : BIT_AND_EXPR, shifted, mask);
2772 503 : result = gimple_assign_lhs (pattern_stmt);
2773 : }
2774 : else
2775 : {
2776 1654 : tree temp = vect_recog_temp_ssa_var (container_type);
2777 1654 : if (!ref_sext)
2778 : {
2779 752 : tree mask = wide_int_to_tree (container_type,
2780 752 : wi::shifted_mask (shift_n,
2781 : mask_width,
2782 : false, prec));
2783 752 : pattern_stmt = gimple_build_assign (temp, BIT_AND_EXPR,
2784 : container, mask);
2785 : }
2786 : else
2787 : {
2788 902 : HOST_WIDE_INT shl = prec - shift_n - mask_width;
2789 902 : shift_n += shl;
2790 902 : pattern_stmt = gimple_build_assign (temp, LSHIFT_EXPR,
2791 : container,
2792 : build_int_cst (sizetype,
2793 902 : shl));
2794 : }
2795 :
2796 1654 : tree masked = gimple_assign_lhs (pattern_stmt);
2797 1654 : append_pattern_def_seq (vinfo, stmt_info, pattern_stmt, vectype);
2798 1654 : pattern_stmt
2799 1654 : = gimple_build_assign (vect_recog_temp_ssa_var (container_type),
2800 : RSHIFT_EXPR, masked,
2801 1654 : build_int_cst (sizetype, shift_n));
2802 1654 : result = gimple_assign_lhs (pattern_stmt);
2803 : }
2804 :
2805 2157 : if (!useless_type_conversion_p (TREE_TYPE (result), ret_type))
2806 : {
2807 1438 : append_pattern_def_seq (vinfo, stmt_info, pattern_stmt, vectype);
2808 1438 : pattern_stmt
2809 1438 : = gimple_build_assign (vect_recog_temp_ssa_var (ret_type),
2810 : NOP_EXPR, result);
2811 : }
2812 :
2813 2157 : if (!lhs)
2814 : {
2815 741 : if (!vectype)
2816 : return NULL;
2817 :
2818 603 : append_pattern_def_seq (vinfo, stmt_info, pattern_stmt, vectype);
2819 603 : vectype = truth_type_for (vectype);
2820 :
2821 : /* FIXME: This part extracts the boolean value out of the bitfield in the
2822 : same way as vect_recog_gcond_pattern does. However because
2823 : patterns cannot match the same root twice, when we handle and
2824 : lower the bitfield in the gcond, vect_recog_gcond_pattern can't
2825 : apply anymore. We should really fix it so that we don't need to
2826 : duplicate transformations like these. */
2827 603 : tree new_lhs = vect_recog_temp_ssa_var (boolean_type_node, NULL);
2828 603 : gcond *cond_stmt = dyn_cast <gcond *> (stmt_info->stmt);
2829 603 : tree cond_cst = gimple_cond_rhs (cond_stmt);
2830 603 : gimple *new_stmt
2831 603 : = gimple_build_assign (new_lhs, gimple_cond_code (cond_stmt),
2832 : gimple_get_lhs (pattern_stmt),
2833 : fold_convert (container_type, cond_cst));
2834 603 : append_pattern_def_seq (vinfo, stmt_info, new_stmt, vectype, container_type);
2835 603 : pattern_stmt
2836 603 : = gimple_build_cond (NE_EXPR, new_lhs,
2837 603 : build_zero_cst (TREE_TYPE (new_lhs)),
2838 : NULL_TREE, NULL_TREE);
2839 : }
2840 :
2841 2019 : *type_out = STMT_VINFO_VECTYPE (stmt_info);
2842 2019 : vect_pattern_detected ("bitfield_ref pattern", stmt_info->stmt);
2843 :
2844 2019 : return pattern_stmt;
2845 : }
2846 :
2847 : /* Function vect_recog_bit_insert_pattern
2848 :
2849 : Try to find the following pattern:
2850 :
2851 : written = BIT_INSERT_EXPR (container, value, bitpos);
2852 :
2853 : Input:
2854 :
2855 : * STMT_VINFO: The stmt we want to replace.
2856 :
2857 : Output:
2858 :
2859 : * TYPE_OUT: The vector type of the output of this pattern.
2860 :
2861 : * Return value: A new stmt that will be used to replace the sequence of
2862 : stmts that constitute the pattern. In this case it will be:
2863 : value = (container_type) value; // Make sure VALUE has the container type
2864 : shifted = value << bitpos; // Shift value into place
2865 : masked = shifted & (mask << bitpos); // Mask off the non-relevant bits in
2866 : // the 'to-write value'.
2867 : cleared = container & ~(mask << bitpos); // Clearing the bits we want to
2868 : // write to from the value we want
2869 : // to write to.
2870 : written = cleared | masked; // Write bits.
2871 :
2872 :
2873 : where mask = ((1 << TYPE_PRECISION (value)) - 1), a mask to keep the number of
2874 : bits corresponding to the real size of the bitfield value we are writing to.
2875 : The shifting is always optional depending on whether bitpos != 0.
2876 :
2877 : */
2878 :
2879 : static gimple *
2880 30795994 : vect_recog_bit_insert_pattern (vec_info *vinfo, stmt_vec_info stmt_info,
2881 : tree *type_out)
2882 : {
2883 30795994 : gassign *bf_stmt = dyn_cast <gassign *> (stmt_info->stmt);
2884 28136487 : if (!bf_stmt || gimple_assign_rhs_code (bf_stmt) != BIT_INSERT_EXPR)
2885 : return NULL;
2886 :
2887 597 : tree container = gimple_assign_rhs1 (bf_stmt);
2888 597 : tree value = gimple_assign_rhs2 (bf_stmt);
2889 597 : tree shift = gimple_assign_rhs3 (bf_stmt);
2890 :
2891 597 : tree bf_type = TREE_TYPE (value);
2892 597 : tree container_type = TREE_TYPE (container);
2893 :
2894 597 : if (!INTEGRAL_TYPE_P (container_type)
2895 597 : || !tree_fits_uhwi_p (TYPE_SIZE (container_type)))
2896 : return NULL;
2897 :
2898 500 : gimple *pattern_stmt;
2899 :
2900 500 : vect_unpromoted_value unprom;
2901 500 : unprom.set_op (value, vect_internal_def);
2902 500 : value = vect_convert_input (vinfo, stmt_info, container_type, &unprom,
2903 : get_vectype_for_scalar_type (vinfo,
2904 : container_type));
2905 :
2906 500 : unsigned HOST_WIDE_INT mask_width = TYPE_PRECISION (bf_type);
2907 500 : unsigned HOST_WIDE_INT prec = tree_to_uhwi (TYPE_SIZE (container_type));
2908 500 : unsigned HOST_WIDE_INT shift_n = tree_to_uhwi (shift);
2909 500 : if (BYTES_BIG_ENDIAN)
2910 : {
2911 : shift_n = prec - shift_n - mask_width;
2912 : shift = build_int_cst (TREE_TYPE (shift), shift_n);
2913 : }
2914 :
2915 500 : if (!useless_type_conversion_p (TREE_TYPE (value), container_type))
2916 : {
2917 0 : pattern_stmt =
2918 0 : gimple_build_assign (vect_recog_temp_ssa_var (container_type),
2919 : NOP_EXPR, value);
2920 0 : append_pattern_def_seq (vinfo, stmt_info, pattern_stmt);
2921 0 : value = gimple_get_lhs (pattern_stmt);
2922 : }
2923 :
2924 : /* Shift VALUE into place. */
2925 500 : tree shifted = value;
2926 500 : if (shift_n)
2927 : {
2928 249 : gimple_seq stmts = NULL;
2929 249 : shifted
2930 249 : = gimple_build (&stmts, LSHIFT_EXPR, container_type, value, shift);
2931 249 : if (!gimple_seq_empty_p (stmts))
2932 112 : append_pattern_def_seq (vinfo, stmt_info,
2933 : gimple_seq_first_stmt (stmts));
2934 : }
2935 :
2936 500 : tree mask_t
2937 500 : = wide_int_to_tree (container_type,
2938 500 : wi::shifted_mask (shift_n, mask_width, false, prec));
2939 :
2940 : /* Clear bits we don't want to write back from SHIFTED. */
2941 500 : gimple_seq stmts = NULL;
2942 500 : tree masked = gimple_build (&stmts, BIT_AND_EXPR, container_type, shifted,
2943 : mask_t);
2944 500 : if (!gimple_seq_empty_p (stmts))
2945 : {
2946 110 : pattern_stmt = gimple_seq_first_stmt (stmts);
2947 110 : append_pattern_def_seq (vinfo, stmt_info, pattern_stmt);
2948 : }
2949 :
2950 : /* Mask off the bits in the container that we are to write to. */
2951 500 : mask_t = wide_int_to_tree (container_type,
2952 500 : wi::shifted_mask (shift_n, mask_width, true, prec));
2953 500 : tree cleared = vect_recog_temp_ssa_var (container_type);
2954 500 : pattern_stmt = gimple_build_assign (cleared, BIT_AND_EXPR, container, mask_t);
2955 500 : append_pattern_def_seq (vinfo, stmt_info, pattern_stmt);
2956 :
2957 : /* Write MASKED into CLEARED. */
2958 500 : pattern_stmt
2959 500 : = gimple_build_assign (vect_recog_temp_ssa_var (container_type),
2960 : BIT_IOR_EXPR, cleared, masked);
2961 :
2962 500 : *type_out = STMT_VINFO_VECTYPE (stmt_info);
2963 500 : vect_pattern_detected ("bit_insert pattern", stmt_info->stmt);
2964 :
2965 500 : return pattern_stmt;
2966 : }
2967 :
2968 :
2969 : /* Recognize cases in which an operation is performed in one type WTYPE
2970 : but could be done more efficiently in a narrower type NTYPE. For example,
2971 : if we have:
2972 :
2973 : ATYPE a; // narrower than NTYPE
2974 : BTYPE b; // narrower than NTYPE
2975 : WTYPE aw = (WTYPE) a;
2976 : WTYPE bw = (WTYPE) b;
2977 : WTYPE res = aw + bw; // only uses of aw and bw
2978 :
2979 : then it would be more efficient to do:
2980 :
2981 : NTYPE an = (NTYPE) a;
2982 : NTYPE bn = (NTYPE) b;
2983 : NTYPE resn = an + bn;
2984 : WTYPE res = (WTYPE) resn;
2985 :
2986 : Other situations include things like:
2987 :
2988 : ATYPE a; // NTYPE or narrower
2989 : WTYPE aw = (WTYPE) a;
2990 : WTYPE res = aw + b;
2991 :
2992 : when only "(NTYPE) res" is significant. In that case it's more efficient
2993 : to truncate "b" and do the operation on NTYPE instead:
2994 :
2995 : NTYPE an = (NTYPE) a;
2996 : NTYPE bn = (NTYPE) b; // truncation
2997 : NTYPE resn = an + bn;
2998 : WTYPE res = (WTYPE) resn;
2999 :
3000 : All users of "res" should then use "resn" instead, making the final
3001 : statement dead (not marked as relevant). The final statement is still
3002 : needed to maintain the type correctness of the IR.
3003 :
3004 : vect_determine_precisions has already determined the minimum
3005 : precision of the operation and the minimum precision required
3006 : by users of the result. */
3007 :
3008 : static gimple *
3009 30796434 : vect_recog_over_widening_pattern (vec_info *vinfo,
3010 : stmt_vec_info last_stmt_info, tree *type_out)
3011 : {
3012 30796434 : gassign *last_stmt = dyn_cast <gassign *> (last_stmt_info->stmt);
3013 21073174 : if (!last_stmt)
3014 : return NULL;
3015 :
3016 : /* See whether we have found that this operation can be done on a
3017 : narrower type without changing its semantics. */
3018 21073174 : unsigned int new_precision = last_stmt_info->operation_precision;
3019 21073174 : if (!new_precision)
3020 : return NULL;
3021 :
3022 1634307 : tree lhs = gimple_assign_lhs (last_stmt);
3023 1634307 : tree type = TREE_TYPE (lhs);
3024 1634307 : tree_code code = gimple_assign_rhs_code (last_stmt);
3025 :
3026 : /* Punt for reductions where we don't handle the type conversions. */
3027 1634307 : if (vect_is_reduction (last_stmt_info))
3028 : return NULL;
3029 :
3030 : /* Keep the first operand of a COND_EXPR as-is: only the other two
3031 : operands are interesting. */
3032 1625961 : unsigned int first_op = (code == COND_EXPR ? 2 : 1);
3033 :
3034 : /* Check the operands. */
3035 1625961 : unsigned int nops = gimple_num_ops (last_stmt) - first_op;
3036 1625961 : auto_vec <vect_unpromoted_value, 3> unprom (nops);
3037 1625961 : unprom.quick_grow_cleared (nops);
3038 1625961 : unsigned int min_precision = 0;
3039 1625961 : bool single_use_p = false;
3040 4858499 : for (unsigned int i = 0; i < nops; ++i)
3041 : {
3042 3234096 : tree op = gimple_op (last_stmt, first_op + i);
3043 3234096 : if (TREE_CODE (op) == INTEGER_CST)
3044 1475193 : unprom[i].set_op (op, vect_constant_def);
3045 1758903 : else if (TREE_CODE (op) == SSA_NAME)
3046 : {
3047 1758903 : bool op_single_use_p = true;
3048 1758903 : if (!vect_look_through_possible_promotion (vinfo, op, &unprom[i],
3049 : &op_single_use_p))
3050 1558 : return NULL;
3051 : /* If:
3052 :
3053 : (1) N bits of the result are needed;
3054 : (2) all inputs are widened from M<N bits; and
3055 : (3) one operand OP is a single-use SSA name
3056 :
3057 : we can shift the M->N widening from OP to the output
3058 : without changing the number or type of extensions involved.
3059 : This then reduces the number of copies of STMT_INFO.
3060 :
3061 : If instead of (3) more than one operand is a single-use SSA name,
3062 : shifting the extension to the output is even more of a win.
3063 :
3064 : If instead:
3065 :
3066 : (1) N bits of the result are needed;
3067 : (2) one operand OP2 is widened from M2<N bits;
3068 : (3) another operand OP1 is widened from M1<M2 bits; and
3069 : (4) both OP1 and OP2 are single-use
3070 :
3071 : the choice is between:
3072 :
3073 : (a) truncating OP2 to M1, doing the operation on M1,
3074 : and then widening the result to N
3075 :
3076 : (b) widening OP1 to M2, doing the operation on M2, and then
3077 : widening the result to N
3078 :
3079 : Both shift the M2->N widening of the inputs to the output.
3080 : (a) additionally shifts the M1->M2 widening to the output;
3081 : it requires fewer copies of STMT_INFO but requires an extra
3082 : M2->M1 truncation.
3083 :
3084 : Which is better will depend on the complexity and cost of
3085 : STMT_INFO, which is hard to predict at this stage. However,
3086 : a clear tie-breaker in favor of (b) is the fact that the
3087 : truncation in (a) increases the length of the operation chain.
3088 :
3089 : If instead of (4) only one of OP1 or OP2 is single-use,
3090 : (b) is still a win over doing the operation in N bits:
3091 : it still shifts the M2->N widening on the single-use operand
3092 : to the output and reduces the number of STMT_INFO copies.
3093 :
3094 : If neither operand is single-use then operating on fewer than
3095 : N bits might lead to more extensions overall. Whether it does
3096 : or not depends on global information about the vectorization
3097 : region, and whether that's a good trade-off would again
3098 : depend on the complexity and cost of the statements involved,
3099 : as well as things like register pressure that are not normally
3100 : modelled at this stage. We therefore ignore these cases
3101 : and just optimize the clear single-use wins above.
3102 :
3103 : Thus we take the maximum precision of the unpromoted operands
3104 : and record whether any operand is single-use. */
3105 1757345 : if (unprom[i].dt == vect_internal_def)
3106 : {
3107 1017054 : min_precision = MAX (min_precision,
3108 : TYPE_PRECISION (unprom[i].type));
3109 1017054 : single_use_p |= op_single_use_p;
3110 : }
3111 : }
3112 : else
3113 : return NULL;
3114 : }
3115 :
3116 : /* Although the operation could be done in operation_precision, we have
3117 : to balance that against introducing extra truncations or extensions.
3118 : Calculate the minimum precision that can be handled efficiently.
3119 :
3120 : The loop above determined that the operation could be handled
3121 : efficiently in MIN_PRECISION if SINGLE_USE_P; this would shift an
3122 : extension from the inputs to the output without introducing more
3123 : instructions, and would reduce the number of instructions required
3124 : for STMT_INFO itself.
3125 :
3126 : vect_determine_precisions has also determined that the result only
3127 : needs min_output_precision bits. Truncating by a factor of N times
3128 : requires a tree of N - 1 instructions, so if TYPE is N times wider
3129 : than min_output_precision, doing the operation in TYPE and truncating
3130 : the result requires N + (N - 1) = 2N - 1 instructions per output vector.
3131 : In contrast:
3132 :
3133 : - truncating the input to a unary operation and doing the operation
3134 : in the new type requires at most N - 1 + 1 = N instructions per
3135 : output vector
3136 :
3137 : - doing the same for a binary operation requires at most
3138 : (N - 1) * 2 + 1 = 2N - 1 instructions per output vector
3139 :
3140 : Both unary and binary operations require fewer instructions than
3141 : this if the operands were extended from a suitable truncated form.
3142 : Thus there is usually nothing to lose by doing operations in
3143 : min_output_precision bits, but there can be something to gain. */
3144 1624403 : if (!single_use_p)
3145 1283505 : min_precision = last_stmt_info->min_output_precision;
3146 : else
3147 340898 : min_precision = MIN (min_precision, last_stmt_info->min_output_precision);
3148 :
3149 : /* Apply the minimum efficient precision we just calculated. */
3150 1624403 : if (new_precision < min_precision)
3151 : new_precision = min_precision;
3152 1624403 : new_precision = vect_element_precision (new_precision);
3153 1624403 : if (new_precision >= TYPE_PRECISION (type))
3154 : return NULL;
3155 :
3156 151334 : vect_pattern_detected ("vect_recog_over_widening_pattern", last_stmt);
3157 :
3158 151334 : *type_out = get_vectype_for_scalar_type (vinfo, type);
3159 151334 : if (!*type_out)
3160 : return NULL;
3161 :
3162 : /* We've found a viable pattern. Get the new type of the operation. */
3163 134268 : bool unsigned_p = (last_stmt_info->operation_sign == UNSIGNED);
3164 134268 : tree new_type = build_nonstandard_integer_type (new_precision, unsigned_p);
3165 :
3166 : /* If we're truncating an operation, we need to make sure that we
3167 : don't introduce new undefined overflow. The codes tested here are
3168 : a subset of those accepted by vect_truncatable_operation_p. */
3169 134268 : tree op_type = new_type;
3170 134268 : if (TYPE_OVERFLOW_UNDEFINED (new_type)
3171 174519 : && (code == PLUS_EXPR || code == MINUS_EXPR || code == MULT_EXPR))
3172 27861 : op_type = build_nonstandard_integer_type (new_precision, true);
3173 :
3174 134268 : tree new_vectype = get_vectype_for_scalar_type (vinfo, new_type);
3175 134268 : tree op_vectype = get_vectype_for_scalar_type (vinfo, op_type);
3176 134268 : if (!new_vectype || !op_vectype)
3177 : return NULL;
3178 :
3179 : /* Verify we can handle the new operation. For shifts and rotates
3180 : apply heuristic of whether we are likely facing vector-vector or
3181 : vector-scalar operation. Since we are eventually expecting that
3182 : a later pattern might eventually want to rewrite an unsupported
3183 : into a supported case error on that side in case the original
3184 : operation was not supported either or this is a binary operation
3185 : and the 2nd operand is constant. */
3186 134268 : if (code == RSHIFT_EXPR || code == LSHIFT_EXPR || code == RROTATE_EXPR)
3187 : {
3188 27892 : if (!target_has_vecop_for_code (code, op_vectype, optab_vector)
3189 26839 : && ((unprom[1].dt != vect_external_def
3190 26427 : && unprom[1].dt != vect_constant_def)
3191 18112 : || !target_has_vecop_for_code (code, op_vectype, optab_scalar))
3192 36648 : && !(!target_has_vecop_for_code (code, *type_out, optab_vector)
3193 7770 : && ((unprom[1].dt != vect_external_def
3194 7770 : || unprom[1].dt != vect_constant_def)
3195 : || !target_has_vecop_for_code (code, *type_out,
3196 : optab_scalar))))
3197 : return NULL;
3198 : }
3199 106376 : else if (!target_has_vecop_for_code (code, op_vectype, optab_vector)
3200 106376 : && (target_has_vecop_for_code (code, *type_out, optab_vector)
3201 27 : && !(nops == 2 && unprom[1].dt == vect_constant_def)))
3202 : return NULL;
3203 :
3204 133273 : if (dump_enabled_p ())
3205 4327 : dump_printf_loc (MSG_NOTE, vect_location, "demoting %T to %T\n",
3206 : type, new_type);
3207 :
3208 : /* Calculate the rhs operands for an operation on OP_TYPE. */
3209 133273 : tree ops[3] = {};
3210 133501 : for (unsigned int i = 1; i < first_op; ++i)
3211 228 : ops[i - 1] = gimple_op (last_stmt, i);
3212 133273 : vect_convert_inputs (vinfo, last_stmt_info, nops, &ops[first_op - 1],
3213 133273 : op_type, &unprom[0], op_vectype);
3214 :
3215 : /* Use the operation to produce a result of type OP_TYPE. */
3216 133273 : tree new_var = vect_recog_temp_ssa_var (op_type, NULL);
3217 133273 : gimple *pattern_stmt = gimple_build_assign (new_var, code,
3218 : ops[0], ops[1], ops[2]);
3219 133273 : gimple_set_location (pattern_stmt, gimple_location (last_stmt));
3220 :
3221 133273 : if (dump_enabled_p ())
3222 4327 : dump_printf_loc (MSG_NOTE, vect_location,
3223 : "created pattern stmt: %G", pattern_stmt);
3224 :
3225 : /* Convert back to the original signedness, if OP_TYPE is different
3226 : from NEW_TYPE. */
3227 133273 : if (op_type != new_type)
3228 27855 : pattern_stmt = vect_convert_output (vinfo, last_stmt_info, new_type,
3229 : pattern_stmt, op_vectype);
3230 :
3231 : /* Promote the result to the original type. */
3232 133273 : pattern_stmt = vect_convert_output (vinfo, last_stmt_info, type,
3233 : pattern_stmt, new_vectype);
3234 :
3235 133273 : return pattern_stmt;
3236 1625961 : }
3237 :
3238 : /* Recognize the following patterns:
3239 :
3240 : ATYPE a; // narrower than TYPE
3241 : BTYPE b; // narrower than TYPE
3242 :
3243 : 1) Multiply high with scaling
3244 : TYPE res = ((TYPE) a * (TYPE) b) >> c;
3245 : Here, c is bitsize (TYPE) / 2 - 1.
3246 :
3247 : 2) ... or also with rounding
3248 : TYPE res = (((TYPE) a * (TYPE) b) >> d + 1) >> 1;
3249 : Here, d is bitsize (TYPE) / 2 - 2.
3250 :
3251 : 3) Normal multiply high
3252 : TYPE res = ((TYPE) a * (TYPE) b) >> e;
3253 : Here, e is bitsize (TYPE) / 2.
3254 :
3255 : where only the bottom half of res is used. */
3256 :
3257 : static gimple *
3258 30922585 : vect_recog_mulhs_pattern (vec_info *vinfo,
3259 : stmt_vec_info last_stmt_info, tree *type_out)
3260 : {
3261 : /* Check for a right shift. */
3262 30922585 : gassign *last_stmt = dyn_cast <gassign *> (last_stmt_info->stmt);
3263 21199167 : if (!last_stmt
3264 21199167 : || gimple_assign_rhs_code (last_stmt) != RSHIFT_EXPR)
3265 : return NULL;
3266 :
3267 : /* Check that the shift result is wider than the users of the
3268 : result need (i.e. that narrowing would be a natural choice). */
3269 359881 : tree lhs_type = TREE_TYPE (gimple_assign_lhs (last_stmt));
3270 359881 : unsigned int target_precision
3271 359881 : = vect_element_precision (last_stmt_info->min_output_precision);
3272 359881 : if (!INTEGRAL_TYPE_P (lhs_type)
3273 359881 : || target_precision >= TYPE_PRECISION (lhs_type))
3274 : return NULL;
3275 :
3276 : /* Look through any change in sign on the outer shift input. */
3277 49663 : vect_unpromoted_value unprom_rshift_input;
3278 49663 : tree rshift_input = vect_look_through_possible_promotion
3279 49663 : (vinfo, gimple_assign_rhs1 (last_stmt), &unprom_rshift_input);
3280 49663 : if (!rshift_input
3281 49663 : || TYPE_PRECISION (TREE_TYPE (rshift_input))
3282 49065 : != TYPE_PRECISION (lhs_type))
3283 : return NULL;
3284 :
3285 : /* Get the definition of the shift input. */
3286 45836 : stmt_vec_info rshift_input_stmt_info
3287 45836 : = vect_get_internal_def (vinfo, rshift_input);
3288 45836 : if (!rshift_input_stmt_info)
3289 : return NULL;
3290 41213 : gassign *rshift_input_stmt
3291 30960570 : = dyn_cast <gassign *> (rshift_input_stmt_info->stmt);
3292 38129 : if (!rshift_input_stmt)
3293 : return NULL;
3294 :
3295 38129 : stmt_vec_info mulh_stmt_info;
3296 38129 : tree scale_term;
3297 38129 : bool rounding_p = false;
3298 :
3299 : /* Check for the presence of the rounding term. */
3300 45286 : if (gimple_assign_rhs_code (rshift_input_stmt) == PLUS_EXPR)
3301 : {
3302 : /* Check that the outer shift was by 1. */
3303 18994 : if (!integer_onep (gimple_assign_rhs2 (last_stmt)))
3304 9430 : return NULL;
3305 :
3306 : /* Check that the second operand of the PLUS_EXPR is 1. */
3307 1304 : if (!integer_onep (gimple_assign_rhs2 (rshift_input_stmt)))
3308 : return NULL;
3309 :
3310 : /* Look through any change in sign on the addition input. */
3311 110 : vect_unpromoted_value unprom_plus_input;
3312 110 : tree plus_input = vect_look_through_possible_promotion
3313 110 : (vinfo, gimple_assign_rhs1 (rshift_input_stmt), &unprom_plus_input);
3314 110 : if (!plus_input
3315 110 : || TYPE_PRECISION (TREE_TYPE (plus_input))
3316 110 : != TYPE_PRECISION (TREE_TYPE (rshift_input)))
3317 : return NULL;
3318 :
3319 : /* Get the definition of the multiply-high-scale part. */
3320 110 : stmt_vec_info plus_input_stmt_info
3321 110 : = vect_get_internal_def (vinfo, plus_input);
3322 110 : if (!plus_input_stmt_info)
3323 : return NULL;
3324 110 : gassign *plus_input_stmt
3325 9540 : = dyn_cast <gassign *> (plus_input_stmt_info->stmt);
3326 110 : if (!plus_input_stmt
3327 110 : || gimple_assign_rhs_code (plus_input_stmt) != RSHIFT_EXPR)
3328 : return NULL;
3329 :
3330 : /* Look through any change in sign on the scaling input. */
3331 67 : vect_unpromoted_value unprom_scale_input;
3332 67 : tree scale_input = vect_look_through_possible_promotion
3333 67 : (vinfo, gimple_assign_rhs1 (plus_input_stmt), &unprom_scale_input);
3334 67 : if (!scale_input
3335 67 : || TYPE_PRECISION (TREE_TYPE (scale_input))
3336 67 : != TYPE_PRECISION (TREE_TYPE (plus_input)))
3337 : return NULL;
3338 :
3339 : /* Get the definition of the multiply-high part. */
3340 67 : mulh_stmt_info = vect_get_internal_def (vinfo, scale_input);
3341 67 : if (!mulh_stmt_info)
3342 : return NULL;
3343 :
3344 : /* Get the scaling term. */
3345 67 : scale_term = gimple_assign_rhs2 (plus_input_stmt);
3346 67 : rounding_p = true;
3347 : }
3348 : else
3349 : {
3350 28632 : mulh_stmt_info = rshift_input_stmt_info;
3351 28632 : scale_term = gimple_assign_rhs2 (last_stmt);
3352 : }
3353 :
3354 : /* Check that the scaling factor is constant. */
3355 28699 : if (TREE_CODE (scale_term) != INTEGER_CST)
3356 : return NULL;
3357 :
3358 : /* Check whether the scaling input term can be seen as two widened
3359 : inputs multiplied together. */
3360 83238 : vect_unpromoted_value unprom_mult[2];
3361 27746 : tree new_type;
3362 27746 : unsigned int nops
3363 27746 : = vect_widened_op_tree (vinfo, mulh_stmt_info, MULT_EXPR, WIDEN_MULT_EXPR,
3364 : false, 2, unprom_mult, &new_type);
3365 27746 : if (nops != 2)
3366 : return NULL;
3367 :
3368 : /* Adjust output precision. */
3369 1253 : if (TYPE_PRECISION (new_type) < target_precision)
3370 0 : new_type = build_nonstandard_integer_type
3371 0 : (target_precision, TYPE_UNSIGNED (new_type));
3372 :
3373 1253 : unsigned mult_precision = TYPE_PRECISION (new_type);
3374 1253 : internal_fn ifn;
3375 : /* Check that the scaling factor is expected. Instead of
3376 : target_precision, we should use the one that we actually
3377 : use for internal function. */
3378 1253 : if (rounding_p)
3379 : {
3380 : /* Check pattern 2). */
3381 134 : if (wi::to_widest (scale_term) + mult_precision + 2
3382 201 : != TYPE_PRECISION (lhs_type))
3383 : return NULL;
3384 :
3385 : ifn = IFN_MULHRS;
3386 : }
3387 : else
3388 : {
3389 : /* Check for pattern 1). */
3390 2372 : if (wi::to_widest (scale_term) + mult_precision + 1
3391 3558 : == TYPE_PRECISION (lhs_type))
3392 : ifn = IFN_MULHS;
3393 : /* Check for pattern 3). */
3394 1152 : else if (wi::to_widest (scale_term) + mult_precision
3395 2304 : == TYPE_PRECISION (lhs_type))
3396 : ifn = IFN_MULH;
3397 : else
3398 : return NULL;
3399 : }
3400 :
3401 1190 : vect_pattern_detected ("vect_recog_mulhs_pattern", last_stmt);
3402 :
3403 : /* Check for target support. */
3404 1190 : tree new_vectype = get_vectype_for_scalar_type (vinfo, new_type);
3405 1190 : if (!new_vectype
3406 2351 : || !direct_internal_fn_supported_p
3407 1161 : (ifn, new_vectype, OPTIMIZE_FOR_SPEED))
3408 1046 : return NULL;
3409 :
3410 : /* The IR requires a valid vector type for the cast result, even though
3411 : it's likely to be discarded. */
3412 144 : *type_out = get_vectype_for_scalar_type (vinfo, lhs_type);
3413 144 : if (!*type_out)
3414 : return NULL;
3415 :
3416 : /* Generate the IFN_MULHRS call. */
3417 144 : tree new_var = vect_recog_temp_ssa_var (new_type, NULL);
3418 144 : tree new_ops[2];
3419 144 : vect_convert_inputs (vinfo, last_stmt_info, 2, new_ops, new_type,
3420 : unprom_mult, new_vectype);
3421 144 : gcall *mulhrs_stmt
3422 144 : = gimple_build_call_internal (ifn, 2, new_ops[0], new_ops[1]);
3423 144 : gimple_call_set_lhs (mulhrs_stmt, new_var);
3424 144 : gimple_set_location (mulhrs_stmt, gimple_location (last_stmt));
3425 :
3426 144 : if (dump_enabled_p ())
3427 0 : dump_printf_loc (MSG_NOTE, vect_location,
3428 : "created pattern stmt: %G", (gimple *) mulhrs_stmt);
3429 :
3430 144 : return vect_convert_output (vinfo, last_stmt_info, lhs_type,
3431 144 : mulhrs_stmt, new_vectype);
3432 : }
3433 :
3434 : /* Recognize the patterns:
3435 :
3436 : ATYPE a; // narrower than TYPE
3437 : BTYPE b; // narrower than TYPE
3438 : (1) TYPE avg = ((TYPE) a + (TYPE) b) >> 1;
3439 : or (2) TYPE avg = ((TYPE) a + (TYPE) b + 1) >> 1;
3440 :
3441 : where only the bottom half of avg is used. Try to transform them into:
3442 :
3443 : (1) NTYPE avg' = .AVG_FLOOR ((NTYPE) a, (NTYPE) b);
3444 : or (2) NTYPE avg' = .AVG_CEIL ((NTYPE) a, (NTYPE) b);
3445 :
3446 : followed by:
3447 :
3448 : TYPE avg = (TYPE) avg';
3449 :
3450 : where NTYPE is no wider than half of TYPE. Since only the bottom half
3451 : of avg is used, all or part of the cast of avg' should become redundant.
3452 :
3453 : If there is no target support available, generate code to distribute rshift
3454 : over plus and add a carry. */
3455 :
3456 : static gimple *
3457 30920951 : vect_recog_average_pattern (vec_info *vinfo,
3458 : stmt_vec_info last_stmt_info, tree *type_out)
3459 : {
3460 : /* Check for a shift right by one bit. */
3461 30920951 : gassign *last_stmt = dyn_cast <gassign *> (last_stmt_info->stmt);
3462 21197691 : if (!last_stmt
3463 21197691 : || gimple_assign_rhs_code (last_stmt) != RSHIFT_EXPR
3464 359787 : || !integer_onep (gimple_assign_rhs2 (last_stmt)))
3465 30864997 : return NULL;
3466 :
3467 : /* Check that the shift result is wider than the users of the
3468 : result need (i.e. that narrowing would be a natural choice). */
3469 55954 : tree lhs = gimple_assign_lhs (last_stmt);
3470 55954 : tree type = TREE_TYPE (lhs);
3471 55954 : unsigned int target_precision
3472 55954 : = vect_element_precision (last_stmt_info->min_output_precision);
3473 55954 : if (!INTEGRAL_TYPE_P (type) || target_precision >= TYPE_PRECISION (type))
3474 : return NULL;
3475 :
3476 : /* Look through any change in sign on the shift input. */
3477 2218 : tree rshift_rhs = gimple_assign_rhs1 (last_stmt);
3478 2218 : vect_unpromoted_value unprom_plus;
3479 2218 : rshift_rhs = vect_look_through_possible_promotion (vinfo, rshift_rhs,
3480 : &unprom_plus);
3481 2218 : if (!rshift_rhs
3482 2218 : || TYPE_PRECISION (TREE_TYPE (rshift_rhs)) != TYPE_PRECISION (type))
3483 : return NULL;
3484 :
3485 : /* Get the definition of the shift input. */
3486 2216 : stmt_vec_info plus_stmt_info = vect_get_internal_def (vinfo, rshift_rhs);
3487 2216 : if (!plus_stmt_info)
3488 : return NULL;
3489 :
3490 : /* Check whether the shift input can be seen as a tree of additions on
3491 : 2 or 3 widened inputs.
3492 :
3493 : Note that the pattern should be a win even if the result of one or
3494 : more additions is reused elsewhere: if the pattern matches, we'd be
3495 : replacing 2N RSHIFT_EXPRs and N VEC_PACK_*s with N IFN_AVG_*s. */
3496 8792 : internal_fn ifn = IFN_AVG_FLOOR;
3497 8792 : vect_unpromoted_value unprom[3];
3498 2198 : tree new_type;
3499 2198 : unsigned int nops = vect_widened_op_tree (vinfo, plus_stmt_info, PLUS_EXPR,
3500 2198 : IFN_VEC_WIDEN_PLUS, false, 3,
3501 : unprom, &new_type);
3502 2198 : if (nops == 0)
3503 : return NULL;
3504 907 : if (nops == 3)
3505 : {
3506 : /* Check that one operand is 1. */
3507 : unsigned int i;
3508 987 : for (i = 0; i < 3; ++i)
3509 933 : if (integer_onep (unprom[i].op))
3510 : break;
3511 311 : if (i == 3)
3512 : return NULL;
3513 : /* Throw away the 1 operand and keep the other two. */
3514 257 : if (i < 2)
3515 0 : unprom[i] = unprom[2];
3516 : ifn = IFN_AVG_CEIL;
3517 : }
3518 :
3519 853 : vect_pattern_detected ("vect_recog_average_pattern", last_stmt);
3520 :
3521 : /* We know that:
3522 :
3523 : (a) the operation can be viewed as:
3524 :
3525 : TYPE widened0 = (TYPE) UNPROM[0];
3526 : TYPE widened1 = (TYPE) UNPROM[1];
3527 : TYPE tmp1 = widened0 + widened1 {+ 1};
3528 : TYPE tmp2 = tmp1 >> 1; // LAST_STMT_INFO
3529 :
3530 : (b) the first two statements are equivalent to:
3531 :
3532 : TYPE widened0 = (TYPE) (NEW_TYPE) UNPROM[0];
3533 : TYPE widened1 = (TYPE) (NEW_TYPE) UNPROM[1];
3534 :
3535 : (c) vect_recog_over_widening_pattern has already tried to narrow TYPE
3536 : where sensible;
3537 :
3538 : (d) all the operations can be performed correctly at twice the width of
3539 : NEW_TYPE, due to the nature of the average operation; and
3540 :
3541 : (e) users of the result of the right shift need only TARGET_PRECISION
3542 : bits, where TARGET_PRECISION is no more than half of TYPE's
3543 : precision.
3544 :
3545 : Under these circumstances, the only situation in which NEW_TYPE
3546 : could be narrower than TARGET_PRECISION is if widened0, widened1
3547 : and an addition result are all used more than once. Thus we can
3548 : treat any widening of UNPROM[0] and UNPROM[1] to TARGET_PRECISION
3549 : as "free", whereas widening the result of the average instruction
3550 : from NEW_TYPE to TARGET_PRECISION would be a new operation. It's
3551 : therefore better not to go narrower than TARGET_PRECISION. */
3552 853 : if (TYPE_PRECISION (new_type) < target_precision)
3553 0 : new_type = build_nonstandard_integer_type (target_precision,
3554 0 : TYPE_UNSIGNED (new_type));
3555 :
3556 : /* Check for target support. */
3557 853 : tree new_vectype = get_vectype_for_scalar_type (vinfo, new_type);
3558 853 : if (!new_vectype)
3559 : return NULL;
3560 :
3561 853 : bool fallback_p = false;
3562 :
3563 853 : if (direct_internal_fn_supported_p (ifn, new_vectype, OPTIMIZE_FOR_SPEED))
3564 : ;
3565 695 : else if (TYPE_UNSIGNED (new_type)
3566 256 : && optab_for_tree_code (RSHIFT_EXPR, new_vectype, optab_scalar)
3567 256 : && optab_for_tree_code (PLUS_EXPR, new_vectype, optab_default)
3568 256 : && optab_for_tree_code (BIT_IOR_EXPR, new_vectype, optab_default)
3569 951 : && optab_for_tree_code (BIT_AND_EXPR, new_vectype, optab_default))
3570 : fallback_p = true;
3571 : else
3572 439 : return NULL;
3573 :
3574 : /* The IR requires a valid vector type for the cast result, even though
3575 : it's likely to be discarded. */
3576 414 : *type_out = get_vectype_for_scalar_type (vinfo, type);
3577 414 : if (!*type_out)
3578 : return NULL;
3579 :
3580 410 : tree new_var = vect_recog_temp_ssa_var (new_type, NULL);
3581 410 : tree new_ops[2];
3582 410 : vect_convert_inputs (vinfo, last_stmt_info, 2, new_ops, new_type,
3583 : unprom, new_vectype);
3584 :
3585 410 : if (fallback_p)
3586 : {
3587 : /* As a fallback, generate code for following sequence:
3588 :
3589 : shifted_op0 = new_ops[0] >> 1;
3590 : shifted_op1 = new_ops[1] >> 1;
3591 : sum_of_shifted = shifted_op0 + shifted_op1;
3592 : unmasked_carry = new_ops[0] and/or new_ops[1];
3593 : carry = unmasked_carry & 1;
3594 : new_var = sum_of_shifted + carry;
3595 : */
3596 :
3597 252 : tree one_cst = build_one_cst (new_type);
3598 252 : gassign *g;
3599 :
3600 252 : tree shifted_op0 = vect_recog_temp_ssa_var (new_type, NULL);
3601 252 : g = gimple_build_assign (shifted_op0, RSHIFT_EXPR, new_ops[0], one_cst);
3602 252 : append_pattern_def_seq (vinfo, last_stmt_info, g, new_vectype);
3603 :
3604 252 : tree shifted_op1 = vect_recog_temp_ssa_var (new_type, NULL);
3605 252 : g = gimple_build_assign (shifted_op1, RSHIFT_EXPR, new_ops[1], one_cst);
3606 252 : append_pattern_def_seq (vinfo, last_stmt_info, g, new_vectype);
3607 :
3608 252 : tree sum_of_shifted = vect_recog_temp_ssa_var (new_type, NULL);
3609 252 : g = gimple_build_assign (sum_of_shifted, PLUS_EXPR,
3610 : shifted_op0, shifted_op1);
3611 252 : append_pattern_def_seq (vinfo, last_stmt_info, g, new_vectype);
3612 :
3613 252 : tree unmasked_carry = vect_recog_temp_ssa_var (new_type, NULL);
3614 252 : tree_code c = (ifn == IFN_AVG_CEIL) ? BIT_IOR_EXPR : BIT_AND_EXPR;
3615 252 : g = gimple_build_assign (unmasked_carry, c, new_ops[0], new_ops[1]);
3616 252 : append_pattern_def_seq (vinfo, last_stmt_info, g, new_vectype);
3617 :
3618 252 : tree carry = vect_recog_temp_ssa_var (new_type, NULL);
3619 252 : g = gimple_build_assign (carry, BIT_AND_EXPR, unmasked_carry, one_cst);
3620 252 : append_pattern_def_seq (vinfo, last_stmt_info, g, new_vectype);
3621 :
3622 252 : g = gimple_build_assign (new_var, PLUS_EXPR, sum_of_shifted, carry);
3623 252 : return vect_convert_output (vinfo, last_stmt_info, type, g, new_vectype);
3624 : }
3625 :
3626 : /* Generate the IFN_AVG* call. */
3627 158 : gcall *average_stmt = gimple_build_call_internal (ifn, 2, new_ops[0],
3628 : new_ops[1]);
3629 158 : gimple_call_set_lhs (average_stmt, new_var);
3630 158 : gimple_set_location (average_stmt, gimple_location (last_stmt));
3631 :
3632 158 : if (dump_enabled_p ())
3633 31 : dump_printf_loc (MSG_NOTE, vect_location,
3634 : "created pattern stmt: %G", (gimple *) average_stmt);
3635 :
3636 158 : return vect_convert_output (vinfo, last_stmt_info,
3637 158 : type, average_stmt, new_vectype);
3638 : }
3639 :
3640 : /* Recognize cases in which the input to a cast is wider than its
3641 : output, and the input is fed by a widening operation. Fold this
3642 : by removing the unnecessary intermediate widening. E.g.:
3643 :
3644 : unsigned char a;
3645 : unsigned int b = (unsigned int) a;
3646 : unsigned short c = (unsigned short) b;
3647 :
3648 : -->
3649 :
3650 : unsigned short c = (unsigned short) a;
3651 :
3652 : Although this is rare in input IR, it is an expected side-effect
3653 : of the over-widening pattern above.
3654 :
3655 : This is beneficial also for integer-to-float conversions, if the
3656 : widened integer has more bits than the float, and if the unwidened
3657 : input doesn't. */
3658 :
3659 : static gimple *
3660 30922585 : vect_recog_cast_forwprop_pattern (vec_info *vinfo,
3661 : stmt_vec_info last_stmt_info, tree *type_out)
3662 : {
3663 : /* Check for a cast, including an integer-to-float conversion. */
3664 52074948 : gassign *last_stmt = dyn_cast <gassign *> (last_stmt_info->stmt);
3665 21199023 : if (!last_stmt)
3666 : return NULL;
3667 21199023 : tree_code code = gimple_assign_rhs_code (last_stmt);
3668 21199023 : if (!CONVERT_EXPR_CODE_P (code) && code != FLOAT_EXPR)
3669 : return NULL;
3670 :
3671 : /* Make sure that the rhs is a scalar with a natural bitsize. */
3672 3015827 : tree lhs = gimple_assign_lhs (last_stmt);
3673 3015827 : if (!lhs)
3674 : return NULL;
3675 3015827 : tree lhs_type = TREE_TYPE (lhs);
3676 3015827 : scalar_mode lhs_mode;
3677 2996072 : if (VECT_SCALAR_BOOLEAN_TYPE_P (lhs_type)
3678 6010163 : || !is_a <scalar_mode> (TYPE_MODE (lhs_type), &lhs_mode))
3679 25309 : return NULL;
3680 :
3681 : /* Check for a narrowing operation (from a vector point of view). */
3682 2990518 : tree rhs = gimple_assign_rhs1 (last_stmt);
3683 2990518 : tree rhs_type = TREE_TYPE (rhs);
3684 2990518 : if (!INTEGRAL_TYPE_P (rhs_type)
3685 2681067 : || VECT_SCALAR_BOOLEAN_TYPE_P (rhs_type)
3686 8192186 : || TYPE_PRECISION (rhs_type) <= GET_MODE_BITSIZE (lhs_mode))
3687 : return NULL;
3688 :
3689 : /* Try to find an unpromoted input. */
3690 343023 : vect_unpromoted_value unprom;
3691 343023 : if (!vect_look_through_possible_promotion (vinfo, rhs, &unprom)
3692 343023 : || TYPE_PRECISION (unprom.type) >= TYPE_PRECISION (rhs_type))
3693 : return NULL;
3694 :
3695 : /* If the bits above RHS_TYPE matter, make sure that they're the
3696 : same when extending from UNPROM as they are when extending from RHS. */
3697 46790 : if (!INTEGRAL_TYPE_P (lhs_type)
3698 46790 : && TYPE_SIGN (rhs_type) != TYPE_SIGN (unprom.type))
3699 : return NULL;
3700 :
3701 : /* We can get the same result by casting UNPROM directly, to avoid
3702 : the unnecessary widening and narrowing. */
3703 46660 : vect_pattern_detected ("vect_recog_cast_forwprop_pattern", last_stmt);
3704 :
3705 46660 : *type_out = get_vectype_for_scalar_type (vinfo, lhs_type);
3706 46660 : if (!*type_out)
3707 : return NULL;
3708 :
3709 46660 : tree new_var = vect_recog_temp_ssa_var (lhs_type, NULL);
3710 46660 : gimple *pattern_stmt = gimple_build_assign (new_var, code, unprom.op);
3711 46660 : gimple_set_location (pattern_stmt, gimple_location (last_stmt));
3712 :
3713 46660 : return pattern_stmt;
3714 : }
3715 :
3716 : /* Try to detect a shift left of a widened input, converting LSHIFT_EXPR
3717 : to WIDEN_LSHIFT_EXPR. See vect_recog_widen_op_pattern for details. */
3718 :
3719 : static gimple *
3720 30850614 : vect_recog_widen_shift_pattern (vec_info *vinfo,
3721 : stmt_vec_info last_stmt_info, tree *type_out)
3722 : {
3723 30850614 : return vect_recog_widen_op_pattern (vinfo, last_stmt_info, type_out,
3724 30850614 : LSHIFT_EXPR, WIDEN_LSHIFT_EXPR, true,
3725 30850614 : "vect_recog_widen_shift_pattern");
3726 : }
3727 :
3728 : /* Detect a rotate pattern that wouldn't otherwise be vectorized:
3729 :
3730 : type a_t, b_t, c_t;
3731 :
3732 : S0 a_t = b_t r<< c_t;
3733 :
3734 : Input/Output:
3735 :
3736 : * STMT_VINFO: The stmt from which the pattern search begins,
3737 : i.e. the shift/rotate stmt. The original stmt (S0) is replaced
3738 : with a sequence:
3739 :
3740 : S1 d_t = -c_t;
3741 : S2 e_t = d_t & (B - 1);
3742 : S3 f_t = b_t << c_t;
3743 : S4 g_t = b_t >> e_t;
3744 : S0 a_t = f_t | g_t;
3745 :
3746 : where B is element bitsize of type.
3747 :
3748 : Output:
3749 :
3750 : * TYPE_OUT: The type of the output of this pattern.
3751 :
3752 : * Return value: A new stmt that will be used to replace the rotate
3753 : S0 stmt. */
3754 :
3755 : static gimple *
3756 30850614 : vect_recog_rotate_pattern (vec_info *vinfo,
3757 : stmt_vec_info stmt_vinfo, tree *type_out)
3758 : {
3759 30850614 : gimple *last_stmt = stmt_vinfo->stmt;
3760 30850614 : tree oprnd0, oprnd1, lhs, var, var1, var2, vectype, type, stype, def, def2;
3761 30850614 : gimple *pattern_stmt, *def_stmt;
3762 30850614 : enum tree_code rhs_code;
3763 30850614 : enum vect_def_type dt;
3764 30850614 : optab optab1, optab2;
3765 30850614 : edge ext_def = NULL;
3766 30850614 : bool bswap16_p = false;
3767 :
3768 30850614 : if (is_gimple_assign (last_stmt))
3769 : {
3770 21127000 : rhs_code = gimple_assign_rhs_code (last_stmt);
3771 21127000 : switch (rhs_code)
3772 : {
3773 7245 : case LROTATE_EXPR:
3774 7245 : case RROTATE_EXPR:
3775 7245 : break;
3776 : default:
3777 : return NULL;
3778 : }
3779 :
3780 7245 : lhs = gimple_assign_lhs (last_stmt);
3781 7245 : oprnd0 = gimple_assign_rhs1 (last_stmt);
3782 7245 : type = TREE_TYPE (oprnd0);
3783 7245 : oprnd1 = gimple_assign_rhs2 (last_stmt);
3784 : }
3785 9723614 : else if (gimple_call_builtin_p (last_stmt, BUILT_IN_BSWAP16))
3786 : {
3787 : /* __builtin_bswap16 (x) is another form of x r>> 8.
3788 : The vectorizer has bswap support, but only if the argument isn't
3789 : promoted. */
3790 170 : lhs = gimple_call_lhs (last_stmt);
3791 170 : oprnd0 = gimple_call_arg (last_stmt, 0);
3792 170 : type = TREE_TYPE (oprnd0);
3793 170 : if (!lhs
3794 170 : || TYPE_PRECISION (TREE_TYPE (lhs)) != 16
3795 170 : || TYPE_PRECISION (type) <= 16
3796 0 : || TREE_CODE (oprnd0) != SSA_NAME
3797 170 : || BITS_PER_UNIT != 8)
3798 170 : return NULL;
3799 :
3800 0 : stmt_vec_info def_stmt_info;
3801 0 : if (!vect_is_simple_use (oprnd0, vinfo, &dt, &def_stmt_info, &def_stmt))
3802 : return NULL;
3803 :
3804 0 : if (dt != vect_internal_def)
3805 : return NULL;
3806 :
3807 0 : if (gimple_assign_cast_p (def_stmt))
3808 : {
3809 0 : def = gimple_assign_rhs1 (def_stmt);
3810 0 : if (INTEGRAL_TYPE_P (TREE_TYPE (def))
3811 0 : && TYPE_PRECISION (TREE_TYPE (def)) == 16)
3812 : oprnd0 = def;
3813 : }
3814 :
3815 0 : type = TREE_TYPE (lhs);
3816 0 : vectype = get_vectype_for_scalar_type (vinfo, type);
3817 0 : if (vectype == NULL_TREE)
3818 : return NULL;
3819 :
3820 0 : if (tree char_vectype = get_same_sized_vectype (char_type_node, vectype))
3821 : {
3822 : /* The encoding uses one stepped pattern for each byte in the
3823 : 16-bit word. */
3824 0 : vec_perm_builder elts (TYPE_VECTOR_SUBPARTS (char_vectype), 2, 3);
3825 0 : for (unsigned i = 0; i < 3; ++i)
3826 0 : for (unsigned j = 0; j < 2; ++j)
3827 0 : elts.quick_push ((i + 1) * 2 - j - 1);
3828 :
3829 0 : vec_perm_indices indices (elts, 1,
3830 0 : TYPE_VECTOR_SUBPARTS (char_vectype));
3831 0 : machine_mode vmode = TYPE_MODE (char_vectype);
3832 0 : if (can_vec_perm_const_p (vmode, vmode, indices))
3833 : {
3834 : /* vectorizable_bswap can handle the __builtin_bswap16 if we
3835 : undo the argument promotion. */
3836 0 : if (!useless_type_conversion_p (type, TREE_TYPE (oprnd0)))
3837 : {
3838 0 : def = vect_recog_temp_ssa_var (type, NULL);
3839 0 : def_stmt = gimple_build_assign (def, NOP_EXPR, oprnd0);
3840 0 : append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
3841 0 : oprnd0 = def;
3842 : }
3843 :
3844 : /* Pattern detected. */
3845 0 : vect_pattern_detected ("vect_recog_rotate_pattern", last_stmt);
3846 :
3847 0 : *type_out = vectype;
3848 :
3849 : /* Pattern supported. Create a stmt to be used to replace the
3850 : pattern, with the unpromoted argument. */
3851 0 : var = vect_recog_temp_ssa_var (type, NULL);
3852 0 : pattern_stmt = gimple_build_call (gimple_call_fndecl (last_stmt),
3853 : 1, oprnd0);
3854 0 : gimple_call_set_lhs (pattern_stmt, var);
3855 0 : gimple_call_set_fntype (as_a <gcall *> (pattern_stmt),
3856 : gimple_call_fntype (last_stmt));
3857 0 : return pattern_stmt;
3858 : }
3859 0 : }
3860 :
3861 0 : oprnd1 = build_int_cst (integer_type_node, 8);
3862 0 : rhs_code = LROTATE_EXPR;
3863 0 : bswap16_p = true;
3864 : }
3865 : else
3866 : return NULL;
3867 :
3868 7245 : if (TREE_CODE (oprnd0) != SSA_NAME
3869 7125 : || !INTEGRAL_TYPE_P (type)
3870 14049 : || TYPE_PRECISION (TREE_TYPE (lhs)) != TYPE_PRECISION (type))
3871 : return NULL;
3872 :
3873 6804 : stmt_vec_info def_stmt_info;
3874 6804 : if (!vect_is_simple_use (oprnd1, vinfo, &dt, &def_stmt_info, &def_stmt))
3875 : return NULL;
3876 :
3877 6804 : if (dt != vect_internal_def
3878 6601 : && dt != vect_constant_def
3879 25 : && dt != vect_external_def)
3880 : return NULL;
3881 :
3882 6798 : vectype = get_vectype_for_scalar_type (vinfo, type);
3883 6798 : if (vectype == NULL_TREE)
3884 : return NULL;
3885 :
3886 : /* If vector/vector or vector/scalar rotate is supported by the target,
3887 : don't do anything here. */
3888 6571 : optab1 = optab_for_tree_code (rhs_code, vectype, optab_vector);
3889 6571 : if (optab1
3890 6571 : && can_implement_p (optab1, TYPE_MODE (vectype)))
3891 : {
3892 564 : use_rotate:
3893 564 : if (bswap16_p)
3894 : {
3895 0 : if (!useless_type_conversion_p (type, TREE_TYPE (oprnd0)))
3896 : {
3897 0 : def = vect_recog_temp_ssa_var (type, NULL);
3898 0 : def_stmt = gimple_build_assign (def, NOP_EXPR, oprnd0);
3899 0 : append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
3900 0 : oprnd0 = def;
3901 : }
3902 :
3903 : /* Pattern detected. */
3904 0 : vect_pattern_detected ("vect_recog_rotate_pattern", last_stmt);
3905 :
3906 0 : *type_out = vectype;
3907 :
3908 : /* Pattern supported. Create a stmt to be used to replace the
3909 : pattern. */
3910 0 : var = vect_recog_temp_ssa_var (type, NULL);
3911 0 : pattern_stmt = gimple_build_assign (var, LROTATE_EXPR, oprnd0,
3912 : oprnd1);
3913 0 : return pattern_stmt;
3914 : }
3915 : return NULL;
3916 : }
3917 :
3918 6547 : if (is_a <bb_vec_info> (vinfo) || dt != vect_internal_def)
3919 : {
3920 6463 : optab2 = optab_for_tree_code (rhs_code, vectype, optab_scalar);
3921 6463 : if (optab2
3922 6463 : && can_implement_p (optab2, TYPE_MODE (vectype)))
3923 540 : goto use_rotate;
3924 : }
3925 :
3926 : /* We may not use a reduction operand twice. */
3927 6007 : if (vect_is_reduction (stmt_vinfo))
3928 : return NULL;
3929 :
3930 5986 : tree utype = unsigned_type_for (type);
3931 5986 : tree uvectype = get_vectype_for_scalar_type (vinfo, utype);
3932 5986 : if (!uvectype)
3933 : return NULL;
3934 :
3935 : /* If vector/vector or vector/scalar shifts aren't supported by the target,
3936 : don't do anything here either. */
3937 5986 : optab1 = optab_for_tree_code (LSHIFT_EXPR, uvectype, optab_vector);
3938 5986 : optab2 = optab_for_tree_code (RSHIFT_EXPR, uvectype, optab_vector);
3939 5986 : if (!optab1
3940 5986 : || !can_implement_p (optab1, TYPE_MODE (uvectype))
3941 746 : || !optab2
3942 6732 : || !can_implement_p (optab2, TYPE_MODE (uvectype)))
3943 : {
3944 5240 : if (! is_a <bb_vec_info> (vinfo) && dt == vect_internal_def)
3945 : return NULL;
3946 5177 : optab1 = optab_for_tree_code (LSHIFT_EXPR, uvectype, optab_scalar);
3947 5177 : optab2 = optab_for_tree_code (RSHIFT_EXPR, uvectype, optab_scalar);
3948 5177 : if (!optab1
3949 5177 : || !can_implement_p (optab1, TYPE_MODE (uvectype))
3950 3942 : || !optab2
3951 9119 : || !can_implement_p (optab2, TYPE_MODE (uvectype)))
3952 1235 : return NULL;
3953 : }
3954 :
3955 4688 : *type_out = vectype;
3956 :
3957 4688 : if (!useless_type_conversion_p (utype, TREE_TYPE (oprnd0)))
3958 : {
3959 52 : def = vect_recog_temp_ssa_var (utype, NULL);
3960 52 : def_stmt = gimple_build_assign (def, NOP_EXPR, oprnd0);
3961 52 : append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt, uvectype);
3962 52 : oprnd0 = def;
3963 : }
3964 :
3965 4688 : if (dt == vect_external_def && TREE_CODE (oprnd1) == SSA_NAME)
3966 15 : ext_def = vect_get_external_def_edge (vinfo, oprnd1);
3967 :
3968 4688 : def = NULL_TREE;
3969 4688 : scalar_int_mode mode = SCALAR_INT_TYPE_MODE (utype);
3970 4688 : if (dt != vect_internal_def || TYPE_MODE (TREE_TYPE (oprnd1)) == mode)
3971 : def = oprnd1;
3972 28 : else if (def_stmt && gimple_assign_cast_p (def_stmt))
3973 : {
3974 0 : tree rhs1 = gimple_assign_rhs1 (def_stmt);
3975 0 : if (TYPE_MODE (TREE_TYPE (rhs1)) == mode
3976 0 : && TYPE_PRECISION (TREE_TYPE (rhs1))
3977 0 : == TYPE_PRECISION (type))
3978 : def = rhs1;
3979 : }
3980 :
3981 4660 : if (def == NULL_TREE)
3982 : {
3983 28 : def = vect_recog_temp_ssa_var (utype, NULL);
3984 28 : def_stmt = gimple_build_assign (def, NOP_EXPR, oprnd1);
3985 28 : append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt, uvectype);
3986 : }
3987 4688 : stype = TREE_TYPE (def);
3988 :
3989 4688 : if (TREE_CODE (def) == INTEGER_CST)
3990 : {
3991 4590 : if (!tree_fits_uhwi_p (def)
3992 4590 : || tree_to_uhwi (def) >= GET_MODE_PRECISION (mode)
3993 9180 : || integer_zerop (def))
3994 0 : return NULL;
3995 4590 : def2 = build_int_cst (stype,
3996 4590 : GET_MODE_PRECISION (mode) - tree_to_uhwi (def));
3997 : }
3998 : else
3999 : {
4000 98 : tree vecstype = get_vectype_for_scalar_type (vinfo, stype);
4001 :
4002 98 : if (vecstype == NULL_TREE)
4003 : return NULL;
4004 98 : def2 = vect_recog_temp_ssa_var (stype, NULL);
4005 98 : def_stmt = gimple_build_assign (def2, NEGATE_EXPR, def);
4006 98 : if (ext_def)
4007 : {
4008 15 : basic_block new_bb
4009 15 : = gsi_insert_on_edge_immediate (ext_def, def_stmt);
4010 15 : gcc_assert (!new_bb);
4011 : }
4012 : else
4013 83 : append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt, vecstype);
4014 :
4015 98 : def2 = vect_recog_temp_ssa_var (stype, NULL);
4016 98 : tree mask = build_int_cst (stype, GET_MODE_PRECISION (mode) - 1);
4017 98 : def_stmt = gimple_build_assign (def2, BIT_AND_EXPR,
4018 : gimple_assign_lhs (def_stmt), mask);
4019 98 : if (ext_def)
4020 : {
4021 15 : basic_block new_bb
4022 15 : = gsi_insert_on_edge_immediate (ext_def, def_stmt);
4023 15 : gcc_assert (!new_bb);
4024 : }
4025 : else
4026 83 : append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt, vecstype);
4027 : }
4028 :
4029 4688 : var1 = vect_recog_temp_ssa_var (utype, NULL);
4030 9305 : def_stmt = gimple_build_assign (var1, rhs_code == LROTATE_EXPR
4031 : ? LSHIFT_EXPR : RSHIFT_EXPR,
4032 : oprnd0, def);
4033 4688 : append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt, uvectype);
4034 :
4035 4688 : var2 = vect_recog_temp_ssa_var (utype, NULL);
4036 9305 : def_stmt = gimple_build_assign (var2, rhs_code == LROTATE_EXPR
4037 : ? RSHIFT_EXPR : LSHIFT_EXPR,
4038 : oprnd0, def2);
4039 4688 : append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt, uvectype);
4040 :
4041 : /* Pattern detected. */
4042 4688 : vect_pattern_detected ("vect_recog_rotate_pattern", last_stmt);
4043 :
4044 : /* Pattern supported. Create a stmt to be used to replace the pattern. */
4045 4688 : var = vect_recog_temp_ssa_var (utype, NULL);
4046 4688 : pattern_stmt = gimple_build_assign (var, BIT_IOR_EXPR, var1, var2);
4047 :
4048 4688 : if (!useless_type_conversion_p (type, utype))
4049 : {
4050 52 : append_pattern_def_seq (vinfo, stmt_vinfo, pattern_stmt, uvectype);
4051 52 : tree result = vect_recog_temp_ssa_var (type, NULL);
4052 52 : pattern_stmt = gimple_build_assign (result, NOP_EXPR, var);
4053 : }
4054 : return pattern_stmt;
4055 : }
4056 :
4057 : /* Detect a vector by vector shift pattern that wouldn't be otherwise
4058 : vectorized:
4059 :
4060 : type a_t;
4061 : TYPE b_T, res_T;
4062 :
4063 : S1 a_t = ;
4064 : S2 b_T = ;
4065 : S3 res_T = b_T op a_t;
4066 :
4067 : where type 'TYPE' is a type with different size than 'type',
4068 : and op is <<, >> or rotate.
4069 :
4070 : Also detect cases:
4071 :
4072 : type a_t;
4073 : TYPE b_T, c_T, res_T;
4074 :
4075 : S0 c_T = ;
4076 : S1 a_t = (type) c_T;
4077 : S2 b_T = ;
4078 : S3 res_T = b_T op a_t;
4079 :
4080 : Input/Output:
4081 :
4082 : * STMT_VINFO: The stmt from which the pattern search begins,
4083 : i.e. the shift/rotate stmt. The original stmt (S3) is replaced
4084 : with a shift/rotate which has same type on both operands, in the
4085 : second case just b_T op c_T, in the first case with added cast
4086 : from a_t to c_T in STMT_VINFO_PATTERN_DEF_SEQ.
4087 :
4088 : Output:
4089 :
4090 : * TYPE_OUT: The type of the output of this pattern.
4091 :
4092 : * Return value: A new stmt that will be used to replace the shift/rotate
4093 : S3 stmt. */
4094 :
4095 : static gimple *
4096 30855600 : vect_recog_vector_vector_shift_pattern (vec_info *vinfo,
4097 : stmt_vec_info stmt_vinfo,
4098 : tree *type_out)
4099 : {
4100 30855600 : gimple *last_stmt = stmt_vinfo->stmt;
4101 30855600 : tree oprnd0, oprnd1, lhs, var;
4102 30855600 : gimple *pattern_stmt;
4103 30855600 : enum tree_code rhs_code;
4104 :
4105 30855600 : if (!is_gimple_assign (last_stmt))
4106 : return NULL;
4107 :
4108 21131986 : rhs_code = gimple_assign_rhs_code (last_stmt);
4109 21131986 : switch (rhs_code)
4110 : {
4111 506301 : case LSHIFT_EXPR:
4112 506301 : case RSHIFT_EXPR:
4113 506301 : case LROTATE_EXPR:
4114 506301 : case RROTATE_EXPR:
4115 506301 : break;
4116 : default:
4117 : return NULL;
4118 : }
4119 :
4120 506301 : lhs = gimple_assign_lhs (last_stmt);
4121 506301 : oprnd0 = gimple_assign_rhs1 (last_stmt);
4122 506301 : oprnd1 = gimple_assign_rhs2 (last_stmt);
4123 506301 : if (TREE_CODE (oprnd1) != SSA_NAME
4124 107513 : || TYPE_MODE (TREE_TYPE (oprnd0)) == TYPE_MODE (TREE_TYPE (oprnd1))
4125 48905 : || !INTEGRAL_TYPE_P (TREE_TYPE (oprnd0))
4126 48473 : || !type_has_mode_precision_p (TREE_TYPE (oprnd1))
4127 554774 : || TYPE_PRECISION (TREE_TYPE (lhs))
4128 48473 : != TYPE_PRECISION (TREE_TYPE (oprnd0)))
4129 457828 : return NULL;
4130 :
4131 48473 : stmt_vec_info def_vinfo = vinfo->lookup_def (oprnd1);
4132 48473 : if (!def_vinfo || STMT_VINFO_DEF_TYPE (def_vinfo) == vect_external_def)
4133 : return NULL;
4134 :
4135 45569 : def_vinfo = vect_stmt_to_vectorize (def_vinfo);
4136 1130 : gcc_assert (def_vinfo);
4137 :
4138 45569 : *type_out = get_vectype_for_scalar_type (vinfo, TREE_TYPE (oprnd0));
4139 45569 : if (*type_out == NULL_TREE)
4140 : return NULL;
4141 :
4142 33010 : tree def = NULL_TREE;
4143 33010 : gassign *def_stmt = dyn_cast <gassign *> (def_vinfo->stmt);
4144 19328 : if (def_stmt && gimple_assign_cast_p (def_stmt))
4145 : {
4146 5312 : tree rhs1 = gimple_assign_rhs1 (def_stmt);
4147 5312 : if (TYPE_MODE (TREE_TYPE (rhs1)) == TYPE_MODE (TREE_TYPE (oprnd0))
4148 5312 : && TYPE_PRECISION (TREE_TYPE (rhs1))
4149 1191 : == TYPE_PRECISION (TREE_TYPE (oprnd0)))
4150 : {
4151 1191 : if (TYPE_PRECISION (TREE_TYPE (oprnd1))
4152 1191 : >= TYPE_PRECISION (TREE_TYPE (rhs1)))
4153 : def = rhs1;
4154 : else
4155 : {
4156 1104 : tree mask
4157 1104 : = build_low_bits_mask (TREE_TYPE (rhs1),
4158 1104 : TYPE_PRECISION (TREE_TYPE (oprnd1)));
4159 1104 : def = vect_recog_temp_ssa_var (TREE_TYPE (rhs1), NULL);
4160 1104 : def_stmt = gimple_build_assign (def, BIT_AND_EXPR, rhs1, mask);
4161 1104 : tree vecstype = get_vectype_for_scalar_type (vinfo,
4162 1104 : TREE_TYPE (rhs1));
4163 1104 : append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt, vecstype);
4164 : }
4165 : }
4166 : }
4167 :
4168 1191 : if (def == NULL_TREE)
4169 : {
4170 31819 : def = vect_recog_temp_ssa_var (TREE_TYPE (oprnd0), NULL);
4171 31819 : def_stmt = gimple_build_assign (def, NOP_EXPR, oprnd1);
4172 31819 : append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
4173 : }
4174 :
4175 : /* Pattern detected. */
4176 33010 : vect_pattern_detected ("vect_recog_vector_vector_shift_pattern", last_stmt);
4177 :
4178 : /* Pattern supported. Create a stmt to be used to replace the pattern. */
4179 33010 : var = vect_recog_temp_ssa_var (TREE_TYPE (oprnd0), NULL);
4180 33010 : pattern_stmt = gimple_build_assign (var, rhs_code, oprnd0, def);
4181 :
4182 33010 : return pattern_stmt;
4183 : }
4184 :
4185 : /* Verify that the target has optabs of VECTYPE to perform all the steps
4186 : needed by the multiplication-by-immediate synthesis algorithm described by
4187 : ALG and VAR. If SYNTH_SHIFT_P is true ensure that vector addition is
4188 : present. Return true iff the target supports all the steps. */
4189 :
4190 : static bool
4191 298176 : target_supports_mult_synth_alg (struct algorithm *alg, mult_variant var,
4192 : tree vectype, bool synth_shift_p)
4193 : {
4194 298176 : if (alg->op[0] != alg_zero && alg->op[0] != alg_m)
4195 : return false;
4196 :
4197 298176 : bool supports_vminus = target_has_vecop_for_code (MINUS_EXPR, vectype);
4198 298176 : bool supports_vplus = target_has_vecop_for_code (PLUS_EXPR, vectype);
4199 :
4200 298176 : if (var == negate_variant
4201 298176 : && !target_has_vecop_for_code (NEGATE_EXPR, vectype))
4202 : return false;
4203 :
4204 : /* If we must synthesize shifts with additions make sure that vector
4205 : addition is available. */
4206 297584 : if ((var == add_variant || synth_shift_p) && !supports_vplus)
4207 : return false;
4208 :
4209 143674 : for (int i = 1; i < alg->ops; i++)
4210 : {
4211 107644 : switch (alg->op[i])
4212 : {
4213 : case alg_shift:
4214 : break;
4215 26536 : case alg_add_t_m2:
4216 26536 : case alg_add_t2_m:
4217 26536 : case alg_add_factor:
4218 26536 : if (!supports_vplus)
4219 : return false;
4220 : break;
4221 16609 : case alg_sub_t_m2:
4222 16609 : case alg_sub_t2_m:
4223 16609 : case alg_sub_factor:
4224 16609 : if (!supports_vminus)
4225 : return false;
4226 : break;
4227 : case alg_unknown:
4228 : case alg_m:
4229 : case alg_zero:
4230 : case alg_impossible:
4231 : return false;
4232 0 : default:
4233 0 : gcc_unreachable ();
4234 : }
4235 : }
4236 :
4237 : return true;
4238 : }
4239 :
4240 : /* Synthesize a left shift of OP by AMNT bits using a series of additions and
4241 : putting the final result in DEST. Append all statements but the last into
4242 : VINFO. Return the last statement. */
4243 :
4244 : static gimple *
4245 0 : synth_lshift_by_additions (vec_info *vinfo,
4246 : tree dest, tree op, HOST_WIDE_INT amnt,
4247 : stmt_vec_info stmt_info, tree vectype)
4248 : {
4249 0 : HOST_WIDE_INT i;
4250 0 : tree itype = TREE_TYPE (op);
4251 0 : tree prev_res = op;
4252 0 : gcc_assert (amnt >= 0);
4253 0 : for (i = 0; i < amnt; i++)
4254 : {
4255 0 : tree tmp_var = (i < amnt - 1) ? vect_recog_temp_ssa_var (itype, NULL)
4256 : : dest;
4257 0 : gimple *stmt
4258 0 : = gimple_build_assign (tmp_var, PLUS_EXPR, prev_res, prev_res);
4259 0 : prev_res = tmp_var;
4260 0 : if (i < amnt - 1)
4261 0 : append_pattern_def_seq (vinfo, stmt_info, stmt, vectype);
4262 : else
4263 0 : return stmt;
4264 : }
4265 0 : gcc_unreachable ();
4266 : return NULL;
4267 : }
4268 :
4269 : /* Helper for vect_synth_mult_by_constant. Apply a binary operation
4270 : CODE to operands OP1 and OP2, creating a new temporary SSA var in
4271 : the process if necessary. Append the resulting assignment statements
4272 : to the sequence in STMT_VINFO. Return the SSA variable that holds the
4273 : result of the binary operation. If SYNTH_SHIFT_P is true synthesize
4274 : left shifts using additions. */
4275 :
4276 : static tree
4277 43044 : apply_binop_and_append_stmt (vec_info *vinfo,
4278 : tree_code code, tree op1, tree op2,
4279 : stmt_vec_info stmt_vinfo, tree vectype,
4280 : bool synth_shift_p)
4281 : {
4282 43044 : if (integer_zerop (op2)
4283 43044 : && (code == LSHIFT_EXPR
4284 37314 : || code == PLUS_EXPR))
4285 : {
4286 37314 : gcc_assert (TREE_CODE (op1) == SSA_NAME);
4287 : return op1;
4288 : }
4289 :
4290 5730 : gimple *stmt;
4291 5730 : tree itype = TREE_TYPE (op1);
4292 5730 : tree tmp_var = vect_recog_temp_ssa_var (itype, NULL);
4293 :
4294 5730 : if (code == LSHIFT_EXPR
4295 5730 : && synth_shift_p)
4296 : {
4297 0 : stmt = synth_lshift_by_additions (vinfo, tmp_var, op1,
4298 0 : TREE_INT_CST_LOW (op2), stmt_vinfo,
4299 : vectype);
4300 0 : append_pattern_def_seq (vinfo, stmt_vinfo, stmt, vectype);
4301 0 : return tmp_var;
4302 : }
4303 :
4304 5730 : stmt = gimple_build_assign (tmp_var, code, op1, op2);
4305 5730 : append_pattern_def_seq (vinfo, stmt_vinfo, stmt, vectype);
4306 5730 : return tmp_var;
4307 : }
4308 :
4309 : /* Synthesize a multiplication of OP by an INTEGER_CST VAL using shifts
4310 : and simple arithmetic operations to be vectorized. Record the statements
4311 : produced in STMT_VINFO and return the last statement in the sequence or
4312 : NULL if it's not possible to synthesize such a multiplication.
4313 : This function mirrors the behavior of expand_mult_const in expmed.cc but
4314 : works on tree-ssa form. */
4315 :
4316 : static gimple *
4317 300918 : vect_synth_mult_by_constant (vec_info *vinfo, tree op, tree val,
4318 : stmt_vec_info stmt_vinfo)
4319 : {
4320 300918 : tree itype = TREE_TYPE (op);
4321 300918 : machine_mode mode = TYPE_MODE (itype);
4322 300918 : struct algorithm alg;
4323 300918 : mult_variant variant;
4324 300918 : if (!tree_fits_shwi_p (val))
4325 : return NULL;
4326 :
4327 : /* Multiplication synthesis by shifts, adds and subs can introduce
4328 : signed overflow where the original operation didn't. Perform the
4329 : operations on an unsigned type and cast back to avoid this.
4330 : In the future we may want to relax this for synthesis algorithms
4331 : that we can prove do not cause unexpected overflow. */
4332 298191 : bool cast_to_unsigned_p = !TYPE_OVERFLOW_WRAPS (itype);
4333 :
4334 59197 : tree multtype = cast_to_unsigned_p ? unsigned_type_for (itype) : itype;
4335 298191 : tree vectype = get_vectype_for_scalar_type (vinfo, multtype);
4336 298191 : if (!vectype)
4337 : return NULL;
4338 :
4339 : /* Targets that don't support vector shifts but support vector additions
4340 : can synthesize shifts that way. */
4341 298191 : bool synth_shift_p = !vect_supportable_shift (vinfo, LSHIFT_EXPR, multtype);
4342 :
4343 298191 : HOST_WIDE_INT hwval = tree_to_shwi (val);
4344 : /* Use MAX_COST here as we don't want to limit the sequence on rtx costs.
4345 : The vectorizer's benefit analysis will decide whether it's beneficial
4346 : to do this. */
4347 596382 : bool possible = choose_mult_variant (VECTOR_MODE_P (TYPE_MODE (vectype))
4348 298191 : ? TYPE_MODE (vectype) : mode,
4349 : hwval, &alg, &variant, MAX_COST);
4350 298191 : if (!possible)
4351 : return NULL;
4352 :
4353 298191 : if (vect_is_reduction (stmt_vinfo))
4354 : {
4355 26 : int op_uses = alg.op[0] != alg_zero;
4356 45 : for (int i = 1; i < alg.ops; i++)
4357 32 : switch (alg.op[i])
4358 : {
4359 4 : case alg_add_t_m2:
4360 4 : case alg_sub_t_m2:
4361 4 : if (synth_shift_p && alg.log[i])
4362 : return NULL;
4363 : else
4364 4 : op_uses++;
4365 4 : break;
4366 0 : case alg_add_t2_m:
4367 0 : case alg_sub_t2_m:
4368 0 : op_uses++;
4369 : /* Fallthru. */
4370 28 : case alg_shift:
4371 28 : if (synth_shift_p && alg.log[i])
4372 : return NULL;
4373 : break;
4374 : case alg_add_factor:
4375 : case alg_sub_factor:
4376 : return NULL;
4377 : default:
4378 : break;
4379 : }
4380 13 : if (variant == add_variant)
4381 0 : op_uses++;
4382 : /* When we'll synthesize more than a single use of the reduction
4383 : operand the reduction constraints are violated. Avoid this
4384 : situation. */
4385 13 : if (op_uses > 1)
4386 : return NULL;
4387 : }
4388 :
4389 298176 : if (!target_supports_mult_synth_alg (&alg, variant, vectype, synth_shift_p))
4390 : return NULL;
4391 :
4392 36030 : tree accumulator;
4393 :
4394 : /* Clear out the sequence of statements so we can populate it below. */
4395 36030 : gimple *stmt = NULL;
4396 :
4397 36030 : if (cast_to_unsigned_p)
4398 : {
4399 12239 : tree tmp_op = vect_recog_temp_ssa_var (multtype, NULL);
4400 12239 : stmt = gimple_build_assign (tmp_op, CONVERT_EXPR, op);
4401 12239 : append_pattern_def_seq (vinfo, stmt_vinfo, stmt, vectype);
4402 12239 : op = tmp_op;
4403 : }
4404 :
4405 36030 : if (alg.op[0] == alg_zero)
4406 205 : accumulator = build_int_cst (multtype, 0);
4407 : else
4408 : accumulator = op;
4409 :
4410 36030 : bool needs_fixup = (variant == negate_variant)
4411 36030 : || (variant == add_variant);
4412 :
4413 143505 : for (int i = 1; i < alg.ops; i++)
4414 : {
4415 107475 : tree shft_log = build_int_cst (multtype, alg.log[i]);
4416 107475 : tree accum_tmp = vect_recog_temp_ssa_var (multtype, NULL);
4417 107475 : tree tmp_var = NULL_TREE;
4418 :
4419 107475 : switch (alg.op[i])
4420 : {
4421 64431 : case alg_shift:
4422 64431 : if (synth_shift_p)
4423 0 : stmt
4424 0 : = synth_lshift_by_additions (vinfo, accum_tmp, accumulator,
4425 0 : alg.log[i], stmt_vinfo, vectype);
4426 : else
4427 64431 : stmt = gimple_build_assign (accum_tmp, LSHIFT_EXPR, accumulator,
4428 : shft_log);
4429 : break;
4430 21707 : case alg_add_t_m2:
4431 21707 : tmp_var
4432 21707 : = apply_binop_and_append_stmt (vinfo, LSHIFT_EXPR, op, shft_log,
4433 : stmt_vinfo, vectype, synth_shift_p);
4434 21707 : stmt = gimple_build_assign (accum_tmp, PLUS_EXPR, accumulator,
4435 : tmp_var);
4436 21707 : break;
4437 15806 : case alg_sub_t_m2:
4438 15806 : tmp_var = apply_binop_and_append_stmt (vinfo, LSHIFT_EXPR, op,
4439 : shft_log, stmt_vinfo,
4440 : vectype, synth_shift_p);
4441 : /* In some algorithms the first step involves zeroing the
4442 : accumulator. If subtracting from such an accumulator
4443 : just emit the negation directly. */
4444 15806 : if (integer_zerop (accumulator))
4445 205 : stmt = gimple_build_assign (accum_tmp, NEGATE_EXPR, tmp_var);
4446 : else
4447 15601 : stmt = gimple_build_assign (accum_tmp, MINUS_EXPR, accumulator,
4448 : tmp_var);
4449 : break;
4450 0 : case alg_add_t2_m:
4451 0 : tmp_var
4452 0 : = apply_binop_and_append_stmt (vinfo, LSHIFT_EXPR, accumulator,
4453 : shft_log, stmt_vinfo, vectype,
4454 : synth_shift_p);
4455 0 : stmt = gimple_build_assign (accum_tmp, PLUS_EXPR, tmp_var, op);
4456 0 : break;
4457 0 : case alg_sub_t2_m:
4458 0 : tmp_var
4459 0 : = apply_binop_and_append_stmt (vinfo, LSHIFT_EXPR, accumulator,
4460 : shft_log, stmt_vinfo, vectype,
4461 : synth_shift_p);
4462 0 : stmt = gimple_build_assign (accum_tmp, MINUS_EXPR, tmp_var, op);
4463 0 : break;
4464 4766 : case alg_add_factor:
4465 4766 : tmp_var
4466 4766 : = apply_binop_and_append_stmt (vinfo, LSHIFT_EXPR, accumulator,
4467 : shft_log, stmt_vinfo, vectype,
4468 : synth_shift_p);
4469 4766 : stmt = gimple_build_assign (accum_tmp, PLUS_EXPR, accumulator,
4470 : tmp_var);
4471 4766 : break;
4472 765 : case alg_sub_factor:
4473 765 : tmp_var
4474 765 : = apply_binop_and_append_stmt (vinfo, LSHIFT_EXPR, accumulator,
4475 : shft_log, stmt_vinfo, vectype,
4476 : synth_shift_p);
4477 765 : stmt = gimple_build_assign (accum_tmp, MINUS_EXPR, tmp_var,
4478 : accumulator);
4479 765 : break;
4480 0 : default:
4481 0 : gcc_unreachable ();
4482 : }
4483 : /* We don't want to append the last stmt in the sequence to stmt_vinfo
4484 : but rather return it directly. */
4485 :
4486 107475 : if ((i < alg.ops - 1) || needs_fixup || cast_to_unsigned_p)
4487 83981 : append_pattern_def_seq (vinfo, stmt_vinfo, stmt, vectype);
4488 107475 : accumulator = accum_tmp;
4489 : }
4490 36030 : if (variant == negate_variant)
4491 : {
4492 429 : tree accum_tmp = vect_recog_temp_ssa_var (multtype, NULL);
4493 429 : stmt = gimple_build_assign (accum_tmp, NEGATE_EXPR, accumulator);
4494 429 : accumulator = accum_tmp;
4495 429 : if (cast_to_unsigned_p)
4496 142 : append_pattern_def_seq (vinfo, stmt_vinfo, stmt, vectype);
4497 : }
4498 35601 : else if (variant == add_variant)
4499 : {
4500 99 : tree accum_tmp = vect_recog_temp_ssa_var (multtype, NULL);
4501 99 : stmt = gimple_build_assign (accum_tmp, PLUS_EXPR, accumulator, op);
4502 99 : accumulator = accum_tmp;
4503 99 : if (cast_to_unsigned_p)
4504 89 : append_pattern_def_seq (vinfo, stmt_vinfo, stmt, vectype);
4505 : }
4506 : /* Move back to a signed if needed. */
4507 35733 : if (cast_to_unsigned_p)
4508 : {
4509 12239 : tree accum_tmp = vect_recog_temp_ssa_var (itype, NULL);
4510 12239 : stmt = gimple_build_assign (accum_tmp, CONVERT_EXPR, accumulator);
4511 : }
4512 :
4513 : return stmt;
4514 : }
4515 :
4516 : /* Detect multiplication by constant and convert it into a sequence of
4517 : shifts and additions, subtractions, negations. We reuse the
4518 : choose_mult_variant algorithms from expmed.cc
4519 :
4520 : Input/Output:
4521 :
4522 : STMT_VINFO: The stmt from which the pattern search begins,
4523 : i.e. the mult stmt.
4524 :
4525 : Output:
4526 :
4527 : * TYPE_OUT: The type of the output of this pattern.
4528 :
4529 : * Return value: A new stmt that will be used to replace
4530 : the multiplication. */
4531 :
4532 : static gimple *
4533 31048776 : vect_recog_mult_pattern (vec_info *vinfo,
4534 : stmt_vec_info stmt_vinfo, tree *type_out)
4535 : {
4536 31048776 : gimple *last_stmt = stmt_vinfo->stmt;
4537 31048776 : tree oprnd0, oprnd1, vectype, itype;
4538 31048776 : gimple *pattern_stmt;
4539 :
4540 31048776 : if (!is_gimple_assign (last_stmt))
4541 : return NULL;
4542 :
4543 21325162 : if (gimple_assign_rhs_code (last_stmt) != MULT_EXPR)
4544 : return NULL;
4545 :
4546 1456951 : oprnd0 = gimple_assign_rhs1 (last_stmt);
4547 1456951 : oprnd1 = gimple_assign_rhs2 (last_stmt);
4548 1456951 : itype = TREE_TYPE (oprnd0);
4549 :
4550 1456951 : if (TREE_CODE (oprnd0) != SSA_NAME
4551 1456888 : || TREE_CODE (oprnd1) != INTEGER_CST
4552 907680 : || !INTEGRAL_TYPE_P (itype)
4553 2364631 : || !type_has_mode_precision_p (itype))
4554 549323 : return NULL;
4555 :
4556 907628 : vectype = get_vectype_for_scalar_type (vinfo, itype);
4557 907628 : if (vectype == NULL_TREE)
4558 : return NULL;
4559 :
4560 : /* If the target can handle vectorized multiplication natively,
4561 : don't attempt to optimize this. */
4562 737942 : optab mul_optab = optab_for_tree_code (MULT_EXPR, vectype, optab_default);
4563 737942 : if (mul_optab != unknown_optab
4564 737942 : && can_implement_p (mul_optab, TYPE_MODE (vectype)))
4565 : return NULL;
4566 :
4567 300918 : pattern_stmt = vect_synth_mult_by_constant (vinfo,
4568 : oprnd0, oprnd1, stmt_vinfo);
4569 300918 : if (!pattern_stmt)
4570 : return NULL;
4571 :
4572 : /* Pattern detected. */
4573 36030 : vect_pattern_detected ("vect_recog_mult_pattern", last_stmt);
4574 :
4575 36030 : *type_out = vectype;
4576 :
4577 36030 : return pattern_stmt;
4578 : }
4579 :
4580 : extern bool gimple_unsigned_integer_sat_add (tree, tree*, tree (*)(tree));
4581 : extern bool gimple_unsigned_integer_sat_sub (tree, tree*, tree (*)(tree));
4582 : extern bool gimple_unsigned_integer_sat_trunc (tree, tree*, tree (*)(tree));
4583 :
4584 : extern bool gimple_unsigned_integer_narrow_clip (tree, tree*, tree (*)(tree));
4585 :
4586 : extern bool gimple_signed_integer_sat_add (tree, tree*, tree (*)(tree));
4587 : extern bool gimple_signed_integer_sat_sub (tree, tree*, tree (*)(tree));
4588 : extern bool gimple_signed_integer_sat_trunc (tree, tree*, tree (*)(tree));
4589 :
4590 : static gimple *
4591 300 : vect_recog_build_binary_gimple_stmt (vec_info *vinfo, stmt_vec_info stmt_info,
4592 : internal_fn fn, tree *type_out,
4593 : tree lhs, tree op_0, tree op_1)
4594 : {
4595 300 : tree itype = TREE_TYPE (op_0);
4596 300 : tree otype = TREE_TYPE (lhs);
4597 300 : tree v_itype = get_vectype_for_scalar_type (vinfo, itype);
4598 300 : tree v_otype = get_vectype_for_scalar_type (vinfo, otype);
4599 :
4600 300 : if (v_itype != NULL_TREE && v_otype != NULL_TREE
4601 300 : && direct_internal_fn_supported_p (fn, v_itype, OPTIMIZE_FOR_BOTH))
4602 : {
4603 97 : gcall *call = gimple_build_call_internal (fn, 2, op_0, op_1);
4604 97 : tree in_ssa = vect_recog_temp_ssa_var (itype, NULL);
4605 :
4606 97 : gimple_call_set_lhs (call, in_ssa);
4607 97 : gimple_call_set_nothrow (call, /* nothrow_p */ false);
4608 97 : gimple_set_location (call, gimple_location (STMT_VINFO_STMT (stmt_info)));
4609 :
4610 97 : *type_out = v_otype;
4611 :
4612 97 : if (types_compatible_p (itype, otype))
4613 : return call;
4614 : else
4615 : {
4616 0 : append_pattern_def_seq (vinfo, stmt_info, call, v_itype);
4617 0 : tree out_ssa = vect_recog_temp_ssa_var (otype, NULL);
4618 :
4619 0 : return gimple_build_assign (out_ssa, NOP_EXPR, in_ssa);
4620 : }
4621 : }
4622 :
4623 : return NULL;
4624 : }
4625 :
4626 : /*
4627 : * Try to detect saturation add pattern (SAT_ADD), aka below gimple:
4628 : * _7 = _4 + _6;
4629 : * _8 = _4 > _7;
4630 : * _9 = (long unsigned int) _8;
4631 : * _10 = -_9;
4632 : * _12 = _7 | _10;
4633 : *
4634 : * And then simplified to
4635 : * _12 = .SAT_ADD (_4, _6);
4636 : */
4637 :
4638 : static gimple *
4639 31119900 : vect_recog_sat_add_pattern (vec_info *vinfo, stmt_vec_info stmt_vinfo,
4640 : tree *type_out)
4641 : {
4642 31119900 : gimple *last_stmt = STMT_VINFO_STMT (stmt_vinfo);
4643 :
4644 31119900 : if (!is_gimple_assign (last_stmt))
4645 : return NULL;
4646 :
4647 21396286 : tree ops[2];
4648 21396286 : tree lhs = gimple_assign_lhs (last_stmt);
4649 :
4650 21396286 : if (gimple_unsigned_integer_sat_add (lhs, ops, NULL)
4651 21396286 : || gimple_signed_integer_sat_add (lhs, ops, NULL))
4652 : {
4653 62 : if (TREE_CODE (ops[1]) == INTEGER_CST)
4654 12 : ops[1] = fold_convert (TREE_TYPE (ops[0]), ops[1]);
4655 :
4656 62 : gimple *stmt = vect_recog_build_binary_gimple_stmt (vinfo, stmt_vinfo,
4657 : IFN_SAT_ADD, type_out,
4658 : lhs, ops[0], ops[1]);
4659 62 : if (stmt)
4660 : {
4661 44 : vect_pattern_detected ("vect_recog_sat_add_pattern", last_stmt);
4662 44 : return stmt;
4663 : }
4664 : }
4665 :
4666 : return NULL;
4667 : }
4668 :
4669 : /*
4670 : * Try to transform the truncation for .SAT_SUB pattern, mostly occurs in
4671 : * the benchmark zip. Aka:
4672 : *
4673 : * unsigned int _1;
4674 : * unsigned int _2;
4675 : * unsigned short int _4;
4676 : * _9 = (unsigned short int).SAT_SUB (_1, _2);
4677 : *
4678 : * if _1 is known to be in the range of unsigned short int. For example
4679 : * there is a def _1 = (unsigned short int)_4. Then we can transform the
4680 : * truncation to:
4681 : *
4682 : * _3 = (unsigned short int) MIN (65535, _2); // aka _3 = .SAT_TRUNC (_2);
4683 : * _9 = .SAT_SUB (_4, _3);
4684 : *
4685 : * Then, we can better vectorized code and avoid the unnecessary narrowing
4686 : * stmt during vectorization with below stmt(s).
4687 : *
4688 : * _3 = .SAT_TRUNC(_2); // SI => HI
4689 : * _9 = .SAT_SUB (_4, _3);
4690 : */
4691 : static void
4692 238 : vect_recog_sat_sub_pattern_transform (vec_info *vinfo,
4693 : stmt_vec_info stmt_vinfo,
4694 : tree lhs, tree *ops)
4695 : {
4696 238 : tree otype = TREE_TYPE (lhs);
4697 238 : tree itype = TREE_TYPE (ops[0]);
4698 238 : unsigned itype_prec = TYPE_PRECISION (itype);
4699 238 : unsigned otype_prec = TYPE_PRECISION (otype);
4700 :
4701 238 : if (types_compatible_p (otype, itype) || otype_prec >= itype_prec)
4702 238 : return;
4703 :
4704 0 : tree v_otype = get_vectype_for_scalar_type (vinfo, otype);
4705 0 : tree v_itype = get_vectype_for_scalar_type (vinfo, itype);
4706 0 : tree_pair v_pair = tree_pair (v_otype, v_itype);
4707 :
4708 0 : if (v_otype == NULL_TREE || v_itype == NULL_TREE
4709 0 : || !direct_internal_fn_supported_p (IFN_SAT_TRUNC, v_pair,
4710 : OPTIMIZE_FOR_BOTH))
4711 0 : return;
4712 :
4713 : /* 1. Find the _4 and update ops[0] as above example. */
4714 0 : vect_unpromoted_value unprom;
4715 0 : tree tmp = vect_look_through_possible_promotion (vinfo, ops[0], &unprom);
4716 :
4717 0 : if (tmp == NULL_TREE || TYPE_PRECISION (unprom.type) != otype_prec)
4718 : return;
4719 :
4720 0 : ops[0] = tmp;
4721 :
4722 : /* 2. Generate _3 = .SAT_TRUNC (_2) and update ops[1] as above example. */
4723 0 : tree trunc_lhs_ssa = vect_recog_temp_ssa_var (otype, NULL);
4724 0 : gcall *call = gimple_build_call_internal (IFN_SAT_TRUNC, 1, ops[1]);
4725 :
4726 0 : gimple_call_set_lhs (call, trunc_lhs_ssa);
4727 0 : gimple_call_set_nothrow (call, /* nothrow_p */ false);
4728 0 : append_pattern_def_seq (vinfo, stmt_vinfo, call, v_otype);
4729 :
4730 0 : ops[1] = trunc_lhs_ssa;
4731 : }
4732 :
4733 : /*
4734 : * Try to detect saturation sub pattern (SAT_ADD), aka below gimple:
4735 : * Unsigned:
4736 : * _7 = _1 >= _2;
4737 : * _8 = _1 - _2;
4738 : * _10 = (long unsigned int) _7;
4739 : * _9 = _8 * _10;
4740 : *
4741 : * And then simplified to
4742 : * _9 = .SAT_SUB (_1, _2);
4743 : *
4744 : * Signed:
4745 : * x.0_4 = (unsigned char) x_16;
4746 : * y.1_5 = (unsigned char) y_18;
4747 : * _6 = x.0_4 - y.1_5;
4748 : * minus_19 = (int8_t) _6;
4749 : * _7 = x_16 ^ y_18;
4750 : * _8 = x_16 ^ minus_19;
4751 : * _44 = _7 < 0;
4752 : * _23 = x_16 < 0;
4753 : * _24 = (signed char) _23;
4754 : * _58 = (unsigned char) _24;
4755 : * _59 = -_58;
4756 : * _25 = (signed char) _59;
4757 : * _26 = _25 ^ 127;
4758 : * _42 = _8 < 0;
4759 : * _41 = _42 & _44;
4760 : * iftmp.2_11 = _41 ? _26 : minus_19;
4761 : *
4762 : * And then simplified to
4763 : * iftmp.2_11 = .SAT_SUB (x_16, y_18);
4764 : */
4765 :
4766 : static gimple *
4767 31119856 : vect_recog_sat_sub_pattern (vec_info *vinfo, stmt_vec_info stmt_vinfo,
4768 : tree *type_out)
4769 : {
4770 31119856 : gimple *last_stmt = STMT_VINFO_STMT (stmt_vinfo);
4771 :
4772 31119856 : if (!is_gimple_assign (last_stmt))
4773 : return NULL;
4774 :
4775 21396242 : tree ops[2];
4776 21396242 : tree lhs = gimple_assign_lhs (last_stmt);
4777 :
4778 21396242 : if (gimple_unsigned_integer_sat_sub (lhs, ops, NULL)
4779 21396242 : || gimple_signed_integer_sat_sub (lhs, ops, NULL))
4780 : {
4781 238 : vect_recog_sat_sub_pattern_transform (vinfo, stmt_vinfo, lhs, ops);
4782 238 : gimple *stmt = vect_recog_build_binary_gimple_stmt (vinfo, stmt_vinfo,
4783 : IFN_SAT_SUB, type_out,
4784 : lhs, ops[0], ops[1]);
4785 238 : if (stmt)
4786 : {
4787 53 : vect_pattern_detected ("vect_recog_sat_sub_pattern", last_stmt);
4788 53 : return stmt;
4789 : }
4790 : }
4791 :
4792 : return NULL;
4793 : }
4794 :
4795 : /*
4796 : * Try to detect saturation truncation pattern (SAT_TRUNC), aka below gimple:
4797 : * overflow_5 = x_4(D) > 4294967295;
4798 : * _1 = (unsigned int) x_4(D);
4799 : * _2 = (unsigned int) overflow_5;
4800 : * _3 = -_2;
4801 : * _6 = _1 | _3;
4802 : *
4803 : * And then simplified to
4804 : * _6 = .SAT_TRUNC (x_4(D));
4805 : */
4806 :
4807 : static gimple *
4808 31119803 : vect_recog_sat_trunc_pattern (vec_info *vinfo, stmt_vec_info stmt_vinfo,
4809 : tree *type_out)
4810 : {
4811 31119803 : gimple *last_stmt = STMT_VINFO_STMT (stmt_vinfo);
4812 :
4813 31119803 : if (!is_gimple_assign (last_stmt))
4814 : return NULL;
4815 :
4816 21396189 : tree ops[1];
4817 21396189 : tree lhs = gimple_assign_lhs (last_stmt);
4818 21396189 : tree otype = TREE_TYPE (lhs);
4819 :
4820 21396189 : if ((gimple_unsigned_integer_narrow_clip (lhs, ops, NULL))
4821 21396189 : && type_has_mode_precision_p (otype))
4822 : {
4823 16 : tree itype = TREE_TYPE (ops[0]);
4824 16 : tree v_itype = get_vectype_for_scalar_type (vinfo, itype);
4825 16 : tree v_otype = get_vectype_for_scalar_type (vinfo, otype);
4826 16 : internal_fn fn = IFN_SAT_TRUNC;
4827 :
4828 16 : if (v_itype != NULL_TREE && v_otype != NULL_TREE
4829 32 : && direct_internal_fn_supported_p (fn, tree_pair (v_otype, v_itype),
4830 : OPTIMIZE_FOR_BOTH))
4831 : {
4832 0 : tree temp = vect_recog_temp_ssa_var (itype, NULL);
4833 0 : gimple * max_stmt = gimple_build_assign (temp, build2 (MAX_EXPR, itype, build_zero_cst(itype), ops[0]));
4834 0 : append_pattern_def_seq (vinfo, stmt_vinfo, max_stmt, v_itype);
4835 :
4836 0 : gcall *call = gimple_build_call_internal (fn, 1, temp);
4837 0 : tree out_ssa = vect_recog_temp_ssa_var (otype, NULL);
4838 :
4839 0 : gimple_call_set_lhs (call, out_ssa);
4840 0 : gimple_call_set_nothrow (call, /* nothrow_p */ false);
4841 0 : gimple_set_location (call, gimple_location (last_stmt));
4842 :
4843 0 : *type_out = v_otype;
4844 :
4845 0 : return call;
4846 : }
4847 :
4848 : }
4849 :
4850 21396189 : if ((gimple_unsigned_integer_sat_trunc (lhs, ops, NULL)
4851 21395866 : || gimple_signed_integer_sat_trunc (lhs, ops, NULL))
4852 21396189 : && type_has_mode_precision_p (otype))
4853 : {
4854 311 : tree itype = TREE_TYPE (ops[0]);
4855 311 : tree v_itype = get_vectype_for_scalar_type (vinfo, itype);
4856 311 : tree v_otype = get_vectype_for_scalar_type (vinfo, otype);
4857 311 : internal_fn fn = IFN_SAT_TRUNC;
4858 :
4859 305 : if (v_itype != NULL_TREE && v_otype != NULL_TREE
4860 616 : && direct_internal_fn_supported_p (fn, tree_pair (v_otype, v_itype),
4861 : OPTIMIZE_FOR_BOTH))
4862 : {
4863 0 : gcall *call = gimple_build_call_internal (fn, 1, ops[0]);
4864 0 : tree out_ssa = vect_recog_temp_ssa_var (otype, NULL);
4865 :
4866 0 : gimple_call_set_lhs (call, out_ssa);
4867 0 : gimple_call_set_nothrow (call, /* nothrow_p */ false);
4868 0 : gimple_set_location (call, gimple_location (last_stmt));
4869 :
4870 0 : *type_out = v_otype;
4871 :
4872 0 : return call;
4873 : }
4874 : }
4875 :
4876 : return NULL;
4877 : }
4878 :
4879 :
4880 : /* Function add_code_for_floorceilround_divmod
4881 : A helper function to add compensation code for implementing FLOOR_MOD_EXPR,
4882 : FLOOR_DIV_EXPR, CEIL_MOD_EXPR, CEIL_DIV_EXPR, ROUND_MOD_EXPR and
4883 : ROUND_DIV_EXPR
4884 : The quotient and remainder are needed for implemented these operators.
4885 : FLOOR cases
4886 : r = x %[fl] y; r = x/[fl] y;
4887 : is
4888 : r = x % y; if (r && (x ^ y) < 0) r += y;
4889 : r = x % y; d = x/y; if (r && (x ^ y) < 0) d--; Respectively
4890 : Produce following sequence
4891 : v0 = x^y
4892 : v1 = -r
4893 : v2 = r | -r
4894 : v3 = v0 & v2
4895 : v4 = v3 < 0
4896 : if (floor_mod)
4897 : v5 = v4 ? y : 0
4898 : v6 = r + v5
4899 : if (floor_div)
4900 : v5 = v4 ? 1 : 0
4901 : v6 = d - 1
4902 : Similar sequences of vector instructions are produces for following cases
4903 : CEIL cases
4904 : r = x %[cl] y; r = x/[cl] y;
4905 : is
4906 : r = x % y; if (r && (x ^ y) >= 0) r -= y;
4907 : r = x % y; if (r) r -= y; (unsigned)
4908 : r = x % y; d = x/y; if (r && (x ^ y) >= 0) d++;
4909 : r = x % y; d = x/y; if (r) d++; (unsigned)
4910 : ROUND cases
4911 : r = x %[rd] y; r = x/[rd] y;
4912 : is
4913 : r = x % y; if (r > ((y-1)/2)) if ((x ^ y) >= 0) r -= y; else r += y;
4914 : r = x % y; if (r > ((y-1)/2)) r -= y; (unsigned)
4915 : r = x % y; d = x/y; if (r > ((y-1)/2)) if ((x ^ y) >= 0) d++; else d--;
4916 : r = x % y; d = x/y; if (r > ((y-1)/2)) d++; (unsigned)
4917 : Inputs:
4918 : VECTYPE: Vector type of the operands
4919 : STMT_VINFO: Statement where pattern begins
4920 : RHS_CODE: Should either be FLOOR_MOD_EXPR or FLOOR_DIV_EXPR
4921 : Q: The quotient of division
4922 : R: Remainder of division
4923 : OPRDN0/OPRND1: Actual operands involved
4924 : ITYPE: tree type of oprnd0
4925 : Output:
4926 : NULL if vectorization not possible
4927 : Gimple statement based on rhs_code
4928 : */
4929 : static gimple *
4930 431 : add_code_for_floorceilround_divmod (tree vectype, vec_info *vinfo,
4931 : stmt_vec_info stmt_vinfo,
4932 : enum tree_code rhs_code, tree q, tree r,
4933 : tree oprnd0, tree oprnd1, tree itype)
4934 : {
4935 431 : gimple *def_stmt;
4936 431 : tree mask_vectype = truth_type_for (vectype);
4937 431 : if (!mask_vectype)
4938 : return NULL;
4939 431 : tree bool_cond;
4940 431 : bool unsigned_p = TYPE_UNSIGNED (itype);
4941 :
4942 431 : switch (rhs_code)
4943 : {
4944 395 : case FLOOR_MOD_EXPR:
4945 395 : case FLOOR_DIV_EXPR:
4946 395 : case CEIL_MOD_EXPR:
4947 395 : case CEIL_DIV_EXPR:
4948 395 : {
4949 395 : if (!target_has_vecop_for_code (NEGATE_EXPR, vectype)
4950 363 : || !target_has_vecop_for_code (BIT_XOR_EXPR, vectype)
4951 363 : || !target_has_vecop_for_code (BIT_IOR_EXPR, vectype)
4952 363 : || !target_has_vecop_for_code (PLUS_EXPR, vectype)
4953 363 : || !target_has_vecop_for_code (MINUS_EXPR, vectype)
4954 363 : || !expand_vec_cmp_expr_p (vectype, mask_vectype, LT_EXPR)
4955 631 : || !expand_vec_cond_expr_p (vectype, mask_vectype))
4956 159 : return NULL;
4957 236 : if (unsigned_p)
4958 : {
4959 18 : gcc_assert (rhs_code == CEIL_MOD_EXPR || rhs_code == CEIL_DIV_EXPR);
4960 :
4961 18 : if (!expand_vec_cmp_expr_p (vectype, mask_vectype, GT_EXPR))
4962 : return NULL;
4963 18 : bool is_mod = rhs_code == CEIL_MOD_EXPR;
4964 : // r > 0
4965 18 : bool_cond = vect_recog_temp_ssa_var (boolean_type_node, NULL);
4966 18 : def_stmt = gimple_build_assign (bool_cond, GT_EXPR, r,
4967 : build_int_cst (itype, 0));
4968 18 : append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt, mask_vectype,
4969 : itype);
4970 :
4971 : // (r > 0) ? y : 0 (mod)
4972 : // (r > 0) ? 1 : 0 (ceil)
4973 18 : tree extr_cond = vect_recog_temp_ssa_var (itype, NULL);
4974 18 : def_stmt
4975 27 : = gimple_build_assign (extr_cond, COND_EXPR, bool_cond,
4976 9 : is_mod ? oprnd1 : build_int_cst (itype, 1),
4977 : build_int_cst (itype, 0));
4978 18 : append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
4979 :
4980 : // r -= (r > 0) ? y : 0 (mod)
4981 : // d += (x^y < 0 && r) ? -1 : 0 (ceil)
4982 18 : tree result = vect_recog_temp_ssa_var (itype, NULL);
4983 27 : return gimple_build_assign (result, is_mod ? MINUS_EXPR : PLUS_EXPR,
4984 18 : is_mod ? r : q, extr_cond);
4985 : }
4986 : else
4987 : {
4988 218 : bool ceil_p
4989 218 : = (rhs_code == CEIL_MOD_EXPR || rhs_code == CEIL_DIV_EXPR);
4990 218 : if (ceil_p && !target_has_vecop_for_code (BIT_NOT_EXPR, vectype))
4991 : return NULL;
4992 : // x ^ y
4993 218 : tree xort = vect_recog_temp_ssa_var (itype, NULL);
4994 218 : def_stmt = gimple_build_assign (xort, BIT_XOR_EXPR, oprnd0, oprnd1);
4995 218 : append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
4996 :
4997 218 : tree cond_reg = xort;
4998 : // ~(x ^ y) (ceil)
4999 218 : if (ceil_p)
5000 : {
5001 18 : cond_reg = vect_recog_temp_ssa_var (itype, NULL);
5002 18 : def_stmt = gimple_build_assign (cond_reg, BIT_NOT_EXPR, xort);
5003 18 : append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
5004 : }
5005 :
5006 : // -r
5007 218 : tree negate_r = vect_recog_temp_ssa_var (itype, NULL);
5008 218 : def_stmt = gimple_build_assign (negate_r, NEGATE_EXPR, r);
5009 218 : append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
5010 :
5011 : // r | -r , sign bit is set if r!=0
5012 218 : tree r_or_negr = vect_recog_temp_ssa_var (itype, NULL);
5013 218 : def_stmt
5014 218 : = gimple_build_assign (r_or_negr, BIT_IOR_EXPR, r, negate_r);
5015 218 : append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
5016 :
5017 : // (x ^ y) & (r | -r)
5018 : // ~(x ^ y) & (r | -r) (ceil)
5019 218 : tree r_or_negr_and_xor = vect_recog_temp_ssa_var (itype, NULL);
5020 218 : def_stmt = gimple_build_assign (r_or_negr_and_xor, BIT_AND_EXPR,
5021 : r_or_negr, cond_reg);
5022 218 : append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
5023 :
5024 : // (x ^ y) & (r | -r) < 0 which is equivalent to (x^y < 0 && r!=0)
5025 218 : bool_cond = vect_recog_temp_ssa_var (boolean_type_node, NULL);
5026 218 : def_stmt
5027 218 : = gimple_build_assign (bool_cond, LT_EXPR, r_or_negr_and_xor,
5028 : build_int_cst (itype, 0));
5029 218 : append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt, mask_vectype,
5030 : itype);
5031 :
5032 : // (x^y < 0 && r) ? y : 0 (mod)
5033 : // (x^y < 0 && r) ? -1 : 0 (div)
5034 218 : bool is_mod
5035 218 : = (rhs_code == FLOOR_MOD_EXPR || rhs_code == CEIL_MOD_EXPR);
5036 218 : tree extr_cond = vect_recog_temp_ssa_var (itype, NULL);
5037 258 : def_stmt = gimple_build_assign (extr_cond, COND_EXPR, bool_cond,
5038 : is_mod ? oprnd1
5039 40 : : build_int_cst (itype, -1),
5040 : build_int_cst (itype, 0));
5041 218 : append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
5042 :
5043 : // r += (x ^ y < 0 && r) ? y : 0 (floor mod)
5044 : // d += (x^y < 0 && r) ? -1 : 0 (floor div)
5045 : // r -= (x ^ y < 0 && r) ? y : 0 (ceil mod)
5046 : // d -= (x^y < 0 && r) ? -1 : 0 (ceil div)
5047 218 : tree result = vect_recog_temp_ssa_var (itype, NULL);
5048 436 : return gimple_build_assign (result,
5049 218 : (rhs_code == FLOOR_MOD_EXPR
5050 218 : || rhs_code == FLOOR_DIV_EXPR)
5051 : ? PLUS_EXPR
5052 : : MINUS_EXPR,
5053 218 : is_mod ? r : q, extr_cond);
5054 : }
5055 : }
5056 36 : case ROUND_MOD_EXPR:
5057 36 : case ROUND_DIV_EXPR:
5058 36 : {
5059 36 : if (!target_has_vecop_for_code (BIT_AND_EXPR, vectype)
5060 36 : || !target_has_vecop_for_code (PLUS_EXPR, vectype)
5061 36 : || !expand_vec_cmp_expr_p (vectype, mask_vectype, LT_EXPR)
5062 36 : || !expand_vec_cmp_expr_p (vectype, mask_vectype, GT_EXPR)
5063 72 : || !expand_vec_cond_expr_p (vectype, mask_vectype))
5064 0 : return NULL;
5065 :
5066 36 : bool is_mod = rhs_code == ROUND_MOD_EXPR;
5067 36 : HOST_WIDE_INT d = TREE_INT_CST_LOW (oprnd1);
5068 36 : unsigned HOST_WIDE_INT abs_d
5069 : = (d >= 0 ? (unsigned HOST_WIDE_INT) d : -(unsigned HOST_WIDE_INT) d);
5070 36 : unsigned HOST_WIDE_INT mid_d = (abs_d - 1) >> 1;
5071 36 : if (!unsigned_p)
5072 : {
5073 : // check availability of abs expression for vector
5074 18 : if (!target_has_vecop_for_code (ABS_EXPR, vectype))
5075 : return NULL;
5076 : // abs (r)
5077 18 : tree abs_r = vect_recog_temp_ssa_var (itype, NULL);
5078 18 : def_stmt = gimple_build_assign (abs_r, ABS_EXPR, r);
5079 18 : append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
5080 :
5081 : // abs (r) > ((abs (y) - 1) >> 1)
5082 18 : tree round_p = vect_recog_temp_ssa_var (boolean_type_node, NULL);
5083 18 : def_stmt = gimple_build_assign (round_p, GT_EXPR, abs_r,
5084 18 : build_int_cst (itype, mid_d));
5085 18 : append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt, mask_vectype,
5086 : itype);
5087 :
5088 : // x ^ y
5089 18 : tree cond_reg = vect_recog_temp_ssa_var (itype, NULL);
5090 18 : def_stmt
5091 18 : = gimple_build_assign (cond_reg, BIT_XOR_EXPR, oprnd0, oprnd1);
5092 18 : append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
5093 :
5094 : // x ^ y < 0
5095 18 : bool_cond = vect_recog_temp_ssa_var (boolean_type_node, NULL);
5096 18 : def_stmt = gimple_build_assign (bool_cond, LT_EXPR, cond_reg,
5097 : build_int_cst (itype, 0));
5098 18 : append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt, mask_vectype,
5099 : itype);
5100 :
5101 : // x ^ y < 0 ? y : -y (mod)
5102 : // x ^ y < 0 ? -1 : 1 (div)
5103 18 : tree val1 = vect_recog_temp_ssa_var (itype, NULL);
5104 18 : def_stmt
5105 36 : = gimple_build_assign (val1, COND_EXPR, bool_cond,
5106 27 : build_int_cst (itype, is_mod ? d : -1),
5107 18 : build_int_cst (itype, is_mod ? -d : 1));
5108 18 : append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
5109 18 : int precision = TYPE_PRECISION (itype);
5110 18 : wide_int wmask = wi::mask (precision, false, precision);
5111 :
5112 : // abs (r) > ((abs (y) - 1) >> 1) ? 0xffffffff : 0
5113 18 : tree val2 = vect_recog_temp_ssa_var (itype, NULL);
5114 36 : def_stmt = gimple_build_assign (val2, COND_EXPR, round_p,
5115 18 : wide_int_to_tree (itype, wmask),
5116 : build_int_cst (itype, 0));
5117 18 : append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
5118 :
5119 18 : tree fval = vect_recog_temp_ssa_var (itype, NULL);
5120 18 : def_stmt = gimple_build_assign (fval, BIT_AND_EXPR, val1, val2);
5121 18 : append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
5122 :
5123 18 : tree result = vect_recog_temp_ssa_var (itype, NULL);
5124 27 : return gimple_build_assign (result, PLUS_EXPR, is_mod ? r : q,
5125 : fval);
5126 18 : }
5127 : else
5128 : {
5129 : // r > (y-1 >> 1)
5130 18 : tree round_p = vect_recog_temp_ssa_var (boolean_type_node, NULL);
5131 18 : def_stmt = gimple_build_assign (round_p, GT_EXPR, r,
5132 18 : build_int_cst (itype, mid_d));
5133 18 : append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt, mask_vectype,
5134 : itype);
5135 :
5136 : // (r > (y-1)>>1) ? -d : 0 (mod), (r > (y-1)>>1) ? 1 : 0 (div)
5137 18 : tree val2 = vect_recog_temp_ssa_var (itype, NULL);
5138 18 : def_stmt
5139 36 : = gimple_build_assign (val2, COND_EXPR, round_p,
5140 18 : build_int_cst (itype, is_mod ? -d : 1),
5141 : build_int_cst (itype, 0));
5142 18 : append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
5143 :
5144 18 : tree result = vect_recog_temp_ssa_var (itype, NULL);
5145 27 : return gimple_build_assign (result, PLUS_EXPR, is_mod ? r : q,
5146 18 : val2);
5147 : }
5148 : }
5149 : default:
5150 : return NULL;
5151 : }
5152 : }
5153 :
5154 : /* Detect a division or modulo by a constant that wouldn't be
5155 : otherwise vectorized:
5156 :
5157 : type a_t, b_t;
5158 :
5159 : S1 a_t = b_t / N;
5160 :
5161 : where type 'type' is an integral type and N is a constant.
5162 :
5163 : Similarly handle modulo by a constant:
5164 :
5165 : S4 a_t = b_t % N;
5166 :
5167 : Input/Output:
5168 :
5169 : * STMT_VINFO: The stmt from which the pattern search begins,
5170 : i.e. the division stmt. If N is a power of two constant and
5171 : type is signed, S1 is replaced by:
5172 : S3 y_t = b_t < 0 ? N - 1 : 0;
5173 : S2 x_t = b_t + y_t;
5174 : S1' a_t = x_t >> log2 (N);
5175 :
5176 : S4 is replaced if N is a power of two constant and
5177 : type is signed by (where *_T temporaries have unsigned type):
5178 : S9 y_T = b_t < 0 ? -1U : 0U;
5179 : S8 z_T = y_T >> (sizeof (type_t) * CHAR_BIT - log2 (N));
5180 : S7 z_t = (type) z_T;
5181 : S6 w_t = b_t + z_t;
5182 : S5 x_t = w_t & (N - 1);
5183 : S4' a_t = x_t - z_t;
5184 :
5185 : Output:
5186 :
5187 : * TYPE_OUT: The type of the output of this pattern.
5188 :
5189 : * Return value: A new stmt that will be used to replace the division
5190 : S1 or modulo S4 stmt, or NULL if the pattern does not apply.

   Only assignments whose code is TRUNC_DIV_EXPR, EXACT_DIV_EXPR,
   TRUNC_MOD_EXPR or one of the FLOOR/CEIL/ROUND DIV/MOD codes are
   considered, and only when the dividend is an SSA_NAME, the divisor is
   an INTEGER_CST and the type is an INTEGER_TYPE with mode precision.
   Statements that are part of a reduction are rejected.

   For the FLOOR/CEIL/ROUND codes the truncating quotient and remainder
   are computed first and add_code_for_floorceilround_divmod then emits
   the adjustment; if that helper fails NULL is returned.

   Divisors that are not a power of two are handled either by the
   shift/add sequence for unsigned division by 2^(prec/2) - 1 (when the
   target prefers shifts over multiplication and range information rules
   out overflow) or by a MULT_HIGHPART_EXPR with a multiplier obtained
   from choose_multiplier.  */
5191 :
5192 : static gimple *
5193 30855513 : vect_recog_divmod_pattern (vec_info *vinfo,
5194 : stmt_vec_info stmt_vinfo, tree *type_out)
5195 : {
5196 30855513 : gimple *last_stmt = stmt_vinfo->stmt;
5197 30855513 : tree oprnd0, oprnd1, vectype, itype, cond;
5198 30855513 : gimple *pattern_stmt = NULL;
5199 30855513 : gimple *def_stmt = NULL;
5200 30855513 : enum tree_code rhs_code;
5201 30855513 : optab optab;
5202 30855513 : tree q, cst;
5203 30855513 : int prec;
5204 :
5205 30855513 : if (!is_gimple_assign (last_stmt)
5206 : /* The pattern will disrupt the reduction chain with multiple uses. */
5207 30855513 : || vect_is_reduction (stmt_vinfo))
5208 : return NULL;
5209 :
/* Only division and modulo codes are of interest.  */
5210 21023382 : rhs_code = gimple_assign_rhs_code (last_stmt);
5211 21023382 : switch (rhs_code)
5212 : {
5213 276985 : case TRUNC_DIV_EXPR:
5214 276985 : case EXACT_DIV_EXPR:
5215 276985 : case TRUNC_MOD_EXPR:
5216 276985 : case FLOOR_MOD_EXPR:
5217 276985 : case FLOOR_DIV_EXPR:
5218 276985 : case CEIL_MOD_EXPR:
5219 276985 : case CEIL_DIV_EXPR:
5220 276985 : case ROUND_MOD_EXPR:
5221 276985 : case ROUND_DIV_EXPR:
5222 276985 : break;
5223 : default:
5224 : return NULL;
5225 : }
5226 :
/* Require a variable dividend, a constant divisor and an integer type
   whose precision matches its mode.  */
5227 276985 : oprnd0 = gimple_assign_rhs1 (last_stmt);
5228 276985 : oprnd1 = gimple_assign_rhs2 (last_stmt);
5229 276985 : itype = TREE_TYPE (oprnd0);
5230 276985 : if (TREE_CODE (oprnd0) != SSA_NAME
5231 259379 : || TREE_CODE (oprnd1) != INTEGER_CST
5232 165242 : || TREE_CODE (itype) != INTEGER_TYPE
5233 442227 : || !type_has_mode_precision_p (itype))
5234 111743 : return NULL;
5235 :
5236 165242 : scalar_int_mode itype_mode = SCALAR_INT_TYPE_MODE (itype);
5237 165242 : vectype = get_vectype_for_scalar_type (vinfo, itype);
5238 165242 : if (vectype == NULL_TREE)
5239 : return NULL;
5240 :
5241 132566 : if (optimize_bb_for_size_p (gimple_bb (last_stmt)))
5242 : {
5243 : /* If the target can handle vectorized division or modulo natively,
5244 : don't attempt to optimize this, since native division is likely
5245 : to give smaller code. */
5246 2224 : optab = optab_for_tree_code (rhs_code, vectype, optab_default);
5247 2224 : if (optab != unknown_optab
5248 2224 : && can_implement_p (optab, TYPE_MODE (vectype)))
5249 : return NULL;
5250 : }
5251 :
5252 132566 : prec = TYPE_PRECISION (itype);
5253 :
/* True for the FLOOR/CEIL/ROUND codes, which need an adjustment on top
   of the truncating quotient and remainder.  */
5254 265132 : bool is_flclrd_moddiv_p
5255 132566 : = rhs_code == FLOOR_MOD_EXPR || rhs_code == FLOOR_DIV_EXPR
5256 : || rhs_code == CEIL_MOD_EXPR || rhs_code == CEIL_DIV_EXPR
5257 131955 : || rhs_code == ROUND_MOD_EXPR || rhs_code == ROUND_DIV_EXPR;
5258 132566 : if (integer_pow2p (oprnd1))
5259 : {
/* Give up on unsigned FLOOR_DIV/FLOOR_MOD and on divisors that are not
   strictly positive.  */
5260 79972 : if ((TYPE_UNSIGNED (itype)
5261 57 : && (rhs_code == FLOOR_MOD_EXPR || rhs_code == FLOOR_DIV_EXPR))
5262 80026 : || tree_int_cst_sgn (oprnd1) != 1)
5263 3 : return NULL;
5264 :
5265 : /* Pattern detected. */
5266 79969 : vect_pattern_detected ("vect_recog_divmod_pattern", last_stmt);
5267 :
5268 79969 : *type_out = vectype;
5269 :
5270 : /* Check if the target supports this internal function. */
5271 79969 : internal_fn ifn = IFN_DIV_POW2;
5272 79969 : if (direct_internal_fn_supported_p (ifn, vectype, OPTIMIZE_FOR_SPEED))
5273 : {
5274 0 : tree shift = build_int_cst (itype, tree_log2 (oprnd1));
5275 :
5276 0 : tree var_div = vect_recog_temp_ssa_var (itype, NULL);
5277 0 : gimple *div_stmt = gimple_build_call_internal (ifn, 2, oprnd0, shift);
5278 0 : gimple_call_set_lhs (div_stmt, var_div);
5279 0 : if (rhs_code == TRUNC_MOD_EXPR || is_flclrd_moddiv_p)
5280 : {
/* Remainder: oprnd0 - ((oprnd0 / N) << log2 (N)).  */
5281 0 : append_pattern_def_seq (vinfo, stmt_vinfo, div_stmt);
5282 0 : tree t1 = vect_recog_temp_ssa_var (itype, NULL);
5283 0 : def_stmt
5284 0 : = gimple_build_assign (t1, LSHIFT_EXPR, var_div, shift);
5285 0 : append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
5286 0 : pattern_stmt
5287 0 : = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
5288 : MINUS_EXPR, oprnd0, t1);
5289 0 : if (is_flclrd_moddiv_p)
5290 : {
/* The remainder is the result of the subtraction just built
   (oprnd0 - t1), not t1 itself, so pass that SSA name to the
   adjustment helper, as the other two call sites do.  */
5291 0 : append_pattern_def_seq (vinfo, stmt_vinfo, pattern_stmt);
5292 0 : pattern_stmt
5293 0 : = add_code_for_floorceilround_divmod (vectype, vinfo,
5294 : stmt_vinfo, rhs_code,
5295 : var_div, gimple_assign_lhs (pattern_stmt), oprnd0,
5296 : oprnd1, itype);
5297 0 : if (pattern_stmt == NULL)
5298 : return NULL;
5299 : }
5300 : }
5301 : else
5302 : pattern_stmt = div_stmt;
5303 0 : gimple_set_location (pattern_stmt, gimple_location (last_stmt));
5304 :
5305 0 : return pattern_stmt;
5306 : }
5307 :
/* cond = oprnd0 < 0, used to bias negative dividends below.  */
5308 79969 : cond = vect_recog_temp_ssa_var (boolean_type_node, NULL);
5309 79969 : def_stmt = gimple_build_assign (cond, LT_EXPR, oprnd0,
5310 : build_int_cst (itype, 0));
5311 79969 : append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt,
5312 : truth_type_for (vectype), itype);
5313 79969 : tree div_result = NULL_TREE;
5314 79969 : if (rhs_code == TRUNC_DIV_EXPR
5315 79969 : || rhs_code == EXACT_DIV_EXPR
5316 : || rhs_code == FLOOR_DIV_EXPR
5317 2689 : || rhs_code == CEIL_DIV_EXPR
5318 2530 : || rhs_code == ROUND_DIV_EXPR)
5319 : {
/* Quotient: (oprnd0 + (oprnd0 < 0 ? N - 1 : 0)) >> log2 (N).  */
5320 77451 : tree var = vect_recog_temp_ssa_var (itype, NULL);
5321 77451 : tree shift;
5322 77451 : def_stmt
5323 77451 : = gimple_build_assign (var, COND_EXPR, cond,
5324 : fold_build2 (MINUS_EXPR, itype, oprnd1,
5325 : build_int_cst (itype, 1)),
5326 : build_int_cst (itype, 0));
5327 77451 : append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
5328 77451 : var = vect_recog_temp_ssa_var (itype, NULL);
5329 77451 : def_stmt
5330 77451 : = gimple_build_assign (var, PLUS_EXPR, oprnd0,
5331 : gimple_assign_lhs (def_stmt));
5332 77451 : append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
5333 :
5334 77451 : shift = build_int_cst (itype, tree_log2 (oprnd1));
5335 77451 : div_result = vect_recog_temp_ssa_var (itype, NULL);
5336 77451 : pattern_stmt
5337 77451 : = gimple_build_assign (div_result, RSHIFT_EXPR, var, shift);
5338 : }
5339 79969 : if (rhs_code == TRUNC_MOD_EXPR || is_flclrd_moddiv_p)
5340 : {
/* For the FLOOR/CEIL/ROUND division codes the quotient statement built
   above is not the final one, so queue it in the pattern def sequence.  */
5341 2689 : if (rhs_code == FLOOR_DIV_EXPR
5342 : || rhs_code == CEIL_DIV_EXPR
5343 2689 : || rhs_code == ROUND_DIV_EXPR)
5344 171 : append_pattern_def_seq (vinfo, stmt_vinfo, pattern_stmt);
5345 :
5346 2689 : tree signmask;
5347 2689 : if (compare_tree_int (oprnd1, 2) == 0)
5348 : {
/* For N == 2 the bias N - 1 is 1, so a single COND_EXPR suffices.  */
5349 1283 : signmask = vect_recog_temp_ssa_var (itype, NULL);
5350 1283 : def_stmt = gimple_build_assign (signmask, COND_EXPR, cond,
5351 : build_int_cst (itype, 1),
5352 : build_int_cst (itype, 0));
5353 1283 : append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
5354 : }
5355 : else
5356 : {
/* signmask = (type) ((oprnd0 < 0 ? -1U : 0U) >> (bitsize - log2 (N))),
   computed in an unsigned type of the same precision.  */
5357 1406 : tree utype
5358 1406 : = build_nonstandard_integer_type (prec, 1);
5359 1406 : tree vecutype = get_vectype_for_scalar_type (vinfo, utype);
5360 1406 : tree shift
5361 1406 : = build_int_cst (utype, GET_MODE_BITSIZE (itype_mode)
5362 1406 : - tree_log2 (oprnd1));
5363 1406 : tree var = vect_recog_temp_ssa_var (utype, NULL);
5364 :
5365 1406 : def_stmt = gimple_build_assign (var, COND_EXPR, cond,
5366 : build_int_cst (utype, -1),
5367 : build_int_cst (utype, 0));
5368 1406 : append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt, vecutype);
5369 1406 : var = vect_recog_temp_ssa_var (utype, NULL);
5370 1406 : def_stmt = gimple_build_assign (var, RSHIFT_EXPR,
5371 : gimple_assign_lhs (def_stmt),
5372 : shift);
5373 1406 : append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt, vecutype);
5374 1406 : signmask = vect_recog_temp_ssa_var (itype, NULL);
5375 1406 : def_stmt
5376 1406 : = gimple_build_assign (signmask, NOP_EXPR, var);
5377 1406 : append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
5378 : }
/* r = ((oprnd0 + signmask) & (N - 1)) - signmask.  */
5379 2689 : def_stmt
5380 2689 : = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
5381 : PLUS_EXPR, oprnd0, signmask);
5382 2689 : append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
5383 2689 : def_stmt
5384 2689 : = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
5385 : BIT_AND_EXPR, gimple_assign_lhs (def_stmt),
5386 : fold_build2 (MINUS_EXPR, itype, oprnd1,
5387 : build_int_cst (itype, 1)));
5388 2689 : append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
5389 :
5390 2689 : tree r = vect_recog_temp_ssa_var (itype, NULL);
5391 2689 : pattern_stmt
5392 2689 : = gimple_build_assign (r, MINUS_EXPR, gimple_assign_lhs (def_stmt),
5393 : signmask);
5394 2689 : if (is_flclrd_moddiv_p)
5395 : {
5396 285 : append_pattern_def_seq (vinfo, stmt_vinfo, pattern_stmt);
5397 285 : pattern_stmt
5398 285 : = add_code_for_floorceilround_divmod (vectype, vinfo,
5399 : stmt_vinfo, rhs_code,
5400 : div_result, r, oprnd0,
5401 : oprnd1, itype);
5402 285 : if (pattern_stmt == NULL)
5403 : return NULL;
5404 : }
5405 : }
5406 :
5407 79810 : return pattern_stmt;
5408 : }
5409 :
5410 52594 : if ((cst = uniform_integer_cst_p (oprnd1))
5411 52594 : && TYPE_UNSIGNED (itype)
5412 : && rhs_code == TRUNC_DIV_EXPR
5413 29441 : && vectype
5414 70311 : && targetm.vectorize.preferred_div_as_shifts_over_mult (vectype))
5415 : {
5416 : /* We can use the relationship:
5417 :
5418 : x // N == ((x+N+2) // (N+1) + x) // (N+1) for 0 <= x < N(N+3)
5419 :
5420 : to optimize cases where N+1 is a power of 2, and where // (N+1)
5421 : is therefore a shift right. When operating in modes that are
5422 : multiples of a byte in size, there are two cases:
5423 :
5424 : (1) N(N+3) is not representable, in which case the question
5425 : becomes whether the replacement expression overflows.
5426 : It is enough to test that x+N+2 does not overflow,
5427 : i.e. that x < MAX-(N+1).
5428 :
5429 : (2) N(N+3) is representable, in which case it is the (only)
5430 : bound that we need to check.
5431 :
5432 : ??? For now we just handle the case where // (N+1) is a shift
5433 : right by half the precision, since some architectures can
5434 : optimize the associated addition and shift combinations
5435 : into single instructions. */
5436 :
5437 12023 : auto wcst = wi::to_wide (cst);
5438 12023 : int pow = wi::exact_log2 (wcst + 1);
5439 12023 : if (pow == prec / 2)
5440 : {
5441 472 : gimple *stmt = SSA_NAME_DEF_STMT (oprnd0);
5442 :
5443 472 : gimple_ranger ranger;
5444 472 : int_range_max r;
5445 :
5446 : /* Check that no overflow will occur. If we don't have range
5447 : information we can't perform the optimization. */
5448 :
5449 472 : if (ranger.range_of_expr (r, oprnd0, stmt) && !r.undefined_p ())
5450 : {
/* ADDER is 1 + 2^pow, i.e. N + 2; the pattern is only used when adding
   it to the upper bound of OPRND0's range does not overflow.  */
5451 470 : wide_int max = r.upper_bound ();
5452 470 : wide_int one = wi::shwi (1, prec);
5453 470 : wide_int adder = wi::add (one, wi::lshift (one, pow));
5454 470 : wi::overflow_type ovf;
5455 470 : wi::add (max, adder, UNSIGNED, &ovf);
5456 470 : if (ovf == wi::OVF_NONE)
5457 : {
5458 313 : *type_out = vectype;
5459 313 : tree tadder = wide_int_to_tree (itype, adder);
5460 313 : tree rshift = wide_int_to_tree (itype, pow);
5461 :
5462 313 : tree new_lhs1 = vect_recog_temp_ssa_var (itype, NULL);
5463 313 : gassign *patt1
5464 313 : = gimple_build_assign (new_lhs1, PLUS_EXPR, oprnd0, tadder);
5465 313 : append_pattern_def_seq (vinfo, stmt_vinfo, patt1, vectype);
5466 :
5467 313 : tree new_lhs2 = vect_recog_temp_ssa_var (itype, NULL);
5468 313 : patt1 = gimple_build_assign (new_lhs2, RSHIFT_EXPR, new_lhs1,
5469 : rshift);
5470 313 : append_pattern_def_seq (vinfo, stmt_vinfo, patt1, vectype);
5471 :
5472 313 : tree new_lhs3 = vect_recog_temp_ssa_var (itype, NULL);
5473 313 : patt1 = gimple_build_assign (new_lhs3, PLUS_EXPR, new_lhs2,
5474 : oprnd0);
5475 313 : append_pattern_def_seq (vinfo, stmt_vinfo, patt1, vectype);
5476 :
5477 313 : tree new_lhs4 = vect_recog_temp_ssa_var (itype, NULL);
5478 313 : pattern_stmt = gimple_build_assign (new_lhs4, RSHIFT_EXPR,
5479 : new_lhs3, rshift);
5480 :
5481 313 : return pattern_stmt;
5482 : }
5483 470 : }
5484 472 : }
5485 : }
5486 :
/* The multiplier-based code below works on HOST_WIDE_INT values, so the
   precision must fit; a zero divisor is not handled either.  */
5487 52281 : if (prec > HOST_BITS_PER_WIDE_INT
5488 52281 : || integer_zerop (oprnd1))
5489 262 : return NULL;
5490 :
5491 52019 : if (!can_mult_highpart_p (TYPE_MODE (vectype), TYPE_UNSIGNED (itype)))
5492 : return NULL;
5493 :
5494 14109 : if (TYPE_UNSIGNED (itype))
5495 : {
5496 8742 : unsigned HOST_WIDE_INT mh, ml;
5497 8742 : int pre_shift, post_shift;
5498 8742 : unsigned HOST_WIDE_INT d = (TREE_INT_CST_LOW (oprnd1)
5499 8742 : & GET_MODE_MASK (itype_mode));
5500 8742 : tree t1, t2, t3, t4;
5501 :
5502 8742 : if (d >= (HOST_WIDE_INT_1U << (prec - 1)))
5503 : /* FIXME: Can transform this into oprnd0 >= oprnd1 ? 1 : 0. */
5504 23 : return NULL;
5505 :
5506 : /* Find a suitable multiplier and right shift count instead of
5507 : directly dividing by D. */
5508 8719 : mh = choose_multiplier (d, prec, prec, &ml, &post_shift);
5509 :
5510 : /* If the suggested multiplier is more than PREC bits, we can do better
5511 : for even divisors, using an initial right shift. */
5512 8719 : if (mh != 0 && (d & 1) == 0)
5513 : {
5514 248 : pre_shift = ctz_or_zero (d);
5515 248 : mh = choose_multiplier (d >> pre_shift, prec, prec - pre_shift,
5516 : &ml, &post_shift);
5517 248 : gcc_assert (!mh);
5518 : }
5519 : else
5520 : pre_shift = 0;
5521 :
5522 634 : if (mh != 0)
5523 : {
5524 634 : if (post_shift - 1 >= prec)
5525 : return NULL;
5526 :
5527 : /* t1 = oprnd0 h* ml;
5528 : t2 = oprnd0 - t1;
5529 : t3 = t2 >> 1;
5530 : t4 = t1 + t3;
5531 : q = t4 >> (post_shift - 1); */
5532 634 : t1 = vect_recog_temp_ssa_var (itype, NULL);
5533 634 : def_stmt = gimple_build_assign (t1, MULT_HIGHPART_EXPR, oprnd0,
5534 634 : build_int_cst (itype, ml));
5535 634 : append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
5536 :
5537 634 : t2 = vect_recog_temp_ssa_var (itype, NULL);
5538 634 : def_stmt
5539 634 : = gimple_build_assign (t2, MINUS_EXPR, oprnd0, t1);
5540 634 : append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
5541 :
5542 634 : t3 = vect_recog_temp_ssa_var (itype, NULL);
5543 634 : def_stmt
5544 634 : = gimple_build_assign (t3, RSHIFT_EXPR, t2, integer_one_node);
5545 634 : append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
5546 :
5547 634 : t4 = vect_recog_temp_ssa_var (itype, NULL);
5548 634 : def_stmt
5549 634 : = gimple_build_assign (t4, PLUS_EXPR, t1, t3);
5550 :
5551 634 : if (post_shift != 1)
5552 : {
5553 634 : append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
5554 :
5555 634 : q = vect_recog_temp_ssa_var (itype, NULL);
5556 634 : pattern_stmt
5557 634 : = gimple_build_assign (q, RSHIFT_EXPR, t4,
5558 634 : build_int_cst (itype, post_shift - 1));
5559 : }
5560 : else
5561 : {
5562 : q = t4;
5563 : pattern_stmt = def_stmt;
5564 : }
5565 : }
5566 : else
5567 : {
5568 8085 : if (pre_shift >= prec || post_shift >= prec)
5569 : return NULL;
5570 :
5571 : /* t1 = oprnd0 >> pre_shift;
5572 : t2 = t1 h* ml;
5573 : q = t2 >> post_shift; */
5574 8085 : if (pre_shift)
5575 : {
5576 248 : t1 = vect_recog_temp_ssa_var (itype, NULL);
5577 248 : def_stmt
5578 248 : = gimple_build_assign (t1, RSHIFT_EXPR, oprnd0,
5579 248 : build_int_cst (NULL, pre_shift));
5580 248 : append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
5581 : }
5582 : else
5583 : t1 = oprnd0;
5584 :
5585 8085 : t2 = vect_recog_temp_ssa_var (itype, NULL);
5586 8085 : def_stmt = gimple_build_assign (t2, MULT_HIGHPART_EXPR, t1,
5587 8085 : build_int_cst (itype, ml));
5588 :
5589 8085 : if (post_shift)
5590 : {
5591 8075 : append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
5592 :
5593 8075 : q = vect_recog_temp_ssa_var (itype, NULL);
5594 8075 : def_stmt
5595 8075 : = gimple_build_assign (q, RSHIFT_EXPR, t2,
5596 8075 : build_int_cst (itype, post_shift));
5597 : }
5598 : else
5599 : q = t2;
5600 :
5601 : pattern_stmt = def_stmt;
5602 : }
5603 : }
5604 : else
5605 : {
5606 5367 : unsigned HOST_WIDE_INT ml;
5607 5367 : int post_shift;
5608 5367 : HOST_WIDE_INT d = TREE_INT_CST_LOW (oprnd1);
5609 5367 : unsigned HOST_WIDE_INT abs_d;
5610 5367 : bool add = false;
5611 5367 : tree t1, t2, t3, t4;
5612 :
5613 : /* Give up for -1. */
5614 5367 : if (d == -1)
5615 0 : return NULL;
5616 :
5617 : /* Since d might be INT_MIN, we have to cast to
5618 : unsigned HOST_WIDE_INT before negating to avoid
5619 : undefined signed overflow. */
5620 5367 : abs_d = (d >= 0
5621 5367 : ? (unsigned HOST_WIDE_INT) d
5622 : : - (unsigned HOST_WIDE_INT) d);
5623 :
5624 : /* n rem d = n rem -d */
5625 5367 : if (rhs_code == TRUNC_MOD_EXPR && d < 0)
5626 : {
5627 0 : d = abs_d;
5628 0 : oprnd1 = build_int_cst (itype, abs_d);
5629 : }
5630 5367 : if (HOST_BITS_PER_WIDE_INT >= prec
5631 5367 : && abs_d == HOST_WIDE_INT_1U << (prec - 1))
5632 : /* This case is not handled correctly below. */
5633 : return NULL;
5634 :
/* If the chosen multiplier does not fit in PREC - 1 bits, extend it with
   set high bits and compensate by adding OPRND0 back (ADD) below.  */
5635 5367 : choose_multiplier (abs_d, prec, prec - 1, &ml, &post_shift);
5636 5367 : if (ml >= HOST_WIDE_INT_1U << (prec - 1))
5637 : {
5638 1586 : add = true;
5639 1586 : ml |= HOST_WIDE_INT_M1U << (prec - 1);
5640 : }
5641 5367 : if (post_shift >= prec)
5642 : return NULL;
5643 :
5644 : /* t1 = oprnd0 h* ml; */
5645 5367 : t1 = vect_recog_temp_ssa_var (itype, NULL);
5646 5367 : def_stmt = gimple_build_assign (t1, MULT_HIGHPART_EXPR, oprnd0,
5647 5367 : build_int_cst (itype, ml));
5648 :
5649 5367 : if (add)
5650 : {
5651 : /* t2 = t1 + oprnd0; */
5652 1586 : append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
5653 1586 : t2 = vect_recog_temp_ssa_var (itype, NULL);
5654 1586 : def_stmt = gimple_build_assign (t2, PLUS_EXPR, t1, oprnd0);
5655 : }
5656 : else
5657 : t2 = t1;
5658 :
5659 5367 : if (post_shift)
5660 : {
5661 : /* t3 = t2 >> post_shift; */
5662 4559 : append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
5663 4559 : t3 = vect_recog_temp_ssa_var (itype, NULL);
5664 4559 : def_stmt = gimple_build_assign (t3, RSHIFT_EXPR, t2,
5665 4559 : build_int_cst (itype, post_shift));
5666 : }
5667 : else
5668 : t3 = t2;
5669 :
/* MSB records what range information says about the sign of OPRND0:
   0 if it is known to be non-negative, -1 if it is known to be
   negative, and 1 if nothing is known.  */
5670 5367 : int msb = 1;
5671 5367 : int_range_max r;
5672 10734 : get_range_query (cfun)->range_of_expr (r, oprnd0);
5673 5367 : if (!r.varying_p () && !r.undefined_p ())
5674 : {
5675 2966 : if (!wi::neg_p (r.lower_bound (), TYPE_SIGN (itype)))
5676 : msb = 0;
5677 738 : else if (wi::neg_p (r.upper_bound (), TYPE_SIGN (itype)))
5678 : msb = -1;
5679 : }
5680 :
5681 2228 : if (msb == 0 && d >= 0)
5682 : {
5683 : /* q = t3; */
5684 : q = t3;
5685 : pattern_stmt = def_stmt;
5686 : }
5687 : else
5688 : {
5689 : /* t4 = oprnd0 >> (prec - 1);
5690 : or if we know from VRP that oprnd0 >= 0
5691 : t4 = 0;
5692 : or if we know from VRP that oprnd0 < 0
5693 : t4 = -1; */
5694 3199 : append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
5695 3199 : t4 = vect_recog_temp_ssa_var (itype, NULL);
5696 3199 : if (msb != 1)
5697 68 : def_stmt = gimple_build_assign (t4, INTEGER_CST,
5698 68 : build_int_cst (itype, msb));
5699 : else
5700 3131 : def_stmt = gimple_build_assign (t4, RSHIFT_EXPR, oprnd0,
5701 3131 : build_int_cst (itype, prec - 1));
5702 3199 : append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
5703 :
5704 : /* q = t3 - t4; or q = t4 - t3; */
5705 3199 : q = vect_recog_temp_ssa_var (itype, NULL);
5706 6218 : pattern_stmt = gimple_build_assign (q, MINUS_EXPR, d < 0 ? t4 : t3,
5707 : d < 0 ? t3 : t4);
5708 : }
5709 5367 : }
5710 :
5711 14086 : if (rhs_code == TRUNC_MOD_EXPR || is_flclrd_moddiv_p)
5712 : {
5713 6752 : tree r, t1;
5714 :
5715 : /* We divided. Now finish by:
5716 : t1 = q * oprnd1;
5717 : r = oprnd0 - t1; */
5718 6752 : append_pattern_def_seq (vinfo, stmt_vinfo, pattern_stmt);
5719 :
5720 6752 : t1 = vect_recog_temp_ssa_var (itype, NULL);
5721 6752 : def_stmt = gimple_build_assign (t1, MULT_EXPR, q, oprnd1);
5722 6752 : append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
5723 :
5724 6752 : r = vect_recog_temp_ssa_var (itype, NULL);
5725 6752 : pattern_stmt = gimple_build_assign (r, MINUS_EXPR, oprnd0, t1);
5726 :
5727 6752 : if (is_flclrd_moddiv_p)
5728 : {
5729 146 : append_pattern_def_seq (vinfo, stmt_vinfo, pattern_stmt);
5730 146 : pattern_stmt
5731 146 : = add_code_for_floorceilround_divmod (vectype, vinfo, stmt_vinfo,
5732 : rhs_code, q, r, oprnd0, oprnd1,
5733 : itype);
5734 146 : if (pattern_stmt == NULL)
5735 : return NULL;
5736 : }
5737 : }
5738 :
5739 : /* Pattern detected. */
5740 14086 : vect_pattern_detected ("vect_recog_divmod_pattern", last_stmt);
5741 :
5742 14086 : *type_out = vectype;
5743 14086 : return pattern_stmt;
5744 : }
5745 :
5746 : /* Detects pattern with a modulo operation (S1) where both arguments
5747 : are variables of integral type.
5748 : The statement is replaced by division, multiplication, and subtraction.
5749 : The last statement (S4) is returned.
5750 :
5751 : Example:
5752 : S1 c_t = a_t % b_t;
5753 :
5754 : is replaced by
5755 : S2 x_t = a_t / b_t;
5756 : S3 y_t = x_t * b_t;
5757 : S4 z_t = a_t - y_t;

   The pattern only applies to a TRUNC_MOD_EXPR assignment that is not
   part of a reduction, whose two operands are SSA_NAMEs of INTEGER_TYPE.
   It is used only when the target has no vector TRUNC_MOD_EXPR but does
   have vector TRUNC_DIV_EXPR, MULT_EXPR and MINUS_EXPR for the vector
   type; otherwise NULL is returned.  On success *TYPE_OUT is set to the
   vector type and S2 and S3 are appended to STMT_VINFO's pattern
   definition sequence.  */
5758 :
5759 : static gimple *
5760 31048776 : vect_recog_mod_var_pattern (vec_info *vinfo,
5761 : stmt_vec_info stmt_vinfo, tree *type_out)
5762 : {
5763 31048776 : gimple *last_stmt = STMT_VINFO_STMT (stmt_vinfo);
5764 31048776 : tree oprnd0, oprnd1, vectype, itype;
5765 31048776 : gimple *pattern_stmt, *def_stmt;
5766 31048776 : enum tree_code rhs_code;
5767 :
5768 31048776 : if (!is_gimple_assign (last_stmt) || vect_is_reduction (stmt_vinfo))
5769 : return NULL;
5770 :
5771 21216645 : rhs_code = gimple_assign_rhs_code (last_stmt);
5772 21216645 : if (rhs_code != TRUNC_MOD_EXPR)
5773 : return NULL;
5774 :
/* Both the dividend and the divisor must be variables.  */
5775 68865 : oprnd0 = gimple_assign_rhs1 (last_stmt);
5776 68865 : oprnd1 = gimple_assign_rhs2 (last_stmt);
5777 68865 : itype = TREE_TYPE (oprnd0);
5778 68865 : if (TREE_CODE (oprnd0) != SSA_NAME
5779 60542 : || TREE_CODE (oprnd1) != SSA_NAME
5780 44050 : || TREE_CODE (itype) != INTEGER_TYPE)
5781 : return NULL;
5782 :
5783 43923 : vectype = get_vectype_for_scalar_type (vinfo, itype);
5784 :
/* Nothing to do if the target can do the vector modulo itself, and
   nothing we can do if it lacks any of the three replacement ops.  */
5785 43923 : if (!vectype
5786 35845 : || target_has_vecop_for_code (TRUNC_MOD_EXPR, vectype)
5787 35845 : || !target_has_vecop_for_code (TRUNC_DIV_EXPR, vectype)
5788 0 : || !target_has_vecop_for_code (MULT_EXPR, vectype)
5789 43923 : || !target_has_vecop_for_code (MINUS_EXPR, vectype))
5790 43923 : return NULL;
5791 :
/* S2: q = oprnd0 / oprnd1.  */
5792 0 : tree q, tmp, r;
5793 0 : q = vect_recog_temp_ssa_var (itype, NULL);
5794 0 : def_stmt = gimple_build_assign (q, TRUNC_DIV_EXPR, oprnd0, oprnd1);
5795 0 : append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt, vectype);
5796 :
/* S3: tmp = q * oprnd1.  */
5797 0 : tmp = vect_recog_temp_ssa_var (itype, NULL);
5798 0 : def_stmt = gimple_build_assign (tmp, MULT_EXPR, q, oprnd1);
5799 0 : append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt, vectype);
5800 :
/* S4: r = oprnd0 - tmp; this is the statement that is returned.  */
5801 0 : r = vect_recog_temp_ssa_var (itype, NULL);
5802 0 : pattern_stmt = gimple_build_assign (r, MINUS_EXPR, oprnd0, tmp);
5803 :
5804 : /* Pattern detected. */
5805 0 : *type_out = vectype;
5806 0 : vect_pattern_detected ("vect_recog_mod_var_pattern", last_stmt);
5807 :
5808 0 : return pattern_stmt;
5809 : }
5810 :
5811 :
5812 : /* Return the proper type for converting bool VAR into
5813 : an integer value or NULL_TREE if no such type exists.
5814 : The type is chosen so that the converted value has the
5815 : same number of elements as VAR's vector type.

   If DT is non-null, *DT is set to vect_external_def when VINFO has no
   definition statement for VAR, and to that statement's def type
   otherwise.  *DT is left untouched when VAR is not a scalar boolean.

   NULL_TREE is returned when VAR is not a scalar boolean, when it has no
   definition in VINFO, when the definition is external, or when the
   definition does not use a mask type.  Otherwise the result is the
   nonstandard integer type built from the definition's mask_precision.  */
5816 :
5817 : static tree
5818 4426723 : integer_type_for_mask (tree var, vec_info *vinfo, vect_def_type *dt = nullptr)
5819 : {
5820 4426723 : if (!VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (var)))
5821 : return NULL_TREE;
5822 :
5823 2013877 : stmt_vec_info def_stmt_info = vinfo->lookup_def (var);
/* Report the kind of definition to the caller if requested.  */
5824 2013877 : if (dt)
5825 : {
5826 342400 : if (!def_stmt_info)
5827 3401 : *dt = vect_external_def;
5828 : else
5829 338999 : *dt = STMT_VINFO_DEF_TYPE (def_stmt_info);
5830 : }
5831 342400 : if (!def_stmt_info
5832 1924302 : || STMT_VINFO_DEF_TYPE (def_stmt_info) == vect_external_def
5833 3595779 : || !vect_use_mask_type_p (def_stmt_info))
5834 777075 : return NULL_TREE;
5835 :
5836 1236802 : return build_nonstandard_integer_type (def_stmt_info->mask_precision, 1);
5837 : }
5838 :
5839 : /* Function vect_recog_gcond_pattern
5840 :
5841 : Try to find pattern like following:
5842 :
5843 : if (a op b)
5844 :
5845 : where 'op' is the comparison code of the condition, and convert it
   to an adjusted boolean pattern
5846 :
5847 : mask = a op b
5848 : if (mask != 0)
5849 :
5850 : and set the mask type on MASK.

   NOTE(review): this comment used to say that 'op' is not !=, but the
   code below does not special-case any comparison code - confirm whether
   the restriction is enforced elsewhere.
5851 :
5852 : Input:
5853 :
5854 : * STMT_VINFO: The stmt from which the pattern search begins,
5855 : i.e. the gcond.
5856 :
5857 :
5858 : Output:
5859 :
5860 : * TYPE_OUT: The type of the output of this pattern.
5861 :
5862 : * Return value: A new stmt that will be used to replace the pattern,
   or NULL if VINFO is not a loop with early breaks, the stmt is not a
   gcond, the compared operand has vector type, or no mask vector type
   is available.  */
5863 :
5864 : static gimple *
5865 31119803 : vect_recog_gcond_pattern (vec_info *vinfo,
5866 : stmt_vec_info stmt_vinfo, tree *type_out)
5867 : {
5868 : /* Currently we only support this for loop vectorization and when multiple
5869 : exits. */
5870 31119803 : loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
5871 4496499 : if (!loop_vinfo || !LOOP_VINFO_EARLY_BREAKS (loop_vinfo))
5872 : return NULL;
5873 :
5874 1643304 : gimple *last_stmt = STMT_VINFO_STMT (stmt_vinfo);
5875 1643304 : gcond* cond = NULL;
5876 31139203 : if (!(cond = dyn_cast <gcond *> (last_stmt)))
5877 : return NULL;
5878 :
5879 382352 : auto lhs = gimple_cond_lhs (cond);
5880 382352 : auto rhs = gimple_cond_rhs (cond);
5881 382352 : auto code = gimple_cond_code (cond);
5882 :
5883 382352 : tree scalar_type = TREE_TYPE (lhs);
5884 382352 : if (VECTOR_TYPE_P (scalar_type))
5885 : return NULL;
5886 :
5887 : /* If the input is a boolean then try to figure out the precision that the
5888 : vector type should use. We cannot use the scalar precision as this would
5889 : later mismatch. This is similar to what recog_bool does. */
5890 382352 : if (VECT_SCALAR_BOOLEAN_TYPE_P (scalar_type))
5891 : {
5892 10249 : if (tree stype = integer_type_for_mask (lhs, vinfo))
5893 382352 : scalar_type = stype;
5894 : }
5895 :
5896 382352 : tree vectype = get_mask_type_for_scalar_type (vinfo, scalar_type);
5897 382352 : if (vectype == NULL_TREE)
5898 : return NULL;
5899 :
/* mask = lhs code rhs, queued with the mask vector type.  */
5900 362952 : tree new_lhs = vect_recog_temp_ssa_var (boolean_type_node, NULL);
5901 362952 : gimple *new_stmt = gimple_build_assign (new_lhs, code, lhs, rhs);
5902 362952 : append_pattern_def_seq (vinfo, stmt_vinfo, new_stmt, vectype, scalar_type);
5903 :
/* if (mask != 0); the new condition is built without branch labels.  */
5904 362952 : gimple *pattern_stmt
5905 362952 : = gimple_build_cond (NE_EXPR, new_lhs,
5906 362952 : build_int_cst (TREE_TYPE (new_lhs), 0),
5907 : NULL_TREE, NULL_TREE);
5908 362952 : *type_out = vectype;
5909 362952 : vect_pattern_detected ("vect_recog_gcond_pattern", last_stmt);
5910 362952 : return pattern_stmt;
5911 : }
5912 :
5913 :
5914 : /* A helper for vect_recog_mask_conversion_pattern. Build
5915 : conversion of MASK to a type suitable for masking VECTYPE.
5916 : Built statement gets required vectype and is appended to
5917 : a pattern sequence of STMT_VINFO.
5918 :
5919 : Return converted mask. */
5920 :
5921 : static tree
5922 126885 : build_mask_conversion (vec_info *vinfo,
5923 : tree mask, tree vectype, stmt_vec_info stmt_vinfo)
5924 : {
5925 126885 : gimple *stmt;
5926 126885 : tree masktype, tmp;
5927 :
5928 126885 : masktype = truth_type_for (vectype);
5929 126885 : tmp = vect_recog_temp_ssa_var (TREE_TYPE (masktype), NULL);
5930 126885 : stmt = gimple_build_assign (tmp, CONVERT_EXPR, mask);
5931 126885 : append_pattern_def_seq (vinfo, stmt_vinfo,
5932 126885 : stmt, masktype, TREE_TYPE (vectype));
5933 :
5934 126885 : return tmp;
5935 : }
5936 :
5937 :
5938 : /* Return MASK if MASK is suitable for masking an operation on vectors
5939 : of type VECTYPE, otherwise convert it into such a form and return
5940 : the result. Associate any conversion statements with STMT_INFO's
5941 : pattern. */
5942 :
5943 : static tree
5944 73625 : vect_convert_mask_for_vectype (tree mask, tree vectype,
5945 : stmt_vec_info stmt_info, vec_info *vinfo)
5946 : {
5947 73625 : tree mask_type = integer_type_for_mask (mask, vinfo);
5948 73625 : if (mask_type)
5949 : {
5950 73625 : tree mask_vectype = get_mask_type_for_scalar_type (vinfo, mask_type);
5951 73625 : if (mask_vectype
5952 147250 : && maybe_ne (TYPE_VECTOR_SUBPARTS (vectype),
5953 87965 : TYPE_VECTOR_SUBPARTS (mask_vectype)))
5954 59285 : mask = build_mask_conversion (vinfo, mask, vectype, stmt_info);
5955 : }
5956 73625 : return mask;
5957 : }
5958 :
5959 :
5960 : /* Function vect_recog_bool_pattern
5961 :
5962 : Try to find pattern like following:
5963 :
5964 : bool a_b, b_b, c_b, d_b, e_b;
5965 : TYPE f_T;
5966 : loop:
5967 : S1 a_b = x1 CMP1 y1;
5968 : S2 b_b = x2 CMP2 y2;
5969 : S3 c_b = a_b & b_b;
5970 : S4 d_b = x3 CMP3 y3;
5971 : S5 e_b = c_b | d_b;
5972 : S6 f_T = (TYPE) e_b;
5973 :
5974 : where type 'TYPE' is an integral type. Or a similar pattern
5975 : ending in
5976 :
5977 : S6 f_Y = e_b ? r_Y : s_Y;
5978 :
5979 : as results from if-conversion of a complex condition.
5980 :
5981 : Input:
5982 :
5983 : * STMT_VINFO: The stmt at the end from which the pattern
5984 : search begins, i.e. cast of a bool to
5985 : an integer type.
5986 :
5987 : Output:
5988 :
5989 : * TYPE_OUT: The type of the output of this pattern.
5990 :
5991 : * Return value: A new stmt that will be used to replace the pattern.
5992 :
5993 : Assuming size of TYPE is the same as size of all comparisons
5994 : (otherwise some casts would be added where needed), the above
5995 : sequence we create related pattern stmts:
5996 : S1' a_T = x1 CMP1 y1 ? 1 : 0;
5997 : S3' c_T = x2 CMP2 y2 ? a_T : 0;
5998 : S4' d_T = x3 CMP3 y3 ? 1 : 0;
5999 : S5' e_T = c_T | d_T;
6000 : S6' f_T = e_T;
6001 :
6002 : Instead of the above S3' we could emit:
6003 : S2' b_T = x2 CMP2 y2 ? 1 : 0;
6004 : S3' c_T = a_T | b_T;
6005 : but the above is more efficient. */
6006 :
6007 : static gimple *
6008 31119803 : vect_recog_bool_pattern (vec_info *vinfo,
6009 : stmt_vec_info stmt_vinfo, tree *type_out)
6010 : {
6011 31119803 : gimple *last_stmt = stmt_vinfo->stmt;
6012 31119803 : enum tree_code rhs_code;
6013 31119803 : tree var, lhs, rhs, vectype;
6014 31119803 : gimple *pattern_stmt;
6015 :
6016 31119803 : if (!is_gimple_assign (last_stmt))
6017 : return NULL;
6018 :
6019 21759141 : var = gimple_assign_rhs1 (last_stmt);
6020 21759141 : lhs = gimple_assign_lhs (last_stmt);
6021 21759141 : rhs_code = gimple_assign_rhs_code (last_stmt);
6022 :
6023 21759141 : if (rhs_code == VIEW_CONVERT_EXPR)
6024 192005 : var = TREE_OPERAND (var, 0);
6025 :
6026 21759141 : if (!VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (var)))
6027 : return NULL;
6028 :
6029 733889 : hash_set<gimple *> bool_stmts;
6030 :
6031 733889 : if (CONVERT_EXPR_CODE_P (rhs_code)
6032 : || rhs_code == VIEW_CONVERT_EXPR
6033 : || rhs_code == FLOAT_EXPR)
6034 : {
6035 174885 : if (! (INTEGRAL_TYPE_P (TREE_TYPE (lhs))
6036 2111 : || SCALAR_FLOAT_TYPE_P (TREE_TYPE (lhs)))
6037 173308 : || VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (lhs)))
6038 : return NULL;
6039 82075 : vectype = get_vectype_for_scalar_type (vinfo, TREE_TYPE (lhs));
6040 :
6041 82075 : tree type = integer_type_for_mask (var, vinfo);
6042 82075 : tree cst0, cst1, tmp;
6043 :
6044 82075 : if (!type)
6045 : return NULL;
6046 :
6047 : /* We may directly use cond with narrowed type to avoid multiple cond
6048 : exprs with following result packing and perform single cond with
6049 : packed mask instead. In case of widening we better make cond first
6050 : and then extract results. */
6051 42319 : if (TYPE_MODE (type) == TYPE_MODE (TREE_TYPE (lhs)))
6052 29542 : type = TREE_TYPE (lhs);
6053 :
6054 42319 : cst0 = build_int_cst (type, 0);
6055 42319 : cst1 = build_int_cst (type, 1);
6056 42319 : tmp = vect_recog_temp_ssa_var (type, NULL);
6057 42319 : pattern_stmt = gimple_build_assign (tmp, COND_EXPR, var, cst1, cst0);
6058 :
6059 42319 : if (!useless_type_conversion_p (type, TREE_TYPE (lhs)))
6060 : {
6061 12777 : tree new_vectype = get_vectype_for_scalar_type (vinfo, type);
6062 12777 : append_pattern_def_seq (vinfo, stmt_vinfo,
6063 : pattern_stmt, new_vectype);
6064 :
6065 12777 : lhs = vect_recog_temp_ssa_var (TREE_TYPE (lhs), NULL);
6066 12777 : pattern_stmt
6067 25240 : = gimple_build_assign (lhs, (rhs_code == FLOAT_EXPR
6068 : ? FLOAT_EXPR : CONVERT_EXPR), tmp);
6069 : }
6070 :
6071 42319 : *type_out = vectype;
6072 42319 : vect_pattern_detected ("vect_recog_bool_pattern", last_stmt);
6073 :
6074 42319 : return pattern_stmt;
6075 : }
6076 : else if (rhs_code == COND_EXPR
6077 213186 : && TREE_CODE (var) == SSA_NAME)
6078 : {
6079 213186 : vectype = get_vectype_for_scalar_type (vinfo, TREE_TYPE (lhs));
6080 213186 : if (vectype == NULL_TREE)
6081 : return NULL;
6082 :
6083 : /* Build a scalar type for the boolean result that when
6084 : vectorized matches the vector type of the result in
6085 : size and number of elements. */
6086 198499 : unsigned prec
6087 198499 : = vector_element_size (tree_to_poly_uint64 (TYPE_SIZE (vectype)),
6088 : TYPE_VECTOR_SUBPARTS (vectype));
6089 :
6090 198499 : tree type
6091 396998 : = build_nonstandard_integer_type (prec,
6092 198499 : TYPE_UNSIGNED (TREE_TYPE (var)));
6093 198499 : if (get_vectype_for_scalar_type (vinfo, type) == NULL_TREE)
6094 : return NULL;
6095 :
6096 198499 : enum vect_def_type dt;
6097 198499 : if (integer_type_for_mask (var, vinfo))
6098 : return NULL;
6099 36562 : else if (TREE_CODE (TREE_TYPE (var)) == BOOLEAN_TYPE
6100 36562 : && vect_is_simple_use (var, vinfo, &dt)
6101 36562 : && (dt == vect_external_def
6102 36555 : || dt == vect_constant_def))
6103 : {
6104 : /* If the condition is already a boolean then manually convert it to a
6105 : mask of the given integer type but don't set a vectype. */
6106 1383 : tree lhs_ivar = vect_recog_temp_ssa_var (type, NULL);
6107 1383 : pattern_stmt = gimple_build_assign (lhs_ivar, COND_EXPR, var,
6108 : build_all_ones_cst (type),
6109 : build_zero_cst (type));
6110 1383 : append_inv_pattern_def_seq (vinfo, pattern_stmt);
6111 1383 : var = lhs_ivar;
6112 : }
6113 :
6114 36562 : tree lhs_var = vect_recog_temp_ssa_var (boolean_type_node, NULL);
6115 36562 : pattern_stmt = gimple_build_assign (lhs_var, NE_EXPR, var,
6116 36562 : build_zero_cst (TREE_TYPE (var)));
6117 :
6118 36562 : tree new_vectype = get_mask_type_for_scalar_type (vinfo, TREE_TYPE (var));
6119 36562 : if (!new_vectype)
6120 : return NULL;
6121 :
6122 36562 : append_pattern_def_seq (vinfo, stmt_vinfo, pattern_stmt, new_vectype,
6123 36562 : TREE_TYPE (var));
6124 :
6125 36562 : lhs_var = vect_convert_mask_for_vectype (lhs_var, vectype, stmt_vinfo,
6126 : vinfo);
6127 :
6128 36562 : lhs = vect_recog_temp_ssa_var (TREE_TYPE (lhs), NULL);
6129 36562 : pattern_stmt
6130 36562 : = gimple_build_assign (lhs, COND_EXPR, lhs_var,
6131 : gimple_assign_rhs2 (last_stmt),
6132 : gimple_assign_rhs3 (last_stmt));
6133 36562 : *type_out = vectype;
6134 36562 : vect_pattern_detected ("vect_recog_bool_pattern", last_stmt);
6135 :
6136 36562 : return pattern_stmt;
6137 : }
6138 432891 : else if (rhs_code == BIT_NOT_EXPR
6139 432891 : && !vect_use_mask_type_p (stmt_vinfo))
6140 : {
6141 : /* When we have a bool data inversion rewrite that to an XOR to
6142 : cope with the fact that we'll use a wider vector element type. */
6143 8724 : lhs = vect_recog_temp_ssa_var (TREE_TYPE (lhs), NULL);
6144 8724 : pattern_stmt
6145 8724 : = gimple_build_assign (lhs, BIT_XOR_EXPR, var,
6146 8724 : build_all_ones_cst (TREE_TYPE (var)));
6147 8724 : *type_out = NULL_TREE;
6148 8724 : vect_pattern_detected ("vect_recog_bool_pattern", last_stmt);
6149 :
6150 8724 : return pattern_stmt;
6151 : }
6152 424167 : else if ((rhs_code == BIT_XOR_EXPR
6153 : || rhs_code == BIT_AND_EXPR
6154 424167 : || rhs_code == BIT_IOR_EXPR)
6155 331236 : && TREE_CODE (var) == SSA_NAME)
6156 : {
6157 331236 : tree rhs2 = gimple_assign_rhs2 (last_stmt);
6158 331236 : if (TREE_CODE (rhs2) != SSA_NAME)
6159 : return NULL;
6160 331236 : tree lhs_type = integer_type_for_mask (lhs, vinfo);
6161 331236 : if (!lhs_type)
6162 : return NULL;
6163 171200 : vectype = get_mask_type_for_scalar_type (vinfo, lhs_type);
6164 171200 : if (!vectype)
6165 : return NULL;
6166 171200 : vect_def_type dt1, dt2;
6167 171200 : tree rhs1_type = integer_type_for_mask (var, vinfo, &dt1);
6168 171200 : tree rhs2_type = integer_type_for_mask (rhs2, vinfo, &dt2);
6169 171200 : if ((rhs1_type || dt1 == vect_external_def)
6170 157759 : && (rhs2_type || dt2 == vect_external_def))
6171 : return NULL;
6172 : /* When one input is a mask and the other is not create a pattern
6173 : stmt sequence that creates a mask for the non-mask input and
6174 : convert it to one suitable for the output mask used. */
6175 32630 : if (rhs1_type && !rhs2_type)
6176 : {
6177 19189 : tree rhs1_vectype = get_mask_type_for_scalar_type (vinfo, rhs1_type);
6178 19189 : if (!rhs1_vectype)
6179 : return NULL;
6180 19189 : tree rhs2_vectype = get_vectype_for_scalar_type (vinfo,
6181 19189 : TREE_TYPE (rhs2));
6182 19189 : if (!rhs2_vectype)
6183 : return NULL;
6184 19189 : tree new_vectype = truth_type_for (rhs2_vectype);
6185 19189 : tree tem = vect_recog_temp_ssa_var (TREE_TYPE (new_vectype), NULL);
6186 19189 : pattern_stmt = gimple_build_assign (tem, NE_EXPR, rhs2,
6187 : build_zero_cst
6188 19189 : (TREE_TYPE (rhs2)));
6189 19189 : append_pattern_def_seq (vinfo, stmt_vinfo, pattern_stmt,
6190 19189 : new_vectype, TREE_TYPE (new_vectype));
6191 19189 : rhs2 = vect_convert_mask_for_vectype (tem, rhs1_vectype,
6192 : stmt_vinfo, vinfo);
6193 : }
6194 13441 : else if (!rhs1_type && rhs2_type)
6195 : {
6196 13441 : tree rhs2_vectype = get_mask_type_for_scalar_type (vinfo, rhs2_type);
6197 13441 : if (!rhs2_vectype)
6198 : return NULL;
6199 13441 : tree rhs1_vectype = get_vectype_for_scalar_type (vinfo,
6200 13441 : TREE_TYPE (var));
6201 13441 : if (!rhs1_vectype)
6202 : return NULL;
6203 13441 : tree new_vectype = truth_type_for (rhs1_vectype);
6204 13441 : tree tem = vect_recog_temp_ssa_var (TREE_TYPE (new_vectype), NULL);
6205 13441 : pattern_stmt = gimple_build_assign (tem, NE_EXPR, var,
6206 : build_zero_cst
6207 13441 : (TREE_TYPE (var)));
6208 13441 : append_pattern_def_seq (vinfo, stmt_vinfo, pattern_stmt,
6209 13441 : new_vectype, TREE_TYPE (new_vectype));
6210 13441 : var = vect_convert_mask_for_vectype (tem, rhs2_vectype,
6211 : stmt_vinfo, vinfo);
6212 : }
6213 32630 : lhs = vect_recog_temp_ssa_var (TREE_TYPE (lhs), NULL);
6214 32630 : pattern_stmt = gimple_build_assign (lhs, rhs_code, var, rhs2);
6215 32630 : vect_pattern_detected ("vect_recog_bool_pattern", last_stmt);
6216 32630 : *type_out = vectype;
6217 32630 : return pattern_stmt;
6218 : }
6219 92931 : else if (rhs_code == SSA_NAME
6220 26193 : && STMT_VINFO_DATA_REF (stmt_vinfo))
6221 : {
6222 7807 : stmt_vec_info pattern_stmt_info;
6223 7807 : vectype = get_vectype_for_scalar_type (vinfo, TREE_TYPE (lhs));
6224 7807 : if (!vectype || !VECTOR_MODE_P (TYPE_MODE (vectype)))
6225 0 : return NULL;
6226 :
6227 7807 : tree type = integer_type_for_mask (var, vinfo);
6228 7807 : if (!type)
6229 : return NULL;
6230 :
6231 4433 : var = vect_convert_mask_for_vectype (var, vectype, stmt_vinfo, vinfo);
6232 :
6233 4433 : tree cst0 = build_int_cst (TREE_TYPE (vectype), 0);
6234 4433 : tree cst1 = build_int_cst (TREE_TYPE (vectype), 1);
6235 4433 : rhs = vect_recog_temp_ssa_var (TREE_TYPE (vectype), NULL);
6236 4433 : pattern_stmt = gimple_build_assign (rhs, COND_EXPR, var, cst1, cst0);
6237 4433 : append_pattern_def_seq (vinfo, stmt_vinfo, pattern_stmt, vectype);
6238 :
6239 4433 : lhs = build1 (VIEW_CONVERT_EXPR, TREE_TYPE (vectype), lhs);
6240 4433 : pattern_stmt = gimple_build_assign (lhs, SSA_NAME, rhs);
6241 4433 : pattern_stmt_info = vinfo->add_stmt (pattern_stmt);
6242 4433 : vinfo->move_dr (pattern_stmt_info, stmt_vinfo);
6243 4433 : *type_out = vectype;
6244 4433 : vect_pattern_detected ("vect_recog_bool_pattern", last_stmt);
6245 :
6246 4433 : return pattern_stmt;
6247 : }
6248 : else
6249 : return NULL;
6250 733889 : }
6251 :
6252 :
6253 : /* Function vect_recog_mask_conversion_pattern
6254 :
6255 : Try to find statements which require boolean type
6256 : conversion. Additional conversion statements are
6257 : added to handle such cases. For example:
6258 :
6259 : bool m_1, m_2, m_3;
6260 : int i_4, i_5;
6261 : double d_6, d_7;
6262 : char c_1, c_2, c_3;
6263 :
6264 : S1 m_1 = i_4 > i_5;
6265 : S2 m_2 = d_6 < d_7;
6266 : S3 m_3 = m_1 & m_2;
6267 : S4 c_1 = m_3 ? c_2 : c_3;
6268 :
6269 : Will be transformed into:
6270 :
6271 : S1 m_1 = i_4 > i_5;
6272 : S2 m_2 = d_6 < d_7;
6273 : S3'' m_2' = (_Bool[bitsize=32])m_2
6274 : S3' m_3' = m_1 & m_2';
6275 : S4'' m_3'' = (_Bool[bitsize=8])m_3'
6276 : S4' c_1' = m_3'' ? c_2 : c_3; */
6277 :
6278 : static gimple *
6279 31140951 : vect_recog_mask_conversion_pattern (vec_info *vinfo,
6280 : stmt_vec_info stmt_vinfo, tree *type_out)
6281 : {
6282 31140951 : gimple *last_stmt = stmt_vinfo->stmt;
6283 31140951 : enum tree_code rhs_code;
6284 31140951 : tree lhs = NULL_TREE, rhs1, rhs2, tmp, rhs1_type, rhs2_type;
6285 31140951 : tree vectype1, vectype2;
6286 31140951 : stmt_vec_info pattern_stmt_info;
6287 :
6288 : /* Check for MASK_LOAD and MASK_STORE as well as COND_OP calls requiring mask
6289 : conversion. */
6290 31140951 : if (is_gimple_call (last_stmt)
6291 31140951 : && gimple_call_internal_p (last_stmt))
6292 : {
6293 115050 : gcall *pattern_stmt;
6294 :
6295 115050 : internal_fn ifn = gimple_call_internal_fn (last_stmt);
6296 115050 : int mask_argno = internal_fn_mask_index (ifn);
6297 115050 : if (mask_argno < 0)
6298 : return NULL;
6299 :
6300 15443 : bool store_p = internal_store_fn_p (ifn);
6301 15443 : bool load_p = internal_store_fn_p (ifn);
6302 15443 : if (store_p)
6303 : {
6304 2660 : int rhs_index = internal_fn_stored_value_index (ifn);
6305 2660 : tree rhs = gimple_call_arg (last_stmt, rhs_index);
6306 2660 : vectype1 = get_vectype_for_scalar_type (vinfo, TREE_TYPE (rhs));
6307 : }
6308 : else
6309 : {
6310 12783 : lhs = gimple_call_lhs (last_stmt);
6311 12783 : if (!lhs)
6312 : return NULL;
6313 12783 : vectype1 = get_vectype_for_scalar_type (vinfo, TREE_TYPE (lhs));
6314 : }
6315 :
6316 15443 : if (!vectype1)
6317 : return NULL;
6318 :
6319 15163 : tree mask_arg = gimple_call_arg (last_stmt, mask_argno);
6320 15163 : tree mask_arg_type = integer_type_for_mask (mask_arg, vinfo);
6321 15163 : if (mask_arg_type)
6322 : {
6323 13421 : vectype2 = get_mask_type_for_scalar_type (vinfo, mask_arg_type);
6324 :
6325 13421 : if (!vectype2
6326 13421 : || known_eq (TYPE_VECTOR_SUBPARTS (vectype1),
6327 : TYPE_VECTOR_SUBPARTS (vectype2)))
6328 8564 : return NULL;
6329 : }
6330 1742 : else if (store_p || load_p)
6331 : return NULL;
6332 :
6333 6282 : tmp = build_mask_conversion (vinfo, mask_arg, vectype1, stmt_vinfo);
6334 :
6335 6282 : auto_vec<tree, 8> args;
6336 6282 : unsigned int nargs = gimple_call_num_args (last_stmt);
6337 6282 : args.safe_grow (nargs, true);
6338 31410 : for (unsigned int i = 0; i < nargs; ++i)
6339 25128 : args[i] = ((int) i == mask_argno
6340 25128 : ? tmp
6341 18846 : : gimple_call_arg (last_stmt, i));
6342 6282 : pattern_stmt = gimple_build_call_internal_vec (ifn, args);
6343 :
6344 6282 : if (!store_p)
6345 : {
6346 6010 : lhs = vect_recog_temp_ssa_var (TREE_TYPE (lhs), NULL);
6347 6010 : gimple_call_set_lhs (pattern_stmt, lhs);
6348 : }
6349 :
6350 6010 : if (load_p || store_p)
6351 272 : gimple_call_set_nothrow (pattern_stmt, true);
6352 :
6353 6282 : pattern_stmt_info = vinfo->add_stmt (pattern_stmt);
6354 6282 : if (STMT_VINFO_DATA_REF (stmt_vinfo))
6355 1977 : vinfo->move_dr (pattern_stmt_info, stmt_vinfo);
6356 :
6357 6282 : *type_out = vectype1;
6358 6282 : vect_pattern_detected ("vect_recog_mask_conversion_pattern", last_stmt);
6359 :
6360 6282 : return pattern_stmt;
6361 6282 : }
6362 :
6363 31025901 : if (!is_gimple_assign (last_stmt))
6364 : return NULL;
6365 :
6366 21780289 : gimple *pattern_stmt;
6367 21780289 : lhs = gimple_assign_lhs (last_stmt);
6368 21780289 : rhs1 = gimple_assign_rhs1 (last_stmt);
6369 21780289 : rhs_code = gimple_assign_rhs_code (last_stmt);
6370 :
6371 : /* Check for cond expression requiring mask conversion. */
6372 21780289 : if (rhs_code == COND_EXPR)
6373 : {
6374 193983 : vectype1 = get_vectype_for_scalar_type (vinfo, TREE_TYPE (lhs));
6375 :
6376 193983 : gcc_assert (! COMPARISON_CLASS_P (rhs1));
6377 193983 : if (TREE_CODE (rhs1) == SSA_NAME)
6378 : {
6379 193983 : rhs1_type = integer_type_for_mask (rhs1, vinfo);
6380 193983 : if (!rhs1_type)
6381 : return NULL;
6382 : }
6383 : else
6384 : return NULL;
6385 :
6386 181450 : vectype2 = get_mask_type_for_scalar_type (vinfo, rhs1_type);
6387 :
6388 181450 : if (!vectype1 || !vectype2)
6389 : return NULL;
6390 :
6391 : /* Continue if a conversion is needed. Also continue if we have
6392 : a comparison whose vector type would normally be different from
6393 : VECTYPE2 when considered in isolation. In that case we'll
6394 : replace the comparison with an SSA name (so that we can record
6395 : its vector type) and behave as though the comparison was an SSA
6396 : name from the outset. */
6397 179276 : if (known_eq (TYPE_VECTOR_SUBPARTS (vectype1),
6398 : TYPE_VECTOR_SUBPARTS (vectype2)))
6399 : return NULL;
6400 :
6401 45378 : if (maybe_ne (TYPE_VECTOR_SUBPARTS (vectype1),
6402 90756 : TYPE_VECTOR_SUBPARTS (vectype2)))
6403 45378 : tmp = build_mask_conversion (vinfo, rhs1, vectype1, stmt_vinfo);
6404 : else
6405 : tmp = rhs1;
6406 :
6407 45378 : lhs = vect_recog_temp_ssa_var (TREE_TYPE (lhs), NULL);
6408 45378 : pattern_stmt = gimple_build_assign (lhs, COND_EXPR, tmp,
6409 : gimple_assign_rhs2 (last_stmt),
6410 : gimple_assign_rhs3 (last_stmt));
6411 :
6412 45378 : *type_out = vectype1;
6413 45378 : vect_pattern_detected ("vect_recog_mask_conversion_pattern", last_stmt);
6414 :
6415 45378 : return pattern_stmt;
6416 : }
6417 :
6418 : /* Now check for binary boolean operations requiring conversion for
6419 : one of operands. */
6420 21586306 : if (!VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (lhs)))
6421 : return NULL;
6422 :
6423 1764027 : if (rhs_code != BIT_IOR_EXPR
6424 : && rhs_code != BIT_XOR_EXPR
6425 1764027 : && rhs_code != BIT_AND_EXPR
6426 1465421 : && TREE_CODE_CLASS (rhs_code) != tcc_comparison)
6427 : return NULL;
6428 :
6429 1585843 : rhs2 = gimple_assign_rhs2 (last_stmt);
6430 :
6431 1585843 : rhs1_type = integer_type_for_mask (rhs1, vinfo);
6432 1585843 : rhs2_type = integer_type_for_mask (rhs2, vinfo);
6433 :
6434 1585843 : if (!rhs1_type || !rhs2_type
6435 1585843 : || TYPE_PRECISION (rhs1_type) == TYPE_PRECISION (rhs2_type))
6436 : return NULL;
6437 :
6438 15940 : if (TYPE_PRECISION (rhs1_type) < TYPE_PRECISION (rhs2_type))
6439 : {
6440 10381 : vectype1 = get_mask_type_for_scalar_type (vinfo, rhs1_type);
6441 10381 : if (!vectype1)
6442 : return NULL;
6443 10381 : rhs2 = build_mask_conversion (vinfo, rhs2, vectype1, stmt_vinfo);
6444 : }
6445 : else
6446 : {
6447 5559 : vectype1 = get_mask_type_for_scalar_type (vinfo, rhs2_type);
6448 5559 : if (!vectype1)
6449 : return NULL;
6450 5559 : rhs1 = build_mask_conversion (vinfo, rhs1, vectype1, stmt_vinfo);
6451 : }
6452 :
6453 15940 : lhs = vect_recog_temp_ssa_var (TREE_TYPE (lhs), NULL);
6454 15940 : pattern_stmt = gimple_build_assign (lhs, rhs_code, rhs1, rhs2);
6455 :
6456 15940 : *type_out = vectype1;
6457 15940 : vect_pattern_detected ("vect_recog_mask_conversion_pattern", last_stmt);
6458 :
6459 15940 : return pattern_stmt;
6460 : }
6461 :
6462 : /* STMT_INFO is a load or store. If the load or store is conditional, return
6463 : the boolean condition under which it occurs, otherwise return null. */
6464 :
6465 : static tree
6466 100055 : vect_get_load_store_mask (stmt_vec_info stmt_info)
6467 : {
6468 100055 : if (gassign *def_assign = dyn_cast <gassign *> (stmt_info->stmt))
6469 : {
6470 98582 : gcc_assert (gimple_assign_single_p (def_assign));
6471 : return NULL_TREE;
6472 : }
6473 :
6474 1473 : if (gcall *def_call = dyn_cast <gcall *> (stmt_info->stmt))
6475 : {
6476 1473 : internal_fn ifn = gimple_call_internal_fn (def_call);
6477 1473 : int mask_index = internal_fn_mask_index (ifn);
6478 1473 : return gimple_call_arg (def_call, mask_index);
6479 : }
6480 :
6481 0 : gcc_unreachable ();
6482 : }
6483 :
6484 : /* Return the equivalent of:
6485 :
6486 : fold_convert (TYPE, VALUE)
6487 :
6488 : with the expectation that the operation will be vectorized.
6489 : If new statements are needed, add them as pattern statements
6490 : to STMT_INFO. */
6491 :
6492 : static tree
6493 0 : vect_add_conversion_to_pattern (vec_info *vinfo,
6494 : tree type, tree value, stmt_vec_info stmt_info)
6495 : {
6496 0 : if (useless_type_conversion_p (type, TREE_TYPE (value)))
6497 : return value;
6498 :
6499 0 : tree new_value = vect_recog_temp_ssa_var (type, NULL);
6500 0 : gassign *conversion = gimple_build_assign (new_value, CONVERT_EXPR, value);
6501 0 : append_pattern_def_seq (vinfo, stmt_info, conversion,
6502 : get_vectype_for_scalar_type (vinfo, type));
6503 0 : return new_value;
6504 : }
6505 :
6506 : /* Try to convert STMT_INFO into a call to a gather load or scatter store
6507 : internal function. Return the final statement on success and set
6508 : *TYPE_OUT to the vector type being loaded or stored.
6509 :
6510 : This function only handles gathers and scatters that were recognized
6511 : as such from the outset (indicated by STMT_VINFO_GATHER_SCATTER_P). */
6512 :
6513 : static gimple *
6514 31140951 : vect_recog_gather_scatter_pattern (vec_info *vinfo,
6515 : stmt_vec_info stmt_info, tree *type_out)
6516 : {
6517 : /* Currently we only support this for loop vectorization. */
6518 35650186 : loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
6519 4509235 : if (!loop_vinfo)
6520 : return NULL;
6521 :
6522 : /* Make sure that we're looking at a gather load or scatter store. */
6523 4509235 : data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
6524 4509235 : if (!dr || !STMT_VINFO_GATHER_SCATTER_P (stmt_info))
6525 : return NULL;
6526 :
6527 : /* Get the boolean that controls whether the load or store happens.
6528 : This is null if the operation is unconditional. */
6529 100055 : tree mask = vect_get_load_store_mask (stmt_info);
6530 :
6531 : /* DR analysis nailed down the vector type for the access. */
6532 100055 : tree gs_vectype = STMT_VINFO_VECTYPE (stmt_info);
6533 :
6534 : /* Make sure that the target supports an appropriate internal
6535 : function for the gather/scatter operation. */
6536 100055 : gather_scatter_info gs_info;
6537 100055 : if (!vect_check_gather_scatter (stmt_info, gs_vectype, loop_vinfo, &gs_info)
6538 100055 : || gs_info.ifn == IFN_LAST)
6539 : return NULL;
6540 :
6541 : /* Convert the mask to the right form. */
6542 0 : if (mask)
6543 0 : mask = vect_convert_mask_for_vectype (mask, gs_vectype, stmt_info,
6544 : loop_vinfo);
6545 0 : else if (gs_info.ifn == IFN_MASK_SCATTER_STORE
6546 0 : || gs_info.ifn == IFN_MASK_GATHER_LOAD
6547 0 : || gs_info.ifn == IFN_MASK_LEN_SCATTER_STORE
6548 0 : || gs_info.ifn == IFN_MASK_LEN_GATHER_LOAD)
6549 0 : mask = build_int_cst (TREE_TYPE (truth_type_for (gs_vectype)), -1);
6550 :
6551 : /* Get the invariant base and non-invariant offset, converting the
6552 : latter to the same width as the vector elements. */
6553 0 : tree base = gs_info.base;
6554 0 : tree offset_type = TREE_TYPE (gs_info.offset_vectype);
6555 0 : tree offset = vect_add_conversion_to_pattern (vinfo, offset_type,
6556 : gs_info.offset, stmt_info);
6557 :
6558 : /* Build the new pattern statement. */
6559 0 : tree scale = size_int (gs_info.scale);
6560 0 : gcall *pattern_stmt;
6561 :
6562 0 : if (DR_IS_READ (dr))
6563 : {
6564 0 : tree zero = build_zero_cst (gs_info.element_type);
6565 0 : if (mask != NULL)
6566 : {
6567 0 : int elsval = MASK_LOAD_ELSE_ZERO;
6568 :
6569 0 : tree vec_els
6570 0 : = vect_get_mask_load_else (elsval, TREE_TYPE (gs_vectype));
6571 0 : pattern_stmt = gimple_build_call_internal (gs_info.ifn, 7, base,
6572 : gs_info.alias_ptr,
6573 : offset, scale, zero, mask,
6574 : vec_els);
6575 : }
6576 : else
6577 0 : pattern_stmt = gimple_build_call_internal (gs_info.ifn, 5, base,
6578 : gs_info.alias_ptr,
6579 : offset, scale, zero);
6580 0 : tree lhs = gimple_get_lhs (stmt_info->stmt);
6581 0 : tree load_lhs = vect_recog_temp_ssa_var (TREE_TYPE (lhs), NULL);
6582 0 : gimple_call_set_lhs (pattern_stmt, load_lhs);
6583 : }
6584 : else
6585 : {
6586 0 : tree rhs = vect_get_store_rhs (stmt_info);
6587 0 : if (mask != NULL)
6588 0 : pattern_stmt = gimple_build_call_internal (gs_info.ifn, 6,
6589 : base, gs_info.alias_ptr,
6590 : offset, scale, rhs, mask);
6591 : else
6592 0 : pattern_stmt = gimple_build_call_internal (gs_info.ifn, 5,
6593 : base, gs_info.alias_ptr,
6594 : offset, scale, rhs);
6595 : }
6596 0 : gimple_call_set_nothrow (pattern_stmt, true);
6597 :
6598 : /* Copy across relevant vectorization info and associate DR with the
6599 : new pattern statement instead of the original statement. */
6600 0 : stmt_vec_info pattern_stmt_info = loop_vinfo->add_stmt (pattern_stmt);
6601 0 : loop_vinfo->move_dr (pattern_stmt_info, stmt_info);
6602 :
6603 0 : *type_out = gs_vectype;
6604 0 : vect_pattern_detected ("gather/scatter pattern", stmt_info->stmt);
6605 :
6606 0 : return pattern_stmt;
6607 : }
6608 :
6609 : /* Helper method of vect_recog_cond_store_pattern, checks to see if COND_ARG
6610 : is points to a load statement that reads the same data as that of
6611 : STORE_VINFO. */
6612 :
6613 : static bool
6614 35634 : vect_cond_store_pattern_same_ref (vec_info *vinfo,
6615 : stmt_vec_info store_vinfo, tree cond_arg)
6616 : {
6617 35634 : stmt_vec_info load_stmt_vinfo = vinfo->lookup_def (cond_arg);
6618 35634 : if (!load_stmt_vinfo
6619 20650 : || !STMT_VINFO_DATA_REF (load_stmt_vinfo)
6620 12429 : || DR_IS_WRITE (STMT_VINFO_DATA_REF (load_stmt_vinfo))
6621 48063 : || !same_data_refs (STMT_VINFO_DATA_REF (store_vinfo),
6622 : STMT_VINFO_DATA_REF (load_stmt_vinfo)))
6623 26253 : return false;
6624 :
6625 : return true;
6626 : }
6627 :
6628 : /* Function vect_recog_cond_store_pattern
6629 :
6630 : Try to find the following pattern:
6631 :
6632 : x = *_3;
6633 : c = a CMP b;
6634 : y = c ? t_20 : x;
6635 : *_3 = y;
6636 :
6637 : where the store of _3 happens on a conditional select on a value loaded
6638 : from the same location. In such case we can elide the initial load if
6639 : MASK_STORE is supported and instead only conditionally write out the result.
6640 :
6641 : The pattern produces for the above:
6642 :
6643 : c = a CMP b;
6644 : .MASK_STORE (_3, c, t_20)
6645 :
6646 : Input:
6647 :
6648 : * STMT_VINFO: The stmt from which the pattern search begins. In the
6649 : example, when this function is called with _3 then the search begins.
6650 :
6651 : Output:
6652 :
6653 : * TYPE_OUT: The type of the output of this pattern.
6654 :
6655 : * Return value: A new stmt that will be used to replace the sequence. */
6656 :
6657 : static gimple *
6658 31140951 : vect_recog_cond_store_pattern (vec_info *vinfo,
6659 : stmt_vec_info stmt_vinfo, tree *type_out)
6660 : {
6661 31140951 : loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
6662 4509235 : if (!loop_vinfo)
6663 : return NULL;
6664 :
6665 4509235 : gimple *store_stmt = STMT_VINFO_STMT (stmt_vinfo);
6666 :
6667 : /* Needs to be a gimple store where we have DR info for. */
6668 4509235 : if (!STMT_VINFO_DATA_REF (stmt_vinfo)
6669 1079655 : || DR_IS_READ (STMT_VINFO_DATA_REF (stmt_vinfo))
6670 4891025 : || !gimple_store_p (store_stmt))
6671 4129905 : return NULL;
6672 :
6673 379330 : tree st_rhs = gimple_assign_rhs1 (store_stmt);
6674 :
6675 379330 : if (TREE_CODE (st_rhs) != SSA_NAME)
6676 : return NULL;
6677 :
6678 295540 : auto cond_vinfo = vinfo->lookup_def (st_rhs);
6679 :
6680 : /* If the condition isn't part of the loop then bool recog wouldn't have seen
6681 : it and so this transformation may not be valid. */
6682 295540 : if (!cond_vinfo)
6683 : return NULL;
6684 :
6685 278189 : cond_vinfo = vect_stmt_to_vectorize (cond_vinfo);
6686 31405084 : gassign *cond_stmt = dyn_cast<gassign *> (STMT_VINFO_STMT (cond_vinfo));
6687 344864 : if (!cond_stmt || gimple_assign_rhs_code (cond_stmt) != COND_EXPR)
6688 : return NULL;
6689 :
6690 : /* Check if the else value matches the original loaded one. */
6691 18878 : bool invert = false;
6692 18878 : tree cmp_ls = gimple_arg (cond_stmt, 0);
6693 18878 : if (TREE_CODE (cmp_ls) != SSA_NAME)
6694 : return NULL;
6695 :
6696 18878 : tree cond_arg1 = gimple_arg (cond_stmt, 1);
6697 18878 : tree cond_arg2 = gimple_arg (cond_stmt, 2);
6698 :
6699 18878 : if (!vect_cond_store_pattern_same_ref (vinfo, stmt_vinfo, cond_arg2)
6700 18878 : && !(invert = vect_cond_store_pattern_same_ref (vinfo, stmt_vinfo,
6701 : cond_arg1)))
6702 : return NULL;
6703 :
6704 9381 : vect_pattern_detected ("vect_recog_cond_store_pattern", store_stmt);
6705 :
6706 9381 : tree scalar_type = TREE_TYPE (st_rhs);
6707 9381 : if (VECTOR_TYPE_P (scalar_type))
6708 : return NULL;
6709 :
6710 9381 : tree vectype = get_vectype_for_scalar_type (vinfo, scalar_type);
6711 9381 : if (vectype == NULL_TREE)
6712 : return NULL;
6713 :
6714 9381 : machine_mode mask_mode;
6715 9381 : machine_mode vecmode = TYPE_MODE (vectype);
6716 1850 : if (!VECTOR_MODE_P (vecmode)
6717 9381 : || targetm.vectorize.conditional_operation_is_expensive (IFN_MASK_STORE)
6718 0 : || !targetm.vectorize.get_mask_mode (vecmode).exists (&mask_mode)
6719 9381 : || !can_vec_mask_load_store_p (vecmode, mask_mode, false))
6720 9381 : return NULL;
6721 :
6722 0 : tree base = DR_REF (STMT_VINFO_DATA_REF (stmt_vinfo));
6723 0 : if (may_be_nonaddressable_p (base))
6724 : return NULL;
6725 :
6726 : /* We need to use the false parameter of the conditional select. */
6727 0 : tree cond_store_arg = invert ? cond_arg2 : cond_arg1;
6728 0 : tree cond_load_arg = invert ? cond_arg1 : cond_arg2;
6729 0 : gimple *load_stmt = SSA_NAME_DEF_STMT (cond_load_arg);
6730 :
6731 : /* This is a rough estimation to check that there aren't any aliasing stores
6732 : in between the load and store. It's a bit strict, but for now it's good
6733 : enough. */
6734 0 : if (gimple_vuse (load_stmt) != gimple_vuse (store_stmt))
6735 : return NULL;
6736 :
6737 : /* If we have to invert the condition, i.e. use the true argument rather than
6738 : the false argument, we have to negate the mask. */
6739 0 : if (invert)
6740 : {
6741 0 : tree var = vect_recog_temp_ssa_var (boolean_type_node, NULL);
6742 :
6743 : /* Invert the mask using ^ 1. */
6744 0 : tree itype = TREE_TYPE (cmp_ls);
6745 0 : gassign *conv = gimple_build_assign (var, BIT_XOR_EXPR, cmp_ls,
6746 : build_int_cst (itype, 1));
6747 :
6748 0 : tree mask_vec_type = get_mask_type_for_scalar_type (vinfo, itype);
6749 0 : append_pattern_def_seq (vinfo, stmt_vinfo, conv, mask_vec_type, itype);
6750 0 : cmp_ls= var;
6751 : }
6752 :
6753 0 : if (TREE_CODE (base) != MEM_REF)
6754 0 : base = build_fold_addr_expr (base);
6755 :
6756 0 : tree ptr = build_int_cst (reference_alias_ptr_type (base),
6757 0 : get_object_alignment (base));
6758 :
6759 : /* Convert the mask to the right form. */
6760 0 : tree mask = vect_convert_mask_for_vectype (cmp_ls, vectype, stmt_vinfo,
6761 : vinfo);
6762 :
6763 0 : gcall *call
6764 0 : = gimple_build_call_internal (IFN_MASK_STORE, 4, base, ptr, mask,
6765 : cond_store_arg);
6766 0 : gimple_set_location (call, gimple_location (store_stmt));
6767 :
6768 : /* Copy across relevant vectorization info and associate DR with the
6769 : new pattern statement instead of the original statement. */
6770 0 : stmt_vec_info pattern_stmt_info = loop_vinfo->add_stmt (call);
6771 0 : loop_vinfo->move_dr (pattern_stmt_info, stmt_vinfo);
6772 :
6773 0 : *type_out = vectype;
6774 0 : return call;
6775 : }
6776 :
6777 : /* Return true if TYPE is a non-boolean integer type. These are the types
6778 : that we want to consider for narrowing.

   The test is INTEGRAL_TYPE_P with everything accepted by
   VECT_SCALAR_BOOLEAN_TYPE_P excluded, so scalar booleans are never
   reported as narrowable by this predicate. */
6779 :
6780 : static bool
6781 62573606 : vect_narrowable_type_p (tree type)
6782 : {
6783 62573606 : return INTEGRAL_TYPE_P (type) && !VECT_SCALAR_BOOLEAN_TYPE_P (type);
6784 : }
6785 :
6786 : /* Return true if the operation given by CODE can be truncated to N bits
6787 : when only N bits of the output are needed. This is only true if bit N+1
6788 : of the inputs has no effect on the low N bits of the result.

   The accepted codes are exactly the cases listed in the switch below;
   every other code, including shifts and divisions, yields false and has
   to be handled separately by the callers. */
6789 :
6790 : static bool
6791 16052718 : vect_truncatable_operation_p (tree_code code)
6792 : {
6793 16052718 : switch (code)
6794 : {
6795 : case NEGATE_EXPR:
6796 : case PLUS_EXPR:
6797 : case MINUS_EXPR:
6798 : case MULT_EXPR:
6799 : case BIT_NOT_EXPR:
6800 : case BIT_AND_EXPR:
6801 : case BIT_IOR_EXPR:
6802 : case BIT_XOR_EXPR:
6803 : case COND_EXPR:
6804 : return true;
6805 :
6806 6112558 : default:
6807 6112558 : return false;
6808 : }
6809 : }
6810 :
6811 : /* Record that STMT_INFO could be changed from operating on TYPE to
6812 : operating on a type with the precision and sign given by PRECISION
6813 : and SIGN respectively. PRECISION is an arbitrary bit precision;
6814 : it might not be a whole number of bytes.

   Nothing is recorded unless the adjusted precision is strictly narrower
   than TYPE and strictly narrower than any precision recorded earlier;
   an operation_precision of zero is treated as "nothing recorded yet".
   When a new precision is recorded, the sign is overwritten with SIGN
   at the same time. */
6815 :
6816 : static void
6817 2549754 : vect_set_operation_type (stmt_vec_info stmt_info, tree type,
6818 : unsigned int precision, signop sign)
6819 : {
6820 : /* Round the precision up to a whole number of bytes. */
6821 2549754 : precision = vect_element_precision (precision);
6822 2549754 : if (precision < TYPE_PRECISION (type)
6823 2549754 : && (!stmt_info->operation_precision
6824 40806 : || stmt_info->operation_precision > precision))
6825 : {
6826 1641872 : stmt_info->operation_precision = precision;
6827 1641872 : stmt_info->operation_sign = sign;
6828 : }
6829 2549754 : }
6830 :
6831 : /* Record that STMT_INFO only requires MIN_INPUT_PRECISION from its
6832 : non-boolean inputs, all of which have type TYPE. MIN_INPUT_PRECISION
6833 : is an arbitrary bit precision; it might not be a whole number of bytes.

   The value actually stored is never smaller than the statement's
   min_output_precision, and it only replaces an earlier value when it is
   narrower than both TYPE and that earlier value (zero meaning that no
   value has been recorded yet). */
6834 :
6835 : static void
6836 11668321 : vect_set_min_input_precision (stmt_vec_info stmt_info, tree type,
6837 : unsigned int min_input_precision)
6838 : {
6839 : /* This operation in isolation only requires the inputs to have
6840 : MIN_INPUT_PRECISION of precision. However, that doesn't mean
6841 : that MIN_INPUT_PRECISION is a natural precision for the chain
6842 : as a whole. E.g. consider something like:
6843 :
6844 : unsigned short *x, *y;
6845 : *y = ((*x & 0xf0) >> 4) | (*y << 4);
6846 :
6847 : The right shift can be done on unsigned chars, and only requires the
6848 : result of "*x & 0xf0" to be done on unsigned chars. But taking that
6849 : approach would mean turning a natural chain of single-vector unsigned
6850 : short operations into one that truncates "*x" and then extends
6851 : "(*x & 0xf0) >> 4", with two vectors for each unsigned short
6852 : operation and one vector for each unsigned char operation.
6853 : This would be a significant pessimization.
6854 :
6855 : Instead only propagate the maximum of this precision and the precision
6856 : required by the users of the result. This means that we don't pessimize
6857 : the case above but continue to optimize things like:
6858 :
6859 : unsigned char *y;
6860 : unsigned short *x;
6861 : *y = ((*x & 0xf0) >> 4) | (*y << 4);
6862 :
6863 : Here we would truncate two vectors of *x to a single vector of
6864 : unsigned chars and use single-vector unsigned char operations for
6865 : everything else, rather than doing two unsigned short copies of
6866 : "(*x & 0xf0) >> 4" and then truncating the result. */
6867 11668321 : min_input_precision = MAX (min_input_precision,
6868 : stmt_info->min_output_precision);
6869 :
/* Only ever tighten the recorded value; zero means "not recorded yet". */
6870 11668321 : if (min_input_precision < TYPE_PRECISION (type)
6871 11668321 : && (!stmt_info->min_input_precision
6872 63092 : || stmt_info->min_input_precision > min_input_precision))
6873 572291 : stmt_info->min_input_precision = min_input_precision;
6874 11668321 : }
6875 :
6876 : /* Subroutine of vect_determine_min_output_precision. Return true if
6877 : we can calculate a reduced number of output bits for STMT_INFO,
6878 : whose result is LHS.

   The result is the maximum min_input_precision over all non-debug
   users of LHS and is stored in STMT_INFO->min_output_precision.
   Return false, leaving STMT_INFO untouched, as soon as one user is
   unknown to VINFO, has no recorded min_input_precision, or is a
   COND_EXPR that uses LHS somewhere other than as its "then" or "else"
   value. */
6879 :
6880 : static bool
6881 14825216 : vect_determine_min_output_precision_1 (vec_info *vinfo,
6882 : stmt_vec_info stmt_info, tree lhs)
6883 : {
6884 : /* Take the maximum precision required by users of the result. */
6885 14825216 : unsigned int precision = 0;
6886 14825216 : imm_use_iterator iter;
6887 14825216 : use_operand_p use;
6888 30604746 : FOR_EACH_IMM_USE_FAST (use, iter, lhs)
6889 : {
6890 15509179 : gimple *use_stmt = USE_STMT (use);
6891 15509179 : if (is_gimple_debug (use_stmt))
6892 676490 : continue;
6893 14832689 : stmt_vec_info use_stmt_info = vinfo->lookup_stmt (use_stmt);
6894 14832689 : if (!use_stmt_info || !use_stmt_info->min_input_precision)
6895 : return false;
6896 : /* The input precision recorded for COND_EXPRs applies only to the
6897 : "then" and "else" values.  USE->use is an operand slot of USE_STMT,
   so the COND_EXPR that has to be inspected is the using statement,
   not the statement that defines LHS: comparing against the operand
   slots of the defining statement could never match and would reject
   every COND_EXPR definition outright. */
6898 278384 : gassign *assign = dyn_cast <gassign *> (use_stmt);
6899 248333 : if (assign
6900 248333 : && gimple_assign_rhs_code (assign) == COND_EXPR
6901 560 : && use->use != gimple_assign_rhs2_ptr (assign)
6902 560 : && use->use != gimple_assign_rhs3_ptr (assign))
6903 : return false;
6904 956022 : precision = MAX (precision, use_stmt_info->min_input_precision);
6905 14554865 : }
6906 :
6907 270351 : if (dump_enabled_p ())
6908 5826 : dump_printf_loc (MSG_NOTE, vect_location,
6909 : "only the low %d bits of %T are significant\n",
6910 : precision, lhs);
6911 270351 : stmt_info->min_output_precision = precision;
6912 270351 : return true;
6913 : }
6914 :
6915 : /* Calculate min_output_precision for STMT_INFO.

   Statements whose result is missing, is not an SSA_NAME, or does not
   have a narrowable type are left untouched. Otherwise, if the helper
   cannot derive a reduced precision from the users of the result, the
   full precision of the result type is recorded instead. */
6916 :
6917 : static void
6918 37568348 : vect_determine_min_output_precision (vec_info *vinfo, stmt_vec_info stmt_info)
6919 : {
6920 : /* We're only interested in statements with a narrowable result. */
6921 37568348 : tree lhs = gimple_get_lhs (stmt_info->stmt);
6922 37568348 : if (!lhs
6923 29344733 : || TREE_CODE (lhs) != SSA_NAME
6924 62319344 : || !vect_narrowable_type_p (TREE_TYPE (lhs)))
6925 : return;
6926 :
6927 14825216 : if (!vect_determine_min_output_precision_1 (vinfo, stmt_info, lhs))
6928 14554865 : stmt_info->min_output_precision = TYPE_PRECISION (TREE_TYPE (lhs));
6929 : }
6930 :
6931 : /* Use range information to decide whether STMT (described by STMT_INFO)
6932 : could be done in a narrower type. This is effectively a forward
6933 : propagation, since it uses context-independent information that applies
6934 : to all users of an SSA name.

   Outline: start from the value range of the result. For codes that
   vect_truncatable_operation_p rejects, only a fixed set of shift,
   ABS/MIN/MAX and division codes is handled, and the range is widened
   to cover every operand as well. If the final range fits in fewer
   bits than the result type, the narrower precision is recorded through
   vect_set_operation_type and vect_set_min_input_precision. The
   function gives up silently whenever a needed range is unavailable. */
6935 :
6936 : static void
6937 21069121 : vect_determine_precisions_from_range (stmt_vec_info stmt_info, gassign *stmt)
6938 : {
6939 21069121 : tree lhs = gimple_assign_lhs (stmt);
6940 21069121 : if (!lhs || TREE_CODE (lhs) != SSA_NAME)
6941 18630712 : return;
6942 :
6943 16753489 : tree type = TREE_TYPE (lhs);
6944 16753489 : if (!vect_narrowable_type_p (type))
6945 : return;
6946 :
6947 : /* First see whether we have any useful range information for the result. */
6948 11412280 : unsigned int precision = TYPE_PRECISION (type);
6949 11412280 : signop sign = TYPE_SIGN (type);
6950 11412280 : wide_int min_value, max_value;
6951 11412280 : if (!vect_get_range_info (lhs, &min_value, &max_value))
6952 : return;
6953 :
6954 5623591 : tree_code code = gimple_assign_rhs_code (stmt);
6955 5623591 : unsigned int nops = gimple_num_ops (stmt);
6956 :
6957 5623591 : if (!vect_truncatable_operation_p (code))
6958 : {
6959 : /* Handle operations that can be computed in type T if all inputs
6960 : and outputs can be represented in type T. Also handle left and
6961 : right shifts, where (in addition) the maximum shift amount must
6962 : be less than the number of bits in T. */
6963 2039230 : bool is_shift;
6964 2039230 : switch (code)
6965 : {
6966 : case LSHIFT_EXPR:
6967 : case RSHIFT_EXPR:
6968 : is_shift = true;
6969 : break;
6970 :
6971 285142 : case ABS_EXPR:
6972 285142 : case MIN_EXPR:
6973 285142 : case MAX_EXPR:
6974 285142 : case TRUNC_DIV_EXPR:
6975 285142 : case CEIL_DIV_EXPR:
6976 285142 : case FLOOR_DIV_EXPR:
6977 285142 : case ROUND_DIV_EXPR:
6978 285142 : case EXACT_DIV_EXPR:
6979 : /* Modulus is excluded because it is typically calculated by doing
6980 : a division, for which minimum signed / -1 isn't representable in
6981 : the original signed type. We could take the division range into
6982 : account instead, if handling modulus ever becomes important. */
6983 285142 : is_shift = false;
6984 285142 : break;
6985 :
6986 : default:
6987 : return;
6988 : }
/* Operand 0 is the lhs, whose range was obtained above; fold the range
   of each remaining operand into MIN_VALUE/MAX_VALUE. */
6989 1362895 : for (unsigned int i = 1; i < nops; ++i)
6990 : {
6991 1052597 : tree op = gimple_op (stmt, i);
6992 1052597 : wide_int op_min_value, op_max_value;
6993 1052597 : if (TREE_CODE (op) == INTEGER_CST)
6994 : {
6995 304569 : unsigned int op_precision = TYPE_PRECISION (TREE_TYPE (op));
6996 304569 : op_min_value = op_max_value = wi::to_wide (op, op_precision);
6997 : }
6998 748028 : else if (TREE_CODE (op) == SSA_NAME)
6999 : {
7000 748028 : if (!vect_get_range_info (op, &op_min_value, &op_max_value))
7001 : return;
7002 : }
7003 : else
7004 : return;
7005 :
7006 691348 : if (is_shift && i == 2)
7007 : {
7008 : /* There needs to be one more bit than the maximum shift amount.
7009 :
7010 : If the maximum shift amount is already 1 less than PRECISION
7011 : then we can't narrow the shift further. Dealing with that
7012 : case first ensures that we can safely use an unsigned range
7013 : below.
7014 :
7015 : op_min_value isn't relevant, since shifts by negative amounts
7016 : are UB. */
7017 205598 : if (wi::geu_p (op_max_value, precision - 1))
7018 : return;
7019 182455 : unsigned int min_bits = op_max_value.to_uhwi () + 1;
7020 :
7021 : /* As explained below, we can convert a signed shift into an
7022 : unsigned shift if the sign bit is always clear. At this
7023 : point we've already processed the ranges of the output and
7024 : the first input. */
7025 182455 : auto op_sign = sign;
7026 182455 : if (sign == SIGNED && !wi::neg_p (min_value))
7027 : op_sign = UNSIGNED;
/* Replace the shift amount's own range by the full range of a
   MIN_BITS-wide value, so the precision computed below is at least
   MIN_BITS. */
7028 364910 : op_min_value = wide_int::from (wi::min_value (min_bits, op_sign),
7029 182455 : precision, op_sign);
7030 364910 : op_max_value = wide_int::from (wi::max_value (min_bits, op_sign),
7031 182455 : precision, op_sign);
7032 : }
7033 668205 : min_value = wi::min (min_value, op_min_value, sign);
7034 668205 : max_value = wi::max (max_value, op_max_value, sign);
7035 1052597 : }
7036 : }
7037 :
7038 : /* Try to switch signed types for unsigned types if we can.
7039 : This is better for two reasons. First, unsigned ops tend
7040 : to be cheaper than signed ops. Second, it means that we can
7041 : handle things like:
7042 :
7043 : signed char c;
7044 : int res = (int) c & 0xff00; // range [0x0000, 0xff00]
7045 :
7046 : as:
7047 :
7048 : signed char c;
7049 : unsigned short res_1 = (unsigned short) c & 0xff00;
7050 : int res = (int) res_1;
7051 :
7052 : where the intermediate result res_1 has unsigned rather than
7053 : signed type. */
7054 3894659 : if (sign == SIGNED && !wi::neg_p (min_value))
7055 : sign = UNSIGNED;
7056 :
7057 : /* See what precision is required for MIN_VALUE and MAX_VALUE. */
7058 3894659 : unsigned int precision1 = wi::min_precision (min_value, sign);
7059 3894659 : unsigned int precision2 = wi::min_precision (max_value, sign);
7060 3894659 : unsigned int value_precision = MAX (precision1, precision2);
7061 3894659 : if (value_precision >= precision)
7062 : return;
7063 :
7064 2438409 : if (dump_enabled_p ())
7065 111159 : dump_printf_loc (MSG_NOTE, vect_location, "can narrow to %s:%d"
7066 : " without loss of precision: %G",
7067 : sign == SIGNED ? "signed" : "unsigned",
7068 : value_precision, (gimple *) stmt);
7069 :
7070 2438409 : vect_set_operation_type (stmt_info, type, value_precision, sign);
7071 2438409 : vect_set_min_input_precision (stmt_info, type, value_precision);
7072 11412280 : }
7073 :
7074 : /* Use information about the users of STMT's result to decide whether
7075 : STMT (described by STMT_INFO) could be done in a narrower type.
7076 : This is effectively a backward propagation.

   The starting point is STMT_INFO->min_output_precision. Conversions
   only pass that value on as their input requirement; shifts adjust it
   by the known bounds of the shift amount; codes accepted by
   vect_truncatable_operation_p use it unchanged. Every other code, and
   any shift whose amount cannot be bounded below the type precision,
   leaves STMT_INFO untouched. */
7077 :
7078 : static void
7079 21069121 : vect_determine_precisions_from_users (stmt_vec_info stmt_info, gassign *stmt)
7080 : {
7081 21069121 : tree_code code = gimple_assign_rhs_code (stmt);
/* For a COND_EXPR, take the type from operand 2 rather than from the
   condition in operand 1. */
7082 21069121 : unsigned int opno = (code == COND_EXPR ? 2 : 1);
7083 21069121 : tree type = TREE_TYPE (gimple_op (stmt, opno));
7084 21069121 : if (!vect_narrowable_type_p (type))
7085 11839209 : return;
7086 :
7087 13338490 : unsigned int precision = TYPE_PRECISION (type);
7088 13338490 : unsigned int operation_precision, min_input_precision;
7089 13338490 : switch (code)
7090 : {
7091 2430122 : CASE_CONVERT:
7092 : /* Only the bits that contribute to the output matter. Don't change
7093 : the precision of the operation itself. */
7094 2430122 : operation_precision = precision;
7095 2430122 : min_input_precision = stmt_info->min_output_precision;
7096 2430122 : break;
7097 :
7098 479241 : case LSHIFT_EXPR:
7099 479241 : case RSHIFT_EXPR:
7100 479241 : {
7101 479241 : tree shift = gimple_assign_rhs2 (stmt);
7102 479241 : unsigned int min_const_shift, max_const_shift;
7103 479241 : wide_int min_shift, max_shift;
/* Accept either an SSA_NAME shift amount whose range lies within
   [0, precision of TYPE), or a constant below PRECISION. */
7104 479241 : if (TREE_CODE (shift) == SSA_NAME
7105 105729 : && vect_get_range_info (shift, &min_shift, &max_shift)
7106 81506 : && wi::ge_p (min_shift, 0, TYPE_SIGN (TREE_TYPE (shift)))
7107 558002 : && wi::lt_p (max_shift, TYPE_PRECISION (type),
7108 78761 : TYPE_SIGN (TREE_TYPE (shift))))
7109 : {
7110 70587 : min_const_shift = min_shift.to_uhwi ();
7111 70587 : max_const_shift = max_shift.to_uhwi ();
7112 : }
7113 408654 : else if (TREE_CODE (shift) == INTEGER_CST
7114 782166 : && wi::ltu_p (wi::to_widest (shift), precision))
7115 373404 : min_const_shift = max_const_shift = TREE_INT_CST_LOW (shift);
7116 : else
7117 35250 : return;
7118 443991 : if (code == LSHIFT_EXPR)
7119 : {
7120 : /* Avoid creating an undefined shift.
7121 :
7122 : ??? We could instead use min_output_precision as-is and
7123 : optimize out-of-range shifts to zero. However, only
7124 : degenerate testcases shift away all their useful input data,
7125 : and it isn't natural to drop input operations in the middle
7126 : of vectorization. This sort of thing should really be
7127 : handled before vectorization. */
7128 108192 : operation_precision = MAX (stmt_info->min_output_precision,
7129 : max_const_shift + 1);
7130 : /* We need CONST_SHIFT fewer bits of the input. */
7131 108192 : min_input_precision = (MAX (operation_precision, max_const_shift)
7132 : - min_const_shift);
7133 : }
7134 : else
7135 : {
7136 : /* We need CONST_SHIFT extra bits to do the operation. */
7137 335799 : operation_precision = (stmt_info->min_output_precision
7138 : + max_const_shift);
7139 335799 : min_input_precision = operation_precision;
7140 : }
7141 443991 : break;
7142 479241 : }
7143 :
7144 10429127 : default:
7145 10429127 : if (vect_truncatable_operation_p (code))
7146 : {
7147 : /* Input bit N has no effect on output bits N-1 and lower. */
7148 6355799 : operation_precision = stmt_info->min_output_precision;
7149 6355799 : min_input_precision = operation_precision;
7150 6355799 : break;
7151 : }
7152 : return;
7153 : }
7154 :
7155 9229912 : if (operation_precision < precision)
7156 : {
7157 111345 : if (dump_enabled_p ())
7158 2786 : dump_printf_loc (MSG_NOTE, vect_location, "can narrow to %s:%d"
7159 : " without affecting users: %G",
7160 2786 : TYPE_UNSIGNED (type) ? "unsigned" : "signed",
7161 : operation_precision, (gimple *) stmt);
7162 222690 : vect_set_operation_type (stmt_info, type, operation_precision,
7163 111345 : TYPE_SIGN (type));
7164 : }
7165 9229912 : vect_set_min_input_precision (stmt_info, type, min_input_precision);
7166 : }
7167 :
7168 : /* Return true if the statement described by STMT_INFO sets a boolean
7169 : SSA_NAME and if we know how to vectorize this kind of statement using
7170 : vector mask types.

   For a gcond, which has no result of its own, the boolean test is
   applied to the left-hand operand of the condition instead. The
   accepted statements are: assignments and conditions whose code is a
   conversion, an SSA_NAME copy, a bitwise NOT/IOR/XOR/AND or any
   comparison; and PHI nodes. */
7171 :
7172 : static bool
7173 38655640 : possible_vector_mask_operation_p (stmt_vec_info stmt_info)
7174 : {
7175 38655640 : tree lhs = gimple_get_lhs (stmt_info->stmt);
7176 38655640 : tree_code code = ERROR_MARK;
7177 38655640 : gassign *assign = NULL;
7178 38655640 : gcond *cond = NULL;
7179 :
7180 38655640 : if ((assign = dyn_cast <gassign *> (stmt_info->stmt)))
7181 21845499 : code = gimple_assign_rhs_code (assign);
7182 16810141 : else if ((cond = dyn_cast <gcond *> (stmt_info->stmt)))
7183 : {
7184 5232600 : lhs = gimple_cond_lhs (cond);
7185 5232600 : code = gimple_cond_code (cond);
7186 : }
7187 :
7188 38655640 : if (!lhs
7189 35570858 : || TREE_CODE (lhs) != SSA_NAME
7190 69594247 : || !VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (lhs)))
7191 : return false;
7192 :
/* CODE is still ERROR_MARK here iff the statement is neither a gassign
   nor a gcond. */
7193 2093465 : if (code != ERROR_MARK)
7194 : {
7195 1842290 : switch (code)
7196 : {
7197 : CASE_CONVERT:
7198 : case SSA_NAME:
7199 : case BIT_NOT_EXPR:
7200 : case BIT_IOR_EXPR:
7201 : case BIT_XOR_EXPR:
7202 : case BIT_AND_EXPR:
7203 : return true;
7204 :
7205 1447808 : default:
7206 1447808 : return TREE_CODE_CLASS (code) == tcc_comparison;
7207 : }
7208 : }
7209 251175 : else if (is_a <gphi *> (stmt_info->stmt))
7210 142623 : return true;
7211 : return false;
7212 : }
7213 :
7214 : /* If STMT_INFO sets a boolean SSA_NAME, see whether we should use
7215 : a vector mask type instead of a normal vector type. Record the
7216 : result in STMT_INFO->mask_precision. Returns true when the
7217 : precision changed.

   A recorded value of ~0U stands for "use normal nonmask vectors" (see
   the dump message below). Statements rejected by
   possible_vector_mask_operation_p are left untouched and yield
   false. */
7218 :
7219 : static bool
7220 38655640 : vect_determine_mask_precision (vec_info *vinfo, stmt_vec_info stmt_info)
7221 : {
7222 38655640 : if (!possible_vector_mask_operation_p (stmt_info))
7223 : return false;
7224 :
7225 : /* If at least one boolean input uses a vector mask type,
7226 : pick the mask type with the narrowest elements.
7227 :
7228 : ??? This is the traditional behavior. It should always produce
7229 : the smallest number of operations, but isn't necessarily the
7230 : optimal choice. For example, if we have:
7231 :
7232 : a = b & c
7233 :
7234 : where:
7235 :
7236 : - the user of a wants it to have a mask type for 16-bit elements (M16)
7237 : - b also uses M16
7238 : - c uses a mask type for 8-bit elements (M8)
7239 :
7240 : then picking M8 gives:
7241 :
7242 : - 1 M16->M8 pack for b
7243 : - 1 M8 AND for a
7244 : - 2 M8->M16 unpacks for the user of a
7245 :
7246 : whereas picking M16 would have given:
7247 :
7248 : - 2 M8->M16 unpacks for c
7249 : - 2 M16 ANDs for a
7250 :
7251 : The number of operations are equal, but M16 would have given
7252 : a shorter dependency chain and allowed more ILP. */
7253 1934415 : unsigned int precision = ~0U;
7254 1934415 : gimple *stmt = STMT_VINFO_STMT (stmt_info);
7255 :
7256 : /* If the statement compares two values that shouldn't use vector masks,
7257 : try comparing the values as normal scalars instead. */
7258 1934415 : tree_code code = ERROR_MARK;
7259 1934415 : tree op0_type;
7260 1934415 : unsigned int nops = -1;
7261 1934415 : unsigned int ops_start = 0;
7262 :
/* Operands to scan: 1 .. num_ops-1 for an assignment (operand 0 is the
   lhs), 0 .. 1 for a gcond. OP0_TYPE is only set, and only read, on
   these two paths. */
7263 1934415 : if (gassign *assign = dyn_cast <gassign *> (stmt))
7264 : {
7265 1259592 : code = gimple_assign_rhs_code (assign);
7266 1259592 : op0_type = TREE_TYPE (gimple_assign_rhs1 (assign));
7267 1259592 : nops = gimple_num_ops (assign);
7268 1259592 : ops_start = 1;
7269 : }
7270 674823 : else if (gcond *cond = dyn_cast <gcond *> (stmt))
7271 : {
7272 532200 : code = gimple_cond_code (cond);
7273 532200 : op0_type = TREE_TYPE (gimple_cond_lhs (cond));
7274 532200 : nops = 2;
7275 532200 : ops_start = 0;
7276 : }
7277 :
7278 1791792 : if (code != ERROR_MARK)
7279 : {
7280 5333175 : for (unsigned int i = ops_start; i < nops; ++i)
7281 : {
7282 3541383 : tree rhs = gimple_op (stmt, i);
7283 3541383 : if (!VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (rhs)))
7284 1740829 : continue;
7285 :
7286 1800554 : stmt_vec_info def_stmt_info = vinfo->lookup_def (rhs);
7287 1800554 : if (!def_stmt_info)
7288 : /* Don't let external or constant operands influence the choice.
7289 : We can convert them to whichever vector type we pick. */
7290 547140 : continue;
7291 :
/* Keep the smallest nonzero mask precision seen among the inputs. */
7292 1253414 : if (def_stmt_info->mask_precision)
7293 : {
7294 1034938 : if (precision > def_stmt_info->mask_precision)
7295 3541383 : precision = def_stmt_info->mask_precision;
7296 : }
7297 : }
7298 :
/* No input supplied a mask precision: for a comparison, fall back to the
   bit size of the compared scalar mode, provided the target can expand
   the vector comparison. */
7299 1791792 : if (precision == ~0U
7300 1457545 : && TREE_CODE_CLASS (code) == tcc_comparison)
7301 : {
7302 1262247 : scalar_mode mode;
7303 1262247 : tree vectype, mask_type;
7304 1262247 : if (is_a <scalar_mode> (TYPE_MODE (op0_type), &mode)
7305 : /* Do not allow this to set vinfo->vector_mode, this might
7306 : disrupt the result for the next iteration. */
7307 1262247 : && (vectype = get_related_vectype_for_scalar_type
7308 1512301 : (vinfo->vector_mode, op0_type))
7309 1103827 : && (mask_type = truth_type_for (vectype))
7310 1103827 : && expand_vec_cmp_expr_p (vectype, mask_type, code))
7311 1707546 : precision = GET_MODE_BITSIZE (mode);
7312 : }
7313 : }
7314 : else
7315 : {
/* The statement is a PHI: take the smallest nonzero mask precision over
   its arguments. */
7316 142623 : gphi *phi = as_a <gphi *> (stmt_info->stmt);
7317 578308 : for (unsigned i = 0; i < gimple_phi_num_args (phi); ++i)
7318 : {
7319 435685 : tree rhs = gimple_phi_arg_def (phi, i);
7320 :
7321 435685 : stmt_vec_info def_stmt_info = vinfo->lookup_def (rhs);
7322 435685 : if (!def_stmt_info)
7323 : /* Don't let external or constant operands influence the choice.
7324 : We can convert them to whichever vector type we pick. */
7325 281344 : continue;
7326 :
7327 154341 : if (def_stmt_info->mask_precision)
7328 : {
7329 129183 : if (precision > def_stmt_info->mask_precision)
7330 435685 : precision = def_stmt_info->mask_precision;
7331 : }
7332 : }
7333 : }
7334 :
7335 1934415 : if (stmt_info->mask_precision != precision)
7336 : {
7337 1806446 : if (dump_enabled_p ())
7338 : {
7339 8024 : if (precision == ~0U)
7340 1882 : dump_printf_loc (MSG_NOTE, vect_location,
7341 : "using normal nonmask vectors for %G",
7342 : stmt_info->stmt);
7343 : else
7344 6142 : dump_printf_loc (MSG_NOTE, vect_location,
7345 : "using boolean precision %d for %G",
7346 : precision, stmt_info->stmt);
7347 : }
7348 :
7349 : /* ??? We'd like to assert stmt_info->mask_precision == 0
7350 : || stmt_info->mask_precision > precision, thus that we only
7351 : decrease mask precisions throughout iteration, but the
7352 : tcc_comparison handling above means for comparisons of bools
7353 : we start with 8 but might increase in case the bools get mask
7354 : precision on their own. */
7355 1806446 : stmt_info->mask_precision = precision;
7356 1806446 : return true;
7357 : }
7358 : return false;
7359 : }
7360 :
7361 : /* Handle vect_determine_precisions for STMT_INFO, given that we
7362 : have already done so for the users of its result.

   min_output_precision is computed for every statement; the range-based
   and user-based narrowing steps are applied, in that order, only when
   the statement is a gassign. */
7363 :
7364 : void
7365 37568348 : vect_determine_stmt_precisions (vec_info *vinfo, stmt_vec_info stmt_info)
7366 : {
7367 37568348 : vect_determine_min_output_precision (vinfo, stmt_info);
7368 37568348 : if (gassign *stmt = dyn_cast <gassign *> (stmt_info->stmt))
7369 : {
7370 21069121 : vect_determine_precisions_from_range (stmt_info, stmt);
7371 21069121 : vect_determine_precisions_from_users (stmt_info, stmt);
7372 : }
7373 37568348 : }
7374 :
7375 : /* Walk backwards through the vectorizable region to determine the
7376 : values of these fields:
7377 :
7378 : - min_output_precision
7379 : - min_input_precision
7380 : - operation_precision
7381 : - operation_sign.

   Before that backward walk, a forward walk over the PHIs and statements
   of every block computes mask_precision via
   vect_determine_mask_precision. For loop vectorization the forward
   walk is repeated until no mask precision changes; otherwise it runs
   once. Only statements known to VINFO and marked vectorizable are
   visited. */
7382 :
7383 : void
7384 1053685 : vect_determine_precisions (vec_info *vinfo)
7385 : {
7386 1053685 : basic_block *bbs = vinfo->bbs;
7387 1053685 : unsigned int nbbs = vinfo->nbbs;
7388 :
7389 1073160 : DUMP_VECT_SCOPE ("vect_determine_precisions");
7390 :
7391 : /* For mask precisions we have to iterate since otherwise we do not
7392 : get reduction PHI precision correct. For now do this only for
7393 : loop vectorization. */
7394 1123843 : bool changed;
7395 1123843 : do
7396 : {
7397 1123843 : changed = false;
7398 12692294 : for (unsigned int i = 0; i < nbbs; i++)
7399 : {
7400 11568451 : basic_block bb = bbs[i];
7401 11568451 : for (auto gsi = gsi_start_phis (bb);
7402 18728222 : !gsi_end_p (gsi); gsi_next (&gsi))
7403 : {
7404 7159771 : stmt_vec_info stmt_info = vinfo->lookup_stmt (gsi.phi ());
7405 7159771 : if (stmt_info && STMT_VINFO_VECTORIZABLE (stmt_info))
7406 6979014 : changed |= vect_determine_mask_precision (vinfo, stmt_info);
7407 : }
7408 120238589 : for (auto gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
7409 : {
7410 97101687 : stmt_vec_info stmt_info = vinfo->lookup_stmt (gsi_stmt (gsi));
7411 97101687 : if (stmt_info && STMT_VINFO_VECTORIZABLE (stmt_info))
7412 31676626 : changed |= vect_determine_mask_precision (vinfo, stmt_info);
7413 : }
7414 : }
7415 : }
7416 2177528 : while (changed && is_a <loop_vec_info> (vinfo));
7417 :
/* Backward walk: blocks in reverse order and, within a block, statements
   from last to first followed by the block's PHIs. */
7418 12460559 : for (unsigned int i = 0; i < nbbs; i++)
7419 : {
7420 11406874 : basic_block bb = bbs[nbbs - i - 1];
7421 213725296 : for (auto gsi = gsi_last_bb (bb); !gsi_end_p (gsi); gsi_prev (&gsi))
7422 : {
7423 95455774 : stmt_vec_info stmt_info = vinfo->lookup_stmt (gsi_stmt (gsi));
7424 95455774 : if (stmt_info && STMT_VINFO_VECTORIZABLE (stmt_info))
7425 30792984 : vect_determine_stmt_precisions (vinfo, stmt_info);
7426 : }
7427 18362995 : for (auto gsi = gsi_start_phis (bb); !gsi_end_p (gsi); gsi_next (&gsi))
7428 : {
7429 6956121 : stmt_vec_info stmt_info = vinfo->lookup_stmt (gsi.phi ());
7430 6956121 : if (stmt_info && STMT_VINFO_VECTORIZABLE (stmt_info))
7431 6775364 : vect_determine_stmt_precisions (vinfo, stmt_info);
7432 : }
7433 : }
7434 1053685 : }
7435 :
/* Signature shared by the pattern recognizers listed in
   vect_vect_recog_func_ptrs. Judging by the visible end of
   vect_recog_cond_store_pattern, a recognizer returns NULL when nothing
   matches and otherwise returns the new pattern statement after storing
   a vector type through the tree * argument -- NOTE(review): confirm
   that every recognizer follows the same contract. */
7436 : typedef gimple *(*vect_recog_func_ptr) (vec_info *, stmt_vec_info, tree *);
7437 :
/* One entry of the pattern recognizer table: the recognizer function
   together with a short name for it. */
7438 : struct vect_recog_func
7439 : {
7440 : vect_recog_func_ptr fn;
7441 : const char *name;
7442 : };
7443 :
7444 : /* Note that ordering matters - the first pattern matching on a stmt is
7445 : taken which means usually the more complex one needs to precede the
7446 : less complex ones (widen_sum only after dot_prod or sad for example). */
7447 : static vect_recog_func vect_vect_recog_func_ptrs[] = {
7448 : { vect_recog_bitfield_ref_pattern, "bitfield_ref" },
7449 : { vect_recog_bit_insert_pattern, "bit_insert" },
7450 : { vect_recog_abd_pattern, "abd" },
7451 : { vect_recog_over_widening_pattern, "over_widening" },
7452 : /* Must come after over_widening, which narrows the shift as much as
7453 : possible beforehand. */
7454 : { vect_recog_average_pattern, "average" },
7455 : { vect_recog_cond_expr_convert_pattern, "cond_expr_convert" },
7456 : { vect_recog_mulhs_pattern, "mult_high" },
7457 : { vect_recog_cast_forwprop_pattern, "cast_forwprop" },
7458 : { vect_recog_widen_mult_pattern, "widen_mult" },
7459 : { vect_recog_dot_prod_pattern, "dot_prod" },
7460 : { vect_recog_sad_pattern, "sad" },
7461 : { vect_recog_widen_sum_pattern, "widen_sum" },
7462 : { vect_recog_pow_pattern, "pow" },
7463 : { vect_recog_popcount_clz_ctz_ffs_pattern, "popcount_clz_ctz_ffs" },
7464 : { vect_recog_ctz_ffs_pattern, "ctz_ffs" },
7465 : { vect_recog_widen_shift_pattern, "widen_shift" },
7466 : { vect_recog_rotate_pattern, "rotate" },
7467 : { vect_recog_vector_vector_shift_pattern, "vector_vector_shift" },
7468 : { vect_recog_divmod_pattern, "divmod" },
7469 : { vect_recog_mod_var_pattern, "modvar" },
7470 : { vect_recog_mult_pattern, "mult" },
7471 : { vect_recog_sat_add_pattern, "sat_add" },
7472 : { vect_recog_sat_sub_pattern, "sat_sub" },
7473 : { vect_recog_sat_trunc_pattern, "sat_trunc" },
7474 : { vect_recog_gcond_pattern, "gcond" },
7475 : { vect_recog_bool_pattern, "bool" },
7476 : /* This must come before mask conversion, and includes the parts
7477 : of mask conversion that are needed for gather and scatter
7478 : internal functions. */
7479 : { vect_recog_gather_scatter_pattern, "gather_scatter" },
7480 : { vect_recog_cond_store_pattern, "cond_store" },
7481 : { vect_recog_mask_conversion_pattern, "mask_conversion" },
7482 : { vect_recog_widen_plus_pattern, "widen_plus" },
7483 : { vect_recog_widen_minus_pattern, "widen_minus" },
7484 : { vect_recog_widen_abd_pattern, "widen_abd" },
7485 : /* These must come after the double widening ones.
   NOTE(review): no entry follows this comment; presumably it describes
   the widen_plus, widen_minus and widen_abd entries directly above --
   confirm and move it in front of them if so. */
7486 : };
7487 :
7488 : /* Mark statements that are involved in a pattern.

   PATTERN_STMT is the main pattern statement that replaces the statement
   of ORIG_STMT_INFO, and PATTERN_VECTYPE is the vector type handed to
   vect_init_pattern_stmt / vect_set_pattern_stmt for the new statements.
   Every statement in ORIG_STMT_INFO's STMT_VINFO_PATTERN_DEF_SEQ is
   initialized as a pattern statement with def type vect_internal_def.

   If ORIG_STMT_INFO is itself a pattern statement, the lhs of the old and
   new statements are swapped and the new definition sequence followed by
   PATTERN_STMT is spliced into the definition sequence of the statement
   ORIG_STMT_INFO relates to, in place of the old pattern statement.
   Otherwise PATTERN_STMT is registered through vect_set_pattern_stmt.

   If PATTERN_STMT is a gcond, the statement related to the original
   statement is marked as vect_condition_def.  Finally, when the statement
   we were called with has a reduction index (STMT_VINFO_REDUC_IDX is not
   -1), the new pattern statements are searched in order and given a
   reduction index of their own.  */
7489 :
7490 : void
7491 1014296 : vect_mark_pattern_stmts (vec_info *vinfo,
7492 : stmt_vec_info orig_stmt_info, gimple *pattern_stmt,
7493 : tree pattern_vectype)
7494 : {
/* Remember the statement info we were called with; ORIG_STMT_INFO may be
   redirected below when it is a pattern statement itself.  */
7495 1014296 : stmt_vec_info orig_stmt_info_saved = orig_stmt_info;
7496 1014296 : gimple *def_seq = STMT_VINFO_PATTERN_DEF_SEQ (orig_stmt_info);
7497 :
7498 1014296 : gimple *orig_pattern_stmt = NULL;
7499 1014296 : if (is_pattern_stmt_p (orig_stmt_info))
7500 : {
7501 : /* We're replacing a statement in an existing pattern definition
7502 : sequence. */
7503 11204 : orig_pattern_stmt = orig_stmt_info->stmt;
7504 11204 : if (dump_enabled_p ())
7505 664 : dump_printf_loc (MSG_NOTE, vect_location,
7506 : "replacing earlier pattern %G", orig_pattern_stmt);
7507 :
7508 : /* To keep the book-keeping simple, just swap the lhs of the
7509 : old and new statements, so that the old one has a valid but
7510 : unused lhs. */
7511 11204 : tree old_lhs = gimple_get_lhs (orig_pattern_stmt);
7512 11204 : gimple_set_lhs (orig_pattern_stmt, gimple_get_lhs (pattern_stmt));
7513 11204 : gimple_set_lhs (pattern_stmt, old_lhs);
7514 :
7515 11204 : if (dump_enabled_p ())
7516 664 : dump_printf_loc (MSG_NOTE, vect_location, "with %G", pattern_stmt);
7517 :
7518 : /* Switch to the statement that ORIG replaces. */
7519 11204 : orig_stmt_info = STMT_VINFO_RELATED_STMT (orig_stmt_info);
7520 :
7521 : /* We shouldn't be replacing the main pattern statement. */
7522 11204 : gcc_assert (STMT_VINFO_RELATED_STMT (orig_stmt_info)->stmt
7523 : != orig_pattern_stmt);
7524 : }
7525 :
/* Initialize every statement of the new definition sequence as a pattern
   statement of (the possibly redirected) ORIG_STMT_INFO.  */
7526 1014296 : if (def_seq)
7527 : for (gimple_stmt_iterator si = gsi_start (def_seq);
7528 2235560 : !gsi_end_p (si); gsi_next (&si))
7529 : {
7530 1351109 : if (dump_enabled_p ())
7531 24652 : dump_printf_loc (MSG_NOTE, vect_location,
7532 : "extra pattern stmt: %G", gsi_stmt (si));
7533 1351109 : stmt_vec_info pattern_stmt_info
7534 1351109 : = vect_init_pattern_stmt (vinfo, gsi_stmt (si),
7535 : orig_stmt_info, pattern_vectype);
7536 : /* Stmts in the def sequence are not vectorizable cycle or
7537 : induction defs, instead they should all be vect_internal_def
7538 : feeding the main pattern stmt which retains this def type. */
7539 1351109 : STMT_VINFO_DEF_TYPE (pattern_stmt_info) = vect_internal_def;
7540 : }
7541 :
7542 1014296 : if (orig_pattern_stmt)
7543 : {
7544 11204 : vect_init_pattern_stmt (vinfo, pattern_stmt,
7545 : orig_stmt_info, pattern_vectype);
7546 :
7547 : /* Insert all the new pattern statements before the original one. */
7548 11204 : gimple_seq *orig_def_seq = &STMT_VINFO_PATTERN_DEF_SEQ (orig_stmt_info);
7549 11204 : gimple_stmt_iterator gsi = gsi_for_stmt (orig_pattern_stmt,
7550 : orig_def_seq);
7551 11204 : gsi_insert_seq_before_without_update (&gsi, def_seq, GSI_SAME_STMT);
7552 11204 : gsi_insert_before_without_update (&gsi, pattern_stmt, GSI_SAME_STMT);
7553 :
7554 : /* Remove the pattern statement that this new pattern replaces.
   Both insertions above used GSI_SAME_STMT, so GSI is expected to still
   designate ORIG_PATTERN_STMT here.  */
7555 11204 : gsi_remove (&gsi, false);
7556 : }
7557 : else
7558 1003092 : vect_set_pattern_stmt (vinfo,
7559 : pattern_stmt, orig_stmt_info, pattern_vectype);
7560 :
7561 : /* For any conditionals mark them as vect_condition_def. */
7562 1014296 : if (is_a <gcond *> (pattern_stmt))
7563 363555 : STMT_VINFO_DEF_TYPE (STMT_VINFO_RELATED_STMT (orig_stmt_info)) = vect_condition_def;
7564 :
7565 : /* Transfer reduction path info to the pattern. */
7566 1014296 : if (STMT_VINFO_REDUC_IDX (orig_stmt_info_saved) != -1)
7567 : {
7568 15999 : gimple_match_op op;
7569 15999 : if (!gimple_extract_op (orig_stmt_info_saved->stmt, &op))
7570 0 : gcc_unreachable ();
/* LOOKFOR is the operand we expect to find used by the new statements.
   NOTE(review): OP was extracted from ORIG_STMT_INFO_SAVED's statement,
   but the index is read from ORIG_STMT_INFO, which was redirected above
   when replacing an earlier pattern statement.  Confirm that mixing the
   two is intended; the "try harder" search below copes with a bogus
   initial LOOKFOR.  */
7571 15999 : tree lookfor = op.ops[STMT_VINFO_REDUC_IDX (orig_stmt_info)];
7572 : /* Search the pattern def sequence and the main pattern stmt. Note
7573 : we may have inserted all into a containing pattern def sequence
7574 : so the following is a bit awkward. */
7575 15999 : gimple_stmt_iterator si;
7576 15999 : gimple *s;
/* S is the statement examined next and SI designates the statement
   following it: start with the first statement of DEF_SEQ if there is
   one, otherwise go straight to PATTERN_STMT.  */
7577 15999 : if (def_seq)
7578 : {
7579 14864 : si = gsi_start (def_seq);
7580 14864 : s = gsi_stmt (si);
7581 14864 : gsi_next (&si);
7582 : }
7583 : else
7584 : {
7585 : si = gsi_none ();
7586 : s = pattern_stmt;
7587 : }
7588 33676 : do
7589 : {
7590 33676 : bool found = false;
7591 33676 : if (gimple_extract_op (s, &op))
7592 : {
/* If S uses LOOKFOR, record the operand position as its reduction index
   and continue the search with the lhs of S.  */
7593 82006 : for (unsigned i = 0; i < op.num_ops; ++i)
7594 64329 : if (op.ops[i] == lookfor)
7595 : {
7596 15999 : STMT_VINFO_REDUC_IDX (vinfo->lookup_stmt (s)) = i;
7597 15999 : lookfor = gimple_get_lhs (s);
7598 15999 : found = true;
7599 15999 : break;
7600 : }
7601 : /* Try harder to find a mid-entry into an earlier pattern
7602 : sequence. Likewise an entry to a stmt skipping a conversion
7603 : on an input. This means that the initial 'lookfor' was
7604 : bogus. */
7605 15999 : if (!found)
7606 : {
7607 38361 : for (unsigned i = 0; i < op.num_ops; ++i)
7608 20684 : if (TREE_CODE (op.ops[i]) == SSA_NAME)
7609 17677 : if (auto def = vinfo->lookup_def (op.ops[i]))
7610 17484 : if (vect_is_reduction (def)
7611 17484 : || (is_a <gphi *> (def->stmt)
7612 0 : && STMT_VINFO_REDUC_DEF (def) != NULL))
7613 : {
7614 0 : STMT_VINFO_REDUC_IDX (vinfo->lookup_stmt (s)) = i;
7615 0 : lookfor = gimple_get_lhs (s);
7616 0 : found = true;
7617 0 : break;
7618 : }
7619 : }
7620 : }
/* The main pattern statement is always the last one visited.  Failing to
   find an index for it is only reported in the dump, not treated as an
   error.  */
7621 33676 : if (s == pattern_stmt)
7622 : {
7623 15999 : if (!found && dump_enabled_p ())
7624 0 : dump_printf_loc (MSG_NOTE, vect_location,
7625 : "failed to update reduction index.\n");
7626 15999 : break;
7627 : }
/* Advance to the next statement; once SI is exhausted only PATTERN_STMT
   remains to be visited.  */
7628 17677 : if (gsi_end_p (si))
7629 : s = pattern_stmt;
7630 : else
7631 : {
7632 2813 : s = gsi_stmt (si);
7633 2813 : if (s == pattern_stmt)
7634 : /* Found the end inside a bigger pattern def seq. */
7635 : si = gsi_none ();
7636 : else
7637 2813 : gsi_next (&si);
7638 : }
7639 : } while (1);
7640 : }
7641 1014296 : }
7642 :
7643 : /* Function vect_pattern_recog_1
7644 :
7645 : Input:
   VINFO: The vectorizer info, passed on to the recognizer and used to look
   up the statement infos of pattern definition statements.
7646 : RECOG_FUNC: An entry of the recognizer table.  RECOG_FUNC.fn is a
7647 : function that detects a certain computation pattern and RECOG_FUNC.name
   is the pattern name printed in dump output.
7648 : STMT_INFO: A stmt from which the pattern search should start.
7649 :
7650 : If RECOG_FUNC.fn successfully detected the pattern, it creates
7651 : a sequence of statements that has the same functionality and can be
7652 : used to replace STMT_INFO. It returns the last statement in the sequence
7653 : and adds any earlier statements to STMT_INFO's STMT_VINFO_PATTERN_DEF_SEQ.
7654 : RECOG_FUNC.fn also sets *TYPE_OUT (PATTERN_VECTYPE below) to the vector
7655 : type of the final statement, having first checked that the target supports
7656 : the new operation in that type.
   If no pattern is detected, STMT_INFO's STMT_VINFO_PATTERN_DEF_SEQ is
   reset to NULL.

   If STMT_INFO has already been replaced by a pattern, the function instead
   recurses on each statement of STMT_INFO's pattern definition sequence.
7657 :
7658 : This function also does some bookkeeping, as explained in the documentation
7659 : for vect_pattern_recog. */
7660 :
7661 : static void
7662 1003708221 : vect_pattern_recog_1 (vec_info *vinfo,
7663 : const vect_recog_func &recog_func, stmt_vec_info stmt_info)
7664 : {
7665 1003708221 : gimple *pattern_stmt;
7666 1003708221 : tree pattern_vectype;
7667 :
7668 : /* If this statement has already been replaced with pattern statements,
7669 : leave the original statement alone, since the first match wins.
7670 : Instead try to match against the definition statements that feed
7671 : the main pattern statement. */
7672 1003708221 : if (STMT_VINFO_IN_PATTERN_P (stmt_info))
7673 : {
7674 12904691 : gimple_stmt_iterator gsi;
7675 12904691 : for (gsi = gsi_start (STMT_VINFO_PATTERN_DEF_SEQ (stmt_info));
7676 31237424 : !gsi_end_p (gsi); gsi_next (&gsi))
7677 18332733 : vect_pattern_recog_1 (vinfo, recog_func,
7678 : vinfo->lookup_stmt (gsi_stmt (gsi)));
7679 : return;
7680 : }
7681 :
/* A statement that is not yet part of a pattern must not carry a
   definition sequence before the recognizer runs.  */
7682 990803530 : gcc_assert (!STMT_VINFO_PATTERN_DEF_SEQ (stmt_info));
7683 990803530 : pattern_stmt = recog_func.fn (vinfo, stmt_info, &pattern_vectype);
7684 990803530 : if (!pattern_stmt)
7685 : {
7686 : /* Clear any half-formed pattern definition sequence. */
7687 989789234 : STMT_VINFO_PATTERN_DEF_SEQ (stmt_info) = NULL;
7688 989789234 : return;
7689 : }
7690 :
7691 : /* Found a vectorizable pattern. */
7692 1014296 : if (dump_enabled_p ())
7693 18908 : dump_printf_loc (MSG_NOTE, vect_location,
7694 : "%s pattern recognized: %G",
7695 18908 : recog_func.name, pattern_stmt);
7696 :
7697 : /* Mark the stmts that are involved in the pattern. */
7698 1014296 : vect_mark_pattern_stmts (vinfo, stmt_info, pattern_stmt, pattern_vectype);
7699 : }
7700 :
7701 :
7702 : /* Function vect_pattern_recog
7703 :
7704 : Input:
7705 : VINFO - the vec_info describing the region (its basic blocks are taken
7706 : from VINFO->bbs) in which we want to look for computation idioms.
7707 :
7708 : Output - for each computation idiom that is detected we create a new stmt
7709 : that provides the same functionality and that can be vectorized. We
7710 : also record some information in the stmt_vec_info of the relevant
7711 : stmts, as explained below:
7712 :
7713 : At the entry to this function we have the following stmts, with the
7714 : following initial value in the STMT_VINFO fields:
7715 :
7716 : stmt in_pattern_p related_stmt vec_stmt
7717 : S1: a_i = .... - - -
7718 : S2: a_2 = ..use(a_i).. - - -
7719 : S3: a_1 = ..use(a_2).. - - -
7720 : S4: a_0 = ..use(a_1).. - - -
7721 : S5: ... = ..use(a_0).. - - -
7722 :
7723 : Say the sequence {S1,S2,S3,S4} was detected as a pattern that can be
7724 : represented by a single stmt. We then:
7725 : - create a new stmt S6 equivalent to the pattern (the stmt is not
7726 : inserted into the code)
7727 : - fill in the STMT_VINFO fields as follows:
7728 :
7729 : in_pattern_p related_stmt vec_stmt
7730 : S1: a_i = .... - - -
7731 : S2: a_2 = ..use(a_i).. - - -
7732 : S3: a_1 = ..use(a_2).. - - -
7733 : S4: a_0 = ..use(a_1).. true S6 -
7734 : '---> S6: a_new = .... - S4 -
7735 : S5: ... = ..use(a_0).. - - -
7736 :
7737 : (the last stmt in the pattern (S4) and the new pattern stmt (S6) point
7738 : to each other through the RELATED_STMT field).
7739 :
7740 : S6 will be marked as relevant in vect_mark_stmts_to_be_vectorized instead
7741 : of S4 because it will replace all its uses. Stmts {S1,S2,S3} will
7742 : remain irrelevant unless used by stmts other than S4.
7743 :
7744 : If vectorization succeeds, vect_transform_stmt will skip over {S1,S2,S3}
7745 : (because they are marked as irrelevant). It will vectorize S6, and record
7746 : a pointer to the new vector stmt VS6 from S6 (as usual).
7747 : S4 will be skipped, and S5 will be vectorized as usual:
7748 :
7749 : in_pattern_p related_stmt vec_stmt
7750 : S1: a_i = .... - - -
7751 : S2: a_2 = ..use(a_i).. - - -
7752 : S3: a_1 = ..use(a_2).. - - -
7753 : > VS6: va_new = .... - - -
7754 : S4: a_0 = ..use(a_1).. true S6 VS6
7755 : '---> S6: a_new = .... - S4 VS6
7756 : > VS5: ... = ..vuse(va_new).. - - -
7757 : S5: ... = ..use(a_0).. - - -
7758 :
7759 : DCE could then get rid of {S1,S2,S3,S4,S5} (if their defs are not used
7760 : elsewhere), and we'll end up with:
7761 :
7762 : VS6: va_new = ....
7763 : VS5: ... = ..vuse(va_new)..
7764 :
7765 : In case of more than one pattern statement, e.g., widen-mult with
7766 : intermediate type:
7767 :
7768 : S1 a_t = ;
7769 : S2 a_T = (TYPE) a_t;
7770 : '--> S3: a_it = (interm_type) a_t;
7771 : S4 prod_T = a_T * CONST;
7772 : '--> S5: prod_T' = a_it w* CONST;
7773 :
7774 : there may be other users of a_T outside the pattern. In that case S2 will
7775 : be marked as relevant (as well as S3), and both S2 and S3 will be analyzed
7776 : and vectorized. The vector stmt VS2 will be recorded in S2, and VS3 will
7777 : be recorded in S3. */
7778 :
7779 : void
7780 1053685 : vect_pattern_recog (vec_info *vinfo)
7781 : {
7782 1053685 : basic_block *bbs = vinfo->bbs;
7783 1053685 : unsigned int nbbs = vinfo->nbbs;
7784 :
/* Run the precision analysis before any recognizer is tried.
   NOTE(review): presumably the recognizers consume its results; confirm
   against vect_determine_precisions.  */
7785 1053685 : vect_determine_precisions (vinfo);
7786 :
7787 1053685 : DUMP_VECT_SCOPE ("vect_pattern_recog");
7788 :
7789 : /* Scan through the stmts in the region, applying the pattern recognition
7790 : functions starting at each stmt visited. */
7791 12460559 : for (unsigned i = 0; i < nbbs; i++)
7792 : {
7793 11406874 : basic_block bb = bbs[i];
7794 :
7795 118269522 : for (auto si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
7796 : {
7797 95455774 : stmt_vec_info stmt_info = vinfo->lookup_stmt (gsi_stmt (si));
7798 :
/* Skip statements without a stmt_vec_info and those not marked
   vectorizable.  */
7799 95455774 : if (!stmt_info || !STMT_VINFO_VECTORIZABLE (stmt_info))
7800 64662790 : continue;
7801 :
7802 : /* Scan over all generic vect_recog_xxx_pattern functions.  They are
   tried in table order on the same statement.  */
7803 1016168472 : for (const auto &func_ptr : vect_vect_recog_func_ptrs)
7804 985375488 : vect_pattern_recog_1 (vinfo, func_ptr,
7805 : stmt_info);
7806 : }
7807 : }
7808 :
7809 : /* After this no more add_stmt calls are allowed. */
7810 1053685 : vinfo->stmt_vec_info_ro = true;
7811 1053685 : }
7812 :
7813 : /* Build a GIMPLE_ASSIGN or GIMPLE_CALL with the tree_code,
7814 : or internal_fn contained in ch, respectively.

   LHS is the result of the new statement.  OP0 is the first operand and
   must not be NULL_TREE (this is asserted).  OP1 is the second operand;
   for an internal function it may be NULL_TREE, in which case the call is
   built with a single argument.  CH must hold either a tree code or an
   internal function (asserted).  Returns the newly built statement.  */
7815 : gimple *
7816 159427 : vect_gimple_build (tree lhs, code_helper ch, tree op0, tree op1)
7817 : {
7818 159427 : gcc_assert (op0 != NULL_TREE);
/* Tree codes become an assignment; OP1 is passed through unchanged.  */
7819 159427 : if (ch.is_tree_code ())
7820 159427 : return gimple_build_assign (lhs, (tree_code) ch, op0, op1);
7821 :
/* Otherwise CH must be an internal function.  The argument count is 1 when
   OP1 is NULL_TREE and 2 otherwise.  */
7822 0 : gcc_assert (ch.is_internal_fn ());
7823 0 : gimple* stmt = gimple_build_call_internal (as_internal_fn ((combined_fn) ch),
7824 : op1 == NULL_TREE ? 1 : 2,
7825 : op0, op1);
7826 0 : gimple_call_set_lhs (stmt, lhs);
7827 0 : return stmt;
7828 : }
|