Branch data Line data Source code
1 : : /* Analysis Utilities for Loop Vectorization.
2 : : Copyright (C) 2006-2025 Free Software Foundation, Inc.
3 : : Contributed by Dorit Nuzman <dorit@il.ibm.com>
4 : :
5 : : This file is part of GCC.
6 : :
7 : : GCC is free software; you can redistribute it and/or modify it under
8 : : the terms of the GNU General Public License as published by the Free
9 : : Software Foundation; either version 3, or (at your option) any later
10 : : version.
11 : :
12 : : GCC is distributed in the hope that it will be useful, but WITHOUT ANY
13 : : WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 : : FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
15 : : for more details.
16 : :
17 : : You should have received a copy of the GNU General Public License
18 : : along with GCC; see the file COPYING3. If not see
19 : : <http://www.gnu.org/licenses/>. */
20 : :
21 : : #include "config.h"
22 : : #include "system.h"
23 : : #include "coretypes.h"
24 : : #include "backend.h"
25 : : #include "rtl.h"
26 : : #include "tree.h"
27 : : #include "gimple.h"
28 : : #include "gimple-iterator.h"
29 : : #include "gimple-fold.h"
30 : : #include "ssa.h"
31 : : #include "expmed.h"
32 : : #include "optabs-tree.h"
33 : : #include "insn-config.h"
34 : : #include "recog.h" /* FIXME: for insn_data */
35 : : #include "fold-const.h"
36 : : #include "stor-layout.h"
37 : : #include "tree-eh.h"
38 : : #include "gimplify.h"
41 : : #include "gimplify-me.h"
42 : : #include "cfgloop.h"
43 : : #include "tree-vectorizer.h"
44 : : #include "dumpfile.h"
45 : : #include "builtins.h"
46 : : #include "internal-fn.h"
47 : : #include "case-cfn-macros.h"
48 : : #include "fold-const-call.h"
49 : : #include "attribs.h"
50 : : #include "cgraph.h"
51 : : #include "omp-simd-clone.h"
52 : : #include "predict.h"
53 : : #include "tree-vector-builder.h"
54 : : #include "tree-ssa-loop-ivopts.h"
55 : : #include "vec-perm-indices.h"
56 : : #include "gimple-range.h"
57 : : #include "alias.h"
58 : :
59 : :
60 : : /* TODO: Note the vectorizer still builds COND_EXPRs with GENERIC compares
61 : : in the first operand. Disentangling this is future work, the
62 : : in the first operand. Disentangling this is future work; the
63 : : IL is properly transferred to VEC_COND_EXPRs with separate compares. */
64 : :
65 : : /* Return true if we have a useful VR_RANGE range for VAR, storing it
66 : : in *MIN_VALUE and *MAX_VALUE if so. Note the range in the dump files. */
67 : :
68 : : bool
69 : 11534673 : vect_get_range_info (tree var, wide_int *min_value, wide_int *max_value)
70 : : {
71 : 11534673 : int_range_max vr;
72 : 11534673 : tree vr_min, vr_max;
73 : 23069346 : get_range_query (cfun)->range_of_expr (vr, var);
74 : 11534673 : if (vr.undefined_p ())
75 : 84 : vr.set_varying (TREE_TYPE (var));
76 : 11534673 : value_range_kind vr_type = get_legacy_range (vr, vr_min, vr_max);
77 : 11534673 : *min_value = wi::to_wide (vr_min);
78 : 11534673 : *max_value = wi::to_wide (vr_max);
79 : 11534673 : wide_int nonzero = get_nonzero_bits (var);
80 : 11534673 : signop sgn = TYPE_SIGN (TREE_TYPE (var));
81 : 11534673 : if (intersect_range_with_nonzero_bits (vr_type, min_value, max_value,
82 : : nonzero, sgn) == VR_RANGE)
83 : : {
84 : 5643212 : if (dump_enabled_p ())
85 : : {
86 : 80268 : dump_generic_expr_loc (MSG_NOTE, vect_location, TDF_SLIM, var);
87 : 80268 : dump_printf (MSG_NOTE, " has range [");
88 : 80268 : dump_hex (MSG_NOTE, *min_value);
89 : 80268 : dump_printf (MSG_NOTE, ", ");
90 : 80268 : dump_hex (MSG_NOTE, *max_value);
91 : 80268 : dump_printf (MSG_NOTE, "]\n");
92 : : }
93 : 5643212 : return true;
94 : : }
95 : : else
96 : : {
97 : 5891461 : if (dump_enabled_p ())
98 : : {
99 : 63254 : dump_generic_expr_loc (MSG_NOTE, vect_location, TDF_SLIM, var);
100 : 63254 : dump_printf (MSG_NOTE, " has no range info\n");
101 : : }
102 : 5891461 : return false;
103 : : }
104 : 11534673 : }
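/* Illustrative example (not part of the original source): for an
   unsigned char VAR whose computed range is [0, 255] and whose known
   nonzero bits are 0xf0 (the low four bits are known to be zero),
   intersect_range_with_nonzero_bits tightens the range to [0, 0xf0],
   since no in-range value can have a bit set outside the mask.  The
   dump would then note "VAR has range [0x0, 0xf0]".  */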
105 : :
106 : : /* Report that we've found an instance of pattern NAME in
107 : : statement STMT. */
108 : :
109 : : static void
110 : 1092786 : vect_pattern_detected (const char *name, gimple *stmt)
111 : : {
112 : 1092786 : if (dump_enabled_p ())
113 : 22680 : dump_printf_loc (MSG_NOTE, vect_location, "%s: detected: %G", name, stmt);
114 : 1092786 : }
115 : :
116 : : /* Associate pattern statement PATTERN_STMT with ORIG_STMT_INFO and
117 : : return the pattern statement's stmt_vec_info. Set its vector type to
118 : : VECTYPE if it doesn't have one already. */
119 : :
120 : : static stmt_vec_info
121 : 2130533 : vect_init_pattern_stmt (vec_info *vinfo, gimple *pattern_stmt,
122 : : stmt_vec_info orig_stmt_info, tree vectype)
123 : : {
124 : 2130533 : stmt_vec_info pattern_stmt_info = vinfo->lookup_stmt (pattern_stmt);
125 : 2130533 : if (pattern_stmt_info == NULL)
126 : 1247620 : pattern_stmt_info = vinfo->add_stmt (pattern_stmt);
127 : 2130533 : gimple_set_bb (pattern_stmt, gimple_bb (orig_stmt_info->stmt));
128 : :
129 : 2130533 : pattern_stmt_info->pattern_stmt_p = true;
130 : 2130533 : STMT_VINFO_RELATED_STMT (pattern_stmt_info) = orig_stmt_info;
131 : 2130533 : STMT_VINFO_DEF_TYPE (pattern_stmt_info)
132 : 2130533 : = STMT_VINFO_DEF_TYPE (orig_stmt_info);
133 : 2130533 : if (!STMT_VINFO_VECTYPE (pattern_stmt_info))
134 : : {
135 : 2170244 : gcc_assert (!vectype
136 : : || is_a <gcond *> (pattern_stmt)
137 : : || (VECTOR_BOOLEAN_TYPE_P (vectype)
138 : : == vect_use_mask_type_p (orig_stmt_info)));
139 : 1256059 : STMT_VINFO_VECTYPE (pattern_stmt_info) = vectype;
140 : 1256059 : pattern_stmt_info->mask_precision = orig_stmt_info->mask_precision;
141 : : }
142 : 2130533 : return pattern_stmt_info;
143 : : }
144 : :
145 : : /* Set the pattern statement of ORIG_STMT_INFO to PATTERN_STMT.
146 : : Also set the vector type of PATTERN_STMT to VECTYPE, if it doesn't
147 : : have one already. */
148 : :
149 : : static void
150 : 890722 : vect_set_pattern_stmt (vec_info *vinfo, gimple *pattern_stmt,
151 : : stmt_vec_info orig_stmt_info, tree vectype)
152 : : {
153 : 890722 : STMT_VINFO_IN_PATTERN_P (orig_stmt_info) = true;
154 : 890722 : STMT_VINFO_RELATED_STMT (orig_stmt_info)
155 : 0 : = vect_init_pattern_stmt (vinfo, pattern_stmt, orig_stmt_info, vectype);
156 : 861158 : }
157 : :
158 : : /* Add NEW_STMT to STMT_INFO's pattern definition statements. If VECTYPE
159 : : is nonnull, record that NEW_STMT's vector type is VECTYPE, which might
160 : : be different from the vector type of the final pattern statement.
161 : : If VECTYPE is a mask type, SCALAR_TYPE_FOR_MASK is the scalar type
162 : : from which it was derived. */
163 : :
164 : : static inline void
165 : 1200829 : append_pattern_def_seq (vec_info *vinfo,
166 : : stmt_vec_info stmt_info, gimple *new_stmt,
167 : : tree vectype = NULL_TREE,
168 : : tree scalar_type_for_mask = NULL_TREE)
169 : : {
170 : 1877752 : gcc_assert (!scalar_type_for_mask
171 : : == (!vectype || !VECTOR_BOOLEAN_TYPE_P (vectype)));
172 : 1200829 : if (vectype)
173 : : {
174 : 874713 : stmt_vec_info new_stmt_info = vinfo->add_stmt (new_stmt);
175 : 874713 : STMT_VINFO_VECTYPE (new_stmt_info) = vectype;
176 : 874713 : if (scalar_type_for_mask)
177 : 523906 : new_stmt_info->mask_precision
178 : 1047812 : = GET_MODE_BITSIZE (SCALAR_TYPE_MODE (scalar_type_for_mask));
179 : : }
180 : 1200829 : gimple_seq_add_stmt_without_update (&STMT_VINFO_PATTERN_DEF_SEQ (stmt_info),
181 : : new_stmt);
182 : 1200829 : }
183 : :
184 : :
185 : : /* Add NEW_STMT to VINFO's invariant pattern definition statements. These
186 : : statements are not vectorized but are materialized as scalar in the loop
187 : : preheader. */
188 : :
189 : : static inline void
190 : 1228 : append_inv_pattern_def_seq (vec_info *vinfo, gimple *new_stmt)
191 : : {
192 : 1228 : gimple_seq_add_stmt_without_update (&vinfo->inv_pattern_def_seq, new_stmt);
193 : : }
194 : :
195 : : /* The caller wants to perform new operations on vect_external variable
196 : : VAR, so that the result of the operations would also be vect_external.
197 : : Return the edge on which the operations can be performed, if one exists.
198 : : Return null if the operations should instead be treated as part of
199 : : the pattern that needs them. */
200 : :
201 : : static edge
202 : 9422 : vect_get_external_def_edge (vec_info *vinfo, tree var)
203 : : {
204 : 9422 : edge e = NULL;
205 : 9422 : if (loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo))
206 : : {
207 : 718 : e = loop_preheader_edge (loop_vinfo->loop);
208 : 718 : if (!SSA_NAME_IS_DEFAULT_DEF (var))
209 : : {
210 : 535 : basic_block bb = gimple_bb (SSA_NAME_DEF_STMT (var));
211 : 535 : if (bb == NULL
212 : 535 : || !dominated_by_p (CDI_DOMINATORS, e->dest, bb))
213 : : e = NULL;
214 : : }
215 : : }
216 : 9422 : return e;
217 : : }
218 : :
219 : : /* Return true if the target supports a vector version of CODE,
220 : : where CODE is known to map to a direct optab with the given SUBTYPE.
221 : : ITYPE specifies the type of (some of) the scalar inputs and OTYPE
222 : : specifies the type of the scalar result.
223 : :
224 : : If CODE allows the inputs and outputs to have different types
225 : : (such as for WIDEN_SUM_EXPR), it is the input mode rather
226 : : than the output mode that determines the appropriate target pattern.
227 : : Operand 0 of the target pattern then specifies the mode that the output
228 : : must have.
229 : :
230 : : When returning true, set *VECOTYPE_OUT to the vector version of OTYPE.
231 : : Also set *VECITYPE_OUT to the vector version of ITYPE if VECITYPE_OUT
232 : : is nonnull. */
233 : :
234 : : static bool
235 : 426 : vect_supportable_direct_optab_p (vec_info *vinfo, tree otype, tree_code code,
236 : : tree itype, tree *vecotype_out,
237 : : tree *vecitype_out = NULL,
238 : : enum optab_subtype subtype = optab_default)
239 : : {
240 : 426 : tree vecitype = get_vectype_for_scalar_type (vinfo, itype);
241 : 426 : if (!vecitype)
242 : : return false;
243 : :
244 : 426 : tree vecotype = get_vectype_for_scalar_type (vinfo, otype);
245 : 426 : if (!vecotype)
246 : : return false;
247 : :
248 : 426 : optab optab = optab_for_tree_code (code, vecitype, subtype);
249 : 426 : if (!optab)
250 : : return false;
251 : :
252 : 426 : insn_code icode = optab_handler (optab, TYPE_MODE (vecitype));
253 : 426 : if (icode == CODE_FOR_nothing
254 : 426 : || insn_data[icode].operand[0].mode != TYPE_MODE (vecotype))
255 : 160 : return false;
256 : :
257 : 266 : *vecotype_out = vecotype;
258 : 266 : if (vecitype_out)
259 : 266 : *vecitype_out = vecitype;
260 : : return true;
261 : : }
262 : :
263 : : /* Return true if the target supports a vector version of CODE,
264 : : where CODE is known to map to a conversion optab with the given SUBTYPE.
265 : : ITYPE specifies the type of (some of) the scalar inputs and OTYPE
266 : : specifies the type of the scalar result.
267 : :
268 : : When returning true, set *VECOTYPE_OUT to the vector version of OTYPE.
269 : : Also set *VECITYPE_OUT to the vector version of ITYPE if VECITYPE_OUT
270 : : is nonnull. */
271 : :
272 : : static bool
273 : 2677 : vect_supportable_conv_optab_p (vec_info *vinfo, tree otype, tree_code code,
274 : : tree itype, tree *vecotype_out,
275 : : tree *vecitype_out = NULL,
276 : : enum optab_subtype subtype = optab_default)
277 : : {
278 : 2677 : tree vecitype = get_vectype_for_scalar_type (vinfo, itype);
279 : 2677 : tree vecotype = get_vectype_for_scalar_type (vinfo, otype);
280 : 2677 : if (!vecitype || !vecotype)
281 : : return false;
282 : :
283 : 2443 : if (!directly_supported_p (code, vecotype, vecitype, subtype))
284 : : return false;
285 : :
286 : 470 : *vecotype_out = vecotype;
287 : 470 : if (vecitype_out)
288 : 470 : *vecitype_out = vecitype;
289 : : return true;
290 : : }
291 : :
292 : : /* Round bit precision PRECISION up to a full element. */
293 : :
294 : : static unsigned int
295 : 2971295 : vect_element_precision (unsigned int precision)
296 : : {
297 : 0 : precision = 1 << ceil_log2 (precision);
298 : 4445344 : return MAX (precision, BITS_PER_UNIT);
299 : : }
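/* Worked example (illustrative): a 12-bit precision requirement is
   rounded up to the next power of two, 1 << ceil_log2 (12) == 16,
   while a 1-bit requirement is rounded up to BITS_PER_UNIT (8 on
   typical targets), the smallest vector element width.  */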
300 : :
301 : : /* If OP is defined by a statement that's being considered for vectorization,
302 : : return information about that statement, otherwise return NULL. */
303 : :
304 : : static stmt_vec_info
305 : 318360 : vect_get_internal_def (vec_info *vinfo, tree op)
306 : : {
307 : 318360 : stmt_vec_info def_stmt_info = vinfo->lookup_def (op);
308 : 318360 : if (def_stmt_info
309 : 303509 : && STMT_VINFO_DEF_TYPE (def_stmt_info) == vect_internal_def)
310 : 293724 : return vect_stmt_to_vectorize (def_stmt_info);
311 : : return NULL;
312 : : }
313 : :
314 : : /* Holds information about an input operand after some sign changes
315 : : and type promotions have been peeled away. */
316 : : class vect_unpromoted_value {
317 : : public:
318 : : vect_unpromoted_value ();
319 : :
320 : : void set_op (tree, vect_def_type, stmt_vec_info = NULL);
321 : :
322 : : /* The value obtained after peeling away zero or more casts. */
323 : : tree op;
324 : :
325 : : /* The type of OP. */
326 : : tree type;
327 : :
328 : : /* The definition type of OP. */
329 : : vect_def_type dt;
330 : :
331 : : /* If OP is the result of peeling at least one cast, and if the cast
332 : : of OP itself is a vectorizable statement, CASTER identifies that
333 : : statement, otherwise it is null. */
334 : : stmt_vec_info caster;
335 : : };
336 : :
337 : 284999926 : inline vect_unpromoted_value::vect_unpromoted_value ()
338 : 284999926 : : op (NULL_TREE),
339 : 284999926 : type (NULL_TREE),
340 : 284999926 : dt (vect_uninitialized_def),
341 : 2935361 : caster (NULL)
342 : : {
343 : : }
344 : :
345 : : /* Set the operand to OP_IN, its definition type to DT_IN, and the
346 : : statement that casts it to CASTER_IN. */
347 : :
348 : : inline void
349 : 10530174 : vect_unpromoted_value::set_op (tree op_in, vect_def_type dt_in,
350 : : stmt_vec_info caster_in)
351 : : {
352 : 10530174 : op = op_in;
353 : 10530174 : type = TREE_TYPE (op);
354 : 10530174 : dt = dt_in;
355 : 10530174 : caster = caster_in;
356 : 10530174 : }
357 : :
358 : : /* If OP is a vectorizable SSA name, strip a sequence of integer conversions
359 : : to reach some vectorizable inner operand OP', continuing as long as it
360 : : is possible to convert OP' back to OP using a possible sign change
361 : : followed by a possible promotion P. Return this OP', or null if OP is
362 : : not a vectorizable SSA name. If there is a promotion P, describe its
363 : : input in UNPROM, otherwise describe OP' in UNPROM. If SINGLE_USE_P
364 : : is nonnull, set *SINGLE_USE_P to false if any of the SSA names involved
365 : : have more than one user.
366 : :
367 : : A successful return means that it is possible to go from OP' to OP
368 : : via UNPROM. The cast from OP' to UNPROM is at most a sign change,
369 : : whereas the cast from UNPROM to OP might be a promotion, a sign
370 : : change, or a nop.
371 : :
372 : : E.g. say we have:
373 : :
374 : : signed short *ptr = ...;
375 : : signed short C = *ptr;
376 : : unsigned short B = (unsigned short) C; // sign change
377 : : signed int A = (signed int) B; // unsigned promotion
378 : : ...possible other uses of A...
379 : : unsigned int OP = (unsigned int) A; // sign change
380 : :
381 : : In this case it's possible to go directly from C to OP using:
382 : :
383 : : OP = (unsigned int) (unsigned short) C;
384 : : +------------+ +--------------+
385 : : promotion sign change
386 : :
387 : : so OP' would be C. The input to the promotion is B, so UNPROM
388 : : would describe B. */
389 : :
390 : : static tree
391 : 7757599 : vect_look_through_possible_promotion (vec_info *vinfo, tree op,
392 : : vect_unpromoted_value *unprom,
393 : : bool *single_use_p = NULL)
394 : : {
395 : 7757599 : tree op_type = TREE_TYPE (op);
396 : 7757599 : if (!INTEGRAL_TYPE_P (op_type))
397 : : return NULL_TREE;
398 : :
399 : 7726204 : tree res = NULL_TREE;
400 : 7726204 : unsigned int orig_precision = TYPE_PRECISION (op_type);
401 : 7726204 : unsigned int min_precision = orig_precision;
402 : 7726204 : stmt_vec_info caster = NULL;
403 : 9282353 : while (TREE_CODE (op) == SSA_NAME && INTEGRAL_TYPE_P (op_type))
404 : : {
405 : : /* See whether OP is simple enough to vectorize. */
406 : 9075208 : stmt_vec_info def_stmt_info;
407 : 9075208 : gimple *def_stmt;
408 : 9075208 : vect_def_type dt;
409 : 9075208 : if (!vect_is_simple_use (op, vinfo, &dt, &def_stmt_info, &def_stmt))
410 : : break;
411 : :
412 : : /* If OP is the input of a demotion, skip over it to see whether
413 : : OP is itself the result of a promotion. If so, the combined
414 : : effect of the promotion and the demotion might fit the required
415 : : pattern, otherwise neither operation fits.
416 : :
417 : : This copes with cases such as the result of an arithmetic
418 : : operation being truncated before being stored, and where that
419 : : arithmetic operation has been recognized as an over-widened one. */
420 : 9070365 : if (TYPE_PRECISION (op_type) <= min_precision)
421 : : {
422 : : /* Use OP as the UNPROM described above if we haven't yet
423 : : found a promotion, or if using the new input preserves the
424 : : sign of the previous promotion. */
425 : 8945750 : if (!res
426 : 1327670 : || TYPE_PRECISION (unprom->type) == orig_precision
427 : 36752 : || TYPE_SIGN (unprom->type) == TYPE_SIGN (op_type)
428 : 8979875 : || (TYPE_UNSIGNED (op_type)
429 : 22534 : && TYPE_PRECISION (op_type) < TYPE_PRECISION (unprom->type)))
430 : : {
431 : 8912225 : unprom->set_op (op, dt, caster);
432 : 8912225 : min_precision = TYPE_PRECISION (op_type);
433 : : }
434 : : /* Stop if we've already seen a promotion and if this
435 : : conversion does more than change the sign. */
436 : 33525 : else if (TYPE_PRECISION (op_type)
437 : 33525 : != TYPE_PRECISION (unprom->type))
438 : : break;
439 : :
440 : : /* The sequence now extends to OP. */
441 : : res = op;
442 : : }
443 : :
444 : : /* See whether OP is defined by a cast. Record it as CASTER if
445 : : the cast is potentially vectorizable. */
446 : 9070324 : if (!def_stmt)
447 : : break;
448 : 8864075 : caster = def_stmt_info;
449 : :
450 : : /* Ignore pattern statements, since we don't link uses for them. */
451 : 8864075 : if (caster
452 : 8864075 : && single_use_p
453 : 1795268 : && !STMT_VINFO_RELATED_STMT (caster)
454 : 10521611 : && !has_single_use (res))
455 : 991796 : *single_use_p = false;
456 : :
457 : 16383134 : gassign *assign = dyn_cast <gassign *> (def_stmt);
458 : 5596484 : if (!assign || !CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (def_stmt)))
459 : : break;
460 : :
461 : : /* Continue with the input to the cast. */
462 : 1556149 : op = gimple_assign_rhs1 (def_stmt);
463 : 1556149 : op_type = TREE_TYPE (op);
464 : : }
465 : : return res;
466 : : }
467 : :
468 : : /* OP is an integer operand to an operation that returns TYPE, and we
469 : : want to treat the operation as a widening one. So far we can treat
470 : : it as widening from *COMMON_TYPE.
471 : :
472 : : Return true if OP is suitable for such a widening operation,
473 : : either widening from *COMMON_TYPE or from some supertype of it.
474 : : Update *COMMON_TYPE to the supertype in the latter case.
475 : :
476 : : SHIFT_P is true if OP is a shift amount. */
477 : :
478 : : static bool
479 : 274323 : vect_joust_widened_integer (tree type, bool shift_p, tree op,
480 : : tree *common_type)
481 : : {
482 : : /* Calculate the minimum precision required by OP, without changing
483 : : the sign of either operand. */
484 : 274323 : unsigned int precision;
485 : 274323 : if (shift_p)
486 : : {
487 : 12982 : if (!wi::leu_p (wi::to_widest (op), TYPE_PRECISION (type) / 2))
488 : : return false;
489 : 10444 : precision = TREE_INT_CST_LOW (op);
490 : : }
491 : : else
492 : : {
493 : 261341 : precision = wi::min_precision (wi::to_widest (op),
494 : 261341 : TYPE_SIGN (*common_type));
495 : 261341 : if (precision * 2 > TYPE_PRECISION (type))
496 : : return false;
497 : : }
498 : :
499 : : /* If OP requires a wider type, switch to that type. The checks
500 : : above ensure that this is still narrower than the result. */
501 : 257730 : precision = vect_element_precision (precision);
502 : 257730 : if (TYPE_PRECISION (*common_type) < precision)
503 : 7103 : *common_type = build_nonstandard_integer_type
504 : 7103 : (precision, TYPE_UNSIGNED (*common_type));
505 : : return true;
506 : : }
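/* Worked example (illustrative): widening from *COMMON_TYPE ==
   unsigned char to a 32-bit TYPE with the constant OP == 1000.
   wi::min_precision (1000, UNSIGNED) == 10 and 10 * 2 <= 32, so the
   constant qualifies; vect_element_precision rounds 10 up to 16 and
   *COMMON_TYPE is widened to a 16-bit unsigned type.  */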
507 : :
508 : : /* Return true if the common supertype of NEW_TYPE and *COMMON_TYPE
509 : : is narrower than TYPE, storing the supertype in *COMMON_TYPE if so. */
510 : :
511 : : static bool
512 : 40999 : vect_joust_widened_type (tree type, tree new_type, tree *common_type)
513 : : {
514 : 40999 : if (types_compatible_p (*common_type, new_type))
515 : : return true;
516 : :
517 : : /* See if *COMMON_TYPE can hold all values of NEW_TYPE. */
518 : 7307 : if ((TYPE_PRECISION (new_type) < TYPE_PRECISION (*common_type))
519 : 7307 : && (TYPE_UNSIGNED (new_type) || !TYPE_UNSIGNED (*common_type)))
520 : : return true;
521 : :
522 : : /* See if NEW_TYPE can hold all values of *COMMON_TYPE. */
523 : 6698 : if (TYPE_PRECISION (*common_type) < TYPE_PRECISION (new_type)
524 : 6698 : && (TYPE_UNSIGNED (*common_type) || !TYPE_UNSIGNED (new_type)))
525 : : {
526 : 350 : *common_type = new_type;
527 : 350 : return true;
528 : : }
529 : :
530 : : /* We have mismatched signs, with the signed type being
531 : : no wider than the unsigned type. In this case we need
532 : : a wider signed type. */
533 : 6348 : unsigned int precision = MAX (TYPE_PRECISION (*common_type),
534 : : TYPE_PRECISION (new_type));
535 : 6348 : precision *= 2;
536 : :
537 : 6348 : if (precision * 2 > TYPE_PRECISION (type))
538 : : return false;
539 : :
540 : 37 : *common_type = build_nonstandard_integer_type (precision, false);
541 : 37 : return true;
542 : : }
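/* Worked example (illustrative): jousting NEW_TYPE == signed char
   against *COMMON_TYPE == unsigned char for a 32-bit result TYPE.
   Neither 8-bit type can hold all values of the other, so a wider
   signed type is required: precision = MAX (8, 8) * 2 == 16, and
   since 16 * 2 <= 32, *COMMON_TYPE becomes a 16-bit signed type.  */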
543 : :
544 : : /* Check whether STMT_INFO can be viewed as a tree of integer operations
545 : : in which each node either performs CODE or WIDENED_CODE, and where
546 : : each leaf operand is narrower than the result of STMT_INFO. MAX_NOPS
547 : : specifies the maximum number of leaf operands. SHIFT_P says whether
548 : : CODE and WIDENED_CODE are some sort of shift.
549 : :
550 : : If STMT_INFO is such a tree, return the number of leaf operands
551 : : and describe them in UNPROM[0] onwards. Also set *COMMON_TYPE
552 : : to a type that (a) is narrower than the result of STMT_INFO and
553 : : (b) can hold all leaf operand values.
554 : :
555 : : If SUBTYPE is nonnull, allow the operands to differ in sign but not
556 : : in precision. SUBTYPE is updated to reflect this.
558 : :
559 : : Return 0 if STMT_INFO isn't such a tree, or if no such COMMON_TYPE
560 : : exists. */
561 : :
562 : : static unsigned int
563 : 120443220 : vect_widened_op_tree (vec_info *vinfo, stmt_vec_info stmt_info, tree_code code,
564 : : code_helper widened_code, bool shift_p,
565 : : unsigned int max_nops,
566 : : vect_unpromoted_value *unprom, tree *common_type,
567 : : enum optab_subtype *subtype = NULL)
568 : : {
569 : : /* Check for an integer operation with the right code. */
570 : 120443220 : gimple* stmt = stmt_info->stmt;
571 : 120443220 : if (!(is_gimple_assign (stmt) || is_gimple_call (stmt)))
572 : : return 0;
573 : :
574 : 96391226 : code_helper rhs_code;
575 : 96391226 : if (is_gimple_assign (stmt))
576 : 82228825 : rhs_code = gimple_assign_rhs_code (stmt);
577 : 14162401 : else if (is_gimple_call (stmt))
578 : 14162401 : rhs_code = gimple_call_combined_fn (stmt);
579 : : else
580 : : return 0;
581 : :
582 : 96391226 : if (rhs_code != code
583 : 96391226 : && rhs_code != widened_code)
584 : : return 0;
585 : :
586 : 5865014 : tree lhs = gimple_get_lhs (stmt);
587 : 5865014 : tree type = TREE_TYPE (lhs);
588 : 5865014 : if (!INTEGRAL_TYPE_P (type))
589 : : return 0;
590 : :
591 : : /* Assume that both operands will be leaf operands. */
592 : 5322177 : max_nops -= 2;
593 : :
594 : : /* Check the operands. */
595 : 5322177 : unsigned int next_op = 0;
596 : 6006886 : for (unsigned int i = 0; i < 2; ++i)
597 : : {
598 : 5714126 : vect_unpromoted_value *this_unprom = &unprom[next_op];
599 : 5714126 : unsigned int nops = 1;
600 : 5714126 : tree op = gimple_arg (stmt, i);
601 : 5714126 : if (i == 1 && TREE_CODE (op) == INTEGER_CST)
602 : : {
603 : : /* We already have a common type from earlier operands.
604 : : Update it to account for OP. */
605 : 274323 : this_unprom->set_op (op, vect_constant_def);
606 : 274323 : if (!vect_joust_widened_integer (type, shift_p, op, common_type))
607 : : return 0;
608 : : }
609 : : else
610 : : {
611 : : /* Only allow shifts by constants. */
612 : 5439803 : if (shift_p && i == 1)
613 : : return 0;
614 : :
615 : 5435083 : if (rhs_code != code)
616 : : {
617 : : /* If rhs_code is widened_code, don't look through further
618 : : possible promotions, there is a promotion already embedded
619 : : in the WIDEN_*_EXPR. */
620 : 1515 : if (TREE_CODE (op) != SSA_NAME
621 : 1515 : || !INTEGRAL_TYPE_P (TREE_TYPE (op)))
622 : 0 : return 0;
623 : :
624 : 1515 : stmt_vec_info def_stmt_info;
625 : 1515 : gimple *def_stmt;
626 : 1515 : vect_def_type dt;
627 : 1515 : if (!vect_is_simple_use (op, vinfo, &dt, &def_stmt_info,
628 : : &def_stmt))
629 : : return 0;
630 : 1515 : this_unprom->set_op (op, dt, NULL);
631 : : }
632 : 5433568 : else if (!vect_look_through_possible_promotion (vinfo, op,
633 : : this_unprom))
634 : : return 0;
635 : :
636 : 5329069 : if (TYPE_PRECISION (this_unprom->type) == TYPE_PRECISION (type))
637 : : {
638 : : /* The operand isn't widened. If STMT_INFO has the code
639 : : for an unwidened operation, recursively check whether
640 : : this operand is a node of the tree. */
641 : 4891856 : if (rhs_code != code
642 : 4891856 : || max_nops == 0
643 : 4892265 : || this_unprom->dt != vect_internal_def)
644 : : return 0;
645 : :
646 : : /* Give back the leaf slot allocated above now that we're
647 : : not treating this as a leaf operand. */
648 : 409 : max_nops += 1;
649 : :
650 : : /* Recursively process the definition of the operand. */
651 : 409 : stmt_vec_info def_stmt_info
652 : 409 : = vect_get_internal_def (vinfo, this_unprom->op);
653 : :
654 : 409 : nops = vect_widened_op_tree (vinfo, def_stmt_info, code,
655 : : widened_code, shift_p, max_nops,
656 : : this_unprom, common_type,
657 : : subtype);
658 : 409 : if (nops == 0)
659 : : return 0;
660 : :
661 : 273 : max_nops -= nops;
662 : : }
663 : : else
664 : : {
665 : : /* Make sure that the operand is narrower than the result. */
666 : 437213 : if (TYPE_PRECISION (this_unprom->type) * 2
667 : 437213 : > TYPE_PRECISION (type))
668 : : return 0;
669 : :
670 : : /* Update COMMON_TYPE for the new operand. */
671 : 432813 : if (i == 0)
672 : 391814 : *common_type = this_unprom->type;
673 : 40999 : else if (!vect_joust_widened_type (type, this_unprom->type,
674 : : common_type))
675 : : {
676 : 6311 : if (subtype)
677 : : {
678 : : /* See if we can sign extend the smaller type. */
679 : 204 : if (TYPE_PRECISION (this_unprom->type)
680 : 204 : > TYPE_PRECISION (*common_type))
681 : 27 : *common_type = this_unprom->type;
682 : 204 : *subtype = optab_vector_mixed_sign;
683 : : }
684 : : else
685 : : return 0;
686 : : }
687 : : }
688 : : }
689 : 684709 : next_op += nops;
690 : : }
691 : : return next_op;
692 : : }
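/* Illustrative source-level example (an assumption about typical
   input, not taken from this file): with CODE == PLUS_EXPR and
   MAX_NOPS == 4, the statement tree

     unsigned char a, b, c;
     unsigned int t1 = (unsigned int) a + (unsigned int) b;
     unsigned int t2 = t1 + (unsigned int) c;

   analyzed from T2 yields three leaf operands {a, b, c} in UNPROM[]
   and *COMMON_TYPE == unsigned char: T1 is an unwidened inner node,
   so it is recursed into via vect_get_internal_def rather than
   treated as a leaf.  */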
693 : :
694 : : /* Helper to return a new pattern temporary of type TYPE for STMT. If STMT
695 : : is NULL, the caller must set SSA_NAME_DEF_STMT for the returned SSA var. */
696 : :
697 : : static tree
698 : 1848020 : vect_recog_temp_ssa_var (tree type, gimple *stmt = NULL)
699 : : {
700 : 0 : return make_temp_ssa_name (type, stmt, "patt");
701 : : }
702 : :
703 : : /* STMT2_INFO describes a type conversion that could be split into STMT1
704 : : followed by a version of STMT2_INFO that takes NEW_RHS as its first
705 : : input. Try to do this using pattern statements, returning true on
706 : : success. */
707 : :
708 : : static bool
709 : 29995 : vect_split_statement (vec_info *vinfo, stmt_vec_info stmt2_info, tree new_rhs,
710 : : gimple *stmt1, tree vectype)
711 : : {
712 : 29995 : if (is_pattern_stmt_p (stmt2_info))
713 : : {
714 : : /* STMT2_INFO is part of a pattern. Get the statement to which
715 : : the pattern is attached. */
716 : 431 : stmt_vec_info orig_stmt2_info = STMT_VINFO_RELATED_STMT (stmt2_info);
717 : 431 : vect_init_pattern_stmt (vinfo, stmt1, orig_stmt2_info, vectype);
718 : :
719 : 431 : if (dump_enabled_p ())
720 : 19 : dump_printf_loc (MSG_NOTE, vect_location,
721 : : "Splitting pattern statement: %G", stmt2_info->stmt);
722 : :
723 : : /* Since STMT2_INFO is a pattern statement, we can change it
724 : : in-situ without worrying about changing the code for the
725 : : containing block. */
726 : 431 : gimple_assign_set_rhs1 (stmt2_info->stmt, new_rhs);
727 : :
728 : 431 : if (dump_enabled_p ())
729 : : {
730 : 19 : dump_printf_loc (MSG_NOTE, vect_location, "into: %G", stmt1);
731 : 19 : dump_printf_loc (MSG_NOTE, vect_location, "and: %G",
732 : : stmt2_info->stmt);
733 : : }
734 : :
735 : 431 : gimple_seq *def_seq = &STMT_VINFO_PATTERN_DEF_SEQ (orig_stmt2_info);
736 : 431 : if (STMT_VINFO_RELATED_STMT (orig_stmt2_info) == stmt2_info)
737 : : /* STMT2_INFO is the actual pattern statement. Add STMT1
738 : : to the end of the definition sequence. */
739 : 428 : gimple_seq_add_stmt_without_update (def_seq, stmt1);
740 : : else
741 : : {
742 : : /* STMT2_INFO belongs to the definition sequence. Insert STMT1
743 : : before it. */
744 : 3 : gimple_stmt_iterator gsi = gsi_for_stmt (stmt2_info->stmt, def_seq);
745 : 3 : gsi_insert_before_without_update (&gsi, stmt1, GSI_SAME_STMT);
746 : : }
747 : 431 : return true;
748 : : }
749 : : else
750 : : {
751 : : /* STMT2_INFO doesn't yet have a pattern. Try to create a
752 : : two-statement pattern now. */
753 : 29564 : gcc_assert (!STMT_VINFO_RELATED_STMT (stmt2_info));
754 : 29564 : tree lhs_type = TREE_TYPE (gimple_get_lhs (stmt2_info->stmt));
755 : 29564 : tree lhs_vectype = get_vectype_for_scalar_type (vinfo, lhs_type);
756 : 29564 : if (!lhs_vectype)
757 : : return false;
758 : :
759 : 29564 : if (dump_enabled_p ())
760 : 1887 : dump_printf_loc (MSG_NOTE, vect_location,
761 : : "Splitting statement: %G", stmt2_info->stmt);
762 : :
763 : : /* Add STMT1 as a singleton pattern definition sequence. */
764 : 29564 : gimple_seq *def_seq = &STMT_VINFO_PATTERN_DEF_SEQ (stmt2_info);
765 : 29564 : vect_init_pattern_stmt (vinfo, stmt1, stmt2_info, vectype);
766 : 29564 : gimple_seq_add_stmt_without_update (def_seq, stmt1);
767 : :
768 : : /* Build the second of the two pattern statements. */
769 : 29564 : tree new_lhs = vect_recog_temp_ssa_var (lhs_type, NULL);
770 : 29564 : gassign *new_stmt2 = gimple_build_assign (new_lhs, NOP_EXPR, new_rhs);
771 : 29564 : vect_set_pattern_stmt (vinfo, new_stmt2, stmt2_info, lhs_vectype);
772 : :
773 : 29564 : if (dump_enabled_p ())
774 : : {
775 : 1887 : dump_printf_loc (MSG_NOTE, vect_location,
776 : : "into pattern statements: %G", stmt1);
777 : 1887 : dump_printf_loc (MSG_NOTE, vect_location, "and: %G",
778 : : (gimple *) new_stmt2);
779 : : }
780 : :
781 : 29564 : return true;
782 : : }
783 : : }
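/* Illustrative example (not part of the original source): given the
   conversion statement

     int x = (int) c;   // c of type unsigned char

   and a desired 16-bit mid-way point, the split produces

     patt_1 = (unsigned short) c;
     x = (int) patt_1;

   where the first statement is appended to the pattern definition
   sequence and the second becomes (or updates) the pattern
   statement.  */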
784 : :
785 : : /* Look for the following pattern
786 : : X = x[i]
787 : : Y = y[i]
788 : : DIFF = X - Y
789 : : DAD = ABS_EXPR<DIFF>
790 : :
791 : : ABS_STMT should point to a statement of code ABS_EXPR or ABSU_EXPR.
792 : : HALF_TYPE and UNPROM will be set if the statement is found to
793 : : be a widened operation.
794 : : DIFF_STMT will be set to the MINUS_EXPR
795 : : statement that precedes the ABS_STMT if it is a MINUS_EXPR..
796 : : statement that precedes the ABS_STMT, if that statement is a MINUS_EXPR.
797 : : static bool
798 : 20227330 : vect_recog_absolute_difference (vec_info *vinfo, gassign *abs_stmt,
799 : : tree *half_type,
800 : : vect_unpromoted_value unprom[2],
801 : : gassign **diff_stmt)
802 : : {
803 : 20227330 : if (!abs_stmt)
804 : : return false;
805 : :
806 : : /* FORNOW. Can continue analyzing the def-use chain when this stmt is in a phi
807 : : inside the loop (in case we are analyzing an outer-loop). */
808 : 20227330 : enum tree_code code = gimple_assign_rhs_code (abs_stmt);
809 : 20227330 : if (code != ABS_EXPR && code != ABSU_EXPR)
810 : : return false;
811 : :
812 : 22999 : tree abs_oprnd = gimple_assign_rhs1 (abs_stmt);
813 : 22999 : tree abs_type = TREE_TYPE (abs_oprnd);
814 : 22999 : if (!abs_oprnd)
815 : : return false;
816 : 17831 : if (!ANY_INTEGRAL_TYPE_P (abs_type)
817 : 5464 : || TYPE_OVERFLOW_WRAPS (abs_type)
818 : 28327 : || TYPE_UNSIGNED (abs_type))
819 : : return false;
820 : :
821 : : /* Peel off conversions from the ABS input. This can involve sign
822 : : changes (e.g. from an unsigned subtraction to a signed ABS input)
823 : : or signed promotion, but it can't include unsigned promotion.
824 : : (Note that ABS of an unsigned promotion should have been folded
825 : : away before now anyway.) */
826 : 5328 : vect_unpromoted_value unprom_diff;
827 : 5328 : abs_oprnd = vect_look_through_possible_promotion (vinfo, abs_oprnd,
828 : : &unprom_diff);
829 : 5328 : if (!abs_oprnd)
830 : : return false;
831 : 5032 : if (TYPE_PRECISION (unprom_diff.type) != TYPE_PRECISION (abs_type)
832 : 5032 : && TYPE_UNSIGNED (unprom_diff.type))
833 : : return false;
834 : :
835 : : /* We then detect if the operand of abs_expr is defined by a minus_expr. */
836 : 5032 : stmt_vec_info diff_stmt_vinfo = vect_get_internal_def (vinfo, abs_oprnd);
837 : 5032 : if (!diff_stmt_vinfo)
838 : : return false;
839 : :
840 : 4871 : gassign *diff = dyn_cast <gassign *> (STMT_VINFO_STMT (diff_stmt_vinfo));
841 : 4871 : if (diff_stmt && diff
842 : 3782 : && gimple_assign_rhs_code (diff) == MINUS_EXPR
843 : 6645 : && TYPE_OVERFLOW_UNDEFINED (TREE_TYPE (abs_oprnd)))
844 : 330 : *diff_stmt = diff;
845 : :
846 : : /* FORNOW. Can continue analyzing the def-use chain when this stmt is in a phi
847 : : inside the loop (in case we are analyzing an outer-loop). */
848 : 4871 : if (vect_widened_op_tree (vinfo, diff_stmt_vinfo,
849 : 4871 : MINUS_EXPR, IFN_VEC_WIDEN_MINUS,
850 : : false, 2, unprom, half_type))
851 : : return true;
852 : :
853 : : return false;
854 : : }
855 : :
856 : : /* Convert UNPROM to TYPE and return the result, adding new statements
857 : : to STMT_INFO's pattern definition statements if no better way is
858 : : available. VECTYPE is the vector form of TYPE.
859 : :
860 : : If SUBTYPE then convert the type based on the subtype. */
861 : :
862 : : static tree
863 : 436908 : vect_convert_input (vec_info *vinfo, stmt_vec_info stmt_info, tree type,
864 : : vect_unpromoted_value *unprom, tree vectype,
865 : : enum optab_subtype subtype = optab_default)
866 : : {
867 : : /* Update the type if the signs differ. */
868 : 436908 : if (subtype == optab_vector_mixed_sign)
869 : : {
870 : 194 : gcc_assert (!TYPE_UNSIGNED (type));
871 : 194 : if (TYPE_UNSIGNED (TREE_TYPE (unprom->op)))
872 : : {
873 : 97 : type = unsigned_type_for (type);
874 : 97 : vectype = unsigned_type_for (vectype);
875 : : }
876 : : }
877 : :
878 : : /* Check for a no-op conversion. */
879 : 436908 : if (types_compatible_p (type, TREE_TYPE (unprom->op)))
880 : 144632 : return unprom->op;
881 : :
882 : : /* Allow the caller to create constant vect_unpromoted_values. */
883 : 292276 : if (TREE_CODE (unprom->op) == INTEGER_CST)
884 : 177469 : return wide_int_to_tree (type, wi::to_widest (unprom->op));
885 : :
886 : 114807 : tree input = unprom->op;
887 : 114807 : if (unprom->caster)
888 : : {
889 : 61401 : tree lhs = gimple_get_lhs (unprom->caster->stmt);
890 : 61401 : tree lhs_type = TREE_TYPE (lhs);
891 : :
892 : : /* If the result of the existing cast is the right width, use it
893 : : instead of the source of the cast. */
894 : 61401 : if (TYPE_PRECISION (lhs_type) == TYPE_PRECISION (type))
895 : : input = lhs;
896 : : /* If the precision we want is between the source and result
897 : : precisions of the existing cast, try splitting the cast into
898 : : two and tapping into a mid-way point. */
899 : 59372 : else if (TYPE_PRECISION (lhs_type) > TYPE_PRECISION (type)
900 : 59372 : && TYPE_PRECISION (type) > TYPE_PRECISION (unprom->type))
901 : : {
902 : : /* In order to preserve the semantics of the original cast,
903 : : give the mid-way point the same signedness as the input value.
904 : :
905 : : It would be possible to use a signed type here instead if
906 : : TYPE is signed and UNPROM->TYPE is unsigned, but that would
907 : : make the sign of the midtype sensitive to the order in
908 : : which we process the statements, since the signedness of
909 : : TYPE is the signedness required by just one of possibly
910 : : many users. Also, unsigned promotions are usually as cheap
911 : : as or cheaper than signed ones, so it's better to keep an
912 : : unsigned promotion. */
913 : 29995 : tree midtype = build_nonstandard_integer_type
914 : 29995 : (TYPE_PRECISION (type), TYPE_UNSIGNED (unprom->type));
915 : 29995 : tree vec_midtype = get_vectype_for_scalar_type (vinfo, midtype);
916 : 29995 : if (vec_midtype)
917 : : {
918 : 29995 : input = vect_recog_temp_ssa_var (midtype, NULL);
919 : 29995 : gassign *new_stmt = gimple_build_assign (input, NOP_EXPR,
920 : : unprom->op);
921 : 29995 : if (!vect_split_statement (vinfo, unprom->caster, input, new_stmt,
922 : : vec_midtype))
923 : 0 : append_pattern_def_seq (vinfo, stmt_info,
924 : : new_stmt, vec_midtype);
925 : : }
926 : : }
927 : :
928 : : /* See if we can reuse an existing result. */
929 : 61401 : if (types_compatible_p (type, TREE_TYPE (input)))
930 : : return input;
931 : : }
932 : :
933 : : /* We need a new conversion statement. */
934 : 93281 : tree new_op = vect_recog_temp_ssa_var (type, NULL);
935 : 93281 : gassign *new_stmt = gimple_build_assign (new_op, NOP_EXPR, input);
936 : :
937 : : /* If OP is an external value, see if we can insert the new statement
938 : : on an incoming edge. */
939 : 93281 : if (input == unprom->op && unprom->dt == vect_external_def)
940 : 9409 : if (edge e = vect_get_external_def_edge (vinfo, input))
941 : : {
942 : 705 : basic_block new_bb = gsi_insert_on_edge_immediate (e, new_stmt);
943 : 705 : gcc_assert (!new_bb);
944 : : return new_op;
945 : : }
946 : :
947 : : /* As a (common) last resort, add the statement to the pattern itself. */
948 : 92576 : append_pattern_def_seq (vinfo, stmt_info, new_stmt, vectype);
949 : 92576 : return new_op;
950 : : }
951 : :
952 : : /* Invoke vect_convert_input for N elements of UNPROM and store the
953 : : result in the corresponding elements of RESULT.
954 : :
955 : : If SUBTYPE then convert the type based on the subtype. */
956 : :
957 : : static void
958 : 222077 : vect_convert_inputs (vec_info *vinfo, stmt_vec_info stmt_info, unsigned int n,
959 : : tree *result, tree type, vect_unpromoted_value *unprom,
960 : : tree vectype, enum optab_subtype subtype = optab_default)
961 : : {
962 : 658846 : for (unsigned int i = 0; i < n; ++i)
963 : : {
964 : : unsigned int j;
965 : 651141 : for (j = 0; j < i; ++j)
966 : 214692 : if (unprom[j].op == unprom[i].op)
967 : : break;
968 : :
969 : 436769 : if (j < i)
970 : 320 : result[i] = result[j];
971 : : else
972 : 436449 : result[i] = vect_convert_input (vinfo, stmt_info,
973 : 436449 : type, &unprom[i], vectype, subtype);
974 : : }
975 : 222077 : }
976 : :
977 : : /* The caller has created a (possibly empty) sequence of pattern definition
978 : : statements followed by a single statement PATTERN_STMT. Cast the result
979 : : of this final statement to TYPE. If a new statement is needed, add
980 : : PATTERN_STMT to the end of STMT_INFO's pattern definition statements
981 : : and return the new statement, otherwise return PATTERN_STMT as-is.
982 : : VECITYPE is the vector form of PATTERN_STMT's result type. */
983 : :
984 : : static gimple *
985 : 246669 : vect_convert_output (vec_info *vinfo, stmt_vec_info stmt_info, tree type,
986 : : gimple *pattern_stmt, tree vecitype)
987 : : {
988 : 246669 : tree lhs = gimple_get_lhs (pattern_stmt);
989 : 246669 : if (!types_compatible_p (type, TREE_TYPE (lhs)))
990 : : {
991 : 221403 : append_pattern_def_seq (vinfo, stmt_info, pattern_stmt, vecitype);
992 : 221403 : tree cast_var = vect_recog_temp_ssa_var (type, NULL);
993 : 221403 : pattern_stmt = gimple_build_assign (cast_var, NOP_EXPR, lhs);
994 : : }
995 : 246669 : return pattern_stmt;
996 : : }
997 : :
998 : : /* Return true if STMT_INFO describes a reduction for which reassociation
999 : : is allowed. If STMT_INFO is part of a group, assume that it's part of
1000 : : a reduction chain and optimistically assume that all statements
1001 : : except the last allow reassociation.
1002 : : Also require it to have code CODE and to be a reduction
1003 : : in the outermost loop. When returning true, store the operands in
1004 : : *OP0_OUT and *OP1_OUT. */
1005 : :
1006 : : static bool
1007 : 89848134 : vect_reassociating_reduction_p (vec_info *vinfo,
1008 : : stmt_vec_info stmt_info, tree_code code,
1009 : : tree *op0_out, tree *op1_out)
1010 : : {
1011 : 89848134 : loop_vec_info loop_info = dyn_cast <loop_vec_info> (vinfo);
1012 : 9874623 : if (!loop_info)
1013 : : return false;
1014 : :
1015 : 9874623 : gassign *assign = dyn_cast <gassign *> (stmt_info->stmt);
1016 : 10700061 : if (!assign || gimple_assign_rhs_code (assign) != code)
1017 : : return false;
1018 : :
1019 : : /* We don't allow changing the order of the computation in the inner-loop
1020 : : when doing outer-loop vectorization. */
1021 : 1996530 : class loop *loop = LOOP_VINFO_LOOP (loop_info);
1022 : 91739946 : if (loop && nested_in_vect_loop_p (loop, stmt_info))
1023 : : return false;
1024 : :
1025 : 1946574 : if (!vect_is_reduction (stmt_info))
1026 : : return false;
1027 : :
1028 : 114876 : if (needs_fold_left_reduction_p (TREE_TYPE (gimple_assign_lhs (assign)),
1029 : 114876 : code))
1030 : : return false;
1031 : :
1032 : 104718 : *op0_out = gimple_assign_rhs1 (assign);
1033 : 104718 : *op1_out = gimple_assign_rhs2 (assign);
1034 : 104718 : if (commutative_tree_code (code) && STMT_VINFO_REDUC_IDX (stmt_info) == 0)
1035 : 41176 : std::swap (*op0_out, *op1_out);
1036 : : return true;
1037 : : }
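/* Illustrative example (an assumption about typical input): in

     int sum = 0;
     for (int i = 0; i < n; i++)
       sum += a[i];   // sum_1 = a[i] + sum_0

   the PLUS_EXPR is a reassociable reduction: integer addition needs
   no fold-left (in-order) reduction, so *OP0_OUT gets a[i] and
   *OP1_OUT gets sum_0, the operands being swapped when the reduction
   variable appears first.  */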
1038 : :
1039 : : /* match.pd function to match
1040 : : (cond (cmp@3 a b) (convert@1 c) (convert@2 d))
1041 : : with conditions:
1042 : : 1) @1, @2, c, d, a, b are all integral type.
1043 : : 2) There's single_use for both @1 and @2.
1044 : : 3) a, c have same precision.
1045 : : 4) c and @1 have different precision.
1046 : : 5) c, d are the same type or they can differ in sign when convert is
1047 : : truncation.
1048 : :
1049 : : record a and c and d and @3. */
1050 : :
1051 : : extern bool gimple_cond_expr_convert_p (tree, tree*, tree (*)(tree));
1052 : :
1053 : : /* Function vect_recog_cond_expr_convert
1054 : :
1055 : : Try to find the following pattern:
1056 : :
1057 : : TYPE_AB A,B;
1058 : : TYPE_CD C,D;
1059 : : TYPE_E E;
1060 : : TYPE_E op_true = (TYPE_E) A;
1061 : : TYPE_E op_false = (TYPE_E) B;
1062 : :
1063 : : E = C cmp D ? op_true : op_false;
1064 : :
1065 : : where
1066 : : TYPE_PRECISION (TYPE_E) != TYPE_PRECISION (TYPE_CD);
1067 : : TYPE_PRECISION (TYPE_AB) == TYPE_PRECISION (TYPE_CD);
1068 : : single_use of op_true and op_false.
1069 : : TYPE_AB could differ in sign when (TYPE_E) A is a truncation.
1070 : :
1071 : : Input:
1072 : :
1073 : : * STMT_VINFO: The stmt from which the pattern search begins.
1074 : : here it starts with E = C cmp D ? op_true : op_false;
1075 : :
1076 : : Output:
1077 : :
1078 : : TYPE1 E' = C cmp D ? A : B;
1079 : : TYPE3 E = (TYPE3) E';
1080 : :
1081 : : There may be an extra nop_convert for A or B to handle different signedness.
1082 : :
1083 : : * TYPE_OUT: The vector type of the output of this pattern.
1084 : :
1085 : : * Return value: A new stmt that will be used to replace the sequence of
1086 : : stmts that constitute the pattern. In this case it will be:
1087 : : E = (TYPE3)E';
1088 : : E' = C cmp D ? A : B; is recorded in pattern definition statements; */
1089 : :
1090 : : static gimple *
1091 : 30018620 : vect_recog_cond_expr_convert_pattern (vec_info *vinfo,
1092 : : stmt_vec_info stmt_vinfo, tree *type_out)
1093 : : {
1094 : 30018620 : gassign *last_stmt = dyn_cast <gassign *> (stmt_vinfo->stmt);
1095 : 20322890 : tree lhs, match[4], temp, type, new_lhs, op2, op1;
1096 : 20322890 : gimple *cond_stmt;
1097 : 20322890 : gimple *pattern_stmt;
1098 : 30018589 : enum tree_code code = NOP_EXPR;
1099 : :
1100 : 20322890 : if (!last_stmt)
1101 : : return NULL;
1102 : :
1103 : 20322890 : lhs = gimple_assign_lhs (last_stmt);
1104 : :
1105 : : /* Find E = C cmp D ? (TYPE3) A : (TYPE3) B;
1106 : : TYPE_PRECISION (A) == TYPE_PRECISION (C). */
1107 : 20322890 : if (!gimple_cond_expr_convert_p (lhs, &match[0], NULL))
1108 : : return NULL;
1109 : :
1110 : 31 : if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (lhs)))
1111 : 8 : code = INTEGRAL_TYPE_P (TREE_TYPE (match[1])) ? FLOAT_EXPR : CONVERT_EXPR;
1112 : 23 : else if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (match[1])))
1113 : 0 : code = FIX_TRUNC_EXPR;
1114 : :
1115 : 31 : op1 = match[1];
1116 : 31 : op2 = match[2];
1117 : 31 : type = TREE_TYPE (op1);
1118 : : /* When op1/op2 is REAL_CST, the conversion must be CONVERT_EXPR from
1119 : : SCALAR_FLOAT_TYPE_P which is restricted in gimple_cond_expr_convert_p.
1120 : : Otherwise, the conversion could be FLOAT_EXPR, FIX_TRUNC_EXPR
1121 : : or CONVERT_EXPR. */
1122 : 31 : if (TREE_CODE (op1) == REAL_CST)
1123 : : {
1124 : 8 : op1 = const_unop (CONVERT_EXPR, TREE_TYPE (op2), op1);
1125 : 8 : type = TREE_TYPE (op2);
1126 : 8 : if (op1 == NULL_TREE)
1127 : : return NULL;
1128 : : }
1129 : 23 : else if (TREE_CODE (op2) == REAL_CST)
1130 : : {
1131 : 0 : op2 = const_unop (FLOAT_EXPR, TREE_TYPE (op1), op2);
1132 : 0 : if (op2 == NULL_TREE)
1133 : : return NULL;
1134 : : }
1135 : 23 : else if (code == NOP_EXPR)
1136 : : {
1137 : 23 : if (TYPE_SIGN (type) != TYPE_SIGN (TREE_TYPE (match[2])))
1138 : : {
1139 : 23 : op2 = vect_recog_temp_ssa_var (type, NULL);
1140 : 23 : gimple* nop_stmt = gimple_build_assign (op2, NOP_EXPR, match[2]);
1141 : 23 : append_pattern_def_seq (vinfo, stmt_vinfo, nop_stmt);
1142 : : }
1143 : : }
1144 : :
1145 : 31 : vect_pattern_detected ("vect_recog_cond_expr_convert_pattern", last_stmt);
1146 : :
1147 : 31 : temp = vect_recog_temp_ssa_var (type, NULL);
1148 : 31 : cond_stmt = gimple_build_assign (temp, build3 (COND_EXPR, type, match[3],
1149 : : op1, op2));
1150 : 31 : append_pattern_def_seq (vinfo, stmt_vinfo, cond_stmt);
1151 : 31 : new_lhs = vect_recog_temp_ssa_var (TREE_TYPE (lhs), NULL);
1152 : 31 : pattern_stmt = gimple_build_assign (new_lhs, code, temp);
1153 : 31 : *type_out = NULL_TREE;
1154 : :
1155 : 31 : if (dump_enabled_p ())
1156 : 8 : dump_printf_loc (MSG_NOTE, vect_location,
1157 : : "created pattern stmt: %G", pattern_stmt);
1158 : : return pattern_stmt;
1159 : : }
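/* Illustrative source-level example (not taken from this file):

     int a, b, c, d;
     long e = c < d ? (long) a : (long) b;

   is rewritten so that the COND_EXPR is evaluated in the narrower
   type:

     patt_1 = c < d ? a : b;
     e = (long) patt_1;

   letting the vectorizer use a vector condition whose element width
   matches that of the comparison operands.  */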
1160 : :
1161 : : /* Function vect_recog_dot_prod_pattern
1162 : :
1163 : : Try to find the following pattern:
1164 : :
1165 : : type1a x_t;
1166 : : type1b y_t;
1167 : : TYPE1 prod;
1168 : : TYPE2 sum = init;
1169 : : loop:
1170 : : sum_0 = phi <init, sum_1>
1171 : : S1 x_t = ...
1172 : : S2 y_t = ...
1173 : : S3 x_T = (TYPE1) x_t;
1174 : : S4 y_T = (TYPE1) y_t;
1175 : : S5 prod = x_T * y_T;
1176 : : [S6 prod = (TYPE2) prod; #optional]
1177 : : S7 sum_1 = prod + sum_0;
1178 : :
1179 : : where 'TYPE1' is exactly double the size of type 'type1a' and 'type1b',
1180 : : the sign of 'TYPE1' must match the sign of 'type1a' or 'type1b', but
1181 : : the signs of 'type1a' and 'type1b' themselves can differ.
1182 : :
1183 : : Input:
1184 : :
1185 : : * STMT_VINFO: The stmt from which the pattern search begins. In the
1186 : : example, when this function is called with S7, the pattern {S3,S4,S5,S6,S7}
1187 : : will be detected.
1188 : :
1189 : : Output:
1190 : :
1191 : : * TYPE_OUT: The type of the output of this pattern.
1192 : :
1193 : : * Return value: A new stmt that will be used to replace the sequence of
1194 : : stmts that constitute the pattern. In this case it will be:
1195 : : WIDEN_DOT_PRODUCT <x_t, y_t, sum_0>
1196 : :
1197 : : Note: The dot-prod idiom is a widening reduction pattern that is
1198 : : vectorized without preserving all the intermediate results. It
1199 : : produces only N/2 (widened) results (by summing up pairs of
1200 : : intermediate results) rather than all N results. Therefore, we
1201 : : cannot allow this pattern when we want to get all the results and in
1202 : : the correct order (as is the case when this computation is in an
1203 : : inner-loop nested in an outer-loop that is being vectorized). */
1204 : :
1205 : : static gimple *
1206 : 29949776 : vect_recog_dot_prod_pattern (vec_info *vinfo,
1207 : : stmt_vec_info stmt_vinfo, tree *type_out)
1208 : : {
1209 : 29949776 : tree oprnd0, oprnd1;
1210 : 29949776 : gimple *last_stmt = stmt_vinfo->stmt;
1211 : 29949776 : tree type, half_type;
1212 : 29949776 : gimple *pattern_stmt;
1213 : 29949776 : tree var;
1214 : :
1215 : : /* Look for the following pattern
1216 : : DX = (TYPE1) X;
1217 : : DY = (TYPE1) Y;
1218 : : DPROD = DX * DY;
1219 : : DDPROD = (TYPE2) DPROD;
1220 : : sum_1 = DDPROD + sum_0;
1221 : : In which
1222 : : - DX is double the size of X
1223 : : - DY is double the size of Y
1224 : : - DX, DY, DPROD all have the same type but the sign
1225 : : between X, Y and DPROD can differ.
1226 : : - sum is the same size as DPROD or bigger
1227 : : - sum has been recognized as a reduction variable.
1228 : :
1229 : : This is equivalent to:
1230 : : DPROD = X w* Y; #widen mult
1231 : : sum_1 = DPROD w+ sum_0; #widen summation
1232 : : or
1233 : : DPROD = X w* Y; #widen mult
1234 : : sum_1 = DPROD + sum_0; #summation
1235 : : */
1236 : :
1237 : : /* Starting from LAST_STMT, follow the defs of its uses in search
1238 : : of the above pattern. */
1239 : :
1240 : 29949776 : if (!vect_reassociating_reduction_p (vinfo, stmt_vinfo, PLUS_EXPR,
1241 : : &oprnd0, &oprnd1))
1242 : : return NULL;
1243 : :
1244 : 35308 : type = TREE_TYPE (gimple_get_lhs (last_stmt));
1245 : :
1246 : 35308 : vect_unpromoted_value unprom_mult;
1247 : 35308 : oprnd0 = vect_look_through_possible_promotion (vinfo, oprnd0, &unprom_mult);
1248 : :
1249 : : /* So far so good. Since last_stmt was detected as a (summation) reduction,
1250 : : we know that oprnd1 is the reduction variable (defined by a loop-header
1251 : : phi), and oprnd0 is an ssa-name defined by a stmt in the loop body.
1252 : : Left to check that oprnd0 is defined by a (widen_)mult_expr */
1253 : 35308 : if (!oprnd0)
1254 : : return NULL;
1255 : :
1256 : 24749 : stmt_vec_info mult_vinfo = vect_get_internal_def (vinfo, oprnd0);
1257 : 24749 : if (!mult_vinfo)
1258 : : return NULL;
1259 : :
1260 : : /* FORNOW. Can continue analyzing the def-use chain when this stmt is in a phi
1261 : : inside the loop (in case we are analyzing an outer-loop). */
1262 : 72603 : vect_unpromoted_value unprom0[2];
1263 : 24201 : enum optab_subtype subtype = optab_vector;
1264 : 24201 : if (!vect_widened_op_tree (vinfo, mult_vinfo, MULT_EXPR, WIDEN_MULT_EXPR,
1265 : : false, 2, unprom0, &half_type, &subtype))
1266 : : return NULL;
1267 : :
1268 : : /* If there are two widening operations, make sure they agree on the sign
1269 : : of the extension. The result of an optab_vector_mixed_sign operation
1270 : : is signed; otherwise, the result has the same sign as the operands. */
1271 : 1000 : if (TYPE_PRECISION (unprom_mult.type) != TYPE_PRECISION (type)
1272 : 1553 : && (subtype == optab_vector_mixed_sign
1273 : 553 : ? TYPE_UNSIGNED (unprom_mult.type)
1274 : 387 : : TYPE_SIGN (unprom_mult.type) != TYPE_SIGN (half_type)))
1275 : : return NULL;
1276 : :
1277 : 919 : vect_pattern_detected ("vect_recog_dot_prod_pattern", last_stmt);
1278 : :
1279 : : /* If the inputs have mixed signs, canonicalize on using the signed
1280 : : input type for analysis. This also helps when emulating mixed-sign
1281 : : operations using signed operations. */
1282 : 919 : if (subtype == optab_vector_mixed_sign)
1283 : 159 : half_type = signed_type_for (half_type);
1284 : :
1285 : 919 : tree half_vectype;
1286 : 919 : if (!vect_supportable_conv_optab_p (vinfo, type, DOT_PROD_EXPR, half_type,
1287 : : type_out, &half_vectype, subtype))
1288 : : {
1289 : : /* We can emulate a mixed-sign dot-product using a sequence of
1290 : : signed dot-products; see vect_emulate_mixed_dot_prod for details. */
1291 : 460 : if (subtype != optab_vector_mixed_sign
1292 : 460 : || !vect_supportable_conv_optab_p (vinfo, signed_type_for (type),
1293 : : DOT_PROD_EXPR, half_type,
1294 : : type_out, &half_vectype,
1295 : : optab_vector))
1296 : 449 : return NULL;
1297 : :
1298 : 11 : *type_out = signed_or_unsigned_type_for (TYPE_UNSIGNED (type),
1299 : : *type_out);
1300 : : }
1301 : :
1302 : : /* Get the inputs in the appropriate types. */
1303 : 470 : tree mult_oprnd[2];
1304 : 470 : vect_convert_inputs (vinfo, stmt_vinfo, 2, mult_oprnd, half_type,
1305 : : unprom0, half_vectype, subtype);
1306 : :
1307 : 470 : var = vect_recog_temp_ssa_var (type, NULL);
1308 : 470 : pattern_stmt = gimple_build_assign (var, DOT_PROD_EXPR,
1309 : : mult_oprnd[0], mult_oprnd[1], oprnd1);
1310 : :
1311 : 470 : return pattern_stmt;
1312 : : }
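/* Illustrative source-level example (an assumption about typical
   input, not taken from this file):

     short *x, *y;
     int sum = 0;
     for (int i = 0; i < n; i++)
       sum += x[i] * y[i];

   Here half_type is short and the result is the pattern statement
   sum_1 = DOT_PROD_EXPR <x[i], y[i], sum_0>, which maps to native
   instructions on targets such as x86 (PMADDWD).  */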
1313 : :
1314 : :
1315 : : /* Function vect_recog_sad_pattern
1316 : :
1317 : : Try to find the following Sum of Absolute Difference (SAD) pattern:
1318 : :
1319 : : type x_t, y_t;
1320 : : signed TYPE1 diff, abs_diff;
1321 : : TYPE2 sum = init;
1322 : : loop:
1323 : : sum_0 = phi <init, sum_1>
1324 : : S1 x_t = ...
1325 : : S2 y_t = ...
1326 : : S3 x_T = (TYPE1) x_t;
1327 : : S4 y_T = (TYPE1) y_t;
1328 : : S5 diff = x_T - y_T;
1329 : : S6 abs_diff = ABS_EXPR <diff>;
1330 : : [S7 abs_diff = (TYPE2) abs_diff; #optional]
1331 : : S8 sum_1 = abs_diff + sum_0;
1332 : :
1333 : : where 'TYPE1' is at least double the size of type 'type', and 'TYPE2' is the
1334 : : same size as 'TYPE1' or bigger. This is a special case of a reduction
1335 : : computation.
1336 : :
1337 : : Input:
1338 : :
1339 : : * STMT_VINFO: The stmt from which the pattern search begins. In the
1340 : : example, when this function is called with S8, the pattern
1341 : : {S3,S4,S5,S6,S7,S8} will be detected.
1342 : :
1343 : : Output:
1344 : :
1345 : : * TYPE_OUT: The type of the output of this pattern.
1346 : :
1347 : : * Return value: A new stmt that will be used to replace the sequence of
1348 : : stmts that constitute the pattern. In this case it will be:
1349 : : SAD_EXPR <x_t, y_t, sum_0>
1350 : : */
1351 : :
1352 : : static gimple *
1353 : 29949312 : vect_recog_sad_pattern (vec_info *vinfo,
1354 : : stmt_vec_info stmt_vinfo, tree *type_out)
1355 : : {
1356 : 29949312 : gimple *last_stmt = stmt_vinfo->stmt;
1357 : 29949312 : tree half_type;
1358 : :
1359 : : /* Look for the following pattern
1360 : : DX = (TYPE1) X;
1361 : : DY = (TYPE1) Y;
1362 : : DDIFF = DX - DY;
1363 : : DAD = ABS_EXPR <DDIFF>;
1364 : : DAD = (TYPE2) DAD; #optional
1365 : : sum_1 = DAD + sum_0;
1366 : : In which
1367 : : - DX is at least double the size of X
1368 : : - DY is at least double the size of Y
1369 : : - DX, DY, DDIFF, DAD all have the same type
1370 : : - sum is the same size as DAD or bigger
1371 : : - sum has been recognized as a reduction variable.
1372 : :
1373 : : This is equivalent to:
1374 : : DDIFF = X w- Y; #widen sub
1375 : : DAD = ABS_EXPR <DDIFF>;
1376 : : sum_1 = DAD w+ sum_0; #widen summation
1377 : : or
1378 : : DDIFF = X w- Y; #widen sub
1379 : : DAD = ABS_EXPR <DDIFF>;
1380 : : sum_1 = DAD + sum_0; #summation
1381 : : */
1382 : :
1383 : : /* Starting from LAST_STMT, follow the defs of its uses in search
1384 : : of the above pattern. */
1385 : :
1386 : 29949312 : tree plus_oprnd0, plus_oprnd1;
1387 : 29949312 : if (!vect_reassociating_reduction_p (vinfo, stmt_vinfo, PLUS_EXPR,
1388 : : &plus_oprnd0, &plus_oprnd1))
1389 : : return NULL;
1390 : :
1391 : 34838 : tree sum_type = TREE_TYPE (gimple_get_lhs (last_stmt));
1392 : :
1393 : : /* Any non-truncating sequence of conversions is OK here, since
1394 : : with a successful match, the result of the ABS(U) is known to fit
1395 : : within the nonnegative range of the result type. (It cannot be the
1396 : : negative of the minimum signed value due to the range of the widening
1397 : : MINUS_EXPR.) */
1398 : 34838 : vect_unpromoted_value unprom_abs;
1399 : 34838 : plus_oprnd0 = vect_look_through_possible_promotion (vinfo, plus_oprnd0,
1400 : : &unprom_abs);
1401 : :
1402 : : /* So far so good. Since last_stmt was detected as a (summation) reduction,
1403 : : we know that plus_oprnd1 is the reduction variable (defined by a loop-header
1404 : : phi), and plus_oprnd0 is an ssa-name defined by a stmt in the loop body.
1405 : : Then check that plus_oprnd0 is defined by an abs_expr. */
1406 : :
1407 : 34838 : if (!plus_oprnd0)
1408 : : return NULL;
1409 : :
1410 : 24279 : stmt_vec_info abs_stmt_vinfo = vect_get_internal_def (vinfo, plus_oprnd0);
1411 : 24279 : if (!abs_stmt_vinfo)
1412 : : return NULL;
1413 : :
1414 : :   /* FORNOW.  Can continue analyzing the def-use chain when this stmt is in a
1415 : :      phi inside the loop (in case we are analyzing an outer-loop).  */
1416 : 23731 : gassign *abs_stmt = dyn_cast <gassign *> (abs_stmt_vinfo->stmt);
1417 : 71193 : vect_unpromoted_value unprom[2];
1418 : :
1419 : 23731 : if (!abs_stmt)
1420 : : {
1421 : 29949316 : gcall *abd_stmt = dyn_cast <gcall *> (abs_stmt_vinfo->stmt);
1422 : 270 : if (!abd_stmt
1423 : 270 : || !gimple_call_internal_p (abd_stmt)
1424 : 0 : || gimple_call_num_args (abd_stmt) != 2)
1425 : : return NULL;
1426 : :
1427 : 0 : tree abd_oprnd0 = gimple_call_arg (abd_stmt, 0);
1428 : 0 : tree abd_oprnd1 = gimple_call_arg (abd_stmt, 1);
1429 : :
1430 : 0 : if (gimple_call_internal_fn (abd_stmt) == IFN_ABD
1431 : 0 : || gimple_call_internal_fn (abd_stmt) == IFN_VEC_WIDEN_ABD)
1432 : : {
1433 : 0 : unprom[0].op = abd_oprnd0;
1434 : 0 : unprom[0].type = TREE_TYPE (abd_oprnd0);
1435 : 0 : unprom[1].op = abd_oprnd1;
1436 : 0 : unprom[1].type = TREE_TYPE (abd_oprnd1);
1437 : : }
1438 : : else
1439 : : return NULL;
1440 : :
1441 : 0 : half_type = unprom[0].type;
1442 : : }
1443 : 23410 : else if (!vect_recog_absolute_difference (vinfo, abs_stmt, &half_type,
1444 : : unprom, NULL))
1445 : : return NULL;
1446 : :
1447 : 426 : vect_pattern_detected ("vect_recog_sad_pattern", last_stmt);
1448 : :
1449 : 426 : tree half_vectype;
1450 : 426 : if (!vect_supportable_direct_optab_p (vinfo, sum_type, SAD_EXPR, half_type,
1451 : : type_out, &half_vectype))
1452 : : return NULL;
1453 : :
1454 : : /* Get the inputs to the SAD_EXPR in the appropriate types. */
1455 : 266 : tree sad_oprnd[2];
1456 : 266 : vect_convert_inputs (vinfo, stmt_vinfo, 2, sad_oprnd, half_type,
1457 : : unprom, half_vectype);
1458 : :
1459 : 266 : tree var = vect_recog_temp_ssa_var (sum_type, NULL);
1460 : 266 : gimple *pattern_stmt = gimple_build_assign (var, SAD_EXPR, sad_oprnd[0],
1461 : : sad_oprnd[1], plus_oprnd1);
1462 : :
1463 : 266 : return pattern_stmt;
1464 : : }
1465 : :
1466 : : /* Function vect_recog_abd_pattern
1467 : :
1468 : : Try to find the following ABsolute Difference (ABD) or
1469 : : widening ABD (WIDEN_ABD) pattern:
1470 : :
1471 : : TYPE1 x;
1472 : : TYPE2 y;
1473 : : TYPE3 x_cast = (TYPE3) x; // widening or no-op
1474 : : TYPE3 y_cast = (TYPE3) y; // widening or no-op
1475 : : TYPE3 diff = x_cast - y_cast;
1476 : : TYPE4 diff_cast = (TYPE4) diff; // widening or no-op
1477 : : TYPE5 abs = ABS(U)_EXPR <diff_cast>;
1478 : :
1479 : : WIDEN_ABD exists to optimize the case where TYPE4 is at least
1480 : : twice as wide as TYPE3.
1481 : :
1482 : : Input:
1483 : :
1484 : : * STMT_VINFO: The stmt from which the pattern search begins
1485 : :
1486 : : Output:
1487 : :
1488 : : * TYPE_OUT: The type of the output of this pattern
1489 : :
1490 : : * Return value: A new stmt that will be used to replace the sequence of
1491 : : stmts that constitute the pattern, principally:
1492 : : out = IFN_ABD (x, y)
1493 : : out = IFN_WIDEN_ABD (x, y)
1494 : : */
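For example, a scalar loop of this shape can be replaced by IFN_ABD,
assuming the target provides a vector absolute-difference instruction
(a sketch; the names are illustrative):

    void
    abd (unsigned char *out, const unsigned char *x,
         const unsigned char *y, int n)
    {
      for (int i = 0; i < n; i++)
        {
          int diff = x[i] - y[i];            /* TYPE3 diff on widened inputs. */
          out[i] = diff < 0 ? -diff : diff;  /* ABS, narrowed back to TYPE1.  */
        }
    }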
1495 : :
1496 : : static gimple *
1497 : 29899527 : vect_recog_abd_pattern (vec_info *vinfo,
1498 : : stmt_vec_info stmt_vinfo, tree *type_out)
1499 : : {
1500 : 50103447 : gassign *last_stmt = dyn_cast <gassign *> (STMT_VINFO_STMT (stmt_vinfo));
1501 : 20203920 : if (!last_stmt)
1502 : : return NULL;
1503 : :
1504 : 20203920 : tree out_type = TREE_TYPE (gimple_assign_lhs (last_stmt));
1505 : :
1506 : 60611760 : vect_unpromoted_value unprom[2];
1507 : 20203920 : gassign *diff_stmt = NULL;
1508 : 20203920 : tree abd_in_type;
1509 : 20203920 : if (!vect_recog_absolute_difference (vinfo, last_stmt, &abd_in_type,
1510 : : unprom, &diff_stmt))
1511 : : {
1512 : : /* We cannot try further without having a non-widening MINUS. */
1513 : 20202692 : if (!diff_stmt)
1514 : : return NULL;
1515 : :
1516 : 330 : unprom[0].op = gimple_assign_rhs1 (diff_stmt);
1517 : 330 : unprom[1].op = gimple_assign_rhs2 (diff_stmt);
1518 : 330 : abd_in_type = signed_type_for (out_type);
1519 : : }
1520 : :
1521 : 1558 : tree abd_out_type = abd_in_type;
1522 : :
1523 : 1558 : tree vectype_in = get_vectype_for_scalar_type (vinfo, abd_in_type);
1524 : 1558 : if (!vectype_in)
1525 : : return NULL;
1526 : :
1527 : 1525 : internal_fn ifn = IFN_ABD;
1528 : 1525 : tree vectype_out = vectype_in;
1529 : :
1530 : 1525 : if (TYPE_PRECISION (out_type) >= TYPE_PRECISION (abd_in_type) * 2
1531 : 1525 : && stmt_vinfo->min_output_precision >= TYPE_PRECISION (abd_in_type) * 2)
1532 : : {
1533 : 1123 : tree mid_type
1534 : 1123 : = build_nonstandard_integer_type (TYPE_PRECISION (abd_in_type) * 2,
1535 : 1123 : TYPE_UNSIGNED (abd_in_type));
1536 : 1123 : tree mid_vectype = get_vectype_for_scalar_type (vinfo, mid_type);
1537 : :
1538 : 1123 : code_helper dummy_code;
1539 : 1123 : int dummy_int;
1540 : 1123 : auto_vec<tree> dummy_vec;
1541 : 1123 : if (mid_vectype
1542 : 1123 : && supportable_widening_operation (vinfo, IFN_VEC_WIDEN_ABD,
1543 : : stmt_vinfo, mid_vectype,
1544 : : vectype_in,
1545 : : &dummy_code, &dummy_code,
1546 : : &dummy_int, &dummy_vec))
1547 : : {
1548 : 0 : ifn = IFN_VEC_WIDEN_ABD;
1549 : 0 : abd_out_type = mid_type;
1550 : 0 : vectype_out = mid_vectype;
1551 : : }
1552 : 1123 : }
1553 : :
1554 : 1123 : if (ifn == IFN_ABD
1555 : 1525 : && !direct_internal_fn_supported_p (ifn, vectype_in,
1556 : : OPTIMIZE_FOR_SPEED))
1557 : : return NULL;
1558 : :
1559 : 0 : vect_pattern_detected ("vect_recog_abd_pattern", last_stmt);
1560 : :
1561 : 0 : tree abd_oprnds[2];
1562 : 0 : vect_convert_inputs (vinfo, stmt_vinfo, 2, abd_oprnds,
1563 : : abd_in_type, unprom, vectype_in);
1564 : :
1565 : 0 : *type_out = get_vectype_for_scalar_type (vinfo, out_type);
1566 : :
1567 : 0 : tree abd_result = vect_recog_temp_ssa_var (abd_out_type, NULL);
1568 : 0 : gcall *abd_stmt = gimple_build_call_internal (ifn, 2,
1569 : : abd_oprnds[0], abd_oprnds[1]);
1570 : 0 : gimple_call_set_lhs (abd_stmt, abd_result);
1571 : 0 : gimple_set_location (abd_stmt, gimple_location (last_stmt));
1572 : :
1573 : 0 : gimple *stmt = abd_stmt;
1574 : 0 : if (TYPE_PRECISION (abd_in_type) == TYPE_PRECISION (abd_out_type)
1575 : 0 : && TYPE_PRECISION (abd_out_type) < TYPE_PRECISION (out_type)
1576 : 0 : && !TYPE_UNSIGNED (abd_out_type))
1577 : : {
1578 : 0 : tree unsign = unsigned_type_for (abd_out_type);
1579 : 0 : stmt = vect_convert_output (vinfo, stmt_vinfo, unsign, stmt, vectype_out);
1580 : 0 : vectype_out = get_vectype_for_scalar_type (vinfo, unsign);
1581 : : }
1582 : :
1583 : 0 : return vect_convert_output (vinfo, stmt_vinfo, out_type, stmt, vectype_out);
1584 : : }
1585 : :
1586 : : /* Recognize an operation that performs ORIG_CODE on widened inputs,
1587 : : so that it can be treated as though it had the form:
1588 : :
1589 : : A_TYPE a;
1590 : : B_TYPE b;
1591 : : HALF_TYPE a_cast = (HALF_TYPE) a; // possible no-op
1592 : : HALF_TYPE b_cast = (HALF_TYPE) b; // possible no-op
1593 : : | RES_TYPE a_extend = (RES_TYPE) a_cast; // promotion from HALF_TYPE
1594 : : | RES_TYPE b_extend = (RES_TYPE) b_cast; // promotion from HALF_TYPE
1595 : : | RES_TYPE res = a_extend ORIG_CODE b_extend;
1596 : :
1597 : : Try to replace the pattern with:
1598 : :
1599 : : A_TYPE a;
1600 : : B_TYPE b;
1601 : : HALF_TYPE a_cast = (HALF_TYPE) a; // possible no-op
1602 : : HALF_TYPE b_cast = (HALF_TYPE) b; // possible no-op
1603 : : | EXT_TYPE ext = a_cast WIDE_CODE b_cast;
1604 : : | RES_TYPE res = (EXT_TYPE) ext; // possible no-op
1605 : :
1606 : : where EXT_TYPE is wider than HALF_TYPE but has the same signedness.
1607 : :
1608 : : SHIFT_P is true if ORIG_CODE and WIDE_CODE are shifts. NAME is the
1609 : : name of the pattern being matched, for dump purposes. */
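A concrete instance of this scheme, taking ORIG_CODE = MULT_EXPR and
WIDE_CODE = WIDEN_MULT_EXPR (a sketch; names are illustrative):

    void
    wmul (int *res, const short *a, const short *b, int n)
    {
      for (int i = 0; i < n; i++)
        /* a_extend ORIG_CODE b_extend; rewritten so that the vector code
           can use a widening multiply on the narrow inputs.  */
        res[i] = (int) a[i] * (int) b[i];
    }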
1610 : :
1611 : : static gimple *
1612 : 120389560 : vect_recog_widen_op_pattern (vec_info *vinfo,
1613 : : stmt_vec_info last_stmt_info, tree *type_out,
1614 : : tree_code orig_code, code_helper wide_code,
1615 : : bool shift_p, const char *name)
1616 : : {
1617 : 120389560 : gimple *last_stmt = last_stmt_info->stmt;
1618 : :
1619 : 361168680 : vect_unpromoted_value unprom[2];
1620 : 120389560 : tree half_type;
1621 : 120389560 : if (!vect_widened_op_tree (vinfo, last_stmt_info, orig_code, orig_code,
1622 : : shift_p, 2, unprom, &half_type))
1623 : :
1624 : : return NULL;
1625 : :
1626 : : /* Pattern detected. */
1627 : 287793 : vect_pattern_detected (name, last_stmt);
1628 : :
1629 : 287793 : tree type = TREE_TYPE (gimple_get_lhs (last_stmt));
1630 : 287793 : tree itype = type;
1631 : 287793 : if (TYPE_PRECISION (type) != TYPE_PRECISION (half_type) * 2
1632 : 287793 : || TYPE_UNSIGNED (type) != TYPE_UNSIGNED (half_type))
1633 : 200552 : itype = build_nonstandard_integer_type (TYPE_PRECISION (half_type) * 2,
1634 : 200552 : TYPE_UNSIGNED (half_type));
1635 : :
1636 : : /* Check target support */
1637 : 287793 : tree vectype = get_vectype_for_scalar_type (vinfo, half_type);
1638 : 287793 : tree vecitype = get_vectype_for_scalar_type (vinfo, itype);
1639 : 287793 : tree ctype = itype;
1640 : 287793 : tree vecctype = vecitype;
1641 : 287793 : if (orig_code == MINUS_EXPR
1642 : 8272 : && TYPE_UNSIGNED (itype)
1643 : 291749 : && TYPE_PRECISION (type) > TYPE_PRECISION (itype))
1644 : : {
1645 : : /* Subtraction is special, even if half_type is unsigned and no matter
1646 : : whether type is signed or unsigned, if type is wider than itype,
1647 : : we need to sign-extend from the widening operation result to the
1648 : : result type.
1649 : : Consider half_type unsigned char, operand 1 0xfe, operand 2 0xff,
1650 : : itype unsigned short and type either int or unsigned int.
1651 : : Widened (unsigned short) 0xfe - (unsigned short) 0xff is
1652 : : (unsigned short) 0xffff, but for type int we want the result -1
1653 : : and for type unsigned int 0xffffffff rather than 0xffff. */
1654 : 588 : ctype = build_nonstandard_integer_type (TYPE_PRECISION (itype), 0);
1655 : 588 : vecctype = get_vectype_for_scalar_type (vinfo, ctype);
1656 : : }
1657 : :
1658 : 287793 : code_helper dummy_code;
1659 : 287793 : int dummy_int;
1660 : 287793 : auto_vec<tree> dummy_vec;
1661 : 287793 : if (!vectype
1662 : 287793 : || !vecitype
1663 : 226012 : || !vecctype
1664 : 513805 : || !supportable_widening_operation (vinfo, wide_code, last_stmt_info,
1665 : : vecitype, vectype,
1666 : : &dummy_code, &dummy_code,
1667 : : &dummy_int, &dummy_vec))
1668 : 196436 : return NULL;
1669 : :
1670 : 91357 : *type_out = get_vectype_for_scalar_type (vinfo, type);
1671 : 91357 : if (!*type_out)
1672 : : return NULL;
1673 : :
1674 : 91357 : tree oprnd[2];
1675 : 91357 : vect_convert_inputs (vinfo, last_stmt_info,
1676 : : 2, oprnd, half_type, unprom, vectype);
1677 : :
1678 : 91357 : tree var = vect_recog_temp_ssa_var (itype, NULL);
1679 : 91357 : gimple *pattern_stmt = vect_gimple_build (var, wide_code, oprnd[0], oprnd[1]);
1680 : :
1681 : 91357 : if (vecctype != vecitype)
1682 : 0 : pattern_stmt = vect_convert_output (vinfo, last_stmt_info, ctype,
1683 : : pattern_stmt, vecitype);
1684 : :
1685 : 91357 : return vect_convert_output (vinfo, last_stmt_info,
1686 : 91357 : type, pattern_stmt, vecctype);
1687 : 287793 : }
1688 : :
1689 : : /* Try to detect multiplication on widened inputs, converting MULT_EXPR
1690 : : to WIDEN_MULT_EXPR. See vect_recog_widen_op_pattern for details. */
1691 : :
1692 : : static gimple *
1693 : 29971954 : vect_recog_widen_mult_pattern (vec_info *vinfo, stmt_vec_info last_stmt_info,
1694 : : tree *type_out)
1695 : : {
1696 : 29971954 : return vect_recog_widen_op_pattern (vinfo, last_stmt_info, type_out,
1697 : 29971954 : MULT_EXPR, WIDEN_MULT_EXPR, false,
1698 : 29971954 : "vect_recog_widen_mult_pattern");
1699 : : }
1700 : :
1701 : : /* Try to detect addition on widened inputs, converting PLUS_EXPR
1702 : : to IFN_VEC_WIDEN_PLUS. See vect_recog_widen_op_pattern for details. */
1703 : :
1704 : : static gimple *
1705 : 30234176 : vect_recog_widen_plus_pattern (vec_info *vinfo, stmt_vec_info last_stmt_info,
1706 : : tree *type_out)
1707 : : {
1708 : 30234176 : return vect_recog_widen_op_pattern (vinfo, last_stmt_info, type_out,
1709 : 30234176 : PLUS_EXPR, IFN_VEC_WIDEN_PLUS,
1710 : 30234176 : false, "vect_recog_widen_plus_pattern");
1711 : : }
1712 : :
1713 : : /* Try to detect subtraction on widened inputs, converting MINUS_EXPR
1714 : : to IFN_VEC_WIDEN_MINUS. See vect_recog_widen_op_pattern for details. */
1715 : : static gimple *
1716 : 30234176 : vect_recog_widen_minus_pattern (vec_info *vinfo, stmt_vec_info last_stmt_info,
1717 : : tree *type_out)
1718 : : {
1719 : 30234176 : return vect_recog_widen_op_pattern (vinfo, last_stmt_info, type_out,
1720 : 30234176 : MINUS_EXPR, IFN_VEC_WIDEN_MINUS,
1721 : 30234176 : false, "vect_recog_widen_minus_pattern");
1722 : : }
1723 : :
1724 : : /* Try to detect abd on widened inputs, converting IFN_ABD
1725 : : to IFN_VEC_WIDEN_ABD. */
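For instance, an ABD result that is immediately zero-extended to a type
twice as wide can fold into a single widening ABD (a sketch; the loop
shape is illustrative):

    void
    wabd (unsigned short *out, const unsigned char *x,
          const unsigned char *y, int n)
    {
      for (int i = 0; i < n; i++)
        /* IFN_ABD on the char inputs followed by a widening conversion.  */
        out[i] = (unsigned short) (x[i] > y[i] ? x[i] - y[i] : y[i] - x[i]);
    }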
1726 : : static gimple *
1727 : 30234176 : vect_recog_widen_abd_pattern (vec_info *vinfo, stmt_vec_info stmt_vinfo,
1728 : : tree *type_out)
1729 : : {
1730 : 30234176 : gassign *last_stmt = dyn_cast <gassign *> (STMT_VINFO_STMT (stmt_vinfo));
1731 : 27636031 : if (!last_stmt || !CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (last_stmt)))
1732 : : return NULL;
1733 : :
1734 : 2926769 : tree last_rhs = gimple_assign_rhs1 (last_stmt);
1735 : :
1736 : 2926769 : tree in_type = TREE_TYPE (last_rhs);
1737 : 2926769 : tree out_type = TREE_TYPE (gimple_assign_lhs (last_stmt));
1738 : 2926769 : if (!INTEGRAL_TYPE_P (in_type)
1739 : 2629436 : || !INTEGRAL_TYPE_P (out_type)
1740 : 2526221 : || TYPE_PRECISION (in_type) * 2 != TYPE_PRECISION (out_type)
1741 : 3516916 : || !TYPE_UNSIGNED (in_type))
1742 : : return NULL;
1743 : :
1744 : 220822 : vect_unpromoted_value unprom;
1745 : 220822 : tree op = vect_look_through_possible_promotion (vinfo, last_rhs, &unprom);
1746 : 220822 : if (!op || TYPE_PRECISION (TREE_TYPE (op)) != TYPE_PRECISION (in_type))
1747 : : return NULL;
1748 : :
1749 : 218996 : stmt_vec_info abd_pattern_vinfo = vect_get_internal_def (vinfo, op);
1750 : 218996 : if (!abd_pattern_vinfo)
1751 : : return NULL;
1752 : :
1753 : 30243251 : gcall *abd_stmt = dyn_cast <gcall *> (STMT_VINFO_STMT (abd_pattern_vinfo));
1754 : 9075 : if (!abd_stmt
1755 : 9075 : || !gimple_call_internal_p (abd_stmt)
1756 : 224 : || gimple_call_internal_fn (abd_stmt) != IFN_ABD)
1757 : : return NULL;
1758 : :
1759 : 0 : tree vectype_in = get_vectype_for_scalar_type (vinfo, in_type);
1760 : 0 : tree vectype_out = get_vectype_for_scalar_type (vinfo, out_type);
1761 : :
1762 : 0 : code_helper dummy_code;
1763 : 0 : int dummy_int;
1764 : 0 : auto_vec<tree> dummy_vec;
1765 : 0 : if (!supportable_widening_operation (vinfo, IFN_VEC_WIDEN_ABD, stmt_vinfo,
1766 : : vectype_out, vectype_in,
1767 : : &dummy_code, &dummy_code,
1768 : : &dummy_int, &dummy_vec))
1769 : : return NULL;
1770 : :
1771 : 0 : vect_pattern_detected ("vect_recog_widen_abd_pattern", last_stmt);
1772 : :
1773 : 0 : *type_out = vectype_out;
1774 : :
1775 : 0 : tree abd_oprnd0 = gimple_call_arg (abd_stmt, 0);
1776 : 0 : tree abd_oprnd1 = gimple_call_arg (abd_stmt, 1);
1777 : 0 : tree widen_abd_result = vect_recog_temp_ssa_var (out_type, NULL);
1778 : 0 : gcall *widen_abd_stmt = gimple_build_call_internal (IFN_VEC_WIDEN_ABD, 2,
1779 : : abd_oprnd0, abd_oprnd1);
1780 : 0 : gimple_call_set_lhs (widen_abd_stmt, widen_abd_result);
1781 : 0 : gimple_set_location (widen_abd_stmt, gimple_location (last_stmt));
1782 : 0 : return widen_abd_stmt;
1783 : 0 : }
1784 : :
1785 : : /* Function vect_recog_ctz_ffs_pattern
1786 : :
1787 : : Try to find the following pattern:
1788 : :
1789 : : TYPE1 A;
1790 : : TYPE1 B;
1791 : :
1792 : : B = __builtin_ctz{,l,ll} (A);
1793 : :
1794 : : or
1795 : :
1796 : : B = __builtin_ffs{,l,ll} (A);
1797 : :
1798 : : Input:
1799 : :
1800 : : * STMT_VINFO: The stmt from which the pattern search begins.
1801 : :        Here it starts with B = __builtin_* (A);
1802 : :
1803 : : Output:
1804 : :
1805 : : * TYPE_OUT: The vector type of the output of this pattern.
1806 : :
1807 : : * Return value: A new stmt that will be used to replace the sequence of
1808 : : stmts that constitute the pattern, using clz or popcount builtins. */
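The rewrite relies on standard bit identities: for x != 0, (x - 1) & ~x
sets exactly the bits below the lowest set bit of x, so its popcount equals
the number of trailing zeros.  A scalar sanity check of the popcount form
(a sketch assuming a 32-bit unsigned int):

    unsigned
    ctz_via_popcount (unsigned x)
    {
      /* Equals __builtin_ctz (x) for x != 0; at x == 0 it yields 32,
         i.e. the precision, which is the defined-at-zero value the
         pattern assumes for the popcount variant.  */
      return __builtin_popcount ((x - 1) & ~x);
    }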
1809 : :
1810 : : static gimple *
1811 : 29949158 : vect_recog_ctz_ffs_pattern (vec_info *vinfo, stmt_vec_info stmt_vinfo,
1812 : : tree *type_out)
1813 : : {
1814 : 29949158 : gimple *call_stmt = stmt_vinfo->stmt;
1815 : 29949158 : gimple *pattern_stmt;
1816 : 29949158 : tree rhs_oprnd, rhs_type, lhs_oprnd, lhs_type, vec_type, vec_rhs_type;
1817 : 29949158 : tree new_var;
1818 : 29949158 : internal_fn ifn = IFN_LAST, ifnnew = IFN_LAST;
1819 : 29949158 : bool defined_at_zero = true, defined_at_zero_new = false;
1820 : 29949158 : int val = 0, val_new = 0, val_cmp = 0;
1821 : 29949158 : int prec;
1822 : 29949158 : int sub = 0, add = 0;
1823 : 29949158 : location_t loc;
1824 : :
1825 : 29949158 : if (!is_gimple_call (call_stmt))
1826 : : return NULL;
1827 : :
1828 : 3542649 : if (gimple_call_num_args (call_stmt) != 1
1829 : 3542649 : && gimple_call_num_args (call_stmt) != 2)
1830 : : return NULL;
1831 : :
1832 : 1923127 : rhs_oprnd = gimple_call_arg (call_stmt, 0);
1833 : 1923127 : rhs_type = TREE_TYPE (rhs_oprnd);
1834 : 1923127 : lhs_oprnd = gimple_call_lhs (call_stmt);
1835 : 1923127 : if (!lhs_oprnd)
1836 : : return NULL;
1837 : 950651 : lhs_type = TREE_TYPE (lhs_oprnd);
1838 : 950651 : if (!INTEGRAL_TYPE_P (lhs_type)
1839 : 333668 : || !INTEGRAL_TYPE_P (rhs_type)
1840 : 55039 : || !type_has_mode_precision_p (rhs_type)
1841 : 1004116 : || TREE_CODE (rhs_oprnd) != SSA_NAME)
1842 : 908598 : return NULL;
1843 : :
1844 : 42053 : switch (gimple_call_combined_fn (call_stmt))
1845 : : {
1846 : 1133 : CASE_CFN_CTZ:
1847 : 1133 : ifn = IFN_CTZ;
1848 : 1133 : if (!gimple_call_internal_p (call_stmt)
1849 : 1133 : || gimple_call_num_args (call_stmt) != 2)
1850 : : defined_at_zero = false;
1851 : : else
1852 : 48 : val = tree_to_shwi (gimple_call_arg (call_stmt, 1));
1853 : : break;
1854 : : CASE_CFN_FFS:
1855 : : ifn = IFN_FFS;
1856 : : break;
1857 : : default:
1858 : : return NULL;
1859 : : }
1860 : :
1861 : 1293 : prec = TYPE_PRECISION (rhs_type);
1862 : 1293 : loc = gimple_location (call_stmt);
1863 : :
1864 : 1293 : vec_type = get_vectype_for_scalar_type (vinfo, lhs_type);
1865 : 1293 : if (!vec_type)
1866 : : return NULL;
1867 : :
1868 : 1287 : vec_rhs_type = get_vectype_for_scalar_type (vinfo, rhs_type);
1869 : 1287 : if (!vec_rhs_type)
1870 : : return NULL;
1871 : :
1872 : : /* Do it only if the backend doesn't have ctz<vector_mode>2 or
1873 : : ffs<vector_mode>2 pattern but does have clz<vector_mode>2 or
1874 : : popcount<vector_mode>2. */
1875 : 1055 : if (!vec_type
1876 : 1055 : || direct_internal_fn_supported_p (ifn, vec_rhs_type,
1877 : : OPTIMIZE_FOR_SPEED))
1878 : : return NULL;
1879 : :
1880 : 1055 : if (ifn == IFN_FFS
1881 : 1055 : && direct_internal_fn_supported_p (IFN_CTZ, vec_rhs_type,
1882 : : OPTIMIZE_FOR_SPEED))
1883 : : {
1884 : 0 : ifnnew = IFN_CTZ;
1885 : 0 : defined_at_zero_new
1886 : 0 : = CTZ_DEFINED_VALUE_AT_ZERO (SCALAR_INT_TYPE_MODE (rhs_type),
1887 : : val_new) == 2;
1888 : : }
1889 : 1055 : else if (direct_internal_fn_supported_p (IFN_CLZ, vec_rhs_type,
1890 : : OPTIMIZE_FOR_SPEED))
1891 : : {
1892 : 88 : ifnnew = IFN_CLZ;
1893 : 88 : defined_at_zero_new
1894 : 88 : = CLZ_DEFINED_VALUE_AT_ZERO (SCALAR_INT_TYPE_MODE (rhs_type),
1895 : : val_new) == 2;
1896 : : }
1897 : 88 : if ((ifnnew == IFN_LAST
1898 : 88 : || (defined_at_zero && !defined_at_zero_new))
1899 : 967 : && direct_internal_fn_supported_p (IFN_POPCOUNT, vec_rhs_type,
1900 : : OPTIMIZE_FOR_SPEED))
1901 : : {
1902 : : ifnnew = IFN_POPCOUNT;
1903 : : defined_at_zero_new = true;
1904 : : val_new = prec;
1905 : : }
1906 : 1019 : if (ifnnew == IFN_LAST)
1907 : : return NULL;
1908 : :
1909 : 124 : vect_pattern_detected ("vec_recog_ctz_ffs_pattern", call_stmt);
1910 : :
1911 : 124 : val_cmp = val_new;
1912 : 124 : if ((ifnnew == IFN_CLZ
1913 : 124 : && defined_at_zero
1914 : 60 : && defined_at_zero_new
1915 : 60 : && val == prec
1916 : 31 : && val_new == prec)
1917 : 93 : || (ifnnew == IFN_POPCOUNT && ifn == IFN_CTZ))
1918 : : {
1919 : : /* .CTZ (X) = PREC - .CLZ ((X - 1) & ~X)
1920 : : .CTZ (X) = .POPCOUNT ((X - 1) & ~X). */
1921 : : if (ifnnew == IFN_CLZ)
1922 : : sub = prec;
1923 : 56 : val_cmp = prec;
1924 : :
1925 : 56 : if (!TYPE_UNSIGNED (rhs_type))
1926 : : {
1927 : 12 : rhs_type = unsigned_type_for (rhs_type);
1928 : 12 : vec_rhs_type = get_vectype_for_scalar_type (vinfo, rhs_type);
1929 : 12 : new_var = vect_recog_temp_ssa_var (rhs_type, NULL);
1930 : 12 : pattern_stmt = gimple_build_assign (new_var, NOP_EXPR, rhs_oprnd);
1931 : 12 : append_pattern_def_seq (vinfo, stmt_vinfo, pattern_stmt,
1932 : : vec_rhs_type);
1933 : 12 : rhs_oprnd = new_var;
1934 : : }
1935 : :
1936 : 56 : tree m1 = vect_recog_temp_ssa_var (rhs_type, NULL);
1937 : 56 : pattern_stmt = gimple_build_assign (m1, PLUS_EXPR, rhs_oprnd,
1938 : : build_int_cst (rhs_type, -1));
1939 : 56 : gimple_set_location (pattern_stmt, loc);
1940 : 56 : append_pattern_def_seq (vinfo, stmt_vinfo, pattern_stmt, vec_rhs_type);
1941 : :
1942 : 56 : new_var = vect_recog_temp_ssa_var (rhs_type, NULL);
1943 : 56 : pattern_stmt = gimple_build_assign (new_var, BIT_NOT_EXPR, rhs_oprnd);
1944 : 56 : gimple_set_location (pattern_stmt, loc);
1945 : 56 : append_pattern_def_seq (vinfo, stmt_vinfo, pattern_stmt, vec_rhs_type);
1946 : 56 : rhs_oprnd = new_var;
1947 : :
1948 : 56 : new_var = vect_recog_temp_ssa_var (rhs_type, NULL);
1949 : 56 : pattern_stmt = gimple_build_assign (new_var, BIT_AND_EXPR,
1950 : : m1, rhs_oprnd);
1951 : 56 : gimple_set_location (pattern_stmt, loc);
1952 : 56 : append_pattern_def_seq (vinfo, stmt_vinfo, pattern_stmt, vec_rhs_type);
1953 : 56 : rhs_oprnd = new_var;
1954 : 56 : }
1955 : 68 : else if (ifnnew == IFN_CLZ)
1956 : : {
1957 : : /* .CTZ (X) = (PREC - 1) - .CLZ (X & -X)
1958 : : .FFS (X) = PREC - .CLZ (X & -X). */
1959 : 57 : sub = prec - (ifn == IFN_CTZ);
1960 : 57 : val_cmp = sub - val_new;
1961 : :
1962 : 57 : tree neg = vect_recog_temp_ssa_var (rhs_type, NULL);
1963 : 57 : pattern_stmt = gimple_build_assign (neg, NEGATE_EXPR, rhs_oprnd);
1964 : 57 : gimple_set_location (pattern_stmt, loc);
1965 : 57 : append_pattern_def_seq (vinfo, stmt_vinfo, pattern_stmt, vec_rhs_type);
1966 : :
1967 : 57 : new_var = vect_recog_temp_ssa_var (rhs_type, NULL);
1968 : 57 : pattern_stmt = gimple_build_assign (new_var, BIT_AND_EXPR,
1969 : : rhs_oprnd, neg);
1970 : 57 : gimple_set_location (pattern_stmt, loc);
1971 : 57 : append_pattern_def_seq (vinfo, stmt_vinfo, pattern_stmt, vec_rhs_type);
1972 : 57 : rhs_oprnd = new_var;
1973 : : }
1974 : 11 : else if (ifnnew == IFN_POPCOUNT)
1975 : : {
1976 : : /* .CTZ (X) = PREC - .POPCOUNT (X | -X)
1977 : : .FFS (X) = (PREC + 1) - .POPCOUNT (X | -X). */
1978 : 11 : sub = prec + (ifn == IFN_FFS);
1979 : 11 : val_cmp = sub;
1980 : :
1981 : 11 : tree neg = vect_recog_temp_ssa_var (rhs_type, NULL);
1982 : 11 : pattern_stmt = gimple_build_assign (neg, NEGATE_EXPR, rhs_oprnd);
1983 : 11 : gimple_set_location (pattern_stmt, loc);
1984 : 11 : append_pattern_def_seq (vinfo, stmt_vinfo, pattern_stmt, vec_rhs_type);
1985 : :
1986 : 11 : new_var = vect_recog_temp_ssa_var (rhs_type, NULL);
1987 : 11 : pattern_stmt = gimple_build_assign (new_var, BIT_IOR_EXPR,
1988 : : rhs_oprnd, neg);
1989 : 11 : gimple_set_location (pattern_stmt, loc);
1990 : 11 : append_pattern_def_seq (vinfo, stmt_vinfo, pattern_stmt, vec_rhs_type);
1991 : 11 : rhs_oprnd = new_var;
1992 : : }
1993 : 0 : else if (ifnnew == IFN_CTZ)
1994 : : {
1995 : : /* .FFS (X) = .CTZ (X) + 1. */
1996 : 0 : add = 1;
1997 : 0 : val_cmp++;
1998 : : }
1999 : :
2000 : : /* Create B = .IFNNEW (A). */
2001 : 124 : new_var = vect_recog_temp_ssa_var (lhs_type, NULL);
2002 : 124 : if ((ifnnew == IFN_CLZ || ifnnew == IFN_CTZ) && defined_at_zero_new)
2003 : 88 : pattern_stmt
2004 : 88 : = gimple_build_call_internal (ifnnew, 2, rhs_oprnd,
2005 : : build_int_cst (integer_type_node,
2006 : 88 : val_new));
2007 : : else
2008 : 36 : pattern_stmt = gimple_build_call_internal (ifnnew, 1, rhs_oprnd);
2009 : 124 : gimple_call_set_lhs (pattern_stmt, new_var);
2010 : 124 : gimple_set_location (pattern_stmt, loc);
2011 : 124 : *type_out = vec_type;
2012 : :
2013 : 124 : if (sub)
2014 : : {
2015 : 99 : append_pattern_def_seq (vinfo, stmt_vinfo, pattern_stmt, vec_type);
2016 : 99 : tree ret_var = vect_recog_temp_ssa_var (lhs_type, NULL);
2017 : 99 : pattern_stmt = gimple_build_assign (ret_var, MINUS_EXPR,
2018 : 99 : build_int_cst (lhs_type, sub),
2019 : : new_var);
2020 : 99 : gimple_set_location (pattern_stmt, loc);
2021 : 99 : new_var = ret_var;
2022 : : }
2023 : 25 : else if (add)
2024 : : {
2025 : 0 : append_pattern_def_seq (vinfo, stmt_vinfo, pattern_stmt, vec_type);
2026 : 0 : tree ret_var = vect_recog_temp_ssa_var (lhs_type, NULL);
2027 : 0 : pattern_stmt = gimple_build_assign (ret_var, PLUS_EXPR, new_var,
2028 : 0 : build_int_cst (lhs_type, add));
2029 : 0 : gimple_set_location (pattern_stmt, loc);
2030 : 0 : new_var = ret_var;
2031 : : }
2032 : :
2033 : 124 : if (defined_at_zero
2034 : 88 : && (!defined_at_zero_new || val != val_cmp))
2035 : : {
2036 : 11 : append_pattern_def_seq (vinfo, stmt_vinfo, pattern_stmt, vec_type);
2037 : 11 : tree ret_var = vect_recog_temp_ssa_var (lhs_type, NULL);
2038 : 11 : rhs_oprnd = gimple_call_arg (call_stmt, 0);
2039 : 11 : rhs_type = TREE_TYPE (rhs_oprnd);
2040 : 11 : tree cmp = vect_recog_temp_ssa_var (boolean_type_node, NULL);
2041 : 11 : pattern_stmt = gimple_build_assign (cmp, NE_EXPR, rhs_oprnd,
2042 : : build_zero_cst (rhs_type));
2043 : 11 : append_pattern_def_seq (vinfo, stmt_vinfo, pattern_stmt,
2044 : : truth_type_for (vec_type), rhs_type);
2045 : 11 : pattern_stmt = gimple_build_assign (ret_var, COND_EXPR, cmp,
2046 : : new_var,
2047 : 11 : build_int_cst (lhs_type, val));
2048 : : }
2049 : :
2050 : 124 : if (dump_enabled_p ())
2051 : 36 : dump_printf_loc (MSG_NOTE, vect_location,
2052 : : "created pattern stmt: %G", pattern_stmt);
2053 : :
2054 : : return pattern_stmt;
2055 : : }
2056 : :
2057 : : /* Function vect_recog_popcount_clz_ctz_ffs_pattern
2058 : :
2059 : : Try to find the following pattern:
2060 : :
2061 : : UTYPE1 A;
2062 : : TYPE1 B;
2063 : : UTYPE2 temp_in;
2064 : : TYPE3 temp_out;
2065 : : temp_in = (UTYPE2)A;
2066 : :
2067 : : temp_out = __builtin_popcount{,l,ll} (temp_in);
2068 : : B = (TYPE1) temp_out;
2069 : :
2070 : :    UTYPE2 may or may not be equal to TYPE3;
2071 : :    e.g. UTYPE2 is equal to TYPE3 for __builtin_popcount,
2072 : :    but UTYPE2 is not equal to TYPE3 for __builtin_popcountll.
2073 : :
2074 : : Input:
2075 : :
2076 : : * STMT_VINFO: The stmt from which the pattern search begins.
2077 : :        Here it starts with B = (TYPE1) temp_out;
2078 : :
2079 : : Output:
2080 : :
2081 : : * TYPE_OUT: The vector type of the output of this pattern.
2082 : :
2083 : : * Return value: A new stmt that will be used to replace the sequence of
2084 : : stmts that constitute the pattern. In this case it will be:
2085 : : B = .POPCOUNT (A);
2086 : :
2087 : : Similarly for clz, ctz and ffs.
2088 : : */
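For example, the classic shape with __builtin_popcountll applied to a
zero-extended 32-bit value (a sketch; names are illustrative):

    void
    popcnt (int *b, const unsigned int *a, int n)
    {
      for (int i = 0; i < n; i++)
        /* temp_in = (unsigned long long) a[i]; temp_out = popcountll (temp_in);
           b[i] = (int) temp_out.  Zero extension adds no set bits, so this is
           recognized as b[i] = .POPCOUNT (a[i]) on the narrow type.  */
        b[i] = __builtin_popcountll (a[i]);
    }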
2089 : :
2090 : : static gimple *
2091 : 29949038 : vect_recog_popcount_clz_ctz_ffs_pattern (vec_info *vinfo,
2092 : : stmt_vec_info stmt_vinfo,
2093 : : tree *type_out)
2094 : : {
2095 : 29949038 : gassign *last_stmt = dyn_cast <gassign *> (stmt_vinfo->stmt);
2096 : 20253231 : gimple *call_stmt, *pattern_stmt;
2097 : 20253231 : tree rhs_oprnd, rhs_origin, lhs_oprnd, lhs_type, vec_type, new_var;
2098 : 50202110 : internal_fn ifn = IFN_LAST;
2099 : 29948879 : int addend = 0;
2100 : :
2101 : : /* Find B = (TYPE1) temp_out. */
2102 : 20253231 : if (!last_stmt)
2103 : : return NULL;
2104 : 20253231 : tree_code code = gimple_assign_rhs_code (last_stmt);
2105 : 20253231 : if (!CONVERT_EXPR_CODE_P (code))
2106 : : return NULL;
2107 : :
2108 : 2825775 : lhs_oprnd = gimple_assign_lhs (last_stmt);
2109 : 2825775 : lhs_type = TREE_TYPE (lhs_oprnd);
2110 : 2825775 : if (!INTEGRAL_TYPE_P (lhs_type))
2111 : : return NULL;
2112 : :
2113 : 2660879 : rhs_oprnd = gimple_assign_rhs1 (last_stmt);
2114 : 2660879 : if (TREE_CODE (rhs_oprnd) != SSA_NAME
2115 : 2660879 : || !has_single_use (rhs_oprnd))
2116 : : return NULL;
2117 : 1373917 : call_stmt = SSA_NAME_DEF_STMT (rhs_oprnd);
2118 : :
2119 : : /* Find temp_out = __builtin_popcount{,l,ll} (temp_in); */
2120 : 1373917 : if (!is_gimple_call (call_stmt))
2121 : : return NULL;
2122 : 105212 : switch (gimple_call_combined_fn (call_stmt))
2123 : : {
2124 : : int val;
2125 : : CASE_CFN_POPCOUNT:
2126 : : ifn = IFN_POPCOUNT;
2127 : : break;
2128 : 4638 : CASE_CFN_CLZ:
2129 : 4638 : ifn = IFN_CLZ;
2130 : : /* Punt if call result is unsigned and defined value at zero
2131 : : is negative, as the negative value doesn't extend correctly. */
2132 : 4638 : if (TYPE_UNSIGNED (TREE_TYPE (rhs_oprnd))
2133 : 0 : && gimple_call_internal_p (call_stmt)
2134 : 4638 : && CLZ_DEFINED_VALUE_AT_ZERO
2135 : : (SCALAR_INT_TYPE_MODE (TREE_TYPE (rhs_oprnd)), val) == 2
2136 : 4638 : && val < 0)
2137 : : return NULL;
2138 : : break;
2139 : 573 : CASE_CFN_CTZ:
2140 : 573 : ifn = IFN_CTZ;
2141 : : /* Punt if call result is unsigned and defined value at zero
2142 : : is negative, as the negative value doesn't extend correctly. */
2143 : 573 : if (TYPE_UNSIGNED (TREE_TYPE (rhs_oprnd))
2144 : 0 : && gimple_call_internal_p (call_stmt)
2145 : 573 : && CTZ_DEFINED_VALUE_AT_ZERO
2146 : : (SCALAR_INT_TYPE_MODE (TREE_TYPE (rhs_oprnd)), val) == 2
2147 : 573 : && val < 0)
2148 : : return NULL;
2149 : : break;
2150 : 18 : CASE_CFN_FFS:
2151 : 18 : ifn = IFN_FFS;
2152 : 18 : break;
2153 : : default:
2154 : : return NULL;
2155 : : }
2156 : :
2157 : 5459 : if (gimple_call_num_args (call_stmt) != 1
2158 : 5459 : && gimple_call_num_args (call_stmt) != 2)
2159 : : return NULL;
2160 : :
2161 : 5459 : rhs_oprnd = gimple_call_arg (call_stmt, 0);
2162 : 5459 : vect_unpromoted_value unprom_diff;
2163 : 5459 : rhs_origin
2164 : 5459 : = vect_look_through_possible_promotion (vinfo, rhs_oprnd, &unprom_diff);
2165 : :
2166 : 5459 : if (!rhs_origin)
2167 : : return NULL;
2168 : :
2169 : : /* Input and output of .POPCOUNT should be same-precision integer. */
2170 : 5459 : if (TYPE_PRECISION (unprom_diff.type) != TYPE_PRECISION (lhs_type))
2171 : : return NULL;
2172 : :
2173 : : /* Also A should be unsigned or same precision as temp_in, otherwise
2174 : : different builtins/internal functions have different behaviors. */
2175 : 1944 : if (TYPE_PRECISION (unprom_diff.type)
2176 : 1944 : != TYPE_PRECISION (TREE_TYPE (rhs_oprnd)))
2177 : 158 : switch (ifn)
2178 : : {
2179 : 79 : case IFN_POPCOUNT:
2180 : : /* For popcount require zero extension, which doesn't add any
2181 : : further bits to the count. */
2182 : 79 : if (!TYPE_UNSIGNED (unprom_diff.type))
2183 : : return NULL;
2184 : : break;
2185 : 61 : case IFN_CLZ:
2186 : : /* clzll (x) == clz (x) + 32 for unsigned x != 0, so ok
2187 : : if it is undefined at zero or if it matches also for the
2188 : : defined value there. */
2189 : 61 : if (!TYPE_UNSIGNED (unprom_diff.type))
2190 : : return NULL;
2191 : 61 : if (!type_has_mode_precision_p (lhs_type)
2192 : 61 : || !type_has_mode_precision_p (TREE_TYPE (rhs_oprnd)))
2193 : 0 : return NULL;
2194 : 61 : addend = (TYPE_PRECISION (TREE_TYPE (rhs_oprnd))
2195 : 61 : - TYPE_PRECISION (lhs_type));
2196 : 61 : if (gimple_call_internal_p (call_stmt)
2197 : 61 : && gimple_call_num_args (call_stmt) == 2)
2198 : : {
2199 : 0 : int val1, val2;
2200 : 0 : val1 = tree_to_shwi (gimple_call_arg (call_stmt, 1));
2201 : 0 : int d2
2202 : 0 : = CLZ_DEFINED_VALUE_AT_ZERO (SCALAR_INT_TYPE_MODE (lhs_type),
2203 : : val2);
2204 : 0 : if (d2 != 2 || val1 != val2 + addend)
2205 : : return NULL;
2206 : : }
2207 : : break;
2208 : 13 : case IFN_CTZ:
2209 : : /* ctzll (x) == ctz (x) for unsigned or signed x != 0, so ok
2210 : : if it is undefined at zero or if it matches also for the
2211 : : defined value there. */
2212 : 13 : if (gimple_call_internal_p (call_stmt)
2213 : 13 : && gimple_call_num_args (call_stmt) == 2)
2214 : : {
2215 : 0 : int val1, val2;
2216 : 0 : val1 = tree_to_shwi (gimple_call_arg (call_stmt, 1));
2217 : 0 : int d2
2218 : 0 : = CTZ_DEFINED_VALUE_AT_ZERO (SCALAR_INT_TYPE_MODE (lhs_type),
2219 : : val2);
2220 : 0 : if (d2 != 2 || val1 != val2)
2221 : : return NULL;
2222 : : }
2223 : : break;
2224 : : case IFN_FFS:
2225 : : /* ffsll (x) == ffs (x) for unsigned or signed x. */
2226 : : break;
2227 : 0 : default:
2228 : 0 : gcc_unreachable ();
2229 : : }
2230 : :
2231 : 1944 : vec_type = get_vectype_for_scalar_type (vinfo, lhs_type);
2232 : : /* Do it only if the backend has popcount<vector_mode>2 etc. pattern. */
2233 : 1944 : if (!vec_type)
2234 : : return NULL;
2235 : :
2236 : 1821 : bool supported
2237 : 1821 : = direct_internal_fn_supported_p (ifn, vec_type, OPTIMIZE_FOR_SPEED);
2238 : 1821 : if (!supported)
2239 : 1718 : switch (ifn)
2240 : : {
2241 : : case IFN_POPCOUNT:
2242 : : case IFN_CLZ:
2243 : : return NULL;
2244 : 18 : case IFN_FFS:
2245 : : /* vect_recog_ctz_ffs_pattern can implement ffs using ctz. */
2246 : 18 : if (direct_internal_fn_supported_p (IFN_CTZ, vec_type,
2247 : : OPTIMIZE_FOR_SPEED))
2248 : : break;
2249 : : /* FALLTHRU */
2250 : 359 : case IFN_CTZ:
2251 : : /* vect_recog_ctz_ffs_pattern can implement ffs or ctz using
2252 : : clz or popcount. */
2253 : 359 : if (direct_internal_fn_supported_p (IFN_CLZ, vec_type,
2254 : : OPTIMIZE_FOR_SPEED))
2255 : : break;
2256 : 325 : if (direct_internal_fn_supported_p (IFN_POPCOUNT, vec_type,
2257 : : OPTIMIZE_FOR_SPEED))
2258 : : break;
2259 : : return NULL;
2260 : 0 : default:
2261 : 0 : gcc_unreachable ();
2262 : : }
2263 : :
2264 : 159 : vect_pattern_detected ("vec_recog_popcount_clz_ctz_ffs_pattern",
2265 : : call_stmt);
2266 : :
2267 : : /* Create B = .POPCOUNT (A). */
2268 : 159 : new_var = vect_recog_temp_ssa_var (lhs_type, NULL);
2269 : 159 : tree arg2 = NULL_TREE;
2270 : 159 : int val;
2271 : 159 : if (ifn == IFN_CLZ
2272 : 191 : && CLZ_DEFINED_VALUE_AT_ZERO (SCALAR_INT_TYPE_MODE (lhs_type),
2273 : : val) == 2)
2274 : 30 : arg2 = build_int_cst (integer_type_node, val);
2275 : 129 : else if (ifn == IFN_CTZ
2276 : 167 : && CTZ_DEFINED_VALUE_AT_ZERO (SCALAR_INT_TYPE_MODE (lhs_type),
2277 : : val) == 2)
2278 : 38 : arg2 = build_int_cst (integer_type_node, val);
2279 : 159 : if (arg2)
2280 : 68 : pattern_stmt = gimple_build_call_internal (ifn, 2, unprom_diff.op, arg2);
2281 : : else
2282 : 91 : pattern_stmt = gimple_build_call_internal (ifn, 1, unprom_diff.op);
2283 : 159 : gimple_call_set_lhs (pattern_stmt, new_var);
2284 : 159 : gimple_set_location (pattern_stmt, gimple_location (last_stmt));
2285 : 159 : *type_out = vec_type;
2286 : :
2287 : 159 : if (dump_enabled_p ())
2288 : 24 : dump_printf_loc (MSG_NOTE, vect_location,
2289 : : "created pattern stmt: %G", pattern_stmt);
2290 : :
2291 : 159 : if (addend)
2292 : : {
2293 : 6 : gcc_assert (supported);
2294 : 6 : append_pattern_def_seq (vinfo, stmt_vinfo, pattern_stmt, vec_type);
2295 : 6 : tree ret_var = vect_recog_temp_ssa_var (lhs_type, NULL);
2296 : 6 : pattern_stmt = gimple_build_assign (ret_var, PLUS_EXPR, new_var,
2297 : 6 : build_int_cst (lhs_type, addend));
2298 : : }
2299 : 153 : else if (!supported)
2300 : : {
2301 : 56 : stmt_vec_info new_stmt_info = vinfo->add_stmt (pattern_stmt);
2302 : 56 : STMT_VINFO_VECTYPE (new_stmt_info) = vec_type;
2303 : 56 : pattern_stmt
2304 : 56 : = vect_recog_ctz_ffs_pattern (vinfo, new_stmt_info, type_out);
2305 : 56 : if (pattern_stmt == NULL)
2306 : : return NULL;
2307 : 56 : if (gimple_seq seq = STMT_VINFO_PATTERN_DEF_SEQ (new_stmt_info))
2308 : : {
2309 : 56 : gimple_seq *pseq = &STMT_VINFO_PATTERN_DEF_SEQ (stmt_vinfo);
2310 : 56 : gimple_seq_add_seq_without_update (pseq, seq);
2311 : : }
2312 : : }
2313 : : return pattern_stmt;
2314 : : }
2315 : :
2316 : : /* Function vect_recog_pow_pattern
2317 : :
2318 : : Try to find the following pattern:
2319 : :
2320 : : x = POW (y, N);
2321 : :
2322 : : with POW being one of pow, powf, powi, powif and N being
2323 : : either 2 or 0.5.
2324 : :
2325 : : Input:
2326 : :
2327 : : * STMT_VINFO: The stmt from which the pattern search begins.
2328 : :
2329 : : Output:
2330 : :
2331 : : * TYPE_OUT: The type of the output of this pattern.
2332 : :
2333 : : * Return value: A new stmt that will be used to replace the sequence of
2334 : : stmts that constitute the pattern. In this case it will be:
2335 : : x = x * x
2336 : : or
2337 : : x = sqrt (x)
2338 : : */
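The two accepted exponents correspond to these scalar shapes (a sketch):

    double
    square (double y)
    {
      return __builtin_pow (y, 2.0);  /* rewritten as y * y */
    }

    double
    root (double y)
    {
      return __builtin_pow (y, 0.5);  /* rewritten as sqrt (y), provided the
                                         target supports a vector IFN_SQRT */
    }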
2339 : :
2340 : : static gimple *
2341 : 29949046 : vect_recog_pow_pattern (vec_info *vinfo,
2342 : : stmt_vec_info stmt_vinfo, tree *type_out)
2343 : : {
2344 : 29949046 : gimple *last_stmt = stmt_vinfo->stmt;
2345 : 29949046 : tree base, exp;
2346 : 29949046 : gimple *stmt;
2347 : 29949046 : tree var;
2348 : :
2349 : 29949046 : if (!is_gimple_call (last_stmt) || gimple_call_lhs (last_stmt) == NULL)
2350 : : return NULL;
2351 : :
2352 : 1505396 : switch (gimple_call_combined_fn (last_stmt))
2353 : : {
2354 : 257 : CASE_CFN_POW:
2355 : 257 : CASE_CFN_POWI:
2356 : 257 : break;
2357 : :
2358 : : default:
2359 : : return NULL;
2360 : : }
2361 : :
2362 : 257 : base = gimple_call_arg (last_stmt, 0);
2363 : 257 : exp = gimple_call_arg (last_stmt, 1);
2364 : 257 : if (TREE_CODE (exp) != REAL_CST
2365 : 237 : && TREE_CODE (exp) != INTEGER_CST)
2366 : : {
2367 : 237 : if (flag_unsafe_math_optimizations
2368 : 25 : && TREE_CODE (base) == REAL_CST
2369 : 239 : && gimple_call_builtin_p (last_stmt, BUILT_IN_NORMAL))
2370 : : {
2371 : 2 : combined_fn log_cfn;
2372 : 2 : built_in_function exp_bfn;
2373 : 2 : switch (DECL_FUNCTION_CODE (gimple_call_fndecl (last_stmt)))
2374 : : {
2375 : : case BUILT_IN_POW:
2376 : : log_cfn = CFN_BUILT_IN_LOG;
2377 : : exp_bfn = BUILT_IN_EXP;
2378 : : break;
2379 : 0 : case BUILT_IN_POWF:
2380 : 0 : log_cfn = CFN_BUILT_IN_LOGF;
2381 : 0 : exp_bfn = BUILT_IN_EXPF;
2382 : 0 : break;
2383 : 0 : case BUILT_IN_POWL:
2384 : 0 : log_cfn = CFN_BUILT_IN_LOGL;
2385 : 0 : exp_bfn = BUILT_IN_EXPL;
2386 : 0 : break;
2387 : : default:
2388 : : return NULL;
2389 : : }
2390 : 2 : tree logc = fold_const_call (log_cfn, TREE_TYPE (base), base);
2391 : 2 : tree exp_decl = builtin_decl_implicit (exp_bfn);
2392 : : /* Optimize pow (C, x) as exp (log (C) * x). Normally match.pd
2393 : : does that, but if C is a power of 2, we want to use
2394 : : exp2 (log2 (C) * x) in the non-vectorized version, but for
2395 : : vectorization we don't have vectorized exp2. */
2396 : 2 : if (logc
2397 : 2 : && TREE_CODE (logc) == REAL_CST
2398 : 2 : && exp_decl
2399 : 4 : && lookup_attribute ("omp declare simd",
2400 : 2 : DECL_ATTRIBUTES (exp_decl)))
2401 : : {
2402 : 2 : cgraph_node *node = cgraph_node::get_create (exp_decl);
2403 : 2 : if (node->simd_clones == NULL)
2404 : : {
2405 : 2 : if (targetm.simd_clone.compute_vecsize_and_simdlen == NULL
2406 : 2 : || node->definition)
2407 : : return NULL;
2408 : 2 : expand_simd_clones (node);
2409 : 2 : if (node->simd_clones == NULL)
2410 : : return NULL;
2411 : : }
2412 : 2 : *type_out = get_vectype_for_scalar_type (vinfo, TREE_TYPE (base));
2413 : 2 : if (!*type_out)
2414 : : return NULL;
2415 : 2 : tree def = vect_recog_temp_ssa_var (TREE_TYPE (base), NULL);
2416 : 2 : gimple *g = gimple_build_assign (def, MULT_EXPR, exp, logc);
2417 : 2 : append_pattern_def_seq (vinfo, stmt_vinfo, g);
2418 : 2 : tree res = vect_recog_temp_ssa_var (TREE_TYPE (base), NULL);
2419 : 2 : g = gimple_build_call (exp_decl, 1, def);
2420 : 2 : gimple_call_set_lhs (g, res);
2421 : 2 : return g;
2422 : : }
2423 : : }
2424 : :
2425 : 235 : return NULL;
2426 : : }
2427 : :
2428 : : /* We now have a pow or powi builtin function call with a constant
2429 : : exponent. */
2430 : :
2431 : : /* Catch squaring. */
2432 : 20 : if ((tree_fits_shwi_p (exp)
2433 : 0 : && tree_to_shwi (exp) == 2)
2434 : 20 : || (TREE_CODE (exp) == REAL_CST
2435 : 20 : && real_equal (&TREE_REAL_CST (exp), &dconst2)))
2436 : : {
2437 : 0 : if (!vect_supportable_direct_optab_p (vinfo, TREE_TYPE (base), MULT_EXPR,
2438 : 0 : TREE_TYPE (base), type_out))
2439 : : return NULL;
2440 : :
2441 : 0 : var = vect_recog_temp_ssa_var (TREE_TYPE (base), NULL);
2442 : 0 : stmt = gimple_build_assign (var, MULT_EXPR, base, base);
2443 : 0 : return stmt;
2444 : : }
2445 : :
2446 : : /* Catch square root. */
2447 : 20 : if (TREE_CODE (exp) == REAL_CST
2448 : 20 : && real_equal (&TREE_REAL_CST (exp), &dconsthalf))
2449 : : {
2450 : 10 : *type_out = get_vectype_for_scalar_type (vinfo, TREE_TYPE (base));
2451 : 10 : if (*type_out
2452 : 10 : && direct_internal_fn_supported_p (IFN_SQRT, *type_out,
2453 : : OPTIMIZE_FOR_SPEED))
2454 : : {
2455 : 8 : gcall *stmt = gimple_build_call_internal (IFN_SQRT, 1, base);
2456 : 8 : var = vect_recog_temp_ssa_var (TREE_TYPE (base), stmt);
2457 : 8 : gimple_call_set_lhs (stmt, var);
2458 : 8 : gimple_call_set_nothrow (stmt, true);
2459 : 8 : return stmt;
2460 : : }
2461 : : }
2462 : :
2463 : : return NULL;
2464 : : }
2465 : :
2466 : :
2467 : : /* Function vect_recog_widen_sum_pattern
2468 : :
2469 : : Try to find the following pattern:
2470 : :
2471 : : type x_t;
2472 : : TYPE x_T, sum = init;
2473 : : loop:
2474 : : sum_0 = phi <init, sum_1>
2475 : : S1 x_t = *p;
2476 : : S2 x_T = (TYPE) x_t;
2477 : : S3 sum_1 = x_T + sum_0;
2478 : :
2479 : :    where type 'TYPE' is at least double the size of type 'type', i.e. we're
2480 : : summing elements of type 'type' into an accumulator of type 'TYPE'. This is
2481 : : a special case of a reduction computation.
2482 : :
2483 : : Input:
2484 : :
2485 : : * STMT_VINFO: The stmt from which the pattern search begins. In the example,
2486 : : when this function is called with S3, the pattern {S2,S3} will be detected.
2487 : :
2488 : : Output:
2489 : :
2490 : : * TYPE_OUT: The type of the output of this pattern.
2491 : :
2492 : : * Return value: A new stmt that will be used to replace the sequence of
2493 : : stmts that constitute the pattern. In this case it will be:
2494 : : WIDEN_SUM <x_t, sum_0>
2495 : :
2496 : : Note: The widening-sum idiom is a widening reduction pattern that is
2497 : : vectorized without preserving all the intermediate results. It
2498 : : produces only N/2 (widened) results (by summing up pairs of
2499 : : intermediate results) rather than all N results. Therefore, we
2500 : :    cannot allow this pattern when we want to get all the results, in
2501 : :    the correct order (as is the case when this computation is in an
2502 : :    inner-loop nested in an outer-loop that is being vectorized).  */
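A scalar loop matching this idiom (a sketch; names are illustrative):

    int
    wsum (const unsigned char *p, int n)
    {
      int sum = 0;                /* TYPE accumulator (init).  */
      for (int i = 0; i < n; i++)
        sum += p[i];              /* S2+S3: widen x_t, then accumulate;
                                     becomes WIDEN_SUM <x_t, sum_0>.  */
      return sum;
    }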
2503 : :
2504 : : static gimple *
2505 : 29949046 : vect_recog_widen_sum_pattern (vec_info *vinfo,
2506 : : stmt_vec_info stmt_vinfo, tree *type_out)
2507 : : {
2508 : 29949046 : gimple *last_stmt = stmt_vinfo->stmt;
2509 : 29949046 : tree oprnd0, oprnd1;
2510 : 29949046 : tree type;
2511 : 29949046 : gimple *pattern_stmt;
2512 : 29949046 : tree var;
2513 : :
2514 : : /* Look for the following pattern
2515 : : DX = (TYPE) X;
2516 : : sum_1 = DX + sum_0;
2517 : : In which DX is at least double the size of X, and sum_1 has been
2518 : : recognized as a reduction variable.
2519 : : */
2520 : :
2521 : : /* Starting from LAST_STMT, follow the defs of its uses in search
2522 : : of the above pattern. */
2523 : :
2524 : 29949046 : if (!vect_reassociating_reduction_p (vinfo, stmt_vinfo, PLUS_EXPR,
2525 : : &oprnd0, &oprnd1)
2526 : 34572 : || TREE_CODE (oprnd0) != SSA_NAME
2527 : 29983437 : || !vinfo->lookup_def (oprnd0))
2528 : 29914716 : return NULL;
2529 : :
2530 : 34330 : type = TREE_TYPE (gimple_get_lhs (last_stmt));
2531 : :
2532 : : /* So far so good. Since last_stmt was detected as a (summation) reduction,
2533 : : we know that oprnd1 is the reduction variable (defined by a loop-header
2534 : : phi), and oprnd0 is an ssa-name defined by a stmt in the loop body.
2535 : :      It remains to check that oprnd0 is defined by a cast from type 'type' to type
2536 : : 'TYPE'. */
2537 : :
2538 : 34330 : vect_unpromoted_value unprom0;
2539 : 34330 : if (!vect_look_through_possible_promotion (vinfo, oprnd0, &unprom0)
2540 : 34330 : || TYPE_PRECISION (unprom0.type) * 2 > TYPE_PRECISION (type))
2541 : : return NULL;
2542 : :
2543 : 1685 : vect_pattern_detected ("vect_recog_widen_sum_pattern", last_stmt);
2544 : :
2545 : 1685 : if (!vect_supportable_conv_optab_p (vinfo, type, WIDEN_SUM_EXPR,
2546 : : unprom0.type, type_out))
2547 : : return NULL;
2548 : :
2549 : 0 : var = vect_recog_temp_ssa_var (type, NULL);
2550 : 0 : pattern_stmt = gimple_build_assign (var, WIDEN_SUM_EXPR, unprom0.op, oprnd1);
2551 : :
2552 : 0 : return pattern_stmt;
2553 : : }
2554 : :
2555 : : /* Function vect_recog_bitfield_ref_pattern
2556 : :
2557 : : Try to find the following pattern:
2558 : :
2559 : : bf_value = BIT_FIELD_REF (container, bitsize, bitpos);
2560 : : result = (type_out) bf_value;
2561 : :
2562 : : or
2563 : :
2564 : : if (BIT_FIELD_REF (container, bitsize, bitpos) `cmp` <constant>)
2565 : :
2566 : :    where type_out is a non-bitfield type, that is to say, its precision matches
2567 : : 2^(TYPE_SIZE(type_out) - (TYPE_UNSIGNED (type_out) ? 1 : 2)).
2568 : :
2569 : : Input:
2570 : :
2571 : : * STMT_VINFO: The stmt from which the pattern search begins.
2572 : :      Here it starts with:
2573 : : result = (type_out) bf_value;
2574 : :
2575 : : or
2576 : :
2577 : : if (BIT_FIELD_REF (container, bitsize, bitpos) `cmp` <constant>)
2578 : :
2579 : : Output:
2580 : :
2581 : : * TYPE_OUT: The vector type of the output of this pattern.
2582 : :
2583 : : * Return value: A new stmt that will be used to replace the sequence of
2584 : : stmts that constitute the pattern. If the precision of type_out is bigger
2585 : :    than the precision of the type of bf_value, we perform the widening before the
2586 : :    shifting, since the new precision will be large enough to shift the value, and moving
2587 : : widening operations up the statement chain enables the generation of
2588 : : widening loads. If we are widening and the operation after the pattern is
2589 : : an addition then we mask first and shift later, to enable the generation of
2590 : : shifting adds. In the case of narrowing we will always mask first, shift
2591 : : last and then perform a narrowing operation. This will enable the
2592 : : generation of narrowing shifts.
2593 : :
2594 : : Widening with mask first, shift later:
2595 : : container = (type_out) container;
2596 : : masked = container & (((1 << bitsize) - 1) << bitpos);
2597 : : result = masked >> bitpos;
2598 : :
2599 : : Widening with shift first, mask last:
2600 : : container = (type_out) container;
2601 : : shifted = container >> bitpos;
2602 : : result = shifted & ((1 << bitsize) - 1);
2603 : :
2604 : : Narrowing:
2605 : : masked = container & (((1 << bitsize) - 1) << bitpos);
2606 : : result = masked >> bitpos;
2607 : : result = (type_out) result;
2608 : :
2609 : : If the bitfield is signed and it's wider than type_out, we need to
2610 : : keep the result sign-extended:
2611 : : container = (type) container;
2612 : : masked = container << (prec - bitsize - bitpos);
2613 : : result = (type_out) (masked >> (prec - bitsize));
2614 : :
2615 : : Here type is the signed variant of the wider of type_out and the type
2616 : : of container.
2617 : :
2618 : : The shifting is always optional depending on whether bitpos != 0.
2619 : :    The shift is only generated when bitpos != 0.
2620 : :    When the original bitfield was inside a gcond, a new gcond is also
2621 : :    generated with the new `result` as the operand to the comparison.
2622 : :
2623 : : */
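Reading a bitfield member in a loop produces the BIT_FIELD_REF plus
conversion shape above; the struct layout below is purely illustrative
(a sketch):

    struct s { unsigned int a : 3; unsigned int b : 13; unsigned int c : 16; };

    void
    extract (int *out, const struct s *p, int n)
    {
      for (int i = 0; i < n; i++)
        /* On a little-endian target this is lowered to roughly
           (container >> 3) & 0x1fff, then widened to int.  */
        out[i] = p[i].b;
    }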
2624 : :
2625 : : static gimple *
2626 : 29896072 : vect_recog_bitfield_ref_pattern (vec_info *vinfo, stmt_vec_info stmt_info,
2627 : : tree *type_out)
2628 : : {
2629 : 29896072 : gimple *bf_stmt = NULL;
2630 : 29896072 : tree lhs = NULL_TREE;
2631 : 29896072 : tree ret_type = NULL_TREE;
2632 : 29896072 : gimple *stmt = STMT_VINFO_STMT (stmt_info);
2633 : 29896072 : if (gcond *cond_stmt = dyn_cast <gcond *> (stmt))
2634 : : {
2635 : 5093016 : tree op = gimple_cond_lhs (cond_stmt);
2636 : 5093016 : if (TREE_CODE (op) != SSA_NAME)
2637 : : return NULL;
2638 : 5092783 : bf_stmt = dyn_cast <gassign *> (SSA_NAME_DEF_STMT (op));
2639 : 5092783 : if (TREE_CODE (gimple_cond_rhs (cond_stmt)) != INTEGER_CST)
2640 : : return NULL;
2641 : : }
2642 : 24803056 : else if (is_gimple_assign (stmt)
2643 : 20199862 : && CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (stmt))
2644 : 27556997 : && TREE_CODE (gimple_assign_rhs1 (stmt)) == SSA_NAME)
2645 : : {
2646 : 2709075 : gimple *second_stmt = SSA_NAME_DEF_STMT (gimple_assign_rhs1 (stmt));
2647 : 2709075 : bf_stmt = dyn_cast <gassign *> (second_stmt);
2648 : 2709075 : lhs = gimple_assign_lhs (stmt);
2649 : 2709075 : ret_type = TREE_TYPE (lhs);
2650 : : }
2651 : :
2652 : 6063603 : if (!bf_stmt
2653 : 6063603 : || gimple_assign_rhs_code (bf_stmt) != BIT_FIELD_REF)
2654 : : return NULL;
2655 : :
2656 : 14907 : tree bf_ref = gimple_assign_rhs1 (bf_stmt);
2657 : 14907 : tree container = TREE_OPERAND (bf_ref, 0);
2658 : 14907 : ret_type = ret_type ? ret_type : TREE_TYPE (container);
2659 : :
2660 : 14907 : if (!bit_field_offset (bf_ref).is_constant ()
2661 : 14907 : || !bit_field_size (bf_ref).is_constant ()
2662 : 14907 : || !tree_fits_uhwi_p (TYPE_SIZE (TREE_TYPE (container))))
2663 : : return NULL;
2664 : :
2665 : 29436 : if (!INTEGRAL_TYPE_P (TREE_TYPE (bf_ref))
2666 : 14905 : || !INTEGRAL_TYPE_P (TREE_TYPE (container))
2667 : 17065 : || TYPE_MODE (TREE_TYPE (container)) == E_BLKmode)
2668 : 12749 : return NULL;
2669 : :
2670 : 2158 : gimple *use_stmt, *pattern_stmt;
2671 : 2158 : use_operand_p use_p;
2672 : 2158 : bool shift_first = true;
2673 : 2158 : tree container_type = TREE_TYPE (container);
2674 : 2158 : tree vectype = get_vectype_for_scalar_type (vinfo, container_type);
2675 : :
2676 : : /* Calculate shift_n before the adjustments for widening loads, otherwise
2677 : : the container may change and we have to consider offset change for
2678 : : widening loads on big endianness. The shift_n calculated here can be
2679 : : independent of widening. */
2680 : 2158 : unsigned HOST_WIDE_INT shift_n = bit_field_offset (bf_ref).to_constant ();
2681 : 2158 : unsigned HOST_WIDE_INT mask_width = bit_field_size (bf_ref).to_constant ();
2682 : 2158 : unsigned HOST_WIDE_INT prec = tree_to_uhwi (TYPE_SIZE (container_type));
2683 : 2158 : if (BYTES_BIG_ENDIAN)
2684 : : shift_n = prec - shift_n - mask_width;
2685 : :
2686 : 2158 : bool ref_sext = (!TYPE_UNSIGNED (TREE_TYPE (bf_ref)) &&
2687 : 1398 : TYPE_PRECISION (ret_type) > mask_width);
2688 : 2158 : bool load_widen = (TYPE_PRECISION (TREE_TYPE (container)) <
2689 : 2158 : TYPE_PRECISION (ret_type));
2690 : :
2691 : : /* We move the conversion earlier if the loaded type is smaller than the
2692 : : return type to enable the use of widening loads. And if we need a
2693 : : sign extension, we need to convert the loaded value early to a signed
2694 : : type as well. */
2695 : 2158 : if (ref_sext || load_widen)
2696 : : {
2697 : 945 : tree type = load_widen ? ret_type : container_type;
2698 : 945 : if (ref_sext)
2699 : 906 : type = gimple_signed_type (type);
2700 : 945 : pattern_stmt = gimple_build_assign (vect_recog_temp_ssa_var (type),
2701 : : NOP_EXPR, container);
2702 : 945 : container = gimple_get_lhs (pattern_stmt);
2703 : 945 : container_type = TREE_TYPE (container);
2704 : 945 : prec = tree_to_uhwi (TYPE_SIZE (container_type));
2705 : 945 : vectype = get_vectype_for_scalar_type (vinfo, container_type);
2706 : 945 : append_pattern_def_seq (vinfo, stmt_info, pattern_stmt, vectype);
2707 : : }
2708 : 1213 : else if (!useless_type_conversion_p (TREE_TYPE (container), ret_type))
2709 : : /* If we are doing the conversion last then also delay the shift as we may
2710 : : be able to combine the shift and conversion in certain cases. */
2711 : : shift_first = false;
2712 : :
2713 : : /* If the only use of the result of this BIT_FIELD_REF + CONVERT is a
2714 : : PLUS_EXPR then do the shift last as some targets can combine the shift and
2715 : : add into a single instruction. */
2716 : 1417 : if (lhs && !is_pattern_stmt_p (stmt_info)
2717 : 3575 : && single_imm_use (lhs, &use_p, &use_stmt))
2718 : : {
2719 : 1053 : if (gimple_code (use_stmt) == GIMPLE_ASSIGN
2720 : 1053 : && gimple_assign_rhs_code (use_stmt) == PLUS_EXPR)
2721 : : shift_first = false;
2722 : : }
2723 : :
2724 : : /* If we don't have to shift we only generate the mask, so just fix the
2725 : : code-path to shift_first. */
2726 : 2158 : if (shift_n == 0)
2727 : 758 : shift_first = true;
2728 : :
2729 : 2158 : tree result;
2730 : 2158 : if (shift_first && !ref_sext)
2731 : : {
2732 : 500 : tree shifted = container;
2733 : 500 : if (shift_n)
2734 : : {
2735 : 59 : pattern_stmt
2736 : 59 : = gimple_build_assign (vect_recog_temp_ssa_var (container_type),
2737 : : RSHIFT_EXPR, container,
2738 : 59 : build_int_cst (sizetype, shift_n));
2739 : 59 : shifted = gimple_assign_lhs (pattern_stmt);
2740 : 59 : append_pattern_def_seq (vinfo, stmt_info, pattern_stmt, vectype);
2741 : : }
2742 : :
2743 : 500 : tree mask = wide_int_to_tree (container_type,
2744 : 500 : wi::mask (mask_width, false, prec));
2745 : :
2746 : 500 : pattern_stmt
2747 : 500 : = gimple_build_assign (vect_recog_temp_ssa_var (container_type),
2748 : : BIT_AND_EXPR, shifted, mask);
2749 : 500 : result = gimple_assign_lhs (pattern_stmt);
2750 : : }
2751 : : else
2752 : : {
2753 : 1658 : tree temp = vect_recog_temp_ssa_var (container_type);
2754 : 1658 : if (!ref_sext)
2755 : : {
2756 : 752 : tree mask = wide_int_to_tree (container_type,
2757 : 752 : wi::shifted_mask (shift_n,
2758 : : mask_width,
2759 : : false, prec));
2760 : 752 : pattern_stmt = gimple_build_assign (temp, BIT_AND_EXPR,
2761 : : container, mask);
2762 : : }
2763 : : else
2764 : : {
2765 : 906 : HOST_WIDE_INT shl = prec - shift_n - mask_width;
2766 : 906 : shift_n += shl;
2767 : 906 : pattern_stmt = gimple_build_assign (temp, LSHIFT_EXPR,
2768 : : container,
2769 : : build_int_cst (sizetype,
2770 : 906 : shl));
2771 : : }
2772 : :
2773 : 1658 : tree masked = gimple_assign_lhs (pattern_stmt);
2774 : 1658 : append_pattern_def_seq (vinfo, stmt_info, pattern_stmt, vectype);
2775 : 1658 : pattern_stmt
2776 : 1658 : = gimple_build_assign (vect_recog_temp_ssa_var (container_type),
2777 : : RSHIFT_EXPR, masked,
2778 : 1658 : build_int_cst (sizetype, shift_n));
2779 : 1658 : result = gimple_assign_lhs (pattern_stmt);
2780 : : }
2781 : :
2782 : 2158 : if (!useless_type_conversion_p (TREE_TYPE (result), ret_type))
2783 : : {
2784 : 1436 : append_pattern_def_seq (vinfo, stmt_info, pattern_stmt, vectype);
2785 : 1436 : pattern_stmt
2786 : 1436 : = gimple_build_assign (vect_recog_temp_ssa_var (ret_type),
2787 : : NOP_EXPR, result);
2788 : : }
2789 : :
2790 : 2158 : if (!lhs)
2791 : : {
2792 : 741 : if (!vectype)
2793 : : return NULL;
2794 : :
2795 : 603 : append_pattern_def_seq (vinfo, stmt_info, pattern_stmt, vectype);
2796 : 603 : vectype = truth_type_for (vectype);
2797 : :
2798 : : /* FIXME: This part extracts the boolean value out of the bitfield in the
2799 : : same way as vect_recog_gcond_pattern does. However because
2800 : : patterns cannot match the same root twice, when we handle and
2801 : : lower the bitfield in the gcond, vect_recog_gcond_pattern can't
2802 : : apply anymore. We should really fix it so that we don't need to
2803 : : duplicate transformations like these. */
2804 : 603 : tree new_lhs = vect_recog_temp_ssa_var (boolean_type_node, NULL);
2805 : 603 : gcond *cond_stmt = dyn_cast <gcond *> (stmt_info->stmt);
2806 : 603 : tree cond_cst = gimple_cond_rhs (cond_stmt);
2807 : 603 : gimple *new_stmt
2808 : 603 : = gimple_build_assign (new_lhs, gimple_cond_code (cond_stmt),
2809 : : gimple_get_lhs (pattern_stmt),
2810 : : fold_convert (container_type, cond_cst));
2811 : 603 : append_pattern_def_seq (vinfo, stmt_info, new_stmt, vectype, container_type);
2812 : 603 : pattern_stmt
2813 : 603 : = gimple_build_cond (NE_EXPR, new_lhs,
2814 : 603 : build_zero_cst (TREE_TYPE (new_lhs)),
2815 : : NULL_TREE, NULL_TREE);
2816 : : }
2817 : :
2818 : 2020 : *type_out = STMT_VINFO_VECTYPE (stmt_info);
2819 : 2020 : vect_pattern_detected ("bitfield_ref pattern", stmt_info->stmt);
2820 : :
2821 : 2020 : return pattern_stmt;
2822 : : }
2823 : :
2824 : : /* Function vect_recog_bit_insert_pattern
2825 : :
2826 : : Try to find the following pattern:
2827 : :
2828 : : written = BIT_INSERT_EXPR (container, value, bitpos);
2829 : :
2830 : : Input:
2831 : :
2832 : : * STMT_VINFO: The stmt we want to replace.
2833 : :
2834 : : Output:
2835 : :
2836 : : * TYPE_OUT: The vector type of the output of this pattern.
2837 : :
2838 : : * Return value: A new stmt that will be used to replace the sequence of
2839 : : stmts that constitute the pattern. In this case it will be:
2840 : : value = (container_type) value; // Make sure
2841 : : shifted = value << bitpos; // Shift value into place
2842 : : masked = shifted & (mask << bitpos); // Mask off the non-relevant bits in
2843 : : // the 'to-write value'.
2844 : : cleared = container & ~(mask << bitpos); // Clear the bits in the
2845 : : // container that we are about
2846 : : // to write to.
2847 : : written = cleared | masked; // Write bits.
2848 : :
2849 : :
2850 : : where mask = ((1 << TYPE_PRECISION (value)) - 1), a mask covering exactly
2851 : : the bits of the bitfield value we are writing.
2852 : : The shift is omitted when bitpos == 0.
2853 : :
2854 : : */
2855 : :
2856 : : static gimple *
2857 : 29899087 : vect_recog_bit_insert_pattern (vec_info *vinfo, stmt_vec_info stmt_info,
2858 : : tree *type_out)
2859 : : {
2860 : 29899087 : gassign *bf_stmt = dyn_cast <gassign *> (stmt_info->stmt);
2861 : 27021288 : if (!bf_stmt || gimple_assign_rhs_code (bf_stmt) != BIT_INSERT_EXPR)
2862 : : return NULL;
2863 : :
2864 : 563 : tree container = gimple_assign_rhs1 (bf_stmt);
2865 : 563 : tree value = gimple_assign_rhs2 (bf_stmt);
2866 : 563 : tree shift = gimple_assign_rhs3 (bf_stmt);
2867 : :
2868 : 563 : tree bf_type = TREE_TYPE (value);
2869 : 563 : tree container_type = TREE_TYPE (container);
2870 : :
2871 : 563 : if (!INTEGRAL_TYPE_P (container_type)
2872 : 563 : || !tree_fits_uhwi_p (TYPE_SIZE (container_type)))
2873 : : return NULL;
2874 : :
2875 : 459 : gimple *pattern_stmt;
2876 : :
2877 : 459 : vect_unpromoted_value unprom;
2878 : 459 : unprom.set_op (value, vect_internal_def);
2879 : 459 : value = vect_convert_input (vinfo, stmt_info, container_type, &unprom,
2880 : : get_vectype_for_scalar_type (vinfo,
2881 : : container_type));
2882 : :
2883 : 459 : unsigned HOST_WIDE_INT mask_width = TYPE_PRECISION (bf_type);
2884 : 459 : unsigned HOST_WIDE_INT prec = tree_to_uhwi (TYPE_SIZE (container_type));
2885 : 459 : unsigned HOST_WIDE_INT shift_n = tree_to_uhwi (shift);
2886 : 459 : if (BYTES_BIG_ENDIAN)
2887 : : {
2888 : : shift_n = prec - shift_n - mask_width;
2889 : : shift = build_int_cst (TREE_TYPE (shift), shift_n);
2890 : : }
2891 : :
2892 : 459 : if (!useless_type_conversion_p (TREE_TYPE (value), container_type))
2893 : : {
2894 : 0 : pattern_stmt =
2895 : 0 : gimple_build_assign (vect_recog_temp_ssa_var (container_type),
2896 : : NOP_EXPR, value);
2897 : 0 : append_pattern_def_seq (vinfo, stmt_info, pattern_stmt);
2898 : 0 : value = gimple_get_lhs (pattern_stmt);
2899 : : }
2900 : :
2901 : : /* Shift VALUE into place. */
2902 : 459 : tree shifted = value;
2903 : 459 : if (shift_n)
2904 : : {
2905 : 241 : gimple_seq stmts = NULL;
2906 : 241 : shifted
2907 : 241 : = gimple_build (&stmts, LSHIFT_EXPR, container_type, value, shift);
2908 : 241 : if (!gimple_seq_empty_p (stmts))
2909 : 112 : append_pattern_def_seq (vinfo, stmt_info,
2910 : : gimple_seq_first_stmt (stmts));
2911 : : }
2912 : :
2913 : 459 : tree mask_t
2914 : 459 : = wide_int_to_tree (container_type,
2915 : 459 : wi::shifted_mask (shift_n, mask_width, false, prec));
2916 : :
2917 : : /* Clear bits we don't want to write back from SHIFTED. */
2918 : 459 : gimple_seq stmts = NULL;
2919 : 459 : tree masked = gimple_build (&stmts, BIT_AND_EXPR, container_type, shifted,
2920 : : mask_t);
2921 : 459 : if (!gimple_seq_empty_p (stmts))
2922 : : {
2923 : 110 : pattern_stmt = gimple_seq_first_stmt (stmts);
2924 : 110 : append_pattern_def_seq (vinfo, stmt_info, pattern_stmt);
2925 : : }
2926 : :
2927 : : /* Mask off the bits in the container that we are to write to. */
2928 : 459 : mask_t = wide_int_to_tree (container_type,
2929 : 459 : wi::shifted_mask (shift_n, mask_width, true, prec));
2930 : 459 : tree cleared = vect_recog_temp_ssa_var (container_type);
2931 : 459 : pattern_stmt = gimple_build_assign (cleared, BIT_AND_EXPR, container, mask_t);
2932 : 459 : append_pattern_def_seq (vinfo, stmt_info, pattern_stmt);
2933 : :
2934 : : /* Write MASKED into CLEARED. */
2935 : 459 : pattern_stmt
2936 : 459 : = gimple_build_assign (vect_recog_temp_ssa_var (container_type),
2937 : : BIT_IOR_EXPR, cleared, masked);
2938 : :
2939 : 459 : *type_out = STMT_VINFO_VECTYPE (stmt_info);
2940 : 459 : vect_pattern_detected ("bit_insert pattern", stmt_info->stmt);
2941 : :
2942 : 459 : return pattern_stmt;
2943 : : }
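 : :
 : : /* A scalar model of the lowering above (a minimal sketch for an 8-bit
 : :    field at bit position POS of a 32-bit container):
 : :
 : :      unsigned int
 : :      insert_field (unsigned int container, unsigned int value,
 : :                    unsigned int pos)
 : :      {
 : :        unsigned int mask = 0xffu << pos;
 : :        unsigned int shifted = value << pos;       // Shift value into place.
 : :        unsigned int masked = shifted & mask;      // Drop bits outside the field.
 : :        unsigned int cleared = container & ~mask;  // Clear the bits written to.
 : :        return cleared | masked;                   // Write the bits.
 : :      }  */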
2944 : :
2945 : :
2946 : : /* Recognize cases in which an operation is performed in one type WTYPE
2947 : : but could be done more efficiently in a narrower type NTYPE. For example,
2948 : : if we have:
2949 : :
2950 : : ATYPE a; // narrower than NTYPE
2951 : : BTYPE b; // narrower than NTYPE
2952 : : WTYPE aw = (WTYPE) a;
2953 : : WTYPE bw = (WTYPE) b;
2954 : : WTYPE res = aw + bw; // only uses of aw and bw
2955 : :
2956 : : then it would be more efficient to do:
2957 : :
2958 : : NTYPE an = (NTYPE) a;
2959 : : NTYPE bn = (NTYPE) b;
2960 : : NTYPE resn = an + bn;
2961 : : WTYPE res = (WTYPE) resn;
2962 : :
2963 : : Other situations include things like:
2964 : :
2965 : : ATYPE a; // NTYPE or narrower
2966 : : WTYPE aw = (WTYPE) a;
2967 : : WTYPE res = aw + b;
2968 : :
2969 : : when only "(NTYPE) res" is significant. In that case it's more efficient
2970 : : to truncate "b" and do the operation on NTYPE instead:
2971 : :
2972 : : NTYPE an = (NTYPE) a;
2973 : : NTYPE bn = (NTYPE) b; // truncation
2974 : : NTYPE resn = an + bn;
2975 : : WTYPE res = (WTYPE) resn;
2976 : :
2977 : : All users of "res" should then use "resn" instead, making the final
2978 : : statement dead (not marked as relevant). The final statement is still
2979 : : needed to maintain the type correctness of the IR.
2980 : :
2981 : : vect_determine_precisions has already determined the minimum
2982 : : precision of the operation and the minimum precision required
2983 : : by users of the result. */
2984 : :
2985 : : static gimple *
2986 : 29899527 : vect_recog_over_widening_pattern (vec_info *vinfo,
2987 : : stmt_vec_info last_stmt_info, tree *type_out)
2988 : : {
2989 : 29899527 : gassign *last_stmt = dyn_cast <gassign *> (last_stmt_info->stmt);
2990 : 20203920 : if (!last_stmt)
2991 : : return NULL;
2992 : :
2993 : : /* See whether we have found that this operation can be done on a
2994 : : narrower type without changing its semantics. */
2995 : 20203920 : unsigned int new_precision = last_stmt_info->operation_precision;
2996 : 20203920 : if (!new_precision)
2997 : : return NULL;
2998 : :
2999 : 1481998 : tree lhs = gimple_assign_lhs (last_stmt);
3000 : 1481998 : tree type = TREE_TYPE (lhs);
3001 : 1481998 : tree_code code = gimple_assign_rhs_code (last_stmt);
3002 : :
3003 : : /* Punt for reductions where we don't handle the type conversions. */
3004 : 1481998 : if (vect_is_reduction (last_stmt_info))
3005 : : return NULL;
3006 : :
3007 : : /* Keep the first operand of a COND_EXPR as-is: only the other two
3008 : : operands are interesting. */
3009 : 1474718 : unsigned int first_op = (code == COND_EXPR ? 2 : 1);
3010 : :
3011 : : /* Check the operands. */
3012 : 1474718 : unsigned int nops = gimple_num_ops (last_stmt) - first_op;
3013 : 1474718 : auto_vec <vect_unpromoted_value, 3> unprom (nops);
3014 : 1474718 : unprom.quick_grow_cleared (nops);
3015 : 1474718 : unsigned int min_precision = 0;
3016 : 1474718 : bool single_use_p = false;
3017 : 4408886 : for (unsigned int i = 0; i < nops; ++i)
3018 : : {
3019 : 2934837 : tree op = gimple_op (last_stmt, first_op + i);
3020 : 2934837 : if (TREE_CODE (op) == INTEGER_CST)
3021 : 1341652 : unprom[i].set_op (op, vect_constant_def);
3022 : 1593185 : else if (TREE_CODE (op) == SSA_NAME)
3023 : : {
3024 : 1593185 : bool op_single_use_p = true;
3025 : 1593185 : if (!vect_look_through_possible_promotion (vinfo, op, &unprom[i],
3026 : : &op_single_use_p))
3027 : 669 : return NULL;
3028 : : /* If:
3029 : :
3030 : : (1) N bits of the result are needed;
3031 : : (2) all inputs are widened from M<N bits; and
3032 : : (3) one operand OP is a single-use SSA name
3033 : :
3034 : : we can shift the M->N widening from OP to the output
3035 : : without changing the number or type of extensions involved.
3036 : : This then reduces the number of copies of STMT_INFO.
3037 : :
3038 : : If instead of (3) more than one operand is a single-use SSA name,
3039 : : shifting the extension to the output is even more of a win.
3040 : :
3041 : : If instead:
3042 : :
3043 : : (1) N bits of the result are needed;
3044 : : (2) one operand OP2 is widened from M2<N bits;
3045 : : (3) another operand OP1 is widened from M1<M2 bits; and
3046 : : (4) both OP1 and OP2 are single-use
3047 : :
3048 : : the choice is between:
3049 : :
3050 : : (a) truncating OP2 to M1, doing the operation on M1,
3051 : : and then widening the result to N
3052 : :
3053 : : (b) widening OP1 to M2, doing the operation on M2, and then
3054 : : widening the result to N
3055 : :
3056 : : Both shift the M2->N widening of the inputs to the output.
3057 : : (a) additionally shifts the M1->M2 widening to the output;
3058 : : it requires fewer copies of STMT_INFO but requires an extra
3059 : : M2->M1 truncation.
3060 : :
3061 : : Which is better will depend on the complexity and cost of
3062 : : STMT_INFO, which is hard to predict at this stage. However,
3063 : : a clear tie-breaker in favor of (b) is the fact that the
3064 : : truncation in (a) increases the length of the operation chain.
3065 : :
3066 : : If instead of (4) only one of OP1 or OP2 is single-use,
3067 : : (b) is still a win over doing the operation in N bits:
3068 : : it still shifts the M2->N widening on the single-use operand
3069 : : to the output and reduces the number of STMT_INFO copies.
3070 : :
3071 : : If neither operand is single-use then operating on fewer than
3072 : : N bits might lead to more extensions overall. Whether it does
3073 : : or not depends on global information about the vectorization
3074 : : region, and whether that's a good trade-off would again
3075 : : depend on the complexity and cost of the statements involved,
3076 : : as well as things like register pressure that are not normally
3077 : : modelled at this stage. We therefore ignore these cases
3078 : : and just optimize the clear single-use wins above.
3079 : :
3080 : : Thus we take the maximum precision of the unpromoted operands
3081 : : and record whether any operand is single-use. */
3082 : 1592516 : if (unprom[i].dt == vect_internal_def)
3083 : : {
3084 : 993299 : min_precision = MAX (min_precision,
3085 : : TYPE_PRECISION (unprom[i].type));
3086 : 993299 : single_use_p |= op_single_use_p;
3087 : : }
3088 : : }
3089 : : else
3090 : : return NULL;
3091 : : }
3092 : :
3093 : : /* Although the operation could be done in operation_precision, we have
3094 : : to balance that against introducing extra truncations or extensions.
3095 : : Calculate the minimum precision that can be handled efficiently.
3096 : :
3097 : : The loop above determined that the operation could be handled
3098 : : efficiently in MIN_PRECISION if SINGLE_USE_P; this would shift an
3099 : : extension from the inputs to the output without introducing more
3100 : : instructions, and would reduce the number of instructions required
3101 : : for STMT_INFO itself.
3102 : :
3103 : : vect_determine_precisions has also determined that the result only
3104 : : needs min_output_precision bits. Truncating by a factor of N
3105 : : requires a tree of N - 1 instructions, so if TYPE is N times wider
3106 : : than min_output_precision, doing the operation in TYPE and truncating
3107 : : the result requires N + (N - 1) = 2N - 1 instructions per output vector.
3108 : : In contrast:
3109 : :
3110 : : - truncating the input to a unary operation and doing the operation
3111 : : in the new type requires at most N - 1 + 1 = N instructions per
3112 : : output vector
3113 : :
3114 : : - doing the same for a binary operation requires at most
3115 : : (N - 1) * 2 + 1 = 2N - 1 instructions per output vector
3116 : :
3117 : : Both unary and binary operations require fewer instructions than
3118 : : this if the operands were extended from a suitable truncated form.
3119 : : Thus there is usually nothing to lose by doing operations in
3120 : : min_output_precision bits, but there can be something to gain. */
3121 : 1474049 : if (!single_use_p)
3122 : 1141782 : min_precision = last_stmt_info->min_output_precision;
3123 : : else
3124 : 332267 : min_precision = MIN (min_precision, last_stmt_info->min_output_precision);
3125 : :
3126 : : /* Apply the minimum efficient precision we just calculated. */
3127 : 1474049 : if (new_precision < min_precision)
3128 : : new_precision = min_precision;
3129 : 1474049 : new_precision = vect_element_precision (new_precision);
3130 : 1474049 : if (new_precision >= TYPE_PRECISION (type))
3131 : : return NULL;
3132 : :
3133 : 145590 : vect_pattern_detected ("vect_recog_over_widening_pattern", last_stmt);
3134 : :
3135 : 145590 : *type_out = get_vectype_for_scalar_type (vinfo, type);
3136 : 145590 : if (!*type_out)
3137 : : return NULL;
3138 : :
3139 : : /* We've found a viable pattern. Get the new type of the operation. */
3140 : 129540 : bool unsigned_p = (last_stmt_info->operation_sign == UNSIGNED);
3141 : 129540 : tree new_type = build_nonstandard_integer_type (new_precision, unsigned_p);
3142 : :
3143 : : /* If we're truncating an operation, we need to make sure that we
3144 : : don't introduce new undefined overflow. The codes tested here are
3145 : : a subset of those accepted by vect_truncatable_operation_p. */
3146 : 129540 : tree op_type = new_type;
3147 : 129540 : if (TYPE_OVERFLOW_UNDEFINED (new_type)
3148 : 165696 : && (code == PLUS_EXPR || code == MINUS_EXPR || code == MULT_EXPR))
3149 : 25328 : op_type = build_nonstandard_integer_type (new_precision, true);
3150 : :
3151 : : /* We specifically don't check here whether the target supports the
3152 : : new operation, since it might be something that a later pattern
3153 : : wants to rewrite anyway. If targets have a minimum element size
3154 : : for some optabs, we should pattern-match smaller ops to larger ops
3155 : : where beneficial. */
3156 : 129540 : tree new_vectype = get_vectype_for_scalar_type (vinfo, new_type);
3157 : 129540 : tree op_vectype = get_vectype_for_scalar_type (vinfo, op_type);
3158 : 129540 : if (!new_vectype || !op_vectype)
3159 : : return NULL;
3160 : :
3161 : 129540 : if (dump_enabled_p ())
3162 : 4057 : dump_printf_loc (MSG_NOTE, vect_location, "demoting %T to %T\n",
3163 : : type, new_type);
3164 : :
3165 : : /* Calculate the rhs operands for an operation on OP_TYPE. */
3166 : 129540 : tree ops[3] = {};
3167 : 129767 : for (unsigned int i = 1; i < first_op; ++i)
3168 : 227 : ops[i - 1] = gimple_op (last_stmt, i);
3169 : 129540 : vect_convert_inputs (vinfo, last_stmt_info, nops, &ops[first_op - 1],
3170 : 129540 : op_type, &unprom[0], op_vectype);
3171 : :
3172 : : /* Use the operation to produce a result of type OP_TYPE. */
3173 : 129540 : tree new_var = vect_recog_temp_ssa_var (op_type, NULL);
3174 : 129540 : gimple *pattern_stmt = gimple_build_assign (new_var, code,
3175 : : ops[0], ops[1], ops[2]);
3176 : 129540 : gimple_set_location (pattern_stmt, gimple_location (last_stmt));
3177 : :
3178 : 129540 : if (dump_enabled_p ())
3179 : 4057 : dump_printf_loc (MSG_NOTE, vect_location,
3180 : : "created pattern stmt: %G", pattern_stmt);
3181 : :
3182 : : /* Convert back to the original signedness, if OP_TYPE is different
3183 : : from NEW_TYPE. */
3184 : 129540 : if (op_type != new_type)
3185 : 25328 : pattern_stmt = vect_convert_output (vinfo, last_stmt_info, new_type,
3186 : : pattern_stmt, op_vectype);
3187 : :
3188 : : /* Promote the result to the original type. */
3189 : 129540 : pattern_stmt = vect_convert_output (vinfo, last_stmt_info, type,
3190 : : pattern_stmt, new_vectype);
3191 : :
3192 : 129540 : return pattern_stmt;
3193 : 1474718 : }
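 : :
 : : /* For example (a minimal sketch): given
 : :
 : :      unsigned char a, b, c;
 : :      c = (unsigned char) ((unsigned int) a + (unsigned int) b);
 : :
 : :    only the low 8 bits of the sum are needed, so the addition is done
 : :    directly on unsigned char, and on a 128-bit vector target one
 : :    16-lane QImode addition replaces four SImode additions plus the
 : :    pack statements. */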
3194 : :
3195 : : /* Recognize the following patterns:
3196 : :
3197 : : ATYPE a; // narrower than TYPE
3198 : : BTYPE b; // narrower than TYPE
3199 : :
3200 : : 1) Multiply high with scaling
3201 : : TYPE res = ((TYPE) a * (TYPE) b) >> c;
3202 : : Here, c is bitsize (TYPE) / 2 - 1.
3203 : :
3204 : : 2) Multiply high with scaling and rounding
3205 : : TYPE res = ((((TYPE) a * (TYPE) b) >> d) + 1) >> 1;
3206 : : Here, d is bitsize (TYPE) / 2 - 2.
3207 : :
3208 : : 3) Normal multiply high
3209 : : TYPE res = ((TYPE) a * (TYPE) b) >> e;
3210 : : Here, e is bitsize (TYPE) / 2.
3211 : :
3212 : : where only the bottom half of res is used. */
3213 : :
3214 : : static gimple *
3215 : 30018663 : vect_recog_mulhs_pattern (vec_info *vinfo,
3216 : : stmt_vec_info last_stmt_info, tree *type_out)
3217 : : {
3218 : : /* Check for a right shift. */
3219 : 30018663 : gassign *last_stmt = dyn_cast <gassign *> (last_stmt_info->stmt);
3220 : 20322933 : if (!last_stmt
3221 : 20322933 : || gimple_assign_rhs_code (last_stmt) != RSHIFT_EXPR)
3222 : : return NULL;
3223 : :
3224 : : /* Check that the shift result is wider than the users of the
3225 : : result need (i.e. that narrowing would be a natural choice). */
3226 : 345310 : tree lhs_type = TREE_TYPE (gimple_assign_lhs (last_stmt));
3227 : 345310 : unsigned int target_precision
3228 : 345310 : = vect_element_precision (last_stmt_info->min_output_precision);
3229 : 345310 : if (!INTEGRAL_TYPE_P (lhs_type)
3230 : 345310 : || target_precision >= TYPE_PRECISION (lhs_type))
3231 : : return NULL;
3232 : :
3233 : : /* Look through any change in sign on the outer shift input. */
3234 : 45343 : vect_unpromoted_value unprom_rshift_input;
3235 : 45343 : tree rshift_input = vect_look_through_possible_promotion
3236 : 45343 : (vinfo, gimple_assign_rhs1 (last_stmt), &unprom_rshift_input);
3237 : 45343 : if (!rshift_input
3238 : 45343 : || TYPE_PRECISION (TREE_TYPE (rshift_input))
3239 : 44775 : != TYPE_PRECISION (lhs_type))
3240 : : return NULL;
3241 : :
3242 : : /* Get the definition of the shift input. */
3243 : 42663 : stmt_vec_info rshift_input_stmt_info
3244 : 42663 : = vect_get_internal_def (vinfo, rshift_input);
3245 : 42663 : if (!rshift_input_stmt_info)
3246 : : return NULL;
3247 : 37795 : gassign *rshift_input_stmt
3248 : 30050768 : = dyn_cast <gassign *> (rshift_input_stmt_info->stmt);
3249 : 32192 : if (!rshift_input_stmt)
3250 : : return NULL;
3251 : :
3252 : 32192 : stmt_vec_info mulh_stmt_info;
3253 : 32192 : tree scale_term;
3254 : 32192 : bool rounding_p = false;
3255 : :
3256 : : /* Check for the presence of the rounding term. */
3257 : 38975 : if (gimple_assign_rhs_code (rshift_input_stmt) == PLUS_EXPR)
3258 : : {
3259 : : /* Check that the outer shift was by 1. */
3260 : 18498 : if (!integer_onep (gimple_assign_rhs2 (last_stmt)))
3261 : 9201 : return NULL;
3262 : :
3263 : : /* Check that the second operand of the PLUS_EXPR is 1. */
3264 : 1254 : if (!integer_onep (gimple_assign_rhs2 (rshift_input_stmt)))
3265 : : return NULL;
3266 : :
3267 : : /* Look through any change in sign on the addition input. */
3268 : 88 : vect_unpromoted_value unprom_plus_input;
3269 : 88 : tree plus_input = vect_look_through_possible_promotion
3270 : 88 : (vinfo, gimple_assign_rhs1 (rshift_input_stmt), &unprom_plus_input);
3271 : 88 : if (!plus_input
3272 : 88 : || TYPE_PRECISION (TREE_TYPE (plus_input))
3273 : 88 : != TYPE_PRECISION (TREE_TYPE (rshift_input)))
3274 : : return NULL;
3275 : :
3276 : : /* Get the definition of the multiply-high-scale part. */
3277 : 88 : stmt_vec_info plus_input_stmt_info
3278 : 88 : = vect_get_internal_def (vinfo, plus_input);
3279 : 88 : if (!plus_input_stmt_info)
3280 : : return NULL;
3281 : 88 : gassign *plus_input_stmt
3282 : 9289 : = dyn_cast <gassign *> (plus_input_stmt_info->stmt);
3283 : 88 : if (!plus_input_stmt
3284 : 88 : || gimple_assign_rhs_code (plus_input_stmt) != RSHIFT_EXPR)
3285 : : return NULL;
3286 : :
3287 : : /* Look through any change in sign on the scaling input. */
3288 : 48 : vect_unpromoted_value unprom_scale_input;
3289 : 48 : tree scale_input = vect_look_through_possible_promotion
3290 : 48 : (vinfo, gimple_assign_rhs1 (plus_input_stmt), &unprom_scale_input);
3291 : 48 : if (!scale_input
3292 : 48 : || TYPE_PRECISION (TREE_TYPE (scale_input))
3293 : 48 : != TYPE_PRECISION (TREE_TYPE (plus_input)))
3294 : : return NULL;
3295 : :
3296 : : /* Get the definition of the multiply-high part. */
3297 : 48 : mulh_stmt_info = vect_get_internal_def (vinfo, scale_input);
3298 : 48 : if (!mulh_stmt_info)
3299 : : return NULL;
3300 : :
3301 : : /* Get the scaling term. */
3302 : 48 : scale_term = gimple_assign_rhs2 (plus_input_stmt);
3303 : 48 : rounding_p = true;
3304 : : }
3305 : : else
3306 : : {
3307 : 22943 : mulh_stmt_info = rshift_input_stmt_info;
3308 : 22943 : scale_term = gimple_assign_rhs2 (last_stmt);
3309 : : }
3310 : :
3311 : : /* Check that the scaling factor is constant. */
3312 : 22991 : if (TREE_CODE (scale_term) != INTEGER_CST)
3313 : : return NULL;
3314 : :
3315 : : /* Check whether the scaling input term can be seen as two widened
3316 : : inputs multiplied together. */
3317 : 66303 : vect_unpromoted_value unprom_mult[2];
3318 : 22101 : tree new_type;
3319 : 22101 : unsigned int nops
3320 : 22101 : = vect_widened_op_tree (vinfo, mulh_stmt_info, MULT_EXPR, WIDEN_MULT_EXPR,
3321 : : false, 2, unprom_mult, &new_type);
3322 : 22101 : if (nops != 2)
3323 : : return NULL;
3324 : :
3325 : : /* Adjust output precision. */
3326 : 1192 : if (TYPE_PRECISION (new_type) < target_precision)
3327 : 0 : new_type = build_nonstandard_integer_type
3328 : 0 : (target_precision, TYPE_UNSIGNED (new_type));
3329 : :
3330 : 1192 : unsigned mult_precision = TYPE_PRECISION (new_type);
3331 : 1192 : internal_fn ifn;
3332 : : /* Check that the scaling factor is the expected one. Note that we
3333 : : check against MULT_PRECISION, the precision the internal function
3334 : : actually operates on, rather than TARGET_PRECISION. */
3335 : 1192 : if (rounding_p)
3336 : : {
3337 : : /* Check pattern 2). */
3338 : 96 : if (wi::to_widest (scale_term) + mult_precision + 2
3339 : 144 : != TYPE_PRECISION (lhs_type))
3340 : : return NULL;
3341 : :
3342 : : ifn = IFN_MULHRS;
3343 : : }
3344 : : else
3345 : : {
3346 : : /* Check for pattern 1). */
3347 : 2288 : if (wi::to_widest (scale_term) + mult_precision + 1
3348 : 3432 : == TYPE_PRECISION (lhs_type))
3349 : : ifn = IFN_MULHS;
3350 : : /* Check for pattern 3). */
3351 : 1110 : else if (wi::to_widest (scale_term) + mult_precision
3352 : 2220 : == TYPE_PRECISION (lhs_type))
3353 : : ifn = IFN_MULH;
3354 : : else
3355 : : return NULL;
3356 : : }
3357 : :
3358 : 1097 : vect_pattern_detected ("vect_recog_mulhs_pattern", last_stmt);
3359 : :
3360 : : /* Check for target support. */
3361 : 1097 : tree new_vectype = get_vectype_for_scalar_type (vinfo, new_type);
3362 : 1097 : if (!new_vectype
3363 : 2178 : || !direct_internal_fn_supported_p
3364 : 1081 : (ifn, new_vectype, OPTIMIZE_FOR_SPEED))
3365 : 1010 : return NULL;
3366 : :
3367 : : /* The IR requires a valid vector type for the cast result, even though
3368 : : it's likely to be discarded. */
3369 : 87 : *type_out = get_vectype_for_scalar_type (vinfo, lhs_type);
3370 : 87 : if (!*type_out)
3371 : : return NULL;
3372 : :
3373 : : /* Generate the IFN_MULHRS call. */
3374 : 87 : tree new_var = vect_recog_temp_ssa_var (new_type, NULL);
3375 : 87 : tree new_ops[2];
3376 : 87 : vect_convert_inputs (vinfo, last_stmt_info, 2, new_ops, new_type,
3377 : : unprom_mult, new_vectype);
3378 : 87 : gcall *mulhrs_stmt
3379 : 87 : = gimple_build_call_internal (ifn, 2, new_ops[0], new_ops[1]);
3380 : 87 : gimple_call_set_lhs (mulhrs_stmt, new_var);
3381 : 87 : gimple_set_location (mulhrs_stmt, gimple_location (last_stmt));
3382 : :
3383 : 87 : if (dump_enabled_p ())
3384 : 0 : dump_printf_loc (MSG_NOTE, vect_location,
3385 : : "created pattern stmt: %G", (gimple *) mulhrs_stmt);
3386 : :
3387 : 87 : return vect_convert_output (vinfo, last_stmt_info, lhs_type,
3388 : 87 : mulhrs_stmt, new_vectype);
3389 : : }
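 : :
 : : /* A concrete instance of pattern 2) (a minimal sketch, assuming 16-bit
 : :    shorts, 32-bit ints and a target providing IFN_MULHRS):
 : :
 : :      short
 : :      q15_mul_round (short a, short b)       // Rounding Q15 multiply.
 : :      {
 : :        int prod = (int) a * (int) b;
 : :        return (short) (((prod >> 14) + 1) >> 1);
 : :      }
 : :
 : :    Here TYPE is int, d = 32 / 2 - 2 = 14, and only the low half of the
 : :    result is used, so the whole body becomes .MULHRS (a, b) on vectors
 : :    of shorts. */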
3390 : :
3391 : : /* Recognize the patterns:
3392 : :
3393 : : ATYPE a; // narrower than TYPE
3394 : : BTYPE b; // narrower than TYPE
3395 : : (1) TYPE avg = ((TYPE) a + (TYPE) b) >> 1;
3396 : : or (2) TYPE avg = ((TYPE) a + (TYPE) b + 1) >> 1;
3397 : :
3398 : : where only the bottom half of avg is used. Try to transform them into:
3399 : :
3400 : : (1) NTYPE avg' = .AVG_FLOOR ((NTYPE) a, (NTYPE) b);
3401 : : or (2) NTYPE avg' = .AVG_CEIL ((NTYPE) a, (NTYPE) b);
3402 : :
3403 : : followed by:
3404 : :
3405 : : TYPE avg = (TYPE) avg';
3406 : :
3407 : : where NTYPE is no wider than half of TYPE. Since only the bottom half
3408 : : of avg is used, all or part of the cast of avg' should become redundant.
3409 : :
3410 : : If there is no target support available, generate code to distribute rshift
3411 : : over plus and add a carry. */
3412 : :
3413 : : static gimple *
3414 : 30017122 : vect_recog_average_pattern (vec_info *vinfo,
3415 : : stmt_vec_info last_stmt_info, tree *type_out)
3416 : : {
3417 : : /* Check for a shift right by one bit. */
3418 : 30017122 : gassign *last_stmt = dyn_cast <gassign *> (last_stmt_info->stmt);
3419 : 20321515 : if (!last_stmt
3420 : 20321515 : || gimple_assign_rhs_code (last_stmt) != RSHIFT_EXPR
3421 : 345199 : || !integer_onep (gimple_assign_rhs2 (last_stmt)))
3422 : 29962808 : return NULL;
3423 : :
3424 : : /* Check that the shift result is wider than the users of the
3425 : : result need (i.e. that narrowing would be a natural choice). */
3426 : 54314 : tree lhs = gimple_assign_lhs (last_stmt);
3427 : 54314 : tree type = TREE_TYPE (lhs);
3428 : 54314 : unsigned int target_precision
3429 : 54314 : = vect_element_precision (last_stmt_info->min_output_precision);
3430 : 54314 : if (!INTEGRAL_TYPE_P (type) || target_precision >= TYPE_PRECISION (type))
3431 : : return NULL;
3432 : :
3433 : : /* Look through any change in sign on the shift input. */
3434 : 2098 : tree rshift_rhs = gimple_assign_rhs1 (last_stmt);
3435 : 2098 : vect_unpromoted_value unprom_plus;
3436 : 2098 : rshift_rhs = vect_look_through_possible_promotion (vinfo, rshift_rhs,
3437 : : &unprom_plus);
3438 : 2098 : if (!rshift_rhs
3439 : 2098 : || TYPE_PRECISION (TREE_TYPE (rshift_rhs)) != TYPE_PRECISION (type))
3440 : : return NULL;
3441 : :
3442 : : /* Get the definition of the shift input. */
3443 : 2096 : stmt_vec_info plus_stmt_info = vect_get_internal_def (vinfo, rshift_rhs);
3444 : 2096 : if (!plus_stmt_info)
3445 : : return NULL;
3446 : :
3447 : : /* Check whether the shift input can be seen as a tree of additions on
3448 : : 2 or 3 widened inputs.
3449 : :
3450 : : Note that the pattern should be a win even if the result of one or
3451 : : more additions is reused elsewhere: if the pattern matches, we'd be
3452 : : replacing 2N RSHIFT_EXPRs and N VEC_PACK_*s with N IFN_AVG_*s. */
3453 : 8312 : internal_fn ifn = IFN_AVG_FLOOR;
3454 : 8312 : vect_unpromoted_value unprom[3];
3455 : 2078 : tree new_type;
3456 : 2078 : unsigned int nops = vect_widened_op_tree (vinfo, plus_stmt_info, PLUS_EXPR,
3457 : 2078 : IFN_VEC_WIDEN_PLUS, false, 3,
3458 : : unprom, &new_type);
3459 : 2078 : if (nops == 0)
3460 : : return NULL;
3461 : 848 : if (nops == 3)
3462 : : {
3463 : : /* Check that one operand is 1. */
3464 : : unsigned int i;
3465 : 873 : for (i = 0; i < 3; ++i)
3466 : 819 : if (integer_onep (unprom[i].op))
3467 : : break;
3468 : 273 : if (i == 3)
3469 : : return NULL;
3470 : : /* Throw away the 1 operand and keep the other two. */
3471 : 219 : if (i < 2)
3472 : 0 : unprom[i] = unprom[2];
3473 : : ifn = IFN_AVG_CEIL;
3474 : : }
3475 : :
3476 : 794 : vect_pattern_detected ("vect_recog_average_pattern", last_stmt);
3477 : :
3478 : : /* We know that:
3479 : :
3480 : : (a) the operation can be viewed as:
3481 : :
3482 : : TYPE widened0 = (TYPE) UNPROM[0];
3483 : : TYPE widened1 = (TYPE) UNPROM[1];
3484 : : TYPE tmp1 = widened0 + widened1 {+ 1};
3485 : : TYPE tmp2 = tmp1 >> 1; // LAST_STMT_INFO
3486 : :
3487 : : (b) the first two statements are equivalent to:
3488 : :
3489 : : TYPE widened0 = (TYPE) (NEW_TYPE) UNPROM[0];
3490 : : TYPE widened1 = (TYPE) (NEW_TYPE) UNPROM[1];
3491 : :
3492 : : (c) vect_recog_over_widening_pattern has already tried to narrow TYPE
3493 : : where sensible;
3494 : :
3495 : : (d) all the operations can be performed correctly at twice the width of
3496 : : NEW_TYPE, due to the nature of the average operation; and
3497 : :
3498 : : (e) users of the result of the right shift need only TARGET_PRECISION
3499 : : bits, where TARGET_PRECISION is no more than half of TYPE's
3500 : : precision.
3501 : :
3502 : : Under these circumstances, the only situation in which NEW_TYPE
3503 : : could be narrower than TARGET_PRECISION is if widened0, widened1
3504 : : and an addition result are all used more than once. Thus we can
3505 : : treat any widening of UNPROM[0] and UNPROM[1] to TARGET_PRECISION
3506 : : as "free", whereas widening the result of the average instruction
3507 : : from NEW_TYPE to TARGET_PRECISION would be a new operation. It's
3508 : : therefore better not to go narrower than TARGET_PRECISION. */
3509 : 794 : if (TYPE_PRECISION (new_type) < target_precision)
3510 : 0 : new_type = build_nonstandard_integer_type (target_precision,
3511 : 0 : TYPE_UNSIGNED (new_type));
3512 : :
3513 : : /* Check for target support. */
3514 : 794 : tree new_vectype = get_vectype_for_scalar_type (vinfo, new_type);
3515 : 794 : if (!new_vectype)
3516 : : return NULL;
3517 : :
3518 : 794 : bool fallback_p = false;
3519 : :
3520 : 794 : if (direct_internal_fn_supported_p (ifn, new_vectype, OPTIMIZE_FOR_SPEED))
3521 : : ;
3522 : 671 : else if (TYPE_UNSIGNED (new_type)
3523 : 235 : && optab_for_tree_code (RSHIFT_EXPR, new_vectype, optab_scalar)
3524 : 235 : && optab_for_tree_code (PLUS_EXPR, new_vectype, optab_default)
3525 : 235 : && optab_for_tree_code (BIT_IOR_EXPR, new_vectype, optab_default)
3526 : 906 : && optab_for_tree_code (BIT_AND_EXPR, new_vectype, optab_default))
3527 : : fallback_p = true;
3528 : : else
3529 : 436 : return NULL;
3530 : :
3531 : : /* The IR requires a valid vector type for the cast result, even though
3532 : : it's likely to be discarded. */
3533 : 358 : *type_out = get_vectype_for_scalar_type (vinfo, type);
3534 : 358 : if (!*type_out)
3535 : : return NULL;
3536 : :
3537 : 357 : tree new_var = vect_recog_temp_ssa_var (new_type, NULL);
3538 : 357 : tree new_ops[2];
3539 : 357 : vect_convert_inputs (vinfo, last_stmt_info, 2, new_ops, new_type,
3540 : : unprom, new_vectype);
3541 : :
3542 : 357 : if (fallback_p)
3543 : : {
3544 : : /* As a fallback, generate code for following sequence:
3545 : :
3546 : : shifted_op0 = new_ops[0] >> 1;
3547 : : shifted_op1 = new_ops[1] >> 1;
3548 : : sum_of_shifted = shifted_op0 + shifted_op1;
3549 : : unmasked_carry = new_ops[0] | new_ops[1]; // (& instead of | for .AVG_FLOOR)
3550 : : carry = unmasked_carry & 1;
3551 : : new_var = sum_of_shifted + carry;
3552 : : */
3553 : :
3554 : 234 : tree one_cst = build_one_cst (new_type);
3555 : 234 : gassign *g;
3556 : :
3557 : 234 : tree shifted_op0 = vect_recog_temp_ssa_var (new_type, NULL);
3558 : 234 : g = gimple_build_assign (shifted_op0, RSHIFT_EXPR, new_ops[0], one_cst);
3559 : 234 : append_pattern_def_seq (vinfo, last_stmt_info, g, new_vectype);
3560 : :
3561 : 234 : tree shifted_op1 = vect_recog_temp_ssa_var (new_type, NULL);
3562 : 234 : g = gimple_build_assign (shifted_op1, RSHIFT_EXPR, new_ops[1], one_cst);
3563 : 234 : append_pattern_def_seq (vinfo, last_stmt_info, g, new_vectype);
3564 : :
3565 : 234 : tree sum_of_shifted = vect_recog_temp_ssa_var (new_type, NULL);
3566 : 234 : g = gimple_build_assign (sum_of_shifted, PLUS_EXPR,
3567 : : shifted_op0, shifted_op1);
3568 : 234 : append_pattern_def_seq (vinfo, last_stmt_info, g, new_vectype);
3569 : :
3570 : 234 : tree unmasked_carry = vect_recog_temp_ssa_var (new_type, NULL);
3571 : 234 : tree_code c = (ifn == IFN_AVG_CEIL) ? BIT_IOR_EXPR : BIT_AND_EXPR;
3572 : 234 : g = gimple_build_assign (unmasked_carry, c, new_ops[0], new_ops[1]);
3573 : 234 : append_pattern_def_seq (vinfo, last_stmt_info, g, new_vectype);
3574 : :
3575 : 234 : tree carry = vect_recog_temp_ssa_var (new_type, NULL);
3576 : 234 : g = gimple_build_assign (carry, BIT_AND_EXPR, unmasked_carry, one_cst);
3577 : 234 : append_pattern_def_seq (vinfo, last_stmt_info, g, new_vectype);
3578 : :
3579 : 234 : g = gimple_build_assign (new_var, PLUS_EXPR, sum_of_shifted, carry);
3580 : 234 : return vect_convert_output (vinfo, last_stmt_info, type, g, new_vectype);
3581 : : }
3582 : :
3583 : : /* Generate the IFN_AVG* call. */
3584 : 123 : gcall *average_stmt = gimple_build_call_internal (ifn, 2, new_ops[0],
3585 : : new_ops[1]);
3586 : 123 : gimple_call_set_lhs (average_stmt, new_var);
3587 : 123 : gimple_set_location (average_stmt, gimple_location (last_stmt));
3588 : :
3589 : 123 : if (dump_enabled_p ())
3590 : 31 : dump_printf_loc (MSG_NOTE, vect_location,
3591 : : "created pattern stmt: %G", (gimple *) average_stmt);
3592 : :
3593 : 123 : return vect_convert_output (vinfo, last_stmt_info,
3594 : 123 : type, average_stmt, new_vectype);
3595 : : }
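 : :
 : : /* For example (a minimal sketch): given
 : :
 : :      unsigned char a, b, avg;
 : :      avg = (unsigned char) (((unsigned int) a + (unsigned int) b + 1) >> 1);
 : :
 : :    pattern (2) turns the computation into .AVG_CEIL (a, b) on vectors
 : :    of unsigned char.  The fallback expansion relies on the identities
 : :
 : :      (a + b) >> 1     == (a >> 1) + (b >> 1) + ((a & b) & 1)
 : :      (a + b + 1) >> 1 == (a >> 1) + (b >> 1) + ((a | b) & 1)
 : :
 : :    which never overflow the narrow type. */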
3596 : :
3597 : : /* Recognize cases in which the input to a cast is wider than its
3598 : : output, and the input is fed by a widening operation. Fold this
3599 : : by removing the unnecessary intermediate widening. E.g.:
3600 : :
3601 : : unsigned char a;
3602 : : unsigned int b = (unsigned int) a;
3603 : : unsigned short c = (unsigned short) b;
3604 : :
3605 : : -->
3606 : :
3607 : : unsigned short c = (unsigned short) a;
3608 : :
3609 : : Although this is rare in input IR, it is an expected side-effect
3610 : : of the over-widening pattern above.
3611 : :
3612 : : This is also beneficial for integer-to-float conversions, if the
3613 : : widened integer has more bits than the float, and if the unwidened
3614 : : input doesn't. */
3615 : :
3616 : : static gimple *
3617 : 30018663 : vect_recog_cast_forwprop_pattern (vec_info *vinfo,
3618 : : stmt_vec_info last_stmt_info, tree *type_out)
3619 : : {
3620 : : /* Check for a cast, including an integer-to-float conversion. */
3621 : 50294800 : gassign *last_stmt = dyn_cast <gassign *> (last_stmt_info->stmt);
3622 : 20322846 : if (!last_stmt)
3623 : : return NULL;
3624 : 20322846 : tree_code code = gimple_assign_rhs_code (last_stmt);
3625 : 20322846 : if (!CONVERT_EXPR_CODE_P (code) && code != FLOAT_EXPR)
3626 : : return NULL;
3627 : :
3628 : : /* Make sure that the rhs is a scalar with a natural bitsize. */
3629 : 2929607 : tree lhs = gimple_assign_lhs (last_stmt);
3630 : 2929607 : if (!lhs)
3631 : : return NULL;
3632 : 2929607 : tree lhs_type = TREE_TYPE (lhs);
3633 : 2929607 : scalar_mode lhs_mode;
3634 : 2908932 : if (VECT_SCALAR_BOOLEAN_TYPE_P (lhs_type)
3635 : 5836758 : || !is_a <scalar_mode> (TYPE_MODE (lhs_type), &lhs_mode))
3636 : 26850 : return NULL;
3637 : :
3638 : : /* Check for a narrowing operation (from a vector point of view). */
3639 : 2902757 : tree rhs = gimple_assign_rhs1 (last_stmt);
3640 : 2902757 : tree rhs_type = TREE_TYPE (rhs);
3641 : 2902757 : if (!INTEGRAL_TYPE_P (rhs_type)
3642 : 2608346 : || VECT_SCALAR_BOOLEAN_TYPE_P (rhs_type)
3643 : 7951253 : || TYPE_PRECISION (rhs_type) <= GET_MODE_BITSIZE (lhs_mode))
3644 : : return NULL;
3645 : :
3646 : : /* Try to find an unpromoted input. */
3647 : 347184 : vect_unpromoted_value unprom;
3648 : 347184 : if (!vect_look_through_possible_promotion (vinfo, rhs, &unprom)
3649 : 347184 : || TYPE_PRECISION (unprom.type) >= TYPE_PRECISION (rhs_type))
3650 : : return NULL;
3651 : :
3652 : : /* If the bits above RHS_TYPE matter, make sure that they're the
3653 : : same when extending from UNPROM as they are when extending from RHS. */
3654 : 46839 : if (!INTEGRAL_TYPE_P (lhs_type)
3655 : 46839 : && TYPE_SIGN (rhs_type) != TYPE_SIGN (unprom.type))
3656 : : return NULL;
3657 : :
3658 : : /* We can get the same result by casting UNPROM directly, to avoid
3659 : : the unnecessary widening and narrowing. */
3660 : 46709 : vect_pattern_detected ("vect_recog_cast_forwprop_pattern", last_stmt);
3661 : :
3662 : 46709 : *type_out = get_vectype_for_scalar_type (vinfo, lhs_type);
3663 : 46709 : if (!*type_out)
3664 : : return NULL;
3665 : :
3666 : 46709 : tree new_var = vect_recog_temp_ssa_var (lhs_type, NULL);
3667 : 46709 : gimple *pattern_stmt = gimple_build_assign (new_var, code, unprom.op);
3668 : 46709 : gimple_set_location (pattern_stmt, gimple_location (last_stmt));
3669 : :
3670 : 46709 : return pattern_stmt;
3671 : : }
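 : :
 : : /* An integer-to-float instance (a minimal sketch):
 : :
 : :      unsigned short a;
 : :      unsigned long b = a;
 : :      float c = (float) b;
 : :
 : :    The 64-bit intermediate is wider than the 32-bit float mode but the
 : :    16-bit input is not, so "c = (float) a" gives the same value without
 : :    the intermediate widening. */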
3672 : :
3673 : : /* Try to detect a shift left of a widened input, converting LSHIFT_EXPR
3674 : : to WIDEN_LSHIFT_EXPR. See vect_recog_widen_op_pattern for details. */
3675 : :
3676 : : static gimple *
3677 : 29949254 : vect_recog_widen_shift_pattern (vec_info *vinfo,
3678 : : stmt_vec_info last_stmt_info, tree *type_out)
3679 : : {
3680 : 29949254 : return vect_recog_widen_op_pattern (vinfo, last_stmt_info, type_out,
3681 : 29949254 : LSHIFT_EXPR, WIDEN_LSHIFT_EXPR, true,
3682 : 29949254 : "vect_recog_widen_shift_pattern");
3683 : : }
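 : :
 : : /* For example (a minimal sketch):
 : :
 : :      ATYPE a;                        // narrower than TYPE
 : :      TYPE res = (TYPE) a << CONST;
 : :
 : :    can use WIDEN_LSHIFT_EXPR to widen and shift in one operation,
 : :    instead of a separate promotion followed by a full-width shift. */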
3684 : :
3685 : : /* Detect a rotate pattern that wouldn't otherwise be vectorized:
3686 : :
3687 : : type a_t, b_t, c_t;
3688 : :
3689 : : S0 a_t = b_t r<< c_t;
3690 : :
3691 : : Input/Output:
3692 : :
3693 : : * STMT_VINFO: The stmt from which the pattern search begins,
3694 : : i.e. the shift/rotate stmt. The original stmt (S0) is replaced
3695 : : with a sequence:
3696 : :
3697 : : S1 d_t = -c_t;
3698 : : S2 e_t = d_t & (B - 1);
3699 : : S3 f_t = b_t << c_t;
3700 : : S4 g_t = b_t >> e_t;
3701 : : S0 a_t = f_t | g_t;
3702 : :
3703 : : where B is the element bitsize of type.
3704 : :
3705 : : Output:
3706 : :
3707 : : * TYPE_OUT: The type of the output of this pattern.
3708 : :
3709 : : * Return value: A new stmt that will be used to replace the rotate
3710 : : S0 stmt. */
3711 : :
3712 : : static gimple *
3713 : 29949254 : vect_recog_rotate_pattern (vec_info *vinfo,
3714 : : stmt_vec_info stmt_vinfo, tree *type_out)
3715 : : {
3716 : 29949254 : gimple *last_stmt = stmt_vinfo->stmt;
3717 : 29949254 : tree oprnd0, oprnd1, lhs, var, var1, var2, vectype, type, stype, def, def2;
3718 : 29949254 : gimple *pattern_stmt, *def_stmt;
3719 : 29949254 : enum tree_code rhs_code;
3720 : 29949254 : enum vect_def_type dt;
3721 : 29949254 : optab optab1, optab2;
3722 : 29949254 : edge ext_def = NULL;
3723 : 29949254 : bool bswap16_p = false;
3724 : :
3725 : 29949254 : if (is_gimple_assign (last_stmt))
3726 : : {
3727 : 20253410 : rhs_code = gimple_assign_rhs_code (last_stmt);
3728 : 20253410 : switch (rhs_code)
3729 : : {
3730 : 6592 : case LROTATE_EXPR:
3731 : 6592 : case RROTATE_EXPR:
3732 : 6592 : break;
3733 : : default:
3734 : : return NULL;
3735 : : }
3736 : :
3737 : 6592 : lhs = gimple_assign_lhs (last_stmt);
3738 : 6592 : oprnd0 = gimple_assign_rhs1 (last_stmt);
3739 : 6592 : type = TREE_TYPE (oprnd0);
3740 : 6592 : oprnd1 = gimple_assign_rhs2 (last_stmt);
3741 : : }
3742 : 9695844 : else if (gimple_call_builtin_p (last_stmt, BUILT_IN_BSWAP16))
3743 : : {
3744 : : /* __builtin_bswap16 (x) is another form of x r>> 8.
3745 : : The vectorizer has bswap support, but only if the argument isn't
3746 : : promoted. */
3747 : 175 : lhs = gimple_call_lhs (last_stmt);
3748 : 175 : oprnd0 = gimple_call_arg (last_stmt, 0);
3749 : 175 : type = TREE_TYPE (oprnd0);
3750 : 175 : if (!lhs
3751 : 175 : || TYPE_PRECISION (TREE_TYPE (lhs)) != 16
3752 : 175 : || TYPE_PRECISION (type) <= 16
3753 : 0 : || TREE_CODE (oprnd0) != SSA_NAME
3754 : 175 : || BITS_PER_UNIT != 8)
3755 : 175 : return NULL;
3756 : :
3757 : 0 : stmt_vec_info def_stmt_info;
3758 : 0 : if (!vect_is_simple_use (oprnd0, vinfo, &dt, &def_stmt_info, &def_stmt))
3759 : : return NULL;
3760 : :
3761 : 0 : if (dt != vect_internal_def)
3762 : : return NULL;
3763 : :
3764 : 0 : if (gimple_assign_cast_p (def_stmt))
3765 : : {
3766 : 0 : def = gimple_assign_rhs1 (def_stmt);
3767 : 0 : if (INTEGRAL_TYPE_P (TREE_TYPE (def))
3768 : 0 : && TYPE_PRECISION (TREE_TYPE (def)) == 16)
3769 : : oprnd0 = def;
3770 : : }
3771 : :
3772 : 0 : type = TREE_TYPE (lhs);
3773 : 0 : vectype = get_vectype_for_scalar_type (vinfo, type);
3774 : 0 : if (vectype == NULL_TREE)
3775 : : return NULL;
3776 : :
3777 : 0 : if (tree char_vectype = get_same_sized_vectype (char_type_node, vectype))
3778 : : {
3779 : : /* The encoding uses one stepped pattern for each byte in the
3780 : : 16-bit word. */
3781 : 0 : vec_perm_builder elts (TYPE_VECTOR_SUBPARTS (char_vectype), 2, 3);
3782 : 0 : for (unsigned i = 0; i < 3; ++i)
3783 : 0 : for (unsigned j = 0; j < 2; ++j)
3784 : 0 : elts.quick_push ((i + 1) * 2 - j - 1);
3785 : :
3786 : 0 : vec_perm_indices indices (elts, 1,
3787 : 0 : TYPE_VECTOR_SUBPARTS (char_vectype));
3788 : 0 : machine_mode vmode = TYPE_MODE (char_vectype);
3789 : 0 : if (can_vec_perm_const_p (vmode, vmode, indices))
3790 : : {
3791 : : /* vectorizable_bswap can handle the __builtin_bswap16 if we
3792 : : undo the argument promotion. */
3793 : 0 : if (!useless_type_conversion_p (type, TREE_TYPE (oprnd0)))
3794 : : {
3795 : 0 : def = vect_recog_temp_ssa_var (type, NULL);
3796 : 0 : def_stmt = gimple_build_assign (def, NOP_EXPR, oprnd0);
3797 : 0 : append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
3798 : 0 : oprnd0 = def;
3799 : : }
3800 : :
3801 : : /* Pattern detected. */
3802 : 0 : vect_pattern_detected ("vect_recog_rotate_pattern", last_stmt);
3803 : :
3804 : 0 : *type_out = vectype;
3805 : :
3806 : : /* Pattern supported. Create a stmt to be used to replace the
3807 : : pattern, with the unpromoted argument. */
3808 : 0 : var = vect_recog_temp_ssa_var (type, NULL);
3809 : 0 : pattern_stmt = gimple_build_call (gimple_call_fndecl (last_stmt),
3810 : : 1, oprnd0);
3811 : 0 : gimple_call_set_lhs (pattern_stmt, var);
3812 : 0 : gimple_call_set_fntype (as_a <gcall *> (pattern_stmt),
3813 : : gimple_call_fntype (last_stmt));
3814 : 0 : return pattern_stmt;
3815 : : }
3816 : 0 : }
3817 : :
3818 : 0 : oprnd1 = build_int_cst (integer_type_node, 8);
3819 : 0 : rhs_code = LROTATE_EXPR;
3820 : 0 : bswap16_p = true;
3821 : : }
3822 : : else
3823 : : return NULL;
3824 : :
3825 : 6592 : if (TREE_CODE (oprnd0) != SSA_NAME
3826 : 6472 : || !INTEGRAL_TYPE_P (type)
3827 : 12703 : || TYPE_PRECISION (TREE_TYPE (lhs)) != TYPE_PRECISION (type))
3828 : : return NULL;
3829 : :
3830 : 6111 : stmt_vec_info def_stmt_info;
3831 : 6111 : if (!vect_is_simple_use (oprnd1, vinfo, &dt, &def_stmt_info, &def_stmt))
3832 : : return NULL;
3833 : :
3834 : 6111 : if (dt != vect_internal_def
3835 : 5905 : && dt != vect_constant_def
3836 : 21 : && dt != vect_external_def)
3837 : : return NULL;
3838 : :
3839 : 6105 : vectype = get_vectype_for_scalar_type (vinfo, type);
3840 : 6105 : if (vectype == NULL_TREE)
3841 : : return NULL;
3842 : :
3843 : : /* If vector/vector or vector/scalar rotate is supported by the target,
3844 : : don't do anything here. */
3845 : 5878 : optab1 = optab_for_tree_code (rhs_code, vectype, optab_vector);
3846 : 5878 : if (optab1
3847 : 5878 : && can_implement_p (optab1, TYPE_MODE (vectype)))
3848 : : {
3849 : 354 : use_rotate:
3850 : 354 : if (bswap16_p)
3851 : : {
3852 : 0 : if (!useless_type_conversion_p (type, TREE_TYPE (oprnd0)))
3853 : : {
3854 : 0 : def = vect_recog_temp_ssa_var (type, NULL);
3855 : 0 : def_stmt = gimple_build_assign (def, NOP_EXPR, oprnd0);
3856 : 0 : append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
3857 : 0 : oprnd0 = def;
3858 : : }
3859 : :
3860 : : /* Pattern detected. */
3861 : 0 : vect_pattern_detected ("vect_recog_rotate_pattern", last_stmt);
3862 : :
3863 : 0 : *type_out = vectype;
3864 : :
3865 : : /* Pattern supported. Create a stmt to be used to replace the
3866 : : pattern. */
3867 : 0 : var = vect_recog_temp_ssa_var (type, NULL);
3868 : 0 : pattern_stmt = gimple_build_assign (var, LROTATE_EXPR, oprnd0,
3869 : : oprnd1);
3870 : 0 : return pattern_stmt;
3871 : : }
3872 : : return NULL;
3873 : : }
3874 : :
3875 : 5842 : if (is_a <bb_vec_info> (vinfo) || dt != vect_internal_def)
3876 : : {
3877 : 5794 : optab2 = optab_for_tree_code (rhs_code, vectype, optab_scalar);
3878 : 5794 : if (optab2
3879 : 5794 : && can_implement_p (optab2, TYPE_MODE (vectype)))
3880 : 318 : goto use_rotate;
3881 : : }
3882 : :
3883 : : /* We may not use a reduction operand twice. */
3884 : 5524 : if (vect_is_reduction (stmt_vinfo))
3885 : : return NULL;
3886 : :
3887 : 5503 : tree utype = unsigned_type_for (type);
3888 : 5503 : tree uvectype = get_vectype_for_scalar_type (vinfo, utype);
3889 : 5503 : if (!uvectype)
3890 : : return NULL;
3891 : :
3892 : : /* If vector/vector or vector/scalar shifts aren't supported by the target,
3893 : : don't do anything here either. */
3894 : 5503 : optab1 = optab_for_tree_code (LSHIFT_EXPR, uvectype, optab_vector);
3895 : 5503 : optab2 = optab_for_tree_code (RSHIFT_EXPR, uvectype, optab_vector);
3896 : 5503 : if (!optab1
3897 : 5503 : || !can_implement_p (optab1, TYPE_MODE (uvectype))
3898 : 599 : || !optab2
3899 : 6102 : || !can_implement_p (optab2, TYPE_MODE (uvectype)))
3900 : : {
3901 : 4904 : if (! is_a <bb_vec_info> (vinfo) && dt == vect_internal_def)
3902 : : return NULL;
3903 : 4869 : optab1 = optab_for_tree_code (LSHIFT_EXPR, uvectype, optab_scalar);
3904 : 4869 : optab2 = optab_for_tree_code (RSHIFT_EXPR, uvectype, optab_scalar);
3905 : 4869 : if (!optab1
3906 : 4869 : || !can_implement_p (optab1, TYPE_MODE (uvectype))
3907 : 3662 : || !optab2
3908 : 8531 : || !can_implement_p (optab2, TYPE_MODE (uvectype)))
3909 : 1207 : return NULL;
3910 : : }
3911 : :
3912 : 4261 : *type_out = vectype;
3913 : :
3914 : 4261 : if (!useless_type_conversion_p (utype, TREE_TYPE (oprnd0)))
3915 : : {
3916 : 47 : def = vect_recog_temp_ssa_var (utype, NULL);
3917 : 47 : def_stmt = gimple_build_assign (def, NOP_EXPR, oprnd0);
3918 : 47 : append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt, uvectype);
3919 : 47 : oprnd0 = def;
3920 : : }
3921 : :
3922 : 4261 : if (dt == vect_external_def && TREE_CODE (oprnd1) == SSA_NAME)
3923 : 13 : ext_def = vect_get_external_def_edge (vinfo, oprnd1);
3924 : :
3925 : 4261 : def = NULL_TREE;
3926 : 4261 : scalar_int_mode mode = SCALAR_INT_TYPE_MODE (utype);
3927 : 4261 : if (dt != vect_internal_def || TYPE_MODE (TREE_TYPE (oprnd1)) == mode)
3928 : : def = oprnd1;
3929 : 28 : else if (def_stmt && gimple_assign_cast_p (def_stmt))
3930 : : {
3931 : 0 : tree rhs1 = gimple_assign_rhs1 (def_stmt);
3932 : 0 : if (TYPE_MODE (TREE_TYPE (rhs1)) == mode
3933 : 0 : && TYPE_PRECISION (TREE_TYPE (rhs1))
3934 : 0 : == TYPE_PRECISION (type))
3935 : : def = rhs1;
3936 : : }
3937 : :
3938 : 4233 : if (def == NULL_TREE)
3939 : : {
3940 : 28 : def = vect_recog_temp_ssa_var (utype, NULL);
3941 : 28 : def_stmt = gimple_build_assign (def, NOP_EXPR, oprnd1);
3942 : 28 : append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt, uvectype);
3943 : : }
3944 : 4261 : stype = TREE_TYPE (def);
3945 : :
3946 : 4261 : if (TREE_CODE (def) == INTEGER_CST)
3947 : : {
3948 : 4149 : if (!tree_fits_uhwi_p (def)
3949 : 4149 : || tree_to_uhwi (def) >= GET_MODE_PRECISION (mode)
3950 : 8298 : || integer_zerop (def))
3951 : 0 : return NULL;
3952 : 4149 : def2 = build_int_cst (stype,
3953 : 4149 : GET_MODE_PRECISION (mode) - tree_to_uhwi (def));
3954 : : }
3955 : : else
3956 : : {
3957 : 112 : tree vecstype = get_vectype_for_scalar_type (vinfo, stype);
3958 : :
3959 : 112 : if (vecstype == NULL_TREE)
3960 : : return NULL;
3961 : 112 : def2 = vect_recog_temp_ssa_var (stype, NULL);
3962 : 112 : def_stmt = gimple_build_assign (def2, NEGATE_EXPR, def);
3963 : 112 : if (ext_def)
3964 : : {
3965 : 13 : basic_block new_bb
3966 : 13 : = gsi_insert_on_edge_immediate (ext_def, def_stmt);
3967 : 13 : gcc_assert (!new_bb);
3968 : : }
3969 : : else
3970 : 99 : append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt, vecstype);
3971 : :
3972 : 112 : def2 = vect_recog_temp_ssa_var (stype, NULL);
3973 : 112 : tree mask = build_int_cst (stype, GET_MODE_PRECISION (mode) - 1);
3974 : 112 : def_stmt = gimple_build_assign (def2, BIT_AND_EXPR,
3975 : : gimple_assign_lhs (def_stmt), mask);
3976 : 112 : if (ext_def)
3977 : : {
3978 : 13 : basic_block new_bb
3979 : 13 : = gsi_insert_on_edge_immediate (ext_def, def_stmt);
3980 : 13 : gcc_assert (!new_bb);
3981 : : }
3982 : : else
3983 : 99 : append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt, vecstype);
3984 : : }
3985 : :
3986 : 4261 : var1 = vect_recog_temp_ssa_var (utype, NULL);
3987 : 8437 : def_stmt = gimple_build_assign (var1, rhs_code == LROTATE_EXPR
3988 : : ? LSHIFT_EXPR : RSHIFT_EXPR,
3989 : : oprnd0, def);
3990 : 4261 : append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt, uvectype);
3991 : :
3992 : 4261 : var2 = vect_recog_temp_ssa_var (utype, NULL);
3993 : 8437 : def_stmt = gimple_build_assign (var2, rhs_code == LROTATE_EXPR
3994 : : ? RSHIFT_EXPR : LSHIFT_EXPR,
3995 : : oprnd0, def2);
3996 : 4261 : append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt, uvectype);
3997 : :
3998 : : /* Pattern detected. */
3999 : 4261 : vect_pattern_detected ("vect_recog_rotate_pattern", last_stmt);
4000 : :
4001 : : /* Pattern supported. Create a stmt to be used to replace the pattern. */
4002 : 4261 : var = vect_recog_temp_ssa_var (utype, NULL);
4003 : 4261 : pattern_stmt = gimple_build_assign (var, BIT_IOR_EXPR, var1, var2);
4004 : :
4005 : 4261 : if (!useless_type_conversion_p (type, utype))
4006 : : {
4007 : 47 : append_pattern_def_seq (vinfo, stmt_vinfo, pattern_stmt, uvectype);
4008 : 47 : tree result = vect_recog_temp_ssa_var (type, NULL);
4009 : 47 : pattern_stmt = gimple_build_assign (result, NOP_EXPR, var);
4010 : : }
4011 : : return pattern_stmt;
4012 : : }
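 : :
 : : /* A scalar model of the S1-S4/S0 sequence above (a minimal sketch for
 : :    a 32-bit unsigned type, so B == 32, assuming 0 <= c < 32 as the
 : :    GIMPLE form does):
 : :
 : :      unsigned int
 : :      rotl32 (unsigned int b, unsigned int c)
 : :      {
 : :        unsigned int d = -c;        // S1
 : :        unsigned int e = d & 31;    // S2: (32 - c) mod 32
 : :        unsigned int f = b << c;    // S3
 : :        unsigned int g = b >> e;    // S4
 : :        return f | g;               // S0
 : :      }
 : :
 : :    For c == 0, e is also 0 and the result is b | b == b, so no shift
 : :    count ever reaches 32. */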
4013 : :
4014 : : /* Detect a vector-by-vector shift pattern that wouldn't otherwise be
4015 : : vectorized:
4016 : :
4017 : : type a_t;
4018 : : TYPE b_T, res_T;
4019 : :
4020 : : S1 a_t = ;
4021 : : S2 b_T = ;
4022 : : S3 res_T = b_T op a_t;
4023 : :
4024 : : where 'TYPE' is a type with a different size from 'type',
4025 : : and op is <<, >> or rotate.
4026 : :
4027 : : Also detect cases:
4028 : :
4029 : : type a_t;
4030 : : TYPE b_T, c_T, res_T;
4031 : :
4032 : : S0 c_T = ;
4033 : : S1 a_t = (type) c_T;
4034 : : S2 b_T = ;
4035 : : S3 res_T = b_T op a_t;
4036 : :
4037 : : Input/Output:
4038 : :
4039 : : * STMT_VINFO: The stmt from which the pattern search begins,
4040 : : i.e. the shift/rotate stmt. The original stmt (S3) is replaced
4041 : : with a shift/rotate that has the same type on both operands: in the
4042 : : second case just b_T op c_T, in the first case with an added cast
4043 : : from a_t to c_T in STMT_VINFO_PATTERN_DEF_SEQ.
4044 : :
4045 : : Output:
4046 : :
4047 : : * TYPE_OUT: The type of the output of this pattern.
4048 : :
4049 : : * Return value: A new stmt that will be used to replace the shift/rotate
4050 : : S3 stmt. */
4051 : :
4052 : : static gimple *
4053 : 29953835 : vect_recog_vector_vector_shift_pattern (vec_info *vinfo,
4054 : : stmt_vec_info stmt_vinfo,
4055 : : tree *type_out)
4056 : : {
4057 : 29953835 : gimple *last_stmt = stmt_vinfo->stmt;
4058 : 29953835 : tree oprnd0, oprnd1, lhs, var;
4059 : 29953835 : gimple *pattern_stmt;
4060 : 29953835 : enum tree_code rhs_code;
4061 : :
4062 : 29953835 : if (!is_gimple_assign (last_stmt))
4063 : : return NULL;
4064 : :
4065 : 20257991 : rhs_code = gimple_assign_rhs_code (last_stmt);
4066 : 20257991 : switch (rhs_code)
4067 : : {
4068 : 477818 : case LSHIFT_EXPR:
4069 : 477818 : case RSHIFT_EXPR:
4070 : 477818 : case LROTATE_EXPR:
4071 : 477818 : case RROTATE_EXPR:
4072 : 477818 : break;
4073 : : default:
4074 : : return NULL;
4075 : : }
4076 : :
4077 : 477818 : lhs = gimple_assign_lhs (last_stmt);
4078 : 477818 : oprnd0 = gimple_assign_rhs1 (last_stmt);
4079 : 477818 : oprnd1 = gimple_assign_rhs2 (last_stmt);
4080 : 477818 : if (TREE_CODE (oprnd1) != SSA_NAME
4081 : 95124 : || TYPE_MODE (TREE_TYPE (oprnd0)) == TYPE_MODE (TREE_TYPE (oprnd1))
4082 : 42383 : || !INTEGRAL_TYPE_P (TREE_TYPE (oprnd0))
4083 : 42099 : || !type_has_mode_precision_p (TREE_TYPE (oprnd1))
4084 : 519917 : || TYPE_PRECISION (TREE_TYPE (lhs))
4085 : 42099 : != TYPE_PRECISION (TREE_TYPE (oprnd0)))
4086 : 435719 : return NULL;
4087 : :
4088 : 42099 : stmt_vec_info def_vinfo = vinfo->lookup_def (oprnd1);
4089 : 42099 : if (!def_vinfo || STMT_VINFO_DEF_TYPE (def_vinfo) == vect_external_def)
4090 : : return NULL;
4091 : :
4092 : 40216 : def_vinfo = vect_stmt_to_vectorize (def_vinfo);
4093 : 1097 : gcc_assert (def_vinfo);
4094 : :
4095 : 40216 : *type_out = get_vectype_for_scalar_type (vinfo, TREE_TYPE (oprnd0));
4096 : 40216 : if (*type_out == NULL_TREE)
4097 : : return NULL;
4098 : :
4099 : 28695 : tree def = NULL_TREE;
4100 : 28695 : gassign *def_stmt = dyn_cast <gassign *> (def_vinfo->stmt);
4101 : 18497 : if (def_stmt && gimple_assign_cast_p (def_stmt))
4102 : : {
4103 : 5167 : tree rhs1 = gimple_assign_rhs1 (def_stmt);
4104 : 5167 : if (TYPE_MODE (TREE_TYPE (rhs1)) == TYPE_MODE (TREE_TYPE (oprnd0))
4105 : 5167 : && TYPE_PRECISION (TREE_TYPE (rhs1))
4106 : 1081 : == TYPE_PRECISION (TREE_TYPE (oprnd0)))
4107 : : {
4108 : 1081 : if (TYPE_PRECISION (TREE_TYPE (oprnd1))
4109 : 1081 : >= TYPE_PRECISION (TREE_TYPE (rhs1)))
4110 : : def = rhs1;
4111 : : else
4112 : : {
4113 : 994 : tree mask
4114 : 994 : = build_low_bits_mask (TREE_TYPE (rhs1),
4115 : 994 : TYPE_PRECISION (TREE_TYPE (oprnd1)));
4116 : 994 : def = vect_recog_temp_ssa_var (TREE_TYPE (rhs1), NULL);
4117 : 994 : def_stmt = gimple_build_assign (def, BIT_AND_EXPR, rhs1, mask);
4118 : 994 : tree vecstype = get_vectype_for_scalar_type (vinfo,
4119 : 994 : TREE_TYPE (rhs1));
4120 : 994 : append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt, vecstype);
4121 : : }
4122 : : }
4123 : : }
4124 : :
4125 : 1081 : if (def == NULL_TREE)
4126 : : {
4127 : 27614 : def = vect_recog_temp_ssa_var (TREE_TYPE (oprnd0), NULL);
4128 : 27614 : def_stmt = gimple_build_assign (def, NOP_EXPR, oprnd1);
4129 : 27614 : append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
4130 : : }
4131 : :
4132 : : /* Pattern detected. */
4133 : 28695 : vect_pattern_detected ("vect_recog_vector_vector_shift_pattern", last_stmt);
4134 : :
4135 : : /* Pattern supported. Create a stmt to be used to replace the pattern. */
4136 : 28695 : var = vect_recog_temp_ssa_var (TREE_TYPE (oprnd0), NULL);
4137 : 28695 : pattern_stmt = gimple_build_assign (var, rhs_code, oprnd0, def);
4138 : :
4139 : 28695 : return pattern_stmt;
4140 : : }
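 : :
 : : /* A concrete case (a minimal sketch, N being some loop count):
 : :
 : :      unsigned char shift[N];
 : :      unsigned int val[N];
 : :      for (int i = 0; i < N; i++)
 : :        val[i] >>= shift[i];
 : :
 : :    The shift amounts are chars while the shifted values are ints;
 : :    rewriting the amounts as ints lets the vector-by-vector shift,
 : :    which needs both operands in the same vector type, apply. */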
4141 : :
4142 : : /* Return true iff the target has a vector optab implementing the operation
4143 : : CODE on type VECTYPE. */
4144 : :
4145 : : static bool
4146 : 609902 : target_has_vecop_for_code (tree_code code, tree vectype)
4147 : : {
4148 : 609902 : optab voptab = optab_for_tree_code (code, vectype, optab_vector);
4149 : 609902 : return voptab
4150 : 609902 : && can_implement_p (voptab, TYPE_MODE (vectype));
4151 : : }
4152 : :
4153 : : /* Verify that the target has optabs for VECTYPE to perform all the steps
4154 : : needed by the multiplication-by-immediate synthesis algorithm described by
4155 : : ALG and VAR. If SYNTH_SHIFT_P is true, also ensure that vector addition
4156 : : is available. Return true iff the target supports all the steps. */
4157 : :
4158 : : static bool
4159 : 270321 : target_supports_mult_synth_alg (struct algorithm *alg, mult_variant var,
4160 : : tree vectype, bool synth_shift_p)
4161 : : {
4162 : 270321 : if (alg->op[0] != alg_zero && alg->op[0] != alg_m)
4163 : : return false;
4164 : :
4165 : 270321 : bool supports_vminus = target_has_vecop_for_code (MINUS_EXPR, vectype);
4166 : 270321 : bool supports_vplus = target_has_vecop_for_code (PLUS_EXPR, vectype);
4167 : :
4168 : 270321 : if (var == negate_variant
4169 : 270321 : && !target_has_vecop_for_code (NEGATE_EXPR, vectype))
4170 : : return false;
4171 : :
4172 : : /* If we must synthesize shifts with additions make sure that vector
4173 : : addition is available. */
4174 : 269823 : if ((var == add_variant || synth_shift_p) && !supports_vplus)
4175 : : return false;
4176 : :
4177 : 138890 : for (int i = 1; i < alg->ops; i++)
4178 : : {
4179 : 105720 : switch (alg->op[i])
4180 : : {
4181 : : case alg_shift:
4182 : : break;
4183 : 26577 : case alg_add_t_m2:
4184 : 26577 : case alg_add_t2_m:
4185 : 26577 : case alg_add_factor:
4186 : 26577 : if (!supports_vplus)
4187 : : return false;
4188 : : break;
4189 : 16694 : case alg_sub_t_m2:
4190 : 16694 : case alg_sub_t2_m:
4191 : 16694 : case alg_sub_factor:
4192 : 16694 : if (!supports_vminus)
4193 : : return false;
4194 : : break;
4195 : : case alg_unknown:
4196 : : case alg_m:
4197 : : case alg_zero:
4198 : : case alg_impossible:
4199 : : return false;
4200 : 0 : default:
4201 : 0 : gcc_unreachable ();
4202 : : }
4203 : : }
4204 : :
4205 : : return true;
4206 : : }
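 : :
 : : /* For example (a minimal sketch): synth_mult can describe x * 9 as the
 : :    shift-and-add
 : :
 : :      x * 9 == (x << 3) + x
 : :
 : :    in which case this function only requires a vector left shift (or,
 : :    with SYNTH_SHIFT_P, vector additions to emulate it) and a vector
 : :    PLUS_EXPR. */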
4207 : :
4208 : : /* Synthesize a left shift of OP by AMNT bits using a series of additions,
4209 : : putting the final result in DEST. Append all statements but the last to
4210 : : STMT_INFO's pattern definition sequence. Return the last statement. */
4211 : :
4212 : : static gimple *
4213 : 0 : synth_lshift_by_additions (vec_info *vinfo,
4214 : : tree dest, tree op, HOST_WIDE_INT amnt,
4215 : : stmt_vec_info stmt_info)
4216 : : {
4217 : 0 : HOST_WIDE_INT i;
4218 : 0 : tree itype = TREE_TYPE (op);
4219 : 0 : tree prev_res = op;
4220 : 0 : gcc_assert (amnt >= 0);
4221 : 0 : for (i = 0; i < amnt; i++)
4222 : : {
4223 : 0 : tree tmp_var = (i < amnt - 1) ? vect_recog_temp_ssa_var (itype, NULL)
4224 : : : dest;
4225 : 0 : gimple *stmt
4226 : 0 : = gimple_build_assign (tmp_var, PLUS_EXPR, prev_res, prev_res);
4227 : 0 : prev_res = tmp_var;
4228 : 0 : if (i < amnt - 1)
4229 : 0 : append_pattern_def_seq (vinfo, stmt_info, stmt);
4230 : : else
4231 : 0 : return stmt;
4232 : : }
4233 : 0 : gcc_unreachable ();
4234 : : return NULL;
4235 : : }
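 : : /* Editor's sketch: the scalar idea behind the helper above -- a left
 : :    shift by AMNT becomes AMNT self-additions, since each addition
 : :    doubles the value.  Illustrative only, not part of GCC:
 : :
 : :      unsigned int
 : :      lshift_by_adds (unsigned int x, int amnt)
 : :      {
 : :        for (int i = 0; i < amnt; i++)
 : :          x = x + x;   /* x <<= 1 */
 : :        return x;
 : :      }  */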
4236 : :
4237 : : /* Helper for vect_synth_mult_by_constant. Apply a binary operation
4238 : : CODE to operands OP1 and OP2, creating a new temporary SSA var in
4239 : : the process if necessary. Append the resulting assignment statements
4240 : : to the sequence in STMT_VINFO. Return the SSA variable that holds the
4241 : : result of the binary operation. If SYNTH_SHIFT_P is true synthesize
4242 : : left shifts using additions. */
4243 : :
4244 : : static tree
4245 : 43162 : apply_binop_and_append_stmt (vec_info *vinfo,
4246 : : tree_code code, tree op1, tree op2,
4247 : : stmt_vec_info stmt_vinfo, bool synth_shift_p)
4248 : : {
4249 : 43162 : if (integer_zerop (op2)
4250 : 43162 : && (code == LSHIFT_EXPR
4251 : 37575 : || code == PLUS_EXPR))
4252 : : {
4253 : 37575 : gcc_assert (TREE_CODE (op1) == SSA_NAME);
4254 : : return op1;
4255 : : }
4256 : :
4257 : 5587 : gimple *stmt;
4258 : 5587 : tree itype = TREE_TYPE (op1);
4259 : 5587 : tree tmp_var = vect_recog_temp_ssa_var (itype, NULL);
4260 : :
4261 : 5587 : if (code == LSHIFT_EXPR
4262 : 5587 : && synth_shift_p)
4263 : : {
4264 : 0 : stmt = synth_lshift_by_additions (vinfo, tmp_var, op1,
4265 : 0 : TREE_INT_CST_LOW (op2), stmt_vinfo);
4266 : 0 : append_pattern_def_seq (vinfo, stmt_vinfo, stmt);
4267 : 0 : return tmp_var;
4268 : : }
4269 : :
4270 : 5587 : stmt = gimple_build_assign (tmp_var, code, op1, op2);
4271 : 5587 : append_pattern_def_seq (vinfo, stmt_vinfo, stmt);
4272 : 5587 : return tmp_var;
4273 : : }
4274 : :
4275 : : /* Synthesize a multiplication of OP by an INTEGER_CST VAL using shifts
4276 : : and simple arithmetic operations to be vectorized. Record the statements
4277 : : produced in STMT_VINFO and return the last statement in the sequence or
4278 : : NULL if it's not possible to synthesize such a multiplication.
4279 : : This function mirrors the behavior of expand_mult_const in expmed.cc but
4280 : : works on tree-ssa form. */
4281 : :
4282 : : static gimple *
4283 : 272999 : vect_synth_mult_by_constant (vec_info *vinfo, tree op, tree val,
4284 : : stmt_vec_info stmt_vinfo)
4285 : : {
4286 : 272999 : tree itype = TREE_TYPE (op);
4287 : 272999 : machine_mode mode = TYPE_MODE (itype);
4288 : 272999 : struct algorithm alg;
4289 : 272999 : mult_variant variant;
4290 : 272999 : if (!tree_fits_shwi_p (val))
4291 : : return NULL;
4292 : :
4293 : : /* Multiplication synthesis by shifts, adds and subs can introduce
4294 : : signed overflow where the original operation didn't. Perform the
4295 : : operations on an unsigned type and cast back to avoid this.
4296 : : In the future we may want to relax this for synthesis algorithms
4297 : : that we can prove do not cause unexpected overflow. */
4298 : 270324 : bool cast_to_unsigned_p = !TYPE_OVERFLOW_WRAPS (itype);
4299 : :
4300 : 50700 : tree multtype = cast_to_unsigned_p ? unsigned_type_for (itype) : itype;
4301 : 270324 : tree vectype = get_vectype_for_scalar_type (vinfo, multtype);
4302 : 270324 : if (!vectype)
4303 : : return NULL;
4304 : :
4305 : : /* Targets that don't support vector shifts but support vector additions
4306 : : can synthesize shifts that way. */
4307 : 270324 : bool synth_shift_p = !vect_supportable_shift (vinfo, LSHIFT_EXPR, multtype);
4308 : :
4309 : 270324 : HOST_WIDE_INT hwval = tree_to_shwi (val);
4310 : : /* Use MAX_COST here as we don't want to limit the sequence on rtx costs.
4311 : : The vectorizer's benefit analysis will decide whether it's beneficial
4312 : : to do this. */
4313 : 540648 : bool possible = choose_mult_variant (VECTOR_MODE_P (TYPE_MODE (vectype))
4314 : 270324 : ? TYPE_MODE (vectype) : mode,
4315 : : hwval, &alg, &variant, MAX_COST);
4316 : 270324 : if (!possible)
4317 : : return NULL;
4318 : :
4319 : 270324 : if (vect_is_reduction (stmt_vinfo))
4320 : : {
4321 : 14 : int op_uses = alg.op[0] != alg_zero;
4322 : 29 : for (int i = 1; i < alg.ops; i++)
4323 : 17 : switch (alg.op[i])
4324 : : {
4325 : 2 : case alg_add_t_m2:
4326 : 2 : case alg_sub_t_m2:
4327 : 2 : if (synth_shift_p && alg.log[i])
4328 : : return NULL;
4329 : : else
4330 : 2 : op_uses++;
4331 : 2 : break;
4332 : 0 : case alg_add_t2_m:
4333 : 0 : case alg_sub_t2_m:
4334 : 0 : op_uses++;
4335 : : /* Fallthru. */
4336 : 15 : case alg_shift:
4337 : 15 : if (synth_shift_p && alg.log[i])
4338 : : return NULL;
4339 : : break;
4340 : : case alg_add_factor:
4341 : : case alg_sub_factor:
4342 : : return NULL;
4343 : : default:
4344 : : break;
4345 : : }
4346 : 12 : if (variant == add_variant)
4347 : 0 : op_uses++;
 4348 : :       /* If we would synthesize more than a single use of the reduction
 4349 : :          operand, the reduction constraints are violated.  Avoid this
4350 : : situation. */
4351 : 12 : if (op_uses > 1)
4352 : : return NULL;
4353 : : }
4354 : :
4355 : 270321 : if (!target_supports_mult_synth_alg (&alg, variant, vectype, synth_shift_p))
4356 : : return NULL;
4357 : :
4358 : 33170 : tree accumulator;
4359 : :
 4360 : :   /* The statement most recently built; the sequence is populated below.  */
4361 : 33170 : gimple *stmt = NULL;
4362 : :
4363 : 33170 : if (cast_to_unsigned_p)
4364 : : {
4365 : 10529 : tree tmp_op = vect_recog_temp_ssa_var (multtype, NULL);
4366 : 10529 : stmt = gimple_build_assign (tmp_op, CONVERT_EXPR, op);
4367 : 10529 : append_pattern_def_seq (vinfo, stmt_vinfo, stmt);
4368 : 10529 : op = tmp_op;
4369 : : }
4370 : :
4371 : 33170 : if (alg.op[0] == alg_zero)
4372 : 180 : accumulator = build_int_cst (multtype, 0);
4373 : : else
4374 : : accumulator = op;
4375 : :
4376 : 33170 : bool needs_fixup = (variant == negate_variant)
4377 : 33170 : || (variant == add_variant);
4378 : :
4379 : 138705 : for (int i = 1; i < alg.ops; i++)
4380 : : {
4381 : 105535 : tree shft_log = build_int_cst (multtype, alg.log[i]);
4382 : 105535 : tree accum_tmp = vect_recog_temp_ssa_var (multtype, NULL);
4383 : 105535 : tree tmp_var = NULL_TREE;
4384 : :
4385 : 105535 : switch (alg.op[i])
4386 : : {
4387 : 62373 : case alg_shift:
4388 : 62373 : if (synth_shift_p)
4389 : 0 : stmt
4390 : 0 : = synth_lshift_by_additions (vinfo, accum_tmp, accumulator,
4391 : 0 : alg.log[i], stmt_vinfo);
4392 : : else
4393 : 62373 : stmt = gimple_build_assign (accum_tmp, LSHIFT_EXPR, accumulator,
4394 : : shft_log);
4395 : : break;
4396 : 21782 : case alg_add_t_m2:
4397 : 21782 : tmp_var
4398 : 21782 : = apply_binop_and_append_stmt (vinfo, LSHIFT_EXPR, op, shft_log,
4399 : : stmt_vinfo, synth_shift_p);
4400 : 21782 : stmt = gimple_build_assign (accum_tmp, PLUS_EXPR, accumulator,
4401 : : tmp_var);
4402 : 21782 : break;
4403 : 15944 : case alg_sub_t_m2:
4404 : 15944 : tmp_var = apply_binop_and_append_stmt (vinfo, LSHIFT_EXPR, op,
4405 : : shft_log, stmt_vinfo,
4406 : : synth_shift_p);
4407 : : /* In some algorithms the first step involves zeroing the
4408 : : accumulator. If subtracting from such an accumulator
4409 : : just emit the negation directly. */
4410 : 15944 : if (integer_zerop (accumulator))
4411 : 180 : stmt = gimple_build_assign (accum_tmp, NEGATE_EXPR, tmp_var);
4412 : : else
4413 : 15764 : stmt = gimple_build_assign (accum_tmp, MINUS_EXPR, accumulator,
4414 : : tmp_var);
4415 : : break;
4416 : 0 : case alg_add_t2_m:
4417 : 0 : tmp_var
4418 : 0 : = apply_binop_and_append_stmt (vinfo, LSHIFT_EXPR, accumulator,
4419 : : shft_log, stmt_vinfo, synth_shift_p);
4420 : 0 : stmt = gimple_build_assign (accum_tmp, PLUS_EXPR, tmp_var, op);
4421 : 0 : break;
4422 : 0 : case alg_sub_t2_m:
4423 : 0 : tmp_var
4424 : 0 : = apply_binop_and_append_stmt (vinfo, LSHIFT_EXPR, accumulator,
4425 : : shft_log, stmt_vinfo, synth_shift_p);
4426 : 0 : stmt = gimple_build_assign (accum_tmp, MINUS_EXPR, tmp_var, op);
4427 : 0 : break;
4428 : 4721 : case alg_add_factor:
4429 : 4721 : tmp_var
4430 : 4721 : = apply_binop_and_append_stmt (vinfo, LSHIFT_EXPR, accumulator,
4431 : : shft_log, stmt_vinfo, synth_shift_p);
4432 : 4721 : stmt = gimple_build_assign (accum_tmp, PLUS_EXPR, accumulator,
4433 : : tmp_var);
4434 : 4721 : break;
4435 : 715 : case alg_sub_factor:
4436 : 715 : tmp_var
4437 : 715 : = apply_binop_and_append_stmt (vinfo, LSHIFT_EXPR, accumulator,
4438 : : shft_log, stmt_vinfo, synth_shift_p);
4439 : 715 : stmt = gimple_build_assign (accum_tmp, MINUS_EXPR, tmp_var,
4440 : : accumulator);
4441 : 715 : break;
4442 : 0 : default:
4443 : 0 : gcc_unreachable ();
4444 : : }
 4445 : :       /* Don't append the last stmt in the sequence to stmt_vinfo unless
 4446 : :          fixup or conversion statements still follow; it is returned instead.  */
4447 : :
4448 : 105535 : if ((i < alg.ops - 1) || needs_fixup || cast_to_unsigned_p)
4449 : 83158 : append_pattern_def_seq (vinfo, stmt_vinfo, stmt);
4450 : 105535 : accumulator = accum_tmp;
4451 : : }
4452 : 33170 : if (variant == negate_variant)
4453 : : {
4454 : 385 : tree accum_tmp = vect_recog_temp_ssa_var (multtype, NULL);
4455 : 385 : stmt = gimple_build_assign (accum_tmp, NEGATE_EXPR, accumulator);
4456 : 385 : accumulator = accum_tmp;
4457 : 385 : if (cast_to_unsigned_p)
4458 : 131 : append_pattern_def_seq (vinfo, stmt_vinfo, stmt);
4459 : : }
4460 : 32785 : else if (variant == add_variant)
4461 : : {
4462 : 68 : tree accum_tmp = vect_recog_temp_ssa_var (multtype, NULL);
4463 : 68 : stmt = gimple_build_assign (accum_tmp, PLUS_EXPR, accumulator, op);
4464 : 68 : accumulator = accum_tmp;
4465 : 68 : if (cast_to_unsigned_p)
4466 : 58 : append_pattern_def_seq (vinfo, stmt_vinfo, stmt);
4467 : : }
 4468 : :   /* Convert back to the signed type if needed.  */
4469 : 32906 : if (cast_to_unsigned_p)
4470 : : {
4471 : 10529 : tree accum_tmp = vect_recog_temp_ssa_var (itype, NULL);
4472 : 10529 : stmt = gimple_build_assign (accum_tmp, CONVERT_EXPR, accumulator);
4473 : : }
4474 : :
4475 : : return stmt;
4476 : : }
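 : : /* Editor's sketch: one concrete sequence this function can emit, in
 : :    scalar form.  Multiplying by 10 (the exact steps are picked by
 : :    choose_mult_variant; this is just one valid algorithm):
 : :
 : :      t1 = x << 2;    /* x * 4  */
 : :      t2 = t1 + x;    /* x * 5  */
 : :      res = t2 << 1;  /* x * 10 */
 : :
 : :    When the input type has undefined signed overflow the steps are
 : :    performed in the corresponding unsigned type with casts on either
 : :    end, as the cast_to_unsigned_p handling above shows.  */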
4477 : :
 4478 : : /* Detect multiplication by a constant and convert it into a sequence of
 4479 : :    shifts, additions, subtractions and negations.  We reuse the
 4480 : :    choose_mult_variant algorithms from expmed.cc.
4481 : :
4482 : : Input/Output:
4483 : :
4484 : : STMT_VINFO: The stmt from which the pattern search begins,
4485 : : i.e. the mult stmt.
4486 : :
4487 : : Output:
4488 : :
4489 : : * TYPE_OUT: The type of the output of this pattern.
4490 : :
4491 : : * Return value: A new stmt that will be used to replace
4492 : : the multiplication. */
4493 : :
4494 : : static gimple *
4495 : 30140936 : vect_recog_mult_pattern (vec_info *vinfo,
4496 : : stmt_vec_info stmt_vinfo, tree *type_out)
4497 : : {
4498 : 30140936 : gimple *last_stmt = stmt_vinfo->stmt;
4499 : 30140936 : tree oprnd0, oprnd1, vectype, itype;
4500 : 30140936 : gimple *pattern_stmt;
4501 : :
4502 : 30140936 : if (!is_gimple_assign (last_stmt))
4503 : : return NULL;
4504 : :
4505 : 20445092 : if (gimple_assign_rhs_code (last_stmt) != MULT_EXPR)
4506 : : return NULL;
4507 : :
4508 : 1341106 : oprnd0 = gimple_assign_rhs1 (last_stmt);
4509 : 1341106 : oprnd1 = gimple_assign_rhs2 (last_stmt);
4510 : 1341106 : itype = TREE_TYPE (oprnd0);
4511 : :
4512 : 1341106 : if (TREE_CODE (oprnd0) != SSA_NAME
4513 : 1341043 : || TREE_CODE (oprnd1) != INTEGER_CST
4514 : 833112 : || !INTEGRAL_TYPE_P (itype)
4515 : 2174218 : || !type_has_mode_precision_p (itype))
4516 : 508046 : return NULL;
4517 : :
4518 : 833060 : vectype = get_vectype_for_scalar_type (vinfo, itype);
4519 : 833060 : if (vectype == NULL_TREE)
4520 : : return NULL;
4521 : :
4522 : : /* If the target can handle vectorized multiplication natively,
4523 : : don't attempt to optimize this. */
4524 : 679526 : optab mul_optab = optab_for_tree_code (MULT_EXPR, vectype, optab_default);
4525 : 679526 : if (mul_optab != unknown_optab
4526 : 679526 : && can_implement_p (mul_optab, TYPE_MODE (vectype)))
4527 : : return NULL;
4528 : :
4529 : 272999 : pattern_stmt = vect_synth_mult_by_constant (vinfo,
4530 : : oprnd0, oprnd1, stmt_vinfo);
4531 : 272999 : if (!pattern_stmt)
4532 : : return NULL;
4533 : :
4534 : : /* Pattern detected. */
4535 : 33170 : vect_pattern_detected ("vect_recog_mult_pattern", last_stmt);
4536 : :
4537 : 33170 : *type_out = vectype;
4538 : :
4539 : 33170 : return pattern_stmt;
4540 : : }
4541 : :
4542 : : extern bool gimple_unsigned_integer_sat_add (tree, tree*, tree (*)(tree));
4543 : : extern bool gimple_unsigned_integer_sat_sub (tree, tree*, tree (*)(tree));
4544 : : extern bool gimple_unsigned_integer_sat_trunc (tree, tree*, tree (*)(tree));
4545 : :
4546 : : extern bool gimple_unsigned_integer_narrow_clip (tree, tree*, tree (*)(tree));
4547 : :
4548 : : extern bool gimple_signed_integer_sat_add (tree, tree*, tree (*)(tree));
4549 : : extern bool gimple_signed_integer_sat_sub (tree, tree*, tree (*)(tree));
4550 : : extern bool gimple_signed_integer_sat_trunc (tree, tree*, tree (*)(tree));
4551 : :
4552 : : static gimple *
4553 : 255 : vect_recog_build_binary_gimple_stmt (vec_info *vinfo, stmt_vec_info stmt_info,
4554 : : internal_fn fn, tree *type_out,
4555 : : tree lhs, tree op_0, tree op_1)
4556 : : {
4557 : 255 : tree itype = TREE_TYPE (op_0);
4558 : 255 : tree otype = TREE_TYPE (lhs);
4559 : 255 : tree v_itype = get_vectype_for_scalar_type (vinfo, itype);
4560 : 255 : tree v_otype = get_vectype_for_scalar_type (vinfo, otype);
4561 : :
4562 : 255 : if (v_itype != NULL_TREE && v_otype != NULL_TREE
4563 : 255 : && direct_internal_fn_supported_p (fn, v_itype, OPTIMIZE_FOR_BOTH))
4564 : : {
4565 : 59 : gcall *call = gimple_build_call_internal (fn, 2, op_0, op_1);
4566 : 59 : tree in_ssa = vect_recog_temp_ssa_var (itype, NULL);
4567 : :
4568 : 59 : gimple_call_set_lhs (call, in_ssa);
4569 : 59 : gimple_call_set_nothrow (call, /* nothrow_p */ false);
4570 : 59 : gimple_set_location (call, gimple_location (STMT_VINFO_STMT (stmt_info)));
4571 : :
4572 : 59 : *type_out = v_otype;
4573 : :
4574 : 59 : if (types_compatible_p (itype, otype))
4575 : : return call;
4576 : : else
4577 : : {
4578 : 0 : append_pattern_def_seq (vinfo, stmt_info, call, v_itype);
4579 : 0 : tree out_ssa = vect_recog_temp_ssa_var (otype, NULL);
4580 : :
4581 : 0 : return gimple_build_assign (out_ssa, NOP_EXPR, in_ssa);
4582 : : }
4583 : : }
4584 : :
4585 : : return NULL;
4586 : : }
4587 : :
4588 : : /*
 4589 : :  * Try to detect the saturation add pattern (SAT_ADD), i.e. the gimple below:
4590 : : * _7 = _4 + _6;
4591 : : * _8 = _4 > _7;
4592 : : * _9 = (long unsigned int) _8;
4593 : : * _10 = -_9;
4594 : : * _12 = _7 | _10;
4595 : : *
 4596 : :  * And then simplified to
4597 : : * _12 = .SAT_ADD (_4, _6);
4598 : : */
4599 : :
4600 : : static gimple *
4601 : 30211154 : vect_recog_sat_add_pattern (vec_info *vinfo, stmt_vec_info stmt_vinfo,
4602 : : tree *type_out)
4603 : : {
4604 : 30211154 : gimple *last_stmt = STMT_VINFO_STMT (stmt_vinfo);
4605 : :
4606 : 30211154 : if (!is_gimple_assign (last_stmt))
4607 : : return NULL;
4608 : :
4609 : 20515310 : tree ops[2];
4610 : 20515310 : tree lhs = gimple_assign_lhs (last_stmt);
4611 : :
4612 : 20515310 : if (gimple_unsigned_integer_sat_add (lhs, ops, NULL)
4613 : 20515310 : || gimple_signed_integer_sat_add (lhs, ops, NULL))
4614 : : {
4615 : 46 : if (TREE_CODE (ops[1]) == INTEGER_CST)
4616 : 12 : ops[1] = fold_convert (TREE_TYPE (ops[0]), ops[1]);
4617 : :
4618 : 46 : gimple *stmt = vect_recog_build_binary_gimple_stmt (vinfo, stmt_vinfo,
4619 : : IFN_SAT_ADD, type_out,
4620 : : lhs, ops[0], ops[1]);
4621 : 46 : if (stmt)
4622 : : {
4623 : 28 : vect_pattern_detected ("vect_recog_sat_add_pattern", last_stmt);
4624 : 28 : return stmt;
4625 : : }
4626 : : }
4627 : :
4628 : : return NULL;
4629 : : }
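 : : /* Editor's sketch: the unsigned saturating-add idiom matched above,
 : :    written as plain C for reference (illustrative helper, not GCC code):
 : :
 : :      uint32_t
 : :      sat_add_u32 (uint32_t a, uint32_t b)
 : :      {
 : :        uint32_t sum = a + b;
 : :        /* If the add wrapped then sum < a; -(sum < a) is all-ones and
 : :           ORing it in clamps the result to UINT32_MAX.  */
 : :        return sum | -(uint32_t) (sum < a);
 : :      }  */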
4630 : :
4631 : : /*
 4632 : :  * Try to transform the truncation for the .SAT_SUB pattern, which mostly
 4633 : :  * occurs in the zip benchmark.  For example:
4634 : : *
4635 : : * unsigned int _1;
4636 : : * unsigned int _2;
4637 : : * unsigned short int _4;
4638 : : * _9 = (unsigned short int).SAT_SUB (_1, _2);
4639 : : *
 4640 : :  * If _1 is known to be in the range of unsigned short int, for example
 4641 : :  * because there is a def _1 = (unsigned short int) _4, then we can
 4642 : :  * transform the truncation to:
4643 : : *
4644 : : * _3 = (unsigned short int) MIN (65535, _2); // aka _3 = .SAT_TRUNC (_2);
4645 : : * _9 = .SAT_SUB (_4, _3);
4646 : : *
 4647 : :  * Then we can generate better vectorized code and avoid the unnecessary
 4648 : :  * narrowing stmt during vectorization, using the stmt(s) below.
4649 : : *
4650 : : * _3 = .SAT_TRUNC(_2); // SI => HI
4651 : : * _9 = .SAT_SUB (_4, _3);
4652 : : */
4653 : : static void
4654 : 209 : vect_recog_sat_sub_pattern_transform (vec_info *vinfo,
4655 : : stmt_vec_info stmt_vinfo,
4656 : : tree lhs, tree *ops)
4657 : : {
4658 : 209 : tree otype = TREE_TYPE (lhs);
4659 : 209 : tree itype = TREE_TYPE (ops[0]);
4660 : 209 : unsigned itype_prec = TYPE_PRECISION (itype);
4661 : 209 : unsigned otype_prec = TYPE_PRECISION (otype);
4662 : :
4663 : 209 : if (types_compatible_p (otype, itype) || otype_prec >= itype_prec)
4664 : 209 : return;
4665 : :
4666 : 0 : tree v_otype = get_vectype_for_scalar_type (vinfo, otype);
4667 : 0 : tree v_itype = get_vectype_for_scalar_type (vinfo, itype);
4668 : 0 : tree_pair v_pair = tree_pair (v_otype, v_itype);
4669 : :
4670 : 0 : if (v_otype == NULL_TREE || v_itype == NULL_TREE
4671 : 0 : || !direct_internal_fn_supported_p (IFN_SAT_TRUNC, v_pair,
4672 : : OPTIMIZE_FOR_BOTH))
4673 : 0 : return;
4674 : :
4675 : : /* 1. Find the _4 and update ops[0] as above example. */
4676 : 0 : vect_unpromoted_value unprom;
4677 : 0 : tree tmp = vect_look_through_possible_promotion (vinfo, ops[0], &unprom);
4678 : :
4679 : 0 : if (tmp == NULL_TREE || TYPE_PRECISION (unprom.type) != otype_prec)
4680 : : return;
4681 : :
4682 : 0 : ops[0] = tmp;
4683 : :
4684 : : /* 2. Generate _3 = .SAT_TRUNC (_2) and update ops[1] as above example. */
4685 : 0 : tree trunc_lhs_ssa = vect_recog_temp_ssa_var (otype, NULL);
4686 : 0 : gcall *call = gimple_build_call_internal (IFN_SAT_TRUNC, 1, ops[1]);
4687 : :
4688 : 0 : gimple_call_set_lhs (call, trunc_lhs_ssa);
4689 : 0 : gimple_call_set_nothrow (call, /* nothrow_p */ false);
4690 : 0 : append_pattern_def_seq (vinfo, stmt_vinfo, call, v_otype);
4691 : :
4692 : 0 : ops[1] = trunc_lhs_ssa;
4693 : : }
4694 : :
4695 : : /*
 4696 : :  * Try to detect the saturation sub pattern (SAT_SUB), i.e. the gimple below:
4697 : : * Unsigned:
4698 : : * _7 = _1 >= _2;
4699 : : * _8 = _1 - _2;
4700 : : * _10 = (long unsigned int) _7;
4701 : : * _9 = _8 * _10;
4702 : : *
 4703 : :  * And then simplified to
4704 : : * _9 = .SAT_SUB (_1, _2);
4705 : : *
4706 : : * Signed:
4707 : : * x.0_4 = (unsigned char) x_16;
4708 : : * y.1_5 = (unsigned char) y_18;
4709 : : * _6 = x.0_4 - y.1_5;
4710 : : * minus_19 = (int8_t) _6;
4711 : : * _7 = x_16 ^ y_18;
4712 : : * _8 = x_16 ^ minus_19;
4713 : : * _44 = _7 < 0;
4714 : : * _23 = x_16 < 0;
4715 : : * _24 = (signed char) _23;
4716 : : * _58 = (unsigned char) _24;
4717 : : * _59 = -_58;
4718 : : * _25 = (signed char) _59;
4719 : : * _26 = _25 ^ 127;
4720 : : * _42 = _8 < 0;
4721 : : * _41 = _42 & _44;
4722 : : * iftmp.2_11 = _41 ? _26 : minus_19;
4723 : : *
 4724 : :  * And then simplified to
4725 : : * iftmp.2_11 = .SAT_SUB (x_16, y_18);
4726 : : */
4727 : :
4728 : : static gimple *
4729 : 30211126 : vect_recog_sat_sub_pattern (vec_info *vinfo, stmt_vec_info stmt_vinfo,
4730 : : tree *type_out)
4731 : : {
4732 : 30211126 : gimple *last_stmt = STMT_VINFO_STMT (stmt_vinfo);
4733 : :
4734 : 30211126 : if (!is_gimple_assign (last_stmt))
4735 : : return NULL;
4736 : :
4737 : 20515282 : tree ops[2];
4738 : 20515282 : tree lhs = gimple_assign_lhs (last_stmt);
4739 : :
4740 : 20515282 : if (gimple_unsigned_integer_sat_sub (lhs, ops, NULL)
4741 : 20515282 : || gimple_signed_integer_sat_sub (lhs, ops, NULL))
4742 : : {
4743 : 209 : vect_recog_sat_sub_pattern_transform (vinfo, stmt_vinfo, lhs, ops);
4744 : 209 : gimple *stmt = vect_recog_build_binary_gimple_stmt (vinfo, stmt_vinfo,
4745 : : IFN_SAT_SUB, type_out,
4746 : : lhs, ops[0], ops[1]);
4747 : 209 : if (stmt)
4748 : : {
4749 : 31 : vect_pattern_detected ("vect_recog_sat_sub_pattern", last_stmt);
4750 : 31 : return stmt;
4751 : : }
4752 : : }
4753 : :
4754 : : return NULL;
4755 : : }
4756 : :
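 : : /* Editor's sketch: the unsigned saturating-sub idiom matched above,
 : :    in plain C (illustrative only):
 : :
 : :      uint32_t
 : :      sat_sub_u32 (uint32_t a, uint32_t b)
 : :      {
 : :        /* (a >= b) is 1 when the subtraction cannot wrap; multiplying
 : :           by it zeroes the wrapped result, matching _9 = _8 * _10 above.  */
 : :        return (a - b) * (uint32_t) (a >= b);
 : :      }  */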
4757 : : /*
 4758 : :  * Try to detect the saturation truncation pattern (SAT_TRUNC), i.e. the
 4758 : :  * gimple below:
4759 : : * overflow_5 = x_4(D) > 4294967295;
4760 : : * _1 = (unsigned int) x_4(D);
4761 : : * _2 = (unsigned int) overflow_5;
4762 : : * _3 = -_2;
4763 : : * _6 = _1 | _3;
4764 : : *
 4765 : :  * And then simplified to
4766 : : * _6 = .SAT_TRUNC (x_4(D));
4767 : : */
4768 : :
4769 : : static gimple *
4770 : 30211095 : vect_recog_sat_trunc_pattern (vec_info *vinfo, stmt_vec_info stmt_vinfo,
4771 : : tree *type_out)
4772 : : {
4773 : 30211095 : gimple *last_stmt = STMT_VINFO_STMT (stmt_vinfo);
4774 : :
4775 : 30211095 : if (!is_gimple_assign (last_stmt))
4776 : : return NULL;
4777 : :
4778 : 20515251 : tree ops[1];
4779 : 20515251 : tree lhs = gimple_assign_lhs (last_stmt);
4780 : 20515251 : tree otype = TREE_TYPE (lhs);
4781 : :
4782 : 20515251 : if ((gimple_unsigned_integer_narrow_clip (lhs, ops, NULL))
4783 : 20515251 : && type_has_mode_precision_p (otype))
4784 : : {
4785 : 8 : tree itype = TREE_TYPE (ops[0]);
4786 : 8 : tree v_itype = get_vectype_for_scalar_type (vinfo, itype);
4787 : 8 : tree v_otype = get_vectype_for_scalar_type (vinfo, otype);
4788 : 8 : internal_fn fn = IFN_SAT_TRUNC;
4789 : :
4790 : 8 : if (v_itype != NULL_TREE && v_otype != NULL_TREE
4791 : 16 : && direct_internal_fn_supported_p (fn, tree_pair (v_otype, v_itype),
4792 : : OPTIMIZE_FOR_BOTH))
4793 : : {
4794 : 0 : tree temp = vect_recog_temp_ssa_var (itype, NULL);
4795 : 0 : gimple * max_stmt = gimple_build_assign (temp, build2 (MAX_EXPR, itype, build_zero_cst(itype), ops[0]));
4796 : 0 : append_pattern_def_seq (vinfo, stmt_vinfo, max_stmt, v_itype);
4797 : :
4798 : 0 : gcall *call = gimple_build_call_internal (fn, 1, temp);
4799 : 0 : tree out_ssa = vect_recog_temp_ssa_var (otype, NULL);
4800 : :
4801 : 0 : gimple_call_set_lhs (call, out_ssa);
4802 : 0 : gimple_call_set_nothrow (call, /* nothrow_p */ false);
4803 : 0 : gimple_set_location (call, gimple_location (last_stmt));
4804 : :
4805 : 0 : *type_out = v_otype;
4806 : :
4807 : 0 : return call;
4808 : : }
4809 : :
4810 : : }
4811 : :
4812 : 20515251 : if ((gimple_unsigned_integer_sat_trunc (lhs, ops, NULL)
4813 : 20514980 : || gimple_signed_integer_sat_trunc (lhs, ops, NULL))
4814 : 20515251 : && type_has_mode_precision_p (otype))
4815 : : {
4816 : 259 : tree itype = TREE_TYPE (ops[0]);
4817 : 259 : tree v_itype = get_vectype_for_scalar_type (vinfo, itype);
4818 : 259 : tree v_otype = get_vectype_for_scalar_type (vinfo, otype);
4819 : 259 : internal_fn fn = IFN_SAT_TRUNC;
4820 : :
4821 : 253 : if (v_itype != NULL_TREE && v_otype != NULL_TREE
4822 : 512 : && direct_internal_fn_supported_p (fn, tree_pair (v_otype, v_itype),
4823 : : OPTIMIZE_FOR_BOTH))
4824 : : {
4825 : 0 : gcall *call = gimple_build_call_internal (fn, 1, ops[0]);
4826 : 0 : tree out_ssa = vect_recog_temp_ssa_var (otype, NULL);
4827 : :
4828 : 0 : gimple_call_set_lhs (call, out_ssa);
4829 : 0 : gimple_call_set_nothrow (call, /* nothrow_p */ false);
4830 : 0 : gimple_set_location (call, gimple_location (last_stmt));
4831 : :
4832 : 0 : *type_out = v_otype;
4833 : :
4834 : 0 : return call;
4835 : : }
4836 : : }
4837 : :
4838 : : return NULL;
4839 : : }
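 : : /* Editor's sketch: the unsigned saturating-truncation idiom matched
 : :    above, in plain C (illustrative only):
 : :
 : :      uint32_t
 : :      sat_trunc_u64_u32 (uint64_t x)
 : :      {
 : :        uint32_t lo = (uint32_t) x;
 : :        /* -(x > UINT32_MAX) is all-ones when x does not fit, clamping
 : :           the result to UINT32_MAX; otherwise lo passes through.  */
 : :        return lo | -(uint32_t) (x > 0xffffffffULL);
 : :      }  */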
4840 : :
4841 : :
4842 : : /* Function add_code_for_floorceilround_divmod
4843 : : A helper function to add compensation code for implementing FLOOR_MOD_EXPR,
4844 : : FLOOR_DIV_EXPR, CEIL_MOD_EXPR, CEIL_DIV_EXPR, ROUND_MOD_EXPR and
 4845 : :    ROUND_DIV_EXPR.
 4846 : :    The quotient and remainder are needed for implementing these operators.
4847 : : FLOOR cases
 4848 : :    r = x %[fl] y;  d = x /[fl] y;
4849 : : is
4850 : : r = x % y; if (r && (x ^ y) < 0) r += y;
 4851 : :    r = x % y; d = x/y; if (r && (x ^ y) < 0) d--;  respectively.
 4852 : :    This produces the following sequence:
4853 : : v0 = x^y
4854 : : v1 = -r
4855 : : v2 = r | -r
4856 : : v3 = v0 & v2
4857 : : v4 = v3 < 0
4858 : : if (floor_mod)
4859 : : v5 = v4 ? y : 0
4860 : : v6 = r + v5
4861 : : if (floor_div)
 4862 : :    v5 = v4 ? -1 : 0
 4863 : :    v6 = d + v5
 4864 : :    Similar sequences of vector instructions are produced for the following cases:
4865 : : CEIL cases
 4866 : :    r = x %[cl] y;  d = x /[cl] y;
4867 : : is
4868 : : r = x % y; if (r && (x ^ y) >= 0) r -= y;
4869 : : r = x % y; if (r) r -= y; (unsigned)
4870 : : r = x % y; d = x/y; if (r && (x ^ y) >= 0) d++;
4871 : : r = x % y; d = x/y; if (r) d++; (unsigned)
4872 : : ROUND cases
 4873 : :    r = x %[rd] y;  d = x /[rd] y;
4874 : : is
4875 : : r = x % y; if (r > ((y-1)/2)) if ((x ^ y) >= 0) r -= y; else r += y;
4876 : : r = x % y; if (r > ((y-1)/2)) r -= y; (unsigned)
4877 : : r = x % y; d = x/y; if (r > ((y-1)/2)) if ((x ^ y) >= 0) d++; else d--;
4878 : : r = x % y; d = x/y; if (r > ((y-1)/2)) d++; (unsigned)
4879 : : Inputs:
4880 : : VECTYPE: Vector type of the operands
4881 : : STMT_VINFO: Statement where pattern begins
 4882 : :    RHS_CODE: One of the FLOOR/CEIL/ROUND division or modulo codes above
4883 : : Q: The quotient of division
4884 : : R: Remainder of division
 4885 : :    OPRND0/OPRND1: The actual operands involved
4886 : : ITYPE: tree type of oprnd0
4887 : : Output:
 4888 : :    NULL if vectorization is not possible
 4889 : :    A gimple statement implementing the compensation for RHS_CODE otherwise
4890 : : */
4891 : : static gimple *
4892 : 427 : add_code_for_floorceilround_divmod (tree vectype, vec_info *vinfo,
4893 : : stmt_vec_info stmt_vinfo,
4894 : : enum tree_code rhs_code, tree q, tree r,
4895 : : tree oprnd0, tree oprnd1, tree itype)
4896 : : {
4897 : 427 : gimple *def_stmt;
4898 : 427 : tree mask_vectype = truth_type_for (vectype);
4899 : 427 : if (!mask_vectype)
4900 : : return NULL;
4901 : 427 : tree bool_cond;
4902 : 427 : bool unsigned_p = TYPE_UNSIGNED (itype);
4903 : :
4904 : 427 : switch (rhs_code)
4905 : : {
4906 : 391 : case FLOOR_MOD_EXPR:
4907 : 391 : case FLOOR_DIV_EXPR:
4908 : 391 : case CEIL_MOD_EXPR:
4909 : 391 : case CEIL_DIV_EXPR:
4910 : 391 : {
4911 : 391 : if (!target_has_vecop_for_code (NEGATE_EXPR, vectype)
4912 : 359 : || !target_has_vecop_for_code (BIT_XOR_EXPR, vectype)
4913 : 359 : || !target_has_vecop_for_code (BIT_IOR_EXPR, vectype)
4914 : 359 : || !target_has_vecop_for_code (PLUS_EXPR, vectype)
4915 : 359 : || !target_has_vecop_for_code (MINUS_EXPR, vectype)
4916 : 359 : || !expand_vec_cmp_expr_p (vectype, mask_vectype, LT_EXPR)
4917 : 623 : || !expand_vec_cond_expr_p (vectype, mask_vectype))
4918 : 159 : return NULL;
4919 : 232 : if (unsigned_p)
4920 : : {
4921 : 18 : gcc_assert (rhs_code == CEIL_MOD_EXPR || rhs_code == CEIL_DIV_EXPR);
4922 : :
4923 : 18 : if (!expand_vec_cmp_expr_p (vectype, mask_vectype, GT_EXPR))
4924 : : return NULL;
4925 : 18 : bool is_mod = rhs_code == CEIL_MOD_EXPR;
4926 : : // r > 0
4927 : 18 : bool_cond = vect_recog_temp_ssa_var (boolean_type_node, NULL);
4928 : 18 : def_stmt = gimple_build_assign (bool_cond, GT_EXPR, r,
4929 : : build_int_cst (itype, 0));
4930 : 18 : append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt, mask_vectype,
4931 : : itype);
4932 : :
4933 : : // (r > 0) ? y : 0 (mod)
 4934 : :           // (r > 0) ? 1 : 0 (div)
4935 : 18 : tree extr_cond = vect_recog_temp_ssa_var (itype, NULL);
4936 : 18 : def_stmt
4937 : 27 : = gimple_build_assign (extr_cond, COND_EXPR, bool_cond,
4938 : 9 : is_mod ? oprnd1 : build_int_cst (itype, 1),
4939 : : build_int_cst (itype, 0));
4940 : 18 : append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
4941 : :
4942 : : // r -= (r > 0) ? y : 0 (mod)
 4943 : :           // d += (r > 0) ? 1 : 0 (div)
4944 : 18 : tree result = vect_recog_temp_ssa_var (itype, NULL);
4945 : 27 : return gimple_build_assign (result, is_mod ? MINUS_EXPR : PLUS_EXPR,
4946 : 18 : is_mod ? r : q, extr_cond);
4947 : : }
4948 : : else
4949 : : {
4950 : 214 : bool ceil_p
4951 : 214 : = (rhs_code == CEIL_MOD_EXPR || rhs_code == CEIL_DIV_EXPR);
4952 : 214 : if (ceil_p && !target_has_vecop_for_code (BIT_NOT_EXPR, vectype))
4953 : : return NULL;
4954 : : // x ^ y
4955 : 214 : tree xort = vect_recog_temp_ssa_var (itype, NULL);
4956 : 214 : def_stmt = gimple_build_assign (xort, BIT_XOR_EXPR, oprnd0, oprnd1);
4957 : 214 : append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
4958 : :
4959 : 214 : tree cond_reg = xort;
4960 : : // ~(x ^ y) (ceil)
4961 : 214 : if (ceil_p)
4962 : : {
4963 : 18 : cond_reg = vect_recog_temp_ssa_var (itype, NULL);
4964 : 18 : def_stmt = gimple_build_assign (cond_reg, BIT_NOT_EXPR, xort);
4965 : 18 : append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
4966 : : }
4967 : :
4968 : : // -r
4969 : 214 : tree negate_r = vect_recog_temp_ssa_var (itype, NULL);
4970 : 214 : def_stmt = gimple_build_assign (negate_r, NEGATE_EXPR, r);
4971 : 214 : append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
4972 : :
4973 : : // r | -r , sign bit is set if r!=0
4974 : 214 : tree r_or_negr = vect_recog_temp_ssa_var (itype, NULL);
4975 : 214 : def_stmt
4976 : 214 : = gimple_build_assign (r_or_negr, BIT_IOR_EXPR, r, negate_r);
4977 : 214 : append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
4978 : :
4979 : : // (x ^ y) & (r | -r)
4980 : : // ~(x ^ y) & (r | -r) (ceil)
4981 : 214 : tree r_or_negr_and_xor = vect_recog_temp_ssa_var (itype, NULL);
4982 : 214 : def_stmt = gimple_build_assign (r_or_negr_and_xor, BIT_AND_EXPR,
4983 : : r_or_negr, cond_reg);
4984 : 214 : append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
4985 : :
4986 : : // (x ^ y) & (r | -r) < 0 which is equivalent to (x^y < 0 && r!=0)
4987 : 214 : bool_cond = vect_recog_temp_ssa_var (boolean_type_node, NULL);
4988 : 214 : def_stmt
4989 : 214 : = gimple_build_assign (bool_cond, LT_EXPR, r_or_negr_and_xor,
4990 : : build_int_cst (itype, 0));
4991 : 214 : append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt, mask_vectype,
4992 : : itype);
4993 : :
4994 : : // (x^y < 0 && r) ? y : 0 (mod)
4995 : : // (x^y < 0 && r) ? -1 : 0 (div)
4996 : 214 : bool is_mod
4997 : 214 : = (rhs_code == FLOOR_MOD_EXPR || rhs_code == CEIL_MOD_EXPR);
4998 : 214 : tree extr_cond = vect_recog_temp_ssa_var (itype, NULL);
4999 : 254 : def_stmt = gimple_build_assign (extr_cond, COND_EXPR, bool_cond,
5000 : : is_mod ? oprnd1
5001 : 40 : : build_int_cst (itype, -1),
5002 : : build_int_cst (itype, 0));
5003 : 214 : append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
5004 : :
5005 : : // r += (x ^ y < 0 && r) ? y : 0 (floor mod)
5006 : : // d += (x^y < 0 && r) ? -1 : 0 (floor div)
 5007 : :           // r -= (x ^ y >= 0 && r) ? y : 0 (ceil mod)
 5008 : :           // d -= (x ^ y >= 0 && r) ? -1 : 0 (ceil div)
5009 : 214 : tree result = vect_recog_temp_ssa_var (itype, NULL);
5010 : 428 : return gimple_build_assign (result,
5011 : 214 : (rhs_code == FLOOR_MOD_EXPR
5012 : 214 : || rhs_code == FLOOR_DIV_EXPR)
5013 : : ? PLUS_EXPR
5014 : : : MINUS_EXPR,
5015 : 214 : is_mod ? r : q, extr_cond);
5016 : : }
5017 : : }
5018 : 36 : case ROUND_MOD_EXPR:
5019 : 36 : case ROUND_DIV_EXPR:
5020 : 36 : {
5021 : 36 : if (!target_has_vecop_for_code (BIT_AND_EXPR, vectype)
5022 : 36 : || !target_has_vecop_for_code (PLUS_EXPR, vectype)
5023 : 36 : || !expand_vec_cmp_expr_p (vectype, mask_vectype, LT_EXPR)
5024 : 36 : || !expand_vec_cmp_expr_p (vectype, mask_vectype, GT_EXPR)
5025 : 72 : || !expand_vec_cond_expr_p (vectype, mask_vectype))
5026 : 0 : return NULL;
5027 : :
5028 : 36 : bool is_mod = rhs_code == ROUND_MOD_EXPR;
5029 : 36 : HOST_WIDE_INT d = TREE_INT_CST_LOW (oprnd1);
5030 : 36 : unsigned HOST_WIDE_INT abs_d
5031 : : = (d >= 0 ? (unsigned HOST_WIDE_INT) d : -(unsigned HOST_WIDE_INT) d);
5032 : 36 : unsigned HOST_WIDE_INT mid_d = (abs_d - 1) >> 1;
5033 : 36 : if (!unsigned_p)
5034 : : {
 5035 : :           // Check availability of an abs expression for the vector type.
5036 : 18 : if (!target_has_vecop_for_code (ABS_EXPR, vectype))
5037 : : return NULL;
5038 : : // abs (r)
5039 : 18 : tree abs_r = vect_recog_temp_ssa_var (itype, NULL);
5040 : 18 : def_stmt = gimple_build_assign (abs_r, ABS_EXPR, r);
5041 : 18 : append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
5042 : :
 5043 : :           // abs (r) > ((abs (y) - 1) >> 1)
5044 : 18 : tree round_p = vect_recog_temp_ssa_var (boolean_type_node, NULL);
5045 : 18 : def_stmt = gimple_build_assign (round_p, GT_EXPR, abs_r,
5046 : 18 : build_int_cst (itype, mid_d));
5047 : 18 : append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt, mask_vectype,
5048 : : itype);
5049 : :
5050 : : // x ^ y
5051 : 18 : tree cond_reg = vect_recog_temp_ssa_var (itype, NULL);
5052 : 18 : def_stmt
5053 : 18 : = gimple_build_assign (cond_reg, BIT_XOR_EXPR, oprnd0, oprnd1);
5054 : 18 : append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
5055 : :
5056 : : // x ^ y < 0
5057 : 18 : bool_cond = vect_recog_temp_ssa_var (boolean_type_node, NULL);
5058 : 18 : def_stmt = gimple_build_assign (bool_cond, LT_EXPR, cond_reg,
5059 : : build_int_cst (itype, 0));
5060 : 18 : append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt, mask_vectype,
5061 : : itype);
5062 : :
5063 : : // x ^ y < 0 ? y : -y (mod)
5064 : : // x ^ y < 0 ? -1 : 1 (div)
5065 : 18 : tree val1 = vect_recog_temp_ssa_var (itype, NULL);
5066 : 18 : def_stmt
5067 : 36 : = gimple_build_assign (val1, COND_EXPR, bool_cond,
5068 : 27 : build_int_cst (itype, is_mod ? d : -1),
5069 : 18 : build_int_cst (itype, is_mod ? -d : 1));
5070 : 18 : append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
5071 : 18 : int precision = TYPE_PRECISION (itype);
5072 : 18 : wide_int wmask = wi::mask (precision, false, precision);
5073 : :
 5074 : :           // abs (r) > ((abs (y) - 1) >> 1) ? ~0 : 0
5075 : 18 : tree val2 = vect_recog_temp_ssa_var (itype, NULL);
5076 : 36 : def_stmt = gimple_build_assign (val2, COND_EXPR, round_p,
5077 : 18 : wide_int_to_tree (itype, wmask),
5078 : : build_int_cst (itype, 0));
5079 : 18 : append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
5080 : :
5081 : 18 : tree fval = vect_recog_temp_ssa_var (itype, NULL);
5082 : 18 : def_stmt = gimple_build_assign (fval, BIT_AND_EXPR, val1, val2);
5083 : 18 : append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
5084 : :
5085 : 18 : tree result = vect_recog_temp_ssa_var (itype, NULL);
5086 : 27 : return gimple_build_assign (result, PLUS_EXPR, is_mod ? r : q,
5087 : : fval);
5088 : 18 : }
5089 : : else
5090 : : {
 5091 : :           // r > ((y - 1) >> 1)
5092 : 18 : tree round_p = vect_recog_temp_ssa_var (boolean_type_node, NULL);
5093 : 18 : def_stmt = gimple_build_assign (round_p, GT_EXPR, r,
5094 : 18 : build_int_cst (itype, mid_d));
5095 : 18 : append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt, mask_vectype,
5096 : : itype);
5097 : :
 5098 : :           // (r > ((y - 1) >> 1)) ? (-d for mod, 1 for div) : 0
5099 : 18 : tree val2 = vect_recog_temp_ssa_var (itype, NULL);
5100 : 18 : def_stmt
5101 : 36 : = gimple_build_assign (val2, COND_EXPR, round_p,
5102 : 18 : build_int_cst (itype, is_mod ? -d : 1),
5103 : : build_int_cst (itype, 0));
5104 : 18 : append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
5105 : :
5106 : 18 : tree result = vect_recog_temp_ssa_var (itype, NULL);
5107 : 27 : return gimple_build_assign (result, PLUS_EXPR, is_mod ? r : q,
5108 : 18 : val2);
5109 : : }
5110 : : }
5111 : : default:
5112 : : return NULL;
5113 : : }
5114 : : }
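 : : /* Editor's sketch: the FLOOR compensation above in branchy scalar C
 : :    (the function emits the equivalent branchless vector form):
 : :
 : :      int
 : :      floor_mod (int x, int y)
 : :      {
 : :        int r = x % y;                /* truncating remainder */
 : :        if (r != 0 && ((x ^ y) < 0))  /* operands differ in sign */
 : :          r += y;                     /* adjust toward negative infinity */
 : :        return r;
 : :      }
 : :
 : :    The branchless v0..v4 steps encode the same condition: the sign bit
 : :    of (x ^ y) & (r | -r) is set exactly when the signs differ and r is
 : :    nonzero.  */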
5115 : :
5116 : : /* Detect a signed division by a constant that wouldn't be
5117 : : otherwise vectorized:
5118 : :
5119 : : type a_t, b_t;
5120 : :
5121 : : S1 a_t = b_t / N;
5122 : :
5123 : : where type 'type' is an integral type and N is a constant.
5124 : :
5125 : : Similarly handle modulo by a constant:
5126 : :
5127 : : S4 a_t = b_t % N;
5128 : :
5129 : : Input/Output:
5130 : :
5131 : : * STMT_VINFO: The stmt from which the pattern search begins,
 5132 : :         i.e. the division stmt.  If N is a power of two constant and
 5133 : :         the type is signed, S1 is replaced by:
5134 : : S3 y_t = b_t < 0 ? N - 1 : 0;
5135 : : S2 x_t = b_t + y_t;
5136 : : S1' a_t = x_t >> log2 (N);
5137 : :
 5138 : :     If N is a power of two constant and the type is signed,
 5139 : :     S4 is replaced by (where *_T temporaries have unsigned type):
5140 : : S9 y_T = b_t < 0 ? -1U : 0U;
5141 : : S8 z_T = y_T >> (sizeof (type_t) * CHAR_BIT - log2 (N));
5142 : : S7 z_t = (type) z_T;
5143 : : S6 w_t = b_t + z_t;
5144 : : S5 x_t = w_t & (N - 1);
5145 : : S4' a_t = x_t - z_t;
5146 : :
5147 : : Output:
5148 : :
5149 : : * TYPE_OUT: The type of the output of this pattern.
5150 : :
5151 : : * Return value: A new stmt that will be used to replace the division
5152 : : S1 or modulo S4 stmt. */
5153 : :
5154 : : static gimple *
5155 : 29953748 : vect_recog_divmod_pattern (vec_info *vinfo,
5156 : : stmt_vec_info stmt_vinfo, tree *type_out)
5157 : : {
5158 : 29953748 : gimple *last_stmt = stmt_vinfo->stmt;
5159 : 29953748 : tree oprnd0, oprnd1, vectype, itype, cond;
5160 : 29953748 : gimple *pattern_stmt = NULL;
5161 : 29953748 : gimple *def_stmt = NULL;
5162 : 29953748 : enum tree_code rhs_code;
5163 : 29953748 : optab optab;
5164 : 29953748 : tree q, cst;
5165 : 29953748 : int prec;
5166 : :
5167 : 29953748 : if (!is_gimple_assign (last_stmt)
5168 : : /* The pattern will disrupt the reduction chain with multiple uses. */
5169 : 29953748 : || vect_is_reduction (stmt_vinfo))
5170 : : return NULL;
5171 : :
5172 : 20187116 : rhs_code = gimple_assign_rhs_code (last_stmt);
5173 : 20187116 : switch (rhs_code)
5174 : : {
5175 : 273395 : case TRUNC_DIV_EXPR:
5176 : 273395 : case EXACT_DIV_EXPR:
5177 : 273395 : case TRUNC_MOD_EXPR:
5178 : 273395 : case FLOOR_MOD_EXPR:
5179 : 273395 : case FLOOR_DIV_EXPR:
5180 : 273395 : case CEIL_MOD_EXPR:
5181 : 273395 : case CEIL_DIV_EXPR:
5182 : 273395 : case ROUND_MOD_EXPR:
5183 : 273395 : case ROUND_DIV_EXPR:
5184 : 273395 : break;
5185 : : default:
5186 : : return NULL;
5187 : : }
5188 : :
5189 : 273395 : oprnd0 = gimple_assign_rhs1 (last_stmt);
5190 : 273395 : oprnd1 = gimple_assign_rhs2 (last_stmt);
5191 : 273395 : itype = TREE_TYPE (oprnd0);
5192 : 273395 : if (TREE_CODE (oprnd0) != SSA_NAME
5193 : 255801 : || TREE_CODE (oprnd1) != INTEGER_CST
5194 : 165520 : || TREE_CODE (itype) != INTEGER_TYPE
5195 : 438915 : || !type_has_mode_precision_p (itype))
5196 : 107875 : return NULL;
5197 : :
5198 : 165520 : scalar_int_mode itype_mode = SCALAR_INT_TYPE_MODE (itype);
5199 : 165520 : vectype = get_vectype_for_scalar_type (vinfo, itype);
5200 : 165520 : if (vectype == NULL_TREE)
5201 : : return NULL;
5202 : :
5203 : 132790 : if (optimize_bb_for_size_p (gimple_bb (last_stmt)))
5204 : : {
5205 : : /* If the target can handle vectorized division or modulo natively,
5206 : : don't attempt to optimize this, since native division is likely
5207 : : to give smaller code. */
5208 : 1644 : optab = optab_for_tree_code (rhs_code, vectype, optab_default);
5209 : 1644 : if (optab != unknown_optab
5210 : 1644 : && can_implement_p (optab, TYPE_MODE (vectype)))
5211 : : return NULL;
5212 : : }
5213 : :
5214 : 132790 : prec = TYPE_PRECISION (itype);
5215 : :
5216 : 265580 : bool is_flclrd_moddiv_p
5217 : 132790 : = rhs_code == FLOOR_MOD_EXPR || rhs_code == FLOOR_DIV_EXPR
5218 : : || rhs_code == CEIL_MOD_EXPR || rhs_code == CEIL_DIV_EXPR
5219 : 132209 : || rhs_code == ROUND_MOD_EXPR || rhs_code == ROUND_DIV_EXPR;
5220 : 132790 : if (integer_pow2p (oprnd1))
5221 : : {
5222 : 75387 : if ((TYPE_UNSIGNED (itype)
5223 : 57 : && (rhs_code == FLOOR_MOD_EXPR || rhs_code == FLOOR_DIV_EXPR))
5224 : 75441 : || tree_int_cst_sgn (oprnd1) != 1)
5225 : 3 : return NULL;
5226 : :
5227 : : /* Pattern detected. */
5228 : 75384 : vect_pattern_detected ("vect_recog_divmod_pattern", last_stmt);
5229 : :
5230 : 75384 : *type_out = vectype;
5231 : :
5232 : : /* Check if the target supports this internal function. */
5233 : 75384 : internal_fn ifn = IFN_DIV_POW2;
5234 : 75384 : if (direct_internal_fn_supported_p (ifn, vectype, OPTIMIZE_FOR_SPEED))
5235 : : {
5236 : 0 : tree shift = build_int_cst (itype, tree_log2 (oprnd1));
5237 : :
5238 : 0 : tree var_div = vect_recog_temp_ssa_var (itype, NULL);
5239 : 0 : gimple *div_stmt = gimple_build_call_internal (ifn, 2, oprnd0, shift);
5240 : 0 : gimple_call_set_lhs (div_stmt, var_div);
5241 : 0 : if (rhs_code == TRUNC_MOD_EXPR || is_flclrd_moddiv_p)
5242 : : {
5243 : 0 : append_pattern_def_seq (vinfo, stmt_vinfo, div_stmt);
5244 : 0 : tree t1 = vect_recog_temp_ssa_var (itype, NULL);
5245 : 0 : def_stmt
5246 : 0 : = gimple_build_assign (t1, LSHIFT_EXPR, var_div, shift);
5247 : 0 : append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
5248 : 0 : pattern_stmt
5249 : 0 : = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
5250 : : MINUS_EXPR, oprnd0, t1);
5251 : 0 : if (is_flclrd_moddiv_p)
5252 : : {
5253 : 0 : append_pattern_def_seq (vinfo, stmt_vinfo, pattern_stmt);
5254 : 0 : pattern_stmt
5255 : 0 : = add_code_for_floorceilround_divmod (vectype, vinfo,
5256 : : stmt_vinfo, rhs_code,
5257 : : var_div, t1, oprnd0,
5258 : : oprnd1, itype);
5259 : 0 : if (pattern_stmt == NULL)
5260 : : return NULL;
5261 : : }
5262 : : }
5263 : : else
5264 : : pattern_stmt = div_stmt;
5265 : 0 : gimple_set_location (pattern_stmt, gimple_location (last_stmt));
5266 : :
5267 : 0 : return pattern_stmt;
5268 : : }
5269 : :
5270 : 75384 : cond = vect_recog_temp_ssa_var (boolean_type_node, NULL);
5271 : 75384 : def_stmt = gimple_build_assign (cond, LT_EXPR, oprnd0,
5272 : : build_int_cst (itype, 0));
5273 : 75384 : append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt,
5274 : : truth_type_for (vectype), itype);
5275 : 75384 : tree div_result = NULL_TREE;
5276 : 75384 : if (rhs_code == TRUNC_DIV_EXPR
5277 : 75384 : || rhs_code == EXACT_DIV_EXPR
5278 : : || rhs_code == FLOOR_DIV_EXPR
5279 : 2965 : || rhs_code == CEIL_DIV_EXPR
5280 : 2806 : || rhs_code == ROUND_DIV_EXPR)
5281 : : {
5282 : 72590 : tree var = vect_recog_temp_ssa_var (itype, NULL);
5283 : 72590 : tree shift;
5284 : 72590 : def_stmt
5285 : 72590 : = gimple_build_assign (var, COND_EXPR, cond,
5286 : : fold_build2 (MINUS_EXPR, itype, oprnd1,
5287 : : build_int_cst (itype, 1)),
5288 : : build_int_cst (itype, 0));
5289 : 72590 : append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
5290 : 72590 : var = vect_recog_temp_ssa_var (itype, NULL);
5291 : 72590 : def_stmt
5292 : 72590 : = gimple_build_assign (var, PLUS_EXPR, oprnd0,
5293 : : gimple_assign_lhs (def_stmt));
5294 : 72590 : append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
5295 : :
5296 : 72590 : shift = build_int_cst (itype, tree_log2 (oprnd1));
5297 : 72590 : div_result = vect_recog_temp_ssa_var (itype, NULL);
5298 : 72590 : pattern_stmt
5299 : 72590 : = gimple_build_assign (div_result, RSHIFT_EXPR, var, shift);
5300 : : }
5301 : 75384 : if (rhs_code == TRUNC_MOD_EXPR || is_flclrd_moddiv_p)
5302 : : {
5303 : 2965 : if (rhs_code == FLOOR_DIV_EXPR
5304 : : || rhs_code == CEIL_DIV_EXPR
5305 : 2965 : || rhs_code == ROUND_DIV_EXPR)
5306 : 171 : append_pattern_def_seq (vinfo, stmt_vinfo, pattern_stmt);
5307 : :
5308 : 2965 : tree signmask;
5309 : 2965 : if (compare_tree_int (oprnd1, 2) == 0)
5310 : : {
5311 : 1627 : signmask = vect_recog_temp_ssa_var (itype, NULL);
5312 : 1627 : def_stmt = gimple_build_assign (signmask, COND_EXPR, cond,
5313 : : build_int_cst (itype, 1),
5314 : : build_int_cst (itype, 0));
5315 : 1627 : append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
5316 : : }
5317 : : else
5318 : : {
5319 : 1338 : tree utype
5320 : 1338 : = build_nonstandard_integer_type (prec, 1);
5321 : 1338 : tree vecutype = get_vectype_for_scalar_type (vinfo, utype);
5322 : 1338 : tree shift
5323 : 1338 : = build_int_cst (utype, GET_MODE_BITSIZE (itype_mode)
5324 : 1338 : - tree_log2 (oprnd1));
5325 : 1338 : tree var = vect_recog_temp_ssa_var (utype, NULL);
5326 : :
5327 : 1338 : def_stmt = gimple_build_assign (var, COND_EXPR, cond,
5328 : : build_int_cst (utype, -1),
5329 : : build_int_cst (utype, 0));
5330 : 1338 : append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt, vecutype);
5331 : 1338 : var = vect_recog_temp_ssa_var (utype, NULL);
5332 : 1338 : def_stmt = gimple_build_assign (var, RSHIFT_EXPR,
5333 : : gimple_assign_lhs (def_stmt),
5334 : : shift);
5335 : 1338 : append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt, vecutype);
5336 : 1338 : signmask = vect_recog_temp_ssa_var (itype, NULL);
5337 : 1338 : def_stmt
5338 : 1338 : = gimple_build_assign (signmask, NOP_EXPR, var);
5339 : 1338 : append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
5340 : : }
5341 : 2965 : def_stmt
5342 : 2965 : = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
5343 : : PLUS_EXPR, oprnd0, signmask);
5344 : 2965 : append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
5345 : 2965 : def_stmt
5346 : 2965 : = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
5347 : : BIT_AND_EXPR, gimple_assign_lhs (def_stmt),
5348 : : fold_build2 (MINUS_EXPR, itype, oprnd1,
5349 : : build_int_cst (itype, 1)));
5350 : 2965 : append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
5351 : :
5352 : 2965 : tree r = vect_recog_temp_ssa_var (itype, NULL);
5353 : 2965 : pattern_stmt
5354 : 2965 : = gimple_build_assign (r, MINUS_EXPR, gimple_assign_lhs (def_stmt),
5355 : : signmask);
5356 : 2965 : if (is_flclrd_moddiv_p)
5357 : : {
5358 : 281 : append_pattern_def_seq (vinfo, stmt_vinfo, pattern_stmt);
5359 : 281 : pattern_stmt
5360 : 281 : = add_code_for_floorceilround_divmod (vectype, vinfo,
5361 : : stmt_vinfo, rhs_code,
5362 : : div_result, r, oprnd0,
5363 : : oprnd1, itype);
5364 : 281 : if (pattern_stmt == NULL)
5365 : : return NULL;
5366 : : }
5367 : : }
5368 : :
5369 : 75225 : return pattern_stmt;
5370 : : }
5371 : :
5372 : 57403 : if ((cst = uniform_integer_cst_p (oprnd1))
5373 : 57403 : && TYPE_UNSIGNED (itype)
5374 : : && rhs_code == TRUNC_DIV_EXPR
5375 : 33876 : && vectype
5376 : 79337 : && targetm.vectorize.preferred_div_as_shifts_over_mult (vectype))
5377 : : {
5378 : : /* We can use the relationship:
5379 : :
5380 : : x // N == ((x+N+2) // (N+1) + x) // (N+1) for 0 <= x < N(N+3)
5381 : :
5382 : : to optimize cases where N+1 is a power of 2, and where // (N+1)
5383 : : is therefore a shift right. When operating in modes that are
5384 : : multiples of a byte in size, there are two cases:
5385 : :
5386 : : (1) N(N+3) is not representable, in which case the question
5387 : : becomes whether the replacement expression overflows.
5388 : : It is enough to test that x+N+2 does not overflow,
5389 : : i.e. that x < MAX-(N+1).
5390 : :
5391 : : (2) N(N+3) is representable, in which case it is the (only)
5392 : : bound that we need to check.
5393 : :
5394 : : ??? For now we just handle the case where // (N+1) is a shift
5395 : : right by half the precision, since some architectures can
5396 : : optimize the associated addition and shift combinations
5397 : : into single instructions. */
5398 : :
5399 : 14956 : auto wcst = wi::to_wide (cst);
5400 : 14956 : int pow = wi::exact_log2 (wcst + 1);
5401 : 14956 : if (pow == prec / 2)
5402 : : {
5403 : 566 : gimple *stmt = SSA_NAME_DEF_STMT (oprnd0);
5404 : :
5405 : 566 : gimple_ranger ranger;
5406 : 566 : int_range_max r;
5407 : :
5408 : : /* Check that no overflow will occur. If we don't have range
5409 : : information we can't perform the optimization. */
5410 : :
5411 : 566 : if (ranger.range_of_expr (r, oprnd0, stmt) && !r.undefined_p ())
5412 : : {
5413 : 564 : wide_int max = r.upper_bound ();
5414 : 564 : wide_int one = wi::shwi (1, prec);
5415 : 564 : wide_int adder = wi::add (one, wi::lshift (one, pow));
5416 : 564 : wi::overflow_type ovf;
5417 : 564 : wi::add (max, adder, UNSIGNED, &ovf);
5418 : 564 : if (ovf == wi::OVF_NONE)
5419 : : {
5420 : 327 : *type_out = vectype;
5421 : 327 : tree tadder = wide_int_to_tree (itype, adder);
5422 : 327 : tree rshift = wide_int_to_tree (itype, pow);
5423 : :
5424 : 327 : tree new_lhs1 = vect_recog_temp_ssa_var (itype, NULL);
5425 : 327 : gassign *patt1
5426 : 327 : = gimple_build_assign (new_lhs1, PLUS_EXPR, oprnd0, tadder);
5427 : 327 : append_pattern_def_seq (vinfo, stmt_vinfo, patt1, vectype);
5428 : :
5429 : 327 : tree new_lhs2 = vect_recog_temp_ssa_var (itype, NULL);
5430 : 327 : patt1 = gimple_build_assign (new_lhs2, RSHIFT_EXPR, new_lhs1,
5431 : : rshift);
5432 : 327 : append_pattern_def_seq (vinfo, stmt_vinfo, patt1, vectype);
5433 : :
5434 : 327 : tree new_lhs3 = vect_recog_temp_ssa_var (itype, NULL);
5435 : 327 : patt1 = gimple_build_assign (new_lhs3, PLUS_EXPR, new_lhs2,
5436 : : oprnd0);
5437 : 327 : append_pattern_def_seq (vinfo, stmt_vinfo, patt1, vectype);
5438 : :
5439 : 327 : tree new_lhs4 = vect_recog_temp_ssa_var (itype, NULL);
5440 : 327 : pattern_stmt = gimple_build_assign (new_lhs4, RSHIFT_EXPR,
5441 : : new_lhs3, rshift);
5442 : :
5443 : 327 : return pattern_stmt;
5444 : : }
5445 : 564 : }
5446 : 566 : }
5447 : : }
5448 : :
5449 : 57076 : if (prec > HOST_BITS_PER_WIDE_INT
5450 : 57076 : || integer_zerop (oprnd1))
5451 : 637 : return NULL;
5452 : :
5453 : 56439 : if (!can_mult_highpart_p (TYPE_MODE (vectype), TYPE_UNSIGNED (itype)))
5454 : : return NULL;
5455 : :
5456 : 15937 : if (TYPE_UNSIGNED (itype))
5457 : : {
5458 : 10158 : unsigned HOST_WIDE_INT mh, ml;
5459 : 10158 : int pre_shift, post_shift;
5460 : 10158 : unsigned HOST_WIDE_INT d = (TREE_INT_CST_LOW (oprnd1)
5461 : 10158 : & GET_MODE_MASK (itype_mode));
5462 : 10158 : tree t1, t2, t3, t4;
5463 : :
5464 : 10158 : if (d >= (HOST_WIDE_INT_1U << (prec - 1)))
5465 : : /* FIXME: Can transform this into oprnd0 >= oprnd1 ? 1 : 0. */
5466 : 23 : return NULL;
5467 : :
5468 : : /* Find a suitable multiplier and right shift count instead of
5469 : : directly dividing by D. */
5470 : 10135 : mh = choose_multiplier (d, prec, prec, &ml, &post_shift);
5471 : :
5472 : : /* If the suggested multiplier is more than PREC bits, we can do better
5473 : : for even divisors, using an initial right shift. */
5474 : 10135 : if (mh != 0 && (d & 1) == 0)
5475 : : {
5476 : 342 : pre_shift = ctz_or_zero (d);
5477 : 342 : mh = choose_multiplier (d >> pre_shift, prec, prec - pre_shift,
5478 : : &ml, &post_shift);
5479 : 342 : gcc_assert (!mh);
5480 : : }
5481 : : else
5482 : : pre_shift = 0;
5483 : :
5484 : 1058 : if (mh != 0)
5485 : : {
5486 : 1058 : if (post_shift - 1 >= prec)
5487 : : return NULL;
5488 : :
5489 : : /* t1 = oprnd0 h* ml;
5490 : : t2 = oprnd0 - t1;
5491 : : t3 = t2 >> 1;
5492 : : t4 = t1 + t3;
5493 : : q = t4 >> (post_shift - 1); */
5494 : 1058 : t1 = vect_recog_temp_ssa_var (itype, NULL);
5495 : 1058 : def_stmt = gimple_build_assign (t1, MULT_HIGHPART_EXPR, oprnd0,
5496 : 1058 : build_int_cst (itype, ml));
5497 : 1058 : append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
5498 : :
5499 : 1058 : t2 = vect_recog_temp_ssa_var (itype, NULL);
5500 : 1058 : def_stmt
5501 : 1058 : = gimple_build_assign (t2, MINUS_EXPR, oprnd0, t1);
5502 : 1058 : append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
5503 : :
5504 : 1058 : t3 = vect_recog_temp_ssa_var (itype, NULL);
5505 : 1058 : def_stmt
5506 : 1058 : = gimple_build_assign (t3, RSHIFT_EXPR, t2, integer_one_node);
5507 : 1058 : append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
5508 : :
5509 : 1058 : t4 = vect_recog_temp_ssa_var (itype, NULL);
5510 : 1058 : def_stmt
5511 : 1058 : = gimple_build_assign (t4, PLUS_EXPR, t1, t3);
5512 : :
5513 : 1058 : if (post_shift != 1)
5514 : : {
5515 : 1058 : append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
5516 : :
5517 : 1058 : q = vect_recog_temp_ssa_var (itype, NULL);
5518 : 1058 : pattern_stmt
5519 : 1058 : = gimple_build_assign (q, RSHIFT_EXPR, t4,
5520 : 1058 : build_int_cst (itype, post_shift - 1));
5521 : : }
5522 : : else
5523 : : {
5524 : : q = t4;
5525 : : pattern_stmt = def_stmt;
5526 : : }
5527 : : }
5528 : : else
5529 : : {
5530 : 9077 : if (pre_shift >= prec || post_shift >= prec)
5531 : : return NULL;
5532 : :
5533 : : /* t1 = oprnd0 >> pre_shift;
5534 : : t2 = t1 h* ml;
5535 : : q = t2 >> post_shift; */
5536 : 9077 : if (pre_shift)
5537 : : {
5538 : 342 : t1 = vect_recog_temp_ssa_var (itype, NULL);
5539 : 342 : def_stmt
5540 : 342 : = gimple_build_assign (t1, RSHIFT_EXPR, oprnd0,
5541 : 342 : build_int_cst (NULL, pre_shift));
5542 : 342 : append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
5543 : : }
5544 : : else
5545 : : t1 = oprnd0;
5546 : :
5547 : 9077 : t2 = vect_recog_temp_ssa_var (itype, NULL);
5548 : 9077 : def_stmt = gimple_build_assign (t2, MULT_HIGHPART_EXPR, t1,
5549 : 9077 : build_int_cst (itype, ml));
5550 : :
5551 : 9077 : if (post_shift)
5552 : : {
5553 : 9067 : append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
5554 : :
5555 : 9067 : q = vect_recog_temp_ssa_var (itype, NULL);
5556 : 9067 : def_stmt
5557 : 9067 : = gimple_build_assign (q, RSHIFT_EXPR, t2,
5558 : 9067 : build_int_cst (itype, post_shift));
5559 : : }
5560 : : else
5561 : : q = t2;
5562 : :
5563 : : pattern_stmt = def_stmt;
5564 : : }
5565 : : }
5566 : : else
5567 : : {
5568 : 5779 : unsigned HOST_WIDE_INT ml;
5569 : 5779 : int post_shift;
5570 : 5779 : HOST_WIDE_INT d = TREE_INT_CST_LOW (oprnd1);
5571 : 5779 : unsigned HOST_WIDE_INT abs_d;
5572 : 5779 : bool add = false;
5573 : 5779 : tree t1, t2, t3, t4;
5574 : :
5575 : : /* Give up for -1. */
5576 : 5779 : if (d == -1)
5577 : 0 : return NULL;
5578 : :
5579 : : /* Since d might be INT_MIN, we have to cast to
5580 : : unsigned HOST_WIDE_INT before negating to avoid
5581 : : undefined signed overflow. */
5582 : 5779 : abs_d = (d >= 0
5583 : 5779 : ? (unsigned HOST_WIDE_INT) d
5584 : : : - (unsigned HOST_WIDE_INT) d);
5585 : :
5586 : : /* n rem d = n rem -d */
5587 : 5779 : if (rhs_code == TRUNC_MOD_EXPR && d < 0)
5588 : : {
5589 : 0 : d = abs_d;
5590 : 0 : oprnd1 = build_int_cst (itype, abs_d);
5591 : : }
5592 : 5779 : if (HOST_BITS_PER_WIDE_INT >= prec
5593 : 5779 : && abs_d == HOST_WIDE_INT_1U << (prec - 1))
5594 : : /* This case is not handled correctly below. */
5595 : : return NULL;
5596 : :
5597 : 5779 : choose_multiplier (abs_d, prec, prec - 1, &ml, &post_shift);
5598 : 5779 : if (ml >= HOST_WIDE_INT_1U << (prec - 1))
5599 : : {
5600 : 1560 : add = true;
5601 : 1560 : ml |= HOST_WIDE_INT_M1U << (prec - 1);
5602 : : }
5603 : 5779 : if (post_shift >= prec)
5604 : : return NULL;
5605 : :
5606 : : /* t1 = oprnd0 h* ml; */
5607 : 5779 : t1 = vect_recog_temp_ssa_var (itype, NULL);
5608 : 5779 : def_stmt = gimple_build_assign (t1, MULT_HIGHPART_EXPR, oprnd0,
5609 : 5779 : build_int_cst (itype, ml));
5610 : :
5611 : 5779 : if (add)
5612 : : {
5613 : : /* t2 = t1 + oprnd0; */
5614 : 1560 : append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
5615 : 1560 : t2 = vect_recog_temp_ssa_var (itype, NULL);
5616 : 1560 : def_stmt = gimple_build_assign (t2, PLUS_EXPR, t1, oprnd0);
5617 : : }
5618 : : else
5619 : : t2 = t1;
5620 : :
5621 : 5779 : if (post_shift)
5622 : : {
5623 : : /* t3 = t2 >> post_shift; */
5624 : 5036 : append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
5625 : 5036 : t3 = vect_recog_temp_ssa_var (itype, NULL);
5626 : 5036 : def_stmt = gimple_build_assign (t3, RSHIFT_EXPR, t2,
5627 : 5036 : build_int_cst (itype, post_shift));
5628 : : }
5629 : : else
5630 : : t3 = t2;
5631 : :
5632 : 5779 : int msb = 1;
5633 : 5779 : int_range_max r;
5634 : 11558 : get_range_query (cfun)->range_of_expr (r, oprnd0);
5635 : 5779 : if (!r.varying_p () && !r.undefined_p ())
5636 : : {
5637 : 3223 : if (!wi::neg_p (r.lower_bound (), TYPE_SIGN (itype)))
5638 : : msb = 0;
5639 : 713 : else if (wi::neg_p (r.upper_bound (), TYPE_SIGN (itype)))
5640 : : msb = -1;
5641 : : }
5642 : :
5643 : 2510 : if (msb == 0 && d >= 0)
5644 : : {
5645 : : /* q = t3; */
5646 : : q = t3;
5647 : : pattern_stmt = def_stmt;
5648 : : }
5649 : : else
5650 : : {
5651 : : /* t4 = oprnd0 >> (prec - 1);
5652 : : or if we know from VRP that oprnd0 >= 0
5653 : : t4 = 0;
5654 : : or if we know from VRP that oprnd0 < 0
5655 : : t4 = -1; */
5656 : 3329 : append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
5657 : 3329 : t4 = vect_recog_temp_ssa_var (itype, NULL);
5658 : 3329 : if (msb != 1)
5659 : 68 : def_stmt = gimple_build_assign (t4, INTEGER_CST,
5660 : 68 : build_int_cst (itype, msb));
5661 : : else
5662 : 3261 : def_stmt = gimple_build_assign (t4, RSHIFT_EXPR, oprnd0,
5663 : 3261 : build_int_cst (itype, prec - 1));
5664 : 3329 : append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
5665 : :
5666 : : /* q = t3 - t4; or q = t4 - t3; */
5667 : 3329 : q = vect_recog_temp_ssa_var (itype, NULL);
5668 : 6490 : pattern_stmt = gimple_build_assign (q, MINUS_EXPR, d < 0 ? t4 : t3,
5669 : : d < 0 ? t3 : t4);
5670 : : }
5671 : 5779 : }
5672 : :
5673 : 15914 : if (rhs_code == TRUNC_MOD_EXPR || is_flclrd_moddiv_p)
5674 : : {
5675 : 7378 : tree r, t1;
5676 : :
5677 : : /* We divided. Now finish by:
5678 : : t1 = q * oprnd1;
5679 : : r = oprnd0 - t1; */
5680 : 7378 : append_pattern_def_seq (vinfo, stmt_vinfo, pattern_stmt);
5681 : :
5682 : 7378 : t1 = vect_recog_temp_ssa_var (itype, NULL);
5683 : 7378 : def_stmt = gimple_build_assign (t1, MULT_EXPR, q, oprnd1);
5684 : 7378 : append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
5685 : :
5686 : 7378 : r = vect_recog_temp_ssa_var (itype, NULL);
5687 : 7378 : pattern_stmt = gimple_build_assign (r, MINUS_EXPR, oprnd0, t1);
5688 : :
5689 : 7378 : if (is_flclrd_moddiv_p)
5690 : : {
5691 : 146 : append_pattern_def_seq (vinfo, stmt_vinfo, pattern_stmt);
5692 : 146 : pattern_stmt
5693 : 146 : = add_code_for_floorceilround_divmod (vectype, vinfo, stmt_vinfo,
5694 : : rhs_code, q, r, oprnd0, oprnd1,
5695 : : itype);
5696 : 146 : if (pattern_stmt == NULL)
5697 : : return NULL;
5698 : : }
5699 : : }
5700 : :
5701 : : /* Pattern detected. */
5702 : 15914 : vect_pattern_detected ("vect_recog_divmod_pattern", last_stmt);
5703 : :
5704 : 15914 : *type_out = vectype;
5705 : 15914 : return pattern_stmt;
5706 : : }
5707 : :
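(Editorial sketch, not part of the GCC sources: a minimal scalar view of the
sequence emitted above for signed division by a positive constant, assuming
prec == 32 and d == 3, for which choose_multiplier returns ml == 0x55555556
and post_shift == 0.)

     int
     div_by_3 (int x)
     {
       /* t1 = x h* ml: the high 32 bits of the 64-bit product.  */
       int t1 = (int) (((long long) x * 0x55555556LL) >> 32);
       int t3 = t1;        /* post_shift is 0 for d == 3.  */
       int t4 = x >> 31;   /* 0 if x >= 0, -1 if x < 0.  */
       return t3 - t4;     /* q = t3 - t4 since d > 0.  */
     }

(For a divisor such as d == 7, choose_multiplier returns ml >= 1 << 31, so
the "add" path above additionally emits t2 = t1 + x before the shift.)
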
5708 : : /* Detects pattern with a modulo operation (S1) where both arguments
5709 : : are variables of integral type.
5710 : : The statement is replaced by division, multiplication, and subtraction.
5711 : : The last statement (S4) is returned.
5712 : :
5713 : : Example:
5714 : : S1 c_t = a_t % b_t;
5715 : :
5716 : : is replaced by
5717 : : S2 x_t = a_t / b_t;
5718 : : S3 y_t = x_t * b_t;
5719 : : S4 z_t = a_t - y_t; */
5720 : :
5721 : : static gimple *
5722 : 30140936 : vect_recog_mod_var_pattern (vec_info *vinfo,
5723 : : stmt_vec_info stmt_vinfo, tree *type_out)
5724 : : {
5725 : 30140936 : gimple *last_stmt = STMT_VINFO_STMT (stmt_vinfo);
5726 : 30140936 : tree oprnd0, oprnd1, vectype, itype;
5727 : 30140936 : gimple *pattern_stmt, *def_stmt;
5728 : 30140936 : enum tree_code rhs_code;
5729 : :
5730 : 30140936 : if (!is_gimple_assign (last_stmt) || vect_is_reduction (stmt_vinfo))
5731 : : return NULL;
5732 : :
5733 : 20374304 : rhs_code = gimple_assign_rhs_code (last_stmt);
5734 : 20374304 : if (rhs_code != TRUNC_MOD_EXPR)
5735 : : return NULL;
5736 : :
5737 : 66313 : oprnd0 = gimple_assign_rhs1 (last_stmt);
5738 : 66313 : oprnd1 = gimple_assign_rhs2 (last_stmt);
5739 : 66313 : itype = TREE_TYPE (oprnd0);
5740 : 66313 : if (TREE_CODE (oprnd0) != SSA_NAME
5741 : 57999 : || TREE_CODE (oprnd1) != SSA_NAME
5742 : 41260 : || TREE_CODE (itype) != INTEGER_TYPE)
5743 : : return NULL;
5744 : :
5745 : 41156 : vectype = get_vectype_for_scalar_type (vinfo, itype);
5746 : :
5747 : 41156 : if (!vectype
5748 : 33221 : || target_has_vecop_for_code (TRUNC_MOD_EXPR, vectype)
5749 : 33221 : || !target_has_vecop_for_code (TRUNC_DIV_EXPR, vectype)
5750 : 0 : || !target_has_vecop_for_code (MULT_EXPR, vectype)
5751 : 41156 : || !target_has_vecop_for_code (MINUS_EXPR, vectype))
5752 : 41156 : return NULL;
5753 : :
5754 : 0 : tree q, tmp, r;
5755 : 0 : q = vect_recog_temp_ssa_var (itype, NULL);
5756 : 0 : def_stmt = gimple_build_assign (q, TRUNC_DIV_EXPR, oprnd0, oprnd1);
5757 : 0 : append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt, vectype);
5758 : :
5759 : 0 : tmp = vect_recog_temp_ssa_var (itype, NULL);
5760 : 0 : def_stmt = gimple_build_assign (tmp, MULT_EXPR, q, oprnd1);
5761 : 0 : append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt, vectype);
5762 : :
5763 : 0 : r = vect_recog_temp_ssa_var (itype, NULL);
5764 : 0 : pattern_stmt = gimple_build_assign (r, MINUS_EXPR, oprnd0, tmp);
5765 : :
5766 : : /* Pattern detected. */
5767 : 0 : *type_out = vectype;
5768 : 0 : vect_pattern_detected ("vect_recog_mod_var_pattern", last_stmt);
5769 : :
5770 : 0 : return pattern_stmt;
5771 : : }
5772 : :
5773 : :
5774 : : /* Return the proper type for converting bool VAR into
5775 : : an integer value or NULL_TREE if no such type exists.
5776 : : The type is chosen so that the converted value has the
5777 : : same number of elements as VAR's vector type. */
5778 : :
5779 : : static tree
5780 : 4335550 : integer_type_for_mask (tree var, vec_info *vinfo, vect_def_type *dt = nullptr)
5781 : : {
5782 : 4335550 : if (!VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (var)))
5783 : : return NULL_TREE;
5784 : :
5785 : 2032658 : stmt_vec_info def_stmt_info = vinfo->lookup_def (var);
5786 : 2032658 : if (dt)
5787 : : {
5788 : 368292 : if (!def_stmt_info)
5789 : 2620 : *dt = vect_external_def;
5790 : : else
5791 : 365672 : *dt = STMT_VINFO_DEF_TYPE (def_stmt_info);
5792 : : }
5793 : 368292 : if (!def_stmt_info
5794 : 1947083 : || STMT_VINFO_DEF_TYPE (def_stmt_info) == vect_external_def
5795 : 3611449 : || !vect_use_mask_type_p (def_stmt_info))
5796 : 812563 : return NULL_TREE;
5797 : :
5798 : 1220095 : return build_nonstandard_integer_type (def_stmt_info->mask_precision, 1);
5799 : : }
5800 : :
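(Editorial usage sketch, assuming a hypothetical mask precision of 16:)

     /* If VAR's defining statement records mask_precision == 16, the
        function above returns the unsigned 16-bit integer type.  */
     tree t = integer_type_for_mask (var, vinfo);

The pattern recognizers below use this to compare the widths of masks
produced by different comparisons and to decide whether a conversion is
needed before combining them.
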
5801 : : /* Function vect_recog_gcond_pattern
5802 : :
5803 : : Try to find pattern like following:
5804 : :
5805 : : if (a op b)
5806 : :
5807 : : where the operator 'op' is not !=, and convert it to an adjusted boolean pattern
5808 : :
5809 : : mask = a op b
5810 : : if (mask != 0)
5811 : :
5812 : : and set the mask type on MASK.
5813 : :
5814 : : Input:
5815 : :
5816 : : * STMT_VINFO: The stmt at the end from which the pattern
5817 : : search begins, i.e. the GIMPLE_COND that
5818 : : ends the block.
5819 : :
5820 : : Output:
5821 : :
5822 : : * TYPE_OUT: The type of the output of this pattern.
5823 : :
5824 : : * Return value: A new stmt that will be used to replace the pattern. */
5825 : :
5826 : : static gimple *
5827 : 30211095 : vect_recog_gcond_pattern (vec_info *vinfo,
5828 : : stmt_vec_info stmt_vinfo, tree *type_out)
5829 : : {
5830 : : /* Currently we only support this for loop vectorization, and only when
5831 : : the loop has multiple exits. */
5832 : 30211095 : loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
5833 : 3307351 : if (!loop_vinfo || !LOOP_VINFO_EARLY_BREAKS (loop_vinfo))
5834 : : return NULL;
5835 : :
5836 : 1168507 : gimple *last_stmt = STMT_VINFO_STMT (stmt_vinfo);
5837 : 1168507 : gcond* cond = NULL;
5838 : 30231331 : if (!(cond = dyn_cast <gcond *> (last_stmt)))
5839 : : return NULL;
5840 : :
5841 : 301069 : auto lhs = gimple_cond_lhs (cond);
5842 : 301069 : auto rhs = gimple_cond_rhs (cond);
5843 : 301069 : auto code = gimple_cond_code (cond);
5844 : :
5845 : 301069 : tree scalar_type = TREE_TYPE (lhs);
5846 : 301069 : if (VECTOR_TYPE_P (scalar_type))
5847 : : return NULL;
5848 : :
5849 : : /* If the input is a boolean then try to figure out the precision that the
5850 : : vector type should use. We cannot use the scalar precision as this would
5851 : : later mismatch. This is similar to what vect_recog_bool_pattern does. */
5852 : 301069 : if (VECT_SCALAR_BOOLEAN_TYPE_P (scalar_type))
5853 : : {
5854 : 12488 : if (tree stype = integer_type_for_mask (lhs, vinfo))
5855 : 301069 : scalar_type = stype;
5856 : : }
5857 : :
5858 : 301069 : tree vectype = get_mask_type_for_scalar_type (vinfo, scalar_type);
5859 : 301069 : if (vectype == NULL_TREE)
5860 : : return NULL;
5861 : :
5862 : 280833 : tree new_lhs = vect_recog_temp_ssa_var (boolean_type_node, NULL);
5863 : 280833 : gimple *new_stmt = gimple_build_assign (new_lhs, code, lhs, rhs);
5864 : 280833 : append_pattern_def_seq (vinfo, stmt_vinfo, new_stmt, vectype, scalar_type);
5865 : :
5866 : 280833 : gimple *pattern_stmt
5867 : 280833 : = gimple_build_cond (NE_EXPR, new_lhs,
5868 : 280833 : build_int_cst (TREE_TYPE (new_lhs), 0),
5869 : : NULL_TREE, NULL_TREE);
5870 : 280833 : *type_out = vectype;
5871 : 280833 : vect_pattern_detected ("vect_recog_gcond_pattern", last_stmt);
5872 : 280833 : return pattern_stmt;
5873 : : }
5874 : :
5875 : :
5876 : : /* A helper for vect_recog_mask_conversion_pattern. Build
5877 : : conversion of MASK to a type suitable for masking VECTYPE.
5878 : : The built statement gets the required vectype and is appended to
5879 : : STMT_VINFO's pattern definition sequence.
5880 : :
5881 : : Return the converted mask. */
5882 : :
5883 : : static tree
5884 : 102278 : build_mask_conversion (vec_info *vinfo,
5885 : : tree mask, tree vectype, stmt_vec_info stmt_vinfo)
5886 : : {
5887 : 102278 : gimple *stmt;
5888 : 102278 : tree masktype, tmp;
5889 : :
5890 : 102278 : masktype = truth_type_for (vectype);
5891 : 102278 : tmp = vect_recog_temp_ssa_var (TREE_TYPE (masktype), NULL);
5892 : 102278 : stmt = gimple_build_assign (tmp, CONVERT_EXPR, mask);
5893 : 102278 : append_pattern_def_seq (vinfo, stmt_vinfo,
5894 : 102278 : stmt, masktype, TREE_TYPE (vectype));
5895 : :
5896 : 102278 : return tmp;
5897 : : }
5898 : :
5899 : :
5900 : : /* Return MASK if MASK is suitable for masking an operation on vectors
5901 : : of type VECTYPE, otherwise convert it into such a form and return
5902 : : the result. Associate any conversion statements with STMT_INFO's
5903 : : pattern. */
5904 : :
5905 : : static tree
5906 : 68814 : vect_convert_mask_for_vectype (tree mask, tree vectype,
5907 : : stmt_vec_info stmt_info, vec_info *vinfo)
5908 : : {
5909 : 68814 : tree mask_type = integer_type_for_mask (mask, vinfo);
5910 : 68814 : if (mask_type)
5911 : : {
5912 : 68814 : tree mask_vectype = get_mask_type_for_scalar_type (vinfo, mask_type);
5913 : 68814 : if (mask_vectype
5914 : 137628 : && maybe_ne (TYPE_VECTOR_SUBPARTS (vectype),
5915 : 86421 : TYPE_VECTOR_SUBPARTS (mask_vectype)))
5916 : 51207 : mask = build_mask_conversion (vinfo, mask, vectype, stmt_info);
5917 : : }
5918 : 68814 : return mask;
5919 : : }
5920 : :
5921 : :
5922 : : /* Function vect_recog_bool_pattern
5923 : :
5924 : : Try to find pattern like following:
5925 : :
5926 : : bool a_b, b_b, c_b, d_b, e_b;
5927 : : TYPE f_T;
5928 : : loop:
5929 : : S1 a_b = x1 CMP1 y1;
5930 : : S2 b_b = x2 CMP2 y2;
5931 : : S3 c_b = a_b & b_b;
5932 : : S4 d_b = x3 CMP3 y3;
5933 : : S5 e_b = c_b | d_b;
5934 : : S6 f_T = (TYPE) e_b;
5935 : :
5936 : : where type 'TYPE' is an integral type. Or a similar pattern
5937 : : ending in
5938 : :
5939 : : S6 f_Y = e_b ? r_Y : s_Y;
5940 : :
5941 : : as results from if-conversion of a complex condition.
5942 : :
5943 : : Input:
5944 : :
5945 : : * STMT_VINFO: The stmt at the end from which the pattern
5946 : : search begins, i.e. cast of a bool to
5947 : : an integer type.
5948 : :
5949 : : Output:
5950 : :
5951 : : * TYPE_OUT: The type of the output of this pattern.
5952 : :
5953 : : * Return value: A new stmt that will be used to replace the pattern.
5954 : :
5955 : : Assuming the size of TYPE is the same as the size of all comparisons
5956 : : (otherwise some casts would be added where needed), for the above
5957 : : sequence we create related pattern stmts:
5958 : : S1' a_T = x1 CMP1 y1 ? 1 : 0;
5959 : : S3' c_T = x2 CMP2 y2 ? a_T : 0;
5960 : : S4' d_T = x3 CMP3 y3 ? 1 : 0;
5961 : : S5' e_T = c_T | d_T;
5962 : : S6' f_T = e_T;
5963 : :
5964 : : Instead of the above S3' we could emit:
5965 : : S2' b_T = x2 CMP2 y2 ? 1 : 0;
5966 : : S3' c_T = a_T | b_T;
5967 : : but the above is more efficient. */
5968 : :
5969 : : static gimple *
5970 : 30211095 : vect_recog_bool_pattern (vec_info *vinfo,
5971 : : stmt_vec_info stmt_vinfo, tree *type_out)
5972 : : {
5973 : 30211095 : gimple *last_stmt = stmt_vinfo->stmt;
5974 : 30211095 : enum tree_code rhs_code;
5975 : 30211095 : tree var, lhs, rhs, vectype;
5976 : 30211095 : gimple *pattern_stmt;
5977 : :
5978 : 30211095 : if (!is_gimple_assign (last_stmt))
5979 : : return NULL;
5980 : :
5981 : 20796084 : var = gimple_assign_rhs1 (last_stmt);
5982 : 20796084 : lhs = gimple_assign_lhs (last_stmt);
5983 : 20796084 : rhs_code = gimple_assign_rhs_code (last_stmt);
5984 : :
5985 : 20796084 : if (rhs_code == VIEW_CONVERT_EXPR)
5986 : 185316 : var = TREE_OPERAND (var, 0);
5987 : :
5988 : 20796084 : if (!VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (var)))
5989 : : return NULL;
5990 : :
5991 : 738798 : hash_set<gimple *> bool_stmts;
5992 : :
5993 : 738798 : if (CONVERT_EXPR_CODE_P (rhs_code)
5994 : : || rhs_code == VIEW_CONVERT_EXPR
5995 : : || rhs_code == FLOAT_EXPR)
5996 : : {
5997 : 91795 : if (VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (lhs)))
5998 : : return NULL;
5999 : 87762 : vectype = get_vectype_for_scalar_type (vinfo, TREE_TYPE (lhs));
6000 : :
6001 : 87762 : tree type = integer_type_for_mask (var, vinfo);
6002 : 87762 : tree cst0, cst1, tmp;
6003 : :
6004 : 87762 : if (!type)
6005 : : return NULL;
6006 : :
6007 : : /* We may directly use a cond with the narrowed type, avoiding
6008 : : multiple cond exprs followed by result packing, and performing a
6009 : : single cond with a packed mask instead. In the widening case it is
6010 : : better to do the cond first and then extract the results. */
6011 : 41810 : if (TYPE_MODE (type) == TYPE_MODE (TREE_TYPE (lhs)))
6012 : 28317 : type = TREE_TYPE (lhs);
6013 : :
6014 : 41810 : cst0 = build_int_cst (type, 0);
6015 : 41810 : cst1 = build_int_cst (type, 1);
6016 : 41810 : tmp = vect_recog_temp_ssa_var (type, NULL);
6017 : 41810 : pattern_stmt = gimple_build_assign (tmp, COND_EXPR, var, cst1, cst0);
6018 : :
6019 : 41810 : if (!useless_type_conversion_p (type, TREE_TYPE (lhs)))
6020 : : {
6021 : 13493 : tree new_vectype = get_vectype_for_scalar_type (vinfo, type);
6022 : 13493 : append_pattern_def_seq (vinfo, stmt_vinfo,
6023 : : pattern_stmt, new_vectype);
6024 : :
6025 : 13493 : lhs = vect_recog_temp_ssa_var (TREE_TYPE (lhs), NULL);
6026 : 13493 : pattern_stmt
6027 : 26672 : = gimple_build_assign (lhs, (rhs_code == FLOAT_EXPR
6028 : : ? FLOAT_EXPR : CONVERT_EXPR), tmp);
6029 : : }
6030 : :
6031 : 41810 : *type_out = vectype;
6032 : 41810 : vect_pattern_detected ("vect_recog_bool_pattern", last_stmt);
6033 : :
6034 : 41810 : return pattern_stmt;
6035 : : }
6036 : : else if (rhs_code == COND_EXPR
6037 : 173154 : && TREE_CODE (var) == SSA_NAME)
6038 : : {
6039 : 173154 : vectype = get_vectype_for_scalar_type (vinfo, TREE_TYPE (lhs));
6040 : 173154 : if (vectype == NULL_TREE)
6041 : : return NULL;
6042 : :
6043 : : /* Build a scalar type for the boolean result that when
6044 : : vectorized matches the vector type of the result in
6045 : : size and number of elements. */
6046 : 159790 : unsigned prec
6047 : 159790 : = vector_element_size (tree_to_poly_uint64 (TYPE_SIZE (vectype)),
6048 : : TYPE_VECTOR_SUBPARTS (vectype));
6049 : :
6050 : 159790 : tree type
6051 : 319580 : = build_nonstandard_integer_type (prec,
6052 : 159790 : TYPE_UNSIGNED (TREE_TYPE (var)));
6053 : 159790 : if (get_vectype_for_scalar_type (vinfo, type) == NULL_TREE)
6054 : : return NULL;
6055 : :
6056 : 159790 : enum vect_def_type dt;
6057 : 159790 : if (integer_type_for_mask (var, vinfo))
6058 : : return NULL;
6059 : 25500 : else if (TREE_CODE (TREE_TYPE (var)) == BOOLEAN_TYPE
6060 : 25500 : && vect_is_simple_use (var, vinfo, &dt)
6061 : 25500 : && (dt == vect_external_def
6062 : 25493 : || dt == vect_constant_def))
6063 : : {
6064 : : /* If the condition is already a boolean then manually convert it to a
6065 : : mask of the given integer type but don't set a vectype. */
6066 : 1228 : tree lhs_ivar = vect_recog_temp_ssa_var (type, NULL);
6067 : 1228 : pattern_stmt = gimple_build_assign (lhs_ivar, COND_EXPR, var,
6068 : : build_all_ones_cst (type),
6069 : : build_zero_cst (type));
6070 : 1228 : append_inv_pattern_def_seq (vinfo, pattern_stmt);
6071 : 1228 : var = lhs_ivar;
6072 : : }
6073 : :
6074 : 25500 : tree lhs_var = vect_recog_temp_ssa_var (boolean_type_node, NULL);
6075 : 25500 : pattern_stmt = gimple_build_assign (lhs_var, NE_EXPR, var,
6076 : 25500 : build_zero_cst (TREE_TYPE (var)));
6077 : :
6078 : 25500 : tree new_vectype = get_mask_type_for_scalar_type (vinfo, TREE_TYPE (var));
6079 : 25500 : if (!new_vectype)
6080 : : return NULL;
6081 : :
6082 : 25500 : append_pattern_def_seq (vinfo, stmt_vinfo, pattern_stmt, new_vectype,
6083 : 25500 : TREE_TYPE (var));
6084 : :
6085 : 25500 : lhs_var = vect_convert_mask_for_vectype (lhs_var, vectype, stmt_vinfo,
6086 : : vinfo);
6087 : :
6088 : 25500 : lhs = vect_recog_temp_ssa_var (TREE_TYPE (lhs), NULL);
6089 : 25500 : pattern_stmt
6090 : 25500 : = gimple_build_assign (lhs, COND_EXPR, lhs_var,
6091 : : gimple_assign_rhs2 (last_stmt),
6092 : : gimple_assign_rhs3 (last_stmt));
6093 : 25500 : *type_out = vectype;
6094 : 25500 : vect_pattern_detected ("vect_recog_bool_pattern", last_stmt);
6095 : :
6096 : 25500 : return pattern_stmt;
6097 : : }
6098 : 473849 : else if ((rhs_code == BIT_XOR_EXPR
6099 : : || rhs_code == BIT_AND_EXPR
6100 : 473849 : || rhs_code == BIT_IOR_EXPR)
6101 : 358107 : && TREE_CODE (var) == SSA_NAME)
6102 : : {
6103 : 358107 : tree rhs2 = gimple_assign_rhs2 (last_stmt);
6104 : 358107 : if (TREE_CODE (rhs2) != SSA_NAME)
6105 : : return NULL;
6106 : 358107 : tree lhs_type = integer_type_for_mask (lhs, vinfo);
6107 : 358107 : if (!lhs_type)
6108 : : return NULL;
6109 : 184146 : vectype = get_mask_type_for_scalar_type (vinfo, lhs_type);
6110 : 184146 : if (!vectype)
6111 : : return NULL;
6112 : 184146 : vect_def_type dt1, dt2;
6113 : 184146 : tree rhs1_type = integer_type_for_mask (var, vinfo, &dt1);
6114 : 184146 : tree rhs2_type = integer_type_for_mask (rhs2, vinfo, &dt2);
6115 : 184146 : if ((rhs1_type || dt1 == vect_external_def)
6116 : 165800 : && (rhs2_type || dt2 == vect_external_def))
6117 : : return NULL;
6118 : : /* When one input is a mask and the other is not, create a pattern
6119 : : stmt sequence that builds a mask for the non-mask input and
6120 : : converts it to one suitable for the output mask used. */
6121 : 39011 : if (rhs1_type && !rhs2_type)
6122 : : {
6123 : 20665 : tree rhs1_vectype = get_mask_type_for_scalar_type (vinfo, rhs1_type);
6124 : 20665 : if (!rhs1_vectype)
6125 : : return NULL;
6126 : 20665 : tree rhs2_vectype = get_vectype_for_scalar_type (vinfo,
6127 : 20665 : TREE_TYPE (rhs2));
6128 : 20665 : if (!rhs2_vectype)
6129 : : return NULL;
6130 : 20665 : tree new_vectype = truth_type_for (rhs2_vectype);
6131 : 20665 : tree tem = vect_recog_temp_ssa_var (TREE_TYPE (new_vectype), NULL);
6132 : 20665 : pattern_stmt = gimple_build_assign (tem, NE_EXPR, rhs2,
6133 : : build_zero_cst
6134 : 20665 : (TREE_TYPE (rhs2)));
6135 : 20665 : append_pattern_def_seq (vinfo, stmt_vinfo, pattern_stmt,
6136 : 20665 : new_vectype, TREE_TYPE (new_vectype));
6137 : 20665 : rhs2 = vect_convert_mask_for_vectype (tem, rhs1_vectype,
6138 : : stmt_vinfo, vinfo);
6139 : : }
6140 : 18346 : else if (!rhs1_type && rhs2_type)
6141 : : {
6142 : 18346 : tree rhs2_vectype = get_mask_type_for_scalar_type (vinfo, rhs2_type);
6143 : 18346 : if (!rhs2_vectype)
6144 : : return NULL;
6145 : 18346 : tree rhs1_vectype = get_vectype_for_scalar_type (vinfo,
6146 : 18346 : TREE_TYPE (var));
6147 : 18346 : if (!rhs1_vectype)
6148 : : return NULL;
6149 : 18346 : tree new_vectype = truth_type_for (rhs1_vectype);
6150 : 18346 : tree tem = vect_recog_temp_ssa_var (TREE_TYPE (new_vectype), NULL);
6151 : 18346 : pattern_stmt = gimple_build_assign (tem, NE_EXPR, var,
6152 : : build_zero_cst
6153 : 18346 : (TREE_TYPE (var)));
6154 : 18346 : append_pattern_def_seq (vinfo, stmt_vinfo, pattern_stmt,
6155 : 18346 : new_vectype, TREE_TYPE (new_vectype));
6156 : 18346 : var = vect_convert_mask_for_vectype (tem, rhs2_vectype,
6157 : : stmt_vinfo, vinfo);
6158 : : }
6159 : 39011 : lhs = vect_recog_temp_ssa_var (TREE_TYPE (lhs), NULL);
6160 : 39011 : pattern_stmt = gimple_build_assign (lhs, rhs_code, var, rhs2);
6161 : 39011 : vect_pattern_detected ("vect_recog_bool_pattern", last_stmt);
6162 : 39011 : *type_out = vectype;
6163 : 39011 : return pattern_stmt;
6164 : : }
6165 : 115742 : else if (rhs_code == SSA_NAME
6166 : 32472 : && STMT_VINFO_DATA_REF (stmt_vinfo))
6167 : : {
6168 : 7717 : stmt_vec_info pattern_stmt_info;
6169 : 7717 : vectype = get_vectype_for_scalar_type (vinfo, TREE_TYPE (lhs));
6170 : 7717 : if (!vectype || !VECTOR_MODE_P (TYPE_MODE (vectype)))
6171 : 0 : return NULL;
6172 : :
6173 : 7717 : tree type = integer_type_for_mask (var, vinfo);
6174 : 7717 : if (!type)
6175 : : return NULL;
6176 : :
6177 : 4303 : var = vect_convert_mask_for_vectype (var, vectype, stmt_vinfo, vinfo);
6178 : :
6179 : 4303 : tree cst0 = build_int_cst (TREE_TYPE (vectype), 0);
6180 : 4303 : tree cst1 = build_int_cst (TREE_TYPE (vectype), 1);
6181 : 4303 : rhs = vect_recog_temp_ssa_var (TREE_TYPE (vectype), NULL);
6182 : 4303 : pattern_stmt = gimple_build_assign (rhs, COND_EXPR, var, cst1, cst0);
6183 : 4303 : append_pattern_def_seq (vinfo, stmt_vinfo, pattern_stmt, vectype);
6184 : :
6185 : 4303 : lhs = build1 (VIEW_CONVERT_EXPR, TREE_TYPE (vectype), lhs);
6186 : 4303 : pattern_stmt = gimple_build_assign (lhs, SSA_NAME, rhs);
6187 : 4303 : pattern_stmt_info = vinfo->add_stmt (pattern_stmt);
6188 : 4303 : vinfo->move_dr (pattern_stmt_info, stmt_vinfo);
6189 : 4303 : *type_out = vectype;
6190 : 4303 : vect_pattern_detected ("vect_recog_bool_pattern", last_stmt);
6191 : :
6192 : 4303 : return pattern_stmt;
6193 : : }
6194 : : else
6195 : : return NULL;
6196 : 738798 : }
6197 : :
6198 : :
6199 : : /* Function vect_recog_mask_conversion_pattern
6200 : :
6201 : : Try to find statements that require boolean type
6202 : : conversion. Additional conversion statements are
6203 : : added to handle such cases. For example:
6204 : :
6205 : : bool m_1, m_2, m_3;
6206 : : int i_4, i_5;
6207 : : double d_6, d_7;
6208 : : char c_1, c_2, c_3;
6209 : :
6210 : : S1 m_1 = i_4 > i_5;
6211 : : S2 m_2 = d_6 < d_7;
6212 : : S3 m_3 = m_1 & m_2;
6213 : : S4 c_1 = m_3 ? c_2 : c_3;
6214 : :
6215 : : Will be transformed into:
6216 : :
6217 : : S1 m_1 = i_4 > i_5;
6218 : : S2 m_2 = d_6 < d_7;
6219 : : S3'' m_2' = (_Bool[bitsize=32])m_2
6220 : : S3' m_3' = m_1 & m_2';
6221 : : S4'' m_3'' = (_Bool[bitsize=8])m_3'
6222 : : S4' c_1' = m_3'' ? c_2 : c_3; */
6223 : :
6224 : : static gimple *
6225 : 30234106 : vect_recog_mask_conversion_pattern (vec_info *vinfo,
6226 : : stmt_vec_info stmt_vinfo, tree *type_out)
6227 : : {
6228 : 30234106 : gimple *last_stmt = stmt_vinfo->stmt;
6229 : 30234106 : enum tree_code rhs_code;
6230 : 30234106 : tree lhs = NULL_TREE, rhs1, rhs2, tmp, rhs1_type, rhs2_type;
6231 : 30234106 : tree vectype1, vectype2;
6232 : 30234106 : stmt_vec_info pattern_stmt_info;
6233 : :
6234 : : /* Check for MASK_LOAD and MASK_STORE as well as COND_OP calls requiring mask
6235 : : conversion. */
6236 : 30234106 : if (is_gimple_call (last_stmt)
6237 : 30234106 : && gimple_call_internal_p (last_stmt))
6238 : : {
6239 : 113786 : gcall *pattern_stmt;
6240 : :
6241 : 113786 : internal_fn ifn = gimple_call_internal_fn (last_stmt);
6242 : 113786 : int mask_argno = internal_fn_mask_index (ifn);
6243 : 113786 : if (mask_argno < 0)
6244 : : return NULL;
6245 : :
6246 : 8947 : bool store_p = internal_store_fn_p (ifn);
6247 : 8947 : bool load_p = internal_store_fn_p (ifn);
6248 : 8947 : if (store_p)
6249 : : {
6250 : 1482 : int rhs_index = internal_fn_stored_value_index (ifn);
6251 : 1482 : tree rhs = gimple_call_arg (last_stmt, rhs_index);
6252 : 1482 : vectype1 = get_vectype_for_scalar_type (vinfo, TREE_TYPE (rhs));
6253 : : }
6254 : : else
6255 : : {
6256 : 7465 : lhs = gimple_call_lhs (last_stmt);
6257 : 7465 : if (!lhs)
6258 : : return NULL;
6259 : 7465 : vectype1 = get_vectype_for_scalar_type (vinfo, TREE_TYPE (lhs));
6260 : : }
6261 : :
6262 : 8947 : if (!vectype1)
6263 : : return NULL;
6264 : :
6265 : 8729 : tree mask_arg = gimple_call_arg (last_stmt, mask_argno);
6266 : 8729 : tree mask_arg_type = integer_type_for_mask (mask_arg, vinfo);
6267 : 8729 : if (mask_arg_type)
6268 : : {
6269 : 7764 : vectype2 = get_mask_type_for_scalar_type (vinfo, mask_arg_type);
6270 : :
6271 : 7764 : if (!vectype2
6272 : 7764 : || known_eq (TYPE_VECTOR_SUBPARTS (vectype1),
6273 : : TYPE_VECTOR_SUBPARTS (vectype2)))
6274 : 4411 : return NULL;
6275 : : }
6276 : 965 : else if (store_p || load_p)
6277 : : return NULL;
6278 : :
6279 : 4136 : tmp = build_mask_conversion (vinfo, mask_arg, vectype1, stmt_vinfo);
6280 : :
6281 : 4136 : auto_vec<tree, 8> args;
6282 : 4136 : unsigned int nargs = gimple_call_num_args (last_stmt);
6283 : 4136 : args.safe_grow (nargs, true);
6284 : 20680 : for (unsigned int i = 0; i < nargs; ++i)
6285 : 16544 : args[i] = ((int) i == mask_argno
6286 : 16544 : ? tmp
6287 : 12408 : : gimple_call_arg (last_stmt, i));
6288 : 4136 : pattern_stmt = gimple_build_call_internal_vec (ifn, args);
6289 : :
6290 : 4136 : if (!store_p)
6291 : : {
6292 : 3864 : lhs = vect_recog_temp_ssa_var (TREE_TYPE (lhs), NULL);
6293 : 3864 : gimple_call_set_lhs (pattern_stmt, lhs);
6294 : : }
6295 : :
6296 : 3864 : if (load_p || store_p)
6297 : 272 : gimple_call_set_nothrow (pattern_stmt, true);
6298 : :
6299 : 4136 : pattern_stmt_info = vinfo->add_stmt (pattern_stmt);
6300 : 4136 : if (STMT_VINFO_DATA_REF (stmt_vinfo))
6301 : 1620 : vinfo->move_dr (pattern_stmt_info, stmt_vinfo);
6302 : :
6303 : 4136 : *type_out = vectype1;
6304 : 4136 : vect_pattern_detected ("vect_recog_mask_conversion_pattern", last_stmt);
6305 : :
6306 : 4136 : return pattern_stmt;
6307 : 4136 : }
6308 : :
6309 : 30120320 : if (!is_gimple_assign (last_stmt))
6310 : : return NULL;
6311 : :
6312 : 20819095 : gimple *pattern_stmt;
6313 : 20819095 : lhs = gimple_assign_lhs (last_stmt);
6314 : 20819095 : rhs1 = gimple_assign_rhs1 (last_stmt);
6315 : 20819095 : rhs_code = gimple_assign_rhs_code (last_stmt);
6316 : :
6317 : : /* Check for cond expression requiring mask conversion. */
6318 : 20819095 : if (rhs_code == COND_EXPR)
6319 : : {
6320 : 165591 : vectype1 = get_vectype_for_scalar_type (vinfo, TREE_TYPE (lhs));
6321 : :
6322 : 165591 : gcc_assert (! COMPARISON_CLASS_P (rhs1));
6323 : 165591 : if (TREE_CODE (rhs1) == SSA_NAME)
6324 : : {
6325 : 165591 : rhs1_type = integer_type_for_mask (rhs1, vinfo);
6326 : 165591 : if (!rhs1_type)
6327 : : return NULL;
6328 : : }
6329 : : else
6330 : : return NULL;
6331 : :
6332 : 154123 : vectype2 = get_mask_type_for_scalar_type (vinfo, rhs1_type);
6333 : :
6334 : 154123 : if (!vectype1 || !vectype2)
6335 : : return NULL;
6336 : :
6337 : : /* Continue if a conversion is needed. Also continue if we have
6338 : : a comparison whose vector type would normally be different from
6339 : : VECTYPE2 when considered in isolation. In that case we'll
6340 : : replace the comparison with an SSA name (so that we can record
6341 : : its vector type) and behave as though the comparison was an SSA
6342 : : name from the outset. */
6343 : 152207 : if (known_eq (TYPE_VECTOR_SUBPARTS (vectype1),
6344 : : TYPE_VECTOR_SUBPARTS (vectype2)))
6345 : : return NULL;
6346 : :
6347 : 28956 : if (maybe_ne (TYPE_VECTOR_SUBPARTS (vectype1),
6348 : 57912 : TYPE_VECTOR_SUBPARTS (vectype2)))
6349 : 28956 : tmp = build_mask_conversion (vinfo, rhs1, vectype1, stmt_vinfo);
6350 : : else
6351 : : tmp = rhs1;
6352 : :
6353 : 28956 : lhs = vect_recog_temp_ssa_var (TREE_TYPE (lhs), NULL);
6354 : 28956 : pattern_stmt = gimple_build_assign (lhs, COND_EXPR, tmp,
6355 : : gimple_assign_rhs2 (last_stmt),
6356 : : gimple_assign_rhs3 (last_stmt));
6357 : :
6358 : 28956 : *type_out = vectype1;
6359 : 28956 : vect_pattern_detected ("vect_recog_mask_conversion_pattern", last_stmt);
6360 : :
6361 : 28956 : return pattern_stmt;
6362 : : }
6363 : :
6364 : : /* Now check for binary boolean operations requiring conversion for
6365 : : one of the operands. */
6366 : 20653504 : if (!VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (lhs)))
6367 : : return NULL;
6368 : :
6369 : 1740648 : if (rhs_code != BIT_IOR_EXPR
6370 : : && rhs_code != BIT_XOR_EXPR
6371 : 1740648 : && rhs_code != BIT_AND_EXPR
6372 : 1421552 : && TREE_CODE_CLASS (rhs_code) != tcc_comparison)
6373 : : return NULL;
6374 : :
6375 : 1549130 : rhs2 = gimple_assign_rhs2 (last_stmt);
6376 : :
6377 : 1549130 : rhs1_type = integer_type_for_mask (rhs1, vinfo);
6378 : 1549130 : rhs2_type = integer_type_for_mask (rhs2, vinfo);
6379 : :
6380 : 1549130 : if (!rhs1_type || !rhs2_type
6381 : 1549130 : || TYPE_PRECISION (rhs1_type) == TYPE_PRECISION (rhs2_type))
6382 : : return NULL;
6383 : :
6384 : 17979 : if (TYPE_PRECISION (rhs1_type) < TYPE_PRECISION (rhs2_type))
6385 : : {
6386 : 11194 : vectype1 = get_mask_type_for_scalar_type (vinfo, rhs1_type);
6387 : 11194 : if (!vectype1)
6388 : : return NULL;
6389 : 11194 : rhs2 = build_mask_conversion (vinfo, rhs2, vectype1, stmt_vinfo);
6390 : : }
6391 : : else
6392 : : {
6393 : 6785 : vectype1 = get_mask_type_for_scalar_type (vinfo, rhs2_type);
6394 : 6785 : if (!vectype1)
6395 : : return NULL;
6396 : 6785 : rhs1 = build_mask_conversion (vinfo, rhs1, vectype1, stmt_vinfo);
6397 : : }
6398 : :
6399 : 17979 : lhs = vect_recog_temp_ssa_var (TREE_TYPE (lhs), NULL);
6400 : 17979 : pattern_stmt = gimple_build_assign (lhs, rhs_code, rhs1, rhs2);
6401 : :
6402 : 17979 : *type_out = vectype1;
6403 : 17979 : vect_pattern_detected ("vect_recog_mask_conversion_pattern", last_stmt);
6404 : :
6405 : 17979 : return pattern_stmt;
6406 : : }
6407 : :
6408 : : /* STMT_INFO is a load or store. If the load or store is conditional, return
6409 : : the boolean condition under which it occurs, otherwise return null. */
6410 : :
6411 : : static tree
6412 : 34442 : vect_get_load_store_mask (stmt_vec_info stmt_info)
6413 : : {
6414 : 34442 : if (gassign *def_assign = dyn_cast <gassign *> (stmt_info->stmt))
6415 : : {
6416 : 33113 : gcc_assert (gimple_assign_single_p (def_assign));
6417 : : return NULL_TREE;
6418 : : }
6419 : :
6420 : 1329 : if (gcall *def_call = dyn_cast <gcall *> (stmt_info->stmt))
6421 : : {
6422 : 1329 : internal_fn ifn = gimple_call_internal_fn (def_call);
6423 : 1329 : int mask_index = internal_fn_mask_index (ifn);
6424 : 1329 : return gimple_call_arg (def_call, mask_index);
6425 : : }
6426 : :
6427 : 0 : gcc_unreachable ();
6428 : : }
6429 : :
6430 : : /* Return the equivalent of:
6431 : :
6432 : : fold_convert (TYPE, VALUE)
6433 : :
6434 : : with the expectation that the operation will be vectorized.
6435 : : If new statements are needed, add them as pattern statements
6436 : : to STMT_INFO. */
6437 : :
6438 : : static tree
6439 : 0 : vect_add_conversion_to_pattern (vec_info *vinfo,
6440 : : tree type, tree value, stmt_vec_info stmt_info)
6441 : : {
6442 : 0 : if (useless_type_conversion_p (type, TREE_TYPE (value)))
6443 : : return value;
6444 : :
6445 : 0 : tree new_value = vect_recog_temp_ssa_var (type, NULL);
6446 : 0 : gassign *conversion = gimple_build_assign (new_value, CONVERT_EXPR, value);
6447 : 0 : append_pattern_def_seq (vinfo, stmt_info, conversion,
6448 : : get_vectype_for_scalar_type (vinfo, type));
6449 : 0 : return new_value;
6450 : : }
6451 : :
6452 : : /* Try to convert STMT_INFO into a call to a gather load or scatter store
6453 : : internal function. Return the final statement on success and set
6454 : : *TYPE_OUT to the vector type being loaded or stored.
6455 : :
6456 : : This function only handles gathers and scatters that were recognized
6457 : : as such from the outset (indicated by STMT_VINFO_GATHER_SCATTER_P). */
6458 : :
6459 : : static gimple *
6460 : 30234106 : vect_recog_gather_scatter_pattern (vec_info *vinfo,
6461 : : stmt_vec_info stmt_info, tree *type_out)
6462 : : {
6463 : : /* Currently we only support this for loop vectorization. */
6464 : 33546369 : loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
6465 : 3312263 : if (!loop_vinfo)
6466 : : return NULL;
6467 : :
6468 : : /* Make sure that we're looking at a gather load or scatter store. */
6469 : 3312263 : data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
6470 : 3312263 : if (!dr || !STMT_VINFO_GATHER_SCATTER_P (stmt_info))
6471 : : return NULL;
6472 : :
6473 : : /* Get the boolean that controls whether the load or store happens.
6474 : : This is null if the operation is unconditional. */
6475 : 34442 : tree mask = vect_get_load_store_mask (stmt_info);
6476 : :
6477 : : /* DR analysis nailed down the vector type for the access. */
6478 : 34442 : tree gs_vectype = STMT_VINFO_VECTYPE (stmt_info);
6479 : :
6480 : : /* Make sure that the target supports an appropriate internal
6481 : : function for the gather/scatter operation. */
6482 : 34442 : gather_scatter_info gs_info;
6483 : 34442 : if (!vect_check_gather_scatter (stmt_info, gs_vectype, loop_vinfo, &gs_info)
6484 : 34442 : || gs_info.ifn == IFN_LAST)
6485 : : return NULL;
6486 : :
6487 : : /* Convert the mask to the right form. */
6488 : 0 : if (mask)
6489 : 0 : mask = vect_convert_mask_for_vectype (mask, gs_vectype, stmt_info,
6490 : : loop_vinfo);
6491 : 0 : else if (gs_info.ifn == IFN_MASK_SCATTER_STORE
6492 : 0 : || gs_info.ifn == IFN_MASK_GATHER_LOAD
6493 : 0 : || gs_info.ifn == IFN_MASK_LEN_SCATTER_STORE
6494 : 0 : || gs_info.ifn == IFN_MASK_LEN_GATHER_LOAD)
6495 : 0 : mask = build_int_cst (TREE_TYPE (truth_type_for (gs_vectype)), -1);
6496 : :
6497 : : /* Get the invariant base and non-invariant offset, converting the
6498 : : latter to the same width as the vector elements. */
6499 : 0 : tree base = gs_info.base;
6500 : 0 : tree offset_type = TREE_TYPE (gs_info.offset_vectype);
6501 : 0 : tree offset = vect_add_conversion_to_pattern (vinfo, offset_type,
6502 : : gs_info.offset, stmt_info);
6503 : :
6504 : : /* Build the new pattern statement. */
6505 : 0 : tree scale = size_int (gs_info.scale);
6506 : 0 : gcall *pattern_stmt;
6507 : :
6508 : 0 : if (DR_IS_READ (dr))
6509 : : {
6510 : 0 : tree zero = build_zero_cst (gs_info.element_type);
6511 : 0 : if (mask != NULL)
6512 : : {
6513 : 0 : int elsval = MASK_LOAD_ELSE_ZERO;
6514 : :
6515 : 0 : tree vec_els
6516 : 0 : = vect_get_mask_load_else (elsval, TREE_TYPE (gs_vectype));
6517 : 0 : pattern_stmt = gimple_build_call_internal (gs_info.ifn, 7, base,
6518 : : gs_info.alias_ptr,
6519 : : offset, scale, zero, mask,
6520 : : vec_els);
6521 : : }
6522 : : else
6523 : 0 : pattern_stmt = gimple_build_call_internal (gs_info.ifn, 5, base,
6524 : : gs_info.alias_ptr,
6525 : : offset, scale, zero);
6526 : 0 : tree lhs = gimple_get_lhs (stmt_info->stmt);
6527 : 0 : tree load_lhs = vect_recog_temp_ssa_var (TREE_TYPE (lhs), NULL);
6528 : 0 : gimple_call_set_lhs (pattern_stmt, load_lhs);
6529 : : }
6530 : : else
6531 : : {
6532 : 0 : tree rhs = vect_get_store_rhs (stmt_info);
6533 : 0 : if (mask != NULL)
6534 : 0 : pattern_stmt = gimple_build_call_internal (gs_info.ifn, 6,
6535 : : base, gs_info.alias_ptr,
6536 : : offset, scale, rhs, mask);
6537 : : else
6538 : 0 : pattern_stmt = gimple_build_call_internal (gs_info.ifn, 5,
6539 : : base, gs_info.alias_ptr,
6540 : : offset, scale, rhs);
6541 : : }
6542 : 0 : gimple_call_set_nothrow (pattern_stmt, true);
6543 : :
6544 : : /* Copy across relevant vectorization info and associate DR with the
6545 : : new pattern statement instead of the original statement. */
6546 : 0 : stmt_vec_info pattern_stmt_info = loop_vinfo->add_stmt (pattern_stmt);
6547 : 0 : loop_vinfo->move_dr (pattern_stmt_info, stmt_info);
6548 : :
6549 : 0 : *type_out = gs_vectype;
6550 : 0 : vect_pattern_detected ("gather/scatter pattern", stmt_info->stmt);
6551 : :
6552 : 0 : return pattern_stmt;
6553 : : }
6554 : :
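(Editorial sketch, not GCC source: for an unconditional gather such as

     for (int i = 0; i < n; i++)
       res[i] = a[idx[i]];

the DR_IS_READ arm above rewrites the load into an internal call of the form

     x_1 = .GATHER_LOAD (&a, alias_ptr, idx_i, scale, 0);

matching the five-argument gimple_build_call_internal call, while the
IFN_MASK_* variants append the mask and else-value arguments.)
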
6555 : : /* Helper method of vect_recog_cond_store_pattern; check whether COND_ARG
6556 : : points to a load statement that reads the same data as that of
6557 : : STORE_VINFO. */
6558 : :
6559 : : static bool
6560 : 22080 : vect_cond_store_pattern_same_ref (vec_info *vinfo,
6561 : : stmt_vec_info store_vinfo, tree cond_arg)
6562 : : {
6563 : 22080 : stmt_vec_info load_stmt_vinfo = vinfo->lookup_def (cond_arg);
6564 : 22080 : if (!load_stmt_vinfo
6565 : 12694 : || !STMT_VINFO_DATA_REF (load_stmt_vinfo)
6566 : 7586 : || DR_IS_WRITE (STMT_VINFO_DATA_REF (load_stmt_vinfo))
6567 : 29666 : || !same_data_refs (STMT_VINFO_DATA_REF (store_vinfo),
6568 : : STMT_VINFO_DATA_REF (load_stmt_vinfo)))
6569 : 17111 : return false;
6570 : :
6571 : : return true;
6572 : : }
6573 : :
6574 : : /* Function vect_recog_cond_store_pattern
6575 : :
6576 : : Try to find the following pattern:
6577 : :
6578 : : x = *_3;
6579 : : c = a CMP b;
6580 : : y = c ? t_20 : x;
6581 : : *_3 = y;
6582 : :
6583 : : where the store to *_3 is guarded by a conditional select on a value loaded
6584 : : from the same location. In such a case we can elide the initial load if
6585 : : MASK_STORE is supported and instead only conditionally write out the result.
6586 : :
6587 : : The pattern produces for the above:
6588 : :
6589 : : c = a CMP b;
6590 : : .MASK_STORE (_3, c, t_20)
6591 : :
6592 : : Input:
6593 : :
6594 : : * STMT_VINFO: The stmt from which the pattern search begins. In the
6595 : : example above, the search begins when this function is called with the store to *_3.
6596 : :
6597 : : Output:
6598 : :
6599 : : * TYPE_OUT: The type of the output of this pattern.
6600 : :
6601 : : * Return value: A new stmt that will be used to replace the sequence. */
6602 : :
6603 : : static gimple *
6604 : 30234106 : vect_recog_cond_store_pattern (vec_info *vinfo,
6605 : : stmt_vec_info stmt_vinfo, tree *type_out)
6606 : : {
6607 : 30234106 : loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
6608 : 3312263 : if (!loop_vinfo)
6609 : : return NULL;
6610 : :
6611 : 3312263 : gimple *store_stmt = STMT_VINFO_STMT (stmt_vinfo);
6612 : :
6613 : : /* Needs to be a gimple store that we have DR info for. */
6614 : 3312263 : if (!STMT_VINFO_DATA_REF (stmt_vinfo)
6615 : 789570 : || DR_IS_READ (STMT_VINFO_DATA_REF (stmt_vinfo))
6616 : 3591718 : || !gimple_store_p (store_stmt))
6617 : 3034290 : return NULL;
6618 : :
6619 : 277973 : tree st_rhs = gimple_assign_rhs1 (store_stmt);
6620 : :
6621 : 277973 : if (TREE_CODE (st_rhs) != SSA_NAME)
6622 : : return NULL;
6623 : :
6624 : 217177 : auto cond_vinfo = vinfo->lookup_def (st_rhs);
6625 : :
6626 : : /* If the condition isn't part of the loop then bool recog wouldn't have seen
6627 : : it and so this transformation may not be valid. */
6628 : 217177 : if (!cond_vinfo)
6629 : : return NULL;
6630 : :
6631 : 203383 : cond_vinfo = vect_stmt_to_vectorize (cond_vinfo);
6632 : 30428537 : gassign *cond_stmt = dyn_cast<gassign *> (STMT_VINFO_STMT (cond_vinfo));
6633 : 254291 : if (!cond_stmt || gimple_assign_rhs_code (cond_stmt) != COND_EXPR)
6634 : : return NULL;
6635 : :
6636 : : /* Check if the else value matches the original loaded one. */
6637 : 11657 : bool invert = false;
6638 : 11657 : tree cmp_ls = gimple_arg (cond_stmt, 0);
6639 : 11657 : if (TREE_CODE (cmp_ls) != SSA_NAME)
6640 : : return NULL;
6641 : :
6642 : 11657 : tree cond_arg1 = gimple_arg (cond_stmt, 1);
6643 : 11657 : tree cond_arg2 = gimple_arg (cond_stmt, 2);
6644 : :
6645 : 11657 : if (!vect_cond_store_pattern_same_ref (vinfo, stmt_vinfo, cond_arg2)
6646 : 11657 : && !(invert = vect_cond_store_pattern_same_ref (vinfo, stmt_vinfo,
6647 : : cond_arg1)))
6648 : : return NULL;
6649 : :
6650 : 4969 : vect_pattern_detected ("vect_recog_cond_store_pattern", store_stmt);
6651 : :
6652 : 4969 : tree scalar_type = TREE_TYPE (st_rhs);
6653 : 4969 : if (VECTOR_TYPE_P (scalar_type))
6654 : : return NULL;
6655 : :
6656 : 4969 : tree vectype = get_vectype_for_scalar_type (vinfo, scalar_type);
6657 : 4969 : if (vectype == NULL_TREE)
6658 : : return NULL;
6659 : :
6660 : 4969 : machine_mode mask_mode;
6661 : 4969 : machine_mode vecmode = TYPE_MODE (vectype);
6662 : 1066 : if (!VECTOR_MODE_P (vecmode)
6663 : 4969 : || targetm.vectorize.conditional_operation_is_expensive (IFN_MASK_STORE)
6664 : 0 : || !targetm.vectorize.get_mask_mode (vecmode).exists (&mask_mode)
6665 : 4969 : || !can_vec_mask_load_store_p (vecmode, mask_mode, false))
6666 : 4969 : return NULL;
6667 : :
6668 : 0 : tree base = DR_REF (STMT_VINFO_DATA_REF (stmt_vinfo));
6669 : 0 : if (may_be_nonaddressable_p (base))
6670 : : return NULL;
6671 : :
6672 : : /* We need to use the false parameter of the conditional select. */
6673 : 0 : tree cond_store_arg = invert ? cond_arg2 : cond_arg1;
6674 : 0 : tree cond_load_arg = invert ? cond_arg1 : cond_arg2;
6675 : 0 : gimple *load_stmt = SSA_NAME_DEF_STMT (cond_load_arg);
6676 : :
6677 : : /* This is a rough check that there aren't any aliasing stores
6678 : : between the load and the store. It's a bit strict, but for now it's good
6679 : : enough. */
6680 : 0 : if (gimple_vuse (load_stmt) != gimple_vuse (store_stmt))
6681 : : return NULL;
6682 : :
6683 : : /* If we have to invert the condition, i.e. use the true argument rather than
6684 : : the false argument, we have to negate the mask. */
6685 : 0 : if (invert)
6686 : : {
6687 : 0 : tree var = vect_recog_temp_ssa_var (boolean_type_node, NULL);
6688 : :
6689 : : /* Invert the mask using ^ 1. */
6690 : 0 : tree itype = TREE_TYPE (cmp_ls);
6691 : 0 : gassign *conv = gimple_build_assign (var, BIT_XOR_EXPR, cmp_ls,
6692 : : build_int_cst (itype, 1));
6693 : :
6694 : 0 : tree mask_vec_type = get_mask_type_for_scalar_type (vinfo, itype);
6695 : 0 : append_pattern_def_seq (vinfo, stmt_vinfo, conv, mask_vec_type, itype);
6696 : 0 : cmp_ls = var;
6697 : : }
6698 : :
6699 : 0 : if (TREE_CODE (base) != MEM_REF)
6700 : 0 : base = build_fold_addr_expr (base);
6701 : :
6702 : 0 : tree ptr = build_int_cst (reference_alias_ptr_type (base),
6703 : 0 : get_object_alignment (base));
6704 : :
6705 : : /* Convert the mask to the right form. */
6706 : 0 : tree mask = vect_convert_mask_for_vectype (cmp_ls, vectype, stmt_vinfo,
6707 : : vinfo);
6708 : :
6709 : 0 : gcall *call
6710 : 0 : = gimple_build_call_internal (IFN_MASK_STORE, 4, base, ptr, mask,
6711 : : cond_store_arg);
6712 : 0 : gimple_set_location (call, gimple_location (store_stmt));
6713 : :
6714 : : /* Copy across relevant vectorization info and associate DR with the
6715 : : new pattern statement instead of the original statement. */
6716 : 0 : stmt_vec_info pattern_stmt_info = loop_vinfo->add_stmt (call);
6717 : 0 : loop_vinfo->move_dr (pattern_stmt_info, stmt_vinfo);
6718 : :
6719 : 0 : *type_out = vectype;
6720 : 0 : return call;
6721 : : }
6722 : :
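(Editorial sketch of the emitted form: a GIMPLE boolean is a 1-bit value, so
the XOR with 1 above flips the predicate.

     /* y = c ? t : x, x loaded from *p:   .MASK_STORE (p, align, c, t)  */
     /* y = c ? x : t, load in true arm:   .MASK_STORE (p, align, c ^ 1, t)  */

Either way the load of *p is elided and the store happens only in the lanes
where the final mask is true.)
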
6723 : : /* Return true if TYPE is a non-boolean integer type. These are the types
6724 : : that we want to consider for narrowing. */
6725 : :
6726 : : static bool
6727 : 60142271 : vect_narrowable_type_p (tree type)
6728 : : {
6729 : 60142271 : return INTEGRAL_TYPE_P (type) && !VECT_SCALAR_BOOLEAN_TYPE_P (type);
6730 : : }
6731 : :
6732 : : /* Return true if the operation given by CODE can be truncated to N bits
6733 : : when only N bits of the output are needed. This is only true if bits
6734 : : above the low N bits of the inputs have no effect on the low N bits
6735 : : of the result. */
6735 : :
6736 : : static bool
6737 : 15021356 : vect_truncatable_operation_p (tree_code code)
6738 : : {
6739 : 15021356 : switch (code)
6740 : : {
6741 : : case NEGATE_EXPR:
6742 : : case PLUS_EXPR:
6743 : : case MINUS_EXPR:
6744 : : case MULT_EXPR:
6745 : : case BIT_NOT_EXPR:
6746 : : case BIT_AND_EXPR:
6747 : : case BIT_IOR_EXPR:
6748 : : case BIT_XOR_EXPR:
6749 : : case COND_EXPR:
6750 : : return true;
6751 : :
6752 : 5807138 : default:
6753 : 5807138 : return false;
6754 : : }
6755 : : }
6756 : :
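An editorial check of the property: the low 8 bits of a 32-bit PLUS depend
only on the low 8 bits of its inputs, while division does not.

     /* 300 and 44 agree in their low 8 bits (300 & 0xff == 44), and
        (unsigned char) (300 + 5) == (unsigned char) (44 + 5) holds,
        but 300 / 100 == 3 while 44 / 100 == 0, so TRUNC_DIV_EXPR is
        correctly absent from the list above.  */
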
6757 : : /* Record that STMT_INFO could be changed from operating on TYPE to
6758 : : operating on a type with the precision and sign given by PRECISION
6759 : : and SIGN respectively. PRECISION is an arbitrary bit precision;
6760 : : it might not be a whole number of bytes. */
6761 : :
6762 : : static void
6763 : 2313941 : vect_set_operation_type (stmt_vec_info stmt_info, tree type,
6764 : : unsigned int precision, signop sign)
6765 : : {
6766 : : /* Round the precision up to a whole number of bytes. */
6767 : 2313941 : precision = vect_element_precision (precision);
6768 : 2313941 : if (precision < TYPE_PRECISION (type)
6769 : 2313941 : && (!stmt_info->operation_precision
6770 : 43141 : || stmt_info->operation_precision > precision))
6771 : : {
6772 : 1489364 : stmt_info->operation_precision = precision;
6773 : 1489364 : stmt_info->operation_sign = sign;
6774 : : }
6775 : 2313941 : }
6776 : :
6777 : : /* Record that STMT_INFO only requires MIN_INPUT_PRECISION from its
6778 : : non-boolean inputs, all of which have type TYPE. MIN_INPUT_PRECISION
6779 : : is an arbitrary bit precision; it might not be a whole number of bytes. */
6780 : :
6781 : : static void
6782 : 10935180 : vect_set_min_input_precision (stmt_vec_info stmt_info, tree type,
6783 : : unsigned int min_input_precision)
6784 : : {
6785 : : /* This operation in isolation only requires the inputs to have
6786 : : MIN_INPUT_PRECISION bits of precision. However, that doesn't mean
6787 : : that MIN_INPUT_PRECISION is a natural precision for the chain
6788 : : as a whole. E.g. consider something like:
6789 : :
6790 : : unsigned short *x, *y;
6791 : : *y = ((*x & 0xf0) >> 4) | (*y << 4);
6792 : :
6793 : : The right shift can be done on unsigned chars, and only requires the
6794 : : result of "*x & 0xf0" to be done on unsigned chars. But taking that
6795 : : approach would mean turning a natural chain of single-vector unsigned
6796 : : short operations into one that truncates "*x" and then extends
6797 : : "(*x & 0xf0) >> 4", with two vectors for each unsigned short
6798 : : operation and one vector for each unsigned char operation.
6799 : : This would be a significant pessimization.
6800 : :
6801 : : Instead only propagate the maximum of this precision and the precision
6802 : : required by the users of the result. This means that we don't pessimize
6803 : : the case above but continue to optimize things like:
6804 : :
6805 : : unsigned char *y;
6806 : : unsigned short *x;
6807 : : *y = ((*x & 0xf0) >> 4) | (*y << 4);
6808 : :
6809 : : Here we would truncate two vectors of *x to a single vector of
6810 : : unsigned chars and use single-vector unsigned char operations for
6811 : : everything else, rather than doing two unsigned short copies of
6812 : : "(*x & 0xf0) >> 4" and then truncating the result. */
6813 : 10935180 : min_input_precision = MAX (min_input_precision,
6814 : : stmt_info->min_output_precision);
6815 : :
6816 : 10935180 : if (min_input_precision < TYPE_PRECISION (type)
6817 : 10935180 : && (!stmt_info->min_input_precision
6818 : 64178 : || stmt_info->min_input_precision > min_input_precision))
6819 : 562671 : stmt_info->min_input_precision = min_input_precision;
6820 : 10935180 : }
6821 : :
6822 : : /* Subroutine of vect_determine_min_output_precision. Return true if
6823 : : we can calculate a reduced number of output bits for STMT_INFO,
6824 : : whose result is LHS. */
6825 : :
6826 : : static bool
6827 : 14132681 : vect_determine_min_output_precision_1 (vec_info *vinfo,
6828 : : stmt_vec_info stmt_info, tree lhs)
6829 : : {
6830 : : /* Take the maximum precision required by users of the result. */
6831 : 14132681 : unsigned int precision = 0;
6832 : 14132681 : imm_use_iterator iter;
6833 : 14132681 : use_operand_p use;
6834 : 29062357 : FOR_EACH_IMM_USE_FAST (use, iter, lhs)
6835 : : {
6836 : 14668484 : gimple *use_stmt = USE_STMT (use);
6837 : 14668484 : if (is_gimple_debug (use_stmt))
6838 : 525884 : continue;
6839 : 14142600 : stmt_vec_info use_stmt_info = vinfo->lookup_stmt (use_stmt);
6840 : 14142600 : if (!use_stmt_info || !use_stmt_info->min_input_precision)
6841 : : return false;
6842 : : /* The input precision recorded for COND_EXPRs applies only to the
6843 : : "then" and "else" values. */
6844 : 271652 : gassign *assign = dyn_cast <gassign *> (stmt_info->stmt);
6845 : 230079 : if (assign
6846 : 230079 : && gimple_assign_rhs_code (assign) == COND_EXPR
6847 : 541 : && use->use != gimple_assign_rhs2_ptr (assign)
6848 : 541 : && use->use != gimple_assign_rhs3_ptr (assign))
6849 : : return false;
6850 : 799031 : precision = MAX (precision, use_stmt_info->min_input_precision);
6851 : 13871489 : }
6852 : :
6853 : 261192 : if (dump_enabled_p ())
6854 : 5742 : dump_printf_loc (MSG_NOTE, vect_location,
6855 : : "only the low %d bits of %T are significant\n",
6856 : : precision, lhs);
6857 : 261192 : stmt_info->min_output_precision = precision;
6858 : 261192 : return true;
6859 : : }
6860 : :
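A worked instance (editorial sketch, hypothetical variables): given

     unsigned short x = ...;
     *p8 = (unsigned char) x;   /* user needing only 8 bits  */
     y16 = x + w;               /* user needing 16 bits  */

the loop above records min_output_precision (x) == 16, the maximum of the
users' min_input_precision; any non-debug user without a recorded
min_input_precision makes the function return false and the caller fall back
to the full type precision.
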
6861 : : /* Calculate min_output_precision for STMT_INFO. */
6862 : :
6863 : : static void
6864 : 36680339 : vect_determine_min_output_precision (vec_info *vinfo, stmt_vec_info stmt_info)
6865 : : {
6866 : : /* We're only interested in statements with a narrowable result. */
6867 : 36680339 : tree lhs = gimple_get_lhs (stmt_info->stmt);
6868 : 36680339 : if (!lhs
6869 : 28489315 : || TREE_CODE (lhs) != SSA_NAME
6870 : 60645331 : || !vect_narrowable_type_p (TREE_TYPE (lhs)))
6871 : : return;
6872 : :
6873 : 14132681 : if (!vect_determine_min_output_precision_1 (vinfo, stmt_info, lhs))
6874 : 13871489 : stmt_info->min_output_precision = TYPE_PRECISION (TREE_TYPE (lhs));
6875 : : }
6876 : :
6877 : : /* Use range information to decide whether STMT (described by STMT_INFO)
6878 : : could be done in a narrower type. This is effectively a forward
6879 : : propagation, since it uses context-independent information that applies
6880 : : to all users of an SSA name. */
6881 : :
6882 : : static void
6883 : 20199862 : vect_determine_precisions_from_range (stmt_vec_info stmt_info, gassign *stmt)
6884 : : {
6885 : 20199862 : tree lhs = gimple_assign_lhs (stmt);
6886 : 20199862 : if (!lhs || TREE_CODE (lhs) != SSA_NAME)
6887 : 17994077 : return;
6888 : :
6889 : 15977417 : tree type = TREE_TYPE (lhs);
6890 : 15977417 : if (!vect_narrowable_type_p (type))
6891 : : return;
6892 : :
6893 : : /* First see whether we have any useful range information for the result. */
6894 : 10756864 : unsigned int precision = TYPE_PRECISION (type);
6895 : 10756864 : signop sign = TYPE_SIGN (type);
6896 : 10756864 : wide_int min_value, max_value;
6897 : 10756864 : if (!vect_get_range_info (lhs, &min_value, &max_value))
6898 : : return;
6899 : :
6900 : 5232774 : tree_code code = gimple_assign_rhs_code (stmt);
6901 : 5232774 : unsigned int nops = gimple_num_ops (stmt);
6902 : :
6903 : 5232774 : if (!vect_truncatable_operation_p (code))
6904 : : {
6905 : : /* Handle operations that can be computed in type T if all inputs
6906 : : and outputs can be represented in type T. Also handle left and
6907 : : right shifts, where (in addition) the maximum shift amount must
6908 : : be less than the number of bits in T. */
6909 : 1958173 : bool is_shift;
6910 : 1958173 : switch (code)
6911 : : {
6912 : : case LSHIFT_EXPR:
6913 : : case RSHIFT_EXPR:
6914 : : is_shift = true;
6915 : : break;
6916 : :
6917 : 255190 : case ABS_EXPR:
6918 : 255190 : case MIN_EXPR:
6919 : 255190 : case MAX_EXPR:
6920 : 255190 : case TRUNC_DIV_EXPR:
6921 : 255190 : case CEIL_DIV_EXPR:
6922 : 255190 : case FLOOR_DIV_EXPR:
6923 : 255190 : case ROUND_DIV_EXPR:
6924 : 255190 : case EXACT_DIV_EXPR:
6925 : : /* Modulus is excluded because it is typically calculated by doing
6926 : : a division, for which minimum signed / -1 isn't representable in
6927 : : the original signed type. We could take the division range into
6928 : : account instead, if handling modulus ever becomes important. */
6929 : 255190 : is_shift = false;
6930 : 255190 : break;
6931 : :
6932 : : default:
6933 : : return;
6934 : : }
6935 : 1252465 : for (unsigned int i = 1; i < nops; ++i)
6936 : : {
6937 : 969546 : tree op = gimple_op (stmt, i);
6938 : 969546 : wide_int op_min_value, op_max_value;
6939 : 969546 : if (TREE_CODE (op) == INTEGER_CST)
6940 : : {
6941 : 285563 : unsigned int op_precision = TYPE_PRECISION (TREE_TYPE (op));
6942 : 285563 : op_min_value = op_max_value = wi::to_wide (op, op_precision);
6943 : : }
6944 : 683983 : else if (TREE_CODE (op) == SSA_NAME)
6945 : : {
6946 : 683983 : if (!vect_get_range_info (op, &op_min_value, &op_max_value))
6947 : : return;
6948 : : }
6949 : : else
6950 : : return;
6951 : :
6952 : 624540 : if (is_shift && i == 2)
6953 : : {
6954 : : /* There needs to be one more bit than the maximum shift amount.
6955 : :
6956 : : If the maximum shift amount is already 1 less than PRECISION
6957 : : then we can't narrow the shift further. Dealing with that
6958 : : case first ensures that we can safely use an unsigned range
6959 : : below.
6960 : :
6961 : : op_min_value isn't relevant, since shifts by negative amounts
6962 : : are UB. */
6963 : 190818 : if (wi::geu_p (op_max_value, precision - 1))
6964 : : return;
6965 : 173767 : unsigned int min_bits = op_max_value.to_uhwi () + 1;
6966 : :
6967 : : /* As explained below, we can convert a signed shift into an
6968 : : unsigned shift if the sign bit is always clear. At this
6969 : : point we've already processed the ranges of the output and
6970 : : the first input. */
6971 : 173767 : auto op_sign = sign;
6972 : 173767 : if (sign == SIGNED && !wi::neg_p (min_value))
6973 : : op_sign = UNSIGNED;
6974 : 347534 : op_min_value = wide_int::from (wi::min_value (min_bits, op_sign),
6975 : 173767 : precision, op_sign);
6976 : 347534 : op_max_value = wide_int::from (wi::max_value (min_bits, op_sign),
6977 : 173767 : precision, op_sign);
6978 : : }
6979 : 607489 : min_value = wi::min (min_value, op_min_value, sign);
6980 : 607489 : max_value = wi::max (max_value, op_max_value, sign);
6981 : 969546 : }
6982 : : }
6983 : :
6984 : : /* Try to switch signed types for unsigned types if we can.
6985 : : This is better for two reasons. First, unsigned ops tend
6986 : : to be cheaper than signed ops. Second, it means that we can
6987 : : handle things like:
6988 : :
6989 : : signed char c;
6990 : : int res = (int) c & 0xff00; // range [0x0000, 0xff00]
6991 : :
6992 : : as:
6993 : :
6994 : : signed char c;
6995 : : unsigned short res_1 = (unsigned short) c & 0xff00;
6996 : : int res = (int) res_1;
6997 : :
6998 : : where the intermediate result res_1 has unsigned rather than
6999 : : signed type. */
7000 : 3557520 : if (sign == SIGNED && !wi::neg_p (min_value))
7001 : : sign = UNSIGNED;
7002 : :
7003 : : /* See what precision is required for MIN_VALUE and MAX_VALUE. */
7004 : 3557520 : unsigned int precision1 = wi::min_precision (min_value, sign);
7005 : 3557520 : unsigned int precision2 = wi::min_precision (max_value, sign);
7006 : 3557520 : unsigned int value_precision = MAX (precision1, precision2);
7007 : 3557520 : if (value_precision >= precision)
7008 : : return;
7009 : :
7010 : 2205785 : if (dump_enabled_p ())
7011 : 99968 : dump_printf_loc (MSG_NOTE, vect_location, "can narrow to %s:%d"
7012 : : " without loss of precision: %G",
7013 : : sign == SIGNED ? "signed" : "unsigned",
7014 : : value_precision, (gimple *) stmt);
7015 : :
7016 : 2205785 : vect_set_operation_type (stmt_info, type, value_precision, sign);
7017 : 2205785 : vect_set_min_input_precision (stmt_info, type, value_precision);
7018 : 10756864 : }
7019 : :
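A worked instance (editorial sketch): if ranger proves

     unsigned int x;            /* known range [0, 200]  */
     unsigned int y = x + 50;   /* hence range [50, 250]  */

then PLUS_EXPR is truncatable, wi::min_precision (250, UNSIGNED) == 8, and
the function above records that the statement can be done in 8-bit
precision, allowing later vectorization on unsigned char elements.
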
7020 : : /* Use information about the users of STMT's result to decide whether
7021 : : STMT (described by STMT_INFO) could be done in a narrower type.
7022 : : This is effectively a backward propagation. */
7023 : :
7024 : : static void
7025 : 20199862 : vect_determine_precisions_from_users (stmt_vec_info stmt_info, gassign *stmt)
7026 : : {
7027 : 20199862 : tree_code code = gimple_assign_rhs_code (stmt);
7028 : 20199862 : unsigned int opno = (code == COND_EXPR ? 2 : 1);
7029 : 20199862 : tree type = TREE_TYPE (gimple_op (stmt, opno));
7030 : 20199862 : if (!vect_narrowable_type_p (type))
7031 : 11470467 : return;
7032 : :
7033 : 12610898 : unsigned int precision = TYPE_PRECISION (type);
7034 : 12610898 : unsigned int operation_precision, min_input_precision;
7035 : 12610898 : switch (code)
7036 : : {
7037 : 2368972 : CASE_CONVERT:
7038 : : /* Only the bits that contribute to the output matter. Don't change
7039 : : the precision of the operation itself. */
7040 : 2368972 : operation_precision = precision;
7041 : 2368972 : min_input_precision = stmt_info->min_output_precision;
7042 : 2368972 : break;
7043 : :
7044 : 453344 : case LSHIFT_EXPR:
7045 : 453344 : case RSHIFT_EXPR:
7046 : 453344 : {
7047 : 453344 : tree shift = gimple_assign_rhs2 (stmt);
7048 : 453344 : unsigned int min_const_shift, max_const_shift;
7049 : 453344 : wide_int min_shift, max_shift;
7050 : 453344 : if (TREE_CODE (shift) == SSA_NAME
7051 : 93729 : && vect_get_range_info (shift, &min_shift, &max_shift)
7052 : 71456 : && wi::ge_p (min_shift, 0, TYPE_SIGN (TREE_TYPE (shift)))
7053 : 522068 : && wi::lt_p (max_shift, TYPE_PRECISION (type),
7054 : 68724 : TYPE_SIGN (TREE_TYPE (shift))))
7055 : : {
7056 : 61299 : min_const_shift = min_shift.to_uhwi ();
7057 : 61299 : max_const_shift = max_shift.to_uhwi ();
7058 : : }
7059 : 392045 : else if (TREE_CODE (shift) == INTEGER_CST
7060 : 751660 : && wi::ltu_p (wi::to_widest (shift), precision))
7061 : 359507 : min_const_shift = max_const_shift = TREE_INT_CST_LOW (shift);
7062 : : else
7063 : 32538 : return;
7064 : 420806 : if (code == LSHIFT_EXPR)
7065 : : {
7066 : : /* Avoid creating an undefined shift.
7067 : :
7068 : : ??? We could instead use min_output_precision as-is and
7069 : : optimize out-of-range shifts to zero. However, only
7070 : : degenerate testcases shift away all their useful input data,
7071 : : and it isn't natural to drop input operations in the middle
7072 : : of vectorization. This sort of thing should really be
7073 : : handled before vectorization. */
7074 : 98578 : operation_precision = MAX (stmt_info->min_output_precision,
7075 : : max_const_shift + 1);
7076 : : /* We need CONST_SHIFT fewer bits of the input. */
7077 : 98578 : min_input_precision = (MAX (operation_precision, max_const_shift)
7078 : : - min_const_shift);
7079 : : }
7080 : : else
7081 : : {
7082 : : /* We need CONST_SHIFT extra bits to do the operation. */
7083 : 322228 : operation_precision = (stmt_info->min_output_precision
7084 : : + max_const_shift);
7085 : 322228 : min_input_precision = operation_precision;
7086 : : }
7087 : 420806 : break;
7088 : 453344 : }
7089 : :
7090 : 9788582 : default:
7091 : 9788582 : if (vect_truncatable_operation_p (code))
7092 : : {
7093 : : /* Input bit N has no effect on output bits N-1 and lower. */
7094 : 5939617 : operation_precision = stmt_info->min_output_precision;
7095 : 5939617 : min_input_precision = operation_precision;
7096 : 5939617 : break;
7097 : : }
7098 : : return;
7099 : : }
7100 : :
7101 : 8729395 : if (operation_precision < precision)
7102 : : {
7103 : 108156 : if (dump_enabled_p ())
7104 : 2738 : dump_printf_loc (MSG_NOTE, vect_location, "can narrow to %s:%d"
7105 : : " without affecting users: %G",
7106 : 2738 : TYPE_UNSIGNED (type) ? "unsigned" : "signed",
7107 : : operation_precision, (gimple *) stmt);
7108 : 216312 : vect_set_operation_type (stmt_info, type, operation_precision,
7109 : 108156 : TYPE_SIGN (type));
7110 : : }
7111 : 8729395 : vect_set_min_input_precision (stmt_info, type, min_input_precision);
7112 : : }
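
/* Editor's note: a worked, standalone rendering of the two constant-shift
   cases above.  P stands for the caller's min_output_precision and C for
   a single known shift amount (min_const_shift == max_const_shift == C);
   all names here are illustrative.  */
#include <algorithm>

struct shift_precisions { unsigned operation; unsigned min_input; };

/* x << C: keep the operation at least C+1 bits wide to avoid creating an
   undefined shift; C low input bits are shifted away and not needed.
   For P = 8, C = 5 this yields { 8, 3 }.  */
static shift_precisions
lshift_precisions (unsigned p, unsigned c)
{
  unsigned op = std::max (p, c + 1);
  return { op, std::max (op, c) - c };
}

/* x >> C: C extra bits are consumed from above the result, so both the
   operation and its input need P + C bits.  For P = 8, C = 5 this
   yields { 13, 13 }.  */
static shift_precisions
rshift_precisions (unsigned p, unsigned c)
{
  return { p + c, p + c };
}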
7113 : :
7114 : : /* Return true if the statement described by STMT_INFO sets a boolean
7115 : : SSA_NAME and if we know how to vectorize this kind of statement using
7116 : : vector mask types. */
7117 : :
7118 : : static bool
7119 : 37464054 : possible_vector_mask_operation_p (stmt_vec_info stmt_info)
7120 : : {
7121 : 37464054 : tree lhs = gimple_get_lhs (stmt_info->stmt);
7122 : 37464054 : tree_code code = ERROR_MARK;
7123 : 37464054 : gassign *assign = NULL;
7124 : 37464054 : gcond *cond = NULL;
7125 : :
7126 : 37464054 : if ((assign = dyn_cast <gassign *> (stmt_info->stmt)))
7127 : 20751199 : code = gimple_assign_rhs_code (assign);
7128 : 16712855 : else if ((cond = dyn_cast <gcond *> (stmt_info->stmt)))
7129 : : {
7130 : 5165560 : lhs = gimple_cond_lhs (cond);
7131 : 5165560 : code = gimple_cond_code (cond);
7132 : : }
7133 : :
7134 : 37464054 : if (!lhs
7135 : 34361660 : || TREE_CODE (lhs) != SSA_NAME
7136 : 67273509 : || !VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (lhs)))
7137 : : return false;
7138 : :
7139 : 2211525 : if (code != ERROR_MARK)
7140 : : {
7141 : 1917635 : switch (code)
7142 : : {
7143 : : CASE_CONVERT:
7144 : : case SSA_NAME:
7145 : : case BIT_NOT_EXPR:
7146 : : case BIT_IOR_EXPR:
7147 : : case BIT_XOR_EXPR:
7148 : : case BIT_AND_EXPR:
7149 : : return true;
7150 : :
7151 : 1498280 : default:
7152 : 1498280 : return TREE_CODE_CLASS (code) == tcc_comparison;
7153 : : }
7154 : : }
7155 : 293890 : else if (is_a <gphi *> (stmt_info->stmt))
7156 : 182822 : return true;
7157 : : return false;
7158 : : }
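
/* Editor's note: as a concrete (made-up) illustration of the predicate
   above, in the GIMPLE sequence

     _1 = a_2 < b_3;
     _4 = _1 & c_5;
     if (_4 != 0) goto <bb X>; else goto <bb Y>;

   all three statements qualify: the comparison via tcc_comparison, the
   AND via BIT_AND_EXPR on a boolean lhs, and the gcond via its
   gimple_cond_code.  */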
7159 : :
7160 : : /* If STMT_INFO sets a boolean SSA_NAME, see whether we should use
7161 : : a vector mask type instead of a normal vector type. Record the
7162 : : result in STMT_INFO->mask_precision. Returns true when the
7163 : : precision changed. */
7164 : :
7165 : : static bool
7166 : 37464054 : vect_determine_mask_precision (vec_info *vinfo, stmt_vec_info stmt_info)
7167 : : {
7168 : 37464054 : if (!possible_vector_mask_operation_p (stmt_info))
7169 : : return false;
7170 : :
7171 : : /* If at least one boolean input uses a vector mask type,
7172 : : pick the mask type with the narrowest elements.
7173 : :
7174 : : ??? This is the traditional behavior. It should always produce
7175 : : the smallest number of operations, but isn't necessarily the
7176 : : optimal choice. For example, if we have:
7177 : :
7178 : : a = b & c
7179 : :
7180 : : where:
7181 : :
7182 : : - the user of a wants it to have a mask type for 16-bit elements (M16)
7183 : : - b also uses M16
7184 : : - c uses a mask type for 8-bit elements (M8)
7185 : :
7186 : : then picking M8 gives:
7187 : :
7188 : : - 1 M16->M8 pack for b
7189 : : - 1 M8 AND for a
7190 : : - 2 M8->M16 unpacks for the user of a
7191 : :
7192 : : whereas picking M16 would have given:
7193 : :
7194 : : - 2 M8->M16 unpacks for c
7195 : : - 2 M16 ANDs for a
7196 : :
7197 : : The number of operations are equal, but M16 would have given
7198 : : a shorter dependency chain and allowed more ILP. */
7199 : 2050319 : unsigned int precision = ~0U;
7200 : 2050319 : gimple *stmt = STMT_VINFO_STMT (stmt_info);
7201 : :
7202 : : /* If the statement compares two values that shouldn't use vector masks,
7203 : : try comparing the values as normal scalars instead. */
7204 : 2050319 : tree_code code = ERROR_MARK;
7205 : 2050319 : tree op0_type;
7206 : 2050319 : unsigned int nops = -1;
7207 : 2050319 : unsigned int ops_start = 0;
7208 : :
7209 : 2050319 : if (gassign *assign = dyn_cast <gassign *> (stmt))
7210 : : {
7211 : 1294384 : code = gimple_assign_rhs_code (assign);
7212 : 1294384 : op0_type = TREE_TYPE (gimple_assign_rhs1 (assign));
7213 : 1294384 : nops = gimple_num_ops (assign);
7214 : 1294384 : ops_start = 1;
7215 : : }
7216 : 755935 : else if (gcond *cond = dyn_cast <gcond *> (stmt))
7217 : : {
7218 : 573113 : code = gimple_cond_code (cond);
7219 : 573113 : op0_type = TREE_TYPE (gimple_cond_lhs (cond));
7220 : 573113 : nops = 2;
7221 : 573113 : ops_start = 0;
7222 : : }
7223 : :
7224 : 1867497 : if (code != ERROR_MARK)
7225 : : {
7226 : 5559050 : for (unsigned int i = ops_start; i < nops; ++i)
7227 : : {
7228 : 3691553 : tree rhs = gimple_op (stmt, i);
7229 : 3691553 : if (!VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (rhs)))
7230 : 1761722 : continue;
7231 : :
7232 : 1929831 : stmt_vec_info def_stmt_info = vinfo->lookup_def (rhs);
7233 : 1929831 : if (!def_stmt_info)
7234 : : /* Don't let external or constant operands influence the choice.
7235 : : We can convert them to whichever vector type we pick. */
7236 : 586740 : continue;
7237 : :
7238 : 1343091 : if (def_stmt_info->mask_precision)
7239 : : {
7240 : 1123100 : if (precision > def_stmt_info->mask_precision)
7241 : 3691553 : precision = def_stmt_info->mask_precision;
7242 : : }
7243 : : }
7244 : :
7245 : 1867497 : if (precision == ~0U
7246 : 1505639 : && TREE_CODE_CLASS (code) == tcc_comparison)
7247 : : {
7248 : 1294162 : scalar_mode mode;
7249 : 1294162 : tree vectype, mask_type;
7250 : 1294162 : if (is_a <scalar_mode> (TYPE_MODE (op0_type), &mode)
7251 : : /* Do not allow this to set vinfo->vector_mode, this might
7252 : : disrupt the result for the next iteration. */
7253 : 1294162 : && (vectype = get_related_vectype_for_scalar_type
7254 : 1551919 : (vinfo->vector_mode, op0_type))
7255 : 1126497 : && (mask_type = truth_type_for (vectype))
7256 : 1126497 : && expand_vec_cmp_expr_p (vectype, mask_type, code))
7257 : 1737480 : precision = GET_MODE_BITSIZE (mode);
7258 : : }
7259 : : }
7260 : : else
7261 : : {
7262 : 182822 : gphi *phi = as_a <gphi *> (stmt_info->stmt);
7263 : 749791 : for (unsigned i = 0; i < gimple_phi_num_args (phi); ++i)
7264 : : {
7265 : 566969 : tree rhs = gimple_phi_arg_def (phi, i);
7266 : :
7267 : 566969 : stmt_vec_info def_stmt_info = vinfo->lookup_def (rhs);
7268 : 566969 : if (!def_stmt_info)
7269 : : /* Don't let external or constant operands influence the choice.
7270 : : We can convert them to whichever vector type we pick. */
7271 : 369907 : continue;
7272 : :
7273 : 197062 : if (def_stmt_info->mask_precision)
7274 : : {
7275 : 170677 : if (precision > def_stmt_info->mask_precision)
7276 : 566969 : precision = def_stmt_info->mask_precision;
7277 : : }
7278 : : }
7279 : : }
7280 : :
7281 : 2050319 : if (stmt_info->mask_precision != precision)
7282 : : {
7283 : 1948314 : if (dump_enabled_p ())
7284 : : {
7285 : 7238 : if (precision == ~0U)
7286 : 1703 : dump_printf_loc (MSG_NOTE, vect_location,
7287 : : "using normal nonmask vectors for %G",
7288 : : stmt_info->stmt);
7289 : : else
7290 : 5535 : dump_printf_loc (MSG_NOTE, vect_location,
7291 : : "using boolean precision %d for %G",
7292 : : precision, stmt_info->stmt);
7293 : : }
7294 : :
7295 : : /* ??? We'd like to assert stmt_info->mask_precision == 0
7296 : : || stmt_info->mask_precision > precision, i.e. that we only
7297 : : ever decrease mask precisions during the iteration, but the
7298 : : tcc_comparison handling above means that for comparisons of
7299 : : bools we start with 8 and might later increase it if the bools
7300 : : get a mask precision of their own. */
7301 : 1948314 : stmt_info->mask_precision = precision;
7302 : 1948314 : return true;
7303 : : }
7304 : : return false;
7305 : : }
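
/* Editor's note: a concrete (made-up) instance of the tcc_comparison
   fallback above: for _1 = x_2 < y_3 with 32-bit int operands and no
   mask-typed inputs, PRECISION is still ~0U after the operand scan, the
   operands' scalar mode is SImode, and if the target can expand the
   compare on the corresponding vector type the statement is given mask
   precision GET_MODE_BITSIZE (SImode), i.e. 32.  */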
7306 : :
7307 : : /* Handle vect_determine_precisions for STMT_INFO, given that we
7308 : : have already done so for the users of its result. */
7309 : :
7310 : : void
7311 : 36680339 : vect_determine_stmt_precisions (vec_info *vinfo, stmt_vec_info stmt_info)
7312 : : {
7313 : 36680339 : vect_determine_min_output_precision (vinfo, stmt_info);
7314 : 36680339 : if (gassign *stmt = dyn_cast <gassign *> (stmt_info->stmt))
7315 : : {
7316 : 20199862 : vect_determine_precisions_from_range (stmt_info, stmt);
7317 : 20199862 : vect_determine_precisions_from_users (stmt_info, stmt);
7318 : : }
7319 : 36680339 : }
7320 : :
7321 : : /* Walk backwards through the vectorizable region to determine the
7322 : : values of these fields:
7323 : :
7324 : : - min_output_precision
7325 : : - min_input_precision
7326 : : - operation_precision
7327 : : - operation_sign. */
7328 : :
7329 : : void
7330 : 959009 : vect_determine_precisions (vec_info *vinfo)
7331 : : {
7332 : 959009 : basic_block *bbs = vinfo->bbs;
7333 : 959009 : unsigned int nbbs = vinfo->nbbs;
7334 : :
7335 : 976813 : DUMP_VECT_SCOPE ("vect_determine_precisions");
7336 : :
7337 : : /* For mask precisions we have to iterate since otherwise we do not
7338 : : get reduction PHI precision correct. For now do this only for
7339 : : loop vectorization. */
7340 : 1007677 : bool changed;
7341 : 1007677 : do
7342 : : {
7343 : 1007677 : changed = false;
7344 : 12631846 : for (unsigned int i = 0; i < nbbs; i++)
7345 : : {
7346 : 11624169 : basic_block bb = bbs[i];
7347 : 11624169 : for (auto gsi = gsi_start_phis (bb);
7348 : 18744955 : !gsi_end_p (gsi); gsi_next (&gsi))
7349 : : {
7350 : 7120786 : stmt_vec_info stmt_info = vinfo->lookup_stmt (gsi.phi ());
7351 : 7120786 : if (stmt_info && STMT_VINFO_VECTORIZABLE (stmt_info))
7352 : 6931171 : changed |= vect_determine_mask_precision (vinfo, stmt_info);
7353 : : }
7354 : 114486396 : for (auto gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
7355 : : {
7356 : 91238058 : stmt_vec_info stmt_info = vinfo->lookup_stmt (gsi_stmt (gsi));
7357 : 91238058 : if (stmt_info && STMT_VINFO_VECTORIZABLE (stmt_info))
7358 : 30532883 : changed |= vect_determine_mask_precision (vinfo, stmt_info);
7359 : : }
7360 : : }
7361 : : }
7362 : 1966686 : while (changed && is_a <loop_vec_info> (vinfo));
7363 : :
7364 : 12459156 : for (unsigned int i = 0; i < nbbs; i++)
7365 : : {
7366 : 11500147 : basic_block bb = bbs[nbbs - i - 1];
7367 : 203520468 : for (auto gsi = gsi_last_bb (bb); !gsi_end_p (gsi); gsi_prev (&gsi))
7368 : : {
7369 : 90260087 : stmt_vec_info stmt_info = vinfo->lookup_stmt (gsi_stmt (gsi));
7370 : 90260087 : if (stmt_info && STMT_VINFO_VECTORIZABLE (stmt_info))
7371 : 29896072 : vect_determine_stmt_precisions (vinfo, stmt_info);
7372 : : }
7373 : 18474029 : for (auto gsi = gsi_start_phis (bb); !gsi_end_p (gsi); gsi_next (&gsi))
7374 : : {
7375 : 6973882 : stmt_vec_info stmt_info = vinfo->lookup_stmt (gsi.phi ());
7376 : 6973882 : if (stmt_info && STMT_VINFO_VECTORIZABLE (stmt_info))
7377 : 6784267 : vect_determine_stmt_precisions (vinfo, stmt_info);
7378 : : }
7379 : : }
7380 : 959009 : }
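
/* Editor's note: a (made-up) example of why the mask-precision walk
   above iterates for loops.  Given a mask reduction

     mask_1 = PHI <mask_0 (preheader), mask_2 (latch)>
     ...
     mask_2 = mask_1 & _3;

   the first forward pass visits the PHI before mask_2 has any mask
   precision, so only a later pass can propagate the narrowest operand
   precision back into the PHI; the loop stops once a pass changes
   nothing.  */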
7381 : :
7382 : : typedef gimple *(*vect_recog_func_ptr) (vec_info *, stmt_vec_info, tree *);
7383 : :
7384 : : struct vect_recog_func
7385 : : {
7386 : : vect_recog_func_ptr fn;
7387 : : const char *name;
7388 : : };
7389 : :
7390 : : /* Note that ordering matters - the first pattern that matches a stmt is
7391 : : taken, which means the more complex patterns usually need to precede the
7392 : : less complex ones (widen_sum only after dot_prod or sad, for example). */
7393 : : static vect_recog_func vect_vect_recog_func_ptrs[] = {
7394 : : { vect_recog_bitfield_ref_pattern, "bitfield_ref" },
7395 : : { vect_recog_bit_insert_pattern, "bit_insert" },
7396 : : { vect_recog_abd_pattern, "abd" },
7397 : : { vect_recog_over_widening_pattern, "over_widening" },
7398 : : /* Must come after over_widening, which narrows the shift as much as
7399 : : possible beforehand. */
7400 : : { vect_recog_average_pattern, "average" },
7401 : : { vect_recog_cond_expr_convert_pattern, "cond_expr_convert" },
7402 : : { vect_recog_mulhs_pattern, "mult_high" },
7403 : : { vect_recog_cast_forwprop_pattern, "cast_forwprop" },
7404 : : { vect_recog_widen_mult_pattern, "widen_mult" },
7405 : : { vect_recog_dot_prod_pattern, "dot_prod" },
7406 : : { vect_recog_sad_pattern, "sad" },
7407 : : { vect_recog_widen_sum_pattern, "widen_sum" },
7408 : : { vect_recog_pow_pattern, "pow" },
7409 : : { vect_recog_popcount_clz_ctz_ffs_pattern, "popcount_clz_ctz_ffs" },
7410 : : { vect_recog_ctz_ffs_pattern, "ctz_ffs" },
7411 : : { vect_recog_widen_shift_pattern, "widen_shift" },
7412 : : { vect_recog_rotate_pattern, "rotate" },
7413 : : { vect_recog_vector_vector_shift_pattern, "vector_vector_shift" },
7414 : : { vect_recog_divmod_pattern, "divmod" },
7415 : : { vect_recog_mod_var_pattern, "modvar" },
7416 : : { vect_recog_mult_pattern, "mult" },
7417 : : { vect_recog_sat_add_pattern, "sat_add" },
7418 : : { vect_recog_sat_sub_pattern, "sat_sub" },
7419 : : { vect_recog_sat_trunc_pattern, "sat_trunc" },
7420 : : { vect_recog_gcond_pattern, "gcond" },
7421 : : { vect_recog_bool_pattern, "bool" },
7422 : : /* This must come before mask conversion, and includes the parts
7423 : : of mask conversion that are needed for gather and scatter
7424 : : internal functions. */
7425 : : { vect_recog_gather_scatter_pattern, "gather_scatter" },
7426 : : { vect_recog_cond_store_pattern, "cond_store" },
7427 : : { vect_recog_mask_conversion_pattern, "mask_conversion" },
7428 : : { vect_recog_widen_plus_pattern, "widen_plus" },
7429 : : { vect_recog_widen_minus_pattern, "widen_minus" },
7430 : : { vect_recog_widen_abd_pattern, "widen_abd" },
7431 : : /* These must come after the double widening ones. */
7432 : : };
7433 : :
7434 : : /* Mark statements that are involved in a pattern. */
7435 : :
7436 : : void
7437 : 871712 : vect_mark_pattern_stmts (vec_info *vinfo,
7438 : : stmt_vec_info orig_stmt_info, gimple *pattern_stmt,
7439 : : tree pattern_vectype)
7440 : : {
7441 : 871712 : stmt_vec_info orig_stmt_info_saved = orig_stmt_info;
7442 : 871712 : gimple *def_seq = STMT_VINFO_PATTERN_DEF_SEQ (orig_stmt_info);
7443 : :
7444 : 871712 : gimple *orig_pattern_stmt = NULL;
7445 : 871712 : if (is_pattern_stmt_p (orig_stmt_info))
7446 : : {
7447 : : /* We're replacing a statement in an existing pattern definition
7448 : : sequence. */
7449 : 10554 : orig_pattern_stmt = orig_stmt_info->stmt;
7450 : 10554 : if (dump_enabled_p ())
7451 : 598 : dump_printf_loc (MSG_NOTE, vect_location,
7452 : : "replacing earlier pattern %G", orig_pattern_stmt);
7453 : :
7454 : : /* To keep the book-keeping simple, just swap the lhs of the
7455 : : old and new statements, so that the old one has a valid but
7456 : : unused lhs. */
7457 : 10554 : tree old_lhs = gimple_get_lhs (orig_pattern_stmt);
7458 : 10554 : gimple_set_lhs (orig_pattern_stmt, gimple_get_lhs (pattern_stmt));
7459 : 10554 : gimple_set_lhs (pattern_stmt, old_lhs);
7460 : :
7461 : 10554 : if (dump_enabled_p ())
7462 : 598 : dump_printf_loc (MSG_NOTE, vect_location, "with %G", pattern_stmt);
7463 : :
7464 : : /* Switch to the statement that ORIG replaces. */
7465 : 10554 : orig_stmt_info = STMT_VINFO_RELATED_STMT (orig_stmt_info);
7466 : :
7467 : : /* We shouldn't be replacing the main pattern statement. */
7468 : 10554 : gcc_assert (STMT_VINFO_RELATED_STMT (orig_stmt_info)->stmt
7469 : : != orig_pattern_stmt);
7470 : : }
7471 : :
7472 : 871712 : if (def_seq)
7473 : : for (gimple_stmt_iterator si = gsi_start (def_seq);
7474 : 1955180 : !gsi_end_p (si); gsi_next (&si))
7475 : : {
7476 : 1199262 : if (dump_enabled_p ())
7477 : 22461 : dump_printf_loc (MSG_NOTE, vect_location,
7478 : : "extra pattern stmt: %G", gsi_stmt (si));
7479 : 1199262 : stmt_vec_info pattern_stmt_info
7480 : 1199262 : = vect_init_pattern_stmt (vinfo, gsi_stmt (si),
7481 : : orig_stmt_info, pattern_vectype);
7482 : : /* Stmts in the def sequence are not vectorizable cycle or
7483 : : induction defs, instead they should all be vect_internal_def
7484 : : feeding the main pattern stmt which retains this def type. */
7485 : 1199262 : STMT_VINFO_DEF_TYPE (pattern_stmt_info) = vect_internal_def;
7486 : : }
7487 : :
7488 : 871712 : if (orig_pattern_stmt)
7489 : : {
7490 : 10554 : vect_init_pattern_stmt (vinfo, pattern_stmt,
7491 : : orig_stmt_info, pattern_vectype);
7492 : :
7493 : : /* Insert all the new pattern statements before the original one. */
7494 : 10554 : gimple_seq *orig_def_seq = &STMT_VINFO_PATTERN_DEF_SEQ (orig_stmt_info);
7495 : 10554 : gimple_stmt_iterator gsi = gsi_for_stmt (orig_pattern_stmt,
7496 : : orig_def_seq);
7497 : 10554 : gsi_insert_seq_before_without_update (&gsi, def_seq, GSI_SAME_STMT);
7498 : 10554 : gsi_insert_before_without_update (&gsi, pattern_stmt, GSI_SAME_STMT);
7499 : :
7500 : : /* Remove the pattern statement that this new pattern replaces. */
7501 : 10554 : gsi_remove (&gsi, false);
7502 : : }
7503 : : else
7504 : 861158 : vect_set_pattern_stmt (vinfo,
7505 : : pattern_stmt, orig_stmt_info, pattern_vectype);
7506 : :
7507 : : /* For any conditionals mark them as vect_condition_def. */
7508 : 871712 : if (is_a <gcond *> (pattern_stmt))
7509 : 281436 : STMT_VINFO_DEF_TYPE (STMT_VINFO_RELATED_STMT (orig_stmt_info)) = vect_condition_def;
7510 : :
7511 : : /* Transfer reduction path info to the pattern. */
7512 : 871712 : if (STMT_VINFO_REDUC_IDX (orig_stmt_info_saved) != -1)
7513 : : {
7514 : 9346 : gimple_match_op op;
7515 : 9346 : if (!gimple_extract_op (orig_stmt_info_saved->stmt, &op))
7516 : 0 : gcc_unreachable ();
7517 : 9346 : tree lookfor = op.ops[STMT_VINFO_REDUC_IDX (orig_stmt_info)];
7518 : : /* Search the pattern def sequence and the main pattern stmt. Note
7519 : : we may have inserted all into a containing pattern def sequence
7520 : : so the following is a bit awkward. */
7521 : 9346 : gimple_stmt_iterator si;
7522 : 9346 : gimple *s;
7523 : 9346 : if (def_seq)
7524 : : {
7525 : 8605 : si = gsi_start (def_seq);
7526 : 8605 : s = gsi_stmt (si);
7527 : 8605 : gsi_next (&si);
7528 : : }
7529 : : else
7530 : : {
7531 : : si = gsi_none ();
7532 : : s = pattern_stmt;
7533 : : }
7534 : 18282 : do
7535 : : {
7536 : 18282 : bool found = false;
7537 : 18282 : if (gimple_extract_op (s, &op))
7538 : : {
7539 : 43895 : for (unsigned i = 0; i < op.num_ops; ++i)
7540 : 34959 : if (op.ops[i] == lookfor)
7541 : : {
7542 : 9346 : STMT_VINFO_REDUC_IDX (vinfo->lookup_stmt (s)) = i;
7543 : 9346 : lookfor = gimple_get_lhs (s);
7544 : 9346 : found = true;
7545 : 9346 : break;
7546 : : }
7547 : : /* Try harder: look for a mid-entry into an earlier pattern
7548 : : sequence, or for an entry into a stmt that skips a conversion
7549 : : on one of its inputs. Either case means that the initial
7550 : : 'lookfor' was bogus. */
7551 : 9346 : if (!found)
7552 : : {
7553 : 18391 : for (unsigned i = 0; i < op.num_ops; ++i)
7554 : 9455 : if (TREE_CODE (op.ops[i]) == SSA_NAME)
7555 : 8936 : if (auto def = vinfo->lookup_def (op.ops[i]))
7556 : 8749 : if (vect_is_reduction (def)
7557 : 8749 : || (is_a <gphi *> (def->stmt)
7558 : 0 : && STMT_VINFO_REDUC_DEF (def) != NULL))
7559 : : {
7560 : 0 : STMT_VINFO_REDUC_IDX (vinfo->lookup_stmt (s)) = i;
7561 : 0 : lookfor = gimple_get_lhs (s);
7562 : 0 : found = true;
7563 : 0 : break;
7564 : : }
7565 : : }
7566 : : }
7567 : 18282 : if (s == pattern_stmt)
7568 : : {
7569 : 9346 : if (!found && dump_enabled_p ())
7570 : 0 : dump_printf_loc (MSG_NOTE, vect_location,
7571 : : "failed to update reduction index.\n");
7572 : 9346 : break;
7573 : : }
7574 : 8936 : if (gsi_end_p (si))
7575 : : s = pattern_stmt;
7576 : : else
7577 : : {
7578 : 331 : s = gsi_stmt (si);
7579 : 331 : if (s == pattern_stmt)
7580 : : /* Found the end inside a bigger pattern def seq. */
7581 : : si = gsi_none ();
7582 : : else
7583 : 331 : gsi_next (&si);
7584 : : }
7585 : : } while (1);
7586 : : }
7587 : 871712 : }
7588 : :
7589 : : /* Function vect_pattern_recog_1
7590 : :
7591 : : Input:
7592 : : PATTERN_RECOG_FUNC: A pointer to a function that detects a certain
7593 : : computation pattern.
7594 : : STMT_INFO: A stmt from which the pattern search should start.
7595 : :
7596 : : If PATTERN_RECOG_FUNC successfully detected the pattern, it creates
7597 : : a sequence of statements that has the same functionality and can be
7598 : : used to replace STMT_INFO. It returns the last statement in the sequence
7599 : : and adds any earlier statements to STMT_INFO's STMT_VINFO_PATTERN_DEF_SEQ.
7600 : : PATTERN_RECOG_FUNC also sets *TYPE_OUT to the vector type of the final
7601 : : statement, having first checked that the target supports the new operation
7602 : : in that type.
7603 : :
7604 : : This function also does some bookkeeping, as explained in the documentation
7605 : : for vect_pattern_recog. */
7606 : :
7607 : : static void
7608 : 973481653 : vect_pattern_recog_1 (vec_info *vinfo,
7609 : : const vect_recog_func &recog_func, stmt_vec_info stmt_info)
7610 : : {
7611 : 973481653 : gimple *pattern_stmt;
7612 : 973481653 : tree pattern_vectype;
7613 : :
7614 : : /* If this statement has already been replaced with pattern statements,
7615 : : leave the original statement alone, since the first match wins.
7616 : : Instead try to match against the definition statements that feed
7617 : : the main pattern statement. */
7618 : 973481653 : if (STMT_VINFO_IN_PATTERN_P (stmt_info))
7619 : : {
7620 : 11598724 : gimple_stmt_iterator gsi;
7621 : 11598724 : for (gsi = gsi_start (STMT_VINFO_PATTERN_DEF_SEQ (stmt_info));
7622 : 28406073 : !gsi_end_p (gsi); gsi_next (&gsi))
7623 : 16807349 : vect_pattern_recog_1 (vinfo, recog_func,
7624 : : vinfo->lookup_stmt (gsi_stmt (gsi)));
7625 : : return;
7626 : : }
7627 : :
7628 : 961882929 : gcc_assert (!STMT_VINFO_PATTERN_DEF_SEQ (stmt_info));
7629 : 961882929 : pattern_stmt = recog_func.fn (vinfo, stmt_info, &pattern_vectype);
7630 : 961882929 : if (!pattern_stmt)
7631 : : {
7632 : : /* Clear any half-formed pattern definition sequence. */
7633 : 961011217 : STMT_VINFO_PATTERN_DEF_SEQ (stmt_info) = NULL;
7634 : 961011217 : return;
7635 : : }
7636 : :
7637 : : /* Found a vectorizable pattern. */
7638 : 871712 : if (dump_enabled_p ())
7639 : 16864 : dump_printf_loc (MSG_NOTE, vect_location,
7640 : : "%s pattern recognized: %G",
7641 : 16864 : recog_func.name, pattern_stmt);
7642 : :
7643 : : /* Mark the stmts that are involved in the pattern. */
7644 : 871712 : vect_mark_pattern_stmts (vinfo, stmt_info, pattern_stmt, pattern_vectype);
7645 : : }
7646 : :
7647 : :
7648 : : /* Function vect_pattern_recog
7649 : :
7650 : : Input:
7651 : : LOOP_VINFO - a struct_loop_info of a loop in which we want to look for
7652 : : computation idioms.
7653 : :
7654 : : Output - for each computation idiom that is detected we create a new stmt
7655 : : that provides the same functionality and that can be vectorized. We
7656 : : also record some information in the struct_stmt_info of the relevant
7657 : : stmts, as explained below:
7658 : :
7659 : : At the entry to this function we have the following stmts, with the
7660 : : following initial value in the STMT_VINFO fields:
7661 : :
7662 : : stmt in_pattern_p related_stmt vec_stmt
7663 : : S1: a_i = .... - - -
7664 : : S2: a_2 = ..use(a_i).. - - -
7665 : : S3: a_1 = ..use(a_2).. - - -
7666 : : S4: a_0 = ..use(a_1).. - - -
7667 : : S5: ... = ..use(a_0).. - - -
7668 : :
7669 : : Say the sequence {S1,S2,S3,S4} was detected as a pattern that can be
7670 : : represented by a single stmt. We then:
7671 : : - create a new stmt S6 equivalent to the pattern (the stmt is not
7672 : : inserted into the code)
7673 : : - fill in the STMT_VINFO fields as follows:
7674 : :
7675 : : in_pattern_p related_stmt vec_stmt
7676 : : S1: a_i = .... - - -
7677 : : S2: a_2 = ..use(a_i).. - - -
7678 : : S3: a_1 = ..use(a_2).. - - -
7679 : : S4: a_0 = ..use(a_1).. true S6 -
7680 : : '---> S6: a_new = .... - S4 -
7681 : : S5: ... = ..use(a_0).. - - -
7682 : :
7683 : : (the last stmt in the pattern (S4) and the new pattern stmt (S6) point
7684 : : to each other through the RELATED_STMT field).
7685 : :
7686 : : S6 will be marked as relevant in vect_mark_stmts_to_be_vectorized instead
7687 : : of S4 because it will replace all its uses. Stmts {S1,S2,S3} will
7688 : : remain irrelevant unless used by stmts other than S4.
7689 : :
7690 : : If vectorization succeeds, vect_transform_stmt will skip over {S1,S2,S3}
7691 : : (because they are marked as irrelevant). It will vectorize S6, and record
7692 : : a pointer to the new vector stmt VS6 from S6 (as usual).
7693 : : S4 will be skipped, and S5 will be vectorized as usual:
7694 : :
7695 : : in_pattern_p related_stmt vec_stmt
7696 : : S1: a_i = .... - - -
7697 : : S2: a_2 = ..use(a_i).. - - -
7698 : : S3: a_1 = ..use(a_2).. - - -
7699 : : > VS6: va_new = .... - - -
7700 : : S4: a_0 = ..use(a_1).. true S6 VS6
7701 : : '---> S6: a_new = .... - S4 VS6
7702 : : > VS5: ... = ..vuse(va_new).. - - -
7703 : : S5: ... = ..use(a_0).. - - -
7704 : :
7705 : : DCE could then get rid of {S1,S2,S3,S4,S5} (if their defs are not used
7706 : : elsewhere), and we'll end up with:
7707 : :
7708 : : VS6: va_new = ....
7709 : : VS5: ... = ..vuse(va_new)..
7710 : :
7711 : : In case of more than one pattern statements, e.g., widen-mult with
7712 : : intermediate type:
7713 : :
7714 : : S1 a_t = ;
7715 : : S2 a_T = (TYPE) a_t;
7716 : : '--> S3: a_it = (interm_type) a_t;
7717 : : S4 prod_T = a_T * CONST;
7718 : : '--> S5: prod_T' = a_it w* CONST;
7719 : :
7720 : : there may be other users of a_T outside the pattern. In that case S2 will
7721 : : be marked as relevant (as well as S3), and both S2 and S3 will be analyzed
7722 : : and vectorized. The vector stmt VS2 will be recorded in S2, and VS3 will
7723 : : be recorded in S3. */
7724 : :
7725 : : void
7726 : 959009 : vect_pattern_recog (vec_info *vinfo)
7727 : : {
7728 : 959009 : basic_block *bbs = vinfo->bbs;
7729 : 959009 : unsigned int nbbs = vinfo->nbbs;
7730 : :
7731 : 959009 : vect_determine_precisions (vinfo);
7732 : :
7733 : 959009 : DUMP_VECT_SCOPE ("vect_pattern_recog");
7734 : :
7735 : : /* Scan through the stmts in the region, applying the pattern recognition
7736 : : functions starting at each stmt visited. */
7737 : 12459156 : for (unsigned i = 0; i < nbbs; i++)
7738 : : {
7739 : 11500147 : basic_block bb = bbs[i];
7740 : :
7741 : 113260381 : for (auto si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
7742 : : {
7743 : 90260087 : stmt_vec_info stmt_info = vinfo->lookup_stmt (gsi_stmt (si));
7744 : :
7745 : 90260087 : if (!stmt_info || !STMT_VINFO_VECTORIZABLE (stmt_info))
7746 : 60364015 : continue;
7747 : :
7748 : : /* Scan over all generic vect_recog_xxx_pattern functions. */
7749 : 986570376 : for (const auto &func_ptr : vect_vect_recog_func_ptrs)
7750 : 956674304 : vect_pattern_recog_1 (vinfo, func_ptr,
7751 : : stmt_info);
7752 : : }
7753 : : }
7754 : :
7755 : : /* After this no more add_stmt calls are allowed. */
7756 : 959009 : vinfo->stmt_vec_info_ro = true;
7757 : 959009 : }
7758 : :
7759 : : /* Build a GIMPLE_ASSIGN or a GIMPLE_CALL, depending on whether CH
7760 : : contains a tree_code or an internal_fn. */
7761 : : gimple *
7762 : 143394 : vect_gimple_build (tree lhs, code_helper ch, tree op0, tree op1)
7763 : : {
7764 : 143394 : gcc_assert (op0 != NULL_TREE);
7765 : 143394 : if (ch.is_tree_code ())
7766 : 143394 : return gimple_build_assign (lhs, (tree_code) ch, op0, op1);
7767 : :
7768 : 0 : gcc_assert (ch.is_internal_fn ());
7769 : 0 : gimple* stmt = gimple_build_call_internal (as_internal_fn ((combined_fn) ch),
7770 : : op1 == NULL_TREE ? 1 : 2,
7771 : : op0, op1);
7772 : 0 : gimple_call_set_lhs (stmt, lhs);
7773 : 0 : return stmt;
7774 : : }
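
/* Editor's note: illustrative call shapes for vect_gimple_build, with
   hypothetical operands:

     vect_gimple_build (lhs, PLUS_EXPR, a, b);
       -> lhs = a + b; (tree_code path)
     vect_gimple_build (lhs, CFN_SQRT, a, NULL_TREE);
       -> lhs = .SQRT (a); (internal_fn path, one operand)

   The second form takes the gcc_assert (ch.is_internal_fn ()) path, so
   the combined_fn must map to an internal function.  */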
|