Branch data Line data Source code
1 : : /* Analysis Utilities for Loop Vectorization.
2 : : Copyright (C) 2006-2024 Free Software Foundation, Inc.
3 : : Contributed by Dorit Nuzman <dorit@il.ibm.com>
4 : :
5 : : This file is part of GCC.
6 : :
7 : : GCC is free software; you can redistribute it and/or modify it under
8 : : the terms of the GNU General Public License as published by the Free
9 : : Software Foundation; either version 3, or (at your option) any later
10 : : version.
11 : :
12 : : GCC is distributed in the hope that it will be useful, but WITHOUT ANY
13 : : WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 : : FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
15 : : for more details.
16 : :
17 : : You should have received a copy of the GNU General Public License
18 : : along with GCC; see the file COPYING3. If not see
19 : : <http://www.gnu.org/licenses/>. */
20 : :
21 : : #include "config.h"
22 : : #include "system.h"
23 : : #include "coretypes.h"
24 : : #include "backend.h"
25 : : #include "rtl.h"
26 : : #include "tree.h"
27 : : #include "gimple.h"
28 : : #include "gimple-iterator.h"
29 : : #include "gimple-fold.h"
30 : : #include "ssa.h"
31 : : #include "expmed.h"
32 : : #include "optabs-tree.h"
33 : : #include "insn-config.h"
34 : : #include "recog.h" /* FIXME: for insn_data */
35 : : #include "fold-const.h"
36 : : #include "stor-layout.h"
37 : : #include "tree-eh.h"
38 : : #include "gimplify.h"
39 : : #include "gimple-iterator.h"
40 : : #include "gimple-fold.h"
41 : : #include "gimplify-me.h"
42 : : #include "cfgloop.h"
43 : : #include "tree-vectorizer.h"
44 : : #include "dumpfile.h"
45 : : #include "builtins.h"
46 : : #include "internal-fn.h"
47 : : #include "case-cfn-macros.h"
48 : : #include "fold-const-call.h"
49 : : #include "attribs.h"
50 : : #include "cgraph.h"
51 : : #include "omp-simd-clone.h"
52 : : #include "predict.h"
53 : : #include "tree-vector-builder.h"
54 : : #include "vec-perm-indices.h"
55 : : #include "gimple-range.h"
56 : :
57 : :
58 : : /* TODO: Note the vectorizer still builds COND_EXPRs with GENERIC compares
59 : : in the first operand. Disentangling this is future work, the
60 : : IL is properly transferred to VEC_COND_EXPRs with separate compares. */
61 : :
62 : :
63 : : /* Return true if we have a useful VR_RANGE range for VAR, storing it
64 : : in *MIN_VALUE and *MAX_VALUE if so. Note the range in the dump files. */
65 : :
66 : : bool
67 : 9391273 : vect_get_range_info (tree var, wide_int *min_value, wide_int *max_value)
68 : : {
69 : 9391273 : value_range vr;
70 : 9391273 : tree vr_min, vr_max;
71 : 18782546 : get_range_query (cfun)->range_of_expr (vr, var);
72 : 9391273 : if (vr.undefined_p ())
73 : 33 : vr.set_varying (TREE_TYPE (var));
74 : 9391273 : value_range_kind vr_type = get_legacy_range (vr, vr_min, vr_max);
75 : 9391273 : *min_value = wi::to_wide (vr_min);
76 : 9391273 : *max_value = wi::to_wide (vr_max);
77 : 9391273 : wide_int nonzero = get_nonzero_bits (var);
78 : 9391273 : signop sgn = TYPE_SIGN (TREE_TYPE (var));
79 : 9391273 : if (intersect_range_with_nonzero_bits (vr_type, min_value, max_value,
80 : : nonzero, sgn) == VR_RANGE)
81 : : {
82 : 4118932 : if (dump_enabled_p ())
83 : : {
84 : 66994 : dump_generic_expr_loc (MSG_NOTE, vect_location, TDF_SLIM, var);
85 : 66994 : dump_printf (MSG_NOTE, " has range [");
86 : 66994 : dump_hex (MSG_NOTE, *min_value);
87 : 66994 : dump_printf (MSG_NOTE, ", ");
88 : 66994 : dump_hex (MSG_NOTE, *max_value);
89 : 66994 : dump_printf (MSG_NOTE, "]\n");
90 : : }
91 : 4118932 : return true;
92 : : }
93 : : else
94 : : {
95 : 5272341 : if (dump_enabled_p ())
96 : : {
97 : 82766 : dump_generic_expr_loc (MSG_NOTE, vect_location, TDF_SLIM, var);
98 : 82766 : dump_printf (MSG_NOTE, " has no range info\n");
99 : : }
100 : 5272341 : return false;
101 : : }
102 : 9391273 : }
103 : :
104 : : /* Report that we've found an instance of pattern PATTERN in
105 : : statement STMT. */
106 : :
107 : : static void
108 : 804806 : vect_pattern_detected (const char *name, gimple *stmt)
109 : : {
110 : 804806 : if (dump_enabled_p ())
111 : 23884 : dump_printf_loc (MSG_NOTE, vect_location, "%s: detected: %G", name, stmt);
112 : 804806 : }
113 : :
114 : : /* Associate pattern statement PATTERN_STMT with ORIG_STMT_INFO and
115 : : return the pattern statement's stmt_vec_info. Set its vector type to
116 : : VECTYPE if it doesn't have one already. */
117 : :
118 : : static stmt_vec_info
119 : 1437397 : vect_init_pattern_stmt (vec_info *vinfo, gimple *pattern_stmt,
120 : : stmt_vec_info orig_stmt_info, tree vectype)
121 : : {
122 : 1437397 : stmt_vec_info pattern_stmt_info = vinfo->lookup_stmt (pattern_stmt);
123 : 1437397 : if (pattern_stmt_info == NULL)
124 : 894474 : pattern_stmt_info = vinfo->add_stmt (pattern_stmt);
125 : 1437397 : gimple_set_bb (pattern_stmt, gimple_bb (orig_stmt_info->stmt));
126 : :
127 : 1437397 : pattern_stmt_info->pattern_stmt_p = true;
128 : 1437397 : STMT_VINFO_RELATED_STMT (pattern_stmt_info) = orig_stmt_info;
129 : 1437397 : STMT_VINFO_DEF_TYPE (pattern_stmt_info)
130 : 1437397 : = STMT_VINFO_DEF_TYPE (orig_stmt_info);
131 : 1437397 : STMT_VINFO_TYPE (pattern_stmt_info) = STMT_VINFO_TYPE (orig_stmt_info);
132 : 1437397 : if (!STMT_VINFO_VECTYPE (pattern_stmt_info))
133 : : {
134 : 1592656 : gcc_assert (!vectype
135 : : || is_a <gcond *> (pattern_stmt)
136 : : || (VECTOR_BOOLEAN_TYPE_P (vectype)
137 : : == vect_use_mask_type_p (orig_stmt_info)));
138 : 903030 : STMT_VINFO_VECTYPE (pattern_stmt_info) = vectype;
139 : 903030 : pattern_stmt_info->mask_precision = orig_stmt_info->mask_precision;
140 : : }
141 : 1437397 : return pattern_stmt_info;
142 : : }
143 : :
144 : : /* Set the pattern statement of ORIG_STMT_INFO to PATTERN_STMT.
145 : : Also set the vector type of PATTERN_STMT to VECTYPE, if it doesn't
146 : : have one already. */
147 : :
148 : : static void
149 : 637057 : vect_set_pattern_stmt (vec_info *vinfo, gimple *pattern_stmt,
150 : : stmt_vec_info orig_stmt_info, tree vectype)
151 : : {
152 : 637057 : STMT_VINFO_IN_PATTERN_P (orig_stmt_info) = true;
153 : 637057 : STMT_VINFO_RELATED_STMT (orig_stmt_info)
154 : 0 : = vect_init_pattern_stmt (vinfo, pattern_stmt, orig_stmt_info, vectype);
155 : 609711 : }
156 : :
157 : : /* Add NEW_STMT to STMT_INFO's pattern definition statements. If VECTYPE
158 : : is nonnull, record that NEW_STMT's vector type is VECTYPE, which might
159 : : be different from the vector type of the final pattern statement.
160 : : If VECTYPE is a mask type, SCALAR_TYPE_FOR_MASK is the scalar type
161 : : from which it was derived. */
162 : :
163 : : static inline void
164 : 764700 : append_pattern_def_seq (vec_info *vinfo,
165 : : stmt_vec_info stmt_info, gimple *new_stmt,
166 : : tree vectype = NULL_TREE,
167 : : tree scalar_type_for_mask = NULL_TREE)
168 : : {
169 : 1298962 : gcc_assert (!scalar_type_for_mask
170 : : == (!vectype || !VECTOR_BOOLEAN_TYPE_P (vectype)));
171 : 764700 : if (vectype)
172 : : {
173 : 534367 : stmt_vec_info new_stmt_info = vinfo->add_stmt (new_stmt);
174 : 534367 : STMT_VINFO_VECTYPE (new_stmt_info) = vectype;
175 : 534367 : if (scalar_type_for_mask)
176 : 230438 : new_stmt_info->mask_precision
177 : 460876 : = GET_MODE_BITSIZE (SCALAR_TYPE_MODE (scalar_type_for_mask));
178 : : }
179 : 764700 : gimple_seq_add_stmt_without_update (&STMT_VINFO_PATTERN_DEF_SEQ (stmt_info),
180 : : new_stmt);
181 : 764700 : }
182 : :
183 : : /* The caller wants to perform new operations on vect_external variable
184 : : VAR, so that the result of the operations would also be vect_external.
185 : : Return the edge on which the operations can be performed, if one exists.
186 : : Return null if the operations should instead be treated as part of
187 : : the pattern that needs them. */
188 : :
189 : : static edge
190 : 5089 : vect_get_external_def_edge (vec_info *vinfo, tree var)
191 : : {
192 : 5089 : edge e = NULL;
193 : 5089 : if (loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo))
194 : : {
195 : 494 : e = loop_preheader_edge (loop_vinfo->loop);
196 : 494 : if (!SSA_NAME_IS_DEFAULT_DEF (var))
197 : : {
198 : 417 : basic_block bb = gimple_bb (SSA_NAME_DEF_STMT (var));
199 : 417 : if (bb == NULL
200 : 417 : || !dominated_by_p (CDI_DOMINATORS, e->dest, bb))
201 : : e = NULL;
202 : : }
203 : : }
204 : 5089 : return e;
205 : : }
206 : :
207 : : /* Return true if the target supports a vector version of CODE,
208 : : where CODE is known to map to a direct optab with the given SUBTYPE.
209 : : ITYPE specifies the type of (some of) the scalar inputs and OTYPE
210 : : specifies the type of the scalar result.
211 : :
212 : : If CODE allows the inputs and outputs to have different type
213 : : (such as for WIDEN_SUM_EXPR), it is the input mode rather
214 : : than the output mode that determines the appropriate target pattern.
215 : : Operand 0 of the target pattern then specifies the mode that the output
216 : : must have.
217 : :
218 : : When returning true, set *VECOTYPE_OUT to the vector version of OTYPE.
219 : : Also set *VECITYPE_OUT to the vector version of ITYPE if VECITYPE_OUT
220 : : is nonnull. */
221 : :
222 : : static bool
223 : 2501 : vect_supportable_direct_optab_p (vec_info *vinfo, tree otype, tree_code code,
224 : : tree itype, tree *vecotype_out,
225 : : tree *vecitype_out = NULL,
226 : : enum optab_subtype subtype = optab_default)
227 : : {
228 : 2501 : tree vecitype = get_vectype_for_scalar_type (vinfo, itype);
229 : 2501 : if (!vecitype)
230 : : return false;
231 : :
232 : 2501 : tree vecotype = get_vectype_for_scalar_type (vinfo, otype);
233 : 2501 : if (!vecotype)
234 : : return false;
235 : :
236 : 2297 : optab optab = optab_for_tree_code (code, vecitype, subtype);
237 : 2297 : if (!optab)
238 : : return false;
239 : :
240 : 2297 : insn_code icode = optab_handler (optab, TYPE_MODE (vecitype));
241 : 2297 : if (icode == CODE_FOR_nothing
242 : 2297 : || insn_data[icode].operand[0].mode != TYPE_MODE (vecotype))
243 : 1914 : return false;
244 : :
245 : 383 : *vecotype_out = vecotype;
246 : 383 : if (vecitype_out)
247 : 383 : *vecitype_out = vecitype;
248 : : return true;
249 : : }
250 : :
251 : : /* Round bit precision PRECISION up to a full element. */
252 : :
253 : : static unsigned int
254 : 3071298 : vect_element_precision (unsigned int precision)
255 : : {
256 : 0 : precision = 1 << ceil_log2 (precision);
257 : 3071298 : return MAX (precision, BITS_PER_UNIT);
258 : : }
259 : :
260 : : /* If OP is defined by a statement that's being considered for vectorization,
261 : : return information about that statement, otherwise return NULL. */
262 : :
263 : : static stmt_vec_info
264 : 1267854 : vect_get_internal_def (vec_info *vinfo, tree op)
265 : : {
266 : 1267854 : stmt_vec_info def_stmt_info = vinfo->lookup_def (op);
267 : 1267854 : if (def_stmt_info
268 : 1241223 : && STMT_VINFO_DEF_TYPE (def_stmt_info) == vect_internal_def)
269 : 1232450 : return def_stmt_info;
270 : : return NULL;
271 : : }
272 : :
273 : : /* Check whether NAME, an ssa-name used in STMT_VINFO,
274 : : is a result of a type promotion, such that:
275 : : DEF_STMT: NAME = NOP (name0)
276 : : If CHECK_SIGN is TRUE, check that either both types are signed or both are
277 : : unsigned. */
278 : :
279 : : static bool
280 : 0 : type_conversion_p (vec_info *vinfo, tree name, bool check_sign,
281 : : tree *orig_type, gimple **def_stmt, bool *promotion)
282 : : {
283 : 0 : tree type = TREE_TYPE (name);
284 : 0 : tree oprnd0;
285 : 0 : enum vect_def_type dt;
286 : :
287 : 0 : stmt_vec_info def_stmt_info;
288 : 0 : if (!vect_is_simple_use (name, vinfo, &dt, &def_stmt_info, def_stmt))
289 : : return false;
290 : :
291 : 0 : if (dt != vect_internal_def
292 : 0 : && dt != vect_external_def && dt != vect_constant_def)
293 : : return false;
294 : :
295 : 0 : if (!*def_stmt)
296 : : return false;
297 : :
298 : 0 : if (!is_gimple_assign (*def_stmt))
299 : : return false;
300 : :
301 : 0 : if (!CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (*def_stmt)))
302 : : return false;
303 : :
304 : 0 : oprnd0 = gimple_assign_rhs1 (*def_stmt);
305 : :
306 : 0 : *orig_type = TREE_TYPE (oprnd0);
307 : 0 : if (!INTEGRAL_TYPE_P (type) || !INTEGRAL_TYPE_P (*orig_type)
308 : 0 : || ((TYPE_UNSIGNED (type) != TYPE_UNSIGNED (*orig_type)) && check_sign))
309 : : return false;
310 : :
311 : 0 : if (TYPE_PRECISION (type) >= (TYPE_PRECISION (*orig_type) * 2))
312 : 0 : *promotion = true;
313 : : else
314 : 0 : *promotion = false;
315 : :
316 : 0 : if (!vect_is_simple_use (oprnd0, vinfo, &dt))
317 : : return false;
318 : :
319 : : return true;
320 : : }
321 : :
322 : : /* Holds information about an input operand after some sign changes
323 : : and type promotions have been peeled away. */
324 : : class vect_unpromoted_value {
325 : : public:
326 : : vect_unpromoted_value ();
327 : :
328 : : void set_op (tree, vect_def_type, stmt_vec_info = NULL);
329 : :
330 : : /* The value obtained after peeling away zero or more casts. */
331 : : tree op;
332 : :
333 : : /* The type of OP. */
334 : : tree type;
335 : :
336 : : /* The definition type of OP. */
337 : : vect_def_type dt;
338 : :
339 : : /* If OP is the result of peeling at least one cast, and if the cast
340 : : of OP itself is a vectorizable statement, CASTER identifies that
341 : : statement, otherwise it is null. */
342 : : stmt_vec_info caster;
343 : : };
344 : :
345 : 236957787 : inline vect_unpromoted_value::vect_unpromoted_value ()
346 : 236957787 : : op (NULL_TREE),
347 : 236957787 : type (NULL_TREE),
348 : 236957787 : dt (vect_uninitialized_def),
349 : 1887073 : caster (NULL)
350 : : {
351 : 3075 : }
352 : :
353 : : /* Set the operand to OP_IN, its definition type to DT_IN, and the
354 : : statement that casts it to CASTER_IN. */
355 : :
356 : : inline void
357 : 8063812 : vect_unpromoted_value::set_op (tree op_in, vect_def_type dt_in,
358 : : stmt_vec_info caster_in)
359 : : {
360 : 8063812 : op = op_in;
361 : 8063812 : type = TREE_TYPE (op);
362 : 8063812 : dt = dt_in;
363 : 8063812 : caster = caster_in;
364 : 8063812 : }
365 : :
366 : : /* If OP is a vectorizable SSA name, strip a sequence of integer conversions
367 : : to reach some vectorizable inner operand OP', continuing as long as it
368 : : is possible to convert OP' back to OP using a possible sign change
369 : : followed by a possible promotion P. Return this OP', or null if OP is
370 : : not a vectorizable SSA name. If there is a promotion P, describe its
371 : : input in UNPROM, otherwise describe OP' in UNPROM. If SINGLE_USE_P
372 : : is nonnull, set *SINGLE_USE_P to false if any of the SSA names involved
373 : : have more than one user.
374 : :
375 : : A successful return means that it is possible to go from OP' to OP
376 : : via UNPROM. The cast from OP' to UNPROM is at most a sign change,
377 : : whereas the cast from UNPROM to OP might be a promotion, a sign
378 : : change, or a nop.
379 : :
380 : : E.g. say we have:
381 : :
382 : : signed short *ptr = ...;
383 : : signed short C = *ptr;
384 : : unsigned short B = (unsigned short) C; // sign change
385 : : signed int A = (signed int) B; // unsigned promotion
386 : : ...possible other uses of A...
387 : : unsigned int OP = (unsigned int) A; // sign change
388 : :
389 : : In this case it's possible to go directly from C to OP using:
390 : :
391 : : OP = (unsigned int) (unsigned short) C;
392 : : +------------+ +--------------+
393 : : promotion sign change
394 : :
395 : : so OP' would be C. The input to the promotion is B, so UNPROM
396 : : would describe B. */
397 : :
398 : : static tree
399 : 6043509 : vect_look_through_possible_promotion (vec_info *vinfo, tree op,
400 : : vect_unpromoted_value *unprom,
401 : : bool *single_use_p = NULL)
402 : : {
403 : 6043509 : tree op_type = TREE_TYPE (op);
404 : 6043509 : if (!INTEGRAL_TYPE_P (op_type))
405 : : return NULL_TREE;
406 : :
407 : 6008649 : tree res = NULL_TREE;
408 : 6008649 : unsigned int orig_precision = TYPE_PRECISION (op_type);
409 : 6008649 : unsigned int min_precision = orig_precision;
410 : 6008649 : stmt_vec_info caster = NULL;
411 : 7298567 : while (TREE_CODE (op) == SSA_NAME && INTEGRAL_TYPE_P (op_type))
412 : : {
413 : : /* See whether OP is simple enough to vectorize. */
414 : 7135738 : stmt_vec_info def_stmt_info;
415 : 7135738 : gimple *def_stmt;
416 : 7135738 : vect_def_type dt;
417 : 7135738 : if (!vect_is_simple_use (op, vinfo, &dt, &def_stmt_info, &def_stmt))
418 : : break;
419 : :
420 : : /* If OP is the input of a demotion, skip over it to see whether
421 : : OP is itself the result of a promotion. If so, the combined
422 : : effect of the promotion and the demotion might fit the required
423 : : pattern, otherwise neither operation fits.
424 : :
425 : : This copes with cases such as the result of an arithmetic
426 : : operation being truncated before being stored, and where that
427 : : arithmetic operation has been recognized as an over-widened one. */
428 : 7130865 : if (TYPE_PRECISION (op_type) <= min_precision)
429 : : {
430 : : /* Use OP as the UNPROM described above if we haven't yet
431 : : found a promotion, or if using the new input preserves the
432 : : sign of the previous promotion. */
433 : 7012063 : if (!res
434 : 1092263 : || TYPE_PRECISION (unprom->type) == orig_precision
435 : 7046155 : || TYPE_SIGN (unprom->type) == TYPE_SIGN (op_type))
436 : : {
437 : 6981491 : unprom->set_op (op, dt, caster);
438 : 6981491 : min_precision = TYPE_PRECISION (op_type);
439 : : }
440 : : /* Stop if we've already seen a promotion and if this
441 : : conversion does more than change the sign. */
442 : 30572 : else if (TYPE_PRECISION (op_type)
443 : 30572 : != TYPE_PRECISION (unprom->type))
444 : : break;
445 : :
446 : : /* The sequence now extends to OP. */
447 : : res = op;
448 : : }
449 : :
450 : : /* See whether OP is defined by a cast. Record it as CASTER if
451 : : the cast is potentially vectorizable. */
452 : 7130229 : if (!def_stmt)
453 : : break;
454 : 6962012 : caster = def_stmt_info;
455 : :
456 : : /* Ignore pattern statements, since we don't link uses for them. */
457 : 6962012 : if (caster
458 : 6962012 : && single_use_p
459 : 1201747 : && !STMT_VINFO_RELATED_STMT (caster)
460 : 8043923 : && !has_single_use (res))
461 : 712425 : *single_use_p = false;
462 : :
463 : 12807832 : gassign *assign = dyn_cast <gassign *> (def_stmt);
464 : 4550444 : if (!assign || !CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (def_stmt)))
465 : : break;
466 : :
467 : : /* Continue with the input to the cast. */
468 : 1289918 : op = gimple_assign_rhs1 (def_stmt);
469 : 1289918 : op_type = TREE_TYPE (op);
470 : : }
471 : : return res;
472 : : }
473 : :
474 : : /* OP is an integer operand to an operation that returns TYPE, and we
475 : : want to treat the operation as a widening one. So far we can treat
476 : : it as widening from *COMMON_TYPE.
477 : :
478 : : Return true if OP is suitable for such a widening operation,
479 : : either widening from *COMMON_TYPE or from some supertype of it.
480 : : Update *COMMON_TYPE to the supertype in the latter case.
481 : :
482 : : SHIFT_P is true if OP is a shift amount. */
483 : :
484 : : static bool
485 : 237790 : vect_joust_widened_integer (tree type, bool shift_p, tree op,
486 : : tree *common_type)
487 : : {
488 : : /* Calculate the minimum precision required by OP, without changing
489 : : the sign of either operand. */
490 : 237790 : unsigned int precision;
491 : 237790 : if (shift_p)
492 : : {
493 : 11555 : if (!wi::leu_p (wi::to_widest (op), TYPE_PRECISION (type) / 2))
494 : : return false;
495 : 9260 : precision = TREE_INT_CST_LOW (op);
496 : : }
497 : : else
498 : : {
499 : 226235 : precision = wi::min_precision (wi::to_widest (op),
500 : 226235 : TYPE_SIGN (*common_type));
501 : 226235 : if (precision * 2 > TYPE_PRECISION (type))
502 : : return false;
503 : : }
504 : :
505 : : /* If OP requires a wider type, switch to that type. The checks
506 : : above ensure that this is still narrower than the result. */
507 : 226129 : precision = vect_element_precision (precision);
508 : 226129 : if (TYPE_PRECISION (*common_type) < precision)
509 : 5083 : *common_type = build_nonstandard_integer_type
510 : 5083 : (precision, TYPE_UNSIGNED (*common_type));
511 : : return true;
512 : : }
513 : :
514 : : /* Return true if the common supertype of NEW_TYPE and *COMMON_TYPE
515 : : is narrower than type, storing the supertype in *COMMON_TYPE if so. */
516 : :
517 : : static bool
518 : 35885 : vect_joust_widened_type (tree type, tree new_type, tree *common_type)
519 : : {
520 : 35885 : if (types_compatible_p (*common_type, new_type))
521 : : return true;
522 : :
523 : : /* See if *COMMON_TYPE can hold all values of NEW_TYPE. */
524 : 6436 : if ((TYPE_PRECISION (new_type) < TYPE_PRECISION (*common_type))
525 : 6436 : && (TYPE_UNSIGNED (new_type) || !TYPE_UNSIGNED (*common_type)))
526 : : return true;
527 : :
528 : : /* See if NEW_TYPE can hold all values of *COMMON_TYPE. */
529 : 5791 : if (TYPE_PRECISION (*common_type) < TYPE_PRECISION (new_type)
530 : 5791 : && (TYPE_UNSIGNED (*common_type) || !TYPE_UNSIGNED (new_type)))
531 : : {
532 : 362 : *common_type = new_type;
533 : 362 : return true;
534 : : }
535 : :
536 : : /* We have mismatched signs, with the signed type being
537 : : no wider than the unsigned type. In this case we need
538 : : a wider signed type. */
539 : 5429 : unsigned int precision = MAX (TYPE_PRECISION (*common_type),
540 : : TYPE_PRECISION (new_type));
541 : 5429 : precision *= 2;
542 : :
543 : 5429 : if (precision * 2 > TYPE_PRECISION (type))
544 : : return false;
545 : :
546 : 17 : *common_type = build_nonstandard_integer_type (precision, false);
547 : 17 : return true;
548 : : }
549 : :
550 : : /* Check whether STMT_INFO can be viewed as a tree of integer operations
551 : : in which each node either performs CODE or WIDENED_CODE, and where
552 : : each leaf operand is narrower than the result of STMT_INFO. MAX_NOPS
553 : : specifies the maximum number of leaf operands. SHIFT_P says whether
554 : : CODE and WIDENED_CODE are some sort of shift.
555 : :
556 : : If STMT_INFO is such a tree, return the number of leaf operands
557 : : and describe them in UNPROM[0] onwards. Also set *COMMON_TYPE
558 : : to a type that (a) is narrower than the result of STMT_INFO and
559 : : (b) can hold all leaf operand values.
560 : :
561 : : If SUBTYPE then allow that the signs of the operands
562 : : may differ in signs but not in precision. SUBTYPE is updated to reflect
563 : : this.
564 : :
565 : : Return 0 if STMT_INFO isn't such a tree, or if no such COMMON_TYPE
566 : : exists. */
567 : :
568 : : static unsigned int
569 : 100120848 : vect_widened_op_tree (vec_info *vinfo, stmt_vec_info stmt_info, tree_code code,
570 : : code_helper widened_code, bool shift_p,
571 : : unsigned int max_nops,
572 : : vect_unpromoted_value *unprom, tree *common_type,
573 : : enum optab_subtype *subtype = NULL)
574 : : {
575 : : /* Check for an integer operation with the right code. */
576 : 100120848 : gimple* stmt = stmt_info->stmt;
577 : 100120848 : if (!(is_gimple_assign (stmt) || is_gimple_call (stmt)))
578 : : return 0;
579 : :
580 : 81582515 : code_helper rhs_code;
581 : 81582515 : if (is_gimple_assign (stmt))
582 : 69272546 : rhs_code = gimple_assign_rhs_code (stmt);
583 : 12309969 : else if (is_gimple_call (stmt))
584 : 12309969 : rhs_code = gimple_call_combined_fn (stmt);
585 : : else
586 : : return 0;
587 : :
588 : 81582515 : if (rhs_code != code
589 : 81582515 : && rhs_code != widened_code)
590 : : return 0;
591 : :
592 : 4789965 : tree lhs = gimple_get_lhs (stmt);
593 : 4789965 : tree type = TREE_TYPE (lhs);
594 : 4789965 : if (!INTEGRAL_TYPE_P (type))
595 : : return 0;
596 : :
597 : : /* Assume that both operands will be leaf operands. */
598 : 4308003 : max_nops -= 2;
599 : :
600 : : /* Check the operands. */
601 : 4308003 : unsigned int next_op = 0;
602 : 4912287 : for (unsigned int i = 0; i < 2; ++i)
603 : : {
604 : 4655361 : vect_unpromoted_value *this_unprom = &unprom[next_op];
605 : 4655361 : unsigned int nops = 1;
606 : 4655361 : tree op = gimple_arg (stmt, i);
607 : 4655361 : if (i == 1 && TREE_CODE (op) == INTEGER_CST)
608 : : {
609 : : /* We already have a common type from earlier operands.
610 : : Update it to account for OP. */
611 : 237790 : this_unprom->set_op (op, vect_constant_def);
612 : 237790 : if (!vect_joust_widened_integer (type, shift_p, op, common_type))
613 : : return 0;
614 : : }
615 : : else
616 : : {
617 : : /* Only allow shifts by constants. */
618 : 4417571 : if (shift_p && i == 1)
619 : : return 0;
620 : :
621 : 4409226 : if (rhs_code != code)
622 : : {
623 : : /* If rhs_code is widened_code, don't look through further
624 : : possible promotions, there is a promotion already embedded
625 : : in the WIDEN_*_EXPR. */
626 : 130 : if (TREE_CODE (op) != SSA_NAME
627 : 130 : || !INTEGRAL_TYPE_P (TREE_TYPE (op)))
628 : 0 : return 0;
629 : :
630 : 130 : stmt_vec_info def_stmt_info;
631 : 130 : gimple *def_stmt;
632 : 130 : vect_def_type dt;
633 : 130 : if (!vect_is_simple_use (op, vinfo, &dt, &def_stmt_info,
634 : : &def_stmt))
635 : : return 0;
636 : 130 : this_unprom->set_op (op, dt, NULL);
637 : : }
638 : 4409096 : else if (!vect_look_through_possible_promotion (vinfo, op,
639 : : this_unprom))
640 : : return 0;
641 : :
642 : 4322117 : if (TYPE_PRECISION (this_unprom->type) == TYPE_PRECISION (type))
643 : : {
644 : : /* The operand isn't widened. If STMT_INFO has the code
645 : : for an unwidened operation, recursively check whether
646 : : this operand is a node of the tree. */
647 : 3934703 : if (rhs_code != code
648 : 3934703 : || max_nops == 0
649 : 3935126 : || this_unprom->dt != vect_internal_def)
650 : : return 0;
651 : :
652 : : /* Give back the leaf slot allocated above now that we're
653 : : not treating this as a leaf operand. */
654 : 423 : max_nops += 1;
655 : :
656 : : /* Recursively process the definition of the operand. */
657 : 423 : stmt_vec_info def_stmt_info
658 : 423 : = vinfo->lookup_def (this_unprom->op);
659 : 423 : nops = vect_widened_op_tree (vinfo, def_stmt_info, code,
660 : : widened_code, shift_p, max_nops,
661 : : this_unprom, common_type,
662 : : subtype);
663 : 423 : if (nops == 0)
664 : : return 0;
665 : :
666 : 287 : max_nops -= nops;
667 : : }
668 : : else
669 : : {
670 : : /* Make sure that the operand is narrower than the result. */
671 : 387414 : if (TYPE_PRECISION (this_unprom->type) * 2
672 : 387414 : > TYPE_PRECISION (type))
673 : : return 0;
674 : :
675 : : /* Update COMMON_TYPE for the new operand. */
676 : 383112 : if (i == 0)
677 : 347227 : *common_type = this_unprom->type;
678 : 35885 : else if (!vect_joust_widened_type (type, this_unprom->type,
679 : : common_type))
680 : : {
681 : 5412 : if (subtype)
682 : : {
683 : : /* See if we can sign extend the smaller type. */
684 : 168 : if (TYPE_PRECISION (this_unprom->type)
685 : 168 : > TYPE_PRECISION (*common_type))
686 : 36 : *common_type = this_unprom->type;
687 : 168 : *subtype = optab_vector_mixed_sign;
688 : : }
689 : : else
690 : : return 0;
691 : : }
692 : : }
693 : : }
694 : 604284 : next_op += nops;
695 : : }
696 : : return next_op;
697 : : }
698 : :
699 : : /* Helper to return a new temporary for pattern of TYPE for STMT. If STMT
700 : : is NULL, the caller must set SSA_NAME_DEF_STMT for the returned SSA var. */
701 : :
702 : : static tree
703 : 1235162 : vect_recog_temp_ssa_var (tree type, gimple *stmt = NULL)
704 : : {
705 : 148 : return make_temp_ssa_name (type, stmt, "patt");
706 : : }
707 : :
708 : : /* STMT2_INFO describes a type conversion that could be split into STMT1
709 : : followed by a version of STMT2_INFO that takes NEW_RHS as its first
710 : : input. Try to do this using pattern statements, returning true on
711 : : success. */
712 : :
713 : : static bool
714 : 28169 : vect_split_statement (vec_info *vinfo, stmt_vec_info stmt2_info, tree new_rhs,
715 : : gimple *stmt1, tree vectype)
716 : : {
717 : 28169 : if (is_pattern_stmt_p (stmt2_info))
718 : : {
719 : : /* STMT2_INFO is part of a pattern. Get the statement to which
720 : : the pattern is attached. */
721 : 823 : stmt_vec_info orig_stmt2_info = STMT_VINFO_RELATED_STMT (stmt2_info);
722 : 823 : vect_init_pattern_stmt (vinfo, stmt1, orig_stmt2_info, vectype);
723 : :
724 : 823 : if (dump_enabled_p ())
725 : 24 : dump_printf_loc (MSG_NOTE, vect_location,
726 : : "Splitting pattern statement: %G", stmt2_info->stmt);
727 : :
728 : : /* Since STMT2_INFO is a pattern statement, we can change it
729 : : in-situ without worrying about changing the code for the
730 : : containing block. */
731 : 823 : gimple_assign_set_rhs1 (stmt2_info->stmt, new_rhs);
732 : :
733 : 823 : if (dump_enabled_p ())
734 : : {
735 : 24 : dump_printf_loc (MSG_NOTE, vect_location, "into: %G", stmt1);
736 : 24 : dump_printf_loc (MSG_NOTE, vect_location, "and: %G",
737 : : stmt2_info->stmt);
738 : : }
739 : :
740 : 823 : gimple_seq *def_seq = &STMT_VINFO_PATTERN_DEF_SEQ (orig_stmt2_info);
741 : 823 : if (STMT_VINFO_RELATED_STMT (orig_stmt2_info) == stmt2_info)
742 : : /* STMT2_INFO is the actual pattern statement. Add STMT1
743 : : to the end of the definition sequence. */
744 : 823 : gimple_seq_add_stmt_without_update (def_seq, stmt1);
745 : : else
746 : : {
747 : : /* STMT2_INFO belongs to the definition sequence. Insert STMT1
748 : : before it. */
749 : 0 : gimple_stmt_iterator gsi = gsi_for_stmt (stmt2_info->stmt, def_seq);
750 : 0 : gsi_insert_before_without_update (&gsi, stmt1, GSI_SAME_STMT);
751 : : }
752 : 823 : return true;
753 : : }
754 : : else
755 : : {
756 : : /* STMT2_INFO doesn't yet have a pattern. Try to create a
757 : : two-statement pattern now. */
758 : 27346 : gcc_assert (!STMT_VINFO_RELATED_STMT (stmt2_info));
759 : 27346 : tree lhs_type = TREE_TYPE (gimple_get_lhs (stmt2_info->stmt));
760 : 27346 : tree lhs_vectype = get_vectype_for_scalar_type (vinfo, lhs_type);
761 : 27346 : if (!lhs_vectype)
762 : : return false;
763 : :
764 : 27346 : if (dump_enabled_p ())
765 : 2142 : dump_printf_loc (MSG_NOTE, vect_location,
766 : : "Splitting statement: %G", stmt2_info->stmt);
767 : :
768 : : /* Add STMT1 as a singleton pattern definition sequence. */
769 : 27346 : gimple_seq *def_seq = &STMT_VINFO_PATTERN_DEF_SEQ (stmt2_info);
770 : 27346 : vect_init_pattern_stmt (vinfo, stmt1, stmt2_info, vectype);
771 : 27346 : gimple_seq_add_stmt_without_update (def_seq, stmt1);
772 : :
773 : : /* Build the second of the two pattern statements. */
774 : 27346 : tree new_lhs = vect_recog_temp_ssa_var (lhs_type, NULL);
775 : 27346 : gassign *new_stmt2 = gimple_build_assign (new_lhs, NOP_EXPR, new_rhs);
776 : 27346 : vect_set_pattern_stmt (vinfo, new_stmt2, stmt2_info, lhs_vectype);
777 : :
778 : 27346 : if (dump_enabled_p ())
779 : : {
780 : 2142 : dump_printf_loc (MSG_NOTE, vect_location,
781 : : "into pattern statements: %G", stmt1);
782 : 2142 : dump_printf_loc (MSG_NOTE, vect_location, "and: %G",
783 : : (gimple *) new_stmt2);
784 : : }
785 : :
786 : 27346 : return true;
787 : : }
788 : : }
789 : :
790 : : /* Look for the following pattern
791 : : X = x[i]
792 : : Y = y[i]
793 : : DIFF = X - Y
794 : : DAD = ABS_EXPR<DIFF>
795 : :
796 : : ABS_STMT should point to a statement of code ABS_EXPR or ABSU_EXPR.
797 : : HALF_TYPE and UNPROM will be set should the statement be found to
798 : : be a widened operation.
799 : : DIFF_STMT will be set to the MINUS_EXPR
800 : : statement that precedes the ABS_STMT unless vect_widened_op_tree
801 : : succeeds.
802 : : */
803 : : static bool
804 : 17121825 : vect_recog_absolute_difference (vec_info *vinfo, gassign *abs_stmt,
805 : : tree *half_type,
806 : : vect_unpromoted_value unprom[2],
807 : : gassign **diff_stmt)
808 : : {
809 : 17121825 : if (!abs_stmt)
810 : : return false;
811 : :
812 : : /* FORNOW. Can continue analyzing the def-use chain when this stmt in a phi
813 : : inside the loop (in case we are analyzing an outer-loop). */
814 : 17121825 : enum tree_code code = gimple_assign_rhs_code (abs_stmt);
815 : 17121825 : if (code != ABS_EXPR && code != ABSU_EXPR)
816 : : return false;
817 : :
818 : 19828 : tree abs_oprnd = gimple_assign_rhs1 (abs_stmt);
819 : 19828 : tree abs_type = TREE_TYPE (abs_oprnd);
820 : 19828 : if (!abs_oprnd)
821 : : return false;
822 : 14964 : if (!ANY_INTEGRAL_TYPE_P (abs_type)
823 : 5074 : || TYPE_OVERFLOW_WRAPS (abs_type)
824 : 24777 : || TYPE_UNSIGNED (abs_type))
825 : : return false;
826 : :
827 : : /* Peel off conversions from the ABS input. This can involve sign
828 : : changes (e.g. from an unsigned subtraction to a signed ABS input)
829 : : or signed promotion, but it can't include unsigned promotion.
830 : : (Note that ABS of an unsigned promotion should have been folded
831 : : away before now anyway.) */
832 : 4949 : vect_unpromoted_value unprom_diff;
833 : 4949 : abs_oprnd = vect_look_through_possible_promotion (vinfo, abs_oprnd,
834 : : &unprom_diff);
835 : 4949 : if (!abs_oprnd)
836 : : return false;
837 : 4739 : if (TYPE_PRECISION (unprom_diff.type) != TYPE_PRECISION (abs_type)
838 : 4739 : && TYPE_UNSIGNED (unprom_diff.type))
839 : : return false;
840 : :
841 : : /* We then detect if the operand of abs_expr is defined by a minus_expr. */
842 : 4739 : stmt_vec_info diff_stmt_vinfo = vect_get_internal_def (vinfo, abs_oprnd);
843 : 4739 : if (!diff_stmt_vinfo)
844 : : return false;
845 : :
846 : : /* FORNOW. Can continue analyzing the def-use chain when this stmt in a phi
847 : : inside the loop (in case we are analyzing an outer-loop). */
848 : 4454 : if (vect_widened_op_tree (vinfo, diff_stmt_vinfo,
849 : : MINUS_EXPR, IFN_VEC_WIDEN_MINUS,
850 : : false, 2, unprom, half_type))
851 : : return true;
852 : :
853 : : /* Failed to find a widen operation so we check for a regular MINUS_EXPR. */
854 : 3249 : gassign *diff = dyn_cast <gassign *> (STMT_VINFO_STMT (diff_stmt_vinfo));
855 : 3249 : if (diff_stmt && diff
856 : 2610 : && gimple_assign_rhs_code (diff) == MINUS_EXPR
857 : 4114 : && TYPE_OVERFLOW_UNDEFINED (TREE_TYPE (abs_oprnd)))
858 : : {
859 : 745 : *diff_stmt = diff;
860 : 745 : *half_type = NULL_TREE;
861 : 745 : return true;
862 : : }
863 : :
864 : : return false;
865 : : }
866 : :
867 : : /* Convert UNPROM to TYPE and return the result, adding new statements
868 : : to STMT_INFO's pattern definition statements if no better way is
869 : : available. VECTYPE is the vector form of TYPE.
870 : :
871 : : If SUBTYPE then convert the type based on the subtype. */
872 : :
873 : : static tree
874 : 374873 : vect_convert_input (vec_info *vinfo, stmt_vec_info stmt_info, tree type,
875 : : vect_unpromoted_value *unprom, tree vectype,
876 : : enum optab_subtype subtype = optab_default)
877 : : {
878 : : /* Update the type if the signs differ. */
879 : 374873 : if (subtype == optab_vector_mixed_sign)
880 : : {
881 : 96 : gcc_assert (!TYPE_UNSIGNED (type));
882 : 96 : if (TYPE_UNSIGNED (TREE_TYPE (unprom->op)))
883 : : {
884 : 48 : type = unsigned_type_for (type);
885 : 48 : vectype = unsigned_type_for (vectype);
886 : : }
887 : : }
888 : :
889 : : /* Check for a no-op conversion. */
890 : 374873 : if (types_compatible_p (type, TREE_TYPE (unprom->op)))
891 : 126732 : return unprom->op;
892 : :
893 : : /* Allow the caller to create constant vect_unpromoted_values. */
894 : 248141 : if (TREE_CODE (unprom->op) == INTEGER_CST)
895 : 148325 : return wide_int_to_tree (type, wi::to_widest (unprom->op));
896 : :
897 : 99816 : tree input = unprom->op;
898 : 99816 : if (unprom->caster)
899 : : {
900 : 48615 : tree lhs = gimple_get_lhs (unprom->caster->stmt);
901 : 48615 : tree lhs_type = TREE_TYPE (lhs);
902 : :
903 : : /* If the result of the existing cast is the right width, use it
904 : : instead of the source of the cast. */
905 : 48615 : if (TYPE_PRECISION (lhs_type) == TYPE_PRECISION (type))
906 : : input = lhs;
907 : : /* If the precision we want is between the source and result
908 : : precisions of the existing cast, try splitting the cast into
909 : : two and tapping into a mid-way point. */
910 : 46924 : else if (TYPE_PRECISION (lhs_type) > TYPE_PRECISION (type)
911 : 46924 : && TYPE_PRECISION (type) > TYPE_PRECISION (unprom->type))
912 : : {
913 : : /* In order to preserve the semantics of the original cast,
914 : : give the mid-way point the same signedness as the input value.
915 : :
916 : : It would be possible to use a signed type here instead if
917 : : TYPE is signed and UNPROM->TYPE is unsigned, but that would
918 : : make the sign of the midtype sensitive to the order in
919 : : which we process the statements, since the signedness of
920 : : TYPE is the signedness required by just one of possibly
921 : : many users. Also, unsigned promotions are usually as cheap
922 : : as or cheaper than signed ones, so it's better to keep an
923 : : unsigned promotion. */
924 : 28169 : tree midtype = build_nonstandard_integer_type
925 : 28169 : (TYPE_PRECISION (type), TYPE_UNSIGNED (unprom->type));
926 : 28169 : tree vec_midtype = get_vectype_for_scalar_type (vinfo, midtype);
927 : 28169 : if (vec_midtype)
928 : : {
929 : 28169 : input = vect_recog_temp_ssa_var (midtype, NULL);
930 : 28169 : gassign *new_stmt = gimple_build_assign (input, NOP_EXPR,
931 : : unprom->op);
932 : 28169 : if (!vect_split_statement (vinfo, unprom->caster, input, new_stmt,
933 : : vec_midtype))
934 : 0 : append_pattern_def_seq (vinfo, stmt_info,
935 : : new_stmt, vec_midtype);
936 : : }
937 : : }
938 : :
939 : : /* See if we can reuse an existing result. */
940 : 48615 : if (types_compatible_p (type, TREE_TYPE (input)))
941 : : return input;
942 : : }
943 : :
944 : : /* We need a new conversion statement. */
945 : 78483 : tree new_op = vect_recog_temp_ssa_var (type, NULL);
946 : 78483 : gassign *new_stmt = gimple_build_assign (new_op, NOP_EXPR, input);
947 : :
948 : : /* If OP is an external value, see if we can insert the new statement
949 : : on an incoming edge. */
950 : 78483 : if (input == unprom->op && unprom->dt == vect_external_def)
951 : 5076 : if (edge e = vect_get_external_def_edge (vinfo, input))
952 : : {
953 : 481 : basic_block new_bb = gsi_insert_on_edge_immediate (e, new_stmt);
954 : 481 : gcc_assert (!new_bb);
955 : : return new_op;
956 : : }
957 : :
958 : : /* As a (common) last resort, add the statement to the pattern itself. */
959 : 78002 : append_pattern_def_seq (vinfo, stmt_info, new_stmt, vectype);
960 : 78002 : return new_op;
961 : : }
962 : :
963 : : /* Invoke vect_convert_input for N elements of UNPROM and store the
964 : : result in the corresponding elements of RESULT.
965 : :
966 : : If SUBTYPE then convert the type based on the subtype. */
967 : :
968 : : static void
969 : 190428 : vect_convert_inputs (vec_info *vinfo, stmt_vec_info stmt_info, unsigned int n,
970 : : tree *result, tree type, vect_unpromoted_value *unprom,
971 : : tree vectype, enum optab_subtype subtype = optab_default)
972 : : {
973 : 564837 : for (unsigned int i = 0; i < n; ++i)
974 : : {
975 : : unsigned int j;
976 : 558204 : for (j = 0; j < i; ++j)
977 : 183981 : if (unprom[j].op == unprom[i].op)
978 : : break;
979 : :
980 : 374409 : if (j < i)
981 : 186 : result[i] = result[j];
982 : : else
983 : 374223 : result[i] = vect_convert_input (vinfo, stmt_info,
984 : 374223 : type, &unprom[i], vectype, subtype);
985 : : }
986 : 190428 : }
987 : :
988 : : /* The caller has created a (possibly empty) sequence of pattern definition
989 : : statements followed by a single statement PATTERN_STMT. Cast the result
990 : : of this final statement to TYPE. If a new statement is needed, add
991 : : PATTERN_STMT to the end of STMT_INFO's pattern definition statements
992 : : and return the new statement, otherwise return PATTERN_STMT as-is.
993 : : VECITYPE is the vector form of PATTERN_STMT's result type. */
994 : :
995 : : static gimple *
996 : 214526 : vect_convert_output (vec_info *vinfo, stmt_vec_info stmt_info, tree type,
997 : : gimple *pattern_stmt, tree vecitype)
998 : : {
999 : 214526 : tree lhs = gimple_get_lhs (pattern_stmt);
1000 : 214526 : if (!types_compatible_p (type, TREE_TYPE (lhs)))
1001 : : {
1002 : 195139 : append_pattern_def_seq (vinfo, stmt_info, pattern_stmt, vecitype);
1003 : 195139 : tree cast_var = vect_recog_temp_ssa_var (type, NULL);
1004 : 195139 : pattern_stmt = gimple_build_assign (cast_var, NOP_EXPR, lhs);
1005 : : }
1006 : 214526 : return pattern_stmt;
1007 : : }
1008 : :
1009 : : /* Return true if STMT_VINFO describes a reduction for which reassociation
1010 : : is allowed. If STMT_INFO is part of a group, assume that it's part of
1011 : : a reduction chain and optimistically assume that all statements
1012 : : except the last allow reassociation.
1013 : : Also require it to have code CODE and to be a reduction
1014 : : in the outermost loop. When returning true, store the operands in
1015 : : *OP0_OUT and *OP1_OUT. */
1016 : :
1017 : : static bool
1018 : 74886061 : vect_reassociating_reduction_p (vec_info *vinfo,
1019 : : stmt_vec_info stmt_info, tree_code code,
1020 : : tree *op0_out, tree *op1_out)
1021 : : {
1022 : 74886061 : loop_vec_info loop_info = dyn_cast <loop_vec_info> (vinfo);
1023 : 8658256 : if (!loop_info)
1024 : : return false;
1025 : :
1026 : 8658256 : gassign *assign = dyn_cast <gassign *> (stmt_info->stmt);
1027 : 9419749 : if (!assign || gimple_assign_rhs_code (assign) != code)
1028 : : return false;
1029 : :
1030 : : /* We don't allow changing the order of the computation in the inner-loop
1031 : : when doing outer-loop vectorization. */
1032 : 1864365 : class loop *loop = LOOP_VINFO_LOOP (loop_info);
1033 : 76649518 : if (loop && nested_in_vect_loop_p (loop, stmt_info))
1034 : : return false;
1035 : :
1036 : 1821081 : if (STMT_VINFO_DEF_TYPE (stmt_info) == vect_reduction_def)
1037 : : {
1038 : 102853 : if (needs_fold_left_reduction_p (TREE_TYPE (gimple_assign_lhs (assign)),
1039 : : code))
1040 : : return false;
1041 : : }
1042 : 1718228 : else if (REDUC_GROUP_FIRST_ELEMENT (stmt_info) == NULL)
1043 : : return false;
1044 : :
1045 : 100908 : *op0_out = gimple_assign_rhs1 (assign);
1046 : 100908 : *op1_out = gimple_assign_rhs2 (assign);
1047 : 100908 : if (commutative_tree_code (code) && STMT_VINFO_REDUC_IDX (stmt_info) == 0)
1048 : 38417 : std::swap (*op0_out, *op1_out);
1049 : : return true;
1050 : : }
1051 : :
1052 : : /* match.pd function to match
1053 : : (cond (cmp@3 a b) (convert@1 c) (convert@2 d))
1054 : : with conditions:
1055 : : 1) @1, @2, c, d, a, b are all integral type.
1056 : : 2) There's single_use for both @1 and @2.
1057 : : 3) a, c have same precision.
1058 : : 4) c and @1 have different precision.
1059 : : 5) c, d are the same type or they can differ in sign when convert is
1060 : : truncation.
1061 : :
1062 : : record a and c and d and @3. */
1063 : :
1064 : : extern bool gimple_cond_expr_convert_p (tree, tree*, tree (*)(tree));
1065 : :
1066 : : /* Function vect_recog_cond_expr_convert
1067 : :
1068 : : Try to find the following pattern:
1069 : :
1070 : : TYPE_AB A,B;
1071 : : TYPE_CD C,D;
1072 : : TYPE_E E;
1073 : : TYPE_E op_true = (TYPE_E) A;
1074 : : TYPE_E op_false = (TYPE_E) B;
1075 : :
1076 : : E = C cmp D ? op_true : op_false;
1077 : :
1078 : : where
1079 : : TYPE_PRECISION (TYPE_E) != TYPE_PRECISION (TYPE_CD);
1080 : : TYPE_PRECISION (TYPE_AB) == TYPE_PRECISION (TYPE_CD);
1081 : : single_use of op_true and op_false.
1082 : : TYPE_AB could differ in sign when (TYPE_E) A is a truncation.
1083 : :
1084 : : Input:
1085 : :
1086 : : * STMT_VINFO: The stmt from which the pattern search begins.
1087 : : here it starts with E = c cmp D ? op_true : op_false;
1088 : :
1089 : : Output:
1090 : :
1091 : : TYPE1 E' = C cmp D ? A : B;
1092 : : TYPE3 E = (TYPE3) E';
1093 : :
1094 : : There may extra nop_convert for A or B to handle different signness.
1095 : :
1096 : : * TYPE_OUT: The vector type of the output of this pattern.
1097 : :
1098 : : * Return value: A new stmt that will be used to replace the sequence of
1099 : : stmts that constitute the pattern. In this case it will be:
1100 : : E = (TYPE3)E';
1101 : : E' = C cmp D ? A : B; is recorded in pattern definition statements; */
1102 : :
1103 : : static gimple *
1104 : 25017179 : vect_recog_cond_expr_convert_pattern (vec_info *vinfo,
1105 : : stmt_vec_info stmt_vinfo, tree *type_out)
1106 : : {
1107 : 42221761 : gassign *last_stmt = dyn_cast <gassign *> (stmt_vinfo->stmt);
1108 : 17204625 : tree lhs, match[4], temp, type, new_lhs, op2;
1109 : 17204625 : gimple *cond_stmt;
1110 : 17204625 : gimple *pattern_stmt;
1111 : :
1112 : 17204625 : if (!last_stmt)
1113 : : return NULL;
1114 : :
1115 : 17204625 : lhs = gimple_assign_lhs (last_stmt);
1116 : :
1117 : : /* Find E = C cmp D ? (TYPE3) A ? (TYPE3) B;
1118 : : TYPE_PRECISION (A) == TYPE_PRECISION (C). */
1119 : 17204625 : if (!gimple_cond_expr_convert_p (lhs, &match[0], NULL))
1120 : : return NULL;
1121 : :
1122 : 43 : vect_pattern_detected ("vect_recog_cond_expr_convert_pattern", last_stmt);
1123 : :
1124 : 43 : op2 = match[2];
1125 : 43 : type = TREE_TYPE (match[1]);
1126 : 43 : if (TYPE_SIGN (type) != TYPE_SIGN (TREE_TYPE (match[2])))
1127 : : {
1128 : 3 : op2 = vect_recog_temp_ssa_var (type, NULL);
1129 : 3 : gimple* nop_stmt = gimple_build_assign (op2, NOP_EXPR, match[2]);
1130 : 3 : append_pattern_def_seq (vinfo, stmt_vinfo, nop_stmt,
1131 : : get_vectype_for_scalar_type (vinfo, type));
1132 : : }
1133 : :
1134 : 43 : temp = vect_recog_temp_ssa_var (type, NULL);
1135 : 43 : cond_stmt = gimple_build_assign (temp, build3 (COND_EXPR, type, match[3],
1136 : : match[1], op2));
1137 : 43 : append_pattern_def_seq (vinfo, stmt_vinfo, cond_stmt,
1138 : : get_vectype_for_scalar_type (vinfo, type));
1139 : 43 : new_lhs = vect_recog_temp_ssa_var (TREE_TYPE (lhs), NULL);
1140 : 43 : pattern_stmt = gimple_build_assign (new_lhs, NOP_EXPR, temp);
1141 : 43 : *type_out = STMT_VINFO_VECTYPE (stmt_vinfo);
1142 : :
1143 : 43 : if (dump_enabled_p ())
1144 : 0 : dump_printf_loc (MSG_NOTE, vect_location,
1145 : : "created pattern stmt: %G", pattern_stmt);
1146 : : return pattern_stmt;
1147 : : }
1148 : :
1149 : : /* Function vect_recog_dot_prod_pattern
1150 : :
1151 : : Try to find the following pattern:
1152 : :
1153 : : type1a x_t
1154 : : type1b y_t;
1155 : : TYPE1 prod;
1156 : : TYPE2 sum = init;
1157 : : loop:
1158 : : sum_0 = phi <init, sum_1>
1159 : : S1 x_t = ...
1160 : : S2 y_t = ...
1161 : : S3 x_T = (TYPE1) x_t;
1162 : : S4 y_T = (TYPE1) y_t;
1163 : : S5 prod = x_T * y_T;
1164 : : [S6 prod = (TYPE2) prod; #optional]
1165 : : S7 sum_1 = prod + sum_0;
1166 : :
1167 : : where 'TYPE1' is exactly double the size of type 'type1a' and 'type1b',
1168 : : the sign of 'TYPE1' must be one of 'type1a' or 'type1b' but the sign of
1169 : : 'type1a' and 'type1b' can differ.
1170 : :
1171 : : Input:
1172 : :
1173 : : * STMT_VINFO: The stmt from which the pattern search begins. In the
1174 : : example, when this function is called with S7, the pattern {S3,S4,S5,S6,S7}
1175 : : will be detected.
1176 : :
1177 : : Output:
1178 : :
1179 : : * TYPE_OUT: The type of the output of this pattern.
1180 : :
1181 : : * Return value: A new stmt that will be used to replace the sequence of
1182 : : stmts that constitute the pattern. In this case it will be:
1183 : : WIDEN_DOT_PRODUCT <x_t, y_t, sum_0>
1184 : :
1185 : : Note: The dot-prod idiom is a widening reduction pattern that is
1186 : : vectorized without preserving all the intermediate results. It
1187 : : produces only N/2 (widened) results (by summing up pairs of
1188 : : intermediate results) rather than all N results. Therefore, we
1189 : : cannot allow this pattern when we want to get all the results and in
1190 : : the correct order (as is the case when this computation is in an
1191 : : inner-loop nested in an outer-loop that us being vectorized). */
1192 : :
1193 : : static gimple *
1194 : 24962196 : vect_recog_dot_prod_pattern (vec_info *vinfo,
1195 : : stmt_vec_info stmt_vinfo, tree *type_out)
1196 : : {
1197 : 24962196 : tree oprnd0, oprnd1;
1198 : 24962196 : gimple *last_stmt = stmt_vinfo->stmt;
1199 : 24962196 : tree type, half_type;
1200 : 24962196 : gimple *pattern_stmt;
1201 : 24962196 : tree var;
1202 : :
1203 : : /* Look for the following pattern
1204 : : DX = (TYPE1) X;
1205 : : DY = (TYPE1) Y;
1206 : : DPROD = DX * DY;
1207 : : DDPROD = (TYPE2) DPROD;
1208 : : sum_1 = DDPROD + sum_0;
1209 : : In which
1210 : : - DX is double the size of X
1211 : : - DY is double the size of Y
1212 : : - DX, DY, DPROD all have the same type but the sign
1213 : : between X, Y and DPROD can differ.
1214 : : - sum is the same size of DPROD or bigger
1215 : : - sum has been recognized as a reduction variable.
1216 : :
1217 : : This is equivalent to:
1218 : : DPROD = X w* Y; #widen mult
1219 : : sum_1 = DPROD w+ sum_0; #widen summation
1220 : : or
1221 : : DPROD = X w* Y; #widen mult
1222 : : sum_1 = DPROD + sum_0; #summation
1223 : : */
1224 : :
1225 : : /* Starting from LAST_STMT, follow the defs of its uses in search
1226 : : of the above pattern. */
1227 : :
1228 : 24962196 : if (!vect_reassociating_reduction_p (vinfo, stmt_vinfo, PLUS_EXPR,
1229 : : &oprnd0, &oprnd1))
1230 : : return NULL;
1231 : :
1232 : 33819 : type = TREE_TYPE (gimple_get_lhs (last_stmt));
1233 : :
1234 : 33819 : vect_unpromoted_value unprom_mult;
1235 : 33819 : oprnd0 = vect_look_through_possible_promotion (vinfo, oprnd0, &unprom_mult);
1236 : :
1237 : : /* So far so good. Since last_stmt was detected as a (summation) reduction,
1238 : : we know that oprnd1 is the reduction variable (defined by a loop-header
1239 : : phi), and oprnd0 is an ssa-name defined by a stmt in the loop body.
1240 : : Left to check that oprnd0 is defined by a (widen_)mult_expr */
1241 : 33819 : if (!oprnd0)
1242 : : return NULL;
1243 : :
1244 : 22089 : stmt_vec_info mult_vinfo = vect_get_internal_def (vinfo, oprnd0);
1245 : 22089 : if (!mult_vinfo)
1246 : : return NULL;
1247 : :
1248 : : /* FORNOW. Can continue analyzing the def-use chain when this stmt in a phi
1249 : : inside the loop (in case we are analyzing an outer-loop). */
1250 : 64188 : vect_unpromoted_value unprom0[2];
1251 : 21396 : enum optab_subtype subtype = optab_vector;
1252 : 21396 : if (!vect_widened_op_tree (vinfo, mult_vinfo, MULT_EXPR, WIDEN_MULT_EXPR,
1253 : : false, 2, unprom0, &half_type, &subtype))
1254 : : return NULL;
1255 : :
1256 : : /* If there are two widening operations, make sure they agree on the sign
1257 : : of the extension. The result of an optab_vector_mixed_sign operation
1258 : : is signed; otherwise, the result has the same sign as the operands. */
1259 : 600 : if (TYPE_PRECISION (unprom_mult.type) != TYPE_PRECISION (type)
1260 : 977 : && (subtype == optab_vector_mixed_sign
1261 : 377 : ? TYPE_UNSIGNED (unprom_mult.type)
1262 : 215 : : TYPE_SIGN (unprom_mult.type) != TYPE_SIGN (half_type)))
1263 : : return NULL;
1264 : :
1265 : 492 : vect_pattern_detected ("vect_recog_dot_prod_pattern", last_stmt);
1266 : :
1267 : : /* If the inputs have mixed signs, canonicalize on using the signed
1268 : : input type for analysis. This also helps when emulating mixed-sign
1269 : : operations using signed operations. */
1270 : 492 : if (subtype == optab_vector_mixed_sign)
1271 : 108 : half_type = signed_type_for (half_type);
1272 : :
1273 : 492 : tree half_vectype;
1274 : 492 : if (!vect_supportable_direct_optab_p (vinfo, type, DOT_PROD_EXPR, half_type,
1275 : : type_out, &half_vectype, subtype))
1276 : : {
1277 : : /* We can emulate a mixed-sign dot-product using a sequence of
1278 : : signed dot-products; see vect_emulate_mixed_dot_prod for details. */
1279 : 362 : if (subtype != optab_vector_mixed_sign
1280 : 362 : || !vect_supportable_direct_optab_p (vinfo, signed_type_for (type),
1281 : : DOT_PROD_EXPR, half_type,
1282 : : type_out, &half_vectype,
1283 : : optab_vector))
1284 : 326 : return NULL;
1285 : :
1286 : 36 : *type_out = signed_or_unsigned_type_for (TYPE_UNSIGNED (type),
1287 : : *type_out);
1288 : : }
1289 : :
1290 : : /* Get the inputs in the appropriate types. */
1291 : 166 : tree mult_oprnd[2];
1292 : 166 : vect_convert_inputs (vinfo, stmt_vinfo, 2, mult_oprnd, half_type,
1293 : : unprom0, half_vectype, subtype);
1294 : :
1295 : 166 : var = vect_recog_temp_ssa_var (type, NULL);
1296 : 166 : pattern_stmt = gimple_build_assign (var, DOT_PROD_EXPR,
1297 : : mult_oprnd[0], mult_oprnd[1], oprnd1);
1298 : :
1299 : 166 : return pattern_stmt;
1300 : : }
1301 : :
1302 : :
1303 : : /* Function vect_recog_sad_pattern
1304 : :
1305 : : Try to find the following Sum of Absolute Difference (SAD) pattern:
1306 : :
1307 : : type x_t, y_t;
1308 : : signed TYPE1 diff, abs_diff;
1309 : : TYPE2 sum = init;
1310 : : loop:
1311 : : sum_0 = phi <init, sum_1>
1312 : : S1 x_t = ...
1313 : : S2 y_t = ...
1314 : : S3 x_T = (TYPE1) x_t;
1315 : : S4 y_T = (TYPE1) y_t;
1316 : : S5 diff = x_T - y_T;
1317 : : S6 abs_diff = ABS_EXPR <diff>;
1318 : : [S7 abs_diff = (TYPE2) abs_diff; #optional]
1319 : : S8 sum_1 = abs_diff + sum_0;
1320 : :
1321 : : where 'TYPE1' is at least double the size of type 'type', and 'TYPE2' is the
1322 : : same size of 'TYPE1' or bigger. This is a special case of a reduction
1323 : : computation.
1324 : :
1325 : : Input:
1326 : :
1327 : : * STMT_VINFO: The stmt from which the pattern search begins. In the
1328 : : example, when this function is called with S8, the pattern
1329 : : {S3,S4,S5,S6,S7,S8} will be detected.
1330 : :
1331 : : Output:
1332 : :
1333 : : * TYPE_OUT: The type of the output of this pattern.
1334 : :
1335 : : * Return value: A new stmt that will be used to replace the sequence of
1336 : : stmts that constitute the pattern. In this case it will be:
1337 : : SAD_EXPR <x_t, y_t, sum_0>
1338 : : */
1339 : :
1340 : : static gimple *
1341 : 24962041 : vect_recog_sad_pattern (vec_info *vinfo,
1342 : : stmt_vec_info stmt_vinfo, tree *type_out)
1343 : : {
1344 : 24962041 : gimple *last_stmt = stmt_vinfo->stmt;
1345 : 24962041 : tree half_type;
1346 : :
1347 : : /* Look for the following pattern
1348 : : DX = (TYPE1) X;
1349 : : DY = (TYPE1) Y;
1350 : : DDIFF = DX - DY;
1351 : : DAD = ABS_EXPR <DDIFF>;
1352 : : DDPROD = (TYPE2) DPROD;
1353 : : sum_1 = DAD + sum_0;
1354 : : In which
1355 : : - DX is at least double the size of X
1356 : : - DY is at least double the size of Y
1357 : : - DX, DY, DDIFF, DAD all have the same type
1358 : : - sum is the same size of DAD or bigger
1359 : : - sum has been recognized as a reduction variable.
1360 : :
1361 : : This is equivalent to:
1362 : : DDIFF = X w- Y; #widen sub
1363 : : DAD = ABS_EXPR <DDIFF>;
1364 : : sum_1 = DAD w+ sum_0; #widen summation
1365 : : or
1366 : : DDIFF = X w- Y; #widen sub
1367 : : DAD = ABS_EXPR <DDIFF>;
1368 : : sum_1 = DAD + sum_0; #summation
1369 : : */
1370 : :
1371 : : /* Starting from LAST_STMT, follow the defs of its uses in search
1372 : : of the above pattern. */
1373 : :
1374 : 24962041 : tree plus_oprnd0, plus_oprnd1;
1375 : 24962041 : if (!vect_reassociating_reduction_p (vinfo, stmt_vinfo, PLUS_EXPR,
1376 : : &plus_oprnd0, &plus_oprnd1))
1377 : : return NULL;
1378 : :
1379 : 33653 : tree sum_type = TREE_TYPE (gimple_get_lhs (last_stmt));
1380 : :
1381 : : /* Any non-truncating sequence of conversions is OK here, since
1382 : : with a successful match, the result of the ABS(U) is known to fit
1383 : : within the nonnegative range of the result type. (It cannot be the
1384 : : negative of the minimum signed value due to the range of the widening
1385 : : MINUS_EXPR.) */
1386 : 33653 : vect_unpromoted_value unprom_abs;
1387 : 33653 : plus_oprnd0 = vect_look_through_possible_promotion (vinfo, plus_oprnd0,
1388 : : &unprom_abs);
1389 : :
1390 : : /* So far so good. Since last_stmt was detected as a (summation) reduction,
1391 : : we know that plus_oprnd1 is the reduction variable (defined by a loop-header
1392 : : phi), and plus_oprnd0 is an ssa-name defined by a stmt in the loop body.
1393 : : Then check that plus_oprnd0 is defined by an abs_expr. */
1394 : :
1395 : 33653 : if (!plus_oprnd0)
1396 : : return NULL;
1397 : :
1398 : 21923 : stmt_vec_info abs_stmt_vinfo = vect_get_internal_def (vinfo, plus_oprnd0);
1399 : 21923 : if (!abs_stmt_vinfo)
1400 : : return NULL;
1401 : :
1402 : : /* FORNOW. Can continue analyzing the def-use chain when this stmt in a phi
1403 : : inside the loop (in case we are analyzing an outer-loop). */
1404 : 21230 : gassign *abs_stmt = dyn_cast <gassign *> (abs_stmt_vinfo->stmt);
1405 : 63690 : vect_unpromoted_value unprom[2];
1406 : :
1407 : 21230 : if (!abs_stmt)
1408 : : {
1409 : 24962107 : gcall *abd_stmt = dyn_cast <gcall *> (abs_stmt_vinfo->stmt);
1410 : 283 : if (!abd_stmt
1411 : 283 : || !gimple_call_internal_p (abd_stmt)
1412 : 0 : || gimple_call_num_args (abd_stmt) != 2)
1413 : : return NULL;
1414 : :
1415 : 0 : tree abd_oprnd0 = gimple_call_arg (abd_stmt, 0);
1416 : 0 : tree abd_oprnd1 = gimple_call_arg (abd_stmt, 1);
1417 : :
1418 : 0 : if (gimple_call_internal_fn (abd_stmt) == IFN_ABD)
1419 : : {
1420 : 0 : if (!vect_look_through_possible_promotion (vinfo, abd_oprnd0,
1421 : : &unprom[0])
1422 : 0 : || !vect_look_through_possible_promotion (vinfo, abd_oprnd1,
1423 : : &unprom[1]))
1424 : 0 : return NULL;
1425 : : }
1426 : 0 : else if (gimple_call_internal_fn (abd_stmt) == IFN_VEC_WIDEN_ABD)
1427 : : {
1428 : 0 : unprom[0].op = abd_oprnd0;
1429 : 0 : unprom[0].type = TREE_TYPE (abd_oprnd0);
1430 : 0 : unprom[1].op = abd_oprnd1;
1431 : 0 : unprom[1].type = TREE_TYPE (abd_oprnd1);
1432 : : }
1433 : : else
1434 : : return NULL;
1435 : :
1436 : 0 : half_type = unprom[0].type;
1437 : : }
1438 : 20892 : else if (!vect_recog_absolute_difference (vinfo, abs_stmt, &half_type,
1439 : : unprom, NULL))
1440 : : return NULL;
1441 : :
1442 : 358 : vect_pattern_detected ("vect_recog_sad_pattern", last_stmt);
1443 : :
1444 : 358 : tree half_vectype;
1445 : 358 : if (!vect_supportable_direct_optab_p (vinfo, sum_type, SAD_EXPR, half_type,
1446 : : type_out, &half_vectype))
1447 : : return NULL;
1448 : :
1449 : : /* Get the inputs to the SAD_EXPR in the appropriate types. */
1450 : 217 : tree sad_oprnd[2];
1451 : 217 : vect_convert_inputs (vinfo, stmt_vinfo, 2, sad_oprnd, half_type,
1452 : : unprom, half_vectype);
1453 : :
1454 : 217 : tree var = vect_recog_temp_ssa_var (sum_type, NULL);
1455 : 217 : gimple *pattern_stmt = gimple_build_assign (var, SAD_EXPR, sad_oprnd[0],
1456 : : sad_oprnd[1], plus_oprnd1);
1457 : :
1458 : 217 : return pattern_stmt;
1459 : : }
1460 : :
1461 : : /* Function vect_recog_abd_pattern
1462 : :
1463 : : Try to find the following ABsolute Difference (ABD) or
1464 : : widening ABD (WIDEN_ABD) pattern:
1465 : :
1466 : : TYPE1 x;
1467 : : TYPE2 y;
1468 : : TYPE3 x_cast = (TYPE3) x; // widening or no-op
1469 : : TYPE3 y_cast = (TYPE3) y; // widening or no-op
1470 : : TYPE3 diff = x_cast - y_cast;
1471 : : TYPE4 diff_cast = (TYPE4) diff; // widening or no-op
1472 : : TYPE5 abs = ABS(U)_EXPR <diff_cast>;
1473 : :
1474 : : WIDEN_ABD exists to optimize the case where TYPE4 is at least
1475 : : twice as wide as TYPE3.
1476 : :
1477 : : Input:
1478 : :
1479 : : * STMT_VINFO: The stmt from which the pattern search begins
1480 : :
1481 : : Output:
1482 : :
1483 : : * TYPE_OUT: The type of the output of this pattern
1484 : :
1485 : : * Return value: A new stmt that will be used to replace the sequence of
1486 : : stmts that constitute the pattern, principally:
1487 : : out = IFN_ABD (x, y)
1488 : : out = IFN_WIDEN_ABD (x, y)
1489 : : */
1490 : :
1491 : : static gimple *
1492 : 24913365 : vect_recog_abd_pattern (vec_info *vinfo,
1493 : : stmt_vec_info stmt_vinfo, tree *type_out)
1494 : : {
1495 : 42014298 : gassign *last_stmt = dyn_cast <gassign *> (STMT_VINFO_STMT (stmt_vinfo));
1496 : 17100933 : if (!last_stmt)
1497 : : return NULL;
1498 : :
1499 : 17100933 : tree out_type = TREE_TYPE (gimple_assign_lhs (last_stmt));
1500 : :
1501 : 51302799 : vect_unpromoted_value unprom[2];
1502 : 17100933 : gassign *diff_stmt;
1503 : 17100933 : tree half_type;
1504 : 17100933 : if (!vect_recog_absolute_difference (vinfo, last_stmt, &half_type,
1505 : : unprom, &diff_stmt))
1506 : : return NULL;
1507 : :
1508 : 1592 : tree abd_in_type, abd_out_type;
1509 : :
1510 : 1592 : if (half_type)
1511 : : {
1512 : : abd_in_type = half_type;
1513 : : abd_out_type = abd_in_type;
1514 : : }
1515 : : else
1516 : : {
1517 : 745 : unprom[0].op = gimple_assign_rhs1 (diff_stmt);
1518 : 745 : unprom[1].op = gimple_assign_rhs2 (diff_stmt);
1519 : 745 : abd_in_type = signed_type_for (out_type);
1520 : 745 : abd_out_type = abd_in_type;
1521 : : }
1522 : :
1523 : 1592 : tree vectype_in = get_vectype_for_scalar_type (vinfo, abd_in_type);
1524 : 1592 : if (!vectype_in)
1525 : : return NULL;
1526 : :
1527 : 1384 : internal_fn ifn = IFN_ABD;
1528 : 1384 : tree vectype_out = vectype_in;
1529 : :
1530 : 1384 : if (TYPE_PRECISION (out_type) >= TYPE_PRECISION (abd_in_type) * 2
1531 : 1384 : && stmt_vinfo->min_output_precision >= TYPE_PRECISION (abd_in_type) * 2)
1532 : : {
1533 : 740 : tree mid_type
1534 : 740 : = build_nonstandard_integer_type (TYPE_PRECISION (abd_in_type) * 2,
1535 : 740 : TYPE_UNSIGNED (abd_in_type));
1536 : 740 : tree mid_vectype = get_vectype_for_scalar_type (vinfo, mid_type);
1537 : :
1538 : 740 : code_helper dummy_code;
1539 : 740 : int dummy_int;
1540 : 740 : auto_vec<tree> dummy_vec;
1541 : 740 : if (mid_vectype
1542 : 740 : && supportable_widening_operation (vinfo, IFN_VEC_WIDEN_ABD,
1543 : : stmt_vinfo, mid_vectype,
1544 : : vectype_in,
1545 : : &dummy_code, &dummy_code,
1546 : : &dummy_int, &dummy_vec))
1547 : : {
1548 : 0 : ifn = IFN_VEC_WIDEN_ABD;
1549 : 0 : abd_out_type = mid_type;
1550 : 0 : vectype_out = mid_vectype;
1551 : : }
1552 : 740 : }
1553 : :
1554 : 1384 : if (ifn == IFN_ABD
1555 : 1384 : && !direct_internal_fn_supported_p (ifn, vectype_in,
1556 : : OPTIMIZE_FOR_SPEED))
1557 : : return NULL;
1558 : :
1559 : 0 : vect_pattern_detected ("vect_recog_abd_pattern", last_stmt);
1560 : :
1561 : 0 : tree abd_oprnds[2];
1562 : 0 : vect_convert_inputs (vinfo, stmt_vinfo, 2, abd_oprnds,
1563 : : abd_in_type, unprom, vectype_in);
1564 : :
1565 : 0 : *type_out = get_vectype_for_scalar_type (vinfo, out_type);
1566 : :
1567 : 0 : tree abd_result = vect_recog_temp_ssa_var (abd_out_type, NULL);
1568 : 0 : gcall *abd_stmt = gimple_build_call_internal (ifn, 2,
1569 : : abd_oprnds[0], abd_oprnds[1]);
1570 : 0 : gimple_call_set_lhs (abd_stmt, abd_result);
1571 : 0 : gimple_set_location (abd_stmt, gimple_location (last_stmt));
1572 : :
1573 : 0 : gimple *stmt = abd_stmt;
1574 : 0 : if (TYPE_PRECISION (abd_in_type) == TYPE_PRECISION (abd_out_type)
1575 : 0 : && TYPE_PRECISION (abd_out_type) < TYPE_PRECISION (out_type)
1576 : 0 : && !TYPE_UNSIGNED (abd_out_type))
1577 : : {
1578 : 0 : tree unsign = unsigned_type_for (abd_out_type);
1579 : 0 : stmt = vect_convert_output (vinfo, stmt_vinfo, unsign, stmt, vectype_out);
1580 : 0 : vectype_out = get_vectype_for_scalar_type (vinfo, unsign);
1581 : : }
1582 : :
1583 : 0 : return vect_convert_output (vinfo, stmt_vinfo, out_type, stmt, vectype_out);
1584 : : }
1585 : :
1586 : : /* Recognize an operation that performs ORIG_CODE on widened inputs,
1587 : : so that it can be treated as though it had the form:
1588 : :
1589 : : A_TYPE a;
1590 : : B_TYPE b;
1591 : : HALF_TYPE a_cast = (HALF_TYPE) a; // possible no-op
1592 : : HALF_TYPE b_cast = (HALF_TYPE) b; // possible no-op
1593 : : | RES_TYPE a_extend = (RES_TYPE) a_cast; // promotion from HALF_TYPE
1594 : : | RES_TYPE b_extend = (RES_TYPE) b_cast; // promotion from HALF_TYPE
1595 : : | RES_TYPE res = a_extend ORIG_CODE b_extend;
1596 : :
1597 : : Try to replace the pattern with:
1598 : :
1599 : : A_TYPE a;
1600 : : B_TYPE b;
1601 : : HALF_TYPE a_cast = (HALF_TYPE) a; // possible no-op
1602 : : HALF_TYPE b_cast = (HALF_TYPE) b; // possible no-op
1603 : : | EXT_TYPE ext = a_cast WIDE_CODE b_cast;
1604 : : | RES_TYPE res = (EXT_TYPE) ext; // possible no-op
1605 : :
1606 : : where EXT_TYPE is wider than HALF_TYPE but has the same signedness.
1607 : :
1608 : : SHIFT_P is true if ORIG_CODE and WIDE_CODE are shifts. NAME is the
1609 : : name of the pattern being matched, for dump purposes. */
1610 : :
1611 : : static gimple *
1612 : 100072914 : vect_recog_widen_op_pattern (vec_info *vinfo,
1613 : : stmt_vec_info last_stmt_info, tree *type_out,
1614 : : tree_code orig_code, code_helper wide_code,
1615 : : bool shift_p, const char *name)
1616 : : {
1617 : 100072914 : gimple *last_stmt = last_stmt_info->stmt;
1618 : :
1619 : 300218742 : vect_unpromoted_value unprom[2];
1620 : 100072914 : tree half_type;
1621 : 100072914 : if (!vect_widened_op_tree (vinfo, last_stmt_info, orig_code, orig_code,
1622 : : shift_p, 2, unprom, &half_type))
1623 : :
1624 : : return NULL;
1625 : :
1626 : : /* Pattern detected. */
1627 : 251482 : vect_pattern_detected (name, last_stmt);
1628 : :
1629 : 251482 : tree type = TREE_TYPE (gimple_get_lhs (last_stmt));
1630 : 251482 : tree itype = type;
1631 : 251482 : if (TYPE_PRECISION (type) != TYPE_PRECISION (half_type) * 2
1632 : 251482 : || TYPE_UNSIGNED (type) != TYPE_UNSIGNED (half_type))
1633 : 180956 : itype = build_nonstandard_integer_type (TYPE_PRECISION (half_type) * 2,
1634 : 180956 : TYPE_UNSIGNED (half_type));
1635 : :
1636 : : /* Check target support */
1637 : 251482 : tree vectype = get_vectype_for_scalar_type (vinfo, half_type);
1638 : 251482 : tree vecitype = get_vectype_for_scalar_type (vinfo, itype);
1639 : 251482 : tree ctype = itype;
1640 : 251482 : tree vecctype = vecitype;
1641 : 251482 : if (orig_code == MINUS_EXPR
1642 : 7606 : && TYPE_UNSIGNED (itype)
1643 : 255060 : && TYPE_PRECISION (type) > TYPE_PRECISION (itype))
1644 : : {
1645 : : /* Subtraction is special, even if half_type is unsigned and no matter
1646 : : whether type is signed or unsigned, if type is wider than itype,
1647 : : we need to sign-extend from the widening operation result to the
1648 : : result type.
1649 : : Consider half_type unsigned char, operand 1 0xfe, operand 2 0xff,
1650 : : itype unsigned short and type either int or unsigned int.
1651 : : Widened (unsigned short) 0xfe - (unsigned short) 0xff is
1652 : : (unsigned short) 0xffff, but for type int we want the result -1
1653 : : and for type unsigned int 0xffffffff rather than 0xffff. */
1654 : 611 : ctype = build_nonstandard_integer_type (TYPE_PRECISION (itype), 0);
1655 : 611 : vecctype = get_vectype_for_scalar_type (vinfo, ctype);
1656 : : }
1657 : :
1658 : 251482 : code_helper dummy_code;
1659 : 251482 : int dummy_int;
1660 : 251482 : auto_vec<tree> dummy_vec;
1661 : 251482 : if (!vectype
1662 : 251482 : || !vecitype
1663 : 199710 : || !vecctype
1664 : 451192 : || !supportable_widening_operation (vinfo, wide_code, last_stmt_info,
1665 : : vecitype, vectype,
1666 : : &dummy_code, &dummy_code,
1667 : : &dummy_int, &dummy_vec))
1668 : 171115 : return NULL;
1669 : :
1670 : 80367 : *type_out = get_vectype_for_scalar_type (vinfo, type);
1671 : 80367 : if (!*type_out)
1672 : : return NULL;
1673 : :
1674 : 80367 : tree oprnd[2];
1675 : 80367 : vect_convert_inputs (vinfo, last_stmt_info,
1676 : : 2, oprnd, half_type, unprom, vectype);
1677 : :
1678 : 80367 : tree var = vect_recog_temp_ssa_var (itype, NULL);
1679 : 80367 : gimple *pattern_stmt = vect_gimple_build (var, wide_code, oprnd[0], oprnd[1]);
1680 : :
1681 : 80367 : if (vecctype != vecitype)
1682 : 0 : pattern_stmt = vect_convert_output (vinfo, last_stmt_info, ctype,
1683 : : pattern_stmt, vecitype);
1684 : :
1685 : 80367 : return vect_convert_output (vinfo, last_stmt_info,
1686 : 80367 : type, pattern_stmt, vecctype);
1687 : 251482 : }
1688 : :
1689 : : /* Try to detect multiplication on widened inputs, converting MULT_EXPR
1690 : : to WIDEN_MULT_EXPR. See vect_recog_widen_op_pattern for details. */
1691 : :
1692 : : static gimple *
1693 : 24979354 : vect_recog_widen_mult_pattern (vec_info *vinfo, stmt_vec_info last_stmt_info,
1694 : : tree *type_out)
1695 : : {
1696 : 24979354 : return vect_recog_widen_op_pattern (vinfo, last_stmt_info, type_out,
1697 : : MULT_EXPR, WIDEN_MULT_EXPR, false,
1698 : 24979354 : "vect_recog_widen_mult_pattern");
1699 : : }
1700 : :
1701 : : /* Try to detect addition on widened inputs, converting PLUS_EXPR
1702 : : to IFN_VEC_WIDEN_PLUS. See vect_recog_widen_op_pattern for details. */
1703 : :
1704 : : static gimple *
1705 : 25065760 : vect_recog_widen_plus_pattern (vec_info *vinfo, stmt_vec_info last_stmt_info,
1706 : : tree *type_out)
1707 : : {
1708 : 25065760 : return vect_recog_widen_op_pattern (vinfo, last_stmt_info, type_out,
1709 : : PLUS_EXPR, IFN_VEC_WIDEN_PLUS,
1710 : 25065760 : false, "vect_recog_widen_plus_pattern");
1711 : : }
1712 : :
1713 : : /* Try to detect subtraction on widened inputs, converting MINUS_EXPR
1714 : : to IFN_VEC_WIDEN_MINUS. See vect_recog_widen_op_pattern for details. */
1715 : : static gimple *
1716 : 25065760 : vect_recog_widen_minus_pattern (vec_info *vinfo, stmt_vec_info last_stmt_info,
1717 : : tree *type_out)
1718 : : {
1719 : 25065760 : return vect_recog_widen_op_pattern (vinfo, last_stmt_info, type_out,
1720 : : MINUS_EXPR, IFN_VEC_WIDEN_MINUS,
1721 : 25065760 : false, "vect_recog_widen_minus_pattern");
1722 : : }
1723 : :
1724 : : /* Try to detect abd on widened inputs, converting IFN_ABD
1725 : : to IFN_VEC_WIDEN_ABD. */
1726 : : static gimple *
1727 : 25065760 : vect_recog_widen_abd_pattern (vec_info *vinfo, stmt_vec_info stmt_vinfo,
1728 : : tree *type_out)
1729 : : {
1730 : 25065760 : gassign *last_stmt = dyn_cast <gassign *> (STMT_VINFO_STMT (stmt_vinfo));
1731 : 23664148 : if (!last_stmt || !CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (last_stmt)))
1732 : : return NULL;
1733 : :
1734 : 2365310 : tree last_rhs = gimple_assign_rhs1 (last_stmt);
1735 : :
1736 : 2365310 : tree in_type = TREE_TYPE (last_rhs);
1737 : 2365310 : tree out_type = TREE_TYPE (gimple_assign_lhs (last_stmt));
1738 : 2365310 : if (!INTEGRAL_TYPE_P (in_type)
1739 : 2112721 : || !INTEGRAL_TYPE_P (out_type)
1740 : 2024100 : || TYPE_PRECISION (in_type) * 2 != TYPE_PRECISION (out_type)
1741 : 2829423 : || !TYPE_UNSIGNED (in_type))
1742 : : return NULL;
1743 : :
1744 : 144771 : vect_unpromoted_value unprom;
1745 : 144771 : tree op = vect_look_through_possible_promotion (vinfo, last_rhs, &unprom);
1746 : 144771 : if (!op || TYPE_PRECISION (TREE_TYPE (op)) != TYPE_PRECISION (in_type))
1747 : : return NULL;
1748 : :
1749 : 144228 : stmt_vec_info abd_pattern_vinfo = vect_get_internal_def (vinfo, op);
1750 : 144228 : if (!abd_pattern_vinfo)
1751 : : return NULL;
1752 : :
1753 : 131359 : abd_pattern_vinfo = vect_stmt_to_vectorize (abd_pattern_vinfo);
1754 : 25074741 : gcall *abd_stmt = dyn_cast <gcall *> (STMT_VINFO_STMT (abd_pattern_vinfo));
1755 : 8981 : if (!abd_stmt
1756 : 8981 : || !gimple_call_internal_p (abd_stmt)
1757 : 226 : || gimple_call_internal_fn (abd_stmt) != IFN_ABD)
1758 : : return NULL;
1759 : :
1760 : 0 : tree vectype_in = get_vectype_for_scalar_type (vinfo, in_type);
1761 : 0 : tree vectype_out = get_vectype_for_scalar_type (vinfo, out_type);
1762 : :
1763 : 0 : code_helper dummy_code;
1764 : 0 : int dummy_int;
1765 : 0 : auto_vec<tree> dummy_vec;
1766 : 0 : if (!supportable_widening_operation (vinfo, IFN_VEC_WIDEN_ABD, stmt_vinfo,
1767 : : vectype_out, vectype_in,
1768 : : &dummy_code, &dummy_code,
1769 : : &dummy_int, &dummy_vec))
1770 : : return NULL;
1771 : :
1772 : 0 : vect_pattern_detected ("vect_recog_widen_abd_pattern", last_stmt);
1773 : :
1774 : 0 : *type_out = vectype_out;
1775 : :
1776 : 0 : tree abd_oprnd0 = gimple_call_arg (abd_stmt, 0);
1777 : 0 : tree abd_oprnd1 = gimple_call_arg (abd_stmt, 1);
1778 : 0 : tree widen_abd_result = vect_recog_temp_ssa_var (out_type, NULL);
1779 : 0 : gcall *widen_abd_stmt = gimple_build_call_internal (IFN_VEC_WIDEN_ABD, 2,
1780 : : abd_oprnd0, abd_oprnd1);
1781 : 0 : gimple_call_set_lhs (widen_abd_stmt, widen_abd_result);
1782 : 0 : gimple_set_location (widen_abd_stmt, gimple_location (last_stmt));
1783 : 0 : return widen_abd_stmt;
1784 : 0 : }
1785 : :
1786 : : /* Function vect_recog_ctz_ffs_pattern
1787 : :
1788 : : Try to find the following pattern:
1789 : :
1790 : : TYPE1 A;
1791 : : TYPE1 B;
1792 : :
1793 : : B = __builtin_ctz{,l,ll} (A);
1794 : :
1795 : : or
1796 : :
1797 : : B = __builtin_ffs{,l,ll} (A);
1798 : :
1799 : : Input:
1800 : :
1801 : : * STMT_VINFO: The stmt from which the pattern search begins.
1802 : : here it starts with B = __builtin_* (A);
1803 : :
1804 : : Output:
1805 : :
1806 : : * TYPE_OUT: The vector type of the output of this pattern.
1807 : :
1808 : : * Return value: A new stmt that will be used to replace the sequence of
1809 : : stmts that constitute the pattern, using clz or popcount builtins. */
1810 : :
1811 : : static gimple *
1812 : 24961948 : vect_recog_ctz_ffs_pattern (vec_info *vinfo, stmt_vec_info stmt_vinfo,
1813 : : tree *type_out)
1814 : : {
1815 : 24961948 : gimple *call_stmt = stmt_vinfo->stmt;
1816 : 24961948 : gimple *pattern_stmt;
1817 : 24961948 : tree rhs_oprnd, rhs_type, lhs_oprnd, lhs_type, vec_type, vec_rhs_type;
1818 : 24961948 : tree new_var;
/* IFN is the function being matched (CTZ or FFS), IFNNEW the function
   chosen to implement it.  */
1819 : 24961948 : internal_fn ifn = IFN_LAST, ifnnew = IFN_LAST;
/* DEFINED_AT_ZERO/VAL describe the matched call's result for a zero input,
   DEFINED_AT_ZERO_NEW/VAL_NEW that of the replacement function.  VAL_CMP
   is the value the rewritten sequence produces for a zero input; it is
   compared with VAL at the end to decide whether a fix-up is needed.  */
1820 : 24961948 : bool defined_at_zero = true, defined_at_zero_new = false;
1821 : 24961948 : int val = 0, val_new = 0, val_cmp = 0;
1822 : 24961948 : int prec;
/* Post-adjustment of the replacement call's result: if SUB is nonzero the
   final value is SUB - result, else if ADD is nonzero it is
   result + ADD.  */
1823 : 24961948 : int sub = 0, add = 0;
1824 : 24961948 : location_t loc;
1825 : :
1826 : 24961948 : if (!is_gimple_call (call_stmt))
1827 : : return NULL;
1828 : :
/* Only one- or two-argument calls are considered; a second argument of an
   internal .CTZ call is read below as its value at zero.  */
1829 : 3079552 : if (gimple_call_num_args (call_stmt) != 1
1830 : 3079552 : && gimple_call_num_args (call_stmt) != 2)
1831 : : return NULL;
1832 : :
1833 : 1683202 : rhs_oprnd = gimple_call_arg (call_stmt, 0);
1834 : 1683202 : rhs_type = TREE_TYPE (rhs_oprnd);
1835 : 1683202 : lhs_oprnd = gimple_call_lhs (call_stmt);
1836 : 1683202 : if (!lhs_oprnd)
1837 : : return NULL;
1838 : 851422 : lhs_type = TREE_TYPE (lhs_oprnd);
1839 : 851422 : if (!INTEGRAL_TYPE_P (lhs_type)
1840 : 277459 : || !INTEGRAL_TYPE_P (rhs_type)
1841 : 42567 : || !type_has_mode_precision_p (rhs_type)
1842 : 892559 : || TREE_CODE (rhs_oprnd) != SSA_NAME)
1843 : 820556 : return NULL;
1844 : :
/* For CTZ a value at zero is only known for the two-argument internal
   function form.  FFS keeps the initial settings above (defined at zero,
   value 0).  */
1845 : 30866 : switch (gimple_call_combined_fn (call_stmt))
1846 : : {
1847 : 1069 : CASE_CFN_CTZ:
1848 : 1069 : ifn = IFN_CTZ;
1849 : 1069 : if (!gimple_call_internal_p (call_stmt)
1850 : 1069 : || gimple_call_num_args (call_stmt) != 2)
1851 : : defined_at_zero = false;
1852 : : else
1853 : 48 : val = tree_to_shwi (gimple_call_arg (call_stmt, 1));
1854 : : break;
1855 : : CASE_CFN_FFS:
1856 : : ifn = IFN_FFS;
1857 : : break;
1858 : : default:
1859 : : return NULL;
1860 : : }
1861 : :
1862 : 1190 : prec = TYPE_PRECISION (rhs_type);
1863 : 1190 : loc = gimple_location (call_stmt);
1864 : :
1865 : 1190 : vec_type = get_vectype_for_scalar_type (vinfo, lhs_type);
1866 : 1190 : if (!vec_type)
1867 : : return NULL;
1868 : :
1869 : 1190 : vec_rhs_type = get_vectype_for_scalar_type (vinfo, rhs_type);
1870 : 1190 : if (!vec_rhs_type)
1871 : : return NULL;
1872 : :
1873 : : /* Do it only if the backend doesn't have ctz<vector_mode>2 or
1874 : : ffs<vector_mode>2 pattern but does have clz<vector_mode>2 or
1875 : : popcount<vector_mode>2. */
/* VEC_TYPE was already checked to be non-null above, so only the support
   test can trigger here.  */
1876 : 970 : if (!vec_type
1877 : 970 : || direct_internal_fn_supported_p (ifn, vec_rhs_type,
1878 : : OPTIMIZE_FOR_SPEED))
1879 : : return NULL;
1880 : :
/* Choose the replacement: CTZ (considered for FFS only), otherwise CLZ.
   The *_DEFINED_VALUE_AT_ZERO macros store the value at zero into VAL_NEW;
   a result of 2 is what this code treats as "defined".  */
1881 : 970 : if (ifn == IFN_FFS
1882 : 970 : && direct_internal_fn_supported_p (IFN_CTZ, vec_rhs_type,
1883 : : OPTIMIZE_FOR_SPEED))
1884 : : {
1885 : 0 : ifnnew = IFN_CTZ;
1886 : 0 : defined_at_zero_new
1887 : 0 : = CTZ_DEFINED_VALUE_AT_ZERO (SCALAR_INT_TYPE_MODE (rhs_type),
1888 : : val_new) == 2;
1889 : : }
1890 : 970 : else if (direct_internal_fn_supported_p (IFN_CLZ, vec_rhs_type,
1891 : : OPTIMIZE_FOR_SPEED))
1892 : : {
1893 : 88 : ifnnew = IFN_CLZ;
1894 : 88 : defined_at_zero_new
1895 : 176 : = CLZ_DEFINED_VALUE_AT_ZERO (SCALAR_INT_TYPE_MODE (rhs_type),
1896 : : val_new) == 2;
1897 : : }
/* Fall back to (or switch to) POPCOUNT when nothing was chosen yet, or when
   the matched call is defined at zero but the chosen replacement is not.
   The value PREC recorded here is replaced by the branch-specific VAL_CMP
   computations below.  */
1898 : 970 : if ((ifnnew == IFN_LAST
1899 : 88 : || (defined_at_zero && !defined_at_zero_new))
1900 : 970 : && direct_internal_fn_supported_p (IFN_POPCOUNT, vec_rhs_type,
1901 : : OPTIMIZE_FOR_SPEED))
1902 : : {
1903 : : ifnnew = IFN_POPCOUNT;
1904 : : defined_at_zero_new = true;
1905 : : val_new = prec;
1906 : : }
1907 : 970 : if (ifnnew == IFN_LAST)
1908 : : return NULL;
1909 : :
1910 : 124 : vect_pattern_detected ("vec_recog_ctz_ffs_pattern", call_stmt);
1911 : :
/* Build the argument of the replacement call.  Each branch below appends
   its helper statements to the pattern definition sequence and leaves the
   final argument in RHS_OPRND.  */
1912 : 124 : val_cmp = val_new;
1913 : 124 : if ((ifnnew == IFN_CLZ
1914 : 124 : && defined_at_zero
1915 : 60 : && defined_at_zero_new
1916 : 60 : && val == prec
1917 : 31 : && val_new == prec)
1918 : 93 : || (ifnnew == IFN_POPCOUNT && ifn == IFN_CTZ))
1919 : : {
1920 : : /* .CTZ (X) = PREC - .CLZ ((X - 1) & ~X)
1921 : : .CTZ (X) = .POPCOUNT ((X - 1) & ~X). */
1922 : : if (ifnnew == IFN_CLZ)
1923 : : sub = prec;
1924 : 56 : val_cmp = prec;
1925 : :
/* Do the arithmetic in the unsigned variant of the type, converting the
   operand first (presumably so that X - 1 is not a signed operation that
   could overflow -- TODO confirm).  */
1926 : 56 : if (!TYPE_UNSIGNED (rhs_type))
1927 : : {
1928 : 12 : rhs_type = unsigned_type_for (rhs_type);
1929 : 12 : vec_rhs_type = get_vectype_for_scalar_type (vinfo, rhs_type);
1930 : 12 : new_var = vect_recog_temp_ssa_var (rhs_type, NULL);
1931 : 12 : pattern_stmt = gimple_build_assign (new_var, NOP_EXPR, rhs_oprnd);
1932 : 12 : append_pattern_def_seq (vinfo, stmt_vinfo, pattern_stmt,
1933 : : vec_rhs_type);
1934 : 12 : rhs_oprnd = new_var;
1935 : : }
1936 : :
/* m1 = X - 1, written as X + (-1).  */
1937 : 56 : tree m1 = vect_recog_temp_ssa_var (rhs_type, NULL);
1938 : 56 : pattern_stmt = gimple_build_assign (m1, PLUS_EXPR, rhs_oprnd,
1939 : 56 : build_int_cst (rhs_type, -1));
1940 : 56 : gimple_set_location (pattern_stmt, loc);
1941 : 56 : append_pattern_def_seq (vinfo, stmt_vinfo, pattern_stmt, vec_rhs_type);
1942 : :
/* ~X.  */
1943 : 56 : new_var = vect_recog_temp_ssa_var (rhs_type, NULL);
1944 : 56 : pattern_stmt = gimple_build_assign (new_var, BIT_NOT_EXPR, rhs_oprnd);
1945 : 56 : gimple_set_location (pattern_stmt, loc);
1946 : 56 : append_pattern_def_seq (vinfo, stmt_vinfo, pattern_stmt, vec_rhs_type);
1947 : 56 : rhs_oprnd = new_var;
1948 : :
/* (X - 1) & ~X.  */
1949 : 56 : new_var = vect_recog_temp_ssa_var (rhs_type, NULL);
1950 : 56 : pattern_stmt = gimple_build_assign (new_var, BIT_AND_EXPR,
1951 : : m1, rhs_oprnd);
1952 : 56 : gimple_set_location (pattern_stmt, loc);
1953 : 56 : append_pattern_def_seq (vinfo, stmt_vinfo, pattern_stmt, vec_rhs_type);
1954 : 56 : rhs_oprnd = new_var;
1955 : 56 : }
1956 : 68 : else if (ifnnew == IFN_CLZ)
1957 : : {
1958 : : /* .CTZ (X) = (PREC - 1) - .CLZ (X & -X)
1959 : : .FFS (X) = PREC - .CLZ (X & -X). */
1960 : 57 : sub = prec - (ifn == IFN_CTZ);
1961 : 57 : val_cmp = sub - val_new;
1962 : :
1963 : 57 : tree neg = vect_recog_temp_ssa_var (rhs_type, NULL);
1964 : 57 : pattern_stmt = gimple_build_assign (neg, NEGATE_EXPR, rhs_oprnd);
1965 : 57 : gimple_set_location (pattern_stmt, loc);
1966 : 57 : append_pattern_def_seq (vinfo, stmt_vinfo, pattern_stmt, vec_rhs_type);
1967 : :
1968 : 57 : new_var = vect_recog_temp_ssa_var (rhs_type, NULL);
1969 : 57 : pattern_stmt = gimple_build_assign (new_var, BIT_AND_EXPR,
1970 : : rhs_oprnd, neg);
1971 : 57 : gimple_set_location (pattern_stmt, loc);
1972 : 57 : append_pattern_def_seq (vinfo, stmt_vinfo, pattern_stmt, vec_rhs_type);
1973 : 57 : rhs_oprnd = new_var;
1974 : : }
1975 : 11 : else if (ifnnew == IFN_POPCOUNT)
1976 : : {
1977 : : /* .CTZ (X) = PREC - .POPCOUNT (X | -X)
1978 : : .FFS (X) = (PREC + 1) - .POPCOUNT (X | -X). */
1979 : 11 : sub = prec + (ifn == IFN_FFS);
1980 : 11 : val_cmp = sub;
1981 : :
1982 : 11 : tree neg = vect_recog_temp_ssa_var (rhs_type, NULL);
1983 : 11 : pattern_stmt = gimple_build_assign (neg, NEGATE_EXPR, rhs_oprnd);
1984 : 11 : gimple_set_location (pattern_stmt, loc);
1985 : 11 : append_pattern_def_seq (vinfo, stmt_vinfo, pattern_stmt, vec_rhs_type);
1986 : :
1987 : 11 : new_var = vect_recog_temp_ssa_var (rhs_type, NULL);
1988 : 11 : pattern_stmt = gimple_build_assign (new_var, BIT_IOR_EXPR,
1989 : : rhs_oprnd, neg);
1990 : 11 : gimple_set_location (pattern_stmt, loc);
1991 : 11 : append_pattern_def_seq (vinfo, stmt_vinfo, pattern_stmt, vec_rhs_type);
1992 : 11 : rhs_oprnd = new_var;
1993 : : }
1994 : 0 : else if (ifnnew == IFN_CTZ)
1995 : : {
1996 : : /* .FFS (X) = .CTZ (X) + 1. */
1997 : 0 : add = 1;
1998 : 0 : val_cmp++;
1999 : : }
2000 : :
2001 : : /* Create B = .IFNNEW (A). */
/* When the target defines the replacement's value at zero, pass it as an
   explicit second argument of the internal call.  */
2002 : 124 : new_var = vect_recog_temp_ssa_var (lhs_type, NULL);
2003 : 124 : if ((ifnnew == IFN_CLZ || ifnnew == IFN_CTZ) && defined_at_zero_new)
2004 : 88 : pattern_stmt
2005 : 88 : = gimple_build_call_internal (ifnnew, 2, rhs_oprnd,
2006 : : build_int_cst (integer_type_node,
2007 : : val_new));
2008 : : else
2009 : 36 : pattern_stmt = gimple_build_call_internal (ifnnew, 1, rhs_oprnd);
2010 : 124 : gimple_call_set_lhs (pattern_stmt, new_var);
2011 : 124 : gimple_set_location (pattern_stmt, loc);
2012 : 124 : *type_out = vec_type;
2013 : :
/* Apply the SUB - result or result + ADD adjustment.  The call built above
   then becomes part of the pattern definition sequence and the adjustment
   becomes the candidate final statement.  */
2014 : 124 : if (sub)
2015 : : {
2016 : 99 : append_pattern_def_seq (vinfo, stmt_vinfo, pattern_stmt, vec_type);
2017 : 99 : tree ret_var = vect_recog_temp_ssa_var (lhs_type, NULL);
2018 : 99 : pattern_stmt = gimple_build_assign (ret_var, MINUS_EXPR,
2019 : : build_int_cst (lhs_type, sub),
2020 : : new_var);
2021 : 99 : gimple_set_location (pattern_stmt, loc);
2022 : 99 : new_var = ret_var;
2023 : : }
2024 : 25 : else if (add)
2025 : : {
2026 : 0 : append_pattern_def_seq (vinfo, stmt_vinfo, pattern_stmt, vec_type);
2027 : 0 : tree ret_var = vect_recog_temp_ssa_var (lhs_type, NULL);
2028 : 0 : pattern_stmt = gimple_build_assign (ret_var, PLUS_EXPR, new_var,
2029 : : build_int_cst (lhs_type, add));
2030 : 0 : gimple_set_location (pattern_stmt, loc);
2031 : 0 : new_var = ret_var;
2032 : : }
2033 : :
/* If the matched call is defined at zero and the sequence built so far does
   not produce the same value there, select it explicitly:
   X != 0 ? result : VAL.  The comparison uses the call's original
   (unconverted) argument.  */
2034 : 124 : if (defined_at_zero
2035 : 88 : && (!defined_at_zero_new || val != val_cmp))
2036 : : {
2037 : 11 : append_pattern_def_seq (vinfo, stmt_vinfo, pattern_stmt, vec_type);
2038 : 11 : tree ret_var = vect_recog_temp_ssa_var (lhs_type, NULL);
2039 : 11 : rhs_oprnd = gimple_call_arg (call_stmt, 0);
2040 : 11 : rhs_type = TREE_TYPE (rhs_oprnd);
2041 : 11 : tree cmp = build2_loc (loc, NE_EXPR, boolean_type_node,
2042 : : rhs_oprnd, build_zero_cst (rhs_type));
2043 : 11 : pattern_stmt = gimple_build_assign (ret_var, COND_EXPR, cmp,
2044 : : new_var,
2045 : : build_int_cst (lhs_type, val));
2046 : : }
2047 : :
2048 : 124 : if (dump_enabled_p ())
2049 : 0 : dump_printf_loc (MSG_NOTE, vect_location,
2050 : : "created pattern stmt: %G", pattern_stmt);
2051 : :
2052 : : return pattern_stmt;
2053 : : }
2054 : :
2055 : : /* Function vect_recog_popcount_clz_ctz_ffs_pattern
2056 : :
2057 : : Try to find the following pattern:
2058 : :
2059 : : UTYPE1 A;
2060 : : TYPE1 B;
2061 : : UTYPE2 temp_in;
2062 : : TYPE3 temp_out;
2063 : : temp_in = (UTYPE2)A;
2064 : :
2065 : : temp_out = __builtin_popcount{,l,ll} (temp_in);
2066 : : B = (TYPE1) temp_out;
2067 : :
2068 : : TYPE2 may or may not be equal to TYPE3.
2069 : : i.e. TYPE2 is equal to TYPE3 for __builtin_popcount
2070 : : i.e. TYPE2 is not equal to TYPE3 for __builtin_popcountll
2071 : :
2072 : : Input:
2073 : :
2074 : : * STMT_VINFO: The stmt from which the pattern search begins.
2075 : : here it starts with B = (TYPE1) temp_out;
2076 : :
2077 : : Output:
2078 : :
2079 : : * TYPE_OUT: The vector type of the output of this pattern.
2080 : :
2081 : : * Return value: A new stmt that will be used to replace the sequence of
2082 : : stmts that constitute the pattern. In this case it will be:
2083 : : B = .POPCOUNT (A);
2084 : :
2085 : : Similarly for clz, ctz and ffs.
2086 : : */
2087 : :
2088 : : static gimple *
2089 : 24961818 : vect_recog_popcount_clz_ctz_ffs_pattern (vec_info *vinfo,
2090 : : stmt_vec_info stmt_vinfo,
2091 : : tree *type_out)
2092 : : {
2093 : 24961818 : gassign *last_stmt = dyn_cast <gassign *> (stmt_vinfo->stmt);
2094 : 24961818 : gimple *call_stmt, *pattern_stmt;
2095 : 24961818 : tree rhs_oprnd, rhs_origin, lhs_oprnd, lhs_type, vec_type, new_var;
2096 : 24961818 : internal_fn ifn = IFN_LAST;
/* Constant added to the narrow call's result; only set for CLZ below.  */
2097 : 24961818 : int addend = 0;
2098 : :
2099 : : /* Find B = (TYPE1) temp_out. */
2100 : 24961818 : if (!last_stmt)
2101 : : return NULL;
2102 : 17149203 : tree_code code = gimple_assign_rhs_code (last_stmt);
2103 : 17149203 : if (!CONVERT_EXPR_CODE_P (code))
2104 : : return NULL;
2105 : :
2106 : 2343885 : lhs_oprnd = gimple_assign_lhs (last_stmt);
2107 : 2343885 : lhs_type = TREE_TYPE (lhs_oprnd);
2108 : 2343885 : if (!INTEGRAL_TYPE_P (lhs_type))
2109 : : return NULL;
2110 : :
/* The converted value must be an SSA name used only by this conversion.  */
2111 : 2199595 : rhs_oprnd = gimple_assign_rhs1 (last_stmt);
2112 : 2199595 : if (TREE_CODE (rhs_oprnd) != SSA_NAME
2113 : 2199595 : || !has_single_use (rhs_oprnd))
2114 : : return NULL;
2115 : 1161521 : call_stmt = SSA_NAME_DEF_STMT (rhs_oprnd);
2116 : :
2117 : : /* Find temp_out = __builtin_popcount{,l,ll} (temp_in); */
2118 : 1161521 : if (!is_gimple_call (call_stmt))
2119 : : return NULL;
2120 : 93905 : switch (gimple_call_combined_fn (call_stmt))
2121 : : {
/* Receives the value at zero from the *_DEFINED_VALUE_AT_ZERO macros in the
   cases below.  */
2122 : : int val;
2123 : : CASE_CFN_POPCOUNT:
2124 : : ifn = IFN_POPCOUNT;
2125 : : break;
2126 : 2303 : CASE_CFN_CLZ:
2127 : 2303 : ifn = IFN_CLZ;
2128 : : /* Punt if call result is unsigned and defined value at zero
2129 : : is negative, as the negative value doesn't extend correctly. */
2130 : 2303 : if (TYPE_UNSIGNED (TREE_TYPE (rhs_oprnd))
2131 : 0 : && gimple_call_internal_p (call_stmt)
2132 : 2303 : && CLZ_DEFINED_VALUE_AT_ZERO
2133 : : (SCALAR_INT_TYPE_MODE (TREE_TYPE (rhs_oprnd)), val) == 2
2134 : 2303 : && val < 0)
2135 : : return NULL;
2136 : : break;
2137 : 523 : CASE_CFN_CTZ:
2138 : 523 : ifn = IFN_CTZ;
2139 : : /* Punt if call result is unsigned and defined value at zero
2140 : : is negative, as the negative value doesn't extend correctly. */
2141 : 523 : if (TYPE_UNSIGNED (TREE_TYPE (rhs_oprnd))
2142 : 0 : && gimple_call_internal_p (call_stmt)
2143 : 523 : && CTZ_DEFINED_VALUE_AT_ZERO
2144 : : (SCALAR_INT_TYPE_MODE (TREE_TYPE (rhs_oprnd)), val) == 2
2145 : 523 : && val < 0)
2146 : : return NULL;
2147 : : break;
2148 : 18 : CASE_CFN_FFS:
2149 : 18 : ifn = IFN_FFS;
2150 : 18 : break;
2151 : : default:
2152 : : return NULL;
2153 : : }
2154 : :
/* Only one- or two-argument calls are handled; a second argument of an
   internal call is read below as its value at zero.  */
2155 : 3075 : if (gimple_call_num_args (call_stmt) != 1
2156 : 3075 : && gimple_call_num_args (call_stmt) != 2)
2157 : : return NULL;
2158 : :
/* From here on RHS_OPRND is the call's argument (temp_in).  Look through a
   possible promotion of it; UNPROM_DIFF then describes the unpromoted
   value (A in the header comment).  */
2159 : 3075 : rhs_oprnd = gimple_call_arg (call_stmt, 0);
2160 : 3075 : vect_unpromoted_value unprom_diff;
2161 : 3075 : rhs_origin
2162 : 3075 : = vect_look_through_possible_promotion (vinfo, rhs_oprnd, &unprom_diff);
2163 : :
2164 : 3075 : if (!rhs_origin)
2165 : : return NULL;
2166 : :
2167 : : /* Input and output of .POPCOUNT should be same-precision integer. */
2168 : 3075 : if (TYPE_PRECISION (unprom_diff.type) != TYPE_PRECISION (lhs_type))
2169 : : return NULL;
2170 : :
2171 : : /* Also A should be unsigned or same precision as temp_in, otherwise
2172 : : different builtins/internal functions have different behaviors. */
2173 : 1268 : if (TYPE_PRECISION (unprom_diff.type)
2174 : 1268 : != TYPE_PRECISION (TREE_TYPE (rhs_oprnd)))
2175 : 171 : switch (ifn)
2176 : : {
2177 : 68 : case IFN_POPCOUNT:
2178 : : /* For popcount require zero extension, which doesn't add any
2179 : : further bits to the count. */
2180 : 68 : if (!TYPE_UNSIGNED (unprom_diff.type))
2181 : : return NULL;
2182 : : break;
2183 : 85 : case IFN_CLZ:
2184 : : /* clzll (x) == clz (x) + 32 for unsigned x != 0, so ok
2185 : : if it is undefined at zero or if it matches also for the
2186 : : defined value there. */
2187 : 85 : if (!TYPE_UNSIGNED (unprom_diff.type))
2188 : : return NULL;
2189 : 85 : if (!type_has_mode_precision_p (lhs_type)
2190 : 85 : || !type_has_mode_precision_p (TREE_TYPE (rhs_oprnd)))
2191 : 0 : return NULL;
/* ADDEND is the precision difference between the call's argument type and
   LHS_TYPE; it is added to the narrow .CLZ result at the end.  */
2192 : 85 : addend = (TYPE_PRECISION (TREE_TYPE (rhs_oprnd))
2193 : 85 : - TYPE_PRECISION (lhs_type));
2194 : 85 : if (gimple_call_internal_p (call_stmt)
2195 : 85 : && gimple_call_num_args (call_stmt) == 2)
2196 : : {
/* VAL1 is the original call's explicit value at zero, VAL2 the target's
   value for LHS_TYPE's mode; they must agree once ADDEND is applied.  */
2197 : 0 : int val1, val2;
2198 : 0 : val1 = tree_to_shwi (gimple_call_arg (call_stmt, 1));
2199 : 0 : int d2
2200 : 0 : = CLZ_DEFINED_VALUE_AT_ZERO (SCALAR_INT_TYPE_MODE (lhs_type),
2201 : : val2);
2202 : 0 : if (d2 != 2 || val1 != val2 + addend)
2203 : : return NULL;
2204 : : }
2205 : : break;
2206 : 13 : case IFN_CTZ:
2207 : : /* ctzll (x) == ctz (x) for unsigned or signed x != 0, so ok
2208 : : if it is undefined at zero or if it matches also for the
2209 : : defined value there. */
2210 : 13 : if (gimple_call_internal_p (call_stmt)
2211 : 13 : && gimple_call_num_args (call_stmt) == 2)
2212 : : {
2213 : 0 : int val1, val2;
2214 : 0 : val1 = tree_to_shwi (gimple_call_arg (call_stmt, 1));
2215 : 0 : int d2
2216 : 0 : = CTZ_DEFINED_VALUE_AT_ZERO (SCALAR_INT_TYPE_MODE (lhs_type),
2217 : : val2);
2218 : 0 : if (d2 != 2 || val1 != val2)
2219 : : return NULL;
2220 : : }
2221 : : break;
2222 : : case IFN_FFS:
2223 : : /* ffsll (x) == ffs (x) for unsigned or signed x. */
2224 : : break;
2225 : 0 : default:
2226 : 0 : gcc_unreachable ();
2227 : : }
2228 : :
2229 : 1268 : vec_type = get_vectype_for_scalar_type (vinfo, lhs_type);
2230 : : /* Do it only if the backend has popcount<vector_mode>2 etc. pattern. */
2231 : 1268 : if (!vec_type)
2232 : : return NULL;
2233 : :
/* Without direct support, POPCOUNT and CLZ are given up on, while CTZ and
   FFS may still be lowered further down via vect_recog_ctz_ffs_pattern if
   one of the functions it can build on is available.  */
2234 : 1153 : bool supported
2235 : 1153 : = direct_internal_fn_supported_p (ifn, vec_type, OPTIMIZE_FOR_SPEED);
2236 : 1153 : if (!supported)
2237 : 1067 : switch (ifn)
2238 : : {
2239 : : case IFN_POPCOUNT:
2240 : : case IFN_CLZ:
2241 : : return NULL;
2242 : 18 : case IFN_FFS:
2243 : : /* vect_recog_ctz_ffs_pattern can implement ffs using ctz. */
2244 : 18 : if (direct_internal_fn_supported_p (IFN_CTZ, vec_type,
2245 : : OPTIMIZE_FOR_SPEED))
2246 : : break;
2247 : : /* FALLTHRU */
2248 : 335 : case IFN_CTZ:
2249 : : /* vect_recog_ctz_ffs_pattern can implement ffs or ctz using
2250 : : clz or popcount. */
2251 : 335 : if (direct_internal_fn_supported_p (IFN_CLZ, vec_type,
2252 : : OPTIMIZE_FOR_SPEED))
2253 : : break;
2254 : 301 : if (direct_internal_fn_supported_p (IFN_POPCOUNT, vec_type,
2255 : : OPTIMIZE_FOR_SPEED))
2256 : : break;
2257 : : return NULL;
2258 : 0 : default:
2259 : 0 : gcc_unreachable ();
2260 : : }
2261 : :
2262 : 142 : vect_pattern_detected ("vec_recog_popcount_clz_ctz_ffs_pattern",
2263 : : call_stmt);
2264 : :
2265 : : /* Create B = .POPCOUNT (A). */
/* For CLZ and CTZ, pass the target's value at zero for LHS_TYPE's mode as
   an explicit second argument when the macro reports it as defined
   (result 2).  */
2266 : 142 : new_var = vect_recog_temp_ssa_var (lhs_type, NULL);
2267 : 142 : tree arg2 = NULL_TREE;
2268 : 142 : int val;
2269 : 142 : if (ifn == IFN_CLZ
2270 : 174 : && CLZ_DEFINED_VALUE_AT_ZERO (SCALAR_INT_TYPE_MODE (lhs_type),
2271 : : val) == 2)
2272 : 30 : arg2 = build_int_cst (integer_type_node, val);
2273 : 112 : else if (ifn == IFN_CTZ
2274 : 150 : && CTZ_DEFINED_VALUE_AT_ZERO (SCALAR_INT_TYPE_MODE (lhs_type),
2275 : : val) == 2)
2276 : 38 : arg2 = build_int_cst (integer_type_node, val);
2277 : 142 : if (arg2)
2278 : 68 : pattern_stmt = gimple_build_call_internal (ifn, 2, unprom_diff.op, arg2);
2279 : : else
2280 : 74 : pattern_stmt = gimple_build_call_internal (ifn, 1, unprom_diff.op);
2281 : 142 : gimple_call_set_lhs (pattern_stmt, new_var);
2282 : 142 : gimple_set_location (pattern_stmt, gimple_location (last_stmt));
2283 : 142 : *type_out = vec_type;
2284 : :
2285 : 142 : if (dump_enabled_p ())
2286 : 8 : dump_printf_loc (MSG_NOTE, vect_location,
2287 : : "created pattern stmt: %G", pattern_stmt);
2288 : :
2289 : 142 : if (addend)
2290 : : {
/* ADDEND is only set for CLZ, and unsupported CLZ returned NULL above.  */
2291 : 6 : gcc_assert (supported);
2292 : 6 : append_pattern_def_seq (vinfo, stmt_vinfo, pattern_stmt, vec_type);
2293 : 6 : tree ret_var = vect_recog_temp_ssa_var (lhs_type, NULL);
2294 : 6 : pattern_stmt = gimple_build_assign (ret_var, PLUS_EXPR, new_var,
2295 : : build_int_cst (lhs_type, addend));
2296 : : }
2297 : 136 : else if (!supported)
2298 : : {
/* Register the new call as a statement of its own, let
   vect_recog_ctz_ffs_pattern rewrite it, and move the helper statements it
   generated onto this statement's pattern definition sequence.  */
2299 : 56 : stmt_vec_info new_stmt_info = vinfo->add_stmt (pattern_stmt);
2300 : 56 : STMT_VINFO_VECTYPE (new_stmt_info) = vec_type;
2301 : 56 : pattern_stmt
2302 : 56 : = vect_recog_ctz_ffs_pattern (vinfo, new_stmt_info, type_out);
2303 : 56 : if (pattern_stmt == NULL)
2304 : : return NULL;
2305 : 56 : if (gimple_seq seq = STMT_VINFO_PATTERN_DEF_SEQ (new_stmt_info))
2306 : : {
2307 : 56 : gimple_seq *pseq = &STMT_VINFO_PATTERN_DEF_SEQ (stmt_vinfo);
2308 : 56 : gimple_seq_add_seq_without_update (pseq, seq);
2309 : : }
2310 : : }
2311 : : return pattern_stmt;
2312 : : }
2313 : :
2314 : : /* Function vect_recog_pow_pattern
2315 : :
2316 : : Try to find the following pattern:
2317 : :
2318 : : x = POW (y, N);
2319 : :
2320 : : with POW being one of pow, powf, powi, powif and N being
2321 : : either 2 or 0.5.
2322 : :
2323 : : Input:
2324 : :
2325 : : * STMT_VINFO: The stmt from which the pattern search begins.
2326 : :
2327 : : Output:
2328 : :
2329 : : * TYPE_OUT: The type of the output of this pattern.
2330 : :
2331 : : * Return value: A new stmt that will be used to replace the sequence of
2332 : : stmts that constitute the pattern. In this case it will be:
2333 : : x = x * x
2334 : : or
2335 : : x = sqrt (x)
2336 : : */
2337 : :
2338 : : static gimple *
2339 : 24961824 : vect_recog_pow_pattern (vec_info *vinfo,
2340 : : stmt_vec_info stmt_vinfo, tree *type_out)
2341 : : {
2342 : 24961824 : gimple *last_stmt = stmt_vinfo->stmt;
2343 : 24961824 : tree base, exp;
2344 : 24961824 : gimple *stmt;
2345 : 24961824 : tree var;
2346 : :
2347 : 24961824 : if (!is_gimple_call (last_stmt) || gimple_call_lhs (last_stmt) == NULL)
2348 : : return NULL;
2349 : :
2350 : 1353869 : switch (gimple_call_combined_fn (last_stmt))
2351 : : {
2352 : 364 : CASE_CFN_POW:
2353 : 364 : CASE_CFN_POWI:
2354 : 364 : break;
2355 : :
2356 : : default:
2357 : : return NULL;
2358 : : }
2359 : :
2360 : 364 : base = gimple_call_arg (last_stmt, 0);
2361 : 364 : exp = gimple_call_arg (last_stmt, 1);
2362 : 364 : if (TREE_CODE (exp) != REAL_CST
2363 : 341 : && TREE_CODE (exp) != INTEGER_CST)
2364 : : {
2365 : 341 : if (flag_unsafe_math_optimizations
2366 : 95 : && TREE_CODE (base) == REAL_CST
2367 : 423 : && gimple_call_builtin_p (last_stmt, BUILT_IN_NORMAL))
2368 : : {
2369 : 82 : combined_fn log_cfn;
2370 : 82 : built_in_function exp_bfn;
2371 : 82 : switch (DECL_FUNCTION_CODE (gimple_call_fndecl (last_stmt)))
2372 : : {
2373 : : case BUILT_IN_POW:
2374 : : log_cfn = CFN_BUILT_IN_LOG;
2375 : : exp_bfn = BUILT_IN_EXP;
2376 : : break;
2377 : 80 : case BUILT_IN_POWF:
2378 : 80 : log_cfn = CFN_BUILT_IN_LOGF;
2379 : 80 : exp_bfn = BUILT_IN_EXPF;
2380 : 80 : break;
2381 : 0 : case BUILT_IN_POWL:
2382 : 0 : log_cfn = CFN_BUILT_IN_LOGL;
2383 : 0 : exp_bfn = BUILT_IN_EXPL;
2384 : 0 : break;
2385 : : default:
2386 : : return NULL;
2387 : : }
2388 : 82 : tree logc = fold_const_call (log_cfn, TREE_TYPE (base), base);
2389 : 82 : tree exp_decl = builtin_decl_implicit (exp_bfn);
2390 : : /* Optimize pow (C, x) as exp (log (C) * x). Normally match.pd
2391 : : does that, but if C is a power of 2, we want to use
2392 : : exp2 (log2 (C) * x) in the non-vectorized version, but for
2393 : : vectorization we don't have vectorized exp2. */
2394 : 82 : if (logc
2395 : 2 : && TREE_CODE (logc) == REAL_CST
2396 : 2 : && exp_decl
2397 : 84 : && lookup_attribute ("omp declare simd",
2398 : 2 : DECL_ATTRIBUTES (exp_decl)))
2399 : : {
2400 : 2 : cgraph_node *node = cgraph_node::get_create (exp_decl);
2401 : 2 : if (node->simd_clones == NULL)
2402 : : {
2403 : 2 : if (targetm.simd_clone.compute_vecsize_and_simdlen == NULL
2404 : 2 : || node->definition)
2405 : : return NULL;
2406 : 2 : expand_simd_clones (node);
2407 : 2 : if (node->simd_clones == NULL)
2408 : : return NULL;
2409 : : }
2410 : 2 : *type_out = get_vectype_for_scalar_type (vinfo, TREE_TYPE (base));
2411 : 2 : if (!*type_out)
2412 : : return NULL;
2413 : 2 : tree def = vect_recog_temp_ssa_var (TREE_TYPE (base), NULL);
2414 : 2 : gimple *g = gimple_build_assign (def, MULT_EXPR, exp, logc);
2415 : 2 : append_pattern_def_seq (vinfo, stmt_vinfo, g);
2416 : 2 : tree res = vect_recog_temp_ssa_var (TREE_TYPE (base), NULL);
2417 : 2 : g = gimple_build_call (exp_decl, 1, def);
2418 : 2 : gimple_call_set_lhs (g, res);
2419 : 2 : return g;
2420 : : }
2421 : : }
2422 : :
2423 : 339 : return NULL;
2424 : : }
2425 : :
2426 : : /* We now have a pow or powi builtin function call with a constant
2427 : : exponent. */
2428 : :
2429 : : /* Catch squaring. */
2430 : 23 : if ((tree_fits_shwi_p (exp)
2431 : 0 : && tree_to_shwi (exp) == 2)
2432 : 23 : || (TREE_CODE (exp) == REAL_CST
2433 : 23 : && real_equal (&TREE_REAL_CST (exp), &dconst2)))
2434 : : {
2435 : 0 : if (!vect_supportable_direct_optab_p (vinfo, TREE_TYPE (base), MULT_EXPR,
2436 : 0 : TREE_TYPE (base), type_out))
2437 : : return NULL;
2438 : :
2439 : 0 : var = vect_recog_temp_ssa_var (TREE_TYPE (base), NULL);
2440 : 0 : stmt = gimple_build_assign (var, MULT_EXPR, base, base);
2441 : 0 : return stmt;
2442 : : }
2443 : :
2444 : : /* Catch square root. */
2445 : 23 : if (TREE_CODE (exp) == REAL_CST
2446 : 23 : && real_equal (&TREE_REAL_CST (exp), &dconsthalf))
2447 : : {
2448 : 8 : *type_out = get_vectype_for_scalar_type (vinfo, TREE_TYPE (base));
2449 : 8 : if (*type_out
2450 : 8 : && direct_internal_fn_supported_p (IFN_SQRT, *type_out,
2451 : : OPTIMIZE_FOR_SPEED))
2452 : : {
2453 : 6 : gcall *stmt = gimple_build_call_internal (IFN_SQRT, 1, base);
2454 : 6 : var = vect_recog_temp_ssa_var (TREE_TYPE (base), stmt);
2455 : 6 : gimple_call_set_lhs (stmt, var);
2456 : 6 : gimple_call_set_nothrow (stmt, true);
2457 : 6 : return stmt;
2458 : : }
2459 : : }
2460 : :
2461 : : return NULL;
2462 : : }
2463 : :
2464 : :
2465 : : /* Function vect_recog_widen_sum_pattern
2466 : :
2467 : : Try to find the following pattern:
2468 : :
2469 : : type x_t;
2470 : : TYPE x_T, sum = init;
2471 : : loop:
2472 : : sum_0 = phi <init, sum_1>
2473 : : S1 x_t = *p;
2474 : : S2 x_T = (TYPE) x_t;
2475 : : S3 sum_1 = x_T + sum_0;
2476 : :
2477 : : where type 'TYPE' is at least double the size of type 'type', i.e. we're
2478 : : summing elements of type 'type' into an accumulator of type 'TYPE'. This is
2479 : : a special case of a reduction computation.
2480 : :
2481 : : Input:
2482 : :
2483 : : * STMT_VINFO: The stmt from which the pattern search begins. In the example,
2484 : : when this function is called with S3, the pattern {S2,S3} will be detected.
2485 : :
2486 : : Output:
2487 : :
2488 : : * TYPE_OUT: The type of the output of this pattern.
2489 : :
2490 : : * Return value: A new stmt that will be used to replace the sequence of
2491 : : stmts that constitute the pattern. In this case it will be:
2492 : : WIDEN_SUM <x_t, sum_0>
2493 : :
2494 : : Note: The widening-sum idiom is a widening reduction pattern that is
2495 : : vectorized without preserving all the intermediate results. It
2496 : : produces only N/2 (widened) results (by summing up pairs of
2497 : : intermediate results) rather than all N results. Therefore, we
2498 : : cannot allow this pattern when we want to get all the results and in
2499 : : the correct order (as is the case when this computation is in an
2500 : : inner-loop nested in an outer-loop that is being vectorized). */
2501 : :
/* Recognizer entry point for the widening-sum reduction idiom.  Returns the
   replacement WIDEN_SUM_EXPR assignment, or NULL as soon as any check below
   fails.  TYPE_OUT is only passed on to vect_supportable_direct_optab_p;
   presumably that helper fills it in on success -- TODO confirm.  */
2502 : : static gimple *
2503 : 24961824 : vect_recog_widen_sum_pattern (vec_info *vinfo,
2504 : : stmt_vec_info stmt_vinfo, tree *type_out)
2505 : : {
2506 : 24961824 : gimple *last_stmt = stmt_vinfo->stmt;
2507 : 24961824 : tree oprnd0, oprnd1;
2508 : 24961824 : tree type;
2509 : 24961824 : gimple *pattern_stmt;
2510 : 24961824 : tree var;
2511 : :
2512 : : /* Look for the following pattern
2513 : : DX = (TYPE) X;
2514 : : sum_1 = DX + sum_0;
2515 : : In which DX is at least double the size of X, and sum_1 has been
2516 : : recognized as a reduction variable.
2517 : : */
2518 : :
2519 : : /* Starting from LAST_STMT, follow the defs of its uses in search
2520 : : of the above pattern. */
2521 : :
/* Give up unless vect_reassociating_reduction_p accepts the statement as a
   PLUS_EXPR reduction, and its first operand is an SSA name whose definition
   VINFO knows about.  */
2522 : 24961824 : if (!vect_reassociating_reduction_p (vinfo, stmt_vinfo, PLUS_EXPR,
2523 : : &oprnd0, &oprnd1)
2524 : 33436 : || TREE_CODE (oprnd0) != SSA_NAME
2525 : 24995089 : || !vinfo->lookup_def (oprnd0))
2526 : 24928609 : return NULL;
2527 : :
/* TYPE is the accumulator type (the 'TYPE' of the pattern description).  */
2528 : 33215 : type = TREE_TYPE (gimple_get_lhs (last_stmt));
2529 : :
2530 : : /* So far so good. Since last_stmt was detected as a (summation) reduction,
2531 : : we know that oprnd1 is the reduction variable (defined by a loop-header
2532 : : phi), and oprnd0 is an ssa-name defined by a stmt in the loop body.
2533 : : Left to check that oprnd0 is defined by a cast from type 'type' to type
2534 : : 'TYPE'. */
2535 : :
/* Reject the candidate unless the accumulator has at least twice the
   precision of the unpromoted source recorded in UNPROM0.  */
2536 : 33215 : vect_unpromoted_value unprom0;
2537 : 33215 : if (!vect_look_through_possible_promotion (vinfo, oprnd0, &unprom0)
2538 : 33215 : || TYPE_PRECISION (unprom0.type) * 2 > TYPE_PRECISION (type))
2539 : : return NULL;
2540 : :
/* The detection is reported before the target-support query below, so it is
   reported even when the function then returns NULL for lack of support.  */
2541 : 1555 : vect_pattern_detected ("vect_recog_widen_sum_pattern", last_stmt);
2542 : :
2543 : 1555 : if (!vect_supportable_direct_optab_p (vinfo, type, WIDEN_SUM_EXPR,
2544 : : unprom0.type, type_out))
2545 : : return NULL;
2546 : :
/* Build WIDEN_SUM_EXPR <unpromoted operand, reduction variable> into a fresh
   temporary of the accumulator type.  */
2547 : 0 : var = vect_recog_temp_ssa_var (type, NULL);
2548 : 0 : pattern_stmt = gimple_build_assign (var, WIDEN_SUM_EXPR, unprom0.op, oprnd1);
2549 : :
2550 : 0 : return pattern_stmt;
2551 : : }
2552 : :
2553 : : /* Function vect_recog_bitfield_ref_pattern
2554 : :
2555 : : Try to find the following pattern:
2556 : :
2557 : : bf_value = BIT_FIELD_REF (container, bitsize, bitpos);
2558 : : result = (type_out) bf_value;
2559 : :
2560 : : or
2561 : :
2562 : : if (BIT_FIELD_REF (container, bitsize, bitpos) `cmp` <constant>)
2563 : :
2564 : : where type_out is a non-bitfield type, that is to say, its precision matches
2565 : : 2^(TYPE_SIZE(type_out) - (TYPE_UNSIGNED (type_out) ? 1 : 2)).
2566 : :
2567 : : Input:
2568 : :
2569 : : * STMT_VINFO: The stmt from which the pattern search begins.
2570 : : here it starts with:
2571 : : result = (type_out) bf_value;
2572 : :
2573 : : or
2574 : :
2575 : : if (BIT_FIELD_REF (container, bitsize, bitpos) `cmp` <constant>)
2576 : :
2577 : : Output:
2578 : :
2579 : : * TYPE_OUT: The vector type of the output of this pattern.
2580 : :
2581 : : * Return value: A new stmt that will be used to replace the sequence of
2582 : : stmts that constitute the pattern. If the precision of type_out is bigger
2583 : : than the precision type of _1 we perform the widening before the shifting,
2584 : : since the new precision will be large enough to shift the value and moving
2585 : : widening operations up the statement chain enables the generation of
2586 : : widening loads. If we are widening and the operation after the pattern is
2587 : : an addition then we mask first and shift later, to enable the generation of
2588 : : shifting adds. In the case of narrowing we will always mask first, shift
2589 : : last and then perform a narrowing operation. This will enable the
2590 : : generation of narrowing shifts.
2591 : :
2592 : : Widening with mask first, shift later:
2593 : : container = (type_out) container;
2594 : : masked = container & (((1 << bitsize) - 1) << bitpos);
2595 : : result = masked >> bitpos;
2596 : :
2597 : : Widening with shift first, mask last:
2598 : : container = (type_out) container;
2599 : : shifted = container >> bitpos;
2600 : : result = shifted & ((1 << bitsize) - 1);
2601 : :
2602 : : Narrowing:
2603 : : masked = container & (((1 << bitsize) - 1) << bitpos);
2604 : : result = masked >> bitpos;
2605 : : result = (type_out) result;
2606 : :
2607 : : If the bitfield is signed and it's wider than type_out, we need to
2608 : : keep the result sign-extended:
2609 : : container = (type) container;
2610 : : masked = container << (prec - bitsize - bitpos);
2611 : : result = (type_out) (masked >> (prec - bitsize));
2612 : :
2613 : : Here type is the signed variant of the wider of type_out and the type
2614 : : of container.
2615 : :
2616 : : The shifting is always optional depending on whether bitpos != 0.
2617 : :
2618 : : When the original bitfield was inside a gcond then a new gcond is also
2619 : : generated with the new `result` as the operand to the comparison.
2620 : :
2621 : : */
2622 : :
/* Recognizer entry point for BIT_FIELD_REF extraction.  Lowers the extraction
   to shift/mask (and optional conversion) statements; the helper statements
   are queued with append_pattern_def_seq and the final statement is returned.
   Returns NULL whenever the candidate does not qualify.  On success *TYPE_OUT
   is set to STMT_VINFO_VECTYPE (STMT_INFO).  */
2623 : : static gimple *
2624 : 24910980 : vect_recog_bitfield_ref_pattern (vec_info *vinfo, stmt_vec_info stmt_info,
2625 : : tree *type_out)
2626 : : {
2627 : 24910980 : gimple *bf_stmt = NULL;
2628 : 24910980 : tree lhs = NULL_TREE;
2629 : 24910980 : tree ret_type = NULL_TREE;
2630 : 24910980 : gimple *stmt = STMT_VINFO_STMT (stmt_info);
/* gcond form: the candidate BIT_FIELD_REF statement is the definition of the
   condition's left operand, and only comparisons against an INTEGER_CST are
   handled.  LHS and RET_TYPE stay NULL in this form.  */
2631 : 24910980 : if (gcond *cond_stmt = dyn_cast <gcond *> (stmt))
2632 : : {
2633 : 3913811 : tree op = gimple_cond_lhs (cond_stmt);
2634 : 3913811 : if (TREE_CODE (op) != SSA_NAME)
2635 : : return NULL;
2636 : 3860028 : bf_stmt = dyn_cast <gassign *> (SSA_NAME_DEF_STMT (op));
2637 : 3860028 : if (TREE_CODE (gimple_cond_rhs (cond_stmt)) != INTEGER_CST)
2638 : : return NULL;
2639 : : }
/* Conversion form: the candidate is the definition of the SSA name being
   converted, and RET_TYPE is the type the conversion produces.  */
2640 : 41994338 : else if (is_gimple_assign (stmt)
2641 : 17098120 : && CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (stmt))
2642 : 23275805 : && TREE_CODE (gimple_assign_rhs1 (stmt)) == SSA_NAME)
2643 : : {
2644 : 2238022 : gimple *second_stmt = SSA_NAME_DEF_STMT (gimple_assign_rhs1 (stmt));
2645 : 2238022 : bf_stmt = dyn_cast <gassign *> (second_stmt);
2646 : 2238022 : lhs = gimple_assign_lhs (stmt);
2647 : 2238022 : ret_type = TREE_TYPE (lhs);
2648 : : }
2649 : :
/* BF_STMT is still NULL if neither form matched or the defining statement is
   not an assignment.  */
2650 : 23598490 : if (!bf_stmt
2651 : 23598490 : || gimple_assign_rhs_code (bf_stmt) != BIT_FIELD_REF)
2652 : : return NULL;
2653 : :
2654 : 12555 : tree bf_ref = gimple_assign_rhs1 (bf_stmt);
2655 : 12555 : tree container = TREE_OPERAND (bf_ref, 0);
/* The gcond form has no conversion, so the container's own type serves as
   the result type.  */
2656 : 12555 : ret_type = ret_type ? ret_type : TREE_TYPE (container);
2657 : :
/* The bit offset, the bit size and the container's size must all be
   compile-time constants; they are read with to_constant ()/tree_to_uhwi
   below.  */
2658 : 12555 : if (!bit_field_offset (bf_ref).is_constant ()
2659 : 12555 : || !bit_field_size (bf_ref).is_constant ()
2660 : 12555 : || !tree_fits_uhwi_p (TYPE_SIZE (TREE_TYPE (container))))
2661 : 0 : return NULL;
2662 : :
/* Only integral bitfield and container types are handled, and the container
   must not have BLKmode.  */
2663 : 24894 : if (!INTEGRAL_TYPE_P (TREE_TYPE (bf_ref))
2664 : 12553 : || !INTEGRAL_TYPE_P (TREE_TYPE (container))
2665 : 14037 : || TYPE_MODE (TREE_TYPE (container)) == E_BLKmode)
2666 : 11073 : return NULL;
2667 : :
2668 : 1482 : gimple *use_stmt, *pattern_stmt;
2669 : 1482 : use_operand_p use_p;
2670 : 1482 : bool shift_first = true;
2671 : 1482 : tree container_type = TREE_TYPE (container);
2672 : 1482 : tree vectype = get_vectype_for_scalar_type (vinfo, container_type);
2673 : :
2674 : : /* Calculate shift_n before the adjustments for widening loads, otherwise
2675 : : the container may change and we have to consider offset change for
2676 : : widening loads on big endianness. The shift_n calculated here can be
2677 : : independent of widening. */
2678 : 1482 : unsigned HOST_WIDE_INT shift_n = bit_field_offset (bf_ref).to_constant ();
2679 : 1482 : unsigned HOST_WIDE_INT mask_width = bit_field_size (bf_ref).to_constant ();
2680 : 1482 : unsigned HOST_WIDE_INT prec = tree_to_uhwi (TYPE_SIZE (container_type));
2681 : 1482 : if (BYTES_BIG_ENDIAN)
2682 : : shift_n = prec - shift_n - mask_width;
2683 : :
/* REF_SEXT: the bitfield type is signed and the result type has more bits
   than the field.  LOAD_WIDEN: the result type has more bits than the
   container.  */
2684 : 1482 : bool ref_sext = (!TYPE_UNSIGNED (TREE_TYPE (bf_ref)) &&
2685 : 1107 : TYPE_PRECISION (ret_type) > mask_width);
2686 : 1482 : bool load_widen = (TYPE_PRECISION (TREE_TYPE (container)) <
2687 : 1482 : TYPE_PRECISION (ret_type));
2688 : :
2689 : : /* We move the conversion earlier if the loaded type is smaller than the
2690 : : return type to enable the use of widening loads. And if we need a
2691 : : sign extension, we need to convert the loaded value early to a signed
2692 : : type as well. */
2693 : 1482 : if (ref_sext || load_widen)
2694 : : {
2695 : 681 : tree type = load_widen ? ret_type : container_type;
2696 : 681 : if (ref_sext)
2697 : 653 : type = gimple_signed_type (type);
2698 : 681 : pattern_stmt = gimple_build_assign (vect_recog_temp_ssa_var (type),
2699 : : NOP_EXPR, container);
/* From here on CONTAINER, CONTAINER_TYPE, PREC and VECTYPE all describe the
   converted value rather than the original container.  */
2700 : 681 : container = gimple_get_lhs (pattern_stmt);
2701 : 681 : container_type = TREE_TYPE (container);
2702 : 681 : prec = tree_to_uhwi (TYPE_SIZE (container_type));
2703 : 681 : vectype = get_vectype_for_scalar_type (vinfo, container_type);
2704 : 681 : append_pattern_def_seq (vinfo, stmt_info, pattern_stmt, vectype);
2705 : : }
2706 : 801 : else if (!useless_type_conversion_p (TREE_TYPE (container), ret_type))
2707 : : /* If we are doing the conversion last then also delay the shift as we may
2708 : : be able to combine the shift and conversion in certain cases. */
2709 : : shift_first = false;
2710 : :
2711 : : /* If the only use of the result of this BIT_FIELD_REF + CONVERT is a
2712 : : PLUS_EXPR then do the shift last as some targets can combine the shift and
2713 : : add into a single instruction. */
2714 : 1482 : if (lhs && single_imm_use (lhs, &use_p, &use_stmt))
2715 : : {
2716 : 719 : if (gimple_code (use_stmt) == GIMPLE_ASSIGN
2717 : 719 : && gimple_assign_rhs_code (use_stmt) == PLUS_EXPR)
2718 : : shift_first = false;
2719 : : }
2720 : :
2721 : : /* If we don't have to shift we only generate the mask, so just fix the
2722 : : code-path to shift_first. */
2723 : 1482 : if (shift_n == 0)
2724 : 612 : shift_first = true;
2725 : :
2726 : 1482 : tree result;
/* Shift-first path without sign extension: optionally shift the field down
   by SHIFT_N, then AND with a MASK_WIDTH-bit mask built by wi::mask.  */
2727 : 1482 : if (shift_first && !ref_sext)
2728 : : {
2729 : 408 : tree shifted = container;
2730 : 408 : if (shift_n)
2731 : : {
2732 : 48 : pattern_stmt
2733 : 48 : = gimple_build_assign (vect_recog_temp_ssa_var (container_type),
2734 : : RSHIFT_EXPR, container,
2735 : : build_int_cst (sizetype, shift_n));
2736 : 48 : shifted = gimple_assign_lhs (pattern_stmt);
2737 : 48 : append_pattern_def_seq (vinfo, stmt_info, pattern_stmt, vectype);
2738 : : }
2739 : :
2740 : 408 : tree mask = wide_int_to_tree (container_type,
2741 : 408 : wi::mask (mask_width, false, prec));
2742 : :
2743 : 408 : pattern_stmt
2744 : 408 : = gimple_build_assign (vect_recog_temp_ssa_var (container_type),
2745 : : BIT_AND_EXPR, shifted, mask);
2746 : 408 : result = gimple_assign_lhs (pattern_stmt);
2747 : : }
/* Otherwise the right shift comes last.  The first statement is either an
   in-place AND with the shifted mask (no sign extension) or a left shift
   (sign extension); a right shift by SHIFT_N always follows.  */
2748 : : else
2749 : : {
2750 : 1074 : tree temp = vect_recog_temp_ssa_var (container_type);
2751 : 1074 : if (!ref_sext)
2752 : : {
2753 : 421 : tree mask = wide_int_to_tree (container_type,
2754 : 421 : wi::shifted_mask (shift_n,
2755 : : mask_width,
2756 : : false, prec));
2757 : 421 : pattern_stmt = gimple_build_assign (temp, BIT_AND_EXPR,
2758 : : container, mask);
2759 : : }
2760 : : else
2761 : : {
/* Left-shift by SHL so the field's top bit lands on bit PREC - 1, and grow
   SHIFT_N to PREC - MASK_WIDTH for the right shift that follows.  CONTAINER
   was converted to a signed type above; NOTE(review): the right shift is
   therefore presumably arithmetic, which is what replicates the sign bit --
   confirm.  */
2762 : 653 : HOST_WIDE_INT shl = prec - shift_n - mask_width;
2763 : 653 : shift_n += shl;
2764 : 653 : pattern_stmt = gimple_build_assign (temp, LSHIFT_EXPR,
2765 : : container,
2766 : : build_int_cst (sizetype,
2767 : : shl));
2768 : : }
2769 : :
2770 : 1074 : tree masked = gimple_assign_lhs (pattern_stmt);
2771 : 1074 : append_pattern_def_seq (vinfo, stmt_info, pattern_stmt, vectype);
2772 : 1074 : pattern_stmt
2773 : 1074 : = gimple_build_assign (vect_recog_temp_ssa_var (container_type),
2774 : : RSHIFT_EXPR, masked,
2775 : : build_int_cst (sizetype, shift_n));
2776 : 1074 : result = gimple_assign_lhs (pattern_stmt);
2777 : : }
2778 : :
/* If RESULT's type still differs from RET_TYPE, queue the statement that
   produced it and make a conversion to RET_TYPE the new last statement.  */
2779 : 1482 : if (!useless_type_conversion_p (TREE_TYPE (result), ret_type))
2780 : : {
2781 : 869 : append_pattern_def_seq (vinfo, stmt_info, pattern_stmt, vectype);
2782 : 869 : pattern_stmt
2783 : 869 : = gimple_build_assign (vect_recog_temp_ssa_var (ret_type),
2784 : : NOP_EXPR, result);
2785 : : }
2786 : :
/* LHS is only left NULL on the gcond path above, so from here the original
   statement is a gcond and the dyn_cast below is expected to succeed.  The
   comparison is rebuilt as a boolean assignment on the extracted value,
   followed by a new gcond testing that boolean against zero.  */
2787 : 1482 : if (!lhs)
2788 : : {
2789 : 536 : if (!vectype)
2790 : : return NULL;
2791 : :
2792 : 428 : append_pattern_def_seq (vinfo, stmt_info, pattern_stmt, vectype);
2793 : 428 : vectype = truth_type_for (vectype);
2794 : :
2795 : : /* FIXME: This part extracts the boolean value out of the bitfield in the
2796 : : same way as vect_recog_gcond_pattern does. However because
2797 : : patterns cannot match the same root twice, when we handle and
2798 : : lower the bitfield in the gcond, vect_recog_gcond_pattern can't
2799 : : apply anymore. We should really fix it so that we don't need to
2800 : : duplicate transformations like these. */
2801 : 428 : tree new_lhs = vect_recog_temp_ssa_var (boolean_type_node, NULL);
2802 : 428 : gcond *cond_stmt = dyn_cast <gcond *> (stmt_info->stmt);
2803 : 428 : tree cond_cst = gimple_cond_rhs (cond_stmt);
/* The original comparison code is reused, with the constant converted to
   CONTAINER_TYPE.  */
2804 : 428 : gimple *new_stmt
2805 : 428 : = gimple_build_assign (new_lhs, gimple_cond_code (cond_stmt),
2806 : : gimple_get_lhs (pattern_stmt),
2807 : : fold_convert (container_type, cond_cst));
2808 : 428 : append_pattern_def_seq (vinfo, stmt_info, new_stmt, vectype, container_type);
2809 : 428 : pattern_stmt
2810 : 428 : = gimple_build_cond (NE_EXPR, new_lhs,
2811 : 428 : build_zero_cst (TREE_TYPE (new_lhs)),
2812 : : NULL_TREE, NULL_TREE);
2813 : : }
2814 : :
2815 : 1374 : *type_out = STMT_VINFO_VECTYPE (stmt_info);
2816 : 1374 : vect_pattern_detected ("bitfield_ref pattern", stmt_info->stmt);
2817 : :
2818 : 1374 : return pattern_stmt;
2819 : : }
2820 : :
2821 : : /* Function vect_recog_bit_insert_pattern
2822 : :
2823 : : Try to find the following pattern:
2824 : :
2825 : : written = BIT_INSERT_EXPR (container, value, bitpos);
2826 : :
2827 : : Input:
2828 : :
2829 : : * STMT_VINFO: The stmt we want to replace.
2830 : :
2831 : : Output:
2832 : :
2833 : : * TYPE_OUT: The vector type of the output of this pattern.
2834 : :
2835 : : * Return value: A new stmt that will be used to replace the sequence of
2836 : : stmts that constitute the pattern. In this case it will be:
2837 : : value = (container_type) value; // Make sure value has the container type
2838 : : shifted = value << bitpos; // Shift value into place
2839 : : masked = shifted & (mask << bitpos); // Mask off the non-relevant bits in
2840 : : // the 'to-write value'.
2841 : : cleared = container & ~(mask << bitpos); // Clearing the bits we want to
2842 : : // write to from the value we want
2843 : : // to write to.
2844 : : written = cleared | masked; // Write bits.
2845 : :
2846 : :
2847 : : where mask = ((1 << TYPE_PRECISION (value)) - 1), a mask to keep the number of
2848 : : bits corresponding to the real size of the bitfield value we are writing to.
2849 : : The shifting is always optional depending on whether bitpos != 0.
2850 : :
2851 : : */
2852 : :
/* Recognizer entry point for BIT_INSERT_EXPR.  Lowers the insertion to
   shift/AND/IOR statements: helper statements are queued with
   append_pattern_def_seq and the final BIT_IOR_EXPR assignment is returned.
   Returns NULL if the statement is not a BIT_INSERT_EXPR assignment or the
   container type is unsuitable.  On success *TYPE_OUT is set to
   STMT_VINFO_VECTYPE (STMT_INFO).  */
2853 : : static gimple *
2854 : 24912925 : vect_recog_bit_insert_pattern (vec_info *vinfo, stmt_vec_info stmt_info,
2855 : : tree *type_out)
2856 : : {
2857 : 24912925 : gassign *bf_stmt = dyn_cast <gassign *> (stmt_info->stmt);
2858 : 23315176 : if (!bf_stmt || gimple_assign_rhs_code (bf_stmt) != BIT_INSERT_EXPR)
2859 : : return NULL;
2860 : :
/* The three operands: the container written into, the value inserted, and
   the bit position (SHIFT).  */
2861 : 1278 : tree container = gimple_assign_rhs1 (bf_stmt);
2862 : 1278 : tree value = gimple_assign_rhs2 (bf_stmt);
2863 : 1278 : tree shift = gimple_assign_rhs3 (bf_stmt);
2864 : :
2865 : 1278 : tree bf_type = TREE_TYPE (value);
2866 : 1278 : tree container_type = TREE_TYPE (container);
2867 : :
/* Only integral containers whose size fits an unsigned HOST_WIDE_INT are
   handled.  */
2868 : 1278 : if (!INTEGRAL_TYPE_P (container_type)
2869 : 1278 : || !tree_fits_uhwi_p (TYPE_SIZE (container_type)))
2870 : : return NULL;
2871 : :
2872 : 650 : gimple *pattern_stmt;
2873 : :
/* Ask vect_convert_input for VALUE in CONTAINER_TYPE.  NOTE(review): the
   helper is presumably what queues any conversion statement it needs; the
   explicit NOP_EXPR fallback further down only runs if the returned type
   still differs -- confirm.  */
2874 : 650 : vect_unpromoted_value unprom;
2875 : 650 : unprom.set_op (value, vect_internal_def);
2876 : 650 : value = vect_convert_input (vinfo, stmt_info, container_type, &unprom,
2877 : : get_vectype_for_scalar_type (vinfo,
2878 : : container_type));
2879 : :
/* MASK_WIDTH is the precision of the inserted value's original type, i.e. the
   number of bits written.  On big-endian targets the bit position is mirrored
   within the container and SHIFT is rebuilt to match.  */
2880 : 650 : unsigned HOST_WIDE_INT mask_width = TYPE_PRECISION (bf_type);
2881 : 650 : unsigned HOST_WIDE_INT prec = tree_to_uhwi (TYPE_SIZE (container_type));
2882 : 650 : unsigned HOST_WIDE_INT shift_n = tree_to_uhwi (shift);
2883 : 650 : if (BYTES_BIG_ENDIAN)
2884 : : {
2885 : : shift_n = prec - shift_n - mask_width;
2886 : : shift = build_int_cst (TREE_TYPE (shift), shift_n);
2887 : : }
2888 : :
2889 : 650 : if (!useless_type_conversion_p (TREE_TYPE (value), container_type))
2890 : : {
2891 : 0 : pattern_stmt =
2892 : 0 : gimple_build_assign (vect_recog_temp_ssa_var (container_type),
2893 : : NOP_EXPR, value);
2894 : 0 : append_pattern_def_seq (vinfo, stmt_info, pattern_stmt);
2895 : 0 : value = gimple_get_lhs (pattern_stmt);
2896 : : }
2897 : :
2898 : : /* Shift VALUE into place. */
2899 : 650 : tree shifted = value;
2900 : 650 : if (shift_n)
2901 : : {
/* STMTS can come back empty from gimple_build, in which case nothing is
   queued.  NOTE(review): only the first statement of STMTS is queued; this
   assumes gimple_build emits at most one statement here -- confirm.  */
2902 : 360 : gimple_seq stmts = NULL;
2903 : 360 : shifted
2904 : 360 : = gimple_build (&stmts, LSHIFT_EXPR, container_type, value, shift);
2905 : 360 : if (!gimple_seq_empty_p (stmts))
2906 : 112 : append_pattern_def_seq (vinfo, stmt_info,
2907 : : gimple_seq_first_stmt (stmts));
2908 : : }
2909 : :
/* Mask covering the MASK_WIDTH bits starting at SHIFT_N (third argument of
   wi::shifted_mask is false).  */
2910 : 650 : tree mask_t
2911 : 650 : = wide_int_to_tree (container_type,
2912 : 650 : wi::shifted_mask (shift_n, mask_width, false, prec));
2913 : :
2914 : : /* Clear bits we don't want to write back from SHIFTED. */
2915 : 650 : gimple_seq stmts = NULL;
2916 : 650 : tree masked = gimple_build (&stmts, BIT_AND_EXPR, container_type, shifted,
2917 : : mask_t);
2918 : 650 : if (!gimple_seq_empty_p (stmts))
2919 : : {
2920 : 110 : pattern_stmt = gimple_seq_first_stmt (stmts);
2921 : 110 : append_pattern_def_seq (vinfo, stmt_info, pattern_stmt);
2922 : : }
2923 : :
/* Same position and width, but with the third argument true: per the pattern
   description this is the complement mask, ~(mask << bitpos).  */
2924 : : /* Mask off the bits in the container that we are to write to. */
2925 : 650 : mask_t = wide_int_to_tree (container_type,
2926 : 650 : wi::shifted_mask (shift_n, mask_width, true, prec));
2927 : 650 : tree cleared = vect_recog_temp_ssa_var (container_type);
2928 : 650 : pattern_stmt = gimple_build_assign (cleared, BIT_AND_EXPR, container, mask_t);
2929 : 650 : append_pattern_def_seq (vinfo, stmt_info, pattern_stmt);
2930 : :
2931 : : /* Write MASKED into CLEARED. */
2932 : 650 : pattern_stmt
2933 : 650 : = gimple_build_assign (vect_recog_temp_ssa_var (container_type),
2934 : : BIT_IOR_EXPR, cleared, masked);
2935 : :
2936 : 650 : *type_out = STMT_VINFO_VECTYPE (stmt_info);
2937 : 650 : vect_pattern_detected ("bit_insert pattern", stmt_info->stmt);
2938 : :
2939 : 650 : return pattern_stmt;
2940 : : }
2941 : :
2942 : :
2943 : : /* Recognize cases in which an operation is performed in one type WTYPE
2944 : : but could be done more efficiently in a narrower type NTYPE. For example,
2945 : : if we have:
2946 : :
2947 : : ATYPE a; // narrower than NTYPE
2948 : : BTYPE b; // narrower than NTYPE
2949 : : WTYPE aw = (WTYPE) a;
2950 : : WTYPE bw = (WTYPE) b;
2951 : : WTYPE res = aw + bw; // only uses of aw and bw
2952 : :
2953 : : then it would be more efficient to do:
2954 : :
2955 : : NTYPE an = (NTYPE) a;
2956 : : NTYPE bn = (NTYPE) b;
2957 : : NTYPE resn = an + bn;
2958 : : WTYPE res = (WTYPE) resn;
2959 : :
2960 : : Other situations include things like:
2961 : :
2962 : : ATYPE a; // NTYPE or narrower
2963 : : WTYPE aw = (WTYPE) a;
2964 : : WTYPE res = aw + b;
2965 : :
2966 : : when only "(NTYPE) res" is significant. In that case it's more efficient
2967 : : to truncate "b" and do the operation on NTYPE instead:
2968 : :
2969 : : NTYPE an = (NTYPE) a;
2970 : : NTYPE bn = (NTYPE) b; // truncation
2971 : : NTYPE resn = an + bn;
2972 : : WTYPE res = (WTYPE) resn;
2973 : :
2974 : : All users of "res" should then use "resn" instead, making the final
2975 : : statement dead (not marked as relevant). The final statement is still
2976 : : needed to maintain the type correctness of the IR.
2977 : :
2978 : : vect_determine_precisions has already determined the minimum
2979 : : precision of the operation and the minimum precision required
2980 : : by users of the result. */
2981 : :
/* Recognizer entry point for over-widened operations.  If the precisions
   recorded on LAST_STMT_INFO allow the assignment to be done in a narrower
   integer type, rebuilds it on converted operands in that type and returns
   the statement produced by the final vect_convert_output call back to the
   original type.  Returns NULL when the statement is not an assignment, is a
   reduction, has an unsupported operand, would not actually get narrower, or
   a needed vector type is unavailable.  On success *TYPE_OUT is the vector
   type for the original scalar type.  */
2982 : : static gimple *
2983 : 24913365 : vect_recog_over_widening_pattern (vec_info *vinfo,
2984 : : stmt_vec_info last_stmt_info, tree *type_out)
2985 : : {
2986 : 24913365 : gassign *last_stmt = dyn_cast <gassign *> (last_stmt_info->stmt);
2987 : 17100933 : if (!last_stmt)
2988 : : return NULL;
2989 : :
2990 : : /* See whether we have found that this operation can be done on a
2991 : : narrower type without changing its semantics. */
/* A zero operation_precision is treated as "no narrowing recorded".  */
2992 : 17100933 : unsigned int new_precision = last_stmt_info->operation_precision;
2993 : 17100933 : if (!new_precision)
2994 : : return NULL;
2995 : :
2996 : 950952 : tree lhs = gimple_assign_lhs (last_stmt);
2997 : 950952 : tree type = TREE_TYPE (lhs);
2998 : 950952 : tree_code code = gimple_assign_rhs_code (last_stmt);
2999 : :
3000 : : /* Punt for reductions where we don't handle the type conversions. */
3001 : 950952 : if (STMT_VINFO_DEF_TYPE (last_stmt_info) == vect_reduction_def)
3002 : : return NULL;
3003 : :
3004 : : /* Keep the first operand of a COND_EXPR as-is: only the other two
3005 : : operands are interesting. */
3006 : 948986 : unsigned int first_op = (code == COND_EXPR ? 2 : 1);
3007 : :
3008 : : /* Check the operands. */
/* NOPS counts the operands from index FIRST_OP onwards; UNPROM gets one
   cleared slot per such operand.  MIN_PRECISION and SINGLE_USE_P are
   accumulated over the operands that turn out to be vect_internal_def.  */
3009 : 948986 : unsigned int nops = gimple_num_ops (last_stmt) - first_op;
3010 : 948986 : auto_vec <vect_unpromoted_value, 3> unprom (nops);
3011 : 948986 : unprom.quick_grow_cleared (nops);
3012 : 948986 : unsigned int min_precision = 0;
3013 : 948986 : bool single_use_p = false;
3014 : 2835317 : for (unsigned int i = 0; i < nops; ++i)
3015 : : {
/* INTEGER_CST operands are recorded as constants, SSA names are looked
   through any promotion, and every other operand kind rejects the
   candidate.  */
3016 : 1886795 : tree op = gimple_op (last_stmt, first_op + i);
3017 : 1886795 : if (TREE_CODE (op) == INTEGER_CST)
3018 : 843751 : unprom[i].set_op (op, vect_constant_def);
3019 : 1043044 : else if (TREE_CODE (op) == SSA_NAME)
3020 : : {
3021 : 1043044 : bool op_single_use_p = true;
3022 : 1043044 : if (!vect_look_through_possible_promotion (vinfo, op, &unprom[i],
3023 : : &op_single_use_p))
3024 : 464 : return NULL;
3025 : : /* If:
3026 : :
3027 : : (1) N bits of the result are needed;
3028 : : (2) all inputs are widened from M<N bits; and
3029 : : (3) one operand OP is a single-use SSA name
3030 : :
3031 : : we can shift the M->N widening from OP to the output
3032 : : without changing the number or type of extensions involved.
3033 : : This then reduces the number of copies of STMT_INFO.
3034 : :
3035 : : If instead of (3) more than one operand is a single-use SSA name,
3036 : : shifting the extension to the output is even more of a win.
3037 : :
3038 : : If instead:
3039 : :
3040 : : (1) N bits of the result are needed;
3041 : : (2) one operand OP2 is widened from M2<N bits;
3042 : : (3) another operand OP1 is widened from M1<M2 bits; and
3043 : : (4) both OP1 and OP2 are single-use
3044 : :
3045 : : the choice is between:
3046 : :
3047 : : (a) truncating OP2 to M1, doing the operation on M1,
3048 : : and then widening the result to N
3049 : :
3050 : : (b) widening OP1 to M2, doing the operation on M2, and then
3051 : : widening the result to N
3052 : :
3053 : : Both shift the M2->N widening of the inputs to the output.
3054 : : (a) additionally shifts the M1->M2 widening to the output;
3055 : : it requires fewer copies of STMT_INFO but requires an extra
3056 : : M2->M1 truncation.
3057 : :
3058 : : Which is better will depend on the complexity and cost of
3059 : : STMT_INFO, which is hard to predict at this stage. However,
3060 : : a clear tie-breaker in favor of (b) is the fact that the
3061 : : truncation in (a) increases the length of the operation chain.
3062 : :
3063 : : If instead of (4) only one of OP1 or OP2 is single-use,
3064 : : (b) is still a win over doing the operation in N bits:
3065 : : it still shifts the M2->N widening on the single-use operand
3066 : : to the output and reduces the number of STMT_INFO copies.
3067 : :
3068 : : If neither operand is single-use then operating on fewer than
3069 : : N bits might lead to more extensions overall. Whether it does
3070 : : or not depends on global information about the vectorization
3071 : : region, and whether that's a good trade-off would again
3072 : : depend on the complexity and cost of the statements involved,
3073 : : as well as things like register pressure that are not normally
3074 : : modelled at this stage. We therefore ignore these cases
3075 : : and just optimize the clear single-use wins above.
3076 : :
3077 : : Thus we take the maximum precision of the unpromoted operands
3078 : : and record whether any operand is single-use. */
3079 : 1042580 : if (unprom[i].dt == vect_internal_def)
3080 : : {
3081 : 718887 : min_precision = MAX (min_precision,
3082 : : TYPE_PRECISION (unprom[i].type));
3083 : 718887 : single_use_p |= op_single_use_p;
3084 : : }
3085 : : }
3086 : : else
3087 : : return NULL;
3088 : : }
3089 : :
3090 : : /* Although the operation could be done in operation_precision, we have
3091 : : to balance that against introducing extra truncations or extensions.
3092 : : Calculate the minimum precision that can be handled efficiently.
3093 : :
3094 : : The loop above determined that the operation could be handled
3095 : : efficiently in MIN_PRECISION if SINGLE_USE_P; this would shift an
3096 : : extension from the inputs to the output without introducing more
3097 : : instructions, and would reduce the number of instructions required
3098 : : for STMT_INFO itself.
3099 : :
3100 : : vect_determine_precisions has also determined that the result only
3101 : : needs min_output_precision bits. Truncating by a factor of N times
3102 : : requires a tree of N - 1 instructions, so if TYPE is N times wider
3103 : : than min_output_precision, doing the operation in TYPE and truncating
3104 : : the result requires N + (N - 1) = 2N - 1 instructions per output vector.
3105 : : In contrast:
3106 : :
3107 : : - truncating the input to a unary operation and doing the operation
3108 : : in the new type requires at most N - 1 + 1 = N instructions per
3109 : : output vector
3110 : :
3111 : : - doing the same for a binary operation requires at most
3112 : : (N - 1) * 2 + 1 = 2N - 1 instructions per output vector
3113 : :
3114 : : Both unary and binary operations require fewer instructions than
3115 : : this if the operands were extended from a suitable truncated form.
3116 : : Thus there is usually nothing to lose by doing operations in
3117 : : min_output_precision bits, but there can be something to gain. */
/* Without a single-use operand only the output requirement counts; with one,
   the smaller of the operand-derived and output-derived precisions is
   used.  */
3118 : 948522 : if (!single_use_p)
3119 : 695647 : min_precision = last_stmt_info->min_output_precision;
3120 : : else
3121 : 252875 : min_precision = MIN (min_precision, last_stmt_info->min_output_precision);
3122 : :
3123 : : /* Apply the minimum efficient precision we just calculated. */
/* MIN_PRECISION acts as a lower bound on NEW_PRECISION.  The result is then
   passed through vect_element_precision (NOTE(review): presumably rounds up
   to a usable element width -- confirm), and the candidate is dropped if that
   is no narrower than the original type.  */
3124 : 948522 : if (new_precision < min_precision)
3125 : 843302 : new_precision = min_precision;
3126 : 948522 : new_precision = vect_element_precision (new_precision);
3127 : 948522 : if (new_precision >= TYPE_PRECISION (type))
3128 : : return NULL;
3129 : :
3130 : 120936 : vect_pattern_detected ("vect_recog_over_widening_pattern", last_stmt);
3131 : :
3132 : 120936 : *type_out = get_vectype_for_scalar_type (vinfo, type);
3133 : 120936 : if (!*type_out)
3134 : : return NULL;
3135 : :
3136 : : /* We've found a viable pattern. Get the new type of the operation. */
3137 : 109221 : bool unsigned_p = (last_stmt_info->operation_sign == UNSIGNED);
3138 : 109221 : tree new_type = build_nonstandard_integer_type (new_precision, unsigned_p);
3139 : :
3140 : : /* If we're truncating an operation, we need to make sure that we
3141 : : don't introduce new undefined overflow. The codes tested here are
3142 : : a subset of those accepted by vect_truncatable_operation_p. */
/* OP_TYPE is the type the arithmetic is actually done in: the unsigned
   variant of NEW_TYPE for +, - and * when NEW_TYPE has undefined overflow,
   otherwise NEW_TYPE itself.  */
3143 : 109221 : tree op_type = new_type;
3144 : 109221 : if (TYPE_OVERFLOW_UNDEFINED (new_type)
3145 : 141760 : && (code == PLUS_EXPR || code == MINUS_EXPR || code == MULT_EXPR))
3146 : 24481 : op_type = build_nonstandard_integer_type (new_precision, true);
3147 : :
3148 : : /* We specifically don't check here whether the target supports the
3149 : : new operation, since it might be something that a later pattern
3150 : : wants to rewrite anyway. If targets have a minimum element size
3151 : : for some optabs, we should pattern-match smaller ops to larger ops
3152 : : where beneficial. */
3153 : 109221 : tree new_vectype = get_vectype_for_scalar_type (vinfo, new_type);
3154 : 109221 : tree op_vectype = get_vectype_for_scalar_type (vinfo, op_type);
3155 : 109221 : if (!new_vectype || !op_vectype)
3156 : : return NULL;
3157 : :
3158 : 109221 : if (dump_enabled_p ())
3159 : 4086 : dump_printf_loc (MSG_NOTE, vect_location, "demoting %T to %T\n",
3160 : : type, new_type);
3161 : :
3162 : : /* Calculate the rhs operands for an operation on OP_TYPE. */
/* Operands before FIRST_OP (only the COND_EXPR condition, when FIRST_OP is 2)
   are copied unchanged; the remaining NOPS slots are filled in by
   vect_convert_inputs from the UNPROM entries.  */
3163 : 109221 : tree ops[3] = {};
3164 : 109311 : for (unsigned int i = 1; i < first_op; ++i)
3165 : 90 : ops[i - 1] = gimple_op (last_stmt, i);
3166 : 109221 : vect_convert_inputs (vinfo, last_stmt_info, nops, &ops[first_op - 1],
3167 : 109221 : op_type, &unprom[0], op_vectype);
3168 : :
3169 : : /* Use the operation to produce a result of type OP_TYPE. */
3170 : 109221 : tree new_var = vect_recog_temp_ssa_var (op_type, NULL);
3171 : 109221 : gimple *pattern_stmt = gimple_build_assign (new_var, code,
3172 : : ops[0], ops[1], ops[2]);
/* The narrowed statement inherits the source location of the original.  */
3173 : 109221 : gimple_set_location (pattern_stmt, gimple_location (last_stmt));
3174 : :
3175 : 109221 : if (dump_enabled_p ())
3176 : 4086 : dump_printf_loc (MSG_NOTE, vect_location,
3177 : : "created pattern stmt: %G", pattern_stmt);
3178 : :
3179 : : /* Convert back to the original signedness, if OP_TYPE is different
3180 : : from NEW_TYPE. */
3181 : 109221 : if (op_type != new_type)
3182 : 24481 : pattern_stmt = vect_convert_output (vinfo, last_stmt_info, new_type,
3183 : : pattern_stmt, op_vectype);
3184 : :
3185 : : /* Promote the result to the original type. */
3186 : 109221 : pattern_stmt = vect_convert_output (vinfo, last_stmt_info, type,
3187 : : pattern_stmt, new_vectype);
3188 : :
3189 : 109221 : return pattern_stmt;
3190 : 948986 : }
3191 : :
3192 : : /* Recognize the following patterns:
3193 : :
3194 : : ATYPE a; // narrower than TYPE
3195 : : BTYPE b; // narrower than TYPE
3196 : :
3197 : : 1) Multiply high with scaling (matched as IFN_MULHS)
3198 : : TYPE res = ((TYPE) a * (TYPE) b) >> c;
3199 : : Here, c is bitsize (TYPE) / 2 - 1.
3200 : :
3201 : : 2) ... or also with rounding (matched as IFN_MULHRS)
3202 : : TYPE res = ((((TYPE) a * (TYPE) b) >> d) + 1) >> 1;
3203 : : Here, d is bitsize (TYPE) / 2 - 2.
3204 : :
3205 : : 3) Normal multiply high (matched as IFN_MULH)
3206 : : TYPE res = ((TYPE) a * (TYPE) b) >> e;
3207 : : Here, e is bitsize (TYPE) / 2.
3208 : :
3209 : : where only the bottom half of res is used. On a match, set *TYPE_OUT and return the pattern statement; otherwise return NULL. */
3210 : :
3211 : : static gimple *
3212 : 25017182 : vect_recog_mulhs_pattern (vec_info *vinfo,
3213 : : stmt_vec_info last_stmt_info, tree *type_out)
3214 : : {
3215 : : /* Check for a right shift. */
3216 : 25017182 : gassign *last_stmt = dyn_cast <gassign *> (last_stmt_info->stmt);
3217 : 17204628 : if (!last_stmt
3218 : 17204628 : || gimple_assign_rhs_code (last_stmt) != RSHIFT_EXPR)
3219 : : return NULL;
3220 : :
3221 : : /* Check that the shift result is wider than the users of the
3222 : : result need (i.e. that narrowing would be a natural choice). */
3223 : 281105 : tree lhs_type = TREE_TYPE (gimple_assign_lhs (last_stmt));
3224 : 281105 : unsigned int target_precision
3225 : 281105 : = vect_element_precision (last_stmt_info->min_output_precision);
3226 : 281105 : if (!INTEGRAL_TYPE_P (lhs_type)
3227 : 281105 : || target_precision >= TYPE_PRECISION (lhs_type))
3228 : : return NULL;
3229 : :
3230 : : /* Look through any change in sign on the outer shift input. */
3231 : 39296 : vect_unpromoted_value unprom_rshift_input;
3232 : 39296 : tree rshift_input = vect_look_through_possible_promotion
3233 : 39296 : (vinfo, gimple_assign_rhs1 (last_stmt), &unprom_rshift_input);
3234 : 39296 : if (!rshift_input
3235 : 39296 : || TYPE_PRECISION (TREE_TYPE (rshift_input))
3236 : 38630 : != TYPE_PRECISION (lhs_type))
3237 : : return NULL;
3238 : :
3239 : : /* Get the definition of the shift input. */
3240 : 36526 : stmt_vec_info rshift_input_stmt_info
3241 : 36526 : = vect_get_internal_def (vinfo, rshift_input);
3242 : 36526 : if (!rshift_input_stmt_info)
3243 : : return NULL;
3244 : 32261 : gassign *rshift_input_stmt
3245 : 25045528 : = dyn_cast <gassign *> (rshift_input_stmt_info->stmt);
3246 : 28415 : if (!rshift_input_stmt)
3247 : : return NULL;
3248 : :
3249 : 28415 : stmt_vec_info mulh_stmt_info;
3250 : 28415 : tree scale_term;
3251 : 28415 : bool rounding_p = false;
3252 : :
3253 : : /* Check for the presence of the rounding term. */
3254 : 34517 : if (gimple_assign_rhs_code (rshift_input_stmt) == PLUS_EXPR)
3255 : : {
3256 : : /* Check that the outer shift was by 1. */
3257 : 16582 : if (!integer_onep (gimple_assign_rhs2 (last_stmt)))
3258 : 8243 : return NULL;
3259 : :
3260 : : /* Check that the second operand of the PLUS_EXPR is 1. */
3261 : 1704 : if (!integer_onep (gimple_assign_rhs2 (rshift_input_stmt)))
3262 : : return NULL;
3263 : :
3264 : : /* Look through any change in sign on the addition input. */
3265 : 91 : vect_unpromoted_value unprom_plus_input;
3266 : 91 : tree plus_input = vect_look_through_possible_promotion
3267 : 91 : (vinfo, gimple_assign_rhs1 (rshift_input_stmt), &unprom_plus_input);
3268 : 91 : if (!plus_input
3269 : 91 : || TYPE_PRECISION (TREE_TYPE (plus_input))
3270 : 91 : != TYPE_PRECISION (TREE_TYPE (rshift_input)))
3271 : : return NULL;
3272 : :
3273 : : /* Get the definition of the multiply-high-scale part. */
3274 : 91 : stmt_vec_info plus_input_stmt_info
3275 : 91 : = vect_get_internal_def (vinfo, plus_input);
3276 : 91 : if (!plus_input_stmt_info)
3277 : : return NULL;
3278 : 91 : gassign *plus_input_stmt
3279 : 8334 : = dyn_cast <gassign *> (plus_input_stmt_info->stmt);
3280 : 91 : if (!plus_input_stmt
3281 : 91 : || gimple_assign_rhs_code (plus_input_stmt) != RSHIFT_EXPR)
3282 : : return NULL;
3283 : :
3284 : : /* Look through any change in sign on the scaling input. */
3285 : 48 : vect_unpromoted_value unprom_scale_input;
3286 : 48 : tree scale_input = vect_look_through_possible_promotion
3287 : 48 : (vinfo, gimple_assign_rhs1 (plus_input_stmt), &unprom_scale_input);
3288 : 48 : if (!scale_input
3289 : 48 : || TYPE_PRECISION (TREE_TYPE (scale_input))
3290 : 48 : != TYPE_PRECISION (TREE_TYPE (plus_input)))
3291 : : return NULL;
3292 : :
3293 : : /* Get the definition of the multiply-high part. */
3294 : 48 : mulh_stmt_info = vect_get_internal_def (vinfo, scale_input);
3295 : 48 : if (!mulh_stmt_info)
3296 : : return NULL;
3297 : :
3298 : : /* Get the scaling term. */
3299 : 48 : scale_term = gimple_assign_rhs2 (plus_input_stmt);
3300 : 48 : rounding_p = true;
3301 : : }
3302 : : else
3303 : : {
3304 : 20124 : mulh_stmt_info = rshift_input_stmt_info;
3305 : 20124 : scale_term = gimple_assign_rhs2 (last_stmt);
3306 : : }
3307 : :
3308 : : /* Check that the scaling factor is constant. */
3309 : 20172 : if (TREE_CODE (scale_term) != INTEGER_CST)
3310 : : return NULL;
3311 : :
3312 : : /* Check whether the scaling input term can be seen as two widened
3313 : : inputs multiplied together. */
3314 : 57702 : vect_unpromoted_value unprom_mult[2];
3315 : 19234 : tree new_type;
3316 : 19234 : unsigned int nops
3317 : 19234 : = vect_widened_op_tree (vinfo, mulh_stmt_info, MULT_EXPR, WIDEN_MULT_EXPR,
3318 : : false, 2, unprom_mult, &new_type);
3319 : 19234 : if (nops != 2)
3320 : : return NULL;
3321 : :
3322 : : /* Don't use a type narrower than TARGET_PRECISION. */
3323 : 2248 : if (TYPE_PRECISION (new_type) < target_precision)
3324 : 0 : new_type = build_nonstandard_integer_type
3325 : 0 : (target_precision, TYPE_UNSIGNED (new_type));
3326 : :
3327 : 2248 : unsigned mult_precision = TYPE_PRECISION (new_type);
3328 : 2248 : internal_fn ifn;
3329 : : /* Check that the scaling factor is the expected one. Instead of
3330 : : TARGET_PRECISION, compare against MULT_PRECISION, the precision
3331 : : that is actually used for the internal function. */
3332 : 2248 : if (rounding_p)
3333 : : {
3334 : : /* Check pattern 2). */
3335 : 96 : if (wi::to_widest (scale_term) + mult_precision + 2
3336 : 144 : != TYPE_PRECISION (lhs_type))
3337 : : return NULL;
3338 : :
3339 : : ifn = IFN_MULHRS;
3340 : : }
3341 : : else
3342 : : {
3343 : : /* Check for pattern 1). */
3344 : 4400 : if (wi::to_widest (scale_term) + mult_precision + 1
3345 : 6600 : == TYPE_PRECISION (lhs_type))
3346 : : ifn = IFN_MULHS;
3347 : : /* Check for pattern 3). */
3348 : 2156 : else if (wi::to_widest (scale_term) + mult_precision
3349 : 4312 : == TYPE_PRECISION (lhs_type))
3350 : : ifn = IFN_MULH;
3351 : : else
3352 : : return NULL;
3353 : : }
3354 : :
3355 : 2176 : vect_pattern_detected ("vect_recog_mulhs_pattern", last_stmt);
3356 : :
3357 : : /* Check for target support. */
3358 : 2176 : tree new_vectype = get_vectype_for_scalar_type (vinfo, new_type);
3359 : 2176 : if (!new_vectype
3360 : 4336 : || !direct_internal_fn_supported_p
3361 : 2160 : (ifn, new_vectype, OPTIMIZE_FOR_SPEED))
3362 : 2107 : return NULL;
3363 : :
3364 : : /* The IR requires a valid vector type for the cast result, even though
3365 : : it's likely to be discarded. */
3366 : 69 : *type_out = get_vectype_for_scalar_type (vinfo, lhs_type);
3367 : 69 : if (!*type_out)
3368 : : return NULL;
3369 : :
3370 : : /* Generate the call to IFN (IFN_MULHRS, IFN_MULHS or IFN_MULH). */
3371 : 69 : tree new_var = vect_recog_temp_ssa_var (new_type, NULL);
3372 : 69 : tree new_ops[2];
3373 : 69 : vect_convert_inputs (vinfo, last_stmt_info, 2, new_ops, new_type,
3374 : : unprom_mult, new_vectype);
3375 : 69 : gcall *mulhrs_stmt
3376 : 69 : = gimple_build_call_internal (ifn, 2, new_ops[0], new_ops[1]);
3377 : 69 : gimple_call_set_lhs (mulhrs_stmt, new_var);
3378 : 69 : gimple_set_location (mulhrs_stmt, gimple_location (last_stmt));
3379 : :
3380 : 69 : if (dump_enabled_p ())
3381 : 0 : dump_printf_loc (MSG_NOTE, vect_location,
3382 : : "created pattern stmt: %G", (gimple *) mulhrs_stmt);
3383 : :
3384 : 69 : return vect_convert_output (vinfo, last_stmt_info, lhs_type,
3385 : 69 : mulhrs_stmt, new_vectype);
3386 : : }
3387 : :
3388 : : /* Recognize the patterns:
3389 : :
3390 : : ATYPE a; // narrower than TYPE
3391 : : BTYPE b; // narrower than TYPE
3392 : : (1) TYPE avg = ((TYPE) a + (TYPE) b) >> 1;
3393 : : or (2) TYPE avg = ((TYPE) a + (TYPE) b + 1) >> 1;
3394 : :
3395 : : where only the bottom half of avg is used. Try to transform them into:
3396 : :
3397 : : (1) NTYPE avg' = .AVG_FLOOR ((NTYPE) a, (NTYPE) b);
3398 : : or (2) NTYPE avg' = .AVG_CEIL ((NTYPE) a, (NTYPE) b);
3399 : :
3400 : : followed by:
3401 : :
3402 : : TYPE avg = (TYPE) avg';
3403 : :
3404 : : where NTYPE is no wider than half of TYPE. Since only the bottom half
3405 : : of avg is used, all or part of the cast of avg' should become redundant.
3406 : :
3407 : : If there is no target support for the internal function and NTYPE is
3408 : : unsigned, generate code to distribute rshift over plus and add a carry. */
3409 : :
3410 : : static gimple *
3411 : 25015492 : vect_recog_average_pattern (vec_info *vinfo,
3412 : : stmt_vec_info last_stmt_info, tree *type_out)
3413 : : {
3414 : : /* Check for a shift right by one bit. */
3415 : 25015492 : gassign *last_stmt = dyn_cast <gassign *> (last_stmt_info->stmt);
3416 : 17203060 : if (!last_stmt
3417 : 17203060 : || gimple_assign_rhs_code (last_stmt) != RSHIFT_EXPR
3418 : 280961 : || !integer_onep (gimple_assign_rhs2 (last_stmt)))
3419 : 24975475 : return NULL;
3420 : :
3421 : : /* Check that the shift result is wider than the users of the
3422 : : result need (i.e. that narrowing would be a natural choice). */
3423 : 40017 : tree lhs = gimple_assign_lhs (last_stmt);
3424 : 40017 : tree type = TREE_TYPE (lhs);
3425 : 40017 : unsigned int target_precision
3426 : 40017 : = vect_element_precision (last_stmt_info->min_output_precision);
3427 : 40017 : if (!INTEGRAL_TYPE_P (type) || target_precision >= TYPE_PRECISION (type))
3428 : : return NULL;
3429 : :
3430 : : /* Look through any change in sign on the shift input. */
3431 : 2455 : tree rshift_rhs = gimple_assign_rhs1 (last_stmt);
3432 : 2455 : vect_unpromoted_value unprom_plus;
3433 : 2455 : rshift_rhs = vect_look_through_possible_promotion (vinfo, rshift_rhs,
3434 : : &unprom_plus);
3435 : 2455 : if (!rshift_rhs
3436 : 2455 : || TYPE_PRECISION (TREE_TYPE (rshift_rhs)) != TYPE_PRECISION (type))
3437 : : return NULL;
3438 : :
3439 : : /* Get the definition of the shift input. */
3440 : 2445 : stmt_vec_info plus_stmt_info = vect_get_internal_def (vinfo, rshift_rhs);
3441 : 2445 : if (!plus_stmt_info)
3442 : : return NULL;
3443 : :
3444 : : /* Check whether the shift input can be seen as a tree of additions on
3445 : : 2 or 3 widened inputs.
3446 : :
3447 : : Note that the pattern should be a win even if the result of one or
3448 : : more additions is reused elsewhere: if the pattern matches, we'd be
3449 : : replacing 2N RSHIFT_EXPRs and N VEC_PACK_*s with N IFN_AVG_*s. */
3450 : 9708 : internal_fn ifn = IFN_AVG_FLOOR;
3451 : 9708 : vect_unpromoted_value unprom[3];
3452 : 2427 : tree new_type;
3453 : 2427 : unsigned int nops = vect_widened_op_tree (vinfo, plus_stmt_info, PLUS_EXPR,
3454 : : IFN_VEC_WIDEN_PLUS, false, 3,
3455 : : unprom, &new_type);
3456 : 2427 : if (nops == 0)
3457 : : return NULL;
3458 : 1104 : if (nops == 3)
3459 : : {
3460 : : /* Check that one operand is 1. */
3461 : : unsigned int i;
3462 : 921 : for (i = 0; i < 3; ++i)
3463 : 861 : if (integer_onep (unprom[i].op))
3464 : : break;
3465 : 287 : if (i == 3)
3466 : : return NULL;
3467 : : /* Throw away the 1 operand and keep the other two. */
3468 : 227 : if (i < 2)
3469 : 0 : unprom[i] = unprom[2];
3470 : : ifn = IFN_AVG_CEIL;
3471 : : }
3472 : :
3473 : 1044 : vect_pattern_detected ("vect_recog_average_pattern", last_stmt);
3474 : :
3475 : : /* We know that:
3476 : :
3477 : : (a) the operation can be viewed as:
3478 : :
3479 : : TYPE widened0 = (TYPE) UNPROM[0];
3480 : : TYPE widened1 = (TYPE) UNPROM[1];
3481 : : TYPE tmp1 = widened0 + widened1 {+ 1};
3482 : : TYPE tmp2 = tmp1 >> 1; // LAST_STMT_INFO
3483 : :
3484 : : (b) the first two statements are equivalent to:
3485 : :
3486 : : TYPE widened0 = (TYPE) (NEW_TYPE) UNPROM[0];
3487 : : TYPE widened1 = (TYPE) (NEW_TYPE) UNPROM[1];
3488 : :
3489 : : (c) vect_recog_over_widening_pattern has already tried to narrow TYPE
3490 : : where sensible;
3491 : :
3492 : : (d) all the operations can be performed correctly at twice the width of
3493 : : NEW_TYPE, due to the nature of the average operation; and
3494 : :
3495 : : (e) users of the result of the right shift need only TARGET_PRECISION
3496 : : bits, where TARGET_PRECISION is no more than half of TYPE's
3497 : : precision.
3498 : :
3499 : : Under these circumstances, the only situation in which NEW_TYPE
3500 : : could be narrower than TARGET_PRECISION is if widened0, widened1
3501 : : and an addition result are all used more than once. Thus we can
3502 : : treat any widening of UNPROM[0] and UNPROM[1] to TARGET_PRECISION
3503 : : as "free", whereas widening the result of the average instruction
3504 : : from NEW_TYPE to TARGET_PRECISION would be a new operation. It's
3505 : : therefore better not to go narrower than TARGET_PRECISION. */
3506 : 1044 : if (TYPE_PRECISION (new_type) < target_precision)
3507 : 8 : new_type = build_nonstandard_integer_type (target_precision,
3508 : 8 : TYPE_UNSIGNED (new_type));
3509 : :
3510 : : /* Check for target support. */
3511 : 1044 : tree new_vectype = get_vectype_for_scalar_type (vinfo, new_type);
3512 : 1044 : if (!new_vectype)
3513 : : return NULL;
3514 : :
3515 : 1044 : bool fallback_p = false;
3516 : :
3517 : 1044 : if (direct_internal_fn_supported_p (ifn, new_vectype, OPTIMIZE_FOR_SPEED))
3518 : : ;
3519 : 922 : else if (TYPE_UNSIGNED (new_type)
3520 : 267 : && optab_for_tree_code (RSHIFT_EXPR, new_vectype, optab_scalar)
3521 : 267 : && optab_for_tree_code (PLUS_EXPR, new_vectype, optab_default)
3522 : 267 : && optab_for_tree_code (BIT_IOR_EXPR, new_vectype, optab_default)
3523 : 1189 : && optab_for_tree_code (BIT_AND_EXPR, new_vectype, optab_default))
3524 : : fallback_p = true;
3525 : : else
3526 : 655 : return NULL;
3527 : :
3528 : : /* The IR requires a valid vector type for the cast result, even though
3529 : : it's likely to be discarded. */
3530 : 389 : *type_out = get_vectype_for_scalar_type (vinfo, type);
3531 : 389 : if (!*type_out)
3532 : : return NULL;
3533 : :
3534 : 388 : tree new_var = vect_recog_temp_ssa_var (new_type, NULL);
3535 : 388 : tree new_ops[2];
3536 : 388 : vect_convert_inputs (vinfo, last_stmt_info, 2, new_ops, new_type,
3537 : : unprom, new_vectype);
3538 : :
3539 : 388 : if (fallback_p)
3540 : : {
3541 : : /* As a fallback, generate code for following sequence:
3542 : :
3543 : : shifted_op0 = new_ops[0] >> 1;
3544 : : shifted_op1 = new_ops[1] >> 1;
3545 : : sum_of_shifted = shifted_op0 + shifted_op1;
3546 : : unmasked_carry = new_ops[0] | new_ops[1]; (& instead of | for IFN_AVG_FLOOR)
3547 : : carry = unmasked_carry & 1;
3548 : : new_var = sum_of_shifted + carry;
3549 : : */
3550 : :
3551 : 266 : tree one_cst = build_one_cst (new_type);
3552 : 266 : gassign *g;
3553 : :
3554 : 266 : tree shifted_op0 = vect_recog_temp_ssa_var (new_type, NULL);
3555 : 266 : g = gimple_build_assign (shifted_op0, RSHIFT_EXPR, new_ops[0], one_cst);
3556 : 266 : append_pattern_def_seq (vinfo, last_stmt_info, g, new_vectype);
3557 : :
3558 : 266 : tree shifted_op1 = vect_recog_temp_ssa_var (new_type, NULL);
3559 : 266 : g = gimple_build_assign (shifted_op1, RSHIFT_EXPR, new_ops[1], one_cst);
3560 : 266 : append_pattern_def_seq (vinfo, last_stmt_info, g, new_vectype);
3561 : :
3562 : 266 : tree sum_of_shifted = vect_recog_temp_ssa_var (new_type, NULL);
3563 : 266 : g = gimple_build_assign (sum_of_shifted, PLUS_EXPR,
3564 : : shifted_op0, shifted_op1);
3565 : 266 : append_pattern_def_seq (vinfo, last_stmt_info, g, new_vectype);
3566 : :
3567 : 266 : tree unmasked_carry = vect_recog_temp_ssa_var (new_type, NULL);
3568 : 266 : tree_code c = (ifn == IFN_AVG_CEIL) ? BIT_IOR_EXPR : BIT_AND_EXPR;
3569 : 266 : g = gimple_build_assign (unmasked_carry, c, new_ops[0], new_ops[1]);
3570 : 266 : append_pattern_def_seq (vinfo, last_stmt_info, g, new_vectype);
3571 : :
3572 : 266 : tree carry = vect_recog_temp_ssa_var (new_type, NULL);
3573 : 266 : g = gimple_build_assign (carry, BIT_AND_EXPR, unmasked_carry, one_cst);
3574 : 266 : append_pattern_def_seq (vinfo, last_stmt_info, g, new_vectype);
3575 : :
3576 : 266 : g = gimple_build_assign (new_var, PLUS_EXPR, sum_of_shifted, carry);
3577 : 266 : return vect_convert_output (vinfo, last_stmt_info, type, g, new_vectype);
3578 : : }
3579 : :
3580 : : /* Generate the IFN_AVG* call. */
3581 : 122 : gcall *average_stmt = gimple_build_call_internal (ifn, 2, new_ops[0],
3582 : : new_ops[1]);
3583 : 122 : gimple_call_set_lhs (average_stmt, new_var);
3584 : 122 : gimple_set_location (average_stmt, gimple_location (last_stmt));
3585 : :
3586 : 122 : if (dump_enabled_p ())
3587 : 42 : dump_printf_loc (MSG_NOTE, vect_location,
3588 : : "created pattern stmt: %G", (gimple *) average_stmt);
3589 : :
3590 : 122 : return vect_convert_output (vinfo, last_stmt_info,
3591 : 122 : type, average_stmt, new_vectype);
3592 : : }
3593 : :
3594 : : /* Recognize cases in which the input to a cast is wider than its
3595 : : output, and the input is fed by a widening operation. Fold this
3596 : : by removing the unnecessary intermediate widening. E.g.:
3597 : :
3598 : : unsigned char a;
3599 : : unsigned int b = (unsigned int) a;
3600 : : unsigned short c = (unsigned short) b;
3601 : :
3602 : : -->
3603 : :
3604 : : unsigned short c = (unsigned short) a;
3605 : :
3606 : : Although this is rare in input IR, it is an expected side-effect
3607 : : of the over-widening pattern above.
3608 : :
3609 : : This is beneficial also for integer-to-float conversions, if the
3610 : : widened integer has more bits than the float, and if the unwidened
3611 : : input doesn't. */
3612 : :
3613 : : static gimple *
3614 : 25017182 : vect_recog_cast_forwprop_pattern (vec_info *vinfo,
3615 : : stmt_vec_info last_stmt_info, tree *type_out)
3616 : : {
3617 : : /* Check for a cast, including an integer-to-float conversion. */
3618 : 42183913 : gassign *last_stmt = dyn_cast <gassign *> (last_stmt_info->stmt);
3619 : 17204559 : if (!last_stmt)
3620 : : return NULL;
3621 : 17204559 : tree_code code = gimple_assign_rhs_code (last_stmt);
3622 : 17204559 : if (!CONVERT_EXPR_CODE_P (code) && code != FLOAT_EXPR)
3623 : : return NULL;
3624 : :
3625 : : /* Make sure that the lhs is a non-boolean scalar whose mode is a scalar mode. */
3626 : 2423568 : tree lhs = gimple_assign_lhs (last_stmt);
3627 : 2423568 : if (!lhs)
3628 : : return NULL;
3629 : 2423568 : tree lhs_type = TREE_TYPE (lhs);
3630 : 2423568 : scalar_mode lhs_mode;
3631 : 2406823 : if (VECT_SCALAR_BOOLEAN_TYPE_P (lhs_type)
3632 : 4827818 : || !is_a <scalar_mode> (TYPE_MODE (lhs_type), &lhs_mode))
3633 : 24590 : return NULL;
3634 : :
3635 : : /* Check for a narrowing operation (from a vector point of view). */
3636 : 2398978 : tree rhs = gimple_assign_rhs1 (last_stmt);
3637 : 2398978 : tree rhs_type = TREE_TYPE (rhs);
3638 : 2398978 : if (!INTEGRAL_TYPE_P (rhs_type)
3639 : 2150800 : || VECT_SCALAR_BOOLEAN_TYPE_P (rhs_type)
3640 : 6541028 : || TYPE_PRECISION (rhs_type) <= GET_MODE_BITSIZE (lhs_mode))
3641 : : return NULL;
3642 : :
3643 : : /* Try to find an unpromoted input. */
3644 : 295997 : vect_unpromoted_value unprom;
3645 : 295997 : if (!vect_look_through_possible_promotion (vinfo, rhs, &unprom)
3646 : 295997 : || TYPE_PRECISION (unprom.type) >= TYPE_PRECISION (rhs_type))
3647 : : return NULL;
3648 : :
3649 : : /* If the bits above RHS_TYPE matter (LHS_TYPE is not an integral type),
3650 : : make sure that they're the same when extending from UNPROM as they are when extending from RHS. */
3651 : 37948 : if (!INTEGRAL_TYPE_P (lhs_type)
3652 : 37948 : && TYPE_SIGN (rhs_type) != TYPE_SIGN (unprom.type))
3653 : : return NULL;
3654 : :
3655 : : /* We can get the same result by casting UNPROM directly, to avoid
3656 : : the unnecessary widening and narrowing. */
3657 : 37828 : vect_pattern_detected ("vect_recog_cast_forwprop_pattern", last_stmt);
3658 : :
3659 : 37828 : *type_out = get_vectype_for_scalar_type (vinfo, lhs_type);
3660 : 37828 : if (!*type_out)
3661 : : return NULL;
3662 : :
3663 : 37828 : tree new_var = vect_recog_temp_ssa_var (lhs_type, NULL);
3664 : 37828 : gimple *pattern_stmt = gimple_build_assign (new_var, code, unprom.op);
3665 : 37828 : gimple_set_location (pattern_stmt, gimple_location (last_stmt));
3666 : :
3667 : 37828 : return pattern_stmt;
3668 : : }
3669 : :
3670 : : /* Try to detect a left shift of a widened input and convert the LSHIFT_EXPR
3671 : : to a WIDEN_LSHIFT_EXPR. Thin wrapper; see vect_recog_widen_op_pattern for details. */
3672 : :
3673 : : static gimple *
3674 : 24962040 : vect_recog_widen_shift_pattern (vec_info *vinfo,
3675 : : stmt_vec_info last_stmt_info, tree *type_out)
3676 : : {
3677 : 24962040 : return vect_recog_widen_op_pattern (vinfo, last_stmt_info, type_out,
3678 : : LSHIFT_EXPR, WIDEN_LSHIFT_EXPR, true,
3679 : 24962040 : "vect_recog_widen_shift_pattern");
3680 : : }
3681 : :
3682 : : /* Detect a rotate pattern that wouldn't otherwise be vectorized:
3683 : :
3684 : : type a_t, b_t, c_t;
3685 : :
3686 : : S0 a_t = b_t r<< c_t;
3687 : :
3688 : : Input/Output:
3689 : :
3690 : : * STMT_VINFO: The stmt from which the pattern search begins,
3691 : : i.e. the shift/rotate stmt. The original stmt (S0) is replaced
3692 : : with a sequence:
3693 : :
3694 : : S1 d_t = -c_t;
3695 : : S2 e_t = d_t & (B - 1);
3696 : : S3 f_t = b_t << c_t;
3697 : : S4 g_t = b_t >> e_t;
3698 : : S0 a_t = f_t | g_t;
3699 : :
3700 : : where B is element bitsize of type.
3701 : :
3702 : : Output:
3703 : :
3704 : : * TYPE_OUT: The type of the output of this pattern.
3705 : :
3706 : : * Return value: A new stmt that will be used to replace the rotate
3707 : : S0 stmt. */
3708 : :
3709 : : static gimple *
3710 : 24962040 : vect_recog_rotate_pattern (vec_info *vinfo,
3711 : : stmt_vec_info stmt_vinfo, tree *type_out)
3712 : : {
3713 : 24962040 : gimple *last_stmt = stmt_vinfo->stmt;
3714 : 24962040 : tree oprnd0, oprnd1, lhs, var, var1, var2, vectype, type, stype, def, def2;
3715 : 24962040 : gimple *pattern_stmt, *def_stmt;
3716 : 24962040 : enum tree_code rhs_code;
3717 : 24962040 : enum vect_def_type dt;
3718 : 24962040 : optab optab1, optab2;
3719 : 24962040 : edge ext_def = NULL;
3720 : 24962040 : bool bswap16_p = false;
3721 : :
3722 : 24962040 : if (is_gimple_assign (last_stmt))
3723 : : {
3724 : 17149388 : rhs_code = gimple_assign_rhs_code (last_stmt);
3725 : 17149388 : switch (rhs_code)
3726 : : {
3727 : 3963 : case LROTATE_EXPR:
3728 : 3963 : case RROTATE_EXPR:
3729 : 3963 : break;
3730 : : default:
3731 : : return NULL;
3732 : : }
3733 : :
3734 : 3963 : lhs = gimple_assign_lhs (last_stmt);
3735 : 3963 : oprnd0 = gimple_assign_rhs1 (last_stmt);
3736 : 3963 : type = TREE_TYPE (oprnd0);
3737 : 3963 : oprnd1 = gimple_assign_rhs2 (last_stmt);
3738 : : }
3739 : 7812652 : else if (gimple_call_builtin_p (last_stmt, BUILT_IN_BSWAP16))
3740 : : {
3741 : : /* __builtin_bswap16 (x) is another form of x r>> 8.
3742 : : The vectorizer has bswap support, but only if the argument isn't
3743 : : promoted. */
3744 : 136 : lhs = gimple_call_lhs (last_stmt);
3745 : 136 : oprnd0 = gimple_call_arg (last_stmt, 0);
3746 : 136 : type = TREE_TYPE (oprnd0);
3747 : 136 : if (!lhs
3748 : 136 : || TYPE_PRECISION (TREE_TYPE (lhs)) != 16
3749 : 136 : || TYPE_PRECISION (type) <= 16
3750 : 124 : || TREE_CODE (oprnd0) != SSA_NAME
3751 : 260 : || BITS_PER_UNIT != 8)
3752 : 19 : return NULL;
3753 : :
3754 : 124 : stmt_vec_info def_stmt_info;
3755 : 124 : if (!vect_is_simple_use (oprnd0, vinfo, &dt, &def_stmt_info, &def_stmt))
3756 : : return NULL;
3757 : :
3758 : 124 : if (dt != vect_internal_def)
3759 : : return NULL;
3760 : :
3761 : 120 : if (gimple_assign_cast_p (def_stmt))
3762 : : {
3763 : 70 : def = gimple_assign_rhs1 (def_stmt);
3764 : 140 : if (INTEGRAL_TYPE_P (TREE_TYPE (def))
3765 : 140 : && TYPE_PRECISION (TREE_TYPE (def)) == 16)
3766 : : oprnd0 = def;
3767 : : }
3768 : :
3769 : 120 : type = TREE_TYPE (lhs);
3770 : 120 : vectype = get_vectype_for_scalar_type (vinfo, type);
3771 : 120 : if (vectype == NULL_TREE)
3772 : : return NULL;
3773 : :
3774 : 120 : if (tree char_vectype = get_same_sized_vectype (char_type_node, vectype))
3775 : : {
3776 : : /* The encoding uses one stepped pattern for each byte in the
3777 : : 16-bit word. */
3778 : 120 : vec_perm_builder elts (TYPE_VECTOR_SUBPARTS (char_vectype), 2, 3);
3779 : 480 : for (unsigned i = 0; i < 3; ++i)
3780 : 1080 : for (unsigned j = 0; j < 2; ++j)
3781 : 720 : elts.quick_push ((i + 1) * 2 - j - 1);
3782 : :
3783 : 120 : vec_perm_indices indices (elts, 1,
3784 : 120 : TYPE_VECTOR_SUBPARTS (char_vectype));
3785 : 120 : machine_mode vmode = TYPE_MODE (char_vectype);
3786 : 120 : if (can_vec_perm_const_p (vmode, vmode, indices))
3787 : : {
3788 : : /* vectorizable_bswap can handle the __builtin_bswap16 if we
3789 : : undo the argument promotion. */
3790 : 3 : if (!useless_type_conversion_p (type, TREE_TYPE (oprnd0)))
3791 : : {
3792 : 0 : def = vect_recog_temp_ssa_var (type, NULL);
3793 : 0 : def_stmt = gimple_build_assign (def, NOP_EXPR, oprnd0);
3794 : 0 : append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
3795 : 0 : oprnd0 = def;
3796 : : }
3797 : :
3798 : : /* Pattern detected. */
3799 : 3 : vect_pattern_detected ("vect_recog_rotate_pattern", last_stmt);
3800 : :
3801 : 3 : *type_out = vectype;
3802 : :
3803 : : /* Pattern supported. Create a stmt to be used to replace the
3804 : : pattern, with the unpromoted argument. */
3805 : 3 : var = vect_recog_temp_ssa_var (type, NULL);
3806 : 3 : pattern_stmt = gimple_build_call (gimple_call_fndecl (last_stmt),
3807 : : 1, oprnd0);
3808 : 3 : gimple_call_set_lhs (pattern_stmt, var);
3809 : 3 : gimple_call_set_fntype (as_a <gcall *> (pattern_stmt),
3810 : : gimple_call_fntype (last_stmt));
3811 : 3 : return pattern_stmt;
3812 : : }
3813 : 120 : }
3814 : :
3815 : 117 : oprnd1 = build_int_cst (integer_type_node, 8);
3816 : 117 : rhs_code = LROTATE_EXPR;
3817 : 117 : bswap16_p = true;
3818 : : }
3819 : : else
3820 : : return NULL;
3821 : :
3822 : 4080 : if (TREE_CODE (oprnd0) != SSA_NAME
3823 : 3958 : || !INTEGRAL_TYPE_P (type)
3824 : 8031 : || TYPE_PRECISION (TREE_TYPE (lhs)) != TYPE_PRECISION (type))
3825 : : return NULL;
3826 : :
3827 : 3951 : stmt_vec_info def_stmt_info;
3828 : 3951 : if (!vect_is_simple_use (oprnd1, vinfo, &dt, &def_stmt_info, &def_stmt))
3829 : : return NULL;
3830 : :
3831 : 3951 : if (dt != vect_internal_def
3832 : 3739 : && dt != vect_constant_def
3833 : 21 : && dt != vect_external_def)
3834 : : return NULL;
3835 : :
3836 : 3945 : vectype = get_vectype_for_scalar_type (vinfo, type);
3837 : 3945 : if (vectype == NULL_TREE)
3838 : : return NULL;
3839 : :
3840 : : /* If vector/vector or vector/scalar rotate is supported by the target,
3841 : : don't do anything here. */
3842 : 3802 : optab1 = optab_for_tree_code (rhs_code, vectype, optab_vector);
3843 : 3802 : if (optab1
3844 : 3802 : && optab_handler (optab1, TYPE_MODE (vectype)) != CODE_FOR_nothing)
3845 : : {
3846 : 34 : use_rotate:
3847 : 34 : if (bswap16_p)
3848 : : {
3849 : 0 : if (!useless_type_conversion_p (type, TREE_TYPE (oprnd0)))
3850 : : {
3851 : 0 : def = vect_recog_temp_ssa_var (type, NULL);
3852 : 0 : def_stmt = gimple_build_assign (def, NOP_EXPR, oprnd0);
3853 : 0 : append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
3854 : 0 : oprnd0 = def;
3855 : : }
3856 : :
3857 : : /* Pattern detected. */
3858 : 0 : vect_pattern_detected ("vect_recog_rotate_pattern", last_stmt);
3859 : :
3860 : 0 : *type_out = vectype;
3861 : :
3862 : : /* Pattern supported. Create a stmt to be used to replace the
3863 : : pattern. */
3864 : 0 : var = vect_recog_temp_ssa_var (type, NULL);
3865 : 0 : pattern_stmt = gimple_build_assign (var, LROTATE_EXPR, oprnd0,
3866 : : oprnd1);
3867 : 0 : return pattern_stmt;
3868 : : }
3869 : : return NULL;
3870 : : }
3871 : :
3872 : 3768 : if (is_a <bb_vec_info> (vinfo) || dt != vect_internal_def)
3873 : : {
3874 : 3707 : optab2 = optab_for_tree_code (rhs_code, vectype, optab_scalar);
3875 : 3707 : if (optab2
3876 : 3707 : && optab_handler (optab2, TYPE_MODE (vectype)) != CODE_FOR_nothing)
3877 : 0 : goto use_rotate;
3878 : : }
3879 : :
3880 : 3768 : tree utype = unsigned_type_for (type);
3881 : 3768 : tree uvectype = get_vectype_for_scalar_type (vinfo, utype);
3882 : 3768 : if (!uvectype)
3883 : : return NULL;
3884 : :
3885 : : /* If vector/vector or vector/scalar shifts aren't supported by the target,
3886 : : don't do anything here either. */
3887 : 3768 : optab1 = optab_for_tree_code (LSHIFT_EXPR, uvectype, optab_vector);
3888 : 3768 : optab2 = optab_for_tree_code (RSHIFT_EXPR, uvectype, optab_vector);
3889 : 3768 : if (!optab1
3890 : 3768 : || optab_handler (optab1, TYPE_MODE (uvectype)) == CODE_FOR_nothing
3891 : 331 : || !optab2
3892 : 4099 : || optab_handler (optab2, TYPE_MODE (uvectype)) == CODE_FOR_nothing)
3893 : : {
3894 : 3437 : if (! is_a <bb_vec_info> (vinfo) && dt == vect_internal_def)
3895 : : return NULL;
3896 : 3394 : optab1 = optab_for_tree_code (LSHIFT_EXPR, uvectype, optab_scalar);
3897 : 3394 : optab2 = optab_for_tree_code (RSHIFT_EXPR, uvectype, optab_scalar);
3898 : 3394 : if (!optab1
3899 : 3394 : || optab_handler (optab1, TYPE_MODE (uvectype)) == CODE_FOR_nothing
3900 : 2869 : || !optab2
3901 : 6263 : || optab_handler (optab2, TYPE_MODE (uvectype)) == CODE_FOR_nothing)
3902 : 525 : return NULL;
3903 : : }
3904 : :
3905 : 3200 : *type_out = vectype;
3906 : :
3907 : 3200 : if (!useless_type_conversion_p (utype, TREE_TYPE (oprnd0)))
3908 : : {
3909 : 101 : def = vect_recog_temp_ssa_var (utype, NULL);
3910 : 101 : def_stmt = gimple_build_assign (def, NOP_EXPR, oprnd0);
3911 : 101 : append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt, uvectype);
3912 : 101 : oprnd0 = def;
3913 : : }
3914 : :
3915 : 3200 : if (dt == vect_external_def && TREE_CODE (oprnd1) == SSA_NAME)
3916 : 13 : ext_def = vect_get_external_def_edge (vinfo, oprnd1);
3917 : :
3918 : 3200 : def = NULL_TREE;
3919 : 3200 : scalar_int_mode mode = SCALAR_INT_TYPE_MODE (utype);
3920 : 3200 : if (dt != vect_internal_def || TYPE_MODE (TREE_TYPE (oprnd1)) == mode)
3921 : : def = oprnd1;
3922 : 31 : else if (def_stmt && gimple_assign_cast_p (def_stmt))
3923 : : {
3924 : 0 : tree rhs1 = gimple_assign_rhs1 (def_stmt);
3925 : 0 : if (TYPE_MODE (TREE_TYPE (rhs1)) == mode
3926 : 0 : && TYPE_PRECISION (TREE_TYPE (rhs1))
3927 : 0 : == TYPE_PRECISION (type))
3928 : : def = rhs1;
3929 : : }
3930 : :
3931 : 3169 : if (def == NULL_TREE)
3932 : : {
3933 : 31 : def = vect_recog_temp_ssa_var (utype, NULL);
3934 : 31 : def_stmt = gimple_build_assign (def, NOP_EXPR, oprnd1);
3935 : 31 : append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt, uvectype);
3936 : : }
3937 : 3200 : stype = TREE_TYPE (def);
3938 : :
3939 : 3200 : if (TREE_CODE (def) == INTEGER_CST)
3940 : : {
3941 : 3086 : if (!tree_fits_uhwi_p (def)
3942 : 3086 : || tree_to_uhwi (def) >= GET_MODE_PRECISION (mode)
3943 : 6172 : || integer_zerop (def))
3944 : 0 : return NULL;
3945 : 3086 : def2 = build_int_cst (stype,
3946 : 3086 : GET_MODE_PRECISION (mode) - tree_to_uhwi (def));
3947 : : }
3948 : : else
3949 : : {
3950 : 114 : tree vecstype = get_vectype_for_scalar_type (vinfo, stype);
3951 : :
3952 : 114 : if (vecstype == NULL_TREE)
3953 : : return NULL;
3954 : 114 : def2 = vect_recog_temp_ssa_var (stype, NULL);
3955 : 114 : def_stmt = gimple_build_assign (def2, NEGATE_EXPR, def);
3956 : 114 : if (ext_def)
3957 : : {
3958 : 13 : basic_block new_bb
3959 : 13 : = gsi_insert_on_edge_immediate (ext_def, def_stmt);
3960 : 13 : gcc_assert (!new_bb);
3961 : : }
3962 : : else
3963 : 101 : append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt, vecstype);
3964 : :
3965 : 114 : def2 = vect_recog_temp_ssa_var (stype, NULL);
3966 : 114 : tree mask = build_int_cst (stype, GET_MODE_PRECISION (mode) - 1);
3967 : 114 : def_stmt = gimple_build_assign (def2, BIT_AND_EXPR,
3968 : : gimple_assign_lhs (def_stmt), mask);
3969 : 114 : if (ext_def)
3970 : : {
3971 : 13 : basic_block new_bb
3972 : 13 : = gsi_insert_on_edge_immediate (ext_def, def_stmt);
3973 : 13 : gcc_assert (!new_bb);
3974 : : }
3975 : : else
3976 : 101 : append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt, vecstype);
3977 : : }
3978 : :
3979 : 3200 : var1 = vect_recog_temp_ssa_var (utype, NULL);
3980 : 6196 : def_stmt = gimple_build_assign (var1, rhs_code == LROTATE_EXPR
3981 : : ? LSHIFT_EXPR : RSHIFT_EXPR,
3982 : : oprnd0, def);
3983 : 3200 : append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt, uvectype);
3984 : :
3985 : 3200 : var2 = vect_recog_temp_ssa_var (utype, NULL);
3986 : 6196 : def_stmt = gimple_build_assign (var2, rhs_code == LROTATE_EXPR
3987 : : ? RSHIFT_EXPR : LSHIFT_EXPR,
3988 : : oprnd0, def2);
3989 : 3200 : append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt, uvectype);
3990 : :
3991 : : /* Pattern detected. */
3992 : 3200 : vect_pattern_detected ("vect_recog_rotate_pattern", last_stmt);
3993 : :
3994 : : /* Pattern supported. Create a stmt to be used to replace the pattern. */
3995 : 3200 : var = vect_recog_temp_ssa_var (utype, NULL);
3996 : 3200 : pattern_stmt = gimple_build_assign (var, BIT_IOR_EXPR, var1, var2);
3997 : :
3998 : 3200 : if (!useless_type_conversion_p (type, utype))
3999 : : {
4000 : 49 : append_pattern_def_seq (vinfo, stmt_vinfo, pattern_stmt, uvectype);
4001 : 49 : tree result = vect_recog_temp_ssa_var (type, NULL);
4002 : 49 : pattern_stmt = gimple_build_assign (result, NOP_EXPR, var);
4003 : : }
4004 : : return pattern_stmt;
4005 : : }
4006 : :
4007 : : /* Detect a vector by vector shift pattern that wouldn't be otherwise
4008 : : vectorized:
4009 : :
4010 : : type a_t;
4011 : : TYPE b_T, res_T;
4012 : :
4013 : : S1 a_t = ;
4014 : : S2 b_T = ;
4015 : : S3 res_T = b_T op a_t;
4016 : :
4017 : : where type 'TYPE' is a type with different size than 'type',
4018 : : and op is <<, >> or rotate.
4019 : :
4020 : : Also detect cases:
4021 : :
4022 : : type a_t;
4023 : : TYPE b_T, c_T, res_T;
4024 : :
4025 : : S0 c_T = ;
4026 : : S1 a_t = (type) c_T;
4027 : : S2 b_T = ;
4028 : : S3 res_T = b_T op a_t;
4029 : :
4030 : : Input/Output:
4031 : :
4032 : : * STMT_VINFO: The stmt from which the pattern search begins,
4033 : : i.e. the shift/rotate stmt. The original stmt (S3) is replaced
4034 : : with a shift/rotate which has same type on both operands, in the
4035 : : second case just b_T op c_T, in the first case with added cast
4036 : : from a_t to c_T in STMT_VINFO_PATTERN_DEF_SEQ.
4037 : :
4038 : : Output:
4039 : :
4040 : : * TYPE_OUT: The type of the output of this pattern.
4041 : :
4042 : : * Return value: A new stmt that will be used to replace the shift/rotate
4043 : : S3 stmt. */
4044 : :
4045 : : static gimple *
4046 : 24965620 : vect_recog_vector_vector_shift_pattern (vec_info *vinfo,
4047 : : stmt_vec_info stmt_vinfo,
4048 : : tree *type_out)
4049 : : { /* Recognize a shift/rotate whose two operands have types of different modes and build a replacement whose shift amount has been brought to the type of the shifted operand. */
4050 : 24965620 : gimple *last_stmt = stmt_vinfo->stmt;
4051 : 24965620 : tree oprnd0, oprnd1, lhs, var;
4052 : 24965620 : gimple *pattern_stmt;
4053 : 24965620 : enum tree_code rhs_code;
4054 : :
4055 : 24965620 : if (!is_gimple_assign (last_stmt)) /* Only assignments can match. */
4056 : : return NULL;
4057 : :
4058 : 17153088 : rhs_code = gimple_assign_rhs_code (last_stmt);
4059 : 17153088 : switch (rhs_code) /* Only shifts and rotates are candidates. */
4060 : : {
4061 : 428758 : case LSHIFT_EXPR:
4062 : 428758 : case RSHIFT_EXPR:
4063 : 428758 : case LROTATE_EXPR:
4064 : 428758 : case RROTATE_EXPR:
4065 : 428758 : break;
4066 : : default:
4067 : : return NULL;
4068 : : }
4069 : :
4070 : 428758 : lhs = gimple_assign_lhs (last_stmt);
4071 : 428758 : oprnd0 = gimple_assign_rhs1 (last_stmt); /* Value being shifted. */
4072 : 428758 : oprnd1 = gimple_assign_rhs2 (last_stmt); /* Shift/rotate amount. */
4073 : 428758 : if (TREE_CODE (oprnd0) != SSA_NAME
4074 : 388306 : || TREE_CODE (oprnd1) != SSA_NAME
4075 : 48241 : || TYPE_MODE (TREE_TYPE (oprnd0)) == TYPE_MODE (TREE_TYPE (oprnd1)) /* Operands with equal modes are not handled by this pattern. */
4076 : 17845 : || !INTEGRAL_TYPE_P (TREE_TYPE (oprnd0))
4077 : 17579 : || !type_has_mode_precision_p (TREE_TYPE (oprnd1))
4078 : 446337 : || TYPE_PRECISION (TREE_TYPE (lhs))
4079 : 17579 : != TYPE_PRECISION (TREE_TYPE (oprnd0)))
4080 : 411179 : return NULL; /* Both operands must be SSA names, oprnd0 integral, oprnd1's type of mode precision, and the result as precise as oprnd0. */
4081 : :
4082 : 17579 : stmt_vec_info def_vinfo = vect_get_internal_def (vinfo, oprnd1);
4083 : 17579 : if (!def_vinfo) /* Give up when no internal definition is found for the shift amount. */
4084 : : return NULL;
4085 : :
4086 : 15704 : *type_out = get_vectype_for_scalar_type (vinfo, TREE_TYPE (oprnd0)); /* The pattern's output vector type is derived from oprnd0's scalar type. */
4087 : 15704 : if (*type_out == NULL_TREE)
4088 : : return NULL;
4089 : :
4090 : 10651 : tree def = NULL_TREE; /* Will hold the shift amount in a type matching oprnd0. */
4091 : 10651 : gassign *def_stmt = dyn_cast <gassign *> (def_vinfo->stmt);
4092 : 8969 : if (def_stmt && gimple_assign_cast_p (def_stmt)) /* The amount is itself the result of a cast: try to reuse the cast's source. */
4093 : : {
4094 : 1781 : tree rhs1 = gimple_assign_rhs1 (def_stmt);
4095 : 1781 : if (TYPE_MODE (TREE_TYPE (rhs1)) == TYPE_MODE (TREE_TYPE (oprnd0))
4096 : 1781 : && TYPE_PRECISION (TREE_TYPE (rhs1))
4097 : 782 : == TYPE_PRECISION (TREE_TYPE (oprnd0))) /* The cast source already has oprnd0's mode and precision. */
4098 : : {
4099 : 782 : if (TYPE_PRECISION (TREE_TYPE (oprnd1))
4100 : 782 : >= TYPE_PRECISION (TREE_TYPE (rhs1)))
4101 : : def = rhs1; /* The cast did not narrow, so the source is used unchanged. */
4102 : : else
4103 : : {
4104 : 779 : tree mask /* Keep only the low TYPE_PRECISION (oprnd1) bits of the source; presumably this reproduces the truncation done by the narrowing cast. */
4105 : 779 : = build_low_bits_mask (TREE_TYPE (rhs1),
4106 : 779 : TYPE_PRECISION (TREE_TYPE (oprnd1)));
4107 : 779 : def = vect_recog_temp_ssa_var (TREE_TYPE (rhs1), NULL);
4108 : 779 : def_stmt = gimple_build_assign (def, BIT_AND_EXPR, rhs1, mask);
4109 : 779 : tree vecstype = get_vectype_for_scalar_type (vinfo,
4110 : 779 : TREE_TYPE (rhs1));
4111 : 779 : append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt, vecstype);
4112 : : }
4113 : : }
4114 : : }
4115 : :
4116 : 782 : if (def == NULL_TREE) /* No reusable cast source: convert oprnd1 to oprnd0's type explicitly. */
4117 : : {
4118 : 9869 : def = vect_recog_temp_ssa_var (TREE_TYPE (oprnd0), NULL);
4119 : 9869 : def_stmt = gimple_build_assign (def, NOP_EXPR, oprnd1);
4120 : 9869 : append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
4121 : : }
4122 : :
4123 : : /* Pattern detected. */
4124 : 10651 : vect_pattern_detected ("vect_recog_vector_vector_shift_pattern", last_stmt);
4125 : :
4126 : : /* Pattern supported. Create a stmt to be used to replace the pattern:
4127 : 10651 : var = vect_recog_temp_ssa_var (TREE_TYPE (oprnd0), NULL); /* the same shift/rotate code applied to oprnd0 and the adjusted amount DEF. */
4128 : 10651 : pattern_stmt = gimple_build_assign (var, rhs_code, oprnd0, def);
4129 : :
4130 : 10651 : return pattern_stmt;
4131 : : }
4132 : :
4133 : : /* Return true iff the target has a vector optab implementing the operation
4134 : : CODE on type VECTYPE, i.e. an optab exists for CODE and it has a handler for VECTYPE's mode. */
4135 : :
4136 : : static bool
4137 : 395383 : target_has_vecop_for_code (tree_code code, tree vectype)
4138 : : {
4139 : 395383 : optab voptab = optab_for_tree_code (code, vectype, optab_vector); /* Look up the optab for CODE, asking for the optab_vector subtype. */
4140 : 395383 : return voptab /* A null optab means CODE has no optab at all. */
4141 : 395383 : && optab_handler (voptab, TYPE_MODE (vectype)) != CODE_FOR_nothing; /* CODE_FOR_nothing means no instruction for this mode. */
4142 : : }
4143 : :
4144 : : /* Verify that the target has optabs of VECTYPE to perform all the steps
4145 : : needed by the multiplication-by-immediate synthesis algorithm described by
4146 : : ALG and VAR. If SYNTH_SHIFT_P is true ensure that vector addition is
4147 : : present. Return true iff the target supports all the steps. */
4148 : :
4149 : : static bool
4150 : 197327 : target_supports_mult_synth_alg (struct algorithm *alg, mult_variant var,
4151 : : tree vectype, bool synth_shift_p)
4152 : : {
4153 : 197327 : if (alg->op[0] != alg_zero && alg->op[0] != alg_m) /* Only algorithms whose first step is alg_zero or alg_m are accepted. */
4154 : : return false;
4155 : :
4156 : 197327 : bool supports_vminus = target_has_vecop_for_code (MINUS_EXPR, vectype); /* Queried once up front; reused for every step below. */
4157 : 197327 : bool supports_vplus = target_has_vecop_for_code (PLUS_EXPR, vectype);
4158 : :
4159 : 197327 : if (var == negate_variant /* The negate variant additionally needs a vector NEGATE_EXPR. */
4160 : 197327 : && !target_has_vecop_for_code (NEGATE_EXPR, vectype))
4161 : : return false;
4162 : :
4163 : : /* If we must synthesize shifts with additions make sure that vector
4164 : : addition is available. The add variant needs vector addition as well. */
4165 : 196915 : if ((var == add_variant || synth_shift_p) && !supports_vplus)
4166 : : return false;
4167 : :
4168 : 115046 : for (int i = 1; i < alg->ops; i++) /* Step 0 was validated above; check each remaining step. */
4169 : : {
4170 : 91740 : switch (alg->op[i])
4171 : : {
4172 : : case alg_shift: /* No optab is queried here for shift steps. */
4173 : : break;
4174 : 24495 : case alg_add_t_m2:
4175 : 24495 : case alg_add_t2_m:
4176 : 24495 : case alg_add_factor:
4177 : 24495 : if (!supports_vplus) /* Add-style steps need vector PLUS_EXPR. */
4178 : : return false;
4179 : : break;
4180 : 16394 : case alg_sub_t_m2:
4181 : 16394 : case alg_sub_t2_m:
4182 : 16394 : case alg_sub_factor:
4183 : 16394 : if (!supports_vminus) /* Subtract-style steps need vector MINUS_EXPR. */
4184 : : return false;
4185 : : break;
4186 : : case alg_unknown: /* These step kinds are rejected when they occur after step 0. */
4187 : : case alg_m:
4188 : : case alg_zero:
4189 : : case alg_impossible:
4190 : : return false;
4191 : 0 : default:
4192 : 0 : gcc_unreachable ();
4193 : : }
4194 : : }
4195 : :
4196 : : return true;
4197 : : }
4198 : :
4199 : : /* Synthesize a left shift of OP by AMNT bits using a series of additions and
4200 : : putting the final result in DEST. Append all statements but the last to
4201 : : the pattern def sequence of STMT_INFO. Return the last statement. AMNT must not be negative (asserted); with AMNT == 0 the loop body never runs and gcc_unreachable is hit. */
4202 : :
4203 : : static gimple *
4204 : 0 : synth_lshift_by_additions (vec_info *vinfo,
4205 : : tree dest, tree op, HOST_WIDE_INT amnt,
4206 : : stmt_vec_info stmt_info)
4207 : : {
4208 : 0 : HOST_WIDE_INT i;
4209 : 0 : tree itype = TREE_TYPE (op);
4210 : 0 : tree prev_res = op; /* Running value; doubled once per iteration. */
4211 : 0 : gcc_assert (amnt >= 0);
4212 : 0 : for (i = 0; i < amnt; i++)
4213 : : {
4214 : 0 : tree tmp_var = (i < amnt - 1) ? vect_recog_temp_ssa_var (itype, NULL) /* Intermediate doublings go to fresh temporaries; the last one writes DEST. */
4215 : 0 : : dest;
4216 : 0 : gimple *stmt
4217 : 0 : = gimple_build_assign (tmp_var, PLUS_EXPR, prev_res, prev_res); /* x + x, i.e. a left shift by one bit. */
4218 : 0 : prev_res = tmp_var;
4219 : 0 : if (i < amnt - 1)
4220 : 0 : append_pattern_def_seq (vinfo, stmt_info, stmt);
4221 : : else
4222 : 0 : return stmt; /* The final statement is handed back to the caller, not appended. */
4223 : : }
4224 : 0 : gcc_unreachable ();
4225 : : return NULL;
4226 : : }
4227 : :
4228 : : /* Helper for vect_synth_mult_by_constant. Apply a binary operation
4229 : : CODE to operands OP1 and OP2, creating a new temporary SSA var in
4230 : : the process if necessary. Append the resulting assignment statements
4231 : : to the sequence in STMT_VINFO. Return the SSA variable that holds the
4232 : : result of the binary operation. If SYNTH_SHIFT_P is true synthesize
4233 : : left shifts using additions. */
4234 : :
4235 : : static tree
4236 : 40789 : apply_binop_and_append_stmt (vec_info *vinfo,
4237 : : tree_code code, tree op1, tree op2,
4238 : : stmt_vec_info stmt_vinfo, bool synth_shift_p)
4239 : : {
4240 : 40789 : if (integer_zerop (op2) /* Shifting by zero or adding zero is a no-op: return OP1 without emitting anything. */
4241 : 40789 : && (code == LSHIFT_EXPR
4242 : 35511 : || code == PLUS_EXPR))
4243 : : {
4244 : 35511 : gcc_assert (TREE_CODE (op1) == SSA_NAME);
4245 : : return op1;
4246 : : }
4247 : :
4248 : 5278 : gimple *stmt;
4249 : 5278 : tree itype = TREE_TYPE (op1);
4250 : 5278 : tree tmp_var = vect_recog_temp_ssa_var (itype, NULL); /* Result variable, in OP1's type. */
4251 : :
4252 : 5278 : if (code == LSHIFT_EXPR
4253 : 5278 : && synth_shift_p)
4254 : : {
4255 : 0 : stmt = synth_lshift_by_additions (vinfo, tmp_var, op1, /* Emits all but the last addition itself and returns the last one. */
4256 : 0 : TREE_INT_CST_LOW (op2), stmt_vinfo);
4257 : 0 : append_pattern_def_seq (vinfo, stmt_vinfo, stmt); /* Here the last addition is appended too. */
4258 : 0 : return tmp_var;
4259 : : }
4260 : :
4261 : 5278 : stmt = gimple_build_assign (tmp_var, code, op1, op2); /* Plain case: a single assignment tmp_var = op1 CODE op2. */
4262 : 5278 : append_pattern_def_seq (vinfo, stmt_vinfo, stmt);
4263 : 5278 : return tmp_var;
4264 : : }
4265 : :
4266 : : /* Synthesize a multiplication of OP by an INTEGER_CST VAL using shifts
4267 : : and simple arithmetic operations to be vectorized. Record the statements
4268 : : produced in STMT_VINFO and return the last statement in the sequence or
4269 : : NULL if it's not possible to synthesize such a multiplication.
4270 : : This function mirrors the behavior of expand_mult_const in expmed.cc but
4271 : : works on tree-ssa form. */
4272 : :
4273 : : static gimple *
4274 : 199919 : vect_synth_mult_by_constant (vec_info *vinfo, tree op, tree val,
4275 : : stmt_vec_info stmt_vinfo)
4276 : : {
4277 : 199919 : tree itype = TREE_TYPE (op);
4278 : 199919 : machine_mode mode = TYPE_MODE (itype);
4279 : 199919 : struct algorithm alg;
4280 : 199919 : mult_variant variant;
4281 : 199919 : if (!tree_fits_shwi_p (val)) /* VAL is read with tree_to_shwi below, so it must fit. */
4282 : : return NULL;
4283 : :
4284 : : /* Multiplication synthesis by shifts, adds and subs can introduce
4285 : : signed overflow where the original operation didn't. Perform the
4286 : : operations on an unsigned type and cast back to avoid this.
4287 : : In the future we may want to relax this for synthesis algorithms
4288 : : that we can prove do not cause unexpected overflow. */
4289 : 197327 : bool cast_to_unsigned_p = !TYPE_OVERFLOW_WRAPS (itype);
4290 : :
4291 : 41641 : tree multtype = cast_to_unsigned_p ? unsigned_type_for (itype) : itype; /* Type in which all synthesized arithmetic is carried out. */
4292 : 197327 : tree vectype = get_vectype_for_scalar_type (vinfo, multtype);
4293 : 197327 : if (!vectype)
4294 : : return NULL;
4295 : :
4296 : : /* Targets that don't support vector shifts but support vector additions
4297 : : can synthesize shifts that way. */
4298 : 197327 : bool synth_shift_p = !vect_supportable_shift (vinfo, LSHIFT_EXPR, multtype);
4299 : :
4300 : 197327 : HOST_WIDE_INT hwval = tree_to_shwi (val);
4301 : : /* Use MAX_COST here as we don't want to limit the sequence on rtx costs.
4302 : : The vectorizer's benefit analysis will decide whether it's beneficial
4303 : : to do this. */
4304 : 394651 : bool possible = choose_mult_variant (VECTOR_MODE_P (TYPE_MODE (vectype)) /* Use the vector mode when VECTYPE has one, otherwise OP's scalar mode. */
4305 : 197324 : ? TYPE_MODE (vectype) : mode,
4306 : : hwval, &alg, &variant, MAX_COST);
4307 : 197327 : if (!possible)
4308 : : return NULL;
4309 : :
4310 : 197327 : if (!target_supports_mult_synth_alg (&alg, variant, vectype, synth_shift_p)) /* Nothing has been emitted yet, so bailing out here leaves STMT_VINFO untouched. */
4311 : : return NULL;
4312 : :
4313 : 23306 : tree accumulator;
4314 : :
4315 : : /* STMT always holds the most recently built statement; none exists yet. */
4316 : 23306 : gimple *stmt = NULL;
4317 : :
4318 : 23306 : if (cast_to_unsigned_p) /* Convert OP to the unsigned working type first. */
4319 : : {
4320 : 10722 : tree tmp_op = vect_recog_temp_ssa_var (multtype, NULL);
4321 : 10722 : stmt = gimple_build_assign (tmp_op, CONVERT_EXPR, op);
4322 : 10722 : append_pattern_def_seq (vinfo, stmt_vinfo, stmt);
4323 : 10722 : op = tmp_op;
4324 : : }
4325 : :
4326 : 23306 : if (alg.op[0] == alg_zero) /* Step 0 seeds the accumulator: constant zero for alg_zero, otherwise OP itself. */
4327 : 168 : accumulator = build_int_cst (multtype, 0);
4328 : : else
4329 : : accumulator = op;
4330 : :
4331 : 23306 : bool needs_fixup = (variant == negate_variant) /* These variants add one more statement after the loop. */
4332 : 23306 : || (variant == add_variant);
4333 : :
4334 : 114893 : for (int i = 1; i < alg.ops; i++) /* Emit one statement (plus helper shifts) per remaining algorithm step. */
4335 : : {
4336 : 91587 : tree shft_log = build_int_cst (multtype, alg.log[i]); /* Shift count for this step. */
4337 : 91587 : tree accum_tmp = vect_recog_temp_ssa_var (multtype, NULL); /* New accumulator value produced by this step. */
4338 : 91587 : tree tmp_var = NULL_TREE;
4339 : :
4340 : 91587 : switch (alg.op[i])
4341 : : {
4342 : 50798 : case alg_shift: /* accum = accumulator << log. */
4343 : 50798 : if (synth_shift_p)
4344 : 0 : stmt
4345 : 0 : = synth_lshift_by_additions (vinfo, accum_tmp, accumulator,
4346 : 0 : alg.log[i], stmt_vinfo);
4347 : : else
4348 : 50798 : stmt = gimple_build_assign (accum_tmp, LSHIFT_EXPR, accumulator,
4349 : : shft_log);
4350 : : break;
4351 : 19939 : case alg_add_t_m2: /* accum = accumulator + (op << log). */
4352 : 19939 : tmp_var
4353 : 19939 : = apply_binop_and_append_stmt (vinfo, LSHIFT_EXPR, op, shft_log,
4354 : : stmt_vinfo, synth_shift_p);
4355 : 19939 : stmt = gimple_build_assign (accum_tmp, PLUS_EXPR, accumulator,
4356 : : tmp_var);
4357 : 19939 : break;
4358 : 15735 : case alg_sub_t_m2: /* accum = accumulator - (op << log). */
4359 : 15735 : tmp_var = apply_binop_and_append_stmt (vinfo, LSHIFT_EXPR, op,
4360 : : shft_log, stmt_vinfo,
4361 : : synth_shift_p);
4362 : : /* In some algorithms the first step involves zeroing the
4363 : : accumulator. If subtracting from such an accumulator
4364 : : just emit the negation directly. */
4365 : 15735 : if (integer_zerop (accumulator))
4366 : 168 : stmt = gimple_build_assign (accum_tmp, NEGATE_EXPR, tmp_var);
4367 : : else
4368 : 15567 : stmt = gimple_build_assign (accum_tmp, MINUS_EXPR, accumulator,
4369 : : tmp_var);
4370 : : break;
4371 : 0 : case alg_add_t2_m: /* accum = (accumulator << log) + op. */
4372 : 0 : tmp_var
4373 : 0 : = apply_binop_and_append_stmt (vinfo, LSHIFT_EXPR, accumulator,
4374 : : shft_log, stmt_vinfo, synth_shift_p);
4375 : 0 : stmt = gimple_build_assign (accum_tmp, PLUS_EXPR, tmp_var, op);
4376 : 0 : break;
4377 : 0 : case alg_sub_t2_m: /* accum = (accumulator << log) - op. */
4378 : 0 : tmp_var
4379 : 0 : = apply_binop_and_append_stmt (vinfo, LSHIFT_EXPR, accumulator,
4380 : : shft_log, stmt_vinfo, synth_shift_p);
4381 : 0 : stmt = gimple_build_assign (accum_tmp, MINUS_EXPR, tmp_var, op);
4382 : 0 : break;
4383 : 4505 : case alg_add_factor: /* accum = accumulator + (accumulator << log). */
4384 : 4505 : tmp_var
4385 : 4505 : = apply_binop_and_append_stmt (vinfo, LSHIFT_EXPR, accumulator,
4386 : : shft_log, stmt_vinfo, synth_shift_p);
4387 : 4505 : stmt = gimple_build_assign (accum_tmp, PLUS_EXPR, accumulator,
4388 : : tmp_var);
4389 : 4505 : break;
4390 : 610 : case alg_sub_factor: /* accum = (accumulator << log) - accumulator. */
4391 : 610 : tmp_var
4392 : 610 : = apply_binop_and_append_stmt (vinfo, LSHIFT_EXPR, accumulator,
4393 : : shft_log, stmt_vinfo, synth_shift_p);
4394 : 610 : stmt = gimple_build_assign (accum_tmp, MINUS_EXPR, tmp_var,
4395 : : accumulator);
4396 : 610 : break;
4397 : 0 : default:
4398 : 0 : gcc_unreachable ();
4399 : : }
4400 : : /* We don't want to append the last stmt in the sequence to stmt_vinfo
4401 : : but rather return it directly. A step's stmt is the last one only when
4402 : : it is the final step and neither a variant fixup nor a cast back follows. */
4403 : 91587 : if ((i < alg.ops - 1) || needs_fixup || cast_to_unsigned_p)
4404 : 79209 : append_pattern_def_seq (vinfo, stmt_vinfo, stmt);
4405 : 91587 : accumulator = accum_tmp;
4406 : : }
4407 : 23306 : if (variant == negate_variant) /* Fixup: negate the accumulated value. */
4408 : : {
4409 : 317 : tree accum_tmp = vect_recog_temp_ssa_var (multtype, NULL);
4410 : 317 : stmt = gimple_build_assign (accum_tmp, NEGATE_EXPR, accumulator);
4411 : 317 : accumulator = accum_tmp;
4412 : 317 : if (cast_to_unsigned_p) /* Appended only when the cast back below will be the returned stmt. */
4413 : 121 : append_pattern_def_seq (vinfo, stmt_vinfo, stmt);
4414 : : }
4415 : 22989 : else if (variant == add_variant) /* Fixup: add OP once more. */
4416 : : {
4417 : 78 : tree accum_tmp = vect_recog_temp_ssa_var (multtype, NULL);
4418 : 78 : stmt = gimple_build_assign (accum_tmp, PLUS_EXPR, accumulator, op);
4419 : 78 : accumulator = accum_tmp;
4420 : 78 : if (cast_to_unsigned_p) /* Appended only when the cast back below will be the returned stmt. */
4421 : 68 : append_pattern_def_seq (vinfo, stmt_vinfo, stmt);
4422 : : }
4423 : : /* Move back to a signed if needed. */
4424 : 23306 : if (cast_to_unsigned_p)
4425 : : {
4426 : 10722 : tree accum_tmp = vect_recog_temp_ssa_var (itype, NULL); /* Result in OP's original type. */
4427 : 10722 : stmt = gimple_build_assign (accum_tmp, CONVERT_EXPR, accumulator);
4428 : : }
4429 : :
4430 : : return stmt; /* The final statement is returned to the caller, not appended. */
4431 : : }
4432 : :
4433 : : /* Detect multiplication by constant and convert it into a sequence of
4434 : : shifts and additions, subtractions, negations. We reuse the
4435 : : choose_mult_variant algorithms from expmed.cc
4436 : :
4437 : : Input/Output:
4438 : :
4439 : : STMT_VINFO: The stmt from which the pattern search begins,
4440 : : i.e. the mult stmt.
4441 : :
4442 : : Output:
4443 : :
4444 : : * TYPE_OUT: The type of the output of this pattern.
4445 : :
4446 : : * Return value: A new stmt that will be used to replace
4447 : : the multiplication, or NULL if the pattern does not apply. */
4448 : :
4449 : : static gimple *
4450 : 25035648 : vect_recog_mult_pattern (vec_info *vinfo,
4451 : : stmt_vec_info stmt_vinfo, tree *type_out)
4452 : : {
4453 : 25035648 : gimple *last_stmt = stmt_vinfo->stmt;
4454 : 25035648 : tree oprnd0, oprnd1, vectype, itype;
4455 : 25035648 : gimple *pattern_stmt;
4456 : :
4457 : 25035648 : if (!is_gimple_assign (last_stmt)) /* Only assignments can match. */
4458 : : return NULL;
4459 : :
4460 : 17223116 : if (gimple_assign_rhs_code (last_stmt) != MULT_EXPR) /* Only multiplications are candidates. */
4461 : : return NULL;
4462 : :
4463 : 1076534 : oprnd0 = gimple_assign_rhs1 (last_stmt);
4464 : 1076534 : oprnd1 = gimple_assign_rhs2 (last_stmt);
4465 : 1076534 : itype = TREE_TYPE (oprnd0);
4466 : :
4467 : 1076534 : if (TREE_CODE (oprnd0) != SSA_NAME
4468 : 1075542 : || TREE_CODE (oprnd1) != INTEGER_CST
4469 : 619455 : || !INTEGRAL_TYPE_P (itype)
4470 : 1695989 : || !type_has_mode_precision_p (itype))
4471 : 457131 : return NULL; /* Require an SSA name times an integer constant, in an integral type of mode precision. */
4472 : :
4473 : 619403 : vectype = get_vectype_for_scalar_type (vinfo, itype);
4474 : 619403 : if (vectype == NULL_TREE)
4475 : : return NULL;
4476 : :
4477 : : /* If the target can handle vectorized multiplication natively,
4478 : : don't attempt to optimize this. */
4479 : 510914 : optab mul_optab = optab_for_tree_code (MULT_EXPR, vectype, optab_default);
4480 : 510914 : if (mul_optab != unknown_optab)
4481 : : {
4482 : 510914 : machine_mode vec_mode = TYPE_MODE (vectype);
4483 : 510914 : int icode = (int) optab_handler (mul_optab, vec_mode);
4484 : 510914 : if (icode != CODE_FOR_nothing) /* A handler exists for this vector mode. */
4485 : : return NULL;
4486 : : }
4487 : :
4488 : 199919 : pattern_stmt = vect_synth_mult_by_constant (vinfo, /* Intermediate statements are appended to STMT_VINFO; the last one is returned. */
4489 : : oprnd0, oprnd1, stmt_vinfo);
4490 : 199919 : if (!pattern_stmt)
4491 : : return NULL;
4492 : :
4493 : : /* Pattern detected. */
4494 : 23306 : vect_pattern_detected ("vect_recog_mult_pattern", last_stmt);
4495 : :
4496 : 23306 : *type_out = vectype; /* Set only on success; vector type of the original operand type ITYPE. */
4497 : :
4498 : 23306 : return pattern_stmt;
4499 : : }
4500 : :
4501 : : /* Detect a signed division by a constant that wouldn't be
4502 : : otherwise vectorized:
4503 : :
4504 : : type a_t, b_t;
4505 : :
4506 : : S1 a_t = b_t / N;
4507 : :
4508 : : where type 'type' is an integral type and N is a constant.
4509 : :
4510 : : Similarly handle modulo by a constant:
4511 : :
4512 : : S4 a_t = b_t % N;
4513 : :
4514 : : Input/Output:
4515 : :
4516 : : * STMT_VINFO: The stmt from which the pattern search begins,
4517 : : i.e. the division stmt. If N is a power of two constant and
4518 : : type is signed, S1 is replaced by:
4519 : : S3 y_t = b_t < 0 ? N - 1 : 0;
4520 : : S2 x_t = b_t + y_t;
4521 : : S1' a_t = x_t >> log2 (N);
4522 : :
4523 : : S4 is replaced if N is a power of two constant and
4524 : : type is signed by (where *_T temporaries have unsigned type):
4525 : : S9 y_T = b_t < 0 ? -1U : 0U;
4526 : : S8 z_T = y_T >> (sizeof (type_t) * CHAR_BIT - log2 (N));
4527 : : S7 z_t = (type) z_T;
4528 : : S6 w_t = b_t + z_t;
4529 : : S5 x_t = w_t & (N - 1);
4530 : : S4' a_t = x_t - z_t;
4531 : :
4532 : : Output:
4533 : :
4534 : : * TYPE_OUT: The type of the output of this pattern.
4535 : :
4536 : : * Return value: A new stmt that will be used to replace the division
4537 : : S1 or modulo S4 stmt. */
4538 : :
4539 : : static gimple *
4540 : 24965617 : vect_recog_divmod_pattern (vec_info *vinfo,
4541 : : stmt_vec_info stmt_vinfo, tree *type_out)
4542 : : {
4543 : 24965617 : gimple *last_stmt = stmt_vinfo->stmt;
4544 : 24965617 : tree oprnd0, oprnd1, vectype, itype, cond;
4545 : 24965617 : gimple *pattern_stmt, *def_stmt;
4546 : 24965617 : enum tree_code rhs_code;
4547 : 24965617 : optab optab;
4548 : 24965617 : tree q, cst;
4549 : 24965617 : int dummy_int, prec;
4550 : :
4551 : 24965617 : if (!is_gimple_assign (last_stmt))
4552 : : return NULL;
4553 : :
4554 : 17153085 : rhs_code = gimple_assign_rhs_code (last_stmt);
4555 : 17153085 : switch (rhs_code)
4556 : : {
4557 : 220927 : case TRUNC_DIV_EXPR:
4558 : 220927 : case EXACT_DIV_EXPR:
4559 : 220927 : case TRUNC_MOD_EXPR:
4560 : 220927 : break;
4561 : : default:
4562 : : return NULL;
4563 : : }
4564 : :
4565 : 220927 : oprnd0 = gimple_assign_rhs1 (last_stmt);
4566 : 220927 : oprnd1 = gimple_assign_rhs2 (last_stmt);
4567 : 220927 : itype = TREE_TYPE (oprnd0);
4568 : 220927 : if (TREE_CODE (oprnd0) != SSA_NAME
4569 : 205153 : || TREE_CODE (oprnd1) != INTEGER_CST
4570 : 116151 : || TREE_CODE (itype) != INTEGER_TYPE
4571 : 337078 : || !type_has_mode_precision_p (itype))
4572 : 104776 : return NULL;
4573 : :
4574 : 116151 : scalar_int_mode itype_mode = SCALAR_INT_TYPE_MODE (itype);
4575 : 116151 : vectype = get_vectype_for_scalar_type (vinfo, itype);
4576 : 116151 : if (vectype == NULL_TREE)
4577 : : return NULL;
4578 : :
4579 : 96321 : if (optimize_bb_for_size_p (gimple_bb (last_stmt)))
4580 : : {
4581 : : /* If the target can handle vectorized division or modulo natively,
4582 : : don't attempt to optimize this, since native division is likely
4583 : : to give smaller code. */
4584 : 1173 : optab = optab_for_tree_code (rhs_code, vectype, optab_default);
4585 : 1173 : if (optab != unknown_optab)
4586 : : {
4587 : 1173 : machine_mode vec_mode = TYPE_MODE (vectype);
4588 : 1173 : int icode = (int) optab_handler (optab, vec_mode);
4589 : 1173 : if (icode != CODE_FOR_nothing)
4590 : : return NULL;
4591 : : }
4592 : : }
4593 : :
4594 : 96321 : prec = TYPE_PRECISION (itype);
4595 : 96321 : if (integer_pow2p (oprnd1))
4596 : : {
4597 : 44903 : if (TYPE_UNSIGNED (itype) || tree_int_cst_sgn (oprnd1) != 1)
4598 : 3180 : return NULL;
4599 : :
4600 : : /* Pattern detected. */
4601 : 41723 : vect_pattern_detected ("vect_recog_divmod_pattern", last_stmt);
4602 : :
4603 : 41723 : *type_out = vectype;
4604 : :
4605 : : /* Check if the target supports this internal function. */
4606 : 41723 : internal_fn ifn = IFN_DIV_POW2;
4607 : 41723 : if (direct_internal_fn_supported_p (ifn, vectype, OPTIMIZE_FOR_SPEED))
4608 : : {
4609 : 0 : tree shift = build_int_cst (itype, tree_log2 (oprnd1));
4610 : :
4611 : 0 : tree var_div = vect_recog_temp_ssa_var (itype, NULL);
4612 : 0 : gimple *div_stmt = gimple_build_call_internal (ifn, 2, oprnd0, shift);
4613 : 0 : gimple_call_set_lhs (div_stmt, var_div);
4614 : :
4615 : 0 : if (rhs_code == TRUNC_MOD_EXPR)
4616 : : {
4617 : 0 : append_pattern_def_seq (vinfo, stmt_vinfo, div_stmt);
4618 : 0 : def_stmt
4619 : 0 : = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
4620 : : LSHIFT_EXPR, var_div, shift);
4621 : 0 : append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
4622 : 0 : pattern_stmt
4623 : 0 : = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
4624 : : MINUS_EXPR, oprnd0,
4625 : : gimple_assign_lhs (def_stmt));
4626 : : }
4627 : : else
4628 : : pattern_stmt = div_stmt;
4629 : 0 : gimple_set_location (pattern_stmt, gimple_location (last_stmt));
4630 : :
4631 : 0 : return pattern_stmt;
4632 : : }
4633 : :
4634 : 41723 : cond = build2 (LT_EXPR, boolean_type_node, oprnd0,
4635 : 41723 : build_int_cst (itype, 0));
4636 : 41723 : if (rhs_code == TRUNC_DIV_EXPR
4637 : 41723 : || rhs_code == EXACT_DIV_EXPR)
4638 : : {
4639 : 39803 : tree var = vect_recog_temp_ssa_var (itype, NULL);
4640 : 39803 : tree shift;
4641 : 39803 : def_stmt
4642 : 39803 : = gimple_build_assign (var, COND_EXPR, cond,
4643 : 39803 : fold_build2 (MINUS_EXPR, itype, oprnd1,
4644 : : build_int_cst (itype, 1)),
4645 : 39803 : build_int_cst (itype, 0));
4646 : 39803 : append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
4647 : 39803 : var = vect_recog_temp_ssa_var (itype, NULL);
4648 : 39803 : def_stmt
4649 : 39803 : = gimple_build_assign (var, PLUS_EXPR, oprnd0,
4650 : : gimple_assign_lhs (def_stmt));
4651 : 39803 : append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
4652 : :
4653 : 39803 : shift = build_int_cst (itype, tree_log2 (oprnd1));
4654 : 39803 : pattern_stmt
4655 : 39803 : = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
4656 : : RSHIFT_EXPR, var, shift);
4657 : : }
4658 : : else
4659 : : {
4660 : 1920 : tree signmask;
4661 : 1920 : if (compare_tree_int (oprnd1, 2) == 0)
4662 : : {
4663 : 777 : signmask = vect_recog_temp_ssa_var (itype, NULL);
4664 : 777 : def_stmt = gimple_build_assign (signmask, COND_EXPR, cond,
4665 : 777 : build_int_cst (itype, 1),
4666 : 777 : build_int_cst (itype, 0));
4667 : 777 : append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
4668 : : }
4669 : : else
4670 : : {
4671 : 1143 : tree utype
4672 : 1143 : = build_nonstandard_integer_type (prec, 1);
4673 : 1143 : tree vecutype = get_vectype_for_scalar_type (vinfo, utype);
4674 : 1143 : tree shift
4675 : 1143 : = build_int_cst (utype, GET_MODE_BITSIZE (itype_mode)
4676 : 1143 : - tree_log2 (oprnd1));
4677 : 1143 : tree var = vect_recog_temp_ssa_var (utype, NULL);
4678 : :
4679 : 1143 : def_stmt = gimple_build_assign (var, COND_EXPR, cond,
4680 : 1143 : build_int_cst (utype, -1),
4681 : 1143 : build_int_cst (utype, 0));
4682 : 1143 : append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt, vecutype);
4683 : 1143 : var = vect_recog_temp_ssa_var (utype, NULL);
4684 : 1143 : def_stmt = gimple_build_assign (var, RSHIFT_EXPR,
4685 : : gimple_assign_lhs (def_stmt),
4686 : : shift);
4687 : 1143 : append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt, vecutype);
4688 : 1143 : signmask = vect_recog_temp_ssa_var (itype, NULL);
4689 : 1143 : def_stmt
4690 : 1143 : = gimple_build_assign (signmask, NOP_EXPR, var);
4691 : 1143 : append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
4692 : : }
4693 : 1920 : def_stmt
4694 : 1920 : = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
4695 : : PLUS_EXPR, oprnd0, signmask);
4696 : 1920 : append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
4697 : 1920 : def_stmt
4698 : 1920 : = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
4699 : : BIT_AND_EXPR, gimple_assign_lhs (def_stmt),
4700 : 1920 : fold_build2 (MINUS_EXPR, itype, oprnd1,
4701 : : build_int_cst (itype, 1)));
4702 : 1920 : append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
4703 : :
4704 : 1920 : pattern_stmt
4705 : 1920 : = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
4706 : : MINUS_EXPR, gimple_assign_lhs (def_stmt),
4707 : : signmask);
4708 : : }
4709 : :
4710 : 41723 : return pattern_stmt;
4711 : : }
4712 : :
4713 : 51418 : if ((cst = uniform_integer_cst_p (oprnd1))
4714 : 51418 : && TYPE_UNSIGNED (itype)
4715 : : && rhs_code == TRUNC_DIV_EXPR
4716 : 29879 : && vectype
4717 : 70883 : && targetm.vectorize.preferred_div_as_shifts_over_mult (vectype))
4718 : : {
4719 : : /* We can use the relationship:
4720 : :
4721 : : x // N == ((x+N+2) // (N+1) + x) // (N+1) for 0 <= x < N(N+3)
4722 : :
4723 : : to optimize cases where N+1 is a power of 2, and where // (N+1)
4724 : : is therefore a shift right. When operating in modes that are
4725 : : multiples of a byte in size, there are two cases:
4726 : :
4727 : : (1) N(N+3) is not representable, in which case the question
4728 : : becomes whether the replacement expression overflows.
4729 : : It is enough to test that x+N+2 does not overflow,
4730 : : i.e. that x < MAX-(N+1).
4731 : :
4732 : : (2) N(N+3) is representable, in which case it is the (only)
4733 : : bound that we need to check.
4734 : :
4735 : : ??? For now we just handle the case where // (N+1) is a shift
4736 : : right by half the precision, since some architectures can
4737 : : optimize the associated addition and shift combinations
4738 : : into single instructions. */
4739 : :
4740 : 12727 : auto wcst = wi::to_wide (cst);
4741 : 12727 : int pow = wi::exact_log2 (wcst + 1);
4742 : 12727 : if (pow == prec / 2)
4743 : : {
4744 : 307 : gimple *stmt = SSA_NAME_DEF_STMT (oprnd0);
4745 : :
4746 : 307 : gimple_ranger ranger;
4747 : 307 : int_range_max r;
4748 : :
4749 : : /* Check that no overflow will occur. If we don't have range
4750 : : information we can't perform the optimization. */
4751 : :
4752 : 307 : if (ranger.range_of_expr (r, oprnd0, stmt) && !r.undefined_p ())
4753 : : {
4754 : 305 : wide_int max = r.upper_bound ();
4755 : 305 : wide_int one = wi::shwi (1, prec);
4756 : 305 : wide_int adder = wi::add (one, wi::lshift (one, pow));
4757 : 305 : wi::overflow_type ovf;
4758 : 305 : wi::add (max, adder, UNSIGNED, &ovf);
4759 : 305 : if (ovf == wi::OVF_NONE)
4760 : : {
4761 : 248 : *type_out = vectype;
4762 : 248 : tree tadder = wide_int_to_tree (itype, adder);
4763 : 248 : tree rshift = wide_int_to_tree (itype, pow);
4764 : :
4765 : 248 : tree new_lhs1 = vect_recog_temp_ssa_var (itype, NULL);
4766 : 248 : gassign *patt1
4767 : 248 : = gimple_build_assign (new_lhs1, PLUS_EXPR, oprnd0, tadder);
4768 : 248 : append_pattern_def_seq (vinfo, stmt_vinfo, patt1, vectype);
4769 : :
4770 : 248 : tree new_lhs2 = vect_recog_temp_ssa_var (itype, NULL);
4771 : 248 : patt1 = gimple_build_assign (new_lhs2, RSHIFT_EXPR, new_lhs1,
4772 : : rshift);
4773 : 248 : append_pattern_def_seq (vinfo, stmt_vinfo, patt1, vectype);
4774 : :
4775 : 248 : tree new_lhs3 = vect_recog_temp_ssa_var (itype, NULL);
4776 : 248 : patt1 = gimple_build_assign (new_lhs3, PLUS_EXPR, new_lhs2,
4777 : : oprnd0);
4778 : 248 : append_pattern_def_seq (vinfo, stmt_vinfo, patt1, vectype);
4779 : :
4780 : 248 : tree new_lhs4 = vect_recog_temp_ssa_var (itype, NULL);
4781 : 248 : pattern_stmt = gimple_build_assign (new_lhs4, RSHIFT_EXPR,
4782 : : new_lhs3, rshift);
4783 : :
4784 : 248 : return pattern_stmt;
4785 : : }
4786 : 305 : }
4787 : 307 : }
4788 : : }
4789 : :
4790 : 51170 : if (prec > HOST_BITS_PER_WIDE_INT
4791 : 51170 : || integer_zerop (oprnd1))
4792 : 451 : return NULL;
4793 : :
4794 : 50719 : if (!can_mult_highpart_p (TYPE_MODE (vectype), TYPE_UNSIGNED (itype)))
4795 : : return NULL;
4796 : :
4797 : 14251 : if (TYPE_UNSIGNED (itype))
4798 : : {
4799 : 9446 : unsigned HOST_WIDE_INT mh, ml;
4800 : 9446 : int pre_shift, post_shift;
4801 : 9446 : unsigned HOST_WIDE_INT d = (TREE_INT_CST_LOW (oprnd1)
4802 : 9446 : & GET_MODE_MASK (itype_mode));
4803 : 9446 : tree t1, t2, t3, t4;
4804 : :
4805 : 9446 : if (d >= (HOST_WIDE_INT_1U << (prec - 1)))
4806 : : /* FIXME: Can transform this into oprnd0 >= oprnd1 ? 1 : 0. */
4807 : 30 : return NULL;
4808 : :
4809 : : /* Find a suitable multiplier and right shift count
4810 : : instead of multiplying with D. */
4811 : 9416 : mh = choose_multiplier (d, prec, prec, &ml, &post_shift, &dummy_int);
4812 : :
4813 : : /* If the suggested multiplier is more than SIZE bits, we can do better
4814 : : for even divisors, using an initial right shift. */
4815 : 9416 : if (mh != 0 && (d & 1) == 0)
4816 : : {
4817 : 248 : pre_shift = ctz_or_zero (d);
4818 : 248 : mh = choose_multiplier (d >> pre_shift, prec, prec - pre_shift,
4819 : : &ml, &post_shift, &dummy_int);
4820 : 248 : gcc_assert (!mh);
4821 : : }
4822 : : else
4823 : : pre_shift = 0;
4824 : :
4825 : 609 : if (mh != 0)
4826 : : {
4827 : 609 : if (post_shift - 1 >= prec)
4828 : : return NULL;
4829 : :
4830 : : /* t1 = oprnd0 h* ml;
4831 : : t2 = oprnd0 - t1;
4832 : : t3 = t2 >> 1;
4833 : : t4 = t1 + t3;
4834 : : q = t4 >> (post_shift - 1); */
4835 : 609 : t1 = vect_recog_temp_ssa_var (itype, NULL);
4836 : 609 : def_stmt = gimple_build_assign (t1, MULT_HIGHPART_EXPR, oprnd0,
4837 : : build_int_cst (itype, ml));
4838 : 609 : append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
4839 : :
4840 : 609 : t2 = vect_recog_temp_ssa_var (itype, NULL);
4841 : 609 : def_stmt
4842 : 609 : = gimple_build_assign (t2, MINUS_EXPR, oprnd0, t1);
4843 : 609 : append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
4844 : :
4845 : 609 : t3 = vect_recog_temp_ssa_var (itype, NULL);
4846 : 609 : def_stmt
4847 : 609 : = gimple_build_assign (t3, RSHIFT_EXPR, t2, integer_one_node);
4848 : 609 : append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
4849 : :
4850 : 609 : t4 = vect_recog_temp_ssa_var (itype, NULL);
4851 : 609 : def_stmt
4852 : 609 : = gimple_build_assign (t4, PLUS_EXPR, t1, t3);
4853 : :
4854 : 609 : if (post_shift != 1)
4855 : : {
4856 : 609 : append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
4857 : :
4858 : 609 : q = vect_recog_temp_ssa_var (itype, NULL);
4859 : 609 : pattern_stmt
4860 : 609 : = gimple_build_assign (q, RSHIFT_EXPR, t4,
4861 : 609 : build_int_cst (itype, post_shift - 1));
4862 : : }
4863 : : else
4864 : : {
4865 : : q = t4;
4866 : : pattern_stmt = def_stmt;
4867 : : }
4868 : : }
4869 : : else
4870 : : {
4871 : 8807 : if (pre_shift >= prec || post_shift >= prec)
4872 : : return NULL;
4873 : :
4874 : : /* t1 = oprnd0 >> pre_shift;
4875 : : t2 = t1 h* ml;
4876 : : q = t2 >> post_shift; */
4877 : 8807 : if (pre_shift)
4878 : : {
4879 : 248 : t1 = vect_recog_temp_ssa_var (itype, NULL);
4880 : 248 : def_stmt
4881 : 248 : = gimple_build_assign (t1, RSHIFT_EXPR, oprnd0,
4882 : : build_int_cst (NULL, pre_shift));
4883 : 248 : append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
4884 : : }
4885 : : else
4886 : : t1 = oprnd0;
4887 : :
4888 : 8807 : t2 = vect_recog_temp_ssa_var (itype, NULL);
4889 : 8807 : def_stmt = gimple_build_assign (t2, MULT_HIGHPART_EXPR, t1,
4890 : : build_int_cst (itype, ml));
4891 : :
4892 : 8807 : if (post_shift)
4893 : : {
4894 : 8801 : append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
4895 : :
4896 : 8801 : q = vect_recog_temp_ssa_var (itype, NULL);
4897 : 8801 : def_stmt
4898 : 8801 : = gimple_build_assign (q, RSHIFT_EXPR, t2,
4899 : : build_int_cst (itype, post_shift));
4900 : : }
4901 : : else
4902 : : q = t2;
4903 : :
4904 : : pattern_stmt = def_stmt;
4905 : : }
4906 : : }
4907 : : else
4908 : : {
4909 : 4805 : unsigned HOST_WIDE_INT ml;
4910 : 4805 : int post_shift;
4911 : 4805 : HOST_WIDE_INT d = TREE_INT_CST_LOW (oprnd1);
4912 : 4805 : unsigned HOST_WIDE_INT abs_d;
4913 : 4805 : bool add = false;
4914 : 4805 : tree t1, t2, t3, t4;
4915 : :
4916 : : /* Give up for -1. */
4917 : 4805 : if (d == -1)
4918 : 0 : return NULL;
4919 : :
4920 : : /* Since d might be INT_MIN, we have to cast to
4921 : : unsigned HOST_WIDE_INT before negating to avoid
4922 : : undefined signed overflow. */
4923 : 9610 : abs_d = (d >= 0
4924 : 4805 : ? (unsigned HOST_WIDE_INT) d
4925 : 157 : : - (unsigned HOST_WIDE_INT) d);
4926 : :
4927 : : /* n rem d = n rem -d */
4928 : 4805 : if (rhs_code == TRUNC_MOD_EXPR && d < 0)
4929 : : {
4930 : 0 : d = abs_d;
4931 : 0 : oprnd1 = build_int_cst (itype, abs_d);
4932 : : }
4933 : 4805 : if (HOST_BITS_PER_WIDE_INT >= prec
4934 : 4805 : && abs_d == HOST_WIDE_INT_1U << (prec - 1))
4935 : : /* This case is not handled correctly below. */
4936 : : return NULL;
4937 : :
4938 : 4805 : choose_multiplier (abs_d, prec, prec - 1, &ml, &post_shift, &dummy_int);
4939 : 4805 : if (ml >= HOST_WIDE_INT_1U << (prec - 1))
4940 : : {
4941 : 1420 : add = true;
4942 : 1420 : ml |= HOST_WIDE_INT_M1U << (prec - 1);
4943 : : }
4944 : 4805 : if (post_shift >= prec)
4945 : : return NULL;
4946 : :
4947 : : /* t1 = oprnd0 h* ml; */
4948 : 4805 : t1 = vect_recog_temp_ssa_var (itype, NULL);
4949 : 4805 : def_stmt = gimple_build_assign (t1, MULT_HIGHPART_EXPR, oprnd0,
4950 : : build_int_cst (itype, ml));
4951 : :
4952 : 4805 : if (add)
4953 : : {
4954 : : /* t2 = t1 + oprnd0; */
4955 : 1420 : append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
4956 : 1420 : t2 = vect_recog_temp_ssa_var (itype, NULL);
4957 : 1420 : def_stmt = gimple_build_assign (t2, PLUS_EXPR, t1, oprnd0);
4958 : : }
4959 : : else
4960 : : t2 = t1;
4961 : :
4962 : 4805 : if (post_shift)
4963 : : {
4964 : : /* t3 = t2 >> post_shift; */
4965 : 4180 : append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
4966 : 4180 : t3 = vect_recog_temp_ssa_var (itype, NULL);
4967 : 4180 : def_stmt = gimple_build_assign (t3, RSHIFT_EXPR, t2,
4968 : : build_int_cst (itype, post_shift));
4969 : : }
4970 : : else
4971 : : t3 = t2;
4972 : :
4973 : 4805 : int msb = 1;
4974 : 4805 : value_range r;
4975 : 9610 : get_range_query (cfun)->range_of_expr (r, oprnd0);
4976 : 4805 : if (!r.varying_p () && !r.undefined_p ())
4977 : : {
4978 : 2792 : if (!wi::neg_p (r.lower_bound (), TYPE_SIGN (itype)))
4979 : : msb = 0;
4980 : 626 : else if (wi::neg_p (r.upper_bound (), TYPE_SIGN (itype)))
4981 : : msb = -1;
4982 : : }
4983 : :
4984 : 2166 : if (msb == 0 && d >= 0)
4985 : : {
4986 : : /* q = t3; */
4987 : : q = t3;
4988 : : pattern_stmt = def_stmt;
4989 : : }
4990 : : else
4991 : : {
4992 : : /* t4 = oprnd0 >> (prec - 1);
4993 : : or if we know from VRP that oprnd0 >= 0
4994 : : t4 = 0;
4995 : : or if we know from VRP that oprnd0 < 0
4996 : : t4 = -1; */
4997 : 2705 : append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
4998 : 2705 : t4 = vect_recog_temp_ssa_var (itype, NULL);
4999 : 2705 : if (msb != 1)
5000 : 74 : def_stmt = gimple_build_assign (t4, INTEGER_CST,
5001 : : build_int_cst (itype, msb));
5002 : : else
5003 : 2631 : def_stmt = gimple_build_assign (t4, RSHIFT_EXPR, oprnd0,
5004 : 2631 : build_int_cst (itype, prec - 1));
5005 : 2705 : append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
5006 : :
5007 : : /* q = t3 - t4; or q = t4 - t3; */
5008 : 2705 : q = vect_recog_temp_ssa_var (itype, NULL);
5009 : 5253 : pattern_stmt = gimple_build_assign (q, MINUS_EXPR, d < 0 ? t4 : t3,
5010 : : d < 0 ? t3 : t4);
5011 : : }
5012 : 4805 : }
5013 : :
5014 : 14221 : if (rhs_code == TRUNC_MOD_EXPR)
5015 : : {
5016 : 5856 : tree r, t1;
5017 : :
5018 : : /* We divided. Now finish by:
5019 : : t1 = q * oprnd1;
5020 : : r = oprnd0 - t1; */
5021 : 5856 : append_pattern_def_seq (vinfo, stmt_vinfo, pattern_stmt);
5022 : :
5023 : 5856 : t1 = vect_recog_temp_ssa_var (itype, NULL);
5024 : 5856 : def_stmt = gimple_build_assign (t1, MULT_EXPR, q, oprnd1);
5025 : 5856 : append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
5026 : :
5027 : 5856 : r = vect_recog_temp_ssa_var (itype, NULL);
5028 : 5856 : pattern_stmt = gimple_build_assign (r, MINUS_EXPR, oprnd0, t1);
5029 : : }
5030 : :
5031 : : /* Pattern detected. */
5032 : 14221 : vect_pattern_detected ("vect_recog_divmod_pattern", last_stmt);
5033 : :
5034 : 14221 : *type_out = vectype;
5035 : 14221 : return pattern_stmt;
5036 : : }
5037 : :
5038 : : /* Function vect_recog_mixed_size_cond_pattern
5039 : :
5040 : : Try to find the following pattern:
5041 : :
5042 : : type x_t, y_t;
5043 : : TYPE a_T, b_T, c_T;
5044 : : loop:
5045 : : S1 a_T = x_t CMP y_t ? b_T : c_T;
5046 : :
5047 : : where type 'TYPE' is an integral type which has different size
5048 : : from 'type'. b_T and c_T are either constants (and if 'TYPE' is wider
5049 : : than 'type', the constants need to fit into an integer type
5050 : : with the same width as 'type') or results of conversion from 'type'.
5051 : :
5052 : : Input:
5053 : :
5054 : : * STMT_VINFO: The stmt from which the pattern search begins.
5055 : :
5056 : : Output:
5057 : :
5058 : : * TYPE_OUT: The type of the output of this pattern.
5059 : :
5060 : : * Return value: A new stmt that will be used to replace the pattern.
5061 : : Additionally a def_stmt is added.
5062 : :
5063 : : a_it = x_t CMP y_t ? b_it : c_it;
5064 : : a_T = (TYPE) a_it; */
5065 : :
5066 : : static gimple *
5067 : 25109271 : vect_recog_mixed_size_cond_pattern (vec_info *vinfo,
5068 : : stmt_vec_info stmt_vinfo, tree *type_out)
5069 : : {
5070 : 25109271 : gimple *last_stmt = stmt_vinfo->stmt;
5071 : 25109271 : tree cond_expr, then_clause, else_clause;
5072 : 25109271 : tree type, vectype, comp_vectype, itype = NULL_TREE, vecitype;
5073 : 25109271 : gimple *pattern_stmt, *def_stmt;
5074 : 25109271 : tree orig_type0 = NULL_TREE, orig_type1 = NULL_TREE;
5075 : 25109271 : gimple *def_stmt0 = NULL, *def_stmt1 = NULL;
5076 : 25109271 : bool promotion;
5077 : 25109271 : tree comp_scalar_type;
5078 : :
5079 : 25109271 : if (!is_gimple_assign (last_stmt)
5080 : 17296739 : || gimple_assign_rhs_code (last_stmt) != COND_EXPR
5081 : 25219428 : || STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_internal_def)
5082 : : return NULL;
5083 : :
5084 : 108392 : cond_expr = gimple_assign_rhs1 (last_stmt);
5085 : 108392 : then_clause = gimple_assign_rhs2 (last_stmt);
5086 : 108392 : else_clause = gimple_assign_rhs3 (last_stmt);
5087 : :
5088 : 108392 : if (!COMPARISON_CLASS_P (cond_expr))
5089 : : return NULL;
5090 : :
5091 : 41723 : comp_scalar_type = TREE_TYPE (TREE_OPERAND (cond_expr, 0));
5092 : 41723 : comp_vectype = get_vectype_for_scalar_type (vinfo, comp_scalar_type);
5093 : 41723 : if (comp_vectype == NULL_TREE)
5094 : : return NULL;
5095 : :
5096 : 41723 : type = TREE_TYPE (gimple_assign_lhs (last_stmt));
5097 : 41723 : if (types_compatible_p (type, comp_scalar_type)
5098 : 1143 : || ((TREE_CODE (then_clause) != INTEGER_CST
5099 : 1143 : || TREE_CODE (else_clause) != INTEGER_CST)
5100 : 0 : && !INTEGRAL_TYPE_P (comp_scalar_type))
5101 : 42866 : || !INTEGRAL_TYPE_P (type))
5102 : : return NULL;
5103 : :
5104 : 1143 : if ((TREE_CODE (then_clause) != INTEGER_CST
5105 : 0 : && !type_conversion_p (vinfo, then_clause, false,
5106 : : &orig_type0, &def_stmt0, &promotion))
5107 : 1143 : || (TREE_CODE (else_clause) != INTEGER_CST
5108 : 0 : && !type_conversion_p (vinfo, else_clause, false,
5109 : : &orig_type1, &def_stmt1, &promotion)))
5110 : 0 : return NULL;
5111 : :
5112 : 0 : if (orig_type0 && orig_type1
5113 : 1143 : && !types_compatible_p (orig_type0, orig_type1))
5114 : : return NULL;
5115 : :
5116 : 1143 : if (orig_type0)
5117 : : {
5118 : 0 : if (!types_compatible_p (orig_type0, comp_scalar_type))
5119 : : return NULL;
5120 : 0 : then_clause = gimple_assign_rhs1 (def_stmt0);
5121 : 0 : itype = orig_type0;
5122 : : }
5123 : :
5124 : 1143 : if (orig_type1)
5125 : : {
5126 : 0 : if (!types_compatible_p (orig_type1, comp_scalar_type))
5127 : : return NULL;
5128 : 0 : else_clause = gimple_assign_rhs1 (def_stmt1);
5129 : 0 : itype = orig_type1;
5130 : : }
5131 : :
5132 : :
5133 : 1143 : HOST_WIDE_INT cmp_mode_size
5134 : 1143 : = GET_MODE_UNIT_BITSIZE (TYPE_MODE (comp_vectype));
5135 : :
5136 : 1143 : scalar_int_mode type_mode = SCALAR_INT_TYPE_MODE (type);
5137 : 2286 : if (GET_MODE_BITSIZE (type_mode) == cmp_mode_size)
5138 : : return NULL;
5139 : :
5140 : 0 : vectype = get_vectype_for_scalar_type (vinfo, type);
5141 : 0 : if (vectype == NULL_TREE)
5142 : : return NULL;
5143 : :
5144 : 0 : if (expand_vec_cond_expr_p (vectype, comp_vectype, TREE_CODE (cond_expr)))
5145 : : return NULL;
5146 : :
5147 : 0 : if (itype == NULL_TREE)
5148 : 0 : itype = build_nonstandard_integer_type (cmp_mode_size,
5149 : 0 : TYPE_UNSIGNED (type));
5150 : :
5151 : 0 : if (itype == NULL_TREE
5152 : 0 : || GET_MODE_BITSIZE (SCALAR_TYPE_MODE (itype)) != cmp_mode_size)
5153 : 0 : return NULL;
5154 : :
5155 : 0 : vecitype = get_vectype_for_scalar_type (vinfo, itype);
5156 : 0 : if (vecitype == NULL_TREE)
5157 : : return NULL;
5158 : :
5159 : 0 : if (!expand_vec_cond_expr_p (vecitype, comp_vectype, TREE_CODE (cond_expr)))
5160 : : return NULL;
5161 : :
5162 : 0 : if (GET_MODE_BITSIZE (type_mode) > cmp_mode_size)
5163 : : {
5164 : 0 : if ((TREE_CODE (then_clause) == INTEGER_CST
5165 : 0 : && !int_fits_type_p (then_clause, itype))
5166 : 0 : || (TREE_CODE (else_clause) == INTEGER_CST
5167 : 0 : && !int_fits_type_p (else_clause, itype)))
5168 : : return NULL;
5169 : : }
5170 : :
5171 : 0 : def_stmt = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
5172 : : COND_EXPR, unshare_expr (cond_expr),
5173 : : fold_convert (itype, then_clause),
5174 : : fold_convert (itype, else_clause));
5175 : 0 : pattern_stmt = gimple_build_assign (vect_recog_temp_ssa_var (type, NULL),
5176 : : NOP_EXPR, gimple_assign_lhs (def_stmt));
5177 : :
5178 : 0 : append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt, vecitype);
5179 : 0 : *type_out = vectype;
5180 : :
5181 : 0 : vect_pattern_detected ("vect_recog_mixed_size_cond_pattern", last_stmt);
5182 : :
5183 : 0 : return pattern_stmt;
5184 : : }
5185 : :
5186 : :
5187 : : /* Helper function of vect_recog_bool_pattern. Called recursively, return
5188 : : true if bool VAR can and should be optimized that way. Assume it shouldn't
5189 : : in case it's a result of a comparison which can be directly vectorized into
5190 : : a vector comparison. Fills in STMTS with all stmts visited during the
5191 : : walk. */
5192 : :
5193 : : static bool
5194 : 174037 : check_bool_pattern (tree var, vec_info *vinfo, hash_set<gimple *> &stmts)
5195 : : {
5196 : 174037 : tree rhs1;
5197 : 174037 : enum tree_code rhs_code;
5198 : :
5199 : 174037 : stmt_vec_info def_stmt_info = vect_get_internal_def (vinfo, var);
5200 : 174037 : if (!def_stmt_info)
5201 : : return false;
5202 : :
5203 : 337985 : gassign *def_stmt = dyn_cast <gassign *> (def_stmt_info->stmt);
5204 : 163948 : if (!def_stmt)
5205 : : return false;
5206 : :
5207 : 163948 : if (stmts.contains (def_stmt))
5208 : : return true;
5209 : :
5210 : 163948 : rhs1 = gimple_assign_rhs1 (def_stmt);
5211 : 163948 : rhs_code = gimple_assign_rhs_code (def_stmt);
5212 : 163948 : switch (rhs_code)
5213 : : {
5214 : 0 : case SSA_NAME:
5215 : 0 : if (! check_bool_pattern (rhs1, vinfo, stmts))
5216 : : return false;
5217 : : break;
5218 : :
5219 : 2227 : CASE_CONVERT:
5220 : 2227 : if (!VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (rhs1)))
5221 : : return false;
5222 : 163 : if (! check_bool_pattern (rhs1, vinfo, stmts))
5223 : : return false;
5224 : : break;
5225 : :
5226 : 2762 : case BIT_NOT_EXPR:
5227 : 2762 : if (! check_bool_pattern (rhs1, vinfo, stmts))
5228 : : return false;
5229 : : break;
5230 : :
5231 : 20582 : case BIT_AND_EXPR:
5232 : 20582 : case BIT_IOR_EXPR:
5233 : 20582 : case BIT_XOR_EXPR:
5234 : 20582 : if (! check_bool_pattern (rhs1, vinfo, stmts)
5235 : 20582 : || ! check_bool_pattern (gimple_assign_rhs2 (def_stmt), vinfo, stmts))
5236 : 20582 : return false;
5237 : : break;
5238 : :
5239 : 138377 : default:
5240 : 138377 : if (TREE_CODE_CLASS (rhs_code) == tcc_comparison)
5241 : : {
5242 : 124856 : tree vecitype, comp_vectype;
5243 : :
5244 : : /* If the comparison can throw, then is_gimple_condexpr will be
5245 : : false and we can't make a COND_EXPR/VEC_COND_EXPR out of it. */
5246 : 124856 : if (stmt_could_throw_p (cfun, def_stmt))
5247 : : return false;
5248 : :
5249 : 124785 : comp_vectype = get_vectype_for_scalar_type (vinfo, TREE_TYPE (rhs1));
5250 : 124785 : if (comp_vectype == NULL_TREE)
5251 : : return false;
5252 : :
5253 : 114323 : tree mask_type = get_mask_type_for_scalar_type (vinfo,
5254 : 114323 : TREE_TYPE (rhs1));
5255 : 114323 : if (mask_type
5256 : 114323 : && expand_vec_cmp_expr_p (comp_vectype, mask_type, rhs_code))
5257 : : return false;
5258 : :
5259 : 36261 : if (TREE_CODE (TREE_TYPE (rhs1)) != INTEGER_TYPE)
5260 : : {
5261 : 5163 : scalar_mode mode = SCALAR_TYPE_MODE (TREE_TYPE (rhs1));
5262 : 5163 : tree itype
5263 : 5163 : = build_nonstandard_integer_type (GET_MODE_BITSIZE (mode), 1);
5264 : 5163 : vecitype = get_vectype_for_scalar_type (vinfo, itype);
5265 : 5163 : if (vecitype == NULL_TREE)
5266 : 0 : return false;
5267 : : }
5268 : : else
5269 : : vecitype = comp_vectype;
5270 : 36261 : if (! expand_vec_cond_expr_p (vecitype, comp_vectype, rhs_code))
5271 : : return false;
5272 : : }
5273 : : else
5274 : : return false;
5275 : : break;
5276 : : }
5277 : :
5278 : 0 : bool res = stmts.add (def_stmt);
5279 : : /* We can't end up recursing when just visiting SSA defs but not PHIs. */
5280 : 0 : gcc_assert (!res);
5281 : :
5282 : : return true;
5283 : : }
5284 : :
5285 : :
5286 : : /* Helper function of adjust_bool_pattern. Add a cast to TYPE to a previous
5287 : : stmt (SSA_NAME_DEF_STMT of VAR) adding a cast to STMT_INFOs
5288 : : pattern sequence. */
5289 : :
5290 : : static tree
5291 : 0 : adjust_bool_pattern_cast (vec_info *vinfo,
5292 : : tree type, tree var, stmt_vec_info stmt_info)
5293 : : {
5294 : 0 : gimple *cast_stmt = gimple_build_assign (vect_recog_temp_ssa_var (type, NULL),
5295 : : NOP_EXPR, var);
5296 : 0 : append_pattern_def_seq (vinfo, stmt_info, cast_stmt,
5297 : : get_vectype_for_scalar_type (vinfo, type));
5298 : 0 : return gimple_assign_lhs (cast_stmt);
5299 : : }
5300 : :
5301 : : /* Helper function of vect_recog_bool_pattern. Do the actual transformations.
5302 : : VAR is an SSA_NAME that should be transformed from bool to a wider integer
5303 : : type, OUT_TYPE is the desired final integer type of the whole pattern.
5304 : : STMT_INFO is the info of the pattern root and is where pattern stmts should
5305 : : be associated with. DEFS is a map of pattern defs. */
5306 : :
5307 : : static void
5308 : 0 : adjust_bool_pattern (vec_info *vinfo, tree var, tree out_type,
5309 : : stmt_vec_info stmt_info, hash_map <tree, tree> &defs)
5310 : : {
5311 : 0 : gimple *stmt = SSA_NAME_DEF_STMT (var);
5312 : 0 : enum tree_code rhs_code, def_rhs_code;
5313 : 0 : tree itype, cond_expr, rhs1, rhs2, irhs1, irhs2;
5314 : 0 : location_t loc;
5315 : 0 : gimple *pattern_stmt, *def_stmt;
5316 : 0 : tree trueval = NULL_TREE;
5317 : :
5318 : 0 : rhs1 = gimple_assign_rhs1 (stmt);
5319 : 0 : rhs2 = gimple_assign_rhs2 (stmt);
5320 : 0 : rhs_code = gimple_assign_rhs_code (stmt);
5321 : 0 : loc = gimple_location (stmt);
5322 : 0 : switch (rhs_code)
5323 : : {
5324 : 0 : case SSA_NAME:
5325 : 0 : CASE_CONVERT:
5326 : 0 : irhs1 = *defs.get (rhs1);
5327 : 0 : itype = TREE_TYPE (irhs1);
5328 : 0 : pattern_stmt
5329 : 0 : = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
5330 : : SSA_NAME, irhs1);
5331 : 0 : break;
5332 : :
5333 : 0 : case BIT_NOT_EXPR:
5334 : 0 : irhs1 = *defs.get (rhs1);
5335 : 0 : itype = TREE_TYPE (irhs1);
5336 : 0 : pattern_stmt
5337 : 0 : = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
5338 : 0 : BIT_XOR_EXPR, irhs1, build_int_cst (itype, 1));
5339 : 0 : break;
5340 : :
5341 : 0 : case BIT_AND_EXPR:
5342 : : /* Try to optimize x = y & (a < b ? 1 : 0); into
5343 : : x = (a < b ? y : 0);
5344 : :
5345 : : E.g. for:
5346 : : bool a_b, b_b, c_b;
5347 : : TYPE d_T;
5348 : :
5349 : : S1 a_b = x1 CMP1 y1;
5350 : : S2 b_b = x2 CMP2 y2;
5351 : : S3 c_b = a_b & b_b;
5352 : : S4 d_T = (TYPE) c_b;
5353 : :
5354 : : we would normally emit:
5355 : :
5356 : : S1' a_T = x1 CMP1 y1 ? 1 : 0;
5357 : : S2' b_T = x2 CMP2 y2 ? 1 : 0;
5358 : : S3' c_T = a_T & b_T;
5359 : : S4' d_T = c_T;
5360 : :
5361 : : but we can save one stmt by using the
5362 : : result of one of the COND_EXPRs in the other COND_EXPR and leave
5363 : : BIT_AND_EXPR stmt out:
5364 : :
5365 : : S1' a_T = x1 CMP1 y1 ? 1 : 0;
5366 : : S3' c_T = x2 CMP2 y2 ? a_T : 0;
5367 : : S4' f_T = c_T;
5368 : :
5369 : : At least when VEC_COND_EXPR is implemented using masks
5370 : : cond ? 1 : 0 is as expensive as cond ? var : 0, in both cases it
5371 : : computes the comparison masks and ands it, in one case with
5372 : : all ones vector, in the other case with a vector register.
5373 : : Don't do this for BIT_IOR_EXPR, because cond ? 1 : var; is
5374 : : often more expensive. */
5375 : 0 : def_stmt = SSA_NAME_DEF_STMT (rhs2);
5376 : 0 : def_rhs_code = gimple_assign_rhs_code (def_stmt);
5377 : 0 : if (TREE_CODE_CLASS (def_rhs_code) == tcc_comparison)
5378 : : {
5379 : 0 : irhs1 = *defs.get (rhs1);
5380 : 0 : tree def_rhs1 = gimple_assign_rhs1 (def_stmt);
5381 : 0 : if (TYPE_PRECISION (TREE_TYPE (irhs1))
5382 : 0 : == GET_MODE_BITSIZE (SCALAR_TYPE_MODE (TREE_TYPE (def_rhs1))))
5383 : : {
5384 : 0 : rhs_code = def_rhs_code;
5385 : 0 : rhs1 = def_rhs1;
5386 : 0 : rhs2 = gimple_assign_rhs2 (def_stmt);
5387 : 0 : trueval = irhs1;
5388 : 0 : goto do_compare;
5389 : : }
5390 : : else
5391 : 0 : irhs2 = *defs.get (rhs2);
5392 : 0 : goto and_ior_xor;
5393 : : }
5394 : 0 : def_stmt = SSA_NAME_DEF_STMT (rhs1);
5395 : 0 : def_rhs_code = gimple_assign_rhs_code (def_stmt);
5396 : 0 : if (TREE_CODE_CLASS (def_rhs_code) == tcc_comparison)
5397 : : {
5398 : 0 : irhs2 = *defs.get (rhs2);
5399 : 0 : tree def_rhs1 = gimple_assign_rhs1 (def_stmt);
5400 : 0 : if (TYPE_PRECISION (TREE_TYPE (irhs2))
5401 : 0 : == GET_MODE_BITSIZE (SCALAR_TYPE_MODE (TREE_TYPE (def_rhs1))))
5402 : : {
5403 : 0 : rhs_code = def_rhs_code;
5404 : 0 : rhs1 = def_rhs1;
5405 : 0 : rhs2 = gimple_assign_rhs2 (def_stmt);
5406 : 0 : trueval = irhs2;
5407 : 0 : goto do_compare;
5408 : : }
5409 : : else
5410 : 0 : irhs1 = *defs.get (rhs1);
5411 : 0 : goto and_ior_xor;
5412 : : }
5413 : : /* FALLTHRU */
5414 : 0 : case BIT_IOR_EXPR:
5415 : 0 : case BIT_XOR_EXPR:
5416 : 0 : irhs1 = *defs.get (rhs1);
5417 : 0 : irhs2 = *defs.get (rhs2);
5418 : 0 : and_ior_xor:
5419 : 0 : if (TYPE_PRECISION (TREE_TYPE (irhs1))
5420 : 0 : != TYPE_PRECISION (TREE_TYPE (irhs2)))
5421 : : {
5422 : 0 : int prec1 = TYPE_PRECISION (TREE_TYPE (irhs1));
5423 : 0 : int prec2 = TYPE_PRECISION (TREE_TYPE (irhs2));
5424 : 0 : int out_prec = TYPE_PRECISION (out_type);
5425 : 0 : if (absu_hwi (out_prec - prec1) < absu_hwi (out_prec - prec2))
5426 : 0 : irhs2 = adjust_bool_pattern_cast (vinfo, TREE_TYPE (irhs1), irhs2,
5427 : : stmt_info);
5428 : 0 : else if (absu_hwi (out_prec - prec1) > absu_hwi (out_prec - prec2))
5429 : 0 : irhs1 = adjust_bool_pattern_cast (vinfo, TREE_TYPE (irhs2), irhs1,
5430 : : stmt_info);
5431 : : else
5432 : : {
5433 : 0 : irhs1 = adjust_bool_pattern_cast (vinfo,
5434 : : out_type, irhs1, stmt_info);
5435 : 0 : irhs2 = adjust_bool_pattern_cast (vinfo,
5436 : : out_type, irhs2, stmt_info);
5437 : : }
5438 : : }
5439 : 0 : itype = TREE_TYPE (irhs1);
5440 : 0 : pattern_stmt
5441 : 0 : = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
5442 : : rhs_code, irhs1, irhs2);
5443 : 0 : break;
5444 : :
5445 : 0 : default:
5446 : 0 : do_compare:
5447 : 0 : gcc_assert (TREE_CODE_CLASS (rhs_code) == tcc_comparison);
5448 : 0 : if (TREE_CODE (TREE_TYPE (rhs1)) != INTEGER_TYPE
5449 : 0 : || !TYPE_UNSIGNED (TREE_TYPE (rhs1))
5450 : 0 : || maybe_ne (TYPE_PRECISION (TREE_TYPE (rhs1)),
5451 : 0 : GET_MODE_BITSIZE (TYPE_MODE (TREE_TYPE (rhs1)))))
5452 : : {
5453 : 0 : scalar_mode mode = SCALAR_TYPE_MODE (TREE_TYPE (rhs1));
5454 : 0 : itype
5455 : 0 : = build_nonstandard_integer_type (GET_MODE_BITSIZE (mode), 1);
5456 : : }
5457 : : else
5458 : 0 : itype = TREE_TYPE (rhs1);
5459 : 0 : cond_expr = build2_loc (loc, rhs_code, itype, rhs1, rhs2);
5460 : 0 : if (trueval == NULL_TREE)
5461 : 0 : trueval = build_int_cst (itype, 1);
5462 : : else
5463 : 0 : gcc_checking_assert (useless_type_conversion_p (itype,
5464 : : TREE_TYPE (trueval)));
5465 : 0 : pattern_stmt
5466 : 0 : = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
5467 : : COND_EXPR, cond_expr, trueval,
5468 : 0 : build_int_cst (itype, 0));
5469 : 0 : break;
5470 : : }
5471 : :
5472 : 0 : gimple_set_location (pattern_stmt, loc);
5473 : 0 : append_pattern_def_seq (vinfo, stmt_info, pattern_stmt,
5474 : : get_vectype_for_scalar_type (vinfo, itype));
5475 : 0 : defs.put (var, gimple_assign_lhs (pattern_stmt));
5476 : 0 : }
5477 : :
5478 : : /* Comparison function to qsort a vector of gimple stmts after UID. */
5479 : :
5480 : : static int
5481 : 0 : sort_after_uid (const void *p1, const void *p2)
5482 : : {
5483 : 0 : const gimple *stmt1 = *(const gimple * const *)p1;
5484 : 0 : const gimple *stmt2 = *(const gimple * const *)p2;
5485 : 0 : return gimple_uid (stmt1) - gimple_uid (stmt2);
5486 : : }
5487 : :
5488 : : /* Create pattern stmts for all stmts participating in the bool pattern
5489 : : specified by BOOL_STMT_SET and its root STMT_INFO with the desired type
5490 : : OUT_TYPE. Return the def of the pattern root. */
5491 : :
5492 : : static tree
5493 : 0 : adjust_bool_stmts (vec_info *vinfo, hash_set <gimple *> &bool_stmt_set,
5494 : : tree out_type, stmt_vec_info stmt_info)
5495 : : {
5496 : : /* Gather original stmts in the bool pattern in their order of appearance
5497 : : in the IL. */
5498 : 0 : auto_vec<gimple *> bool_stmts (bool_stmt_set.elements ());
5499 : 0 : for (hash_set <gimple *>::iterator i = bool_stmt_set.begin ();
5500 : 0 : i != bool_stmt_set.end (); ++i)
5501 : 0 : bool_stmts.quick_push (*i);
5502 : 0 : bool_stmts.qsort (sort_after_uid);
5503 : :
5504 : : /* Now process them in that order, producing pattern stmts. */
5505 : 0 : hash_map <tree, tree> defs;
5506 : 0 : for (unsigned i = 0; i < bool_stmts.length (); ++i)
5507 : 0 : adjust_bool_pattern (vinfo, gimple_assign_lhs (bool_stmts[i]),
5508 : : out_type, stmt_info, defs);
5509 : :
5510 : : /* Pop the last pattern seq stmt and install it as pattern root for STMT. */
5511 : 0 : gimple *pattern_stmt
5512 : 0 : = gimple_seq_last_stmt (STMT_VINFO_PATTERN_DEF_SEQ (stmt_info));
5513 : 0 : return gimple_assign_lhs (pattern_stmt);
5514 : 0 : }
5515 : :
5516 : : /* Return the proper type for converting bool VAR into
5517 : : an integer value or NULL_TREE if no such type exists.
5518 : : The type is chosen so that the converted value has the
5519 : : same number of elements as VAR's vector type. */
5520 : :
5521 : : static tree
5522 : 2680591 : integer_type_for_mask (tree var, vec_info *vinfo)
5523 : : {
5524 : 2680591 : if (!VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (var)))
5525 : : return NULL_TREE;
5526 : :
5527 : 844149 : stmt_vec_info def_stmt_info = vect_get_internal_def (vinfo, var);
5528 : 844149 : if (!def_stmt_info || !vect_use_mask_type_p (def_stmt_info))
5529 : : return NULL_TREE;
5530 : :
5531 : 432155 : return build_nonstandard_integer_type (def_stmt_info->mask_precision, 1);
5532 : : }
5533 : :
5534 : : /* Function vect_recog_gcond_pattern
5535 : :
5536 : : Try to find pattern like following:
5537 : :
5538 : : if (a op b)
5539 : :
5540 : : where operator 'op' is not != and convert it to an adjusted boolean pattern
5541 : :
5542 : : mask = a op b
5543 : : if (mask != 0)
5544 : :
5545 : : and set the mask type on MASK.
5546 : :
5547 : : Input:
5548 : :
5549 : : * STMT_VINFO: The stmt at the end from which the pattern
5550 : : search begins, i.e. cast of a bool to
5551 : : an integer type.
5552 : :
5553 : : Output:
5554 : :
5555 : : * TYPE_OUT: The type of the output of this pattern.
5556 : :
5557 : : * Return value: A new stmt that will be used to replace the pattern. */
5558 : :
5559 : : static gimple *
5560 : 25109271 : vect_recog_gcond_pattern (vec_info *vinfo,
5561 : : stmt_vec_info stmt_vinfo, tree *type_out)
5562 : : {
5563 : : /* Currently we only support this for loop vectorization and when multiple
5564 : : exits. */
5565 : 25109271 : loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
5566 : 2899216 : if (!loop_vinfo || !LOOP_VINFO_EARLY_BREAKS (loop_vinfo))
5567 : : return NULL;
5568 : :
5569 : 808566 : gimple *last_stmt = STMT_VINFO_STMT (stmt_vinfo);
5570 : 808566 : gcond* cond = NULL;
5571 : 25126758 : if (!(cond = dyn_cast <gcond *> (last_stmt)))
5572 : : return NULL;
5573 : :
5574 : 214963 : auto lhs = gimple_cond_lhs (cond);
5575 : 214963 : auto rhs = gimple_cond_rhs (cond);
5576 : 214963 : auto code = gimple_cond_code (cond);
5577 : :
5578 : 214963 : tree scalar_type = TREE_TYPE (lhs);
5579 : 214963 : if (VECTOR_TYPE_P (scalar_type))
5580 : : return NULL;
5581 : :
5582 : 214963 : if (code == NE_EXPR
5583 : 126980 : && zerop (rhs)
5584 : 267815 : && VECT_SCALAR_BOOLEAN_TYPE_P (scalar_type))
5585 : : return NULL;
5586 : :
5587 : 210401 : tree vecitype = get_vectype_for_scalar_type (vinfo, scalar_type);
5588 : 210401 : if (vecitype == NULL_TREE)
5589 : : return NULL;
5590 : :
5591 : 197476 : tree vectype = truth_type_for (vecitype);
5592 : :
5593 : 197476 : tree new_lhs = vect_recog_temp_ssa_var (boolean_type_node, NULL);
5594 : 197476 : gimple *new_stmt = gimple_build_assign (new_lhs, code, lhs, rhs);
5595 : 197476 : append_pattern_def_seq (vinfo, stmt_vinfo, new_stmt, vectype, scalar_type);
5596 : :
5597 : 197476 : gimple *pattern_stmt
5598 : 197476 : = gimple_build_cond (NE_EXPR, new_lhs,
5599 : 197476 : build_int_cst (TREE_TYPE (new_lhs), 0),
5600 : : NULL_TREE, NULL_TREE);
5601 : 197476 : *type_out = vectype;
5602 : 197476 : vect_pattern_detected ("vect_recog_gcond_pattern", last_stmt);
5603 : 197476 : return pattern_stmt;
5604 : : }
5605 : :
5606 : : /* Function vect_recog_bool_pattern
5607 : :
5608 : : Try to find pattern like following:
5609 : :
5610 : : bool a_b, b_b, c_b, d_b, e_b;
5611 : : TYPE f_T;
5612 : : loop:
5613 : : S1 a_b = x1 CMP1 y1;
5614 : : S2 b_b = x2 CMP2 y2;
5615 : : S3 c_b = a_b & b_b;
5616 : : S4 d_b = x3 CMP3 y3;
5617 : : S5 e_b = c_b | d_b;
5618 : : S6 f_T = (TYPE) e_b;
5619 : :
5620 : : where type 'TYPE' is an integral type. Or a similar pattern
5621 : : ending in
5622 : :
5623 : : S6 f_Y = e_b ? r_Y : s_Y;
5624 : :
5625 : : as results from if-conversion of a complex condition.
5626 : :
5627 : : Input:
5628 : :
5629 : : * STMT_VINFO: The stmt at the end from which the pattern
5630 : : search begins, i.e. cast of a bool to
5631 : : an integer type.
5632 : :
5633 : : Output:
5634 : :
5635 : : * TYPE_OUT: The type of the output of this pattern.
5636 : :
5637 : : * Return value: A new stmt that will be used to replace the pattern.
5638 : :
5639 : : Assuming size of TYPE is the same as size of all comparisons
5640 : : (otherwise some casts would be added where needed), the above
5641 : : sequence we create related pattern stmts:
5642 : : S1' a_T = x1 CMP1 y1 ? 1 : 0;
5643 : : S3' c_T = x2 CMP2 y2 ? a_T : 0;
5644 : : S4' d_T = x3 CMP3 y3 ? 1 : 0;
5645 : : S5' e_T = c_T | d_T;
5646 : : S6' f_T = e_T;
5647 : :
5648 : : Instead of the above S3' we could emit:
5649 : : S2' b_T = x2 CMP2 y2 ? 1 : 0;
5650 : : S3' c_T = a_T | b_T;
5651 : : but the above is more efficient. */
5652 : :
5653 : : static gimple *
5654 : 25109271 : vect_recog_bool_pattern (vec_info *vinfo,
5655 : : stmt_vec_info stmt_vinfo, tree *type_out)
5656 : : {
5657 : 25109271 : gimple *last_stmt = stmt_vinfo->stmt;
5658 : 25109271 : enum tree_code rhs_code;
5659 : 25109271 : tree var, lhs, rhs, vectype;
5660 : 25109271 : gimple *pattern_stmt;
5661 : :
5662 : 25109271 : if (!is_gimple_assign (last_stmt))
5663 : : return NULL;
5664 : :
5665 : 17494215 : var = gimple_assign_rhs1 (last_stmt);
5666 : 17494215 : lhs = gimple_assign_lhs (last_stmt);
5667 : 17494215 : rhs_code = gimple_assign_rhs_code (last_stmt);
5668 : :
5669 : 17494215 : if (rhs_code == VIEW_CONVERT_EXPR)
5670 : 167704 : var = TREE_OPERAND (var, 0);
5671 : :
5672 : 17494215 : if (!VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (var)))
5673 : : return NULL;
5674 : :
5675 : 573595 : hash_set<gimple *> bool_stmts;
5676 : :
5677 : 573595 : if (CONVERT_EXPR_CODE_P (rhs_code)
5678 : 490984 : || rhs_code == VIEW_CONVERT_EXPR)
5679 : : {
5680 : 171113 : if (! INTEGRAL_TYPE_P (TREE_TYPE (lhs))
5681 : 170987 : || VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (lhs)))
5682 : : return NULL;
5683 : 82513 : vectype = get_vectype_for_scalar_type (vinfo, TREE_TYPE (lhs));
5684 : :
5685 : 82513 : if (check_bool_pattern (var, vinfo, bool_stmts))
5686 : : {
5687 : 0 : rhs = adjust_bool_stmts (vinfo, bool_stmts,
5688 : 0 : TREE_TYPE (lhs), stmt_vinfo);
5689 : 0 : lhs = vect_recog_temp_ssa_var (TREE_TYPE (lhs), NULL);
5690 : 0 : if (useless_type_conversion_p (TREE_TYPE (lhs), TREE_TYPE (rhs)))
5691 : 0 : pattern_stmt = gimple_build_assign (lhs, SSA_NAME, rhs);
5692 : : else
5693 : 0 : pattern_stmt
5694 : 0 : = gimple_build_assign (lhs, NOP_EXPR, rhs);
5695 : : }
5696 : : else
5697 : : {
5698 : 82513 : tree type = integer_type_for_mask (var, vinfo);
5699 : 82513 : tree cst0, cst1, tmp;
5700 : :
5701 : 82513 : if (!type)
5702 : : return NULL;
5703 : :
5704 : : /* We may directly use cond with narrowed type to avoid
5705 : : multiple cond exprs with following result packing and
5706 : : perform single cond with packed mask instead. In case
5707 : : of widening we better make cond first and then extract
5708 : : results. */
5709 : 37637 : if (TYPE_MODE (type) == TYPE_MODE (TREE_TYPE (lhs)))
5710 : 26281 : type = TREE_TYPE (lhs);
5711 : :
5712 : 37637 : cst0 = build_int_cst (type, 0);
5713 : 37637 : cst1 = build_int_cst (type, 1);
5714 : 37637 : tmp = vect_recog_temp_ssa_var (type, NULL);
5715 : 37637 : pattern_stmt = gimple_build_assign (tmp, COND_EXPR, var, cst1, cst0);
5716 : :
5717 : 37637 : if (!useless_type_conversion_p (type, TREE_TYPE (lhs)))
5718 : : {
5719 : 11356 : tree new_vectype = get_vectype_for_scalar_type (vinfo, type);
5720 : 11356 : append_pattern_def_seq (vinfo, stmt_vinfo,
5721 : : pattern_stmt, new_vectype);
5722 : :
5723 : 11356 : lhs = vect_recog_temp_ssa_var (TREE_TYPE (lhs), NULL);
5724 : 11356 : pattern_stmt = gimple_build_assign (lhs, CONVERT_EXPR, tmp);
5725 : : }
5726 : : }
5727 : :
5728 : 37637 : *type_out = vectype;
5729 : 37637 : vect_pattern_detected ("vect_recog_bool_pattern", last_stmt);
5730 : :
5731 : 37637 : return pattern_stmt;
5732 : : }
5733 : 487700 : else if (rhs_code == COND_EXPR
5734 : 110137 : && TREE_CODE (var) == SSA_NAME)
5735 : : {
5736 : 68414 : vectype = get_vectype_for_scalar_type (vinfo, TREE_TYPE (lhs));
5737 : 68414 : if (vectype == NULL_TREE)
5738 : : return NULL;
5739 : :
5740 : : /* Build a scalar type for the boolean result that when
5741 : : vectorized matches the vector type of the result in
5742 : : size and number of elements. */
5743 : 60097 : unsigned prec
5744 : 60097 : = vector_element_size (tree_to_poly_uint64 (TYPE_SIZE (vectype)),
5745 : : TYPE_VECTOR_SUBPARTS (vectype));
5746 : :
5747 : 60097 : tree type
5748 : 120194 : = build_nonstandard_integer_type (prec,
5749 : 60097 : TYPE_UNSIGNED (TREE_TYPE (var)));
5750 : 60097 : if (get_vectype_for_scalar_type (vinfo, type) == NULL_TREE)
5751 : : return NULL;
5752 : :
5753 : 60097 : if (check_bool_pattern (var, vinfo, bool_stmts))
5754 : 0 : var = adjust_bool_stmts (vinfo, bool_stmts, type, stmt_vinfo);
5755 : 60097 : else if (integer_type_for_mask (var, vinfo))
5756 : : return NULL;
5757 : :
5758 : 21335 : lhs = vect_recog_temp_ssa_var (TREE_TYPE (lhs), NULL);
5759 : 21335 : pattern_stmt
5760 : 21335 : = gimple_build_assign (lhs, COND_EXPR,
5761 : : build2 (NE_EXPR, boolean_type_node,
5762 : 21335 : var, build_int_cst (TREE_TYPE (var), 0)),
5763 : : gimple_assign_rhs2 (last_stmt),
5764 : : gimple_assign_rhs3 (last_stmt));
5765 : 21335 : *type_out = vectype;
5766 : 21335 : vect_pattern_detected ("vect_recog_bool_pattern", last_stmt);
5767 : :
5768 : 21335 : return pattern_stmt;
5769 : : }
5770 : 419286 : else if (rhs_code == SSA_NAME
5771 : 16510 : && STMT_VINFO_DATA_REF (stmt_vinfo))
5772 : : {
5773 : 7920 : stmt_vec_info pattern_stmt_info;
5774 : 7920 : vectype = get_vectype_for_scalar_type (vinfo, TREE_TYPE (lhs));
5775 : 7920 : if (!vectype || !VECTOR_MODE_P (TYPE_MODE (vectype)))
5776 : 0 : return NULL;
5777 : :
5778 : 7920 : if (check_bool_pattern (var, vinfo, bool_stmts))
5779 : 0 : rhs = adjust_bool_stmts (vinfo, bool_stmts,
5780 : 0 : TREE_TYPE (vectype), stmt_vinfo);
5781 : : else
5782 : : {
5783 : 7920 : tree type = integer_type_for_mask (var, vinfo);
5784 : 7920 : tree cst0, cst1, new_vectype;
5785 : :
5786 : 7920 : if (!type)
5787 : : return NULL;
5788 : :
5789 : 4536 : if (TYPE_MODE (type) == TYPE_MODE (TREE_TYPE (vectype)))
5790 : 521 : type = TREE_TYPE (vectype);
5791 : :
5792 : 4536 : cst0 = build_int_cst (type, 0);
5793 : 4536 : cst1 = build_int_cst (type, 1);
5794 : 4536 : new_vectype = get_vectype_for_scalar_type (vinfo, type);
5795 : :
5796 : 4536 : rhs = vect_recog_temp_ssa_var (type, NULL);
5797 : 4536 : pattern_stmt = gimple_build_assign (rhs, COND_EXPR, var, cst1, cst0);
5798 : 4536 : append_pattern_def_seq (vinfo, stmt_vinfo, pattern_stmt, new_vectype);
5799 : : }
5800 : :
5801 : 4536 : lhs = build1 (VIEW_CONVERT_EXPR, TREE_TYPE (vectype), lhs);
5802 : 4536 : if (!useless_type_conversion_p (TREE_TYPE (lhs), TREE_TYPE (rhs)))
5803 : : {
5804 : 4015 : tree rhs2 = vect_recog_temp_ssa_var (TREE_TYPE (lhs), NULL);
5805 : 4015 : gimple *cast_stmt = gimple_build_assign (rhs2, NOP_EXPR, rhs);
5806 : 4015 : append_pattern_def_seq (vinfo, stmt_vinfo, cast_stmt);
5807 : 4015 : rhs = rhs2;
5808 : : }
5809 : 4536 : pattern_stmt = gimple_build_assign (lhs, SSA_NAME, rhs);
5810 : 4536 : pattern_stmt_info = vinfo->add_stmt (pattern_stmt);
5811 : 4536 : vinfo->move_dr (pattern_stmt_info, stmt_vinfo);
5812 : 4536 : *type_out = vectype;
5813 : 4536 : vect_pattern_detected ("vect_recog_bool_pattern", last_stmt);
5814 : :
5815 : 4536 : return pattern_stmt;
5816 : : }
5817 : : else
5818 : : return NULL;
5819 : 573595 : }
5820 : :
5821 : :
5822 : : /* A helper for vect_recog_mask_conversion_pattern. Build
5823 : : conversion of MASK to a type suitable for masking VECTYPE.
5824 : : Built statement gets required vectype and is appended to
5825 : : a pattern sequence of STMT_VINFO.
5826 : :
5827 : : Return converted mask. */
5828 : :
5829 : : static tree
5830 : 32514 : build_mask_conversion (vec_info *vinfo,
5831 : : tree mask, tree vectype, stmt_vec_info stmt_vinfo)
5832 : : {
5833 : 32514 : gimple *stmt;
5834 : 32514 : tree masktype, tmp;
5835 : :
5836 : 32514 : masktype = truth_type_for (vectype);
5837 : 32514 : tmp = vect_recog_temp_ssa_var (TREE_TYPE (masktype), NULL);
5838 : 32514 : stmt = gimple_build_assign (tmp, CONVERT_EXPR, mask);
5839 : 32514 : append_pattern_def_seq (vinfo, stmt_vinfo,
5840 : 32514 : stmt, masktype, TREE_TYPE (vectype));
5841 : :
5842 : 32514 : return tmp;
5843 : : }
5844 : :
5845 : :
5846 : : /* Function vect_recog_mask_conversion_pattern
5847 : :
5848 : : Try to find statements which require boolean type
5849 : : converison. Additional conversion statements are
5850 : : added to handle such cases. For example:
5851 : :
5852 : : bool m_1, m_2, m_3;
5853 : : int i_4, i_5;
5854 : : double d_6, d_7;
5855 : : char c_1, c_2, c_3;
5856 : :
5857 : : S1 m_1 = i_4 > i_5;
5858 : : S2 m_2 = d_6 < d_7;
5859 : : S3 m_3 = m_1 & m_2;
5860 : : S4 c_1 = m_3 ? c_2 : c_3;
5861 : :
5862 : : Will be transformed into:
5863 : :
5864 : : S1 m_1 = i_4 > i_5;
5865 : : S2 m_2 = d_6 < d_7;
5866 : : S3'' m_2' = (_Bool[bitsize=32])m_2
5867 : : S3' m_3' = m_1 & m_2';
5868 : : S4'' m_3'' = (_Bool[bitsize=8])m_3'
5869 : : S4' c_1' = m_3'' ? c_2 : c_3; */
5870 : :
5871 : : static gimple *
5872 : 25065696 : vect_recog_mask_conversion_pattern (vec_info *vinfo,
5873 : : stmt_vec_info stmt_vinfo, tree *type_out)
5874 : : {
5875 : 25065696 : gimple *last_stmt = stmt_vinfo->stmt;
5876 : 25065696 : enum tree_code rhs_code;
5877 : 25065696 : tree lhs = NULL_TREE, rhs1, rhs2, tmp, rhs1_type, rhs2_type;
5878 : 25065696 : tree vectype1, vectype2;
5879 : 25065696 : stmt_vec_info pattern_stmt_info;
5880 : 25065696 : tree rhs1_op0 = NULL_TREE, rhs1_op1 = NULL_TREE;
5881 : 25065696 : tree rhs1_op0_type = NULL_TREE, rhs1_op1_type = NULL_TREE;
5882 : :
5883 : : /* Check for MASK_LOAD and MASK_STORE as well as COND_OP calls requiring mask
5884 : : conversion. */
5885 : 25065696 : if (is_gimple_call (last_stmt)
5886 : 25065696 : && gimple_call_internal_p (last_stmt))
5887 : : {
5888 : 82444 : gcall *pattern_stmt;
5889 : :
5890 : 82444 : internal_fn ifn = gimple_call_internal_fn (last_stmt);
5891 : 82444 : int mask_argno = internal_fn_mask_index (ifn);
5892 : 82444 : if (mask_argno < 0)
5893 : : return NULL;
5894 : :
5895 : 9425 : bool store_p = internal_store_fn_p (ifn);
5896 : 9425 : bool load_p = internal_store_fn_p (ifn);
5897 : 9425 : if (store_p)
5898 : : {
5899 : 2484 : int rhs_index = internal_fn_stored_value_index (ifn);
5900 : 2484 : tree rhs = gimple_call_arg (last_stmt, rhs_index);
5901 : 2484 : vectype1 = get_vectype_for_scalar_type (vinfo, TREE_TYPE (rhs));
5902 : : }
5903 : : else
5904 : : {
5905 : 6941 : lhs = gimple_call_lhs (last_stmt);
5906 : 6941 : if (!lhs)
5907 : : return NULL;
5908 : 6941 : vectype1 = get_vectype_for_scalar_type (vinfo, TREE_TYPE (lhs));
5909 : : }
5910 : :
5911 : 9425 : if (!vectype1)
5912 : : return NULL;
5913 : :
5914 : 9314 : tree mask_arg = gimple_call_arg (last_stmt, mask_argno);
5915 : 9314 : tree mask_arg_type = integer_type_for_mask (mask_arg, vinfo);
5916 : 9314 : if (mask_arg_type)
5917 : : {
5918 : 8297 : vectype2 = get_mask_type_for_scalar_type (vinfo, mask_arg_type);
5919 : :
5920 : 8297 : if (!vectype2
5921 : 8297 : || known_eq (TYPE_VECTOR_SUBPARTS (vectype1),
5922 : : TYPE_VECTOR_SUBPARTS (vectype2)))
5923 : 5079 : return NULL;
5924 : : }
5925 : 1017 : else if (store_p || load_p)
5926 : : return NULL;
5927 : :
5928 : 4020 : tmp = build_mask_conversion (vinfo, mask_arg, vectype1, stmt_vinfo);
5929 : :
5930 : 4020 : auto_vec<tree, 8> args;
5931 : 4020 : unsigned int nargs = gimple_call_num_args (last_stmt);
5932 : 4020 : args.safe_grow (nargs, true);
5933 : 18733 : for (unsigned int i = 0; i < nargs; ++i)
5934 : 14713 : args[i] = ((int) i == mask_argno
5935 : 14713 : ? tmp
5936 : 10693 : : gimple_call_arg (last_stmt, i));
5937 : 4020 : pattern_stmt = gimple_build_call_internal_vec (ifn, args);
5938 : :
5939 : 4020 : if (!store_p)
5940 : : {
5941 : 3567 : lhs = vect_recog_temp_ssa_var (TREE_TYPE (lhs), NULL);
5942 : 3567 : gimple_call_set_lhs (pattern_stmt, lhs);
5943 : : }
5944 : :
5945 : 3567 : if (load_p || store_p)
5946 : 453 : gimple_call_set_nothrow (pattern_stmt, true);
5947 : :
5948 : 4020 : pattern_stmt_info = vinfo->add_stmt (pattern_stmt);
5949 : 4020 : if (STMT_VINFO_DATA_REF (stmt_vinfo))
5950 : 1820 : vinfo->move_dr (pattern_stmt_info, stmt_vinfo);
5951 : :
5952 : 4020 : *type_out = vectype1;
5953 : 4020 : vect_pattern_detected ("vect_recog_mask_conversion_pattern", last_stmt);
5954 : :
5955 : 4020 : return pattern_stmt;
5956 : 4020 : }
5957 : :
5958 : 24983252 : if (!is_gimple_assign (last_stmt))
5959 : : return NULL;
5960 : :
5961 : 17450640 : gimple *pattern_stmt;
5962 : 17450640 : lhs = gimple_assign_lhs (last_stmt);
5963 : 17450640 : rhs1 = gimple_assign_rhs1 (last_stmt);
5964 : 17450640 : rhs_code = gimple_assign_rhs_code (last_stmt);
5965 : :
5966 : : /* Check for cond expression requiring mask conversion. */
5967 : 17450640 : if (rhs_code == COND_EXPR)
5968 : : {
5969 : 104740 : vectype1 = get_vectype_for_scalar_type (vinfo, TREE_TYPE (lhs));
5970 : :
5971 : 104740 : if (TREE_CODE (rhs1) == SSA_NAME)
5972 : : {
5973 : 62991 : rhs1_type = integer_type_for_mask (rhs1, vinfo);
5974 : 62991 : if (!rhs1_type)
5975 : : return NULL;
5976 : : }
5977 : 41749 : else if (COMPARISON_CLASS_P (rhs1))
5978 : : {
5979 : : /* Check whether we're comparing scalar booleans and (if so)
5980 : : whether a better mask type exists than the mask associated
5981 : : with boolean-sized elements. This avoids unnecessary packs
5982 : : and unpacks if the booleans are set from comparisons of
5983 : : wider types. E.g. in:
5984 : :
5985 : : int x1, x2, x3, x4, y1, y1;
5986 : : ...
5987 : : bool b1 = (x1 == x2);
5988 : : bool b2 = (x3 == x4);
5989 : : ... = b1 == b2 ? y1 : y2;
5990 : :
5991 : : it is better for b1 and b2 to use the mask type associated
5992 : : with int elements rather bool (byte) elements. */
5993 : 41749 : rhs1_op0 = TREE_OPERAND (rhs1, 0);
5994 : 41749 : rhs1_op1 = TREE_OPERAND (rhs1, 1);
5995 : 41749 : if (!rhs1_op0 || !rhs1_op1)
5996 : : return NULL;
5997 : 41749 : rhs1_op0_type = integer_type_for_mask (rhs1_op0, vinfo);
5998 : 41749 : rhs1_op1_type = integer_type_for_mask (rhs1_op1, vinfo);
5999 : :
6000 : 41749 : if (!rhs1_op0_type)
6001 : 41749 : rhs1_type = TREE_TYPE (rhs1_op0);
6002 : 0 : else if (!rhs1_op1_type)
6003 : 0 : rhs1_type = TREE_TYPE (rhs1_op1);
6004 : 0 : else if (TYPE_PRECISION (rhs1_op0_type)
6005 : 0 : != TYPE_PRECISION (rhs1_op1_type))
6006 : : {
6007 : 0 : int tmp0 = (int) TYPE_PRECISION (rhs1_op0_type)
6008 : 0 : - (int) TYPE_PRECISION (TREE_TYPE (lhs));
6009 : 0 : int tmp1 = (int) TYPE_PRECISION (rhs1_op1_type)
6010 : 0 : - (int) TYPE_PRECISION (TREE_TYPE (lhs));
6011 : 0 : if ((tmp0 > 0 && tmp1 > 0) || (tmp0 < 0 && tmp1 < 0))
6012 : : {
6013 : 0 : if (abs (tmp0) > abs (tmp1))
6014 : : rhs1_type = rhs1_op1_type;
6015 : : else
6016 : 0 : rhs1_type = rhs1_op0_type;
6017 : : }
6018 : : else
6019 : 0 : rhs1_type = build_nonstandard_integer_type
6020 : 0 : (TYPE_PRECISION (TREE_TYPE (lhs)), 1);
6021 : : }
6022 : : else
6023 : : rhs1_type = rhs1_op0_type;
6024 : : }
6025 : : else
6026 : : return NULL;
6027 : :
6028 : 97923 : vectype2 = get_mask_type_for_scalar_type (vinfo, rhs1_type);
6029 : :
6030 : 97923 : if (!vectype1 || !vectype2)
6031 : : return NULL;
6032 : :
6033 : : /* Continue if a conversion is needed. Also continue if we have
6034 : : a comparison whose vector type would normally be different from
6035 : : VECTYPE2 when considered in isolation. In that case we'll
6036 : : replace the comparison with an SSA name (so that we can record
6037 : : its vector type) and behave as though the comparison was an SSA
6038 : : name from the outset. */
6039 : 96403 : if (known_eq (TYPE_VECTOR_SUBPARTS (vectype1),
6040 : : TYPE_VECTOR_SUBPARTS (vectype2))
6041 : 80303 : && !rhs1_op0_type
6042 : 96403 : && !rhs1_op1_type)
6043 : 80303 : return NULL;
6044 : :
6045 : : /* If rhs1 is invariant and we can promote it leave the COND_EXPR
6046 : : in place, we can handle it in vectorizable_condition. This avoids
6047 : : unnecessary promotion stmts and increased vectorization factor. */
6048 : 16100 : if (COMPARISON_CLASS_P (rhs1)
6049 : 20 : && INTEGRAL_TYPE_P (rhs1_type)
6050 : 16120 : && known_le (TYPE_VECTOR_SUBPARTS (vectype1),
6051 : : TYPE_VECTOR_SUBPARTS (vectype2)))
6052 : : {
6053 : 20 : enum vect_def_type dt;
6054 : 20 : if (vect_is_simple_use (TREE_OPERAND (rhs1, 0), vinfo, &dt)
6055 : 20 : && dt == vect_external_def
6056 : 0 : && vect_is_simple_use (TREE_OPERAND (rhs1, 1), vinfo, &dt)
6057 : 20 : && (dt == vect_external_def
6058 : 0 : || dt == vect_constant_def))
6059 : : {
6060 : 0 : tree wide_scalar_type = build_nonstandard_integer_type
6061 : 0 : (vector_element_bits (vectype1), TYPE_UNSIGNED (rhs1_type));
6062 : 0 : tree vectype3 = get_vectype_for_scalar_type (vinfo,
6063 : : wide_scalar_type);
6064 : 0 : if (expand_vec_cond_expr_p (vectype1, vectype3, TREE_CODE (rhs1)))
6065 : 0 : return NULL;
6066 : : }
6067 : : }
6068 : :
6069 : : /* If rhs1 is a comparison we need to move it into a
6070 : : separate statement. */
6071 : 16100 : if (TREE_CODE (rhs1) != SSA_NAME)
6072 : : {
6073 : 20 : tmp = vect_recog_temp_ssa_var (TREE_TYPE (rhs1), NULL);
6074 : 20 : if (rhs1_op0_type
6075 : 20 : && TYPE_PRECISION (rhs1_op0_type) != TYPE_PRECISION (rhs1_type))
6076 : 0 : rhs1_op0 = build_mask_conversion (vinfo, rhs1_op0,
6077 : : vectype2, stmt_vinfo);
6078 : 20 : if (rhs1_op1_type
6079 : 20 : && TYPE_PRECISION (rhs1_op1_type) != TYPE_PRECISION (rhs1_type))
6080 : 0 : rhs1_op1 = build_mask_conversion (vinfo, rhs1_op1,
6081 : : vectype2, stmt_vinfo);
6082 : 20 : pattern_stmt = gimple_build_assign (tmp, TREE_CODE (rhs1),
6083 : : rhs1_op0, rhs1_op1);
6084 : 20 : rhs1 = tmp;
6085 : 20 : append_pattern_def_seq (vinfo, stmt_vinfo, pattern_stmt, vectype2,
6086 : : rhs1_type);
6087 : : }
6088 : :
6089 : 16100 : if (maybe_ne (TYPE_VECTOR_SUBPARTS (vectype1),
6090 : 32200 : TYPE_VECTOR_SUBPARTS (vectype2)))
6091 : 16100 : tmp = build_mask_conversion (vinfo, rhs1, vectype1, stmt_vinfo);
6092 : : else
6093 : : tmp = rhs1;
6094 : :
6095 : 16100 : lhs = vect_recog_temp_ssa_var (TREE_TYPE (lhs), NULL);
6096 : 16100 : pattern_stmt = gimple_build_assign (lhs, COND_EXPR, tmp,
6097 : : gimple_assign_rhs2 (last_stmt),
6098 : : gimple_assign_rhs3 (last_stmt));
6099 : :
6100 : 16100 : *type_out = vectype1;
6101 : 16100 : vect_pattern_detected ("vect_recog_mask_conversion_pattern", last_stmt);
6102 : :
6103 : 16100 : return pattern_stmt;
6104 : : }
6105 : :
6106 : : /* Now check for binary boolean operations requiring conversion for
6107 : : one of operands. */
6108 : 17345900 : if (!VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (lhs)))
6109 : : return NULL;
6110 : :
6111 : 1287529 : if (rhs_code != BIT_IOR_EXPR
6112 : : && rhs_code != BIT_XOR_EXPR
6113 : 1287529 : && rhs_code != BIT_AND_EXPR
6114 : 979850 : && TREE_CODE_CLASS (rhs_code) != tcc_comparison)
6115 : : return NULL;
6116 : :
6117 : 1187129 : rhs2 = gimple_assign_rhs2 (last_stmt);
6118 : :
6119 : 1187129 : rhs1_type = integer_type_for_mask (rhs1, vinfo);
6120 : 1187129 : rhs2_type = integer_type_for_mask (rhs2, vinfo);
6121 : :
6122 : 1187129 : if (!rhs1_type || !rhs2_type
6123 : 1187129 : || TYPE_PRECISION (rhs1_type) == TYPE_PRECISION (rhs2_type))
6124 : : return NULL;
6125 : :
6126 : 12394 : if (TYPE_PRECISION (rhs1_type) < TYPE_PRECISION (rhs2_type))
6127 : : {
6128 : 8120 : vectype1 = get_mask_type_for_scalar_type (vinfo, rhs1_type);
6129 : 8120 : if (!vectype1)
6130 : : return NULL;
6131 : 8120 : rhs2 = build_mask_conversion (vinfo, rhs2, vectype1, stmt_vinfo);
6132 : : }
6133 : : else
6134 : : {
6135 : 4274 : vectype1 = get_mask_type_for_scalar_type (vinfo, rhs2_type);
6136 : 4274 : if (!vectype1)
6137 : : return NULL;
6138 : 4274 : rhs1 = build_mask_conversion (vinfo, rhs1, vectype1, stmt_vinfo);
6139 : : }
6140 : :
6141 : 12394 : lhs = vect_recog_temp_ssa_var (TREE_TYPE (lhs), NULL);
6142 : 12394 : pattern_stmt = gimple_build_assign (lhs, rhs_code, rhs1, rhs2);
6143 : :
6144 : 12394 : *type_out = vectype1;
6145 : 12394 : vect_pattern_detected ("vect_recog_mask_conversion_pattern", last_stmt);
6146 : :
6147 : 12394 : return pattern_stmt;
6148 : : }
6149 : :
6150 : : /* STMT_INFO is a load or store. If the load or store is conditional, return
6151 : : the boolean condition under which it occurs, otherwise return null. */
6152 : :
6153 : : static tree
6154 : 28959 : vect_get_load_store_mask (stmt_vec_info stmt_info)
6155 : : {
6156 : 28959 : if (gassign *def_assign = dyn_cast <gassign *> (stmt_info->stmt))
6157 : : {
6158 : 27529 : gcc_assert (gimple_assign_single_p (def_assign));
6159 : : return NULL_TREE;
6160 : : }
6161 : :
6162 : 1430 : if (gcall *def_call = dyn_cast <gcall *> (stmt_info->stmt))
6163 : : {
6164 : 1430 : internal_fn ifn = gimple_call_internal_fn (def_call);
6165 : 1430 : int mask_index = internal_fn_mask_index (ifn);
6166 : 1430 : return gimple_call_arg (def_call, mask_index);
6167 : : }
6168 : :
6169 : 0 : gcc_unreachable ();
6170 : : }
6171 : :
6172 : : /* Return MASK if MASK is suitable for masking an operation on vectors
6173 : : of type VECTYPE, otherwise convert it into such a form and return
6174 : : the result. Associate any conversion statements with STMT_INFO's
6175 : : pattern. */
6176 : :
6177 : : static tree
6178 : 0 : vect_convert_mask_for_vectype (tree mask, tree vectype,
6179 : : stmt_vec_info stmt_info, vec_info *vinfo)
6180 : : {
6181 : 0 : tree mask_type = integer_type_for_mask (mask, vinfo);
6182 : 0 : if (mask_type)
6183 : : {
6184 : 0 : tree mask_vectype = get_mask_type_for_scalar_type (vinfo, mask_type);
6185 : 0 : if (mask_vectype
6186 : 0 : && maybe_ne (TYPE_VECTOR_SUBPARTS (vectype),
6187 : 0 : TYPE_VECTOR_SUBPARTS (mask_vectype)))
6188 : 0 : mask = build_mask_conversion (vinfo, mask, vectype, stmt_info);
6189 : : }
6190 : 0 : return mask;
6191 : : }
6192 : :
6193 : : /* Return the equivalent of:
6194 : :
6195 : : fold_convert (TYPE, VALUE)
6196 : :
6197 : : with the expectation that the operation will be vectorized.
6198 : : If new statements are needed, add them as pattern statements
6199 : : to STMT_INFO. */
6200 : :
6201 : : static tree
6202 : 0 : vect_add_conversion_to_pattern (vec_info *vinfo,
6203 : : tree type, tree value, stmt_vec_info stmt_info)
6204 : : {
6205 : 0 : if (useless_type_conversion_p (type, TREE_TYPE (value)))
6206 : : return value;
6207 : :
6208 : 0 : tree new_value = vect_recog_temp_ssa_var (type, NULL);
6209 : 0 : gassign *conversion = gimple_build_assign (new_value, CONVERT_EXPR, value);
6210 : 0 : append_pattern_def_seq (vinfo, stmt_info, conversion,
6211 : : get_vectype_for_scalar_type (vinfo, type));
6212 : 0 : return new_value;
6213 : : }
6214 : :
6215 : : /* Try to convert STMT_INFO into a call to a gather load or scatter store
6216 : : internal function. Return the final statement on success and set
6217 : : *TYPE_OUT to the vector type being loaded or stored.
6218 : :
6219 : : This function only handles gathers and scatters that were recognized
6220 : : as such from the outset (indicated by STMT_VINFO_GATHER_SCATTER_P). */
6221 : :
6222 : : static gimple *
6223 : 25065696 : vect_recog_gather_scatter_pattern (vec_info *vinfo,
6224 : : stmt_vec_info stmt_info, tree *type_out)
6225 : : {
6226 : : /* Currently we only support this for loop vectorization. */
6227 : 27958991 : loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
6228 : 2893295 : if (!loop_vinfo)
6229 : : return NULL;
6230 : :
6231 : : /* Make sure that we're looking at a gather load or scatter store. */
6232 : 2893295 : data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
6233 : 2893295 : if (!dr || !STMT_VINFO_GATHER_SCATTER_P (stmt_info))
6234 : : return NULL;
6235 : :
6236 : : /* Get the boolean that controls whether the load or store happens.
6237 : : This is null if the operation is unconditional. */
6238 : 28959 : tree mask = vect_get_load_store_mask (stmt_info);
6239 : :
6240 : : /* Make sure that the target supports an appropriate internal
6241 : : function for the gather/scatter operation. */
6242 : 28959 : gather_scatter_info gs_info;
6243 : 28959 : if (!vect_check_gather_scatter (stmt_info, loop_vinfo, &gs_info)
6244 : 28959 : || gs_info.ifn == IFN_LAST)
6245 : : return NULL;
6246 : :
6247 : : /* Convert the mask to the right form. */
6248 : 0 : tree gs_vectype = get_vectype_for_scalar_type (loop_vinfo,
6249 : : gs_info.element_type);
6250 : 0 : if (mask)
6251 : 0 : mask = vect_convert_mask_for_vectype (mask, gs_vectype, stmt_info,
6252 : : loop_vinfo);
6253 : 0 : else if (gs_info.ifn == IFN_MASK_SCATTER_STORE
6254 : 0 : || gs_info.ifn == IFN_MASK_GATHER_LOAD
6255 : 0 : || gs_info.ifn == IFN_MASK_LEN_SCATTER_STORE
6256 : 0 : || gs_info.ifn == IFN_MASK_LEN_GATHER_LOAD)
6257 : 0 : mask = build_int_cst (TREE_TYPE (truth_type_for (gs_vectype)), -1);
6258 : :
6259 : : /* Get the invariant base and non-invariant offset, converting the
6260 : : latter to the same width as the vector elements. */
6261 : 0 : tree base = gs_info.base;
6262 : 0 : tree offset_type = TREE_TYPE (gs_info.offset_vectype);
6263 : 0 : tree offset = vect_add_conversion_to_pattern (vinfo, offset_type,
6264 : : gs_info.offset, stmt_info);
6265 : :
6266 : : /* Build the new pattern statement. */
6267 : 0 : tree scale = size_int (gs_info.scale);
6268 : 0 : gcall *pattern_stmt;
6269 : 0 : if (DR_IS_READ (dr))
6270 : : {
6271 : 0 : tree zero = build_zero_cst (gs_info.element_type);
6272 : 0 : if (mask != NULL)
6273 : 0 : pattern_stmt = gimple_build_call_internal (gs_info.ifn, 5, base,
6274 : : offset, scale, zero, mask);
6275 : : else
6276 : 0 : pattern_stmt = gimple_build_call_internal (gs_info.ifn, 4, base,
6277 : : offset, scale, zero);
6278 : 0 : tree load_lhs = vect_recog_temp_ssa_var (gs_info.element_type, NULL);
6279 : 0 : gimple_call_set_lhs (pattern_stmt, load_lhs);
6280 : : }
6281 : : else
6282 : : {
6283 : 0 : tree rhs = vect_get_store_rhs (stmt_info);
6284 : 0 : if (mask != NULL)
6285 : 0 : pattern_stmt = gimple_build_call_internal (gs_info.ifn, 5,
6286 : : base, offset, scale, rhs,
6287 : : mask);
6288 : : else
6289 : 0 : pattern_stmt = gimple_build_call_internal (gs_info.ifn, 4,
6290 : : base, offset, scale, rhs);
6291 : : }
6292 : 0 : gimple_call_set_nothrow (pattern_stmt, true);
6293 : :
6294 : : /* Copy across relevant vectorization info and associate DR with the
6295 : : new pattern statement instead of the original statement. */
6296 : 0 : stmt_vec_info pattern_stmt_info = loop_vinfo->add_stmt (pattern_stmt);
6297 : 0 : loop_vinfo->move_dr (pattern_stmt_info, stmt_info);
6298 : :
6299 : 0 : tree vectype = STMT_VINFO_VECTYPE (stmt_info);
6300 : 0 : *type_out = vectype;
6301 : 0 : vect_pattern_detected ("gather/scatter pattern", stmt_info->stmt);
6302 : :
6303 : 0 : return pattern_stmt;
6304 : : }
6305 : :
6306 : : /* Return true if TYPE is a non-boolean integer type. These are the types
6307 : : that we want to consider for narrowing. */
6308 : :
6309 : : static bool
6310 : 50243102 : vect_narrowable_type_p (tree type)
6311 : : {
6312 : 50243102 : return INTEGRAL_TYPE_P (type) && !VECT_SCALAR_BOOLEAN_TYPE_P (type);
6313 : : }
6314 : :
6315 : : /* Return true if the operation given by CODE can be truncated to N bits
6316 : : when only N bits of the output are needed. This is only true if bit N+1
6317 : : of the inputs has no effect on the low N bits of the result. */
6318 : :
6319 : : static bool
6320 : 12005717 : vect_truncatable_operation_p (tree_code code)
6321 : : {
6322 : 12005717 : switch (code)
6323 : : {
6324 : : case NEGATE_EXPR:
6325 : : case PLUS_EXPR:
6326 : : case MINUS_EXPR:
6327 : : case MULT_EXPR:
6328 : : case BIT_NOT_EXPR:
6329 : : case BIT_AND_EXPR:
6330 : : case BIT_IOR_EXPR:
6331 : : case BIT_XOR_EXPR:
6332 : : case COND_EXPR:
6333 : : return true;
6334 : :
6335 : 4812918 : default:
6336 : 4812918 : return false;
6337 : : }
6338 : : }
6339 : :
6340 : : /* Record that STMT_INFO could be changed from operating on TYPE to
6341 : : operating on a type with the precision and sign given by PRECISION
6342 : : and SIGN respectively. PRECISION is an arbitrary bit precision;
6343 : : it might not be a whole number of bytes. */
6344 : :
6345 : : static void
6346 : 1575525 : vect_set_operation_type (stmt_vec_info stmt_info, tree type,
6347 : : unsigned int precision, signop sign)
6348 : : {
6349 : : /* Round the precision up to a whole number of bytes. */
6350 : 1575525 : precision = vect_element_precision (precision);
6351 : 1575525 : if (precision < TYPE_PRECISION (type)
6352 : 1575525 : && (!stmt_info->operation_precision
6353 : 32827 : || stmt_info->operation_precision > precision))
6354 : : {
6355 : 957151 : stmt_info->operation_precision = precision;
6356 : 957151 : stmt_info->operation_sign = sign;
6357 : : }
6358 : 1575525 : }
6359 : :
6360 : : /* Record that STMT_INFO only requires MIN_INPUT_PRECISION from its
6361 : : non-boolean inputs, all of which have type TYPE. MIN_INPUT_PRECISION
6362 : : is an arbitrary bit precision; it might not be a whole number of bytes. */
6363 : :
6364 : : static void
6365 : 8541799 : vect_set_min_input_precision (stmt_vec_info stmt_info, tree type,
6366 : : unsigned int min_input_precision)
6367 : : {
6368 : : /* This operation in isolation only requires the inputs to have
6369 : : MIN_INPUT_PRECISION of precision, However, that doesn't mean
6370 : : that MIN_INPUT_PRECISION is a natural precision for the chain
6371 : : as a whole. E.g. consider something like:
6372 : :
6373 : : unsigned short *x, *y;
6374 : : *y = ((*x & 0xf0) >> 4) | (*y << 4);
6375 : :
6376 : : The right shift can be done on unsigned chars, and only requires the
6377 : : result of "*x & 0xf0" to be done on unsigned chars. But taking that
6378 : : approach would mean turning a natural chain of single-vector unsigned
6379 : : short operations into one that truncates "*x" and then extends
6380 : : "(*x & 0xf0) >> 4", with two vectors for each unsigned short
6381 : : operation and one vector for each unsigned char operation.
6382 : : This would be a significant pessimization.
6383 : :
6384 : : Instead only propagate the maximum of this precision and the precision
6385 : : required by the users of the result. This means that we don't pessimize
6386 : : the case above but continue to optimize things like:
6387 : :
6388 : : unsigned char *y;
6389 : : unsigned short *x;
6390 : : *y = ((*x & 0xf0) >> 4) | (*y << 4);
6391 : :
6392 : : Here we would truncate two vectors of *x to a single vector of
6393 : : unsigned chars and use single-vector unsigned char operations for
6394 : : everything else, rather than doing two unsigned short copies of
6395 : : "(*x & 0xf0) >> 4" and then truncating the result. */
6396 : 8541799 : min_input_precision = MAX (min_input_precision,
6397 : : stmt_info->min_output_precision);
6398 : :
6399 : 8541799 : if (min_input_precision < TYPE_PRECISION (type)
6400 : 8541799 : && (!stmt_info->min_input_precision
6401 : 55309 : || stmt_info->min_input_precision > min_input_precision))
6402 : 497674 : stmt_info->min_input_precision = min_input_precision;
6403 : 8541799 : }
6404 : :
6405 : : /* Subroutine of vect_determine_min_output_precision. Return true if
6406 : : we can calculate a reduced number of output bits for STMT_INFO,
6407 : : whose result is LHS. */
6408 : :
6409 : : static bool
6410 : 11598215 : vect_determine_min_output_precision_1 (vec_info *vinfo,
6411 : : stmt_vec_info stmt_info, tree lhs)
6412 : : {
6413 : : /* Take the maximum precision required by users of the result. */
6414 : 11598215 : unsigned int precision = 0;
6415 : 11598215 : imm_use_iterator iter;
6416 : 11598215 : use_operand_p use;
6417 : 12198214 : FOR_EACH_IMM_USE_FAST (use, iter, lhs)
6418 : : {
6419 : 11947491 : gimple *use_stmt = USE_STMT (use);
6420 : 11947491 : if (is_gimple_debug (use_stmt))
6421 : 343975 : continue;
6422 : 11603516 : stmt_vec_info use_stmt_info = vinfo->lookup_stmt (use_stmt);
6423 : 11603516 : if (!use_stmt_info || !use_stmt_info->min_input_precision)
6424 : : return false;
6425 : : /* The input precision recorded for COND_EXPRs applies only to the
6426 : : "then" and "else" values. */
6427 : 256436 : gassign *assign = dyn_cast <gassign *> (stmt_info->stmt);
6428 : 225010 : if (assign
6429 : 225010 : && gimple_assign_rhs_code (assign) == COND_EXPR
6430 : 412 : && use->use != gimple_assign_rhs2_ptr (assign)
6431 : 412 : && use->use != gimple_assign_rhs3_ptr (assign))
6432 : : return false;
6433 : 256024 : precision = MAX (precision, use_stmt_info->min_input_precision);
6434 : : }
6435 : :
6436 : 250723 : if (dump_enabled_p ())
6437 : 6901 : dump_printf_loc (MSG_NOTE, vect_location,
6438 : : "only the low %d bits of %T are significant\n",
6439 : : precision, lhs);
6440 : 250723 : stmt_info->min_output_precision = precision;
6441 : 250723 : return true;
6442 : : }
6443 : :
6444 : : /* Calculate min_output_precision for STMT_INFO. */
6445 : :
6446 : : static void
6447 : 30529773 : vect_determine_min_output_precision (vec_info *vinfo, stmt_vec_info stmt_info)
6448 : : {
6449 : : /* We're only interested in statements with a narrowable result. */
6450 : 30529773 : tree lhs = gimple_get_lhs (stmt_info->stmt);
6451 : 30529773 : if (!lhs
6452 : 24070591 : || TREE_CODE (lhs) != SSA_NAME
6453 : 50455005 : || !vect_narrowable_type_p (TREE_TYPE (lhs)))
6454 : : return;
6455 : :
6456 : 11598215 : if (!vect_determine_min_output_precision_1 (vinfo, stmt_info, lhs))
6457 : 11347492 : stmt_info->min_output_precision = TYPE_PRECISION (TREE_TYPE (lhs));
6458 : : }
6459 : :
6460 : : /* Use range information to decide whether STMT (described by STMT_INFO)
6461 : : could be done in a narrower type. This is effectively a forward
6462 : : propagation, since it uses context-independent information that applies
6463 : : to all users of an SSA name. */
6464 : :
6465 : : static void
6466 : 17098120 : vect_determine_precisions_from_range (stmt_vec_info stmt_info, gassign *stmt)
6467 : : {
6468 : 17098120 : tree lhs = gimple_assign_lhs (stmt);
6469 : 17098120 : if (!lhs || TREE_CODE (lhs) != SSA_NAME)
6470 : 15622663 : return;
6471 : :
6472 : 13219750 : tree type = TREE_TYPE (lhs);
6473 : 13219750 : if (!vect_narrowable_type_p (type))
6474 : : return;
6475 : :
6476 : : /* First see whether we have any useful range information for the result. */
6477 : 8866647 : unsigned int precision = TYPE_PRECISION (type);
6478 : 8866647 : signop sign = TYPE_SIGN (type);
6479 : 8866647 : wide_int min_value, max_value;
6480 : 8866647 : if (!vect_get_range_info (lhs, &min_value, &max_value))
6481 : : return;
6482 : :
6483 : 3861879 : tree_code code = gimple_assign_rhs_code (stmt);
6484 : 3861879 : unsigned int nops = gimple_num_ops (stmt);
6485 : :
6486 : 3861879 : if (!vect_truncatable_operation_p (code))
6487 : : {
6488 : : /* Handle operations that can be computed in type T if all inputs
6489 : : and outputs can be represented in type T. Also handle left and
6490 : : right shifts, where (in addition) the maximum shift amount must
6491 : : be less than the number of bits in T. */
6492 : 1468387 : bool is_shift;
6493 : 1468387 : switch (code)
6494 : : {
6495 : : case LSHIFT_EXPR:
6496 : : case RSHIFT_EXPR:
6497 : : is_shift = true;
6498 : : break;
6499 : :
6500 : 166900 : case ABS_EXPR:
6501 : 166900 : case MIN_EXPR:
6502 : 166900 : case MAX_EXPR:
6503 : 166900 : case TRUNC_DIV_EXPR:
6504 : 166900 : case CEIL_DIV_EXPR:
6505 : 166900 : case FLOOR_DIV_EXPR:
6506 : 166900 : case ROUND_DIV_EXPR:
6507 : 166900 : case EXACT_DIV_EXPR:
6508 : : /* Modulus is excluded because it is typically calculated by doing
6509 : : a division, for which minimum signed / -1 isn't representable in
6510 : : the original signed type. We could take the division range into
6511 : : account instead, if handling modulus ever becomes important. */
6512 : 166900 : is_shift = false;
6513 : 166900 : break;
6514 : :
6515 : : default:
6516 : : return;
6517 : : }
6518 : 941897 : for (unsigned int i = 1; i < nops; ++i)
6519 : : {
6520 : 734301 : tree op = gimple_op (stmt, i);
6521 : 734301 : wide_int op_min_value, op_max_value;
6522 : 734301 : if (TREE_CODE (op) == INTEGER_CST)
6523 : : {
6524 : 209717 : unsigned int op_precision = TYPE_PRECISION (TREE_TYPE (op));
6525 : 209717 : op_min_value = op_max_value = wi::to_wide (op, op_precision);
6526 : : }
6527 : 524584 : else if (TREE_CODE (op) == SSA_NAME)
6528 : : {
6529 : 524584 : if (!vect_get_range_info (op, &op_min_value, &op_max_value))
6530 : : return;
6531 : : }
6532 : : else
6533 : : return;
6534 : :
6535 : 466764 : if (is_shift && i == 2)
6536 : : {
6537 : : /* There needs to be one more bit than the maximum shift amount.
6538 : :
6539 : : If the maximum shift amount is already 1 less than PRECISION
6540 : : then we can't narrow the shift further. Dealing with that
6541 : : case first ensures that we can safely use an unsigned range
6542 : : below.
6543 : :
6544 : : op_min_value isn't relevant, since shifts by negative amounts
6545 : : are UB. */
6546 : 153640 : if (wi::geu_p (op_max_value, precision - 1))
6547 : : return;
6548 : 140497 : unsigned int min_bits = op_max_value.to_uhwi () + 1;
6549 : :
6550 : : /* As explained below, we can convert a signed shift into an
6551 : : unsigned shift if the sign bit is always clear. At this
6552 : : point we've already processed the ranges of the output and
6553 : : the first input. */
6554 : 140497 : auto op_sign = sign;
6555 : 140497 : if (sign == SIGNED && !wi::neg_p (min_value))
6556 : : op_sign = UNSIGNED;
6557 : 280994 : op_min_value = wide_int::from (wi::min_value (min_bits, op_sign),
6558 : 140497 : precision, op_sign);
6559 : 280994 : op_max_value = wide_int::from (wi::max_value (min_bits, op_sign),
6560 : 140497 : precision, op_sign);
6561 : : }
6562 : 453621 : min_value = wi::min (min_value, op_min_value, sign);
6563 : 453621 : max_value = wi::max (max_value, op_max_value, sign);
6564 : 734301 : }
6565 : : }
6566 : :
6567 : : /* Try to switch signed types for unsigned types if we can.
6568 : : This is better for two reasons. First, unsigned ops tend
6569 : : to be cheaper than signed ops. Second, it means that we can
6570 : : handle things like:
6571 : :
6572 : : signed char c;
6573 : : int res = (int) c & 0xff00; // range [0x0000, 0xff00]
6574 : :
6575 : : as:
6576 : :
6577 : : signed char c;
6578 : : unsigned short res_1 = (unsigned short) c & 0xff00;
6579 : : int res = (int) res_1;
6580 : :
6581 : : where the intermediate result res_1 has unsigned rather than
6582 : : signed type. */
6583 : 2601088 : if (sign == SIGNED && !wi::neg_p (min_value))
6584 : : sign = UNSIGNED;
6585 : :
6586 : : /* See what precision is required for MIN_VALUE and MAX_VALUE. */
6587 : 2601088 : unsigned int precision1 = wi::min_precision (min_value, sign);
6588 : 2601088 : unsigned int precision2 = wi::min_precision (max_value, sign);
6589 : 2601088 : unsigned int value_precision = MAX (precision1, precision2);
6590 : 2601088 : if (value_precision >= precision)
6591 : : return;
6592 : :
6593 : 1475457 : if (dump_enabled_p ())
6594 : 75049 : dump_printf_loc (MSG_NOTE, vect_location, "can narrow to %s:%d"
6595 : : " without loss of precision: %G",
6596 : : sign == SIGNED ? "signed" : "unsigned",
6597 : : value_precision, (gimple *) stmt);
6598 : :
6599 : 1475457 : vect_set_operation_type (stmt_info, type, value_precision, sign);
6600 : 1475457 : vect_set_min_input_precision (stmt_info, type, value_precision);
6601 : 8866647 : }
6602 : :
6603 : : /* Use information about the users of STMT's result to decide whether
6604 : : STMT (described by STMT_INFO) could be done in a narrower type.
6605 : : This is effectively a backward propagation. */
6606 : :
6607 : : static void
6608 : 17098120 : vect_determine_precisions_from_users (stmt_vec_info stmt_info, gassign *stmt)
6609 : : {
6610 : 17098120 : tree_code code = gimple_assign_rhs_code (stmt);
6611 : 17098120 : unsigned int opno = (code == COND_EXPR ? 2 : 1);
6612 : 17098120 : tree type = TREE_TYPE (gimple_op (stmt, opno));
6613 : 17098120 : if (!vect_narrowable_type_p (type))
6614 : 10031778 : return;
6615 : :
6616 : 10498305 : unsigned int precision = TYPE_PRECISION (type);
6617 : 10498305 : unsigned int operation_precision, min_input_precision;
6618 : 10498305 : switch (code)
6619 : : {
6620 : 1943396 : CASE_CONVERT:
6621 : : /* Only the bits that contribute to the output matter. Don't change
6622 : : the precision of the operation itself. */
6623 : 1943396 : operation_precision = precision;
6624 : 1943396 : min_input_precision = stmt_info->min_output_precision;
6625 : 1943396 : break;
6626 : :
6627 : 411071 : case LSHIFT_EXPR:
6628 : 411071 : case RSHIFT_EXPR:
6629 : 411071 : {
6630 : 411071 : tree shift = gimple_assign_rhs2 (stmt);
6631 : 411071 : if (TREE_CODE (shift) != INTEGER_CST
6632 : 734746 : || !wi::ltu_p (wi::to_widest (shift), precision))
6633 : 87432 : return;
6634 : 323639 : unsigned int const_shift = TREE_INT_CST_LOW (shift);
6635 : 323639 : if (code == LSHIFT_EXPR)
6636 : : {
6637 : : /* Avoid creating an undefined shift.
6638 : :
6639 : : ??? We could instead use min_output_precision as-is and
6640 : : optimize out-of-range shifts to zero. However, only
6641 : : degenerate testcases shift away all their useful input data,
6642 : : and it isn't natural to drop input operations in the middle
6643 : : of vectorization. This sort of thing should really be
6644 : : handled before vectorization. */
6645 : 79517 : operation_precision = MAX (stmt_info->min_output_precision,
6646 : : const_shift + 1);
6647 : : /* We need CONST_SHIFT fewer bits of the input. */
6648 : 79517 : min_input_precision = (MAX (operation_precision, const_shift)
6649 : 79517 : - const_shift);
6650 : : }
6651 : : else
6652 : : {
6653 : : /* We need CONST_SHIFT extra bits to do the operation. */
6654 : 244122 : operation_precision = (stmt_info->min_output_precision
6655 : : + const_shift);
6656 : 244122 : min_input_precision = operation_precision;
6657 : : }
6658 : : break;
6659 : : }
6660 : :
6661 : 8143838 : default:
6662 : 8143838 : if (vect_truncatable_operation_p (code))
6663 : : {
6664 : : /* Input bit N has no effect on output bits N-1 and lower. */
6665 : 4799307 : operation_precision = stmt_info->min_output_precision;
6666 : 4799307 : min_input_precision = operation_precision;
6667 : 4799307 : break;
6668 : : }
6669 : : return;
6670 : : }
6671 : :
6672 : 7066342 : if (operation_precision < precision)
6673 : : {
6674 : 100068 : if (dump_enabled_p ())
6675 : 3521 : dump_printf_loc (MSG_NOTE, vect_location, "can narrow to %s:%d"
6676 : : " without affecting users: %G",
6677 : 3521 : TYPE_UNSIGNED (type) ? "unsigned" : "signed",
6678 : : operation_precision, (gimple *) stmt);
6679 : 200136 : vect_set_operation_type (stmt_info, type, operation_precision,
6680 : 100068 : TYPE_SIGN (type));
6681 : : }
6682 : 7066342 : vect_set_min_input_precision (stmt_info, type, min_input_precision);
6683 : : }
6684 : :
6685 : : /* Return true if the statement described by STMT_INFO sets a boolean
6686 : : SSA_NAME and if we know how to vectorize this kind of statement using
6687 : : vector mask types. */
6688 : :
6689 : : static bool
6690 : 30529773 : possible_vector_mask_operation_p (stmt_vec_info stmt_info)
6691 : : {
6692 : 30529773 : tree lhs = gimple_get_lhs (stmt_info->stmt);
6693 : 30529773 : tree_code code = ERROR_MARK;
6694 : 30529773 : gassign *assign = NULL;
6695 : 30529773 : gcond *cond = NULL;
6696 : :
6697 : 30529773 : if ((assign = dyn_cast <gassign *> (stmt_info->stmt)))
6698 : 23312803 : code = gimple_assign_rhs_code (assign);
6699 : 13431653 : else if ((cond = dyn_cast <gcond *> (stmt_info->stmt)))
6700 : : {
6701 : 3913811 : lhs = gimple_cond_lhs (cond);
6702 : 3913811 : code = gimple_cond_code (cond);
6703 : : }
6704 : :
6705 : 30529773 : if (!lhs
6706 : 27984402 : || TREE_CODE (lhs) != SSA_NAME
6707 : 54315033 : || !VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (lhs)))
6708 : : return false;
6709 : :
6710 : 1733999 : if (code != ERROR_MARK)
6711 : : {
6712 : 1499658 : switch (code)
6713 : : {
6714 : : CASE_CONVERT:
6715 : : case SSA_NAME:
6716 : : case BIT_NOT_EXPR:
6717 : : case BIT_IOR_EXPR:
6718 : : case BIT_XOR_EXPR:
6719 : : case BIT_AND_EXPR:
6720 : : return true;
6721 : :
6722 : 1160861 : default:
6723 : 1160861 : return TREE_CODE_CLASS (code) == tcc_comparison;
6724 : : }
6725 : : }
6726 : 234341 : else if (is_a <gphi *> (stmt_info->stmt))
6727 : 144685 : return true;
6728 : : return false;
6729 : : }
6730 : :
6731 : : /* If STMT_INFO sets a boolean SSA_NAME, see whether we should use
6732 : : a vector mask type instead of a normal vector type. Record the
6733 : : result in STMT_INFO->mask_precision. */
6734 : :
6735 : : static void
6736 : 30529773 : vect_determine_mask_precision (vec_info *vinfo, stmt_vec_info stmt_info)
6737 : : {
6738 : 30529773 : if (!possible_vector_mask_operation_p (stmt_info))
6739 : : return;
6740 : :
6741 : : /* If at least one boolean input uses a vector mask type,
6742 : : pick the mask type with the narrowest elements.
6743 : :
6744 : : ??? This is the traditional behavior. It should always produce
6745 : : the smallest number of operations, but isn't necessarily the
6746 : : optimal choice. For example, if we have:
6747 : :
6748 : : a = b & c
6749 : :
6750 : : where:
6751 : :
6752 : : - the user of a wants it to have a mask type for 16-bit elements (M16)
6753 : : - b also uses M16
6754 : : - c uses a mask type for 8-bit elements (M8)
6755 : :
6756 : : then picking M8 gives:
6757 : :
6758 : : - 1 M16->M8 pack for b
6759 : : - 1 M8 AND for a
6760 : : - 2 M8->M16 unpacks for the user of a
6761 : :
6762 : : whereas picking M16 would have given:
6763 : :
6764 : : - 2 M8->M16 unpacks for c
6765 : : - 2 M16 ANDs for a
6766 : :
6767 : : The number of operations are equal, but M16 would have given
6768 : : a shorter dependency chain and allowed more ILP. */
6769 : 1610750 : unsigned int precision = ~0U;
6770 : 1610750 : gimple *stmt = STMT_VINFO_STMT (stmt_info);
6771 : :
6772 : : /* If the statement compares two values that shouldn't use vector masks,
6773 : : try comparing the values as normal scalars instead. */
6774 : 1610750 : tree_code code = ERROR_MARK;
6775 : 1610750 : tree op0_type;
6776 : 1610750 : unsigned int nops = -1;
6777 : 1610750 : unsigned int ops_start = 0;
6778 : :
6779 : 1610750 : if (gassign *assign = dyn_cast <gassign *> (stmt))
6780 : : {
6781 : 1020343 : code = gimple_assign_rhs_code (assign);
6782 : 1020343 : op0_type = TREE_TYPE (gimple_assign_rhs1 (assign));
6783 : 1020343 : nops = gimple_num_ops (assign);
6784 : 1020343 : ops_start = 1;
6785 : : }
6786 : 590407 : else if (gcond *cond = dyn_cast <gcond *> (stmt))
6787 : : {
6788 : 445722 : code = gimple_cond_code (cond);
6789 : 445722 : op0_type = TREE_TYPE (gimple_cond_lhs (cond));
6790 : 445722 : nops = 2;
6791 : 445722 : ops_start = 0;
6792 : : }
6793 : :
6794 : 1610750 : if (code != ERROR_MARK)
6795 : : {
6796 : 4367077 : for (unsigned int i = ops_start; i < nops; ++i)
6797 : : {
6798 : 2901012 : tree rhs = gimple_op (stmt, i);
6799 : 2901012 : if (!VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (rhs)))
6800 : 1373670 : continue;
6801 : :
6802 : 1527342 : stmt_vec_info def_stmt_info = vinfo->lookup_def (rhs);
6803 : 1527342 : if (!def_stmt_info)
6804 : : /* Don't let external or constant operands influence the choice.
6805 : : We can convert them to whichever vector type we pick. */
6806 : 456978 : continue;
6807 : :
6808 : 1070364 : if (def_stmt_info->mask_precision)
6809 : : {
6810 : 905320 : if (precision > def_stmt_info->mask_precision)
6811 : 2901012 : precision = def_stmt_info->mask_precision;
6812 : : }
6813 : : }
6814 : :
6815 : 1466065 : if (precision == ~0U
6816 : 1180607 : && TREE_CODE_CLASS (code) == tcc_comparison)
6817 : : {
6818 : 1005994 : scalar_mode mode;
6819 : 1005994 : tree vectype, mask_type;
6820 : 1005994 : if (is_a <scalar_mode> (TYPE_MODE (op0_type), &mode)
6821 : 1005994 : && (vectype = get_vectype_for_scalar_type (vinfo, op0_type))
6822 : 860481 : && (mask_type = get_mask_type_for_scalar_type (vinfo, op0_type))
6823 : 860481 : && expand_vec_cmp_expr_p (vectype, mask_type, code))
6824 : 1329952 : precision = GET_MODE_BITSIZE (mode);
6825 : : }
6826 : : }
6827 : : else
6828 : : {
6829 : 144685 : gphi *phi = as_a <gphi *> (stmt_info->stmt);
6830 : 524680 : for (unsigned i = 0; i < gimple_phi_num_args (phi); ++i)
6831 : : {
6832 : 379995 : tree rhs = gimple_phi_arg_def (phi, i);
6833 : :
6834 : 379995 : stmt_vec_info def_stmt_info = vinfo->lookup_def (rhs);
6835 : 379995 : if (!def_stmt_info)
6836 : : /* Don't let external or constant operands influence the choice.
6837 : : We can convert them to whichever vector type we pick. */
6838 : 228877 : continue;
6839 : :
6840 : 151118 : if (def_stmt_info->mask_precision)
6841 : : {
6842 : 130506 : if (precision > def_stmt_info->mask_precision)
6843 : 379995 : precision = def_stmt_info->mask_precision;
6844 : : }
6845 : : }
6846 : : }
6847 : :
6848 : 1610750 : if (dump_enabled_p ())
6849 : : {
6850 : 6541 : if (precision == ~0U)
6851 : 1633 : dump_printf_loc (MSG_NOTE, vect_location,
6852 : : "using normal nonmask vectors for %G",
6853 : : stmt_info->stmt);
6854 : : else
6855 : 4908 : dump_printf_loc (MSG_NOTE, vect_location,
6856 : : "using boolean precision %d for %G",
6857 : : precision, stmt_info->stmt);
6858 : : }
6859 : :
6860 : 1610750 : stmt_info->mask_precision = precision;
6861 : : }
6862 : :
6863 : : /* Handle vect_determine_precisions for STMT_INFO, given that we
6864 : : have already done so for the users of its result. */
6865 : :
6866 : : void
6867 : 30529773 : vect_determine_stmt_precisions (vec_info *vinfo, stmt_vec_info stmt_info)
6868 : : {
6869 : 30529773 : vect_determine_min_output_precision (vinfo, stmt_info);
6870 : 30529773 : if (gassign *stmt = dyn_cast <gassign *> (stmt_info->stmt))
6871 : : {
6872 : 17098120 : vect_determine_precisions_from_range (stmt_info, stmt);
6873 : 17098120 : vect_determine_precisions_from_users (stmt_info, stmt);
6874 : : }
6875 : 30529773 : }
6876 : :
6877 : : /* Walk backwards through the vectorizable region to determine the
6878 : : values of these fields:
6879 : :
6880 : : - min_output_precision
6881 : : - min_input_precision
6882 : : - operation_precision
6883 : : - operation_sign. */
6884 : :
6885 : : void
6886 : 858521 : vect_determine_precisions (vec_info *vinfo)
6887 : : {
6888 : 858521 : DUMP_VECT_SCOPE ("vect_determine_precisions");
6889 : :
6890 : 858521 : if (loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo))
6891 : : {
6892 : 284010 : class loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
6893 : 284010 : basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
6894 : 284010 : unsigned int nbbs = loop->num_nodes;
6895 : :
6896 : 985251 : for (unsigned int i = 0; i < nbbs; i++)
6897 : : {
6898 : 701241 : basic_block bb = bbs[i];
6899 : 701241 : for (auto gsi = gsi_start_phis (bb);
6900 : 1455770 : !gsi_end_p (gsi); gsi_next (&gsi))
6901 : : {
6902 : 754529 : stmt_vec_info stmt_info = vinfo->lookup_stmt (gsi.phi ());
6903 : 754529 : if (stmt_info)
6904 : 754529 : vect_determine_mask_precision (vinfo, stmt_info);
6905 : : }
6906 : 5350893 : for (auto si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
6907 : 3948411 : if (!is_gimple_debug (gsi_stmt (si)))
6908 : 2881246 : vect_determine_mask_precision
6909 : 2881246 : (vinfo, vinfo->lookup_stmt (gsi_stmt (si)));
6910 : : }
6911 : 985251 : for (unsigned int i = 0; i < nbbs; i++)
6912 : : {
6913 : 701241 : basic_block bb = bbs[nbbs - i - 1];
6914 : 701241 : for (gimple_stmt_iterator si = gsi_last_bb (bb);
6915 : 8598063 : !gsi_end_p (si); gsi_prev (&si))
6916 : 3948411 : if (!is_gimple_debug (gsi_stmt (si)))
6917 : 2881246 : vect_determine_stmt_precisions
6918 : 2881246 : (vinfo, vinfo->lookup_stmt (gsi_stmt (si)));
6919 : 701241 : for (auto gsi = gsi_start_phis (bb);
6920 : 1455770 : !gsi_end_p (gsi); gsi_next (&gsi))
6921 : : {
6922 : 754529 : stmt_vec_info stmt_info = vinfo->lookup_stmt (gsi.phi ());
6923 : 754529 : if (stmt_info)
6924 : 754529 : vect_determine_stmt_precisions (vinfo, stmt_info);
6925 : : }
6926 : : }
6927 : : }
6928 : : else
6929 : : {
6930 : 574511 : bb_vec_info bb_vinfo = as_a <bb_vec_info> (vinfo);
6931 : 18228792 : for (unsigned i = 0; i < bb_vinfo->bbs.length (); ++i)
6932 : : {
6933 : 8539885 : basic_block bb = bb_vinfo->bbs[i];
6934 : 13582734 : for (auto gsi = gsi_start_phis (bb); !gsi_end_p (gsi); gsi_next (&gsi))
6935 : : {
6936 : 5042849 : stmt_vec_info stmt_info = vinfo->lookup_stmt (gsi.phi ());
6937 : 5042849 : if (stmt_info && STMT_VINFO_VECTORIZABLE (stmt_info))
6938 : 4864264 : vect_determine_mask_precision (vinfo, stmt_info);
6939 : : }
6940 : 80764028 : for (auto gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
6941 : : {
6942 : 63684258 : stmt_vec_info stmt_info = vinfo->lookup_stmt (gsi_stmt (gsi));
6943 : 63684258 : if (stmt_info && STMT_VINFO_VECTORIZABLE (stmt_info))
6944 : 22029734 : vect_determine_mask_precision (vinfo, stmt_info);
6945 : : }
6946 : : }
6947 : 9114396 : for (int i = bb_vinfo->bbs.length () - 1; i != -1; --i)
6948 : : {
6949 : 8539885 : for (gimple_stmt_iterator gsi = gsi_last_bb (bb_vinfo->bbs[i]);
6950 : 135908401 : !gsi_end_p (gsi); gsi_prev (&gsi))
6951 : : {
6952 : 63684258 : stmt_vec_info stmt_info = vinfo->lookup_stmt (gsi_stmt (gsi));
6953 : 63684258 : if (stmt_info && STMT_VINFO_VECTORIZABLE (stmt_info))
6954 : 22029734 : vect_determine_stmt_precisions (vinfo, stmt_info);
6955 : : }
6956 : 8539885 : for (auto gsi = gsi_start_phis (bb_vinfo->bbs[i]);
6957 : 13582734 : !gsi_end_p (gsi); gsi_next (&gsi))
6958 : : {
6959 : 5042849 : stmt_vec_info stmt_info = vinfo->lookup_stmt (gsi.phi ());
6960 : 5042849 : if (stmt_info && STMT_VINFO_VECTORIZABLE (stmt_info))
6961 : 4864264 : vect_determine_stmt_precisions (vinfo, stmt_info);
6962 : : }
6963 : : }
6964 : : }
6965 : 858521 : }
6966 : :
6967 : : typedef gimple *(*vect_recog_func_ptr) (vec_info *, stmt_vec_info, tree *);
6968 : :
6969 : : struct vect_recog_func
6970 : : {
6971 : : vect_recog_func_ptr fn;
6972 : : const char *name;
6973 : : };
6974 : :
6975 : : /* Note that ordering matters - the first pattern matching on a stmt is
6976 : : taken which means usually the more complex one needs to preceed the
6977 : : less comples onex (widen_sum only after dot_prod or sad for example). */
6978 : : static vect_recog_func vect_vect_recog_func_ptrs[] = {
6979 : : { vect_recog_bitfield_ref_pattern, "bitfield_ref" },
6980 : : { vect_recog_bit_insert_pattern, "bit_insert" },
6981 : : { vect_recog_abd_pattern, "abd" },
6982 : : { vect_recog_over_widening_pattern, "over_widening" },
6983 : : /* Must come after over_widening, which narrows the shift as much as
6984 : : possible beforehand. */
6985 : : { vect_recog_average_pattern, "average" },
6986 : : { vect_recog_cond_expr_convert_pattern, "cond_expr_convert" },
6987 : : { vect_recog_mulhs_pattern, "mult_high" },
6988 : : { vect_recog_cast_forwprop_pattern, "cast_forwprop" },
6989 : : { vect_recog_widen_mult_pattern, "widen_mult" },
6990 : : { vect_recog_dot_prod_pattern, "dot_prod" },
6991 : : { vect_recog_sad_pattern, "sad" },
6992 : : { vect_recog_widen_sum_pattern, "widen_sum" },
6993 : : { vect_recog_pow_pattern, "pow" },
6994 : : { vect_recog_popcount_clz_ctz_ffs_pattern, "popcount_clz_ctz_ffs" },
6995 : : { vect_recog_ctz_ffs_pattern, "ctz_ffs" },
6996 : : { vect_recog_widen_shift_pattern, "widen_shift" },
6997 : : { vect_recog_rotate_pattern, "rotate" },
6998 : : { vect_recog_vector_vector_shift_pattern, "vector_vector_shift" },
6999 : : { vect_recog_divmod_pattern, "divmod" },
7000 : : { vect_recog_mult_pattern, "mult" },
7001 : : { vect_recog_mixed_size_cond_pattern, "mixed_size_cond" },
7002 : : { vect_recog_gcond_pattern, "gcond" },
7003 : : { vect_recog_bool_pattern, "bool" },
7004 : : /* This must come before mask conversion, and includes the parts
7005 : : of mask conversion that are needed for gather and scatter
7006 : : internal functions. */
7007 : : { vect_recog_gather_scatter_pattern, "gather_scatter" },
7008 : : { vect_recog_mask_conversion_pattern, "mask_conversion" },
7009 : : { vect_recog_widen_plus_pattern, "widen_plus" },
7010 : : { vect_recog_widen_minus_pattern, "widen_minus" },
7011 : : { vect_recog_widen_abd_pattern, "widen_abd" },
7012 : : /* These must come after the double widening ones. */
7013 : : };
7014 : :
7015 : : const unsigned int NUM_PATTERNS = ARRAY_SIZE (vect_vect_recog_func_ptrs);
7016 : :
7017 : : /* Mark statements that are involved in a pattern. */
7018 : :
7019 : : void
7020 : 617391 : vect_mark_pattern_stmts (vec_info *vinfo,
7021 : : stmt_vec_info orig_stmt_info, gimple *pattern_stmt,
7022 : : tree pattern_vectype)
7023 : : {
7024 : 617391 : stmt_vec_info orig_stmt_info_saved = orig_stmt_info;
7025 : 617391 : gimple *def_seq = STMT_VINFO_PATTERN_DEF_SEQ (orig_stmt_info);
7026 : :
7027 : 617391 : gimple *orig_pattern_stmt = NULL;
7028 : 617391 : if (is_pattern_stmt_p (orig_stmt_info))
7029 : : {
7030 : : /* We're replacing a statement in an existing pattern definition
7031 : : sequence. */
7032 : 7680 : orig_pattern_stmt = orig_stmt_info->stmt;
7033 : 7680 : if (dump_enabled_p ())
7034 : 496 : dump_printf_loc (MSG_NOTE, vect_location,
7035 : : "replacing earlier pattern %G", orig_pattern_stmt);
7036 : :
7037 : : /* To keep the book-keeping simple, just swap the lhs of the
7038 : : old and new statements, so that the old one has a valid but
7039 : : unused lhs. */
7040 : 7680 : tree old_lhs = gimple_get_lhs (orig_pattern_stmt);
7041 : 7680 : gimple_set_lhs (orig_pattern_stmt, gimple_get_lhs (pattern_stmt));
7042 : 7680 : gimple_set_lhs (pattern_stmt, old_lhs);
7043 : :
7044 : 7680 : if (dump_enabled_p ())
7045 : 496 : dump_printf_loc (MSG_NOTE, vect_location, "with %G", pattern_stmt);
7046 : :
7047 : : /* Switch to the statement that ORIG replaces. */
7048 : 7680 : orig_stmt_info = STMT_VINFO_RELATED_STMT (orig_stmt_info);
7049 : :
7050 : : /* We shouldn't be replacing the main pattern statement. */
7051 : 7680 : gcc_assert (STMT_VINFO_RELATED_STMT (orig_stmt_info)->stmt
7052 : : != orig_pattern_stmt);
7053 : : }
7054 : :
7055 : 617391 : if (def_seq)
7056 : : for (gimple_stmt_iterator si = gsi_start (def_seq);
7057 : 1270567 : !gsi_end_p (si); gsi_next (&si))
7058 : : {
7059 : 764491 : if (dump_enabled_p ())
7060 : 20732 : dump_printf_loc (MSG_NOTE, vect_location,
7061 : : "extra pattern stmt: %G", gsi_stmt (si));
7062 : 764491 : stmt_vec_info pattern_stmt_info
7063 : 764491 : = vect_init_pattern_stmt (vinfo, gsi_stmt (si),
7064 : : orig_stmt_info, pattern_vectype);
7065 : : /* Stmts in the def sequence are not vectorizable cycle or
7066 : : induction defs, instead they should all be vect_internal_def
7067 : : feeding the main pattern stmt which retains this def type. */
7068 : 764491 : STMT_VINFO_DEF_TYPE (pattern_stmt_info) = vect_internal_def;
7069 : : }
7070 : :
7071 : 617391 : if (orig_pattern_stmt)
7072 : : {
7073 : 7680 : vect_init_pattern_stmt (vinfo, pattern_stmt,
7074 : : orig_stmt_info, pattern_vectype);
7075 : :
7076 : : /* Insert all the new pattern statements before the original one. */
7077 : 7680 : gimple_seq *orig_def_seq = &STMT_VINFO_PATTERN_DEF_SEQ (orig_stmt_info);
7078 : 7680 : gimple_stmt_iterator gsi = gsi_for_stmt (orig_pattern_stmt,
7079 : : orig_def_seq);
7080 : 7680 : gsi_insert_seq_before_without_update (&gsi, def_seq, GSI_SAME_STMT);
7081 : 7680 : gsi_insert_before_without_update (&gsi, pattern_stmt, GSI_SAME_STMT);
7082 : :
7083 : : /* Remove the pattern statement that this new pattern replaces. */
7084 : 7680 : gsi_remove (&gsi, false);
7085 : : }
7086 : : else
7087 : 609711 : vect_set_pattern_stmt (vinfo,
7088 : : pattern_stmt, orig_stmt_info, pattern_vectype);
7089 : :
7090 : : /* For any conditionals mark them as vect_condition_def. */
7091 : 617391 : if (is_a <gcond *> (pattern_stmt))
7092 : 197904 : STMT_VINFO_DEF_TYPE (STMT_VINFO_RELATED_STMT (orig_stmt_info)) = vect_condition_def;
7093 : :
7094 : : /* Transfer reduction path info to the pattern. */
7095 : 617391 : if (STMT_VINFO_REDUC_IDX (orig_stmt_info_saved) != -1)
7096 : : {
7097 : 3586 : gimple_match_op op;
7098 : 3586 : if (!gimple_extract_op (orig_stmt_info_saved->stmt, &op))
7099 : 0 : gcc_unreachable ();
7100 : 3586 : tree lookfor = op.ops[STMT_VINFO_REDUC_IDX (orig_stmt_info)];
7101 : : /* Search the pattern def sequence and the main pattern stmt. Note
7102 : : we may have inserted all into a containing pattern def sequence
7103 : : so the following is a bit awkward. */
7104 : 3586 : gimple_stmt_iterator si;
7105 : 3586 : gimple *s;
7106 : 3586 : if (def_seq)
7107 : : {
7108 : 2740 : si = gsi_start (def_seq);
7109 : 2740 : s = gsi_stmt (si);
7110 : 2740 : gsi_next (&si);
7111 : : }
7112 : : else
7113 : : {
7114 : : si = gsi_none ();
7115 : : s = pattern_stmt;
7116 : : }
7117 : 6437 : do
7118 : : {
7119 : 6437 : bool found = false;
7120 : 6437 : if (gimple_extract_op (s, &op))
7121 : 14027 : for (unsigned i = 0; i < op.num_ops; ++i)
7122 : 11329 : if (op.ops[i] == lookfor)
7123 : : {
7124 : 3739 : STMT_VINFO_REDUC_IDX (vinfo->lookup_stmt (s)) = i;
7125 : 3739 : lookfor = gimple_get_lhs (s);
7126 : 3739 : found = true;
7127 : 3739 : break;
7128 : : }
7129 : 6437 : if (s == pattern_stmt)
7130 : : {
7131 : 3586 : if (!found && dump_enabled_p ())
7132 : 0 : dump_printf_loc (MSG_NOTE, vect_location,
7133 : : "failed to update reduction index.\n");
7134 : 3586 : break;
7135 : : }
7136 : 2851 : if (gsi_end_p (si))
7137 : : s = pattern_stmt;
7138 : : else
7139 : : {
7140 : 111 : s = gsi_stmt (si);
7141 : 111 : if (s == pattern_stmt)
7142 : : /* Found the end inside a bigger pattern def seq. */
7143 : : si = gsi_none ();
7144 : : else
7145 : 111 : gsi_next (&si);
7146 : : }
7147 : : } while (1);
7148 : : }
7149 : 617391 : }
7150 : :
7151 : : /* Function vect_pattern_recog_1
7152 : :
7153 : : Input:
7154 : : PATTERN_RECOG_FUNC: A pointer to a function that detects a certain
7155 : : computation pattern.
7156 : : STMT_INFO: A stmt from which the pattern search should start.
7157 : :
7158 : : If PATTERN_RECOG_FUNC successfully detected the pattern, it creates
7159 : : a sequence of statements that has the same functionality and can be
7160 : : used to replace STMT_INFO. It returns the last statement in the sequence
7161 : : and adds any earlier statements to STMT_INFO's STMT_VINFO_PATTERN_DEF_SEQ.
7162 : : PATTERN_RECOG_FUNC also sets *TYPE_OUT to the vector type of the final
7163 : : statement, having first checked that the target supports the new operation
7164 : : in that type.
7165 : :
7166 : : This function also does some bookkeeping, as explained in the documentation
7167 : : for vect_recog_pattern. */
7168 : :
7169 : : static void /* Apply RECOG_FUNC to STMT_INFO, or to its pattern definition statements. */
7170 : 707336510 : vect_pattern_recog_1 (vec_info *vinfo,
7171 : : vect_recog_func *recog_func, stmt_vec_info stmt_info)
7172 : : {
7173 : 707336510 : gimple *pattern_stmt; /* Result of RECOG_FUNC->fn; null means no match. */
7174 : 707336510 : loop_vec_info loop_vinfo; /* VINFO as a loop_vec_info; tested for null before use. */
7175 : 707336510 : tree pattern_vectype; /* Passed by address to RECOG_FUNC->fn. */
7176 : :
7177 : : /* If this statement has already been replaced with pattern statements,
7178 : : leave the original statement alone, since the first match wins.
7179 : : Instead try to match against the definition statements that feed
7180 : : the main pattern statement, using the same RECOG_FUNC for each. */
7181 : 707336510 : if (STMT_VINFO_IN_PATTERN_P (stmt_info))
7182 : : {
7183 : 7320441 : gimple_stmt_iterator gsi;
7184 : 7320441 : for (gsi = gsi_start (STMT_VINFO_PATTERN_DEF_SEQ (stmt_info));
7185 : 17149511 : !gsi_end_p (gsi); gsi_next (&gsi))
7186 : 9829070 : vect_pattern_recog_1 (vinfo, recog_func,
7187 : : vinfo->lookup_stmt (gsi_stmt (gsi)));
7188 : : return; /* STMT_INFO itself is not matched again. */
7189 : : }
7190 : :
7191 : 700016069 : gcc_assert (!STMT_VINFO_PATTERN_DEF_SEQ (stmt_info)); /* Not in a pattern, so no definition sequence is expected yet. */
7192 : 700016069 : pattern_stmt = recog_func->fn (vinfo, stmt_info, &pattern_vectype);
7193 : 700016069 : if (!pattern_stmt)
7194 : : {
7195 : : /* Clear any half-formed pattern definition sequence.  The sequence was asserted empty before the call, so anything present now was added by RECOG_FUNC->fn. */
7196 : 699398678 : STMT_VINFO_PATTERN_DEF_SEQ (stmt_info) = NULL;
7197 : 699398678 : return;
7198 : : }
7199 : :
7200 : 617391 : loop_vinfo = dyn_cast <loop_vec_info> (vinfo); /* Only used for the reduction list update at the end. */
7201 : :
7202 : : /* Found a vectorizable pattern. */
7203 : 617391 : if (dump_enabled_p ())
7204 : 17148 : dump_printf_loc (MSG_NOTE, vect_location,
7205 : : "%s pattern recognized: %G",
7206 : : recog_func->name, pattern_stmt);
7207 : :
7208 : : /* Mark the stmts that are involved in the pattern. */
7209 : 617391 : vect_mark_pattern_stmts (vinfo, stmt_info, pattern_stmt, pattern_vectype);
7210 : :
7211 : : /* Patterns cannot be vectorized using SLP, because they change the order of
7212 : : computation.  NOTE(review): the code below only removes STMT_INFO from LOOP_VINFO_REDUCTIONS; confirm this comment still describes it. */
7213 : 617391 : if (loop_vinfo)
7214 : : {
7215 : 278034 : unsigned ix, ix2; /* Index variables handed to the removal macro. */
7216 : 278034 : stmt_vec_info *elem_ptr; /* Element pointer referenced by the removal condition. */
7217 : 324526 : VEC_ORDERED_REMOVE_IF (LOOP_VINFO_REDUCTIONS (loop_vinfo), ix, ix2,
7218 : : elem_ptr, *elem_ptr == stmt_info); /* Removes the entries equal to STMT_INFO. */
7219 : : }
7220 : : }
7221 : :
7222 : :
7223 : : /* Function vect_pattern_recog
7224 : :
7225 : : Input:
7226 : : VINFO - the vec_info (for either a loop or a set of basic blocks) in
7227 : : which we want to look for computation idioms.
7228 : :
7229 : : Output - for each computation idiom that is detected we create a new stmt
7230 : : that provides the same functionality and that can be vectorized. We
7231 : : also record some information in the struct_stmt_info of the relevant
7232 : : stmts, as explained below:
7233 : :
7234 : : At the entry to this function we have the following stmts, with the
7235 : : following initial value in the STMT_VINFO fields:
7236 : :
7237 : : stmt in_pattern_p related_stmt vec_stmt
7238 : : S1: a_i = .... - - -
7239 : : S2: a_2 = ..use(a_i).. - - -
7240 : : S3: a_1 = ..use(a_2).. - - -
7241 : : S4: a_0 = ..use(a_1).. - - -
7242 : : S5: ... = ..use(a_0).. - - -
7243 : :
7244 : : Say the sequence {S1,S2,S3,S4} was detected as a pattern that can be
7245 : : represented by a single stmt. We then:
7246 : : - create a new stmt S6 equivalent to the pattern (the stmt is not
7247 : : inserted into the code)
7248 : : - fill in the STMT_VINFO fields as follows:
7249 : :
7250 : : in_pattern_p related_stmt vec_stmt
7251 : : S1: a_i = .... - - -
7252 : : S2: a_2 = ..use(a_i).. - - -
7253 : : S3: a_1 = ..use(a_2).. - - -
7254 : : S4: a_0 = ..use(a_1).. true S6 -
7255 : : '---> S6: a_new = .... - S4 -
7256 : : S5: ... = ..use(a_0).. - - -
7257 : :
7258 : : (the last stmt in the pattern (S4) and the new pattern stmt (S6) point
7259 : : to each other through the RELATED_STMT field).
7260 : :
7261 : : S6 will be marked as relevant in vect_mark_stmts_to_be_vectorized instead
7262 : : of S4 because it will replace all its uses. Stmts {S1,S2,S3} will
7263 : : remain irrelevant unless used by stmts other than S4.
7264 : :
7265 : : If vectorization succeeds, vect_transform_stmt will skip over {S1,S2,S3}
7266 : : (because they are marked as irrelevant). It will vectorize S6, and record
7267 : : a pointer to the new vector stmt VS6 from S6 (as usual).
7268 : : S4 will be skipped, and S5 will be vectorized as usual:
7269 : :
7270 : : in_pattern_p related_stmt vec_stmt
7271 : : S1: a_i = .... - - -
7272 : : S2: a_2 = ..use(a_i).. - - -
7273 : : S3: a_1 = ..use(a_2).. - - -
7274 : : > VS6: va_new = .... - - -
7275 : : S4: a_0 = ..use(a_1).. true S6 VS6
7276 : : '---> S6: a_new = .... - S4 VS6
7277 : : > VS5: ... = ..vuse(va_new).. - - -
7278 : : S5: ... = ..use(a_0).. - - -
7279 : :
7280 : : DCE could then get rid of {S1,S2,S3,S4,S5} (if their defs are not used
7281 : : elsewhere), and we'll end up with:
7282 : :
7283 : : VS6: va_new = ....
7284 : : VS5: ... = ..vuse(va_new)..
7285 : :
7286 : : In case of more than one pattern statement, e.g., widen-mult with
7287 : : intermediate type:
7288 : :
7289 : : S1 a_t = ;
7290 : : S2 a_T = (TYPE) a_t;
7291 : : '--> S3: a_it = (interm_type) a_t;
7292 : : S4 prod_T = a_T * CONST;
7293 : : '--> S5: prod_T' = a_it w* CONST;
7294 : :
7295 : : there may be other users of a_T outside the pattern. In that case S2 will
7296 : : be marked as relevant (as well as S3), and both S2 and S3 will be analyzed
7297 : : and vectorized. The vector stmt VS2 will be recorded in S2, and VS3 will
7298 : : be recorded in S3. */
7299 : :
7300 : : void /* Run every pattern recognizer over the candidate statements of VINFO. */
7301 : 858521 : vect_pattern_recog (vec_info *vinfo)
7302 : : {
7303 : 858521 : class loop *loop;
7304 : 858521 : basic_block *bbs;
7305 : 858521 : unsigned int nbbs;
7306 : 858521 : gimple_stmt_iterator si;
7307 : 858521 : unsigned int i, j; /* I indexes basic blocks in the loop path; J indexes recognizers in both paths. */
7308 : :
7309 : 858521 : vect_determine_precisions (vinfo); /* Done once, ahead of the scans below. */
7310 : :
7311 : 858521 : DUMP_VECT_SCOPE ("vect_pattern_recog");
7312 : :
7313 : 858521 : if (loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo)) /* Loop case. */
7314 : : {
7315 : 284010 : loop = LOOP_VINFO_LOOP (loop_vinfo);
7316 : 284010 : bbs = LOOP_VINFO_BBS (loop_vinfo);
7317 : 284010 : nbbs = loop->num_nodes; /* Number of entries of BBS that are scanned. */
7318 : :
7319 : : /* Scan through the loop stmts, applying the pattern recognition
7320 : : functions starting at each stmt visited: */
7321 : 985251 : for (i = 0; i < nbbs; i++)
7322 : : {
7323 : 701241 : basic_block bb = bbs[i];
7324 : 5350893 : for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
7325 : : {
7326 : 3948411 : if (is_gimple_debug (gsi_stmt (si))) /* Debug statements are skipped. */
7327 : 1067165 : continue;
7328 : 2881246 : stmt_vec_info stmt_info = vinfo->lookup_stmt (gsi_stmt (si)); /* NOTE(review): not checked for null here, unlike the basic-block path below; presumably every non-debug loop statement has an entry -- confirm. */
7329 : : /* Scan over all generic vect_recog_xxx_pattern functions. */
7330 : 86437380 : for (j = 0; j < NUM_PATTERNS; j++)
7331 : 80674888 : vect_pattern_recog_1 (vinfo, &vect_vect_recog_func_ptrs[j],
7332 : : stmt_info);
7333 : : }
7334 : : }
7335 : : }
7336 : : else /* Otherwise VINFO is handled as a bb_vec_info. */
7337 : : {
7338 : 574511 : bb_vec_info bb_vinfo = as_a <bb_vec_info> (vinfo);
7339 : 18228792 : for (unsigned i = 0; i < bb_vinfo->bbs.length (); ++i) /* This I shadows the function-scope I. */
7340 : 17079770 : for (gimple_stmt_iterator gsi = gsi_start_bb (bb_vinfo->bbs[i]);
7341 : 72224143 : !gsi_end_p (gsi); gsi_next (&gsi))
7342 : : {
7343 : 63684258 : stmt_vec_info stmt_info = bb_vinfo->lookup_stmt (gsi_stmt (gsi));
7344 : 63684258 : if (!stmt_info || !STMT_VINFO_VECTORIZABLE (stmt_info)) /* Skip statements with no entry or not marked vectorizable. */
7345 : 41654524 : continue;
7346 : :
7347 : : /* Scan over all generic vect_recog_xxx_pattern functions. */
7348 : 638862286 : for (j = 0; j < NUM_PATTERNS; j++)
7349 : 616832552 : vect_pattern_recog_1 (vinfo,
7350 : : &vect_vect_recog_func_ptrs[j], stmt_info);
7351 : : }
7352 : : }
7353 : :
7354 : : /* After this no more add_stmt calls are allowed.  The flag is set on both the loop and the basic-block path. */
7355 : 858521 : vinfo->stmt_vec_info_ro = true;
7356 : 858521 : }
7357 : :
7358 : : /* Build a GIMPLE_ASSIGN or GIMPLE_CALL with the tree_code,
7359 : : or internal_fn contained in CH, respectively.  LHS is the left-hand side of the new statement, OP0 must be non-null and OP1 may be NULL_TREE.  Return the new statement. */
7360 : : gimple *
7361 : 111911 : vect_gimple_build (tree lhs, code_helper ch, tree op0, tree op1)
7362 : : {
7363 : 111911 : gcc_assert (op0 != NULL_TREE); /* The first operand is mandatory. */
7364 : 111911 : if (ch.is_tree_code ())
7365 : 111911 : return gimple_build_assign (lhs, (tree_code) ch, op0, op1); /* Tree code case: LHS is given directly to the assignment builder. */
7366 : :
7367 : 0 : gcc_assert (ch.is_internal_fn ()); /* Anything that is neither a tree code nor an internal function is rejected. */
7368 : 0 : gimple* stmt = gimple_build_call_internal (as_internal_fn ((combined_fn) ch),
7369 : : op1 == NULL_TREE ? 1 : 2, /* Argument count: one when OP1 is absent, otherwise two. */
7370 : : op0, op1); /* OP1 is passed even when the count is 1; presumably ignored then -- confirm. */
7371 : 0 : gimple_call_set_lhs (stmt, lhs); /* The call gets its LHS in a separate step. */
7372 : 0 : return stmt;
7373 : : }
|