Line data Source code
1 : /* Statement Analysis and Transformation for Vectorization
2 : Copyright (C) 2003-2026 Free Software Foundation, Inc.
3 : Contributed by Dorit Naishlos <dorit@il.ibm.com>
4 : and Ira Rosen <irar@il.ibm.com>
5 :
6 : This file is part of GCC.
7 :
8 : GCC is free software; you can redistribute it and/or modify it under
9 : the terms of the GNU General Public License as published by the Free
10 : Software Foundation; either version 3, or (at your option) any later
11 : version.
12 :
13 : GCC is distributed in the hope that it will be useful, but WITHOUT ANY
14 : WARRANTY; without even the implied warranty of MERCHANTABILITY or
15 : FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
16 : for more details.
17 :
18 : You should have received a copy of the GNU General Public License
19 : along with GCC; see the file COPYING3. If not see
20 : <http://www.gnu.org/licenses/>. */
21 :
22 : #include "config.h"
23 : #include "system.h"
24 : #include "coretypes.h"
25 : #include "backend.h"
26 : #include "target.h"
27 : #include "rtl.h"
28 : #include "tree.h"
29 : #include "gimple.h"
30 : #include "ssa.h"
31 : #include "optabs-tree.h"
32 : #include "insn-config.h"
33 : #include "recog.h" /* FIXME: for insn_data */
34 : #include "cgraph.h"
35 : #include "dumpfile.h"
36 : #include "alias.h"
37 : #include "fold-const.h"
38 : #include "stor-layout.h"
39 : #include "tree-eh.h"
40 : #include "gimplify.h"
41 : #include "gimple-iterator.h"
42 : #include "gimplify-me.h"
43 : #include "tree-cfg.h"
44 : #include "tree-ssa-loop-manip.h"
45 : #include "cfgloop.h"
46 : #include "explow.h"
47 : #include "tree-ssa-loop.h"
48 : #include "tree-scalar-evolution.h"
49 : #include "tree-vectorizer.h"
50 : #include "builtins.h"
51 : #include "internal-fn.h"
52 : #include "tree-vector-builder.h"
53 : #include "vec-perm-indices.h"
54 : #include "gimple-range.h"
55 : #include "tree-ssa-loop-niter.h"
56 : #include "gimple-fold.h"
57 : #include "regs.h"
58 : #include "attribs.h"
59 : #include "optabs-libfuncs.h"
60 : #include "tree-dfa.h"
61 :
62 : /* For lang_hooks.types.type_for_mode. */
63 : #include "langhooks.h"
64 :
      : /* Forward declaration; the definition appears later in this file.  */
65 : static tree vector_vector_composition_type (tree, poly_uint64, tree *,
66 : bool = false);
67 :
68 : /* Return TRUE iff the given statement is in an inner loop relative to
69 : the loop being vectorized. */
70 : bool
71 3810527 : stmt_in_inner_loop_p (vec_info *vinfo, class _stmt_vec_info *stmt_info)
72 : {
73 3810527 : gimple *stmt = STMT_VINFO_STMT (stmt_info);
74 3810527 : basic_block bb = gimple_bb (stmt);
75 3810527 : loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
76 794019 : class loop* loop;
77 :
      : /* Only loop vectorization (a loop_vec_info) can have an inner loop;
      : for basic-block vectorization the answer is trivially false.  */
78 794019 : if (!loop_vinfo)
79 : return false;
80 :
81 794019 : loop = LOOP_VINFO_LOOP (loop_vinfo);
82 :
83 794019 : return (bb->loop_father == loop->inner);
84 : }
85 :
86 : /* Record the cost of a statement, either by directly informing the
87 : target model or by saving it in a vector for later processing.
88 : Return a preliminary estimate of the statement's cost. */
89 :
90 : unsigned
91 9303289 : record_stmt_cost (stmt_vector_for_cost *body_cost_vec, int count,
92 : enum vect_cost_for_stmt kind,
93 : stmt_vec_info stmt_info, slp_tree node,
94 : tree vectype, int misalign,
95 : enum vect_cost_model_location where)
96 : {
      : /* Refine the cost kind for gather/scatter accesses, which have their
      : own cost entries distinct from ordinary (un)aligned loads/stores.  */
97 9303289 : if ((kind == vector_load || kind == unaligned_load)
98 1354346 : && (stmt_info && STMT_VINFO_GATHER_SCATTER_P (stmt_info)))
99 : kind = vector_gather_load;
100 9303289 : if ((kind == vector_store || kind == unaligned_store)
101 902543 : && (stmt_info && STMT_VINFO_GATHER_SCATTER_P (stmt_info)))
102 9303289 : kind = vector_scatter_store;
103 :
      : /* Save the cost entry for later processing by the cost model.  */
104 9303289 : stmt_info_for_cost si
105 9303289 : = { count, kind, where, stmt_info, node, vectype, misalign };
106 9303289 : body_cost_vec->safe_push (si);
107 :
108 9303289 : return (unsigned)
109 9303289 : (builtin_vectorization_cost (kind, vectype, misalign) * count);
110 : }
111 :
      : /* Overload without an SLP node; forwards with NODE == NULL.  */
112 : unsigned
113 4844265 : record_stmt_cost (stmt_vector_for_cost *body_cost_vec, int count,
114 : enum vect_cost_for_stmt kind, stmt_vec_info stmt_info,
115 : tree vectype, int misalign,
116 : enum vect_cost_model_location where)
117 : {
118 4844265 : return record_stmt_cost (body_cost_vec, count, kind, stmt_info, NULL,
119 4844265 : vectype, misalign, where);
120 : }
121 :
      : /* Overload taking only an SLP node; uses the node's representative
      : statement as the stmt_vec_info.  */
122 : unsigned
123 1582914 : record_stmt_cost (stmt_vector_for_cost *body_cost_vec, int count,
124 : enum vect_cost_for_stmt kind, slp_tree node,
125 : tree vectype, int misalign,
126 : enum vect_cost_model_location where)
127 : {
128 1582914 : return record_stmt_cost (body_cost_vec, count, kind,
129 : SLP_TREE_REPRESENTATIVE (node), node,
130 1582914 : vectype, misalign, where);
131 : }
132 :
      : /* Overload for costs not tied to any statement or vectype; only valid
      : for branch and plain scalar statement kinds (asserted below).  */
133 : unsigned
134 176692 : record_stmt_cost (stmt_vector_for_cost *body_cost_vec, int count,
135 : enum vect_cost_for_stmt kind,
136 : enum vect_cost_model_location where)
137 : {
138 176692 : gcc_assert (kind == cond_branch_taken || kind == cond_branch_not_taken
139 : || kind == scalar_stmt);
140 176692 : return record_stmt_cost (body_cost_vec, count, kind, NULL, NULL,
141 176692 : NULL_TREE, 0, where);
142 : }
143 :
144 : /* Return a variable of type ELEM_TYPE[NELEMS]. */
145 :
146 : static tree
147 0 : create_vector_array (tree elem_type, unsigned HOST_WIDE_INT nelems)
148 : {
      : /* Create a temporary named "vect_array" of type ELEM_TYPE[NELEMS].  */
149 0 : return create_tmp_var (build_array_type_nelts (elem_type, nelems),
150 0 : "vect_array");
151 : }
152 :
153 : /* ARRAY is an array of vectors created by create_vector_array.
154 : Return an SSA_NAME for the vector in index N. The reference
155 : is part of the vectorization of STMT_INFO and the vector is associated
156 : with scalar destination SCALAR_DEST.
157 : If we need to ensure that inactive elements are set to zero,
158 : NEED_ZEROING is true, MASK contains the loop mask to be used. */
159 :
160 : static tree
161 0 : read_vector_array (vec_info *vinfo,
162 : stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
163 : tree scalar_dest, tree array, unsigned HOST_WIDE_INT n,
164 : bool need_zeroing, tree mask)
165 : {
166 0 : tree vect_type, vect, vect_name, tmp, tmp_name, array_ref;
167 0 : gimple *new_stmt;
168 :
169 0 : gcc_assert (TREE_CODE (TREE_TYPE (array)) == ARRAY_TYPE);
170 0 : vect_type = TREE_TYPE (TREE_TYPE (array));
171 0 : tmp = vect_create_destination_var (scalar_dest, vect_type);
172 0 : vect = vect_create_destination_var (scalar_dest, vect_type);
      : /* Build ARRAY[N] and load it into a fresh SSA name.  */
173 0 : array_ref = build4 (ARRAY_REF, vect_type, array,
174 0 : build_int_cst (size_type_node, n),
175 : NULL_TREE, NULL_TREE);
176 :
177 0 : new_stmt = gimple_build_assign (tmp, array_ref);
178 0 : tmp_name = make_ssa_name (vect, new_stmt);
179 0 : gimple_assign_set_lhs (new_stmt, tmp_name);
180 0 : vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
181 :
182 0 : if (need_zeroing)
183 : {
      : /* Zero the inactive lanes: select loaded values where MASK is set
      : and a zero else-value elsewhere via VEC_COND_EXPR.  */
184 0 : tree vec_els = vect_get_mask_load_else (MASK_LOAD_ELSE_ZERO,
185 : vect_type);
186 0 : vect_name = make_ssa_name (vect, new_stmt);
187 0 : new_stmt
188 0 : = gimple_build_assign (vect_name, VEC_COND_EXPR,
189 : mask, tmp_name, vec_els);
190 0 : vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
191 : }
192 : else
193 : vect_name = tmp_name;
194 :
195 0 : return vect_name;
196 : }
197 :
198 : /* ARRAY is an array of vectors created by create_vector_array.
199 : Emit code to store SSA_NAME VECT in index N of the array.
200 : The store is part of the vectorization of STMT_INFO. */
201 :
202 : static void
203 0 : write_vector_array (vec_info *vinfo,
204 : stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
205 : tree vect, tree array, unsigned HOST_WIDE_INT n)
206 : {
207 0 : tree array_ref;
208 0 : gimple *new_stmt;
209 :
      : /* Build ARRAY[N] as the store destination and emit ARRAY[N] = VECT.  */
210 0 : array_ref = build4 (ARRAY_REF, TREE_TYPE (vect), array,
211 0 : build_int_cst (size_type_node, n),
212 : NULL_TREE, NULL_TREE);
213 :
214 0 : new_stmt = gimple_build_assign (array_ref, vect);
215 0 : vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
216 0 : }
217 :
218 : /* PTR is a pointer to an array of type TYPE. Return a representation
219 : of *PTR. The memory reference replaces those in FIRST_DR
220 : (and its group). */
221 :
222 : static tree
223 0 : create_array_ref (tree type, tree ptr, tree alias_ptr_type)
224 : {
225 0 : tree mem_ref;
226 :
      : /* *PTR with alias set taken from ALIAS_PTR_TYPE and zero offset.  */
227 0 : mem_ref = build2 (MEM_REF, type, ptr, build_int_cst (alias_ptr_type, 0));
228 : /* Arrays have the same alignment as their type. */
229 0 : set_ptr_info_alignment (get_ptr_info (ptr), TYPE_ALIGN_UNIT (type), 0);
230 0 : return mem_ref;
231 : }
232 :
233 : /* Add a clobber of variable VAR to the vectorization of STMT_INFO.
234 : Emit the clobber before *GSI. */
235 :
236 : static void
237 15 : vect_clobber_variable (vec_info *vinfo, stmt_vec_info stmt_info,
238 : gimple_stmt_iterator *gsi, tree var)
239 : {
      : /* Emit VAR = CLOBBER to mark VAR's storage as dead past this point.  */
240 15 : tree clobber = build_clobber (TREE_TYPE (var));
241 15 : gimple *new_stmt = gimple_build_assign (var, clobber);
242 15 : vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
243 15 : }
244 :
245 : /* Utility functions used by vect_mark_stmts_to_be_vectorized. */
246 :
247 : /* Function vect_mark_relevant.
248 :
249 : Mark STMT_INFO as "relevant for vectorization" and add it to WORKLIST. */
250 :
251 : static void
252 2749778 : vect_mark_relevant (vec<stmt_vec_info> *worklist, stmt_vec_info stmt_info,
253 : enum vect_relevant relevant, bool live_p)
254 : {
255 2749778 : enum vect_relevant save_relevant = STMT_VINFO_RELEVANT (stmt_info);
256 2749778 : bool save_live_p = STMT_VINFO_LIVE_P (stmt_info);
257 :
258 2749778 : if (dump_enabled_p ())
259 159495 : dump_printf_loc (MSG_NOTE, vect_location,
260 : "mark relevant %d, live %d: %G", relevant, live_p,
261 : stmt_info->stmt);
262 :
263 : /* If this stmt is an original stmt in a pattern, we might need to mark its
264 : related pattern stmt instead of the original stmt. However, such stmts
265 : may have their own uses that are not in any pattern, in such cases the
266 : stmt itself should be marked. */
267 2749778 : if (STMT_VINFO_IN_PATTERN_P (stmt_info))
268 : {
269 : /* This is the last stmt in a sequence that was detected as a
270 : pattern that can potentially be vectorized. Don't mark the stmt
271 : as relevant/live because it's not going to be vectorized.
272 : Instead mark the pattern-stmt that replaces it. */
273 :
274 240549 : if (dump_enabled_p ())
275 2646 : dump_printf_loc (MSG_NOTE, vect_location,
276 : "last stmt in pattern. don't mark"
277 : " relevant/live.\n");
278 :
      : /* Redirect to the pattern statement and re-read its saved state so
      : the no-change check below applies to the pattern stmt.  */
279 240549 : stmt_vec_info old_stmt_info = stmt_info;
280 240549 : stmt_info = STMT_VINFO_RELATED_STMT (stmt_info);
281 240549 : gcc_assert (STMT_VINFO_RELATED_STMT (stmt_info) == old_stmt_info);
282 240549 : save_relevant = STMT_VINFO_RELEVANT (stmt_info);
283 240549 : save_live_p = STMT_VINFO_LIVE_P (stmt_info);
284 :
285 240549 : if (live_p && relevant == vect_unused_in_scope)
286 : {
287 122 : if (dump_enabled_p ())
288 10 : dump_printf_loc (MSG_NOTE, vect_location,
289 : "vec_stmt_relevant_p: forcing live pattern stmt "
290 : "relevant.\n");
291 : relevant = vect_used_only_live;
292 : }
293 :
294 240549 : if (dump_enabled_p ())
295 2646 : dump_printf_loc (MSG_NOTE, vect_location,
296 : "mark relevant %d, live %d: %G", relevant, live_p,
297 : stmt_info->stmt);
298 : }
299 :
      : /* Relevance and liveness only ever increase.  */
300 2749778 : STMT_VINFO_LIVE_P (stmt_info) |= live_p;
301 2749778 : if (relevant > STMT_VINFO_RELEVANT (stmt_info))
302 2487378 : STMT_VINFO_RELEVANT (stmt_info) = relevant;
303 :
      : /* If nothing changed, the stmt has already been queued with at least
      : this relevance/liveness; don't push it onto the worklist again.  */
304 2749778 : if (STMT_VINFO_RELEVANT (stmt_info) == save_relevant
305 262400 : && STMT_VINFO_LIVE_P (stmt_info) == save_live_p)
306 : {
307 261684 : if (dump_enabled_p ())
308 19145 : dump_printf_loc (MSG_NOTE, vect_location,
309 : "already marked relevant/live.\n");
310 261684 : return;
311 : }
312 :
313 2488094 : worklist->safe_push (stmt_info);
314 : }
315 :
316 :
317 : /* Function is_simple_and_all_uses_invariant
318 :
319 : Return true if STMT_INFO is simple and all uses of it are invariant. */
320 :
321 : bool
322 225785 : is_simple_and_all_uses_invariant (stmt_vec_info stmt_info,
323 : loop_vec_info loop_vinfo)
324 : {
325 225785 : tree op;
326 225785 : ssa_op_iter iter;
327 :
      : /* Only plain assignments are considered "simple" here.  */
328 398818 : gassign *stmt = dyn_cast <gassign *> (stmt_info->stmt);
329 173871 : if (!stmt)
330 : return false;
331 :
332 181415 : FOR_EACH_SSA_TREE_OPERAND (op, stmt, iter, SSA_OP_USE)
333 : {
334 180577 : enum vect_def_type dt = vect_uninitialized_def;
335 :
336 180577 : if (!vect_is_simple_use (op, loop_vinfo, &dt))
337 : {
338 5481 : if (dump_enabled_p ())
339 16 : dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
340 : "use not simple.\n");
341 173033 : return false;
342 : }
343 :
      : /* Only loop-external or constant defs count as invariant.  */
344 175096 : if (dt != vect_external_def && dt != vect_constant_def)
345 : return false;
346 : }
347 : return true;
348 : }
349 :
350 : /* Function vect_stmt_relevant_p.
351 :
352 : Return true if STMT_INFO, in the loop that is represented by LOOP_VINFO,
353 : is "relevant for vectorization".
354 :
355 : A stmt is considered "relevant for vectorization" if:
356 : - it has uses outside the loop.
357 : - it has vdefs (it alters memory).
358 : - control stmts in the loop (except for the exit condition).
359 :
360 : CHECKME: what other side effects would the vectorizer allow? */
361 :
362 : static bool
363 4324775 : vect_stmt_relevant_p (stmt_vec_info stmt_info, loop_vec_info loop_vinfo,
364 : enum vect_relevant *relevant, bool *live_p)
365 : {
366 4324775 : class loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
367 4324775 : ssa_op_iter op_iter;
368 4324775 : imm_use_iterator imm_iter;
369 4324775 : use_operand_p use_p;
370 4324775 : def_operand_p def_p;
371 :
372 4324775 : *relevant = vect_unused_in_scope;
373 4324775 : *live_p = false;
374 :
375 : /* cond stmt other than loop exit cond. */
376 4324775 : gimple *stmt = STMT_VINFO_STMT (stmt_info);
377 4324775 : if (is_ctrl_stmt (stmt)
378 540700 : && LOOP_VINFO_LOOP_IV_COND (loop_vinfo) != stmt
379 4555062 : && (!loop->inner || gimple_bb (stmt)->loop_father == loop))
380 228556 : *relevant = vect_used_in_scope;
381 :
382 : /* changing memory. */
383 4324775 : if (gimple_code (stmt_info->stmt) != GIMPLE_PHI)
384 3588096 : if (gimple_vdef (stmt_info->stmt)
385 3047396 : && !gimple_clobber_p (stmt_info->stmt))
386 : {
387 292019 : if (dump_enabled_p ())
388 27202 : dump_printf_loc (MSG_NOTE, vect_location,
389 : "vec_stmt_relevant_p: stmt has vdefs.\n");
390 292019 : *relevant = vect_used_in_scope;
      : /* A memory-changing stmt without a data reference and without any
      : SSA defs is tracked separately as an alternate def.  */
391 292019 : if (! STMT_VINFO_DATA_REF (stmt_info)
392 292019 : && zero_ssa_operands (stmt_info->stmt, SSA_OP_DEF))
393 20 : LOOP_VINFO_ALTERNATE_DEFS (loop_vinfo).safe_push (stmt_info);
394 : }
395 :
396 : /* uses outside the loop. */
397 12135231 : FOR_EACH_PHI_OR_STMT_DEF (def_p, stmt_info->stmt, op_iter, SSA_OP_DEF)
398 : {
399 12914671 : FOR_EACH_IMM_USE_FAST (use_p, imm_iter, DEF_FROM_PTR (def_p))
400 : {
401 5943309 : basic_block bb = gimple_bb (USE_STMT (use_p));
402 5943309 : if (!flow_bb_inside_loop_p (loop, bb))
403 : {
      : /* Debug uses outside the loop do not make a stmt live.  */
404 241942 : if (is_gimple_debug (USE_STMT (use_p)))
405 1075 : continue;
406 :
407 240867 : if (dump_enabled_p ())
408 5763 : dump_printf_loc (MSG_NOTE, vect_location,
409 : "vec_stmt_relevant_p: used out of loop.\n");
410 :
411 : /* We expect all such uses to be in the loop exit phis
412 : (because of loop closed form) */
413 240867 : gcc_assert (gimple_code (USE_STMT (use_p)) == GIMPLE_PHI);
414 :
415 240867 : *live_p = true;
416 : }
417 3485681 : }
418 : }
419 :
      : /* A live stmt whose uses are not all invariant must still be computed
      : by the vector loop; promote it to vect_used_only_live.  */
420 225787 : if (*live_p && *relevant == vect_unused_in_scope
421 4550560 : && !is_simple_and_all_uses_invariant (stmt_info, loop_vinfo))
422 : {
423 224947 : if (dump_enabled_p ())
424 5623 : dump_printf_loc (MSG_NOTE, vect_location,
425 : "vec_stmt_relevant_p: stmt live but not relevant.\n");
426 224947 : *relevant = vect_used_only_live;
427 : }
428 :
429 4324775 : return (*live_p || *relevant);
430 : }
431 :
432 :
433 : /* Function exist_non_indexing_operands_for_use_p
434 :
435 : USE is one of the uses attached to STMT_INFO. Check if USE is
436 : used in STMT_INFO for anything other than indexing an array. */
437 :
438 : static bool
439 3673608 : exist_non_indexing_operands_for_use_p (tree use, stmt_vec_info stmt_info)
440 : {
441 3673608 : tree operand;
442 :
443 : /* USE corresponds to some operand in STMT. If there is no data
444 : reference in STMT, then any operand that corresponds to USE
445 : is not indexing an array. */
446 3673608 : if (!STMT_VINFO_DATA_REF (stmt_info))
447 : return true;
448 :
449 : /* STMT has a data_ref. FORNOW this means that its of one of
450 : the following forms:
451 : -1- ARRAY_REF = var
452 : -2- var = ARRAY_REF
453 : (This should have been verified in analyze_data_refs).
454 :
455 : 'var' in the second case corresponds to a def, not a use,
456 : so USE cannot correspond to any operands that are not used
457 : for array indexing.
458 :
459 : Therefore, all we need to check is if STMT falls into the
460 : first case, and whether var corresponds to USE. */
461 :
462 1211031 : gassign *assign = dyn_cast <gassign *> (stmt_info->stmt);
463 1197214 : if (!assign || !gimple_assign_copy_p (assign))
464 : {
      : /* For internal-function calls (masked loads/stores, gathers and
      : scatters), the mask, else value, stored value and gather offset
      : arguments are real uses, not address computations.  */
465 678685 : gcall *call = dyn_cast <gcall *> (stmt_info->stmt);
466 13817 : if (call && gimple_call_internal_p (call))
467 : {
468 13817 : internal_fn ifn = gimple_call_internal_fn (call);
469 13817 : int mask_index = internal_fn_mask_index (ifn);
470 13817 : if (mask_index >= 0
471 13817 : && use == gimple_call_arg (call, mask_index))
472 : return true;
473 9057 : int els_index = internal_fn_else_index (ifn);
474 9057 : if (els_index >= 0
475 9057 : && use == gimple_call_arg (call, els_index))
476 : return true;
477 7755 : int stored_value_index = internal_fn_stored_value_index (ifn);
478 7755 : if (stored_value_index >= 0
479 7755 : && use == gimple_call_arg (call, stored_value_index))
480 : return true;
481 6258 : if (internal_gather_scatter_fn_p (ifn)
482 6258 : && use == gimple_call_arg (call, 1))
483 : return true;
484 : }
485 671126 : return false;
486 : }
487 :
488 532346 : if (TREE_CODE (gimple_assign_lhs (assign)) == SSA_NAME)
489 : return false;
490 532346 : operand = gimple_assign_rhs1 (assign);
491 532346 : if (TREE_CODE (operand) != SSA_NAME)
492 : return false;
493 :
494 462035 : if (operand == use)
495 : return true;
496 :
497 : return false;
498 : }
499 :
500 :
501 : /*
502 : Function process_use.
503 :
504 : Inputs:
505 : - a USE in STMT_VINFO in a loop represented by LOOP_VINFO
506 : - RELEVANT - enum value to be set in the STMT_VINFO of the stmt
507 : that defined USE. This is done by calling mark_relevant and passing it
508 : the WORKLIST (to add DEF_STMT to the WORKLIST in case it is relevant).
509 : - FORCE is true if exist_non_indexing_operands_for_use_p check shouldn't
510 : be performed.
511 :
512 : Outputs:
513 : Generally, LIVE_P and RELEVANT are used to define the liveness and
514 : relevance info of the DEF_STMT of this USE:
515 : STMT_VINFO_LIVE_P (DEF_stmt_vinfo) <-- live_p
516 : STMT_VINFO_RELEVANT (DEF_stmt_vinfo) <-- relevant
517 : Exceptions:
518 : - case 1: If USE is used only for address computations (e.g. array indexing),
519 : which does not need to be directly vectorized, then the liveness/relevance
520 : of the respective DEF_STMT is left unchanged.
521 : - case 2: If STMT_VINFO is a reduction phi and DEF_STMT is a reduction stmt,
522 : we skip DEF_STMT cause it had already been processed.
523 : - case 3: If DEF_STMT and STMT_VINFO are in different nests, then
524 : "relevant" will be modified accordingly.
525 :
526 : Return true if everything is as expected. Return false otherwise. */
527 :
528 : static opt_result
529 3734456 : process_use (stmt_vec_info stmt_vinfo, tree use, loop_vec_info loop_vinfo,
530 : enum vect_relevant relevant, vec<stmt_vec_info> *worklist,
531 : bool force)
532 : {
533 3734456 : stmt_vec_info dstmt_vinfo;
534 3734456 : enum vect_def_type dt;
535 :
536 : /* case 1: we are only interested in uses that need to be vectorized. Uses
537 : that are used for address computation are not considered relevant. */
538 3734456 : if (!force && !exist_non_indexing_operands_for_use_p (use, stmt_vinfo))
539 985717 : return opt_result::success ();
540 :
      : /* An unanalyzable use makes the loop unvectorizable.  */
541 2748739 : if (!vect_is_simple_use (use, loop_vinfo, &dt, &dstmt_vinfo))
542 35555 : return opt_result::failure_at (stmt_vinfo->stmt,
543 : "not vectorized:"
544 : " unsupported use in stmt.\n");
545 :
      : /* No defining stmt_vec_info — presumably the def is outside the loop
      : region (external or constant); nothing to mark.  */
546 2713184 : if (!dstmt_vinfo)
547 536839 : return opt_result::success ();
548 :
549 2176345 : basic_block def_bb = gimple_bb (dstmt_vinfo->stmt);
550 2176345 : basic_block bb = gimple_bb (stmt_vinfo->stmt);
551 :
552 : /* case 2: A reduction phi (STMT) defined by a reduction stmt (DSTMT_VINFO).
553 : We have to force the stmt live since the epilogue loop needs it to
554 : continue computing the reduction. */
555 2176345 : if (gimple_code (stmt_vinfo->stmt) == GIMPLE_PHI
556 227829 : && STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_reduction_def
557 59349 : && gimple_code (dstmt_vinfo->stmt) != GIMPLE_PHI
558 59349 : && STMT_VINFO_DEF_TYPE (dstmt_vinfo) == vect_reduction_def
559 2235694 : && bb->loop_father == def_bb->loop_father)
560 : {
561 59349 : if (dump_enabled_p ())
562 3744 : dump_printf_loc (MSG_NOTE, vect_location,
563 : "reduc-stmt defining reduc-phi in the same nest.\n");
564 59349 : vect_mark_relevant (worklist, dstmt_vinfo, relevant, true);
565 59349 : return opt_result::success ();
566 : }
567 :
568 : /* case 3a: outer-loop stmt defining an inner-loop stmt:
569 : outer-loop-header-bb:
570 : d = dstmt_vinfo
571 : inner-loop:
572 : stmt # use (d)
573 : outer-loop-tail-bb:
574 : ... */
575 2116996 : if (flow_loop_nested_p (def_bb->loop_father, bb->loop_father))
576 : {
577 1899 : if (dump_enabled_p ())
578 320 : dump_printf_loc (MSG_NOTE, vect_location,
579 : "outer-loop def-stmt defining inner-loop stmt.\n");
580 :
      : /* Translate the user's relevance into the def's relevance when the
      : two statements live in different loops of the nest.  */
581 1899 : switch (relevant)
582 : {
583 0 : case vect_unused_in_scope:
584 0 : relevant = (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_nested_cycle) ?
585 : vect_used_in_scope : vect_unused_in_scope;
586 : break;
587 :
588 637 : case vect_used_in_outer_by_reduction:
589 637 : gcc_assert (STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_reduction_def);
590 : relevant = vect_used_by_reduction;
591 : break;
592 :
593 1032 : case vect_used_in_outer:
594 1032 : gcc_assert (STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_reduction_def);
595 : relevant = vect_used_in_scope;
596 : break;
597 :
598 : case vect_used_in_scope:
599 : break;
600 :
601 0 : default:
602 0 : gcc_unreachable ();
603 : }
604 : }
605 :
606 : /* case 3b: inner-loop stmt defining an outer-loop stmt:
607 : outer-loop-header-bb:
608 : ...
609 : inner-loop:
610 : d = dstmt_vinfo
611 : outer-loop-tail-bb (or outer-loop-exit-bb in double reduction):
612 : stmt # use (d) */
613 2115097 : else if (flow_loop_nested_p (bb->loop_father, def_bb->loop_father))
614 : {
615 1833 : if (dump_enabled_p ())
616 625 : dump_printf_loc (MSG_NOTE, vect_location,
617 : "inner-loop def-stmt defining outer-loop stmt.\n");
618 :
619 1833 : switch (relevant)
620 : {
621 0 : case vect_unused_in_scope:
622 0 : relevant = (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_reduction_def
623 0 : || STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_double_reduction_def) ?
624 : vect_used_in_outer_by_reduction : vect_unused_in_scope;
625 : break;
626 :
627 : case vect_used_by_reduction:
628 : case vect_used_only_live:
629 : relevant = vect_used_in_outer_by_reduction;
630 : break;
631 :
632 : case vect_used_in_scope:
633 1954212 : relevant = vect_used_in_outer;
634 : break;
635 :
636 0 : default:
637 0 : gcc_unreachable ();
638 : }
639 : }
640 : /* We are also not interested in uses on loop PHI backedges that are
641 : inductions. Otherwise we'll needlessly vectorize the IV increment
642 : and cause hybrid SLP for SLP inductions. */
643 2113264 : else if (gimple_code (stmt_vinfo->stmt) == GIMPLE_PHI
644 165534 : && STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_induction_def
645 2276048 : && (PHI_ARG_DEF_FROM_EDGE (stmt_vinfo->stmt,
646 : loop_latch_edge (bb->loop_father))
647 : == use))
648 : {
649 162784 : if (dump_enabled_p ())
650 4774 : dump_printf_loc (MSG_NOTE, vect_location,
651 : "induction value on backedge.\n");
652 162784 : return opt_result::success ();
653 : }
654 :
655 1954212 : vect_mark_relevant (worklist, dstmt_vinfo, relevant, false);
656 1954212 : return opt_result::success ();
657 : }
658 :
659 :
660 : /* Function vect_mark_stmts_to_be_vectorized.
661 :
662 : Not all stmts in the loop need to be vectorized. For example:
663 :
664 : for i...
665 : for j...
666 : 1. T0 = i + j
667 : 2. T1 = a[T0]
668 :
669 : 3. j = j + 1
670 :
671 : Stmt 1 and 3 do not need to be vectorized, because loop control and
672 : addressing of vectorized data-refs are handled differently.
673 :
674 : This pass detects such stmts. */
675 :
676 : opt_result
677 368491 : vect_mark_stmts_to_be_vectorized (loop_vec_info loop_vinfo, bool *fatal)
678 : {
679 368491 : class loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
680 368491 : basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
681 368491 : unsigned int nbbs = loop->num_nodes;
682 368491 : gimple_stmt_iterator si;
683 368491 : unsigned int i;
684 368491 : basic_block bb;
685 368491 : bool live_p;
686 368491 : enum vect_relevant relevant;
687 :
688 368491 : DUMP_VECT_SCOPE ("vect_mark_stmts_to_be_vectorized");
689 :
690 368491 : auto_vec<stmt_vec_info, 64> worklist;
691 :
692 : /* 1. Init worklist. */
693 1270881 : for (i = 0; i < nbbs; i++)
694 : {
695 912677 : bb = bbs[i];
      : /* Seed the worklist with all directly-relevant PHIs ...  */
696 1821920 : for (si = gsi_start_phis (bb); !gsi_end_p (si); gsi_next (&si))
697 : {
698 1838772 : if (virtual_operand_p (gimple_phi_result (gsi_stmt (si))))
699 182707 : continue;
700 736679 : stmt_vec_info phi_info = loop_vinfo->lookup_stmt (gsi_stmt (si));
701 736679 : if (dump_enabled_p ())
702 40216 : dump_printf_loc (MSG_NOTE, vect_location, "init: phi relevant? %G",
703 : phi_info->stmt);
704 :
705 736679 : if (vect_stmt_relevant_p (phi_info, loop_vinfo, &relevant, &live_p))
706 : {
707 45708 : if (STMT_VINFO_DEF_TYPE (phi_info) == vect_unknown_def_type)
708 10143 : return opt_result::failure_at
709 10143 : (*si, "not vectorized: unhandled relevant PHI: %G", *si);
710 35565 : vect_mark_relevant (&worklist, phi_info, relevant, live_p);
711 : }
712 : }
      : /* ... and all directly-relevant non-debug statements.  */
713 7084535 : for (si = gsi_after_labels (bb); !gsi_end_p (si); gsi_next (&si))
714 : {
715 6182145 : gimple *stmt = gsi_stmt (si);
716 6182145 : if (is_gimple_debug (stmt))
717 2593905 : continue;
718 3588240 : stmt_vec_info stmt_info = loop_vinfo->lookup_stmt (stmt);
719 3588240 : if (dump_enabled_p ())
720 215365 : dump_printf_loc (MSG_NOTE, vect_location,
721 : "init: stmt relevant? %G", stmt);
722 :
723 3588240 : if (gimple_get_lhs (stmt) == NULL_TREE
724 546225 : && !is_a <gcond *> (stmt)
725 3593765 : && !is_a <gcall *> (stmt))
726 144 : return opt_result::failure_at
727 144 : (stmt, "not vectorized: irregular stmt: %G", stmt);
728 :
729 3588096 : if (vect_stmt_relevant_p (stmt_info, loop_vinfo, &relevant, &live_p))
730 700652 : vect_mark_relevant (&worklist, stmt_info, relevant, live_p);
731 : }
732 : }
733 :
734 : /* 2. Process_worklist */
735 2718931 : while (worklist.length () > 0)
736 : {
737 2396284 : use_operand_p use_p;
738 2396284 : ssa_op_iter iter;
739 :
740 2396284 : stmt_vec_info stmt_vinfo = worklist.pop ();
741 2396284 : if (dump_enabled_p ())
742 139740 : dump_printf_loc (MSG_NOTE, vect_location,
743 : "worklist: examine stmt: %G", stmt_vinfo->stmt);
744 :
745 : /* Examine the USEs of STMT. For each USE, mark the stmt that defines it
746 : (DEF_STMT) as relevant/irrelevant according to the relevance property
747 : of STMT. */
748 2396284 : relevant = STMT_VINFO_RELEVANT (stmt_vinfo);
749 :
750 : /* Generally, the relevance property of STMT (in STMT_VINFO_RELEVANT) is
751 : propagated as is to the DEF_STMTs of its USEs.
752 :
753 : One exception is when STMT has been identified as defining a reduction
754 : variable; in this case we set the relevance to vect_used_by_reduction.
755 : This is because we distinguish between two kinds of relevant stmts -
756 : those that are used by a reduction computation, and those that are
757 : (also) used by a regular computation. This allows us later on to
758 : identify stmts that are used solely by a reduction, and therefore the
759 : order of the results that they produce does not have to be kept. */
760 :
      : /* Sanity-check the relevance value against the def type; any
      : combination not listed here is an unsupported use.  */
761 2396284 : switch (STMT_VINFO_DEF_TYPE (stmt_vinfo))
762 : {
763 120789 : case vect_reduction_def:
764 120789 : gcc_assert (relevant != vect_unused_in_scope);
765 120789 : if (relevant != vect_unused_in_scope
766 120789 : && relevant != vect_used_in_scope
767 120789 : && relevant != vect_used_by_reduction
768 120789 : && relevant != vect_used_only_live)
769 0 : return opt_result::failure_at
770 0 : (stmt_vinfo->stmt, "unsupported use of reduction.\n");
771 : break;
772 :
773 1996 : case vect_nested_cycle:
774 1996 : if (relevant != vect_unused_in_scope
775 1996 : && relevant != vect_used_in_outer_by_reduction
776 1482 : && relevant != vect_used_in_outer)
777 2 : return opt_result::failure_at
778 2 : (stmt_vinfo->stmt, "unsupported use of nested cycle.\n");
779 : break;
780 :
781 1035 : case vect_double_reduction_def:
782 1035 : if (relevant != vect_unused_in_scope
783 1035 : && relevant != vect_used_by_reduction
784 351 : && relevant != vect_used_only_live)
785 0 : return opt_result::failure_at
786 0 : (stmt_vinfo->stmt, "unsupported use of double reduction.\n");
787 : break;
788 :
789 : default:
790 : break;
791 : }
792 :
793 2396282 : if (is_pattern_stmt_p (stmt_vinfo))
794 : {
795 : /* Pattern statements are not inserted into the code, so
796 : FOR_EACH_PHI_OR_STMT_USE optimizes their operands out, and we
797 : have to scan the RHS or function arguments instead. */
798 586860 : if (gassign *assign = dyn_cast <gassign *> (stmt_vinfo->stmt))
799 : {
800 372199 : enum tree_code rhs_code = gimple_assign_rhs_code (assign);
801 372199 : tree op = gimple_assign_rhs1 (assign);
802 :
803 372199 : i = 1;
      : /* An embedded comparison in a COND_EXPR has its own operands;
      : process them and then skip past rhs1.  */
804 372199 : if (rhs_code == COND_EXPR && COMPARISON_CLASS_P (op))
805 : {
806 0 : opt_result res
807 0 : = process_use (stmt_vinfo, TREE_OPERAND (op, 0),
808 : loop_vinfo, relevant, &worklist, false);
809 0 : if (!res)
810 0 : return res;
811 0 : res = process_use (stmt_vinfo, TREE_OPERAND (op, 1),
812 : loop_vinfo, relevant, &worklist, false);
813 0 : if (!res)
814 0 : return res;
815 : i = 2;
816 : }
817 1074288 : for (; i < gimple_num_ops (assign); i++)
818 : {
819 706067 : op = gimple_op (assign, i);
820 706067 : if (TREE_CODE (op) == SSA_NAME)
821 : {
822 538446 : opt_result res
823 538446 : = process_use (stmt_vinfo, op, loop_vinfo, relevant,
824 : &worklist, false);
825 538446 : if (!res)
826 3978 : return res;
827 : }
828 : }
829 : }
830 214661 : else if (gcond *cond = dyn_cast <gcond *> (stmt_vinfo->stmt))
831 : {
832 210307 : tree_code rhs_code = gimple_cond_code (cond);
833 210307 : gcc_assert (TREE_CODE_CLASS (rhs_code) == tcc_comparison);
834 210307 : opt_result res
835 210307 : = process_use (stmt_vinfo, gimple_cond_lhs (cond),
836 : loop_vinfo, relevant, &worklist, false);
837 210307 : if (!res)
838 35557 : return res;
839 210307 : res = process_use (stmt_vinfo, gimple_cond_rhs (cond),
840 : loop_vinfo, relevant, &worklist, false);
841 210307 : if (!res)
842 0 : return res;
843 : }
844 4354 : else if (gcall *call = dyn_cast <gcall *> (stmt_vinfo->stmt))
845 : {
846 21018 : for (i = 0; i < gimple_call_num_args (call); i++)
847 : {
848 16664 : tree arg = gimple_call_arg (call, i);
849 16664 : opt_result res
850 16664 : = process_use (stmt_vinfo, arg, loop_vinfo, relevant,
851 : &worklist, false);
852 16664 : if (!res)
853 0 : return res;
854 : }
855 : }
856 : else
857 0 : gcc_unreachable ();
858 : }
859 : else
860 6298364 : FOR_EACH_PHI_OR_STMT_USE (use_p, stmt_vinfo->stmt, iter, SSA_OP_USE)
861 : {
862 2697884 : tree op = USE_FROM_PTR (use_p);
863 2697884 : opt_result res
864 2697884 : = process_use (stmt_vinfo, op, loop_vinfo, relevant,
865 : &worklist, false);
866 2697884 : if (!res)
867 18364 : return res;
868 : }
869 :
      : /* Gather/scatter offsets are uses even though they look like address
      : computation, so process them with FORCE set.  */
870 2373940 : if (STMT_VINFO_GATHER_SCATTER_P (stmt_vinfo))
871 : {
872 60848 : gather_scatter_info gs_info;
873 60848 : if (!vect_check_gather_scatter (stmt_vinfo,
874 : STMT_VINFO_VECTYPE (stmt_vinfo),
875 : loop_vinfo, &gs_info))
876 0 : gcc_unreachable ();
877 60848 : opt_result res
878 60848 : = process_use (stmt_vinfo, gs_info.offset, loop_vinfo, relevant,
879 : &worklist, true);
880 60848 : if (!res)
881 : {
      : /* Report this failure as non-fatal to the caller.  */
882 13213 : if (fatal)
883 13213 : *fatal = false;
884 13213 : return res;
885 : }
886 : }
887 : } /* while worklist */
888 :
889 322647 : return opt_result::success ();
890 368491 : }
891 :
892 : /* Function vect_model_simple_cost.
893 :
894 : Models cost for simple operations, i.e. those that only emit N operations
895 : of the same KIND. */
896 :
897 : static void
898 627433 : vect_model_simple_cost (vec_info *vinfo, int n, slp_tree node,
899 : stmt_vector_for_cost *cost_vec,
900 : vect_cost_for_stmt kind = vector_stmt)
901 : {
902 627433 : int inside_cost = 0, prologue_cost = 0;
903 :
904 627433 : gcc_assert (cost_vec != NULL);
905 :
906 627433 : n *= vect_get_num_copies (vinfo, node);
907 :
908 : /* Pass the inside-of-loop statements to the target-specific cost model. */
909 627433 : inside_cost += record_stmt_cost (cost_vec, n, kind, node, 0, vect_body);
910 :
911 627433 : if (dump_enabled_p ())
912 32610 : dump_printf_loc (MSG_NOTE, vect_location,
913 : "vect_model_simple_cost: inside_cost = %d, "
914 : "prologue_cost = %d .\n", inside_cost, prologue_cost);
915 627433 : }
916 :
917 :
918 : /* Model cost for type demotion and promotion operations. PWR is
919 : normally zero for single-step promotions and demotions. It will be
920 : one if two-step promotion/demotion is required, and so on. NCOPIES
921 : is the number of vector results (and thus number of instructions)
922 : for the narrowest end of the operation chain. Each additional
923 : step doubles the number of instructions required. If WIDEN_ARITH
924 : is true the stmt is doing widening arithmetic. */
925 :
926 : static void
927 53092 : vect_model_promotion_demotion_cost (slp_tree slp_node,
928 : unsigned int ncopies, int pwr,
929 : stmt_vector_for_cost *cost_vec,
930 : bool widen_arith)
931 : {
932 53092 : int i;
933 53092 : int inside_cost = 0, prologue_cost = 0;
934 :
935 124670 : for (i = 0; i < pwr + 1; i++)
936 : {
937 141368 : inside_cost += record_stmt_cost (cost_vec, ncopies,
938 : widen_arith
939 : ? vector_stmt : vec_promote_demote,
940 : slp_node, 0, vect_body);
941 71578 : ncopies *= 2;
942 : }
943 :
944 53092 : if (dump_enabled_p ())
945 6155 : dump_printf_loc (MSG_NOTE, vect_location,
946 : "vect_model_promotion_demotion_cost: inside_cost = %d, "
947 : "prologue_cost = %d .\n", inside_cost, prologue_cost);
948 53092 : }
949 :
950 : /* Returns true if the current function returns DECL. */
951 :
952 : static bool
953 535613 : cfun_returns (tree decl)
954 : {
955 535613 : edge_iterator ei;
956 535613 : edge e;
957 1054173 : FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR_FOR_FN (cfun)->preds)
958 : {
959 1060424 : greturn *ret = safe_dyn_cast <greturn *> (*gsi_last_bb (e->src));
960 530212 : if (!ret)
961 0 : continue;
962 530212 : if (gimple_return_retval (ret) == decl)
963 : return true;
964 : /* We often end up with an aggregate copy to the result decl,
965 : handle that case as well. First skip intermediate clobbers
966 : though. */
967 : gimple *def = ret;
968 1569995 : do
969 : {
970 3139990 : def = SSA_NAME_DEF_STMT (gimple_vuse (def));
971 : }
972 1569995 : while (gimple_clobber_p (def));
973 519296 : if (is_a <gassign *> (def)
974 59518 : && gimple_assign_lhs (def) == gimple_return_retval (ret)
975 525895 : && gimple_assign_rhs1 (def) == decl)
976 : return true;
977 : }
978 : return false;
979 : }
980 :
981 : /* Calculate cost of DR's memory access. */
982 : void
983 892546 : vect_get_store_cost (vec_info *, stmt_vec_info stmt_info, slp_tree slp_node,
984 : int ncopies, dr_alignment_support alignment_support_scheme,
985 : int misalignment,
986 : unsigned int *inside_cost,
987 : stmt_vector_for_cost *body_cost_vec)
988 : {
989 892546 : tree vectype
990 892546 : = slp_node ? SLP_TREE_VECTYPE (slp_node) : STMT_VINFO_VECTYPE (stmt_info);
991 892546 : switch (alignment_support_scheme)
992 : {
993 472622 : case dr_aligned:
994 472622 : {
995 472622 : *inside_cost += record_stmt_cost (body_cost_vec, ncopies,
996 : vector_store, stmt_info, slp_node,
997 : vectype, 0, vect_body);
998 :
999 472622 : if (dump_enabled_p ())
1000 13675 : dump_printf_loc (MSG_NOTE, vect_location,
1001 : "vect_model_store_cost: aligned.\n");
1002 : break;
1003 : }
1004 :
1005 419924 : case dr_unaligned_supported:
1006 419924 : {
1007 : /* Here, we assign an additional cost for the unaligned store. */
1008 419924 : *inside_cost += record_stmt_cost (body_cost_vec, ncopies,
1009 : unaligned_store, stmt_info, slp_node,
1010 : vectype, misalignment, vect_body);
1011 419924 : if (dump_enabled_p ())
1012 12574 : dump_printf_loc (MSG_NOTE, vect_location,
1013 : "vect_model_store_cost: unaligned supported by "
1014 : "hardware.\n");
1015 : break;
1016 : }
1017 :
1018 0 : case dr_unaligned_unsupported:
1019 0 : {
1020 0 : *inside_cost = VECT_MAX_COST;
1021 :
1022 0 : if (dump_enabled_p ())
1023 0 : dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1024 : "vect_model_store_cost: unsupported access.\n");
1025 : break;
1026 : }
1027 :
1028 0 : default:
1029 0 : gcc_unreachable ();
1030 : }
1031 892546 : }
1032 :
/* Calculate cost of DR's memory access.

   Add the cost of NCOPIES vector loads of STMT_INFO/SLP_NODE to
   *INSIDE_COST (recorded in BODY_COST_VEC), according to
   ALIGNMENT_SUPPORT_SCHEME and MISALIGNMENT.  For the explicit-realign
   schemes a one-time setup cost may additionally be added to
   *PROLOGUE_COST (recorded in PROLOGUE_COST_VEC), guarded by
   ADD_REALIGN_COST and RECORD_PROLOGUE_COSTS; ADD_REALIGN_COST should be
   set for only one access of a load group.  */
void
vect_get_load_cost (vec_info *, stmt_vec_info stmt_info, slp_tree slp_node,
		    int ncopies, dr_alignment_support alignment_support_scheme,
		    int misalignment,
		    bool add_realign_cost, unsigned int *inside_cost,
		    unsigned int *prologue_cost,
		    stmt_vector_for_cost *prologue_cost_vec,
		    stmt_vector_for_cost *body_cost_vec,
		    bool record_prologue_costs)
{
  /* Prefer the SLP node's vector type when available.  */
  tree vectype
    = slp_node ? SLP_TREE_VECTYPE (slp_node) : STMT_VINFO_VECTYPE (stmt_info);
  switch (alignment_support_scheme)
    {
    case dr_aligned:
      {
	*inside_cost += record_stmt_cost (body_cost_vec, ncopies, vector_load,
					  stmt_info, slp_node, vectype,
					  0, vect_body);

	if (dump_enabled_p ())
	  dump_printf_loc (MSG_NOTE, vect_location,
			   "vect_model_load_cost: aligned.\n");

	break;
      }
    case dr_unaligned_supported:
      {
	/* Here, we assign an additional cost for the unaligned load.  */
	*inside_cost += record_stmt_cost (body_cost_vec, ncopies,
					  unaligned_load, stmt_info, slp_node,
					  vectype, misalignment, vect_body);

	if (dump_enabled_p ())
	  dump_printf_loc (MSG_NOTE, vect_location,
			   "vect_model_load_cost: unaligned supported by "
			   "hardware.\n");

	break;
      }
    case dr_explicit_realign:
      {
	/* Each access needs two loads plus a permute to realign.  */
	*inside_cost += record_stmt_cost (body_cost_vec, ncopies * 2,
					  vector_load, stmt_info, slp_node,
					  vectype, 0, vect_body);
	*inside_cost += record_stmt_cost (body_cost_vec, ncopies,
					  vec_perm, stmt_info, slp_node,
					  vectype, 0, vect_body);

	/* FIXME: If the misalignment remains fixed across the iterations of
	   the containing loop, the following cost should be added to the
	   prologue costs.  */
	if (targetm.vectorize.builtin_mask_for_load)
	  *inside_cost += record_stmt_cost (body_cost_vec, 1, vector_stmt,
					    stmt_info, slp_node, vectype,
					    0, vect_body);

	if (dump_enabled_p ())
	  dump_printf_loc (MSG_NOTE, vect_location,
			   "vect_model_load_cost: explicit realign\n");

	break;
      }
    case dr_explicit_realign_optimized:
      {
	if (dump_enabled_p ())
	  dump_printf_loc (MSG_NOTE, vect_location,
			   "vect_model_load_cost: unaligned software "
			   "pipelined.\n");

	/* Unaligned software pipeline has a load of an address, an initial
	   load, and possibly a mask operation to "prime" the loop.  However,
	   if this is an access in a group of loads, which provide grouped
	   access, then the above cost should only be considered for one
	   access in the group.  Inside the loop, there is a load op
	   and a realignment op.  */

	if (add_realign_cost && record_prologue_costs)
	  {
	    *prologue_cost += record_stmt_cost (prologue_cost_vec, 2,
						vector_stmt, stmt_info,
						slp_node, vectype,
						0, vect_prologue);
	    if (targetm.vectorize.builtin_mask_for_load)
	      *prologue_cost += record_stmt_cost (prologue_cost_vec, 1,
						  vector_stmt, stmt_info,
						  slp_node, vectype,
						  0, vect_prologue);
	  }

	/* Steady state: one load and one realigning permute per copy.  */
	*inside_cost += record_stmt_cost (body_cost_vec, ncopies, vector_load,
					  stmt_info, slp_node, vectype,
					  0, vect_body);
	*inside_cost += record_stmt_cost (body_cost_vec, ncopies, vec_perm,
					  stmt_info, slp_node, vectype,
					  0, vect_body);

	if (dump_enabled_p ())
	  dump_printf_loc (MSG_NOTE, vect_location,
			   "vect_model_load_cost: explicit realign optimized"
			   "\n");

	break;
      }

    case dr_unaligned_unsupported:
      {
	/* Make the access prohibitively expensive.  */
	*inside_cost = VECT_MAX_COST;

	if (dump_enabled_p ())
	  dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			   "vect_model_load_cost: unsupported access.\n");
	break;
      }

    default:
      gcc_unreachable ();
    }
}
1153 :
1154 : /* Insert the new stmt NEW_STMT at *GSI or at the appropriate place in
1155 : the loop preheader for the vectorized stmt STMT_VINFO. */
1156 :
1157 : static void
1158 6400 : vect_init_vector_1 (vec_info *vinfo, stmt_vec_info stmt_vinfo, gimple *new_stmt,
1159 : gimple_stmt_iterator *gsi)
1160 : {
1161 6400 : if (gsi)
1162 3077 : vect_finish_stmt_generation (vinfo, stmt_vinfo, new_stmt, gsi);
1163 : else
1164 3323 : vinfo->insert_on_entry (stmt_vinfo, new_stmt);
1165 :
1166 6400 : if (dump_enabled_p ())
1167 1857 : dump_printf_loc (MSG_NOTE, vect_location,
1168 : "created new init_stmt: %G", new_stmt);
1169 6400 : }
1170 :
/* Function vect_init_vector.

   Insert a new stmt (INIT_STMT) that initializes a new variable of type
   TYPE with the value VAL.  If TYPE is a vector type and VAL does not have
   vector type a vector with all elements equal to VAL is created first.
   Place the initialization at GSI if it is not NULL.  Otherwise, place the
   initialization at the loop preheader.
   Return the DEF of INIT_STMT.
   It will be used in the vectorization of STMT_INFO.  */

tree
vect_init_vector (vec_info *vinfo, stmt_vec_info stmt_info, tree val, tree type,
		  gimple_stmt_iterator *gsi)
{
  gimple *init_stmt;
  tree new_temp;

  /* We abuse this function to push sth to a SSA name with initial 'val'.  */
  if (! useless_type_conversion_p (type, TREE_TYPE (val)))
    {
      /* VAL is a scalar that first has to be converted to the vector's
	 element type and then splat across all lanes.  */
      gcc_assert (VECTOR_TYPE_P (type));
      if (! types_compatible_p (TREE_TYPE (type), TREE_TYPE (val)))
	{
	  /* Scalar boolean value should be transformed into
	     all zeros or all ones value before building a vector.  */
	  if (VECTOR_BOOLEAN_TYPE_P (type))
	    {
	      tree true_val = build_all_ones_cst (TREE_TYPE (type));
	      tree false_val = build_zero_cst (TREE_TYPE (type));

	      if (CONSTANT_CLASS_P (val))
		val = integer_zerop (val) ? false_val : true_val;
	      else
		{
		  /* Non-constant condition: materialize the all-ones /
		     all-zeros selection as a COND_EXPR.  */
		  new_temp = make_ssa_name (TREE_TYPE (type));
		  init_stmt = gimple_build_assign (new_temp, COND_EXPR,
						   val, true_val, false_val);
		  vect_init_vector_1 (vinfo, stmt_info, init_stmt, gsi);
		  val = new_temp;
		}
	    }
	  else
	    {
	      gimple_seq stmts = NULL;
	      if (! INTEGRAL_TYPE_P (TREE_TYPE (val)))
		val = gimple_build (&stmts, VIEW_CONVERT_EXPR,
				    TREE_TYPE (type), val);
	      else
		/* ??? Condition vectorization expects us to do
		   promotion of invariant/external defs.  */
		val = gimple_convert (&stmts, TREE_TYPE (type), val);
	      /* Re-emit the conversion statements one by one through
		 vect_init_vector_1 so they land at the right place.  */
	      for (gimple_stmt_iterator gsi2 = gsi_start (stmts);
		   !gsi_end_p (gsi2); )
		{
		  init_stmt = gsi_stmt (gsi2);
		  gsi_remove (&gsi2, false);
		  vect_init_vector_1 (vinfo, stmt_info, init_stmt, gsi);
		}
	    }
	}
      /* Splat the (possibly converted) scalar across all lanes.  */
      val = build_vector_from_val (type, val);
    }

  /* Assign VAL to a fresh SSA name and emit that assignment.  */
  new_temp = vect_get_new_ssa_name (type, vect_simple_var, "cst_");
  init_stmt = gimple_build_assign (new_temp, val);
  vect_init_vector_1 (vinfo, stmt_info, init_stmt, gsi);
  return new_temp;
}
1239 :
1240 :
1241 : /* Get vectorized definitions for OP0 and OP1. */
1242 :
1243 : void
1244 185975 : vect_get_vec_defs (vec_info *, slp_tree slp_node,
1245 : tree op0, vec<tree> *vec_oprnds0,
1246 : tree op1, vec<tree> *vec_oprnds1,
1247 : tree op2, vec<tree> *vec_oprnds2,
1248 : tree op3, vec<tree> *vec_oprnds3)
1249 : {
1250 185975 : if (op0)
1251 184330 : vect_get_slp_defs (SLP_TREE_CHILDREN (slp_node)[0], vec_oprnds0);
1252 185975 : if (op1)
1253 138720 : vect_get_slp_defs (SLP_TREE_CHILDREN (slp_node)[1], vec_oprnds1);
1254 185975 : if (op2)
1255 9179 : vect_get_slp_defs (SLP_TREE_CHILDREN (slp_node)[2], vec_oprnds2);
1256 185975 : if (op3)
1257 0 : vect_get_slp_defs (SLP_TREE_CHILDREN (slp_node)[3], vec_oprnds3);
1258 185975 : }
1259 :
1260 : /* Helper function called by vect_finish_replace_stmt and
1261 : vect_finish_stmt_generation. Set the location of the new
1262 : statement and create and return a stmt_vec_info for it. */
1263 :
1264 : static void
1265 1415601 : vect_finish_stmt_generation_1 (vec_info *,
1266 : stmt_vec_info stmt_info, gimple *vec_stmt)
1267 : {
1268 1415601 : if (dump_enabled_p ())
1269 148493 : dump_printf_loc (MSG_NOTE, vect_location, "add new stmt: %G", vec_stmt);
1270 :
1271 1415601 : if (stmt_info)
1272 : {
1273 1384203 : gimple_set_location (vec_stmt, gimple_location (stmt_info->stmt));
1274 :
1275 : /* While EH edges will generally prevent vectorization, stmt might
1276 : e.g. be in a must-not-throw region. Ensure newly created stmts
1277 : that could throw are part of the same region. */
1278 1384203 : int lp_nr = lookup_stmt_eh_lp (stmt_info->stmt);
1279 1384203 : if (lp_nr != 0 && stmt_could_throw_p (cfun, vec_stmt))
1280 48 : add_stmt_to_eh_lp (vec_stmt, lp_nr);
1281 : }
1282 : else
1283 31398 : gcc_assert (!stmt_could_throw_p (cfun, vec_stmt));
1284 1415601 : }
1285 :
1286 : /* Replace the scalar statement STMT_INFO with a new vector statement VEC_STMT,
1287 : which sets the same scalar result as STMT_INFO did. Create and return a
1288 : stmt_vec_info for VEC_STMT. */
1289 :
1290 : void
1291 843 : vect_finish_replace_stmt (vec_info *vinfo,
1292 : stmt_vec_info stmt_info, gimple *vec_stmt)
1293 : {
1294 843 : gimple *scalar_stmt = vect_orig_stmt (stmt_info)->stmt;
1295 843 : gcc_assert (gimple_get_lhs (scalar_stmt) == gimple_get_lhs (vec_stmt));
1296 :
1297 843 : gimple_stmt_iterator gsi = gsi_for_stmt (scalar_stmt);
1298 843 : gsi_replace (&gsi, vec_stmt, true);
1299 :
1300 843 : vect_finish_stmt_generation_1 (vinfo, stmt_info, vec_stmt);
1301 843 : }
1302 :
/* Add VEC_STMT to the vectorized implementation of STMT_INFO and insert it
   before *GSI.  Create and return a stmt_vec_info for VEC_STMT.

   If VEC_STMT is a store inserted before a statement with virtual
   operands, also update virtual SSA form manually so the SSA renamer
   does not have to run.  */

void
vect_finish_stmt_generation (vec_info *vinfo,
			     stmt_vec_info stmt_info, gimple *vec_stmt,
			     gimple_stmt_iterator *gsi)
{
  gcc_assert (!stmt_info || gimple_code (stmt_info->stmt) != GIMPLE_LABEL);

  if (!gsi_end_p (*gsi)
      && gimple_has_mem_ops (vec_stmt))
    {
      gimple *at_stmt = gsi_stmt (*gsi);
      tree vuse = gimple_vuse (at_stmt);
      if (vuse && TREE_CODE (vuse) == SSA_NAME)
	{
	  /* The new statement sees the same memory state as the
	     statement we insert before.  */
	  tree vdef = gimple_vdef (at_stmt);
	  gimple_set_vuse (vec_stmt, gimple_vuse (at_stmt));
	  gimple_set_modified (vec_stmt, true);
	  /* If we have an SSA vuse and insert a store, update virtual
	     SSA form to avoid triggering the renamer.  Do so only
	     if we can easily see all uses - which is what almost always
	     happens with the way vectorized stmts are inserted.  */
	  if ((vdef && TREE_CODE (vdef) == SSA_NAME)
	      && ((is_gimple_assign (vec_stmt)
		   && !is_gimple_reg (gimple_assign_lhs (vec_stmt)))
		  || (is_gimple_call (vec_stmt)
		      && (!(gimple_call_flags (vec_stmt)
			    & (ECF_CONST|ECF_PURE|ECF_NOVOPS))
			  || (gimple_call_lhs (vec_stmt)
			      && !is_gimple_reg (gimple_call_lhs (vec_stmt)))))))
	    {
	      /* Give VEC_STMT a fresh vdef and make AT_STMT consume it,
		 re-linking the virtual use-def chain by hand.  */
	      tree new_vdef = copy_ssa_name (vuse, vec_stmt);
	      gimple_set_vdef (vec_stmt, new_vdef);
	      SET_USE (gimple_vuse_op (at_stmt), new_vdef);
	    }
	}
    }
  gsi_insert_before (gsi, vec_stmt, GSI_SAME_STMT);
  vect_finish_stmt_generation_1 (vinfo, stmt_info, vec_stmt);
}
1345 :
1346 : /* We want to vectorize a call to combined function CFN with function
1347 : decl FNDECL, using VECTYPE_OUT as the type of the output and VECTYPE_IN
1348 : as the types of all inputs. Check whether this is possible using
1349 : an internal function, returning its code if so or IFN_LAST if not. */
1350 :
1351 : static internal_fn
1352 11866 : vectorizable_internal_function (combined_fn cfn, tree fndecl,
1353 : tree vectype_out, tree vectype_in)
1354 : {
1355 11866 : internal_fn ifn;
1356 11866 : if (internal_fn_p (cfn))
1357 9929 : ifn = as_internal_fn (cfn);
1358 : else
1359 1937 : ifn = associated_internal_fn (fndecl);
1360 11866 : if (ifn != IFN_LAST && direct_internal_fn_p (ifn))
1361 : {
1362 8422 : const direct_internal_fn_info &info = direct_internal_fn (ifn);
1363 8422 : if (info.vectorizable)
1364 : {
1365 8422 : bool same_size_p = TYPE_SIZE (vectype_in) == TYPE_SIZE (vectype_out);
1366 8422 : tree type0 = (info.type0 < 0 ? vectype_out : vectype_in);
1367 8422 : tree type1 = (info.type1 < 0 ? vectype_out : vectype_in);
1368 :
1369 : /* The type size of both the vectype_in and vectype_out should be
1370 : exactly the same when vectype_out isn't participating the optab.
1371 : While there is no restriction for type size when vectype_out
1372 : is part of the optab query. */
1373 8422 : if (type0 != vectype_out && type1 != vectype_out && !same_size_p)
1374 : return IFN_LAST;
1375 :
1376 8402 : if (direct_internal_fn_supported_p (ifn, tree_pair (type0, type1),
1377 : OPTIMIZE_FOR_SPEED))
1378 : return ifn;
1379 : }
1380 : }
1381 : return IFN_LAST;
1382 : }
1383 :
1384 :
1385 : static tree permute_vec_elements (vec_info *, tree, tree, tree, stmt_vec_info,
1386 : gimple_stmt_iterator *);
1387 :
/* Check whether a load or store statement in the loop described by
   LOOP_VINFO is possible in a loop using partial vectors.  This is
   testing whether the vectorizer pass has the appropriate support,
   as well as whether the target does.

   VLS_TYPE says whether the statement is a load or store and VECTYPE
   is the type of the vector being loaded or stored.  SLP_NODE is the SLP
   node that contains the statement, or null if none.  MEMORY_ACCESS_TYPE
   says how the load or store is going to be implemented and GROUP_SIZE
   is the number of load or store statements in the containing group.
   If the access is a gather load or scatter store, GS_INFO describes
   its arguments.  If the load or store is conditional, SCALAR_MASK is the
   condition under which it occurs.

   Clear LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P if a loop using partial
   vectors is not supported, otherwise record the required rgroup control
   types.

   If partial vectors can be used and ELSVALS is nonzero the supported
   else values will be added to the vector ELSVALS points to.  */

static void
check_load_store_for_partial_vectors (loop_vec_info loop_vinfo, tree vectype,
				      slp_tree slp_node,
				      vec_load_store_type vls_type,
				      int group_size,
				      vect_load_store_data *ls,
				      slp_tree mask_node,
				      vec<int> *elsvals = nullptr)
{
  vect_memory_access_type memory_access_type = ls->memory_access_type;

  /* Invariant loads need no special support.  */
  if (memory_access_type == VMAT_INVARIANT)
    return;

  /* Figure whether the mask is uniform.  scalar_mask is used to
     populate the scalar_cond_masked_set.  Any lane with a missing or
     differing scalar def makes the mask non-uniform.  */
  tree scalar_mask = NULL_TREE;
  if (mask_node)
    for (unsigned i = 0; i < SLP_TREE_LANES (mask_node); ++i)
      {
	tree def = vect_get_slp_scalar_def (mask_node, i);
	if (!def
	    || (scalar_mask && def != scalar_mask))
	  {
	    scalar_mask = NULL;
	    break;
	  }
	else
	  scalar_mask = def;
      }

  unsigned int nvectors = vect_get_num_copies (loop_vinfo, slp_node);
  vec_loop_masks *masks = &LOOP_VINFO_MASKS (loop_vinfo);
  vec_loop_lens *lens = &LOOP_VINFO_LENS (loop_vinfo);
  machine_mode vecmode = TYPE_MODE (vectype);
  bool is_load = (vls_type == VLS_LOAD);
  if (memory_access_type == VMAT_LOAD_STORE_LANES)
    {
      /* One load/store-lanes instruction covers the whole group.  */
      nvectors /= group_size;
      internal_fn ifn
	= (is_load ? vect_load_lanes_supported (vectype, group_size, true,
						elsvals)
		   : vect_store_lanes_supported (vectype, group_size, true));
      if (ifn == IFN_MASK_LEN_LOAD_LANES || ifn == IFN_MASK_LEN_STORE_LANES)
	vect_record_loop_len (loop_vinfo, lens, nvectors, vectype, 1);
      else if (ifn == IFN_MASK_LOAD_LANES || ifn == IFN_MASK_STORE_LANES)
	vect_record_loop_mask (loop_vinfo, masks, nvectors, vectype,
			       scalar_mask);
      else
	{
	  if (dump_enabled_p ())
	    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			     "can't operate on partial vectors because"
			     " the target doesn't have an appropriate"
			     " load/store-lanes instruction.\n");
	  LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo) = false;
	}
      return;
    }

  if (mat_gather_scatter_p (memory_access_type))
    {
      internal_fn ifn = (is_load
			 ? IFN_MASK_GATHER_LOAD
			 : IFN_MASK_SCATTER_STORE);
      internal_fn len_ifn = (is_load
			     ? IFN_MASK_LEN_GATHER_LOAD
			     : IFN_MASK_LEN_SCATTER_STORE);
      stmt_vec_info repr = SLP_TREE_REPRESENTATIVE (slp_node);
      /* For a true gather/scatter the offset is the first SLP child;
	 for a strided access it was computed separately.  */
      tree off_vectype = (STMT_VINFO_GATHER_SCATTER_P (repr)
			  ? SLP_TREE_VECTYPE (SLP_TREE_CHILDREN (slp_node)[0])
			  : ls->strided_offset_vectype);
      tree memory_type = TREE_TYPE (DR_REF (STMT_VINFO_DR_INFO (repr)->dr));
      int scale = SLP_TREE_GS_SCALE (slp_node);

      /* The following "supported" checks just verify what we established in
	 get_load_store_type and don't try different offset types.
	 Therefore, off_vectype must be a supported offset type.  In case
	 we chose a different one use this instead.  */
      if (ls->supported_offset_vectype)
	off_vectype = ls->supported_offset_vectype;
      /* Same for scale.  */
      if (ls->supported_scale)
	scale = ls->supported_scale;

      /* Prefer length-controlled over mask-controlled gather/scatter.  */
      if (internal_gather_scatter_fn_supported_p (len_ifn, vectype,
						  memory_type,
						  off_vectype, scale,
						  elsvals))
	vect_record_loop_len (loop_vinfo, lens, nvectors, vectype, 1);
      else if (internal_gather_scatter_fn_supported_p (ifn, vectype,
						       memory_type,
						       off_vectype, scale,
						       elsvals)
	       || memory_access_type == VMAT_GATHER_SCATTER_LEGACY)
	vect_record_loop_mask (loop_vinfo, masks, nvectors, vectype,
			       scalar_mask);
      else
	{
	  if (dump_enabled_p ())
	    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			     "can't operate on partial vectors because"
			     " the target doesn't have an appropriate"
			     " gather load or scatter store instruction.\n");
	  LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo) = false;
	}
      return;
    }

  if (memory_access_type != VMAT_CONTIGUOUS)
    {
      /* Element X of the data must come from iteration i * VF + X of the
	 scalar loop.  We need more work to support other mappings.  */
      if (dump_enabled_p ())
	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			 "can't operate on partial vectors because an"
			 " access isn't contiguous.\n");
      LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo) = false;
      return;
    }

  if (!VECTOR_MODE_P (vecmode))
    {
      if (dump_enabled_p ())
	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			 "can't operate on partial vectors when emulating"
			 " vector operations.\n");
      LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo) = false;
      return;
    }

  /* We might load more scalars than we need for permuting SLP loads.
     We checked in get_load_store_type that the extra elements
     don't leak into a new vector.  */
  auto group_memory_nvectors = [](poly_uint64 size, poly_uint64 nunits)
    {
      unsigned int nvectors;
      if (can_div_away_from_zero_p (size, nunits, &nvectors))
	return nvectors;
      gcc_unreachable ();
    };

  poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
  poly_uint64 vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
  machine_mode mask_mode;
  machine_mode vmode;
  bool using_partial_vectors_p = false;
  /* Prefer length-based partial vectors, then mask-based ones.  */
  if (get_len_load_store_mode
      (vecmode, is_load, nullptr, elsvals).exists (&vmode))
    {
      nvectors = group_memory_nvectors (group_size * vf, nunits);
      unsigned factor = (vecmode == vmode) ? 1 : GET_MODE_UNIT_SIZE (vecmode);
      vect_record_loop_len (loop_vinfo, lens, nvectors, vectype, factor);
      using_partial_vectors_p = true;
    }
  else if (targetm.vectorize.get_mask_mode (vecmode).exists (&mask_mode)
	   && can_vec_mask_load_store_p (vecmode, mask_mode, is_load, NULL,
					 elsvals))
    {
      nvectors = group_memory_nvectors (group_size * vf, nunits);
      vect_record_loop_mask (loop_vinfo, masks, nvectors, vectype, scalar_mask);
      using_partial_vectors_p = true;
    }

  if (!using_partial_vectors_p)
    {
      if (dump_enabled_p ())
	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			 "can't operate on partial vectors because the"
			 " target doesn't have the appropriate partial"
			 " vectorization load or store.\n");
      LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo) = false;
    }
}
1584 :
1585 : /* Return the mask input to a masked load or store. VEC_MASK is the vectorized
1586 : form of the scalar mask condition and LOOP_MASK, if nonnull, is the mask
1587 : that needs to be applied to all loads and stores in a vectorized loop.
1588 : Return VEC_MASK if LOOP_MASK is null or if VEC_MASK is already masked,
1589 : otherwise return VEC_MASK & LOOP_MASK.
1590 :
1591 : MASK_TYPE is the type of both masks. If new statements are needed,
1592 : insert them before GSI. */
1593 :
1594 : tree
1595 1706 : prepare_vec_mask (loop_vec_info loop_vinfo, tree mask_type, tree loop_mask,
1596 : tree vec_mask, gimple_stmt_iterator *gsi)
1597 : {
1598 1706 : gcc_assert (useless_type_conversion_p (mask_type, TREE_TYPE (vec_mask)));
1599 1706 : if (!loop_mask)
1600 : return vec_mask;
1601 :
1602 139 : gcc_assert (TREE_TYPE (loop_mask) == mask_type);
1603 :
1604 139 : if (loop_vinfo->vec_cond_masked_set.contains ({ vec_mask, loop_mask }))
1605 : return vec_mask;
1606 :
1607 139 : tree and_res = make_temp_ssa_name (mask_type, NULL, "vec_mask_and");
1608 139 : gimple *and_stmt = gimple_build_assign (and_res, BIT_AND_EXPR,
1609 : vec_mask, loop_mask);
1610 :
1611 139 : gsi_insert_before (gsi, and_stmt, GSI_SAME_STMT);
1612 139 : return and_res;
1613 : }
1614 :
/* Determine whether we can use a gather load or scatter store to vectorize
   strided load or store STMT_INFO by truncating the current offset to a
   smaller width.  We need to be able to construct an offset vector:

     { 0, X, X*2, X*3, ... }

   without loss of precision, where X is STMT_INFO's DR_STEP.

   Return true if this is possible, describing the gather load or scatter
   store in GS_INFO.  MASKED_P is true if the load or store is conditional.

   If we can use gather/scatter and ELSVALS is nonzero the supported
   else values will be stored in the vector ELSVALS points to.  */

static bool
vect_truncate_gather_scatter_offset (stmt_vec_info stmt_info, tree vectype,
				     loop_vec_info loop_vinfo, bool masked_p,
				     gather_scatter_info *gs_info,
				     vec<int> *elsvals)
{
  dr_vec_info *dr_info = STMT_VINFO_DR_INFO (stmt_info);
  data_reference *dr = dr_info->dr;
  /* Only a compile-time constant step can be proven not to overflow
     a narrower offset type.  */
  tree step = DR_STEP (dr);
  if (TREE_CODE (step) != INTEGER_CST)
    {
      /* ??? Perhaps we could use range information here?  */
      if (dump_enabled_p ())
	dump_printf_loc (MSG_NOTE, vect_location,
			 "cannot truncate variable step.\n");
      return false;
    }

  /* Get the number of bits in an element.  */
  scalar_mode element_mode = SCALAR_TYPE_MODE (TREE_TYPE (vectype));
  unsigned int element_bits = GET_MODE_BITSIZE (element_mode);

  /* Set COUNT to the upper limit on the number of elements - 1.
     Start with the maximum vectorization factor.  */
  unsigned HOST_WIDE_INT count = vect_max_vf (loop_vinfo) - 1;

  /* Try lowering COUNT to the number of scalar latch iterations.  */
  class loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
  widest_int max_iters;
  if (max_loop_iterations (loop, &max_iters)
      && max_iters < count)
    count = max_iters.to_shwi ();

  /* Try scales of 1 and the element size.  */
  unsigned int scales[] = { 1, vect_get_scalar_dr_size (dr_info) };
  wi::overflow_type overflow = wi::OVF_NONE;
  for (int i = 0; i < 2; ++i)
    {
      unsigned int scale = scales[i];
      widest_int factor;
      /* The step must be a multiple of the scale for the offset
	 vector { 0, X/scale, ... } to be exact.  */
      if (!wi::multiple_of_p (wi::to_widest (step), scale, SIGNED, &factor))
	continue;

      /* Determine the minimum precision of (COUNT - 1) * STEP / SCALE.  */
      widest_int range = wi::mul (count, factor, SIGNED, &overflow);
      if (overflow)
	continue;
      signop sign = range >= 0 ? UNSIGNED : SIGNED;
      unsigned int min_offset_bits = wi::min_precision (range, sign);

      /* Find the narrowest viable offset type.  */
      unsigned int offset_bits = 1U << ceil_log2 (min_offset_bits);
      tree offset_type = build_nonstandard_integer_type (offset_bits,
							 sign == UNSIGNED);

      /* See whether the target supports the operation with an offset
	 no narrower than OFFSET_TYPE.  */
      tree memory_type = TREE_TYPE (DR_REF (dr));
      tree tmp_offset_vectype;
      int tmp_scale;
      if (!vect_gather_scatter_fn_p (loop_vinfo, DR_IS_READ (dr), masked_p,
				     vectype, memory_type, offset_type,
				     scale, &tmp_scale,
				     &gs_info->ifn, &gs_info->offset_vectype,
				     &tmp_offset_vectype, elsvals)
	  || gs_info->ifn == IFN_LAST)
	continue;

      /* Success: fill in GS_INFO for this scale.  */
      gs_info->decl = NULL_TREE;
      /* Logically the sum of DR_BASE_ADDRESS, DR_INIT and DR_OFFSET,
	 but we don't need to store that here.  */
      gs_info->base = NULL_TREE;
      gs_info->alias_ptr = build_int_cst
	(reference_alias_ptr_type (DR_REF (dr)),
	 get_object_alignment (DR_REF (dr)));
      gs_info->element_type = TREE_TYPE (vectype);
      gs_info->offset = fold_convert (offset_type, step);
      gs_info->scale = scale;
      gs_info->memory_type = memory_type;
      return true;
    }

  if (overflow && dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location,
		     "truncating gather/scatter offset to %d bits"
		     " might change its value.\n", element_bits);

  return false;
}
1718 :
1719 : /* Return true if we can use gather/scatter or strided internal functions
1720 : to vectorize STMT_INFO, which is a grouped or strided load or store
1721 : with multiple lanes and will be implemented by a type-punned access
1722 : of a vector with element size that matches the number of lanes.
1723 :
1724 : MASKED_P is true if load or store is conditional.
1725 : When returning true, fill in GS_INFO with the information required to
1726 : perform the operation. Also, store the punning type in PUNNED_VECTYPE.
1727 :
1728 : If successful and ELSVALS is nonzero the supported
1729 : else values will be stored in the vector ELSVALS points to. */
1730 :
 1731 : static bool
 1732 3663 : vect_use_grouped_gather (dr_vec_info *dr_info, tree vectype,
 1733 : loop_vec_info loop_vinfo, bool masked_p,
 1734 : unsigned int nelts,
 1735 : gather_scatter_info *info, vec<int> *elsvals,
 1736 : tree *pun_vectype)
 1737 : {
 1738 3663 : data_reference *dr = dr_info->dr;
 1739 :
 1740 : /* TODO: We can support nelts > BITS_PER_UNIT or non-power-of-two by
 1741 : multiple gathers/scatter. */
 1742 7024 : if (nelts > BITS_PER_UNIT || !pow2p_hwi (nelts))
 1743 : return false;
 1744 :
 1745 : /* Pun the vectype with one of the same size but an element spanning
 1746 : NELTS elements of VECTYPE.
 1747 : The punned type of a V16QI with NELTS = 4 would be V4SI.
 1748 : */
      : /* PIECES is the number of punned elements; the division must be exact
      : and non-zero for the punning to cover the whole vector. */
 1749 3097 : tree tmp;
 1750 3097 : unsigned int pieces;
 1751 3097 : if (!can_div_trunc_p (TYPE_VECTOR_SUBPARTS (vectype), nelts, &pieces)
 1752 3097 : || !pieces)
 1753 193 : return false;
 1754 :
 1755 2904 : *pun_vectype = vector_vector_composition_type (vectype, pieces, &tmp, true);
 1756 :
 1757 2904 : if (!*pun_vectype || !VECTOR_TYPE_P (*pun_vectype))
 1758 : return false;
 1759 :
 1760 2540 : internal_fn ifn;
 1761 2540 : tree offset_vectype = *pun_vectype;
 1762 :
 1763 1641 : internal_fn strided_ifn = DR_IS_READ (dr)
 1764 2540 : ? IFN_MASK_LEN_STRIDED_LOAD : IFN_MASK_LEN_STRIDED_STORE;
 1765 :
 1766 : /* Check if we have a gather/scatter with the new type. We're just trying
 1767 : with the type itself as offset for now. If not, check if we have a
 1768 : strided load/store. These have fewer constraints (for example no offset
 1769 : type must exist) so it is possible that even though a gather/scatter is
 1770 : not available we still have a strided load/store. */
 1771 2540 : bool ok = false;
 1772 2540 : tree tmp_vectype;
 1773 2540 : int tmp_scale;
 1774 2540 : if (vect_gather_scatter_fn_p
 1775 2540 : (loop_vinfo, DR_IS_READ (dr), masked_p, *pun_vectype,
 1776 2540 : TREE_TYPE (*pun_vectype), *pun_vectype, 1, &tmp_scale, &ifn,
 1777 : &offset_vectype, &tmp_vectype, elsvals))
 1778 : ok = true;
 1779 2540 : else if (internal_strided_fn_supported_p (strided_ifn, *pun_vectype,
 1780 : elsvals))
 1781 : {
 1782 : /* Use gather/scatter IFNs, vect_get_strided_load_store_ops
 1783 : will switch back to the strided variants. */
 1784 0 : ifn = DR_IS_READ (dr) ? IFN_MASK_LEN_GATHER_LOAD :
 1785 : IFN_MASK_LEN_SCATTER_STORE;
 1786 0 : ok = true;
 1787 : }
 1788 :
      : /* Record the chosen IFN and addressing details of the punned access
      : for the caller; both success paths above have set IFN. */
 1789 0 : if (ok)
 1790 : {
 1791 0 : info->ifn = ifn;
 1792 0 : info->decl = NULL_TREE;
 1793 0 : info->base = dr->ref;
 1794 0 : info->alias_ptr = build_int_cst
 1795 0 : (reference_alias_ptr_type (DR_REF (dr)),
 1796 0 : get_object_alignment (DR_REF (dr)));
 1797 0 : info->element_type = TREE_TYPE (*pun_vectype);
 1798 0 : info->offset_vectype = offset_vectype;
 1799 : /* No need to set the offset, vect_get_strided_load_store_ops
 1800 : will do that. */
 1801 0 : info->scale = 1;
 1802 0 : info->memory_type = TREE_TYPE (DR_REF (dr));
 1803 0 : return true;
 1804 : }
 1805 :
 1806 : return false;
 1807 : }
1808 :
1809 :
1810 : /* Return true if we can use gather/scatter internal functions to
1811 : vectorize STMT_INFO, which is a grouped or strided load or store.
1812 : MASKED_P is true if load or store is conditional. When returning
1813 : true, fill in GS_INFO with the information required to perform the
1814 : operation.
1815 :
1816 : If we can use gather/scatter and ELSVALS is nonzero the supported
1817 : else values will be stored in the vector ELSVALS points to. */
1818 :
 1819 : static bool
 1820 69435 : vect_use_strided_gather_scatters_p (stmt_vec_info stmt_info, tree vectype,
 1821 : loop_vec_info loop_vinfo, bool masked_p,
 1822 : gather_scatter_info *gs_info,
 1823 : vec<int> *elsvals,
 1824 : unsigned int group_size,
 1825 : bool single_element_p)
 1826 : {
      : /* No directly usable gather/scatter IFN for this access: as a
      : fallback, try synthesizing one with a truncated offset type. */
 1827 69435 : if (!vect_check_gather_scatter (stmt_info, vectype,
 1828 : loop_vinfo, gs_info, elsvals)
 1829 69435 : || gs_info->ifn == IFN_LAST)
 1830 : {
 1831 69435 : if (!vect_truncate_gather_scatter_offset (stmt_info, vectype, loop_vinfo,
 1832 : masked_p, gs_info, elsvals))
 1833 : return false;
 1834 : }
 1835 :
      : /* For multi-element groups consult the target hook on whether a
      : gather/scatter is actually preferable at this scale/group size. */
 1836 0 : if (!single_element_p
 1837 0 : && !targetm.vectorize.prefer_gather_scatter (TYPE_MODE (vectype),
 1838 : gs_info->scale,
 1839 : group_size))
 1840 : return false;
 1841 :
 1842 0 : if (dump_enabled_p ())
 1843 0 : dump_printf_loc (MSG_NOTE, vect_location,
 1844 : "using gather/scatter for strided/grouped access,"
 1845 : " scale = %d\n", gs_info->scale);
 1846 :
 1847 : return true;
 1848 : }
1849 :
1850 : /* STMT_INFO is a non-strided load or store, meaning that it accesses
1851 : elements with a known constant step. Return -1 if that step
1852 : is negative, 0 if it is zero, and 1 if it is greater than zero. */
1853 :
 1854 : int
 1855 1355998 : compare_step_with_zero (vec_info *vinfo, stmt_vec_info stmt_info)
 1856 : {
 1857 1355998 : dr_vec_info *dr_info = STMT_VINFO_DR_INFO (stmt_info);
      : /* tree_int_cst_compare returns -1/0/1, which is exactly the
      : documented result for a negative/zero/positive step. */
 1858 1355998 : return tree_int_cst_compare (vect_dr_behavior (vinfo, dr_info)->step,
 1859 1355998 : size_zero_node);
 1860 : }
1861 :
1862 : /* If the target supports a permute mask that reverses the elements in
1863 : a vector of type VECTYPE, return that mask, otherwise return null. */
1864 :
 1865 : tree
 1866 8799 : perm_mask_for_reverse (tree vectype)
 1867 : {
 1868 8799 : poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
 1869 :
 1870 : /* The encoding has a single stepped pattern. */
      : /* Three leading indices N-1, N-2, N-3 encode the full descending
      : series, which also works for variable-length vectors. */
 1871 8799 : vec_perm_builder sel (nunits, 1, 3);
 1872 35196 : for (int i = 0; i < 3; ++i)
 1873 26397 : sel.quick_push (nunits - 1 - i);
 1874 :
 1875 8799 : vec_perm_indices indices (sel, 1, nunits);
 1876 8799 : if (!can_vec_perm_const_p (TYPE_MODE (vectype), TYPE_MODE (vectype),
 1877 : indices))
 1878 : return NULL_TREE;
 1879 7653 : return vect_gen_perm_mask_checked (vectype, indices);
 1880 8799 : }
1881 :
1882 : /* A subroutine of get_load_store_type, with a subset of the same
1883 : arguments. Handle the case where STMT_INFO is a load or store that
1884 : accesses consecutive elements with a negative step. Sets *POFFSET
1885 : to the offset to be applied to the DR for the first access. */
1886 :
 1887 : static vect_memory_access_type
 1888 11533 : get_negative_load_store_type (vec_info *vinfo,
 1889 : stmt_vec_info stmt_info, tree vectype,
 1890 : vec_load_store_type vls_type,
 1891 : unsigned int ncopies, poly_int64 *poffset)
 1892 : {
 1893 11533 : dr_vec_info *dr_info = STMT_VINFO_DR_INFO (stmt_info);
 1894 11533 : dr_alignment_support alignment_support_scheme;
 1895 :
 1896 11533 : if (ncopies > 1)
 1897 : {
 1898 0 : if (dump_enabled_p ())
 1899 0 : dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
 1900 : "multiple types with negative step.\n")
 1901 0 : return VMAT_ELEMENTWISE;
 1902 : }
 1903 :
 1904 : /* For backward running DRs the first access in vectype actually is
 1905 : N-1 elements before the address of the DR. */
 1906 11533 : *poffset = ((-TYPE_VECTOR_SUBPARTS (vectype) + 1)
 1907 11533 : * TREE_INT_CST_LOW (TYPE_SIZE_UNIT (TREE_TYPE (vectype))));
 1908 :
 1909 11533 : int misalignment = dr_misalignment (dr_info, vectype, *poffset);
 1910 11533 : alignment_support_scheme
 1911 11533 : = vect_supportable_dr_alignment (vinfo, dr_info, vectype, misalignment);
 1912 11533 : if (alignment_support_scheme != dr_aligned
 1913 11533 : && alignment_support_scheme != dr_unaligned_supported)
 1914 : {
 1915 4586 : if (dump_enabled_p ())
 1916 0 : dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
 1917 : "negative step but alignment required.\n");
      : /* Elementwise accesses use the scalar addresses directly, so
      : undo the vector-sized offset computed above. */
 1918 4586 : *poffset = 0;
 1919 4586 : return VMAT_ELEMENTWISE;
 1920 : }
 1921 :
 1922 6947 : if (vls_type == VLS_STORE_INVARIANT)
 1923 : {
 1924 715 : if (dump_enabled_p ())
 1925 21 : dump_printf_loc (MSG_NOTE, vect_location,
 1926 : "negative step with invariant source;"
 1927 : " no permute needed.\n");
 1928 715 : return VMAT_CONTIGUOUS_DOWN;
 1929 : }
 1930 :
      : /* A reversing permute is required for VMAT_CONTIGUOUS_REVERSE; fall
      : back to elementwise accesses if the target cannot do it. */
 1931 6232 : if (!perm_mask_for_reverse (vectype))
 1932 : {
 1933 1146 : if (dump_enabled_p ())
 1934 52 : dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
 1935 : "negative step and reversing not supported.\n");
 1936 1146 : *poffset = 0;
 1937 1146 : return VMAT_ELEMENTWISE;
 1938 : }
 1939 :
 1940 : return VMAT_CONTIGUOUS_REVERSE;
 1941 : }
1942 :
1943 : /* STMT_INFO is either a masked or unconditional store. Return the value
1944 : being stored. */
1945 :
 1946 : tree
 1947 0 : vect_get_store_rhs (stmt_vec_info stmt_info)
 1948 : {
      : /* Plain (unconditional) store: the stored value is the single RHS. */
 1949 0 : if (gassign *assign = dyn_cast <gassign *> (stmt_info->stmt))
 1950 : {
 1951 0 : gcc_assert (gimple_assign_single_p (assign));
 1952 0 : return gimple_assign_rhs1 (assign);
 1953 : }
      : /* Internal-function store (e.g. masked): look up which call argument
      : carries the stored value for this IFN. */
 1954 0 : if (gcall *call = dyn_cast <gcall *> (stmt_info->stmt))
 1955 : {
 1956 0 : internal_fn ifn = gimple_call_internal_fn (call);
 1957 0 : int index = internal_fn_stored_value_index (ifn);
 1958 0 : gcc_assert (index >= 0);
 1959 0 : return gimple_call_arg (call, index);
 1960 : }
 1961 0 : gcc_unreachable ();
 1962 : }
1963 :
1964 : /* Function VECTOR_VECTOR_COMPOSITION_TYPE
1965 :
1966 : This function returns a vector type which can be composed with NELTS pieces,
1967 : whose type is recorded in PTYPE. VTYPE should be a vector type, and has the
1968 : same vector size as the return vector. It checks target whether supports
1969 : pieces-size vector mode for construction firstly, if target fails to, check
1970 : pieces-size scalar mode for construction further. It returns NULL_TREE if
1971 : fails to find the available composition. If the caller only wants scalar
1972 : pieces where PTYPE e.g. is a possible gather/scatter element type
1973 : SCALAR_PTYPE_ONLY must be true.
1974 :
1975 : For example, for (vtype=V16QI, nelts=4), we can probably get:
1976 : - V16QI with PTYPE V4QI.
1977 : - V4SI with PTYPE SI.
1978 : - NULL_TREE. */
1979 :
 1980 : static tree
 1981 13078 : vector_vector_composition_type (tree vtype, poly_uint64 nelts, tree *ptype,
 1982 : bool scalar_ptype_only)
 1983 : {
 1984 13078 : gcc_assert (VECTOR_TYPE_P (vtype));
 1985 13078 : gcc_assert (known_gt (nelts, 0U));
 1986 :
 1987 13078 : machine_mode vmode = TYPE_MODE (vtype);
 1988 13078 : if (!VECTOR_MODE_P (vmode))
 1989 : return NULL_TREE;
 1990 :
 1991 : /* When we are asked to compose the vector from its components let
 1992 : that happen directly. */
      : /* NELTS pieces of one element each: PTYPE is simply the element type. */
 1993 13078 : if (known_eq (TYPE_VECTOR_SUBPARTS (vtype), nelts))
 1994 : {
 1995 5804 : *ptype = TREE_TYPE (vtype);
 1996 5804 : return vtype;
 1997 : }
 1998 :
      : /* PBSIZE is the bit size of one piece; it must be an exact,
      : compile-time constant fraction of the vector size. */
 1999 14548 : poly_uint64 vbsize = GET_MODE_BITSIZE (vmode);
 2000 7274 : unsigned int pbsize;
 2001 7274 : if (constant_multiple_p (vbsize, nelts, &pbsize))
 2002 : {
 2003 : /* First check if vec_init optab supports construction from
 2004 : vector pieces directly. */
 2005 7274 : scalar_mode elmode = SCALAR_TYPE_MODE (TREE_TYPE (vtype));
 2006 14548 : poly_uint64 inelts = pbsize / GET_MODE_BITSIZE (elmode);
 2007 7274 : machine_mode rmode;
 2008 7274 : if (!scalar_ptype_only
 2009 4370 : && related_vector_mode (vmode, elmode, inelts).exists (&rmode)
 2010 11266 : && (convert_optab_handler (vec_init_optab, vmode, rmode)
 2011 : != CODE_FOR_nothing))
 2012 : {
 2013 3375 : *ptype = build_vector_type (TREE_TYPE (vtype), inelts);
 2014 3375 : return vtype;
 2015 : }
 2016 :
 2017 : /* Otherwise check if exists an integer type of the same piece size and
 2018 : if vec_init optab supports construction from it directly. */
      : /* With SCALAR_PTYPE_ONLY the caller wants a scalar piece type
      : regardless of vec_init support, so skip the optab check. */
 2019 3899 : if (int_mode_for_size (pbsize, 0).exists (&elmode)
 2020 3899 : && related_vector_mode (vmode, elmode, nelts).exists (&rmode))
 2021 : {
 2022 3499 : if (scalar_ptype_only
 2023 3499 : || convert_optab_handler (vec_init_optab, rmode, elmode)
 2024 : != CODE_FOR_nothing)
 2025 : {
 2026 3499 : *ptype = build_nonstandard_integer_type (pbsize, 1);
 2027 3499 : return build_vector_type (*ptype, nelts);
 2028 : }
 2029 : }
 2030 : }
 2031 :
 2032 : return NULL_TREE;
 2033 : }
2034 :
2035 : /* Check if the load permutation of NODE only refers to a consecutive
2036 : subset of the group indices where GROUP_SIZE is the size of the
2037 : dataref's group. We also assert that the length of the permutation
2038 : divides the group size and is a power of two.
2039 : Such load permutations can be elided in strided access schemes as
2040 : we can "jump over" the gap they leave. */
2041 :
 2042 : bool
 2043 43091 : has_consecutive_load_permutation (slp_tree node, unsigned group_size)
 2044 : {
 2045 43091 : load_permutation_t perm = SLP_TREE_LOAD_PERMUTATION (node);
      : /* Require an existing, non-trivial permutation whose length is a
      : power of two and evenly divides GROUP_SIZE (see function comment). */
 2046 43091 : if (!perm.exists ()
 2047 1839 : || perm.length () <= 1
 2048 411 : || !pow2p_hwi (perm.length ())
 2049 43488 : || group_size % perm.length ())
 2050 : return false;
 2051 :
      : /* Structural checks passed; now verify the indices themselves form a
      : consecutive run. */
 2052 354 : return vect_load_perm_consecutive_p (node);
 2053 : }
2054 :
2055 :
2056 : /* Analyze load or store SLP_NODE of type VLS_TYPE. Return true
2057 : if there is a memory access type that the vectorized form can use,
2058 : storing it in *MEMORY_ACCESS_TYPE if so. If we decide to use gathers
2059 : or scatters, fill in GS_INFO accordingly. In addition
2060 : *ALIGNMENT_SUPPORT_SCHEME is filled out and false is returned if
2061 : the target does not support the alignment scheme. *MISALIGNMENT
2062 : is set according to the alignment of the access (including
2063 : DR_MISALIGNMENT_UNKNOWN when it is unknown).
2064 :
2065 : MASKED_P is true if the statement is conditional on a vectorized mask.
2066 : VECTYPE is the vector type that the vectorized statements will use.
2067 :
2068 : If ELSVALS is nonzero the supported else values will be stored in the
2069 : vector ELSVALS points to. */
2070 :
2071 : static bool
2072 1251638 : get_load_store_type (vec_info *vinfo, stmt_vec_info stmt_info,
2073 : tree vectype, slp_tree slp_node,
2074 : bool masked_p, vec_load_store_type vls_type,
2075 : vect_load_store_data *ls)
2076 : {
2077 1251638 : vect_memory_access_type *memory_access_type = &ls->memory_access_type;
2078 1251638 : poly_int64 *poffset = &ls->poffset;
2079 1251638 : dr_alignment_support *alignment_support_scheme
2080 : = &ls->alignment_support_scheme;
2081 1251638 : int *misalignment = &ls->misalignment;
2082 1251638 : internal_fn *lanes_ifn = &ls->lanes_ifn;
2083 1251638 : vec<int> *elsvals = &ls->elsvals;
2084 1251638 : tree *ls_type = &ls->ls_type;
2085 1251638 : bool *slp_perm = &ls->slp_perm;
2086 1251638 : unsigned *n_perms = &ls->n_perms;
2087 1251638 : unsigned *n_loads = &ls->n_loads;
2088 1251638 : tree *supported_offset_vectype = &ls->supported_offset_vectype;
2089 1251638 : int *supported_scale = &ls->supported_scale;
2090 1251638 : loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
2091 1251638 : poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
2092 1251638 : class loop *loop = loop_vinfo ? LOOP_VINFO_LOOP (loop_vinfo) : NULL;
2093 1251638 : stmt_vec_info first_stmt_info;
2094 1251638 : unsigned int group_size;
2095 1251638 : unsigned HOST_WIDE_INT gap;
2096 1251638 : bool single_element_p;
2097 1251638 : poly_int64 neg_ldst_offset = 0;
2098 :
2099 1251638 : *misalignment = DR_MISALIGNMENT_UNKNOWN;
2100 1251638 : *poffset = 0;
2101 1251638 : *ls_type = NULL_TREE;
2102 1251638 : *slp_perm = false;
2103 1251638 : *n_perms = -1U;
2104 1251638 : *n_loads = -1U;
2105 1251638 : ls->subchain_p = false;
2106 :
2107 1251638 : bool perm_ok = true;
2108 1251638 : poly_int64 vf = loop_vinfo ? LOOP_VINFO_VECT_FACTOR (loop_vinfo) : 1;
2109 :
2110 1251638 : if (SLP_TREE_LOAD_PERMUTATION (slp_node).exists ())
2111 73034 : perm_ok = vect_transform_slp_perm_load (vinfo, slp_node, vNULL, NULL,
2112 73034 : vf, true, n_perms, n_loads);
2113 :
2114 1251638 : if (STMT_VINFO_GROUPED_ACCESS (stmt_info))
2115 : {
2116 860517 : first_stmt_info = DR_GROUP_FIRST_ELEMENT (stmt_info);
2117 860517 : group_size = DR_GROUP_SIZE (first_stmt_info);
2118 860517 : gap = DR_GROUP_GAP (first_stmt_info);
2119 860517 : single_element_p = (stmt_info == first_stmt_info
2120 860517 : && !DR_GROUP_NEXT_ELEMENT (stmt_info));
2121 : }
2122 : else
2123 : {
2124 : first_stmt_info = stmt_info;
2125 : group_size = 1;
2126 : gap = 0;
2127 : single_element_p = true;
2128 : }
2129 1251638 : dr_vec_info *first_dr_info = STMT_VINFO_DR_INFO (first_stmt_info);
2130 :
2131 : /* True if the vectorized statements would access beyond the last
2132 : statement in the group. */
2133 1251638 : bool overrun_p = false;
2134 :
2135 : /* True if we can cope with such overrun by peeling for gaps, so that
2136 : there is at least one final scalar iteration after the vector loop. */
2137 2503276 : bool can_overrun_p = (!masked_p
2138 1251638 : && vls_type == VLS_LOAD
2139 477818 : && loop_vinfo
2140 1601957 : && !loop->inner);
2141 :
2142 : /* There can only be a gap at the end of the group if the stride is
2143 : known at compile time. */
2144 1251638 : gcc_assert (!STMT_VINFO_STRIDED_P (first_stmt_info) || gap == 0);
2145 :
2146 : /* For SLP vectorization we directly vectorize a subchain
2147 : without permutation. */
2148 1251638 : if (! SLP_TREE_LOAD_PERMUTATION (slp_node).exists ())
2149 1178604 : first_dr_info = STMT_VINFO_DR_INFO (SLP_TREE_SCALAR_STMTS (slp_node)[0]);
2150 :
2151 1251638 : if (STMT_VINFO_STRIDED_P (first_stmt_info))
2152 : {
2153 : /* Try to use consecutive accesses of as many elements as possible,
2154 : separated by the stride, until we have a complete vector.
2155 : Fall back to scalar accesses if that isn't possible. */
2156 43091 : *memory_access_type = VMAT_STRIDED_SLP;
2157 :
2158 : /* If the load permutation is consecutive we can reduce the group to
2159 : the elements the permutation accesses. Then we release the
2160 : permutation. */
2161 43091 : if (has_consecutive_load_permutation (slp_node, group_size))
2162 : {
2163 20 : ls->subchain_p = true;
2164 20 : group_size = SLP_TREE_LANES (slp_node);
2165 20 : SLP_TREE_LOAD_PERMUTATION (slp_node).release ();
2166 : }
2167 : }
2168 1208547 : else if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
2169 : {
2170 10354 : slp_tree offset_node = SLP_TREE_CHILDREN (slp_node)[0];
2171 10354 : tree offset_vectype = SLP_TREE_VECTYPE (offset_node);
2172 10354 : int scale = SLP_TREE_GS_SCALE (slp_node);
2173 10354 : tree memory_type = TREE_TYPE (DR_REF (first_dr_info->dr));
2174 10354 : tree tem;
2175 10354 : if (vect_gather_scatter_fn_p (loop_vinfo, vls_type == VLS_LOAD,
2176 : masked_p, vectype, memory_type,
2177 : offset_vectype, scale, supported_scale,
2178 : &ls->gs.ifn, &tem,
2179 : supported_offset_vectype, elsvals))
2180 : {
2181 0 : if (dump_enabled_p ())
2182 : {
2183 0 : dump_printf_loc (MSG_NOTE, vect_location,
2184 : "gather/scatter with required "
2185 : "offset type "
2186 : "%T and offset scale %d.\n",
2187 : offset_vectype, scale);
2188 0 : if (*supported_offset_vectype)
2189 0 : dump_printf_loc (MSG_NOTE, vect_location,
2190 : " target supports offset type %T.\n",
2191 : *supported_offset_vectype);
2192 0 : if (*supported_scale)
2193 0 : dump_printf_loc (MSG_NOTE, vect_location,
2194 : " target supports offset scale %d.\n",
2195 : *supported_scale);
2196 : }
2197 0 : *memory_access_type = VMAT_GATHER_SCATTER_IFN;
2198 : }
2199 10354 : else if (vls_type == VLS_LOAD
2200 10354 : ? (targetm.vectorize.builtin_gather
2201 8936 : && (ls->gs.decl
2202 8936 : = targetm.vectorize.builtin_gather (vectype,
2203 8936 : TREE_TYPE
2204 : (offset_vectype),
2205 : scale)))
2206 1418 : : (targetm.vectorize.builtin_scatter
2207 1418 : && (ls->gs.decl
2208 1418 : = targetm.vectorize.builtin_scatter (vectype,
2209 1418 : TREE_TYPE
2210 : (offset_vectype),
2211 : scale))))
2212 345 : *memory_access_type = VMAT_GATHER_SCATTER_LEGACY;
2213 : else
2214 : {
2215 : /* GATHER_SCATTER_EMULATED_P. */
2216 10009 : if (!TYPE_VECTOR_SUBPARTS (vectype).is_constant ()
2217 10009 : || !TYPE_VECTOR_SUBPARTS (offset_vectype).is_constant ()
2218 10009 : || VECTOR_BOOLEAN_TYPE_P (offset_vectype)
2219 10009 : || !constant_multiple_p (TYPE_VECTOR_SUBPARTS (offset_vectype),
2220 10009 : TYPE_VECTOR_SUBPARTS (vectype)))
2221 : {
2222 2602 : if (dump_enabled_p ())
2223 450 : dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2224 : "unsupported vector types for emulated "
2225 : "gather.\n");
2226 2602 : return false;
2227 : }
2228 7407 : *memory_access_type = VMAT_GATHER_SCATTER_EMULATED;
2229 : }
2230 : }
2231 : else
2232 : {
2233 1198193 : int cmp = compare_step_with_zero (vinfo, stmt_info);
2234 1198193 : if (cmp < 0)
2235 : {
2236 11672 : if (single_element_p)
2237 : /* ??? The VMAT_CONTIGUOUS_REVERSE code generation is
2238 : only correct for single element "interleaving" SLP. */
2239 11533 : *memory_access_type = get_negative_load_store_type
2240 11533 : (vinfo, stmt_info, vectype, vls_type, 1,
2241 : &neg_ldst_offset);
2242 : else
2243 : /* We can fall back to VMAT_STRIDED_SLP since that does
2244 : not care whether the stride between the group instances
2245 : is positive or negative. */
2246 139 : *memory_access_type = VMAT_STRIDED_SLP;
2247 : }
2248 1186521 : else if (cmp == 0 && loop_vinfo)
2249 : {
2250 3046 : gcc_assert (vls_type == VLS_LOAD);
2251 3046 : *memory_access_type = VMAT_INVARIANT;
2252 : }
2253 : /* Try using LOAD/STORE_LANES. */
2254 1183475 : else if (slp_node->ldst_lanes
2255 1183475 : && (*lanes_ifn
2256 0 : = (vls_type == VLS_LOAD
2257 0 : ? vect_load_lanes_supported (vectype, group_size,
2258 : masked_p, elsvals)
2259 0 : : vect_store_lanes_supported (vectype, group_size,
2260 : masked_p))) != IFN_LAST)
2261 0 : *memory_access_type = VMAT_LOAD_STORE_LANES;
2262 1183475 : else if (!loop_vinfo && slp_node->avoid_stlf_fail)
2263 : {
2264 73 : *memory_access_type = VMAT_ELEMENTWISE;
2265 73 : if (dump_enabled_p ())
2266 2 : dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2267 : "using element-wise load to avoid disrupting "
2268 : "cross iteration store-to-load forwarding\n");
2269 : }
2270 : else
2271 1183402 : *memory_access_type = VMAT_CONTIGUOUS;
2272 :
2273 : /* If this is single-element interleaving with an element
2274 : distance that leaves unused vector loads around fall back
2275 : to elementwise access if possible - we otherwise least
2276 : create very sub-optimal code in that case (and
2277 : blow up memory, see PR65518). */
2278 1198193 : if (loop_vinfo
2279 1198193 : && single_element_p
2280 379304 : && (*memory_access_type == VMAT_CONTIGUOUS
2281 14579 : || *memory_access_type == VMAT_CONTIGUOUS_REVERSE)
2282 1577497 : && maybe_gt (group_size, TYPE_VECTOR_SUBPARTS (vectype)))
2283 : {
2284 24136 : *memory_access_type = VMAT_ELEMENTWISE;
2285 24136 : if (dump_enabled_p ())
2286 274 : dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2287 : "single-element interleaving not supported "
2288 : "for not adjacent vector loads, using "
2289 : "elementwise access\n");
2290 : }
2291 :
2292 : /* Also fall back to elementwise access in case we did not lower a
2293 : permutation and cannot code generate it. */
2294 1198193 : if (loop_vinfo
2295 424976 : && *memory_access_type != VMAT_ELEMENTWISE
2296 395108 : && SLP_TREE_LOAD_PERMUTATION (slp_node).exists ()
2297 1220737 : && !perm_ok)
2298 : {
2299 2020 : *memory_access_type = VMAT_ELEMENTWISE;
2300 2020 : if (dump_enabled_p ())
2301 246 : dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2302 : "permutation not supported, using elementwise "
2303 : "access\n");
2304 : }
2305 :
2306 424976 : overrun_p = (loop_vinfo && gap != 0
2307 1247258 : && *memory_access_type != VMAT_ELEMENTWISE);
2308 1198193 : if (overrun_p && vls_type != VLS_LOAD)
2309 : {
2310 0 : dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2311 : "Grouped store with gaps requires"
2312 : " non-consecutive accesses\n");
2313 9 : return false;
2314 : }
2315 :
2316 1198193 : unsigned HOST_WIDE_INT dr_size = vect_get_scalar_dr_size (first_dr_info);
2317 1198193 : poly_int64 off = 0;
2318 1198193 : if (*memory_access_type == VMAT_CONTIGUOUS_REVERSE)
2319 4927 : off = (TYPE_VECTOR_SUBPARTS (vectype) - 1) * -dr_size;
2320 :
2321 : /* An overrun is fine if the trailing elements are smaller
2322 : than the alignment boundary B. Every vector access will
2323 : be a multiple of B and so we are guaranteed to access a
2324 : non-gap element in the same B-sized block. */
2325 1198193 : if (overrun_p
2326 1198193 : && gap < (vect_known_alignment_in_bytes (first_dr_info,
2327 22741 : vectype, off) / dr_size))
2328 : overrun_p = false;
2329 :
2330 : /* When we have a contiguous access across loop iterations
2331 : but the access in the loop doesn't cover the full vector
2332 : we can end up with no gap recorded but still excess
2333 : elements accessed, see PR103116. Make sure we peel for
2334 : gaps if necessary and sufficient and give up if not.
2335 :
2336 : If there is a combination of the access not covering the full
2337 : vector and a gap recorded then we may need to peel twice. */
2338 1198193 : bool large_vector_overrun_p = false;
2339 1198193 : if (loop_vinfo
2340 424976 : && (*memory_access_type == VMAT_CONTIGUOUS
2341 40712 : || *memory_access_type == VMAT_CONTIGUOUS_REVERSE)
2342 389191 : && SLP_TREE_LOAD_PERMUTATION (slp_node).exists ()
2343 1218415 : && !multiple_p (group_size * LOOP_VINFO_VECT_FACTOR (loop_vinfo),
2344 : nunits))
2345 : large_vector_overrun_p = overrun_p = true;
2346 :
2347 : /* If the gap splits the vector in half and the target
2348 : can do half-vector operations avoid the epilogue peeling
2349 : by simply loading half of the vector only. Usually
2350 : the construction with an upper zero half will be elided. */
2351 1198193 : dr_alignment_support alss;
2352 1198193 : int misalign = dr_misalignment (first_dr_info, vectype, off);
2353 1198193 : tree half_vtype;
2354 1198193 : poly_uint64 remain;
2355 1198193 : unsigned HOST_WIDE_INT tem, num;
2356 1198193 : if (overrun_p
2357 1198193 : && !masked_p
2358 17270 : && *memory_access_type != VMAT_LOAD_STORE_LANES
2359 17270 : && (((alss = vect_supportable_dr_alignment (vinfo, first_dr_info,
2360 : vectype, misalign)))
2361 : == dr_aligned
2362 14829 : || alss == dr_unaligned_supported)
2363 9636 : && can_div_trunc_p (group_size
2364 9636 : * LOOP_VINFO_VECT_FACTOR (loop_vinfo) - gap,
2365 : nunits, &tem, &remain)
2366 1207829 : && (known_eq (remain, 0u)
2367 7252 : || (known_ne (remain, 0u)
2368 5621 : && constant_multiple_p (nunits, remain, &num)
2369 1195809 : && (vector_vector_composition_type (vectype, num, &half_vtype)
2370 : != NULL_TREE))))
2371 8005 : overrun_p = false;
2372 :
2373 1198193 : if (overrun_p && !can_overrun_p)
2374 : {
2375 6 : if (dump_enabled_p ())
2376 6 : dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2377 : "Peeling for outer loop is not supported\n");
2378 6 : return false;
2379 : }
2380 :
2381 : /* Peeling for gaps assumes that a single scalar iteration
2382 : is enough to make sure the last vector iteration doesn't
2383 : access excess elements. */
2384 1198187 : if (overrun_p
2385 1198187 : && (!can_div_trunc_p (group_size
2386 9259 : * LOOP_VINFO_VECT_FACTOR (loop_vinfo) - gap,
2387 : nunits, &tem, &remain)
2388 9259 : || maybe_lt (remain + group_size, nunits)))
2389 : {
2390 : /* But peeling a single scalar iteration is enough if
2391 : we can use the next power-of-two sized partial
2392 : access and that is sufficiently small to be covered
2393 : by the single scalar iteration. */
2394 16 : unsigned HOST_WIDE_INT cnunits, cvf, cremain, cpart_size;
2395 16 : if (masked_p
2396 16 : || !nunits.is_constant (&cnunits)
2397 16 : || !LOOP_VINFO_VECT_FACTOR (loop_vinfo).is_constant (&cvf)
2398 16 : || (((cremain = (group_size * cvf - gap) % cnunits), true)
2399 16 : && ((cpart_size = (1 << ceil_log2 (cremain))), true)
2400 16 : && (cremain + group_size < cpart_size
2401 13 : || (vector_vector_composition_type (vectype,
2402 13 : cnunits / cpart_size,
2403 : &half_vtype)
2404 : == NULL_TREE))))
2405 : {
2406 : /* If all fails we can still resort to niter masking unless
2407 : the vectors used are too big, so enforce the use of
2408 : partial vectors. */
2409 3 : if (LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo)
2410 3 : && !large_vector_overrun_p)
2411 : {
2412 0 : if (dump_enabled_p ())
2413 0 : dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2414 : "peeling for gaps insufficient for "
2415 : "access unless using partial "
2416 : "vectors\n");
2417 0 : LOOP_VINFO_MUST_USE_PARTIAL_VECTORS_P (loop_vinfo) = true;
2418 : }
2419 : else
2420 : {
2421 3 : if (dump_enabled_p ())
2422 3 : dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2423 : "peeling for gaps insufficient for "
2424 : "access\n");
2425 3 : return false;
2426 : }
2427 : }
2428 13 : else if (large_vector_overrun_p)
2429 : {
2430 13 : if (dump_enabled_p ())
2431 12 : dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2432 : "can't operate on partial vectors because "
2433 : "only unmasked loads handle access "
2434 : "shortening required because of gaps at "
2435 : "the end of the access\n");
2436 13 : LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo) = false;
2437 : }
2438 : }
2439 : }
2440 :
2441 : /* As a last resort, trying using a gather load or scatter store.
2442 :
2443 : ??? Although the code can handle all group sizes correctly,
2444 : it probably isn't a win to use separate strided accesses based
2445 : on nearby locations. Or, even if it's a win over scalar code,
2446 : it might not be a win over vectorizing at a lower VF, if that
2447 : allows us to use contiguous accesses. */
2448 1249027 : vect_memory_access_type grouped_gather_fallback = VMAT_UNINITIALIZED;
2449 1249027 : if (loop_vinfo
2450 475810 : && (*memory_access_type == VMAT_ELEMENTWISE
2451 475810 : || *memory_access_type == VMAT_STRIDED_SLP))
2452 : {
2453 75115 : gather_scatter_info gs_info;
2454 75115 : if (SLP_TREE_LANES (slp_node) == 1
2455 71225 : && (!SLP_TREE_LOAD_PERMUTATION (slp_node).exists ()
2456 27696 : || single_element_p)
2457 144550 : && vect_use_strided_gather_scatters_p (stmt_info, vectype, loop_vinfo,
2458 : masked_p, &gs_info, elsvals,
2459 : group_size, single_element_p))
2460 : {
2461 : /* vect_use_strided_gather_scatters_p does not save the actually
2462 : supported scale and offset type so do that here.
2463 : We need it later in check_load_store_for_partial_vectors
2464 : where we only check if the given internal function is supported
2465 : (to choose whether to use the IFN, LEGACY, or EMULATED flavor
2466 : of gather/scatter) and don't re-do the full analysis. */
2467 0 : tree tmp;
2468 0 : gcc_assert (vect_gather_scatter_fn_p
2469 : (loop_vinfo, vls_type == VLS_LOAD, masked_p, vectype,
2470 : gs_info.memory_type, TREE_TYPE (gs_info.offset),
2471 : gs_info.scale, supported_scale, &gs_info.ifn,
2472 : &tmp, supported_offset_vectype, elsvals));
2473 :
2474 0 : SLP_TREE_GS_SCALE (slp_node) = gs_info.scale;
2475 0 : SLP_TREE_GS_BASE (slp_node) = error_mark_node;
2476 0 : ls->gs.ifn = gs_info.ifn;
2477 0 : ls->strided_offset_vectype = gs_info.offset_vectype;
2478 0 : *memory_access_type = VMAT_GATHER_SCATTER_IFN;
2479 : }
2480 75115 : else if (SLP_TREE_LANES (slp_node) > 1
2481 : && !masked_p
2482 3890 : && !single_element_p
2483 78778 : && vect_use_grouped_gather (STMT_VINFO_DR_INFO (stmt_info),
2484 : vectype, loop_vinfo,
2485 : masked_p, group_size,
2486 : &gs_info, elsvals, ls_type))
2487 : {
2488 0 : SLP_TREE_GS_SCALE (slp_node) = gs_info.scale;
2489 0 : SLP_TREE_GS_BASE (slp_node) = error_mark_node;
2490 0 : grouped_gather_fallback = *memory_access_type;
2491 0 : *memory_access_type = VMAT_GATHER_SCATTER_IFN;
2492 0 : ls->gs.ifn = gs_info.ifn;
2493 0 : vectype = *ls_type;
2494 0 : ls->strided_offset_vectype = gs_info.offset_vectype;
2495 : }
2496 : }
2497 :
2498 1249027 : if (*memory_access_type == VMAT_CONTIGUOUS_DOWN
2499 1249027 : || *memory_access_type == VMAT_CONTIGUOUS_REVERSE)
2500 5638 : *poffset = neg_ldst_offset;
2501 :
2502 1249027 : if (*memory_access_type == VMAT_ELEMENTWISE
2503 1217066 : || *memory_access_type == VMAT_GATHER_SCATTER_LEGACY
2504 1216721 : || *memory_access_type == VMAT_STRIDED_SLP
2505 1173494 : || *memory_access_type == VMAT_INVARIANT)
2506 : {
2507 78579 : *alignment_support_scheme = dr_unaligned_supported;
2508 78579 : *misalignment = DR_MISALIGNMENT_UNKNOWN;
2509 : }
2510 : else
2511 : {
2512 1170448 : if (mat_gather_scatter_p (*memory_access_type)
2513 : && !first_dr_info)
2514 : *misalignment = DR_MISALIGNMENT_UNKNOWN;
2515 : else
2516 1170448 : *misalignment = dr_misalignment (first_dr_info, vectype, *poffset);
2517 1170448 : *alignment_support_scheme
2518 1170448 : = vect_supportable_dr_alignment
2519 1170448 : (vinfo, first_dr_info, vectype, *misalignment,
2520 1170448 : mat_gather_scatter_p (*memory_access_type));
2521 1170448 : if (grouped_gather_fallback != VMAT_UNINITIALIZED
2522 0 : && *alignment_support_scheme != dr_aligned
2523 0 : && *alignment_support_scheme != dr_unaligned_supported)
2524 : {
2525 : /* No supportable alignment for a grouped gather, fall back to the
2526 : original memory access type. Even though VMAT_STRIDED_SLP might
2527 : also try aligned vector loads it can still choose vector
2528 : construction from scalars. */
2529 0 : *memory_access_type = grouped_gather_fallback;
2530 0 : *alignment_support_scheme = dr_unaligned_supported;
2531 0 : *misalignment = DR_MISALIGNMENT_UNKNOWN;
2532 : }
2533 : }
2534 :
2535 1249027 : if (overrun_p)
2536 : {
2537 9256 : gcc_assert (can_overrun_p);
2538 9256 : if (dump_enabled_p ())
2539 503 : dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2540 : "Data access with gaps requires scalar "
2541 : "epilogue loop\n");
2542 9256 : LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo) = true;
2543 : }
2544 :
2545 1249027 : if ((*memory_access_type == VMAT_ELEMENTWISE
2546 1249027 : || *memory_access_type == VMAT_STRIDED_SLP)
2547 : && !nunits.is_constant ())
2548 : {
2549 : if (dump_enabled_p ())
2550 : dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2551 : "Not using elementwise accesses due to variable "
2552 : "vectorization factor.\n");
2553 : return false;
2554 : }
2555 :
2556 : /* Checks if all scalar iterations are known to be inbounds. */
2557 1249027 : bool inbounds = DR_SCALAR_KNOWN_BOUNDS (STMT_VINFO_DR_INFO (stmt_info));
2558 :
2559 : /* Check if we support the operation if early breaks are needed. Here we
2560 : must ensure that we don't access any more than the scalar code would
2561 : have. A masked operation would ensure this, so for these load types
2562 : force masking. */
2563 1249027 : if (loop_vinfo
2564 475810 : && dr_safe_speculative_read_required (stmt_info)
2565 181027 : && LOOP_VINFO_EARLY_BREAKS (loop_vinfo)
2566 1430054 : && (mat_gather_scatter_p (*memory_access_type)
2567 176697 : || *memory_access_type == VMAT_STRIDED_SLP))
2568 : {
2569 9020 : if (dump_enabled_p ())
2570 8 : dump_printf_loc (MSG_NOTE, vect_location,
2571 : "early break not supported: cannot peel for "
2572 : "alignment. With non-contiguous memory vectorization"
2573 : " could read out of bounds at %G ",
2574 : STMT_VINFO_STMT (stmt_info));
2575 9020 : if (inbounds)
2576 0 : LOOP_VINFO_MUST_USE_PARTIAL_VECTORS_P (loop_vinfo) = true;
2577 : else
2578 : return false;
2579 : }
2580 :
2581 : /* If this DR needs alignment for correctness, we must ensure the target
2582 : alignment is a constant power-of-two multiple of the amount read per
2583 : vector iteration or force masking. */
2584 1240007 : if (dr_safe_speculative_read_required (stmt_info)
2585 1240007 : && (*alignment_support_scheme == dr_aligned
2586 82779 : && !mat_gather_scatter_p (*memory_access_type)))
2587 : {
2588 : /* We can only peel for loops, of course. */
2589 82779 : gcc_checking_assert (loop_vinfo);
2590 :
2591 82779 : poly_uint64 vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
2592 82779 : poly_uint64 read_amount
2593 82779 : = vf * TREE_INT_CST_LOW (TYPE_SIZE_UNIT (TREE_TYPE (vectype)));
2594 82779 : if (STMT_VINFO_GROUPED_ACCESS (stmt_info))
2595 82779 : read_amount *= group_size;
2596 :
2597 82779 : auto target_alignment
2598 82779 : = DR_TARGET_ALIGNMENT (STMT_VINFO_DR_INFO (stmt_info));
2599 82779 : if (!multiple_p (target_alignment, read_amount))
2600 : {
2601 3154 : if (dump_enabled_p ())
2602 : {
2603 6 : dump_printf_loc (MSG_NOTE, vect_location,
2604 : "desired alignment not met, target was ");
2605 6 : dump_dec (MSG_NOTE, target_alignment);
2606 6 : dump_printf (MSG_NOTE, " previously, but read amount is ");
2607 6 : dump_dec (MSG_NOTE, read_amount);
2608 6 : dump_printf (MSG_NOTE, " at %G.\n", STMT_VINFO_STMT (stmt_info));
2609 : }
2610 4164 : return false;
2611 : }
2612 :
2613 : /* When using a group access the first element may be aligned but the
2614 : subsequent loads may not be. For LOAD_LANES since the loads are based
2615 : on the first DR then all loads in the group are aligned. For
2616 : non-LOAD_LANES this is not the case. In particular a load + blend when
2617 : there are gaps can have the non first loads issued unaligned, even
2618 : partially overlapping the memory of the first load in order to simplify
2619 : the blend. This is what the x86_64 backend does for instance. As
2620 : such only the first load in the group is aligned, the rest are not.
2621 : Because of this the permutes may break the alignment requirements that
2622 : have been set, and as such we should for now, reject them. */
2623 79625 : if (SLP_TREE_LOAD_PERMUTATION (slp_node).exists ())
2624 : {
2625 1010 : if (dump_enabled_p ())
2626 72 : dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2627 : "loads with load permutations not supported for "
2628 : "speculative early break loads for %G",
2629 : STMT_VINFO_STMT (stmt_info));
2630 1010 : return false;
2631 : }
2632 :
2633 : /* Reject vectorization if we know the read amount per vector iteration
2634 : exceeds the min page size. */
2635 78615 : if (known_gt (read_amount, (unsigned) param_min_pagesize))
2636 : {
2637 0 : if (dump_enabled_p ())
2638 : {
2639 0 : dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2640 : "alignment required for correctness (");
2641 0 : dump_dec (MSG_MISSED_OPTIMIZATION, read_amount);
2642 0 : dump_printf (MSG_NOTE, ") may exceed page size.\n");
2643 : }
2644 0 : return false;
2645 : }
2646 :
2647 78615 : if (!vf.is_constant ())
2648 : {
2649 : /* For VLA modes, we need a runtime check to ensure any speculative
2650 : read amount does not exceed the page size. Here we record the max
2651 : possible read amount for the check. */
2652 : if (maybe_gt (read_amount,
2653 : LOOP_VINFO_MAX_SPEC_READ_AMOUNT (loop_vinfo)))
2654 : LOOP_VINFO_MAX_SPEC_READ_AMOUNT (loop_vinfo) = read_amount;
2655 :
2656 : /* For VLA modes, we must use partial vectors. */
2657 : LOOP_VINFO_MUST_USE_PARTIAL_VECTORS_P (loop_vinfo) = true;
2658 : }
2659 : }
2660 :
2661 1235843 : if (*alignment_support_scheme == dr_unaligned_unsupported)
2662 : {
2663 64501 : if (dump_enabled_p ())
2664 248 : dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2665 : "unsupported unaligned access\n");
2666 64501 : return false;
2667 : }
2668 :
2669 : /* FIXME: At the moment the cost model seems to underestimate the
2670 : cost of using elementwise accesses. This check preserves the
2671 : traditional behavior until that can be fixed. */
2672 1171342 : if (*memory_access_type == VMAT_ELEMENTWISE
2673 31961 : && !STMT_VINFO_STRIDED_P (first_stmt_info)
2674 1203303 : && !(STMT_VINFO_GROUPED_ACCESS (stmt_info)
2675 26738 : && single_element_p
2676 26095 : && !pow2p_hwi (group_size)))
2677 : {
2678 10364 : if (dump_enabled_p ())
2679 365 : dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2680 : "not falling back to elementwise accesses\n");
2681 10364 : return false;
2682 : }
2683 :
2684 : /* For BB vectorization build up the vector from existing scalar defs. */
2685 1160978 : if (!loop_vinfo && *memory_access_type == VMAT_ELEMENTWISE)
2686 : return false;
2687 :
2688 : /* Some loads need to explicitly permute the loaded data if there
2689 : is a load permutation. Among those are:
2690 : - VMAT_ELEMENTWISE.
2691 : - VMAT_STRIDED_SLP.
2692 : - VMAT_GATHER_SCATTER:
2693 : - Strided gather (fallback for VMAT_STRIDED_SLP if #lanes == 1).
2694 : - Grouped strided gather (ditto but for #lanes > 1).
2695 :
2696 : For VMAT_ELEMENTWISE we can fold the load permutation into the
2697 : individual indices we access directly, eliding the permutation.
2698 : Strided gather only allows load permutations for the
2699 : single-element case. */
2700 :
2701 1160978 : if (SLP_TREE_LOAD_PERMUTATION (slp_node).exists ()
2702 1160978 : && !(*memory_access_type == VMAT_ELEMENTWISE
2703 39816 : || (mat_gather_scatter_p (*memory_access_type)
2704 0 : && SLP_TREE_LANES (slp_node) == 1
2705 0 : && single_element_p)))
2706 : {
2707 39816 : if (!loop_vinfo)
2708 : {
2709 : /* In BB vectorization we may not actually use a loaded vector
2710 : accessing elements in excess of DR_GROUP_SIZE. */
2711 24158 : stmt_vec_info group_info = SLP_TREE_SCALAR_STMTS (slp_node)[0];
2712 24158 : group_info = DR_GROUP_FIRST_ELEMENT (group_info);
2713 24158 : unsigned HOST_WIDE_INT nunits;
2714 24158 : unsigned j, k, maxk = 0;
2715 85784 : FOR_EACH_VEC_ELT (SLP_TREE_LOAD_PERMUTATION (slp_node), j, k)
2716 61626 : if (k > maxk)
2717 : maxk = k;
2718 24158 : tree vectype = SLP_TREE_VECTYPE (slp_node);
2719 44055 : if (!TYPE_VECTOR_SUBPARTS (vectype).is_constant (&nunits)
2720 24158 : || maxk >= (DR_GROUP_SIZE (group_info) & ~(nunits - 1)))
2721 : {
2722 4261 : if (dump_enabled_p ())
2723 31 : dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2724 : "BB vectorization with gaps at the end of "
2725 : "a load is not supported\n");
2726 4261 : return false;
2727 : }
2728 : }
2729 :
2730 35555 : if (!perm_ok)
2731 : {
2732 2005 : if (dump_enabled_p ())
2733 8 : dump_printf_loc (MSG_MISSED_OPTIMIZATION,
2734 : vect_location,
2735 : "unsupported load permutation\n");
2736 2005 : return false;
2737 : }
2738 :
2739 33550 : *slp_perm = true;
2740 : }
2741 :
2742 : return true;
2743 : }
2744 :
2745 : /* Return true if boolean argument at MASK_INDEX is suitable for vectorizing
2746 : conditional operation STMT_INFO. When returning true, store the mask
2747 : in *MASK_NODE, the type of its definition in *MASK_DT_OUT and the type of
2748 : the vectorized mask in *MASK_VECTYPE_OUT. */
2749 :
2750 : static bool
2751 8097 : vect_check_scalar_mask (vec_info *vinfo,
2752 : slp_tree slp_node, unsigned mask_index,
2753 : slp_tree *mask_node,
2754 : vect_def_type *mask_dt_out, tree *mask_vectype_out)
2755 : {
2756 8097 : enum vect_def_type mask_dt;
2757 8097 : tree mask_vectype;
2758 8097 : slp_tree mask_node_1;
2759 8097 : tree mask_;
 : /* The mask operand must have a definition the vectorizer understands. */
2760 8097 : if (!vect_is_simple_use (vinfo, slp_node, mask_index,
2761 : &mask_, &mask_node_1, &mask_dt, &mask_vectype))
2762 : {
2763 0 : if (dump_enabled_p ())
2764 0 : dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2765 : "mask use not simple.\n");
2766 0 : return false;
2767 : }
2768 :
 : /* Invariant masks (constants or external defs) must already have a
 : scalar boolean type; there is no vector definition to infer one from. */
2769 8097 : if ((mask_dt == vect_constant_def || mask_dt == vect_external_def)
2770 8097 : && !VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (mask_)))
2771 : {
2772 0 : if (dump_enabled_p ())
2773 0 : dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2774 : "mask argument is not a boolean.\n");
2775 0 : return false;
2776 : }
2777 :
2778 8097 : tree vectype = SLP_TREE_VECTYPE (slp_node);
 : /* If no mask vector type was recorded, derive a boolean vector type
 : from the data vector's scalar type. */
2779 8097 : if (!mask_vectype)
2780 17 : mask_vectype = get_mask_type_for_scalar_type (vinfo, TREE_TYPE (vectype),
2781 : mask_node_1);
2782 :
2783 8097 : if (!mask_vectype || !VECTOR_BOOLEAN_TYPE_P (mask_vectype))
2784 : {
2785 0 : if (dump_enabled_p ())
2786 0 : dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2787 : "could not find an appropriate vector mask type.\n");
2788 0 : return false;
2789 : }
2790 :
 : /* One mask lane must control exactly one data lane. */
2791 8097 : if (maybe_ne (TYPE_VECTOR_SUBPARTS (mask_vectype),
2792 16194 : TYPE_VECTOR_SUBPARTS (vectype)))
2793 : {
2794 0 : if (dump_enabled_p ())
2795 0 : dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2796 : "vector mask type %T"
2797 : " does not match vector data type %T.\n",
2798 : mask_vectype, vectype);
2799 :
2800 0 : return false;
2801 : }
2802 :
2803 8097 : *mask_dt_out = mask_dt;
2804 8097 : *mask_vectype_out = mask_vectype;
2805 8097 : *mask_node = mask_node_1;
2806 8097 : return true;
2807 : }
2808 :
2809 :
2810 : /* Return true if stored value is suitable for vectorizing store
2811 : statement STMT_INFO. When returning true, store the scalar stored
2812 : in *RHS and *RHS_NODE, the type of the definition in *RHS_DT_OUT,
2813 : the type of the vectorized store value in
2814 : *RHS_VECTYPE_OUT and the type of the store in *VLS_TYPE_OUT. */
2815 :
2816 : static bool
2817 1313470 : vect_check_store_rhs (vec_info *vinfo, stmt_vec_info stmt_info,
2818 : slp_tree slp_node, slp_tree *rhs_node,
2819 : vect_def_type *rhs_dt_out, tree *rhs_vectype_out,
2820 : vec_load_store_type *vls_type_out)
2821 : {
 : /* For internal store functions (e.g. masked or scatter stores) the
 : stored value is not operand 0; ask the internal-fn machinery where
 : it lives. */
2822 1313470 : int op_no = 0;
2823 1313470 : if (gcall *call = dyn_cast <gcall *> (stmt_info->stmt))
2824 : {
2825 1470 : if (gimple_call_internal_p (call)
2826 1470 : && internal_store_fn_p (gimple_call_internal_fn (call)))
2827 1470 : op_no = internal_fn_stored_value_index (gimple_call_internal_fn (call));
2828 : }
 : /* Translate the statement operand index into an SLP child index. */
2829 1313470 : op_no = vect_slp_child_index_for_operand
2830 1313470 : (stmt_info->stmt, op_no, STMT_VINFO_GATHER_SCATTER_P (stmt_info));
2831 :
2832 1313470 : enum vect_def_type rhs_dt;
2833 1313470 : tree rhs_vectype;
2834 1313470 : tree rhs;
2835 1313470 : if (!vect_is_simple_use (vinfo, slp_node, op_no,
2836 : &rhs, rhs_node, &rhs_dt, &rhs_vectype))
2837 : {
2838 0 : if (dump_enabled_p ())
2839 0 : dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2840 : "use not simple.\n");
2841 0 : return false;
2842 : }
2843 :
2844 : /* In the case this is a store from a constant make sure
2845 : native_encode_expr can handle it. */
2846 1313470 : if (rhs_dt == vect_constant_def
2847 1313470 : && CONSTANT_CLASS_P (rhs) && native_encode_expr (rhs, NULL, 64) == 0)
2848 : {
2849 0 : if (dump_enabled_p ())
2850 0 : dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2851 : "cannot encode constant as a byte sequence.\n");
2852 0 : return false;
2853 : }
2854 :
 : /* The stored value's vector type must match the store's vector type. */
2855 1313470 : tree vectype = SLP_TREE_VECTYPE (slp_node);
2856 1313470 : if (rhs_vectype && !useless_type_conversion_p (vectype, rhs_vectype))
2857 : {
2858 24 : if (dump_enabled_p ())
2859 24 : dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2860 : "incompatible vector types.\n");
2861 24 : return false;
2862 : }
2863 :
2864 1313446 : *rhs_dt_out = rhs_dt;
2865 1313446 : *rhs_vectype_out = rhs_vectype;
 : /* Constants and definitions from outside the vectorized region make
 : the stored value invariant. */
2866 1313446 : if (rhs_dt == vect_constant_def || rhs_dt == vect_external_def)
2867 991538 : *vls_type_out = VLS_STORE_INVARIANT;
2868 : else
2869 321908 : *vls_type_out = VLS_STORE;
2870 : return true;
2871 : }
2872 :
2873 : /* Build an all-ones vector mask of type MASKTYPE while vectorizing STMT_INFO.
2874 : Note that we support masks with floating-point type, in which case the
2875 : floats are interpreted as a bitmask. */
2876 :
2877 : static tree
2878 165 : vect_build_all_ones_mask (vec_info *vinfo,
2879 : stmt_vec_info stmt_info, tree masktype)
2880 : {
2881 165 : if (TREE_CODE (masktype) == INTEGER_TYPE)
2882 98 : return build_int_cst (masktype, -1);
2883 67 : else if (VECTOR_BOOLEAN_TYPE_P (masktype)
2884 134 : || TREE_CODE (TREE_TYPE (masktype)) == INTEGER_TYPE)
2885 : {
2886 14 : tree mask = build_int_cst (TREE_TYPE (masktype), -1);
2887 14 : mask = build_vector_from_val (masktype, mask);
2888 14 : return vect_init_vector (vinfo, stmt_info, mask, masktype, NULL);
2889 : }
2890 53 : else if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (masktype)))
2891 : {
2892 : REAL_VALUE_TYPE r;
2893 : long tmp[6];
2894 371 : for (int j = 0; j < 6; ++j)
2895 318 : tmp[j] = -1;
2896 53 : real_from_target (&r, tmp, TYPE_MODE (TREE_TYPE (masktype)));
2897 53 : tree mask = build_real (TREE_TYPE (masktype), r);
2898 53 : mask = build_vector_from_val (masktype, mask);
2899 53 : return vect_init_vector (vinfo, stmt_info, mask, masktype, NULL);
2900 : }
2901 0 : gcc_unreachable ();
2902 : }
2903 :
2904 : /* Build an all-zero merge value of type VECTYPE while vectorizing
2905 : STMT_INFO as a gather load. */
2906 :
2907 : static tree
2908 158 : vect_build_zero_merge_argument (vec_info *vinfo,
2909 : stmt_vec_info stmt_info, tree vectype)
2910 : {
2911 158 : tree merge;
2912 158 : if (TREE_CODE (TREE_TYPE (vectype)) == INTEGER_TYPE)
2913 49 : merge = build_int_cst (TREE_TYPE (vectype), 0);
2914 109 : else if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (vectype)))
2915 : {
2916 : REAL_VALUE_TYPE r;
2917 : long tmp[6];
2918 763 : for (int j = 0; j < 6; ++j)
2919 654 : tmp[j] = 0;
2920 109 : real_from_target (&r, tmp, TYPE_MODE (TREE_TYPE (vectype)));
2921 109 : merge = build_real (TREE_TYPE (vectype), r);
2922 : }
2923 : else
2924 0 : gcc_unreachable ();
2925 158 : merge = build_vector_from_val (vectype, merge);
2926 158 : return vect_init_vector (vinfo, stmt_info, merge, vectype, NULL);
2927 : }
2928 :
2929 : /* Return the corresponding else value for an else value constant
2930 : ELSVAL with type TYPE. */
2931 :
2932 : tree
2933 1941 : vect_get_mask_load_else (int elsval, tree type)
2934 : {
2935 1941 : tree els;
2936 1941 : if (elsval == MASK_LOAD_ELSE_UNDEFINED)
2937 : {
2938 0 : tree tmp = create_tmp_var (type);
2939 : /* No need to warn about anything. */
2940 0 : TREE_NO_WARNING (tmp) = 1;
2941 0 : els = get_or_create_ssa_default_def (cfun, tmp);
2942 : }
2943 1941 : else if (elsval == MASK_LOAD_ELSE_M1)
2944 0 : els = build_minus_one_cst (type);
2945 1941 : else if (elsval == MASK_LOAD_ELSE_ZERO)
2946 1941 : els = build_zero_cst (type);
2947 : else
2948 0 : gcc_unreachable ();
2949 :
2950 1941 : return els;
2951 : }
2952 :
2953 : /* Build a gather load call while vectorizing STMT_INFO. Insert new
2954 : instructions before GSI and add them to VEC_STMT. GS_INFO describes
2955 : the gather load operation. If the load is conditional, MASK is the
2956 : vectorized condition, otherwise MASK is null. PTR is the base
2957 : pointer and OFFSET is the vectorized offset. */
2958 :
2959 : static gimple *
2960 346 : vect_build_one_gather_load_call (vec_info *vinfo, stmt_vec_info stmt_info,
2961 : slp_tree slp_node, tree vectype,
2962 : gimple_stmt_iterator *gsi, tree decl,
2963 : tree ptr, tree offset, tree mask)
2964 : {
 : /* DECL is a target gather builtin whose signature is
 : (src, ptr, index, mask, scale); pull the argument types out of its
 : function type so operands can be converted to match. */
2965 346 : tree arglist = TYPE_ARG_TYPES (TREE_TYPE (decl));
2966 346 : tree rettype = TREE_TYPE (TREE_TYPE (decl));
2967 346 : tree srctype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
2968 346 : /* ptrtype */ arglist = TREE_CHAIN (arglist);
2969 346 : tree idxtype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
2970 346 : tree masktype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
2971 346 : tree scaletype = TREE_VALUE (arglist);
2972 346 : tree var;
2973 346 : gcc_checking_assert (types_compatible_p (srctype, rettype)
2974 : && (!mask
2975 : || TREE_CODE (masktype) == INTEGER_TYPE
2976 : || types_compatible_p (srctype, masktype)));
2977 :
 : /* Reinterpret the offset vector as the builtin's index type if needed;
 : the lane counts must already agree. */
2978 346 : tree op = offset;
2979 346 : if (!useless_type_conversion_p (idxtype, TREE_TYPE (op)))
2980 : {
2981 100 : gcc_assert (known_eq (TYPE_VECTOR_SUBPARTS (TREE_TYPE (op)),
2982 : TYPE_VECTOR_SUBPARTS (idxtype)));
2983 100 : var = vect_get_new_ssa_name (idxtype, vect_simple_var);
2984 100 : op = build1 (VIEW_CONVERT_EXPR, idxtype, op);
2985 100 : gassign *new_stmt = gimple_build_assign (var, VIEW_CONVERT_EXPR, op);
2986 100 : vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
2987 100 : op = var;
2988 : }
2989 :
2990 346 : tree src_op = NULL_TREE;
2991 346 : tree mask_op = NULL_TREE;
2992 346 : if (mask)
2993 : {
2994 188 : if (!useless_type_conversion_p (masktype, TREE_TYPE (mask)))
2995 : {
 : /* View-convert the mask to the builtin's mask type, going via an
 : integer type of the operand's mode when the modes differ, then
 : widening with a NOP conversion if necessary. */
2996 188 : tree utype, optype = TREE_TYPE (mask);
2997 188 : if (VECTOR_TYPE_P (masktype)
2998 188 : || TYPE_MODE (masktype) == TYPE_MODE (optype))
2999 : utype = masktype;
3000 : else
3001 6 : utype = lang_hooks.types.type_for_mode (TYPE_MODE (optype), 1);
3002 188 : var = vect_get_new_ssa_name (utype, vect_scalar_var);
3003 188 : tree mask_arg = build1 (VIEW_CONVERT_EXPR, utype, mask);
3004 188 : gassign *new_stmt
3005 188 : = gimple_build_assign (var, VIEW_CONVERT_EXPR, mask_arg);
3006 188 : vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
3007 188 : mask_arg = var;
3008 188 : if (!useless_type_conversion_p (masktype, utype))
3009 : {
3010 6 : gcc_assert (TYPE_PRECISION (utype)
3011 : <= TYPE_PRECISION (masktype));
3012 6 : var = vect_get_new_ssa_name (masktype, vect_scalar_var);
3013 6 : new_stmt = gimple_build_assign (var, NOP_EXPR, mask_arg);
3014 6 : vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
3015 6 : mask_arg = var;
3016 : }
 : /* Masked-out lanes merge with zero. */
3017 188 : src_op = build_zero_cst (srctype);
3018 188 : mask_op = mask_arg;
3019 : }
3020 : else
3021 : {
3022 : src_op = mask;
3023 : mask_op = mask;
3024 : }
3025 : }
3026 : else
3027 : {
 : /* Unconditional gather: all-ones mask and a zero merge value. */
3028 158 : src_op = vect_build_zero_merge_argument (vinfo, stmt_info, rettype);
3029 158 : mask_op = vect_build_all_ones_mask (vinfo, stmt_info, masktype);
3030 : }
3031 :
3032 346 : tree scale = build_int_cst (scaletype, SLP_TREE_GS_SCALE (slp_node));
3033 346 : gimple *new_stmt = gimple_build_call (decl, 5, src_op, ptr, op,
3034 : mask_op, scale);
3035 :
3036 346 : if (!useless_type_conversion_p (vectype, rettype))
3037 : {
 : /* Reinterpret the call's result as VECTYPE; the returned statement
 : is then the view-convert assignment with no lhs set yet. */
3038 49 : gcc_assert (known_eq (TYPE_VECTOR_SUBPARTS (vectype),
3039 : TYPE_VECTOR_SUBPARTS (rettype)));
3040 49 : op = vect_get_new_ssa_name (rettype, vect_simple_var);
3041 49 : gimple_call_set_lhs (new_stmt, op);
3042 49 : vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
3043 49 : op = build1 (VIEW_CONVERT_EXPR, vectype, op);
3044 49 : new_stmt = gimple_build_assign (NULL_TREE, VIEW_CONVERT_EXPR, op);
3045 : }
3046 :
3047 346 : return new_stmt;
3048 : }
3049 :
3050 : /* Build a scatter store call while vectorizing STMT_INFO. Insert new
3051 : instructions before GSI. GS_INFO describes the scatter store operation.
3052 : PTR is the base pointer, OFFSET the vectorized offsets and OPRND the
3053 : vectorized data to store.
3054 : If the store is conditional, MASK is the vectorized condition, otherwise
3055 : MASK is null. */
3056 :
3057 : static gimple *
3058 161 : vect_build_one_scatter_store_call (vec_info *vinfo, stmt_vec_info stmt_info,
3059 : slp_tree slp_node,
3060 : gimple_stmt_iterator *gsi,
3061 : tree decl,
3062 : tree ptr, tree offset, tree oprnd, tree mask)
3063 : {
 : /* DECL is a target scatter builtin whose signature is
 : (ptr, mask, index, src, scale) returning void; pull the argument
 : types out of its function type so operands can be converted. */
3064 161 : tree rettype = TREE_TYPE (TREE_TYPE (decl));
3065 161 : tree arglist = TYPE_ARG_TYPES (TREE_TYPE (decl));
3066 161 : /* tree ptrtype = TREE_VALUE (arglist); */ arglist = TREE_CHAIN (arglist);
3067 161 : tree masktype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
3068 161 : tree idxtype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
3069 161 : tree srctype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
3070 161 : tree scaletype = TREE_VALUE (arglist);
3071 161 : gcc_checking_assert (TREE_CODE (masktype) == INTEGER_TYPE
3072 : && TREE_CODE (rettype) == VOID_TYPE)
3073 :
3074 161 : tree mask_arg = NULL_TREE;
3075 161 : if (mask)
3076 : {
 : /* View-convert the vector mask to the builtin's integer mask type,
 : going via an integer type of the operand's mode when the modes
 : differ, then widening with a NOP conversion if necessary. */
3077 110 : mask_arg = mask;
3078 110 : tree optype = TREE_TYPE (mask_arg);
3079 110 : tree utype;
3080 110 : if (TYPE_MODE (masktype) == TYPE_MODE (optype))
3081 : utype = masktype;
3082 : else
3083 8 : utype = lang_hooks.types.type_for_mode (TYPE_MODE (optype), 1);
3084 110 : tree var = vect_get_new_ssa_name (utype, vect_scalar_var);
3085 110 : mask_arg = build1 (VIEW_CONVERT_EXPR, utype, mask_arg);
3086 110 : gassign *new_stmt
3087 110 : = gimple_build_assign (var, VIEW_CONVERT_EXPR, mask_arg);
3088 110 : vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
3089 110 : mask_arg = var;
3090 110 : if (!useless_type_conversion_p (masktype, utype))
3091 : {
3092 8 : gcc_assert (TYPE_PRECISION (utype) <= TYPE_PRECISION (masktype));
3093 8 : tree var = vect_get_new_ssa_name (masktype, vect_scalar_var);
3094 8 : new_stmt = gimple_build_assign (var, NOP_EXPR, mask_arg);
3095 8 : vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
3096 8 : mask_arg = var;
3097 : }
3098 : }
3099 : else
3100 : {
 : /* Unconditional scatter: use an all-ones mask. */
3101 51 : mask_arg = build_int_cst (masktype, -1);
3102 51 : mask_arg = vect_init_vector (vinfo, stmt_info, mask_arg, masktype, NULL);
3103 : }
3104 :
 : /* Reinterpret the stored value as the builtin's source type if needed;
 : the lane counts must already agree. */
3105 161 : tree src = oprnd;
3106 161 : if (!useless_type_conversion_p (srctype, TREE_TYPE (src)))
3107 : {
3108 0 : gcc_assert (known_eq (TYPE_VECTOR_SUBPARTS (TREE_TYPE (src)),
3109 : TYPE_VECTOR_SUBPARTS (srctype)));
3110 0 : tree var = vect_get_new_ssa_name (srctype, vect_simple_var);
3111 0 : src = build1 (VIEW_CONVERT_EXPR, srctype, src);
3112 0 : gassign *new_stmt = gimple_build_assign (var, VIEW_CONVERT_EXPR, src);
3113 0 : vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
3114 0 : src = var;
3115 : }
3116 :
 : /* Likewise reinterpret the offset vector as the builtin's index type. */
3117 161 : tree op = offset;
3118 161 : if (!useless_type_conversion_p (idxtype, TREE_TYPE (op)))
3119 : {
3120 16 : gcc_assert (known_eq (TYPE_VECTOR_SUBPARTS (TREE_TYPE (op)),
3121 : TYPE_VECTOR_SUBPARTS (idxtype)));
3122 16 : tree var = vect_get_new_ssa_name (idxtype, vect_simple_var);
3123 16 : op = build1 (VIEW_CONVERT_EXPR, idxtype, op);
3124 16 : gassign *new_stmt = gimple_build_assign (var, VIEW_CONVERT_EXPR, op);
3125 16 : vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
3126 16 : op = var;
3127 : }
3128 :
3129 161 : tree scale = build_int_cst (scaletype, SLP_TREE_GS_SCALE (slp_node));
3130 161 : gcall *new_stmt
3131 161 : = gimple_build_call (decl, 5, ptr, mask_arg, op, src, scale);
3132 161 : return new_stmt;
3133 : }
3134 :
3135 : /* Prepare the base and offset in GS_INFO for vectorization.
3136 : Set *DATAREF_PTR to the loop-invariant base address and *VEC_OFFSET
3137 : to the vectorized offset argument for the first copy of STMT_INFO.
3138 : STMT_INFO is the statement described by GS_INFO and LOOP is the
3139 : containing loop. */
3140 :
3141 : static void
3142 1219 : vect_get_gather_scatter_ops (class loop *loop, slp_tree slp_node,
3143 : tree *dataref_ptr, vec<tree> *vec_offset)
3144 : {
3145 1219 : gimple_seq stmts = NULL;
 : /* Force the loop-invariant base address into a GIMPLE value; any
 : statements needed to compute it are inserted on the preheader edge. */
3146 1219 : *dataref_ptr = force_gimple_operand (SLP_TREE_GS_BASE (slp_node),
3147 : &stmts, true, NULL_TREE);
3148 1219 : if (stmts != NULL)
3149 : {
3150 986 : basic_block new_bb;
3151 986 : edge pe = loop_preheader_edge (loop);
3152 986 : new_bb = gsi_insert_seq_on_edge_immediate (pe, stmts);
 : /* Inserting on the preheader edge must not have split it. */
3153 986 : gcc_assert (!new_bb);
3154 : }
 : /* The vectorized offsets come from the first SLP child. */
3155 1219 : vect_get_slp_defs (SLP_TREE_CHILDREN (slp_node)[0], vec_offset);
3156 : }
3157 :
3158 : /* Prepare to implement a grouped or strided load or store using
3159 : the gather load or scatter store operation described by GS_INFO.
3160 : STMT_INFO is the load or store statement.
3161 :
3162 : Set *DATAREF_BUMP to the amount that should be added to the base
3163 : address after each copy of the vectorized statement. Set *VEC_OFFSET
3164 : to an invariant offset vector in which element I has the value
3165 : I * DR_STEP / SCALE. */
3166 :
3167 : static void
3168 0 : vect_get_strided_load_store_ops (stmt_vec_info stmt_info, slp_tree node,
3169 : tree vectype, tree offset_vectype,
3170 : loop_vec_info loop_vinfo,
3171 : gimple_stmt_iterator *gsi,
3172 : tree *dataref_bump, tree *vec_offset,
3173 : vec_loop_lens *loop_lens)
3174 : {
3175 0 : struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
3176 :
 : /* With SELECT_VL the per-iteration bump is variable and must be
 : computed at runtime from the selected length. */
3177 0 : if (LOOP_VINFO_USING_SELECT_VL_P (loop_vinfo))
3178 : {
3179 : /* _31 = .SELECT_VL (ivtmp_29, POLY_INT_CST [4, 4]);
3180 : ivtmp_8 = _31 * 16 (step in bytes);
3181 : .MASK_LEN_SCATTER_STORE (vectp_a.9_7, ... );
3182 : vectp_a.9_26 = vectp_a.9_7 + ivtmp_8; */
3183 0 : tree loop_len
3184 0 : = vect_get_loop_len (loop_vinfo, gsi, loop_lens, 1, vectype, 0, 0, true);
3185 0 : tree tmp
3186 0 : = fold_build2 (MULT_EXPR, sizetype,
3187 : fold_convert (sizetype, unshare_expr (DR_STEP (dr))),
3188 : loop_len);
3189 0 : *dataref_bump = force_gimple_operand_gsi (gsi, tmp, true, NULL_TREE, true,
3190 : GSI_SAME_STMT);
3191 : }
3192 : else
3193 : {
 : /* Otherwise bump by DR_STEP times the number of vector lanes. */
3194 0 : tree bump
3195 0 : = size_binop (MULT_EXPR,
3196 : fold_convert (sizetype, unshare_expr (DR_STEP (dr))),
3197 : size_int (TYPE_VECTOR_SUBPARTS (vectype)));
3198 0 : *dataref_bump = cse_and_gimplify_to_preheader (loop_vinfo, bump);
3199 : }
3200 :
 : /* If the target supports a strided load/store internal function it
 : takes the scalar step directly, so no offset vector is needed. */
3201 0 : internal_fn ifn
3202 0 : = DR_IS_READ (dr) ? IFN_MASK_LEN_STRIDED_LOAD : IFN_MASK_LEN_STRIDED_STORE;
3203 0 : if (direct_internal_fn_supported_p (ifn, vectype, OPTIMIZE_FOR_SPEED))
3204 : {
3205 0 : *vec_offset = cse_and_gimplify_to_preheader (loop_vinfo,
3206 : unshare_expr (DR_STEP (dr)));
3207 0 : return;
3208 : }
3209 :
3210 : /* The offset given in GS_INFO can have pointer type, so use the element
3211 : type of the vector instead. */
3212 0 : tree offset_type = TREE_TYPE (offset_vectype);
3213 :
3214 : /* Calculate X = DR_STEP / SCALE and convert it to the appropriate type. */
3215 0 : tree step = size_binop (EXACT_DIV_EXPR, unshare_expr (DR_STEP (dr)),
3216 : ssize_int (SLP_TREE_GS_SCALE (node)));
3217 0 : step = fold_convert (offset_type, step);
3218 :
3219 : /* Create {0, X, X*2, X*3, ...}. */
3220 0 : tree offset = fold_build2 (VEC_SERIES_EXPR, offset_vectype,
3221 : build_zero_cst (offset_type), step);
3222 0 : *vec_offset = cse_and_gimplify_to_preheader (loop_vinfo, offset);
3223 : }
3224 :
3225 : /* Prepare the pointer IVs which needs to be updated by a variable amount.
3226 : Such variable amount is the outcome of .SELECT_VL. In this case, we can
3227 : allow each iteration to process a flexible number of elements as long as
3228 : the number is <= vf elements.
3229 :
3230 : Return data reference according to SELECT_VL.
3231 : If new statements are needed, insert them before GSI. */
3232 :
3233 : static tree
3234 0 : vect_get_loop_variant_data_ptr_increment (
3235 : vec_info *vinfo, tree aggr_type, gimple_stmt_iterator *gsi,
3236 : vec_loop_lens *loop_lens, dr_vec_info *dr_info,
3237 : vect_memory_access_type memory_access_type)
3238 : {
3239 0 : loop_vec_info loop_vinfo = dyn_cast<loop_vec_info> (vinfo);
3240 0 : tree step = vect_dr_behavior (vinfo, dr_info)->step;
3241 :
3242 : /* gather/scatter never reach here. */
3243 0 : gcc_assert (!mat_gather_scatter_p (memory_access_type));
3244 :
3245 : /* With the SELECT_VL pattern the memory address is adjusted
3246 : dynamically by the .SELECT_VL result.
3247 :
3248 : The result of .SELECT_VL is the number of elements
3249 : processed in each iteration, so the memory address
3250 : adjustment operation is:
3251 :
3252 : addr = addr + .SELECT_VL (ARG..) * step;
3253 : */
3254 0 : tree loop_len
3255 0 : = vect_get_loop_len (loop_vinfo, gsi, loop_lens, 1, aggr_type, 0, 0, true);
3256 0 : tree len_type = TREE_TYPE (loop_len);
3257 : /* The outcome of .SELECT_VL counts elements, so scale it by the
3258 : step to get a byte amount that can be used to adjust the
3259 : variable-amount address pointer IV. */
3260 0 : tree tmp = fold_build2 (MULT_EXPR, len_type, loop_len,
3261 : wide_int_to_tree (len_type, wi::to_widest (step)));
3262 0 : tree bump = make_temp_ssa_name (len_type, NULL, "ivtmp");
3263 0 : gassign *assign = gimple_build_assign (bump, tmp);
3264 0 : gsi_insert_before (gsi, assign, GSI_SAME_STMT);
3265 0 : return bump;
3266 : }
3267 :
3268 : /* Return the amount that should be added to a vector pointer to move
3269 : to the next or previous copy of AGGR_TYPE. DR_INFO is the data reference
3270 : being vectorized and MEMORY_ACCESS_TYPE describes the type of
3271 : vectorization. */
3272 :
 3273 : static tree
 3274 695725 : vect_get_data_ptr_increment (vec_info *vinfo, gimple_stmt_iterator *gsi,
 3275 : dr_vec_info *dr_info, tree aggr_type,
 3276 : vect_memory_access_type memory_access_type,
 3277 : vec_loop_lens *loop_lens)
 3278 : {
 : /* An invariant access re-uses one loaded vector; the pointer never moves. */
 3279 695725 : if (memory_access_type == VMAT_INVARIANT)
 3280 0 : return size_zero_node;
 3281 :
 : /* With .SELECT_VL the per-iteration element count is dynamic, so the
 : increment must be computed at runtime from the SELECT_VL result. */
 3282 695725 : loop_vec_info loop_vinfo = dyn_cast<loop_vec_info> (vinfo);
 3283 134335 : if (loop_vinfo && LOOP_VINFO_USING_SELECT_VL_P (loop_vinfo))
 3284 0 : return vect_get_loop_variant_data_ptr_increment (vinfo, aggr_type, gsi,
 3285 : loop_lens, dr_info,
 3286 0 : memory_access_type);
 3287 :
 : /* Otherwise advance by one aggregate (vector) per copy; for a
 : negative-step access the pointer walks backwards. */
 3288 695725 : tree iv_step = TYPE_SIZE_UNIT (aggr_type);
 3289 695725 : tree step = vect_dr_behavior (vinfo, dr_info)->step;
 3290 695725 : if (tree_int_cst_sgn (step) == -1)
 3291 2828 : iv_step = fold_build1 (NEGATE_EXPR, TREE_TYPE (iv_step), iv_step);
 3292 : return iv_step;
 3293 : }
3294 :
3295 : /* Check and perform vectorization of BUILT_IN_BSWAP{16,32,64,128}. */
3296 :
 3297 : static bool
 3298 206 : vectorizable_bswap (vec_info *vinfo,
 3299 : stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
 3300 : slp_tree slp_node,
 3301 : slp_tree *slp_op,
 3302 : tree vectype_in, stmt_vector_for_cost *cost_vec)
 3303 : {
 3304 206 : tree op, vectype;
 3305 206 : gcall *stmt = as_a <gcall *> (stmt_info->stmt);
 3306 :
 3307 206 : op = gimple_call_arg (stmt, 0);
 3308 206 : vectype = SLP_TREE_VECTYPE (slp_node);
 3309 206 : poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
 3310 :
 3311 206 : if (TYPE_SIZE (vectype_in) != TYPE_SIZE (vectype))
 3312 : {
 3313 0 : if (dump_enabled_p ())
 3314 0 : dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
 3315 : "mismatched vector sizes %T and %T\n",
 3316 : vectype_in, vectype);
 3317 0 : return false;
 3318 : }
 3319 :
 : /* View the input as a vector of bytes so the bswap becomes a pure
 : byte permutation. */
 3320 206 : tree char_vectype = get_same_sized_vectype (char_type_node, vectype_in);
 3321 206 : if (! char_vectype)
 3322 : return false;
 3323 :
 3324 206 : poly_uint64 num_bytes = TYPE_VECTOR_SUBPARTS (char_vectype);
 3325 206 : unsigned word_bytes;
 3326 206 : if (!constant_multiple_p (num_bytes, nunits, &word_bytes))
 3327 : return false;
 3328 :
 3329 : /* The encoding uses one stepped pattern for each byte in the word;
 3330 : within each word the byte order is reversed. */
 3331 206 : vec_perm_builder elts (num_bytes, word_bytes, 3);
 3332 824 : for (unsigned i = 0; i < 3; ++i)
 3333 3318 : for (unsigned j = 0; j < word_bytes; ++j)
 3334 2700 : elts.quick_push ((i + 1) * word_bytes - j - 1);
 3335 :
 : /* The permutation must be supported by the target as a constant. */
 3336 206 : vec_perm_indices indices (elts, 1, num_bytes);
 3337 206 : machine_mode vmode = TYPE_MODE (char_vectype);
 3338 206 : if (!can_vec_perm_const_p (vmode, vmode, indices))
 3339 : return false;
 3340 :
 3341 152 : if (cost_vec)
 3342 : {
 3343 140 : if (!vect_maybe_update_slp_op_vectype (slp_op[0], vectype_in))
 3344 : {
 3345 0 : if (dump_enabled_p ())
 3346 0 : dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
 3347 : "incompatible vector types for invariants\n");
 3348 0 : return false;
 3349 : }
 3350 :
 3351 140 : SLP_TREE_TYPE (slp_node) = call_vec_info_type;
 3352 140 : DUMP_VECT_SCOPE ("vectorizable_bswap");
 3353 140 : record_stmt_cost (cost_vec,
 3354 : 1, vector_stmt, slp_node, 0, vect_prologue);
 3355 140 : record_stmt_cost (cost_vec,
 3356 140 : vect_get_num_copies (vinfo, slp_node),
 3357 : vec_perm, slp_node, 0, vect_body);
 3358 140 : return true;
 3359 : }
 3360 :
 3361 12 : tree bswap_vconst = vec_perm_indices_to_tree (char_vectype, indices);
 3362 :
 3363 : /* Transform. */
 3364 12 : vec<tree> vec_oprnds = vNULL;
 3365 12 : vect_get_vec_defs (vinfo, slp_node, op, &vec_oprnds);
 3366 : /* Arguments are ready. Create the new vector stmt: view-convert to
 3367 : bytes, permute, then view-convert back to the result type. */
 3368 12 : unsigned i;
 3369 12 : tree vop;
 3370 24 : FOR_EACH_VEC_ELT (vec_oprnds, i, vop)
 3371 : {
 3372 12 : gimple *new_stmt;
 3373 12 : tree tem = make_ssa_name (char_vectype);
 3374 12 : new_stmt = gimple_build_assign (tem, build1 (VIEW_CONVERT_EXPR,
 3375 : char_vectype, vop));
 3376 12 : vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
 3377 12 : tree tem2 = make_ssa_name (char_vectype);
 3378 12 : new_stmt = gimple_build_assign (tem2, VEC_PERM_EXPR,
 3379 : tem, tem, bswap_vconst);
 3380 12 : vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
 3381 12 : tem = make_ssa_name (vectype);
 3382 12 : new_stmt = gimple_build_assign (tem, build1 (VIEW_CONVERT_EXPR,
 3383 : vectype, tem2));
 3384 12 : vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
 3385 12 : slp_node->push_vec_def (new_stmt);
 3386 : }
 3387 :
 3388 12 : vec_oprnds.release ();
 3389 12 : return true;
 3390 206 : }
3389 :
3390 : /* Return true if vector types VECTYPE_IN and VECTYPE_OUT have
3391 : integer elements and if we can narrow VECTYPE_IN to VECTYPE_OUT
3392 : in a single step. On success, store the binary pack code in
3393 : *CONVERT_CODE. */
3394 :
 3395 : static bool
 3396 156 : simple_integer_narrowing (tree vectype_out, tree vectype_in,
 3397 : code_helper *convert_code)
 3398 : {
 3399 312 : if (!INTEGRAL_TYPE_P (TREE_TYPE (vectype_out))
 3400 312 : || !INTEGRAL_TYPE_P (TREE_TYPE (vectype_in)))
 3401 : return false;
 3402 :
 3403 66 : code_helper code;
 3404 66 : int multi_step_cvt = 0;
 3405 66 : auto_vec <tree, 8> interm_types;
 : /* Only a single narrowing step qualifies: reject conversions that need
 : intermediate types (multi_step_cvt != 0). */
 3406 97 : if (!supportable_narrowing_operation (NOP_EXPR, vectype_out, vectype_in,
 3407 : &code, &multi_step_cvt, &interm_types)
 3408 66 : || multi_step_cvt)
 3409 31 : return false;
 3410 :
 3411 35 : *convert_code = code;
 3412 35 : return true;
 3413 66 : }
3414 :
3415 : /* Function vectorizable_call.
3416 :
3417 : Check if STMT_INFO performs a function call that can be vectorized.
3418 : If COST_VEC is passed, calculate costs but don't change anything,
3419 : otherwise, vectorize STMT_INFO: create a vectorized stmt to replace
3420 : it, and insert it at GSI.
3421 : Return true if STMT_INFO is vectorizable in this way. */
3422 :
 3423 : static bool
 3424 2348636 : vectorizable_call (vec_info *vinfo,
 3425 : stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
 3426 : slp_tree slp_node,
 3427 : stmt_vector_for_cost *cost_vec)
 3428 : {
 3429 2348636 : gcall *stmt;
 3430 2348636 : tree vec_dest;
 3431 2348636 : tree scalar_dest;
 3432 2348636 : tree op;
 3433 2348636 : tree vec_oprnd0 = NULL_TREE;
 3434 2348636 : tree vectype_out, vectype_in;
 3435 2348636 : poly_uint64 nunits_in;
 3436 2348636 : poly_uint64 nunits_out;
 3437 2348636 : loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
 3438 2348636 : bb_vec_info bb_vinfo = dyn_cast <bb_vec_info> (vinfo);
 3439 2348636 : tree fndecl, new_temp, rhs_type;
 3440 2348636 : enum vect_def_type dt[5]
 3441 : = { vect_unknown_def_type, vect_unknown_def_type, vect_unknown_def_type,
 3442 : vect_unknown_def_type, vect_unknown_def_type };
 3443 2348636 : tree vectypes[ARRAY_SIZE (dt)] = {};
 3444 2348636 : slp_tree slp_op[ARRAY_SIZE (dt)] = {};
 3445 2348636 : auto_vec<tree, 8> vargs;
 3446 2348636 : enum { NARROW, NONE, WIDEN } modifier;
 3447 2348636 : size_t i, nargs;
 3448 2348636 : tree clz_ctz_arg1 = NULL_TREE;
 3449 :
 3450 2348636 : if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
 3451 : return false;
 3452 :
 3453 2348636 : if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
 3454 194531 : && cost_vec)
 3455 : return false;
 3456 :
 3457 : /* Is STMT_INFO a vectorizable call? */
 3458 2358185 : stmt = dyn_cast <gcall *> (stmt_info->stmt);
 3459 19326 : if (!stmt)
 3460 : return false;
 3461 :
 3462 19326 : if (gimple_call_internal_p (stmt)
 3463 19326 : && (internal_load_fn_p (gimple_call_internal_fn (stmt))
 3464 12184 : || internal_store_fn_p (gimple_call_internal_fn (stmt))))
 3465 : /* Handled by vectorizable_load and vectorizable_store. */
 3466 2692 : return false;
 3467 :
 3468 16634 : if (gimple_call_lhs (stmt) == NULL_TREE
 3469 16634 : || TREE_CODE (gimple_call_lhs (stmt)) != SSA_NAME)
 3470 : return false;
 3471 :
 3472 16628 : gcc_checking_assert (!stmt_can_throw_internal (cfun, stmt));
 3473 :
 3474 16628 : vectype_out = SLP_TREE_VECTYPE (slp_node);
 3475 :
 3476 : /* Process function arguments. */
 3477 16628 : rhs_type = NULL_TREE;
 3478 16628 : vectype_in = NULL_TREE;
 3479 16628 : nargs = gimple_call_num_args (stmt);
 3480 :
 3481 : /* Bail out if the function has more than five arguments, we do not have
 3482 : interesting builtin functions to vectorize with more than two arguments
 3483 : except for fma (cond_fma has more). No arguments is also not good. */
 3484 16628 : if (nargs == 0 || nargs > 5)
 3485 : return false;
 3486 :
 3487 : /* Ignore the arguments of IFN_GOMP_SIMD_LANE, they are magic. */
 3488 16548 : combined_fn cfn = gimple_call_combined_fn (stmt);
 3489 16548 : if (cfn == CFN_GOMP_SIMD_LANE)
 3490 : {
 3491 3207 : nargs = 0;
 3492 3207 : rhs_type = unsigned_type_node;
 3493 : }
 3494 : /* Similarly pretend IFN_CLZ and IFN_CTZ only has one argument, the second
 3495 : argument just says whether it is well-defined at zero or not and what
 3496 : value should be returned for it. */
 3497 16548 : if ((cfn == CFN_CLZ || cfn == CFN_CTZ) && nargs == 2)
 3498 : {
 3499 118 : nargs = 1;
 3500 118 : clz_ctz_arg1 = gimple_call_arg (stmt, 1);
 3501 : }
 3502 :
 : /* mask_opno is the index of the mask argument for masked internal
 : functions, or -1 when there is none. */
 3503 16548 : int mask_opno = -1;
 3504 16548 : if (internal_fn_p (cfn))
 3505 : {
 3506 : /* We can only handle direct internal masked calls here,
 3507 : vectorizable_simd_clone_call is for the rest. */
 3508 14081 : if (cfn == CFN_MASK_CALL)
 3509 : return false;
 3510 13927 : mask_opno = internal_fn_mask_index (as_internal_fn (cfn));
 3511 : }
 3512 :
 3513 45912 : for (i = 0; i < nargs; i++)
 3514 : {
 3515 30762 : if ((int) i == mask_opno)
 3516 : {
 3517 4220 : if (!vect_check_scalar_mask (vinfo, slp_node, mask_opno,
 3518 : &slp_op[i], &dt[i], &vectypes[i]))
 3519 : return false;
 3520 4220 : continue;
 3521 : }
 3522 :
 3523 26542 : if (!vect_is_simple_use (vinfo, slp_node,
 3524 : i, &op, &slp_op[i], &dt[i], &vectypes[i]))
 3525 : {
 3526 0 : if (dump_enabled_p ())
 3527 0 : dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
 3528 : "use not simple.\n");
 3529 0 : return false;
 3530 : }
 3531 :
 3532 : /* We can only handle calls with arguments of the same type. */
 3533 26542 : if (rhs_type
 3534 26542 : && !types_compatible_p (rhs_type, TREE_TYPE (op)))
 3535 : {
 3536 1244 : if (dump_enabled_p ())
 3537 200 : dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
 3538 : "argument types differ.\n");
 3539 1244 : return false;
 3540 : }
 3541 25298 : if (!rhs_type)
 3542 13187 : rhs_type = TREE_TYPE (op);
 3543 :
 3544 25298 : if (!vectype_in)
 3545 13642 : vectype_in = vectypes[i];
 3546 11656 : else if (vectypes[i]
 3547 11656 : && !types_compatible_p (vectypes[i], vectype_in))
 3548 : {
 3549 0 : if (dump_enabled_p ())
 3550 0 : dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
 3551 : "argument vector types differ.\n");
 3552 0 : return false;
 3553 : }
 3554 : }
 3555 : /* If all arguments are external or constant defs, infer the vector type
 3556 : from the scalar type. */
 3557 15150 : if (!vectype_in)
 3558 5554 : vectype_in = get_vectype_for_scalar_type (vinfo, rhs_type, slp_node);
 3559 15150 : if (!cost_vec)
 3560 4210 : gcc_assert (vectype_in);
 3561 10940 : if (!vectype_in)
 3562 : {
 3563 1065 : if (dump_enabled_p ())
 3564 4 : dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
 3565 : "no vectype for scalar type %T\n", rhs_type);
 3566 :
 3567 1065 : return false;
 3568 : }
 3569 :
 3570 28170 : if (VECTOR_BOOLEAN_TYPE_P (vectype_out)
 3571 14085 : != VECTOR_BOOLEAN_TYPE_P (vectype_in))
 3572 : {
 3573 12 : if (dump_enabled_p ())
 3574 12 : dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
 3575 : "mixed mask and nonmask vector types\n");
 3576 12 : return false;
 3577 : }
 3578 :
 3579 14073 : if (vect_emulated_vector_p (vectype_in)
 3580 14073 : || vect_emulated_vector_p (vectype_out))
 3581 : {
 3582 0 : if (dump_enabled_p ())
 3583 0 : dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
 3584 : "use emulated vector type for call\n");
 3585 0 : return false;
 3586 : }
 3587 :
 3588 : /* FORNOW */
 : /* Classify the lane-count relation: NARROW when the output vector has
 : twice the lanes of the input, WIDEN for the reverse, NONE if equal. */
 3589 14073 : nunits_in = TYPE_VECTOR_SUBPARTS (vectype_in);
 3590 14073 : nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
 3591 14073 : if (known_eq (nunits_in * 2, nunits_out))
 3592 : modifier = NARROW;
 3593 13530 : else if (known_eq (nunits_out, nunits_in))
 3594 : modifier = NONE;
 3595 45 : else if (known_eq (nunits_out * 2, nunits_in))
 3596 : modifier = WIDEN;
 3597 : else
 3598 : return false;
 3599 :
 3600 : /* We only handle functions that do not read or clobber memory. */
 3601 28146 : if (gimple_vuse (stmt))
 3602 : {
 3603 1241 : if (dump_enabled_p ())
 3604 14 : dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
 3605 : "function reads from or writes to memory.\n");
 3606 1241 : return false;
 3607 : }
 3608 :
 3609 : /* For now, we only vectorize functions if a target specific builtin
 3610 : is available. TODO -- in some cases, it might be profitable to
 3611 : insert the calls for pieces of the vector, in order to be able
 3612 : to vectorize other operations in the loop. */
 3613 12832 : fndecl = NULL_TREE;
 3614 12832 : internal_fn ifn = IFN_LAST;
 3615 12832 : tree callee = gimple_call_fndecl (stmt);
 3616 :
 3617 : /* First try using an internal function. */
 3618 12832 : code_helper convert_code = MAX_TREE_CODES;
 3619 12832 : if (cfn != CFN_LAST
 3620 12832 : && (modifier == NONE
 3621 168 : || (modifier == NARROW
 3622 156 : && simple_integer_narrowing (vectype_out, vectype_in,
 3623 : &convert_code))))
 3624 11866 : ifn = vectorizable_internal_function (cfn, callee, vectype_out,
 3625 : vectype_in);
 3626 :
 3627 : /* Check if the operation traps. */
 3628 12832 : bool could_trap = gimple_could_trap_p (STMT_VINFO_STMT (stmt_info));
 3629 12832 : if (could_trap && cost_vec && loop_vinfo)
 3630 : {
 3631 : /* If the operation can trap it must be conditional, otherwise fail. */
 3632 414 : internal_fn cond_fn = (internal_fn_mask_index (ifn) != -1
 3633 414 : ? ifn : get_conditional_internal_fn (ifn));
 3634 414 : internal_fn cond_len_fn = get_len_internal_fn (cond_fn);
 3635 414 : if (LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo))
 3636 : {
 3637 : /* We assume that BB SLP fills all lanes, so no inactive lanes can
 3638 : cause issues. */
 3639 52 : if ((cond_fn == IFN_LAST
 3640 35 : || !direct_internal_fn_supported_p (cond_fn, vectype_out,
 3641 : OPTIMIZE_FOR_SPEED))
 3642 87 : && (cond_len_fn == IFN_LAST
 3643 35 : || !direct_internal_fn_supported_p (cond_len_fn, vectype_out,
 3644 : OPTIMIZE_FOR_SPEED)))
 3645 : {
 3646 52 : if (dump_enabled_p ())
 3647 10 : dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
 3648 : "can't use a fully-masked loop because no"
 3649 : " conditional operation is available.\n");
 3650 52 : LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo) = false;
 3651 : }
 3652 : }
 3653 : }
 3654 :
 3655 : /* If that fails, try asking for a target-specific built-in function. */
 3656 12832 : if (ifn == IFN_LAST)
 3657 : {
 3658 6762 : if (cfn != CFN_LAST)
 3659 5929 : fndecl = targetm.vectorize.builtin_vectorized_function
 3660 5929 : (cfn, vectype_out, vectype_in);
 3661 833 : else if (callee && fndecl_built_in_p (callee, BUILT_IN_MD))
 3662 24 : fndecl = targetm.vectorize.builtin_md_vectorized_function
 3663 24 : (callee, vectype_out, vectype_in);
 3664 : }
 3665 :
 3666 12832 : if (ifn == IFN_LAST && !fndecl)
 3667 : {
 3668 6468 : if (cfn == CFN_GOMP_SIMD_LANE
 3669 3207 : && SLP_TREE_LANES (slp_node) == 1
 3670 3207 : && loop_vinfo
 3671 3207 : && LOOP_VINFO_LOOP (loop_vinfo)->simduid
 3672 3207 : && TREE_CODE (gimple_call_arg (stmt, 0)) == SSA_NAME
 3673 12882 : && LOOP_VINFO_LOOP (loop_vinfo)->simduid
 3674 3207 : == SSA_NAME_VAR (gimple_call_arg (stmt, 0)))
 3675 : {
 3676 : /* We can handle IFN_GOMP_SIMD_LANE by returning a
 3677 : { 0, 1, 2, ... vf - 1 } vector. */
 3678 3207 : gcc_assert (nargs == 0);
 3679 : }
 3680 3261 : else if (modifier == NONE
 3681 3261 : && (gimple_call_builtin_p (stmt, BUILT_IN_BSWAP16)
 3682 2931 : || gimple_call_builtin_p (stmt, BUILT_IN_BSWAP32)
 3683 2783 : || gimple_call_builtin_p (stmt, BUILT_IN_BSWAP64)
 3684 2751 : || gimple_call_builtin_p (stmt, BUILT_IN_BSWAP128)))
 3685 206 : return vectorizable_bswap (vinfo, stmt_info, gsi, slp_node,
 3686 206 : slp_op, vectype_in, cost_vec);
 3687 : else
 3688 : {
 3689 3055 : if (dump_enabled_p ())
 3690 266 : dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
 3691 : "function is not vectorizable.\n");
 3692 3055 : return false;
 3693 : }
 3694 : }
 3695 :
 3696 9571 : int reduc_idx = SLP_TREE_REDUC_IDX (slp_node);
 3697 9571 : internal_fn cond_fn = (internal_fn_mask_index (ifn) != -1
 3698 9571 : ? ifn : get_conditional_internal_fn (ifn));
 3699 9571 : internal_fn cond_len_fn = get_len_internal_fn (cond_fn);
 3700 9571 : vec_loop_masks *masks = (loop_vinfo ? &LOOP_VINFO_MASKS (loop_vinfo) : NULL);
 3701 7691 : vec_loop_lens *lens = (loop_vinfo ? &LOOP_VINFO_LENS (loop_vinfo) : NULL);
 3702 9571 : unsigned int nvectors = vect_get_num_copies (vinfo, slp_node);
 3703 9571 : if (cost_vec) /* transformation not required. */
 3704 : {
 3705 16386 : for (i = 0; i < nargs; ++i)
 3706 11013 : if (!vect_maybe_update_slp_op_vectype (slp_op[i],
 3707 11013 : vectypes[i]
 3708 : ? vectypes[i] : vectype_in))
 3709 : {
 3710 0 : if (dump_enabled_p ())
 3711 0 : dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
 3712 : "incompatible vector types for invariants\n");
 3713 0 : return false;
 3714 : }
 3715 5373 : SLP_TREE_TYPE (slp_node) = call_vec_info_type;
 3716 5373 : DUMP_VECT_SCOPE ("vectorizable_call");
 3717 5373 : vect_model_simple_cost (vinfo, 1, slp_node, cost_vec);
 3718 :
 3719 5373 : if (loop_vinfo
 3720 4428 : && LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo)
 3721 3070 : && (reduc_idx >= 0 || could_trap || mask_opno >= 0))
 3722 : {
 3723 1930 : if (reduc_idx >= 0
 3724 1438 : && (cond_fn == IFN_LAST
 3725 1438 : || !direct_internal_fn_supported_p (cond_fn, vectype_out,
 3726 : OPTIMIZE_FOR_SPEED))
 3727 1940 : && (cond_len_fn == IFN_LAST
 3728 10 : || !direct_internal_fn_supported_p (cond_len_fn, vectype_out,
 3729 : OPTIMIZE_FOR_SPEED)))
 3730 : {
 3731 10 : if (dump_enabled_p ())
 3732 6 : dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
 3733 : "can't use a fully-masked loop because no"
 3734 : " conditional operation is available.\n");
 3735 10 : LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo) = false;
 3736 : }
 3737 : else
 3738 : {
 3739 1920 : tree scalar_mask = NULL_TREE;
 3740 1920 : if (mask_opno >= 0)
 3741 1920 : scalar_mask = gimple_call_arg (stmt_info->stmt, mask_opno);
 3742 1920 : if (cond_len_fn != IFN_LAST
 3743 1920 : && direct_internal_fn_supported_p (cond_len_fn, vectype_out,
 3744 : OPTIMIZE_FOR_SPEED))
 3745 0 : vect_record_loop_len (loop_vinfo, lens, nvectors, vectype_out,
 3746 : 1);
 3747 : else
 3748 1920 : vect_record_loop_mask (loop_vinfo, masks, nvectors, vectype_out,
 3749 : scalar_mask);
 3750 : }
 3751 : }
 3752 5373 : return true;
 3753 : }
 3754 :
 3755 : /* Transform. */
 3756 :
 3757 4198 : if (dump_enabled_p ())
 3758 415 : dump_printf_loc (MSG_NOTE, vect_location, "transform call.\n");
 3759 :
 3760 : /* Handle def. */
 3761 4198 : scalar_dest = gimple_call_lhs (stmt);
 3762 4198 : vec_dest = vect_create_destination_var (scalar_dest, vectype_out);
 3763 :
 3764 4198 : bool masked_loop_p = loop_vinfo && LOOP_VINFO_FULLY_MASKED_P (loop_vinfo);
 3765 3263 : bool len_loop_p = loop_vinfo && LOOP_VINFO_FULLY_WITH_LENGTH_P (loop_vinfo);
 3766 4198 : unsigned int vect_nargs = nargs;
 3767 4198 : if (len_loop_p && (reduc_idx >= 0 || could_trap || mask_opno >= 0))
 3768 : {
 3769 0 : ifn = cond_len_fn;
 3770 : /* COND_* -> COND_LEN_* takes 2 extra arguments: LEN, BIAS. */
 3771 0 : vect_nargs += 2;
 3772 : /* But unless there's a mask argument already we need that
 3773 : as well, and an else value. */
 3774 0 : if (mask_opno == -1)
 3775 0 : vect_nargs += 2;
 3776 : }
 3777 4198 : else if (masked_loop_p && mask_opno == -1 && (reduc_idx >= 0 || could_trap))
 3778 : {
 3779 0 : ifn = cond_fn;
 3780 0 : vect_nargs += 2;
 3781 : }
 3782 4198 : int len_opno = internal_fn_len_index (ifn);
 3783 4198 : if (clz_ctz_arg1)
 3784 59 : ++vect_nargs;
 3785 :
 3786 4198 : if (modifier == NONE || ifn != IFN_LAST)
 3787 : {
 3788 4166 : tree prev_res = NULL_TREE;
 3789 4166 : vargs.safe_grow (vect_nargs, true);
 3790 4166 : auto_vec<vec<tree> > vec_defs (nargs);
 3791 :
 3792 : /* Build argument list for the vectorized call. */
 3793 4166 : if (cfn == CFN_GOMP_SIMD_LANE)
 3794 : {
 3795 3308 : for (i = 0; i < nvectors; ++i)
 3796 : {
 3797 : /* ??? For multi-lane SLP we'd need to build
 3798 : { 0, 0, .., 1, 1, ... }. */
 3799 1708 : tree cst = build_index_vector (vectype_out,
 3800 : i * nunits_out, 1);
 3801 1708 : tree new_var
 3802 1708 : = vect_get_new_ssa_name (vectype_out, vect_simple_var, "cst_");
 3803 1708 : gimple *init_stmt = gimple_build_assign (new_var, cst);
 3804 1708 : vect_init_vector_1 (vinfo, stmt_info, init_stmt, NULL);
 3805 1708 : new_temp = make_ssa_name (vec_dest);
 3806 1708 : gimple *new_stmt = gimple_build_assign (new_temp, new_var);
 3807 1708 : vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
 3808 1708 : slp_node->push_vec_def (new_stmt);
 3809 : }
 3810 : }
 3811 : else
 3812 : {
 3813 2566 : vec<tree> vec_oprnds0;
 3814 2566 : vect_get_slp_defs (vinfo, slp_node, &vec_defs);
 3815 2566 : vec_oprnds0 = vec_defs[0];
 3816 :
 3817 : /* Arguments are ready. Create the new vector stmt. */
 3818 5279 : FOR_EACH_VEC_ELT (vec_oprnds0, i, vec_oprnd0)
 3819 : {
 3820 2713 : int varg = 0;
 3821 : /* Add the mask if necessary. */
 3822 38 : if ((masked_loop_p || len_loop_p) && mask_opno == -1
 3823 2715 : && internal_fn_mask_index (ifn) != -1)
 3824 : {
 3825 0 : gcc_assert (internal_fn_mask_index (ifn) == varg);
 3826 0 : if (masked_loop_p)
 3827 : {
 3828 0 : unsigned int vec_num = vec_oprnds0.length ();
 3829 0 : vargs[varg++] = vect_get_loop_mask (loop_vinfo, gsi,
 3830 : masks, vec_num,
 3831 : vectype_out, i);
 3832 : }
 3833 : else
 3834 : {
 3835 0 : tree mask_vectype = truth_type_for (vectype_out);
 3836 0 : vargs[varg++] = vect_build_all_ones_mask (loop_vinfo,
 3837 : stmt_info,
 3838 : mask_vectype);
 3839 : }
 3840 : }
 3841 : size_t k;
 3842 9932 : for (k = 0; k < nargs; k++)
 3843 : {
 3844 7219 : vec<tree> vec_oprndsk = vec_defs[k];
 3845 7219 : vargs[varg++] = vec_oprndsk[i];
 3846 : }
 3847 : /* Add the else value if necessary. */
 3848 38 : if ((masked_loop_p || len_loop_p) && mask_opno == -1
 3849 2715 : && internal_fn_else_index (ifn) != -1)
 3850 : {
 3851 0 : gcc_assert (internal_fn_else_index (ifn) == varg);
 3852 0 : if (reduc_idx >= 0)
 3853 0 : vargs[varg++] = vargs[reduc_idx + 1];
 3854 : else
 3855 : {
 3856 0 : auto else_value = targetm.preferred_else_value
 3857 0 : (ifn, vectype_out, varg - 1, &vargs[1]);
 3858 0 : vargs[varg++] = else_value;
 3859 : }
 3860 : }
 3861 2713 : if (clz_ctz_arg1)
 3862 59 : vargs[varg++] = clz_ctz_arg1;
 3863 :
 3864 2713 : gimple *new_stmt;
 3865 2713 : if (modifier == NARROW)
 3866 : {
 3867 : /* We don't define any narrowing conditional functions
 3868 : at present. */
 3869 0 : gcc_assert (mask_opno < 0);
 3870 0 : tree half_res = make_ssa_name (vectype_in);
 3871 0 : gcall *call = gimple_build_call_internal_vec (ifn, vargs);
 3872 0 : gimple_call_set_lhs (call, half_res);
 3873 0 : gimple_call_set_nothrow (call, true);
 3874 0 : vect_finish_stmt_generation (vinfo, stmt_info, call, gsi);
 : /* Narrowing pairs two half-width results: stash the even copy,
 : combine on the odd one. */
 3875 0 : if ((i & 1) == 0)
 3876 : {
 3877 0 : prev_res = half_res;
 3878 0 : continue;
 3879 : }
 3880 0 : new_temp = make_ssa_name (vec_dest);
 3881 0 : new_stmt = vect_gimple_build (new_temp, convert_code,
 3882 : prev_res, half_res);
 3883 0 : vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
 3884 : }
 3885 : else
 3886 : {
 3887 2713 : if (len_opno >= 0 && len_loop_p)
 3888 : {
 3889 0 : unsigned int vec_num = vec_oprnds0.length ();
 3890 0 : tree len = vect_get_loop_len (loop_vinfo, gsi, lens,
 3891 : vec_num, vectype_out, i, 1, true);
 3892 0 : signed char biasval
 3893 0 : = LOOP_VINFO_PARTIAL_LOAD_STORE_BIAS (loop_vinfo);
 3894 0 : tree bias = build_int_cst (intQI_type_node, biasval);
 3895 0 : vargs[len_opno] = len;
 3896 0 : vargs[len_opno + 1] = bias;
 3897 : }
 3898 2713 : else if (mask_opno >= 0 && masked_loop_p)
 3899 : {
 3900 36 : unsigned int vec_num = vec_oprnds0.length ();
 3901 36 : tree mask = vect_get_loop_mask (loop_vinfo, gsi, masks,
 3902 : vec_num, vectype_out, i);
 3903 36 : vargs[mask_opno]
 3904 72 : = prepare_vec_mask (loop_vinfo, TREE_TYPE (mask), mask,
 3905 36 : vargs[mask_opno], gsi);
 3906 : }
 3907 :
 3908 2713 : gcall *call;
 3909 2713 : if (ifn != IFN_LAST)
 3910 2632 : call = gimple_build_call_internal_vec (ifn, vargs);
 3911 : else
 3912 81 : call = gimple_build_call_vec (fndecl, vargs);
 3913 2713 : new_temp = make_ssa_name (vec_dest, call);
 3914 2713 : gimple_call_set_lhs (call, new_temp);
 3915 2713 : gimple_call_set_nothrow (call, true);
 3916 2713 : vect_finish_stmt_generation (vinfo, stmt_info, call, gsi);
 3917 2713 : new_stmt = call;
 3918 : }
 3919 2713 : slp_node->push_vec_def (new_stmt);
 3920 : }
 3921 : }
 3922 :
 3923 11035 : for (i = 0; i < nargs; i++)
 3924 : {
 3925 6869 : vec<tree> vec_oprndsi = vec_defs[i];
 3926 6869 : vec_oprndsi.release ();
 3927 : }
 3928 4166 : }
 3929 32 : else if (modifier == NARROW)
 3930 : {
 3931 32 : auto_vec<vec<tree> > vec_defs (nargs);
 3932 : /* We don't define any narrowing conditional functions at present. */
 3933 32 : gcc_assert (mask_opno < 0);
 3934 :
 3935 : /* Build argument list for the vectorized call. */
 3936 32 : vargs.create (nargs * 2);
 3937 :
 3938 32 : vect_get_slp_defs (vinfo, slp_node, &vec_defs);
 3939 32 : vec<tree> vec_oprnds0 = vec_defs[0];
 3940 :
 3941 : /* Arguments are ready. Create the new vector stmt. */
 3942 64 : for (i = 0; vec_oprnds0.iterate (i, &vec_oprnd0); i += 2)
 3943 : {
 3944 32 : size_t k;
 3945 32 : vargs.truncate (0);
 3946 64 : for (k = 0; k < nargs; k++)
 3947 : {
 3948 32 : vec<tree> vec_oprndsk = vec_defs[k];
 3949 32 : vargs.quick_push (vec_oprndsk[i]);
 3950 32 : vargs.quick_push (vec_oprndsk[i + 1]);
 3951 : }
 3952 32 : gcall *call;
 3953 32 : if (ifn != IFN_LAST)
 3954 : call = gimple_build_call_internal_vec (ifn, vargs);
 3955 : else
 3956 32 : call = gimple_build_call_vec (fndecl, vargs);
 3957 32 : new_temp = make_ssa_name (vec_dest, call);
 3958 32 : gimple_call_set_lhs (call, new_temp);
 3959 32 : gimple_call_set_nothrow (call, true);
 3960 32 : vect_finish_stmt_generation (vinfo, stmt_info, call, gsi);
 3961 32 : slp_node->push_vec_def (call);
 3962 : }
 3963 :
 3964 64 : for (i = 0; i < nargs; i++)
 3965 : {
 3966 32 : vec<tree> vec_oprndsi = vec_defs[i];
 3967 32 : vec_oprndsi.release ();
 3968 : }
 3969 32 : }
 3970 : else
 3971 : /* No current target implements this case. */
 3972 : return false;
 3973 :
 3974 4198 : vargs.release ();
 3975 :
 3976 4198 : return true;
 3977 2348636 : }
3978 :
3979 :
 3980 : struct simd_call_arg_info
 3981 : {
 3982 : tree vectype; /* Vector type of the argument, if any. */
 3983 : tree op; /* The (possibly adjusted base) operand. */
 3984 : HOST_WIDE_INT linear_step; /* Step for linear arguments, 0 otherwise. */
 3985 : enum vect_def_type dt; /* Vectorizer def kind of the argument. */
 3986 : unsigned int align; /* Known alignment, 0 if unknown. */
 3987 : bool simd_lane_linear; /* True if linear within a simd lane only. */
 3988 : };
3989 :
3990 : /* Helper function of vectorizable_simd_clone_call. If OP, an SSA_NAME,
3991 : is linear within simd lane (but not within whole loop), note it in
3992 : *ARGINFO. */
3993 :
 3994 : static void
 3995 15 : vect_simd_lane_linear (tree op, class loop *loop,
 3996 : struct simd_call_arg_info *arginfo)
 3997 : {
 3998 15 : gimple *def_stmt = SSA_NAME_DEF_STMT (op);
 3999 :
 : /* Only handle OP = BASE p+ V with an invariant BASE. */
 4000 15 : if (!is_gimple_assign (def_stmt)
 4001 15 : || gimple_assign_rhs_code (def_stmt) != POINTER_PLUS_EXPR
 4002 27 : || !is_gimple_min_invariant (gimple_assign_rhs1 (def_stmt)))
 4003 3 : return;
 4004 :
 : /* Walk V's def chain, folding invariant additions into BASE and a
 : single multiplication into LINEAR_STEP, until we either give up or
 : reach the .GOMP_SIMD_LANE call that proves per-lane linearity. */
 4005 12 : tree base = gimple_assign_rhs1 (def_stmt);
 4006 12 : HOST_WIDE_INT linear_step = 0;
 4007 12 : tree v = gimple_assign_rhs2 (def_stmt);
 4008 48 : while (TREE_CODE (v) == SSA_NAME)
 4009 : {
 4010 36 : tree t;
 4011 36 : def_stmt = SSA_NAME_DEF_STMT (v);
 4012 36 : if (is_gimple_assign (def_stmt))
 4013 24 : switch (gimple_assign_rhs_code (def_stmt))
 4014 : {
 4015 0 : case PLUS_EXPR:
 : /* A constant addend folds into the base -- but only before
 : any multiplication has been seen. */
 4016 0 : t = gimple_assign_rhs2 (def_stmt);
 4017 0 : if (linear_step || TREE_CODE (t) != INTEGER_CST)
 4018 : return;
 4019 0 : base = fold_build2 (POINTER_PLUS_EXPR, TREE_TYPE (base), base, t);
 4020 0 : v = gimple_assign_rhs1 (def_stmt);
 4021 0 : continue;
 4022 12 : case MULT_EXPR:
 : /* At most one multiplication by a nonzero constant. */
 4023 12 : t = gimple_assign_rhs2 (def_stmt);
 4024 12 : if (linear_step || !tree_fits_shwi_p (t) || integer_zerop (t))
 4025 0 : return;
 4026 12 : linear_step = tree_to_shwi (t);
 4027 12 : v = gimple_assign_rhs1 (def_stmt);
 4028 12 : continue;
 4029 12 : CASE_CONVERT:
 : /* Look through non-narrowing integer conversions. */
 4030 12 : t = gimple_assign_rhs1 (def_stmt);
 4031 12 : if (TREE_CODE (TREE_TYPE (t)) != INTEGER_TYPE
 4032 12 : || (TYPE_PRECISION (TREE_TYPE (v))
 4033 12 : < TYPE_PRECISION (TREE_TYPE (t))))
 4034 : return;
 4035 12 : if (!linear_step)
 4036 0 : linear_step = 1;
 4037 12 : v = t;
 4038 12 : continue;
 4039 : default:
 4040 : return;
 4041 : }
 : /* Success: V is the lane index from .GOMP_SIMD_LANE of this loop's
 : simduid, so OP is BASE + lane * LINEAR_STEP within a simd lane. */
 4042 12 : else if (gimple_call_internal_p (def_stmt, IFN_GOMP_SIMD_LANE)
 4043 12 : && loop->simduid
 4044 12 : && TREE_CODE (gimple_call_arg (def_stmt, 0)) == SSA_NAME
 4045 24 : && (SSA_NAME_VAR (gimple_call_arg (def_stmt, 0))
 4046 : == loop->simduid))
 4047 : {
 4048 12 : if (!linear_step)
 4049 0 : linear_step = 1;
 4050 12 : arginfo->linear_step = linear_step;
 4051 12 : arginfo->op = base;
 4052 12 : arginfo->simd_lane_linear = true;
 4053 12 : return;
 4054 : }
 4055 : }
 4056 : }
4057 :
4058 : /* Function vectorizable_simd_clone_call.
4059 :
4060 : Check if STMT_INFO performs a function call that can be vectorized
4061 : by calling a simd clone of the function.
4062 : If COST_VEC is passed, calculate costs but don't change anything,
4063 : otherwise, vectorize STMT_INFO: create a vectorized stmt to replace
4064 : it, and insert it at GSI.
4065 : Return true if STMT_INFO is vectorizable in this way. */
4066 :
4067 : static bool
4068 2339270 : vectorizable_simd_clone_call (vec_info *vinfo, stmt_vec_info stmt_info,
4069 : gimple_stmt_iterator *gsi,
4070 : slp_tree slp_node,
4071 : stmt_vector_for_cost *cost_vec)
4072 : {
4073 2339270 : tree vec_dest;
4074 2339270 : tree scalar_dest;
4075 2339270 : tree vec_oprnd0 = NULL_TREE;
4076 2339270 : tree vectype;
4077 2339270 : poly_uint64 nunits;
4078 2339270 : loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
4079 2339270 : bb_vec_info bb_vinfo = dyn_cast <bb_vec_info> (vinfo);
4080 2339270 : class loop *loop = loop_vinfo ? LOOP_VINFO_LOOP (loop_vinfo) : NULL;
4081 2339270 : tree fndecl, new_temp;
4082 2339270 : int j;
4083 2339270 : auto_vec<simd_call_arg_info> arginfo;
4084 2339270 : vec<tree> vargs = vNULL;
4085 2339270 : size_t i, nargs;
4086 2339270 : tree rtype, ratype;
4087 2339270 : vec<constructor_elt, va_gc> *ret_ctor_elts = NULL;
4088 2339270 : int masked_call_offset = 0;
4089 :
4090 : /* Is STMT a vectorizable call? */
4091 2339270 : gcall *stmt = dyn_cast <gcall *> (stmt_info->stmt);
4092 10894 : if (!stmt)
4093 : return false;
4094 :
4095 10894 : fndecl = gimple_call_fndecl (stmt);
4096 10894 : if (fndecl == NULL_TREE
4097 10894 : && gimple_call_internal_p (stmt, IFN_MASK_CALL))
4098 : {
4099 220 : fndecl = gimple_call_arg (stmt, 0);
4100 220 : gcc_checking_assert (TREE_CODE (fndecl) == ADDR_EXPR);
4101 220 : fndecl = TREE_OPERAND (fndecl, 0);
4102 220 : gcc_checking_assert (TREE_CODE (fndecl) == FUNCTION_DECL);
4103 : masked_call_offset = 1;
4104 : }
4105 10674 : if (fndecl == NULL_TREE)
4106 : return false;
4107 :
4108 4655 : struct cgraph_node *node = cgraph_node::get (fndecl);
4109 4655 : if (node == NULL || node->simd_clones == NULL)
4110 : return false;
4111 :
4112 1454 : if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
4113 : return false;
4114 :
4115 1454 : if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
4116 0 : && cost_vec)
4117 : return false;
4118 :
4119 1454 : if (gimple_call_lhs (stmt)
4120 1454 : && TREE_CODE (gimple_call_lhs (stmt)) != SSA_NAME)
4121 : return false;
4122 :
4123 1454 : gcc_checking_assert (!stmt_can_throw_internal (cfun, stmt));
4124 :
4125 1454 : vectype = SLP_TREE_VECTYPE (slp_node);
4126 :
4127 2339334 : if (loop_vinfo && nested_in_vect_loop_p (loop, stmt_info))
4128 : return false;
4129 :
4130 : /* Process function arguments. */
4131 1454 : nargs = gimple_call_num_args (stmt) - masked_call_offset;
4132 :
4133 : /* Bail out if the function has zero arguments. */
4134 1454 : if (nargs == 0)
4135 : return false;
4136 :
4137 1390 : vect_simd_clone_data _data;
4138 1390 : vect_simd_clone_data &data = slp_node->get_data (_data);
4139 1390 : vec<tree>& simd_clone_info = data.simd_clone_info;
4140 1390 : arginfo.reserve (nargs, true);
4141 1390 : auto_vec<slp_tree> slp_op;
4142 1390 : slp_op.safe_grow_cleared (nargs);
4143 :
4144 3996 : for (i = 0; i < nargs; i++)
4145 : {
4146 2606 : simd_call_arg_info thisarginfo;
4147 2606 : affine_iv iv;
4148 2606 : tree op;
4149 :
4150 2606 : thisarginfo.linear_step = 0;
4151 2606 : thisarginfo.align = 0;
4152 2606 : thisarginfo.op = NULL_TREE;
4153 2606 : thisarginfo.simd_lane_linear = false;
4154 :
4155 5212 : int op_no = vect_slp_child_index_for_operand (stmt,
4156 2606 : i + masked_call_offset,
4157 : false);
4158 5212 : if (!vect_is_simple_use (vinfo, slp_node,
4159 2606 : op_no, &op, &slp_op[i],
4160 : &thisarginfo.dt, &thisarginfo.vectype)
4161 2606 : || thisarginfo.dt == vect_uninitialized_def)
4162 : {
4163 0 : if (dump_enabled_p ())
4164 0 : dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4165 : "use not simple.\n");
4166 0 : return false;
4167 : }
4168 :
4169 2606 : if (thisarginfo.dt == vect_constant_def
4170 2606 : || thisarginfo.dt == vect_external_def)
4171 : {
4172 : /* With SLP we determine the vector type of constants/externals
4173 : at analysis time, handling conflicts via
4174 : vect_maybe_update_slp_op_vectype. At transform time
4175 : we have a vector type recorded for SLP. */
4176 699 : gcc_assert (cost_vec
4177 : || thisarginfo.vectype != NULL_TREE);
4178 : if (cost_vec)
4179 568 : thisarginfo.vectype = get_vectype_for_scalar_type (vinfo,
4180 568 : TREE_TYPE (op),
4181 : slp_node);
4182 : }
4183 : else
4184 1907 : gcc_assert (thisarginfo.vectype != NULL_TREE);
4185 :
4186 : /* For linear arguments, the analyze phase should have saved
4187 : the base and step. */
4188 2475 : if (!cost_vec
4189 1584 : && i * 3 + 4 <= simd_clone_info.length ()
4190 2685 : && simd_clone_info[i * 3 + 2])
4191 : {
4192 118 : thisarginfo.linear_step = tree_to_shwi (simd_clone_info[i * 3 + 2]);
4193 118 : thisarginfo.op = simd_clone_info[i * 3 + 1];
4194 118 : thisarginfo.simd_lane_linear
4195 118 : = (simd_clone_info[i * 3 + 3] == boolean_true_node);
4196 : /* If loop has been peeled for alignment, we need to adjust it. */
4197 118 : tree n1 = LOOP_VINFO_NITERS_UNCHANGED (loop_vinfo);
4198 118 : tree n2 = LOOP_VINFO_NITERS (loop_vinfo);
4199 118 : if (n1 != n2 && !thisarginfo.simd_lane_linear)
4200 : {
4201 0 : tree bias = fold_build2 (MINUS_EXPR, TREE_TYPE (n1), n1, n2);
4202 0 : tree step = simd_clone_info[i * 3 + 2];
4203 0 : tree opt = TREE_TYPE (thisarginfo.op);
4204 0 : bias = fold_convert (TREE_TYPE (step), bias);
4205 0 : bias = fold_build2 (MULT_EXPR, TREE_TYPE (step), bias, step);
4206 0 : thisarginfo.op
4207 0 : = fold_build2 (POINTER_TYPE_P (opt)
4208 : ? POINTER_PLUS_EXPR : PLUS_EXPR, opt,
4209 : thisarginfo.op, bias);
4210 : }
4211 : }
4212 2488 : else if (cost_vec
4213 1814 : && thisarginfo.dt != vect_constant_def
4214 1692 : && thisarginfo.dt != vect_external_def
4215 1246 : && loop_vinfo
4216 1241 : && SLP_TREE_LANES (slp_node) == 1
4217 1217 : && TREE_CODE (op) == SSA_NAME
4218 2434 : && simple_iv (loop, loop_containing_stmt (stmt), op,
4219 : &iv, false)
4220 2694 : && tree_fits_shwi_p (iv.step))
4221 : {
4222 206 : thisarginfo.linear_step = tree_to_shwi (iv.step);
4223 206 : thisarginfo.op = iv.base;
4224 : }
4225 2282 : else if ((thisarginfo.dt == vect_constant_def
4226 2282 : || thisarginfo.dt == vect_external_def)
4227 699 : && SLP_TREE_LANES (slp_node) == 1
4228 2583 : && POINTER_TYPE_P (TREE_TYPE (op)))
4229 86 : thisarginfo.align = get_pointer_alignment (op) / BITS_PER_UNIT;
4230 : /* Addresses of array elements indexed by GOMP_SIMD_LANE are
4231 : linear too. */
4232 2606 : if (SLP_TREE_LANES (slp_node) == 1
4233 2162 : && POINTER_TYPE_P (TREE_TYPE (op))
4234 196 : && !thisarginfo.linear_step
4235 112 : && cost_vec
4236 58 : && thisarginfo.dt != vect_constant_def
4237 58 : && thisarginfo.dt != vect_external_def
4238 15 : && loop_vinfo
4239 2621 : && TREE_CODE (op) == SSA_NAME)
4240 15 : vect_simd_lane_linear (op, loop, &thisarginfo);
4241 :
4242 2606 : if (!vectype)
4243 12 : vectype = thisarginfo.vectype;
4244 2606 : arginfo.quick_push (thisarginfo);
4245 : }
4246 :
4247 1390 : poly_uint64 vf = loop_vinfo ? LOOP_VINFO_VECT_FACTOR (loop_vinfo) : 1;
4248 1390 : unsigned group_size = SLP_TREE_LANES (slp_node);
4249 1390 : unsigned int badness = 0;
4250 1390 : unsigned int badness_inbranch = 0;
4251 1390 : struct cgraph_node *bestn = NULL;
4252 1390 : struct cgraph_node *bestn_inbranch = NULL;
4253 1390 : if (!cost_vec)
4254 357 : bestn = ((loop_vinfo && LOOP_VINFO_FULLY_MASKED_P (loop_vinfo))
4255 357 : ? data.clone_inbranch : data.clone);
4256 : else
4257 5979 : for (struct cgraph_node *n = node->simd_clones; n != NULL;
4258 4946 : n = n->simdclone->next_clone)
4259 : {
4260 4946 : unsigned int this_badness = 0;
4261 4946 : unsigned int num_calls;
4262 : /* The number of arguments in the call and the number of parameters in
4263 : the simdclone should match. However, when the simdclone is
4264 : 	     'inbranch', it could have one more parameter than nargs when using
4265 : 	     an inbranch simdclone to call a non-inbranch call, either in a
4266 : 	     non-masked loop using an all-true constant mask, or inside a masked
4267 : 	     loop using its mask.  */
4268 4946 : size_t simd_nargs = n->simdclone->nargs;
4269 4946 : if (!masked_call_offset && n->simdclone->inbranch)
4270 2333 : simd_nargs--;
4271 4946 : if (!constant_multiple_p (vf * group_size, n->simdclone->simdlen,
4272 : &num_calls)
4273 1952 : || (!n->simdclone->inbranch && (masked_call_offset > 0))
4274 1768 : || (nargs != simd_nargs))
4275 3178 : continue;
4276 1768 : if (num_calls != 1)
4277 1136 : this_badness += floor_log2 (num_calls) * 4096;
4278 1768 : if (n->simdclone->inbranch)
4279 764 : this_badness += 8192;
4280 :
4281 : /* If SLP_TREE_VECTYPE has not been set yet pass the general vector
4282 : mode, which for targets that use it will determine what ISA we can
4283 : vectorize this code with. */
4284 1768 : machine_mode vector_mode = vinfo->vector_mode;
4285 1768 : if (vectype)
4286 1768 : vector_mode = TYPE_MODE (vectype);
4287 1768 : int target_badness = targetm.simd_clone.usable (n, vector_mode);
4288 1768 : if (target_badness < 0)
4289 368 : continue;
4290 1400 : this_badness += target_badness * 512;
4291 4146 : for (i = 0; i < nargs; i++)
4292 : {
4293 2994 : switch (n->simdclone->args[i].arg_type)
4294 : {
4295 2064 : case SIMD_CLONE_ARG_TYPE_VECTOR:
4296 2064 : if (VECTOR_BOOLEAN_TYPE_P (n->simdclone->args[i].vector_type))
4297 : /* Vector mask arguments are not supported. */
4298 : i = -1;
4299 2056 : else if (!useless_type_conversion_p
4300 2056 : (n->simdclone->args[i].orig_type,
4301 2056 : TREE_TYPE (gimple_call_arg (stmt,
4302 : i + masked_call_offset))))
4303 : i = -1;
4304 2056 : else if (arginfo[i].dt == vect_constant_def
4305 1951 : || arginfo[i].dt == vect_external_def
4306 3943 : || arginfo[i].linear_step)
4307 392 : this_badness += 64;
4308 : break;
4309 310 : case SIMD_CLONE_ARG_TYPE_UNIFORM:
4310 310 : if ((arginfo[i].dt != vect_constant_def
4311 145 : && arginfo[i].dt != vect_external_def)
4312 410 : || SLP_TREE_LANES (slp_node) != 1)
4313 : i = -1;
4314 : break;
4315 324 : case SIMD_CLONE_ARG_TYPE_LINEAR_CONSTANT_STEP:
4316 324 : case SIMD_CLONE_ARG_TYPE_LINEAR_REF_CONSTANT_STEP:
4317 324 : if (arginfo[i].dt == vect_constant_def
4318 324 : || arginfo[i].dt == vect_external_def
4319 324 : || (arginfo[i].linear_step
4320 324 : != n->simdclone->args[i].linear_step))
4321 : i = -1;
4322 : break;
4323 : case SIMD_CLONE_ARG_TYPE_LINEAR_VARIABLE_STEP:
4324 : case SIMD_CLONE_ARG_TYPE_LINEAR_VAL_CONSTANT_STEP:
4325 : case SIMD_CLONE_ARG_TYPE_LINEAR_UVAL_CONSTANT_STEP:
4326 : case SIMD_CLONE_ARG_TYPE_LINEAR_REF_VARIABLE_STEP:
4327 : case SIMD_CLONE_ARG_TYPE_LINEAR_VAL_VARIABLE_STEP:
4328 : case SIMD_CLONE_ARG_TYPE_LINEAR_UVAL_VARIABLE_STEP:
4329 : /* FORNOW */
4330 : i = -1;
4331 : break;
4332 296 : case SIMD_CLONE_ARG_TYPE_MASK:
4333 296 : if (!SCALAR_INT_MODE_P (n->simdclone->mask_mode)
4334 264 : && n->simdclone->mask_mode != VOIDmode)
4335 : i = -1;
4336 : /* While we can create a traditional data vector from
4337 : an incoming integer mode mask we have no good way to
4338 : force generate an integer mode mask from a traditional
4339 : boolean vector input. */
4340 296 : else if (SCALAR_INT_MODE_P (n->simdclone->mask_mode)
4341 296 : && !SCALAR_INT_MODE_P (TYPE_MODE (arginfo[i].vectype)))
4342 : i = -1;
4343 290 : else if (n->simdclone->mask_mode == VOIDmode
4344 : /* FORNOW we only have partial support for vector-type
4345 : masks that can't hold all of simdlen. */
4346 554 : && (maybe_ne (TYPE_VECTOR_SUBPARTS (n->simdclone->args[i].vector_type),
4347 264 : TYPE_VECTOR_SUBPARTS (arginfo[i].vectype))
4348 : /* Verify we can compute the mask argument. */
4349 111 : || !expand_vec_cond_expr_p (n->simdclone->args[i].vector_type,
4350 111 : arginfo[i].vectype)))
4351 : i = -1;
4352 125 : else if (SCALAR_INT_MODE_P (n->simdclone->mask_mode)
4353 : /* FORNOW we only have partial support for
4354 : integer-type masks that represent the same number
4355 : of lanes as the vectorized mask inputs. */
4356 151 : && maybe_ne (exact_div (n->simdclone->simdlen,
4357 : n->simdclone->args[i].linear_step),
4358 26 : TYPE_VECTOR_SUBPARTS (arginfo[i].vectype)))
4359 : i = -1;
4360 107 : else if (!SCALAR_INT_MODE_P (n->simdclone->mask_mode)
4361 107 : && SCALAR_INT_MODE_P (TYPE_MODE (arginfo[i].vectype)))
4362 8 : this_badness += 2048;
4363 : break;
4364 : }
4365 183 : if (i == (size_t) -1)
4366 : break;
4367 2746 : if (n->simdclone->args[i].alignment > arginfo[i].align)
4368 : {
4369 : i = -1;
4370 : break;
4371 : }
4372 2746 : if (arginfo[i].align)
4373 110 : this_badness += (exact_log2 (arginfo[i].align)
4374 160 : - exact_log2 (n->simdclone->args[i].alignment));
4375 : }
4376 1400 : if (i == (size_t) -1)
4377 248 : continue;
4378 1152 : if (masked_call_offset == 0
4379 1045 : && n->simdclone->inbranch
4380 340 : && n->simdclone->nargs > nargs)
4381 : {
4382 340 : gcc_assert (n->simdclone->args[n->simdclone->nargs - 1].arg_type ==
4383 : SIMD_CLONE_ARG_TYPE_MASK);
4384 : /* Penalize using a masked SIMD clone in a non-masked loop, that is
4385 : not in a branch, as we'd have to construct an all-true mask. */
4386 340 : this_badness += 64;
4387 : }
4388 1152 : if (bestn == NULL || this_badness < badness)
4389 : {
4390 797 : bestn = n;
4391 797 : badness = this_badness;
4392 : }
4393 1152 : if (n->simdclone->inbranch
4394 447 : && (bestn_inbranch == NULL || this_badness < badness_inbranch))
4395 : {
4396 4946 : bestn_inbranch = n;
4397 4946 : badness_inbranch = this_badness;
4398 : }
4399 : }
4400 :
4401 1390 : if (bestn == NULL)
4402 : return false;
4403 :
4404 809 : fndecl = bestn->decl;
4405 809 : nunits = bestn->simdclone->simdlen;
4406 809 : int ncopies = vector_unroll_factor (vf * group_size, nunits);
4407 :
4408 : /* If the function isn't const, only allow it in simd loops where user
4409 : has asserted that at least nunits consecutive iterations can be
4410 : performed using SIMD instructions. */
4411 804 : if ((loop == NULL || maybe_lt ((unsigned) loop->safelen, nunits))
4412 966 : && gimple_vuse (stmt))
4413 : return false;
4414 :
4415 : /* ncopies is the number of SIMD clone calls we create, since simdlen
4416 : is not necessarily matching nunits of the vector types used, track
4417 : that in ncopies_in. */
4418 809 : int ncopies_in = vect_get_num_vectors (vf * group_size, vectype);
4419 :
4420 : /* Sanity check: make sure that at least one copy of the vectorized stmt
4421 : needs to be generated. */
4422 809 : gcc_assert (ncopies >= 1);
4423 :
4424 809 : if (cost_vec) /* transformation not required. */
4425 : {
4426 1483 : for (unsigned i = 0; i < nargs; ++i)
4427 1031 : if (!vect_maybe_update_slp_op_vectype (slp_op[i], arginfo[i].vectype))
4428 : {
4429 0 : if (dump_enabled_p ())
4430 0 : dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4431 : "incompatible vector types for invariants\n");
4432 0 : return false;
4433 : }
4434 :
4435 452 : if (!bestn_inbranch && loop_vinfo)
4436 : {
4437 237 : if (dump_enabled_p ()
4438 237 : && LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo))
4439 171 : dump_printf_loc (MSG_NOTE, vect_location,
4440 : "can't use a fully-masked loop because no"
4441 : " masked simd clone was available.\n");
4442 237 : LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo) = false;
4443 : }
4444 :
4445 : /* When the original call is pure or const but the SIMD ABI dictates
4446 : an aggregate return we will have to use a virtual definition and
4447 : in a loop eventually even need to add a virtual PHI. That's
4448 : not straight-forward so allow to fix this up via renaming. */
4449 452 : if (gimple_call_lhs (stmt)
4450 446 : && !gimple_vdef (stmt)
4451 802 : && TREE_CODE (TREE_TYPE (TREE_TYPE (bestn->decl))) == ARRAY_TYPE)
4452 27 : vinfo->any_known_not_updated_vssa = true;
4453 : /* ??? For SLP code-gen we end up inserting after the last
4454 : vector argument def rather than at the original call position
4455 : so automagic virtual operand updating doesn't work. */
4456 904 : if (gimple_vuse (stmt))
4457 139 : vinfo->any_known_not_updated_vssa = true;
4458 :
4459 452 : data.clone = bestn;
4460 452 : data.clone_inbranch = bestn_inbranch;
4461 :
4462 452 : simd_clone_info.safe_push (NULL_TREE);
4463 1628 : for (i = 0;
4464 2445 : i < (bestn_inbranch ? bestn_inbranch : bestn)->simdclone->nargs; i++)
4465 : {
4466 1176 : if (loop_vinfo
4467 1170 : && LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo)
4468 473 : && (bestn_inbranch->simdclone->args[i].arg_type
4469 : == SIMD_CLONE_ARG_TYPE_MASK))
4470 : {
4471 170 : if (masked_call_offset)
4472 : /* When there is an explicit mask we require the
4473 : number of elements to match up. */
4474 49 : vect_record_loop_mask (loop_vinfo,
4475 : &LOOP_VINFO_MASKS (loop_vinfo),
4476 : ncopies_in, vectype, NULL_TREE);
4477 : else
4478 : {
4479 : /* When there is no explicit mask on the call we have
4480 : more relaxed requirements. */
4481 121 : tree masktype;
4482 121 : poly_uint64 callee_nelements;
4483 121 : if (SCALAR_INT_MODE_P (bestn_inbranch->simdclone->mask_mode))
4484 : {
4485 12 : callee_nelements
4486 12 : = exact_div (bestn_inbranch->simdclone->simdlen,
4487 : bestn_inbranch->simdclone->args[i].linear_step);
4488 12 : masktype = get_related_vectype_for_scalar_type
4489 12 : (vinfo->vector_mode, TREE_TYPE (vectype),
4490 : callee_nelements);
4491 : }
4492 : else
4493 : {
4494 109 : masktype = bestn_inbranch->simdclone->args[i].vector_type;
4495 : /* The aarch64 port will add custom attributes to types
4496 : for SVE simdclones which make the types different. We
4497 : 		       should use canonical types for masks within the
4498 : vectorizer, hence we construct the related vectype
4499 : here. */
4500 109 : masktype
4501 : = build_truth_vector_type_for_mode
4502 109 : (TYPE_VECTOR_SUBPARTS (masktype),
4503 109 : TYPE_MODE (masktype));
4504 109 : callee_nelements = TYPE_VECTOR_SUBPARTS (masktype);
4505 : }
4506 121 : auto o = vector_unroll_factor (nunits, callee_nelements);
4507 121 : vect_record_loop_mask (loop_vinfo,
4508 : &LOOP_VINFO_MASKS (loop_vinfo),
4509 : ncopies * o, masktype, NULL_TREE);
4510 : }
4511 : }
4512 1006 : else if ((bestn->simdclone->args[i].arg_type
4513 : == SIMD_CLONE_ARG_TYPE_LINEAR_CONSTANT_STEP)
4514 899 : || (bestn->simdclone->args[i].arg_type
4515 : == SIMD_CLONE_ARG_TYPE_LINEAR_REF_CONSTANT_STEP)
4516 888 : || (bestn_inbranch
4517 359 : && ((bestn_inbranch->simdclone->args[i].arg_type
4518 : == SIMD_CLONE_ARG_TYPE_LINEAR_CONSTANT_STEP)
4519 359 : || (bestn_inbranch->simdclone->args[i].arg_type
4520 : == SIMD_CLONE_ARG_TYPE_LINEAR_REF_CONSTANT_STEP))))
4521 : {
4522 118 : simd_clone_info.safe_grow_cleared (i * 3 + 1, true);
4523 118 : simd_clone_info.safe_push (arginfo[i].op);
4524 202 : tree lst = (POINTER_TYPE_P (TREE_TYPE (arginfo[i].op))
4525 202 : ? size_type_node : TREE_TYPE (arginfo[i].op));
4526 118 : tree ls = build_int_cst (lst, arginfo[i].linear_step);
4527 118 : simd_clone_info.safe_push (ls);
4528 118 : tree sll = (arginfo[i].simd_lane_linear
4529 118 : ? boolean_true_node : boolean_false_node);
4530 118 : simd_clone_info.safe_push (sll);
4531 : }
4532 : }
4533 :
4534 452 : SLP_TREE_TYPE (slp_node) = call_simd_clone_vec_info_type;
4535 452 : slp_node->data = new vect_simd_clone_data (std::move (_data));
4536 452 : DUMP_VECT_SCOPE ("vectorizable_simd_clone_call");
4537 : /* vect_model_simple_cost (vinfo, 1, slp_node, cost_vec); */
4538 452 : return true;
4539 : }
4540 :
4541 : /* Transform. */
4542 :
4543 357 : if (dump_enabled_p ())
4544 246 : dump_printf_loc (MSG_NOTE, vect_location, "transform call.\n");
4545 :
4546 : /* Handle def. */
4547 357 : scalar_dest = gimple_call_lhs (stmt);
4548 357 : vec_dest = NULL_TREE;
4549 357 : rtype = NULL_TREE;
4550 357 : ratype = NULL_TREE;
4551 357 : if (scalar_dest)
4552 : {
4553 351 : vec_dest = vect_create_destination_var (scalar_dest, vectype);
4554 351 : rtype = TREE_TYPE (TREE_TYPE (fndecl));
4555 351 : if (TREE_CODE (rtype) == ARRAY_TYPE)
4556 : {
4557 9 : ratype = rtype;
4558 9 : rtype = TREE_TYPE (ratype);
4559 : }
4560 : }
4561 :
4562 714 : auto_vec<vec<tree> > vec_oprnds;
4563 357 : auto_vec<unsigned> vec_oprnds_i;
4564 357 : vec_oprnds_i.safe_grow_cleared (nargs, true);
4565 357 : vec_oprnds.reserve_exact (nargs);
4566 357 : vect_get_slp_defs (vinfo, slp_node, &vec_oprnds);
4567 823 : for (j = 0; j < ncopies; ++j)
4568 : {
4569 466 : poly_uint64 callee_nelements;
4570 466 : poly_uint64 caller_nelements;
4571 : /* Build argument list for the vectorized call. */
4572 466 : if (j == 0)
4573 357 : vargs.create (nargs);
4574 : else
4575 109 : vargs.truncate (0);
4576 :
4577 1570 : for (i = 0; i < nargs; i++)
4578 : {
4579 1104 : unsigned int k, l, m, o;
4580 1104 : tree atype;
4581 1104 : tree op = gimple_call_arg (stmt, i + masked_call_offset);
4582 1104 : switch (bestn->simdclone->args[i].arg_type)
4583 : {
4584 815 : case SIMD_CLONE_ARG_TYPE_VECTOR:
4585 815 : atype = bestn->simdclone->args[i].vector_type;
4586 815 : caller_nelements = TYPE_VECTOR_SUBPARTS (arginfo[i].vectype);
4587 815 : callee_nelements = TYPE_VECTOR_SUBPARTS (atype);
4588 815 : o = vector_unroll_factor (nunits, callee_nelements);
4589 1860 : for (m = j * o; m < (j + 1) * o; m++)
4590 : {
4591 1045 : if (known_lt (callee_nelements, caller_nelements))
4592 : {
4593 516 : poly_uint64 prec = GET_MODE_BITSIZE (TYPE_MODE (atype));
4594 258 : if (!constant_multiple_p (caller_nelements,
4595 : callee_nelements, &k))
4596 0 : gcc_unreachable ();
4597 :
4598 258 : gcc_assert ((k & (k - 1)) == 0);
4599 258 : if (m == 0)
4600 : {
4601 57 : vec_oprnds_i[i] = 0;
4602 57 : vec_oprnd0 = vec_oprnds[i][vec_oprnds_i[i]++];
4603 : }
4604 : else
4605 : {
4606 201 : vec_oprnd0 = arginfo[i].op;
4607 201 : if ((m & (k - 1)) == 0)
4608 72 : vec_oprnd0 = vec_oprnds[i][vec_oprnds_i[i]++];
4609 : }
4610 258 : arginfo[i].op = vec_oprnd0;
4611 258 : vec_oprnd0
4612 258 : = build3 (BIT_FIELD_REF, atype, vec_oprnd0,
4613 258 : bitsize_int (prec),
4614 258 : bitsize_int ((m & (k - 1)) * prec));
4615 258 : gassign *new_stmt
4616 258 : = gimple_build_assign (make_ssa_name (atype),
4617 : vec_oprnd0);
4618 258 : vect_finish_stmt_generation (vinfo, stmt_info,
4619 : new_stmt, gsi);
4620 258 : vargs.safe_push (gimple_assign_lhs (new_stmt));
4621 : }
4622 : else
4623 : {
4624 787 : if (!constant_multiple_p (callee_nelements,
4625 : caller_nelements, &k))
4626 0 : gcc_unreachable ();
4627 787 : gcc_assert ((k & (k - 1)) == 0);
4628 787 : vec<constructor_elt, va_gc> *ctor_elts;
4629 787 : if (k != 1)
4630 14 : vec_alloc (ctor_elts, k);
4631 : else
4632 773 : ctor_elts = NULL;
4633 815 : for (l = 0; l < k; l++)
4634 : {
4635 801 : if (m == 0 && l == 0)
4636 : {
4637 449 : vec_oprnds_i[i] = 0;
4638 449 : vec_oprnd0 = vec_oprnds[i][vec_oprnds_i[i]++];
4639 : }
4640 : else
4641 352 : vec_oprnd0 = vec_oprnds[i][vec_oprnds_i[i]++];
4642 801 : arginfo[i].op = vec_oprnd0;
4643 801 : if (k == 1)
4644 : break;
4645 28 : CONSTRUCTOR_APPEND_ELT (ctor_elts, NULL_TREE,
4646 : vec_oprnd0);
4647 : }
4648 787 : if (k == 1)
4649 773 : if (!useless_type_conversion_p (TREE_TYPE (vec_oprnd0),
4650 : atype))
4651 : {
4652 0 : vec_oprnd0 = build1 (VIEW_CONVERT_EXPR, atype,
4653 : vec_oprnd0);
4654 0 : gassign *new_stmt
4655 0 : = gimple_build_assign (make_ssa_name (atype),
4656 : vec_oprnd0);
4657 0 : vect_finish_stmt_generation (vinfo, stmt_info,
4658 : new_stmt, gsi);
4659 0 : vargs.safe_push (gimple_get_lhs (new_stmt));
4660 : }
4661 : else
4662 773 : vargs.safe_push (vec_oprnd0);
4663 : else
4664 : {
4665 14 : vec_oprnd0 = build_constructor (atype, ctor_elts);
4666 14 : gassign *new_stmt
4667 14 : = gimple_build_assign (make_ssa_name (atype),
4668 : vec_oprnd0);
4669 14 : vect_finish_stmt_generation (vinfo, stmt_info,
4670 : new_stmt, gsi);
4671 14 : vargs.safe_push (gimple_assign_lhs (new_stmt));
4672 : }
4673 : }
4674 : }
4675 : break;
4676 66 : case SIMD_CLONE_ARG_TYPE_MASK:
4677 66 : if (bestn->simdclone->mask_mode == VOIDmode)
4678 : {
4679 60 : atype = bestn->simdclone->args[i].vector_type;
4680 60 : tree elt_type = TREE_TYPE (atype);
4681 60 : tree one = fold_convert (elt_type, integer_one_node);
4682 60 : tree zero = fold_convert (elt_type, integer_zero_node);
4683 60 : callee_nelements = TYPE_VECTOR_SUBPARTS (atype);
4684 60 : caller_nelements = TYPE_VECTOR_SUBPARTS (arginfo[i].vectype);
4685 60 : o = vector_unroll_factor (nunits, callee_nelements);
4686 120 : for (m = j * o; m < (j + 1) * o; m++)
4687 : {
4688 60 : if (maybe_lt (callee_nelements, caller_nelements))
4689 : {
4690 : /* The mask type has fewer elements than simdlen. */
4691 :
4692 : /* FORNOW */
4693 0 : gcc_unreachable ();
4694 : }
4695 60 : else if (known_eq (callee_nelements, caller_nelements))
4696 : {
4697 : /* The SIMD clone function has the same number of
4698 : elements as the current function. */
4699 60 : if (m == 0)
4700 60 : vec_oprnds_i[i] = 0;
4701 60 : vec_oprnd0 = vec_oprnds[i][vec_oprnds_i[i]++];
4702 60 : if (loop_vinfo
4703 60 : && LOOP_VINFO_FULLY_MASKED_P (loop_vinfo))
4704 : {
4705 0 : vec_loop_masks *loop_masks
4706 : = &LOOP_VINFO_MASKS (loop_vinfo);
4707 0 : tree loop_mask
4708 0 : = vect_get_loop_mask (loop_vinfo, gsi,
4709 : loop_masks, ncopies_in,
4710 0 : vectype, j);
4711 0 : vec_oprnd0
4712 0 : = prepare_vec_mask (loop_vinfo,
4713 0 : TREE_TYPE (loop_mask),
4714 : loop_mask, vec_oprnd0,
4715 : gsi);
4716 0 : loop_vinfo->vec_cond_masked_set.add ({ vec_oprnd0,
4717 : loop_mask });
4718 :
4719 : }
4720 60 : vec_oprnd0
4721 60 : = build3 (VEC_COND_EXPR, atype, vec_oprnd0,
4722 : build_vector_from_val (atype, one),
4723 : build_vector_from_val (atype, zero));
4724 60 : gassign *new_stmt
4725 60 : = gimple_build_assign (make_ssa_name (atype),
4726 : vec_oprnd0);
4727 60 : vect_finish_stmt_generation (vinfo, stmt_info,
4728 : new_stmt, gsi);
4729 60 : vargs.safe_push (gimple_assign_lhs (new_stmt));
4730 : }
4731 : else
4732 : {
4733 : /* The mask type has more elements than simdlen. */
4734 :
4735 : /* FORNOW */
4736 0 : gcc_unreachable ();
4737 : }
4738 : }
4739 : }
4740 6 : else if (SCALAR_INT_MODE_P (bestn->simdclone->mask_mode))
4741 : {
4742 6 : atype = bestn->simdclone->args[i].vector_type;
4743 6 : poly_uint64 atype_subparts
4744 6 : = exact_div (bestn->simdclone->simdlen,
4745 : bestn->simdclone->args[i].linear_step);
4746 6 : o = bestn->simdclone->args[i].linear_step;
4747 12 : for (m = j * o; m < (j + 1) * o; m++)
4748 : {
4749 6 : if (m == 0)
4750 6 : vec_oprnds_i[i] = 0;
4751 6 : if (maybe_lt (atype_subparts,
4752 6 : TYPE_VECTOR_SUBPARTS (arginfo[i].vectype)))
4753 : {
4754 : /* The mask argument has fewer elements than the
4755 : input vector. */
4756 : /* FORNOW */
4757 0 : gcc_unreachable ();
4758 : }
4759 6 : else if (known_eq (atype_subparts,
4760 : TYPE_VECTOR_SUBPARTS (arginfo[i].vectype)))
4761 : {
4762 6 : vec_oprnd0 = vec_oprnds[i][vec_oprnds_i[i]++];
4763 6 : if (loop_vinfo
4764 6 : && LOOP_VINFO_FULLY_MASKED_P (loop_vinfo))
4765 : {
4766 1 : vec_loop_masks *loop_masks
4767 : = &LOOP_VINFO_MASKS (loop_vinfo);
4768 1 : tree loop_mask
4769 1 : = vect_get_loop_mask (loop_vinfo, gsi,
4770 : loop_masks, ncopies_in,
4771 : vectype, j);
4772 1 : vec_oprnd0
4773 1 : = prepare_vec_mask (loop_vinfo,
4774 1 : TREE_TYPE (loop_mask),
4775 : loop_mask, vec_oprnd0,
4776 : gsi);
4777 : }
4778 : /* The vector mask argument matches the input
4779 : in the number of lanes, but not necessarily
4780 : in the mode. */
4781 6 : tree st = lang_hooks.types.type_for_mode
4782 6 : (TYPE_MODE (TREE_TYPE (vec_oprnd0)), 1);
4783 6 : vec_oprnd0 = build1 (VIEW_CONVERT_EXPR, st,
4784 : vec_oprnd0);
4785 6 : gassign *new_stmt
4786 6 : = gimple_build_assign (make_ssa_name (st),
4787 : vec_oprnd0);
4788 6 : vect_finish_stmt_generation (vinfo, stmt_info,
4789 : new_stmt, gsi);
4790 6 : if (!types_compatible_p (atype, st))
4791 : {
4792 6 : new_stmt
4793 6 : = gimple_build_assign (make_ssa_name (atype),
4794 : NOP_EXPR,
4795 : gimple_assign_lhs
4796 : (new_stmt));
4797 6 : vect_finish_stmt_generation (vinfo, stmt_info,
4798 : new_stmt, gsi);
4799 : }
4800 6 : vargs.safe_push (gimple_assign_lhs (new_stmt));
4801 : }
4802 : else
4803 : {
4804 : /* The mask argument has more elements than the
4805 : input vector. */
4806 : /* FORNOW */
4807 0 : gcc_unreachable ();
4808 : }
4809 : }
4810 : }
4811 : else
4812 0 : gcc_unreachable ();
4813 : break;
4814 102 : case SIMD_CLONE_ARG_TYPE_UNIFORM:
4815 102 : vargs.safe_push (op);
4816 102 : break;
4817 121 : case SIMD_CLONE_ARG_TYPE_LINEAR_CONSTANT_STEP:
4818 121 : case SIMD_CLONE_ARG_TYPE_LINEAR_REF_CONSTANT_STEP:
4819 121 : if (j == 0)
4820 : {
4821 118 : gimple_seq stmts;
4822 118 : arginfo[i].op
4823 118 : = force_gimple_operand (unshare_expr (arginfo[i].op),
4824 : &stmts, true, NULL_TREE);
4825 118 : if (stmts != NULL)
4826 : {
4827 0 : basic_block new_bb;
4828 0 : edge pe = loop_preheader_edge (loop);
4829 0 : new_bb = gsi_insert_seq_on_edge_immediate (pe, stmts);
4830 0 : gcc_assert (!new_bb);
4831 : }
4832 118 : if (arginfo[i].simd_lane_linear)
4833 : {
4834 6 : vargs.safe_push (arginfo[i].op);
4835 6 : break;
4836 : }
4837 112 : tree phi_res = copy_ssa_name (op);
4838 112 : gphi *new_phi = create_phi_node (phi_res, loop->header);
4839 112 : add_phi_arg (new_phi, arginfo[i].op,
4840 : loop_preheader_edge (loop), UNKNOWN_LOCATION);
4841 112 : enum tree_code code
4842 196 : = POINTER_TYPE_P (TREE_TYPE (op))
4843 112 : ? POINTER_PLUS_EXPR : PLUS_EXPR;
4844 196 : tree type = POINTER_TYPE_P (TREE_TYPE (op))
4845 196 : ? sizetype : TREE_TYPE (op);
4846 112 : poly_widest_int cst
4847 112 : = wi::mul (bestn->simdclone->args[i].linear_step,
4848 112 : ncopies * nunits);
4849 112 : tree tcst = wide_int_to_tree (type, cst);
4850 112 : tree phi_arg = copy_ssa_name (op);
4851 112 : gassign *new_stmt
4852 112 : = gimple_build_assign (phi_arg, code, phi_res, tcst);
4853 112 : gimple_stmt_iterator si = gsi_after_labels (loop->header);
4854 112 : gsi_insert_after (&si, new_stmt, GSI_NEW_STMT);
4855 112 : add_phi_arg (new_phi, phi_arg, loop_latch_edge (loop),
4856 : UNKNOWN_LOCATION);
4857 112 : arginfo[i].op = phi_res;
4858 112 : vargs.safe_push (phi_res);
4859 112 : }
4860 : else
4861 : {
4862 3 : enum tree_code code
4863 6 : = POINTER_TYPE_P (TREE_TYPE (op))
4864 3 : ? POINTER_PLUS_EXPR : PLUS_EXPR;
4865 6 : tree type = POINTER_TYPE_P (TREE_TYPE (op))
4866 6 : ? sizetype : TREE_TYPE (op);
4867 3 : poly_widest_int cst
4868 3 : = wi::mul (bestn->simdclone->args[i].linear_step,
4869 3 : j * nunits);
4870 3 : tree tcst = wide_int_to_tree (type, cst);
4871 3 : new_temp = make_ssa_name (TREE_TYPE (op));
4872 3 : gassign *new_stmt
4873 6 : = gimple_build_assign (new_temp, code,
4874 3 : arginfo[i].op, tcst);
4875 3 : vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
4876 3 : vargs.safe_push (new_temp);
4877 3 : }
4878 : break;
4879 0 : case SIMD_CLONE_ARG_TYPE_LINEAR_VAL_CONSTANT_STEP:
4880 0 : case SIMD_CLONE_ARG_TYPE_LINEAR_UVAL_CONSTANT_STEP:
4881 0 : case SIMD_CLONE_ARG_TYPE_LINEAR_VARIABLE_STEP:
4882 0 : case SIMD_CLONE_ARG_TYPE_LINEAR_REF_VARIABLE_STEP:
4883 0 : case SIMD_CLONE_ARG_TYPE_LINEAR_VAL_VARIABLE_STEP:
4884 0 : case SIMD_CLONE_ARG_TYPE_LINEAR_UVAL_VARIABLE_STEP:
4885 0 : default:
4886 0 : gcc_unreachable ();
4887 : }
4888 : }
4889 :
4890 466 : if (masked_call_offset == 0
4891 400 : && bestn->simdclone->inbranch
4892 8 : && bestn->simdclone->nargs > nargs)
4893 : {
4894 8 : unsigned long m, o;
4895 8 : size_t mask_i = bestn->simdclone->nargs - 1;
4896 8 : tree mask;
4897 8 : gcc_assert (bestn->simdclone->args[mask_i].arg_type ==
4898 : SIMD_CLONE_ARG_TYPE_MASK);
4899 :
4900 8 : tree mask_argtype = bestn->simdclone->args[mask_i].vector_type;
4901 8 : tree mask_vectype;
4902 8 : if (SCALAR_INT_MODE_P (bestn->simdclone->mask_mode))
4903 : {
4904 2 : callee_nelements = exact_div (bestn->simdclone->simdlen,
4905 : bestn->simdclone->args[i].linear_step);
4906 2 : mask_vectype = get_related_vectype_for_scalar_type
4907 2 : (vinfo->vector_mode, TREE_TYPE (vectype), callee_nelements);
4908 : }
4909 : else
4910 : {
4911 6 : mask_vectype = mask_argtype;
4912 6 : callee_nelements = TYPE_VECTOR_SUBPARTS (mask_vectype);
4913 : }
4914 8 : o = vector_unroll_factor (nunits, callee_nelements);
4915 16 : for (m = j * o; m < (j + 1) * o; m++)
4916 : {
4917 8 : if (loop_vinfo && LOOP_VINFO_FULLY_MASKED_P (loop_vinfo))
4918 : {
4919 1 : vec_loop_masks *loop_masks = &LOOP_VINFO_MASKS (loop_vinfo);
4920 1 : mask = vect_get_loop_mask (loop_vinfo, gsi, loop_masks,
4921 : ncopies * o, mask_vectype, m);
4922 : }
4923 : else
4924 7 : mask = vect_build_all_ones_mask (vinfo, stmt_info,
4925 : mask_argtype);
4926 :
4927 8 : gassign *new_stmt;
4928 8 : if (SCALAR_INT_MODE_P (bestn->simdclone->mask_mode))
4929 : {
4930 : /* This means we are dealing with integer mask modes.
4931 : First convert to an integer type with the same size as
4932 : the current vector type. */
4933 2 : unsigned HOST_WIDE_INT intermediate_size
4934 2 : = tree_to_uhwi (TYPE_SIZE (TREE_TYPE (mask)));
4935 2 : tree mid_int_type =
4936 2 : build_nonstandard_integer_type (intermediate_size, 1);
4937 2 : mask = build1 (VIEW_CONVERT_EXPR, mid_int_type, mask);
4938 2 : new_stmt
4939 2 : = gimple_build_assign (make_ssa_name (mid_int_type),
4940 : mask);
4941 2 : gsi_insert_before (gsi, new_stmt, GSI_SAME_STMT);
4942 : /* Then zero-extend to the mask mode. */
4943 2 : mask = fold_build1 (NOP_EXPR, mask_argtype,
4944 : gimple_get_lhs (new_stmt));
4945 : }
4946 6 : else if (bestn->simdclone->mask_mode == VOIDmode)
4947 6 : mask = build3 (VEC_COND_EXPR, mask_argtype, mask,
4948 : build_one_cst (mask_argtype),
4949 : build_zero_cst (mask_argtype));
4950 : else
4951 0 : gcc_unreachable ();
4952 :
4953 8 : new_stmt = gimple_build_assign (make_ssa_name (mask_argtype),
4954 : mask);
4955 8 : vect_finish_stmt_generation (vinfo, stmt_info,
4956 : new_stmt, gsi);
4957 8 : mask = gimple_assign_lhs (new_stmt);
4958 8 : vargs.safe_push (mask);
4959 : }
4960 : }
4961 :
4962 466 : gcall *new_call = gimple_build_call_vec (fndecl, vargs);
4963 466 : if (vec_dest)
4964 : {
4965 460 : gcc_assert (ratype
4966 : || known_eq (TYPE_VECTOR_SUBPARTS (rtype), nunits));
4967 460 : if (ratype)
4968 15 : new_temp = create_tmp_var (ratype);
4969 445 : else if (useless_type_conversion_p (vectype, rtype))
4970 423 : new_temp = make_ssa_name (vec_dest, new_call);
4971 : else
4972 22 : new_temp = make_ssa_name (rtype, new_call);
4973 460 : gimple_call_set_lhs (new_call, new_temp);
4974 : }
4975 466 : vect_finish_stmt_generation (vinfo, stmt_info, new_call, gsi);
4976 466 : gimple *new_stmt = new_call;
4977 :
4978 466 : if (vec_dest)
4979 : {
4980 460 : if (!multiple_p (TYPE_VECTOR_SUBPARTS (vectype), nunits))
4981 : {
4982 21 : unsigned int k, l;
4983 42 : poly_uint64 prec = GET_MODE_BITSIZE (TYPE_MODE (vectype));
4984 42 : poly_uint64 bytes = GET_MODE_SIZE (TYPE_MODE (vectype));
4985 21 : k = vector_unroll_factor (nunits,
4986 : TYPE_VECTOR_SUBPARTS (vectype));
4987 21 : gcc_assert ((k & (k - 1)) == 0);
4988 75 : for (l = 0; l < k; l++)
4989 : {
4990 54 : tree t;
4991 54 : if (ratype)
4992 : {
4993 42 : t = build_fold_addr_expr (new_temp);
4994 42 : t = build2 (MEM_REF, vectype, t,
4995 42 : build_int_cst (TREE_TYPE (t), l * bytes));
4996 : }
4997 : else
4998 12 : t = build3 (BIT_FIELD_REF, vectype, new_temp,
4999 12 : bitsize_int (prec), bitsize_int (l * prec));
5000 54 : new_stmt = gimple_build_assign (make_ssa_name (vectype), t);
5001 54 : vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
5002 :
5003 54 : SLP_TREE_VEC_DEFS (slp_node)
5004 54 : .quick_push (gimple_assign_lhs (new_stmt));
5005 : }
5006 :
5007 21 : if (ratype)
5008 15 : vect_clobber_variable (vinfo, stmt_info, gsi, new_temp);
5009 21 : continue;
5010 21 : }
5011 439 : else if (!multiple_p (nunits, TYPE_VECTOR_SUBPARTS (vectype)))
5012 : {
5013 16 : unsigned int k;
5014 16 : if (!constant_multiple_p (TYPE_VECTOR_SUBPARTS (vectype),
5015 16 : TYPE_VECTOR_SUBPARTS (rtype), &k))
5016 0 : gcc_unreachable ();
5017 16 : gcc_assert ((k & (k - 1)) == 0);
5018 16 : if ((j & (k - 1)) == 0)
5019 8 : vec_alloc (ret_ctor_elts, k);
5020 16 : if (ratype)
5021 : {
5022 0 : unsigned int m, o;
5023 0 : o = vector_unroll_factor (nunits,
5024 : TYPE_VECTOR_SUBPARTS (rtype));
5025 0 : for (m = 0; m < o; m++)
5026 : {
5027 0 : tree tem = build4 (ARRAY_REF, rtype, new_temp,
5028 0 : size_int (m), NULL_TREE, NULL_TREE);
5029 0 : new_stmt = gimple_build_assign (make_ssa_name (rtype),
5030 : tem);
5031 0 : vect_finish_stmt_generation (vinfo, stmt_info,
5032 : new_stmt, gsi);
5033 0 : CONSTRUCTOR_APPEND_ELT (ret_ctor_elts, NULL_TREE,
5034 : gimple_assign_lhs (new_stmt));
5035 : }
5036 0 : vect_clobber_variable (vinfo, stmt_info, gsi, new_temp);
5037 : }
5038 : else
5039 16 : CONSTRUCTOR_APPEND_ELT (ret_ctor_elts, NULL_TREE, new_temp);
5040 16 : if ((j & (k - 1)) != k - 1)
5041 8 : continue;
5042 8 : vec_oprnd0 = build_constructor (vectype, ret_ctor_elts);
5043 8 : new_stmt
5044 8 : = gimple_build_assign (make_ssa_name (vec_dest), vec_oprnd0);
5045 8 : vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
5046 :
5047 8 : SLP_TREE_VEC_DEFS (slp_node)
5048 8 : .quick_push (gimple_assign_lhs (new_stmt));
5049 8 : continue;
5050 8 : }
5051 423 : else if (ratype)
5052 : {
5053 0 : tree t = build_fold_addr_expr (new_temp);
5054 0 : t = build2 (MEM_REF, vectype, t,
5055 0 : build_int_cst (TREE_TYPE (t), 0));
5056 0 : new_stmt = gimple_build_assign (make_ssa_name (vec_dest), t);
5057 0 : vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
5058 0 : vect_clobber_variable (vinfo, stmt_info, gsi, new_temp);
5059 : }
5060 423 : else if (!useless_type_conversion_p (vectype, rtype))
5061 : {
5062 0 : vec_oprnd0 = build1 (VIEW_CONVERT_EXPR, vectype, new_temp);
5063 0 : new_stmt
5064 0 : = gimple_build_assign (make_ssa_name (vec_dest), vec_oprnd0);
5065 0 : vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
5066 : }
5067 : }
5068 :
5069 429 : if (gimple_get_lhs (new_stmt))
5070 423 : SLP_TREE_VEC_DEFS (slp_node).quick_push (gimple_get_lhs (new_stmt));
5071 : }
5072 :
5073 1149 : for (i = 0; i < nargs; ++i)
5074 : {
5075 792 : vec<tree> oprndsi = vec_oprnds[i];
5076 792 : oprndsi.release ();
5077 : }
5078 357 : vargs.release ();
5079 :
5080 : /* Mark the clone as no longer being a candidate for GC. */
5081 357 : bestn->gc_candidate = false;
5082 :
5083 357 : return true;
5084 1390 : }
5085 :
5086 :
5087 : /* Function vect_gen_widened_results_half
5088 :
5089 : Create a vector stmt whose code, type, number of arguments, and result
5090 : variable are CODE, OP_TYPE, and VEC_DEST, and its arguments are
5091 : VEC_OPRND0 and VEC_OPRND1. The new vector stmt is to be inserted at GSI.
5092 : In the case that CODE is a CALL_EXPR, this means that a call to DECL
5093 : needs to be created (DECL is a function-decl of a target-builtin).
5094 : STMT_INFO is the original scalar stmt that we are vectorizing. */
5095 :
5096 : static gimple *
5097 30918 : vect_gen_widened_results_half (vec_info *vinfo, code_helper ch,
5098 : tree vec_oprnd0, tree vec_oprnd1, int op_type,
5099 : tree vec_dest, gimple_stmt_iterator *gsi,
5100 : stmt_vec_info stmt_info)
5101 : {
5102 30918 : gimple *new_stmt;
5103 30918 : tree new_temp;
5104 :
5105 : /* Generate half of the widened result: */
5106 30918 : if (op_type != binary_op)
5107 29798 : vec_oprnd1 = NULL;
5108 30918 : new_stmt = vect_gimple_build (vec_dest, ch, vec_oprnd0, vec_oprnd1);
5109 30918 : new_temp = make_ssa_name (vec_dest, new_stmt);
5110 30918 : gimple_set_lhs (new_stmt, new_temp);
5111 30918 : vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
5112 :
5113 30918 : return new_stmt;
5114 : }
5115 :
5116 :
5117 : /* Create vectorized demotion statements for vector operands from VEC_OPRNDS.
5118 : For multi-step conversions store the resulting vectors and call the function
5119 : recursively. When NARROW_SRC_P is true, there's still a conversion after
5120 : narrowing, don't store the vectors in the SLP_NODE or in vector info of
5121 : the scalar statement(or in STMT_VINFO_RELATED_STMT chain). */
5122 :
5123 : static void
5124 11979 : vect_create_vectorized_demotion_stmts (vec_info *vinfo, vec<tree> *vec_oprnds,
5125 : int multi_step_cvt,
5126 : stmt_vec_info stmt_info,
5127 : vec<tree> &vec_dsts,
5128 : gimple_stmt_iterator *gsi,
5129 : slp_tree slp_node, code_helper code,
5130 : bool narrow_src_p)
5131 : {
5132 11979 : unsigned int i;
5133 11979 : tree vop0, vop1, new_tmp, vec_dest;
5134 :
5135 11979 : vec_dest = vec_dsts.pop ();
5136 :
5137 28351 : for (i = 0; i < vec_oprnds->length (); i += 2)
5138 : {
5139 : /* Create demotion operation. */
5140 16372 : vop0 = (*vec_oprnds)[i];
5141 16372 : vop1 = (*vec_oprnds)[i + 1];
5142 16372 : gimple *new_stmt = vect_gimple_build (vec_dest, code, vop0, vop1);
5143 16372 : new_tmp = make_ssa_name (vec_dest, new_stmt);
5144 16372 : gimple_set_lhs (new_stmt, new_tmp);
5145 16372 : vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
5146 16372 : if (multi_step_cvt || narrow_src_p)
5147 : /* Store the resulting vector for next recursive call,
5148 : or return the resulting vector_tmp for NARROW FLOAT_EXPR. */
5149 6697 : (*vec_oprnds)[i/2] = new_tmp;
5150 : else
5151 : {
5152 : /* This is the last step of the conversion sequence. Store the
5153 : vectors in SLP_NODE. */
5154 9675 : slp_node->push_vec_def (new_stmt);
5155 : }
5156 : }
5157 :
5158 : /* For multi-step demotion operations we first generate demotion operations
5159 : from the source type to the intermediate types, and then combine the
5160 : results (stored in VEC_OPRNDS) in demotion operation to the destination
5161 : type. */
5162 11979 : if (multi_step_cvt)
5163 : {
5164 : /* At each level of recursion we have half of the operands we had at the
5165 : previous level. */
5166 2970 : vec_oprnds->truncate ((i+1)/2);
5167 2970 : vect_create_vectorized_demotion_stmts (vinfo, vec_oprnds,
5168 : multi_step_cvt - 1,
5169 : stmt_info, vec_dsts, gsi,
5170 2970 : slp_node, VEC_PACK_TRUNC_EXPR,
5171 : narrow_src_p);
5172 : }
5173 :
5174 11979 : vec_dsts.quick_push (vec_dest);
5175 11979 : }
5176 :
5177 :
5178 : /* Create vectorized promotion statements for vector operands from VEC_OPRNDS0
5179 : and VEC_OPRNDS1, for a binary operation associated with scalar statement
5180 : STMT_INFO. For multi-step conversions store the resulting vectors and
5181 : call the function recursively. */
5182 :
5183 : static void
5184 11279 : vect_create_vectorized_promotion_stmts (vec_info *vinfo,
5185 : vec<tree> *vec_oprnds0,
5186 : vec<tree> *vec_oprnds1,
5187 : stmt_vec_info stmt_info, tree vec_dest,
5188 : gimple_stmt_iterator *gsi,
5189 : code_helper ch1,
5190 : code_helper ch2, int op_type)
5191 : {
5192 11279 : int i;
5193 11279 : tree vop0, vop1, new_tmp1, new_tmp2;
5194 11279 : gimple *new_stmt1, *new_stmt2;
5195 11279 : vec<tree> vec_tmp = vNULL;
5196 :
5197 11279 : vec_tmp.create (vec_oprnds0->length () * 2);
5198 38017 : FOR_EACH_VEC_ELT (*vec_oprnds0, i, vop0)
5199 : {
5200 15459 : if (op_type == binary_op)
5201 560 : vop1 = (*vec_oprnds1)[i];
5202 : else
5203 : vop1 = NULL_TREE;
5204 :
5205 : /* Generate the two halves of promotion operation. */
5206 15459 : new_stmt1 = vect_gen_widened_results_half (vinfo, ch1, vop0, vop1,
5207 : op_type, vec_dest, gsi,
5208 : stmt_info);
5209 15459 : new_stmt2 = vect_gen_widened_results_half (vinfo, ch2, vop0, vop1,
5210 : op_type, vec_dest, gsi,
5211 : stmt_info);
5212 15459 : if (is_gimple_call (new_stmt1))
5213 : {
5214 0 : new_tmp1 = gimple_call_lhs (new_stmt1);
5215 0 : new_tmp2 = gimple_call_lhs (new_stmt2);
5216 : }
5217 : else
5218 : {
5219 15459 : new_tmp1 = gimple_assign_lhs (new_stmt1);
5220 15459 : new_tmp2 = gimple_assign_lhs (new_stmt2);
5221 : }
5222 :
5223 : /* Store the results for the next step. */
5224 15459 : vec_tmp.quick_push (new_tmp1);
5225 15459 : vec_tmp.quick_push (new_tmp2);
5226 : }
5227 :
5228 11279 : vec_oprnds0->release ();
5229 11279 : *vec_oprnds0 = vec_tmp;
5230 11279 : }
5231 :
5232 : /* Create vectorized promotion stmts for widening stmts using only half the
5233 : potential vector size for input. */
5234 : static void
5235 14 : vect_create_half_widening_stmts (vec_info *vinfo,
5236 : vec<tree> *vec_oprnds0,
5237 : vec<tree> *vec_oprnds1,
5238 : stmt_vec_info stmt_info, tree vec_dest,
5239 : gimple_stmt_iterator *gsi,
5240 : code_helper code1,
5241 : int op_type)
5242 : {
5243 14 : int i;
5244 14 : tree vop0, vop1;
5245 14 : gimple *new_stmt1;
5246 14 : gimple *new_stmt2;
5247 14 : gimple *new_stmt3;
5248 14 : vec<tree> vec_tmp = vNULL;
5249 :
5250 14 : vec_tmp.create (vec_oprnds0->length ());
5251 28 : FOR_EACH_VEC_ELT (*vec_oprnds0, i, vop0)
5252 : {
5253 14 : tree new_tmp1, new_tmp2, new_tmp3, out_type;
5254 :
5255 14 : gcc_assert (op_type == binary_op);
5256 14 : vop1 = (*vec_oprnds1)[i];
5257 :
5258 : /* Widen the first vector input. */
5259 14 : out_type = TREE_TYPE (vec_dest);
5260 14 : new_tmp1 = make_ssa_name (out_type);
5261 14 : new_stmt1 = gimple_build_assign (new_tmp1, NOP_EXPR, vop0);
5262 14 : vect_finish_stmt_generation (vinfo, stmt_info, new_stmt1, gsi);
5263 14 : if (VECTOR_TYPE_P (TREE_TYPE (vop1)))
5264 : {
5265 : /* Widen the second vector input. */
5266 14 : new_tmp2 = make_ssa_name (out_type);
5267 14 : new_stmt2 = gimple_build_assign (new_tmp2, NOP_EXPR, vop1);
5268 14 : vect_finish_stmt_generation (vinfo, stmt_info, new_stmt2, gsi);
5269 : /* Perform the operation. With both vector inputs widened. */
5270 14 : new_stmt3 = vect_gimple_build (vec_dest, code1, new_tmp1, new_tmp2);
5271 : }
5272 : else
5273 : {
5274 : /* Perform the operation. With the single vector input widened. */
5275 0 : new_stmt3 = vect_gimple_build (vec_dest, code1, new_tmp1, vop1);
5276 : }
5277 :
5278 14 : new_tmp3 = make_ssa_name (vec_dest, new_stmt3);
5279 14 : gimple_assign_set_lhs (new_stmt3, new_tmp3);
5280 14 : vect_finish_stmt_generation (vinfo, stmt_info, new_stmt3, gsi);
5281 :
5282 : /* Store the results for the next step. */
5283 14 : vec_tmp.quick_push (new_tmp3);
5284 : }
5285 :
5286 14 : vec_oprnds0->release ();
5287 14 : *vec_oprnds0 = vec_tmp;
5288 14 : }
5289 :
5290 :
5291 : /* Check if STMT_INFO performs a conversion operation that can be vectorized.
5292 : If COST_VEC is passed, calculate costs but don't change anything,
5293 : otherwise, vectorize STMT_INFO: create a vectorized stmt to replace
5294 : it, and insert it at GSI.
5295 : Return true if STMT_INFO is vectorizable in this way. */
5296 :
5297 : static bool
5298 2361092 : vectorizable_conversion (vec_info *vinfo,
5299 : stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
5300 : slp_tree slp_node,
5301 : stmt_vector_for_cost *cost_vec)
5302 : {
5303 2361092 : tree vec_dest, cvt_op = NULL_TREE;
5304 2361092 : tree scalar_dest;
5305 2361092 : tree op0, op1 = NULL_TREE;
5306 2361092 : tree_code tc1;
5307 2361092 : code_helper code, code1, code2;
5308 2361092 : code_helper codecvt1 = ERROR_MARK, codecvt2 = ERROR_MARK;
5309 2361092 : tree new_temp;
5310 2361092 : enum vect_def_type dt[2] = {vect_unknown_def_type, vect_unknown_def_type};
5311 2361092 : poly_uint64 nunits_in;
5312 2361092 : poly_uint64 nunits_out;
5313 2361092 : tree vectype_out, vectype_in;
5314 2361092 : int i;
5315 2361092 : tree lhs_type, rhs_type;
5316 : /* For conversions between floating point and integer, there're 2 NARROW
5317 : cases. NARROW_SRC is for FLOAT_EXPR, means
5318 : integer --DEMOTION--> integer --FLOAT_EXPR--> floating point.
5319 : This is safe when the range of the source integer can fit into the lower
5320 : precision. NARROW_DST is for FIX_TRUNC_EXPR, means
5321 : floating point --FIX_TRUNC_EXPR--> integer --DEMOTION--> INTEGER.
5322 : For other conversions, when there's narrowing, NARROW_DST is used as
5323 : default. */
5324 2361092 : enum { NARROW_SRC, NARROW_DST, NONE, WIDEN } modifier;
5325 2361092 : vec<tree> vec_oprnds0 = vNULL;
5326 2361092 : vec<tree> vec_oprnds1 = vNULL;
5327 2361092 : tree vop0;
5328 2361092 : bb_vec_info bb_vinfo = dyn_cast <bb_vec_info> (vinfo);
5329 2361092 : loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
5330 2361092 : int multi_step_cvt = 0;
5331 2361092 : vec<tree> interm_types = vNULL;
5332 2361092 : tree intermediate_type, cvt_type = NULL_TREE;
5333 2361092 : int op_type;
5334 2361092 : unsigned short fltsz;
5335 :
5336 : /* Is STMT a vectorizable conversion? */
5337 :
5338 2361092 : if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
5339 : return false;
5340 :
5341 2361092 : if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
5342 194531 : && cost_vec)
5343 : return false;
5344 :
5345 2166561 : gimple* stmt = stmt_info->stmt;
5346 2166561 : if (!(is_gimple_assign (stmt) || is_gimple_call (stmt)))
5347 : return false;
5348 :
5349 2108757 : if (gimple_get_lhs (stmt) == NULL_TREE
5350 2108757 : || TREE_CODE (gimple_get_lhs (stmt)) != SSA_NAME)
5351 772079 : return false;
5352 :
5353 1336678 : if (TREE_CODE (gimple_get_lhs (stmt)) != SSA_NAME)
5354 : return false;
5355 :
5356 1336678 : if (is_gimple_assign (stmt))
5357 : {
5358 1328446 : code = gimple_assign_rhs_code (stmt);
5359 1328446 : op_type = TREE_CODE_LENGTH ((tree_code) code);
5360 : }
5361 8232 : else if (gimple_call_internal_p (stmt))
5362 : {
5363 4359 : code = gimple_call_internal_fn (stmt);
5364 4359 : op_type = gimple_call_num_args (stmt);
5365 : }
5366 : else
5367 : return false;
5368 :
5369 1332805 : bool widen_arith = (code == WIDEN_MULT_EXPR
5370 1330402 : || code == WIDEN_LSHIFT_EXPR
5371 2663207 : || widening_fn_p (code));
5372 :
5373 1330402 : if (!widen_arith
5374 1330402 : && !CONVERT_EXPR_CODE_P (code)
5375 1193484 : && code != FIX_TRUNC_EXPR
5376 1192002 : && code != FLOAT_EXPR)
5377 : return false;
5378 :
5379 : /* Check types of lhs and rhs. */
5380 154214 : scalar_dest = gimple_get_lhs (stmt);
5381 154214 : lhs_type = TREE_TYPE (scalar_dest);
5382 154214 : vectype_out = SLP_TREE_VECTYPE (slp_node);
5383 :
5384 : /* Check the operands of the operation. */
5385 154214 : slp_tree slp_op0, slp_op1 = NULL;
5386 154214 : if (!vect_is_simple_use (vinfo, slp_node,
5387 : 0, &op0, &slp_op0, &dt[0], &vectype_in))
5388 : {
5389 0 : if (dump_enabled_p ())
5390 0 : dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5391 : "use not simple.\n");
5392 0 : return false;
5393 : }
5394 :
5395 154214 : rhs_type = TREE_TYPE (op0);
5396 152732 : if ((code != FIX_TRUNC_EXPR && code != FLOAT_EXPR)
5397 296904 : && !((INTEGRAL_TYPE_P (lhs_type)
5398 131088 : && INTEGRAL_TYPE_P (rhs_type))
5399 : || (SCALAR_FLOAT_TYPE_P (lhs_type)
5400 7162 : && SCALAR_FLOAT_TYPE_P (rhs_type))))
5401 : return false;
5402 :
5403 149774 : if (!VECTOR_BOOLEAN_TYPE_P (vectype_out)
5404 134003 : && INTEGRAL_TYPE_P (lhs_type)
5405 263204 : && !type_has_mode_precision_p (lhs_type))
5406 : {
5407 445 : if (dump_enabled_p ())
5408 0 : dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5409 : "type conversion to bit-precision unsupported\n");
5410 445 : return false;
5411 : }
5412 :
5413 149329 : if (op_type == binary_op)
5414 : {
5415 2403 : gcc_assert (code == WIDEN_MULT_EXPR
5416 : || code == WIDEN_LSHIFT_EXPR
5417 : || widening_fn_p (code));
5418 :
5419 2403 : op1 = is_gimple_assign (stmt) ? gimple_assign_rhs2 (stmt) :
5420 0 : gimple_call_arg (stmt, 0);
5421 2403 : tree vectype1_in;
5422 2403 : if (!vect_is_simple_use (vinfo, slp_node, 1,
5423 : &op1, &slp_op1, &dt[1], &vectype1_in))
5424 : {
5425 0 : if (dump_enabled_p ())
5426 0 : dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5427 : "use not simple.\n");
5428 0 : return false;
5429 : }
5430 : /* For WIDEN_MULT_EXPR, if OP0 is a constant, use the type of
5431 : OP1. */
5432 2403 : if (!vectype_in)
5433 101 : vectype_in = vectype1_in;
5434 : }
5435 :
5436 : /* If op0 is an external or constant def, infer the vector type
5437 : from the scalar type. */
5438 149329 : if (!vectype_in)
5439 19876 : vectype_in = get_vectype_for_scalar_type (vinfo, rhs_type, slp_node);
5440 149329 : if (!cost_vec)
5441 22631 : gcc_assert (vectype_in);
5442 149329 : if (!vectype_in)
5443 : {
5444 252 : if (dump_enabled_p ())
5445 0 : dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5446 : "no vectype for scalar type %T\n", rhs_type);
5447 :
5448 252 : return false;
5449 : }
5450 :
5451 298154 : if (VECTOR_BOOLEAN_TYPE_P (vectype_out)
5452 149077 : != VECTOR_BOOLEAN_TYPE_P (vectype_in))
5453 : {
5454 229 : if (dump_enabled_p ())
5455 36 : dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5456 : "can't convert between boolean and non "
5457 : "boolean vectors %T\n", rhs_type);
5458 :
5459 229 : return false;
5460 : }
5461 :
5462 148848 : nunits_in = TYPE_VECTOR_SUBPARTS (vectype_in);
5463 148848 : nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
5464 148848 : if (known_eq (nunits_out, nunits_in))
5465 70928 : if (widen_arith)
5466 : modifier = WIDEN;
5467 : else
5468 148848 : modifier = NONE;
5469 77920 : else if (multiple_p (nunits_out, nunits_in))
5470 : modifier = NARROW_DST;
5471 : else
5472 : {
5473 43356 : gcc_checking_assert (multiple_p (nunits_in, nunits_out));
5474 : modifier = WIDEN;
5475 : }
5476 :
5477 148848 : bool found_mode = false;
5478 148848 : scalar_mode lhs_mode = SCALAR_TYPE_MODE (lhs_type);
5479 148848 : scalar_mode rhs_mode = SCALAR_TYPE_MODE (rhs_type);
5480 148848 : opt_scalar_mode rhs_mode_iter;
5481 148848 : auto_vec<std::pair<tree, tree_code>, 2> converts;
5482 148848 : bool evenodd_ok = false;
5483 :
5484 : /* Supportable by target? */
5485 148848 : switch (modifier)
5486 : {
5487 70685 : case NONE:
5488 70685 : if (code != FIX_TRUNC_EXPR
5489 69810 : && code != FLOAT_EXPR
5490 134321 : && !CONVERT_EXPR_CODE_P (code))
5491 : return false;
5492 70685 : gcc_assert (code.is_tree_code ());
5493 70685 : if (supportable_indirect_convert_operation (code,
5494 : vectype_out, vectype_in,
5495 : converts, op0, slp_op0))
5496 : {
5497 15964 : gcc_assert (converts.length () <= 2);
5498 15964 : if (converts.length () == 1)
5499 15890 : code1 = converts[0].second;
5500 : else
5501 : {
5502 74 : cvt_type = NULL_TREE;
5503 74 : multi_step_cvt = converts.length () - 1;
5504 74 : codecvt1 = converts[0].second;
5505 74 : code1 = converts[1].second;
5506 74 : interm_types.safe_push (converts[0].first);
5507 : }
5508 : break;
5509 : }
5510 :
5511 : /* FALLTHRU */
5512 54721 : unsupported:
5513 61125 : if (dump_enabled_p ())
5514 5888 : dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5515 : "conversion not supported by target.\n");
5516 : return false;
5517 :
5518 43599 : case WIDEN:
5519 43599 : if (known_eq (nunits_in, nunits_out))
5520 : {
5521 486 : if (!(code.is_tree_code ()
5522 243 : && supportable_half_widening_operation ((tree_code) code,
5523 : vectype_out, vectype_in,
5524 : &tc1)))
5525 74 : goto unsupported;
5526 169 : code1 = tc1;
5527 169 : gcc_assert (!(multi_step_cvt && op_type == binary_op));
5528 : break;
5529 : }
5530 : /* Elements in a vector can only be reordered if used in a reduction
5531 : operation only. */
5532 43356 : if (code == WIDEN_MULT_EXPR
5533 2160 : && loop_vinfo
5534 2111 : && !nested_in_vect_loop_p (LOOP_VINFO_LOOP (loop_vinfo), stmt_info)
5535 : /* For a SLP reduction we cannot swizzle lanes, detecting a
5536 : reduction chain isn't possible here. */
5537 45445 : && SLP_TREE_LANES (slp_node) == 1)
5538 : {
5539 : /* ??? There is no way to look for SLP uses, so work on
5540 : the stmt and what the stmt-based cycle detection gives us. */
5541 1991 : tree lhs = gimple_get_lhs (vect_orig_stmt (stmt_info)->stmt);
5542 1991 : stmt_vec_info use_stmt_info
5543 1991 : = lhs ? loop_vinfo->lookup_single_use (lhs) : NULL;
5544 1991 : if (use_stmt_info
5545 1843 : && STMT_VINFO_REDUC_DEF (use_stmt_info))
5546 43356 : evenodd_ok = true;
5547 : }
5548 43356 : if (supportable_widening_operation (code, vectype_out, vectype_in,
5549 : evenodd_ok, &code1,
5550 : &code2, &multi_step_cvt,
5551 : &interm_types))
5552 : {
5553 : /* Binary widening operation can only be supported directly by the
5554 : architecture. */
5555 41656 : gcc_assert (!(multi_step_cvt && op_type == binary_op));
5556 : break;
5557 : }
5558 :
5559 1700 : if (code != FLOAT_EXPR
5560 2024 : || GET_MODE_SIZE (lhs_mode) <= GET_MODE_SIZE (rhs_mode))
5561 1538 : goto unsupported;
5562 :
5563 162 : fltsz = GET_MODE_SIZE (lhs_mode);
5564 237 : FOR_EACH_2XWIDER_MODE (rhs_mode_iter, rhs_mode)
5565 : {
5566 237 : rhs_mode = rhs_mode_iter.require ();
5567 474 : if (GET_MODE_SIZE (rhs_mode) > fltsz)
5568 : break;
5569 :
5570 237 : cvt_type
5571 237 : = build_nonstandard_integer_type (GET_MODE_BITSIZE (rhs_mode), 0);
5572 237 : cvt_type = get_same_sized_vectype (cvt_type, vectype_in);
5573 237 : if (cvt_type == NULL_TREE)
5574 0 : goto unsupported;
5575 :
5576 474 : if (GET_MODE_SIZE (rhs_mode) == fltsz)
5577 : {
5578 57 : tc1 = ERROR_MARK;
5579 57 : gcc_assert (code.is_tree_code ());
5580 57 : if (!supportable_convert_operation ((tree_code) code, vectype_out,
5581 : cvt_type, &tc1))
5582 22 : goto unsupported;
5583 35 : codecvt1 = tc1;
5584 : }
5585 180 : else if (!supportable_widening_operation (code, vectype_out,
5586 : cvt_type, evenodd_ok,
5587 : &codecvt1,
5588 : &codecvt2, &multi_step_cvt,
5589 : &interm_types))
5590 75 : continue;
5591 : else
5592 105 : gcc_assert (multi_step_cvt == 0);
5593 :
5594 140 : if (supportable_widening_operation (NOP_EXPR, cvt_type,
5595 : vectype_in, evenodd_ok, &code1,
5596 : &code2, &multi_step_cvt,
5597 : &interm_types))
5598 : {
5599 : found_mode = true;
5600 : break;
5601 : }
5602 : }
5603 :
5604 140 : if (!found_mode)
5605 0 : goto unsupported;
5606 :
5607 280 : if (GET_MODE_SIZE (rhs_mode) == fltsz)
5608 35 : codecvt2 = ERROR_MARK;
5609 : else
5610 : {
5611 105 : multi_step_cvt++;
5612 105 : interm_types.safe_push (cvt_type);
5613 105 : cvt_type = NULL_TREE;
5614 : }
5615 : break;
5616 :
5617 34564 : case NARROW_DST:
5618 34564 : gcc_assert (op_type == unary_op);
5619 34564 : if (supportable_narrowing_operation (code, vectype_out, vectype_in,
5620 : &code1, &multi_step_cvt,
5621 : &interm_types))
5622 : break;
5623 :
5624 14568 : if (GET_MODE_SIZE (lhs_mode) >= GET_MODE_SIZE (rhs_mode))
5625 966 : goto unsupported;
5626 :
5627 3890 : if (code == FIX_TRUNC_EXPR)
5628 : {
5629 82 : cvt_type
5630 82 : = build_nonstandard_integer_type (GET_MODE_BITSIZE (rhs_mode), 0);
5631 82 : cvt_type = get_same_sized_vectype (cvt_type, vectype_in);
5632 82 : if (cvt_type == NULL_TREE)
5633 0 : goto unsupported;
5634 82 : if (supportable_convert_operation ((tree_code) code, cvt_type, vectype_in,
5635 : &tc1))
5636 80 : codecvt1 = tc1;
5637 : else
5638 2 : goto unsupported;
5639 80 : if (supportable_narrowing_operation (NOP_EXPR, vectype_out, cvt_type,
5640 : &code1, &multi_step_cvt,
5641 : &interm_types))
5642 : break;
5643 : }
5644 : /* If op0 can be represented with low precision integer,
5645 : truncate it to cvt_type and the do FLOAT_EXPR. */
5646 3808 : else if (code == FLOAT_EXPR)
5647 : {
5648 96 : if (cost_vec)
5649 : {
5650 91 : wide_int op_min_value, op_max_value;
5651 91 : tree def;
5652 :
5653 : /* ??? Merge ranges in case of more than one lane. */
5654 91 : if (SLP_TREE_LANES (slp_op0) != 1
5655 89 : || !(def = vect_get_slp_scalar_def (slp_op0, 0))
5656 180 : || !vect_get_range_info (def, &op_min_value, &op_max_value))
5657 86 : goto unsupported;
5658 :
5659 5 : if ((wi::min_precision (op_max_value, SIGNED)
5660 5 : > GET_MODE_BITSIZE (lhs_mode))
5661 5 : || (wi::min_precision (op_min_value, SIGNED)
5662 5 : > GET_MODE_BITSIZE (lhs_mode)))
5663 0 : goto unsupported;
5664 91 : }
5665 :
5666 10 : cvt_type
5667 10 : = build_nonstandard_integer_type (GET_MODE_BITSIZE (lhs_mode), 0);
5668 10 : cvt_type = get_same_sized_vectype (cvt_type, vectype_out);
5669 10 : if (cvt_type == NULL_TREE)
5670 0 : goto unsupported;
5671 10 : if (!supportable_narrowing_operation (NOP_EXPR, cvt_type, vectype_in,
5672 : &code1, &multi_step_cvt,
5673 : &interm_types))
5674 0 : goto unsupported;
5675 10 : if (supportable_convert_operation ((tree_code) code, vectype_out,
5676 : cvt_type, &tc1))
5677 : {
5678 10 : codecvt1 = tc1;
5679 10 : modifier = NARROW_SRC;
5680 10 : break;
5681 : }
5682 : }
5683 :
5684 3716 : goto unsupported;
5685 :
5686 : default:
5687 : gcc_unreachable ();
5688 : }
5689 :
5690 87723 : if (modifier == WIDEN
5691 87723 : && loop_vinfo
5692 40871 : && LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo)
5693 104883 : && (code1 == VEC_WIDEN_MULT_EVEN_EXPR
5694 17144 : || widening_evenodd_fn_p (code1)))
5695 : {
5696 16 : if (dump_enabled_p ())
5697 0 : dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5698 : "can't use a fully-masked loop because"
5699 : " widening operation on even/odd elements"
5700 : " mixes up lanes.\n");
5701 16 : LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo) = false;
5702 : }
5703 :
5704 87723 : if (cost_vec) /* transformation not required. */
5705 : {
5706 65092 : if (!vect_maybe_update_slp_op_vectype (slp_op0, vectype_in)
5707 65092 : || !vect_maybe_update_slp_op_vectype (slp_op1, vectype_in))
5708 : {
5709 0 : if (dump_enabled_p ())
5710 0 : dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5711 : "incompatible vector types for invariants\n");
5712 0 : return false;
5713 : }
5714 65092 : DUMP_VECT_SCOPE ("vectorizable_conversion");
5715 65092 : unsigned int nvectors = vect_get_num_copies (vinfo, slp_node);
5716 65092 : if (modifier == NONE)
5717 : {
5718 12000 : SLP_TREE_TYPE (slp_node) = type_conversion_vec_info_type;
5719 12000 : vect_model_simple_cost (vinfo, (1 + multi_step_cvt),
5720 : slp_node, cost_vec);
5721 : }
5722 53092 : else if (modifier == NARROW_SRC || modifier == NARROW_DST)
5723 : {
5724 20785 : SLP_TREE_TYPE (slp_node) = type_demotion_vec_info_type;
5725 : /* The final packing step produces one vector result per copy. */
5726 20785 : vect_model_promotion_demotion_cost (slp_node, nvectors,
5727 : multi_step_cvt, cost_vec,
5728 : widen_arith);
5729 : }
5730 : else
5731 : {
5732 32307 : SLP_TREE_TYPE (slp_node) = type_promotion_vec_info_type;
5733 : /* The initial unpacking step produces two vector results
5734 : per copy. MULTI_STEP_CVT is 0 for a single conversion,
5735 : so >> MULTI_STEP_CVT divides by 2^(number of steps - 1). */
5736 32307 : vect_model_promotion_demotion_cost (slp_node,
5737 : nvectors >> multi_step_cvt,
5738 : multi_step_cvt, cost_vec,
5739 : widen_arith);
5740 : }
5741 65092 : interm_types.release ();
5742 65092 : return true;
5743 65092 : }
5744 :
5745 : /* Transform. */
5746 22631 : if (dump_enabled_p ())
5747 4266 : dump_printf_loc (MSG_NOTE, vect_location, "transform conversion.\n");
5748 :
5749 22631 : if (op_type == binary_op)
5750 : {
5751 513 : if (CONSTANT_CLASS_P (op0))
5752 0 : op0 = fold_convert (TREE_TYPE (op1), op0);
5753 513 : else if (CONSTANT_CLASS_P (op1))
5754 237 : op1 = fold_convert (TREE_TYPE (op0), op1);
5755 : }
5756 :
5757 : /* In case of multi-step conversion, we first generate conversion operations
5758 : to the intermediate types, and then from that types to the final one.
5759 : We create vector destinations for the intermediate type (TYPES) received
5760 : from supportable_*_operation, and store them in the correct order
5761 : for future use in vect_create_vectorized_*_stmts (). */
5762 22631 : auto_vec<tree> vec_dsts (multi_step_cvt + 1);
5763 22631 : bool widen_or_narrow_float_p
5764 22631 : = cvt_type && (modifier == WIDEN || modifier == NARROW_SRC);
5765 22631 : vec_dest = vect_create_destination_var (scalar_dest,
5766 : widen_or_narrow_float_p
5767 : ? cvt_type : vectype_out);
5768 22631 : vec_dsts.quick_push (vec_dest);
5769 :
5770 22631 : if (multi_step_cvt)
5771 : {
5772 8760 : for (i = interm_types.length () - 1;
5773 8760 : interm_types.iterate (i, &intermediate_type); i--)
5774 : {
5775 4626 : vec_dest = vect_create_destination_var (scalar_dest,
5776 : intermediate_type);
5777 4626 : vec_dsts.quick_push (vec_dest);
5778 : }
5779 : }
5780 :
5781 22631 : if (cvt_type)
5782 73 : vec_dest = vect_create_destination_var (scalar_dest,
5783 : widen_or_narrow_float_p
5784 : ? vectype_out : cvt_type);
5785 :
5786 22631 : switch (modifier)
5787 : {
5788 3964 : case NONE:
5789 3964 : vect_get_vec_defs (vinfo, slp_node, op0, &vec_oprnds0);
5790 : /* vec_dest is intermediate type operand when multi_step_cvt. */
5791 3964 : if (multi_step_cvt)
5792 : {
5793 21 : cvt_op = vec_dest;
5794 21 : vec_dest = vec_dsts[0];
5795 : }
5796 :
5797 8304 : FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
5798 : {
5799 : /* Arguments are ready, create the new vector stmt. */
5800 4340 : gimple* new_stmt;
5801 4340 : if (multi_step_cvt)
5802 : {
5803 21 : gcc_assert (multi_step_cvt == 1);
5804 21 : new_stmt = vect_gimple_build (cvt_op, codecvt1, vop0);
5805 21 : new_temp = make_ssa_name (cvt_op, new_stmt);
5806 21 : gimple_assign_set_lhs (new_stmt, new_temp);
5807 21 : vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
5808 21 : vop0 = new_temp;
5809 : }
5810 4340 : new_stmt = vect_gimple_build (vec_dest, code1, vop0);
5811 4340 : new_temp = make_ssa_name (vec_dest, new_stmt);
5812 4340 : gimple_set_lhs (new_stmt, new_temp);
5813 4340 : vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
5814 :
5815 4340 : slp_node->push_vec_def (new_stmt);
5816 : }
5817 : break;
5818 :
5819 9658 : case WIDEN:
5820 : /* In case the vectorization factor (VF) is bigger than the number
5821 : of elements that we can fit in a vectype (nunits), we have to
5822 : generate more than one vector stmt - i.e - we need to "unroll"
5823 : the vector stmt by a factor VF/nunits. */
5824 9658 : vect_get_vec_defs (vinfo, slp_node, op0, &vec_oprnds0,
5825 9658 : code == WIDEN_LSHIFT_EXPR ? NULL_TREE : op1,
5826 : &vec_oprnds1);
5827 9658 : if (code == WIDEN_LSHIFT_EXPR)
5828 : {
5829 0 : int oprnds_size = vec_oprnds0.length ();
5830 0 : vec_oprnds1.create (oprnds_size);
5831 0 : for (i = 0; i < oprnds_size; ++i)
5832 0 : vec_oprnds1.quick_push (op1);
5833 : }
5834 : /* Arguments are ready. Create the new vector stmts. */
5835 20951 : for (i = multi_step_cvt; i >= 0; i--)
5836 : {
5837 11293 : tree this_dest = vec_dsts[i];
5838 11293 : code_helper c1 = code1, c2 = code2;
5839 11293 : if (i == 0 && codecvt2 != ERROR_MARK)
5840 : {
5841 48 : c1 = codecvt1;
5842 48 : c2 = codecvt2;
5843 : }
5844 11293 : if (known_eq (nunits_out, nunits_in))
5845 14 : vect_create_half_widening_stmts (vinfo, &vec_oprnds0, &vec_oprnds1,
5846 : stmt_info, this_dest, gsi, c1,
5847 : op_type);
5848 : else
5849 11279 : vect_create_vectorized_promotion_stmts (vinfo, &vec_oprnds0,
5850 : &vec_oprnds1, stmt_info,
5851 : this_dest, gsi,
5852 : c1, c2, op_type);
5853 : }
5854 :
5855 36700 : FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
5856 : {
5857 27042 : gimple *new_stmt;
5858 27042 : if (cvt_type)
5859 : {
5860 120 : new_temp = make_ssa_name (vec_dest);
5861 120 : new_stmt = vect_gimple_build (new_temp, codecvt1, vop0);
5862 120 : vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
5863 : }
5864 : else
5865 26922 : new_stmt = SSA_NAME_DEF_STMT (vop0);
5866 :
5867 27042 : slp_node->push_vec_def (new_stmt);
5868 : }
5869 : break;
5870 :
5871 9009 : case NARROW_SRC:
5872 9009 : case NARROW_DST:
5873 : /* In case the vectorization factor (VF) is bigger than the number
5874 : of elements that we can fit in a vectype (nunits), we have to
5875 : generate more than one vector stmt - i.e - we need to "unroll"
5876 : the vector stmt by a factor VF/nunits. */
5877 9009 : vect_get_vec_defs (vinfo, slp_node, op0, &vec_oprnds0);
5878 : /* Arguments are ready. Create the new vector stmts. */
5879 9009 : if (cvt_type && modifier == NARROW_DST)
5880 153 : FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
5881 : {
5882 124 : new_temp = make_ssa_name (vec_dest);
5883 124 : gimple *new_stmt = vect_gimple_build (new_temp, codecvt1, vop0);
5884 124 : vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
5885 124 : vec_oprnds0[i] = new_temp;
5886 : }
5887 :
5888 9009 : vect_create_vectorized_demotion_stmts (vinfo, &vec_oprnds0,
5889 : multi_step_cvt,
5890 : stmt_info, vec_dsts, gsi,
5891 : slp_node, code1,
5892 : modifier == NARROW_SRC);
5893 : /* After demoting op0 to cvt_type, convert it to dest. */
5894 9009 : if (cvt_type && code == FLOAT_EXPR)
5895 : {
5896 10 : for (unsigned int i = 0; i != vec_oprnds0.length() / 2; i++)
5897 : {
5898 : /* Arguments are ready, create the new vector stmt. */
5899 5 : gcc_assert (TREE_CODE_LENGTH ((tree_code) codecvt1) == unary_op);
5900 5 : gimple *new_stmt
5901 5 : = vect_gimple_build (vec_dest, codecvt1, vec_oprnds0[i]);
5902 5 : new_temp = make_ssa_name (vec_dest, new_stmt);
5903 5 : gimple_set_lhs (new_stmt, new_temp);
5904 5 : vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
5905 :
5906 : /* This is the last step of the conversion sequence. Store the
5907 : vectors in SLP_NODE or in vector info of the scalar statement
5908 : (or in STMT_VINFO_RELATED_STMT chain). */
5909 5 : slp_node->push_vec_def (new_stmt);
5910 : }
5911 : }
5912 : break;
5913 : }
5914 :
5915 22631 : vec_oprnds0.release ();
5916 22631 : vec_oprnds1.release ();
5917 22631 : interm_types.release ();
5918 :
5919 22631 : return true;
5920 148848 : }
5921 :
5922 : /* Return true if we can assume from the scalar form of STMT_INFO that
5923 : neither the scalar nor the vector forms will generate code. STMT_INFO
5924 : is known not to involve a data reference. */
5925 :
5926 : bool
5927 1009190 : vect_nop_conversion_p (stmt_vec_info stmt_info)
5928 : {
5929 1009190 : gassign *stmt = dyn_cast <gassign *> (stmt_info->stmt);
5930 744937 : if (!stmt)
5931 : return false;
5932 :
5933 744937 : tree lhs = gimple_assign_lhs (stmt);
5934 744937 : tree_code code = gimple_assign_rhs_code (stmt);
5935 744937 : tree rhs = gimple_assign_rhs1 (stmt);
5936 :
 : /* A plain SSA copy or a bit-reinterpreting VIEW_CONVERT_EXPR never
 : generates code, in either the scalar or the vector form. */
5937 744937 : if (code == SSA_NAME || code == VIEW_CONVERT_EXPR)
5938 : return true;
5939 :
 : /* A NOP/CONVERT is free exactly when it does not change the value
 : representation of the operand (see tree_nop_conversion_p). */
5940 742644 : if (CONVERT_EXPR_CODE_P (code))
5941 192070 : return tree_nop_conversion_p (TREE_TYPE (lhs), TREE_TYPE (rhs));
5942 :
 : /* Anything else is assumed to generate code. */
5943 : return false;
5944 : }
5945 :
5946 : /* Function vectorizable_assignment.
5947 :
5948 : Check if STMT_INFO performs an assignment (copy) that can be vectorized.
5949 : If COST_VEC is passed, calculate costs but don't change anything,
5950 : otherwise, vectorize STMT_INFO: create a vectorized stmt to replace
5951 : it, and insert it at GSI.
5952 : Return true if STMT_INFO is vectorizable in this way. */
5953 :
5954 : static bool
5955 1869638 : vectorizable_assignment (vec_info *vinfo,
5956 : stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
5957 : slp_tree slp_node,
5958 : stmt_vector_for_cost *cost_vec)
5959 : {
5960 1869638 : tree vec_dest;
5961 1869638 : tree scalar_dest;
5962 1869638 : tree op;
5963 1869638 : tree new_temp;
5964 1869638 : enum vect_def_type dt[1] = {vect_unknown_def_type};
5965 1869638 : int i;
5966 1869638 : vec<tree> vec_oprnds = vNULL;
5967 1869638 : tree vop;
5968 1869638 : bb_vec_info bb_vinfo = dyn_cast <bb_vec_info> (vinfo);
5969 1869638 : enum tree_code code;
5970 1869638 : tree vectype_in;
5971 :
 : /* In loop vectorization only relevant stmts are vectorized; in basic
 : block vectorization everything reachable from the SLP root is. */
5972 1869638 : if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
5973 : return false;
5974 :
5975 1869638 : if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
5976 194531 : && cost_vec)
5977 : return false;
5978 :
5979 : /* Is vectorizable assignment? */
5980 3405346 : gassign *stmt = dyn_cast <gassign *> (stmt_info->stmt);
5981 1608152 : if (!stmt)
5982 : return false;
5983 :
5984 1608152 : scalar_dest = gimple_assign_lhs (stmt);
5985 1608152 : if (TREE_CODE (scalar_dest) != SSA_NAME)
5986 : return false;
5987 :
 : /* Loads and stores are handled by vectorizable_{load,store}. */
5988 836992 : if (STMT_VINFO_DATA_REF (stmt_info))
5989 : return false;
5990 :
 : /* Only plain copies, PAREN_EXPR wrappers and NOP/CONVERT codes are
 : treated as assignments here. */
5991 358779 : code = gimple_assign_rhs_code (stmt);
5992 358779 : if (!(gimple_assign_single_p (stmt)
5993 357427 : || code == PAREN_EXPR
5994 356369 : || CONVERT_EXPR_CODE_P (code)))
5995 : return false;
5996 :
5997 82561 : tree vectype = SLP_TREE_VECTYPE (slp_node);
5998 82561 : poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
5999 :
6000 82561 : slp_tree slp_op;
6001 82561 : if (!vect_is_simple_use (vinfo, slp_node, 0, &op, &slp_op,
6002 : &dt[0], &vectype_in))
6003 : {
6004 0 : if (dump_enabled_p ())
6005 0 : dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6006 : "use not simple.\n");
6007 0 : return false;
6008 : }
 : /* External or constant defs carry no vectype; derive one from the
 : scalar type of the operand. */
6009 82561 : if (!vectype_in)
6010 17726 : vectype_in = get_vectype_for_scalar_type (vinfo, TREE_TYPE (op), slp_node);
6011 :
6012 : /* We can handle VIEW_CONVERT conversions that do not change the number
6013 : of elements or the vector size or other conversions when the component
6014 : types are nop-convertible. */
6015 82561 : if (!vectype_in
6016 82289 : || maybe_ne (TYPE_VECTOR_SUBPARTS (vectype_in), nunits)
6017 75575 : || (code == VIEW_CONVERT_EXPR
6018 2448 : && maybe_ne (GET_MODE_SIZE (TYPE_MODE (vectype)),
6019 2448 : GET_MODE_SIZE (TYPE_MODE (vectype_in))))
6020 158136 : || (CONVERT_EXPR_CODE_P (code)
6021 73197 : && !tree_nop_conversion_p (TREE_TYPE (vectype),
6022 73197 : TREE_TYPE (vectype_in))))
6023 9849 : return false;
6024 :
 : /* Mask (boolean) vectors and data vectors have different layouts;
 : converting between them needs real code, not a plain copy. */
6025 218058 : if (VECTOR_BOOLEAN_TYPE_P (vectype) != VECTOR_BOOLEAN_TYPE_P (vectype_in))
6026 : {
6027 2 : if (dump_enabled_p ())
6028 0 : dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6029 : "can't convert between boolean and non "
6030 0 : "boolean vectors %T\n", TREE_TYPE (op));
6031 :
6032 2 : return false;
6033 : }
6034 :
6035 : /* We do not handle bit-precision changes. */
6036 72710 : if ((CONVERT_EXPR_CODE_P (code)
6037 2378 : || code == VIEW_CONVERT_EXPR)
6038 71556 : && ((INTEGRAL_TYPE_P (TREE_TYPE (scalar_dest))
6039 70307 : && !type_has_mode_precision_p (TREE_TYPE (scalar_dest)))
6040 71250 : || (INTEGRAL_TYPE_P (TREE_TYPE (op))
6041 66647 : && !type_has_mode_precision_p (TREE_TYPE (op))))
6042 : /* But a conversion that does not change the bit-pattern is ok. */
6043 73408 : && !(INTEGRAL_TYPE_P (TREE_TYPE (scalar_dest))
6044 698 : && INTEGRAL_TYPE_P (TREE_TYPE (op))
6045 698 : && (((TYPE_PRECISION (TREE_TYPE (scalar_dest))
6046 698 : > TYPE_PRECISION (TREE_TYPE (op)))
6047 392 : && TYPE_UNSIGNED (TREE_TYPE (op)))
6048 322 : || (TYPE_PRECISION (TREE_TYPE (scalar_dest))
6049 322 : == TYPE_PRECISION (TREE_TYPE (op))))))
6050 : {
6051 266 : if (dump_enabled_p ())
6052 0 : dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6053 : "type conversion to/from bit-precision "
6054 : "unsupported.\n");
6055 266 : return false;
6056 : }
6057 :
6058 72444 : if (cost_vec) /* transformation not required. */
6059 : {
6060 57456 : if (!vect_maybe_update_slp_op_vectype (slp_op, vectype_in))
6061 : {
6062 0 : if (dump_enabled_p ())
6063 0 : dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6064 : "incompatible vector types for invariants\n");
6065 0 : return false;
6066 : }
6067 57456 : SLP_TREE_TYPE (slp_node) = assignment_vec_info_type;
6068 57456 : DUMP_VECT_SCOPE ("vectorizable_assignment");
 : /* A true no-op conversion costs nothing; only cost the general
 : case (see vect_nop_conversion_p). */
6069 57456 : if (!vect_nop_conversion_p (stmt_info))
6070 825 : vect_model_simple_cost (vinfo, 1, slp_node, cost_vec);
6071 57456 : return true;
6072 : }
6073 :
6074 : /* Transform. */
6075 14988 : if (dump_enabled_p ())
6076 3591 : dump_printf_loc (MSG_NOTE, vect_location, "transform assignment.\n");
6077 :
6078 : /* Handle def. */
6079 14988 : vec_dest = vect_create_destination_var (scalar_dest, vectype);
6080 :
6081 : /* Handle use. */
6082 14988 : vect_get_vec_defs (vinfo, slp_node, op, &vec_oprnds);
6083 :
6084 : /* Arguments are ready. create the new vector stmt. */
6085 34086 : FOR_EACH_VEC_ELT (vec_oprnds, i, vop)
6086 : {
 : /* Conversions were validated above to be bit-preserving, so a
 : VIEW_CONVERT_EXPR reinterpretation is all that is needed. */
6087 19098 : if (CONVERT_EXPR_CODE_P (code)
6088 683 : || code == VIEW_CONVERT_EXPR)
6089 18549 : vop = build1 (VIEW_CONVERT_EXPR, vectype, vop);
6090 19098 : gassign *new_stmt = gimple_build_assign (vec_dest, vop);
6091 19098 : new_temp = make_ssa_name (vec_dest, new_stmt);
6092 19098 : gimple_assign_set_lhs (new_stmt, new_temp);
6093 19098 : vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
6094 19098 : slp_node->push_vec_def (new_stmt);
6095 : }
6096 :
6097 14988 : vec_oprnds.release ();
6098 14988 : return true;
6099 : }
6100 :
6101 :
6102 : /* Return TRUE if CODE (a shift operation) is supported for SCALAR_TYPE
6103 : either as shift by a scalar or by a vector. */
6104 :
6105 : bool
6106 278264 : vect_supportable_shift (vec_info *vinfo, enum tree_code code, tree scalar_type)
6107 : {
6108 278264 : optab optab;
6109 278264 : tree vectype;
6110 :
 : /* No vector type for SCALAR_TYPE on this target means no vector
 : shift either. */
6111 278264 : vectype = get_vectype_for_scalar_type (vinfo, scalar_type);
6112 278264 : if (!vectype)
6113 : return false;
6114 :
 : /* First try a shift by an invariant scalar amount ... */
6115 278264 : optab = optab_for_tree_code (code, vectype, optab_scalar);
6116 278264 : if (optab && can_implement_p (optab, TYPE_MODE (vectype)))
6117 : return true;
6118 :
 : /* ... and otherwise fall back to a shift by a per-element vector
 : amount. */
6119 245089 : optab = optab_for_tree_code (code, vectype, optab_vector);
6120 245089 : if (optab && can_implement_p (optab, TYPE_MODE (vectype)))
6121 : return true;
6122 :
6123 : return false;
6124 : }
6125 :
6126 :
6127 : /* Function vectorizable_shift.
6128 :
6129 : Check if STMT_INFO performs a shift operation that can be vectorized.
6130 : If COST_VEC is passed, calculate costs but don't change anything,
6131 : otherwise, vectorize STMT_INFO: create a vectorized stmt to replace
6132 : it, and insert it at GSI.
6133 : Return true if STMT_INFO is vectorizable in this way. */
6134 :
6135 : static bool
6136 650919 : vectorizable_shift (vec_info *vinfo,
6137 : stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
6138 : slp_tree slp_node,
6139 : stmt_vector_for_cost *cost_vec)
6140 : {
6141 650919 : tree vec_dest;
6142 650919 : tree scalar_dest;
6143 650919 : tree op0, op1 = NULL;
6144 650919 : tree vec_oprnd1 = NULL_TREE;
6145 650919 : tree vectype;
6146 650919 : enum tree_code code;
6147 650919 : machine_mode vec_mode;
6148 650919 : tree new_temp;
6149 650919 : optab optab;
6150 650919 : int icode;
6151 650919 : machine_mode optab_op2_mode;
6152 650919 : enum vect_def_type dt[2] = {vect_unknown_def_type, vect_unknown_def_type};
6153 650919 : poly_uint64 nunits_in;
6154 650919 : poly_uint64 nunits_out;
6155 650919 : tree vectype_out;
6156 650919 : tree op1_vectype;
6157 650919 : int i;
6158 650919 : vec<tree> vec_oprnds0 = vNULL;
6159 650919 : vec<tree> vec_oprnds1 = vNULL;
6160 650919 : tree vop0, vop1;
6161 650919 : unsigned int k;
6162 650919 : bool scalar_shift_arg = true;
6163 650919 : bb_vec_info bb_vinfo = dyn_cast <bb_vec_info> (vinfo);
6164 650919 : bool incompatible_op1_vectype_p = false;
6165 :
6166 650919 : if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
6167 : return false;
6168 :
6169 650919 : if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
6170 194531 : && STMT_VINFO_DEF_TYPE (stmt_info) != vect_nested_cycle
6171 193104 : && cost_vec)
6172 : return false;
6173 :
6174 : /* Is STMT a vectorizable binary/unary operation? */
6175 989009 : gassign *stmt = dyn_cast <gassign *> (stmt_info->stmt);
6176 391206 : if (!stmt)
6177 : return false;
6178 :
6179 391206 : if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME)
6180 : return false;
6181 :
6182 390724 : code = gimple_assign_rhs_code (stmt);
6183 :
 : /* Only shifts and rotates are handled here; other binary ops go
 : through vectorizable_operation. */
6184 390724 : if (!(code == LSHIFT_EXPR || code == RSHIFT_EXPR || code == LROTATE_EXPR
6185 : || code == RROTATE_EXPR))
6186 : return false;
6187 :
6188 58438 : scalar_dest = gimple_assign_lhs (stmt);
6189 58438 : vectype_out = SLP_TREE_VECTYPE (slp_node);
6190 58438 : if (!type_has_mode_precision_p (TREE_TYPE (scalar_dest)))
6191 : {
6192 0 : if (dump_enabled_p ())
6193 0 : dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6194 : "bit-precision shifts not supported.\n");
6195 0 : return false;
6196 : }
6197 :
6198 58438 : slp_tree slp_op0;
6199 58438 : if (!vect_is_simple_use (vinfo, slp_node,
6200 : 0, &op0, &slp_op0, &dt[0], &vectype))
6201 : {
6202 0 : if (dump_enabled_p ())
6203 0 : dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6204 : "use not simple.\n");
6205 0 : return false;
6206 : }
6207 : /* If op0 is an external or constant def, infer the vector type
6208 : from the scalar type. */
6209 58438 : if (!vectype)
6210 12169 : vectype = get_vectype_for_scalar_type (vinfo, TREE_TYPE (op0), slp_node);
 : /* At transform time (no cost_vec) analysis must have established
 : a vectype already. */
6211 58438 : if (!cost_vec)
6212 7726 : gcc_assert (vectype);
6213 58438 : if (!vectype)
6214 : {
6215 0 : if (dump_enabled_p ())
6216 0 : dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6217 : "no vectype for scalar type\n");
6218 0 : return false;
6219 : }
6220 :
6221 58438 : nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
6222 58438 : nunits_in = TYPE_VECTOR_SUBPARTS (vectype);
6223 58438 : if (maybe_ne (nunits_out, nunits_in))
6224 : return false;
6225 :
6226 58438 : stmt_vec_info op1_def_stmt_info;
6227 58438 : slp_tree slp_op1;
6228 58438 : if (!vect_is_simple_use (vinfo, slp_node, 1, &op1, &slp_op1,
6229 : &dt[1], &op1_vectype, &op1_def_stmt_info))
6230 : {
6231 0 : if (dump_enabled_p ())
6232 0 : dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6233 : "use not simple.\n");
6234 0 : return false;
6235 : }
6236 :
6237 : /* Determine whether the shift amount is a vector, or scalar. If the
6238 : shift/rotate amount is a vector, use the vector/vector shift optabs. */
6239 :
6240 58438 : if ((dt[1] == vect_internal_def
6241 58438 : || dt[1] == vect_induction_def
6242 45533 : || dt[1] == vect_nested_cycle)
6243 12923 : && SLP_TREE_LANES (slp_node) == 1)
6244 : scalar_shift_arg = false;
6245 45570 : else if (dt[1] == vect_constant_def
6246 : || dt[1] == vect_external_def
6247 45570 : || dt[1] == vect_internal_def)
6248 : {
6249 : /* In SLP, need to check whether the shift count is the same,
6250 : in loops if it is a constant or invariant, it is always
6251 : a scalar shift. */
6252 45564 : vec<stmt_vec_info> stmts = SLP_TREE_SCALAR_STMTS (slp_node);
6253 45564 : stmt_vec_info slpstmt_info;
6254 :
6255 122160 : FOR_EACH_VEC_ELT (stmts, k, slpstmt_info)
6256 76596 : if (slpstmt_info)
6257 : {
6258 76596 : gassign *slpstmt = as_a <gassign *> (slpstmt_info->stmt);
6259 153192 : if (!operand_equal_p (gimple_assign_rhs2 (slpstmt), op1, 0))
6260 76596 : scalar_shift_arg = false;
6261 : }
6262 :
6263 : /* For internal SLP defs we have to make sure we see scalar stmts
6264 : for all vector elements.
6265 : ??? For different vectors we could resort to a different
6266 : scalar shift operand but code-generation below simply always
6267 : takes the first. */
6268 45564 : if (dt[1] == vect_internal_def
6269 45613 : && maybe_ne (nunits_out * vect_get_num_copies (vinfo, slp_node),
6270 49 : stmts.length ()))
6271 : scalar_shift_arg = false;
6272 :
6273 : /* If the shift amount is computed by a pattern stmt we cannot
6274 : use the scalar amount directly thus give up and use a vector
6275 : shift. */
6276 45564 : if (op1_def_stmt_info && is_pattern_stmt_p (op1_def_stmt_info))
6277 : scalar_shift_arg = false;
6278 : }
6279 : else
6280 : {
6281 6 : if (dump_enabled_p ())
6282 6 : dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6283 : "operand mode requires invariant argument.\n");
6284 6 : return false;
6285 : }
6286 :
6287 : /* Vector shifted by vector. */
 : /* Remember the analysis decision; the optab probing below may still
 : flip SCALAR_SHIFT_ARG to false, and the transform code needs the
 : original value. */
6288 58470 : bool was_scalar_shift_arg = scalar_shift_arg;
6289 45555 : if (!scalar_shift_arg)
6290 : {
6291 12915 : optab = optab_for_tree_code (code, vectype, optab_vector);
6292 12915 : if (dump_enabled_p ())
6293 1196 : dump_printf_loc (MSG_NOTE, vect_location,
6294 : "vector/vector shift/rotate found.\n");
6295 :
6296 12915 : if (!op1_vectype)
6297 15 : op1_vectype = get_vectype_for_scalar_type (vinfo, TREE_TYPE (op1),
6298 : slp_op1);
6299 12915 : incompatible_op1_vectype_p
6300 25830 : = (op1_vectype == NULL_TREE
6301 12915 : || maybe_ne (TYPE_VECTOR_SUBPARTS (op1_vectype),
6302 12915 : TYPE_VECTOR_SUBPARTS (vectype))
6303 25828 : || TYPE_MODE (op1_vectype) != TYPE_MODE (vectype));
6304 12908 : if (incompatible_op1_vectype_p
6305 7 : && (SLP_TREE_DEF_TYPE (slp_op1) != vect_constant_def
6306 1 : || slp_op1->refcnt != 1))
6307 : {
6308 6 : if (dump_enabled_p ())
6309 0 : dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6310 : "unusable type for last operand in"
6311 : " vector/vector shift/rotate.\n");
6312 6 : return false;
6313 : }
6314 : }
6315 : /* See if the machine has a vector shifted by scalar insn and if not
6316 : then see if it has a vector shifted by vector insn. */
6317 : else
6318 : {
6319 45517 : optab = optab_for_tree_code (code, vectype, optab_scalar);
6320 45517 : if (optab
6321 45517 : && can_implement_p (optab, TYPE_MODE (vectype)))
6322 : {
6323 45517 : if (dump_enabled_p ())
6324 4914 : dump_printf_loc (MSG_NOTE, vect_location,
6325 : "vector/scalar shift/rotate found.\n");
6326 : }
6327 : else
6328 : {
6329 0 : optab = optab_for_tree_code (code, vectype, optab_vector);
6330 0 : if (optab
6331 0 : && can_implement_p (optab, TYPE_MODE (vectype)))
6332 : {
6333 0 : scalar_shift_arg = false;
6334 :
6335 0 : if (dump_enabled_p ())
6336 0 : dump_printf_loc (MSG_NOTE, vect_location,
6337 : "vector/vector shift/rotate found.\n");
6338 :
6339 0 : if (!op1_vectype)
6340 0 : op1_vectype = get_vectype_for_scalar_type (vinfo,
6341 0 : TREE_TYPE (op1),
6342 : slp_op1);
6343 :
6344 : /* Unlike the other binary operators, shifts/rotates have
6345 : the rhs being int, instead of the same type as the lhs,
6346 : so make sure the scalar is the right type if we are
6347 : dealing with vectors of long long/long/short/char. */
6348 0 : incompatible_op1_vectype_p
6349 0 : = (!op1_vectype
6350 0 : || !tree_nop_conversion_p (TREE_TYPE (vectype),
6351 0 : TREE_TYPE (op1)));
6352 0 : if (incompatible_op1_vectype_p
6353 0 : && dt[1] == vect_internal_def)
6354 : {
6355 0 : if (dump_enabled_p ())
6356 0 : dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6357 : "unusable type for last operand in"
6358 : " vector/vector shift/rotate.\n");
6359 0 : return false;
6360 : }
6361 : }
6362 : }
6363 : }
6364 :
6365 : /* Supportable by target? */
6366 58426 : if (!optab)
6367 : {
6368 0 : if (dump_enabled_p ())
6369 0 : dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6370 : "no shift optab for %s and %T.\n",
6371 : get_tree_code_name (code), vectype);
6372 0 : return false;
6373 : }
6374 58426 : vec_mode = TYPE_MODE (vectype);
6375 58426 : icode = (int) optab_handler (optab, vec_mode);
6376 58426 : if (icode == CODE_FOR_nothing)
6377 : {
6378 5310 : if (dump_enabled_p ())
6379 886 : dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6380 : "shift op not supported by target.\n");
6381 5310 : return false;
6382 : }
6383 : /* vector lowering cannot optimize vector shifts using word arithmetic. */
6384 53116 : if (vect_emulated_vector_p (vectype))
6385 : return false;
6386 :
6387 53116 : if (cost_vec) /* transformation not required. */
6388 : {
6389 45390 : if (!vect_maybe_update_slp_op_vectype (slp_op0, vectype)
6390 45390 : || ((!scalar_shift_arg || dt[1] == vect_internal_def)
6391 5462 : && (!incompatible_op1_vectype_p
6392 1 : || dt[1] == vect_constant_def)
6393 5462 : && !vect_maybe_update_slp_op_vectype
6394 5462 : (slp_op1,
6395 : incompatible_op1_vectype_p ? vectype : op1_vectype)))
6396 : {
6397 0 : if (dump_enabled_p ())
6398 0 : dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6399 : "incompatible vector types for invariants\n");
6400 0 : return false;
6401 : }
6402 : /* Now adjust the constant shift amount in place. */
6403 45390 : if (incompatible_op1_vectype_p
6404 1 : && dt[1] == vect_constant_def)
6405 4 : for (unsigned i = 0;
6406 5 : i < SLP_TREE_SCALAR_OPS (slp_op1).length (); ++i)
6407 : {
6408 4 : SLP_TREE_SCALAR_OPS (slp_op1)[i]
6409 4 : = fold_convert (TREE_TYPE (vectype),
6410 : SLP_TREE_SCALAR_OPS (slp_op1)[i]);
6411 4 : gcc_assert ((TREE_CODE (SLP_TREE_SCALAR_OPS (slp_op1)[i])
6412 : == INTEGER_CST));
6413 : }
6414 45390 : SLP_TREE_TYPE (slp_node) = shift_vec_info_type;
6415 45390 : DUMP_VECT_SCOPE ("vectorizable_shift");
6416 45390 : vect_model_simple_cost (vinfo, 1, slp_node, cost_vec);
6417 45390 : return true;
6418 : }
6419 :
6420 : /* Transform. */
6421 :
6422 7726 : if (dump_enabled_p ())
6423 2014 : dump_printf_loc (MSG_NOTE, vect_location,
6424 : "transform binary/unary operation.\n");
6425 :
6426 : /* Handle def. */
6427 7726 : vec_dest = vect_create_destination_var (scalar_dest, vectype);
6428 :
6429 7726 : unsigned nvectors = vect_get_num_copies (vinfo, slp_node);
6430 7726 : if (scalar_shift_arg && dt[1] != vect_internal_def)
6431 : {
6432 : /* Vector shl and shr insn patterns can be defined with scalar
6433 : operand 2 (shift operand). In this case, use constant or loop
6434 : invariant op1 directly, without extending it to vector mode
6435 : first. */
6436 5569 : optab_op2_mode = insn_data[icode].operand[2].mode;
6437 5569 : if (!VECTOR_MODE_P (optab_op2_mode))
6438 : {
6439 5569 : if (dump_enabled_p ())
6440 1899 : dump_printf_loc (MSG_NOTE, vect_location,
6441 : "operand 1 using scalar mode.\n");
6442 5569 : vec_oprnd1 = op1;
6443 5569 : vec_oprnds1.create (nvectors);
6444 5569 : vec_oprnds1.quick_push (vec_oprnd1);
6445 : /* Store vec_oprnd1 for every vector stmt to be created.
6446 : We check during the analysis that all the shift arguments
6447 : are the same.
6448 : TODO: Allow different constants for different vector
6449 : stmts generated for an SLP instance. */
6450 13161 : for (k = 0; k < nvectors - 1; k++)
6451 2023 : vec_oprnds1.quick_push (vec_oprnd1);
6452 : }
6453 : }
6454 2157 : else if (!scalar_shift_arg && incompatible_op1_vectype_p)
6455 : {
6456 0 : if (was_scalar_shift_arg)
6457 : {
6458 : /* If the argument was the same in all lanes create the
6459 : correctly typed vector shift amount directly. Note
6460 : we made SLP scheduling think we use the original scalars,
6461 : so place the compensation code next to the shift which
6462 : is conservative. See PR119640 where it otherwise breaks. */
6463 0 : op1 = fold_convert (TREE_TYPE (vectype), op1);
6464 0 : op1 = vect_init_vector (vinfo, stmt_info, op1, TREE_TYPE (vectype),
6465 : gsi);
6466 0 : vec_oprnd1 = vect_init_vector (vinfo, stmt_info, op1, vectype,
6467 : gsi);
6468 0 : vec_oprnds1.create (nvectors);
6469 0 : for (k = 0; k < nvectors; k++)
6470 0 : vec_oprnds1.quick_push (vec_oprnd1);
6471 : }
6472 0 : else if (dt[1] == vect_constant_def)
6473 : /* The constant shift amount has been adjusted in place. */
6474 : ;
6475 : else
6476 0 : gcc_assert (TYPE_MODE (op1_vectype) == TYPE_MODE (vectype));
6477 : }
6478 :
6479 : /* vec_oprnd1 is available if operand 1 should be of a scalar-type
6480 : (a special case for certain kind of vector shifts); otherwise,
6481 : operand 1 should be of a vector type (the usual case). */
6482 2157 : vect_get_vec_defs (vinfo, slp_node,
6483 : op0, &vec_oprnds0,
6484 7726 : vec_oprnd1 ? NULL_TREE : op1, &vec_oprnds1);
6485 :
6486 : /* Arguments are ready. Create the new vector stmt. */
6487 20977 : FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
6488 : {
6489 : /* For internal defs where we need to use a scalar shift arg
6490 : extract the first lane. */
6491 13251 : if (scalar_shift_arg && dt[1] == vect_internal_def)
6492 : {
6493 10 : vop1 = vec_oprnds1[0];
6494 10 : new_temp = make_ssa_name (TREE_TYPE (TREE_TYPE (vop1)));
6495 10 : gassign *new_stmt
6496 10 : = gimple_build_assign (new_temp,
6497 10 : build3 (BIT_FIELD_REF, TREE_TYPE (new_temp),
6498 : vop1,
6499 10 : TYPE_SIZE (TREE_TYPE (new_temp)),
6500 : bitsize_zero_node));
6501 10 : vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
6502 10 : vop1 = new_temp;
6503 10 : }
6504 : else
6505 13241 : vop1 = vec_oprnds1[i];
6506 13251 : gassign *new_stmt = gimple_build_assign (vec_dest, code, vop0, vop1);
6507 13251 : new_temp = make_ssa_name (vec_dest, new_stmt);
6508 13251 : gimple_assign_set_lhs (new_stmt, new_temp);
6509 13251 : vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
6510 13251 : slp_node->push_vec_def (new_stmt);
6511 : }
6512 :
6513 7726 : vec_oprnds0.release ();
6514 7726 : vec_oprnds1.release ();
6515 :
6516 7726 : return true;
6517 : }
6518 :
6519 : /* Function vectorizable_operation.
6520 :
6521 : Check if STMT_INFO performs a binary, unary or ternary operation that can
6522 : be vectorized.
6523 : If COST_VEC is passed, calculate costs but don't change anything,
6524 : otherwise, vectorize STMT_INFO: create a vectorized stmt to replace
6525 : it, and insert it at GSI.
6526 : Return true if STMT_INFO is vectorizable in this way. */
6527 :
6528 : static bool
6529 2388712 : vectorizable_operation (vec_info *vinfo,
6530 : stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
6531 : slp_tree slp_node,
6532 : stmt_vector_for_cost *cost_vec)
6533 : {
6534 2388712 : tree vec_dest;
6535 2388712 : tree scalar_dest;
6536 2388712 : tree op0, op1 = NULL_TREE, op2 = NULL_TREE;
6537 2388712 : tree vectype;
6538 2388712 : loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
6539 2388712 : enum tree_code code, orig_code;
6540 2388712 : machine_mode vec_mode;
6541 2388712 : tree new_temp;
6542 2388712 : int op_type;
6543 2388712 : optab optab;
6544 2388712 : bool target_support_p;
6545 2388712 : enum vect_def_type dt[3]
6546 : = {vect_unknown_def_type, vect_unknown_def_type, vect_unknown_def_type};
6547 2388712 : poly_uint64 nunits_in;
6548 2388712 : poly_uint64 nunits_out;
6549 2388712 : tree vectype_out;
6550 2388712 : int i;
6551 2388712 : vec<tree> vec_oprnds0 = vNULL;
6552 2388712 : vec<tree> vec_oprnds1 = vNULL;
6553 2388712 : vec<tree> vec_oprnds2 = vNULL;
6554 2388712 : tree vop0, vop1, vop2;
6555 2388712 : bb_vec_info bb_vinfo = dyn_cast <bb_vec_info> (vinfo);
6556 :
6557 2388712 : if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
6558 : return false;
6559 :
6560 2388712 : if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
6561 194531 : && cost_vec)
6562 : return false;
6563 :
6564 : /* Is STMT a vectorizable binary/unary operation? */
6565 3981876 : gassign *stmt = dyn_cast <gassign *> (stmt_info->stmt);
6566 2127226 : if (!stmt)
6567 : return false;
6568 :
6569 : /* Loads and stores are handled in vectorizable_{load,store}. */
6570 2127226 : if (STMT_VINFO_DATA_REF (stmt_info))
6571 : return false;
6572 :
6573 877853 : orig_code = code = gimple_assign_rhs_code (stmt);
6574 :
6575 : /* Shifts are handled in vectorizable_shift. */
6576 877853 : if (code == LSHIFT_EXPR
6577 : || code == RSHIFT_EXPR
6578 : || code == LROTATE_EXPR
6579 877853 : || code == RROTATE_EXPR)
6580 : return false;
6581 :
6582 : /* Comparisons are handled in vectorizable_comparison. */
6583 827141 : if (TREE_CODE_CLASS (code) == tcc_comparison)
6584 : return false;
6585 :
6586 : /* Conditions are handled in vectorizable_condition. */
6587 651176 : if (code == COND_EXPR)
6588 : return false;
6589 :
6590 : /* For pointer addition and subtraction, we should use the normal
6591 : plus and minus for the vector operation. */
6592 631816 : if (code == POINTER_PLUS_EXPR)
6593 : code = PLUS_EXPR;
6594 612832 : if (code == POINTER_DIFF_EXPR)
6595 975 : code = MINUS_EXPR;
6596 :
6597 : /* Support only unary or binary operations. */
6598 631816 : op_type = TREE_CODE_LENGTH (code);
6599 631816 : if (op_type != unary_op && op_type != binary_op && op_type != ternary_op)
6600 : {
6601 0 : if (dump_enabled_p ())
6602 0 : dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6603 : "num. args = %d (not unary/binary/ternary op).\n",
6604 : op_type);
6605 0 : return false;
6606 : }
6607 :
6608 631816 : scalar_dest = gimple_assign_lhs (stmt);
6609 631816 : vectype_out = SLP_TREE_VECTYPE (slp_node);
6610 :
6611 : /* Most operations cannot handle bit-precision types without extra
6612 : truncations. */
6613 631816 : bool mask_op_p = VECTOR_BOOLEAN_TYPE_P (vectype_out);
6614 623213 : if (!mask_op_p
6615 623213 : && !type_has_mode_precision_p (TREE_TYPE (scalar_dest))
6616 : /* Exception are bitwise binary operations. */
6617 : && code != BIT_IOR_EXPR
6618 1560 : && code != BIT_XOR_EXPR
6619 1235 : && code != BIT_AND_EXPR)
6620 : {
6621 997 : if (dump_enabled_p ())
6622 0 : dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6623 : "bit-precision arithmetic not supported.\n");
6624 997 : return false;
6625 : }
6626 :
6627 630819 : slp_tree slp_op0;
6628 630819 : if (!vect_is_simple_use (vinfo, slp_node,
6629 : 0, &op0, &slp_op0, &dt[0], &vectype))
6630 : {
6631 0 : if (dump_enabled_p ())
6632 0 : dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6633 : "use not simple.\n");
6634 0 : return false;
6635 : }
6636 630819 : bool is_invariant = (dt[0] == vect_external_def
6637 630819 : || dt[0] == vect_constant_def);
6638 : /* If op0 is an external or constant def, infer the vector type
6639 : from the scalar type. */
6640 630819 : if (!vectype)
6641 : {
6642 : /* For boolean type we cannot determine vectype by
6643 : invariant value (don't know whether it is a vector
6644 : of booleans or vector of integers). We use output
6645 : vectype because operations on boolean don't change
6646 : type. */
6647 70597 : if (VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (op0)))
6648 : {
6649 1125 : if (!VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (scalar_dest)))
6650 : {
6651 233 : if (dump_enabled_p ())
6652 0 : dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6653 : "not supported operation on bool value.\n");
6654 233 : return false;
6655 : }
6656 892 : vectype = vectype_out;
6657 : }
6658 : else
6659 69472 : vectype = get_vectype_for_scalar_type (vinfo, TREE_TYPE (op0),
6660 : slp_node);
6661 : }
6662 630586 : if (!cost_vec)
6663 115343 : gcc_assert (vectype);
6664 630586 : if (!vectype)
6665 : {
6666 284 : if (dump_enabled_p ())
6667 0 : dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6668 : "no vectype for scalar type %T\n",
6669 0 : TREE_TYPE (op0));
6670 :
6671 284 : return false;
6672 : }
6673 :
6674 630302 : nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
6675 630302 : nunits_in = TYPE_VECTOR_SUBPARTS (vectype);
6676 630302 : if (maybe_ne (nunits_out, nunits_in)
6677 630302 : || !tree_nop_conversion_p (TREE_TYPE (vectype_out), TREE_TYPE (vectype)))
6678 10685 : return false;
6679 :
6680 619617 : tree vectype2 = NULL_TREE, vectype3 = NULL_TREE;
6681 619617 : slp_tree slp_op1 = NULL, slp_op2 = NULL;
6682 619617 : if (op_type == binary_op || op_type == ternary_op)
6683 : {
6684 553319 : if (!vect_is_simple_use (vinfo, slp_node,
6685 : 1, &op1, &slp_op1, &dt[1], &vectype2))
6686 : {
6687 0 : if (dump_enabled_p ())
6688 0 : dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6689 : "use not simple.\n");
6690 0 : return false;
6691 : }
6692 553319 : is_invariant &= (dt[1] == vect_external_def
6693 553319 : || dt[1] == vect_constant_def);
6694 553319 : if (vectype2
6695 920378 : && (maybe_ne (nunits_out, TYPE_VECTOR_SUBPARTS (vectype2))
6696 367059 : || !tree_nop_conversion_p (TREE_TYPE (vectype_out),
6697 367059 : TREE_TYPE (vectype2))))
6698 4 : return false;
6699 : }
6700 619613 : if (op_type == ternary_op)
6701 : {
6702 0 : if (!vect_is_simple_use (vinfo, slp_node,
6703 : 2, &op2, &slp_op2, &dt[2], &vectype3))
6704 : {
6705 0 : if (dump_enabled_p ())
6706 0 : dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6707 : "use not simple.\n");
6708 0 : return false;
6709 : }
6710 0 : is_invariant &= (dt[2] == vect_external_def
6711 0 : || dt[2] == vect_constant_def);
6712 0 : if (vectype3
6713 0 : && (maybe_ne (nunits_out, TYPE_VECTOR_SUBPARTS (vectype3))
6714 0 : || !tree_nop_conversion_p (TREE_TYPE (vectype_out),
6715 0 : TREE_TYPE (vectype3))))
6716 0 : return false;
6717 : }
6718 :
6719 : /* Multiple types in SLP are handled by creating the appropriate number of
6720 : vectorized stmts for each SLP node. */
6721 619613 : auto vec_num = vect_get_num_copies (vinfo, slp_node);
6722 :
6723 : /* Reject attempts to combine mask types with nonmask types, e.g. if
6724 : we have an AND between a (nonmask) boolean loaded from memory and
6725 : a (mask) boolean result of a comparison.
6726 :
6727 : TODO: We could easily fix these cases up using pattern statements. */
6728 619613 : if (VECTOR_BOOLEAN_TYPE_P (vectype) != mask_op_p
6729 980890 : || (vectype2 && VECTOR_BOOLEAN_TYPE_P (vectype2) != mask_op_p)
6730 1239226 : || (vectype3 && VECTOR_BOOLEAN_TYPE_P (vectype3) != mask_op_p))
6731 : {
6732 0 : if (dump_enabled_p ())
6733 0 : dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6734 : "mixed mask and nonmask vector types\n");
6735 0 : return false;
6736 : }
6737 :
6738 : /* Supportable by target? */
6739 :
6740 619613 : vec_mode = TYPE_MODE (vectype);
6741 619613 : optab = optab_for_tree_code (code, vectype, optab_default);
6742 619613 : if (!optab)
6743 : {
6744 56893 : if (dump_enabled_p ())
6745 5801 : dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6746 : "no optab for %s and %T.\n",
6747 : get_tree_code_name (code), vectype);
6748 56893 : return false;
6749 : }
6750 562720 : target_support_p = can_implement_p (optab, vec_mode);
6751 :
6752 562720 : bool using_emulated_vectors_p = vect_emulated_vector_p (vectype);
6753 562720 : if (!target_support_p || using_emulated_vectors_p)
6754 : {
6755 29243 : if (dump_enabled_p ())
6756 1112 : dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6757 : "op not supported by target.\n");
6758 : /* When vec_mode is not a vector mode and we verified ops we
6759 : do not have to lower like AND are natively supported let
6760 : those through even when the mode isn't word_mode. For
6761 : ops we have to lower the lowering code assumes we are
6762 : dealing with word_mode. */
6763 58486 : if (!INTEGRAL_TYPE_P (TREE_TYPE (vectype))
6764 29149 : || !GET_MODE_SIZE (vec_mode).is_constant ()
6765 29149 : || (((code == PLUS_EXPR || code == MINUS_EXPR || code == NEGATE_EXPR)
6766 23787 : || !target_support_p)
6767 62128 : && maybe_ne (GET_MODE_SIZE (vec_mode), UNITS_PER_WORD))
6768 : /* Check only during analysis. */
6769 40110 : || (cost_vec && !vect_can_vectorize_without_simd_p (code)))
6770 : {
6771 28479 : if (dump_enabled_p ())
6772 1112 : dump_printf (MSG_NOTE, "using word mode not possible.\n");
6773 28479 : return false;
6774 : }
6775 764 : if (dump_enabled_p ())
6776 0 : dump_printf_loc (MSG_NOTE, vect_location,
6777 : "proceeding using word mode.\n");
6778 : using_emulated_vectors_p = true;
6779 : }
6780 :
6781 534241 : int reduc_idx = SLP_TREE_REDUC_IDX (slp_node);
6782 534241 : vec_loop_masks *masks = (loop_vinfo ? &LOOP_VINFO_MASKS (loop_vinfo) : NULL);
6783 355665 : vec_loop_lens *lens = (loop_vinfo ? &LOOP_VINFO_LENS (loop_vinfo) : NULL);
6784 534241 : internal_fn cond_fn = get_conditional_internal_fn (code);
6785 534241 : internal_fn cond_len_fn = get_conditional_len_internal_fn (code);
6786 :
6787 : /* If operating on inactive elements could generate spurious traps,
6788 : we need to restrict the operation to active lanes. Note that this
6789 : specifically doesn't apply to unhoisted invariants, since they
6790 : operate on the same value for every lane.
6791 :
6792 : Similarly, if this operation is part of a reduction, a fully-masked
6793 : loop should only change the active lanes of the reduction chain,
6794 : keeping the inactive lanes as-is. */
6795 506740 : bool mask_out_inactive = ((!is_invariant && gimple_could_trap_p (stmt))
6796 989703 : || reduc_idx >= 0);
6797 :
6798 534241 : if (cost_vec) /* transformation not required. */
6799 : {
6800 418898 : if (loop_vinfo
6801 251820 : && LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo)
6802 68338 : && mask_out_inactive)
6803 : {
6804 15258 : if (cond_len_fn != IFN_LAST
6805 15258 : && direct_internal_fn_supported_p (cond_len_fn, vectype,
6806 : OPTIMIZE_FOR_SPEED))
6807 0 : vect_record_loop_len (loop_vinfo, lens, vec_num, vectype,
6808 : 1);
6809 15258 : else if (cond_fn != IFN_LAST
6810 15258 : && direct_internal_fn_supported_p (cond_fn, vectype,
6811 : OPTIMIZE_FOR_SPEED))
6812 6873 : vect_record_loop_mask (loop_vinfo, masks, vec_num,
6813 : vectype, NULL);
6814 : else
6815 : {
6816 8385 : if (dump_enabled_p ())
6817 591 : dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6818 : "can't use a fully-masked loop because no"
6819 : " conditional operation is available.\n");
6820 8385 : LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo) = false;
6821 : }
6822 : }
6823 :
6824 : /* Put types on constant and invariant SLP children. */
6825 418898 : if (!vect_maybe_update_slp_op_vectype (slp_op0, vectype)
6826 418820 : || !vect_maybe_update_slp_op_vectype (slp_op1, vectype)
6827 837617 : || !vect_maybe_update_slp_op_vectype (slp_op2, vectype))
6828 : {
6829 179 : if (dump_enabled_p ())
6830 3 : dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6831 : "incompatible vector types for invariants\n");
6832 179 : return false;
6833 : }
6834 :
6835 418719 : SLP_TREE_TYPE (slp_node) = op_vec_info_type;
6836 418719 : DUMP_VECT_SCOPE ("vectorizable_operation");
6837 418719 : vect_model_simple_cost (vinfo, 1, slp_node, cost_vec);
6838 418719 : if (using_emulated_vectors_p)
6839 : {
6840 : /* The above vect_model_simple_cost call handles constants
6841 : in the prologue and (mis-)costs one of the stmts as
6842 : vector stmt. See below for the actual lowering that will
6843 : be applied. */
6844 762 : unsigned n = vect_get_num_copies (vinfo, slp_node);
6845 762 : switch (code)
6846 : {
6847 269 : case PLUS_EXPR:
6848 269 : n *= 5;
6849 269 : break;
6850 466 : case MINUS_EXPR:
6851 466 : n *= 6;
6852 466 : break;
6853 0 : case NEGATE_EXPR:
6854 0 : n *= 4;
6855 0 : break;
6856 : default:
6857 : /* Bit operations do not have extra cost and are accounted
6858 : as vector stmt by vect_model_simple_cost. */
6859 : n = 0;
6860 : break;
6861 : }
6862 735 : if (n != 0)
6863 : {
6864 : /* We also need to materialize two large constants. */
6865 735 : record_stmt_cost (cost_vec, 2, scalar_stmt, stmt_info,
6866 : 0, vect_prologue);
6867 735 : record_stmt_cost (cost_vec, n, scalar_stmt, stmt_info,
6868 : 0, vect_body);
6869 : }
6870 : }
6871 418719 : return true;
6872 : }
6873 :
6874 : /* Transform. */
6875 :
6876 115343 : if (dump_enabled_p ())
6877 16854 : dump_printf_loc (MSG_NOTE, vect_location,
6878 : "transform binary/unary operation.\n");
6879 :
6880 115343 : bool masked_loop_p = loop_vinfo && LOOP_VINFO_FULLY_MASKED_P (loop_vinfo);
6881 103845 : bool len_loop_p = loop_vinfo && LOOP_VINFO_FULLY_WITH_LENGTH_P (loop_vinfo);
6882 :
6883 : /* POINTER_DIFF_EXPR has pointer arguments which are vectorized as
6884 : vectors with unsigned elements, but the result is signed. So, we
6885 : need to compute the MINUS_EXPR into vectype temporary and
6886 : VIEW_CONVERT_EXPR it into the final vectype_out result. */
6887 115343 : tree vec_cvt_dest = NULL_TREE;
6888 115343 : if (orig_code == POINTER_DIFF_EXPR)
6889 : {
6890 110 : vec_dest = vect_create_destination_var (scalar_dest, vectype);
6891 110 : vec_cvt_dest = vect_create_destination_var (scalar_dest, vectype_out);
6892 : }
6893 : /* For reduction operations with undefined overflow behavior make sure to
6894 : pun them to unsigned since we change the order of evaluation.
6895 : ??? Avoid for in-order reductions? */
6896 115233 : else if (arith_code_with_undefined_signed_overflow (orig_code)
6897 98788 : && ANY_INTEGRAL_TYPE_P (vectype)
6898 48630 : && TYPE_OVERFLOW_UNDEFINED (vectype)
6899 141505 : && SLP_TREE_REDUC_IDX (slp_node) != -1)
6900 : {
6901 2492 : gcc_assert (orig_code == PLUS_EXPR || orig_code == MINUS_EXPR
6902 : || orig_code == MULT_EXPR || orig_code == POINTER_PLUS_EXPR);
6903 2492 : vec_cvt_dest = vect_create_destination_var (scalar_dest, vectype_out);
6904 2492 : vectype = unsigned_type_for (vectype);
6905 2492 : vec_dest = vect_create_destination_var (scalar_dest, vectype);
6906 : }
6907 : /* Handle def. */
6908 : else
6909 112741 : vec_dest = vect_create_destination_var (scalar_dest, vectype_out);
6910 :
6911 115343 : vect_get_vec_defs (vinfo, slp_node,
6912 : op0, &vec_oprnds0, op1, &vec_oprnds1, op2, &vec_oprnds2);
6913 : /* Arguments are ready. Create the new vector stmt. */
6914 254789 : FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
6915 : {
6916 139446 : gimple *new_stmt = NULL;
6917 278892 : vop1 = ((op_type == binary_op || op_type == ternary_op)
6918 139446 : ? vec_oprnds1[i] : NULL_TREE);
6919 139446 : vop2 = ((op_type == ternary_op) ? vec_oprnds2[i] : NULL_TREE);
6920 :
6921 139446 : if (vec_cvt_dest
6922 139446 : && !useless_type_conversion_p (vectype, TREE_TYPE (vop0)))
6923 : {
6924 2920 : new_temp = build1 (VIEW_CONVERT_EXPR, vectype, vop0);
6925 2920 : new_stmt = gimple_build_assign (vec_dest, VIEW_CONVERT_EXPR,
6926 : new_temp);
6927 2920 : new_temp = make_ssa_name (vec_dest, new_stmt);
6928 2920 : gimple_assign_set_lhs (new_stmt, new_temp);
6929 2920 : vect_finish_stmt_generation (vinfo, stmt_info,
6930 : new_stmt, gsi);
6931 2920 : vop0 = new_temp;
6932 : }
6933 139446 : if (vop1
6934 136943 : && vec_cvt_dest
6935 142491 : && !useless_type_conversion_p (vectype, TREE_TYPE (vop1)))
6936 : {
6937 2920 : new_temp = build1 (VIEW_CONVERT_EXPR, vectype, vop1);
6938 2920 : new_stmt = gimple_build_assign (vec_dest, VIEW_CONVERT_EXPR,
6939 : new_temp);
6940 2920 : new_temp = make_ssa_name (vec_dest, new_stmt);
6941 2920 : gimple_assign_set_lhs (new_stmt, new_temp);
6942 2920 : vect_finish_stmt_generation (vinfo, stmt_info,
6943 : new_stmt, gsi);
6944 2920 : vop1 = new_temp;
6945 : }
6946 139446 : if (vop2
6947 0 : && vec_cvt_dest
6948 139446 : && !useless_type_conversion_p (vectype, TREE_TYPE (vop2)))
6949 : {
6950 0 : new_temp = build1 (VIEW_CONVERT_EXPR, vectype, vop2);
6951 0 : new_stmt = gimple_build_assign (vec_dest, VIEW_CONVERT_EXPR,
6952 : new_temp);
6953 0 : new_temp = make_ssa_name (vec_dest, new_stmt);
6954 0 : gimple_assign_set_lhs (new_stmt, new_temp);
6955 0 : vect_finish_stmt_generation (vinfo, stmt_info,
6956 : new_stmt, gsi);
6957 0 : vop2 = new_temp;
6958 : }
6959 :
6960 139446 : if (using_emulated_vectors_p)
6961 : {
6962 : /* Lower the operation. This follows vector lowering. */
6963 2 : tree word_type = build_nonstandard_integer_type
6964 2 : (GET_MODE_BITSIZE (vec_mode).to_constant (), 1);
6965 2 : tree wvop0 = make_ssa_name (word_type);
6966 2 : new_stmt = gimple_build_assign (wvop0, VIEW_CONVERT_EXPR,
6967 : build1 (VIEW_CONVERT_EXPR,
6968 : word_type, vop0));
6969 2 : vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
6970 2 : tree wvop1 = NULL_TREE;
6971 2 : if (vop1)
6972 : {
6973 2 : wvop1 = make_ssa_name (word_type);
6974 2 : new_stmt = gimple_build_assign (wvop1, VIEW_CONVERT_EXPR,
6975 : build1 (VIEW_CONVERT_EXPR,
6976 : word_type, vop1));
6977 2 : vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
6978 : }
6979 :
6980 2 : tree result_low;
6981 2 : if (code == PLUS_EXPR || code == MINUS_EXPR || code == NEGATE_EXPR)
6982 : {
6983 1 : unsigned int width = vector_element_bits (vectype);
6984 1 : tree inner_type = TREE_TYPE (vectype);
6985 1 : HOST_WIDE_INT max = GET_MODE_MASK (TYPE_MODE (inner_type));
6986 1 : tree low_bits
6987 1 : = build_replicated_int_cst (word_type, width, max >> 1);
6988 1 : tree high_bits
6989 2 : = build_replicated_int_cst (word_type,
6990 1 : width, max & ~(max >> 1));
6991 1 : tree signs;
6992 1 : if (code == PLUS_EXPR || code == MINUS_EXPR)
6993 : {
6994 1 : signs = make_ssa_name (word_type);
6995 1 : new_stmt = gimple_build_assign (signs,
6996 : BIT_XOR_EXPR, wvop0, wvop1);
6997 1 : vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
6998 1 : tree b_low = make_ssa_name (word_type);
6999 1 : new_stmt = gimple_build_assign (b_low, BIT_AND_EXPR,
7000 : wvop1, low_bits);
7001 1 : vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
7002 1 : tree a_low = make_ssa_name (word_type);
7003 1 : if (code == PLUS_EXPR)
7004 1 : new_stmt = gimple_build_assign (a_low, BIT_AND_EXPR,
7005 : wvop0, low_bits);
7006 : else
7007 0 : new_stmt = gimple_build_assign (a_low, BIT_IOR_EXPR,
7008 : wvop0, high_bits);
7009 1 : vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
7010 1 : if (code == MINUS_EXPR)
7011 : {
7012 0 : new_stmt = gimple_build_assign (NULL_TREE,
7013 : BIT_NOT_EXPR, signs);
7014 0 : signs = make_ssa_name (word_type);
7015 0 : gimple_assign_set_lhs (new_stmt, signs);
7016 0 : vect_finish_stmt_generation (vinfo, stmt_info,
7017 : new_stmt, gsi);
7018 : }
7019 1 : new_stmt = gimple_build_assign (NULL_TREE, BIT_AND_EXPR,
7020 : signs, high_bits);
7021 1 : signs = make_ssa_name (word_type);
7022 1 : gimple_assign_set_lhs (new_stmt, signs);
7023 1 : vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
7024 1 : result_low = make_ssa_name (word_type);
7025 1 : new_stmt = gimple_build_assign (result_low, code,
7026 : a_low, b_low);
7027 1 : vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
7028 : }
7029 : else /* if (code == NEGATE_EXPR) */
7030 : {
7031 0 : tree a_low = make_ssa_name (word_type);
7032 0 : new_stmt = gimple_build_assign (a_low, BIT_AND_EXPR,
7033 : wvop0, low_bits);
7034 0 : vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
7035 0 : signs = make_ssa_name (word_type);
7036 0 : new_stmt = gimple_build_assign (signs, BIT_NOT_EXPR, wvop0);
7037 0 : vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
7038 0 : new_stmt = gimple_build_assign (NULL_TREE, BIT_AND_EXPR,
7039 : signs, high_bits);
7040 0 : signs = make_ssa_name (word_type);
7041 0 : gimple_assign_set_lhs (new_stmt, signs);
7042 0 : vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
7043 0 : result_low = make_ssa_name (word_type);
7044 0 : new_stmt = gimple_build_assign (result_low,
7045 : MINUS_EXPR, high_bits, a_low);
7046 0 : vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
7047 : }
7048 1 : new_stmt = gimple_build_assign (NULL_TREE, BIT_XOR_EXPR,
7049 : result_low, signs);
7050 1 : result_low = make_ssa_name (word_type);
7051 1 : gimple_assign_set_lhs (new_stmt, result_low);
7052 1 : vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
7053 : }
7054 : else
7055 : {
7056 1 : new_stmt = gimple_build_assign (NULL_TREE, code, wvop0, wvop1);
7057 1 : result_low = make_ssa_name (word_type);
7058 1 : gimple_assign_set_lhs (new_stmt, result_low);
7059 1 : vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
7060 :
7061 : }
7062 2 : new_stmt = gimple_build_assign (NULL_TREE, VIEW_CONVERT_EXPR,
7063 : build1 (VIEW_CONVERT_EXPR,
7064 : vectype, result_low));
7065 2 : new_temp = make_ssa_name (vectype);
7066 2 : gimple_assign_set_lhs (new_stmt, new_temp);
7067 2 : vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
7068 : }
7069 139444 : else if ((masked_loop_p || len_loop_p) && mask_out_inactive)
7070 : {
7071 16 : tree mask;
7072 16 : if (masked_loop_p)
7073 16 : mask = vect_get_loop_mask (loop_vinfo, gsi, masks,
7074 : vec_num, vectype, i);
7075 : else
7076 : /* Dummy mask. */
7077 0 : mask = build_minus_one_cst (truth_type_for (vectype));
7078 16 : auto_vec<tree> vops (6);
7079 16 : vops.quick_push (mask);
7080 16 : vops.quick_push (vop0);
7081 16 : if (vop1)
7082 16 : vops.quick_push (vop1);
7083 16 : if (vop2)
7084 0 : vops.quick_push (vop2);
7085 16 : if (reduc_idx >= 0)
7086 : {
7087 : /* Perform the operation on active elements only and take
7088 : inactive elements from the reduction chain input. */
7089 8 : gcc_assert (!vop2);
7090 8 : vops.quick_push (reduc_idx == 1 ? vop1 : vop0);
7091 : }
7092 : else
7093 : {
7094 8 : auto else_value = targetm.preferred_else_value
7095 8 : (cond_fn, vectype, vops.length () - 1, &vops[1]);
7096 8 : vops.quick_push (else_value);
7097 : }
7098 16 : if (len_loop_p)
7099 : {
7100 0 : tree len = vect_get_loop_len (loop_vinfo, gsi, lens,
7101 0 : vec_num, vectype, i, 1, true);
7102 0 : signed char biasval
7103 0 : = LOOP_VINFO_PARTIAL_LOAD_STORE_BIAS (loop_vinfo);
7104 0 : tree bias = build_int_cst (intQI_type_node, biasval);
7105 0 : vops.quick_push (len);
7106 0 : vops.quick_push (bias);
7107 : }
7108 16 : gcall *call
7109 16 : = gimple_build_call_internal_vec (masked_loop_p ? cond_fn
7110 : : cond_len_fn,
7111 : vops);
7112 16 : new_temp = make_ssa_name (vec_dest, call);
7113 16 : gimple_call_set_lhs (call, new_temp);
7114 16 : gimple_call_set_nothrow (call, true);
7115 16 : vect_finish_stmt_generation (vinfo, stmt_info, call, gsi);
7116 16 : new_stmt = call;
7117 16 : }
7118 : else
7119 : {
7120 139428 : tree mask = NULL_TREE;
7121 : /* When combining two masks check if either of them is elsewhere
7122 : combined with a loop mask, if that's the case we can mark that the
7123 : new combined mask doesn't need to be combined with a loop mask. */
7124 139428 : if (masked_loop_p
7125 139428 : && code == BIT_AND_EXPR
7126 139428 : && VECTOR_BOOLEAN_TYPE_P (vectype))
7127 : {
7128 8 : if (loop_vinfo->scalar_cond_masked_set.contains ({ op0, vec_num }))
7129 : {
7130 0 : mask = vect_get_loop_mask (loop_vinfo, gsi, masks,
7131 : vec_num, vectype, i);
7132 :
7133 0 : vop0 = prepare_vec_mask (loop_vinfo, TREE_TYPE (mask), mask,
7134 : vop0, gsi);
7135 : }
7136 :
7137 8 : if (loop_vinfo->scalar_cond_masked_set.contains ({ op1, vec_num }))
7138 : {
7139 0 : mask = vect_get_loop_mask (loop_vinfo, gsi, masks,
7140 : vec_num, vectype, i);
7141 :
7142 0 : vop1 = prepare_vec_mask (loop_vinfo, TREE_TYPE (mask), mask,
7143 : vop1, gsi);
7144 : }
7145 : }
7146 :
7147 139428 : new_stmt = gimple_build_assign (vec_dest, code, vop0, vop1, vop2);
7148 139428 : new_temp = make_ssa_name (vec_dest, new_stmt);
7149 139428 : gimple_assign_set_lhs (new_stmt, new_temp);
7150 139428 : vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
7151 139428 : if (using_emulated_vectors_p)
7152 : suppress_warning (new_stmt, OPT_Wvector_operation_performance);
7153 :
7154 : /* Enter the combined value into the vector cond hash so we don't
7155 : AND it with a loop mask again. */
7156 139428 : if (mask)
7157 0 : loop_vinfo->vec_cond_masked_set.add ({ new_temp, mask });
7158 : }
7159 :
7160 139446 : if (vec_cvt_dest)
7161 : {
7162 3045 : new_temp = build1 (VIEW_CONVERT_EXPR, vectype_out, new_temp);
7163 3045 : new_stmt = gimple_build_assign (vec_cvt_dest, VIEW_CONVERT_EXPR,
7164 : new_temp);
7165 3045 : new_temp = make_ssa_name (vec_cvt_dest, new_stmt);
7166 3045 : gimple_assign_set_lhs (new_stmt, new_temp);
7167 3045 : vect_finish_stmt_generation (vinfo, stmt_info,
7168 : new_stmt, gsi);
7169 : }
7170 :
7171 139446 : slp_node->push_vec_def (new_stmt);
7172 : }
7173 :
7174 115343 : vec_oprnds0.release ();
7175 115343 : vec_oprnds1.release ();
7176 115343 : vec_oprnds2.release ();
7177 :
7178 115343 : return true;
7179 : }
7180 :
7181 : /* A helper function to ensure data reference DR_INFO's base alignment. */
7182 :
7183 : static void
7184 1858722 : ensure_base_align (dr_vec_info *dr_info)
7185 : {
7186 : /* Alignment is only analyzed for the first element of a DR group,
7187 : use that to look at base alignment we need to enforce. */
7188 1858722 : if (STMT_VINFO_GROUPED_ACCESS (dr_info->stmt))
7189 1419228 : dr_info = STMT_VINFO_DR_INFO (DR_GROUP_FIRST_ELEMENT (dr_info->stmt));
7190 :
7191 1858722 : gcc_assert (dr_info->misalignment != DR_MISALIGNMENT_UNINITIALIZED);
7192 :
7193 1858722 : if (dr_info->base_misaligned)
7194 : {
7195 168387 : tree base_decl = dr_info->base_decl;
7196 :
7197 : // We should only be able to increase the alignment of a base object if
7198 : // we know what its new alignment should be at compile time.
7199 168387 : unsigned HOST_WIDE_INT align_base_to =
7200 168387 : DR_TARGET_ALIGNMENT (dr_info).to_constant () * BITS_PER_UNIT;
7201 :
7202 168387 : if (decl_in_symtab_p (base_decl))
7203 4612 : symtab_node::get (base_decl)->increase_alignment (align_base_to);
7204 163775 : else if (DECL_ALIGN (base_decl) < align_base_to)
7205 : {
7206 131241 : SET_DECL_ALIGN (base_decl, align_base_to);
7207 131241 : DECL_USER_ALIGN (base_decl) = 1;
7208 : }
7209 168387 : dr_info->base_misaligned = false;
7210 : }
7211 1858722 : }
7212 :
7213 :
7214 : /* Function get_group_alias_ptr_type.
7215 :
7216 : Return the alias type for the group starting at FIRST_STMT_INFO. */
7217 :
7218 : static tree
7219 1589398 : get_group_alias_ptr_type (stmt_vec_info first_stmt_info)
7220 : {
7221 1589398 : struct data_reference *first_dr, *next_dr;
7222 :
7223 1589398 : first_dr = STMT_VINFO_DATA_REF (first_stmt_info);
7224 1589398 : stmt_vec_info next_stmt_info = DR_GROUP_NEXT_ELEMENT (first_stmt_info);
7225 3842019 : while (next_stmt_info)
7226 : {
7227 2383904 : next_dr = STMT_VINFO_DATA_REF (next_stmt_info);
7228 4767808 : if (get_alias_set (DR_REF (first_dr))
7229 2383904 : != get_alias_set (DR_REF (next_dr)))
7230 : {
7231 131283 : if (dump_enabled_p ())
7232 30 : dump_printf_loc (MSG_NOTE, vect_location,
7233 : "conflicting alias set types.\n");
7234 131283 : return ptr_type_node;
7235 : }
7236 2252621 : next_stmt_info = DR_GROUP_NEXT_ELEMENT (next_stmt_info);
7237 : }
7238 1458115 : return reference_alias_ptr_type (DR_REF (first_dr));
7239 : }
7240 :
7241 :
7242 : /* Function scan_operand_equal_p.
7243 :
7244 : Helper function for check_scan_store. Compare two references
7245 : with .GOMP_SIMD_LANE bases. */
7246 :
static bool
scan_operand_equal_p (tree ref1, tree ref2)
{
  tree ref[2] = { ref1, ref2 };
  poly_int64 bitsize[2], bitpos[2];
  tree offset[2], base[2];
  /* Decompose both references into base, offset and bit position.  */
  for (int i = 0; i < 2; ++i)
    {
      machine_mode mode;
      int unsignedp, reversep, volatilep = 0;
      base[i] = get_inner_reference (ref[i], &bitsize[i], &bitpos[i],
				     &offset[i], &mode, &unsignedp,
				     &reversep, &volatilep);
      /* Reverse storage order, volatile accesses and non-zero bit
	 positions are not handled.  */
      if (reversep || volatilep || maybe_ne (bitpos[i], 0))
	return false;
      /* Normalize a MEM_REF whose address is defined as
	 &decl p+ index (SSA) back into separate base and offset, so
	 the two forms compare equal below.  */
      if (TREE_CODE (base[i]) == MEM_REF
	  && offset[i] == NULL_TREE
	  && TREE_CODE (TREE_OPERAND (base[i], 0)) == SSA_NAME)
	{
	  gimple *def_stmt = SSA_NAME_DEF_STMT (TREE_OPERAND (base[i], 0));
	  if (is_gimple_assign (def_stmt)
	      && gimple_assign_rhs_code (def_stmt) == POINTER_PLUS_EXPR
	      && TREE_CODE (gimple_assign_rhs1 (def_stmt)) == ADDR_EXPR
	      && TREE_CODE (gimple_assign_rhs2 (def_stmt)) == SSA_NAME)
	    {
	      if (maybe_ne (mem_ref_offset (base[i]), 0))
		return false;
	      base[i] = TREE_OPERAND (gimple_assign_rhs1 (def_stmt), 0);
	      offset[i] = gimple_assign_rhs2 (def_stmt);
	    }
	}
    }

  if (!operand_equal_p (base[0], base[1], 0))
    return false;
  if (maybe_ne (bitsize[0], bitsize[1]))
    return false;
  if (offset[0] != offset[1])
    {
      if (!offset[0] || !offset[1])
	return false;
      if (!operand_equal_p (offset[0], offset[1], 0))
	{
	  /* The offsets are neither pointer-identical nor structurally
	     equal; try stripping a constant scaling (offset = index *
	     step) and a widening conversion from each side before
	     comparing index and step separately.  */
	  tree step[2];
	  for (int i = 0; i < 2; ++i)
	    {
	      step[i] = integer_one_node;
	      if (TREE_CODE (offset[i]) == SSA_NAME)
		{
		  gimple *def_stmt = SSA_NAME_DEF_STMT (offset[i]);
		  if (is_gimple_assign (def_stmt)
		      && gimple_assign_rhs_code (def_stmt) == MULT_EXPR
		      && (TREE_CODE (gimple_assign_rhs2 (def_stmt))
			  == INTEGER_CST))
		    {
		      step[i] = gimple_assign_rhs2 (def_stmt);
		      offset[i] = gimple_assign_rhs1 (def_stmt);
		    }
		}
	      else if (TREE_CODE (offset[i]) == MULT_EXPR)
		{
		  step[i] = TREE_OPERAND (offset[i], 1);
		  offset[i] = TREE_OPERAND (offset[i], 0);
		}
	      /* Look through a conversion of the index, but only if it
		 does not narrow (precision check below).  */
	      tree rhs1 = NULL_TREE;
	      if (TREE_CODE (offset[i]) == SSA_NAME)
		{
		  gimple *def_stmt = SSA_NAME_DEF_STMT (offset[i]);
		  if (gimple_assign_cast_p (def_stmt))
		    rhs1 = gimple_assign_rhs1 (def_stmt);
		}
	      else if (CONVERT_EXPR_P (offset[i]))
		rhs1 = TREE_OPERAND (offset[i], 0);
	      if (rhs1
		  && INTEGRAL_TYPE_P (TREE_TYPE (rhs1))
		  && INTEGRAL_TYPE_P (TREE_TYPE (offset[i]))
		  && (TYPE_PRECISION (TREE_TYPE (offset[i]))
		      >= TYPE_PRECISION (TREE_TYPE (rhs1))))
		offset[i] = rhs1;
	    }
	  if (!operand_equal_p (offset[0], offset[1], 0)
	      || !operand_equal_p (step[0], step[1], 0))
	    return false;
	}
    }
  return true;
}
7334 :
7335 :
/* Kinds of steps scan_store_can_perm_p can select when expanding an
   OpenMP scan store.  */

enum scan_store_kind {
  /* Normal permutation.  */
  scan_store_kind_perm,

  /* Whole vector left shift permutation with zero init.  */
  scan_store_kind_lshift_zero,

  /* Whole vector left shift permutation and VEC_COND_EXPR.  */
  scan_store_kind_lshift_cond
};
7346 :
/* Function scan_store_can_perm_p.

   Verify if we can perform the needed permutations or whole vector shifts.
   Return -1 on failure, otherwise exact log2 of vectype's nunits.
   USE_WHOLE_VECTOR is a vector of enum scan_store_kind which operation
   to do at each step.  */
7353 :
static int
scan_store_can_perm_p (tree vectype, tree init,
		       vec<enum scan_store_kind> *use_whole_vector = NULL)
{
  enum machine_mode vec_mode = TYPE_MODE (vectype);
  unsigned HOST_WIDE_INT nunits;
  /* Only constant-sized vectors with a power-of-two element count
     greater than one are supported.  */
  if (!TYPE_VECTOR_SUBPARTS (vectype).is_constant (&nunits))
    return -1;
  int units_log2 = exact_log2 (nunits);
  if (units_log2 <= 0)
    return -1;

  int i;
  enum scan_store_kind whole_vector_shift_kind = scan_store_kind_perm;
  /* Check each of the units_log2 scan steps plus the final broadcast.  */
  for (i = 0; i <= units_log2; ++i)
    {
      unsigned HOST_WIDE_INT j, k;
      enum scan_store_kind kind = scan_store_kind_perm;
      vec_perm_builder sel (nunits, nunits, 1);
      sel.quick_grow (nunits);
      if (i == units_log2)
	{
	  /* Last step: broadcast the final element to all lanes.  */
	  for (j = 0; j < nunits; ++j)
	    sel[j] = nunits - 1;
	}
      else
	{
	  /* Step I: keep the first 1 << I lanes of the first operand
	     and take the remaining lanes from the second operand.  */
	  for (j = 0; j < (HOST_WIDE_INT_1U << i); ++j)
	    sel[j] = j;
	  for (k = 0; j < nunits; ++j, ++k)
	    sel[j] = nunits + k;
	}
      vec_perm_indices indices (sel, i == units_log2 ? 1 : 2, nunits);
      if (!can_vec_perm_const_p (vec_mode, vec_mode, indices))
	{
	  /* The final broadcast has no whole-vector-shift fallback.  */
	  if (i == units_log2)
	    return -1;

	  if (whole_vector_shift_kind == scan_store_kind_perm)
	    {
	      if (!can_implement_p (vec_shl_optab, vec_mode))
		return -1;
	      whole_vector_shift_kind = scan_store_kind_lshift_zero;
	      /* Whole vector shifts shift in zeros, so if init is all zero
		 constant, there is no need to do anything further.  */
	      if ((TREE_CODE (init) != INTEGER_CST
		   && TREE_CODE (init) != REAL_CST)
		  || !initializer_zerop (init))
		{
		  tree masktype = truth_type_for (vectype);
		  if (!expand_vec_cond_expr_p (vectype, masktype))
		    return -1;
		  whole_vector_shift_kind = scan_store_kind_lshift_cond;
		}
	    }
	  kind = whole_vector_shift_kind;
	}
      if (use_whole_vector)
	{
	  /* USE_WHOLE_VECTOR stays empty while every step so far used a
	     plain permutation; it is back-filled with cleared (perm)
	     entries once the first shift-based step is recorded.  */
	  if (kind != scan_store_kind_perm && use_whole_vector->is_empty ())
	    use_whole_vector->safe_grow_cleared (i, true);
	  if (kind != scan_store_kind_perm || !use_whole_vector->is_empty ())
	    use_whole_vector->safe_push (kind);
	}
    }

  return units_log2;
}
7423 :
7424 : /* Function check_scan_store.
7425 :
7426 : Check magic stores for #pragma omp scan {in,ex}clusive reductions. */
7427 :
7428 : static bool
7429 1076 : check_scan_store (vec_info *vinfo, stmt_vec_info stmt_info, tree vectype,
7430 : enum vect_def_type rhs_dt, slp_tree slp_node,
7431 : slp_tree mask_node,
7432 : vect_memory_access_type memory_access_type)
7433 : {
7434 1076 : loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
7435 1076 : dr_vec_info *dr_info = STMT_VINFO_DR_INFO (stmt_info);
7436 1076 : tree ref_type;
7437 :
7438 1076 : gcc_assert (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) > 1);
7439 1076 : if (SLP_TREE_LANES (slp_node) > 1
7440 1076 : || mask_node
7441 1076 : || memory_access_type != VMAT_CONTIGUOUS
7442 1076 : || TREE_CODE (DR_BASE_ADDRESS (dr_info->dr)) != ADDR_EXPR
7443 1076 : || !VAR_P (TREE_OPERAND (DR_BASE_ADDRESS (dr_info->dr), 0))
7444 1076 : || loop_vinfo == NULL
7445 1076 : || LOOP_VINFO_FULLY_MASKED_P (loop_vinfo)
7446 1076 : || LOOP_VINFO_EPILOGUE_P (loop_vinfo)
7447 1076 : || STMT_VINFO_GROUPED_ACCESS (stmt_info)
7448 1076 : || !integer_zerop (get_dr_vinfo_offset (vinfo, dr_info))
7449 1076 : || !integer_zerop (DR_INIT (dr_info->dr))
7450 1076 : || !(ref_type = reference_alias_ptr_type (DR_REF (dr_info->dr)))
7451 2152 : || !alias_sets_conflict_p (get_alias_set (vectype),
7452 1076 : get_alias_set (TREE_TYPE (ref_type))))
7453 : {
7454 0 : if (dump_enabled_p ())
7455 0 : dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
7456 : "unsupported OpenMP scan store.\n");
7457 0 : return false;
7458 : }
7459 :
7460 : /* We need to pattern match code built by OpenMP lowering and simplified
7461 : by following optimizations into something we can handle.
7462 : #pragma omp simd reduction(inscan,+:r)
7463 : for (...)
7464 : {
7465 : r += something ();
7466 : #pragma omp scan inclusive (r)
7467 : use (r);
7468 : }
7469 : shall have body with:
7470 : // Initialization for input phase, store the reduction initializer:
7471 : _20 = .GOMP_SIMD_LANE (simduid.3_14(D), 0);
7472 : _21 = .GOMP_SIMD_LANE (simduid.3_14(D), 1);
7473 : D.2042[_21] = 0;
7474 : // Actual input phase:
7475 : ...
7476 : r.0_5 = D.2042[_20];
7477 : _6 = _4 + r.0_5;
7478 : D.2042[_20] = _6;
7479 : // Initialization for scan phase:
7480 : _25 = .GOMP_SIMD_LANE (simduid.3_14(D), 2);
7481 : _26 = D.2043[_25];
7482 : _27 = D.2042[_25];
7483 : _28 = _26 + _27;
7484 : D.2043[_25] = _28;
7485 : D.2042[_25] = _28;
7486 : // Actual scan phase:
7487 : ...
7488 : r.1_8 = D.2042[_20];
7489 : ...
7490 : The "omp simd array" variable D.2042 holds the privatized copy used
7491 : inside of the loop and D.2043 is another one that holds copies of
7492 : the current original list item. The separate GOMP_SIMD_LANE ifn
7493 : kinds are there in order to allow optimizing the initializer store
7494 : and combiner sequence, e.g. if it is originally some C++ish user
7495 : defined reduction, but allow the vectorizer to pattern recognize it
7496 : and turn into the appropriate vectorized scan.
7497 :
7498 : For exclusive scan, this is slightly different:
7499 : #pragma omp simd reduction(inscan,+:r)
7500 : for (...)
7501 : {
7502 : use (r);
7503 : #pragma omp scan exclusive (r)
7504 : r += something ();
7505 : }
7506 : shall have body with:
7507 : // Initialization for input phase, store the reduction initializer:
7508 : _20 = .GOMP_SIMD_LANE (simduid.3_14(D), 0);
7509 : _21 = .GOMP_SIMD_LANE (simduid.3_14(D), 1);
7510 : D.2042[_21] = 0;
7511 : // Actual input phase:
7512 : ...
7513 : r.0_5 = D.2042[_20];
7514 : _6 = _4 + r.0_5;
7515 : D.2042[_20] = _6;
7516 : // Initialization for scan phase:
7517 : _25 = .GOMP_SIMD_LANE (simduid.3_14(D), 3);
7518 : _26 = D.2043[_25];
7519 : D.2044[_25] = _26;
7520 : _27 = D.2042[_25];
7521 : _28 = _26 + _27;
7522 : D.2043[_25] = _28;
7523 : // Actual scan phase:
7524 : ...
7525 : r.1_8 = D.2044[_20];
7526 : ... */
7527 :
7528 1076 : if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) == 2)
7529 : {
7530 : /* Match the D.2042[_21] = 0; store above. Just require that
7531 : it is a constant or external definition store. */
7532 564 : if (rhs_dt != vect_constant_def && rhs_dt != vect_external_def)
7533 : {
7534 0 : fail_init:
7535 0 : if (dump_enabled_p ())
7536 0 : dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
7537 : "unsupported OpenMP scan initializer store.\n");
7538 0 : return false;
7539 : }
7540 :
7541 564 : if (! loop_vinfo->scan_map)
7542 322 : loop_vinfo->scan_map = new hash_map<tree, tree>;
7543 564 : tree var = TREE_OPERAND (DR_BASE_ADDRESS (dr_info->dr), 0);
7544 564 : tree &cached = loop_vinfo->scan_map->get_or_insert (var);
7545 564 : if (cached)
7546 0 : goto fail_init;
7547 564 : cached = gimple_assign_rhs1 (STMT_VINFO_STMT (stmt_info));
7548 :
7549 : /* These stores can be vectorized normally. */
7550 564 : return true;
7551 : }
7552 :
7553 512 : if (rhs_dt != vect_internal_def)
7554 : {
7555 0 : fail:
7556 0 : if (dump_enabled_p ())
7557 0 : dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
7558 : "unsupported OpenMP scan combiner pattern.\n");
7559 0 : return false;
7560 : }
7561 :
7562 512 : gimple *stmt = STMT_VINFO_STMT (stmt_info);
7563 512 : tree rhs = gimple_assign_rhs1 (stmt);
7564 512 : if (TREE_CODE (rhs) != SSA_NAME)
7565 0 : goto fail;
7566 :
7567 512 : gimple *other_store_stmt = NULL;
7568 512 : tree var = TREE_OPERAND (DR_BASE_ADDRESS (dr_info->dr), 0);
7569 512 : bool inscan_var_store
7570 512 : = lookup_attribute ("omp simd inscan", DECL_ATTRIBUTES (var)) != NULL;
7571 :
7572 512 : if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) == 4)
7573 : {
7574 252 : if (!inscan_var_store)
7575 : {
7576 126 : use_operand_p use_p;
7577 126 : imm_use_iterator iter;
7578 378 : FOR_EACH_IMM_USE_FAST (use_p, iter, rhs)
7579 : {
7580 252 : gimple *use_stmt = USE_STMT (use_p);
7581 252 : if (use_stmt == stmt || is_gimple_debug (use_stmt))
7582 126 : continue;
7583 126 : if (gimple_bb (use_stmt) != gimple_bb (stmt)
7584 126 : || !is_gimple_assign (use_stmt)
7585 126 : || gimple_assign_rhs_class (use_stmt) != GIMPLE_BINARY_RHS
7586 126 : || other_store_stmt
7587 252 : || TREE_CODE (gimple_assign_lhs (use_stmt)) != SSA_NAME)
7588 0 : goto fail;
7589 126 : other_store_stmt = use_stmt;
7590 0 : }
7591 126 : if (other_store_stmt == NULL)
7592 0 : goto fail;
7593 126 : rhs = gimple_assign_lhs (other_store_stmt);
7594 126 : if (!single_imm_use (rhs, &use_p, &other_store_stmt))
7595 0 : goto fail;
7596 : }
7597 : }
7598 260 : else if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) == 3)
7599 : {
7600 260 : use_operand_p use_p;
7601 260 : imm_use_iterator iter;
7602 1040 : FOR_EACH_IMM_USE_FAST (use_p, iter, rhs)
7603 : {
7604 520 : gimple *use_stmt = USE_STMT (use_p);
7605 520 : if (use_stmt == stmt || is_gimple_debug (use_stmt))
7606 260 : continue;
7607 260 : if (other_store_stmt)
7608 0 : goto fail;
7609 260 : other_store_stmt = use_stmt;
7610 260 : }
7611 : }
7612 : else
7613 0 : goto fail;
7614 :
7615 512 : gimple *def_stmt = SSA_NAME_DEF_STMT (rhs);
7616 512 : if (gimple_bb (def_stmt) != gimple_bb (stmt)
7617 512 : || !is_gimple_assign (def_stmt)
7618 1024 : || gimple_assign_rhs_class (def_stmt) != GIMPLE_BINARY_RHS)
7619 0 : goto fail;
7620 :
7621 512 : enum tree_code code = gimple_assign_rhs_code (def_stmt);
7622 : /* For pointer addition, we should use the normal plus for the vector
7623 : operation. */
7624 512 : switch (code)
7625 : {
7626 0 : case POINTER_PLUS_EXPR:
7627 0 : code = PLUS_EXPR;
7628 0 : break;
7629 0 : case MULT_HIGHPART_EXPR:
7630 0 : goto fail;
7631 : default:
7632 : break;
7633 : }
7634 512 : if (TREE_CODE_LENGTH (code) != binary_op || !commutative_tree_code (code))
7635 0 : goto fail;
7636 :
7637 512 : tree rhs1 = gimple_assign_rhs1 (def_stmt);
7638 512 : tree rhs2 = gimple_assign_rhs2 (def_stmt);
7639 512 : if (TREE_CODE (rhs1) != SSA_NAME || TREE_CODE (rhs2) != SSA_NAME)
7640 0 : goto fail;
7641 :
7642 512 : gimple *load1_stmt = SSA_NAME_DEF_STMT (rhs1);
7643 512 : gimple *load2_stmt = SSA_NAME_DEF_STMT (rhs2);
7644 512 : if (gimple_bb (load1_stmt) != gimple_bb (stmt)
7645 512 : || !gimple_assign_load_p (load1_stmt)
7646 512 : || gimple_bb (load2_stmt) != gimple_bb (stmt)
7647 1024 : || !gimple_assign_load_p (load2_stmt))
7648 0 : goto fail;
7649 :
7650 512 : stmt_vec_info load1_stmt_info = loop_vinfo->lookup_stmt (load1_stmt);
7651 512 : stmt_vec_info load2_stmt_info = loop_vinfo->lookup_stmt (load2_stmt);
7652 512 : if (load1_stmt_info == NULL
7653 512 : || load2_stmt_info == NULL
7654 512 : || (STMT_VINFO_SIMD_LANE_ACCESS_P (load1_stmt_info)
7655 512 : != STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info))
7656 512 : || (STMT_VINFO_SIMD_LANE_ACCESS_P (load2_stmt_info)
7657 512 : != STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info)))
7658 0 : goto fail;
7659 :
7660 512 : if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) == 4 && inscan_var_store)
7661 : {
7662 126 : dr_vec_info *load1_dr_info = STMT_VINFO_DR_INFO (load1_stmt_info);
7663 126 : if (TREE_CODE (DR_BASE_ADDRESS (load1_dr_info->dr)) != ADDR_EXPR
7664 126 : || !VAR_P (TREE_OPERAND (DR_BASE_ADDRESS (load1_dr_info->dr), 0)))
7665 0 : goto fail;
7666 126 : tree var1 = TREE_OPERAND (DR_BASE_ADDRESS (load1_dr_info->dr), 0);
7667 126 : tree lrhs;
7668 126 : if (lookup_attribute ("omp simd inscan", DECL_ATTRIBUTES (var1)))
7669 : lrhs = rhs1;
7670 : else
7671 16 : lrhs = rhs2;
7672 126 : use_operand_p use_p;
7673 126 : imm_use_iterator iter;
7674 504 : FOR_EACH_IMM_USE_FAST (use_p, iter, lrhs)
7675 : {
7676 252 : gimple *use_stmt = USE_STMT (use_p);
7677 252 : if (use_stmt == def_stmt || is_gimple_debug (use_stmt))
7678 126 : continue;
7679 126 : if (other_store_stmt)
7680 0 : goto fail;
7681 126 : other_store_stmt = use_stmt;
7682 126 : }
7683 : }
7684 :
7685 512 : if (other_store_stmt == NULL)
7686 0 : goto fail;
7687 512 : if (gimple_bb (other_store_stmt) != gimple_bb (stmt)
7688 512 : || !gimple_store_p (other_store_stmt))
7689 0 : goto fail;
7690 :
7691 512 : stmt_vec_info other_store_stmt_info
7692 512 : = loop_vinfo->lookup_stmt (other_store_stmt);
7693 512 : if (other_store_stmt_info == NULL
7694 512 : || (STMT_VINFO_SIMD_LANE_ACCESS_P (other_store_stmt_info)
7695 512 : != STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info)))
7696 0 : goto fail;
7697 :
7698 512 : gimple *stmt1 = stmt;
7699 512 : gimple *stmt2 = other_store_stmt;
7700 512 : if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) == 4 && !inscan_var_store)
7701 : std::swap (stmt1, stmt2);
7702 512 : if (scan_operand_equal_p (gimple_assign_lhs (stmt1),
7703 : gimple_assign_rhs1 (load2_stmt)))
7704 : {
7705 162 : std::swap (rhs1, rhs2);
7706 162 : std::swap (load1_stmt, load2_stmt);
7707 162 : std::swap (load1_stmt_info, load2_stmt_info);
7708 : }
7709 512 : if (!scan_operand_equal_p (gimple_assign_lhs (stmt1),
7710 : gimple_assign_rhs1 (load1_stmt)))
7711 0 : goto fail;
7712 :
7713 512 : tree var3 = NULL_TREE;
7714 512 : if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) == 3
7715 512 : && !scan_operand_equal_p (gimple_assign_lhs (stmt2),
7716 : gimple_assign_rhs1 (load2_stmt)))
7717 0 : goto fail;
7718 512 : else if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) == 4)
7719 : {
7720 252 : dr_vec_info *load2_dr_info = STMT_VINFO_DR_INFO (load2_stmt_info);
7721 252 : if (TREE_CODE (DR_BASE_ADDRESS (load2_dr_info->dr)) != ADDR_EXPR
7722 252 : || !VAR_P (TREE_OPERAND (DR_BASE_ADDRESS (load2_dr_info->dr), 0)))
7723 0 : goto fail;
7724 252 : var3 = TREE_OPERAND (DR_BASE_ADDRESS (load2_dr_info->dr), 0);
7725 252 : if (!lookup_attribute ("omp simd array", DECL_ATTRIBUTES (var3))
7726 252 : || lookup_attribute ("omp simd inscan", DECL_ATTRIBUTES (var3))
7727 504 : || lookup_attribute ("omp simd inscan exclusive",
7728 252 : DECL_ATTRIBUTES (var3)))
7729 0 : goto fail;
7730 : }
7731 :
7732 512 : dr_vec_info *other_dr_info = STMT_VINFO_DR_INFO (other_store_stmt_info);
7733 512 : if (TREE_CODE (DR_BASE_ADDRESS (other_dr_info->dr)) != ADDR_EXPR
7734 512 : || !VAR_P (TREE_OPERAND (DR_BASE_ADDRESS (other_dr_info->dr), 0)))
7735 0 : goto fail;
7736 :
7737 512 : tree var1 = TREE_OPERAND (DR_BASE_ADDRESS (dr_info->dr), 0);
7738 512 : tree var2 = TREE_OPERAND (DR_BASE_ADDRESS (other_dr_info->dr), 0);
7739 512 : if (!lookup_attribute ("omp simd array", DECL_ATTRIBUTES (var1))
7740 512 : || !lookup_attribute ("omp simd array", DECL_ATTRIBUTES (var2))
7741 1024 : || (!lookup_attribute ("omp simd inscan", DECL_ATTRIBUTES (var1)))
7742 512 : == (!lookup_attribute ("omp simd inscan", DECL_ATTRIBUTES (var2))))
7743 0 : goto fail;
7744 :
7745 512 : if (lookup_attribute ("omp simd inscan", DECL_ATTRIBUTES (var1)))
7746 256 : std::swap (var1, var2);
7747 :
7748 512 : if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) == 4)
7749 : {
7750 252 : if (!lookup_attribute ("omp simd inscan exclusive",
7751 252 : DECL_ATTRIBUTES (var1)))
7752 0 : goto fail;
7753 252 : var1 = var3;
7754 : }
7755 :
7756 512 : if (loop_vinfo->scan_map == NULL)
7757 0 : goto fail;
7758 512 : tree *init = loop_vinfo->scan_map->get (var1);
7759 512 : if (init == NULL)
7760 0 : goto fail;
7761 :
7762 : /* The IL is as expected, now check if we can actually vectorize it.
7763 : Inclusive scan:
7764 : _26 = D.2043[_25];
7765 : _27 = D.2042[_25];
7766 : _28 = _26 + _27;
7767 : D.2043[_25] = _28;
7768 : D.2042[_25] = _28;
7769 : should be vectorized as (where _40 is the vectorized rhs
7770 : from the D.2042[_21] = 0; store):
7771 : _30 = MEM <vector(8) int> [(int *)&D.2043];
7772 : _31 = MEM <vector(8) int> [(int *)&D.2042];
7773 : _32 = VEC_PERM_EXPR <_40, _31, { 0, 8, 9, 10, 11, 12, 13, 14 }>;
7774 : _33 = _31 + _32;
7775 : // _33 = { _31[0], _31[0]+_31[1], _31[1]+_31[2], ..., _31[6]+_31[7] };
7776 : _34 = VEC_PERM_EXPR <_40, _33, { 0, 1, 8, 9, 10, 11, 12, 13 }>;
7777 : _35 = _33 + _34;
7778 : // _35 = { _31[0], _31[0]+_31[1], _31[0]+.._31[2], _31[0]+.._31[3],
7779 : // _31[1]+.._31[4], ... _31[4]+.._31[7] };
7780 : _36 = VEC_PERM_EXPR <_40, _35, { 0, 1, 2, 3, 8, 9, 10, 11 }>;
7781 : _37 = _35 + _36;
7782 : // _37 = { _31[0], _31[0]+_31[1], _31[0]+.._31[2], _31[0]+.._31[3],
7783 : // _31[0]+.._31[4], ... _31[0]+.._31[7] };
7784 : _38 = _30 + _37;
7785 : _39 = VEC_PERM_EXPR <_38, _38, { 7, 7, 7, 7, 7, 7, 7, 7 }>;
7786 : MEM <vector(8) int> [(int *)&D.2043] = _39;
7787 : MEM <vector(8) int> [(int *)&D.2042] = _38;
7788 : Exclusive scan:
7789 : _26 = D.2043[_25];
7790 : D.2044[_25] = _26;
7791 : _27 = D.2042[_25];
7792 : _28 = _26 + _27;
7793 : D.2043[_25] = _28;
7794 : should be vectorized as (where _40 is the vectorized rhs
7795 : from the D.2042[_21] = 0; store):
7796 : _30 = MEM <vector(8) int> [(int *)&D.2043];
7797 : _31 = MEM <vector(8) int> [(int *)&D.2042];
7798 : _32 = VEC_PERM_EXPR <_40, _31, { 0, 8, 9, 10, 11, 12, 13, 14 }>;
7799 : _33 = VEC_PERM_EXPR <_40, _32, { 0, 8, 9, 10, 11, 12, 13, 14 }>;
7800 : _34 = _32 + _33;
7801 : // _34 = { 0, _31[0], _31[0]+_31[1], _31[1]+_31[2], _31[2]+_31[3],
7802 : // _31[3]+_31[4], ... _31[5]+.._31[6] };
7803 : _35 = VEC_PERM_EXPR <_40, _34, { 0, 1, 8, 9, 10, 11, 12, 13 }>;
7804 : _36 = _34 + _35;
7805 : // _36 = { 0, _31[0], _31[0]+_31[1], _31[0]+.._31[2], _31[0]+.._31[3],
7806 : // _31[1]+.._31[4], ... _31[3]+.._31[6] };
7807 : _37 = VEC_PERM_EXPR <_40, _36, { 0, 1, 2, 3, 8, 9, 10, 11 }>;
7808 : _38 = _36 + _37;
7809 : // _38 = { 0, _31[0], _31[0]+_31[1], _31[0]+.._31[2], _31[0]+.._31[3],
7810 : // _31[0]+.._31[4], ... _31[0]+.._31[6] };
7811 : _39 = _30 + _38;
7812 : _50 = _31 + _39;
7813 : _51 = VEC_PERM_EXPR <_50, _50, { 7, 7, 7, 7, 7, 7, 7, 7 }>;
7814 : MEM <vector(8) int> [(int *)&D.2044] = _39;
7815 : MEM <vector(8) int> [(int *)&D.2042] = _51; */
7816 512 : enum machine_mode vec_mode = TYPE_MODE (vectype);
7817 512 : optab optab = optab_for_tree_code (code, vectype, optab_default);
7818 512 : if (!optab || !can_implement_p (optab, vec_mode))
7819 0 : goto fail;
7820 :
7821 512 : int units_log2 = scan_store_can_perm_p (vectype, *init);
7822 512 : if (units_log2 == -1)
7823 0 : goto fail;
7824 :
7825 : return true;
7826 : }
7827 :
7828 :
7829 : /* Function vectorizable_scan_store.
7830 :
7831 : Helper of vectorizable_store, arguments like on vectorizable_store.
7832 : Handle only the transformation, checking is done in check_scan_store. */
7833 :
7834 : static bool
7835 512 : vectorizable_scan_store (vec_info *vinfo, stmt_vec_info stmt_info,
7836 : slp_tree slp_node, gimple_stmt_iterator *gsi)
7837 : {
7838 512 : loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
7839 512 : dr_vec_info *dr_info = STMT_VINFO_DR_INFO (stmt_info);
7840 512 : tree ref_type = reference_alias_ptr_type (DR_REF (dr_info->dr));
7841 512 : tree vectype = SLP_TREE_VECTYPE (slp_node);
7842 :
7843 512 : if (dump_enabled_p ())
7844 492 : dump_printf_loc (MSG_NOTE, vect_location,
7845 : "transform scan store.\n");
7846 :
7847 512 : gimple *stmt = STMT_VINFO_STMT (stmt_info);
7848 512 : tree rhs = gimple_assign_rhs1 (stmt);
7849 512 : gcc_assert (TREE_CODE (rhs) == SSA_NAME);
7850 :
7851 512 : tree var = TREE_OPERAND (DR_BASE_ADDRESS (dr_info->dr), 0);
7852 512 : bool inscan_var_store
7853 512 : = lookup_attribute ("omp simd inscan", DECL_ATTRIBUTES (var)) != NULL;
7854 :
 : /* For exclusive scan (simd lane access kind 4) stores of the
 : non-inscan variable, the stored rhs is the pre-combine value;
 : chase its (single relevant, non-debug) use to reach the result
 : of the combiner statement instead. The IL shape was already
 : verified by check_scan_store. */
7855 512 : if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) == 4 && !inscan_var_store)
7856 : {
7857 126 : use_operand_p use_p;
7858 126 : imm_use_iterator iter;
7859 252 : FOR_EACH_IMM_USE_FAST (use_p, iter, rhs)
7860 : {
7861 126 : gimple *use_stmt = USE_STMT (use_p);
7862 126 : if (use_stmt == stmt || is_gimple_debug (use_stmt))
7863 0 : continue;
7864 126 : rhs = gimple_assign_lhs (use_stmt);
7865 126 : break;
7866 126 : }
7867 : }
7868 :
7869 512 : gimple *def_stmt = SSA_NAME_DEF_STMT (rhs);
7870 512 : enum tree_code code = gimple_assign_rhs_code (def_stmt);
7871 512 : if (code == POINTER_PLUS_EXPR)
7872 0 : code = PLUS_EXPR;
7873 512 : gcc_assert (TREE_CODE_LENGTH (code) == binary_op
7874 : && commutative_tree_code (code));
7875 512 : tree rhs1 = gimple_assign_rhs1 (def_stmt);
7876 512 : tree rhs2 = gimple_assign_rhs2 (def_stmt);
7877 512 : gcc_assert (TREE_CODE (rhs1) == SSA_NAME && TREE_CODE (rhs2) == SSA_NAME);
7878 512 : gimple *load1_stmt = SSA_NAME_DEF_STMT (rhs1);
7879 512 : gimple *load2_stmt = SSA_NAME_DEF_STMT (rhs2);
7880 512 : stmt_vec_info load1_stmt_info = loop_vinfo->lookup_stmt (load1_stmt);
7881 512 : stmt_vec_info load2_stmt_info = loop_vinfo->lookup_stmt (load2_stmt);
7882 512 : dr_vec_info *load1_dr_info = STMT_VINFO_DR_INFO (load1_stmt_info);
7883 512 : dr_vec_info *load2_dr_info = STMT_VINFO_DR_INFO (load2_stmt_info);
7884 512 : tree var1 = TREE_OPERAND (DR_BASE_ADDRESS (load1_dr_info->dr), 0);
7885 512 : tree var2 = TREE_OPERAND (DR_BASE_ADDRESS (load2_dr_info->dr), 0);
7886 :
 : /* Canonicalize so that rhs1/var1/load1_dr_info refer to the
 : non-inscan "omp simd array" variable. */
7887 512 : if (lookup_attribute ("omp simd inscan", DECL_ATTRIBUTES (var1)))
7888 : {
7889 436 : std::swap (rhs1, rhs2);
7890 436 : std::swap (var1, var2);
7891 436 : std::swap (load1_dr_info, load2_dr_info);
7892 : }
7893 :
7894 512 : tree *init = loop_vinfo->scan_map->get (var1);
7895 512 : gcc_assert (init);
7896 :
7897 512 : unsigned HOST_WIDE_INT nunits;
7898 512 : if (!TYPE_VECTOR_SUBPARTS (vectype).is_constant (&nunits))
7899 : gcc_unreachable ();
7900 512 : auto_vec<enum scan_store_kind, 16> use_whole_vector;
7901 512 : int units_log2 = scan_store_can_perm_p (vectype, *init, &use_whole_vector);
7902 512 : gcc_assert (units_log2 > 0);
7903 512 : auto_vec<tree, 16> perms;
7904 512 : perms.quick_grow (units_log2 + 1);
7905 512 : tree zero_vec = NULL_TREE, masktype = NULL_TREE;
 : /* Build the permutation masks. For i < units_log2, perms[i]
 : keeps the first 2^i lanes from the init vector and shifts the
 : remaining lanes up by 2^i; perms[units_log2] broadcasts the
 : last lane into all lanes. */
7906 2392 : for (int i = 0; i <= units_log2; ++i)
7907 : {
7908 1880 : unsigned HOST_WIDE_INT j, k;
7909 1880 : vec_perm_builder sel (nunits, nunits, 1);
7910 1880 : sel.quick_grow (nunits);
7911 1880 : if (i == units_log2)
7912 4864 : for (j = 0; j < nunits; ++j)
7913 4352 : sel[j] = nunits - 1;
7914 : else
7915 : {
7916 5208 : for (j = 0; j < (HOST_WIDE_INT_1U << i); ++j)
7917 3840 : sel[j] = j;
7918 13208 : for (k = 0; j < nunits; ++j, ++k)
7919 11840 : sel[j] = nunits + k;
7920 : }
7921 3248 : vec_perm_indices indices (sel, i == units_log2 ? 1 : 2, nunits);
7922 1880 : if (!use_whole_vector.is_empty ()
7923 0 : && use_whole_vector[i] != scan_store_kind_perm)
7924 : {
7925 0 : if (zero_vec == NULL_TREE)
7926 0 : zero_vec = build_zero_cst (vectype);
7927 0 : if (masktype == NULL_TREE
7928 0 : && use_whole_vector[i] == scan_store_kind_lshift_cond)
7929 0 : masktype = truth_type_for (vectype);
7930 0 : perms[i] = vect_gen_perm_mask_any (vectype, indices);
7931 : }
7932 : else
7933 1880 : perms[i] = vect_gen_perm_mask_checked (vectype, indices);
7934 1880 : }
7935 :
7936 512 : vec_loop_lens *loop_lens
7937 512 : = (loop_vinfo && LOOP_VINFO_FULLY_WITH_LENGTH_P (loop_vinfo)
7938 : ? &LOOP_VINFO_LENS (loop_vinfo)
7939 0 : : NULL);
7940 :
7941 512 : tree vec_oprnd1 = NULL_TREE;
7942 512 : tree vec_oprnd2 = NULL_TREE;
7943 512 : tree vec_oprnd3 = NULL_TREE;
7944 512 : tree dataref_ptr = DR_BASE_ADDRESS (dr_info->dr);
7945 512 : tree dataref_offset = build_int_cst (ref_type, 0);
7946 512 : tree bump = vect_get_data_ptr_increment (vinfo, gsi, dr_info,
7947 : vectype, VMAT_CONTIGUOUS,
7948 : loop_lens);
7949 512 : tree ldataref_ptr = NULL_TREE;
7950 512 : tree orig = NULL_TREE;
7951 512 : if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) == 4 && !inscan_var_store)
7952 126 : ldataref_ptr = DR_BASE_ADDRESS (load1_dr_info->dr);
7953 : /* The initialization is invariant. */
7954 512 : vec_oprnd1 = vect_init_vector (vinfo, stmt_info, *init, vectype, NULL);
7955 512 : auto_vec<tree> vec_oprnds2;
7956 512 : auto_vec<tree> vec_oprnds3;
7957 512 : if (ldataref_ptr == NULL)
7958 : {
7959 : /* We want to lookup the vector operands of the reduction, not those
7960 : of the store - for SLP we have to use the proper SLP node for the
7961 : lookup, which should be the single child of the scan store. */
7962 386 : vect_get_vec_defs (vinfo, SLP_TREE_CHILDREN (slp_node)[0],
7963 : rhs1, &vec_oprnds2, rhs2, &vec_oprnds3);
7964 : /* ??? For SLP we do not key the def on 'rhs1' or 'rhs2' but get
7965 : them in SLP child order. So we have to swap here with logic
7966 : similar to above. */
7967 386 : stmt_vec_info load
7968 386 : = SLP_TREE_SCALAR_STMTS (SLP_TREE_CHILDREN
7969 386 : (SLP_TREE_CHILDREN (slp_node)[0])[0])[0];
7970 386 : dr_vec_info *dr_info = STMT_VINFO_DR_INFO (load);
7971 386 : tree var = TREE_OPERAND (DR_BASE_ADDRESS (dr_info->dr), 0);
7972 386 : if (lookup_attribute ("omp simd inscan", DECL_ATTRIBUTES (var)))
7973 820 : for (unsigned i = 0; i < vec_oprnds2.length (); ++i)
7974 494 : std::swap (vec_oprnds2[i], vec_oprnds3[i]);
7975 : }
7976 : else
7977 126 : vect_get_vec_defs (vinfo, slp_node,
7978 : rhs2, &vec_oprnds3);
 : /* Emit the scan computation for each vector copy. */
7979 1248 : for (unsigned j = 0; j < vec_oprnds3.length (); j++)
7980 : {
7981 736 : if (ldataref_ptr == NULL)
7982 554 : vec_oprnd2 = vec_oprnds2[j];
7983 736 : vec_oprnd3 = vec_oprnds3[j];
7984 736 : if (j == 0)
7985 : orig = vec_oprnd3;
7986 224 : else if (!inscan_var_store)
7987 112 : dataref_offset = int_const_binop (PLUS_EXPR, dataref_offset, bump);
7988 :
7989 736 : if (ldataref_ptr)
7990 : {
7991 182 : vec_oprnd2 = make_ssa_name (vectype);
7992 182 : tree data_ref = fold_build2 (MEM_REF, vectype,
7993 : unshare_expr (ldataref_ptr),
7994 : dataref_offset);
7995 182 : vect_copy_ref_info (data_ref, DR_REF (load1_dr_info->dr));
7996 182 : gimple *g = gimple_build_assign (vec_oprnd2, data_ref);
7997 182 : vect_finish_stmt_generation (vinfo, stmt_info, g, gsi);
7998 : }
7999 :
8000 736 : tree v = vec_oprnd2;
8001 3068 : for (int i = 0; i < units_log2; ++i)
8002 : {
8003 2332 : tree new_temp = make_ssa_name (vectype);
8004 2332 : gimple *g = gimple_build_assign (new_temp, VEC_PERM_EXPR,
8005 : (zero_vec
8006 0 : && (use_whole_vector[i]
8007 0 : != scan_store_kind_perm))
8008 : ? zero_vec : vec_oprnd1, v,
8009 2332 : perms[i]);
8010 2332 : vect_finish_stmt_generation (vinfo, stmt_info, g, gsi);
8011 :
8012 2332 : if (zero_vec && use_whole_vector[i] == scan_store_kind_lshift_cond)
8013 : {
8014 : /* Whole vector shift shifted in zero bits, but if *init
8015 : is not initializer_zerop, we need to replace those elements
8016 : with elements from vec_oprnd1. */
8017 0 : tree_vector_builder vb (masktype, nunits, 1);
8018 0 : for (unsigned HOST_WIDE_INT k = 0; k < nunits; ++k)
8019 0 : vb.quick_push (k < (HOST_WIDE_INT_1U << i)
8020 : ? boolean_false_node : boolean_true_node);
8021 :
8022 0 : tree new_temp2 = make_ssa_name (vectype);
8023 0 : g = gimple_build_assign (new_temp2, VEC_COND_EXPR, vb.build (),
8024 : new_temp, vec_oprnd1);
8025 0 : vect_finish_stmt_generation (vinfo, stmt_info,
8026 : g, gsi);
8027 0 : new_temp = new_temp2;
8028 0 : }
8029 :
8030 : /* For exclusive scan, perform the perms[i] permutation once
8031 : more. */
8032 2332 : if (i == 0
8033 1100 : && STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) == 4
8034 728 : && v == vec_oprnd2)
8035 : {
8036 364 : v = new_temp;
8037 364 : --i;
8038 364 : continue;
8039 : }
8040 :
8041 1968 : tree new_temp2 = make_ssa_name (vectype);
8042 1968 : g = gimple_build_assign (new_temp2, code, v, new_temp);
8043 1968 : vect_finish_stmt_generation (vinfo, stmt_info, g, gsi);
8044 :
8045 1968 : v = new_temp2;
8046 : }
8047 :
8048 736 : tree new_temp = make_ssa_name (vectype);
8049 736 : gimple *g = gimple_build_assign (new_temp, code, orig, v);
8050 736 : vect_finish_stmt_generation (vinfo, stmt_info, g, gsi);
8051 :
8052 736 : tree last_perm_arg = new_temp;
8053 : /* For exclusive scan, new_temp computed above is the exclusive scan
8054 : prefix sum. Turn it into inclusive prefix sum for the broadcast
8055 : of the last element into orig. */
8056 736 : if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) == 4)
8057 : {
8058 364 : last_perm_arg = make_ssa_name (vectype);
8059 364 : g = gimple_build_assign (last_perm_arg, code, new_temp, vec_oprnd2);
8060 364 : vect_finish_stmt_generation (vinfo, stmt_info, g, gsi);
8061 : }
8062 :
 : /* Broadcast the last lane so ORIG carries the running total into
 : the next vector copy and into the final inscan-var stores. */
8063 736 : orig = make_ssa_name (vectype);
8064 2208 : g = gimple_build_assign (orig, VEC_PERM_EXPR, last_perm_arg,
8065 736 : last_perm_arg, perms[units_log2]);
8066 736 : vect_finish_stmt_generation (vinfo, stmt_info, g, gsi);
8067 :
8068 736 : if (!inscan_var_store)
8069 : {
8070 368 : tree data_ref = fold_build2 (MEM_REF, vectype,
8071 : unshare_expr (dataref_ptr),
8072 : dataref_offset);
8073 368 : vect_copy_ref_info (data_ref, DR_REF (dr_info->dr));
8074 368 : g = gimple_build_assign (data_ref, new_temp);
8075 368 : vect_finish_stmt_generation (vinfo, stmt_info, g, gsi);
8076 : }
8077 : }
8078 :
 : /* For the inscan variable, emit the stores only after all copies
 : were processed; each store writes the last copy's broadcast
 : value ORIG. */
8079 512 : if (inscan_var_store)
8080 624 : for (unsigned j = 0; j < vec_oprnds3.length (); j++)
8081 : {
8082 368 : if (j != 0)
8083 112 : dataref_offset = int_const_binop (PLUS_EXPR, dataref_offset, bump);
8084 :
8085 368 : tree data_ref = fold_build2 (MEM_REF, vectype,
8086 : unshare_expr (dataref_ptr),
8087 : dataref_offset);
8088 368 : vect_copy_ref_info (data_ref, DR_REF (dr_info->dr));
8089 368 : gimple *g = gimple_build_assign (data_ref, orig);
8090 368 : vect_finish_stmt_generation (vinfo, stmt_info, g, gsi);
8091 : }
8092 512 : return true;
8093 512 : }
8094 :
8095 :
8096 : /* Function vectorizable_store.
8097 :
8098 : Check if STMT_INFO defines a non scalar data-ref (array/pointer/structure)
8099 : that can be vectorized.
8100 : If COST_VEC is passed, calculate costs but don't change anything,
8101 : otherwise, vectorize STMT_INFO: create a vectorized stmt to replace
8102 : it, and insert it at GSI.
8103 : Return true if STMT_INFO is vectorizable in this way. */
8104 :
8105 : static bool
8106 1955993 : vectorizable_store (vec_info *vinfo,
8107 : stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
8108 : slp_tree slp_node,
8109 : stmt_vector_for_cost *cost_vec)
8110 : {
8111 1955993 : tree data_ref;
8112 1955993 : tree vec_oprnd = NULL_TREE;
8113 1955993 : tree elem_type;
8114 1955993 : loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
8115 1955993 : class loop *loop = NULL;
8116 1955993 : machine_mode vec_mode;
8117 1955993 : tree dummy;
8118 1955993 : enum vect_def_type rhs_dt = vect_unknown_def_type;
8119 1955993 : enum vect_def_type mask_dt = vect_unknown_def_type;
8120 1955993 : tree dataref_ptr = NULL_TREE;
8121 1955993 : tree dataref_offset = NULL_TREE;
8122 1955993 : gimple *ptr_incr = NULL;
8123 1955993 : int j;
8124 1955993 : stmt_vec_info first_stmt_info;
8125 1955993 : bool grouped_store;
8126 1955993 : unsigned int group_size, i;
8127 1955993 : unsigned int vec_num;
8128 1955993 : bb_vec_info bb_vinfo = dyn_cast <bb_vec_info> (vinfo);
8129 1955993 : tree aggr_type;
8130 1955993 : poly_uint64 vf;
8131 1955993 : vec_load_store_type vls_type;
8132 1955993 : tree ref_type;
8133 :
8134 1955993 : if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
8135 : return false;
8136 :
8137 1955993 : if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
8138 194531 : && cost_vec)
8139 : return false;
8140 :
8141 : /* Is vectorizable store? */
8142 :
8143 1761462 : tree mask_vectype = NULL_TREE;
8144 1761462 : slp_tree mask_node = NULL;
8145 1761462 : if (gassign *assign = dyn_cast <gassign *> (stmt_info->stmt))
8146 : {
8147 1695006 : tree scalar_dest = gimple_assign_lhs (assign);
8148 1695006 : if (TREE_CODE (scalar_dest) == VIEW_CONVERT_EXPR
8149 1695006 : && is_pattern_stmt_p (stmt_info))
8150 1393 : scalar_dest = TREE_OPERAND (scalar_dest, 0);
8151 1695006 : if (TREE_CODE (scalar_dest) != ARRAY_REF
8152 1695006 : && TREE_CODE (scalar_dest) != BIT_FIELD_REF
8153 : && TREE_CODE (scalar_dest) != INDIRECT_REF
8154 : && TREE_CODE (scalar_dest) != COMPONENT_REF
8155 : && TREE_CODE (scalar_dest) != IMAGPART_EXPR
8156 : && TREE_CODE (scalar_dest) != REALPART_EXPR
8157 : && TREE_CODE (scalar_dest) != MEM_REF)
8158 : return false;
8159 : }
8160 : else
8161 : {
8162 651199 : gcall *call = dyn_cast <gcall *> (stmt_info->stmt);
8163 8652 : if (!call || !gimple_call_internal_p (call))
8164 : return false;
8165 :
8166 4779 : internal_fn ifn = gimple_call_internal_fn (call);
8167 4779 : if (!internal_store_fn_p (ifn))
8168 : return false;
8169 :
8170 1470 : int mask_index = internal_fn_mask_index (ifn);
8171 1470 : if (mask_index >= 0)
8172 1470 : mask_index = vect_slp_child_index_for_operand
8173 1470 : (call, mask_index, STMT_VINFO_GATHER_SCATTER_P (stmt_info));
8174 1470 : if (mask_index >= 0
8175 1470 : && !vect_check_scalar_mask (vinfo, slp_node, mask_index,
8176 : &mask_node, &mask_dt,
8177 : &mask_vectype))
8178 : return false;
8179 : }
8180 :
8181 1313478 : tree vectype = SLP_TREE_VECTYPE (slp_node), rhs_vectype = NULL_TREE;
8182 1313478 : poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
8183 :
8184 1313478 : if (loop_vinfo)
8185 : {
8186 189150 : loop = LOOP_VINFO_LOOP (loop_vinfo);
8187 189150 : vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
8188 : }
8189 : else
8190 : vf = 1;
8191 1313478 : vec_num = vect_get_num_copies (vinfo, slp_node);
8192 :
8193 : /* FORNOW. This restriction should be relaxed. */
8194 1313478 : if (loop
8195 1313703 : && nested_in_vect_loop_p (loop, stmt_info)
8196 1313711 : && vec_num > 1)
8197 : {
8198 8 : if (dump_enabled_p ())
8199 0 : dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
8200 : "multiple types in nested loop.\n");
8201 8 : return false;
8202 : }
8203 :
8204 1313470 : slp_tree op_node;
8205 1313470 : if (!vect_check_store_rhs (vinfo, stmt_info, slp_node,
8206 : &op_node, &rhs_dt, &rhs_vectype, &vls_type))
8207 : return false;
8208 :
8209 1313446 : elem_type = TREE_TYPE (vectype);
8210 1313446 : vec_mode = TYPE_MODE (vectype);
8211 :
8212 1313446 : if (!STMT_VINFO_DATA_REF (stmt_info))
8213 : return false;
8214 :
8215 1313446 : vect_load_store_data _ls_data{};
8216 1313446 : vect_load_store_data &ls = slp_node->get_data (_ls_data);
8217 1313446 : if (cost_vec
8218 1313446 : && !get_load_store_type (vinfo, stmt_info, vectype, slp_node, mask_node,
8219 : vls_type, &_ls_data))
8220 : return false;
8221 : /* Temporary aliases to analysis data, should not be modified through
8222 : these. */
8223 1312906 : const vect_memory_access_type memory_access_type = ls.memory_access_type;
8224 1312906 : const dr_alignment_support alignment_support_scheme
8225 : = ls.alignment_support_scheme;
8226 1312906 : const int misalignment = ls.misalignment;
8227 1312906 : const poly_int64 poffset = ls.poffset;
8228 :
8229 1312906 : if (slp_node->ldst_lanes
8230 0 : && memory_access_type != VMAT_LOAD_STORE_LANES)
8231 : {
8232 0 : if (dump_enabled_p ())
8233 0 : dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
8234 : "discovered store-lane but cannot use it.\n");
8235 0 : return false;
8236 : }
8237 :
8238 1312906 : if (mask_node)
8239 : {
8240 1380 : if (memory_access_type == VMAT_CONTIGUOUS)
8241 : {
8242 459 : if (!VECTOR_MODE_P (vec_mode)
8243 2276 : || !can_vec_mask_load_store_p (vec_mode,
8244 1138 : TYPE_MODE (mask_vectype), false))
8245 18 : return false;
8246 : }
8247 242 : else if (memory_access_type != VMAT_LOAD_STORE_LANES
8248 242 : && (!mat_gather_scatter_p (memory_access_type)
8249 218 : || (memory_access_type == VMAT_GATHER_SCATTER_LEGACY
8250 154 : && !VECTOR_BOOLEAN_TYPE_P (mask_vectype))))
8251 : {
8252 24 : if (dump_enabled_p ())
8253 24 : dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
8254 : "unsupported access type for masked store.\n");
8255 24 : return false;
8256 : }
8257 218 : else if (memory_access_type == VMAT_GATHER_SCATTER_EMULATED)
8258 : {
8259 64 : if (dump_enabled_p ())
8260 24 : dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
8261 : "unsupported masked emulated scatter.\n");
8262 64 : return false;
8263 : }
8264 : }
8265 : else
8266 : {
8267 : /* FORNOW. In some cases can vectorize even if data-type not supported
8268 : (e.g. - array initialization with 0). */
8269 1311526 : if (!can_implement_p (mov_optab, vec_mode))
8270 : return false;
8271 : }
8272 :
8273 1312800 : dr_vec_info *dr_info = STMT_VINFO_DR_INFO (stmt_info), *first_dr_info = NULL;
8274 1312800 : grouped_store = (STMT_VINFO_GROUPED_ACCESS (stmt_info)
8275 2454795 : && !mat_gather_scatter_p (memory_access_type));
8276 1141995 : if (grouped_store)
8277 : {
8278 1141995 : first_stmt_info = DR_GROUP_FIRST_ELEMENT (stmt_info);
8279 1141995 : first_dr_info = STMT_VINFO_DR_INFO (first_stmt_info);
8280 1141995 : group_size = DR_GROUP_SIZE (first_stmt_info);
8281 : }
8282 : else
8283 : {
8284 1312800 : first_stmt_info = stmt_info;
8285 1312800 : first_dr_info = dr_info;
8286 : group_size = 1;
8287 : }
8288 :
8289 1312800 : if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) > 1 && cost_vec)
8290 : {
8291 1076 : if (!check_scan_store (vinfo, stmt_info, vectype, rhs_dt, slp_node,
8292 : mask_node, memory_access_type))
8293 : return false;
8294 : }
8295 :
8296 2624832 : bool costing_p = cost_vec;
8297 1312032 : if (costing_p) /* transformation not required. */
8298 : {
8299 771401 : if (loop_vinfo
8300 125686 : && LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo))
8301 57290 : check_load_store_for_partial_vectors (loop_vinfo, vectype, slp_node,
8302 : vls_type, group_size, &ls,
8303 : mask_node);
8304 :
8305 771401 : if (!vect_maybe_update_slp_op_vectype (op_node, vectype)
8306 771401 : || (mask_node
8307 723 : && !vect_maybe_update_slp_op_vectype (mask_node,
8308 : mask_vectype)))
8309 : {
8310 0 : if (dump_enabled_p ())
8311 0 : dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
8312 : "incompatible vector types for invariants\n");
8313 0 : return false;
8314 : }
8315 :
8316 771401 : if (dump_enabled_p ()
8317 : && memory_access_type != VMAT_ELEMENTWISE
8318 14645 : && memory_access_type != VMAT_STRIDED_SLP
8319 13972 : && memory_access_type != VMAT_INVARIANT
8320 785373 : && alignment_support_scheme != dr_aligned)
8321 4835 : dump_printf_loc (MSG_NOTE, vect_location,
8322 : "Vectorizing an unaligned access.\n");
8323 :
8324 771401 : SLP_TREE_TYPE (slp_node) = store_vec_info_type;
8325 771401 : slp_node->data = new vect_load_store_data (std::move (ls));
8326 : }
8327 :
8328 : /* Transform. */
8329 :
8330 1312800 : ensure_base_align (dr_info);
8331 :
8332 1312800 : if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) >= 3)
8333 : {
8334 1024 : gcc_assert (memory_access_type == VMAT_CONTIGUOUS);
8335 1024 : gcc_assert (SLP_TREE_LANES (slp_node) == 1);
8336 1024 : if (costing_p)
8337 : {
8338 512 : unsigned int inside_cost = 0, prologue_cost = 0;
8339 512 : if (vls_type == VLS_STORE_INVARIANT)
8340 0 : prologue_cost += record_stmt_cost (cost_vec, 1, scalar_to_vec,
8341 : slp_node, 0, vect_prologue);
8342 512 : vect_get_store_cost (vinfo, stmt_info, slp_node, 1,
8343 : alignment_support_scheme, misalignment,
8344 : &inside_cost, cost_vec);
8345 :
8346 512 : if (dump_enabled_p ())
8347 492 : dump_printf_loc (MSG_NOTE, vect_location,
8348 : "vect_model_store_cost: inside_cost = %d, "
8349 : "prologue_cost = %d .\n",
8350 : inside_cost, prologue_cost);
8351 :
8352 512 : return true;
8353 : }
8354 512 : return vectorizable_scan_store (vinfo, stmt_info, slp_node, gsi);
8355 : }
8356 :
8357 : /* FORNOW */
8358 1311776 : gcc_assert (!grouped_store
8359 : || !loop
8360 : || !nested_in_vect_loop_p (loop, stmt_info));
8361 :
8362 1311776 : grouped_store = false;
8363 1311776 : first_stmt_info = SLP_TREE_SCALAR_STMTS (slp_node)[0];
8364 1311776 : gcc_assert (!STMT_VINFO_GROUPED_ACCESS (first_stmt_info)
8365 : || (DR_GROUP_FIRST_ELEMENT (first_stmt_info) == first_stmt_info));
8366 1311776 : first_dr_info = STMT_VINFO_DR_INFO (first_stmt_info);
8367 :
8368 1311776 : ref_type = get_group_alias_ptr_type (first_stmt_info);
8369 :
8370 1311776 : if (!costing_p && dump_enabled_p ())
8371 12240 : dump_printf_loc (MSG_NOTE, vect_location, "transform store.\n");
8372 :
8373 1311776 : if (memory_access_type == VMAT_ELEMENTWISE
8374 1311776 : || memory_access_type == VMAT_STRIDED_SLP)
8375 : {
8376 28172 : unsigned inside_cost = 0, prologue_cost = 0;
8377 28172 : gimple_stmt_iterator incr_gsi;
8378 28172 : bool insert_after;
8379 28172 : tree offvar = NULL_TREE;
8380 28172 : tree ivstep;
8381 28172 : tree running_off;
8382 28172 : tree stride_base, stride_step, alias_off;
8383 28172 : tree vec_oprnd = NULL_TREE;
8384 28172 : tree dr_offset;
8385 : /* Checked by get_load_store_type. */
8386 28172 : unsigned int const_nunits = nunits.to_constant ();
8387 :
8388 28172 : gcc_assert (!LOOP_VINFO_FULLY_MASKED_P (loop_vinfo));
8389 28172 : gcc_assert (!nested_in_vect_loop_p (loop, stmt_info));
8390 :
8391 28172 : dr_offset = get_dr_vinfo_offset (vinfo, first_dr_info);
8392 28172 : stride_base
8393 28172 : = fold_build_pointer_plus
8394 : (DR_BASE_ADDRESS (first_dr_info->dr),
8395 : size_binop (PLUS_EXPR,
8396 : convert_to_ptrofftype (dr_offset),
8397 : convert_to_ptrofftype (DR_INIT (first_dr_info->dr))));
8398 28172 : stride_step = fold_convert (sizetype, DR_STEP (first_dr_info->dr));
8399 :
8400 : /* For a store with loop-invariant (but other than power-of-2)
8401 : stride (i.e. not a grouped access) like so:
8402 :
8403 : for (i = 0; i < n; i += stride)
8404 : array[i] = ...;
8405 :
8406 : we generate a new induction variable and new stores from
8407 : the components of the (vectorized) rhs:
8408 :
8409 : for (j = 0; ; j += VF*stride)
8410 : vectemp = ...;
8411 : tmp1 = vectemp[0];
8412 : array[j] = tmp1;
8413 : tmp2 = vectemp[1];
8414 : array[j + stride] = tmp2;
8415 : ...
8416 : */
8417 :
8418 : /* ??? Modify local copies of alignment_support_scheme and
8419 : misalignment, but this part of analysis should be done
8420 : earlier and remembered, likewise the chosen load mode. */
8421 28172 : const dr_alignment_support tem = alignment_support_scheme;
8422 28172 : dr_alignment_support alignment_support_scheme = tem;
8423 28172 : const int tem2 = misalignment;
8424 28172 : int misalignment = tem2;
8425 :
8426 28172 : unsigned nstores = const_nunits;
8427 28172 : unsigned lnel = 1;
8428 28172 : tree ltype = elem_type;
8429 28172 : tree lvectype = vectype;
8430 28172 : HOST_WIDE_INT n = gcd (group_size, const_nunits);
8431 28172 : if (n == const_nunits)
8432 : {
8433 2426 : int mis_align = dr_misalignment (first_dr_info, vectype);
8434 : /* With VF > 1 we advance the DR by step, if that is constant
8435 : and only aligned when performed VF times, DR alignment
8436 : analysis can analyze this as aligned since it assumes
8437 : contiguous accesses. But that is not how we code generate
8438 : here, so adjust for this. */
8439 2426 : if (maybe_gt (vf, 1u)
8440 3736 : && !multiple_p (DR_STEP_ALIGNMENT (first_dr_info->dr),
8441 3510 : DR_TARGET_ALIGNMENT (first_dr_info)))
8442 226 : mis_align = -1;
8443 2426 : dr_alignment_support dr_align
8444 2426 : = vect_supportable_dr_alignment (vinfo, dr_info, vectype,
8445 : mis_align);
8446 2426 : if (dr_align == dr_aligned
8447 2426 : || dr_align == dr_unaligned_supported)
8448 : {
8449 28172 : nstores = 1;
8450 28172 : lnel = const_nunits;
8451 28172 : ltype = vectype;
8452 28172 : lvectype = vectype;
8453 28172 : alignment_support_scheme = dr_align;
8454 28172 : misalignment = mis_align;
8455 : }
8456 : }
8457 25746 : else if (n > 1)
8458 : {
8459 1932 : nstores = const_nunits / n;
8460 1932 : lnel = n;
8461 1932 : ltype = build_vector_type (elem_type, n);
8462 1932 : lvectype = vectype;
8463 1932 : int mis_align = dr_misalignment (first_dr_info, ltype);
8464 1932 : if (maybe_gt (vf, 1u)
8465 3864 : && !multiple_p (DR_STEP_ALIGNMENT (first_dr_info->dr),
8466 3048 : DR_TARGET_ALIGNMENT (first_dr_info)))
8467 816 : mis_align = -1;
8468 1932 : dr_alignment_support dr_align
8469 1932 : = vect_supportable_dr_alignment (vinfo, dr_info, ltype,
8470 : mis_align);
8471 1932 : alignment_support_scheme = dr_align;
8472 1932 : misalignment = mis_align;
8473 :
8474 : /* First check if vec_extract optab doesn't support extraction
8475 : of vector elts directly. */
8476 1932 : scalar_mode elmode = SCALAR_TYPE_MODE (elem_type);
8477 1932 : machine_mode vmode;
8478 3864 : if (!VECTOR_MODE_P (TYPE_MODE (vectype))
8479 2160 : || !related_vector_mode (TYPE_MODE (vectype), elmode,
8480 1932 : n).exists (&vmode)
8481 1756 : || (convert_optab_handler (vec_extract_optab,
8482 1756 : TYPE_MODE (vectype), vmode)
8483 : == CODE_FOR_nothing)
8484 1932 : || !(dr_align == dr_aligned
8485 228 : || dr_align == dr_unaligned_supported))
8486 : {
8487 : /* Try to avoid emitting an extract of vector elements
8488 : by performing the extracts using an integer type of the
8489 : same size, extracting from a vector of those and then
8490 : re-interpreting it as the original vector type if
8491 : supported. */
8492 1704 : unsigned lsize = n * GET_MODE_BITSIZE (elmode);
8493 1704 : unsigned int lnunits = const_nunits / n;
8494 : /* If we can't construct such a vector fall back to
8495 : element extracts from the original vector type and
8496 : element size stores. */
8497 1704 : if (int_mode_for_size (lsize, 0).exists (&elmode)
8498 1704 : && VECTOR_MODE_P (TYPE_MODE (vectype))
8499 1704 : && related_vector_mode (TYPE_MODE (vectype), elmode,
8500 1704 : lnunits).exists (&vmode)
8501 1670 : && (convert_optab_handler (vec_extract_optab,
8502 : vmode, elmode)
8503 : != CODE_FOR_nothing))
8504 : {
8505 1670 : nstores = lnunits;
8506 1670 : lnel = n;
8507 1670 : ltype = build_nonstandard_integer_type (lsize, 1);
8508 1670 : lvectype = build_vector_type (ltype, nstores);
8509 : }
8510 : /* Else fall back to vector extraction anyway.
8511 : Fewer stores are more important than avoiding spilling
8512 : of the vector we extract from. Compared to the
8513 : construction case in vectorizable_load no store-forwarding
8514 : issue exists here for reasonable archs. But only
8515 : if the store is supported. */
8516 34 : else if (!(dr_align == dr_aligned
8517 34 : || dr_align == dr_unaligned_supported))
8518 : {
8519 : nstores = const_nunits;
8520 : lnel = 1;
8521 : ltype = elem_type;
8522 : lvectype = vectype;
8523 : }
8524 : }
8525 : }
8526 28172 : unsigned align;
8527 28172 : if (alignment_support_scheme == dr_aligned)
8528 930 : align = known_alignment (DR_TARGET_ALIGNMENT (first_dr_info));
8529 : else
8530 27242 : align = dr_alignment (vect_dr_behavior (vinfo, first_dr_info));
8531 : /* Alignment is at most the access size if we do multiple stores. */
8532 28172 : if (nstores > 1)
8533 25746 : align = MIN (tree_to_uhwi (TYPE_SIZE_UNIT (ltype)), align);
8534 28172 : ltype = build_aligned_type (ltype, align * BITS_PER_UNIT);
8535 28172 : int ncopies = vec_num;
8536 :
8537 28172 : if (!costing_p)
8538 : {
8539 3517 : ivstep = stride_step;
8540 3517 : ivstep = fold_build2 (MULT_EXPR, TREE_TYPE (ivstep), ivstep,
8541 : build_int_cst (TREE_TYPE (ivstep), vf));
8542 :
8543 3517 : standard_iv_increment_position (loop, &incr_gsi, &insert_after);
8544 :
8545 3517 : stride_base = cse_and_gimplify_to_preheader (loop_vinfo, stride_base);
8546 3517 : ivstep = cse_and_gimplify_to_preheader (loop_vinfo, ivstep);
8547 3517 : create_iv (stride_base, PLUS_EXPR, ivstep, NULL, loop, &incr_gsi,
8548 : insert_after, &offvar, NULL);
8549 :
8550 3517 : stride_step = cse_and_gimplify_to_preheader (loop_vinfo, stride_step);
8551 : }
8552 :
8553 28172 : alias_off = build_int_cst (ref_type, 0);
8554 28172 : auto_vec<tree> vec_oprnds;
8555 : /* For costing some adjacent vector stores, we'd like to cost with
8556 : the total number of them once instead of cost each one by one. */
8557 28172 : unsigned int n_adjacent_stores = 0;
8558 28172 : running_off = offvar;
8559 28172 : if (!costing_p)
8560 3517 : vect_get_slp_defs (op_node, &vec_oprnds);
8561 28172 : unsigned int group_el = 0;
8562 28172 : unsigned HOST_WIDE_INT elsz
8563 28172 : = tree_to_uhwi (TYPE_SIZE_UNIT (TREE_TYPE (vectype)));
8564 66775 : for (j = 0; j < ncopies; j++)
8565 : {
8566 38603 : if (!costing_p)
8567 : {
8568 5416 : vec_oprnd = vec_oprnds[j];
8569 : /* Pun the vector to extract from if necessary. */
8570 5416 : if (lvectype != vectype)
8571 : {
8572 1302 : tree tem = make_ssa_name (lvectype);
8573 1302 : tree cvt = build1 (VIEW_CONVERT_EXPR, lvectype, vec_oprnd);
8574 1302 : gimple *pun = gimple_build_assign (tem, cvt);
8575 1302 : vect_finish_stmt_generation (vinfo, stmt_info, pun, gsi);
8576 1302 : vec_oprnd = tem;
8577 : }
8578 : }
8579 174030 : for (i = 0; i < nstores; i++)
8580 : {
8581 135427 : if (costing_p)
8582 : {
8583 118561 : n_adjacent_stores++;
8584 118561 : continue;
8585 : }
8586 16866 : tree newref, newoff;
8587 16866 : gimple *incr, *assign;
8588 16866 : tree size = TYPE_SIZE (ltype);
8589 : /* Extract the i'th component. */
8590 16866 : tree pos = fold_build2 (MULT_EXPR, bitsizetype,
8591 : bitsize_int (i), size);
8592 16866 : tree elem = fold_build3 (BIT_FIELD_REF, ltype, vec_oprnd,
8593 : size, pos);
8594 :
8595 16866 : elem = force_gimple_operand_gsi (gsi, elem, true, NULL_TREE, true,
8596 : GSI_SAME_STMT);
8597 :
8598 16866 : tree this_off = build_int_cst (TREE_TYPE (alias_off),
8599 16866 : group_el * elsz);
8600 16866 : newref = build2 (MEM_REF, ltype, running_off, this_off);
8601 16866 : vect_copy_ref_info (newref, DR_REF (first_dr_info->dr));
8602 :
8603 : /* And store it to *running_off. */
8604 16866 : assign = gimple_build_assign (newref, elem);
8605 16866 : vect_finish_stmt_generation (vinfo, stmt_info, assign, gsi);
8606 :
8607 16866 : group_el += lnel;
8608 16866 : if (group_el == group_size)
8609 : {
8610 15143 : newoff = copy_ssa_name (running_off, NULL);
8611 15143 : incr = gimple_build_assign (newoff, POINTER_PLUS_EXPR,
8612 : running_off, stride_step);
8613 15143 : vect_finish_stmt_generation (vinfo, stmt_info, incr, gsi);
8614 :
8615 15143 : running_off = newoff;
8616 15143 : group_el = 0;
8617 : }
8618 : }
8619 : }
8620 :
8621 28172 : if (costing_p)
8622 : {
8623 24655 : if (n_adjacent_stores > 0)
8624 : {
8625 : /* Take a single lane vector type store as scalar
8626 : store to avoid ICE like 110776. */
8627 24655 : if (VECTOR_TYPE_P (ltype)
8628 24655 : && maybe_ne (TYPE_VECTOR_SUBPARTS (ltype), 1U))
8629 1197 : vect_get_store_cost (vinfo, stmt_info, slp_node,
8630 : n_adjacent_stores, alignment_support_scheme,
8631 : misalignment, &inside_cost, cost_vec);
8632 : else
8633 23458 : inside_cost
8634 23458 : += record_stmt_cost (cost_vec, n_adjacent_stores,
8635 : scalar_store, slp_node, 0, vect_body);
8636 : /* Only need vector extracting when there are more
8637 : than one stores. */
8638 24655 : if (nstores > 1)
8639 23069 : inside_cost
8640 23069 : += record_stmt_cost (cost_vec, n_adjacent_stores,
8641 : vec_to_scalar, slp_node, 0, vect_body);
8642 : }
8643 24655 : if (dump_enabled_p ())
8644 673 : dump_printf_loc (MSG_NOTE, vect_location,
8645 : "vect_model_store_cost: inside_cost = %d, "
8646 : "prologue_cost = %d .\n",
8647 : inside_cost, prologue_cost);
8648 : }
8649 :
8650 28172 : return true;
8651 28172 : }
8652 :
8653 1283604 : gcc_assert (alignment_support_scheme);
8654 1283604 : vec_loop_masks *loop_masks
8655 159276 : = (loop_vinfo && LOOP_VINFO_FULLY_MASKED_P (loop_vinfo)
8656 1283604 : ? &LOOP_VINFO_MASKS (loop_vinfo)
8657 11 : : NULL);
8658 11 : vec_loop_lens *loop_lens
8659 159276 : = (loop_vinfo && LOOP_VINFO_FULLY_WITH_LENGTH_P (loop_vinfo)
8660 : ? &LOOP_VINFO_LENS (loop_vinfo)
8661 0 : : NULL);
8662 :
8663 : /* The vect_transform_stmt and vect_analyze_stmt will go here but there
8664 : are some differences here. We cannot enable both the lens and masks
8665 : during transform but it is allowed during analysis.
8666 : Shouldn't go with length-based approach if fully masked. */
8667 1283604 : if (cost_vec == NULL)
8668 : /* The cost_vec is NULL during transform. */
8669 537370 : gcc_assert ((!loop_lens || !loop_masks));
8670 :
8671 : /* Targets with store-lane instructions must not require explicit
8672 : realignment. vect_supportable_dr_alignment always returns either
8673 : dr_aligned or dr_unaligned_supported for masked operations. */
8674 1283604 : gcc_assert ((memory_access_type != VMAT_LOAD_STORE_LANES
8675 : && !mask_node
8676 : && !loop_masks)
8677 : || alignment_support_scheme == dr_aligned
8678 : || alignment_support_scheme == dr_unaligned_supported);
8679 :
8680 1283604 : tree offset = NULL_TREE;
8681 1283604 : if (!known_eq (poffset, 0))
8682 4056 : offset = size_int (poffset);
8683 :
8684 1283604 : tree bump;
8685 1283604 : tree vec_offset = NULL_TREE;
8686 1283604 : if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
8687 : {
8688 1366 : aggr_type = NULL_TREE;
8689 1366 : bump = NULL_TREE;
8690 : }
8691 1282238 : else if (mat_gather_scatter_p (memory_access_type))
8692 : {
8693 0 : aggr_type = elem_type;
8694 0 : if (!costing_p)
8695 : {
8696 0 : tree vtype = ls.ls_type ? ls.ls_type : vectype;
8697 0 : vect_get_strided_load_store_ops (stmt_info, slp_node, vtype,
8698 : ls.strided_offset_vectype,
8699 : loop_vinfo, gsi,
8700 : &bump, &vec_offset, loop_lens);
8701 : }
8702 : }
8703 : else
8704 : {
8705 1282238 : if (memory_access_type == VMAT_LOAD_STORE_LANES)
8706 0 : aggr_type = build_array_type_nelts (elem_type, group_size * nunits);
8707 : else
8708 : aggr_type = vectype;
8709 1282238 : if (!costing_p)
8710 536900 : bump = vect_get_data_ptr_increment (vinfo, gsi, dr_info, aggr_type,
8711 : memory_access_type, loop_lens);
8712 : }
8713 :
8714 1283604 : if (loop_vinfo && mask_node && !costing_p)
8715 550 : LOOP_VINFO_HAS_MASK_STORE (loop_vinfo) = true;
8716 :
8717 : /* In case the vectorization factor (VF) is bigger than the number
8718 : of elements that we can fit in a vectype (nunits), we have to generate
8719 : more than one vector stmt - i.e - we need to "unroll" the
8720 : vector stmt by a factor VF/nunits. */
8721 :
8722 1283604 : auto_vec<tree> dr_chain (group_size);
8723 1283604 : auto_vec<tree> vec_masks;
8724 1283604 : tree vec_mask = NULL;
8725 1283604 : auto_delete_vec<auto_vec<tree>> gvec_oprnds (group_size);
8726 5824529 : for (i = 0; i < group_size; i++)
8727 3257321 : gvec_oprnds.quick_push (new auto_vec<tree> ());
8728 :
8729 1283604 : if (memory_access_type == VMAT_LOAD_STORE_LANES)
8730 : {
8731 0 : const internal_fn lanes_ifn = ls.lanes_ifn;
8732 :
8733 0 : if (costing_p)
8734 : /* Update all incoming store operand nodes, the general handling
8735 : above only handles the mask and the first store operand node. */
8736 0 : for (slp_tree child : SLP_TREE_CHILDREN (slp_node))
8737 0 : if (child != mask_node
8738 0 : && !vect_maybe_update_slp_op_vectype (child, vectype))
8739 : {
8740 0 : if (dump_enabled_p ())
8741 0 : dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
8742 : "incompatible vector types for invariants\n");
8743 0 : return false;
8744 : }
8745 0 : unsigned inside_cost = 0, prologue_cost = 0;
8746 : /* For costing some adjacent vector stores, we'd like to cost with
8747 : the total number of them once instead of cost each one by one. */
8748 0 : unsigned int n_adjacent_stores = 0;
8749 0 : int ncopies = vec_num / group_size;
8750 0 : for (j = 0; j < ncopies; j++)
8751 : {
8752 0 : if (j == 0)
8753 : {
8754 0 : if (!costing_p)
8755 : {
8756 0 : if (mask_node)
8757 : {
8758 0 : vect_get_slp_defs (mask_node, &vec_masks);
8759 0 : vec_mask = vec_masks[0];
8760 : }
8761 0 : dataref_ptr
8762 0 : = vect_create_data_ref_ptr (vinfo, first_stmt_info,
8763 : aggr_type, NULL, offset, &dummy,
8764 : gsi, &ptr_incr, false, bump);
8765 : }
8766 : }
8767 0 : else if (!costing_p)
8768 : {
8769 0 : gcc_assert (!LOOP_VINFO_USING_SELECT_VL_P (loop_vinfo));
8770 0 : if (mask_node)
8771 0 : vec_mask = vec_masks[j];
8772 0 : dataref_ptr = bump_vector_ptr (vinfo, dataref_ptr, ptr_incr, gsi,
8773 : stmt_info, bump);
8774 : }
8775 :
8776 0 : if (costing_p)
8777 : {
8778 0 : n_adjacent_stores += group_size;
8779 0 : continue;
8780 : }
8781 :
8782 : /* Get an array into which we can store the individual vectors. */
8783 0 : tree vec_array = create_vector_array (vectype, group_size);
8784 :
8785 : /* Invalidate the current contents of VEC_ARRAY. This should
8786 : become an RTL clobber too, which prevents the vector registers
8787 : from being upward-exposed. */
8788 0 : vect_clobber_variable (vinfo, stmt_info, gsi, vec_array);
8789 :
8790 : /* Store the individual vectors into the array. */
8791 0 : for (i = 0; i < group_size; i++)
8792 : {
8793 0 : slp_tree child;
8794 0 : if (i == 0 || !mask_node)
8795 0 : child = SLP_TREE_CHILDREN (slp_node)[i];
8796 : else
8797 0 : child = SLP_TREE_CHILDREN (slp_node)[i + 1];
8798 0 : vec_oprnd = SLP_TREE_VEC_DEFS (child)[j];
8799 0 : write_vector_array (vinfo, stmt_info, gsi, vec_oprnd, vec_array,
8800 : i);
8801 : }
8802 :
8803 0 : tree final_mask = NULL;
8804 0 : tree final_len = NULL;
8805 0 : tree bias = NULL;
8806 0 : if (loop_masks)
8807 0 : final_mask = vect_get_loop_mask (loop_vinfo, gsi, loop_masks,
8808 : ncopies, vectype, j);
8809 0 : if (vec_mask)
8810 0 : final_mask = prepare_vec_mask (loop_vinfo, mask_vectype, final_mask,
8811 : vec_mask, gsi);
8812 :
8813 0 : if (lanes_ifn == IFN_MASK_LEN_STORE_LANES)
8814 : {
8815 0 : if (loop_lens)
8816 0 : final_len = vect_get_loop_len (loop_vinfo, gsi, loop_lens,
8817 : ncopies, vectype, j, 1, true);
8818 : else
8819 0 : final_len = size_int (TYPE_VECTOR_SUBPARTS (vectype));
8820 0 : signed char biasval
8821 0 : = LOOP_VINFO_PARTIAL_LOAD_STORE_BIAS (loop_vinfo);
8822 0 : bias = build_int_cst (intQI_type_node, biasval);
8823 0 : if (!final_mask)
8824 : {
8825 0 : mask_vectype = truth_type_for (vectype);
8826 0 : final_mask = build_minus_one_cst (mask_vectype);
8827 : }
8828 : }
8829 :
8830 0 : gcall *call;
8831 0 : if (final_len && final_mask)
8832 : {
8833 : /* Emit:
8834 : MASK_LEN_STORE_LANES (DATAREF_PTR, ALIAS_PTR, VEC_MASK,
8835 : LEN, BIAS, VEC_ARRAY). */
8836 0 : unsigned int align = TYPE_ALIGN (TREE_TYPE (vectype));
8837 0 : tree alias_ptr = build_int_cst (ref_type, align);
8838 0 : call = gimple_build_call_internal (IFN_MASK_LEN_STORE_LANES, 6,
8839 : dataref_ptr, alias_ptr,
8840 : final_mask, final_len, bias,
8841 : vec_array);
8842 : }
8843 0 : else if (final_mask)
8844 : {
8845 : /* Emit:
8846 : MASK_STORE_LANES (DATAREF_PTR, ALIAS_PTR, VEC_MASK,
8847 : VEC_ARRAY). */
8848 0 : unsigned int align = TYPE_ALIGN (TREE_TYPE (vectype));
8849 0 : tree alias_ptr = build_int_cst (ref_type, align);
8850 0 : call = gimple_build_call_internal (IFN_MASK_STORE_LANES, 4,
8851 : dataref_ptr, alias_ptr,
8852 : final_mask, vec_array);
8853 : }
8854 : else
8855 : {
8856 : /* Emit:
8857 : MEM_REF[...all elements...] = STORE_LANES (VEC_ARRAY). */
8858 0 : data_ref = create_array_ref (aggr_type, dataref_ptr, ref_type);
8859 0 : call = gimple_build_call_internal (IFN_STORE_LANES, 1, vec_array);
8860 0 : gimple_call_set_lhs (call, data_ref);
8861 : }
8862 0 : gimple_call_set_nothrow (call, true);
8863 0 : vect_finish_stmt_generation (vinfo, stmt_info, call, gsi);
8864 :
8865 : /* Record that VEC_ARRAY is now dead. */
8866 0 : vect_clobber_variable (vinfo, stmt_info, gsi, vec_array);
8867 : }
8868 :
8869 0 : if (costing_p)
8870 : {
8871 0 : if (n_adjacent_stores > 0)
8872 0 : vect_get_store_cost (vinfo, stmt_info, slp_node, n_adjacent_stores,
8873 : alignment_support_scheme, misalignment,
8874 : &inside_cost, cost_vec);
8875 0 : if (dump_enabled_p ())
8876 0 : dump_printf_loc (MSG_NOTE, vect_location,
8877 : "vect_model_store_cost: inside_cost = %d, "
8878 : "prologue_cost = %d .\n",
8879 : inside_cost, prologue_cost);
8880 : }
8881 :
8882 0 : return true;
8883 : }
8884 :
8885 1283604 : if (mat_gather_scatter_p (memory_access_type))
8886 : {
8887 1366 : gcc_assert (!grouped_store || ls.ls_type);
8888 1366 : if (ls.ls_type)
8889 0 : vectype = ls.ls_type;
8890 1366 : auto_vec<tree> vec_offsets;
8891 1366 : unsigned int inside_cost = 0, prologue_cost = 0;
8892 1366 : int num_stmts = vec_num;
8893 3124 : for (j = 0; j < num_stmts; j++)
8894 : {
8895 1758 : gimple *new_stmt;
8896 1758 : if (j == 0)
8897 : {
8898 1366 : if (costing_p && vls_type == VLS_STORE_INVARIANT)
8899 210 : prologue_cost += record_stmt_cost (cost_vec, 1, scalar_to_vec,
8900 : slp_node, 0, vect_prologue);
8901 : else if (!costing_p)
8902 : {
8903 : /* Since the store is not grouped, DR_GROUP_SIZE is 1, and
8904 : DR_CHAIN is of size 1. */
8905 470 : gcc_assert (group_size == 1);
8906 470 : vect_get_slp_defs (op_node, gvec_oprnds[0]);
8907 470 : if (mask_node)
8908 70 : vect_get_slp_defs (mask_node, &vec_masks);
8909 :
8910 470 : if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
8911 470 : vect_get_gather_scatter_ops (loop, slp_node,
8912 : &dataref_ptr, &vec_offsets);
8913 : else
8914 0 : dataref_ptr
8915 0 : = vect_create_data_ref_ptr (vinfo, first_stmt_info,
8916 : aggr_type, NULL, offset,
8917 : &dummy, gsi, &ptr_incr, false,
8918 : bump);
8919 : }
8920 : }
8921 392 : else if (!costing_p)
8922 : {
8923 34 : gcc_assert (!LOOP_VINFO_USING_SELECT_VL_P (loop_vinfo));
8924 34 : if (!STMT_VINFO_GATHER_SCATTER_P (stmt_info))
8925 0 : dataref_ptr = bump_vector_ptr (vinfo, dataref_ptr, ptr_incr,
8926 : gsi, stmt_info, bump);
8927 : }
8928 :
8929 2472 : new_stmt = NULL;
8930 714 : if (!costing_p)
8931 : {
8932 504 : vec_oprnd = (*gvec_oprnds[0])[j];
8933 504 : if (mask_node)
8934 90 : vec_mask = vec_masks[j];
8935 : /* We should have caught mismatched types earlier. */
8936 504 : gcc_assert (ls.ls_type
8937 : || useless_type_conversion_p
8938 : (vectype, TREE_TYPE (vec_oprnd)));
8939 : }
8940 504 : tree final_mask = NULL_TREE;
8941 2262 : tree final_len = NULL_TREE;
8942 2262 : tree bias = NULL_TREE;
8943 504 : if (!costing_p)
8944 : {
8945 504 : if (loop_masks)
8946 0 : final_mask = vect_get_loop_mask (loop_vinfo, gsi,
8947 : loop_masks, num_stmts,
8948 : vectype, j);
8949 504 : if (vec_mask)
8950 90 : final_mask = prepare_vec_mask (loop_vinfo, mask_vectype,
8951 : final_mask, vec_mask, gsi);
8952 : }
8953 :
8954 1758 : unsigned align = get_object_alignment (DR_REF (first_dr_info->dr));
8955 1758 : tree alias_align_ptr = build_int_cst (ref_type, align);
8956 1758 : if (memory_access_type == VMAT_GATHER_SCATTER_IFN)
8957 : {
8958 0 : if (costing_p)
8959 : {
8960 0 : if (ls.supported_offset_vectype)
8961 0 : inside_cost
8962 0 : += record_stmt_cost (cost_vec, 1, vector_stmt,
8963 : slp_node, 0, vect_body);
8964 0 : if (ls.supported_scale)
8965 0 : inside_cost
8966 0 : += record_stmt_cost (cost_vec, 1, vector_stmt,
8967 : slp_node, 0, vect_body);
8968 :
8969 0 : unsigned int cnunits = vect_nunits_for_cost (vectype);
8970 0 : inside_cost
8971 0 : += record_stmt_cost (cost_vec, cnunits, scalar_store,
8972 : slp_node, 0, vect_body);
8973 1758 : continue;
8974 0 : }
8975 :
8976 0 : if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
8977 0 : vec_offset = vec_offsets[j];
8978 :
8979 0 : tree scale = size_int (SLP_TREE_GS_SCALE (slp_node));
8980 0 : bool strided = !VECTOR_TYPE_P (TREE_TYPE (vec_offset));
8981 :
8982 : /* Perform the offset conversion and scaling if necessary. */
8983 0 : if (!strided
8984 0 : && (ls.supported_offset_vectype || ls.supported_scale))
8985 : {
8986 0 : gimple_seq stmts = NULL;
8987 0 : if (ls.supported_offset_vectype)
8988 0 : vec_offset = gimple_convert
8989 0 : (&stmts, ls.supported_offset_vectype, vec_offset);
8990 0 : if (ls.supported_scale)
8991 : {
8992 : /* Only scale the vec_offset if we haven't already. */
8993 0 : if (STMT_VINFO_GATHER_SCATTER_P (stmt_info)
8994 0 : || j == 0)
8995 : {
8996 0 : tree mult_cst = build_int_cst
8997 0 : (TREE_TYPE (TREE_TYPE (vec_offset)),
8998 0 : SLP_TREE_GS_SCALE (slp_node) / ls.supported_scale);
8999 0 : tree mult = build_vector_from_val
9000 0 : (TREE_TYPE (vec_offset), mult_cst);
9001 0 : vec_offset = gimple_build
9002 0 : (&stmts, MULT_EXPR, TREE_TYPE (vec_offset),
9003 : vec_offset, mult);
9004 : }
9005 0 : scale = size_int (ls.supported_scale);
9006 : }
9007 0 : gsi_insert_seq_before (gsi, stmts, GSI_SAME_STMT);
9008 : }
9009 :
9010 0 : if (ls.gs.ifn == IFN_MASK_LEN_SCATTER_STORE)
9011 : {
9012 0 : if (loop_lens)
9013 0 : final_len = vect_get_loop_len (loop_vinfo, gsi,
9014 : loop_lens, num_stmts,
9015 : vectype, j, 1, true);
9016 : else
9017 0 : final_len = size_int (TYPE_VECTOR_SUBPARTS (vectype));
9018 :
9019 0 : signed char biasval
9020 0 : = LOOP_VINFO_PARTIAL_LOAD_STORE_BIAS (loop_vinfo);
9021 0 : bias = build_int_cst (intQI_type_node, biasval);
9022 0 : if (!final_mask)
9023 : {
9024 0 : mask_vectype = truth_type_for (vectype);
9025 0 : final_mask = build_minus_one_cst (mask_vectype);
9026 : }
9027 : }
9028 :
9029 0 : if (ls.ls_type)
9030 : {
9031 0 : gimple *conv_stmt
9032 0 : = gimple_build_assign (make_ssa_name (vectype),
9033 : VIEW_CONVERT_EXPR,
9034 : build1 (VIEW_CONVERT_EXPR, vectype,
9035 : vec_oprnd));
9036 0 : vect_finish_stmt_generation (vinfo, stmt_info, conv_stmt,
9037 : gsi);
9038 0 : vec_oprnd = gimple_get_lhs (conv_stmt);
9039 : }
9040 :
9041 0 : gcall *call;
9042 0 : if (final_len && final_mask)
9043 : {
9044 0 : if (VECTOR_TYPE_P (TREE_TYPE (vec_offset)))
9045 0 : call = gimple_build_call_internal (
9046 : IFN_MASK_LEN_SCATTER_STORE, 8, dataref_ptr,
9047 : alias_align_ptr,
9048 : vec_offset, scale, vec_oprnd, final_mask, final_len,
9049 : bias);
9050 : else
9051 : /* Non-vector offset indicates that prefer to take
9052 : MASK_LEN_STRIDED_STORE instead of the
9053 : IFN_MASK_SCATTER_STORE with direct stride arg.
9054 : Similar to the gather case we have checked the
9055 : alignment for a scatter already and assume
9056 : that the strided store has the same requirements. */
9057 0 : call = gimple_build_call_internal (
9058 : IFN_MASK_LEN_STRIDED_STORE, 6, dataref_ptr,
9059 : vec_offset, vec_oprnd, final_mask, final_len, bias);
9060 : }
9061 0 : else if (final_mask)
9062 0 : call = gimple_build_call_internal
9063 0 : (IFN_MASK_SCATTER_STORE, 6, dataref_ptr,
9064 : alias_align_ptr,
9065 : vec_offset, scale, vec_oprnd, final_mask);
9066 : else
9067 0 : call = gimple_build_call_internal (IFN_SCATTER_STORE, 5,
9068 : dataref_ptr,
9069 : alias_align_ptr,
9070 : vec_offset,
9071 : scale, vec_oprnd);
9072 0 : gimple_call_set_nothrow (call, true);
9073 0 : vect_finish_stmt_generation (vinfo, stmt_info, call, gsi);
9074 0 : new_stmt = call;
9075 : }
9076 1758 : else if (memory_access_type == VMAT_GATHER_SCATTER_LEGACY)
9077 : {
9078 : /* The builtin decls path for scatter is legacy, x86 only. */
9079 310 : gcc_assert (nunits.is_constant ()
9080 : && (!final_mask
9081 : || SCALAR_INT_MODE_P
9082 : (TYPE_MODE (TREE_TYPE (final_mask)))));
9083 310 : if (costing_p)
9084 : {
9085 179 : unsigned int cnunits = vect_nunits_for_cost (vectype);
9086 179 : inside_cost
9087 179 : += record_stmt_cost (cost_vec, cnunits, scalar_store,
9088 : slp_node, 0, vect_body);
9089 179 : continue;
9090 179 : }
9091 :
9092 131 : tree offset_vectype = TREE_TYPE (vec_offsets[0]);
9093 131 : poly_uint64 offset_nunits
9094 131 : = TYPE_VECTOR_SUBPARTS (offset_vectype);
9095 131 : if (known_eq (nunits, offset_nunits))
9096 : {
9097 55 : new_stmt = vect_build_one_scatter_store_call
9098 110 : (vinfo, stmt_info, slp_node, gsi,
9099 55 : ls.gs.decl, dataref_ptr, vec_offsets[j],
9100 : vec_oprnd, final_mask);
9101 55 : vect_finish_stmt_generation (vinfo, stmt_info,
9102 : new_stmt, gsi);
9103 : }
9104 76 : else if (known_eq (nunits, offset_nunits * 2))
9105 : {
9106 : /* We have an offset vector with half the number of
9107 : lanes but the builtins will store full vectype
9108 : data from the lower lanes. */
9109 30 : new_stmt = vect_build_one_scatter_store_call
9110 60 : (vinfo, stmt_info, slp_node, gsi, ls.gs.decl,
9111 30 : dataref_ptr, vec_offsets[2 * j],
9112 : vec_oprnd, final_mask);
9113 30 : vect_finish_stmt_generation (vinfo, stmt_info,
9114 : new_stmt, gsi);
9115 30 : int count = nunits.to_constant ();
9116 30 : vec_perm_builder sel (count, count, 1);
9117 30 : sel.quick_grow (count);
9118 382 : for (int i = 0; i < count; ++i)
9119 352 : sel[i] = i | (count / 2);
9120 30 : vec_perm_indices indices (sel, 2, count);
9121 30 : tree perm_mask
9122 30 : = vect_gen_perm_mask_checked (vectype, indices);
9123 30 : new_stmt = gimple_build_assign (NULL_TREE, VEC_PERM_EXPR,
9124 : vec_oprnd, vec_oprnd,
9125 : perm_mask);
9126 30 : vec_oprnd = make_ssa_name (vectype);
9127 30 : gimple_set_lhs (new_stmt, vec_oprnd);
9128 30 : vect_finish_stmt_generation (vinfo, stmt_info,
9129 : new_stmt, gsi);
9130 30 : if (final_mask)
9131 : {
9132 20 : new_stmt = gimple_build_assign (NULL_TREE,
9133 : VEC_UNPACK_HI_EXPR,
9134 : final_mask);
9135 20 : final_mask = make_ssa_name
9136 20 : (truth_type_for (offset_vectype));
9137 20 : gimple_set_lhs (new_stmt, final_mask);
9138 20 : vect_finish_stmt_generation (vinfo, stmt_info,
9139 : new_stmt, gsi);
9140 : }
9141 :
9142 30 : new_stmt = vect_build_one_scatter_store_call
9143 60 : (vinfo, stmt_info, slp_node, gsi, ls.gs.decl,
9144 30 : dataref_ptr, vec_offsets[2 * j + 1],
9145 : vec_oprnd, final_mask);
9146 30 : vect_finish_stmt_generation (vinfo, stmt_info,
9147 : new_stmt, gsi);
9148 30 : }
9149 46 : else if (known_eq (nunits * 2, offset_nunits))
9150 : {
9151 : /* We have an offset vector with double the number of
9152 : lanes. Select the low/high part accordingly. */
9153 46 : vec_offset = vec_offsets[j / 2];
9154 46 : if (j & 1)
9155 : {
9156 23 : int count = offset_nunits.to_constant ();
9157 23 : vec_perm_builder sel (count, count, 1);
9158 23 : sel.quick_grow (count);
9159 263 : for (int i = 0; i < count; ++i)
9160 240 : sel[i] = i | (count / 2);
9161 23 : vec_perm_indices indices (sel, 2, count);
9162 23 : tree perm_mask = vect_gen_perm_mask_checked
9163 23 : (TREE_TYPE (vec_offset), indices);
9164 23 : new_stmt = gimple_build_assign (NULL_TREE,
9165 : VEC_PERM_EXPR,
9166 : vec_offset,
9167 : vec_offset,
9168 : perm_mask);
9169 23 : vec_offset = make_ssa_name (TREE_TYPE (vec_offset));
9170 23 : gimple_set_lhs (new_stmt, vec_offset);
9171 23 : vect_finish_stmt_generation (vinfo, stmt_info,
9172 : new_stmt, gsi);
9173 23 : }
9174 :
9175 46 : new_stmt = vect_build_one_scatter_store_call
9176 46 : (vinfo, stmt_info, slp_node, gsi,
9177 : ls.gs.decl, dataref_ptr, vec_offset,
9178 : vec_oprnd, final_mask);
9179 46 : vect_finish_stmt_generation (vinfo, stmt_info,
9180 : new_stmt, gsi);
9181 : }
9182 : else
9183 0 : gcc_unreachable ();
9184 : }
9185 : else
9186 : {
9187 : /* Emulated scatter. */
9188 1448 : gcc_assert (!final_mask);
9189 1448 : if (costing_p)
9190 : {
9191 1075 : unsigned int cnunits = vect_nunits_for_cost (vectype);
9192 : /* For emulated scatter N offset vector element extracts
9193 : (we assume the scalar scaling and ptr + offset add is
9194 : consumed by the load). */
9195 1075 : inside_cost
9196 1075 : += record_stmt_cost (cost_vec, cnunits, vec_to_scalar,
9197 : slp_node, 0, vect_body);
9198 : /* N scalar stores plus extracting the elements. */
9199 1075 : inside_cost
9200 1075 : += record_stmt_cost (cost_vec, cnunits, vec_to_scalar,
9201 : slp_node, 0, vect_body);
9202 1075 : inside_cost
9203 1075 : += record_stmt_cost (cost_vec, cnunits, scalar_store,
9204 : slp_node, 0, vect_body);
9205 1075 : continue;
9206 1075 : }
9207 :
9208 373 : tree offset_vectype = TREE_TYPE (vec_offsets[0]);
9209 373 : unsigned HOST_WIDE_INT const_nunits = nunits.to_constant ();
9210 373 : unsigned HOST_WIDE_INT const_offset_nunits
9211 373 : = TYPE_VECTOR_SUBPARTS (offset_vectype).to_constant ();
9212 373 : vec<constructor_elt, va_gc> *ctor_elts;
9213 373 : vec_alloc (ctor_elts, const_nunits);
9214 373 : gimple_seq stmts = NULL;
9215 373 : tree elt_type = TREE_TYPE (vectype);
9216 373 : unsigned HOST_WIDE_INT elt_size
9217 373 : = tree_to_uhwi (TYPE_SIZE (elt_type));
9218 : /* We support offset vectors with more elements
9219 : than the data vector for now. */
9220 373 : unsigned HOST_WIDE_INT factor
9221 : = const_offset_nunits / const_nunits;
9222 373 : vec_offset = vec_offsets[j / factor];
9223 373 : unsigned elt_offset
9224 373 : = (j % factor) * const_nunits;
9225 373 : tree idx_type = TREE_TYPE (TREE_TYPE (vec_offset));
9226 373 : tree scale = size_int (SLP_TREE_GS_SCALE (slp_node));
9227 373 : tree ltype = build_aligned_type (TREE_TYPE (vectype), align);
9228 1519 : for (unsigned k = 0; k < const_nunits; ++k)
9229 : {
9230 : /* Compute the offsetted pointer. */
9231 1146 : tree boff = size_binop (MULT_EXPR, TYPE_SIZE (idx_type),
9232 : bitsize_int (k + elt_offset));
9233 1146 : tree idx
9234 2292 : = gimple_build (&stmts, BIT_FIELD_REF, idx_type,
9235 1146 : vec_offset, TYPE_SIZE (idx_type), boff);
9236 1146 : idx = gimple_convert (&stmts, sizetype, idx);
9237 1146 : idx = gimple_build (&stmts, MULT_EXPR, sizetype,
9238 : idx, scale);
9239 1146 : tree ptr
9240 1146 : = gimple_build (&stmts, PLUS_EXPR,
9241 1146 : TREE_TYPE (dataref_ptr),
9242 : dataref_ptr, idx);
9243 1146 : ptr = gimple_convert (&stmts, ptr_type_node, ptr);
9244 : /* Extract the element to be stored. */
9245 1146 : tree elt
9246 2292 : = gimple_build (&stmts, BIT_FIELD_REF,
9247 1146 : TREE_TYPE (vectype),
9248 1146 : vec_oprnd, TYPE_SIZE (elt_type),
9249 1146 : bitsize_int (k * elt_size));
9250 1146 : gsi_insert_seq_before (gsi, stmts, GSI_SAME_STMT);
9251 1146 : stmts = NULL;
9252 1146 : tree ref
9253 1146 : = build2 (MEM_REF, ltype, ptr,
9254 : build_int_cst (ref_type, 0));
9255 1146 : new_stmt = gimple_build_assign (ref, elt);
9256 1146 : vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
9257 : }
9258 :
9259 373 : slp_node->push_vec_def (new_stmt);
9260 : }
9261 : }
9262 :
9263 1366 : if (costing_p && dump_enabled_p ())
9264 68 : dump_printf_loc (MSG_NOTE, vect_location,
9265 : "vect_model_store_cost: inside_cost = %d, "
9266 : "prologue_cost = %d .\n",
9267 : inside_cost, prologue_cost);
9268 :
9269 1366 : return true;
9270 1366 : }
9271 :
9272 1282238 : gcc_assert (memory_access_type == VMAT_CONTIGUOUS
9273 : || memory_access_type == VMAT_CONTIGUOUS_DOWN
9274 : || memory_access_type == VMAT_CONTIGUOUS_REVERSE);
9275 :
9276 1282238 : unsigned inside_cost = 0, prologue_cost = 0;
9277 : /* For costing some adjacent vector stores, we'd like to cost with
9278 : the total number of them once instead of cost each one by one. */
9279 1282238 : unsigned int n_adjacent_stores = 0;
9280 1282238 : auto_vec<tree> result_chain (group_size);
9281 1282238 : auto_vec<tree, 1> vec_oprnds;
9282 1282238 : gimple *new_stmt;
9283 1282238 : if (!costing_p)
9284 : {
9285 : /* Get vectorized arguments for SLP_NODE. */
9286 536900 : vect_get_slp_defs (op_node, &vec_oprnds);
9287 536900 : vec_oprnd = vec_oprnds[0];
9288 536900 : if (mask_node)
9289 : {
9290 481 : vect_get_slp_defs (mask_node, &vec_masks);
9291 481 : vec_mask = vec_masks[0];
9292 : }
9293 : }
9294 :
9295 : /* We should have caught mismatched types earlier. */
9296 536900 : gcc_assert (costing_p
9297 : || useless_type_conversion_p (vectype, TREE_TYPE (vec_oprnd)));
9298 1282238 : bool simd_lane_access_p
9299 1282238 : = STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) != 0;
9300 1282238 : if (!costing_p
9301 1282238 : && simd_lane_access_p
9302 4374 : && !loop_masks
9303 4374 : && TREE_CODE (DR_BASE_ADDRESS (first_dr_info->dr)) == ADDR_EXPR
9304 4374 : && VAR_P (TREE_OPERAND (DR_BASE_ADDRESS (first_dr_info->dr), 0))
9305 4374 : && integer_zerop (get_dr_vinfo_offset (vinfo, first_dr_info))
9306 4374 : && integer_zerop (DR_INIT (first_dr_info->dr))
9307 1286612 : && alias_sets_conflict_p (get_alias_set (aggr_type),
9308 4374 : get_alias_set (TREE_TYPE (ref_type))))
9309 : {
9310 4366 : dataref_ptr = unshare_expr (DR_BASE_ADDRESS (first_dr_info->dr));
9311 4366 : dataref_offset = build_int_cst (ref_type, 0);
9312 : }
9313 1277872 : else if (!costing_p)
9314 1065060 : dataref_ptr = vect_create_data_ref_ptr (vinfo, first_stmt_info, aggr_type,
9315 : simd_lane_access_p ? loop : NULL,
9316 : offset, &dummy, gsi, &ptr_incr,
9317 : simd_lane_access_p, bump);
9318 :
9319 1282238 : new_stmt = NULL;
9320 1282238 : gcc_assert (!grouped_store);
9321 2846510 : for (i = 0; i < vec_num; i++)
9322 : {
9323 1564272 : if (!costing_p)
9324 663255 : vec_oprnd = vec_oprnds[i];
9325 :
9326 1564272 : if (memory_access_type == VMAT_CONTIGUOUS_REVERSE)
9327 : {
9328 3108 : if (costing_p)
9329 1972 : inside_cost += record_stmt_cost (cost_vec, 1, vec_perm,
9330 : slp_node, 0, vect_body);
9331 : else
9332 : {
9333 1136 : tree perm_mask = perm_mask_for_reverse (vectype);
9334 1136 : tree new_temp = make_ssa_name (vectype);
9335 :
9336 : /* Generate the permute statement. */
9337 1136 : gimple *perm_stmt
9338 1136 : = gimple_build_assign (new_temp, VEC_PERM_EXPR, vec_oprnd,
9339 : vec_oprnd, perm_mask);
9340 1136 : vect_finish_stmt_generation (vinfo, stmt_info, perm_stmt, gsi);
9341 :
9342 1136 : perm_stmt = SSA_NAME_DEF_STMT (new_temp);
9343 1564272 : vec_oprnd = new_temp;
9344 : }
9345 : }
9346 :
9347 1564272 : if (costing_p)
9348 : {
9349 901017 : n_adjacent_stores++;
9350 901017 : continue;
9351 : }
9352 :
9353 663255 : tree final_mask = NULL_TREE;
9354 663255 : tree final_len = NULL_TREE;
9355 663255 : tree bias = NULL_TREE;
9356 663255 : if (loop_masks)
9357 77 : final_mask = vect_get_loop_mask (loop_vinfo, gsi, loop_masks,
9358 : vec_num, vectype, i);
9359 663255 : if (vec_mask)
9360 702 : vec_mask = vec_masks[i];
9361 702 : if (vec_mask)
9362 702 : final_mask = prepare_vec_mask (loop_vinfo, mask_vectype, final_mask,
9363 : vec_mask, gsi);
9364 :
9365 663255 : if (i > 0)
9366 : /* Bump the vector pointer. */
9367 126355 : dataref_ptr = bump_vector_ptr (vinfo, dataref_ptr, ptr_incr, gsi,
9368 : stmt_info, bump);
9369 :
9370 663255 : unsigned misalign;
9371 663255 : unsigned HOST_WIDE_INT align;
9372 663255 : align = known_alignment (DR_TARGET_ALIGNMENT (first_dr_info));
9373 663255 : if (alignment_support_scheme == dr_aligned)
9374 : misalign = 0;
9375 306580 : else if (misalignment == DR_MISALIGNMENT_UNKNOWN)
9376 : {
9377 158675 : align = dr_alignment (vect_dr_behavior (vinfo, first_dr_info));
9378 158675 : misalign = 0;
9379 : }
9380 : else
9381 147905 : misalign = misalignment;
9382 663255 : if (dataref_offset == NULL_TREE
9383 657875 : && TREE_CODE (dataref_ptr) == SSA_NAME)
9384 179069 : set_ptr_info_alignment (get_ptr_info (dataref_ptr), align, misalign);
9385 663255 : align = least_bit_hwi (misalign | align);
9386 :
9387 : /* Compute IFN when LOOP_LENS or final_mask valid. */
9388 663255 : machine_mode vmode = TYPE_MODE (vectype);
9389 663255 : machine_mode new_vmode = vmode;
9390 663255 : internal_fn partial_ifn = IFN_LAST;
9391 663255 : if (loop_lens)
9392 : {
9393 0 : opt_machine_mode new_ovmode
9394 0 : = get_len_load_store_mode (vmode, false, &partial_ifn);
9395 0 : new_vmode = new_ovmode.require ();
9396 0 : unsigned factor
9397 0 : = (new_ovmode == vmode) ? 1 : GET_MODE_UNIT_SIZE (vmode);
9398 0 : final_len = vect_get_loop_len (loop_vinfo, gsi, loop_lens,
9399 : vec_num, vectype, i, factor, true);
9400 : }
9401 663255 : else if (final_mask)
9402 : {
9403 714 : if (!can_vec_mask_load_store_p (vmode,
9404 714 : TYPE_MODE (TREE_TYPE (final_mask)),
9405 : false, &partial_ifn))
9406 0 : gcc_unreachable ();
9407 : }
9408 :
9409 663255 : if (partial_ifn == IFN_MASK_LEN_STORE)
9410 : {
9411 0 : if (!final_len)
9412 : {
9413 : /* Pass VF value to 'len' argument of
9414 : MASK_LEN_STORE if LOOP_LENS is invalid. */
9415 0 : final_len = size_int (TYPE_VECTOR_SUBPARTS (vectype));
9416 : }
9417 0 : if (!final_mask)
9418 : {
9419 : /* Pass all ones value to 'mask' argument of
9420 : MASK_LEN_STORE if final_mask is invalid. */
9421 0 : mask_vectype = truth_type_for (vectype);
9422 0 : final_mask = build_minus_one_cst (mask_vectype);
9423 : }
9424 : }
9425 663255 : if (final_len)
9426 : {
9427 0 : signed char biasval = LOOP_VINFO_PARTIAL_LOAD_STORE_BIAS (loop_vinfo);
9428 0 : bias = build_int_cst (intQI_type_node, biasval);
9429 : }
9430 :
9431 : /* Arguments are ready. Create the new vector stmt. */
9432 663255 : if (final_len)
9433 : {
9434 0 : gcall *call;
9435 0 : tree ptr = build_int_cst (ref_type, align * BITS_PER_UNIT);
9436 : /* Need conversion if it's wrapped with VnQI. */
9437 0 : if (vmode != new_vmode)
9438 : {
9439 0 : tree new_vtype
9440 0 : = build_vector_type_for_mode (unsigned_intQI_type_node,
9441 : new_vmode);
9442 0 : tree var = vect_get_new_ssa_name (new_vtype, vect_simple_var);
9443 0 : vec_oprnd = build1 (VIEW_CONVERT_EXPR, new_vtype, vec_oprnd);
9444 0 : gassign *new_stmt
9445 0 : = gimple_build_assign (var, VIEW_CONVERT_EXPR, vec_oprnd);
9446 0 : vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
9447 0 : vec_oprnd = var;
9448 : }
9449 :
9450 0 : if (partial_ifn == IFN_MASK_LEN_STORE)
9451 0 : call = gimple_build_call_internal (IFN_MASK_LEN_STORE, 6,
9452 : dataref_ptr, ptr, final_mask,
9453 : final_len, bias, vec_oprnd);
9454 : else
9455 0 : call = gimple_build_call_internal (IFN_LEN_STORE, 5,
9456 : dataref_ptr, ptr, final_len,
9457 : bias, vec_oprnd);
9458 0 : gimple_call_set_nothrow (call, true);
9459 0 : vect_finish_stmt_generation (vinfo, stmt_info, call, gsi);
9460 0 : new_stmt = call;
9461 : }
9462 663255 : else if (final_mask)
9463 : {
9464 714 : tree ptr = build_int_cst (ref_type, align * BITS_PER_UNIT);
9465 714 : gcall *call
9466 714 : = gimple_build_call_internal (IFN_MASK_STORE, 4, dataref_ptr,
9467 : ptr, final_mask, vec_oprnd);
9468 714 : gimple_call_set_nothrow (call, true);
9469 714 : vect_finish_stmt_generation (vinfo, stmt_info, call, gsi);
9470 714 : new_stmt = call;
9471 : }
9472 : else
9473 : {
9474 662541 : data_ref = fold_build2 (MEM_REF, vectype, dataref_ptr,
9475 : dataref_offset ? dataref_offset
9476 : : build_int_cst (ref_type, 0));
9477 662541 : if (alignment_support_scheme == dr_aligned
9478 662541 : && align >= TYPE_ALIGN_UNIT (vectype))
9479 : ;
9480 : else
9481 306036 : TREE_TYPE (data_ref)
9482 612072 : = build_aligned_type (TREE_TYPE (data_ref),
9483 : align * BITS_PER_UNIT);
9484 662541 : vect_copy_ref_info (data_ref, DR_REF (first_dr_info->dr));
9485 662541 : new_stmt = gimple_build_assign (data_ref, vec_oprnd);
9486 662541 : vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
9487 : }
9488 : }
9489 :
9490 1282238 : if (costing_p)
9491 : {
9492 745338 : if (n_adjacent_stores > 0)
9493 745338 : vect_get_store_cost (vinfo, stmt_info, slp_node, n_adjacent_stores,
9494 : alignment_support_scheme, misalignment,
9495 : &inside_cost, cost_vec);
9496 :
9497 : /* When vectorizing a store into the function result assign
9498 : a penalty if the function returns in a multi-register location.
9499 : In this case we assume we'll end up with having to spill the
9500 : vector result and do piecewise loads as a conservative estimate. */
9501 745338 : tree base = get_base_address (STMT_VINFO_DATA_REF (stmt_info)->ref);
9502 745338 : if (base
9503 745338 : && (TREE_CODE (base) == RESULT_DECL
9504 695114 : || (DECL_P (base) && cfun_returns (base)))
9505 807214 : && !aggregate_value_p (base, cfun->decl))
9506 : {
9507 11012 : rtx reg = hard_function_value (TREE_TYPE (base), cfun->decl, 0, 1);
9508 : /* ??? Handle PARALLEL in some way. */
9509 11012 : if (REG_P (reg))
9510 : {
9511 10810 : int nregs = hard_regno_nregs (REGNO (reg), GET_MODE (reg));
9512 : /* Assume that a single reg-reg move is possible and cheap,
9513 : do not account for vector to gp register move cost. */
9514 10810 : if (nregs > 1)
9515 : {
9516 : /* Spill. */
9517 9997 : prologue_cost
9518 9997 : += record_stmt_cost (cost_vec, 1, vector_store,
9519 : slp_node, 0, vect_epilogue);
9520 : /* Loads. */
9521 9997 : prologue_cost
9522 9997 : += record_stmt_cost (cost_vec, nregs, scalar_load,
9523 : slp_node, 0, vect_epilogue);
9524 : }
9525 : }
9526 : }
9527 745338 : if (dump_enabled_p ())
9528 13412 : dump_printf_loc (MSG_NOTE, vect_location,
9529 : "vect_model_store_cost: inside_cost = %d, "
9530 : "prologue_cost = %d .\n",
9531 : inside_cost, prologue_cost);
9532 : }
9533 :
9534 1282238 : return true;
9535 2595684 : }
9536 :
9537 : /* Given a vector type VECTYPE, turns permutation SEL into the equivalent
9538 : VECTOR_CST mask. No checks are made that the target platform supports the
9539 : mask, so callers may wish to test can_vec_perm_const_p separately, or use
9540 : vect_gen_perm_mask_checked. */
9541 :
9542 : tree
9543 61395 : vect_gen_perm_mask_any (tree vectype, const vec_perm_indices &sel)
9544 : {
9545 61395 : tree mask_type;
9546 :
9547 61395 : poly_uint64 nunits = sel.length ();
9548 61395 : gcc_assert (known_eq (nunits, TYPE_VECTOR_SUBPARTS (vectype)));
9549 :
9550 61395 : mask_type = build_vector_type (ssizetype, nunits);
9551 61395 : return vec_perm_indices_to_tree (mask_type, sel);
9552 : }
9553 :
9554 : /* Checked version of vect_gen_perm_mask_any. Asserts can_vec_perm_const_p,
9555 : i.e. that the target supports the pattern _for arbitrary input vectors_. */
9556 :
9557 : tree
9558 58634 : vect_gen_perm_mask_checked (tree vectype, const vec_perm_indices &sel)
9559 : {
9560 58634 : machine_mode vmode = TYPE_MODE (vectype);
9561 58634 : gcc_assert (can_vec_perm_const_p (vmode, vmode, sel));
9562 58634 : return vect_gen_perm_mask_any (vectype, sel);
9563 : }
9564 :
9565 : /* Given a vector variable X and Y, that was generated for the scalar
9566 : STMT_INFO, generate instructions to permute the vector elements of X and Y
9567 : using permutation mask MASK_VEC, insert them at *GSI and return the
9568 : permuted vector variable. */
9569 :
9570 : static tree
9571 1431 : permute_vec_elements (vec_info *vinfo,
9572 : tree x, tree y, tree mask_vec, stmt_vec_info stmt_info,
9573 : gimple_stmt_iterator *gsi)
9574 : {
9575 1431 : tree vectype = TREE_TYPE (x);
9576 1431 : tree perm_dest, data_ref;
9577 1431 : gimple *perm_stmt;
9578 :
9579 1431 : tree scalar_dest = gimple_get_lhs (stmt_info->stmt);
9580 1431 : if (scalar_dest && TREE_CODE (scalar_dest) == SSA_NAME)
9581 1431 : perm_dest = vect_create_destination_var (scalar_dest, vectype);
9582 : else
9583 0 : perm_dest = vect_get_new_vect_var (vectype, vect_simple_var, NULL);
9584 1431 : data_ref = make_ssa_name (perm_dest);
9585 :
9586 : /* Generate the permute statement. */
9587 1431 : perm_stmt = gimple_build_assign (data_ref, VEC_PERM_EXPR, x, y, mask_vec);
9588 1431 : vect_finish_stmt_generation (vinfo, stmt_info, perm_stmt, gsi);
9589 :
9590 1431 : return data_ref;
9591 : }
9592 :
/* Hoist the definitions of all SSA uses on STMT out of the loop LOOP,
   inserting them on the loops preheader edge.  Returns true if we
   were successful in doing so (and thus STMT can be moved then),
   otherwise returns false.  HOIST_P indicates if we want to actually
   perform the hoisting of the definitions of all SSA uses; it would be
   false when we are only analyzing/costing.  */

static bool
hoist_defs_of_uses (gimple *stmt, class loop *loop, bool hoist_p)
{
  ssa_op_iter i;
  use_operand_p use_p;
  /* Collect the in-loop defining statements' use operands first so we
     only mutate the IL once we know the whole operation is possible.  */
  auto_vec<use_operand_p, 8> to_hoist;

  FOR_EACH_SSA_USE_OPERAND (use_p, stmt, i, SSA_OP_USE)
    {
      gimple *def_stmt = SSA_NAME_DEF_STMT (USE_FROM_PTR (use_p));
      /* Only defs inside LOOP need hoisting; defaults (gimple_nop) and
	 defs already outside the loop can stay where they are.  */
      if (!gimple_nop_p (def_stmt)
	  && flow_bb_inside_loop_p (loop, gimple_bb (def_stmt)))
	{
	  /* Make sure we don't need to recurse.  While we could do
	     so in simple cases when there are more complex use webs
	     we don't have an easy way to preserve stmt order to fulfil
	     dependencies within them.  */
	  tree op2;
	  ssa_op_iter i2;
	  /* PHIs and stmts without a single SSA def cannot be copied
	     with duplicate_ssa_name below — give up.  */
	  if (gimple_code (def_stmt) == GIMPLE_PHI
	      || (single_ssa_def_operand (def_stmt, SSA_OP_DEF)
		  == NULL_DEF_OPERAND_P))
	    return false;
	  /* Fail if the def itself depends on something defined inside
	     the loop (that would require recursive hoisting).  */
	  FOR_EACH_SSA_TREE_OPERAND (op2, def_stmt, i2, SSA_OP_USE)
	    {
	      gimple *def_stmt2 = SSA_NAME_DEF_STMT (op2);
	      if (!gimple_nop_p (def_stmt2)
		  && flow_bb_inside_loop_p (loop, gimple_bb (def_stmt2)))
		return false;
	    }
	  to_hoist.safe_push (use_p);
	}
    }

  /* Nothing defined inside the loop — STMT is trivially hoistable.  */
  if (to_hoist.is_empty ())
    return true;

  /* Analysis-only mode: hoisting is possible, but don't change the IL.  */
  if (!hoist_p)
    return true;

  /* Instead of moving defs we copy them so we can zero their UID to not
     confuse dominance queries in the preheader.  */
  gimple_stmt_iterator gsi = gsi_for_stmt (stmt);
  for (use_operand_p use_p : to_hoist)
    {
      gimple *def_stmt = SSA_NAME_DEF_STMT (USE_FROM_PTR (use_p));
      gimple *copy = gimple_copy (def_stmt);
      gimple_set_uid (copy, 0);
      def_operand_p def_p = single_ssa_def_operand (def_stmt, SSA_OP_DEF);
      /* Give the copy a fresh SSA name and redirect STMT's use to it;
	 the original def (and its other uses) are left untouched.  */
      tree new_def = duplicate_ssa_name (DEF_FROM_PTR (def_p), copy);
      update_stmt (copy);
      def_p = single_ssa_def_operand (copy, SSA_OP_DEF);
      SET_DEF (def_p, new_def);
      SET_USE (use_p, new_def);
      /* NOTE(review): the copy is inserted immediately before STMT here;
	 presumably STMT (together with these copies) is moved to the
	 preheader by the caller — confirm against the caller.  */
      gsi_insert_before (&gsi, copy, GSI_SAME_STMT);
    }

  return true;
}
9658 :
9659 : /* vectorizable_load.
9660 :
9661 : Check if STMT_INFO reads a non scalar data-ref (array/pointer/structure)
9662 : that can be vectorized.
9663 : If COST_VEC is passed, calculate costs but don't change anything,
9664 : otherwise, vectorize STMT_INFO: create a vectorized stmt to replace
9665 : it, and insert it at GSI.
9666 : Return true if STMT_INFO is vectorizable in this way. */
9667 :
9668 : static bool
9669 1960516 : vectorizable_load (vec_info *vinfo,
9670 : stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
9671 : slp_tree slp_node,
9672 : stmt_vector_for_cost *cost_vec)
9673 : {
9674 1960516 : tree scalar_dest;
9675 1960516 : tree vec_dest = NULL;
9676 1960516 : tree data_ref = NULL;
9677 1960516 : loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
9678 1960516 : class loop *loop = NULL;
9679 1960516 : class loop *containing_loop = gimple_bb (stmt_info->stmt)->loop_father;
9680 1960516 : bool nested_in_vect_loop = false;
9681 1960516 : tree elem_type;
9682 : /* Avoid false positive uninitialized warning, see PR110652. */
9683 1960516 : tree new_temp = NULL_TREE;
9684 1960516 : machine_mode mode;
9685 1960516 : tree dummy;
9686 1960516 : tree dataref_ptr = NULL_TREE;
9687 1960516 : tree dataref_offset = NULL_TREE;
9688 1960516 : gimple *ptr_incr = NULL;
9689 1960516 : int i, j;
9690 1960516 : unsigned int group_size;
9691 1960516 : poly_uint64 group_gap_adj;
9692 1960516 : tree msq = NULL_TREE, lsq;
9693 1960516 : tree realignment_token = NULL_TREE;
9694 1960516 : gphi *phi = NULL;
9695 1960516 : bool grouped_load = false;
9696 1960516 : stmt_vec_info first_stmt_info;
9697 1960516 : stmt_vec_info first_stmt_info_for_drptr = NULL;
9698 1960516 : bool compute_in_loop = false;
9699 1960516 : class loop *at_loop;
9700 1960516 : int vec_num;
9701 1960516 : bb_vec_info bb_vinfo = dyn_cast <bb_vec_info> (vinfo);
9702 1960516 : poly_uint64 vf;
9703 1960516 : tree aggr_type;
9704 1960516 : tree ref_type;
9705 1960516 : enum vect_def_type mask_dt = vect_unknown_def_type;
9706 1960516 : enum vect_def_type els_dt = vect_unknown_def_type;
9707 :
9708 1960516 : if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
9709 : return false;
9710 :
9711 1960516 : if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
9712 194531 : && cost_vec)
9713 : return false;
9714 :
9715 1765985 : if (!STMT_VINFO_DATA_REF (stmt_info))
9716 : return false;
9717 :
9718 1415387 : tree mask_vectype = NULL_TREE;
9719 1415387 : tree els = NULL_TREE; tree els_vectype = NULL_TREE;
9720 :
9721 1415387 : int mask_index = -1;
9722 1415387 : int els_index = -1;
9723 1415387 : slp_tree mask_node = NULL;
9724 1415387 : slp_tree els_op = NULL;
9725 1415387 : if (gassign *assign = dyn_cast <gassign *> (stmt_info->stmt))
9726 : {
9727 1412061 : scalar_dest = gimple_assign_lhs (assign);
9728 1412061 : if (TREE_CODE (scalar_dest) != SSA_NAME)
9729 : return false;
9730 :
9731 640901 : tree_code code = gimple_assign_rhs_code (assign);
9732 640901 : if (code != ARRAY_REF
9733 640901 : && code != BIT_FIELD_REF
9734 640901 : && code != INDIRECT_REF
9735 444193 : && code != COMPONENT_REF
9736 444193 : && code != IMAGPART_EXPR
9737 306069 : && code != REALPART_EXPR
9738 306069 : && code != MEM_REF
9739 237 : && TREE_CODE_CLASS (code) != tcc_declaration)
9740 : return false;
9741 : }
9742 : else
9743 : {
9744 1320929 : gcall *call = dyn_cast <gcall *> (stmt_info->stmt);
9745 3326 : if (!call || !gimple_call_internal_p (call))
9746 : return false;
9747 :
9748 3326 : internal_fn ifn = gimple_call_internal_fn (call);
9749 3326 : if (!internal_load_fn_p (ifn))
9750 : return false;
9751 :
9752 2407 : scalar_dest = gimple_call_lhs (call);
9753 2407 : if (!scalar_dest)
9754 : return false;
9755 :
9756 2407 : mask_index = internal_fn_mask_index (ifn);
9757 2407 : if (mask_index >= 0)
9758 2407 : mask_index = vect_slp_child_index_for_operand
9759 2407 : (call, mask_index, STMT_VINFO_GATHER_SCATTER_P (stmt_info));
9760 2407 : if (mask_index >= 0
9761 2407 : && !vect_check_scalar_mask (vinfo, slp_node, mask_index,
9762 : &mask_node, &mask_dt, &mask_vectype))
9763 : return false;
9764 :
9765 2407 : els_index = internal_fn_else_index (ifn);
9766 2407 : if (els_index >= 0)
9767 2407 : els_index = vect_slp_child_index_for_operand
9768 2407 : (call, els_index, STMT_VINFO_GATHER_SCATTER_P (stmt_info));
9769 2407 : if (els_index >= 0
9770 2407 : && !vect_is_simple_use (vinfo, slp_node, els_index,
9771 : &els, &els_op, &els_dt, &els_vectype))
9772 : return false;
9773 : }
9774 :
9775 643241 : tree vectype = SLP_TREE_VECTYPE (slp_node);
9776 643241 : poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
9777 :
9778 643241 : if (loop_vinfo)
9779 : {
9780 432962 : loop = LOOP_VINFO_LOOP (loop_vinfo);
9781 432962 : nested_in_vect_loop = nested_in_vect_loop_p (loop, stmt_info);
9782 432962 : vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
9783 : }
9784 : else
9785 : vf = 1;
9786 :
9787 643241 : vec_num = vect_get_num_copies (vinfo, slp_node);
9788 :
9789 : /* FORNOW. This restriction should be relaxed. */
9790 643241 : if (nested_in_vect_loop && vec_num > 1)
9791 : {
9792 316 : if (dump_enabled_p ())
9793 66 : dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
9794 : "multiple types in nested loop.\n");
9795 316 : return false;
9796 : }
9797 :
9798 642925 : elem_type = TREE_TYPE (vectype);
9799 642925 : mode = TYPE_MODE (vectype);
9800 :
9801 : /* FORNOW. In some cases can vectorize even if data-type not supported
9802 : (e.g. - data copies). */
9803 642925 : if (!can_implement_p (mov_optab, mode))
9804 : {
9805 0 : if (dump_enabled_p ())
9806 0 : dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
9807 : "Aligned load, but unsupported type.\n");
9808 0 : return false;
9809 : }
9810 :
9811 : /* Check if the load is a part of an interleaving chain. */
9812 642925 : if (STMT_VINFO_GROUPED_ACCESS (stmt_info))
9813 : {
9814 303715 : grouped_load = true;
9815 : /* FORNOW */
9816 303715 : gcc_assert (!nested_in_vect_loop);
9817 303715 : gcc_assert (!STMT_VINFO_GATHER_SCATTER_P (stmt_info));
9818 :
9819 303715 : first_stmt_info = DR_GROUP_FIRST_ELEMENT (stmt_info);
9820 303715 : group_size = DR_GROUP_SIZE (first_stmt_info);
9821 :
9822 : /* Invalidate assumptions made by dependence analysis when vectorization
9823 : on the unrolled body effectively re-orders stmts. */
9824 303715 : if (STMT_VINFO_MIN_NEG_DIST (stmt_info) != 0
9825 303715 : && maybe_gt (LOOP_VINFO_VECT_FACTOR (loop_vinfo),
9826 : STMT_VINFO_MIN_NEG_DIST (stmt_info)))
9827 : {
9828 12 : if (dump_enabled_p ())
9829 12 : dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
9830 : "cannot perform implicit CSE when performing "
9831 : "group loads with negative dependence distance\n");
9832 12 : return false;
9833 : }
9834 : }
9835 : else
9836 : group_size = 1;
9837 :
9838 642913 : vect_load_store_data _ls_data{};
9839 642913 : vect_load_store_data &ls = slp_node->get_data (_ls_data);
9840 642913 : if (cost_vec
9841 642913 : && !get_load_store_type (vinfo, stmt_info, vectype, slp_node, mask_node,
9842 : VLS_LOAD, &ls))
9843 : return false;
9844 : /* Temporary aliases to analysis data, should not be modified through
9845 : these. */
9846 546527 : const vect_memory_access_type memory_access_type = ls.memory_access_type;
9847 546527 : const dr_alignment_support alignment_support_scheme
9848 : = ls.alignment_support_scheme;
9849 546527 : const int misalignment = ls.misalignment;
9850 546527 : const poly_int64 poffset = ls.poffset;
9851 546527 : const vec<int> &elsvals = ls.elsvals;
9852 :
9853 546527 : int maskload_elsval = 0;
9854 546527 : bool need_zeroing = false;
9855 :
9856 : /* We might need to explicitly zero inactive elements if there are
9857 : padding bits in the type that might leak otherwise.
9858 : Refer to PR115336. */
9859 546527 : tree scalar_type = TREE_TYPE (scalar_dest);
9860 546527 : bool type_mode_padding_p
9861 1093054 : = TYPE_PRECISION (scalar_type) < GET_MODE_PRECISION (GET_MODE_INNER (mode));
9862 :
9863 546527 : if (slp_node->ldst_lanes
9864 0 : && memory_access_type != VMAT_LOAD_STORE_LANES)
9865 : {
9866 0 : if (dump_enabled_p ())
9867 0 : dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
9868 : "discovered load-lane but cannot use it.\n");
9869 0 : return false;
9870 : }
9871 :
9872 546527 : if (mask_node)
9873 : {
9874 2289 : if (memory_access_type == VMAT_CONTIGUOUS)
9875 : {
9876 1507 : machine_mode vec_mode = TYPE_MODE (vectype);
9877 395 : if (!VECTOR_MODE_P (vec_mode)
9878 3014 : || !can_vec_mask_load_store_p (vec_mode,
9879 1507 : TYPE_MODE (mask_vectype),
9880 : true, NULL, &ls.elsvals))
9881 67 : return false;
9882 : }
9883 782 : else if (memory_access_type != VMAT_LOAD_STORE_LANES
9884 782 : && !mat_gather_scatter_p (memory_access_type))
9885 : {
9886 62 : if (dump_enabled_p ())
9887 0 : dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
9888 : "unsupported access type for masked load.\n");
9889 62 : return false;
9890 : }
9891 720 : else if (memory_access_type == VMAT_GATHER_SCATTER_EMULATED)
9892 : {
9893 476 : if (dump_enabled_p ())
9894 26 : dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
9895 : "unsupported masked emulated gather.\n");
9896 476 : return false;
9897 : }
9898 : else if (memory_access_type == VMAT_ELEMENTWISE
9899 : || memory_access_type == VMAT_STRIDED_SLP)
9900 : {
9901 : if (dump_enabled_p ())
9902 : dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
9903 : "unsupported masked strided access.\n");
9904 : return false;
9905 : }
9906 : }
9907 :
9908 545922 : bool costing_p = cost_vec;
9909 :
9910 545922 : if (costing_p) /* transformation not required. */
9911 : {
9912 382600 : if (mask_node
9913 382600 : && !vect_maybe_update_slp_op_vectype (mask_node,
9914 : mask_vectype))
9915 : {
9916 0 : if (dump_enabled_p ())
9917 0 : dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
9918 : "incompatible vector types for invariants\n");
9919 0 : return false;
9920 : }
9921 :
9922 382600 : if (loop_vinfo
9923 261428 : && LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo))
9924 174135 : check_load_store_for_partial_vectors (loop_vinfo, vectype, slp_node,
9925 : VLS_LOAD, group_size, &ls,
9926 : mask_node, &ls.elsvals);
9927 :
9928 382600 : if (dump_enabled_p ()
9929 24487 : && memory_access_type != VMAT_ELEMENTWISE
9930 24278 : && !mat_gather_scatter_p (memory_access_type)
9931 24056 : && memory_access_type != VMAT_STRIDED_SLP
9932 24056 : && memory_access_type != VMAT_INVARIANT
9933 405763 : && alignment_support_scheme != dr_aligned)
9934 9405 : dump_printf_loc (MSG_NOTE, vect_location,
9935 : "Vectorizing an unaligned access.\n");
9936 :
9937 382600 : if (memory_access_type == VMAT_LOAD_STORE_LANES)
9938 0 : vinfo->any_known_not_updated_vssa = true;
9939 :
9940 382600 : SLP_TREE_TYPE (slp_node) = load_vec_info_type;
9941 382600 : slp_node->data = new vect_load_store_data (std::move (ls));
9942 : }
9943 :
9944 : /* If the type needs padding we must zero inactive elements.
9945 : Check if we can do that with a VEC_COND_EXPR and store the
9946 : elsval we choose in MASKLOAD_ELSVAL. */
9947 545922 : if (elsvals.length ()
9948 23231 : && type_mode_padding_p
9949 3 : && !elsvals.contains (MASK_LOAD_ELSE_ZERO)
9950 23231 : && !expand_vec_cond_expr_p (vectype, truth_type_for (vectype)))
9951 : {
9952 0 : if (dump_enabled_p ())
9953 0 : dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
9954 : "cannot zero inactive elements.\n");
9955 0 : return false;
9956 : }
9957 :
9958 : /* For now just use the first available else value.
9959 : get_supported_else_vals tries MASK_LOAD_ELSE_ZERO first so we will
9960 : select it here if it is supported. */
9961 545922 : if (elsvals.length ())
9962 23231 : maskload_elsval = *elsvals.begin ();
9963 :
9964 545922 : if (dump_enabled_p () && !costing_p)
9965 16630 : dump_printf_loc (MSG_NOTE, vect_location, "transform load.\n");
9966 :
9967 : /* Transform. */
9968 :
9969 545922 : dr_vec_info *dr_info = STMT_VINFO_DR_INFO (stmt_info), *first_dr_info = NULL;
9970 545922 : ensure_base_align (dr_info);
9971 :
9972 545922 : if (memory_access_type == VMAT_INVARIANT)
9973 : {
9974 3854 : gcc_assert (!grouped_load && !mask_node && !bb_vinfo);
9975 : /* If we have versioned for aliasing or the loop doesn't
9976 : have any data dependencies that would preclude this,
9977 : then we are sure this is a loop invariant load and
9978 : thus we can insert it on the preheader edge.
9979 : TODO: hoist_defs_of_uses should ideally be computed
9980 : once at analysis time, remembered and used in the
9981 : transform time. */
9982 7708 : bool hoist_p = (LOOP_VINFO_NO_DATA_DEPENDENCIES (loop_vinfo)
9983 3854 : && !nested_in_vect_loop);
9984 :
9985 3854 : bool uniform_p = true;
9986 16096 : for (stmt_vec_info sinfo : SLP_TREE_SCALAR_STMTS (slp_node))
9987 : {
9988 : /* It is unsafe to hoist a conditional load over the conditions that
9989 : make it valid. When early break this means that any invariant load
9990 : can't be hoisted unless it's in the loop header or if we know
9991 : something else has verified the load is valid to do. Alignment
9992 : peeling would do this since getting through the prologue means the
9993 : load was done at least once and so the vector main body is free to
9994 : hoist it. However today GCC will hoist the load above the PFA
9995 : loop. As such that makes it still invalid and so we can't allow it
9996 : today. */
9997 4534 : if (LOOP_VINFO_EARLY_BREAKS (loop_vinfo)
9998 1050 : && !DR_SCALAR_KNOWN_BOUNDS (STMT_VINFO_DR_INFO (sinfo))
9999 5552 : && gimple_bb (STMT_VINFO_STMT (vect_orig_stmt (sinfo)))
10000 1018 : != loop->header)
10001 : {
10002 918 : if (LOOP_VINFO_PEELING_FOR_ALIGNMENT (loop_vinfo)
10003 918 : && dump_enabled_p ())
10004 6 : dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
10005 : "not hoisting invariant load due to early break"
10006 : "constraints\n");
10007 912 : else if (dump_enabled_p ())
10008 16 : dump_printf_loc (MSG_NOTE, vect_location,
10009 : "not hoisting invariant load due to early break"
10010 : "constraints\n");
10011 : hoist_p = false;
10012 : }
10013 :
10014 3616 : hoist_p = hoist_p && hoist_defs_of_uses (sinfo->stmt, loop, false);
10015 4534 : if (sinfo != SLP_TREE_SCALAR_STMTS (slp_node)[0])
10016 211 : uniform_p = false;
10017 : }
10018 3854 : if (costing_p)
10019 : {
10020 2984 : if (!uniform_p && (!hoist_p || !vf.is_constant ()))
10021 : {
10022 0 : if (dump_enabled_p ())
10023 0 : dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
10024 : "not vectorizing non-uniform invariant "
10025 : "load\n");
10026 0 : return false;
10027 : }
10028 1379 : enum vect_cost_model_location cost_loc
10029 2984 : = hoist_p ? vect_prologue : vect_body;
10030 2984 : unsigned int cost = record_stmt_cost (cost_vec, 1, scalar_load,
10031 : slp_node, 0, cost_loc);
10032 2984 : cost += record_stmt_cost (cost_vec, 1, scalar_to_vec,
10033 : slp_node, 0, cost_loc);
10034 2984 : unsigned int prologue_cost = hoist_p ? cost : 0;
10035 1379 : unsigned int inside_cost = hoist_p ? 0 : cost;
10036 2984 : if (dump_enabled_p ())
10037 508 : dump_printf_loc (MSG_NOTE, vect_location,
10038 : "vect_model_load_cost: inside_cost = %d, "
10039 : "prologue_cost = %d .\n",
10040 : inside_cost, prologue_cost);
10041 2984 : return true;
10042 : }
10043 870 : if (hoist_p)
10044 : {
10045 : /* ??? For non-uniform lanes there could be still duplicates.
10046 : We're leaving those to post-vectorizer CSE for the moment. */
10047 675 : auto_vec<tree> scalar_defs (SLP_TREE_LANES (slp_node));
10048 2182 : for (stmt_vec_info sinfo : SLP_TREE_SCALAR_STMTS (slp_node))
10049 : {
10050 777 : gassign *stmt = as_a <gassign *> (sinfo->stmt);
10051 777 : if (dump_enabled_p ())
10052 368 : dump_printf_loc (MSG_NOTE, vect_location,
10053 : "hoisting out of the vectorized loop: %G",
10054 : (gimple *) stmt);
10055 777 : scalar_dest = copy_ssa_name (gimple_assign_lhs (stmt));
10056 777 : tree rhs = unshare_expr (gimple_assign_rhs1 (stmt));
10057 777 : edge pe = loop_preheader_edge (loop);
10058 777 : gphi *vphi = get_virtual_phi (loop->header);
10059 777 : tree vuse;
10060 777 : if (vphi)
10061 771 : vuse = PHI_ARG_DEF_FROM_EDGE (vphi, pe);
10062 : else
10063 6 : vuse = gimple_vuse (gsi_stmt (*gsi));
10064 777 : gimple *new_stmt = gimple_build_assign (scalar_dest, rhs);
10065 777 : gimple_set_vuse (new_stmt, vuse);
10066 777 : gsi_insert_on_edge_immediate (pe, new_stmt);
10067 777 : hoist_defs_of_uses (new_stmt, loop, true);
10068 777 : if (!useless_type_conversion_p (TREE_TYPE (vectype),
10069 777 : TREE_TYPE (scalar_dest)))
10070 : {
10071 12 : tree tem = make_ssa_name (TREE_TYPE (vectype));
10072 12 : new_stmt = gimple_build_assign (tem,
10073 : NOP_EXPR, scalar_dest);
10074 12 : gsi_insert_on_edge_immediate (pe, new_stmt);
10075 12 : scalar_dest = tem;
10076 : }
10077 777 : scalar_defs.quick_push (scalar_dest);
10078 777 : if (uniform_p)
10079 : break;
10080 : }
10081 675 : if (!uniform_p)
10082 : {
10083 55 : unsigned const_nunits
10084 55 : = TYPE_VECTOR_SUBPARTS (vectype).to_constant ();
10085 124 : for (j = 0; j < (int) vec_num; ++j)
10086 : {
10087 69 : vec<constructor_elt, va_gc> *v = NULL;
10088 69 : vec_safe_reserve (v, const_nunits, true);
10089 405 : for (unsigned i = 0; i < const_nunits; ++i)
10090 : {
10091 336 : unsigned def_idx
10092 336 : = (j * const_nunits + i) % SLP_TREE_LANES (slp_node);
10093 336 : CONSTRUCTOR_APPEND_ELT (v, NULL_TREE,
10094 : scalar_defs[def_idx]);
10095 : }
10096 69 : scalar_dest = build_constructor (vectype, v);
10097 69 : new_temp = vect_init_vector (vinfo, stmt_info, scalar_dest,
10098 : vectype, NULL);
10099 69 : slp_node->push_vec_def (new_temp);
10100 : }
10101 55 : return true;
10102 : }
10103 620 : new_temp = vect_init_vector (vinfo, stmt_info, scalar_dest,
10104 : vectype, NULL);
10105 675 : }
10106 : else
10107 : {
10108 195 : gcc_assert (uniform_p);
10109 195 : gimple_stmt_iterator gsi2 = *gsi;
10110 195 : gsi_next (&gsi2);
10111 195 : new_temp = vect_init_vector (vinfo, stmt_info, scalar_dest,
10112 : vectype, &gsi2);
10113 : }
10114 1704 : for (j = 0; j < (int) vec_num; ++j)
10115 889 : slp_node->push_vec_def (new_temp);
10116 : return true;
10117 : }
10118 :
10119 542068 : if (memory_access_type == VMAT_ELEMENTWISE
10120 542068 : || memory_access_type == VMAT_STRIDED_SLP)
10121 : {
10122 38832 : gimple_stmt_iterator incr_gsi;
10123 38832 : bool insert_after;
10124 38832 : tree offvar = NULL_TREE;
10125 38832 : tree ivstep;
10126 38832 : tree running_off;
10127 38832 : vec<constructor_elt, va_gc> *v = NULL;
10128 38832 : tree stride_base, stride_step, alias_off;
10129 : /* Checked by get_load_store_type. */
10130 38832 : unsigned int const_nunits = nunits.to_constant ();
10131 38832 : unsigned HOST_WIDE_INT cst_offset = 0;
10132 38832 : tree dr_offset;
10133 38832 : unsigned int inside_cost = 0;
10134 :
10135 38832 : gcc_assert (!LOOP_VINFO_USING_PARTIAL_VECTORS_P (loop_vinfo));
10136 38832 : gcc_assert (!nested_in_vect_loop);
10137 :
10138 38832 : if (grouped_load)
10139 : {
10140 : /* If we elided a consecutive load permutation, don't
10141 : use the original first statement (which could be elided)
10142 : but the one the load permutation starts with.
10143 : This ensures the stride_base below is correct. */
10144 26175 : if (!ls.subchain_p)
10145 26143 : first_stmt_info = DR_GROUP_FIRST_ELEMENT (stmt_info);
10146 : else
10147 32 : first_stmt_info = SLP_TREE_SCALAR_STMTS (slp_node)[0];
10148 26175 : first_dr_info = STMT_VINFO_DR_INFO (first_stmt_info);
10149 26175 : ref_type = get_group_alias_ptr_type (first_stmt_info);
10150 : }
10151 : else
10152 : {
10153 12657 : first_stmt_info = stmt_info;
10154 12657 : first_dr_info = dr_info;
10155 12657 : ref_type = reference_alias_ptr_type (DR_REF (dr_info->dr));
10156 : }
10157 :
10158 38832 : if (grouped_load)
10159 : {
10160 26175 : if (memory_access_type == VMAT_STRIDED_SLP)
10161 : {
10162 : /* If we elided a consecutive load permutation, adjust
10163 : the group size here. */
10164 3713 : if (!ls.subchain_p)
10165 3681 : group_size = DR_GROUP_SIZE (first_stmt_info);
10166 : else
10167 32 : group_size = SLP_TREE_LANES (slp_node);
10168 : }
10169 : else /* VMAT_ELEMENTWISE */
10170 22462 : group_size = SLP_TREE_LANES (slp_node);
10171 : }
10172 : else
10173 : group_size = 1;
10174 :
10175 38832 : if (!costing_p)
10176 : {
10177 3390 : dr_offset = get_dr_vinfo_offset (vinfo, first_dr_info);
10178 3390 : stride_base = fold_build_pointer_plus (
10179 : DR_BASE_ADDRESS (first_dr_info->dr),
10180 : size_binop (PLUS_EXPR, convert_to_ptrofftype (dr_offset),
10181 : convert_to_ptrofftype (DR_INIT (first_dr_info->dr))));
10182 3390 : stride_step = fold_convert (sizetype, DR_STEP (first_dr_info->dr));
10183 :
10184 : /* For a load with loop-invariant (but other than power-of-2)
10185 : stride (i.e. not a grouped access) like so:
10186 :
10187 : for (i = 0; i < n; i += stride)
10188 : ... = array[i];
10189 :
10190 : we generate a new induction variable and new accesses to
10191 : form a new vector (or vectors, depending on ncopies):
10192 :
10193 : for (j = 0; ; j += VF*stride)
10194 : tmp1 = array[j];
10195 : tmp2 = array[j + stride];
10196 : ...
10197 : vectemp = {tmp1, tmp2, ...}
10198 : */
10199 :
10200 3390 : ivstep = fold_build2 (MULT_EXPR, TREE_TYPE (stride_step), stride_step,
10201 : build_int_cst (TREE_TYPE (stride_step), vf));
10202 :
10203 3390 : standard_iv_increment_position (loop, &incr_gsi, &insert_after);
10204 :
10205 3390 : stride_base = cse_and_gimplify_to_preheader (loop_vinfo, stride_base);
10206 3390 : ivstep = cse_and_gimplify_to_preheader (loop_vinfo, ivstep);
10207 3390 : create_iv (stride_base, PLUS_EXPR, ivstep, NULL,
10208 : loop, &incr_gsi, insert_after,
10209 : &offvar, NULL);
10210 :
10211 3390 : stride_step = cse_and_gimplify_to_preheader (loop_vinfo, stride_step);
10212 : }
10213 :
10214 38832 : running_off = offvar;
10215 38832 : alias_off = build_int_cst (ref_type, 0);
10216 38832 : int nloads = const_nunits;
10217 38832 : int lnel = 1;
10218 38832 : tree ltype = TREE_TYPE (vectype);
10219 38832 : tree lvectype = vectype;
10220 38832 : auto_vec<tree> dr_chain;
10221 : /* ??? Modify local copies of alignment_support_scheme and
10222 : misalignment, but this part of analysis should be done
10223 : earlier and remembered, likewise the chosen load mode. */
10224 38832 : const dr_alignment_support tem = alignment_support_scheme;
10225 38832 : dr_alignment_support alignment_support_scheme = tem;
10226 38832 : const int tem2 = misalignment;
10227 38832 : int misalignment = tem2;
10228 38832 : if (memory_access_type == VMAT_STRIDED_SLP)
10229 : {
10230 16370 : HOST_WIDE_INT n = gcd (group_size, const_nunits);
10231 : /* Use the target vector type if the group size is a multiple
10232 : of it. */
10233 16370 : if (n == const_nunits)
10234 : {
10235 1950 : int mis_align = dr_misalignment (first_dr_info, vectype);
10236 : /* With VF > 1 we advance the DR by step, if that is constant
10237 : and only aligned when performed VF times, DR alignment
10238 : analysis can analyze this as aligned since it assumes
10239 : contiguous accesses. But that is not how we code generate
10240 : here, so adjust for this. */
10241 1950 : if (maybe_gt (vf, 1u)
10242 3184 : && !multiple_p (DR_STEP_ALIGNMENT (first_dr_info->dr),
10243 2980 : DR_TARGET_ALIGNMENT (first_dr_info)))
10244 204 : mis_align = -1;
10245 1950 : dr_alignment_support dr_align
10246 1950 : = vect_supportable_dr_alignment (vinfo, dr_info, vectype,
10247 : mis_align);
10248 1950 : if (dr_align == dr_aligned
10249 1950 : || dr_align == dr_unaligned_supported)
10250 : {
10251 16370 : nloads = 1;
10252 16370 : lnel = const_nunits;
10253 16370 : ltype = vectype;
10254 16370 : alignment_support_scheme = dr_align;
10255 16370 : misalignment = mis_align;
10256 : }
10257 : }
10258 : /* Else use the biggest vector we can load the group without
10259 : accessing excess elements. */
10260 14420 : else if (n > 1)
10261 : {
10262 1770 : tree ptype;
10263 1770 : tree vtype
10264 1770 : = vector_vector_composition_type (vectype, const_nunits / n,
10265 : &ptype);
10266 1770 : if (vtype != NULL_TREE)
10267 : {
10268 1734 : dr_alignment_support dr_align;
10269 1734 : int mis_align = 0;
10270 1734 : if (VECTOR_TYPE_P (ptype))
10271 : {
10272 888 : mis_align = dr_misalignment (first_dr_info, ptype);
10273 888 : if (maybe_gt (vf, 1u)
10274 1748 : && !multiple_p (DR_STEP_ALIGNMENT (first_dr_info->dr),
10275 894 : DR_TARGET_ALIGNMENT (first_dr_info)))
10276 854 : mis_align = -1;
10277 888 : dr_align
10278 888 : = vect_supportable_dr_alignment (vinfo, dr_info, ptype,
10279 : mis_align);
10280 : }
10281 : else
10282 : dr_align = dr_unaligned_supported;
10283 1734 : if (dr_align == dr_aligned
10284 1734 : || dr_align == dr_unaligned_supported)
10285 : {
10286 1734 : nloads = const_nunits / n;
10287 1734 : lnel = n;
10288 1734 : lvectype = vtype;
10289 1734 : ltype = ptype;
10290 1734 : alignment_support_scheme = dr_align;
10291 1734 : misalignment = mis_align;
10292 : }
10293 : }
10294 : }
10295 16370 : unsigned align;
10296 16370 : if (alignment_support_scheme == dr_aligned)
10297 12 : align = known_alignment (DR_TARGET_ALIGNMENT (first_dr_info));
10298 : else
10299 16358 : align = dr_alignment (vect_dr_behavior (vinfo, first_dr_info));
10300 : /* Alignment is at most the access size if we do multiple loads. */
10301 16370 : if (nloads > 1)
10302 14420 : align = MIN (tree_to_uhwi (TYPE_SIZE_UNIT (ltype)), align);
10303 16370 : ltype = build_aligned_type (ltype, align * BITS_PER_UNIT);
10304 : }
10305 :
10306 : /* For SLP permutation support we need to load the whole group,
10307 : not only the number of vector stmts the permutation result
10308 : fits in. */
10309 38832 : int ncopies;
10310 38832 : if (ls.slp_perm)
10311 : {
10312 2384 : gcc_assert (memory_access_type != VMAT_ELEMENTWISE);
10313 : /* We don't yet generate SLP_TREE_LOAD_PERMUTATIONs for
10314 : variable VF. */
10315 2384 : unsigned int const_vf = vf.to_constant ();
10316 2384 : ncopies = CEIL (group_size * const_vf, const_nunits);
10317 2384 : dr_chain.create (ncopies);
10318 : }
10319 : else
10320 : ncopies = vec_num;
10321 :
10322 38832 : unsigned int group_el = 0;
10323 38832 : unsigned HOST_WIDE_INT
10324 38832 : elsz = tree_to_uhwi (TYPE_SIZE_UNIT (TREE_TYPE (vectype)));
10325 38832 : unsigned int n_groups = 0;
10326 : /* For costing some adjacent vector loads, we'd like to cost with
10327 : the total number of them once instead of cost each one by one. */
10328 38832 : unsigned int n_adjacent_loads = 0;
10329 98164 : for (j = 0; j < ncopies; j++)
10330 : {
10331 59332 : if (nloads > 1 && !costing_p)
10332 2878 : vec_alloc (v, nloads);
10333 : gimple *new_stmt = NULL;
10334 212886 : for (i = 0; i < nloads; i++)
10335 : {
10336 153554 : if (costing_p)
10337 : {
10338 : /* For VMAT_ELEMENTWISE, just cost it as scalar_load to
10339 : avoid ICE, see PR110776. */
10340 143478 : if (VECTOR_TYPE_P (ltype)
10341 4634 : && memory_access_type != VMAT_ELEMENTWISE)
10342 4634 : n_adjacent_loads++;
10343 : else
10344 138844 : inside_cost += record_stmt_cost (cost_vec, 1, scalar_load,
10345 : slp_node, 0, vect_body);
10346 143478 : continue;
10347 : }
10348 10076 : unsigned int load_el = group_el;
10349 : /* For elementwise accesses apply a load permutation directly. */
10350 10076 : if (memory_access_type == VMAT_ELEMENTWISE
10351 10076 : && SLP_TREE_LOAD_PERMUTATION (slp_node).exists ())
10352 2162 : load_el = SLP_TREE_LOAD_PERMUTATION (slp_node)[group_el];
10353 10076 : tree this_off = build_int_cst (TREE_TYPE (alias_off),
10354 10076 : load_el * elsz + cst_offset);
10355 10076 : tree data_ref = build2 (MEM_REF, ltype, running_off, this_off);
10356 10076 : vect_copy_ref_info (data_ref, DR_REF (first_dr_info->dr));
10357 10076 : new_temp = make_ssa_name (ltype);
10358 10076 : new_stmt = gimple_build_assign (new_temp, data_ref);
10359 10076 : vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
10360 10076 : if (nloads > 1)
10361 8422 : CONSTRUCTOR_APPEND_ELT (v, NULL_TREE, new_temp);
10362 :
10363 10076 : group_el += lnel;
10364 10076 : if (group_el == group_size)
10365 : {
10366 9681 : n_groups++;
10367 : /* When doing SLP make sure to not load elements from
10368 : the next vector iteration, those will not be accessed
10369 : so just use the last element again. See PR107451. */
10370 9681 : if (known_lt (n_groups, vf))
10371 : {
10372 6269 : tree newoff = copy_ssa_name (running_off);
10373 6269 : gimple *incr
10374 6269 : = gimple_build_assign (newoff, POINTER_PLUS_EXPR,
10375 : running_off, stride_step);
10376 6269 : vect_finish_stmt_generation (vinfo, stmt_info, incr, gsi);
10377 6269 : running_off = newoff;
10378 : }
10379 : group_el = 0;
10380 : }
10381 : }
10382 :
10383 59332 : if (nloads > 1)
10384 : {
10385 39514 : if (costing_p)
10386 36636 : inside_cost += record_stmt_cost (cost_vec, 1, vec_construct,
10387 : slp_node, 0, vect_body);
10388 : else
10389 : {
10390 2878 : tree vec_inv = build_constructor (lvectype, v);
10391 2878 : new_temp = vect_init_vector (vinfo, stmt_info, vec_inv,
10392 : lvectype, gsi);
10393 2878 : new_stmt = SSA_NAME_DEF_STMT (new_temp);
10394 2878 : if (lvectype != vectype)
10395 : {
10396 239 : new_stmt
10397 239 : = gimple_build_assign (make_ssa_name (vectype),
10398 : VIEW_CONVERT_EXPR,
10399 : build1 (VIEW_CONVERT_EXPR,
10400 : vectype, new_temp));
10401 239 : vect_finish_stmt_generation (vinfo, stmt_info, new_stmt,
10402 : gsi);
10403 : }
10404 : }
10405 : }
10406 19818 : else if (!costing_p && ltype != vectype)
10407 : {
10408 1629 : new_stmt = gimple_build_assign (make_ssa_name (vectype),
10409 : VIEW_CONVERT_EXPR,
10410 : build1 (VIEW_CONVERT_EXPR,
10411 : vectype, new_temp));
10412 1629 : vect_finish_stmt_generation (vinfo, stmt_info, new_stmt,
10413 : gsi);
10414 : }
10415 :
10416 59332 : if (!costing_p)
10417 : {
10418 4532 : if (ls.slp_perm)
10419 1330 : dr_chain.quick_push (gimple_assign_lhs (new_stmt));
10420 : else
10421 3202 : slp_node->push_vec_def (new_stmt);
10422 : }
10423 : }
10424 38832 : if (ls.slp_perm)
10425 : {
10426 2384 : if (costing_p)
10427 : {
10428 1763 : gcc_assert (ls.n_perms != -1U);
10429 1763 : inside_cost += record_stmt_cost (cost_vec, ls.n_perms, vec_perm,
10430 : slp_node, 0, vect_body);
10431 : }
10432 : else
10433 : {
10434 621 : unsigned n_perms2;
10435 621 : vect_transform_slp_perm_load (vinfo, slp_node, dr_chain, gsi, vf,
10436 : false, &n_perms2);
10437 621 : gcc_assert (ls.n_perms == n_perms2);
10438 : }
10439 : }
10440 :
10441 38832 : if (costing_p)
10442 : {
10443 35442 : if (n_adjacent_loads > 0)
10444 1766 : vect_get_load_cost (vinfo, stmt_info, slp_node, n_adjacent_loads,
10445 : alignment_support_scheme, misalignment, false,
10446 : &inside_cost, nullptr, cost_vec, cost_vec,
10447 : true);
10448 35442 : if (dump_enabled_p ())
10449 594 : dump_printf_loc (MSG_NOTE, vect_location,
10450 : "vect_model_load_cost: inside_cost = %u, "
10451 : "prologue_cost = 0 .\n",
10452 : inside_cost);
10453 : }
10454 :
10455 38832 : return true;
10456 38832 : }
10457 :
10458 503236 : if (mat_gather_scatter_p (memory_access_type)
10459 503236 : && !ls.ls_type)
10460 : grouped_load = false;
10461 :
10462 500501 : if (grouped_load
10463 503236 : || SLP_TREE_LOAD_PERMUTATION (slp_node).exists ())
10464 : {
10465 251447 : if (grouped_load)
10466 : {
10467 251058 : first_stmt_info = DR_GROUP_FIRST_ELEMENT (stmt_info);
10468 251058 : group_size = DR_GROUP_SIZE (first_stmt_info);
10469 : }
10470 : else
10471 : {
10472 : first_stmt_info = stmt_info;
10473 : group_size = 1;
10474 : }
10475 : /* For SLP vectorization we directly vectorize a subchain
10476 : without permutation. */
10477 251447 : if (! SLP_TREE_LOAD_PERMUTATION (slp_node).exists ())
10478 202829 : first_stmt_info = SLP_TREE_SCALAR_STMTS (slp_node)[0];
10479 : /* For BB vectorization always use the first stmt to base
10480 : the data ref pointer on. */
10481 251447 : if (bb_vinfo)
10482 203949 : first_stmt_info_for_drptr
10483 203949 : = vect_find_first_scalar_stmt_in_slp (slp_node);
10484 :
10485 251447 : first_dr_info = STMT_VINFO_DR_INFO (first_stmt_info);
10486 251447 : group_gap_adj = 0;
10487 :
10488 : /* VEC_NUM is the number of vect stmts to be created for this group. */
10489 251447 : grouped_load = false;
10490 : /* If an SLP permutation is from N elements to N elements,
10491 : and if one vector holds a whole number of N, we can load
10492 : the inputs to the permutation in the same way as an
10493 : unpermuted sequence. In other cases we need to load the
10494 : whole group, not only the number of vector stmts the
10495 : permutation result fits in. */
10496 251447 : unsigned scalar_lanes = SLP_TREE_LANES (slp_node);
10497 251447 : if (nested_in_vect_loop)
10498 : /* We do not support grouped accesses in a nested loop,
10499 : instead the access is contiguous but it might be
10500 : permuted. No gap adjustment is needed though. */
10501 : ;
10502 251445 : else if (ls.slp_perm
10503 251445 : && (group_size != scalar_lanes
10504 11130 : || !multiple_p (nunits, group_size)))
10505 : {
10506 : /* We don't yet generate such SLP_TREE_LOAD_PERMUTATIONs for
10507 : variable VF; see vect_transform_slp_perm_load. */
10508 38591 : unsigned int const_vf = vf.to_constant ();
10509 38591 : unsigned int const_nunits = nunits.to_constant ();
10510 38591 : vec_num = CEIL (group_size * const_vf, const_nunits);
10511 38591 : group_gap_adj = vf * group_size - nunits * vec_num;
10512 : }
10513 : else
10514 : {
10515 212854 : group_gap_adj = group_size - scalar_lanes;
10516 : }
10517 :
10518 251447 : ref_type = get_group_alias_ptr_type (first_stmt_info);
10519 : }
10520 : else
10521 : {
10522 251789 : first_stmt_info = stmt_info;
10523 251789 : first_dr_info = dr_info;
10524 251789 : group_size = 1;
10525 251789 : group_gap_adj = 0;
10526 251789 : ref_type = reference_alias_ptr_type (DR_REF (first_dr_info->dr));
10527 : }
10528 :
10529 503236 : vec_loop_masks *loop_masks
10530 299287 : = (loop_vinfo && LOOP_VINFO_FULLY_MASKED_P (loop_vinfo)
10531 503236 : ? &LOOP_VINFO_MASKS (loop_vinfo)
10532 31 : : NULL);
10533 31 : vec_loop_lens *loop_lens
10534 299287 : = (loop_vinfo && LOOP_VINFO_FULLY_WITH_LENGTH_P (loop_vinfo)
10535 : ? &LOOP_VINFO_LENS (loop_vinfo)
10536 0 : : NULL);
10537 :
10538 : /* The vect_transform_stmt and vect_analyze_stmt will go here but there
10539 : are some difference here. We cannot enable both the lens and masks
10540 : during transform but it is allowed during analysis.
10541 : Shouldn't go with length-based approach if fully masked. */
10542 503236 : if (cost_vec == NULL)
10543             :     /* The cost_vec is NULL during transform.  */
10544 159062 : gcc_assert ((!loop_lens || !loop_masks));
10545 :
10546 : /* Targets with store-lane instructions must not require explicit
10547 : realignment. vect_supportable_dr_alignment always returns either
10548 : dr_aligned or dr_unaligned_supported for (non-length) masked
10549 : operations. */
10550 503236 : gcc_assert ((memory_access_type != VMAT_LOAD_STORE_LANES
10551 : && !mask_node
10552 : && !loop_masks)
10553 : || mat_gather_scatter_p (memory_access_type)
10554 : || alignment_support_scheme == dr_aligned
10555 : || alignment_support_scheme == dr_unaligned_supported);
10556 :
10557 : /* In case the vectorization factor (VF) is bigger than the number
10558 : of elements that we can fit in a vectype (nunits), we have to generate
10559 : more than one vector stmt - i.e - we need to "unroll" the
10560 : vector stmt by a factor VF/nunits. In doing so, we record a pointer
10561 : from one copy of the vector stmt to the next, in the field
10562 : STMT_VINFO_RELATED_STMT. This is necessary in order to allow following
10563 : stages to find the correct vector defs to be used when vectorizing
10564 : stmts that use the defs of the current stmt. The example below
10565 : illustrates the vectorization process when VF=16 and nunits=4 (i.e., we
10566 : need to create 4 vectorized stmts):
10567 :
10568 : before vectorization:
10569 : RELATED_STMT VEC_STMT
10570 : S1: x = memref - -
10571 : S2: z = x + 1 - -
10572 :
10573 : step 1: vectorize stmt S1:
10574 : We first create the vector stmt VS1_0, and, as usual, record a
10575 : pointer to it in the STMT_VINFO_VEC_STMT of the scalar stmt S1.
10576 : Next, we create the vector stmt VS1_1, and record a pointer to
10577 : it in the STMT_VINFO_RELATED_STMT of the vector stmt VS1_0.
10578 : Similarly, for VS1_2 and VS1_3. This is the resulting chain of
10579 : stmts and pointers:
10580 : RELATED_STMT VEC_STMT
10581 : VS1_0: vx0 = memref0 VS1_1 -
10582 : VS1_1: vx1 = memref1 VS1_2 -
10583 : VS1_2: vx2 = memref2 VS1_3 -
10584 : VS1_3: vx3 = memref3 - -
10585 : S1: x = load - VS1_0
10586 : S2: z = x + 1 - -
10587 : */
10588 :
10589 : /* If the data reference is aligned (dr_aligned) or potentially unaligned
10590 : on a target that supports unaligned accesses (dr_unaligned_supported)
10591 : we generate the following code:
10592 : p = initial_addr;
10593 : indx = 0;
10594 : loop {
10595 : p = p + indx * vectype_size;
10596 : vec_dest = *(p);
10597 : indx = indx + 1;
10598 : }
10599 :
10600 : Otherwise, the data reference is potentially unaligned on a target that
10601 : does not support unaligned accesses (dr_explicit_realign_optimized) -
10602 : then generate the following code, in which the data in each iteration is
10603 : obtained by two vector loads, one from the previous iteration, and one
10604 : from the current iteration:
10605 : p1 = initial_addr;
10606 : msq_init = *(floor(p1))
10607 : p2 = initial_addr + VS - 1;
10608 : realignment_token = call target_builtin;
10609 : indx = 0;
10610 : loop {
10611 : p2 = p2 + indx * vectype_size
10612 : lsq = *(floor(p2))
10613 : vec_dest = realign_load (msq, lsq, realignment_token)
10614 : indx = indx + 1;
10615 : msq = lsq;
10616 : } */
10617 :
10618 : /* If the misalignment remains the same throughout the execution of the
10619 : loop, we can create the init_addr and permutation mask at the loop
10620 : preheader. Otherwise, it needs to be created inside the loop.
10621 : This can only occur when vectorizing memory accesses in the inner-loop
10622 : nested within an outer-loop that is being vectorized. */
10623 :
10624 503236 : if (nested_in_vect_loop
10625 503236 : && !multiple_p (DR_STEP_ALIGNMENT (dr_info->dr),
10626 1200 : GET_MODE_SIZE (TYPE_MODE (vectype))))
10627 : {
10628 191 : gcc_assert (alignment_support_scheme != dr_explicit_realign_optimized);
10629 : compute_in_loop = true;
10630 : }
10631 :
10632 503236 : bool diff_first_stmt_info
10633 503236 : = first_stmt_info_for_drptr && first_stmt_info != first_stmt_info_for_drptr;
10634 :
10635 503236 : tree offset = NULL_TREE;
10636 503236 : if ((alignment_support_scheme == dr_explicit_realign_optimized
10637 503236 : || alignment_support_scheme == dr_explicit_realign)
10638 0 : && !compute_in_loop)
10639 : {
10640 : /* If we have different first_stmt_info, we can't set up realignment
10641 : here, since we can't guarantee first_stmt_info DR has been
10642 : initialized yet, use first_stmt_info_for_drptr DR by bumping the
10643 : distance from first_stmt_info DR instead as below. */
10644 0 : if (!costing_p)
10645 : {
10646 0 : if (!diff_first_stmt_info)
10647 0 : msq = vect_setup_realignment (vinfo, first_stmt_info, vectype, gsi,
10648 : &realignment_token,
10649 : alignment_support_scheme, NULL_TREE,
10650 : &at_loop);
10651 0 : if (alignment_support_scheme == dr_explicit_realign_optimized)
10652 : {
10653 0 : phi = as_a<gphi *> (SSA_NAME_DEF_STMT (msq));
10654 0 : offset = size_binop (MINUS_EXPR, TYPE_SIZE_UNIT (vectype),
10655 : size_one_node);
10656 0 : gcc_assert (!first_stmt_info_for_drptr);
10657 : }
10658 : }
10659 : }
10660 : else
10661 503236 : at_loop = loop;
10662 :
10663 503236 : if (!known_eq (poffset, 0))
10664 4384 : offset = (offset
10665 4384 : ? size_binop (PLUS_EXPR, offset, size_int (poffset))
10666 4384 : : size_int (poffset));
10667 :
10668 503236 : tree bump;
10669 503236 : tree vec_offset = NULL_TREE;
10670 :
10671 503236 : auto_vec<tree> vec_offsets;
10672 503236 : auto_vec<tree> vec_masks;
10673 503236 : if (mask_node && !costing_p)
10674 634 : vect_get_slp_defs (SLP_TREE_CHILDREN (slp_node)[mask_index],
10675 : &vec_masks);
10676 :
10677 503236 : tree vec_mask = NULL_TREE;
10678 503236 : tree vec_els = NULL_TREE;
10679 503236 : if (memory_access_type == VMAT_LOAD_STORE_LANES)
10680 : {
10681 0 : const internal_fn lanes_ifn = ls.lanes_ifn;
10682 :
10683 0 : gcc_assert (alignment_support_scheme == dr_aligned
10684 : || alignment_support_scheme == dr_unaligned_supported);
10685 :
10686 0 : aggr_type = build_array_type_nelts (elem_type, group_size * nunits);
10687 0 : if (!costing_p)
10688 0 : bump = vect_get_data_ptr_increment (vinfo, gsi, dr_info, aggr_type,
10689 : memory_access_type, loop_lens);
10690 :
10691 0 : unsigned int inside_cost = 0, prologue_cost = 0;
10692 : /* For costing some adjacent vector loads, we'd like to cost with
10693 : the total number of them once instead of cost each one by one. */
10694 0 : unsigned int n_adjacent_loads = 0;
10695 0 : int ncopies = vec_num / group_size;
10696 0 : for (j = 0; j < ncopies; j++)
10697 : {
10698 0 : if (costing_p)
10699 : {
10700 : /* An IFN_LOAD_LANES will load all its vector results,
10701 : regardless of which ones we actually need. Account
10702 : for the cost of unused results. */
10703 0 : if (first_stmt_info == stmt_info)
10704 : {
10705 0 : unsigned int gaps = DR_GROUP_SIZE (first_stmt_info);
10706 0 : stmt_vec_info next_stmt_info = first_stmt_info;
10707 0 : do
10708 : {
10709 0 : gaps -= 1;
10710 0 : next_stmt_info = DR_GROUP_NEXT_ELEMENT (next_stmt_info);
10711 : }
10712 0 : while (next_stmt_info);
10713 0 : if (gaps)
10714 : {
10715 0 : if (dump_enabled_p ())
10716 0 : dump_printf_loc (MSG_NOTE, vect_location,
10717 : "vect_model_load_cost: %d "
10718 : "unused vectors.\n",
10719 : gaps);
10720 0 : vect_get_load_cost (vinfo, stmt_info, slp_node, gaps,
10721 : alignment_support_scheme,
10722 : misalignment, false, &inside_cost,
10723 : &prologue_cost, cost_vec, cost_vec,
10724 : true);
10725 : }
10726 : }
10727 0 : n_adjacent_loads++;
10728 0 : continue;
10729 0 : }
10730 :
10731 : /* 1. Create the vector or array pointer update chain. */
10732 0 : if (j == 0)
10733 0 : dataref_ptr
10734 0 : = vect_create_data_ref_ptr (vinfo, first_stmt_info, aggr_type,
10735 : at_loop, offset, &dummy, gsi,
10736 : &ptr_incr, false, bump);
10737 : else
10738 : {
10739 0 : gcc_assert (!LOOP_VINFO_USING_SELECT_VL_P (loop_vinfo));
10740 0 : dataref_ptr = bump_vector_ptr (vinfo, dataref_ptr, ptr_incr, gsi,
10741 : stmt_info, bump);
10742 : }
10743 0 : if (mask_node)
10744 0 : vec_mask = vec_masks[j];
10745 :
10746 0 : tree vec_array = create_vector_array (vectype, group_size);
10747 :
10748 0 : tree final_mask = NULL_TREE;
10749 0 : tree final_len = NULL_TREE;
10750 0 : tree bias = NULL_TREE;
10751 0 : if (loop_masks)
10752 0 : final_mask = vect_get_loop_mask (loop_vinfo, gsi, loop_masks,
10753 : ncopies, vectype, j);
10754 0 : if (vec_mask)
10755 0 : final_mask = prepare_vec_mask (loop_vinfo, mask_vectype, final_mask,
10756 : vec_mask, gsi);
10757 :
10758 0 : if (lanes_ifn == IFN_MASK_LEN_LOAD_LANES)
10759 : {
10760 0 : if (loop_lens)
10761 0 : final_len = vect_get_loop_len (loop_vinfo, gsi, loop_lens,
10762 : ncopies, vectype, j, 1, true);
10763 : else
10764 0 : final_len = size_int (TYPE_VECTOR_SUBPARTS (vectype));
10765 0 : signed char biasval
10766 0 : = LOOP_VINFO_PARTIAL_LOAD_STORE_BIAS (loop_vinfo);
10767 0 : bias = build_int_cst (intQI_type_node, biasval);
10768 0 : if (!final_mask)
10769 : {
10770 0 : mask_vectype = truth_type_for (vectype);
10771 0 : final_mask = build_minus_one_cst (mask_vectype);
10772 : }
10773 : }
10774 :
10775 0 : if (final_mask)
10776 : {
10777 0 : vec_els = vect_get_mask_load_else (maskload_elsval, vectype);
10778 0 : if (type_mode_padding_p
10779 0 : && maskload_elsval != MASK_LOAD_ELSE_ZERO)
10780 0 : need_zeroing = true;
10781 : }
10782 :
10783 0 : gcall *call;
10784 0 : if (final_len && final_mask)
10785 : {
10786 : /* Emit:
10787 : VEC_ARRAY = MASK_LEN_LOAD_LANES (DATAREF_PTR, ALIAS_PTR,
10788 : VEC_MASK, LEN, BIAS). */
10789 0 : unsigned int align = TYPE_ALIGN (TREE_TYPE (vectype));
10790 0 : tree alias_ptr = build_int_cst (ref_type, align);
10791 0 : call = gimple_build_call_internal (IFN_MASK_LEN_LOAD_LANES, 6,
10792 : dataref_ptr, alias_ptr,
10793 : final_mask, vec_els,
10794 : final_len, bias);
10795 : }
10796 0 : else if (final_mask)
10797 : {
10798 : /* Emit:
10799 : VEC_ARRAY = MASK_LOAD_LANES (DATAREF_PTR, ALIAS_PTR,
10800 : VEC_MASK). */
10801 0 : unsigned int align = TYPE_ALIGN (TREE_TYPE (vectype));
10802 0 : tree alias_ptr = build_int_cst (ref_type, align);
10803 0 : call = gimple_build_call_internal (IFN_MASK_LOAD_LANES, 4,
10804 : dataref_ptr, alias_ptr,
10805 : final_mask, vec_els);
10806 : }
10807 : else
10808 : {
10809 : /* Emit:
10810 : VEC_ARRAY = LOAD_LANES (MEM_REF[...all elements...]). */
10811 0 : data_ref = create_array_ref (aggr_type, dataref_ptr, ref_type);
10812 0 : call = gimple_build_call_internal (IFN_LOAD_LANES, 1, data_ref);
10813 : }
10814 0 : gimple_call_set_lhs (call, vec_array);
10815 0 : gimple_call_set_nothrow (call, true);
10816 0 : vect_finish_stmt_generation (vinfo, stmt_info, call, gsi);
10817 :
10818 : /* Extract each vector into an SSA_NAME. */
10819 0 : for (unsigned i = 0; i < group_size; i++)
10820 : {
10821 0 : new_temp = read_vector_array (vinfo, stmt_info, gsi, scalar_dest,
10822 : vec_array, i, need_zeroing,
10823 : final_mask);
10824 0 : slp_node->push_vec_def (new_temp);
10825 : }
10826 :
10827 : /* Record that VEC_ARRAY is now dead. */
10828 0 : vect_clobber_variable (vinfo, stmt_info, gsi, vec_array);
10829 : }
10830 :
10831 0 : if (costing_p)
10832 : {
10833 0 : if (n_adjacent_loads > 0)
10834 0 : vect_get_load_cost (vinfo, stmt_info, slp_node, n_adjacent_loads,
10835 : alignment_support_scheme, misalignment, false,
10836 : &inside_cost, &prologue_cost, cost_vec,
10837 : cost_vec, true);
10838 0 : if (dump_enabled_p ())
10839 0 : dump_printf_loc (MSG_NOTE, vect_location,
10840 : "vect_model_load_cost: inside_cost = %u, "
10841 : "prologue_cost = %u .\n",
10842 : inside_cost, prologue_cost);
10843 : }
10844 :
10845 0 : return true;
10846 : }
10847 :
10848 503236 : if (mat_gather_scatter_p (memory_access_type))
10849 : {
10850 2735 : gcc_assert ((!grouped_load && !ls.slp_perm) || ls.ls_type);
10851 :
10852 2735 : auto_vec<tree> dr_chain (vec_num);
10853 :
10854 : /* If we pun the original vectype the loads as well as costing, length,
10855 : etc. is performed with the new type. After loading we VIEW_CONVERT
10856 : the data to the original vectype. */
10857 2735 : tree original_vectype = vectype;
10858 2735 : if (ls.ls_type)
10859 0 : vectype = ls.ls_type;
10860 :
10861 : /* 1. Create the vector or array pointer update chain. */
10862 2735 : if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
10863 : {
10864 2735 : aggr_type = NULL_TREE;
10865 2735 : bump = NULL_TREE;
10866 2735 : if (!costing_p)
10867 749 : vect_get_gather_scatter_ops (loop, slp_node, &dataref_ptr,
10868 : &vec_offsets);
10869 : }
10870 : else
10871 : {
10872 0 : aggr_type = elem_type;
10873 0 : if (!costing_p)
10874 : {
10875 0 : vect_get_strided_load_store_ops (stmt_info, slp_node, vectype,
10876 : ls.strided_offset_vectype,
10877 : loop_vinfo, gsi,
10878 : &bump, &vec_offset, loop_lens);
10879 0 : dataref_ptr
10880 0 : = vect_create_data_ref_ptr (vinfo, first_stmt_info, aggr_type,
10881 : at_loop, offset, &dummy, gsi,
10882 : &ptr_incr, false, bump);
10883 : }
10884 : }
10885 :
10886 : unsigned int inside_cost = 0, prologue_cost = 0;
10887 :
10888 6163 : gimple *new_stmt = NULL;
10889 6163 : for (i = 0; i < vec_num; i++)
10890 : {
10891 3428 : tree final_mask = NULL_TREE;
10892 3428 : tree final_len = NULL_TREE;
10893 3428 : tree bias = NULL_TREE;
10894 3428 : if (!costing_p)
10895 : {
10896 963 : if (mask_node)
10897 153 : vec_mask = vec_masks[i];
10898 963 : if (loop_masks)
10899 0 : final_mask = vect_get_loop_mask (loop_vinfo, gsi, loop_masks,
10900 : vec_num, vectype, i);
10901 963 : if (vec_mask)
10902 153 : final_mask = prepare_vec_mask (loop_vinfo, mask_vectype,
10903 : final_mask, vec_mask, gsi);
10904 :
10905 963 : if (i > 0 && !STMT_VINFO_GATHER_SCATTER_P (stmt_info))
10906 0 : dataref_ptr = bump_vector_ptr (vinfo, dataref_ptr, ptr_incr,
10907 : gsi, stmt_info, bump);
10908 : }
10909 :
10910 : /* 2. Create the vector-load in the loop. */
10911 3428 : unsigned align = get_object_alignment (DR_REF (first_dr_info->dr));
10912 3428 : tree alias_align_ptr = build_int_cst (ref_type, align);
10913 3428 : if (memory_access_type == VMAT_GATHER_SCATTER_IFN)
10914 : {
10915 0 : if (costing_p)
10916 : {
10917 0 : if (ls.supported_offset_vectype)
10918 0 : inside_cost
10919 0 : += record_stmt_cost (cost_vec, 1, vector_stmt,
10920 : slp_node, 0, vect_body);
10921 0 : if (ls.supported_scale)
10922 0 : inside_cost
10923 0 : += record_stmt_cost (cost_vec, 1, vector_stmt,
10924 : slp_node, 0, vect_body);
10925 :
10926 0 : unsigned int cnunits = vect_nunits_for_cost (vectype);
10927 0 : inside_cost
10928 0 : = record_stmt_cost (cost_vec, cnunits, scalar_load,
10929 : slp_node, 0, vect_body);
10930 3428 : continue;
10931 0 : }
10932 0 : if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
10933 0 : vec_offset = vec_offsets[i];
10934 0 : tree zero = build_zero_cst (vectype);
10935 0 : tree scale = size_int (SLP_TREE_GS_SCALE (slp_node));
10936 0 : bool strided = !VECTOR_TYPE_P (TREE_TYPE (vec_offset));
10937 :
10938 : /* Perform the offset conversion and scaling if necessary. */
10939 0 : if (!strided
10940 0 : && (ls.supported_offset_vectype || ls.supported_scale))
10941 : {
10942 0 : gimple_seq stmts = NULL;
10943 0 : if (ls.supported_offset_vectype)
10944 0 : vec_offset = gimple_convert
10945 0 : (&stmts, ls.supported_offset_vectype, vec_offset);
10946 0 : if (ls.supported_scale)
10947 : {
10948 : /* Only scale the vec_offset if we haven't already. */
10949 0 : if (STMT_VINFO_GATHER_SCATTER_P (stmt_info)
10950 0 : || i == 0)
10951 : {
10952 0 : tree mult_cst = build_int_cst
10953 0 : (TREE_TYPE (TREE_TYPE (vec_offset)),
10954 0 : SLP_TREE_GS_SCALE (slp_node) / ls.supported_scale);
10955 0 : tree mult = build_vector_from_val
10956 0 : (TREE_TYPE (vec_offset), mult_cst);
10957 0 : vec_offset = gimple_build
10958 0 : (&stmts, MULT_EXPR, TREE_TYPE (vec_offset),
10959 : vec_offset, mult);
10960 : }
10961 0 : scale = size_int (ls.supported_scale);
10962 : }
10963 0 : gsi_insert_seq_before (gsi, stmts, GSI_SAME_STMT);
10964 : }
10965 :
10966 0 : if (ls.gs.ifn == IFN_MASK_LEN_GATHER_LOAD)
10967 : {
10968 0 : if (loop_lens)
10969 0 : final_len = vect_get_loop_len (loop_vinfo, gsi, loop_lens,
10970 : vec_num, vectype, i, 1, true);
10971 : else
10972 0 : final_len = build_int_cst (sizetype,
10973 0 : TYPE_VECTOR_SUBPARTS (vectype));
10974 0 : signed char biasval
10975 0 : = LOOP_VINFO_PARTIAL_LOAD_STORE_BIAS (loop_vinfo);
10976 0 : bias = build_int_cst (intQI_type_node, biasval);
10977 0 : if (!final_mask)
10978 : {
10979 0 : mask_vectype = truth_type_for (vectype);
10980 0 : final_mask = build_minus_one_cst (mask_vectype);
10981 : }
10982 : }
10983 :
10984 0 : if (final_mask)
10985 : {
10986 0 : vec_els = vect_get_mask_load_else (maskload_elsval, vectype);
10987 0 : if (type_mode_padding_p
10988 0 : && maskload_elsval != MASK_LOAD_ELSE_ZERO)
10989 0 : need_zeroing = true;
10990 : }
10991 :
10992 0 : gcall *call;
10993 0 : if (final_len && final_mask)
10994 : {
10995 0 : if (VECTOR_TYPE_P (TREE_TYPE (vec_offset)))
10996 0 : call = gimple_build_call_internal (IFN_MASK_LEN_GATHER_LOAD,
10997 : 9, dataref_ptr,
10998 : alias_align_ptr,
10999 : vec_offset, scale, zero,
11000 : final_mask, vec_els,
11001 : final_len, bias);
11002 : else
11003 : /* Non-vector offset indicates that prefer to take
11004 : MASK_LEN_STRIDED_LOAD instead of the
11005 : MASK_LEN_GATHER_LOAD with direct stride arg. */
11006 0 : call = gimple_build_call_internal
11007 0 : (IFN_MASK_LEN_STRIDED_LOAD, 7, dataref_ptr,
11008 : vec_offset, zero, final_mask, vec_els, final_len,
11009 : bias);
11010 : }
11011 0 : else if (final_mask)
11012 0 : call = gimple_build_call_internal (IFN_MASK_GATHER_LOAD,
11013 : 7, dataref_ptr,
11014 : alias_align_ptr,
11015 : vec_offset, scale,
11016 : zero, final_mask, vec_els);
11017 : else
11018 0 : call = gimple_build_call_internal (IFN_GATHER_LOAD, 5,
11019 : dataref_ptr,
11020 : alias_align_ptr,
11021 : vec_offset, scale, zero);
11022 0 : gimple_call_set_nothrow (call, true);
11023 0 : new_stmt = call;
11024 0 : data_ref = NULL_TREE;
11025 : }
11026 3428 : else if (memory_access_type == VMAT_GATHER_SCATTER_LEGACY)
11027 : {
11028 : /* The builtin decls path for gather is legacy, x86 only. */
11029 570 : gcc_assert (!final_len && nunits.is_constant ());
11030 570 : if (costing_p)
11031 : {
11032 287 : unsigned int cnunits = vect_nunits_for_cost (vectype);
11033 287 : inside_cost
11034 287 : = record_stmt_cost (cost_vec, cnunits, scalar_load,
11035 : slp_node, 0, vect_body);
11036 287 : continue;
11037 287 : }
11038 283 : tree offset_vectype = TREE_TYPE (vec_offsets[0]);
11039 283 : poly_uint64 offset_nunits = TYPE_VECTOR_SUBPARTS (offset_vectype);
11040 283 : if (known_eq (nunits, offset_nunits))
11041 : {
11042 134 : new_stmt = vect_build_one_gather_load_call
11043 134 : (vinfo, stmt_info, slp_node, vectype, gsi,
11044 134 : ls.gs.decl, dataref_ptr, vec_offsets[i],
11045 : final_mask);
11046 134 : data_ref = NULL_TREE;
11047 : }
11048 149 : else if (known_eq (nunits, offset_nunits * 2))
11049 : {
11050 : /* We have an offset vector with half the number of
11051 : lanes but the builtins will produce full vectype
11052 : data with just the lower lanes filled. */
11053 63 : new_stmt = vect_build_one_gather_load_call
11054 126 : (vinfo, stmt_info, slp_node, vectype, gsi,
11055 63 : ls.gs.decl, dataref_ptr, vec_offsets[2 * i],
11056 : final_mask);
11057 63 : tree low = make_ssa_name (vectype);
11058 63 : gimple_set_lhs (new_stmt, low);
11059 63 : vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
11060 :
11061 : /* now put upper half of final_mask in final_mask low. */
11062 63 : if (final_mask
11063 63 : && !SCALAR_INT_MODE_P (TYPE_MODE (TREE_TYPE (final_mask))))
11064 : {
11065 11 : int count = nunits.to_constant ();
11066 11 : vec_perm_builder sel (count, count, 1);
11067 11 : sel.quick_grow (count);
11068 87 : for (int i = 0; i < count; ++i)
11069 76 : sel[i] = i | (count / 2);
11070 11 : vec_perm_indices indices (sel, 2, count);
11071 11 : tree perm_mask = vect_gen_perm_mask_checked
11072 11 : (TREE_TYPE (final_mask), indices);
11073 11 : new_stmt = gimple_build_assign (NULL_TREE, VEC_PERM_EXPR,
11074 : final_mask, final_mask,
11075 : perm_mask);
11076 11 : final_mask = make_ssa_name (TREE_TYPE (final_mask));
11077 11 : gimple_set_lhs (new_stmt, final_mask);
11078 11 : vect_finish_stmt_generation (vinfo, stmt_info,
11079 : new_stmt, gsi);
11080 11 : }
11081 52 : else if (final_mask)
11082 : {
11083 24 : new_stmt = gimple_build_assign (NULL_TREE,
11084 : VEC_UNPACK_HI_EXPR,
11085 : final_mask);
11086 24 : final_mask = make_ssa_name
11087 24 : (truth_type_for (offset_vectype));
11088 24 : gimple_set_lhs (new_stmt, final_mask);
11089 24 : vect_finish_stmt_generation (vinfo, stmt_info,
11090 : new_stmt, gsi);
11091 : }
11092 :
11093 63 : new_stmt = vect_build_one_gather_load_call
11094 126 : (vinfo, stmt_info, slp_node, vectype, gsi,
11095 : ls.gs.decl, dataref_ptr,
11096 63 : vec_offsets[2 * i + 1], final_mask);
11097 63 : tree high = make_ssa_name (vectype);
11098 63 : gimple_set_lhs (new_stmt, high);
11099 63 : vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
11100 :
11101 : /* compose low + high. */
11102 63 : int count = nunits.to_constant ();
11103 63 : vec_perm_builder sel (count, count, 1);
11104 63 : sel.quick_grow (count);
11105 647 : for (int i = 0; i < count; ++i)
11106 584 : sel[i] = i < count / 2 ? i : i + count / 2;
11107 63 : vec_perm_indices indices (sel, 2, count);
11108 63 : tree perm_mask
11109 63 : = vect_gen_perm_mask_checked (vectype, indices);
11110 63 : new_stmt = gimple_build_assign (NULL_TREE, VEC_PERM_EXPR,
11111 : low, high, perm_mask);
11112 63 : data_ref = NULL_TREE;
11113 63 : }
11114 86 : else if (known_eq (nunits * 2, offset_nunits))
11115 : {
11116 : /* We have an offset vector with double the number of
11117 : lanes. Select the low/high part accordingly. */
11118 86 : vec_offset = vec_offsets[i / 2];
11119 86 : if (i & 1)
11120 : {
11121 43 : int count = offset_nunits.to_constant ();
11122 43 : vec_perm_builder sel (count, count, 1);
11123 43 : sel.quick_grow (count);
11124 463 : for (int i = 0; i < count; ++i)
11125 420 : sel[i] = i | (count / 2);
11126 43 : vec_perm_indices indices (sel, 2, count);
11127 43 : tree perm_mask = vect_gen_perm_mask_checked
11128 43 : (TREE_TYPE (vec_offset), indices);
11129 43 : new_stmt = gimple_build_assign (NULL_TREE, VEC_PERM_EXPR,
11130 : vec_offset, vec_offset,
11131 : perm_mask);
11132 43 : vec_offset = make_ssa_name (TREE_TYPE (vec_offset));
11133 43 : gimple_set_lhs (new_stmt, vec_offset);
11134 43 : vect_finish_stmt_generation (vinfo, stmt_info,
11135 : new_stmt, gsi);
11136 43 : }
11137 86 : new_stmt = vect_build_one_gather_load_call
11138 86 : (vinfo, stmt_info, slp_node, vectype, gsi,
11139 : ls.gs.decl,
11140 : dataref_ptr, vec_offset, final_mask);
11141 86 : data_ref = NULL_TREE;
11142 : }
11143 : else
11144 0 : gcc_unreachable ();
11145 : }
11146 : else
11147 : {
11148 : /* Emulated gather-scatter. */
11149 2858 : gcc_assert (!final_mask);
11150 2858 : unsigned HOST_WIDE_INT const_nunits = nunits.to_constant ();
11151 2858 : if (costing_p)
11152 : {
11153 : /* For emulated gathers N offset vector element
11154 : offset add is consumed by the load). */
11155 2178 : inside_cost = record_stmt_cost (cost_vec, const_nunits,
11156 : vec_to_scalar,
11157 : slp_node, 0, vect_body);
11158 : /* N scalar loads plus gathering them into a
11159 : vector. */
11160 2178 : inside_cost
11161 2178 : = record_stmt_cost (cost_vec, const_nunits, scalar_load,
11162 : slp_node, 0, vect_body);
11163 2178 : inside_cost
11164 2178 : = record_stmt_cost (cost_vec, 1, vec_construct,
11165 : slp_node, 0, vect_body);
11166 2178 : continue;
11167 : }
11168 680 : tree offset_vectype = TREE_TYPE (vec_offsets[0]);
11169 680 : unsigned HOST_WIDE_INT const_offset_nunits
11170 680 : = TYPE_VECTOR_SUBPARTS (offset_vectype).to_constant ();
11171 680 : vec<constructor_elt, va_gc> *ctor_elts;
11172 680 : vec_alloc (ctor_elts, const_nunits);
11173 680 : gimple_seq stmts = NULL;
11174 : /* We support offset vectors with more elements
11175 : than the data vector for now. */
11176 680 : unsigned HOST_WIDE_INT factor
11177 : = const_offset_nunits / const_nunits;
11178 680 : vec_offset = vec_offsets[i / factor];
11179 680 : unsigned elt_offset = (i % factor) * const_nunits;
11180 680 : tree idx_type = TREE_TYPE (TREE_TYPE (vec_offset));
11181 680 : tree scale = size_int (SLP_TREE_GS_SCALE (slp_node));
11182 680 : tree ltype = build_aligned_type (TREE_TYPE (vectype), align);
11183 2828 : for (unsigned k = 0; k < const_nunits; ++k)
11184 : {
11185 2148 : tree boff = size_binop (MULT_EXPR, TYPE_SIZE (idx_type),
11186 : bitsize_int (k + elt_offset));
11187 6444 : tree idx = gimple_build (&stmts, BIT_FIELD_REF, idx_type,
11188 2148 : vec_offset, TYPE_SIZE (idx_type),
11189 : boff);
11190 2148 : idx = gimple_convert (&stmts, sizetype, idx);
11191 2148 : idx = gimple_build (&stmts, MULT_EXPR, sizetype, idx, scale);
11192 2148 : tree ptr = gimple_build (&stmts, PLUS_EXPR,
11193 2148 : TREE_TYPE (dataref_ptr),
11194 : dataref_ptr, idx);
11195 2148 : ptr = gimple_convert (&stmts, ptr_type_node, ptr);
11196 2148 : tree elt = make_ssa_name (TREE_TYPE (vectype));
11197 2148 : tree ref = build2 (MEM_REF, ltype, ptr,
11198 : build_int_cst (ref_type, 0));
11199 2148 : new_stmt = gimple_build_assign (elt, ref);
11200 4296 : gimple_set_vuse (new_stmt, gimple_vuse (gsi_stmt (*gsi)));
11201 2148 : gimple_seq_add_stmt (&stmts, new_stmt);
11202 2148 : CONSTRUCTOR_APPEND_ELT (ctor_elts, NULL_TREE, elt);
11203 : }
11204 680 : gsi_insert_seq_before (gsi, stmts, GSI_SAME_STMT);
11205 680 : new_stmt = gimple_build_assign (NULL_TREE,
11206 : build_constructor (vectype,
11207 : ctor_elts));
11208 680 : data_ref = NULL_TREE;
11209 : }
11210 :
11211 963 : vec_dest = vect_create_destination_var (scalar_dest, vectype);
11212 : /* DATA_REF is null if we've already built the statement. */
11213 963 : if (data_ref)
11214 : {
11215 : vect_copy_ref_info (data_ref, DR_REF (first_dr_info->dr));
11216 : new_stmt = gimple_build_assign (vec_dest, data_ref);
11217 : }
11218 1926 : new_temp = (need_zeroing
11219 963 : ? make_ssa_name (vectype)
11220 963 : : make_ssa_name (vec_dest, new_stmt));
11221 963 : gimple_set_lhs (new_stmt, new_temp);
11222 963 : vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
11223 :
11224 : /* If we need to explicitly zero inactive elements emit a
11225 : VEC_COND_EXPR that does so. */
11226 963 : if (need_zeroing)
11227 : {
11228 0 : vec_els = vect_get_mask_load_else (MASK_LOAD_ELSE_ZERO,
11229 : vectype);
11230 :
11231 0 : tree new_temp2 = make_ssa_name (vec_dest, new_stmt);
11232 0 : new_stmt = gimple_build_assign (new_temp2, VEC_COND_EXPR,
11233 : final_mask, new_temp, vec_els);
11234 0 : vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
11235 0 : new_temp = new_temp2;
11236 : }
11237 :
11238 963 : if (ls.ls_type)
11239 : {
11240 0 : new_stmt = gimple_build_assign (make_ssa_name
11241 : (original_vectype),
11242 : VIEW_CONVERT_EXPR,
11243 : build1 (VIEW_CONVERT_EXPR,
11244 : original_vectype,
11245 : new_temp));
11246 0 : vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
11247 : }
11248 :
11249 : /* Store vector loads in the corresponding SLP_NODE. */
11250 963 : if (!costing_p)
11251 : {
11252 963 : if (ls.slp_perm)
11253 0 : dr_chain.quick_push (gimple_assign_lhs (new_stmt));
11254 : else
11255 963 : slp_node->push_vec_def (new_stmt);
11256 : }
11257 : }
11258 :
11259 2735 : if (ls.slp_perm)
11260 : {
11261 0 : if (costing_p)
11262 : {
11263 0 : gcc_assert (ls.n_perms != -1U);
11264 0 : inside_cost += record_stmt_cost (cost_vec, ls.n_perms, vec_perm,
11265 : slp_node, 0, vect_body);
11266 : }
11267 : else
11268 : {
11269 0 : unsigned n_perms2;
11270 0 : vect_transform_slp_perm_load (vinfo, slp_node, dr_chain, gsi, vf,
11271 : false, &n_perms2);
11272 0 : gcc_assert (ls.n_perms == n_perms2);
11273 : }
11274 : }
11275 :
11276 2735 : if (costing_p && dump_enabled_p ())
11277 222 : dump_printf_loc (MSG_NOTE, vect_location,
11278 : "vect_model_load_cost: inside_cost = %u, "
11279 : "prologue_cost = %u .\n",
11280 : inside_cost, prologue_cost);
11281 2735 : return true;
11282 2735 : }
11283 :
11284 500501 : aggr_type = vectype;
11285 500501 : if (!costing_p)
11286 158313 : bump = vect_get_data_ptr_increment (vinfo, gsi, dr_info, aggr_type,
11287 : memory_access_type, loop_lens);
11288 :
11289 500501 : poly_uint64 group_elt = 0;
11290 500501 : unsigned int inside_cost = 0, prologue_cost = 0;
11291 : /* For costing some adjacent vector loads, we'd like to cost with
11292 : the total number of them once instead of cost each one by one. */
11293 500501 : unsigned int n_adjacent_loads = 0;
11294 :
11295 : /* 1. Create the vector or array pointer update chain. */
11296 500501 : if (!costing_p)
11297 : {
11298 158313 : bool simd_lane_access_p
11299 158313 : = STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) != 0;
11300 158313 : if (simd_lane_access_p
11301 1629 : && TREE_CODE (DR_BASE_ADDRESS (first_dr_info->dr)) == ADDR_EXPR
11302 1629 : && VAR_P (TREE_OPERAND (DR_BASE_ADDRESS (first_dr_info->dr), 0))
11303 1629 : && integer_zerop (get_dr_vinfo_offset (vinfo, first_dr_info))
11304 1629 : && integer_zerop (DR_INIT (first_dr_info->dr))
11305 1629 : && alias_sets_conflict_p (get_alias_set (aggr_type),
11306 1629 : get_alias_set (TREE_TYPE (ref_type)))
11307 158313 : && (alignment_support_scheme == dr_aligned
11308 1629 : || alignment_support_scheme == dr_unaligned_supported))
11309 : {
11310 1629 : dataref_ptr = unshare_expr (DR_BASE_ADDRESS (first_dr_info->dr));
11311 1629 : dataref_offset = build_int_cst (ref_type, 0);
11312 : }
11313 156684 : else if (diff_first_stmt_info)
11314 : {
11315 3477 : dataref_ptr
11316 3477 : = vect_create_data_ref_ptr (vinfo, first_stmt_info_for_drptr,
11317 : aggr_type, at_loop, offset, &dummy,
11318 : gsi, &ptr_incr, simd_lane_access_p,
11319 : bump);
11320 : /* Adjust the pointer by the difference to first_stmt. */
11321 3477 : data_reference_p ptrdr
11322 : = STMT_VINFO_DATA_REF (first_stmt_info_for_drptr);
11323 3477 : tree diff = fold_convert (sizetype,
11324 : size_binop (MINUS_EXPR,
11325 : DR_INIT (first_dr_info->dr),
11326 : DR_INIT (ptrdr)));
11327 3477 : dataref_ptr = bump_vector_ptr (vinfo, dataref_ptr, ptr_incr, gsi,
11328 : stmt_info, diff);
11329 3477 : if (alignment_support_scheme == dr_explicit_realign)
11330 : {
11331 0 : msq = vect_setup_realignment (vinfo, first_stmt_info_for_drptr,
11332 : vectype, gsi,
11333 : &realignment_token,
11334 : alignment_support_scheme,
11335 : dataref_ptr, &at_loop);
11336 0 : gcc_assert (!compute_in_loop);
11337 : }
11338 : }
11339 : else
11340 153207 : dataref_ptr
11341 153207 : = vect_create_data_ref_ptr (vinfo, first_stmt_info, aggr_type,
11342 : at_loop,
11343 : offset, &dummy, gsi, &ptr_incr,
11344 : simd_lane_access_p, bump);
11345 : }
11346 : else if (!costing_p)
11347 : {
11348 : gcc_assert (!LOOP_VINFO_USING_SELECT_VL_P (loop_vinfo));
11349 : if (dataref_offset)
11350 : dataref_offset = int_const_binop (PLUS_EXPR, dataref_offset, bump);
11351 : else
11352 : dataref_ptr = bump_vector_ptr (vinfo, dataref_ptr, ptr_incr, gsi,
11353 : stmt_info, bump);
11354 : }
11355 :
11356 500501 : auto_vec<tree> dr_chain;
11357 500501 : if (grouped_load || ls.slp_perm)
11358 48618 : dr_chain.create (vec_num);
11359 :
11360 : gimple *new_stmt = NULL;
11361 1324448 : for (i = 0; i < vec_num; i++)
11362 : {
11363 823947 : tree final_mask = NULL_TREE;
11364 823947 : tree final_len = NULL_TREE;
11365 823947 : tree bias = NULL_TREE;
11366 :
11367 823947 : if (!costing_p)
11368 : {
11369 247306 : if (mask_node)
11370 707 : vec_mask = vec_masks[i];
11371 247306 : if (loop_masks)
11372 48 : final_mask = vect_get_loop_mask (loop_vinfo, gsi, loop_masks,
11373 : vec_num, vectype, i);
11374 247306 : if (vec_mask)
11375 707 : final_mask = prepare_vec_mask (loop_vinfo, mask_vectype,
11376 : final_mask, vec_mask, gsi);
11377 :
11378 247306 : if (i > 0)
11379 88993 : dataref_ptr = bump_vector_ptr (vinfo, dataref_ptr, ptr_incr,
11380 : gsi, stmt_info, bump);
11381 : }
11382 :
11383 : /* 2. Create the vector-load in the loop. */
11384 823947 : switch (alignment_support_scheme)
11385 : {
11386 823947 : case dr_aligned:
11387 823947 : case dr_unaligned_supported:
11388 823947 : {
11389 823947 : if (costing_p)
11390 : break;
11391 :
11392 247306 : unsigned int misalign;
11393 247306 : unsigned HOST_WIDE_INT align;
11394 247306 : align = known_alignment (DR_TARGET_ALIGNMENT (first_dr_info));
11395 247306 : if (alignment_support_scheme == dr_aligned)
11396 : misalign = 0;
11397 160450 : else if (misalignment == DR_MISALIGNMENT_UNKNOWN)
11398 : {
11399 121765 : align = dr_alignment (vect_dr_behavior (vinfo, first_dr_info));
11400 121765 : misalign = 0;
11401 : }
11402 : else
11403 38685 : misalign = misalignment;
11404 247306 : if (dataref_offset == NULL_TREE
11405 245179 : && TREE_CODE (dataref_ptr) == SSA_NAME)
11406 166402 : set_ptr_info_alignment (get_ptr_info (dataref_ptr), align,
11407 : misalign);
11408 247306 : align = least_bit_hwi (misalign | align);
11409 :
11410 : /* Compute IFN when LOOP_LENS or final_mask valid. */
11411 247306 : machine_mode vmode = TYPE_MODE (vectype);
11412 247306 : machine_mode new_vmode = vmode;
11413 247306 : internal_fn partial_ifn = IFN_LAST;
11414 247306 : if (loop_lens)
11415 : {
11416 0 : opt_machine_mode new_ovmode
11417 0 : = get_len_load_store_mode (vmode, true, &partial_ifn);
11418 0 : new_vmode = new_ovmode.require ();
11419 0 : unsigned factor
11420 0 : = (new_ovmode == vmode) ? 1 : GET_MODE_UNIT_SIZE (vmode);
11421 0 : final_len = vect_get_loop_len (loop_vinfo, gsi, loop_lens,
11422 : vec_num, vectype, i, factor, true);
11423 : }
11424 247306 : else if (final_mask)
11425 : {
11426 735 : if (!can_vec_mask_load_store_p (vmode,
11427 735 : TYPE_MODE
11428 : (TREE_TYPE (final_mask)),
11429 : true, &partial_ifn))
11430 0 : gcc_unreachable ();
11431 : }
11432 :
11433 247306 : if (partial_ifn == IFN_MASK_LEN_LOAD)
11434 : {
11435 0 : if (!final_len)
11436 : {
11437 : /* Pass VF value to 'len' argument of
11438 : MASK_LEN_LOAD if LOOP_LENS is invalid. */
11439 0 : final_len = size_int (TYPE_VECTOR_SUBPARTS (vectype));
11440 : }
11441 0 : if (!final_mask)
11442 : {
11443 : /* Pass all ones value to 'mask' argument of
11444 : MASK_LEN_LOAD if final_mask is invalid. */
11445 0 : mask_vectype = truth_type_for (vectype);
11446 0 : final_mask = build_minus_one_cst (mask_vectype);
11447 : }
11448 : }
11449 247306 : if (final_len)
11450 : {
11451 0 : signed char biasval
11452 0 : = LOOP_VINFO_PARTIAL_LOAD_STORE_BIAS (loop_vinfo);
11453 0 : bias = build_int_cst (intQI_type_node, biasval);
11454 : }
11455 :
11456 247306 : tree vec_els;
11457 :
11458 247306 : if (final_len)
11459 : {
11460 0 : tree ptr = build_int_cst (ref_type, align * BITS_PER_UNIT);
11461 0 : gcall *call;
11462 :
11463 : /* Need conversion if the vectype is punned by VnQI. */
11464 0 : els_vectype = vectype;
11465 0 : if (vmode != new_vmode)
11466 0 : els_vectype
11467 0 : = build_vector_type_for_mode (unsigned_intQI_type_node,
11468 : new_vmode);
11469 0 : vec_els = vect_get_mask_load_else (maskload_elsval,
11470 : els_vectype);
11471 :
11472 0 : if (partial_ifn == IFN_MASK_LEN_LOAD)
11473 : {
11474 0 : if (type_mode_padding_p
11475 0 : && maskload_elsval != MASK_LOAD_ELSE_ZERO)
11476 0 : need_zeroing = true;
11477 0 : call = gimple_build_call_internal (IFN_MASK_LEN_LOAD,
11478 : 6, dataref_ptr, ptr,
11479 : final_mask, vec_els,
11480 : final_len, bias);
11481 : }
11482 : else
11483 0 : call = gimple_build_call_internal (IFN_LEN_LOAD, 5,
11484 : dataref_ptr, ptr,
11485 : vec_els, final_len,
11486 : bias);
11487 0 : gimple_call_set_nothrow (call, true);
11488 0 : new_stmt = call;
11489 0 : data_ref = NULL_TREE;
11490 :
11491 : /* Need conversion if it's wrapped with VnQI. */
11492 0 : if (vmode != new_vmode)
11493 : {
11494 0 : tree new_vtype
11495 0 : = build_vector_type_for_mode (unsigned_intQI_type_node,
11496 : new_vmode);
11497 0 : tree var = vect_get_new_ssa_name (new_vtype,
11498 : vect_simple_var);
11499 0 : gimple_set_lhs (call, var);
11500 0 : vect_finish_stmt_generation (vinfo, stmt_info, call,
11501 : gsi);
11502 0 : tree op = build1 (VIEW_CONVERT_EXPR, vectype, var);
11503 0 : new_stmt = gimple_build_assign (vec_dest,
11504 : VIEW_CONVERT_EXPR, op);
11505 : }
11506 : }
11507 247306 : else if (final_mask)
11508 : {
11509 735 : tree ptr = build_int_cst (ref_type, align * BITS_PER_UNIT);
11510 735 : vec_els = vect_get_mask_load_else (maskload_elsval, vectype);
11511 735 : if (type_mode_padding_p
11512 735 : && maskload_elsval != MASK_LOAD_ELSE_ZERO)
11513 0 : need_zeroing = true;
11514 735 : gcall *call = gimple_build_call_internal (IFN_MASK_LOAD, 4,
11515 : dataref_ptr, ptr,
11516 : final_mask,
11517 : vec_els);
11518 735 : gimple_call_set_nothrow (call, true);
11519 735 : new_stmt = call;
11520 735 : data_ref = NULL_TREE;
11521 : }
11522 : else
11523 : {
11524 246571 : tree ltype = vectype;
11525 246571 : tree new_vtype = NULL_TREE;
11526 246571 : unsigned HOST_WIDE_INT gap = DR_GROUP_GAP (first_stmt_info);
11527 246571 : unsigned HOST_WIDE_INT dr_size
11528 246571 : = vect_get_scalar_dr_size (first_dr_info);
11529 246571 : poly_int64 off = 0;
11530 246571 : if (memory_access_type == VMAT_CONTIGUOUS_REVERSE)
11531 1431 : off = (TYPE_VECTOR_SUBPARTS (vectype) - 1) * -dr_size;
11532 246571 : unsigned int vect_align
11533 246571 : = vect_known_alignment_in_bytes (first_dr_info, vectype,
11534 246571 : off);
11535 : /* Try to use a single smaller load when we are about
11536 : to load excess elements compared to the unrolled
11537 : scalar loop. */
11538 246571 : if (known_gt ((i + 1) * nunits,
11539 : (group_size * vf - gap)))
11540 : {
11541 6850 : poly_uint64 remain = ((group_size * vf - gap) - i * nunits);
11542 6850 : if (known_ge ((i + 1) * nunits - (group_size * vf - gap),
11543 : nunits))
11544 : /* DR will be unused. */
11545 : ltype = NULL_TREE;
11546 2179 : else if (known_ge (vect_align,
11547 : tree_to_poly_uint64
11548 : (TYPE_SIZE_UNIT (vectype))))
11549 : /* Aligned access to excess elements is OK if
11550 : at least one element is accessed in the
11551 : scalar loop. */
11552 : ;
11553 1840 : else if (known_gt (vect_align,
11554 : ((nunits - remain) * dr_size)))
11555 : /* Aligned access to the gap area when there's
11556 : at least one element in it is OK. */
11557 : ;
11558 : else
11559 : {
11560 : /* remain should now be > 0 and < nunits. */
11561 1837 : unsigned num;
11562 1837 : if (known_ne (remain, 0u)
11563 1837 : && constant_multiple_p (nunits, remain, &num))
11564 : {
11565 1379 : tree ptype;
11566 1379 : new_vtype
11567 1379 : = vector_vector_composition_type (vectype, num,
11568 : &ptype);
11569 1379 : if (new_vtype)
11570 1379 : ltype = ptype;
11571 : }
11572 : /* Else use multiple loads or a masked load? */
11573 : /* For loop vectorization we now should have
11574 : an alternate type or LOOP_VINFO_PEELING_FOR_GAPS
11575 : set. */
11576 1837 : if (loop_vinfo)
11577 1598 : gcc_assert (new_vtype
11578 : || LOOP_VINFO_PEELING_FOR_GAPS
11579 : (loop_vinfo));
11580 : /* But still reduce the access size to the next
11581 : required power-of-two so peeling a single
11582 : scalar iteration is sufficient. */
11583 1837 : unsigned HOST_WIDE_INT cremain;
11584 1837 : if (remain.is_constant (&cremain))
11585 : {
11586 1837 : unsigned HOST_WIDE_INT cpart_size
11587 1837 : = 1 << ceil_log2 (cremain);
11588 1837 : if (known_gt (nunits, cpart_size)
11589 1837 : && constant_multiple_p (nunits, cpart_size,
11590 : &num))
11591 : {
11592 1391 : tree ptype;
11593 1391 : new_vtype
11594 2782 : = vector_vector_composition_type (vectype,
11595 1391 : num,
11596 : &ptype);
11597 1391 : if (new_vtype)
11598 1391 : ltype = ptype;
11599 : }
11600 : }
11601 : }
11602 : }
11603 246571 : tree offset = (dataref_offset ? dataref_offset
11604 244444 : : build_int_cst (ref_type, 0));
11605 246571 : if (!ltype)
11606 : ;
11607 241900 : else if (ltype != vectype
11608 241900 : && memory_access_type == VMAT_CONTIGUOUS_REVERSE)
11609 : {
11610 21 : poly_uint64 gap_offset
11611 21 : = (tree_to_poly_uint64 (TYPE_SIZE_UNIT (vectype))
11612 21 : - tree_to_poly_uint64 (TYPE_SIZE_UNIT (ltype)));
11613 21 : tree gapcst = build_int_cstu (ref_type, gap_offset);
11614 21 : offset = size_binop (PLUS_EXPR, offset, gapcst);
11615 : }
11616 246571 : if (ltype)
11617 : {
11618 241900 : data_ref = fold_build2 (MEM_REF, ltype,
11619 : dataref_ptr, offset);
11620 241900 : if (alignment_support_scheme == dr_aligned
11621 241900 : && align >= TYPE_ALIGN_UNIT (ltype))
11622 : ;
11623 : else
11624 158734 : TREE_TYPE (data_ref)
11625 317468 : = build_aligned_type (TREE_TYPE (data_ref),
11626 : align * BITS_PER_UNIT);
11627 : }
11628 246571 : if (!ltype)
11629 4671 : data_ref = build_constructor (vectype, NULL);
11630 241900 : else if (ltype != vectype)
11631 : {
11632 1391 : vect_copy_ref_info (data_ref,
11633 1391 : DR_REF (first_dr_info->dr));
11634 1391 : tree tem = make_ssa_name (ltype);
11635 1391 : new_stmt = gimple_build_assign (tem, data_ref);
11636 1391 : vect_finish_stmt_generation (vinfo, stmt_info, new_stmt,
11637 : gsi);
11638 1391 : data_ref = NULL;
11639 1391 : vec<constructor_elt, va_gc> *v;
11640 : /* We've computed 'num' above to statically two
11641 : or via constant_multiple_p. */
11642 1391 : unsigned num
11643 1391 : = (exact_div (tree_to_poly_uint64
11644 1391 : (TYPE_SIZE_UNIT (vectype)),
11645 : tree_to_poly_uint64
11646 1391 : (TYPE_SIZE_UNIT (ltype)))
11647 1391 : .to_constant ());
11648 1391 : vec_alloc (v, num);
11649 1391 : if (memory_access_type == VMAT_CONTIGUOUS_REVERSE)
11650 : {
11651 54 : while (--num)
11652 54 : CONSTRUCTOR_APPEND_ELT (v, NULL_TREE,
11653 : build_zero_cst (ltype));
11654 21 : CONSTRUCTOR_APPEND_ELT (v, NULL_TREE, tem);
11655 : }
11656 : else
11657 : {
11658 1370 : CONSTRUCTOR_APPEND_ELT (v, NULL_TREE, tem);
11659 1370 : while (--num)
11660 3094 : CONSTRUCTOR_APPEND_ELT (v, NULL_TREE,
11661 : build_zero_cst (ltype));
11662 : }
11663 1391 : gcc_assert (new_vtype != NULL_TREE);
11664 1391 : if (new_vtype == vectype)
11665 1361 : new_stmt
11666 1361 : = gimple_build_assign (vec_dest,
11667 : build_constructor (vectype, v));
11668 : else
11669 : {
11670 30 : tree new_vname = make_ssa_name (new_vtype);
11671 30 : new_stmt
11672 30 : = gimple_build_assign (new_vname,
11673 : build_constructor (new_vtype,
11674 : v));
11675 30 : vect_finish_stmt_generation (vinfo, stmt_info,
11676 : new_stmt, gsi);
11677 30 : new_stmt
11678 30 : = gimple_build_assign (vec_dest,
11679 : build1 (VIEW_CONVERT_EXPR,
11680 : vectype, new_vname));
11681 : }
11682 : }
11683 : }
11684 : break;
11685 : }
11686 0 : case dr_explicit_realign:
11687 0 : {
11688 0 : if (costing_p)
11689 : break;
11690 0 : tree ptr, bump;
11691 :
11692 0 : tree vs = size_int (TYPE_VECTOR_SUBPARTS (vectype));
11693 :
11694 0 : if (compute_in_loop)
11695 0 : msq = vect_setup_realignment (vinfo, first_stmt_info, vectype,
11696 : gsi, &realignment_token,
11697 : dr_explicit_realign,
11698 : dataref_ptr, NULL);
11699 :
11700 0 : if (TREE_CODE (dataref_ptr) == SSA_NAME)
11701 0 : ptr = copy_ssa_name (dataref_ptr);
11702 : else
11703 0 : ptr = make_ssa_name (TREE_TYPE (dataref_ptr));
11704 : // For explicit realign the target alignment should be
11705 : // known at compile time.
11706 0 : unsigned HOST_WIDE_INT align
11707 0 : = DR_TARGET_ALIGNMENT (first_dr_info).to_constant ();
11708 0 : new_stmt = gimple_build_assign (ptr, BIT_AND_EXPR, dataref_ptr,
11709 : build_int_cst
11710 0 : (TREE_TYPE (dataref_ptr),
11711 0 : -(HOST_WIDE_INT) align));
11712 0 : vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
11713 0 : data_ref = build2 (MEM_REF, vectype,
11714 : ptr, build_int_cst (ref_type, 0));
11715 0 : vect_copy_ref_info (data_ref, DR_REF (first_dr_info->dr));
11716 0 : vec_dest = vect_create_destination_var (scalar_dest, vectype);
11717 0 : new_stmt = gimple_build_assign (vec_dest, data_ref);
11718 0 : new_temp = make_ssa_name (vec_dest, new_stmt);
11719 0 : gimple_assign_set_lhs (new_stmt, new_temp);
11720 0 : gimple_move_vops (new_stmt, stmt_info->stmt);
11721 0 : vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
11722 0 : msq = new_temp;
11723 :
11724 0 : bump = size_binop (MULT_EXPR, vs, TYPE_SIZE_UNIT (elem_type));
11725 0 : bump = size_binop (MINUS_EXPR, bump, size_one_node);
11726 0 : ptr = bump_vector_ptr (vinfo, dataref_ptr, NULL, gsi, stmt_info,
11727 : bump);
11728 0 : new_stmt = gimple_build_assign (NULL_TREE, BIT_AND_EXPR, ptr,
11729 0 : build_int_cst (TREE_TYPE (ptr),
11730 0 : -(HOST_WIDE_INT) align));
11731 0 : if (TREE_CODE (ptr) == SSA_NAME)
11732 0 : ptr = copy_ssa_name (ptr, new_stmt);
11733 : else
11734 0 : ptr = make_ssa_name (TREE_TYPE (ptr), new_stmt);
11735 0 : gimple_assign_set_lhs (new_stmt, ptr);
11736 0 : vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
11737 0 : data_ref = build2 (MEM_REF, vectype,
11738 : ptr, build_int_cst (ref_type, 0));
11739 0 : break;
11740 : }
11741 0 : case dr_explicit_realign_optimized:
11742 0 : {
11743 0 : if (costing_p)
11744 : break;
11745 0 : if (TREE_CODE (dataref_ptr) == SSA_NAME)
11746 0 : new_temp = copy_ssa_name (dataref_ptr);
11747 : else
11748 0 : new_temp = make_ssa_name (TREE_TYPE (dataref_ptr));
11749 : // We should only be doing this if we know the target
11750 : // alignment at compile time.
11751 0 : unsigned HOST_WIDE_INT align
11752 0 : = DR_TARGET_ALIGNMENT (first_dr_info).to_constant ();
11753 0 : new_stmt = gimple_build_assign (new_temp, BIT_AND_EXPR, dataref_ptr,
11754 0 : build_int_cst (TREE_TYPE (dataref_ptr),
11755 0 : -(HOST_WIDE_INT) align));
11756 0 : vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
11757 0 : data_ref = build2 (MEM_REF, vectype, new_temp,
11758 : build_int_cst (ref_type, 0));
11759 0 : break;
11760 : }
11761 0 : default:
11762 0 : gcc_unreachable ();
11763 : }
11764 :
11765 : /* One common place to cost the above vect load for different
11766 : alignment support schemes. */
11767 823947 : if (costing_p)
11768 : {
11769 : /* For the prologue cost for realign,
11770 : we only need to count it once for the whole group. */
11771 576641 : bool first_stmt_info_p = first_stmt_info == stmt_info;
11772 576641 : bool add_realign_cost = first_stmt_info_p && i == 0;
11773 576641 : if (memory_access_type == VMAT_CONTIGUOUS
11774 576641 : || memory_access_type == VMAT_CONTIGUOUS_REVERSE)
11775 : {
11776 : /* Leave realign cases alone to keep them simple. */
11777 576641 : if (alignment_support_scheme == dr_explicit_realign_optimized
11778 : || alignment_support_scheme == dr_explicit_realign)
11779 0 : vect_get_load_cost (vinfo, stmt_info, slp_node, 1,
11780 : alignment_support_scheme, misalignment,
11781 : add_realign_cost, &inside_cost,
11782 : &prologue_cost, cost_vec, cost_vec,
11783 : true);
11784 : else
11785 576641 : n_adjacent_loads++;
11786 : }
11787 : }
11788 : else
11789 : {
11790 247306 : vec_dest = vect_create_destination_var (scalar_dest, vectype);
11791 : /* DATA_REF is null if we've already built the statement. */
11792 247306 : if (data_ref)
11793 : {
11794 245180 : vect_copy_ref_info (data_ref, DR_REF (first_dr_info->dr));
11795 245180 : new_stmt = gimple_build_assign (vec_dest, data_ref);
11796 : }
11797 :
11798 494612 : new_temp = (need_zeroing
11799 247306 : ? make_ssa_name (vectype)
11800 247306 : : make_ssa_name (vec_dest, new_stmt));
11801 247306 : gimple_set_lhs (new_stmt, new_temp);
11802 247306 : vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
11803 :
11804 : /* If we need to explicitly zero inactive elements emit a
11805 : VEC_COND_EXPR that does so. */
11806 247306 : if (need_zeroing)
11807 : {
11808 0 : vec_els = vect_get_mask_load_else (MASK_LOAD_ELSE_ZERO,
11809 : vectype);
11810 :
11811 0 : tree new_temp2 = make_ssa_name (vec_dest, new_stmt);
11812 0 : new_stmt = gimple_build_assign (new_temp2, VEC_COND_EXPR,
11813 : final_mask, new_temp, vec_els);
11814 0 : vect_finish_stmt_generation (vinfo, stmt_info, new_stmt,
11815 : gsi);
11816 0 : new_temp = new_temp2;
11817 : }
11818 : }
11819 :
11820 : /* 3. Handle explicit realignment if necessary/supported.
11821 : Create in loop:
11822 : vec_dest = realign_load (msq, lsq, realignment_token) */
11823 823947 : if (!costing_p
11824 247306 : && (alignment_support_scheme == dr_explicit_realign_optimized
11825 : || alignment_support_scheme == dr_explicit_realign))
11826 : {
11827 0 : lsq = gimple_assign_lhs (new_stmt);
11828 0 : if (!realignment_token)
11829 0 : realignment_token = dataref_ptr;
11830 0 : vec_dest = vect_create_destination_var (scalar_dest, vectype);
11831 0 : new_stmt = gimple_build_assign (vec_dest, REALIGN_LOAD_EXPR, msq,
11832 : lsq, realignment_token);
11833 0 : new_temp = make_ssa_name (vec_dest, new_stmt);
11834 0 : gimple_assign_set_lhs (new_stmt, new_temp);
11835 0 : vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
11836 :
11837 0 : if (alignment_support_scheme == dr_explicit_realign_optimized)
11838 : {
11839 0 : gcc_assert (phi);
11840 0 : if (i == vec_num - 1)
11841 0 : add_phi_arg (phi, lsq, loop_latch_edge (containing_loop),
11842 : UNKNOWN_LOCATION);
11843 : msq = lsq;
11844 : }
11845 : }
11846 :
11847 823947 : if (memory_access_type == VMAT_CONTIGUOUS_REVERSE)
11848 : {
11849 5580 : if (costing_p)
11850 4149 : inside_cost = record_stmt_cost (cost_vec, 1, vec_perm,
11851 : slp_node, 0, vect_body);
11852 : else
11853 : {
11854 1431 : tree perm_mask = perm_mask_for_reverse (vectype);
11855 1431 : new_temp = permute_vec_elements (vinfo, new_temp, new_temp,
11856 : perm_mask, stmt_info, gsi);
11857 1431 : new_stmt = SSA_NAME_DEF_STMT (new_temp);
11858 : }
11859 : }
11860 :
11861 : /* Collect vector loads and later create their permutation in
11862 : vect_transform_slp_perm_load. */
11863 823947 : if (!costing_p && (grouped_load || ls.slp_perm))
11864 72108 : dr_chain.quick_push (new_temp);
11865 :
11866 : /* Store vector loads in the corresponding SLP_NODE. */
11867 247306 : if (!costing_p && !ls.slp_perm)
11868 175198 : slp_node->push_vec_def (new_stmt);
11869 :
11870 : /* With SLP permutation we load the gaps as well, without
11871 : we need to skip the gaps after we manage to fully load
11872 : all elements. group_gap_adj is DR_GROUP_SIZE here. */
11873 823947 : group_elt += nunits;
11874 823947 : if (!costing_p
11875 247306 : && maybe_ne (group_gap_adj, 0U)
11876 44117 : && !ls.slp_perm
11877 843262 : && known_eq (group_elt, group_size - group_gap_adj))
11878 : {
11879 15385 : poly_wide_int bump_val
11880 15385 : = (wi::to_wide (TYPE_SIZE_UNIT (elem_type)) * group_gap_adj);
11881 15385 : if (tree_int_cst_sgn (vect_dr_behavior (vinfo, dr_info)->step) == -1)
11882 0 : bump_val = -bump_val;
11883 15385 : tree bump = wide_int_to_tree (sizetype, bump_val);
11884 15385 : dataref_ptr = bump_vector_ptr (vinfo, dataref_ptr, ptr_incr, gsi,
11885 : stmt_info, bump);
11886 15385 : group_elt = 0;
11887 15385 : }
11888 : }
11889 : /* Bump the vector pointer to account for a gap or for excess
11890 : elements loaded for a permuted SLP load. */
11891 500501 : if (!costing_p
11892 158313 : && maybe_ne (group_gap_adj, 0U)
11893 516320 : && ls.slp_perm)
11894 : {
11895 434 : poly_wide_int bump_val
11896 434 : = (wi::to_wide (TYPE_SIZE_UNIT (elem_type)) * group_gap_adj);
11897 434 : if (tree_int_cst_sgn (vect_dr_behavior (vinfo, dr_info)->step) == -1)
11898 9 : bump_val = -bump_val;
11899 434 : tree bump = wide_int_to_tree (sizetype, bump_val);
11900 434 : dataref_ptr = bump_vector_ptr (vinfo, dataref_ptr, ptr_incr, gsi,
11901 : stmt_info, bump);
11902 434 : }
11903 :
11904 500501 : if (ls.slp_perm)
11905 : {
11906 : /* For SLP we know we've seen all possible uses of dr_chain so
11907 : direct vect_transform_slp_perm_load to DCE the unused parts.
11908 : ??? This is a hack to prevent compile-time issues as seen
11909 : in PR101120 and friends. */
11910 48618 : if (costing_p)
11911 : {
11912 31518 : gcc_assert (ls.n_perms != -1U && ls.n_loads != -1U);
11913 31518 : if (ls.n_perms != 0)
11914 31154 : inside_cost = record_stmt_cost (cost_vec, ls.n_perms, vec_perm,
11915 : slp_node, 0, vect_body);
11916 31518 : if (n_adjacent_loads > 0)
11917 31518 : n_adjacent_loads = ls.n_loads;
11918 : }
11919 : else
11920 : {
11921 17100 : unsigned n_perms2, n_loads2;
11922 17100 : bool ok = vect_transform_slp_perm_load (vinfo, slp_node, dr_chain,
11923 : gsi, vf, false, &n_perms2,
11924 : &n_loads2, true);
11925 17100 : gcc_assert (ok && ls.n_perms == n_perms2 && ls.n_loads == n_loads2);
11926 : }
11927 : }
11928 :
11929 500501 : if (costing_p)
11930 : {
11931 342188 : gcc_assert (memory_access_type == VMAT_CONTIGUOUS
11932 : || memory_access_type == VMAT_CONTIGUOUS_REVERSE);
11933 342188 : if (n_adjacent_loads > 0)
11934 342188 : vect_get_load_cost (vinfo, stmt_info, slp_node, n_adjacent_loads,
11935 : alignment_support_scheme, misalignment, false,
11936 : &inside_cost, &prologue_cost, cost_vec, cost_vec,
11937 : true);
11938 342188 : if (dump_enabled_p ())
11939 23163 : dump_printf_loc (MSG_NOTE, vect_location,
11940 : "vect_model_load_cost: inside_cost = %u, "
11941 : "prologue_cost = %u .\n",
11942 : inside_cost, prologue_cost);
11943 : }
11944 :
11945 500501 : return true;
11946 1646650 : }
11947 :
11948 : /* Function vect_is_simple_cond.
11949 :
11950 : Input:
11951 : LOOP - the loop that is being vectorized.
11952 : COND - Condition that is checked for simple use.
11953 :
11954 : Output:
11955 : *COMP_VECTYPE - the vector type for the comparison.
11956 : *DTS - The def types for the arguments of the comparison
11957 :
11958 : Returns whether a COND can be vectorized. Checks whether
11959 : condition operands are supportable using vec_is_simple_use. */
11960 :
11961 : static bool
11962 27834 : vect_is_simple_cond (tree cond, vec_info *vinfo,
11963 : slp_tree slp_node, tree *comp_vectype,
11964 : enum vect_def_type *dts, tree vectype)
11965 : {
11966 27834 : tree lhs, rhs;
11967 27834 : tree vectype1 = NULL_TREE, vectype2 = NULL_TREE;
11968 27834 : slp_tree slp_op;
11969 :
11970 : /* Mask case. */
11971 27834 : if (TREE_CODE (cond) == SSA_NAME
11972 27834 : && VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (cond)))
11973 : {
11974 27822 : if (!vect_is_simple_use (vinfo, slp_node, 0, &cond,
11975 : &slp_op, &dts[0], comp_vectype)
11976 27822 : || !*comp_vectype
11977 55629 : || !VECTOR_BOOLEAN_TYPE_P (*comp_vectype))
11978 : return false;
11979 : return true;
11980 : }
11981 :
11982 12 : if (!COMPARISON_CLASS_P (cond))
11983 : return false;
11984 :
11985 0 : lhs = TREE_OPERAND (cond, 0);
11986 0 : rhs = TREE_OPERAND (cond, 1);
11987 :
11988 0 : if (TREE_CODE (lhs) == SSA_NAME)
11989 : {
11990 0 : if (!vect_is_simple_use (vinfo, slp_node, 0,
11991 : &lhs, &slp_op, &dts[0], &vectype1))
11992 : return false;
11993 : }
11994 0 : else if (TREE_CODE (lhs) == INTEGER_CST || TREE_CODE (lhs) == REAL_CST
11995 0 : || TREE_CODE (lhs) == FIXED_CST)
11996 0 : dts[0] = vect_constant_def;
11997 : else
11998 : return false;
11999 :
12000 0 : if (TREE_CODE (rhs) == SSA_NAME)
12001 : {
12002 0 : if (!vect_is_simple_use (vinfo, slp_node, 1,
12003 : &rhs, &slp_op, &dts[1], &vectype2))
12004 : return false;
12005 : }
12006 0 : else if (TREE_CODE (rhs) == INTEGER_CST || TREE_CODE (rhs) == REAL_CST
12007 0 : || TREE_CODE (rhs) == FIXED_CST)
12008 0 : dts[1] = vect_constant_def;
12009 : else
12010 : return false;
12011 :
12012 0 : if (vectype1 && vectype2
12013 0 : && maybe_ne (TYPE_VECTOR_SUBPARTS (vectype1),
12014 0 : TYPE_VECTOR_SUBPARTS (vectype2)))
12015 0 : return false;
12016 :
12017 0 : *comp_vectype = vectype1 ? vectype1 : vectype2;
12018 : /* Invariant comparison. */
12019 0 : if (! *comp_vectype)
12020 : {
12021 0 : tree scalar_type = TREE_TYPE (lhs);
12022 0 : if (VECT_SCALAR_BOOLEAN_TYPE_P (scalar_type))
12023 0 : *comp_vectype = truth_type_for (vectype);
12024 : else
12025 : {
12026 : /* If we can widen the comparison to match vectype do so. */
12027 0 : if (INTEGRAL_TYPE_P (scalar_type)
12028 0 : && !slp_node
12029 0 : && tree_int_cst_lt (TYPE_SIZE (scalar_type),
12030 0 : TYPE_SIZE (TREE_TYPE (vectype))))
12031 0 : scalar_type = build_nonstandard_integer_type
12032 0 : (vector_element_bits (vectype), TYPE_UNSIGNED (scalar_type));
12033 0 : *comp_vectype = get_vectype_for_scalar_type (vinfo, scalar_type,
12034 : slp_node);
12035 : }
12036 : }
12037 :
12038 : return true;
12039 : }
12040 :
12041 : /* vectorizable_condition.
12042 :
12043 : Check if STMT_INFO is conditional modify expression that can be vectorized.
12044 : If COST_VEC is passed, calculate costs but don't change anything,
12045 : otherwise, vectorize STMT_INFO: create a vectorized stmt using
12046 : VEC_COND_EXPR to replace it, and insert it at GSI.
12047 :
12048 : When STMT_INFO is vectorized as a nested cycle, for_reduction is true.
12049 :
12050 : Return true if STMT_INFO is vectorizable in this way. */
12051 :
12052 : static bool
12053 606277 : vectorizable_condition (vec_info *vinfo,
12054 : stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
12055 : slp_tree slp_node, stmt_vector_for_cost *cost_vec)
12056 : {
12057 606277 : tree scalar_dest = NULL_TREE;
12058 606277 : tree vec_dest = NULL_TREE;
12059 606277 : tree cond_expr, cond_expr0 = NULL_TREE, cond_expr1 = NULL_TREE;
12060 606277 : tree then_clause, else_clause;
12061 606277 : tree comp_vectype = NULL_TREE;
12062 606277 : tree vec_cond_lhs = NULL_TREE, vec_cond_rhs = NULL_TREE;
12063 606277 : tree vec_then_clause = NULL_TREE, vec_else_clause = NULL_TREE;
12064 606277 : tree vec_compare;
12065 606277 : tree new_temp;
12066 606277 : loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
12067 606277 : enum vect_def_type dts[4]
12068 : = {vect_unknown_def_type, vect_unknown_def_type,
12069 : vect_unknown_def_type, vect_unknown_def_type};
12070 606277 : enum tree_code code, cond_code, bitop1 = NOP_EXPR, bitop2 = NOP_EXPR;
12071 606277 : int i;
12072 606277 : bb_vec_info bb_vinfo = dyn_cast <bb_vec_info> (vinfo);
12073 606277 : vec<tree> vec_oprnds0 = vNULL;
12074 606277 : vec<tree> vec_oprnds1 = vNULL;
12075 606277 : vec<tree> vec_oprnds2 = vNULL;
12076 606277 : vec<tree> vec_oprnds3 = vNULL;
12077 606277 : tree vec_cmp_type;
12078 606277 : bool masked = false;
12079 :
12080 606277 : if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
12081 : return false;
12082 :
12083 : /* Is vectorizable conditional operation? */
12084 930984 : gassign *stmt = dyn_cast <gassign *> (stmt_info->stmt);
12085 352512 : if (!stmt)
12086 : return false;
12087 :
12088 352512 : code = gimple_assign_rhs_code (stmt);
12089 352512 : if (code != COND_EXPR)
12090 : return false;
12091 :
12092 27834 : int reduc_index = SLP_TREE_REDUC_IDX (slp_node);
12093 27834 : vect_reduction_type reduction_type = TREE_CODE_REDUCTION;
12094 27834 : bool nested_cycle_p = false;
12095 27834 : bool for_reduction = vect_is_reduction (stmt_info);
12096 27834 : if (for_reduction)
12097 : {
12098 547 : if (SLP_TREE_LANES (slp_node) > 1)
12099 : return false;
12100 : /* ??? With a reduction path we do not get at the reduction info from
12101 : every stmt, use the conservative default setting then. */
12102 627 : if (STMT_VINFO_REDUC_DEF (vect_orig_stmt (stmt_info)))
12103 : {
12104 529 : vect_reduc_info reduc_info
12105 529 : = info_for_reduction (loop_vinfo, slp_node);
12106 529 : reduction_type = VECT_REDUC_INFO_TYPE (reduc_info);
12107 529 : nested_cycle_p = nested_in_vect_loop_p (LOOP_VINFO_LOOP (loop_vinfo),
12108 : stmt_info);
12109 : }
12110 : }
12111 : else
12112 : {
12113 27287 : if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
12114 : return false;
12115 : }
12116 :
12117 27834 : tree vectype = SLP_TREE_VECTYPE (slp_node);
12118 27834 : tree vectype1 = NULL_TREE, vectype2 = NULL_TREE;
12119 :
12120 27834 : int vec_num = vect_get_num_copies (vinfo, slp_node);
12121 :
12122 27834 : cond_expr = gimple_assign_rhs1 (stmt);
12123 27834 : gcc_assert (! COMPARISON_CLASS_P (cond_expr));
12124 :
12125 27834 : if (!vect_is_simple_cond (cond_expr, vinfo, slp_node,
12126 : &comp_vectype, &dts[0], vectype)
12127 27834 : || !comp_vectype)
12128 : return false;
12129 :
12130 27807 : unsigned op_adjust = COMPARISON_CLASS_P (cond_expr) ? 1 : 0;
12131 27807 : slp_tree then_slp_node, else_slp_node;
12132 27807 : if (!vect_is_simple_use (vinfo, slp_node, 1 + op_adjust,
12133 : &then_clause, &then_slp_node, &dts[2], &vectype1))
12134 : return false;
12135 27807 : if (!vect_is_simple_use (vinfo, slp_node, 2 + op_adjust,
12136 : &else_clause, &else_slp_node, &dts[3], &vectype2))
12137 : return false;
12138 :
12139 27807 : if (vectype1 && !useless_type_conversion_p (vectype, vectype1))
12140 : return false;
12141 :
12142 27807 : if (vectype2 && !useless_type_conversion_p (vectype, vectype2))
12143 : return false;
12144 :
12145 27807 : masked = !COMPARISON_CLASS_P (cond_expr);
12146 27807 : vec_cmp_type = truth_type_for (comp_vectype);
12147 27807 : if (vec_cmp_type == NULL_TREE
12148 55614 : || maybe_ne (TYPE_VECTOR_SUBPARTS (vectype),
12149 27807 : TYPE_VECTOR_SUBPARTS (vec_cmp_type)))
12150 0 : return false;
12151 :
12152 27807 : cond_code = TREE_CODE (cond_expr);
12153 27807 : if (!masked)
12154 : {
12155 0 : cond_expr0 = TREE_OPERAND (cond_expr, 0);
12156 0 : cond_expr1 = TREE_OPERAND (cond_expr, 1);
12157 : }
12158 :
12159 : /* For conditional reductions, the "then" value needs to be the candidate
12160 : value calculated by this iteration while the "else" value needs to be
12161 : the result carried over from previous iterations. If the COND_EXPR
12162 : is the other way around, we need to swap it. */
12163 27807 : bool must_invert_cmp_result = false;
12164 27807 : if (reduction_type == EXTRACT_LAST_REDUCTION && reduc_index == 1)
12165 : {
12166 0 : if (masked)
12167 0 : must_invert_cmp_result = true;
12168 : else
12169 : {
12170 0 : bool honor_nans = HONOR_NANS (TREE_TYPE (cond_expr0));
12171 0 : tree_code new_code = invert_tree_comparison (cond_code, honor_nans);
12172 0 : if (new_code == ERROR_MARK)
12173 : must_invert_cmp_result = true;
12174 : else
12175 : {
12176 0 : cond_code = new_code;
12177 : /* Make sure we don't accidentally use the old condition. */
12178 0 : cond_expr = NULL_TREE;
12179 : }
12180 : }
12181 : /* ??? The vectorized operand query below doesn't allow swapping
12182 : this way for SLP. */
12183 0 : return false;
12184 : /* std::swap (then_clause, else_clause); */
12185 : }
12186 :
12187 27807 : if (!masked && VECTOR_BOOLEAN_TYPE_P (comp_vectype))
12188 : {
12189 : /* Boolean values may have another representation in vectors
12190 : and therefore we prefer bit operations over comparison for
12191 : them (which also works for scalar masks). We store opcodes
12192 : to use in bitop1 and bitop2. Statement is vectorized as
12193 : BITOP2 (rhs1 BITOP1 rhs2) or rhs1 BITOP2 (BITOP1 rhs2)
12194 : depending on bitop1 and bitop2 arity. */
12195 0 : switch (cond_code)
12196 : {
12197 : case GT_EXPR:
12198 : bitop1 = BIT_NOT_EXPR;
12199 : bitop2 = BIT_AND_EXPR;
12200 : break;
12201 0 : case GE_EXPR:
12202 0 : bitop1 = BIT_NOT_EXPR;
12203 0 : bitop2 = BIT_IOR_EXPR;
12204 0 : break;
12205 0 : case LT_EXPR:
12206 0 : bitop1 = BIT_NOT_EXPR;
12207 0 : bitop2 = BIT_AND_EXPR;
12208 0 : std::swap (cond_expr0, cond_expr1);
12209 0 : break;
12210 0 : case LE_EXPR:
12211 0 : bitop1 = BIT_NOT_EXPR;
12212 0 : bitop2 = BIT_IOR_EXPR;
12213 0 : std::swap (cond_expr0, cond_expr1);
12214 0 : break;
12215 0 : case NE_EXPR:
12216 0 : bitop1 = BIT_XOR_EXPR;
12217 0 : break;
12218 0 : case EQ_EXPR:
12219 0 : bitop1 = BIT_XOR_EXPR;
12220 0 : bitop2 = BIT_NOT_EXPR;
12221 0 : break;
12222 : default:
12223 : return false;
12224 : }
12225 : cond_code = SSA_NAME;
12226 : }
12227 :
12228 27807 : if (TREE_CODE_CLASS (cond_code) == tcc_comparison
12229 0 : && reduction_type == EXTRACT_LAST_REDUCTION
12230 27807 : && !expand_vec_cmp_expr_p (comp_vectype, vec_cmp_type, cond_code))
12231 : {
12232 0 : if (dump_enabled_p ())
12233 0 : dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
12234 : "reduction comparison operation not supported.\n");
12235 0 : return false;
12236 : }
12237 :
12238 27807 : if (cost_vec)
12239 : {
12240 19333 : if (bitop1 != NOP_EXPR)
12241 : {
12242 0 : machine_mode mode = TYPE_MODE (comp_vectype);
12243 0 : optab optab;
12244 :
12245 0 : optab = optab_for_tree_code (bitop1, comp_vectype, optab_default);
12246 0 : if (!optab || !can_implement_p (optab, mode))
12247 0 : return false;
12248 :
12249 0 : if (bitop2 != NOP_EXPR)
12250 : {
12251 0 : optab = optab_for_tree_code (bitop2, comp_vectype,
12252 : optab_default);
12253 0 : if (!optab || !can_implement_p (optab, mode))
12254 0 : return false;
12255 : }
12256 : }
12257 :
12258 19333 : vect_cost_for_stmt kind = vector_stmt;
12259 19333 : if (reduction_type == EXTRACT_LAST_REDUCTION)
12260 : /* Count one reduction-like operation per vector. */
12261 : kind = vec_to_scalar;
12262 19333 : else if ((masked && !expand_vec_cond_expr_p (vectype, comp_vectype))
12263 19333 : || (!masked
12264 0 : && (!expand_vec_cmp_expr_p (comp_vectype, vec_cmp_type,
12265 : cond_code)
12266 0 : || !expand_vec_cond_expr_p (vectype, vec_cmp_type))))
12267 2 : return false;
12268 :
12269 19331 : if (!vect_maybe_update_slp_op_vectype (SLP_TREE_CHILDREN (slp_node)[0],
12270 : comp_vectype)
12271 19331 : || (op_adjust == 1
12272 0 : && !vect_maybe_update_slp_op_vectype
12273 0 : (SLP_TREE_CHILDREN (slp_node)[1], comp_vectype))
12274 19331 : || !vect_maybe_update_slp_op_vectype (then_slp_node, vectype)
12275 38662 : || !vect_maybe_update_slp_op_vectype (else_slp_node, vectype))
12276 : {
12277 0 : if (dump_enabled_p ())
12278 0 : dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
12279 : "incompatible vector types for invariants\n");
12280 0 : return false;
12281 : }
12282 :
12283 19331 : if (loop_vinfo && for_reduction
12284 400 : && LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo))
12285 : {
12286 65 : if (reduction_type == EXTRACT_LAST_REDUCTION)
12287 : {
12288 0 : if (direct_internal_fn_supported_p (IFN_LEN_FOLD_EXTRACT_LAST,
12289 : vectype, OPTIMIZE_FOR_SPEED))
12290 0 : vect_record_loop_len (loop_vinfo,
12291 : &LOOP_VINFO_LENS (loop_vinfo),
12292 : vec_num, vectype, 1);
12293 : else
12294 0 : vect_record_loop_mask (loop_vinfo,
12295 : &LOOP_VINFO_MASKS (loop_vinfo),
12296 : vec_num, vectype, NULL);
12297 : }
12298 : /* Extra inactive lanes should be safe for vect_nested_cycle. */
12299 65 : else if (!nested_cycle_p)
12300 : {
12301 65 : if (dump_enabled_p ())
12302 8 : dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
12303 : "conditional reduction prevents the use"
12304 : " of partial vectors.\n");
12305 65 : LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo) = false;
12306 : }
12307 : }
12308 :
12309 19331 : SLP_TREE_TYPE (slp_node) = condition_vec_info_type;
12310 19331 : vect_model_simple_cost (vinfo, 1, slp_node, cost_vec, kind);
12311 19331 : return true;
12312 : }
12313 :
12314 : /* Transform. */
12315 :
12316 : /* Handle def. */
12317 8474 : scalar_dest = gimple_assign_lhs (stmt);
12318 8474 : if (reduction_type != EXTRACT_LAST_REDUCTION)
12319 8474 : vec_dest = vect_create_destination_var (scalar_dest, vectype);
12320 :
12321 8474 : bool swap_cond_operands = false;
12322 :
12323 : /* See whether another part of the vectorized code applies a loop
12324 : mask to the condition, or to its inverse. */
12325 :
12326 8474 : vec_loop_masks *masks = NULL;
12327 8474 : vec_loop_lens *lens = NULL;
12328 8474 : if (loop_vinfo && LOOP_VINFO_FULLY_WITH_LENGTH_P (loop_vinfo))
12329 : {
12330 0 : if (reduction_type == EXTRACT_LAST_REDUCTION)
12331 0 : lens = &LOOP_VINFO_LENS (loop_vinfo);
12332 : }
12333 8474 : else if (loop_vinfo && LOOP_VINFO_FULLY_MASKED_P (loop_vinfo))
12334 : {
12335 3 : if (reduction_type == EXTRACT_LAST_REDUCTION)
12336 0 : masks = &LOOP_VINFO_MASKS (loop_vinfo);
12337 : else
12338 : {
12339 3 : scalar_cond_masked_key cond (cond_expr, 1);
12340 3 : if (loop_vinfo->scalar_cond_masked_set.contains (cond))
12341 0 : masks = &LOOP_VINFO_MASKS (loop_vinfo);
12342 : else
12343 : {
12344 3 : bool honor_nans = HONOR_NANS (TREE_TYPE (cond.op0));
12345 3 : tree_code orig_code = cond.code;
12346 3 : cond.code = invert_tree_comparison (cond.code, honor_nans);
12347 3 : if (!masked && loop_vinfo->scalar_cond_masked_set.contains (cond))
12348 : {
12349 0 : masks = &LOOP_VINFO_MASKS (loop_vinfo);
12350 0 : cond_code = cond.code;
12351 0 : swap_cond_operands = true;
12352 : }
12353 : else
12354 : {
12355 : /* Try the inverse of the current mask. We check if the
12356 : inverse mask is live and if so we generate a negate of
12357 : the current mask such that we still honor NaNs. */
12358 3 : cond.inverted_p = true;
12359 3 : cond.code = orig_code;
12360 3 : if (loop_vinfo->scalar_cond_masked_set.contains (cond))
12361 : {
12362 0 : masks = &LOOP_VINFO_MASKS (loop_vinfo);
12363 0 : cond_code = cond.code;
12364 0 : swap_cond_operands = true;
12365 0 : must_invert_cmp_result = true;
12366 : }
12367 : }
12368 : }
12369 : }
12370 : }
12371 :
12372 : /* Handle cond expr. */
12373 8474 : if (masked)
12374 8474 : vect_get_vec_defs (vinfo, slp_node,
12375 : cond_expr, &vec_oprnds0,
12376 : then_clause, &vec_oprnds2,
12377 : reduction_type != EXTRACT_LAST_REDUCTION
12378 : ? else_clause : NULL, &vec_oprnds3);
12379 : else
12380 0 : vect_get_vec_defs (vinfo, slp_node,
12381 : cond_expr0, &vec_oprnds0,
12382 : cond_expr1, &vec_oprnds1,
12383 : then_clause, &vec_oprnds2,
12384 : reduction_type != EXTRACT_LAST_REDUCTION
12385 : ? else_clause : NULL, &vec_oprnds3);
12386 :
12387 8474 : if (reduction_type == EXTRACT_LAST_REDUCTION)
12388 0 : vec_else_clause = else_clause;
12389 :
12390 : /* Arguments are ready. Create the new vector stmt. */
12391 19939 : FOR_EACH_VEC_ELT (vec_oprnds0, i, vec_cond_lhs)
12392 : {
12393 11465 : vec_then_clause = vec_oprnds2[i];
12394 11465 : if (reduction_type != EXTRACT_LAST_REDUCTION)
12395 11465 : vec_else_clause = vec_oprnds3[i];
12396 :
12397 11465 : if (swap_cond_operands)
12398 0 : std::swap (vec_then_clause, vec_else_clause);
12399 :
12400 11465 : if (masked)
12401 : vec_compare = vec_cond_lhs;
12402 : else
12403 : {
12404 0 : vec_cond_rhs = vec_oprnds1[i];
12405 0 : if (bitop1 == NOP_EXPR)
12406 : {
12407 0 : gimple_seq stmts = NULL;
12408 0 : vec_compare = gimple_build (&stmts, cond_code, vec_cmp_type,
12409 : vec_cond_lhs, vec_cond_rhs);
12410 0 : gsi_insert_before (gsi, stmts, GSI_SAME_STMT);
12411 : }
12412 : else
12413 : {
12414 0 : new_temp = make_ssa_name (vec_cmp_type);
12415 0 : gassign *new_stmt;
12416 0 : if (bitop1 == BIT_NOT_EXPR)
12417 0 : new_stmt = gimple_build_assign (new_temp, bitop1,
12418 : vec_cond_rhs);
12419 : else
12420 0 : new_stmt
12421 0 : = gimple_build_assign (new_temp, bitop1, vec_cond_lhs,
12422 : vec_cond_rhs);
12423 0 : vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
12424 0 : if (bitop2 == NOP_EXPR)
12425 : vec_compare = new_temp;
12426 0 : else if (bitop2 == BIT_NOT_EXPR
12427 0 : && reduction_type != EXTRACT_LAST_REDUCTION)
12428 : {
12429 : /* Instead of doing ~x ? y : z do x ? z : y. */
12430 : vec_compare = new_temp;
12431 : std::swap (vec_then_clause, vec_else_clause);
12432 : }
12433 : else
12434 : {
12435 0 : vec_compare = make_ssa_name (vec_cmp_type);
12436 0 : if (bitop2 == BIT_NOT_EXPR)
12437 0 : new_stmt
12438 0 : = gimple_build_assign (vec_compare, bitop2, new_temp);
12439 : else
12440 0 : new_stmt
12441 0 : = gimple_build_assign (vec_compare, bitop2,
12442 : vec_cond_lhs, new_temp);
12443 0 : vect_finish_stmt_generation (vinfo, stmt_info,
12444 : new_stmt, gsi);
12445 : }
12446 : }
12447 : }
12448 :
12449 : /* If we decided to apply a loop mask to the result of the vector
12450 : comparison, AND the comparison with the mask now. Later passes
12451                 :          should then be able to reuse the AND results between multiple
12452 : vector statements.
12453 :
12454 : For example:
12455 : for (int i = 0; i < 100; ++i)
12456 : x[i] = y[i] ? z[i] : 10;
12457 :
12458 : results in following optimized GIMPLE:
12459 :
12460 : mask__35.8_43 = vect__4.7_41 != { 0, ... };
12461 : vec_mask_and_46 = loop_mask_40 & mask__35.8_43;
12462 : _19 = &MEM[base: z_12(D), index: ivtmp_56, step: 4, offset: 0B];
12463 : vect_iftmp.11_47 = .MASK_LOAD (_19, 4B, vec_mask_and_46);
12464 : vect_iftmp.12_52 = VEC_COND_EXPR <vec_mask_and_46,
12465 : vect_iftmp.11_47, { 10, ... }>;
12466 :
12467 : instead of using a masked and unmasked forms of
12468 : vec != { 0, ... } (masked in the MASK_LOAD,
12469 : unmasked in the VEC_COND_EXPR). */
12470 :
12471 : /* Force vec_compare to be an SSA_NAME rather than a comparison,
12472 : in cases where that's necessary. */
12473 :
12474 11465 : tree len = NULL_TREE, bias = NULL_TREE;
12475 11465 : if (masks || lens || reduction_type == EXTRACT_LAST_REDUCTION)
12476 : {
12477 0 : if (!is_gimple_val (vec_compare))
12478 : {
12479 0 : tree vec_compare_name = make_ssa_name (vec_cmp_type);
12480 0 : gassign *new_stmt = gimple_build_assign (vec_compare_name,
12481 : vec_compare);
12482 0 : vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
12483 0 : vec_compare = vec_compare_name;
12484 : }
12485 :
12486 0 : if (must_invert_cmp_result)
12487 : {
12488 0 : tree vec_compare_name = make_ssa_name (vec_cmp_type);
12489 0 : gassign *new_stmt = gimple_build_assign (vec_compare_name,
12490 : BIT_NOT_EXPR,
12491 : vec_compare);
12492 0 : vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
12493 0 : vec_compare = vec_compare_name;
12494 : }
12495 :
12496 0 : if (direct_internal_fn_supported_p (IFN_LEN_FOLD_EXTRACT_LAST,
12497 : vectype, OPTIMIZE_FOR_SPEED))
12498 : {
12499 0 : if (lens)
12500 : {
12501 : /* ??? Do we really want the adjusted LEN here? Isn't this
12502 : based on number of elements? */
12503 0 : len = vect_get_loop_len (loop_vinfo, gsi, lens,
12504 : vec_num, vectype, i, 1, true);
12505 0 : signed char biasval
12506 0 : = LOOP_VINFO_PARTIAL_LOAD_STORE_BIAS (loop_vinfo);
12507 0 : bias = build_int_cst (intQI_type_node, biasval);
12508 : }
12509 : else
12510 : {
12511 0 : len = size_int (TYPE_VECTOR_SUBPARTS (vectype));
12512 0 : bias = build_int_cst (intQI_type_node, 0);
12513 : }
12514 : }
12515 0 : if (masks)
12516 : {
12517 0 : tree loop_mask
12518 0 : = vect_get_loop_mask (loop_vinfo, gsi, masks, vec_num,
12519 : vectype, i);
12520 0 : tree tmp2 = make_ssa_name (vec_cmp_type);
12521 0 : gassign *g
12522 0 : = gimple_build_assign (tmp2, BIT_AND_EXPR, vec_compare,
12523 : loop_mask);
12524 0 : vect_finish_stmt_generation (vinfo, stmt_info, g, gsi);
12525 0 : vec_compare = tmp2;
12526 : }
12527 : }
12528 :
12529 0 : gimple *new_stmt;
12530 0 : if (reduction_type == EXTRACT_LAST_REDUCTION)
12531 : {
12532 0 : gimple *old_stmt = vect_orig_stmt (stmt_info)->stmt;
12533 0 : tree lhs = gimple_get_lhs (old_stmt);
12534 0 : if ((unsigned)i != vec_oprnds0.length () - 1)
12535 0 : lhs = copy_ssa_name (lhs);
12536 0 : if (len)
12537 0 : new_stmt = gimple_build_call_internal
12538 0 : (IFN_LEN_FOLD_EXTRACT_LAST, 5, vec_else_clause, vec_compare,
12539 : vec_then_clause, len, bias);
12540 : else
12541 0 : new_stmt = gimple_build_call_internal
12542 0 : (IFN_FOLD_EXTRACT_LAST, 3, vec_else_clause, vec_compare,
12543 : vec_then_clause);
12544 0 : gimple_call_set_lhs (new_stmt, lhs);
12545 0 : SSA_NAME_DEF_STMT (lhs) = new_stmt;
12546 0 : if ((unsigned)i != vec_oprnds0.length () - 1)
12547 : {
12548 0 : vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
12549 0 : vec_else_clause = lhs;
12550 : }
12551 0 : else if (old_stmt == gsi_stmt (*gsi))
12552 0 : vect_finish_replace_stmt (vinfo, stmt_info, new_stmt);
12553 : else
12554 : {
12555 : /* In this case we're moving the definition to later in the
12556 : block. That doesn't matter because the only uses of the
12557 : lhs are in phi statements. */
12558 0 : gimple_stmt_iterator old_gsi = gsi_for_stmt (old_stmt);
12559 0 : gsi_remove (&old_gsi, true);
12560 0 : vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
12561 : }
12562 : }
12563 : else
12564 : {
12565 11465 : new_temp = make_ssa_name (vec_dest);
12566 11465 : new_stmt = gimple_build_assign (new_temp, VEC_COND_EXPR, vec_compare,
12567 : vec_then_clause, vec_else_clause);
12568 11465 : vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
12569 : }
12570 11465 : slp_node->push_vec_def (new_stmt);
12571 : }
12572 :
12573 8474 : vec_oprnds0.release ();
12574 8474 : vec_oprnds1.release ();
12575 8474 : vec_oprnds2.release ();
12576 8474 : vec_oprnds3.release ();
12577 :
12578 8474 : return true;
12579 : }
12580 :
12581 : /* Helper of vectorizable_comparison.
12582 :
12583 : Check if STMT_INFO is comparison expression CODE that can be vectorized.
12584 : If COST_VEC is passed, calculate costs but don't change anything,
12585 : otherwise, vectorize STMT_INFO: create a vectorized comparison, and insert
12586 : it at GSI.
12587 :
12588 : Return true if STMT_INFO is vectorizable in this way. */
12589 :
12590 : static bool
12591 333226 : vectorizable_comparison_1 (vec_info *vinfo, tree vectype,
12592 : stmt_vec_info stmt_info, tree_code code,
12593 : gimple_stmt_iterator *gsi,
12594 : slp_tree slp_node, stmt_vector_for_cost *cost_vec)
12595 : {
12596 333226 : tree lhs, rhs1, rhs2;
12597 333226 : tree vectype1 = NULL_TREE, vectype2 = NULL_TREE;
12598 333226 : tree vec_rhs1 = NULL_TREE, vec_rhs2 = NULL_TREE;
12599 333226 : tree new_temp;
12600 333226 : enum vect_def_type dts[2] = {vect_unknown_def_type, vect_unknown_def_type};
12601 333226 : poly_uint64 nunits;
12602 333226 : enum tree_code bitop1 = NOP_EXPR, bitop2 = NOP_EXPR;
12603 333226 : int i;
12604 333226 : bb_vec_info bb_vinfo = dyn_cast <bb_vec_info> (vinfo);
12605 333226 : vec<tree> vec_oprnds0 = vNULL;
12606 333226 : vec<tree> vec_oprnds1 = vNULL;
12607 333226 : tree mask_type;
12608 333226 : tree mask = NULL_TREE;
12609 :
 : /* Nothing to do for stmts that are neither relevant nor part of a
 : basic-block vectorization region. */
12610 333226 : if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
12611 : return false;
12612 :
 : /* A comparison produces a mask, so the result type must be a vector
 : boolean type. */
12613 333226 : if (!vectype || !VECTOR_BOOLEAN_TYPE_P (vectype))
12614 : return false;
12615 :
12616 151888 : mask_type = vectype;
12617 151888 : nunits = TYPE_VECTOR_SUBPARTS (vectype);
12618 :
12619 151888 : if (TREE_CODE_CLASS (code) != tcc_comparison)
12620 : return false;
12621 :
 : /* Fetch both comparison operands together with their SLP children,
 : def kinds and (possibly NULL for invariants) vector types. */
12622 150136 : slp_tree slp_rhs1, slp_rhs2;
12623 150136 : if (!vect_is_simple_use (vinfo, slp_node,
12624 : 0, &rhs1, &slp_rhs1, &dts[0], &vectype1))
12625 : return false;
12626 :
12627 150136 : if (!vect_is_simple_use (vinfo, slp_node,
12628 : 1, &rhs2, &slp_rhs2, &dts[1], &vectype2))
12629 : return false;
12630 :
 : /* Both operand vector types must agree on the number of lanes. */
12631 115203 : if (vectype1 && vectype2
12632 219275 : && maybe_ne (TYPE_VECTOR_SUBPARTS (vectype1),
12633 69139 : TYPE_VECTOR_SUBPARTS (vectype2)))
12634 16 : return false;
12635 :
12636 150120 : vectype = vectype1 ? vectype1 : vectype2;
12637 :
12638 : /* Invariant comparison. */
12639 150120 : if (!vectype)
12640 : {
12641 30065 : vectype = get_vectype_for_scalar_type (vinfo, TREE_TYPE (rhs1), slp_node);
12642 30065 : if (!vectype || maybe_ne (TYPE_VECTOR_SUBPARTS (vectype), nunits))
12643 7 : return false;
12644 : }
12645 120055 : else if (maybe_ne (nunits, TYPE_VECTOR_SUBPARTS (vectype)))
12646 : return false;
12647 :
12648 : /* Can't compare mask and non-mask types. */
12649 115187 : if (vectype1 && vectype2
12650 356944 : && (VECTOR_BOOLEAN_TYPE_P (vectype1) ^ VECTOR_BOOLEAN_TYPE_P (vectype2)))
12651 : return false;
12652 :
12653 : /* Boolean values may have another representation in vectors
12654 : and therefore we prefer bit operations over comparison for
12655 : them (which also works for scalar masks). We store opcodes
12656 : to use in bitop1 and bitop2. Statement is vectorized as
12657 : BITOP2 (rhs1 BITOP1 rhs2) or
12658 : rhs1 BITOP2 (BITOP1 rhs2)
12659 : depending on bitop1 and bitop2 arity. */
12660 150105 : bool swap_p = false;
12661 150105 : if (VECTOR_BOOLEAN_TYPE_P (vectype))
12662 : {
12663 635 : if (code == GT_EXPR)
12664 : {
12665 : bitop1 = BIT_NOT_EXPR;
12666 : bitop2 = BIT_AND_EXPR;
12667 : }
12668 : else if (code == GE_EXPR)
12669 : {
12670 : bitop1 = BIT_NOT_EXPR;
12671 : bitop2 = BIT_IOR_EXPR;
12672 : }
12673 : else if (code == LT_EXPR)
12674 : {
 : /* LT/LE are handled as GT/GE with the operands swapped
 : (see the std::swap of the operand vectors below). */
12675 : bitop1 = BIT_NOT_EXPR;
12676 : bitop2 = BIT_AND_EXPR;
12677 : swap_p = true;
12678 : }
12679 : else if (code == LE_EXPR)
12680 : {
12681 : bitop1 = BIT_NOT_EXPR;
12682 : bitop2 = BIT_IOR_EXPR;
12683 : swap_p = true;
12684 : }
12685 : else
12686 : {
12687 : bitop1 = BIT_XOR_EXPR;
12688 : if (code == EQ_EXPR)
12689 : bitop2 = BIT_NOT_EXPR;
12690 : }
12691 : }
12692 :
 : /* Analysis/costing only: verify target support and record costs,
 : no code is generated on this path. */
12693 150105 : if (cost_vec)
12694 : {
12695 137695 : if (bitop1 == NOP_EXPR)
12696 : {
12697 137192 : if (!expand_vec_cmp_expr_p (vectype, mask_type, code))
12698 : return false;
12699 : }
12700 : else
12701 : {
12702 503 : machine_mode mode = TYPE_MODE (vectype);
12703 503 : optab optab;
12704 :
12705 503 : optab = optab_for_tree_code (bitop1, vectype, optab_default);
12706 503 : if (!optab || !can_implement_p (optab, mode))
12707 0 : return false;
12708 :
12709 503 : if (bitop2 != NOP_EXPR)
12710 : {
12711 91 : optab = optab_for_tree_code (bitop2, vectype, optab_default);
12712 91 : if (!optab || !can_implement_p (optab, mode))
12713 0 : return false;
12714 : }
12715 : }
12716 :
12717 : /* Put types on constant and invariant SLP children. */
12718 125797 : if (!vect_maybe_update_slp_op_vectype (slp_rhs1, vectype)
12719 125797 : || !vect_maybe_update_slp_op_vectype (slp_rhs2, vectype))
12720 : {
12721 2 : if (dump_enabled_p ())
12722 2 : dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
12723 : "incompatible vector types for invariants\n");
12724 2 : return false;
12725 : }
12726 :
 : /* One stmt for the comparison/bitop1 plus one more when bitop2
 : is needed. */
12727 125795 : vect_model_simple_cost (vinfo, 1 + (bitop2 != NOP_EXPR),
12728 : slp_node, cost_vec);
12729 125795 : return true;
12730 : }
12731 :
12732 : /* Transform. */
12733 :
12734 : /* Handle def. */
12735 12410 : lhs = gimple_get_lhs (STMT_VINFO_STMT (stmt_info));
12736 12410 : if (lhs)
12737 12410 : mask = vect_create_destination_var (lhs, mask_type);
12738 :
12739 12410 : vect_get_vec_defs (vinfo, slp_node, rhs1, &vec_oprnds0, rhs2, &vec_oprnds1);
 : /* LT/LE were canonicalized above; realize the swap on the operand
 : vectors. */
12740 12410 : if (swap_p)
12741 58 : std::swap (vec_oprnds0, vec_oprnds1);
12742 :
12743 : /* Arguments are ready. Create the new vector stmt. */
12744 31209 : FOR_EACH_VEC_ELT (vec_oprnds0, i, vec_rhs1)
12745 : {
12746 18799 : gimple *new_stmt;
12747 18799 : vec_rhs2 = vec_oprnds1[i];
12748 :
12749 18799 : if (lhs)
12750 18799 : new_temp = make_ssa_name (mask);
12751 : else
12752 0 : new_temp = make_temp_ssa_name (mask_type, NULL, "cmp");
12753 18799 : if (bitop1 == NOP_EXPR)
12754 : {
 : /* Plain vector comparison. */
12755 18657 : new_stmt = gimple_build_assign (new_temp, code,
12756 : vec_rhs1, vec_rhs2);
12757 18657 : vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
12758 : }
12759 : else
12760 : {
 : /* Emit BITOP1 (unary or binary) and, if required, BITOP2 on
 : top of its result, per the scheme described above. */
12761 142 : if (bitop1 == BIT_NOT_EXPR)
12762 84 : new_stmt = gimple_build_assign (new_temp, bitop1, vec_rhs2);
12763 : else
12764 58 : new_stmt = gimple_build_assign (new_temp, bitop1, vec_rhs1,
12765 : vec_rhs2);
12766 142 : vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
12767 142 : if (bitop2 != NOP_EXPR)
12768 : {
12769 84 : tree res = make_ssa_name (mask);
12770 84 : if (bitop2 == BIT_NOT_EXPR)
12771 0 : new_stmt = gimple_build_assign (res, bitop2, new_temp);
12772 : else
12773 84 : new_stmt = gimple_build_assign (res, bitop2, vec_rhs1,
12774 : new_temp);
12775 84 : vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
12776 : }
12777 : }
12778 18799 : slp_node->push_vec_def (new_stmt);
12779 : }
12780 :
12781 12410 : vec_oprnds0.release ();
12782 12410 : vec_oprnds1.release ();
12783 :
12784 12410 : return true;
12785 : }
12786 :
12787 : /* vectorizable_comparison.
12788 :
12789 : Check if STMT_INFO is comparison expression that can be vectorized.
12790 : If COST_VEC is passed, calculate costs but don't change anything,
12791 : otherwise, vectorize STMT_INFO: create a vectorized comparison, and insert
12792 : it at GSI.
12793 :
12794 : Return true if STMT_INFO is vectorizable in this way. */
12795 :
12796 : static bool
12797 590882 : vectorizable_comparison (vec_info *vinfo,
12798 : stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
12799 : slp_tree slp_node, stmt_vector_for_cost *cost_vec)
12800 : {
12801 590882 : bb_vec_info bb_vinfo = dyn_cast <bb_vec_info> (vinfo);
12802 :
 : /* Nothing to do for stmts that are neither relevant nor part of a
 : basic-block vectorization region. */
12803 590882 : if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
12804 : return false;
12805 :
12806 590882 : if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
12807 : return false;
12808 :
 : /* Only a plain assignment can carry a comparison RHS code. */
12809 785903 : gassign *stmt = dyn_cast <gassign *> (stmt_info->stmt);
12810 331169 : if (!stmt)
12811 : return false;
12812 :
 : /* Delegate the actual checking/transform to the worker. */
12813 331169 : enum tree_code code = gimple_assign_rhs_code (stmt);
12814 331169 : tree vectype = SLP_TREE_VECTYPE (slp_node);
12815 331169 : if (!vectorizable_comparison_1 (vinfo, vectype, stmt_info, code, gsi,
12816 : slp_node, cost_vec))
12817 : return false;
12818 :
 : /* During analysis mark the node so the transform phase dispatches
 : back here. */
12819 136148 : if (cost_vec)
12820 123738 : SLP_TREE_TYPE (slp_node) = comparison_vec_info_type;
12821 :
12822 : return true;
12823 : }
12824 :
12825 : /* Check to see if the target supports any of the compare and branch optabs for
12826 : vectors with MODE as these would be required when expanding. */
12827 : static bool
12828 61946 : supports_vector_compare_and_branch (loop_vec_info loop_vinfo, machine_mode mode)
12829 : {
12830 61946 : bool masked_loop_p = LOOP_VINFO_FULLY_MASKED_P (loop_vinfo);
12831 61946 : bool len_loop_p = LOOP_VINFO_FULLY_WITH_LENGTH_P (loop_vinfo);
12832 :
12833 : /* The vectorizer only produces vec_cbranch_any_optab directly. So only
12834 : check for support for that or vec_cbranch_any_optab when masked.
12835 : We can't produce vcond_cbranch_any directly from the vectorizer as we
12836 : want to keep gimple_cond as the GIMPLE representation. But we'll fold
12837 : it in expand. For that reason we require a backend to support the
12838 : unconditional vector cbranch optab if they support the conditional one,
12839 : which is just an optimization on the unconditional one. */
 : /* Query the optab variant matching the loop's partial-vector style:
 : fully-masked, length-controlled, or neither. */
12840 61946 : if (masked_loop_p
12841 61946 : && direct_optab_handler (cond_vec_cbranch_any_optab, mode)
12842 : != CODE_FOR_nothing)
12843 : return true;
12844 61946 : else if (len_loop_p
12845 61946 : && direct_optab_handler (cond_len_vec_cbranch_any_optab, mode)
12846 : != CODE_FOR_nothing)
12847 : return true;
12848 61946 : else if (!masked_loop_p && !len_loop_p
12849 123892 : && direct_optab_handler (vec_cbranch_any_optab, mode)
12850 : != CODE_FOR_nothing)
12851 : return true;
12852 :
12853 : /* The target can implement cbranch to distinguish between boolean vector
12854 : types and data types if they don't have a different mode for both. */
12855 61946 : return direct_optab_handler (cbranch_optab, mode) != CODE_FOR_nothing;
12856 : }
12857 :
12858 : /* Determine the type to use for early break vectorization's scalar IV. If
12859 : no type is possible return false. */
12860 :
12861 : static bool
12862 2057 : vect_compute_type_for_early_break_scalar_iv (loop_vec_info loop_vinfo)
12863 : {
12864 : /* Check if we have a usable scalar IV type for vectorization. */
 : /* Default for uncounted loops: sizetype is always usable. */
12865 2057 : tree iters_vf_type = sizetype;
12866 2057 : if (!LOOP_VINFO_NITERS_UNCOUNTED_P (loop_vinfo))
12867 : {
12868 : /* Find the type with the minimum precision we can use
12869 : for the scalar IV. */
12870 1842 : tree cand_type = TREE_TYPE (LOOP_VINFO_NITERS (loop_vinfo));
12871 :
12872 : /* Work out how many bits we need to represent the limit. */
12873 1842 : unsigned int min_ni_width
12874 1842 : = vect_min_prec_for_max_niters (loop_vinfo, 1);
12875 :
12876 : /* Check if we're using PFA, if so we need a signed IV and an
12877 : extra bit for the sign. */
12878 1842 : if (TYPE_UNSIGNED (cand_type)
12879 1842 : && LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo)
12880 2910 : && LOOP_VINFO_PEELING_FOR_ALIGNMENT (loop_vinfo))
12881 164 : min_ni_width += 1;
12882 :
 : /* If the niters type itself is wide enough use (the unsigned
 : variant of) it directly. */
12883 1842 : if (TYPE_PRECISION (cand_type) >= min_ni_width)
12884 1767 : iters_vf_type = unsigned_type_for (cand_type);
12885 : else
12886 : {
 : /* Otherwise search for the narrowest integer mode the target
 : supports that can hold the iteration count. */
12887 75 : opt_scalar_int_mode cmp_mode_iter;
12888 75 : tree iv_type = NULL_TREE;
12889 367 : FOR_EACH_MODE_IN_CLASS (cmp_mode_iter, MODE_INT)
12890 : {
12891 367 : auto cmp_mode = cmp_mode_iter.require ();
12892 367 : unsigned int cmp_bits = GET_MODE_BITSIZE (cmp_mode);
12893 367 : if (cmp_bits >= min_ni_width
12894 367 : && targetm.scalar_mode_supported_p (cmp_mode))
12895 : {
12896 75 : iv_type = build_nonstandard_integer_type (cmp_bits, true);
12897 75 : if (iv_type)
12898 : break;
12899 : }
12900 : }
12901 :
12902 75 : if (!iv_type)
12903 : {
 : /* Fixed dump message: the original repeated the word "wide"
 : across the two string-literal pieces. */
12904 0 : if (dump_enabled_p ())
12905 0 : dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
12906 : "can't vectorize early exit because the "
12907 : "target doesn't support a scalar type "
12908 : "wide enough to hold niters.\n");
12909 0 : return false;
12910 : }
12911 75 : iters_vf_type = iv_type;
12912 : }
12913 : }
12914 :
 : /* Record the chosen IV type for the early-break code generation. */
12915 2057 : LOOP_VINFO_EARLY_BRK_IV_TYPE (loop_vinfo) = iters_vf_type;
12916 2057 : return true;
12917 : }
12918 :
12919 : /* Check to see if the current early break given in STMT_INFO is valid for
12920 : vectorization. */
12921 :
12922 : bool
12923 231580 : vectorizable_early_exit (loop_vec_info loop_vinfo, stmt_vec_info stmt_info,
12924 : gimple_stmt_iterator *gsi,
12925 : slp_tree slp_node, stmt_vector_for_cost *cost_vec)
12926 : {
 : /* Only GIMPLE_COND stmts marked as relevant loop-exit conditions
 : are handled here. */
12927 231580 : if (!is_a <gcond *> (STMT_VINFO_STMT (stmt_info)))
12928 : return false;
12929 :
12930 63535 : if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_condition_def)
12931 : return false;
12932 :
12933 63535 : if (!STMT_VINFO_RELEVANT_P (stmt_info))
12934 : return false;
12935 :
12936 63535 : DUMP_VECT_SCOPE ("vectorizable_early_exit");
12937 :
12938 63535 : auto code = gimple_cond_code (STMT_VINFO_STMT (stmt_info));
12939 :
12940 : /* For SLP we don't want to use the type of the operands of the SLP node, when
12941 : vectorizing using SLP slp_node will be the children of the gcond and we
12942 : want to use the type of the direct children which since the gcond is root
12943 : will be the current node, rather than a child node as vect_is_simple_use
12944 : assumes. */
12945 63535 : tree vectype = SLP_TREE_VECTYPE (slp_node);
12946 63535 : if (!vectype)
12947 : return false;
12948 :
12949 63535 : machine_mode mode = TYPE_MODE (vectype);
12950 63535 : int vec_num = vect_get_num_copies (loop_vinfo, slp_node);
12951 :
12952 63535 : vec_loop_masks *masks = &LOOP_VINFO_MASKS (loop_vinfo);
12953 63535 : vec_loop_lens *lens = &LOOP_VINFO_LENS (loop_vinfo);
12954 63535 : bool masked_loop_p = LOOP_VINFO_FULLY_MASKED_P (loop_vinfo);
12955 63535 : bool len_loop_p = LOOP_VINFO_FULLY_WITH_LENGTH_P (loop_vinfo);
12956 :
12957 : /* Now build the new conditional. Pattern gimple_conds get dropped during
12958 : codegen so we must replace the original insn. */
12959 63535 : gimple *orig_stmt = STMT_VINFO_STMT (vect_orig_stmt (stmt_info));
12960 63535 : gcond *cond_stmt = as_a <gcond *>(orig_stmt);
12961 :
 : /* Find the successor edge taken when the condition is true. */
12962 63535 : tree vectype_out = vectype;
12963 63535 : auto bb = gimple_bb (cond_stmt);
12964 63535 : edge exit_true_edge = EDGE_SUCC (bb, 0);
12965 63535 : if (exit_true_edge->flags & EDGE_FALSE_VALUE)
12966 3438 : exit_true_edge = EDGE_SUCC (bb, 1);
12967 63535 : gcc_assert (exit_true_edge->flags & EDGE_TRUE_VALUE);
12968 :
12969 : /* When vectorizing we assume that if the branch edge is taken that we're
12970 : exiting the loop. This is not however always the case as the compiler will
12971 : rewrite conditions to always be a comparison against 0. To do this it
12972 : sometimes flips the edges. This is fine for scalar, but for vector we
12973 : then have to negate the result of the test, as we're still assuming that if
12974 : you take the branch edge that we found the exit condition. i.e. we need to
12975 : know whether we are generating a `forall` or an `exist` condition. */
12976 127070 : bool flipped = flow_bb_inside_loop_p (LOOP_VINFO_LOOP (loop_vinfo),
12977 63535 : exit_true_edge->dest);
12978 :
12979 : /* See if we support ADDHN and use that for the reduction. */
12980 63535 : internal_fn ifn = IFN_VEC_TRUNC_ADD_HIGH;
12981 63535 : bool addhn_supported_p
12982 63535 : = direct_internal_fn_supported_p (ifn, vectype, OPTIMIZE_FOR_BOTH);
12983 63535 : tree narrow_type = NULL_TREE;
12984 63535 : if (addhn_supported_p)
12985 : {
12986 : /* Calculate the narrowing type for the result. */
12987 0 : auto halfprec = TYPE_PRECISION (TREE_TYPE (vectype)) / 2;
12988 0 : auto unsignedp = TYPE_UNSIGNED (TREE_TYPE (vectype));
12989 0 : tree itype = build_nonstandard_integer_type (halfprec, unsignedp);
12990 0 : tree tmp_type = build_vector_type (itype, TYPE_VECTOR_SUBPARTS (vectype));
12991 0 : narrow_type = truth_type_for (tmp_type);
12992 :
 : /* The narrowed result still has to be usable in a compare-and-
 : branch, otherwise fall back to the plain reduction. */
12993 0 : if (!supports_vector_compare_and_branch (loop_vinfo,
12994 0 : TYPE_MODE (narrow_type)))
12995 : {
12996 0 : if (dump_enabled_p ())
12997 0 : dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
12998 : "can't use ADDHN reduction because cbranch for "
12999 : "the narrowed type is not supported by the "
13000 : "target.\n");
13001 : addhn_supported_p = false;
13002 : }
13003 : }
13004 :
13005 : /* Analyze only. */
13006 63535 : if (cost_vec)
13007 : {
13008 61946 : if (!addhn_supported_p
13009 61946 : && !supports_vector_compare_and_branch (loop_vinfo, mode))
13010 : {
13011 59889 : if (dump_enabled_p ())
13012 573 : dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
13013 : "can't vectorize early exit because the "
13014 : "target doesn't support flag setting vector "
13015 : "comparisons.\n");
13016 59889 : return false;
13017 : }
13018 :
 : /* The condition itself must be a vectorizable comparison. */
13019 2057 : if (!vectorizable_comparison_1 (loop_vinfo, vectype, stmt_info, code, gsi,
13020 : slp_node, cost_vec))
13021 : return false;
13022 :
 : /* Record the loop mask or length the transform phase will need. */
13023 2057 : if (LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo))
13024 : {
13025 1068 : if (direct_internal_fn_supported_p (IFN_VCOND_MASK_LEN, vectype,
13026 : OPTIMIZE_FOR_SPEED))
13027 0 : vect_record_loop_len (loop_vinfo, lens, vec_num, vectype, 1);
13028 : else
13029 1068 : vect_record_loop_mask (loop_vinfo, masks, vec_num, vectype, NULL);
13030 : }
13031 :
13032 2057 : if (!vect_compute_type_for_early_break_scalar_iv (loop_vinfo))
13033 : return false;
13034 :
13035 : return true;
13036 : }
13037 :
13038 : /* Transform. */
13039 :
13040 1589 : tree new_temp = NULL_TREE;
13041 1589 : gimple *new_stmt = NULL;
13042 :
13043 1589 : if (dump_enabled_p ())
13044 409 : dump_printf_loc (MSG_NOTE, vect_location, "transform early-exit.\n");
13045 :
13046 : /* For SLP we don't do codegen of the body starting from the gcond, the gconds are
13047 : roots and so by the time we get to them we have already codegened the SLP tree
13048 : and so we shouldn't try to do so again. The arguments have already been
13049 : vectorized. It's not very clean to do this here, but the masking code below is
13050 : complex and this keeps it all in one place to ease fixes and backports. Once we
13051 : drop the non-SLP loop vect or split vectorizable_* this can be simplified. */
13052 :
13053 1589 : gimple *stmt = STMT_VINFO_STMT (stmt_info);
13054 1589 : basic_block cond_bb = gimple_bb (stmt);
13055 1589 : gimple_stmt_iterator cond_gsi = gsi_last_bb (cond_bb);
13056 :
 : /* Start from the already-vectorized defs of the condition. */
13057 1589 : auto_vec<tree> stmts;
13058 1589 : stmts.safe_splice (SLP_TREE_VEC_DEFS (slp_node));
13059 :
13060 : /* If we're comparing against a previous forall we need to negate the results
13061 : before we do the final comparison or reduction. */
13062 1589 : if (flipped)
13063 : {
13064 : /* Rewrite the if(all(mask)) into if (!all(mask)) which is the same as
13065 : if (any(~mask)) by negating the masks and flipping the branches.
13066 :
13067 : 1. For unmasked loops we simply reduce the ~mask.
13068 : 2. For masked loops we reduce (~mask & loop_mask) which is the same as
13069 : doing (mask & loop_mask) ^ loop_mask. */
13070 294 : for (unsigned i = 0; i < stmts.length (); i++)
13071 : {
13072 173 : tree inv_lhs = make_temp_ssa_name (vectype, NULL, "vexit_inv");
13073 173 : auto inv_stmt = gimple_build_assign (inv_lhs, BIT_NOT_EXPR, stmts[i]);
13074 173 : vect_finish_stmt_generation (loop_vinfo, stmt_info, inv_stmt,
13075 : &cond_gsi);
13076 173 : stmts[i] = inv_lhs;
13077 : }
13078 :
13079 121 : EDGE_SUCC (bb, 0)->flags ^= (EDGE_TRUE_VALUE|EDGE_FALSE_VALUE);
13080 121 : EDGE_SUCC (bb, 1)->flags ^= (EDGE_TRUE_VALUE|EDGE_FALSE_VALUE);
13081 : }
13082 :
13083 : /* Determine if we need to reduce the final value. */
13084 1589 : if (stmts.length () > 1)
13085 : {
13086 : /* We build the reductions in a way to maintain as much parallelism as
13087 : possible. */
13088 141 : auto_vec<tree> workset (stmts.length ());
13089 :
13090 : /* Mask the statements as we queue them up. Normally we loop over
13091 : vec_num, but since we inspect the exact results of vectorization
13092 : we don't need to and instead can just use the stmts themselves. */
13093 141 : if (masked_loop_p)
13094 0 : for (unsigned i = 0; i < stmts.length (); i++)
13095 : {
13096 0 : tree stmt_mask
13097 0 : = vect_get_loop_mask (loop_vinfo, gsi, masks, vec_num,
13098 : vectype, i);
13099 0 : stmt_mask
13100 0 : = prepare_vec_mask (loop_vinfo, TREE_TYPE (stmt_mask), stmt_mask,
13101 0 : stmts[i], &cond_gsi);
13102 0 : workset.quick_push (stmt_mask);
13103 : }
13104 141 : else if (len_loop_p)
13105 0 : for (unsigned i = 0; i < stmts.length (); i++)
13106 : {
13107 0 : tree len_mask = vect_gen_loop_len_mask (loop_vinfo, gsi, &cond_gsi,
13108 : lens, vec_num,
13109 0 : vectype, stmts[i], i, 1);
13110 :
13111 0 : workset.quick_push (len_mask);
13112 : }
13113 : else
13114 141 : workset.splice (stmts);
13115 :
 : /* Pairwise-reduce the workset; the last pair may use the ADDHN
 : internal function, all other pairs use BIT_IOR_EXPR. */
13116 430 : while (workset.length () > 1)
13117 : {
13118 289 : tree arg0 = workset.pop ();
13119 289 : tree arg1 = workset.pop ();
13120 289 : if (addhn_supported_p && workset.length () == 0)
13121 : {
13122 0 : new_stmt = gimple_build_call_internal (ifn, 2, arg0, arg1);
13123 0 : vectype_out = narrow_type;
13124 0 : new_temp = make_temp_ssa_name (vectype_out, NULL, "vexit_reduc");
13125 0 : gimple_call_set_lhs (as_a <gcall *> (new_stmt), new_temp);
13126 0 : gimple_call_set_nothrow (as_a <gcall *> (new_stmt), true);
13127 : }
13128 : else
13129 : {
13130 289 : new_temp = make_temp_ssa_name (vectype_out, NULL, "vexit_reduc");
13131 289 : new_stmt
13132 289 : = gimple_build_assign (new_temp, BIT_IOR_EXPR, arg0, arg1);
13133 : }
13134 289 : vect_finish_stmt_generation (loop_vinfo, stmt_info, new_stmt,
13135 : &cond_gsi);
13136 289 : workset.quick_insert (0, new_temp);
13137 : }
13138 141 : }
13139 : else
13140 : {
 : /* Single vector def: only apply the loop mask/length if needed. */
13141 1448 : new_temp = stmts[0];
13142 1448 : if (masked_loop_p)
13143 : {
13144 0 : tree mask
13145 0 : = vect_get_loop_mask (loop_vinfo, gsi, masks, 1, vectype, 0);
13146 0 : new_temp = prepare_vec_mask (loop_vinfo, TREE_TYPE (mask), mask,
13147 : new_temp, &cond_gsi);
13148 : }
13149 1448 : else if (len_loop_p)
13150 0 : new_temp = vect_gen_loop_len_mask (loop_vinfo, gsi, &cond_gsi, lens,
13151 : 1, vectype, new_temp, 0, 1);
13152 : }
13153 :
13154 1589 : gcc_assert (new_temp);
13155 :
 : /* Replace the scalar condition by "reduced-mask != {0,...}". */
13156 1589 : tree cst = build_zero_cst (vectype_out);
13157 1589 : gimple_cond_set_condition (cond_stmt, NE_EXPR, new_temp, cst);
13158 1589 : update_stmt (orig_stmt);
13159 :
13160 : /* ??? The vector defs were consumed above; clear them. */
13161 1589 : SLP_TREE_VEC_DEFS (slp_node).truncate (0);
13162 :
13163 1589 : return true;
13164 1589 : }
13165 :
13166 : /* If SLP_NODE is nonnull, return true if vectorizable_live_operation
13167 : can handle all live statements in the node. Otherwise return true
13168 : if STMT_INFO is not live or if vectorizable_live_operation can handle it.
13169 : VEC_STMT_P is as for vectorizable_live_operation. */
13170 :
13171 : static bool
13172 1444971 : can_vectorize_live_stmts (vec_info *vinfo,
13173 : slp_tree slp_node, slp_instance slp_node_instance,
13174 : bool vec_stmt_p,
13175 : stmt_vector_for_cost *cost_vec)
13176 : {
13177 1444971 : stmt_vec_info slp_stmt_info;
13178 1444971 : unsigned int i;
 : /* Walk all scalar stmts of the node; fail as soon as a live one
 : cannot be handled by vectorizable_live_operation. */
13179 3248201 : FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (slp_node), i, slp_stmt_info)
13180 : {
13181 1803230 : if (slp_stmt_info
13182 1783603 : && STMT_VINFO_LIVE_P (slp_stmt_info)
13183 1972059 : && !vectorizable_live_operation (vinfo, slp_stmt_info, slp_node,
13184 : slp_node_instance, i,
13185 : vec_stmt_p, cost_vec))
13186 : return false;
13187 : }
13188 :
13189 : return true;
13190 : }
13191 :
13192 : /* Make sure the statement is vectorizable. */
13193 :
13194 : opt_result
13195 2344426 : vect_analyze_stmt (vec_info *vinfo,
13196 : slp_tree node, slp_instance node_instance,
13197 : stmt_vector_for_cost *cost_vec)
13198 : {
13199 2344426 : stmt_vec_info stmt_info = SLP_TREE_REPRESENTATIVE (node);
13200 2344426 : bb_vec_info bb_vinfo = dyn_cast <bb_vec_info> (vinfo);
13201 2344426 : enum vect_relevant relevance = STMT_VINFO_RELEVANT (stmt_info);
13202 2344426 : bool ok;
13203 :
13204 2344426 : if (dump_enabled_p ())
13205 97947 : dump_printf_loc (MSG_NOTE, vect_location, "==> examining statement: %G",
13206 : stmt_info->stmt);
13207 :
13208 4443399 : if (gimple_has_volatile_ops (stmt_info->stmt))
13209 : {
13210 : /* ??? This shouldn't really happen, volatile stmts should
13211 : not end up in the SLP graph. */
13212 0 : return opt_result::failure_at (stmt_info->stmt,
13213 : "not vectorized:"
13214 : " stmt has volatile operands: %G\n",
13215 : stmt_info->stmt);
13216 : }
13217 :
13218 : /* Skip stmts that do not need to be vectorized. */
13219 2344426 : if (!STMT_VINFO_RELEVANT_P (stmt_info)
13220 0 : && !STMT_VINFO_LIVE_P (stmt_info))
13221 : {
13222 0 : if (dump_enabled_p ())
13223 0 : dump_printf_loc (MSG_NOTE, vect_location, "irrelevant.\n");
13224 :
13225 : /* ??? This shouldn't really happen, irrelevant stmts should
13226 : not end up in the SLP graph. */
13227 0 : return opt_result::failure_at (stmt_info->stmt,
13228 : "not vectorized:"
13229 : " irrelevant stmt as SLP node %p "
13230 : "representative.\n",
13231 : (void *)node);
13232 : }
13233 :
 : /* Sanity-check the def type / relevance combination. */
13234 2344426 : switch (STMT_VINFO_DEF_TYPE (stmt_info))
13235 : {
13236 : case vect_internal_def:
13237 : case vect_condition_def:
13238 : break;
13239 :
13240 57465 : case vect_reduction_def:
13241 57465 : case vect_nested_cycle:
13242 57465 : gcc_assert (!bb_vinfo
13243 : && (relevance == vect_used_in_outer
13244 : || relevance == vect_used_in_outer_by_reduction
13245 : || relevance == vect_used_by_reduction
13246 : || relevance == vect_unused_in_scope
13247 : || relevance == vect_used_only_live));
13248 : break;
13249 :
13250 287 : case vect_double_reduction_def:
13251 287 : gcc_assert (!bb_vinfo && node);
13252 : break;
13253 :
13254 136773 : case vect_induction_def:
13255 136773 : case vect_first_order_recurrence:
13256 136773 : gcc_assert (!bb_vinfo);
13257 : break;
13258 :
13259 0 : case vect_constant_def:
13260 0 : case vect_external_def:
13261 0 : case vect_unknown_def_type:
13262 0 : default:
13263 0 : gcc_unreachable ();
13264 : }
13265 :
 : /* Clear the stmt vectype for the duration of the vectorizable_*
 : dispatch below; it is restored afterwards. */
13266 2344426 : tree saved_vectype = STMT_VINFO_VECTYPE (stmt_info);
13267 2344426 : STMT_VINFO_VECTYPE (stmt_info) = NULL_TREE;
13268 :
13269 2344426 : if (STMT_VINFO_RELEVANT_P (stmt_info))
13270 : {
 : /* A relevant node must have a vectype unless it is a condition
 : or a call without a lhs. */
13271 2344426 : gcall *call = dyn_cast <gcall *> (stmt_info->stmt);
13272 2344426 : gcc_assert (SLP_TREE_VECTYPE (node)
13273 : || gimple_code (stmt_info->stmt) == GIMPLE_COND
13274 : || (call && gimple_call_lhs (call) == NULL_TREE));
13275 : }
13276 :
 : /* Try each vectorizable_* analysis in turn; the first that claims
 : the stmt decides how it is handled. */
13277 2344426 : ok = true;
13278 2344426 : if (bb_vinfo
13279 1197326 : || (STMT_VINFO_RELEVANT_P (stmt_info)
13280 0 : || STMT_VINFO_DEF_TYPE (stmt_info) == vect_reduction_def))
13281 : /* Prefer vectorizable_call over vectorizable_simd_clone_call so
13282 : -mveclibabi= takes preference over library functions with
13283 : the simd attribute. */
13284 2344426 : ok = (vectorizable_call (vinfo, stmt_info, NULL, node, cost_vec)
13285 2338913 : || vectorizable_simd_clone_call (vinfo, stmt_info, NULL, node,
13286 : cost_vec)
13287 2338461 : || vectorizable_conversion (vinfo, stmt_info, NULL, node, cost_vec)
13288 2273369 : || vectorizable_operation (vinfo, stmt_info, NULL, node, cost_vec)
13289 1854650 : || vectorizable_assignment (vinfo, stmt_info, NULL, node, cost_vec)
13290 1797194 : || vectorizable_load (vinfo, stmt_info, NULL, node, cost_vec)
13291 1414594 : || vectorizable_store (vinfo, stmt_info, NULL, node, cost_vec)
13292 643193 : || vectorizable_shift (vinfo, stmt_info, NULL, node, cost_vec)
13293 597803 : || vectorizable_condition (vinfo, stmt_info, NULL, node, cost_vec)
13294 578472 : || vectorizable_comparison (vinfo, stmt_info, NULL, node, cost_vec)
13295 454734 : || (bb_vinfo
13296 124355 : && vectorizable_phi (bb_vinfo, stmt_info, node, cost_vec))
13297 2742153 : || (is_a <loop_vec_info> (vinfo)
13298 330379 : && (vectorizable_lane_reducing (as_a <loop_vec_info> (vinfo),
13299 : stmt_info, node, cost_vec)
13300 329925 : || vectorizable_reduction (as_a <loop_vec_info> (vinfo),
13301 : stmt_info,
13302 : node, node_instance, cost_vec)
13303 274683 : || vectorizable_induction (as_a <loop_vec_info> (vinfo),
13304 : stmt_info, node, cost_vec)
13305 169054 : || vectorizable_lc_phi (as_a <loop_vec_info> (vinfo),
13306 : stmt_info, node)
13307 168293 : || vectorizable_recurr (as_a <loop_vec_info> (vinfo),
13308 : stmt_info, node, cost_vec)
13309 168045 : || vectorizable_early_exit (as_a <loop_vec_info> (vinfo),
13310 : stmt_info, NULL, node,
13311 : cost_vec))));
13312 :
13313 2344426 : STMT_VINFO_VECTYPE (stmt_info) = saved_vectype;
13314 :
13315 2109033 : if (!ok)
13316 235393 : return opt_result::failure_at (stmt_info->stmt,
13317 : "not vectorized:"
13318 : " relevant stmt not supported: %G",
13319 : stmt_info->stmt);
13320 :
13321 : /* Stmts that are (also) "live" (i.e. - that are used out of the loop)
13322 : need extra handling, except for vectorizable reductions. */
13323 2109033 : if (!bb_vinfo
13324 1029281 : && SLP_TREE_TYPE (node) != reduc_vec_info_type
13325 1021951 : && (SLP_TREE_TYPE (node) != lc_phi_info_type
13326 761 : || SLP_TREE_DEF_TYPE (node) == vect_internal_def)
13327 1021951 : && (!node->ldst_lanes || SLP_TREE_PERMUTE_P (node))
13328 3130984 : && !can_vectorize_live_stmts (as_a <loop_vec_info> (vinfo),
13329 : node, node_instance,
13330 : false, cost_vec))
13331 0 : return opt_result::failure_at (stmt_info->stmt,
13332 : "not vectorized:"
13333 : " live stmt not supported: %G",
13334 : stmt_info->stmt);
13335 :
13336 2109033 : return opt_result::success ();
13337 : }
13338 :
13339 :
/* Function vect_transform_stmt.

   Create a vectorized stmt to replace STMT_INFO, and insert it at GSI.

   VINFO is the vectorization context (loop or basic block), SLP_NODE the
   SLP node being code-generated and SLP_NODE_INSTANCE the SLP instance
   containing it.  Dispatches on SLP_TREE_TYPE (SLP_NODE) - decided
   during analysis - to the matching transform routine; most calls are
   asserted to succeed because analysis is expected to have vetted the
   statement already.

   Returns true if STMT_INFO was a store, false otherwise.  */

bool
vect_transform_stmt (vec_info *vinfo,
		     stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
		     slp_tree slp_node, slp_instance slp_node_instance)
{
  bool is_store = false;
  bool done;

  /* Only SLP code generation reaches here.  */
  gcc_assert (slp_node);

  /* Code generation takes the vector type from the SLP node; clear the
     per-stmt vector type (presumably a leftover from analysis -
     NOTE(review): confirm against the analysis path, which saves and
     restores STMT_VINFO_VECTYPE).  */
  if (stmt_info)
    STMT_VINFO_VECTYPE (stmt_info) = NULL_TREE;

  switch (SLP_TREE_TYPE (slp_node))
    {
    case type_demotion_vec_info_type:
    case type_promotion_vec_info_type:
    case type_conversion_vec_info_type:
      done = vectorizable_conversion (vinfo, stmt_info, gsi, slp_node, NULL);
      gcc_assert (done);
      break;

    case induc_vec_info_type:
      done = vectorizable_induction (as_a <loop_vec_info> (vinfo),
				     stmt_info, slp_node, NULL);
      gcc_assert (done);
      break;

    case shift_vec_info_type:
      done = vectorizable_shift (vinfo, stmt_info, gsi, slp_node, NULL);
      gcc_assert (done);
      break;

    case op_vec_info_type:
      done = vectorizable_operation (vinfo, stmt_info, gsi, slp_node, NULL);
      gcc_assert (done);
      break;

    case assignment_vec_info_type:
      done = vectorizable_assignment (vinfo, stmt_info, gsi, slp_node, NULL);
      gcc_assert (done);
      break;

    case load_vec_info_type:
      done = vectorizable_load (vinfo, stmt_info, gsi, slp_node, NULL);
      gcc_assert (done);
      break;

    case store_vec_info_type:
      done = vectorizable_store (vinfo, stmt_info, gsi, slp_node, NULL);
      gcc_assert (done);
      is_store = true;
      break;

    case condition_vec_info_type:
      done = vectorizable_condition (vinfo, stmt_info, gsi, slp_node, NULL);
      gcc_assert (done);
      break;

    case comparison_vec_info_type:
      done = vectorizable_comparison (vinfo, stmt_info, gsi, slp_node, NULL);
      gcc_assert (done);
      break;

    /* Note: calls are the two cases whose transform result is not
       asserted here.  */
    case call_vec_info_type:
      done = vectorizable_call (vinfo, stmt_info, gsi, slp_node, NULL);
      break;

    case call_simd_clone_vec_info_type:
      done = vectorizable_simd_clone_call (vinfo, stmt_info, gsi,
					   slp_node, NULL);
      break;

    case reduc_vec_info_type:
      done = vect_transform_reduction (as_a <loop_vec_info> (vinfo), stmt_info,
				       gsi, slp_node);
      gcc_assert (done);
      break;

    case cycle_phi_info_type:
      done = vect_transform_cycle_phi (as_a <loop_vec_info> (vinfo), stmt_info,
				       slp_node, slp_node_instance);
      gcc_assert (done);
      break;

    case lc_phi_info_type:
      done = vect_transform_lc_phi (as_a <loop_vec_info> (vinfo),
				    stmt_info, slp_node);
      gcc_assert (done);
      break;

    case recurr_info_type:
      done = vectorizable_recurr (as_a <loop_vec_info> (vinfo),
				  stmt_info, slp_node, NULL);
      gcc_assert (done);
      break;

    case phi_info_type:
      done = vectorizable_phi (as_a <bb_vec_info> (vinfo),
			       stmt_info, slp_node, NULL);
      gcc_assert (done);
      break;

    case loop_exit_ctrl_vec_info_type:
      done = vectorizable_early_exit (as_a <loop_vec_info> (vinfo),
				      stmt_info, gsi, slp_node, NULL);
      gcc_assert (done);
      break;

    case permute_info_type:
      done = vectorizable_slp_permutation (vinfo, gsi, slp_node, NULL);
      gcc_assert (done);
      break;

    default:
      /* Any other node type is only acceptable for live stmts; otherwise
         something went wrong during analysis.  */
      if (!STMT_VINFO_LIVE_P (stmt_info))
	{
	  if (dump_enabled_p ())
	    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			     "stmt not supported.\n");
	  gcc_unreachable ();
	}
      done = true;
    }

  if (SLP_TREE_TYPE (slp_node) != store_vec_info_type
      && (!slp_node->ldst_lanes || SLP_TREE_PERMUTE_P (slp_node)))
    {
      /* Handle stmts whose DEF is used outside the loop-nest that is
	 being vectorized.  */
      done = can_vectorize_live_stmts (vinfo, slp_node,
				       slp_node_instance, true, NULL);
      gcc_assert (done);
    }

  return is_store;
}
13481 :
13482 :
13483 : /* Remove a group of stores (for SLP or interleaving), free their
13484 : stmt_vec_info. */
13485 :
13486 : void
13487 0 : vect_remove_stores (vec_info *vinfo, stmt_vec_info first_stmt_info)
13488 : {
13489 0 : stmt_vec_info next_stmt_info = first_stmt_info;
13490 :
13491 0 : while (next_stmt_info)
13492 : {
13493 0 : stmt_vec_info tmp = DR_GROUP_NEXT_ELEMENT (next_stmt_info);
13494 0 : next_stmt_info = vect_orig_stmt (next_stmt_info);
13495 : /* Free the attached stmt_vec_info and remove the stmt. */
13496 0 : vinfo->remove_stmt (next_stmt_info);
13497 0 : next_stmt_info = tmp;
13498 : }
13499 0 : }
13500 :
/* If NUNITS is nonzero, return a vector type that contains NUNITS
   elements of type SCALAR_TYPE, or null if the target doesn't support
   such a type.

   If NUNITS is zero, return a vector type that contains elements of
   type SCALAR_TYPE, choosing whichever vector size the target prefers.

   If PREVAILING_MODE is VOIDmode, we have not yet chosen a vector mode
   for this vectorization region and want to "autodetect" the best choice.
   Otherwise, PREVAILING_MODE is a previously-chosen vector TYPE_MODE
   and we want the new type to be interoperable with it.  PREVAILING_MODE
   in this case can be a scalar integer mode or a vector mode; when it
   is a vector mode, the function acts like a tree-level version of
   related_vector_mode.  */

tree
get_related_vectype_for_scalar_type (machine_mode prevailing_mode,
				     tree scalar_type, poly_uint64 nunits)
{
  tree orig_scalar_type = scalar_type;
  scalar_mode inner_mode;
  machine_mode simd_mode;
  tree vectype;

  /* Only integral, pointer and scalar float types can become vector
     elements, and only when their mode is a scalar int or float mode.  */
  if ((!INTEGRAL_TYPE_P (scalar_type)
       && !POINTER_TYPE_P (scalar_type)
       && !SCALAR_FLOAT_TYPE_P (scalar_type))
      || (!is_int_mode (TYPE_MODE (scalar_type), &inner_mode)
	  && !is_float_mode (TYPE_MODE (scalar_type), &inner_mode)))
    return NULL_TREE;

  unsigned int nbytes = GET_MODE_SIZE (inner_mode);

  /* Interoperability between modes requires one to be a constant multiple
     of the other, so that the number of vectors required for each operation
     is a compile-time constant.  */
  if (prevailing_mode != VOIDmode
      && !constant_multiple_p (nunits * nbytes,
			       GET_MODE_SIZE (prevailing_mode))
      && !constant_multiple_p (GET_MODE_SIZE (prevailing_mode),
			       nunits * nbytes))
    return NULL_TREE;

  /* For vector types of elements whose mode precision doesn't
     match their types precision we use a element type of mode
     precision.  The vectorization routines will have to make sure
     they support the proper result truncation/extension.
     We also make sure to build vector types with INTEGER_TYPE
     component type only.  */
  if (INTEGRAL_TYPE_P (scalar_type)
      && (GET_MODE_BITSIZE (inner_mode) != TYPE_PRECISION (scalar_type)
	  || TREE_CODE (scalar_type) != INTEGER_TYPE))
    scalar_type = build_nonstandard_integer_type (GET_MODE_BITSIZE (inner_mode),
						  TYPE_UNSIGNED (scalar_type));

  /* We shouldn't end up building VECTOR_TYPEs of non-scalar components.
     When the component mode passes the above test simply use a type
     corresponding to that mode.  The theory is that any use that
     would cause problems with this will disable vectorization anyway.  */
  else if (!SCALAR_FLOAT_TYPE_P (scalar_type)
	   && !INTEGRAL_TYPE_P (scalar_type))
    scalar_type = lang_hooks.types.type_for_mode (inner_mode, 1);

  /* We can't build a vector type of elements with alignment bigger than
     their size.  */
  else if (nbytes < TYPE_ALIGN_UNIT (scalar_type))
    scalar_type = lang_hooks.types.type_for_mode (inner_mode,
						  TYPE_UNSIGNED (scalar_type));

  /* If we fell back to using the mode, fail if there was
     no scalar type for it (type_for_mode may return NULL_TREE).  */
  if (scalar_type == NULL_TREE)
    return NULL_TREE;

  /* If no prevailing mode was supplied, use the mode the target prefers.
     Otherwise lookup a vector mode based on the prevailing mode.  */
  if (prevailing_mode == VOIDmode)
    {
      gcc_assert (known_eq (nunits, 0U));
      simd_mode = targetm.vectorize.preferred_simd_mode (inner_mode);
      if (SCALAR_INT_MODE_P (simd_mode))
	{
	  /* Traditional behavior is not to take the integer mode
	     literally, but simply to use it as a way of determining
	     the vector size.  It is up to mode_for_vector to decide
	     what the TYPE_MODE should be.

	     Note that nunits == 1 is allowed in order to support single
	     element vector types.  */
	  if (!multiple_p (GET_MODE_SIZE (simd_mode), nbytes, &nunits)
	      || !mode_for_vector (inner_mode, nunits).exists (&simd_mode))
	    return NULL_TREE;
	}
    }
  else if (SCALAR_INT_MODE_P (prevailing_mode)
	   || !related_vector_mode (prevailing_mode,
				    inner_mode, nunits).exists (&simd_mode))
    {
      /* Fall back to using mode_for_vector, mostly in the hope of being
	 able to use an integer mode.  */
      if (known_eq (nunits, 0U)
	  && !multiple_p (GET_MODE_SIZE (prevailing_mode), nbytes, &nunits))
	return NULL_TREE;

      if (!mode_for_vector (inner_mode, nunits).exists (&simd_mode))
	return NULL_TREE;
    }

  vectype = build_vector_type_for_mode (scalar_type, simd_mode);

  /* In cases where the mode was chosen by mode_for_vector, check that
     the target actually supports the chosen mode, or that it at least
     allows the vector mode to be replaced by a like-sized integer.  */
  if (!VECTOR_MODE_P (TYPE_MODE (vectype))
      && !INTEGRAL_MODE_P (TYPE_MODE (vectype)))
    return NULL_TREE;

  /* Re-attach the address-space qualifier if we canonicalized the scalar
     type.  */
  if (TYPE_ADDR_SPACE (orig_scalar_type) != TYPE_ADDR_SPACE (vectype))
    return build_qualified_type
	     (vectype, KEEP_QUAL_ADDR_SPACE (TYPE_QUALS (orig_scalar_type)));

  return vectype;
}
13626 :
/* Function get_vectype_for_scalar_type.

   Returns the vector type corresponding to SCALAR_TYPE as supported
   by the target.  If GROUP_SIZE is nonzero and we're performing BB
   vectorization, make sure that the number of elements in the vector
   is no bigger than GROUP_SIZE.  Returns NULL_TREE if no suitable
   vector type exists.

   As a side effect, the first successful lookup in an "autodetect"
   region fixes VINFO->vector_mode, and each natural (pre-group-size)
   choice is recorded in VINFO->used_vector_modes.  */

tree
get_vectype_for_scalar_type (vec_info *vinfo, tree scalar_type,
			     unsigned int group_size)
{
  /* For BB vectorization, we should always have a group size once we've
     constructed the SLP tree; the only valid uses of zero GROUP_SIZEs
     are tentative requests during things like early data reference
     analysis and pattern recognition.  */
  if (is_a <bb_vec_info> (vinfo))
    gcc_assert (vinfo->slp_instances.is_empty () || group_size != 0);
  else
    /* Loop vectorization does not limit the vector length by the
       group size.  */
    group_size = 0;

  tree vectype = get_related_vectype_for_scalar_type (vinfo->vector_mode,
						      scalar_type);
  /* The first successful request while autodetecting (VOIDmode) fixes
     the vector mode used for the whole region.  */
  if (vectype && vinfo->vector_mode == VOIDmode)
    vinfo->vector_mode = TYPE_MODE (vectype);

  /* Register the natural choice of vector type, before the group size
     has been applied.  */
  if (vectype)
    vinfo->used_vector_modes.add (TYPE_MODE (vectype));

  /* If the natural choice of vector type doesn't satisfy GROUP_SIZE,
     try again with an explicit number of elements.  */
  if (vectype
      && group_size
      && maybe_ge (TYPE_VECTOR_SUBPARTS (vectype), group_size))
    {
      /* Start with the biggest number of units that fits within
	 GROUP_SIZE and halve it until we find a valid vector type.
	 Usually either the first attempt will succeed or all will
	 fail (in the latter case because GROUP_SIZE is too small
	 for the target), but it's possible that a target could have
	 a hole between supported vector types.

	 If GROUP_SIZE is not a power of 2, this has the effect of
	 trying the largest power of 2 that fits within the group,
	 even though the group is not a multiple of that vector size.
	 The BB vectorizer will then try to carve up the group into
	 smaller pieces.  */
      unsigned int nunits = 1 << floor_log2 (group_size);
      do
	{
	  vectype = get_related_vectype_for_scalar_type (vinfo->vector_mode,
							 scalar_type, nunits);
	  nunits /= 2;
	}
      while (nunits > 1 && !vectype);
    }

  return vectype;
}
13687 :
13688 : /* Return the vector type corresponding to SCALAR_TYPE as supported
13689 : by the target. NODE, if nonnull, is the SLP tree node that will
13690 : use the returned vector type. */
13691 :
13692 : tree
13693 159974 : get_vectype_for_scalar_type (vec_info *vinfo, tree scalar_type, slp_tree node)
13694 : {
13695 159974 : unsigned int group_size = 0;
13696 159974 : if (node)
13697 159974 : group_size = SLP_TREE_LANES (node);
13698 159974 : return get_vectype_for_scalar_type (vinfo, scalar_type, group_size);
13699 : }
13700 :
13701 : /* Function get_mask_type_for_scalar_type.
13702 :
13703 : Returns the mask type corresponding to a result of comparison
13704 : of vectors of specified SCALAR_TYPE as supported by target.
13705 : If GROUP_SIZE is nonzero and we're performing BB vectorization,
13706 : make sure that the number of elements in the vector is no bigger
13707 : than GROUP_SIZE. */
13708 :
13709 : tree
13710 1056578 : get_mask_type_for_scalar_type (vec_info *vinfo, tree scalar_type,
13711 : unsigned int group_size)
13712 : {
13713 1056578 : tree vectype = get_vectype_for_scalar_type (vinfo, scalar_type, group_size);
13714 :
13715 1056578 : if (!vectype)
13716 : return NULL;
13717 :
13718 1033220 : return truth_type_for (vectype);
13719 : }
13720 :
13721 : /* Function get_mask_type_for_scalar_type.
13722 :
13723 : Returns the mask type corresponding to a result of comparison
13724 : of vectors of specified SCALAR_TYPE as supported by target.
13725 : NODE, if nonnull, is the SLP tree node that will use the returned
13726 : vector type. */
13727 :
13728 : tree
13729 17 : get_mask_type_for_scalar_type (vec_info *vinfo, tree scalar_type,
13730 : slp_tree node)
13731 : {
13732 17 : tree vectype = get_vectype_for_scalar_type (vinfo, scalar_type, node);
13733 :
13734 17 : if (!vectype)
13735 : return NULL;
13736 :
13737 17 : return truth_type_for (vectype);
13738 : }
13739 :
13740 : /* Function get_same_sized_vectype
13741 :
13742 : Returns a vector type corresponding to SCALAR_TYPE of size
13743 : VECTOR_TYPE if supported by the target. */
13744 :
13745 : tree
13746 143875 : get_same_sized_vectype (tree scalar_type, tree vector_type)
13747 : {
13748 143875 : if (VECT_SCALAR_BOOLEAN_TYPE_P (scalar_type))
13749 0 : return truth_type_for (vector_type);
13750 :
13751 143875 : poly_uint64 nunits;
13752 287750 : if (!multiple_p (GET_MODE_SIZE (TYPE_MODE (vector_type)),
13753 287750 : GET_MODE_SIZE (TYPE_MODE (scalar_type)), &nunits))
13754 : return NULL_TREE;
13755 :
13756 143875 : return get_related_vectype_for_scalar_type (TYPE_MODE (vector_type),
13757 143875 : scalar_type, nunits);
13758 : }
13759 :
13760 : /* Return true if replacing LOOP_VINFO->vector_mode with VECTOR_MODE
13761 : would not change the chosen vector modes. */
13762 :
13763 : bool
13764 1537235 : vect_chooses_same_modes_p (vec_info *vinfo, machine_mode vector_mode)
13765 : {
13766 1537235 : for (vec_info::mode_set::iterator i = vinfo->used_vector_modes.begin ();
13767 3552127 : i != vinfo->used_vector_modes.end (); ++i)
13768 1801861 : if (!VECTOR_MODE_P (*i)
13769 5405583 : || related_vector_mode (vector_mode, GET_MODE_INNER (*i), 0) != *i)
13770 794415 : return false;
13771 742820 : return true;
13772 : }
13773 :
13774 : /* Return true if replacing VECTOR_MODE with ALT_VECTOR_MODE would not
13775 : change the chosen vector modes for analysis of a loop. */
13776 :
13777 : bool
13778 347180 : vect_chooses_same_modes_p (machine_mode vector_mode,
13779 : machine_mode alt_vector_mode)
13780 : {
13781 50759 : return (VECTOR_MODE_P (vector_mode)
13782 347180 : && VECTOR_MODE_P (alt_vector_mode)
13783 694360 : && (related_vector_mode (vector_mode,
13784 : GET_MODE_INNER (alt_vector_mode))
13785 347180 : == alt_vector_mode)
13786 373198 : && (related_vector_mode (alt_vector_mode,
13787 : GET_MODE_INNER (vector_mode))
13788 13009 : == vector_mode));
13789 : }
13790 :
/* Function vect_is_simple_use.

   Input:
   VINFO - the vect info of the loop or basic block that is being vectorized.
   OPERAND - operand in the loop or bb.
   Output:
   DEF_STMT_INFO_OUT (optional) - information about the defining stmt in
     case OPERAND is an SSA_NAME that is defined in the vectorizable region
   DEF_STMT_OUT (optional) - the defining stmt in case OPERAND is an SSA_NAME;
     the definition could be anywhere in the function
   DT - the type of definition

   Returns whether a stmt with OPERAND can be vectorized.
   For loops, supportable operands are constants, loop invariants, and operands
   that are defined by the current iteration of the loop.  Unsupportable
   operands are those that are defined by a previous iteration of the loop (as
   is the case in reduction/induction computations).
   For basic blocks, supportable operands are constants and bb invariants.
   For now, operands defined outside the basic block are not supported.  */

bool
vect_is_simple_use (tree operand, vec_info *vinfo, enum vect_def_type *dt,
		    stmt_vec_info *def_stmt_info_out, gimple **def_stmt_out)
{
  /* Initialize the optional outputs so callers see NULL on any early
     classification that doesn't reach the SSA_NAME path below.  */
  if (def_stmt_info_out)
    *def_stmt_info_out = NULL;
  if (def_stmt_out)
    *def_stmt_out = NULL;
  *dt = vect_unknown_def_type;

  if (dump_enabled_p ())
    {
      dump_printf_loc (MSG_NOTE, vect_location,
		       "vect_is_simple_use: operand ");
      if (TREE_CODE (operand) == SSA_NAME
	  && !SSA_NAME_IS_DEFAULT_DEF (operand))
	dump_gimple_expr (MSG_NOTE, TDF_SLIM, SSA_NAME_DEF_STMT (operand), 0);
      else
	dump_generic_expr (MSG_NOTE, TDF_SLIM, operand);
    }

  /* Classify the operand.  */
  if (CONSTANT_CLASS_P (operand))
    *dt = vect_constant_def;
  else if (is_gimple_min_invariant (operand))
    *dt = vect_external_def;
  else if (TREE_CODE (operand) != SSA_NAME)
    *dt = vect_unknown_def_type;
  else if (SSA_NAME_IS_DEFAULT_DEF (operand))
    *dt = vect_external_def;
  else
    {
      gimple *def_stmt = SSA_NAME_DEF_STMT (operand);
      stmt_vec_info stmt_vinfo = vinfo->lookup_def (operand);
      /* A definition without stmt_vec_info lies outside the region
	 being vectorized and hence is external.  */
      if (!stmt_vinfo)
	*dt = vect_external_def;
      else
	{
	  /* Switch to the pattern stmt if this stmt was replaced by
	     one during pattern recognition.  */
	  stmt_vinfo = vect_stmt_to_vectorize (stmt_vinfo);
	  def_stmt = stmt_vinfo->stmt;
	  *dt = STMT_VINFO_DEF_TYPE (stmt_vinfo);
	  if (def_stmt_info_out)
	    *def_stmt_info_out = stmt_vinfo;
	}
      if (def_stmt_out)
	*def_stmt_out = def_stmt;
    }

  if (dump_enabled_p ())
    {
      dump_printf (MSG_NOTE, ", type of def: ");
      switch (*dt)
	{
	case vect_uninitialized_def:
	  dump_printf (MSG_NOTE, "uninitialized\n");
	  break;
	case vect_constant_def:
	  dump_printf (MSG_NOTE, "constant\n");
	  break;
	case vect_external_def:
	  dump_printf (MSG_NOTE, "external\n");
	  break;
	case vect_internal_def:
	  dump_printf (MSG_NOTE, "internal\n");
	  break;
	case vect_induction_def:
	  dump_printf (MSG_NOTE, "induction\n");
	  break;
	case vect_reduction_def:
	  dump_printf (MSG_NOTE, "reduction\n");
	  break;
	case vect_double_reduction_def:
	  dump_printf (MSG_NOTE, "double reduction\n");
	  break;
	case vect_nested_cycle:
	  dump_printf (MSG_NOTE, "nested cycle\n");
	  break;
	case vect_first_order_recurrence:
	  dump_printf (MSG_NOTE, "first order recurrence\n");
	  break;
	case vect_condition_def:
	  dump_printf (MSG_NOTE, "control flow\n");
	  break;
	case vect_unknown_def_type:
	  dump_printf (MSG_NOTE, "unknown\n");
	  break;
	}
    }

  /* An unknown def type is the only unsupported classification.  */
  if (*dt == vect_unknown_def_type)
    {
      if (dump_enabled_p ())
	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			 "Unsupported pattern.\n");
      return false;
    }

  return true;
}
13909 :
/* Function vect_is_simple_use.

   Same as vect_is_simple_use but determines the operand by operand
   position OPERAND from either STMT or SLP_NODE, filling in *OP
   and *SLP_DEF (when SLP_NODE is not NULL).

   *VECTYPE is set to the child node's vector type, *DT to its def
   type.  Returns false only when the underlying scalar-operand
   overload rejects an internal def's representative.  */

bool
vect_is_simple_use (vec_info *vinfo, slp_tree slp_node,
		    unsigned operand, tree *op, slp_tree *slp_def,
		    enum vect_def_type *dt,
		    tree *vectype, stmt_vec_info *def_stmt_info_out)
{
  slp_tree child = SLP_TREE_CHILDREN (slp_node)[operand];
  *slp_def = child;
  *vectype = SLP_TREE_VECTYPE (child);
  if (SLP_TREE_DEF_TYPE (child) == vect_internal_def)
    {
      /* ??? VEC_PERM nodes might be intermediate and their lane value
	 have no representative (nor do we build a VEC_PERM stmt for
	 the actual operation).  Note for two-operator nodes we set
	 a representative but leave scalar stmts empty as we'd only
	 have one for a subset of lanes.  Ideally no caller would
	 require *op for internal defs.  */
      if (SLP_TREE_REPRESENTATIVE (child))
	{
	  /* Delegate classification to the scalar-operand overload
	     using the representative's LHS.  */
	  *op = gimple_get_lhs (SLP_TREE_REPRESENTATIVE (child)->stmt);
	  return vect_is_simple_use (*op, vinfo, dt, def_stmt_info_out);
	}
      else
	{
	  /* Representative-less internal defs only occur for VEC_PERM
	     nodes; hand back a placeholder *op.  */
	  gcc_assert (SLP_TREE_PERMUTE_P (child));
	  *op = error_mark_node;
	  *dt = vect_internal_def;
	  if (def_stmt_info_out)
	    *def_stmt_info_out = NULL;
	  return true;
	}
    }
  else
    {
      /* External or constant defs: report the first scalar operand.  */
      if (def_stmt_info_out)
	*def_stmt_info_out = NULL;
      *op = SLP_TREE_SCALAR_OPS (child)[0];
      *dt = SLP_TREE_DEF_TYPE (child);
      return true;
    }
}
13957 :
13958 : /* If OP is not NULL and is external or constant update its vector
13959 : type with VECTYPE. Returns true if successful or false if not,
13960 : for example when conflicting vector types are present. */
13961 :
13962 : bool
13963 2965707 : vect_maybe_update_slp_op_vectype (slp_tree op, tree vectype)
13964 : {
13965 2965707 : if (!op || SLP_TREE_DEF_TYPE (op) == vect_internal_def)
13966 : return true;
13967 1070159 : if (SLP_TREE_VECTYPE (op))
13968 66683 : return types_compatible_p (SLP_TREE_VECTYPE (op), vectype);
13969 : /* For external defs refuse to produce VECTOR_BOOLEAN_TYPE_P, those
13970 : should be handled by patters. Allow vect_constant_def for now
13971 : as well as the trivial single-lane uniform vect_external_def case
13972 : both of which we code-generate reasonably. */
13973 1003476 : if (VECTOR_BOOLEAN_TYPE_P (vectype)
13974 1047 : && SLP_TREE_DEF_TYPE (op) == vect_external_def
13975 1004147 : && SLP_TREE_LANES (op) > 1)
13976 : return false;
13977 1003277 : SLP_TREE_VECTYPE (op) = vectype;
13978 1003277 : return true;
13979 : }
13980 :
13981 : /* Function supportable_widening_operation
13982 :
13983 : Check whether an operation represented by the code CODE is a
13984 : widening operation that is supported by the target platform in
13985 : vector form (i.e., when operating on arguments of type VECTYPE_IN
13986 : producing a result of type VECTYPE_OUT).
13987 :
13988 : Widening operations we currently support are NOP (CONVERT), FLOAT,
13989 : FIX_TRUNC and WIDEN_MULT. This function checks if these operations
13990 : are supported by the target platform either directly (via vector
13991 : tree-codes), or via target builtins.
13992 :
13993 : When EVENODD_OK then also lane-swizzling operations are considered.
13994 :
13995 : Output:
13996 : - CODE1 and CODE2 are codes of vector operations to be used when
13997 : vectorizing the operation, if available.
13998 : - MULTI_STEP_CVT determines the number of required intermediate steps in
13999 : case of multi-step conversion (like char->short->int - in that case
14000 : MULTI_STEP_CVT will be 1).
14001 : - INTERM_TYPES contains the intermediate type required to perform the
14002 : widening operation (short in the above example). */
14003 :
14004 : bool
14005 444117 : supportable_widening_operation (code_helper code,
14006 : tree vectype_out, tree vectype_in,
14007 : bool evenodd_ok,
14008 : code_helper *code1,
14009 : code_helper *code2,
14010 : int *multi_step_cvt,
14011 : vec<tree> *interm_types)
14012 : {
14013 444117 : machine_mode vec_mode;
14014 444117 : enum insn_code icode1, icode2;
14015 444117 : optab optab1 = unknown_optab, optab2 = unknown_optab;
14016 444117 : tree vectype = vectype_in;
14017 444117 : tree wide_vectype = vectype_out;
14018 444117 : tree_code c1 = MAX_TREE_CODES, c2 = MAX_TREE_CODES;
14019 444117 : int i;
14020 444117 : tree prev_type, intermediate_type;
14021 444117 : machine_mode intermediate_mode, prev_mode;
14022 444117 : optab optab3, optab4;
14023 :
14024 444117 : *multi_step_cvt = 0;
14025 :
14026 444117 : switch (code.safe_as_tree_code ())
14027 : {
14028 : case MAX_TREE_CODES:
14029 : /* Don't set c1 and c2 if code is not a tree_code. */
14030 : break;
14031 :
14032 171462 : case WIDEN_MULT_EXPR:
14033 : /* The result of a vectorized widening operation usually requires
14034 : two vectors (because the widened results do not fit into one vector).
14035 : The generated vector results would normally be expected to be
14036 : generated in the same order as in the original scalar computation,
14037 : i.e. if 8 results are generated in each vector iteration, they are
14038 : to be organized as follows:
14039 : vect1: [res1,res2,res3,res4],
14040 : vect2: [res5,res6,res7,res8].
14041 :
14042 : However, in the special case that the result of the widening
14043 : operation is used in a reduction computation only, the order doesn't
14044 : matter (because when vectorizing a reduction we change the order of
14045 : the computation). Some targets can take advantage of this and
14046 : generate more efficient code. For example, targets like Altivec,
14047 : that support widen_mult using a sequence of {mult_even,mult_odd}
14048 : generate the following vectors:
14049 : vect1: [res1,res3,res5,res7],
14050 : vect2: [res2,res4,res6,res8].
14051 :
14052 : When vectorizing outer-loops, we execute the inner-loop sequentially
14053 : (each vectorized inner-loop iteration contributes to VF outer-loop
14054 : iterations in parallel). We therefore don't allow to change the
14055 : order of the computation in the inner-loop during outer-loop
14056 : vectorization. */
14057 : /* TODO: Another case in which order doesn't *really* matter is when we
14058 : widen and then contract again, e.g. (short)((int)x * y >> 8).
14059 : Normally, pack_trunc performs an even/odd permute, whereas the
14060 : repack from an even/odd expansion would be an interleave, which
14061 : would be significantly simpler for e.g. AVX2. */
14062 : /* In any case, in order to avoid duplicating the code below, recurse
14063 : on VEC_WIDEN_MULT_EVEN_EXPR. If it succeeds, all the return values
14064 : are properly set up for the caller. If we fail, we'll continue with
14065 : a VEC_WIDEN_MULT_LO/HI_EXPR check. */
14066 171462 : if (evenodd_ok
14067 171462 : && supportable_widening_operation (VEC_WIDEN_MULT_EVEN_EXPR,
14068 : vectype_out, vectype_in,
14069 : evenodd_ok, code1,
14070 : code2, multi_step_cvt,
14071 : interm_types))
14072 92592 : return true;
14073 : c1 = VEC_WIDEN_MULT_LO_EXPR;
14074 : c2 = VEC_WIDEN_MULT_HI_EXPR;
14075 : break;
14076 :
14077 : case DOT_PROD_EXPR:
14078 351525 : c1 = DOT_PROD_EXPR;
14079 351525 : c2 = DOT_PROD_EXPR;
14080 : break;
14081 :
14082 0 : case SAD_EXPR:
14083 0 : c1 = SAD_EXPR;
14084 0 : c2 = SAD_EXPR;
14085 0 : break;
14086 :
14087 169432 : case VEC_WIDEN_MULT_EVEN_EXPR:
14088 : /* Support the recursion induced just above. */
14089 169432 : c1 = VEC_WIDEN_MULT_EVEN_EXPR;
14090 169432 : c2 = VEC_WIDEN_MULT_ODD_EXPR;
14091 169432 : break;
14092 :
14093 9309 : case WIDEN_LSHIFT_EXPR:
14094 9309 : c1 = VEC_WIDEN_LSHIFT_LO_EXPR;
14095 9309 : c2 = VEC_WIDEN_LSHIFT_HI_EXPR;
14096 9309 : break;
14097 :
14098 34199 : CASE_CONVERT:
14099 34199 : c1 = VEC_UNPACK_LO_EXPR;
14100 34199 : c2 = VEC_UNPACK_HI_EXPR;
14101 34199 : break;
14102 :
14103 7207 : case FLOAT_EXPR:
14104 7207 : c1 = VEC_UNPACK_FLOAT_LO_EXPR;
14105 7207 : c2 = VEC_UNPACK_FLOAT_HI_EXPR;
14106 7207 : break;
14107 :
14108 110 : case FIX_TRUNC_EXPR:
14109 110 : c1 = VEC_UNPACK_FIX_TRUNC_LO_EXPR;
14110 110 : c2 = VEC_UNPACK_FIX_TRUNC_HI_EXPR;
14111 110 : break;
14112 :
14113 0 : default:
14114 0 : gcc_unreachable ();
14115 : }
14116 :
14117 351525 : if (BYTES_BIG_ENDIAN && c1 != VEC_WIDEN_MULT_EVEN_EXPR)
14118 : std::swap (c1, c2);
14119 :
14120 351525 : if (code == FIX_TRUNC_EXPR)
14121 : {
14122 : /* The signedness is determined from output operand. */
14123 110 : optab1 = optab_for_tree_code (c1, vectype_out, optab_default);
14124 110 : optab2 = optab_for_tree_code (c2, vectype_out, optab_default);
14125 : }
14126 622252 : else if (CONVERT_EXPR_CODE_P (code.safe_as_tree_code ())
14127 34199 : && VECTOR_BOOLEAN_TYPE_P (wide_vectype)
14128 5777 : && VECTOR_BOOLEAN_TYPE_P (vectype)
14129 5777 : && TYPE_MODE (wide_vectype) == TYPE_MODE (vectype)
14130 299330 : && SCALAR_INT_MODE_P (TYPE_MODE (vectype)))
14131 : {
14132 : /* If the input and result modes are the same, a different optab
14133 : is needed where we pass in the number of units in vectype. */
14134 : optab1 = vec_unpacks_sbool_lo_optab;
14135 : optab2 = vec_unpacks_sbool_hi_optab;
14136 : }
14137 :
14138 351525 : vec_mode = TYPE_MODE (vectype);
14139 351525 : if (widening_fn_p (code))
14140 : {
14141 : /* If this is an internal fn then we must check whether the target
14142 : supports either a low-high split or an even-odd split. */
14143 52398 : internal_fn ifn = as_internal_fn ((combined_fn) code);
14144 :
14145 52398 : internal_fn lo, hi, even, odd;
14146 52398 : lookup_hilo_internal_fn (ifn, &lo, &hi);
14147 52398 : if (BYTES_BIG_ENDIAN)
14148 : std::swap (lo, hi);
14149 52398 : *code1 = as_combined_fn (lo);
14150 52398 : *code2 = as_combined_fn (hi);
14151 52398 : optab1 = direct_internal_fn_optab (lo, {vectype, vectype});
14152 52398 : optab2 = direct_internal_fn_optab (hi, {vectype, vectype});
14153 :
14154 : /* If we don't support low-high, then check for even-odd. */
14155 52398 : if (!optab1
14156 52398 : || (icode1 = optab_handler (optab1, vec_mode)) == CODE_FOR_nothing
14157 0 : || !optab2
14158 52398 : || (icode2 = optab_handler (optab2, vec_mode)) == CODE_FOR_nothing)
14159 : {
14160 52398 : lookup_evenodd_internal_fn (ifn, &even, &odd);
14161 52398 : *code1 = as_combined_fn (even);
14162 52398 : *code2 = as_combined_fn (odd);
14163 52398 : optab1 = direct_internal_fn_optab (even, {vectype, vectype});
14164 52398 : optab2 = direct_internal_fn_optab (odd, {vectype, vectype});
14165 : }
14166 : }
14167 299127 : else if (code.is_tree_code ())
14168 : {
14169 299127 : if (code == FIX_TRUNC_EXPR)
14170 : {
14171 : /* The signedness is determined from output operand. */
14172 110 : optab1 = optab_for_tree_code (c1, vectype_out, optab_default);
14173 110 : optab2 = optab_for_tree_code (c2, vectype_out, optab_default);
14174 : }
14175 299017 : else if (CONVERT_EXPR_CODE_P ((tree_code) code.safe_as_tree_code ())
14176 34199 : && VECTOR_BOOLEAN_TYPE_P (wide_vectype)
14177 5777 : && VECTOR_BOOLEAN_TYPE_P (vectype)
14178 5777 : && TYPE_MODE (wide_vectype) == TYPE_MODE (vectype)
14179 299330 : && SCALAR_INT_MODE_P (TYPE_MODE (vectype)))
14180 : {
14181 : /* If the input and result modes are the same, a different optab
14182 : is needed where we pass in the number of units in vectype. */
14183 : optab1 = vec_unpacks_sbool_lo_optab;
14184 : optab2 = vec_unpacks_sbool_hi_optab;
14185 : }
14186 : else
14187 : {
14188 298704 : optab1 = optab_for_tree_code (c1, vectype, optab_default);
14189 298704 : optab2 = optab_for_tree_code (c2, vectype, optab_default);
14190 : }
14191 299127 : *code1 = c1;
14192 299127 : *code2 = c2;
14193 : }
14194 :
14195 351525 : if (!optab1 || !optab2)
14196 : return false;
14197 :
14198 351525 : if ((icode1 = optab_handler (optab1, vec_mode)) == CODE_FOR_nothing
14199 351525 : || (icode2 = optab_handler (optab2, vec_mode)) == CODE_FOR_nothing)
14200 208638 : return false;
14201 :
14202 :
14203 142887 : if (insn_data[icode1].operand[0].mode == TYPE_MODE (wide_vectype)
14204 142887 : && insn_data[icode2].operand[0].mode == TYPE_MODE (wide_vectype))
14205 : {
14206 133236 : if (!VECTOR_BOOLEAN_TYPE_P (vectype))
14207 : return true;
14208 : /* For scalar masks we may have different boolean
14209 : vector types having the same QImode. Thus we
14210 : add additional check for elements number. */
14211 2935 : if (known_eq (TYPE_VECTOR_SUBPARTS (vectype),
14212 : TYPE_VECTOR_SUBPARTS (wide_vectype) * 2))
14213 : return true;
14214 : }
14215 :
14216 : /* Check if it's a multi-step conversion that can be done using intermediate
14217 : types. */
14218 :
14219 9772 : prev_type = vectype;
14220 9772 : prev_mode = vec_mode;
14221 :
14222 218651 : if (!CONVERT_EXPR_CODE_P (code.safe_as_tree_code ()))
14223 : return false;
14224 :
14225 : /* We assume here that there will not be more than MAX_INTERM_CVT_STEPS
14226 : intermediate steps in promotion sequence. We try
14227 : MAX_INTERM_CVT_STEPS to get to NARROW_VECTYPE, and fail if we do
14228 : not. */
14229 9720 : interm_types->create (MAX_INTERM_CVT_STEPS);
14230 10986 : for (i = 0; i < MAX_INTERM_CVT_STEPS; i++)
14231 : {
14232 10986 : intermediate_mode = insn_data[icode1].operand[0].mode;
14233 10986 : if (VECTOR_BOOLEAN_TYPE_P (prev_type))
14234 3714 : intermediate_type
14235 3714 : = vect_halve_mask_nunits (prev_type, intermediate_mode);
14236 7272 : else if (VECTOR_MODE_P (intermediate_mode))
14237 : {
14238 7272 : tree intermediate_element_type
14239 7272 : = lang_hooks.types.type_for_mode (GET_MODE_INNER (intermediate_mode),
14240 7272 : TYPE_UNSIGNED (prev_type));
14241 7272 : intermediate_type
14242 7272 : = build_vector_type_for_mode (intermediate_element_type,
14243 : intermediate_mode);
14244 7272 : }
14245 : else
14246 0 : intermediate_type
14247 0 : = lang_hooks.types.type_for_mode (intermediate_mode,
14248 0 : TYPE_UNSIGNED (prev_type));
14249 :
14250 10986 : if (VECTOR_BOOLEAN_TYPE_P (intermediate_type)
14251 3714 : && VECTOR_BOOLEAN_TYPE_P (wide_vectype)
14252 3714 : && intermediate_mode == TYPE_MODE (wide_vectype)
14253 11143 : && SCALAR_INT_MODE_P (intermediate_mode))
14254 : {
14255 : /* If the input and result modes are the same, a different optab
14256 : is needed where we pass in the number of units in vectype. */
14257 : optab3 = vec_unpacks_sbool_lo_optab;
14258 : optab4 = vec_unpacks_sbool_hi_optab;
14259 : }
14260 : else
14261 : {
14262 10829 : optab3 = optab_for_tree_code (c1, intermediate_type, optab_default);
14263 10829 : optab4 = optab_for_tree_code (c2, intermediate_type, optab_default);
14264 : }
14265 :
14266 10986 : if (!optab3 || !optab4
14267 10986 : || (icode1 = optab_handler (optab1, prev_mode)) == CODE_FOR_nothing
14268 10970 : || insn_data[icode1].operand[0].mode != intermediate_mode
14269 10970 : || (icode2 = optab_handler (optab2, prev_mode)) == CODE_FOR_nothing
14270 10970 : || insn_data[icode2].operand[0].mode != intermediate_mode
14271 10970 : || ((icode1 = optab_handler (optab3, intermediate_mode))
14272 : == CODE_FOR_nothing)
14273 21731 : || ((icode2 = optab_handler (optab4, intermediate_mode))
14274 : == CODE_FOR_nothing))
14275 : break;
14276 :
14277 10745 : interm_types->quick_push (intermediate_type);
14278 10745 : (*multi_step_cvt)++;
14279 :
14280 10745 : if (insn_data[icode1].operand[0].mode == TYPE_MODE (wide_vectype)
14281 10745 : && insn_data[icode2].operand[0].mode == TYPE_MODE (wide_vectype))
14282 : {
14283 9515 : if (!VECTOR_BOOLEAN_TYPE_P (vectype))
14284 : return true;
14285 2780 : if (known_eq (TYPE_VECTOR_SUBPARTS (intermediate_type),
14286 : TYPE_VECTOR_SUBPARTS (wide_vectype) * 2))
14287 : return true;
14288 : }
14289 :
14290 1266 : prev_type = intermediate_type;
14291 1266 : prev_mode = intermediate_mode;
14292 : }
14293 :
14294 241 : interm_types->release ();
14295 241 : return false;
14296 : }
14297 :
14298 :
14299 : /* Function supportable_narrowing_operation
14300 :
14301 : Check whether an operation represented by the code CODE is a
14302 : narrowing operation that is supported by the target platform in
14303 : vector form (i.e., when operating on arguments of type VECTYPE_IN
14304 : and producing a result of type VECTYPE_OUT).
14305 :
14306 : Narrowing operations we currently support are NOP (CONVERT), FIX_TRUNC
14307 : and FLOAT. This function checks if these operations are supported by
14308 : the target platform directly via vector tree-codes.
14309 :
14310 : Output:
14311 : - CODE1 is the code of a vector operation to be used when
14312 : vectorizing the operation, if available.
14313 : - MULTI_STEP_CVT determines the number of required intermediate steps in
14314 : case of multi-step conversion (like int->short->char - in that case
14315 : MULTI_STEP_CVT will be 1).
14316 : - INTERM_TYPES contains the intermediate type required to perform the
14317 : narrowing operation (short in the above example). */
14318 :
14319 : bool
14320 34720 : supportable_narrowing_operation (code_helper code,
14321 : tree vectype_out, tree vectype_in,
14322 : code_helper *code1, int *multi_step_cvt,
14323 : vec<tree> *interm_types)
14324 : {
14325 34720 : machine_mode vec_mode;
14326 34720 : enum insn_code icode1;
14327 34720 : optab optab1, interm_optab;
14328 34720 : tree vectype = vectype_in;
14329 34720 : tree narrow_vectype = vectype_out;
14330 34720 : enum tree_code c1;
14331 34720 : tree intermediate_type, prev_type;
14332 34720 : machine_mode intermediate_mode, prev_mode;
14333 34720 : int i;
14334 34720 : unsigned HOST_WIDE_INT n_elts;
14335 34720 : bool uns;
14336 :
14337 34720 : if (!code.is_tree_code ())
14338 : return false;
14339 :
14340 34720 : *multi_step_cvt = 0;
14341 34720 : switch ((tree_code) code)
14342 : {
14343 34027 : CASE_CONVERT:
14344 34027 : c1 = VEC_PACK_TRUNC_EXPR;
14345 34027 : if (VECTOR_BOOLEAN_TYPE_P (narrow_vectype)
14346 9544 : && VECTOR_BOOLEAN_TYPE_P (vectype)
14347 9544 : && SCALAR_INT_MODE_P (TYPE_MODE (vectype))
14348 4546 : && TYPE_VECTOR_SUBPARTS (vectype).is_constant (&n_elts)
14349 38573 : && n_elts < BITS_PER_UNIT)
14350 : optab1 = vec_pack_sbool_trunc_optab;
14351 : else
14352 32224 : optab1 = optab_for_tree_code (c1, vectype, optab_default);
14353 : break;
14354 :
14355 483 : case FIX_TRUNC_EXPR:
14356 483 : c1 = VEC_PACK_FIX_TRUNC_EXPR;
14357 : /* The signedness is determined from output operand. */
14358 483 : optab1 = optab_for_tree_code (c1, vectype_out, optab_default);
14359 483 : break;
14360 :
14361 210 : case FLOAT_EXPR:
14362 210 : c1 = VEC_PACK_FLOAT_EXPR;
14363 210 : optab1 = optab_for_tree_code (c1, vectype, optab_default);
14364 210 : break;
14365 :
14366 0 : default:
14367 0 : gcc_unreachable ();
14368 : }
14369 :
14370 34720 : if (!optab1)
14371 : return false;
14372 :
14373 34720 : vec_mode = TYPE_MODE (vectype);
14374 34720 : if ((icode1 = optab_handler (optab1, vec_mode)) == CODE_FOR_nothing)
14375 : return false;
14376 :
14377 30722 : *code1 = c1;
14378 :
14379 30722 : if (insn_data[icode1].operand[0].mode == TYPE_MODE (narrow_vectype))
14380 : {
14381 18682 : if (!VECTOR_BOOLEAN_TYPE_P (vectype))
14382 : return true;
14383 : /* For scalar masks we may have different boolean
14384 : vector types having the same QImode. Thus we
14385 : add additional check for elements number. */
14386 4595 : if (known_eq (TYPE_VECTOR_SUBPARTS (vectype) * 2,
14387 : TYPE_VECTOR_SUBPARTS (narrow_vectype)))
14388 : return true;
14389 : }
14390 :
14391 12041 : if (code == FLOAT_EXPR)
14392 : return false;
14393 :
14394 : /* Check if it's a multi-step conversion that can be done using intermediate
14395 : types. */
14396 12041 : prev_mode = vec_mode;
14397 12041 : prev_type = vectype;
14398 12041 : if (code == FIX_TRUNC_EXPR)
14399 87 : uns = TYPE_UNSIGNED (vectype_out);
14400 : else
14401 11954 : uns = TYPE_UNSIGNED (vectype);
14402 :
14403 : /* For multi-step FIX_TRUNC_EXPR prefer signed floating to integer
14404 : conversion over unsigned, as unsigned FIX_TRUNC_EXPR is often more
14405 : costly than signed. */
14406 12041 : if (code == FIX_TRUNC_EXPR && uns)
14407 : {
14408 28 : enum insn_code icode2;
14409 :
14410 28 : intermediate_type
14411 28 : = lang_hooks.types.type_for_mode (TYPE_MODE (vectype_out), 0);
14412 28 : interm_optab
14413 28 : = optab_for_tree_code (c1, intermediate_type, optab_default);
14414 28 : if (interm_optab != unknown_optab
14415 28 : && (icode2 = optab_handler (optab1, vec_mode)) != CODE_FOR_nothing
14416 28 : && insn_data[icode1].operand[0].mode
14417 28 : == insn_data[icode2].operand[0].mode)
14418 : {
14419 : uns = false;
14420 : optab1 = interm_optab;
14421 : icode1 = icode2;
14422 : }
14423 : }
14424 :
14425 : /* We assume here that there will not be more than MAX_INTERM_CVT_STEPS
14426 : intermediate steps in promotion sequence. We try
14427 : MAX_INTERM_CVT_STEPS to get to NARROW_VECTYPE, and fail if we do not. */
14428 12041 : interm_types->create (MAX_INTERM_CVT_STEPS);
14429 26002 : for (i = 0; i < MAX_INTERM_CVT_STEPS; i++)
14430 : {
14431 13961 : intermediate_mode = insn_data[icode1].operand[0].mode;
14432 13961 : if (VECTOR_BOOLEAN_TYPE_P (prev_type))
14433 5915 : intermediate_type
14434 5915 : = vect_double_mask_nunits (prev_type, intermediate_mode);
14435 : else
14436 8046 : intermediate_type
14437 8046 : = lang_hooks.types.type_for_mode (intermediate_mode, uns);
14438 13961 : if (VECTOR_BOOLEAN_TYPE_P (intermediate_type)
14439 5915 : && VECTOR_BOOLEAN_TYPE_P (prev_type)
14440 5915 : && SCALAR_INT_MODE_P (prev_mode)
14441 2681 : && TYPE_VECTOR_SUBPARTS (intermediate_type).is_constant (&n_elts)
14442 16642 : && n_elts < BITS_PER_UNIT)
14443 : interm_optab = vec_pack_sbool_trunc_optab;
14444 : else
14445 13844 : interm_optab
14446 13844 : = optab_for_tree_code (VEC_PACK_TRUNC_EXPR, intermediate_type,
14447 : optab_default);
14448 117 : if (!interm_optab
14449 13961 : || ((icode1 = optab_handler (optab1, prev_mode)) == CODE_FOR_nothing)
14450 13961 : || insn_data[icode1].operand[0].mode != intermediate_mode
14451 27805 : || ((icode1 = optab_handler (interm_optab, intermediate_mode))
14452 : == CODE_FOR_nothing))
14453 : break;
14454 :
14455 13068 : interm_types->quick_push (intermediate_type);
14456 13068 : (*multi_step_cvt)++;
14457 :
14458 13068 : if (insn_data[icode1].operand[0].mode == TYPE_MODE (narrow_vectype))
14459 : {
14460 11148 : if (!VECTOR_BOOLEAN_TYPE_P (vectype))
14461 : return true;
14462 3984 : if (known_eq (TYPE_VECTOR_SUBPARTS (intermediate_type) * 2,
14463 : TYPE_VECTOR_SUBPARTS (narrow_vectype)))
14464 : return true;
14465 : }
14466 :
14467 1920 : prev_mode = intermediate_mode;
14468 1920 : prev_type = intermediate_type;
14469 1920 : optab1 = interm_optab;
14470 : }
14471 :
14472 893 : interm_types->release ();
14473 893 : return false;
14474 : }
14475 :
/* Function supportable_indirect_convert_operation

   Check whether an operation represented by the code CODE is single or multi
   operations that are supported by the target platform in
   vector form (i.e., when operating on arguments of type VECTYPE_IN
   producing a result of type VECTYPE_OUT).

   Convert operations we currently support directly are FIX_TRUNC and FLOAT.
   This function checks if these operations are supported
   by the target platform directly (via vector tree-codes).

   OP0/SLP_OP0 describe the scalar/SLP source operand and are only
   consulted to query range information when demoting a FLOAT_EXPR.

   Output:
   - converts contains some pairs to perform the convert operation,
   the pair's first is the intermediate type, and its second is the code of
   a vector operation to be used when converting the operation from the
   previous type to the intermediate type.  */
bool
supportable_indirect_convert_operation (code_helper code,
					tree vectype_out,
					tree vectype_in,
					vec<std::pair<tree, tree_code> > &converts,
					tree op0, slp_tree slp_op0)
{
  bool found_mode = false;
  scalar_mode lhs_mode = GET_MODE_INNER (TYPE_MODE (vectype_out));
  scalar_mode rhs_mode = GET_MODE_INNER (TYPE_MODE (vectype_in));
  tree_code tc1, tc2, code1, code2;

  tree cvt_type = NULL_TREE;
  poly_uint64 nelts = TYPE_VECTOR_SUBPARTS (vectype_in);

  /* First try a direct, single-step conversion.  */
  if (supportable_convert_operation ((tree_code) code,
				     vectype_out,
				     vectype_in,
				     &tc1))
    {
      converts.safe_push (std::make_pair (vectype_out, tc1));
      return true;
    }

  /* For conversions between float and integer types try whether
     we can use intermediate signed integer types to support the
     conversion.  */
  if (GET_MODE_SIZE (lhs_mode) != GET_MODE_SIZE (rhs_mode)
      && (code == FLOAT_EXPR
	  || (code == FIX_TRUNC_EXPR && !flag_trapping_math)))
    {
      /* Demotion: the result element is narrower than the source.  */
      bool demotion = GET_MODE_SIZE (rhs_mode) > GET_MODE_SIZE (lhs_mode);
      bool float_expr_p = code == FLOAT_EXPR;
      unsigned short target_size;
      scalar_mode intermediate_mode;
      if (demotion)
	{
	  intermediate_mode = lhs_mode;
	  target_size = GET_MODE_SIZE (rhs_mode);
	}
      else
	{
	  target_size = GET_MODE_SIZE (lhs_mode);
	  if (!int_mode_for_size
	      (GET_MODE_BITSIZE (rhs_mode), 0).exists (&intermediate_mode))
	    return false;
	}
      /* For FLOAT_EXPR the int->float step goes last; for FIX_TRUNC_EXPR
	 the float->int step goes first, followed by an integer NOP.  */
      code1 = float_expr_p ? (tree_code) code : NOP_EXPR;
      code2 = float_expr_p ? NOP_EXPR : (tree_code) code;
      opt_scalar_mode mode_iter;
      /* Walk successively wider integer modes until one both fits in
	 TARGET_SIZE and yields two supportable conversion steps.  */
      FOR_EACH_2XWIDER_MODE (mode_iter, intermediate_mode)
	{
	  intermediate_mode = mode_iter.require ();

	  if (GET_MODE_SIZE (intermediate_mode) > target_size)
	    break;

	  scalar_mode cvt_mode;
	  if (!int_mode_for_size
	      (GET_MODE_BITSIZE (intermediate_mode), 0).exists (&cvt_mode))
	    break;

	  /* Signed intermediate integer type (second argument 0).  */
	  cvt_type = build_nonstandard_integer_type
	    (GET_MODE_BITSIZE (cvt_mode), 0);

	  /* Check if the intermediate type can hold OP0's range.
	     When converting from float to integer this is not necessary
	     because values that do not fit the (smaller) target type are
	     unspecified anyway.  */
	  if (demotion && float_expr_p)
	    {
	      wide_int op_min_value, op_max_value;
	      /* For vector form, it looks like op0 doesn't have RANGE_INFO.
		 In the future, if it is supported, changes may need to be made
		 to this part, such as checking the RANGE of each element
		 in the vector.  */
	      if (slp_op0)
		{
		  tree def;
		  /* ??? Merge ranges in case of more than one lane.  */
		  if (SLP_TREE_LANES (slp_op0) != 1
		      || !(def = vect_get_slp_scalar_def (slp_op0, 0))
		      || !vect_get_range_info (def,
					       &op_min_value, &op_max_value))
		    break;
		}
	      else if (!op0
		       || TREE_CODE (op0) != SSA_NAME
		       || !SSA_NAME_RANGE_INFO (op0)
		       || !vect_get_range_info (op0, &op_min_value,
						&op_max_value))
		break;

	      /* Give up on this mode (but try a wider one) if the range
		 does not fit the intermediate precision.  */
	      if (cvt_type == NULL_TREE
		  || (wi::min_precision (op_max_value, SIGNED)
		      > TYPE_PRECISION (cvt_type))
		  || (wi::min_precision (op_min_value, SIGNED)
		      > TYPE_PRECISION (cvt_type)))
		continue;
	    }

	  cvt_type = get_related_vectype_for_scalar_type (TYPE_MODE (vectype_in),
							  cvt_type,
							  nelts);
	  /* This should happen only for SLP as long as the loop vectorizer
	     only supports same-sized vectors.  */
	  if (cvt_type == NULL_TREE
	      || maybe_ne (TYPE_VECTOR_SUBPARTS (cvt_type), nelts)
	      || !supportable_convert_operation ((tree_code) code1,
						 vectype_out,
						 cvt_type, &tc1)
	      || !supportable_convert_operation ((tree_code) code2,
						 cvt_type,
						 vectype_in, &tc2))
	    continue;

	  found_mode = true;
	  break;
	}

      if (found_mode)
	{
	  /* Record the steps innermost-first: convert to CVT_TYPE, then
	     (if the modes differ) on to VECTYPE_OUT.  */
	  converts.safe_push (std::make_pair (cvt_type, tc2));
	  if (TYPE_MODE (cvt_type) != TYPE_MODE (vectype_out))
	    converts.safe_push (std::make_pair (vectype_out, tc1));
	  return true;
	}
    }
  return false;
}
14622 :
14623 : /* Generate and return a vector mask of MASK_TYPE such that
14624 : mask[I] is true iff J + START_INDEX < END_INDEX for all J <= I.
14625 : Add the statements to SEQ. */
14626 :
14627 : tree
14628 0 : vect_gen_while (gimple_seq *seq, tree mask_type, tree start_index,
14629 : tree end_index, const char *name)
14630 : {
14631 0 : tree cmp_type = TREE_TYPE (start_index);
14632 0 : gcc_checking_assert (direct_internal_fn_supported_p (IFN_WHILE_ULT,
14633 : cmp_type, mask_type,
14634 : OPTIMIZE_FOR_SPEED));
14635 0 : gcall *call = gimple_build_call_internal (IFN_WHILE_ULT, 3,
14636 : start_index, end_index,
14637 : build_zero_cst (mask_type));
14638 0 : tree tmp;
14639 0 : if (name)
14640 0 : tmp = make_temp_ssa_name (mask_type, NULL, name);
14641 : else
14642 0 : tmp = make_ssa_name (mask_type);
14643 0 : gimple_call_set_lhs (call, tmp);
14644 0 : gimple_seq_add_stmt (seq, call);
14645 0 : return tmp;
14646 : }
14647 :
14648 : /* Generate a vector mask of type MASK_TYPE for which index I is false iff
14649 : J + START_INDEX < END_INDEX for all J <= I. Add the statements to SEQ. */
14650 :
14651 : tree
14652 0 : vect_gen_while_not (gimple_seq *seq, tree mask_type, tree start_index,
14653 : tree end_index)
14654 : {
14655 0 : tree tmp = vect_gen_while (seq, mask_type, start_index, end_index);
14656 0 : return gimple_build (seq, BIT_NOT_EXPR, mask_type, tmp);
14657 : }
14658 :
/* Try to compute the vector types required to vectorize STMT_INFO,
   returning true on success and false if vectorization isn't possible.
   If GROUP_SIZE is nonzero and we're performing BB vectorization,
   make sure that the number of elements in the vectors is no bigger
   than GROUP_SIZE.

   On success:

   - Set *STMT_VECTYPE_OUT to:
     - NULL_TREE if the statement doesn't need to be vectorized;
     - the equivalent of STMT_VINFO_VECTYPE otherwise.

   - Set *NUNITS_VECTYPE_OUT to the vector type that contains the maximum
     number of units needed to vectorize STMT_INFO, or NULL_TREE if the
     statement does not help to determine the overall number of units.  */

opt_result
vect_get_vector_types_for_stmt (vec_info *vinfo, stmt_vec_info stmt_info,
				tree *stmt_vectype_out,
				tree *nunits_vectype_out,
				unsigned int group_size)
{
  gimple *stmt = stmt_info->stmt;

  /* For BB vectorization, we should always have a group size once we've
     constructed the SLP tree; the only valid uses of zero GROUP_SIZEs
     are tentative requests during things like early data reference
     analysis and pattern recognition.  */
  if (is_a <bb_vec_info> (vinfo))
    gcc_assert (vinfo->slp_instances.is_empty () || group_size != 0);
  else
    group_size = 0;

  /* Initialize outputs so every early exit leaves them well defined.  */
  *stmt_vectype_out = NULL_TREE;
  *nunits_vectype_out = NULL_TREE;

  if (gimple_get_lhs (stmt) == NULL_TREE
      /* Allow vector conditionals through here.  */
      && !is_a <gcond *> (stmt)
      /* MASK_STORE and friends have no lhs, but are ok.  */
      && !(is_gimple_call (stmt)
	   && gimple_call_internal_p (stmt)
	   && internal_store_fn_p (gimple_call_internal_fn (stmt))))
    {
      if (is_a <gcall *> (stmt))
	{
	  /* Ignore calls with no lhs.  These must be calls to
	     #pragma omp simd functions, and what vectorization factor
	     it really needs can't be determined until
	     vectorizable_simd_clone_call.  */
	  if (dump_enabled_p ())
	    dump_printf_loc (MSG_NOTE, vect_location,
			     "defer to SIMD clone analysis.\n");
	  return opt_result::success ();
	}

      /* Any other lhs-less statement cannot be vectorized.  */
      return opt_result::failure_at (stmt,
				     "not vectorized: irregular stmt: %G", stmt);
    }

  tree vectype;
  tree scalar_type = NULL_TREE;
  if (group_size == 0 && STMT_VINFO_VECTYPE (stmt_info))
    {
      /* Reuse a vector type that was already determined for this stmt.  */
      vectype = STMT_VINFO_VECTYPE (stmt_info);
      if (dump_enabled_p ())
	dump_printf_loc (MSG_NOTE, vect_location,
			 "precomputed vectype: %T\n", vectype);
    }
  else if (vect_use_mask_type_p (stmt_info))
    {
      /* Mask-producing stmts get a boolean vector type derived from
	 the recorded mask precision.  */
      unsigned int precision = stmt_info->mask_precision;
      scalar_type = build_nonstandard_integer_type (precision, 1);
      vectype = get_mask_type_for_scalar_type (vinfo, scalar_type, group_size);
      if (!vectype)
	return opt_result::failure_at (stmt, "not vectorized: unsupported"
				       " data-type %T\n", scalar_type);
      if (dump_enabled_p ())
	dump_printf_loc (MSG_NOTE, vect_location, "vectype: %T\n", vectype);
    }
  else
    {
      /* If we got here with a gcond it means that the target had no available vector
	 mode for the scalar type.  We can't vectorize so abort.  */
      if (is_a <gcond *> (stmt))
	return opt_result::failure_at (stmt,
				       "not vectorized:"
				       " unsupported data-type for gcond %T\n",
				       scalar_type);

      /* Derive the scalar type from the data reference if there is one,
	 otherwise from the statement's lhs.  */
      if (data_reference *dr = STMT_VINFO_DATA_REF (stmt_info))
	scalar_type = TREE_TYPE (DR_REF (dr));
      else
	scalar_type = TREE_TYPE (gimple_get_lhs (stmt));

      if (dump_enabled_p ())
	{
	  if (group_size)
	    dump_printf_loc (MSG_NOTE, vect_location,
			     "get vectype for scalar type (group size %d):"
			     " %T\n", group_size, scalar_type);
	  else
	    dump_printf_loc (MSG_NOTE, vect_location,
			     "get vectype for scalar type: %T\n", scalar_type);
	}
      vectype = get_vectype_for_scalar_type (vinfo, scalar_type, group_size);
      if (!vectype)
	return opt_result::failure_at (stmt,
				       "not vectorized:"
				       " unsupported data-type %T\n",
				       scalar_type);

      if (dump_enabled_p ())
	dump_printf_loc (MSG_NOTE, vect_location, "vectype: %T\n", vectype);
    }

  /* A scalar stmt must not already operate on vectors.  */
  if (scalar_type && VECTOR_MODE_P (TYPE_MODE (scalar_type)))
    return opt_result::failure_at (stmt,
				   "not vectorized: vector stmt in loop:%G",
				   stmt);

  *stmt_vectype_out = vectype;

  /* Don't try to compute scalar types if the stmt produces a boolean
     vector; use the existing vector type instead.  */
  tree nunits_vectype = vectype;
  if (!VECTOR_BOOLEAN_TYPE_P (vectype))
    {
      /* The number of units is set according to the smallest scalar
	 type (or the largest vector size, but we only support one
	 vector size per vectorization).  */
      scalar_type = vect_get_smallest_scalar_type (stmt_info,
						   TREE_TYPE (vectype));
      if (!types_compatible_p (scalar_type, TREE_TYPE (vectype)))
	{
	  if (dump_enabled_p ())
	    dump_printf_loc (MSG_NOTE, vect_location,
			     "get vectype for smallest scalar type: %T\n",
			     scalar_type);
	  nunits_vectype = get_vectype_for_scalar_type (vinfo, scalar_type,
							group_size);
	  if (!nunits_vectype)
	    return opt_result::failure_at
	      (stmt, "not vectorized: unsupported data-type %T\n",
	       scalar_type);
	  if (dump_enabled_p ())
	    dump_printf_loc (MSG_NOTE, vect_location, "nunits vectype: %T\n",
			     nunits_vectype);
	}
    }

  /* The nunits type must subdivide evenly into the stmt vectype.  */
  if (!multiple_p (TYPE_VECTOR_SUBPARTS (nunits_vectype),
		   TYPE_VECTOR_SUBPARTS (*stmt_vectype_out)))
    return opt_result::failure_at (stmt,
				   "Not vectorized: Incompatible number "
				   "of vector subparts between %T and %T\n",
				   nunits_vectype, *stmt_vectype_out);

  if (dump_enabled_p ())
    {
      dump_printf_loc (MSG_NOTE, vect_location, "nunits = ");
      dump_dec (MSG_NOTE, TYPE_VECTOR_SUBPARTS (nunits_vectype));
      dump_printf (MSG_NOTE, "\n");
    }

  *nunits_vectype_out = nunits_vectype;
  return opt_result::success ();
}
14827 :
14828 : /* Generate and return statement sequence that sets vector length LEN that is:
14829 :
14830 : min_of_start_and_end = min (START_INDEX, END_INDEX);
14831 : left_len = END_INDEX - min_of_start_and_end;
14832 : rhs = min (left_len, LEN_LIMIT);
14833 : LEN = rhs;
14834 :
14835 : Note: the cost of the code generated by this function is modeled
14836 : by vect_estimate_min_profitable_iters, so changes here may need
14837 : corresponding changes there. */
14838 :
14839 : gimple_seq
14840 0 : vect_gen_len (tree len, tree start_index, tree end_index, tree len_limit)
14841 : {
14842 0 : gimple_seq stmts = NULL;
14843 0 : tree len_type = TREE_TYPE (len);
14844 0 : gcc_assert (TREE_TYPE (start_index) == len_type);
14845 :
14846 0 : tree min = gimple_build (&stmts, MIN_EXPR, len_type, start_index, end_index);
14847 0 : tree left_len = gimple_build (&stmts, MINUS_EXPR, len_type, end_index, min);
14848 0 : tree rhs = gimple_build (&stmts, MIN_EXPR, len_type, left_len, len_limit);
14849 0 : gimple* stmt = gimple_build_assign (len, rhs);
14850 0 : gimple_seq_add_stmt (&stmts, stmt);
14851 :
14852 0 : return stmts;
14853 : }
14854 :
|