1 : /* Statement Analysis and Transformation for Vectorization
2 : Copyright (C) 2003-2026 Free Software Foundation, Inc.
3 : Contributed by Dorit Naishlos <dorit@il.ibm.com>
4 : and Ira Rosen <irar@il.ibm.com>
5 :
6 : This file is part of GCC.
7 :
8 : GCC is free software; you can redistribute it and/or modify it under
9 : the terms of the GNU General Public License as published by the Free
10 : Software Foundation; either version 3, or (at your option) any later
11 : version.
12 :
13 : GCC is distributed in the hope that it will be useful, but WITHOUT ANY
14 : WARRANTY; without even the implied warranty of MERCHANTABILITY or
15 : FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
16 : for more details.
17 :
18 : You should have received a copy of the GNU General Public License
19 : along with GCC; see the file COPYING3. If not see
20 : <http://www.gnu.org/licenses/>. */
21 :
22 : #include "config.h"
23 : #include "system.h"
24 : #include "coretypes.h"
25 : #include "backend.h"
26 : #include "target.h"
27 : #include "rtl.h"
28 : #include "tree.h"
29 : #include "gimple.h"
30 : #include "ssa.h"
31 : #include "optabs-tree.h"
32 : #include "insn-config.h"
33 : #include "recog.h" /* FIXME: for insn_data */
34 : #include "cgraph.h"
35 : #include "dumpfile.h"
36 : #include "alias.h"
37 : #include "fold-const.h"
38 : #include "stor-layout.h"
39 : #include "tree-eh.h"
40 : #include "gimplify.h"
41 : #include "gimple-iterator.h"
42 : #include "gimplify-me.h"
43 : #include "tree-cfg.h"
44 : #include "tree-ssa-loop-manip.h"
45 : #include "cfgloop.h"
46 : #include "explow.h"
47 : #include "tree-ssa-loop.h"
48 : #include "tree-scalar-evolution.h"
49 : #include "tree-vectorizer.h"
50 : #include "builtins.h"
51 : #include "internal-fn.h"
52 : #include "tree-vector-builder.h"
53 : #include "vec-perm-indices.h"
54 : #include "gimple-range.h"
55 : #include "tree-ssa-loop-niter.h"
56 : #include "gimple-fold.h"
57 : #include "regs.h"
58 : #include "attribs.h"
59 : #include "optabs-libfuncs.h"
60 : #include "tree-dfa.h"
61 :
62 : /* For lang_hooks.types.type_for_mode. */
63 : #include "langhooks.h"
64 :
65 : static tree vector_vector_composition_type (tree, poly_uint64, tree *,
66 : bool = false);
67 :
68 : /* Return TRUE iff the given statement is in an inner loop relative to
69 : the loop being vectorized. */
70 : bool
71 5794829 : stmt_in_inner_loop_p (vec_info *vinfo, class _stmt_vec_info *stmt_info)
72 : {
73 5794829 : gimple *stmt = STMT_VINFO_STMT (stmt_info);
74 5794829 : basic_block bb = gimple_bb (stmt);
75 5794829 : loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
76 2749387 : class loop* loop;
77 :
78 2749387 : if (!loop_vinfo)
79 : return false;
80 :
81 2749387 : loop = LOOP_VINFO_LOOP (loop_vinfo);
82 :
83 2749387 : return (bb->loop_father == loop->inner);
84 : }
85 :
86 : /* Record the cost of a statement, either by directly informing the
87 : target model or by saving it in a vector for later processing.
88 : Return a preliminary estimate of the statement's cost. */
89 :
90 : unsigned
91 8834976 : record_stmt_cost (stmt_vector_for_cost *body_cost_vec, int count,
92 : enum vect_cost_for_stmt kind,
93 : stmt_vec_info stmt_info, slp_tree node,
94 : tree vectype, int misalign,
95 : enum vect_cost_model_location where)
96 : {
97 8834976 : if ((kind == vector_load || kind == unaligned_load)
98 1578519 : && (stmt_info && STMT_VINFO_GATHER_SCATTER_P (stmt_info)))
99 : kind = vector_gather_load;
100 8834976 : if ((kind == vector_store || kind == unaligned_store)
101 1014860 : && (stmt_info && STMT_VINFO_GATHER_SCATTER_P (stmt_info)))
102 8834976 : kind = vector_scatter_store;
103 :
104 8834976 : stmt_info_for_cost si
105 8834976 : = { count, kind, where, stmt_info, node, vectype, misalign };
106 8834976 : body_cost_vec->safe_push (si);
107 :
108 8834976 : return (unsigned)
109 8834976 : (builtin_vectorization_cost (kind, vectype, misalign) * count);
110 : }
111 :
112 : unsigned
113 3966180 : record_stmt_cost (stmt_vector_for_cost *body_cost_vec, int count,
114 : enum vect_cost_for_stmt kind, stmt_vec_info stmt_info,
115 : tree vectype, int misalign,
116 : enum vect_cost_model_location where)
117 : {
118 3966180 : return record_stmt_cost (body_cost_vec, count, kind, stmt_info, NULL,
119 3966180 : vectype, misalign, where);
120 : }
121 :
122 : unsigned
123 1806438 : record_stmt_cost (stmt_vector_for_cost *body_cost_vec, int count,
124 : enum vect_cost_for_stmt kind, slp_tree node,
125 : tree vectype, int misalign,
126 : enum vect_cost_model_location where)
127 : {
128 1806438 : return record_stmt_cost (body_cost_vec, count, kind,
129 : SLP_TREE_REPRESENTATIVE (node), node,
130 1806438 : vectype, misalign, where);
131 : }
132 :
133 : unsigned
134 0 : record_stmt_cost (stmt_vector_for_cost *body_cost_vec, int count,
135 : enum vect_cost_for_stmt kind,
136 : enum vect_cost_model_location where)
137 : {
138 0 : gcc_assert (kind == cond_branch_taken || kind == cond_branch_not_taken
139 : || kind == scalar_stmt);
140 0 : return record_stmt_cost (body_cost_vec, count, kind, NULL, NULL,
141 0 : NULL_TREE, 0, where);
142 : }
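
/* Illustrative usage sketch (not part of the original file): record the
   cost of two unaligned vector loads for STMT_INFO using the overload
   above that takes no SLP node.  Gather/scatter statements are
   canonicalized to vector_gather_load/vector_scatter_store before the
   stmt_info_for_cost entry is pushed; the return value is only the
   preliminary builtin_vectorization_cost estimate, the final cost is
   computed when the saved vector is fed to the target cost model.  */
static unsigned
example_record_load_cost (stmt_vector_for_cost *costs,
                          stmt_vec_info stmt_info, tree vectype)
{
  return record_stmt_cost (costs, 2, unaligned_load, stmt_info,
                           vectype, /*misalign=*/1, vect_body);
}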
143 :
144 : /* Return a variable of type ELEM_TYPE[NELEMS]. */
145 :
146 : static tree
147 0 : create_vector_array (tree elem_type, unsigned HOST_WIDE_INT nelems)
148 : {
149 0 : return create_tmp_var (build_array_type_nelts (elem_type, nelems),
150 0 : "vect_array");
151 : }
152 :
153 : /* ARRAY is an array of vectors created by create_vector_array.
154 : Return an SSA_NAME for the vector in index N. The reference
155 : is part of the vectorization of STMT_INFO and the vector is associated
156 : with scalar destination SCALAR_DEST.
157 : If we need to ensure that inactive elements are set to zero,
158 : NEED_ZEROING is true and MASK contains the loop mask to be used. */
159 :
160 : static tree
161 0 : read_vector_array (vec_info *vinfo,
162 : stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
163 : tree scalar_dest, tree array, unsigned HOST_WIDE_INT n,
164 : bool need_zeroing, tree mask)
165 : {
166 0 : tree vect_type, vect, vect_name, tmp, tmp_name, array_ref;
167 0 : gimple *new_stmt;
168 :
169 0 : gcc_assert (TREE_CODE (TREE_TYPE (array)) == ARRAY_TYPE);
170 0 : vect_type = TREE_TYPE (TREE_TYPE (array));
171 0 : tmp = vect_create_destination_var (scalar_dest, vect_type);
172 0 : vect = vect_create_destination_var (scalar_dest, vect_type);
173 0 : array_ref = build4 (ARRAY_REF, vect_type, array,
174 0 : build_int_cst (size_type_node, n),
175 : NULL_TREE, NULL_TREE);
176 :
177 0 : new_stmt = gimple_build_assign (tmp, array_ref);
178 0 : tmp_name = make_ssa_name (vect, new_stmt);
179 0 : gimple_assign_set_lhs (new_stmt, tmp_name);
180 0 : vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
181 :
182 0 : if (need_zeroing)
183 : {
184 0 : tree vec_els = vect_get_mask_load_else (MASK_LOAD_ELSE_ZERO,
185 : vect_type);
186 0 : vect_name = make_ssa_name (vect, new_stmt);
187 0 : new_stmt
188 0 : = gimple_build_assign (vect_name, VEC_COND_EXPR,
189 : mask, tmp_name, vec_els);
190 0 : vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
191 : }
192 : else
193 : vect_name = tmp_name;
194 :
195 0 : return vect_name;
196 : }
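
/* For example (a sketch of the GIMPLE emitted above, with hypothetical
   SSA names): reading index 1 of a vector array with NEED_ZEROING true
   produces

     vect_x.0_1 = vect_array[1];
     vect_x.1_2 = VEC_COND_EXPR <loop_mask_3, vect_x.0_1, { 0, ... }>;

   so that lanes inactive under MASK read as zero rather than as
   whatever the array slot happened to contain.  */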
197 :
198 : /* ARRAY is an array of vectors created by create_vector_array.
199 : Emit code to store SSA_NAME VECT in index N of the array.
200 : The store is part of the vectorization of STMT_INFO. */
201 :
202 : static void
203 0 : write_vector_array (vec_info *vinfo,
204 : stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
205 : tree vect, tree array, unsigned HOST_WIDE_INT n)
206 : {
207 0 : tree array_ref;
208 0 : gimple *new_stmt;
209 :
210 0 : array_ref = build4 (ARRAY_REF, TREE_TYPE (vect), array,
211 0 : build_int_cst (size_type_node, n),
212 : NULL_TREE, NULL_TREE);
213 :
214 0 : new_stmt = gimple_build_assign (array_ref, vect);
215 0 : vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
216 0 : }
217 :
218 : /* PTR is a pointer to an array of type TYPE. Return a representation
219 : of *PTR. ALIAS_PTR_TYPE gives the alias information for the new
220 : memory reference, which replaces the original scalar references. */
221 :
222 : static tree
223 0 : create_array_ref (tree type, tree ptr, tree alias_ptr_type)
224 : {
225 0 : tree mem_ref;
226 :
227 0 : mem_ref = build2 (MEM_REF, type, ptr, build_int_cst (alias_ptr_type, 0));
228 : /* Arrays have the same alignment as their type. */
229 0 : set_ptr_info_alignment (get_ptr_info (ptr), TYPE_ALIGN_UNIT (type), 0);
230 0 : return mem_ref;
231 : }
232 :
233 : /* Add a clobber of variable VAR to the vectorization of STMT_INFO.
234 : Emit the clobber before *GSI. */
235 :
236 : static void
237 15 : vect_clobber_variable (vec_info *vinfo, stmt_vec_info stmt_info,
238 : gimple_stmt_iterator *gsi, tree var)
239 : {
240 15 : tree clobber = build_clobber (TREE_TYPE (var));
241 15 : gimple *new_stmt = gimple_build_assign (var, clobber);
242 15 : vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
243 15 : }
244 :
245 : /* Utility functions used by vect_mark_stmts_to_be_vectorized. */
246 :
247 : /* Function vect_mark_relevant.
248 :
249 : Mark STMT_INFO as "relevant for vectorization" and add it to WORKLIST. */
250 :
251 : static void
252 3191800 : vect_mark_relevant (vec<stmt_vec_info> *worklist, stmt_vec_info stmt_info,
253 : enum vect_relevant relevant, bool live_p)
254 : {
255 3191800 : enum vect_relevant save_relevant = STMT_VINFO_RELEVANT (stmt_info);
256 3191800 : bool save_live_p = STMT_VINFO_LIVE_P (stmt_info);
257 :
258 3191800 : if (dump_enabled_p ())
259 163237 : dump_printf_loc (MSG_NOTE, vect_location,
260 : "mark relevant %d, live %d: %G", relevant, live_p,
261 : stmt_info->stmt);
262 :
263 : /* If this stmt is an original stmt in a pattern, we might need to mark its
264 : related pattern stmt instead of the original stmt. However, such stmts
265 : may have their own uses that are not in any pattern; in such cases the
266 : stmt itself should be marked. */
267 3191800 : if (STMT_VINFO_IN_PATTERN_P (stmt_info))
268 : {
269 : /* This is the last stmt in a sequence that was detected as a
270 : pattern that can potentially be vectorized. Don't mark the stmt
271 : as relevant/live because it's not going to be vectorized.
272 : Instead mark the pattern-stmt that replaces it. */
273 :
274 240671 : if (dump_enabled_p ())
275 2773 : dump_printf_loc (MSG_NOTE, vect_location,
276 : "last stmt in pattern. don't mark"
277 : " relevant/live.\n");
278 :
279 240671 : stmt_vec_info old_stmt_info = stmt_info;
280 240671 : stmt_info = STMT_VINFO_RELATED_STMT (stmt_info);
281 240671 : gcc_assert (STMT_VINFO_RELATED_STMT (stmt_info) == old_stmt_info);
282 240671 : save_relevant = STMT_VINFO_RELEVANT (stmt_info);
283 240671 : save_live_p = STMT_VINFO_LIVE_P (stmt_info);
284 :
285 240671 : if (live_p && relevant == vect_unused_in_scope)
286 : {
287 110 : if (dump_enabled_p ())
288 10 : dump_printf_loc (MSG_NOTE, vect_location,
289 : "vec_stmt_relevant_p: forcing live pattern stmt "
290 : "relevant.\n");
291 : relevant = vect_used_only_live;
292 : }
293 :
294 240671 : if (dump_enabled_p ())
295 2773 : dump_printf_loc (MSG_NOTE, vect_location,
296 : "mark relevant %d, live %d: %G", relevant, live_p,
297 : stmt_info->stmt);
298 : }
299 :
300 3191800 : STMT_VINFO_LIVE_P (stmt_info) |= live_p;
301 3191800 : if (relevant > STMT_VINFO_RELEVANT (stmt_info))
302 2862072 : STMT_VINFO_RELEVANT (stmt_info) = relevant;
303 :
304 3191800 : if (STMT_VINFO_RELEVANT (stmt_info) == save_relevant
305 329728 : && STMT_VINFO_LIVE_P (stmt_info) == save_live_p)
306 : {
307 329010 : if (dump_enabled_p ())
308 19448 : dump_printf_loc (MSG_NOTE, vect_location,
309 : "already marked relevant/live.\n");
310 329010 : return;
311 : }
312 :
313 2862790 : worklist->safe_push (stmt_info);
314 : }
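
/* For instance (an illustrative sketch): if pattern detection replaced
   the original statement

     S1: t_1 = x_2 * y_3;

   where x_2 and y_3 are widened from narrow inputs a_4 and b_5, by the
   pattern statement

     S1': t_1' = WIDEN_MULT_EXPR <a_4, b_5>;

   then marking S1 as relevant redirects to S1' through
   STMT_VINFO_RELATED_STMT, because only S1' will be vectorized.  */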
315 :
316 :
317 : /* Function is_simple_and_all_uses_invariant
318 :
319 : Return true if STMT_INFO is simple and all uses of it are invariant. */
320 :
321 : bool
322 247257 : is_simple_and_all_uses_invariant (stmt_vec_info stmt_info,
323 : loop_vec_info loop_vinfo)
324 : {
325 247257 : tree op;
326 247257 : ssa_op_iter iter;
327 :
328 440181 : gassign *stmt = dyn_cast <gassign *> (stmt_info->stmt);
329 193752 : if (!stmt)
330 : return false;
331 :
332 201285 : FOR_EACH_SSA_TREE_OPERAND (op, stmt, iter, SSA_OP_USE)
333 : {
334 200457 : enum vect_def_type dt = vect_uninitialized_def;
335 :
336 200457 : if (!vect_is_simple_use (op, loop_vinfo, &dt))
337 : {
338 5354 : if (dump_enabled_p ())
339 16 : dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
340 : "use not simple.\n");
341 192924 : return false;
342 : }
343 :
344 195103 : if (dt != vect_external_def && dt != vect_constant_def)
345 : return false;
346 : }
347 : return true;
348 : }
349 :
350 : /* Function vect_stmt_relevant_p.
351 :
352 : Return true if STMT_INFO, in the loop that is represented by LOOP_VINFO,
353 : is "relevant for vectorization".
354 :
355 : A stmt is considered "relevant for vectorization" if:
356 : - it has uses outside the loop.
357 : - it has vdefs (it alters memory).
358 : - it is a control stmt in the loop (other than the loop exit condition).
359 :
360 : CHECKME: what other side effects would the vectorizer allow? */
361 :
362 : static bool
363 5126682 : vect_stmt_relevant_p (stmt_vec_info stmt_info, loop_vec_info loop_vinfo,
364 : enum vect_relevant *relevant, bool *live_p)
365 : {
366 5126682 : class loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
367 5126682 : ssa_op_iter op_iter;
368 5126682 : imm_use_iterator imm_iter;
369 5126682 : use_operand_p use_p;
370 5126682 : def_operand_p def_p;
371 :
372 5126682 : *relevant = vect_unused_in_scope;
373 5126682 : *live_p = false;
374 :
375 : /* cond stmt other than loop exit cond. */
376 5126682 : gimple *stmt = STMT_VINFO_STMT (stmt_info);
377 5126682 : if (is_ctrl_stmt (stmt)
378 602027 : && LOOP_VINFO_LOOP_IV_COND (loop_vinfo) != stmt
379 5354210 : && (!loop->inner || gimple_bb (stmt)->loop_father == loop))
380 225541 : *relevant = vect_used_in_scope;
381 :
382 : /* changing memory. */
383 5126682 : if (gimple_code (stmt_info->stmt) != GIMPLE_PHI)
384 4253577 : if (gimple_vdef (stmt_info->stmt)
385 3651550 : && !gimple_clobber_p (stmt_info->stmt))
386 : {
387 366758 : if (dump_enabled_p ())
388 27834 : dump_printf_loc (MSG_NOTE, vect_location,
389 : "vec_stmt_relevant_p: stmt has vdefs.\n");
390 366758 : *relevant = vect_used_in_scope;
391 366758 : if (! STMT_VINFO_DATA_REF (stmt_info)
392 366758 : && zero_ssa_operands (stmt_info->stmt, SSA_OP_DEF))
393 20 : LOOP_VINFO_ALTERNATE_DEFS (loop_vinfo).safe_push (stmt_info);
394 : }
395 :
396 : /* uses outside the loop. */
397 14404775 : FOR_EACH_PHI_OR_STMT_DEF (def_p, stmt_info->stmt, op_iter, SSA_OP_DEF)
398 : {
399 15254810 : FOR_EACH_IMM_USE_FAST (use_p, imm_iter, DEF_FROM_PTR (def_p))
400 : {
401 6951988 : basic_block bb = gimple_bb (USE_STMT (use_p));
402 6951988 : if (!flow_bb_inside_loop_p (loop, bb))
403 : {
404 262059 : if (is_gimple_debug (USE_STMT (use_p)))
405 1081 : continue;
406 :
407 260978 : if (dump_enabled_p ())
408 5952 : dump_printf_loc (MSG_NOTE, vect_location,
409 : "vec_stmt_relevant_p: used out of loop.\n");
410 :
411 : /* We expect all such uses to be in the loop exit phis
412 : (because of loop-closed SSA form). */
413 260978 : gcc_assert (gimple_code (USE_STMT (use_p)) == GIMPLE_PHI);
414 :
415 260978 : *live_p = true;
416 : }
417 4151411 : }
418 : }
419 :
420 247259 : if (*live_p && *relevant == vect_unused_in_scope
421 5373939 : && !is_simple_and_all_uses_invariant (stmt_info, loop_vinfo))
422 : {
423 246429 : if (dump_enabled_p ())
424 5808 : dump_printf_loc (MSG_NOTE, vect_location,
425 : "vec_stmt_relevant_p: stmt live but not relevant.\n");
426 246429 : *relevant = vect_used_only_live;
427 : }
428 :
429 5126682 : return (*live_p || *relevant);
430 : }
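
/* Example (illustrative): in

     for (i = 0; i < n; i++)
       {
         a[i] = b[i] + 1;
         last = b[i];
       }
     ... = last;

   the store is marked vect_used_in_scope because it alters memory
   (has a vdef), while the statement computing LAST gets *LIVE_P set
   because its result is used, via a loop-closed exit phi, outside
   the loop.  */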
431 :
432 :
433 : /* Function exist_non_indexing_operands_for_use_p
434 :
435 : USE is one of the uses attached to STMT_INFO. Check if USE is
436 : used in STMT_INFO for anything other than indexing an array. */
437 :
438 : static bool
439 4277859 : exist_non_indexing_operands_for_use_p (tree use, stmt_vec_info stmt_info)
440 : {
441 4277859 : tree operand;
442 :
443 : /* USE corresponds to some operand in STMT. If there is no data
444 : reference in STMT, then any operand that corresponds to USE
445 : is not indexing an array. */
446 4277859 : if (!STMT_VINFO_DATA_REF (stmt_info))
447 : return true;
448 :
449 : /* STMT has a data_ref. FORNOW this means that it's of one of
450 : the following forms:
451 : -1- ARRAY_REF = var
452 : -2- var = ARRAY_REF
453 : (This should have been verified in analyze_data_refs).
454 :
455 : 'var' in the second case corresponds to a def, not a use,
456 : so USE cannot correspond to any operands that are not used
457 : for array indexing.
458 :
459 : Therefore, all we need to check is if STMT falls into the
460 : first case, and whether var corresponds to USE. */
461 :
462 1458568 : gassign *assign = dyn_cast <gassign *> (stmt_info->stmt);
463 1440338 : if (!assign || !gimple_assign_copy_p (assign))
464 : {
465 781650 : gcall *call = dyn_cast <gcall *> (stmt_info->stmt);
466 18230 : if (call && gimple_call_internal_p (call))
467 : {
468 18230 : internal_fn ifn = gimple_call_internal_fn (call);
469 18230 : int mask_index = internal_fn_mask_index (ifn);
470 18230 : if (mask_index >= 0
471 18230 : && use == gimple_call_arg (call, mask_index))
472 : return true;
473 11833 : int els_index = internal_fn_else_index (ifn);
474 11833 : if (els_index >= 0
475 11833 : && use == gimple_call_arg (call, els_index))
476 : return true;
477 10328 : int stored_value_index = internal_fn_stored_value_index (ifn);
478 10328 : if (stored_value_index >= 0
479 10328 : && use == gimple_call_arg (call, stored_value_index))
480 : return true;
481 8098 : if (internal_gather_scatter_fn_p (ifn)
482 8098 : && use == gimple_call_arg (call, 1))
483 : return true;
484 : }
485 771518 : return false;
486 : }
487 :
488 676918 : if (TREE_CODE (gimple_assign_lhs (assign)) == SSA_NAME)
489 : return false;
490 676918 : operand = gimple_assign_rhs1 (assign);
491 676918 : if (TREE_CODE (operand) != SSA_NAME)
492 : return false;
493 :
494 586032 : if (operand == use)
495 : return true;
496 :
497 : return false;
498 : }
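
/* Example (illustrative): for the store

     a[i_1] = x_2;

   the use of x_2 is the stored value, so true is returned for it; the
   use of i_1 only participates in the address computation and yields
   false, meaning its defining statement need not itself be
   vectorized.  */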
499 :
500 :
501 : /*
502 : Function process_use.
503 :
504 : Inputs:
505 : - a USE in STMT_VINFO in a loop represented by LOOP_VINFO
506 : - RELEVANT - enum value to be set in the STMT_VINFO of the stmt
507 : that defined USE. This is done by calling mark_relevant and passing it
508 : the WORKLIST (to add DEF_STMT to the WORKLIST in case it is relevant).
509 : - FORCE is true if exist_non_indexing_operands_for_use_p check shouldn't
510 : be performed.
511 :
512 : Outputs:
513 : Generally, LIVE_P and RELEVANT are used to define the liveness and
514 : relevance info of the DEF_STMT of this USE:
515 : STMT_VINFO_LIVE_P (DEF_stmt_vinfo) <-- live_p
516 : STMT_VINFO_RELEVANT (DEF_stmt_vinfo) <-- relevant
517 : Exceptions:
518 : - case 1: If USE is used only for address computations (e.g. array indexing),
519 : which does not need to be directly vectorized, then the liveness/relevance
520 : of the respective DEF_STMT is left unchanged.
521 : - case 2: If STMT_VINFO is a reduction phi and DEF_STMT is a reduction stmt,
522 : we skip DEF_STMT because it has already been processed.
523 : - case 3: If DEF_STMT and STMT_VINFO are in different nests, then
524 : "relevant" will be modified accordingly.
525 :
526 : Return true if everything is as expected. Return false otherwise. */
527 :
528 : static opt_result
529 4333719 : process_use (stmt_vec_info stmt_vinfo, tree use, loop_vec_info loop_vinfo,
530 : enum vect_relevant relevant, vec<stmt_vec_info> *worklist,
531 : bool force)
532 : {
533 4333719 : stmt_vec_info dstmt_vinfo;
534 4333719 : enum vect_def_type dt;
535 :
536 : /* case 1: we are only interested in uses that need to be vectorized. Uses
537 : that are used for address computation are not considered relevant. */
538 4333719 : if (!force && !exist_non_indexing_operands_for_use_p (use, stmt_vinfo))
539 1171805 : return opt_result::success ();
540 :
541 3161914 : if (!vect_is_simple_use (use, loop_vinfo, &dt, &dstmt_vinfo))
542 34717 : return opt_result::failure_at (stmt_vinfo->stmt,
543 : "not vectorized:"
544 : " unsupported use in stmt.\n");
545 :
546 3127197 : if (!dstmt_vinfo)
547 588113 : return opt_result::success ();
548 :
549 2539084 : basic_block def_bb = gimple_bb (dstmt_vinfo->stmt);
550 2539084 : basic_block bb = gimple_bb (stmt_vinfo->stmt);
551 :
552 : /* case 2: A reduction phi (STMT) defined by a reduction stmt (DSTMT_VINFO).
553 : We have to force the stmt live since the epilogue loop needs it to
554 : continue computing the reduction. */
555 2539084 : if (gimple_code (stmt_vinfo->stmt) == GIMPLE_PHI
556 267568 : && STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_reduction_def
557 84582 : && gimple_code (dstmt_vinfo->stmt) != GIMPLE_PHI
558 84582 : && STMT_VINFO_DEF_TYPE (dstmt_vinfo) == vect_reduction_def
559 2623666 : && bb->loop_father == def_bb->loop_father)
560 : {
561 84582 : if (dump_enabled_p ())
562 3898 : dump_printf_loc (MSG_NOTE, vect_location,
563 : "reduc-stmt defining reduc-phi in the same nest.\n");
564 84582 : vect_mark_relevant (worklist, dstmt_vinfo, relevant, true);
565 84582 : return opt_result::success ();
566 : }
567 :
568 : /* case 3a: outer-loop stmt defining an inner-loop stmt:
569 : outer-loop-header-bb:
570 : d = dstmt_vinfo
571 : inner-loop:
572 : stmt # use (d)
573 : outer-loop-tail-bb:
574 : ... */
575 2454502 : if (flow_loop_nested_p (def_bb->loop_father, bb->loop_father))
576 : {
577 2227 : if (dump_enabled_p ())
578 321 : dump_printf_loc (MSG_NOTE, vect_location,
579 : "outer-loop def-stmt defining inner-loop stmt.\n");
580 :
581 2227 : switch (relevant)
582 : {
583 0 : case vect_unused_in_scope:
584 0 : relevant = (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_nested_cycle) ?
585 : vect_used_in_scope : vect_unused_in_scope;
586 : break;
587 :
588 766 : case vect_used_in_outer_by_reduction:
589 766 : gcc_assert (STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_reduction_def);
590 : relevant = vect_used_by_reduction;
591 : break;
592 :
593 1181 : case vect_used_in_outer:
594 1181 : gcc_assert (STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_reduction_def);
595 : relevant = vect_used_in_scope;
596 : break;
597 :
598 : case vect_used_in_scope:
599 : break;
600 :
601 0 : default:
602 0 : gcc_unreachable ();
603 : }
604 : }
605 :
606 : /* case 3b: inner-loop stmt defining an outer-loop stmt:
607 : outer-loop-header-bb:
608 : ...
609 : inner-loop:
610 : d = dstmt_vinfo
611 : outer-loop-tail-bb (or outer-loop-exit-bb in double reduction):
612 : stmt # use (d) */
613 2452275 : else if (flow_loop_nested_p (bb->loop_father, def_bb->loop_father))
614 : {
615 2090 : if (dump_enabled_p ())
616 626 : dump_printf_loc (MSG_NOTE, vect_location,
617 : "inner-loop def-stmt defining outer-loop stmt.\n");
618 :
619 2090 : switch (relevant)
620 : {
621 0 : case vect_unused_in_scope:
622 0 : relevant = (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_reduction_def
623 0 : || STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_double_reduction_def) ?
624 : vect_used_in_outer_by_reduction : vect_unused_in_scope;
625 : break;
626 :
627 : case vect_used_by_reduction:
628 : case vect_used_only_live:
629 : relevant = vect_used_in_outer_by_reduction;
630 : break;
631 :
632 : case vect_used_in_scope:
633 2277860 : relevant = vect_used_in_outer;
634 : break;
635 :
636 0 : default:
637 0 : gcc_unreachable ();
638 : }
639 : }
640 : /* We are also not interested in uses on loop PHI backedges that are
641 : inductions. Otherwise we'll needlessly vectorize the IV increment
642 : and cause hybrid SLP for SLP inductions. */
643 2450185 : else if (gimple_code (stmt_vinfo->stmt) == GIMPLE_PHI
644 179667 : && STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_induction_def
645 2626827 : && (PHI_ARG_DEF_FROM_EDGE (stmt_vinfo->stmt,
646 : loop_latch_edge (bb->loop_father))
647 : == use))
648 : {
649 176642 : if (dump_enabled_p ())
650 4846 : dump_printf_loc (MSG_NOTE, vect_location,
651 : "induction value on backedge.\n");
652 176642 : return opt_result::success ();
653 : }
654 :
655 2277860 : vect_mark_relevant (worklist, dstmt_vinfo, relevant, false);
656 2277860 : return opt_result::success ();
657 : }
658 :
659 :
660 : /* Function vect_mark_stmts_to_be_vectorized.
661 :
662 : Not all stmts in the loop need to be vectorized. For example:
663 :
664 : for i...
665 : for j...
666 : 1. T0 = i + j
667 : 2. T1 = a[T0]
668 :
669 : 3. j = j + 1
670 :
671 : Stmts 1 and 3 do not need to be vectorized, because loop control and
672 : addressing of vectorized data-refs are handled differently.
673 :
674 : This pass detects such stmts. */
675 :
676 : opt_result
677 429825 : vect_mark_stmts_to_be_vectorized (loop_vec_info loop_vinfo, bool *fatal)
678 : {
679 429825 : class loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
680 429825 : basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
681 429825 : unsigned int nbbs = loop->num_nodes;
682 429825 : gimple_stmt_iterator si;
683 429825 : unsigned int i;
684 429825 : basic_block bb;
685 429825 : bool live_p;
686 429825 : enum vect_relevant relevant;
687 :
688 429825 : DUMP_VECT_SCOPE ("vect_mark_stmts_to_be_vectorized");
689 :
690 429825 : auto_vec<stmt_vec_info, 64> worklist;
691 :
692 : /* 1. Init worklist. */
693 1455333 : for (i = 0; i < nbbs; i++)
694 : {
695 1035850 : bb = bbs[i];
696 2127651 : for (si = gsi_start_phis (bb); !gsi_end_p (si); gsi_next (&si))
697 : {
698 2203998 : if (virtual_operand_p (gimple_phi_result (gsi_stmt (si))))
699 228894 : continue;
700 873105 : stmt_vec_info phi_info = loop_vinfo->lookup_stmt (gsi_stmt (si));
701 873105 : if (dump_enabled_p ())
702 41482 : dump_printf_loc (MSG_NOTE, vect_location, "init: phi relevant? %G",
703 : phi_info->stmt);
704 :
705 873105 : if (vect_stmt_relevant_p (phi_info, loop_vinfo, &relevant, &live_p))
706 : {
707 44625 : if (STMT_VINFO_DEF_TYPE (phi_info) == vect_unknown_def_type)
708 10198 : return opt_result::failure_at
709 10198 : (*si, "not vectorized: unhandled relevant PHI: %G", *si);
710 34427 : vect_mark_relevant (&worklist, phi_info, relevant, live_p);
711 : }
712 : }
713 7973043 : for (si = gsi_after_labels (bb); !gsi_end_p (si); gsi_next (&si))
714 : {
715 6947535 : gimple *stmt = gsi_stmt (si);
716 6947535 : if (is_gimple_debug (stmt))
717 2693814 : continue;
718 4253721 : stmt_vec_info stmt_info = loop_vinfo->lookup_stmt (stmt);
719 4253721 : if (dump_enabled_p ())
720 221698 : dump_printf_loc (MSG_NOTE, vect_location,
721 : "init: stmt relevant? %G", stmt);
722 :
723 4253721 : if (gimple_get_lhs (stmt) == NULL_TREE
724 608334 : && !is_a <gcond *> (stmt)
725 4260028 : && !is_a <gcall *> (stmt))
726 144 : return opt_result::failure_at
727 144 : (stmt, "not vectorized: irregular stmt: %G", stmt);
728 :
729 4253577 : if (vect_stmt_relevant_p (stmt_info, loop_vinfo, &relevant, &live_p))
730 794931 : vect_mark_relevant (&worklist, stmt_info, relevant, live_p);
731 : }
732 : }
733 :
734 : /* 2. Process_worklist */
735 3159383 : while (worklist.length () > 0)
736 : {
737 2774619 : use_operand_p use_p;
738 2774619 : ssa_op_iter iter;
739 :
740 2774619 : stmt_vec_info stmt_vinfo = worklist.pop ();
741 2774619 : if (dump_enabled_p ())
742 143179 : dump_printf_loc (MSG_NOTE, vect_location,
743 : "worklist: examine stmt: %G", stmt_vinfo->stmt);
744 :
745 : /* Examine the USEs of STMT. For each USE, mark the stmt that defines it
746 : (DEF_STMT) as relevant/irrelevant according to the relevance property
747 : of STMT. */
748 2774619 : relevant = STMT_VINFO_RELEVANT (stmt_vinfo);
749 :
750 : /* Generally, the relevance property of STMT (in STMT_VINFO_RELEVANT) is
751 : propagated as is to the DEF_STMTs of its USEs.
752 :
753 : One exception is when STMT has been identified as defining a reduction
754 : variable; in this case we set the relevance to vect_used_by_reduction.
755 : This is because we distinguish between two kinds of relevant stmts -
756 : those that are used by a reduction computation, and those that are
757 : (also) used by a regular computation. This allows us later on to
758 : identify stmts that are used solely by a reduction, and therefore the
759 : order of the results that they produce does not have to be kept. */
760 :
761 2774619 : switch (STMT_VINFO_DEF_TYPE (stmt_vinfo))
762 : {
763 171253 : case vect_reduction_def:
764 171253 : gcc_assert (relevant != vect_unused_in_scope);
765 171253 : if (relevant != vect_unused_in_scope
766 171253 : && relevant != vect_used_in_scope
767 171253 : && relevant != vect_used_by_reduction
768 171253 : && relevant != vect_used_only_live)
769 0 : return opt_result::failure_at
770 0 : (stmt_vinfo->stmt, "unsupported use of reduction.\n");
771 : break;
772 :
773 2204 : case vect_nested_cycle:
774 2204 : if (relevant != vect_unused_in_scope
775 2204 : && relevant != vect_used_in_outer_by_reduction
776 1614 : && relevant != vect_used_in_outer)
777 2 : return opt_result::failure_at
778 2 : (stmt_vinfo->stmt, "unsupported use of nested cycle.\n");
779 : break;
780 :
781 1194 : case vect_double_reduction_def:
782 1194 : if (relevant != vect_unused_in_scope
783 1194 : && relevant != vect_used_by_reduction
784 404 : && relevant != vect_used_only_live)
785 0 : return opt_result::failure_at
786 0 : (stmt_vinfo->stmt, "unsupported use of double reduction.\n");
787 : break;
788 :
789 : default:
790 : break;
791 : }
792 :
793 2774617 : if (is_pattern_stmt_p (stmt_vinfo))
794 : {
795 : /* Pattern statements are not inserted into the code, so
796 : FOR_EACH_PHI_OR_STMT_USE optimizes their operands out, and we
797 : have to scan the RHS or function arguments instead. */
798 621140 : if (gassign *assign = dyn_cast <gassign *> (stmt_vinfo->stmt))
799 : {
800 404808 : enum tree_code rhs_code = gimple_assign_rhs_code (assign);
801 404808 : tree op = gimple_assign_rhs1 (assign);
802 :
803 404808 : i = 1;
804 404808 : if (rhs_code == COND_EXPR && COMPARISON_CLASS_P (op))
805 : {
806 0 : opt_result res
807 0 : = process_use (stmt_vinfo, TREE_OPERAND (op, 0),
808 : loop_vinfo, relevant, &worklist, false);
809 0 : if (!res)
810 0 : return res;
811 0 : res = process_use (stmt_vinfo, TREE_OPERAND (op, 1),
812 : loop_vinfo, relevant, &worklist, false);
813 0 : if (!res)
814 0 : return res;
815 : i = 2;
816 : }
817 1165186 : for (; i < gimple_num_ops (assign); i++)
818 : {
819 764096 : op = gimple_op (assign, i);
820 764096 : if (TREE_CODE (op) == SSA_NAME)
821 : {
822 581119 : opt_result res
823 581119 : = process_use (stmt_vinfo, op, loop_vinfo, relevant,
824 : &worklist, false);
825 581119 : if (!res)
826 3718 : return res;
827 : }
828 : }
829 : }
830 216332 : else if (gcond *cond = dyn_cast <gcond *> (stmt_vinfo->stmt))
831 : {
832 209699 : tree_code rhs_code = gimple_cond_code (cond);
833 209699 : gcc_assert (TREE_CODE_CLASS (rhs_code) == tcc_comparison);
834 209699 : opt_result res
835 209699 : = process_use (stmt_vinfo, gimple_cond_lhs (cond),
836 : loop_vinfo, relevant, &worklist, false);
837 209699 : if (!res)
838 34719 : return res;
839 209699 : res = process_use (stmt_vinfo, gimple_cond_rhs (cond),
840 : loop_vinfo, relevant, &worklist, false);
841 209699 : if (!res)
842 0 : return res;
843 : }
844 6633 : else if (gcall *call = dyn_cast <gcall *> (stmt_vinfo->stmt))
845 : {
846 31717 : for (i = 0; i < gimple_call_num_args (call); i++)
847 : {
848 25084 : tree arg = gimple_call_arg (call, i);
849 25084 : opt_result res
850 25084 : = process_use (stmt_vinfo, arg, loop_vinfo, relevant,
851 : &worklist, false);
852 25084 : if (!res)
853 0 : return res;
854 : }
855 : }
856 : else
857 0 : gcc_unreachable ();
858 : }
859 : else
860 7541426 : FOR_EACH_PHI_OR_STMT_USE (use_p, stmt_vinfo->stmt, iter, SSA_OP_USE)
861 : {
862 3252258 : tree op = USE_FROM_PTR (use_p);
863 3252258 : opt_result res
864 3252258 : = process_use (stmt_vinfo, op, loop_vinfo, relevant,
865 : &worklist, false);
866 3252258 : if (!res)
867 17786 : return res;
868 : }
869 :
870 2753113 : if (STMT_VINFO_GATHER_SCATTER_P (stmt_vinfo))
871 : {
872 55860 : gather_scatter_info gs_info;
873 55860 : if (!vect_check_gather_scatter (stmt_vinfo,
874 : STMT_VINFO_VECTYPE (stmt_vinfo),
875 : loop_vinfo, &gs_info))
876 0 : gcc_unreachable ();
877 55860 : opt_result res
878 55860 : = process_use (stmt_vinfo, gs_info.offset, loop_vinfo, relevant,
879 : &worklist, true);
880 55860 : if (!res)
881 : {
882 13213 : if (fatal)
883 13213 : *fatal = false;
884 13213 : return res;
885 : }
886 : }
887 : } /* while worklist */
888 :
889 384764 : return opt_result::success ();
890 429825 : }
891 :
892 : /* Function vect_model_simple_cost.
893 :
894 : Models cost for simple operations, i.e. those that only emit N operations
895 : of the same KIND. */
896 :
897 : static void
898 794694 : vect_model_simple_cost (vec_info *vinfo, int n, slp_tree node,
899 : stmt_vector_for_cost *cost_vec,
900 : vect_cost_for_stmt kind = vector_stmt)
901 : {
902 794694 : int inside_cost = 0, prologue_cost = 0;
903 :
904 794694 : gcc_assert (cost_vec != NULL);
905 :
906 794694 : n *= vect_get_num_copies (vinfo, node);
907 :
908 : /* Pass the inside-of-loop statements to the target-specific cost model. */
909 794694 : inside_cost += record_stmt_cost (cost_vec, n, kind, node, 0, vect_body);
910 :
911 794694 : if (dump_enabled_p ())
912 33117 : dump_printf_loc (MSG_NOTE, vect_location,
913 : "vect_model_simple_cost: inside_cost = %d, "
914 : "prologue_cost = %d .\n", inside_cost, prologue_cost);
915 794694 : }
916 :
917 :
918 : /* Model cost for type demotion and promotion operations. PWR is
919 : normally zero for single-step promotions and demotions. It will be
920 : one if two-step promotion/demotion is required, and so on. NCOPIES
921 : is the number of vector results (and thus number of instructions)
922 : for the narrowest end of the operation chain. Each additional
923 : step doubles the number of instructions required. If WIDEN_ARITH
924 : is true the stmt is doing widening arithmetic. */
925 :
926 : static void
927 68296 : vect_model_promotion_demotion_cost (slp_tree slp_node,
928 : unsigned int ncopies, int pwr,
929 : stmt_vector_for_cost *cost_vec,
930 : bool widen_arith)
931 : {
932 68296 : int i;
933 68296 : int inside_cost = 0, prologue_cost = 0;
934 :
935 159314 : for (i = 0; i < pwr + 1; i++)
936 : {
937 180314 : inside_cost += record_stmt_cost (cost_vec, ncopies,
938 : widen_arith
939 : ? vector_stmt : vec_promote_demote,
940 : slp_node, 0, vect_body);
941 91018 : ncopies *= 2;
942 : }
943 :
944 68296 : if (dump_enabled_p ())
945 6376 : dump_printf_loc (MSG_NOTE, vect_location,
946 : "vect_model_promotion_demotion_cost: inside_cost = %d, "
947 : "prologue_cost = %d .\n", inside_cost, prologue_cost);
948 68296 : }
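
/* Worked example (illustrative): promoting V16QI data to V16SI takes
   two steps (QI -> HI -> SI), so PWR == 1; with NCOPIES == 2 the loop
   above records 2 + 4 = 6 vec_promote_demote stmts, the count doubling
   with each additional step away from the narrowest end.  */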
949 :
950 : /* Returns true if the current function returns DECL. */
951 :
952 : static bool
953 554414 : cfun_returns (tree decl)
954 : {
955 554414 : edge_iterator ei;
956 554414 : edge e;
957 1091659 : FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR_FOR_FN (cfun)->preds)
958 : {
959 1097878 : greturn *ret = safe_dyn_cast <greturn *> (*gsi_last_bb (e->src));
960 548939 : if (!ret)
961 0 : continue;
962 548939 : if (gimple_return_retval (ret) == decl)
963 : return true;
964 : /* We often end up with an aggregate copy to the result decl,
965 : handle that case as well. First skip intermediate clobbers
966 : though. */
967 : gimple *def = ret;
968 1650787 : do
969 : {
970 3301574 : def = SSA_NAME_DEF_STMT (gimple_vuse (def));
971 : }
972 1650787 : while (gimple_clobber_p (def));
973 537988 : if (is_a <gassign *> (def)
974 61180 : && gimple_assign_lhs (def) == gimple_return_retval (ret)
975 545052 : && gimple_assign_rhs1 (def) == decl)
976 : return true;
977 : }
978 : return false;
979 : }
980 :
981 : /* Calculate cost of DR's memory access. */
982 : void
983 1004825 : vect_get_store_cost (vec_info *, stmt_vec_info stmt_info, slp_tree slp_node,
984 : int ncopies, dr_alignment_support alignment_support_scheme,
985 : int misalignment,
986 : unsigned int *inside_cost,
987 : stmt_vector_for_cost *body_cost_vec)
988 : {
989 1004825 : tree vectype
990 1004825 : = slp_node ? SLP_TREE_VECTYPE (slp_node) : STMT_VINFO_VECTYPE (stmt_info);
991 1004825 : switch (alignment_support_scheme)
992 : {
993 548182 : case dr_aligned:
994 548182 : {
995 548182 : *inside_cost += record_stmt_cost (body_cost_vec, ncopies,
996 : vector_store, stmt_info, slp_node,
997 : vectype, 0, vect_body);
998 :
999 548182 : if (dump_enabled_p ())
1000 14497 : dump_printf_loc (MSG_NOTE, vect_location,
1001 : "vect_model_store_cost: aligned.\n");
1002 : break;
1003 : }
1004 :
1005 456643 : case dr_unaligned_supported:
1006 456643 : {
1007 : /* Here, we assign an additional cost for the unaligned store. */
1008 456643 : *inside_cost += record_stmt_cost (body_cost_vec, ncopies,
1009 : unaligned_store, stmt_info, slp_node,
1010 : vectype, misalignment, vect_body);
1011 456643 : if (dump_enabled_p ())
1012 12894 : dump_printf_loc (MSG_NOTE, vect_location,
1013 : "vect_model_store_cost: unaligned supported by "
1014 : "hardware.\n");
1015 : break;
1016 : }
1017 :
1018 0 : case dr_unaligned_unsupported:
1019 0 : {
1020 0 : *inside_cost = VECT_MAX_COST;
1021 :
1022 0 : if (dump_enabled_p ())
1023 0 : dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1024 : "vect_model_store_cost: unsupported access.\n");
1025 : break;
1026 : }
1027 :
1028 0 : default:
1029 0 : gcc_unreachable ();
1030 : }
1031 1004825 : }
1032 :
1033 : /* Calculate cost of DR's memory access. */
1034 : void
1035 922438 : vect_get_load_cost (vec_info *, stmt_vec_info stmt_info, slp_tree slp_node,
1036 : int ncopies, dr_alignment_support alignment_support_scheme,
1037 : int misalignment,
1038 : bool add_realign_cost, unsigned int *inside_cost,
1039 : unsigned int *prologue_cost,
1040 : stmt_vector_for_cost *prologue_cost_vec,
1041 : stmt_vector_for_cost *body_cost_vec,
1042 : bool record_prologue_costs)
1043 : {
1044 922438 : tree vectype
1045 922438 : = slp_node ? SLP_TREE_VECTYPE (slp_node) : STMT_VINFO_VECTYPE (stmt_info);
1046 922438 : switch (alignment_support_scheme)
1047 : {
1048 522118 : case dr_aligned:
1049 522118 : {
1050 522118 : *inside_cost += record_stmt_cost (body_cost_vec, ncopies, vector_load,
1051 : stmt_info, slp_node, vectype,
1052 : 0, vect_body);
1053 :
1054 522118 : if (dump_enabled_p ())
1055 18796 : dump_printf_loc (MSG_NOTE, vect_location,
1056 : "vect_model_load_cost: aligned.\n");
1057 :
1058 : break;
1059 : }
1060 344777 : case dr_unaligned_supported:
1061 344777 : {
1062 : /* Here, we assign an additional cost for the unaligned load. */
1063 344777 : *inside_cost += record_stmt_cost (body_cost_vec, ncopies,
1064 : unaligned_load, stmt_info, slp_node,
1065 : vectype, misalignment, vect_body);
1066 :
1067 344777 : if (dump_enabled_p ())
1068 22250 : dump_printf_loc (MSG_NOTE, vect_location,
1069 : "vect_model_load_cost: unaligned supported by "
1070 : "hardware.\n");
1071 :
1072 : break;
1073 : }
1074 0 : case dr_explicit_realign:
1075 0 : {
1076 0 : *inside_cost += record_stmt_cost (body_cost_vec, ncopies * 2,
1077 : vector_load, stmt_info, slp_node,
1078 : vectype, 0, vect_body);
1079 0 : *inside_cost += record_stmt_cost (body_cost_vec, ncopies,
1080 : vec_perm, stmt_info, slp_node,
1081 : vectype, 0, vect_body);
1082 :
1083 : /* FIXME: If the misalignment remains fixed across the iterations of
1084 : the containing loop, the following cost should be added to the
1085 : prologue costs. */
1086 0 : if (targetm.vectorize.builtin_mask_for_load)
1087 0 : *inside_cost += record_stmt_cost (body_cost_vec, 1, vector_stmt,
1088 : stmt_info, slp_node, vectype,
1089 : 0, vect_body);
1090 :
1091 0 : if (dump_enabled_p ())
1092 0 : dump_printf_loc (MSG_NOTE, vect_location,
1093 : "vect_model_load_cost: explicit realign\n");
1094 :
1095 : break;
1096 : }
1097 0 : case dr_explicit_realign_optimized:
1098 0 : {
1099 0 : if (dump_enabled_p ())
1100 0 : dump_printf_loc (MSG_NOTE, vect_location,
1101 : "vect_model_load_cost: unaligned software "
1102 : "pipelined.\n");
1103 :
1104 : /* An unaligned software pipeline has a load of an address, an initial
1105 : load, and possibly a mask operation to "prime" the loop. However,
1106 : if this is an access in a group of loads, which provide grouped
1107 : access, then the above cost should only be considered for one
1108 : access in the group. Inside the loop, there is a load op
1109 : and a realignment op. */
1110 :
1111 0 : if (add_realign_cost && record_prologue_costs)
1112 : {
1113 0 : *prologue_cost += record_stmt_cost (prologue_cost_vec, 2,
1114 : vector_stmt, stmt_info,
1115 : slp_node, vectype,
1116 : 0, vect_prologue);
1117 0 : if (targetm.vectorize.builtin_mask_for_load)
1118 0 : *prologue_cost += record_stmt_cost (prologue_cost_vec, 1,
1119 : vector_stmt, stmt_info,
1120 : slp_node, vectype,
1121 : 0, vect_prologue);
1122 : }
1123 :
1124 0 : *inside_cost += record_stmt_cost (body_cost_vec, ncopies, vector_load,
1125 : stmt_info, slp_node, vectype,
1126 : 0, vect_body);
1127 0 : *inside_cost += record_stmt_cost (body_cost_vec, ncopies, vec_perm,
1128 : stmt_info, slp_node, vectype,
1129 : 0, vect_body);
1130 :
1131 0 : if (dump_enabled_p ())
1132 0 : dump_printf_loc (MSG_NOTE, vect_location,
1133 : "vect_model_load_cost: explicit realign optimized"
1134 : "\n");
1135 :
1136 : break;
1137 : }
1138 :
1139 55543 : case dr_unaligned_unsupported:
1140 55543 : {
1141 55543 : *inside_cost = VECT_MAX_COST;
1142 :
1143 55543 : if (dump_enabled_p ())
1144 98 : dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1145 : "vect_model_load_cost: unsupported access.\n");
1146 : break;
1147 : }
1148 :
1149 0 : default:
1150 0 : gcc_unreachable ();
1151 : }
1152 922438 : }
1153 :
1154 : /* Insert the new stmt NEW_STMT at *GSI or at the appropriate place in
1155 : the loop preheader for the vectorized stmt STMT_VINFO. */
1156 :
1157 : static void
1158 6070 : vect_init_vector_1 (vec_info *vinfo, stmt_vec_info stmt_vinfo, gimple *new_stmt,
1159 : gimple_stmt_iterator *gsi)
1160 : {
1161 6070 : if (gsi)
1162 2765 : vect_finish_stmt_generation (vinfo, stmt_vinfo, new_stmt, gsi);
1163 : else
1164 3305 : vinfo->insert_on_entry (stmt_vinfo, new_stmt);
1165 :
1166 6070 : if (dump_enabled_p ())
1167 1807 : dump_printf_loc (MSG_NOTE, vect_location,
1168 : "created new init_stmt: %G", new_stmt);
1169 6070 : }
1170 :
1171 : /* Function vect_init_vector.
1172 :
1173 : Insert a new stmt (INIT_STMT) that initializes a new variable of type
1174 : TYPE with the value VAL. If TYPE is a vector type and VAL does not have
1175 : vector type, a vector with all elements equal to VAL is created first.
1176 : Place the initialization at GSI if it is not NULL. Otherwise, place the
1177 : initialization at the loop preheader.
1178 : Return the DEF of INIT_STMT.
1179 : It will be used in the vectorization of STMT_INFO. */
1180 :
1181 : tree
1182 4354 : vect_init_vector (vec_info *vinfo, stmt_vec_info stmt_info, tree val, tree type,
1183 : gimple_stmt_iterator *gsi)
1184 : {
1185 4354 : gimple *init_stmt;
1186 4354 : tree new_temp;
1187 :
1188 : /* We abuse this function to push something to an SSA name with initial 'val'. */
1189 4354 : if (! useless_type_conversion_p (type, TREE_TYPE (val)))
1190 : {
1191 1344 : gcc_assert (VECTOR_TYPE_P (type));
1192 1344 : if (! types_compatible_p (TREE_TYPE (type), TREE_TYPE (val)))
1193 : {
1194 : /* Scalar boolean value should be transformed into
1195 : all zeros or all ones value before building a vector. */
1196 8 : if (VECTOR_BOOLEAN_TYPE_P (type))
1197 : {
1198 0 : tree true_val = build_all_ones_cst (TREE_TYPE (type));
1199 0 : tree false_val = build_zero_cst (TREE_TYPE (type));
1200 :
1201 0 : if (CONSTANT_CLASS_P (val))
1202 0 : val = integer_zerop (val) ? false_val : true_val;
1203 : else
1204 : {
1205 0 : new_temp = make_ssa_name (TREE_TYPE (type));
1206 0 : init_stmt = gimple_build_assign (new_temp, COND_EXPR,
1207 : val, true_val, false_val);
1208 0 : vect_init_vector_1 (vinfo, stmt_info, init_stmt, gsi);
1209 0 : val = new_temp;
1210 : }
1211 : }
1212 : else
1213 : {
1214 8 : gimple_seq stmts = NULL;
1215 8 : if (! INTEGRAL_TYPE_P (TREE_TYPE (val)))
1216 8 : val = gimple_build (&stmts, VIEW_CONVERT_EXPR,
1217 8 : TREE_TYPE (type), val);
1218 : else
1219 : /* ??? Condition vectorization expects us to do
1220 : promotion of invariant/external defs. */
1221 0 : val = gimple_convert (&stmts, TREE_TYPE (type), val);
1222 16 : for (gimple_stmt_iterator gsi2 = gsi_start (stmts);
1223 16 : !gsi_end_p (gsi2); )
1224 : {
1225 8 : init_stmt = gsi_stmt (gsi2);
1226 8 : gsi_remove (&gsi2, false);
1227 8 : vect_init_vector_1 (vinfo, stmt_info, init_stmt, gsi);
1228 : }
1229 : }
1230 : }
1231 1344 : val = build_vector_from_val (type, val);
1232 : }
1233 :
1234 4354 : new_temp = vect_get_new_ssa_name (type, vect_simple_var, "cst_");
1235 4354 : init_stmt = gimple_build_assign (new_temp, val);
1236 4354 : vect_init_vector_1 (vinfo, stmt_info, init_stmt, gsi);
1237 4354 : return new_temp;
1238 : }
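
/* Example (illustrative): vectorizing x_1 = y_2 + 3 with vector type
   V4SI needs the invariant 3 in vector form, so vect_init_vector
   builds the constant with build_vector_from_val and emits

     cst_5 = { 3, 3, 3, 3 };

   in the loop preheader (GSI == NULL), returning cst_5.  */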
1239 :
1240 :
1241 : /* Get vectorized definitions for OP0 and OP1. */
1242 :
1243 : void
1244 186366 : vect_get_vec_defs (vec_info *, slp_tree slp_node,
1245 : tree op0, vec<tree> *vec_oprnds0,
1246 : tree op1, vec<tree> *vec_oprnds1,
1247 : tree op2, vec<tree> *vec_oprnds2,
1248 : tree op3, vec<tree> *vec_oprnds3)
1249 : {
1250 186366 : if (op0)
1251 184713 : vect_get_slp_defs (SLP_TREE_CHILDREN (slp_node)[0], vec_oprnds0);
1252 186366 : if (op1)
1253 137356 : vect_get_slp_defs (SLP_TREE_CHILDREN (slp_node)[1], vec_oprnds1);
1254 186366 : if (op2)
1255 9213 : vect_get_slp_defs (SLP_TREE_CHILDREN (slp_node)[2], vec_oprnds2);
1256 186366 : if (op3)
1257 0 : vect_get_slp_defs (SLP_TREE_CHILDREN (slp_node)[3], vec_oprnds3);
1258 186366 : }
1259 :
1260 : /* Helper function called by vect_finish_replace_stmt and
1261 : vect_finish_stmt_generation. Set the location of the new
1262 : statement and put it in the original statement's EH region if it can throw. */
1263 :
1264 : static void
1265 1424167 : vect_finish_stmt_generation_1 (vec_info *,
1266 : stmt_vec_info stmt_info, gimple *vec_stmt)
1267 : {
1268 1424167 : if (dump_enabled_p ())
1269 147747 : dump_printf_loc (MSG_NOTE, vect_location, "add new stmt: %G", vec_stmt);
1270 :
1271 1424167 : if (stmt_info)
1272 : {
1273 1392906 : gimple_set_location (vec_stmt, gimple_location (stmt_info->stmt));
1274 :
1275 : /* While EH edges will generally prevent vectorization, stmt might
1276 : e.g. be in a must-not-throw region. Ensure newly created stmts
1277 : that could throw are part of the same region. */
1278 1392906 : int lp_nr = lookup_stmt_eh_lp (stmt_info->stmt);
1279 1392906 : if (lp_nr != 0 && stmt_could_throw_p (cfun, vec_stmt))
1280 48 : add_stmt_to_eh_lp (vec_stmt, lp_nr);
1281 : }
1282 : else
1283 31261 : gcc_assert (!stmt_could_throw_p (cfun, vec_stmt));
1284 1424167 : }
1285 :
1286 : /* Replace the scalar statement STMT_INFO with a new vector statement VEC_STMT,
1287 : which sets the same scalar result as STMT_INFO did. */
1289 :
1290 : void
1291 839 : vect_finish_replace_stmt (vec_info *vinfo,
1292 : stmt_vec_info stmt_info, gimple *vec_stmt)
1293 : {
1294 839 : gimple *scalar_stmt = vect_orig_stmt (stmt_info)->stmt;
1295 839 : gcc_assert (gimple_get_lhs (scalar_stmt) == gimple_get_lhs (vec_stmt));
1296 :
1297 839 : gimple_stmt_iterator gsi = gsi_for_stmt (scalar_stmt);
1298 839 : gsi_replace (&gsi, vec_stmt, true);
1299 :
1300 839 : vect_finish_stmt_generation_1 (vinfo, stmt_info, vec_stmt);
1301 839 : }
1302 :
1303 : /* Add VEC_STMT to the vectorized implementation of STMT_INFO and insert it
1304 : before *GSI. */
1305 :
1306 : void
1307 1423328 : vect_finish_stmt_generation (vec_info *vinfo,
1308 : stmt_vec_info stmt_info, gimple *vec_stmt,
1309 : gimple_stmt_iterator *gsi)
1310 : {
1311 1423328 : gcc_assert (!stmt_info || gimple_code (stmt_info->stmt) != GIMPLE_LABEL);
1312 :
1313 1423328 : if (!gsi_end_p (*gsi)
1314 2845381 : && gimple_has_mem_ops (vec_stmt))
1315 : {
1316 1422053 : gimple *at_stmt = gsi_stmt (*gsi);
1317 1422053 : tree vuse = gimple_vuse (at_stmt);
1318 1415761 : if (vuse && TREE_CODE (vuse) == SSA_NAME)
1319 : {
1320 1274756 : tree vdef = gimple_vdef (at_stmt);
1321 1274756 : gimple_set_vuse (vec_stmt, gimple_vuse (at_stmt));
1322 1274756 : gimple_set_modified (vec_stmt, true);
1323 : /* If we have an SSA vuse and insert a store, update virtual
1324 : SSA form to avoid triggering the renamer. Do so only
1325 : if we can easily see all uses - which is what almost always
1326 : happens with the way vectorized stmts are inserted. */
1327 750280 : if ((vdef && TREE_CODE (vdef) == SSA_NAME)
1328 2025000 : && ((is_gimple_assign (vec_stmt)
1329 749372 : && !is_gimple_reg (gimple_assign_lhs (vec_stmt)))
1330 65299 : || (is_gimple_call (vec_stmt)
1331 872 : && (!(gimple_call_flags (vec_stmt)
1332 872 : & (ECF_CONST|ECF_PURE|ECF_NOVOPS))
1333 1 : || (gimple_call_lhs (vec_stmt)
1334 1 : && !is_gimple_reg (gimple_call_lhs (vec_stmt)))))))
1335 : {
1336 685816 : tree new_vdef = copy_ssa_name (vuse, vec_stmt);
1337 685816 : gimple_set_vdef (vec_stmt, new_vdef);
1338 685816 : SET_USE (gimple_vuse_op (at_stmt), new_vdef);
1339 : }
1340 : }
1341 : }
1342 1423328 : gsi_insert_before (gsi, vec_stmt, GSI_SAME_STMT);
1343 1423328 : vect_finish_stmt_generation_1 (vinfo, stmt_info, vec_stmt);
1344 1423328 : }
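
/* For example (an illustrative sketch): inserting a vector store
   immediately before the scalar store it replaces,

     # .MEM_5 = VDEF <.MEM_4>
     a[i_1] = x_2;

   gives the new statement a fresh vdef and rewires the old statement's
   vuse, producing

     # .MEM_6 = VDEF <.MEM_4>
     vect_a[i_1] = vx_3;
     # .MEM_5 = VDEF <.MEM_6>
     a[i_1] = x_2;

   which keeps virtual SSA form valid without invoking the renamer.  */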
1345 :
1346 : /* We want to vectorize a call to combined function CFN with function
1347 : decl FNDECL, using VECTYPE_OUT as the type of the output and VECTYPE_IN
1348 : as the types of all inputs. Check whether this is possible using
1349 : an internal function, returning its code if so or IFN_LAST if not. */
1350 :
1351 : static internal_fn
1352 16379 : vectorizable_internal_function (combined_fn cfn, tree fndecl,
1353 : tree vectype_out, tree vectype_in)
1354 : {
1355 16379 : internal_fn ifn;
1356 16379 : if (internal_fn_p (cfn))
1357 13875 : ifn = as_internal_fn (cfn);
1358 : else
1359 2504 : ifn = associated_internal_fn (fndecl);
1360 16379 : if (ifn != IFN_LAST && direct_internal_fn_p (ifn))
1361 : {
1362 12923 : const direct_internal_fn_info &info = direct_internal_fn (ifn);
1363 12923 : if (info.vectorizable)
1364 : {
1365 12923 : bool same_size_p = TYPE_SIZE (vectype_in) == TYPE_SIZE (vectype_out);
1366 12923 : tree type0 = (info.type0 < 0 ? vectype_out : vectype_in);
1367 12923 : tree type1 = (info.type1 < 0 ? vectype_out : vectype_in);
1368 :
1369 : /* The type sizes of vectype_in and vectype_out must be exactly
1370 : the same when vectype_out does not participate in the optab
1371 : query; there is no size restriction when vectype_out is part
1372 : of the optab query. */
1373 12923 : if (type0 != vectype_out && type1 != vectype_out && !same_size_p)
1374 : return IFN_LAST;
1375 :
1376 12903 : if (direct_internal_fn_supported_p (ifn, tree_pair (type0, type1),
1377 : OPTIMIZE_FOR_SPEED))
1378 : return ifn;
1379 : }
1380 : }
1381 : return IFN_LAST;
1382 : }
1383 :
1384 :
1385 : static tree permute_vec_elements (vec_info *, tree, tree, tree, stmt_vec_info,
1386 : gimple_stmt_iterator *);
1387 :
1388 : /* Check whether a load or store statement in the loop described by
1389 : LOOP_VINFO is possible in a loop using partial vectors. This is
1390 : testing whether the vectorizer pass has the appropriate support,
1391 : as well as whether the target does.
1392 :
1393 : VLS_TYPE says whether the statement is a load or store and VECTYPE
1394 : is the type of the vector being loaded or stored. SLP_NODE is the SLP
1395 : node that contains the statement, or null if none. MEMORY_ACCESS_TYPE
1396 : says how the load or store is going to be implemented and GROUP_SIZE
1397 : is the number of load or store statements in the containing group.
1398 : If the access is a gather load or scatter store, GS_INFO describes
1399 : its arguments. If the load or store is conditional, SCALAR_MASK is the
1400 : condition under which it occurs.
1401 :
1402 : Clear LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P if a loop using partial
1403 : vectors is not supported, otherwise record the required rgroup control
1404 : types.
1405 :
1406 : If partial vectors can be used and ELSVALS is nonzero the supported
1407 : else values will be added to the vector ELSVALS points to. */
1408 :
1409 : static void
1410 288160 : check_load_store_for_partial_vectors (loop_vec_info loop_vinfo, tree vectype,
1411 : slp_tree slp_node,
1412 : vec_load_store_type vls_type,
1413 : int group_size,
1414 : vect_load_store_data *ls,
1415 : slp_tree mask_node,
1416 : vec<int> *elsvals = nullptr)
1417 : {
1418 288160 : vect_memory_access_type memory_access_type = ls->memory_access_type;
1419 :
1420 : /* Invariant loads need no special support. */
1421 288160 : if (memory_access_type == VMAT_INVARIANT)
1422 28756 : return;
1423 :
1424 : /* Figure out whether the mask is uniform. scalar_mask is used to
1425 : populate the scalar_cond_masked_set. */
1426 286945 : tree scalar_mask = NULL_TREE;
1427 286945 : if (mask_node)
1428 4968 : for (unsigned i = 0; i < SLP_TREE_LANES (mask_node); ++i)
1429 : {
1430 2535 : tree def = vect_get_slp_scalar_def (mask_node, i);
1431 2535 : if (!def
1432 2535 : || (scalar_mask && def != scalar_mask))
1433 : {
1434 : scalar_mask = NULL;
1435 : break;
1436 : }
1437 : else
1438 2504 : scalar_mask = def;
1439 : }
1440 :
1441 286945 : unsigned int nvectors = vect_get_num_copies (loop_vinfo, slp_node);
1442 286945 : vec_loop_masks *masks = &LOOP_VINFO_MASKS (loop_vinfo);
1443 286945 : vec_loop_lens *lens = &LOOP_VINFO_LENS (loop_vinfo);
1444 286945 : machine_mode vecmode = TYPE_MODE (vectype);
1445 286945 : bool is_load = (vls_type == VLS_LOAD);
1446 286945 : if (memory_access_type == VMAT_LOAD_STORE_LANES)
1447 : {
1448 0 : nvectors /= group_size;
1449 0 : internal_fn ifn
1450 0 : = (is_load ? vect_load_lanes_supported (vectype, group_size, true,
1451 : elsvals)
1452 0 : : vect_store_lanes_supported (vectype, group_size, true));
1453 0 : if (ifn == IFN_MASK_LEN_LOAD_LANES || ifn == IFN_MASK_LEN_STORE_LANES)
1454 0 : vect_record_loop_len (loop_vinfo, lens, nvectors, vectype, 1);
1455 0 : else if (ifn == IFN_MASK_LOAD_LANES || ifn == IFN_MASK_STORE_LANES)
1456 0 : vect_record_loop_mask (loop_vinfo, masks, nvectors, vectype,
1457 : scalar_mask);
1458 : else
1459 : {
1460 0 : if (dump_enabled_p ())
1461 0 : dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1462 : "can't operate on partial vectors because"
1463 : " the target doesn't have an appropriate"
1464 : " load/store-lanes instruction.\n");
1465 0 : LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo) = false;
1466 : }
1467 0 : return;
1468 : }
1469 :
1470 286945 : if (mat_gather_scatter_p (memory_access_type))
1471 : {
1472 1750 : internal_fn ifn = (is_load
1473 1750 : ? IFN_MASK_GATHER_LOAD
1474 : : IFN_MASK_SCATTER_STORE);
1475 421 : internal_fn len_ifn = (is_load
1476 : ? IFN_MASK_LEN_GATHER_LOAD
1477 : : IFN_MASK_LEN_SCATTER_STORE);
1478 1750 : stmt_vec_info repr = SLP_TREE_REPRESENTATIVE (slp_node);
1479 1750 : tree off_vectype = (STMT_VINFO_GATHER_SCATTER_P (repr)
1480 1750 : ? SLP_TREE_VECTYPE (SLP_TREE_CHILDREN (slp_node)[0])
1481 1750 : : ls->strided_offset_vectype);
1482 1750 : tree memory_type = TREE_TYPE (DR_REF (STMT_VINFO_DR_INFO (repr)->dr));
1483 1750 : int scale = SLP_TREE_GS_SCALE (slp_node);
1484 :
1485 : /* The following "supported" checks just verify what we established in
1486 : get_load_store_type and don't try different offset types.
1487 : Therefore, off_vectype must be a supported offset type. In case
1488 : we chose a different one, use that instead. */
1489 1750 : if (ls->supported_offset_vectype)
1490 0 : off_vectype = ls->supported_offset_vectype;
1491 : /* Same for scale. */
1492 1750 : if (ls->supported_scale)
1493 0 : scale = ls->supported_scale;
1494 :
1495 1750 : if (internal_gather_scatter_fn_supported_p (len_ifn, vectype,
1496 : memory_type,
1497 : off_vectype, scale,
1498 : elsvals))
1499 0 : vect_record_loop_len (loop_vinfo, lens, nvectors, vectype, 1);
1500 1750 : else if (internal_gather_scatter_fn_supported_p (ifn, vectype,
1501 : memory_type,
1502 : off_vectype, scale,
1503 : elsvals)
1504 1750 : || memory_access_type == VMAT_GATHER_SCATTER_LEGACY)
1505 566 : vect_record_loop_mask (loop_vinfo, masks, nvectors, vectype,
1506 : scalar_mask);
1507 : else
1508 : {
1509 1184 : if (dump_enabled_p ())
1510 26 : dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1511 : "can't operate on partial vectors because"
1512 : " the target doesn't have an appropriate"
1513 : " gather load or scatter store instruction.\n");
1514 1184 : LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo) = false;
1515 : }
1516 1750 : return;
1517 : }
1518 :
1519 285195 : if (memory_access_type != VMAT_CONTIGUOUS)
1520 : {
1521 : /* Element X of the data must come from iteration i * VF + X of the
1522 : scalar loop. We need more work to support other mappings. */
1523 25791 : if (dump_enabled_p ())
1524 728 : dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1525 : "can't operate on partial vectors because an"
1526 : " access isn't contiguous.\n");
1527 25791 : LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo) = false;
1528 25791 : return;
1529 : }
1530 :
1531 259404 : if (!VECTOR_MODE_P (vecmode))
1532 : {
1533 0 : if (dump_enabled_p ())
1534 0 : dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1535 : "can't operate on partial vectors when emulating"
1536 : " vector operations.\n");
1537 0 : LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo) = false;
1538 0 : return;
1539 : }
1540 :
1541 : /* We might load more scalars than we need for permuting SLP loads.
1542 : We checked in get_load_store_type that the extra elements
1543 : don't leak into a new vector. */
1544 349225 : auto group_memory_nvectors = [](poly_uint64 size, poly_uint64 nunits)
1545 : {
1546 89821 : unsigned int nvectors;
1547 179642 : if (can_div_away_from_zero_p (size, nunits, &nvectors))
1548 89821 : return nvectors;
1549 : gcc_unreachable ();
1550 : };
1551 :
1552 259404 : poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
1553 259404 : poly_uint64 vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
1554 259404 : machine_mode mask_mode;
1555 259404 : machine_mode vmode;
1556 259404 : bool using_partial_vectors_p = false;
1557 259404 : if (get_len_load_store_mode
1558 259404 : (vecmode, is_load, nullptr, elsvals).exists (&vmode))
1559 : {
1560 0 : nvectors = group_memory_nvectors (group_size * vf, nunits);
1561 0 : unsigned factor = (vecmode == vmode) ? 1 : GET_MODE_UNIT_SIZE (vecmode);
1562 0 : vect_record_loop_len (loop_vinfo, lens, nvectors, vectype, factor);
1563 0 : using_partial_vectors_p = true;
1564 : }
1565 349225 : else if (targetm.vectorize.get_mask_mode (vecmode).exists (&mask_mode)
1566 259404 : && can_vec_mask_load_store_p (vecmode, mask_mode, is_load, NULL,
1567 : elsvals))
1568 : {
1569 89821 : nvectors = group_memory_nvectors (group_size * vf, nunits);
1570 89821 : vect_record_loop_mask (loop_vinfo, masks, nvectors, vectype, scalar_mask);
1571 89821 : using_partial_vectors_p = true;
1572 : }
1573 :
1574 89821 : if (!using_partial_vectors_p)
1575 : {
1576 169583 : if (dump_enabled_p ())
1577 11625 : dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1578 : "can't operate on partial vectors because the"
1579 : " target doesn't have the appropriate partial"
1580 : " vectorization load or store.\n");
1581 169583 : LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo) = false;
1582 : }
1583 : }
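
/* A minimal standalone sketch of the decision ladder above; the enum
   and function names are hypothetical, not GCC internals.  A target
   with length-style partial loads/stores records a loop length, one
   with mask-style instructions records a loop mask, and otherwise
   partial vectors are disabled for the loop.  */

#include <cstdio>

enum partial_kind { PARTIAL_LEN, PARTIAL_MASK, PARTIAL_NONE };

static partial_kind
choose_partial_kind (bool has_len_ldst, bool has_mask_ldst)
{
  if (has_len_ldst)
    return PARTIAL_LEN;		/* The vect_record_loop_len path.  */
  if (has_mask_ldst)
    return PARTIAL_MASK;	/* The vect_record_loop_mask path.  */
  return PARTIAL_NONE;		/* CAN_USE_PARTIAL_VECTORS_P = false.  */
}

int
main ()
{
  /* A target with only mask-style accesses.  */
  printf ("%d\n", choose_partial_kind (false, true));	/* 1 */
  return 0;
}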
1584 :
1585 : /* Return the mask input to a masked load or store. VEC_MASK is the vectorized
1586 : form of the scalar mask condition and LOOP_MASK, if nonnull, is the mask
1587 : that needs to be applied to all loads and stores in a vectorized loop.
1588 : Return VEC_MASK if LOOP_MASK is null or if VEC_MASK is already masked,
1589 : otherwise return VEC_MASK & LOOP_MASK.
1590 :
1591 : MASK_TYPE is the type of both masks. If new statements are needed,
1592 : insert them before GSI. */
1593 :
1594 : tree
1595 1702 : prepare_vec_mask (loop_vec_info loop_vinfo, tree mask_type, tree loop_mask,
1596 : tree vec_mask, gimple_stmt_iterator *gsi)
1597 : {
1598 1702 : gcc_assert (useless_type_conversion_p (mask_type, TREE_TYPE (vec_mask)));
1599 1702 : if (!loop_mask)
1600 : return vec_mask;
1601 :
1602 139 : gcc_assert (TREE_TYPE (loop_mask) == mask_type);
1603 :
1604 139 : if (loop_vinfo->vec_cond_masked_set.contains ({ vec_mask, loop_mask }))
1605 : return vec_mask;
1606 :
1607 139 : tree and_res = make_temp_ssa_name (mask_type, NULL, "vec_mask_and");
1608 139 : gimple *and_stmt = gimple_build_assign (and_res, BIT_AND_EXPR,
1609 : vec_mask, loop_mask);
1610 :
1611 139 : gsi_insert_before (gsi, and_stmt, GSI_SAME_STMT);
1612 139 : return and_res;
1613 : }
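
/* A standalone sketch of the reuse idea in prepare_vec_mask; the
   names here are stand-ins, not GCC internals.  The set models
   loop_vinfo->vec_cond_masked_set, which is filled in elsewhere when
   a conditional operation is known to already incorporate LOOP_MASK,
   so the BIT_AND_EXPR is only emitted for pairs not yet masked.  */

#include <cstdio>
#include <set>
#include <string>
#include <utility>

static std::set<std::pair<std::string, std::string>> cond_masked_set;

static std::string
prepare_mask (const std::string &vec_mask, const std::string &loop_mask)
{
  if (loop_mask.empty ())
    return vec_mask;
  if (cond_masked_set.count ({vec_mask, loop_mask}))
    return vec_mask;			/* Already masked by LOOP_MASK.  */
  return vec_mask + " & " + loop_mask;	/* One new BIT_AND_EXPR.  */
}

int
main ()
{
  printf ("%s\n", prepare_mask ("vec_mask_34", "loop_mask_22").c_str ());
  return 0;
}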
1614 :
1615 : /* Determine whether we can use a gather load or scatter store to vectorize
1616 : strided load or store STMT_INFO by truncating the current offset to a
1617 : smaller width. We need to be able to construct an offset vector:
1618 :
1619 : { 0, X, X*2, X*3, ... }
1620 :
1621 : without loss of precision, where X is STMT_INFO's DR_STEP.
1622 :
1623 : Return true if this is possible, describing the gather load or scatter
1624 : store in GS_INFO. MASKED_P is true if the load or store is conditional.
1625 :
1626 : If we can use gather/scatter and ELSVALS is nonzero the supported
1627 : else values will be stored in the vector ELSVALS points to. */
1628 :
1629 : static bool
1630 63871 : vect_truncate_gather_scatter_offset (stmt_vec_info stmt_info, tree vectype,
1631 : loop_vec_info loop_vinfo, bool masked_p,
1632 : gather_scatter_info *gs_info,
1633 : vec<int> *elsvals)
1634 : {
1635 63871 : dr_vec_info *dr_info = STMT_VINFO_DR_INFO (stmt_info);
1636 63871 : data_reference *dr = dr_info->dr;
1637 63871 : tree step = DR_STEP (dr);
1638 63871 : if (TREE_CODE (step) != INTEGER_CST)
1639 : {
1640 : /* ??? Perhaps we could use range information here? */
1641 28464 : if (dump_enabled_p ())
1642 229 : dump_printf_loc (MSG_NOTE, vect_location,
1643 : "cannot truncate variable step.\n");
1644 28464 : return false;
1645 : }
1646 :
1647 : /* Get the number of bits in an element. */
1648 35407 : scalar_mode element_mode = SCALAR_TYPE_MODE (TREE_TYPE (vectype));
1649 35407 : unsigned int element_bits = GET_MODE_BITSIZE (element_mode);
1650 :
1651 : /* Set COUNT to the upper limit on the number of elements - 1.
1652 : Start with the maximum vectorization factor. */
1653 35407 : unsigned HOST_WIDE_INT count = vect_max_vf (loop_vinfo) - 1;
1654 :
1655 : /* Try lowering COUNT to the number of scalar latch iterations. */
1656 35407 : class loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
1657 35407 : widest_int max_iters;
1658 35407 : if (max_loop_iterations (loop, &max_iters)
1659 70101 : && max_iters < count)
1660 2085 : count = max_iters.to_shwi ();
1661 :
1662 : /* Try scales of 1 and the element size. */
1663 35407 : unsigned int scales[] = { 1, vect_get_scalar_dr_size (dr_info) };
1664 35407 : wi::overflow_type overflow = wi::OVF_NONE;
1665 106221 : for (int i = 0; i < 2; ++i)
1666 : {
1667 70814 : unsigned int scale = scales[i];
1668 70814 : widest_int factor;
1669 70814 : if (!wi::multiple_of_p (wi::to_widest (step), scale, SIGNED, &factor))
1670 0 : continue;
1671 :
1672 : /* Determine the minimum precision of (COUNT - 1) * STEP / SCALE. */
1673 70814 : widest_int range = wi::mul (count, factor, SIGNED, &overflow);
1674 70814 : if (overflow)
1675 0 : continue;
1676 70814 : signop sign = range >= 0 ? UNSIGNED : SIGNED;
1677 70814 : unsigned int min_offset_bits = wi::min_precision (range, sign);
1678 :
1679 : /* Find the narrowest viable offset type. */
1680 70814 : unsigned int offset_bits = 1U << ceil_log2 (min_offset_bits);
1681 70814 : tree offset_type = build_nonstandard_integer_type (offset_bits,
1682 : sign == UNSIGNED);
1683 :
1684 : /* See whether the target supports the operation with an offset
1685 : no narrower than OFFSET_TYPE. */
1686 70814 : tree memory_type = TREE_TYPE (DR_REF (dr));
1687 70814 : tree tmp_offset_vectype;
1688 70814 : int tmp_scale;
1689 70814 : if (!vect_gather_scatter_fn_p (loop_vinfo, DR_IS_READ (dr), masked_p,
1690 : vectype, memory_type, offset_type,
1691 : scale, &tmp_scale,
1692 : &gs_info->ifn, &gs_info->offset_vectype,
1693 : &tmp_offset_vectype, elsvals)
1694 70814 : || gs_info->ifn == IFN_LAST)
1695 70814 : continue;
1696 :
1697 0 : gs_info->decl = NULL_TREE;
1698 : /* Logically the sum of DR_BASE_ADDRESS, DR_INIT and DR_OFFSET,
1699 : but we don't need to store that here. */
1700 0 : gs_info->base = NULL_TREE;
1701 0 : gs_info->alias_ptr = build_int_cst
1702 0 : (reference_alias_ptr_type (DR_REF (dr)),
1703 0 : get_object_alignment (DR_REF (dr)));
1704 0 : gs_info->element_type = TREE_TYPE (vectype);
1705 0 : gs_info->offset = fold_convert (offset_type, step);
1706 0 : gs_info->scale = scale;
1707 0 : gs_info->memory_type = memory_type;
1708 0 : return true;
1709 141628 : }
1710 :
1711 35407 : if (overflow && dump_enabled_p ())
1712 0 : dump_printf_loc (MSG_NOTE, vect_location,
1713 : "truncating gather/scatter offset to %d bits"
1714 : " might change its value.\n", element_bits);
1715 :
1716 : return false;
1717 35407 : }
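
/* A standalone model (not GCC internals) of the precision computation
   above: for the offset vector { 0, X, X*2, ... } with COUNT as the
   element-count bound minus 1, find the narrowest power-of-two width
   that holds COUNT * (STEP / SCALE) without loss, mirroring
   wi::min_precision followed by 1 << ceil_log2.  */

#include <cstdint>
#include <cstdio>

static unsigned
min_offset_bits (int64_t count, int64_t step, int64_t scale)
{
  int64_t range = count * (step / scale);  /* Assumes step % scale == 0.  */
  bool negative = range < 0;
  uint64_t mag = negative ? (uint64_t) (-range) - 1 : (uint64_t) range;
  unsigned prec = 0;
  for (; mag; mag >>= 1)
    prec++;				   /* Bits for the magnitude.  */
  if (negative)
    prec++;				   /* Plus a sign bit.  */
  unsigned width = 1;
  while (width < prec)
    width <<= 1;			   /* Round up to a power of two.  */
  return width;
}

int
main ()
{
  /* 15 remaining iterations with DR_STEP -6 and scale 1 give a range
     of -90, which fits in 8 signed bits.  */
  printf ("%u\n", min_offset_bits (15, -6, 1));	  /* 8 */
  return 0;
}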
1718 :
1719 : /* Return true if we can use gather/scatter or strided internal functions
1720 : to vectorize STMT_INFO, which is a grouped or strided load or store
1721 : with multiple lanes and will be implemented by a type-punned access
1722 : of a vector with element size that matches the number of lanes.
1723 :
1724 : MASKED_P is true if the load or store is conditional.
1725 : When returning true, fill in INFO with the information required to
1726 : perform the operation.  Also, store the punning type in PUN_VECTYPE.
1727 :
1728 : If successful and ELSVALS is nonzero the supported
1729 : else values will be stored in the vector ELSVALS points to. */
1730 :
1731 : static bool
1732 4689 : vect_use_grouped_gather (dr_vec_info *dr_info, tree vectype,
1733 : loop_vec_info loop_vinfo, bool masked_p,
1734 : unsigned int nelts,
1735 : gather_scatter_info *info, vec<int> *elsvals,
1736 : tree *pun_vectype)
1737 : {
1738 4689 : data_reference *dr = dr_info->dr;
1739 :
1740 : /* TODO: We can support nelts > BITS_PER_UNIT or non-power-of-two by
1741 : using multiple gathers/scatters. */
1742 9051 : if (nelts > BITS_PER_UNIT || !pow2p_hwi (nelts))
1743 : return false;
1744 :
1745 : /* Pun the vectype with one of the same size but an element spanning
1746 : NELTS elements of VECTYPE.
1747 : The punned type of a V16QI with NELTS = 4 would be V4SI.
1748 : */
1749 4061 : tree tmp;
1750 4061 : unsigned int pieces;
1751 4061 : if (!can_div_trunc_p (TYPE_VECTOR_SUBPARTS (vectype), nelts, &pieces)
1752 4061 : || !pieces)
1753 352 : return false;
1754 :
1755 3709 : *pun_vectype = vector_vector_composition_type (vectype, pieces, &tmp, true);
1756 :
1757 3709 : if (!*pun_vectype || !VECTOR_TYPE_P (*pun_vectype))
1758 : return false;
1759 :
1760 3335 : internal_fn ifn;
1761 3335 : tree offset_vectype = *pun_vectype;
1762 :
1763 2231 : internal_fn strided_ifn = DR_IS_READ (dr)
1764 3335 : ? IFN_MASK_LEN_STRIDED_LOAD : IFN_MASK_LEN_STRIDED_STORE;
1765 :
1766 : /* Check if we have a gather/scatter with the new type. We're just trying
1767 : with the type itself as offset for now. If not, check if we have a
1768 : strided load/store.  These have fewer constraints (for example, no offset
1769 : type needs to exist), so it is possible that even though a gather/scatter is
1770 : not available we still have a strided load/store. */
1771 3335 : bool ok = false;
1772 3335 : tree tmp_vectype;
1773 3335 : int tmp_scale;
1774 3335 : if (vect_gather_scatter_fn_p
1775 3335 : (loop_vinfo, DR_IS_READ (dr), masked_p, *pun_vectype,
1776 3335 : TREE_TYPE (*pun_vectype), *pun_vectype, 1, &tmp_scale, &ifn,
1777 : &offset_vectype, &tmp_vectype, elsvals))
1778 : ok = true;
1779 3335 : else if (internal_strided_fn_supported_p (strided_ifn, *pun_vectype,
1780 : elsvals))
1781 : {
1782 : /* Use gather/scatter IFNs, vect_get_strided_load_store_ops
1783 : will switch back to the strided variants. */
1784 0 : ifn = (DR_IS_READ (dr) ? IFN_MASK_LEN_GATHER_LOAD
1785 : : IFN_MASK_LEN_SCATTER_STORE);
1786 0 : ok = true;
1787 : }
1788 :
1789 0 : if (ok)
1790 : {
1791 0 : info->ifn = ifn;
1792 0 : info->decl = NULL_TREE;
1793 0 : info->base = dr->ref;
1794 0 : info->alias_ptr = build_int_cst
1795 0 : (reference_alias_ptr_type (DR_REF (dr)),
1796 0 : get_object_alignment (DR_REF (dr)));
1797 0 : info->element_type = TREE_TYPE (*pun_vectype);
1798 0 : info->offset_vectype = offset_vectype;
1799 : /* No need to set the offset, vect_get_strided_load_store_ops
1800 : will do that. */
1801 0 : info->scale = 1;
1802 0 : info->memory_type = TREE_TYPE (DR_REF (dr));
1803 0 : return true;
1804 : }
1805 :
1806 : return false;
1807 : }
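
/* A standalone sketch of the type punning above, illustrative only:
   NELTS consecutive narrow lanes become one wider lane, so a V16QI
   group accessed NELTS = 4 lanes at a time is punned to V4SI.  */

#include <cstdio>

int
main ()
{
  unsigned subparts = 16;	/* V16QI lanes.  */
  unsigned elt_bits = 8;	/* QImode.  */
  unsigned nelts = 4;		/* Lanes covered per access.  */
  if (subparts % nelts == 0)	/* The can_div_trunc_p check above.  */
    printf ("punned type: %u lanes of %u bits\n",
	    subparts / nelts, elt_bits * nelts);  /* 4 lanes of 32 bits.  */
  return 0;
}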
1808 :
1809 :
1810 : /* Return true if we can use gather/scatter internal functions to
1811 : vectorize STMT_INFO, which is a grouped or strided load or store.
1812 : MASKED_P is true if the load or store is conditional. When returning
1813 : true, fill in GS_INFO with the information required to perform the
1814 : operation.
1815 :
1816 : If we can use gather/scatter and ELSVALS is nonzero the supported
1817 : else values will be stored in the vector ELSVALS points to. */
1818 :
1819 : static bool
1820 63871 : vect_use_strided_gather_scatters_p (stmt_vec_info stmt_info, tree vectype,
1821 : loop_vec_info loop_vinfo, bool masked_p,
1822 : gather_scatter_info *gs_info,
1823 : vec<int> *elsvals,
1824 : unsigned int group_size,
1825 : bool single_element_p)
1826 : {
1827 63871 : if (!vect_check_gather_scatter (stmt_info, vectype,
1828 : loop_vinfo, gs_info, elsvals)
1829 63871 : || gs_info->ifn == IFN_LAST)
1830 : {
1831 63871 : if (!vect_truncate_gather_scatter_offset (stmt_info, vectype, loop_vinfo,
1832 : masked_p, gs_info, elsvals))
1833 : return false;
1834 : }
1835 :
1836 0 : if (!single_element_p
1837 0 : && !targetm.vectorize.prefer_gather_scatter (TYPE_MODE (vectype),
1838 : gs_info->scale,
1839 : group_size))
1840 : return false;
1841 :
1842 0 : if (dump_enabled_p ())
1843 0 : dump_printf_loc (MSG_NOTE, vect_location,
1844 : "using gather/scatter for strided/grouped access,"
1845 : " scale = %d\n", gs_info->scale);
1846 :
1847 : return true;
1848 : }
1849 :
1850 : /* STMT_INFO is a non-strided load or store, meaning that it accesses
1851 : elements with a known constant step. Return -1 if that step
1852 : is negative, 0 if it is zero, and 1 if it is greater than zero. */
1853 :
1854 : int
1855 1467204 : compare_step_with_zero (vec_info *vinfo, stmt_vec_info stmt_info)
1856 : {
1857 1467204 : dr_vec_info *dr_info = STMT_VINFO_DR_INFO (stmt_info);
1858 1467204 : return tree_int_cst_compare (vect_dr_behavior (vinfo, dr_info)->step,
1859 1467204 : size_zero_node);
1860 : }
1861 :
1862 : /* If the target supports a permute mask that reverses the elements in
1863 : a vector of type VECTYPE, return that mask, otherwise return null. */
1864 :
1865 : tree
1866 9170 : perm_mask_for_reverse (tree vectype)
1867 : {
1868 9170 : poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
1869 :
1870 : /* The encoding has a single stepped pattern. */
1871 9170 : vec_perm_builder sel (nunits, 1, 3);
1872 36680 : for (int i = 0; i < 3; ++i)
1873 27510 : sel.quick_push (nunits - 1 - i);
1874 :
1875 9170 : vec_perm_indices indices (sel, 1, nunits);
1876 9170 : if (!can_vec_perm_const_p (TYPE_MODE (vectype), TYPE_MODE (vectype),
1877 : indices))
1878 : return NULL_TREE;
1879 8018 : return vect_gen_perm_mask_checked (vectype, indices);
1880 9170 : }
1881 :
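
/* A standalone sketch of the reversal mask above: the vec_perm
   encoding stores a single stepped pattern of three elements,
   nunits-1, nunits-2, nunits-3, which extends to nunits-1-i for every
   lane.  For an 8-lane vector the full series is 7 6 5 4 3 2 1 0.  */

#include <cstdio>

int
main ()
{
  unsigned nunits = 8;
  for (unsigned i = 0; i < nunits; ++i)
    printf ("%u ", nunits - 1 - i);
  printf ("\n");
  return 0;
}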
1882 : /* A subroutine of get_load_store_type, with a subset of the same
1883 : arguments. Handle the case where STMT_INFO is a load or store that
1884 : accesses consecutive elements with a negative step. Sets *POFFSET
1885 : to the offset to be applied to the DR for the first access. */
1886 :
1887 : static vect_memory_access_type
1888 12145 : get_negative_load_store_type (vec_info *vinfo,
1889 : stmt_vec_info stmt_info, tree vectype,
1890 : vec_load_store_type vls_type,
1891 : unsigned int ncopies, poly_int64 *poffset)
1892 : {
1893 12145 : dr_vec_info *dr_info = STMT_VINFO_DR_INFO (stmt_info);
1894 12145 : dr_alignment_support alignment_support_scheme;
1895 :
1896 12145 : if (ncopies > 1)
1897 : {
1898 0 : if (dump_enabled_p ())
1899 0 : dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1900 : "multiple types with negative step.\n");
1901 0 : return VMAT_ELEMENTWISE;
1902 : }
1903 :
1904 : /* For backward running DRs the first access in vectype actually is
1905 : N-1 elements before the address of the DR. */
1906 12145 : *poffset = ((-TYPE_VECTOR_SUBPARTS (vectype) + 1)
1907 12145 : * TREE_INT_CST_LOW (TYPE_SIZE_UNIT (TREE_TYPE (vectype))));
1908 :
1909 12145 : int misalignment = dr_misalignment (dr_info, vectype, *poffset);
1910 12145 : alignment_support_scheme
1911 12145 : = vect_supportable_dr_alignment (vinfo, dr_info, vectype, misalignment);
1912 12145 : if (alignment_support_scheme != dr_aligned
1913 12145 : && alignment_support_scheme != dr_unaligned_supported)
1914 : {
1915 4378 : if (dump_enabled_p ())
1916 0 : dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1917 : "negative step but alignment required.\n");
1918 4378 : *poffset = 0;
1919 4378 : return VMAT_ELEMENTWISE;
1920 : }
1921 :
1922 7767 : if (vls_type == VLS_STORE_INVARIANT)
1923 : {
1924 1181 : if (dump_enabled_p ())
1925 21 : dump_printf_loc (MSG_NOTE, vect_location,
1926 : "negative step with invariant source;"
1927 : " no permute needed.\n");
1928 1181 : return VMAT_CONTIGUOUS_DOWN;
1929 : }
1930 :
1931 6586 : if (!perm_mask_for_reverse (vectype))
1932 : {
1933 1152 : if (dump_enabled_p ())
1934 52 : dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1935 : "negative step and reversing not supported.\n");
1936 1152 : *poffset = 0;
1937 1152 : return VMAT_ELEMENTWISE;
1938 : }
1939 :
1940 : return VMAT_CONTIGUOUS_REVERSE;
1941 : }
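
/* A standalone sketch of the *POFFSET computation above: with a
   backward step the first vector access starts nunits-1 elements
   before the DR's address, e.g. (-4 + 1) * 4 = -12 bytes for a V4SI
   access with 4-byte elements.  */

#include <cstdio>

int
main ()
{
  int nunits = 4, elt_size = 4;
  printf ("%d\n", (-nunits + 1) * elt_size);	/* -12 */
  return 0;
}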
1942 :
1943 : /* STMT_INFO is either a masked or unconditional store. Return the value
1944 : being stored. */
1945 :
1946 : tree
1947 0 : vect_get_store_rhs (stmt_vec_info stmt_info)
1948 : {
1949 0 : if (gassign *assign = dyn_cast <gassign *> (stmt_info->stmt))
1950 : {
1951 0 : gcc_assert (gimple_assign_single_p (assign));
1952 0 : return gimple_assign_rhs1 (assign);
1953 : }
1954 0 : if (gcall *call = dyn_cast <gcall *> (stmt_info->stmt))
1955 : {
1956 0 : internal_fn ifn = gimple_call_internal_fn (call);
1957 0 : int index = internal_fn_stored_value_index (ifn);
1958 0 : gcc_assert (index >= 0);
1959 0 : return gimple_call_arg (call, index);
1960 : }
1961 0 : gcc_unreachable ();
1962 : }
1963 :
1964 : /* Function VECTOR_VECTOR_COMPOSITION_TYPE
1965 :
1966 : This function returns a vector type which can be composed with NELTS pieces,
1967 : This function returns a vector type which can be composed from NELTS
1968 : pieces, whose type is recorded in PTYPE.  VTYPE must be a vector type
1969 : with the same vector size as the returned vector.  It first checks
1970 : whether the target supports a pieces-sized vector mode for the
1971 : construction; if not, it further checks for a pieces-sized scalar
1972 : mode.  It returns NULL_TREE if no usable composition can be found.
1973 : If the caller only wants scalar pieces, where PTYPE e.g. is a
1974 : possible gather/scatter element type, SCALAR_PTYPE_ONLY must be true.
1975 : For example, for (vtype=V16QI, nelts=4), we can probably get:
1976 : - V16QI with PTYPE V4QI.
1977 : - V4SI with PTYPE SI.
1978 : - NULL_TREE. */
1979 :
1980 : static tree
1981 13934 : vector_vector_composition_type (tree vtype, poly_uint64 nelts, tree *ptype,
1982 : bool scalar_ptype_only)
1983 : {
1984 13934 : gcc_assert (VECTOR_TYPE_P (vtype));
1985 13934 : gcc_assert (known_gt (nelts, 0U));
1986 :
1987 13934 : machine_mode vmode = TYPE_MODE (vtype);
1988 13934 : if (!VECTOR_MODE_P (vmode))
1989 : return NULL_TREE;
1990 :
1991 : /* When we are asked to compose the vector from its components let
1992 : that happen directly. */
1993 13934 : if (known_eq (TYPE_VECTOR_SUBPARTS (vtype), nelts))
1994 : {
1995 5945 : *ptype = TREE_TYPE (vtype);
1996 5945 : return vtype;
1997 : }
1998 :
1999 15978 : poly_uint64 vbsize = GET_MODE_BITSIZE (vmode);
2000 7989 : unsigned int pbsize;
2001 7989 : if (constant_multiple_p (vbsize, nelts, &pbsize))
2002 : {
2003 : /* First check if vec_init optab supports construction from
2004 : vector pieces directly. */
2005 7989 : scalar_mode elmode = SCALAR_TYPE_MODE (TREE_TYPE (vtype));
2006 15978 : poly_uint64 inelts = pbsize / GET_MODE_BITSIZE (elmode);
2007 7989 : machine_mode rmode;
2008 7989 : if (!scalar_ptype_only
2009 4280 : && related_vector_mode (vmode, elmode, inelts).exists (&rmode)
2010 11861 : && (convert_optab_handler (vec_init_optab, vmode, rmode)
2011 : != CODE_FOR_nothing))
2012 : {
2013 3218 : *ptype = build_vector_type (TREE_TYPE (vtype), inelts);
2014 3218 : return vtype;
2015 : }
2016 :
2017 : /* Otherwise check whether an integer type of the same piece size exists
2018 : and whether the vec_init optab supports construction from it directly. */
2019 4771 : if (int_mode_for_size (pbsize, 0).exists (&elmode)
2020 4771 : && related_vector_mode (vmode, elmode, nelts).exists (&rmode))
2021 : {
2022 4367 : if (scalar_ptype_only
2023 4367 : || convert_optab_handler (vec_init_optab, rmode, elmode)
2024 : != CODE_FOR_nothing)
2025 : {
2026 4367 : *ptype = build_nonstandard_integer_type (pbsize, 1);
2027 4367 : return build_vector_type (*ptype, nelts);
2028 : }
2029 : }
2030 : }
2031 :
2032 : return NULL_TREE;
2033 : }
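
/* A standalone sketch (not GCC internals) of the piece-size
   arithmetic above: a 128-bit V16QI split into NELTS = 4 pieces gives
   32-bit pieces, so the composition is either V16QI with PTYPE V4QI
   or V4SI with PTYPE SI, depending on what the target's vec_init
   optab supports.  */

#include <cstdio>

int
main ()
{
  unsigned vbsize = 128;	/* GET_MODE_BITSIZE of V16QImode.  */
  unsigned nelts = 4;
  if (vbsize % nelts == 0)	/* The constant_multiple_p check above.  */
    printf ("piece size: %u bits\n", vbsize / nelts);	/* 32 */
  return 0;
}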
2034 :
2035 : /* Check if the load permutation of NODE only refers to a consecutive
2036 : subset of the group indices where GROUP_SIZE is the size of the
2037 : dataref's group. We also assert that the length of the permutation
2038 : dataref's group.  We also require that the length of the permutation
2039 : Such load permutations can be elided in strided access schemes as
2040 : we can "jump over" the gap they leave. */
2041 :
2042 : bool
2043 44993 : has_consecutive_load_permutation (slp_tree node, unsigned group_size)
2044 : {
2045 44993 : load_permutation_t perm = SLP_TREE_LOAD_PERMUTATION (node);
2046 44993 : if (!perm.exists ()
2047 1935 : || perm.length () <= 1
2048 491 : || !pow2p_hwi (perm.length ())
2049 45468 : || group_size % perm.length ())
2050 : return false;
2051 :
2052 428 : return vect_load_perm_consecutive_p (node);
2053 : }
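
/* A standalone sketch of the cheap pre-checks above, applied before
   the consecutive-subset test: the permutation length must be greater
   than one, a power of two, and divide the group size.  With group
   size 8, { 4, 5, 6, 7 } passes; { 0, 2, 4 } fails (length 3 is not
   a power of two).  */

#include <cstdio>
#include <vector>

static bool
perm_prechecks_ok (const std::vector<unsigned> &perm, unsigned group_size)
{
  size_t n = perm.size ();
  return n > 1 && (n & (n - 1)) == 0 && group_size % n == 0;
}

int
main ()
{
  printf ("%d %d\n", perm_prechecks_ok ({4, 5, 6, 7}, 8),
	  perm_prechecks_ok ({0, 2, 4}, 8));	/* 1 0 */
  return 0;
}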
2054 :
2055 :
2056 : /* Analyze load or store SLP_NODE of type VLS_TYPE. Return true
2057 : if there is a memory access type that the vectorized form can use,
2058 : storing it in *MEMORY_ACCESS_TYPE if so.  If we decide to use gathers
2059 : or scatters, fill in LS's gather/scatter info accordingly.  In addition
2060 : *ALIGNMENT_SUPPORT_SCHEME is filled out and false is returned if
2061 : the target does not support the alignment scheme. *MISALIGNMENT
2062 : is set according to the alignment of the access (including
2063 : DR_MISALIGNMENT_UNKNOWN when it is unknown).
2064 :
2065 : MASKED_P is true if the statement is conditional on a vectorized mask.
2066 : VECTYPE is the vector type that the vectorized statements will use.
2067 :
2068 : If ELSVALS is nonzero the supported else values will be stored in the
2069 : vector ELSVALS points to. */
2070 :
2071 : static bool
2072 1353979 : get_load_store_type (vec_info *vinfo, stmt_vec_info stmt_info,
2073 : tree vectype, slp_tree slp_node,
2074 : bool masked_p, vec_load_store_type vls_type,
2075 : vect_load_store_data *ls)
2076 : {
2077 1353979 : vect_memory_access_type *memory_access_type = &ls->memory_access_type;
2078 1353979 : poly_int64 *poffset = &ls->poffset;
2079 1353979 : dr_alignment_support *alignment_support_scheme
2080 : = &ls->alignment_support_scheme;
2081 1353979 : int *misalignment = &ls->misalignment;
2082 1353979 : internal_fn *lanes_ifn = &ls->lanes_ifn;
2083 1353979 : vec<int> *elsvals = &ls->elsvals;
2084 1353979 : tree *ls_type = &ls->ls_type;
2085 1353979 : bool *slp_perm = &ls->slp_perm;
2086 1353979 : unsigned *n_perms = &ls->n_perms;
2087 1353979 : unsigned *n_loads = &ls->n_loads;
2088 1353979 : tree *supported_offset_vectype = &ls->supported_offset_vectype;
2089 1353979 : int *supported_scale = &ls->supported_scale;
2090 1353979 : loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
2091 1353979 : poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
2092 1353979 : class loop *loop = loop_vinfo ? LOOP_VINFO_LOOP (loop_vinfo) : NULL;
2093 1353979 : stmt_vec_info first_stmt_info;
2094 1353979 : unsigned int group_size;
2095 1353979 : unsigned HOST_WIDE_INT gap;
2096 1353979 : bool single_element_p;
2097 1353979 : poly_int64 neg_ldst_offset = 0;
2098 :
2099 1353979 : *misalignment = DR_MISALIGNMENT_UNKNOWN;
2100 1353979 : *poffset = 0;
2101 1353979 : *ls_type = NULL_TREE;
2102 1353979 : *slp_perm = false;
2103 1353979 : *n_perms = -1U;
2104 1353979 : *n_loads = -1U;
2105 1353979 : ls->subchain_p = false;
2106 :
2107 1353979 : bool perm_ok = true;
2108 1353979 : poly_int64 vf = loop_vinfo ? LOOP_VINFO_VECT_FACTOR (loop_vinfo) : 1;
2109 :
2110 1353979 : if (SLP_TREE_LOAD_PERMUTATION (slp_node).exists ())
2111 72814 : perm_ok = vect_transform_slp_perm_load (vinfo, slp_node, vNULL, NULL,
2112 72814 : vf, true, n_perms, n_loads);
2113 :
2114 1353979 : if (STMT_VINFO_GROUPED_ACCESS (stmt_info))
2115 : {
2116 868973 : first_stmt_info = DR_GROUP_FIRST_ELEMENT (stmt_info);
2117 868973 : group_size = DR_GROUP_SIZE (first_stmt_info);
2118 868973 : gap = DR_GROUP_GAP (first_stmt_info);
2119 868973 : single_element_p = (stmt_info == first_stmt_info
2120 868973 : && !DR_GROUP_NEXT_ELEMENT (stmt_info));
2121 : }
2122 : else
2123 : {
2124 : first_stmt_info = stmt_info;
2125 : group_size = 1;
2126 : gap = 0;
2127 : single_element_p = true;
2128 : }
2129 1353979 : dr_vec_info *first_dr_info = STMT_VINFO_DR_INFO (first_stmt_info);
2130 :
2131 : /* True if the vectorized statements would access beyond the last
2132 : statement in the group. */
2133 1353979 : bool overrun_p = false;
2134 :
2135 : /* True if we can cope with such overrun by peeling for gaps, so that
2136 : there is at least one final scalar iteration after the vector loop. */
2137 2707958 : bool can_overrun_p = (!masked_p
2138 1353979 : && vls_type == VLS_LOAD
2139 539932 : && loop_vinfo
2140 1763884 : && !loop->inner);
2141 :
2142 : /* There can only be a gap at the end of the group if the stride is
2143 : known at compile time. */
2144 1353979 : gcc_assert (!STMT_VINFO_STRIDED_P (first_stmt_info) || gap == 0);
2145 :
2146 : /* For SLP vectorization we directly vectorize a subchain
2147 : without permutation. */
2148 1353979 : if (! SLP_TREE_LOAD_PERMUTATION (slp_node).exists ())
2149 1281165 : first_dr_info = STMT_VINFO_DR_INFO (SLP_TREE_SCALAR_STMTS (slp_node)[0]);
2150 :
2151 1353979 : if (STMT_VINFO_STRIDED_P (first_stmt_info))
2152 : {
2153 : /* Try to use consecutive accesses of as many elements as possible,
2154 : separated by the stride, until we have a complete vector.
2155 : Fall back to scalar accesses if that isn't possible. */
2156 44993 : *memory_access_type = VMAT_STRIDED_SLP;
2157 :
2158 : /* If the load permutation is consecutive we can reduce the group to
2159 : the elements the permutation accesses. Then we release the
2160 : permutation. */
2161 44993 : if (has_consecutive_load_permutation (slp_node, group_size))
2162 : {
2163 32 : ls->subchain_p = true;
2164 32 : group_size = SLP_TREE_LANES (slp_node);
2165 32 : SLP_TREE_LOAD_PERMUTATION (slp_node).release ();
2166 : }
2167 : }
2168 1308986 : else if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
2169 : {
2170 10848 : slp_tree offset_node = SLP_TREE_CHILDREN (slp_node)[0];
2171 10848 : tree offset_vectype = SLP_TREE_VECTYPE (offset_node);
2172 10848 : int scale = SLP_TREE_GS_SCALE (slp_node);
2173 10848 : tree memory_type = TREE_TYPE (DR_REF (first_dr_info->dr));
2174 10848 : tree tem;
2175 10848 : if (vect_gather_scatter_fn_p (loop_vinfo, vls_type == VLS_LOAD,
2176 : masked_p, vectype, memory_type,
2177 : offset_vectype, scale, supported_scale,
2178 : &ls->gs.ifn, &tem,
2179 : supported_offset_vectype, elsvals))
2180 : {
2181 0 : if (dump_enabled_p ())
2182 : {
2183 0 : dump_printf_loc (MSG_NOTE, vect_location,
2184 : "gather/scatter with required "
2185 : "offset type "
2186 : "%T and offset scale %d.\n",
2187 : offset_vectype, scale);
2188 0 : if (*supported_offset_vectype)
2189 0 : dump_printf_loc (MSG_NOTE, vect_location,
2190 : " target supports offset type %T.\n",
2191 : *supported_offset_vectype);
2192 0 : if (*supported_scale)
2193 0 : dump_printf_loc (MSG_NOTE, vect_location,
2194 : " target supports offset scale %d.\n",
2195 : *supported_scale);
2196 : }
2197 0 : *memory_access_type = VMAT_GATHER_SCATTER_IFN;
2198 : }
2199 10848 : else if (vls_type == VLS_LOAD
2200 10848 : ? (targetm.vectorize.builtin_gather
2201 9282 : && (ls->gs.decl
2202 9282 : = targetm.vectorize.builtin_gather (vectype,
2203 9282 : TREE_TYPE
2204 : (offset_vectype),
2205 : scale)))
2206 1566 : : (targetm.vectorize.builtin_scatter
2207 1566 : && (ls->gs.decl
2208 1566 : = targetm.vectorize.builtin_scatter (vectype,
2209 1566 : TREE_TYPE
2210 : (offset_vectype),
2211 : scale))))
2212 574 : *memory_access_type = VMAT_GATHER_SCATTER_LEGACY;
2213 : else
2214 : {
2215 : /* GATHER_SCATTER_EMULATED_P. */
2216 10274 : if (!TYPE_VECTOR_SUBPARTS (vectype).is_constant ()
2217 10274 : || !TYPE_VECTOR_SUBPARTS (offset_vectype).is_constant ()
2218 10274 : || VECTOR_BOOLEAN_TYPE_P (offset_vectype)
2219 10274 : || !constant_multiple_p (TYPE_VECTOR_SUBPARTS (offset_vectype),
2220 10274 : TYPE_VECTOR_SUBPARTS (vectype)))
2221 : {
2222 2692 : if (dump_enabled_p ())
2223 466 : dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2224 : "unsupported vector types for emulated "
2225 : "gather.\n");
2226 2692 : return false;
2227 : }
2228 7582 : *memory_access_type = VMAT_GATHER_SCATTER_EMULATED;
2229 : }
2230 : }
2231 : else
2232 : {
2233 1298138 : int cmp = compare_step_with_zero (vinfo, stmt_info);
2234 1298138 : if (cmp < 0)
2235 : {
2236 12327 : if (single_element_p)
2237 : /* ??? The VMAT_CONTIGUOUS_REVERSE code generation is
2238 : only correct for single element "interleaving" SLP. */
2239 12145 : *memory_access_type = get_negative_load_store_type
2240 12145 : (vinfo, stmt_info, vectype, vls_type, 1,
2241 : &neg_ldst_offset);
2242 : else
2243 : /* We can fall back to VMAT_STRIDED_SLP since that does
2244 : not care whether the stride between the group instances
2245 : is positive or negative. */
2246 182 : *memory_access_type = VMAT_STRIDED_SLP;
2247 : }
2248 1285811 : else if (cmp == 0 && loop_vinfo)
2249 : {
2250 3365 : gcc_assert (vls_type == VLS_LOAD);
2251 3365 : *memory_access_type = VMAT_INVARIANT;
2252 : }
2253 : /* Try using LOAD/STORE_LANES. */
2254 1282446 : else if (slp_node->ldst_lanes
2255 1282446 : && (*lanes_ifn
2256 0 : = (vls_type == VLS_LOAD
2257 0 : ? vect_load_lanes_supported (vectype, group_size,
2258 : masked_p, elsvals)
2259 0 : : vect_store_lanes_supported (vectype, group_size,
2260 : masked_p))) != IFN_LAST)
2261 0 : *memory_access_type = VMAT_LOAD_STORE_LANES;
2262 1282446 : else if (!loop_vinfo && slp_node->avoid_stlf_fail)
2263 : {
2264 70 : *memory_access_type = VMAT_ELEMENTWISE;
2265 70 : if (dump_enabled_p ())
2266 2 : dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2267 : "using element-wise load to avoid disrupting "
2268 : "cross iteration store-to-load forwarding\n");
2269 : }
2270 : else
2271 1282376 : *memory_access_type = VMAT_CONTIGUOUS;
2272 :
2273 : /* If this is single-element interleaving with an element
2274 : distance that leaves unused vector loads around, fall back
2275 : to elementwise access if possible - otherwise we would at
2276 : the least create very sub-optimal code (and blow up
2277 : memory, see PR65518). */
2278 1298138 : if (loop_vinfo
2279 1298138 : && single_element_p
2280 465433 : && (*memory_access_type == VMAT_CONTIGUOUS
2281 15510 : || *memory_access_type == VMAT_CONTIGUOUS_REVERSE)
2282 1763571 : && maybe_gt (group_size, TYPE_VECTOR_SUBPARTS (vectype)))
2283 : {
2284 17804 : *memory_access_type = VMAT_ELEMENTWISE;
2285 17804 : if (dump_enabled_p ())
2286 198 : dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2287 : "single-element interleaving not supported "
2288 : "for not adjacent vector loads, using "
2289 : "elementwise access\n");
2290 : }
2291 :
2292 : /* Also fall back to elementwise access in case we did not lower a
2293 : permutation and cannot code generate it. */
2294 1298138 : if (loop_vinfo
2295 519945 : && *memory_access_type != VMAT_ELEMENTWISE
2296 496611 : && SLP_TREE_LOAD_PERMUTATION (slp_node).exists ()
2297 1326620 : && !perm_ok)
2298 : {
2299 2053 : *memory_access_type = VMAT_ELEMENTWISE;
2300 2053 : if (dump_enabled_p ())
2301 246 : dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2302 : "permutation not supported, using elementwise "
2303 : "access\n");
2304 : }
2305 :
2306 519945 : overrun_p = (loop_vinfo && gap != 0
2307 1341038 : && *memory_access_type != VMAT_ELEMENTWISE);
2308 1298138 : if (overrun_p && vls_type != VLS_LOAD)
2309 : {
2310 0 : if (dump_enabled_p ())
2311 0 :   dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2312 :       "Grouped store with gaps requires"
2313 :       " non-consecutive accesses\n");
2313 9 : return false;
2314 : }
2315 :
2316 1298138 : unsigned HOST_WIDE_INT dr_size = vect_get_scalar_dr_size (first_dr_info);
2317 1298138 : poly_int64 off = 0;
2318 1298138 : if (*memory_access_type == VMAT_CONTIGUOUS_REVERSE)
2319 5275 : off = (TYPE_VECTOR_SUBPARTS (vectype) - 1) * -dr_size;
2320 :
2321 : /* An overrun is fine if the trailing elements are smaller
2322 : than the alignment boundary B. Every vector access will
2323 : be a multiple of B and so we are guaranteed to access a
2324 : non-gap element in the same B-sized block. */
2325 1298138 : if (overrun_p
2326 1298138 : && gap < (vect_known_alignment_in_bytes (first_dr_info,
2327 22956 : vectype, off) / dr_size))
2328 : overrun_p = false;
2329 :
2330 : /* When we have a contiguous access across loop iterations
2331 : but the access in the loop doesn't cover the full vector
2332 : we can end up with no gap recorded but still excess
2333 : elements accessed, see PR103116. Make sure we peel for
2334 : gaps if necessary and sufficient and give up if not.
2335 :
2336 : If there is a combination of the access not covering the full
2337 : vector and a gap recorded then we may need to peel twice. */
2338 1298138 : bool large_vector_overrun_p = false;
2339 1298138 : if (loop_vinfo
2340 519945 : && (*memory_access_type == VMAT_CONTIGUOUS
2341 35387 : || *memory_access_type == VMAT_CONTIGUOUS_REVERSE)
2342 489833 : && SLP_TREE_LOAD_PERMUTATION (slp_node).exists ()
2343 1324150 : && !multiple_p (group_size * LOOP_VINFO_VECT_FACTOR (loop_vinfo),
2344 : nunits))
2345 : large_vector_overrun_p = overrun_p = true;
2346 :
2347 : /* If the gap splits the vector in half and the target
2348 : can do half-vector operations avoid the epilogue peeling
2349 : by simply loading half of the vector only. Usually
2350 : the construction with an upper zero half will be elided. */
2351 1298138 : dr_alignment_support alss;
2352 1298138 : int misalign = dr_misalignment (first_dr_info, vectype, off);
2353 1298138 : tree half_vtype;
2354 1298138 : poly_uint64 remain;
2355 1298138 : unsigned HOST_WIDE_INT tem, num;
2356 1298138 : if (overrun_p
2357 1298138 : && !masked_p
2358 17437 : && *memory_access_type != VMAT_LOAD_STORE_LANES
2359 17437 : && (((alss = vect_supportable_dr_alignment (vinfo, first_dr_info,
2360 : vectype, misalign)))
2361 : == dr_aligned
2362 14961 : || alss == dr_unaligned_supported)
2363 9883 : && can_div_trunc_p (group_size
2364 9883 : * LOOP_VINFO_VECT_FACTOR (loop_vinfo) - gap,
2365 : nunits, &tem, &remain)
2366 1308021 : && (known_eq (remain, 0u)
2367 7428 : || (known_ne (remain, 0u)
2368 5767 : && constant_multiple_p (nunits, remain, &num)
2369 1295683 : && (vector_vector_composition_type (vectype, num, &half_vtype)
2370 : != NULL_TREE))))
2371 8222 : overrun_p = false;
2372 :
2373 1298138 : if (overrun_p && !can_overrun_p)
2374 : {
2375 6 : if (dump_enabled_p ())
2376 6 : dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2377 : "Peeling for outer loop is not supported\n");
2378 6 : return false;
2379 : }
2380 :
2381 : /* Peeling for gaps assumes that a single scalar iteration
2382 : is enough to make sure the last vector iteration doesn't
2383 : access excess elements. */
2384 1298132 : if (overrun_p
2385 1298132 : && (!can_div_trunc_p (group_size
2386 9209 : * LOOP_VINFO_VECT_FACTOR (loop_vinfo) - gap,
2387 : nunits, &tem, &remain)
2388 9209 : || maybe_lt (remain + group_size, nunits)))
2389 : {
2390 : /* But peeling a single scalar iteration is enough if
2391 : we can use the next power-of-two sized partial
2392 : access and that is sufficiently small to be covered
2393 : by the single scalar iteration. */
2394 16 : unsigned HOST_WIDE_INT cnunits, cvf, cremain, cpart_size;
2395 16 : if (masked_p
2396 16 : || !nunits.is_constant (&cnunits)
2397 16 : || !LOOP_VINFO_VECT_FACTOR (loop_vinfo).is_constant (&cvf)
2398 16 : || (((cremain = (group_size * cvf - gap) % cnunits), true)
2399 16 : && ((cpart_size = (1 << ceil_log2 (cremain))), true)
2400 16 : && (cremain + group_size < cpart_size
2401 13 : || (vector_vector_composition_type (vectype,
2402 13 : cnunits / cpart_size,
2403 : &half_vtype)
2404 : == NULL_TREE))))
2405 : {
2406 : /* If all else fails we can still resort to niter masking unless
2407 : the vectors used are too big, so enforce the use of
2408 : partial vectors. */
2409 3 : if (LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo)
2410 3 : && !large_vector_overrun_p)
2411 : {
2412 0 : if (dump_enabled_p ())
2413 0 : dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2414 : "peeling for gaps insufficient for "
2415 : "access unless using partial "
2416 : "vectors\n");
2417 0 : LOOP_VINFO_MUST_USE_PARTIAL_VECTORS_P (loop_vinfo) = true;
2418 : }
2419 : else
2420 : {
2421 3 : if (dump_enabled_p ())
2422 3 : dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2423 : "peeling for gaps insufficient for "
2424 : "access\n");
2425 3 : return false;
2426 : }
2427 : }
2428 13 : else if (large_vector_overrun_p)
2429 : {
2430 13 : if (dump_enabled_p ())
2431 12 : dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2432 : "can't operate on partial vectors because "
2433 : "only unmasked loads handle access "
2434 : "shortening required because of gaps at "
2435 : "the end of the access\n");
2436 13 : LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo) = false;
2437 : }
2438 : }
2439 : }
2440 :
2441 : /* As a last resort, try using a gather load or scatter store.
2442 :
2443 : ??? Although the code can handle all group sizes correctly,
2444 : it probably isn't a win to use separate strided accesses based
2445 : on nearby locations. Or, even if it's a win over scalar code,
2446 : it might not be a win over vectorizing at a lower VF, if that
2447 : allows us to use contiguous accesses. */
2448 1351278 : vect_memory_access_type grouped_gather_fallback = VMAT_UNINITIALIZED;
2449 1351278 : if (loop_vinfo
2450 573085 : && (*memory_access_type == VMAT_ELEMENTWISE
2451 573085 : || *memory_access_type == VMAT_STRIDED_SLP))
2452 : {
2453 70559 : gather_scatter_info gs_info;
2454 70559 : if (SLP_TREE_LANES (slp_node) == 1
2455 65675 : && (!SLP_TREE_LOAD_PERMUTATION (slp_node).exists ()
2456 21352 : || single_element_p)
2457 134430 : && vect_use_strided_gather_scatters_p (stmt_info, vectype, loop_vinfo,
2458 : masked_p, &gs_info, elsvals,
2459 : group_size, single_element_p))
2460 : {
2461 : /* vect_use_strided_gather_scatters_p does not save the actually
2462 : supported scale and offset type so do that here.
2463 : We need it later in check_load_store_for_partial_vectors
2464 : where we only check if the given internal function is supported
2465 : (to choose whether to use the IFN, LEGACY, or EMULATED flavor
2466 : of gather/scatter) and don't re-do the full analysis. */
2467 0 : tree tmp;
2468 0 : gcc_assert (vect_gather_scatter_fn_p
2469 : (loop_vinfo, vls_type == VLS_LOAD, masked_p, vectype,
2470 : gs_info.memory_type, TREE_TYPE (gs_info.offset),
2471 : gs_info.scale, supported_scale, &gs_info.ifn,
2472 : &tmp, supported_offset_vectype, elsvals));
2473 :
2474 0 : SLP_TREE_GS_SCALE (slp_node) = gs_info.scale;
2475 0 : SLP_TREE_GS_BASE (slp_node) = error_mark_node;
2476 0 : ls->gs.ifn = gs_info.ifn;
2477 0 : ls->strided_offset_vectype = gs_info.offset_vectype;
2478 0 : *memory_access_type = VMAT_GATHER_SCATTER_IFN;
2479 : }
2480 70559 : else if (SLP_TREE_LANES (slp_node) > 1
2481 : && !masked_p
2482 4884 : && !single_element_p
2483 75248 : && vect_use_grouped_gather (STMT_VINFO_DR_INFO (stmt_info),
2484 : vectype, loop_vinfo,
2485 : masked_p, group_size,
2486 : &gs_info, elsvals, ls_type))
2487 : {
2488 0 : SLP_TREE_GS_SCALE (slp_node) = gs_info.scale;
2489 0 : SLP_TREE_GS_BASE (slp_node) = error_mark_node;
2490 0 : grouped_gather_fallback = *memory_access_type;
2491 0 : *memory_access_type = VMAT_GATHER_SCATTER_IFN;
2492 0 : ls->gs.ifn = gs_info.ifn;
2493 0 : vectype = *ls_type;
2494 0 : ls->strided_offset_vectype = gs_info.offset_vectype;
2495 : }
2496 : }
2497 :
2498 1351278 : if (*memory_access_type == VMAT_CONTIGUOUS_DOWN
2499 1351278 : || *memory_access_type == VMAT_CONTIGUOUS_REVERSE)
2500 6452 : *poffset = neg_ldst_offset;
2501 :
2502 1351278 : if (*memory_access_type == VMAT_ELEMENTWISE
2503 1325821 : || *memory_access_type == VMAT_GATHER_SCATTER_LEGACY
2504 1325247 : || *memory_access_type == VMAT_STRIDED_SLP
2505 1280075 : || *memory_access_type == VMAT_INVARIANT)
2506 : {
2507 74568 : *alignment_support_scheme = dr_unaligned_supported;
2508 74568 : *misalignment = DR_MISALIGNMENT_UNKNOWN;
2509 : }
2510 : else
2511 : {
2512 1276710 : if (mat_gather_scatter_p (*memory_access_type)
2513 : && !first_dr_info)
2514 : *misalignment = DR_MISALIGNMENT_UNKNOWN;
2515 : else
2516 1276710 : *misalignment = dr_misalignment (first_dr_info, vectype, *poffset);
2517 1276710 : *alignment_support_scheme
2518 1276710 : = vect_supportable_dr_alignment
2519 1276710 : (vinfo, first_dr_info, vectype, *misalignment,
2520 1276710 : mat_gather_scatter_p (*memory_access_type));
2521 1276710 : if (grouped_gather_fallback != VMAT_UNINITIALIZED
2522 0 : && *alignment_support_scheme != dr_aligned
2523 0 : && *alignment_support_scheme != dr_unaligned_supported)
2524 : {
2525 : /* No supportable alignment for a grouped gather, fall back to the
2526 : original memory access type. Even though VMAT_STRIDED_SLP might
2527 : also try aligned vector loads it can still choose vector
2528 : construction from scalars. */
2529 0 : *memory_access_type = grouped_gather_fallback;
2530 0 : *alignment_support_scheme = dr_unaligned_supported;
2531 0 : *misalignment = DR_MISALIGNMENT_UNKNOWN;
2532 : }
2533 : }
2534 :
2535 1351278 : if (overrun_p)
2536 : {
2537 9206 : gcc_assert (can_overrun_p);
2538 9206 : if (dump_enabled_p ())
2539 503 : dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2540 : "Data access with gaps requires scalar "
2541 : "epilogue loop\n");
2542 9206 : LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo) = true;
2543 : }
2544 :
2545 1351278 : if ((*memory_access_type == VMAT_ELEMENTWISE
2546 1351278 : || *memory_access_type == VMAT_STRIDED_SLP)
2547 : && !nunits.is_constant ())
2548 : {
2549 : if (dump_enabled_p ())
2550 : dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2551 : "Not using elementwise accesses due to variable "
2552 : "vectorization factor.\n");
2553 : return false;
2554 : }
2555 :
2556 : /* Check whether all scalar iterations are known to be in bounds. */
2557 1351278 : bool inbounds = DR_SCALAR_KNOWN_BOUNDS (STMT_VINFO_DR_INFO (stmt_info));
2558 :
2559 : /* Check whether we support the operation when early breaks are needed.  Here we
2560 : must ensure that we don't access any more than the scalar code would
2561 : have. A masked operation would ensure this, so for these load types
2562 : force masking. */
2563 1351278 : if (loop_vinfo
2564 573085 : && dr_safe_speculative_read_required (stmt_info)
2565 1528727 : && LOOP_VINFO_EARLY_BREAKS (loop_vinfo))
2566 : {
2567 177449 : if (mat_gather_scatter_p (*memory_access_type)
2568 177449 : || *memory_access_type == VMAT_STRIDED_SLP)
2569 : {
2570 9246 : if (dump_enabled_p ())
2571 8 : dump_printf_loc (MSG_NOTE, vect_location,
2572 : "early break not supported: cannot peel for "
2573 : "alignment.  With non-contiguous memory,"
2574 : " vectorization could read out of bounds at %G ",
2575 : STMT_VINFO_STMT (stmt_info));
2576 9246 : if (inbounds)
2577 0 : LOOP_VINFO_MUST_USE_PARTIAL_VECTORS_P (loop_vinfo) = true;
2578 : else
2579 : return false;
2580 : }
2581 : /* Block-level alignment: Even though individual accesses of
2582 : VMAT_ELEMENTWISE type do not cause alignment problems, loading the
2583 : whole vector's worth of values in a speculative early-break context
2584 : might cross a page boundary. Set the alignment scheme to `dr_aligned'
2585 : here in order to force checking of whether such accesses meet
2586 : alignment criteria. */
2587 168203 : else if (*memory_access_type == VMAT_ELEMENTWISE && !inbounds)
2588 14881 : *alignment_support_scheme = dr_aligned;
2589 : }
2590 :
2591 : /* If this DR needs alignment for correctness, we must ensure the target
2592 : alignment is a constant power-of-two multiple of the amount read per
2593 : vector iteration or force masking. */
2594 1342032 : if (dr_safe_speculative_read_required (stmt_info)
2595 1342032 : && (*alignment_support_scheme == dr_aligned
2596 101106 : && !mat_gather_scatter_p (*memory_access_type)))
2597 : {
2598 : /* We can only peel for loops, of course. */
2599 101106 : gcc_checking_assert (loop_vinfo);
2600 :
2601 101106 : poly_uint64 vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
2602 101106 : poly_uint64 read_amount
2603 101106 : = vf * TREE_INT_CST_LOW (TYPE_SIZE_UNIT (TREE_TYPE (vectype)));
2604 101106 : if (STMT_VINFO_GROUPED_ACCESS (stmt_info))
2605 101106 : read_amount *= group_size;
2606 :
2607 101106 : auto target_alignment
2608 101106 : = DR_TARGET_ALIGNMENT (STMT_VINFO_DR_INFO (stmt_info));
2609 101106 : if (!multiple_p (target_alignment, read_amount))
2610 : {
2611 12716 : if (dump_enabled_p ())
2612 : {
2613 28 : dump_printf_loc (MSG_NOTE, vect_location,
2614 : "desired alignment not met, target was ");
2615 28 : dump_dec (MSG_NOTE, target_alignment);
2616 28 : dump_printf (MSG_NOTE, " previously, but read amount is ");
2617 28 : dump_dec (MSG_NOTE, read_amount);
2618 28 : dump_printf (MSG_NOTE, " at %G.\n", STMT_VINFO_STMT (stmt_info));
2619 : }
2620 14901 : return false;
2621 : }
2622 :
2623 : /* When using a group access the first element may be aligned but the
2624 : subsequent loads may not be.  For LOAD_LANES, since the loads are
2625 : based on the first DR, all loads in the group are aligned.  For
2626 : non-LOAD_LANES this is not the case.  In particular, a load + blend
2627 : when there are gaps can have the non-first loads issued unaligned,
2628 : even partially overlapping the memory of the first load to simplify
2629 : the blend; this is what the x86_64 backend does, for instance.  As
2630 : such, only the first load in the group is aligned; the rest are not.
2631 : Because of this the permutes may break the alignment requirements
2632 : that have been set, and as such we should, for now, reject them. */
2633 88390 : if (SLP_TREE_LOAD_PERMUTATION (slp_node).exists ())
2634 : {
2635 2185 : if (dump_enabled_p ())
2636 75 : dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2637 : "loads with load permutations not supported for "
2638 : "speculative early break loads for %G",
2639 : STMT_VINFO_STMT (stmt_info));
2640 2185 : return false;
2641 : }
2642 :
2643 : /* Reject vectorization if we know the read amount per vector iteration
2644 : exceeds the min page size. */
2645 86205 : if (known_gt (read_amount, (unsigned) param_min_pagesize))
2646 : {
2647 0 : if (dump_enabled_p ())
2648 : {
2649 0 : dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2650 : "alignment required for correctness (");
2651 0 : dump_dec (MSG_MISSED_OPTIMIZATION, read_amount);
2652 0 : dump_printf (MSG_NOTE, ") may exceed page size.\n");
2653 : }
2654 0 : return false;
2655 : }
2656 :
2657 86205 : if (!vf.is_constant ())
2658 : {
2659 : /* For VLA modes, we need a runtime check to ensure any speculative
2660 : read amount does not exceed the page size. Here we record the max
2661 : possible read amount for the check. */
2662 : if (maybe_gt (read_amount,
2663 : LOOP_VINFO_MAX_SPEC_READ_AMOUNT (loop_vinfo)))
2664 : LOOP_VINFO_MAX_SPEC_READ_AMOUNT (loop_vinfo) = read_amount;
2665 :
2666 : /* For VLA modes, we must use partial vectors. */
2667 : LOOP_VINFO_MUST_USE_PARTIAL_VECTORS_P (loop_vinfo) = true;
2668 : }
2669 : }
2670 :
2671 1327131 : if (*alignment_support_scheme == dr_unaligned_unsupported)
2672 : {
2673 63775 : if (dump_enabled_p ())
2674 248 : dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2675 : "unsupported unaligned access\n");
2676 63775 : return false;
2677 : }
2678 :
2679 : /* FIXME: At the moment the cost model seems to underestimate the
2680 : cost of using elementwise accesses. This check preserves the
2681 : traditional behavior until that can be fixed. */
2682 1263356 : if (*memory_access_type == VMAT_ELEMENTWISE
2683 14720 : && !STMT_VINFO_STRIDED_P (first_stmt_info)
2684 1278076 : && !(STMT_VINFO_GROUPED_ACCESS (stmt_info)
2685 9627 : && single_element_p
2686 8986 : && !pow2p_hwi (group_size)))
2687 : {
2688 9050 : if (dump_enabled_p ())
2689 362 : dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2690 : "not falling back to elementwise accesses\n");
2691 9050 : return false;
2692 : }
2693 :
2694 : /* For BB vectorization build up the vector from existing scalar defs. */
2695 1254306 : if (!loop_vinfo && *memory_access_type == VMAT_ELEMENTWISE)
2696 : return false;
2697 :
2698 : /* Some loads need to explicitly permute the loaded data if there
2699 : is a load permutation. Among those are:
2700 : - VMAT_ELEMENTWISE.
2701 : - VMAT_STRIDED_SLP.
2702 : - VMAT_GATHER_SCATTER:
2703 : - Strided gather (fallback for VMAT_STRIDED_SLP if #lanes == 1).
2704 : - Grouped strided gather (ditto but for #lanes > 1).
2705 :
2706 : For VMAT_ELEMENTWISE we can fold the load permutation into the
2707 : individual indices we access directly, eliding the permutation.
2708 : Strided gather only allows load permutations for the
2709 : single-element case. */
2710 :
2711 1254306 : if (SLP_TREE_LOAD_PERMUTATION (slp_node).exists ()
2712 1254306 : && !(*memory_access_type == VMAT_ELEMENTWISE
2713 45950 : || (mat_gather_scatter_p (*memory_access_type)
2714 0 : && SLP_TREE_LANES (slp_node) == 1
2715 0 : && single_element_p)))
2716 : {
2717 45950 : if (!loop_vinfo)
2718 : {
2719 : /* In BB vectorization we may not actually use a loaded vector
2720 : accessing elements in excess of DR_GROUP_SIZE. */
2721 24271 : stmt_vec_info group_info = SLP_TREE_SCALAR_STMTS (slp_node)[0];
2722 24271 : group_info = DR_GROUP_FIRST_ELEMENT (group_info);
2723 24271 : unsigned HOST_WIDE_INT nunits;
2724 24271 : unsigned j, k, maxk = 0;
2725 85993 : FOR_EACH_VEC_ELT (SLP_TREE_LOAD_PERMUTATION (slp_node), j, k)
2726 61722 : if (k > maxk)
2727 : maxk = k;
2728 24271 : tree vectype = SLP_TREE_VECTYPE (slp_node);
2729 44193 : if (!TYPE_VECTOR_SUBPARTS (vectype).is_constant (&nunits)
2730 24271 : || maxk >= (DR_GROUP_SIZE (group_info) & ~(nunits - 1)))
2731 : {
2732 4349 : if (dump_enabled_p ())
2733 31 : dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2734 : "BB vectorization with gaps at the end of "
2735 : "a load is not supported\n");
2736 4349 : return false;
2737 : }
2738 : }
2739 :
2740 41601 : if (!perm_ok)
2741 : {
2742 1973 : if (dump_enabled_p ())
2743 8 : dump_printf_loc (MSG_MISSED_OPTIMIZATION,
2744 : vect_location,
2745 : "unsupported load permutation\n");
2746 1973 : return false;
2747 : }
2748 :
2749 39628 : *slp_perm = true;
2750 : }
2751 :
2752 : return true;
2753 : }
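
/* A heavily simplified standalone model of the classification order
   in get_load_store_type; illustrative only.  The real routine also
   validates alignment, gaps, overrun peeling and load permutations,
   and may revise its first choice (e.g. falling back to elementwise
   access or to a strided gather).  */

enum vmat_kind
{
  STRIDED_SLP, GATHER_SCATTER, NEGATIVE_STEP, INVARIANT,
  LOAD_STORE_LANES, ELEMENTWISE, CONTIGUOUS
};

static vmat_kind
classify (bool strided, bool gather_scatter, int step_sign,
	  bool lanes_ok, bool avoid_stlf_fail)
{
  if (strided)
    return STRIDED_SLP;
  if (gather_scatter)
    return GATHER_SCATTER;	/* IFN, LEGACY or EMULATED flavor.  */
  if (step_sign < 0)
    return NEGATIVE_STEP;	/* See get_negative_load_store_type.  */
  if (step_sign == 0)
    return INVARIANT;		/* Loads only, within a loop.  */
  if (lanes_ok)
    return LOAD_STORE_LANES;
  if (avoid_stlf_fail)
    return ELEMENTWISE;
  return CONTIGUOUS;
}

int
main ()
{
  return classify (false, false, 1, false, false) == CONTIGUOUS ? 0 : 1;
}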
2754 :
2755 : /* Return true if boolean argument at MASK_INDEX is suitable for vectorizing
2756 : conditional operation STMT_INFO. When returning true, store the mask
2757 : in *MASK_NODE, the type of its definition in *MASK_DT_OUT and the type of
2758 : the vectorized mask in *MASK_VECTYPE_OUT. */
2759 :
2760 : static bool
2761 12691 : vect_check_scalar_mask (vec_info *vinfo,
2762 : slp_tree slp_node, unsigned mask_index,
2763 : slp_tree *mask_node,
2764 : vect_def_type *mask_dt_out, tree *mask_vectype_out)
2765 : {
2766 12691 : enum vect_def_type mask_dt;
2767 12691 : tree mask_vectype;
2768 12691 : slp_tree mask_node_1;
2769 12691 : tree mask_;
2770 12691 : if (!vect_is_simple_use (vinfo, slp_node, mask_index,
2771 : &mask_, &mask_node_1, &mask_dt, &mask_vectype))
2772 : {
2773 0 : if (dump_enabled_p ())
2774 0 : dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2775 : "mask use not simple.\n");
2776 0 : return false;
2777 : }
2778 :
2779 12691 : if ((mask_dt == vect_constant_def || mask_dt == vect_external_def)
2780 12691 : && !VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (mask_)))
2781 : {
2782 0 : if (dump_enabled_p ())
2783 0 : dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2784 : "mask argument is not a boolean.\n");
2785 0 : return false;
2786 : }
2787 :
2788 12691 : tree vectype = SLP_TREE_VECTYPE (slp_node);
2789 12691 : if (!mask_vectype)
2790 19 : mask_vectype = get_mask_type_for_scalar_type (vinfo, TREE_TYPE (vectype),
2791 : mask_node_1);
2792 :
2793 12691 : if (!mask_vectype || !VECTOR_BOOLEAN_TYPE_P (mask_vectype))
2794 : {
2795 0 : if (dump_enabled_p ())
2796 0 : dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2797 : "could not find an appropriate vector mask type.\n");
2798 0 : return false;
2799 : }
2800 :
2801 12691 : if (maybe_ne (TYPE_VECTOR_SUBPARTS (mask_vectype),
2802 25382 : TYPE_VECTOR_SUBPARTS (vectype)))
2803 : {
2804 0 : if (dump_enabled_p ())
2805 0 : dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2806 : "vector mask type %T"
2807 : " does not match vector data type %T.\n",
2808 : mask_vectype, vectype);
2809 :
2810 0 : return false;
2811 : }
2812 :
2813 12691 : *mask_dt_out = mask_dt;
2814 12691 : *mask_vectype_out = mask_vectype;
2815 12691 : *mask_node = mask_node_1;
2816 12691 : return true;
2817 : }
2818 :
2819 :
2820 : /* Return true if stored value is suitable for vectorizing store
2821 : statement STMT_INFO. When returning true, store the scalar stored
2822 : in *RHS and *RHS_NODE, the type of the definition in *RHS_DT_OUT,
2823 : the type of the vectorized store value in
2824 : *RHS_VECTYPE_OUT and the type of the store in *VLS_TYPE_OUT. */
2825 :
2826 : static bool
2827 1354793 : vect_check_store_rhs (vec_info *vinfo, stmt_vec_info stmt_info,
2828 : slp_tree slp_node, slp_tree *rhs_node,
2829 : vect_def_type *rhs_dt_out, tree *rhs_vectype_out,
2830 : vec_load_store_type *vls_type_out)
2831 : {
2832 1354793 : int op_no = 0;
2833 1354793 : if (gcall *call = dyn_cast <gcall *> (stmt_info->stmt))
2834 : {
2835 1901 : if (gimple_call_internal_p (call)
2836 1901 : && internal_store_fn_p (gimple_call_internal_fn (call)))
2837 1901 : op_no = internal_fn_stored_value_index (gimple_call_internal_fn (call));
2838 : }
2839 1354793 : op_no = vect_slp_child_index_for_operand (stmt_info, op_no);
2840 :
2841 1354793 : enum vect_def_type rhs_dt;
2842 1354793 : tree rhs_vectype;
2843 1354793 : tree rhs;
2844 1354793 : if (!vect_is_simple_use (vinfo, slp_node, op_no,
2845 : &rhs, rhs_node, &rhs_dt, &rhs_vectype))
2846 : {
2847 0 : if (dump_enabled_p ())
2848 0 : dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2849 : "use not simple.\n");
2850 0 : return false;
2851 : }
2852 :
2853 : /* In case this is a store from a constant, make sure
2854 : native_encode_expr can handle it. */
2855 1354793 : if (rhs_dt == vect_constant_def
2856 1354793 : && CONSTANT_CLASS_P (rhs) && native_encode_expr (rhs, NULL, 64) == 0)
2857 : {
2858 0 : if (dump_enabled_p ())
2859 0 : dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2860 : "cannot encode constant as a byte sequence.\n");
2861 0 : return false;
2862 : }
2863 :
2864 1354793 : tree vectype = SLP_TREE_VECTYPE (slp_node);
2865 1354793 : if (rhs_vectype && !useless_type_conversion_p (vectype, rhs_vectype))
2866 : {
2867 24 : if (dump_enabled_p ())
2868 24 : dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2869 : "incompatible vector types.\n");
2870 24 : return false;
2871 : }
2872 :
2873 1354769 : *rhs_dt_out = rhs_dt;
2874 1354769 : *rhs_vectype_out = rhs_vectype;
2875 1354769 : if (rhs_dt == vect_constant_def || rhs_dt == vect_external_def)
2876 999012 : *vls_type_out = VLS_STORE_INVARIANT;
2877 : else
2878 355757 : *vls_type_out = VLS_STORE;
2879 : return true;
2880 : }
2881 :
2882 : /* Build an all-ones vector mask of type MASKTYPE while vectorizing STMT_INFO.
2883 : Note that we support masks with floating-point type, in which case the
2884 : floats are interpreted as a bitmask. */
2885 :
2886 : static tree
2887 170 : vect_build_all_ones_mask (vec_info *vinfo,
2888 : stmt_vec_info stmt_info, tree masktype)
2889 : {
2890 170 : if (TREE_CODE (masktype) == INTEGER_TYPE)
2891 98 : return build_int_cst (masktype, -1);
2892 72 : else if (VECTOR_BOOLEAN_TYPE_P (masktype)
2893 144 : || TREE_CODE (TREE_TYPE (masktype)) == INTEGER_TYPE)
2894 : {
2895 19 : tree mask = build_int_cst (TREE_TYPE (masktype), -1);
2896 19 : mask = build_vector_from_val (masktype, mask);
2897 19 : return vect_init_vector (vinfo, stmt_info, mask, masktype, NULL);
2898 : }
2899 53 : else if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (masktype)))
2900 : {
2901 : REAL_VALUE_TYPE r;
2902 : long tmp[6];
2903 371 : for (int j = 0; j < 6; ++j)
2904 318 : tmp[j] = -1;
2905 53 : real_from_target (&r, tmp, TYPE_MODE (TREE_TYPE (masktype)));
2906 53 : tree mask = build_real (TREE_TYPE (masktype), r);
2907 53 : mask = build_vector_from_val (masktype, mask);
2908 53 : return vect_init_vector (vinfo, stmt_info, mask, masktype, NULL);
2909 : }
2910 0 : gcc_unreachable ();
2911 : }
2912 :
2913 : /* Build an all-zero merge value of type VECTYPE while vectorizing
2914 : STMT_INFO as a gather load. */
2915 :
2916 : static tree
2917 158 : vect_build_zero_merge_argument (vec_info *vinfo,
2918 : stmt_vec_info stmt_info, tree vectype)
2919 : {
2920 158 : tree merge;
2921 158 : if (TREE_CODE (TREE_TYPE (vectype)) == INTEGER_TYPE)
2922 49 : merge = build_int_cst (TREE_TYPE (vectype), 0);
2923 109 : else if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (vectype)))
2924 : {
2925 : REAL_VALUE_TYPE r;
2926 : long tmp[6];
2927 763 : for (int j = 0; j < 6; ++j)
2928 654 : tmp[j] = 0;
2929 109 : real_from_target (&r, tmp, TYPE_MODE (TREE_TYPE (vectype)));
2930 109 : merge = build_real (TREE_TYPE (vectype), r);
2931 : }
2932 : else
2933 0 : gcc_unreachable ();
2934 158 : merge = build_vector_from_val (vectype, merge);
2935 158 : return vect_init_vector (vinfo, stmt_info, merge, vectype, NULL);
2936 : }
2937 :
2938 : /* Return the else value corresponding to the else-value constant
2939 : ELSVAL, with type TYPE. */
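 : /* For MASK_LOAD_ELSE_ZERO the returned value feeds the else
 :    argument of the masked load, so the vectorized load looks
 :    something like
 :      _5 = .MASK_LOAD (_addr, align, mask_3, { 0, ... });
 :    and inactive lanes read as zero. */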
2940 :
2941 : tree
2942 1944 : vect_get_mask_load_else (int elsval, tree type)
2943 : {
2944 1944 : tree els;
2945 1944 : if (elsval == MASK_LOAD_ELSE_UNDEFINED)
2946 : {
2947 0 : tree tmp = create_tmp_var (type);
2948 : /* No need to warn about anything. */
2949 0 : TREE_NO_WARNING (tmp) = 1;
2950 0 : els = get_or_create_ssa_default_def (cfun, tmp);
2951 : }
2952 1944 : else if (elsval == MASK_LOAD_ELSE_M1)
2953 0 : els = build_minus_one_cst (type);
2954 1944 : else if (elsval == MASK_LOAD_ELSE_ZERO)
2955 1944 : els = build_zero_cst (type);
2956 : else
2957 0 : gcc_unreachable ();
2958 :
2959 1944 : return els;
2960 : }
2961 :
2962 : /* Build a gather load call while vectorizing STMT_INFO, using the
2963 : target builtin DECL. Insert new instructions before GSI. SLP_NODE
2964 : is the SLP node of the load. If the load is conditional, MASK is
2965 : the vectorized condition, otherwise MASK is null. PTR is the base
2966 : pointer and OFFSET is the vectorized offset. */
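 : /* The call built below has the shape
 :      res = DECL (src, ptr, idx, mask, scale);
 :    matching e.g. the x86 AVX2 gather builtins such as
 :    __builtin_ia32_gathersiv4sf, with VIEW_CONVERT_EXPRs inserted
 :    whenever a vectorized operand does not match the builtin's
 :    argument type. */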
2967 :
2968 : static gimple *
2969 346 : vect_build_one_gather_load_call (vec_info *vinfo, stmt_vec_info stmt_info,
2970 : slp_tree slp_node, tree vectype,
2971 : gimple_stmt_iterator *gsi, tree decl,
2972 : tree ptr, tree offset, tree mask)
2973 : {
2974 346 : tree arglist = TYPE_ARG_TYPES (TREE_TYPE (decl));
2975 346 : tree rettype = TREE_TYPE (TREE_TYPE (decl));
2976 346 : tree srctype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
2977 346 : /* ptrtype */ arglist = TREE_CHAIN (arglist);
2978 346 : tree idxtype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
2979 346 : tree masktype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
2980 346 : tree scaletype = TREE_VALUE (arglist);
2981 346 : tree var;
2982 346 : gcc_checking_assert (types_compatible_p (srctype, rettype)
2983 : && (!mask
2984 : || TREE_CODE (masktype) == INTEGER_TYPE
2985 : || types_compatible_p (srctype, masktype)));
2986 :
2987 346 : tree op = offset;
2988 346 : if (!useless_type_conversion_p (idxtype, TREE_TYPE (op)))
2989 : {
2990 100 : gcc_assert (known_eq (TYPE_VECTOR_SUBPARTS (TREE_TYPE (op)),
2991 : TYPE_VECTOR_SUBPARTS (idxtype)));
2992 100 : var = vect_get_new_ssa_name (idxtype, vect_simple_var);
2993 100 : op = build1 (VIEW_CONVERT_EXPR, idxtype, op);
2994 100 : gassign *new_stmt = gimple_build_assign (var, VIEW_CONVERT_EXPR, op);
2995 100 : vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
2996 100 : op = var;
2997 : }
2998 :
2999 346 : tree src_op = NULL_TREE;
3000 346 : tree mask_op = NULL_TREE;
3001 346 : if (mask)
3002 : {
3003 188 : if (!useless_type_conversion_p (masktype, TREE_TYPE (mask)))
3004 : {
3005 188 : tree utype, optype = TREE_TYPE (mask);
3006 188 : if (VECTOR_TYPE_P (masktype)
3007 188 : || TYPE_MODE (masktype) == TYPE_MODE (optype))
3008 : utype = masktype;
3009 : else
3010 6 : utype = lang_hooks.types.type_for_mode (TYPE_MODE (optype), 1);
3011 188 : var = vect_get_new_ssa_name (utype, vect_scalar_var);
3012 188 : tree mask_arg = build1 (VIEW_CONVERT_EXPR, utype, mask);
3013 188 : gassign *new_stmt
3014 188 : = gimple_build_assign (var, VIEW_CONVERT_EXPR, mask_arg);
3015 188 : vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
3016 188 : mask_arg = var;
3017 188 : if (!useless_type_conversion_p (masktype, utype))
3018 : {
3019 6 : gcc_assert (TYPE_PRECISION (utype)
3020 : <= TYPE_PRECISION (masktype));
3021 6 : var = vect_get_new_ssa_name (masktype, vect_scalar_var);
3022 6 : new_stmt = gimple_build_assign (var, NOP_EXPR, mask_arg);
3023 6 : vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
3024 6 : mask_arg = var;
3025 : }
3026 188 : src_op = build_zero_cst (srctype);
3027 188 : mask_op = mask_arg;
3028 : }
3029 : else
3030 : {
3031 : src_op = mask;
3032 : mask_op = mask;
3033 : }
3034 : }
3035 : else
3036 : {
3037 158 : src_op = vect_build_zero_merge_argument (vinfo, stmt_info, rettype);
3038 158 : mask_op = vect_build_all_ones_mask (vinfo, stmt_info, masktype);
3039 : }
3040 :
3041 346 : tree scale = build_int_cst (scaletype, SLP_TREE_GS_SCALE (slp_node));
3042 346 : gimple *new_stmt = gimple_build_call (decl, 5, src_op, ptr, op,
3043 : mask_op, scale);
3044 :
3045 346 : if (!useless_type_conversion_p (vectype, rettype))
3046 : {
3047 49 : gcc_assert (known_eq (TYPE_VECTOR_SUBPARTS (vectype),
3048 : TYPE_VECTOR_SUBPARTS (rettype)));
3049 49 : op = vect_get_new_ssa_name (rettype, vect_simple_var);
3050 49 : gimple_call_set_lhs (new_stmt, op);
3051 49 : vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
3052 49 : op = build1 (VIEW_CONVERT_EXPR, vectype, op);
3053 49 : new_stmt = gimple_build_assign (NULL_TREE, VIEW_CONVERT_EXPR, op);
3054 : }
3055 :
3056 346 : return new_stmt;
3057 : }
3058 :
3059 : /* Build a scatter store call while vectorizing STMT_INFO, using the
3060 : target builtin DECL. Insert new instructions before GSI. SLP_NODE
3061 : is the SLP node of the store. PTR is the base pointer, OFFSET the
3062 : vectorized offsets and OPRND the vectorized data to store.
3063 : If the store is conditional, MASK is the vectorized condition,
3064 : otherwise MASK is null. */
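 : /* The call built below has the shape
 :      DECL (ptr, mask, idx, src, scale);
 :    matching e.g. the x86 AVX-512 scatter builtins, whose mask
 :    argument is a plain integer as asserted below. */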
3065 :
3066 : static gimple *
3067 161 : vect_build_one_scatter_store_call (vec_info *vinfo, stmt_vec_info stmt_info,
3068 : slp_tree slp_node,
3069 : gimple_stmt_iterator *gsi,
3070 : tree decl,
3071 : tree ptr, tree offset, tree oprnd, tree mask)
3072 : {
3073 161 : tree rettype = TREE_TYPE (TREE_TYPE (decl));
3074 161 : tree arglist = TYPE_ARG_TYPES (TREE_TYPE (decl));
3075 161 : /* tree ptrtype = TREE_VALUE (arglist); */ arglist = TREE_CHAIN (arglist);
3076 161 : tree masktype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
3077 161 : tree idxtype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
3078 161 : tree srctype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
3079 161 : tree scaletype = TREE_VALUE (arglist);
3080 161 : gcc_checking_assert (TREE_CODE (masktype) == INTEGER_TYPE
3081 : && TREE_CODE (rettype) == VOID_TYPE);
3082 :
3083 161 : tree mask_arg = NULL_TREE;
3084 161 : if (mask)
3085 : {
3086 110 : mask_arg = mask;
3087 110 : tree optype = TREE_TYPE (mask_arg);
3088 110 : tree utype;
3089 110 : if (TYPE_MODE (masktype) == TYPE_MODE (optype))
3090 : utype = masktype;
3091 : else
3092 8 : utype = lang_hooks.types.type_for_mode (TYPE_MODE (optype), 1);
3093 110 : tree var = vect_get_new_ssa_name (utype, vect_scalar_var);
3094 110 : mask_arg = build1 (VIEW_CONVERT_EXPR, utype, mask_arg);
3095 110 : gassign *new_stmt
3096 110 : = gimple_build_assign (var, VIEW_CONVERT_EXPR, mask_arg);
3097 110 : vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
3098 110 : mask_arg = var;
3099 110 : if (!useless_type_conversion_p (masktype, utype))
3100 : {
3101 8 : gcc_assert (TYPE_PRECISION (utype) <= TYPE_PRECISION (masktype));
3102 8 : tree var = vect_get_new_ssa_name (masktype, vect_scalar_var);
3103 8 : new_stmt = gimple_build_assign (var, NOP_EXPR, mask_arg);
3104 8 : vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
3105 8 : mask_arg = var;
3106 : }
3107 : }
3108 : else
3109 : {
3110 51 : mask_arg = build_int_cst (masktype, -1);
3111 51 : mask_arg = vect_init_vector (vinfo, stmt_info, mask_arg, masktype, NULL);
3112 : }
3113 :
3114 161 : tree src = oprnd;
3115 161 : if (!useless_type_conversion_p (srctype, TREE_TYPE (src)))
3116 : {
3117 0 : gcc_assert (known_eq (TYPE_VECTOR_SUBPARTS (TREE_TYPE (src)),
3118 : TYPE_VECTOR_SUBPARTS (srctype)));
3119 0 : tree var = vect_get_new_ssa_name (srctype, vect_simple_var);
3120 0 : src = build1 (VIEW_CONVERT_EXPR, srctype, src);
3121 0 : gassign *new_stmt = gimple_build_assign (var, VIEW_CONVERT_EXPR, src);
3122 0 : vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
3123 0 : src = var;
3124 : }
3125 :
3126 161 : tree op = offset;
3127 161 : if (!useless_type_conversion_p (idxtype, TREE_TYPE (op)))
3128 : {
3129 16 : gcc_assert (known_eq (TYPE_VECTOR_SUBPARTS (TREE_TYPE (op)),
3130 : TYPE_VECTOR_SUBPARTS (idxtype)));
3131 16 : tree var = vect_get_new_ssa_name (idxtype, vect_simple_var);
3132 16 : op = build1 (VIEW_CONVERT_EXPR, idxtype, op);
3133 16 : gassign *new_stmt = gimple_build_assign (var, VIEW_CONVERT_EXPR, op);
3134 16 : vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
3135 16 : op = var;
3136 : }
3137 :
3138 161 : tree scale = build_int_cst (scaletype, SLP_TREE_GS_SCALE (slp_node));
3139 161 : gcall *new_stmt
3140 161 : = gimple_build_call (decl, 5, ptr, mask_arg, op, src, scale);
3141 161 : return new_stmt;
3142 : }
3143 :
3144 : /* Prepare the base and offsets of the gather or scatter represented
3145 : by SLP_NODE for vectorization. Set *DATAREF_PTR to the
3146 : loop-invariant base address and fill *VEC_OFFSET with the
3147 : vectorized offset arguments for all copies of the access.
3148 : LOOP is the containing loop. */
3149 :
3150 : static void
3151 1217 : vect_get_gather_scatter_ops (class loop *loop, slp_tree slp_node,
3152 : tree *dataref_ptr, vec<tree> *vec_offset)
3153 : {
3154 1217 : gimple_seq stmts = NULL;
3155 1217 : *dataref_ptr = force_gimple_operand (SLP_TREE_GS_BASE (slp_node),
3156 : &stmts, true, NULL_TREE);
3157 1217 : if (stmts != NULL)
3158 : {
3159 984 : basic_block new_bb;
3160 984 : edge pe = loop_preheader_edge (loop);
3161 984 : new_bb = gsi_insert_seq_on_edge_immediate (pe, stmts);
3162 984 : gcc_assert (!new_bb);
3163 : }
3164 1217 : vect_get_slp_defs (SLP_TREE_CHILDREN (slp_node)[0], vec_offset);
3165 1217 : }
3166 :
3167 : /* Prepare to implement a grouped or strided load or store as a
3168 : gather load or scatter store. STMT_INFO is the load or store
3169 : statement and NODE its SLP node.
3170 :
3171 : Set *DATAREF_BUMP to the amount that should be added to the base
3172 : address after each copy of the vectorized statement. Set *VEC_OFFSET
3173 : to an invariant offset vector in which element I has the value
3174 : I * DR_STEP / SCALE. */
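 : /* For example, with DR_STEP 32 bytes and a scale of 8 the code
 :    below computes X = 32 / 8 = 4 and builds the invariant offset
 :    vector { 0, 4, 8, 12, ... } as a VEC_SERIES_EXPR, unless a
 :    strided load/store internal function is available, in which
 :    case the scalar DR_STEP itself is used. */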
3175 :
3176 : static void
3177 0 : vect_get_strided_load_store_ops (stmt_vec_info stmt_info, slp_tree node,
3178 : tree vectype, tree offset_vectype,
3179 : loop_vec_info loop_vinfo,
3180 : gimple_stmt_iterator *gsi,
3181 : tree *dataref_bump, tree *vec_offset,
3182 : vec_loop_lens *loop_lens)
3183 : {
3184 0 : struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
3185 :
3186 0 : if (LOOP_VINFO_USING_SELECT_VL_P (loop_vinfo))
3187 : {
3188 : /* _31 = .SELECT_VL (ivtmp_29, POLY_INT_CST [4, 4]);
3189 : ivtmp_8 = _31 * 16 (step in bytes);
3190 : .MASK_LEN_SCATTER_STORE (vectp_a.9_7, ... );
3191 : vectp_a.9_26 = vectp_a.9_7 + ivtmp_8; */
3192 0 : tree loop_len
3193 0 : = vect_get_loop_len (loop_vinfo, gsi, loop_lens, 1, vectype, 0, 0, true);
3194 0 : tree tmp
3195 0 : = fold_build2 (MULT_EXPR, sizetype,
3196 : fold_convert (sizetype, unshare_expr (DR_STEP (dr))),
3197 : loop_len);
3198 0 : *dataref_bump = force_gimple_operand_gsi (gsi, tmp, true, NULL_TREE, true,
3199 : GSI_SAME_STMT);
3200 : }
3201 : else
3202 : {
3203 0 : tree bump
3204 0 : = size_binop (MULT_EXPR,
3205 : fold_convert (sizetype, unshare_expr (DR_STEP (dr))),
3206 : size_int (TYPE_VECTOR_SUBPARTS (vectype)));
3207 0 : *dataref_bump = cse_and_gimplify_to_preheader (loop_vinfo, bump);
3208 : }
3209 :
3210 0 : internal_fn ifn
3211 0 : = DR_IS_READ (dr) ? IFN_MASK_LEN_STRIDED_LOAD : IFN_MASK_LEN_STRIDED_STORE;
3212 0 : if (direct_internal_fn_supported_p (ifn, vectype, OPTIMIZE_FOR_SPEED))
3213 : {
3214 0 : *vec_offset = cse_and_gimplify_to_preheader (loop_vinfo,
3215 : unshare_expr (DR_STEP (dr)));
3216 0 : return;
3217 : }
3218 :
3219 : /* The offset can have pointer type, so use the element type of
3220 : the offset vector instead. */
3221 0 : tree offset_type = TREE_TYPE (offset_vectype);
3222 :
3223 : /* Calculate X = DR_STEP / SCALE and convert it to the appropriate type. */
3224 0 : tree step = size_binop (EXACT_DIV_EXPR, unshare_expr (DR_STEP (dr)),
3225 : ssize_int (SLP_TREE_GS_SCALE (node)));
3226 0 : step = fold_convert (offset_type, step);
3227 :
3228 : /* Create {0, X, X*2, X*3, ...}. */
3229 0 : tree offset = fold_build2 (VEC_SERIES_EXPR, offset_vectype,
3230 : build_zero_cst (offset_type), step);
3231 0 : *vec_offset = cse_and_gimplify_to_preheader (loop_vinfo, offset);
3232 : }
3233 :
3234 : /* Prepare the pointer IVs which need to be updated by a variable
3235 : amount. Such a variable amount is the outcome of .SELECT_VL. In
3236 : this case we allow each iteration to process a flexible number of
3237 : elements, as long as that number is <= VF.
3238 :
3239 : Return the data reference increment according to .SELECT_VL.
3240 : If new statements are needed, insert them before GSI. */
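 : /* For example, if .SELECT_VL returns 4 in some iteration and the
 :    scalar step is 8 bytes, the pointer IV advances by 4 * 8 = 32
 :    bytes in that iteration. */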
3241 :
3242 : static tree
3243 0 : vect_get_loop_variant_data_ptr_increment (
3244 : vec_info *vinfo, tree aggr_type, gimple_stmt_iterator *gsi,
3245 : vec_loop_lens *loop_lens, dr_vec_info *dr_info,
3246 : vect_memory_access_type memory_access_type)
3247 : {
3248 0 : loop_vec_info loop_vinfo = dyn_cast<loop_vec_info> (vinfo);
3249 0 : tree step = vect_dr_behavior (vinfo, dr_info)->step;
3250 :
3251 : /* gather/scatter never reach here. */
3252 0 : gcc_assert (!mat_gather_scatter_p (memory_access_type));
3253 :
3254 : /* When the loop uses the SELECT_VL pattern, we dynamically adjust
3255 : the memory address by the .SELECT_VL result.
3256 :
3257 : The result of .SELECT_VL is the number of elements to
3258 : be processed in each iteration. So the memory address
3259 : adjustment operation should be:
3260 :
3261 : addr = addr + .SELECT_VL (ARG..) * step;
3262 : */
3263 0 : tree loop_len
3264 0 : = vect_get_loop_len (loop_vinfo, gsi, loop_lens, 1, aggr_type, 0, 0, true);
3265 0 : tree len_type = TREE_TYPE (loop_len);
3266 : /* Since the outcome of .SELECT_VL is a number of elements, scale
3267 : it by the step in bytes so that it can be used to adjust the
3268 : address pointer IV by a variable amount. */
3269 0 : tree tmp = fold_build2 (MULT_EXPR, len_type, loop_len,
3270 : wide_int_to_tree (len_type, wi::to_widest (step)));
3271 0 : tree bump = make_temp_ssa_name (len_type, NULL, "ivtmp");
3272 0 : gassign *assign = gimple_build_assign (bump, tmp);
3273 0 : gsi_insert_before (gsi, assign, GSI_SAME_STMT);
3274 0 : return bump;
3275 : }
3276 :
3277 : /* Return the amount that should be added to a vector pointer to move
3278 : to the next or previous copy of AGGR_TYPE. DR_INFO is the data reference
3279 : being vectorized and MEMORY_ACCESS_TYPE describes the type of
3280 : vectorization. */
3281 :
3282 : static tree
3283 699850 : vect_get_data_ptr_increment (vec_info *vinfo, gimple_stmt_iterator *gsi,
3284 : dr_vec_info *dr_info, tree aggr_type,
3285 : vect_memory_access_type memory_access_type,
3286 : vec_loop_lens *loop_lens)
3287 : {
3288 699850 : if (memory_access_type == VMAT_INVARIANT)
3289 0 : return size_zero_node;
3290 :
3291 699850 : loop_vec_info loop_vinfo = dyn_cast<loop_vec_info> (vinfo);
3292 134196 : if (loop_vinfo && LOOP_VINFO_USING_SELECT_VL_P (loop_vinfo))
3293 0 : return vect_get_loop_variant_data_ptr_increment (vinfo, aggr_type, gsi,
3294 : loop_lens, dr_info,
3295 0 : memory_access_type);
3296 :
3297 699850 : tree iv_step = TYPE_SIZE_UNIT (aggr_type);
3298 699850 : tree step = vect_dr_behavior (vinfo, dr_info)->step;
3299 699850 : if (tree_int_cst_sgn (step) == -1)
3300 2841 : iv_step = fold_build1 (NEGATE_EXPR, TREE_TYPE (iv_step), iv_step);
3301 : return iv_step;
3302 : }
3303 :
3304 : /* Check and perform vectorization of BUILT_IN_BSWAP{16,32,64,128}. */
3305 :
3306 : static bool
3307 206 : vectorizable_bswap (vec_info *vinfo,
3308 : stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
3309 : slp_tree slp_node,
3310 : slp_tree *slp_op,
3311 : tree vectype_in, stmt_vector_for_cost *cost_vec)
3312 : {
3313 206 : tree op, vectype;
3314 206 : gcall *stmt = as_a <gcall *> (stmt_info->stmt);
3315 :
3316 206 : op = gimple_call_arg (stmt, 0);
3317 206 : vectype = SLP_TREE_VECTYPE (slp_node);
3318 206 : poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
3319 :
3320 206 : if (TYPE_SIZE (vectype_in) != TYPE_SIZE (vectype))
3321 : {
3322 0 : if (dump_enabled_p ())
3323 0 : dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3324 : "mismatched vector sizes %T and %T\n",
3325 : vectype_in, vectype);
3326 0 : return false;
3327 : }
3328 :
3329 206 : tree char_vectype = get_same_sized_vectype (char_type_node, vectype_in);
3330 206 : if (! char_vectype)
3331 : return false;
3332 :
3333 206 : poly_uint64 num_bytes = TYPE_VECTOR_SUBPARTS (char_vectype);
3334 206 : unsigned word_bytes;
3335 206 : if (!constant_multiple_p (num_bytes, nunits, &word_bytes))
3336 : return false;
3337 :
3338 : /* The encoding uses one stepped pattern for each byte in the word. */
3339 206 : vec_perm_builder elts (num_bytes, word_bytes, 3);
3340 824 : for (unsigned i = 0; i < 3; ++i)
3341 3318 : for (unsigned j = 0; j < word_bytes; ++j)
3342 2700 : elts.quick_push ((i + 1) * word_bytes - j - 1);
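 : /* For example, a 32-bit bswap on a 16-byte vector has
 :    word_bytes == 4, so the three stepped patterns pushed above are
 :    { 3, 2, 1, 0, 7, 6, 5, 4, 11, 10, 9, 8 } and vec_perm_indices
 :    extrapolates them to all 16 byte positions. */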
3343 :
3344 206 : vec_perm_indices indices (elts, 1, num_bytes);
3345 206 : machine_mode vmode = TYPE_MODE (char_vectype);
3346 206 : if (!can_vec_perm_const_p (vmode, vmode, indices))
3347 : return false;
3348 :
3349 152 : if (cost_vec)
3350 : {
3351 140 : if (!vect_maybe_update_slp_op_vectype (slp_op[0], vectype_in))
3352 : {
3353 0 : if (dump_enabled_p ())
3354 0 : dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3355 : "incompatible vector types for invariants\n");
3356 0 : return false;
3357 : }
3358 :
3359 140 : SLP_TREE_TYPE (slp_node) = call_vec_info_type;
3360 140 : DUMP_VECT_SCOPE ("vectorizable_bswap");
3361 140 : record_stmt_cost (cost_vec,
3362 : 1, vector_stmt, slp_node, 0, vect_prologue);
3363 140 : record_stmt_cost (cost_vec,
3364 140 : vect_get_num_copies (vinfo, slp_node),
3365 : vec_perm, slp_node, 0, vect_body);
3366 140 : return true;
3367 : }
3368 :
3369 12 : tree bswap_vconst = vec_perm_indices_to_tree (char_vectype, indices);
3370 :
3371 : /* Transform. */
3372 12 : vec<tree> vec_oprnds = vNULL;
3373 12 : vect_get_vec_defs (vinfo, slp_node, op, &vec_oprnds);
3374 : /* Arguments are ready. Create the new vector stmt. */
3375 12 : unsigned i;
3376 12 : tree vop;
3377 24 : FOR_EACH_VEC_ELT (vec_oprnds, i, vop)
3378 : {
3379 12 : gimple *new_stmt;
3380 12 : tree tem = make_ssa_name (char_vectype);
3381 12 : new_stmt = gimple_build_assign (tem, build1 (VIEW_CONVERT_EXPR,
3382 : char_vectype, vop));
3383 12 : vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
3384 12 : tree tem2 = make_ssa_name (char_vectype);
3385 12 : new_stmt = gimple_build_assign (tem2, VEC_PERM_EXPR,
3386 : tem, tem, bswap_vconst);
3387 12 : vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
3388 12 : tem = make_ssa_name (vectype);
3389 12 : new_stmt = gimple_build_assign (tem, build1 (VIEW_CONVERT_EXPR,
3390 : vectype, tem2));
3391 12 : vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
3392 12 : slp_node->push_vec_def (new_stmt);
3393 : }
3394 :
3395 12 : vec_oprnds.release ();
3396 12 : return true;
3397 206 : }
3398 :
3399 : /* Return true if vector types VECTYPE_IN and VECTYPE_OUT have
3400 : integer elements and if we can narrow VECTYPE_IN to VECTYPE_OUT
3401 : in a single step. On success, store the binary pack code in
3402 : *CONVERT_CODE. */
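 : /* For example, two V2DI call results can be narrowed to one V4SI
 :    vector in a single VEC_PACK_TRUNC_EXPR; the NARROW path in
 :    vectorizable_call below then combines each pair of vectorized
 :    call results with that pack code. */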
3403 :
3404 : static bool
3405 180 : simple_integer_narrowing (tree vectype_out, tree vectype_in,
3406 : code_helper *convert_code)
3407 : {
3408 360 : if (!INTEGRAL_TYPE_P (TREE_TYPE (vectype_out))
3409 360 : || !INTEGRAL_TYPE_P (TREE_TYPE (vectype_in)))
3410 : return false;
3411 :
3412 70 : code_helper code;
3413 70 : int multi_step_cvt = 0;
3414 70 : auto_vec <tree, 8> interm_types;
3415 101 : if (!supportable_narrowing_operation (NOP_EXPR, vectype_out, vectype_in,
3416 : &code, &multi_step_cvt, &interm_types)
3417 70 : || multi_step_cvt)
3418 31 : return false;
3419 :
3420 39 : *convert_code = code;
3421 39 : return true;
3422 70 : }
3423 :
3424 : /* Function vectorizable_call.
3425 :
3426 : Check if STMT_INFO performs a function call that can be vectorized.
3427 : If COST_VEC is passed, calculate costs but don't change anything,
3428 : otherwise, vectorize STMT_INFO: create a vectorized stmt to replace
3429 : it, and insert it at GSI.
3430 : Return true if STMT_INFO is vectorizable in this way. */
3431 :
3432 : static bool
3433 2684382 : vectorizable_call (vec_info *vinfo,
3434 : stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
3435 : slp_tree slp_node,
3436 : stmt_vector_for_cost *cost_vec)
3437 : {
3438 2684382 : gcall *stmt;
3439 2684382 : tree vec_dest;
3440 2684382 : tree scalar_dest;
3441 2684382 : tree op;
3442 2684382 : tree vec_oprnd0 = NULL_TREE;
3443 2684382 : tree vectype_out, vectype_in;
3444 2684382 : poly_uint64 nunits_in;
3445 2684382 : poly_uint64 nunits_out;
3446 2684382 : loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
3447 2684382 : bb_vec_info bb_vinfo = dyn_cast <bb_vec_info> (vinfo);
3448 2684382 : tree fndecl, new_temp, rhs_type;
3449 2684382 : enum vect_def_type dt[5]
3450 : = { vect_unknown_def_type, vect_unknown_def_type, vect_unknown_def_type,
3451 : vect_unknown_def_type, vect_unknown_def_type };
3452 2684382 : tree vectypes[ARRAY_SIZE (dt)] = {};
3453 2684382 : slp_tree slp_op[ARRAY_SIZE (dt)] = {};
3454 2684382 : auto_vec<tree, 8> vargs;
3455 2684382 : enum { NARROW, NONE, WIDEN } modifier;
3456 2684382 : size_t i, nargs;
3457 2684382 : tree clz_ctz_arg1 = NULL_TREE;
3458 :
3459 2684382 : if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
3460 : return false;
3461 :
3462 2684382 : if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
3463 232992 : && cost_vec)
3464 : return false;
3465 :
3466 : /* Is STMT_INFO a vectorizable call? */
3467 2698088 : stmt = dyn_cast <gcall *> (stmt_info->stmt);
3468 24992 : if (!stmt)
3469 : return false;
3470 :
3471 24992 : if (gimple_call_internal_p (stmt)
3472 24992 : && (internal_load_fn_p (gimple_call_internal_fn (stmt))
3473 16533 : || internal_store_fn_p (gimple_call_internal_fn (stmt))))
3474 : /* Handled by vectorizable_load and vectorizable_store. */
3475 3816 : return false;
3476 :
3477 21176 : if (gimple_call_lhs (stmt) == NULL_TREE
3478 21176 : || TREE_CODE (gimple_call_lhs (stmt)) != SSA_NAME)
3479 : return false;
3480 :
3481 21170 : gcc_checking_assert (!stmt_can_throw_internal (cfun, stmt));
3482 :
3483 21170 : vectype_out = SLP_TREE_VECTYPE (slp_node);
3484 :
3485 : /* Process function arguments. */
3486 21170 : rhs_type = NULL_TREE;
3487 21170 : vectype_in = NULL_TREE;
3488 21170 : nargs = gimple_call_num_args (stmt);
3489 :
3490 : /* Bail out if the function has more than five arguments; we do not have
3491 : interesting builtin functions to vectorize with more than two arguments
3492 : except for fma (cond_fma has more). No arguments is not good either. */
3493 21170 : if (nargs == 0 || nargs > 5)
3494 : return false;
3495 :
3496 : /* Ignore the arguments of IFN_GOMP_SIMD_LANE, they are magic. */
3497 21090 : combined_fn cfn = gimple_call_combined_fn (stmt);
3498 21090 : if (cfn == CFN_GOMP_SIMD_LANE)
3499 : {
3500 3207 : nargs = 0;
3501 3207 : rhs_type = unsigned_type_node;
3502 : }
3503 : /* Similarly pretend IFN_CLZ and IFN_CTZ only have one argument; the second
3504 : argument just says whether the operation is well-defined at zero and what
3505 : value should be returned for it. */
3506 21090 : if ((cfn == CFN_CLZ || cfn == CFN_CTZ) && nargs == 2)
3507 : {
3508 168 : nargs = 1;
3509 168 : clz_ctz_arg1 = gimple_call_arg (stmt, 1);
3510 : }
3511 :
3512 21090 : int mask_opno = -1;
3513 21090 : if (internal_fn_p (cfn))
3514 : {
3515 : /* We can only handle direct internal masked calls here,
3516 : vectorizable_simd_clone_call is for the rest. */
3517 18036 : if (cfn == CFN_MASK_CALL)
3518 : return false;
3519 17882 : mask_opno = internal_fn_mask_index (as_internal_fn (cfn));
3520 : }
3521 :
3522 66045 : for (i = 0; i < nargs; i++)
3523 : {
3524 46383 : if ((int) i == mask_opno)
3525 : {
3526 7694 : if (!vect_check_scalar_mask (vinfo, slp_node, mask_opno,
3527 : &slp_op[i], &dt[i], &vectypes[i]))
3528 : return false;
3529 7694 : continue;
3530 : }
3531 :
3532 38689 : if (!vect_is_simple_use (vinfo, slp_node,
3533 : i, &op, &slp_op[i], &dt[i], &vectypes[i]))
3534 : {
3535 0 : if (dump_enabled_p ())
3536 0 : dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3537 : "use not simple.\n");
3538 0 : return false;
3539 : }
3540 :
3541 : /* We can only handle calls with arguments of the same type. */
3542 38689 : if (rhs_type
3543 38689 : && !types_compatible_p (rhs_type, TREE_TYPE (op)))
3544 : {
3545 1274 : if (dump_enabled_p ())
3546 200 : dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3547 : "argument types differ.\n");
3548 1274 : return false;
3549 : }
3550 37415 : if (!rhs_type)
3551 17729 : rhs_type = TREE_TYPE (op);
3552 :
3553 37415 : if (!vectype_in)
3554 18243 : vectype_in = vectypes[i];
3555 19172 : else if (vectypes[i]
3556 19172 : && !types_compatible_p (vectypes[i], vectype_in))
3557 : {
3558 0 : if (dump_enabled_p ())
3559 0 : dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3560 : "argument vector types differ.\n");
3561 0 : return false;
3562 : }
3563 : }
3564 : /* If all arguments are external or constant defs, infer the vector type
3565 : from the scalar type. */
3566 19662 : if (!vectype_in)
3567 5524 : vectype_in = get_vectype_for_scalar_type (vinfo, rhs_type, slp_node);
3568 19662 : if (!cost_vec)
3569 4215 : gcc_assert (vectype_in);
3570 15447 : if (!vectype_in)
3571 : {
3572 1031 : if (dump_enabled_p ())
3573 4 : dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3574 : "no vectype for scalar type %T\n", rhs_type);
3575 :
3576 1031 : return false;
3577 : }
3578 :
3579 37262 : if (VECTOR_BOOLEAN_TYPE_P (vectype_out)
3580 18631 : != VECTOR_BOOLEAN_TYPE_P (vectype_in))
3581 : {
3582 12 : if (dump_enabled_p ())
3583 12 : dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3584 : "mixed mask and nonmask vector types\n");
3585 12 : return false;
3586 : }
3587 :
3588 18619 : if (vect_emulated_vector_p (vectype_in)
3589 18619 : || vect_emulated_vector_p (vectype_out))
3590 : {
3591 0 : if (dump_enabled_p ())
3592 0 : dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3593 : "use emulated vector type for call\n");
3594 0 : return false;
3595 : }
3596 :
3597 : /* FORNOW */
3598 18619 : nunits_in = TYPE_VECTOR_SUBPARTS (vectype_in);
3599 18619 : nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
3600 18619 : if (known_eq (nunits_in * 2, nunits_out))
3601 : modifier = NARROW;
3602 18052 : else if (known_eq (nunits_out, nunits_in))
3603 : modifier = NONE;
3604 45 : else if (known_eq (nunits_out * 2, nunits_in))
3605 : modifier = WIDEN;
3606 : else
3607 : return false;
3608 :
3609 : /* We only handle functions that do not read or clobber memory. */
3610 37238 : if (gimple_vuse (stmt))
3611 : {
3612 1241 : if (dump_enabled_p ())
3613 14 : dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3614 : "function reads from or writes to memory.\n");
3615 1241 : return false;
3616 : }
3617 :
3618 : /* For now, we only vectorize functions if a target specific builtin
3619 : is available. TODO -- in some cases, it might be profitable to
3620 : insert the calls for pieces of the vector, in order to be able
3621 : to vectorize other operations in the loop. */
3622 17378 : fndecl = NULL_TREE;
3623 17378 : internal_fn ifn = IFN_LAST;
3624 17378 : tree callee = gimple_call_fndecl (stmt);
3625 :
3626 : /* First try using an internal function. */
3627 17378 : code_helper convert_code = MAX_TREE_CODES;
3628 17378 : if (cfn != CFN_LAST
3629 17378 : && (modifier == NONE
3630 192 : || (modifier == NARROW
3631 180 : && simple_integer_narrowing (vectype_out, vectype_in,
3632 : &convert_code))))
3633 16379 : ifn = vectorizable_internal_function (cfn, callee, vectype_out,
3634 : vectype_in);
3635 :
3636 : /* Check if the operation traps. */
3637 17378 : bool could_trap = gimple_could_trap_p (STMT_VINFO_STMT (stmt_info));
3638 17378 : if (could_trap && cost_vec && loop_vinfo)
3639 : {
3640 : /* If the operation can trap it must be conditional, otherwise fail. */
3641 474 : internal_fn cond_fn = (internal_fn_mask_index (ifn) != -1
3642 474 : ? ifn : get_conditional_internal_fn (ifn));
3643 474 : internal_fn cond_len_fn = get_len_internal_fn (cond_fn);
3644 474 : if (LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo))
3645 : {
3646 : /* We assume that BB SLP fills all lanes, so no inactive lanes can
3647 : cause issues. */
3648 84 : if ((cond_fn == IFN_LAST
3649 56 : || !direct_internal_fn_supported_p (cond_fn, vectype_out,
3650 : OPTIMIZE_FOR_SPEED))
3651 140 : && (cond_len_fn == IFN_LAST
3652 56 : || !direct_internal_fn_supported_p (cond_len_fn, vectype_out,
3653 : OPTIMIZE_FOR_SPEED)))
3654 : {
3655 84 : if (dump_enabled_p ())
3656 10 : dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3657 : "can't use a fully-masked loop because no"
3658 : " conditional operation is available.\n");
3659 84 : LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo) = false;
3660 : }
3661 : }
3662 : }
3663 :
3664 : /* If that fails, try asking for a target-specific built-in function. */
3665 17378 : if (ifn == IFN_LAST)
3666 : {
3667 9885 : if (cfn != CFN_LAST)
3668 9039 : fndecl = targetm.vectorize.builtin_vectorized_function
3669 9039 : (cfn, vectype_out, vectype_in);
3670 846 : else if (callee && fndecl_built_in_p (callee, BUILT_IN_MD))
3671 24 : fndecl = targetm.vectorize.builtin_md_vectorized_function
3672 24 : (callee, vectype_out, vectype_in);
3673 : }
3674 :
3675 17378 : if (ifn == IFN_LAST && !fndecl)
3676 : {
3677 9505 : if (cfn == CFN_GOMP_SIMD_LANE
3678 3207 : && SLP_TREE_LANES (slp_node) == 1
3679 3207 : && loop_vinfo
3680 3207 : && LOOP_VINFO_LOOP (loop_vinfo)->simduid
3681 3207 : && TREE_CODE (gimple_call_arg (stmt, 0)) == SSA_NAME
3682 15919 : && LOOP_VINFO_LOOP (loop_vinfo)->simduid
3683 3207 : == SSA_NAME_VAR (gimple_call_arg (stmt, 0)))
3684 : {
3685 : /* We can handle IFN_GOMP_SIMD_LANE by returning a
3686 : { 0, 1, 2, ... vf - 1 } vector. */
3687 3207 : gcc_assert (nargs == 0);
3688 : }
3689 6298 : else if (modifier == NONE
3690 6298 : && (gimple_call_builtin_p (stmt, BUILT_IN_BSWAP16)
3691 5964 : || gimple_call_builtin_p (stmt, BUILT_IN_BSWAP32)
3692 5816 : || gimple_call_builtin_p (stmt, BUILT_IN_BSWAP64)
3693 5784 : || gimple_call_builtin_p (stmt, BUILT_IN_BSWAP128)))
3694 206 : return vectorizable_bswap (vinfo, stmt_info, gsi, slp_node,
3695 206 : slp_op, vectype_in, cost_vec);
3696 : else
3697 : {
3698 6092 : if (dump_enabled_p ())
3699 274 : dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3700 : "function is not vectorizable.\n");
3701 6092 : return false;
3702 : }
3703 : }
3704 :
3705 11080 : int reduc_idx = SLP_TREE_REDUC_IDX (slp_node);
3706 11080 : internal_fn cond_fn = (internal_fn_mask_index (ifn) != -1
3707 11080 : ? ifn : get_conditional_internal_fn (ifn));
3708 11080 : internal_fn cond_len_fn = get_len_internal_fn (cond_fn);
3709 11080 : vec_loop_masks *masks = (loop_vinfo ? &LOOP_VINFO_MASKS (loop_vinfo) : NULL);
3710 9180 : vec_loop_lens *lens = (loop_vinfo ? &LOOP_VINFO_LENS (loop_vinfo) : NULL);
3711 11080 : unsigned int nvectors = vect_get_num_copies (vinfo, slp_node);
3712 11080 : if (cost_vec) /* transformation not required. */
3713 : {
3714 21801 : for (i = 0; i < nargs; ++i)
3715 14924 : if (!vect_maybe_update_slp_op_vectype (slp_op[i],
3716 14924 : vectypes[i]
3717 : ? vectypes[i] : vectype_in))
3718 : {
3719 0 : if (dump_enabled_p ())
3720 0 : dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3721 : "incompatible vector types for invariants\n");
3722 0 : return false;
3723 : }
3724 6877 : SLP_TREE_TYPE (slp_node) = call_vec_info_type;
3725 6877 : DUMP_VECT_SCOPE ("vectorizable_call");
3726 6877 : vect_model_simple_cost (vinfo, 1, slp_node, cost_vec);
3727 :
3728 6877 : if (loop_vinfo
3729 5923 : && LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo)
3730 4056 : && (reduc_idx >= 0 || could_trap || mask_opno >= 0))
3731 : {
3732 2558 : if (reduc_idx >= 0
3733 1631 : && (cond_fn == IFN_LAST
3734 1631 : || !direct_internal_fn_supported_p (cond_fn, vectype_out,
3735 : OPTIMIZE_FOR_SPEED))
3736 2570 : && (cond_len_fn == IFN_LAST
3737 12 : || !direct_internal_fn_supported_p (cond_len_fn, vectype_out,
3738 : OPTIMIZE_FOR_SPEED)))
3739 : {
3740 12 : if (dump_enabled_p ())
3741 6 : dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3742 : "can't use a fully-masked loop because no"
3743 : " conditional operation is available.\n");
3744 12 : LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo) = false;
3745 : }
3746 : else
3747 : {
3748 2546 : tree scalar_mask = NULL_TREE;
3749 2546 : if (mask_opno >= 0)
3750 2546 : scalar_mask = gimple_call_arg (stmt_info->stmt, mask_opno);
3751 2546 : if (cond_len_fn != IFN_LAST
3752 2546 : && direct_internal_fn_supported_p (cond_len_fn, vectype_out,
3753 : OPTIMIZE_FOR_SPEED))
3754 0 : vect_record_loop_len (loop_vinfo, lens, nvectors, vectype_out,
3755 : 1);
3756 : else
3757 2546 : vect_record_loop_mask (loop_vinfo, masks, nvectors, vectype_out,
3758 : scalar_mask);
3759 : }
3760 : }
3761 6877 : return true;
3762 : }
3763 :
3764 : /* Transform. */
3765 :
3766 4203 : if (dump_enabled_p ())
3767 416 : dump_printf_loc (MSG_NOTE, vect_location, "transform call.\n");
3768 :
3769 : /* Handle def. */
3770 4203 : scalar_dest = gimple_call_lhs (stmt);
3771 4203 : vec_dest = vect_create_destination_var (scalar_dest, vectype_out);
3772 :
3773 4203 : bool masked_loop_p = loop_vinfo && LOOP_VINFO_FULLY_MASKED_P (loop_vinfo);
3774 3257 : bool len_loop_p = loop_vinfo && LOOP_VINFO_FULLY_WITH_LENGTH_P (loop_vinfo);
3775 4203 : unsigned int vect_nargs = nargs;
3776 4203 : if (len_loop_p && (reduc_idx >= 0 || could_trap || mask_opno >= 0))
3777 : {
3778 0 : ifn = cond_len_fn;
3779 0 : /* COND_* -> COND_LEN_* takes 2 extra arguments: LEN, BIAS. */
3780 0 : vect_nargs += 2;
3781 : /* But unless there's a mask argument already we need that
3782 : as well, and an else value. */
3783 0 : if (mask_opno == -1)
3784 0 : vect_nargs += 2;
3785 : }
3786 4203 : else if (masked_loop_p && mask_opno == -1 && (reduc_idx >= 0 || could_trap))
3787 : {
3788 0 : ifn = cond_fn;
3789 0 : vect_nargs += 2;
3790 : }
3791 4203 : int len_opno = internal_fn_len_index (ifn);
3792 4203 : if (clz_ctz_arg1)
3793 59 : ++vect_nargs;
3794 :
3795 4203 : if (modifier == NONE || ifn != IFN_LAST)
3796 : {
3797 4171 : tree prev_res = NULL_TREE;
3798 4171 : vargs.safe_grow (vect_nargs, true);
3799 4171 : auto_vec<vec<tree> > vec_defs (nargs);
3800 :
3801 : /* Build argument list for the vectorized call. */
3802 4171 : if (cfn == CFN_GOMP_SIMD_LANE)
3803 : {
3804 3308 : for (i = 0; i < nvectors; ++i)
3805 : {
3806 : /* ??? For multi-lane SLP we'd need to build
3807 : { 0, 0, .., 1, 1, ... }. */
3808 1708 : tree cst = build_index_vector (vectype_out,
3809 : i * nunits_out, 1);
3810 1708 : tree new_var
3811 1708 : = vect_get_new_ssa_name (vectype_out, vect_simple_var, "cst_");
3812 1708 : gimple *init_stmt = gimple_build_assign (new_var, cst);
3813 1708 : vect_init_vector_1 (vinfo, stmt_info, init_stmt, NULL);
3814 1708 : new_temp = make_ssa_name (vec_dest);
3815 1708 : gimple *new_stmt = gimple_build_assign (new_temp, new_var);
3816 1708 : vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
3817 1708 : slp_node->push_vec_def (new_stmt);
3818 : }
3819 : }
3820 : else
3821 : {
3822 2571 : vec<tree> vec_oprnds0;
3823 2571 : vect_get_slp_defs (vinfo, slp_node, &vec_defs);
3824 2571 : vec_oprnds0 = vec_defs[0];
3825 :
3826 : /* Arguments are ready. Create the new vector stmt. */
3827 5283 : FOR_EACH_VEC_ELT (vec_oprnds0, i, vec_oprnd0)
3828 : {
3829 2712 : int varg = 0;
3830 : /* Add the mask if necessary. */
3831 38 : if ((masked_loop_p || len_loop_p) && mask_opno == -1
3832 2714 : && internal_fn_mask_index (ifn) != -1)
3833 : {
3834 0 : gcc_assert (internal_fn_mask_index (ifn) == varg);
3835 0 : if (masked_loop_p)
3836 : {
3837 0 : unsigned int vec_num = vec_oprnds0.length ();
3838 0 : vargs[varg++] = vect_get_loop_mask (loop_vinfo, gsi,
3839 : masks, vec_num,
3840 : vectype_out, i);
3841 : }
3842 : else
3843 : {
3844 0 : tree mask_vectype = truth_type_for (vectype_out);
3845 0 : vargs[varg++] = vect_build_all_ones_mask (loop_vinfo,
3846 : stmt_info,
3847 : mask_vectype);
3848 : }
3849 : }
3850 : size_t k;
3851 9937 : for (k = 0; k < nargs; k++)
3852 : {
3853 7225 : vec<tree> vec_oprndsk = vec_defs[k];
3854 7225 : vargs[varg++] = vec_oprndsk[i];
3855 : }
3856 : /* Add the else value if necessary. */
3857 38 : if ((masked_loop_p || len_loop_p) && mask_opno == -1
3858 2714 : && internal_fn_else_index (ifn) != -1)
3859 : {
3860 0 : gcc_assert (internal_fn_else_index (ifn) == varg);
3861 0 : if (reduc_idx >= 0)
3862 0 : vargs[varg++] = vargs[reduc_idx + 1];
3863 : else
3864 : {
3865 0 : auto else_value = targetm.preferred_else_value
3866 0 : (ifn, vectype_out, varg - 1, &vargs[1]);
3867 0 : vargs[varg++] = else_value;
3868 : }
3869 : }
3870 2712 : if (clz_ctz_arg1)
3871 59 : vargs[varg++] = clz_ctz_arg1;
3872 :
3873 2712 : gimple *new_stmt;
3874 2712 : if (modifier == NARROW)
3875 : {
3876 : /* We don't define any narrowing conditional functions
3877 : at present. */
3878 0 : gcc_assert (mask_opno < 0);
3879 0 : tree half_res = make_ssa_name (vectype_in);
3880 0 : gcall *call = gimple_build_call_internal_vec (ifn, vargs);
3881 0 : gimple_call_set_lhs (call, half_res);
3882 0 : gimple_call_set_nothrow (call, true);
3883 0 : vect_finish_stmt_generation (vinfo, stmt_info, call, gsi);
3884 0 : if ((i & 1) == 0)
3885 : {
3886 0 : prev_res = half_res;
3887 0 : continue;
3888 : }
3889 0 : new_temp = make_ssa_name (vec_dest);
3890 0 : new_stmt = vect_gimple_build (new_temp, convert_code,
3891 : prev_res, half_res);
3892 0 : vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
3893 : }
3894 : else
3895 : {
3896 2712 : if (len_opno >= 0 && len_loop_p)
3897 : {
3898 0 : unsigned int vec_num = vec_oprnds0.length ();
3899 0 : tree len = vect_get_loop_len (loop_vinfo, gsi, lens,
3900 : vec_num, vectype_out, i, 1, true);
3901 0 : signed char biasval
3902 0 : = LOOP_VINFO_PARTIAL_LOAD_STORE_BIAS (loop_vinfo);
3903 0 : tree bias = build_int_cst (intQI_type_node, biasval);
3904 0 : vargs[len_opno] = len;
3905 0 : vargs[len_opno + 1] = bias;
3906 : }
3907 2712 : else if (mask_opno >= 0 && masked_loop_p)
3908 : {
3909 36 : unsigned int vec_num = vec_oprnds0.length ();
3910 36 : tree mask = vect_get_loop_mask (loop_vinfo, gsi, masks,
3911 : vec_num, vectype_out, i);
3912 36 : vargs[mask_opno]
3913 72 : = prepare_vec_mask (loop_vinfo, TREE_TYPE (mask), mask,
3914 36 : vargs[mask_opno], gsi);
3915 : }
3916 :
3917 2712 : gcall *call;
3918 2712 : if (ifn != IFN_LAST)
3919 2631 : call = gimple_build_call_internal_vec (ifn, vargs);
3920 : else
3921 81 : call = gimple_build_call_vec (fndecl, vargs);
3922 2712 : new_temp = make_ssa_name (vec_dest, call);
3923 2712 : gimple_call_set_lhs (call, new_temp);
3924 2712 : gimple_call_set_nothrow (call, true);
3925 2712 : vect_finish_stmt_generation (vinfo, stmt_info, call, gsi);
3926 2712 : new_stmt = call;
3927 : }
3928 2712 : slp_node->push_vec_def (new_stmt);
3929 : }
3930 : }
3931 :
3932 11058 : for (i = 0; i < nargs; i++)
3933 : {
3934 6887 : vec<tree> vec_oprndsi = vec_defs[i];
3935 6887 : vec_oprndsi.release ();
3936 : }
3937 4171 : }
3938 32 : else if (modifier == NARROW)
3939 : {
3940 32 : auto_vec<vec<tree> > vec_defs (nargs);
3941 : /* We don't define any narrowing conditional functions at present. */
3942 32 : gcc_assert (mask_opno < 0);
3943 :
3944 : /* Build argument list for the vectorized call. */
3945 32 : vargs.create (nargs * 2);
3946 :
3947 32 : vect_get_slp_defs (vinfo, slp_node, &vec_defs);
3948 32 : vec<tree> vec_oprnds0 = vec_defs[0];
3949 :
3950 : /* Arguments are ready. Create the new vector stmt. */
3951 64 : for (i = 0; vec_oprnds0.iterate (i, &vec_oprnd0); i += 2)
3952 : {
3953 32 : size_t k;
3954 32 : vargs.truncate (0);
3955 64 : for (k = 0; k < nargs; k++)
3956 : {
3957 32 : vec<tree> vec_oprndsk = vec_defs[k];
3958 32 : vargs.quick_push (vec_oprndsk[i]);
3959 32 : vargs.quick_push (vec_oprndsk[i + 1]);
3960 : }
3961 32 : gcall *call;
3962 32 : if (ifn != IFN_LAST)
3963 : call = gimple_build_call_internal_vec (ifn, vargs);
3964 : else
3965 32 : call = gimple_build_call_vec (fndecl, vargs);
3966 32 : new_temp = make_ssa_name (vec_dest, call);
3967 32 : gimple_call_set_lhs (call, new_temp);
3968 32 : gimple_call_set_nothrow (call, true);
3969 32 : vect_finish_stmt_generation (vinfo, stmt_info, call, gsi);
3970 32 : slp_node->push_vec_def (call);
3971 : }
3972 :
3973 64 : for (i = 0; i < nargs; i++)
3974 : {
3975 32 : vec<tree> vec_oprndsi = vec_defs[i];
3976 32 : vec_oprndsi.release ();
3977 : }
3978 32 : }
3979 : else
3980 : /* No current target implements this case. */
3981 : return false;
3982 :
3983 4203 : vargs.release ();
3984 :
3985 4203 : return true;
3986 2684382 : }
3987 :
3988 :
3989 : struct simd_call_arg_info
3990 : {
3991 : tree vectype;
3992 : tree op;
3993 : HOST_WIDE_INT linear_step;
3994 : enum vect_def_type dt;
3995 : unsigned int align;
3996 : bool simd_lane_linear;
3997 : };
3998 :
3999 : /* Helper function of vectorizable_simd_clone_call. If OP, an SSA_NAME,
4000 : is linear within a simd lane (but not within the whole loop), note it
4001 : in *ARGINFO. */
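 : /* For example, for
 :      _1 = GOMP_SIMD_LANE (simduid.0_6(D));
 :      _2 = (sizetype) _1;
 :      _3 = _2 * 8;
 :      p_4 = &base + _3;
 :    we record '&base' as the op and 8 as the linear step. */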
4002 :
4003 : static void
4004 15 : vect_simd_lane_linear (tree op, class loop *loop,
4005 : struct simd_call_arg_info *arginfo)
4006 : {
4007 15 : gimple *def_stmt = SSA_NAME_DEF_STMT (op);
4008 :
4009 15 : if (!is_gimple_assign (def_stmt)
4010 15 : || gimple_assign_rhs_code (def_stmt) != POINTER_PLUS_EXPR
4011 27 : || !is_gimple_min_invariant (gimple_assign_rhs1 (def_stmt)))
4012 3 : return;
4013 :
4014 12 : tree base = gimple_assign_rhs1 (def_stmt);
4015 12 : HOST_WIDE_INT linear_step = 0;
4016 12 : tree v = gimple_assign_rhs2 (def_stmt);
4017 48 : while (TREE_CODE (v) == SSA_NAME)
4018 : {
4019 36 : tree t;
4020 36 : def_stmt = SSA_NAME_DEF_STMT (v);
4021 36 : if (is_gimple_assign (def_stmt))
4022 24 : switch (gimple_assign_rhs_code (def_stmt))
4023 : {
4024 0 : case PLUS_EXPR:
4025 0 : t = gimple_assign_rhs2 (def_stmt);
4026 0 : if (linear_step || TREE_CODE (t) != INTEGER_CST)
4027 : return;
4028 0 : base = fold_build2 (POINTER_PLUS_EXPR, TREE_TYPE (base), base, t);
4029 0 : v = gimple_assign_rhs1 (def_stmt);
4030 0 : continue;
4031 12 : case MULT_EXPR:
4032 12 : t = gimple_assign_rhs2 (def_stmt);
4033 12 : if (linear_step || !tree_fits_shwi_p (t) || integer_zerop (t))
4034 0 : return;
4035 12 : linear_step = tree_to_shwi (t);
4036 12 : v = gimple_assign_rhs1 (def_stmt);
4037 12 : continue;
4038 12 : CASE_CONVERT:
4039 12 : t = gimple_assign_rhs1 (def_stmt);
4040 12 : if (TREE_CODE (TREE_TYPE (t)) != INTEGER_TYPE
4041 12 : || (TYPE_PRECISION (TREE_TYPE (v))
4042 12 : < TYPE_PRECISION (TREE_TYPE (t))))
4043 : return;
4044 12 : if (!linear_step)
4045 0 : linear_step = 1;
4046 12 : v = t;
4047 12 : continue;
4048 : default:
4049 : return;
4050 : }
4051 12 : else if (gimple_call_internal_p (def_stmt, IFN_GOMP_SIMD_LANE)
4052 12 : && loop->simduid
4053 12 : && TREE_CODE (gimple_call_arg (def_stmt, 0)) == SSA_NAME
4054 24 : && (SSA_NAME_VAR (gimple_call_arg (def_stmt, 0))
4055 : == loop->simduid))
4056 : {
4057 12 : if (!linear_step)
4058 0 : linear_step = 1;
4059 12 : arginfo->linear_step = linear_step;
4060 12 : arginfo->op = base;
4061 12 : arginfo->simd_lane_linear = true;
4062 12 : return;
4063 : }
4064 : }
4065 : }
4066 :
4067 : /* Function vectorizable_simd_clone_call.
4068 :
4069 : Check if STMT_INFO performs a function call that can be vectorized
4070 : by calling a simd clone of the function.
4071 : If COST_VEC is passed, calculate costs but don't change anything,
4072 : otherwise, vectorize STMT_INFO: create a vectorized stmt to replace
4073 : it, and insert it at GSI.
4074 : Return true if STMT_INFO is vectorizable in this way. */
4075 :
4076 : static bool
4077 2673512 : vectorizable_simd_clone_call (vec_info *vinfo, stmt_vec_info stmt_info,
4078 : gimple_stmt_iterator *gsi,
4079 : slp_tree slp_node,
4080 : stmt_vector_for_cost *cost_vec)
4081 : {
4082 2673512 : tree vec_dest;
4083 2673512 : tree scalar_dest;
4084 2673512 : tree vec_oprnd0 = NULL_TREE;
4085 2673512 : tree vectype;
4086 2673512 : poly_uint64 nunits;
4087 2673512 : loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
4088 2673512 : bb_vec_info bb_vinfo = dyn_cast <bb_vec_info> (vinfo);
4089 2673512 : class loop *loop = loop_vinfo ? LOOP_VINFO_LOOP (loop_vinfo) : NULL;
4090 2673512 : tree fndecl, new_temp;
4091 2673512 : int j;
4092 2673512 : auto_vec<simd_call_arg_info> arginfo;
4093 2673512 : vec<tree> vargs = vNULL;
4094 2673512 : size_t i, nargs;
4095 2673512 : tree rtype, ratype;
4096 2673512 : vec<constructor_elt, va_gc> *ret_ctor_elts = NULL;
4097 2673512 : int masked_call_offset = 0;
4098 :
4099 : /* Is STMT a vectorizable call? */
4100 2673512 : gcall *stmt = dyn_cast <gcall *> (stmt_info->stmt);
4101 15310 : if (!stmt)
4102 : return false;
4103 :
4104 15310 : fndecl = gimple_call_fndecl (stmt);
4105 15310 : if (fndecl == NULL_TREE
4106 15310 : && gimple_call_internal_p (stmt, IFN_MASK_CALL))
4107 : {
4108 220 : fndecl = gimple_call_arg (stmt, 0);
4109 220 : gcc_checking_assert (TREE_CODE (fndecl) == ADDR_EXPR);
4110 220 : fndecl = TREE_OPERAND (fndecl, 0);
4111 220 : gcc_checking_assert (TREE_CODE (fndecl) == FUNCTION_DECL);
4112 : masked_call_offset = 1;
4113 : }
4114 15090 : if (fndecl == NULL_TREE)
4115 : return false;
4116 :
4117 4875 : struct cgraph_node *node = cgraph_node::get (fndecl);
4118 4875 : if (node == NULL || node->simd_clones == NULL)
4119 : return false;
4120 :
4121 1500 : if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
4122 : return false;
4123 :
4124 1500 : if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
4125 0 : && cost_vec)
4126 : return false;
4127 :
4128 1500 : if (gimple_call_lhs (stmt)
4129 1500 : && TREE_CODE (gimple_call_lhs (stmt)) != SSA_NAME)
4130 : return false;
4131 :
4132 1500 : gcc_checking_assert (!stmt_can_throw_internal (cfun, stmt));
4133 :
4134 1500 : vectype = SLP_TREE_VECTYPE (slp_node);
4135 :
4136 2673576 : if (loop_vinfo && nested_in_vect_loop_p (loop, stmt_info))
4137 : return false;
4138 :
4139 : /* Process function arguments. */
4140 1500 : nargs = gimple_call_num_args (stmt) - masked_call_offset;
4141 :
4142 : /* Bail out if the function has zero arguments. */
4143 1500 : if (nargs == 0)
4144 : return false;
4145 :
4146 1436 : vect_simd_clone_data _data;
4147 1436 : vect_simd_clone_data &data = slp_node->get_data (_data);
4148 1436 : vec<tree>& simd_clone_info = data.simd_clone_info;
4149 1436 : arginfo.reserve (nargs, true);
4150 1436 : auto_vec<slp_tree> slp_op;
4151 1436 : slp_op.safe_grow_cleared (nargs);
4152 :
4153 4101 : for (i = 0; i < nargs; i++)
4154 : {
4155 2665 : simd_call_arg_info thisarginfo;
4156 2665 : affine_iv iv;
4157 2665 : tree op;
4158 :
4159 2665 : thisarginfo.linear_step = 0;
4160 2665 : thisarginfo.align = 0;
4161 2665 : thisarginfo.op = NULL_TREE;
4162 2665 : thisarginfo.simd_lane_linear = false;
4163 :
4164 5330 : int op_no = vect_slp_child_index_for_operand (stmt_info,
4165 2665 : i + masked_call_offset);
4166 5330 : if (!vect_is_simple_use (vinfo, slp_node,
4167 2665 : op_no, &op, &slp_op[i],
4168 : &thisarginfo.dt, &thisarginfo.vectype)
4169 2665 : || thisarginfo.dt == vect_uninitialized_def)
4170 : {
4171 0 : if (dump_enabled_p ())
4172 0 : dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4173 : "use not simple.\n");
4174 0 : return false;
4175 : }
4176 :
4177 2665 : if (thisarginfo.dt == vect_constant_def
4178 2665 : || thisarginfo.dt == vect_external_def)
4179 : {
4180 : /* With SLP we determine the vector type of constants/externals
4181 : at analysis time, handling conflicts via
4182 : vect_maybe_update_slp_op_vectype. At transform time
4183 : we have a vector type recorded for SLP. */
4184 704 : gcc_assert (cost_vec
4185 : || thisarginfo.vectype != NULL_TREE);
4186 : if (cost_vec)
4187 573 : thisarginfo.vectype = get_vectype_for_scalar_type (vinfo,
4188 573 : TREE_TYPE (op),
4189 : slp_node);
4190 : }
4191 : else
4192 1961 : gcc_assert (thisarginfo.vectype != NULL_TREE);
4193 :
4194 : /* For linear arguments, the analyze phase should have saved
4195 : the base and step. */
4196 2534 : if (!cost_vec
4197 1594 : && i * 3 + 4 <= simd_clone_info.length ()
4198 2744 : && simd_clone_info[i * 3 + 2])
4199 : {
4200 118 : thisarginfo.linear_step = tree_to_shwi (simd_clone_info[i * 3 + 2]);
4201 118 : thisarginfo.op = simd_clone_info[i * 3 + 1];
4202 118 : thisarginfo.simd_lane_linear
4203 118 : = (simd_clone_info[i * 3 + 3] == boolean_true_node);
4204 : /* If loop has been peeled for alignment, we need to adjust it. */
4205 118 : tree n1 = LOOP_VINFO_NITERS_UNCHANGED (loop_vinfo);
4206 118 : tree n2 = LOOP_VINFO_NITERS (loop_vinfo);
4207 118 : if (n1 != n2 && !thisarginfo.simd_lane_linear)
4208 : {
4209 0 : tree bias = fold_build2 (MINUS_EXPR, TREE_TYPE (n1), n1, n2);
4210 0 : tree step = simd_clone_info[i * 3 + 2];
4211 0 : tree opt = TREE_TYPE (thisarginfo.op);
4212 0 : bias = fold_convert (TREE_TYPE (step), bias);
4213 0 : bias = fold_build2 (MULT_EXPR, TREE_TYPE (step), bias, step);
4214 0 : thisarginfo.op
4215 0 : = fold_build2 (POINTER_TYPE_P (opt)
4216 : ? POINTER_PLUS_EXPR : PLUS_EXPR, opt,
4217 : thisarginfo.op, bias);
4218 : }
4219 : }
4220 2547 : else if (cost_vec
4221 1868 : && thisarginfo.dt != vect_constant_def
4222 1741 : && thisarginfo.dt != vect_external_def
4223 1295 : && loop_vinfo
4224 1290 : && SLP_TREE_LANES (slp_node) == 1
4225 1266 : && TREE_CODE (op) == SSA_NAME
4226 2532 : && simple_iv (loop, loop_containing_stmt (stmt), op,
4227 : &iv, false)
4228 2759 : && tree_fits_shwi_p (iv.step))
4229 : {
4230 212 : thisarginfo.linear_step = tree_to_shwi (iv.step);
4231 212 : thisarginfo.op = iv.base;
4232 : }
4233 2335 : else if ((thisarginfo.dt == vect_constant_def
4234 2335 : || thisarginfo.dt == vect_external_def)
4235 704 : && SLP_TREE_LANES (slp_node) == 1
4236 2641 : && POINTER_TYPE_P (TREE_TYPE (op)))
4237 86 : thisarginfo.align = get_pointer_alignment (op) / BITS_PER_UNIT;
4238 : /* Addresses of array elements indexed by GOMP_SIMD_LANE are
4239 : linear too. */
4240 2665 : if (SLP_TREE_LANES (slp_node) == 1
4241 2221 : && POINTER_TYPE_P (TREE_TYPE (op))
4242 196 : && !thisarginfo.linear_step
4243 112 : && cost_vec
4244 58 : && thisarginfo.dt != vect_constant_def
4245 58 : && thisarginfo.dt != vect_external_def
4246 15 : && loop_vinfo
4247 2680 : && TREE_CODE (op) == SSA_NAME)
4248 15 : vect_simd_lane_linear (op, loop, &thisarginfo);
4249 :
4250 2665 : if (!vectype)
4251 12 : vectype = thisarginfo.vectype;
4252 2665 : arginfo.quick_push (thisarginfo);
4253 : }
4254 :
4255 1436 : poly_uint64 vf = loop_vinfo ? LOOP_VINFO_VECT_FACTOR (loop_vinfo) : 1;
4256 1436 : unsigned group_size = SLP_TREE_LANES (slp_node);
4257 1436 : unsigned int badness = 0;
4258 1436 : unsigned int badness_inbranch = 0;
4259 1436 : struct cgraph_node *bestn = NULL;
4260 1436 : struct cgraph_node *bestn_inbranch = NULL;
4261 1436 : if (!cost_vec)
4262 362 : bestn = ((loop_vinfo && LOOP_VINFO_FULLY_MASKED_P (loop_vinfo))
4263 362 : ? data.clone_inbranch : data.clone);
4264 : else
4265 6196 : for (struct cgraph_node *n = node->simd_clones; n != NULL;
4266 5122 : n = n->simdclone->next_clone)
4267 : {
4268 5122 : unsigned int this_badness = 0;
4269 5122 : unsigned int num_calls;
4270 : /* The number of arguments in the call and the number of parameters in
4271 : the simdclone should match. However, when the simdclone is
4272 : 'inbranch', it could have one more paramater than nargs when using
4273 : an inbranch simdclone to call a non-inbranch call, either in a
4274 : non-masked loop using a all true constant mask, or inside a masked
4275 : loop using it's mask. */
4276 5122 : size_t simd_nargs = n->simdclone->nargs;
4277 5122 : if (!masked_call_offset && n->simdclone->inbranch)
4278 2367 : simd_nargs--;
4279 5122 : if (!constant_multiple_p (vf * group_size, n->simdclone->simdlen,
4280 : &num_calls)
4281 1974 : || (!n->simdclone->inbranch && (masked_call_offset > 0))
4282 1790 : || (nargs != simd_nargs))
4283 3332 : continue;
4284 1790 : if (num_calls != 1)
4285 1142 : this_badness += floor_log2 (num_calls) * 4096;
4286 1790 : if (n->simdclone->inbranch)
4287 771 : this_badness += 8192;
4288 :
4289 : /* If SLP_TREE_VECTYPE has not been set yet, pass the general vector
4290 : mode, which for targets that use it will determine what ISA we can
4291 : vectorize this code with. */
4292 1790 : machine_mode vector_mode = vinfo->vector_mode;
4293 1790 : if (vectype)
4294 1790 : vector_mode = TYPE_MODE (vectype);
4295 1790 : int target_badness = targetm.simd_clone.usable (n, vector_mode);
4296 1790 : if (target_badness < 0)
4297 368 : continue;
4298 1422 : this_badness += target_badness * 512;
4299 4192 : for (i = 0; i < nargs; i++)
4300 : {
4301 3018 : switch (n->simdclone->args[i].arg_type)
4302 : {
4303 2088 : case SIMD_CLONE_ARG_TYPE_VECTOR:
4304 2088 : if (VECTOR_BOOLEAN_TYPE_P (n->simdclone->args[i].vector_type))
4305 : /* Vector mask arguments are not supported. */
4306 : i = -1;
4307 2080 : else if (!useless_type_conversion_p
4308 2080 : (n->simdclone->args[i].orig_type,
4309 2080 : TREE_TYPE (gimple_call_arg (stmt,
4310 : i + masked_call_offset))))
4311 : i = -1;
4312 2080 : else if (arginfo[i].dt == vect_constant_def
4313 1973 : || arginfo[i].dt == vect_external_def
4314 3989 : || arginfo[i].linear_step)
4315 399 : this_badness += 64;
4316 : break;
4317 310 : case SIMD_CLONE_ARG_TYPE_UNIFORM:
4318 310 : if ((arginfo[i].dt != vect_constant_def
4319 145 : && arginfo[i].dt != vect_external_def)
4320 410 : || SLP_TREE_LANES (slp_node) != 1)
4321 : i = -1;
4322 : break;
4323 324 : case SIMD_CLONE_ARG_TYPE_LINEAR_CONSTANT_STEP:
4324 324 : case SIMD_CLONE_ARG_TYPE_LINEAR_REF_CONSTANT_STEP:
4325 324 : if (arginfo[i].dt == vect_constant_def
4326 324 : || arginfo[i].dt == vect_external_def
4327 324 : || (arginfo[i].linear_step
4328 324 : != n->simdclone->args[i].linear_step))
4329 : i = -1;
4330 : break;
4331 : case SIMD_CLONE_ARG_TYPE_LINEAR_VARIABLE_STEP:
4332 : case SIMD_CLONE_ARG_TYPE_LINEAR_VAL_CONSTANT_STEP:
4333 : case SIMD_CLONE_ARG_TYPE_LINEAR_UVAL_CONSTANT_STEP:
4334 : case SIMD_CLONE_ARG_TYPE_LINEAR_REF_VARIABLE_STEP:
4335 : case SIMD_CLONE_ARG_TYPE_LINEAR_VAL_VARIABLE_STEP:
4336 : case SIMD_CLONE_ARG_TYPE_LINEAR_UVAL_VARIABLE_STEP:
4337 : /* FORNOW */
4338 : i = -1;
4339 : break;
4340 296 : case SIMD_CLONE_ARG_TYPE_MASK:
4341 296 : if (!SCALAR_INT_MODE_P (n->simdclone->mask_mode)
4342 264 : && n->simdclone->mask_mode != VOIDmode)
4343 : i = -1;
4344 : /* While we can create a traditional data vector from
4345 : an incoming integer mode mask, we have no good way to
4346 : generate an integer mode mask from a traditional
4347 : boolean vector input. */
4348 296 : else if (SCALAR_INT_MODE_P (n->simdclone->mask_mode)
4349 296 : && !SCALAR_INT_MODE_P (TYPE_MODE (arginfo[i].vectype)))
4350 : i = -1;
4351 290 : else if (n->simdclone->mask_mode == VOIDmode
4352 : /* FORNOW we only have partial support for vector-type
4353 : masks that can't hold all of simdlen. */
4354 554 : && (maybe_ne (TYPE_VECTOR_SUBPARTS (n->simdclone->args[i].vector_type),
4355 264 : TYPE_VECTOR_SUBPARTS (arginfo[i].vectype))
4356 : /* Verify we can compute the mask argument. */
4357 111 : || !expand_vec_cond_expr_p (n->simdclone->args[i].vector_type,
4358 111 : arginfo[i].vectype)))
4359 : i = -1;
4360 125 : else if (SCALAR_INT_MODE_P (n->simdclone->mask_mode)
4361 : /* FORNOW we only have partial support for
4362 : integer-type masks that represent the same number
4363 : of lanes as the vectorized mask inputs. */
4364 151 : && maybe_ne (exact_div (n->simdclone->simdlen,
4365 : n->simdclone->args[i].linear_step),
4366 26 : TYPE_VECTOR_SUBPARTS (arginfo[i].vectype)))
4367 : i = -1;
4368 107 : else if (!SCALAR_INT_MODE_P (n->simdclone->mask_mode)
4369 107 : && SCALAR_INT_MODE_P (TYPE_MODE (arginfo[i].vectype)))
4370 8 : this_badness += 2048;
4371 : break;
4372 : }
4373 183 : if (i == (size_t) -1)
4374 : break;
4375 2770 : if (n->simdclone->args[i].alignment > arginfo[i].align)
4376 : {
4377 : i = -1;
4378 : break;
4379 : }
4380 2770 : if (arginfo[i].align)
4381 110 : this_badness += (exact_log2 (arginfo[i].align)
4382 160 : - exact_log2 (n->simdclone->args[i].alignment));
4383 : }
4384 1422 : if (i == (size_t) -1)
4385 248 : continue;
4386 1174 : if (masked_call_offset == 0
4387 1067 : && n->simdclone->inbranch
4388 347 : && n->simdclone->nargs > nargs)
4389 : {
4390 347 : gcc_assert (n->simdclone->args[n->simdclone->nargs - 1].arg_type
4391 : == SIMD_CLONE_ARG_TYPE_MASK);
4392 : /* Penalize using a masked SIMD clone in a non-masked loop, that is
4393 : not in a branch, as we'd have to construct an all-true mask. */
4394 347 : this_badness += 64;
4395 : }
4396 1174 : if (bestn == NULL || this_badness < badness)
4397 : {
4398 817 : bestn = n;
4399 817 : badness = this_badness;
4400 : }
4401 1174 : if (n->simdclone->inbranch
4402 454 : && (bestn_inbranch == NULL || this_badness < badness_inbranch))
4403 : {
4404 5122 : bestn_inbranch = n;
4405 5122 : badness_inbranch = this_badness;
4406 : }
4407 : }
4408 :
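/* Worked example of the scoring above (numbers illustrative): a clone
   needing two calls scores floor_log2 (2) * 4096 = 4096, being inbranch
   adds 8192 and a target badness of 1 adds 512, 12800 in total, so a
   single-call, non-inbranch clone with target badness 2 (2 * 512 = 1024)
   is preferred.  */
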
4409 1436 : if (bestn == NULL)
4410 : return false;
4411 :
4412 829 : fndecl = bestn->decl;
4413 829 : nunits = bestn->simdclone->simdlen;
4414 829 : int ncopies = vector_unroll_factor (vf * group_size, nunits);
4415 :
4416 : /* If the function isn't const, only allow it in simd loops where the user
4417 : has asserted that at least nunits consecutive iterations can be
4418 : performed using SIMD instructions. */
4419 824 : if ((loop == NULL || maybe_lt ((unsigned) loop->safelen, nunits))
4420 1006 : && gimple_vuse (stmt))
4421 : return false;
4422 :
4423 : /* ncopies is the number of SIMD clone calls we create. Since simdlen
4424 : does not necessarily match the nunits of the vector types used, track
4425 : the number of input vectors separately in ncopies_in. */
4426 829 : int ncopies_in = vect_get_num_vectors (vf * group_size, vectype);
4427 :
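/* Worked example (numbers illustrative): with vf = 8, group_size = 1, a
   clone simdlen of 4 and a vector type holding 8 lanes, we emit
   ncopies = 8 / 4 = 2 clone calls but need only ncopies_in = 8 / 8 = 1
   input vector per operand, later split between the two calls.  */
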
4428 : /* Sanity check: make sure that at least one copy of the vectorized stmt
4429 : needs to be generated. */
4430 829 : gcc_assert (ncopies >= 1);
4431 :
4432 829 : if (cost_vec) /* transformation not required. */
4433 : {
4434 1514 : for (unsigned i = 0; i < nargs; ++i)
4435 1047 : if (!vect_maybe_update_slp_op_vectype (slp_op[i], arginfo[i].vectype))
4436 : {
4437 0 : if (dump_enabled_p ())
4438 0 : dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4439 : "incompatible vector types for invariants\n");
4440 0 : return false;
4441 : }
4442 :
4443 467 : if (!bestn_inbranch && loop_vinfo)
4444 : {
4445 248 : if (dump_enabled_p ()
4446 248 : && LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo))
4447 171 : dump_printf_loc (MSG_NOTE, vect_location,
4448 : "can't use a fully-masked loop because no"
4449 : " masked simd clone was available.\n");
4450 248 : LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo) = false;
4451 : }
4452 :
4453 : /* When the original call is pure or const but the SIMD ABI dictates
4454 : an aggregate return we will have to use a virtual definition and
4455 : in a loop eventually even need to add a virtual PHI. That's
4456 : not straightforward, so allow this to be fixed up via renaming. */
4457 467 : if (gimple_call_lhs (stmt)
4458 461 : && !gimple_vdef (stmt)
4459 832 : && TREE_CODE (TREE_TYPE (TREE_TYPE (bestn->decl))) == ARRAY_TYPE)
4460 27 : vinfo->any_known_not_updated_vssa = true;
4461 : /* ??? For SLP code-gen we end up inserting after the last
4462 : vector argument def rather than at the original call position
4463 : so automagic virtual operand updating doesn't work. */
4464 934 : if (gimple_vuse (stmt))
4465 139 : vinfo->any_known_not_updated_vssa = true;
4466 :
4467 467 : data.clone = bestn;
4468 467 : data.clone_inbranch = bestn_inbranch;
4469 :
4470 467 : simd_clone_info.safe_push (NULL_TREE);
4471 1663 : for (i = 0;
4472 2502 : i < (bestn_inbranch ? bestn_inbranch : bestn)->simdclone->nargs; i++)
4473 : {
4474 1196 : if (loop_vinfo
4475 1190 : && LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo)
4476 482 : && (bestn_inbranch->simdclone->args[i].arg_type
4477 : == SIMD_CLONE_ARG_TYPE_MASK))
4478 : {
4479 174 : if (masked_call_offset)
4480 : /* When there is an explicit mask we require the
4481 : number of elements to match up. */
4482 49 : vect_record_loop_mask (loop_vinfo,
4483 : &LOOP_VINFO_MASKS (loop_vinfo),
4484 : ncopies_in, vectype, NULL_TREE);
4485 : else
4486 : {
4487 : /* When there is no explicit mask on the call we have
4488 : more relaxed requirements. */
4489 125 : tree masktype;
4490 125 : poly_uint64 callee_nelements;
4491 125 : if (SCALAR_INT_MODE_P (bestn_inbranch->simdclone->mask_mode))
4492 : {
4493 12 : callee_nelements
4494 12 : = exact_div (bestn_inbranch->simdclone->simdlen,
4495 : bestn_inbranch->simdclone->args[i].linear_step);
4496 12 : masktype = get_related_vectype_for_scalar_type
4497 12 : (vinfo->vector_mode, TREE_TYPE (vectype),
4498 : callee_nelements);
4499 : }
4500 : else
4501 : {
4502 113 : masktype = bestn_inbranch->simdclone->args[i].vector_type;
4503 : /* The aarch64 port will add custom attributes to types
4504 : for SVE simdclones which make the types different. We
4505 : should use canonical types for masks within the
4506 : vectorizer, hence we construct the related vectype
4507 : here. */
4508 113 : masktype
4509 : = build_truth_vector_type_for_mode
4510 113 : (TYPE_VECTOR_SUBPARTS (masktype),
4511 113 : TYPE_MODE (masktype));
4512 113 : callee_nelements = TYPE_VECTOR_SUBPARTS (masktype);
4513 : }
4514 125 : auto o = vector_unroll_factor (nunits, callee_nelements);
4515 125 : vect_record_loop_mask (loop_vinfo,
4516 : &LOOP_VINFO_MASKS (loop_vinfo),
4517 : ncopies * o, masktype, NULL_TREE);
4518 : }
4519 : }
4520 1022 : else if ((bestn->simdclone->args[i].arg_type
4521 : == SIMD_CLONE_ARG_TYPE_LINEAR_CONSTANT_STEP)
4522 915 : || (bestn->simdclone->args[i].arg_type
4523 : == SIMD_CLONE_ARG_TYPE_LINEAR_REF_CONSTANT_STEP)
4524 904 : || (bestn_inbranch
4525 364 : && ((bestn_inbranch->simdclone->args[i].arg_type
4526 : == SIMD_CLONE_ARG_TYPE_LINEAR_CONSTANT_STEP)
4527 364 : || (bestn_inbranch->simdclone->args[i].arg_type
4528 : == SIMD_CLONE_ARG_TYPE_LINEAR_REF_CONSTANT_STEP))))
4529 : {
4530 118 : simd_clone_info.safe_grow_cleared (i * 3 + 1, true);
4531 118 : simd_clone_info.safe_push (arginfo[i].op);
4532 202 : tree lst = (POINTER_TYPE_P (TREE_TYPE (arginfo[i].op))
4533 202 : ? size_type_node : TREE_TYPE (arginfo[i].op));
4534 118 : tree ls = build_int_cst (lst, arginfo[i].linear_step);
4535 118 : simd_clone_info.safe_push (ls);
4536 118 : tree sll = (arginfo[i].simd_lane_linear
4537 118 : ? boolean_true_node : boolean_false_node);
4538 118 : simd_clone_info.safe_push (sll);
4539 : }
4540 : }
4541 :
4542 467 : SLP_TREE_TYPE (slp_node) = call_simd_clone_vec_info_type;
4543 467 : slp_node->data = new vect_simd_clone_data (std::move (_data));
4544 467 : DUMP_VECT_SCOPE ("vectorizable_simd_clone_call");
4545 : /* ??? We're confused by calls w/o LHS. */
4546 467 : if (SLP_TREE_VECTYPE (slp_node))
4547 461 : vect_model_simple_cost (vinfo, ncopies, slp_node, cost_vec);
4548 467 : return true;
4549 : }
4550 :
4551 : /* Transform. */
4552 :
4553 362 : if (dump_enabled_p ())
4554 246 : dump_printf_loc (MSG_NOTE, vect_location, "transform call.\n");
4555 :
4556 : /* Handle def. */
4557 362 : scalar_dest = gimple_call_lhs (stmt);
4558 362 : vec_dest = NULL_TREE;
4559 362 : rtype = NULL_TREE;
4560 362 : ratype = NULL_TREE;
4561 362 : if (scalar_dest)
4562 : {
4563 356 : vec_dest = vect_create_destination_var (scalar_dest, vectype);
4564 356 : rtype = TREE_TYPE (TREE_TYPE (fndecl));
4565 356 : if (TREE_CODE (rtype) == ARRAY_TYPE)
4566 : {
4567 9 : ratype = rtype;
4568 9 : rtype = TREE_TYPE (ratype);
4569 : }
4570 : }
4571 :
4572 724 : auto_vec<vec<tree> > vec_oprnds;
4573 362 : auto_vec<unsigned> vec_oprnds_i;
4574 362 : vec_oprnds_i.safe_grow_cleared (nargs, true);
4575 362 : vec_oprnds.reserve_exact (nargs);
4576 362 : vect_get_slp_defs (vinfo, slp_node, &vec_oprnds);
4577 833 : for (j = 0; j < ncopies; ++j)
4578 : {
4579 471 : poly_uint64 callee_nelements;
4580 471 : poly_uint64 caller_nelements;
4581 : /* Build argument list for the vectorized call. */
4582 471 : if (j == 0)
4583 362 : vargs.create (nargs);
4584 : else
4585 109 : vargs.truncate (0);
4586 :
4587 1580 : for (i = 0; i < nargs; i++)
4588 : {
4589 1109 : unsigned int k, l, m, o;
4590 1109 : tree atype;
4591 1109 : tree op = gimple_call_arg (stmt, i + masked_call_offset);
4592 1109 : switch (bestn->simdclone->args[i].arg_type)
4593 : {
4594 820 : case SIMD_CLONE_ARG_TYPE_VECTOR:
4595 820 : atype = bestn->simdclone->args[i].vector_type;
4596 820 : caller_nelements = TYPE_VECTOR_SUBPARTS (arginfo[i].vectype);
4597 820 : callee_nelements = TYPE_VECTOR_SUBPARTS (atype);
4598 820 : o = vector_unroll_factor (nunits, callee_nelements);
4599 1870 : for (m = j * o; m < (j + 1) * o; m++)
4600 : {
4601 1050 : if (known_lt (callee_nelements, caller_nelements))
4602 : {
4603 516 : poly_uint64 prec = GET_MODE_BITSIZE (TYPE_MODE (atype));
4604 258 : if (!constant_multiple_p (caller_nelements,
4605 : callee_nelements, &k))
4606 0 : gcc_unreachable ();
4607 :
4608 258 : gcc_assert ((k & (k - 1)) == 0);
4609 258 : if (m == 0)
4610 : {
4611 57 : vec_oprnds_i[i] = 0;
4612 57 : vec_oprnd0 = vec_oprnds[i][vec_oprnds_i[i]++];
4613 : }
4614 : else
4615 : {
4616 201 : vec_oprnd0 = arginfo[i].op;
4617 201 : if ((m & (k - 1)) == 0)
4618 72 : vec_oprnd0 = vec_oprnds[i][vec_oprnds_i[i]++];
4619 : }
4620 258 : arginfo[i].op = vec_oprnd0;
4621 258 : vec_oprnd0
4622 258 : = build3 (BIT_FIELD_REF, atype, vec_oprnd0,
4623 258 : bitsize_int (prec),
4624 258 : bitsize_int ((m & (k - 1)) * prec));
4625 258 : gassign *new_stmt
4626 258 : = gimple_build_assign (make_ssa_name (atype),
4627 : vec_oprnd0);
4628 258 : vect_finish_stmt_generation (vinfo, stmt_info,
4629 : new_stmt, gsi);
4630 258 : vargs.safe_push (gimple_assign_lhs (new_stmt));
4631 : }
4632 : else
4633 : {
4634 792 : if (!constant_multiple_p (callee_nelements,
4635 : caller_nelements, &k))
4636 0 : gcc_unreachable ();
4637 792 : gcc_assert ((k & (k - 1)) == 0);
4638 792 : vec<constructor_elt, va_gc> *ctor_elts;
4639 792 : if (k != 1)
4640 14 : vec_alloc (ctor_elts, k);
4641 : else
4642 778 : ctor_elts = NULL;
4643 820 : for (l = 0; l < k; l++)
4644 : {
4645 806 : if (m == 0 && l == 0)
4646 : {
4647 454 : vec_oprnds_i[i] = 0;
4648 454 : vec_oprnd0 = vec_oprnds[i][vec_oprnds_i[i]++];
4649 : }
4650 : else
4651 352 : vec_oprnd0 = vec_oprnds[i][vec_oprnds_i[i]++];
4652 806 : arginfo[i].op = vec_oprnd0;
4653 806 : if (k == 1)
4654 : break;
4655 28 : CONSTRUCTOR_APPEND_ELT (ctor_elts, NULL_TREE,
4656 : vec_oprnd0);
4657 : }
4658 792 : if (k == 1)
4659 778 : if (!useless_type_conversion_p (TREE_TYPE (vec_oprnd0),
4660 : atype))
4661 : {
4662 0 : vec_oprnd0 = build1 (VIEW_CONVERT_EXPR, atype,
4663 : vec_oprnd0);
4664 0 : gassign *new_stmt
4665 0 : = gimple_build_assign (make_ssa_name (atype),
4666 : vec_oprnd0);
4667 0 : vect_finish_stmt_generation (vinfo, stmt_info,
4668 : new_stmt, gsi);
4669 0 : vargs.safe_push (gimple_get_lhs (new_stmt));
4670 : }
4671 : else
4672 778 : vargs.safe_push (vec_oprnd0);
4673 : else
4674 : {
4675 14 : vec_oprnd0 = build_constructor (atype, ctor_elts);
4676 14 : gassign *new_stmt
4677 14 : = gimple_build_assign (make_ssa_name (atype),
4678 : vec_oprnd0);
4679 14 : vect_finish_stmt_generation (vinfo, stmt_info,
4680 : new_stmt, gsi);
4681 14 : vargs.safe_push (gimple_assign_lhs (new_stmt));
4682 : }
4683 : }
4684 : }
4685 : break;
4686 66 : case SIMD_CLONE_ARG_TYPE_MASK:
4687 66 : if (bestn->simdclone->mask_mode == VOIDmode)
4688 : {
4689 60 : atype = bestn->simdclone->args[i].vector_type;
4690 60 : tree elt_type = TREE_TYPE (atype);
4691 60 : tree one = fold_convert (elt_type, integer_one_node);
4692 60 : tree zero = fold_convert (elt_type, integer_zero_node);
4693 60 : callee_nelements = TYPE_VECTOR_SUBPARTS (atype);
4694 60 : caller_nelements = TYPE_VECTOR_SUBPARTS (arginfo[i].vectype);
4695 60 : o = vector_unroll_factor (nunits, callee_nelements);
4696 120 : for (m = j * o; m < (j + 1) * o; m++)
4697 : {
4698 60 : if (maybe_lt (callee_nelements, caller_nelements))
4699 : {
4700 : /* The mask type has fewer elements than simdlen. */
4701 :
4702 : /* FORNOW */
4703 0 : gcc_unreachable ();
4704 : }
4705 60 : else if (known_eq (callee_nelements, caller_nelements))
4706 : {
4707 : /* The SIMD clone function has the same number of
4708 : elements as the current function. */
4709 60 : if (m == 0)
4710 60 : vec_oprnds_i[i] = 0;
4711 60 : vec_oprnd0 = vec_oprnds[i][vec_oprnds_i[i]++];
4712 60 : if (loop_vinfo
4713 60 : && LOOP_VINFO_FULLY_MASKED_P (loop_vinfo))
4714 : {
4715 0 : vec_loop_masks *loop_masks
4716 : = &LOOP_VINFO_MASKS (loop_vinfo);
4717 0 : tree loop_mask
4718 0 : = vect_get_loop_mask (loop_vinfo, gsi,
4719 : loop_masks, ncopies_in,
4720 0 : vectype, j);
4721 0 : vec_oprnd0
4722 0 : = prepare_vec_mask (loop_vinfo,
4723 0 : TREE_TYPE (loop_mask),
4724 : loop_mask, vec_oprnd0,
4725 : gsi);
4726 0 : loop_vinfo->vec_cond_masked_set.add ({ vec_oprnd0,
4727 : loop_mask });
4728 :
4729 : }
4730 60 : vec_oprnd0
4731 60 : = build3 (VEC_COND_EXPR, atype, vec_oprnd0,
4732 : build_vector_from_val (atype, one),
4733 : build_vector_from_val (atype, zero));
4734 60 : gassign *new_stmt
4735 60 : = gimple_build_assign (make_ssa_name (atype),
4736 : vec_oprnd0);
4737 60 : vect_finish_stmt_generation (vinfo, stmt_info,
4738 : new_stmt, gsi);
4739 60 : vargs.safe_push (gimple_assign_lhs (new_stmt));
4740 : }
4741 : else
4742 : {
4743 : /* The mask type has more elements than simdlen. */
4744 :
4745 : /* FORNOW */
4746 0 : gcc_unreachable ();
4747 : }
4748 : }
4749 : }
4750 6 : else if (SCALAR_INT_MODE_P (bestn->simdclone->mask_mode))
4751 : {
4752 6 : atype = bestn->simdclone->args[i].vector_type;
4753 6 : poly_uint64 atype_subparts
4754 6 : = exact_div (bestn->simdclone->simdlen,
4755 : bestn->simdclone->args[i].linear_step);
4756 6 : o = bestn->simdclone->args[i].linear_step;
4757 12 : for (m = j * o; m < (j + 1) * o; m++)
4758 : {
4759 6 : if (m == 0)
4760 6 : vec_oprnds_i[i] = 0;
4761 6 : if (maybe_lt (atype_subparts,
4762 6 : TYPE_VECTOR_SUBPARTS (arginfo[i].vectype)))
4763 : {
4764 : /* The mask argument has fewer elements than the
4765 : input vector. */
4766 : /* FORNOW */
4767 0 : gcc_unreachable ();
4768 : }
4769 6 : else if (known_eq (atype_subparts,
4770 : TYPE_VECTOR_SUBPARTS (arginfo[i].vectype)))
4771 : {
4772 6 : vec_oprnd0 = vec_oprnds[i][vec_oprnds_i[i]++];
4773 6 : if (loop_vinfo
4774 6 : && LOOP_VINFO_FULLY_MASKED_P (loop_vinfo))
4775 : {
4776 1 : vec_loop_masks *loop_masks
4777 : = &LOOP_VINFO_MASKS (loop_vinfo);
4778 1 : tree loop_mask
4779 1 : = vect_get_loop_mask (loop_vinfo, gsi,
4780 : loop_masks, ncopies_in,
4781 : vectype, j);
4782 1 : vec_oprnd0
4783 1 : = prepare_vec_mask (loop_vinfo,
4784 1 : TREE_TYPE (loop_mask),
4785 : loop_mask, vec_oprnd0,
4786 : gsi);
4787 : }
4788 : /* The vector mask argument matches the input
4789 : in the number of lanes, but not necessarily
4790 : in the mode. */
4791 6 : tree st = lang_hooks.types.type_for_mode
4792 6 : (TYPE_MODE (TREE_TYPE (vec_oprnd0)), 1);
4793 6 : vec_oprnd0 = build1 (VIEW_CONVERT_EXPR, st,
4794 : vec_oprnd0);
4795 6 : gassign *new_stmt
4796 6 : = gimple_build_assign (make_ssa_name (st),
4797 : vec_oprnd0);
4798 6 : vect_finish_stmt_generation (vinfo, stmt_info,
4799 : new_stmt, gsi);
4800 6 : if (!types_compatible_p (atype, st))
4801 : {
4802 6 : new_stmt
4803 6 : = gimple_build_assign (make_ssa_name (atype),
4804 : NOP_EXPR,
4805 : gimple_assign_lhs
4806 : (new_stmt));
4807 6 : vect_finish_stmt_generation (vinfo, stmt_info,
4808 : new_stmt, gsi);
4809 : }
4810 6 : vargs.safe_push (gimple_assign_lhs (new_stmt));
4811 : }
4812 : else
4813 : {
4814 : /* The mask argument has more elements than the
4815 : input vector. */
4816 : /* FORNOW */
4817 0 : gcc_unreachable ();
4818 : }
4819 : }
4820 : }
4821 : else
4822 0 : gcc_unreachable ();
4823 : break;
4824 102 : case SIMD_CLONE_ARG_TYPE_UNIFORM:
4825 102 : vargs.safe_push (op);
4826 102 : break;
4827 121 : case SIMD_CLONE_ARG_TYPE_LINEAR_CONSTANT_STEP:
4828 121 : case SIMD_CLONE_ARG_TYPE_LINEAR_REF_CONSTANT_STEP:
4829 121 : if (j == 0)
4830 : {
4831 118 : gimple_seq stmts;
4832 118 : arginfo[i].op
4833 118 : = force_gimple_operand (unshare_expr (arginfo[i].op),
4834 : &stmts, true, NULL_TREE);
4835 118 : if (stmts != NULL)
4836 : {
4837 0 : basic_block new_bb;
4838 0 : edge pe = loop_preheader_edge (loop);
4839 0 : new_bb = gsi_insert_seq_on_edge_immediate (pe, stmts);
4840 0 : gcc_assert (!new_bb);
4841 : }
4842 118 : if (arginfo[i].simd_lane_linear)
4843 : {
4844 6 : vargs.safe_push (arginfo[i].op);
4845 6 : break;
4846 : }
4847 112 : tree phi_res = copy_ssa_name (op);
4848 112 : gphi *new_phi = create_phi_node (phi_res, loop->header);
4849 112 : add_phi_arg (new_phi, arginfo[i].op,
4850 : loop_preheader_edge (loop), UNKNOWN_LOCATION);
4851 112 : enum tree_code code
4852 196 : = POINTER_TYPE_P (TREE_TYPE (op))
4853 112 : ? POINTER_PLUS_EXPR : PLUS_EXPR;
4854 196 : tree type = POINTER_TYPE_P (TREE_TYPE (op))
4855 196 : ? sizetype : TREE_TYPE (op);
4856 112 : poly_widest_int cst
4857 112 : = wi::mul (bestn->simdclone->args[i].linear_step,
4858 112 : ncopies * nunits);
4859 112 : tree tcst = wide_int_to_tree (type, cst);
4860 112 : tree phi_arg = copy_ssa_name (op);
4861 112 : gassign *new_stmt
4862 112 : = gimple_build_assign (phi_arg, code, phi_res, tcst);
4863 112 : gimple_stmt_iterator si = gsi_after_labels (loop->header);
4864 112 : gsi_insert_after (&si, new_stmt, GSI_NEW_STMT);
4865 112 : add_phi_arg (new_phi, phi_arg, loop_latch_edge (loop),
4866 : UNKNOWN_LOCATION);
4867 112 : arginfo[i].op = phi_res;
4868 112 : vargs.safe_push (phi_res);
4869 112 : }
4870 : else
4871 : {
4872 3 : enum tree_code code
4873 6 : = POINTER_TYPE_P (TREE_TYPE (op))
4874 3 : ? POINTER_PLUS_EXPR : PLUS_EXPR;
4875 6 : tree type = POINTER_TYPE_P (TREE_TYPE (op))
4876 6 : ? sizetype : TREE_TYPE (op);
4877 3 : poly_widest_int cst
4878 3 : = wi::mul (bestn->simdclone->args[i].linear_step,
4879 3 : j * nunits);
4880 3 : tree tcst = wide_int_to_tree (type, cst);
4881 3 : new_temp = make_ssa_name (TREE_TYPE (op));
4882 3 : gassign *new_stmt
4883 6 : = gimple_build_assign (new_temp, code,
4884 3 : arginfo[i].op, tcst);
4885 3 : vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
4886 3 : vargs.safe_push (new_temp);
4887 3 : }
4888 : break;
4889 0 : case SIMD_CLONE_ARG_TYPE_LINEAR_VAL_CONSTANT_STEP:
4890 0 : case SIMD_CLONE_ARG_TYPE_LINEAR_UVAL_CONSTANT_STEP:
4891 0 : case SIMD_CLONE_ARG_TYPE_LINEAR_VARIABLE_STEP:
4892 0 : case SIMD_CLONE_ARG_TYPE_LINEAR_REF_VARIABLE_STEP:
4893 0 : case SIMD_CLONE_ARG_TYPE_LINEAR_VAL_VARIABLE_STEP:
4894 0 : case SIMD_CLONE_ARG_TYPE_LINEAR_UVAL_VARIABLE_STEP:
4895 0 : default:
4896 0 : gcc_unreachable ();
4897 : }
4898 : }
4899 :
4900 471 : if (masked_call_offset == 0
4901 405 : && bestn->simdclone->inbranch
4902 13 : && bestn->simdclone->nargs > nargs)
4903 : {
4904 13 : unsigned long m, o;
4905 13 : size_t mask_i = bestn->simdclone->nargs - 1;
4906 13 : tree mask;
4907 13 : gcc_assert (bestn->simdclone->args[mask_i].arg_type
4908 : == SIMD_CLONE_ARG_TYPE_MASK);
4909 :
4910 13 : tree mask_argtype = bestn->simdclone->args[mask_i].vector_type;
4911 13 : tree mask_vectype;
4912 13 : if (SCALAR_INT_MODE_P (bestn->simdclone->mask_mode))
4913 : {
4914 2 : callee_nelements = exact_div (bestn->simdclone->simdlen,
4915 : bestn->simdclone->args[i].linear_step);
4916 2 : mask_vectype = get_related_vectype_for_scalar_type
4917 2 : (vinfo->vector_mode, TREE_TYPE (vectype), callee_nelements);
4918 : }
4919 : else
4920 : {
4921 11 : mask_vectype = mask_argtype;
4922 11 : callee_nelements = TYPE_VECTOR_SUBPARTS (mask_vectype);
4923 : }
4924 13 : o = vector_unroll_factor (nunits, callee_nelements);
4925 26 : for (m = j * o; m < (j + 1) * o; m++)
4926 : {
4927 13 : if (loop_vinfo && LOOP_VINFO_FULLY_MASKED_P (loop_vinfo))
4928 : {
4929 1 : vec_loop_masks *loop_masks = &LOOP_VINFO_MASKS (loop_vinfo);
4930 1 : mask = vect_get_loop_mask (loop_vinfo, gsi, loop_masks,
4931 : ncopies * o, mask_vectype, m);
4932 : }
4933 : else
4934 12 : mask = vect_build_all_ones_mask (vinfo, stmt_info,
4935 : mask_argtype);
4936 :
4937 13 : gassign *new_stmt;
4938 13 : if (SCALAR_INT_MODE_P (bestn->simdclone->mask_mode))
4939 : {
4940 : /* This means we are dealing with integer mask modes.
4941 : First convert to an integer type with the same size as
4942 : the current vector type. */
4943 2 : unsigned HOST_WIDE_INT intermediate_size
4944 2 : = tree_to_uhwi (TYPE_SIZE (TREE_TYPE (mask)));
4945 2 : tree mid_int_type =
4946 2 : build_nonstandard_integer_type (intermediate_size, 1);
4947 2 : mask = build1 (VIEW_CONVERT_EXPR, mid_int_type, mask);
4948 2 : new_stmt
4949 2 : = gimple_build_assign (make_ssa_name (mid_int_type),
4950 : mask);
4951 2 : gsi_insert_before (gsi, new_stmt, GSI_SAME_STMT);
4952 : /* Then zero-extend to the mask mode. */
4953 2 : mask = fold_build1 (NOP_EXPR, mask_argtype,
4954 : gimple_get_lhs (new_stmt));
4955 : }
4956 11 : else if (bestn->simdclone->mask_mode == VOIDmode)
4957 11 : mask = build3 (VEC_COND_EXPR, mask_argtype, mask,
4958 : build_one_cst (mask_argtype),
4959 : build_zero_cst (mask_argtype));
4960 : else
4961 0 : gcc_unreachable ();
4962 :
4963 13 : new_stmt = gimple_build_assign (make_ssa_name (mask_argtype),
4964 : mask);
4965 13 : vect_finish_stmt_generation (vinfo, stmt_info,
4966 : new_stmt, gsi);
4967 13 : mask = gimple_assign_lhs (new_stmt);
4968 13 : vargs.safe_push (mask);
4969 : }
4970 : }
4971 :
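/* Illustration: for a VOIDmode mask mode the loop above materializes the
   trailing mask argument roughly as (names and types illustrative)
     mask_2 = VEC_COND_EXPR <mask_1, { 1, 1, 1, 1 }, { 0, 0, 0, 0 }>;
   i.e. the boolean mask becomes the 0/1 data vector the clone ABI
   expects before being pushed as the last call argument.  */
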
4972 471 : gcall *new_call = gimple_build_call_vec (fndecl, vargs);
4973 471 : if (vec_dest)
4974 : {
4975 465 : gcc_assert (ratype
4976 : || known_eq (TYPE_VECTOR_SUBPARTS (rtype), nunits));
4977 465 : if (ratype)
4978 15 : new_temp = create_tmp_var (ratype);
4979 450 : else if (useless_type_conversion_p (vectype, rtype))
4980 428 : new_temp = make_ssa_name (vec_dest, new_call);
4981 : else
4982 22 : new_temp = make_ssa_name (rtype, new_call);
4983 465 : gimple_call_set_lhs (new_call, new_temp);
4984 : }
4985 471 : vect_finish_stmt_generation (vinfo, stmt_info, new_call, gsi);
4986 471 : gimple *new_stmt = new_call;
4987 :
4988 471 : if (vec_dest)
4989 : {
4990 465 : if (!multiple_p (TYPE_VECTOR_SUBPARTS (vectype), nunits))
4991 : {
4992 21 : unsigned int k, l;
4993 42 : poly_uint64 prec = GET_MODE_BITSIZE (TYPE_MODE (vectype));
4994 42 : poly_uint64 bytes = GET_MODE_SIZE (TYPE_MODE (vectype));
4995 21 : k = vector_unroll_factor (nunits,
4996 : TYPE_VECTOR_SUBPARTS (vectype));
4997 21 : gcc_assert ((k & (k - 1)) == 0);
4998 75 : for (l = 0; l < k; l++)
4999 : {
5000 54 : tree t;
5001 54 : if (ratype)
5002 : {
5003 42 : t = build_fold_addr_expr (new_temp);
5004 42 : t = build2 (MEM_REF, vectype, t,
5005 42 : build_int_cst (TREE_TYPE (t), l * bytes));
5006 : }
5007 : else
5008 12 : t = build3 (BIT_FIELD_REF, vectype, new_temp,
5009 12 : bitsize_int (prec), bitsize_int (l * prec));
5010 54 : new_stmt = gimple_build_assign (make_ssa_name (vectype), t);
5011 54 : vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
5012 :
5013 54 : SLP_TREE_VEC_DEFS (slp_node)
5014 54 : .quick_push (gimple_assign_lhs (new_stmt));
5015 : }
5016 :
5017 21 : if (ratype)
5018 15 : vect_clobber_variable (vinfo, stmt_info, gsi, new_temp);
5019 21 : continue;
5020 21 : }
5021 444 : else if (!multiple_p (nunits, TYPE_VECTOR_SUBPARTS (vectype)))
5022 : {
5023 16 : unsigned int k;
5024 16 : if (!constant_multiple_p (TYPE_VECTOR_SUBPARTS (vectype),
5025 16 : TYPE_VECTOR_SUBPARTS (rtype), &k))
5026 0 : gcc_unreachable ();
5027 16 : gcc_assert ((k & (k - 1)) == 0);
5028 16 : if ((j & (k - 1)) == 0)
5029 8 : vec_alloc (ret_ctor_elts, k);
5030 16 : if (ratype)
5031 : {
5032 0 : unsigned int m, o;
5033 0 : o = vector_unroll_factor (nunits,
5034 : TYPE_VECTOR_SUBPARTS (rtype));
5035 0 : for (m = 0; m < o; m++)
5036 : {
5037 0 : tree tem = build4 (ARRAY_REF, rtype, new_temp,
5038 0 : size_int (m), NULL_TREE, NULL_TREE);
5039 0 : new_stmt = gimple_build_assign (make_ssa_name (rtype),
5040 : tem);
5041 0 : vect_finish_stmt_generation (vinfo, stmt_info,
5042 : new_stmt, gsi);
5043 0 : CONSTRUCTOR_APPEND_ELT (ret_ctor_elts, NULL_TREE,
5044 : gimple_assign_lhs (new_stmt));
5045 : }
5046 0 : vect_clobber_variable (vinfo, stmt_info, gsi, new_temp);
5047 : }
5048 : else
5049 16 : CONSTRUCTOR_APPEND_ELT (ret_ctor_elts, NULL_TREE, new_temp);
5050 16 : if ((j & (k - 1)) != k - 1)
5051 8 : continue;
5052 8 : vec_oprnd0 = build_constructor (vectype, ret_ctor_elts);
5053 8 : new_stmt
5054 8 : = gimple_build_assign (make_ssa_name (vec_dest), vec_oprnd0);
5055 8 : vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
5056 :
5057 8 : SLP_TREE_VEC_DEFS (slp_node)
5058 8 : .quick_push (gimple_assign_lhs (new_stmt));
5059 8 : continue;
5060 8 : }
5061 428 : else if (ratype)
5062 : {
5063 0 : tree t = build_fold_addr_expr (new_temp);
5064 0 : t = build2 (MEM_REF, vectype, t,
5065 0 : build_int_cst (TREE_TYPE (t), 0));
5066 0 : new_stmt = gimple_build_assign (make_ssa_name (vec_dest), t);
5067 0 : vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
5068 0 : vect_clobber_variable (vinfo, stmt_info, gsi, new_temp);
5069 : }
5070 428 : else if (!useless_type_conversion_p (vectype, rtype))
5071 : {
5072 0 : vec_oprnd0 = build1 (VIEW_CONVERT_EXPR, vectype, new_temp);
5073 0 : new_stmt
5074 0 : = gimple_build_assign (make_ssa_name (vec_dest), vec_oprnd0);
5075 0 : vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
5076 : }
5077 : }
5078 :
5079 434 : if (gimple_get_lhs (new_stmt))
5080 428 : SLP_TREE_VEC_DEFS (slp_node).quick_push (gimple_get_lhs (new_stmt));
5081 : }
5082 :
5083 1159 : for (i = 0; i < nargs; ++i)
5084 : {
5085 797 : vec<tree> oprndsi = vec_oprnds[i];
5086 797 : oprndsi.release ();
5087 : }
5088 362 : vargs.release ();
5089 :
5090 : /* Mark the clone as no longer being a candidate for GC. */
5091 362 : bestn->gc_candidate = false;
5092 :
5093 362 : return true;
5094 1436 : }
5095 :
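/* Illustration: a minimal, self-contained model (not GCC code) of the
   argument marshalling done in the transform loop above.  Lanes form one
   flat stream; when the clone expects CALLEE lanes per vector and the
   caller's vectors hold CALLER lanes, CALLEE < CALLER splits each caller
   vector into CALLER/CALLEE pieces (the BIT_FIELD_REF path) while
   CALLEE > CALLER concatenates several caller vectors (the CONSTRUCTOR
   path); both ratios are asserted to be powers of two.  */

#include <array>
#include <cstddef>

template <std::size_t CALLER, std::size_t CALLEE>
static std::array<int, CALLEE>
callee_piece (const int (*caller_vecs)[CALLER], std::size_t m)
{
  std::array<int, CALLEE> out{};
  for (std::size_t l = 0; l < CALLEE; ++l)
    {
      std::size_t flat = m * CALLEE + l;  /* lane index in the flat stream */
      out[l] = caller_vecs[flat / CALLER][flat % CALLER];
    }
  return out;
}

/* callee_piece<8, 4> (vecs, m) models the BIT_FIELD_REF split and
   callee_piece<4, 8> (vecs, m) the CONSTRUCTOR concatenation.  */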
5096 :
5097 : /* Function vect_gen_widened_results_half
5098 :
5099 : Create a vector stmt whose code, number of arguments, and result
5100 : variable are CH, OP_TYPE, and VEC_DEST, and whose arguments are
5101 : VEC_OPRND0 and VEC_OPRND1 (the latter is used only when OP_TYPE is
5102 : binary_op). CH may be a tree code or an internal-function code; the
5103 : new vector stmt is to be inserted at GSI.
5104 : STMT_INFO is the original scalar stmt that we are vectorizing. */
5105 :
5106 : static gimple *
5107 31704 : vect_gen_widened_results_half (vec_info *vinfo, code_helper ch,
5108 : tree vec_oprnd0, tree vec_oprnd1, int op_type,
5109 : tree vec_dest, gimple_stmt_iterator *gsi,
5110 : stmt_vec_info stmt_info)
5111 : {
5112 31704 : gimple *new_stmt;
5113 31704 : tree new_temp;
5114 :
5115 : /* Generate half of the widened result: */
5116 31704 : if (op_type != binary_op)
5117 30594 : vec_oprnd1 = NULL;
5118 31704 : new_stmt = vect_gimple_build (vec_dest, ch, vec_oprnd0, vec_oprnd1);
5119 31704 : new_temp = make_ssa_name (vec_dest, new_stmt);
5120 31704 : gimple_set_lhs (new_stmt, new_temp);
5121 31704 : vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
5122 :
5123 31704 : return new_stmt;
5124 : }
5125 :
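/* Illustration: a scalar model (not GCC code) of producing a widened
   result in two halves.  An N-lane input yields two N/2-lane outputs of
   doubled element width; whether a half holds the low/high or the
   even/odd lanes is the target's choice, encoded in the codes the
   caller passes for the two halves.  */

#include <cstddef>
#include <cstdint>
#include <vector>

static void
widen_in_halves (const std::vector<int16_t> &in,
                 std::vector<int32_t> &half1, std::vector<int32_t> &half2)
{
  std::size_t half = in.size () / 2;
  half1.assign (in.begin (), in.begin () + half);  /* lanes widen to 32 bits */
  half2.assign (in.begin () + half, in.end ());
}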
5126 :
5127 : /* Create vectorized demotion statements for vector operands from VEC_OPRNDS.
5128 : For multi-step conversions store the resulting vectors and call the function
5129 : recursively. When NARROW_SRC_P is true, there is still a conversion after
5130 : narrowing, so don't store the vectors in the SLP_NODE or in the vector info
5131 : of the scalar statement (or in the STMT_VINFO_RELATED_STMT chain). */
5132 :
5133 : static void
5134 12038 : vect_create_vectorized_demotion_stmts (vec_info *vinfo, vec<tree> *vec_oprnds,
5135 : int multi_step_cvt,
5136 : stmt_vec_info stmt_info,
5137 : vec<tree> &vec_dsts,
5138 : gimple_stmt_iterator *gsi,
5139 : slp_tree slp_node, code_helper code,
5140 : bool narrow_src_p)
5141 : {
5142 12038 : unsigned int i;
5143 12038 : tree vop0, vop1, new_tmp, vec_dest;
5144 :
5145 12038 : vec_dest = vec_dsts.pop ();
5146 :
5147 28485 : for (i = 0; i < vec_oprnds->length (); i += 2)
5148 : {
5149 : /* Create demotion operation. */
5150 16447 : vop0 = (*vec_oprnds)[i];
5151 16447 : vop1 = (*vec_oprnds)[i + 1];
5152 16447 : gimple *new_stmt = vect_gimple_build (vec_dest, code, vop0, vop1);
5153 16447 : new_tmp = make_ssa_name (vec_dest, new_stmt);
5154 16447 : gimple_set_lhs (new_stmt, new_tmp);
5155 16447 : vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
5156 16447 : if (multi_step_cvt || narrow_src_p)
5157 : /* Store the resulting vector for next recursive call,
5158 : or return the resulting vector_tmp for NARROW FLOAT_EXPR. */
5159 6745 : (*vec_oprnds)[i/2] = new_tmp;
5160 : else
5161 : {
5162 : /* This is the last step of the conversion sequence. Store the
5163 : vectors in SLP_NODE. */
5164 9702 : slp_node->push_vec_def (new_stmt);
5165 : }
5166 : }
5167 :
5168 : /* For multi-step demotion operations we first generate demotion operations
5169 : from the source type to the intermediate types, and then combine the
5170 : results (stored in VEC_OPRNDS) in demotion operation to the destination
5171 : type. */
5172 12038 : if (multi_step_cvt)
5173 : {
5174 : /* At each level of recursion we have half of the operands we had at the
5175 : previous level. */
5176 2998 : vec_oprnds->truncate ((i+1)/2);
5177 2998 : vect_create_vectorized_demotion_stmts (vinfo, vec_oprnds,
5178 : multi_step_cvt - 1,
5179 : stmt_info, vec_dsts, gsi,
5180 2998 : slp_node, VEC_PACK_TRUNC_EXPR,
5181 : narrow_src_p);
5182 : }
5183 :
5184 12038 : vec_dsts.quick_push (vec_dest);
5185 12038 : }
5186 :
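/* Illustration: a minimal model (not GCC code) of the recursion above.
   Each level packs operands pairwise, halving their number, so with
   MULTI_STEP_CVT extra levels 2^(MULTI_STEP_CVT + 1) input vectors
   reduce to a single result; combine () is a placeholder for the
   VEC_PACK_TRUNC step.  */

#include <cstddef>
#include <vector>

static int combine (int a, int b) { return a + b; }  /* placeholder */

static void
demote_levels (std::vector<int> &ops, int multi_step_cvt)
{
  for (int level = 0; level <= multi_step_cvt; ++level)
    {
      std::size_t n = 0;
      for (std::size_t i = 0; i + 1 < ops.size (); i += 2)
        ops[n++] = combine (ops[i], ops[i + 1]);  /* pairwise pack */
      ops.resize (n);
    }
}

/* E.g. eight operands with multi_step_cvt == 2 reduce 8 -> 4 -> 2 -> 1.  */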
5187 :
5188 : /* Create vectorized promotion statements for vector operands from VEC_OPRNDS0
5189 : and VEC_OPRNDS1, for a binary operation associated with scalar statement
5190 : STMT_INFO. For multi-step conversions store the resulting vectors and
5191 : call the function recursively. */
5192 :
5193 : static void
5194 11549 : vect_create_vectorized_promotion_stmts (vec_info *vinfo,
5195 : vec<tree> *vec_oprnds0,
5196 : vec<tree> *vec_oprnds1,
5197 : stmt_vec_info stmt_info, tree vec_dest,
5198 : gimple_stmt_iterator *gsi,
5199 : code_helper ch1,
5200 : code_helper ch2, int op_type)
5201 : {
5202 11549 : int i;
5203 11549 : tree vop0, vop1, new_tmp1, new_tmp2;
5204 11549 : gimple *new_stmt1, *new_stmt2;
5205 11549 : vec<tree> vec_tmp = vNULL;
5206 :
5207 11549 : vec_tmp.create (vec_oprnds0->length () * 2);
5208 38950 : FOR_EACH_VEC_ELT (*vec_oprnds0, i, vop0)
5209 : {
5210 15852 : if (op_type == binary_op)
5211 555 : vop1 = (*vec_oprnds1)[i];
5212 : else
5213 : vop1 = NULL_TREE;
5214 :
5215 : /* Generate the two halves of promotion operation. */
5216 15852 : new_stmt1 = vect_gen_widened_results_half (vinfo, ch1, vop0, vop1,
5217 : op_type, vec_dest, gsi,
5218 : stmt_info);
5219 15852 : new_stmt2 = vect_gen_widened_results_half (vinfo, ch2, vop0, vop1,
5220 : op_type, vec_dest, gsi,
5221 : stmt_info);
5222 15852 : if (is_gimple_call (new_stmt1))
5223 : {
5224 0 : new_tmp1 = gimple_call_lhs (new_stmt1);
5225 0 : new_tmp2 = gimple_call_lhs (new_stmt2);
5226 : }
5227 : else
5228 : {
5229 15852 : new_tmp1 = gimple_assign_lhs (new_stmt1);
5230 15852 : new_tmp2 = gimple_assign_lhs (new_stmt2);
5231 : }
5232 :
5233 : /* Store the results for the next step. */
5234 15852 : vec_tmp.quick_push (new_tmp1);
5235 15852 : vec_tmp.quick_push (new_tmp2);
5236 : }
5237 :
5238 11549 : vec_oprnds0->release ();
5239 11549 : *vec_oprnds0 = vec_tmp;
5240 11549 : }
5241 :
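/* Illustration: a minimal model (not GCC code) of the doubling done by
   the promotion routine above; each level replaces every operand with
   its two widened halves, the mirror image of the demotion model.  */

#include <vector>

static void
promote_level (std::vector<int> &ops)
{
  std::vector<int> next;
  next.reserve (ops.size () * 2);
  for (int v : ops)
    {
      next.push_back (v);  /* stands in for the first widened half  */
      next.push_back (v);  /* stands in for the second widened half */
    }
  ops = next;              /* *vec_oprnds0 is replaced, as above    */
}
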
5242 : /* Create vectorized promotion stmts for widening stmts using only half the
5243 : potential vector size for input. */
5244 : static void
5245 14 : vect_create_half_widening_stmts (vec_info *vinfo,
5246 : vec<tree> *vec_oprnds0,
5247 : vec<tree> *vec_oprnds1,
5248 : stmt_vec_info stmt_info, tree vec_dest,
5249 : gimple_stmt_iterator *gsi,
5250 : code_helper code1,
5251 : int op_type)
5252 : {
5253 14 : int i;
5254 14 : tree vop0, vop1;
5255 14 : gimple *new_stmt1;
5256 14 : gimple *new_stmt2;
5257 14 : gimple *new_stmt3;
5258 14 : vec<tree> vec_tmp = vNULL;
5259 :
5260 14 : vec_tmp.create (vec_oprnds0->length ());
5261 28 : FOR_EACH_VEC_ELT (*vec_oprnds0, i, vop0)
5262 : {
5263 14 : tree new_tmp1, new_tmp2, new_tmp3, out_type;
5264 :
5265 14 : gcc_assert (op_type == binary_op);
5266 14 : vop1 = (*vec_oprnds1)[i];
5267 :
5268 : /* Widen the first vector input. */
5269 14 : out_type = TREE_TYPE (vec_dest);
5270 14 : new_tmp1 = make_ssa_name (out_type);
5271 14 : new_stmt1 = gimple_build_assign (new_tmp1, NOP_EXPR, vop0);
5272 14 : vect_finish_stmt_generation (vinfo, stmt_info, new_stmt1, gsi);
5273 14 : if (VECTOR_TYPE_P (TREE_TYPE (vop1)))
5274 : {
5275 : /* Widen the second vector input. */
5276 14 : new_tmp2 = make_ssa_name (out_type);
5277 14 : new_stmt2 = gimple_build_assign (new_tmp2, NOP_EXPR, vop1);
5278 14 : vect_finish_stmt_generation (vinfo, stmt_info, new_stmt2, gsi);
5279 : /* Perform the operation. With both vector inputs widened. */
5280 14 : new_stmt3 = vect_gimple_build (vec_dest, code1, new_tmp1, new_tmp2);
5281 : }
5282 : else
5283 : {
5284 : /* Perform the operation. With the single vector input widened. */
5285 0 : new_stmt3 = vect_gimple_build (vec_dest, code1, new_tmp1, vop1);
5286 : }
5287 :
5288 14 : new_tmp3 = make_ssa_name (vec_dest, new_stmt3);
5289 14 : gimple_assign_set_lhs (new_stmt3, new_tmp3);
5290 14 : vect_finish_stmt_generation (vinfo, stmt_info, new_stmt3, gsi);
5291 :
5292 : /* Store the results for the next step. */
5293 14 : vec_tmp.quick_push (new_tmp3);
5294 : }
5295 :
5296 14 : vec_oprnds0->release ();
5297 14 : *vec_oprnds0 = vec_tmp;
5298 14 : }
5299 :
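/* Illustration: the scalar shape (not GCC code) of half widening.  Both
   inputs are first converted to the wider type and the operation is then
   done once at full width; this applies only when the input vector uses
   half of the potential vector size, i.e. nunits_in == nunits_out.  */

#include <cstdint>

static int32_t
half_widen_add (int16_t a, int16_t b)
{
  int32_t wa = a;  /* widen the first input (the NOP_EXPR above)  */
  int32_t wb = b;  /* widen the second input                      */
  return wa + wb;  /* perform the operation at the wider type     */
}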
5300 :
5301 : /* Check if STMT_INFO performs a conversion operation that can be vectorized.
5302 : If COST_VEC is passed, calculate costs but don't change anything;
5303 : otherwise, vectorize STMT_INFO: create a vectorized stmt to replace
5304 : it, and insert it at GSI.
5305 : Return true if STMT_INFO is vectorizable in this way. */
5306 :
5307 : static bool
5308 2695519 : vectorizable_conversion (vec_info *vinfo,
5309 : stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
5310 : slp_tree slp_node,
5311 : stmt_vector_for_cost *cost_vec)
5312 : {
5313 2695519 : tree vec_dest, cvt_op = NULL_TREE;
5314 2695519 : tree scalar_dest;
5315 2695519 : tree op0, op1 = NULL_TREE;
5316 2695519 : tree_code tc1;
5317 2695519 : code_helper code, code1, code2;
5318 2695519 : code_helper codecvt1 = ERROR_MARK, codecvt2 = ERROR_MARK;
5319 2695519 : tree new_temp;
5320 2695519 : enum vect_def_type dt[2] = {vect_unknown_def_type, vect_unknown_def_type};
5321 2695519 : poly_uint64 nunits_in;
5322 2695519 : poly_uint64 nunits_out;
5323 2695519 : tree vectype_out, vectype_in;
5324 2695519 : int i;
5325 2695519 : tree lhs_type, rhs_type;
5326 : /* For conversions between floating point and integer, there are two NARROW
5327 : cases. NARROW_SRC is for FLOAT_EXPR and means
5328 : integer --DEMOTION--> integer --FLOAT_EXPR--> floating point.
5329 : This is safe when the range of the source integer fits into the lower
5330 : precision. NARROW_DST is for FIX_TRUNC_EXPR and means
5331 : floating point --FIX_TRUNC_EXPR--> integer --DEMOTION--> integer.
5332 : For other conversions, when there is narrowing, NARROW_DST is used by
5333 : default. */
5334 2695519 : enum { NARROW_SRC, NARROW_DST, NONE, WIDEN } modifier;
5335 2695519 : vec<tree> vec_oprnds0 = vNULL;
5336 2695519 : vec<tree> vec_oprnds1 = vNULL;
5337 2695519 : tree vop0;
5338 2695519 : bb_vec_info bb_vinfo = dyn_cast <bb_vec_info> (vinfo);
5339 2695519 : loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
5340 2695519 : int multi_step_cvt = 0;
5341 2695519 : vec<tree> interm_types = vNULL;
5342 2695519 : tree intermediate_type, cvt_type = NULL_TREE;
5343 2695519 : int op_type;
5344 2695519 : unsigned short fltsz;
5345 :
5346 : /* Is STMT a vectorizable conversion? */
5347 :
5348 2695519 : if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
5349 : return false;
5350 :
5351 2695519 : if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
5352 232992 : && cost_vec)
5353 : return false;
5354 :
5355 2462527 : gimple* stmt = stmt_info->stmt;
5356 2462527 : if (!(is_gimple_assign (stmt) || is_gimple_call (stmt)))
5357 : return false;
5358 :
5359 2404659 : if (gimple_get_lhs (stmt) == NULL_TREE
5360 2404659 : || TREE_CODE (gimple_get_lhs (stmt)) != SSA_NAME)
5361 811619 : return false;
5365 :
5366 1593040 : if (is_gimple_assign (stmt))
5367 : {
5368 1581103 : code = gimple_assign_rhs_code (stmt);
5369 1581103 : op_type = TREE_CODE_LENGTH ((tree_code) code);
5370 : }
5371 11937 : else if (gimple_call_internal_p (stmt))
5372 : {
5373 7864 : code = gimple_call_internal_fn (stmt);
5374 7864 : op_type = gimple_call_num_args (stmt);
5375 : }
5376 : else
5377 : return false;
5378 :
5379 1588967 : bool widen_arith = (code == WIDEN_MULT_EXPR
5380 1586637 : || code == WIDEN_LSHIFT_EXPR
5381 3175604 : || widening_fn_p (code));
5382 :
5383 1586637 : if (!widen_arith
5384 1586637 : && !CONVERT_EXPR_CODE_P (code)
5385 1424677 : && code != FIX_TRUNC_EXPR
5386 1422941 : && code != FLOAT_EXPR)
5387 : return false;
5388 :
5389 : /* Check types of lhs and rhs. */
5390 184275 : scalar_dest = gimple_get_lhs (stmt);
5391 184275 : lhs_type = TREE_TYPE (scalar_dest);
5392 184275 : vectype_out = SLP_TREE_VECTYPE (slp_node);
5393 :
5394 : /* Check the operands of the operation. */
5395 184275 : slp_tree slp_op0, slp_op1 = NULL;
5396 184275 : if (!vect_is_simple_use (vinfo, slp_node,
5397 : 0, &op0, &slp_op0, &dt[0], &vectype_in))
5398 : {
5399 0 : if (dump_enabled_p ())
5400 0 : dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5401 : "use not simple.\n");
5402 0 : return false;
5403 : }
5404 :
5405 184275 : rhs_type = TREE_TYPE (op0);
5406 182539 : if ((code != FIX_TRUNC_EXPR && code != FLOAT_EXPR)
5407 351907 : && !((INTEGRAL_TYPE_P (lhs_type)
5408 154404 : && INTEGRAL_TYPE_P (rhs_type))
5409 : || (SCALAR_FLOAT_TYPE_P (lhs_type)
5410 8815 : && SCALAR_FLOAT_TYPE_P (rhs_type))))
5411 : return false;
5412 :
5413 179862 : if (!VECTOR_BOOLEAN_TYPE_P (vectype_out)
5414 159717 : && INTEGRAL_TYPE_P (lhs_type)
5415 312515 : && !type_has_mode_precision_p (lhs_type))
5416 : {
5417 447 : if (dump_enabled_p ())
5418 0 : dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5419 : "type conversion to bit-precision unsupported\n");
5420 447 : return false;
5421 : }
5422 :
5423 179415 : if (op_type == binary_op)
5424 : {
5425 2330 : gcc_assert (code == WIDEN_MULT_EXPR
5426 : || code == WIDEN_LSHIFT_EXPR
5427 : || widening_fn_p (code));
5428 :
5429 2330 : op1 = is_gimple_assign (stmt) ? gimple_assign_rhs2 (stmt) :
5430 0 : gimple_call_arg (stmt, 0);
5431 2330 : tree vectype1_in;
5432 2330 : if (!vect_is_simple_use (vinfo, slp_node, 1,
5433 : &op1, &slp_op1, &dt[1], &vectype1_in))
5434 : {
5435 0 : if (dump_enabled_p ())
5436 0 : dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5437 : "use not simple.\n");
5438 0 : return false;
5439 : }
5440 : /* For WIDEN_MULT_EXPR, if OP0 is a constant, use the type of
5441 : OP1. */
5442 2330 : if (!vectype_in)
5443 101 : vectype_in = vectype1_in;
5444 : }
5445 :
5446 : /* If op0 is an external or constant def, infer the vector type
5447 : from the scalar type. */
5448 179415 : if (!vectype_in)
5449 20016 : vectype_in = get_vectype_for_scalar_type (vinfo, rhs_type, slp_node);
5450 179415 : if (!cost_vec)
5451 22836 : gcc_assert (vectype_in);
5452 179415 : if (!vectype_in)
5453 : {
5454 258 : if (dump_enabled_p ())
5455 2 : dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5456 : "no vectype for scalar type %T\n", rhs_type);
5457 :
5458 258 : return false;
5459 : }
5460 :
5461 358314 : if (VECTOR_BOOLEAN_TYPE_P (vectype_out)
5462 179157 : != VECTOR_BOOLEAN_TYPE_P (vectype_in))
5463 : {
5464 229 : if (dump_enabled_p ())
5465 36 : dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5466 : "can't convert between boolean and non "
5467 : "boolean vectors %T\n", rhs_type);
5468 :
5469 229 : return false;
5470 : }
5471 :
5472 178928 : nunits_in = TYPE_VECTOR_SUBPARTS (vectype_in);
5473 178928 : nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
5474 178928 : if (known_eq (nunits_out, nunits_in))
5475 85186 : if (widen_arith)
5476 : modifier = WIDEN;
5477 : else
5478 178928 : modifier = NONE;
5479 93742 : else if (multiple_p (nunits_out, nunits_in))
5480 : modifier = NARROW_DST;
5481 : else
5482 : {
5483 51936 : gcc_checking_assert (multiple_p (nunits_in, nunits_out));
5484 : modifier = WIDEN;
5485 : }
5486 :
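/* Worked examples of the classification above, assuming 128-bit vectors:
   short -> int maps V8HI (8 lanes) to V4SI (4 lanes), so nunits_in is a
   multiple of nunits_out and the modifier is WIDEN; int -> short maps
   V4SI to V8HI, so nunits_out is a multiple of nunits_in and the
   modifier is NARROW_DST; equal lane counts give NONE, or WIDEN for
   widening arithmetic such as WIDEN_MULT_EXPR.  */
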
5487 178928 : bool found_mode = false;
5488 178928 : scalar_mode lhs_mode = SCALAR_TYPE_MODE (lhs_type);
5489 178928 : scalar_mode rhs_mode = SCALAR_TYPE_MODE (rhs_type);
5490 178928 : opt_scalar_mode rhs_mode_iter;
5491 178928 : auto_vec<std::pair<tree, tree_code>, 2> converts;
5492 178928 : bool evenodd_ok = false;
5493 :
5494 : /* Supportable by target? */
5495 178928 : switch (modifier)
5496 : {
5497 84943 : case NONE:
5498 84943 : if (code != FIX_TRUNC_EXPR
5499 83901 : && code != FLOAT_EXPR
5500 159869 : && !CONVERT_EXPR_CODE_P (code))
5501 : return false;
5502 84943 : gcc_assert (code.is_tree_code ());
5503 84943 : if (supportable_indirect_convert_operation (code,
5504 : vectype_out, vectype_in,
5505 : converts, op0, slp_op0))
5506 : {
5507 18985 : gcc_assert (converts.length () <= 2);
5508 18985 : if (converts.length () == 1)
5509 18911 : code1 = converts[0].second;
5510 : else
5511 : {
5512 74 : cvt_type = NULL_TREE;
5513 74 : multi_step_cvt = converts.length () - 1;
5514 74 : codecvt1 = converts[0].second;
5515 74 : code1 = converts[1].second;
5516 74 : interm_types.safe_push (converts[0].first);
5517 : }
5518 : break;
5519 : }
5520 :
5521 : /* FALLTHRU */
5522 65958 : unsupported:
5523 72814 : if (dump_enabled_p ())
5524 5992 : dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5525 : "conversion not supported by target.\n");
5526 : return false;
5527 :
5528 52179 : case WIDEN:
5529 52179 : if (known_eq (nunits_in, nunits_out))
5530 : {
5531 486 : if (!(code.is_tree_code ()
5532 243 : && supportable_half_widening_operation ((tree_code) code,
5533 : vectype_out, vectype_in,
5534 : &tc1)))
5535 74 : goto unsupported;
5536 169 : code1 = tc1;
5537 169 : gcc_assert (!(multi_step_cvt && op_type == binary_op));
5538 : break;
5539 : }
5540 : /* Elements in a vector can be reordered only if they are used solely in
5541 : a reduction operation. */
5542 51936 : if (code == WIDEN_MULT_EXPR
5543 2087 : && loop_vinfo
5544 2038 : && !nested_in_vect_loop_p (LOOP_VINFO_LOOP (loop_vinfo), stmt_info)
5545 : /* For an SLP reduction we cannot swizzle lanes; detecting a
5546 : reduction chain isn't possible here. */
5547 53952 : && SLP_TREE_LANES (slp_node) == 1)
5548 : {
5549 : /* ??? There is no way to look for SLP uses, so work on
5550 : the stmt and what the stmt-based cycle detection gives us. */
5551 1914 : tree lhs = gimple_get_lhs (vect_orig_stmt (stmt_info)->stmt);
5552 1914 : stmt_vec_info use_stmt_info
5553 1914 : = lhs ? loop_vinfo->lookup_single_use (lhs) : NULL;
5554 1914 : if (use_stmt_info
5555 1765 : && STMT_VINFO_REDUC_DEF (use_stmt_info))
5556 51936 : evenodd_ok = true;
5557 : }
5558 51936 : if (supportable_widening_operation (code, vectype_out, vectype_in,
5559 : evenodd_ok, &code1,
5560 : &code2, &multi_step_cvt,
5561 : &interm_types))
5562 : {
5563 : /* Binary widening operation can only be supported directly by the
5564 : architecture. */
5565 50010 : gcc_assert (!(multi_step_cvt && op_type == binary_op));
5566 : break;
5567 : }
5568 :
5569 1926 : if (code != FLOAT_EXPR
5570 2298 : || GET_MODE_SIZE (lhs_mode) <= GET_MODE_SIZE (rhs_mode))
5571 1740 : goto unsupported;
5572 :
5573 186 : fltsz = GET_MODE_SIZE (lhs_mode);
5574 273 : FOR_EACH_2XWIDER_MODE (rhs_mode_iter, rhs_mode)
5575 : {
5576 273 : rhs_mode = rhs_mode_iter.require ();
5577 546 : if (GET_MODE_SIZE (rhs_mode) > fltsz)
5578 : break;
5579 :
5580 273 : cvt_type
5581 273 : = build_nonstandard_integer_type (GET_MODE_BITSIZE (rhs_mode), 0);
5582 273 : cvt_type = get_same_sized_vectype (cvt_type, vectype_in);
5583 273 : if (cvt_type == NULL_TREE)
5584 0 : goto unsupported;
5585 :
5586 546 : if (GET_MODE_SIZE (rhs_mode) == fltsz)
5587 : {
5588 81 : tc1 = ERROR_MARK;
5589 81 : gcc_assert (code.is_tree_code ());
5590 81 : if (!supportable_convert_operation ((tree_code) code, vectype_out,
5591 : cvt_type, &tc1))
5592 22 : goto unsupported;
5593 59 : codecvt1 = tc1;
5594 : }
5595 192 : else if (!supportable_widening_operation (code, vectype_out,
5596 : cvt_type, evenodd_ok,
5597 : &codecvt1,
5598 : &codecvt2, &multi_step_cvt,
5599 : &interm_types))
5600 87 : continue;
5601 : else
5602 105 : gcc_assert (multi_step_cvt == 0);
5603 :
5604 164 : if (supportable_widening_operation (NOP_EXPR, cvt_type,
5605 : vectype_in, evenodd_ok, &code1,
5606 : &code2, &multi_step_cvt,
5607 : &interm_types))
5608 : {
5609 : found_mode = true;
5610 : break;
5611 : }
5612 : }
5613 :
5614 164 : if (!found_mode)
5615 0 : goto unsupported;
5616 :
5617 328 : if (GET_MODE_SIZE (rhs_mode) == fltsz)
5618 59 : codecvt2 = ERROR_MARK;
5619 : else
5620 : {
5621 105 : multi_step_cvt++;
5622 105 : interm_types.safe_push (cvt_type);
5623 105 : cvt_type = NULL_TREE;
5624 : }
5625 : break;
5626 :
5627 41806 : case NARROW_DST:
5628 41806 : gcc_assert (op_type == unary_op);
5629 41806 : if (supportable_narrowing_operation (code, vectype_out, vectype_in,
5630 : &code1, &multi_step_cvt,
5631 : &interm_types))
5632 : break;
5633 :
5634 15444 : if (GET_MODE_SIZE (lhs_mode) >= GET_MODE_SIZE (rhs_mode))
5635 984 : goto unsupported;
5636 :
5637 4164 : if (code == FIX_TRUNC_EXPR)
5638 : {
5639 107 : cvt_type
5640 107 : = build_nonstandard_integer_type (GET_MODE_BITSIZE (rhs_mode), 0);
5641 107 : cvt_type = get_same_sized_vectype (cvt_type, vectype_in);
5642 107 : if (cvt_type == NULL_TREE)
5643 0 : goto unsupported;
5644 107 : if (supportable_convert_operation ((tree_code) code, cvt_type, vectype_in,
5645 : &tc1))
5646 105 : codecvt1 = tc1;
5647 : else
5648 2 : goto unsupported;
5649 105 : if (supportable_narrowing_operation (NOP_EXPR, vectype_out, cvt_type,
5650 : &code1, &multi_step_cvt,
5651 : &interm_types))
5652 : break;
5653 : }
5654 : /* If op0 can be represented by a low precision integer,
5655 : truncate it to cvt_type and then do FLOAT_EXPR. */
5656 4057 : else if (code == FLOAT_EXPR)
5657 : {
5658 137 : if (cost_vec)
5659 : {
5660 132 : wide_int op_min_value, op_max_value;
5661 132 : tree def;
5662 :
5663 : /* ??? Merge ranges in case of more than one lane. */
5664 132 : if (SLP_TREE_LANES (slp_op0) != 1
5665 130 : || !(def = vect_get_slp_scalar_def (slp_op0, 0))
5666 262 : || !vect_get_range_info (def, &op_min_value, &op_max_value))
5667 106 : goto unsupported;
5668 :
5669 26 : if ((wi::min_precision (op_max_value, SIGNED)
5670 26 : > GET_MODE_BITSIZE (lhs_mode))
5671 26 : || (wi::min_precision (op_min_value, SIGNED)
5672 24 : > GET_MODE_BITSIZE (lhs_mode)))
5673 2 : goto unsupported;
5674 132 : }
5675 :
5676 29 : cvt_type
5677 29 : = build_nonstandard_integer_type (GET_MODE_BITSIZE (lhs_mode), 0);
5678 29 : cvt_type = get_same_sized_vectype (cvt_type, vectype_out);
5679 29 : if (cvt_type == NULL_TREE)
5680 0 : goto unsupported;
5681 29 : if (!supportable_narrowing_operation (NOP_EXPR, cvt_type, vectype_in,
5682 : &code1, &multi_step_cvt,
5683 : &interm_types))
5684 2 : goto unsupported;
5685 27 : if (supportable_convert_operation ((tree_code) code, vectype_out,
5686 : cvt_type, &tc1))
5687 : {
5688 27 : codecvt1 = tc1;
5689 27 : modifier = NARROW_SRC;
5690 27 : break;
5691 : }
5692 : }
5693 :
5694 3924 : goto unsupported;
5695 :
5696 : default:
5697 : gcc_unreachable ();
5698 : }
5699 :
5700 106114 : if (modifier == WIDEN
5701 106114 : && loop_vinfo
5702 49189 : && LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo)
5703 127171 : && (code1 == VEC_WIDEN_MULT_EVEN_EXPR
5704 21035 : || widening_evenodd_fn_p (code1)))
5705 : {
5706 22 : if (dump_enabled_p ())
5707 0 : dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5708 : "can't use a fully-masked loop because"
5709 : " widening operation on even/odd elements"
5710 : " mixes up lanes.\n");
5711 22 : LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo) = false;
5712 : }
5713 :
5714 106114 : if (cost_vec) /* transformation not required. */
5715 : {
5716 83278 : if (!vect_maybe_update_slp_op_vectype (slp_op0, vectype_in)
5717 83278 : || !vect_maybe_update_slp_op_vectype (slp_op1, vectype_in))
5718 : {
5719 0 : if (dump_enabled_p ())
5720 0 : dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5721 : "incompatible vector types for invariants\n");
5722 0 : return false;
5723 : }
5724 83278 : DUMP_VECT_SCOPE ("vectorizable_conversion");
5725 83278 : unsigned int nvectors = vect_get_num_copies (vinfo, slp_node);
5726 83278 : if (modifier == NONE)
5727 : {
5728 14982 : SLP_TREE_TYPE (slp_node) = type_conversion_vec_info_type;
5729 14982 : vect_model_simple_cost (vinfo, (1 + multi_step_cvt),
5730 : slp_node, cost_vec);
5731 : }
5732 68296 : else if (modifier == NARROW_SRC || modifier == NARROW_DST)
5733 : {
5734 27746 : SLP_TREE_TYPE (slp_node) = type_demotion_vec_info_type;
5735 : /* The final packing step produces one vector result per copy. */
5736 27746 : vect_model_promotion_demotion_cost (slp_node, nvectors,
5737 : multi_step_cvt, cost_vec,
5738 : widen_arith);
5739 : }
5740 : else
5741 : {
5742 40550 : SLP_TREE_TYPE (slp_node) = type_promotion_vec_info_type;
5743 : /* The initial unpacking step produces two vector results
5744 : per copy. MULTI_STEP_CVT is 0 for a single conversion,
5745 : so >> MULTI_STEP_CVT divides by 2^(number of steps - 1). */
5746 40550 : vect_model_promotion_demotion_cost (slp_node,
5747 : nvectors >> multi_step_cvt,
5748 : multi_step_cvt, cost_vec,
5749 : widen_arith);
5750 : }
5751 83278 : interm_types.release ();
5752 83278 : return true;
5753 83278 : }
5754 :
5755 : /* Transform. */
5756 22836 : if (dump_enabled_p ())
5757 4270 : dump_printf_loc (MSG_NOTE, vect_location, "transform conversion.\n");
5758 :
5759 22836 : if (op_type == binary_op)
5760 : {
5761 508 : if (CONSTANT_CLASS_P (op0))
5762 0 : op0 = fold_convert (TREE_TYPE (op1), op0);
5763 508 : else if (CONSTANT_CLASS_P (op1))
5764 234 : op1 = fold_convert (TREE_TYPE (op0), op1);
5765 : }
5766 :
5767 : /* In case of multi-step conversion, we first generate conversion operations
5768 : to the intermediate types, and then from that types to the final one.
5769 : We create vector destinations for the intermediate type (TYPES) received
5770 : from supportable_*_operation, and store them in the correct order
5771 : for future use in vect_create_vectorized_*_stmts (). */
5772 22836 : auto_vec<tree> vec_dsts (multi_step_cvt + 1);
5773 22836 : bool widen_or_narrow_float_p
5774 22836 : = cvt_type && (modifier == WIDEN || modifier == NARROW_SRC);
5775 22836 : vec_dest = vect_create_destination_var (scalar_dest,
5776 : widen_or_narrow_float_p
5777 : ? cvt_type : vectype_out);
5778 22836 : vec_dsts.quick_push (vec_dest);
5779 :
5780 22836 : if (multi_step_cvt)
5781 : {
5782 9086 : for (i = interm_types.length () - 1;
5783 9086 : interm_types.iterate (i, &intermediate_type); i--)
5784 : {
5785 4789 : vec_dest = vect_create_destination_var (scalar_dest,
5786 : intermediate_type);
5787 4789 : vec_dsts.quick_push (vec_dest);
5788 : }
5789 : }
5790 :
5791 22836 : if (cvt_type)
5792 73 : vec_dest = vect_create_destination_var (scalar_dest,
5793 : widen_or_narrow_float_p
5794 : ? vectype_out : cvt_type);
5795 :
5796 22836 : switch (modifier)
5797 : {
5798 4003 : case NONE:
5799 4003 : vect_get_vec_defs (vinfo, slp_node, op0, &vec_oprnds0);
5800 : /* vec_dest is intermediate type operand when multi_step_cvt. */
5801 4003 : if (multi_step_cvt)
5802 : {
5803 21 : cvt_op = vec_dest;
5804 21 : vec_dest = vec_dsts[0];
5805 : }
5806 :
5807 8382 : FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
5808 : {
5809 : /* Arguments are ready, create the new vector stmt. */
5810 4379 : gimple* new_stmt;
5811 4379 : if (multi_step_cvt)
5812 : {
5813 21 : gcc_assert (multi_step_cvt == 1);
5814 21 : new_stmt = vect_gimple_build (cvt_op, codecvt1, vop0);
5815 21 : new_temp = make_ssa_name (cvt_op, new_stmt);
5816 21 : gimple_assign_set_lhs (new_stmt, new_temp);
5817 21 : vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
5818 21 : vop0 = new_temp;
5819 : }
5820 4379 : new_stmt = vect_gimple_build (vec_dest, code1, vop0);
5821 4379 : new_temp = make_ssa_name (vec_dest, new_stmt);
5822 4379 : gimple_set_lhs (new_stmt, new_temp);
5823 4379 : vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
5824 :
5825 4379 : slp_node->push_vec_def (new_stmt);
5826 : }
5827 : break;
5828 :
5829 9793 : case WIDEN:
5830 : /* In case the vectorization factor (VF) is bigger than the number
5831 : of elements that we can fit in a vectype (nunits), we have to
5832 : generate more than one vector stmt - i.e. - we need to "unroll"
5833 : the vector stmt by a factor VF/nunits. */
5834 9793 : vect_get_vec_defs (vinfo, slp_node, op0, &vec_oprnds0,
5835 9793 : code == WIDEN_LSHIFT_EXPR ? NULL_TREE : op1,
5836 : &vec_oprnds1);
5837 9793 : if (code == WIDEN_LSHIFT_EXPR)
5838 : {
5839 0 : int oprnds_size = vec_oprnds0.length ();
5840 0 : vec_oprnds1.create (oprnds_size);
5841 0 : for (i = 0; i < oprnds_size; ++i)
5842 0 : vec_oprnds1.quick_push (op1);
5843 : }
5844 : /* Arguments are ready. Create the new vector stmts. */
5845 21356 : for (i = multi_step_cvt; i >= 0; i--)
5846 : {
5847 11563 : tree this_dest = vec_dsts[i];
5848 11563 : code_helper c1 = code1, c2 = code2;
5849 11563 : if (i == 0 && codecvt2 != ERROR_MARK)
5850 : {
5851 48 : c1 = codecvt1;
5852 48 : c2 = codecvt2;
5853 : }
5854 11563 : if (known_eq (nunits_out, nunits_in))
5855 14 : vect_create_half_widening_stmts (vinfo, &vec_oprnds0, &vec_oprnds1,
5856 : stmt_info, this_dest, gsi, c1,
5857 : op_type);
5858 : else
5859 11549 : vect_create_vectorized_promotion_stmts (vinfo, &vec_oprnds0,
5860 : &vec_oprnds1, stmt_info,
5861 : this_dest, gsi,
5862 : c1, c2, op_type);
5863 : }
5864 :
5865 37359 : FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
5866 : {
5867 27566 : gimple *new_stmt;
5868 27566 : if (cvt_type)
5869 : {
5870 120 : new_temp = make_ssa_name (vec_dest);
5871 120 : new_stmt = vect_gimple_build (new_temp, codecvt1, vop0);
5872 120 : vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
5873 : }
5874 : else
5875 27446 : new_stmt = SSA_NAME_DEF_STMT (vop0);
5876 :
5877 27566 : slp_node->push_vec_def (new_stmt);
5878 : }
5879 : break;
5880 :
5881 9040 : case NARROW_SRC:
5882 9040 : case NARROW_DST:
5883 : /* In case the vectorization factor (VF) is bigger than the number
5884 : of elements that we can fit in a vectype (nunits), we have to
5885 : generate more than one vector stmt - i.e - we need to "unroll"
5886 : the vector stmt by a factor VF/nunits. */
5887 9040 : vect_get_vec_defs (vinfo, slp_node, op0, &vec_oprnds0);
5888 : /* Arguments are ready. Create the new vector stmts. */
5889 9040 : if (cvt_type && modifier == NARROW_DST)
5890 153 : FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
5891 : {
5892 124 : new_temp = make_ssa_name (vec_dest);
5893 124 : gimple *new_stmt = vect_gimple_build (new_temp, codecvt1, vop0);
5894 124 : vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
5895 124 : vec_oprnds0[i] = new_temp;
5896 : }
5897 :
5898 9040 : vect_create_vectorized_demotion_stmts (vinfo, &vec_oprnds0,
5899 : multi_step_cvt,
5900 : stmt_info, vec_dsts, gsi,
5901 : slp_node, code1,
5902 : modifier == NARROW_SRC);
5903 : /* After demoting op0 to cvt_type, convert it to dest. */
5904 9040 : if (cvt_type && code == FLOAT_EXPR)
5905 : {
5906 10 : for (unsigned int i = 0; i != vec_oprnds0.length() / 2; i++)
5907 : {
5908 : /* Arguments are ready, create the new vector stmt. */
5909 5 : gcc_assert (TREE_CODE_LENGTH ((tree_code) codecvt1) == unary_op);
5910 5 : gimple *new_stmt
5911 5 : = vect_gimple_build (vec_dest, codecvt1, vec_oprnds0[i]);
5912 5 : new_temp = make_ssa_name (vec_dest, new_stmt);
5913 5 : gimple_set_lhs (new_stmt, new_temp);
5914 5 : vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
5915 :
5916 : /* This is the last step of the conversion sequence. Store the
5917 : vectors in SLP_NODE or in vector info of the scalar statement
5918 : (or in STMT_VINFO_RELATED_STMT chain). */
5919 5 : slp_node->push_vec_def (new_stmt);
5920 : }
5921 : }
5922 : break;
5923 : }
5924 :
5925 22836 : vec_oprnds0.release ();
5926 22836 : vec_oprnds1.release ();
5927 22836 : interm_types.release ();
5928 :
5929 22836 : return true;
5930 178928 : }
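/* Worked example (editor's sketch, not from the GCC sources): widening
   a V16QI vector of 'char' to 'int' takes two steps, because one step
   can only double the element width:

     short_lo = [vec_unpack_lo_expr] qi_vec;    // V16QI -> V8HI
     short_hi = [vec_unpack_hi_expr] qi_vec;
     int_0    = [vec_unpack_lo_expr] short_lo;  // V8HI  -> V4SI
     ...

   multi_step_cvt counts the intermediate steps and interm_types holds
   the intermediate vector types (V8HI here); the mode names are
   illustrative and target-dependent.  */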
5931 :
5932 : /* Return true if we can assume from the scalar form of STMT_INFO that
5933 : neither the scalar nor the vector forms will generate code. STMT_INFO
5934 : is known not to involve a data reference. */
5935 :
5936 : bool
5937 3146997 : vect_nop_conversion_p (stmt_vec_info stmt_info)
5938 : {
5939 3146997 : gassign *stmt = dyn_cast <gassign *> (stmt_info->stmt);
5940 2875602 : if (!stmt || STMT_VINFO_DATA_REF (stmt_info))
5941 : return false;
5942 :
5943 928718 : tree lhs = gimple_assign_lhs (stmt);
5944 928718 : tree_code code = gimple_assign_rhs_code (stmt);
5945 928718 : tree rhs = gimple_assign_rhs1 (stmt);
5946 :
5947 928718 : if (code == SSA_NAME || code == VIEW_CONVERT_EXPR)
5948 : return true;
5949 :
5950 925816 : if (CONVERT_EXPR_CODE_P (code))
5951 228854 : return tree_nop_conversion_p (TREE_TYPE (lhs), TREE_TYPE (rhs));
5952 :
5953 : return false;
5954 : }
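/* Example (editor's sketch): for  _2 = (unsigned int) _1  with 'int' _1,
   the conversion keeps the mode and the bit representation, so
   vect_nop_conversion_p returns true and neither the scalar nor the
   vector form needs any code; a narrowing such as  _2 = (short) _1
   changes the precision and returns false.  */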
5955 :
5956 : /* Function vectorizable_assignment.
5957 :
5958 : Check if STMT_INFO performs an assignment (copy) that can be vectorized.
5959 : If COST_VEC is passed, calculate costs but don't change anything,
5960 : otherwise, vectorize STMT_INFO: create a vectorized stmt to replace
5961 : it, and insert it at GSI.
5962 : Return true if STMT_INFO is vectorizable in this way. */
5963 :
5964 : static bool
5965 2048144 : vectorizable_assignment (vec_info *vinfo,
5966 : stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
5967 : slp_tree slp_node,
5968 : stmt_vector_for_cost *cost_vec)
5969 : {
5970 2048144 : tree vec_dest;
5971 2048144 : tree scalar_dest;
5972 2048144 : tree op;
5973 2048144 : tree new_temp;
5974 2048144 : enum vect_def_type dt[1] = {vect_unknown_def_type};
5975 2048144 : int i;
5976 2048144 : vec<tree> vec_oprnds = vNULL;
5977 2048144 : tree vop;
5978 2048144 : bb_vec_info bb_vinfo = dyn_cast <bb_vec_info> (vinfo);
5979 2048144 : enum tree_code code;
5980 2048144 : tree vectype_in;
5981 :
5982 2048144 : if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
5983 : return false;
5984 :
5985 2048144 : if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
5986 232992 : && cost_vec)
5987 : return false;
5988 :
5989 : /* Is vectorizable assignment? */
5990 3707594 : gassign *stmt = dyn_cast <gassign *> (stmt_info->stmt);
5991 1743991 : if (!stmt)
5992 : return false;
5993 :
5994 1743991 : scalar_dest = gimple_assign_lhs (stmt);
5995 1743991 : if (TREE_CODE (scalar_dest) != SSA_NAME)
5996 : return false;
5997 :
5998 933728 : if (STMT_VINFO_DATA_REF (stmt_info))
5999 : return false;
6000 :
6001 393401 : code = gimple_assign_rhs_code (stmt);
6002 393401 : if (!(gimple_assign_single_p (stmt)
6003 391872 : || code == PAREN_EXPR
6004 390693 : || CONVERT_EXPR_CODE_P (code)))
6005 : return false;
6006 :
6007 95189 : tree vectype = SLP_TREE_VECTYPE (slp_node);
6008 95189 : poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
6009 :
6010 95189 : slp_tree slp_op;
6011 95189 : if (!vect_is_simple_use (vinfo, slp_node, 0, &op, &slp_op,
6012 : &dt[0], &vectype_in))
6013 : {
6014 0 : if (dump_enabled_p ())
6015 0 : dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6016 : "use not simple.\n");
6017 0 : return false;
6018 : }
6019 95189 : if (!vectype_in)
6020 17745 : vectype_in = get_vectype_for_scalar_type (vinfo, TREE_TYPE (op), slp_node);
6021 :
6022 : /* We can handle VIEW_CONVERT conversions that do not change the
6023 : number of elements or the vector size, and other conversions whose
6024 : component types are nop-convertible. */
6025 95189 : if (!vectype_in
6026 94911 : || maybe_ne (TYPE_VECTOR_SUBPARTS (vectype_in), nunits)
6027 87777 : || (code == VIEW_CONVERT_EXPR
6028 2802 : && maybe_ne (GET_MODE_SIZE (TYPE_MODE (vectype)),
6029 2802 : GET_MODE_SIZE (TYPE_MODE (vectype_in))))
6030 182966 : || (CONVERT_EXPR_CODE_P (code)
6031 85101 : && !tree_nop_conversion_p (TREE_TYPE (vectype),
6032 85101 : TREE_TYPE (vectype_in))))
6033 10380 : return false;
6034 :
6035 254331 : if (VECTOR_BOOLEAN_TYPE_P (vectype) != VECTOR_BOOLEAN_TYPE_P (vectype_in))
6036 : {
6037 2 : if (dump_enabled_p ())
6038 0 : dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6039 : "can't convert between boolean and non "
6040 0 : "boolean vectors %T\n", TREE_TYPE (op));
6041 :
6042 2 : return false;
6043 : }
6044 :
6045 : /* We do not handle bit-precision changes. */
6046 84807 : if ((CONVERT_EXPR_CODE_P (code)
6047 2676 : || code == VIEW_CONVERT_EXPR)
6048 83532 : && ((INTEGRAL_TYPE_P (TREE_TYPE (scalar_dest))
6049 82245 : && !type_has_mode_precision_p (TREE_TYPE (scalar_dest)))
6050 83217 : || (INTEGRAL_TYPE_P (TREE_TYPE (op))
6051 78504 : && !type_has_mode_precision_p (TREE_TYPE (op))))
6052 : /* But a conversion that does not change the bit-pattern is ok. */
6053 85537 : && !(INTEGRAL_TYPE_P (TREE_TYPE (scalar_dest))
6054 730 : && INTEGRAL_TYPE_P (TREE_TYPE (op))
6055 730 : && (((TYPE_PRECISION (TREE_TYPE (scalar_dest))
6056 730 : > TYPE_PRECISION (TREE_TYPE (op)))
6057 415 : && TYPE_UNSIGNED (TREE_TYPE (op)))
6058 331 : || (TYPE_PRECISION (TREE_TYPE (scalar_dest))
6059 331 : == TYPE_PRECISION (TREE_TYPE (op))))))
6060 : {
6061 266 : if (dump_enabled_p ())
6062 0 : dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6063 : "type conversion to/from bit-precision "
6064 : "unsupported.\n");
6065 266 : return false;
6066 : }
6067 :
6068 84541 : if (cost_vec) /* transformation not required. */
6069 : {
6070 68767 : if (!vect_maybe_update_slp_op_vectype (slp_op, vectype_in))
6071 : {
6072 0 : if (dump_enabled_p ())
6073 0 : dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6074 : "incompatible vector types for invariants\n");
6075 0 : return false;
6076 : }
6077 68767 : SLP_TREE_TYPE (slp_node) = assignment_vec_info_type;
6078 68767 : DUMP_VECT_SCOPE ("vectorizable_assignment");
6079 68767 : if (!vect_nop_conversion_p (stmt_info))
6080 963 : vect_model_simple_cost (vinfo, 1, slp_node, cost_vec);
6081 68767 : return true;
6082 : }
6083 :
6084 : /* Transform. */
6085 15774 : if (dump_enabled_p ())
6086 3595 : dump_printf_loc (MSG_NOTE, vect_location, "transform assignment.\n");
6087 :
6088 : /* Handle def. */
6089 15774 : vec_dest = vect_create_destination_var (scalar_dest, vectype);
6090 :
6091 : /* Handle use. */
6092 15774 : vect_get_vec_defs (vinfo, slp_node, op, &vec_oprnds);
6093 :
6094 : /* Arguments are ready. Create the new vector stmt. */
6095 35687 : FOR_EACH_VEC_ELT (vec_oprnds, i, vop)
6096 : {
6097 19913 : if (CONVERT_EXPR_CODE_P (code)
6098 683 : || code == VIEW_CONVERT_EXPR)
6099 19364 : vop = build1 (VIEW_CONVERT_EXPR, vectype, vop);
6100 19913 : gassign *new_stmt = gimple_build_assign (vec_dest, vop);
6101 19913 : new_temp = make_ssa_name (vec_dest, new_stmt);
6102 19913 : gimple_assign_set_lhs (new_stmt, new_temp);
6103 19913 : vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
6104 19913 : slp_node->push_vec_def (new_stmt);
6105 : }
6106 :
6107 15774 : vec_oprnds.release ();
6108 15774 : return true;
6109 : }
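/* Example (editor's sketch): a plain copy  _2 = _1  or a view-convert
   _2 = VIEW_CONVERT_EXPR<unsigned int>(_1)  becomes a single vector
   copy such as

     vect__2 = VIEW_CONVERT_EXPR<vector(4) unsigned int>(vect__1);

   which is why only same-size, nop-convertible cases are accepted
   above.  */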
6110 :
6111 :
6112 : /* Return TRUE if CODE (a shift operation) is supported for SCALAR_TYPE
6113 : either as shift by a scalar or by a vector. */
6114 :
6115 : bool
6116 296049 : vect_supportable_shift (vec_info *vinfo, enum tree_code code, tree scalar_type)
6117 : {
6118 296049 : optab optab;
6119 296049 : tree vectype;
6120 :
6121 296049 : vectype = get_vectype_for_scalar_type (vinfo, scalar_type);
6122 296049 : if (!vectype)
6123 : return false;
6124 :
6125 296049 : optab = optab_for_tree_code (code, vectype, optab_scalar);
6126 296049 : if (optab && can_implement_p (optab, TYPE_MODE (vectype)))
6127 : return true;
6128 :
6129 260852 : optab = optab_for_tree_code (code, vectype, optab_vector);
6130 260852 : if (optab && can_implement_p (optab, TYPE_MODE (vectype)))
6131 : return true;
6132 :
6133 : return false;
6134 : }
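/* Usage sketch (hypothetical caller, for illustration only): pattern
   recognizers use this to check a shift before committing to a
   pattern, e.g.

     if (vect_supportable_shift (vinfo, RSHIFT_EXPR, scalar_type))
       ... build a pattern containing the shift ...

   Either the vector-by-scalar or the vector-by-vector optab suffices,
   since vectorizable_shift can emit both forms.  */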
6135 :
6136 :
6137 : /* Function vectorizable_shift.
6138 :
6139 : Check if STMT_INFO performs a shift operation that can be vectorized.
6140 : If COST_VEC is passed, calculate costs but don't change anything,
6141 : otherwise, vectorize STMT_INFO: create a vectorized stmt to replace
6142 : it, and insert it at GSI.
6143 : Return true if STMT_INFO is vectorizable in this way. */
6144 :
6145 : static bool
6146 725121 : vectorizable_shift (vec_info *vinfo,
6147 : stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
6148 : slp_tree slp_node,
6149 : stmt_vector_for_cost *cost_vec)
6150 : {
6151 725121 : tree vec_dest;
6152 725121 : tree scalar_dest;
6153 725121 : tree op0, op1 = NULL;
6154 725121 : tree vec_oprnd1 = NULL_TREE;
6155 725121 : tree vectype;
6156 725121 : enum tree_code code;
6157 725121 : machine_mode vec_mode;
6158 725121 : tree new_temp;
6159 725121 : optab optab;
6160 725121 : int icode;
6161 725121 : machine_mode optab_op2_mode;
6162 725121 : enum vect_def_type dt[2] = {vect_unknown_def_type, vect_unknown_def_type};
6163 725121 : poly_uint64 nunits_in;
6164 725121 : poly_uint64 nunits_out;
6165 725121 : tree vectype_out;
6166 725121 : tree op1_vectype;
6167 725121 : int i;
6168 725121 : vec<tree> vec_oprnds0 = vNULL;
6169 725121 : vec<tree> vec_oprnds1 = vNULL;
6170 725121 : tree vop0, vop1;
6171 725121 : unsigned int k;
6172 725121 : bool scalar_shift_arg = true;
6173 725121 : bb_vec_info bb_vinfo = dyn_cast <bb_vec_info> (vinfo);
6174 725121 : bool incompatible_op1_vectype_p = false;
6175 :
6176 725121 : if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
6177 : return false;
6178 :
6179 725121 : if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
6180 232992 : && STMT_VINFO_DEF_TYPE (stmt_info) != vect_nested_cycle
6181 231502 : && cost_vec)
6182 : return false;
6183 :
6184 : /* Is STMT a vectorizable binary/unary operation? */
6185 1088174 : gassign *stmt = dyn_cast <gassign *> (stmt_info->stmt);
6186 423459 : if (!stmt)
6187 : return false;
6188 :
6189 423459 : if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME)
6190 : return false;
6191 :
6192 422941 : code = gimple_assign_rhs_code (stmt);
6193 :
6194 422941 : if (!(code == LSHIFT_EXPR || code == RSHIFT_EXPR || code == LROTATE_EXPR
6195 : || code == RROTATE_EXPR))
6196 : return false;
6197 :
6198 66528 : scalar_dest = gimple_assign_lhs (stmt);
6199 66528 : vectype_out = SLP_TREE_VECTYPE (slp_node);
6200 66528 : if (!type_has_mode_precision_p (TREE_TYPE (scalar_dest)))
6201 : {
6202 0 : if (dump_enabled_p ())
6203 0 : dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6204 : "bit-precision shifts not supported.\n");
6205 0 : return false;
6206 : }
6207 :
6208 66528 : slp_tree slp_op0;
6209 66528 : if (!vect_is_simple_use (vinfo, slp_node,
6210 : 0, &op0, &slp_op0, &dt[0], &vectype))
6211 : {
6212 0 : if (dump_enabled_p ())
6213 0 : dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6214 : "use not simple.\n");
6215 0 : return false;
6216 : }
6217 : /* If op0 is an external or constant def, infer the vector type
6218 : from the scalar type. */
6219 66528 : if (!vectype)
6220 15031 : vectype = get_vectype_for_scalar_type (vinfo, TREE_TYPE (op0), slp_node);
6221 66528 : if (!cost_vec)
6222 8397 : gcc_assert (vectype);
6223 66528 : if (!vectype)
6224 : {
6225 0 : if (dump_enabled_p ())
6226 0 : dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6227 : "no vectype for scalar type\n");
6228 0 : return false;
6229 : }
6230 :
6231 66528 : nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
6232 66528 : nunits_in = TYPE_VECTOR_SUBPARTS (vectype);
6233 66528 : if (maybe_ne (nunits_out, nunits_in))
6234 : return false;
6235 :
6236 66528 : stmt_vec_info op1_def_stmt_info;
6237 66528 : slp_tree slp_op1;
6238 66528 : if (!vect_is_simple_use (vinfo, slp_node, 1, &op1, &slp_op1,
6239 : &dt[1], &op1_vectype, &op1_def_stmt_info))
6240 : {
6241 0 : if (dump_enabled_p ())
6242 0 : dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6243 : "use not simple.\n");
6244 0 : return false;
6245 : }
6246 :
6247 : /* Determine whether the shift amount is a vector or a scalar. If the
6248 : shift/rotate amount is a vector, use the vector/vector shift optabs. */
6249 :
6250 66528 : if ((dt[1] == vect_internal_def
6251 66528 : || dt[1] == vect_induction_def
6252 50213 : || dt[1] == vect_nested_cycle)
6253 16333 : && SLP_TREE_LANES (slp_node) == 1)
6254 : scalar_shift_arg = false;
6255 50250 : else if (dt[1] == vect_constant_def
6256 : || dt[1] == vect_external_def
6257 50250 : || dt[1] == vect_internal_def)
6258 : {
6259 : /* In SLP we need to check whether the shift count is the same for
6260 : all statements; in loops, if it is a constant or invariant, it is
6261 : always a scalar shift. */
6262 50244 : vec<stmt_vec_info> stmts = SLP_TREE_SCALAR_STMTS (slp_node);
6263 50244 : stmt_vec_info slpstmt_info;
6264 :
6265 132968 : FOR_EACH_VEC_ELT (stmts, k, slpstmt_info)
6266 82724 : if (slpstmt_info)
6267 : {
6268 82724 : gassign *slpstmt = as_a <gassign *> (slpstmt_info->stmt);
6269 165448 : if (!operand_equal_p (gimple_assign_rhs2 (slpstmt), op1, 0))
6270 82724 : scalar_shift_arg = false;
6271 : }
6272 :
6273 : /* For internal SLP defs we have to make sure we see scalar stmts
6274 : for all vector elements.
6275 : ??? For different vectors we could resort to a different
6276 : scalar shift operand but code-generation below simply always
6277 : takes the first. */
6278 50244 : if (dt[1] == vect_internal_def
6279 50293 : && maybe_ne (nunits_out * vect_get_num_copies (vinfo, slp_node),
6280 49 : stmts.length ()))
6281 : scalar_shift_arg = false;
6282 :
6283 : /* If the shift amount is computed by a pattern stmt we cannot
6284 : use the scalar amount directly, so give up and use a vector
6285 : shift. */
6286 50244 : if (op1_def_stmt_info && is_pattern_stmt_p (op1_def_stmt_info))
6287 : scalar_shift_arg = false;
6288 : }
6289 : else
6290 : {
6291 6 : if (dump_enabled_p ())
6292 6 : dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6293 : "operand mode requires invariant argument.\n");
6294 6 : return false;
6295 : }
6296 :
6297 : /* Vector shifted by vector. */
6298 66560 : bool was_scalar_shift_arg = scalar_shift_arg;
6299 50235 : if (!scalar_shift_arg)
6300 : {
6301 16325 : optab = optab_for_tree_code (code, vectype, optab_vector);
6302 16325 : if (dump_enabled_p ())
6303 1205 : dump_printf_loc (MSG_NOTE, vect_location,
6304 : "vector/vector shift/rotate found.\n");
6305 :
6306 16325 : if (!op1_vectype)
6307 15 : op1_vectype = get_vectype_for_scalar_type (vinfo, TREE_TYPE (op1),
6308 : slp_op1);
6309 16325 : incompatible_op1_vectype_p
6310 32650 : = (op1_vectype == NULL_TREE
6311 16325 : || maybe_ne (TYPE_VECTOR_SUBPARTS (op1_vectype),
6312 16325 : TYPE_VECTOR_SUBPARTS (vectype))
6313 32648 : || TYPE_MODE (op1_vectype) != TYPE_MODE (vectype));
6314 16318 : if (incompatible_op1_vectype_p
6315 7 : && (SLP_TREE_DEF_TYPE (slp_op1) != vect_constant_def
6316 1 : || slp_op1->refcnt != 1))
6317 : {
6318 6 : if (dump_enabled_p ())
6319 0 : dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6320 : "unusable type for last operand in"
6321 : " vector/vector shift/rotate.\n");
6322 6 : return false;
6323 : }
6324 : }
6325 : /* See if the machine has a vector-shifted-by-scalar insn and, if not,
6326 : whether it has a vector-shifted-by-vector insn. */
6327 : else
6328 : {
6329 50197 : optab = optab_for_tree_code (code, vectype, optab_scalar);
6330 50197 : if (optab
6331 50197 : && can_implement_p (optab, TYPE_MODE (vectype)))
6332 : {
6333 50197 : if (dump_enabled_p ())
6334 4930 : dump_printf_loc (MSG_NOTE, vect_location,
6335 : "vector/scalar shift/rotate found.\n");
6336 : }
6337 : else
6338 : {
6339 0 : optab = optab_for_tree_code (code, vectype, optab_vector);
6340 0 : if (optab
6341 0 : && can_implement_p (optab, TYPE_MODE (vectype)))
6342 : {
6343 0 : scalar_shift_arg = false;
6344 :
6345 0 : if (dump_enabled_p ())
6346 0 : dump_printf_loc (MSG_NOTE, vect_location,
6347 : "vector/vector shift/rotate found.\n");
6348 :
6349 0 : if (!op1_vectype)
6350 0 : op1_vectype = get_vectype_for_scalar_type (vinfo,
6351 0 : TREE_TYPE (op1),
6352 : slp_op1);
6353 :
6354 : /* Unlike the other binary operators, shifts/rotates have
6355 : the rhs being int rather than the same type as the lhs,
6356 : so make sure the scalar has the right type if we are
6357 : dealing with vectors of long long/long/short/char. */
6358 0 : incompatible_op1_vectype_p
6359 0 : = (!op1_vectype
6360 0 : || !tree_nop_conversion_p (TREE_TYPE (vectype),
6361 0 : TREE_TYPE (op1)));
6362 0 : if (incompatible_op1_vectype_p
6363 0 : && dt[1] == vect_internal_def)
6364 : {
6365 0 : if (dump_enabled_p ())
6366 0 : dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6367 : "unusable type for last operand in"
6368 : " vector/vector shift/rotate.\n");
6369 0 : return false;
6370 : }
6371 : }
6372 : }
6373 : }
6374 :
6375 : /* Supportable by target? */
6376 66516 : if (!optab)
6377 : {
6378 0 : if (dump_enabled_p ())
6379 0 : dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6380 : "no shift optab for %s and %T.\n",
6381 : get_tree_code_name (code), vectype);
6382 0 : return false;
6383 : }
6384 66516 : vec_mode = TYPE_MODE (vectype);
6385 66516 : icode = (int) optab_handler (optab, vec_mode);
6386 66516 : if (icode == CODE_FOR_nothing)
6387 : {
6388 6110 : if (dump_enabled_p ())
6389 900 : dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6390 : "shift op not supported by target.\n");
6391 6110 : return false;
6392 : }
6393 : /* Vector lowering cannot optimize vector shifts using word arithmetic. */
6394 60406 : if (vect_emulated_vector_p (vectype))
6395 : return false;
6396 :
6397 60406 : if (cost_vec) /* transformation not required. */
6398 : {
6399 52009 : if (!vect_maybe_update_slp_op_vectype (slp_op0, vectype)
6400 52009 : || ((!scalar_shift_arg || dt[1] == vect_internal_def)
6401 8072 : && (!incompatible_op1_vectype_p
6402 1 : || dt[1] == vect_constant_def)
6403 8072 : && !vect_maybe_update_slp_op_vectype
6404 8072 : (slp_op1,
6405 : incompatible_op1_vectype_p ? vectype : op1_vectype)))
6406 : {
6407 0 : if (dump_enabled_p ())
6408 0 : dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6409 : "incompatible vector types for invariants\n");
6410 0 : return false;
6411 : }
6412 : /* Now adjust the constant shift amount in place. */
6413 52009 : if (incompatible_op1_vectype_p
6414 1 : && dt[1] == vect_constant_def)
6415 4 : for (unsigned i = 0;
6416 5 : i < SLP_TREE_SCALAR_OPS (slp_op1).length (); ++i)
6417 : {
6418 4 : SLP_TREE_SCALAR_OPS (slp_op1)[i]
6419 4 : = fold_convert (TREE_TYPE (vectype),
6420 : SLP_TREE_SCALAR_OPS (slp_op1)[i]);
6421 4 : gcc_assert ((TREE_CODE (SLP_TREE_SCALAR_OPS (slp_op1)[i])
6422 : == INTEGER_CST));
6423 : }
6424 52009 : SLP_TREE_TYPE (slp_node) = shift_vec_info_type;
6425 52009 : DUMP_VECT_SCOPE ("vectorizable_shift");
6426 52009 : vect_model_simple_cost (vinfo, 1, slp_node, cost_vec);
6427 52009 : return true;
6428 : }
6429 :
6430 : /* Transform. */
6431 :
6432 8397 : if (dump_enabled_p ())
6433 2018 : dump_printf_loc (MSG_NOTE, vect_location,
6434 : "transform binary/unary operation.\n");
6435 :
6436 : /* Handle def. */
6437 8397 : vec_dest = vect_create_destination_var (scalar_dest, vectype);
6438 :
6439 8397 : unsigned nvectors = vect_get_num_copies (vinfo, slp_node);
6440 8397 : if (scalar_shift_arg && dt[1] != vect_internal_def)
6441 : {
6442 : /* Vector shl and shr insn patterns can be defined with scalar
6443 : operand 2 (shift operand). In this case, use constant or loop
6444 : invariant op1 directly, without extending it to vector mode
6445 : first. */
6446 6240 : optab_op2_mode = insn_data[icode].operand[2].mode;
6447 6240 : if (!VECTOR_MODE_P (optab_op2_mode))
6448 : {
6449 6240 : if (dump_enabled_p ())
6450 1903 : dump_printf_loc (MSG_NOTE, vect_location,
6451 : "operand 1 using scalar mode.\n");
6452 6240 : vec_oprnd1 = op1;
6453 6240 : vec_oprnds1.create (nvectors);
6454 6240 : vec_oprnds1.quick_push (vec_oprnd1);
6455 : /* Store vec_oprnd1 for every vector stmt to be created.
6456 : We check during the analysis that all the shift arguments
6457 : are the same.
6458 : TODO: Allow different constants for different vector
6459 : stmts generated for an SLP instance. */
6460 14525 : for (k = 0; k < nvectors - 1; k++)
6461 2045 : vec_oprnds1.quick_push (vec_oprnd1);
6462 : }
6463 : }
6464 2157 : else if (!scalar_shift_arg && incompatible_op1_vectype_p)
6465 : {
6466 0 : if (was_scalar_shift_arg)
6467 : {
6468 : /* If the argument was the same in all lanes create the
6469 : correctly typed vector shift amount directly. Note
6470 : we made SLP scheduling think we use the original scalars,
6471 : so place the compensation code next to the shift which
6472 : is conservative. See PR119640 where it otherwise breaks. */
6473 0 : op1 = fold_convert (TREE_TYPE (vectype), op1);
6474 0 : op1 = vect_init_vector (vinfo, stmt_info, op1, TREE_TYPE (vectype),
6475 : gsi);
6476 0 : vec_oprnd1 = vect_init_vector (vinfo, stmt_info, op1, vectype,
6477 : gsi);
6478 0 : vec_oprnds1.create (nvectors);
6479 0 : for (k = 0; k < nvectors; k++)
6480 0 : vec_oprnds1.quick_push (vec_oprnd1);
6481 : }
6482 0 : else if (dt[1] == vect_constant_def)
6483 : /* The constant shift amount has been adjusted in place. */
6484 : ;
6485 : else
6486 0 : gcc_assert (TYPE_MODE (op1_vectype) == TYPE_MODE (vectype));
6487 : }
6488 :
6489 : /* vec_oprnd1 is available if operand 1 should be of a scalar type
6490 : (a special case for certain kinds of vector shifts); otherwise,
6491 : operand 1 should be of a vector type (the usual case). */
6492 2157 : vect_get_vec_defs (vinfo, slp_node,
6493 : op0, &vec_oprnds0,
6494 8397 : vec_oprnd1 ? NULL_TREE : op1, &vec_oprnds1);
6495 :
6496 : /* Arguments are ready. Create the new vector stmt. */
6497 22341 : FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
6498 : {
6499 : /* For internal defs where we need to use a scalar shift arg,
6500 : extract the first lane. */
6501 13944 : if (scalar_shift_arg && dt[1] == vect_internal_def)
6502 : {
6503 10 : vop1 = vec_oprnds1[0];
6504 10 : new_temp = make_ssa_name (TREE_TYPE (TREE_TYPE (vop1)));
6505 10 : gassign *new_stmt
6506 10 : = gimple_build_assign (new_temp,
6507 10 : build3 (BIT_FIELD_REF, TREE_TYPE (new_temp),
6508 : vop1,
6509 10 : TYPE_SIZE (TREE_TYPE (new_temp)),
6510 : bitsize_zero_node));
6511 10 : vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
6512 10 : vop1 = new_temp;
6513 10 : }
6514 : else
6515 13934 : vop1 = vec_oprnds1[i];
6516 13944 : gassign *new_stmt = gimple_build_assign (vec_dest, code, vop0, vop1);
6517 13944 : new_temp = make_ssa_name (vec_dest, new_stmt);
6518 13944 : gimple_assign_set_lhs (new_stmt, new_temp);
6519 13944 : vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
6520 13944 : slp_node->push_vec_def (new_stmt);
6521 : }
6522 :
6523 8397 : vec_oprnds0.release ();
6524 8397 : vec_oprnds1.release ();
6525 :
6526 8397 : return true;
6527 : }
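/* Example (editor's sketch): for  _3 = _1 << 3  every lane shifts by
   the same constant, so the vector/scalar form  vect__3 = vect__1 << 3
   is used; for  _3 = _1 << _2  with a lane-varying _2 the
   vector/vector form  vect__3 = vect__1 << vect__2  is needed, which
   is the distinction scalar_shift_arg tracks above.  */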
6528 :
6529 : /* Function vectorizable_operation.
6530 :
6531 : Check if STMT_INFO performs a binary, unary or ternary operation that can
6532 : be vectorized.
6533 : If COST_VEC is passed, calculate costs but don't change anything,
6534 : otherwise, vectorize STMT_INFO: create a vectorized stmt to replace
6535 : it, and insert it at GSI.
6536 : Return true if STMT_INFO is vectorizable in this way. */
6537 :
6538 : static bool
6539 2703372 : vectorizable_operation (vec_info *vinfo,
6540 : stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
6541 : slp_tree slp_node,
6542 : stmt_vector_for_cost *cost_vec)
6543 : {
6544 2703372 : tree vec_dest;
6545 2703372 : tree scalar_dest;
6546 2703372 : tree op0, op1 = NULL_TREE, op2 = NULL_TREE;
6547 2703372 : tree vectype;
6548 2703372 : loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
6549 2703372 : enum tree_code code, orig_code;
6550 2703372 : machine_mode vec_mode;
6551 2703372 : tree new_temp;
6552 2703372 : int op_type;
6553 2703372 : optab optab;
6554 2703372 : bool target_support_p;
6555 2703372 : enum vect_def_type dt[3]
6556 : = {vect_unknown_def_type, vect_unknown_def_type, vect_unknown_def_type};
6557 2703372 : poly_uint64 nunits_in;
6558 2703372 : poly_uint64 nunits_out;
6559 2703372 : tree vectype_out;
6560 2703372 : int i;
6561 2703372 : vec<tree> vec_oprnds0 = vNULL;
6562 2703372 : vec<tree> vec_oprnds1 = vNULL;
6563 2703372 : vec<tree> vec_oprnds2 = vNULL;
6564 2703372 : tree vop0, vop1, vop2;
6565 2703372 : bb_vec_info bb_vinfo = dyn_cast <bb_vec_info> (vinfo);
6566 :
6567 2703372 : if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
6568 : return false;
6569 :
6570 2703372 : if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
6571 232992 : && cost_vec)
6572 : return false;
6573 :
6574 : /* Is STMT a vectorizable binary/unary operation? */
6575 4431589 : gassign *stmt = dyn_cast <gassign *> (stmt_info->stmt);
6576 2399219 : if (!stmt)
6577 : return false;
6578 :
6579 : /* Loads and stores are handled in vectorizable_{load,store}. */
6580 2399219 : if (STMT_VINFO_DATA_REF (stmt_info))
6581 : return false;
6582 :
6583 1048629 : orig_code = code = gimple_assign_rhs_code (stmt);
6584 :
6585 : /* Shifts are handled in vectorizable_shift. */
6586 1048629 : if (code == LSHIFT_EXPR
6587 : || code == RSHIFT_EXPR
6588 : || code == LROTATE_EXPR
6589 1048629 : || code == RROTATE_EXPR)
6590 : return false;
6591 :
6592 : /* Comparisons are handled in vectorizable_comparison. */
6593 990498 : if (TREE_CODE_CLASS (code) == tcc_comparison)
6594 : return false;
6595 :
6596 : /* Conditions are handled in vectorizable_condition. */
6597 808052 : if (code == COND_EXPR)
6598 : return false;
6599 :
6600 : /* For pointer addition and subtraction, we should use the normal
6601 : plus and minus for the vector operation. */
6602 781691 : if (code == POINTER_PLUS_EXPR)
6603 : code = PLUS_EXPR;
6604 763099 : if (code == POINTER_DIFF_EXPR)
6605 974 : code = MINUS_EXPR;
6606 :
6607 : /* Support only unary or binary operations. */
6608 781691 : op_type = TREE_CODE_LENGTH (code);
6609 781691 : if (op_type != unary_op && op_type != binary_op && op_type != ternary_op)
6610 : {
6611 0 : if (dump_enabled_p ())
6612 0 : dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6613 : "num. args = %d (not unary/binary/ternary op).\n",
6614 : op_type);
6615 0 : return false;
6616 : }
6617 :
6618 781691 : scalar_dest = gimple_assign_lhs (stmt);
6619 781691 : vectype_out = SLP_TREE_VECTYPE (slp_node);
6620 :
6621 : /* Most operations cannot handle bit-precision types without extra
6622 : truncations. */
6623 781691 : bool mask_op_p = VECTOR_BOOLEAN_TYPE_P (vectype_out);
6624 770664 : if (!mask_op_p
6625 770664 : && !type_has_mode_precision_p (TREE_TYPE (scalar_dest))
6626 : /* Exceptions are the bitwise binary operations. */
6627 : && code != BIT_IOR_EXPR
6628 1434 : && code != BIT_XOR_EXPR
6629 920 : && code != BIT_AND_EXPR)
6630 : {
6631 690 : if (dump_enabled_p ())
6632 0 : dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6633 : "bit-precision arithmetic not supported.\n");
6634 690 : return false;
6635 : }
6636 :
6637 781001 : slp_tree slp_op0;
6638 781001 : if (!vect_is_simple_use (vinfo, slp_node,
6639 : 0, &op0, &slp_op0, &dt[0], &vectype))
6640 : {
6641 0 : if (dump_enabled_p ())
6642 0 : dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6643 : "use not simple.\n");
6644 0 : return false;
6645 : }
6646 781001 : bool is_invariant = (dt[0] == vect_external_def
6647 781001 : || dt[0] == vect_constant_def);
6648 : /* If op0 is an external or constant def, infer the vector type
6649 : from the scalar type. */
6650 781001 : if (!vectype)
6651 : {
6652 : /* For a boolean type we cannot determine the vectype from an
6653 : invariant value (we don't know whether it is a vector of
6654 : booleans or a vector of integers). We use the output
6655 : vectype because operations on booleans don't change
6656 : the type. */
6657 71820 : if (VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (op0)))
6658 : {
6659 1481 : if (!VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (scalar_dest)))
6660 : {
6661 239 : if (dump_enabled_p ())
6662 0 : dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6663 : "not supported operation on bool value.\n");
6664 239 : return false;
6665 : }
6666 1242 : vectype = vectype_out;
6667 : }
6668 : else
6669 70339 : vectype = get_vectype_for_scalar_type (vinfo, TREE_TYPE (op0),
6670 : slp_node);
6671 : }
6672 780762 : if (!cost_vec)
6673 113967 : gcc_assert (vectype);
6674 780762 : if (!vectype)
6675 : {
6676 290 : if (dump_enabled_p ())
6677 2 : dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6678 : "no vectype for scalar type %T\n",
6679 2 : TREE_TYPE (op0));
6680 :
6681 290 : return false;
6682 : }
6683 :
6684 780472 : nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
6685 780472 : nunits_in = TYPE_VECTOR_SUBPARTS (vectype);
6686 780472 : if (maybe_ne (nunits_out, nunits_in)
6687 780472 : || !tree_nop_conversion_p (TREE_TYPE (vectype_out), TREE_TYPE (vectype)))
6688 11771 : return false;
6689 :
6690 768701 : tree vectype2 = NULL_TREE, vectype3 = NULL_TREE;
6691 768701 : slp_tree slp_op1 = NULL, slp_op2 = NULL;
6692 768701 : if (op_type == binary_op || op_type == ternary_op)
6693 : {
6694 688640 : if (!vect_is_simple_use (vinfo, slp_node,
6695 : 1, &op1, &slp_op1, &dt[1], &vectype2))
6696 : {
6697 0 : if (dump_enabled_p ())
6698 0 : dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6699 : "use not simple.\n");
6700 0 : return false;
6701 : }
6702 688640 : is_invariant &= (dt[1] == vect_external_def
6703 688640 : || dt[1] == vect_constant_def);
6704 688640 : if (vectype2
6705 1171359 : && (maybe_ne (nunits_out, TYPE_VECTOR_SUBPARTS (vectype2))
6706 482719 : || !tree_nop_conversion_p (TREE_TYPE (vectype_out),
6707 482719 : TREE_TYPE (vectype2))))
6708 4 : return false;
6709 : }
6710 768697 : if (op_type == ternary_op)
6711 : {
6712 0 : if (!vect_is_simple_use (vinfo, slp_node,
6713 : 2, &op2, &slp_op2, &dt[2], &vectype3))
6714 : {
6715 0 : if (dump_enabled_p ())
6716 0 : dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6717 : "use not simple.\n");
6718 0 : return false;
6719 : }
6720 0 : is_invariant &= (dt[2] == vect_external_def
6721 0 : || dt[2] == vect_constant_def);
6722 0 : if (vectype3
6723 0 : && (maybe_ne (nunits_out, TYPE_VECTOR_SUBPARTS (vectype3))
6724 0 : || !tree_nop_conversion_p (TREE_TYPE (vectype_out),
6725 0 : TREE_TYPE (vectype3))))
6726 0 : return false;
6727 : }
6728 :
6729 : /* Multiple types in SLP are handled by creating the appropriate number of
6730 : vectorized stmts for each SLP node. */
6731 768697 : auto vec_num = vect_get_num_copies (vinfo, slp_node);
6732 :
6733 : /* Reject attempts to combine mask types with nonmask types, e.g. if
6734 : we have an AND between a (nonmask) boolean loaded from memory and
6735 : a (mask) boolean result of a comparison.
6736 :
6737 : TODO: We could easily fix these cases up using pattern statements. */
6738 768697 : if (VECTOR_BOOLEAN_TYPE_P (vectype) != mask_op_p
6739 1243546 : || (vectype2 && VECTOR_BOOLEAN_TYPE_P (vectype2) != mask_op_p)
6740 1537394 : || (vectype3 && VECTOR_BOOLEAN_TYPE_P (vectype3) != mask_op_p))
6741 : {
6742 0 : if (dump_enabled_p ())
6743 0 : dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6744 : "mixed mask and nonmask vector types\n");
6745 0 : return false;
6746 : }
6747 :
6748 : /* Supportable by target? */
6749 :
6750 768697 : vec_mode = TYPE_MODE (vectype);
6751 768697 : optab = optab_for_tree_code (code, vectype, optab_default);
6752 768697 : if (!optab)
6753 : {
6754 68045 : if (dump_enabled_p ())
6755 5875 : dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6756 : "no optab for %s and %T.\n",
6757 : get_tree_code_name (code), vectype);
6758 68045 : return false;
6759 : }
6760 700652 : target_support_p = can_implement_p (optab, vec_mode);
6761 :
6762 700652 : bool using_emulated_vectors_p = vect_emulated_vector_p (vectype);
6763 700652 : if (!target_support_p || using_emulated_vectors_p)
6764 : {
6765 30032 : if (dump_enabled_p ())
6766 1124 : dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6767 : "op not supported by target.\n");
6768 : /* When vec_mode is not a vector mode and we have verified that ops
6769 : we do not have to lower (like AND) are natively supported, let
6770 : those through even when the mode isn't word_mode. For ops we do
6771 : have to lower, the lowering code assumes we are dealing
6772 : with word_mode. */
6773 60064 : if (!INTEGRAL_TYPE_P (TREE_TYPE (vectype))
6774 29890 : || !GET_MODE_SIZE (vec_mode).is_constant ()
6775 29890 : || (((code == PLUS_EXPR || code == MINUS_EXPR || code == NEGATE_EXPR)
6776 24746 : || !target_support_p)
6777 63515 : && maybe_ne (GET_MODE_SIZE (vec_mode), UNITS_PER_WORD))
6778 : /* Check only during analysis. */
6779 41732 : || (cost_vec && !vect_can_vectorize_without_simd_p (code)))
6780 : {
6781 29470 : if (dump_enabled_p ())
6782 1122 : dump_printf (MSG_NOTE, "using word mode not possible.\n");
6783 29470 : return false;
6784 : }
6785 562 : if (dump_enabled_p ())
6786 2 : dump_printf_loc (MSG_NOTE, vect_location,
6787 : "proceeding using word mode.\n");
6788 : using_emulated_vectors_p = true;
6789 : }
6790 :
6791 671182 : int reduc_idx = SLP_TREE_REDUC_IDX (slp_node);
6792 671182 : vec_loop_masks *masks = (loop_vinfo ? &LOOP_VINFO_MASKS (loop_vinfo) : NULL);
6793 431265 : vec_loop_lens *lens = (loop_vinfo ? &LOOP_VINFO_LENS (loop_vinfo) : NULL);
6794 671182 : internal_fn cond_fn = get_conditional_internal_fn (code);
6795 671182 : internal_fn cond_len_fn = get_conditional_len_internal_fn (code);
6796 :
6797 : /* If operating on inactive elements could generate spurious traps,
6798 : we need to restrict the operation to active lanes. Note that this
6799 : specifically doesn't apply to unhoisted invariants, since they
6800 : operate on the same value for every lane.
6801 :
6802 : Similarly, if this operation is part of a reduction, a fully-masked
6803 : loop should only change the active lanes of the reduction chain,
6804 : keeping the inactive lanes as-is. */
6805 643432 : bool mask_out_inactive = ((!is_invariant && gimple_could_trap_p (stmt))
6806 1251204 : || reduc_idx >= 0);
6807 :
6808 671182 : if (cost_vec) /* transformation not required. */
6809 : {
6810 557215 : if (loop_vinfo
6811 328901 : && LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo)
6812 88392 : && mask_out_inactive)
6813 : {
6814 20369 : if (cond_len_fn != IFN_LAST
6815 20369 : && direct_internal_fn_supported_p (cond_len_fn, vectype,
6816 : OPTIMIZE_FOR_SPEED))
6817 0 : vect_record_loop_len (loop_vinfo, lens, vec_num, vectype,
6818 : 1);
6819 20369 : else if (cond_fn != IFN_LAST
6820 20369 : && direct_internal_fn_supported_p (cond_fn, vectype,
6821 : OPTIMIZE_FOR_SPEED))
6822 8506 : vect_record_loop_mask (loop_vinfo, masks, vec_num,
6823 : vectype, NULL);
6824 : else
6825 : {
6826 11863 : if (dump_enabled_p ())
6827 608 : dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6828 : "can't use a fully-masked loop because no"
6829 : " conditional operation is available.\n");
6830 11863 : LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo) = false;
6831 : }
6832 : }
6833 :
6834 : /* Put types on constant and invariant SLP children. */
6835 557215 : if (!vect_maybe_update_slp_op_vectype (slp_op0, vectype)
6836 557137 : || !vect_maybe_update_slp_op_vectype (slp_op1, vectype)
6837 1114250 : || !vect_maybe_update_slp_op_vectype (slp_op2, vectype))
6838 : {
6839 180 : if (dump_enabled_p ())
6840 3 : dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6841 : "incompatible vector types for invariants\n");
6842 180 : return false;
6843 : }
6844 :
6845 557035 : SLP_TREE_TYPE (slp_node) = op_vec_info_type;
6846 557035 : DUMP_VECT_SCOPE ("vectorizable_operation");
6847 557035 : vect_model_simple_cost (vinfo, 1, slp_node, cost_vec);
6848 557035 : if (using_emulated_vectors_p)
6849 : {
6850 : /* The above vect_model_simple_cost call handles constants
6851 : in the prologue and (mis-)costs one of the stmts as
6852 : vector stmt. See below for the actual lowering that will
6853 : be applied. */
6854 560 : unsigned n = vect_get_num_copies (vinfo, slp_node);
6855 560 : switch (code)
6856 : {
6857 201 : case PLUS_EXPR:
6858 201 : n *= 5;
6859 201 : break;
6860 328 : case MINUS_EXPR:
6861 328 : n *= 6;
6862 328 : break;
6863 0 : case NEGATE_EXPR:
6864 0 : n *= 4;
6865 0 : break;
6866 : default:
6867 : /* Bit operations do not have extra cost and are accounted
6868 : as vector stmt by vect_model_simple_cost. */
6869 : n = 0;
6870 : break;
6871 : }
6872 529 : if (n != 0)
6873 : {
6874 : /* We also need to materialize two large constants. */
6875 529 : record_stmt_cost (cost_vec, 2, scalar_stmt, stmt_info,
6876 : 0, vect_prologue);
6877 529 : record_stmt_cost (cost_vec, n, scalar_stmt, stmt_info,
6878 : 0, vect_body);
6879 : }
6880 : }
6881 557035 : return true;
6882 : }
6883 :
6884 : /* Transform. */
6885 :
6886 113967 : if (dump_enabled_p ())
6887 16433 : dump_printf_loc (MSG_NOTE, vect_location,
6888 : "transform binary/unary operation.\n");
6889 :
6890 113967 : bool masked_loop_p = loop_vinfo && LOOP_VINFO_FULLY_MASKED_P (loop_vinfo);
6891 102364 : bool len_loop_p = loop_vinfo && LOOP_VINFO_FULLY_WITH_LENGTH_P (loop_vinfo);
6892 :
6893 : /* POINTER_DIFF_EXPR has pointer arguments which are vectorized as
6894 : vectors with unsigned elements, but the result is signed. So, we
6895 : need to compute the MINUS_EXPR into vectype temporary and
6896 : VIEW_CONVERT_EXPR it into the final vectype_out result. */
6897 113967 : tree vec_cvt_dest = NULL_TREE;
6898 113967 : if (orig_code == POINTER_DIFF_EXPR)
6899 : {
6900 110 : vec_dest = vect_create_destination_var (scalar_dest, vectype);
6901 110 : vec_cvt_dest = vect_create_destination_var (scalar_dest, vectype_out);
6902 : }
6903 : /* For reduction operations with undefined overflow behavior make sure to
6904 : pun them to unsigned since we change the order of evaluation.
6905 : ??? Avoid for in-order reductions? */
6906 113857 : else if (arith_code_with_undefined_signed_overflow (orig_code)
6907 97307 : && ANY_INTEGRAL_TYPE_P (vectype)
6908 47760 : && TYPE_OVERFLOW_UNDEFINED (vectype)
6909 139879 : && SLP_TREE_REDUC_IDX (slp_node) != -1)
6910 : {
6911 2507 : gcc_assert (orig_code == PLUS_EXPR || orig_code == MINUS_EXPR
6912 : || orig_code == MULT_EXPR || orig_code == POINTER_PLUS_EXPR);
6913 2507 : vec_cvt_dest = vect_create_destination_var (scalar_dest, vectype_out);
6914 2507 : vectype = unsigned_type_for (vectype);
6915 2507 : vec_dest = vect_create_destination_var (scalar_dest, vectype);
6916 : }
6917 : /* Handle def. */
6918 : else
6919 111350 : vec_dest = vect_create_destination_var (scalar_dest, vectype_out);
6920 :
6921 113967 : vect_get_vec_defs (vinfo, slp_node,
6922 : op0, &vec_oprnds0, op1, &vec_oprnds1, op2, &vec_oprnds2);
6923 : /* Arguments are ready. Create the new vector stmt. */
6924 252143 : FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
6925 : {
6926 138176 : gimple *new_stmt = NULL;
6927 276352 : vop1 = ((op_type == binary_op || op_type == ternary_op)
6928 138176 : ? vec_oprnds1[i] : NULL_TREE);
6929 138176 : vop2 = ((op_type == ternary_op) ? vec_oprnds2[i] : NULL_TREE);
6930 :
6931 138176 : if (vec_cvt_dest
6932 138176 : && !useless_type_conversion_p (vectype, TREE_TYPE (vop0)))
6933 : {
6934 2917 : new_temp = build1 (VIEW_CONVERT_EXPR, vectype, vop0);
6935 2917 : new_stmt = gimple_build_assign (vec_dest, VIEW_CONVERT_EXPR,
6936 : new_temp);
6937 2917 : new_temp = make_ssa_name (vec_dest, new_stmt);
6938 2917 : gimple_assign_set_lhs (new_stmt, new_temp);
6939 2917 : vect_finish_stmt_generation (vinfo, stmt_info,
6940 : new_stmt, gsi);
6941 2917 : vop0 = new_temp;
6942 : }
6943 138176 : if (vop1
6944 135616 : && vec_cvt_dest
6945 141218 : && !useless_type_conversion_p (vectype, TREE_TYPE (vop1)))
6946 : {
6947 2917 : new_temp = build1 (VIEW_CONVERT_EXPR, vectype, vop1);
6948 2917 : new_stmt = gimple_build_assign (vec_dest, VIEW_CONVERT_EXPR,
6949 : new_temp);
6950 2917 : new_temp = make_ssa_name (vec_dest, new_stmt);
6951 2917 : gimple_assign_set_lhs (new_stmt, new_temp);
6952 2917 : vect_finish_stmt_generation (vinfo, stmt_info,
6953 : new_stmt, gsi);
6954 2917 : vop1 = new_temp;
6955 : }
6956 138176 : if (vop2
6957 0 : && vec_cvt_dest
6958 138176 : && !useless_type_conversion_p (vectype, TREE_TYPE (vop2)))
6959 : {
6960 0 : new_temp = build1 (VIEW_CONVERT_EXPR, vectype, vop2);
6961 0 : new_stmt = gimple_build_assign (vec_dest, VIEW_CONVERT_EXPR,
6962 : new_temp);
6963 0 : new_temp = make_ssa_name (vec_dest, new_stmt);
6964 0 : gimple_assign_set_lhs (new_stmt, new_temp);
6965 0 : vect_finish_stmt_generation (vinfo, stmt_info,
6966 : new_stmt, gsi);
6967 0 : vop2 = new_temp;
6968 : }
6969 :
6970 138176 : if (using_emulated_vectors_p)
6971 : {
6972 : /* Lower the operation. This follows vector lowering. */
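/* Editor's note (worked restatement of the code below, for clarity):
   for element-wise PLUS on one machine word of packed elements this
   computes
     result = ((a & low) + (b & low)) ^ ((a ^ b) & high)
   where LOW has the sign bit of every element cleared and HIGH has
   only the sign bits set; the masked addition cannot carry across
   element boundaries and the XOR restores the sign bits.  */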
6973 2 : tree word_type = build_nonstandard_integer_type
6974 2 : (GET_MODE_BITSIZE (vec_mode).to_constant (), 1);
6975 2 : tree wvop0 = make_ssa_name (word_type);
6976 2 : new_stmt = gimple_build_assign (wvop0, VIEW_CONVERT_EXPR,
6977 : build1 (VIEW_CONVERT_EXPR,
6978 : word_type, vop0));
6979 2 : vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
6980 2 : tree wvop1 = NULL_TREE;
6981 2 : if (vop1)
6982 : {
6983 2 : wvop1 = make_ssa_name (word_type);
6984 2 : new_stmt = gimple_build_assign (wvop1, VIEW_CONVERT_EXPR,
6985 : build1 (VIEW_CONVERT_EXPR,
6986 : word_type, vop1));
6987 2 : vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
6988 : }
6989 :
6990 2 : tree result_low;
6991 2 : if (code == PLUS_EXPR || code == MINUS_EXPR || code == NEGATE_EXPR)
6992 : {
6993 1 : unsigned int width = vector_element_bits (vectype);
6994 1 : tree inner_type = TREE_TYPE (vectype);
6995 1 : HOST_WIDE_INT max = GET_MODE_MASK (TYPE_MODE (inner_type));
6996 1 : tree low_bits
6997 1 : = build_replicated_int_cst (word_type, width, max >> 1);
6998 1 : tree high_bits
6999 2 : = build_replicated_int_cst (word_type,
7000 1 : width, max & ~(max >> 1));
7001 1 : tree signs;
7002 1 : if (code == PLUS_EXPR || code == MINUS_EXPR)
7003 : {
7004 1 : signs = make_ssa_name (word_type);
7005 1 : new_stmt = gimple_build_assign (signs,
7006 : BIT_XOR_EXPR, wvop0, wvop1);
7007 1 : vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
7008 1 : tree b_low = make_ssa_name (word_type);
7009 1 : new_stmt = gimple_build_assign (b_low, BIT_AND_EXPR,
7010 : wvop1, low_bits);
7011 1 : vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
7012 1 : tree a_low = make_ssa_name (word_type);
7013 1 : if (code == PLUS_EXPR)
7014 1 : new_stmt = gimple_build_assign (a_low, BIT_AND_EXPR,
7015 : wvop0, low_bits);
7016 : else
7017 0 : new_stmt = gimple_build_assign (a_low, BIT_IOR_EXPR,
7018 : wvop0, high_bits);
7019 1 : vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
7020 1 : if (code == MINUS_EXPR)
7021 : {
7022 0 : new_stmt = gimple_build_assign (NULL_TREE,
7023 : BIT_NOT_EXPR, signs);
7024 0 : signs = make_ssa_name (word_type);
7025 0 : gimple_assign_set_lhs (new_stmt, signs);
7026 0 : vect_finish_stmt_generation (vinfo, stmt_info,
7027 : new_stmt, gsi);
7028 : }
7029 1 : new_stmt = gimple_build_assign (NULL_TREE, BIT_AND_EXPR,
7030 : signs, high_bits);
7031 1 : signs = make_ssa_name (word_type);
7032 1 : gimple_assign_set_lhs (new_stmt, signs);
7033 1 : vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
7034 1 : result_low = make_ssa_name (word_type);
7035 1 : new_stmt = gimple_build_assign (result_low, code,
7036 : a_low, b_low);
7037 1 : vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
7038 : }
7039 : else /* if (code == NEGATE_EXPR) */
7040 : {
7041 0 : tree a_low = make_ssa_name (word_type);
7042 0 : new_stmt = gimple_build_assign (a_low, BIT_AND_EXPR,
7043 : wvop0, low_bits);
7044 0 : vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
7045 0 : signs = make_ssa_name (word_type);
7046 0 : new_stmt = gimple_build_assign (signs, BIT_NOT_EXPR, wvop0);
7047 0 : vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
7048 0 : new_stmt = gimple_build_assign (NULL_TREE, BIT_AND_EXPR,
7049 : signs, high_bits);
7050 0 : signs = make_ssa_name (word_type);
7051 0 : gimple_assign_set_lhs (new_stmt, signs);
7052 0 : vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
7053 0 : result_low = make_ssa_name (word_type);
7054 0 : new_stmt = gimple_build_assign (result_low,
7055 : MINUS_EXPR, high_bits, a_low);
7056 0 : vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
7057 : }
7058 1 : new_stmt = gimple_build_assign (NULL_TREE, BIT_XOR_EXPR,
7059 : result_low, signs);
7060 1 : result_low = make_ssa_name (word_type);
7061 1 : gimple_assign_set_lhs (new_stmt, result_low);
7062 1 : vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
7063 : }
7064 : else
7065 : {
7066 1 : new_stmt = gimple_build_assign (NULL_TREE, code, wvop0, wvop1);
7067 1 : result_low = make_ssa_name (word_type);
7068 1 : gimple_assign_set_lhs (new_stmt, result_low);
7069 1 : vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
7070 :
7071 : }
7072 2 : new_stmt = gimple_build_assign (NULL_TREE, VIEW_CONVERT_EXPR,
7073 : build1 (VIEW_CONVERT_EXPR,
7074 : vectype, result_low));
7075 2 : new_temp = make_ssa_name (vectype);
7076 2 : gimple_assign_set_lhs (new_stmt, new_temp);
7077 2 : vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
7078 : }
7079 138174 : else if ((masked_loop_p || len_loop_p) && mask_out_inactive)
7080 : {
7081 16 : tree mask;
7082 16 : if (masked_loop_p)
7083 16 : mask = vect_get_loop_mask (loop_vinfo, gsi, masks,
7084 : vec_num, vectype, i);
7085 : else
7086 : /* Dummy mask. */
7087 0 : mask = build_minus_one_cst (truth_type_for (vectype));
7088 16 : auto_vec<tree> vops (6);
7089 16 : vops.quick_push (mask);
7090 16 : vops.quick_push (vop0);
7091 16 : if (vop1)
7092 16 : vops.quick_push (vop1);
7093 16 : if (vop2)
7094 0 : vops.quick_push (vop2);
7095 16 : if (reduc_idx >= 0)
7096 : {
7097 : /* Perform the operation on active elements only and take
7098 : inactive elements from the reduction chain input. */
7099 8 : gcc_assert (!vop2);
7100 8 : vops.quick_push (reduc_idx == 1 ? vop1 : vop0);
7101 : }
7102 : else
7103 : {
7104 8 : auto else_value = targetm.preferred_else_value
7105 8 : (cond_fn, vectype, vops.length () - 1, &vops[1]);
7106 8 : vops.quick_push (else_value);
7107 : }
7108 16 : if (len_loop_p)
7109 : {
7110 0 : tree len = vect_get_loop_len (loop_vinfo, gsi, lens,
7111 0 : vec_num, vectype, i, 1, true);
7112 0 : signed char biasval
7113 0 : = LOOP_VINFO_PARTIAL_LOAD_STORE_BIAS (loop_vinfo);
7114 0 : tree bias = build_int_cst (intQI_type_node, biasval);
7115 0 : vops.quick_push (len);
7116 0 : vops.quick_push (bias);
7117 : }
7118 16 : gcall *call
7119 16 : = gimple_build_call_internal_vec (masked_loop_p ? cond_fn
7120 : : cond_len_fn,
7121 : vops);
7122 16 : new_temp = make_ssa_name (vec_dest, call);
7123 16 : gimple_call_set_lhs (call, new_temp);
7124 16 : gimple_call_set_nothrow (call, true);
7125 16 : vect_finish_stmt_generation (vinfo, stmt_info, call, gsi);
7126 16 : new_stmt = call;
7127 16 : }
7128 : else
7129 : {
7130 138158 : tree mask = NULL_TREE;
7131 : /* When combining two masks, check if either of them has elsewhere been
7132 : combined with a loop mask; if so, we can mark the new combined mask
7133 : as not needing to be combined with a loop mask again. */
7134 138158 : if (masked_loop_p
7135 138158 : && code == BIT_AND_EXPR
7136 138158 : && VECTOR_BOOLEAN_TYPE_P (vectype))
7137 : {
7138 8 : if (loop_vinfo->scalar_cond_masked_set.contains ({ op0, vec_num }))
7139 : {
7140 0 : mask = vect_get_loop_mask (loop_vinfo, gsi, masks,
7141 : vec_num, vectype, i);
7142 :
7143 0 : vop0 = prepare_vec_mask (loop_vinfo, TREE_TYPE (mask), mask,
7144 : vop0, gsi);
7145 : }
7146 :
7147 8 : if (loop_vinfo->scalar_cond_masked_set.contains ({ op1, vec_num }))
7148 : {
7149 0 : mask = vect_get_loop_mask (loop_vinfo, gsi, masks,
7150 : vec_num, vectype, i);
7151 :
7152 0 : vop1 = prepare_vec_mask (loop_vinfo, TREE_TYPE (mask), mask,
7153 : vop1, gsi);
7154 : }
7155 : }
7156 :
7157 138158 : new_stmt = gimple_build_assign (vec_dest, code, vop0, vop1, vop2);
7158 138158 : new_temp = make_ssa_name (vec_dest, new_stmt);
7159 138158 : gimple_assign_set_lhs (new_stmt, new_temp);
7160 138158 : vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
7161 138158 : if (using_emulated_vectors_p)
7162 : suppress_warning (new_stmt, OPT_Wvector_operation_performance);
7163 :
7164 : /* Enter the combined value into the vector cond hash so we don't
7165 : AND it with a loop mask again. */
7166 138158 : if (mask)
7167 0 : loop_vinfo->vec_cond_masked_set.add ({ new_temp, mask });
7168 : }
7169 :
7170 138176 : if (vec_cvt_dest)
7171 : {
7172 3042 : new_temp = build1 (VIEW_CONVERT_EXPR, vectype_out, new_temp);
7173 3042 : new_stmt = gimple_build_assign (vec_cvt_dest, VIEW_CONVERT_EXPR,
7174 : new_temp);
7175 3042 : new_temp = make_ssa_name (vec_cvt_dest, new_stmt);
7176 3042 : gimple_assign_set_lhs (new_stmt, new_temp);
7177 3042 : vect_finish_stmt_generation (vinfo, stmt_info,
7178 : new_stmt, gsi);
7179 : }
7180 :
7181 138176 : slp_node->push_vec_def (new_stmt);
7182 : }
7183 :
7184 113967 : vec_oprnds0.release ();
7185 113967 : vec_oprnds1.release ();
7186 113967 : vec_oprnds2.release ();
7187 :
7188 113967 : return true;
7189 : }
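/* Example (editor's sketch): in a fully-masked loop a trapping
   operation such as  _3 = _1 / _2  is emitted as a conditional
   internal call

     vect__3 = .COND_DIV (loop_mask_6, vect__1, vect__2, else_val);

   so inactive lanes cannot fault; when the statement belongs to a
   reduction chain the else value is the reduction input, keeping
   inactive lanes unchanged.  The SSA names here are illustrative.  */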
7190 :
7191 : /* A helper function to ensure data reference DR_INFO's base alignment. */
7192 :
7193 : static void
7194 1955346 : ensure_base_align (dr_vec_info *dr_info)
7195 : {
7196 : /* Alignment is only analyzed for the first element of a DR group;
7197 : use that element to look at the base alignment we need to enforce. */
7198 1955346 : if (STMT_VINFO_GROUPED_ACCESS (dr_info->stmt))
7199 1421904 : dr_info = STMT_VINFO_DR_INFO (DR_GROUP_FIRST_ELEMENT (dr_info->stmt));
7200 :
7201 1955346 : gcc_assert (dr_info->misalignment != DR_MISALIGNMENT_UNINITIALIZED);
7202 :
7203 1955346 : if (dr_info->base_misaligned)
7204 : {
7205 169073 : tree base_decl = dr_info->base_decl;
7206 :
7207 : // We should only be able to increase the alignment of a base object if
7208 : // we know what its new alignment should be at compile time.
7209 169073 : unsigned HOST_WIDE_INT align_base_to =
7210 169073 : DR_TARGET_ALIGNMENT (dr_info).to_constant () * BITS_PER_UNIT;
7211 :
7212 169073 : if (decl_in_symtab_p (base_decl))
7213 4716 : symtab_node::get (base_decl)->increase_alignment (align_base_to);
7214 164357 : else if (DECL_ALIGN (base_decl) < align_base_to)
7215 : {
7216 131417 : SET_DECL_ALIGN (base_decl, align_base_to);
7217 131417 : DECL_USER_ALIGN (base_decl) = 1;
7218 : }
7219 169073 : dr_info->base_misaligned = false;
7220 : }
7221 1955346 : }
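/* Example (editor's note): a file-scope  double a[256];  aligned to
   8 bytes may be re-aligned here to the data reference's target
   alignment, e.g. 16 or 32 bytes, so the vectorized loop can use
   aligned accesses; DECL_USER_ALIGN is set so the alignment is treated
   as if the user had requested it.  */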
7222 :
7223 :
7224 : /* Function get_group_alias_ptr_type.
7225 :
7226 : Return the alias type for the group starting at FIRST_STMT_INFO. */
7227 :
7228 : static tree
7229 1626705 : get_group_alias_ptr_type (stmt_vec_info first_stmt_info)
7230 : {
7231 1626705 : struct data_reference *first_dr, *next_dr;
7232 :
7233 1626705 : first_dr = STMT_VINFO_DATA_REF (first_stmt_info);
7234 1626705 : stmt_vec_info next_stmt_info = DR_GROUP_NEXT_ELEMENT (first_stmt_info);
7235 3922311 : while (next_stmt_info)
7236 : {
7237 2427556 : next_dr = STMT_VINFO_DATA_REF (next_stmt_info);
7238 4855112 : if (get_alias_set (DR_REF (first_dr))
7239 2427556 : != get_alias_set (DR_REF (next_dr)))
7240 : {
7241 131950 : if (dump_enabled_p ())
7242 30 : dump_printf_loc (MSG_NOTE, vect_location,
7243 : "conflicting alias set types.\n");
7244 131950 : return ptr_type_node;
7245 : }
7246 2295606 : next_stmt_info = DR_GROUP_NEXT_ELEMENT (next_stmt_info);
7247 : }
7248 1494755 : return reference_alias_ptr_type (DR_REF (first_dr));
7249 : }
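/* Editor's note: the ptr_type_node fallback gives the group alias set
   zero, which conservatively aliases everything; a group interleaving
   an 'int' member with a 'float' member therefore stays correct at
   the cost of less precise alias disambiguation.  */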
7250 :
7251 :
7252 : /* Function scan_operand_equal_p.
7253 :
7254 : Helper function for check_scan_store. Compare two references
7255 : with .GOMP_SIMD_LANE bases. */
7256 :
7257 : static bool
7258 1284 : scan_operand_equal_p (tree ref1, tree ref2)
7259 : {
7260 1284 : tree ref[2] = { ref1, ref2 };
7261 1284 : poly_int64 bitsize[2], bitpos[2];
7262 : tree offset[2], base[2];
7263 3852 : for (int i = 0; i < 2; ++i)
7264 : {
7265 2568 : machine_mode mode;
7266 2568 : int unsignedp, reversep, volatilep = 0;
7267 2568 : base[i] = get_inner_reference (ref[i], &bitsize[i], &bitpos[i],
7268 : &offset[i], &mode, &unsignedp,
7269 : &reversep, &volatilep);
7270 2568 : if (reversep || volatilep || maybe_ne (bitpos[i], 0))
7271 0 : return false;
7272 2568 : if (TREE_CODE (base[i]) == MEM_REF
7273 42 : && offset[i] == NULL_TREE
7274 2610 : && TREE_CODE (TREE_OPERAND (base[i], 0)) == SSA_NAME)
7275 : {
7276 42 : gimple *def_stmt = SSA_NAME_DEF_STMT (TREE_OPERAND (base[i], 0));
7277 42 : if (is_gimple_assign (def_stmt)
7278 42 : && gimple_assign_rhs_code (def_stmt) == POINTER_PLUS_EXPR
7279 42 : && TREE_CODE (gimple_assign_rhs1 (def_stmt)) == ADDR_EXPR
7280 84 : && TREE_CODE (gimple_assign_rhs2 (def_stmt)) == SSA_NAME)
7281 : {
7282 42 : if (maybe_ne (mem_ref_offset (base[i]), 0))
7283 : return false;
7284 42 : base[i] = TREE_OPERAND (gimple_assign_rhs1 (def_stmt), 0);
7285 42 : offset[i] = gimple_assign_rhs2 (def_stmt);
7286 : }
7287 : }
7288 : }
7289 :
7290 1284 : if (!operand_equal_p (base[0], base[1], 0))
7291 : return false;
7292 934 : if (maybe_ne (bitsize[0], bitsize[1]))
7293 : return false;
7294 934 : if (offset[0] != offset[1])
7295 : {
7296 916 : if (!offset[0] || !offset[1])
7297 : return false;
7298 916 : if (!operand_equal_p (offset[0], offset[1], 0))
7299 : {
7300 : tree step[2];
7301 0 : for (int i = 0; i < 2; ++i)
7302 : {
7303 0 : step[i] = integer_one_node;
7304 0 : if (TREE_CODE (offset[i]) == SSA_NAME)
7305 : {
7306 0 : gimple *def_stmt = SSA_NAME_DEF_STMT (offset[i]);
7307 0 : if (is_gimple_assign (def_stmt)
7308 0 : && gimple_assign_rhs_code (def_stmt) == MULT_EXPR
7309 0 : && (TREE_CODE (gimple_assign_rhs2 (def_stmt))
7310 : == INTEGER_CST))
7311 : {
7312 0 : step[i] = gimple_assign_rhs2 (def_stmt);
7313 0 : offset[i] = gimple_assign_rhs1 (def_stmt);
7314 : }
7315 : }
7316 0 : else if (TREE_CODE (offset[i]) == MULT_EXPR)
7317 : {
7318 0 : step[i] = TREE_OPERAND (offset[i], 1);
7319 0 : offset[i] = TREE_OPERAND (offset[i], 0);
7320 : }
7321 0 : tree rhs1 = NULL_TREE;
7322 0 : if (TREE_CODE (offset[i]) == SSA_NAME)
7323 : {
7324 0 : gimple *def_stmt = SSA_NAME_DEF_STMT (offset[i]);
7325 0 : if (gimple_assign_cast_p (def_stmt))
7326 0 : rhs1 = gimple_assign_rhs1 (def_stmt);
7327 : }
7328 0 : else if (CONVERT_EXPR_P (offset[i]))
7329 0 : rhs1 = TREE_OPERAND (offset[i], 0);
7330 0 : if (rhs1
7331 0 : && INTEGRAL_TYPE_P (TREE_TYPE (rhs1))
7332 0 : && INTEGRAL_TYPE_P (TREE_TYPE (offset[i]))
7333 0 : && (TYPE_PRECISION (TREE_TYPE (offset[i]))
7334 0 : >= TYPE_PRECISION (TREE_TYPE (rhs1))))
7335 0 : offset[i] = rhs1;
7336 : }
7337 0 : if (!operand_equal_p (offset[0], offset[1], 0)
7338 0 : || !operand_equal_p (step[0], step[1], 0))
7339 0 : return false;
7340 : }
7341 : }
7342 : return true;
7343 : }
7344 :
7345 :
7346 : enum scan_store_kind {
7347 : /* Normal permutation. */
7348 : scan_store_kind_perm,
7349 :
7350 : /* Whole vector left shift permutation with zero init. */
7351 : scan_store_kind_lshift_zero,
7352 :
7353 : /* Whole vector left shift permutation and VEC_COND_EXPR. */
7354 : scan_store_kind_lshift_cond
7355 : };
7356 :
7357 : /* Function scan_store_can_perm_p.
7358 :
7359 : Verify if we can perform the needed permutations or whole vector shifts.
7360 : Return -1 on failure, otherwise the exact log2 of vectype's nunits.
7361 : USE_WHOLE_VECTOR, if nonnull, records which scan_store_kind operation
7362 : to use at each step. */
7363 :
7364 : static int
7365 1024 : scan_store_can_perm_p (tree vectype, tree init,
7366 : vec<enum scan_store_kind> *use_whole_vector = NULL)
7367 : {
7368 1024 : enum machine_mode vec_mode = TYPE_MODE (vectype);
7369 1024 : unsigned HOST_WIDE_INT nunits;
7370 1024 : if (!TYPE_VECTOR_SUBPARTS (vectype).is_constant (&nunits))
7371 : return -1;
7372 1024 : int units_log2 = exact_log2 (nunits);
7373 1024 : if (units_log2 <= 0)
7374 : return -1;
7375 :
7376 : int i;
7377 : enum scan_store_kind whole_vector_shift_kind = scan_store_kind_perm;
7378 4784 : for (i = 0; i <= units_log2; ++i)
7379 : {
7380 3760 : unsigned HOST_WIDE_INT j, k;
7381 3760 : enum scan_store_kind kind = scan_store_kind_perm;
7382 3760 : vec_perm_builder sel (nunits, nunits, 1);
7383 3760 : sel.quick_grow (nunits);
7384 3760 : if (i == units_log2)
7385 : {
7386 9728 : for (j = 0; j < nunits; ++j)
7387 8704 : sel[j] = nunits - 1;
7388 : }
7389 : else
7390 : {
7391 10416 : for (j = 0; j < (HOST_WIDE_INT_1U << i); ++j)
7392 7680 : sel[j] = j;
7393 26416 : for (k = 0; j < nunits; ++j, ++k)
7394 23680 : sel[j] = nunits + k;
7395 : }
7396 6496 : vec_perm_indices indices (sel, i == units_log2 ? 1 : 2, nunits);
7397 3760 : if (!can_vec_perm_const_p (vec_mode, vec_mode, indices))
7398 : {
7399 0 : if (i == units_log2)
7400 : return -1;
7401 :
7402 0 : if (whole_vector_shift_kind == scan_store_kind_perm)
7403 : {
7404 0 : if (!can_implement_p (vec_shl_optab, vec_mode))
7405 : return -1;
7406 0 : whole_vector_shift_kind = scan_store_kind_lshift_zero;
7407             : /* Whole vector shifts shift in zeros, so if init is an all-zeros
7408             : constant, there is no need to do anything further. */
7409 0 : if ((TREE_CODE (init) != INTEGER_CST
7410 0 : && TREE_CODE (init) != REAL_CST)
7411 0 : || !initializer_zerop (init))
7412 : {
7413 0 : tree masktype = truth_type_for (vectype);
7414 0 : if (!expand_vec_cond_expr_p (vectype, masktype))
7415 : return -1;
7416 : whole_vector_shift_kind = scan_store_kind_lshift_cond;
7417 : }
7418 : }
7419 0 : kind = whole_vector_shift_kind;
7420 : }
7421 3760 : if (use_whole_vector)
7422 : {
7423 1880 : if (kind != scan_store_kind_perm && use_whole_vector->is_empty ())
7424 0 : use_whole_vector->safe_grow_cleared (i, true);
7425 5640 : if (kind != scan_store_kind_perm || !use_whole_vector->is_empty ())
7426 0 : use_whole_vector->safe_push (kind);
7427 : }
7428 3760 : }
7429 :
7430 : return units_log2;
7431 : }
7432 :
7433 :
7434 : /* Function check_scan_store.
7435 :
7436 : Check magic stores for #pragma omp scan {in,ex}clusive reductions. */
7437 :
7438 : static bool
7439 1076 : check_scan_store (vec_info *vinfo, stmt_vec_info stmt_info, tree vectype,
7440 : enum vect_def_type rhs_dt, slp_tree slp_node,
7441 : slp_tree mask_node,
7442 : vect_memory_access_type memory_access_type)
7443 : {
7444 1076 : loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
7445 1076 : dr_vec_info *dr_info = STMT_VINFO_DR_INFO (stmt_info);
7446 1076 : tree ref_type;
7447 :
7448 1076 : gcc_assert (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) > 1);
7449 1076 : if (SLP_TREE_LANES (slp_node) > 1
7450 1076 : || mask_node
7451 1076 : || memory_access_type != VMAT_CONTIGUOUS
7452 1076 : || TREE_CODE (DR_BASE_ADDRESS (dr_info->dr)) != ADDR_EXPR
7453 1076 : || !VAR_P (TREE_OPERAND (DR_BASE_ADDRESS (dr_info->dr), 0))
7454 1076 : || loop_vinfo == NULL
7455 1076 : || LOOP_VINFO_FULLY_MASKED_P (loop_vinfo)
7456 1076 : || LOOP_VINFO_EPILOGUE_P (loop_vinfo)
7457 1076 : || STMT_VINFO_GROUPED_ACCESS (stmt_info)
7458 1076 : || !integer_zerop (get_dr_vinfo_offset (vinfo, dr_info))
7459 1076 : || !integer_zerop (DR_INIT (dr_info->dr))
7460 1076 : || !(ref_type = reference_alias_ptr_type (DR_REF (dr_info->dr)))
7461 2152 : || !alias_sets_conflict_p (get_alias_set (vectype),
7462 1076 : get_alias_set (TREE_TYPE (ref_type))))
7463 : {
7464 0 : if (dump_enabled_p ())
7465 0 : dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
7466 : "unsupported OpenMP scan store.\n");
7467 0 : return false;
7468 : }
7469 :
7470 : /* We need to pattern match code built by OpenMP lowering and simplified
7471             : by subsequent optimizations into something we can handle.
7472 : #pragma omp simd reduction(inscan,+:r)
7473 : for (...)
7474 : {
7475 : r += something ();
7476 : #pragma omp scan inclusive (r)
7477 : use (r);
7478 : }
7479 : shall have body with:
7480 : // Initialization for input phase, store the reduction initializer:
7481 : _20 = .GOMP_SIMD_LANE (simduid.3_14(D), 0);
7482 : _21 = .GOMP_SIMD_LANE (simduid.3_14(D), 1);
7483 : D.2042[_21] = 0;
7484 : // Actual input phase:
7485 : ...
7486 : r.0_5 = D.2042[_20];
7487 : _6 = _4 + r.0_5;
7488 : D.2042[_20] = _6;
7489 : // Initialization for scan phase:
7490 : _25 = .GOMP_SIMD_LANE (simduid.3_14(D), 2);
7491 : _26 = D.2043[_25];
7492 : _27 = D.2042[_25];
7493 : _28 = _26 + _27;
7494 : D.2043[_25] = _28;
7495 : D.2042[_25] = _28;
7496 : // Actual scan phase:
7497 : ...
7498 : r.1_8 = D.2042[_20];
7499 : ...
7500 : The "omp simd array" variable D.2042 holds the privatized copy used
7501 : inside of the loop and D.2043 is another one that holds copies of
7502 : the current original list item. The separate GOMP_SIMD_LANE ifn
7503 : kinds are there in order to allow optimizing the initializer store
7504 : and combiner sequence, e.g. if it is originally some C++ish user
7505 : defined reduction, but allow the vectorizer to pattern recognize it
7506             : and turn it into the appropriate vectorized scan.
7507 :
7508 : For exclusive scan, this is slightly different:
7509 : #pragma omp simd reduction(inscan,+:r)
7510 : for (...)
7511 : {
7512 : use (r);
7513 : #pragma omp scan exclusive (r)
7514 : r += something ();
7515 : }
7516 : shall have body with:
7517 : // Initialization for input phase, store the reduction initializer:
7518 : _20 = .GOMP_SIMD_LANE (simduid.3_14(D), 0);
7519 : _21 = .GOMP_SIMD_LANE (simduid.3_14(D), 1);
7520 : D.2042[_21] = 0;
7521 : // Actual input phase:
7522 : ...
7523 : r.0_5 = D.2042[_20];
7524 : _6 = _4 + r.0_5;
7525 : D.2042[_20] = _6;
7526 : // Initialization for scan phase:
7527 : _25 = .GOMP_SIMD_LANE (simduid.3_14(D), 3);
7528 : _26 = D.2043[_25];
7529 : D.2044[_25] = _26;
7530 : _27 = D.2042[_25];
7531 : _28 = _26 + _27;
7532 : D.2043[_25] = _28;
7533 : // Actual scan phase:
7534 : ...
7535 : r.1_8 = D.2044[_20];
7536 : ... */
7537 :
7538 1076 : if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) == 2)
7539 : {
7540 : /* Match the D.2042[_21] = 0; store above. Just require that
7541 : it is a constant or external definition store. */
7542 564 : if (rhs_dt != vect_constant_def && rhs_dt != vect_external_def)
7543 : {
7544 0 : fail_init:
7545 0 : if (dump_enabled_p ())
7546 0 : dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
7547 : "unsupported OpenMP scan initializer store.\n");
7548 0 : return false;
7549 : }
7550 :
7551 564 : if (! loop_vinfo->scan_map)
7552 322 : loop_vinfo->scan_map = new hash_map<tree, tree>;
7553 564 : tree var = TREE_OPERAND (DR_BASE_ADDRESS (dr_info->dr), 0);
7554 564 : tree &cached = loop_vinfo->scan_map->get_or_insert (var);
7555 564 : if (cached)
7556 0 : goto fail_init;
7557 564 : cached = gimple_assign_rhs1 (STMT_VINFO_STMT (stmt_info));
7558 :
7559 : /* These stores can be vectorized normally. */
7560 564 : return true;
7561 : }
7562 :
7563 512 : if (rhs_dt != vect_internal_def)
7564 : {
7565 0 : fail:
7566 0 : if (dump_enabled_p ())
7567 0 : dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
7568 : "unsupported OpenMP scan combiner pattern.\n");
7569 0 : return false;
7570 : }
7571 :
7572 512 : gimple *stmt = STMT_VINFO_STMT (stmt_info);
7573 512 : tree rhs = gimple_assign_rhs1 (stmt);
7574 512 : if (TREE_CODE (rhs) != SSA_NAME)
7575 0 : goto fail;
7576 :
7577 512 : gimple *other_store_stmt = NULL;
7578 512 : tree var = TREE_OPERAND (DR_BASE_ADDRESS (dr_info->dr), 0);
7579 512 : bool inscan_var_store
7580 512 : = lookup_attribute ("omp simd inscan", DECL_ATTRIBUTES (var)) != NULL;
7581 :
7582 512 : if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) == 4)
7583 : {
7584 252 : if (!inscan_var_store)
7585 : {
7586 126 : use_operand_p use_p;
7587 126 : imm_use_iterator iter;
7588 378 : FOR_EACH_IMM_USE_FAST (use_p, iter, rhs)
7589 : {
7590 252 : gimple *use_stmt = USE_STMT (use_p);
7591 252 : if (use_stmt == stmt || is_gimple_debug (use_stmt))
7592 126 : continue;
7593 126 : if (gimple_bb (use_stmt) != gimple_bb (stmt)
7594 126 : || !is_gimple_assign (use_stmt)
7595 126 : || gimple_assign_rhs_class (use_stmt) != GIMPLE_BINARY_RHS
7596 126 : || other_store_stmt
7597 252 : || TREE_CODE (gimple_assign_lhs (use_stmt)) != SSA_NAME)
7598 0 : goto fail;
7599 126 : other_store_stmt = use_stmt;
7600 0 : }
7601 126 : if (other_store_stmt == NULL)
7602 0 : goto fail;
7603 126 : rhs = gimple_assign_lhs (other_store_stmt);
7604 126 : if (!single_imm_use (rhs, &use_p, &other_store_stmt))
7605 0 : goto fail;
7606 : }
7607 : }
7608 260 : else if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) == 3)
7609 : {
7610 260 : use_operand_p use_p;
7611 260 : imm_use_iterator iter;
7612 1040 : FOR_EACH_IMM_USE_FAST (use_p, iter, rhs)
7613 : {
7614 520 : gimple *use_stmt = USE_STMT (use_p);
7615 520 : if (use_stmt == stmt || is_gimple_debug (use_stmt))
7616 260 : continue;
7617 260 : if (other_store_stmt)
7618 0 : goto fail;
7619 260 : other_store_stmt = use_stmt;
7620 260 : }
7621 : }
7622 : else
7623 0 : goto fail;
7624 :
7625 512 : gimple *def_stmt = SSA_NAME_DEF_STMT (rhs);
7626 512 : if (gimple_bb (def_stmt) != gimple_bb (stmt)
7627 512 : || !is_gimple_assign (def_stmt)
7628 1024 : || gimple_assign_rhs_class (def_stmt) != GIMPLE_BINARY_RHS)
7629 0 : goto fail;
7630 :
7631 512 : enum tree_code code = gimple_assign_rhs_code (def_stmt);
7632 : /* For pointer addition, we should use the normal plus for the vector
7633 : operation. */
7634 512 : switch (code)
7635 : {
7636 0 : case POINTER_PLUS_EXPR:
7637 0 : code = PLUS_EXPR;
7638 0 : break;
7639 0 : case MULT_HIGHPART_EXPR:
7640 0 : goto fail;
7641 : default:
7642 : break;
7643 : }
7644 512 : if (TREE_CODE_LENGTH (code) != binary_op || !commutative_tree_code (code))
7645 0 : goto fail;
7646 :
7647 512 : tree rhs1 = gimple_assign_rhs1 (def_stmt);
7648 512 : tree rhs2 = gimple_assign_rhs2 (def_stmt);
7649 512 : if (TREE_CODE (rhs1) != SSA_NAME || TREE_CODE (rhs2) != SSA_NAME)
7650 0 : goto fail;
7651 :
7652 512 : gimple *load1_stmt = SSA_NAME_DEF_STMT (rhs1);
7653 512 : gimple *load2_stmt = SSA_NAME_DEF_STMT (rhs2);
7654 512 : if (gimple_bb (load1_stmt) != gimple_bb (stmt)
7655 512 : || !gimple_assign_load_p (load1_stmt)
7656 512 : || gimple_bb (load2_stmt) != gimple_bb (stmt)
7657 1024 : || !gimple_assign_load_p (load2_stmt))
7658 0 : goto fail;
7659 :
7660 512 : stmt_vec_info load1_stmt_info = loop_vinfo->lookup_stmt (load1_stmt);
7661 512 : stmt_vec_info load2_stmt_info = loop_vinfo->lookup_stmt (load2_stmt);
7662 512 : if (load1_stmt_info == NULL
7663 512 : || load2_stmt_info == NULL
7664 512 : || (STMT_VINFO_SIMD_LANE_ACCESS_P (load1_stmt_info)
7665 512 : != STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info))
7666 512 : || (STMT_VINFO_SIMD_LANE_ACCESS_P (load2_stmt_info)
7667 512 : != STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info)))
7668 0 : goto fail;
7669 :
7670 512 : if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) == 4 && inscan_var_store)
7671 : {
7672 126 : dr_vec_info *load1_dr_info = STMT_VINFO_DR_INFO (load1_stmt_info);
7673 126 : if (TREE_CODE (DR_BASE_ADDRESS (load1_dr_info->dr)) != ADDR_EXPR
7674 126 : || !VAR_P (TREE_OPERAND (DR_BASE_ADDRESS (load1_dr_info->dr), 0)))
7675 0 : goto fail;
7676 126 : tree var1 = TREE_OPERAND (DR_BASE_ADDRESS (load1_dr_info->dr), 0);
7677 126 : tree lrhs;
7678 126 : if (lookup_attribute ("omp simd inscan", DECL_ATTRIBUTES (var1)))
7679 : lrhs = rhs1;
7680 : else
7681 16 : lrhs = rhs2;
7682 126 : use_operand_p use_p;
7683 126 : imm_use_iterator iter;
7684 504 : FOR_EACH_IMM_USE_FAST (use_p, iter, lrhs)
7685 : {
7686 252 : gimple *use_stmt = USE_STMT (use_p);
7687 252 : if (use_stmt == def_stmt || is_gimple_debug (use_stmt))
7688 126 : continue;
7689 126 : if (other_store_stmt)
7690 0 : goto fail;
7691 126 : other_store_stmt = use_stmt;
7692 126 : }
7693 : }
7694 :
7695 512 : if (other_store_stmt == NULL)
7696 0 : goto fail;
7697 512 : if (gimple_bb (other_store_stmt) != gimple_bb (stmt)
7698 512 : || !gimple_store_p (other_store_stmt))
7699 0 : goto fail;
7700 :
7701 512 : stmt_vec_info other_store_stmt_info
7702 512 : = loop_vinfo->lookup_stmt (other_store_stmt);
7703 512 : if (other_store_stmt_info == NULL
7704 512 : || (STMT_VINFO_SIMD_LANE_ACCESS_P (other_store_stmt_info)
7705 512 : != STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info)))
7706 0 : goto fail;
7707 :
7708 512 : gimple *stmt1 = stmt;
7709 512 : gimple *stmt2 = other_store_stmt;
7710 512 : if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) == 4 && !inscan_var_store)
7711 : std::swap (stmt1, stmt2);
7712 512 : if (scan_operand_equal_p (gimple_assign_lhs (stmt1),
7713 : gimple_assign_rhs1 (load2_stmt)))
7714 : {
7715 162 : std::swap (rhs1, rhs2);
7716 162 : std::swap (load1_stmt, load2_stmt);
7717 162 : std::swap (load1_stmt_info, load2_stmt_info);
7718 : }
7719 512 : if (!scan_operand_equal_p (gimple_assign_lhs (stmt1),
7720 : gimple_assign_rhs1 (load1_stmt)))
7721 0 : goto fail;
7722 :
7723 512 : tree var3 = NULL_TREE;
7724 512 : if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) == 3
7725 512 : && !scan_operand_equal_p (gimple_assign_lhs (stmt2),
7726 : gimple_assign_rhs1 (load2_stmt)))
7727 0 : goto fail;
7728 512 : else if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) == 4)
7729 : {
7730 252 : dr_vec_info *load2_dr_info = STMT_VINFO_DR_INFO (load2_stmt_info);
7731 252 : if (TREE_CODE (DR_BASE_ADDRESS (load2_dr_info->dr)) != ADDR_EXPR
7732 252 : || !VAR_P (TREE_OPERAND (DR_BASE_ADDRESS (load2_dr_info->dr), 0)))
7733 0 : goto fail;
7734 252 : var3 = TREE_OPERAND (DR_BASE_ADDRESS (load2_dr_info->dr), 0);
7735 252 : if (!lookup_attribute ("omp simd array", DECL_ATTRIBUTES (var3))
7736 252 : || lookup_attribute ("omp simd inscan", DECL_ATTRIBUTES (var3))
7737 504 : || lookup_attribute ("omp simd inscan exclusive",
7738 252 : DECL_ATTRIBUTES (var3)))
7739 0 : goto fail;
7740 : }
7741 :
7742 512 : dr_vec_info *other_dr_info = STMT_VINFO_DR_INFO (other_store_stmt_info);
7743 512 : if (TREE_CODE (DR_BASE_ADDRESS (other_dr_info->dr)) != ADDR_EXPR
7744 512 : || !VAR_P (TREE_OPERAND (DR_BASE_ADDRESS (other_dr_info->dr), 0)))
7745 0 : goto fail;
7746 :
7747 512 : tree var1 = TREE_OPERAND (DR_BASE_ADDRESS (dr_info->dr), 0);
7748 512 : tree var2 = TREE_OPERAND (DR_BASE_ADDRESS (other_dr_info->dr), 0);
7749 512 : if (!lookup_attribute ("omp simd array", DECL_ATTRIBUTES (var1))
7750 512 : || !lookup_attribute ("omp simd array", DECL_ATTRIBUTES (var2))
7751 1024 : || (!lookup_attribute ("omp simd inscan", DECL_ATTRIBUTES (var1)))
7752 512 : == (!lookup_attribute ("omp simd inscan", DECL_ATTRIBUTES (var2))))
7753 0 : goto fail;
7754 :
7755 512 : if (lookup_attribute ("omp simd inscan", DECL_ATTRIBUTES (var1)))
7756 256 : std::swap (var1, var2);
7757 :
7758 512 : if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) == 4)
7759 : {
7760 252 : if (!lookup_attribute ("omp simd inscan exclusive",
7761 252 : DECL_ATTRIBUTES (var1)))
7762 0 : goto fail;
7763 252 : var1 = var3;
7764 : }
7765 :
7766 512 : if (loop_vinfo->scan_map == NULL)
7767 0 : goto fail;
7768 512 : tree *init = loop_vinfo->scan_map->get (var1);
7769 512 : if (init == NULL)
7770 0 : goto fail;
7771 :
7772 : /* The IL is as expected, now check if we can actually vectorize it.
7773 : Inclusive scan:
7774 : _26 = D.2043[_25];
7775 : _27 = D.2042[_25];
7776 : _28 = _26 + _27;
7777 : D.2043[_25] = _28;
7778 : D.2042[_25] = _28;
7779 : should be vectorized as (where _40 is the vectorized rhs
7780 : from the D.2042[_21] = 0; store):
7781 : _30 = MEM <vector(8) int> [(int *)&D.2043];
7782 : _31 = MEM <vector(8) int> [(int *)&D.2042];
7783 : _32 = VEC_PERM_EXPR <_40, _31, { 0, 8, 9, 10, 11, 12, 13, 14 }>;
7784 : _33 = _31 + _32;
7785 : // _33 = { _31[0], _31[0]+_31[1], _31[1]+_31[2], ..., _31[6]+_31[7] };
7786 : _34 = VEC_PERM_EXPR <_40, _33, { 0, 1, 8, 9, 10, 11, 12, 13 }>;
7787 : _35 = _33 + _34;
7788 : // _35 = { _31[0], _31[0]+_31[1], _31[0]+.._31[2], _31[0]+.._31[3],
7789 : // _31[1]+.._31[4], ... _31[4]+.._31[7] };
7790 : _36 = VEC_PERM_EXPR <_40, _35, { 0, 1, 2, 3, 8, 9, 10, 11 }>;
7791 : _37 = _35 + _36;
7792 : // _37 = { _31[0], _31[0]+_31[1], _31[0]+.._31[2], _31[0]+.._31[3],
7793 : // _31[0]+.._31[4], ... _31[0]+.._31[7] };
7794 : _38 = _30 + _37;
7795 : _39 = VEC_PERM_EXPR <_38, _38, { 7, 7, 7, 7, 7, 7, 7, 7 }>;
7796 : MEM <vector(8) int> [(int *)&D.2043] = _39;
7797 : MEM <vector(8) int> [(int *)&D.2042] = _38;
7798 : Exclusive scan:
7799 : _26 = D.2043[_25];
7800 : D.2044[_25] = _26;
7801 : _27 = D.2042[_25];
7802 : _28 = _26 + _27;
7803 : D.2043[_25] = _28;
7804 : should be vectorized as (where _40 is the vectorized rhs
7805 : from the D.2042[_21] = 0; store):
7806 : _30 = MEM <vector(8) int> [(int *)&D.2043];
7807 : _31 = MEM <vector(8) int> [(int *)&D.2042];
7808 : _32 = VEC_PERM_EXPR <_40, _31, { 0, 8, 9, 10, 11, 12, 13, 14 }>;
7809 : _33 = VEC_PERM_EXPR <_40, _32, { 0, 8, 9, 10, 11, 12, 13, 14 }>;
7810 : _34 = _32 + _33;
7811 : // _34 = { 0, _31[0], _31[0]+_31[1], _31[1]+_31[2], _31[2]+_31[3],
7812 : // _31[3]+_31[4], ... _31[5]+.._31[6] };
7813 : _35 = VEC_PERM_EXPR <_40, _34, { 0, 1, 8, 9, 10, 11, 12, 13 }>;
7814 : _36 = _34 + _35;
7815 : // _36 = { 0, _31[0], _31[0]+_31[1], _31[0]+.._31[2], _31[0]+.._31[3],
7816 : // _31[1]+.._31[4], ... _31[3]+.._31[6] };
7817 : _37 = VEC_PERM_EXPR <_40, _36, { 0, 1, 2, 3, 8, 9, 10, 11 }>;
7818 : _38 = _36 + _37;
7819 : // _38 = { 0, _31[0], _31[0]+_31[1], _31[0]+.._31[2], _31[0]+.._31[3],
7820 : // _31[0]+.._31[4], ... _31[0]+.._31[6] };
7821 : _39 = _30 + _38;
7822 : _50 = _31 + _39;
7823 : _51 = VEC_PERM_EXPR <_50, _50, { 7, 7, 7, 7, 7, 7, 7, 7 }>;
7824 : MEM <vector(8) int> [(int *)&D.2044] = _39;
7825 : MEM <vector(8) int> [(int *)&D.2042] = _51; */
7826 512 : enum machine_mode vec_mode = TYPE_MODE (vectype);
7827 512 : optab optab = optab_for_tree_code (code, vectype, optab_default);
7828 512 : if (!optab || !can_implement_p (optab, vec_mode))
7829 0 : goto fail;
7830 :
7831 512 : int units_log2 = scan_store_can_perm_p (vectype, *init);
7832 512 : if (units_log2 == -1)
7833 0 : goto fail;
7834 :
7835 : return true;
7836 : }
7837 :
7838 :
7839 : /* Function vectorizable_scan_store.
7840 :
7841             : Helper of vectorizable_store, with the same arguments.
7842             : Handles only the transformation; checking is done in check_scan_store. */
7843 :
7844 : static bool
7845 512 : vectorizable_scan_store (vec_info *vinfo, stmt_vec_info stmt_info,
7846 : slp_tree slp_node, gimple_stmt_iterator *gsi)
7847 : {
7848 512 : loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
7849 512 : dr_vec_info *dr_info = STMT_VINFO_DR_INFO (stmt_info);
7850 512 : tree ref_type = reference_alias_ptr_type (DR_REF (dr_info->dr));
7851 512 : tree vectype = SLP_TREE_VECTYPE (slp_node);
7852 :
7853 512 : if (dump_enabled_p ())
7854 492 : dump_printf_loc (MSG_NOTE, vect_location,
7855 : "transform scan store.\n");
7856 :
7857 512 : gimple *stmt = STMT_VINFO_STMT (stmt_info);
7858 512 : tree rhs = gimple_assign_rhs1 (stmt);
7859 512 : gcc_assert (TREE_CODE (rhs) == SSA_NAME);
7860 :
7861 512 : tree var = TREE_OPERAND (DR_BASE_ADDRESS (dr_info->dr), 0);
7862 512 : bool inscan_var_store
7863 512 : = lookup_attribute ("omp simd inscan", DECL_ATTRIBUTES (var)) != NULL;
7864 :
7865 512 : if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) == 4 && !inscan_var_store)
7866 : {
7867 126 : use_operand_p use_p;
7868 126 : imm_use_iterator iter;
7869 252 : FOR_EACH_IMM_USE_FAST (use_p, iter, rhs)
7870 : {
7871 126 : gimple *use_stmt = USE_STMT (use_p);
7872 126 : if (use_stmt == stmt || is_gimple_debug (use_stmt))
7873 0 : continue;
7874 126 : rhs = gimple_assign_lhs (use_stmt);
7875 126 : break;
7876 126 : }
7877 : }
7878 :
7879 512 : gimple *def_stmt = SSA_NAME_DEF_STMT (rhs);
7880 512 : enum tree_code code = gimple_assign_rhs_code (def_stmt);
7881 512 : if (code == POINTER_PLUS_EXPR)
7882 0 : code = PLUS_EXPR;
7883 512 : gcc_assert (TREE_CODE_LENGTH (code) == binary_op
7884 : && commutative_tree_code (code));
7885 512 : tree rhs1 = gimple_assign_rhs1 (def_stmt);
7886 512 : tree rhs2 = gimple_assign_rhs2 (def_stmt);
7887 512 : gcc_assert (TREE_CODE (rhs1) == SSA_NAME && TREE_CODE (rhs2) == SSA_NAME);
7888 512 : gimple *load1_stmt = SSA_NAME_DEF_STMT (rhs1);
7889 512 : gimple *load2_stmt = SSA_NAME_DEF_STMT (rhs2);
7890 512 : stmt_vec_info load1_stmt_info = loop_vinfo->lookup_stmt (load1_stmt);
7891 512 : stmt_vec_info load2_stmt_info = loop_vinfo->lookup_stmt (load2_stmt);
7892 512 : dr_vec_info *load1_dr_info = STMT_VINFO_DR_INFO (load1_stmt_info);
7893 512 : dr_vec_info *load2_dr_info = STMT_VINFO_DR_INFO (load2_stmt_info);
7894 512 : tree var1 = TREE_OPERAND (DR_BASE_ADDRESS (load1_dr_info->dr), 0);
7895 512 : tree var2 = TREE_OPERAND (DR_BASE_ADDRESS (load2_dr_info->dr), 0);
7896 :
7897 512 : if (lookup_attribute ("omp simd inscan", DECL_ATTRIBUTES (var1)))
7898 : {
7899 436 : std::swap (rhs1, rhs2);
7900 436 : std::swap (var1, var2);
7901 436 : std::swap (load1_dr_info, load2_dr_info);
7902 : }
7903 :
7904 512 : tree *init = loop_vinfo->scan_map->get (var1);
7905 512 : gcc_assert (init);
7906 :
7907 512 : unsigned HOST_WIDE_INT nunits;
7908 512 : if (!TYPE_VECTOR_SUBPARTS (vectype).is_constant (&nunits))
7909 : gcc_unreachable ();
7910 512 : auto_vec<enum scan_store_kind, 16> use_whole_vector;
7911 512 : int units_log2 = scan_store_can_perm_p (vectype, *init, &use_whole_vector);
7912 512 : gcc_assert (units_log2 > 0);
7913 512 : auto_vec<tree, 16> perms;
7914 512 : perms.quick_grow (units_log2 + 1);
7915 512 : tree zero_vec = NULL_TREE, masktype = NULL_TREE;
7916 2392 : for (int i = 0; i <= units_log2; ++i)
7917 : {
7918 1880 : unsigned HOST_WIDE_INT j, k;
7919 1880 : vec_perm_builder sel (nunits, nunits, 1);
7920 1880 : sel.quick_grow (nunits);
7921 1880 : if (i == units_log2)
7922 4864 : for (j = 0; j < nunits; ++j)
7923 4352 : sel[j] = nunits - 1;
7924 : else
7925 : {
7926 5208 : for (j = 0; j < (HOST_WIDE_INT_1U << i); ++j)
7927 3840 : sel[j] = j;
7928 13208 : for (k = 0; j < nunits; ++j, ++k)
7929 11840 : sel[j] = nunits + k;
7930 : }
7931 3248 : vec_perm_indices indices (sel, i == units_log2 ? 1 : 2, nunits);
7932 1880 : if (!use_whole_vector.is_empty ()
7933 0 : && use_whole_vector[i] != scan_store_kind_perm)
7934 : {
7935 0 : if (zero_vec == NULL_TREE)
7936 0 : zero_vec = build_zero_cst (vectype);
7937 0 : if (masktype == NULL_TREE
7938 0 : && use_whole_vector[i] == scan_store_kind_lshift_cond)
7939 0 : masktype = truth_type_for (vectype);
7940 0 : perms[i] = vect_gen_perm_mask_any (vectype, indices);
7941 : }
7942 : else
7943 1880 : perms[i] = vect_gen_perm_mask_checked (vectype, indices);
7944 1880 : }
7945 :
7946 512 : vec_loop_lens *loop_lens
7947 512 : = (loop_vinfo && LOOP_VINFO_FULLY_WITH_LENGTH_P (loop_vinfo)
7948 : ? &LOOP_VINFO_LENS (loop_vinfo)
7949 0 : : NULL);
7950 :
7951 512 : tree vec_oprnd1 = NULL_TREE;
7952 512 : tree vec_oprnd2 = NULL_TREE;
7953 512 : tree vec_oprnd3 = NULL_TREE;
7954 512 : tree dataref_ptr = DR_BASE_ADDRESS (dr_info->dr);
7955 512 : tree dataref_offset = build_int_cst (ref_type, 0);
7956 512 : tree bump = vect_get_data_ptr_increment (vinfo, gsi, dr_info,
7957 : vectype, VMAT_CONTIGUOUS,
7958 : loop_lens);
7959 512 : tree ldataref_ptr = NULL_TREE;
7960 512 : tree orig = NULL_TREE;
7961 512 : if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) == 4 && !inscan_var_store)
7962 126 : ldataref_ptr = DR_BASE_ADDRESS (load1_dr_info->dr);
7963 : /* The initialization is invariant. */
7964 512 : vec_oprnd1 = vect_init_vector (vinfo, stmt_info, *init, vectype, NULL);
7965 512 : auto_vec<tree> vec_oprnds2;
7966 512 : auto_vec<tree> vec_oprnds3;
7967 512 : if (ldataref_ptr == NULL)
7968 : {
7969             : /* We want to look up the vector operands of the reduction, not those
7970 : of the store - for SLP we have to use the proper SLP node for the
7971 : lookup, which should be the single child of the scan store. */
7972 386 : vect_get_vec_defs (vinfo, SLP_TREE_CHILDREN (slp_node)[0],
7973 : rhs1, &vec_oprnds2, rhs2, &vec_oprnds3);
7974 : /* ??? For SLP we do not key the def on 'rhs1' or 'rhs2' but get
7975 : them in SLP child order. So we have to swap here with logic
7976 : similar to above. */
7977 386 : stmt_vec_info load
7978 386 : = SLP_TREE_SCALAR_STMTS (SLP_TREE_CHILDREN
7979 386 : (SLP_TREE_CHILDREN (slp_node)[0])[0])[0];
7980 386 : dr_vec_info *dr_info = STMT_VINFO_DR_INFO (load);
7981 386 : tree var = TREE_OPERAND (DR_BASE_ADDRESS (dr_info->dr), 0);
7982 386 : if (lookup_attribute ("omp simd inscan", DECL_ATTRIBUTES (var)))
7983 820 : for (unsigned i = 0; i < vec_oprnds2.length (); ++i)
7984 494 : std::swap (vec_oprnds2[i], vec_oprnds3[i]);
7985 : }
7986 : else
7987 126 : vect_get_vec_defs (vinfo, slp_node,
7988 : rhs2, &vec_oprnds3);
7989 1248 : for (unsigned j = 0; j < vec_oprnds3.length (); j++)
7990 : {
7991 736 : if (ldataref_ptr == NULL)
7992 554 : vec_oprnd2 = vec_oprnds2[j];
7993 736 : vec_oprnd3 = vec_oprnds3[j];
7994 736 : if (j == 0)
7995 : orig = vec_oprnd3;
7996 224 : else if (!inscan_var_store)
7997 112 : dataref_offset = int_const_binop (PLUS_EXPR, dataref_offset, bump);
7998 :
7999 736 : if (ldataref_ptr)
8000 : {
8001 182 : vec_oprnd2 = make_ssa_name (vectype);
8002 182 : tree data_ref = fold_build2 (MEM_REF, vectype,
8003 : unshare_expr (ldataref_ptr),
8004 : dataref_offset);
8005 182 : vect_copy_ref_info (data_ref, DR_REF (load1_dr_info->dr));
8006 182 : gimple *g = gimple_build_assign (vec_oprnd2, data_ref);
8007 182 : vect_finish_stmt_generation (vinfo, stmt_info, g, gsi);
8008 : }
8009 :
8010 736 : tree v = vec_oprnd2;
8011 3068 : for (int i = 0; i < units_log2; ++i)
8012 : {
8013 2332 : tree new_temp = make_ssa_name (vectype);
8014 2332 : gimple *g = gimple_build_assign (new_temp, VEC_PERM_EXPR,
8015 : (zero_vec
8016 0 : && (use_whole_vector[i]
8017 0 : != scan_store_kind_perm))
8018 : ? zero_vec : vec_oprnd1, v,
8019 2332 : perms[i]);
8020 2332 : vect_finish_stmt_generation (vinfo, stmt_info, g, gsi);
8021 :
8022 2332 : if (zero_vec && use_whole_vector[i] == scan_store_kind_lshift_cond)
8023 : {
8024             : /* The whole-vector shift shifted in zero bits; if *init
8025             : is not initializer_zerop, we need to replace those zeroed
8026             : elements with the corresponding elements from vec_oprnd1. */
8027 0 : tree_vector_builder vb (masktype, nunits, 1);
8028 0 : for (unsigned HOST_WIDE_INT k = 0; k < nunits; ++k)
8029 0 : vb.quick_push (k < (HOST_WIDE_INT_1U << i)
8030 : ? boolean_false_node : boolean_true_node);
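            : /* E.g. for i == 1 and nunits == 8 this builds the mask
            :    { 0, 0, 1, 1, 1, 1, 1, 1 }; the two zero lanes shifted in
            :    by the whole-vector shift take their values from
            :    vec_oprnd1 below. */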
8031 :
8032 0 : tree new_temp2 = make_ssa_name (vectype);
8033 0 : g = gimple_build_assign (new_temp2, VEC_COND_EXPR, vb.build (),
8034 : new_temp, vec_oprnd1);
8035 0 : vect_finish_stmt_generation (vinfo, stmt_info,
8036 : g, gsi);
8037 0 : new_temp = new_temp2;
8038 0 : }
8039 :
8040 : /* For exclusive scan, perform the perms[i] permutation once
8041 : more. */
8042 2332 : if (i == 0
8043 1100 : && STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) == 4
8044 728 : && v == vec_oprnd2)
8045 : {
8046 364 : v = new_temp;
8047 364 : --i;
8048 364 : continue;
8049 : }
8050 :
8051 1968 : tree new_temp2 = make_ssa_name (vectype);
8052 1968 : g = gimple_build_assign (new_temp2, code, v, new_temp);
8053 1968 : vect_finish_stmt_generation (vinfo, stmt_info, g, gsi);
8054 :
8055 1968 : v = new_temp2;
8056 : }
8057 :
8058 736 : tree new_temp = make_ssa_name (vectype);
8059 736 : gimple *g = gimple_build_assign (new_temp, code, orig, v);
8060 736 : vect_finish_stmt_generation (vinfo, stmt_info, g, gsi);
8061 :
8062 736 : tree last_perm_arg = new_temp;
8063 : /* For exclusive scan, new_temp computed above is the exclusive scan
8064 : prefix sum. Turn it into inclusive prefix sum for the broadcast
8065 : of the last element into orig. */
8066 736 : if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) == 4)
8067 : {
8068 364 : last_perm_arg = make_ssa_name (vectype);
8069 364 : g = gimple_build_assign (last_perm_arg, code, new_temp, vec_oprnd2);
8070 364 : vect_finish_stmt_generation (vinfo, stmt_info, g, gsi);
8071 : }
8072 :
8073 736 : orig = make_ssa_name (vectype);
8074 2208 : g = gimple_build_assign (orig, VEC_PERM_EXPR, last_perm_arg,
8075 736 : last_perm_arg, perms[units_log2]);
8076 736 : vect_finish_stmt_generation (vinfo, stmt_info, g, gsi);
8077 :
8078 736 : if (!inscan_var_store)
8079 : {
8080 368 : tree data_ref = fold_build2 (MEM_REF, vectype,
8081 : unshare_expr (dataref_ptr),
8082 : dataref_offset);
8083 368 : vect_copy_ref_info (data_ref, DR_REF (dr_info->dr));
8084 368 : g = gimple_build_assign (data_ref, new_temp);
8085 368 : vect_finish_stmt_generation (vinfo, stmt_info, g, gsi);
8086 : }
8087 : }
8088 :
8089 512 : if (inscan_var_store)
8090 624 : for (unsigned j = 0; j < vec_oprnds3.length (); j++)
8091 : {
8092 368 : if (j != 0)
8093 112 : dataref_offset = int_const_binop (PLUS_EXPR, dataref_offset, bump);
8094 :
8095 368 : tree data_ref = fold_build2 (MEM_REF, vectype,
8096 : unshare_expr (dataref_ptr),
8097 : dataref_offset);
8098 368 : vect_copy_ref_info (data_ref, DR_REF (dr_info->dr));
8099 368 : gimple *g = gimple_build_assign (data_ref, orig);
8100 368 : vect_finish_stmt_generation (vinfo, stmt_info, g, gsi);
8101 : }
8102 512 : return true;
8103 512 : }
8104 :
8105 :
8106 : /* Function vectorizable_store.
8107 :
8108 : Check if STMT_INFO defines a non scalar data-ref (array/pointer/structure)
8109 : that can be vectorized.
8110 : If COST_VEC is passed, calculate costs but don't change anything,
8111 : otherwise, vectorize STMT_INFO: create a vectorized stmt to replace
8112 : it, and insert it at GSI.
8113 : Return true if STMT_INFO is vectorizable in this way. */
8114 :
8115 : static bool
8116 2070707 : vectorizable_store (vec_info *vinfo,
8117 : stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
8118 : slp_tree slp_node,
8119 : stmt_vector_for_cost *cost_vec)
8120 : {
8121 2070707 : tree data_ref;
8122 2070707 : tree vec_oprnd = NULL_TREE;
8123 2070707 : tree elem_type;
8124 2070707 : loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
8125 2070707 : class loop *loop = NULL;
8126 2070707 : machine_mode vec_mode;
8127 2070707 : tree dummy;
8128 2070707 : enum vect_def_type rhs_dt = vect_unknown_def_type;
8129 2070707 : enum vect_def_type mask_dt = vect_unknown_def_type;
8130 2070707 : tree dataref_ptr = NULL_TREE;
8131 2070707 : tree dataref_offset = NULL_TREE;
8132 2070707 : gimple *ptr_incr = NULL;
8133 2070707 : int j;
8134 2070707 : stmt_vec_info first_stmt_info;
8135 2070707 : bool grouped_store;
8136 2070707 : unsigned int group_size, i;
8137 2070707 : unsigned int vec_num;
8138 2070707 : bb_vec_info bb_vinfo = dyn_cast <bb_vec_info> (vinfo);
8139 2070707 : tree aggr_type;
8140 2070707 : poly_uint64 vf;
8141 2070707 : vec_load_store_type vls_type;
8142 2070707 : tree ref_type;
8143 :
8144 2070707 : if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
8145 : return false;
8146 :
8147 2070707 : if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
8148 232992 : && cost_vec)
8149 : return false;
8150 :
8151 : /* Is vectorizable store? */
8152 :
8153 1837715 : tree mask_vectype = NULL_TREE;
8154 1837715 : slp_tree mask_node = NULL;
8155 1837715 : if (gassign *assign = dyn_cast <gassign *> (stmt_info->stmt))
8156 : {
8157 1767444 : tree scalar_dest = gimple_assign_lhs (assign);
8158 1767444 : if (TREE_CODE (scalar_dest) == VIEW_CONVERT_EXPR
8159 1767444 : && is_pattern_stmt_p (stmt_info))
8160 1672 : scalar_dest = TREE_OPERAND (scalar_dest, 0);
8161 1767444 : if (TREE_CODE (scalar_dest) != ARRAY_REF
8162 1767444 : && TREE_CODE (scalar_dest) != BIT_FIELD_REF
8163 : && TREE_CODE (scalar_dest) != INDIRECT_REF
8164 : && TREE_CODE (scalar_dest) != COMPONENT_REF
8165 : && TREE_CODE (scalar_dest) != IMAGPART_EXPR
8166 : && TREE_CODE (scalar_dest) != REALPART_EXPR
8167 : && TREE_CODE (scalar_dest) != MEM_REF)
8168 : return false;
8169 : }
8170 : else
8171 : {
8172 728341 : gcall *call = dyn_cast <gcall *> (stmt_info->stmt);
8173 12403 : if (!call || !gimple_call_internal_p (call))
8174 : return false;
8175 :
8176 8330 : internal_fn ifn = gimple_call_internal_fn (call);
8177 8330 : if (!internal_store_fn_p (ifn))
8178 : return false;
8179 :
8180 1901 : int mask_index = internal_fn_mask_index (ifn);
8181 1901 : if (mask_index >= 0)
8182 1901 : mask_index = vect_slp_child_index_for_operand (stmt_info, mask_index);
8183 1901 : if (mask_index >= 0
8184 1901 : && !vect_check_scalar_mask (vinfo, slp_node, mask_index,
8185 : &mask_node, &mask_dt,
8186 : &mask_vectype))
8187 : return false;
8188 : }
8189 :
8190 1354801 : tree vectype = SLP_TREE_VECTYPE (slp_node), rhs_vectype = NULL_TREE;
8191 1354801 : poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
8192 :
8193 1354801 : if (loop_vinfo)
8194 : {
8195 226212 : loop = LOOP_VINFO_LOOP (loop_vinfo);
8196 226212 : vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
8197 : }
8198 : else
8199 : vf = 1;
8200 1354801 : vec_num = vect_get_num_copies (vinfo, slp_node);
8201 :
8202 : /* FORNOW. This restriction should be relaxed. */
8203 1354801 : if (loop
8204 1355076 : && nested_in_vect_loop_p (loop, stmt_info)
8205 1355084 : && vec_num > 1)
8206 : {
8207 8 : if (dump_enabled_p ())
8208 0 : dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
8209 : "multiple types in nested loop.\n");
8210 8 : return false;
8211 : }
8212 :
8213 1354793 : slp_tree op_node;
8214 1354793 : if (!vect_check_store_rhs (vinfo, stmt_info, slp_node,
8215 : &op_node, &rhs_dt, &rhs_vectype, &vls_type))
8216 : return false;
8217 :
8218 1354769 : elem_type = TREE_TYPE (vectype);
8219 1354769 : vec_mode = TYPE_MODE (vectype);
8220 :
8221 1354769 : if (!STMT_VINFO_DATA_REF (stmt_info))
8222 : return false;
8223 :
8224 1354769 : vect_load_store_data _ls_data{};
8225 1354769 : vect_load_store_data &ls = slp_node->get_data (_ls_data);
8226 1354769 : if (cost_vec
8227 1354769 : && !get_load_store_type (vinfo, stmt_info, vectype, slp_node, mask_node,
8228 : vls_type, &_ls_data))
8229 : return false;
8230             : /* Temporary aliases to the analysis data; the data should not be
8231             : modified through these. */
8232 1354193 : const vect_memory_access_type memory_access_type = ls.memory_access_type;
8233 1354193 : const dr_alignment_support alignment_support_scheme
8234 : = ls.alignment_support_scheme;
8235 1354193 : const int misalignment = ls.misalignment;
8236 1354193 : const poly_int64 poffset = ls.poffset;
8237 :
8238 1354193 : if (slp_node->ldst_lanes
8239 0 : && memory_access_type != VMAT_LOAD_STORE_LANES)
8240 : {
8241 0 : if (dump_enabled_p ())
8242 0 : dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
8243 : "discovered store-lane but cannot use it.\n");
8244 0 : return false;
8245 : }
8246 :
8247 1354193 : if (mask_node)
8248 : {
8249 1811 : if (memory_access_type == VMAT_CONTIGUOUS)
8250 : {
8251 616 : if (!VECTOR_MODE_P (vec_mode)
8252 3090 : || !can_vec_mask_load_store_p (vec_mode,
8253 1545 : TYPE_MODE (mask_vectype), false))
8254 114 : return false;
8255 : }
8256 266 : else if (memory_access_type != VMAT_LOAD_STORE_LANES
8257 266 : && (!mat_gather_scatter_p (memory_access_type)
8258 242 : || (memory_access_type == VMAT_GATHER_SCATTER_LEGACY
8259 170 : && !VECTOR_BOOLEAN_TYPE_P (mask_vectype))))
8260 : {
8261 24 : if (dump_enabled_p ())
8262 24 : dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
8263 : "unsupported access type for masked store.\n");
8264 24 : return false;
8265 : }
8266 242 : else if (memory_access_type == VMAT_GATHER_SCATTER_EMULATED)
8267 : {
8268 72 : if (dump_enabled_p ())
8269 24 : dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
8270 : "unsupported masked emulated scatter.\n");
8271 72 : return false;
8272 : }
8273 : }
8274 : else
8275 : {
8276             : /* FORNOW. In some cases we can vectorize even if the data-type is not
8277             : supported (e.g. array initialization with 0). */
8278 1352382 : if (!can_implement_p (mov_optab, vec_mode))
8279 : return false;
8280 : }
8281 :
8282 1353983 : dr_vec_info *dr_info = STMT_VINFO_DR_INFO (stmt_info), *first_dr_info = NULL;
8283 1353983 : grouped_store = (STMT_VINFO_GROUPED_ACCESS (stmt_info)
8284 2502583 : && !mat_gather_scatter_p (memory_access_type));
8285 1148600 : if (grouped_store)
8286 : {
8287 1148600 : first_stmt_info = DR_GROUP_FIRST_ELEMENT (stmt_info);
8288 1148600 : first_dr_info = STMT_VINFO_DR_INFO (first_stmt_info);
8289 1148600 : group_size = DR_GROUP_SIZE (first_stmt_info);
8290 : }
8291 : else
8292 : {
8293 1353983 : first_stmt_info = stmt_info;
8294 1353983 : first_dr_info = dr_info;
8295 : group_size = 1;
8296 : }
8297 :
8298 1353983 : if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) > 1 && cost_vec)
8299 : {
8300 1076 : if (!check_scan_store (vinfo, stmt_info, vectype, rhs_dt, slp_node,
8301 : mask_node, memory_access_type))
8302 : return false;
8303 : }
8304 :
8305 2707198 : bool costing_p = cost_vec;
8306 1353215 : if (costing_p) /* transformation not required. */
8307 : {
8308 810801 : if (loop_vinfo
8309 162638 : && LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo))
8310 76001 : check_load_store_for_partial_vectors (loop_vinfo, vectype, slp_node,
8311 : vls_type, group_size, &ls,
8312 : mask_node);
8313 :
8314 810801 : if (!vect_maybe_update_slp_op_vectype (op_node, vectype)
8315 810801 : || (mask_node
8316 1056 : && !vect_maybe_update_slp_op_vectype (mask_node,
8317 : mask_vectype)))
8318 : {
8319 0 : if (dump_enabled_p ())
8320 0 : dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
8321 : "incompatible vector types for invariants\n");
8322 0 : return false;
8323 : }
8324 :
8325 810801 : if (dump_enabled_p ()
8326 : && memory_access_type != VMAT_ELEMENTWISE
8327 15065 : && memory_access_type != VMAT_STRIDED_SLP
8328 14389 : && memory_access_type != VMAT_INVARIANT
8329 825190 : && alignment_support_scheme != dr_aligned)
8330 4977 : dump_printf_loc (MSG_NOTE, vect_location,
8331 : "Vectorizing an unaligned access.\n");
8332 :
8333 810801 : SLP_TREE_TYPE (slp_node) = store_vec_info_type;
8334 810801 : slp_node->data = new vect_load_store_data (std::move (ls));
8335 : }
8336 :
8337 : /* Transform. */
8338 :
8339 1353983 : ensure_base_align (dr_info);
8340 :
8341 1353983 : if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) >= 3)
8342 : {
8343 1024 : gcc_assert (memory_access_type == VMAT_CONTIGUOUS);
8344 1024 : gcc_assert (SLP_TREE_LANES (slp_node) == 1);
8345 1024 : if (costing_p)
8346 : {
8347 512 : unsigned int inside_cost = 0, prologue_cost = 0;
8348 512 : if (vls_type == VLS_STORE_INVARIANT)
8349 0 : prologue_cost += record_stmt_cost (cost_vec, 1, scalar_to_vec,
8350 : slp_node, 0, vect_prologue);
8351 512 : vect_get_store_cost (vinfo, stmt_info, slp_node, 1,
8352 : alignment_support_scheme, misalignment,
8353 : &inside_cost, cost_vec);
8354 :
8355 512 : if (dump_enabled_p ())
8356 492 : dump_printf_loc (MSG_NOTE, vect_location,
8357 : "vect_model_store_cost: inside_cost = %d, "
8358 : "prologue_cost = %d .\n",
8359 : inside_cost, prologue_cost);
8360 :
8361 512 : return true;
8362 : }
8363 512 : return vectorizable_scan_store (vinfo, stmt_info, slp_node, gsi);
8364 : }
8365 :
8366 : /* FORNOW */
8367 1352959 : gcc_assert (!grouped_store
8368 : || !loop
8369 : || !nested_in_vect_loop_p (loop, stmt_info));
8370 :
8371 1352959 : grouped_store = false;
8372 1352959 : first_stmt_info = SLP_TREE_SCALAR_STMTS (slp_node)[0];
8373 1352959 : gcc_assert (!STMT_VINFO_GROUPED_ACCESS (first_stmt_info)
8374 : || (DR_GROUP_FIRST_ELEMENT (first_stmt_info) == first_stmt_info));
8375 1352959 : first_dr_info = STMT_VINFO_DR_INFO (first_stmt_info);
8376 :
8377 1352959 : ref_type = get_group_alias_ptr_type (first_stmt_info);
8378 :
8379 1352959 : if (!costing_p && dump_enabled_p ())
8380 12240 : dump_printf_loc (MSG_NOTE, vect_location, "transform store.\n");
8381 :
8382 1352959 : if (memory_access_type == VMAT_ELEMENTWISE
8383 1352959 : || memory_access_type == VMAT_STRIDED_SLP)
8384 : {
8385 29508 : unsigned inside_cost = 0, prologue_cost = 0;
8386 29508 : gimple_stmt_iterator incr_gsi;
8387 29508 : bool insert_after;
8388 29508 : tree offvar = NULL_TREE;
8389 29508 : tree ivstep;
8390 29508 : tree running_off;
8391 29508 : tree stride_base, stride_step, alias_off;
8392 29508 : tree vec_oprnd = NULL_TREE;
8393 29508 : tree dr_offset;
8394 : /* Checked by get_load_store_type. */
8395 29508 : unsigned int const_nunits = nunits.to_constant ();
8396 :
8397 29508 : gcc_assert (!LOOP_VINFO_FULLY_MASKED_P (loop_vinfo));
8398 29508 : gcc_assert (!nested_in_vect_loop_p (loop, stmt_info));
8399 :
8400 29508 : dr_offset = get_dr_vinfo_offset (vinfo, first_dr_info);
8401 29508 : stride_base
8402 29508 : = fold_build_pointer_plus
8403 : (DR_BASE_ADDRESS (first_dr_info->dr),
8404 : size_binop (PLUS_EXPR,
8405 : convert_to_ptrofftype (dr_offset),
8406 : convert_to_ptrofftype (DR_INIT (first_dr_info->dr))));
8407 29508 : stride_step = fold_convert (sizetype, DR_STEP (first_dr_info->dr));
8408 :
8409 : /* For a store with loop-invariant (but other than power-of-2)
8410 : stride (i.e. not a grouped access) like so:
8411 :
8412 : for (i = 0; i < n; i += stride)
8413 : array[i] = ...;
8414 :
8415 : we generate a new induction variable and new stores from
8416 : the components of the (vectorized) rhs:
8417 :
8418 : for (j = 0; ; j += VF*stride)
8419 : vectemp = ...;
8420 : tmp1 = vectemp[0];
8421 : array[j] = tmp1;
8422 : tmp2 = vectemp[1];
8423 : array[j + stride] = tmp2;
8424 : ...
8425 : */
8426 :
8427 : /* ??? Modify local copies of alignment_support_scheme and
8428 : misalignment, but this part of analysis should be done
8429 : earlier and remembered, likewise the chosen load mode. */
8430 29508 : const dr_alignment_support tem = alignment_support_scheme;
8431 29508 : dr_alignment_support alignment_support_scheme = tem;
8432 29508 : const int tem2 = misalignment;
8433 29508 : int misalignment = tem2;
8434 :
8435 29508 : unsigned nstores = const_nunits;
8436 29508 : unsigned lnel = 1;
8437 29508 : tree ltype = elem_type;
8438 29508 : tree lvectype = vectype;
8439 29508 : HOST_WIDE_INT n = gcd (group_size, const_nunits);
8440 29508 : if (n == const_nunits)
8441 : {
8442 2934 : int mis_align = dr_misalignment (first_dr_info, vectype);
8443             : /* With VF > 1 we advance the DR by step; if that step is
8444             : constant and the access only aligned when performed VF times,
8445             : DR alignment analysis can still treat it as aligned, since it
8446             : assumes contiguous accesses. But that is not how we generate
8447             : code here, so adjust for this. */
8448 2934 : if (maybe_gt (vf, 1u)
8449 4457 : && !multiple_p (DR_STEP_ALIGNMENT (first_dr_info->dr),
8450 4228 : DR_TARGET_ALIGNMENT (first_dr_info)))
8451 229 : mis_align = -1;
8452 2934 : dr_alignment_support dr_align
8453 2934 : = vect_supportable_dr_alignment (vinfo, dr_info, vectype,
8454 : mis_align);
8455 2934 : if (dr_align == dr_aligned
8456 2934 : || dr_align == dr_unaligned_supported)
8457 : {
8458 29508 : nstores = 1;
8459 29508 : lnel = const_nunits;
8460 29508 : ltype = vectype;
8461 29508 : lvectype = vectype;
8462 29508 : alignment_support_scheme = dr_align;
8463 29508 : misalignment = mis_align;
8464 : }
8465 : }
8466 26574 : else if (n > 1)
8467 : {
8468 2061 : nstores = const_nunits / n;
8469 2061 : lnel = n;
8470 2061 : ltype = build_vector_type (elem_type, n);
8471 2061 : lvectype = vectype;
8472 2061 : int mis_align = dr_misalignment (first_dr_info, ltype);
8473 2061 : if (maybe_gt (vf, 1u)
8474 4122 : && !multiple_p (DR_STEP_ALIGNMENT (first_dr_info->dr),
8475 3426 : DR_TARGET_ALIGNMENT (first_dr_info)))
8476 696 : mis_align = -1;
8477 2061 : dr_alignment_support dr_align
8478 2061 : = vect_supportable_dr_alignment (vinfo, dr_info, ltype,
8479 : mis_align);
8480 2061 : alignment_support_scheme = dr_align;
8481 2061 : misalignment = mis_align;
8482 :
8483             : /* First check whether the vec_extract optab supports extraction
8484             : of the vector elements directly; if not, try the fallbacks below. */
8485 2061 : scalar_mode elmode = SCALAR_TYPE_MODE (elem_type);
8486 2061 : machine_mode vmode;
8487 4122 : if (!VECTOR_MODE_P (TYPE_MODE (vectype))
8488 2243 : || !related_vector_mode (TYPE_MODE (vectype), elmode,
8489 2061 : n).exists (&vmode)
8490 1845 : || (convert_optab_handler (vec_extract_optab,
8491 1845 : TYPE_MODE (vectype), vmode)
8492 : == CODE_FOR_nothing)
8493 2061 : || !(dr_align == dr_aligned
8494 182 : || dr_align == dr_unaligned_supported))
8495 : {
8496 : /* Try to avoid emitting an extract of vector elements
8497 : by performing the extracts using an integer type of the
8498 : same size, extracting from a vector of those and then
8499 : re-interpreting it as the original vector type if
8500 : supported. */
8501 1879 : unsigned lsize = n * GET_MODE_BITSIZE (elmode);
8502 1879 : unsigned int lnunits = const_nunits / n;
8503 : /* If we can't construct such a vector fall back to
8504 : element extracts from the original vector type and
8505 : element size stores. */
8506 1879 : if (int_mode_for_size (lsize, 0).exists (&elmode)
8507 1879 : && VECTOR_MODE_P (TYPE_MODE (vectype))
8508 1879 : && related_vector_mode (TYPE_MODE (vectype), elmode,
8509 1879 : lnunits).exists (&vmode)
8510 1853 : && (convert_optab_handler (vec_extract_optab,
8511 : vmode, elmode)
8512 : != CODE_FOR_nothing))
8513 : {
8514 1853 : nstores = lnunits;
8515 1853 : lnel = n;
8516 1853 : ltype = build_nonstandard_integer_type (lsize, 1);
8517 1853 : lvectype = build_vector_type (ltype, nstores);
8518 : }
8519 : /* Else fall back to vector extraction anyway.
8520 : Fewer stores are more important than avoiding spilling
8521 : of the vector we extract from. Compared to the
8522 : construction case in vectorizable_load no store-forwarding
8523 : issue exists here for reasonable archs. But only
8524 : if the store is supported. */
8525 26 : else if (!(dr_align == dr_aligned
8526 26 : || dr_align == dr_unaligned_supported))
8527 : {
8528 : nstores = const_nunits;
8529 : lnel = 1;
8530 : ltype = elem_type;
8531 : lvectype = vectype;
8532 : }
8533 : }
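            : /* For instance (hypothetically assuming a target without a direct
            :    V2QI extract from V16QI): with group_size == 2 and a V16QI
            :    vector, the integer fallback above selects HImode chunks,
            :    i.e. lvectype V8HI with eight 16-bit stores per vector. */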
8534 : }
8535 29508 : unsigned align;
8536 29508 : if (alignment_support_scheme == dr_aligned)
8537 1249 : align = known_alignment (DR_TARGET_ALIGNMENT (first_dr_info));
8538 : else
8539 28259 : align = dr_alignment (vect_dr_behavior (vinfo, first_dr_info));
8540 : /* Alignment is at most the access size if we do multiple stores. */
8541 29508 : if (nstores > 1)
8542 26574 : align = MIN (tree_to_uhwi (TYPE_SIZE_UNIT (ltype)), align);
8543 29508 : ltype = build_aligned_type (ltype, align * BITS_PER_UNIT);
8544 29508 : int ncopies = vec_num;
8545 :
8546 29508 : if (!costing_p)
8547 : {
8548 3355 : ivstep = stride_step;
8549 3355 : ivstep = fold_build2 (MULT_EXPR, TREE_TYPE (ivstep), ivstep,
8550 : build_int_cst (TREE_TYPE (ivstep), vf));
8551 :
8552 3355 : standard_iv_increment_position (loop, &incr_gsi, &insert_after);
8553 :
8554 3355 : stride_base = cse_and_gimplify_to_preheader (loop_vinfo, stride_base);
8555 3355 : ivstep = cse_and_gimplify_to_preheader (loop_vinfo, ivstep);
8556 3355 : create_iv (stride_base, PLUS_EXPR, ivstep, NULL, loop, &incr_gsi,
8557 : insert_after, &offvar, NULL);
8558 :
8559 3355 : stride_step = cse_and_gimplify_to_preheader (loop_vinfo, stride_step);
8560 : }
8561 :
8562 29508 : alias_off = build_int_cst (ref_type, 0);
8563 29508 : auto_vec<tree> vec_oprnds;
8564             : /* When costing some adjacent vector stores, we'd like to cost with
8565             : the total number of them once instead of costing each one by one. */
8566 29508 : unsigned int n_adjacent_stores = 0;
8567 29508 : running_off = offvar;
8568 29508 : if (!costing_p)
8569 3355 : vect_get_slp_defs (op_node, &vec_oprnds);
8570 29508 : unsigned int group_el = 0;
8571 29508 : unsigned HOST_WIDE_INT elsz
8572 29508 : = tree_to_uhwi (TYPE_SIZE_UNIT (TREE_TYPE (vectype)));
8573 70332 : for (j = 0; j < ncopies; j++)
8574 : {
8575 40824 : if (!costing_p)
8576 : {
8577 5186 : vec_oprnd = vec_oprnds[j];
8578 : /* Pun the vector to extract from if necessary. */
8579 5186 : if (lvectype != vectype)
8580 : {
8581 1122 : tree tem = make_ssa_name (lvectype);
8582 1122 : tree cvt = build1 (VIEW_CONVERT_EXPR, lvectype, vec_oprnd);
8583 1122 : gimple *pun = gimple_build_assign (tem, cvt);
8584 1122 : vect_finish_stmt_generation (vinfo, stmt_info, pun, gsi);
8585 1122 : vec_oprnd = tem;
8586 : }
8587 : }
8588 181869 : for (i = 0; i < nstores; i++)
8589 : {
8590 141045 : if (costing_p)
8591 : {
8592 124595 : n_adjacent_stores++;
8593 124595 : continue;
8594 : }
8595 16450 : tree newref, newoff;
8596 16450 : gimple *incr, *assign;
8597 16450 : tree size = TYPE_SIZE (ltype);
8598 : /* Extract the i'th component. */
8599 16450 : tree pos = fold_build2 (MULT_EXPR, bitsizetype,
8600 : bitsize_int (i), size);
8601 16450 : tree elem = fold_build3 (BIT_FIELD_REF, ltype, vec_oprnd,
8602 : size, pos);
8603 :
8604 16450 : elem = force_gimple_operand_gsi (gsi, elem, true, NULL_TREE, true,
8605 : GSI_SAME_STMT);
8606 :
8607 16450 : tree this_off = build_int_cst (TREE_TYPE (alias_off),
8608 16450 : group_el * elsz);
8609 16450 : newref = build2 (MEM_REF, ltype, running_off, this_off);
8610 16450 : vect_copy_ref_info (newref, DR_REF (first_dr_info->dr));
8611 :
8612 : /* And store it to *running_off. */
8613 16450 : assign = gimple_build_assign (newref, elem);
8614 16450 : vect_finish_stmt_generation (vinfo, stmt_info, assign, gsi);
8615 :
8616 16450 : group_el += lnel;
8617 16450 : if (group_el == group_size)
8618 : {
8619 14773 : newoff = copy_ssa_name (running_off, NULL);
8620 14773 : incr = gimple_build_assign (newoff, POINTER_PLUS_EXPR,
8621 : running_off, stride_step);
8622 14773 : vect_finish_stmt_generation (vinfo, stmt_info, incr, gsi);
8623 :
8624 14773 : running_off = newoff;
8625 14773 : group_el = 0;
8626 : }
8627 : }
8628 : }
8629 :
8630 29508 : if (costing_p)
8631 : {
8632 26153 : if (n_adjacent_stores > 0)
8633 : {
8634 : /* Take a single lane vector type store as scalar
8635 : store to avoid ICE like 110776. */
8636 26153 : if (VECTOR_TYPE_P (ltype)
8637 26153 : && maybe_ne (TYPE_VECTOR_SUBPARTS (ltype), 1U))
8638 1648 : vect_get_store_cost (vinfo, stmt_info, slp_node,
8639 : n_adjacent_stores, alignment_support_scheme,
8640 : misalignment, &inside_cost, cost_vec);
8641 : else
8642 24505 : inside_cost
8643 24505 : += record_stmt_cost (cost_vec, n_adjacent_stores,
8644 : scalar_store, slp_node, 0, vect_body);
8645 : /* Only need vector extracting when there are more
8646 : than one stores. */
8647 26153 : if (nstores > 1)
8648 24103 : inside_cost
8649 24103 : += record_stmt_cost (cost_vec, n_adjacent_stores,
8650 : vec_to_scalar, slp_node, 0, vect_body);
8651 : }
8652 26153 : if (dump_enabled_p ())
8653 676 : dump_printf_loc (MSG_NOTE, vect_location,
8654 : "vect_model_store_cost: inside_cost = %d, "
8655 : "prologue_cost = %d .\n",
8656 : inside_cost, prologue_cost);
8657 : }
8658 :
8659 29508 : return true;
8660 29508 : }
8661 :
8662 1323451 : gcc_assert (alignment_support_scheme);
8663 1323451 : vec_loop_masks *loop_masks
8664 194862 : = (loop_vinfo && LOOP_VINFO_FULLY_MASKED_P (loop_vinfo)
8665 1323451 : ? &LOOP_VINFO_MASKS (loop_vinfo)
8666 11 : : NULL);
8667 11 : vec_loop_lens *loop_lens
8668 194862 : = (loop_vinfo && LOOP_VINFO_FULLY_WITH_LENGTH_P (loop_vinfo)
8669 : ? &LOOP_VINFO_LENS (loop_vinfo)
8670 0 : : NULL);
8671 :
8672             : /* Both vect_transform_stmt and vect_analyze_stmt reach here, with one
8673             : difference: the lens and the masks may both be enabled during
8674             : analysis, but never during the transform. A fully masked loop
8675             : should not also use the length-based approach. */
8676 1323451 : if (cost_vec == NULL)
8677             : /* The cost_vec is NULL during the transform. */
8678 539315 : gcc_assert ((!loop_lens || !loop_masks));
8679 :
8680 : /* Targets with store-lane instructions must not require explicit
8681 : realignment. vect_supportable_dr_alignment always returns either
8682 : dr_aligned or dr_unaligned_supported for masked operations. */
8683 1323451 : gcc_assert ((memory_access_type != VMAT_LOAD_STORE_LANES
8684 : && !mask_node
8685 : && !loop_masks)
8686 : || alignment_support_scheme == dr_aligned
8687 : || alignment_support_scheme == dr_unaligned_supported);
8688 :
8689 1323451 : tree offset = NULL_TREE;
8690 1323451 : if (!known_eq (poffset, 0))
8691 4634 : offset = size_int (poffset);
8692 :
8693 1323451 : tree bump;
8694 1323451 : tree vec_offset = NULL_TREE;
8695 1323451 : if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
8696 : {
8697 1470 : aggr_type = NULL_TREE;
8698 1470 : bump = NULL_TREE;
8699 : }
8700 1321981 : else if (mat_gather_scatter_p (memory_access_type))
8701 : {
8702 0 : aggr_type = elem_type;
8703 0 : if (!costing_p)
8704 : {
8705 0 : tree vtype = ls.ls_type ? ls.ls_type : vectype;
8706 0 : vect_get_strided_load_store_ops (stmt_info, slp_node, vtype,
8707 : ls.strided_offset_vectype,
8708 : loop_vinfo, gsi,
8709 : &bump, &vec_offset, loop_lens);
8710 : }
8711 : }
8712 : else
8713 : {
8714 1321981 : if (memory_access_type == VMAT_LOAD_STORE_LANES)
8715 0 : aggr_type = build_array_type_nelts (elem_type, group_size * nunits);
8716 : else
8717 : aggr_type = vectype;
8718 1321981 : if (!costing_p)
8719 538845 : bump = vect_get_data_ptr_increment (vinfo, gsi, dr_info, aggr_type,
8720 : memory_access_type, loop_lens);
8721 : }
8722 :
8723 1323451 : if (loop_vinfo && mask_node && !costing_p)
8724 544 : LOOP_VINFO_HAS_MASK_STORE (loop_vinfo) = true;
8725 :
8726 : /* In case the vectorization factor (VF) is bigger than the number
8727 : of elements that we can fit in a vectype (nunits), we have to generate
8728 : more than one vector stmt - i.e - we need to "unroll" the
8729 : vector stmt by a factor VF/nunits. */
8730 :
8731 1323451 : auto_vec<tree> dr_chain (group_size);
8732 1323451 : auto_vec<tree> vec_masks;
8733 1323451 : tree vec_mask = NULL;
8734 1323451 : auto_delete_vec<auto_vec<tree>> gvec_oprnds (group_size);
8735 5965556 : for (i = 0; i < group_size; i++)
8736 3318654 : gvec_oprnds.quick_push (new auto_vec<tree> ());
8737 :
8738 1323451 : if (memory_access_type == VMAT_LOAD_STORE_LANES)
8739 : {
8740 0 : const internal_fn lanes_ifn = ls.lanes_ifn;
8741 :
8742 0 : if (costing_p)
8743             : /* Update all incoming store operand nodes; the general handling
8744             : above only handles the mask and the first store operand node. */
8745 0 : for (slp_tree child : SLP_TREE_CHILDREN (slp_node))
8746 0 : if (child != mask_node
8747 0 : && !vect_maybe_update_slp_op_vectype (child, vectype))
8748 : {
8749 0 : if (dump_enabled_p ())
8750 0 : dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
8751 : "incompatible vector types for invariants\n");
8752 0 : return false;
8753 : }
8754 0 : unsigned inside_cost = 0, prologue_cost = 0;
8755             : /* When costing some adjacent vector stores, we'd like to cost with
8756             : the total number of them once instead of costing each one by one. */
8757 0 : unsigned int n_adjacent_stores = 0;
8758 0 : int ncopies = vec_num / group_size;
8759 0 : for (j = 0; j < ncopies; j++)
8760 : {
8761 0 : if (j == 0)
8762 : {
8763 0 : if (!costing_p)
8764 : {
8765 0 : if (mask_node)
8766 : {
8767 0 : vect_get_slp_defs (mask_node, &vec_masks);
8768 0 : vec_mask = vec_masks[0];
8769 : }
8770 0 : dataref_ptr
8771 0 : = vect_create_data_ref_ptr (vinfo, first_stmt_info,
8772 : aggr_type, NULL, offset, &dummy,
8773 : gsi, &ptr_incr, false, bump);
8774 : }
8775 : }
8776 0 : else if (!costing_p)
8777 : {
8778 0 : gcc_assert (!LOOP_VINFO_USING_SELECT_VL_P (loop_vinfo));
8779 0 : if (mask_node)
8780 0 : vec_mask = vec_masks[j];
8781 0 : dataref_ptr = bump_vector_ptr (vinfo, dataref_ptr, ptr_incr, gsi,
8782 : stmt_info, bump);
8783 : }
8784 :
8785 0 : if (costing_p)
8786 : {
8787 0 : n_adjacent_stores += group_size;
8788 0 : continue;
8789 : }
8790 :
8791 : /* Get an array into which we can store the individual vectors. */
8792 0 : tree vec_array = create_vector_array (vectype, group_size);
8793 :
8794 : /* Invalidate the current contents of VEC_ARRAY. This should
8795 : become an RTL clobber too, which prevents the vector registers
8796 : from being upward-exposed. */
8797 0 : vect_clobber_variable (vinfo, stmt_info, gsi, vec_array);
8798 :
8799 : /* Store the individual vectors into the array. */
8800 0 : for (i = 0; i < group_size; i++)
8801 : {
8802 0 : slp_tree child;
8803 0 : if (i == 0 || !mask_node)
8804 0 : child = SLP_TREE_CHILDREN (slp_node)[i];
8805 : else
8806 0 : child = SLP_TREE_CHILDREN (slp_node)[i + 1];
8807 0 : vec_oprnd = SLP_TREE_VEC_DEFS (child)[j];
8808 0 : write_vector_array (vinfo, stmt_info, gsi, vec_oprnd, vec_array,
8809 : i);
8810 : }
8811 :
8812 0 : tree final_mask = NULL;
8813 0 : tree final_len = NULL;
8814 0 : tree bias = NULL;
8815 0 : if (loop_masks)
8816 0 : final_mask = vect_get_loop_mask (loop_vinfo, gsi, loop_masks,
8817 : ncopies, vectype, j);
8818 0 : if (vec_mask)
8819 0 : final_mask = prepare_vec_mask (loop_vinfo, mask_vectype, final_mask,
8820 : vec_mask, gsi);
8821 :
8822 0 : if (lanes_ifn == IFN_MASK_LEN_STORE_LANES)
8823 : {
8824 0 : if (loop_lens)
8825 0 : final_len = vect_get_loop_len (loop_vinfo, gsi, loop_lens,
8826 : ncopies, vectype, j, 1, true);
8827 : else
8828 0 : final_len = size_int (TYPE_VECTOR_SUBPARTS (vectype));
8829 0 : signed char biasval
8830 0 : = LOOP_VINFO_PARTIAL_LOAD_STORE_BIAS (loop_vinfo);
8831 0 : bias = build_int_cst (intQI_type_node, biasval);
8832 0 : if (!final_mask)
8833 : {
8834 0 : mask_vectype = truth_type_for (vectype);
8835 0 : final_mask = build_minus_one_cst (mask_vectype);
8836 : }
8837 : }
8838 :
8839 0 : gcall *call;
8840 0 : if (final_len && final_mask)
8841 : {
8842 : /* Emit:
8843 : MASK_LEN_STORE_LANES (DATAREF_PTR, ALIAS_PTR, VEC_MASK,
8844 : LEN, BIAS, VEC_ARRAY). */
8845 0 : unsigned int align = TYPE_ALIGN (TREE_TYPE (vectype));
8846 0 : tree alias_ptr = build_int_cst (ref_type, align);
8847 0 : call = gimple_build_call_internal (IFN_MASK_LEN_STORE_LANES, 6,
8848 : dataref_ptr, alias_ptr,
8849 : final_mask, final_len, bias,
8850 : vec_array);
8851 : }
8852 0 : else if (final_mask)
8853 : {
8854 : /* Emit:
8855 : MASK_STORE_LANES (DATAREF_PTR, ALIAS_PTR, VEC_MASK,
8856 : VEC_ARRAY). */
8857 0 : unsigned int align = TYPE_ALIGN (TREE_TYPE (vectype));
8858 0 : tree alias_ptr = build_int_cst (ref_type, align);
8859 0 : call = gimple_build_call_internal (IFN_MASK_STORE_LANES, 4,
8860 : dataref_ptr, alias_ptr,
8861 : final_mask, vec_array);
8862 : }
8863 : else
8864 : {
8865 : /* Emit:
8866 : MEM_REF[...all elements...] = STORE_LANES (VEC_ARRAY). */
8867 0 : data_ref = create_array_ref (aggr_type, dataref_ptr, ref_type);
8868 0 : call = gimple_build_call_internal (IFN_STORE_LANES, 1, vec_array);
8869 0 : gimple_call_set_lhs (call, data_ref);
8870 : }
8871 0 : gimple_call_set_nothrow (call, true);
8872 0 : vect_finish_stmt_generation (vinfo, stmt_info, call, gsi);
8873 :
8874 : /* Record that VEC_ARRAY is now dead. */
8875 0 : vect_clobber_variable (vinfo, stmt_info, gsi, vec_array);
8876 : }
8877 :
8878 0 : if (costing_p)
8879 : {
8880 0 : if (n_adjacent_stores > 0)
8881 0 : vect_get_store_cost (vinfo, stmt_info, slp_node, n_adjacent_stores,
8882 : alignment_support_scheme, misalignment,
8883 : &inside_cost, cost_vec);
8884 0 : if (dump_enabled_p ())
8885 0 : dump_printf_loc (MSG_NOTE, vect_location,
8886 : "vect_model_store_cost: inside_cost = %d, "
8887 : "prologue_cost = %d .\n",
8888 : inside_cost, prologue_cost);
8889 : }
8890 :
8891 0 : return true;
8892 : }
8893 :
8894 1323451 : if (mat_gather_scatter_p (memory_access_type))
8895 : {
8896 1470 : gcc_assert (!grouped_store || ls.ls_type);
8897 1470 : if (ls.ls_type)
8898 0 : vectype = ls.ls_type;
8899 1470 : auto_vec<tree> vec_offsets;
8900 1470 : unsigned int inside_cost = 0, prologue_cost = 0;
8901 1470 : int num_stmts = vec_num;
8902 3340 : for (j = 0; j < num_stmts; j++)
8903 : {
8904 1870 : gimple *new_stmt;
8905 1870 : if (j == 0)
8906 : {
8907 1470 : if (costing_p && vls_type == VLS_STORE_INVARIANT)
8908 210 : prologue_cost += record_stmt_cost (cost_vec, 1, scalar_to_vec,
8909 : slp_node, 0, vect_prologue);
8910 : else if (!costing_p)
8911 : {
8912 : /* Since the store is not grouped, DR_GROUP_SIZE is 1, and
8913 : DR_CHAIN is of size 1. */
8914 470 : gcc_assert (group_size == 1);
8915 470 : vect_get_slp_defs (op_node, gvec_oprnds[0]);
8916 470 : if (mask_node)
8917 70 : vect_get_slp_defs (mask_node, &vec_masks);
8918 :
8919 470 : if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
8920 470 : vect_get_gather_scatter_ops (loop, slp_node,
8921 : &dataref_ptr, &vec_offsets);
8922 : else
8923 0 : dataref_ptr
8924 0 : = vect_create_data_ref_ptr (vinfo, first_stmt_info,
8925 : aggr_type, NULL, offset,
8926 : &dummy, gsi, &ptr_incr, false,
8927 : bump);
8928 : }
8929 : }
8930 400 : else if (!costing_p)
8931 : {
8932 34 : gcc_assert (!LOOP_VINFO_USING_SELECT_VL_P (loop_vinfo));
8933 34 : if (!STMT_VINFO_GATHER_SCATTER_P (stmt_info))
8934 0 : dataref_ptr = bump_vector_ptr (vinfo, dataref_ptr, ptr_incr,
8935 : gsi, stmt_info, bump);
8936 : }
8937 :
8938 2584 : new_stmt = NULL;
8939 714 : if (!costing_p)
8940 : {
8941 504 : vec_oprnd = (*gvec_oprnds[0])[j];
8942 504 : if (mask_node)
8943 90 : vec_mask = vec_masks[j];
8944 : /* We should have caught mismatched types earlier. */
8945 504 : gcc_assert (ls.ls_type
8946 : || useless_type_conversion_p
8947 : (vectype, TREE_TYPE (vec_oprnd)));
8948 : }
8949 504 : tree final_mask = NULL_TREE;
8950 2374 : tree final_len = NULL_TREE;
8951 2374 : tree bias = NULL_TREE;
8952 504 : if (!costing_p)
8953 : {
8954 504 : if (loop_masks)
8955 0 : final_mask = vect_get_loop_mask (loop_vinfo, gsi,
8956 : loop_masks, num_stmts,
8957 : vectype, j);
8958 504 : if (vec_mask)
8959 90 : final_mask = prepare_vec_mask (loop_vinfo, mask_vectype,
8960 : final_mask, vec_mask, gsi);
8961 : }
8962 :
8963 1870 : unsigned align = get_object_alignment (DR_REF (first_dr_info->dr));
8964 1870 : tree alias_align_ptr = build_int_cst (ref_type, align);
8965 1870 : if (memory_access_type == VMAT_GATHER_SCATTER_IFN)
8966 : {
8967 0 : if (costing_p)
8968 : {
8969 0 : if (ls.supported_offset_vectype)
8970 0 : inside_cost
8971 0 : += record_stmt_cost (cost_vec, 1, vector_stmt,
8972 : slp_node, 0, vect_body);
8973 0 : if (ls.supported_scale)
8974 0 : inside_cost
8975 0 : += record_stmt_cost (cost_vec, 1, vector_stmt,
8976 : slp_node, 0, vect_body);
8977 :
8978 0 : unsigned int cnunits = vect_nunits_for_cost (vectype);
8979 0 : inside_cost
8980 0 : += record_stmt_cost (cost_vec, cnunits, scalar_store,
8981 : slp_node, 0, vect_body);
8982 1870 : continue;
8983 0 : }
8984 :
8985 0 : if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
8986 0 : vec_offset = vec_offsets[j];
8987 :
8988 0 : tree scale = size_int (SLP_TREE_GS_SCALE (slp_node));
8989 0 : bool strided = !VECTOR_TYPE_P (TREE_TYPE (vec_offset));
8990 :
8991 : /* Perform the offset conversion and scaling if necessary. */
8992 0 : if (!strided
8993 0 : && (ls.supported_offset_vectype || ls.supported_scale))
8994 : {
8995 0 : gimple_seq stmts = NULL;
8996 0 : if (ls.supported_offset_vectype)
8997 0 : vec_offset = gimple_convert
8998 0 : (&stmts, ls.supported_offset_vectype, vec_offset);
8999 0 : if (ls.supported_scale)
9000 : {
9001 : /* Only scale the vec_offset if we haven't already. */
9002 0 : if (STMT_VINFO_GATHER_SCATTER_P (stmt_info)
9003 0 : || j == 0)
9004 : {
9005 0 : tree mult_cst = build_int_cst
9006 0 : (TREE_TYPE (TREE_TYPE (vec_offset)),
9007 0 : SLP_TREE_GS_SCALE (slp_node) / ls.supported_scale);
9008 0 : tree mult = build_vector_from_val
9009 0 : (TREE_TYPE (vec_offset), mult_cst);
9010 0 : vec_offset = gimple_build
9011 0 : (&stmts, MULT_EXPR, TREE_TYPE (vec_offset),
9012 : vec_offset, mult);
9013 : }
9014 0 : scale = size_int (ls.supported_scale);
9015 : }
9016 0 : gsi_insert_seq_before (gsi, stmts, GSI_SAME_STMT);
9017 : }
9018 :
9019 0 : if (ls.gs.ifn == IFN_MASK_LEN_SCATTER_STORE)
9020 : {
9021 0 : if (loop_lens)
9022 0 : final_len = vect_get_loop_len (loop_vinfo, gsi,
9023 : loop_lens, num_stmts,
9024 : vectype, j, 1, true);
9025 : else
9026 0 : final_len = size_int (TYPE_VECTOR_SUBPARTS (vectype));
9027 :
9028 0 : signed char biasval
9029 0 : = LOOP_VINFO_PARTIAL_LOAD_STORE_BIAS (loop_vinfo);
9030 0 : bias = build_int_cst (intQI_type_node, biasval);
9031 0 : if (!final_mask)
9032 : {
9033 0 : mask_vectype = truth_type_for (vectype);
9034 0 : final_mask = build_minus_one_cst (mask_vectype);
9035 : }
9036 : }
9037 :
9038 0 : if (ls.ls_type)
9039 : {
9040 0 : gimple *conv_stmt
9041 0 : = gimple_build_assign (make_ssa_name (vectype),
9042 : VIEW_CONVERT_EXPR,
9043 : build1 (VIEW_CONVERT_EXPR, vectype,
9044 : vec_oprnd));
9045 0 : vect_finish_stmt_generation (vinfo, stmt_info, conv_stmt,
9046 : gsi);
9047 0 : vec_oprnd = gimple_get_lhs (conv_stmt);
9048 : }
9049 :
9050 0 : gcall *call;
9051 0 : if (final_len && final_mask)
9052 : {
9053 0 : if (VECTOR_TYPE_P (TREE_TYPE (vec_offset)))
9054 0 : call = gimple_build_call_internal (
9055 : IFN_MASK_LEN_SCATTER_STORE, 8, dataref_ptr,
9056 : alias_align_ptr,
9057 : vec_offset, scale, vec_oprnd, final_mask, final_len,
9058 : bias);
9059 : else
9060 : /* A non-vector offset indicates that we prefer to take
9061 : MASK_LEN_STRIDED_STORE instead of
9062 : IFN_MASK_SCATTER_STORE with a direct stride argument.
9063 : As in the gather case we have already checked the
9064 : alignment for a scatter and assume that the
9065 : strided store has the same requirements. */
9066 0 : call = gimple_build_call_internal (
9067 : IFN_MASK_LEN_STRIDED_STORE, 6, dataref_ptr,
9068 : vec_offset, vec_oprnd, final_mask, final_len, bias);
9069 : }
9070 0 : else if (final_mask)
9071 0 : call = gimple_build_call_internal
9072 0 : (IFN_MASK_SCATTER_STORE, 6, dataref_ptr,
9073 : alias_align_ptr,
9074 : vec_offset, scale, vec_oprnd, final_mask);
9075 : else
9076 0 : call = gimple_build_call_internal (IFN_SCATTER_STORE, 5,
9077 : dataref_ptr,
9078 : alias_align_ptr,
9079 : vec_offset,
9080 : scale, vec_oprnd);
9081 0 : gimple_call_set_nothrow (call, true);
9082 0 : vect_finish_stmt_generation (vinfo, stmt_info, call, gsi);
9083 0 : new_stmt = call;
9084 : }
9085 1870 : else if (memory_access_type == VMAT_GATHER_SCATTER_LEGACY)
9086 : {
9087 : /* The builtin decls path for scatter is legacy, x86 only. */
9088 330 : gcc_assert (nunits.is_constant ()
9089 : && (!final_mask
9090 : || SCALAR_INT_MODE_P
9091 : (TYPE_MODE (TREE_TYPE (final_mask)))));
9092 330 : if (costing_p)
9093 : {
9094 199 : unsigned int cnunits = vect_nunits_for_cost (vectype);
9095 199 : inside_cost
9096 199 : += record_stmt_cost (cost_vec, cnunits, scalar_store,
9097 : slp_node, 0, vect_body);
9098 199 : continue;
9099 199 : }
9100 :
9101 131 : tree offset_vectype = TREE_TYPE (vec_offsets[0]);
9102 131 : poly_uint64 offset_nunits
9103 131 : = TYPE_VECTOR_SUBPARTS (offset_vectype);
9104 131 : if (known_eq (nunits, offset_nunits))
9105 : {
9106 55 : new_stmt = vect_build_one_scatter_store_call
9107 110 : (vinfo, stmt_info, slp_node, gsi,
9108 55 : ls.gs.decl, dataref_ptr, vec_offsets[j],
9109 : vec_oprnd, final_mask);
9110 55 : vect_finish_stmt_generation (vinfo, stmt_info,
9111 : new_stmt, gsi);
9112 : }
9113 76 : else if (known_eq (nunits, offset_nunits * 2))
9114 : {
9115 : /* We have an offset vector with half the number of
9116 : lanes but the builtins will store full vectype
9117 : data from the lower lanes. */
9118 30 : new_stmt = vect_build_one_scatter_store_call
9119 60 : (vinfo, stmt_info, slp_node, gsi, ls.gs.decl,
9120 30 : dataref_ptr, vec_offsets[2 * j],
9121 : vec_oprnd, final_mask);
9122 30 : vect_finish_stmt_generation (vinfo, stmt_info,
9123 : new_stmt, gsi);
9124 30 : int count = nunits.to_constant ();
9125 30 : vec_perm_builder sel (count, count, 1);
9126 30 : sel.quick_grow (count);
9127 382 : for (int i = 0; i < count; ++i)
9128 352 : sel[i] = i | (count / 2);
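 : /* E.g. for COUNT == 8 this yields SEL = {4,5,6,7,4,5,6,7}, a
 : permutation moving the high half of VEC_OPRND into the low
 : lanes so the second builtin call below can store it. */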
9129 30 : vec_perm_indices indices (sel, 2, count);
9130 30 : tree perm_mask
9131 30 : = vect_gen_perm_mask_checked (vectype, indices);
9132 30 : new_stmt = gimple_build_assign (NULL_TREE, VEC_PERM_EXPR,
9133 : vec_oprnd, vec_oprnd,
9134 : perm_mask);
9135 30 : vec_oprnd = make_ssa_name (vectype);
9136 30 : gimple_set_lhs (new_stmt, vec_oprnd);
9137 30 : vect_finish_stmt_generation (vinfo, stmt_info,
9138 : new_stmt, gsi);
9139 30 : if (final_mask)
9140 : {
9141 20 : new_stmt = gimple_build_assign (NULL_TREE,
9142 : VEC_UNPACK_HI_EXPR,
9143 : final_mask);
9144 20 : final_mask = make_ssa_name
9145 20 : (truth_type_for (offset_vectype));
9146 20 : gimple_set_lhs (new_stmt, final_mask);
9147 20 : vect_finish_stmt_generation (vinfo, stmt_info,
9148 : new_stmt, gsi);
9149 : }
9150 :
9151 30 : new_stmt = vect_build_one_scatter_store_call
9152 60 : (vinfo, stmt_info, slp_node, gsi, ls.gs.decl,
9153 30 : dataref_ptr, vec_offsets[2 * j + 1],
9154 : vec_oprnd, final_mask);
9155 30 : vect_finish_stmt_generation (vinfo, stmt_info,
9156 : new_stmt, gsi);
9157 30 : }
9158 46 : else if (known_eq (nunits * 2, offset_nunits))
9159 : {
9160 : /* We have an offset vector with double the number of
9161 : lanes. Select the low/high part accordingly. */
9162 46 : vec_offset = vec_offsets[j / 2];
9163 46 : if (j & 1)
9164 : {
9165 23 : int count = offset_nunits.to_constant ();
9166 23 : vec_perm_builder sel (count, count, 1);
9167 23 : sel.quick_grow (count);
9168 263 : for (int i = 0; i < count; ++i)
9169 240 : sel[i] = i | (count / 2);
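 : /* As above this selects the high half into the low lanes,
 : e.g. {4,5,6,7,4,5,6,7} for COUNT == 8, here applied to the
 : wider offset vector. */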
9170 23 : vec_perm_indices indices (sel, 2, count);
9171 23 : tree perm_mask = vect_gen_perm_mask_checked
9172 23 : (TREE_TYPE (vec_offset), indices);
9173 23 : new_stmt = gimple_build_assign (NULL_TREE,
9174 : VEC_PERM_EXPR,
9175 : vec_offset,
9176 : vec_offset,
9177 : perm_mask);
9178 23 : vec_offset = make_ssa_name (TREE_TYPE (vec_offset));
9179 23 : gimple_set_lhs (new_stmt, vec_offset);
9180 23 : vect_finish_stmt_generation (vinfo, stmt_info,
9181 : new_stmt, gsi);
9182 23 : }
9183 :
9184 46 : new_stmt = vect_build_one_scatter_store_call
9185 46 : (vinfo, stmt_info, slp_node, gsi,
9186 : ls.gs.decl, dataref_ptr, vec_offset,
9187 : vec_oprnd, final_mask);
9188 46 : vect_finish_stmt_generation (vinfo, stmt_info,
9189 : new_stmt, gsi);
9190 : }
9191 : else
9192 0 : gcc_unreachable ();
9193 : }
9194 : else
9195 : {
9196 : /* Emulated scatter. */
9197 1540 : gcc_assert (!final_mask);
9198 1540 : if (costing_p)
9199 : {
9200 1167 : unsigned int cnunits = vect_nunits_for_cost (vectype);
9201 : /* For an emulated scatter we cost N offset vector element extracts
9202 : (we assume the scalar scaling and the ptr + offset add are
9203 : consumed by the store). */
9204 1167 : inside_cost
9205 1167 : += record_stmt_cost (cost_vec, cnunits, vec_to_scalar,
9206 : slp_node, 0, vect_body);
9207 : /* N scalar stores plus extracting the elements. */
9208 1167 : inside_cost
9209 1167 : += record_stmt_cost (cost_vec, cnunits, vec_to_scalar,
9210 : slp_node, 0, vect_body);
9211 1167 : inside_cost
9212 1167 : += record_stmt_cost (cost_vec, cnunits, scalar_store,
9213 : slp_node, 0, vect_body);
9214 1167 : continue;
9215 1167 : }
9216 :
9217 373 : tree offset_vectype = TREE_TYPE (vec_offsets[0]);
9218 373 : unsigned HOST_WIDE_INT const_nunits = nunits.to_constant ();
9219 373 : unsigned HOST_WIDE_INT const_offset_nunits
9220 373 : = TYPE_VECTOR_SUBPARTS (offset_vectype).to_constant ();
9221 373 : vec<constructor_elt, va_gc> *ctor_elts;
9222 373 : vec_alloc (ctor_elts, const_nunits);
9223 373 : gimple_seq stmts = NULL;
9224 373 : tree elt_type = TREE_TYPE (vectype);
9225 373 : unsigned HOST_WIDE_INT elt_size
9226 373 : = tree_to_uhwi (TYPE_SIZE (elt_type));
9227 : /* We support offset vectors with more elements
9228 : than the data vector for now. */
9229 373 : unsigned HOST_WIDE_INT factor
9230 : = const_offset_nunits / const_nunits;
9231 373 : vec_offset = vec_offsets[j / factor];
9232 373 : unsigned elt_offset
9233 373 : = (j % factor) * const_nunits;
9234 373 : tree idx_type = TREE_TYPE (TREE_TYPE (vec_offset));
9235 373 : tree scale = size_int (SLP_TREE_GS_SCALE (slp_node));
9236 373 : tree ltype = build_aligned_type (TREE_TYPE (vectype), align);
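 : /* E.g. with an 8-lane offset vector and a 4-lane data vector
 : FACTOR is 2: data vector J uses offset vector J/2, taking
 : lanes 0-3 for even J and lanes 4-7 for odd J via ELT_OFFSET.
 : The loop below then emits, roughly, per lane K:
 : _off = (sizetype) BIT_FIELD_REF <vec_offset, ...> * scale;
 : MEM[dataref_ptr + _off] = BIT_FIELD_REF <vec_oprnd, ...>; */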
9237 1519 : for (unsigned k = 0; k < const_nunits; ++k)
9238 : {
9239 : /* Compute the offsetted pointer. */
9240 1146 : tree boff = size_binop (MULT_EXPR, TYPE_SIZE (idx_type),
9241 : bitsize_int (k + elt_offset));
9242 1146 : tree idx
9243 2292 : = gimple_build (&stmts, BIT_FIELD_REF, idx_type,
9244 1146 : vec_offset, TYPE_SIZE (idx_type), boff);
9245 1146 : idx = gimple_convert (&stmts, sizetype, idx);
9246 1146 : idx = gimple_build (&stmts, MULT_EXPR, sizetype,
9247 : idx, scale);
9248 1146 : tree ptr
9249 1146 : = gimple_build (&stmts, PLUS_EXPR,
9250 1146 : TREE_TYPE (dataref_ptr),
9251 : dataref_ptr, idx);
9252 1146 : ptr = gimple_convert (&stmts, ptr_type_node, ptr);
9253 : /* Extract the element to be stored. */
9254 1146 : tree elt
9255 2292 : = gimple_build (&stmts, BIT_FIELD_REF,
9256 1146 : TREE_TYPE (vectype),
9257 1146 : vec_oprnd, TYPE_SIZE (elt_type),
9258 1146 : bitsize_int (k * elt_size));
9259 1146 : gsi_insert_seq_before (gsi, stmts, GSI_SAME_STMT);
9260 1146 : stmts = NULL;
9261 1146 : tree ref
9262 1146 : = build2 (MEM_REF, ltype, ptr,
9263 : build_int_cst (ref_type, 0));
9264 1146 : new_stmt = gimple_build_assign (ref, elt);
9265 1146 : vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
9266 : }
9267 :
9268 373 : slp_node->push_vec_def (new_stmt);
9269 : }
9270 : }
9271 :
9272 1470 : if (costing_p && dump_enabled_p ())
9273 80 : dump_printf_loc (MSG_NOTE, vect_location,
9274 : "vect_model_store_cost: inside_cost = %d, "
9275 : "prologue_cost = %d .\n",
9276 : inside_cost, prologue_cost);
9277 :
9278 1470 : return true;
9279 1470 : }
9280 :
9281 1321981 : gcc_assert (memory_access_type == VMAT_CONTIGUOUS
9282 : || memory_access_type == VMAT_CONTIGUOUS_DOWN
9283 : || memory_access_type == VMAT_CONTIGUOUS_REVERSE);
9284 :
9285 1321981 : unsigned inside_cost = 0, prologue_cost = 0;
9286 : /* For costing some adjacent vector stores, we'd like to cost with
9287 : the total number of them once instead of costing each one by one. */
9288 1321981 : unsigned int n_adjacent_stores = 0;
9289 1321981 : auto_vec<tree> result_chain (group_size);
9290 1321981 : auto_vec<tree, 1> vec_oprnds;
9291 1321981 : gimple *new_stmt;
9292 1321981 : if (!costing_p)
9293 : {
9294 : /* Get vectorized arguments for SLP_NODE. */
9295 538845 : vect_get_slp_defs (op_node, &vec_oprnds);
9296 538845 : vec_oprnd = vec_oprnds[0];
9297 538845 : if (mask_node)
9298 : {
9299 475 : vect_get_slp_defs (mask_node, &vec_masks);
9300 475 : vec_mask = vec_masks[0];
9301 : }
9302 : }
9303 :
9304 : /* We should have caught mismatched types earlier. */
9305 538845 : gcc_assert (costing_p
9306 : || useless_type_conversion_p (vectype, TREE_TYPE (vec_oprnd)));
9307 1321981 : bool simd_lane_access_p
9308 1321981 : = STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) != 0;
9309 1321981 : if (!costing_p
9310 1321981 : && simd_lane_access_p
9311 4374 : && !loop_masks
9312 4374 : && TREE_CODE (DR_BASE_ADDRESS (first_dr_info->dr)) == ADDR_EXPR
9313 4374 : && VAR_P (TREE_OPERAND (DR_BASE_ADDRESS (first_dr_info->dr), 0))
9314 4374 : && integer_zerop (get_dr_vinfo_offset (vinfo, first_dr_info))
9315 4374 : && integer_zerop (DR_INIT (first_dr_info->dr))
9316 1326355 : && alias_sets_conflict_p (get_alias_set (aggr_type),
9317 4374 : get_alias_set (TREE_TYPE (ref_type))))
9318 : {
9319 4366 : dataref_ptr = unshare_expr (DR_BASE_ADDRESS (first_dr_info->dr));
9320 4366 : dataref_offset = build_int_cst (ref_type, 0);
9321 : }
9322 1317615 : else if (!costing_p)
9323 1068950 : dataref_ptr = vect_create_data_ref_ptr (vinfo, first_stmt_info, aggr_type,
9324 : simd_lane_access_p ? loop : NULL,
9325 : offset, &dummy, gsi, &ptr_incr,
9326 : simd_lane_access_p, bump);
9327 :
9328 1321981 : new_stmt = NULL;
9329 1321981 : gcc_assert (!grouped_store);
9330 2938502 : for (i = 0; i < vec_num; i++)
9331 : {
9332 1616521 : if (!costing_p)
9333 667321 : vec_oprnd = vec_oprnds[i];
9334 :
9335 1616521 : if (memory_access_type == VMAT_CONTIGUOUS_REVERSE)
9336 : {
9337 3330 : if (costing_p)
9338 2190 : inside_cost += record_stmt_cost (cost_vec, 1, vec_perm,
9339 : slp_node, 0, vect_body);
9340 : else
9341 : {
9342 1140 : tree perm_mask = perm_mask_for_reverse (vectype);
9343 1140 : tree new_temp = make_ssa_name (vectype);
9344 :
9345 : /* Generate the permute statement. */
9346 1140 : gimple *perm_stmt
9347 1140 : = gimple_build_assign (new_temp, VEC_PERM_EXPR, vec_oprnd,
9348 : vec_oprnd, perm_mask);
9349 1140 : vect_finish_stmt_generation (vinfo, stmt_info, perm_stmt, gsi);
9350 :
9351 1140 : perm_stmt = SSA_NAME_DEF_STMT (new_temp);
9352 1616521 : vec_oprnd = new_temp;
9353 : }
9354 : }
9355 :
9356 1616521 : if (costing_p)
9357 : {
9358 949200 : n_adjacent_stores++;
9359 949200 : continue;
9360 : }
9361 :
9362 667321 : tree final_mask = NULL_TREE;
9363 667321 : tree final_len = NULL_TREE;
9364 667321 : tree bias = NULL_TREE;
9365 667321 : if (loop_masks)
9366 77 : final_mask = vect_get_loop_mask (loop_vinfo, gsi, loop_masks,
9367 : vec_num, vectype, i);
9368 667321 : if (vec_mask)
9369 696 : vec_mask = vec_masks[i];
9370 696 : if (vec_mask)
9371 696 : final_mask = prepare_vec_mask (loop_vinfo, mask_vectype, final_mask,
9372 : vec_mask, gsi);
9373 :
9374 667321 : if (i > 0)
9375 : /* Bump the vector pointer. */
9376 128476 : dataref_ptr = bump_vector_ptr (vinfo, dataref_ptr, ptr_incr, gsi,
9377 : stmt_info, bump);
9378 :
9379 667321 : unsigned misalign;
9380 667321 : unsigned HOST_WIDE_INT align;
9381 667321 : align = known_alignment (DR_TARGET_ALIGNMENT (first_dr_info));
9382 667321 : if (alignment_support_scheme == dr_aligned)
9383 : misalign = 0;
9384 307720 : else if (misalignment == DR_MISALIGNMENT_UNKNOWN)
9385 : {
9386 160165 : align = dr_alignment (vect_dr_behavior (vinfo, first_dr_info));
9387 160165 : misalign = 0;
9388 : }
9389 : else
9390 147555 : misalign = misalignment;
9391 667321 : if (dataref_offset == NULL_TREE
9392 661941 : && TREE_CODE (dataref_ptr) == SSA_NAME)
9393 181713 : set_ptr_info_alignment (get_ptr_info (dataref_ptr), align, misalign);
9394 667321 : align = least_bit_hwi (misalign | align);
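 : /* The lowest set bit of MISALIGN | ALIGN is the alignment we can
 : still guarantee; e.g. ALIGN == 16 with MISALIGN == 4 yields a
 : guaranteed alignment of 4 bytes. */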
9395 :
9396 : /* Compute the IFN to use when LOOP_LENS or final_mask is valid. */
9397 667321 : machine_mode vmode = TYPE_MODE (vectype);
9398 667321 : machine_mode new_vmode = vmode;
9399 667321 : internal_fn partial_ifn = IFN_LAST;
9400 667321 : if (loop_lens)
9401 : {
9402 0 : opt_machine_mode new_ovmode
9403 0 : = get_len_load_store_mode (vmode, false, &partial_ifn);
9404 0 : new_vmode = new_ovmode.require ();
9405 0 : unsigned factor
9406 0 : = (new_ovmode == vmode) ? 1 : GET_MODE_UNIT_SIZE (vmode);
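 : /* If the target only provides the length-controlled store on a
 : byte (VnQI) view of the vector, the loop length must be
 : expressed in bytes, hence the unit-size factor. */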
9407 0 : final_len = vect_get_loop_len (loop_vinfo, gsi, loop_lens,
9408 : vec_num, vectype, i, factor, true);
9409 : }
9410 667321 : else if (final_mask)
9411 : {
9412 708 : if (!can_vec_mask_load_store_p (vmode,
9413 708 : TYPE_MODE (TREE_TYPE (final_mask)),
9414 : false, &partial_ifn))
9415 0 : gcc_unreachable ();
9416 : }
9417 :
9418 667321 : if (partial_ifn == IFN_MASK_LEN_STORE)
9419 : {
9420 0 : if (!final_len)
9421 : {
9422 : /* Pass VF value to 'len' argument of
9423 : MASK_LEN_STORE if LOOP_LENS is invalid. */
9424 0 : final_len = size_int (TYPE_VECTOR_SUBPARTS (vectype));
9425 : }
9426 0 : if (!final_mask)
9427 : {
9428 : /* Pass all ones value to 'mask' argument of
9429 : MASK_LEN_STORE if final_mask is invalid. */
9430 0 : mask_vectype = truth_type_for (vectype);
9431 0 : final_mask = build_minus_one_cst (mask_vectype);
9432 : }
9433 : }
9434 667321 : if (final_len)
9435 : {
9436 0 : signed char biasval = LOOP_VINFO_PARTIAL_LOAD_STORE_BIAS (loop_vinfo);
9437 0 : bias = build_int_cst (intQI_type_node, biasval);
9438 : }
9439 :
9440 : /* Arguments are ready. Create the new vector stmt. */
9441 667321 : if (final_len)
9442 : {
9443 0 : gcall *call;
9444 0 : tree ptr = build_int_cst (ref_type, align * BITS_PER_UNIT);
9445 : /* A conversion is needed if the store value is wrapped with VnQI. */
9446 0 : if (vmode != new_vmode)
9447 : {
9448 0 : tree new_vtype
9449 0 : = build_vector_type_for_mode (unsigned_intQI_type_node,
9450 : new_vmode);
9451 0 : tree var = vect_get_new_ssa_name (new_vtype, vect_simple_var);
9452 0 : vec_oprnd = build1 (VIEW_CONVERT_EXPR, new_vtype, vec_oprnd);
9453 0 : gassign *new_stmt
9454 0 : = gimple_build_assign (var, VIEW_CONVERT_EXPR, vec_oprnd);
9455 0 : vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
9456 0 : vec_oprnd = var;
9457 : }
9458 :
9459 0 : if (partial_ifn == IFN_MASK_LEN_STORE)
9460 0 : call = gimple_build_call_internal (IFN_MASK_LEN_STORE, 6,
9461 : dataref_ptr, ptr, final_mask,
9462 : final_len, bias, vec_oprnd);
9463 : else
9464 0 : call = gimple_build_call_internal (IFN_LEN_STORE, 5,
9465 : dataref_ptr, ptr, final_len,
9466 : bias, vec_oprnd);
9467 0 : gimple_call_set_nothrow (call, true);
9468 0 : vect_finish_stmt_generation (vinfo, stmt_info, call, gsi);
9469 0 : new_stmt = call;
9470 : }
9471 667321 : else if (final_mask)
9472 : {
9473 708 : tree ptr = build_int_cst (ref_type, align * BITS_PER_UNIT);
9474 708 : gcall *call
9475 708 : = gimple_build_call_internal (IFN_MASK_STORE, 4, dataref_ptr,
9476 : ptr, final_mask, vec_oprnd);
9477 708 : gimple_call_set_nothrow (call, true);
9478 708 : vect_finish_stmt_generation (vinfo, stmt_info, call, gsi);
9479 708 : new_stmt = call;
9480 : }
9481 : else
9482 : {
9483 666613 : data_ref = fold_build2 (MEM_REF, vectype, dataref_ptr,
9484 : dataref_offset ? dataref_offset
9485 : : build_int_cst (ref_type, 0));
9486 666613 : if (alignment_support_scheme == dr_aligned
9487 666613 : && align >= TYPE_ALIGN_UNIT (vectype))
9488 : ;
9489 : else
9490 307182 : TREE_TYPE (data_ref)
9491 614364 : = build_aligned_type (TREE_TYPE (data_ref),
9492 : align * BITS_PER_UNIT);
9493 666613 : vect_copy_ref_info (data_ref, DR_REF (first_dr_info->dr));
9494 666613 : new_stmt = gimple_build_assign (data_ref, vec_oprnd);
9495 666613 : vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
9496 : }
9497 : }
9498 :
9499 1321981 : if (costing_p)
9500 : {
9501 783136 : if (n_adjacent_stores > 0)
9502 783136 : vect_get_store_cost (vinfo, stmt_info, slp_node, n_adjacent_stores,
9503 : alignment_support_scheme, misalignment,
9504 : &inside_cost, cost_vec);
9505 :
9506 : /* When vectorizing a store into the function result, assign
9507 : a penalty if the function returns in a multi-register location.
9508 : In this case we assume we'll end up having to spill the
9509 : vector result and do piecewise loads as a conservative estimate. */
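 : /* E.g. a value returned in a register pair gives NREGS == 2
 : below, charging one vector store for the spill plus two
 : scalar loads. */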
9510 783136 : tree base = get_base_address (STMT_VINFO_DATA_REF (stmt_info)->ref);
9511 783136 : if (base
9512 783136 : && (TREE_CODE (base) == RESULT_DECL
9513 732885 : || (DECL_P (base) && cfun_returns (base)))
9514 845081 : && !aggregate_value_p (base, cfun->decl))
9515 : {
9516 11053 : rtx reg = hard_function_value (TREE_TYPE (base), cfun->decl, 0, 1);
9517 : /* ??? Handle PARALLEL in some way. */
9518 11053 : if (REG_P (reg))
9519 : {
9520 10849 : int nregs = hard_regno_nregs (REGNO (reg), GET_MODE (reg));
9521 : /* Assume that a single reg-reg move is possible and cheap;
9522 : do not account for the vector to GP register move cost. */
9523 10849 : if (nregs > 1)
9524 : {
9525 : /* Spill. */
9526 10035 : prologue_cost
9527 10035 : += record_stmt_cost (cost_vec, 1, vector_store,
9528 : slp_node, 0, vect_epilogue);
9529 : /* Loads. */
9530 10035 : prologue_cost
9531 10035 : += record_stmt_cost (cost_vec, nregs, scalar_load,
9532 : slp_node, 0, vect_epilogue);
9533 : }
9534 : }
9535 : }
9536 783136 : if (dump_enabled_p ())
9537 13817 : dump_printf_loc (MSG_NOTE, vect_location,
9538 : "vect_model_store_cost: inside_cost = %d, "
9539 : "prologue_cost = %d .\n",
9540 : inside_cost, prologue_cost);
9541 : }
9542 :
9543 1321981 : return true;
9544 2676750 : }
9545 :
9546 : /* Given a vector type VECTYPE, turns permutation SEL into the equivalent
9547 : VECTOR_CST mask. No checks are made that the target platform supports the
9548 : mask, so callers may wish to test can_vec_perm_const_p separately, or use
9549 : vect_gen_perm_mask_checked. */
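 : /* For instance, for a 4-lane VECTYPE and SEL == {0,4,1,5} the
 : result is the VECTOR_CST {0,4,1,5} of type vector(4) ssizetype,
 : an interleave-low selector over the two input vectors. */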
9550 :
9551 : tree
9552 61438 : vect_gen_perm_mask_any (tree vectype, const vec_perm_indices &sel)
9553 : {
9554 61438 : tree mask_type;
9555 :
9556 61438 : poly_uint64 nunits = sel.length ();
9557 61438 : gcc_assert (known_eq (nunits, TYPE_VECTOR_SUBPARTS (vectype)));
9558 :
9559 61438 : mask_type = build_vector_type (ssizetype, nunits);
9560 61438 : return vec_perm_indices_to_tree (mask_type, sel);
9561 : }
9562 :
9563 : /* Checked version of vect_gen_perm_mask_any. Asserts can_vec_perm_const_p,
9564 : i.e. that the target supports the pattern _for arbitrary input vectors_. */
9565 :
9566 : tree
9567 58576 : vect_gen_perm_mask_checked (tree vectype, const vec_perm_indices &sel)
9568 : {
9569 58576 : machine_mode vmode = TYPE_MODE (vectype);
9570 58576 : gcc_assert (can_vec_perm_const_p (vmode, vmode, sel));
9571 58576 : return vect_gen_perm_mask_any (vectype, sel);
9572 : }
9573 :
9574 : /* Given vector variables X and Y that were generated for the scalar
9575 : STMT_INFO, generate instructions to permute the vector elements of X and Y
9576 : using permutation mask MASK_VEC, insert them at *GSI and return the
9577 : permuted vector variable. */
9578 :
9579 : static tree
9580 1444 : permute_vec_elements (vec_info *vinfo,
9581 : tree x, tree y, tree mask_vec, stmt_vec_info stmt_info,
9582 : gimple_stmt_iterator *gsi)
9583 : {
9584 1444 : tree vectype = TREE_TYPE (x);
9585 1444 : tree perm_dest, data_ref;
9586 1444 : gimple *perm_stmt;
9587 :
9588 1444 : tree scalar_dest = gimple_get_lhs (stmt_info->stmt);
9589 1444 : if (scalar_dest && TREE_CODE (scalar_dest) == SSA_NAME)
9590 1444 : perm_dest = vect_create_destination_var (scalar_dest, vectype);
9591 : else
9592 0 : perm_dest = vect_get_new_vect_var (vectype, vect_simple_var, NULL);
9593 1444 : data_ref = make_ssa_name (perm_dest);
9594 :
9595 : /* Generate the permute statement. */
9596 1444 : perm_stmt = gimple_build_assign (data_ref, VEC_PERM_EXPR, x, y, mask_vec);
9597 1444 : vect_finish_stmt_generation (vinfo, stmt_info, perm_stmt, gsi);
9598 :
9599 1444 : return data_ref;
9600 : }
9601 :
9602 : /* Hoist the definitions of all SSA uses on STMT out of the loop LOOP,
9603 : inserting them on the loop's preheader edge. Returns true if we
9604 : were successful in doing so (and thus STMT can then be moved),
9605 : otherwise returns false. HOIST_P indicates whether we actually want
9606 : to hoist the definitions; it is false when we are only costing. */
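 : /* For example, if STMT uses X_1 defined in LOOP as X_1 = Y_2 + 4
 : with Y_2 defined outside of it, a copy X_1' = Y_2 + 4 is placed
 : right before STMT (which the caller has already inserted on the
 : preheader edge) and STMT is rewritten to use X_1'. */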
9607 :
9608 : static bool
9609 4130 : hoist_defs_of_uses (gimple *stmt, class loop *loop, bool hoist_p)
9610 : {
9611 4130 : ssa_op_iter i;
9612 4130 : use_operand_p use_p;
9613 4130 : auto_vec<use_operand_p, 8> to_hoist;
9614 :
9615 7867 : FOR_EACH_SSA_USE_OPERAND (use_p, stmt, i, SSA_OP_USE)
9616 : {
9617 3765 : gimple *def_stmt = SSA_NAME_DEF_STMT (USE_FROM_PTR (use_p));
9618 3765 : if (!gimple_nop_p (def_stmt)
9619 3765 : && flow_bb_inside_loop_p (loop, gimple_bb (def_stmt)))
9620 : {
9621 : /* Make sure we don't need to recurse. While we could do
9622 : so in simple cases, when there are more complex use webs
9623 : we don't have an easy way to preserve stmt order to fulfil
9624 : dependencies within them. */
9625 111 : tree op2;
9626 111 : ssa_op_iter i2;
9627 111 : if (gimple_code (def_stmt) == GIMPLE_PHI
9628 111 : || (single_ssa_def_operand (def_stmt, SSA_OP_DEF)
9629 : == NULL_DEF_OPERAND_P))
9630 28 : return false;
9631 226 : FOR_EACH_SSA_TREE_OPERAND (op2, def_stmt, i2, SSA_OP_USE)
9632 : {
9633 143 : gimple *def_stmt2 = SSA_NAME_DEF_STMT (op2);
9634 143 : if (!gimple_nop_p (def_stmt2)
9635 143 : && flow_bb_inside_loop_p (loop, gimple_bb (def_stmt2)))
9636 : return false;
9637 : }
9638 83 : to_hoist.safe_push (use_p);
9639 : }
9640 : }
9641 :
9642 8204 : if (to_hoist.is_empty ())
9643 : return true;
9644 :
9645 59 : if (!hoist_p)
9646 : return true;
9647 :
9648 : /* Instead of moving defs we copy them so we can zero their UID
9649 : and not confuse dominance queries in the preheader. */
9650 9 : gimple_stmt_iterator gsi = gsi_for_stmt (stmt);
9651 36 : for (use_operand_p use_p : to_hoist)
9652 : {
9653 9 : gimple *def_stmt = SSA_NAME_DEF_STMT (USE_FROM_PTR (use_p));
9654 9 : gimple *copy = gimple_copy (def_stmt);
9655 9 : gimple_set_uid (copy, 0);
9656 9 : def_operand_p def_p = single_ssa_def_operand (def_stmt, SSA_OP_DEF);
9657 9 : tree new_def = duplicate_ssa_name (DEF_FROM_PTR (def_p), copy);
9658 9 : update_stmt (copy);
9659 9 : def_p = single_ssa_def_operand (copy, SSA_OP_DEF);
9660 9 : SET_DEF (def_p, new_def);
9661 9 : SET_USE (use_p, new_def);
9662 9 : gsi_insert_before (&gsi, copy, GSI_SAME_STMT);
9663 : }
9664 :
9665 : return true;
9666 4130 : }
9667 :
9668 : /* vectorizable_load.
9669 :
9670 : Check if STMT_INFO reads a non scalar data-ref (array/pointer/structure)
9671 : that can be vectorized.
9672 : If COST_VEC is passed, calculate costs but don't change anything,
9673 : otherwise, vectorize STMT_INFO: create a vectorized stmt to replace
9674 : it, and insert it at GSI.
9675 : Return true if STMT_INFO is vectorizable in this way. */
9676 :
9677 : static bool
9678 2128888 : vectorizable_load (vec_info *vinfo,
9679 : stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
9680 : slp_tree slp_node,
9681 : stmt_vector_for_cost *cost_vec)
9682 : {
9683 2128888 : tree scalar_dest;
9684 2128888 : tree vec_dest = NULL;
9685 2128888 : tree data_ref = NULL;
9686 2128888 : loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
9687 2128888 : class loop *loop = NULL;
9688 2128888 : class loop *containing_loop = gimple_bb (stmt_info->stmt)->loop_father;
9689 2128888 : bool nested_in_vect_loop = false;
9690 2128888 : tree elem_type;
9691 : /* Avoid a false positive uninitialized warning; see PR110652. */
9692 2128888 : tree new_temp = NULL_TREE;
9693 2128888 : machine_mode mode;
9694 2128888 : tree dummy;
9695 2128888 : tree dataref_ptr = NULL_TREE;
9696 2128888 : tree dataref_offset = NULL_TREE;
9697 2128888 : gimple *ptr_incr = NULL;
9698 2128888 : int i, j;
9699 2128888 : unsigned int group_size;
9700 2128888 : poly_uint64 group_gap_adj;
9701 2128888 : tree msq = NULL_TREE, lsq;
9702 2128888 : tree realignment_token = NULL_TREE;
9703 2128888 : gphi *phi = NULL;
9704 2128888 : bool grouped_load = false;
9705 2128888 : stmt_vec_info first_stmt_info;
9706 2128888 : stmt_vec_info first_stmt_info_for_drptr = NULL;
9707 2128888 : bool compute_in_loop = false;
9708 2128888 : class loop *at_loop;
9709 2128888 : int vec_num;
9710 2128888 : bb_vec_info bb_vinfo = dyn_cast <bb_vec_info> (vinfo);
9711 2128888 : poly_uint64 vf;
9712 2128888 : tree aggr_type;
9713 2128888 : tree ref_type;
9714 2128888 : enum vect_def_type mask_dt = vect_unknown_def_type;
9715 2128888 : enum vect_def_type els_dt = vect_unknown_def_type;
9716 :
9717 2128888 : if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
9718 : return false;
9719 :
9720 2128888 : if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
9721 232992 : && cost_vec)
9722 : return false;
9723 :
9724 1895896 : if (!STMT_VINFO_DATA_REF (stmt_info))
9725 : return false;
9726 :
9727 1519691 : tree mask_vectype = NULL_TREE;
9728 1519691 : tree els = NULL_TREE; tree els_vectype = NULL_TREE;
9729 :
9730 1519691 : int mask_index = -1;
9731 1519691 : int els_index = -1;
9732 1519691 : slp_tree mask_node = NULL;
9733 1519691 : slp_tree els_op = NULL;
9734 1519691 : if (gassign *assign = dyn_cast <gassign *> (stmt_info->stmt))
9735 : {
9736 1515239 : scalar_dest = gimple_assign_lhs (assign);
9737 1515239 : if (TREE_CODE (scalar_dest) != SSA_NAME)
9738 : return false;
9739 :
9740 704976 : tree_code code = gimple_assign_rhs_code (assign);
9741 704976 : if (code != ARRAY_REF
9742 704976 : && code != BIT_FIELD_REF
9743 704976 : && code != INDIRECT_REF
9744 486078 : && code != COMPONENT_REF
9745 486078 : && code != IMAGPART_EXPR
9746 350650 : && code != REALPART_EXPR
9747 350650 : && code != MEM_REF
9748 283 : && TREE_CODE_CLASS (code) != tcc_declaration)
9749 : return false;
9750 : }
9751 : else
9752 : {
9753 1425663 : gcall *call = dyn_cast <gcall *> (stmt_info->stmt);
9754 4452 : if (!call || !gimple_call_internal_p (call))
9755 : return false;
9756 :
9757 4452 : internal_fn ifn = gimple_call_internal_fn (call);
9758 4452 : if (!internal_load_fn_p (ifn))
9759 : return false;
9760 :
9761 3096 : scalar_dest = gimple_call_lhs (call);
9762 3096 : if (!scalar_dest)
9763 : return false;
9764 :
9765 3096 : mask_index = internal_fn_mask_index (ifn);
9766 3096 : if (mask_index >= 0)
9767 3096 : mask_index = vect_slp_child_index_for_operand (stmt_info, mask_index);
9768 3096 : if (mask_index >= 0
9769 3096 : && !vect_check_scalar_mask (vinfo, slp_node, mask_index,
9770 : &mask_node, &mask_dt, &mask_vectype))
9771 : return false;
9772 :
9773 3096 : els_index = internal_fn_else_index (ifn);
9774 3096 : if (els_index >= 0)
9775 3096 : els_index = vect_slp_child_index_for_operand (stmt_info, els_index);
9776 3096 : if (els_index >= 0
9777 3096 : && !vect_is_simple_use (vinfo, slp_node, els_index,
9778 : &els, &els_op, &els_dt, &els_vectype))
9779 : return false;
9780 : }
9781 :
9782 708005 : tree vectype = SLP_TREE_VECTYPE (slp_node);
9783 708005 : poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
9784 :
9785 708005 : if (loop_vinfo)
9786 : {
9787 492747 : loop = LOOP_VINFO_LOOP (loop_vinfo);
9788 492747 : nested_in_vect_loop = nested_in_vect_loop_p (loop, stmt_info);
9789 492747 : vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
9790 : }
9791 : else
9792 : vf = 1;
9793 :
9794 708005 : vec_num = vect_get_num_copies (vinfo, slp_node);
9795 :
9796 : /* FORNOW. This restriction should be relaxed. */
9797 708005 : if (nested_in_vect_loop && vec_num > 1)
9798 : {
9799 316 : if (dump_enabled_p ())
9800 66 : dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
9801 : "multiple types in nested loop.\n");
9802 316 : return false;
9803 : }
9804 :
9805 707689 : elem_type = TREE_TYPE (vectype);
9806 707689 : mode = TYPE_MODE (vectype);
9807 :
9808 : /* FORNOW. In some cases we can vectorize even if the data-type is
9809 : not supported (e.g. data copies). */
9810 707689 : if (!can_implement_p (mov_optab, mode))
9811 : {
9812 0 : if (dump_enabled_p ())
9813 0 : dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
9814 : "Aligned load, but unsupported type.\n");
9815 0 : return false;
9816 : }
9817 :
9818 : /* Check if the load is a part of an interleaving chain. */
9819 707689 : if (STMT_VINFO_GROUPED_ACCESS (stmt_info))
9820 : {
9821 309315 : grouped_load = true;
9822 : /* FORNOW */
9823 309315 : gcc_assert (!nested_in_vect_loop);
9824 309315 : gcc_assert (!STMT_VINFO_GATHER_SCATTER_P (stmt_info));
9825 :
9826 309315 : first_stmt_info = DR_GROUP_FIRST_ELEMENT (stmt_info);
9827 309315 : group_size = DR_GROUP_SIZE (first_stmt_info);
9828 :
9829 : /* Invalidate assumptions made by dependence analysis when vectorization
9830 : on the unrolled body effectively re-orders stmts. */
9831 309315 : if (STMT_VINFO_MIN_NEG_DIST (stmt_info) != 0
9832 309315 : && maybe_gt (LOOP_VINFO_VECT_FACTOR (loop_vinfo),
9833 : STMT_VINFO_MIN_NEG_DIST (stmt_info)))
9834 : {
9835 12 : if (dump_enabled_p ())
9836 12 : dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
9837 : "cannot perform implicit CSE when performing "
9838 : "group loads with negative dependence distance\n");
9839 12 : return false;
9840 : }
9841 : }
9842 : else
9843 : group_size = 1;
9844 :
9845 707677 : vect_load_store_data _ls_data{};
9846 707677 : vect_load_store_data &ls = slp_node->get_data (_ls_data);
9847 707677 : if (cost_vec
9848 707677 : && !get_load_store_type (vinfo, stmt_info, vectype, slp_node, mask_node,
9849 : VLS_LOAD, &ls))
9850 : return false;
9851 : /* Temporary aliases to analysis data; the data should not be
9852 : modified through these. */
9853 602258 : const vect_memory_access_type memory_access_type = ls.memory_access_type;
9854 602258 : const dr_alignment_support alignment_support_scheme
9855 : = ls.alignment_support_scheme;
9856 602258 : const int misalignment = ls.misalignment;
9857 602258 : const poly_int64 poffset = ls.poffset;
9858 602258 : const vec<int> &elsvals = ls.elsvals;
9859 :
9860 602258 : int maskload_elsval = 0;
9861 602258 : bool need_zeroing = false;
9862 :
9863 : /* We might need to explicitly zero inactive elements if there are
9864 : padding bits in the type that might leak otherwise.
9865 : Refer to PR115336. */
9866 602258 : tree scalar_type = TREE_TYPE (scalar_dest);
9867 602258 : bool type_mode_padding_p
9868 1204516 : = TYPE_PRECISION (scalar_type) < GET_MODE_PRECISION (GET_MODE_INNER (mode));
9869 :
9870 602258 : if (slp_node->ldst_lanes
9871 0 : && memory_access_type != VMAT_LOAD_STORE_LANES)
9872 : {
9873 0 : if (dump_enabled_p ())
9874 0 : dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
9875 : "discovered load-lane but cannot use it.\n");
9876 0 : return false;
9877 : }
9878 :
9879 602258 : if (mask_node)
9880 : {
9881 2966 : if (memory_access_type == VMAT_CONTIGUOUS)
9882 : {
9883 2100 : machine_mode vec_mode = TYPE_MODE (vectype);
9884 721 : if (!VECTOR_MODE_P (vec_mode)
9885 4200 : || !can_vec_mask_load_store_p (vec_mode,
9886 2100 : TYPE_MODE (mask_vectype),
9887 : true, NULL, &ls.elsvals))
9888 351 : return false;
9889 : }
9890 866 : else if (memory_access_type == VMAT_ELEMENTWISE
9891 866 : || memory_access_type == VMAT_STRIDED_SLP)
9892 : {
9893 0 : if (dump_enabled_p ())
9894 0 : dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
9895 : "unsupported masked strided access.\n");
9896 0 : return false;
9897 : }
9898 866 : else if (memory_access_type != VMAT_LOAD_STORE_LANES
9899 866 : && !mat_gather_scatter_p (memory_access_type))
9900 : {
9901 62 : if (dump_enabled_p ())
9902 0 : dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
9903 : "unsupported access type for masked load.\n");
9904 62 : return false;
9905 : }
9906 804 : else if (memory_access_type == VMAT_GATHER_SCATTER_EMULATED)
9907 : {
9908 482 : if (dump_enabled_p ())
9909 28 : dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
9910 : "unsupported masked emulated gather.\n");
9911 482 : return false;
9912 : }
9913 : }
9914 :
9915 601363 : bool costing_p = cost_vec;
9916 :
9917 601363 : if (costing_p) /* transformation not required. */
9918 : {
9919 436078 : if (mask_node
9920 436078 : && !vect_maybe_update_slp_op_vectype (mask_node,
9921 : mask_vectype))
9922 : {
9923 0 : if (dump_enabled_p ())
9924 0 : dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
9925 : "incompatible vector types for invariants\n");
9926 0 : return false;
9927 : }
9928 :
9929 436078 : if (loop_vinfo
9930 312431 : && LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo))
9931 212159 : check_load_store_for_partial_vectors (loop_vinfo, vectype, slp_node,
9932 : VLS_LOAD, group_size, &ls,
9933 : mask_node, &ls.elsvals);
9934 :
9935 436078 : if (dump_enabled_p ()
9936 25383 : && memory_access_type != VMAT_ELEMENTWISE
9937 25272 : && !mat_gather_scatter_p (memory_access_type)
9938 24955 : && memory_access_type != VMAT_STRIDED_SLP
9939 24955 : && memory_access_type != VMAT_INVARIANT
9940 460102 : && alignment_support_scheme != dr_aligned)
9941 9888 : dump_printf_loc (MSG_NOTE, vect_location,
9942 : "Vectorizing an unaligned access.\n");
9943 :
9944 436078 : if (memory_access_type == VMAT_LOAD_STORE_LANES)
9945 0 : vinfo->any_known_not_updated_vssa = true;
9946 :
9947 436078 : SLP_TREE_TYPE (slp_node) = load_vec_info_type;
9948 436078 : slp_node->data = new vect_load_store_data (std::move (ls));
9949 : }
9950 :
9951 : /* If the type needs padding we must zero inactive elements.
9952 : Check if we can do that with a VEC_COND_EXPR and store the
9953 : elsval we choose in MASKLOAD_ELSVAL. */
9954 601363 : if (elsvals.length ()
9955 23128 : && type_mode_padding_p
9956 3 : && !elsvals.contains (MASK_LOAD_ELSE_ZERO)
9957 23128 : && !expand_vec_cond_expr_p (vectype, truth_type_for (vectype)))
9958 : {
9959 0 : if (dump_enabled_p ())
9960 0 : dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
9961 : "cannot zero inactive elements.\n");
9962 0 : return false;
9963 : }
9964 :
9965 : /* For now just use the first available else value.
9966 : get_supported_else_vals tries MASK_LOAD_ELSE_ZERO first so we will
9967 : select it here if it is supported. */
9968 601363 : if (elsvals.length ())
9969 23128 : maskload_elsval = *elsvals.begin ();
9970 :
9971 601363 : if (dump_enabled_p () && !costing_p)
9972 16592 : dump_printf_loc (MSG_NOTE, vect_location, "transform load.\n");
9973 :
9974 : /* Transform. */
9975 :
9976 601363 : dr_vec_info *dr_info = STMT_VINFO_DR_INFO (stmt_info), *first_dr_info = NULL;
9977 601363 : ensure_base_align (dr_info);
9978 :
9979 601363 : if (memory_access_type == VMAT_INVARIANT)
9980 : {
9981 4149 : gcc_assert (!grouped_load && !mask_node && !bb_vinfo);
9982 : /* If we have versioned for aliasing or the loop doesn't
9983 : have any data dependencies that would preclude this,
9984 : then we are sure this is a loop invariant load and
9985 : thus we can insert it on the preheader edge.
9986 : TODO: hoist_defs_of_uses should ideally be computed
9987 : once at analysis time, remembered and used at
9988 : transform time. */
9989 8298 : bool hoist_p = (LOOP_VINFO_NO_DATA_DEPENDENCIES (loop_vinfo)
9990 4149 : && !nested_in_vect_loop);
9991 :
9992 4149 : bool uniform_p = true;
9993 17426 : for (stmt_vec_info sinfo : SLP_TREE_SCALAR_STMTS (slp_node))
9994 : {
9995 : /* It is unsafe to hoist a conditional load over the conditions that
9996 : make it valid. With early breaks this means that an invariant load
9997 : can't be hoisted unless it's in the loop header or we know that
9998 : something else has verified the load is valid. Alignment
9999 : peeling would do this, since getting through the prologue means the
10000 : load was done at least once and so the vector main body would be
10001 : free to hoist it. However, today GCC will hoist the load above the
10002 : PFA loop, which makes it still invalid, so we can't allow it
10003 : today. */
10004 4979 : if (LOOP_VINFO_EARLY_BREAKS (loop_vinfo)
10005 1048 : && !DR_SCALAR_KNOWN_BOUNDS (STMT_VINFO_DR_INFO (sinfo))
10006 5995 : && gimple_bb (STMT_VINFO_STMT (vect_orig_stmt (sinfo)))
10007 1016 : != loop->header)
10008 : {
10009 916 : if (LOOP_VINFO_PEELING_FOR_ALIGNMENT (loop_vinfo)
10010 916 : && dump_enabled_p ())
10011 6 : dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
10012 : "not hoisting invariant load due to early break "
10013 : "constraints\n");
10014 910 : else if (dump_enabled_p ())
10015 16 : dump_printf_loc (MSG_NOTE, vect_location,
10016 : "not hoisting invariant load due to early break "
10017 : "constraints\n");
10018 : hoist_p = false;
10019 : }
10020 :
10021 4063 : hoist_p = hoist_p && hoist_defs_of_uses (sinfo->stmt, loop, false);
10022 4979 : if (sinfo != SLP_TREE_SCALAR_STMTS (slp_node)[0])
10023 279 : uniform_p = false;
10024 : }
10025 4149 : if (costing_p)
10026 : {
10027 3303 : if (!uniform_p && (!hoist_p || !vf.is_constant ()))
10028 : {
10029 0 : if (dump_enabled_p ())
10030 0 : dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
10031 : "not vectorizing non-uniform invariant "
10032 : "load\n");
10033 0 : return false;
10034 : }
10035 1419 : enum vect_cost_model_location cost_loc
10036 3303 : = hoist_p ? vect_prologue : vect_body;
10037 3303 : unsigned int cost = record_stmt_cost (cost_vec, 1, scalar_load,
10038 : slp_node, 0, cost_loc);
10039 3303 : cost += record_stmt_cost (cost_vec, 1, scalar_to_vec,
10040 : slp_node, 0, cost_loc);
10041 3303 : unsigned int prologue_cost = hoist_p ? cost : 0;
10042 1419 : unsigned int inside_cost = hoist_p ? 0 : cost;
10043 3303 : if (dump_enabled_p ())
10044 546 : dump_printf_loc (MSG_NOTE, vect_location,
10045 : "vect_model_load_cost: inside_cost = %d, "
10046 : "prologue_cost = %d .\n",
10047 : inside_cost, prologue_cost);
10048 3303 : return true;
10049 : }
10050 846 : if (hoist_p)
10051 : {
10052 : /* ??? For non-uniform lanes there could still be duplicates.
10053 : We're leaving those to post-vectorizer CSE for the moment. */
10054 649 : auto_vec<tree> scalar_defs (SLP_TREE_LANES (slp_node));
10055 2088 : for (stmt_vec_info sinfo : SLP_TREE_SCALAR_STMTS (slp_node))
10056 : {
10057 739 : gassign *stmt = as_a <gassign *> (sinfo->stmt);
10058 739 : if (dump_enabled_p ())
10059 352 : dump_printf_loc (MSG_NOTE, vect_location,
10060 : "hoisting out of the vectorized loop: %G",
10061 : (gimple *) stmt);
10062 739 : scalar_dest = copy_ssa_name (gimple_assign_lhs (stmt));
10063 739 : tree rhs = unshare_expr (gimple_assign_rhs1 (stmt));
10064 739 : edge pe = loop_preheader_edge (loop);
10065 739 : gphi *vphi = get_virtual_phi (loop->header);
10066 739 : tree vuse;
10067 739 : if (vphi)
10068 733 : vuse = PHI_ARG_DEF_FROM_EDGE (vphi, pe);
10069 : else
10070 6 : vuse = gimple_vuse (gsi_stmt (*gsi));
10071 739 : gimple *new_stmt = gimple_build_assign (scalar_dest, rhs);
10072 739 : gimple_set_vuse (new_stmt, vuse);
10073 739 : gsi_insert_on_edge_immediate (pe, new_stmt);
10074 739 : hoist_defs_of_uses (new_stmt, loop, true);
10075 739 : if (!useless_type_conversion_p (TREE_TYPE (vectype),
10076 739 : TREE_TYPE (scalar_dest)))
10077 : {
10078 12 : tree tem = make_ssa_name (TREE_TYPE (vectype));
10079 12 : new_stmt = gimple_build_assign (tem,
10080 : NOP_EXPR, scalar_dest);
10081 12 : gsi_insert_on_edge_immediate (pe, new_stmt);
10082 12 : scalar_dest = tem;
10083 : }
10084 739 : scalar_defs.quick_push (scalar_dest);
10085 739 : if (uniform_p)
10086 : break;
10087 : }
10088 649 : if (!uniform_p)
10089 : {
10090 51 : unsigned const_nunits
10091 51 : = TYPE_VECTOR_SUBPARTS (vectype).to_constant ();
10092 116 : for (j = 0; j < (int) vec_num; ++j)
10093 : {
10094 65 : vec<constructor_elt, va_gc> *v = NULL;
10095 65 : vec_safe_reserve (v, const_nunits, true);
10096 369 : for (unsigned i = 0; i < const_nunits; ++i)
10097 : {
10098 304 : unsigned def_idx
10099 304 : = (j * const_nunits + i) % SLP_TREE_LANES (slp_node);
10100 304 : CONSTRUCTOR_APPEND_ELT (v, NULL_TREE,
10101 : scalar_defs[def_idx]);
10102 : }
10103 65 : scalar_dest = build_constructor (vectype, v);
10104 65 : new_temp = vect_init_vector (vinfo, stmt_info, scalar_dest,
10105 : vectype, NULL);
10106 65 : slp_node->push_vec_def (new_temp);
10107 : }
10108 51 : return true;
10109 : }
10110 598 : new_temp = vect_init_vector (vinfo, stmt_info, scalar_dest,
10111 : vectype, NULL);
10112 649 : }
10113 : else
10114 : {
10115 197 : gcc_assert (uniform_p);
10116 197 : gimple_stmt_iterator gsi2 = *gsi;
10117 197 : gsi_next (&gsi2);
10118 197 : new_temp = vect_init_vector (vinfo, stmt_info, scalar_dest,
10119 : vectype, &gsi2);
10120 : }
10121 1664 : for (j = 0; j < (int) vec_num; ++j)
10122 869 : slp_node->push_vec_def (new_temp);
10123 : return true;
10124 : }
10125 :
10126 597214 : if (memory_access_type == VMAT_ELEMENTWISE
10127 597214 : || memory_access_type == VMAT_STRIDED_SLP)
10128 : {
10129 22953 : gimple_stmt_iterator incr_gsi;
10130 22953 : bool insert_after;
10131 22953 : tree offvar = NULL_TREE;
10132 22953 : tree ivstep;
10133 22953 : tree running_off;
10134 22953 : vec<constructor_elt, va_gc> *v = NULL;
10135 22953 : tree stride_base, stride_step, alias_off;
10136 : /* Checked by get_load_store_type. */
10137 22953 : unsigned int const_nunits = nunits.to_constant ();
10138 22953 : unsigned HOST_WIDE_INT cst_offset = 0;
10139 22953 : tree dr_offset;
10140 22953 : unsigned int inside_cost = 0;
10141 :
10142 22953 : gcc_assert (!LOOP_VINFO_USING_PARTIAL_VECTORS_P (loop_vinfo));
10143 22953 : gcc_assert (!nested_in_vect_loop);
10144 :
10145 22953 : if (grouped_load)
10146 : {
10147 : /* If we elided a consecutive load permutation, don't
10148 : use the original first statement (which could be elided)
10149 : but the one the load permutation starts with.
10150 : This ensures the stride_base below is correct. */
10151 10300 : if (!ls.subchain_p)
10152 10256 : first_stmt_info = DR_GROUP_FIRST_ELEMENT (stmt_info);
10153 : else
10154 44 : first_stmt_info = SLP_TREE_SCALAR_STMTS (slp_node)[0];
10155 10300 : first_dr_info = STMT_VINFO_DR_INFO (first_stmt_info);
10156 10300 : ref_type = get_group_alias_ptr_type (first_stmt_info);
10157 : }
10158 : else
10159 : {
10160 12653 : first_stmt_info = stmt_info;
10161 12653 : first_dr_info = dr_info;
10162 12653 : ref_type = reference_alias_ptr_type (DR_REF (dr_info->dr));
10163 : }
10164 :
10165 22953 : if (grouped_load)
10166 : {
10167 10300 : if (memory_access_type == VMAT_STRIDED_SLP)
10168 : {
10169 : /* If we elided a consecutive load permutation, adjust
10170 : the group size here. */
10171 3795 : if (!ls.subchain_p)
10172 3751 : group_size = DR_GROUP_SIZE (first_stmt_info);
10173 : else
10174 44 : group_size = SLP_TREE_LANES (slp_node);
10175 : }
10176 : else /* VMAT_ELEMENTWISE */
10177 6505 : group_size = SLP_TREE_LANES (slp_node);
10178 : }
10179 : else
10180 : group_size = 1;
10181 :
10182 22953 : if (!costing_p)
10183 : {
10184 3199 : dr_offset = get_dr_vinfo_offset (vinfo, first_dr_info);
10185 3199 : stride_base = fold_build_pointer_plus (
10186 : DR_BASE_ADDRESS (first_dr_info->dr),
10187 : size_binop (PLUS_EXPR, convert_to_ptrofftype (dr_offset),
10188 : convert_to_ptrofftype (DR_INIT (first_dr_info->dr))));
10189 3199 : stride_step = fold_convert (sizetype, DR_STEP (first_dr_info->dr));
10190 :
10191 : /* For a load with loop-invariant (but other than power-of-2)
10192 : stride (i.e. not a grouped access) like so:
10193 :
10194 : for (i = 0; i < n; i += stride)
10195 : ... = array[i];
10196 :
10197 : we generate a new induction variable and new accesses to
10198 : form a new vector (or vectors, depending on ncopies):
10199 :
10200 : for (j = 0; ; j += VF*stride)
10201 : tmp1 = array[j];
10202 : tmp2 = array[j + stride];
10203 : ...
10204 : vectemp = {tmp1, tmp2, ...}
10205 : */
10206 :
10207 3199 : ivstep = fold_build2 (MULT_EXPR, TREE_TYPE (stride_step), stride_step,
10208 : build_int_cst (TREE_TYPE (stride_step), vf));
10209 :
10210 3199 : standard_iv_increment_position (loop, &incr_gsi, &insert_after);
10211 :
10212 3199 : stride_base = cse_and_gimplify_to_preheader (loop_vinfo, stride_base);
10213 3199 : ivstep = cse_and_gimplify_to_preheader (loop_vinfo, ivstep);
10214 3199 : create_iv (stride_base, PLUS_EXPR, ivstep, NULL,
10215 : loop, &incr_gsi, insert_after,
10216 : &offvar, NULL);
10217 :
10218 3199 : stride_step = cse_and_gimplify_to_preheader (loop_vinfo, stride_step);
10219 : }
10220 :
10221 22953 : running_off = offvar;
10222 22953 : alias_off = build_int_cst (ref_type, 0);
10223 22953 : int nloads = const_nunits;
10224 22953 : int lnel = 1;
10225 22953 : tree ltype = TREE_TYPE (vectype);
10226 22953 : tree lvectype = vectype;
10227 22953 : auto_vec<tree> dr_chain;
10228 : /* ??? We modify local copies of alignment_support_scheme and
10229 : misalignment here, but this part of the analysis should be done
10230 : earlier and remembered, likewise the chosen load mode. */
10231 22953 : const dr_alignment_support tem = alignment_support_scheme;
10232 22953 : dr_alignment_support alignment_support_scheme = tem;
10233 22953 : const int tem2 = misalignment;
10234 22953 : int misalignment = tem2;
10235 22953 : if (memory_access_type == VMAT_STRIDED_SLP)
10236 : {
10237 16448 : HOST_WIDE_INT n = gcd (group_size, const_nunits);
10238 : /* Use the target vector type if the group size is a multiple
10239 : of it. */
10240 16448 : if (n == const_nunits)
10241 : {
10242 2143 : int mis_align = dr_misalignment (first_dr_info, vectype);
10243 : /* With VF > 1 we advance the DR by step; if that is constant
10244 : and only aligned when performed VF times, DR alignment
10245 : analysis can consider this aligned since it assumes
10246 : contiguous accesses. But that is not how we generate code
10247 : here, so adjust for this. */
10248 2143 : if (maybe_gt (vf, 1u)
10249 3415 : && !multiple_p (DR_STEP_ALIGNMENT (first_dr_info->dr),
10250 3203 : DR_TARGET_ALIGNMENT (first_dr_info)))
10251 212 : mis_align = -1;
10252 2143 : dr_alignment_support dr_align
10253 2143 : = vect_supportable_dr_alignment (vinfo, dr_info, vectype,
10254 : mis_align);
10255 2143 : if (dr_align == dr_aligned
10256 2143 : || dr_align == dr_unaligned_supported)
10257 : {
10258 16448 : nloads = 1;
10259 16448 : lnel = const_nunits;
10260 16448 : ltype = vectype;
10261 16448 : alignment_support_scheme = dr_align;
10262 16448 : misalignment = mis_align;
10263 : }
10264 : }
10265 : /* Else use the biggest vector with which we can load the
10266 : group without accessing excess elements. */
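 : /* E.g. a group of 2 floats accessed with a V8SF vectype gives
 : N == 2, so we load four 2-element pieces (NLOADS == 4,
 : LNEL == 2) and build the V8SF vector from them. */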
10267 14305 : else if (n > 1)
10268 : {
10269 1653 : tree ptype;
10270 1653 : tree vtype
10271 1653 : = vector_vector_composition_type (vectype, const_nunits / n,
10272 : &ptype);
10273 1653 : if (vtype != NULL_TREE)
10274 : {
10275 1623 : dr_alignment_support dr_align;
10276 1623 : int mis_align = 0;
10277 1623 : if (VECTOR_TYPE_P (ptype))
10278 : {
10279 733 : mis_align = dr_misalignment (first_dr_info, ptype);
10280 733 : if (maybe_gt (vf, 1u)
10281 1436 : && !multiple_p (DR_STEP_ALIGNMENT (first_dr_info->dr),
10282 739 : DR_TARGET_ALIGNMENT (first_dr_info)))
10283 697 : mis_align = -1;
10284 733 : dr_align
10285 733 : = vect_supportable_dr_alignment (vinfo, dr_info, ptype,
10286 : mis_align);
10287 : }
10288 : else
10289 : dr_align = dr_unaligned_supported;
10290 1623 : if (dr_align == dr_aligned
10291 1623 : || dr_align == dr_unaligned_supported)
10292 : {
10293 1623 : nloads = const_nunits / n;
10294 1623 : lnel = n;
10295 1623 : lvectype = vtype;
10296 1623 : ltype = ptype;
10297 1623 : alignment_support_scheme = dr_align;
10298 1623 : misalignment = mis_align;
10299 : }
10300 : }
10301 : }
10302 16448 : unsigned align;
10303 16448 : if (alignment_support_scheme == dr_aligned)
10304 20 : align = known_alignment (DR_TARGET_ALIGNMENT (first_dr_info));
10305 : else
10306 16428 : align = dr_alignment (vect_dr_behavior (vinfo, first_dr_info));
10307 : /* Alignment is at most the access size if we do multiple loads. */
10308 16448 : if (nloads > 1)
10309 14305 : align = MIN (tree_to_uhwi (TYPE_SIZE_UNIT (ltype)), align);
10310 16448 : ltype = build_aligned_type (ltype, align * BITS_PER_UNIT);
10311 : }
10312 :
10313 : /* For SLP permutation support we need to load the whole group,
10314 : not only the number of vector stmts the permutation result
10315 : fits in. */
10316 22953 : int ncopies;
10317 22953 : if (ls.slp_perm)
10318 : {
10319 2403 : gcc_assert (memory_access_type != VMAT_ELEMENTWISE);
10320 : /* We don't yet generate SLP_TREE_LOAD_PERMUTATIONs for
10321 : variable VF. */
10322 2403 : unsigned int const_vf = vf.to_constant ();
10323 2403 : ncopies = CEIL (group_size * const_vf, const_nunits);
10324 2403 : dr_chain.create (ncopies);
10325 : }
10326 : else
10327 : ncopies = vec_num;
10328 :
10329 22953 : unsigned int group_el = 0;
10330 22953 : unsigned HOST_WIDE_INT
10331 22953 : elsz = tree_to_uhwi (TYPE_SIZE_UNIT (TREE_TYPE (vectype)));
10332 22953 : unsigned int n_groups = 0;
10333 : /* For costing some adjacent vector loads, we'd like to cost them
10334 : once with their total number instead of costing each one by one. */
10335 22953 : unsigned int n_adjacent_loads = 0;
10336 54540 : for (j = 0; j < ncopies; j++)
10337 : {
10338 31587 : if (nloads > 1 && !costing_p)
10339 2564 : vec_alloc (v, nloads);
10340 : gimple *new_stmt = NULL;
10341 134430 : for (i = 0; i < nloads; i++)
10342 : {
10343 102843 : if (costing_p)
10344 : {
10345 : /* For VMAT_ELEMENTWISE, just cost it as scalar_load to
10346 : avoid ICE, see PR110776. */
10347 93668 : if (VECTOR_TYPE_P (ltype)
10348 4916 : && memory_access_type != VMAT_ELEMENTWISE)
10349 4916 : n_adjacent_loads++;
10350 : else
10351 88752 : inside_cost += record_stmt_cost (cost_vec, 1, scalar_load,
10352 : slp_node, 0, vect_body);
10353 93668 : continue;
10354 : }
10355 9175 : unsigned int load_el = group_el;
10356 : /* For elementwise accesses apply a load permutation directly. */
10357 9175 : if (memory_access_type == VMAT_ELEMENTWISE
10358 9175 : && SLP_TREE_LOAD_PERMUTATION (slp_node).exists ())
10359 2002 : load_el = SLP_TREE_LOAD_PERMUTATION (slp_node)[group_el];
10360 9175 : tree this_off = build_int_cst (TREE_TYPE (alias_off),
10361 9175 : load_el * elsz + cst_offset);
10362 9175 : tree data_ref = build2 (MEM_REF, ltype, running_off, this_off);
10363 9175 : vect_copy_ref_info (data_ref, DR_REF (first_dr_info->dr));
10364 9175 : new_temp = make_ssa_name (ltype);
10365 9175 : new_stmt = gimple_build_assign (new_temp, data_ref);
10366 9175 : vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
10367 9175 : if (nloads > 1)
10368 7456 : CONSTRUCTOR_APPEND_ELT (v, NULL_TREE, new_temp);
10369 :
10370 9175 : group_el += lnel;
10371 9175 : if (group_el == group_size)
10372 : {
10373 8804 : n_groups++;
10374 : /* When doing SLP make sure not to load elements from
10375 : the next vector iteration; those will not be accessed,
10376 : so just use the last element again. See PR107451. */
10377 8804 : if (known_lt (n_groups, vf))
10378 : {
10379 5585 : tree newoff = copy_ssa_name (running_off);
10380 5585 : gimple *incr
10381 5585 : = gimple_build_assign (newoff, POINTER_PLUS_EXPR,
10382 : running_off, stride_step);
10383 5585 : vect_finish_stmt_generation (vinfo, stmt_info, incr, gsi);
10384 5585 : running_off = newoff;
10385 : }
10386 : group_el = 0;
10387 : }
10388 : }
10389 :
10390 31587 : if (nloads > 1)
10391 : {
10392 22952 : if (costing_p)
10393 20388 : inside_cost += record_stmt_cost (cost_vec, 1, vec_construct,
10394 : slp_node, 0, vect_body);
10395 : else
10396 : {
10397 2564 : tree vec_inv = build_constructor (lvectype, v);
10398 2564 : new_temp = vect_init_vector (vinfo, stmt_info, vec_inv,
10399 : lvectype, gsi);
10400 2564 : new_stmt = SSA_NAME_DEF_STMT (new_temp);
10401 2564 : if (lvectype != vectype)
10402 : {
10403 232 : new_stmt
10404 232 : = gimple_build_assign (make_ssa_name (vectype),
10405 : VIEW_CONVERT_EXPR,
10406 : build1 (VIEW_CONVERT_EXPR,
10407 : vectype, new_temp));
10408 232 : vect_finish_stmt_generation (vinfo, stmt_info, new_stmt,
10409 : gsi);
10410 : }
10411 : }
10412 : }
10413 8635 : else if (!costing_p && ltype != vectype)
10414 : {
10415 1700 : new_stmt = gimple_build_assign (make_ssa_name (vectype),
10416 : VIEW_CONVERT_EXPR,
10417 : build1 (VIEW_CONVERT_EXPR,
10418 : vectype, new_temp));
10419 1700 : vect_finish_stmt_generation (vinfo, stmt_info, new_stmt,
10420 : gsi);
10421 : }
10422 :
10423 31587 : if (!costing_p)
10424 : {
10425 4283 : if (ls.slp_perm)
10426 1208 : dr_chain.quick_push (gimple_assign_lhs (new_stmt));
10427 : else
10428 3075 : slp_node->push_vec_def (new_stmt);
10429 : }
10430 : }
10431 22953 : if (ls.slp_perm)
10432 : {
10433 2403 : if (costing_p)
10434 : {
10435 1847 : gcc_assert (ls.n_perms != -1U);
10436 1847 : inside_cost += record_stmt_cost (cost_vec, ls.n_perms, vec_perm,
10437 : slp_node, 0, vect_body);
10438 : }
10439 : else
10440 : {
10441 556 : unsigned n_perms2;
10442 556 : vect_transform_slp_perm_load (vinfo, slp_node, dr_chain, gsi, vf,
10443 : false, &n_perms2);
10444 556 : gcc_assert (ls.n_perms == n_perms2);
10445 : }
10446 : }
10447 :
10448 22953 : if (costing_p)
10449 : {
10450 19754 : if (n_adjacent_loads > 0)
10451 1938 : vect_get_load_cost (vinfo, stmt_info, slp_node, n_adjacent_loads,
10452 : alignment_support_scheme, misalignment, false,
10453 : &inside_cost, nullptr, cost_vec, cost_vec,
10454 : true);
10455 19754 : if (dump_enabled_p ())
10456 496 : dump_printf_loc (MSG_NOTE, vect_location,
10457 : "vect_model_load_cost: inside_cost = %u, "
10458 : "prologue_cost = 0 .\n",
10459 : inside_cost);
10460 : }
10461 :
10462 22953 : return true;
10463 22953 : }
10464 :
10465 574261 : if (mat_gather_scatter_p (memory_access_type)
10466 574261 : && !ls.ls_type)
10467 : grouped_load = false;
10468 :
10469 571260 : if (grouped_load
10470 574261 : || SLP_TREE_LOAD_PERMUTATION (slp_node).exists ())
10471 : {
10472 263446 : if (grouped_load)
10473 : {
10474 263004 : first_stmt_info = DR_GROUP_FIRST_ELEMENT (stmt_info);
10475 263004 : group_size = DR_GROUP_SIZE (first_stmt_info);
10476 : }
10477 : else
10478 : {
10479 : first_stmt_info = stmt_info;
10480 : group_size = 1;
10481 : }
10482 : /* For SLP vectorization we directly vectorize a subchain
10483 : without permutation. */
10484 263446 : if (! SLP_TREE_LOAD_PERMUTATION (slp_node).exists ())
10485 209107 : first_stmt_info = SLP_TREE_SCALAR_STMTS (slp_node)[0];
10486 : /* For BB vectorization always use the first stmt to base
10487 : the data ref pointer on. */
10488 263446 : if (bb_vinfo)
10489 208875 : first_stmt_info_for_drptr
10490 208875 : = vect_find_first_scalar_stmt_in_slp (slp_node);
10491 :
10492 263446 : first_dr_info = STMT_VINFO_DR_INFO (first_stmt_info);
10493 263446 : group_gap_adj = 0;
10494 :
10495 : /* VEC_NUM is the number of vect stmts to be created for this group. */
10496 263446 : grouped_load = false;
10497 : /* If an SLP permutation is from N elements to N elements,
10498 : and if one vector holds a whole number of N-element groups, we can load
10499 : the inputs to the permutation in the same way as an
10500 : unpermuted sequence. In other cases we need to load the
10501 : whole group, not only the number of vector stmts the
10502 : permutation result fits in. */
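           : /* Editorial sketch, numbers invented: with group_size == 4,
           :    SLP_TREE_LANES == 4 and nunits == 8 each vector holds whole
           :    groups and the loads can be generated as for an unpermuted
           :    access; with SLP_TREE_LANES == 2 instead, the whole group
           :    must be loaded and VEC_NUM and GROUP_GAP_ADJ are derived
           :    from the VF below.  */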
10503 263446 : unsigned scalar_lanes = SLP_TREE_LANES (slp_node);
10504 263446 : if (nested_in_vect_loop)
10505 : /* We do not support grouped accesses in a nested loop;
10506 : instead the access is contiguous but it might be
10507 : permuted. No gap adjustment is needed though. */
10508 : ;
10509 263444 : else if (ls.slp_perm
10510 263444 : && (group_size != scalar_lanes
10511 11510 : || !multiple_p (nunits, group_size)))
10512 : {
10513 : /* We don't yet generate such SLP_TREE_LOAD_PERMUTATIONs for
10514 : variable VF; see vect_transform_slp_perm_load. */
10515 44000 : unsigned int const_vf = vf.to_constant ();
10516 44000 : unsigned int const_nunits = nunits.to_constant ();
10517 44000 : vec_num = CEIL (group_size * const_vf, const_nunits);
10518 44000 : group_gap_adj = vf * group_size - nunits * vec_num;
10519 : }
10520 : else
10521 : {
10522 219444 : group_gap_adj = group_size - scalar_lanes;
10523 : }
10524 :
10525 263446 : ref_type = get_group_alias_ptr_type (first_stmt_info);
10526 : }
10527 : else
10528 : {
10529 310815 : first_stmt_info = stmt_info;
10530 310815 : first_dr_info = dr_info;
10531 310815 : group_size = 1;
10532 310815 : group_gap_adj = 0;
10533 310815 : ref_type = reference_alias_ptr_type (DR_REF (first_dr_info->dr));
10534 : }
10535 :
10536 574261 : vec_loop_masks *loop_masks
10537 365386 : = (loop_vinfo && LOOP_VINFO_FULLY_MASKED_P (loop_vinfo)
10538 574261 : ? &LOOP_VINFO_MASKS (loop_vinfo)
10539 31 : : NULL);
10540 31 : vec_loop_lens *loop_lens
10541 365386 : = (loop_vinfo && LOOP_VINFO_FULLY_WITH_LENGTH_P (loop_vinfo)
10542 : ? &LOOP_VINFO_LENS (loop_vinfo)
10543 0 : : NULL);
10544 :
10545 : /* Both vect_analyze_stmt and vect_transform_stmt reach this point,
10546 : with one difference: we cannot enable both the lens and the masks
10547 : during transform, though that is allowed during analysis.
10548 : We shouldn't go with a length-based approach if fully masked. */
10549 574261 : if (cost_vec == NULL)
10550 : /* The cost_vec is NULL during transform. */
10551 161240 : gcc_assert ((!loop_lens || !loop_masks));
10552 :
10553 : /* Targets with load-lane instructions must not require explicit
10554 : realignment. vect_supportable_dr_alignment always returns either
10555 : dr_aligned or dr_unaligned_supported for (non-length) masked
10556 : operations. */
10557 574261 : gcc_assert ((memory_access_type != VMAT_LOAD_STORE_LANES
10558 : && !mask_node
10559 : && !loop_masks)
10560 : || mat_gather_scatter_p (memory_access_type)
10561 : || alignment_support_scheme == dr_aligned
10562 : || alignment_support_scheme == dr_unaligned_supported);
10563 :
10564 : /* In case the vectorization factor (VF) is bigger than the number
10565 : of elements that we can fit in a vectype (nunits), we have to generate
10566 : more than one vector stmt - i.e. - we need to "unroll" the
10567 : vector stmt by a factor VF/nunits. In doing so, we record a pointer
10568 : from one copy of the vector stmt to the next, in the field
10569 : STMT_VINFO_RELATED_STMT. This is necessary in order to allow following
10570 : stages to find the correct vector defs to be used when vectorizing
10571 : stmts that use the defs of the current stmt. The example below
10572 : illustrates the vectorization process when VF=16 and nunits=4 (i.e., we
10573 : need to create 4 vectorized stmts):
10574 :
10575 : before vectorization:
10576 : RELATED_STMT VEC_STMT
10577 : S1: x = memref - -
10578 : S2: z = x + 1 - -
10579 :
10580 : step 1: vectorize stmt S1:
10581 : We first create the vector stmt VS1_0, and, as usual, record a
10582 : pointer to it in the STMT_VINFO_VEC_STMT of the scalar stmt S1.
10583 : Next, we create the vector stmt VS1_1, and record a pointer to
10584 : it in the STMT_VINFO_RELATED_STMT of the vector stmt VS1_0.
10585 : Similarly, for VS1_2 and VS1_3. This is the resulting chain of
10586 : stmts and pointers:
10587 : RELATED_STMT VEC_STMT
10588 : VS1_0: vx0 = memref0 VS1_1 -
10589 : VS1_1: vx1 = memref1 VS1_2 -
10590 : VS1_2: vx2 = memref2 VS1_3 -
10591 : VS1_3: vx3 = memref3 - -
10592 : S1: x = load - VS1_0
10593 : S2: z = x + 1 - -
10594 : */
10595 :
10596 : /* If the data reference is aligned (dr_aligned) or potentially unaligned
10597 : on a target that supports unaligned accesses (dr_unaligned_supported)
10598 : we generate the following code:
10599 : p = initial_addr;
10600 : indx = 0;
10601 : loop {
10602 : p = p + indx * vectype_size;
10603 : vec_dest = *(p);
10604 : indx = indx + 1;
10605 : }
10606 :
10607 : Otherwise, the data reference is potentially unaligned on a target that
10608 : does not support unaligned accesses (dr_explicit_realign_optimized) -
10609 : then generate the following code, in which the data in each iteration is
10610 : obtained by two vector loads, one from the previous iteration, and one
10611 : from the current iteration:
10612 : p1 = initial_addr;
10613 : msq_init = *(floor(p1))
10614 : p2 = initial_addr + VS - 1;
10615 : realignment_token = call target_builtin;
10616 : indx = 0;
10617 : loop {
10618 : p2 = p2 + indx * vectype_size
10619 : lsq = *(floor(p2))
10620 : vec_dest = realign_load (msq, lsq, realignment_token)
10621 : indx = indx + 1;
10622 : msq = lsq;
10623 : } */
10624 :
10625 : /* If the misalignment remains the same throughout the execution of the
10626 : loop, we can create the init_addr and permutation mask at the loop
10627 : preheader. Otherwise, it needs to be created inside the loop.
10628 : This can only occur when vectorizing memory accesses in the inner-loop
10629 : nested within an outer-loop that is being vectorized. */
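           : /* Editorial sketch, numbers invented: for an inner-loop access
           :    whose step has known alignment 4 while the vector mode is 16
           :    bytes wide, the misalignment changes between outer-loop
           :    iterations, so init_addr and the permutation mask must be
           :    computed inside the loop.  */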
10630 :
10631 574261 : if (nested_in_vect_loop
10632 574261 : && !multiple_p (DR_STEP_ALIGNMENT (dr_info->dr),
10633 1234 : GET_MODE_SIZE (TYPE_MODE (vectype))))
10634 : {
10635 195 : gcc_assert (alignment_support_scheme != dr_explicit_realign_optimized);
10636 : compute_in_loop = true;
10637 : }
10638 :
10639 574261 : bool diff_first_stmt_info
10640 574261 : = first_stmt_info_for_drptr && first_stmt_info != first_stmt_info_for_drptr;
10641 :
10642 574261 : tree offset = NULL_TREE;
10643 574261 : if ((alignment_support_scheme == dr_explicit_realign_optimized
10644 574261 : || alignment_support_scheme == dr_explicit_realign)
10645 0 : && !compute_in_loop)
10646 : {
10647 : /* If we have a different first_stmt_info, we can't set up realignment
10648 : here, since we can't guarantee the first_stmt_info DR has been
10649 : initialized yet; instead use the first_stmt_info_for_drptr DR,
10650 : bumping by the distance from the first_stmt_info DR, as below. */
10651 0 : if (!costing_p)
10652 : {
10653 0 : if (!diff_first_stmt_info)
10654 0 : msq = vect_setup_realignment (vinfo, first_stmt_info, vectype, gsi,
10655 : &realignment_token,
10656 : alignment_support_scheme, NULL_TREE,
10657 : &at_loop);
10658 0 : if (alignment_support_scheme == dr_explicit_realign_optimized)
10659 : {
10660 0 : phi = as_a<gphi *> (SSA_NAME_DEF_STMT (msq));
10661 0 : offset = size_binop (MINUS_EXPR, TYPE_SIZE_UNIT (vectype),
10662 : size_one_node);
10663 0 : gcc_assert (!first_stmt_info_for_drptr);
10664 : }
10665 : }
10666 : }
10667 : else
10668 574261 : at_loop = loop;
10669 :
10670 574261 : if (!known_eq (poffset, 0))
10671 4633 : offset = (offset
10672 4633 : ? size_binop (PLUS_EXPR, offset, size_int (poffset))
10673 4633 : : size_int (poffset));
10674 :
10675 574261 : tree bump;
10676 574261 : tree vec_offset = NULL_TREE;
10677 :
10678 574261 : auto_vec<tree> vec_offsets;
10679 574261 : auto_vec<tree> vec_masks;
10680 574261 : if (mask_node && !costing_p)
10681 636 : vect_get_slp_defs (SLP_TREE_CHILDREN (slp_node)[mask_index],
10682 : &vec_masks);
10683 :
10684 574261 : tree vec_mask = NULL_TREE;
10685 574261 : tree vec_els = NULL_TREE;
10686 574261 : if (memory_access_type == VMAT_LOAD_STORE_LANES)
10687 : {
10688 0 : const internal_fn lanes_ifn = ls.lanes_ifn;
10689 :
10690 0 : gcc_assert (alignment_support_scheme == dr_aligned
10691 : || alignment_support_scheme == dr_unaligned_supported);
10692 :
10693 0 : aggr_type = build_array_type_nelts (elem_type, group_size * nunits);
10694 0 : if (!costing_p)
10695 0 : bump = vect_get_data_ptr_increment (vinfo, gsi, dr_info, aggr_type,
10696 : memory_access_type, loop_lens);
10697 :
10698 0 : unsigned int inside_cost = 0, prologue_cost = 0;
10699 : /* For costing some adjacent vector loads, we'd like to cost them
10700 : once with their total number instead of costing each one by one. */
10701 0 : unsigned int n_adjacent_loads = 0;
10702 0 : int ncopies = vec_num / group_size;
10703 0 : for (j = 0; j < ncopies; j++)
10704 : {
10705 0 : if (costing_p)
10706 : {
10707 : /* An IFN_LOAD_LANES will load all its vector results,
10708 : regardless of which ones we actually need. Account
10709 : for the cost of unused results. */
10710 0 : if (first_stmt_info == stmt_info)
10711 : {
10712 0 : unsigned int gaps = DR_GROUP_SIZE (first_stmt_info);
10713 0 : stmt_vec_info next_stmt_info = first_stmt_info;
10714 0 : do
10715 : {
10716 0 : gaps -= 1;
10717 0 : next_stmt_info = DR_GROUP_NEXT_ELEMENT (next_stmt_info);
10718 : }
10719 0 : while (next_stmt_info);
10720 0 : if (gaps)
10721 : {
10722 0 : if (dump_enabled_p ())
10723 0 : dump_printf_loc (MSG_NOTE, vect_location,
10724 : "vect_model_load_cost: %d "
10725 : "unused vectors.\n",
10726 : gaps);
10727 0 : vect_get_load_cost (vinfo, stmt_info, slp_node, gaps,
10728 : alignment_support_scheme,
10729 : misalignment, false, &inside_cost,
10730 : &prologue_cost, cost_vec, cost_vec,
10731 : true);
10732 : }
10733 : }
10734 0 : n_adjacent_loads++;
10735 0 : continue;
10736 0 : }
10737 :
10738 : /* 1. Create the vector or array pointer update chain. */
10739 0 : if (j == 0)
10740 0 : dataref_ptr
10741 0 : = vect_create_data_ref_ptr (vinfo, first_stmt_info, aggr_type,
10742 : at_loop, offset, &dummy, gsi,
10743 : &ptr_incr, false, bump);
10744 : else
10745 : {
10746 0 : gcc_assert (!LOOP_VINFO_USING_SELECT_VL_P (loop_vinfo));
10747 0 : dataref_ptr = bump_vector_ptr (vinfo, dataref_ptr, ptr_incr, gsi,
10748 : stmt_info, bump);
10749 : }
10750 0 : if (mask_node)
10751 0 : vec_mask = vec_masks[j];
10752 :
10753 0 : tree vec_array = create_vector_array (vectype, group_size);
10754 :
10755 0 : tree final_mask = NULL_TREE;
10756 0 : tree final_len = NULL_TREE;
10757 0 : tree bias = NULL_TREE;
10758 0 : if (loop_masks)
10759 0 : final_mask = vect_get_loop_mask (loop_vinfo, gsi, loop_masks,
10760 : ncopies, vectype, j);
10761 0 : if (vec_mask)
10762 0 : final_mask = prepare_vec_mask (loop_vinfo, mask_vectype, final_mask,
10763 : vec_mask, gsi);
10764 :
10765 0 : if (lanes_ifn == IFN_MASK_LEN_LOAD_LANES)
10766 : {
10767 0 : if (loop_lens)
10768 0 : final_len = vect_get_loop_len (loop_vinfo, gsi, loop_lens,
10769 : ncopies, vectype, j, 1, true);
10770 : else
10771 0 : final_len = size_int (TYPE_VECTOR_SUBPARTS (vectype));
10772 0 : signed char biasval
10773 0 : = LOOP_VINFO_PARTIAL_LOAD_STORE_BIAS (loop_vinfo);
10774 0 : bias = build_int_cst (intQI_type_node, biasval);
10775 0 : if (!final_mask)
10776 : {
10777 0 : mask_vectype = truth_type_for (vectype);
10778 0 : final_mask = build_minus_one_cst (mask_vectype);
10779 : }
10780 : }
10781 :
10782 0 : if (final_mask)
10783 : {
10784 0 : vec_els = vect_get_mask_load_else (maskload_elsval, vectype);
10785 0 : if (type_mode_padding_p
10786 0 : && maskload_elsval != MASK_LOAD_ELSE_ZERO)
10787 0 : need_zeroing = true;
10788 : }
10789 :
10790 0 : gcall *call;
10791 0 : if (final_len && final_mask)
10792 : {
10793 : /* Emit:
10794 : VEC_ARRAY = MASK_LEN_LOAD_LANES (DATAREF_PTR, ALIAS_PTR,
10795 : VEC_MASK, VEC_ELS, LEN, BIAS). */
10796 0 : unsigned int align = TYPE_ALIGN (TREE_TYPE (vectype));
10797 0 : tree alias_ptr = build_int_cst (ref_type, align);
10798 0 : call = gimple_build_call_internal (IFN_MASK_LEN_LOAD_LANES, 6,
10799 : dataref_ptr, alias_ptr,
10800 : final_mask, vec_els,
10801 : final_len, bias);
10802 : }
10803 0 : else if (final_mask)
10804 : {
10805 : /* Emit:
10806 : VEC_ARRAY = MASK_LOAD_LANES (DATAREF_PTR, ALIAS_PTR,
10807 : VEC_MASK, VEC_ELS). */
10808 0 : unsigned int align = TYPE_ALIGN (TREE_TYPE (vectype));
10809 0 : tree alias_ptr = build_int_cst (ref_type, align);
10810 0 : call = gimple_build_call_internal (IFN_MASK_LOAD_LANES, 4,
10811 : dataref_ptr, alias_ptr,
10812 : final_mask, vec_els);
10813 : }
10814 : else
10815 : {
10816 : /* Emit:
10817 : VEC_ARRAY = LOAD_LANES (MEM_REF[...all elements...]). */
10818 0 : data_ref = create_array_ref (aggr_type, dataref_ptr, ref_type);
10819 0 : call = gimple_build_call_internal (IFN_LOAD_LANES, 1, data_ref);
10820 : }
10821 0 : gimple_call_set_lhs (call, vec_array);
10822 0 : gimple_call_set_nothrow (call, true);
10823 0 : vect_finish_stmt_generation (vinfo, stmt_info, call, gsi);
10824 :
10825 : /* Extract each vector into an SSA_NAME. */
10826 0 : for (unsigned i = 0; i < group_size; i++)
10827 : {
10828 0 : new_temp = read_vector_array (vinfo, stmt_info, gsi, scalar_dest,
10829 : vec_array, i, need_zeroing,
10830 : final_mask);
10831 0 : slp_node->push_vec_def (new_temp);
10832 : }
10833 :
10834 : /* Record that VEC_ARRAY is now dead. */
10835 0 : vect_clobber_variable (vinfo, stmt_info, gsi, vec_array);
10836 : }
10837 :
10838 0 : if (costing_p)
10839 : {
10840 0 : if (n_adjacent_loads > 0)
10841 0 : vect_get_load_cost (vinfo, stmt_info, slp_node, n_adjacent_loads,
10842 : alignment_support_scheme, misalignment, false,
10843 : &inside_cost, &prologue_cost, cost_vec,
10844 : cost_vec, true);
10845 0 : if (dump_enabled_p ())
10846 0 : dump_printf_loc (MSG_NOTE, vect_location,
10847 : "vect_model_load_cost: inside_cost = %u, "
10848 : "prologue_cost = %u .\n",
10849 : inside_cost, prologue_cost);
10850 : }
10851 :
10852 0 : return true;
10853 : }
10854 :
10855 574261 : if (mat_gather_scatter_p (memory_access_type))
10856 : {
10857 3001 : gcc_assert ((!grouped_load && !ls.slp_perm) || ls.ls_type);
10858 :
10859 3001 : auto_vec<tree> dr_chain (vec_num);
10860 :
10861 : /* If we pun the original vectype then the loads, as well as costing,
10862 : length, etc., are performed with the new type. After loading we
10863 : VIEW_CONVERT the data to the original vectype. */
10864 3001 : tree original_vectype = vectype;
10865 3001 : if (ls.ls_type)
10866 0 : vectype = ls.ls_type;
10867 :
10868 : /* 1. Create the vector or array pointer update chain. */
10869 3001 : if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
10870 : {
10871 3001 : aggr_type = NULL_TREE;
10872 3001 : bump = NULL_TREE;
10873 3001 : if (!costing_p)
10874 747 : vect_get_gather_scatter_ops (loop, slp_node, &dataref_ptr,
10875 : &vec_offsets);
10876 : }
10877 : else
10878 : {
10879 0 : aggr_type = elem_type;
10880 0 : if (!costing_p)
10881 : {
10882 0 : vect_get_strided_load_store_ops (stmt_info, slp_node, vectype,
10883 : ls.strided_offset_vectype,
10884 : loop_vinfo, gsi,
10885 : &bump, &vec_offset, loop_lens);
10886 0 : dataref_ptr
10887 0 : = vect_create_data_ref_ptr (vinfo, first_stmt_info, aggr_type,
10888 : at_loop, offset, &dummy, gsi,
10889 : &ptr_incr, false, bump);
10890 : }
10891 : }
10892 :
10893 : unsigned int inside_cost = 0, prologue_cost = 0;
10894 :
10895 6777 : gimple *new_stmt = NULL;
10896 6777 : for (i = 0; i < vec_num; i++)
10897 : {
10898 3776 : tree final_mask = NULL_TREE;
10899 3776 : tree final_len = NULL_TREE;
10900 3776 : tree bias = NULL_TREE;
10901 3776 : if (!costing_p)
10902 : {
10903 959 : if (mask_node)
10904 153 : vec_mask = vec_masks[i];
10905 959 : if (loop_masks)
10906 0 : final_mask = vect_get_loop_mask (loop_vinfo, gsi, loop_masks,
10907 : vec_num, vectype, i);
10908 959 : if (vec_mask)
10909 153 : final_mask = prepare_vec_mask (loop_vinfo, mask_vectype,
10910 : final_mask, vec_mask, gsi);
10911 :
10912 959 : if (i > 0 && !STMT_VINFO_GATHER_SCATTER_P (stmt_info))
10913 0 : dataref_ptr = bump_vector_ptr (vinfo, dataref_ptr, ptr_incr,
10914 : gsi, stmt_info, bump);
10915 : }
10916 :
10917 : /* 2. Create the vector-load in the loop. */
10918 3776 : unsigned align = get_object_alignment (DR_REF (first_dr_info->dr));
10919 3776 : tree alias_align_ptr = build_int_cst (ref_type, align);
10920 3776 : if (memory_access_type == VMAT_GATHER_SCATTER_IFN)
10921 : {
10922 0 : if (costing_p)
10923 : {
10924 0 : if (ls.supported_offset_vectype)
10925 0 : inside_cost
10926 0 : += record_stmt_cost (cost_vec, 1, vector_stmt,
10927 : slp_node, 0, vect_body);
10928 0 : if (ls.supported_scale)
10929 0 : inside_cost
10930 0 : += record_stmt_cost (cost_vec, 1, vector_stmt,
10931 : slp_node, 0, vect_body);
10932 :
10933 0 : unsigned int cnunits = vect_nunits_for_cost (vectype);
10934 0 : inside_cost
10935 0 : = record_stmt_cost (cost_vec, cnunits, scalar_load,
10936 : slp_node, 0, vect_body);
10937 3776 : continue;
10938 0 : }
10939 0 : if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
10940 0 : vec_offset = vec_offsets[i];
10941 0 : tree zero = build_zero_cst (vectype);
10942 0 : tree scale = size_int (SLP_TREE_GS_SCALE (slp_node));
10943 0 : bool strided = !VECTOR_TYPE_P (TREE_TYPE (vec_offset));
10944 :
10945 : /* Perform the offset conversion and scaling if necessary. */
10946 0 : if (!strided
10947 0 : && (ls.supported_offset_vectype || ls.supported_scale))
10948 : {
10949 0 : gimple_seq stmts = NULL;
10950 0 : if (ls.supported_offset_vectype)
10951 0 : vec_offset = gimple_convert
10952 0 : (&stmts, ls.supported_offset_vectype, vec_offset);
10953 0 : if (ls.supported_scale)
10954 : {
10955 : /* Only scale the vec_offset if we haven't already. */
10956 0 : if (STMT_VINFO_GATHER_SCATTER_P (stmt_info)
10957 0 : || i == 0)
10958 : {
10959 0 : tree mult_cst = build_int_cst
10960 0 : (TREE_TYPE (TREE_TYPE (vec_offset)),
10961 0 : SLP_TREE_GS_SCALE (slp_node) / ls.supported_scale);
10962 0 : tree mult = build_vector_from_val
10963 0 : (TREE_TYPE (vec_offset), mult_cst);
10964 0 : vec_offset = gimple_build
10965 0 : (&stmts, MULT_EXPR, TREE_TYPE (vec_offset),
10966 : vec_offset, mult);
10967 : }
10968 0 : scale = size_int (ls.supported_scale);
10969 : }
10970 0 : gsi_insert_seq_before (gsi, stmts, GSI_SAME_STMT);
10971 : }
10972 :
10973 0 : if (ls.gs.ifn == IFN_MASK_LEN_GATHER_LOAD)
10974 : {
10975 0 : if (loop_lens)
10976 0 : final_len = vect_get_loop_len (loop_vinfo, gsi, loop_lens,
10977 : vec_num, vectype, i, 1, true);
10978 : else
10979 0 : final_len = build_int_cst (sizetype,
10980 0 : TYPE_VECTOR_SUBPARTS (vectype));
10981 0 : signed char biasval
10982 0 : = LOOP_VINFO_PARTIAL_LOAD_STORE_BIAS (loop_vinfo);
10983 0 : bias = build_int_cst (intQI_type_node, biasval);
10984 0 : if (!final_mask)
10985 : {
10986 0 : mask_vectype = truth_type_for (vectype);
10987 0 : final_mask = build_minus_one_cst (mask_vectype);
10988 : }
10989 : }
10990 :
10991 0 : if (final_mask)
10992 : {
10993 0 : vec_els = vect_get_mask_load_else (maskload_elsval, vectype);
10994 0 : if (type_mode_padding_p
10995 0 : && maskload_elsval != MASK_LOAD_ELSE_ZERO)
10996 0 : need_zeroing = true;
10997 : }
10998 :
10999 0 : gcall *call;
11000 0 : if (final_len && final_mask)
11001 : {
11002 0 : if (VECTOR_TYPE_P (TREE_TYPE (vec_offset)))
11003 0 : call = gimple_build_call_internal (IFN_MASK_LEN_GATHER_LOAD,
11004 : 9, dataref_ptr,
11005 : alias_align_ptr,
11006 : vec_offset, scale, zero,
11007 : final_mask, vec_els,
11008 : final_len, bias);
11009 : else
11010 : /* A non-vector offset indicates that we prefer to take
11011 : MASK_LEN_STRIDED_LOAD instead of
11012 : MASK_LEN_GATHER_LOAD with a direct stride argument. */
11013 0 : call = gimple_build_call_internal
11014 0 : (IFN_MASK_LEN_STRIDED_LOAD, 7, dataref_ptr,
11015 : vec_offset, zero, final_mask, vec_els, final_len,
11016 : bias);
11017 : }
11018 0 : else if (final_mask)
11019 0 : call = gimple_build_call_internal (IFN_MASK_GATHER_LOAD,
11020 : 7, dataref_ptr,
11021 : alias_align_ptr,
11022 : vec_offset, scale,
11023 : zero, final_mask, vec_els);
11024 : else
11025 0 : call = gimple_build_call_internal (IFN_GATHER_LOAD, 5,
11026 : dataref_ptr,
11027 : alias_align_ptr,
11028 : vec_offset, scale, zero);
11029 0 : gimple_call_set_nothrow (call, true);
11030 0 : new_stmt = call;
11031 0 : data_ref = NULL_TREE;
11032 : }
11033 3776 : else if (memory_access_type == VMAT_GATHER_SCATTER_LEGACY)
11034 : {
11035 : /* The builtin decls path for gather is legacy, x86 only. */
11036 849 : gcc_assert (!final_len && nunits.is_constant ());
11037 849 : if (costing_p)
11038 : {
11039 566 : unsigned int cnunits = vect_nunits_for_cost (vectype);
11040 566 : inside_cost
11041 566 : = record_stmt_cost (cost_vec, cnunits, scalar_load,
11042 : slp_node, 0, vect_body);
11043 566 : continue;
11044 566 : }
11045 283 : tree offset_vectype = TREE_TYPE (vec_offsets[0]);
11046 283 : poly_uint64 offset_nunits = TYPE_VECTOR_SUBPARTS (offset_vectype);
11047 283 : if (known_eq (nunits, offset_nunits))
11048 : {
11049 134 : new_stmt = vect_build_one_gather_load_call
11050 134 : (vinfo, stmt_info, slp_node, vectype, gsi,
11051 134 : ls.gs.decl, dataref_ptr, vec_offsets[i],
11052 : final_mask);
11053 134 : data_ref = NULL_TREE;
11054 : }
11055 149 : else if (known_eq (nunits, offset_nunits * 2))
11056 : {
11057 : /* We have an offset vector with half the number of
11058 : lanes but the builtins will produce full vectype
11059 : data with just the lower lanes filled. */
11060 63 : new_stmt = vect_build_one_gather_load_call
11061 126 : (vinfo, stmt_info, slp_node, vectype, gsi,
11062 63 : ls.gs.decl, dataref_ptr, vec_offsets[2 * i],
11063 : final_mask);
11064 63 : tree low = make_ssa_name (vectype);
11065 63 : gimple_set_lhs (new_stmt, low);
11066 63 : vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
11067 :
11068 : /* Now put the upper half of final_mask into its low half. */
11069 63 : if (final_mask
11070 63 : && !SCALAR_INT_MODE_P (TYPE_MODE (TREE_TYPE (final_mask))))
11071 : {
11072 11 : int count = nunits.to_constant ();
11073 11 : vec_perm_builder sel (count, count, 1);
11074 11 : sel.quick_grow (count);
11075 87 : for (int i = 0; i < count; ++i)
11076 76 : sel[i] = i | (count / 2);
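           : /* Editorial note: for count == 8 this builds the selector
           :    { 4, 5, 6, 7, 4, 5, 6, 7 }, copying the upper mask half
           :    into the low lanes.  */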
11077 11 : vec_perm_indices indices (sel, 2, count);
11078 11 : tree perm_mask = vect_gen_perm_mask_checked
11079 11 : (TREE_TYPE (final_mask), indices);
11080 11 : new_stmt = gimple_build_assign (NULL_TREE, VEC_PERM_EXPR,
11081 : final_mask, final_mask,
11082 : perm_mask);
11083 11 : final_mask = make_ssa_name (TREE_TYPE (final_mask));
11084 11 : gimple_set_lhs (new_stmt, final_mask);
11085 11 : vect_finish_stmt_generation (vinfo, stmt_info,
11086 : new_stmt, gsi);
11087 11 : }
11088 52 : else if (final_mask)
11089 : {
11090 24 : new_stmt = gimple_build_assign (NULL_TREE,
11091 : VEC_UNPACK_HI_EXPR,
11092 : final_mask);
11093 24 : final_mask = make_ssa_name
11094 24 : (truth_type_for (offset_vectype));
11095 24 : gimple_set_lhs (new_stmt, final_mask);
11096 24 : vect_finish_stmt_generation (vinfo, stmt_info,
11097 : new_stmt, gsi);
11098 : }
11099 :
11100 63 : new_stmt = vect_build_one_gather_load_call
11101 126 : (vinfo, stmt_info, slp_node, vectype, gsi,
11102 : ls.gs.decl, dataref_ptr,
11103 63 : vec_offsets[2 * i + 1], final_mask);
11104 63 : tree high = make_ssa_name (vectype);
11105 63 : gimple_set_lhs (new_stmt, high);
11106 63 : vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
11107 :
11108 : /* Compose low + high. */
11109 63 : int count = nunits.to_constant ();
11110 63 : vec_perm_builder sel (count, count, 1);
11111 63 : sel.quick_grow (count);
11112 647 : for (int i = 0; i < count; ++i)
11113 584 : sel[i] = i < count / 2 ? i : i + count / 2;
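           : /* Editorial note: for count == 8 this builds the selector
           :    { 0, 1, 2, 3, 8, 9, 10, 11 }, concatenating the filled
           :    low halves of LOW and HIGH.  */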
11114 63 : vec_perm_indices indices (sel, 2, count);
11115 63 : tree perm_mask
11116 63 : = vect_gen_perm_mask_checked (vectype, indices);
11117 63 : new_stmt = gimple_build_assign (NULL_TREE, VEC_PERM_EXPR,
11118 : low, high, perm_mask);
11119 63 : data_ref = NULL_TREE;
11120 63 : }
11121 86 : else if (known_eq (nunits * 2, offset_nunits))
11122 : {
11123 : /* We have an offset vector with double the number of
11124 : lanes. Select the low/high part accordingly. */
11125 86 : vec_offset = vec_offsets[i / 2];
11126 86 : if (i & 1)
11127 : {
11128 43 : int count = offset_nunits.to_constant ();
11129 43 : vec_perm_builder sel (count, count, 1);
11130 43 : sel.quick_grow (count);
11131 463 : for (int i = 0; i < count; ++i)
11132 420 : sel[i] = i | (count / 2);
11133 43 : vec_perm_indices indices (sel, 2, count);
11134 43 : tree perm_mask = vect_gen_perm_mask_checked
11135 43 : (TREE_TYPE (vec_offset), indices);
11136 43 : new_stmt = gimple_build_assign (NULL_TREE, VEC_PERM_EXPR,
11137 : vec_offset, vec_offset,
11138 : perm_mask);
11139 43 : vec_offset = make_ssa_name (TREE_TYPE (vec_offset));
11140 43 : gimple_set_lhs (new_stmt, vec_offset);
11141 43 : vect_finish_stmt_generation (vinfo, stmt_info,
11142 : new_stmt, gsi);
11143 43 : }
11144 86 : new_stmt = vect_build_one_gather_load_call
11145 86 : (vinfo, stmt_info, slp_node, vectype, gsi,
11146 : ls.gs.decl,
11147 : dataref_ptr, vec_offset, final_mask);
11148 86 : data_ref = NULL_TREE;
11149 : }
11150 : else
11151 0 : gcc_unreachable ();
11152 : }
11153 : else
11154 : {
11155 : /* Emulated gather-scatter. */
11156 2927 : gcc_assert (!final_mask);
11157 2927 : unsigned HOST_WIDE_INT const_nunits = nunits.to_constant ();
11158 2927 : if (costing_p)
11159 : {
11160 : /* For emulated gathers N offset vector element extracts (we assume
11161 : the scalar scaling and ptr + offset add is consumed by the load). */
11162 2251 : inside_cost = record_stmt_cost (cost_vec, const_nunits,
11163 : vec_to_scalar,
11164 : slp_node, 0, vect_body);
11165 : /* N scalar loads plus gathering them into a
11166 : vector. */
11167 2251 : inside_cost
11168 2251 : = record_stmt_cost (cost_vec, const_nunits, scalar_load,
11169 : slp_node, 0, vect_body);
11170 2251 : inside_cost
11171 2251 : = record_stmt_cost (cost_vec, 1, vec_construct,
11172 : slp_node, 0, vect_body);
11173 2251 : continue;
11174 : }
11175 676 : tree offset_vectype = TREE_TYPE (vec_offsets[0]);
11176 676 : unsigned HOST_WIDE_INT const_offset_nunits
11177 676 : = TYPE_VECTOR_SUBPARTS (offset_vectype).to_constant ();
11178 676 : vec<constructor_elt, va_gc> *ctor_elts;
11179 676 : vec_alloc (ctor_elts, const_nunits);
11180 676 : gimple_seq stmts = NULL;
11181 : /* We support offset vectors with more elements
11182 : than the data vector for now. */
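           : /* Editorial sketch, numbers invented: with
           :    const_offset_nunits == 8 and const_nunits == 4 the factor
           :    below is 2, so vector number I uses vec_offsets[I / 2]
           :    starting at offset element (I % 2) * 4.  */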
11183 676 : unsigned HOST_WIDE_INT factor
11184 : = const_offset_nunits / const_nunits;
11185 676 : vec_offset = vec_offsets[i / factor];
11186 676 : unsigned elt_offset = (i % factor) * const_nunits;
11187 676 : tree idx_type = TREE_TYPE (TREE_TYPE (vec_offset));
11188 676 : tree scale = size_int (SLP_TREE_GS_SCALE (slp_node));
11189 676 : tree ltype = build_aligned_type (TREE_TYPE (vectype), align);
11190 2762 : for (unsigned k = 0; k < const_nunits; ++k)
11191 : {
11192 2086 : tree boff = size_binop (MULT_EXPR, TYPE_SIZE (idx_type),
11193 : bitsize_int (k + elt_offset));
11194 6258 : tree idx = gimple_build (&stmts, BIT_FIELD_REF, idx_type,
11195 2086 : vec_offset, TYPE_SIZE (idx_type),
11196 : boff);
11197 2086 : idx = gimple_convert (&stmts, sizetype, idx);
11198 2086 : idx = gimple_build (&stmts, MULT_EXPR, sizetype, idx, scale);
11199 2086 : tree ptr = gimple_build (&stmts, PLUS_EXPR,
11200 2086 : TREE_TYPE (dataref_ptr),
11201 : dataref_ptr, idx);
11202 2086 : ptr = gimple_convert (&stmts, ptr_type_node, ptr);
11203 2086 : tree elt = make_ssa_name (TREE_TYPE (vectype));
11204 2086 : tree ref = build2 (MEM_REF, ltype, ptr,
11205 : build_int_cst (ref_type, 0));
11206 2086 : new_stmt = gimple_build_assign (elt, ref);
11207 4172 : gimple_set_vuse (new_stmt, gimple_vuse (gsi_stmt (*gsi)));
11208 2086 : gimple_seq_add_stmt (&stmts, new_stmt);
11209 2086 : CONSTRUCTOR_APPEND_ELT (ctor_elts, NULL_TREE, elt);
11210 : }
11211 676 : gsi_insert_seq_before (gsi, stmts, GSI_SAME_STMT);
11212 676 : new_stmt = gimple_build_assign (NULL_TREE,
11213 : build_constructor (vectype,
11214 : ctor_elts));
11215 676 : data_ref = NULL_TREE;
11216 : }
11217 :
11218 959 : vec_dest = vect_create_destination_var (scalar_dest, vectype);
11219 : /* DATA_REF is null if we've already built the statement. */
11220 959 : if (data_ref)
11221 : {
11222 : vect_copy_ref_info (data_ref, DR_REF (first_dr_info->dr));
11223 : new_stmt = gimple_build_assign (vec_dest, data_ref);
11224 : }
11225 1918 : new_temp = (need_zeroing
11226 959 : ? make_ssa_name (vectype)
11227 959 : : make_ssa_name (vec_dest, new_stmt));
11228 959 : gimple_set_lhs (new_stmt, new_temp);
11229 959 : vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
11230 :
11231 : /* If we need to explicitly zero inactive elements emit a
11232 : VEC_COND_EXPR that does so. */
11233 959 : if (need_zeroing)
11234 : {
11235 0 : vec_els = vect_get_mask_load_else (MASK_LOAD_ELSE_ZERO,
11236 : vectype);
11237 :
11238 0 : tree new_temp2 = make_ssa_name (vec_dest, new_stmt);
11239 0 : new_stmt = gimple_build_assign (new_temp2, VEC_COND_EXPR,
11240 : final_mask, new_temp, vec_els);
11241 0 : vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
11242 0 : new_temp = new_temp2;
11243 : }
11244 :
11245 959 : if (ls.ls_type)
11246 : {
11247 0 : new_stmt = gimple_build_assign (make_ssa_name
11248 : (original_vectype),
11249 : VIEW_CONVERT_EXPR,
11250 : build1 (VIEW_CONVERT_EXPR,
11251 : original_vectype,
11252 : new_temp));
11253 0 : vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
11254 : }
11255 :
11256 : /* Store vector loads in the corresponding SLP_NODE. */
11257 959 : if (!costing_p)
11258 : {
11259 959 : if (ls.slp_perm)
11260 0 : dr_chain.quick_push (gimple_assign_lhs (new_stmt));
11261 : else
11262 959 : slp_node->push_vec_def (new_stmt);
11263 : }
11264 : }
11265 :
11266 3001 : if (ls.slp_perm)
11267 : {
11268 0 : if (costing_p)
11269 : {
11270 0 : gcc_assert (ls.n_perms != -1U);
11271 0 : inside_cost += record_stmt_cost (cost_vec, ls.n_perms, vec_perm,
11272 : slp_node, 0, vect_body);
11273 : }
11274 : else
11275 : {
11276 0 : unsigned n_perms2;
11277 0 : vect_transform_slp_perm_load (vinfo, slp_node, dr_chain, gsi, vf,
11278 : false, &n_perms2);
11279 0 : gcc_assert (ls.n_perms == n_perms2);
11280 : }
11281 : }
11282 :
11283 3001 : if (costing_p && dump_enabled_p ())
11284 317 : dump_printf_loc (MSG_NOTE, vect_location,
11285 : "vect_model_load_cost: inside_cost = %u, "
11286 : "prologue_cost = %u .\n",
11287 : inside_cost, prologue_cost);
11288 3001 : return true;
11289 3001 : }
11290 :
11291 571260 : aggr_type = vectype;
11292 571260 : if (!costing_p)
11293 160493 : bump = vect_get_data_ptr_increment (vinfo, gsi, dr_info, aggr_type,
11294 : memory_access_type, loop_lens);
11295 :
11296 571260 : poly_uint64 group_elt = 0;
11297 571260 : unsigned int inside_cost = 0, prologue_cost = 0;
11298 : /* For costing some adjacent vector loads, we'd like to cost them
11299 : once with their total number instead of costing each one by one. */
11300 571260 : unsigned int n_adjacent_loads = 0;
11301 :
11302 : /* 1. Create the vector or array pointer update chain. */
11303 571260 : if (!costing_p)
11304 : {
11305 160493 : bool simd_lane_access_p
11306 160493 : = STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) != 0;
11307 160493 : if (simd_lane_access_p
11308 1629 : && TREE_CODE (DR_BASE_ADDRESS (first_dr_info->dr)) == ADDR_EXPR
11309 1629 : && VAR_P (TREE_OPERAND (DR_BASE_ADDRESS (first_dr_info->dr), 0))
11310 1629 : && integer_zerop (get_dr_vinfo_offset (vinfo, first_dr_info))
11311 1629 : && integer_zerop (DR_INIT (first_dr_info->dr))
11312 1629 : && alias_sets_conflict_p (get_alias_set (aggr_type),
11313 1629 : get_alias_set (TREE_TYPE (ref_type)))
11314 160493 : && (alignment_support_scheme == dr_aligned
11315 1629 : || alignment_support_scheme == dr_unaligned_supported))
11316 : {
11317 1629 : dataref_ptr = unshare_expr (DR_BASE_ADDRESS (first_dr_info->dr));
11318 1629 : dataref_offset = build_int_cst (ref_type, 0);
11319 : }
11320 158864 : else if (diff_first_stmt_info)
11321 : {
11322 3548 : dataref_ptr
11323 3548 : = vect_create_data_ref_ptr (vinfo, first_stmt_info_for_drptr,
11324 : aggr_type, at_loop, offset, &dummy,
11325 : gsi, &ptr_incr, simd_lane_access_p,
11326 : bump);
11327 : /* Adjust the pointer by the difference to first_stmt. */
11328 3548 : data_reference_p ptrdr
11329 : = STMT_VINFO_DATA_REF (first_stmt_info_for_drptr);
11330 3548 : tree diff = fold_convert (sizetype,
11331 : size_binop (MINUS_EXPR,
11332 : DR_INIT (first_dr_info->dr),
11333 : DR_INIT (ptrdr)));
11334 3548 : dataref_ptr = bump_vector_ptr (vinfo, dataref_ptr, ptr_incr, gsi,
11335 : stmt_info, diff);
11336 3548 : if (alignment_support_scheme == dr_explicit_realign)
11337 : {
11338 0 : msq = vect_setup_realignment (vinfo, first_stmt_info_for_drptr,
11339 : vectype, gsi,
11340 : &realignment_token,
11341 : alignment_support_scheme,
11342 : dataref_ptr, &at_loop);
11343 0 : gcc_assert (!compute_in_loop);
11344 : }
11345 : }
11346 : else
11347 155316 : dataref_ptr
11348 155316 : = vect_create_data_ref_ptr (vinfo, first_stmt_info, aggr_type,
11349 : at_loop,
11350 : offset, &dummy, gsi, &ptr_incr,
11351 : simd_lane_access_p, bump);
11352 : }
11353 : else if (!costing_p)
11354 : {
11355 : gcc_assert (!LOOP_VINFO_USING_SELECT_VL_P (loop_vinfo));
11356 : if (dataref_offset)
11357 : dataref_offset = int_const_binop (PLUS_EXPR, dataref_offset, bump);
11358 : else
11359 : dataref_ptr = bump_vector_ptr (vinfo, dataref_ptr, ptr_incr, gsi,
11360 : stmt_info, bump);
11361 : }
11362 :
11363 571260 : auto_vec<tree> dr_chain;
11364 571260 : if (grouped_load || ls.slp_perm)
11365 54339 : dr_chain.create (vec_num);
11366 :
11367 : gimple *new_stmt = NULL;
11368 1489907 : for (i = 0; i < vec_num; i++)
11369 : {
11370 918647 : tree final_mask = NULL_TREE;
11371 918647 : tree final_len = NULL_TREE;
11372 918647 : tree bias = NULL_TREE;
11373 :
11374 918647 : if (!costing_p)
11375 : {
11376 251128 : if (mask_node)
11377 709 : vec_mask = vec_masks[i];
11378 251128 : if (loop_masks)
11379 48 : final_mask = vect_get_loop_mask (loop_vinfo, gsi, loop_masks,
11380 : vec_num, vectype, i);
11381 251128 : if (vec_mask)
11382 709 : final_mask = prepare_vec_mask (loop_vinfo, mask_vectype,
11383 : final_mask, vec_mask, gsi);
11384 :
11385 251128 : if (i > 0)
11386 90635 : dataref_ptr = bump_vector_ptr (vinfo, dataref_ptr, ptr_incr,
11387 : gsi, stmt_info, bump);
11388 : }
11389 :
11390 : /* 2. Create the vector-load in the loop. */
11391 918647 : switch (alignment_support_scheme)
11392 : {
11393 918647 : case dr_aligned:
11394 918647 : case dr_unaligned_supported:
11395 918647 : {
11396 918647 : if (costing_p)
11397 : break;
11398 :
11399 251128 : unsigned int misalign;
11400 251128 : unsigned HOST_WIDE_INT align;
11401 251128 : align = known_alignment (DR_TARGET_ALIGNMENT (first_dr_info));
11402 251128 : if (alignment_support_scheme == dr_aligned)
11403 : misalign = 0;
11404 161486 : else if (misalignment == DR_MISALIGNMENT_UNKNOWN)
11405 : {
11406 122764 : align = dr_alignment (vect_dr_behavior (vinfo, first_dr_info));
11407 122764 : misalign = 0;
11408 : }
11409 : else
11410 38722 : misalign = misalignment;
11411 251128 : if (dataref_offset == NULL_TREE
11412 249001 : && TREE_CODE (dataref_ptr) == SSA_NAME)
11413 168903 : set_ptr_info_alignment (get_ptr_info (dataref_ptr), align,
11414 : misalign);
11415 251128 : align = least_bit_hwi (misalign | align);
11416 :
11417 : /* Compute the IFN to use when LOOP_LENS or final_mask is valid. */
11418 251128 : machine_mode vmode = TYPE_MODE (vectype);
11419 251128 : machine_mode new_vmode = vmode;
11420 251128 : internal_fn partial_ifn = IFN_LAST;
11421 251128 : if (loop_lens)
11422 : {
11423 0 : opt_machine_mode new_ovmode
11424 0 : = get_len_load_store_mode (vmode, true, &partial_ifn);
11425 0 : new_vmode = new_ovmode.require ();
11426 0 : unsigned factor
11427 0 : = (new_ovmode == vmode) ? 1 : GET_MODE_UNIT_SIZE (vmode);
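           : /* Editorial note: if e.g. V4SImode is punned to V16QImode for
           :    the len load, the length operand is counted in bytes, hence
           :    factor == GET_MODE_UNIT_SIZE (V4SImode) == 4.  */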
11428 0 : final_len = vect_get_loop_len (loop_vinfo, gsi, loop_lens,
11429 : vec_num, vectype, i, factor, true);
11430 : }
11431 251128 : else if (final_mask)
11432 : {
11433 737 : if (!can_vec_mask_load_store_p (vmode,
11434 737 : TYPE_MODE
11435 : (TREE_TYPE (final_mask)),
11436 : true, &partial_ifn))
11437 0 : gcc_unreachable ();
11438 : }
11439 :
11440 251128 : if (partial_ifn == IFN_MASK_LEN_LOAD)
11441 : {
11442 0 : if (!final_len)
11443 : {
11444 : /* Pass VF value to 'len' argument of
11445 : MASK_LEN_LOAD if LOOP_LENS is invalid. */
11446 0 : final_len = size_int (TYPE_VECTOR_SUBPARTS (vectype));
11447 : }
11448 0 : if (!final_mask)
11449 : {
11450 : /* Pass all ones value to 'mask' argument of
11451 : MASK_LEN_LOAD if final_mask is invalid. */
11452 0 : mask_vectype = truth_type_for (vectype);
11453 0 : final_mask = build_minus_one_cst (mask_vectype);
11454 : }
11455 : }
11456 251128 : if (final_len)
11457 : {
11458 0 : signed char biasval
11459 0 : = LOOP_VINFO_PARTIAL_LOAD_STORE_BIAS (loop_vinfo);
11460 0 : bias = build_int_cst (intQI_type_node, biasval);
11461 : }
11462 :
11463 251128 : tree vec_els;
11464 :
11465 251128 : if (final_len)
11466 : {
11467 0 : tree ptr = build_int_cst (ref_type, align * BITS_PER_UNIT);
11468 0 : gcall *call;
11469 :
11470 : /* Need conversion if the vectype is punned by VnQI. */
11471 0 : els_vectype = vectype;
11472 0 : if (vmode != new_vmode)
11473 0 : els_vectype
11474 0 : = build_vector_type_for_mode (unsigned_intQI_type_node,
11475 : new_vmode);
11476 0 : vec_els = vect_get_mask_load_else (maskload_elsval,
11477 : els_vectype);
11478 :
11479 0 : if (partial_ifn == IFN_MASK_LEN_LOAD)
11480 : {
11481 0 : if (type_mode_padding_p
11482 0 : && maskload_elsval != MASK_LOAD_ELSE_ZERO)
11483 0 : need_zeroing = true;
11484 0 : call = gimple_build_call_internal (IFN_MASK_LEN_LOAD,
11485 : 6, dataref_ptr, ptr,
11486 : final_mask, vec_els,
11487 : final_len, bias);
11488 : }
11489 : else
11490 0 : call = gimple_build_call_internal (IFN_LEN_LOAD, 5,
11491 : dataref_ptr, ptr,
11492 : vec_els, final_len,
11493 : bias);
11494 0 : gimple_call_set_nothrow (call, true);
11495 0 : new_stmt = call;
11496 0 : data_ref = NULL_TREE;
11497 :
11498 : /* Need conversion if it's wrapped with VnQI. */
11499 0 : if (vmode != new_vmode)
11500 : {
11501 0 : tree new_vtype
11502 0 : = build_vector_type_for_mode (unsigned_intQI_type_node,
11503 : new_vmode);
11504 0 : tree var = vect_get_new_ssa_name (new_vtype,
11505 : vect_simple_var);
11506 0 : gimple_set_lhs (call, var);
11507 0 : vect_finish_stmt_generation (vinfo, stmt_info, call,
11508 : gsi);
11509 0 : tree op = build1 (VIEW_CONVERT_EXPR, vectype, var);
11510 0 : new_stmt = gimple_build_assign (vec_dest,
11511 : VIEW_CONVERT_EXPR, op);
11512 : }
11513 : }
11514 251128 : else if (final_mask)
11515 : {
11516 737 : tree ptr = build_int_cst (ref_type, align * BITS_PER_UNIT);
11517 737 : vec_els = vect_get_mask_load_else (maskload_elsval, vectype);
11518 737 : if (type_mode_padding_p
11519 737 : && maskload_elsval != MASK_LOAD_ELSE_ZERO)
11520 0 : need_zeroing = true;
11521 737 : gcall *call = gimple_build_call_internal (IFN_MASK_LOAD, 4,
11522 : dataref_ptr, ptr,
11523 : final_mask,
11524 : vec_els);
11525 737 : gimple_call_set_nothrow (call, true);
11526 737 : new_stmt = call;
11527 737 : data_ref = NULL_TREE;
11528 : }
11529 : else
11530 : {
11531 250391 : tree ltype = vectype;
11532 250391 : tree new_vtype = NULL_TREE;
11533 250391 : unsigned HOST_WIDE_INT gap = DR_GROUP_GAP (first_stmt_info);
11534 250391 : unsigned HOST_WIDE_INT dr_size
11535 250391 : = vect_get_scalar_dr_size (first_dr_info);
11536 250391 : poly_int64 off = 0;
11537 250391 : if (memory_access_type == VMAT_CONTIGUOUS_REVERSE)
11538 1444 : off = (TYPE_VECTOR_SUBPARTS (vectype) - 1) * -dr_size;
11539 250391 : unsigned int vect_align
11540 250391 : = vect_known_alignment_in_bytes (first_dr_info, vectype,
11541 250391 : off);
11542 : /* Try to use a single smaller load when we are about
11543 : to load excess elements compared to the unrolled
11544 : scalar loop. */
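           : /* Editorial sketch, numbers invented: with nunits == 4,
           :    group_size == 3, gap == 1 and vf == 2 the scalar loop
           :    accesses 3 * 2 - 1 == 5 elements; for i == 1 a full vector
           :    load would touch elements 4..7, so remain == 1 and a
           :    smaller load is composed below.  */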
11545 250391 : if (known_gt ((i + 1) * nunits,
11546 : (group_size * vf - gap)))
11547 : {
11548 6855 : poly_uint64 remain = ((group_size * vf - gap) - i * nunits);
11549 6855 : if (known_ge ((i + 1) * nunits - (group_size * vf - gap),
11550 : nunits))
11551 : /* DR will be unused. */
11552 : ltype = NULL_TREE;
11553 2196 : else if (known_ge (vect_align,
11554 : tree_to_poly_uint64
11555 : (TYPE_SIZE_UNIT (vectype))))
11556 : /* Aligned access to excess elements is OK if
11557 : at least one element is accessed in the
11558 : scalar loop. */
11559 : ;
11560 1851 : else if (known_gt (vect_align,
11561 : ((nunits - remain) * dr_size)))
11562 : /* Aligned access to the gap area when there's
11563 : at least one element in it is OK. */
11564 : ;
11565 : else
11566 : {
11567 : /* remain should now be > 0 and < nunits. */
11568 1848 : unsigned num;
11569 1848 : if (known_ne (remain, 0u)
11570 1848 : && constant_multiple_p (nunits, remain, &num))
11571 : {
11572 1390 : tree ptype;
11573 1390 : new_vtype
11574 1390 : = vector_vector_composition_type (vectype, num,
11575 : &ptype);
11576 1390 : if (new_vtype)
11577 1390 : ltype = ptype;
11578 : }
11579 : /* Else use multiple loads or a masked load? */
11580 : /* For loop vectorization we should now have
11581 : an alternate type or LOOP_VINFO_PEELING_FOR_GAPS
11582 : set. */
11583 1848 : if (loop_vinfo)
11584 1600 : gcc_assert (new_vtype
11585 : || LOOP_VINFO_PEELING_FOR_GAPS
11586 : (loop_vinfo));
11587 : /* But still reduce the access size to the next
11588 : required power-of-two so peeling a single
11589 : scalar iteration is sufficient. */
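           : /* Editorial note: remain == 3 gives cpart_size == 4 here; with
           :    nunits == 8 a single 4-element piece is loaded instead of a
           :    full vector, so peeling one scalar iteration covers the one
           :    element read in excess.  */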
11590 1848 : unsigned HOST_WIDE_INT cremain;
11591 1848 : if (remain.is_constant (&cremain))
11592 : {
11593 1848 : unsigned HOST_WIDE_INT cpart_size
11594 1848 : = 1 << ceil_log2 (cremain);
11595 1848 : if (known_gt (nunits, cpart_size)
11596 1848 : && constant_multiple_p (nunits, cpart_size,
11597 : &num))
11598 : {
11599 1402 : tree ptype;
11600 1402 : new_vtype
11601 2804 : = vector_vector_composition_type (vectype,
11602 1402 : num,
11603 : &ptype);
11604 1402 : if (new_vtype)
11605 1402 : ltype = ptype;
11606 : }
11607 : }
11608 : }
11609 : }
11610 250391 : tree offset = (dataref_offset ? dataref_offset
11611 248264 : : build_int_cst (ref_type, 0));
11612 250391 : if (!ltype)
11613 : ;
11614 245732 : else if (ltype != vectype
11615 245732 : && memory_access_type == VMAT_CONTIGUOUS_REVERSE)
11616 : {
11617 25 : poly_uint64 gap_offset
11618 25 : = (tree_to_poly_uint64 (TYPE_SIZE_UNIT (vectype))
11619 25 : - tree_to_poly_uint64 (TYPE_SIZE_UNIT (ltype)));
11620 25 : tree gapcst = build_int_cstu (ref_type, gap_offset);
11621 25 : offset = size_binop (PLUS_EXPR, offset, gapcst);
11622 : }
11623 250391 : if (ltype)
11624 : {
11625 245732 : data_ref = fold_build2 (MEM_REF, ltype,
11626 : dataref_ptr, offset);
11627 245732 : if (alignment_support_scheme == dr_aligned
11628 245732 : && align >= TYPE_ALIGN_UNIT (ltype))
11629 : ;
11630 : else
11631 159780 : TREE_TYPE (data_ref)
11632 319560 : = build_aligned_type (TREE_TYPE (data_ref),
11633 : align * BITS_PER_UNIT);
11634 : }
11635 250391 : if (!ltype)
11636 4659 : data_ref = build_constructor (vectype, NULL);
11637 245732 : else if (ltype != vectype)
11638 : {
11639 1402 : vect_copy_ref_info (data_ref,
11640 1402 : DR_REF (first_dr_info->dr));
11641 1402 : tree tem = make_ssa_name (ltype);
11642 1402 : new_stmt = gimple_build_assign (tem, data_ref);
11643 1402 : vect_finish_stmt_generation (vinfo, stmt_info, new_stmt,
11644 : gsi);
11645 1402 : data_ref = NULL;
11646 1402 : vec<constructor_elt, va_gc> *v;
11647 : /* We've computed 'num' above, either statically as two
11648 : or via constant_multiple_p. */
11649 1402 : unsigned num
11650 1402 : = (exact_div (tree_to_poly_uint64
11651 1402 : (TYPE_SIZE_UNIT (vectype)),
11652 : tree_to_poly_uint64
11653 1402 : (TYPE_SIZE_UNIT (ltype)))
11654 1402 : .to_constant ());
11655 1402 : vec_alloc (v, num);
11656 1402 : if (memory_access_type == VMAT_CONTIGUOUS_REVERSE)
11657 : {
11658 62 : while (--num)
11659 62 : CONSTRUCTOR_APPEND_ELT (v, NULL_TREE,
11660 : build_zero_cst (ltype));
11661 25 : CONSTRUCTOR_APPEND_ELT (v, NULL_TREE, tem);
11662 : }
11663 : else
11664 : {
11665 1377 : CONSTRUCTOR_APPEND_ELT (v, NULL_TREE, tem);
11666 1377 : while (--num)
11667 3108 : CONSTRUCTOR_APPEND_ELT (v, NULL_TREE,
11668 : build_zero_cst (ltype));
11669 : }
11670 1402 : gcc_assert (new_vtype != NULL_TREE);
11671 1402 : if (new_vtype == vectype)
11672 1370 : new_stmt
11673 1370 : = gimple_build_assign (vec_dest,
11674 : build_constructor (vectype, v));
11675 : else
11676 : {
11677 32 : tree new_vname = make_ssa_name (new_vtype);
11678 32 : new_stmt
11679 32 : = gimple_build_assign (new_vname,
11680 : build_constructor (new_vtype,
11681 : v));
11682 32 : vect_finish_stmt_generation (vinfo, stmt_info,
11683 : new_stmt, gsi);
11684 32 : new_stmt
11685 32 : = gimple_build_assign (vec_dest,
11686 : build1 (VIEW_CONVERT_EXPR,
11687 : vectype, new_vname));
11688 : }
11689 : }
11690 : }
11691 : break;
11692 : }
11693 0 : case dr_explicit_realign:
11694 0 : {
11695 0 : if (costing_p)
11696 : break;
11697 0 : tree ptr, bump;
11698 :
11699 0 : tree vs = size_int (TYPE_VECTOR_SUBPARTS (vectype));
11700 :
11701 0 : if (compute_in_loop)
11702 0 : msq = vect_setup_realignment (vinfo, first_stmt_info, vectype,
11703 : gsi, &realignment_token,
11704 : dr_explicit_realign,
11705 : dataref_ptr, NULL);
11706 :
11707 0 : if (TREE_CODE (dataref_ptr) == SSA_NAME)
11708 0 : ptr = copy_ssa_name (dataref_ptr);
11709 : else
11710 0 : ptr = make_ssa_name (TREE_TYPE (dataref_ptr));
11711 : // For explicit realign the target alignment should be
11712 : // known at compile time.
11713 0 : unsigned HOST_WIDE_INT align
11714 0 : = DR_TARGET_ALIGNMENT (first_dr_info).to_constant ();
11715 0 : new_stmt = gimple_build_assign (ptr, BIT_AND_EXPR, dataref_ptr,
11716 : build_int_cst
11717 0 : (TREE_TYPE (dataref_ptr),
11718 0 : -(HOST_WIDE_INT) align));
11719 0 : vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
11720 0 : data_ref = build2 (MEM_REF, vectype,
11721 : ptr, build_int_cst (ref_type, 0));
11722 0 : vect_copy_ref_info (data_ref, DR_REF (first_dr_info->dr));
11723 0 : vec_dest = vect_create_destination_var (scalar_dest, vectype);
11724 0 : new_stmt = gimple_build_assign (vec_dest, data_ref);
11725 0 : new_temp = make_ssa_name (vec_dest, new_stmt);
11726 0 : gimple_assign_set_lhs (new_stmt, new_temp);
11727 0 : gimple_move_vops (new_stmt, stmt_info->stmt);
11728 0 : vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
11729 0 : msq = new_temp;
11730 :
11731 0 : bump = size_binop (MULT_EXPR, vs, TYPE_SIZE_UNIT (elem_type));
11732 0 : bump = size_binop (MINUS_EXPR, bump, size_one_node);
11733 0 : ptr = bump_vector_ptr (vinfo, dataref_ptr, NULL, gsi, stmt_info,
11734 : bump);
11735 0 : new_stmt = gimple_build_assign (NULL_TREE, BIT_AND_EXPR, ptr,
11736 0 : build_int_cst (TREE_TYPE (ptr),
11737 0 : -(HOST_WIDE_INT) align));
11738 0 : if (TREE_CODE (ptr) == SSA_NAME)
11739 0 : ptr = copy_ssa_name (ptr, new_stmt);
11740 : else
11741 0 : ptr = make_ssa_name (TREE_TYPE (ptr), new_stmt);
11742 0 : gimple_assign_set_lhs (new_stmt, ptr);
11743 0 : vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
11744 0 : data_ref = build2 (MEM_REF, vectype,
11745 : ptr, build_int_cst (ref_type, 0));
11746 0 : break;
11747 : }
11748 0 : case dr_explicit_realign_optimized:
11749 0 : {
11750 0 : if (costing_p)
11751 : break;
11752 0 : if (TREE_CODE (dataref_ptr) == SSA_NAME)
11753 0 : new_temp = copy_ssa_name (dataref_ptr);
11754 : else
11755 0 : new_temp = make_ssa_name (TREE_TYPE (dataref_ptr));
11756 : // We should only be doing this if we know the target
11757 : // alignment at compile time.
11758 0 : unsigned HOST_WIDE_INT align
11759 0 : = DR_TARGET_ALIGNMENT (first_dr_info).to_constant ();
11760 0 : new_stmt = gimple_build_assign (new_temp, BIT_AND_EXPR, dataref_ptr,
11761 0 : build_int_cst (TREE_TYPE (dataref_ptr),
11762 0 : -(HOST_WIDE_INT) align));
11763 0 : vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
11764 0 : data_ref = build2 (MEM_REF, vectype, new_temp,
11765 : build_int_cst (ref_type, 0));
11766 0 : break;
11767 : }
11768 0 : default:
11769 0 : gcc_unreachable ();
11770 : }
11771 :
11772 : /* One common place to cost the vector load above for the different
11773 : alignment support schemes. */
11774 918647 : if (costing_p)
11775 : {
11776 : /* The prologue cost for realign only needs
11777 : to be counted once for the whole group. */
11778 667519 : bool first_stmt_info_p = first_stmt_info == stmt_info;
11779 667519 : bool add_realign_cost = first_stmt_info_p && i == 0;
11780 667519 : if (memory_access_type == VMAT_CONTIGUOUS
11781 667519 : || memory_access_type == VMAT_CONTIGUOUS_REVERSE)
11782 : {
11783 : /* Leave realign cases alone to keep them simple. */
11784 667519 : if (alignment_support_scheme == dr_explicit_realign_optimized
11785 : || alignment_support_scheme == dr_explicit_realign)
11786 0 : vect_get_load_cost (vinfo, stmt_info, slp_node, 1,
11787 : alignment_support_scheme, misalignment,
11788 : add_realign_cost, &inside_cost,
11789 : &prologue_cost, cost_vec, cost_vec,
11790 : true);
11791 : else
11792 667519 : n_adjacent_loads++;
11793 : }
11794 : }
11795 : else
11796 : {
11797 251128 : vec_dest = vect_create_destination_var (scalar_dest, vectype);
11798 : /* DATA_REF is null if we've already built the statement. */
11799 251128 : if (data_ref)
11800 : {
11801 248989 : vect_copy_ref_info (data_ref, DR_REF (first_dr_info->dr));
11802 248989 : new_stmt = gimple_build_assign (vec_dest, data_ref);
11803 : }
11804 :
11805 502256 : new_temp = (need_zeroing
11806 251128 : ? make_ssa_name (vectype)
11807 251128 : : make_ssa_name (vec_dest, new_stmt));
11808 251128 : gimple_set_lhs (new_stmt, new_temp);
11809 251128 : vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
11810 :
11811 : /* If we need to explicitly zero inactive elements emit a
11812 : VEC_COND_EXPR that does so. */
11813 251128 : if (need_zeroing)
11814 : {
11815 0 : vec_els = vect_get_mask_load_else (MASK_LOAD_ELSE_ZERO,
11816 : vectype);
11817 :
11818 0 : tree new_temp2 = make_ssa_name (vec_dest, new_stmt);
11819 0 : new_stmt = gimple_build_assign (new_temp2, VEC_COND_EXPR,
11820 : final_mask, new_temp, vec_els);
11821 0 : vect_finish_stmt_generation (vinfo, stmt_info, new_stmt,
11822 : gsi);
11823 0 : new_temp = new_temp2;
11824 : }
11825 : }
11826 :
11827 : /* 3. Handle explicit realignment if necessary/supported.
11828 : Create in loop:
11829 : vec_dest = realign_load (msq, lsq, realignment_token) */
11830 918647 : if (!costing_p
11831 251128 : && (alignment_support_scheme == dr_explicit_realign_optimized
11832 : || alignment_support_scheme == dr_explicit_realign))
11833 : {
11834 0 : lsq = gimple_assign_lhs (new_stmt);
11835 0 : if (!realignment_token)
11836 0 : realignment_token = dataref_ptr;
11837 0 : vec_dest = vect_create_destination_var (scalar_dest, vectype);
11838 0 : new_stmt = gimple_build_assign (vec_dest, REALIGN_LOAD_EXPR, msq,
11839 : lsq, realignment_token);
11840 0 : new_temp = make_ssa_name (vec_dest, new_stmt);
11841 0 : gimple_assign_set_lhs (new_stmt, new_temp);
11842 0 : vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
11843 :
11844 0 : if (alignment_support_scheme == dr_explicit_realign_optimized)
11845 : {
11846 0 : gcc_assert (phi);
11847 0 : if (i == vec_num - 1)
11848 0 : add_phi_arg (phi, lsq, loop_latch_edge (containing_loop),
11849 : UNKNOWN_LOCATION);
11850 : msq = lsq;
11851 : }
11852 : }
11853 :
11854 918647 : if (memory_access_type == VMAT_CONTIGUOUS_REVERSE)
11855 : {
11856 5939 : if (costing_p)
11857 4495 : inside_cost = record_stmt_cost (cost_vec, 1, vec_perm,
11858 : slp_node, 0, vect_body);
11859 : else
11860 : {
11861 1444 : tree perm_mask = perm_mask_for_reverse (vectype);
11862 1444 : new_temp = permute_vec_elements (vinfo, new_temp, new_temp,
11863 : perm_mask, stmt_info, gsi);
11864 1444 : new_stmt = SSA_NAME_DEF_STMT (new_temp);
11865 : }
11866 : }
11867 :
11868 : /* Collect vector loads and later create their permutation in
11869 : vect_transform_slp_perm_load. */
11870 918647 : if (!costing_p && (grouped_load || ls.slp_perm))
11871 71890 : dr_chain.quick_push (new_temp);
11872 :
11873 : /* Store vector loads in the corresponding SLP_NODE. */
11874 251128 : if (!costing_p && !ls.slp_perm)
11875 179238 : slp_node->push_vec_def (new_stmt);
11876 :
11877 : /* With an SLP permutation we load the gaps as well; without one
11878 : we need to skip the gaps once we have managed to fully load
11879 : all elements. group_gap_adj is DR_GROUP_SIZE here. */
11880 918647 : group_elt += nunits;
11881 918647 : if (!costing_p
11882 251128 : && maybe_ne (group_gap_adj, 0U)
11883 45613 : && !ls.slp_perm
11884 939397 : && known_eq (group_elt, group_size - group_gap_adj))
11885 : {
11886 16096 : poly_wide_int bump_val
11887 16096 : = (wi::to_wide (TYPE_SIZE_UNIT (elem_type)) * group_gap_adj);
11888 16096 : if (tree_int_cst_sgn (vect_dr_behavior (vinfo, dr_info)->step) == -1)
11889 0 : bump_val = -bump_val;
11890 16096 : tree bump = wide_int_to_tree (sizetype, bump_val);
11891 16096 : dataref_ptr = bump_vector_ptr (vinfo, dataref_ptr, ptr_incr, gsi,
11892 : stmt_info, bump);
11893 16096 : group_elt = 0;
11894 16096 : }
11895 : }
11896 : /* Bump the vector pointer to account for a gap or for excess
11897 : elements loaded for a permuted SLP load. */
11898 571260 : if (!costing_p
11899 160493 : && maybe_ne (group_gap_adj, 0U)
11900 587805 : && ls.slp_perm)
11901 : {
11902 449 : poly_wide_int bump_val
11903 449 : = (wi::to_wide (TYPE_SIZE_UNIT (elem_type)) * group_gap_adj);
11904 449 : if (tree_int_cst_sgn (vect_dr_behavior (vinfo, dr_info)->step) == -1)
11905 9 : bump_val = -bump_val;
11906 449 : tree bump = wide_int_to_tree (sizetype, bump_val);
11907 449 : dataref_ptr = bump_vector_ptr (vinfo, dataref_ptr, ptr_incr, gsi,
11908 : stmt_info, bump);
11909 449 : }
11910 :
11911 571260 : if (ls.slp_perm)
11912 : {
11913 : /* For SLP we know we've seen all possible uses of dr_chain so
11914 : direct vect_transform_slp_perm_load to DCE the unused parts.
11915 : ??? This is a hack to prevent compile-time issues as seen
11916 : in PR101120 and friends. */
11917 54339 : if (costing_p)
11918 : {
11919 37397 : gcc_assert (ls.n_perms != -1U && ls.n_loads != -1U);
11920 37397 : if (ls.n_perms != 0)
11921 36887 : inside_cost = record_stmt_cost (cost_vec, ls.n_perms, vec_perm,
11922 : slp_node, 0, vect_body);
11923 37397 : if (n_adjacent_loads > 0)
11924 37397 : n_adjacent_loads = ls.n_loads;
11925 : }
11926 : else
11927 : {
11928 16942 : unsigned n_perms2, n_loads2;
11929 16942 : bool ok = vect_transform_slp_perm_load (vinfo, slp_node, dr_chain,
11930 : gsi, vf, false, &n_perms2,
11931 : &n_loads2, true);
11932 16942 : gcc_assert (ok && ls.n_perms == n_perms2 && ls.n_loads == n_loads2);
11933 : }
11934 : }
11935 :
11936 571260 : if (costing_p)
11937 : {
11938 410767 : gcc_assert (memory_access_type == VMAT_CONTIGUOUS
11939 : || memory_access_type == VMAT_CONTIGUOUS_REVERSE);
11940 410767 : if (n_adjacent_loads > 0)
11941 410767 : vect_get_load_cost (vinfo, stmt_info, slp_node, n_adjacent_loads,
11942 : alignment_support_scheme, misalignment, false,
11943 : &inside_cost, &prologue_cost, cost_vec, cost_vec,
11944 : true);
11945 410767 : if (dump_enabled_p ())
11946 24024 : dump_printf_loc (MSG_NOTE, vect_location,
11947 : "vect_model_load_cost: inside_cost = %u, "
11948 : "prologue_cost = %u .\n",
11949 : inside_cost, prologue_cost);
11950 : }
11951 :
11952 571260 : return true;
11953 1853198 : }
11954 :
11955 : /* Function vect_is_simple_cond.
11956 :
11957 : Input:
11958 : LOOP - the loop that is being vectorized.
11959 : COND - Condition that is checked for simple use.
11960 :
11961 : Output:
11962 : *COMP_VECTYPE - the vector type for the comparison.
11963 : *DTS - The def types for the arguments of the comparison
11964 :
11965 : Returns whether a COND can be vectorized. Checks whether
11966 : condition operands are supportable using vec_is_simple_use. */
11967 :
11968 : static bool
11969 34861 : vect_is_simple_cond (tree cond, vec_info *vinfo,
11970 : slp_tree slp_node, tree *comp_vectype,
11971 : enum vect_def_type *dts, tree vectype)
11972 : {
11973 34861 : tree lhs, rhs;
11974 34861 : tree vectype1 = NULL_TREE, vectype2 = NULL_TREE;
11975 34861 : slp_tree slp_op;
11976 :
11977 : /* Mask case. */
11978 34861 : if (TREE_CODE (cond) == SSA_NAME
11979 34861 : && VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (cond)))
11980 : {
11981 34849 : if (!vect_is_simple_use (vinfo, slp_node, 0, &cond,
11982 : &slp_op, &dts[0], comp_vectype)
11983 34849 : || !*comp_vectype
11984 69683 : || !VECTOR_BOOLEAN_TYPE_P (*comp_vectype))
11985 : return false;
11986 : return true;
11987 : }
11988 :
11989 12 : if (!COMPARISON_CLASS_P (cond))
11990 : return false;
11991 :
11992 0 : lhs = TREE_OPERAND (cond, 0);
11993 0 : rhs = TREE_OPERAND (cond, 1);
11994 :
11995 0 : if (TREE_CODE (lhs) == SSA_NAME)
11996 : {
11997 0 : if (!vect_is_simple_use (vinfo, slp_node, 0,
11998 : &lhs, &slp_op, &dts[0], &vectype1))
11999 : return false;
12000 : }
12001 0 : else if (TREE_CODE (lhs) == INTEGER_CST || TREE_CODE (lhs) == REAL_CST
12002 0 : || TREE_CODE (lhs) == FIXED_CST)
12003 0 : dts[0] = vect_constant_def;
12004 : else
12005 : return false;
12006 :
12007 0 : if (TREE_CODE (rhs) == SSA_NAME)
12008 : {
12009 0 : if (!vect_is_simple_use (vinfo, slp_node, 1,
12010 : &rhs, &slp_op, &dts[1], &vectype2))
12011 : return false;
12012 : }
12013 0 : else if (TREE_CODE (rhs) == INTEGER_CST || TREE_CODE (rhs) == REAL_CST
12014 0 : || TREE_CODE (rhs) == FIXED_CST)
12015 0 : dts[1] = vect_constant_def;
12016 : else
12017 : return false;
12018 :
12019 0 : if (vectype1 && vectype2
12020 0 : && maybe_ne (TYPE_VECTOR_SUBPARTS (vectype1),
12021 0 : TYPE_VECTOR_SUBPARTS (vectype2)))
12022 0 : return false;
12023 :
12024 0 : *comp_vectype = vectype1 ? vectype1 : vectype2;
12025 : /* Invariant comparison. */
12026 0 : if (! *comp_vectype)
12027 : {
12028 0 : tree scalar_type = TREE_TYPE (lhs);
12029 0 : if (VECT_SCALAR_BOOLEAN_TYPE_P (scalar_type))
12030 0 : *comp_vectype = truth_type_for (vectype);
12031 : else
12032 : {
12033 : /* If we can widen the comparison to match vectype do so. */
12034 0 : if (INTEGRAL_TYPE_P (scalar_type)
12035 0 : && !slp_node
12036 0 : && tree_int_cst_lt (TYPE_SIZE (scalar_type),
12037 0 : TYPE_SIZE (TREE_TYPE (vectype))))
12038 0 : scalar_type = build_nonstandard_integer_type
12039 0 : (vector_element_bits (vectype), TYPE_UNSIGNED (scalar_type));
12040 0 : *comp_vectype = get_vectype_for_scalar_type (vinfo, scalar_type,
12041 : slp_node);
12042 : }
12043 : }
12044 :
12045 : return true;
12046 : }
12047 :
12048 : /* vectorizable_condition.
12049 :
12050 : Check if STMT_INFO is a conditional modify expression that can be vectorized.
12051 : If COST_VEC is passed, calculate costs but don't change anything,
12052 : otherwise, vectorize STMT_INFO: create a vectorized stmt using
12053 : VEC_COND_EXPR to replace it, and insert it at GSI.
12054 :
12055 : When STMT_INFO is vectorized as a nested cycle, for_reduction is true.
12056 :
12057 : Return true if STMT_INFO is vectorizable in this way. */
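 : 
 : /* As a rough sketch (illustrative names), the scalar statement
 : 
 : x_1 = _5 ? y_2 : z_3;
 : 
 : is replaced per vector copy by
 : 
 : vect_x.9_1 = VEC_COND_EXPR <vect__5.7, vect_y.8, vect_z.6>; */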
12058 :
12059 : static bool
12060 673215 : vectorizable_condition (vec_info *vinfo,
12061 : stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
12062 : slp_tree slp_node, stmt_vector_for_cost *cost_vec)
12063 : {
12064 673215 : tree scalar_dest = NULL_TREE;
12065 673215 : tree vec_dest = NULL_TREE;
12066 673215 : tree cond_expr, cond_expr0 = NULL_TREE, cond_expr1 = NULL_TREE;
12067 673215 : tree then_clause, else_clause;
12068 673215 : tree comp_vectype = NULL_TREE;
12069 673215 : tree vec_cond_lhs = NULL_TREE, vec_cond_rhs = NULL_TREE;
12070 673215 : tree vec_then_clause = NULL_TREE, vec_else_clause = NULL_TREE;
12071 673215 : tree vec_compare;
12072 673215 : tree new_temp;
12073 673215 : loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
12074 673215 : enum vect_def_type dts[4]
12075 : = {vect_unknown_def_type, vect_unknown_def_type,
12076 : vect_unknown_def_type, vect_unknown_def_type};
12077 673215 : enum tree_code code, cond_code, bitop1 = NOP_EXPR, bitop2 = NOP_EXPR;
12078 673215 : int i;
12079 673215 : bb_vec_info bb_vinfo = dyn_cast <bb_vec_info> (vinfo);
12080 673215 : vec<tree> vec_oprnds0 = vNULL;
12081 673215 : vec<tree> vec_oprnds1 = vNULL;
12082 673215 : vec<tree> vec_oprnds2 = vNULL;
12083 673215 : vec<tree> vec_oprnds3 = vNULL;
12084 673215 : tree vec_cmp_type;
12085 673215 : bool masked = false;
12086 :
12087 673215 : if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
12088 : return false;
12089 :
12090 : /* Is vectorizable conditional operation? */
12091 1016701 : gassign *stmt = dyn_cast <gassign *> (stmt_info->stmt);
12092 378314 : if (!stmt)
12093 : return false;
12094 :
12095 378314 : code = gimple_assign_rhs_code (stmt);
12096 378314 : if (code != COND_EXPR)
12097 : return false;
12098 :
12099 34861 : int reduc_index = SLP_TREE_REDUC_IDX (slp_node);
12100 34861 : vect_reduction_type reduction_type = TREE_CODE_REDUCTION;
12101 34861 : bool nested_cycle_p = false;
12102 34861 : bool for_reduction = vect_is_reduction (stmt_info);
12103 34861 : if (for_reduction)
12104 : {
12105 574 : if (SLP_TREE_LANES (slp_node) > 1)
12106 : return false;
12107 : /* ??? With a reduction path we do not get at the reduction info from
12108 : every stmt, use the conservative default setting then. */
12109 654 : if (STMT_VINFO_REDUC_DEF (vect_orig_stmt (stmt_info)))
12110 : {
12111 556 : vect_reduc_info reduc_info
12112 556 : = info_for_reduction (loop_vinfo, slp_node);
12113 556 : reduction_type = VECT_REDUC_INFO_TYPE (reduc_info);
12114 556 : nested_cycle_p = nested_in_vect_loop_p (LOOP_VINFO_LOOP (loop_vinfo),
12115 : stmt_info);
12116 : }
12117 : }
12118 : else
12119 : {
12120 34287 : if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
12121 : return false;
12122 : }
12123 :
12124 34861 : tree vectype = SLP_TREE_VECTYPE (slp_node);
12125 34861 : tree vectype1 = NULL_TREE, vectype2 = NULL_TREE;
12126 :
12127 34861 : int vec_num = vect_get_num_copies (vinfo, slp_node);
12128 :
12129 34861 : cond_expr = gimple_assign_rhs1 (stmt);
12130 34861 : gcc_assert (! COMPARISON_CLASS_P (cond_expr));
12131 :
12132 34861 : if (!vect_is_simple_cond (cond_expr, vinfo, slp_node,
12133 : &comp_vectype, &dts[0], vectype)
12134 34861 : || !comp_vectype)
12135 : return false;
12136 :
12137 34834 : unsigned op_adjust = COMPARISON_CLASS_P (cond_expr) ? 1 : 0;
12138 34834 : slp_tree then_slp_node, else_slp_node;
12139 34834 : if (!vect_is_simple_use (vinfo, slp_node, 1 + op_adjust,
12140 : &then_clause, &then_slp_node, &dts[2], &vectype1))
12141 : return false;
12142 34834 : if (!vect_is_simple_use (vinfo, slp_node, 2 + op_adjust,
12143 : &else_clause, &else_slp_node, &dts[3], &vectype2))
12144 : return false;
12145 :
12146 34834 : if (vectype1 && !useless_type_conversion_p (vectype, vectype1))
12147 : return false;
12148 :
12149 34834 : if (vectype2 && !useless_type_conversion_p (vectype, vectype2))
12150 : return false;
12151 :
12152 34834 : masked = !COMPARISON_CLASS_P (cond_expr);
12153 34834 : vec_cmp_type = truth_type_for (comp_vectype);
12154 34834 : if (vec_cmp_type == NULL_TREE
12155 69668 : || maybe_ne (TYPE_VECTOR_SUBPARTS (vectype),
12156 34834 : TYPE_VECTOR_SUBPARTS (vec_cmp_type)))
12157 0 : return false;
12158 :
12159 34834 : cond_code = TREE_CODE (cond_expr);
12160 34834 : if (!masked)
12161 : {
12162 0 : cond_expr0 = TREE_OPERAND (cond_expr, 0);
12163 0 : cond_expr1 = TREE_OPERAND (cond_expr, 1);
12164 : }
12165 :
12166 : /* For conditional reductions, the "then" value needs to be the candidate
12167 : value calculated by this iteration while the "else" value needs to be
12168 : the result carried over from previous iterations. If the COND_EXPR
12169 : is the other way around, we need to swap it. */
12170 34834 : bool must_invert_cmp_result = false;
12171 34834 : if (reduction_type == EXTRACT_LAST_REDUCTION && reduc_index == 1)
12172 : {
12173 0 : if (masked)
12174 0 : must_invert_cmp_result = true;
12175 : else
12176 : {
12177 0 : bool honor_nans = HONOR_NANS (TREE_TYPE (cond_expr0));
12178 0 : tree_code new_code = invert_tree_comparison (cond_code, honor_nans);
12179 0 : if (new_code == ERROR_MARK)
12180 : must_invert_cmp_result = true;
12181 : else
12182 : {
12183 0 : cond_code = new_code;
12184 : /* Make sure we don't accidentally use the old condition. */
12185 0 : cond_expr = NULL_TREE;
12186 : }
12187 : }
12188 : /* ??? The vectorized operand query below doesn't allow swapping
12189 : this way for SLP. */
12190 0 : return false;
12191 : /* std::swap (then_clause, else_clause); */
12192 : }
12193 :
12194 34834 : if (!masked && VECTOR_BOOLEAN_TYPE_P (comp_vectype))
12195 : {
12196 : /* Boolean values may have another representation in vectors
12197 : and therefore we prefer bit operations over comparison for
12198 : them (which also works for scalar masks). We store opcodes
12199 : to use in bitop1 and bitop2. Statement is vectorized as
12200 : BITOP2 (rhs1 BITOP1 rhs2) or rhs1 BITOP2 (BITOP1 rhs2)
12201 : depending on bitop1 and bitop2 arity. */
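 : /* E.g. GT_EXPR sets bitop1 = BIT_NOT_EXPR and bitop2 = BIT_AND_EXPR,
 : so for boolean operands rhs1 > rhs2 is emitted as rhs1 & ~rhs2,
 : while EQ_EXPR is emitted as ~(rhs1 ^ rhs2). */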
12202 0 : switch (cond_code)
12203 : {
12204 : case GT_EXPR:
12205 : bitop1 = BIT_NOT_EXPR;
12206 : bitop2 = BIT_AND_EXPR;
12207 : break;
12208 0 : case GE_EXPR:
12209 0 : bitop1 = BIT_NOT_EXPR;
12210 0 : bitop2 = BIT_IOR_EXPR;
12211 0 : break;
12212 0 : case LT_EXPR:
12213 0 : bitop1 = BIT_NOT_EXPR;
12214 0 : bitop2 = BIT_AND_EXPR;
12215 0 : std::swap (cond_expr0, cond_expr1);
12216 0 : break;
12217 0 : case LE_EXPR:
12218 0 : bitop1 = BIT_NOT_EXPR;
12219 0 : bitop2 = BIT_IOR_EXPR;
12220 0 : std::swap (cond_expr0, cond_expr1);
12221 0 : break;
12222 0 : case NE_EXPR:
12223 0 : bitop1 = BIT_XOR_EXPR;
12224 0 : break;
12225 0 : case EQ_EXPR:
12226 0 : bitop1 = BIT_XOR_EXPR;
12227 0 : bitop2 = BIT_NOT_EXPR;
12228 0 : break;
12229 : default:
12230 : return false;
12231 : }
12232 : cond_code = SSA_NAME;
12233 : }
12234 :
12235 34834 : if (TREE_CODE_CLASS (cond_code) == tcc_comparison
12236 0 : && reduction_type == EXTRACT_LAST_REDUCTION
12237 34834 : && !expand_vec_cmp_expr_p (comp_vectype, vec_cmp_type, cond_code))
12238 : {
12239 0 : if (dump_enabled_p ())
12240 0 : dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
12241 : "reduction comparison operation not supported.\n");
12242 0 : return false;
12243 : }
12244 :
12245 34834 : if (cost_vec)
12246 : {
12247 26334 : if (bitop1 != NOP_EXPR)
12248 : {
12249 0 : machine_mode mode = TYPE_MODE (comp_vectype);
12250 0 : optab optab;
12251 :
12252 0 : optab = optab_for_tree_code (bitop1, comp_vectype, optab_default);
12253 0 : if (!optab || !can_implement_p (optab, mode))
12254 0 : return false;
12255 :
12256 0 : if (bitop2 != NOP_EXPR)
12257 : {
12258 0 : optab = optab_for_tree_code (bitop2, comp_vectype,
12259 : optab_default);
12260 0 : if (!optab || !can_implement_p (optab, mode))
12261 0 : return false;
12262 : }
12263 : }
12264 :
12265 26334 : vect_cost_for_stmt kind = vector_stmt;
12266 26334 : if (reduction_type == EXTRACT_LAST_REDUCTION)
12267 : /* Count one reduction-like operation per vector. */
12268 : kind = vec_to_scalar;
12269 26334 : else if ((masked && !expand_vec_cond_expr_p (vectype, comp_vectype))
12270 26334 : || (!masked
12271 0 : && (!expand_vec_cmp_expr_p (comp_vectype, vec_cmp_type,
12272 : cond_code)
12273 0 : || !expand_vec_cond_expr_p (vectype, vec_cmp_type))))
12274 6 : return false;
12275 :
12276 26328 : if (!vect_maybe_update_slp_op_vectype (SLP_TREE_CHILDREN (slp_node)[0],
12277 : comp_vectype)
12278 26328 : || (op_adjust == 1
12279 0 : && !vect_maybe_update_slp_op_vectype
12280 0 : (SLP_TREE_CHILDREN (slp_node)[1], comp_vectype))
12281 26328 : || !vect_maybe_update_slp_op_vectype (then_slp_node, vectype)
12282 52656 : || !vect_maybe_update_slp_op_vectype (else_slp_node, vectype))
12283 : {
12284 0 : if (dump_enabled_p ())
12285 0 : dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
12286 : "incompatible vector types for invariants\n");
12287 0 : return false;
12288 : }
12289 :
12290 26328 : if (loop_vinfo && for_reduction
12291 427 : && LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo))
12292 : {
12293 68 : if (reduction_type == EXTRACT_LAST_REDUCTION)
12294 : {
12295 0 : if (direct_internal_fn_supported_p (IFN_LEN_FOLD_EXTRACT_LAST,
12296 : vectype, OPTIMIZE_FOR_SPEED))
12297 0 : vect_record_loop_len (loop_vinfo,
12298 : &LOOP_VINFO_LENS (loop_vinfo),
12299 : vec_num, vectype, 1);
12300 : else
12301 0 : vect_record_loop_mask (loop_vinfo,
12302 : &LOOP_VINFO_MASKS (loop_vinfo),
12303 : vec_num, vectype, NULL);
12304 : }
12305 : /* Extra inactive lanes should be safe for vect_nested_cycle. */
12306 68 : else if (!nested_cycle_p)
12307 : {
12308 68 : if (dump_enabled_p ())
12309 8 : dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
12310 : "conditional reduction prevents the use"
12311 : " of partial vectors.\n");
12312 68 : LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo) = false;
12313 : }
12314 : }
12315 :
12316 26328 : SLP_TREE_TYPE (slp_node) = condition_vec_info_type;
12317 26328 : vect_model_simple_cost (vinfo, 1, slp_node, cost_vec, kind);
12318 26328 : return true;
12319 : }
12320 :
12321 : /* Transform. */
12322 :
12323 : /* Handle def. */
12324 8500 : scalar_dest = gimple_assign_lhs (stmt);
12325 8500 : if (reduction_type != EXTRACT_LAST_REDUCTION)
12326 8500 : vec_dest = vect_create_destination_var (scalar_dest, vectype);
12327 :
12328 8500 : bool swap_cond_operands = false;
12329 :
12330 : /* See whether another part of the vectorized code applies a loop
12331 : mask to the condition, or to its inverse. */
12332 :
12333 8500 : vec_loop_masks *masks = NULL;
12334 8500 : vec_loop_lens *lens = NULL;
12335 8500 : if (loop_vinfo && LOOP_VINFO_FULLY_WITH_LENGTH_P (loop_vinfo))
12336 : {
12337 0 : if (reduction_type == EXTRACT_LAST_REDUCTION)
12338 0 : lens = &LOOP_VINFO_LENS (loop_vinfo);
12339 : }
12340 8500 : else if (loop_vinfo && LOOP_VINFO_FULLY_MASKED_P (loop_vinfo))
12341 : {
12342 3 : if (reduction_type == EXTRACT_LAST_REDUCTION)
12343 0 : masks = &LOOP_VINFO_MASKS (loop_vinfo);
12344 : else
12345 : {
12346 3 : scalar_cond_masked_key cond (cond_expr, 1);
12347 3 : if (loop_vinfo->scalar_cond_masked_set.contains (cond))
12348 0 : masks = &LOOP_VINFO_MASKS (loop_vinfo);
12349 : else
12350 : {
12351 3 : bool honor_nans = HONOR_NANS (TREE_TYPE (cond.op0));
12352 3 : tree_code orig_code = cond.code;
12353 3 : cond.code = invert_tree_comparison (cond.code, honor_nans);
12354 3 : if (!masked && loop_vinfo->scalar_cond_masked_set.contains (cond))
12355 : {
12356 0 : masks = &LOOP_VINFO_MASKS (loop_vinfo);
12357 0 : cond_code = cond.code;
12358 0 : swap_cond_operands = true;
12359 : }
12360 : else
12361 : {
12362 : /* Try the inverse of the current mask. We check if the
12363 : inverse mask is live and if so we generate a negate of
12364 : the current mask such that we still honor NaNs. */
12365 3 : cond.inverted_p = true;
12366 3 : cond.code = orig_code;
12367 3 : if (loop_vinfo->scalar_cond_masked_set.contains (cond))
12368 : {
12369 0 : masks = &LOOP_VINFO_MASKS (loop_vinfo);
12370 0 : cond_code = cond.code;
12371 0 : swap_cond_operands = true;
12372 0 : must_invert_cmp_result = true;
12373 : }
12374 : }
12375 : }
12376 : }
12377 : }
12378 :
12379 : /* Handle cond expr. */
12380 8500 : if (masked)
12381 8500 : vect_get_vec_defs (vinfo, slp_node,
12382 : cond_expr, &vec_oprnds0,
12383 : then_clause, &vec_oprnds2,
12384 : reduction_type != EXTRACT_LAST_REDUCTION
12385 : ? else_clause : NULL, &vec_oprnds3);
12386 : else
12387 0 : vect_get_vec_defs (vinfo, slp_node,
12388 : cond_expr0, &vec_oprnds0,
12389 : cond_expr1, &vec_oprnds1,
12390 : then_clause, &vec_oprnds2,
12391 : reduction_type != EXTRACT_LAST_REDUCTION
12392 : ? else_clause : NULL, &vec_oprnds3);
12393 :
12394 8500 : if (reduction_type == EXTRACT_LAST_REDUCTION)
12395 0 : vec_else_clause = else_clause;
12396 :
12397 : /* Arguments are ready. Create the new vector stmt. */
12398 20009 : FOR_EACH_VEC_ELT (vec_oprnds0, i, vec_cond_lhs)
12399 : {
12400 11509 : vec_then_clause = vec_oprnds2[i];
12401 11509 : if (reduction_type != EXTRACT_LAST_REDUCTION)
12402 11509 : vec_else_clause = vec_oprnds3[i];
12403 :
12404 11509 : if (swap_cond_operands)
12405 0 : std::swap (vec_then_clause, vec_else_clause);
12406 :
12407 11509 : if (masked)
12408 : vec_compare = vec_cond_lhs;
12409 : else
12410 : {
12411 0 : vec_cond_rhs = vec_oprnds1[i];
12412 0 : if (bitop1 == NOP_EXPR)
12413 : {
12414 0 : gimple_seq stmts = NULL;
12415 0 : vec_compare = gimple_build (&stmts, cond_code, vec_cmp_type,
12416 : vec_cond_lhs, vec_cond_rhs);
12417 0 : gsi_insert_before (gsi, stmts, GSI_SAME_STMT);
12418 : }
12419 : else
12420 : {
12421 0 : new_temp = make_ssa_name (vec_cmp_type);
12422 0 : gassign *new_stmt;
12423 0 : if (bitop1 == BIT_NOT_EXPR)
12424 0 : new_stmt = gimple_build_assign (new_temp, bitop1,
12425 : vec_cond_rhs);
12426 : else
12427 0 : new_stmt
12428 0 : = gimple_build_assign (new_temp, bitop1, vec_cond_lhs,
12429 : vec_cond_rhs);
12430 0 : vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
12431 0 : if (bitop2 == NOP_EXPR)
12432 : vec_compare = new_temp;
12433 0 : else if (bitop2 == BIT_NOT_EXPR
12434 0 : && reduction_type != EXTRACT_LAST_REDUCTION)
12435 : {
12436 : /* Instead of doing ~x ? y : z do x ? z : y. */
12437 : vec_compare = new_temp;
12438 : std::swap (vec_then_clause, vec_else_clause);
12439 : }
12440 : else
12441 : {
12442 0 : vec_compare = make_ssa_name (vec_cmp_type);
12443 0 : if (bitop2 == BIT_NOT_EXPR)
12444 0 : new_stmt
12445 0 : = gimple_build_assign (vec_compare, bitop2, new_temp);
12446 : else
12447 0 : new_stmt
12448 0 : = gimple_build_assign (vec_compare, bitop2,
12449 : vec_cond_lhs, new_temp);
12450 0 : vect_finish_stmt_generation (vinfo, stmt_info,
12451 : new_stmt, gsi);
12452 : }
12453 : }
12454 : }
12455 :
12456 : /* If we decided to apply a loop mask to the result of the vector
12457 : comparison, AND the comparison with the mask now. Later passes
12458 : should then be able to reuse the AND results between multiple
12459 : vector statements.
12460 :
12461 : For example:
12462 : for (int i = 0; i < 100; ++i)
12463 : x[i] = y[i] ? z[i] : 10;
12464 :
12465 : results in following optimized GIMPLE:
12466 :
12467 : mask__35.8_43 = vect__4.7_41 != { 0, ... };
12468 : vec_mask_and_46 = loop_mask_40 & mask__35.8_43;
12469 : _19 = &MEM[base: z_12(D), index: ivtmp_56, step: 4, offset: 0B];
12470 : vect_iftmp.11_47 = .MASK_LOAD (_19, 4B, vec_mask_and_46);
12471 : vect_iftmp.12_52 = VEC_COND_EXPR <vec_mask_and_46,
12472 : vect_iftmp.11_47, { 10, ... }>;
12473 :
12474 : instead of using masked and unmasked forms of
12475 : vec != { 0, ... } (masked in the MASK_LOAD,
12476 : unmasked in the VEC_COND_EXPR). */
12477 :
12478 : /* Force vec_compare to be an SSA_NAME rather than a comparison,
12479 : in cases where that's necessary. */
12480 :
12481 11509 : tree len = NULL_TREE, bias = NULL_TREE;
12482 11509 : if (masks || lens || reduction_type == EXTRACT_LAST_REDUCTION)
12483 : {
12484 0 : if (!is_gimple_val (vec_compare))
12485 : {
12486 0 : tree vec_compare_name = make_ssa_name (vec_cmp_type);
12487 0 : gassign *new_stmt = gimple_build_assign (vec_compare_name,
12488 : vec_compare);
12489 0 : vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
12490 0 : vec_compare = vec_compare_name;
12491 : }
12492 :
12493 0 : if (must_invert_cmp_result)
12494 : {
12495 0 : tree vec_compare_name = make_ssa_name (vec_cmp_type);
12496 0 : gassign *new_stmt = gimple_build_assign (vec_compare_name,
12497 : BIT_NOT_EXPR,
12498 : vec_compare);
12499 0 : vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
12500 0 : vec_compare = vec_compare_name;
12501 : }
12502 :
12503 0 : if (direct_internal_fn_supported_p (IFN_LEN_FOLD_EXTRACT_LAST,
12504 : vectype, OPTIMIZE_FOR_SPEED))
12505 : {
12506 0 : if (lens)
12507 : {
12508 : /* ??? Do we really want the adjusted LEN here? Isn't this
12509 : based on number of elements? */
12510 0 : len = vect_get_loop_len (loop_vinfo, gsi, lens,
12511 : vec_num, vectype, i, 1, true);
12512 0 : signed char biasval
12513 0 : = LOOP_VINFO_PARTIAL_LOAD_STORE_BIAS (loop_vinfo);
12514 0 : bias = build_int_cst (intQI_type_node, biasval);
12515 : }
12516 : else
12517 : {
12518 0 : len = size_int (TYPE_VECTOR_SUBPARTS (vectype));
12519 0 : bias = build_int_cst (intQI_type_node, 0);
12520 : }
12521 : }
12522 0 : if (masks)
12523 : {
12524 0 : tree loop_mask
12525 0 : = vect_get_loop_mask (loop_vinfo, gsi, masks, vec_num,
12526 : vectype, i);
12527 0 : tree tmp2 = make_ssa_name (vec_cmp_type);
12528 0 : gassign *g
12529 0 : = gimple_build_assign (tmp2, BIT_AND_EXPR, vec_compare,
12530 : loop_mask);
12531 0 : vect_finish_stmt_generation (vinfo, stmt_info, g, gsi);
12532 0 : vec_compare = tmp2;
12533 : }
12534 : }
12535 :
12536 0 : gimple *new_stmt;
12537 0 : if (reduction_type == EXTRACT_LAST_REDUCTION)
12538 : {
12539 0 : gimple *old_stmt = vect_orig_stmt (stmt_info)->stmt;
12540 0 : tree lhs = gimple_get_lhs (old_stmt);
12541 0 : if ((unsigned)i != vec_oprnds0.length () - 1)
12542 0 : lhs = copy_ssa_name (lhs);
12543 0 : if (len)
12544 0 : new_stmt = gimple_build_call_internal
12545 0 : (IFN_LEN_FOLD_EXTRACT_LAST, 5, vec_else_clause, vec_compare,
12546 : vec_then_clause, len, bias);
12547 : else
12548 0 : new_stmt = gimple_build_call_internal
12549 0 : (IFN_FOLD_EXTRACT_LAST, 3, vec_else_clause, vec_compare,
12550 : vec_then_clause);
12551 0 : gimple_call_set_lhs (new_stmt, lhs);
12552 0 : SSA_NAME_DEF_STMT (lhs) = new_stmt;
12553 0 : if ((unsigned)i != vec_oprnds0.length () - 1)
12554 : {
12555 0 : vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
12556 0 : vec_else_clause = lhs;
12557 : }
12558 0 : else if (old_stmt == gsi_stmt (*gsi))
12559 0 : vect_finish_replace_stmt (vinfo, stmt_info, new_stmt);
12560 : else
12561 : {
12562 : /* In this case we're moving the definition to later in the
12563 : block. That doesn't matter because the only uses of the
12564 : lhs are in phi statements. */
12565 0 : gimple_stmt_iterator old_gsi = gsi_for_stmt (old_stmt);
12566 0 : gsi_remove (&old_gsi, true);
12567 0 : vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
12568 : }
12569 : }
12570 : else
12571 : {
12572 11509 : new_temp = make_ssa_name (vec_dest);
12573 11509 : new_stmt = gimple_build_assign (new_temp, VEC_COND_EXPR, vec_compare,
12574 : vec_then_clause, vec_else_clause);
12575 11509 : vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
12576 : }
12577 11509 : slp_node->push_vec_def (new_stmt);
12578 : }
12579 :
12580 8500 : vec_oprnds0.release ();
12581 8500 : vec_oprnds1.release ();
12582 8500 : vec_oprnds2.release ();
12583 8500 : vec_oprnds3.release ();
12584 :
12585 8500 : return true;
12586 : }
12587 :
12588 : /* Helper of vectorizable_comparison.
12589 :
12590 : Check if STMT_INFO is a comparison expression CODE that can be vectorized.
12591 : If COST_VEC is passed, calculate costs but don't change anything,
12592 : otherwise, vectorize STMT_INFO: create a vectorized comparison, and insert
12593 : it at GSI.
12594 :
12595 : Return true if STMT_INFO is vectorizable in this way. */
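 : 
 : /* For instance (illustrative names), the scalar comparison
 : 
 : mask_8 = a_4 > b_5;
 : 
 : becomes a vector comparison defining a vector boolean
 : 
 : vect_mask.3_8 = vect_a.1 > vect_b.2; */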
12596 :
12597 : static bool
12598 351766 : vectorizable_comparison_1 (vec_info *vinfo, tree vectype,
12599 : stmt_vec_info stmt_info, tree_code code,
12600 : gimple_stmt_iterator *gsi,
12601 : slp_tree slp_node, stmt_vector_for_cost *cost_vec)
12602 : {
12603 351766 : tree lhs, rhs1, rhs2;
12604 351766 : tree vectype1 = NULL_TREE, vectype2 = NULL_TREE;
12605 351766 : tree vec_rhs1 = NULL_TREE, vec_rhs2 = NULL_TREE;
12606 351766 : tree new_temp;
12607 351766 : enum vect_def_type dts[2] = {vect_unknown_def_type, vect_unknown_def_type};
12608 351766 : poly_uint64 nunits;
12609 351766 : enum tree_code bitop1 = NOP_EXPR, bitop2 = NOP_EXPR;
12610 351766 : int i;
12611 351766 : bb_vec_info bb_vinfo = dyn_cast <bb_vec_info> (vinfo);
12612 351766 : vec<tree> vec_oprnds0 = vNULL;
12613 351766 : vec<tree> vec_oprnds1 = vNULL;
12614 351766 : tree mask_type;
12615 351766 : tree mask = NULL_TREE;
12616 :
12617 351766 : if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
12618 : return false;
12619 :
12620 351766 : if (!vectype || !VECTOR_BOOLEAN_TYPE_P (vectype))
12621 : return false;
12622 :
12623 158399 : mask_type = vectype;
12624 158399 : nunits = TYPE_VECTOR_SUBPARTS (vectype);
12625 :
12626 158399 : if (TREE_CODE_CLASS (code) != tcc_comparison)
12627 : return false;
12628 :
12629 156580 : slp_tree slp_rhs1, slp_rhs2;
12630 156580 : if (!vect_is_simple_use (vinfo, slp_node,
12631 : 0, &rhs1, &slp_rhs1, &dts[0], &vectype1))
12632 : return false;
12633 :
12634 156580 : if (!vect_is_simple_use (vinfo, slp_node,
12635 : 1, &rhs2, &slp_rhs2, &dts[1], &vectype2))
12636 : return false;
12637 :
12638 121736 : if (vectype1 && vectype2
12639 229029 : && maybe_ne (TYPE_VECTOR_SUBPARTS (vectype1),
12640 72449 : TYPE_VECTOR_SUBPARTS (vectype2)))
12641 16 : return false;
12642 :
12643 156564 : vectype = vectype1 ? vectype1 : vectype2;
12644 :
12645 : /* Invariant comparison. */
12646 156564 : if (!vectype)
12647 : {
12648 30014 : vectype = get_vectype_for_scalar_type (vinfo, TREE_TYPE (rhs1), slp_node);
12649 30014 : if (!vectype || maybe_ne (TYPE_VECTOR_SUBPARTS (vectype), nunits))
12650 7 : return false;
12651 : }
12652 126550 : else if (maybe_ne (nunits, TYPE_VECTOR_SUBPARTS (vectype)))
12653 : return false;
12654 :
12655 : /* Can't compare mask and non-mask types. */
12656 121720 : if (vectype1 && vectype2
12657 373312 : && (VECTOR_BOOLEAN_TYPE_P (vectype1) ^ VECTOR_BOOLEAN_TYPE_P (vectype2)))
12658 : return false;
12659 :
12660 : /* Boolean values may have another representation in vectors
12661 : and therefore we prefer bit operations over comparison for
12662 : them (which also works for scalar masks). We store opcodes
12663 : to use in bitop1 and bitop2. Statement is vectorized as
12664 : BITOP2 (rhs1 BITOP1 rhs2) or
12665 : rhs1 BITOP2 (BITOP1 rhs2)
12666 : depending on bitop1 and bitop2 arity. */
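 : /* E.g. LT_EXPR sets bitop1 = BIT_NOT_EXPR, bitop2 = BIT_AND_EXPR and
 : swap_p, so for boolean operands a < b is emitted as b & ~a. */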
12667 156549 : bool swap_p = false;
12668 156549 : if (VECTOR_BOOLEAN_TYPE_P (vectype))
12669 : {
12670 646 : if (code == GT_EXPR)
12671 : {
12672 : bitop1 = BIT_NOT_EXPR;
12673 : bitop2 = BIT_AND_EXPR;
12674 : }
12675 : else if (code == GE_EXPR)
12676 : {
12677 : bitop1 = BIT_NOT_EXPR;
12678 : bitop2 = BIT_IOR_EXPR;
12679 : }
12680 : else if (code == LT_EXPR)
12681 : {
12682 : bitop1 = BIT_NOT_EXPR;
12683 : bitop2 = BIT_AND_EXPR;
12684 : swap_p = true;
12685 : }
12686 : else if (code == LE_EXPR)
12687 : {
12688 : bitop1 = BIT_NOT_EXPR;
12689 : bitop2 = BIT_IOR_EXPR;
12690 : swap_p = true;
12691 : }
12692 : else
12693 : {
12694 : bitop1 = BIT_XOR_EXPR;
12695 : if (code == EQ_EXPR)
12696 : bitop2 = BIT_NOT_EXPR;
12697 : }
12698 : }
12699 :
12700 156549 : if (cost_vec)
12701 : {
12702 144106 : if (bitop1 == NOP_EXPR)
12703 : {
12704 143592 : if (!expand_vec_cmp_expr_p (vectype, mask_type, code))
12705 : return false;
12706 : }
12707 : else
12708 : {
12709 514 : machine_mode mode = TYPE_MODE (vectype);
12710 514 : optab optab;
12711 :
12712 514 : optab = optab_for_tree_code (bitop1, vectype, optab_default);
12713 514 : if (!optab || !can_implement_p (optab, mode))
12714 0 : return false;
12715 :
12716 514 : if (bitop2 != NOP_EXPR)
12717 : {
12718 91 : optab = optab_for_tree_code (bitop2, vectype, optab_default);
12719 91 : if (!optab || !can_implement_p (optab, mode))
12720 0 : return false;
12721 : }
12722 : }
12723 :
12724 : /* Put types on constant and invariant SLP children. */
12725 136041 : if (!vect_maybe_update_slp_op_vectype (slp_rhs1, vectype)
12726 136041 : || !vect_maybe_update_slp_op_vectype (slp_rhs2, vectype))
12727 : {
12728 2 : if (dump_enabled_p ())
12729 2 : dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
12730 : "incompatible vector types for invariants\n");
12731 2 : return false;
12732 : }
12733 :
12734 136039 : vect_model_simple_cost (vinfo, 1 + (bitop2 != NOP_EXPR),
12735 : slp_node, cost_vec);
12736 136039 : return true;
12737 : }
12738 :
12739 : /* Transform. */
12740 :
12741 : /* Handle def. */
12742 12443 : lhs = gimple_get_lhs (STMT_VINFO_STMT (stmt_info));
12743 12443 : if (lhs)
12744 12443 : mask = vect_create_destination_var (lhs, mask_type);
12745 :
12746 12443 : vect_get_vec_defs (vinfo, slp_node, rhs1, &vec_oprnds0, rhs2, &vec_oprnds1);
12747 12443 : if (swap_p)
12748 58 : std::swap (vec_oprnds0, vec_oprnds1);
12749 :
12750 : /* Arguments are ready. Create the new vector stmt. */
12751 31385 : FOR_EACH_VEC_ELT (vec_oprnds0, i, vec_rhs1)
12752 : {
12753 18942 : gimple *new_stmt;
12754 18942 : vec_rhs2 = vec_oprnds1[i];
12755 :
12756 18942 : if (lhs)
12757 18942 : new_temp = make_ssa_name (mask);
12758 : else
12759 0 : new_temp = make_temp_ssa_name (mask_type, NULL, "cmp");
12760 18942 : if (bitop1 == NOP_EXPR)
12761 : {
12762 18800 : new_stmt = gimple_build_assign (new_temp, code,
12763 : vec_rhs1, vec_rhs2);
12764 18800 : vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
12765 : }
12766 : else
12767 : {
12768 142 : if (bitop1 == BIT_NOT_EXPR)
12769 84 : new_stmt = gimple_build_assign (new_temp, bitop1, vec_rhs2);
12770 : else
12771 58 : new_stmt = gimple_build_assign (new_temp, bitop1, vec_rhs1,
12772 : vec_rhs2);
12773 142 : vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
12774 142 : if (bitop2 != NOP_EXPR)
12775 : {
12776 84 : tree res = make_ssa_name (mask);
12777 84 : if (bitop2 == BIT_NOT_EXPR)
12778 0 : new_stmt = gimple_build_assign (res, bitop2, new_temp);
12779 : else
12780 84 : new_stmt = gimple_build_assign (res, bitop2, vec_rhs1,
12781 : new_temp);
12782 84 : vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
12783 : }
12784 : }
12785 18942 : slp_node->push_vec_def (new_stmt);
12786 : }
12787 :
12788 12443 : vec_oprnds0.release ();
12789 12443 : vec_oprnds1.release ();
12790 :
12791 12443 : return true;
12792 : }
12793 :
12794 : /* vectorizable_comparison.
12795 :
12796 : Check if STMT_INFO is a comparison expression that can be vectorized.
12797 : If COST_VEC is passed, calculate costs but don't change anything,
12798 : otherwise, vectorize STMT_INFO: create a vectorized comparison, and insert
12799 : it at GSI.
12800 :
12801 : Return true if STMT_INFO is vectorizable in this way. */
12802 :
12803 : static bool
12804 650830 : vectorizable_comparison (vec_info *vinfo,
12805 : stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
12806 : slp_tree slp_node, stmt_vector_for_cost *cost_vec)
12807 : {
12808 650830 : bb_vec_info bb_vinfo = dyn_cast <bb_vec_info> (vinfo);
12809 :
12810 650830 : if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
12811 : return false;
12812 :
12813 650830 : if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
12814 : return false;
12815 :
12816 854114 : gassign *stmt = dyn_cast <gassign *> (stmt_info->stmt);
12817 349168 : if (!stmt)
12818 : return false;
12819 :
12820 349168 : enum tree_code code = gimple_assign_rhs_code (stmt);
12821 349168 : tree vectype = SLP_TREE_VECTYPE (slp_node);
12822 349168 : if (!vectorizable_comparison_1 (vinfo, vectype, stmt_info, code, gsi,
12823 : slp_node, cost_vec))
12824 : return false;
12825 :
12826 145884 : if (cost_vec)
12827 133441 : SLP_TREE_TYPE (slp_node) = comparison_vec_info_type;
12828 :
12829 : return true;
12830 : }
12831 :
12832 : /* Check to see if the target supports any of the compare and branch optabs for
12833 : vectors with MODE as these would be required when expanding. */
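 : /* In GIMPLE terms this asks whether a mask test and branch such as
 : 
 : if (vect_mask.5 != { 0, ... }) goto exit;
 : 
 : can be expanded directly for this mode (a sketch; vectorizable_early_exit
 : below builds exactly this form). */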
12834 : static bool
12835 60059 : supports_vector_compare_and_branch (loop_vec_info loop_vinfo, machine_mode mode)
12836 : {
12837 60059 : bool masked_loop_p = LOOP_VINFO_FULLY_MASKED_P (loop_vinfo);
12838 60059 : bool len_loop_p = LOOP_VINFO_FULLY_WITH_LENGTH_P (loop_vinfo);
12839 :
12840 : /* The vectorizer only produces vec_cbranch_any_optab directly. So only
12841 : check for support for that, or for cond_vec_cbranch_any_optab /
 : cond_len_vec_cbranch_any_optab when the loop is masked / len-controlled.
12842 : We can't produce vcond_cbranch_any directly from the vectorizer as we
12843 : want to keep gimple_cond as the GIMPLE representation. But we'll fold
12844 : it in expand. For that reason we require a backend to support the
12845 : unconditional vector cbranch optab if it supports the conditional one,
12846 : which is just an optimization on the unconditional one. */
12847 60059 : if (masked_loop_p
12848 60059 : && direct_optab_handler (cond_vec_cbranch_any_optab, mode)
12849 : != CODE_FOR_nothing)
12850 : return true;
12851 60059 : else if (len_loop_p
12852 60059 : && direct_optab_handler (cond_len_vec_cbranch_any_optab, mode)
12853 : != CODE_FOR_nothing)
12854 : return true;
12855 60059 : else if (!masked_loop_p && !len_loop_p
12856 120118 : && direct_optab_handler (vec_cbranch_any_optab, mode)
12857 : != CODE_FOR_nothing)
12858 : return true;
12859 :
12860 : /* Otherwise fall back to the plain cbranch optab; the target can use it
12861 : for boolean vector types and data vector types alike when the two do
 : not have different modes. */
12862 60059 : return direct_optab_handler (cbranch_optab, mode) != CODE_FOR_nothing;
12863 : }
12864 :
12865 : /* Determine the type to use for early break vectorization's scalar IV. If
12866 : no type is possible return false. */
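 : 
 : /* For instance, with a 32-bit unsigned NITERS type, peeling for
 : alignment under partial vectors adds a sign bit, which can push the
 : required precision to 33 bits; the MODE_INT loop below then picks
 : the next wider scalar integer mode the target supports. */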
12867 :
12868 : static bool
12869 2598 : vect_compute_type_for_early_break_scalar_iv (loop_vec_info loop_vinfo)
12870 : {
12871 : /* Check if we have a usable scalar IV type for vectorization. */
12872 2598 : tree iters_vf_type = sizetype;
12873 2598 : if (!LOOP_VINFO_NITERS_UNCOUNTED_P (loop_vinfo))
12874 : {
12875 : /* Find the type with the minimum precision we can use
12876 : for the scalar IV. */
12877 2375 : tree cand_type = TREE_TYPE (LOOP_VINFO_NITERS (loop_vinfo));
12878 :
12879 : /* Work out how many bits we need to represent the limit. */
12880 2375 : unsigned int min_ni_width
12881 2375 : = vect_min_prec_for_max_niters (loop_vinfo, 1);
12882 :
12883 : /* Check if we're using PFA, if so we need a signed IV and an
12884 : extra bit for the sign. */
12885 2375 : if (TYPE_UNSIGNED (cand_type)
12886 2375 : && LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo)
12887 3931 : && LOOP_VINFO_PEELING_FOR_ALIGNMENT (loop_vinfo))
12888 168 : min_ni_width += 1;
12889 :
12890 2375 : if (TYPE_PRECISION (cand_type) >= min_ni_width)
12891 2300 : iters_vf_type = unsigned_type_for (cand_type);
12892 : else
12893 : {
12894 75 : opt_scalar_int_mode cmp_mode_iter;
12895 75 : tree iv_type = NULL_TREE;
12896 367 : FOR_EACH_MODE_IN_CLASS (cmp_mode_iter, MODE_INT)
12897 : {
12898 367 : auto cmp_mode = cmp_mode_iter.require ();
12899 367 : unsigned int cmp_bits = GET_MODE_BITSIZE (cmp_mode);
12900 367 : if (cmp_bits >= min_ni_width
12901 367 : && targetm.scalar_mode_supported_p (cmp_mode))
12902 : {
12903 75 : iv_type = build_nonstandard_integer_type (cmp_bits, true);
12904 75 : if (iv_type)
12905 : break;
12906 : }
12907 : }
12908 :
12909 75 : if (!iv_type)
12910 : {
12911 0 : if (dump_enabled_p ())
12912 0 : dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
12913 : "can't vectorize early exit because the "
12914 : "target doesn't support a scalar type wide "
12915 : "wide enough to hold niters.\n");
12916 0 : return false;
12917 : }
12918 75 : iters_vf_type = iv_type;
12919 : }
12920 : }
12921 :
12922 2598 : LOOP_VINFO_EARLY_BRK_IV_TYPE (loop_vinfo) = iters_vf_type;
12923 2598 : return true;
12924 : }
12925 :
12926 : /* Check to see if the current early break given in STMT_INFO is valid for
12927 : vectorization. */
12928 :
12929 : bool
12930 241832 : vectorizable_early_exit (loop_vec_info loop_vinfo, stmt_vec_info stmt_info,
12931 : gimple_stmt_iterator *gsi,
12932 : slp_tree slp_node, stmt_vector_for_cost *cost_vec)
12933 : {
12934 241832 : if (!is_a <gcond *> (STMT_VINFO_STMT (stmt_info)))
12935 : return false;
12936 :
12937 61623 : if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_condition_def)
12938 : return false;
12939 :
12940 61623 : if (!STMT_VINFO_RELEVANT_P (stmt_info))
12941 : return false;
12942 :
12943 61623 : DUMP_VECT_SCOPE ("vectorizable_early_exit");
12944 :
12945 61623 : auto code = gimple_cond_code (STMT_VINFO_STMT (stmt_info));
12946 :
12947 : /* For SLP we don't want to use the type of the operands of the SLP node:
12948 : when vectorizing using SLP, slp_node will be the children of the gcond,
12949 : and we want to use the type of the direct children, which (since the
12950 : gcond is a root) is the current node rather than a child node as
12951 : vect_is_simple_use assumes. */
12952 61623 : tree vectype = SLP_TREE_VECTYPE (slp_node);
12953 61623 : if (!vectype)
12954 : return false;
12955 :
12956 61623 : machine_mode mode = TYPE_MODE (vectype);
12957 61623 : int vec_num = vect_get_num_copies (loop_vinfo, slp_node);
12958 :
12959 61623 : vec_loop_masks *masks = &LOOP_VINFO_MASKS (loop_vinfo);
12960 61623 : vec_loop_lens *lens = &LOOP_VINFO_LENS (loop_vinfo);
12961 61623 : bool masked_loop_p = LOOP_VINFO_FULLY_MASKED_P (loop_vinfo);
12962 61623 : bool len_loop_p = LOOP_VINFO_FULLY_WITH_LENGTH_P (loop_vinfo);
12963 :
12964 : /* Now build the new conditional. Pattern gimple_conds get dropped during
12965 : codegen so we must replace the original insn. */
12966 61623 : gimple *orig_stmt = STMT_VINFO_STMT (vect_orig_stmt (stmt_info));
12967 61623 : gcond *cond_stmt = as_a <gcond *>(orig_stmt);
12968 :
12969 61623 : tree vectype_out = vectype;
12970 61623 : auto bb = gimple_bb (cond_stmt);
12971 61623 : edge exit_true_edge = EDGE_SUCC (bb, 0);
12972 61623 : if (exit_true_edge->flags & EDGE_FALSE_VALUE)
12973 660 : exit_true_edge = EDGE_SUCC (bb, 1);
12974 61623 : gcc_assert (exit_true_edge->flags & EDGE_TRUE_VALUE);
12975 :
12976 : /* When vectorizing we assume that if the branch edge is taken that we're
12977 : exiting the loop. This is not however always the case as the compiler will
12978 : rewrite conditions to always be a comparison against 0. To do this it
12979 : sometimes flips the edges. This is fine for scalar, but for vector we
12980 : then have to negate the result of the test, as we're still assuming that
12981 : taking the branch edge means we found the exit condition. I.e. we need to
12982 : know whether we are generating a `forall` or an `exist` condition. */
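 : /* E.g. for a scalar early break like
 : 
 : if (a[i] == 0) break;
 : 
 : the exit is taken when any vector lane compares equal (an `exist`
 : test); if the edges were flipped during canonicalization we would
 : instead be testing that all lanes keep iterating (a `forall` test)
 : and have to negate the mask below. */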
12983 123246 : bool flipped = flow_bb_inside_loop_p (LOOP_VINFO_LOOP (loop_vinfo),
12984 61623 : exit_true_edge->dest);
12985 :
12986 : /* See if we support ADDHN and use that for the reduction. */
12987 61623 : internal_fn ifn = IFN_VEC_TRUNC_ADD_HIGH;
12988 61623 : bool addhn_supported_p
12989 61623 : = direct_internal_fn_supported_p (ifn, vectype, OPTIMIZE_FOR_BOTH);
12990 61623 : tree narrow_type = NULL_TREE;
12991 61623 : if (addhn_supported_p)
12992 : {
12993 : /* Calculate the narrowing type for the result. */
12994 0 : auto halfprec = TYPE_PRECISION (TREE_TYPE (vectype)) / 2;
12995 0 : auto unsignedp = TYPE_UNSIGNED (TREE_TYPE (vectype));
12996 0 : tree itype = build_nonstandard_integer_type (halfprec, unsignedp);
12997 0 : tree tmp_type = build_vector_type (itype, TYPE_VECTOR_SUBPARTS (vectype));
12998 0 : narrow_type = truth_type_for (tmp_type);
12999 :
13000 0 : if (!supports_vector_compare_and_branch (loop_vinfo,
13001 0 : TYPE_MODE (narrow_type)))
13002 : {
13003 0 : if (dump_enabled_p ())
13004 0 : dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
13005 : "can't use ADDHN reduction because cbranch for "
13006 : "the narrowed type is not supported by the "
13007 : "target.\n");
13008 : addhn_supported_p = false;
13009 : }
13010 : }
13011 :
13012 : /* Analyze only. */
13013 61623 : if (cost_vec)
13014 : {
13015 60059 : if (!addhn_supported_p
13016 60059 : && !supports_vector_compare_and_branch (loop_vinfo, mode))
13017 : {
13018 57461 : if (dump_enabled_p ())
13019 589 : dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
13020 : "can't vectorize early exit because the "
13021 : "target doesn't support flag setting vector "
13022 : "comparisons.\n");
13023 57461 : return false;
13024 : }
13025 :
13026 2598 : if (!vectorizable_comparison_1 (loop_vinfo, vectype, stmt_info, code, gsi,
13027 : slp_node, cost_vec))
13028 : return false;
13029 :
13030 2598 : if (LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo))
13031 : {
13032 1556 : if (direct_internal_fn_supported_p (IFN_VCOND_MASK_LEN, vectype,
13033 : OPTIMIZE_FOR_SPEED))
13034 0 : vect_record_loop_len (loop_vinfo, lens, vec_num, vectype, 1);
13035 : else
13036 1556 : vect_record_loop_mask (loop_vinfo, masks, vec_num, vectype, NULL);
13037 : }
13038 :
13039 2598 : if (!vect_compute_type_for_early_break_scalar_iv (loop_vinfo))
13040 : return false;
13041 :
13042 : return true;
13043 : }
13044 :
13045 : /* Transform. */
13046 :
13047 1564 : tree new_temp = NULL_TREE;
13048 1564 : gimple *new_stmt = NULL;
13049 :
13050 1564 : if (dump_enabled_p ())
13051 387 : dump_printf_loc (MSG_NOTE, vect_location, "transform early-exit.\n");
13052 :
13053 : /* For SLP we don't do codegen of the body starting from the gcond: the
13054 : gconds are roots, so by the time we get to them we have already
13055 : codegened the SLP tree and shouldn't try to do so again; the arguments
13056 : have already been vectorized. It's not very clean to do this here, but
13057 : the masking code below is complex and this keeps it all in one place
13058 : to ease fixes and backports. Once we drop the non-SLP loop vect or
 : split vectorizable_* this can be simplified. */
13059 :
13060 1564 : gimple *stmt = STMT_VINFO_STMT (stmt_info);
13061 1564 : basic_block cond_bb = gimple_bb (stmt);
13062 1564 : gimple_stmt_iterator cond_gsi = gsi_last_bb (cond_bb);
13063 :
13064 1564 : auto_vec<tree> stmts;
13065 1564 : stmts.safe_splice (SLP_TREE_VEC_DEFS (slp_node));
13066 :
13067 : /* If we're comparing against a previous forall we need to negate the results
13068 : before we do the final comparison or reduction. */
13069 1564 : if (flipped)
13070 : {
13071 : /* Rewrite the if(all(mask)) into if (!all(mask)) which is the same as
13072 : if (any(~mask)) by negating the masks and flipping the branches.
13073 :
13074 : 1. For unmasked loops we simply reduce the ~mask.
13075 : 2. For masked loops we reduce (~mask & loop_mask) which is the same as
13076 : doing (mask & loop_mask) ^ loop_mask. */
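 : /* The identity can be checked lane-wise: where loop_mask is 1,
 : (mask & 1) ^ 1 == ~mask, and where loop_mask is 0 both sides
 : are 0. */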
13077 294 : for (unsigned i = 0; i < stmts.length (); i++)
13078 : {
13079 173 : tree inv_lhs = make_temp_ssa_name (vectype, NULL, "vexit_inv");
13080 173 : auto inv_stmt = gimple_build_assign (inv_lhs, BIT_NOT_EXPR, stmts[i]);
13081 173 : vect_finish_stmt_generation (loop_vinfo, stmt_info, inv_stmt,
13082 : &cond_gsi);
13083 173 : stmts[i] = inv_lhs;
13084 : }
13085 :
13086 121 : EDGE_SUCC (bb, 0)->flags ^= (EDGE_TRUE_VALUE|EDGE_FALSE_VALUE);
13087 121 : EDGE_SUCC (bb, 1)->flags ^= (EDGE_TRUE_VALUE|EDGE_FALSE_VALUE);
13088 : }
13089 :
13090 : /* Determine if we need to reduce the final value. */
13091 1564 : if (stmts.length () > 1)
13092 : {
13093 : /* We build the reductions in a way to maintain as much parallelism as
13094 : possible. */
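 : /* E.g. four masks {a, b, c, d} are combined as t0 = d | c,
 : t1 = b | a, result = t0 | t1, a balanced tree rather than a
 : linear chain of ORs. */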
13095 141 : auto_vec<tree> workset (stmts.length ());
13096 :
13097 : /* Mask the statements as we queue them up. Normally we loop over
13098 : vec_num, but since we inspect the exact results of vectorization
13099 : we don't need to and instead can just use the stmts themselves. */
13100 141 : if (masked_loop_p)
13101 0 : for (unsigned i = 0; i < stmts.length (); i++)
13102 : {
13103 0 : tree stmt_mask
13104 0 : = vect_get_loop_mask (loop_vinfo, gsi, masks, vec_num,
13105 : vectype, i);
13106 0 : stmt_mask
13107 0 : = prepare_vec_mask (loop_vinfo, TREE_TYPE (stmt_mask), stmt_mask,
13108 0 : stmts[i], &cond_gsi);
13109 0 : workset.quick_push (stmt_mask);
13110 : }
13111 141 : else if (len_loop_p)
13112 0 : for (unsigned i = 0; i < stmts.length (); i++)
13113 : {
13114 0 : tree len_mask = vect_gen_loop_len_mask (loop_vinfo, gsi, &cond_gsi,
13115 : lens, vec_num,
13116 0 : vectype, stmts[i], i, 1);
13117 :
13118 0 : workset.quick_push (len_mask);
13119 : }
13120 : else
13121 141 : workset.splice (stmts);
13122 :
13123 430 : while (workset.length () > 1)
13124 : {
13125 289 : tree arg0 = workset.pop ();
13126 289 : tree arg1 = workset.pop ();
13127 289 : if (addhn_supported_p && workset.length () == 0)
13128 : {
13129 0 : new_stmt = gimple_build_call_internal (ifn, 2, arg0, arg1);
13130 0 : vectype_out = narrow_type;
13131 0 : new_temp = make_temp_ssa_name (vectype_out, NULL, "vexit_reduc");
13132 0 : gimple_call_set_lhs (as_a <gcall *> (new_stmt), new_temp);
13133 0 : gimple_call_set_nothrow (as_a <gcall *> (new_stmt), true);
13134 : }
13135 : else
13136 : {
13137 289 : new_temp = make_temp_ssa_name (vectype_out, NULL, "vexit_reduc");
13138 289 : new_stmt
13139 289 : = gimple_build_assign (new_temp, BIT_IOR_EXPR, arg0, arg1);
13140 : }
13141 289 : vect_finish_stmt_generation (loop_vinfo, stmt_info, new_stmt,
13142 : &cond_gsi);
13143 289 : workset.quick_insert (0, new_temp);
13144 : }
13145 141 : }
13146 : else
13147 : {
13148 1423 : new_temp = stmts[0];
13149 1423 : if (masked_loop_p)
13150 : {
13151 0 : tree mask
13152 0 : = vect_get_loop_mask (loop_vinfo, gsi, masks, 1, vectype, 0);
13153 0 : new_temp = prepare_vec_mask (loop_vinfo, TREE_TYPE (mask), mask,
13154 : new_temp, &cond_gsi);
13155 : }
13156 1423 : else if (len_loop_p)
13157 0 : new_temp = vect_gen_loop_len_mask (loop_vinfo, gsi, &cond_gsi, lens,
13158 : 1, vectype, new_temp, 0, 1);
13159 : }
13160 :
13161 1564 : gcc_assert (new_temp);
13162 :
13163 1564 : tree cst = build_zero_cst (vectype_out);
13164 1564 : gimple_cond_set_condition (cond_stmt, NE_EXPR, new_temp, cst);
13165 1564 : update_stmt (orig_stmt);
13166 :
13167 : /* ??? */
13168 1564 : SLP_TREE_VEC_DEFS (slp_node).truncate (0);
13169 :
13170 1564 : return true;
13171 1564 : }
13172 :
13173 : /* If SLP_NODE is nonnull, return true if vectorizable_live_operation
13174 : can handle all live statements in the node. Otherwise return true
13175 : if STMT_INFO is not live or if vectorizable_live_operation can handle it.
13176 : VEC_STMT_P is as for vectorizable_live_operation. */
13177 :
13178 : static bool
13179 1285877 : can_vectorize_live_stmts (vec_info *vinfo,
13180 : slp_tree slp_node, slp_instance slp_node_instance,
13181 : bool vec_stmt_p,
13182 : stmt_vector_for_cost *cost_vec)
13183 : {
13184 1285877 : stmt_vec_info slp_stmt_info;
13185 1285877 : unsigned int i;
13186 2712132 : FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (slp_node), i, slp_stmt_info)
13187 : {
13188 1426255 : if (slp_stmt_info
13189 1410164 : && STMT_VINFO_LIVE_P (slp_stmt_info)
13190 1560551 : && !vectorizable_live_operation (vinfo, slp_stmt_info, slp_node,
13191 : slp_node_instance, i,
13192 : vec_stmt_p, cost_vec))
13193 : return false;
13194 : }
13195 :
13196 : return true;
13197 : }
13198 :
13199 : /* Make sure the statement is vectorizable. */
13200 :
13201 : opt_result
13202 2680167 : vect_analyze_stmt (vec_info *vinfo,
13203 : slp_tree node, slp_instance node_instance,
13204 : stmt_vector_for_cost *cost_vec)
13205 : {
13206 2680167 : stmt_vec_info stmt_info = SLP_TREE_REPRESENTATIVE (node);
13207 2680167 : bb_vec_info bb_vinfo = dyn_cast <bb_vec_info> (vinfo);
13208 2680167 : enum vect_relevant relevance = STMT_VINFO_RELEVANT (stmt_info);
13209 2680167 : bool ok;
13210 :
13211 2680167 : if (dump_enabled_p ())
13212 100200 : dump_printf_loc (MSG_NOTE, vect_location, "==> examining statement: %G",
13213 : stmt_info->stmt);
13214 :
13215 5077423 : if (gimple_has_volatile_ops (stmt_info->stmt))
13216 : {
13217 : /* ??? This shouldn't really happen, volatile stmts should
13218 : not end up in the SLP graph. */
13219 0 : return opt_result::failure_at (stmt_info->stmt,
13220 : "not vectorized:"
13221 : " stmt has volatile operands: %G\n",
13222 : stmt_info->stmt);
13223 : }
13224 :
13225 : /* Skip stmts that do not need to be vectorized. */
13226 2680167 : if (!STMT_VINFO_RELEVANT_P (stmt_info)
13227 0 : && !STMT_VINFO_LIVE_P (stmt_info))
13228 : {
13229 0 : if (dump_enabled_p ())
13230 0 : dump_printf_loc (MSG_NOTE, vect_location, "irrelevant.\n");
13231 :
13232 : /* ??? This shouldn't really happen, irrelevant stmts should
13233 : not end up in the SLP graph. */
13234 0 : return opt_result::failure_at (stmt_info->stmt,
13235 : "not vectorized:"
13236 : " irrelevant stmt as SLP node %p "
13237 : "representative.\n",
13238 : (void *)node);
13239 : }
13240 :
13241 2680167 : switch (STMT_VINFO_DEF_TYPE (stmt_info))
13242 : {
13243 : case vect_internal_def:
13244 : case vect_condition_def:
13245 : break;
13246 :
13247 83876 : case vect_reduction_def:
13248 83876 : case vect_nested_cycle:
13249 83876 : gcc_assert (!bb_vinfo
13250 : && (relevance == vect_used_in_outer
13251 : || relevance == vect_used_in_outer_by_reduction
13252 : || relevance == vect_used_by_reduction
13253 : || relevance == vect_unused_in_scope
13254 : || relevance == vect_used_only_live));
13255 : break;
13256 :
13257 312 : case vect_double_reduction_def:
13258 312 : gcc_assert (!bb_vinfo && node);
13259 : break;
13260 :
13261 148798 : case vect_induction_def:
13262 148798 : case vect_first_order_recurrence:
13263 148798 : gcc_assert (!bb_vinfo);
13264 : break;
13265 :
13266 0 : case vect_constant_def:
13267 0 : case vect_external_def:
13268 0 : case vect_unknown_def_type:
13269 0 : default:
13270 0 : gcc_unreachable ();
13271 : }
13272 :
13273 2680167 : tree saved_vectype = STMT_VINFO_VECTYPE (stmt_info);
13274 2680167 : STMT_VINFO_VECTYPE (stmt_info) = NULL_TREE;
13275 :
13276 2680167 : if (STMT_VINFO_RELEVANT_P (stmt_info))
13277 : {
13278 2680167 : gcall *call = dyn_cast <gcall *> (stmt_info->stmt);
13279 2680167 : gcc_assert (SLP_TREE_VECTYPE (node)
13280 : || gimple_code (stmt_info->stmt) == GIMPLE_COND
13281 : || (call && gimple_call_lhs (call) == NULL_TREE));
13282 : }
13283 :
13284 2680167 : ok = true;
13285 2680167 : if (bb_vinfo
13286 1466086 : || (STMT_VINFO_RELEVANT_P (stmt_info)
13287 0 : || STMT_VINFO_DEF_TYPE (stmt_info) == vect_reduction_def))
13288 : /* Prefer vectorizable_call over vectorizable_simd_clone_call so
13289 : -mveclibabi= takes preference over library functions with
13290 : the simd attribute. */
13291 2680167 : ok = (vectorizable_call (vinfo, stmt_info, NULL, node, cost_vec)
13292 2673150 : || vectorizable_simd_clone_call (vinfo, stmt_info, NULL, node,
13293 : cost_vec)
13294 2672683 : || vectorizable_conversion (vinfo, stmt_info, NULL, node, cost_vec)
13295 2589405 : || vectorizable_operation (vinfo, stmt_info, NULL, node, cost_vec)
13296 2032370 : || vectorizable_assignment (vinfo, stmt_info, NULL, node, cost_vec)
13297 1963603 : || vectorizable_load (vinfo, stmt_info, NULL, node, cost_vec)
13298 1527525 : || vectorizable_store (vinfo, stmt_info, NULL, node, cost_vec)
13299 716724 : || vectorizable_shift (vinfo, stmt_info, NULL, node, cost_vec)
13300 664715 : || vectorizable_condition (vinfo, stmt_info, NULL, node, cost_vec)
13301 638387 : || vectorizable_comparison (vinfo, stmt_info, NULL, node, cost_vec)
13302 504946 : || (bb_vinfo
13303 124591 : && vectorizable_phi (bb_vinfo, stmt_info, node, cost_vec))
13304 3128102 : || (is_a <loop_vec_info> (vinfo)
13305 380355 : && (vectorizable_lane_reducing (as_a <loop_vec_info> (vinfo),
13306 : stmt_info, node, cost_vec)
13307 379641 : || vectorizable_reduction (as_a <loop_vec_info> (vinfo),
13308 : stmt_info,
13309 : node, node_instance, cost_vec)
13310 298250 : || vectorizable_induction (as_a <loop_vec_info> (vinfo),
13311 : stmt_info, node, cost_vec)
13312 181291 : || vectorizable_lc_phi (as_a <loop_vec_info> (vinfo),
13313 : stmt_info, node)
13314 180470 : || vectorizable_recurr (as_a <loop_vec_info> (vinfo),
13315 : stmt_info, node, cost_vec)
13316 180209 : || vectorizable_early_exit (as_a <loop_vec_info> (vinfo),
13317 : stmt_info, NULL, node,
13318 : cost_vec))));
13319 :
13320 2680167 : STMT_VINFO_VECTYPE (stmt_info) = saved_vectype;
13321 :
13322 2432378 : if (!ok)
13323 247789 : return opt_result::failure_at (stmt_info->stmt,
13324 : "not vectorized:"
13325 : " relevant stmt not supported: %G",
13326 : stmt_info->stmt);
13327 :
13328 :   /* Stmts that are (also) "live" (i.e. that are used outside the loop)
13329 : need extra handling, except for vectorizable reductions. */
13330 2432378 : if (!bb_vinfo
13331 1285877 : && (SLP_TREE_TYPE (node) != lc_phi_info_type
13332 821 : || SLP_TREE_DEF_TYPE (node) == vect_internal_def)
13333 1285877 : && (!node->ldst_lanes || SLP_TREE_PERMUTE_P (node))
13334 3718255 : && !can_vectorize_live_stmts (as_a <loop_vec_info> (vinfo),
13335 : node, node_instance,
13336 : false, cost_vec))
13337 0 : return opt_result::failure_at (stmt_info->stmt,
13338 : "not vectorized:"
13339 : " live stmt not supported: %G",
13340 : stmt_info->stmt);
13341 :
13342 2432378 : return opt_result::success ();
13343 : }
13344 :
13345 :
13346 : /* Function vect_transform_stmt.
13347 :
13348 : Create a vectorized stmt to replace STMT_INFO, and insert it at GSI. */
13349 :
13350 : bool
13351 968372 : vect_transform_stmt (vec_info *vinfo,
13352 : stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
13353 : slp_tree slp_node, slp_instance slp_node_instance)
13354 : {
13355 968372 : bool is_store = false;
13356 968372 : bool done;
13357 :
13358 968372 : gcc_assert (slp_node);
13359 :
13360 968372 : if (stmt_info)
13361 967531 : STMT_VINFO_VECTYPE (stmt_info) = NULL_TREE;
13362 :
13363 968372 : switch (SLP_TREE_TYPE (slp_node))
13364 : {
13365 22836 : case type_demotion_vec_info_type:
13366 22836 : case type_promotion_vec_info_type:
13367 22836 : case type_conversion_vec_info_type:
13368 22836 : done = vectorizable_conversion (vinfo, stmt_info, gsi, slp_node, NULL);
13369 22836 : gcc_assert (done);
13370 : break;
13371 :
13372 16195 : case induc_vec_info_type:
13373 16195 : done = vectorizable_induction (as_a <loop_vec_info> (vinfo),
13374 : stmt_info, slp_node, NULL);
13375 16195 : gcc_assert (done);
13376 : break;
13377 :
13378 8397 : case shift_vec_info_type:
13379 8397 : done = vectorizable_shift (vinfo, stmt_info, gsi, slp_node, NULL);
13380 8397 : gcc_assert (done);
13381 : break;
13382 :
13383 113967 : case op_vec_info_type:
13384 113967 : done = vectorizable_operation (vinfo, stmt_info, gsi, slp_node, NULL);
13385 113967 : gcc_assert (done);
13386 : break;
13387 :
13388 15774 : case assignment_vec_info_type:
13389 15774 : done = vectorizable_assignment (vinfo, stmt_info, gsi, slp_node, NULL);
13390 15774 : gcc_assert (done);
13391 : break;
13392 :
13393 165285 : case load_vec_info_type:
13394 165285 : done = vectorizable_load (vinfo, stmt_info, gsi, slp_node, NULL);
13395 165285 : gcc_assert (done);
13396 : break;
13397 :
13398 543182 : case store_vec_info_type:
13399 543182 : done = vectorizable_store (vinfo, stmt_info, gsi, slp_node, NULL);
13400 543182 : gcc_assert (done);
13401 : is_store = true;
13402 : break;
13403 :
13404 8500 : case condition_vec_info_type:
13405 8500 : done = vectorizable_condition (vinfo, stmt_info, gsi, slp_node, NULL);
13406 8500 : gcc_assert (done);
13407 : break;
13408 :
13409 12443 : case comparison_vec_info_type:
13410 12443 : done = vectorizable_comparison (vinfo, stmt_info, gsi, slp_node, NULL);
13411 12443 : gcc_assert (done);
13412 : break;
13413 :
13414 4215 : case call_vec_info_type:
13415 4215 : done = vectorizable_call (vinfo, stmt_info, gsi, slp_node, NULL);
13416 4215 : break;
13417 :
13418 362 : case call_simd_clone_vec_info_type:
13419 362 : done = vectorizable_simd_clone_call (vinfo, stmt_info, gsi,
13420 : slp_node, NULL);
13421 362 : break;
13422 :
13423 2581 : case reduc_vec_info_type:
13424 2581 : done = vect_transform_reduction (as_a <loop_vec_info> (vinfo), stmt_info,
13425 : gsi, slp_node);
13426 2581 : gcc_assert (done);
13427 : break;
13428 :
13429 23476 : case cycle_phi_info_type:
13430 23476 : done = vect_transform_cycle_phi (as_a <loop_vec_info> (vinfo), stmt_info,
13431 : slp_node, slp_node_instance);
13432 23476 : gcc_assert (done);
13433 : break;
13434 :
13435 530 : case lc_phi_info_type:
13436 530 : done = vect_transform_lc_phi (as_a <loop_vec_info> (vinfo),
13437 : stmt_info, slp_node);
13438 530 : gcc_assert (done);
13439 : break;
13440 :
13441 43 : case recurr_info_type:
13442 43 : done = vectorizable_recurr (as_a <loop_vec_info> (vinfo),
13443 : stmt_info, slp_node, NULL);
13444 43 : gcc_assert (done);
13445 : break;
13446 :
13447 14143 : case phi_info_type:
13448 14143 : done = vectorizable_phi (as_a <bb_vec_info> (vinfo),
13449 : stmt_info, slp_node, NULL);
13450 14143 : gcc_assert (done);
13451 : break;
13452 :
13453 0 : case loop_exit_ctrl_vec_info_type:
13454 0 : done = vectorizable_early_exit (as_a <loop_vec_info> (vinfo),
13455 : stmt_info, gsi, slp_node, NULL);
13456 0 : gcc_assert (done);
13457 : break;
13458 :
13459 16443 : case permute_info_type:
13460 16443 : done = vectorizable_slp_permutation (vinfo, gsi, slp_node, NULL);
13461 16443 : gcc_assert (done);
13462 : break;
13463 :
13464 0 : default:
13465 0 : if (!STMT_VINFO_LIVE_P (stmt_info))
13466 : {
13467 0 : if (dump_enabled_p ())
13468 0 : dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
13469 : "stmt not supported.\n");
13470 0 : gcc_unreachable ();
13471 : }
13472 968372 : done = true;
13473 : }
13474 :
13475 968372 : if (SLP_TREE_TYPE (slp_node) != store_vec_info_type
13476 425190 : && (!slp_node->ldst_lanes || SLP_TREE_PERMUTE_P (slp_node)))
13477 : {
13478 : /* Handle stmts whose DEF is used outside the loop-nest that is
13479 : being vectorized. */
13480 573145 : for (unsigned lane : SLP_TREE_LIVE_LANES (slp_node))
13481 : {
13482 61029 : stmt_vec_info slp_stmt_info = SLP_TREE_SCALAR_STMTS (slp_node)[lane];
13483 61029 : done = vectorizable_live_operation (vinfo, slp_stmt_info, slp_node,
13484 : slp_node_instance, lane,
13485 : true, NULL);
13486 61029 : gcc_assert (done);
13487 : }
13488 : }
13489 :
13490 968372 : return is_store;
13491 : }
13492 :
13493 :
13494 : /* Remove a group of stores (for SLP or interleaving), free their
13495 : stmt_vec_info. */
13496 :
13497 : void
13498 0 : vect_remove_stores (vec_info *vinfo, stmt_vec_info first_stmt_info)
13499 : {
13500 0 : stmt_vec_info next_stmt_info = first_stmt_info;
13501 :
13502 0 : while (next_stmt_info)
13503 : {
13504 0 : stmt_vec_info tmp = DR_GROUP_NEXT_ELEMENT (next_stmt_info);
13505 0 : next_stmt_info = vect_orig_stmt (next_stmt_info);
13506 : /* Free the attached stmt_vec_info and remove the stmt. */
13507 0 : vinfo->remove_stmt (next_stmt_info);
13508 0 : next_stmt_info = tmp;
13509 : }
13510 0 : }
13511 :
13512 : /* If NUNITS is nonzero, return a vector type that contains NUNITS
13513 : elements of type SCALAR_TYPE, or null if the target doesn't support
13514 : such a type.
13515 :
13516 : If NUNITS is zero, return a vector type that contains elements of
13517 : type SCALAR_TYPE, choosing whichever vector size the target prefers.
13518 :
13519 : If PREVAILING_MODE is VOIDmode, we have not yet chosen a vector mode
13520 : for this vectorization region and want to "autodetect" the best choice.
13521 : Otherwise, PREVAILING_MODE is a previously-chosen vector TYPE_MODE
13522 : and we want the new type to be interoperable with it. PREVAILING_MODE
13523 : in this case can be a scalar integer mode or a vector mode; when it
13524 : is a vector mode, the function acts like a tree-level version of
13525 : related_vector_mode. */
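 :
 : /* As a hypothetical illustration: on a target whose preferred SImode
 :    SIMD mode is V4SImode, the call
 :
 :      tree v = get_related_vectype_for_scalar_type (VOIDmode,
 :                                                    intSI_type_node, 0);
 :
 :    autodetects the vector size and returns a "vector(4) int" type,
 :    whereas passing a prevailing 16-byte vector mode instead requests
 :    a type interoperable with vectors of that size. */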
13526 :
13527 : tree
13528 30707668 : get_related_vectype_for_scalar_type (machine_mode prevailing_mode,
13529 : tree scalar_type, poly_uint64 nunits)
13530 : {
13531 30707668 : tree orig_scalar_type = scalar_type;
13532 30707668 : scalar_mode inner_mode;
13533 30707668 : machine_mode simd_mode;
13534 30707668 : tree vectype;
13535 :
13536 30707668 : if ((!INTEGRAL_TYPE_P (scalar_type)
13537 10390386 : && !POINTER_TYPE_P (scalar_type)
13538 1781951 : && !SCALAR_FLOAT_TYPE_P (scalar_type))
13539 40596518 : || (!is_int_mode (TYPE_MODE (scalar_type), &inner_mode)
13540 1280497 : && !is_float_mode (TYPE_MODE (scalar_type), &inner_mode)))
13541 504751 : return NULL_TREE;
13542 :
13543 30202917 : unsigned int nbytes = GET_MODE_SIZE (inner_mode);
13544 :
13545 : /* Interoperability between modes requires one to be a constant multiple
13546 : of the other, so that the number of vectors required for each operation
13547 : is a compile-time constant. */
13548 30202917 : if (prevailing_mode != VOIDmode
13549 29077559 : && !constant_multiple_p (nunits * nbytes,
13550 29077559 : GET_MODE_SIZE (prevailing_mode))
13551 31680725 : && !constant_multiple_p (GET_MODE_SIZE (prevailing_mode),
13552 1477808 : nunits * nbytes))
13553 : return NULL_TREE;
13554 :
13555 : /* For vector types of elements whose mode precision doesn't
13556 :      match their type's precision we use an element type of mode
13557 : precision. The vectorization routines will have to make sure
13558 : they support the proper result truncation/extension.
13559 : We also make sure to build vector types with INTEGER_TYPE
13560 : component type only. */
13561 30202917 : if (INTEGRAL_TYPE_P (scalar_type)
13562 50520117 : && (GET_MODE_BITSIZE (inner_mode) != TYPE_PRECISION (scalar_type)
13563 18835847 : || TREE_CODE (scalar_type) != INTEGER_TYPE))
13564 1690236 : scalar_type = build_nonstandard_integer_type (GET_MODE_BITSIZE (inner_mode),
13565 1690236 : TYPE_UNSIGNED (scalar_type));
13566 :
13567 : /* We shouldn't end up building VECTOR_TYPEs of non-scalar components.
13568 : When the component mode passes the above test simply use a type
13569 : corresponding to that mode. The theory is that any use that
13570 : would cause problems with this will disable vectorization anyway. */
13571 28512681 : else if (!SCALAR_FLOAT_TYPE_P (scalar_type)
13572 : && !INTEGRAL_TYPE_P (scalar_type))
13573 8608435 : scalar_type = lang_hooks.types.type_for_mode (inner_mode, 1);
13574 :
13575 : /* We can't build a vector type of elements with alignment bigger than
13576 : their size. */
13577 19904246 : else if (nbytes < TYPE_ALIGN_UNIT (scalar_type))
13578 376820 : scalar_type = lang_hooks.types.type_for_mode (inner_mode,
13579 188410 : TYPE_UNSIGNED (scalar_type));
13580 :
13581 :   /* If we fell back to using the mode, fail if there was
13582 : no scalar type for it. */
13583 30202917 : if (scalar_type == NULL_TREE)
13584 : return NULL_TREE;
13585 :
13586 : /* If no prevailing mode was supplied, use the mode the target prefers.
13587 :      Otherwise look up a vector mode based on the prevailing mode. */
13588 30202917 : if (prevailing_mode == VOIDmode)
13589 : {
13590 1125358 : gcc_assert (known_eq (nunits, 0U));
13591 1125358 : simd_mode = targetm.vectorize.preferred_simd_mode (inner_mode);
13592 1125358 : if (SCALAR_INT_MODE_P (simd_mode))
13593 : {
13594 : /* Traditional behavior is not to take the integer mode
13595 : literally, but simply to use it as a way of determining
13596 : the vector size. It is up to mode_for_vector to decide
13597 : what the TYPE_MODE should be.
13598 :
13599 : Note that nunits == 1 is allowed in order to support single
13600 : element vector types. */
13601 57942 : if (!multiple_p (GET_MODE_SIZE (simd_mode), nbytes, &nunits)
13602 545 : || !mode_for_vector (inner_mode, nunits).exists (&simd_mode))
13603 28426 : return NULL_TREE;
13604 : }
13605 : }
13606 29077559 : else if (SCALAR_INT_MODE_P (prevailing_mode)
13607 29077559 : || !related_vector_mode (prevailing_mode,
13608 27074757 : inner_mode, nunits).exists (&simd_mode))
13609 : {
13610 : /* Fall back to using mode_for_vector, mostly in the hope of being
13611 : able to use an integer mode. */
13612 2002802 : if (known_eq (nunits, 0U)
13613 4681118 : && !multiple_p (GET_MODE_SIZE (prevailing_mode), nbytes, &nunits))
13614 : return NULL_TREE;
13615 :
13616 149613 : if (!mode_for_vector (inner_mode, nunits).exists (&simd_mode))
13617 139624 : return NULL_TREE;
13618 : }
13619 :
13620 28181678 : vectype = build_vector_type_for_mode (scalar_type, simd_mode);
13621 :
13622 : /* In cases where the mode was chosen by mode_for_vector, check that
13623 : the target actually supports the chosen mode, or that it at least
13624 : allows the vector mode to be replaced by a like-sized integer. */
13625 56363356 : if (!VECTOR_MODE_P (TYPE_MODE (vectype))
13626 28191924 : && !INTEGRAL_MODE_P (TYPE_MODE (vectype)))
13627 : return NULL_TREE;
13628 :
13629 : /* Re-attach the address-space qualifier if we canonicalized the scalar
13630 : type. */
13631 28173635 : if (TYPE_ADDR_SPACE (orig_scalar_type) != TYPE_ADDR_SPACE (vectype))
13632 5 : return build_qualified_type
13633 5 : (vectype, KEEP_QUAL_ADDR_SPACE (TYPE_QUALS (orig_scalar_type)));
13634 :
13635 : return vectype;
13636 : }
13637 :
13638 : /* Function get_vectype_for_scalar_type.
13639 :
13640 : Returns the vector type corresponding to SCALAR_TYPE as supported
13641 : by the target. If GROUP_SIZE is nonzero and we're performing BB
13642 : vectorization, make sure that the number of elements in the vector
13643 : is no bigger than GROUP_SIZE. */
13644 :
13645 : tree
13646 26259781 : get_vectype_for_scalar_type (vec_info *vinfo, tree scalar_type,
13647 : unsigned int group_size)
13648 : {
13649 : /* For BB vectorization, we should always have a group size once we've
13650 : constructed the SLP tree; the only valid uses of zero GROUP_SIZEs
13651 : are tentative requests during things like early data reference
13652 : analysis and pattern recognition. */
13653 26259781 : if (is_a <bb_vec_info> (vinfo))
13654 23344106 : gcc_assert (vinfo->slp_instances.is_empty () || group_size != 0);
13655 : else
13656 : group_size = 0;
13657 :
13658 26259781 : tree vectype = get_related_vectype_for_scalar_type (vinfo->vector_mode,
13659 : scalar_type);
13660 26259781 : if (vectype && vinfo->vector_mode == VOIDmode)
13661 1050255 : vinfo->vector_mode = TYPE_MODE (vectype);
13662 :
13663 : /* Register the natural choice of vector type, before the group size
13664 : has been applied. */
13665 0 : if (vectype)
13666 23888527 : vinfo->used_vector_modes.add (TYPE_MODE (vectype));
13667 :
13668 : /* If the natural choice of vector type doesn't satisfy GROUP_SIZE,
13669 : try again with an explicit number of elements. */
13670 23888527 : if (vectype
13671 23888527 : && group_size
13672 26259781 : && maybe_ge (TYPE_VECTOR_SUBPARTS (vectype), group_size))
13673 : {
13674 : /* Start with the biggest number of units that fits within
13675 : GROUP_SIZE and halve it until we find a valid vector type.
13676 : Usually either the first attempt will succeed or all will
13677 : fail (in the latter case because GROUP_SIZE is too small
13678 : for the target), but it's possible that a target could have
13679 : a hole between supported vector types.
13680 :
13681 : If GROUP_SIZE is not a power of 2, this has the effect of
13682 : trying the largest power of 2 that fits within the group,
13683 : even though the group is not a multiple of that vector size.
13684 : The BB vectorizer will then try to carve up the group into
13685 : smaller pieces. */
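 :       /* For instance, with GROUP_SIZE == 6 the loop below first tries
 :          nunits == 4 (1 << floor_log2 (6)), then nunits == 2 if the
 :          target has no 4-element vector of SCALAR_TYPE. */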
13686 3020326 : unsigned int nunits = 1 << floor_log2 (group_size);
13687 3020326 : do
13688 : {
13689 3020326 : vectype = get_related_vectype_for_scalar_type (vinfo->vector_mode,
13690 3020326 : scalar_type, nunits);
13691 3020326 : nunits /= 2;
13692 : }
13693 3020326 : while (nunits > 1 && !vectype);
13694 : }
13695 :
13696 26259781 : return vectype;
13697 : }
13698 :
13699 : /* Return the vector type corresponding to SCALAR_TYPE as supported
13700 : by the target. NODE, if nonnull, is the SLP tree node that will
13701 : use the returned vector type. */
13702 :
13703 : tree
13704 166664 : get_vectype_for_scalar_type (vec_info *vinfo, tree scalar_type, slp_tree node)
13705 : {
13706 166664 : unsigned int group_size = 0;
13707 166664 : if (node)
13708 166664 : group_size = SLP_TREE_LANES (node);
13709 166664 : return get_vectype_for_scalar_type (vinfo, scalar_type, group_size);
13710 : }
13711 :
13712 : /* Function get_mask_type_for_scalar_type.
13713 :
13714 : Returns the mask type corresponding to a result of comparison
13715 : of vectors of specified SCALAR_TYPE as supported by target.
13716 : If GROUP_SIZE is nonzero and we're performing BB vectorization,
13717 : make sure that the number of elements in the vector is no bigger
13718 : than GROUP_SIZE. */
13719 :
13720 : tree
13721 1093022 : get_mask_type_for_scalar_type (vec_info *vinfo, tree scalar_type,
13722 : unsigned int group_size)
13723 : {
13724 1093022 : tree vectype = get_vectype_for_scalar_type (vinfo, scalar_type, group_size);
13725 :
13726 1093022 : if (!vectype)
13727 : return NULL;
13728 :
13729 1071083 : return truth_type_for (vectype);
13730 : }
13731 :
13732 : /* Function get_mask_type_for_scalar_type.
13733 :
13734 : Returns the mask type corresponding to a result of comparison
13735 : of vectors of specified SCALAR_TYPE as supported by target.
13736 : NODE, if nonnull, is the SLP tree node that will use the returned
13737 : vector type. */
13738 :
13739 : tree
13740 19 : get_mask_type_for_scalar_type (vec_info *vinfo, tree scalar_type,
13741 : slp_tree node)
13742 : {
13743 19 : tree vectype = get_vectype_for_scalar_type (vinfo, scalar_type, node);
13744 :
13745 19 : if (!vectype)
13746 : return NULL;
13747 :
13748 19 : return truth_type_for (vectype);
13749 : }
13750 :
13751 : /* Function get_same_sized_vectype
13752 :
13753 : Returns a vector type corresponding to SCALAR_TYPE of size
13754 : VECTOR_TYPE if supported by the target. */
13755 :
13756 : tree
13757 156476 : get_same_sized_vectype (tree scalar_type, tree vector_type)
13758 : {
13759 156476 : if (VECT_SCALAR_BOOLEAN_TYPE_P (scalar_type))
13760 0 : return truth_type_for (vector_type);
13761 :
13762 156476 : poly_uint64 nunits;
13763 312952 : if (!multiple_p (GET_MODE_SIZE (TYPE_MODE (vector_type)),
13764 312952 : GET_MODE_SIZE (TYPE_MODE (scalar_type)), &nunits))
13765 : return NULL_TREE;
13766 :
13767 156476 : return get_related_vectype_for_scalar_type (TYPE_MODE (vector_type),
13768 156476 : scalar_type, nunits);
13769 : }
13770 :
13771 : /* Return true if replacing LOOP_VINFO->vector_mode with VECTOR_MODE
13772 : would not change the chosen vector modes. */
13773 :
13774 : bool
13775 1574499 : vect_chooses_same_modes_p (vec_info *vinfo, machine_mode vector_mode)
13776 : {
13777 1574499 : for (vec_info::mode_set::iterator i = vinfo->used_vector_modes.begin ();
13778 3583815 : i != vinfo->used_vector_modes.end (); ++i)
13779 1838503 : if (!VECTOR_MODE_P (*i)
13780 5515509 : || related_vector_mode (vector_mode, GET_MODE_INNER (*i), 0) != *i)
13781 833845 : return false;
13782 740654 : return true;
13783 : }
13784 :
13785 : /* Return true if replacing VECTOR_MODE with ALT_VECTOR_MODE would not
13786 : change the chosen vector modes for analysis of a loop. */
13787 :
13788 : bool
13789 381533 : vect_chooses_same_modes_p (machine_mode vector_mode,
13790 : machine_mode alt_vector_mode)
13791 : {
13792 62560 : return (VECTOR_MODE_P (vector_mode)
13793 381533 : && VECTOR_MODE_P (alt_vector_mode)
13794 763066 : && (related_vector_mode (vector_mode,
13795 : GET_MODE_INNER (alt_vector_mode))
13796 381533 : == alt_vector_mode)
13797 407629 : && (related_vector_mode (alt_vector_mode,
13798 : GET_MODE_INNER (vector_mode))
13799 13048 : == vector_mode));
13800 : }
13801 :
13802 : /* Function vect_is_simple_use.
13803 :
13804 : Input:
13805 : VINFO - the vect info of the loop or basic block that is being vectorized.
13806 : OPERAND - operand in the loop or bb.
13807 : Output:
13808 : DEF_STMT_INFO_OUT (optional) - information about the defining stmt in
13809 : case OPERAND is an SSA_NAME that is defined in the vectorizable region
13810 : DEF_STMT_OUT (optional) - the defining stmt in case OPERAND is an SSA_NAME;
13811 : the definition could be anywhere in the function
13812 : DT - the type of definition
13813 :
13814 : Returns whether a stmt with OPERAND can be vectorized.
13815 : For loops, supportable operands are constants, loop invariants, and operands
13816 : that are defined by the current iteration of the loop. Unsupportable
13817 : operands are those that are defined by a previous iteration of the loop (as
13818 : is the case in reduction/induction computations).
13819 : For basic blocks, supportable operands are constants and bb invariants.
13820 : For now, operands defined outside the basic block are not supported. */
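 :
 :    For example, a use of the result of an induction PHI inside the loop
 :    yields *DT == vect_induction_def, while a use of an SSA name defined
 :    before the vectorized region (or a default definition) yields
 :    vect_external_def. */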
13821 :
13822 : bool
13823 41583968 : vect_is_simple_use (tree operand, vec_info *vinfo, enum vect_def_type *dt,
13824 : stmt_vec_info *def_stmt_info_out, gimple **def_stmt_out)
13825 : {
13826 41583968 : if (def_stmt_info_out)
13827 39362179 : *def_stmt_info_out = NULL;
13828 41583968 : if (def_stmt_out)
13829 9657298 : *def_stmt_out = NULL;
13830 41583968 : *dt = vect_unknown_def_type;
13831 :
13832 41583968 : if (dump_enabled_p ())
13833 : {
13834 762541 : dump_printf_loc (MSG_NOTE, vect_location,
13835 : "vect_is_simple_use: operand ");
13836 762541 : if (TREE_CODE (operand) == SSA_NAME
13837 762541 : && !SSA_NAME_IS_DEFAULT_DEF (operand))
13838 699703 : dump_gimple_expr (MSG_NOTE, TDF_SLIM, SSA_NAME_DEF_STMT (operand), 0);
13839 : else
13840 62838 : dump_generic_expr (MSG_NOTE, TDF_SLIM, operand);
13841 : }
13842 :
13843 41583968 : if (CONSTANT_CLASS_P (operand))
13844 2793200 : *dt = vect_constant_def;
13845 38790768 : else if (is_gimple_min_invariant (operand))
13846 330983 : *dt = vect_external_def;
13847 38459785 : else if (TREE_CODE (operand) != SSA_NAME)
13848 994 : *dt = vect_unknown_def_type;
13849 38458791 : else if (SSA_NAME_IS_DEFAULT_DEF (operand))
13850 504436 : *dt = vect_external_def;
13851 : else
13852 : {
13853 37954355 : gimple *def_stmt = SSA_NAME_DEF_STMT (operand);
13854 37954355 : stmt_vec_info stmt_vinfo = vinfo->lookup_def (operand);
13855 37954355 : if (!stmt_vinfo)
13856 835017 : *dt = vect_external_def;
13857 : else
13858 : {
13859 37119338 : stmt_vinfo = vect_stmt_to_vectorize (stmt_vinfo);
13860 37119338 : def_stmt = stmt_vinfo->stmt;
13861 37119338 : *dt = STMT_VINFO_DEF_TYPE (stmt_vinfo);
13862 37119338 : if (def_stmt_info_out)
13863 34906543 : *def_stmt_info_out = stmt_vinfo;
13864 : }
13865 37954355 : if (def_stmt_out)
13866 9451644 : *def_stmt_out = def_stmt;
13867 : }
13868 :
13869 41583968 : if (dump_enabled_p ())
13870 : {
13871 762541 : dump_printf (MSG_NOTE, ", type of def: ");
13872 762541 : switch (*dt)
13873 : {
13874 0 : case vect_uninitialized_def:
13875 0 : dump_printf (MSG_NOTE, "uninitialized\n");
13876 0 : break;
13877 52142 : case vect_constant_def:
13878 52142 : dump_printf (MSG_NOTE, "constant\n");
13879 52142 : break;
13880 26191 : case vect_external_def:
13881 26191 : dump_printf (MSG_NOTE, "external\n");
13882 26191 : break;
13883 545448 : case vect_internal_def:
13884 545448 : dump_printf (MSG_NOTE, "internal\n");
13885 545448 : break;
13886 107809 : case vect_induction_def:
13887 107809 : dump_printf (MSG_NOTE, "induction\n");
13888 107809 : break;
13889 27604 : case vect_reduction_def:
13890 27604 : dump_printf (MSG_NOTE, "reduction\n");
13891 27604 : break;
13892 482 : case vect_double_reduction_def:
13893 482 : dump_printf (MSG_NOTE, "double reduction\n");
13894 482 : break;
13895 2173 : case vect_nested_cycle:
13896 2173 : dump_printf (MSG_NOTE, "nested cycle\n");
13897 2173 : break;
13898 264 : case vect_first_order_recurrence:
13899 264 : dump_printf (MSG_NOTE, "first order recurrence\n");
13900 264 : break;
13901 0 : case vect_condition_def:
13902 0 : dump_printf (MSG_NOTE, "control flow\n");
13903 0 : break;
13904 428 : case vect_unknown_def_type:
13905 428 : dump_printf (MSG_NOTE, "unknown\n");
13906 428 : break;
13907 : }
13908 : }
13909 :
13910 41583968 : if (*dt == vect_unknown_def_type)
13911 : {
13912 57096 : if (dump_enabled_p ())
13913 428 : dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
13914 : "Unsupported pattern.\n");
13915 57096 : return false;
13916 : }
13917 :
13918 : return true;
13919 : }
13920 :
13921 : /* Function vect_is_simple_use.
13922 :
13923 : Same as vect_is_simple_use but determines the operand by operand
13924 : position OPERAND from either STMT or SLP_NODE, filling in *OP
13925 : and *SLP_DEF (when SLP_NODE is not NULL). */
13926 :
13927 : bool
13928 3877276 : vect_is_simple_use (vec_info *vinfo, slp_tree slp_node,
13929 : unsigned operand, tree *op, slp_tree *slp_def,
13930 : enum vect_def_type *dt,
13931 : tree *vectype, stmt_vec_info *def_stmt_info_out)
13932 : {
13933 3877276 : slp_tree child = SLP_TREE_CHILDREN (slp_node)[operand];
13934 3877276 : *slp_def = child;
13935 3877276 : *vectype = SLP_TREE_VECTYPE (child);
13936 3877276 : if (SLP_TREE_DEF_TYPE (child) == vect_internal_def)
13937 : {
13938 :       /* ??? VEC_PERM nodes might be intermediate and their lane values
13939 : have no representative (nor do we build a VEC_PERM stmt for
13940 : the actual operation). Note for two-operator nodes we set
13941 : a representative but leave scalar stmts empty as we'd only
13942 : have one for a subset of lanes. Ideally no caller would
13943 : require *op for internal defs. */
13944 2156105 : if (SLP_TREE_REPRESENTATIVE (child))
13945 : {
13946 2155298 : *op = gimple_get_lhs (SLP_TREE_REPRESENTATIVE (child)->stmt);
13947 2155298 : return vect_is_simple_use (*op, vinfo, dt, def_stmt_info_out);
13948 : }
13949 : else
13950 : {
13951 807 : gcc_assert (SLP_TREE_PERMUTE_P (child));
13952 807 : *op = error_mark_node;
13953 807 : *dt = vect_internal_def;
13954 807 : if (def_stmt_info_out)
13955 0 : *def_stmt_info_out = NULL;
13956 807 : return true;
13957 : }
13958 : }
13959 : else
13960 : {
13961 1721171 : if (def_stmt_info_out)
13962 57586 : *def_stmt_info_out = NULL;
13963 1721171 : *op = SLP_TREE_SCALAR_OPS (child)[0];
13964 1721171 : *dt = SLP_TREE_DEF_TYPE (child);
13965 1721171 : return true;
13966 : }
13967 : }
13968 :
13969 : /* If OP is not NULL and is external or constant, update its vector
13970 : type with VECTYPE. Returns true if successful or false if not,
13971 : for example when conflicting vector types are present. */
13972 :
13973 : bool
13974 3548089 : vect_maybe_update_slp_op_vectype (slp_tree op, tree vectype)
13975 : {
13976 3548089 : if (!op || SLP_TREE_DEF_TYPE (op) == vect_internal_def)
13977 : return true;
13978 1157658 : if (SLP_TREE_VECTYPE (op))
13979 108584 : return types_compatible_p (SLP_TREE_VECTYPE (op), vectype);
13980 : /* For external defs refuse to produce VECTOR_BOOLEAN_TYPE_P, those
13981 :      should be handled by patterns. Allow vect_constant_def for now
13982 :      as well as the trivial single-lane uniform vect_external_def case,
13983 :      both of which we code-generate reasonably. */
13984 1049074 : if (VECTOR_BOOLEAN_TYPE_P (vectype)
13985 1552 : && SLP_TREE_DEF_TYPE (op) == vect_external_def
13986 1050246 : && SLP_TREE_LANES (op) > 1)
13987 : return false;
13988 1048874 : SLP_TREE_VECTYPE (op) = vectype;
13989 1048874 : return true;
13990 : }
13991 :
13992 : /* Function supportable_widening_operation
13993 :
13994 : Check whether an operation represented by the code CODE is a
13995 : widening operation that is supported by the target platform in
13996 : vector form (i.e., when operating on arguments of type VECTYPE_IN
13997 : producing a result of type VECTYPE_OUT).
13998 :
13999 : Widening operations we currently support are NOP (CONVERT), FLOAT,
14000 : FIX_TRUNC and WIDEN_MULT. This function checks if these operations
14001 : are supported by the target platform either directly (via vector
14002 : tree-codes), or via target builtins.
14003 :
14004 :    When EVENODD_OK is true, lane-swizzling operations are also considered.
14005 :
14006 : Output:
14007 : - CODE1 and CODE2 are codes of vector operations to be used when
14008 : vectorizing the operation, if available.
14009 : - MULTI_STEP_CVT determines the number of required intermediate steps in
14010 : case of multi-step conversion (like char->short->int - in that case
14011 : MULTI_STEP_CVT will be 1).
14012 : - INTERM_TYPES contains the intermediate type required to perform the
14013 : widening operation (short in the above example). */
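 :
 :    To make the char->short->int example concrete: on a hypothetical
 :    128-bit target, widening V16QI inputs to V4SI results is done by a
 :    VEC_UNPACK_LO/HI_EXPR pair to V8HI followed by another unpack pair
 :    to V4SI, so MULTI_STEP_CVT is 1 and INTERM_TYPES holds the V8HI
 :    type (assuming the target implements both unpack optabs). */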
14014 :
14015 : bool
14016 485057 : supportable_widening_operation (code_helper code,
14017 : tree vectype_out, tree vectype_in,
14018 : bool evenodd_ok,
14019 : code_helper *code1,
14020 : code_helper *code2,
14021 : int *multi_step_cvt,
14022 : vec<tree> *interm_types)
14023 : {
14024 485057 : machine_mode vec_mode;
14025 485057 : enum insn_code icode1, icode2;
14026 485057 : optab optab1 = unknown_optab, optab2 = unknown_optab;
14027 485057 : tree vectype = vectype_in;
14028 485057 : tree wide_vectype = vectype_out;
14029 485057 : tree_code c1 = MAX_TREE_CODES, c2 = MAX_TREE_CODES;
14030 485057 : int i;
14031 485057 : tree prev_type, intermediate_type;
14032 485057 : machine_mode intermediate_mode, prev_mode;
14033 485057 : optab optab3, optab4;
14034 :
14035 485057 : *multi_step_cvt = 0;
14036 :
14037 485057 : switch (code.safe_as_tree_code ())
14038 : {
14039 : case MAX_TREE_CODES:
14040 : /* Don't set c1 and c2 if code is not a tree_code. */
14041 : break;
14042 :
14043 186614 : case WIDEN_MULT_EXPR:
14044 : /* The result of a vectorized widening operation usually requires
14045 : two vectors (because the widened results do not fit into one vector).
14046 : The generated vector results would normally be expected to be
14047 : generated in the same order as in the original scalar computation,
14048 : i.e. if 8 results are generated in each vector iteration, they are
14049 : to be organized as follows:
14050 : vect1: [res1,res2,res3,res4],
14051 : vect2: [res5,res6,res7,res8].
14052 :
14053 : However, in the special case that the result of the widening
14054 : operation is used in a reduction computation only, the order doesn't
14055 : matter (because when vectorizing a reduction we change the order of
14056 : the computation). Some targets can take advantage of this and
14057 : generate more efficient code. For example, targets like Altivec,
14058 : that support widen_mult using a sequence of {mult_even,mult_odd}
14059 : generate the following vectors:
14060 : vect1: [res1,res3,res5,res7],
14061 : vect2: [res2,res4,res6,res8].
14062 :
14063 : When vectorizing outer-loops, we execute the inner-loop sequentially
14064 : (each vectorized inner-loop iteration contributes to VF outer-loop
14065 :          iterations in parallel). We therefore don't allow the order of
14066 :          the computation in the inner-loop to change during outer-loop
14067 : vectorization. */
14068 : /* TODO: Another case in which order doesn't *really* matter is when we
14069 : widen and then contract again, e.g. (short)((int)x * y >> 8).
14070 : Normally, pack_trunc performs an even/odd permute, whereas the
14071 : repack from an even/odd expansion would be an interleave, which
14072 : would be significantly simpler for e.g. AVX2. */
14073 : /* In any case, in order to avoid duplicating the code below, recurse
14074 : on VEC_WIDEN_MULT_EVEN_EXPR. If it succeeds, all the return values
14075 : are properly set up for the caller. If we fail, we'll continue with
14076 : a VEC_WIDEN_MULT_LO/HI_EXPR check. */
14077 186614 : if (evenodd_ok
14078 186614 : && supportable_widening_operation (VEC_WIDEN_MULT_EVEN_EXPR,
14079 : vectype_out, vectype_in,
14080 : evenodd_ok, code1,
14081 : code2, multi_step_cvt,
14082 : interm_types))
14083 97753 : return true;
14084 : c1 = VEC_WIDEN_MULT_LO_EXPR;
14085 : c2 = VEC_WIDEN_MULT_HI_EXPR;
14086 : break;
14087 :
14088 : case DOT_PROD_EXPR:
14089 387304 : c1 = DOT_PROD_EXPR;
14090 387304 : c2 = DOT_PROD_EXPR;
14091 : break;
14092 :
14093 0 : case SAD_EXPR:
14094 0 : c1 = SAD_EXPR;
14095 0 : c2 = SAD_EXPR;
14096 0 : break;
14097 :
14098 184662 : case VEC_WIDEN_MULT_EVEN_EXPR:
14099 : /* Support the recursion induced just above. */
14100 184662 : c1 = VEC_WIDEN_MULT_EVEN_EXPR;
14101 184662 : c2 = VEC_WIDEN_MULT_ODD_EXPR;
14102 184662 : break;
14103 :
14104 9426 : case WIDEN_LSHIFT_EXPR:
14105 9426 : c1 = VEC_WIDEN_LSHIFT_LO_EXPR;
14106 9426 : c2 = VEC_WIDEN_LSHIFT_HI_EXPR;
14107 9426 : break;
14108 :
14109 40903 : CASE_CONVERT:
14110 40903 : c1 = VEC_UNPACK_LO_EXPR;
14111 40903 : c2 = VEC_UNPACK_HI_EXPR;
14112 40903 : break;
14113 :
14114 9183 : case FLOAT_EXPR:
14115 9183 : c1 = VEC_UNPACK_FLOAT_LO_EXPR;
14116 9183 : c2 = VEC_UNPACK_FLOAT_HI_EXPR;
14117 9183 : break;
14118 :
14119 119 : case FIX_TRUNC_EXPR:
14120 119 : c1 = VEC_UNPACK_FIX_TRUNC_LO_EXPR;
14121 119 : c2 = VEC_UNPACK_FIX_TRUNC_HI_EXPR;
14122 119 : break;
14123 :
14124 0 : default:
14125 0 : gcc_unreachable ();
14126 : }
14127 :
14128 387304 : if (BYTES_BIG_ENDIAN && c1 != VEC_WIDEN_MULT_EVEN_EXPR)
14129 : std::swap (c1, c2);
14130 :
14131 387304 : if (code == FIX_TRUNC_EXPR)
14132 : {
14133 : /* The signedness is determined from output operand. */
14134 119 : optab1 = optab_for_tree_code (c1, vectype_out, optab_default);
14135 119 : optab2 = optab_for_tree_code (c2, vectype_out, optab_default);
14136 : }
14137 687719 : else if (CONVERT_EXPR_CODE_P (code.safe_as_tree_code ())
14138 40903 : && VECTOR_BOOLEAN_TYPE_P (wide_vectype)
14139 8032 : && VECTOR_BOOLEAN_TYPE_P (vectype)
14140 8032 : && TYPE_MODE (wide_vectype) == TYPE_MODE (vectype)
14141 333499 : && SCALAR_INT_MODE_P (TYPE_MODE (vectype)))
14142 : {
14143 : /* If the input and result modes are the same, a different optab
14144 : is needed where we pass in the number of units in vectype. */
14145 : optab1 = vec_unpacks_sbool_lo_optab;
14146 : optab2 = vec_unpacks_sbool_hi_optab;
14147 : }
14148 :
14149 387304 : vec_mode = TYPE_MODE (vectype);
14150 387304 : if (widening_fn_p (code))
14151 : {
14152 : /* If this is an internal fn then we must check whether the target
14153 : supports either a low-high split or an even-odd split. */
14154 54150 : internal_fn ifn = as_internal_fn ((combined_fn) code);
14155 :
14156 54150 : internal_fn lo, hi, even, odd;
14157 54150 : lookup_hilo_internal_fn (ifn, &lo, &hi);
14158 54150 : if (BYTES_BIG_ENDIAN)
14159 : std::swap (lo, hi);
14160 54150 : *code1 = as_combined_fn (lo);
14161 54150 : *code2 = as_combined_fn (hi);
14162 54150 : optab1 = direct_internal_fn_optab (lo, {vectype, vectype});
14163 54150 : optab2 = direct_internal_fn_optab (hi, {vectype, vectype});
14164 :
14165 : /* If we don't support low-high, then check for even-odd. */
14166 54150 : if (!optab1
14167 54150 : || (icode1 = optab_handler (optab1, vec_mode)) == CODE_FOR_nothing
14168 0 : || !optab2
14169 54150 : || (icode2 = optab_handler (optab2, vec_mode)) == CODE_FOR_nothing)
14170 : {
14171 54150 : lookup_evenodd_internal_fn (ifn, &even, &odd);
14172 54150 : *code1 = as_combined_fn (even);
14173 54150 : *code2 = as_combined_fn (odd);
14174 54150 : optab1 = direct_internal_fn_optab (even, {vectype, vectype});
14175 54150 : optab2 = direct_internal_fn_optab (odd, {vectype, vectype});
14176 : }
14177 : }
14178 333154 : else if (code.is_tree_code ())
14179 : {
14180 333154 : if (code == FIX_TRUNC_EXPR)
14181 : {
14182 : /* The signedness is determined from output operand. */
14183 119 : optab1 = optab_for_tree_code (c1, vectype_out, optab_default);
14184 119 : optab2 = optab_for_tree_code (c2, vectype_out, optab_default);
14185 : }
14186 333035 : else if (CONVERT_EXPR_CODE_P ((tree_code) code.safe_as_tree_code ())
14187 40903 : && VECTOR_BOOLEAN_TYPE_P (wide_vectype)
14188 8032 : && VECTOR_BOOLEAN_TYPE_P (vectype)
14189 8032 : && TYPE_MODE (wide_vectype) == TYPE_MODE (vectype)
14190 333499 : && SCALAR_INT_MODE_P (TYPE_MODE (vectype)))
14191 : {
14192 : /* If the input and result modes are the same, a different optab
14193 : is needed where we pass in the number of units in vectype. */
14194 : optab1 = vec_unpacks_sbool_lo_optab;
14195 : optab2 = vec_unpacks_sbool_hi_optab;
14196 : }
14197 : else
14198 : {
14199 332571 : optab1 = optab_for_tree_code (c1, vectype, optab_default);
14200 332571 : optab2 = optab_for_tree_code (c2, vectype, optab_default);
14201 : }
14202 333154 : *code1 = c1;
14203 333154 : *code2 = c2;
14204 : }
14205 :
14206 387304 : if (!optab1 || !optab2)
14207 : return false;
14208 :
14209 387304 : if ((icode1 = optab_handler (optab1, vec_mode)) == CODE_FOR_nothing
14210 387304 : || (icode2 = optab_handler (optab2, vec_mode)) == CODE_FOR_nothing)
14211 230372 : return false;
14212 :
14213 :
14214 156932 : if (insn_data[icode1].operand[0].mode == TYPE_MODE (wide_vectype)
14215 156932 : && insn_data[icode2].operand[0].mode == TYPE_MODE (wide_vectype))
14216 : {
14217 145537 : if (!VECTOR_BOOLEAN_TYPE_P (vectype))
14218 : return true;
14219 : /* For scalar masks we may have different boolean
14220 : vector types having the same QImode. Thus we
14221 :          additionally check the number of elements. */
14222 4245 : if (known_eq (TYPE_VECTOR_SUBPARTS (vectype),
14223 : TYPE_VECTOR_SUBPARTS (wide_vectype) * 2))
14224 : return true;
14225 : }
14226 :
14227 : /* Check if it's a multi-step conversion that can be done using intermediate
14228 : types. */
14229 :
14230 11600 : prev_type = vectype;
14231 11600 : prev_mode = vec_mode;
14232 :
14233 242257 : if (!CONVERT_EXPR_CODE_P (code.safe_as_tree_code ()))
14234 : return false;
14235 :
14236 : /* We assume here that there will not be more than MAX_INTERM_CVT_STEPS
14237 : intermediate steps in promotion sequence. We try
14238 : MAX_INTERM_CVT_STEPS to get to NARROW_VECTYPE, and fail if we do
14239 : not. */
14240 11548 : interm_types->create (MAX_INTERM_CVT_STEPS);
14241 12938 : for (i = 0; i < MAX_INTERM_CVT_STEPS; i++)
14242 : {
14243 12938 : intermediate_mode = insn_data[icode1].operand[0].mode;
14244 12938 : if (VECTOR_BOOLEAN_TYPE_P (prev_type))
14245 4795 : intermediate_type
14246 4795 : = vect_halve_mask_nunits (prev_type, intermediate_mode);
14247 8143 : else if (VECTOR_MODE_P (intermediate_mode))
14248 : {
14249 8143 : tree intermediate_element_type
14250 8143 : = lang_hooks.types.type_for_mode (GET_MODE_INNER (intermediate_mode),
14251 8143 : TYPE_UNSIGNED (prev_type));
14252 8143 : intermediate_type
14253 8143 : = build_vector_type_for_mode (intermediate_element_type,
14254 : intermediate_mode);
14255 8143 : }
14256 : else
14257 0 : intermediate_type
14258 0 : = lang_hooks.types.type_for_mode (intermediate_mode,
14259 0 : TYPE_UNSIGNED (prev_type));
14260 :
14261 12938 : if (VECTOR_BOOLEAN_TYPE_P (intermediate_type)
14262 4795 : && VECTOR_BOOLEAN_TYPE_P (wide_vectype)
14263 4795 : && intermediate_mode == TYPE_MODE (wide_vectype)
14264 13207 : && SCALAR_INT_MODE_P (intermediate_mode))
14265 : {
14266 : /* If the input and result modes are the same, a different optab
14267 : is needed where we pass in the number of units in vectype. */
14268 : optab3 = vec_unpacks_sbool_lo_optab;
14269 : optab4 = vec_unpacks_sbool_hi_optab;
14270 : }
14271 : else
14272 : {
14273 12669 : optab3 = optab_for_tree_code (c1, intermediate_type, optab_default);
14274 12669 : optab4 = optab_for_tree_code (c2, intermediate_type, optab_default);
14275 : }
14276 :
14277 12938 : if (!optab3 || !optab4
14278 12938 : || (icode1 = optab_handler (optab1, prev_mode)) == CODE_FOR_nothing
14279 12906 : || insn_data[icode1].operand[0].mode != intermediate_mode
14280 12906 : || (icode2 = optab_handler (optab2, prev_mode)) == CODE_FOR_nothing
14281 12906 : || insn_data[icode2].operand[0].mode != intermediate_mode
14282 12906 : || ((icode1 = optab_handler (optab3, intermediate_mode))
14283 : == CODE_FOR_nothing)
14284 25591 : || ((icode2 = optab_handler (optab4, intermediate_mode))
14285 : == CODE_FOR_nothing))
14286 : break;
14287 :
14288 12653 : interm_types->quick_push (intermediate_type);
14289 12653 : (*multi_step_cvt)++;
14290 :
14291 12653 : if (insn_data[icode1].operand[0].mode == TYPE_MODE (wide_vectype)
14292 12653 : && insn_data[icode2].operand[0].mode == TYPE_MODE (wide_vectype))
14293 : {
14294 11327 : if (!VECTOR_BOOLEAN_TYPE_P (vectype))
14295 : return true;
14296 3785 : if (known_eq (TYPE_VECTOR_SUBPARTS (intermediate_type),
14297 : TYPE_VECTOR_SUBPARTS (wide_vectype) * 2))
14298 : return true;
14299 : }
14300 :
14301 1390 : prev_type = intermediate_type;
14302 1390 : prev_mode = intermediate_mode;
14303 : }
14304 :
14305 285 : interm_types->release ();
14306 285 : return false;
14307 : }
14308 :
14309 :
14310 : /* Function supportable_narrowing_operation
14311 :
14312 : Check whether an operation represented by the code CODE is a
14313 : narrowing operation that is supported by the target platform in
14314 : vector form (i.e., when operating on arguments of type VECTYPE_IN
14315 : and producing a result of type VECTYPE_OUT).
14316 :
14317 : Narrowing operations we currently support are NOP (CONVERT), FIX_TRUNC
14318 : and FLOAT. This function checks if these operations are supported by
14319 : the target platform directly via vector tree-codes.
14320 :
14321 : Output:
14322 : - CODE1 is the code of a vector operation to be used when
14323 : vectorizing the operation, if available.
14324 : - MULTI_STEP_CVT determines the number of required intermediate steps in
14325 : case of multi-step conversion (like int->short->char - in that case
14326 : MULTI_STEP_CVT will be 1).
14327 : - INTERM_TYPES contains the intermediate type required to perform the
14328 : narrowing operation (short in the above example). */
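 :
 :    Mirroring the int->short->char example: on a hypothetical 128-bit
 :    target, V4SI inputs are packed to V8HI by VEC_PACK_TRUNC_EXPR and
 :    then to V16QI by a second pack, so MULTI_STEP_CVT is 1 and
 :    INTERM_TYPES holds the V8HI type (assuming both pack optabs
 :    exist). */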
14329 :
14330 : bool
14331 42010 : supportable_narrowing_operation (code_helper code,
14332 : tree vectype_out, tree vectype_in,
14333 : code_helper *code1, int *multi_step_cvt,
14334 : vec<tree> *interm_types)
14335 : {
14336 42010 : machine_mode vec_mode;
14337 42010 : enum insn_code icode1;
14338 42010 : optab optab1, interm_optab;
14339 42010 : tree vectype = vectype_in;
14340 42010 : tree narrow_vectype = vectype_out;
14341 42010 : enum tree_code c1;
14342 42010 : tree intermediate_type, prev_type;
14343 42010 : machine_mode intermediate_mode, prev_mode;
14344 42010 : int i;
14345 42010 : unsigned HOST_WIDE_INT n_elts;
14346 42010 : bool uns;
14347 :
14348 42010 : if (!code.is_tree_code ())
14349 : return false;
14350 :
14351 42010 : *multi_step_cvt = 0;
14352 42010 : switch ((tree_code) code)
14353 : {
14354 41166 : CASE_CONVERT:
14355 41166 : c1 = VEC_PACK_TRUNC_EXPR;
14356 41166 : if (VECTOR_BOOLEAN_TYPE_P (narrow_vectype)
14357 11662 : && VECTOR_BOOLEAN_TYPE_P (vectype)
14358 11662 : && SCALAR_INT_MODE_P (TYPE_MODE (vectype))
14359 5264 : && TYPE_VECTOR_SUBPARTS (vectype).is_constant (&n_elts)
14360 46430 : && n_elts < BITS_PER_UNIT)
14361 : optab1 = vec_pack_sbool_trunc_optab;
14362 : else
14363 38681 : optab1 = optab_for_tree_code (c1, vectype, optab_default);
14364 : break;
14365 :
14366 561 : case FIX_TRUNC_EXPR:
14367 561 : c1 = VEC_PACK_FIX_TRUNC_EXPR;
14368 : /* The signedness is determined from output operand. */
14369 561 : optab1 = optab_for_tree_code (c1, vectype_out, optab_default);
14370 561 : break;
14371 :
14372 283 : case FLOAT_EXPR:
14373 283 : c1 = VEC_PACK_FLOAT_EXPR;
14374 283 : optab1 = optab_for_tree_code (c1, vectype, optab_default);
14375 283 : break;
14376 :
14377 0 : default:
14378 0 : gcc_unreachable ();
14379 : }
14380 :
14381 42010 : if (!optab1)
14382 : return false;
14383 :
14384 42010 : vec_mode = TYPE_MODE (vectype);
14385 42010 : if ((icode1 = optab_handler (optab1, vec_mode)) == CODE_FOR_nothing)
14386 : return false;
14387 :
14388 37740 : *code1 = c1;
14389 :
14390 37740 : if (insn_data[icode1].operand[0].mode == TYPE_MODE (narrow_vectype))
14391 : {
14392 23543 : if (!VECTOR_BOOLEAN_TYPE_P (vectype))
14393 : return true;
14394 : /* For scalar masks we may have different boolean
14395 : vector types having the same QImode. Thus we
14396 :          additionally check the number of elements. */
14397 5821 : if (known_eq (TYPE_VECTOR_SUBPARTS (vectype) * 2,
14398 : TYPE_VECTOR_SUBPARTS (narrow_vectype)))
14399 : return true;
14400 : }
14401 :
14402 14348 : if (code == FLOAT_EXPR)
14403 : return false;
14404 :
14405 : /* Check if it's a multi-step conversion that can be done using intermediate
14406 : types. */
14407 14348 : prev_mode = vec_mode;
14408 14348 : prev_type = vectype;
14409 14348 : if (code == FIX_TRUNC_EXPR)
14410 94 : uns = TYPE_UNSIGNED (vectype_out);
14411 : else
14412 14254 : uns = TYPE_UNSIGNED (vectype);
14413 :
14414 : /* For multi-step FIX_TRUNC_EXPR prefer signed floating to integer
14415 : conversion over unsigned, as unsigned FIX_TRUNC_EXPR is often more
14416 : costly than signed. */
14417 14348 : if (code == FIX_TRUNC_EXPR && uns)
14418 : {
14419 28 : enum insn_code icode2;
14420 :
14421 28 : intermediate_type
14422 28 : = lang_hooks.types.type_for_mode (TYPE_MODE (vectype_out), 0);
14423 28 : interm_optab
14424 28 : = optab_for_tree_code (c1, intermediate_type, optab_default);
14425 28 : if (interm_optab != unknown_optab
14426 28 : && (icode2 = optab_handler (optab1, vec_mode)) != CODE_FOR_nothing
14427 28 : && insn_data[icode1].operand[0].mode
14428 28 : == insn_data[icode2].operand[0].mode)
14429 : {
14430 : uns = false;
14431 : optab1 = interm_optab;
14432 : icode1 = icode2;
14433 : }
14434 : }
14435 :
14436 : /* We assume here that there will not be more than MAX_INTERM_CVT_STEPS
14437 : intermediate steps in promotion sequence. We try
14438 : MAX_INTERM_CVT_STEPS to get to NARROW_VECTYPE, and fail if we do not. */
14439 14348 : interm_types->create (MAX_INTERM_CVT_STEPS);
14440 30844 : for (i = 0; i < MAX_INTERM_CVT_STEPS; i++)
14441 : {
14442 16496 : intermediate_mode = insn_data[icode1].operand[0].mode;
14443 16496 : if (VECTOR_BOOLEAN_TYPE_P (prev_type))
14444 7186 : intermediate_type
14445 7186 : = vect_double_mask_nunits (prev_type, intermediate_mode);
14446 : else
14447 9310 : intermediate_type
14448 9310 : = lang_hooks.types.type_for_mode (intermediate_mode, uns);
14449 16496 : if (VECTOR_BOOLEAN_TYPE_P (intermediate_type)
14450 7186 : && VECTOR_BOOLEAN_TYPE_P (prev_type)
14451 7186 : && SCALAR_INT_MODE_P (prev_mode)
14452 3134 : && TYPE_VECTOR_SUBPARTS (intermediate_type).is_constant (&n_elts)
14453 19630 : && n_elts < BITS_PER_UNIT)
14454 : interm_optab = vec_pack_sbool_trunc_optab;
14455 : else
14456 16142 : interm_optab
14457 16142 : = optab_for_tree_code (VEC_PACK_TRUNC_EXPR, intermediate_type,
14458 : optab_default);
14459 354 : if (!interm_optab
14460 16496 : || ((icode1 = optab_handler (optab1, prev_mode)) == CODE_FOR_nothing)
14461 16496 : || insn_data[icode1].operand[0].mode != intermediate_mode
14462 32638 : || ((icode1 = optab_handler (interm_optab, intermediate_mode))
14463 : == CODE_FOR_nothing))
14464 : break;
14465 :
14466 15581 : interm_types->quick_push (intermediate_type);
14467 15581 : (*multi_step_cvt)++;
14468 :
14469 15581 : if (insn_data[icode1].operand[0].mode == TYPE_MODE (narrow_vectype))
14470 : {
14471 13433 : if (!VECTOR_BOOLEAN_TYPE_P (vectype))
14472 : return true;
14473 5008 : if (known_eq (TYPE_VECTOR_SUBPARTS (intermediate_type) * 2,
14474 : TYPE_VECTOR_SUBPARTS (narrow_vectype)))
14475 : return true;
14476 : }
14477 :
14478 2148 : prev_mode = intermediate_mode;
14479 2148 : prev_type = intermediate_type;
14480 2148 : optab1 = interm_optab;
14481 : }
14482 :
14483 915 : interm_types->release ();
14484 915 : return false;
14485 : }
14486 :
14487 : /* Function supportable_indirect_convert_operation
14488 :
14489 : Check whether an operation represented by the code CODE is single or multi
14490 : operations that are supported by the target platform in
14491 : vector form (i.e., when operating on arguments of type VECTYPE_IN
14492 : producing a result of type VECTYPE_OUT).
14493 :
14494 : Convert operations we currently support directly are FIX_TRUNC and FLOAT.
14495 : This function checks if these operations are supported
14496 : by the target platform directly (via vector tree-codes).
14497 :
14498 : Output:
14499 :    - CONVERTS contains the pairs needed to perform the conversion;
14500 :    each pair's first member is the intermediate type and its second
14501 :    is the code of the vector operation to be used when converting
14502 :    from the previous type to that intermediate type. */
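 :
 : /* As a hypothetical illustration: a FLOAT_EXPR converting a short
 :    vector directly to a double vector is often unsupported, but a
 :    target may support a NOP_EXPR step (short -> int) followed by a
 :    FLOAT_EXPR step (int -> double); CONVERTS would then hold the
 :    (int vector, NOP_EXPR) pair followed by the (double vector,
 :    FLOAT_EXPR) pair. */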
14503 : bool
14504 85678 : supportable_indirect_convert_operation (code_helper code,
14505 : tree vectype_out,
14506 : tree vectype_in,
14507 : vec<std::pair<tree, tree_code> > &converts,
14508 : tree op0, slp_tree slp_op0)
14509 : {
14510 85678 : bool found_mode = false;
14511 85678 : scalar_mode lhs_mode = GET_MODE_INNER (TYPE_MODE (vectype_out));
14512 85678 : scalar_mode rhs_mode = GET_MODE_INNER (TYPE_MODE (vectype_in));
14513 85678 : tree_code tc1, tc2, code1, code2;
14514 :
14515 85678 : tree cvt_type = NULL_TREE;
14516 85678 : poly_uint64 nelts = TYPE_VECTOR_SUBPARTS (vectype_in);
14517 :
14518 85678 : if (supportable_convert_operation ((tree_code) code,
14519 : vectype_out,
14520 : vectype_in,
14521 : &tc1))
14522 : {
14523 19252 : converts.safe_push (std::make_pair (vectype_out, tc1));
14524 19252 : return true;
14525 : }
14526 :
14527 : /* For conversions between float and integer types try whether
14528 : we can use intermediate signed integer types to support the
14529 : conversion. */
14530 132852 : if (GET_MODE_SIZE (lhs_mode) != GET_MODE_SIZE (rhs_mode)
14531 66426 : && (code == FLOAT_EXPR
14532 3193 : || (code == FIX_TRUNC_EXPR && !flag_trapping_math)))
14533 : {
14534 476 : bool demotion = GET_MODE_SIZE (rhs_mode) > GET_MODE_SIZE (lhs_mode);
14535 238 : bool float_expr_p = code == FLOAT_EXPR;
14536 238 : unsigned short target_size;
14537 238 : scalar_mode intermediate_mode;
14538 238 : if (demotion)
14539 : {
14540 84 : intermediate_mode = lhs_mode;
14541 84 : target_size = GET_MODE_SIZE (rhs_mode);
14542 : }
14543 : else
14544 : {
14545 154 : target_size = GET_MODE_SIZE (lhs_mode);
14546 154 : if (!int_mode_for_size
14547 154 : (GET_MODE_BITSIZE (rhs_mode), 0).exists (&intermediate_mode))
14548 132 : return false;
14549 : }
14550 238 : code1 = float_expr_p ? (tree_code) code : NOP_EXPR;
14551 : code2 = float_expr_p ? NOP_EXPR : (tree_code) code;
14552 238 : opt_scalar_mode mode_iter;
14553 417 : FOR_EACH_2XWIDER_MODE (mode_iter, intermediate_mode)
14554 : {
14555 417 : intermediate_mode = mode_iter.require ();
14556 :
14557 834 : if (GET_MODE_SIZE (intermediate_mode) > target_size)
14558 : break;
14559 :
14560 349 : scalar_mode cvt_mode;
14561 349 : if (!int_mode_for_size
14562 349 : (GET_MODE_BITSIZE (intermediate_mode), 0).exists (&cvt_mode))
14563 : break;
14564 :
14565 319 : cvt_type = build_nonstandard_integer_type
14566 319 : (GET_MODE_BITSIZE (cvt_mode), 0);
14567 :
14568 : /* Check if the intermediate type can hold OP0's range.
14569 : When converting from float to integer this is not necessary
14570 : because values that do not fit the (smaller) target type are
14571 : unspecified anyway. */
14572 319 : if (demotion && float_expr_p)
14573 : {
14574 8 : wide_int op_min_value, op_max_value;
14575 :               /* In vector form, op0 doesn't seem to carry RANGE_INFO.
14576 :                  If that becomes supported in the future, this part may
14577 :                  need changes, such as checking the range of each element
14578 :                  in the vector. */
14579 8 : if (slp_op0)
14580 : {
14581 4 : tree def;
14582 : /* ??? Merge ranges in case of more than one lane. */
14583 4 : if (SLP_TREE_LANES (slp_op0) != 1
14584 0 : || !(def = vect_get_slp_scalar_def (slp_op0, 0))
14585 4 : || !vect_get_range_info (def,
14586 : &op_min_value, &op_max_value))
14587 : break;
14588 : }
14589 4 : else if (!op0
14590 0 : || TREE_CODE (op0) != SSA_NAME
14591 0 : || !SSA_NAME_RANGE_INFO (op0)
14592 4 : || !vect_get_range_info (op0, &op_min_value,
14593 : &op_max_value))
14594 : break;
14595 :
14596 0 : if (cvt_type == NULL_TREE
14597 0 : || (wi::min_precision (op_max_value, SIGNED)
14598 0 : > TYPE_PRECISION (cvt_type))
14599 0 : || (wi::min_precision (op_min_value, SIGNED)
14600 0 : > TYPE_PRECISION (cvt_type)))
14601 0 : continue;
14602 8 : }
14603 :
14604 311 : cvt_type = get_related_vectype_for_scalar_type (TYPE_MODE (vectype_in),
14605 : cvt_type,
14606 : nelts);
14607 :           /* This should only happen for SLP, as long as the loop vectorizer
14608 :              only supports same-sized vectors. */
14609 490 : if (cvt_type == NULL_TREE
14610 443 : || maybe_ne (TYPE_VECTOR_SUBPARTS (cvt_type), nelts)
14611 311 : || !supportable_convert_operation ((tree_code) code1,
14612 : vectype_out,
14613 : cvt_type, &tc1)
14614 521 : || !supportable_convert_operation ((tree_code) code2,
14615 : cvt_type,
14616 : vectype_in, &tc2))
14617 179 : continue;
14618 :
14619 : found_mode = true;
14620 : break;
14621 : }
14622 :
14623 238 : if (found_mode)
14624 : {
14625 132 : converts.safe_push (std::make_pair (cvt_type, tc2));
14626 132 : if (TYPE_MODE (cvt_type) != TYPE_MODE (vectype_out))
14627 132 : converts.safe_push (std::make_pair (vectype_out, tc1));
14628 132 : return true;
14629 : }
14630 : }
14631 : return false;
14632 : }
14633 :
14634 : /* Generate and return a vector mask of MASK_TYPE such that
14635 : mask[I] is true iff J + START_INDEX < END_INDEX for all J <= I.
14636 : Add the statements to SEQ. */
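 :
 : /* E.g. with START_INDEX 6, END_INDEX 8 and a 4-lane MASK_TYPE, the
 :    IFN_WHILE_ULT call built below yields { true, true, false, false }. */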
14637 :
14638 : tree
14639 0 : vect_gen_while (gimple_seq *seq, tree mask_type, tree start_index,
14640 : tree end_index, const char *name)
14641 : {
14642 0 : tree cmp_type = TREE_TYPE (start_index);
14643 0 : gcc_checking_assert (direct_internal_fn_supported_p (IFN_WHILE_ULT,
14644 : cmp_type, mask_type,
14645 : OPTIMIZE_FOR_SPEED));
14646 0 : gcall *call = gimple_build_call_internal (IFN_WHILE_ULT, 3,
14647 : start_index, end_index,
14648 : build_zero_cst (mask_type));
14649 0 : tree tmp;
14650 0 : if (name)
14651 0 : tmp = make_temp_ssa_name (mask_type, NULL, name);
14652 : else
14653 0 : tmp = make_ssa_name (mask_type);
14654 0 : gimple_call_set_lhs (call, tmp);
14655 0 : gimple_seq_add_stmt (seq, call);
14656 0 : return tmp;
14657 : }
14658 :
14659 : /* Generate a vector mask of type MASK_TYPE for which index I is false iff
14660 : J + START_INDEX < END_INDEX for all J <= I. Add the statements to SEQ. */
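 :
 : /* With the same example inputs as above (START_INDEX 6, END_INDEX 8,
 :    4 lanes), the result is the inverted mask
 :    { false, false, true, true }. */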
14661 :
14662 : tree
14663 0 : vect_gen_while_not (gimple_seq *seq, tree mask_type, tree start_index,
14664 : tree end_index)
14665 : {
14666 0 : tree tmp = vect_gen_while (seq, mask_type, start_index, end_index);
14667 0 : return gimple_build (seq, BIT_NOT_EXPR, mask_type, tmp);
14668 : }
14669 :
14670 : /* Try to compute the vector types required to vectorize STMT_INFO,
14671 : returning true on success and false if vectorization isn't possible.
14672 : If GROUP_SIZE is nonzero and we're performing BB vectorization,
14673 :    make sure that the number of elements in the vectors is no bigger
14674 : than GROUP_SIZE.
14675 :
14676 : On success:
14677 :
14678 : - Set *STMT_VECTYPE_OUT to:
14679 : - NULL_TREE if the statement doesn't need to be vectorized;
14680 : - the equivalent of STMT_VINFO_VECTYPE otherwise.
14681 :
14682 : - Set *NUNITS_VECTYPE_OUT to the vector type that contains the maximum
14683 : number of units needed to vectorize STMT_INFO, or NULL_TREE if the
14684 : statement does not help to determine the overall number of units. */
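 :
 :    For example, for a widening stmt int_x = (int) short_y on a
 :    hypothetical 128-bit target, *STMT_VECTYPE_OUT is the V4SI type
 :    while *NUNITS_VECTYPE_OUT is the V8HI type of the smallest scalar
 :    type, which determines the number of units (here 8). */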
14685 :
14686 : opt_result
14687 5708792 : vect_get_vector_types_for_stmt (vec_info *vinfo, stmt_vec_info stmt_info,
14688 : tree *stmt_vectype_out,
14689 : tree *nunits_vectype_out,
14690 : unsigned int group_size)
14691 : {
14692 5708792 : gimple *stmt = stmt_info->stmt;
14693 :
14694 : /* For BB vectorization, we should always have a group size once we've
14695 : constructed the SLP tree; the only valid uses of zero GROUP_SIZEs
14696 : are tentative requests during things like early data reference
14697 : analysis and pattern recognition. */
14698 5708792 : if (is_a <bb_vec_info> (vinfo))
14699 4452850 : gcc_assert (vinfo->slp_instances.is_empty () || group_size != 0);
14700 : else
14701 : group_size = 0;
14702 :
14703 5708792 : *stmt_vectype_out = NULL_TREE;
14704 5708792 : *nunits_vectype_out = NULL_TREE;
14705 :
14706 5708792 : if (gimple_get_lhs (stmt) == NULL_TREE
14707 : /* Allow vector conditionals through here. */
14708 2737 : && !is_a <gcond *> (stmt)
14709 : /* MASK_STORE and friends have no lhs, but are ok. */
14710 5714246 : && !(is_gimple_call (stmt)
14711 2737 : && gimple_call_internal_p (stmt)
14712 2717 : && internal_store_fn_p (gimple_call_internal_fn (stmt))))
14713 : {
14714 20 : if (is_a <gcall *> (stmt))
14715 : {
14716 : /* Ignore calls with no lhs. These must be calls to
14717 :          #pragma omp simd functions, and the vectorization factor
14718 :          they really need can't be determined until
14719 : vectorizable_simd_clone_call. */
14720 20 : if (dump_enabled_p ())
14721 18 : dump_printf_loc (MSG_NOTE, vect_location,
14722 : "defer to SIMD clone analysis.\n");
14723 20 : return opt_result::success ();
14724 : }
14725 :
14726 0 : return opt_result::failure_at (stmt,
14727 : "not vectorized: irregular stmt: %G", stmt);
14728 : }
14729 :
14730 5708772 : tree vectype;
14731 5708772 : tree scalar_type = NULL_TREE;
14732 5708772 : if (group_size == 0 && STMT_VINFO_VECTYPE (stmt_info))
14733 : {
14734 1568565 : vectype = STMT_VINFO_VECTYPE (stmt_info);
14735 1568565 : if (dump_enabled_p ())
14736 79215 : dump_printf_loc (MSG_NOTE, vect_location,
14737 : "precomputed vectype: %T\n", vectype);
14738 : }
14739 4140207 : else if (vect_use_mask_type_p (stmt_info))
14740 : {
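/* Mask-producing statements record a mask precision rather than a
   scalar type; rebuild an unsigned integer type of that precision
   and ask for the corresponding mask vector type.  */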
14741 194374 : unsigned int precision = stmt_info->mask_precision;
14742 194374 : scalar_type = build_nonstandard_integer_type (precision, 1);
14743 194374 : vectype = get_mask_type_for_scalar_type (vinfo, scalar_type, group_size);
14744 194374 : if (!vectype)
14745 0 : return opt_result::failure_at (stmt, "not vectorized: unsupported"
14746 : " data-type %T\n", scalar_type);
14747 194374 : if (dump_enabled_p ())
14748 4701 : dump_printf_loc (MSG_NOTE, vect_location, "vectype: %T\n", vectype);
14749 : }
14750 : else
14751 : {
14752 : /* If we got here with a gcond, it means that the target had no available
14753 : vector mode for the scalar type. We can't vectorize, so abort. */
14754 3945833 : if (is_a <gcond *> (stmt))
14755 0 : return opt_result::failure_at (stmt,
14756 : "not vectorized:"
14757 : " unsupported data-type for gcond %T\n",
14758 : scalar_type);
14759 :
14760 3945833 : if (data_reference *dr = STMT_VINFO_DATA_REF (stmt_info))
14761 1448342 : scalar_type = TREE_TYPE (DR_REF (dr));
14762 : else
14763 2497491 : scalar_type = TREE_TYPE (gimple_get_lhs (stmt));
14764 :
14765 3945833 : if (dump_enabled_p ())
14766 : {
14767 61873 : if (group_size)
14768 7540 : dump_printf_loc (MSG_NOTE, vect_location,
14769 : "get vectype for scalar type (group size %d):"
14770 : " %T\n", group_size, scalar_type);
14771 : else
14772 54333 : dump_printf_loc (MSG_NOTE, vect_location,
14773 : "get vectype for scalar type: %T\n", scalar_type);
14774 : }
14775 3945833 : vectype = get_vectype_for_scalar_type (vinfo, scalar_type, group_size);
14776 3945833 : if (!vectype)
14777 199553 : return opt_result::failure_at (stmt,
14778 : "not vectorized:"
14779 : " unsupported data-type %T\n",
14780 : scalar_type);
14781 :
14782 3746280 : if (dump_enabled_p ())
14783 61674 : dump_printf_loc (MSG_NOTE, vect_location, "vectype: %T\n", vectype);
14784 : }
14785 :
14786 4019869 : if (scalar_type && VECTOR_MODE_P (TYPE_MODE (scalar_type)))
14787 0 : return opt_result::failure_at (stmt,
14788 : "not vectorized: vector stmt in loop:%G",
14789 : stmt);
14790 :
14791 5509219 : *stmt_vectype_out = vectype;
14792 :
14793 : /* Don't try to compute scalar types if the stmt produces a boolean
14794 : vector; use the existing vector type instead. */
14795 5509219 : tree nunits_vectype = vectype;
14796 5509219 : if (!VECTOR_BOOLEAN_TYPE_P (vectype))
14797 : {
14798 : /* The number of units is set according to the smallest scalar
14799 : type (or the largest vector size, but we only support one
14800 : vector size per vectorization). */
14801 4995888 : scalar_type = vect_get_smallest_scalar_type (stmt_info,
14802 4995888 : TREE_TYPE (vectype));
14803 4995888 : if (!types_compatible_p (scalar_type, TREE_TYPE (vectype)))
14804 : {
14805 984663 : if (dump_enabled_p ())
14806 9766 : dump_printf_loc (MSG_NOTE, vect_location,
14807 : "get vectype for smallest scalar type: %T\n",
14808 : scalar_type);
14809 984663 : nunits_vectype = get_vectype_for_scalar_type (vinfo, scalar_type,
14810 : group_size);
14811 984663 : if (!nunits_vectype)
14812 10 : return opt_result::failure_at
14813 10 : (stmt, "not vectorized: unsupported data-type %T\n",
14814 : scalar_type);
14815 984653 : if (dump_enabled_p ())
14816 9766 : dump_printf_loc (MSG_NOTE, vect_location, "nunits vectype: %T\n",
14817 : nunits_vectype);
14818 : }
14819 : }
14820 :
14821 5509209 : if (!multiple_p (TYPE_VECTOR_SUBPARTS (nunits_vectype),
14822 5509209 : TYPE_VECTOR_SUBPARTS (*stmt_vectype_out)))
14823 0 : return opt_result::failure_at (stmt,
14824 : "Not vectorized: Incompatible number "
14825 : "of vector subparts between %T and %T\n",
14826 : nunits_vectype, *stmt_vectype_out);
14827 :
14828 5509209 : if (dump_enabled_p ())
14829 : {
14830 145590 : dump_printf_loc (MSG_NOTE, vect_location, "nunits = ");
14831 145590 : dump_dec (MSG_NOTE, TYPE_VECTOR_SUBPARTS (nunits_vectype));
14832 145590 : dump_printf (MSG_NOTE, "\n");
14833 : }
14834 :
14835 5509209 : *nunits_vectype_out = nunits_vectype;
14836 5509209 : return opt_result::success ();
14837 : }
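
/* A hypothetical caller sketch (EXAMPLE_DETERMINE_VF and VF are names
   invented for this example) showing how the two outputs are typically
   consumed: the statement's vector type is recorded separately, while
   the nunits vector type, when set, bounds the vectorization factor
   through vect_update_max_nunits.  */

static opt_result
example_determine_vf (vec_info *vinfo, stmt_vec_info stmt_info,
                      poly_uint64 *vf)
{
  tree stmt_vectype, nunits_vectype;
  opt_result res
    = vect_get_vector_types_for_stmt (vinfo, stmt_info, &stmt_vectype,
                                      &nunits_vectype, 0);
  if (!res)
    /* Propagate the recorded failure reason to the caller.  */
    return res;
  if (nunits_vectype)
    /* Raise *VF to a common multiple that also covers this statement.  */
    vect_update_max_nunits (vf, nunits_vectype);
  return opt_result::success ();
}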
14838 :
14839 : /* Generate and return a statement sequence that sets the vector length LEN to:
14840 :
14841 : min_of_start_and_end = min (START_INDEX, END_INDEX);
14842 : left_len = END_INDEX - min_of_start_and_end;
14843 : rhs = min (left_len, LEN_LIMIT);
14844 : LEN = rhs;
14845 :
14846 : Note: the cost of the code generated by this function is modeled
14847 : by vect_estimate_min_profitable_iters, so changes here may need
14848 : corresponding changes there. */
14849 :
14850 : gimple_seq
14851 0 : vect_gen_len (tree len, tree start_index, tree end_index, tree len_limit)
14852 : {
14853 0 : gimple_seq stmts = NULL;
14854 0 : tree len_type = TREE_TYPE (len);
14855 0 : gcc_assert (TREE_TYPE (start_index) == len_type);
14856 :
14857 0 : tree min = gimple_build (&stmts, MIN_EXPR, len_type, start_index, end_index);
14858 0 : tree left_len = gimple_build (&stmts, MINUS_EXPR, len_type, end_index, min);
14859 0 : tree rhs = gimple_build (&stmts, MIN_EXPR, len_type, left_len, len_limit);
14860 0 : gimple* stmt = gimple_build_assign (len, rhs);
14861 0 : gimple_seq_add_stmt (&stmts, stmt);
14862 :
14863 0 : return stmts;
14864 : }
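
/* An illustrative scalar model of the sequence built by vect_gen_len
   above (GEN_LEN_MODEL is a name invented here).  The inner MIN guards
   the subtraction: if START_INDEX >= END_INDEX, LEFT_LEN becomes zero,
   so the resulting length is zero rather than wrapping around.  */

static unsigned long
gen_len_model (unsigned long start_index, unsigned long end_index,
               unsigned long len_limit)
{
  unsigned long min_of_start_and_end = MIN (start_index, end_index);
  unsigned long left_len = end_index - min_of_start_and_end;
  return MIN (left_len, len_limit);
}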
14865 :
|