Line data Source code
1 : /* Data References Analysis and Manipulation Utilities for Vectorization.
2 : Copyright (C) 2003-2026 Free Software Foundation, Inc.
3 : Contributed by Dorit Naishlos <dorit@il.ibm.com>
4 : and Ira Rosen <irar@il.ibm.com>
5 :
6 : This file is part of GCC.
7 :
8 : GCC is free software; you can redistribute it and/or modify it under
9 : the terms of the GNU General Public License as published by the Free
10 : Software Foundation; either version 3, or (at your option) any later
11 : version.
12 :
13 : GCC is distributed in the hope that it will be useful, but WITHOUT ANY
14 : WARRANTY; without even the implied warranty of MERCHANTABILITY or
15 : FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
16 : for more details.
17 :
18 : You should have received a copy of the GNU General Public License
19 : along with GCC; see the file COPYING3. If not see
20 : <http://www.gnu.org/licenses/>. */
21 :
22 : #define INCLUDE_ALGORITHM
23 : #include "config.h"
24 : #include "system.h"
25 : #include "coretypes.h"
26 : #include "backend.h"
27 : #include "target.h"
28 : #include "rtl.h"
29 : #include "tree.h"
30 : #include "gimple.h"
31 : #include "predict.h"
32 : #include "memmodel.h"
33 : #include "tm_p.h"
34 : #include "ssa.h"
35 : #include "optabs-tree.h"
36 : #include "cgraph.h"
37 : #include "dumpfile.h"
38 : #include "pretty-print.h"
39 : #include "alias.h"
40 : #include "fold-const.h"
41 : #include "stor-layout.h"
42 : #include "tree-eh.h"
43 : #include "gimplify.h"
44 : #include "gimple-iterator.h"
45 : #include "gimplify-me.h"
46 : #include "tree-ssa-loop-ivopts.h"
47 : #include "tree-ssa-loop-manip.h"
48 : #include "tree-ssa-loop.h"
49 : #include "cfgloop.h"
50 : #include "tree-scalar-evolution.h"
51 : #include "tree-vectorizer.h"
52 : #include "expr.h"
53 : #include "builtins.h"
54 : #include "tree-cfg.h"
55 : #include "tree-hash-traits.h"
56 : #include "vec-perm-indices.h"
57 : #include "internal-fn.h"
58 : #include "gimple-fold.h"
59 : #include "optabs-query.h"
60 :
61 : /* Return true if load- or store-lanes optab OPTAB is implemented for
62 : COUNT vectors of type VECTYPE, i.e. if an array mode for COUNT vectors
63 : can be found and OPTAB has a handler for it. NAME is the name of OPTAB
64 : and is only used in dump messages. If OPTAB is implemented and ELSVALS is
65 : nonzero store the possible else values in the vector it points to. */
66 :
67 : static bool
68 368974 : vect_lanes_optab_supported_p (const char *name, convert_optab optab,
69 : tree vectype, unsigned HOST_WIDE_INT count,
70 : vec<int> *elsvals = nullptr)
71 : {
72 368974 : machine_mode mode, array_mode;
73 368974 : bool limit_p;
74 : /* Prefer the array mode the target provides; otherwise fall back to an integer mode of COUNT * bitsize (MODE) bits. */
75 368974 : mode = TYPE_MODE (vectype);
76 368974 : if (!targetm.array_mode (mode, count).exists (&array_mode))
77 : {
78 737948 : poly_uint64 bits = count * GET_MODE_BITSIZE (mode);
79 368974 : limit_p = !targetm.array_mode_supported_p (mode, count);
80 368974 : if (!int_mode_for_size (bits, limit_p).exists (&array_mode))
81 : {
82 316940 : if (dump_enabled_p ())
83 12884 : dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
84 : "no array mode for %s[%wu]\n",
85 12884 : GET_MODE_NAME (mode), count);
86 316940 : return false;
87 : }
88 : }
89 : /* An array mode exists; now check that OPTAB has a handler for the (ARRAY_MODE, MODE) pair. */
90 52034 : enum insn_code icode;
91 52034 : if ((icode = convert_optab_handler (optab, array_mode, mode))
92 : == CODE_FOR_nothing)
93 : {
94 52034 : if (dump_enabled_p ())
95 4152 : dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
96 : "cannot use %s<%s><%s>\n", name,
97 4152 : GET_MODE_NAME (array_mode), GET_MODE_NAME (mode));
98 52034 : return false;
99 : }
100 :
101 0 : if (dump_enabled_p ())
102 0 : dump_printf_loc (MSG_NOTE, vect_location,
103 0 : "can use %s<%s><%s>\n", name, GET_MODE_NAME (array_mode),
104 0 : GET_MODE_NAME (mode));
105 : /* NOTE(review): the else-value index is always looked up for IFN_MASK_LEN_LOAD_LANES, whatever OPTAB is -- confirm this suits every caller that passes ELSVALS. */
106 0 : if (elsvals)
107 0 : get_supported_else_vals (icode,
108 0 : internal_fn_else_index (IFN_MASK_LEN_LOAD_LANES),
109 : *elsvals);
110 :
111 : return true;
112 : }
113 :
114 : /* Helper function to identify a simd clone call. If STMT is a call to a
115 : function with simd clones then return the corresponding cgraph_node,
116 : otherwise (STMT is not a call, no function decl is found, or its cgraph node is missing or has no simd clones) return NULL. For an IFN_MASK_CALL the function decl is operand 0 of the first call argument. */
117 :
118 : static cgraph_node*
119 621335 : simd_clone_call_p (gimple *stmt)
120 : {
121 699303 : gcall *call = dyn_cast <gcall *> (stmt);
122 79664 : if (!call)
123 : return NULL;
124 :
125 79664 : tree fndecl = NULL_TREE;
126 79664 : if (gimple_call_internal_p (call, IFN_MASK_CALL))
127 226 : fndecl = TREE_OPERAND (gimple_call_arg (stmt, 0), 0);
128 : else
129 79438 : fndecl = gimple_call_fndecl (stmt);
130 :
131 79664 : if (fndecl == NULL_TREE)
132 : return NULL;
133 : /* Only a function whose cgraph node records simd clones qualifies. */
134 37058 : cgraph_node *node = cgraph_node::get (fndecl);
135 37058 : if (node && node->simd_clones != NULL)
136 : return node;
137 :
138 : return NULL;
139 : }
140 :
141 :
142 :
143 : /* Return the smallest scalar part of STMT_INFO.
144 : This is used to determine the vectype of the stmt. We generally set the
145 : vectype according to the type of the result (lhs). For stmts whose
146 : result-type is different than the type of the arguments (e.g., demotion,
147 : promotion), vectype will be reset appropriately (later). Note that we have
148 : to visit the smallest datatype in this function, because that determines the
149 : VF. If the smallest datatype in the loop is present only as the rhs of a
150 : promotion operation - we'd miss it.
151 : Such a case, where a variable of this datatype does not appear in the lhs
152 : anywhere in the loop, can only occur if it's an invariant: e.g.:
153 : 'int_x = (int) short_inv', which we'd expect to have been optimized away by
154 : invariant motion. However, we cannot rely on invariant motion to always
155 : take invariants out of the loop, and so in the case of promotion we also
156 : have to check the rhs.
157 : SCALAR_TYPE is the type to start from; it is returned unchanged when
158 : tree_fits_uhwi_p rejects its TYPE_SIZE_UNIT. */
159 :
160 : tree
161 5040953 : vect_get_smallest_scalar_type (stmt_vec_info stmt_info, tree scalar_type)
162 : {
163 5040953 : HOST_WIDE_INT lhs, rhs;
164 :
165 : /* During the analysis phase, this function is called on arbitrary
166 : statements that might not have scalar results. */
167 5040953 : if (!tree_fits_uhwi_p (TYPE_SIZE_UNIT (scalar_type)))
168 : return scalar_type;
169 : /* LHS and RHS start out as the TYPE_SIZE_UNIT of the incoming SCALAR_TYPE. */
170 5040953 : lhs = rhs = TREE_INT_CST_LOW (TYPE_SIZE_UNIT (scalar_type));
171 :
172 5040953 : gassign *assign = dyn_cast <gassign *> (stmt_info->stmt);
173 5040953 : if (assign)
174 : {
175 4419618 : scalar_type = TREE_TYPE (gimple_assign_lhs (assign));
176 4419618 : if (gimple_assign_cast_p (assign)
177 4016843 : || gimple_assign_rhs_code (assign) == DOT_PROD_EXPR
178 4016209 : || gimple_assign_rhs_code (assign) == WIDEN_SUM_EXPR
179 4016209 : || gimple_assign_rhs_code (assign) == SAD_EXPR
180 4016100 : || gimple_assign_rhs_code (assign) == WIDEN_MULT_EXPR
181 4012353 : || gimple_assign_rhs_code (assign) == WIDEN_MULT_PLUS_EXPR
182 4012353 : || gimple_assign_rhs_code (assign) == WIDEN_MULT_MINUS_EXPR
183 4012353 : || gimple_assign_rhs_code (assign) == WIDEN_LSHIFT_EXPR
184 8431971 : || gimple_assign_rhs_code (assign) == FLOAT_EXPR)
185 : {
186 421726 : tree rhs_type = TREE_TYPE (gimple_assign_rhs1 (assign));
187 : /* Note RHS is compared against the size of the incoming SCALAR_TYPE; LHS is not recomputed for the assignment's lhs type chosen above. */
188 421726 : rhs = TREE_INT_CST_LOW (TYPE_SIZE_UNIT (rhs_type));
189 421726 : if (rhs < lhs)
190 5040953 : scalar_type = rhs_type;
191 : }
192 : }
193 621335 : else if (cgraph_node *node = simd_clone_call_p (stmt_info->stmt))
194 : {
195 1696 : auto clone = node->simd_clones->simdclone;
196 5156 : for (unsigned int i = 0; i < clone->nargs; ++i)
197 : {
198 3460 : if (clone->args[i].arg_type == SIMD_CLONE_ARG_TYPE_VECTOR)
199 : {
200 1995 : tree arg_scalar_type = TREE_TYPE (clone->args[i].vector_type);
201 1995 : rhs = TREE_INT_CST_LOW (TYPE_SIZE_UNIT (arg_scalar_type));
202 1995 : if (rhs < lhs)
203 : {
204 3460 : scalar_type = arg_scalar_type;
205 3460 : lhs = rhs;
206 : }
207 : }
208 : }
209 : }
210 619639 : else if (gcall *call = dyn_cast <gcall *> (stmt_info->stmt))
211 : {
212 77968 : unsigned int i = 0;
213 77968 : if (gimple_call_internal_p (call))
214 : {
215 40257 : internal_fn ifn = gimple_call_internal_fn (call);
216 40257 : if (internal_load_fn_p (ifn))
217 : /* For loads the LHS type does the trick; ~0U skips the argument check below. */
218 : i = ~0U;
219 35428 : else if (internal_store_fn_p (ifn))
220 : {
221 : /* For stores use the type of the stored value. */
222 2717 : i = internal_fn_stored_value_index (ifn);
223 2717 : scalar_type = TREE_TYPE (gimple_call_arg (call, i));
224 2717 : i = ~0U;
225 : }
226 32711 : else if (internal_fn_mask_index (ifn) == 0)
227 11089 : i = 1; /* The mask index is 0, so inspect argument 1 instead. */
228 : }
229 77968 : if (i < gimple_call_num_args (call))
230 : {
231 65842 : tree rhs_type = TREE_TYPE (gimple_call_arg (call, i));
232 65842 : if (tree_fits_uhwi_p (TYPE_SIZE_UNIT (rhs_type)))
233 : {
234 65842 : rhs = TREE_INT_CST_LOW (TYPE_SIZE_UNIT (rhs_type));
235 65842 : if (rhs < lhs)
236 5040953 : scalar_type = rhs_type;
237 : }
238 : }
239 : }
240 :
241 : return scalar_type;
242 : }
243 :
244 :
245 : /* Insert DDR into LOOP_VINFO list of ddrs that may alias and need to be
246 : tested at run-time. Return success if DDR was inserted. Return a failure
247 : if --param vect-max-version-for-alias-checks is 0 or if runtime_alias_check_p rejects DDR. */
248 :
249 : static opt_result
250 169214 : vect_mark_for_runtime_alias_test (ddr_p ddr, loop_vec_info loop_vinfo)
251 : {
252 169214 : class loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
253 :
254 169214 : if ((unsigned) param_vect_max_version_for_alias_checks == 0)
255 54 : return opt_result::failure_at (vect_location,
256 : "will not create alias checks, as"
257 : " --param vect-max-version-for-alias-checks"
258 : " == 0\n");
259 :
260 169160 : opt_result res
261 169160 : = runtime_alias_check_p (ddr, loop,
262 169160 : optimize_loop_nest_for_speed_p (loop));
263 169160 : if (!res)
264 143 : return res;
265 :
266 169017 : LOOP_VINFO_MAY_ALIAS_DDRS (loop_vinfo).safe_push (ddr);
267 169017 : return opt_result::success ();
268 : }
269 :
270 : /* Record that loop LOOP_VINFO needs to check that VALUE is nonzero. Do nothing if VALUE is already in the list of recorded checks. */
271 :
272 : static void
273 1492 : vect_check_nonzero_value (loop_vec_info loop_vinfo, tree value)
274 : {
275 1492 : const vec<tree> &checks = LOOP_VINFO_CHECK_NONZERO (loop_vinfo);
276 2259 : for (unsigned int i = 0; i < checks.length(); ++i)
277 767 : if (checks[i] == value)
278 : return;
279 :
280 1492 : if (dump_enabled_p ())
281 432 : dump_printf_loc (MSG_NOTE, vect_location,
282 : "need run-time check that %T is nonzero\n",
283 : value);
284 1492 : LOOP_VINFO_CHECK_NONZERO (loop_vinfo).safe_push (value);
285 : }
286 :
287 : /* Return true if we know that the order of vectorized DR_INFO_A and
288 : vectorized DR_INFO_B will be the same as the order of DR_INFO_A and
289 : DR_INFO_B. At least one of the accesses is a write. */
290 :
291 : static bool
292 144582 : vect_preserves_scalar_order_p (dr_vec_info *dr_info_a, dr_vec_info *dr_info_b)
293 : {
294 144582 : stmt_vec_info stmtinfo_a = dr_info_a->stmt;
295 144582 : stmt_vec_info stmtinfo_b = dr_info_b->stmt;
296 :
297 : /* Single statements are always kept in their original order. */
298 144582 : if (!STMT_VINFO_GROUPED_ACCESS (stmtinfo_a)
299 239778 : && !STMT_VINFO_GROUPED_ACCESS (stmtinfo_b))
300 : return true;
301 :
302 : /* If there is a loop invariant read involved we might vectorize it in
303 : the prologue, breaking scalar order with respect to the in-loop store. */
304 26126 : if ((DR_IS_READ (dr_info_a->dr) && integer_zerop (DR_STEP (dr_info_a->dr)))
305 80746 : || (DR_IS_READ (dr_info_b->dr) && integer_zerop (DR_STEP (dr_info_b->dr))))
306 1718 : return false;
307 :
308 : /* STMT_A and STMT_B belong to overlapping groups. All loads are
309 : emitted at the position of the first scalar load.
310 : Stores in a group are emitted at the position of the last scalar store.
311 : Compute that position and check whether the resulting order matches
312 : the current one. */
313 54129 : stmt_vec_info il_a = DR_GROUP_FIRST_ELEMENT (stmtinfo_a);
314 54129 : if (il_a)
315 : {
316 48895 : if (DR_IS_WRITE (STMT_VINFO_DATA_REF (stmtinfo_a)))
317 213536 : for (stmt_vec_info s = DR_GROUP_NEXT_ELEMENT (il_a); s;
318 188179 : s = DR_GROUP_NEXT_ELEMENT (s))
319 188179 : il_a = get_later_stmt (il_a, s);
320 : else /* DR_IS_READ: keep the group member that is not the later one, i.e. the earliest load. */
321 93482 : for (stmt_vec_info s = DR_GROUP_NEXT_ELEMENT (il_a); s;
322 69944 : s = DR_GROUP_NEXT_ELEMENT (s))
323 69944 : if (get_later_stmt (il_a, s) == il_a)
324 2144 : il_a = s;
325 : }
326 : else
327 : il_a = stmtinfo_a;
328 54129 : stmt_vec_info il_b = DR_GROUP_FIRST_ELEMENT (stmtinfo_b);
329 54129 : if (il_b)
330 : {
331 47271 : if (DR_IS_WRITE (STMT_VINFO_DATA_REF (stmtinfo_b)))
332 273413 : for (stmt_vec_info s = DR_GROUP_NEXT_ELEMENT (il_b); s;
333 235435 : s = DR_GROUP_NEXT_ELEMENT (s))
334 235435 : il_b = get_later_stmt (il_b, s);
335 : else /* DR_IS_READ: keep the group member that is not the later one, i.e. the earliest load. */
336 42771 : for (stmt_vec_info s = DR_GROUP_NEXT_ELEMENT (il_b); s;
337 33478 : s = DR_GROUP_NEXT_ELEMENT (s))
338 33478 : if (get_later_stmt (il_b, s) == il_b)
339 327 : il_b = s;
340 : }
341 : else
342 : il_b = stmtinfo_b;
343 54129 : bool a_after_b = (get_later_stmt (stmtinfo_a, stmtinfo_b) == stmtinfo_a); /* Relative order of the two scalar statements. */
344 54129 : return (get_later_stmt (il_a, il_b) == il_a) == a_after_b; /* The computed emission points IL_A/IL_B must have the same relative order. */
345 : }
346 :
347 : /* A subroutine of vect_analyze_data_ref_dependence. Handle
348 : DDR_COULD_BE_INDEPENDENT_P ddr DDR that has a known set of dependence
349 : distances. These distances are conservatively correct but they don't
350 : reflect a guaranteed dependence.
351 :
352 : Return true if this function does all the work necessary to avoid
353 : an alias or false if the caller should use the dependence distances
354 : to limit the vectorization factor in the usual way. LOOP_DEPTH is
355 : the depth of the loop described by LOOP_VINFO and the other arguments
356 : are as for vect_analyze_data_ref_dependence. */
357 :
358 : static bool
359 8308 : vect_analyze_possibly_independent_ddr (data_dependence_relation *ddr,
360 : loop_vec_info loop_vinfo,
361 : int loop_depth, unsigned int *max_vf)
362 : {
363 8308 : class loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
364 33250 : for (lambda_vector &dist_v : DDR_DIST_VECTS (ddr))
365 : {
366 16351 : int dist = dist_v[loop_depth];
367 16351 : if (dist != 0 && !(dist > 0 && DDR_REVERSED_P (ddr)))
368 : {
369 : /* If the user asserted safelen >= DIST consecutive iterations
370 : can be executed concurrently, assume independence.
371 :
372 : ??? An alternative would be to add the alias check even
373 : in this case, and vectorize the fallback loop with the
374 : maximum VF set to safelen. However, if the user has
375 : explicitly given a length, it's less likely that that
376 : would be a win. */
377 8057 : if (loop->safelen >= 2 && abs_hwi (dist) <= loop->safelen)
378 : {
379 32 : if ((unsigned int) loop->safelen < *max_vf)
380 2 : *max_vf = loop->safelen;
381 32 : LOOP_VINFO_NO_DATA_DEPENDENCIES (loop_vinfo) = false;
382 32 : continue;
383 : }
384 :
385 : /* For dependence distances of 2 or more, we have the option
386 : of limiting VF or checking for an alias at runtime.
387 : Prefer to check at runtime if we can, to avoid limiting
388 : the VF unnecessarily when the bases are in fact independent.
389 :
390 : Note that the alias checks will be removed if the VF ends up
391 : being small enough. */
392 8025 : dr_vec_info *dr_info_a = loop_vinfo->lookup_dr (DDR_A (ddr));
393 8025 : dr_vec_info *dr_info_b = loop_vinfo->lookup_dr (DDR_B (ddr));
394 8025 : return (!STMT_VINFO_GATHER_SCATTER_P (dr_info_a->stmt)
395 8025 : && !STMT_VINFO_GATHER_SCATTER_P (dr_info_b->stmt)
396 16058 : && vect_mark_for_runtime_alias_test (ddr, loop_vinfo));
397 : }
398 : }
399 : return true; /* Every distance was zero, positive on a reversed DDR, or covered by safelen. */
400 : }
401 :
402 :
403 : /* Function vect_analyze_data_ref_dependence.
404 :
405 : FIXME: I needed to change the sense of the returned flag.
406 :
407 : Return a failure if there (might) exist a dependence between the memory
408 : references DDR_A (DDR) and DDR_B (DDR). When versioning for alias may check a
409 : dependence at run-time, return success. Adjust *MAX_VF according to
410 : the data dependence. */
411 :
412 : static opt_result
413 1480417 : vect_analyze_data_ref_dependence (struct data_dependence_relation *ddr,
414 : loop_vec_info loop_vinfo,
415 : unsigned int *max_vf)
416 : {
417 1480417 : unsigned int i;
418 1480417 : class loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
419 1480417 : struct data_reference *dra = DDR_A (ddr);
420 1480417 : struct data_reference *drb = DDR_B (ddr);
421 1480417 : dr_vec_info *dr_info_a = loop_vinfo->lookup_dr (dra);
422 1480417 : dr_vec_info *dr_info_b = loop_vinfo->lookup_dr (drb);
423 1480417 : stmt_vec_info stmtinfo_a = dr_info_a->stmt;
424 1480417 : stmt_vec_info stmtinfo_b = dr_info_b->stmt;
425 1480417 : lambda_vector dist_v;
426 1480417 : unsigned int loop_depth;
427 :
428 : /* If user asserted safelen consecutive iterations can be
429 : executed concurrently, assume independence. */
430 1660307 : auto apply_safelen = [&]()
431 : {
432 179890 : if (loop->safelen >= 2)
433 : {
434 7462 : if ((unsigned int) loop->safelen < *max_vf)
435 1896 : *max_vf = loop->safelen;
436 7462 : LOOP_VINFO_NO_DATA_DEPENDENCIES (loop_vinfo) = false;
437 7462 : return true;
438 : }
439 : return false;
440 1480417 : };
441 :
442 : /* In loop analysis all data references should be vectorizable. */
443 1480417 : if (!STMT_VINFO_VECTORIZABLE (stmtinfo_a)
444 1480417 : || !STMT_VINFO_VECTORIZABLE (stmtinfo_b))
445 0 : gcc_unreachable ();
446 :
447 : /* Independent data accesses. */
448 1480417 : if (DDR_ARE_DEPENDENT (ddr) == chrec_known)
449 1214020 : return opt_result::success ();
450 :
451 266397 : if (dra == drb
452 266397 : || (DR_IS_READ (dra) && DR_IS_READ (drb)))
453 0 : return opt_result::success ();
454 :
455 : /* We do not have to consider dependences between accesses that belong
456 : to the same group, unless the stride could be smaller than the
457 : group size. */
458 266397 : if (DR_GROUP_FIRST_ELEMENT (stmtinfo_a)
459 115154 : && (DR_GROUP_FIRST_ELEMENT (stmtinfo_a)
460 115154 : == DR_GROUP_FIRST_ELEMENT (stmtinfo_b))
461 284950 : && !STMT_VINFO_STRIDED_P (stmtinfo_a))
462 2275 : return opt_result::success ();
463 :
464 : /* Even if we have an anti-dependence then, as the vectorized loop covers at
465 : least two scalar iterations, there is always also a true dependence.
466 : As the vectorizer does not re-order loads and stores we can ignore
467 : the anti-dependence if TBAA can disambiguate both DRs similar to the
468 : case with known negative distance anti-dependences (positive
469 : distance anti-dependences would violate TBAA constraints). */
470 132329 : if (((DR_IS_READ (dra) && DR_IS_WRITE (drb))
471 131793 : || (DR_IS_WRITE (dra) && DR_IS_READ (drb)))
472 414996 : && !alias_sets_conflict_p (get_alias_set (DR_REF (dra)),
473 : get_alias_set (DR_REF (drb))))
474 6279 : return opt_result::success ();
475 :
476 257843 : if (STMT_VINFO_GATHER_SCATTER_P (stmtinfo_a)
477 247881 : || STMT_VINFO_GATHER_SCATTER_P (stmtinfo_b))
478 : {
479 12637 : if (apply_safelen ())
480 1398 : return opt_result::success ();
481 :
482 11239 : return opt_result::failure_at
483 11239 : (stmtinfo_a->stmt,
484 : "possible alias involving gather/scatter between %T and %T\n",
485 : DR_REF (dra), DR_REF (drb));
486 : }
487 :
488 : /* Unknown data dependence. */
489 245206 : if (DDR_ARE_DEPENDENT (ddr) == chrec_dont_know)
490 : {
491 166692 : if (apply_safelen ())
492 6064 : return opt_result::success ();
493 :
494 160628 : if (dump_enabled_p ())
495 7665 : dump_printf_loc (MSG_MISSED_OPTIMIZATION, stmtinfo_a->stmt,
496 : "versioning for alias required: "
497 : "can't determine dependence between %T and %T\n",
498 : DR_REF (dra), DR_REF (drb));
499 :
500 : /* Add to list of ddrs that need to be tested at run-time. */
501 160628 : return vect_mark_for_runtime_alias_test (ddr, loop_vinfo);
502 : }
503 :
504 : /* Known data dependence. */
505 78514 : if (DDR_NUM_DIST_VECTS (ddr) == 0)
506 : {
507 561 : if (apply_safelen ())
508 0 : return opt_result::success ();
509 :
510 561 : if (dump_enabled_p ())
511 156 : dump_printf_loc (MSG_MISSED_OPTIMIZATION, stmtinfo_a->stmt,
512 : "versioning for alias required: "
513 : "bad dist vector for %T and %T\n",
514 : DR_REF (dra), DR_REF (drb));
515 : /* Add to list of ddrs that need to be tested at run-time. */
516 561 : return vect_mark_for_runtime_alias_test (ddr, loop_vinfo);
517 : }
518 : /* LOOP_DEPTH is the index of LOOP in the DDR's loop nest; it selects the entry of each distance vector examined below. */
519 77953 : loop_depth = index_in_loop_nest (loop->num, DDR_LOOP_NEST (ddr));
520 :
521 77953 : if (DDR_COULD_BE_INDEPENDENT_P (ddr)
522 77953 : && vect_analyze_possibly_independent_ddr (ddr, loop_vinfo,
523 : loop_depth, max_vf))
524 8300 : return opt_result::success ();
525 :
526 132913 : FOR_EACH_VEC_ELT (DDR_DIST_VECTS (ddr), i, dist_v)
527 : {
528 69675 : int dist = dist_v[loop_depth];
529 :
530 69675 : if (dump_enabled_p ())
531 4356 : dump_printf_loc (MSG_NOTE, vect_location,
532 : "dependence distance = %d.\n", dist);
533 :
534 69675 : if (dist == 0)
535 : {
536 58254 : if (dump_enabled_p ())
537 3562 : dump_printf_loc (MSG_NOTE, vect_location,
538 : "dependence distance == 0 between %T and %T\n",
539 : DR_REF (dra), DR_REF (drb));
540 :
541 : /* When we perform grouped accesses and perform implicit CSE
542 : by detecting equal accesses and doing disambiguation with
543 : runtime alias tests like for
544 : .. = a[i];
545 : .. = a[i+1];
546 : a[i] = ..;
547 : a[i+1] = ..;
548 : *p = ..;
549 : .. = a[i];
550 : .. = a[i+1];
551 : where we will end up loading { a[i], a[i+1] } once, make
552 : sure that inserting group loads before the first load and
553 : stores after the last store will do the right thing.
554 : Similar for groups like
555 : a[i] = ...;
556 : ... = a[i];
557 : a[i+1] = ...;
558 : where loads from the group interleave with the store. */
559 58254 : if (!vect_preserves_scalar_order_p (dr_info_a, dr_info_b))
560 0 : return opt_result::failure_at (stmtinfo_a->stmt,
561 : "READ_WRITE dependence"
562 : " in interleaving.\n");
563 :
564 58254 : if (loop->safelen < 2)
565 : {
566 54410 : tree indicator = dr_zero_step_indicator (dra);
567 54410 : if (!indicator || integer_zerop (indicator))
568 0 : return opt_result::failure_at (stmtinfo_a->stmt,
569 : "access also has a zero step\n");
570 54410 : else if (TREE_CODE (indicator) != INTEGER_CST)
571 1492 : vect_check_nonzero_value (loop_vinfo, indicator);
572 : }
573 58254 : continue;
574 58254 : }
575 :
576 11421 : if (dist > 0 && DDR_REVERSED_P (ddr))
577 : {
578 : /* If DDR_REVERSED_P the order of the data-refs in DDR was
579 : reversed (to make distance vector positive), and the actual
580 : distance is negative. */
581 3912 : if (dump_enabled_p ())
582 105 : dump_printf_loc (MSG_NOTE, vect_location,
583 : "dependence distance negative.\n");
584 : /* When doing outer loop vectorization, we need to check if there is
585 : a backward dependence at the inner loop level if the dependence
586 : at the outer loop is reversed. See PR81740. */
587 3912 : if (nested_in_vect_loop_p (loop, stmtinfo_a)
588 3900 : || nested_in_vect_loop_p (loop, stmtinfo_b))
589 : {
590 12 : unsigned inner_depth = index_in_loop_nest (loop->inner->num,
591 12 : DDR_LOOP_NEST (ddr));
592 12 : if (dist_v[inner_depth] < 0)
593 9 : return opt_result::failure_at (stmtinfo_a->stmt,
594 : "not vectorized, dependence "
595 : "between data-refs %T and %T\n",
596 : DR_REF (dra), DR_REF (drb));
597 : }
598 : /* Record a negative dependence distance to later limit the
599 : amount of stmt copying / unrolling we can perform.
600 : Only need to handle read-after-write dependence. */
601 3903 : if (DR_IS_READ (drb)
602 156 : && (STMT_VINFO_MIN_NEG_DIST (stmtinfo_b) == 0
603 36 : || STMT_VINFO_MIN_NEG_DIST (stmtinfo_b) > (unsigned)dist))
604 156 : STMT_VINFO_MIN_NEG_DIST (stmtinfo_b) = dist;
605 3903 : continue;
606 3903 : }
607 :
608 7509 : unsigned int abs_dist = abs (dist);
609 7509 : if (abs_dist >= 2 && abs_dist < *max_vf)
610 : {
611 : /* The dependence distance requires reduction of the maximal
612 : vectorization factor. */
613 558 : *max_vf = abs_dist;
614 558 : if (dump_enabled_p ())
615 30 : dump_printf_loc (MSG_NOTE, vect_location,
616 : "adjusting maximal vectorization factor to %i\n",
617 : *max_vf);
618 : }
619 :
620 7509 : if (abs_dist >= *max_vf)
621 : {
622 : /* Dependence distance does not create dependence, as far as
623 : vectorization is concerned, in this case. */
624 1103 : if (dump_enabled_p ())
625 437 : dump_printf_loc (MSG_NOTE, vect_location,
626 : "dependence distance >= VF.\n");
627 1103 : continue;
628 : }
629 :
630 6406 : return opt_result::failure_at (stmtinfo_a->stmt,
631 : "not vectorized, possible dependence "
632 : "between data-refs %T and %T\n",
633 : DR_REF (dra), DR_REF (drb));
634 : }
635 :
636 63238 : return opt_result::success ();
637 : }
638 :
639 : /* Function vect_analyze_early_break_dependences.
640 :
641 : Examine all the data references in the loop and make sure that if we have
642 : multiple exits that we are able to safely move stores such that they become
643 : safe for vectorization. The function also calculates the place where to move
644 : the instructions to and computes what the new vUSE chain should be.
645 :
646 : This works in tandem with the CFG that will be produced by
647 : slpeel_tree_duplicate_loop_to_edge_cfg later on.
648 :
649 : This function tries to validate whether an early break vectorization
650 : is possible for the current instruction sequence. Returns True if
651 : possible, otherwise False.
652 :
653 : Requirements:
654 : - Any memory access must be to a fixed size buffer.
655 : - There must not be any loads and stores to the same object.
656 : - Multiple loads are allowed as long as they don't alias.
657 :
658 : NOTE:
659 : This implementation is very conservative. Any overlapping loads/stores
660 : that take place before the early break statement gets rejected aside from
661 : WAR dependencies.
662 :
663 : i.e.:
664 :
665 : a[i] = 8
666 : c = a[i]
667 : if (b[i])
668 : ...
669 :
670 : is not allowed, but
671 :
672 : c = a[i]
673 : a[i] = 8
674 : if (b[i])
675 : ...
676 :
677 : is, which is the common case. */
678 :
679 : static opt_result
680 141884 : vect_analyze_early_break_dependences (loop_vec_info loop_vinfo)
681 : {
682 141884 : DUMP_VECT_SCOPE ("vect_analyze_early_break_dependences");
683 :
684 : /* List of all load data references found during traversal. */
685 141884 : auto_vec<data_reference *> bases;
686 141884 : basic_block dest_bb = NULL;
687 :
688 141884 : class loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
689 141884 : class loop *loop_nest = loop_outer (loop);
690 :
691 141884 : if (dump_enabled_p ())
692 1574 : dump_printf_loc (MSG_NOTE, vect_location,
693 : "loop contains multiple exits, analyzing"
694 : " statement dependencies.\n");
695 :
696 141884 : if (LOOP_VINFO_EARLY_BREAKS_VECT_PEELED (loop_vinfo))
697 25810 : if (dump_enabled_p ())
698 280 : dump_printf_loc (MSG_NOTE, vect_location,
699 : "alternate exit has been chosen as main exit.\n");
700 :
701 : /* Since we don't support general control flow, the location we'll move the
702 : side-effects to is always the latch connected exit. When we support
703 : general control flow we can do better but for now this is fine. Move
704 : side-effects to the in-loop destination of the last early exit. For the
705 : PEELED case we move the side-effects to the latch block as this is
706 : guaranteed to be the last block to be executed when a vector iteration
707 : finished. */
708 141884 : if (LOOP_VINFO_EARLY_BREAKS_VECT_PEELED (loop_vinfo))
709 25810 : dest_bb = loop->latch;
710 : else
711 116074 : dest_bb = single_pred (loop->latch);
712 :
713 : /* We start looking from dest_bb, for the non-PEELED case we don't want to
714 : move any stores already present, but we do want to read and validate the
715 : loads. */
716 141884 : basic_block bb = dest_bb;
717 :
718 : /* We move stores across all loads to the beginning of dest_bb, so
719 : the first block processed below doesn't need dependence checking. */
720 141884 : bool check_deps = false;
721 :
722 512610 : do
723 : {
724 327247 : gimple_stmt_iterator gsi = gsi_last_bb (bb);
725 :
726 : /* Now analyze all the remaining statements and try to determine which
727 : instructions are allowed/needed to be moved. */
728 2441150 : while (!gsi_end_p (gsi))
729 : {
730 2119859 : gimple *stmt = gsi_stmt (gsi);
731 2119859 : gsi_prev (&gsi);
732 2119859 : if (is_gimple_debug (stmt))
733 1871739 : continue;
734 :
735 1112805 : stmt_vec_info orig_stmt_vinfo = loop_vinfo->lookup_stmt (stmt);
736 1112805 : stmt_vec_info stmt_vinfo
737 1112805 : = vect_stmt_to_vectorize (orig_stmt_vinfo);
738 1112805 : auto dr_ref = STMT_VINFO_DATA_REF (stmt_vinfo);
739 1112805 : if (!dr_ref)
740 : {
741 : /* Trapping statements after the last early exit are fine. */
742 858909 : if (check_deps)
743 : {
744 520935 : bool could_trap_p = false;
745 520935 : gimple *cur_stmt = STMT_VINFO_STMT (stmt_vinfo);
746 520935 : could_trap_p = gimple_could_trap_p (cur_stmt);
747 520935 : if (STMT_VINFO_IN_PATTERN_P (orig_stmt_vinfo))
748 : {
749 192677 : gimple_stmt_iterator gsi2;
750 192677 : auto stmt_seq
751 192677 : = STMT_VINFO_PATTERN_DEF_SEQ (orig_stmt_vinfo);
752 192677 : for (gsi2 = gsi_start (stmt_seq);
753 388629 : !could_trap_p && !gsi_end_p (gsi2); gsi_next (&gsi2))
754 : {
755 195952 : cur_stmt = gsi_stmt (gsi2);
756 195952 : could_trap_p = gimple_could_trap_p (cur_stmt);
757 : }
758 : }
759 :
760 520935 : if (could_trap_p)
761 : {
762 5414 : if (dump_enabled_p ())
763 150 : dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
764 : "cannot vectorize as operation may trap.\n");
765 5414 : return opt_result::failure_at (cur_stmt,
766 : "can't safely apply code motion to dependencies"
767 : " to vectorize the early exit. %G may trap.\n",
768 : cur_stmt);
769 : }
770 : }
771 :
772 853495 : continue;
773 853495 : }
774 :
775 : /* We know everything below dest_bb is safe since we know we
776 : had a full vector iteration when reaching it. Either by
777 : the loop entry / IV exit test being last or because this
778 : is the loop latch itself. */
779 253896 : if (!check_deps)
780 11190 : continue;
781 :
782 : /* Check if vector accesses to the object will be within bounds.
783 : must be a constant or assume loop will be versioned or niters
784 : bounded by VF so accesses are within range. We only need to check
785 : the reads since writes are moved to a safe place where if we get
786 : there we know they are safe to perform. */
787 242706 : if (DR_IS_READ (dr_ref))
788 : {
789 226903 : dr_set_safe_speculative_read_required (stmt_vinfo, true);
790 226903 : bool inbounds = ref_within_array_bound (stmt, DR_REF (dr_ref));
791 226903 : DR_SCALAR_KNOWN_BOUNDS (STMT_VINFO_DR_INFO (stmt_vinfo)) = inbounds;
792 :
793 226903 : if (dump_enabled_p ())
794 2455 : dump_printf_loc (MSG_NOTE, vect_location,
795 : "marking DR (read) as possibly needing peeling "
796 : "for alignment at %G", stmt);
797 : }
798 :
799 242706 : if (DR_IS_READ (dr_ref))
800 226903 : bases.safe_push (dr_ref);
801 15803 : else if (DR_IS_WRITE (dr_ref))
802 : {
803 : /* We are moving writes down in the CFG. To be sure that this
804 : is valid after vectorization we have to check all the loads
805 : we are sinking the stores past to see if any of them may
806 : alias or are the same object.
807 :
808 : Same objects will not be an issue because unless the store
809 : is marked volatile the value can be forwarded. If the
810 : store is marked volatile we don't vectorize the loop
811 : anyway.
812 :
813 : That leaves the check for aliasing. We don't really need
814 : to care about the stores aliasing with each other since the
815 : stores are moved in order so the effects are still observed
816 : correctly. This leaves the check for WAR dependencies
817 : which we would be introducing here if the DR can alias.
818 : The check is quadratic in loads/stores but I have not found
819 : a better API to do this. I believe all loads and stores
820 : must be checked. We also must check them when we
821 : encountered the store, since we don't care about loads past
822 : the store. */
823 :
824 49025 : for (auto dr_read : bases)
825 15464 : if (dr_may_alias_p (dr_ref, dr_read, loop_nest))
826 : {
827 542 : if (dump_enabled_p ())
828 4 : dump_printf_loc (MSG_MISSED_OPTIMIZATION,
829 : vect_location,
830 : "early breaks not supported: "
831 : "overlapping loads and stores "
832 : "found before the break "
833 : "statement.\n");
834 :
835 542 : return opt_result::failure_at (stmt,
836 : "can't safely apply code motion to dependencies"
837 : " to vectorize the early exit. %G may alias with"
838 : " %G\n", stmt, dr_read->stmt);
839 : }
840 : }
841 :
842 484328 : if (gimple_vdef (stmt))
843 : {
844 15261 : if (dump_enabled_p ())
845 282 : dump_printf_loc (MSG_NOTE, vect_location,
846 : "==> recording stmt %G", stmt);
847 :
848 15261 : LOOP_VINFO_EARLY_BRK_STORES (loop_vinfo).safe_push (stmt);
849 : }
850 695970 : else if (gimple_vuse (stmt))
851 : {
852 226903 : LOOP_VINFO_EARLY_BRK_VUSES (loop_vinfo).safe_insert (0, stmt);
853 226903 : if (dump_enabled_p ())
854 2455 : dump_printf_loc (MSG_NOTE, vect_location,
855 : "marked statement for vUSE update: %G", stmt);
856 : }
857 : }
858 :
859 321291 : if (!single_pred_p (bb))
860 : {
861 135928 : gcc_assert (bb == loop->header);
862 135928 : break;
863 : }
864 :
865 : /* If we possibly sink through a virtual PHI make sure to elide that. */
866 185363 : if (gphi *vphi = get_virtual_phi (bb))
867 107 : LOOP_VINFO_EARLY_BRK_STORES (loop_vinfo).safe_push (vphi);
868 :
869 : /* All earlier blocks need dependence checking. */
870 185363 : check_deps = true;
871 185363 : bb = single_pred (bb);
872 185363 : }
873 : while (1);
874 :
875 : /* We don't allow outer -> inner loop transitions which should have been
876 : trapped already during loop form analysis. */
877 135928 : gcc_assert (dest_bb->loop_father == loop);
878 :
879 : /* Check that the destination block we picked has only one pred. To relax this we
880 : have to take special care when moving the statements. We don't currently support
881 : such control flow however this check is there to simplify how we handle
882 : labels that may be present anywhere in the IL. This check is to ensure that the
883 : labels aren't significant for the CFG. */
884 135928 : if (!single_pred (dest_bb))
885 0 : return opt_result::failure_at (vect_location,
886 : "chosen loop exit block (BB %d) does not have a "
887 : "single predecessor which is currently not "
888 : "supported for early break vectorization.\n",
889 : dest_bb->index);
890 :
891 135928 : LOOP_VINFO_EARLY_BRK_DEST_BB (loop_vinfo) = dest_bb;
892 : /* Check if loop has a side-effect (stores), force scalar epilogue. */
893 613666 : for (auto dr : LOOP_VINFO_DATAREFS (loop_vinfo))
894 232431 : if (DR_IS_WRITE (dr))
895 : {
896 13055 : LOOP_VINFO_EARLY_BRK_NEEDS_EPILOG (loop_vinfo) = true;
897 13055 : break;
898 : }
899 :
900 135928 : if (!LOOP_VINFO_EARLY_BRK_VUSES (loop_vinfo).is_empty ())
901 : {
902 : /* All uses shall be updated to that of the first load. Entries are
903 : stored in reverse order. */
904 125407 : tree vuse = gimple_vuse (LOOP_VINFO_EARLY_BRK_VUSES (loop_vinfo).last ());
905 351067 : for (auto g : LOOP_VINFO_EARLY_BRK_VUSES (loop_vinfo))
906 : {
907 225660 : if (dump_enabled_p ())
908 2392 : dump_printf_loc (MSG_NOTE, vect_location,
909 : "will update use: %T, mem_ref: %G", vuse, g);
910 : }
911 : }
912 :
913 135928 : if (dump_enabled_p ())
914 1420 : dump_printf_loc (MSG_NOTE, vect_location,
915 : "recorded statements to be moved to BB %d\n",
916 1420 : LOOP_VINFO_EARLY_BRK_DEST_BB (loop_vinfo)->index);
917 :
918 135928 : return opt_result::success ();
919 141884 : }
920 :
921 : /* Function vect_analyze_data_ref_dependences.
922 :
923 : Examine all the data references in the loop, and make sure there do not
924 : exist any data dependences between them. Set *MAX_VF according to
925 : the maximum vectorization factor the data dependences allow. */
926 :
927 : opt_result
928 387316 : vect_analyze_data_ref_dependences (loop_vec_info loop_vinfo,
929 : unsigned int *max_vf)
930 : {
931 387316 : unsigned int i;
932 387316 : struct data_dependence_relation *ddr;
933 :
934 387316 : DUMP_VECT_SCOPE ("vect_analyze_data_ref_dependences");
935 :
936 387316 : if (!LOOP_VINFO_DDRS (loop_vinfo).exists ())
937 : {
938 160921 : LOOP_VINFO_DDRS (loop_vinfo)
939 160921 : .create (LOOP_VINFO_DATAREFS (loop_vinfo).length ()
940 160921 : * LOOP_VINFO_DATAREFS (loop_vinfo).length ());
941 : /* We do not need read-read dependences. */
942 321842 : bool res = compute_all_dependences (LOOP_VINFO_DATAREFS (loop_vinfo),
943 : &LOOP_VINFO_DDRS (loop_vinfo),
944 160921 : LOOP_VINFO_LOOP_NEST (loop_vinfo),
945 : false);
946 160921 : gcc_assert (res);
947 : }
948 :
949 387316 : LOOP_VINFO_NO_DATA_DEPENDENCIES (loop_vinfo) = true;
950 :
951 : /* For epilogues we either have no aliases or alias versioning
952 : was applied to original loop. Therefore we may just get max_vf
953 : using VF of original loop. */
954 387316 : if (LOOP_VINFO_EPILOGUE_P (loop_vinfo))
955 12645 : *max_vf = LOOP_VINFO_ORIG_MAX_VECT_FACTOR (loop_vinfo);
956 : else
957 1837245 : FOR_EACH_VEC_ELT (LOOP_VINFO_DDRS (loop_vinfo), i, ddr)
958 : {
959 1480417 : opt_result res
960 1480417 : = vect_analyze_data_ref_dependence (ddr, loop_vinfo, max_vf);
961 1480417 : if (!res)
962 17843 : return res;
963 : }
964 :
965 : /* If we have early break statements in the loop, check to see if they
966 : are of a form we can vectorizer. */
967 369473 : if (LOOP_VINFO_EARLY_BREAKS (loop_vinfo))
968 141884 : return vect_analyze_early_break_dependences (loop_vinfo);
969 :
970 227589 : return opt_result::success ();
971 : }
972 :
973 :
974 : /* Function vect_slp_analyze_data_ref_dependence.
975 :
976 : Return TRUE if there (might) exist a dependence between a memory-reference
977 : DRA and a memory-reference DRB for VINFO. When versioning for alias
978 : may check a dependence at run-time, return FALSE. Adjust *MAX_VF
979 : according to the data dependence. */
980 :
981 : static bool
982 6937954 : vect_slp_analyze_data_ref_dependence (vec_info *vinfo,
983 : struct data_dependence_relation *ddr)
984 : {
985 6937954 : struct data_reference *dra = DDR_A (ddr);
986 6937954 : struct data_reference *drb = DDR_B (ddr);
987 6937954 : dr_vec_info *dr_info_a = vinfo->lookup_dr (dra);
988 6937954 : dr_vec_info *dr_info_b = vinfo->lookup_dr (drb);
989 :
990 : /* We need to check dependences of statements marked as unvectorizable
991 : as well, they still can prohibit vectorization. */
992 :
993 : /* Independent data accesses. */
994 6937954 : if (DDR_ARE_DEPENDENT (ddr) == chrec_known)
995 : return false;
996 :
997 1108494 : if (dra == drb)
998 : return false;
999 :
1000 : /* Read-read is OK. */
1001 8786 : if (DR_IS_READ (dra) && DR_IS_READ (drb))
1002 : return false;
1003 :
1004 : /* If dra and drb are part of the same interleaving chain consider
1005 : them independent. */
1006 8786 : if (STMT_VINFO_GROUPED_ACCESS (dr_info_a->stmt)
1007 8786 : && (DR_GROUP_FIRST_ELEMENT (dr_info_a->stmt)
1008 8786 : == DR_GROUP_FIRST_ELEMENT (dr_info_b->stmt)))
1009 : return false;
1010 :
1011 : /* Unknown data dependence. */
1012 8786 : if (DDR_ARE_DEPENDENT (ddr) == chrec_dont_know)
1013 : {
1014 8786 : if (dump_enabled_p ())
1015 4 : dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1016 : "can't determine dependence between %T and %T\n",
1017 : DR_REF (dra), DR_REF (drb));
1018 : }
1019 0 : else if (dump_enabled_p ())
1020 0 : dump_printf_loc (MSG_NOTE, vect_location,
1021 : "determined dependence between %T and %T\n",
1022 : DR_REF (dra), DR_REF (drb));
1023 :
1024 : return true;
1025 : }
1026 :
1027 :
1028 : /* Analyze dependences involved in the transform of a store SLP NODE. */
1029 :
1030 : static bool
1031 663414 : vect_slp_analyze_store_dependences (vec_info *vinfo, slp_tree node)
1032 : {
1033 : /* This walks over all stmts involved in the SLP store done
1034 : in NODE verifying we can sink them up to the last stmt in the
1035 : group. */
1036 663414 : stmt_vec_info last_access_info = vect_find_last_scalar_stmt_in_slp (node);
1037 663414 : gcc_assert (DR_IS_WRITE (STMT_VINFO_DATA_REF (last_access_info)));
1038 :
1039 2409882 : for (unsigned k = 0; k < SLP_TREE_SCALAR_STMTS (node).length (); ++k)
1040 : {
1041 1755222 : stmt_vec_info access_info
1042 1755222 : = vect_orig_stmt (SLP_TREE_SCALAR_STMTS (node)[k]);
1043 1755222 : if (access_info == last_access_info)
1044 655514 : continue;
1045 1099708 : data_reference *dr_a = STMT_VINFO_DATA_REF (access_info);
1046 1099708 : ao_ref ref;
1047 1099708 : bool ref_initialized_p = false;
1048 1099708 : for (gimple_stmt_iterator gsi = gsi_for_stmt (access_info->stmt);
1049 10664927 : gsi_stmt (gsi) != last_access_info->stmt; gsi_next (&gsi))
1050 : {
1051 9573973 : gimple *stmt = gsi_stmt (gsi);
1052 16998585 : if (! gimple_vuse (stmt))
1053 2635690 : continue;
1054 :
1055 : /* If we couldn't record a (single) data reference for this
1056 : stmt we have to resort to the alias oracle. */
1057 6938283 : stmt_vec_info stmt_info = vinfo->lookup_stmt (stmt);
1058 6938283 : data_reference *dr_b = STMT_VINFO_DATA_REF (stmt_info);
1059 6938283 : if (!dr_b)
1060 : {
1061 : /* We are moving a store - this means
1062 : we cannot use TBAA for disambiguation. */
1063 549 : if (!ref_initialized_p)
1064 549 : ao_ref_init (&ref, DR_REF (dr_a));
1065 549 : if (stmt_may_clobber_ref_p_1 (stmt, &ref, false)
1066 549 : || ref_maybe_used_by_stmt_p (stmt, &ref, false))
1067 8754 : return false;
1068 545 : continue;
1069 : }
1070 :
1071 6937734 : gcc_assert (!gimple_visited_p (stmt));
1072 :
1073 6937734 : ddr_p ddr = initialize_data_dependence_relation (dr_a,
1074 6937734 : dr_b, vNULL);
1075 6937734 : bool dependent = vect_slp_analyze_data_ref_dependence (vinfo, ddr);
1076 6937734 : free_dependence_relation (ddr);
1077 6937734 : if (dependent)
1078 : return false;
1079 : }
1080 : }
1081 : return true;
1082 : }
1083 :
1084 : /* Analyze dependences involved in the transform of a load SLP NODE. STORES
1085 : contain the vector of scalar stores of this instance if we are
1086 : disambiguating the loads. */
1087 :
1088 : static bool
1089 156709 : vect_slp_analyze_load_dependences (vec_info *vinfo, slp_tree node,
1090 : vec<stmt_vec_info> stores,
1091 : stmt_vec_info last_store_info)
1092 : {
1093 : /* This walks over all stmts involved in the SLP load done
1094 : in NODE verifying we can hoist them up to the first stmt in the
1095 : group. */
1096 156709 : stmt_vec_info first_access_info = vect_find_first_scalar_stmt_in_slp (node);
1097 156709 : gcc_assert (DR_IS_READ (STMT_VINFO_DATA_REF (first_access_info)));
1098 :
1099 549787 : for (unsigned k = 0; k < SLP_TREE_SCALAR_STMTS (node).length (); ++k)
1100 : {
1101 393114 : if (! SLP_TREE_SCALAR_STMTS (node)[k])
1102 163604 : continue;
1103 393114 : stmt_vec_info access_info
1104 393114 : = vect_orig_stmt (SLP_TREE_SCALAR_STMTS (node)[k]);
1105 393114 : if (access_info == first_access_info)
1106 163604 : continue;
1107 229510 : data_reference *dr_a = STMT_VINFO_DATA_REF (access_info);
1108 229510 : ao_ref ref;
1109 229510 : bool ref_initialized_p = false;
1110 229510 : hash_set<stmt_vec_info> grp_visited;
1111 229510 : for (gimple_stmt_iterator gsi = gsi_for_stmt (access_info->stmt);
1112 4499796 : gsi_stmt (gsi) != first_access_info->stmt; gsi_prev (&gsi))
1113 : {
1114 2135179 : gimple *stmt = gsi_stmt (gsi);
1115 3473468 : if (! gimple_vdef (stmt))
1116 2079514 : continue;
1117 :
1118 280269 : stmt_vec_info stmt_info = vinfo->lookup_stmt (stmt);
1119 :
1120 : /* If we run into a store of this same instance (we've just
1121 : marked those) then delay dependence checking until we run
1122 : into the last store because this is where it will have
1123 : been sunk to (and we verified that we can do that already). */
1124 280269 : if (gimple_visited_p (stmt))
1125 : {
1126 224604 : if (stmt_info != last_store_info)
1127 224602 : continue;
1128 :
1129 10 : for (stmt_vec_info &store_info : stores)
1130 : {
1131 4 : data_reference *store_dr = STMT_VINFO_DATA_REF (store_info);
1132 4 : ddr_p ddr = initialize_data_dependence_relation
1133 4 : (dr_a, store_dr, vNULL);
1134 4 : bool dependent
1135 4 : = vect_slp_analyze_data_ref_dependence (vinfo, ddr);
1136 4 : free_dependence_relation (ddr);
1137 4 : if (dependent)
1138 36 : return false;
1139 : }
1140 2 : continue;
1141 2 : }
1142 :
1143 114231 : auto check_hoist = [&] (stmt_vec_info stmt_info) -> bool
1144 : {
1145 : /* We are hoisting a load - this means we can use TBAA for
1146 : disambiguation. */
1147 58566 : if (!ref_initialized_p)
1148 58566 : ao_ref_init (&ref, DR_REF (dr_a));
1149 58566 : if (stmt_may_clobber_ref_p_1 (stmt_info->stmt, &ref, true))
1150 : {
1151 : /* If we couldn't record a (single) data reference for this
1152 : stmt we have to give up now. */
1153 216 : data_reference *dr_b = STMT_VINFO_DATA_REF (stmt_info);
1154 216 : if (!dr_b)
1155 : return false;
1156 216 : ddr_p ddr = initialize_data_dependence_relation (dr_a,
1157 216 : dr_b, vNULL);
1158 216 : bool dependent
1159 216 : = vect_slp_analyze_data_ref_dependence (vinfo, ddr);
1160 216 : free_dependence_relation (ddr);
1161 216 : if (dependent)
1162 : return false;
1163 : }
1164 : /* No dependence. */
1165 : return true;
1166 55665 : };
1167 55665 : if (STMT_VINFO_GROUPED_ACCESS (stmt_info))
1168 : {
1169 : /* When we run into a store group we have to honor
1170 : that earlier stores might be moved here. We don't
1171 : know exactly which and where to since we lack a
1172 : back-mapping from DR to SLP node, so assume all
1173 : earlier stores are sunk here. It's enough to
1174 : consider the last stmt of a group for this.
1175 : ??? Both this and the fact that we disregard that
1176 : the conflicting instance might be removed later
1177 : is overly conservative. */
1178 55203 : if (!grp_visited.add (DR_GROUP_FIRST_ELEMENT (stmt_info)))
1179 10723 : for (auto store_info = DR_GROUP_FIRST_ELEMENT (stmt_info);
1180 129123 : store_info != NULL;
1181 118400 : store_info = DR_GROUP_NEXT_ELEMENT (store_info))
1182 118436 : if ((store_info == stmt_info
1183 107722 : || get_later_stmt (store_info, stmt_info) == stmt_info)
1184 165826 : && !check_hoist (store_info))
1185 : return false;
1186 : }
1187 : else
1188 : {
1189 462 : if (!check_hoist (stmt_info))
1190 : return false;
1191 : }
1192 : }
1193 229510 : }
1194 : return true;
1195 : }
1196 :
1197 :
1198 : /* Function vect_analyze_data_ref_dependences.
1199 :
1200 : Examine all the data references in the basic-block, and make sure there
1201 : do not exist any data dependences between them. Set *MAX_VF according to
1202 : the maximum vectorization factor the data dependences allow. */
1203 :
1204 : bool
1205 792242 : vect_slp_analyze_instance_dependence (vec_info *vinfo, slp_instance instance)
1206 : {
1207 792242 : DUMP_VECT_SCOPE ("vect_slp_analyze_instance_dependence");
1208 :
1209 : /* The stores of this instance are at the root of the SLP tree. */
1210 792242 : slp_tree store = NULL;
1211 792242 : if (SLP_INSTANCE_KIND (instance) == slp_inst_kind_store)
1212 663414 : store = SLP_INSTANCE_TREE (instance);
1213 :
1214 : /* Verify we can sink stores to the vectorized stmt insert location. */
1215 663414 : stmt_vec_info last_store_info = NULL;
1216 663414 : if (store)
1217 : {
1218 663414 : if (! vect_slp_analyze_store_dependences (vinfo, store))
1219 : return false;
1220 :
1221 : /* Mark stores in this instance and remember the last one. */
1222 654660 : last_store_info = vect_find_last_scalar_stmt_in_slp (store);
1223 2400244 : for (unsigned k = 0; k < SLP_TREE_SCALAR_STMTS (store).length (); ++k)
1224 1745584 : gimple_set_visited (SLP_TREE_SCALAR_STMTS (store)[k]->stmt, true);
1225 : }
1226 :
1227 783488 : bool res = true;
1228 :
1229 : /* Verify we can sink loads to the vectorized stmt insert location,
1230 : special-casing stores of this instance. */
1231 1185629 : for (slp_tree &load : SLP_INSTANCE_LOADS (instance))
1232 156709 : if (! vect_slp_analyze_load_dependences (vinfo, load,
1233 : store
1234 : ? SLP_TREE_SCALAR_STMTS (store)
1235 : : vNULL, last_store_info))
1236 : {
1237 : res = false;
1238 : break;
1239 : }
1240 :
1241 : /* Unset the visited flag. */
1242 783488 : if (store)
1243 2400244 : for (unsigned k = 0; k < SLP_TREE_SCALAR_STMTS (store).length (); ++k)
1244 1745584 : gimple_set_visited (SLP_TREE_SCALAR_STMTS (store)[k]->stmt, false);
1245 :
1246 : /* If this is a SLP instance with a store check if there's a dependent
1247 : load that cannot be forwarded from a previous iteration of a loop
1248 : both are in. This is to avoid situations like that in PR115777. */
1249 783488 : if (res && store)
1250 : {
1251 654636 : stmt_vec_info store_info
1252 654636 : = DR_GROUP_FIRST_ELEMENT (SLP_TREE_SCALAR_STMTS (store)[0]);
1253 654636 : class loop *store_loop = gimple_bb (store_info->stmt)->loop_father;
1254 654636 : if (! loop_outer (store_loop))
1255 558887 : return res;
1256 95749 : vec<loop_p> loop_nest;
1257 95749 : loop_nest.create (1);
1258 95749 : loop_nest.quick_push (store_loop);
1259 95749 : data_reference *drs = nullptr;
1260 179119 : for (slp_tree &load : SLP_INSTANCE_LOADS (instance))
1261 : {
1262 37034 : if (! STMT_VINFO_GROUPED_ACCESS (SLP_TREE_SCALAR_STMTS (load)[0]))
1263 0 : continue;
1264 37034 : stmt_vec_info load_info
1265 37034 : = DR_GROUP_FIRST_ELEMENT (SLP_TREE_SCALAR_STMTS (load)[0]);
1266 37034 : if (gimple_bb (load_info->stmt)->loop_father != store_loop)
1267 5098 : continue;
1268 :
1269 : /* For now concern ourselves with write-after-read as we also
1270 : only look for re-use of the store within the same SLP instance.
1271 : We can still get a RAW here when the instance contais a PHI
1272 : with a backedge though, thus this test. */
1273 31936 : if (! vect_stmt_dominates_stmt_p (STMT_VINFO_STMT (load_info),
1274 : STMT_VINFO_STMT (store_info)))
1275 11953 : continue;
1276 :
1277 19983 : if (! drs)
1278 : {
1279 19125 : drs = create_data_ref (loop_preheader_edge (store_loop),
1280 : store_loop,
1281 19125 : DR_REF (STMT_VINFO_DATA_REF (store_info)),
1282 : store_info->stmt, false, false);
1283 19125 : if (! DR_BASE_ADDRESS (drs)
1284 16140 : || TREE_CODE (DR_STEP (drs)) != INTEGER_CST)
1285 : break;
1286 : }
1287 16695 : data_reference *drl
1288 16695 : = create_data_ref (loop_preheader_edge (store_loop),
1289 : store_loop,
1290 16695 : DR_REF (STMT_VINFO_DATA_REF (load_info)),
1291 : load_info->stmt, true, false);
1292 :
1293 : /* See whether the DRs have a known constant distance throughout
1294 : the containing loop iteration. */
1295 31671 : if (! DR_BASE_ADDRESS (drl)
1296 14591 : || ! operand_equal_p (DR_STEP (drs), DR_STEP (drl))
1297 8612 : || ! operand_equal_p (DR_BASE_ADDRESS (drs),
1298 8612 : DR_BASE_ADDRESS (drl))
1299 18418 : || ! operand_equal_p (DR_OFFSET (drs), DR_OFFSET (drl)))
1300 : {
1301 14976 : free_data_ref (drl);
1302 14976 : continue;
1303 : }
1304 :
1305 : /* If the next iteration load overlaps with a non-power-of-two offset
1306 : we are surely failing any STLF attempt. */
1307 1719 : HOST_WIDE_INT step = TREE_INT_CST_LOW (DR_STEP (drl));
1308 1719 : unsigned HOST_WIDE_INT sizes
1309 1719 : = (TREE_INT_CST_LOW (TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (drs))))
1310 1719 : * DR_GROUP_SIZE (store_info));
1311 1719 : unsigned HOST_WIDE_INT sizel
1312 1719 : = (TREE_INT_CST_LOW (TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (drl))))
1313 1719 : * DR_GROUP_SIZE (load_info));
1314 1719 : if (ranges_overlap_p (TREE_INT_CST_LOW (DR_INIT (drl)) + step, sizel,
1315 1719 : TREE_INT_CST_LOW (DR_INIT (drs)), sizes))
1316 : {
1317 835 : unsigned HOST_WIDE_INT dist
1318 835 : = absu_hwi (TREE_INT_CST_LOW (DR_INIT (drl)) + step
1319 835 : - TREE_INT_CST_LOW (DR_INIT (drs)));
1320 835 : poly_uint64 loadsz = tree_to_poly_uint64
1321 835 : (TYPE_SIZE_UNIT (SLP_TREE_VECTYPE (load)));
1322 835 : poly_uint64 storesz = tree_to_poly_uint64
1323 835 : (TYPE_SIZE_UNIT (SLP_TREE_VECTYPE (store)));
1324 : /* When the overlap aligns with vector sizes used for the loads
1325 : and the vector stores are larger or equal to the loads
1326 : forwarding should work. */
1327 1670 : if (maybe_gt (loadsz, storesz) || ! multiple_p (dist, loadsz))
1328 70 : load->avoid_stlf_fail = true;
1329 : }
1330 1719 : free_data_ref (drl);
1331 : }
1332 95749 : if (drs)
1333 19125 : free_data_ref (drs);
1334 95749 : loop_nest.release ();
1335 : }
1336 :
1337 : return res;
1338 : }
1339 :
1340 : /* Return the misalignment of DR_INFO accessed in VECTYPE with OFFSET
1341 : applied. */
1342 :
1343 : int
1344 6629777 : dr_misalignment (dr_vec_info *dr_info, tree vectype, poly_int64 offset)
1345 : {
1346 6629777 : HOST_WIDE_INT diff = 0;
1347 : /* Alignment is only analyzed for the first element of a DR group,
1348 : use that but adjust misalignment by the offset of the access. */
1349 6629777 : if (STMT_VINFO_GROUPED_ACCESS (dr_info->stmt))
1350 : {
1351 2305350 : dr_vec_info *first_dr
1352 2305350 : = STMT_VINFO_DR_INFO (DR_GROUP_FIRST_ELEMENT (dr_info->stmt));
1353 : /* vect_analyze_data_ref_accesses guarantees that DR_INIT are
1354 : INTEGER_CSTs and the first element in the group has the lowest
1355 : address. */
1356 2305350 : diff = (TREE_INT_CST_LOW (DR_INIT (dr_info->dr))
1357 2305350 : - TREE_INT_CST_LOW (DR_INIT (first_dr->dr)));
1358 2305350 : gcc_assert (diff >= 0);
1359 : dr_info = first_dr;
1360 : }
1361 :
1362 6629777 : int misalign = dr_info->misalignment;
1363 6629777 : gcc_assert (misalign != DR_MISALIGNMENT_UNINITIALIZED);
1364 6629777 : if (misalign == DR_MISALIGNMENT_UNKNOWN)
1365 : return misalign;
1366 :
1367 : /* If the access is only aligned for a vector type with smaller alignment
1368 : requirement the access has unknown misalignment. */
1369 4027213 : if (maybe_lt (dr_info->target_alignment * BITS_PER_UNIT,
1370 4027213 : targetm.vectorize.preferred_vector_alignment (vectype)))
1371 : return DR_MISALIGNMENT_UNKNOWN;
1372 :
1373 : /* Apply the offset from the DR group start and the externally supplied
1374 : offset which can for example result from a negative stride access. */
1375 4027210 : poly_int64 misalignment = misalign + diff + offset;
1376 :
1377 : /* Below we reject compile-time non-constant target alignments, but if
1378 : our misalignment is zero, then we are known to already be aligned
1379 : w.r.t. any such possible target alignment. */
1380 4027210 : if (known_eq (misalignment, 0))
1381 : return 0;
1382 :
1383 633372 : unsigned HOST_WIDE_INT target_alignment_c;
1384 633372 : if (!dr_info->target_alignment.is_constant (&target_alignment_c)
1385 633372 : || !known_misalignment (misalignment, target_alignment_c, &misalign))
1386 : return DR_MISALIGNMENT_UNKNOWN;
1387 633372 : return misalign;
1388 : }
1389 :
1390 : /* Record the base alignment guarantee given by DRB, which occurs
1391 : in STMT_INFO. */
1392 :
1393 : static void
1394 4622876 : vect_record_base_alignment (vec_info *vinfo, stmt_vec_info stmt_info,
1395 : innermost_loop_behavior *drb)
1396 : {
1397 4622876 : bool existed;
1398 4622876 : std::pair<stmt_vec_info, innermost_loop_behavior *> &entry
1399 4622876 : = vinfo->base_alignments.get_or_insert (drb->base_address, &existed);
1400 4622876 : if (!existed || entry.second->base_alignment < drb->base_alignment)
1401 : {
1402 1431381 : entry = std::make_pair (stmt_info, drb);
1403 1431381 : if (dump_enabled_p ())
1404 32729 : dump_printf_loc (MSG_NOTE, vect_location,
1405 : "recording new base alignment for %T\n"
1406 : " alignment: %d\n"
1407 : " misalignment: %d\n"
1408 : " based on: %G",
1409 : drb->base_address,
1410 : drb->base_alignment,
1411 : drb->base_misalignment,
1412 : stmt_info->stmt);
1413 : }
1414 4622876 : }
1415 :
1416 : /* If the region we're going to vectorize is reached, all unconditional
1417 : data references occur at least once. We can therefore pool the base
1418 : alignment guarantees from each unconditional reference. Do this by
1419 : going through all the data references in VINFO and checking whether
1420 : the containing statement makes the reference unconditionally. If so,
1421 : record the alignment of the base address in VINFO so that it can be
1422 : used for all other references with the same base. */
1423 :
1424 : void
1425 1027162 : vect_record_base_alignments (vec_info *vinfo)
1426 : {
1427 1027162 : loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
1428 412779 : class loop *loop = loop_vinfo ? LOOP_VINFO_LOOP (loop_vinfo) : NULL;
1429 15117049 : for (data_reference *dr : vinfo->shared->datarefs)
1430 : {
1431 12133321 : dr_vec_info *dr_info = vinfo->lookup_dr (dr);
1432 12133321 : stmt_vec_info stmt_info = dr_info->stmt;
1433 12133321 : if (!DR_IS_CONDITIONAL_IN_STMT (dr)
1434 12123425 : && STMT_VINFO_VECTORIZABLE (stmt_info)
1435 4639877 : && !STMT_VINFO_GATHER_SCATTER_P (stmt_info))
1436 : {
1437 4621349 : vect_record_base_alignment (vinfo, stmt_info, &DR_INNERMOST (dr));
1438 :
1439 : /* If DR is nested in the loop that is being vectorized, we can also
1440 : record the alignment of the base wrt the outer loop. */
1441 13063498 : if (loop && nested_in_vect_loop_p (loop, stmt_info))
1442 1527 : vect_record_base_alignment
1443 1527 : (vinfo, stmt_info, &STMT_VINFO_DR_WRT_VEC_LOOP (stmt_info));
1444 : }
1445 : }
1446 1027162 : }
1447 :
1448 : /* Function vect_compute_data_ref_alignment
1449 :
1450 : Compute the misalignment of the data reference DR_INFO when vectorizing
1451 : with VECTYPE.
1452 :
1453 : Output:
1454 : 1. initialized misalignment info for DR_INFO
1455 :
1456 : FOR NOW: No analysis is actually performed. Misalignment is calculated
1457 : only for trivial cases. TODO. */
1458 :
1459 : static void
1460 1609763 : vect_compute_data_ref_alignment (vec_info *vinfo, dr_vec_info *dr_info,
1461 : tree vectype)
1462 : {
1463 1609763 : stmt_vec_info stmt_info = dr_info->stmt;
1464 1609763 : vec_base_alignments *base_alignments = &vinfo->base_alignments;
1465 1609763 : loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
1466 1609763 : class loop *loop = NULL;
1467 1609763 : tree ref = DR_REF (dr_info->dr);
1468 :
1469 1609763 : if (dump_enabled_p ())
1470 51631 : dump_printf_loc (MSG_NOTE, vect_location,
1471 : "vect_compute_data_ref_alignment:\n");
1472 :
1473 1609763 : if (loop_vinfo)
1474 828027 : loop = LOOP_VINFO_LOOP (loop_vinfo);
1475 :
1476 : /* Initialize misalignment to unknown. */
1477 1609763 : SET_DR_MISALIGNMENT (dr_info, DR_MISALIGNMENT_UNKNOWN);
1478 :
1479 1609763 : if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
1480 : return;
1481 :
1482 1589515 : innermost_loop_behavior *drb = vect_dr_behavior (vinfo, dr_info);
1483 1589515 : bool step_preserves_misalignment_p;
1484 :
1485 1589515 : poly_uint64 vector_alignment
1486 1589515 : = exact_div (targetm.vectorize.preferred_vector_alignment (vectype),
1487 : BITS_PER_UNIT);
1488 :
1489 1589515 : if (loop_vinfo
1490 1589515 : && dr_safe_speculative_read_required (stmt_info))
1491 : {
1492 : /* The required target alignment must be a power-of-2 value and is
1493 : computed as the product of vector element size, VF and group size.
1494 : We compute the constant part first as VF may be a variable. For
1495 : variable VF, the power-of-2 check of VF is deferred to runtime. */
1496 307386 : auto align_factor_c
1497 307386 : = TREE_INT_CST_LOW (TYPE_SIZE_UNIT (TREE_TYPE (vectype)));
1498 307386 : if (STMT_VINFO_GROUPED_ACCESS (stmt_info))
1499 89922 : align_factor_c *= DR_GROUP_SIZE (DR_GROUP_FIRST_ELEMENT (stmt_info));
1500 :
1501 307386 : poly_uint64 vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
1502 307386 : poly_uint64 new_alignment = vf * align_factor_c;
1503 :
1504 614772 : if ((vf.is_constant () && pow2p_hwi (new_alignment.to_constant ()))
1505 : || (!vf.is_constant () && pow2p_hwi (align_factor_c)))
1506 : {
1507 244149 : if (dump_enabled_p ())
1508 : {
1509 2958 : dump_printf_loc (MSG_NOTE, vect_location,
1510 : "alignment increased due to early break to ");
1511 2958 : dump_dec (MSG_NOTE, new_alignment);
1512 2958 : dump_printf (MSG_NOTE, " bytes.\n");
1513 : }
1514 244149 : vector_alignment = new_alignment;
1515 : }
1516 : }
1517 :
1518 1589515 : SET_DR_TARGET_ALIGNMENT (dr_info, vector_alignment);
1519 :
1520 : /* If the main loop has peeled for alignment we have no way of knowing
1521 : whether the data accesses in the epilogues are aligned. We can't at
1522 : compile time answer the question whether we have entered the main loop or
1523 : not. Fixes PR 92351. */
1524 1589515 : if (loop_vinfo)
1525 : {
1526 807779 : loop_vec_info orig_loop_vinfo = LOOP_VINFO_ORIG_LOOP_INFO (loop_vinfo);
1527 807779 : if (orig_loop_vinfo
1528 32748 : && LOOP_VINFO_PEELING_FOR_ALIGNMENT (orig_loop_vinfo) != 0)
1529 : return;
1530 : }
1531 :
1532 1589298 : unsigned HOST_WIDE_INT vect_align_c;
1533 1589298 : if (!vector_alignment.is_constant (&vect_align_c))
1534 : return;
1535 :
1536 : /* No step for BB vectorization. */
1537 1589298 : if (!loop)
1538 : {
1539 781736 : gcc_assert (integer_zerop (drb->step));
1540 : step_preserves_misalignment_p = true;
1541 : }
1542 :
1543 : else
1544 : {
1545 : /* We can only use base and misalignment information relative to
1546 : an innermost loop if the misalignment stays the same throughout the
1547 : execution of the loop. As above, this is the case if the stride of
1548 : the dataref evenly divides by the alignment. Make sure to check
1549 : previous epilogues and the main loop. */
1550 : step_preserves_misalignment_p = true;
1551 : auto lvinfo = loop_vinfo;
1552 1648437 : while (lvinfo)
1553 : {
1554 840875 : poly_uint64 vf = LOOP_VINFO_VECT_FACTOR (lvinfo);
1555 840875 : step_preserves_misalignment_p
1556 840875 : &= multiple_p (drb->step_alignment * vf, vect_align_c);
1557 840875 : lvinfo = LOOP_VINFO_ORIG_LOOP_INFO (lvinfo);
1558 : }
1559 :
1560 807562 : if (!step_preserves_misalignment_p && dump_enabled_p ())
1561 320 : dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1562 : "step doesn't divide the vector alignment.\n");
1563 :
1564 : /* In case the dataref is in an inner-loop of the loop that is being
1565 : vectorized (LOOP), we use the base and misalignment information
1566 : relative to the outer-loop (LOOP). This is ok only if the
1567 : misalignment stays the same throughout the execution of the
1568 : inner-loop, which is why we have to check that the stride of the
1569 : dataref in the inner-loop evenly divides by the vector alignment. */
1570 807562 : if (step_preserves_misalignment_p
1571 807562 : && nested_in_vect_loop_p (loop, stmt_info))
1572 : {
1573 1526 : step_preserves_misalignment_p
1574 1526 : = (DR_STEP_ALIGNMENT (dr_info->dr) % vect_align_c) == 0;
1575 :
1576 1526 : if (dump_enabled_p ())
1577 : {
1578 496 : if (step_preserves_misalignment_p)
1579 358 : dump_printf_loc (MSG_NOTE, vect_location,
1580 : "inner step divides the vector alignment.\n");
1581 : else
1582 138 : dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1583 : "inner step doesn't divide the vector"
1584 : " alignment.\n");
1585 : }
1586 : }
1587 : }
1588 :
1589 1589298 : unsigned int base_alignment = drb->base_alignment;
1590 1589298 : unsigned int base_misalignment = drb->base_misalignment;
1591 :
1592 : /* Calculate the maximum of the pooled base address alignment and the
1593 : alignment that we can compute for DR itself. */
1594 1589298 : std::pair<stmt_vec_info, innermost_loop_behavior *> *entry
1595 1589298 : = base_alignments->get (drb->base_address);
1596 1589298 : if (entry
1597 1584496 : && base_alignment < (*entry).second->base_alignment
1598 1592591 : && (loop_vinfo
1599 2383 : || (dominated_by_p (CDI_DOMINATORS, gimple_bb (stmt_info->stmt),
1600 2383 : gimple_bb (entry->first->stmt))
1601 2277 : && (gimple_bb (stmt_info->stmt) != gimple_bb (entry->first->stmt)
1602 2041 : || (entry->first->dr_aux.group <= dr_info->group)))))
1603 : {
1604 3170 : base_alignment = entry->second->base_alignment;
1605 3170 : base_misalignment = entry->second->base_misalignment;
1606 : }
1607 :
1608 1589298 : if (drb->offset_alignment < vect_align_c
1609 1520401 : || !step_preserves_misalignment_p
1610 : /* We need to know whether the step wrt the vectorized loop is
1611 : negative when computing the starting misalignment below. */
1612 1512120 : || TREE_CODE (drb->step) != INTEGER_CST)
1613 : {
1614 105198 : if (dump_enabled_p ())
1615 3713 : dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1616 : "Unknown alignment for access: %T\n", ref);
1617 105198 : return;
1618 : }
1619 :
1620 1484100 : if (base_alignment < vect_align_c)
1621 : {
1622 733518 : unsigned int max_alignment;
1623 733518 : tree base = get_base_for_alignment (drb->base_address, &max_alignment);
1624 733518 : if (max_alignment < vect_align_c
1625 731145 : || (loop_vinfo && LOOP_VINFO_EPILOGUE_P (loop_vinfo))
1626 1443750 : || !vect_can_force_dr_alignment_p (base,
1627 710232 : vect_align_c * BITS_PER_UNIT))
1628 : {
1629 536602 : if (dump_enabled_p ())
1630 14300 : dump_printf_loc (MSG_NOTE, vect_location,
1631 : "can't force alignment of ref: %T\n", ref);
1632 536602 : return;
1633 : }
1634 :
1635 : /* Force the alignment of the decl.
1636 : NOTE: This is the only change to the code we make during
1637 : the analysis phase, before deciding to vectorize the loop. */
1638 196916 : if (dump_enabled_p ())
1639 7935 : dump_printf_loc (MSG_NOTE, vect_location,
1640 : "force alignment of %T\n", ref);
1641 :
1642 196916 : dr_info->base_decl = base;
1643 196916 : dr_info->base_misaligned = true;
1644 196916 : base_misalignment = 0;
1645 : }
1646 947498 : poly_int64 misalignment
1647 947498 : = base_misalignment + wi::to_poly_offset (drb->init).force_shwi ();
1648 :
1649 947498 : unsigned int const_misalignment;
1650 947498 : if (!known_misalignment (misalignment, vect_align_c, &const_misalignment))
1651 : {
1652 : if (dump_enabled_p ())
1653 : dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1654 : "Non-constant misalignment for access: %T\n", ref);
1655 : return;
1656 : }
1657 :
1658 947498 : SET_DR_MISALIGNMENT (dr_info, const_misalignment);
1659 :
1660 947498 : if (dump_enabled_p ())
1661 32289 : dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1662 : "misalign = %d bytes of ref %T\n",
1663 : const_misalignment, ref);
1664 :
1665 : return;
1666 : }
1667 :
1668 : /* Return whether DR_INFO, which is related to DR_PEEL_INFO in
1669 : that it only differs in DR_INIT, is aligned if DR_PEEL_INFO
1670 : is made aligned via peeling. */
1671 :
1672 : static bool
1673 1984182 : vect_dr_aligned_if_related_peeled_dr_is (dr_vec_info *dr_info,
1674 : dr_vec_info *dr_peel_info)
1675 : {
1676 1984182 : if (multiple_p (DR_TARGET_ALIGNMENT (dr_peel_info),
1677 1984950 : DR_TARGET_ALIGNMENT (dr_info)))
1678 : {
1679 1983414 : poly_offset_int diff
1680 1983414 : = (wi::to_poly_offset (DR_INIT (dr_peel_info->dr))
1681 1983414 : - wi::to_poly_offset (DR_INIT (dr_info->dr)));
1682 1983414 : if (known_eq (diff, 0)
1683 1983414 : || multiple_p (diff, DR_TARGET_ALIGNMENT (dr_info)))
1684 748557 : return true;
1685 : }
1686 : return false;
1687 : }
1688 :
1689 : /* Return whether DR_INFO is aligned if DR_PEEL_INFO is made
1690 : aligned via peeling. */
1691 :
1692 : static bool
1693 197927 : vect_dr_aligned_if_peeled_dr_is (dr_vec_info *dr_info,
1694 : dr_vec_info *dr_peel_info)
1695 : {
1696 197927 : if (!operand_equal_p (DR_BASE_ADDRESS (dr_info->dr),
1697 197927 : DR_BASE_ADDRESS (dr_peel_info->dr), 0)
1698 48792 : || !operand_equal_p (DR_OFFSET (dr_info->dr),
1699 48792 : DR_OFFSET (dr_peel_info->dr), 0)
1700 245811 : || !operand_equal_p (DR_STEP (dr_info->dr),
1701 47884 : DR_STEP (dr_peel_info->dr), 0))
1702 150441 : return false;
1703 :
1704 47486 : return vect_dr_aligned_if_related_peeled_dr_is (dr_info, dr_peel_info);
1705 : }
1706 :
1707 : /* Compute the value for dr_info->misalign so that the access appears
1708 : aligned. This is used by peeling to compensate for dr_misalignment
1709 : applying the offset for negative step. */
1710 :
1711 : int
1712 21817 : vect_dr_misalign_for_aligned_access (dr_vec_info *dr_info)
1713 : {
1714 21817 : if (tree_int_cst_sgn (DR_STEP (dr_info->dr)) >= 0)
1715 : return 0;
1716 :
1717 201 : tree vectype = STMT_VINFO_VECTYPE (dr_info->stmt);
1718 201 : poly_int64 misalignment
1719 201 : = ((TYPE_VECTOR_SUBPARTS (vectype) - 1)
1720 201 : * TREE_INT_CST_LOW (TYPE_SIZE_UNIT (TREE_TYPE (vectype))));
1721 :
1722 201 : unsigned HOST_WIDE_INT target_alignment_c;
1723 201 : int misalign;
1724 201 : if (!dr_info->target_alignment.is_constant (&target_alignment_c)
1725 201 : || !known_misalignment (misalignment, target_alignment_c, &misalign))
1726 : return DR_MISALIGNMENT_UNKNOWN;
1727 201 : return misalign;
1728 : }
1729 :
1730 : /* Function vect_update_misalignment_for_peel.
1731 : Sets DR_INFO's misalignment
1732 : - to 0 if it has the same alignment as DR_PEEL_INFO,
1733 : - to the misalignment computed using NPEEL if DR_INFO's salignment is known,
1734 : - to -1 (unknown) otherwise.
1735 :
1736 : DR_INFO - the data reference whose misalignment is to be adjusted.
1737 : DR_PEEL_INFO - the data reference whose misalignment is being made
1738 : zero in the vector loop by the peel.
1739 : NPEEL - the number of iterations in the peel loop if the misalignment
1740 : of DR_PEEL_INFO is known at compile time. */
1741 :
1742 : static void
1743 2775 : vect_update_misalignment_for_peel (dr_vec_info *dr_info,
1744 : dr_vec_info *dr_peel_info, int npeel)
1745 : {
1746 : /* If dr_info is aligned of dr_peel_info is, then mark it so. */
1747 2775 : if (vect_dr_aligned_if_peeled_dr_is (dr_info, dr_peel_info))
1748 : {
1749 444 : SET_DR_MISALIGNMENT (dr_info,
1750 : vect_dr_misalign_for_aligned_access (dr_peel_info));
1751 444 : return;
1752 : }
1753 :
1754 2331 : unsigned HOST_WIDE_INT alignment;
1755 2331 : if (DR_TARGET_ALIGNMENT (dr_info).is_constant (&alignment)
1756 2331 : && known_alignment_for_access_p (dr_info,
1757 2331 : STMT_VINFO_VECTYPE (dr_info->stmt))
1758 234 : && known_alignment_for_access_p (dr_peel_info,
1759 234 : STMT_VINFO_VECTYPE (dr_peel_info->stmt)))
1760 : {
1761 202 : int misal = dr_info->misalignment;
1762 202 : misal += npeel * TREE_INT_CST_LOW (DR_STEP (dr_info->dr));
1763 202 : misal &= alignment - 1;
1764 202 : set_dr_misalignment (dr_info, misal);
1765 202 : return;
1766 : }
1767 :
1768 2129 : if (dump_enabled_p ())
1769 40 : dump_printf_loc (MSG_NOTE, vect_location, "Setting misalignment " \
1770 : "to unknown (-1).\n");
1771 2129 : SET_DR_MISALIGNMENT (dr_info, DR_MISALIGNMENT_UNKNOWN);
1772 : }
1773 :
1774 : /* Return true if alignment is relevant for DR_INFO. */
1775 :
1776 : static bool
1777 1801945 : vect_relevant_for_alignment_p (dr_vec_info *dr_info)
1778 : {
1779 1801945 : stmt_vec_info stmt_info = dr_info->stmt;
1780 :
1781 1801945 : if (!STMT_VINFO_RELEVANT_P (stmt_info))
1782 : return false;
1783 :
1784 : /* For interleaving, only the alignment of the first access matters. */
1785 1800495 : if (STMT_VINFO_GROUPED_ACCESS (stmt_info)
1786 2042866 : && DR_GROUP_FIRST_ELEMENT (stmt_info) != stmt_info)
1787 : return false;
1788 :
1789 : /* Scatter-gather and invariant accesses continue to address individual
1790 : scalars, so vector-level alignment is irrelevant. */
1791 1693679 : if (STMT_VINFO_GATHER_SCATTER_P (stmt_info)
1792 1693679 : || integer_zerop (DR_STEP (dr_info->dr)))
1793 54849 : return false;
1794 :
1795 : /* Strided accesses perform only component accesses, alignment is
1796 : irrelevant for them. */
1797 1638830 : if (STMT_VINFO_STRIDED_P (stmt_info)
1798 1638830 : && !STMT_VINFO_GROUPED_ACCESS (stmt_info))
1799 : return false;
1800 :
1801 : return true;
1802 : }
1803 :
1804 : /* Given an memory reference EXP return whether its alignment is less
1805 : than its size. */
1806 :
1807 : static bool
1808 1601025 : not_size_aligned (tree exp)
1809 : {
1810 1601025 : if (!tree_fits_uhwi_p (TYPE_SIZE (TREE_TYPE (exp))))
1811 : return true;
1812 :
1813 1601025 : return (tree_to_uhwi (TYPE_SIZE (TREE_TYPE (exp)))
1814 1601025 : > get_object_alignment (exp));
1815 : }
1816 :
1817 : /* Function vector_alignment_reachable_p
1818 :
1819 : Return true if vector alignment for DR_INFO is reachable by peeling
1820 : a few loop iterations. Return false otherwise. */
1821 :
1822 : static bool
1823 612985 : vector_alignment_reachable_p (dr_vec_info *dr_info, poly_uint64 vf)
1824 : {
1825 612985 : stmt_vec_info stmt_info = dr_info->stmt;
1826 612985 : tree vectype = STMT_VINFO_VECTYPE (stmt_info);
1827 612985 : poly_uint64 nelements = TYPE_VECTOR_SUBPARTS (vectype);
1828 1225970 : poly_uint64 vector_size = GET_MODE_SIZE (TYPE_MODE (vectype));
1829 612985 : unsigned elem_size = vector_element_size (vector_size, nelements);
1830 612985 : unsigned group_size = 1;
1831 :
1832 612985 : if (STMT_VINFO_GROUPED_ACCESS (stmt_info))
1833 : {
1834 : /* For interleaved access we peel only if number of iterations in
1835 : the prolog loop ({VF - misalignment}), is a multiple of the
1836 : number of the interleaved accesses. */
1837 :
1838 : /* FORNOW: handle only known alignment. */
1839 87348 : if (!known_alignment_for_access_p (dr_info, vectype))
1840 612985 : return false;
1841 :
1842 51981 : unsigned mis_in_elements = dr_misalignment (dr_info, vectype) / elem_size;
1843 64423 : if (!multiple_p (nelements - mis_in_elements, DR_GROUP_SIZE (stmt_info)))
1844 : return false;
1845 :
1846 12442 : group_size = DR_GROUP_SIZE (DR_GROUP_FIRST_ELEMENT (stmt_info));
1847 : }
1848 :
1849 : /* If the vectorization factor does not guarantee DR advancement of
1850 : a multiple of the target alignment no peeling will help. */
1851 538079 : if (!multiple_p (elem_size * group_size * vf, dr_target_alignment (dr_info)))
1852 154 : return false;
1853 :
1854 : /* If misalignment is known at the compile time then allow peeling
1855 : only if natural alignment is reachable through peeling. */
1856 537925 : if (known_alignment_for_access_p (dr_info, vectype)
1857 838109 : && !aligned_access_p (dr_info, vectype))
1858 : {
1859 14225 : HOST_WIDE_INT elmsize =
1860 14225 : int_cst_value (TYPE_SIZE_UNIT (TREE_TYPE (vectype)));
1861 14225 : if (dump_enabled_p ())
1862 : {
1863 768 : dump_printf_loc (MSG_NOTE, vect_location,
1864 : "data size = %wd. misalignment = %d.\n", elmsize,
1865 : dr_misalignment (dr_info, vectype));
1866 : }
1867 14225 : if (dr_misalignment (dr_info, vectype) % elmsize)
1868 : {
1869 72 : if (dump_enabled_p ())
1870 7 : dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1871 : "data size does not divide the misalignment.\n");
1872 72 : return false;
1873 : }
1874 : }
1875 :
1876 537853 : if (!known_alignment_for_access_p (dr_info, vectype))
1877 : {
1878 237741 : tree type = TREE_TYPE (DR_REF (dr_info->dr));
1879 237741 : bool is_packed = not_size_aligned (DR_REF (dr_info->dr));
1880 237741 : if (dump_enabled_p ())
1881 15981 : dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1882 : "Unknown misalignment, %snaturally aligned\n",
1883 : is_packed ? "not " : "");
1884 237741 : return targetm.vectorize.vector_alignment_reachable (type, is_packed);
1885 : }
1886 :
1887 : return true;
1888 : }
1889 :
1890 :
1891 : /* Calculate the cost of the memory access represented by DR_INFO. */
1892 :
1893 : static void
1894 732074 : vect_get_data_access_cost (vec_info *vinfo, dr_vec_info *dr_info,
1895 : dr_alignment_support alignment_support_scheme,
1896 : int misalignment,
1897 : unsigned int *inside_cost,
1898 : unsigned int *outside_cost,
1899 : stmt_vector_for_cost *body_cost_vec,
1900 : stmt_vector_for_cost *prologue_cost_vec)
1901 : {
1902 732074 : stmt_vec_info stmt_info = dr_info->stmt;
1903 :
1904 732074 : if (DR_IS_READ (dr_info->dr))
1905 511942 : vect_get_load_cost (vinfo, stmt_info, NULL, 1,
1906 : alignment_support_scheme, misalignment, true,
1907 : inside_cost, outside_cost, prologue_cost_vec,
1908 : body_cost_vec, false);
1909 : else
1910 220132 : vect_get_store_cost (vinfo,stmt_info, NULL, 1,
1911 : alignment_support_scheme, misalignment, inside_cost,
1912 : body_cost_vec);
1913 :
1914 732074 : if (dump_enabled_p ())
1915 29863 : dump_printf_loc (MSG_NOTE, vect_location,
1916 : "vect_get_data_access_cost: inside_cost = %d, "
1917 : "outside_cost = %d.\n", *inside_cost, *outside_cost);
1918 732074 : }
1919 :
1920 :
/* One peeling candidate, stored in the peeling hash table and keyed by
   NPEEL.  */
typedef struct _vect_peel_info
{
  /* The data reference recorded when this entry was first created.  */
  dr_vec_info *dr_info;
  /* Number of iterations to peel; this is the hash key.  */
  int npeel;
  /* Number of insertions seen for this NPEEL, possibly biased by
     VECT_MAX_COST (see vect_peeling_hash_insert).  */
  unsigned int count;
} *vect_peel_info;
1927 :
/* Accumulator used while traversing the peeling hash table: the best
   candidate found so far together with its costs.  */
typedef struct _vect_peel_extended_info
{
  /* The vectorization info the candidates belong to.  */
  vec_info *vinfo;
  /* The best peeling candidate found so far.  */
  struct _vect_peel_info peel_info;
  /* Inside-loop and outside-loop cost recorded for PEEL_INFO.  */
  unsigned int inside_cost;
  unsigned int outside_cost;
} *vect_peel_extended_info;
1935 :
1936 :
/* Peeling hashtable helpers.  Hash traits for _vect_peel_info entries:
   both hashing and equality look only at the NPEEL field.  */

struct peel_info_hasher : free_ptr_hash <_vect_peel_info>
{
  static inline hashval_t hash (const _vect_peel_info *);
  static inline bool equal (const _vect_peel_info *, const _vect_peel_info *);
};
1944 :
1945 : inline hashval_t
1946 745719 : peel_info_hasher::hash (const _vect_peel_info *peel_info)
1947 : {
1948 745719 : return (hashval_t) peel_info->npeel;
1949 : }
1950 :
1951 : inline bool
1952 387596 : peel_info_hasher::equal (const _vect_peel_info *a, const _vect_peel_info *b)
1953 : {
1954 387596 : return (a->npeel == b->npeel);
1955 : }
1956 :
1957 :
1958 : /* Insert DR_INFO into peeling hash table with NPEEL as key. */
1959 :
1960 : static void
1961 358787 : vect_peeling_hash_insert (hash_table<peel_info_hasher> *peeling_htab,
1962 : loop_vec_info loop_vinfo, dr_vec_info *dr_info,
1963 : int npeel, bool supportable_if_not_aligned)
1964 : {
1965 358787 : struct _vect_peel_info elem, *slot;
1966 358787 : _vect_peel_info **new_slot;
1967 :
1968 358787 : elem.npeel = npeel;
1969 358787 : slot = peeling_htab->find (&elem);
1970 358787 : if (slot)
1971 157503 : slot->count++;
1972 : else
1973 : {
1974 201284 : slot = XNEW (struct _vect_peel_info);
1975 201284 : slot->npeel = npeel;
1976 201284 : slot->dr_info = dr_info;
1977 201284 : slot->count = 1;
1978 201284 : new_slot = peeling_htab->find_slot (slot, INSERT);
1979 201284 : *new_slot = slot;
1980 : }
1981 :
1982 : /* If this DR is not supported with unknown misalignment then bias
1983 : this slot when the cost model is disabled. */
1984 358787 : if (!supportable_if_not_aligned
1985 358787 : && unlimited_cost_model (LOOP_VINFO_LOOP (loop_vinfo)))
1986 4656 : slot->count += VECT_MAX_COST;
1987 358787 : }
1988 :
1989 :
1990 : /* Traverse peeling hash table to find peeling option that aligns maximum
1991 : number of data accesses. */
1992 :
1993 : int
1994 35656 : vect_peeling_hash_get_most_frequent (_vect_peel_info **slot,
1995 : _vect_peel_extended_info *max)
1996 : {
1997 35656 : vect_peel_info elem = *slot;
1998 :
1999 35656 : if (elem->count > max->peel_info.count
2000 21661 : || (elem->count == max->peel_info.count
2001 16951 : && max->peel_info.npeel > elem->npeel))
2002 : {
2003 14011 : max->peel_info.npeel = elem->npeel;
2004 14011 : max->peel_info.count = elem->count;
2005 14011 : max->peel_info.dr_info = elem->dr_info;
2006 : }
2007 :
2008 35656 : return 1;
2009 : }
2010 :
2011 : /* Get the costs of peeling NPEEL iterations for LOOP_VINFO, checking
2012 : data access costs for all data refs. If UNKNOWN_MISALIGNMENT is true,
2013 : npeel is computed at runtime but DR0_INFO's misalignment will be zero
2014 : after peeling. */
2015 :
2016 : static void
2017 401743 : vect_get_peeling_costs_all_drs (loop_vec_info loop_vinfo,
2018 : dr_vec_info *dr0_info,
2019 : unsigned int *inside_cost,
2020 : unsigned int *outside_cost,
2021 : stmt_vector_for_cost *body_cost_vec,
2022 : stmt_vector_for_cost *prologue_cost_vec,
2023 : unsigned int npeel)
2024 : {
2025 401743 : vec<data_reference_p> datarefs = LOOP_VINFO_DATAREFS (loop_vinfo);
2026 :
2027 401743 : bool dr0_alignment_known_p
2028 : = (dr0_info
2029 736090 : && known_alignment_for_access_p (dr0_info,
2030 334347 : STMT_VINFO_VECTYPE (dr0_info->stmt)));
2031 :
2032 1975204 : for (data_reference *dr : datarefs)
2033 : {
2034 769975 : dr_vec_info *dr_info = loop_vinfo->lookup_dr (dr);
2035 769975 : if (!vect_relevant_for_alignment_p (dr_info))
2036 37901 : continue;
2037 :
2038 732074 : tree vectype = STMT_VINFO_VECTYPE (dr_info->stmt);
2039 732074 : dr_alignment_support alignment_support_scheme;
2040 732074 : int misalignment;
2041 732074 : unsigned HOST_WIDE_INT alignment;
2042 :
2043 732074 : bool negative = tree_int_cst_compare (DR_STEP (dr_info->dr),
2044 732074 : size_zero_node) < 0;
2045 732074 : poly_int64 off = 0;
2046 732074 : if (negative)
2047 24093 : off = ((TYPE_VECTOR_SUBPARTS (vectype) - 1)
2048 24093 : * -TREE_INT_CST_LOW (TYPE_SIZE_UNIT (TREE_TYPE (vectype))));
2049 :
2050 732074 : if (npeel == 0)
2051 371995 : misalignment = dr_misalignment (dr_info, vectype, off);
2052 360079 : else if (dr_info == dr0_info
2053 360079 : || vect_dr_aligned_if_peeled_dr_is (dr_info, dr0_info))
2054 : misalignment = 0;
2055 125206 : else if (!dr0_alignment_known_p
2056 8095 : || !known_alignment_for_access_p (dr_info, vectype)
2057 133301 : || !DR_TARGET_ALIGNMENT (dr_info).is_constant (&alignment))
2058 : misalignment = DR_MISALIGNMENT_UNKNOWN;
2059 : else
2060 : {
2061 7086 : misalignment = dr_misalignment (dr_info, vectype, off);
2062 7086 : misalignment += npeel * TREE_INT_CST_LOW (DR_STEP (dr_info->dr));
2063 7086 : misalignment &= alignment - 1;
2064 : }
2065 732074 : alignment_support_scheme
2066 732074 : = vect_supportable_dr_alignment (loop_vinfo, dr_info, vectype,
2067 : misalignment);
2068 :
2069 732074 : vect_get_data_access_cost (loop_vinfo, dr_info,
2070 : alignment_support_scheme, misalignment,
2071 : inside_cost, outside_cost,
2072 : body_cost_vec, prologue_cost_vec);
2073 : }
2074 401743 : }
2075 :
2076 : /* Traverse peeling hash table and calculate cost for each peeling option.
2077 : Find the one with the lowest cost. */
2078 :
2079 : int
2080 146009 : vect_peeling_hash_get_lowest_cost (_vect_peel_info **slot,
2081 : _vect_peel_extended_info *min)
2082 : {
2083 146009 : vect_peel_info elem = *slot;
2084 146009 : unsigned int inside_cost = 0, outside_cost = 0;
2085 146009 : loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (min->vinfo);
2086 146009 : stmt_vector_for_cost prologue_cost_vec, body_cost_vec;
2087 :
2088 146009 : prologue_cost_vec.create (2);
2089 146009 : body_cost_vec.create (2);
2090 :
2091 146009 : vect_get_peeling_costs_all_drs (loop_vinfo, elem->dr_info, &inside_cost,
2092 : &outside_cost, &body_cost_vec,
2093 146009 : &prologue_cost_vec, elem->npeel);
2094 :
2095 146009 : body_cost_vec.release ();
2096 146009 : prologue_cost_vec.release ();
2097 :
2098 146009 : outside_cost += vect_get_known_peeling_cost (loop_vinfo, elem->npeel);
2099 :
2100 146009 : if (inside_cost < min->inside_cost
2101 1671 : || (inside_cost == min->inside_cost
2102 1247 : && outside_cost < min->outside_cost))
2103 : {
2104 144344 : min->inside_cost = inside_cost;
2105 144344 : min->outside_cost = outside_cost;
2106 144344 : min->peel_info.dr_info = elem->dr_info;
2107 144344 : min->peel_info.npeel = elem->npeel;
2108 144344 : min->peel_info.count = elem->count;
2109 : }
2110 :
2111 146009 : return 1;
2112 : }
2113 :
2114 :
2115 : /* Choose best peeling option by traversing peeling hash table and either
2116 : choosing an option with the lowest cost (if cost model is enabled) or the
2117 : option that aligns as many accesses as possible. */
2118 :
2119 : static struct _vect_peel_extended_info
2120 156976 : vect_peeling_hash_choose_best_peeling (hash_table<peel_info_hasher> *peeling_htab,
2121 : loop_vec_info loop_vinfo)
2122 : {
2123 156976 : struct _vect_peel_extended_info res;
2124 :
2125 156976 : res.peel_info.dr_info = NULL;
2126 156976 : res.vinfo = loop_vinfo;
2127 :
2128 156976 : if (!unlimited_cost_model (LOOP_VINFO_LOOP (loop_vinfo)))
2129 : {
2130 143034 : res.inside_cost = INT_MAX;
2131 143034 : res.outside_cost = INT_MAX;
2132 143034 : peeling_htab->traverse <_vect_peel_extended_info *,
2133 289043 : vect_peeling_hash_get_lowest_cost> (&res);
2134 : }
2135 : else
2136 : {
2137 13942 : res.peel_info.count = 0;
2138 13942 : peeling_htab->traverse <_vect_peel_extended_info *,
2139 49598 : vect_peeling_hash_get_most_frequent> (&res);
2140 13942 : res.inside_cost = 0;
2141 13942 : res.outside_cost = 0;
2142 : }
2143 :
2144 156976 : return res;
2145 : }
2146 :
2147 : /* Return if vectorization is definitely, possibly, or unlikely to be
2148 : supportable after loop peeling. */
2149 :
2150 : static enum peeling_support
2151 78696 : vect_peeling_supportable (loop_vec_info loop_vinfo, dr_vec_info *dr0_info,
2152 : unsigned npeel)
2153 : {
2154 78696 : vec<data_reference_p> datarefs = LOOP_VINFO_DATAREFS (loop_vinfo);
2155 78696 : enum dr_alignment_support supportable_dr_alignment;
2156 :
2157 78696 : bool dr0_alignment_known_p
2158 157392 : = known_alignment_for_access_p (dr0_info,
2159 78696 : STMT_VINFO_VECTYPE (dr0_info->stmt));
2160 78696 : bool has_unsupported_dr_p = false;
2161 78696 : unsigned int dr0_step = tree_to_shwi (DR_STEP (dr0_info->dr));
2162 78696 : int known_unsupported_misalignment = DR_MISALIGNMENT_UNKNOWN;
2163 :
2164 : /* Check if each data ref can be vectorized after peeling. */
2165 336408 : for (data_reference *dr : datarefs)
2166 : {
2167 116152 : if (dr == dr0_info->dr)
2168 77740 : continue;
2169 :
2170 38412 : dr_vec_info *dr_info = loop_vinfo->lookup_dr (dr);
2171 38412 : if (!vect_relevant_for_alignment_p (dr_info)
2172 38412 : || vect_dr_aligned_if_peeled_dr_is (dr_info, dr0_info))
2173 6703 : continue;
2174 :
2175 31709 : tree vectype = STMT_VINFO_VECTYPE (dr_info->stmt);
2176 31709 : int misalignment;
2177 31709 : unsigned HOST_WIDE_INT alignment;
2178 31709 : if (!dr0_alignment_known_p
2179 1854 : || !known_alignment_for_access_p (dr_info, vectype)
2180 33563 : || !DR_TARGET_ALIGNMENT (dr_info).is_constant (&alignment))
2181 : misalignment = DR_MISALIGNMENT_UNKNOWN;
2182 : else
2183 : {
2184 1840 : misalignment = dr_misalignment (dr_info, vectype);
2185 1840 : misalignment += npeel * TREE_INT_CST_LOW (DR_STEP (dr_info->dr));
2186 1840 : misalignment &= alignment - 1;
2187 : }
2188 31709 : supportable_dr_alignment
2189 31709 : = vect_supportable_dr_alignment (loop_vinfo, dr_info, vectype,
2190 : misalignment);
2191 31709 : if (supportable_dr_alignment == dr_unaligned_unsupported)
2192 : {
2193 30284 : has_unsupported_dr_p = true;
2194 :
2195 : /* If unaligned unsupported DRs exist, we do following checks to see
2196 : if they can be mutually aligned to support vectorization. If yes,
2197 : we can try peeling and create a runtime (mutual alignment) check
2198 : to guard the peeled loop. If no, return PEELING_UNSUPPORTED. */
2199 :
2200 : /* 1) If unaligned unsupported DRs have different alignment steps, the
2201 : probability of DRs being mutually aligned is very low, and it's
2202 : quite complex to check mutual alignment at runtime. We return
2203 : PEELING_UNSUPPORTED in this case. */
2204 30284 : if (tree_to_shwi (DR_STEP (dr)) != dr0_step)
2205 78696 : return peeling_unsupported;
2206 :
2207 : /* 2) Based on above same alignment step condition, if one known
2208 : misaligned DR has zero misalignment, or different misalignment
2209 : amount from another known misaligned DR, peeling is unable to
2210 : help make all these DRs aligned together. We won't try peeling
2211 : with versioning anymore. */
2212 26076 : int curr_dr_misalignment = dr_misalignment (dr_info, vectype);
2213 26076 : if (curr_dr_misalignment == 0)
2214 : return peeling_unsupported;
2215 14452 : if (known_unsupported_misalignment != DR_MISALIGNMENT_UNKNOWN)
2216 : {
2217 8 : if (curr_dr_misalignment != DR_MISALIGNMENT_UNKNOWN
2218 8 : && curr_dr_misalignment != known_unsupported_misalignment)
2219 : return peeling_unsupported;
2220 : }
2221 : else
2222 : known_unsupported_misalignment = curr_dr_misalignment;
2223 : }
2224 : }
2225 :
2226 : /* Vectorization is known to be supportable with peeling alone when there is
2227 : no unsupported DR. */
2228 62864 : return has_unsupported_dr_p ? peeling_maybe_supported
2229 : : peeling_known_supported;
2230 : }
2231 :
2232 : /* Compare two data-references DRA and DRB to group them into chunks
2233 : with related alignment. */
2234 :
2235 : static int
2236 4590549 : dr_align_group_sort_cmp (const void *dra_, const void *drb_)
2237 : {
2238 4590549 : data_reference_p dra = *(data_reference_p *)const_cast<void *>(dra_);
2239 4590549 : data_reference_p drb = *(data_reference_p *)const_cast<void *>(drb_);
2240 4590549 : int cmp;
2241 :
2242 : /* Stabilize sort. */
2243 4590549 : if (dra == drb)
2244 : return 0;
2245 :
2246 : /* Ordering of DRs according to base. */
2247 4590549 : cmp = data_ref_compare_tree (DR_BASE_ADDRESS (dra),
2248 : DR_BASE_ADDRESS (drb));
2249 4590549 : if (cmp != 0)
2250 : return cmp;
2251 :
2252 : /* And according to DR_OFFSET. */
2253 2026569 : cmp = data_ref_compare_tree (DR_OFFSET (dra), DR_OFFSET (drb));
2254 2026569 : if (cmp != 0)
2255 : return cmp;
2256 :
2257 : /* And after step. */
2258 2012332 : cmp = data_ref_compare_tree (DR_STEP (dra), DR_STEP (drb));
2259 2012332 : if (cmp != 0)
2260 : return cmp;
2261 :
2262 : /* Then sort after DR_INIT. In case of identical DRs sort after stmt UID. */
2263 2007089 : cmp = data_ref_compare_tree (DR_INIT (dra), DR_INIT (drb));
2264 2007089 : if (cmp == 0)
2265 237693 : return gimple_uid (DR_STMT (dra)) < gimple_uid (DR_STMT (drb)) ? -1 : 1;
2266 : return cmp;
2267 : }
2268 :
2269 : /* Function vect_enhance_data_refs_alignment
2270 :
2271 : This pass will use loop versioning and loop peeling in order to enhance
2272 : the alignment of data references in the loop.
2273 :
2274 : FOR NOW: we assume that whatever versioning/peeling takes place, only the
2275 : original loop is to be vectorized. Any other loops that are created by
2276 : the transformations performed in this pass - are not supposed to be
2277 : vectorized. This restriction will be relaxed.
2278 :
2279 : This pass will require a cost model to guide it whether to apply peeling
2280 : or versioning or a combination of the two. For example, the scheme that
2281 : Intel uses when given a loop with several memory accesses, is as follows:
2282 : choose one memory access ('p') which alignment you want to force by doing
2283 : peeling. Then, either (1) generate a loop in which 'p' is aligned and all
2284 : other accesses are not necessarily aligned, or (2) use loop versioning to
2285 : generate one loop in which all accesses are aligned, and another loop in
2286 : which only 'p' is necessarily aligned.
2287 :
2288 : ("Automatic Intra-Register Vectorization for the Intel Architecture",
2289 : Aart J.C. Bik, Milind Girkar, Paul M. Grey and Xinmin Tian, International
2290 : Journal of Parallel Programming, Vol. 30, No. 2, April 2002.)
2291 :
2292 : Devising a cost model is the most critical aspect of this work. It will
2293 : guide us on which access to peel for, whether to use loop versioning, how
2294 : many versions to create, etc. The cost model will probably consist of
2295 : generic considerations as well as target specific considerations (on
2296 : powerpc for example, misaligned stores are more painful than misaligned
2297 : loads).
2298 :
2299 : Here are the general steps involved in alignment enhancements:
2300 :
2301 : -- original loop, before alignment analysis:
2302 : for (i=0; i<N; i++){
2303 : x = q[i]; # DR_MISALIGNMENT(q) = unknown
2304 : p[i] = y; # DR_MISALIGNMENT(p) = unknown
2305 : }
2306 :
2307 : -- After vect_compute_data_refs_alignment:
2308 : for (i=0; i<N; i++){
2309 : x = q[i]; # DR_MISALIGNMENT(q) = 3
2310 : p[i] = y; # DR_MISALIGNMENT(p) = unknown
2311 : }
2312 :
2313 : -- Possibility 1: we do loop versioning:
2314 : if (p is aligned) {
2315 : for (i=0; i<N; i++){ # loop 1A
2316 : x = q[i]; # DR_MISALIGNMENT(q) = 3
2317 : p[i] = y; # DR_MISALIGNMENT(p) = 0
2318 : }
2319 : }
2320 : else {
2321 : for (i=0; i<N; i++){ # loop 1B
2322 : x = q[i]; # DR_MISALIGNMENT(q) = 3
2323 : p[i] = y; # DR_MISALIGNMENT(p) = unaligned
2324 : }
2325 : }
2326 :
2327 : -- Possibility 2: we do loop peeling:
2328 : for (i = 0; i < 3; i++){ # (scalar loop, not to be vectorized).
2329 : x = q[i];
2330 : p[i] = y;
2331 : }
2332 : for (i = 3; i < N; i++){ # loop 2A
2333 : x = q[i]; # DR_MISALIGNMENT(q) = 0
2334 : p[i] = y; # DR_MISALIGNMENT(p) = unknown
2335 : }
2336 :
2337 : -- Possibility 3: combination of loop peeling and versioning:
2338 : if (p & q are mutually aligned) {
2339 : for (i=0; i<3; i++){ # (peeled loop iterations).
2340 : x = q[i];
2341 : p[i] = y;
2342 : }
2343 : for (i=3; i<N; i++){ # loop 3A
2344 : x = q[i]; # DR_MISALIGNMENT(q) = 0
2345 : p[i] = y; # DR_MISALIGNMENT(p) = 0
2346 : }
2347 : }
2348 : else {
2349 : for (i=0; i<N; i++){ # (scalar loop, not to be vectorized).
2350 : x = q[i]; # DR_MISALIGNMENT(q) = 3
2351 : p[i] = y; # DR_MISALIGNMENT(p) = unknown
2352 : }
2353 : }
2354 :
2355 : These loops are later passed to loop_transform to be vectorized. The
2356 : vectorizer will use the alignment information to guide the transformation
2357 : (whether to generate regular loads/stores, or with special handling for
2358 : misalignment). */
2359 :
2360 : opt_result
2361 381105 : vect_enhance_data_refs_alignment (loop_vec_info loop_vinfo)
2362 : {
2363 381105 : class loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
2364 381105 : dr_vec_info *first_store = NULL;
2365 381105 : dr_vec_info *dr0_info = NULL;
2366 381105 : struct data_reference *dr;
2367 381105 : unsigned int i;
2368 381105 : bool do_peeling = false;
2369 381105 : bool do_versioning = false;
2370 381105 : bool try_peeling_with_versioning = false;
2371 381105 : unsigned int npeel = 0;
2372 381105 : bool one_misalignment_known = false;
2373 381105 : bool one_misalignment_unknown = false;
2374 381105 : bool one_dr_unsupportable = false;
2375 381105 : dr_vec_info *unsupportable_dr_info = NULL;
2376 381105 : unsigned int dr0_same_align_drs = 0, first_store_same_align_drs = 0;
2377 381105 : hash_table<peel_info_hasher> peeling_htab (1);
2378 :
2379 381105 : DUMP_VECT_SCOPE ("vect_enhance_data_refs_alignment");
2380 :
2381 : /* Reset data so we can safely be called multiple times. */
2382 381105 : LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo).truncate (0);
2383 381105 : LOOP_VINFO_PEELING_FOR_ALIGNMENT (loop_vinfo) = 0;
2384 :
2385 381105 : if (LOOP_VINFO_DATAREFS (loop_vinfo).is_empty ())
2386 14036 : return opt_result::success ();
2387 :
2388 : /* Sort the vector of datarefs so DRs that have the same or dependent
2389 : alignment are next to each other. */
2390 367069 : auto_vec<data_reference_p> datarefs
2391 367069 : = LOOP_VINFO_DATAREFS (loop_vinfo).copy ();
2392 367069 : datarefs.qsort (dr_align_group_sort_cmp);
2393 :
2394 : /* Compute the number of DRs that become aligned when we peel
2395 : a dataref so it becomes aligned. */
2396 734138 : auto_vec<unsigned> n_same_align_refs (datarefs.length ());
2397 367069 : n_same_align_refs.quick_grow_cleared (datarefs.length ());
2398 367069 : unsigned i0;
2399 753835 : for (i0 = 0; i0 < datarefs.length (); ++i0)
2400 380154 : if (DR_BASE_ADDRESS (datarefs[i0]))
2401 : break;
2402 2384628 : for (i = i0 + 1; i <= datarefs.length (); ++i)
2403 : {
2404 825245 : if (i == datarefs.length ()
2405 464788 : || !operand_equal_p (DR_BASE_ADDRESS (datarefs[i0]),
2406 464788 : DR_BASE_ADDRESS (datarefs[i]), 0)
2407 218255 : || !operand_equal_p (DR_OFFSET (datarefs[i0]),
2408 218255 : DR_OFFSET (datarefs[i]), 0)
2409 1042190 : || !operand_equal_p (DR_STEP (datarefs[i0]),
2410 216945 : DR_STEP (datarefs[i]), 0))
2411 : {
2412 : /* The subgroup [i0, i-1] now only differs in DR_INIT and
2413 : possibly DR_TARGET_ALIGNMENT. Still the whole subgroup
2414 : will get known misalignment if we align one of the refs
2415 : with the largest DR_TARGET_ALIGNMENT. */
2416 1434093 : for (unsigned j = i0; j < i; ++j)
2417 : {
2418 825245 : dr_vec_info *dr_infoj = loop_vinfo->lookup_dr (datarefs[j]);
2419 3587186 : for (unsigned k = i0; k < i; ++k)
2420 : {
2421 2761941 : if (k == j)
2422 825245 : continue;
2423 1936696 : dr_vec_info *dr_infok = loop_vinfo->lookup_dr (datarefs[k]);
2424 1936696 : if (vect_dr_aligned_if_related_peeled_dr_is (dr_infok,
2425 : dr_infoj))
2426 709876 : n_same_align_refs[j]++;
2427 : }
2428 : }
2429 : i0 = i;
2430 : }
2431 : }
2432 :
2433 : /* While cost model enhancements are expected in the future, the high level
2434 : view of the code at this time is as follows:
2435 :
2436 : A) If there is a misaligned access then see if doing peeling alone can
2437 : make all data references satisfy vect_supportable_dr_alignment. If so,
2438 : update data structures and return.
2439 :
2440 : B) If peeling alone wasn't possible and there is a data reference with an
2441 : unknown misalignment that does not satisfy vect_supportable_dr_alignment
2442 : then we may use either of the following two approaches.
2443 :
2444 : B1) Try peeling with versioning: Add a runtime loop versioning check to
2445 : see if all unsupportable data references are mutually aligned, which
2446 : means they will be uniformly aligned after a certain amount of loop
2447 : peeling. If peeling and versioning can be used together, set
2448 : LOOP_VINFO_ALLOW_MUTUAL_ALIGNMENT_P to TRUE and return.
2449 :
2450 : B2) Try versioning alone: Add a runtime loop versioning check to see if
2451 : all unsupportable data references are already uniformly aligned
2452 : without loop peeling. If versioning can be applied alone, set
2453 : LOOP_VINFO_ALLOW_MUTUAL_ALIGNMENT_P to FALSE and return.
2454 :
2455 : Above B1 is more powerful and more likely to be adopted than B2. But B2
2456 : is still available and useful in some cases, for example, the cost model
2457 : does not allow much peeling.
2458 :
2459 : C) If none of above was successful then the alignment was not enhanced,
2460 : just return. */
2461 :
2462 : /* (1) Peeling to force alignment. */
2463 :
2464 : /* (1.1) Decide whether to perform peeling, how many iterations to peel, and
2465 : if vectorization may be supported by peeling with versioning.
2466 : Considerations:
2467 : - How many accesses will become aligned due to the peeling
2468 : - How many accesses will become unaligned due to the peeling,
2469 : and the cost of misaligned accesses.
2470 : - The cost of peeling (the extra runtime checks, the increase
2471 : in code size). */
2472 :
2473 367069 : poly_uint64 vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
2474 1043967 : FOR_EACH_VEC_ELT (datarefs, i, dr)
2475 : {
2476 722139 : dr_vec_info *dr_info = loop_vinfo->lookup_dr (dr);
2477 722139 : if (!vect_relevant_for_alignment_p (dr_info))
2478 109154 : continue;
2479 :
2480 612985 : stmt_vec_info stmt_info = dr_info->stmt;
2481 612985 : tree vectype = STMT_VINFO_VECTYPE (stmt_info);
2482 :
2483 : /* With variable VF, unsafe speculative read can be avoided for known
2484 : inbounds DRs as long as partial vectors are used. */
2485 612985 : if (!vf.is_constant ()
2486 : && dr_safe_speculative_read_required (stmt_info)
2487 : && DR_SCALAR_KNOWN_BOUNDS (dr_info))
2488 : {
2489 : dr_set_safe_speculative_read_required (stmt_info, false);
2490 : LOOP_VINFO_MUST_USE_PARTIAL_VECTORS_P (loop_vinfo) = true;
2491 : }
2492 :
2493 612985 : do_peeling = vector_alignment_reachable_p (dr_info, vf);
2494 612985 : if (do_peeling)
2495 : {
2496 535667 : if (known_alignment_for_access_p (dr_info, vectype))
2497 : {
2498 300112 : unsigned int npeel_tmp = 0;
2499 300112 : bool negative = tree_int_cst_compare (DR_STEP (dr),
2500 300112 : size_zero_node) < 0;
2501 :
2502 : /* If known_alignment_for_access_p then we have set
2503 : DR_MISALIGNMENT which is only done if we know it at compiler
2504 : time, so it is safe to assume target alignment is constant.
2505 : */
2506 300112 : unsigned int target_align =
2507 300112 : DR_TARGET_ALIGNMENT (dr_info).to_constant ();
2508 300112 : unsigned HOST_WIDE_INT dr_size = vect_get_scalar_dr_size (dr_info);
2509 300112 : poly_int64 off = 0;
2510 300112 : if (negative)
2511 2552 : off = (TYPE_VECTOR_SUBPARTS (vectype) - 1) * -dr_size;
2512 300112 : unsigned int mis = dr_misalignment (dr_info, vectype, off);
2513 300112 : mis = negative ? mis : -mis;
2514 300112 : if (mis != 0)
2515 13198 : npeel_tmp = (mis & (target_align - 1)) / dr_size;
2516 :
2517 : /* For multiple types, it is possible that the bigger type access
2518 : will have more than one peeling option. E.g., a loop with two
2519 : types: one of size (vector size / 4), and the other one of
2520 : size (vector size / 8). Vectorization factor will be 8. If both
2521 : accesses are misaligned by 3, the first one needs one scalar
2522 : iteration to be aligned, and the second one needs 5. But the
2523 : first one will be aligned also by peeling 5 scalar
2524 : iterations, and in that case both accesses will be aligned.
2525 : Hence, except for the immediate peeling amount, we also want
2526 : to try to add full vector size, while we don't exceed
2527 : vectorization factor.
2528 : We do this automatically for cost model, since we calculate
2529 : cost for every peeling option. */
2530 300112 : poly_uint64 nscalars = npeel_tmp;
2531 300112 : if (unlimited_cost_model (LOOP_VINFO_LOOP (loop_vinfo)))
2532 : {
2533 39676 : unsigned group_size = 1;
2534 39676 : if (STMT_VINFO_GROUPED_ACCESS (stmt_info))
2535 1917 : group_size = DR_GROUP_SIZE (stmt_info);
2536 39676 : nscalars = vf * group_size;
2537 : }
2538 :
2539 : /* Save info about DR in the hash table. Also include peeling
2540 : amounts according to the explanation above. Indicate
2541 : the alignment status when the ref is not aligned.
2542 : ??? Rather than using unknown alignment here we should
2543 : prune all entries from the peeling hashtable which cause
2544 : DRs to be not supported. */
2545 300112 : bool supportable_if_not_aligned
2546 : = vect_supportable_dr_alignment
2547 300112 : (loop_vinfo, dr_info, vectype, DR_MISALIGNMENT_UNKNOWN);
2548 658899 : while (known_le (npeel_tmp, nscalars))
2549 : {
2550 358787 : vect_peeling_hash_insert (&peeling_htab, loop_vinfo,
2551 : dr_info, npeel_tmp,
2552 : supportable_if_not_aligned);
2553 358787 : npeel_tmp += MAX (1, target_align / dr_size);
2554 : }
2555 :
2556 300112 : one_misalignment_known = true;
2557 : }
2558 : else
2559 : {
2560 : /* If we don't know any misalignment values, we prefer
2561 : peeling for data-ref that has the maximum number of data-refs
2562 : with the same alignment, unless the target prefers to align
2563 : stores over load. */
2564 235555 : unsigned same_align_drs = n_same_align_refs[i];
2565 235555 : if (!dr0_info
2566 235555 : || dr0_same_align_drs < same_align_drs)
2567 : {
2568 : dr0_same_align_drs = same_align_drs;
2569 : dr0_info = dr_info;
2570 : }
2571 : /* For data-refs with the same number of related
2572 : accesses prefer the one where the misalign
2573 : computation will be invariant in the outermost loop. */
2574 76049 : else if (dr0_same_align_drs == same_align_drs)
2575 : {
2576 74611 : class loop *ivloop0, *ivloop;
2577 74611 : ivloop0 = outermost_invariant_loop_for_expr
2578 74611 : (loop, DR_BASE_ADDRESS (dr0_info->dr));
2579 74611 : ivloop = outermost_invariant_loop_for_expr
2580 74611 : (loop, DR_BASE_ADDRESS (dr));
2581 74611 : if ((ivloop && !ivloop0)
2582 74611 : || (ivloop && ivloop0
2583 74603 : && flow_loop_nested_p (ivloop, ivloop0)))
2584 : dr0_info = dr_info;
2585 : }
2586 :
2587 235555 : one_misalignment_unknown = true;
2588 :
2589 : /* Check for data refs with unsupportable alignment that
2590 : can be peeled. */
2591 235555 : enum dr_alignment_support supportable_dr_alignment
2592 235555 : = vect_supportable_dr_alignment (loop_vinfo, dr_info, vectype,
2593 : DR_MISALIGNMENT_UNKNOWN);
2594 235555 : if (supportable_dr_alignment == dr_unaligned_unsupported)
2595 : {
2596 97075 : one_dr_unsupportable = true;
2597 97075 : unsupportable_dr_info = dr_info;
2598 : }
2599 :
2600 235555 : if (!first_store && DR_IS_WRITE (dr))
2601 : {
2602 50904 : first_store = dr_info;
2603 50904 : first_store_same_align_drs = same_align_drs;
2604 : }
2605 : }
2606 : }
2607 : else
2608 : {
2609 77318 : if (!aligned_access_p (dr_info, vectype))
2610 : {
2611 45241 : if (dump_enabled_p ())
2612 2088 : dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2613 : "vector alignment may not be reachable\n");
2614 : break;
2615 : }
2616 : }
2617 : }
2618 :
2619 : /* Check if we can possibly peel the loop. */
2620 367069 : if (!vect_can_advance_ivs_p (loop_vinfo)
2621 363618 : || !slpeel_can_duplicate_loop_p (loop, LOOP_VINFO_MAIN_EXIT (loop_vinfo),
2622 363618 : loop_preheader_edge (loop))
2623 363618 : || loop->inner
2624 : /* We don't currently maintain the LCSSA for prologue peeled inversed
2625 : loops. */
2626 729092 : || (LOOP_VINFO_EARLY_BREAKS_VECT_PEELED (loop_vinfo)
2627 29801 : && !LOOP_VINFO_NITERS_UNCOUNTED_P (loop_vinfo)))
2628 : do_peeling = false;
2629 :
2630 367069 : struct _vect_peel_extended_info peel_for_known_alignment;
2631 367069 : struct _vect_peel_extended_info peel_for_unknown_alignment;
2632 367069 : struct _vect_peel_extended_info best_peel;
2633 :
2634 367069 : peel_for_unknown_alignment.inside_cost = INT_MAX;
2635 367069 : peel_for_unknown_alignment.outside_cost = INT_MAX;
2636 367069 : peel_for_unknown_alignment.peel_info.count = 0;
2637 :
2638 367069 : if (do_peeling
2639 367069 : && one_misalignment_unknown)
2640 : {
2641 : /* Check if the target requires to prefer stores over loads, i.e., if
2642 : misaligned stores are more expensive than misaligned loads (taking
2643 : drs with same alignment into account). */
2644 144188 : unsigned int load_inside_cost = 0;
2645 144188 : unsigned int load_outside_cost = 0;
2646 144188 : unsigned int store_inside_cost = 0;
2647 144188 : unsigned int store_outside_cost = 0;
2648 144188 : unsigned int estimated_npeels = vect_vf_for_cost (loop_vinfo) / 2;
2649 :
2650 144188 : stmt_vector_for_cost dummy;
2651 144188 : dummy.create (2);
2652 144188 : vect_get_peeling_costs_all_drs (loop_vinfo, dr0_info,
2653 : &load_inside_cost,
2654 : &load_outside_cost,
2655 : &dummy, &dummy, estimated_npeels);
2656 144188 : dummy.release ();
2657 :
2658 144188 : if (first_store)
2659 : {
2660 44150 : dummy.create (2);
2661 44150 : vect_get_peeling_costs_all_drs (loop_vinfo, first_store,
2662 : &store_inside_cost,
2663 : &store_outside_cost,
2664 : &dummy, &dummy,
2665 : estimated_npeels);
2666 44150 : dummy.release ();
2667 : }
2668 : else
2669 : {
2670 100038 : store_inside_cost = INT_MAX;
2671 100038 : store_outside_cost = INT_MAX;
2672 : }
2673 :
2674 144188 : if (load_inside_cost > store_inside_cost
2675 144188 : || (load_inside_cost == store_inside_cost
2676 43617 : && load_outside_cost > store_outside_cost))
2677 : {
2678 144188 : dr0_info = first_store;
2679 144188 : dr0_same_align_drs = first_store_same_align_drs;
2680 144188 : peel_for_unknown_alignment.inside_cost = store_inside_cost;
2681 144188 : peel_for_unknown_alignment.outside_cost = store_outside_cost;
2682 : }
2683 : else
2684 : {
2685 144188 : peel_for_unknown_alignment.inside_cost = load_inside_cost;
2686 144188 : peel_for_unknown_alignment.outside_cost = load_outside_cost;
2687 : }
2688 :
2689 144188 : peel_for_unknown_alignment.outside_cost
2690 144188 : += vect_get_known_peeling_cost (loop_vinfo, estimated_npeels);
2691 :
2692 144188 : peel_for_unknown_alignment.peel_info.count = dr0_same_align_drs + 1;
2693 : }
2694 :
2695 367069 : peel_for_unknown_alignment.peel_info.npeel = 0;
2696 367069 : peel_for_unknown_alignment.peel_info.dr_info = dr0_info;
2697 :
2698 367069 : best_peel = peel_for_unknown_alignment;
2699 :
2700 367069 : peel_for_known_alignment.inside_cost = INT_MAX;
2701 367069 : peel_for_known_alignment.outside_cost = INT_MAX;
2702 367069 : peel_for_known_alignment.peel_info.count = 0;
2703 367069 : peel_for_known_alignment.peel_info.dr_info = NULL;
2704 :
2705 367069 : if (do_peeling && one_misalignment_known)
2706 : {
2707 : /* Peeling is possible, but there is no data access that is not supported
2708 : unless aligned. So we try to choose the best possible peeling from
2709 : the hash table. */
2710 156976 : peel_for_known_alignment = vect_peeling_hash_choose_best_peeling
2711 156976 : (&peeling_htab, loop_vinfo);
2712 : }
2713 :
2714 : /* Compare costs of peeling for known and unknown alignment. */
2715 367069 : if (peel_for_known_alignment.peel_info.dr_info != NULL
2716 156976 : && peel_for_unknown_alignment.inside_cost
2717 : >= peel_for_known_alignment.inside_cost)
2718 : {
2719 142816 : best_peel = peel_for_known_alignment;
2720 :
2721 : /* If the best peeling for known alignment has NPEEL == 0, perform no
2722 : peeling at all except if there is an unsupportable dr that we can
2723 : align. */
2724 142816 : if (best_peel.peel_info.npeel == 0 && !one_dr_unsupportable)
2725 : do_peeling = false;
2726 : }
2727 :
2728 : /* If there is an unsupportable data ref, prefer this over all choices so far
2729 : since we'd have to discard a chosen peeling except when it accidentally
2730 : aligned the unsupportable data ref. */
2731 231925 : if (one_dr_unsupportable)
2732 : dr0_info = unsupportable_dr_info;
2733 287736 : else if (do_peeling)
2734 : {
2735 : /* Calculate the penalty for no peeling, i.e. leaving everything as-is.
2736 : TODO: Use nopeel_outside_cost or get rid of it? */
2737 67396 : unsigned nopeel_inside_cost = 0;
2738 67396 : unsigned nopeel_outside_cost = 0;
2739 :
2740 67396 : stmt_vector_for_cost dummy;
2741 67396 : dummy.create (2);
2742 67396 : vect_get_peeling_costs_all_drs (loop_vinfo, NULL, &nopeel_inside_cost,
2743 : &nopeel_outside_cost, &dummy, &dummy, 0);
2744 67396 : dummy.release ();
2745 :
2746 : /* Add epilogue costs. As we do not peel for alignment here, no prologue
2747 : costs will be recorded. */
2748 67396 : nopeel_outside_cost += vect_get_known_peeling_cost (loop_vinfo, 0);
2749 :
2750 67396 : npeel = best_peel.peel_info.npeel;
2751 67396 : dr0_info = best_peel.peel_info.dr_info;
2752 :
2753 : /* If no peeling is not more expensive than the best peeling we
2754 : have so far, don't perform any peeling. */
2755 67396 : if (nopeel_inside_cost <= best_peel.inside_cost)
2756 60878 : do_peeling = false;
2757 : }
2758 :
2759 146729 : if (do_peeling)
2760 : {
2761 78696 : stmt_vec_info stmt_info = dr0_info->stmt;
2762 78696 : if (known_alignment_for_access_p (dr0_info,
2763 : STMT_VINFO_VECTYPE (stmt_info)))
2764 : {
2765 6495 : bool negative = tree_int_cst_compare (DR_STEP (dr0_info->dr),
2766 6495 : size_zero_node) < 0;
2767 6495 : if (!npeel)
2768 : {
2769 : /* Since it's known at compile time, compute the number of
2770 : iterations in the peeled loop (the peeling factor) for use in
2771 : updating DR_MISALIGNMENT values. The peeling factor is the
2772 : vectorization factor minus the misalignment as an element
2773 : count. */
2774 0 : tree vectype = STMT_VINFO_VECTYPE (stmt_info);
2775 0 : poly_int64 off = 0;
2776 0 : if (negative)
2777 0 : off = ((TYPE_VECTOR_SUBPARTS (vectype) - 1)
2778 0 : * -TREE_INT_CST_LOW (TYPE_SIZE_UNIT (TREE_TYPE (vectype))));
2779 0 : unsigned int mis
2780 0 : = dr_misalignment (dr0_info, vectype, off);
2781 0 : mis = negative ? mis : -mis;
2782 : /* If known_alignment_for_access_p then we have set
2783 : DR_MISALIGNMENT which is only done if we know it at compiler
2784 : time, so it is safe to assume target alignment is constant.
2785 : */
2786 0 : unsigned int target_align =
2787 0 : DR_TARGET_ALIGNMENT (dr0_info).to_constant ();
2788 0 : npeel = ((mis & (target_align - 1))
2789 0 : / vect_get_scalar_dr_size (dr0_info));
2790 : }
2791 :
2792 : /* For interleaved data access every iteration accesses all the
2793 : members of the group, therefore we divide the number of iterations
2794 : by the group size. */
2795 6495 : if (STMT_VINFO_GROUPED_ACCESS (stmt_info))
2796 281 : npeel /= DR_GROUP_SIZE (stmt_info);
2797 :
2798 6495 : if (dump_enabled_p ())
2799 284 : dump_printf_loc (MSG_NOTE, vect_location,
2800 : "Try peeling by %d\n", npeel);
2801 : }
2802 :
2803 : /* Check how peeling for alignment can support vectorization. Function
2804 : vect_peeling_supportable returns one of the three possible values:
2805 : - PEELING_KNOWN_SUPPORTED: indicates that we know all unsupported
2806 : datarefs can be aligned after peeling. We can use peeling alone.
2807 : - PEELING_MAYBE_SUPPORTED: indicates that peeling may be able to make
2808 : these datarefs aligned but we are not sure about it at compile time.
2809 : We will try peeling with versioning to add a runtime check to guard
2810 : the peeled loop.
2811 : - PEELING_UNSUPPORTED: indicates that peeling is almost impossible to
2812 : support vectorization. We will stop trying peeling. */
2813 78696 : switch (vect_peeling_supportable (loop_vinfo, dr0_info, npeel))
2814 : {
2815 : case peeling_known_supported:
2816 : break;
2817 13258 : case peeling_maybe_supported:
2818 13258 : try_peeling_with_versioning = true;
2819 13258 : break;
2820 15832 : case peeling_unsupported:
2821 15832 : do_peeling = false;
2822 15832 : break;
2823 : }
2824 :
2825 : /* Check if all datarefs are supportable and log. */
2826 78696 : if (do_peeling
2827 78696 : && npeel == 0
2828 78696 : && known_alignment_for_access_p (dr0_info,
2829 : STMT_VINFO_VECTYPE (stmt_info)))
2830 3 : return opt_result::success ();
2831 :
2832 : /* Cost model #1 - honor --param vect-max-peeling-for-alignment. */
2833 78693 : if (do_peeling)
2834 : {
2835 62861 : unsigned max_allowed_peel
2836 62861 : = param_vect_max_peeling_for_alignment;
2837 62861 : if (loop_cost_model (loop) <= VECT_COST_MODEL_CHEAP)
2838 : max_allowed_peel = 0;
2839 14563 : if (max_allowed_peel != (unsigned)-1)
2840 : {
2841 48319 : unsigned max_peel = npeel;
2842 48319 : if (max_peel == 0)
2843 : {
2844 45564 : poly_uint64 target_align = DR_TARGET_ALIGNMENT (dr0_info);
2845 45564 : unsigned HOST_WIDE_INT target_align_c;
2846 45564 : if (target_align.is_constant (&target_align_c))
2847 91128 : max_peel =
2848 45564 : target_align_c / vect_get_scalar_dr_size (dr0_info) - 1;
2849 : else
2850 : {
2851 : do_peeling = false;
2852 : if (dump_enabled_p ())
2853 : dump_printf_loc (MSG_NOTE, vect_location,
2854 : "Disable peeling, max peels set and vector"
2855 : " alignment unknown\n");
2856 : }
2857 : }
2858 48319 : if (max_peel > max_allowed_peel)
2859 : {
2860 48311 : do_peeling = false;
2861 48311 : if (dump_enabled_p ())
2862 53 : dump_printf_loc (MSG_NOTE, vect_location,
2863 : "Disable peeling, max peels reached: %d\n", max_peel);
2864 : }
2865 : }
2866 : }
2867 :
2868 : /* Cost model #2 - if peeling may result in a remaining loop not
2869 : iterating enough to be vectorized then do not peel. Since this
2870 : is a cost heuristic rather than a correctness decision, use the
2871 : most likely runtime value for variable vectorization factors. */
2872 53 : if (do_peeling
2873 14550 : && LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo))
2874 : {
2875 3193 : unsigned int assumed_vf = vect_vf_for_cost (loop_vinfo);
2876 3193 : unsigned int max_peel = npeel == 0 ? assumed_vf - 1 : npeel;
2877 3193 : if ((unsigned HOST_WIDE_INT) LOOP_VINFO_INT_NITERS (loop_vinfo)
2878 3193 : < assumed_vf + max_peel)
2879 : do_peeling = false;
2880 : }
2881 :
2882 : if (do_peeling)
2883 : {
2884 : /* (1.2) Update the DR_MISALIGNMENT of each data reference DR_i.
2885 : If the misalignment of DR_i is identical to that of dr0 then set
2886 : DR_MISALIGNMENT (DR_i) to zero. If the misalignment of DR_i and
2887 : dr0 are known at compile time then increment DR_MISALIGNMENT (DR_i)
2888 : by the peeling factor times the element size of DR_i (MOD the
2889 : vectorization factor times the size). Otherwise, the
2890 : misalignment of DR_i must be set to unknown. */
2891 30611 : FOR_EACH_VEC_ELT (datarefs, i, dr)
2892 16870 : if (dr != dr0_info->dr)
2893 : {
2894 3129 : dr_vec_info *dr_info = loop_vinfo->lookup_dr (dr);
2895 3129 : if (!vect_relevant_for_alignment_p (dr_info))
2896 354 : continue;
2897 :
2898 2775 : vect_update_misalignment_for_peel (dr_info, dr0_info, npeel);
2899 : }
2900 : }
2901 :
2902 78693 : if (do_peeling && !try_peeling_with_versioning)
2903 : {
2904 : /* Update data structures if peeling will be applied alone. */
2905 12689 : LOOP_VINFO_UNALIGNED_DR (loop_vinfo) = dr0_info;
2906 12689 : if (npeel)
2907 2104 : LOOP_VINFO_PEELING_FOR_ALIGNMENT (loop_vinfo) = npeel;
2908 : else
2909 10585 : LOOP_VINFO_PEELING_FOR_ALIGNMENT (loop_vinfo) = -1;
2910 12689 : SET_DR_MISALIGNMENT (dr0_info,
2911 : vect_dr_misalign_for_aligned_access (dr0_info));
2912 12689 : if (dump_enabled_p ())
2913 : {
2914 344 : dump_printf_loc (MSG_NOTE, vect_location,
2915 : "Alignment of access forced using peeling.\n");
2916 344 : dump_printf_loc (MSG_NOTE, vect_location,
2917 : "Peeling for alignment will be applied.\n");
2918 : }
2919 :
2920 : /* The inside-loop cost will be accounted for in vectorizable_load
2921 : and vectorizable_store correctly with adjusted alignments.
2922 : Drop the body_cst_vec on the floor here. */
2923 12689 : return opt_result::success ();
2924 : }
2925 : }
2926 :
2927 : /* (2) Versioning to force alignment. */
2928 :
2929 : /* Try versioning if:
2930 : 1) optimize loop for speed and the cost-model is not cheap
2931 : 2) there is at least one unsupported misaligned data ref with an unknown
2932 : misalignment, and
2933 : 3) all misaligned data refs with a known misalignment are supported, and
2934 : 4) the number of runtime alignment checks is within reason. */
2935 :
2936 354377 : do_versioning
2937 354377 : = (optimize_loop_nest_for_speed_p (loop)
2938 353949 : && !loop->inner /* FORNOW */
2939 706731 : && loop_cost_model (loop) > VECT_COST_MODEL_CHEAP);
2940 :
2941 : if (do_versioning)
2942 : {
2943 356448 : FOR_EACH_VEC_ELT (datarefs, i, dr)
2944 : {
2945 268290 : dr_vec_info *dr_info = loop_vinfo->lookup_dr (dr);
2946 268290 : if (!vect_relevant_for_alignment_p (dr_info))
2947 188571 : continue;
2948 :
2949 184700 : stmt_vec_info stmt_info = dr_info->stmt;
2950 184700 : if (STMT_VINFO_STRIDED_P (stmt_info))
2951 : {
2952 : do_versioning = false;
2953 5023 : break;
2954 : }
2955 :
2956 183630 : tree vectype = STMT_VINFO_VECTYPE (stmt_info);
2957 183630 : bool negative = tree_int_cst_compare (DR_STEP (dr),
2958 183630 : size_zero_node) < 0;
2959 183630 : poly_int64 off = 0;
2960 183630 : if (negative)
2961 3388 : off = ((TYPE_VECTOR_SUBPARTS (vectype) - 1)
2962 3388 : * -TREE_INT_CST_LOW (TYPE_SIZE_UNIT (TREE_TYPE (vectype))));
2963 183630 : int misalignment;
2964 183630 : if ((misalignment = dr_misalignment (dr_info, vectype, off)) == 0)
2965 104981 : continue;
2966 :
2967 78649 : enum dr_alignment_support supportable_dr_alignment
2968 78649 : = vect_supportable_dr_alignment (loop_vinfo, dr_info, vectype,
2969 : misalignment);
2970 78649 : if (supportable_dr_alignment == dr_unaligned_unsupported)
2971 : {
2972 15277 : if (misalignment != DR_MISALIGNMENT_UNKNOWN
2973 15277 : || (LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo).length ()
2974 11848 : >= (unsigned) param_vect_max_version_for_alignment_checks))
2975 : {
2976 : do_versioning = false;
2977 5023 : break;
2978 : }
2979 :
2980 : /* Forcing alignment in the first iteration is no good if
2981 : we don't keep it across iterations. For now, just disable
2982 : versioning in this case.
2983 : ?? We could actually unroll the loop to achieve the required
2984 : overall step alignment, and forcing the alignment could be
2985 : done by doing some iterations of the non-vectorized loop. */
2986 11440 : if (!multiple_p (vf * DR_STEP_ALIGNMENT (dr),
2987 11440 : DR_TARGET_ALIGNMENT (dr_info)))
2988 : {
2989 : do_versioning = false;
2990 : break;
2991 : }
2992 :
2993 : /* Use "mask = DR_TARGET_ALIGNMENT - 1" to test rightmost address
2994 : bits for runtime alignment check. For example, for 16 bytes
2995 : target alignment the mask is 15 = 0xf. */
2996 11440 : poly_uint64 mask = DR_TARGET_ALIGNMENT (dr_info) - 1;
2997 :
2998 : /* FORNOW: use the same mask to test all potentially unaligned
2999 : references in the loop. */
3000 11440 : if (maybe_ne (LOOP_VINFO_PTR_MASK (loop_vinfo), 0U)
3001 11440 : && maybe_ne (LOOP_VINFO_PTR_MASK (loop_vinfo), mask))
3002 : {
3003 : do_versioning = false;
3004 : break;
3005 : }
3006 :
3007 11324 : LOOP_VINFO_PTR_MASK (loop_vinfo) = mask;
3008 11324 : LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo).safe_push (stmt_info);
3009 : }
3010 : }
3011 :
3012 : /* Versioning requires at least one misaligned data reference. */
3013 93181 : if (!LOOP_REQUIRES_VERSIONING_FOR_ALIGNMENT (loop_vinfo))
3014 : do_versioning = false;
3015 5618 : else if (!do_versioning)
3016 540 : LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo).truncate (0);
3017 : }
3018 :
3019 : /* If we are trying peeling with versioning but versioning is disabled for
3020 : some reason, peeling should be turned off together. */
3021 354377 : if (try_peeling_with_versioning && !do_versioning)
3022 : do_peeling = false;
3023 :
3024 342263 : if (do_versioning)
3025 : {
3026 : const vec<stmt_vec_info> &may_misalign_stmts
3027 : = LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo);
3028 : stmt_vec_info stmt_info;
3029 :
3030 : /* It can now be assumed that the data references in the statements
3031 : in LOOP_VINFO_MAY_MISALIGN_STMTS will be aligned in the version
3032 : of the loop being vectorized. */
3033 13762 : FOR_EACH_VEC_ELT (may_misalign_stmts, i, stmt_info)
3034 : {
3035 8684 : dr_vec_info *dr_info = STMT_VINFO_DR_INFO (stmt_info);
3036 8684 : SET_DR_MISALIGNMENT (dr_info,
3037 : vect_dr_misalign_for_aligned_access (dr_info));
3038 8684 : if (dump_enabled_p ())
3039 146 : dump_printf_loc (MSG_NOTE, vect_location,
3040 : "Alignment of access forced using versioning.\n");
3041 : }
3042 :
3043 5078 : if (do_peeling)
3044 : {
3045 : /* This point is reached if peeling and versioning are used together
3046 : to ensure alignment. Update data structures to make sure the loop
3047 : is correctly peeled and a right runtime check is added for loop
3048 : versioning. */
3049 1052 : gcc_assert (try_peeling_with_versioning);
3050 1052 : LOOP_VINFO_UNALIGNED_DR (loop_vinfo) = dr0_info;
3051 1052 : LOOP_VINFO_PEELING_FOR_ALIGNMENT (loop_vinfo) = -1;
3052 1052 : LOOP_VINFO_ALLOW_MUTUAL_ALIGNMENT (loop_vinfo) = true;
3053 1052 : if (dump_enabled_p ())
3054 11 : dump_printf_loc (MSG_NOTE, vect_location,
3055 : "Both peeling and versioning will be applied.\n");
3056 : }
3057 : else
3058 : {
3059 : /* This point is reached if versioning is used alone. */
3060 4026 : LOOP_VINFO_ALLOW_MUTUAL_ALIGNMENT (loop_vinfo) = false;
3061 4026 : if (dump_enabled_p ())
3062 82 : dump_printf_loc (MSG_NOTE, vect_location,
3063 : "Versioning for alignment will be applied.\n");
3064 : }
3065 :
3066 5078 : return opt_result::success ();
3067 : }
3068 :
3069 : /* This point is reached if neither peeling nor versioning is being done. */
3070 349299 : gcc_assert (! (do_peeling || do_versioning));
3071 :
3072 349299 : return opt_result::success ();
3073 748174 : }
3074 :
3075 :
3076 : /* Function vect_analyze_data_refs_alignment
3077 :
3078 : Analyze the alignment of the data-references in the loop. */
3079 :
3080 : void
3081 412779 : vect_analyze_data_refs_alignment (loop_vec_info loop_vinfo)
3082 : {
3083 412779 : DUMP_VECT_SCOPE ("vect_analyze_data_refs_alignment");
3084 :
3085 412779 : vec<data_reference_p> datarefs = LOOP_VINFO_DATAREFS (loop_vinfo);
3086 412779 : struct data_reference *dr;
3087 412779 : unsigned int i;
3088 :
3089 412779 : vect_record_base_alignments (loop_vinfo);
3090 1767123 : FOR_EACH_VEC_ELT (datarefs, i, dr)
3091 : {
3092 955928 : dr_vec_info *dr_info = loop_vinfo->lookup_dr (dr);
3093 955928 : if (STMT_VINFO_VECTORIZABLE (dr_info->stmt))
3094 : {
3095 955928 : if (STMT_VINFO_GROUPED_ACCESS (dr_info->stmt)
3096 1244004 : && DR_GROUP_FIRST_ELEMENT (dr_info->stmt) != dr_info->stmt)
3097 127901 : continue;
3098 :
3099 828027 : vect_compute_data_ref_alignment (loop_vinfo, dr_info,
3100 : STMT_VINFO_VECTYPE (dr_info->stmt));
3101 : }
3102 : }
3103 412779 : }
3104 :
3105 :
3106 : /* Analyze alignment of DRs of stmts in NODE. */
3107 :
3108 : static bool
3109 823508 : vect_slp_analyze_node_alignment (vec_info *vinfo, slp_tree node)
3110 : {
3111 : /* Alignment is maintained in the first element of the group. */
3112 823508 : stmt_vec_info first_stmt_info = SLP_TREE_SCALAR_STMTS (node)[0];
3113 823508 : first_stmt_info = DR_GROUP_FIRST_ELEMENT (first_stmt_info);
3114 823508 : dr_vec_info *dr_info = STMT_VINFO_DR_INFO (first_stmt_info);
3115 823508 : tree vectype = SLP_TREE_VECTYPE (node);
3116 823508 : poly_uint64 vector_alignment
3117 823508 : = exact_div (targetm.vectorize.preferred_vector_alignment (vectype),
3118 : BITS_PER_UNIT);
3119 823508 : if (dr_info->misalignment == DR_MISALIGNMENT_UNINITIALIZED)
3120 781669 : vect_compute_data_ref_alignment (vinfo, dr_info, SLP_TREE_VECTYPE (node));
3121 : /* Re-analyze alignment when we're facing a vectorization with a bigger
3122 : alignment requirement. */
3123 41839 : else if (known_lt (dr_info->target_alignment, vector_alignment))
3124 : {
3125 67 : poly_uint64 old_target_alignment = dr_info->target_alignment;
3126 67 : int old_misalignment = dr_info->misalignment;
3127 67 : vect_compute_data_ref_alignment (vinfo, dr_info, SLP_TREE_VECTYPE (node));
3128 : /* But keep knowledge about a smaller alignment. */
3129 67 : if (old_misalignment != DR_MISALIGNMENT_UNKNOWN
3130 38 : && dr_info->misalignment == DR_MISALIGNMENT_UNKNOWN)
3131 : {
3132 1 : dr_info->target_alignment = old_target_alignment;
3133 1 : dr_info->misalignment = old_misalignment;
3134 : }
3135 : }
3136 : /* When we ever face unordered target alignments the first one wins in terms
3137 : of analyzing and the other will become unknown in dr_misalignment. */
3138 823508 : return true;
3139 : }
3140 :
3141 : /* Function vect_slp_analyze_instance_alignment
3142 :
3143 : Analyze the alignment of the data-references in the SLP instance.
3144 : Return FALSE if a data reference is found that cannot be vectorized. */
3145 :
3146 : bool
3147 792242 : vect_slp_analyze_instance_alignment (vec_info *vinfo,
3148 : slp_instance instance)
3149 : {
3150 792242 : DUMP_VECT_SCOPE ("vect_slp_analyze_instance_alignment");
3151 :
3152 792242 : slp_tree node;
3153 792242 : unsigned i;
3154 952336 : FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (instance), i, node)
3155 160094 : if (! vect_slp_analyze_node_alignment (vinfo, node))
3156 : return false;
3157 :
3158 792242 : if (SLP_INSTANCE_KIND (instance) == slp_inst_kind_store
3159 792242 : && ! vect_slp_analyze_node_alignment
3160 663414 : (vinfo, SLP_INSTANCE_TREE (instance)))
3161 : return false;
3162 :
3163 : return true;
3164 : }
3165 :
3166 :
3167 : /* Analyze groups of accesses: check that DR_INFO belongs to a group of
3168 : accesses of legal size, step, etc. Detect gaps, single element
3169 : interleaving, and other special cases. Set grouped access info.
3170 : Collect groups of strided stores for further use in SLP analysis.
3171 : Worker for vect_analyze_group_access.

VINFO is the vectorization context (loop or basic-block) and DR_INFO the
data reference to analyze.

Return false if the step is a constant that is not a multiple of the
element size, if the distance between two consecutive group members is
not representable, if a store group has gaps between its members, or
if the group would exceed 4096 elements. Return true otherwise; note
that for basic-block vectorization an access that is not part of a
group and is not consecutive is only marked as not vectorizable and
true is still returned. */
3172 :
3173 : static bool
3174 12615330 : vect_analyze_group_access_1 (vec_info *vinfo, dr_vec_info *dr_info)
3175 : {
3176 12615330 : data_reference *dr = dr_info->dr;
3177 12615330 : tree step = DR_STEP (dr);
3178 12615330 : tree scalar_type = TREE_TYPE (DR_REF (dr));
3179 12615330 : HOST_WIDE_INT type_size = TREE_INT_CST_LOW (TYPE_SIZE_UNIT (scalar_type));
3180 12615330 : stmt_vec_info stmt_info = dr_info->stmt;
3181 12615330 : loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
3182 12615330 : bb_vec_info bb_vinfo = dyn_cast <bb_vec_info> (vinfo);
3183 12615330 : HOST_WIDE_INT dr_step = -1;
3184 12615330 : HOST_WIDE_INT groupsize, last_accessed_element = 1;
3185 12615330 : bool slp_impossible = false;
3186 :
3187 : /* For interleaving, GROUPSIZE is STEP counted in elements, i.e., the
3188 : size of the interleaving group (including gaps). A GROUPSIZE of 0
: means the step is not a compile-time constant. */
3189 12615330 : if (tree_fits_shwi_p (step))
3190 : {
3191 12605844 : dr_step = tree_to_shwi (step);
3192 : /* Check that STEP is a multiple of type size. Otherwise there is
3193 : a non-element-sized gap at the end of the group which we
3194 : cannot represent in DR_GROUP_GAP or DR_GROUP_SIZE.
3195 : ??? As we can handle non-constant step fine here we should
3196 : simply remove uses of DR_GROUP_GAP between the last and first
3197 : element and instead rely on DR_STEP. DR_GROUP_SIZE then would
3198 : simply not include that gap. */
3199 12605844 : if ((dr_step % type_size) != 0)
3200 : {
3201 498 : if (dump_enabled_p ())
3202 27 : dump_printf_loc (MSG_NOTE, vect_location,
3203 : "Step %T is not a multiple of the element size"
3204 : " for %T\n",
3205 : step, DR_REF (dr));
3206 498 : return false;
3207 : }
3208 12605346 : groupsize = absu_hwi (dr_step) / type_size;
3209 : }
3210 : else
3211 : groupsize = 0;
3212 :
3213 : /* Not consecutive access is possible only if it is a part of interleaving. */
3214 12614832 : if (!DR_GROUP_FIRST_ELEMENT (stmt_info))
3215 : {
3216 : /* Check if this DR is a part of interleaving, and is a single
3217 : element of the group that is accessed in the loop. */
3218 :
3219 : /* Gaps are supported only for loads. STEP must be a multiple of the type
3220 : size. For a non-constant STEP, GROUPSIZE is 0 and this case is not taken. */
3221 8473978 : if (DR_IS_READ (dr)
3222 5065792 : && (dr_step % type_size) == 0
3223 : && groupsize > 0
3224 : /* This could be UINT_MAX but as we are generating code in a very
3225 : inefficient way we have to cap earlier.
3226 : See PR91403 for example. */
3227 5065792 : && groupsize <= 4096)
3228 : {
3229 72942 : DR_GROUP_FIRST_ELEMENT (stmt_info) = stmt_info;
3230 72942 : DR_GROUP_SIZE (stmt_info) = groupsize;
3231 72942 : DR_GROUP_GAP (stmt_info) = groupsize - 1;
3232 72942 : if (dump_enabled_p ())
3233 1492 : dump_printf_loc (MSG_NOTE, vect_location,
3234 : "Detected single element interleaving %T"
3235 : " step %T\n",
3236 : DR_REF (dr), step);
3237 :
3238 72942 : return true;
3239 : }
3240 :
3241 8401036 : if (dump_enabled_p ())
3242 3129 : dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3243 : "not consecutive access %G", stmt_info->stmt);
3244 :
3245 8401036 : if (bb_vinfo)
3246 : {
3247 : /* Mark the statement as unvectorizable. This is not reported as a
: failure: true is returned to the caller. */
3248 8382039 : STMT_VINFO_VECTORIZABLE (stmt_info) = false;
3249 8382039 : return true;
3250 : }
3251 :
3252 18997 : if (dump_enabled_p ())
3253 305 : dump_printf_loc (MSG_NOTE, vect_location, "using strided accesses\n");
3254 18997 : STMT_VINFO_STRIDED_P (stmt_info) = true;
3255 18997 : return true;
3256 : }
3257 :
/* Only the first statement of a group analyzes the chain; for any other
group member nothing is done here and true is returned. */
3258 4140854 : if (DR_GROUP_FIRST_ELEMENT (stmt_info) == stmt_info)
3259 : {
3260 : /* First stmt in the interleaving chain. Check the chain. */
3261 1503065 : stmt_vec_info next = DR_GROUP_NEXT_ELEMENT (stmt_info);
3262 1503065 : struct data_reference *data_ref = dr;
3263 1503065 : unsigned int count = 1;
3264 1503065 : tree prev_init = DR_INIT (data_ref);
3265 1503065 : HOST_WIDE_INT diff, gaps = 0;
3266 :
3267 : /* By construction, all group members have INTEGER_CST DR_INITs. */
3268 4140863 : while (next)
3269 : {
3270 : /* We never have the same DR multiple times. */
3271 2637860 : gcc_assert (tree_int_cst_compare (DR_INIT (data_ref),
3272 : DR_INIT (STMT_VINFO_DATA_REF (next))) != 0);
3273 :
3274 2637860 : data_ref = STMT_VINFO_DATA_REF (next);
3275 :
3276 : /* All group members have the same STEP by construction. */
3277 2637860 : gcc_checking_assert (operand_equal_p (DR_STEP (data_ref), step, 0));
3278 :
3279 : /* Check that the distance between two accesses is equal to the type
3280 : size. Otherwise, we have gaps. DIFF is that distance in elements. */
3281 2637860 : diff = (TREE_INT_CST_LOW (DR_INIT (data_ref))
3282 2637860 : - TREE_INT_CST_LOW (prev_init)) / type_size;
3283 2637860 : if (diff < 1 || diff > UINT_MAX)
3284 : {
3285 : /* For artificial testcases with array accesses with large
3286 : constant indices we can run into overflow issues which
3287 : can end up fooling the groupsize constraint below so
3288 : check the individual gaps (which are represented as
3289 : unsigned int) as well. */
3290 0 : if (dump_enabled_p ())
3291 0 : dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3292 : "interleaved access with gap larger "
3293 : "than representable\n");
3294 0 : return false;
3295 : }
3296 2637860 : if (diff != 1)
3297 : {
3298 : /* FORNOW: SLP of accesses with gaps is not supported. */
3299 101693 : slp_impossible = true;
3300 101693 : if (DR_IS_WRITE (data_ref))
3301 : {
3302 62 : if (dump_enabled_p ())
3303 0 : dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3304 : "interleaved store with gaps\n");
3305 62 : return false;
3306 : }
3307 :
3308 101631 : gaps += diff - 1;
3309 : }
3310 :
3311 2637798 : last_accessed_element += diff;
3312 :
3313 : /* Store the gap from the previous member of the group. If there is no
3314 : gap in the access, DR_GROUP_GAP is always 1. */
3315 2637798 : DR_GROUP_GAP (next) = diff;
3316 :
3317 2637798 : prev_init = DR_INIT (data_ref);
3318 2637798 : next = DR_GROUP_NEXT_ELEMENT (next);
3319 : /* Count the number of data-refs in the chain. */
3320 2637798 : count++;
3321 : }
3322 :
/* With a non-constant step the group size is derived from the chain
itself: the number of members plus the gaps between them. */
3323 1503003 : if (groupsize == 0)
3324 1432956 : groupsize = count + gaps;
3325 :
3326 : /* This could be UINT_MAX but as we are generating code in a very
3327 : inefficient way we have to cap earlier. See PR78699 for example. */
3328 1503003 : if (groupsize > 4096)
3329 : {
3330 1 : if (dump_enabled_p ())
3331 1 : dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3332 : "group is too large\n");
3333 1 : return false;
3334 : }
3335 :
3336 : /* For stores the size of the interleaving must be equal to count, i.e.,
3337 : there must be no gaps. Otherwise shrink the group to COUNT elements and mark the access as strided. */
3338 1503002 : if (groupsize != count
3339 105700 : && !DR_IS_READ (dr))
3340 : {
3341 11387 : groupsize = count;
3342 11387 : STMT_VINFO_STRIDED_P (stmt_info) = true;
3343 : }
3344 :
3345 : /* If there is a gap after the last load in the group it is the
3346 : difference between the groupsize and the last accessed
3347 : element.
3348 : When there is no gap, this difference should be 0. */
3349 1503002 : DR_GROUP_GAP (stmt_info) = groupsize - last_accessed_element;
3350 :
3351 1503002 : DR_GROUP_SIZE (stmt_info) = groupsize;
3352 1503002 : if (dump_enabled_p ())
3353 : {
3354 7977 : dump_printf_loc (MSG_NOTE, vect_location,
3355 : "Detected interleaving ");
3356 7977 : if (DR_IS_READ (dr))
3357 4299 : dump_printf (MSG_NOTE, "load ");
3358 3678 : else if (STMT_VINFO_STRIDED_P (stmt_info))
3359 496 : dump_printf (MSG_NOTE, "strided store ");
3360 : else
3361 3182 : dump_printf (MSG_NOTE, "store ");
3362 7977 : dump_printf (MSG_NOTE, "of size %u\n",
3363 : (unsigned)groupsize);
3364 7977 : dump_printf_loc (MSG_NOTE, vect_location, "\t%G", stmt_info->stmt);
3365 7977 : next = DR_GROUP_NEXT_ELEMENT (stmt_info);
3366 39054 : while (next)
3367 : {
3368 31077 : if (DR_GROUP_GAP (next) != 1)
3369 277 : dump_printf_loc (MSG_NOTE, vect_location,
3370 : "\t<gap of %d elements>\n",
3371 277 : DR_GROUP_GAP (next) - 1);
3372 31077 : dump_printf_loc (MSG_NOTE, vect_location, "\t%G", next->stmt);
3373 31077 : next = DR_GROUP_NEXT_ELEMENT (next);
3374 : }
3375 7977 : if (DR_GROUP_GAP (stmt_info) != 0)
3376 388 : dump_printf_loc (MSG_NOTE, vect_location,
3377 : "\t<gap of %d elements>\n",
3378 388 : DR_GROUP_GAP (stmt_info));
3379 : }
3380 :
3381 : /* SLP: create an SLP data structure for every interleaving group of
3382 : stores for further analysis in vect_analyse_slp. Groups with gaps
: between members (SLP_IMPOSSIBLE) are not recorded. */
3383 1503002 : if (DR_IS_WRITE (dr) && !slp_impossible)
3384 : {
3385 923001 : if (loop_vinfo)
3386 29299 : LOOP_VINFO_GROUPED_STORES (loop_vinfo).safe_push (stmt_info);
3387 923001 : if (bb_vinfo)
3388 893702 : BB_VINFO_GROUPED_STORES (bb_vinfo).safe_push (stmt_info);
3389 : }
3390 : }
3391 :
3392 : return true;
3393 : }
3394 :
3395 : /* Analyze groups of accesses: check that DR_INFO belongs to a group of
3396 : accesses of legal size, step, etc. Detect gaps, single element
3397 : interleaving, and other special cases. Set grouped access info.
3398 : Collect groups of strided stores for further use in SLP analysis. */
3399 :
3400 : static bool
3401 12615330 : vect_analyze_group_access (vec_info *vinfo, dr_vec_info *dr_info)
3402 : {
3403 12615330 : if (!vect_analyze_group_access_1 (vinfo, dr_info))
3404 : {
3405 : /* Dissolve the group if present. */
3406 561 : stmt_vec_info stmt_info = DR_GROUP_FIRST_ELEMENT (dr_info->stmt);
3407 792 : while (stmt_info)
3408 : {
3409 231 : stmt_vec_info next = DR_GROUP_NEXT_ELEMENT (stmt_info);
3410 231 : DR_GROUP_FIRST_ELEMENT (stmt_info) = NULL;
3411 231 : DR_GROUP_NEXT_ELEMENT (stmt_info) = NULL;
3412 231 : stmt_info = next;
3413 : }
3414 : return false;
3415 : }
3416 : return true;
3417 : }
3418 :
3419 : /* Analyze the access pattern of the data-reference DR_INFO.
3420 : In case of non-consecutive accesses call vect_analyze_group_access() to
3421 : analyze groups of accesses. */
3422 :
3423 : static bool
3424 13369785 : vect_analyze_data_ref_access (vec_info *vinfo, dr_vec_info *dr_info)
3425 : {
3426 13369785 : data_reference *dr = dr_info->dr;
3427 13369785 : tree step = DR_STEP (dr);
3428 13369785 : tree scalar_type = TREE_TYPE (DR_REF (dr));
3429 13369785 : stmt_vec_info stmt_info = dr_info->stmt;
3430 13369785 : loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
3431 13369785 : class loop *loop = NULL;
3432 :
3433 13369785 : if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
3434 : return true;
3435 :
3436 13269983 : if (loop_vinfo)
3437 968105 : loop = LOOP_VINFO_LOOP (loop_vinfo);
3438 :
3439 13269983 : if (loop_vinfo && !step)
3440 : {
3441 0 : if (dump_enabled_p ())
3442 0 : dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3443 : "bad data-ref access in loop\n");
3444 0 : return false;
3445 : }
3446 :
3447 : /* Allow loads with zero step in inner-loop vectorization. */
3448 13269983 : if (loop_vinfo && integer_zerop (step))
3449 : {
3450 14111 : DR_GROUP_FIRST_ELEMENT (stmt_info) = NULL;
3451 14111 : DR_GROUP_NEXT_ELEMENT (stmt_info) = NULL;
3452 14111 : if (!nested_in_vect_loop_p (loop, stmt_info))
3453 13850 : return DR_IS_READ (dr);
3454 : /* Allow references with zero step for outer loops marked
3455 : with pragma omp simd only - it guarantees absence of
3456 : loop-carried dependencies between inner loop iterations. */
3457 261 : if (loop->safelen < 2)
3458 : {
3459 225 : if (dump_enabled_p ())
3460 6 : dump_printf_loc (MSG_NOTE, vect_location,
3461 : "zero step in inner loop of nest\n");
3462 225 : return false;
3463 : }
3464 : }
3465 :
3466 13255872 : if (loop && nested_in_vect_loop_p (loop, stmt_info))
3467 : {
3468 : /* Interleaved accesses are not yet supported within outer-loop
3469 : vectorization for references in the inner-loop. */
3470 5802 : DR_GROUP_FIRST_ELEMENT (stmt_info) = NULL;
3471 5802 : DR_GROUP_NEXT_ELEMENT (stmt_info) = NULL;
3472 :
3473 : /* For the rest of the analysis we use the outer-loop step. */
3474 5802 : step = STMT_VINFO_DR_STEP (stmt_info);
3475 5802 : if (integer_zerop (step))
3476 : {
3477 1281 : if (dump_enabled_p ())
3478 238 : dump_printf_loc (MSG_NOTE, vect_location,
3479 : "zero step in outer loop.\n");
3480 1281 : return DR_IS_READ (dr);
3481 : }
3482 : }
3483 :
3484 : /* Consecutive? */
3485 13254627 : if (TREE_CODE (step) == INTEGER_CST)
3486 : {
3487 13215613 : HOST_WIDE_INT dr_step = TREE_INT_CST_LOW (step);
3488 13215613 : if (!tree_int_cst_compare (step, TYPE_SIZE_UNIT (scalar_type))
3489 13215613 : || (dr_step < 0
3490 28961 : && !compare_tree_int (TYPE_SIZE_UNIT (scalar_type), -dr_step)))
3491 : {
3492 : /* Mark that it is not interleaving. */
3493 606899 : DR_GROUP_FIRST_ELEMENT (stmt_info) = NULL;
3494 606899 : DR_GROUP_NEXT_ELEMENT (stmt_info) = NULL;
3495 606899 : return true;
3496 : }
3497 : }
3498 :
3499 12647728 : if (loop && nested_in_vect_loop_p (loop, stmt_info))
3500 : {
3501 3329 : if (dump_enabled_p ())
3502 163 : dump_printf_loc (MSG_NOTE, vect_location,
3503 : "grouped access in outer loop.\n");
3504 3329 : return false;
3505 : }
3506 :
3507 :
3508 : /* Assume this is a DR handled by non-constant strided load case. */
3509 12644399 : if (TREE_CODE (step) != INTEGER_CST)
3510 38555 : return (STMT_VINFO_STRIDED_P (stmt_info)
3511 38555 : && (!STMT_VINFO_GROUPED_ACCESS (stmt_info)
3512 9486 : || vect_analyze_group_access (vinfo, dr_info)));
3513 :
3514 : /* Not consecutive access - check if it's a part of interleaving group. */
3515 12605844 : return vect_analyze_group_access (vinfo, dr_info);
3516 : }
3517 :
3518 : /* Compare two data-references DRA and DRB to group them into chunks
3519 : suitable for grouping. */
3520 :
3521 : static int
3522 349651282 : dr_group_sort_cmp (const void *dra_, const void *drb_)
3523 : {
3524 349651282 : dr_vec_info *dra_info = *(dr_vec_info **)const_cast<void *>(dra_);
3525 349651282 : dr_vec_info *drb_info = *(dr_vec_info **)const_cast<void *>(drb_);
3526 349651282 : data_reference_p dra = dra_info->dr;
3527 349651282 : data_reference_p drb = drb_info->dr;
3528 349651282 : int cmp;
3529 :
3530 : /* Stabilize sort. */
3531 349651282 : if (dra == drb)
3532 : return 0;
3533 :
3534 : /* Different group IDs lead never belong to the same group. */
3535 349651282 : if (dra_info->group != drb_info->group)
3536 382540528 : return dra_info->group < drb_info->group ? -1 : 1;
3537 :
3538 : /* Ordering of DRs according to base. */
3539 97975849 : cmp = data_ref_compare_tree (DR_BASE_ADDRESS (dra),
3540 : DR_BASE_ADDRESS (drb));
3541 97975849 : if (cmp != 0)
3542 : return cmp;
3543 :
3544 : /* And according to DR_OFFSET. */
3545 53067798 : cmp = data_ref_compare_tree (DR_OFFSET (dra), DR_OFFSET (drb));
3546 53067798 : if (cmp != 0)
3547 : return cmp;
3548 :
3549 : /* Put reads before writes. */
3550 52718377 : if (DR_IS_READ (dra) != DR_IS_READ (drb))
3551 4254407 : return DR_IS_READ (dra) ? -1 : 1;
3552 :
3553 : /* Then sort after access size. */
3554 49824224 : cmp = data_ref_compare_tree (TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (dra))),
3555 49824224 : TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (drb))));
3556 49824224 : if (cmp != 0)
3557 : return cmp;
3558 :
3559 : /* And after step. */
3560 43056497 : cmp = data_ref_compare_tree (DR_STEP (dra), DR_STEP (drb));
3561 43056497 : if (cmp != 0)
3562 : return cmp;
3563 :
3564 : /* Then sort after DR_INIT. In case of identical DRs sort after stmt UID. */
3565 43049123 : cmp = data_ref_compare_tree (DR_INIT (dra), DR_INIT (drb));
3566 43049123 : if (cmp == 0)
3567 487232 : return gimple_uid (DR_STMT (dra)) < gimple_uid (DR_STMT (drb)) ? -1 : 1;
3568 : return cmp;
3569 : }
3570 :
3571 : /* If OP is the result of a conversion, return the unconverted value,
3572 : otherwise return null. */
3573 :
3574 : static tree
3575 387 : strip_conversion (tree op)
3576 : {
3577 387 : if (TREE_CODE (op) != SSA_NAME)
3578 : return NULL_TREE;
3579 387 : gimple *stmt = SSA_NAME_DEF_STMT (op);
3580 387 : if (!is_gimple_assign (stmt)
3581 387 : || !CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (stmt)))
3582 : return NULL_TREE;
3583 186 : return gimple_assign_rhs1 (stmt);
3584 : }
3585 :
3586 : /* Return true if vectorizable_* routines can handle statements STMT1_INFO
3587 : and STMT2_INFO being in a single group. When ALLOW_SLP_P, masked loads can
3588 : be grouped in SLP mode. */
3589 :
3590 : static bool
3591 7047062 : can_group_stmts_p (stmt_vec_info stmt1_info, stmt_vec_info stmt2_info,
3592 : bool allow_slp_p)
3593 : {
3594 7047062 : if (gimple_assign_single_p (stmt1_info->stmt))
3595 7045391 : return gimple_assign_single_p (stmt2_info->stmt);
3596 :
3597 1671 : gcall *call1 = dyn_cast <gcall *> (stmt1_info->stmt);
3598 1671 : if (call1 && gimple_call_internal_p (call1))
3599 : {
3600 : /* Check for two masked loads or two masked stores. */
3601 1909 : gcall *call2 = dyn_cast <gcall *> (stmt2_info->stmt);
3602 1655 : if (!call2 || !gimple_call_internal_p (call2))
3603 : return false;
3604 1655 : internal_fn ifn = gimple_call_internal_fn (call1);
3605 1655 : if (ifn != IFN_MASK_LOAD && ifn != IFN_MASK_STORE)
3606 : return false;
3607 1655 : if (ifn != gimple_call_internal_fn (call2))
3608 : return false;
3609 :
3610 : /* Check that the masks are the same. Cope with casts of masks,
3611 : like those created by build_mask_conversion. */
3612 1655 : tree mask1 = gimple_call_arg (call1, 2);
3613 1655 : tree mask2 = gimple_call_arg (call2, 2);
3614 1655 : if (!operand_equal_p (mask1, mask2, 0) && !allow_slp_p)
3615 : {
3616 294 : mask1 = strip_conversion (mask1);
3617 294 : if (!mask1)
3618 : return false;
3619 93 : mask2 = strip_conversion (mask2);
3620 93 : if (!mask2)
3621 : return false;
3622 93 : if (!operand_equal_p (mask1, mask2, 0))
3623 : return false;
3624 : }
3625 1417 : return true;
3626 : }
3627 :
3628 : return false;
3629 : }
3630 :
3631 : /* Function vect_analyze_data_ref_accesses.
3632 :
3633 : Analyze the access pattern of all the data references in VINFO.
3634 :
3635 : A sorted copy of the data references is used to link compatible references
3636 : into interleaving chains; chains containing duplicate accesses are then split.
3637 : Finally each vectorizable reference is checked by vect_analyze_data_ref_access;
: on failure a basic-block VINFO only marks the statement as not vectorizable,
: any other VINFO makes the whole analysis fail.
: DATAREF_GROUPS, if non-null, gives the group id of each data reference;
: otherwise references are grouped by basic block index.
: 
3638 : FORNOW: handle only arrays and pointer accesses. */
3639 :
3640 : opt_result
3641 2645845 : vect_analyze_data_ref_accesses (vec_info *vinfo,
3642 : vec<int> *dataref_groups)
3643 : {
3644 2645845 : unsigned int i;
3645 2645845 : vec<data_reference_p> datarefs = vinfo->shared->datarefs;
3646 :
3647 2645845 : DUMP_VECT_SCOPE ("vect_analyze_data_ref_accesses");
3648 :
3649 2645845 : if (datarefs.is_empty ())
3650 1062578 : return opt_result::success ();
3651 :
3652 : /* Sort the array of datarefs to make building the interleaving chains
3653 : linear. Don't modify the original vector's order, it is needed for
3654 : determining what dependencies are reversed. */
3655 1583267 : vec<dr_vec_info *> datarefs_copy;
3656 1583267 : datarefs_copy.create (datarefs.length ());
/* Note this loop declares its own I, shadowing the outer one. */
3657 16735593 : for (unsigned i = 0; i < datarefs.length (); i++)
3658 : {
3659 15152326 : dr_vec_info *dr_info = vinfo->lookup_dr (datarefs[i]);
3660 : /* If the caller computed DR grouping use that, otherwise group by
3661 : basic blocks. */
3662 15152326 : if (dataref_groups)
3663 14070455 : dr_info->group = (*dataref_groups)[i];
3664 : else
3665 1081871 : dr_info->group = gimple_bb (DR_STMT (datarefs[i]))->index;
3666 15152326 : datarefs_copy.quick_push (dr_info);
3667 : }
3668 1583267 : datarefs_copy.qsort (dr_group_sort_cmp);
/* Group leaders whose chain contains a duplicate access; processed by the
fixup loop below. */
3669 1583267 : hash_set<stmt_vec_info> to_fixup;
3670 :
3671 : /* Build the interleaving chains. The outer loop has no increment: I is
: advanced by the inner loop, or explicitly when DRA is skipped. */
3672 14306508 : for (i = 0; i < datarefs_copy.length () - 1;)
3673 : {
3674 11139974 : dr_vec_info *dr_info_a = datarefs_copy[i];
3675 11139974 : data_reference_p dra = dr_info_a->dr;
3676 11139974 : int dra_group_id = dr_info_a->group;
3677 11139974 : stmt_vec_info stmtinfo_a = dr_info_a->stmt;
3678 11139974 : stmt_vec_info lastinfo = NULL;
3679 11139974 : if (!STMT_VINFO_VECTORIZABLE (stmtinfo_a)
3680 9507275 : || STMT_VINFO_GATHER_SCATTER_P (stmtinfo_a))
3681 : {
3682 1697334 : ++i;
3683 1697334 : continue;
3684 : }
/* Try to chain the following DRs onto DRA. Any break leaves I at the
first DR that was not linked, which becomes the next DRA. */
3685 24837095 : for (i = i + 1; i < datarefs_copy.length (); ++i)
3686 : {
3687 11871725 : dr_vec_info *dr_info_b = datarefs_copy[i];
3688 11871725 : data_reference_p drb = dr_info_b->dr;
3689 11871725 : int drb_group_id = dr_info_b->group;
3690 11871725 : stmt_vec_info stmtinfo_b = dr_info_b->stmt;
3691 11871725 : if (!STMT_VINFO_VECTORIZABLE (stmtinfo_b)
3692 11564152 : || STMT_VINFO_GATHER_SCATTER_P (stmtinfo_b))
3693 : break;
3694 :
3695 : /* ??? Imperfect sorting (non-compatible types, non-modulo
3696 : accesses, same accesses) can lead to a group to be artificially
3697 : split here as we don't just skip over those. If it really
3698 : matters we can push those to a worklist and re-iterate
3699 : over them. Then we can just skip ahead to the next DR here. */
3700 :
3701 : /* DRs in a different DR group should not be put into the same
3702 : interleaving group. */
3703 11560535 : if (dra_group_id != drb_group_id)
3704 : break;
3705 :
3706 : /* Check that the data-refs have same first location (except init)
3707 : and they are both either store or load (not load and store,
3708 : not masked loads or stores). */
3709 7322024 : if (DR_IS_READ (dra) != DR_IS_READ (drb)
3710 6019347 : || data_ref_compare_tree (DR_BASE_ADDRESS (dra),
3711 : DR_BASE_ADDRESS (drb)) != 0
3712 4395191 : || data_ref_compare_tree (DR_OFFSET (dra), DR_OFFSET (drb)) != 0
3713 11698249 : || !can_group_stmts_p (stmtinfo_a, stmtinfo_b, true))
3714 : break;
3715 :
3716 : /* Check that the data-refs have the same constant size. */
3717 4376200 : tree sza = TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (dra)));
3718 4376200 : tree szb = TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (drb)));
3719 4376200 : if (!tree_fits_uhwi_p (sza)
3720 4376200 : || !tree_fits_uhwi_p (szb)
3721 8752400 : || !tree_int_cst_equal (sza, szb))
3722 : break;
3723 :
3724 : /* Check that the data-refs have the same step. */
3725 4026237 : if (data_ref_compare_tree (DR_STEP (dra), DR_STEP (drb)) != 0)
3726 : break;
3727 :
3728 : /* Check the types are compatible.
3729 : ??? We don't distinguish this during sorting. */
3730 4025517 : if (!types_compatible_p (TREE_TYPE (DR_REF (dra)),
3731 4025517 : TREE_TYPE (DR_REF (drb))))
3732 : break;
3733 :
3734 : /* Check that the DR_INITs are compile-time constants. */
3735 2883318 : if (!tree_fits_shwi_p (DR_INIT (dra))
3736 2883318 : || !tree_fits_shwi_p (DR_INIT (drb)))
3737 : break;
3738 :
3739 : /* Different .GOMP_SIMD_LANE calls still give the same lane,
3740 : just hold extra information. */
3741 2883318 : if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmtinfo_a)
3742 1240 : && STMT_VINFO_SIMD_LANE_ACCESS_P (stmtinfo_b)
3743 2884558 : && data_ref_compare_tree (DR_INIT (dra), DR_INIT (drb)) == 0)
3744 : break;
3745 :
3746 : /* Sorting has ensured that DR_INIT (dra) <= DR_INIT (drb). */
3747 2882078 : HOST_WIDE_INT init_a = TREE_INT_CST_LOW (DR_INIT (dra));
3748 2882078 : HOST_WIDE_INT init_b = TREE_INT_CST_LOW (DR_INIT (drb));
3749 2882078 : HOST_WIDE_INT init_prev
3750 2882078 : = TREE_INT_CST_LOW (DR_INIT (datarefs_copy[i-1]->dr));
3751 2882078 : gcc_assert (init_a <= init_b
3752 : && init_a <= init_prev
3753 : && init_prev <= init_b);
3754 :
3755 : /* Do not place the same access in the interleaving chain twice. */
3756 2882078 : if (init_b == init_prev)
3757 : {
3758 29477 : gcc_assert (gimple_uid (DR_STMT (datarefs_copy[i-1]->dr))
3759 : < gimple_uid (DR_STMT (drb)));
3760 : /* Simply link in duplicates and fix up the chain below. */
3761 : }
3762 : else
3763 : {
3764 : /* If init_b == init_a + the size of the type * k, we have an
3765 : interleaving, and DRA is accessed before DRB. */
3766 2852601 : unsigned HOST_WIDE_INT type_size_a = tree_to_uhwi (sza);
3767 2852601 : if (type_size_a == 0
3768 2852601 : || (((unsigned HOST_WIDE_INT)init_b - init_a)
3769 2852601 : % type_size_a != 0))
3770 : break;
3771 :
3772 : /* If we have a store, the accesses are adjacent. This splits
3773 : groups into chunks we support (we don't support vectorization
3774 : of stores with gaps). */
3775 2850872 : if (!DR_IS_READ (dra)
3776 1871993 : && (((unsigned HOST_WIDE_INT)init_b - init_prev)
3777 : != type_size_a))
3778 : break;
3779 :
3780 : /* For datarefs with big gap, it's better to split them into different
3781 : groups.
3782 : i.e. a[0], a[1], a[2], .. a[7], a[100], a[101],..., a[107] */
3783 2670412 : if ((unsigned HOST_WIDE_INT)(init_b - init_prev)
3784 : > MAX_BITSIZE_MODE_ANY_MODE / BITS_PER_UNIT)
3785 : break;
3786 :
3787 : /* If the step (if not zero or non-constant) is smaller than the
3788 : difference between data-refs' inits this splits groups into
3789 : suitable sizes. */
3790 2660930 : if (tree_fits_shwi_p (DR_STEP (dra)))
3791 : {
3792 2654684 : unsigned HOST_WIDE_INT step
3793 2654684 : = absu_hwi (tree_to_shwi (DR_STEP (dra)));
3794 2654684 : if (step != 0
3795 163902 : && step <= ((unsigned HOST_WIDE_INT)init_b - init_a))
3796 : break;
3797 : }
3798 : }
3799 :
3800 2671214 : if (dump_enabled_p ())
3801 31982 : dump_printf_loc (MSG_NOTE, vect_location,
3802 31982 : DR_IS_READ (dra)
3803 : ? "Detected interleaving load %T and %T\n"
3804 : : "Detected interleaving store %T and %T\n",
3805 : DR_REF (dra), DR_REF (drb));
3806 :
3807 : /* Link the found element into the group list. DRA becomes the group
: leader the first time something is chained onto it. */
3808 2671214 : if (!DR_GROUP_FIRST_ELEMENT (stmtinfo_a))
3809 : {
3810 1481760 : DR_GROUP_FIRST_ELEMENT (stmtinfo_a) = stmtinfo_a;
3811 1481760 : lastinfo = stmtinfo_a;
3812 : }
3813 2671214 : DR_GROUP_FIRST_ELEMENT (stmtinfo_b) = stmtinfo_a;
3814 2671214 : DR_GROUP_NEXT_ELEMENT (lastinfo) = stmtinfo_b;
3815 2671214 : lastinfo = stmtinfo_b;
3816 :
/* Once set, the SLP-only flag on the leader is sticky; otherwise it is
set when DRB cannot be grouped with DRA outside of SLP. */
3817 2671214 : if (! STMT_VINFO_SLP_VECT_ONLY (stmtinfo_a))
3818 : {
3819 2670837 : STMT_VINFO_SLP_VECT_ONLY (stmtinfo_a)
3820 2670837 : = !can_group_stmts_p (stmtinfo_a, stmtinfo_b, false);
3821 :
3822 2670837 : if (dump_enabled_p () && STMT_VINFO_SLP_VECT_ONLY (stmtinfo_a))
3823 126 : dump_printf_loc (MSG_NOTE, vect_location,
3824 : "Load suitable for SLP vectorization only.\n");
3825 : }
3826 :
/* Remember groups that got a duplicate access linked in. */
3827 2671214 : if (init_b == init_prev
3828 29477 : && !to_fixup.add (DR_GROUP_FIRST_ELEMENT (stmtinfo_a))
3829 2687717 : && dump_enabled_p ())
3830 213 : dump_printf_loc (MSG_NOTE, vect_location,
3831 : "Queuing group with duplicate access for fixup\n");
3832 : }
3833 : }
3834 :
3835 : /* Fixup groups with duplicate entries by splitting it. Each iteration
: takes one group out of TO_FIXUP and may queue the split-off group. */
3836 1626008 : while (1)
3837 : {
3838 1626008 : hash_set<stmt_vec_info>::iterator it = to_fixup.begin ();
3839 1626008 : if (!(it != to_fixup.end ()))
3840 : break;
3841 42741 : stmt_vec_info grp = *it;
3842 42741 : to_fixup.remove (grp);
3843 :
3844 : /* Find the earliest duplicate group member, i.e. the smallest stmt UID
: among members whose DR_INIT equals that of their predecessor. */
3845 42741 : unsigned first_duplicate = -1u;
3846 42741 : stmt_vec_info next, g = grp;
3847 272859 : while ((next = DR_GROUP_NEXT_ELEMENT (g)))
3848 : {
3849 187377 : if (tree_int_cst_equal (DR_INIT (STMT_VINFO_DR_INFO (next)->dr),
3850 187377 : DR_INIT (STMT_VINFO_DR_INFO (g)->dr))
3851 187377 : && gimple_uid (STMT_VINFO_STMT (next)) < first_duplicate)
3852 : first_duplicate = gimple_uid (STMT_VINFO_STMT (next));
3853 : g = next;
3854 : }
/* No duplicates left in this group. */
3855 42741 : if (first_duplicate == -1U)
3856 16503 : continue;
3857 :
3858 : /* Then move all stmts after the first duplicate to a new group.
3859 : Note this is a heuristic but one with the property that *it
3860 : is fixed up completely. */
3861 26238 : g = grp;
3862 26238 : stmt_vec_info newgroup = NULL, ng = grp;
3863 237415 : while ((next = DR_GROUP_NEXT_ELEMENT (g)))
3864 : {
3865 184939 : if (gimple_uid (STMT_VINFO_STMT (next)) >= first_duplicate)
3866 : {
/* Unlink NEXT from the old chain and append it to the new one. */
3867 178745 : DR_GROUP_NEXT_ELEMENT (g) = DR_GROUP_NEXT_ELEMENT (next);
3868 178745 : if (!newgroup)
3869 : {
3870 26238 : newgroup = next;
3871 26238 : STMT_VINFO_SLP_VECT_ONLY (newgroup)
3872 26238 : = STMT_VINFO_SLP_VECT_ONLY (grp);
3873 : }
3874 : else
3875 152507 : DR_GROUP_NEXT_ELEMENT (ng) = next;
3876 178745 : ng = next;
3877 178745 : DR_GROUP_FIRST_ELEMENT (ng) = newgroup;
3878 : }
3879 : else
3880 : g = DR_GROUP_NEXT_ELEMENT (g);
3881 : }
3882 26238 : DR_GROUP_NEXT_ELEMENT (ng) = NULL;
3883 :
3884 : /* Fixup the new group which still may contain duplicates. */
3885 26238 : to_fixup.add (newgroup);
3886 : }
3887 :
/* Finally analyze the access of every DR still considered vectorizable. */
3888 1583267 : dr_vec_info *dr_info;
3889 16713681 : FOR_EACH_VEC_ELT (datarefs_copy, i, dr_info)
3890 : {
3891 15138362 : if (STMT_VINFO_VECTORIZABLE (dr_info->stmt)
3892 15138362 : && !vect_analyze_data_ref_access (vinfo, dr_info))
3893 : {
3894 8002 : if (dump_enabled_p ())
3895 291 : dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3896 : "not vectorized: complicated access pattern.\n");
3897 :
3898 8002 : if (is_a <bb_vec_info> (vinfo))
3899 : {
3900 : /* Mark the statement as not vectorizable. */
3901 54 : STMT_VINFO_VECTORIZABLE (dr_info->stmt) = false;
3902 54 : continue;
3903 : }
3904 : else
3905 : {
3906 7948 : datarefs_copy.release ();
3907 7948 : return opt_result::failure_at (dr_info->stmt->stmt,
3908 : "not vectorized:"
3909 : " complicated access pattern.\n");
3910 : }
3911 : }
3912 : }
3913 :
3914 1575319 : datarefs_copy.release ();
3915 1575319 : return opt_result::success ();
3916 1583267 : }
3917 :
3918 : /* Function vect_vfa_segment_size.
3919 :
3920 : Input:
3921 : DR_INFO: The data reference.
3922 : LENGTH_FACTOR: segment length to consider.
3923 :
3924 : Return a value suitable for the dr_with_seg_len::seg_len field.
3925 : This is the "distance travelled" by the pointer from the first
3926 : iteration in the segment to the last. Note that it does not include
3927 : the size of the access; in effect it only describes the first byte. */
3928 :
3929 : static tree
3930 146656 : vect_vfa_segment_size (dr_vec_info *dr_info, tree length_factor)
3931 : {
3932 146656 : length_factor = size_binop (MINUS_EXPR,
3933 : fold_convert (sizetype, length_factor),
3934 : size_one_node);
3935 146656 : return size_binop (MULT_EXPR, fold_convert (sizetype, DR_STEP (dr_info->dr)),
3936 : length_factor);
3937 : }
3938 :
3939 : /* Return a value that, when added to abs (vect_vfa_segment_size (DR_INFO)),
3940 : gives the worst-case number of bytes covered by the segment. */
3941 :
3942 : static unsigned HOST_WIDE_INT
3943 147138 : vect_vfa_access_size (vec_info *vinfo, dr_vec_info *dr_info)
3944 : {
3945 147138 : stmt_vec_info stmt_vinfo = dr_info->stmt;
3946 147138 : tree ref_type = TREE_TYPE (DR_REF (dr_info->dr));
3947 147138 : unsigned HOST_WIDE_INT ref_size = tree_to_uhwi (TYPE_SIZE_UNIT (ref_type));
3948 147138 : unsigned HOST_WIDE_INT access_size = ref_size;
3949 147138 : if (DR_GROUP_FIRST_ELEMENT (stmt_vinfo))
3950 : {
3951 42311 : gcc_assert (DR_GROUP_FIRST_ELEMENT (stmt_vinfo) == stmt_vinfo);
3952 42311 : access_size *= DR_GROUP_SIZE (stmt_vinfo) - DR_GROUP_GAP (stmt_vinfo);
3953 : }
3954 147138 : tree vectype = STMT_VINFO_VECTYPE (stmt_vinfo);
3955 147138 : int misalignment;
3956 294276 : if (((misalignment = dr_misalignment (dr_info, vectype)), true)
3957 147138 : && (vect_supportable_dr_alignment (vinfo, dr_info, vectype, misalignment)
3958 : == dr_explicit_realign_optimized))
3959 : {
3960 : /* We might access a full vector's worth. */
3961 0 : access_size += tree_to_uhwi (TYPE_SIZE_UNIT (vectype)) - ref_size;
3962 : }
3963 147138 : return access_size;
3964 : }
3965 :
3966 : /* Get the minimum alignment for all the scalar accesses that DR_INFO
3967 : describes. */
3968 :
3969 : static unsigned int
3970 147138 : vect_vfa_align (dr_vec_info *dr_info)
3971 : {
3972 0 : return dr_alignment (dr_info->dr);
3973 : }
3974 :
3975 : /* Function vect_compile_time_alias.
3976 :
3977 : Given data references A and B with equal base and offset, see whether
3978 : the alias relation can be decided at compilation time. Return 1 if
3979 : it can and the references alias, 0 if it can and the references do
3980 : not alias, and -1 if we cannot decide at compile time. SEGMENT_LENGTH_A,
3981 : SEGMENT_LENGTH_B, ACCESS_SIZE_A and ACCESS_SIZE_B are the equivalent
3982 : of dr_with_seg_len::{seg_len,access_size} for A and B.
     :
     : Each reference is modelled as a byte range that starts at its DR_INIT
     : (moved down by the magnitude of the segment length when DR_STEP is
     : negative) and spans the magnitude of the segment length plus the
     : access size. The result is 1 if the two ranges are known to overlap,
     : 0 if they cannot overlap, and -1 otherwise. */
3983 :
3984 : static int
3985 4338 : vect_compile_time_alias (dr_vec_info *a, dr_vec_info *b,
3986 : tree segment_length_a, tree segment_length_b,
3987 : unsigned HOST_WIDE_INT access_size_a,
3988 : unsigned HOST_WIDE_INT access_size_b)
3989 : {
3990 4338 : poly_offset_int offset_a = wi::to_poly_offset (DR_INIT (a->dr));
3991 4338 : poly_offset_int offset_b = wi::to_poly_offset (DR_INIT (b->dr));
3992 4338 : poly_uint64 const_length_a;
3993 4338 : poly_uint64 const_length_b;
3994 :
3995 : /* For negative step, we need to adjust address range by TYPE_SIZE_UNIT
3996 : bytes, e.g., int a[3] -> a[1] range is [a+4, a+16) instead of
3997 : [a, a+12) */
3998 4338 : if (tree_int_cst_compare (DR_STEP (a->dr), size_zero_node) < 0)
3999 : {
4000 250 : const_length_a = (-wi::to_poly_wide (segment_length_a)).force_uhwi ();
4001 250 : offset_a -= const_length_a;
4002 : }
4003 : else
4004 4088 : const_length_a = tree_to_poly_uint64 (segment_length_a);
4005 4338 : if (tree_int_cst_compare (DR_STEP (b->dr), size_zero_node) < 0)
4006 : {
4007 408 : const_length_b = (-wi::to_poly_wide (segment_length_b)).force_uhwi ();
4008 408 : offset_b -= const_length_b;
4009 : }
4010 : else
4011 3930 : const_length_b = tree_to_poly_uint64 (segment_length_b);
4012 :
4013 4338 : const_length_a += access_size_a; /* Extend each range by one access. */
4014 4338 : const_length_b += access_size_b;
4015 :
4016 4338 : if (ranges_known_overlap_p (offset_a, const_length_a,
4017 : offset_b, const_length_b))
4018 : return 1;
4019 :
4020 536 : if (!ranges_maybe_overlap_p (offset_a, const_length_a,
4021 : offset_b, const_length_b))
4022 536 : return 0;
4023 :
4024 : return -1;
4025 : }
4026 :
4027 : /* Return true if the minimum nonzero dependence distance for loop LOOP_DEPTH
4028 : in DDR is >= VF.
     :
     : Return false if DDR_ARE_DEPENDENT is set for DDR or if DDR has no
     : distance vectors. Zero distances, and positive distances when DDR
     : is reversed, are not compared against VF. */
4029 :
4030 : static bool
4031 86469 : dependence_distance_ge_vf (data_dependence_relation *ddr,
4032 : unsigned int loop_depth, poly_uint64 vf)
4033 : {
4034 86469 : if (DDR_ARE_DEPENDENT (ddr) != NULL_TREE
4035 91476 : || DDR_NUM_DIST_VECTS (ddr) == 0)
4036 : return false;
4037 :
4038 : /* If the dependence is exact, we should have limited the VF instead. */
4039 5042 : gcc_checking_assert (DDR_COULD_BE_INDEPENDENT_P (ddr));
4040 :
4041 : unsigned int i;
4042 : lambda_vector dist_v;
4043 10115 : FOR_EACH_VEC_ELT (DDR_DIST_VECTS (ddr), i, dist_v)
4044 : {
4045 10080 : HOST_WIDE_INT dist = dist_v[loop_depth];
4046 10080 : if (dist != 0
4047 5042 : && !(dist > 0 && DDR_REVERSED_P (ddr))
4048 15122 : && maybe_lt ((unsigned HOST_WIDE_INT) abs_hwi (dist), vf))
4049 : return false;
4050 : }
4051 :
4052 35 : if (dump_enabled_p ())
4053 2 : dump_printf_loc (MSG_NOTE, vect_location,
4054 : "dependence distance between %T and %T is >= VF\n",
4055 2 : DR_REF (DDR_A (ddr)), DR_REF (DDR_B (ddr)));
4056 :
4057 : return true;
4058 : }
4059 :
4060 : /* Dump LOWER_BOUND using flags DUMP_KIND. Dumps are known to be enabled.
     : The output has the form "unsigned (EXPR) >= MIN" for unsigned bounds
     : and "abs (EXPR) >= MIN" otherwise, with no trailing newline. */
4061 :
4062 : static void
4063 437 : dump_lower_bound (dump_flags_t dump_kind, const vec_lower_bound &lower_bound)
4064 : {
4065 437 : dump_printf (dump_kind, "%s (%T) >= ",
4066 437 : lower_bound.unsigned_p ? "unsigned" : "abs",
4067 437 : lower_bound.expr);
4068 437 : dump_dec (dump_kind, lower_bound.min_value);
4069 437 : }
4070 :
4071 : /* Record that the vectorized loop requires the vec_lower_bound described
4072 : by EXPR, UNSIGNED_P and MIN_VALUE.
     :
     : If a bound for an expression equal to EXPR has already been recorded
     : for LOOP_VINFO, merge the two instead of adding a new entry: the
     : merged bound is unsigned only if both bounds are, and its minimum
     : value is an upper bound of the two minimum values. */
4073 :
4074 : static void
4075 6596 : vect_check_lower_bound (loop_vec_info loop_vinfo, tree expr, bool unsigned_p,
4076 : poly_uint64 min_value)
4077 : {
4078 6596 : vec<vec_lower_bound> &lower_bounds
4079 : = LOOP_VINFO_LOWER_BOUNDS (loop_vinfo);
4080 7568 : for (unsigned int i = 0; i < lower_bounds.length (); ++i)
4081 5874 : if (operand_equal_p (lower_bounds[i].expr, expr, 0))
4082 : {
4083 4902 : unsigned_p &= lower_bounds[i].unsigned_p;
4084 4902 : min_value = upper_bound (lower_bounds[i].min_value, min_value);
4085 4902 : if (lower_bounds[i].unsigned_p != unsigned_p
4086 4902 : || maybe_lt (lower_bounds[i].min_value, min_value))
4087 : {
4088 790 : lower_bounds[i].unsigned_p = unsigned_p;
4089 790 : lower_bounds[i].min_value = min_value;
4090 790 : if (dump_enabled_p ())
4091 : {
4092 250 : dump_printf_loc (MSG_NOTE, vect_location,
4093 : "updating run-time check to ");
4094 250 : dump_lower_bound (MSG_NOTE, lower_bounds[i]);
4095 250 : dump_printf (MSG_NOTE, "\n");
4096 : }
4097 : }
4098 4902 : return;
4099 : }
4100 :
4101 1694 : vec_lower_bound lower_bound (expr, unsigned_p, min_value);
4102 1694 : if (dump_enabled_p ())
4103 : {
4104 187 : dump_printf_loc (MSG_NOTE, vect_location, "need a run-time check that ");
4105 187 : dump_lower_bound (MSG_NOTE, lower_bound);
4106 187 : dump_printf (MSG_NOTE, "\n");
4107 : }
4108 1694 : LOOP_VINFO_LOWER_BOUNDS (loop_vinfo).safe_push (lower_bound);
4109 : }
4110 :
4111 : /* Return true if it's unlikely that the step of the vectorized form of DR_INFO
4112 : will span fewer than GAP bytes.
     :
     : The test compares the estimated value of GAP with the estimated
     : vectorization factor of LOOP_VINFO, multiplied by the group size when
     : DR_INFO's statement belongs to a group, times the scalar access size
     : of DR_INFO. */
4113 :
4114 : static bool
4115 5312 : vect_small_gap_p (loop_vec_info loop_vinfo, dr_vec_info *dr_info,
4116 : poly_int64 gap)
4117 : {
4118 5312 : stmt_vec_info stmt_info = dr_info->stmt;
4119 5312 : HOST_WIDE_INT count
4120 5312 : = estimated_poly_value (LOOP_VINFO_VECT_FACTOR (loop_vinfo));
4121 5312 : if (DR_GROUP_FIRST_ELEMENT (stmt_info))
4122 4552 : count *= DR_GROUP_SIZE (DR_GROUP_FIRST_ELEMENT (stmt_info));
4123 5312 : return (estimated_poly_value (gap)
4124 5312 : <= count * vect_get_scalar_dr_size (dr_info));
4125 : }
4126 :
4127 : /* Return true if we know that there is no alias between DR_INFO_A and
4128 : DR_INFO_B when abs (DR_STEP (DR_INFO_A->dr)) >= N for some N.
4129 : When returning true, set *LOWER_BOUND_OUT to this N.
     :
     : Return false unless the two references have equal base address,
     : offset and step and have DR_INITs that are poly_int constants with
     : a known order. N is the number of bytes from the start of the
     : lower access to the end of the higher access. */
4130 :
4131 : static bool
4132 19428 : vectorizable_with_step_bound_p (dr_vec_info *dr_info_a, dr_vec_info *dr_info_b,
4133 : poly_uint64 *lower_bound_out)
4134 : {
4135 : /* Check that there is a constant gap of known sign between DR_A
4136 : and DR_B. */
4137 19428 : data_reference *dr_a = dr_info_a->dr;
4138 19428 : data_reference *dr_b = dr_info_b->dr;
4139 19428 : poly_int64 init_a, init_b;
4140 19428 : if (!operand_equal_p (DR_BASE_ADDRESS (dr_a), DR_BASE_ADDRESS (dr_b), 0)
4141 8792 : || !operand_equal_p (DR_OFFSET (dr_a), DR_OFFSET (dr_b), 0)
4142 8106 : || !operand_equal_p (DR_STEP (dr_a), DR_STEP (dr_b), 0)
4143 8096 : || !poly_int_tree_p (DR_INIT (dr_a), &init_a)
4144 8096 : || !poly_int_tree_p (DR_INIT (dr_b), &init_b)
4145 19428 : || !ordered_p (init_a, init_b))
4146 11332 : return false;
4147 :
4148 : /* Sort DR_A and DR_B by the address they access. */
4149 8096 : if (maybe_lt (init_b, init_a))
4150 : {
4151 116 : std::swap (init_a, init_b);
4152 116 : std::swap (dr_info_a, dr_info_b);
4153 116 : std::swap (dr_a, dr_b);
4154 : }
4155 :
4156 : /* If the two accesses could be dependent within a scalar iteration,
4157 : make sure that we'd retain their order. */
4158 8096 : if (maybe_gt (init_a + vect_get_scalar_dr_size (dr_info_a), init_b)
4159 8096 : && !vect_preserves_scalar_order_p (dr_info_a, dr_info_b))
4160 : return false;
4161 :
4162 : /* There is no alias if abs (DR_STEP) is greater than or equal to
4163 : the bytes spanned by the combination of the two accesses. */
4164 8096 : *lower_bound_out = init_b + vect_get_scalar_dr_size (dr_info_b) - init_a;
4165 8096 : return true;
4166 : }
4167 :
4168 : /* Function vect_prune_runtime_alias_test_list.
4169 :
4170 : Prune a list of ddrs to be tested at run-time by versioning for alias.
4171 : Merge several alias checks into one if possible.
4172 : Return a failure if the resulting number of checks is larger than allowed
4173 : by param_vect_max_version_for_alias_checks (scaled by 6/10 for the cheap
     : cost model), if any run-time check would be needed under the very-cheap
     : cost model, if two references are known to alias at compile time, or if
     : a check would need the iteration count of an uncounted loop.
     : Otherwise return success. */
4174 :
4175 : opt_result
4176 412779 : vect_prune_runtime_alias_test_list (loop_vec_info loop_vinfo)
4177 : {
4178 412779 : typedef pair_hash <tree_operand_hash, tree_operand_hash> tree_pair_hash;
4179 412779 : hash_set <tree_pair_hash> compared_objects;
4180 :
4181 412779 : const vec<ddr_p> &may_alias_ddrs = LOOP_VINFO_MAY_ALIAS_DDRS (loop_vinfo);
4182 412779 : vec<dr_with_seg_len_pair_t> &comp_alias_ddrs
4183 : = LOOP_VINFO_COMP_ALIAS_DDRS (loop_vinfo);
4184 412779 : const vec<vec_object_pair> &check_unequal_addrs
4185 : = LOOP_VINFO_CHECK_UNEQUAL_ADDRS (loop_vinfo);
4186 412779 : poly_uint64 vect_factor = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
4187 412779 : tree scalar_loop_iters = LOOP_VINFO_NITERS (loop_vinfo);
4188 :
4189 412779 : ddr_p ddr;
4190 412779 : unsigned int i;
4191 412779 : tree length_factor;
4192 :
4193 412779 : DUMP_VECT_SCOPE ("vect_prune_runtime_alias_test_list");
4194 :
4195 : /* Step values are irrelevant for aliasing if the number of vector
4196 : iterations is equal to the number of scalar iterations (which can
4197 : happen for fully-SLP loops). */
4198 412779 : bool vf_one_p = known_eq (LOOP_VINFO_VECT_FACTOR (loop_vinfo), 1U);
4199 :
4200 412779 : if (!vf_one_p)
4201 : {
4202 : /* Convert the checks for nonzero steps into bound tests. */
4203 : tree value;
4204 409524 : FOR_EACH_VEC_ELT (LOOP_VINFO_CHECK_NONZERO (loop_vinfo), i, value)
4205 1641 : vect_check_lower_bound (loop_vinfo, value, true, 1);
4206 : }
4207 :
4208 412779 : if (may_alias_ddrs.is_empty ())
4209 386226 : return opt_result::success ();
4210 :
4211 26553 : comp_alias_ddrs.create (may_alias_ddrs.length ());
4212 :
4213 26553 : unsigned int loop_depth
4214 26553 : = index_in_loop_nest (LOOP_VINFO_LOOP (loop_vinfo)->num,
4215 26553 : LOOP_VINFO_LOOP_NEST (loop_vinfo));
4216 :
4217 : /* First, we collect all data ref pairs for aliasing checks. */
4218 109208 : FOR_EACH_VEC_ELT (may_alias_ddrs, i, ddr)
4219 : {
4220 86469 : poly_uint64 lower_bound;
4221 86469 : tree segment_length_a, segment_length_b;
4222 86469 : unsigned HOST_WIDE_INT access_size_a, access_size_b;
4223 86469 : unsigned HOST_WIDE_INT align_a, align_b;
4224 :
4225 : /* Ignore the alias if the VF we chose ended up being no greater
4226 : than the dependence distance. */
4227 86469 : if (dependence_distance_ge_vf (ddr, loop_depth, vect_factor))
4228 13424 : continue;
4229 :
4230 86434 : if (DDR_OBJECT_A (ddr))
4231 : {
4232 106 : vec_object_pair new_pair (DDR_OBJECT_A (ddr), DDR_OBJECT_B (ddr));
4233 106 : if (!compared_objects.add (new_pair))
4234 : {
4235 22 : if (dump_enabled_p ())
4236 16 : dump_printf_loc (MSG_NOTE, vect_location,
4237 : "checking that %T and %T"
4238 : " have different addresses\n",
4239 : new_pair.first, new_pair.second);
4240 22 : LOOP_VINFO_CHECK_UNEQUAL_ADDRS (loop_vinfo).safe_push (new_pair);
4241 : }
4242 106 : continue;
4243 106 : }
4244 :
4245 86328 : dr_vec_info *dr_info_a = loop_vinfo->lookup_dr (DDR_A (ddr));
4246 86328 : stmt_vec_info stmt_info_a = dr_info_a->stmt;
4247 :
4248 86328 : dr_vec_info *dr_info_b = loop_vinfo->lookup_dr (DDR_B (ddr));
4249 86328 : stmt_vec_info stmt_info_b = dr_info_b->stmt;
4250 :
4251 86328 : bool preserves_scalar_order_p
4252 86328 : = vect_preserves_scalar_order_p (dr_info_a, dr_info_b);
4253 86328 : bool ignore_step_p
4254 : = (vf_one_p
4255 86328 : && (preserves_scalar_order_p
4256 4037 : || operand_equal_p (DR_STEP (dr_info_a->dr),
4257 4037 : DR_STEP (dr_info_b->dr))));
4258 :
4259 : /* Skip the pair if inter-iteration dependencies are irrelevant
4260 : and intra-iteration dependencies are guaranteed to be honored. */
4261 15825 : if (ignore_step_p
4262 8033 : && (preserves_scalar_order_p
4263 3318 : || vectorizable_with_step_bound_p (dr_info_a, dr_info_b,
4264 : &lower_bound)))
4265 : {
4266 7792 : if (dump_enabled_p ())
4267 2528 : dump_printf_loc (MSG_NOTE, vect_location,
4268 : "no need for alias check between "
4269 : "%T and %T when VF is 1\n",
4270 2528 : DR_REF (dr_info_a->dr), DR_REF (dr_info_b->dr));
4271 7792 : continue;
4272 : }
4273 :
4274 : /* See whether we can handle the alias using a bounds check on
4275 : the step, and whether that's likely to be the best approach.
4276 : (It might not be, for example, if the minimum step is much larger
4277 : than the number of bytes handled by one vector iteration.) */
4278 78536 : if (!ignore_step_p
4279 78295 : && TREE_CODE (DR_STEP (dr_info_a->dr)) != INTEGER_CST
4280 16110 : && vectorizable_with_step_bound_p (dr_info_a, dr_info_b,
4281 : &lower_bound)
4282 83555 : && (vect_small_gap_p (loop_vinfo, dr_info_a, lower_bound)
4283 293 : || vect_small_gap_p (loop_vinfo, dr_info_b, lower_bound)))
4284 : {
4285 4955 : bool unsigned_p = dr_known_forward_stride_p (dr_info_a->dr);
4286 4955 : if (dump_enabled_p ())
4287 : {
4288 3384 : dump_printf_loc (MSG_NOTE, vect_location, "no alias between "
4289 : "%T and %T when the step %T is outside ",
4290 : DR_REF (dr_info_a->dr),
4291 1692 : DR_REF (dr_info_b->dr),
4292 1692 : DR_STEP (dr_info_a->dr));
4293 1692 : if (unsigned_p)
4294 504 : dump_printf (MSG_NOTE, "[0");
4295 : else
4296 : {
4297 1188 : dump_printf (MSG_NOTE, "(");
4298 1188 : dump_dec (MSG_NOTE, poly_int64 (-lower_bound));
4299 : }
4300 1692 : dump_printf (MSG_NOTE, ", ");
4301 1692 : dump_dec (MSG_NOTE, lower_bound);
4302 1692 : dump_printf (MSG_NOTE, ")\n");
4303 : }
4304 4955 : vect_check_lower_bound (loop_vinfo, DR_STEP (dr_info_a->dr),
4305 : unsigned_p, lower_bound);
4306 4955 : continue;
4307 4955 : }
4308 :
4309 73581 : stmt_vec_info dr_group_first_a = DR_GROUP_FIRST_ELEMENT (stmt_info_a);
4310 73581 : if (dr_group_first_a)
4311 : {
4312 20940 : stmt_info_a = dr_group_first_a;
4313 20940 : dr_info_a = STMT_VINFO_DR_INFO (stmt_info_a);
4314 : }
4315 :
4316 73581 : stmt_vec_info dr_group_first_b = DR_GROUP_FIRST_ELEMENT (stmt_info_b);
4317 73581 : if (dr_group_first_b)
4318 : {
4319 21371 : stmt_info_b = dr_group_first_b;
4320 21371 : dr_info_b = STMT_VINFO_DR_INFO (stmt_info_b);
4321 : }
4322 :
4323 73581 : if (ignore_step_p)
4324 : {
4325 241 : segment_length_a = size_zero_node;
4326 241 : segment_length_b = size_zero_node;
4327 : }
4328 : else
4329 : {
4330 73340 : if (!operand_equal_p (DR_STEP (dr_info_a->dr),
4331 73340 : DR_STEP (dr_info_b->dr), 0))
4332 : {
4333 16427 : length_factor = scalar_loop_iters;
4334 16427 : if (TREE_CODE (length_factor) == SCEV_NOT_KNOWN)
4335 12 : return opt_result::failure_at (vect_location,
4336 : "Unsupported alias check on"
4337 : " uncounted loop\n");
4338 : }
4339 : else
4340 56913 : length_factor = size_int (vect_factor);
4341 73328 : segment_length_a = vect_vfa_segment_size (dr_info_a, length_factor);
4342 73328 : segment_length_b = vect_vfa_segment_size (dr_info_b, length_factor);
4343 : }
4344 73569 : access_size_a = vect_vfa_access_size (loop_vinfo, dr_info_a);
4345 73569 : access_size_b = vect_vfa_access_size (loop_vinfo, dr_info_b);
4346 73569 : align_a = vect_vfa_align (dr_info_a);
4347 73569 : align_b = vect_vfa_align (dr_info_b);
4348 :
4349 : /* See whether the alias is known at compilation time. */
4350 73569 : if (operand_equal_p (DR_BASE_ADDRESS (dr_info_a->dr),
4351 73569 : DR_BASE_ADDRESS (dr_info_b->dr), 0)
4352 6180 : && operand_equal_p (DR_OFFSET (dr_info_a->dr),
4353 6180 : DR_OFFSET (dr_info_b->dr), 0)
4354 4488 : && TREE_CODE (DR_STEP (dr_info_a->dr)) == INTEGER_CST
4355 4414 : && TREE_CODE (DR_STEP (dr_info_b->dr)) == INTEGER_CST
4356 4404 : && poly_int_tree_p (segment_length_a)
4357 77931 : && poly_int_tree_p (segment_length_b))
4358 : {
4359 4338 : int res = vect_compile_time_alias (dr_info_a, dr_info_b,
4360 : segment_length_a,
4361 : segment_length_b,
4362 : access_size_a,
4363 : access_size_b);
4364 4338 : if (res >= 0 && dump_enabled_p ())
4365 : {
4366 208 : dump_printf_loc (MSG_NOTE, vect_location,
4367 : "can tell at compile time that %T and %T",
4368 104 : DR_REF (dr_info_a->dr), DR_REF (dr_info_b->dr));
4369 104 : if (res == 0)
4370 57 : dump_printf (MSG_NOTE, " do not alias\n");
4371 : else
4372 47 : dump_printf (MSG_NOTE, " alias\n");
4373 : }
4374 :
4375 4338 : if (res == 0)
4376 536 : continue;
4377 :
4378 3802 : if (res == 1)
4379 3802 : return opt_result::failure_at (stmt_info_b->stmt,
4380 : "not vectorized:"
4381 : " compilation time alias: %G%G",
4382 : stmt_info_a->stmt,
4383 : stmt_info_b->stmt);
4384 : }
4385 :
4386 : /* dr_with_seg_len requires the alignment to apply to the segment length
4387 : and access size, not just the start address. The access size can be
4388 : smaller than the pointer alignment for grouped accesses and bitfield
4389 : references; see PR115192 and PR116125 respectively. */
4390 69231 : align_a = std::min (align_a, least_bit_hwi (access_size_a));
4391 69231 : align_b = std::min (align_b, least_bit_hwi (access_size_b));
4392 :
4393 69231 : dr_with_seg_len dr_a (dr_info_a->dr, segment_length_a,
4394 69231 : access_size_a, align_a);
4395 69231 : dr_with_seg_len dr_b (dr_info_b->dr, segment_length_b,
4396 69231 : access_size_b, align_b);
4397 : /* Canonicalize the order to be the one that's needed for accurate
4398 : RAW, WAR and WAW flags, in cases where the data references are
4399 : well-ordered. The order doesn't really matter otherwise,
4400 : but we might as well be consistent. */
4401 69231 : if (get_later_stmt (stmt_info_a, stmt_info_b) == stmt_info_a)
4402 5190 : std::swap (dr_a, dr_b);
4403 :
4404 69231 : dr_with_seg_len_pair_t dr_with_seg_len_pair
4405 : (dr_a, dr_b, (preserves_scalar_order_p
4406 : ? dr_with_seg_len_pair_t::WELL_ORDERED
4407 76066 : : dr_with_seg_len_pair_t::REORDERED));
4408 :
4409 69231 : comp_alias_ddrs.safe_push (dr_with_seg_len_pair);
4410 : }
4411 :
4412 22739 : prune_runtime_alias_test_list (&comp_alias_ddrs, vect_factor);
4413 :
4414 45478 : unsigned int count = (comp_alias_ddrs.length ()
4415 22739 : + check_unequal_addrs.length ());
4416 :
4417 22739 : if (count
4418 22739 : && (loop_cost_model (LOOP_VINFO_LOOP (loop_vinfo))
4419 : == VECT_COST_MODEL_VERY_CHEAP))
4420 12970 : return opt_result::failure_at
4421 12970 : (vect_location, "would need a runtime alias check\n");
4422 :
4423 9769 : if (dump_enabled_p ())
4424 1946 : dump_printf_loc (MSG_NOTE, vect_location,
4425 : "improved number of alias checks from %d to %d\n",
4426 : may_alias_ddrs.length (), count);
4427 9769 : unsigned limit = param_vect_max_version_for_alias_checks;
4428 9769 : if (loop_cost_model (LOOP_VINFO_LOOP (loop_vinfo)) == VECT_COST_MODEL_CHEAP)
4429 934 : limit = param_vect_max_version_for_alias_checks * 6 / 10;
4430 9769 : if (count > limit)
4431 162 : return opt_result::failure_at
4432 162 : (vect_location,
4433 : "number of versioning for alias run-time tests exceeds %d "
4434 : "(--param vect-max-version-for-alias-checks)\n", limit);
4435 :
4436 9607 : return opt_result::success ();
4437 412779 : }
4438 :
4439 : /* Structure to hold information about a supported gather/scatter
4440 : configuration. */
4441 : struct gather_scatter_config
4442 : {
4443 : internal_fn ifn; /* Internal function found to be supported. */
4444 : tree offset_vectype; /* Offset vector type it was checked with. */
4445 : int scale; /* Scale it was checked with. */
4446 : vec<int> elsvals; /* Else values filled in by the support check. */
4447 : };
4448 :
4449 : /* Determine which gather/scatter IFN is supported for the given parameters.
4450 : IFN_MASK_GATHER_LOAD, IFN_GATHER_LOAD, and IFN_MASK_LEN_GATHER_LOAD
4451 : are mutually exclusive, so we only need to find one. Return the
4452 : supported IFN or IFN_LAST if none are supported.
     :
     : READ_P selects the gather-load functions and otherwise the
     : scatter-store functions are used. The function chosen by MASKED_P
     : is tried first, then the MASK_ variant, then the MASK_LEN_ variant.
     : Return IFN_LAST without querying the target if OFFSET_VECTYPE is
     : null. ELSVALS is passed through to each support query. */
4453 :
4454 : static internal_fn
4455 1179750 : vect_gather_scatter_which_ifn (bool read_p, bool masked_p,
4456 : tree vectype, tree memory_type,
4457 : tree offset_vectype, int scale,
4458 : vec<int> *elsvals)
4459 : {
4460 : /* Work out which functions to try. */
4461 1179750 : internal_fn ifn, alt_ifn, alt_ifn2;
4462 1179750 : if (read_p)
4463 : {
4464 877166 : ifn = masked_p ? IFN_MASK_GATHER_LOAD : IFN_GATHER_LOAD;
4465 : alt_ifn = IFN_MASK_GATHER_LOAD;
4466 : alt_ifn2 = IFN_MASK_LEN_GATHER_LOAD;
4467 : }
4468 : else
4469 : {
4470 302584 : ifn = masked_p ? IFN_MASK_SCATTER_STORE : IFN_SCATTER_STORE;
4471 : alt_ifn = IFN_MASK_SCATTER_STORE;
4472 : alt_ifn2 = IFN_MASK_LEN_SCATTER_STORE;
4473 : }
4474 :
4475 1179750 : if (!offset_vectype)
4476 : return IFN_LAST;
4477 :
4478 1179750 : if (internal_gather_scatter_fn_supported_p (ifn, vectype, memory_type,
4479 : offset_vectype, scale, elsvals))
4480 : return ifn;
4481 1179750 : if (internal_gather_scatter_fn_supported_p (alt_ifn, vectype, memory_type,
4482 : offset_vectype, scale, elsvals))
4483 : return alt_ifn;
4484 1179750 : if (internal_gather_scatter_fn_supported_p (alt_ifn2, vectype, memory_type,
4485 : offset_vectype, scale, elsvals))
4486 : return alt_ifn2;
4487 :
4488 : return IFN_LAST;
4489 : }
4490 :
4491 : /* Collect all supported offset vector types for a gather load or scatter
4492 : store. READ_P is true for loads and false for stores. MASKED_P is true
4493 : if the load or store is conditional. VECTYPE is the data vector type.
4494 : MEMORY_TYPE is the type of the memory elements being loaded or stored,
4495 : and OFFSET_TYPE is the type of the offset.
4496 : SCALE is the amount by which the offset should be multiplied.
4497 :
     : The candidate offset types are the signed and unsigned integer types
     : of each width obtained by doubling OFFSET_TYPE's precision while it is
     : no greater than POINTER_SIZE. For each of them the requested SCALE is
     : tried first, followed by those of 1, 2, 4 and 8 that are smaller than
     : SCALE and divide it.
     :
4498 : Return a vector of all configurations the target supports (which can
4499 : be none). */
4500 :
4501 : static auto_vec<gather_scatter_config>
4502 85143 : vect_gather_scatter_get_configs (vec_info *vinfo, bool read_p, bool masked_p,
4503 : tree vectype, tree memory_type,
4504 : tree offset_type, int scale)
4505 : {
4506 85143 : auto_vec<gather_scatter_config> configs;
4507 :
4508 85143 : auto_vec<tree, 8> offset_types_to_try;
4509 :
4510 : /* Try all sizes from the offset type's precision up to POINTER_SIZE. */
4511 85143 : for (unsigned int bits = TYPE_PRECISION (offset_type);
4512 398599 : bits <= POINTER_SIZE;
4513 299920 : bits *= 2)
4514 : {
4515 : /* Signed variant. */
4516 299920 : offset_types_to_try.safe_push
4517 299920 : (build_nonstandard_integer_type (bits, 0));
4518 : /* Unsigned variant. */
4519 299920 : offset_types_to_try.safe_push
4520 299920 : (build_nonstandard_integer_type (bits, 1));
4521 : }
4522 :
4523 : /* Once we find which IFN works for one offset type, we know that it
4524 : will work for other offset types as well. Then we can perform
4525 : the checks for the remaining offset types with only that IFN.
4526 : However, we might need to try different offset types to find which
4527 : IFN is supported, since the check is offset-type-specific. */
4528 : internal_fn ifn = IFN_LAST;
4529 :
4530 : /* Try each offset type. */
4531 684983 : for (unsigned int i = 0; i < offset_types_to_try.length (); i++)
4532 : {
4533 599840 : tree offset_type = offset_types_to_try[i];
4534 599840 : tree offset_vectype = get_vectype_for_scalar_type (vinfo, offset_type);
4535 599840 : if (!offset_vectype)
4536 10194 : continue;
4537 :
4538 : /* Try multiple scale values. Start with exact match, then try
4539 : smaller common scales that a target might support. */
4540 589646 : int scales_to_try[] = {scale, 1, 2, 4, 8};
4541 :
4542 3537876 : for (unsigned int j = 0;
4543 3537876 : j < sizeof (scales_to_try) / sizeof (*scales_to_try);
4544 : j++)
4545 : {
4546 2948230 : int try_scale = scales_to_try[j];
4547 :
4548 : /* Skip scales >= requested scale (except for exact match). */
4549 2948230 : if (j > 0 && try_scale >= scale)
4550 1768480 : continue;
4551 :
4552 : /* Skip if requested scale is not a multiple of this scale. */
4553 1179894 : if (j > 0 && scale % try_scale != 0)
4554 144 : continue;
4555 :
4556 1179750 : vec<int> elsvals = vNULL;
4557 :
4558 : /* If we haven't determined which IFN is supported yet, try all three
4559 : to find which one the target supports. */
4560 1179750 : if (ifn == IFN_LAST)
4561 : {
4562 1179750 : ifn = vect_gather_scatter_which_ifn (read_p, masked_p,
4563 : vectype, memory_type,
4564 : offset_vectype, try_scale,
4565 : &elsvals);
4566 1179750 : if (ifn != IFN_LAST)
4567 : {
4568 : /* Found which IFN is supported. Save this configuration. */
4569 0 : gather_scatter_config config;
4570 0 : config.ifn = ifn;
4571 0 : config.offset_vectype = offset_vectype;
4572 0 : config.scale = try_scale;
4573 0 : config.elsvals = elsvals;
4574 0 : configs.safe_push (config);
4575 : }
4576 : }
4577 : else
4578 : {
4579 : /* We already know which IFN is supported, just check if this
4580 : offset type and scale work with it. */
4581 0 : if (internal_gather_scatter_fn_supported_p (ifn, vectype,
4582 : memory_type,
4583 : offset_vectype,
4584 : try_scale,
4585 : &elsvals))
4586 : {
4587 0 : gather_scatter_config config;
4588 0 : config.ifn = ifn;
4589 0 : config.offset_vectype = offset_vectype;
4590 0 : config.scale = try_scale;
4591 0 : config.elsvals = elsvals;
4592 0 : configs.safe_push (config);
4593 : }
4594 : }
4595 : }
4596 : }
4597 :
4598 85143 : return configs;
4599 85143 : }
4600 :
4601 : /* Check whether we can use an internal function for a gather load
4602 : or scatter store. READ_P is true for loads and false for stores.
4603 : MASKED_P is true if the load or store is conditional. MEMORY_TYPE is
4604 : the type of the memory elements being loaded or stored. OFFSET_TYPE
4605 : is the type of the offset that is being applied to the invariant
4606 : base address. If OFFSET_TYPE is scalar the function chooses an
4607 : appropriate vector type for it. SCALE is the amount by which the
4608 : offset should be multiplied *after* it has been converted to address width.
4609 : If the target does not support the requested SCALE, SUPPORTED_SCALE
4610 : will contain the scale that is actually supported
4611 : (which may be smaller, requiring additional multiplication).
4612 : Otherwise SUPPORTED_SCALE is 0.
4613 :
4614 : Return true if the function is supported, storing the function id in
4615 : *IFN_OUT and the vector type for the offset in *OFFSET_VECTYPE_OUT.
4616 : If we support an offset vector type with different signedness than
4617 : OFFSET_TYPE store it in SUPPORTED_OFFSET_VECTYPE.
4618 :
4619 : If we can use gather/scatter and ELSVALS is nonzero, store the possible
4620 : else values in ELSVALS.
     :
     : Return false if the element width of VECTYPE differs from the size of
     : MEMORY_TYPE, if no vector type is available for OFFSET_TYPE, or if no
     : supported configuration matches. *SUPPORTED_OFFSET_VECTYPE and
     : *SUPPORTED_SCALE are always reset (to NULL_TREE and 0) on entry. */
4621 :
4622 : bool
4623 85143 : vect_gather_scatter_fn_p (vec_info *vinfo, bool read_p, bool masked_p,
4624 : tree vectype, tree memory_type, tree offset_type,
4625 : int scale, int *supported_scale,
4626 : internal_fn *ifn_out,
4627 : tree *offset_vectype_out,
4628 : tree *supported_offset_vectype,
4629 : vec<int> *elsvals)
4630 : {
4631 85143 : *supported_offset_vectype = NULL_TREE;
4632 85143 : *supported_scale = 0;
4633 85143 : unsigned int memory_bits = tree_to_uhwi (TYPE_SIZE (memory_type));
4634 85143 : unsigned int element_bits = vector_element_bits (vectype);
4635 85143 : if (element_bits != memory_bits)
4636 : /* For now the vector elements must be the same width as the
4637 : memory elements. */
4638 : return false;
4639 :
4640 : /* Get the original offset vector type for comparison. */
4641 85143 : tree offset_vectype = VECTOR_TYPE_P (offset_type)
4642 85143 : ? offset_type : get_vectype_for_scalar_type (vinfo, offset_type);
4643 :
4644 : /* If there is no offset vectype, bail. */
4645 70960 : if (!offset_vectype)
4646 : return false;
4647 :
4648 85143 : offset_type = TREE_TYPE (offset_vectype);
4649 :
4650 : /* Get all supported configurations for this data vector type. */
4651 85143 : auto_vec<gather_scatter_config> configs
4652 : = vect_gather_scatter_get_configs (vinfo, read_p, masked_p, vectype,
4653 85143 : memory_type, offset_type, scale);
4654 :
4655 85143 : if (configs.is_empty ())
4656 : return false;
4657 :
4658 : /* Selection priority:
4659 : 1 - Exact scale match + offset type match
4660 : 2 - Exact scale match + sign-swapped offset
4661 : 3 - Smaller scale + offset type match
4662 : 4 - Smaller scale + sign-swapped offset
4663 : Within each category, prefer smaller offset types. */
4664 :
4665 : /* First pass: exact scale match with no conversion. */
4666 0 : for (unsigned int i = 0; i < configs.length (); i++)
4667 : {
4668 0 : if (configs[i].scale == scale
4669 0 : && TYPE_SIGN (configs[i].offset_vectype)
4670 0 : == TYPE_SIGN (offset_vectype))
4671 : {
4672 0 : *ifn_out = configs[i].ifn;
4673 0 : *offset_vectype_out = configs[i].offset_vectype;
4674 0 : if (elsvals)
4675 0 : *elsvals = configs[i].elsvals;
4676 0 : return true;
4677 : }
4678 : }
4679 :
4680 : /* No direct match. This means we try to find either
4681 : - a sign-swapped offset vectype or
4682 : - a different scale and 2x larger offset type or
4683 : - a different scale and larger sign-swapped offset vectype. */
4684 0 : unsigned int offset_precision = TYPE_PRECISION (TREE_TYPE (offset_vectype));
4685 0 : unsigned int needed_precision
4686 0 : = TYPE_UNSIGNED (offset_vectype) ? offset_precision * 2 : POINTER_SIZE;
4687 0 : needed_precision = std::min (needed_precision, (unsigned) POINTER_SIZE);
4688 :
4689 : /* Second pass: exact scale match with a sign-swapped (or otherwise
4690 : convertible) offset vectype of at least NEEDED_PRECISION bits. */
4691 0 : enum tree_code tmp;
4692 0 : for (unsigned int i = 0; i < configs.length (); i++)
4693 : {
4694 0 : unsigned int precision
4695 0 : = TYPE_PRECISION (TREE_TYPE (configs[i].offset_vectype));
4696 0 : if (configs[i].scale == scale
4697 0 : && precision >= needed_precision
4698 0 : && (supportable_convert_operation (CONVERT_EXPR,
4699 0 : configs[i].offset_vectype,
4700 : offset_vectype, &tmp)
4701 0 : || (needed_precision == offset_precision
4702 0 : && tree_nop_conversion_p (configs[i].offset_vectype,
4703 : offset_vectype))))
4704 : {
4705 0 : *ifn_out = configs[i].ifn;
4706 0 : *offset_vectype_out = offset_vectype;
4707 0 : *supported_offset_vectype = configs[i].offset_vectype;
4708 0 : if (elsvals)
4709 0 : *elsvals = configs[i].elsvals;
4710 0 : return true;
4711 : }
4712 : }
4713 :
4714 : /* Third pass: Try a smaller scale with the same signedness. */
4715 0 : needed_precision = offset_precision * 2;
4716 0 : needed_precision = std::min (needed_precision, (unsigned) POINTER_SIZE);
4717 :
4718 0 : for (unsigned int i = 0; i < configs.length (); i++)
4719 : {
4720 0 : unsigned int precision
4721 0 : = TYPE_PRECISION (TREE_TYPE (configs[i].offset_vectype));
4722 0 : if (configs[i].scale < scale
4723 0 : && TYPE_SIGN (configs[i].offset_vectype)
4724 0 : == TYPE_SIGN (offset_vectype)
4725 0 : && precision >= needed_precision)
4726 : {
4727 0 : *ifn_out = configs[i].ifn;
4728 0 : *offset_vectype_out = configs[i].offset_vectype;
4729 0 : *supported_scale = configs[i].scale;
4730 0 : if (elsvals)
4731 0 : *elsvals = configs[i].elsvals;
4732 0 : return true;
4733 : }
4734 : }
4735 :
4736 : /* Fourth pass: Try a smaller scale and sign-swapped offset vectype. */
4737 0 : needed_precision
4738 0 : = TYPE_UNSIGNED (offset_vectype) ? offset_precision * 2 : POINTER_SIZE;
4739 0 : needed_precision = std::min (needed_precision, (unsigned) POINTER_SIZE);
4740 :
4741 0 : for (unsigned int i = 0; i < configs.length (); i++)
4742 : {
4743 0 : unsigned int precision
4744 0 : = TYPE_PRECISION (TREE_TYPE (configs[i].offset_vectype));
4745 0 : if (configs[i].scale < scale
4746 0 : && precision >= needed_precision
4747 0 : && (supportable_convert_operation (CONVERT_EXPR,
4748 0 : configs[i].offset_vectype,
4749 : offset_vectype, &tmp)
4750 0 : || (needed_precision == offset_precision
4751 0 : && tree_nop_conversion_p (configs[i].offset_vectype,
4752 : offset_vectype))))
4753 : {
4754 0 : *ifn_out = configs[i].ifn;
4755 0 : *offset_vectype_out = offset_vectype;
4756 0 : *supported_offset_vectype = configs[i].offset_vectype;
4757 0 : *supported_scale = configs[i].scale;
4758 0 : if (elsvals)
4759 0 : *elsvals = configs[i].elsvals;
4760 0 : return true;
4761 : }
4762 : }
4763 :
4764 : return false;
4765 85143 : }
4766 :
4767 : /* STMT_INFO is a call to an internal gather load or scatter store function.
4768 : Describe the operation in INFO.
     :
     : The base, alias pointer, offset and scale are read from the call's
     : arguments, the element type from STMT_INFO's vector type and the
     : memory type from the data reference. INFO->decl and
     : INFO->offset_vectype are cleared. */
4769 :
4770 : void
4771 0 : vect_describe_gather_scatter_call (stmt_vec_info stmt_info,
4772 : gather_scatter_info *info)
4773 : {
4774 0 : gcall *call = as_a <gcall *> (stmt_info->stmt);
4775 0 : tree vectype = STMT_VINFO_VECTYPE (stmt_info);
4776 0 : data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
4777 :
4778 0 : info->ifn = gimple_call_internal_fn (call);
4779 0 : info->decl = NULL_TREE;
4780 0 : info->base = gimple_call_arg (call, 0);
4781 0 : info->alias_ptr = gimple_call_arg
4782 0 : (call, internal_fn_alias_ptr_index (info->ifn));
4783 0 : info->offset = gimple_call_arg
4784 0 : (call, internal_fn_offset_index (info->ifn));
4785 0 : info->offset_vectype = NULL_TREE;
4786 0 : info->scale = TREE_INT_CST_LOW (gimple_call_arg
4787 : (call, internal_fn_scale_index (info->ifn)));
4788 0 : info->element_type = TREE_TYPE (vectype);
4789 0 : info->memory_type = TREE_TYPE (DR_REF (dr));
4790 0 : }
4791 :
4792 : /* Return true if a non-affine read or write in STMT_INFO is suitable for a
4793 : gather load or scatter store with VECTYPE. Describe the operation in *INFO
4794 : if so. If it is suitable and ELSVALS is nonzero store the supported else
4795 : values in the vector it points to. */
/* LOOP_VINFO supplies the loop against which the invariance of the base and
   of the offset is tested.  On success *INFO receives the internal function
   or builtin decl, the loop-invariant base, the in-loop offset SSA name,
   the offset vector type, the scale, the alias pointer and the element and
   memory types.  */
4796 :
4797 : bool
4798 350507 : vect_check_gather_scatter (stmt_vec_info stmt_info, tree vectype,
4799 : loop_vec_info loop_vinfo,
4800 : gather_scatter_info *info, vec<int> *elsvals)
4801 : {
4802 350507 : HOST_WIDE_INT scale = 1;
4803 350507 : poly_int64 pbitpos, pbitsize;
4804 350507 : class loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
4805 350507 : struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
4806 350507 : tree offtype = NULL_TREE;
4807 350507 : tree decl = NULL_TREE, base, off;
4808 350507 : tree memory_type = TREE_TYPE (DR_REF (dr));
4809 350507 : machine_mode pmode;
4810 350507 : int punsignedp, reversep, pvolatilep = 0;
4811 350507 : internal_fn ifn;
4812 350507 : tree offset_vectype;
4813 350507 : bool masked_p = false;
4814 :
4815 : /* See whether this is already a call to a gather/scatter internal function.
4816 : If not, see whether it's a masked load or store. */
4817 350507 : gcall *call = dyn_cast <gcall *> (stmt_info->stmt);
4818 6306 : if (call && gimple_call_internal_p (call))
4819 : {
4820 6306 : ifn = gimple_call_internal_fn (call);
4821 6306 : if (internal_gather_scatter_fn_p (ifn))
4822 : {
4823 0 : vect_describe_gather_scatter_call (stmt_info, info);
4824 :
4825 : /* In pattern recog we simply used a ZERO else value that
4826 : we need to correct here. To that end just re-use the
4827 : (already successful) check if we support a gather IFN
4828 : and have it populate the else values. */
4829 0 : if (DR_IS_READ (dr) && internal_fn_mask_index (ifn) >= 0 && elsvals)
4830 0 : supports_vec_gather_load_p (TYPE_MODE (vectype), elsvals);
4831 0 : return true;
4832 : }
4833 6306 : masked_p = (ifn == IFN_MASK_LOAD || ifn == IFN_MASK_STORE);
4834 : }
4835 :
4836 : /* True if we should aim to use internal functions rather than
4837 : built-in functions. */
4838 350507 : bool use_ifn_p = (DR_IS_READ (dr)
4839 350507 : ? supports_vec_gather_load_p (TYPE_MODE (vectype),
4840 : elsvals)
4841 350507 : : supports_vec_scatter_store_p (TYPE_MODE (vectype)));
4842 :
4843 350507 : base = DR_REF (dr);
4844 : /* For masked loads/stores, DR_REF (dr) is an artificial MEM_REF,
4845 : see if we can use the def stmt of the address. */
4846 350507 : if (masked_p
4847 6306 : && TREE_CODE (base) == MEM_REF
4848 6306 : && TREE_CODE (TREE_OPERAND (base, 0)) == SSA_NAME
4849 6306 : && integer_zerop (TREE_OPERAND (base, 1))
4850 356813 : && !expr_invariant_in_loop_p (loop, TREE_OPERAND (base, 0)))
4851 : {
4852 6306 : gimple *def_stmt = SSA_NAME_DEF_STMT (TREE_OPERAND (base, 0));
4853 6306 : if (is_gimple_assign (def_stmt)
4854 6306 : && gimple_assign_rhs_code (def_stmt) == ADDR_EXPR)
4855 639 : base = TREE_OPERAND (gimple_assign_rhs1 (def_stmt), 0);
4856 : }
4857 :
4858 : /* The gather and scatter builtins need address of the form
4859 : loop_invariant + vector * {1, 2, 4, 8}
4860 : or
4861 : loop_invariant + sign_extend (vector) * { 1, 2, 4, 8 }.
4862 : Unfortunately DR_BASE_ADDRESS/DR_OFFSET can be a mixture
4863 : of loop invariants/SSA_NAMEs defined in the loop, with casts,
4864 : multiplications and additions in it. To get a vector, we need
4865 : a single SSA_NAME that will be defined in the loop and will
4866 : contain everything that is not loop invariant and that can be
4867 : vectorized. The following code attempts to find such a preexisting
4868 : SSA_NAME OFF and put the loop invariants into a tree BASE
4869 : that can be gimplified before the loop. */
4870 350507 : base = get_inner_reference (base, &pbitsize, &pbitpos, &off, &pmode,
4871 : &punsignedp, &reversep, &pvolatilep);
4872 350507 : if (reversep)
4873 : return false;
4874 :
4875 : /* PR 107346. Packed structs can have fields at offsets that are not
4876 : multiples of BITS_PER_UNIT. Do not use gather/scatters in such cases. */
4877 350507 : if (!multiple_p (pbitpos, BITS_PER_UNIT))
4878 : return false;
4879 :
4880 : /* We need to be able to form an address to the base which for example
4881 : isn't possible for hard registers. */
4882 350507 : if (may_be_nonaddressable_p (base))
4883 : return false;
4884 :
4885 350499 : poly_int64 pbytepos = exact_div (pbitpos, BITS_PER_UNIT);
4886 :
4887 350499 : if (TREE_CODE (base) == MEM_REF)
4888 : {
4889 284625 : if (!integer_zerop (TREE_OPERAND (base, 1)))
4890 : {
4891 33687 : if (off == NULL_TREE)
4892 33370 : off = wide_int_to_tree (sizetype, mem_ref_offset (base));
4893 : else
4894 317 : off = size_binop (PLUS_EXPR, off,
4895 : fold_convert (sizetype, TREE_OPERAND (base, 1)));
4896 : }
4897 284625 : base = TREE_OPERAND (base, 0);
4898 : }
4899 : else
4900 65874 : base = build_fold_addr_expr (base);
4901 :
4902 350499 : if (off == NULL_TREE)
4903 226185 : off = size_zero_node;
4904 :
4905 : /* BASE must be loop invariant. If it is not invariant, but OFF is, then we
4906 : can fix that by swapping BASE and OFF. */
4907 350499 : if (!expr_invariant_in_loop_p (loop, base))
4908 : {
4909 260088 : if (!expr_invariant_in_loop_p (loop, off))
4910 : return false;
4911 :
4912 259813 : std::swap (base, off);
4913 : }
4914 :
4915 350224 : base = fold_convert (sizetype, base);
4916 350224 : base = size_binop (PLUS_EXPR, base, size_int (pbytepos));
4917 350224 : int tmp_scale;
4918 350224 : tree tmp_offset_vectype;
4919 :
4920 : /* OFF at this point may be either a SSA_NAME or some tree expression
4921 : from get_inner_reference. Try to peel off loop invariants from it
4922 : into BASE as long as possible. */
4923 350224 : STRIP_NOPS (off);
4924 918637 : while (offtype == NULL_TREE)
4925 : {
4926 798553 : enum tree_code code;
4927 798553 : tree op0, op1, add = NULL_TREE;
4928 :
4929 798553 : if (TREE_CODE (off) == SSA_NAME)
4930 : {
4931 612908 : gimple *def_stmt = SSA_NAME_DEF_STMT (off);
4932 :
4933 612908 : if (expr_invariant_in_loop_p (loop, off))
4934 0 : return false;
4935 :
4936 612908 : if (gimple_code (def_stmt) != GIMPLE_ASSIGN)
4937 : break;
4938 :
4939 482136 : op0 = gimple_assign_rhs1 (def_stmt);
4940 482136 : code = gimple_assign_rhs_code (def_stmt);
4941 482136 : op1 = gimple_assign_rhs2 (def_stmt);
4942 : }
4943 : else
4944 : {
4945 185645 : if (get_gimple_rhs_class (TREE_CODE (off)) == GIMPLE_TERNARY_RHS)
4946 : return false;
4947 185645 : code = TREE_CODE (off);
4948 185645 : extract_ops_from_tree (off, &code, &op0, &op1);
4949 : }
4950 667781 : switch (code)
4951 : {
4952 204364 : case POINTER_PLUS_EXPR:
4953 204364 : case PLUS_EXPR:
4954 204364 : if (expr_invariant_in_loop_p (loop, op0))
4955 : {
4956 135126 : add = op0;
4957 135126 : off = op1;
/* Shared tail, also reached by goto from the other invariant-addend
   cases: fold ADD, multiplied by any SCALE already peeled, into BASE.  */
4958 188492 : do_add:
4959 188492 : add = fold_convert (sizetype, add);
4960 188492 : if (scale != 1)
4961 46639 : add = size_binop (MULT_EXPR, add, size_int (scale));
4962 188492 : base = size_binop (PLUS_EXPR, base, add);
4963 568413 : continue;
4964 : }
4965 69238 : if (expr_invariant_in_loop_p (loop, op1))
4966 : {
4967 53070 : add = op1;
4968 53070 : off = op0;
4969 53070 : goto do_add;
4970 : }
4971 : break;
4972 492 : case MINUS_EXPR:
4973 492 : if (expr_invariant_in_loop_p (loop, op1))
4974 : {
4975 296 : add = fold_convert (sizetype, op1);
4976 296 : add = size_binop (MINUS_EXPR, size_zero_node, add);
4977 296 : off = op0;
4978 296 : goto do_add;
4979 : }
4980 : break;
4981 203615 : case MULT_EXPR:
4982 203615 : if (scale == 1 && tree_fits_shwi_p (op1))
4983 : {
4984 170523 : int new_scale = tree_to_shwi (op1);
4985 : /* Only treat this as a scaling operation if the target
4986 : supports it for at least some offset type. */
4987 170523 : if (use_ifn_p
4988 0 : && !vect_gather_scatter_fn_p (loop_vinfo, DR_IS_READ (dr),
4989 : masked_p, vectype, memory_type,
4990 : signed_char_type_node,
4991 : new_scale, &tmp_scale,
4992 : &ifn,
4993 : &offset_vectype,
4994 : &tmp_offset_vectype,
4995 : elsvals)
4996 170523 : && !vect_gather_scatter_fn_p (loop_vinfo, DR_IS_READ (dr),
4997 : masked_p, vectype, memory_type,
4998 : unsigned_char_type_node,
4999 : new_scale, &tmp_scale,
5000 : &ifn,
5001 : &offset_vectype,
5002 : &tmp_offset_vectype,
5003 : elsvals))
5004 : break;
5005 170523 : scale = new_scale;
5006 170523 : off = op0;
5007 170523 : continue;
5008 170523 : }
5009 : break;
/* OFF is defined by an assignment whose rhs code is SSA_NAME; continue
   the walk from its operand.  */
5010 0 : case SSA_NAME:
5011 0 : off = op0;
5012 0 : continue;
5013 215339 : CASE_CONVERT:
5014 430662 : if (!POINTER_TYPE_P (TREE_TYPE (op0))
5015 430662 : && !INTEGRAL_TYPE_P (TREE_TYPE (op0)))
5016 : break;
5017 :
5018 : /* Don't include the conversion if the target is happy with
5019 : the current offset type. */
5020 215339 : if (use_ifn_p
5021 0 : && TREE_CODE (off) == SSA_NAME
5022 0 : && !POINTER_TYPE_P (TREE_TYPE (off))
5023 215339 : && vect_gather_scatter_fn_p (loop_vinfo, DR_IS_READ (dr),
5024 : masked_p, vectype, memory_type,
5025 0 : TREE_TYPE (off),
5026 : scale, &tmp_scale,
5027 : &ifn,
5028 : &offset_vectype,
5029 : &tmp_offset_vectype,
5030 : elsvals))
5031 : break;
5032 :
5033 215339 : if (TYPE_PRECISION (TREE_TYPE (op0))
5034 215339 : == TYPE_PRECISION (TREE_TYPE (off)))
5035 : {
5036 89314 : off = op0;
5037 89314 : continue;
5038 : }
5039 :
5040 : /* Include the conversion if it is widening and we're using
5041 : the IFN path or the target can handle the converted from
5042 : offset or the current size is not already the same as the
5043 : data vector element size. */
5044 126025 : if ((TYPE_PRECISION (TREE_TYPE (op0))
5045 126025 : < TYPE_PRECISION (TREE_TYPE (off)))
5046 126025 : && (use_ifn_p
5047 125267 : || (DR_IS_READ (dr)
5048 81443 : ? (targetm.vectorize.builtin_gather
5049 81443 : && targetm.vectorize.builtin_gather (vectype,
5050 81443 : TREE_TYPE (op0),
5051 : scale))
5052 43824 : : (targetm.vectorize.builtin_scatter
5053 43824 : && targetm.vectorize.builtin_scatter (vectype,
5054 43824 : TREE_TYPE (op0),
5055 : scale)))
5056 124171 : || !operand_equal_p (TYPE_SIZE (TREE_TYPE (off)),
5057 124171 : TYPE_SIZE (TREE_TYPE (vectype)), 0)))
5058 : {
5059 120084 : off = op0;
5060 120084 : offtype = TREE_TYPE (off);
5061 120084 : STRIP_NOPS (off);
5062 120084 : continue;
5063 : }
5064 : break;
5065 : default:
5066 : break;
5067 0 : }
5068 : break;
5069 : }
5070 :
5071 : /* If at the end OFF still isn't a SSA_NAME or isn't
5072 : defined in the loop, punt. */
5073 350224 : if (TREE_CODE (off) != SSA_NAME
5074 350224 : || expr_invariant_in_loop_p (loop, off))
5075 6377 : return false;
5076 :
5077 343847 : if (offtype == NULL_TREE)
5078 224117 : offtype = TREE_TYPE (off);
5079 :
/* Select the internal function or the target builtin for the access.  On
   the internal-function path a failed lookup only sets IFN to IFN_LAST;
   the function still returns true below.  */
5080 343847 : if (use_ifn_p)
5081 : {
5082 0 : if (!vect_gather_scatter_fn_p (loop_vinfo, DR_IS_READ (dr), masked_p,
5083 : vectype, memory_type, offtype,
5084 : scale, &tmp_scale,
5085 : &ifn, &offset_vectype,
5086 : &tmp_offset_vectype,
5087 : elsvals))
5088 0 : ifn = IFN_LAST;
5089 : decl = NULL_TREE;
5090 : }
5091 : else
5092 : {
5093 343847 : if (DR_IS_READ (dr))
5094 : {
5095 258789 : if (targetm.vectorize.builtin_gather)
5096 258789 : decl = targetm.vectorize.builtin_gather (vectype, offtype, scale);
5097 : }
5098 : else
5099 : {
5100 85058 : if (targetm.vectorize.builtin_scatter)
5101 85058 : decl = targetm.vectorize.builtin_scatter (vectype, offtype, scale);
5102 : }
5103 343847 : ifn = IFN_LAST;
5104 : /* The offset vector type will be read from DECL when needed. */
5105 343847 : offset_vectype = NULL_TREE;
5106 : }
5107 :
5108 343847 : gcc_checking_assert (expr_invariant_in_loop_p (loop, base));
5109 343847 : gcc_checking_assert (!expr_invariant_in_loop_p (loop, off));
5110 :
5111 343847 : info->ifn = ifn;
5112 343847 : info->decl = decl;
5113 343847 : info->base = base;
5114 :
/* The alias pointer is an integer constant of the reference's alias
   pointer type whose value is the object alignment of DR_REF.  */
5115 687694 : info->alias_ptr = build_int_cst
5116 343847 : (reference_alias_ptr_type (DR_REF (dr)),
5117 343847 : get_object_alignment (DR_REF (dr)));
5118 :
5119 343847 : info->offset = off;
5120 343847 : info->offset_vectype = offset_vectype;
5121 343847 : info->scale = scale;
5122 343847 : info->element_type = TREE_TYPE (vectype);
5123 343847 : info->memory_type = memory_type;
5124 343847 : return true;
5125 : }
5126 :
5127 : /* Find the data references in STMT, analyze them with respect to LOOP and
5128 : append them to DATAREFS. Return false if datarefs in this stmt cannot
5129 : be handled. */
/* A statement without data references, or a clobber, succeeds without
   appending anything.  When DATAREF_GROUPS is nonnull, GROUP_ID is pushed
   to it for every data reference appended to DATAREFS.  */
5130 :
5131 : opt_result
5132 32223947 : vect_find_stmt_data_reference (loop_p loop, gimple *stmt,
5133 : vec<data_reference_p> *datarefs,
5134 : vec<int> *dataref_groups, int group_id)
5135 : {
5136 : /* We can ignore clobbers for dataref analysis - they are removed during
5137 : loop vectorization and BB vectorization checks dependences with a
5138 : stmt walk. */
5139 32223947 : if (gimple_clobber_p (stmt))
5140 1101021 : return opt_result::success ();
5141 :
5142 57904670 : if (gimple_has_volatile_ops (stmt))
5143 320444 : return opt_result::failure_at (stmt, "not vectorized: volatile type: %G",
5144 : stmt);
5145 :
5146 30802482 : if (stmt_can_throw_internal (cfun, stmt))
5147 693166 : return opt_result::failure_at (stmt,
5148 : "not vectorized:"
5149 : " statement can throw an exception: %G",
5150 : stmt);
5151 :
5152 30109316 : auto_vec<data_reference_p, 2> refs;
5153 30109316 : opt_result res = find_data_references_in_stmt (loop, stmt, &refs);
5154 30109316 : if (!res)
5155 3675761 : return res;
5156 :
5157 26433555 : if (refs.is_empty ())
5158 15174144 : return opt_result::success ();
5159 :
/* Only a single data reference per statement is handled; free all of
   them before reporting the failure.  */
5160 11259411 : if (refs.length () > 1)
5161 : {
5162 1247095 : while (!refs.is_empty ())
5163 831699 : free_data_ref (refs.pop ());
5164 415396 : return opt_result::failure_at (stmt,
5165 : "not vectorized: more than one "
5166 : "data ref in stmt: %G", stmt);
5167 : }
5168 :
5169 10844015 : data_reference_p dr = refs.pop ();
/* Among calls, only IFN_MASK_LOAD and IFN_MASK_STORE are accepted as
   carrying a data reference.  */
5170 10844015 : if (gcall *call = dyn_cast <gcall *> (stmt))
5171 20459 : if (!gimple_call_internal_p (call)
5172 20459 : || (gimple_call_internal_fn (call) != IFN_MASK_LOAD
5173 17387 : && gimple_call_internal_fn (call) != IFN_MASK_STORE))
5174 : {
5175 16876 : free_data_ref (dr);
5176 16876 : return opt_result::failure_at (stmt,
5177 : "not vectorized: dr in a call %G", stmt);
5178 : }
5179 :
5180 10827139 : if (TREE_CODE (DR_REF (dr)) == COMPONENT_REF
5181 10827139 : && DECL_BIT_FIELD (TREE_OPERAND (DR_REF (dr), 1)))
5182 : {
5183 53706 : free_data_ref (dr);
5184 53706 : return opt_result::failure_at (stmt,
5185 : "not vectorized:"
5186 : " statement is an unsupported"
5187 : " bitfield access %G", stmt);
5188 : }
5189 :
5190 10773433 : if (DR_BASE_ADDRESS (dr)
5191 10685824 : && TREE_CODE (DR_BASE_ADDRESS (dr)) == INTEGER_CST)
5192 : {
5193 987 : free_data_ref (dr);
5194 987 : return opt_result::failure_at (stmt,
5195 : "not vectorized:"
5196 : " base addr of dr is a constant\n");
5197 : }
5198 :
5199 : /* Check whether this may be a SIMD lane access and adjust the
5200 : DR to make it easier for us to handle it. */
5201 10772446 : if (loop
5202 599252 : && loop->simduid
5203 10711 : && (!DR_BASE_ADDRESS (dr)
5204 2960 : || !DR_OFFSET (dr)
5205 2960 : || !DR_INIT (dr)
5206 2960 : || !DR_STEP (dr)))
5207 : {
5208 7751 : struct data_reference *newdr
5209 7751 : = create_data_ref (NULL, loop_containing_stmt (stmt), DR_REF (dr), stmt,
5210 7751 : DR_IS_READ (dr), DR_IS_CONDITIONAL_IN_STMT (dr));
5211 7751 : if (DR_BASE_ADDRESS (newdr)
5212 7751 : && DR_OFFSET (newdr)
5213 7751 : && DR_INIT (newdr)
5214 7751 : && DR_STEP (newdr)
5215 7751 : && TREE_CODE (DR_INIT (newdr)) == INTEGER_CST
5216 15502 : && integer_zerop (DR_STEP (newdr)))
5217 : {
5218 7751 : tree base_address = DR_BASE_ADDRESS (newdr);
5219 7751 : tree off = DR_OFFSET (newdr);
5220 7751 : tree step = ssize_int (1);
5221 7751 : if (integer_zerop (off)
5222 7751 : && TREE_CODE (base_address) == POINTER_PLUS_EXPR)
5223 : {
5224 82 : off = TREE_OPERAND (base_address, 1);
5225 82 : base_address = TREE_OPERAND (base_address, 0);
5226 : }
5227 7751 : STRIP_NOPS (off);
5228 7751 : if (TREE_CODE (off) == MULT_EXPR
5229 7751 : && tree_fits_uhwi_p (TREE_OPERAND (off, 1)))
5230 : {
5231 7500 : step = TREE_OPERAND (off, 1);
5232 7500 : off = TREE_OPERAND (off, 0);
5233 7500 : STRIP_NOPS (off);
5234 : }
5235 541 : if (CONVERT_EXPR_P (off)
5236 7751 : && (TYPE_PRECISION (TREE_TYPE (TREE_OPERAND (off, 0)))
5237 7210 : < TYPE_PRECISION (TREE_TYPE (off))))
5238 7210 : off = TREE_OPERAND (off, 0);
5239 7751 : if (TREE_CODE (off) == SSA_NAME)
5240 : {
5241 7226 : gimple *def = SSA_NAME_DEF_STMT (off);
5242 : /* Look through widening conversion. */
5243 7226 : if (is_gimple_assign (def)
5244 7226 : && CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (def)))
5245 : {
5246 0 : tree rhs1 = gimple_assign_rhs1 (def);
5247 0 : if (TREE_CODE (rhs1) == SSA_NAME
5248 0 : && INTEGRAL_TYPE_P (TREE_TYPE (rhs1))
5249 0 : && (TYPE_PRECISION (TREE_TYPE (off))
5250 0 : > TYPE_PRECISION (TREE_TYPE (rhs1))))
5251 0 : def = SSA_NAME_DEF_STMT (rhs1);
5252 : }
5253 7226 : if (is_gimple_call (def)
5254 7090 : && gimple_call_internal_p (def)
5255 14316 : && (gimple_call_internal_fn (def) == IFN_GOMP_SIMD_LANE))
5256 : {
5257 7090 : tree arg = gimple_call_arg (def, 0);
5258 7090 : tree reft = TREE_TYPE (DR_REF (newdr));
5259 7090 : gcc_assert (TREE_CODE (arg) == SSA_NAME);
5260 7090 : arg = SSA_NAME_VAR (arg);
5261 7090 : if (arg == loop->simduid
5262 : /* For now. */
5263 7090 : && tree_int_cst_equal (TYPE_SIZE_UNIT (reft), step))
5264 : {
5265 7065 : DR_BASE_ADDRESS (newdr) = base_address;
5266 7065 : DR_OFFSET (newdr) = ssize_int (0);
5267 7065 : DR_STEP (newdr) = step;
5268 7065 : DR_OFFSET_ALIGNMENT (newdr) = BIGGEST_ALIGNMENT;
5269 7065 : DR_STEP_ALIGNMENT (newdr) = highest_pow2_factor (step);
5270 : /* Mark as simd-lane access. */
/* AUX is set to -1 minus the second IFN_GOMP_SIMD_LANE argument; NEWDR
   then replaces DR, which is freed.  */
5271 7065 : tree arg2 = gimple_call_arg (def, 1);
5272 7065 : newdr->aux = (void *) (-1 - tree_to_uhwi (arg2));
5273 7065 : free_data_ref (dr);
5274 7065 : datarefs->safe_push (newdr);
5275 7065 : if (dataref_groups)
5276 0 : dataref_groups->safe_push (group_id);
5277 7065 : return opt_result::success ();
5278 : }
5279 : }
5280 : }
5281 : }
/* Not recognized as a SIMD lane access: drop NEWDR and fall through to
   push the original DR.  */
5282 686 : free_data_ref (newdr);
5283 : }
5284 :
5285 10765381 : datarefs->safe_push (dr);
5286 10765381 : if (dataref_groups)
5287 10173194 : dataref_groups->safe_push (group_id);
5288 10765381 : return opt_result::success ();
5289 30109316 : }
5290 :
5291 : /* Function vect_analyze_data_refs.
5292 :
5293 : Find all the data references in the loop or basic block.
5294 :
5295 : The general structure of the analysis of data refs in the vectorizer is as
5296 : follows:
5297 : 1- vect_analyze_data_refs(loop/bb): call
5298 : compute_data_dependences_for_loop/bb to find and analyze all data-refs
5299 : in the loop/bb and their dependences.
5300 : 2- vect_analyze_dependences(): apply dependence testing using ddrs.
5301 : 3- vect_analyze_drs_alignment(): check that ref_stmt.alignment is ok.
5302 : 4- vect_analyze_drs_access(): check that ref_stmt.step is ok.
5303 :
5304 : */
/* The data references walked are those already recorded in
   VINFO->shared->datarefs.  When FATAL is nonnull, *FATAL is set to false
   on the "no vectype" and the gather/scatter failure paths; the other
   failure paths leave it untouched.  For basic-block vectorization several
   per-reference problems only mark the statement as not vectorizable and
   continue with the next reference instead of failing.  */
5305 :
5306 : opt_result
5307 2718220 : vect_analyze_data_refs (vec_info *vinfo, bool *fatal)
5308 : {
5309 2718220 : class loop *loop = NULL;
5310 2718220 : unsigned int i;
5311 2718220 : struct data_reference *dr;
5312 2718220 : tree scalar_type;
5313 :
5314 2718220 : DUMP_VECT_SCOPE ("vect_analyze_data_refs");
5315 :
5316 2718220 : if (loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo))
5317 512773 : loop = LOOP_VINFO_LOOP (loop_vinfo);
5318 :
5319 : /* Go through the data-refs, check that the analysis succeeded. Update
5320 : pointer from stmt_vec_info struct to DR and vectype. */
5321 :
5322 2718220 : vec<data_reference_p> datarefs = vinfo->shared->datarefs;
5323 17882694 : FOR_EACH_VEC_ELT (datarefs, i, dr)
5324 : {
5325 15236849 : enum { SG_NONE, GATHER, SCATTER } gatherscatter = SG_NONE;
5326 :
5327 15236849 : gcc_assert (DR_REF (dr));
5328 15236849 : stmt_vec_info stmt_info = vinfo->lookup_stmt (DR_STMT (dr));
5329 15236849 : gcc_assert (!stmt_info->dr_aux.dr);
5330 15236849 : stmt_info->dr_aux.dr = dr;
5331 15236849 : stmt_info->dr_aux.stmt = stmt_info;
5332 :
5333 : /* Check that analysis of the data-ref succeeded. */
5334 15236849 : if (!DR_BASE_ADDRESS (dr) || !DR_OFFSET (dr) || !DR_INIT (dr)
5335 15120037 : || !DR_STEP (dr))
5336 : {
5337 233624 : bool maybe_gather
5338 116812 : = DR_IS_READ (dr)
5339 116812 : && !TREE_THIS_VOLATILE (DR_REF (dr));
5340 233624 : bool maybe_scatter
5341 : = DR_IS_WRITE (dr)
5342 116812 : && !TREE_THIS_VOLATILE (DR_REF (dr));
5343 :
5344 : /* If target supports vector gather loads or scatter stores,
5345 : see if they can't be used. */
5346 116812 : if (is_a <loop_vec_info> (vinfo)
5347 116812 : && !nested_in_vect_loop_p (loop, stmt_info))
5348 : {
5349 113438 : if (maybe_gather || maybe_scatter)
5350 : {
5351 113438 : if (maybe_gather)
5352 : gatherscatter = GATHER;
5353 : else
5354 21739 : gatherscatter = SCATTER;
5355 : }
5356 : }
5357 :
5358 21739 : if (gatherscatter == SG_NONE)
5359 : {
5360 3374 : if (dump_enabled_p ())
5361 5 : dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5362 : "not vectorized: data ref analysis "
5363 : "failed %G", stmt_info->stmt);
5364 3374 : if (is_a <bb_vec_info> (vinfo))
5365 : {
5366 : /* In BB vectorization the ref can still participate
5367 : in dependence analysis, we just can't vectorize it. */
5368 3024 : STMT_VINFO_VECTORIZABLE (stmt_info) = false;
5369 3024 : continue;
5370 : }
5371 350 : return opt_result::failure_at (stmt_info->stmt,
5372 : "not vectorized:"
5373 : " data ref analysis failed: %G",
5374 : stmt_info->stmt);
5375 : }
5376 : }
5377 :
5378 : /* See if this was detected as SIMD lane access. */
5379 15233475 : if (dr->aux == (void *)-1
5380 15233475 : || dr->aux == (void *)-2
5381 15224573 : || dr->aux == (void *)-3
5382 15223733 : || dr->aux == (void *)-4)
5383 : {
5384 10542 : if (nested_in_vect_loop_p (loop, stmt_info))
5385 0 : return opt_result::failure_at (stmt_info->stmt,
5386 : "not vectorized:"
5387 : " data ref analysis failed: %G",
5388 : stmt_info->stmt);
/* DR->aux is one of (void *) -1 .. -4 here, so the negation below stores
   a value in the range 1 .. 4.  */
5389 10542 : STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info)
5390 10542 : = -(uintptr_t) dr->aux;
5391 : }
5392 :
5393 15233475 : tree base = get_base_address (DR_REF (dr));
5394 15233475 : if (base && VAR_P (base) && DECL_NONALIASED (base))
5395 : {
5396 8892 : if (dump_enabled_p ())
5397 186 : dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5398 : "not vectorized: base object not addressable "
5399 : "for stmt: %G", stmt_info->stmt);
5400 8892 : if (is_a <bb_vec_info> (vinfo))
5401 : {
5402 : /* In BB vectorization the ref can still participate
5403 : in dependence analysis, we just can't vectorize it. */
5404 8892 : STMT_VINFO_VECTORIZABLE (stmt_info) = false;
5405 8892 : continue;
5406 : }
5407 0 : return opt_result::failure_at (stmt_info->stmt,
5408 : "not vectorized: base object not"
5409 : " addressable for stmt: %G",
5410 : stmt_info->stmt);
5411 : }
5412 :
5413 15224583 : if (is_a <loop_vec_info> (vinfo)
5414 1166044 : && DR_STEP (dr)
5415 16277189 : && TREE_CODE (DR_STEP (dr)) != INTEGER_CST)
5416 : {
5417 44589 : if (nested_in_vect_loop_p (loop, stmt_info))
5418 372 : return opt_result::failure_at (stmt_info->stmt,
5419 : "not vectorized: "
5420 : "not suitable for strided load %G",
5421 : stmt_info->stmt);
5422 44217 : STMT_VINFO_STRIDED_P (stmt_info) = true;
5423 : }
5424 :
5425 : /* Update DR field in stmt_vec_info struct. */
5426 :
5427 : /* If the dataref is in an inner-loop of the loop that is considered for
5428 : vectorization, we also want to analyze the access relative to
5429 : the outer-loop (DR contains information only relative to the
5430 : inner-most enclosing loop). We do that by building a reference to the
5431 : first location accessed by the inner-loop, and analyze it relative to
5432 : the outer-loop. */
5433 15224211 : if (loop && nested_in_vect_loop_p (loop, stmt_info))
5434 : {
5435 : /* Build a reference to the first location accessed by the
5436 : inner loop: *(BASE + INIT + OFFSET). By construction,
5437 : this address must be invariant in the inner loop, so we
5438 : can consider it as being used in the outer loop. */
5439 11872 : tree base = unshare_expr (DR_BASE_ADDRESS (dr));
5440 11872 : tree offset = unshare_expr (DR_OFFSET (dr));
5441 11872 : tree init = unshare_expr (DR_INIT (dr));
5442 11872 : tree init_offset = fold_build2 (PLUS_EXPR, TREE_TYPE (offset),
5443 : init, offset);
5444 11872 : tree init_addr = fold_build_pointer_plus (base, init_offset);
5445 11872 : tree init_ref = build_fold_indirect_ref (init_addr);
5446 :
5447 11872 : if (dump_enabled_p ())
5448 1222 : dump_printf_loc (MSG_NOTE, vect_location,
5449 : "analyze in outer loop: %T\n", init_ref);
5450 :
5451 11872 : opt_result res
5452 11872 : = dr_analyze_innermost (&STMT_VINFO_DR_WRT_VEC_LOOP (stmt_info),
5453 11872 : init_ref, loop, stmt_info->stmt);
5454 11872 : if (!res)
5455 : /* dr_analyze_innermost already explained the failure. */
5456 161 : return res;
5457 :
5458 11711 : if (dump_enabled_p ())
5459 1218 : dump_printf_loc (MSG_NOTE, vect_location,
5460 : "\touter base_address: %T\n"
5461 : "\touter offset from base address: %T\n"
5462 : "\touter constant offset from base address: %T\n"
5463 : "\touter step: %T\n"
5464 : "\touter base alignment: %d\n\n"
5465 : "\touter base misalignment: %d\n"
5466 : "\touter offset alignment: %d\n"
5467 : "\touter step alignment: %d\n",
5468 : STMT_VINFO_DR_BASE_ADDRESS (stmt_info),
5469 : STMT_VINFO_DR_OFFSET (stmt_info),
5470 : STMT_VINFO_DR_INIT (stmt_info),
5471 : STMT_VINFO_DR_STEP (stmt_info),
5472 : STMT_VINFO_DR_BASE_ALIGNMENT (stmt_info),
5473 : STMT_VINFO_DR_BASE_MISALIGNMENT (stmt_info),
5474 : STMT_VINFO_DR_OFFSET_ALIGNMENT (stmt_info),
5475 : STMT_VINFO_DR_STEP_ALIGNMENT (stmt_info));
5476 : }
5477 :
5478 : /* Set vectype for STMT. */
5479 15224050 : scalar_type = TREE_TYPE (DR_REF (dr));
5480 15224050 : tree vectype = get_vectype_for_scalar_type (vinfo, scalar_type);
5481 15224050 : if (!vectype)
5482 : {
5483 1820643 : if (dump_enabled_p ())
5484 : {
5485 2042 : dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5486 : "not vectorized: no vectype for stmt: %G",
5487 : stmt_info->stmt);
5488 2042 : dump_printf (MSG_MISSED_OPTIMIZATION, " scalar_type: ");
5489 2042 : dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_DETAILS,
5490 : scalar_type);
5491 2042 : dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
5492 : }
5493 :
5494 1820643 : if (is_a <bb_vec_info> (vinfo))
5495 : {
5496 : /* No vector type is fine, the ref can still participate
5497 : in dependence analysis, we just can't vectorize it. */
5498 1756661 : STMT_VINFO_VECTORIZABLE (stmt_info) = false;
5499 1756661 : continue;
5500 : }
5501 63982 : if (fatal)
5502 63982 : *fatal = false;
5503 63982 : return opt_result::failure_at (stmt_info->stmt,
5504 : "not vectorized:"
5505 : " no vectype for stmt: %G"
5506 : " scalar_type: %T\n",
5507 : stmt_info->stmt, scalar_type);
5508 : }
5509 : else
5510 : {
5511 13403407 : if (dump_enabled_p ())
5512 82764 : dump_printf_loc (MSG_NOTE, vect_location,
5513 : "got vectype for stmt: %G%T\n",
5514 : stmt_info->stmt, vectype);
5515 : }
5516 :
5517 : /* Leave the BB vectorizer to pick the vector type later, based on
5518 : the final dataref group size and SLP node size. */
5519 13403407 : if (is_a <loop_vec_info> (vinfo))
5520 1101529 : STMT_VINFO_VECTYPE (stmt_info) = vectype;
5521 :
5522 13403407 : if (gatherscatter != SG_NONE)
5523 : {
5524 107736 : gather_scatter_info gs_info;
5525 107736 : if (!vect_check_gather_scatter (stmt_info, vectype,
5526 : as_a <loop_vec_info> (vinfo),
5527 : &gs_info)
5528 211454 : || !get_vectype_for_scalar_type (vinfo,
5529 103718 : TREE_TYPE (gs_info.offset)))
5530 : {
5531 7510 : if (fatal)
5532 7510 : *fatal = false;
5533 7510 : return opt_result::failure_at
5534 7878 : (stmt_info->stmt,
5535 : (gatherscatter == GATHER)
5536 : ? "not vectorized: not suitable for gather load %G"
5537 : : "not vectorized: not suitable for scatter store %G",
5538 : stmt_info->stmt);
5539 : }
/* GATHERSCATTER is GATHER or SCATTER here, both nonzero enumerators.  */
5540 100226 : STMT_VINFO_GATHER_SCATTER_P (stmt_info) = gatherscatter;
5541 : }
5542 : }
5543 :
5544 : /* We used to stop processing and prune the list here. Verify we no
5545 : longer need to. */
5546 4229112 : gcc_assert (i == datarefs.length ());
5547 :
5548 2645845 : return opt_result::success ();
5549 : }
5550 :
5551 :
5552 : /* Function vect_get_new_vect_var.
5553 :
5554 : Returns a name for a new variable. The current naming scheme appends the
5555 : prefix "vect_" or "vect_p" (depending on the value of VAR_KIND) to
5556 : the name of vectorizer generated variables, and appends that to NAME if
5557 : provided. */
5558 :
5559 : tree
5560 1944576 : vect_get_new_vect_var (tree type, enum vect_var_kind var_kind, const char *name)
5561 : {
5562 1944576 : const char *prefix;
5563 1944576 : tree new_vect_var;
5564 :
5565 1944576 : switch (var_kind)
5566 : {
5567 : case vect_simple_var:
5568 : prefix = "vect";
5569 : break;
5570 22798 : case vect_scalar_var:
5571 22798 : prefix = "stmp";
5572 22798 : break;
5573 20102 : case vect_mask_var:
5574 20102 : prefix = "mask";
5575 20102 : break;
5576 1396266 : case vect_pointer_var:
5577 1396266 : prefix = "vectp";
5578 1396266 : break;
5579 0 : default:
5580 0 : gcc_unreachable ();
5581 : }
5582 :
5583 1944576 : if (name)
5584 : {
5585 1096535 : char* tmp = concat (prefix, "_", name, NULL);
5586 1096535 : new_vect_var = create_tmp_reg (type, tmp);
5587 1096535 : free (tmp);
5588 : }
5589 : else
5590 848041 : new_vect_var = create_tmp_reg (type, prefix);
5591 :
5592 1944576 : return new_vect_var;
5593 : }
5594 :
5595 : /* Like vect_get_new_vect_var but return an SSA name. */
5596 :
5597 : tree
5598 6545 : vect_get_new_ssa_name (tree type, enum vect_var_kind var_kind, const char *name)
5599 : {
5600 6545 : const char *prefix;
5601 6545 : tree new_vect_var;
5602 :
5603 6545 : switch (var_kind)
5604 : {
5605 : case vect_simple_var:
5606 : prefix = "vect";
5607 : break;
5608 312 : case vect_scalar_var:
5609 312 : prefix = "stmp";
5610 312 : break;
5611 0 : case vect_pointer_var:
5612 0 : prefix = "vectp";
5613 0 : break;
5614 0 : default:
5615 0 : gcc_unreachable ();
5616 : }
5617 :
5618 6545 : if (name)
5619 : {
5620 6068 : char* tmp = concat (prefix, "_", name, NULL);
5621 6068 : new_vect_var = make_temp_ssa_name (type, NULL, tmp);
5622 6068 : free (tmp);
5623 : }
5624 : else
5625 477 : new_vect_var = make_temp_ssa_name (type, NULL, prefix);
5626 :
5627 6545 : return new_vect_var;
5628 : }
5629 :
5630 : /* Duplicate points-to info on NAME from DR_INFO. */
5631 :
5632 : static void
5633 431321 : vect_duplicate_ssa_name_ptr_info (tree name, dr_vec_info *dr_info)
5634 : {
5635 431321 : if (DR_PTR_INFO (dr_info->dr))
5636 : {
5637 289766 : duplicate_ssa_name_ptr_info (name, DR_PTR_INFO (dr_info->dr));
5638 : /* DR_PTR_INFO is for a base SSA name, not including constant or
5639 : variable offsets in the ref so its alignment info does not apply. */
5640 289766 : mark_ptr_info_alignment_unknown (SSA_NAME_PTR_INFO (name));
5641 : }
5642 141555 : else if (!SSA_NAME_PTR_INFO (name))
5643 : {
5644 141555 : tree base = get_base_address (dr_info->dr->ref);
5645 141555 : if (VAR_P (base)
5646 : || TREE_CODE (base) == PARM_DECL
5647 : || TREE_CODE (base) == RESULT_DECL)
5648 : {
5649 129638 : struct ptr_info_def *pi = get_ptr_info (name);
5650 129638 : pt_solution_set_var (&pi->pt, base);
5651 : }
5652 : }
5653 431321 : }
5654 :
5655 : /* Function vect_create_addr_base_for_vector_ref.
5656 :
5657 : Create an expression that computes the address of the first memory location
5658 : that will be accessed for a data reference.
5659 :
5660 : Input:
5661 : STMT_INFO: The statement containing the data reference.
5662 : NEW_STMT_LIST: Must be initialized to NULL_TREE or a statement list.
5663 : OFFSET: Optional. If supplied, it is added to the initial address.
5664 : LOOP: Specify relative to which loop-nest should the address be computed.
5665 : For example, when the dataref is in an inner-loop nested in an
5666 : outer-loop that is now being vectorized, LOOP can be either the
5667 : outer-loop, or the inner-loop. The first memory location accessed
5668 : by the following dataref ('in' points to short):
5669 :
5670 : for (i=0; i<N; i++)
5671 : for (j=0; j<M; j++)
5672 : s += in[i+j]
5673 :
5674 : is as follows:
5675 : if LOOP=i_loop: &in (relative to i_loop)
5676 : if LOOP=j_loop: &in+i*2B (relative to j_loop)
5677 :
5678 : Output:
5679 : 1. Return an SSA_NAME whose value is the address of the memory location of
5680 : the first vector of the data reference.
5681 : 2. If new_stmt_list is not NULL_TREE after return then the caller must insert
5682 : these statement(s) which define the returned SSA_NAME.
5683 :
5684 : FORNOW: We are only handling array accesses with step 1. */
5685 :
5686 : tree
5687 698266 : vect_create_addr_base_for_vector_ref (vec_info *vinfo, stmt_vec_info stmt_info,
5688 : gimple_seq *new_stmt_list,
5689 : tree offset)
5690 : {
5691 698266 : dr_vec_info *dr_info = STMT_VINFO_DR_INFO (stmt_info);
5692 698266 : struct data_reference *dr = dr_info->dr;
5693 698266 : const char *base_name;
5694 698266 : tree addr_base;
5695 698266 : tree dest;
5696 698266 : gimple_seq seq = NULL;
5697 698266 : tree vect_ptr_type;
5698 698266 : loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
5699 698266 : innermost_loop_behavior *drb = vect_dr_behavior (vinfo, dr_info);
5700 :
5701 698266 : tree data_ref_base = unshare_expr (drb->base_address);
5702 698266 : tree base_offset = unshare_expr (get_dr_vinfo_offset (vinfo, dr_info, true));
5703 698266 : tree init = unshare_expr (drb->init);
5704 :
5705 698266 : if (loop_vinfo)
5706 128186 : base_name = get_name (data_ref_base);
5707 : else
5708 : {
5709 570080 : base_offset = ssize_int (0);
5710 570080 : init = ssize_int (0);
5711 570080 : base_name = get_name (DR_REF (dr));
5712 : }
5713 :
5714 : /* Create base_offset: the sum of the offset and init parts (both zero when not vectorizing a loop), converted to sizetype. */
5715 698266 : base_offset = size_binop (PLUS_EXPR,
5716 : fold_convert (sizetype, base_offset),
5717 : fold_convert (sizetype, init));
5718 :
5719 698266 : if (offset)
5720 : {
5721 3107 : offset = fold_convert (sizetype, offset);
5722 3107 : base_offset = fold_build2 (PLUS_EXPR, sizetype,
5723 : base_offset, offset);
5724 : }
5725 :
5726 : /* base + base_offset for loop vectorization; otherwise the address of the reference itself.  NOTE(review): base_offset (and hence OFFSET) is not used on the non-loop path - confirm callers never pass OFFSET there. */
5727 698266 : if (loop_vinfo)
5728 128186 : addr_base = fold_build_pointer_plus (data_ref_base, base_offset);
5729 : else
5730 1140160 : addr_base = build1 (ADDR_EXPR,
5731 570080 : build_pointer_type (TREE_TYPE (DR_REF (dr))),
5732 : /* Strip zero offset components since we don't need
5733 : them and they can confuse late diagnostics if
5734 : we CSE them wrongly. See PR106904 for example. */
5735 : unshare_expr (strip_zero_offset_components
5736 : (DR_REF (dr))));
5737 :
5738 698266 : vect_ptr_type = build_pointer_type (TREE_TYPE (DR_REF (dr)));
5739 698266 : dest = vect_get_new_vect_var (vect_ptr_type, vect_pointer_var, base_name);
5740 698266 : addr_base = force_gimple_operand (addr_base, &seq, true, dest);
5741 698266 : gimple_seq_add_seq (new_stmt_list, seq);
5742 :
5743 698266 : if (TREE_CODE (addr_base) == SSA_NAME
5744 : /* We should only duplicate pointer info to newly created SSA names. */
5745 704974 : && SSA_NAME_VAR (addr_base) == dest)
5746 : {
5747 174825 : gcc_assert (!SSA_NAME_PTR_INFO (addr_base));
5748 174825 : vect_duplicate_ssa_name_ptr_info (addr_base, dr_info);
5749 : }
5750 :
5751 698266 : if (dump_enabled_p ())
5752 25281 : dump_printf_loc (MSG_NOTE, vect_location, "created %T\n", addr_base);
5753 :
5754 698266 : return addr_base;
5755 : }
5756 :
5757 :
5758 : /* Function vect_create_data_ref_ptr.
5759 :
5760 : Create a new pointer-to-AGGR_TYPE variable (ap), that points to the first
5761 : location accessed in the loop by STMT_INFO, along with the def-use update
5762 : chain to appropriately advance the pointer through the loop iterations.
5763 : Also set aliasing information for the pointer. This pointer is used by
5764 : the callers to this function to create a memory reference expression for
5765 : vector load/store access.
5766 :
5767 : Input:
5768 : 1. STMT_INFO: a stmt that references memory. Expected to be of the form
5769 : GIMPLE_ASSIGN <name, data-ref> or
5770 : GIMPLE_ASSIGN <data-ref, name>.
5771 : 2. AGGR_TYPE: the type of the reference, which should be either a vector
5772 : or an array.
5773 : 3. AT_LOOP: the loop where the vector memref is to be created.
5774 : 4. OFFSET (optional): a byte offset to be added to the initial address
5775 : accessed by the data-ref in STMT_INFO.
5776 : 5. GSI: location where the new stmts are to be placed if there is no loop
5777 : 6. ONLY_INIT: indicate if ap is to be updated in the loop, or remain
5778 : pointing to the initial address.
5779 : 7. IV_STEP (optional, defaults to NULL): the amount that should be added
5780 : to the IV during each iteration of the loop. NULL says to move
5781 : by one copy of AGGR_TYPE up or down, depending on the step of the
5782 : data reference.
5783 :
5784 : Output:
5785 : 1. Declare a new ptr to vector_type, and have it point to the base of the
5786 : data reference (initial address accessed by the data reference).
5787 : For example, for vector of type V8HI, the following code is generated:
5788 :
5789 : v8hi *ap;
5790 : ap = (v8hi *)initial_address;
5791 :
5792 : if OFFSET is not supplied:
5793 : initial_address = &a[init];
5794 : if OFFSET is supplied:
5795 : initial_address = &a[init] + OFFSET;
5796 : if BYTE_OFFSET is supplied:
5797 : initial_address = &a[init] + BYTE_OFFSET;
5798 :
5799 : Return the initial_address in INITIAL_ADDRESS.
5800 :
5801 : 2. If ONLY_INIT is true, just return the initial pointer. Otherwise, also
5802 : update the pointer in each iteration of the loop.
5803 :
5804 : Return the increment stmt that updates the pointer in PTR_INCR.
5805 :
5806 : 3. Return the pointer. */
5807 :
5808 : tree
5809 698000 : vect_create_data_ref_ptr (vec_info *vinfo, stmt_vec_info stmt_info,
5810 : tree aggr_type, class loop *at_loop, tree offset,
5811 : tree *initial_address, gimple_stmt_iterator *gsi,
5812 : gimple **ptr_incr, bool only_init,
5813 : tree iv_step)
5814 : {
5815 698000 : const char *base_name;
5816 698000 : loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
5817 698000 : class loop *loop = NULL;
5818 698000 : bool nested_in_vect_loop = false;
5819 698000 : class loop *containing_loop = NULL;
5820 698000 : tree aggr_ptr_type;
5821 698000 : tree aggr_ptr;
5822 698000 : tree new_temp;
5823 698000 : gimple_seq new_stmt_list = NULL;
5824 698000 : edge pe = NULL;
5825 698000 : basic_block new_bb;
5826 698000 : tree aggr_ptr_init;
5827 698000 : dr_vec_info *dr_info = STMT_VINFO_DR_INFO (stmt_info);
5828 698000 : struct data_reference *dr = dr_info->dr;
5829 698000 : tree aptr;
5830 698000 : gimple_stmt_iterator incr_gsi;
5831 698000 : bool insert_after;
5832 698000 : tree indx_before_incr, indx_after_incr;
5833 698000 : gimple *incr;
5834 698000 : bb_vec_info bb_vinfo = dyn_cast <bb_vec_info> (vinfo);
5835 :
5836 698000 : gcc_assert (iv_step != NULL_TREE
5837 : || TREE_CODE (aggr_type) == ARRAY_TYPE
5838 : || TREE_CODE (aggr_type) == VECTOR_TYPE);
5839 :
5840 698000 : if (loop_vinfo)
5841 : {
5842 127920 : loop = LOOP_VINFO_LOOP (loop_vinfo);
5843 127920 : nested_in_vect_loop = nested_in_vect_loop_p (loop, stmt_info);
5844 127920 : containing_loop = (gimple_bb (stmt_info->stmt))->loop_father;
5845 127920 : pe = loop_preheader_edge (loop);
5846 : }
5847 : else
5848 : {
5849 570080 : gcc_assert (bb_vinfo);
5850 570080 : only_init = true;
5851 570080 : *ptr_incr = NULL;
5852 : }
5853 :
5854 : /* Create an expression for the first address accessed by this load
5855 : in LOOP. */
5856 698000 : base_name = get_name (DR_BASE_ADDRESS (dr));
5857 :
5858 698000 : if (dump_enabled_p ())
5859 : {
5860 25180 : tree dr_base_type = TREE_TYPE (DR_BASE_OBJECT (dr));
5861 25180 : dump_printf_loc (MSG_NOTE, vect_location,
5862 : "create %s-pointer variable to type: %T",
5863 25180 : get_tree_code_name (TREE_CODE (aggr_type)),
5864 : aggr_type);
5865 25180 : if (TREE_CODE (dr_base_type) == ARRAY_TYPE)
5866 13512 : dump_printf (MSG_NOTE, " vectorizing an array ref: ");
5867 11668 : else if (TREE_CODE (dr_base_type) == VECTOR_TYPE)
5868 0 : dump_printf (MSG_NOTE, " vectorizing a vector ref: ");
5869 11668 : else if (TREE_CODE (dr_base_type) == RECORD_TYPE)
5870 1638 : dump_printf (MSG_NOTE, " vectorizing a record based array ref: ");
5871 : else
5872 10030 : dump_printf (MSG_NOTE, " vectorizing a pointer ref: ");
5873 25180 : dump_printf (MSG_NOTE, "%T\n", DR_BASE_OBJECT (dr));
5874 : }
5875 :
5876 : /* (1) Create the new aggregate-pointer variable.
5877 : Vector and array types inherit the alias set of their component
5878 : type by default so we need to use a ref-all pointer if the data
5879 : reference does not conflict with the created aggregated data
5880 : reference because it is not addressable. */
5881 698000 : bool need_ref_all = false;
5882 698000 : if (!alias_sets_conflict_p (get_alias_set (aggr_type),
5883 : get_alias_set (DR_REF (dr))))
5884 : need_ref_all = true;
5885 : /* Likewise for any of the data references in the stmt group. */
5886 594141 : else if (DR_GROUP_SIZE (stmt_info) > 1)
5887 : {
5888 480745 : stmt_vec_info sinfo = DR_GROUP_FIRST_ELEMENT (stmt_info);
5889 1342963 : do
5890 : {
5891 1342963 : struct data_reference *sdr = STMT_VINFO_DATA_REF (sinfo);
5892 1342963 : if (!alias_sets_conflict_p (get_alias_set (aggr_type),
5893 : get_alias_set (DR_REF (sdr))))
5894 : {
5895 : need_ref_all = true;
5896 : break;
5897 : }
5898 1341914 : sinfo = DR_GROUP_NEXT_ELEMENT (sinfo);
5899 : }
5900 1341914 : while (sinfo);
5901 : }
5902 698000 : aggr_ptr_type = build_pointer_type_for_mode (aggr_type, VOIDmode,
5903 : need_ref_all);
5904 698000 : aggr_ptr = vect_get_new_vect_var (aggr_ptr_type, vect_pointer_var, base_name);
5905 :
5906 :
5907 : /* Note: If the dataref is in an inner-loop nested in LOOP, and we are
5908 : vectorizing LOOP (i.e., outer-loop vectorization), we need to create two
5909 : def-use update cycles for the pointer: one relative to the outer-loop
5910 : (LOOP), which is what steps (2) and (3) below do. The other is relative
5911 : to the inner-loop (which is the inner-most loop containing the dataref),
5912 : and this is done by step (4) below.
5913 :
5914 : When vectorizing inner-most loops, the vectorized loop (LOOP) is also the
5915 : inner-most loop, and so steps (2),(3) work the same, and step (4) is
5916 : redundant. Steps (2),(3) create the following:
5917 :
5918 : vp0 = &base_addr;
5919 : LOOP: vp1 = phi(vp0,vp2)
5920 : ...
5921 : ...
5922 : vp2 = vp1 + step
5923 : goto LOOP
5924 :
5925 : If there is an inner-loop nested in loop, then step (4) will also be
5926 : applied, and an additional update in the inner-loop will be created:
5927 :
5928 : vp0 = &base_addr;
5929 : LOOP: vp1 = phi(vp0,vp2)
5930 : ...
5931 : inner: vp3 = phi(vp1,vp4)
5932 : vp4 = vp3 + inner_step
5933 : if () goto inner
5934 : ...
5935 : vp2 = vp1 + step
5936 : if () goto LOOP */
5937 :
5938 : /* (2) Calculate the initial address of the aggregate-pointer, and set
5939 : the aggregate-pointer to point to it before the loop. */
5940 :
5941 : /* Create: &(base[init_val]) + offset on the loop preheader edge, or before GSI when there is no preheader edge. */
5942 :
5943 698000 : new_temp = vect_create_addr_base_for_vector_ref (vinfo,
5944 : stmt_info, &new_stmt_list,
5945 : offset);
5946 698000 : if (new_stmt_list)
5947 : {
5948 174704 : if (pe)
5949 : {
5950 54471 : new_bb = gsi_insert_seq_on_edge_immediate (pe, new_stmt_list);
5951 54471 : gcc_assert (!new_bb);
5952 : }
5953 : else
5954 120233 : gsi_insert_seq_before (gsi, new_stmt_list, GSI_SAME_STMT);
5955 : }
5956 :
5957 698000 : *initial_address = new_temp;
5958 698000 : aggr_ptr_init = new_temp;
5959 :
5960 : /* (3) Handle the updating of the aggregate-pointer inside the loop.
5961 : This is needed when ONLY_INIT is false, and also when AT_LOOP is the
5962 : inner-loop nested in LOOP (during outer-loop vectorization). */
5963 :
5964 : /* No update in loop is required. */
5965 698000 : if (only_init && (!loop_vinfo || at_loop == loop))
5966 : aptr = aggr_ptr_init;
5967 : else
5968 : {
5969 : /* Accesses to invariant addresses should be handled specially
5970 : by the caller. */
5971 127912 : tree step = vect_dr_behavior (vinfo, dr_info)->step;
5972 127912 : gcc_assert (!integer_zerop (step));
5973 :
5974 127912 : if (iv_step == NULL_TREE)
5975 : {
5976 : /* The step of the aggregate pointer is the type size,
5977 : negated for downward accesses. */
5978 0 : iv_step = TYPE_SIZE_UNIT (aggr_type);
5979 0 : if (tree_int_cst_sgn (step) == -1)
5980 0 : iv_step = fold_build1 (NEGATE_EXPR, TREE_TYPE (iv_step), iv_step);
5981 : }
5982 :
5983 127912 : standard_iv_increment_position (loop, &incr_gsi, &insert_after);
5984 :
5985 127912 : create_iv (aggr_ptr_init, PLUS_EXPR,
5986 : iv_step, aggr_ptr, loop, &incr_gsi, insert_after,
5987 : &indx_before_incr, &indx_after_incr);
5988 127912 : incr = gsi_stmt (incr_gsi);
5989 :
5990 : /* Copy the points-to information if it exists. */
5991 127912 : vect_duplicate_ssa_name_ptr_info (indx_before_incr, dr_info);
5992 127912 : vect_duplicate_ssa_name_ptr_info (indx_after_incr, dr_info);
5993 127912 : if (ptr_incr)
5994 127912 : *ptr_incr = incr;
5995 :
5996 127912 : aptr = indx_before_incr;
5997 : }
5998 :
5999 698000 : if (!nested_in_vect_loop || only_init)
6000 : return aptr;
6001 :
6002 :
6003 : /* (4) Handle the updating of the aggregate-pointer inside the inner-loop
6004 : nested in LOOP, if exists. */
6005 :
6006 336 : gcc_assert (nested_in_vect_loop);
6007 336 : if (!only_init)
6008 : {
6009 336 : standard_iv_increment_position (containing_loop, &incr_gsi,
6010 : &insert_after);
6011 336 : create_iv (aptr, PLUS_EXPR, DR_STEP (dr),
6012 : aggr_ptr, containing_loop, &incr_gsi, insert_after,
6013 : &indx_before_incr, &indx_after_incr);
6014 336 : incr = gsi_stmt (incr_gsi);
6015 :
6016 : /* Copy the points-to information if it exists. */
6017 336 : vect_duplicate_ssa_name_ptr_info (indx_before_incr, dr_info);
6018 336 : vect_duplicate_ssa_name_ptr_info (indx_after_incr, dr_info);
6019 336 : if (ptr_incr)
6020 336 : *ptr_incr = incr;
6021 :
6022 336 : return indx_before_incr;
6023 : }
6024 : else
6025 : gcc_unreachable ();
6026 : }
6027 :
6028 :
6029 : /* Function bump_vector_ptr
6030 :
6031 : Increment a pointer (to a vector type) by vector-size. If requested,
6032 : i.e. if PTR-INCR is given, then also connect the new increment stmt
6033 : to the existing def-use update-chain of the pointer, by modifying
6034 : the PTR_INCR as illustrated below:
6035 :
6036 : The pointer def-use update-chain before this function:
6037 : DATAREF_PTR = phi (p_0, p_2)
6038 : ....
6039 : PTR_INCR: p_2 = DATAREF_PTR + step
6040 :
6041 : The pointer def-use update-chain after this function:
6042 : DATAREF_PTR = phi (p_0, p_2)
6043 : ....
6044 : NEW_DATAREF_PTR = DATAREF_PTR + BUMP
6045 : ....
6046 : PTR_INCR: p_2 = NEW_DATAREF_PTR + step
6047 :
6048 : Input:
6049 : DATAREF_PTR - ssa_name of a pointer (to vector type) that is being updated
6050 : in the loop.
6051 : PTR_INCR - optional. The stmt that updates the pointer in each iteration of
6052 : the loop. The increment amount across iterations is expected
6053 : to be vector_size.
6054 : GSI - location where the new update stmt is to be placed.
6055 : STMT_INFO - the original scalar memory-access stmt that is being vectorized.
6056 : UPDATE - The offset by which to bump the pointer.
6057 :
6058 : Output: Return NEW_DATAREF_PTR as illustrated above.
6059 :
6060 : */
6061 :
6062 : tree
6063 240783 : bump_vector_ptr (vec_info *vinfo,
6064 : tree dataref_ptr, gimple *ptr_incr, gimple_stmt_iterator *gsi,
6065 : stmt_vec_info stmt_info, tree update)
6066 : {
6067 240783 : struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
6068 240783 : gimple *incr_stmt;
6069 240783 : ssa_op_iter iter;
6070 240783 : use_operand_p use_p;
6071 240783 : tree new_dataref_ptr;
6072 :
6073 240783 : if (TREE_CODE (dataref_ptr) == SSA_NAME)
6074 111652 : new_dataref_ptr = copy_ssa_name (dataref_ptr);
6075 129131 : else if (is_gimple_min_invariant (dataref_ptr))
6076 : /* When possible avoid emitting a separate increment stmt that will
6077 : force the addressed object addressable. */
6078 258262 : return build1 (ADDR_EXPR, TREE_TYPE (dataref_ptr),
6079 129131 : fold_build2 (MEM_REF,
6080 : TREE_TYPE (TREE_TYPE (dataref_ptr)),
6081 : dataref_ptr,
6082 129131 : fold_convert (ptr_type_node, update)));
6083 : else
6084 0 : new_dataref_ptr = make_ssa_name (TREE_TYPE (dataref_ptr));
6085 111652 : incr_stmt = gimple_build_assign (new_dataref_ptr, POINTER_PLUS_EXPR,
6086 : dataref_ptr, update);
6087 111652 : vect_finish_stmt_generation (vinfo, stmt_info, incr_stmt, gsi);
6088 : /* Fold the increment, avoiding excessively long use-def chains of
6089 : those, leading to compile-time issues for passes until the next
6090 : forwprop pass which would do this as well. */
6091 111652 : gimple_stmt_iterator fold_gsi = gsi_for_stmt (incr_stmt);
6092 111652 : if (fold_stmt (&fold_gsi, follow_all_ssa_edges))
6093 : {
6094 72331 : incr_stmt = gsi_stmt (fold_gsi);
6095 72331 : update_stmt (incr_stmt);
6096 : }
6097 :
6098 : /* Copy the points-to information if it exists. */
6099 111652 : duplicate_ssa_name_ptr_info (new_dataref_ptr, DR_PTR_INFO (dr));
6100 :
6101 111652 : if (!ptr_incr)
6102 : return new_dataref_ptr;
6103 :
6104 : /* Update the vector-pointer's cross-iteration increment. */
6105 112698 : FOR_EACH_SSA_USE_OPERAND (use_p, ptr_incr, iter, SSA_OP_USE)
6106 : {
6107 56349 : tree use = USE_FROM_PTR (use_p);
6108 :
6109 56349 : if (use == dataref_ptr)
6110 56349 : SET_USE (use_p, new_dataref_ptr);
6111 : else
6112 0 : gcc_assert (operand_equal_p (use, update, 0));
6113 : }
6114 :
6115 : return new_dataref_ptr;
6116 : }
6117 :
6118 :
6119 : /* Copy the dependence clique and dependence base from the SRC reference
6120 : to the DEST MEM_REF.  Nothing is copied unless DEST is a MEM_REF and the base of SRC, after stripping handled components, is a MEM_REF or TARGET_MEM_REF. */
6121 :
6122 : void
6123 949226 : vect_copy_ref_info (tree dest, tree src)
6124 : {
6125 949226 : if (TREE_CODE (dest) != MEM_REF)
6126 : return;
6127 :
6128 : tree src_base = src;
6129 1903930 : while (handled_component_p (src_base))
6130 959363 : src_base = TREE_OPERAND (src_base, 0);
6131 944567 : if (TREE_CODE (src_base) != MEM_REF
6132 944567 : && TREE_CODE (src_base) != TARGET_MEM_REF)
6133 : return;
6134 :
6135 515714 : MR_DEPENDENCE_CLIQUE (dest) = MR_DEPENDENCE_CLIQUE (src_base);
6136 515714 : MR_DEPENDENCE_BASE (dest) = MR_DEPENDENCE_BASE (src_base);
6137 : }
6138 :
6139 :
6140 : /* Function vect_create_destination_var.
6141 :
6142 : Create a new temporary of type VECTYPE, or of the type of SCALAR_DEST when VECTYPE is NULL.  SCALAR_DEST must be an SSA_NAME; the temporary is named after it and its SSA version. */
6143 :
6144 : tree
6145 531414 : vect_create_destination_var (tree scalar_dest, tree vectype)
6146 : {
6147 531414 : tree vec_dest;
6148 531414 : const char *name;
6149 531414 : char *new_name;
6150 531414 : tree type;
6151 531414 : enum vect_var_kind kind;
6152 :
6153 531414 : kind = vectype
6154 1040030 : ? VECTOR_BOOLEAN_TYPE_P (vectype)
6155 508616 : ? vect_mask_var
6156 : : vect_simple_var
6157 : : vect_scalar_var;
6158 22798 : type = vectype ? vectype : TREE_TYPE (scalar_dest);
6159 :
6160 531414 : gcc_assert (TREE_CODE (scalar_dest) == SSA_NAME);
6161 :
6162 531414 : name = get_name (scalar_dest);
6163 531414 : if (name)
6164 190625 : new_name = xasprintf ("%s_%u", name, SSA_NAME_VERSION (scalar_dest));
6165 : else
6166 340789 : new_name = xasprintf ("_%u", SSA_NAME_VERSION (scalar_dest));
6167 531414 : vec_dest = vect_get_new_vect_var (type, kind, new_name);
6168 531414 : free (new_name);
6169 :
6170 531414 : return vec_dest;
6171 : }
6172 :
6173 : /* Function vect_grouped_store_supported.
6174 :
6175 : Returns TRUE if the permutations needed for a store group of COUNT vectors
6176 : of type VECTYPE are supported by the target, and FALSE otherwise.  COUNT values other than 3 or a power of two are rejected. */
6177 :
6178 : bool
6179 2701 : vect_grouped_store_supported (tree vectype, unsigned HOST_WIDE_INT count)
6180 : {
6181 2701 : machine_mode mode = TYPE_MODE (vectype);
6182 :
6183 : /* vect_permute_store_chain requires the group size to be equal to 3 or
6184 : be a power of two. */
6185 2701 : if (count != 3 && exact_log2 (count) == -1)
6186 : {
6187 552 : if (dump_enabled_p ())
6188 11 : dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6189 : "the size of the group of accesses"
6190 : " is not a power of 2 or not eqaul to 3\n");
6191 552 : return false;
6192 : }
6193 :
6194 : /* Check that the permutation is supported. */
6195 2149 : if (VECTOR_MODE_P (mode))
6196 : {
6197 2149 : unsigned int i;
6198 2149 : if (count == 3)
6199 : {
6200 947 : unsigned int j0 = 0, j1 = 0, j2 = 0;
6201 947 : unsigned int i, j;
6202 :
6203 947 : unsigned int nelt;
6204 1894 : if (!GET_MODE_NUNITS (mode).is_constant (&nelt))
6205 : {
6206 : if (dump_enabled_p ())
6207 : dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6208 : "cannot handle groups of 3 stores for"
6209 : " variable-length vectors\n");
6210 : return false;
6211 : }
6212 :
6213 947 : vec_perm_builder sel (nelt, nelt, 1);
6214 947 : sel.quick_grow (nelt);
6215 947 : vec_perm_indices indices;
6216 3563 : for (j = 0; j < 3; j++)
6217 : {
6218 2691 : int nelt0 = ((3 - j) * nelt) % 3;
6219 2691 : int nelt1 = ((3 - j) * nelt + 1) % 3;
6220 2691 : int nelt2 = ((3 - j) * nelt + 2) % 3;
6221 9525 : for (i = 0; i < nelt; i++)
6222 : {
6223 6834 : if (3 * i + nelt0 < nelt)
6224 2316 : sel[3 * i + nelt0] = j0++;
6225 6834 : if (3 * i + nelt1 < nelt)
6226 2277 : sel[3 * i + nelt1] = nelt + j1++;
6227 6834 : if (3 * i + nelt2 < nelt)
6228 2241 : sel[3 * i + nelt2] = 0;
6229 : }
6230 2691 : indices.new_vector (sel, 2, nelt);
6231 2691 : if (!can_vec_perm_const_p (mode, mode, indices))
6232 : {
6233 66 : if (dump_enabled_p ())
6234 37 : dump_printf (MSG_MISSED_OPTIMIZATION,
6235 : "permutation op not supported by target.\n");
6236 66 : return false;
6237 : }
6238 :
6239 8883 : for (i = 0; i < nelt; i++)
6240 : {
6241 6258 : if (3 * i + nelt0 < nelt)
6242 2092 : sel[3 * i + nelt0] = 3 * i + nelt0;
6243 6258 : if (3 * i + nelt1 < nelt)
6244 2083 : sel[3 * i + nelt1] = 3 * i + nelt1;
6245 6258 : if (3 * i + nelt2 < nelt)
6246 2083 : sel[3 * i + nelt2] = nelt + j2++;
6247 : }
6248 2625 : indices.new_vector (sel, 2, nelt);
6249 2625 : if (!can_vec_perm_const_p (mode, mode, indices))
6250 : {
6251 9 : if (dump_enabled_p ())
6252 9 : dump_printf (MSG_MISSED_OPTIMIZATION,
6253 : "permutation op not supported by target.\n");
6254 9 : return false;
6255 : }
6256 : }
6257 : return true;
6258 947 : }
6259 : else
6260 : {
6261 : /* If length is not equal to 3 then only power of 2 is supported. */
6262 1202 : gcc_assert (pow2p_hwi (count));
6263 2404 : poly_uint64 nelt = GET_MODE_NUNITS (mode);
6264 :
6265 : /* The encoding has 2 interleaved stepped patterns. */
6266 2404 : if(!multiple_p (nelt, 2))
6267 1156 : return false;
6268 1202 : vec_perm_builder sel (nelt, 2, 3);
6269 1202 : sel.quick_grow (6);
6270 6010 : for (i = 0; i < 3; i++)
6271 : {
6272 3606 : sel[i * 2] = i;
6273 3606 : sel[i * 2 + 1] = i + nelt;
6274 : }
6275 1202 : vec_perm_indices indices (sel, 2, nelt);
6276 1202 : if (can_vec_perm_const_p (mode, mode, indices))
6277 : {
6278 8092 : for (i = 0; i < 6; i++)
6279 6936 : sel[i] += exact_div (nelt, 2);
6280 1156 : indices.new_vector (sel, 2, nelt);
6281 1156 : if (can_vec_perm_const_p (mode, mode, indices))
6282 1156 : return true;
6283 : }
6284 1202 : }
6285 : }
6286 :
6287 46 : if (dump_enabled_p ())
6288 3 : dump_printf (MSG_MISSED_OPTIMIZATION,
6289 : "permutation op not supported by target.\n");
6290 : return false;
6291 : }
6292 :
6293 : /* Return the internal function for vec_{mask_,mask_len_}store_lanes if it is available for COUNT vectors
6294 : of type VECTYPE, or IFN_LAST if none is.  The mask_len form is tried first regardless of MASKED_P; otherwise MASKED_P says whether the masked form is needed. */
6295 :
6296 : internal_fn
6297 40170 : vect_store_lanes_supported (tree vectype, unsigned HOST_WIDE_INT count,
6298 : bool masked_p)
6299 : {
6300 40170 : if (vect_lanes_optab_supported_p ("vec_mask_len_store_lanes",
6301 : vec_mask_len_store_lanes_optab, vectype,
6302 : count))
6303 : return IFN_MASK_LEN_STORE_LANES;
6304 40170 : else if (masked_p)
6305 : {
6306 159 : if (vect_lanes_optab_supported_p ("vec_mask_store_lanes",
6307 : vec_mask_store_lanes_optab, vectype,
6308 : count))
6309 : return IFN_MASK_STORE_LANES;
6310 : }
6311 : else
6312 : {
6313 40011 : if (vect_lanes_optab_supported_p ("vec_store_lanes",
6314 : vec_store_lanes_optab, vectype, count))
6315 : return IFN_STORE_LANES;
6316 : }
6317 : return IFN_LAST;
6318 : }
6319 :
6320 :
6321 : /* Function vect_setup_realignment
6322 :
6323 : This function is called when vectorizing an unaligned load using
6324 : the dr_explicit_realign[_optimized] scheme.
6325 : This function generates the following code at the loop prolog:
6326 :
6327 : p = initial_addr;
6328 : x msq_init = *(floor(p)); # prolog load
6329 : realignment_token = call target_builtin;
6330 : loop:
6331 : x msq = phi (msq_init, ---)
6332 :
6333 : The stmts marked with x are generated only for the case of
6334 : dr_explicit_realign_optimized.
6335 :
6336 : The code above sets up a new (vector) pointer, pointing to the first
6337 : location accessed by STMT_INFO, and a "floor-aligned" load using that
6338 : pointer. It also generates code to compute the "realignment-token"
6339 : (if the relevant target hook was defined), and creates a phi-node at the
6340 : loop-header bb whose arguments are the result of the prolog-load (created
6341 : by this function) and the result of a load that takes place in the loop
6342 : (to be created by the caller to this function).
6343 :
6344 : For the case of dr_explicit_realign_optimized:
6345 : The caller to this function uses the phi-result (msq) to create the
6346 : realignment code inside the loop, and sets up the missing phi argument,
6347 : as follows:
6348 : loop:
6349 : msq = phi (msq_init, lsq)
6350 : lsq = *(floor(p')); # load in loop
6351 : result = realign_load (msq, lsq, realignment_token);
6352 :
6353 : For the case of dr_explicit_realign:
6354 : loop:
6355 : msq = *(floor(p)); # load in loop
6356 : p' = p + (VS-1);
6357 : lsq = *(floor(p')); # load in loop
6358 : result = realign_load (msq, lsq, realignment_token);
6359 :
6360 : Input:
6361 : STMT_INFO - (scalar) load stmt to be vectorized. This load accesses
6362 : a memory location that may be unaligned.
6363 : GSI - place where new code is to be inserted.
6364 : ALIGNMENT_SUPPORT_SCHEME - which of the two misalignment handling schemes
6365 : is used.
6366 :
6367 : Output:
6368 : REALIGNMENT_TOKEN - the result of a call to the builtin_mask_for_load
6369 : target hook, if defined.
6370 : Return value - the result of the loop-header phi node. */
6371 :
6372 : tree
6373 0 : vect_setup_realignment (vec_info *vinfo, stmt_vec_info stmt_info, tree vectype,
6374 : gimple_stmt_iterator *gsi, tree *realignment_token,
6375 : enum dr_alignment_support alignment_support_scheme,
6376 : tree init_addr,
6377 : class loop **at_loop)
6378 : {
6379 0 : loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
6380 0 : dr_vec_info *dr_info = STMT_VINFO_DR_INFO (stmt_info);
6381 0 : struct data_reference *dr = dr_info->dr;
6382 0 : class loop *loop = NULL;
6383 0 : edge pe = NULL;
6384 0 : tree scalar_dest = gimple_assign_lhs (stmt_info->stmt);
6385 0 : tree vec_dest;
6386 0 : gimple *inc;
6387 0 : tree ptr;
6388 0 : tree data_ref;
6389 0 : basic_block new_bb;
6390 0 : tree msq_init = NULL_TREE;
6391 0 : tree new_temp;
6392 0 : gphi *phi_stmt;
6393 0 : tree msq = NULL_TREE;
6394 0 : gimple_seq stmts = NULL;
6395 0 : bool compute_in_loop = false;
6396 0 : bool nested_in_vect_loop = false;
6397 0 : class loop *containing_loop = (gimple_bb (stmt_info->stmt))->loop_father;
6398 0 : class loop *loop_for_initial_load = NULL;
6399 :
6400 0 : if (loop_vinfo)
6401 : {
6402 0 : loop = LOOP_VINFO_LOOP (loop_vinfo);
6403 0 : nested_in_vect_loop = nested_in_vect_loop_p (loop, stmt_info);
6404 : }
6405 :
6406 0 : gcc_assert (alignment_support_scheme == dr_explicit_realign
6407 : || alignment_support_scheme == dr_explicit_realign_optimized);
6408 :
6409 : /* We need to generate three things:
6410 : 1. the misalignment computation
6411 : 2. the extra vector load (for the optimized realignment scheme).
6412 : 3. the phi node for the two vectors from which the realignment is
6413 : done (for the optimized realignment scheme). */
6414 :
6415 : /* 1. Determine where to generate the misalignment computation.
6416 :
6417 : If INIT_ADDR is NULL_TREE, this indicates that the misalignment
6418 : calculation will be generated by this function, outside the loop (in the
6419 : preheader). Otherwise, INIT_ADDR had already been computed for us by the
6420 : caller, inside the loop.
6421 :
6422 : Background: If the misalignment remains fixed throughout the iterations of
6423 : the loop, then both realignment schemes are applicable, and also the
6424 : misalignment computation can be done outside LOOP. This is because we are
6425 : vectorizing LOOP, and so the memory accesses in LOOP advance in steps that
6426 : are a multiple of VS (the Vector Size), and therefore the misalignment in
6427 : different vectorized LOOP iterations is always the same.
6428 : The problem arises only if the memory access is in an inner-loop nested
6429 : inside LOOP, which is now being vectorized using outer-loop vectorization.
6430 : This is the only case when the misalignment of the memory access may not
6431 : remain fixed throughout the iterations of the inner-loop (as explained in
6432 : detail in vect_supportable_dr_alignment). In this case, not only is the
6433 : optimized realignment scheme not applicable, but also the misalignment
6434 : computation (and generation of the realignment token that is passed to
6435 : REALIGN_LOAD) have to be done inside the loop.
6436 :
6437 : In short, INIT_ADDR indicates whether we are in a COMPUTE_IN_LOOP mode
6438 : or not, which in turn determines if the misalignment is computed inside
6439 : the inner-loop, or outside LOOP. */
6440 :
6441 0 : if (init_addr != NULL_TREE || !loop_vinfo)
6442 : {
6443 0 : compute_in_loop = true;
6444 0 : gcc_assert (alignment_support_scheme == dr_explicit_realign);
6445 : }
6446 :
6447 :
6448 : /* 2. Determine where to generate the extra vector load.
6449 :
6450 : For the optimized realignment scheme, instead of generating two vector
6451 : loads in each iteration, we generate a single extra vector load in the
6452 : preheader of the loop, and in each iteration reuse the result of the
6453 : vector load from the previous iteration. In case the memory access is in
6454 : an inner-loop nested inside LOOP, which is now being vectorized using
6455 : outer-loop vectorization, we need to determine whether this initial vector
6456 : load should be generated at the preheader of the inner-loop, or can be
6457 : generated at the preheader of LOOP. If the memory access has no evolution
6458 : in LOOP, it can be generated in the preheader of LOOP. Otherwise, it has
6459 : to be generated inside LOOP (in the preheader of the inner-loop). */
6460 :
6461 0 : if (nested_in_vect_loop)
6462 : {
6463 0 : tree outerloop_step = STMT_VINFO_DR_STEP (stmt_info);
6464 0 : bool invariant_in_outerloop =
6465 0 : (tree_int_cst_compare (outerloop_step, size_zero_node) == 0);
6466 0 : loop_for_initial_load = (invariant_in_outerloop ? loop : loop->inner);
6467 : }
6468 : else
6469 : loop_for_initial_load = loop;
6470 0 : if (at_loop)
6471 0 : *at_loop = loop_for_initial_load;
6472 :
6473 0 : tree vuse = NULL_TREE;
6474 0 : if (loop_for_initial_load)
6475 : {
6476 0 : pe = loop_preheader_edge (loop_for_initial_load);
6477 0 : if (gphi *vphi = get_virtual_phi (loop_for_initial_load->header))
6478 0 : vuse = PHI_ARG_DEF_FROM_EDGE (vphi, pe);
6479 : }
6480 0 : if (!vuse)
6481 0 : vuse = gimple_vuse (gsi_stmt (*gsi));
6482 :
6483 : /* 3. For the case of the optimized realignment, create the first vector
6484 : load at the loop preheader. */
6485 :
6486 0 : if (alignment_support_scheme == dr_explicit_realign_optimized)
6487 : {
6488 : /* Create msq_init = *(floor(p1)) in the loop preheader */
6489 0 : gassign *new_stmt;
6490 :
6491 0 : gcc_assert (!compute_in_loop);
6492 0 : vec_dest = vect_create_destination_var (scalar_dest, vectype);
6493 0 : ptr = vect_create_data_ref_ptr (vinfo, stmt_info, vectype,
6494 : loop_for_initial_load, NULL_TREE,
6495 : &init_addr, NULL, &inc, true);
6496 0 : if (TREE_CODE (ptr) == SSA_NAME)
6497 0 : new_temp = copy_ssa_name (ptr);
6498 : else
6499 0 : new_temp = make_ssa_name (TREE_TYPE (ptr));
6500 0 : poly_uint64 align = DR_TARGET_ALIGNMENT (dr_info);
6501 0 : tree type = TREE_TYPE (ptr);
6502 0 : new_stmt = gimple_build_assign
6503 0 : (new_temp, BIT_AND_EXPR, ptr,
6504 0 : fold_build2 (MINUS_EXPR, type,
6505 : build_int_cst (type, 0),
6506 : build_int_cst (type, align)));
6507 0 : new_bb = gsi_insert_on_edge_immediate (pe, new_stmt);
6508 0 : gcc_assert (!new_bb);
6509 0 : data_ref
6510 0 : = build2 (MEM_REF, TREE_TYPE (vec_dest), new_temp,
6511 : build_int_cst (reference_alias_ptr_type (DR_REF (dr)), 0));
6512 0 : vect_copy_ref_info (data_ref, DR_REF (dr));
6513 0 : new_stmt = gimple_build_assign (vec_dest, data_ref);
6514 0 : new_temp = make_ssa_name (vec_dest, new_stmt);
6515 0 : gimple_assign_set_lhs (new_stmt, new_temp);
6516 0 : gimple_set_vuse (new_stmt, vuse);
6517 0 : if (pe)
6518 : {
6519 0 : new_bb = gsi_insert_on_edge_immediate (pe, new_stmt);
6520 0 : gcc_assert (!new_bb);
6521 : }
6522 : else
6523 0 : gsi_insert_before (gsi, new_stmt, GSI_SAME_STMT);
6524 :
6525 0 : msq_init = gimple_assign_lhs (new_stmt);
6526 : }
6527 :
6528 : /* 4. Create realignment token using a target builtin, if available.
6529 : It is done either inside the containing loop, or before LOOP (as
6530 : determined above). */
6531 :
6532 0 : if (targetm.vectorize.builtin_mask_for_load)
6533 : {
6534 0 : gcall *new_stmt;
6535 0 : tree builtin_decl;
6536 :
6537 : /* Compute INIT_ADDR - the initial address accessed by this memref. */
6538 0 : if (!init_addr)
6539 : {
6540 : /* Generate the INIT_ADDR computation outside LOOP. */
6541 0 : init_addr = vect_create_addr_base_for_vector_ref (vinfo,
6542 : stmt_info, &stmts,
6543 : NULL_TREE);
6544 0 : if (loop)
6545 : {
6546 0 : pe = loop_preheader_edge (loop);
6547 0 : new_bb = gsi_insert_seq_on_edge_immediate (pe, stmts);
6548 0 : gcc_assert (!new_bb);
6549 : }
6550 : else
6551 0 : gsi_insert_seq_before (gsi, stmts, GSI_SAME_STMT);
6552 : }
6553 :
6554 0 : builtin_decl = targetm.vectorize.builtin_mask_for_load ();
6555 0 : new_stmt = gimple_build_call (builtin_decl, 1, init_addr);
6556 0 : vec_dest =
6557 0 : vect_create_destination_var (scalar_dest,
6558 : gimple_call_return_type (new_stmt));
6559 0 : new_temp = make_ssa_name (vec_dest, new_stmt);
6560 0 : gimple_call_set_lhs (new_stmt, new_temp);
6561 :
6562 0 : if (compute_in_loop)
6563 0 : gsi_insert_before (gsi, new_stmt, GSI_SAME_STMT);
6564 : else
6565 : {
6566 : /* Generate the misalignment computation outside LOOP. */
6567 0 : pe = loop_preheader_edge (loop);
6568 0 : new_bb = gsi_insert_on_edge_immediate (pe, new_stmt);
6569 0 : gcc_assert (!new_bb);
6570 : }
6571 :
6572 0 : *realignment_token = gimple_call_lhs (new_stmt);
6573 :
6574 : /* The result of the CALL_EXPR to this builtin is determined from
6575 : the value of the parameter and no global variables are touched
6576 : which makes the builtin a "const" function. Requiring the
6577 : builtin to have the "const" attribute makes it unnecessary
6578 : to call mark_call_clobbered. */
6579 0 : gcc_assert (TREE_READONLY (builtin_decl));
6580 : }
6581 :
6582 0 : if (alignment_support_scheme == dr_explicit_realign)
6583 : return msq;
6584 :
6585 0 : gcc_assert (!compute_in_loop);
6586 0 : gcc_assert (alignment_support_scheme == dr_explicit_realign_optimized);
6587 :
6588 :
6589 : /* 5. Create msq = phi <msq_init, lsq> in loop */
6590 :
6591 0 : pe = loop_preheader_edge (containing_loop);
6592 0 : vec_dest = vect_create_destination_var (scalar_dest, vectype);
6593 0 : msq = make_ssa_name (vec_dest);
6594 0 : phi_stmt = create_phi_node (msq, containing_loop->header);
6595 0 : add_phi_arg (phi_stmt, msq_init, pe, UNKNOWN_LOCATION);
6596 :
6597 0 : return msq;
6598 : }
6599 :
6600 :
6601 : /* Function vect_grouped_load_supported.
6602 :
6603 : COUNT is the size of the load group (the number of statements plus the
6604 : number of gaps). SINGLE_ELEMENT_P is true if there is actually
6605 : only one statement, with a gap of COUNT - 1.
6606 :
6607 : Returns true if a suitable permute exists. */
6608 :
6609 : bool
6610 1925 : vect_grouped_load_supported (tree vectype, bool single_element_p,
6611 : unsigned HOST_WIDE_INT count)
6612 : {
6613 1925 : machine_mode mode = TYPE_MODE (vectype);
6614 :
6615 : /* If this is single-element interleaving with an element distance
6616 : that leaves unused vector loads around punt - we at least create
6617 : very sub-optimal code in that case (and blow up memory,
6618 : see PR65518). */
6619 1925 : if (single_element_p && maybe_gt (count, TYPE_VECTOR_SUBPARTS (vectype)))
6620 : {
6621 24 : if (dump_enabled_p ())
6622 3 : dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6623 : "single-element interleaving not supported "
6624 : "for not adjacent vector loads\n");
6625 24 : return false;
6626 : }
6627 :
6628 : /* vect_permute_load_chain requires the group size to be equal to 3 or
6629 : be a power of two. */
6630 1901 : if (count != 3 && exact_log2 (count) == -1)
6631 : {
6632 226 : if (dump_enabled_p ())
6633 14 : dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6634 : "the size of the group of accesses"
6635 : " is not a power of 2 or not equal to 3\n");
6636 226 : return false;
6637 : }
6638 :
6639 : /* Check that the permutation is supported. */
6640 1675 : if (VECTOR_MODE_P (mode))
6641 : {
6642 1675 : unsigned int i, j;
6643 1675 : if (count == 3)
6644 : {
6645 835 : unsigned int nelt;
6646 1670 : if (!GET_MODE_NUNITS (mode).is_constant (&nelt))
6647 : {
6648 : if (dump_enabled_p ())
6649 : dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6650 : "cannot handle groups of 3 loads for"
6651 : " variable-length vectors\n");
6652 : return false;
6653 : }
6654 :
6655 835 : vec_perm_builder sel (nelt, nelt, 1);
6656 835 : sel.quick_grow (nelt);
6657 835 : vec_perm_indices indices;
6658 835 : unsigned int k;
6659 3304 : for (k = 0; k < 3; k++)
6660 : {
6661 8825 : for (i = 0; i < nelt; i++)
6662 6344 : if (3 * i + k < 2 * nelt)
6663 4235 : sel[i] = 3 * i + k;
6664 : else
6665 2109 : sel[i] = 0;
6666 2481 : indices.new_vector (sel, 2, nelt);
6667 2481 : if (!can_vec_perm_const_p (mode, mode, indices))
6668 : {
6669 12 : if (dump_enabled_p ())
6670 4 : dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6671 : "shuffle of 3 loads is not supported by"
6672 : " target\n");
6673 12 : return false;
6674 : }
6675 8661 : for (i = 0, j = 0; i < nelt; i++)
6676 6192 : if (3 * i + k < 2 * nelt)
6677 4128 : sel[i] = i;
6678 : else
6679 2064 : sel[i] = nelt + ((nelt + k) % 3) + 3 * (j++);
6680 2469 : indices.new_vector (sel, 2, nelt);
6681 2469 : if (!can_vec_perm_const_p (mode, mode, indices))
6682 : {
6683 0 : if (dump_enabled_p ())
6684 0 : dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6685 : "shuffle of 3 loads is not supported by"
6686 : " target\n");
6687 0 : return false;
6688 : }
6689 : }
6690 : return true;
6691 835 : }
6692 : else
6693 : {
6694 : /* If length is not equal to 3 then only power of 2 is supported. */
6695 840 : gcc_assert (pow2p_hwi (count));
6696 1680 : poly_uint64 nelt = GET_MODE_NUNITS (mode);
6697 :
6698 : /* The encoding has a single stepped pattern. */
6699 840 : vec_perm_builder sel (nelt, 1, 3);
6700 840 : sel.quick_grow (3);
6701 4200 : for (i = 0; i < 3; i++)
6702 2520 : sel[i] = i * 2;
6703 840 : vec_perm_indices indices (sel, 2, nelt);
6704 840 : if (can_vec_perm_const_p (mode, mode, indices))
6705 : {
6706 3348 : for (i = 0; i < 3; i++)
6707 2511 : sel[i] = i * 2 + 1;
6708 837 : indices.new_vector (sel, 2, nelt);
6709 837 : if (can_vec_perm_const_p (mode, mode, indices))
6710 837 : return true;
6711 : }
6712 840 : }
6713 : }
6714 :
6715 3 : if (dump_enabled_p ())
6716 2 : dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6717 : "extract even/odd not supported by target\n");
6718 : return false;
6719 : }
6720 :
6721 : /* Return FN if vec_{masked_,mask_len_}load_lanes is available for COUNT vectors
6722 : of type VECTYPE. MASKED_P says whether the masked form is needed.
6723 : If it is available and ELSVALS is nonzero store the possible else values
6724 : in the vector it points to. */
6725 :
6726 : internal_fn
6727 144317 : vect_load_lanes_supported (tree vectype, unsigned HOST_WIDE_INT count,
6728 : bool masked_p, vec<int> *elsvals)
6729 : {
6730 144317 : if (vect_lanes_optab_supported_p ("vec_mask_len_load_lanes",
6731 : vec_mask_len_load_lanes_optab, vectype,
6732 : count, elsvals))
6733 : return IFN_MASK_LEN_LOAD_LANES;
6734 144317 : else if (masked_p)
6735 : {
6736 30 : if (vect_lanes_optab_supported_p ("vec_mask_load_lanes",
6737 : vec_mask_load_lanes_optab, vectype,
6738 : count, elsvals))
6739 : return IFN_MASK_LOAD_LANES;
6740 : }
6741 : else
6742 : {
6743 144287 : if (vect_lanes_optab_supported_p ("vec_load_lanes", vec_load_lanes_optab,
6744 : vectype, count, elsvals))
6745 : return IFN_LOAD_LANES;
6746 : }
6747 : return IFN_LAST;
6748 : }
6749 :
6750 : /* Function vect_force_dr_alignment_p.
6751 :
6752 : Returns whether the alignment of a DECL can be forced to be aligned
6753 : on ALIGNMENT bit boundary. */
6754 :
6755 : bool
6756 710232 : vect_can_force_dr_alignment_p (const_tree decl, poly_uint64 alignment)
6757 : {
6758 710232 : if (!VAR_P (decl))
6759 : return false;
6760 :
6761 210358 : if (decl_in_symtab_p (decl)
6762 210358 : && (!symtab_node::get (decl)
6763 22108 : || !symtab_node::get (decl)->can_increase_alignment_p ()))
6764 13442 : return false;
6765 :
6766 196916 : if (TREE_STATIC (decl))
6767 8666 : return (known_le (alignment,
6768 8666 : (unsigned HOST_WIDE_INT) MAX_OFILE_ALIGNMENT));
6769 : else
6770 188250 : return (known_le (alignment, (unsigned HOST_WIDE_INT) MAX_STACK_ALIGNMENT));
6771 : }
6772 :
6773 : /* Return whether the data reference DR_INFO is supported with respect to its
6774 : alignment.
6775 : If CHECK_ALIGNED_ACCESSES is TRUE, check if the access is supported even
6776 : it is aligned, i.e., check if it is possible to vectorize it with different
6777 : alignment. If IS_GATHER_SCATTER is true we are dealing with a
6778 : gather/scatter. */
6779 :
6780 : enum dr_alignment_support
6781 2848062 : vect_supportable_dr_alignment (vec_info *vinfo, dr_vec_info *dr_info,
6782 : tree vectype, int misalignment,
6783 : bool is_gather_scatter)
6784 : {
6785 2848062 : data_reference *dr = dr_info->dr;
6786 2848062 : stmt_vec_info stmt_info = dr_info->stmt;
6787 2848062 : machine_mode mode = TYPE_MODE (vectype);
6788 2848062 : loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
6789 2848062 : class loop *vect_loop = NULL;
6790 2848062 : bool nested_in_vect_loop = false;
6791 :
6792 2848062 : if (misalignment == 0)
6793 : return dr_aligned;
6794 1752165 : else if (dr_safe_speculative_read_required (stmt_info))
6795 : return dr_unaligned_unsupported;
6796 :
6797 1363284 : if (loop_vinfo)
6798 : {
6799 953776 : vect_loop = LOOP_VINFO_LOOP (loop_vinfo);
6800 953776 : nested_in_vect_loop = nested_in_vect_loop_p (vect_loop, stmt_info);
6801 : }
6802 :
6803 : /* Possibly unaligned access. */
6804 :
6805 : /* We can choose between using the implicit realignment scheme (generating
6806 : a misaligned_move stmt) and the explicit realignment scheme (generating
6807 : aligned loads with a REALIGN_LOAD). There are two variants to the
6808 : explicit realignment scheme: optimized, and unoptimized.
6809 : We can optimize the realignment only if the step between consecutive
6810 : vector loads is equal to the vector size. Since the vector memory
6811 : accesses advance in steps of VS (Vector Size) in the vectorized loop, it
6812 : is guaranteed that the misalignment amount remains the same throughout the
6813 : execution of the vectorized loop. Therefore, we can create the
6814 : "realignment token" (the permutation mask that is passed to REALIGN_LOAD)
6815 : at the loop preheader.
6816 :
6817 : However, in the case of outer-loop vectorization, when vectorizing a
6818 : memory access in the inner-loop nested within the LOOP that is now being
6819 : vectorized, while it is guaranteed that the misalignment of the
6820 : vectorized memory access will remain the same in different outer-loop
6821 : iterations, it is *not* guaranteed that is will remain the same throughout
6822 : the execution of the inner-loop. This is because the inner-loop advances
6823 : with the original scalar step (and not in steps of VS). If the inner-loop
6824 : step happens to be a multiple of VS, then the misalignment remains fixed
6825 : and we can use the optimized realignment scheme. For example:
6826 :
6827 : for (i=0; i<N; i++)
6828 : for (j=0; j<M; j++)
6829 : s += a[i+j];
6830 :
6831 : When vectorizing the i-loop in the above example, the step between
6832 : consecutive vector loads is 1, and so the misalignment does not remain
6833 : fixed across the execution of the inner-loop, and the realignment cannot
6834 : be optimized (as illustrated in the following pseudo vectorized loop):
6835 :
6836 : for (i=0; i<N; i+=4)
6837 : for (j=0; j<M; j++){
6838 : vs += vp[i+j]; // misalignment of &vp[i+j] is {0,1,2,3,0,1,2,3,...}
6839 : // when j is {0,1,2,3,4,5,6,7,...} respectively.
6840 : // (assuming that we start from an aligned address).
6841 : }
6842 :
6843 : We therefore have to use the unoptimized realignment scheme:
6844 :
6845 : for (i=0; i<N; i+=4)
6846 : for (j=k; j<M; j+=4)
6847 : vs += vp[i+j]; // misalignment of &vp[i+j] is always k (assuming
6848 : // that the misalignment of the initial address is
6849 : // 0).
6850 :
6851 : The loop can then be vectorized as follows:
6852 :
6853 : for (k=0; k<4; k++){
6854 : rt = get_realignment_token (&vp[k]);
6855 : for (i=0; i<N; i+=4){
6856 : v1 = vp[i+k];
6857 : for (j=k; j<M; j+=4){
6858 : v2 = vp[i+j+VS-1];
6859 : va = REALIGN_LOAD <v1,v2,rt>;
6860 : vs += va;
6861 : v1 = v2;
6862 : }
6863 : }
6864 : } */
6865 :
6866 1363284 : if (DR_IS_READ (dr) && !is_gather_scatter)
6867 : {
6868 612840 : if (can_implement_p (vec_realign_load_optab, mode)
6869 612840 : && (!targetm.vectorize.builtin_mask_for_load
6870 0 : || targetm.vectorize.builtin_mask_for_load ()))
6871 : {
6872 : /* If we are doing SLP then the accesses need not have the
6873 : same alignment, instead it depends on the SLP group size. */
6874 0 : if (loop_vinfo
6875 0 : && STMT_VINFO_GROUPED_ACCESS (stmt_info)
6876 0 : && !multiple_p (LOOP_VINFO_VECT_FACTOR (loop_vinfo)
6877 0 : * (DR_GROUP_SIZE
6878 0 : (DR_GROUP_FIRST_ELEMENT (stmt_info))),
6879 0 : TYPE_VECTOR_SUBPARTS (vectype)))
6880 : ;
6881 0 : else if (!loop_vinfo
6882 0 : || (nested_in_vect_loop
6883 0 : && maybe_ne (TREE_INT_CST_LOW (DR_STEP (dr)),
6884 0 : GET_MODE_SIZE (TYPE_MODE (vectype)))))
6885 0 : return dr_explicit_realign;
6886 : else
6887 0 : return dr_explicit_realign_optimized;
6888 : }
6889 : }
6890 :
6891 1363284 : bool is_packed = not_size_aligned (DR_REF (dr));
6892 1363284 : if (misalignment == DR_MISALIGNMENT_UNKNOWN
6893 1363284 : && is_gather_scatter)
6894 3234 : misalignment = (get_object_alignment (DR_REF (dr))
6895 3234 : % (GET_MODE_BITSIZE (GET_MODE_INNER (mode))))
6896 3234 : / BITS_PER_UNIT;
6897 1363284 : if (targetm.vectorize.support_vector_misalignment (mode, misalignment,
6898 : is_packed,
6899 : is_gather_scatter))
6900 : return dr_unaligned_supported;
6901 :
6902 : /* Unsupported. */
6903 : return dr_unaligned_unsupported;
6904 : }
|