Line data Source code
1 : /* Data References Analysis and Manipulation Utilities for Vectorization.
2 : Copyright (C) 2003-2026 Free Software Foundation, Inc.
3 : Contributed by Dorit Naishlos <dorit@il.ibm.com>
4 : and Ira Rosen <irar@il.ibm.com>
5 :
6 : This file is part of GCC.
7 :
8 : GCC is free software; you can redistribute it and/or modify it under
9 : the terms of the GNU General Public License as published by the Free
10 : Software Foundation; either version 3, or (at your option) any later
11 : version.
12 :
13 : GCC is distributed in the hope that it will be useful, but WITHOUT ANY
14 : WARRANTY; without even the implied warranty of MERCHANTABILITY or
15 : FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
16 : for more details.
17 :
18 : You should have received a copy of the GNU General Public License
19 : along with GCC; see the file COPYING3. If not see
20 : <http://www.gnu.org/licenses/>. */
21 :
22 : #define INCLUDE_ALGORITHM
23 : #include "config.h"
24 : #include "system.h"
25 : #include "coretypes.h"
26 : #include "backend.h"
27 : #include "target.h"
28 : #include "rtl.h"
29 : #include "tree.h"
30 : #include "gimple.h"
31 : #include "predict.h"
32 : #include "memmodel.h"
33 : #include "tm_p.h"
34 : #include "ssa.h"
35 : #include "optabs-tree.h"
36 : #include "cgraph.h"
37 : #include "dumpfile.h"
38 : #include "pretty-print.h"
39 : #include "alias.h"
40 : #include "fold-const.h"
41 : #include "stor-layout.h"
42 : #include "tree-eh.h"
43 : #include "gimplify.h"
44 : #include "gimple-iterator.h"
45 : #include "gimplify-me.h"
46 : #include "tree-ssa-loop-ivopts.h"
47 : #include "tree-ssa-loop-manip.h"
48 : #include "tree-ssa-loop.h"
49 : #include "cfgloop.h"
50 : #include "tree-scalar-evolution.h"
51 : #include "tree-vectorizer.h"
52 : #include "expr.h"
53 : #include "builtins.h"
54 : #include "tree-cfg.h"
55 : #include "tree-hash-traits.h"
56 : #include "vec-perm-indices.h"
57 : #include "internal-fn.h"
58 : #include "gimple-fold.h"
59 : #include "optabs-query.h"
60 :
61 : /* Return true if load- or store-lanes optab OPTAB is implemented for
62 : COUNT vectors of type VECTYPE. NAME is the name of OPTAB.
63 :
64 : If it is implemented and ELSVALS is nonzero store the possible else
65 : values in the vector it points to. */
66 :
67 : static bool
68 370066 : vect_lanes_optab_supported_p (const char *name, convert_optab optab,
69 : tree vectype, unsigned HOST_WIDE_INT count,
70 : vec<int> *elsvals = nullptr)
71 : {
72 370066 : machine_mode mode, array_mode;
73 370066 : bool limit_p;
74 :
75 370066 : mode = TYPE_MODE (vectype);
76 370066 : if (!targetm.array_mode (mode, count).exists (&array_mode))
77 : {
78 740132 : poly_uint64 bits = count * GET_MODE_BITSIZE (mode);
79 370066 : limit_p = !targetm.array_mode_supported_p (mode, count);
80 370066 : if (!int_mode_for_size (bits, limit_p).exists (&array_mode))
81 : {
82 317812 : if (dump_enabled_p ())
83 12924 : dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
84 : "no array mode for %s[%wu]\n",
85 12924 : GET_MODE_NAME (mode), count);
86 317812 : return false;
87 : }
88 : }
89 :
90 52254 : enum insn_code icode;
91 52254 : if ((icode = convert_optab_handler (optab, array_mode, mode))
92 : == CODE_FOR_nothing)
93 : {
94 52254 : if (dump_enabled_p ())
95 4152 : dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
96 : "cannot use %s<%s><%s>\n", name,
97 4152 : GET_MODE_NAME (array_mode), GET_MODE_NAME (mode));
98 52254 : return false;
99 : }
100 :
101 0 : if (dump_enabled_p ())
102 0 : dump_printf_loc (MSG_NOTE, vect_location,
103 0 : "can use %s<%s><%s>\n", name, GET_MODE_NAME (array_mode),
104 0 : GET_MODE_NAME (mode));
105 :
106 0 : if (elsvals)
107 0 : get_supported_else_vals (icode,
108 0 : internal_fn_else_index (IFN_MASK_LEN_LOAD_LANES),
109 : *elsvals);
110 :
111 : return true;
112 : }
113 :
114 : /* Helper function to identify a simd clone call. If this is a call to a
115 : function with simd clones then return the corresponding cgraph_node,
116 : otherwise return NULL. */
117 :
118 : static cgraph_node*
119 622171 : simd_clone_call_p (gimple *stmt)
120 : {
121 699956 : gcall *call = dyn_cast <gcall *> (stmt);
122 79469 : if (!call)
123 : return NULL;
124 :
125 79469 : tree fndecl = NULL_TREE;
126 79469 : if (gimple_call_internal_p (call, IFN_MASK_CALL))
127 226 : fndecl = TREE_OPERAND (gimple_call_arg (stmt, 0), 0);
128 : else
129 79243 : fndecl = gimple_call_fndecl (stmt);
130 :
131 79469 : if (fndecl == NULL_TREE)
132 : return NULL;
133 :
134 36796 : cgraph_node *node = cgraph_node::get (fndecl);
135 36796 : if (node && node->simd_clones != NULL)
136 : return node;
137 :
138 : return NULL;
139 : }
140 :
141 :
142 :
143 : /* Return the smallest scalar part of STMT_INFO.
144 : This is used to determine the vectype of the stmt. We generally set the
145 : vectype according to the type of the result (lhs). For stmts whose
146 : result-type is different than the type of the arguments (e.g., demotion,
147 : promotion), vectype will be reset appropriately (later). Note that we have
148 : to visit the smallest datatype in this function, because that determines the
149 : VF. If the smallest datatype in the loop is present only as the rhs of a
150 : promotion operation - we'd miss it.
151 : Such a case, where a variable of this datatype does not appear in the lhs
152 : anywhere in the loop, can only occur if it's an invariant: e.g.:
153 : 'int_x = (int) short_inv', which we'd expect to have been optimized away by
154 : invariant motion. However, we cannot rely on invariant motion to always
155 : take invariants out of the loop, and so in the case of promotion we also
156 : have to check the rhs.
157 : LHS_SIZE_UNIT and RHS_SIZE_UNIT contain the sizes of the corresponding
158 : types. */
159 :
160 : tree
161 5044044 : vect_get_smallest_scalar_type (stmt_vec_info stmt_info, tree scalar_type)
162 : {
163 5044044 : HOST_WIDE_INT lhs, rhs;
164 :
165 : /* During the analysis phase, this function is called on arbitrary
166 : statements that might not have scalar results. */
167 5044044 : if (!tree_fits_uhwi_p (TYPE_SIZE_UNIT (scalar_type)))
168 : return scalar_type;
169 :
170 5044044 : lhs = rhs = TREE_INT_CST_LOW (TYPE_SIZE_UNIT (scalar_type));
171 :
172 5044044 : gassign *assign = dyn_cast <gassign *> (stmt_info->stmt);
173 5044044 : if (assign)
174 : {
175 4421873 : scalar_type = TREE_TYPE (gimple_assign_lhs (assign));
176 4421873 : if (gimple_assign_cast_p (assign)
177 4018973 : || gimple_assign_rhs_code (assign) == DOT_PROD_EXPR
178 4018337 : || gimple_assign_rhs_code (assign) == WIDEN_SUM_EXPR
179 4018337 : || gimple_assign_rhs_code (assign) == SAD_EXPR
180 4018228 : || gimple_assign_rhs_code (assign) == WIDEN_MULT_EXPR
181 4014484 : || gimple_assign_rhs_code (assign) == WIDEN_MULT_PLUS_EXPR
182 4014484 : || gimple_assign_rhs_code (assign) == WIDEN_MULT_MINUS_EXPR
183 4014484 : || gimple_assign_rhs_code (assign) == WIDEN_LSHIFT_EXPR
184 8436357 : || gimple_assign_rhs_code (assign) == FLOAT_EXPR)
185 : {
186 421841 : tree rhs_type = TREE_TYPE (gimple_assign_rhs1 (assign));
187 :
188 421841 : rhs = TREE_INT_CST_LOW (TYPE_SIZE_UNIT (rhs_type));
189 421841 : if (rhs < lhs)
190 5044044 : scalar_type = rhs_type;
191 : }
192 : }
193 622171 : else if (cgraph_node *node = simd_clone_call_p (stmt_info->stmt))
194 : {
195 1684 : auto clone = node->simd_clones->simdclone;
196 5120 : for (unsigned int i = 0; i < clone->nargs; ++i)
197 : {
198 3436 : if (clone->args[i].arg_type == SIMD_CLONE_ARG_TYPE_VECTOR)
199 : {
200 1983 : tree arg_scalar_type = TREE_TYPE (clone->args[i].vector_type);
201 1983 : rhs = TREE_INT_CST_LOW (TYPE_SIZE_UNIT (arg_scalar_type));
202 1983 : if (rhs < lhs)
203 : {
204 3436 : scalar_type = arg_scalar_type;
205 3436 : lhs = rhs;
206 : }
207 : }
208 : }
209 : }
210 620487 : else if (gcall *call = dyn_cast <gcall *> (stmt_info->stmt))
211 : {
212 77785 : unsigned int i = 0;
213 77785 : if (gimple_call_internal_p (call))
214 : {
215 40324 : internal_fn ifn = gimple_call_internal_fn (call);
216 40324 : if (internal_load_fn_p (ifn))
217 : /* For loads the LHS type does the trick. */
218 : i = ~0U;
219 35483 : else if (internal_store_fn_p (ifn))
220 : {
221 : /* For stores use the type of the stored value. */
222 2742 : i = internal_fn_stored_value_index (ifn);
223 2742 : scalar_type = TREE_TYPE (gimple_call_arg (call, i));
224 2742 : i = ~0U;
225 : }
226 32741 : else if (internal_fn_mask_index (ifn) == 0)
227 11080 : i = 1;
228 : }
229 77785 : if (i < gimple_call_num_args (call))
230 : {
231 65625 : tree rhs_type = TREE_TYPE (gimple_call_arg (call, i));
232 65625 : if (tree_fits_uhwi_p (TYPE_SIZE_UNIT (rhs_type)))
233 : {
234 65625 : rhs = TREE_INT_CST_LOW (TYPE_SIZE_UNIT (rhs_type));
235 65625 : if (rhs < lhs)
236 5044044 : scalar_type = rhs_type;
237 : }
238 : }
239 : }
240 :
241 : return scalar_type;
242 : }
243 :
244 :
245 : /* Insert DDR into LOOP_VINFO list of ddrs that may alias and need to be
246 : tested at run-time. Return TRUE if DDR was successfully inserted.
247 : Return false if versioning is not supported. */
248 :
249 : static opt_result
250 169173 : vect_mark_for_runtime_alias_test (ddr_p ddr, loop_vec_info loop_vinfo)
251 : {
252 169173 : class loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
253 :
254 169173 : if ((unsigned) param_vect_max_version_for_alias_checks == 0)
255 54 : return opt_result::failure_at (vect_location,
256 : "will not create alias checks, as"
257 : " --param vect-max-version-for-alias-checks"
258 : " == 0\n");
259 :
260 169119 : opt_result res
261 169119 : = runtime_alias_check_p (ddr, loop,
262 169119 : optimize_loop_nest_for_speed_p (loop));
263 169119 : if (!res)
264 143 : return res;
265 :
266 168976 : LOOP_VINFO_MAY_ALIAS_DDRS (loop_vinfo).safe_push (ddr);
267 168976 : return opt_result::success ();
268 : }
269 :
270 : /* Record that loop LOOP_VINFO needs to check that VALUE is nonzero. */
271 :
272 : static void
273 1528 : vect_check_nonzero_value (loop_vec_info loop_vinfo, tree value)
274 : {
275 1528 : const vec<tree> &checks = LOOP_VINFO_CHECK_NONZERO (loop_vinfo);
276 2295 : for (unsigned int i = 0; i < checks.length(); ++i)
277 767 : if (checks[i] == value)
278 : return;
279 :
280 1528 : if (dump_enabled_p ())
281 432 : dump_printf_loc (MSG_NOTE, vect_location,
282 : "need run-time check that %T is nonzero\n",
283 : value);
284 1528 : LOOP_VINFO_CHECK_NONZERO (loop_vinfo).safe_push (value);
285 : }
286 :
287 : /* Return true if we know that the order of vectorized DR_INFO_A and
288 : vectorized DR_INFO_B will be the same as the order of DR_INFO_A and
289 : DR_INFO_B. At least one of the accesses is a write. */
290 :
291 : static bool
292 144329 : vect_preserves_scalar_order_p (dr_vec_info *dr_info_a, dr_vec_info *dr_info_b)
293 : {
294 144329 : stmt_vec_info stmtinfo_a = dr_info_a->stmt;
295 144329 : stmt_vec_info stmtinfo_b = dr_info_b->stmt;
296 :
297 : /* Single statements are always kept in their original order. */
298 144329 : if (!STMT_VINFO_GROUPED_ACCESS (stmtinfo_a)
299 239226 : && !STMT_VINFO_GROUPED_ACCESS (stmtinfo_b))
300 : return true;
301 :
302 : /* If there is a loop invariant read involved we might vectorize it in
303 : the prologue, breaking scalar order with respect to the in-loop store. */
304 26144 : if ((DR_IS_READ (dr_info_a->dr) && integer_zerop (DR_STEP (dr_info_a->dr)))
305 80816 : || (DR_IS_READ (dr_info_b->dr) && integer_zerop (DR_STEP (dr_info_b->dr))))
306 1726 : return false;
307 :
308 : /* STMT_A and STMT_B belong to overlapping groups. All loads are
309 : emitted at the position of the first scalar load.
310 : Stores in a group are emitted at the position of the last scalar store.
311 : Compute that position and check whether the resulting order matches
312 : the current one. */
313 54181 : stmt_vec_info il_a = DR_GROUP_FIRST_ELEMENT (stmtinfo_a);
314 54181 : if (il_a)
315 : {
316 48941 : if (DR_IS_WRITE (STMT_VINFO_DATA_REF (stmtinfo_a)))
317 213632 : for (stmt_vec_info s = DR_GROUP_NEXT_ELEMENT (il_a); s;
318 188239 : s = DR_GROUP_NEXT_ELEMENT (s))
319 188239 : il_a = get_later_stmt (il_a, s);
320 : else /* DR_IS_READ */
321 93502 : for (stmt_vec_info s = DR_GROUP_NEXT_ELEMENT (il_a); s;
322 69954 : s = DR_GROUP_NEXT_ELEMENT (s))
323 69954 : if (get_later_stmt (il_a, s) == il_a)
324 2144 : il_a = s;
325 : }
326 : else
327 : il_a = stmtinfo_a;
328 54181 : stmt_vec_info il_b = DR_GROUP_FIRST_ELEMENT (stmtinfo_b);
329 54181 : if (il_b)
330 : {
331 47323 : if (DR_IS_WRITE (STMT_VINFO_DATA_REF (stmtinfo_b)))
332 273547 : for (stmt_vec_info s = DR_GROUP_NEXT_ELEMENT (il_b); s;
333 235517 : s = DR_GROUP_NEXT_ELEMENT (s))
334 235517 : il_b = get_later_stmt (il_b, s);
335 : else /* DR_IS_READ */
336 42771 : for (stmt_vec_info s = DR_GROUP_NEXT_ELEMENT (il_b); s;
337 33478 : s = DR_GROUP_NEXT_ELEMENT (s))
338 33478 : if (get_later_stmt (il_b, s) == il_b)
339 327 : il_b = s;
340 : }
341 : else
342 : il_b = stmtinfo_b;
343 54181 : bool a_after_b = (get_later_stmt (stmtinfo_a, stmtinfo_b) == stmtinfo_a);
344 54181 : return (get_later_stmt (il_a, il_b) == il_a) == a_after_b;
345 : }
346 :
347 : /* A subroutine of vect_analyze_data_ref_dependence. Handle
348 : DDR_COULD_BE_INDEPENDENT_P ddr DDR that has a known set of dependence
349 : distances. These distances are conservatively correct but they don't
350 : reflect a guaranteed dependence.
351 :
352 : Return true if this function does all the work necessary to avoid
353 : an alias or false if the caller should use the dependence distances
354 : to limit the vectorization factor in the usual way. LOOP_DEPTH is
355 : the depth of the loop described by LOOP_VINFO and the other arguments
356 : are as for vect_analyze_data_ref_dependence. */
357 :
358 : static bool
359 8308 : vect_analyze_possibly_independent_ddr (data_dependence_relation *ddr,
360 : loop_vec_info loop_vinfo,
361 : int loop_depth, unsigned int *max_vf)
362 : {
363 8308 : class loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
364 33250 : for (lambda_vector &dist_v : DDR_DIST_VECTS (ddr))
365 : {
366 16351 : int dist = dist_v[loop_depth];
367 16351 : if (dist != 0 && !(dist > 0 && DDR_REVERSED_P (ddr)))
368 : {
369 : /* If the user asserted safelen >= DIST consecutive iterations
370 : can be executed concurrently, assume independence.
371 :
372 : ??? An alternative would be to add the alias check even
373 : in this case, and vectorize the fallback loop with the
374 : maximum VF set to safelen. However, if the user has
375 : explicitly given a length, it's less likely that that
376 : would be a win. */
377 8057 : if (loop->safelen >= 2 && abs_hwi (dist) <= loop->safelen)
378 : {
379 32 : if ((unsigned int) loop->safelen < *max_vf)
380 2 : *max_vf = loop->safelen;
381 32 : LOOP_VINFO_NO_DATA_DEPENDENCIES (loop_vinfo) = false;
382 32 : continue;
383 : }
384 :
385 : /* For dependence distances of 2 or more, we have the option
386 : of limiting VF or checking for an alias at runtime.
387 : Prefer to check at runtime if we can, to avoid limiting
388 : the VF unnecessarily when the bases are in fact independent.
389 :
390 : Note that the alias checks will be removed if the VF ends up
391 : being small enough. */
392 8025 : dr_vec_info *dr_info_a = loop_vinfo->lookup_dr (DDR_A (ddr));
393 8025 : dr_vec_info *dr_info_b = loop_vinfo->lookup_dr (DDR_B (ddr));
394 8025 : return (!STMT_VINFO_GATHER_SCATTER_P (dr_info_a->stmt)
395 8025 : && !STMT_VINFO_GATHER_SCATTER_P (dr_info_b->stmt)
396 16058 : && vect_mark_for_runtime_alias_test (ddr, loop_vinfo));
397 : }
398 : }
399 : return true;
400 : }
401 :
402 :
403 : /* Function vect_analyze_data_ref_dependence.
404 :
405 : FIXME: I needed to change the sense of the returned flag.
406 :
407 : Return FALSE if there (might) exist a dependence between a memory-reference
408 : DRA and a memory-reference DRB. When versioning for alias may check a
409 : dependence at run-time, return TRUE. Adjust *MAX_VF according to
410 : the data dependence. */
411 :
412 : static opt_result
413 1486752 : vect_analyze_data_ref_dependence (struct data_dependence_relation *ddr,
414 : loop_vec_info loop_vinfo,
415 : unsigned int *max_vf)
416 : {
417 1486752 : unsigned int i;
418 1486752 : class loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
419 1486752 : struct data_reference *dra = DDR_A (ddr);
420 1486752 : struct data_reference *drb = DDR_B (ddr);
421 1486752 : dr_vec_info *dr_info_a = loop_vinfo->lookup_dr (dra);
422 1486752 : dr_vec_info *dr_info_b = loop_vinfo->lookup_dr (drb);
423 1486752 : stmt_vec_info stmtinfo_a = dr_info_a->stmt;
424 1486752 : stmt_vec_info stmtinfo_b = dr_info_b->stmt;
425 1486752 : lambda_vector dist_v;
426 1486752 : unsigned int loop_depth;
427 :
428 : /* If user asserted safelen consecutive iterations can be
429 : executed concurrently, assume independence. */
430 1666574 : auto apply_safelen = [&]()
431 : {
432 179822 : if (loop->safelen >= 2)
433 : {
434 7464 : if ((unsigned int) loop->safelen < *max_vf)
435 1896 : *max_vf = loop->safelen;
436 7464 : LOOP_VINFO_NO_DATA_DEPENDENCIES (loop_vinfo) = false;
437 7464 : return true;
438 : }
439 : return false;
440 1486752 : };
441 :
442 : /* In loop analysis all data references should be vectorizable. */
443 1486752 : if (!STMT_VINFO_VECTORIZABLE (stmtinfo_a)
444 1486752 : || !STMT_VINFO_VECTORIZABLE (stmtinfo_b))
445 0 : gcc_unreachable ();
446 :
447 : /* Independent data accesses. */
448 1486752 : if (DDR_ARE_DEPENDENT (ddr) == chrec_known)
449 1220527 : return opt_result::success ();
450 :
451 266225 : if (dra == drb
452 266225 : || (DR_IS_READ (dra) && DR_IS_READ (drb)))
453 0 : return opt_result::success ();
454 :
455 : /* We do not have to consider dependences between accesses that belong
456 : to the same group, unless the stride could be smaller than the
457 : group size. */
458 266225 : if (DR_GROUP_FIRST_ELEMENT (stmtinfo_a)
459 115282 : && (DR_GROUP_FIRST_ELEMENT (stmtinfo_a)
460 115282 : == DR_GROUP_FIRST_ELEMENT (stmtinfo_b))
461 284808 : && !STMT_VINFO_STRIDED_P (stmtinfo_a))
462 2281 : return opt_result::success ();
463 :
464 : /* Even if we have an anti-dependence then, as the vectorized loop covers at
465 : least two scalar iterations, there is always also a true dependence.
466 : As the vectorizer does not re-order loads and stores we can ignore
467 : the anti-dependence if TBAA can disambiguate both DRs similar to the
468 : case with known negative distance anti-dependences (positive
469 : distance anti-dependences would violate TBAA constraints). */
470 132015 : if (((DR_IS_READ (dra) && DR_IS_WRITE (drb))
471 131929 : || (DR_IS_WRITE (dra) && DR_IS_READ (drb)))
472 414504 : && !alias_sets_conflict_p (get_alias_set (DR_REF (dra)),
473 : get_alias_set (DR_REF (drb))))
474 6274 : return opt_result::success ();
475 :
476 257670 : if (STMT_VINFO_GATHER_SCATTER_P (stmtinfo_a)
477 247737 : || STMT_VINFO_GATHER_SCATTER_P (stmtinfo_b))
478 : {
479 12608 : if (apply_safelen ())
480 1398 : return opt_result::success ();
481 :
482 11210 : return opt_result::failure_at
483 11210 : (stmtinfo_a->stmt,
484 : "possible alias involving gather/scatter between %T and %T\n",
485 : DR_REF (dra), DR_REF (drb));
486 : }
487 :
488 : /* Unknown data dependence. */
489 245062 : if (DDR_ARE_DEPENDENT (ddr) == chrec_dont_know)
490 : {
491 166653 : if (apply_safelen ())
492 6066 : return opt_result::success ();
493 :
494 160587 : if (dump_enabled_p ())
495 7665 : dump_printf_loc (MSG_MISSED_OPTIMIZATION, stmtinfo_a->stmt,
496 : "versioning for alias required: "
497 : "can't determine dependence between %T and %T\n",
498 : DR_REF (dra), DR_REF (drb));
499 :
500 : /* Add to list of ddrs that need to be tested at run-time. */
501 160587 : return vect_mark_for_runtime_alias_test (ddr, loop_vinfo);
502 : }
503 :
504 : /* Known data dependence. */
505 78409 : if (DDR_NUM_DIST_VECTS (ddr) == 0)
506 : {
507 561 : if (apply_safelen ())
508 0 : return opt_result::success ();
509 :
510 561 : if (dump_enabled_p ())
511 156 : dump_printf_loc (MSG_MISSED_OPTIMIZATION, stmtinfo_a->stmt,
512 : "versioning for alias required: "
513 : "bad dist vector for %T and %T\n",
514 : DR_REF (dra), DR_REF (drb));
515 : /* Add to list of ddrs that need to be tested at run-time. */
516 561 : return vect_mark_for_runtime_alias_test (ddr, loop_vinfo);
517 : }
518 :
519 77848 : loop_depth = index_in_loop_nest (loop->num, DDR_LOOP_NEST (ddr));
520 :
521 77848 : if (DDR_COULD_BE_INDEPENDENT_P (ddr)
522 77848 : && vect_analyze_possibly_independent_ddr (ddr, loop_vinfo,
523 : loop_depth, max_vf))
524 8300 : return opt_result::success ();
525 :
526 132693 : FOR_EACH_VEC_ELT (DDR_DIST_VECTS (ddr), i, dist_v)
527 : {
528 69570 : int dist = dist_v[loop_depth];
529 :
530 69570 : if (dump_enabled_p ())
531 4361 : dump_printf_loc (MSG_NOTE, vect_location,
532 : "dependence distance = %d.\n", dist);
533 :
534 69570 : if (dist == 0)
535 : {
536 58139 : if (dump_enabled_p ())
537 3567 : dump_printf_loc (MSG_NOTE, vect_location,
538 : "dependence distance == 0 between %T and %T\n",
539 : DR_REF (dra), DR_REF (drb));
540 :
541 : /* When we perform grouped accesses and perform implicit CSE
542 : by detecting equal accesses and doing disambiguation with
543 : runtime alias tests like for
544 : .. = a[i];
545 : .. = a[i+1];
546 : a[i] = ..;
547 : a[i+1] = ..;
548 : *p = ..;
549 : .. = a[i];
550 : .. = a[i+1];
551 : where we will end up loading { a[i], a[i+1] } once, make
552 : sure that inserting group loads before the first load and
553 : stores after the last store will do the right thing.
554 : Similar for groups like
555 : a[i] = ...;
556 : ... = a[i];
557 : a[i+1] = ...;
558 : where loads from the group interleave with the store. */
559 58139 : if (!vect_preserves_scalar_order_p (dr_info_a, dr_info_b))
560 0 : return opt_result::failure_at (stmtinfo_a->stmt,
561 : "READ_WRITE dependence"
562 : " in interleaving.\n");
563 :
564 58139 : if (loop->safelen < 2)
565 : {
566 54258 : tree indicator = dr_zero_step_indicator (dra);
567 54258 : if (!indicator || integer_zerop (indicator))
568 0 : return opt_result::failure_at (stmtinfo_a->stmt,
569 : "access also has a zero step\n");
570 54258 : else if (TREE_CODE (indicator) != INTEGER_CST)
571 1528 : vect_check_nonzero_value (loop_vinfo, indicator);
572 : }
573 58139 : continue;
574 58139 : }
575 :
576 11431 : if (dist > 0 && DDR_REVERSED_P (ddr))
577 : {
578 : /* If DDR_REVERSED_P the order of the data-refs in DDR was
579 : reversed (to make distance vector positive), and the actual
580 : distance is negative. */
581 3912 : if (dump_enabled_p ())
582 105 : dump_printf_loc (MSG_NOTE, vect_location,
583 : "dependence distance negative.\n");
584 : /* When doing outer loop vectorization, we need to check if there is
585 : a backward dependence at the inner loop level if the dependence
586 : at the outer loop is reversed. See PR81740. */
587 3912 : if (nested_in_vect_loop_p (loop, stmtinfo_a)
588 3900 : || nested_in_vect_loop_p (loop, stmtinfo_b))
589 : {
590 12 : unsigned inner_depth = index_in_loop_nest (loop->inner->num,
591 12 : DDR_LOOP_NEST (ddr));
592 12 : if (dist_v[inner_depth] < 0)
593 9 : return opt_result::failure_at (stmtinfo_a->stmt,
594 : "not vectorized, dependence "
595 : "between data-refs %T and %T\n",
596 : DR_REF (dra), DR_REF (drb));
597 : }
598 : /* Record a negative dependence distance to later limit the
599 : amount of stmt copying / unrolling we can perform.
600 : Only need to handle read-after-write dependence. */
601 3903 : if (DR_IS_READ (drb)
602 156 : && (STMT_VINFO_MIN_NEG_DIST (stmtinfo_b) == 0
603 36 : || STMT_VINFO_MIN_NEG_DIST (stmtinfo_b) > (unsigned)dist))
604 156 : STMT_VINFO_MIN_NEG_DIST (stmtinfo_b) = dist;
605 3903 : continue;
606 3903 : }
607 :
608 7519 : unsigned int abs_dist = abs (dist);
609 7519 : if (abs_dist >= 2 && abs_dist < *max_vf)
610 : {
611 : /* The dependence distance requires reduction of the maximal
612 : vectorization factor. */
613 558 : *max_vf = abs_dist;
614 558 : if (dump_enabled_p ())
615 30 : dump_printf_loc (MSG_NOTE, vect_location,
616 : "adjusting maximal vectorization factor to %i\n",
617 : *max_vf);
618 : }
619 :
620 7519 : if (abs_dist >= *max_vf)
621 : {
622 : /* Dependence distance does not create dependence, as far as
623 : vectorization is concerned, in this case. */
624 1103 : if (dump_enabled_p ())
625 437 : dump_printf_loc (MSG_NOTE, vect_location,
626 : "dependence distance >= VF.\n");
627 1103 : continue;
628 : }
629 :
630 6416 : return opt_result::failure_at (stmtinfo_a->stmt,
631 : "not vectorized, possible dependence "
632 : "between data-refs %T and %T\n",
633 : DR_REF (dra), DR_REF (drb));
634 : }
635 :
636 63123 : return opt_result::success ();
637 : }
638 :
639 : /* Function vect_analyze_early_break_dependences.
640 :
641 : Examine all the data references in the loop and make sure that if we have
642 : multiple exits that we are able to safely move stores such that they become
643 : safe for vectorization. The function also calculates the place where to move
644 : the instructions to and computes what the new vUSE chain should be.
645 :
646 : This works in tandem with the CFG that will be produced by
647 : slpeel_tree_duplicate_loop_to_edge_cfg later on.
648 :
649 : This function tries to validate whether an early break vectorization
650 : is possible for the current instruction sequence. Returns True if
651 : possible, otherwise False.
652 :
653 : Requirements:
654 : - Any memory access must be to a fixed size buffer.
655 : - There must not be any loads and stores to the same object.
656 : - Multiple loads are allowed as long as they don't alias.
657 :
658 : NOTE:
659 : This implementation is very conservative. Any overlapping loads/stores
660 : that take place before the early break statement gets rejected aside from
661 : WAR dependencies.
662 :
663 : i.e.:
664 :
665 : a[i] = 8
666 : c = a[i]
667 : if (b[i])
668 : ...
669 :
670 : is not allowed, but
671 :
672 : c = a[i]
673 : a[i] = 8
674 : if (b[i])
675 : ...
676 :
677 : is allowed, which is the common case. */
678 :
679 : static opt_result
680 141676 : vect_analyze_early_break_dependences (loop_vec_info loop_vinfo)
681 : {
682 141676 : DUMP_VECT_SCOPE ("vect_analyze_early_break_dependences");
683 :
684 : /* List of all load data references found during traversal. */
685 141676 : auto_vec<data_reference *> bases;
686 141676 : basic_block dest_bb = NULL;
687 :
688 141676 : class loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
689 141676 : class loop *loop_nest = loop_outer (loop);
690 :
691 141676 : if (dump_enabled_p ())
692 1582 : dump_printf_loc (MSG_NOTE, vect_location,
693 : "loop contains multiple exits, analyzing"
694 : " statement dependencies.\n");
695 :
696 141676 : if (LOOP_VINFO_EARLY_BREAKS_VECT_PEELED (loop_vinfo))
697 25518 : if (dump_enabled_p ())
698 286 : dump_printf_loc (MSG_NOTE, vect_location,
699 : "alternate exit has been chosen as main exit.\n");
700 :
701 : /* Since we don't support general control flow, the location we'll move the
702 : side-effects to is always the latch connected exit. When we support
703 : general control flow we can do better but for now this is fine. Move
704 : side-effects to the in-loop destination of the last early exit. For the
705 : PEELED case we move the side-effects to the latch block as this is
706 : guaranteed to be the last block to be executed when a vector iteration
707 : finished. */
708 141676 : if (LOOP_VINFO_EARLY_BREAKS_VECT_PEELED (loop_vinfo))
709 25518 : dest_bb = loop->latch;
710 : else
711 116158 : dest_bb = single_pred (loop->latch);
712 :
713 : /* We start looking from dest_bb, for the non-PEELED case we don't want to
714 : move any stores already present, but we do want to read and validate the
715 : loads. */
716 141676 : basic_block bb = dest_bb;
717 :
718 : /* We move stores across all loads to the beginning of dest_bb, so
719 : the first block processed below doesn't need dependence checking. */
720 141676 : bool check_deps = false;
721 :
722 511836 : do
723 : {
724 326756 : gimple_stmt_iterator gsi = gsi_last_bb (bb);
725 :
726 : /* Now analyze all the remaining statements and try to determine which
727 : instructions are allowed/needed to be moved. */
728 2435105 : while (!gsi_end_p (gsi))
729 : {
730 2114315 : gimple *stmt = gsi_stmt (gsi);
731 2114315 : gsi_prev (&gsi);
732 2114315 : if (is_gimple_debug (stmt))
733 1866158 : continue;
734 :
735 1112324 : stmt_vec_info orig_stmt_vinfo = loop_vinfo->lookup_stmt (stmt);
736 1112324 : stmt_vec_info stmt_vinfo
737 1112324 : = vect_stmt_to_vectorize (orig_stmt_vinfo);
738 1112324 : auto dr_ref = STMT_VINFO_DATA_REF (stmt_vinfo);
739 1112324 : if (!dr_ref)
740 : {
741 : /* Trapping statements after the last early exit are fine. */
742 858435 : if (check_deps)
743 : {
744 520135 : bool could_trap_p = false;
745 520135 : gimple *cur_stmt = STMT_VINFO_STMT (stmt_vinfo);
746 520135 : could_trap_p = gimple_could_trap_p (cur_stmt);
747 520135 : if (STMT_VINFO_IN_PATTERN_P (orig_stmt_vinfo))
748 : {
749 192456 : gimple_stmt_iterator gsi2;
750 192456 : auto stmt_seq
751 192456 : = STMT_VINFO_PATTERN_DEF_SEQ (orig_stmt_vinfo);
752 192456 : for (gsi2 = gsi_start (stmt_seq);
753 388190 : !could_trap_p && !gsi_end_p (gsi2); gsi_next (&gsi2))
754 : {
755 195734 : cur_stmt = gsi_stmt (gsi2);
756 195734 : could_trap_p = gimple_could_trap_p (cur_stmt);
757 : }
758 : }
759 :
760 520135 : if (could_trap_p)
761 : {
762 5424 : if (dump_enabled_p ())
763 150 : dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
764 : "cannot vectorize as operation may trap.\n");
765 5424 : return opt_result::failure_at (cur_stmt,
766 : "can't safely apply code motion to dependencies"
767 : " to vectorize the early exit. %G may trap.\n",
768 : cur_stmt);
769 : }
770 : }
771 :
772 853011 : continue;
773 853011 : }
774 :
775 : /* We know everything below dest_bb is safe since we know we
776 : had a full vector iteration when reaching it. Either by
777 : the loop entry / IV exit test being last or because this
778 : is the loop latch itself. */
779 253889 : if (!check_deps)
780 11156 : continue;
781 :
782 : /* Check if vector accesses to the object will be within bounds.
783 : must be a constant or assume loop will be versioned or niters
784 : bounded by VF so accesses are within range. We only need to check
785 : the reads since writes are moved to a safe place where if we get
786 : there we know they are safe to perform. */
787 242733 : if (DR_IS_READ (dr_ref))
788 : {
789 226926 : dr_set_safe_speculative_read_required (stmt_vinfo, true);
790 226926 : bool inbounds = ref_within_array_bound (stmt, DR_REF (dr_ref));
791 226926 : DR_SCALAR_KNOWN_BOUNDS (STMT_VINFO_DR_INFO (stmt_vinfo)) = inbounds;
792 :
793 226926 : if (dump_enabled_p ())
794 2457 : dump_printf_loc (MSG_NOTE, vect_location,
795 : "marking DR (read) as possibly needing peeling "
796 : "for alignment at %G", stmt);
797 : }
798 :
799 242733 : if (DR_IS_READ (dr_ref))
800 226926 : bases.safe_push (dr_ref);
801 15807 : else if (DR_IS_WRITE (dr_ref))
802 : {
803 : /* We are moving writes down in the CFG. To be sure that this
804 : is valid after vectorization we have to check all the loads
805 : we are sinking the stores past to see if any of them may
806 : alias or are the same object.
807 :
808 : Same objects will not be an issue because unless the store
809 : is marked volatile the value can be forwarded. If the
810 : store is marked volatile we don't vectorize the loop
811 : anyway.
812 :
813 : That leaves the check for aliasing. We don't really need
814 : to care about the stores aliasing with each other since the
815 : stores are moved in order so the effects are still observed
816 : correctly. This leaves the check for WAR dependencies
817 : which we would be introducing here if the DR can alias.
818 : The check is quadratic in loads/stores but I have not found
819 : a better API to do this. I believe all loads and stores
820 : must be checked. We also must check them when we
821 : encountered the store, since we don't care about loads past
822 : the store. */
823 :
824 49035 : for (auto dr_read : bases)
825 15466 : if (dr_may_alias_p (dr_ref, dr_read, loop_nest))
826 : {
827 542 : if (dump_enabled_p ())
828 4 : dump_printf_loc (MSG_MISSED_OPTIMIZATION,
829 : vect_location,
830 : "early breaks not supported: "
831 : "overlapping loads and stores "
832 : "found before the break "
833 : "statement.\n");
834 :
835 542 : return opt_result::failure_at (stmt,
836 : "can't safely apply code motion to dependencies"
837 : " to vectorize the early exit. %G may alias with"
838 : " %G\n", stmt, dr_read->stmt);
839 : }
840 : }
841 :
842 484382 : if (gimple_vdef (stmt))
843 : {
844 15265 : if (dump_enabled_p ())
845 282 : dump_printf_loc (MSG_NOTE, vect_location,
846 : "==> recording stmt %G", stmt);
847 :
848 15265 : LOOP_VINFO_EARLY_BRK_STORES (loop_vinfo).safe_push (stmt);
849 : }
850 696043 : else if (gimple_vuse (stmt))
851 : {
852 226926 : LOOP_VINFO_EARLY_BRK_VUSES (loop_vinfo).safe_insert (0, stmt);
853 226926 : if (dump_enabled_p ())
854 2457 : dump_printf_loc (MSG_NOTE, vect_location,
855 : "marked statement for vUSE update: %G", stmt);
856 : }
857 : }
858 :
859 320790 : if (!single_pred_p (bb))
860 : {
861 135710 : gcc_assert (bb == loop->header);
862 135710 : break;
863 : }
864 :
865 : /* If we possibly sink through a virtual PHI make sure to elide that. */
866 185080 : if (gphi *vphi = get_virtual_phi (bb))
867 107 : LOOP_VINFO_EARLY_BRK_STORES (loop_vinfo).safe_push (vphi);
868 :
869 : /* All earlier blocks need dependence checking. */
870 185080 : check_deps = true;
871 185080 : bb = single_pred (bb);
872 185080 : }
873 : while (1);
874 :
875 : /* We don't allow outer -> inner loop transitions which should have been
876 : trapped already during loop form analysis. */
877 135710 : gcc_assert (dest_bb->loop_father == loop);
878 :
879 : /* Check that the destination block we picked has only one pred. To relax this we
880 : have to take special care when moving the statements. We don't currently support
881 : such control flow however this check is there to simplify how we handle
882 : labels that may be present anywhere in the IL. This check is to ensure that the
883 : labels aren't significant for the CFG. */
884 135710 : if (!single_pred (dest_bb))
885 0 : return opt_result::failure_at (vect_location,
886 : "chosen loop exit block (BB %d) does not have a "
887 : "single predecessor which is currently not "
888 : "supported for early break vectorization.\n",
889 : dest_bb->index);
890 :
891 135710 : LOOP_VINFO_EARLY_BRK_DEST_BB (loop_vinfo) = dest_bb;
892 : /* Check if loop has a side-effect (stores), force scalar epilogue. */
893 612980 : for (auto dr : LOOP_VINFO_DATAREFS (loop_vinfo))
894 232420 : if (DR_IS_WRITE (dr))
895 : {
896 13040 : LOOP_VINFO_EARLY_BRK_NEEDS_EPILOG (loop_vinfo) = true;
897 13040 : break;
898 : }
899 :
900 135710 : if (!LOOP_VINFO_EARLY_BRK_VUSES (loop_vinfo).is_empty ())
901 : {
902 : /* All uses shall be updated to that of the first load. Entries are
903 : stored in reverse order. */
904 125186 : tree vuse = gimple_vuse (LOOP_VINFO_EARLY_BRK_VUSES (loop_vinfo).last ());
905 350869 : for (auto g : LOOP_VINFO_EARLY_BRK_VUSES (loop_vinfo))
906 : {
907 225683 : if (dump_enabled_p ())
908 2394 : dump_printf_loc (MSG_NOTE, vect_location,
909 : "will update use: %T, mem_ref: %G", vuse, g);
910 : }
911 : }
912 :
913 135710 : if (dump_enabled_p ())
914 1428 : dump_printf_loc (MSG_NOTE, vect_location,
915 : "recorded statements to be moved to BB %d\n",
916 1428 : LOOP_VINFO_EARLY_BRK_DEST_BB (loop_vinfo)->index);
917 :
918 135710 : return opt_result::success ();
919 141676 : }
920 :
921 : /* Function vect_analyze_data_ref_dependences.
922 :
923 : Examine all the data references in the loop, and make sure there do not
924 : exist any data dependences between them. Set *MAX_VF according to
925 : the maximum vectorization factor the data dependences allow.

   The dependence relations of LOOP_VINFO are computed here when the
   vector holding them does not exist yet.  For an epilogue loop no
   relation is analyzed; *MAX_VF is taken from
   LOOP_VINFO_ORIG_MAX_VECT_FACTOR instead.

   Return the failing opt_result of the first relation rejected by
   vect_analyze_data_ref_dependence.  Otherwise return the result of
   vect_analyze_early_break_dependences when LOOP_VINFO_EARLY_BREAKS is
   set, and success when it is not. */
926 :
927 : opt_result
928 387406 : vect_analyze_data_ref_dependences (loop_vec_info loop_vinfo,
929 : unsigned int *max_vf)
930 : {
931 387406 : unsigned int i;
932 387406 : struct data_dependence_relation *ddr;
933 :
934 387406 : DUMP_VECT_SCOPE ("vect_analyze_data_ref_dependences");
935 :
/* Compute the dependence relations unless the vector already exists.  */
936 387406 : if (!LOOP_VINFO_DDRS (loop_vinfo).exists ())
937 : {
/* Sized for length * length entries, i.e. one per ordered pair of
   data references.  */
938 161121 : LOOP_VINFO_DDRS (loop_vinfo)
939 161121 : .create (LOOP_VINFO_DATAREFS (loop_vinfo).length ()
940 161121 : * LOOP_VINFO_DATAREFS (loop_vinfo).length ());
941 : /* We do not need read-read dependences. */
942 322242 : bool res = compute_all_dependences (LOOP_VINFO_DATAREFS (loop_vinfo),
943 : &LOOP_VINFO_DDRS (loop_vinfo),
944 161121 : LOOP_VINFO_LOOP_NEST (loop_vinfo),
945 : false);
946 161121 : gcc_assert (res);
947 : }
948 :
/* Start out assuming there are no dependences.  NOTE(review): presumably
   vect_analyze_data_ref_dependence clears this flag when it finds one -
   confirm there.  */
949 387406 : LOOP_VINFO_NO_DATA_DEPENDENCIES (loop_vinfo) = true;
950 :
951 : /* For epilogues we either have no aliases or alias versioning
952 : was applied to original loop. Therefore we may just get max_vf
953 : using VF of original loop. */
954 387406 : if (LOOP_VINFO_EPILOGUE_P (loop_vinfo))
955 12570 : *max_vf = LOOP_VINFO_ORIG_MAX_VECT_FACTOR (loop_vinfo);
956 : else
957 1843764 : FOR_EACH_VEC_ELT (LOOP_VINFO_DDRS (loop_vinfo), i, ddr)
958 : {
/* Stop at the first relation that cannot be handled and hand its
   failure back to the caller.  */
959 1486752 : opt_result res
960 1486752 : = vect_analyze_data_ref_dependence (ddr, loop_vinfo, max_vf);
961 1486752 : if (!res)
962 17824 : return res;
963 : }
964 :
965 : /* If we have early break statements in the loop, check to see if they
966 : are of a form we can vectorize. */
967 369582 : if (LOOP_VINFO_EARLY_BREAKS (loop_vinfo))
968 141676 : return vect_analyze_early_break_dependences (loop_vinfo);
969 :
970 227906 : return opt_result::success ();
971 : }
972 :
973 :
974 : /* Function vect_slp_analyze_data_ref_dependence.
975 :
976 : Classify the dependence between the memory-references DRA and DRB of DDR
977 : for VINFO using the classical (affine) data-dependence test. Return
978 : chrec_known if they are provably independent, chrec_dont_know if the test
979 : cannot analyze them (in which case the caller can still try to disambiguate
980 : them with the alias oracle), and the dependence (NULL_TREE) otherwise.

   Besides relations the test proved independent, chrec_known is also
   returned when DRA and DRB are the same reference, when both are reads,
   and when both belong to the same interleaving group.  The dump
   messages emitted here are informational and do not affect the
   result. */
981 :
982 : static tree
983 6887601 : vect_slp_analyze_data_ref_dependence (vec_info *vinfo,
984 : struct data_dependence_relation *ddr)
985 : {
986 6887601 : struct data_reference *dra = DDR_A (ddr);
987 6887601 : struct data_reference *drb = DDR_B (ddr);
/* The vectorizer's per-reference records are only needed for the
   interleaving-group test below.  */
988 6887601 : dr_vec_info *dr_info_a = vinfo->lookup_dr (dra);
989 6887601 : dr_vec_info *dr_info_b = vinfo->lookup_dr (drb);
990 :
991 : /* We need to check dependences of statements marked as unvectorizable
992 : as well, they still can prohibit vectorization. */
993 :
994 : /* Independent data accesses. */
995 6887601 : if (DDR_ARE_DEPENDENT (ddr) == chrec_known)
996 : return chrec_known;
997 :
/* The same data reference on both sides is treated as independent.  */
998 1104601 : if (dra == drb)
999 : return chrec_known;
1000 :
1001 : /* Read-read is OK. */
1002 8832 : if (DR_IS_READ (dra) && DR_IS_READ (drb))
1003 : return chrec_known;
1004 :
1005 : /* If dra and drb are part of the same interleaving chain consider
1006 : them independent. */
/* NOTE(review): only the statement of DRA is tested for being a grouped
   access; DR_GROUP_FIRST_ELEMENT of DRB's statement is read regardless.
   Presumably that yields NULL for an ungrouped access - confirm.  */
1007 8832 : if (STMT_VINFO_GROUPED_ACCESS (dr_info_a->stmt)
1008 8832 : && (DR_GROUP_FIRST_ELEMENT (dr_info_a->stmt)
1009 8832 : == DR_GROUP_FIRST_ELEMENT (dr_info_b->stmt)))
1010 : return chrec_known;
1011 :
1012 : /* Unknown data dependence. */
1013 8832 : if (DDR_ARE_DEPENDENT (ddr) == chrec_dont_know)
1014 : {
1015 8832 : if (dump_enabled_p ())
1016 60 : dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1017 : "can't determine dependence between %T and %T\n",
1018 : DR_REF (dra), DR_REF (drb));
1019 : }
1020 0 : else if (dump_enabled_p ())
1021 0 : dump_printf_loc (MSG_NOTE, vect_location,
1022 : "determined dependence between %T and %T\n",
1023 : DR_REF (dra), DR_REF (drb));
1024 :
/* chrec_known was handled at the top, so what is returned here is either
   chrec_dont_know or the value the relation holds for a determined
   dependence.  */
1025 8832 : return DDR_ARE_DEPENDENT (ddr);
1026 : }
1027 :
1028 :
1029 : /* Analyze dependences involved in the transform of a store SLP NODE.

   Return true if every scalar store of NODE can be sunk to the last
   scalar store of NODE, and false as soon as a statement between the two
   is found that may conflict with the store being moved. */
1030 :
1031 : static bool
1032 660867 : vect_slp_analyze_store_dependences (vec_info *vinfo, slp_tree node)
1033 : {
1034 : /* This walks over all stmts involved in the SLP store done
1035 : in NODE verifying we can sink them up to the last stmt in the
1036 : group. */
1037 660867 : stmt_vec_info last_access_info = vect_find_last_scalar_stmt_in_slp (node);
1038 660867 : gcc_assert (DR_IS_WRITE (STMT_VINFO_DATA_REF (last_access_info)));
1039 :
1040 2401446 : for (unsigned k = 0; k < SLP_TREE_SCALAR_STMTS (node).length (); ++k)
1041 : {
1042 1749021 : stmt_vec_info access_info
1043 1749021 : = vect_orig_stmt (SLP_TREE_SCALAR_STMTS (node)[k]);
/* The last store is the sink position itself and needs no check.  */
1044 1749021 : if (access_info == last_access_info)
1045 653252 : continue;
1046 1095769 : data_reference *dr_a = STMT_VINFO_DATA_REF (access_info);
/* REF describes the memory written by this store for the alias oracle;
   it is initialized on first use and then reused for the rest of the
   walk.  */
1047 1095769 : ao_ref ref;
1048 1095769 : bool ref_initialized_p = false;
/* Walk forward from this store up to, but not including, the last
   store.  The walk starts at the store itself.  NOTE(review): there is
   no end-of-sequence test, so this relies on the last store being
   reachable through gsi_next - presumably both statements are in the
   same basic block, confirm.  */
1049 1095769 : for (gimple_stmt_iterator gsi = gsi_for_stmt (access_info->stmt);
1050 10573169 : gsi_stmt (gsi) != last_access_info->stmt; gsi_next (&gsi))
1051 : {
1052 9485842 : gimple *stmt = gsi_stmt (gsi);
/* Statements without a virtual use are skipped.  */
1053 16836345 : if (! gimple_vuse (stmt))
1054 2597915 : continue;
1055 :
1056 : /* If we couldn't record a (single) data reference for this stmt,
1057 : or the classical dependence test cannot analyze it, we have to
1058 : resort to the alias oracle. */
1059 6887927 : stmt_vec_info stmt_info = vinfo->lookup_stmt (stmt);
1060 6887927 : data_reference *dr_b = STMT_VINFO_DATA_REF (stmt_info);
1061 6887927 : if (dr_b)
1062 : {
1063 6887387 : gcc_assert (!gimple_visited_p (stmt));
1064 :
/* Build a temporary relation just for this pair and release it right
   after it has been classified.  */
1065 6887387 : ddr_p ddr = initialize_data_dependence_relation (dr_a,
1066 6887387 : dr_b, vNULL);
1067 6887387 : tree dep = vect_slp_analyze_data_ref_dependence (vinfo, ddr);
1068 6887387 : free_dependence_relation (ddr);
/* Independent: move on.  A determined dependence: give up.  Only the
   chrec_dont_know case continues below.  */
1069 6887387 : if (dep == chrec_known)
1070 6878591 : continue;
1071 8796 : if (dep != chrec_dont_know)
1072 8442 : return false;
1073 : /* Unknown dependence - fall through to the alias oracle. */
1074 : }
1075 :
1076 : /* We are moving a store - this means we cannot use TBAA for
1077 : disambiguation. */
1078 9336 : if (!ref_initialized_p)
1079 : {
1080 9075 : ao_ref_init (&ref, DR_REF (dr_a));
1081 9075 : ref_initialized_p = true;
1082 : }
/* Give up if STMT may write or may read the memory of the store being
   sunk.  NOTE(review): the trailing false presumably is the TBAA switch
   mentioned above - confirm against the alias oracle API.  */
1083 9336 : if (stmt_may_clobber_ref_p_1 (stmt, &ref, false)
1084 9336 : || ref_maybe_used_by_stmt_p (stmt, &ref, false))
1085 8442 : return false;
1086 : }
1087 : }
1088 : return true;
1089 : }
1090 :
1091 : /* Analyze dependences involved in the transform of a load SLP NODE. STORES
1092 : contain the vector of scalar stores of this instance if we are
1093 : disambiguating the loads.

   LAST_STORE_INFO is the store at which the loads are checked against
   all of STORES; other statements that carry the visited flag are
   skipped.

   Return true if every scalar load of NODE can be hoisted to the first
   scalar load of NODE, and false as soon as a store in between is found
   that may conflict with the load being moved. */
1094 :
1095 : static bool
1096 152158 : vect_slp_analyze_load_dependences (vec_info *vinfo, slp_tree node,
1097 : vec<stmt_vec_info> stores,
1098 : stmt_vec_info last_store_info)
1099 : {
1100 : /* This walks over all stmts involved in the SLP load done
1101 : in NODE verifying we can hoist them up to the first stmt in the
1102 : group. */
1103 152158 : stmt_vec_info first_access_info = vect_find_first_scalar_stmt_in_slp (node);
1104 152158 : gcc_assert (DR_IS_READ (STMT_VINFO_DATA_REF (first_access_info)));
1105 :
1106 536010 : for (unsigned k = 0; k < SLP_TREE_SCALAR_STMTS (node).length (); ++k)
1107 : {
/* Empty lanes have no statement to check.  */
1108 383888 : if (! SLP_TREE_SCALAR_STMTS (node)[k])
1109 158585 : continue;
1110 383888 : stmt_vec_info access_info
1111 383888 : = vect_orig_stmt (SLP_TREE_SCALAR_STMTS (node)[k]);
/* The first load is the hoist position itself and needs no check.  */
1112 383888 : if (access_info == first_access_info)
1113 158585 : continue;
1114 225303 : data_reference *dr_a = STMT_VINFO_DATA_REF (access_info);
/* REF describes the memory read by this load for the alias oracle; it
   is initialized on first use.  GRP_VISITED holds the leaders of the
   store groups already checked against this load.  */
1115 225303 : ao_ref ref;
1116 225303 : bool ref_initialized_p = false;
1117 225303 : hash_set<stmt_vec_info> grp_visited;
/* Walk backwards from this load up to, but not including, the first
   load.  NOTE(review): there is no start-of-sequence test, so this
   relies on the first load being reachable through gsi_prev -
   presumably both statements are in the same basic block, confirm.  */
1118 225303 : for (gimple_stmt_iterator gsi = gsi_for_stmt (access_info->stmt);
1119 4348181 : gsi_stmt (gsi) != first_access_info->stmt; gsi_prev (&gsi))
1120 : {
1121 2061475 : gimple *stmt = gsi_stmt (gsi);
/* Only statements with a virtual definition are of interest.  */
1122 3358625 : if (! gimple_vdef (stmt))
1123 2005767 : continue;
1124 :
1125 280497 : stmt_vec_info stmt_info = vinfo->lookup_stmt (stmt);
1126 :
1127 : /* If we run into a store of this same instance (we've just
1128 : marked those) then delay dependence checking until we run
1129 : into the last store because this is where it will have
1130 : been sunk to (and we verified that we can do that already). */
1131 280497 : if (gimple_visited_p (stmt))
1132 : {
1133 224789 : if (stmt_info != last_store_info)
1134 224787 : continue;
1135 :
/* At the last store: check the load against every store of the
   instance.  */
1136 10 : for (stmt_vec_info &store_info : stores)
1137 : {
1138 4 : data_reference *store_dr = STMT_VINFO_DATA_REF (store_info);
1139 4 : ddr_p ddr = initialize_data_dependence_relation
1140 4 : (dr_a, store_dr, vNULL);
1141 4 : tree dep
1142 4 : = vect_slp_analyze_data_ref_dependence (vinfo, ddr);
1143 4 : free_dependence_relation (ddr);
1144 4 : if (dep == chrec_known)
1145 4 : continue;
1146 0 : if (dep != chrec_dont_know)
1147 36 : return false;
1148 : /* The classical dependence test cannot analyze this;
1149 : resort to the alias oracle. We are hoisting a load
1150 : so TBAA may be used for disambiguation. */
1151 0 : if (!ref_initialized_p)
1152 : {
1153 0 : ao_ref_init (&ref, DR_REF (dr_a));
1154 0 : ref_initialized_p = true;
1155 : }
1156 0 : if (stmt_may_clobber_ref_p_1 (store_info->stmt, &ref, true))
1157 : return false;
1158 : }
1159 2 : continue;
1160 2 : }
1161 :
/* Return true if the load DR_A may be hoisted above the store STMT_INFO.
   The parameter shadows the outer STMT_INFO on purpose: for a store
   group the lambda is called with group members other than the
   statement currently visited.  */
1162 114327 : auto check_hoist = [&] (stmt_vec_info stmt_info) -> bool
1163 : {
1164 : /* We are hoisting a load - this means we can use TBAA for
1165 : disambiguation. */
1166 58619 : if (!ref_initialized_p)
1167 : {
1168 10972 : ao_ref_init (&ref, DR_REF (dr_a));
1169 10972 : ref_initialized_p = true;
1170 : }
/* The alias oracle is asked first; the dependence test only runs for
   stores the oracle could not rule out.  */
1171 58619 : if (stmt_may_clobber_ref_p_1 (stmt_info->stmt, &ref, true))
1172 : {
1173 : /* If we couldn't record a (single) data reference for this
1174 : stmt we have to give up now. */
1175 210 : data_reference *dr_b = STMT_VINFO_DATA_REF (stmt_info);
1176 210 : if (!dr_b)
1177 : return false;
1178 210 : ddr_p ddr = initialize_data_dependence_relation (dr_a,
1179 210 : dr_b, vNULL);
1180 210 : tree dep
1181 210 : = vect_slp_analyze_data_ref_dependence (vinfo, ddr);
1182 210 : free_dependence_relation (ddr);
1183 : /* The alias oracle above could not rule out a conflict;
1184 : only a proven-independent (chrec_known) result lets us
1185 : hoist the load past this store. */
1186 210 : if (dep != chrec_known)
1187 : return false;
1188 : }
1189 : /* No dependence. */
1190 : return true;
1191 55708 : };
1192 55708 : if (STMT_VINFO_GROUPED_ACCESS (stmt_info))
1193 : {
1194 : /* When we run into a store group we have to honor
1195 : that earlier stores might be moved here. We don't
1196 : know exactly which and where to since we lack a
1197 : back-mapping from DR to SLP node, so assume all
1198 : earlier stores are sunk here. It's enough to
1199 : consider the last stmt of a group for this.
1200 : ??? Both this and the fact that we disregard that
1201 : the conflicting instance might be removed later
1202 : is overly conservative. */
/* hash_set::add is used as a "first time seen" test so that each group
   is processed once per load.  Within the group only the visited store
   itself and members for which get_later_stmt picks the visited store
   are checked.  */
1203 55246 : if (!grp_visited.add (DR_GROUP_FIRST_ELEMENT (stmt_info)))
1204 10772 : for (auto store_info = DR_GROUP_FIRST_ELEMENT (stmt_info);
1205 129309 : store_info != NULL;
1206 118537 : store_info = DR_GROUP_NEXT_ELEMENT (store_info))
1207 118573 : if ((store_info == stmt_info
1208 107810 : || get_later_stmt (store_info, stmt_info) == stmt_info)
1209 165967 : && !check_hoist (store_info))
1210 : return false;
1211 : }
1212 : else
1213 : {
1214 462 : if (!check_hoist (stmt_info))
1215 : return false;
1216 : }
1217 : }
1218 225303 : }
1219 : return true;
1220 : }
1221 :
1222 :
1223 : /* Function vect_slp_analyze_instance_dependence.
1224 :
1225 : Check the data references of the SLP INSTANCE for dependences that would
1226 : prevent moving its scalar stores and loads to the position of the
1227 : vectorized statements. Return true if no such dependence was found.

   For a store instance the stores are checked first and false is
   returned right away when they cannot be sunk.  While the loads are
   checked the scalar stores of the instance carry the gimple visited
   flag; the flag is cleared again before returning.

   If the checks succeed and the loop containing the store has an outer
   loop, the load nodes of the instance are additionally compared with
   the store as seen one loop iteration later, and avoid_stlf_fail is set
   on load nodes for which forwarding from the vector store is expected
   to fail.  This does not change the return value. */
1228 :
1229 : bool
1230 788684 : vect_slp_analyze_instance_dependence (vec_info *vinfo, slp_instance instance)
1231 : {
1232 788684 : DUMP_VECT_SCOPE ("vect_slp_analyze_instance_dependence");
1233 :
1234 : /* The stores of this instance are at the root of the SLP tree. */
1235 788684 : slp_tree store = NULL;
1236 788684 : if (SLP_INSTANCE_KIND (instance) == slp_inst_kind_store)
1237 660867 : store = SLP_INSTANCE_TREE (instance);
1238 :
1239 : /* Verify we can sink stores to the vectorized stmt insert location. */
1240 660867 : stmt_vec_info last_store_info = NULL;
1241 660867 : if (store)
1242 : {
1243 660867 : if (! vect_slp_analyze_store_dependences (vinfo, store))
1244 : return false;
1245 :
1246 : /* Mark stores in this instance and remember the last one. */
1247 652425 : last_store_info = vect_find_last_scalar_stmt_in_slp (store);
1248 2392147 : for (unsigned k = 0; k < SLP_TREE_SCALAR_STMTS (store).length (); ++k)
1249 1739722 : gimple_set_visited (SLP_TREE_SCALAR_STMTS (store)[k]->stmt, true);
1250 : }
1251 :
1252 780242 : bool res = true;
1253 :
1254 : /* Verify we can hoist loads to the vectorized stmt insert location,
1255 : special-casing stores of this instance. */
/* A failure only breaks out of the loop instead of returning so that
   the visited flags set above are still cleared below.  */
1256 1175798 : for (slp_tree &load : SLP_INSTANCE_LOADS (instance))
1257 152158 : if (! vect_slp_analyze_load_dependences (vinfo, load,
1258 : store
1259 : ? SLP_TREE_SCALAR_STMTS (store)
1260 : : vNULL, last_store_info))
1261 : {
1262 : res = false;
1263 : break;
1264 : }
1265 :
1266 : /* Unset the visited flag. */
1267 780242 : if (store)
1268 2392147 : for (unsigned k = 0; k < SLP_TREE_SCALAR_STMTS (store).length (); ++k)
1269 1739722 : gimple_set_visited (SLP_TREE_SCALAR_STMTS (store)[k]->stmt, false);
1270 :
1271 : /* If this is a SLP instance with a store check if there's a dependent
1272 : load that cannot be forwarded from a previous iteration of a loop
1273 : both are in. This is to avoid situations like that in PR115777. */
1274 780242 : if (res && store)
1275 : {
1276 652401 : stmt_vec_info store_info
1277 652401 : = DR_GROUP_FIRST_ELEMENT (SLP_TREE_SCALAR_STMTS (store)[0]);
1278 652401 : class loop *store_loop = gimple_bb (store_info->stmt)->loop_father;
/* Nothing to do when the store's loop has no outer loop.
   NOTE(review): presumably that means the store is not inside any real
   loop - confirm the meaning of loop_outer returning NULL.  */
1279 652401 : if (! loop_outer (store_loop))
1280 557164 : return res;
/* NOTE(review): LOOP_NEST is created, filled and released in this
   function but not passed to anything in between - it looks unused,
   confirm before removing.  */
1281 95237 : vec<loop_p> loop_nest;
1282 95237 : loop_nest.create (1);
1283 95237 : loop_nest.quick_push (store_loop);
/* Data reference of the store relative to STORE_LOOP, built lazily for
   the first load that needs it and shared by all later loads.  */
1284 95237 : data_reference *drs = nullptr;
1285 177730 : for (slp_tree &load : SLP_INSTANCE_LOADS (instance))
1286 : {
1287 36627 : if (! STMT_VINFO_GROUPED_ACCESS (SLP_TREE_SCALAR_STMTS (load)[0]))
1288 0 : continue;
1289 36627 : stmt_vec_info load_info
1290 36627 : = DR_GROUP_FIRST_ELEMENT (SLP_TREE_SCALAR_STMTS (load)[0]);
/* Only loads in the very same loop as the store are considered.  */
1291 36627 : if (gimple_bb (load_info->stmt)->loop_father != store_loop)
1292 5073 : continue;
1293 :
1294 : /* For now concern ourselves with write-after-read as we also
1295 : only look for re-use of the store within the same SLP instance.
1296 : We can still get a RAW here when the instance contains a PHI
1297 : with a backedge though, thus this test. */
1298 31554 : if (! vect_stmt_dominates_stmt_p (STMT_VINFO_STMT (load_info),
1299 : STMT_VINFO_STMT (store_info)))
1300 11681 : continue;
1301 :
1302 19873 : if (! drs)
1303 : {
1304 19014 : drs = create_data_ref (loop_preheader_edge (store_loop),
1305 : store_loop,
1306 19014 : DR_REF (STMT_VINFO_DATA_REF (store_info)),
1307 : store_info->stmt, false, false);
/* Without a base address or a constant step for the store no load can
   be compared with it, so stop looking at loads altogether.  DRS is
   still freed after the loop.  */
1308 19014 : if (! DR_BASE_ADDRESS (drs)
1309 16087 : || TREE_CODE (DR_STEP (drs)) != INTEGER_CST)
1310 : break;
1311 : }
1312 16669 : data_reference *drl
1313 16669 : = create_data_ref (loop_preheader_edge (store_loop),
1314 : store_loop,
1315 16669 : DR_REF (STMT_VINFO_DATA_REF (load_info)),
1316 : load_info->stmt, true, false);
1317 :
1318 : /* See whether the DRs have a known constant distance throughout
1319 : the containing loop iteration. */
1320 31651 : if (! DR_BASE_ADDRESS (drl)
1321 14560 : || ! operand_equal_p (DR_STEP (drs), DR_STEP (drl))
1322 8577 : || ! operand_equal_p (DR_BASE_ADDRESS (drs),
1323 8577 : DR_BASE_ADDRESS (drl))
1324 18366 : || ! operand_equal_p (DR_OFFSET (drs), DR_OFFSET (drl)))
1325 : {
1326 14982 : free_data_ref (drl);
1327 14982 : continue;
1328 : }
1329 :
1330 : /* If the next iteration load overlaps with a non-power-of-two offset
1331 : we are surely failing any STLF attempt. */
/* SIZES and SIZEL are the element size times the group size of the
   store and the load respectively.  The load range is taken at DR_INIT
   plus one STEP, i.e. as accessed in the following iteration.  */
1332 1687 : HOST_WIDE_INT step = TREE_INT_CST_LOW (DR_STEP (drl));
1333 1687 : unsigned HOST_WIDE_INT sizes
1334 1687 : = (TREE_INT_CST_LOW (TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (drs))))
1335 1687 : * DR_GROUP_SIZE (store_info));
1336 1687 : unsigned HOST_WIDE_INT sizel
1337 1687 : = (TREE_INT_CST_LOW (TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (drl))))
1338 1687 : * DR_GROUP_SIZE (load_info));
1339 1687 : if (ranges_overlap_p (TREE_INT_CST_LOW (DR_INIT (drl)) + step, sizel,
1340 1687 : TREE_INT_CST_LOW (DR_INIT (drs)), sizes))
1341 : {
/* DIST is the absolute distance between the start of the next
   iteration's load and the start of the store.  */
1342 835 : unsigned HOST_WIDE_INT dist
1343 835 : = absu_hwi (TREE_INT_CST_LOW (DR_INIT (drl)) + step
1344 835 : - TREE_INT_CST_LOW (DR_INIT (drs)));
1345 835 : poly_uint64 loadsz = tree_to_poly_uint64
1346 835 : (TYPE_SIZE_UNIT (SLP_TREE_VECTYPE (load)));
1347 835 : poly_uint64 storesz = tree_to_poly_uint64
1348 835 : (TYPE_SIZE_UNIT (SLP_TREE_VECTYPE (store)));
1349 : /* When the overlap aligns with vector sizes used for the loads
1350 : and the vector stores are larger or equal to the loads
1351 : forwarding should work. */
1352 1670 : if (maybe_gt (loadsz, storesz) || ! multiple_p (dist, loadsz))
1353 70 : load->avoid_stlf_fail = true;
1354 : }
1355 1687 : free_data_ref (drl);
1356 : }
1357 95237 : if (drs)
1358 19014 : free_data_ref (drs);
1359 95237 : loop_nest.release ();
1360 : }
1361 :
1362 : return res;
1363 : }
1364 :
1365 : /* Return the misalignment of DR_INFO accessed in VECTYPE with OFFSET
1366 : applied.

   The result is DR_MISALIGNMENT_UNKNOWN when the recorded misalignment
   is unknown, when the recorded target alignment is possibly smaller
   than the preferred alignment of VECTYPE, or when the misalignment
   cannot be determined at compile time.  It is 0 when the total
   misalignment is known to be zero.  The recorded misalignment must
   already have been initialized. */
1367 :
1368 : int
1369 6619782 : dr_misalignment (dr_vec_info *dr_info, tree vectype, poly_int64 offset)
1370 : {
/* Byte distance of this access from the first element of its group;
   stays 0 for an ungrouped access.  */
1371 6619782 : HOST_WIDE_INT diff = 0;
1372 : /* Alignment is only analyzed for the first element of a DR group,
1373 : use that but adjust misalignment by the offset of the access. */
1374 6619782 : if (STMT_VINFO_GROUPED_ACCESS (dr_info->stmt))
1375 : {
1376 2295826 : dr_vec_info *first_dr
1377 2295826 : = STMT_VINFO_DR_INFO (DR_GROUP_FIRST_ELEMENT (dr_info->stmt));
1378 : /* vect_analyze_data_ref_accesses guarantees that DR_INIT are
1379 : INTEGER_CSTs and the first element in the group has the lowest
1380 : address. */
1381 2295826 : diff = (TREE_INT_CST_LOW (DR_INIT (dr_info->dr))
1382 2295826 : - TREE_INT_CST_LOW (DR_INIT (first_dr->dr)));
1383 2295826 : gcc_assert (diff >= 0);
/* From here on DR_INFO refers to the first element of the group.  */
1384 : dr_info = first_dr;
1385 : }
1386 :
1387 6619782 : int misalign = dr_info->misalignment;
1388 6619782 : gcc_assert (misalign != DR_MISALIGNMENT_UNINITIALIZED);
1389 6619782 : if (misalign == DR_MISALIGNMENT_UNKNOWN)
1390 : return misalign;
1391 :
1392 : /* If the access is only aligned for a vector type with smaller alignment
1393 : requirement the access has unknown misalignment. */
/* target_alignment is scaled by BITS_PER_UNIT before it is compared with
   the value returned by the target hook.  */
1394 4025611 : if (maybe_lt (dr_info->target_alignment * BITS_PER_UNIT,
1395 4025611 : targetm.vectorize.preferred_vector_alignment (vectype)))
1396 : return DR_MISALIGNMENT_UNKNOWN;
1397 :
1398 : /* Apply the offset from the DR group start and the externally supplied
1399 : offset which can for example result from a negative stride access. */
1400 4025608 : poly_int64 misalignment = misalign + diff + offset;
1401 :
1402 : /* Below we reject compile-time non-constant target alignments, but if
1403 : our misalignment is zero, then we are known to already be aligned
1404 : w.r.t. any such possible target alignment. */
1405 4025608 : if (known_eq (misalignment, 0))
1406 : return 0;
1407 :
/* MISALIGN is reused as the output of known_misalignment.
   NOTE(review): presumably that stores MISALIGNMENT reduced modulo
   TARGET_ALIGNMENT_C when it is a compile-time constant - confirm
   against its definition.  */
1408 632133 : unsigned HOST_WIDE_INT target_alignment_c;
1409 632133 : if (!dr_info->target_alignment.is_constant (&target_alignment_c)
1410 632133 : || !known_misalignment (misalignment, target_alignment_c, &misalign))
1411 : return DR_MISALIGNMENT_UNKNOWN;
1412 632133 : return misalign;
1413 : }
1414 :
1415 : /* Record the base alignment guarantee given by DRB, which occurs
1416 : in STMT_INFO.

   The guarantee is stored in VINFO->base_alignments under
   DRB->base_address as a (STMT_INFO, DRB) pair.  An entry that already
   exists for that address is only replaced when DRB has a strictly
   larger base_alignment. */
1417 :
1418 : static void
1419 4612346 : vect_record_base_alignment (vec_info *vinfo, stmt_vec_info stmt_info,
1420 : innermost_loop_behavior *drb)
1421 : {
/* EXISTED is set by get_or_insert; ENTRY refers to the map slot for the
   base address in either case.  */
1422 4612346 : bool existed;
1423 4612346 : std::pair<stmt_vec_info, innermost_loop_behavior *> &entry
1424 4612346 : = vinfo->base_alignments.get_or_insert (drb->base_address, &existed);
/* ENTRY.second is only read when the slot already existed.  */
1425 4612346 : if (!existed || entry.second->base_alignment < drb->base_alignment)
1426 : {
1427 1429394 : entry = std::make_pair (stmt_info, drb);
1428 1429394 : if (dump_enabled_p ())
1429 32783 : dump_printf_loc (MSG_NOTE, vect_location,
1430 : "recording new base alignment for %T\n"
1431 : " alignment: %d\n"
1432 : " misalignment: %d\n"
1433 : " based on: %G",
1434 : drb->base_address,
1435 : drb->base_alignment,
1436 : drb->base_misalignment,
1437 : stmt_info->stmt);
1438 : }
1439 4612346 : }
1440 :
1441 : /* If the region we're going to vectorize is reached, all unconditional
1442 : data references occur at least once. We can therefore pool the base
1443 : alignment guarantees from each unconditional reference. Do this by
1444 : going through all the data references in VINFO and checking whether
1445 : the containing statement makes the reference unconditionally. If so,
1446 : record the alignment of the base address in VINFO so that it can be
1447 : used for all other references with the same base. */
1448 :
1449 : void
1450 1025559 : vect_record_base_alignments (vec_info *vinfo)
1451 : {
/* LOOP stays NULL when VINFO is not a loop_vec_info.  */
1452 1025559 : loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
1453 412326 : class loop *loop = loop_vinfo ? LOOP_VINFO_LOOP (loop_vinfo) : NULL;
1454 15017152 : for (data_reference *dr : vinfo->shared->datarefs)
1455 : {
1456 12038899 : dr_vec_info *dr_info = vinfo->lookup_dr (dr);
1457 12038899 : stmt_vec_info stmt_info = dr_info->stmt;
/* Skip references that are conditional in their statement, statements
   that are not vectorizable, and gather/scatter accesses.  */
1458 12038899 : if (!DR_IS_CONDITIONAL_IN_STMT (dr)
1459 12028853 : && STMT_VINFO_VECTORIZABLE (stmt_info)
1460 4629315 : && !STMT_VINFO_GATHER_SCATTER_P (stmt_info))
1461 : {
/* Record the guarantee of the reference's innermost behavior.  */
1462 4610809 : vect_record_base_alignment (vinfo, stmt_info, &DR_INNERMOST (dr));
1463 :
1464 : /* If DR is nested in the loop that is being vectorized, we can also
1465 : record the alignment of the base wrt the outer loop. */
1466 12968559 : if (loop && nested_in_vect_loop_p (loop, stmt_info))
1467 1537 : vect_record_base_alignment
1468 1537 : (vinfo, stmt_info, &STMT_VINFO_DR_WRT_VEC_LOOP (stmt_info));
1469 : }
1470 : }
1471 1025559 : }
1472 :
1473 : /* Function vect_compute_data_ref_alignment
1474 :
1475 : Compute the misalignment of the data reference DR_INFO when vectorizing
1476 : with VECTYPE.
1477 :
1478 : Output:
1479 : 1. initialized misalignment info for DR_INFO
1480 :
1481 : FOR NOW: No analysis is actually performed. Misalignment is calculated
1482 : only for trivial cases. TODO. */
1483 :
1484 : static void
1485 1604785 : vect_compute_data_ref_alignment (vec_info *vinfo, dr_vec_info *dr_info,
1486 : tree vectype)
1487 : {
1488 1604785 : stmt_vec_info stmt_info = dr_info->stmt;
1489 1604785 : vec_base_alignments *base_alignments = &vinfo->base_alignments;
1490 1604785 : loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
1491 1604785 : class loop *loop = NULL;
1492 1604785 : tree ref = DR_REF (dr_info->dr);
1493 :
1494 1604785 : if (dump_enabled_p ())
1495 51682 : dump_printf_loc (MSG_NOTE, vect_location,
1496 : "vect_compute_data_ref_alignment:\n");
1497 :
1498 1604785 : if (loop_vinfo)
1499 827006 : loop = LOOP_VINFO_LOOP (loop_vinfo);
1500 :
1501 : /* Initialize misalignment to unknown. */
1502 1604785 : SET_DR_MISALIGNMENT (dr_info, DR_MISALIGNMENT_UNKNOWN);
1503 :
1504 1604785 : if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
1505 : return;
1506 :
1507 1584559 : innermost_loop_behavior *drb = vect_dr_behavior (vinfo, dr_info);
1508 1584559 : bool step_preserves_misalignment_p;
1509 :
1510 1584559 : poly_uint64 vector_alignment
1511 1584559 : = exact_div (targetm.vectorize.preferred_vector_alignment (vectype),
1512 : BITS_PER_UNIT);
1513 :
1514 1584559 : if (loop_vinfo
1515 1584559 : && dr_safe_speculative_read_required (stmt_info))
1516 : {
1517 : /* The required target alignment must be a power-of-2 value and is
1518 : computed as the product of vector element size, VF and group size.
1519 : We compute the constant part first as VF may be a variable. For
1520 : variable VF, the power-of-2 check of VF is deferred to runtime. */
1521 307444 : auto align_factor_c
1522 307444 : = TREE_INT_CST_LOW (TYPE_SIZE_UNIT (TREE_TYPE (vectype)));
1523 307444 : if (STMT_VINFO_GROUPED_ACCESS (stmt_info))
1524 89978 : align_factor_c *= DR_GROUP_SIZE (DR_GROUP_FIRST_ELEMENT (stmt_info));
1525 :
1526 307444 : poly_uint64 vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
1527 307444 : poly_uint64 new_alignment = vf * align_factor_c;
1528 :
1529 614888 : if ((vf.is_constant () && pow2p_hwi (new_alignment.to_constant ()))
1530 : || (!vf.is_constant () && pow2p_hwi (align_factor_c)))
1531 : {
1532 244215 : if (dump_enabled_p ())
1533 : {
1534 2960 : dump_printf_loc (MSG_NOTE, vect_location,
1535 : "alignment increased due to early break to ");
1536 2960 : dump_dec (MSG_NOTE, new_alignment);
1537 2960 : dump_printf (MSG_NOTE, " bytes.\n");
1538 : }
1539 244215 : vector_alignment = new_alignment;
1540 : }
1541 : }
1542 :
1543 1584559 : SET_DR_TARGET_ALIGNMENT (dr_info, vector_alignment);
1544 :
1545 : /* If the main loop has peeled for alignment we have no way of knowing
1546 : whether the data accesses in the epilogues are aligned. We can't at
1547 : compile time answer the question whether we have entered the main loop or
1548 : not. Fixes PR 92351. */
1549 1584559 : if (loop_vinfo)
1550 : {
1551 806780 : loop_vec_info orig_loop_vinfo = LOOP_VINFO_ORIG_LOOP_INFO (loop_vinfo);
1552 806780 : if (orig_loop_vinfo
1553 32513 : && LOOP_VINFO_PEELING_FOR_ALIGNMENT (orig_loop_vinfo) != 0)
1554 : return;
1555 : }
1556 :
1557 1584342 : unsigned HOST_WIDE_INT vect_align_c;
1558 1584342 : if (!vector_alignment.is_constant (&vect_align_c))
1559 : return;
1560 :
1561 : /* No step for BB vectorization. */
1562 1584342 : if (!loop)
1563 : {
1564 777779 : gcc_assert (integer_zerop (drb->step));
1565 : step_preserves_misalignment_p = true;
1566 : }
1567 :
1568 : else
1569 : {
1570 : /* We can only use base and misalignment information relative to
1571 : an innermost loop if the misalignment stays the same throughout the
1572 : execution of the loop. As above, this is the case if the stride of
1573 : the dataref evenly divides by the alignment. Make sure to check
1574 : previous epilogues and the main loop. */
1575 : step_preserves_misalignment_p = true;
1576 : auto lvinfo = loop_vinfo;
1577 1646148 : while (lvinfo)
1578 : {
1579 839585 : poly_uint64 vf = LOOP_VINFO_VECT_FACTOR (lvinfo);
1580 839585 : step_preserves_misalignment_p
1581 839585 : &= multiple_p (drb->step_alignment * vf, vect_align_c);
1582 839585 : lvinfo = LOOP_VINFO_ORIG_LOOP_INFO (lvinfo);
1583 : }
1584 :
1585 806563 : if (!step_preserves_misalignment_p && dump_enabled_p ())
1586 322 : dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1587 : "step doesn't divide the vector alignment.\n");
1588 :
1589 : /* In case the dataref is in an inner-loop of the loop that is being
1590 : vectorized (LOOP), we use the base and misalignment information
1591 : relative to the outer-loop (LOOP). This is ok only if the
1592 : misalignment stays the same throughout the execution of the
1593 : inner-loop, which is why we have to check that the stride of the
1594 : dataref in the inner-loop evenly divides by the vector alignment. */
1595 806563 : if (step_preserves_misalignment_p
1596 806563 : && nested_in_vect_loop_p (loop, stmt_info))
1597 : {
1598 1536 : step_preserves_misalignment_p
1599 1536 : = (DR_STEP_ALIGNMENT (dr_info->dr) % vect_align_c) == 0;
1600 :
1601 1536 : if (dump_enabled_p ())
1602 : {
1603 496 : if (step_preserves_misalignment_p)
1604 358 : dump_printf_loc (MSG_NOTE, vect_location,
1605 : "inner step divides the vector alignment.\n");
1606 : else
1607 138 : dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1608 : "inner step doesn't divide the vector"
1609 : " alignment.\n");
1610 : }
1611 : }
1612 : }
1613 :
1614 1584342 : unsigned int base_alignment = drb->base_alignment;
1615 1584342 : unsigned int base_misalignment = drb->base_misalignment;
1616 :
1617 : /* Calculate the maximum of the pooled base address alignment and the
1618 : alignment that we can compute for DR itself. */
1619 1584342 : std::pair<stmt_vec_info, innermost_loop_behavior *> *entry
1620 1584342 : = base_alignments->get (drb->base_address);
1621 1584342 : if (entry
1622 1579530 : && base_alignment < (*entry).second->base_alignment
1623 1587649 : && (loop_vinfo
1624 2397 : || (dominated_by_p (CDI_DOMINATORS, gimple_bb (stmt_info->stmt),
1625 2397 : gimple_bb (entry->first->stmt))
1626 2289 : && (gimple_bb (stmt_info->stmt) != gimple_bb (entry->first->stmt)
1627 2053 : || (entry->first->dr_aux.group <= dr_info->group)))))
1628 : {
1629 3182 : base_alignment = entry->second->base_alignment;
1630 3182 : base_misalignment = entry->second->base_misalignment;
1631 : }
1632 :
1633 1584342 : if (drb->offset_alignment < vect_align_c
1634 1516504 : || !step_preserves_misalignment_p
1635 : /* We need to know whether the step wrt the vectorized loop is
1636 : negative when computing the starting misalignment below. */
1637 1508123 : || TREE_CODE (drb->step) != INTEGER_CST)
1638 : {
1639 104067 : if (dump_enabled_p ())
1640 3715 : dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1641 : "Unknown alignment for access: %T\n", ref);
1642 104067 : return;
1643 : }
1644 :
1645 1480275 : if (base_alignment < vect_align_c)
1646 : {
1647 732320 : unsigned int max_alignment;
1648 732320 : tree base = get_base_for_alignment (drb->base_address, &max_alignment);
1649 732320 : if (max_alignment < vect_align_c
1650 729946 : || (loop_vinfo && LOOP_VINFO_EPILOGUE_P (loop_vinfo))
1651 1441626 : || !vect_can_force_dr_alignment_p (base,
1652 709306 : vect_align_c * BITS_PER_UNIT))
1653 : {
1654 535274 : if (dump_enabled_p ())
1655 14334 : dump_printf_loc (MSG_NOTE, vect_location,
1656 : "can't force alignment of ref: %T\n", ref);
1657 535274 : return;
1658 : }
1659 :
1660 : /* Force the alignment of the decl.
1661 : NOTE: This is the only change to the code we make during
1662 : the analysis phase, before deciding to vectorize the loop. */
1663 197046 : if (dump_enabled_p ())
1664 7944 : dump_printf_loc (MSG_NOTE, vect_location,
1665 : "force alignment of %T\n", ref);
1666 :
1667 197046 : dr_info->base_decl = base;
1668 197046 : dr_info->base_misaligned = true;
1669 197046 : base_misalignment = 0;
1670 : }
1671 945001 : poly_int64 misalignment
1672 945001 : = base_misalignment + wi::to_poly_offset (drb->init).force_shwi ();
1673 :
1674 945001 : unsigned int const_misalignment;
1675 945001 : if (!known_misalignment (misalignment, vect_align_c, &const_misalignment))
1676 : {
1677 : if (dump_enabled_p ())
1678 : dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1679 : "Non-constant misalignment for access: %T\n", ref);
1680 : return;
1681 : }
1682 :
1683 945001 : SET_DR_MISALIGNMENT (dr_info, const_misalignment);
1684 :
1685 945001 : if (dump_enabled_p ())
1686 32308 : dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1687 : "misalign = %d bytes of ref %T\n",
1688 : const_misalignment, ref);
1689 :
1690 : return;
1691 : }
1692 :
1693 : /* Return whether DR_INFO, which is related to DR_PEEL_INFO in
1694 : that it only differs in DR_INIT, is aligned if DR_PEEL_INFO
1695 : is made aligned via peeling. */
1696 :
1697 : static bool
1698 1986758 : vect_dr_aligned_if_related_peeled_dr_is (dr_vec_info *dr_info,
1699 : dr_vec_info *dr_peel_info)
1700 : {
1701 1986758 : if (multiple_p (DR_TARGET_ALIGNMENT (dr_peel_info),
1702 1987526 : DR_TARGET_ALIGNMENT (dr_info)))
1703 : {
1704 1985990 : poly_offset_int diff
1705 1985990 : = (wi::to_poly_offset (DR_INIT (dr_peel_info->dr))
1706 1985990 : - wi::to_poly_offset (DR_INIT (dr_info->dr)));
1707 1985990 : if (known_eq (diff, 0)
1708 1985990 : || multiple_p (diff, DR_TARGET_ALIGNMENT (dr_info)))
1709 747227 : return true;
1710 : }
1711 : return false;
1712 : }
1713 :
1714 : /* Return whether DR_INFO is aligned if DR_PEEL_INFO is made
1715 : aligned via peeling. */
1716 :
1717 : static bool
1718 198518 : vect_dr_aligned_if_peeled_dr_is (dr_vec_info *dr_info,
1719 : dr_vec_info *dr_peel_info)
1720 : {
1721 198518 : if (!operand_equal_p (DR_BASE_ADDRESS (dr_info->dr),
1722 198518 : DR_BASE_ADDRESS (dr_peel_info->dr), 0)
1723 49000 : || !operand_equal_p (DR_OFFSET (dr_info->dr),
1724 49000 : DR_OFFSET (dr_peel_info->dr), 0)
1725 246610 : || !operand_equal_p (DR_STEP (dr_info->dr),
1726 48092 : DR_STEP (dr_peel_info->dr), 0))
1727 150824 : return false;
1728 :
1729 47694 : return vect_dr_aligned_if_related_peeled_dr_is (dr_info, dr_peel_info);
1730 : }
1731 :
1732 : /* Compute the value for dr_info->misalign so that the access appears
1733 : aligned. This is used by peeling to compensate for dr_misalignment
1734 : applying the offset for negative step. */
1735 :
1736 : int
1737 21899 : vect_dr_misalign_for_aligned_access (dr_vec_info *dr_info)
1738 : {
1739 21899 : if (tree_int_cst_sgn (DR_STEP (dr_info->dr)) >= 0)
1740 : return 0;
1741 :
1742 201 : tree vectype = STMT_VINFO_VECTYPE (dr_info->stmt);
1743 201 : poly_int64 misalignment
1744 201 : = ((TYPE_VECTOR_SUBPARTS (vectype) - 1)
1745 201 : * TREE_INT_CST_LOW (TYPE_SIZE_UNIT (TREE_TYPE (vectype))));
1746 :
1747 201 : unsigned HOST_WIDE_INT target_alignment_c;
1748 201 : int misalign;
1749 201 : if (!dr_info->target_alignment.is_constant (&target_alignment_c)
1750 201 : || !known_misalignment (misalignment, target_alignment_c, &misalign))
1751 : return DR_MISALIGNMENT_UNKNOWN;
1752 201 : return misalign;
1753 : }
1754 :
1755 : /* Function vect_update_misalignment_for_peel.
1756 : Sets DR_INFO's misalignment
1757 : - to 0 if it has the same alignment as DR_PEEL_INFO,
1758 : - to the misalignment computed using NPEEL if DR_INFO's salignment is known,
1759 : - to -1 (unknown) otherwise.
1760 :
1761 : DR_INFO - the data reference whose misalignment is to be adjusted.
1762 : DR_PEEL_INFO - the data reference whose misalignment is being made
1763 : zero in the vector loop by the peel.
1764 : NPEEL - the number of iterations in the peel loop if the misalignment
1765 : of DR_PEEL_INFO is known at compile time. */
1766 :
1767 : static void
1768 2775 : vect_update_misalignment_for_peel (dr_vec_info *dr_info,
1769 : dr_vec_info *dr_peel_info, int npeel)
1770 : {
1771 : /* If dr_info is aligned of dr_peel_info is, then mark it so. */
1772 2775 : if (vect_dr_aligned_if_peeled_dr_is (dr_info, dr_peel_info))
1773 : {
1774 444 : SET_DR_MISALIGNMENT (dr_info,
1775 : vect_dr_misalign_for_aligned_access (dr_peel_info));
1776 444 : return;
1777 : }
1778 :
1779 2331 : unsigned HOST_WIDE_INT alignment;
1780 2331 : if (DR_TARGET_ALIGNMENT (dr_info).is_constant (&alignment)
1781 2331 : && known_alignment_for_access_p (dr_info,
1782 2331 : STMT_VINFO_VECTYPE (dr_info->stmt))
1783 218 : && known_alignment_for_access_p (dr_peel_info,
1784 218 : STMT_VINFO_VECTYPE (dr_peel_info->stmt)))
1785 : {
1786 202 : int misal = dr_info->misalignment;
1787 202 : misal += npeel * TREE_INT_CST_LOW (DR_STEP (dr_info->dr));
1788 202 : misal &= alignment - 1;
1789 202 : set_dr_misalignment (dr_info, misal);
1790 202 : return;
1791 : }
1792 :
1793 2129 : if (dump_enabled_p ())
1794 40 : dump_printf_loc (MSG_NOTE, vect_location, "Setting misalignment " \
1795 : "to unknown (-1).\n");
1796 2129 : SET_DR_MISALIGNMENT (dr_info, DR_MISALIGNMENT_UNKNOWN);
1797 : }
1798 :
1799 : /* Return true if alignment is relevant for DR_INFO. */
1800 :
1801 : static bool
1802 1802746 : vect_relevant_for_alignment_p (dr_vec_info *dr_info)
1803 : {
1804 1802746 : stmt_vec_info stmt_info = dr_info->stmt;
1805 :
1806 1802746 : if (!STMT_VINFO_RELEVANT_P (stmt_info))
1807 : return false;
1808 :
1809 : /* For interleaving, only the alignment of the first access matters. */
1810 1801824 : if (STMT_VINFO_GROUPED_ACCESS (stmt_info)
1811 2044802 : && DR_GROUP_FIRST_ELEMENT (stmt_info) != stmt_info)
1812 : return false;
1813 :
1814 : /* Scatter-gather and invariant accesses continue to address individual
1815 : scalars, so vector-level alignment is irrelevant. */
1816 1694647 : if (STMT_VINFO_GATHER_SCATTER_P (stmt_info)
1817 1694647 : || integer_zerop (DR_STEP (dr_info->dr)))
1818 54727 : return false;
1819 :
1820 : /* Strided accesses perform only component accesses, alignment is
1821 : irrelevant for them. */
1822 1639920 : if (STMT_VINFO_STRIDED_P (stmt_info)
1823 1639920 : && !STMT_VINFO_GROUPED_ACCESS (stmt_info))
1824 : return false;
1825 :
1826 : return true;
1827 : }
1828 :
1829 : /* Given an memory reference EXP return whether its alignment is less
1830 : than its size. */
1831 :
1832 : static bool
1833 1596858 : not_size_aligned (tree exp)
1834 : {
1835 1596858 : if (!tree_fits_uhwi_p (TYPE_SIZE (TREE_TYPE (exp))))
1836 : return true;
1837 :
1838 1596858 : return (tree_to_uhwi (TYPE_SIZE (TREE_TYPE (exp)))
1839 1596858 : > get_object_alignment (exp));
1840 : }
1841 :
1842 : /* Function vector_alignment_reachable_p
1843 :
1844 : Return true if vector alignment for DR_INFO is reachable by peeling
1845 : a few loop iterations. Return false otherwise. */
1846 :
1847 : static bool
1848 613133 : vector_alignment_reachable_p (dr_vec_info *dr_info, poly_uint64 vf)
1849 : {
1850 613133 : stmt_vec_info stmt_info = dr_info->stmt;
1851 613133 : tree vectype = STMT_VINFO_VECTYPE (stmt_info);
1852 613133 : poly_uint64 nelements = TYPE_VECTOR_SUBPARTS (vectype);
1853 1226266 : poly_uint64 vector_size = GET_MODE_SIZE (TYPE_MODE (vectype));
1854 613133 : unsigned elem_size = vector_element_size (vector_size, nelements);
1855 613133 : unsigned group_size = 1;
1856 :
1857 613133 : if (STMT_VINFO_GROUPED_ACCESS (stmt_info))
1858 : {
1859 : /* For interleaved access we peel only if number of iterations in
1860 : the prolog loop ({VF - misalignment}), is a multiple of the
1861 : number of the interleaved accesses. */
1862 :
1863 : /* FORNOW: handle only known alignment. */
1864 87492 : if (!known_alignment_for_access_p (dr_info, vectype))
1865 613133 : return false;
1866 :
1867 52060 : unsigned mis_in_elements = dr_misalignment (dr_info, vectype) / elem_size;
1868 64532 : if (!multiple_p (nelements - mis_in_elements, DR_GROUP_SIZE (stmt_info)))
1869 : return false;
1870 :
1871 12472 : group_size = DR_GROUP_SIZE (DR_GROUP_FIRST_ELEMENT (stmt_info));
1872 : }
1873 :
1874 : /* If the vectorization factor does not guarantee DR advancement of
1875 : a multiple of the target alignment no peeling will help. */
1876 538113 : if (!multiple_p (elem_size * group_size * vf, dr_target_alignment (dr_info)))
1877 154 : return false;
1878 :
1879 : /* If misalignment is known at the compile time then allow peeling
1880 : only if natural alignment is reachable through peeling. */
1881 537959 : if (known_alignment_for_access_p (dr_info, vectype)
1882 838267 : && !aligned_access_p (dr_info, vectype))
1883 : {
1884 14238 : HOST_WIDE_INT elmsize =
1885 14238 : int_cst_value (TYPE_SIZE_UNIT (TREE_TYPE (vectype)));
1886 14238 : if (dump_enabled_p ())
1887 : {
1888 768 : dump_printf_loc (MSG_NOTE, vect_location,
1889 : "data size = %wd. misalignment = %d.\n", elmsize,
1890 : dr_misalignment (dr_info, vectype));
1891 : }
1892 14238 : if (dr_misalignment (dr_info, vectype) % elmsize)
1893 : {
1894 72 : if (dump_enabled_p ())
1895 7 : dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1896 : "data size does not divide the misalignment.\n");
1897 72 : return false;
1898 : }
1899 : }
1900 :
1901 537887 : if (!known_alignment_for_access_p (dr_info, vectype))
1902 : {
1903 237651 : tree type = TREE_TYPE (DR_REF (dr_info->dr));
1904 237651 : bool is_packed = not_size_aligned (DR_REF (dr_info->dr));
1905 237651 : if (dump_enabled_p ())
1906 16013 : dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1907 : "Unknown misalignment, %snaturally aligned\n",
1908 : is_packed ? "not " : "");
1909 237651 : return targetm.vectorize.vector_alignment_reachable (type, is_packed);
1910 : }
1911 :
1912 : return true;
1913 : }
1914 :
1915 :
1916 : /* Calculate the cost of the memory access represented by DR_INFO. */
1917 :
1918 : static void
1919 732917 : vect_get_data_access_cost (vec_info *vinfo, dr_vec_info *dr_info,
1920 : dr_alignment_support alignment_support_scheme,
1921 : int misalignment,
1922 : unsigned int *inside_cost,
1923 : unsigned int *outside_cost,
1924 : stmt_vector_for_cost *body_cost_vec,
1925 : stmt_vector_for_cost *prologue_cost_vec)
1926 : {
1927 732917 : stmt_vec_info stmt_info = dr_info->stmt;
1928 :
1929 732917 : if (DR_IS_READ (dr_info->dr))
1930 512272 : vect_get_load_cost (vinfo, stmt_info, NULL, 1,
1931 : alignment_support_scheme, misalignment, true,
1932 : inside_cost, outside_cost, prologue_cost_vec,
1933 : body_cost_vec, false);
1934 : else
1935 220645 : vect_get_store_cost (vinfo,stmt_info, NULL, 1,
1936 : alignment_support_scheme, misalignment, inside_cost,
1937 : body_cost_vec);
1938 :
1939 732917 : if (dump_enabled_p ())
1940 29885 : dump_printf_loc (MSG_NOTE, vect_location,
1941 : "vect_get_data_access_cost: inside_cost = %d, "
1942 : "outside_cost = %d.\n", *inside_cost, *outside_cost);
1943 732917 : }
1944 :
1945 :
1946 : typedef struct _vect_peel_info
1947 : {
1948 : dr_vec_info *dr_info;
1949 : int npeel;
1950 : unsigned int count;
1951 : } *vect_peel_info;
1952 :
1953 : typedef struct _vect_peel_extended_info
1954 : {
1955 : vec_info *vinfo;
1956 : struct _vect_peel_info peel_info;
1957 : unsigned int inside_cost;
1958 : unsigned int outside_cost;
1959 : } *vect_peel_extended_info;
1960 :
1961 :
1962 : /* Peeling hashtable helpers. */
1963 :
1964 : struct peel_info_hasher : free_ptr_hash <_vect_peel_info>
1965 : {
1966 : static inline hashval_t hash (const _vect_peel_info *);
1967 : static inline bool equal (const _vect_peel_info *, const _vect_peel_info *);
1968 : };
1969 :
1970 : inline hashval_t
1971 747068 : peel_info_hasher::hash (const _vect_peel_info *peel_info)
1972 : {
1973 747068 : return (hashval_t) peel_info->npeel;
1974 : }
1975 :
1976 : inline bool
1977 388623 : peel_info_hasher::equal (const _vect_peel_info *a, const _vect_peel_info *b)
1978 : {
1979 388623 : return (a->npeel == b->npeel);
1980 : }
1981 :
1982 :
1983 : /* Insert DR_INFO into peeling hash table with NPEEL as key. */
1984 :
1985 : static void
1986 359109 : vect_peeling_hash_insert (hash_table<peel_info_hasher> *peeling_htab,
1987 : loop_vec_info loop_vinfo, dr_vec_info *dr_info,
1988 : int npeel, bool supportable_if_not_aligned)
1989 : {
1990 359109 : struct _vect_peel_info elem, *slot;
1991 359109 : _vect_peel_info **new_slot;
1992 :
1993 359109 : elem.npeel = npeel;
1994 359109 : slot = peeling_htab->find (&elem);
1995 359109 : if (slot)
1996 157766 : slot->count++;
1997 : else
1998 : {
1999 201343 : slot = XNEW (struct _vect_peel_info);
2000 201343 : slot->npeel = npeel;
2001 201343 : slot->dr_info = dr_info;
2002 201343 : slot->count = 1;
2003 201343 : new_slot = peeling_htab->find_slot (slot, INSERT);
2004 201343 : *new_slot = slot;
2005 : }
2006 :
2007 : /* If this DR is not supported with unknown misalignment then bias
2008 : this slot when the cost model is disabled. */
2009 359109 : if (!supportable_if_not_aligned
2010 359109 : && unlimited_cost_model (LOOP_VINFO_LOOP (loop_vinfo)))
2011 4656 : slot->count += VECT_MAX_COST;
2012 359109 : }
2013 :
2014 :
2015 : /* Traverse peeling hash table to find peeling option that aligns maximum
2016 : number of data accesses. */
2017 :
2018 : int
2019 35801 : vect_peeling_hash_get_most_frequent (_vect_peel_info **slot,
2020 : _vect_peel_extended_info *max)
2021 : {
2022 35801 : vect_peel_info elem = *slot;
2023 :
2024 35801 : if (elem->count > max->peel_info.count
2025 21747 : || (elem->count == max->peel_info.count
2026 17037 : && max->peel_info.npeel > elem->npeel))
2027 : {
2028 14070 : max->peel_info.npeel = elem->npeel;
2029 14070 : max->peel_info.count = elem->count;
2030 14070 : max->peel_info.dr_info = elem->dr_info;
2031 : }
2032 :
2033 35801 : return 1;
2034 : }
2035 :
2036 : /* Get the costs of peeling NPEEL iterations for LOOP_VINFO, checking
2037 : data access costs for all data refs. If UNKNOWN_MISALIGNMENT is true,
2038 : npeel is computed at runtime but DR0_INFO's misalignment will be zero
2039 : after peeling. */
2040 :
2041 : static void
2042 401748 : vect_get_peeling_costs_all_drs (loop_vec_info loop_vinfo,
2043 : dr_vec_info *dr0_info,
2044 : unsigned int *inside_cost,
2045 : unsigned int *outside_cost,
2046 : stmt_vector_for_cost *body_cost_vec,
2047 : stmt_vector_for_cost *prologue_cost_vec,
2048 : unsigned int npeel)
2049 : {
2050 401748 : vec<data_reference_p> datarefs = LOOP_VINFO_DATAREFS (loop_vinfo);
2051 :
2052 401748 : bool dr0_alignment_known_p
2053 : = (dr0_info
2054 735971 : && known_alignment_for_access_p (dr0_info,
2055 334223 : STMT_VINFO_VECTYPE (dr0_info->stmt)));
2056 :
2057 1975450 : for (data_reference *dr : datarefs)
2058 : {
2059 770206 : dr_vec_info *dr_info = loop_vinfo->lookup_dr (dr);
2060 770206 : if (!vect_relevant_for_alignment_p (dr_info))
2061 37289 : continue;
2062 :
2063 732917 : tree vectype = STMT_VINFO_VECTYPE (dr_info->stmt);
2064 732917 : dr_alignment_support alignment_support_scheme;
2065 732917 : int misalignment;
2066 732917 : unsigned HOST_WIDE_INT alignment;
2067 :
2068 732917 : bool negative = tree_int_cst_compare (DR_STEP (dr_info->dr),
2069 732917 : size_zero_node) < 0;
2070 732917 : poly_int64 off = 0;
2071 732917 : if (negative)
2072 24157 : off = ((TYPE_VECTOR_SUBPARTS (vectype) - 1)
2073 24157 : * -TREE_INT_CST_LOW (TYPE_SIZE_UNIT (TREE_TYPE (vectype))));
2074 :
2075 732917 : if (npeel == 0)
2076 372380 : misalignment = dr_misalignment (dr_info, vectype, off);
2077 360537 : else if (dr_info == dr0_info
2078 360537 : || vect_dr_aligned_if_peeled_dr_is (dr_info, dr0_info))
2079 : misalignment = 0;
2080 125507 : else if (!dr0_alignment_known_p
2081 8115 : || !known_alignment_for_access_p (dr_info, vectype)
2082 133622 : || !DR_TARGET_ALIGNMENT (dr_info).is_constant (&alignment))
2083 : misalignment = DR_MISALIGNMENT_UNKNOWN;
2084 : else
2085 : {
2086 7106 : misalignment = dr_misalignment (dr_info, vectype, off);
2087 7106 : misalignment += npeel * TREE_INT_CST_LOW (DR_STEP (dr_info->dr));
2088 7106 : misalignment &= alignment - 1;
2089 : }
2090 732917 : alignment_support_scheme
2091 732917 : = vect_supportable_dr_alignment (loop_vinfo, dr_info, vectype,
2092 : misalignment);
2093 :
2094 732917 : vect_get_data_access_cost (loop_vinfo, dr_info,
2095 : alignment_support_scheme, misalignment,
2096 : inside_cost, outside_cost,
2097 : body_cost_vec, prologue_cost_vec);
2098 : }
2099 401748 : }
2100 :
2101 : /* Traverse peeling hash table and calculate cost for each peeling option.
2102 : Find the one with the lowest cost. */
2103 :
2104 : int
2105 145907 : vect_peeling_hash_get_lowest_cost (_vect_peel_info **slot,
2106 : _vect_peel_extended_info *min)
2107 : {
2108 145907 : vect_peel_info elem = *slot;
2109 145907 : unsigned int inside_cost = 0, outside_cost = 0;
2110 145907 : loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (min->vinfo);
2111 145907 : stmt_vector_for_cost prologue_cost_vec, body_cost_vec;
2112 :
2113 145907 : prologue_cost_vec.create (2);
2114 145907 : body_cost_vec.create (2);
2115 :
2116 145907 : vect_get_peeling_costs_all_drs (loop_vinfo, elem->dr_info, &inside_cost,
2117 : &outside_cost, &body_cost_vec,
2118 145907 : &prologue_cost_vec, elem->npeel);
2119 :
2120 145907 : body_cost_vec.release ();
2121 145907 : prologue_cost_vec.release ();
2122 :
2123 145907 : outside_cost += vect_get_known_peeling_cost (loop_vinfo, elem->npeel);
2124 :
2125 145907 : if (inside_cost < min->inside_cost
2126 1679 : || (inside_cost == min->inside_cost
2127 1255 : && outside_cost < min->outside_cost))
2128 : {
2129 144234 : min->inside_cost = inside_cost;
2130 144234 : min->outside_cost = outside_cost;
2131 144234 : min->peel_info.dr_info = elem->dr_info;
2132 144234 : min->peel_info.npeel = elem->npeel;
2133 144234 : min->peel_info.count = elem->count;
2134 : }
2135 :
2136 145907 : return 1;
2137 : }
2138 :
2139 :
2140 : /* Choose best peeling option by traversing peeling hash table and either
2141 : choosing an option with the lowest cost (if cost model is enabled) or the
2142 : option that aligns as many accesses as possible. */
2143 :
2144 : static struct _vect_peel_extended_info
2145 156925 : vect_peeling_hash_choose_best_peeling (hash_table<peel_info_hasher> *peeling_htab,
2146 : loop_vec_info loop_vinfo)
2147 : {
2148 156925 : struct _vect_peel_extended_info res;
2149 :
2150 156925 : res.peel_info.dr_info = NULL;
2151 156925 : res.vinfo = loop_vinfo;
2152 :
2153 156925 : if (!unlimited_cost_model (LOOP_VINFO_LOOP (loop_vinfo)))
2154 : {
2155 142924 : res.inside_cost = INT_MAX;
2156 142924 : res.outside_cost = INT_MAX;
2157 142924 : peeling_htab->traverse <_vect_peel_extended_info *,
2158 288831 : vect_peeling_hash_get_lowest_cost> (&res);
2159 : }
2160 : else
2161 : {
2162 14001 : res.peel_info.count = 0;
2163 14001 : peeling_htab->traverse <_vect_peel_extended_info *,
2164 49802 : vect_peeling_hash_get_most_frequent> (&res);
2165 14001 : res.inside_cost = 0;
2166 14001 : res.outside_cost = 0;
2167 : }
2168 :
2169 156925 : return res;
2170 : }
2171 :
2172 : /* Return if vectorization is definitely, possibly, or unlikely to be
2173 : supportable after loop peeling. */
2174 :
2175 : static enum peeling_support
2176 78400 : vect_peeling_supportable (loop_vec_info loop_vinfo, dr_vec_info *dr0_info,
2177 : unsigned npeel)
2178 : {
2179 78400 : vec<data_reference_p> datarefs = LOOP_VINFO_DATAREFS (loop_vinfo);
2180 78400 : enum dr_alignment_support supportable_dr_alignment;
2181 :
2182 78400 : bool dr0_alignment_known_p
2183 156800 : = known_alignment_for_access_p (dr0_info,
2184 78400 : STMT_VINFO_VECTYPE (dr0_info->stmt));
2185 78400 : bool has_unsupported_dr_p = false;
2186 78400 : unsigned int dr0_step = tree_to_shwi (DR_STEP (dr0_info->dr));
2187 78400 : int known_unsupported_misalignment = DR_MISALIGNMENT_UNKNOWN;
2188 :
2189 : /* Check if each data ref can be vectorized after peeling. */
2190 335240 : for (data_reference *dr : datarefs)
2191 : {
2192 115984 : if (dr == dr0_info->dr)
2193 77444 : continue;
2194 :
2195 38540 : dr_vec_info *dr_info = loop_vinfo->lookup_dr (dr);
2196 38540 : if (!vect_relevant_for_alignment_p (dr_info)
2197 38540 : || vect_dr_aligned_if_peeled_dr_is (dr_info, dr0_info))
2198 6711 : continue;
2199 :
2200 31829 : tree vectype = STMT_VINFO_VECTYPE (dr_info->stmt);
2201 31829 : int misalignment;
2202 31829 : unsigned HOST_WIDE_INT alignment;
2203 31829 : if (!dr0_alignment_known_p
2204 1854 : || !known_alignment_for_access_p (dr_info, vectype)
2205 33683 : || !DR_TARGET_ALIGNMENT (dr_info).is_constant (&alignment))
2206 : misalignment = DR_MISALIGNMENT_UNKNOWN;
2207 : else
2208 : {
2209 1840 : misalignment = dr_misalignment (dr_info, vectype);
2210 1840 : misalignment += npeel * TREE_INT_CST_LOW (DR_STEP (dr_info->dr));
2211 1840 : misalignment &= alignment - 1;
2212 : }
2213 31829 : supportable_dr_alignment
2214 31829 : = vect_supportable_dr_alignment (loop_vinfo, dr_info, vectype,
2215 : misalignment);
2216 31829 : if (supportable_dr_alignment == dr_unaligned_unsupported)
2217 : {
2218 30404 : has_unsupported_dr_p = true;
2219 :
2220 : /* If unaligned unsupported DRs exist, we do following checks to see
2221 : if they can be mutually aligned to support vectorization. If yes,
2222 : we can try peeling and create a runtime (mutual alignment) check
2223 : to guard the peeled loop. If no, return PEELING_UNSUPPORTED. */
2224 :
2225 : /* 1) If unaligned unsupported DRs have different alignment steps, the
2226 : probability of DRs being mutually aligned is very low, and it's
2227 : quite complex to check mutual alignment at runtime. We return
2228 : PEELING_UNSUPPORTED in this case. */
2229 30404 : if (tree_to_shwi (DR_STEP (dr)) != dr0_step)
2230 78400 : return peeling_unsupported;
2231 :
2232 : /* 2) Based on above same alignment step condition, if one known
2233 : misaligned DR has zero misalignment, or different misalignment
2234 : amount from another known misaligned DR, peeling is unable to
2235 : help make all these DRs aligned together. We won't try peeling
2236 : with versioning anymore. */
2237 26204 : int curr_dr_misalignment = dr_misalignment (dr_info, vectype);
2238 26204 : if (curr_dr_misalignment == 0)
2239 : return peeling_unsupported;
2240 14460 : if (known_unsupported_misalignment != DR_MISALIGNMENT_UNKNOWN)
2241 : {
2242 8 : if (curr_dr_misalignment != DR_MISALIGNMENT_UNKNOWN
2243 8 : && curr_dr_misalignment != known_unsupported_misalignment)
2244 : return peeling_unsupported;
2245 : }
2246 : else
2247 : known_unsupported_misalignment = curr_dr_misalignment;
2248 : }
2249 : }
2250 :
2251 : /* Vectorization is known to be supportable with peeling alone when there is
2252 : no unsupported DR. */
2253 62456 : return has_unsupported_dr_p ? peeling_maybe_supported
2254 : : peeling_known_supported;
2255 : }
2256 :
2257 : /* Compare two data-references DRA and DRB to group them into chunks
2258 : with related alignment. */
2259 :
2260 : static int
2261 4597918 : dr_align_group_sort_cmp (const void *dra_, const void *drb_)
2262 : {
2263 4597918 : data_reference_p dra = *(data_reference_p *)const_cast<void *>(dra_);
2264 4597918 : data_reference_p drb = *(data_reference_p *)const_cast<void *>(drb_);
2265 4597918 : int cmp;
2266 :
2267 : /* Stabilize sort. */
2268 4597918 : if (dra == drb)
2269 : return 0;
2270 :
2271 : /* Ordering of DRs according to base. */
2272 4597918 : cmp = data_ref_compare_tree (DR_BASE_ADDRESS (dra),
2273 : DR_BASE_ADDRESS (drb));
2274 4597918 : if (cmp != 0)
2275 : return cmp;
2276 :
2277 : /* And according to DR_OFFSET. */
2278 2029943 : cmp = data_ref_compare_tree (DR_OFFSET (dra), DR_OFFSET (drb));
2279 2029943 : if (cmp != 0)
2280 : return cmp;
2281 :
2282 : /* And after step. */
2283 2015706 : cmp = data_ref_compare_tree (DR_STEP (dra), DR_STEP (drb));
2284 2015706 : if (cmp != 0)
2285 : return cmp;
2286 :
2287 : /* Then sort after DR_INIT. In case of identical DRs sort after stmt UID. */
2288 2010471 : cmp = data_ref_compare_tree (DR_INIT (dra), DR_INIT (drb));
2289 2010471 : if (cmp == 0)
2290 237097 : return gimple_uid (DR_STMT (dra)) < gimple_uid (DR_STMT (drb)) ? -1 : 1;
2291 : return cmp;
2292 : }
2293 :
2294 : /* Function vect_enhance_data_refs_alignment
2295 :
2296 : This pass will use loop versioning and loop peeling in order to enhance
2297 : the alignment of data references in the loop.
2298 :
2299 : FOR NOW: we assume that whatever versioning/peeling takes place, only the
2300 : original loop is to be vectorized. Any other loops that are created by
2301 : the transformations performed in this pass - are not supposed to be
2302 : vectorized. This restriction will be relaxed.
2303 :
2304 : This pass will require a cost model to guide it whether to apply peeling
2305 : or versioning or a combination of the two. For example, the scheme that
2306 : intel uses when given a loop with several memory accesses, is as follows:
2307 : choose one memory access ('p') which alignment you want to force by doing
2308 : peeling. Then, either (1) generate a loop in which 'p' is aligned and all
2309 : other accesses are not necessarily aligned, or (2) use loop versioning to
2310 : generate one loop in which all accesses are aligned, and another loop in
2311 : which only 'p' is necessarily aligned.
2312 :
2313 : ("Automatic Intra-Register Vectorization for the Intel Architecture",
2314 : Aart J.C. Bik, Milind Girkar, Paul M. Grey and Xinmin Tian, International
2315 : Journal of Parallel Programming, Vol. 30, No. 2, April 2002.)
2316 :
2317 : Devising a cost model is the most critical aspect of this work. It will
2318 : guide us on which access to peel for, whether to use loop versioning, how
2319 : many versions to create, etc. The cost model will probably consist of
2320 : generic considerations as well as target specific considerations (on
2321 : powerpc for example, misaligned stores are more painful than misaligned
2322 : loads).
2323 :
2324 : Here are the general steps involved in alignment enhancements:
2325 :
2326 : -- original loop, before alignment analysis:
2327 : for (i=0; i<N; i++){
2328 : x = q[i]; # DR_MISALIGNMENT(q) = unknown
2329 : p[i] = y; # DR_MISALIGNMENT(p) = unknown
2330 : }
2331 :
2332 : -- After vect_compute_data_refs_alignment:
2333 : for (i=0; i<N; i++){
2334 : x = q[i]; # DR_MISALIGNMENT(q) = 3
2335 : p[i] = y; # DR_MISALIGNMENT(p) = unknown
2336 : }
2337 :
2338 : -- Possibility 1: we do loop versioning:
2339 : if (p is aligned) {
2340 : for (i=0; i<N; i++){ # loop 1A
2341 : x = q[i]; # DR_MISALIGNMENT(q) = 3
2342 : p[i] = y; # DR_MISALIGNMENT(p) = 0
2343 : }
2344 : }
2345 : else {
2346 : for (i=0; i<N; i++){ # loop 1B
2347 : x = q[i]; # DR_MISALIGNMENT(q) = 3
2348 : p[i] = y; # DR_MISALIGNMENT(p) = unaligned
2349 : }
2350 : }
2351 :
2352 : -- Possibility 2: we do loop peeling:
2353 : for (i = 0; i < 3; i++){ # (scalar loop, not to be vectorized).
2354 : x = q[i];
2355 : p[i] = y;
2356 : }
2357 : for (i = 3; i < N; i++){ # loop 2A
2358 : x = q[i]; # DR_MISALIGNMENT(q) = 0
2359 : p[i] = y; # DR_MISALIGNMENT(p) = unknown
2360 : }
2361 :
2362 : -- Possibility 3: combination of loop peeling and versioning:
2363 : if (p & q are mutually aligned) {
2364 : for (i=0; i<3; i++){ # (peeled loop iterations).
2365 : x = q[i];
2366 : p[i] = y;
2367 : }
2368 : for (i=3; i<N; i++){ # loop 3A
2369 : x = q[i]; # DR_MISALIGNMENT(q) = 0
2370 : p[i] = y; # DR_MISALIGNMENT(p) = 0
2371 : }
2372 : }
2373 : else {
2374 : for (i=0; i<N; i++){ # (scalar loop, not to be vectorized).
2375 : x = q[i]; # DR_MISALIGNMENT(q) = 3
2376 : p[i] = y; # DR_MISALIGNMENT(p) = unknown
2377 : }
2378 : }
2379 :
2380 : These loops are later passed to loop_transform to be vectorized. The
2381 : vectorizer will use the alignment information to guide the transformation
2382 : (whether to generate regular loads/stores, or with special handling for
2383 : misalignment). */
2384 :
2385 : opt_result
2386 380940 : vect_enhance_data_refs_alignment (loop_vec_info loop_vinfo)
2387 : {
2388 380940 : class loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
2389 380940 : dr_vec_info *first_store = NULL;
2390 380940 : dr_vec_info *dr0_info = NULL;
2391 380940 : struct data_reference *dr;
2392 380940 : unsigned int i;
2393 380940 : bool do_peeling = false;
2394 380940 : bool do_versioning = false;
2395 380940 : bool try_peeling_with_versioning = false;
2396 380940 : unsigned int npeel = 0;
2397 380940 : bool one_misalignment_known = false;
2398 380940 : bool one_misalignment_unknown = false;
2399 380940 : bool one_dr_unsupportable = false;
2400 380940 : dr_vec_info *unsupportable_dr_info = NULL;
2401 380940 : unsigned int dr0_same_align_drs = 0, first_store_same_align_drs = 0;
2402 380940 : hash_table<peel_info_hasher> peeling_htab (1);
2403 : /* Every return below yields opt_result::success (); the outcome is communicated through state set on LOOP_VINFO and on the affected data references. */
2404 380940 : DUMP_VECT_SCOPE ("vect_enhance_data_refs_alignment");
2405 :
2406 : /* Reset data so we can safely be called multiple times. */
2407 380940 : LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo).truncate (0);
2408 380940 : LOOP_VINFO_PEELING_FOR_ALIGNMENT (loop_vinfo) = 0;
2409 : /* A loop without data references needs no alignment work. */
2410 380940 : if (LOOP_VINFO_DATAREFS (loop_vinfo).is_empty ())
2411 14156 : return opt_result::success ();
2412 :
2413 : /* Sort the vector of datarefs so DRs that have the same or dependent
2414 : alignment are next to each other. */
2415 366784 : auto_vec<data_reference_p> datarefs
2416 366784 : = LOOP_VINFO_DATAREFS (loop_vinfo).copy ();
2417 366784 : datarefs.qsort (dr_align_group_sort_cmp);
2418 :
2419 : /* Compute the number of DRs that become aligned when we peel
2420 : a dataref so it becomes aligned. The scan starts at the first sorted dataref that has a DR_BASE_ADDRESS. */
2421 733568 : auto_vec<unsigned> n_same_align_refs (datarefs.length ());
2422 366784 : n_same_align_refs.quick_grow_cleared (datarefs.length ());
2423 366784 : unsigned i0;
2424 753243 : for (i0 = 0; i0 < datarefs.length (); ++i0)
2425 379821 : if (DR_BASE_ADDRESS (datarefs[i0]))
2426 : break;
2427 2384220 : for (i = i0 + 1; i <= datarefs.length (); ++i)
2428 : {
2429 825326 : if (i == datarefs.length ()
2430 465180 : || !operand_equal_p (DR_BASE_ADDRESS (datarefs[i0]),
2431 465180 : DR_BASE_ADDRESS (datarefs[i]), 0)
2432 218592 : || !operand_equal_p (DR_OFFSET (datarefs[i0]),
2433 218592 : DR_OFFSET (datarefs[i]), 0)
2434 1042608 : || !operand_equal_p (DR_STEP (datarefs[i0]),
2435 217282 : DR_STEP (datarefs[i]), 0))
2436 : {
2437 : /* The subgroup [i0, i-1] now only differs in DR_INIT and
2438 : possibly DR_TARGET_ALIGNMENT. Still the whole subgroup
2439 : will get known misalignment if we align one of the refs
2440 : with the largest DR_TARGET_ALIGNMENT. */
2441 1433916 : for (unsigned j = i0; j < i; ++j)
2442 : {
2443 825326 : dr_vec_info *dr_infoj = loop_vinfo->lookup_dr (datarefs[j]);
2444 3589716 : for (unsigned k = i0; k < i; ++k)
2445 : {
2446 2764390 : if (k == j)
2447 825326 : continue;
2448 1939064 : dr_vec_info *dr_infok = loop_vinfo->lookup_dr (datarefs[k]);
2449 1939064 : if (vect_dr_aligned_if_related_peeled_dr_is (dr_infok,
2450 : dr_infoj))
2451 708376 : n_same_align_refs[j]++;
2452 : }
2453 : }
2454 : i0 = i;
2455 : }
2456 : }
2457 :
2458 : /* While cost model enhancements are expected in the future, the high level
2459 : view of the code at this time is as follows:
2460 :
2461 : A) If there is a misaligned access then see if doing peeling alone can
2462 : make all data references satisfy vect_supportable_dr_alignment. If so,
2463 : update data structures and return.
2464 :
2465 : B) If peeling alone wasn't possible and there is a data reference with an
2466 : unknown misalignment that does not satisfy vect_supportable_dr_alignment
2467 : then we may use either of the following two approaches.
2468 :
2469 : B1) Try peeling with versioning: Add a runtime loop versioning check to
2470 : see if all unsupportable data references are mutually aligned, which
2471 : means they will be uniformly aligned after a certain amount of loop
2472 : peeling. If peeling and versioning can be used together, set
2473 : LOOP_VINFO_ALLOW_MUTUAL_ALIGNMENT to TRUE and return.
2474 :
2475 : B2) Try versioning alone: Add a runtime loop versioning check to see if
2476 : all unsupportable data references are already uniformly aligned
2477 : without loop peeling. If versioning can be applied alone, set
2478 : LOOP_VINFO_ALLOW_MUTUAL_ALIGNMENT to FALSE and return.
2479 :
2480 : Above B1 is more powerful and more likely to be adopted than B2. But B2
2481 : is still available and useful in some cases, for example, the cost model
2482 : does not allow much peeling.
2483 :
2484 : C) If none of above was successful then the alignment was not enhanced,
2485 : just return. */
2486 :
2487 : /* (1) Peeling to force alignment. */
2488 :
2489 : /* (1.1) Decide whether to perform peeling, how many iterations to peel, and
2490 : if vectorization may be supported by peeling with versioning.
2491 : Considerations:
2492 : - How many accesses will become aligned due to the peeling
2493 : - How many accesses will become unaligned due to the peeling,
2494 : and the cost of misaligned accesses.
2495 : - The cost of peeling (the extra runtime checks, the increase
2496 : in code size). */
2497 :
2498 366784 : poly_uint64 vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
2499 1043339 : FOR_EACH_VEC_ELT (datarefs, i, dr)
2500 : {
2501 721888 : dr_vec_info *dr_info = loop_vinfo->lookup_dr (dr);
2502 721888 : if (!vect_relevant_for_alignment_p (dr_info))
2503 108755 : continue;
2504 :
2505 613133 : stmt_vec_info stmt_info = dr_info->stmt;
2506 613133 : tree vectype = STMT_VINFO_VECTYPE (stmt_info);
2507 :
2508 : /* With variable VF, unsafe speculative read can be avoided for known
2509 : inbounds DRs as long as partial vectors are used. */
2510 613133 : if (!vf.is_constant ()
2511 : && dr_safe_speculative_read_required (stmt_info)
2512 : && DR_SCALAR_KNOWN_BOUNDS (dr_info))
2513 : {
2514 : dr_set_safe_speculative_read_required (stmt_info, false);
2515 : LOOP_VINFO_MUST_USE_PARTIAL_VECTORS_P (loop_vinfo) = true;
2516 : }
2517 :
2518 613133 : do_peeling = vector_alignment_reachable_p (dr_info, vf);
2519 613133 : if (do_peeling)
2520 : {
2521 535701 : if (known_alignment_for_access_p (dr_info, vectype))
2522 : {
2523 300236 : unsigned int npeel_tmp = 0;
2524 300236 : bool negative = tree_int_cst_compare (DR_STEP (dr),
2525 300236 : size_zero_node) < 0;
2526 :
2527 : /* If known_alignment_for_access_p then we have set
2528 : DR_MISALIGNMENT which is only done if we know it at compile
2529 : time, so it is safe to assume target alignment is constant.
2530 : */
2531 300236 : unsigned int target_align =
2532 300236 : DR_TARGET_ALIGNMENT (dr_info).to_constant ();
2533 300236 : unsigned HOST_WIDE_INT dr_size = vect_get_scalar_dr_size (dr_info);
2534 300236 : poly_int64 off = 0;
2535 300236 : if (negative)
2536 2564 : off = (TYPE_VECTOR_SUBPARTS (vectype) - 1) * -dr_size;
2537 300236 : unsigned int mis = dr_misalignment (dr_info, vectype, off);
2538 300236 : mis = negative ? mis : -mis;
2539 300236 : if (mis != 0)
2540 13207 : npeel_tmp = (mis & (target_align - 1)) / dr_size;
2541 :
2542 : /* For multiple types, it is possible that the bigger type access
2543 : will have more than one peeling option. E.g., a loop with two
2544 : types: one of size (vector size / 4), and the other one of
2545 : size (vector size / 8). Vectorization factor will be 8. If both
2546 : accesses are misaligned by 3, the first one needs one scalar
2547 : iteration to be aligned, and the second one needs 5. But the
2548 : first one will be aligned also by peeling 5 scalar
2549 : iterations, and in that case both accesses will be aligned.
2550 : Hence, except for the immediate peeling amount, we also want
2551 : to try to add full vector size, while we don't exceed
2552 : vectorization factor.
2553 : We do this automatically for cost model, since we calculate
2554 : cost for every peeling option. */
2555 300236 : poly_uint64 nscalars = npeel_tmp;
2556 300236 : if (unlimited_cost_model (LOOP_VINFO_LOOP (loop_vinfo)))
2557 : {
2558 39790 : unsigned group_size = 1;
2559 39790 : if (STMT_VINFO_GROUPED_ACCESS (stmt_info))
2560 1917 : group_size = DR_GROUP_SIZE (stmt_info);
2561 39790 : nscalars = vf * group_size;
2562 : }
2563 :
2564 : /* Save info about DR in the hash table. Also include peeling
2565 : amounts according to the explanation above. Indicate
2566 : the alignment status when the ref is not aligned.
2567 : ??? Rather than using unknown alignment here we should
2568 : prune all entries from the peeling hashtable which cause
2569 : DRs to be not supported. */
2570 300236 : bool supportable_if_not_aligned
2571 : = vect_supportable_dr_alignment
2572 300236 : (loop_vinfo, dr_info, vectype, DR_MISALIGNMENT_UNKNOWN);
2573 659345 : while (known_le (npeel_tmp, nscalars))
2574 : {
2575 359109 : vect_peeling_hash_insert (&peeling_htab, loop_vinfo,
2576 : dr_info, npeel_tmp,
2577 : supportable_if_not_aligned);
2578 359109 : npeel_tmp += MAX (1, target_align / dr_size);
2579 : }
2580 :
2581 300236 : one_misalignment_known = true;
2582 : }
2583 : else
2584 : {
2585 : /* If we don't know any misalignment values, we prefer
2586 : peeling for data-ref that has the maximum number of data-refs
2587 : with the same alignment, unless the target prefers to align
2588 : stores over loads. */
2589 235465 : unsigned same_align_drs = n_same_align_refs[i];
2590 235465 : if (!dr0_info
2591 235465 : || dr0_same_align_drs < same_align_drs)
2592 : {
2593 : dr0_same_align_drs = same_align_drs;
2594 : dr0_info = dr_info;
2595 : }
2596 : /* For data-refs with the same number of related
2597 : accesses prefer the one where the misalign
2598 : computation will be invariant in the outermost loop. */
2599 76257 : else if (dr0_same_align_drs == same_align_drs)
2600 : {
2601 74794 : class loop *ivloop0, *ivloop;
2602 74794 : ivloop0 = outermost_invariant_loop_for_expr
2603 74794 : (loop, DR_BASE_ADDRESS (dr0_info->dr));
2604 74794 : ivloop = outermost_invariant_loop_for_expr
2605 74794 : (loop, DR_BASE_ADDRESS (dr));
2606 74794 : if ((ivloop && !ivloop0)
2607 74794 : || (ivloop && ivloop0
2608 74786 : && flow_loop_nested_p (ivloop, ivloop0)))
2609 : dr0_info = dr_info;
2610 : }
2611 :
2612 235465 : one_misalignment_unknown = true;
2613 :
2614 : /* Check for data refs with unsupportable alignment that
2615 : can be peeled. */
2616 235465 : enum dr_alignment_support supportable_dr_alignment
2617 235465 : = vect_supportable_dr_alignment (loop_vinfo, dr_info, vectype,
2618 : DR_MISALIGNMENT_UNKNOWN);
2619 235465 : if (supportable_dr_alignment == dr_unaligned_unsupported)
2620 : {
2621 96639 : one_dr_unsupportable = true;
2622 96639 : unsupportable_dr_info = dr_info;
2623 : }
2624 :
2625 235465 : if (!first_store && DR_IS_WRITE (dr))
2626 : {
2627 51074 : first_store = dr_info;
2628 51074 : first_store_same_align_drs = same_align_drs;
2629 : }
2630 : }
2631 : }
2632 : else
2633 : {
2634 77432 : if (!aligned_access_p (dr_info, vectype))
2635 : {
2636 45333 : if (dump_enabled_p ())
2637 2091 : dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2638 : "vector alignment may not be reachable\n");
2639 : break;
2640 : }
2641 : }
2642 : }
2643 :
2644 : /* Check if we can possibly peel the loop. */
2645 366784 : if (!vect_can_advance_ivs_p (loop_vinfo)
2646 363318 : || !slpeel_can_duplicate_loop_p (loop, LOOP_VINFO_MAIN_EXIT (loop_vinfo),
2647 363318 : loop_preheader_edge (loop))
2648 363318 : || loop->inner
2649 : /* We don't currently maintain the LCSSA for prologue peeled inversed
2650 : loops. */
2651 728497 : || (LOOP_VINFO_EARLY_BREAKS_VECT_PEELED (loop_vinfo)
2652 29373 : && !LOOP_VINFO_NITERS_UNCOUNTED_P (loop_vinfo)))
2653 : do_peeling = false;
2654 :
2655 366784 : struct _vect_peel_extended_info peel_for_known_alignment;
2656 366784 : struct _vect_peel_extended_info peel_for_unknown_alignment;
2657 366784 : struct _vect_peel_extended_info best_peel;
2658 : /* Start the unknown-alignment candidate at INT_MAX cost with a count of zero. */
2659 366784 : peel_for_unknown_alignment.inside_cost = INT_MAX;
2660 366784 : peel_for_unknown_alignment.outside_cost = INT_MAX;
2661 366784 : peel_for_unknown_alignment.peel_info.count = 0;
2662 :
2663 366784 : if (do_peeling
2664 366784 : && one_misalignment_unknown)
2665 : {
2666 : /* Check if the target requires to prefer stores over loads, i.e., if
2667 : misaligned stores are more expensive than misaligned loads (taking
2668 : drs with same alignment into account). */
2669 144008 : unsigned int load_inside_cost = 0;
2670 144008 : unsigned int load_outside_cost = 0;
2671 144008 : unsigned int store_inside_cost = 0;
2672 144008 : unsigned int store_outside_cost = 0;
2673 144008 : unsigned int estimated_npeels = vect_vf_for_cost (loop_vinfo) / 2;
2674 :
2675 144008 : stmt_vector_for_cost dummy;
2676 144008 : dummy.create (2);
2677 144008 : vect_get_peeling_costs_all_drs (loop_vinfo, dr0_info,
2678 : &load_inside_cost,
2679 : &load_outside_cost,
2680 : &dummy, &dummy, estimated_npeels);
2681 144008 : dummy.release ();
2682 :
2683 144008 : if (first_store)
2684 : {
2685 44308 : dummy.create (2);
2686 44308 : vect_get_peeling_costs_all_drs (loop_vinfo, first_store,
2687 : &store_inside_cost,
2688 : &store_outside_cost,
2689 : &dummy, &dummy,
2690 : estimated_npeels);
2691 44308 : dummy.release ();
2692 : }
2693 : else
2694 : {
2695 99700 : store_inside_cost = INT_MAX;
2696 99700 : store_outside_cost = INT_MAX;
2697 : }
2698 : /* Switch dr0 to the first store when peeling for it is cheaper inside the loop, or equally cheap inside and cheaper outside. */
2699 144008 : if (load_inside_cost > store_inside_cost
2700 144008 : || (load_inside_cost == store_inside_cost
2701 43759 : && load_outside_cost > store_outside_cost))
2702 : {
2703 144008 : dr0_info = first_store;
2704 144008 : dr0_same_align_drs = first_store_same_align_drs;
2705 144008 : peel_for_unknown_alignment.inside_cost = store_inside_cost;
2706 144008 : peel_for_unknown_alignment.outside_cost = store_outside_cost;
2707 : }
2708 : else
2709 : {
2710 144008 : peel_for_unknown_alignment.inside_cost = load_inside_cost;
2711 144008 : peel_for_unknown_alignment.outside_cost = load_outside_cost;
2712 : }
2713 :
2714 144008 : peel_for_unknown_alignment.outside_cost
2715 144008 : += vect_get_known_peeling_cost (loop_vinfo, estimated_npeels);
2716 :
2717 144008 : peel_for_unknown_alignment.peel_info.count = dr0_same_align_drs + 1;
2718 : }
2719 :
2720 366784 : peel_for_unknown_alignment.peel_info.npeel = 0;
2721 366784 : peel_for_unknown_alignment.peel_info.dr_info = dr0_info;
2722 :
2723 366784 : best_peel = peel_for_unknown_alignment;
2724 :
2725 366784 : peel_for_known_alignment.inside_cost = INT_MAX;
2726 366784 : peel_for_known_alignment.outside_cost = INT_MAX;
2727 366784 : peel_for_known_alignment.peel_info.count = 0;
2728 366784 : peel_for_known_alignment.peel_info.dr_info = NULL;
2729 :
2730 366784 : if (do_peeling && one_misalignment_known)
2731 : {
2732 : /* Peeling is possible, but there is no data access that is not supported
2733 : unless aligned. So we try to choose the best possible peeling from
2734 : the hash table. */
2735 156925 : peel_for_known_alignment = vect_peeling_hash_choose_best_peeling
2736 156925 : (&peeling_htab, loop_vinfo);
2737 : }
2738 :
2739 : /* Compare costs of peeling for known and unknown alignment. */
2740 366784 : if (peel_for_known_alignment.peel_info.dr_info != NULL
2741 156925 : && peel_for_unknown_alignment.inside_cost
2742 : >= peel_for_known_alignment.inside_cost)
2743 : {
2744 142669 : best_peel = peel_for_known_alignment;
2745 :
2746 : /* If the best peeling for known alignment has NPEEL == 0, perform no
2747 : peeling at all except if there is an unsupportable dr that we can
2748 : align. */
2749 142669 : if (best_peel.peel_info.npeel == 0 && !one_dr_unsupportable)
2750 : do_peeling = false;
2751 : }
2752 :
2753 : /* If there is an unsupportable data ref, prefer this over all choices so far
2754 : since we'd have to discard a chosen peeling except when it accidentally
2755 : aligned the unsupportable data ref. */
2756 231788 : if (one_dr_unsupportable)
2757 : dr0_info = unsupportable_dr_info;
2758 287895 : else if (do_peeling)
2759 : {
2760 : /* Calculate the penalty for no peeling, i.e. leaving everything as-is.
2761 : TODO: Use nopeel_outside_cost or get rid of it? */
2762 67525 : unsigned nopeel_inside_cost = 0;
2763 67525 : unsigned nopeel_outside_cost = 0;
2764 :
2765 67525 : stmt_vector_for_cost dummy;
2766 67525 : dummy.create (2);
2767 67525 : vect_get_peeling_costs_all_drs (loop_vinfo, NULL, &nopeel_inside_cost,
2768 : &nopeel_outside_cost, &dummy, &dummy, 0);
2769 67525 : dummy.release ();
2770 :
2771 : /* Add epilogue costs. As we do not peel for alignment here, no prologue
2772 : costs will be recorded. */
2773 67525 : nopeel_outside_cost += vect_get_known_peeling_cost (loop_vinfo, 0);
2774 :
2775 67525 : npeel = best_peel.peel_info.npeel;
2776 67525 : dr0_info = best_peel.peel_info.dr_info;
2777 :
2778 : /* If no peeling is not more expensive than the best peeling we
2779 : have so far, don't perform any peeling. */
2780 67525 : if (nopeel_inside_cost <= best_peel.inside_cost)
2781 60999 : do_peeling = false;
2782 : }
2783 :
2784 146414 : if (do_peeling)
2785 : {
2786 78400 : stmt_vec_info stmt_info = dr0_info->stmt;
2787 78400 : if (known_alignment_for_access_p (dr0_info,
2788 : STMT_VINFO_VECTYPE (stmt_info)))
2789 : {
2790 6503 : bool negative = tree_int_cst_compare (DR_STEP (dr0_info->dr),
2791 6503 : size_zero_node) < 0;
2792 6503 : if (!npeel)
2793 : {
2794 : /* Since it's known at compile time, compute the number of
2795 : iterations in the peeled loop (the peeling factor) for use in
2796 : updating DR_MISALIGNMENT values. The peeling factor is the
2797 : vectorization factor minus the misalignment as an element
2798 : count. */
2799 0 : tree vectype = STMT_VINFO_VECTYPE (stmt_info);
2800 0 : poly_int64 off = 0;
2801 0 : if (negative)
2802 0 : off = ((TYPE_VECTOR_SUBPARTS (vectype) - 1)
2803 0 : * -TREE_INT_CST_LOW (TYPE_SIZE_UNIT (TREE_TYPE (vectype))));
2804 0 : unsigned int mis
2805 0 : = dr_misalignment (dr0_info, vectype, off);
2806 0 : mis = negative ? mis : -mis;
2807 : /* If known_alignment_for_access_p then we have set
2808 : DR_MISALIGNMENT which is only done if we know it at compile
2809 : time, so it is safe to assume target alignment is constant.
2810 : */
2811 0 : unsigned int target_align =
2812 0 : DR_TARGET_ALIGNMENT (dr0_info).to_constant ();
2813 0 : npeel = ((mis & (target_align - 1))
2814 0 : / vect_get_scalar_dr_size (dr0_info));
2815 : }
2816 :
2817 : /* For interleaved data access every iteration accesses all the
2818 : members of the group, therefore we divide the number of iterations
2819 : by the group size. */
2820 6503 : if (STMT_VINFO_GROUPED_ACCESS (stmt_info))
2821 281 : npeel /= DR_GROUP_SIZE (stmt_info);
2822 :
2823 6503 : if (dump_enabled_p ())
2824 284 : dump_printf_loc (MSG_NOTE, vect_location,
2825 : "Try peeling by %d\n", npeel);
2826 : }
2827 :
2828 : /* Check how peeling for alignment can support vectorization. Function
2829 : vect_peeling_supportable returns one of the three possible values:
2830 : - PEELING_KNOWN_SUPPORTED: indicates that we know all unsupported
2831 : datarefs can be aligned after peeling. We can use peeling alone.
2832 : - PEELING_MAYBE_SUPPORTED: indicates that peeling may be able to make
2833 : these datarefs aligned but we are not sure about it at compile time.
2834 : We will try peeling with versioning to add a runtime check to guard
2835 : the peeled loop.
2836 : - PEELING_UNSUPPORTED: indicates that peeling is almost impossible to
2837 : support vectorization. We will stop trying peeling. */
2838 78400 : switch (vect_peeling_supportable (loop_vinfo, dr0_info, npeel))
2839 : {
2840 : case peeling_known_supported:
2841 : break;
2842 13266 : case peeling_maybe_supported:
2843 13266 : try_peeling_with_versioning = true;
2844 13266 : break;
2845 15944 : case peeling_unsupported:
2846 15944 : do_peeling = false;
2847 15944 : break;
2848 : }
2849 :
2850 : /* With a known alignment for dr0 and a peel count of zero there is nothing to peel; return before recording any peeling. */
2851 78400 : if (do_peeling
2852 78400 : && npeel == 0
2853 78400 : && known_alignment_for_access_p (dr0_info,
2854 : STMT_VINFO_VECTYPE (stmt_info)))
2855 3 : return opt_result::success ();
2856 :
2857 : /* Cost model #1 - honor --param vect-max-peeling-for-alignment. */
2858 78397 : if (do_peeling)
2859 : {
2860 62453 : unsigned max_allowed_peel
2861 62453 : = param_vect_max_peeling_for_alignment;
2862 62453 : if (loop_cost_model (loop) <= VECT_COST_MODEL_CHEAP)
2863 : max_allowed_peel = 0;
2864 14565 : if (max_allowed_peel != (unsigned)-1)
2865 : {
2866 47909 : unsigned max_peel = npeel;
2867 47909 : if (max_peel == 0)
2868 : {
2869 45146 : poly_uint64 target_align = DR_TARGET_ALIGNMENT (dr0_info);
2870 45146 : unsigned HOST_WIDE_INT target_align_c;
2871 45146 : if (target_align.is_constant (&target_align_c))
2872 90292 : max_peel =
2873 45146 : target_align_c / vect_get_scalar_dr_size (dr0_info) - 1;
2874 : else
2875 : {
2876 : do_peeling = false;
2877 : if (dump_enabled_p ())
2878 : dump_printf_loc (MSG_NOTE, vect_location,
2879 : "Disable peeling, max peels set and vector"
2880 : " alignment unknown\n");
2881 : }
2882 : }
2883 47909 : if (max_peel > max_allowed_peel)
2884 : {
2885 47901 : do_peeling = false;
2886 47901 : if (dump_enabled_p ())
2887 53 : dump_printf_loc (MSG_NOTE, vect_location,
2888 : "Disable peeling, max peels reached: %d\n", max_peel);
2889 : }
2890 : }
2891 : }
2892 :
2893 : /* Cost model #2 - if peeling may result in a remaining loop not
2894 : iterating enough to be vectorized then do not peel. Since this
2895 : is a cost heuristic rather than a correctness decision, use the
2896 : most likely runtime value for variable vectorization factors. */
2897 53 : if (do_peeling
2898 14552 : && LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo))
2899 : {
2900 3193 : unsigned int assumed_vf = vect_vf_for_cost (loop_vinfo);
2901 3193 : unsigned int max_peel = npeel == 0 ? assumed_vf - 1 : npeel;
2902 3193 : if ((unsigned HOST_WIDE_INT) LOOP_VINFO_INT_NITERS (loop_vinfo)
2903 3193 : < assumed_vf + max_peel)
2904 : do_peeling = false;
2905 : }
2906 :
2907 : if (do_peeling)
2908 : {
2909 : /* (1.2) Update the DR_MISALIGNMENT of each data reference DR_i.
2910 : If the misalignment of DR_i is identical to that of dr0 then set
2911 : DR_MISALIGNMENT (DR_i) to zero. If the misalignment of DR_i and
2912 : dr0 are known at compile time then increment DR_MISALIGNMENT (DR_i)
2913 : by the peeling factor times the element size of DR_i (MOD the
2914 : vectorization factor times the size). Otherwise, the
2915 : misalignment of DR_i must be set to unknown. */
2916 30615 : FOR_EACH_VEC_ELT (datarefs, i, dr)
2917 16872 : if (dr != dr0_info->dr)
2918 : {
2919 3129 : dr_vec_info *dr_info = loop_vinfo->lookup_dr (dr);
2920 3129 : if (!vect_relevant_for_alignment_p (dr_info))
2921 354 : continue;
2922 :
2923 2775 : vect_update_misalignment_for_peel (dr_info, dr0_info, npeel);
2924 : }
2925 : }
2926 :
2927 78397 : if (do_peeling && !try_peeling_with_versioning)
2928 : {
2929 : /* Update data structures if peeling will be applied alone. */
2930 12691 : LOOP_VINFO_UNALIGNED_DR (loop_vinfo) = dr0_info;
2931 12691 : if (npeel)
2932 2104 : LOOP_VINFO_PEELING_FOR_ALIGNMENT (loop_vinfo) = npeel;
2933 : else
2934 10587 : LOOP_VINFO_PEELING_FOR_ALIGNMENT (loop_vinfo) = -1;
2935 12691 : SET_DR_MISALIGNMENT (dr0_info,
2936 : vect_dr_misalign_for_aligned_access (dr0_info));
2937 12691 : if (dump_enabled_p ())
2938 : {
2939 346 : dump_printf_loc (MSG_NOTE, vect_location,
2940 : "Alignment of access forced using peeling.\n");
2941 346 : dump_printf_loc (MSG_NOTE, vect_location,
2942 : "Peeling for alignment will be applied.\n");
2943 : }
2944 :
2945 : /* The inside-loop cost will be accounted for in vectorizable_load
2946 : and vectorizable_store correctly with adjusted alignments.
2947 : Drop the body_cst_vec on the floor here. */
2948 12691 : return opt_result::success ();
2949 : }
2950 : }
2951 :
2952 : /* (2) Versioning to force alignment. */
2953 :
2954 : /* Try versioning if:
2955 : 1) optimize loop for speed and the cost-model is not cheap
2956 : 2) there is at least one unsupported misaligned data ref with an unknown
2957 : misalignment, and
2958 : 3) all misaligned data refs with a known misalignment are supported, and
2959 : 4) the number of runtime alignment checks is within reason. */
2960 :
2961 354090 : do_versioning
2962 354090 : = (optimize_loop_nest_for_speed_p (loop)
2963 353651 : && !loop->inner /* FORNOW */
2964 706136 : && loop_cost_model (loop) > VECT_COST_MODEL_CHEAP);
2965 :
2966 : if (do_versioning)
2967 : {
2968 357452 : FOR_EACH_VEC_ELT (datarefs, i, dr)
2969 : {
2970 268983 : dr_vec_info *dr_info = loop_vinfo->lookup_dr (dr);
2971 268983 : if (!vect_relevant_for_alignment_p (dr_info))
2972 188929 : continue;
2973 :
2974 185352 : stmt_vec_info stmt_info = dr_info->stmt;
2975 185352 : if (STMT_VINFO_STRIDED_P (stmt_info))
2976 : {
2977 : do_versioning = false;
2978 5041 : break;
2979 : }
2980 :
2981 184264 : tree vectype = STMT_VINFO_VECTYPE (stmt_info);
2982 184264 : bool negative = tree_int_cst_compare (DR_STEP (dr),
2983 184264 : size_zero_node) < 0;
2984 184264 : poly_int64 off = 0;
2985 184264 : if (negative)
2986 3385 : off = ((TYPE_VECTOR_SUBPARTS (vectype) - 1)
2987 3385 : * -TREE_INT_CST_LOW (TYPE_SIZE_UNIT (TREE_TYPE (vectype))));
2988 184264 : int misalignment;
2989 184264 : if ((misalignment = dr_misalignment (dr_info, vectype, off)) == 0)
2990 105298 : continue;
2991 : /* Accesses with zero misalignment were skipped above; classify the remaining one. */
2992 78966 : enum dr_alignment_support supportable_dr_alignment
2993 78966 : = vect_supportable_dr_alignment (loop_vinfo, dr_info, vectype,
2994 : misalignment);
2995 78966 : if (supportable_dr_alignment == dr_unaligned_unsupported)
2996 : {
2997 15357 : if (misalignment != DR_MISALIGNMENT_UNKNOWN
2998 15357 : || (LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo).length ()
2999 11928 : >= (unsigned) param_vect_max_version_for_alignment_checks))
3000 : {
3001 : do_versioning = false;
3002 5041 : break;
3003 : }
3004 :
3005 : /* Forcing alignment in the first iteration is no good if
3006 : we don't keep it across iterations. For now, just disable
3007 : versioning in this case.
3008 : ?? We could actually unroll the loop to achieve the required
3009 : overall step alignment, and forcing the alignment could be
3010 : done by doing some iterations of the non-vectorized loop. */
3011 11520 : if (!multiple_p (vf * DR_STEP_ALIGNMENT (dr),
3012 11520 : DR_TARGET_ALIGNMENT (dr_info)))
3013 : {
3014 : do_versioning = false;
3015 : break;
3016 : }
3017 :
3018 : /* Use "mask = DR_TARGET_ALIGNMENT - 1" to test rightmost address
3019 : bits for runtime alignment check. For example, for 16 bytes
3020 : target alignment the mask is 15 = 0xf. */
3021 11520 : poly_uint64 mask = DR_TARGET_ALIGNMENT (dr_info) - 1;
3022 :
3023 : /* FORNOW: use the same mask to test all potentially unaligned
3024 : references in the loop. */
3025 11520 : if (maybe_ne (LOOP_VINFO_PTR_MASK (loop_vinfo), 0U)
3026 11520 : && maybe_ne (LOOP_VINFO_PTR_MASK (loop_vinfo), mask))
3027 : {
3028 : do_versioning = false;
3029 : break;
3030 : }
3031 :
3032 11404 : LOOP_VINFO_PTR_MASK (loop_vinfo) = mask;
3033 11404 : LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo).safe_push (stmt_info);
3034 : }
3035 : }
3036 :
3037 : /* Versioning requires at least one misaligned data reference. */
3038 93510 : if (!LOOP_REQUIRES_VERSIONING_FOR_ALIGNMENT (loop_vinfo))
3039 : do_versioning = false;
3040 5698 : else if (!do_versioning)
3041 540 : LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo).truncate (0);
3042 : }
3043 :
3044 : /* If we are trying peeling with versioning but versioning is disabled for
3045 : some reason, peeling should be turned off together. */
3046 354090 : if (try_peeling_with_versioning && !do_versioning)
3047 : do_peeling = false;
3048 :
3049 341968 : if (do_versioning)
3050 : {
3051 : const vec<stmt_vec_info> &may_misalign_stmts
3052 : = LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo);
3053 : stmt_vec_info stmt_info;
3054 :
3055 : /* It can now be assumed that the data references in the statements
3056 : in LOOP_VINFO_MAY_MISALIGN_STMTS will be aligned in the version
3057 : of the loop being vectorized. */
3058 13922 : FOR_EACH_VEC_ELT (may_misalign_stmts, i, stmt_info)
3059 : {
3060 8764 : dr_vec_info *dr_info = STMT_VINFO_DR_INFO (stmt_info);
3061 8764 : SET_DR_MISALIGNMENT (dr_info,
3062 : vect_dr_misalign_for_aligned_access (dr_info));
3063 8764 : if (dump_enabled_p ())
3064 146 : dump_printf_loc (MSG_NOTE, vect_location,
3065 : "Alignment of access forced using versioning.\n");
3066 : }
3067 :
3068 5158 : if (do_peeling)
3069 : {
3070 : /* This point is reached if peeling and versioning are used together
3071 : to ensure alignment. Update data structures to make sure the loop
3072 : is correctly peeled and a right runtime check is added for loop
3073 : versioning. */
3074 1052 : gcc_assert (try_peeling_with_versioning);
3075 1052 : LOOP_VINFO_UNALIGNED_DR (loop_vinfo) = dr0_info;
3076 1052 : LOOP_VINFO_PEELING_FOR_ALIGNMENT (loop_vinfo) = -1;
3077 1052 : LOOP_VINFO_ALLOW_MUTUAL_ALIGNMENT (loop_vinfo) = true;
3078 1052 : if (dump_enabled_p ())
3079 11 : dump_printf_loc (MSG_NOTE, vect_location,
3080 : "Both peeling and versioning will be applied.\n");
3081 : }
3082 : else
3083 : {
3084 : /* This point is reached if versioning is used alone. */
3085 4106 : LOOP_VINFO_ALLOW_MUTUAL_ALIGNMENT (loop_vinfo) = false;
3086 4106 : if (dump_enabled_p ())
3087 82 : dump_printf_loc (MSG_NOTE, vect_location,
3088 : "Versioning for alignment will be applied.\n");
3089 : }
3090 :
3091 5158 : return opt_result::success ();
3092 : }
3093 :
3094 : /* This point is reached if neither peeling nor versioning is being done. */
3095 348932 : gcc_assert (! (do_peeling || do_versioning));
3096 :
3097 348932 : return opt_result::success ();
3098 747724 : }
3099 :
3100 :
3101 : /* Function vect_analyze_data_refs_alignment
3102 :
3103 : Analyze the alignment of the data-references in the loop. Base alignments are recorded first; alignment is then computed for every data reference whose statement is vectorizable, except members of a grouped access that are not the group's first element. */
3104 :
3105 : void
3106 412326 : vect_analyze_data_refs_alignment (loop_vec_info loop_vinfo)
3107 : {
3108 412326 : DUMP_VECT_SCOPE ("vect_analyze_data_refs_alignment");
3109 :
3110 412326 : vec<data_reference_p> datarefs = LOOP_VINFO_DATAREFS (loop_vinfo);
3111 412326 : struct data_reference *dr;
3112 412326 : unsigned int i;
3113 :
3114 412326 : vect_record_base_alignments (loop_vinfo);
3115 1765556 : FOR_EACH_VEC_ELT (datarefs, i, dr)
3116 : {
3117 955389 : dr_vec_info *dr_info = loop_vinfo->lookup_dr (dr);
3118 955389 : if (STMT_VINFO_VECTORIZABLE (dr_info->stmt))
3119 : {
3120 955389 : if (STMT_VINFO_GROUPED_ACCESS (dr_info->stmt)
3121 1244297 : && DR_GROUP_FIRST_ELEMENT (dr_info->stmt) != dr_info->stmt)
3122 128383 : continue;
3123 : /* Only ungrouped accesses and the first element of each group reach this point. */
3124 827006 : vect_compute_data_ref_alignment (loop_vinfo, dr_info,
3125 : STMT_VINFO_VECTYPE (dr_info->stmt));
3126 : }
3127 : }
3128 412326 : }
3129 :
3130 :
3131 : /* Analyze alignment of DRs of stmts in NODE. The analysis works on the dr_vec_info of the first element of the group that NODE's first scalar stmt belongs to. Always returns true. */
3132 :
3133 : static bool
3134 816286 : vect_slp_analyze_node_alignment (vec_info *vinfo, slp_tree node)
3135 : {
3136 : /* Alignment is maintained in the first element of the group. */
3137 816286 : stmt_vec_info first_stmt_info = SLP_TREE_SCALAR_STMTS (node)[0];
3138 816286 : first_stmt_info = DR_GROUP_FIRST_ELEMENT (first_stmt_info);
3139 816286 : dr_vec_info *dr_info = STMT_VINFO_DR_INFO (first_stmt_info);
3140 816286 : tree vectype = SLP_TREE_VECTYPE (node);
3141 816286 : poly_uint64 vector_alignment
3142 816286 : = exact_div (targetm.vectorize.preferred_vector_alignment (vectype),
3143 : BITS_PER_UNIT);
3144 816286 : if (dr_info->misalignment == DR_MISALIGNMENT_UNINITIALIZED)
3145 777708 : vect_compute_data_ref_alignment (vinfo, dr_info, SLP_TREE_VECTYPE (node));
3146 : /* Re-analyze alignment when we're facing a vectorization with a bigger
3147 : alignment requirement. */
3148 38578 : else if (known_lt (dr_info->target_alignment, vector_alignment))
3149 : {
3150 71 : poly_uint64 old_target_alignment = dr_info->target_alignment;
3151 71 : int old_misalignment = dr_info->misalignment;
3152 71 : vect_compute_data_ref_alignment (vinfo, dr_info, SLP_TREE_VECTYPE (node));
3153 : /* But keep knowledge about a smaller alignment: if the misalignment was known before and is unknown after the re-analysis, restore the old target alignment and misalignment. */
3154 71 : if (old_misalignment != DR_MISALIGNMENT_UNKNOWN
3155 38 : && dr_info->misalignment == DR_MISALIGNMENT_UNKNOWN)
3156 : {
3157 1 : dr_info->target_alignment = old_target_alignment;
3158 1 : dr_info->misalignment = old_misalignment;
3159 : }
3160 : }
3161 : /* When we ever face unordered target alignments the first one wins in terms
3162 : of analyzing and the other will become unknown in dr_misalignment. */
3163 816286 : return true;
3164 : }
3165 :
3166 : /* Function vect_slp_analyze_instance_alignment
3167 :
3168 : Analyze the alignment of the data-references in the SLP instance: every node in SLP_INSTANCE_LOADS and, for instances of kind slp_inst_kind_store, the node given by SLP_INSTANCE_TREE.
3169 : Return FALSE as soon as the analysis of one of these nodes fails, TRUE otherwise. */
3170 :
3171 : bool
3172 788684 : vect_slp_analyze_instance_alignment (vec_info *vinfo,
3173 : slp_instance instance)
3174 : {
3175 788684 : DUMP_VECT_SCOPE ("vect_slp_analyze_instance_alignment");
3176 :
3177 788684 : slp_tree node;
3178 788684 : unsigned i;
3179 944103 : FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (instance), i, node)
3180 155419 : if (! vect_slp_analyze_node_alignment (vinfo, node))
3181 : return false;
3182 : /* Store instances additionally get the node in SLP_INSTANCE_TREE analyzed. */
3183 788684 : if (SLP_INSTANCE_KIND (instance) == slp_inst_kind_store
3184 788684 : && ! vect_slp_analyze_node_alignment
3185 660867 : (vinfo, SLP_INSTANCE_TREE (instance)))
3186 : return false;
3187 :
3188 : return true;
3189 : }
3190 :
3191 :
3192 : /* Analyze groups of accesses: check that DR_INFO belongs to a group of
3193 : accesses of legal size, step, etc. Detect gaps, single element
3194 : interleaving, and other special cases. Set grouped access info.
3195 : Collect groups of strided stores for further use in SLP analysis.
3196 : Worker for vect_analyze_group_access. */
3197 :
3198 : static bool
3199 12500174 : vect_analyze_group_access_1 (vec_info *vinfo, dr_vec_info *dr_info)
3200 : {
3201 12500174 : data_reference *dr = dr_info->dr;
3202 12500174 : tree step = DR_STEP (dr);
3203 12500174 : tree scalar_type = TREE_TYPE (DR_REF (dr));
3204 12500174 : HOST_WIDE_INT type_size = TREE_INT_CST_LOW (TYPE_SIZE_UNIT (scalar_type));
3205 12500174 : stmt_vec_info stmt_info = dr_info->stmt;
3206 12500174 : loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
3207 12500174 : bb_vec_info bb_vinfo = dyn_cast <bb_vec_info> (vinfo);
3208 12500174 : HOST_WIDE_INT dr_step = -1;
3209 12500174 : HOST_WIDE_INT groupsize, last_accessed_element = 1;
3210 12500174 : bool slp_impossible = false;
3211 :
3212 : /* For interleaving, GROUPSIZE is STEP counted in elements, i.e., the
3213 : size of the interleaving group (including gaps). */
3214 12500174 : if (tree_fits_shwi_p (step))
3215 : {
3216 12490576 : dr_step = tree_to_shwi (step);
3217 : /* Check that STEP is a multiple of type size. Otherwise there is
3218 : a non-element-sized gap at the end of the group which we
3219 : cannot represent in DR_GROUP_GAP or DR_GROUP_SIZE.
3220 : ??? As we can handle non-constant step fine here we should
3221 : simply remove uses of DR_GROUP_GAP between the last and first
3222 : element and instead rely on DR_STEP. DR_GROUP_SIZE then would
3223 : simply not include that gap. */
3224 12490576 : if ((dr_step % type_size) != 0)
3225 : {
3226 498 : if (dump_enabled_p ())
3227 27 : dump_printf_loc (MSG_NOTE, vect_location,
3228 : "Step %T is not a multiple of the element size"
3229 : " for %T\n",
3230 : step, DR_REF (dr));
3231 498 : return false;
3232 : }
3233 12490078 : groupsize = absu_hwi (dr_step) / type_size;
3234 : }
3235 : else
3236 : groupsize = 0;
3237 :
3238 : /* Not consecutive access is possible only if it is a part of interleaving. */
3239 12499676 : if (!DR_GROUP_FIRST_ELEMENT (stmt_info))
3240 : {
3241 : /* Check if it this DR is a part of interleaving, and is a single
3242 : element of the group that is accessed in the loop. */
3243 :
3244 : /* Gaps are supported only for loads. STEP must be a multiple of the type
3245 : size. */
3246 8369505 : if (DR_IS_READ (dr)
3247 4996780 : && (dr_step % type_size) == 0
3248 : && groupsize > 0
3249 : /* This could be UINT_MAX but as we are generating code in a very
3250 : inefficient way we have to cap earlier.
3251 : See PR91403 for example. */
3252 4996780 : && groupsize <= 4096)
3253 : {
3254 73051 : DR_GROUP_FIRST_ELEMENT (stmt_info) = stmt_info;
3255 73051 : DR_GROUP_SIZE (stmt_info) = groupsize;
3256 73051 : DR_GROUP_GAP (stmt_info) = groupsize - 1;
3257 73051 : if (dump_enabled_p ())
3258 1492 : dump_printf_loc (MSG_NOTE, vect_location,
3259 : "Detected single element interleaving %T"
3260 : " step %T\n",
3261 : DR_REF (dr), step);
3262 :
3263 73051 : return true;
3264 : }
3265 :
3266 8296454 : if (dump_enabled_p ())
3267 3130 : dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3268 : "not consecutive access %G", stmt_info->stmt);
3269 :
3270 8296454 : if (bb_vinfo)
3271 : {
3272 : /* Mark the statement as unvectorizable. */
3273 8277521 : STMT_VINFO_VECTORIZABLE (stmt_info) = false;
3274 8277521 : return true;
3275 : }
3276 :
3277 18933 : if (dump_enabled_p ())
3278 305 : dump_printf_loc (MSG_NOTE, vect_location, "using strided accesses\n");
3279 18933 : STMT_VINFO_STRIDED_P (stmt_info) = true;
3280 18933 : return true;
3281 : }
3282 :
3283 4130171 : if (DR_GROUP_FIRST_ELEMENT (stmt_info) == stmt_info)
3284 : {
3285 : /* First stmt in the interleaving chain. Check the chain. */
3286 1499405 : stmt_vec_info next = DR_GROUP_NEXT_ELEMENT (stmt_info);
3287 1499405 : struct data_reference *data_ref = dr;
3288 1499405 : unsigned int count = 1;
3289 1499405 : tree prev_init = DR_INIT (data_ref);
3290 1499405 : HOST_WIDE_INT diff, gaps = 0;
3291 :
3292 : /* By construction, all group members have INTEGER_CST DR_INITs. */
3293 4130180 : while (next)
3294 : {
3295 : /* We never have the same DR multiple times. */
3296 2630837 : gcc_assert (tree_int_cst_compare (DR_INIT (data_ref),
3297 : DR_INIT (STMT_VINFO_DATA_REF (next))) != 0);
3298 :
3299 2630837 : data_ref = STMT_VINFO_DATA_REF (next);
3300 :
3301 : /* All group members have the same STEP by construction. */
3302 2630837 : gcc_checking_assert (operand_equal_p (DR_STEP (data_ref), step, 0));
3303 :
3304 : /* Check that the distance between two accesses is equal to the type
3305 : size. Otherwise, we have gaps. */
3306 2630837 : diff = (TREE_INT_CST_LOW (DR_INIT (data_ref))
3307 2630837 : - TREE_INT_CST_LOW (prev_init)) / type_size;
3308 2630837 : if (diff < 1 || diff > UINT_MAX)
3309 : {
3310 : /* For artificial testcases with array accesses with large
3311 : constant indices we can run into overflow issues which
3312 : can end up fooling the groupsize constraint below so
3313 : check the individual gaps (which are represented as
3314 : unsigned int) as well. */
3315 0 : if (dump_enabled_p ())
3316 0 : dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3317 : "interleaved access with gap larger "
3318 : "than representable\n");
3319 0 : return false;
3320 : }
3321 2630837 : if (diff != 1)
3322 : {
3323 : /* FORNOW: SLP of accesses with gaps is not supported. */
3324 102471 : slp_impossible = true;
3325 102471 : if (DR_IS_WRITE (data_ref))
3326 : {
3327 62 : if (dump_enabled_p ())
3328 0 : dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3329 : "interleaved store with gaps\n");
3330 62 : return false;
3331 : }
3332 :
3333 102409 : gaps += diff - 1;
3334 : }
3335 :
3336 2630775 : last_accessed_element += diff;
3337 :
3338 : /* Store the gap from the previous member of the group. If there is no
3339 : gap in the access, DR_GROUP_GAP is always 1. */
3340 2630775 : DR_GROUP_GAP (next) = diff;
3341 :
3342 2630775 : prev_init = DR_INIT (data_ref);
3343 2630775 : next = DR_GROUP_NEXT_ELEMENT (next);
3344 : /* Count the number of data-refs in the chain. */
3345 2630775 : count++;
3346 : }
3347 :
3348 1499343 : if (groupsize == 0)
3349 1429131 : groupsize = count + gaps;
3350 :
3351 : /* This could be UINT_MAX but as we are generating code in a very
3352 : inefficient way we have to cap earlier. See PR78699 for example. */
3353 1499343 : if (groupsize > 4096)
3354 : {
3355 1 : if (dump_enabled_p ())
3356 1 : dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3357 : "group is too large\n");
3358 1 : return false;
3359 : }
3360 :
3361 : /* Check that the size of the interleaving is equal to count for stores,
3362 : i.e., that there are no gaps. */
3363 1499342 : if (groupsize != count
3364 105924 : && !DR_IS_READ (dr))
3365 : {
3366 11368 : groupsize = count;
3367 11368 : STMT_VINFO_STRIDED_P (stmt_info) = true;
3368 : }
3369 :
3370 : /* If there is a gap after the last load in the group it is the
3371 : difference between the groupsize and the last accessed
3372 : element.
3373 : When there is no gap, this difference should be 0. */
3374 1499342 : DR_GROUP_GAP (stmt_info) = groupsize - last_accessed_element;
3375 :
3376 1499342 : DR_GROUP_SIZE (stmt_info) = groupsize;
3377 1499342 : if (dump_enabled_p ())
3378 : {
3379 8020 : dump_printf_loc (MSG_NOTE, vect_location,
3380 : "Detected interleaving ");
3381 8020 : if (DR_IS_READ (dr))
3382 4323 : dump_printf (MSG_NOTE, "load ");
3383 3697 : else if (STMT_VINFO_STRIDED_P (stmt_info))
3384 496 : dump_printf (MSG_NOTE, "strided store ");
3385 : else
3386 3201 : dump_printf (MSG_NOTE, "store ");
3387 8020 : dump_printf (MSG_NOTE, "of size %u\n",
3388 : (unsigned)groupsize);
3389 8020 : dump_printf_loc (MSG_NOTE, vect_location, "\t%G", stmt_info->stmt);
3390 8020 : next = DR_GROUP_NEXT_ELEMENT (stmt_info);
3391 39497 : while (next)
3392 : {
3393 31477 : if (DR_GROUP_GAP (next) != 1)
3394 317 : dump_printf_loc (MSG_NOTE, vect_location,
3395 : "\t<gap of %d elements>\n",
3396 317 : DR_GROUP_GAP (next) - 1);
3397 31477 : dump_printf_loc (MSG_NOTE, vect_location, "\t%G", next->stmt);
3398 31477 : next = DR_GROUP_NEXT_ELEMENT (next);
3399 : }
3400 8020 : if (DR_GROUP_GAP (stmt_info) != 0)
3401 398 : dump_printf_loc (MSG_NOTE, vect_location,
3402 : "\t<gap of %d elements>\n",
3403 398 : DR_GROUP_GAP (stmt_info));
3404 : }
3405 :
3406 : /* SLP: create an SLP data structure for every interleaving group of
3407 : stores for further analysis in vect_analyse_slp. */
3408 1499342 : if (DR_IS_WRITE (dr) && !slp_impossible)
3409 : {
3410 920838 : if (loop_vinfo)
3411 29369 : LOOP_VINFO_GROUPED_STORES (loop_vinfo).safe_push (stmt_info);
3412 920838 : if (bb_vinfo)
3413 891469 : BB_VINFO_GROUPED_STORES (bb_vinfo).safe_push (stmt_info);
3414 : }
3415 : }
3416 :
3417 : return true;
3418 : }
3419 :
3420 : /* Analyze groups of accesses: check that DR_INFO belongs to a group of
3421 : accesses of legal size, step, etc. Detect gaps, single element
3422 : interleaving, and other special cases. Set grouped access info.
3423 : Collect groups of strided stores for further use in SLP analysis. */
3424 :
3425 : static bool
3426 12500174 : vect_analyze_group_access (vec_info *vinfo, dr_vec_info *dr_info)
3427 : {
3428 12500174 : if (!vect_analyze_group_access_1 (vinfo, dr_info))
3429 : {
3430 : /* Dissolve the group if present. */
3431 561 : stmt_vec_info stmt_info = DR_GROUP_FIRST_ELEMENT (dr_info->stmt);
3432 792 : while (stmt_info)
3433 : {
3434 231 : stmt_vec_info next = DR_GROUP_NEXT_ELEMENT (stmt_info);
3435 231 : DR_GROUP_FIRST_ELEMENT (stmt_info) = NULL;
3436 231 : DR_GROUP_NEXT_ELEMENT (stmt_info) = NULL;
3437 231 : stmt_info = next;
3438 : }
3439 : return false;
3440 : }
3441 : return true;
3442 : }
3443 :
3444 : /* Analyze the access pattern of the data-reference DR_INFO.
3445 : In case of non-consecutive accesses call vect_analyze_group_access() to
3446 : analyze groups of accesses. */
3447 :
3448 : static bool
3449 13254921 : vect_analyze_data_ref_access (vec_info *vinfo, dr_vec_info *dr_info)
3450 : {
3451 13254921 : data_reference *dr = dr_info->dr;
3452 13254921 : tree step = DR_STEP (dr);
3453 13254921 : tree scalar_type = TREE_TYPE (DR_REF (dr));
3454 13254921 : stmt_vec_info stmt_info = dr_info->stmt;
3455 13254921 : loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
3456 13254921 : class loop *loop = NULL;
3457 :
3458 13254921 : if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
3459 : return true;
3460 :
3461 13154910 : if (loop_vinfo)
3462 968958 : loop = LOOP_VINFO_LOOP (loop_vinfo);
3463 :
3464 13154910 : if (loop_vinfo && !step)
3465 : {
3466 0 : if (dump_enabled_p ())
3467 0 : dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3468 : "bad data-ref access in loop\n");
3469 0 : return false;
3470 : }
3471 :
3472 : /* Allow loads with zero step in inner-loop vectorization. */
3473 13154910 : if (loop_vinfo && integer_zerop (step))
3474 : {
3475 14162 : DR_GROUP_FIRST_ELEMENT (stmt_info) = NULL;
3476 14162 : DR_GROUP_NEXT_ELEMENT (stmt_info) = NULL;
3477 14162 : if (!nested_in_vect_loop_p (loop, stmt_info))
3478 13901 : return DR_IS_READ (dr);
3479 : /* Allow references with zero step for outer loops marked
3480 : with pragma omp simd only - it guarantees absence of
3481 : loop-carried dependencies between inner loop iterations. */
3482 261 : if (loop->safelen < 2)
3483 : {
3484 225 : if (dump_enabled_p ())
3485 6 : dump_printf_loc (MSG_NOTE, vect_location,
3486 : "zero step in inner loop of nest\n");
3487 225 : return false;
3488 : }
3489 : }
3490 :
3491 13140748 : if (loop && nested_in_vect_loop_p (loop, stmt_info))
3492 : {
3493 : /* Interleaved accesses are not yet supported within outer-loop
3494 : vectorization for references in the inner-loop. */
3495 5812 : DR_GROUP_FIRST_ELEMENT (stmt_info) = NULL;
3496 5812 : DR_GROUP_NEXT_ELEMENT (stmt_info) = NULL;
3497 :
3498 : /* For the rest of the analysis we use the outer-loop step. */
3499 5812 : step = STMT_VINFO_DR_STEP (stmt_info);
3500 5812 : if (integer_zerop (step))
3501 : {
3502 1290 : if (dump_enabled_p ())
3503 241 : dump_printf_loc (MSG_NOTE, vect_location,
3504 : "zero step in outer loop.\n");
3505 1290 : return DR_IS_READ (dr);
3506 : }
3507 : }
3508 :
3509 : /* Consecutive? */
3510 13139494 : if (TREE_CODE (step) == INTEGER_CST)
3511 : {
3512 13100216 : HOST_WIDE_INT dr_step = TREE_INT_CST_LOW (step);
3513 13100216 : if (!tree_int_cst_compare (step, TYPE_SIZE_UNIT (scalar_type))
3514 13100216 : || (dr_step < 0
3515 28891 : && !compare_tree_int (TYPE_SIZE_UNIT (scalar_type), -dr_step)))
3516 : {
3517 : /* Mark that it is not interleaving. */
3518 606769 : DR_GROUP_FIRST_ELEMENT (stmt_info) = NULL;
3519 606769 : DR_GROUP_NEXT_ELEMENT (stmt_info) = NULL;
3520 606769 : return true;
3521 : }
3522 : }
3523 :
3524 12532725 : if (loop && nested_in_vect_loop_p (loop, stmt_info))
3525 : {
3526 3330 : if (dump_enabled_p ())
3527 163 : dump_printf_loc (MSG_NOTE, vect_location,
3528 : "grouped access in outer loop.\n");
3529 3330 : return false;
3530 : }
3531 :
3532 :
3533 : /* Assume this is a DR handled by non-constant strided load case. */
3534 12529395 : if (TREE_CODE (step) != INTEGER_CST)
3535 38819 : return (STMT_VINFO_STRIDED_P (stmt_info)
3536 38819 : && (!STMT_VINFO_GROUPED_ACCESS (stmt_info)
3537 9598 : || vect_analyze_group_access (vinfo, dr_info)));
3538 :
3539 : /* Not consecutive access - check if it's a part of interleaving group. */
3540 12490576 : return vect_analyze_group_access (vinfo, dr_info);
3541 : }
3542 :
3543 : /* Compare two data-references DRA and DRB to group them into chunks
3544 : suitable for grouping. */
3545 :
3546 : static int
3547 345861699 : dr_group_sort_cmp (const void *dra_, const void *drb_)
3548 : {
3549 345861699 : dr_vec_info *dra_info = *(dr_vec_info **)const_cast<void *>(dra_);
3550 345861699 : dr_vec_info *drb_info = *(dr_vec_info **)const_cast<void *>(drb_);
3551 345861699 : data_reference_p dra = dra_info->dr;
3552 345861699 : data_reference_p drb = drb_info->dr;
3553 345861699 : int cmp;
3554 :
3555 : /* Stabilize sort. */
3556 345861699 : if (dra == drb)
3557 : return 0;
3558 :
3559 : /* Different group IDs lead never belong to the same group. */
3560 345861699 : if (dra_info->group != drb_info->group)
3561 377032164 : return dra_info->group < drb_info->group ? -1 : 1;
3562 :
3563 : /* Ordering of DRs according to base. */
3564 97832872 : cmp = data_ref_compare_tree (DR_BASE_ADDRESS (dra),
3565 : DR_BASE_ADDRESS (drb));
3566 97832872 : if (cmp != 0)
3567 : return cmp;
3568 :
3569 : /* And according to DR_OFFSET. */
3570 52980237 : cmp = data_ref_compare_tree (DR_OFFSET (dra), DR_OFFSET (drb));
3571 52980237 : if (cmp != 0)
3572 : return cmp;
3573 :
3574 : /* Put reads before writes. */
3575 52629372 : if (DR_IS_READ (dra) != DR_IS_READ (drb))
3576 4265279 : return DR_IS_READ (dra) ? -1 : 1;
3577 :
3578 : /* Then sort after access size. */
3579 49727981 : cmp = data_ref_compare_tree (TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (dra))),
3580 49727981 : TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (drb))));
3581 49727981 : if (cmp != 0)
3582 : return cmp;
3583 :
3584 : /* And after step. */
3585 43011253 : cmp = data_ref_compare_tree (DR_STEP (dra), DR_STEP (drb));
3586 43011253 : if (cmp != 0)
3587 : return cmp;
3588 :
3589 : /* Then sort after DR_INIT. In case of identical DRs sort after stmt UID. */
3590 43003879 : cmp = data_ref_compare_tree (DR_INIT (dra), DR_INIT (drb));
3591 43003879 : if (cmp == 0)
3592 497942 : return gimple_uid (DR_STMT (dra)) < gimple_uid (DR_STMT (drb)) ? -1 : 1;
3593 : return cmp;
3594 : }
3595 :
3596 : /* If OP is the result of a conversion, return the unconverted value,
3597 : otherwise return null. */
3598 :
3599 : static tree
3600 402 : strip_conversion (tree op)
3601 : {
3602 402 : if (TREE_CODE (op) != SSA_NAME)
3603 : return NULL_TREE;
3604 402 : gimple *stmt = SSA_NAME_DEF_STMT (op);
3605 402 : if (!is_gimple_assign (stmt)
3606 402 : || !CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (stmt)))
3607 : return NULL_TREE;
3608 186 : return gimple_assign_rhs1 (stmt);
3609 : }
3610 :
3611 : /* Return true if vectorizable_* routines can handle statements STMT1_INFO
3612 : and STMT2_INFO being in a single group. When ALLOW_SLP_P, masked loads can
3613 : be grouped in SLP mode. */
3614 :
3615 : static bool
3616 7020828 : can_group_stmts_p (stmt_vec_info stmt1_info, stmt_vec_info stmt2_info,
3617 : bool allow_slp_p)
3618 : {
3619 7020828 : if (gimple_assign_single_p (stmt1_info->stmt))
3620 7018926 : return gimple_assign_single_p (stmt2_info->stmt);
3621 :
3622 1902 : gcall *call1 = dyn_cast <gcall *> (stmt1_info->stmt);
3623 1902 : if (call1 && gimple_call_internal_p (call1))
3624 : {
3625 : /* Check for two masked loads or two masked stores. */
3626 2155 : gcall *call2 = dyn_cast <gcall *> (stmt2_info->stmt);
3627 1886 : if (!call2 || !gimple_call_internal_p (call2))
3628 : return false;
3629 1886 : internal_fn ifn = gimple_call_internal_fn (call1);
3630 1886 : if (ifn != IFN_MASK_LOAD && ifn != IFN_MASK_STORE)
3631 : return false;
3632 1886 : if (ifn != gimple_call_internal_fn (call2))
3633 : return false;
3634 :
3635 : /* Check that the masks are the same. Cope with casts of masks,
3636 : like those created by build_mask_conversion. */
3637 1886 : tree mask1 = gimple_call_arg (call1, 2);
3638 1886 : tree mask2 = gimple_call_arg (call2, 2);
3639 1886 : if (!operand_equal_p (mask1, mask2, 0) && !allow_slp_p)
3640 : {
3641 309 : mask1 = strip_conversion (mask1);
3642 309 : if (!mask1)
3643 : return false;
3644 93 : mask2 = strip_conversion (mask2);
3645 93 : if (!mask2)
3646 : return false;
3647 93 : if (!operand_equal_p (mask1, mask2, 0))
3648 : return false;
3649 : }
3650 1633 : return true;
3651 : }
3652 :
3653 : return false;
3654 : }
3655 :
3656 : /* Function vect_analyze_data_ref_accesses.
3657 :
3658 : Analyze the access pattern of all the data references recorded in
   VINFO->shared->datarefs.
3659 :
3660 : The function works in three phases on a sorted copy of the references
3661 : (the original vector keeps its order):
3662 :
   1. Walk the sorted copy and link neighbouring references that share
      group ID, base, offset, size, step and type into interleaving chains
      (DR_GROUP_FIRST_ELEMENT / DR_GROUP_NEXT_ELEMENT).
   2. Split chains that contain the same access more than once.
   3. Run vect_analyze_data_ref_access on every vectorizable reference.

   DATAREF_GROUPS, when non-null, supplies the group ID of each data
   reference (indexed like the datarefs vector); otherwise the index of
   the basic block containing the reference's statement is used.

   Returns success, or for loop vectorization a failure at the first
   reference with an unsupported access pattern.  For basic-block
   vectorization such references are only marked as not vectorizable.

3663 : FORNOW: handle only arrays and pointer accesses. */
3664 :
3665 : opt_result
3666 2624173 : vect_analyze_data_ref_accesses (vec_info *vinfo,
3667 : vec<int> *dataref_groups)
3668 : {
3669 2624173 : unsigned int i;
3670 2624173 : vec<data_reference_p> datarefs = vinfo->shared->datarefs;
3671 :
3672 2624173 : DUMP_VECT_SCOPE ("vect_analyze_data_ref_accesses");
3673 :
3674 2624173 : if (datarefs.is_empty ())
3675 1048828 : return opt_result::success ();
3676 :
3677 : /* Sort the array of datarefs to make building the interleaving chains
3678 : linear. Don't modify the original vector's order, it is needed for
3679 : determining what dependencies are reversed. */
3680 1575345 : vec<dr_vec_info *> datarefs_copy;
3681 1575345 : datarefs_copy.create (datarefs.length ());
3682 16592463 : for (unsigned i = 0; i < datarefs.length (); i++)
3683 : {
3684 15017118 : dr_vec_info *dr_info = vinfo->lookup_dr (datarefs[i]);
3685 : /* If the caller computed DR grouping use that, otherwise group by
3686 : basic blocks. */
3687 15017118 : if (dataref_groups)
3688 13934131 : dr_info->group = (*dataref_groups)[i];
3689 : else
3690 1082987 : dr_info->group = gimple_bb (DR_STMT (datarefs[i]))->index;
3691 15017118 : datarefs_copy.quick_push (dr_info);
3692 : }
3693 1575345 : datarefs_copy.qsort (dr_group_sort_cmp);
/* Heads of chains in which the same access was linked more than once;
   they are split up after all chains have been built. */
3694 1575345 : hash_set<stmt_vec_info> to_fixup;
3695 :
3696 : /* Build the interleaving chains. */
/* The outer loop has no increment of its own: the inner loop advances the
   shared index I, and the element at which the inner loop stops becomes
   the candidate head (DRA) of the next chain. */
3697 14169326 : for (i = 0; i < datarefs_copy.length () - 1;)
3698 : {
3699 11018636 : dr_vec_info *dr_info_a = datarefs_copy[i];
3700 11018636 : data_reference_p dra = dr_info_a->dr;
3701 11018636 : int dra_group_id = dr_info_a->group;
3702 11018636 : stmt_vec_info stmtinfo_a = dr_info_a->stmt;
3703 11018636 : stmt_vec_info lastinfo = NULL;
/* Statements that are not vectorizable or are gather/scatter accesses
   never head a chain. */
3704 11018636 : if (!STMT_VINFO_VECTORIZABLE (stmtinfo_a)
3705 9406428 : || STMT_VINFO_GATHER_SCATTER_P (stmtinfo_a))
3706 : {
3707 1677107 : ++i;
3708 1677107 : continue;
3709 : }
/* Try to append the following elements to the chain headed by DRA; every
   BREAK below ends the chain in front of the current element. */
3710 24600129 : for (i = i + 1; i < datarefs_copy.length (); ++i)
3711 : {
3712 11764666 : dr_vec_info *dr_info_b = datarefs_copy[i];
3713 11764666 : data_reference_p drb = dr_info_b->dr;
3714 11764666 : int drb_group_id = dr_info_b->group;
3715 11764666 : stmt_vec_info stmtinfo_b = dr_info_b->stmt;
3716 11764666 : if (!STMT_VINFO_VECTORIZABLE (stmtinfo_b)
3717 11458902 : || STMT_VINFO_GATHER_SCATTER_P (stmtinfo_b))
3718 : break;
3719 :
3720 : /* ??? Imperfect sorting (non-compatible types, non-modulo
3721 : accesses, same accesses) can lead to a group to be artificially
3722 : split here as we don't just skip over those. If it really
3723 : matters we can push those to a worklist and re-iterate
3724 : over them. Then we can just skip ahead to the next DR here. */
3725 :
3726 : /* DRs in a different DR group should not be put into the same
3727 : interleaving group. */
3728 11455281 : if (dra_group_id != drb_group_id)
3729 : break;
3730 :
3731 : /* Check that the data-refs have same first location (except init)
3732 : and they are both either store or load (not load and store,
3733 : not masked loads or stores). */
3734 7287543 : if (DR_IS_READ (dra) != DR_IS_READ (drb)
3735 5993262 : || data_ref_compare_tree (DR_BASE_ADDRESS (dra),
3736 : DR_BASE_ADDRESS (drb)) != 0
3737 4375626 : || data_ref_compare_tree (DR_OFFSET (dra), DR_OFFSET (drb)) != 0
3738 11644174 : || !can_group_stmts_p (stmtinfo_a, stmtinfo_b, true))
3739 : break;
3740 :
3741 : /* Check that the data-refs have the same constant size. */
3742 4356606 : tree sza = TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (dra)));
3743 4356606 : tree szb = TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (drb)));
3744 4356606 : if (!tree_fits_uhwi_p (sza)
3745 4356606 : || !tree_fits_uhwi_p (szb)
3746 8713212 : || !tree_int_cst_equal (sza, szb))
3747 : break;
3748 :
3749 : /* Check that the data-refs have the same step. */
3750 4011781 : if (data_ref_compare_tree (DR_STEP (dra), DR_STEP (drb)) != 0)
3751 : break;
3752 :
3753 : /* Check the types are compatible.
3754 : ??? We don't distinguish this during sorting. */
3755 4011061 : if (!types_compatible_p (TREE_TYPE (DR_REF (dra)),
3756 4011061 : TREE_TYPE (DR_REF (drb))))
3757 : break;
3758 :
3759 : /* Check that the DR_INITs are compile-time constants. */
3760 2875753 : if (!tree_fits_shwi_p (DR_INIT (dra))
3761 2875753 : || !tree_fits_shwi_p (DR_INIT (drb)))
3762 : break;
3763 :
3764 : /* Different .GOMP_SIMD_LANE calls still give the same lane,
3765 : just hold extra information. */
3766 2875753 : if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmtinfo_a)
3767 1240 : && STMT_VINFO_SIMD_LANE_ACCESS_P (stmtinfo_b)
3768 2876993 : && data_ref_compare_tree (DR_INIT (dra), DR_INIT (drb)) == 0)
3769 : break;
3770 :
3771 : /* Sorting has ensured that DR_INIT (dra) <= DR_INIT (drb). */
3772 2874513 : HOST_WIDE_INT init_a = TREE_INT_CST_LOW (DR_INIT (dra));
3773 2874513 : HOST_WIDE_INT init_b = TREE_INT_CST_LOW (DR_INIT (drb));
/* INIT_PREV is the DR_INIT of the element just before DRB in the sorted
   copy. */
3774 2874513 : HOST_WIDE_INT init_prev
3775 2874513 : = TREE_INT_CST_LOW (DR_INIT (datarefs_copy[i-1]->dr));
3776 2874513 : gcc_assert (init_a <= init_b
3777 : && init_a <= init_prev
3778 : && init_prev <= init_b);
3779 :
3780 : /* Do not place the same access in the interleaving chain twice. */
3781 2874513 : if (init_b == init_prev)
3782 : {
3783 29902 : gcc_assert (gimple_uid (DR_STMT (datarefs_copy[i-1]->dr))
3784 : < gimple_uid (DR_STMT (drb)));
3785 : /* Simply link in duplicates and fix up the chain below. */
3786 : }
3787 : else
3788 : {
3789 : /* If init_b == init_a + the size of the type * k, we have an
3790 : interleaving, and DRA is accessed before DRB. */
3791 2844611 : unsigned HOST_WIDE_INT type_size_a = tree_to_uhwi (sza);
3792 2844611 : if (type_size_a == 0
3793 2844611 : || (((unsigned HOST_WIDE_INT)init_b - init_a)
3794 2844611 : % type_size_a != 0))
3795 : break;
3796 :
3797 : /* If we have a store, the accesses are adjacent. This splits
3798 : groups into chunks we support (we don't support vectorization
3799 : of stores with gaps). */
3800 2842885 : if (!DR_IS_READ (dra)
3801 1865366 : && (((unsigned HOST_WIDE_INT)init_b - init_prev)
3802 : != type_size_a))
3803 : break;
3804 :
3805 : /* For datarefs with big gap, it's better to split them into different
3806 : groups,
3807 : i.e. a[0], a[1], a[2], .. a[7], a[100], a[101],..., a[107] */
3808 2663537 : if ((unsigned HOST_WIDE_INT)(init_b - init_prev)
3809 : > MAX_BITSIZE_MODE_ANY_MODE / BITS_PER_UNIT)
3810 : break;
3811 :
3812 : /* If the step (if not zero or non-constant) is smaller than the
3813 : difference between data-refs' inits this splits groups into
3814 : suitable sizes. */
3815 2653944 : if (tree_fits_shwi_p (DR_STEP (dra)))
3816 : {
3817 2647636 : unsigned HOST_WIDE_INT step
3818 2647636 : = absu_hwi (tree_to_shwi (DR_STEP (dra)));
3819 2647636 : if (step != 0
3820 164399 : && step <= ((unsigned HOST_WIDE_INT)init_b - init_a))
3821 : break;
3822 : }
3823 : }
3824 :
3825 2664619 : if (dump_enabled_p ())
3826 32385 : dump_printf_loc (MSG_NOTE, vect_location,
3827 32385 : DR_IS_READ (dra)
3828 : ? "Detected interleaving load %T and %T\n"
3829 : : "Detected interleaving store %T and %T\n",
3830 : DR_REF (dra), DR_REF (drb));
3831 :
3832 : /* Link the found element into the group list. */
3833 2664619 : if (!DR_GROUP_FIRST_ELEMENT (stmtinfo_a))
3834 : {
3835 1477725 : DR_GROUP_FIRST_ELEMENT (stmtinfo_a) = stmtinfo_a;
3836 1477725 : lastinfo = stmtinfo_a;
3837 : }
3838 2664619 : DR_GROUP_FIRST_ELEMENT (stmtinfo_b) = stmtinfo_a;
3839 2664619 : DR_GROUP_NEXT_ELEMENT (lastinfo) = stmtinfo_b;
3840 2664619 : lastinfo = stmtinfo_b;
3841 :
/* Once the group head is flagged SLP-only the flag is kept; otherwise it
   is set when the pair cannot be grouped with ALLOW_SLP_P false. */
3842 2664619 : if (! STMT_VINFO_SLP_VECT_ONLY (stmtinfo_a))
3843 : {
3844 2664197 : STMT_VINFO_SLP_VECT_ONLY (stmtinfo_a)
3845 2664197 : = !can_group_stmts_p (stmtinfo_a, stmtinfo_b, false);
3846 :
3847 2664197 : if (dump_enabled_p () && STMT_VINFO_SLP_VECT_ONLY (stmtinfo_a))
3848 126 : dump_printf_loc (MSG_NOTE, vect_location,
3849 : "Load suitable for SLP vectorization only.\n");
3850 : }
3851 :
/* Queue the group head when a duplicate was linked in; the dump fires
   only when to_fixup.add returns false. */
3852 2664619 : if (init_b == init_prev
3853 29902 : && !to_fixup.add (DR_GROUP_FIRST_ELEMENT (stmtinfo_a))
3854 2681129 : && dump_enabled_p ())
3855 213 : dump_printf_loc (MSG_NOTE, vect_location,
3856 : "Queuing group with duplicate access for fixup\n");
3857 : }
3858 : }
3859 :
3860 : /* Fixup groups with duplicate entries by splitting it. */
/* The set is modified inside the loop (remove and add), so a fresh
   iterator is fetched on every round until the set is empty. */
3861 1618511 : while (1)
3862 : {
3863 1618511 : hash_set<stmt_vec_info>::iterator it = to_fixup.begin ();
3864 1618511 : if (!(it != to_fixup.end ()))
3865 : break;
3866 43166 : stmt_vec_info grp = *it;
3867 43166 : to_fixup.remove (grp);
3868 :
3869 : /* Find the earliest duplicate group member. */
3870 43166 : unsigned first_duplicate = -1u;
3871 43166 : stmt_vec_info next, g = grp;
3872 276908 : while ((next = DR_GROUP_NEXT_ELEMENT (g)))
3873 : {
3874 190576 : if (tree_int_cst_equal (DR_INIT (STMT_VINFO_DR_INFO (next)->dr),
3875 190576 : DR_INIT (STMT_VINFO_DR_INFO (g)->dr))
3876 190576 : && gimple_uid (STMT_VINFO_STMT (next)) < first_duplicate)
3877 : first_duplicate = gimple_uid (STMT_VINFO_STMT (next));
3878 : g = next;
3879 : }
/* -1U means no adjacent pair with equal DR_INIT was found, so the group
   needs no (further) splitting. */
3880 43166 : if (first_duplicate == -1U)
3881 16510 : continue;
3882 :
3883 : /* Then move all stmts after the first duplicate to a new group.
3884 : Note this is a heuristic but one with the property that *it
3885 : is fixed up completely. */
3886 26656 : g = grp;
3887 26656 : stmt_vec_info newgroup = NULL, ng = grp;
3888 241443 : while ((next = DR_GROUP_NEXT_ELEMENT (g)))
3889 : {
3890 188131 : if (gimple_uid (STMT_VINFO_STMT (next)) >= first_duplicate)
3891 : {
/* Unlink NEXT from the old chain and append it to the new one; G is
   deliberately not advanced so its new successor is examined next. */
3892 181929 : DR_GROUP_NEXT_ELEMENT (g) = DR_GROUP_NEXT_ELEMENT (next);
3893 181929 : if (!newgroup)
3894 : {
3895 26656 : newgroup = next;
3896 26656 : STMT_VINFO_SLP_VECT_ONLY (newgroup)
3897 26656 : = STMT_VINFO_SLP_VECT_ONLY (grp);
3898 : }
3899 : else
3900 155273 : DR_GROUP_NEXT_ELEMENT (ng) = next;
3901 181929 : ng = next;
3902 181929 : DR_GROUP_FIRST_ELEMENT (ng) = newgroup;
3903 : }
3904 : else
3905 : g = DR_GROUP_NEXT_ELEMENT (g);
3906 : }
3907 26656 : DR_GROUP_NEXT_ELEMENT (ng) = NULL;
3908 :
3909 : /* Fixup the new group which still may contain duplicates. */
3910 26656 : to_fixup.add (newgroup);
3911 : }
3912 :
/* Finally analyze the access of every data reference whose statement is
   still considered vectorizable. */
3913 1575345 : dr_vec_info *dr_info;
3914 16570435 : FOR_EACH_VEC_ELT (datarefs_copy, i, dr_info)
3915 : {
3916 15003100 : if (STMT_VINFO_VECTORIZABLE (dr_info->stmt)
3917 15003100 : && !vect_analyze_data_ref_access (vinfo, dr_info))
3918 : {
3919 8064 : if (dump_enabled_p ())
3920 292 : dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3921 : "not vectorized: complicated access pattern.\n");
3922 :
3923 8064 : if (is_a <bb_vec_info> (vinfo))
3924 : {
3925 : /* Mark the statement as not vectorizable. */
3926 54 : STMT_VINFO_VECTORIZABLE (dr_info->stmt) = false;
3927 54 : continue;
3928 : }
3929 : else
3930 : {
/* The sorted copy is released explicitly on every exit path taken after
   its creation. */
3931 8010 : datarefs_copy.release ();
3932 8010 : return opt_result::failure_at (dr_info->stmt->stmt,
3933 : "not vectorized:"
3934 : " complicated access pattern.\n");
3935 : }
3936 : }
3937 : }
3938 :
3939 1567335 : datarefs_copy.release ();
3940 1567335 : return opt_result::success ();
3941 1575345 : }
3942 :
3943 : /* Function vect_vfa_segment_size.
3944 :
3945 : Input:
3946 : DR_INFO: The data reference.
3947 : LENGTH_FACTOR: segment length to consider.
3948 :
3949 : Return a value suitable for the dr_with_seg_len::seg_len field.
3950 : This is the "distance travelled" by the pointer from the first
3951 : iteration in the segment to the last. Note that it does not include
3952 : the size of the access; in effect it only describes the first byte. */
3953 :
3954 : static tree
3955 146308 : vect_vfa_segment_size (dr_vec_info *dr_info, tree length_factor)
3956 : {
3957 146308 : length_factor = size_binop (MINUS_EXPR,
3958 : fold_convert (sizetype, length_factor),
3959 : size_one_node);
3960 146308 : return size_binop (MULT_EXPR, fold_convert (sizetype, DR_STEP (dr_info->dr)),
3961 : length_factor);
3962 : }
3963 :
3964 : /* Return a value that, when added to abs (vect_vfa_segment_size (DR_INFO)),
3965 : gives the worst-case number of bytes covered by the segment. */
3966 :
3967 : static unsigned HOST_WIDE_INT
3968 146790 : vect_vfa_access_size (vec_info *vinfo, dr_vec_info *dr_info)
3969 : {
3970 146790 : stmt_vec_info stmt_vinfo = dr_info->stmt;
3971 146790 : tree ref_type = TREE_TYPE (DR_REF (dr_info->dr));
3972 146790 : unsigned HOST_WIDE_INT ref_size = tree_to_uhwi (TYPE_SIZE_UNIT (ref_type));
3973 146790 : unsigned HOST_WIDE_INT access_size = ref_size;
3974 146790 : if (DR_GROUP_FIRST_ELEMENT (stmt_vinfo))
3975 : {
3976 42341 : gcc_assert (DR_GROUP_FIRST_ELEMENT (stmt_vinfo) == stmt_vinfo);
3977 42341 : access_size *= DR_GROUP_SIZE (stmt_vinfo) - DR_GROUP_GAP (stmt_vinfo);
3978 : }
3979 146790 : tree vectype = STMT_VINFO_VECTYPE (stmt_vinfo);
3980 146790 : int misalignment;
3981 293580 : if (((misalignment = dr_misalignment (dr_info, vectype)), true)
3982 146790 : && (vect_supportable_dr_alignment (vinfo, dr_info, vectype, misalignment)
3983 : == dr_explicit_realign_optimized))
3984 : {
3985 : /* We might access a full vector's worth. */
3986 0 : access_size += tree_to_uhwi (TYPE_SIZE_UNIT (vectype)) - ref_size;
3987 : }
3988 146790 : return access_size;
3989 : }
3990 :
3991 : /* Get the minimum alignment for all the scalar accesses that DR_INFO
3992 : describes. */
3993 :
3994 : static unsigned int
3995 146790 : vect_vfa_align (dr_vec_info *dr_info)
3996 : {
3997 0 : return dr_alignment (dr_info->dr);
3998 : }
3999 :
4000 : /* Function vect_no_alias_p.
4001 :
4002 : Given data references A and B with equal base and offset, see whether
4003 : the alias relation can be decided at compilation time. Return 1 if
4004 : it can and the references alias, 0 if it can and the references do
4005 : not alias, and -1 if we cannot decide at compile time. SEGMENT_LENGTH_A,
4006 : SEGMENT_LENGTH_B, ACCESS_SIZE_A and ACCESS_SIZE_B are the equivalent
4007 : of dr_with_seg_len::{seg_len,access_size} for A and B. */
4008 :
4009 : static int
4010 4344 : vect_compile_time_alias (dr_vec_info *a, dr_vec_info *b,
4011 : tree segment_length_a, tree segment_length_b,
4012 : unsigned HOST_WIDE_INT access_size_a,
4013 : unsigned HOST_WIDE_INT access_size_b)
4014 : {
4015 4344 : poly_offset_int offset_a = wi::to_poly_offset (DR_INIT (a->dr));
4016 4344 : poly_offset_int offset_b = wi::to_poly_offset (DR_INIT (b->dr));
4017 4344 : poly_uint64 const_length_a;
4018 4344 : poly_uint64 const_length_b;
4019 :
4020 : /* For negative step, we need to adjust address range by TYPE_SIZE_UNIT
4021 : bytes, e.g., int a[3] -> a[1] range is [a+4, a+16) instead of
4022 : [a, a+12) */
4023 4344 : if (tree_int_cst_compare (DR_STEP (a->dr), size_zero_node) < 0)
4024 : {
4025 250 : const_length_a = (-wi::to_poly_wide (segment_length_a)).force_uhwi ();
4026 250 : offset_a -= const_length_a;
4027 : }
4028 : else
4029 4094 : const_length_a = tree_to_poly_uint64 (segment_length_a);
4030 4344 : if (tree_int_cst_compare (DR_STEP (b->dr), size_zero_node) < 0)
4031 : {
4032 408 : const_length_b = (-wi::to_poly_wide (segment_length_b)).force_uhwi ();
4033 408 : offset_b -= const_length_b;
4034 : }
4035 : else
4036 3936 : const_length_b = tree_to_poly_uint64 (segment_length_b);
4037 :
4038 4344 : const_length_a += access_size_a;
4039 4344 : const_length_b += access_size_b;
4040 :
4041 4344 : if (ranges_known_overlap_p (offset_a, const_length_a,
4042 : offset_b, const_length_b))
4043 : return 1;
4044 :
4045 536 : if (!ranges_maybe_overlap_p (offset_a, const_length_a,
4046 : offset_b, const_length_b))
4047 536 : return 0;
4048 :
4049 : return -1;
4050 : }
4051 :
4052 : /* Return true if the minimum nonzero dependence distance for loop LOOP_DEPTH
4053 : in DDR is >= VF. */
4054 :
4055 : static bool
4056 86331 : dependence_distance_ge_vf (data_dependence_relation *ddr,
4057 : unsigned int loop_depth, poly_uint64 vf)
4058 : {
4059 86331 : if (DDR_ARE_DEPENDENT (ddr) != NULL_TREE
4060 91338 : || DDR_NUM_DIST_VECTS (ddr) == 0)
4061 : return false;
4062 :
4063 : /* If the dependence is exact, we should have limited the VF instead. */
4064 5042 : gcc_checking_assert (DDR_COULD_BE_INDEPENDENT_P (ddr));
4065 :
4066 : unsigned int i;
4067 : lambda_vector dist_v;
4068 10115 : FOR_EACH_VEC_ELT (DDR_DIST_VECTS (ddr), i, dist_v)
4069 : {
4070 10080 : HOST_WIDE_INT dist = dist_v[loop_depth];
4071 10080 : if (dist != 0
4072 5042 : && !(dist > 0 && DDR_REVERSED_P (ddr))
4073 15122 : && maybe_lt ((unsigned HOST_WIDE_INT) abs_hwi (dist), vf))
4074 : return false;
4075 : }
4076 :
4077 35 : if (dump_enabled_p ())
4078 2 : dump_printf_loc (MSG_NOTE, vect_location,
4079 : "dependence distance between %T and %T is >= VF\n",
4080 2 : DR_REF (DDR_A (ddr)), DR_REF (DDR_B (ddr)));
4081 :
4082 : return true;
4083 : }
4084 :
4085 : /* Dump LOWER_BOUND using flags DUMP_KIND. Dumps are known to be enabled. */
4086 :
4087 : static void
4088 437 : dump_lower_bound (dump_flags_t dump_kind, const vec_lower_bound &lower_bound)
4089 : {
4090 437 : dump_printf (dump_kind, "%s (%T) >= ",
4091 437 : lower_bound.unsigned_p ? "unsigned" : "abs",
4092 437 : lower_bound.expr);
4093 437 : dump_dec (dump_kind, lower_bound.min_value);
4094 437 : }
4095 :
4096 : /* Record that the vectorized loop requires the vec_lower_bound described
4097 : by EXPR, UNSIGNED_P and MIN_VALUE. */
4098 :
4099 : static void
4100 6684 : vect_check_lower_bound (loop_vec_info loop_vinfo, tree expr, bool unsigned_p,
4101 : poly_uint64 min_value)
4102 : {
4103 6684 : vec<vec_lower_bound> &lower_bounds
4104 : = LOOP_VINFO_LOWER_BOUNDS (loop_vinfo);
4105 7656 : for (unsigned int i = 0; i < lower_bounds.length (); ++i)
4106 5892 : if (operand_equal_p (lower_bounds[i].expr, expr, 0))
4107 : {
4108 4920 : unsigned_p &= lower_bounds[i].unsigned_p;
4109 4920 : min_value = upper_bound (lower_bounds[i].min_value, min_value);
4110 4920 : if (lower_bounds[i].unsigned_p != unsigned_p
4111 4920 : || maybe_lt (lower_bounds[i].min_value, min_value))
4112 : {
4113 798 : lower_bounds[i].unsigned_p = unsigned_p;
4114 798 : lower_bounds[i].min_value = min_value;
4115 798 : if (dump_enabled_p ())
4116 : {
4117 250 : dump_printf_loc (MSG_NOTE, vect_location,
4118 : "updating run-time check to ");
4119 250 : dump_lower_bound (MSG_NOTE, lower_bounds[i]);
4120 250 : dump_printf (MSG_NOTE, "\n");
4121 : }
4122 : }
4123 4920 : return;
4124 : }
4125 :
4126 1764 : vec_lower_bound lower_bound (expr, unsigned_p, min_value);
4127 1764 : if (dump_enabled_p ())
4128 : {
4129 187 : dump_printf_loc (MSG_NOTE, vect_location, "need a run-time check that ");
4130 187 : dump_lower_bound (MSG_NOTE, lower_bound);
4131 187 : dump_printf (MSG_NOTE, "\n");
4132 : }
4133 1764 : LOOP_VINFO_LOWER_BOUNDS (loop_vinfo).safe_push (lower_bound);
4134 : }
4135 :
4136 : /* Return true if it's unlikely that the step of the vectorized form of DR_INFO
4137 : will span fewer than GAP bytes. */
4138 :
4139 : static bool
4140 5348 : vect_small_gap_p (loop_vec_info loop_vinfo, dr_vec_info *dr_info,
4141 : poly_int64 gap)
4142 : {
4143 5348 : stmt_vec_info stmt_info = dr_info->stmt;
4144 5348 : HOST_WIDE_INT count
4145 5348 : = estimated_poly_value (LOOP_VINFO_VECT_FACTOR (loop_vinfo));
4146 5348 : if (DR_GROUP_FIRST_ELEMENT (stmt_info))
4147 4588 : count *= DR_GROUP_SIZE (DR_GROUP_FIRST_ELEMENT (stmt_info));
4148 5348 : return (estimated_poly_value (gap)
4149 5348 : <= count * vect_get_scalar_dr_size (dr_info));
4150 : }
4151 :
4152 : /* Return true if we know that there is no alias between DR_INFO_A and
4153 : DR_INFO_B when abs (DR_STEP (DR_INFO_A->dr)) >= N for some N.
4154 : When returning true, set *LOWER_BOUND_OUT to this N. */
4155 :
4156 : static bool
4157 19466 : vectorizable_with_step_bound_p (dr_vec_info *dr_info_a, dr_vec_info *dr_info_b,
4158 : poly_uint64 *lower_bound_out)
4159 : {
4160 : /* Check that there is a constant gap of known sign between DR_A
4161 : and DR_B. */
4162 19466 : data_reference *dr_a = dr_info_a->dr;
4163 19466 : data_reference *dr_b = dr_info_b->dr;
4164 19466 : poly_int64 init_a, init_b;
4165 19466 : if (!operand_equal_p (DR_BASE_ADDRESS (dr_a), DR_BASE_ADDRESS (dr_b), 0)
4166 8828 : || !operand_equal_p (DR_OFFSET (dr_a), DR_OFFSET (dr_b), 0)
4167 8142 : || !operand_equal_p (DR_STEP (dr_a), DR_STEP (dr_b), 0)
4168 8132 : || !poly_int_tree_p (DR_INIT (dr_a), &init_a)
4169 8132 : || !poly_int_tree_p (DR_INIT (dr_b), &init_b)
4170 19466 : || !ordered_p (init_a, init_b))
4171 11334 : return false;
4172 :
4173 : /* Sort DR_A and DR_B by the address they access. */
4174 8132 : if (maybe_lt (init_b, init_a))
4175 : {
4176 116 : std::swap (init_a, init_b);
4177 116 : std::swap (dr_info_a, dr_info_b);
4178 116 : std::swap (dr_a, dr_b);
4179 : }
4180 :
4181 : /* If the two accesses could be dependent within a scalar iteration,
4182 : make sure that we'd retain their order. */
4183 8132 : if (maybe_gt (init_a + vect_get_scalar_dr_size (dr_info_a), init_b)
4184 8132 : && !vect_preserves_scalar_order_p (dr_info_a, dr_info_b))
4185 : return false;
4186 :
4187 : /* There is no alias if abs (DR_STEP) is greater than or equal to
4188 : the bytes spanned by the combination of the two accesses. */
4189 8132 : *lower_bound_out = init_b + vect_get_scalar_dr_size (dr_info_b) - init_a;
4190 8132 : return true;
4191 : }
4192 :
/* Function vect_prune_runtime_alias_test_list.

   Prune the list of ddrs of LOOP_VINFO that have to be tested at run time
   by versioning for alias, merging several alias checks into one where
   possible.

   Return a failure opt_result if:
   - a segment length would depend on the iteration count of an uncounted
     loop;
   - two references are known at compile time to alias;
   - any run-time check remains and the loop uses the very-cheap cost
     model; or
   - the resulting number of checks is larger than allowed by
     param_vect_max_version_for_alias_checks (scaled to 6/10 of that value
     for the cheap cost model).
   Otherwise return success.  */

opt_result
vect_prune_runtime_alias_test_list (loop_vec_info loop_vinfo)
{
  typedef pair_hash <tree_operand_hash, tree_operand_hash> tree_pair_hash;
  /* Object pairs for which an address-inequality check has already been
     requested in this invocation.  */
  hash_set <tree_pair_hash> compared_objects;

  const vec<ddr_p> &may_alias_ddrs = LOOP_VINFO_MAY_ALIAS_DDRS (loop_vinfo);
  vec<dr_with_seg_len_pair_t> &comp_alias_ddrs
    = LOOP_VINFO_COMP_ALIAS_DDRS (loop_vinfo);
  const vec<vec_object_pair> &check_unequal_addrs
    = LOOP_VINFO_CHECK_UNEQUAL_ADDRS (loop_vinfo);
  poly_uint64 vect_factor = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
  tree scalar_loop_iters = LOOP_VINFO_NITERS (loop_vinfo);

  ddr_p ddr;
  unsigned int i;
  tree length_factor;

  DUMP_VECT_SCOPE ("vect_prune_runtime_alias_test_list");

  /* Step values are irrelevant for aliasing if the number of vector
     iterations is equal to the number of scalar iterations (which can
     happen for fully-SLP loops).  */
  bool vf_one_p = known_eq (LOOP_VINFO_VECT_FACTOR (loop_vinfo), 1U);

  if (!vf_one_p)
    {
      /* Convert the checks for nonzero steps into bound tests.  */
      tree value;
      FOR_EACH_VEC_ELT (LOOP_VINFO_CHECK_NONZERO (loop_vinfo), i, value)
        vect_check_lower_bound (loop_vinfo, value, true, 1);
    }

  if (may_alias_ddrs.is_empty ())
    return opt_result::success ();

  comp_alias_ddrs.create (may_alias_ddrs.length ());

  unsigned int loop_depth
    = index_in_loop_nest (LOOP_VINFO_LOOP (loop_vinfo)->num,
                          LOOP_VINFO_LOOP_NEST (loop_vinfo));

  /* First, we collect all data ref pairs for aliasing checks.  */
  FOR_EACH_VEC_ELT (may_alias_ddrs, i, ddr)
    {
      poly_uint64 lower_bound;
      tree segment_length_a, segment_length_b;
      unsigned HOST_WIDE_INT access_size_a, access_size_b;
      unsigned HOST_WIDE_INT align_a, align_b;

      /* Ignore the alias if the VF we chose ended up being no greater
         than the dependence distance.  */
      if (dependence_distance_ge_vf (ddr, loop_depth, vect_factor))
        continue;

      /* DDRs that name whole objects are handled by an address-inequality
         check on the object pair rather than by a segment overlap test.  */
      if (DDR_OBJECT_A (ddr))
        {
          vec_object_pair new_pair (DDR_OBJECT_A (ddr), DDR_OBJECT_B (ddr));
          if (!compared_objects.add (new_pair))
            {
              if (dump_enabled_p ())
                dump_printf_loc (MSG_NOTE, vect_location,
                                 "checking that %T and %T"
                                 " have different addresses\n",
                                 new_pair.first, new_pair.second);
              LOOP_VINFO_CHECK_UNEQUAL_ADDRS (loop_vinfo).safe_push (new_pair);
            }
          continue;
        }

      dr_vec_info *dr_info_a = loop_vinfo->lookup_dr (DDR_A (ddr));
      stmt_vec_info stmt_info_a = dr_info_a->stmt;

      dr_vec_info *dr_info_b = loop_vinfo->lookup_dr (DDR_B (ddr));
      stmt_vec_info stmt_info_b = dr_info_b->stmt;

      bool preserves_scalar_order_p
        = vect_preserves_scalar_order_p (dr_info_a, dr_info_b);
      bool ignore_step_p
        = (vf_one_p
           && (preserves_scalar_order_p
               || operand_equal_p (DR_STEP (dr_info_a->dr),
                                   DR_STEP (dr_info_b->dr))));

      /* Skip the pair if inter-iteration dependencies are irrelevant
         and intra-iteration dependencies are guaranteed to be honored.  */
      if (ignore_step_p
          && (preserves_scalar_order_p
              || vectorizable_with_step_bound_p (dr_info_a, dr_info_b,
                                                 &lower_bound)))
        {
          if (dump_enabled_p ())
            dump_printf_loc (MSG_NOTE, vect_location,
                             "no need for alias check between "
                             "%T and %T when VF is 1\n",
                             DR_REF (dr_info_a->dr), DR_REF (dr_info_b->dr));
          continue;
        }

      /* See whether we can handle the alias using a bounds check on
         the step, and whether that's likely to be the best approach.
         (It might not be, for example, if the minimum step is much larger
         than the number of bytes handled by one vector iteration.)  */
      if (!ignore_step_p
          && TREE_CODE (DR_STEP (dr_info_a->dr)) != INTEGER_CST
          && vectorizable_with_step_bound_p (dr_info_a, dr_info_b,
                                             &lower_bound)
          && (vect_small_gap_p (loop_vinfo, dr_info_a, lower_bound)
              || vect_small_gap_p (loop_vinfo, dr_info_b, lower_bound)))
        {
          bool unsigned_p = dr_known_forward_stride_p (dr_info_a->dr);
          if (dump_enabled_p ())
            {
              dump_printf_loc (MSG_NOTE, vect_location, "no alias between "
                               "%T and %T when the step %T is outside ",
                               DR_REF (dr_info_a->dr),
                               DR_REF (dr_info_b->dr),
                               DR_STEP (dr_info_a->dr));
              if (unsigned_p)
                dump_printf (MSG_NOTE, "[0");
              else
                {
                  dump_printf (MSG_NOTE, "(");
                  dump_dec (MSG_NOTE, poly_int64 (-lower_bound));
                }
              dump_printf (MSG_NOTE, ", ");
              dump_dec (MSG_NOTE, lower_bound);
              dump_printf (MSG_NOTE, ")\n");
            }
          vect_check_lower_bound (loop_vinfo, DR_STEP (dr_info_a->dr),
                                  unsigned_p, lower_bound);
          continue;
        }

      /* From here on, describe grouped accesses by the first element of
         their group.  */
      stmt_vec_info dr_group_first_a = DR_GROUP_FIRST_ELEMENT (stmt_info_a);
      if (dr_group_first_a)
        {
          stmt_info_a = dr_group_first_a;
          dr_info_a = STMT_VINFO_DR_INFO (stmt_info_a);
        }

      stmt_vec_info dr_group_first_b = DR_GROUP_FIRST_ELEMENT (stmt_info_b);
      if (dr_group_first_b)
        {
          stmt_info_b = dr_group_first_b;
          dr_info_b = STMT_VINFO_DR_INFO (stmt_info_b);
        }

      if (ignore_step_p)
        {
          segment_length_a = size_zero_node;
          segment_length_b = size_zero_node;
        }
      else
        {
          /* With different steps the segments are sized by the scalar
             iteration count; with equal steps they are sized by the
             vectorization factor.  */
          if (!operand_equal_p (DR_STEP (dr_info_a->dr),
                                DR_STEP (dr_info_b->dr), 0))
            {
              length_factor = scalar_loop_iters;
              if (TREE_CODE (length_factor) == SCEV_NOT_KNOWN)
                return opt_result::failure_at (vect_location,
                                               "Unsupported alias check on"
                                               " uncounted loop\n");
            }
          else
            length_factor = size_int (vect_factor);
          segment_length_a = vect_vfa_segment_size (dr_info_a, length_factor);
          segment_length_b = vect_vfa_segment_size (dr_info_b, length_factor);
        }
      access_size_a = vect_vfa_access_size (loop_vinfo, dr_info_a);
      access_size_b = vect_vfa_access_size (loop_vinfo, dr_info_b);
      align_a = vect_vfa_align (dr_info_a);
      align_b = vect_vfa_align (dr_info_b);

      /* See whether the alias is known at compilation time.  */
      if (operand_equal_p (DR_BASE_ADDRESS (dr_info_a->dr),
                           DR_BASE_ADDRESS (dr_info_b->dr), 0)
          && operand_equal_p (DR_OFFSET (dr_info_a->dr),
                              DR_OFFSET (dr_info_b->dr), 0)
          && TREE_CODE (DR_STEP (dr_info_a->dr)) == INTEGER_CST
          && TREE_CODE (DR_STEP (dr_info_b->dr)) == INTEGER_CST
          && poly_int_tree_p (segment_length_a)
          && poly_int_tree_p (segment_length_b))
        {
          /* RES is 1 for a known alias, 0 for a known non-alias and -1 if
             the question cannot be decided here.  */
          int res = vect_compile_time_alias (dr_info_a, dr_info_b,
                                             segment_length_a,
                                             segment_length_b,
                                             access_size_a,
                                             access_size_b);
          if (res >= 0 && dump_enabled_p ())
            {
              dump_printf_loc (MSG_NOTE, vect_location,
                               "can tell at compile time that %T and %T",
                               DR_REF (dr_info_a->dr), DR_REF (dr_info_b->dr));
              if (res == 0)
                dump_printf (MSG_NOTE, " do not alias\n");
              else
                dump_printf (MSG_NOTE, " alias\n");
            }

          if (res == 0)
            continue;

          if (res == 1)
            return opt_result::failure_at (stmt_info_b->stmt,
                                           "not vectorized:"
                                           " compilation time alias: %G%G",
                                           stmt_info_a->stmt,
                                           stmt_info_b->stmt);
        }

      /* dr_with_seg_len requires the alignment to apply to the segment length
         and access size, not just the start address.  The access size can be
         smaller than the pointer alignment for grouped accesses and bitfield
         references; see PR115192 and PR116125 respectively.  */
      align_a = std::min (align_a, least_bit_hwi (access_size_a));
      align_b = std::min (align_b, least_bit_hwi (access_size_b));

      dr_with_seg_len dr_a (dr_info_a->dr, segment_length_a,
                            access_size_a, align_a);
      dr_with_seg_len dr_b (dr_info_b->dr, segment_length_b,
                            access_size_b, align_b);
      /* Canonicalize the order to be the one that's needed for accurate
         RAW, WAR and WAW flags, in cases where the data references are
         well-ordered.  The order doesn't really matter otherwise,
         but we might as well be consistent.  */
      if (get_later_stmt (stmt_info_a, stmt_info_b) == stmt_info_a)
        std::swap (dr_a, dr_b);

      dr_with_seg_len_pair_t dr_with_seg_len_pair
        (dr_a, dr_b, (preserves_scalar_order_p
                      ? dr_with_seg_len_pair_t::WELL_ORDERED
                      : dr_with_seg_len_pair_t::REORDERED));

      comp_alias_ddrs.safe_push (dr_with_seg_len_pair);
    }

  prune_runtime_alias_test_list (&comp_alias_ddrs, vect_factor);

  /* The total number of run-time tests is the number of remaining segment
     checks plus the number of address-inequality checks.  */
  unsigned int count = (comp_alias_ddrs.length ()
                        + check_unequal_addrs.length ());

  /* The very-cheap cost model does not allow any run-time alias check.  */
  if (count
      && (loop_cost_model (LOOP_VINFO_LOOP (loop_vinfo))
          == VECT_COST_MODEL_VERY_CHEAP))
    return opt_result::failure_at
      (vect_location, "would need a runtime alias check\n");

  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location,
                     "improved number of alias checks from %d to %d\n",
                     may_alias_ddrs.length (), count);
  /* The cheap cost model only gets 6/10 of the configured limit.  */
  unsigned limit = param_vect_max_version_for_alias_checks;
  if (loop_cost_model (LOOP_VINFO_LOOP (loop_vinfo)) == VECT_COST_MODEL_CHEAP)
    limit = param_vect_max_version_for_alias_checks * 6 / 10;
  if (count > limit)
    return opt_result::failure_at
      (vect_location,
       "number of versioning for alias run-time tests exceeds %d "
       "(--param vect-max-version-for-alias-checks)\n", limit);

  return opt_result::success ();
}
4463 :
/* Structure to hold information about a supported gather/scatter
   configuration.  */
struct gather_scatter_config
{
  /* Internal function that implements the gather or scatter.  */
  internal_fn ifn;
  /* Offset vector type for which IFN was found to be supported.  */
  tree offset_vectype;
  /* Offset scale for which IFN was found to be supported.  */
  int scale;
  /* Else values filled in by the support query for this configuration.  */
  vec<int> elsvals;
};
4473 :
4474 : /* Determine which gather/scatter IFN is supported for the given parameters.
4475 : IFN_MASK_GATHER_LOAD, IFN_GATHER_LOAD, and IFN_MASK_LEN_GATHER_LOAD
4476 : are mutually exclusive, so we only need to find one. Return the
4477 : supported IFN or IFN_LAST if none are supported. */
4478 :
4479 : static internal_fn
4480 1173436 : vect_gather_scatter_which_ifn (bool read_p, bool masked_p,
4481 : tree vectype, tree memory_type,
4482 : tree offset_vectype, int scale,
4483 : vec<int> *elsvals)
4484 : {
4485 : /* Work out which functions to try. */
4486 1173436 : internal_fn ifn, alt_ifn, alt_ifn2;
4487 1173436 : if (read_p)
4488 : {
4489 876692 : ifn = masked_p ? IFN_MASK_GATHER_LOAD : IFN_GATHER_LOAD;
4490 : alt_ifn = IFN_MASK_GATHER_LOAD;
4491 : alt_ifn2 = IFN_MASK_LEN_GATHER_LOAD;
4492 : }
4493 : else
4494 : {
4495 296744 : ifn = masked_p ? IFN_MASK_SCATTER_STORE : IFN_SCATTER_STORE;
4496 : alt_ifn = IFN_MASK_SCATTER_STORE;
4497 : alt_ifn2 = IFN_MASK_LEN_SCATTER_STORE;
4498 : }
4499 :
4500 1173436 : if (!offset_vectype)
4501 : return IFN_LAST;
4502 :
4503 1173436 : if (internal_gather_scatter_fn_supported_p (ifn, vectype, memory_type,
4504 : offset_vectype, scale, elsvals))
4505 : return ifn;
4506 1173436 : if (internal_gather_scatter_fn_supported_p (alt_ifn, vectype, memory_type,
4507 : offset_vectype, scale, elsvals))
4508 : return alt_ifn;
4509 1173436 : if (internal_gather_scatter_fn_supported_p (alt_ifn2, vectype, memory_type,
4510 : offset_vectype, scale, elsvals))
4511 : return alt_ifn2;
4512 :
4513 : return IFN_LAST;
4514 : }
4515 :
4516 : /* Collect all supported offset vector types for a gather load or scatter
4517 : store. READ_P is true for loads and false for stores. MASKED_P is true
4518 : if the load or store is conditional. VECTYPE is the data vector type.
4519 : MEMORY_TYPE is the type of the memory elements being loaded or stored,
4520 : and OFFSET_TYPE is the type of the offset.
4521 : SCALE is the amount by which the offset should be multiplied.
4522 :
4523 : Return a vector of all configurations the target supports (which can
4524 : be none). */
4525 :
4526 : static auto_vec<gather_scatter_config>
4527 84449 : vect_gather_scatter_get_configs (vec_info *vinfo, bool read_p, bool masked_p,
4528 : tree vectype, tree memory_type,
4529 : tree offset_type, int scale)
4530 : {
4531 84449 : auto_vec<gather_scatter_config> configs;
4532 :
4533 84449 : auto_vec<tree, 8> offset_types_to_try;
4534 :
4535 : /* Try all sizes from the offset type's precision up to POINTER_SIZE. */
4536 84449 : for (unsigned int bits = TYPE_PRECISION (offset_type);
4537 395480 : bits <= POINTER_SIZE;
4538 297853 : bits *= 2)
4539 : {
4540 : /* Signed variant. */
4541 297853 : offset_types_to_try.safe_push
4542 297853 : (build_nonstandard_integer_type (bits, 0));
4543 : /* Unsigned variant. */
4544 297853 : offset_types_to_try.safe_push
4545 297853 : (build_nonstandard_integer_type (bits, 1));
4546 : }
4547 :
4548 : /* Once we find which IFN works for one offset type, we know that it
4549 : will work for other offset types as well. Then we can perform
4550 : the checks for the remaining offset types with only that IFN.
4551 : However, we might need to try different offset types to find which
4552 : IFN is supported, since the check is offset-type-specific. */
4553 : internal_fn ifn = IFN_LAST;
4554 :
4555 : /* Try each offset type. */
4556 680155 : for (unsigned int i = 0; i < offset_types_to_try.length (); i++)
4557 : {
4558 595706 : tree offset_type = offset_types_to_try[i];
4559 595706 : tree offset_vectype = get_vectype_for_scalar_type (vinfo, offset_type);
4560 595706 : if (!offset_vectype)
4561 10158 : continue;
4562 :
4563 : /* Try multiple scale values. Start with exact match, then try
4564 : smaller common scales that a target might support . */
4565 585548 : int scales_to_try[] = {scale, 1, 2, 4, 8};
4566 :
4567 3513288 : for (unsigned int j = 0;
4568 3513288 : j < sizeof (scales_to_try) / sizeof (*scales_to_try);
4569 : j++)
4570 : {
4571 2927740 : int try_scale = scales_to_try[j];
4572 :
4573 : /* Skip scales >= requested scale (except for exact match). */
4574 2927740 : if (j > 0 && try_scale >= scale)
4575 1754304 : continue;
4576 :
4577 : /* Skip if requested scale is not a multiple of this scale. */
4578 1173580 : if (j > 0 && scale % try_scale != 0)
4579 144 : continue;
4580 :
4581 1173436 : vec<int> elsvals = vNULL;
4582 :
4583 : /* If we haven't determined which IFN is supported yet, try all three
4584 : to find which one the target supports. */
4585 1173436 : if (ifn == IFN_LAST)
4586 : {
4587 1173436 : ifn = vect_gather_scatter_which_ifn (read_p, masked_p,
4588 : vectype, memory_type,
4589 : offset_vectype, try_scale,
4590 : &elsvals);
4591 1173436 : if (ifn != IFN_LAST)
4592 : {
4593 : /* Found which IFN is supported. Save this configuration. */
4594 0 : gather_scatter_config config;
4595 0 : config.ifn = ifn;
4596 0 : config.offset_vectype = offset_vectype;
4597 0 : config.scale = try_scale;
4598 0 : config.elsvals = elsvals;
4599 0 : configs.safe_push (config);
4600 : }
4601 : }
4602 : else
4603 : {
4604 : /* We already know which IFN is supported, just check if this
4605 : offset type and scale work with it. */
4606 0 : if (internal_gather_scatter_fn_supported_p (ifn, vectype,
4607 : memory_type,
4608 : offset_vectype,
4609 : try_scale,
4610 : &elsvals))
4611 : {
4612 0 : gather_scatter_config config;
4613 0 : config.ifn = ifn;
4614 0 : config.offset_vectype = offset_vectype;
4615 0 : config.scale = try_scale;
4616 0 : config.elsvals = elsvals;
4617 0 : configs.safe_push (config);
4618 : }
4619 : }
4620 : }
4621 : }
4622 :
4623 84449 : return configs;
4624 84449 : }
4625 :
/* Check whether we can use an internal function for a gather load
   or scatter store.  READ_P is true for loads and false for stores.
   MASKED_P is true if the load or store is conditional.  VECTYPE is the
   data vector type; its elements must be as wide as MEMORY_TYPE, which is
   the type of the memory elements being loaded or stored.  OFFSET_TYPE
   is the type of the offset that is being applied to the invariant
   base address.  If OFFSET_TYPE is scalar the function chooses an
   appropriate vector type for it.  SCALE is the amount by which the
   offset should be multiplied *after* it has been converted to address width.
   If the target does not support the requested SCALE, *SUPPORTED_SCALE
   will contain the scale that is actually supported
   (which may be smaller, requiring additional multiplication).
   Otherwise *SUPPORTED_SCALE is 0.

   Return true if the function is supported, storing the function id in
   *IFN_OUT and the vector type for the offset in *OFFSET_VECTYPE_OUT.
   If the chosen configuration uses an offset vector type that requires a
   conversion from the original one (for example one with different
   signedness), store it in *SUPPORTED_OFFSET_VECTYPE; otherwise
   *SUPPORTED_OFFSET_VECTYPE is NULL_TREE.  *SUPPORTED_OFFSET_VECTYPE and
   *SUPPORTED_SCALE are reset even when returning false.

   If we can use gather/scatter and ELSVALS is nonzero, store the possible
   else values in ELSVALS.  */

bool
vect_gather_scatter_fn_p (vec_info *vinfo, bool read_p, bool masked_p,
                          tree vectype, tree memory_type, tree offset_type,
                          int scale, int *supported_scale,
                          internal_fn *ifn_out,
                          tree *offset_vectype_out,
                          tree *supported_offset_vectype,
                          vec<int> *elsvals)
{
  *supported_offset_vectype = NULL_TREE;
  *supported_scale = 0;
  unsigned int memory_bits = tree_to_uhwi (TYPE_SIZE (memory_type));
  unsigned int element_bits = vector_element_bits (vectype);
  if (element_bits != memory_bits)
    /* For now the vector elements must be the same width as the
       memory elements.  */
    return false;

  /* Get the original offset vector type for comparison.  */
  tree offset_vectype = VECTOR_TYPE_P (offset_type)
    ? offset_type : get_vectype_for_scalar_type (vinfo, offset_type);

  /* If there is no offset vectype, bail.  */
  if (!offset_vectype)
    return false;

  /* From here on OFFSET_TYPE is the element type of OFFSET_VECTYPE, even
     if the caller passed a vector type.  */
  offset_type = TREE_TYPE (offset_vectype);

  /* Get all supported configurations for this data vector type.  */
  auto_vec<gather_scatter_config> configs
    = vect_gather_scatter_get_configs (vinfo, read_p, masked_p, vectype,
                                       memory_type, offset_type, scale);

  if (configs.is_empty ())
    return false;

  /* Selection priority:
     1 - Exact scale match + offset type match
     2 - Exact scale match + sign-swapped offset
     3 - Smaller scale + offset type match
     4 - Smaller scale + sign-swapped offset
     Within each category, prefer smaller offset types.  */

  /* First pass: exact scale match with no conversion.  */
  for (unsigned int i = 0; i < configs.length (); i++)
    {
      if (configs[i].scale == scale
          && TYPE_SIGN (configs[i].offset_vectype)
             == TYPE_SIGN (offset_vectype))
        {
          *ifn_out = configs[i].ifn;
          *offset_vectype_out = configs[i].offset_vectype;
          if (elsvals)
            *elsvals = configs[i].elsvals;
          return true;
        }
    }

  /* No direct match.  This means we try to find either
     - a sign-swapped offset vectype or
     - a different scale and 2x larger offset type
     - a different scale and larger sign-swapped offset vectype.  */
  unsigned int offset_precision = TYPE_PRECISION (TREE_TYPE (offset_vectype));
  /* Minimum precision a replacement offset type must have: twice the
     original precision for an unsigned offset, POINTER_SIZE otherwise,
     and never more than POINTER_SIZE.  */
  unsigned int needed_precision
    = TYPE_UNSIGNED (offset_vectype) ? offset_precision * 2 : POINTER_SIZE;
  needed_precision = std::min (needed_precision, (unsigned) POINTER_SIZE);

  /* Second pass: No direct match.  This means we try to find a sign-swapped
     offset vectype.  */
  /* TMP only receives the conversion code computed by
     supportable_convert_operation; its value is not used here.  */
  enum tree_code tmp;
  for (unsigned int i = 0; i < configs.length (); i++)
    {
      unsigned int precision
        = TYPE_PRECISION (TREE_TYPE (configs[i].offset_vectype));
      if (configs[i].scale == scale
          && precision >= needed_precision
          && (supportable_convert_operation (CONVERT_EXPR,
                                             configs[i].offset_vectype,
                                             offset_vectype, &tmp)
              || (needed_precision == offset_precision
                  && tree_nop_conversion_p (configs[i].offset_vectype,
                                            offset_vectype))))
        {
          /* Report the original vector type as the offset type and the
             configuration's type as the one to convert to.  */
          *ifn_out = configs[i].ifn;
          *offset_vectype_out = offset_vectype;
          *supported_offset_vectype = configs[i].offset_vectype;
          if (elsvals)
            *elsvals = configs[i].elsvals;
          return true;
        }
    }

  /* Third pass: Try a smaller scale with the same signedness.  */
  needed_precision = offset_precision * 2;
  needed_precision = std::min (needed_precision, (unsigned) POINTER_SIZE);

  for (unsigned int i = 0; i < configs.length (); i++)
    {
      unsigned int precision
        = TYPE_PRECISION (TREE_TYPE (configs[i].offset_vectype));
      if (configs[i].scale < scale
          && TYPE_SIGN (configs[i].offset_vectype)
             == TYPE_SIGN (offset_vectype)
          && precision >= needed_precision)
        {
          *ifn_out = configs[i].ifn;
          *offset_vectype_out = configs[i].offset_vectype;
          *supported_scale = configs[i].scale;
          /* Only set SUPPORTED_OFFSET_VECTYPE if this is a real
             conversion.  */
          if (!useless_type_conversion_p (offset_vectype,
                                          configs[i].offset_vectype))
            *supported_offset_vectype = configs[i].offset_vectype;
          if (elsvals)
            *elsvals = configs[i].elsvals;
          return true;
        }
    }

  /* Fourth pass: Try a smaller scale and sign-swapped offset vectype.  */
  needed_precision
    = TYPE_UNSIGNED (offset_vectype) ? offset_precision * 2 : POINTER_SIZE;
  needed_precision = std::min (needed_precision, (unsigned) POINTER_SIZE);

  for (unsigned int i = 0; i < configs.length (); i++)
    {
      unsigned int precision
        = TYPE_PRECISION (TREE_TYPE (configs[i].offset_vectype));
      if (configs[i].scale < scale
          && precision >= needed_precision
          && (supportable_convert_operation (CONVERT_EXPR,
                                             configs[i].offset_vectype,
                                             offset_vectype, &tmp)
              || (needed_precision == offset_precision
                  && tree_nop_conversion_p (configs[i].offset_vectype,
                                            offset_vectype))))
        {
          *ifn_out = configs[i].ifn;
          *offset_vectype_out = offset_vectype;
          *supported_offset_vectype = configs[i].offset_vectype;
          *supported_scale = configs[i].scale;
          if (elsvals)
            *elsvals = configs[i].elsvals;
          return true;
        }
    }

  return false;
}
4796 :
4797 : /* STMT_INFO is a call to an internal gather load or scatter store function.
4798 : Describe the operation in INFO. */
       /* All fields of *INFO written here are derived from the call itself:
          the internal function is the one being called, the base is call
          argument 0, and the alias pointer, offset and scale are the call
          arguments at the positions given by internal_fn_alias_ptr_index,
          internal_fn_offset_index and internal_fn_scale_index.  INFO->decl
          and INFO->offset_vectype are set to NULL_TREE.  The element type is
          the element type of STMT_INFO's vector type and the memory type is
          the type of the data reference's DR_REF.  */
4799 :
4800 : void
4801 0 : vect_describe_gather_scatter_call (stmt_vec_info stmt_info,
4802 : gather_scatter_info *info)
4803 : {
4804 0 : gcall *call = as_a <gcall *> (stmt_info->stmt);
4805 0 : tree vectype = STMT_VINFO_VECTYPE (stmt_info);
4806 0 : data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
4807 :
4808 0 : info->ifn = gimple_call_internal_fn (call);
4809 0 : info->decl = NULL_TREE;
4810 0 : info->base = gimple_call_arg (call, 0);
4811 0 : info->alias_ptr = gimple_call_arg
4812 0 : (call, internal_fn_alias_ptr_index (info->ifn));
4813 0 : info->offset = gimple_call_arg
4814 0 : (call, internal_fn_offset_index (info->ifn));
4815 0 : info->offset_vectype = NULL_TREE;
4816 0 : info->scale = TREE_INT_CST_LOW (gimple_call_arg
4817 : (call, internal_fn_scale_index (info->ifn)));
4818 0 : info->element_type = TREE_TYPE (vectype);
4819 0 : info->memory_type = TREE_TYPE (DR_REF (dr));
4820 0 : }
4821 :
4822 : /* Return true if a non-affine read or write in STMT_INFO is suitable for a
4823 : gather load or scatter store with VECTYPE. Describe the operation in *INFO
4824 : if so. If it is suitable and ELSVALS is nonzero store the supported else
4825 : values in the vector it points to. */
       /* On a true return *INFO holds the loop-invariant base (a sizetype
          expression), the SSA_NAME offset defined in the loop, the scale, an
          alias pointer constant, and the internal function or target builtin
          that was found.  Note that the final lookup is allowed to fail
          without turning the result into false: INFO->ifn is then IFN_LAST
          and INFO->decl may be NULL_TREE, so a true return does not by itself
          guarantee that either of them names a usable function.

          The function returns false when the reference has reverse storage
          order, when its bit position is not a multiple of BITS_PER_UNIT,
          when the base may be non-addressable, when neither the base nor the
          offset is loop invariant, or when the offset cannot be reduced to a
          single SSA_NAME that varies in the loop.  */
4826 :
4827 : bool
4828 350457 : vect_check_gather_scatter (stmt_vec_info stmt_info, tree vectype,
4829 : loop_vec_info loop_vinfo,
4830 : gather_scatter_info *info, vec<int> *elsvals)
4831 : {
4832 350457 : HOST_WIDE_INT scale = 1;
4833 350457 : poly_int64 pbitpos, pbitsize;
4834 350457 : class loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
4835 350457 : struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
4836 350457 : tree offtype = NULL_TREE;
4837 350457 : tree decl = NULL_TREE, base, off;
4838 350457 : tree memory_type = TREE_TYPE (DR_REF (dr));
4839 350457 : machine_mode pmode;
4840 350457 : int punsignedp, reversep, pvolatilep = 0;
4841 350457 : internal_fn ifn;
4842 350457 : tree offset_vectype;
4843 350457 : bool masked_p = false;
4844 :
4845 : /* See whether this is already a call to a gather/scatter internal function.
4846 : If not, see whether it's a masked load or store. */
4847 350457 : gcall *call = dyn_cast <gcall *> (stmt_info->stmt);
4848 6282 : if (call && gimple_call_internal_p (call))
4849 : {
4850 6282 : ifn = gimple_call_internal_fn (call);
4851 6282 : if (internal_gather_scatter_fn_p (ifn))
4852 : {
4853 0 : vect_describe_gather_scatter_call (stmt_info, info);
4854 :
4855 : /* In pattern recog we simply used a ZERO else value that
4856 : we need to correct here. To that end just re-use the
4857 : (already successful) check if we support a gather IFN
4858 : and have it populate the else values. */
4859 0 : if (DR_IS_READ (dr) && internal_fn_mask_index (ifn) >= 0 && elsvals)
4860 0 : supports_vec_gather_load_p (TYPE_MODE (vectype), elsvals);
4861 0 : return true;
4862 : }
4863 6282 : masked_p = (ifn == IFN_MASK_LOAD || ifn == IFN_MASK_STORE);
4864 : }
4865 :
4866 : /* True if we should aim to use internal functions rather than
4867 : built-in functions. */
4868 350457 : bool use_ifn_p = (DR_IS_READ (dr)
4869 350457 : ? supports_vec_gather_load_p (TYPE_MODE (vectype),
4870 : elsvals)
4871 350457 : : supports_vec_scatter_store_p (TYPE_MODE (vectype)));
4872 :
4873 350457 : base = DR_REF (dr);
4874 : /* For masked loads/stores, DR_REF (dr) is an artificial MEM_REF,
4875 : see if we can use the def stmt of the address. */
4876 350457 : if (masked_p
4877 6282 : && TREE_CODE (base) == MEM_REF
4878 6282 : && TREE_CODE (TREE_OPERAND (base, 0)) == SSA_NAME
4879 6282 : && integer_zerop (TREE_OPERAND (base, 1))
4880 356739 : && !expr_invariant_in_loop_p (loop, TREE_OPERAND (base, 0)))
4881 : {
4882 6282 : gimple *def_stmt = SSA_NAME_DEF_STMT (TREE_OPERAND (base, 0));
4883 6282 : if (is_gimple_assign (def_stmt)
4884 6282 : && gimple_assign_rhs_code (def_stmt) == ADDR_EXPR)
4885 615 : base = TREE_OPERAND (gimple_assign_rhs1 (def_stmt), 0);
4886 : }
4887 :
4888 : /* The gather and scatter builtins need address of the form
4889 : loop_invariant + vector * {1, 2, 4, 8}
4890 : or
4891 : loop_invariant + sign_extend (vector) * { 1, 2, 4, 8 }.
4892 : Unfortunately DR_BASE_ADDRESS/DR_OFFSET can be a mixture
4893 : of loop invariants/SSA_NAMEs defined in the loop, with casts,
4894 : multiplications and additions in it. To get a vector, we need
4895 : a single SSA_NAME that will be defined in the loop and will
4896 : contain everything that is not loop invariant and that can be
4897 : vectorized. The following code attempts to find such a preexisting
4898 : SSA_NAME OFF and put the loop invariants into a tree BASE
4899 : that can be gimplified before the loop. */
4900 350457 : base = get_inner_reference (base, &pbitsize, &pbitpos, &off, &pmode,
4901 : &punsignedp, &reversep, &pvolatilep);
4902 350457 : if (reversep)
4903 : return false;
4904 :
4905 : /* PR 107346. Packed structs can have fields at offsets that are not
4906 : multiples of BITS_PER_UNIT. Do not use gather/scatters in such cases. */
4907 350457 : if (!multiple_p (pbitpos, BITS_PER_UNIT))
4908 : return false;
4909 :
4910 : /* We need to be able to form an address to the base which for example
4911 : isn't possible for hard registers. */
4912 350457 : if (may_be_nonaddressable_p (base))
4913 : return false;
4914 :
4915 350449 : poly_int64 pbytepos = exact_div (pbitpos, BITS_PER_UNIT);
4916 :
       /* Turn BASE into an address: for a MEM_REF take its pointer operand
          and move a nonzero constant offset into OFF, otherwise take the
          address of the base object.  */
4917 350449 : if (TREE_CODE (base) == MEM_REF)
4918 : {
4919 284194 : if (!integer_zerop (TREE_OPERAND (base, 1)))
4920 : {
4921 33615 : if (off == NULL_TREE)
4922 33298 : off = wide_int_to_tree (sizetype, mem_ref_offset (base));
4923 : else
4924 317 : off = size_binop (PLUS_EXPR, off,
4925 : fold_convert (sizetype, TREE_OPERAND (base, 1)));
4926 : }
4927 284194 : base = TREE_OPERAND (base, 0);
4928 : }
4929 : else
4930 66255 : base = build_fold_addr_expr (base);
4931 :
4932 350449 : if (off == NULL_TREE)
4933 225830 : off = size_zero_node;
4934 :
4935 : /* BASE must be loop invariant. If it is not invariant, but OFF is, then we
4936 : can fix that by swapping BASE and OFF. */
4937 350449 : if (!expr_invariant_in_loop_p (loop, base))
4938 : {
4939 259655 : if (!expr_invariant_in_loop_p (loop, off))
4940 : return false;
4941 :
4942 259386 : std::swap (base, off);
4943 : }
4944 :
4945 350180 : base = fold_convert (sizetype, base);
4946 350180 : base = size_binop (PLUS_EXPR, base, size_int (pbytepos));
       /* Out-parameters required by vect_gather_scatter_fn_p; the values
          stored in them are not read by this function.  */
4947 350180 : int tmp_scale;
4948 350180 : tree tmp_offset_vectype;
4949 :
4950 : /* OFF at this point may be either a SSA_NAME or some tree expression
4951 : from get_inner_reference. Try to peel off loop invariants from it
4952 : into BASE as long as possible. */
4953 350180 : STRIP_NOPS (off);
       /* The loop stops as soon as OFFTYPE has been fixed by a widening
          conversion, or when one of the cases below breaks out.  */
4954 917927 : while (offtype == NULL_TREE)
4955 : {
4956 797453 : enum tree_code code;
4957 797453 : tree op0, op1, add = NULL_TREE;
4958 :
4959 797453 : if (TREE_CODE (off) == SSA_NAME)
4960 : {
4961 611170 : gimple *def_stmt = SSA_NAME_DEF_STMT (off);
4962 :
4963 611170 : if (expr_invariant_in_loop_p (loop, off))
4964 0 : return false;
4965 :
4966 611170 : if (gimple_code (def_stmt) != GIMPLE_ASSIGN)
4967 : break;
4968 :
4969 480538 : op0 = gimple_assign_rhs1 (def_stmt);
4970 480538 : code = gimple_assign_rhs_code (def_stmt);
4971 480538 : op1 = gimple_assign_rhs2 (def_stmt);
4972 : }
4973 : else
4974 : {
4975 186283 : if (get_gimple_rhs_class (TREE_CODE (off)) == GIMPLE_TERNARY_RHS)
4976 : return false;
4977 186283 : code = TREE_CODE (off);
4978 186283 : extract_ops_from_tree (off, &code, &op0, &op1);
4979 : }
4980 666821 : switch (code)
4981 : {
4982 203916 : case POINTER_PLUS_EXPR:
4983 203916 : case PLUS_EXPR:
4984 203916 : if (expr_invariant_in_loop_p (loop, op0))
4985 : {
4986 134744 : add = op0;
4987 134744 : off = op1;
       /* do_add is also entered by goto from the op1-invariant case below
          and from MINUS_EXPR.  ADD is multiplied by the scale recorded so
          far before it is folded into BASE.  */
4988 187876 : do_add:
4989 187876 : add = fold_convert (sizetype, add);
4990 187876 : if (scale != 1)
4991 46551 : add = size_binop (MULT_EXPR, add, size_int (scale));
4992 187876 : base = size_binop (PLUS_EXPR, base, add);
4993 567747 : continue;
4994 : }
4995 69172 : if (expr_invariant_in_loop_p (loop, op1))
4996 : {
4997 52838 : add = op1;
4998 52838 : off = op0;
4999 52838 : goto do_add;
5000 : }
5001 : break;
5002 487 : case MINUS_EXPR:
       /* OP0 - OP1 with invariant OP1 is handled as OP0 + (0 - OP1).  */
5003 487 : if (expr_invariant_in_loop_p (loop, op1))
5004 : {
5005 294 : add = fold_convert (sizetype, op1);
5006 294 : add = size_binop (MINUS_EXPR, size_zero_node, add);
5007 294 : off = op0;
5008 294 : goto do_add;
5009 : }
5010 : break;
5011 203071 : case MULT_EXPR:
       /* A multiplication is only absorbed into SCALE while SCALE is
          still 1.  */
5012 203071 : if (scale == 1 && tree_fits_shwi_p (op1))
5013 : {
       /* NOTE(review): NEW_SCALE is an int whereas SCALE is a HOST_WIDE_INT,
          so a multiplier that does not fit an int would be narrowed here -
          confirm whether that can happen in practice.  */
5014 170299 : int new_scale = tree_to_shwi (op1);
5015 : /* Only treat this as a scaling operation if the target
5016 : supports it for at least some offset type. */
5017 170299 : if (use_ifn_p
5018 0 : && !vect_gather_scatter_fn_p (loop_vinfo, DR_IS_READ (dr),
5019 : masked_p, vectype, memory_type,
5020 : signed_char_type_node,
5021 : new_scale, &tmp_scale,
5022 : &ifn,
5023 : &offset_vectype,
5024 : &tmp_offset_vectype,
5025 : elsvals)
5026 170299 : && !vect_gather_scatter_fn_p (loop_vinfo, DR_IS_READ (dr),
5027 : masked_p, vectype, memory_type,
5028 : unsigned_char_type_node,
5029 : new_scale, &tmp_scale,
5030 : &ifn,
5031 : &offset_vectype,
5032 : &tmp_offset_vectype,
5033 : elsvals))
5034 : break;
5035 170299 : scale = new_scale;
5036 170299 : off = op0;
5037 170299 : continue;
5038 170299 : }
5039 : break;
5040 0 : case SSA_NAME:
       /* Presumably a plain SSA copy: continue with its source operand.  */
5041 0 : off = op0;
5042 0 : continue;
5043 215639 : CASE_CONVERT:
5044 431262 : if (!POINTER_TYPE_P (TREE_TYPE (op0))
5045 431262 : && !INTEGRAL_TYPE_P (TREE_TYPE (op0)))
5046 : break;
5047 :
5048 : /* Don't include the conversion if the target is happy with
5049 : the current offset type. */
5050 215639 : if (use_ifn_p
5051 0 : && TREE_CODE (off) == SSA_NAME
5052 0 : && !POINTER_TYPE_P (TREE_TYPE (off))
5053 215639 : && vect_gather_scatter_fn_p (loop_vinfo, DR_IS_READ (dr),
5054 : masked_p, vectype, memory_type,
5055 0 : TREE_TYPE (off),
5056 : scale, &tmp_scale,
5057 : &ifn,
5058 : &offset_vectype,
5059 : &tmp_offset_vectype,
5060 : elsvals))
5061 : break;
5062 :
       /* A conversion that keeps the precision is simply looked through.  */
5063 215639 : if (TYPE_PRECISION (TREE_TYPE (op0))
5064 215639 : == TYPE_PRECISION (TREE_TYPE (off)))
5065 : {
5066 89098 : off = op0;
5067 89098 : continue;
5068 : }
5069 :
5070 : /* Include the conversion if it is widening and we're using
5071 : the IFN path or the target can handle the converted from
5072 : offset or the current size is not already the same as the
5073 : data vector element size. */
5074 126541 : if ((TYPE_PRECISION (TREE_TYPE (op0))
5075 126541 : < TYPE_PRECISION (TREE_TYPE (off)))
5076 126541 : && (use_ifn_p
5077 125813 : || (DR_IS_READ (dr)
5078 82077 : ? (targetm.vectorize.builtin_gather
5079 82077 : && targetm.vectorize.builtin_gather (vectype,
5080 82077 : TREE_TYPE (op0),
5081 : scale))
5082 43736 : : (targetm.vectorize.builtin_scatter
5083 43736 : && targetm.vectorize.builtin_scatter (vectype,
5084 43736 : TREE_TYPE (op0),
5085 : scale)))
5086 124717 : || !operand_equal_p (TYPE_SIZE (TREE_TYPE (off)),
5087 124717 : TYPE_SIZE (TREE_TYPE (vectype)), 0)))
5088 : {
       /* Setting OFFTYPE here is what terminates the enclosing loop.  */
5089 120474 : off = op0;
5090 120474 : offtype = TREE_TYPE (off);
5091 120474 : STRIP_NOPS (off);
5092 120474 : continue;
5093 : }
5094 : break;
5095 : default:
5096 : break;
5097 0 : }
5098 : break;
5099 : }
5100 :
5101 : /* If at the end OFF still isn't a SSA_NAME or isn't
5102 : defined in the loop, punt. */
5103 350180 : if (TREE_CODE (off) != SSA_NAME
5104 350180 : || expr_invariant_in_loop_p (loop, off))
5105 6347 : return false;
5106 :
5107 343833 : if (offtype == NULL_TREE)
5108 223713 : offtype = TREE_TYPE (off);
5109 :
       /* Look up the internal function or the target builtin for the final
          OFFTYPE and SCALE.  A failed internal-function lookup only resets
          IFN to IFN_LAST; the function still returns true below.  */
5110 343833 : if (use_ifn_p)
5111 : {
5112 0 : if (!vect_gather_scatter_fn_p (loop_vinfo, DR_IS_READ (dr), masked_p,
5113 : vectype, memory_type, offtype,
5114 : scale, &tmp_scale,
5115 : &ifn, &offset_vectype,
5116 : &tmp_offset_vectype,
5117 : elsvals))
5118 0 : ifn = IFN_LAST;
5119 : decl = NULL_TREE;
5120 : }
5121 : else
5122 : {
5123 343833 : if (DR_IS_READ (dr))
5124 : {
5125 259425 : if (targetm.vectorize.builtin_gather)
5126 259425 : decl = targetm.vectorize.builtin_gather (vectype, offtype, scale);
5127 : }
5128 : else
5129 : {
5130 84408 : if (targetm.vectorize.builtin_scatter)
5131 84408 : decl = targetm.vectorize.builtin_scatter (vectype, offtype, scale);
5132 : }
5133 343833 : ifn = IFN_LAST;
5134 : /* The offset vector type will be read from DECL when needed. */
5135 343833 : offset_vectype = NULL_TREE;
5136 : }
5137 :
5138 343833 : gcc_checking_assert (expr_invariant_in_loop_p (loop, base));
5139 343833 : gcc_checking_assert (!expr_invariant_in_loop_p (loop, off));
5140 :
5141 343833 : info->ifn = ifn;
5142 343833 : info->decl = decl;
5143 343833 : info->base = base;
5144 :
       /* The alias pointer is an integer constant whose type is the alias
          pointer type of the reference and whose value is the reference's
          object alignment.  */
5145 687666 : info->alias_ptr = build_int_cst
5146 343833 : (reference_alias_ptr_type (DR_REF (dr)),
5147 343833 : get_object_alignment (DR_REF (dr)));
5148 :
5149 343833 : info->offset = off;
5150 343833 : info->offset_vectype = offset_vectype;
5151 343833 : info->scale = scale;
5152 343833 : info->element_type = TREE_TYPE (vectype);
5153 343833 : info->memory_type = memory_type;
5154 343833 : return true;
5155 : }
5156 :
5157 : /* Find the data references in STMT, analyze them with respect to LOOP and
5158 : append them to DATAREFS. Return false if datarefs in this stmt cannot
5159 : be handled. */
       /* At most one data reference is appended per statement; a statement
          with more than one is rejected.  Clobbers and statements without
          data references succeed without appending anything.  Whenever a
          reference is appended and DATAREF_GROUPS is non-null, GROUP_ID is
          appended to DATAREF_GROUPS as well.  Data references that are
          rejected are released with free_data_ref before returning.  */
5160 :
5161 : opt_result
5162 31984749 : vect_find_stmt_data_reference (loop_p loop, gimple *stmt,
5163 : vec<data_reference_p> *datarefs,
5164 : vec<int> *dataref_groups, int group_id)
5165 : {
5166 : /* We can ignore clobbers for dataref analysis - they are removed during
5167 : loop vectorization and BB vectorization checks dependences with a
5168 : stmt walk. */
5169 31984749 : if (gimple_clobber_p (stmt))
5170 1084230 : return opt_result::success ();
5171 :
5172 57508460 : if (gimple_has_volatile_ops (stmt))
5173 320469 : return opt_result::failure_at (stmt, "not vectorized: volatile type: %G",
5174 : stmt);
5175 :
5176 30580050 : if (stmt_can_throw_internal (cfun, stmt))
5177 677346 : return opt_result::failure_at (stmt,
5178 : "not vectorized:"
5179 : " statement can throw an exception: %G",
5180 : stmt);
5181 :
5182 29902704 : auto_vec<data_reference_p, 2> refs;
5183 29902704 : opt_result res = find_data_references_in_stmt (loop, stmt, &refs);
5184 29902704 : if (!res)
5185 3638840 : return res;
5186 :
5187 26263864 : if (refs.is_empty ())
5188 15093792 : return opt_result::success ();
5189 :
5190 11170072 : if (refs.length () > 1)
5191 : {
5192 1245307 : while (!refs.is_empty ())
5193 830507 : free_data_ref (refs.pop ());
5194 414800 : return opt_result::failure_at (stmt,
5195 : "not vectorized: more than one "
5196 : "data ref in stmt: %G", stmt);
5197 : }
5198 :
5199 10755272 : data_reference_p dr = refs.pop ();
       /* The only calls accepted as carrying a data reference are the
          internal functions IFN_MASK_LOAD and IFN_MASK_STORE.  */
5200 10755272 : if (gcall *call = dyn_cast <gcall *> (stmt))
5201 20563 : if (!gimple_call_internal_p (call)
5202 20563 : || (gimple_call_internal_fn (call) != IFN_MASK_LOAD
5203 17491 : && gimple_call_internal_fn (call) != IFN_MASK_STORE))
5204 : {
5205 16982 : free_data_ref (dr);
5206 16982 : return opt_result::failure_at (stmt,
5207 : "not vectorized: dr in a call %G", stmt);
5208 : }
5209 :
5210 10738290 : if (TREE_CODE (DR_REF (dr)) == COMPONENT_REF
5211 10738290 : && DECL_BIT_FIELD (TREE_OPERAND (DR_REF (dr), 1)))
5212 : {
5213 53619 : free_data_ref (dr);
5214 53619 : return opt_result::failure_at (stmt,
5215 : "not vectorized:"
5216 : " statement is an unsupported"
5217 : " bitfield access %G", stmt);
5218 : }
5219 :
5220 10684671 : if (DR_BASE_ADDRESS (dr)
5221 10597141 : && TREE_CODE (DR_BASE_ADDRESS (dr)) == INTEGER_CST)
5222 : {
5223 992 : free_data_ref (dr);
5224 992 : return opt_result::failure_at (stmt,
5225 : "not vectorized:"
5226 : " base addr of dr is a constant\n");
5227 : }
5228 :
5229 : /* Check whether this may be a SIMD lane access and adjust the
5230 : DR to make it easier for us to handle it. */
       /* This is only attempted when LOOP has a simduid and the analysis of
          DR left one of base address, offset, init or step unset.  The
          reference is then re-analyzed relative to the loop containing
          STMT.  */
5231 10683679 : if (loop
5232 599196 : && loop->simduid
5233 10711 : && (!DR_BASE_ADDRESS (dr)
5234 2960 : || !DR_OFFSET (dr)
5235 2960 : || !DR_INIT (dr)
5236 2960 : || !DR_STEP (dr)))
5237 : {
5238 7751 : struct data_reference *newdr
5239 7751 : = create_data_ref (NULL, loop_containing_stmt (stmt), DR_REF (dr), stmt,
5240 7751 : DR_IS_READ (dr), DR_IS_CONDITIONAL_IN_STMT (dr));
5241 7751 : if (DR_BASE_ADDRESS (newdr)
5242 7751 : && DR_OFFSET (newdr)
5243 7751 : && DR_INIT (newdr)
5244 7751 : && DR_STEP (newdr)
5245 7751 : && TREE_CODE (DR_INIT (newdr)) == INTEGER_CST
5246 15502 : && integer_zerop (DR_STEP (newdr)))
5247 : {
5248 7751 : tree base_address = DR_BASE_ADDRESS (newdr);
5249 7751 : tree off = DR_OFFSET (newdr);
5250 7751 : tree step = ssize_int (1);
5251 7751 : if (integer_zerop (off)
5252 7751 : && TREE_CODE (base_address) == POINTER_PLUS_EXPR)
5253 : {
5254 82 : off = TREE_OPERAND (base_address, 1);
5255 82 : base_address = TREE_OPERAND (base_address, 0);
5256 : }
5257 7751 : STRIP_NOPS (off);
       /* Split OFF into OFF * STEP when it is a multiplication by a
          constant that fits an unsigned HOST_WIDE_INT.  */
5258 7751 : if (TREE_CODE (off) == MULT_EXPR
5259 7751 : && tree_fits_uhwi_p (TREE_OPERAND (off, 1)))
5260 : {
5261 7500 : step = TREE_OPERAND (off, 1);
5262 7500 : off = TREE_OPERAND (off, 0);
5263 7500 : STRIP_NOPS (off);
5264 : }
5265 541 : if (CONVERT_EXPR_P (off)
5266 7751 : && (TYPE_PRECISION (TREE_TYPE (TREE_OPERAND (off, 0)))
5267 7210 : < TYPE_PRECISION (TREE_TYPE (off))))
5268 7210 : off = TREE_OPERAND (off, 0);
5269 7751 : if (TREE_CODE (off) == SSA_NAME)
5270 : {
5271 7226 : gimple *def = SSA_NAME_DEF_STMT (off);
5272 : /* Look through widening conversion. */
5273 7226 : if (is_gimple_assign (def)
5274 7226 : && CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (def)))
5275 : {
5276 0 : tree rhs1 = gimple_assign_rhs1 (def);
5277 0 : if (TREE_CODE (rhs1) == SSA_NAME
5278 0 : && INTEGRAL_TYPE_P (TREE_TYPE (rhs1))
5279 0 : && (TYPE_PRECISION (TREE_TYPE (off))
5280 0 : > TYPE_PRECISION (TREE_TYPE (rhs1))))
5281 0 : def = SSA_NAME_DEF_STMT (rhs1);
5282 : }
5283 7226 : if (is_gimple_call (def)
5284 7090 : && gimple_call_internal_p (def)
5285 14316 : && (gimple_call_internal_fn (def) == IFN_GOMP_SIMD_LANE))
5286 : {
5287 7090 : tree arg = gimple_call_arg (def, 0);
5288 7090 : tree reft = TREE_TYPE (DR_REF (newdr));
5289 7090 : gcc_assert (TREE_CODE (arg) == SSA_NAME);
5290 7090 : arg = SSA_NAME_VAR (arg);
       /* The access is only rewritten when the call refers to this loop's
          simduid and STEP equals the size in bytes of the referenced
          type.  */
5291 7090 : if (arg == loop->simduid
5292 : /* For now. */
5293 7090 : && tree_int_cst_equal (TYPE_SIZE_UNIT (reft), step))
5294 : {
5295 7065 : DR_BASE_ADDRESS (newdr) = base_address;
5296 7065 : DR_OFFSET (newdr) = ssize_int (0);
5297 7065 : DR_STEP (newdr) = step;
5298 7065 : DR_OFFSET_ALIGNMENT (newdr) = BIGGEST_ALIGNMENT;
5299 7065 : DR_STEP_ALIGNMENT (newdr) = highest_pow2_factor (step);
5300 : /* Mark as simd-lane access. */
       /* AUX encodes the second argument ARG2 of the IFN_GOMP_SIMD_LANE
          call as -1 - ARG2, i.e. as a negative value stored in the
          pointer.  */
5301 7065 : tree arg2 = gimple_call_arg (def, 1);
5302 7065 : newdr->aux = (void *) (-1 - tree_to_uhwi (arg2));
5303 7065 : free_data_ref (dr);
5304 7065 : datarefs->safe_push (newdr);
5305 7065 : if (dataref_groups)
5306 0 : dataref_groups->safe_push (group_id);
5307 7065 : return opt_result::success ();
5308 : }
5309 : }
5310 : }
5311 : }
       /* The SIMD-lane pattern did not match; release the re-analysis and
          fall back to the original DR.  */
5312 686 : free_data_ref (newdr);
5313 : }
5314 :
5315 10676614 : datarefs->safe_push (dr);
5316 10676614 : if (dataref_groups)
5317 10084483 : dataref_groups->safe_push (group_id);
5318 10676614 : return opt_result::success ();
5319 29902704 : }
5320 :
5321 : /* Function vect_analyze_data_refs.
5322 :
5323 : Find all the data references in the loop or basic block.
5324 :
5325 : The general structure of the analysis of data refs in the vectorizer is as
5326 : follows:
5327 : 1- vect_analyze_data_refs(loop/bb): call
5328 : compute_data_dependences_for_loop/bb to find and analyze all data-refs
5329 : in the loop/bb and their dependences.
5330 : 2- vect_analyze_dependences(): apply dependence testing using ddrs.
5331 : 3- vect_analyze_drs_alignment(): check that ref_stmt.alignment is ok.
5332 : 4- vect_analyze_drs_access(): check that ref_stmt.step is ok.
5333 :
5334 : */
       /* This function walks the data references already recorded in
          VINFO->shared->datarefs and attaches each one to its statement's
          stmt_vec_info.  For loop vectorization any unsupported reference
          makes the function return a failure.  For BB vectorization several
          of the checks instead only mark the statement as not vectorizable
          and continue with the next reference.  If FATAL is non-null it is
          set to false when the failure is a missing vector type or an access
          that is not suitable for gather/scatter.  */
5335 :
5336 : opt_result
5337 2696649 : vect_analyze_data_refs (vec_info *vinfo, bool *fatal)
5338 : {
5339 2696649 : class loop *loop = NULL;
5340 2696649 : unsigned int i;
5341 2696649 : struct data_reference *dr;
5342 2696649 : tree scalar_type;
5343 :
5344 2696649 : DUMP_VECT_SCOPE ("vect_analyze_data_refs");
5345 :
       /* LOOP stays NULL for basic-block vectorization.  */
5346 2696649 : if (loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo))
5347 512928 : loop = LOOP_VINFO_LOOP (loop_vinfo);
5348 :
5349 : /* Go through the data-refs, check that the analysis succeeded. Update
5350 : pointer from stmt_vec_info struct to DR and vectype. */
5351 :
5352 2696649 : vec<data_reference_p> datarefs = vinfo->shared->datarefs;
5353 17725997 : FOR_EACH_VEC_ELT (datarefs, i, dr)
5354 : {
5355 15101824 : enum { SG_NONE, GATHER, SCATTER } gatherscatter = SG_NONE;
5356 :
5357 15101824 : gcc_assert (DR_REF (dr));
5358 15101824 : stmt_vec_info stmt_info = vinfo->lookup_stmt (DR_STMT (dr));
5359 15101824 : gcc_assert (!stmt_info->dr_aux.dr);
5360 15101824 : stmt_info->dr_aux.dr = dr;
5361 15101824 : stmt_info->dr_aux.stmt = stmt_info;
5362 :
5363 : /* Check that analysis of the data-ref succeeded. */
5364 15101824 : if (!DR_BASE_ADDRESS (dr) || !DR_OFFSET (dr) || !DR_INIT (dr)
5365 14984836 : || !DR_STEP (dr))
5366 : {
5367 233976 : bool maybe_gather
5368 116988 : = DR_IS_READ (dr)
5369 116988 : && !TREE_THIS_VOLATILE (DR_REF (dr));
5370 233976 : bool maybe_scatter
5371 : = DR_IS_WRITE (dr)
5372 116988 : && !TREE_THIS_VOLATILE (DR_REF (dr));
5373 :
5374 : /* If target supports vector gather loads or scatter stores,
5375 : see if they can't be used. */
       /* Only the candidate kind is recorded here; the suitability check
          itself is done by vect_check_gather_scatter further down, once the
          vector type is known.  */
5376 116988 : if (is_a <loop_vec_info> (vinfo)
5377 116988 : && !nested_in_vect_loop_p (loop, stmt_info))
5378 : {
5379 113614 : if (maybe_gather || maybe_scatter)
5380 : {
5381 113614 : if (maybe_gather)
5382 : gatherscatter = GATHER;
5383 : else
5384 21742 : gatherscatter = SCATTER;
5385 : }
5386 : }
5387 :
5388 21742 : if (gatherscatter == SG_NONE)
5389 : {
5390 3374 : if (dump_enabled_p ())
5391 5 : dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5392 : "not vectorized: data ref analysis "
5393 : "failed %G", stmt_info->stmt);
5394 3374 : if (is_a <bb_vec_info> (vinfo))
5395 : {
5396 : /* In BB vectorization the ref can still participate
5397 : in dependence analysis, we just can't vectorize it. */
5398 3024 : STMT_VINFO_VECTORIZABLE (stmt_info) = false;
5399 3024 : continue;
5400 : }
5401 350 : return opt_result::failure_at (stmt_info->stmt,
5402 : "not vectorized:"
5403 : " data ref analysis failed: %G",
5404 : stmt_info->stmt);
5405 : }
5406 : }
5407 :
5408 : /* See if this was detected as SIMD lane access. */
       /* The AUX values -1 .. -4 tested here are negated below, so the
          simd-lane-access field receives a value in the range 1 .. 4.  */
5409 15098450 : if (dr->aux == (void *)-1
5410 15098450 : || dr->aux == (void *)-2
5411 15089548 : || dr->aux == (void *)-3
5412 15088708 : || dr->aux == (void *)-4)
5413 : {
5414 10542 : if (nested_in_vect_loop_p (loop, stmt_info))
5415 0 : return opt_result::failure_at (stmt_info->stmt,
5416 : "not vectorized:"
5417 : " data ref analysis failed: %G",
5418 : stmt_info->stmt);
5419 10542 : STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info)
5420 10542 : = -(uintptr_t) dr->aux;
5421 : }
5422 :
5423 15098450 : tree base = get_base_address (DR_REF (dr));
5424 15098450 : if (base && VAR_P (base) && DECL_NONALIASED (base))
5425 : {
5426 8894 : if (dump_enabled_p ())
5427 186 : dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5428 : "not vectorized: base object not addressable "
5429 : "for stmt: %G", stmt_info->stmt);
5430 8894 : if (is_a <bb_vec_info> (vinfo))
5431 : {
5432 : /* In BB vectorization the ref can still participate
5433 : in dependence analysis, we just can't vectorize it. */
5434 8893 : STMT_VINFO_VECTORIZABLE (stmt_info) = false;
5435 8893 : continue;
5436 : }
5437 1 : return opt_result::failure_at (stmt_info->stmt,
5438 : "not vectorized: base object not"
5439 : " addressable for stmt: %G",
5440 : stmt_info->stmt);
5441 : }
5442 :
       /* In loop vectorization a step that is not an INTEGER_CST makes the
          access strided; that is rejected for statements nested in an inner
          loop.  */
5443 15089556 : if (is_a <loop_vec_info> (vinfo)
5444 1167342 : && DR_STEP (dr)
5445 16143284 : && TREE_CODE (DR_STEP (dr)) != INTEGER_CST)
5446 : {
5447 44874 : if (nested_in_vect_loop_p (loop, stmt_info))
5448 389 : return opt_result::failure_at (stmt_info->stmt,
5449 : "not vectorized: "
5450 : "not suitable for strided load %G",
5451 : stmt_info->stmt);
5452 44485 : STMT_VINFO_STRIDED_P (stmt_info) = true;
5453 : }
5454 :
5455 : /* Update DR field in stmt_vec_info struct. */
5456 :
5457 : /* If the dataref is in an inner-loop of the loop that is considered for
5458 : vectorization, we also want to analyze the access relative to
5459 : the outer-loop (DR contains information only relative to the
5460 : inner-most enclosing loop). We do that by building a reference to the
5461 : first location accessed by the inner-loop, and analyze it relative to
5462 : the outer-loop. */
5463 15089167 : if (loop && nested_in_vect_loop_p (loop, stmt_info))
5464 : {
5465 : /* Build a reference to the first location accessed by the
5466 : inner loop: *(BASE + INIT + OFFSET). By construction,
5467 : this address must be invariant in the inner loop, so we
5468 : can consider it as being used in the outer loop. */
5469 11900 : tree base = unshare_expr (DR_BASE_ADDRESS (dr));
5470 11900 : tree offset = unshare_expr (DR_OFFSET (dr));
5471 11900 : tree init = unshare_expr (DR_INIT (dr));
5472 11900 : tree init_offset = fold_build2 (PLUS_EXPR, TREE_TYPE (offset),
5473 : init, offset);
5474 11900 : tree init_addr = fold_build_pointer_plus (base, init_offset);
5475 11900 : tree init_ref = build_fold_indirect_ref (init_addr);
5476 :
5477 11900 : if (dump_enabled_p ())
5478 1228 : dump_printf_loc (MSG_NOTE, vect_location,
5479 : "analyze in outer loop: %T\n", init_ref);
5480 :
5481 11900 : opt_result res
5482 11900 : = dr_analyze_innermost (&STMT_VINFO_DR_WRT_VEC_LOOP (stmt_info),
5483 11900 : init_ref, loop, stmt_info->stmt);
5484 11900 : if (!res)
5485 : /* dr_analyze_innermost already explained the failure. */
5486 166 : return res;
5487 :
5488 11734 : if (dump_enabled_p ())
5489 1224 : dump_printf_loc (MSG_NOTE, vect_location,
5490 : "\touter base_address: %T\n"
5491 : "\touter offset from base address: %T\n"
5492 : "\touter constant offset from base address: %T\n"
5493 : "\touter step: %T\n"
5494 : "\touter base alignment: %d\n\n"
5495 : "\touter base misalignment: %d\n"
5496 : "\touter offset alignment: %d\n"
5497 : "\touter step alignment: %d\n",
5498 : STMT_VINFO_DR_BASE_ADDRESS (stmt_info),
5499 : STMT_VINFO_DR_OFFSET (stmt_info),
5500 : STMT_VINFO_DR_INIT (stmt_info),
5501 : STMT_VINFO_DR_STEP (stmt_info),
5502 : STMT_VINFO_DR_BASE_ALIGNMENT (stmt_info),
5503 : STMT_VINFO_DR_BASE_MISALIGNMENT (stmt_info),
5504 : STMT_VINFO_DR_OFFSET_ALIGNMENT (stmt_info),
5505 : STMT_VINFO_DR_STEP_ALIGNMENT (stmt_info));
5506 : }
5507 :
5508 : /* Set vectype for STMT. */
5509 15089001 : scalar_type = TREE_TYPE (DR_REF (dr));
5510 15089001 : tree vectype = get_vectype_for_scalar_type (vinfo, scalar_type);
5511 15089001 : if (!vectype)
5512 : {
5513 1800363 : if (dump_enabled_p ())
5514 : {
5515 2043 : dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5516 : "not vectorized: no vectype for stmt: %G",
5517 : stmt_info->stmt);
5518 2043 : dump_printf (MSG_MISSED_OPTIMIZATION, " scalar_type: ");
5519 2043 : dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_DETAILS,
5520 : scalar_type);
5521 2043 : dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
5522 : }
5523 :
5524 1800363 : if (is_a <bb_vec_info> (vinfo))
5525 : {
5526 : /* No vector type is fine, the ref can still participate
5527 : in dependence analysis, we just can't vectorize it. */
5528 1736262 : STMT_VINFO_VECTORIZABLE (stmt_info) = false;
5529 1736262 : continue;
5530 : }
5531 64101 : if (fatal)
5532 64101 : *fatal = false;
5533 64101 : return opt_result::failure_at (stmt_info->stmt,
5534 : "not vectorized:"
5535 : " no vectype for stmt: %G"
5536 : " scalar_type: %T\n",
5537 : stmt_info->stmt, scalar_type);
5538 : }
5539 : else
5540 : {
5541 13288638 : if (dump_enabled_p ())
5542 83298 : dump_printf_loc (MSG_NOTE, vect_location,
5543 : "got vectype for stmt: %G%T\n",
5544 : stmt_info->stmt, vectype);
5545 : }
5546 :
5547 : /* Leave the BB vectorizer to pick the vector type later, based on
5548 : the final dataref group size and SLP node size. */
5549 13288638 : if (is_a <loop_vec_info> (vinfo))
5550 1102686 : STMT_VINFO_VECTYPE (stmt_info) = vectype;
5551 :
       /* A gather/scatter candidate must pass vect_check_gather_scatter and
          must also have a vector type for the scalar type of its offset.  */
5552 13288638 : if (gatherscatter != SG_NONE)
5553 : {
5554 107908 : gather_scatter_info gs_info;
5555 107908 : if (!vect_check_gather_scatter (stmt_info, vectype,
5556 : as_a <loop_vec_info> (vinfo),
5557 : &gs_info)
5558 211834 : || !get_vectype_for_scalar_type (vinfo,
5559 103926 : TREE_TYPE (gs_info.offset)))
5560 : {
5561 7469 : if (fatal)
5562 7469 : *fatal = false;
5563 7469 : return opt_result::failure_at
5564 7837 : (stmt_info->stmt,
5565 : (gatherscatter == GATHER)
5566 : ? "not vectorized: not suitable for gather load %G"
5567 : : "not vectorized: not suitable for scatter store %G",
5568 : stmt_info->stmt);
5569 : }
5570 100439 : STMT_VINFO_GATHER_SCATTER_P (stmt_info) = gatherscatter;
5571 : }
5572 : }
5573 :
5574 : /* We used to stop processing and prune the list here. Verify we no
5575 : longer need to. */
5576 4199518 : gcc_assert (i == datarefs.length ());
5577 :
5578 2624173 : return opt_result::success ();
5579 : }
5580 :
5581 :
5582 : /* Function vect_get_new_vect_var.
5583 :
5584 : Returns the variable created by create_tmp_reg for TYPE. The current
5585 : naming scheme picks the prefix "vect", "stmp", "mask" or "vectp"
5586 : (depending on the value of VAR_KIND) and, if NAME is provided, names the
5587 : variable PREFIX_NAME; otherwise the bare prefix is used. */
5588 :
5589 : tree
5590 1945854 : vect_get_new_vect_var (tree type, enum vect_var_kind var_kind, const char *name)
5591 : {
5592 1945854 : const char *prefix;
5593 1945854 : tree new_vect_var;
5594 :
       /* Any VAR_KIND not listed below is treated as unreachable.  */
5595 1945854 : switch (var_kind)
5596 : {
5597 : case vect_simple_var:
5598 : prefix = "vect";
5599 : break;
5600 23041 : case vect_scalar_var:
5601 23041 : prefix = "stmp";
5602 23041 : break;
5603 20208 : case vect_mask_var:
5604 20208 : prefix = "mask";
5605 20208 : break;
5606 1394242 : case vect_pointer_var:
5607 1394242 : prefix = "vectp";
5608 1394242 : break;
5609 0 : default:
5610 0 : gcc_unreachable ();
5611 : }
5612 :
5613 1945854 : if (name)
5614 : {
       /* The concatenated name is only needed for the create_tmp_reg call
          and is freed right after it.  */
5615 1099449 : char* tmp = concat (prefix, "_", name, NULL);
5616 1099449 : new_vect_var = create_tmp_reg (type, tmp);
5617 1099449 : free (tmp);
5618 : }
5619 : else
5620 846405 : new_vect_var = create_tmp_reg (type, prefix);
5621 :
5622 1945854 : return new_vect_var;
5623 : }
5624 :
5625 : /* Like vect_get_new_vect_var but return an SSA name. */
       /* The name is created with make_temp_ssa_name and is PREFIX_NAME when
          NAME is provided, otherwise the bare prefix.  Unlike
          vect_get_new_vect_var, vect_mask_var has no case of its own here and
          ends up in the unreachable default.  */
5626 :
5627 : tree
5628 7111 : vect_get_new_ssa_name (tree type, enum vect_var_kind var_kind, const char *name)
5629 : {
5630 7111 : const char *prefix;
5631 7111 : tree new_vect_var;
5632 :
5633 7111 : switch (var_kind)
5634 : {
5635 : case vect_simple_var:
5636 : prefix = "vect";
5637 : break;
5638 312 : case vect_scalar_var:
5639 312 : prefix = "stmp";
5640 312 : break;
5641 0 : case vect_pointer_var:
5642 0 : prefix = "vectp";
5643 0 : break;
5644 0 : default:
5645 0 : gcc_unreachable ();
5646 : }
5647 :
5648 7111 : if (name)
5649 : {
       /* The concatenated name is freed as soon as the SSA name has been
          created.  */
5650 6634 : char* tmp = concat (prefix, "_", name, NULL);
5651 6634 : new_vect_var = make_temp_ssa_name (type, NULL, tmp);
5652 6634 : free (tmp);
5653 : }
5654 : else
5655 477 : new_vect_var = make_temp_ssa_name (type, NULL, prefix);
5656 :
5657 7111 : return new_vect_var;
5658 : }
5659 :
5660 : /* Duplicate points-to info on NAME from DR_INFO. */
       /* If the data reference has points-to info, it is copied to NAME and
          NAME's alignment is marked unknown.  Otherwise, if NAME has no
          points-to info yet and the base of the reference is a VAR_DECL,
          PARM_DECL or RESULT_DECL, that decl is recorded on NAME through
          pt_solution_set_var.  In all other cases NAME is left untouched.  */
5661 :
5662 : static void
5663 432024 : vect_duplicate_ssa_name_ptr_info (tree name, dr_vec_info *dr_info)
5664 : {
5665 432024 : if (DR_PTR_INFO (dr_info->dr))
5666 : {
5667 290105 : duplicate_ssa_name_ptr_info (name, DR_PTR_INFO (dr_info->dr));
5668 : /* DR_PTR_INFO is for a base SSA name, not including constant or
5669 : variable offsets in the ref so its alignment info does not apply. */
5670 290105 : mark_ptr_info_alignment_unknown (SSA_NAME_PTR_INFO (name));
5671 : }
5672 141919 : else if (!SSA_NAME_PTR_INFO (name))
5673 : {
       /* NOTE(review): BASE is used without a null check here - presumably
          get_base_address cannot return NULL for this reference; confirm.  */
5674 141919 : tree base = get_base_address (dr_info->dr->ref);
5675 141919 : if (VAR_P (base)
5676 : || TREE_CODE (base) == PARM_DECL
5677 : || TREE_CODE (base) == RESULT_DECL)
5678 : {
5679 130075 : struct ptr_info_def *pi = get_ptr_info (name);
5680 130075 : pt_solution_set_var (&pi->pt, base);
5681 : }
5682 : }
5683 432024 : }
5684 :
5685 : /* Function vect_create_addr_base_for_vector_ref.
5686 :
5687 : Create an expression that computes the address of the first memory location
5688 : that will be accessed for a data reference.
5689 :
5690 : Input:
5691 : STMT_INFO: The statement containing the data reference.
5692 : NEW_STMT_LIST: Must be initialized to NULL_TREE or a statement list.
5693 : OFFSET: Optional. If supplied, it is added to the initial address.
5694 : LOOP: Specify relative to which loop-nest should the address be computed.
5695 : For example, when the dataref is in an inner-loop nested in an
5696 : outer-loop that is now being vectorized, LOOP can be either the
5697 : outer-loop, or the inner-loop. The first memory location accessed
5698 : by the following dataref ('in' points to short):
5699 :
5700 : for (i=0; i<N; i++)
5701 : for (j=0; j<M; j++)
5702 : s += in[i+j]
5703 :
5704 : is as follows:
5705 : if LOOP=i_loop: &in (relative to i_loop)
5706 : if LOOP=j_loop: &in+i*2B (relative to j_loop)
5707 :
5708 : Output:
5709 : 1. Return an SSA_NAME whose value is the address of the memory location of
5710 : the first vector of the data reference.
5711 : 2. If new_stmt_list is not NULL_TREE after return then the caller must insert
5712 : these statement(s) which define the returned SSA_NAME.
5713 :
5714 : FORNOW: We are only handling array accesses with step 1. */
5715 :
5716 : tree
5717 697255 : vect_create_addr_base_for_vector_ref (vec_info *vinfo, stmt_vec_info stmt_info,
5718 : gimple_seq *new_stmt_list,
5719 : tree offset)
5720 : {
5721 697255 : dr_vec_info *dr_info = STMT_VINFO_DR_INFO (stmt_info);
5722 697255 : struct data_reference *dr = dr_info->dr;
5723 697255 : const char *base_name;
5724 697255 : tree addr_base;
5725 697255 : tree dest;
5726 697255 : gimple_seq seq = NULL;
5727 697255 : tree vect_ptr_type;
5728 697255 : loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
5729 697255 : innermost_loop_behavior *drb = vect_dr_behavior (vinfo, dr_info);
5730 :
          /* Work on unshared copies of the base address, offset and init of
             the data reference behavior.  */
5731 697255 : tree data_ref_base = unshare_expr (drb->base_address);
5732 697255 : tree base_offset = unshare_expr (get_dr_vinfo_offset (vinfo, dr_info, true));
5733 697255 : tree init = unshare_expr (drb->init);
5734 :
          /* With a loop_vec_info the new pointer is named after the base
             address.  Otherwise the offset and init are reset to zero and the
             name is taken from the reference itself.  */
5735 697255 : if (loop_vinfo)
5736 128630 : base_name = get_name (data_ref_base);
5737 : else
5738 : {
5739 568625 : base_offset = ssize_int (0);
5740 568625 : init = ssize_int (0);
5741 568625 : base_name = get_name (DR_REF (dr));
5742 : }
5743 :
5744 : /* Create base_offset */
5745 697255 : base_offset = size_binop (PLUS_EXPR,
5746 : fold_convert (sizetype, base_offset),
5747 : fold_convert (sizetype, init));
5748 :
          /* Fold the caller-supplied OFFSET, if any, into BASE_OFFSET.  */
5749 697255 : if (offset)
5750 : {
5751 3110 : offset = fold_convert (sizetype, offset);
5752 3110 : base_offset = fold_build2 (PLUS_EXPR, sizetype,
5753 : base_offset, offset);
5754 : }
5755 :
5756 : /* base + base_offset */
          /* NOTE(review): only the loop_vinfo branch below uses BASE_OFFSET.
             In the other branch the address is built directly from DR_REF, so
             a non-NULL OFFSET does not reach the result there -- confirm this
             is intended by the callers.  */
5757 697255 : if (loop_vinfo)
5758 128630 : addr_base = fold_build_pointer_plus (data_ref_base, base_offset);
5759 : else
5760 1137250 : addr_base = build1 (ADDR_EXPR,
5761 568625 : build_pointer_type (TREE_TYPE (DR_REF (dr))),
5762 : /* Strip zero offset components since we don't need
5763 : them and they can confuse late diagnostics if
5764 : we CSE them wrongly. See PR106904 for example. */
5765 : unshare_expr (strip_zero_offset_components
5766 : (DR_REF (dr))));
5767 :
          /* Turn ADDR_BASE into a gimple operand, using a new pointer
             temporary DEST named after BASE_NAME.  Any statements this
             requires are appended to NEW_STMT_LIST for the caller to
             insert.  */
5768 697255 : vect_ptr_type = build_pointer_type (TREE_TYPE (DR_REF (dr)));
5769 697255 : dest = vect_get_new_vect_var (vect_ptr_type, vect_pointer_var, base_name);
5770 697255 : addr_base = force_gimple_operand (addr_base, &seq, true, dest);
5771 697255 : gimple_seq_add_seq (new_stmt_list, seq);
5772 :
5773 697255 : if (TREE_CODE (addr_base) == SSA_NAME
5774 : /* We should only duplicate pointer info to newly created SSA names. */
5775 703990 : && SSA_NAME_VAR (addr_base) == dest)
5776 : {
5777 174644 : gcc_assert (!SSA_NAME_PTR_INFO (addr_base));
5778 174644 : vect_duplicate_ssa_name_ptr_info (addr_base, dr_info);
5779 : }
5780 :
5781 697255 : if (dump_enabled_p ())
5782 25315 : dump_printf_loc (MSG_NOTE, vect_location, "created %T\n", addr_base);
5783 :
5784 697255 : return addr_base;
5785 : }
5786 :
5787 :
5788 : /* Function vect_create_data_ref_ptr.
5789 :
5790 : Create a new pointer-to-AGGR_TYPE variable (ap), that points to the first
5791 : location accessed in the loop by STMT_INFO, along with the def-use update
5792 : chain to appropriately advance the pointer through the loop iterations.
5793 : Also set aliasing information for the pointer. This pointer is used by
5794 : the callers to this function to create a memory reference expression for
5795 : vector load/store access.
5796 :
5797 : Input:
5798 : 1. STMT_INFO: a stmt that references memory. Expected to be of the form
5799 : GIMPLE_ASSIGN <name, data-ref> or
5800 : GIMPLE_ASSIGN <data-ref, name>.
5801 : 2. AGGR_TYPE: the type of the reference, which should be either a vector
5802 : or an array.
5803 : 3. AT_LOOP: the loop where the vector memref is to be created.
5804 : 4. OFFSET (optional): a byte offset to be added to the initial address
5805 : accessed by the data-ref in STMT_INFO.
5806 : 5. GSI: location where the new stmts are to be placed if there is no loop
5807 : 6. ONLY_INIT: indicate if ap is to be updated in the loop, or remain
5808 : pointing to the initial address.
5809 : 7. IV_STEP (optional, defaults to NULL): the amount that should be added
5810 : to the IV during each iteration of the loop. NULL says to move
5811 : by one copy of AGGR_TYPE up or down, depending on the step of the
5812 : data reference.
5813 :
5814 : Output:
5815 : 1. Declare a new ptr to vector_type, and have it point to the base of the
5816 : data reference (initial address accessed by the data reference).
5817 : For example, for vector of type V8HI, the following code is generated:
5818 :
5819 : v8hi *ap;
5820 : ap = (v8hi *)initial_address;
5821 :
5822 : if OFFSET is not supplied:
5823 : initial_address = &a[init];
5824 : if OFFSET is supplied:
5825 : initial_address = &a[init] + OFFSET;
5826 : if BYTE_OFFSET is supplied:
5827 : initial_address = &a[init] + BYTE_OFFSET;
5828 :
5829 : Return the initial_address in INITIAL_ADDRESS.
5830 :
5831 : 2. If ONLY_INIT is true, just return the initial pointer. Otherwise, also
5832 : update the pointer in each iteration of the loop.
5833 :
5834 : Return the increment stmt that updates the pointer in PTR_INCR.
5835 :
5836 : 3. Return the pointer. */
5837 :
5838 : tree
5839 696987 : vect_create_data_ref_ptr (vec_info *vinfo, stmt_vec_info stmt_info,
5840 : tree aggr_type, class loop *at_loop, tree offset,
5841 : tree *initial_address, gimple_stmt_iterator *gsi,
5842 : gimple **ptr_incr, bool only_init,
5843 : tree iv_step)
5844 : {
5845 696987 : const char *base_name;
5846 696987 : loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
5847 696987 : class loop *loop = NULL;
5848 696987 : bool nested_in_vect_loop = false;
5849 696987 : class loop *containing_loop = NULL;
5850 696987 : tree aggr_ptr_type;
5851 696987 : tree aggr_ptr;
5852 696987 : tree new_temp;
5853 696987 : gimple_seq new_stmt_list = NULL;
5854 696987 : edge pe = NULL;
5855 696987 : basic_block new_bb;
5856 696987 : tree aggr_ptr_init;
5857 696987 : dr_vec_info *dr_info = STMT_VINFO_DR_INFO (stmt_info);
5858 696987 : struct data_reference *dr = dr_info->dr;
5859 696987 : tree aptr;
5860 696987 : gimple_stmt_iterator incr_gsi;
5861 696987 : bool insert_after;
5862 696987 : tree indx_before_incr, indx_after_incr;
5863 696987 : gimple *incr;
5864 696987 : bb_vec_info bb_vinfo = dyn_cast <bb_vec_info> (vinfo);
5865 :
          /* Without an explicit IV_STEP the step is derived from the size of
             AGGR_TYPE, which therefore has to be an array or vector type.  */
5866 696987 : gcc_assert (iv_step != NULL_TREE
5867 : || TREE_CODE (aggr_type) == ARRAY_TYPE
5868 : || TREE_CODE (aggr_type) == VECTOR_TYPE);
5869 :
5870 696987 : if (loop_vinfo)
5871 : {
5872 128362 : loop = LOOP_VINFO_LOOP (loop_vinfo);
5873 128362 : nested_in_vect_loop = nested_in_vect_loop_p (loop, stmt_info);
5874 128362 : containing_loop = (gimple_bb (stmt_info->stmt))->loop_father;
5875 128362 : pe = loop_preheader_edge (loop);
5876 : }
5877 : else
5878 : {
          /* Basic-block vectorization: there is no loop to update the pointer
             in, so force ONLY_INIT and report no increment statement.
             NOTE(review): PTR_INCR is dereferenced here without a NULL check,
             whereas the later stores are guarded by "if (ptr_incr)" --
             confirm callers always pass a non-NULL PTR_INCR on this path.  */
5879 568625 : gcc_assert (bb_vinfo);
5880 568625 : only_init = true;
5881 568625 : *ptr_incr = NULL;
5882 : }
5883 :
5884 : /* Create an expression for the first address accessed by this load
5885 : in LOOP. */
5886 696987 : base_name = get_name (DR_BASE_ADDRESS (dr));
5887 :
5888 696987 : if (dump_enabled_p ())
5889 : {
5890 25212 : tree dr_base_type = TREE_TYPE (DR_BASE_OBJECT (dr));
5891 25212 : dump_printf_loc (MSG_NOTE, vect_location,
5892 : "create %s-pointer variable to type: %T",
5893 25212 : get_tree_code_name (TREE_CODE (aggr_type)),
5894 : aggr_type);
5895 25212 : if (TREE_CODE (dr_base_type) == ARRAY_TYPE)
5896 13519 : dump_printf (MSG_NOTE, " vectorizing an array ref: ");
5897 11693 : else if (TREE_CODE (dr_base_type) == VECTOR_TYPE)
5898 0 : dump_printf (MSG_NOTE, " vectorizing a vector ref: ");
5899 11693 : else if (TREE_CODE (dr_base_type) == RECORD_TYPE)
5900 1638 : dump_printf (MSG_NOTE, " vectorizing a record based array ref: ");
5901 : else
5902 10055 : dump_printf (MSG_NOTE, " vectorizing a pointer ref: ");
5903 25212 : dump_printf (MSG_NOTE, "%T\n", DR_BASE_OBJECT (dr));
5904 : }
5905 :
5906 : /* (1) Create the new aggregate-pointer variable.
5907 : Vector and array types inherit the alias set of their component
5908 : type by default so we need to use a ref-all pointer if the data
5909 : reference does not conflict with the created aggregated data
5910 : reference because it is not addressable. */
5911 696987 : bool need_ref_all = false;
5912 696987 : if (!alias_sets_conflict_p (get_alias_set (aggr_type),
5913 : get_alias_set (DR_REF (dr))))
5914 : need_ref_all = true;
5915 : /* Likewise for any of the data references in the stmt group. */
5916 593523 : else if (DR_GROUP_SIZE (stmt_info) > 1)
5917 : {
5918 479820 : stmt_vec_info sinfo = DR_GROUP_FIRST_ELEMENT (stmt_info);
5919 1341370 : do
5920 : {
5921 1341370 : struct data_reference *sdr = STMT_VINFO_DATA_REF (sinfo);
5922 1341370 : if (!alias_sets_conflict_p (get_alias_set (aggr_type),
5923 : get_alias_set (DR_REF (sdr))))
5924 : {
5925 : need_ref_all = true;
5926 : break;
5927 : }
5928 1340311 : sinfo = DR_GROUP_NEXT_ELEMENT (sinfo);
5929 : }
5930 1340311 : while (sinfo);
5931 : }
5932 696987 : aggr_ptr_type = build_pointer_type_for_mode (aggr_type, VOIDmode,
5933 : need_ref_all);
5934 696987 : aggr_ptr = vect_get_new_vect_var (aggr_ptr_type, vect_pointer_var, base_name);
5935 :
5936 :
5937 : /* Note: If the dataref is in an inner-loop nested in LOOP, and we are
5938 : vectorizing LOOP (i.e., outer-loop vectorization), we need to create two
5939 : def-use update cycles for the pointer: one relative to the outer-loop
5940 : (LOOP), which is what steps (2) and (3) below do. The other is relative
5941 : to the inner-loop (which is the inner-most loop containing the dataref),
5942 : and this is done by step (4) below.
5943 :
5944 : When vectorizing inner-most loops, the vectorized loop (LOOP) is also the
5945 : inner-most loop, and so steps (2),(3) work the same, and step (4) is
5946 : redundant. Steps (2),(3) create the following:
5947 :
5948 : vp0 = &base_addr;
5949 : LOOP: vp1 = phi(vp0,vp2)
5950 : ...
5951 : ...
5952 : vp2 = vp1 + step
5953 : goto LOOP
5954 :
5955 : If there is an inner-loop nested in loop, then step (4) will also be
5956 : applied, and an additional update in the inner-loop will be created:
5957 :
5958 : vp0 = &base_addr;
5959 : LOOP: vp1 = phi(vp0,vp2)
5960 : ...
5961 : inner: vp3 = phi(vp1,vp4)
5962 : vp4 = vp3 + inner_step
5963 : if () goto inner
5964 : ...
5965 : vp2 = vp1 + step
5966 : if () goto LOOP */
5967 :
5968 : /* (2) Calculate the initial address of the aggregate-pointer, and set
5969 : the aggregate-pointer to point to it before the loop. */
5970 :
5971 : /* Create: (&(base[init_val]+offset) in the loop preheader. */
5972 :
5973 696987 : new_temp = vect_create_addr_base_for_vector_ref (vinfo,
5974 : stmt_info, &new_stmt_list,
5975 : offset);
          /* Insert any statements needed for the address: on the preheader
             edge PE when there is a loop, otherwise before GSI.  */
5976 696987 : if (new_stmt_list)
5977 : {
5978 174523 : if (pe)
5979 : {
5980 54619 : new_bb = gsi_insert_seq_on_edge_immediate (pe, new_stmt_list);
5981 54619 : gcc_assert (!new_bb);
5982 : }
5983 : else
5984 119904 : gsi_insert_seq_before (gsi, new_stmt_list, GSI_SAME_STMT);
5985 : }
5986 :
5987 696987 : *initial_address = new_temp;
5988 696987 : aggr_ptr_init = new_temp;
5989 :
5990 : /* (3) Handle the updating of the aggregate-pointer inside the loop.
5991 : This is needed when ONLY_INIT is false, and also when AT_LOOP is the
5992 : inner-loop nested in LOOP (during outer-loop vectorization). */
5993 :
5994 : /* No update in loop is required. */
5995 696987 : if (only_init && (!loop_vinfo || at_loop == loop))
5996 : aptr = aggr_ptr_init;
5997 : else
5998 : {
5999 : /* Accesses to invariant addresses should be handled specially
6000 : by the caller. */
6001 128354 : tree step = vect_dr_behavior (vinfo, dr_info)->step;
6002 128354 : gcc_assert (!integer_zerop (step));
6003 :
6004 128354 : if (iv_step == NULL_TREE)
6005 : {
6006 : /* The step of the aggregate pointer is the type size,
6007 : negated for downward accesses. */
6008 0 : iv_step = TYPE_SIZE_UNIT (aggr_type);
6009 0 : if (tree_int_cst_sgn (step) == -1)
6010 0 : iv_step = fold_build1 (NEGATE_EXPR, TREE_TYPE (iv_step), iv_step);
6011 : }
6012 :
6013 128354 : standard_iv_increment_position (loop, &incr_gsi, &insert_after);
6014 :
          /* Create the induction variable in LOOP that starts at
             AGGR_PTR_INIT and advances by IV_STEP.  */
6015 128354 : create_iv (aggr_ptr_init, PLUS_EXPR,
6016 : iv_step, aggr_ptr, loop, &incr_gsi, insert_after,
6017 : &indx_before_incr, &indx_after_incr);
6018 128354 : incr = gsi_stmt (incr_gsi);
6019 :
6020 : /* Copy the points-to information if it exists. */
6021 128354 : vect_duplicate_ssa_name_ptr_info (indx_before_incr, dr_info);
6022 128354 : vect_duplicate_ssa_name_ptr_info (indx_after_incr, dr_info);
6023 128354 : if (ptr_incr)
6024 128354 : *ptr_incr = incr;
6025 :
6026 128354 : aptr = indx_before_incr;
6027 : }
6028 :
6029 696987 : if (!nested_in_vect_loop || only_init)
6030 : return aptr;
6031 :
6032 :
6033 : /* (4) Handle the updating of the aggregate-pointer inside the inner-loop
6034 : nested in LOOP, if exists. */
6035 :
          /* Because of the early return above, reaching this point implies
             NESTED_IN_VECT_LOOP is true and ONLY_INIT is false.  The assert
             and the unreachable else-branch below restate that.  */
6036 336 : gcc_assert (nested_in_vect_loop);
6037 336 : if (!only_init)
6038 : {
6039 336 : standard_iv_increment_position (containing_loop, &incr_gsi,
6040 : &insert_after);
          /* Second induction variable, in the loop containing the statement,
             starting at APTR and advancing by DR_STEP of the reference.  */
6041 336 : create_iv (aptr, PLUS_EXPR, DR_STEP (dr),
6042 : aggr_ptr, containing_loop, &incr_gsi, insert_after,
6043 : &indx_before_incr, &indx_after_incr);
6044 336 : incr = gsi_stmt (incr_gsi);
6045 :
6046 : /* Copy the points-to information if it exists. */
6047 336 : vect_duplicate_ssa_name_ptr_info (indx_before_incr, dr_info);
6048 336 : vect_duplicate_ssa_name_ptr_info (indx_after_incr, dr_info);
6049 336 : if (ptr_incr)
6050 336 : *ptr_incr = incr;
6051 :
6052 336 : return indx_before_incr;
6053 : }
6054 : else
6055 : gcc_unreachable ();
6056 : }
6057 :
6058 :
6059 : /* Function bump_vector_ptr
6060 :
6061 : Increment a pointer (to a vector type) by vector-size. If requested,
6062 : i.e. if PTR-INCR is given, then also connect the new increment stmt
6063 : to the existing def-use update-chain of the pointer, by modifying
6064 : the PTR_INCR as illustrated below:
6065 :
6066 : The pointer def-use update-chain before this function:
6067 : DATAREF_PTR = phi (p_0, p_2)
6068 : ....
6069 : PTR_INCR: p_2 = DATAREF_PTR + step
6070 :
6071 : The pointer def-use update-chain after this function:
6072 : DATAREF_PTR = phi (p_0, p_2)
6073 : ....
6074 : NEW_DATAREF_PTR = DATAREF_PTR + BUMP
6075 : ....
6076 : PTR_INCR: p_2 = NEW_DATAREF_PTR + step
6077 :
6078 : Input:
6079 : DATAREF_PTR - ssa_name of a pointer (to vector type) that is being updated
6080 : in the loop.
6081 : PTR_INCR - optional. The stmt that updates the pointer in each iteration of
6082 : the loop. The increment amount across iterations is expected
6083 : to be vector_size.
6084 : GSI - location where the new update stmt is to be placed.
6085 : STMT_INFO - the original scalar memory-access stmt that is being vectorized.
6086 : UPDATE - The offset by which to bump the pointer.
6087 :
6088 : Output: Return NEW_DATAREF_PTR as illustrated above.
6089 :
6090 : */
6091 :
6092 : tree
6093 242027 : bump_vector_ptr (vec_info *vinfo,
6094 : tree dataref_ptr, gimple *ptr_incr, gimple_stmt_iterator *gsi,
6095 : stmt_vec_info stmt_info, tree update)
6096 : {
6097 242027 : struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
6098 242027 : gimple *incr_stmt;
6099 242027 : ssa_op_iter iter;
6100 242027 : use_operand_p use_p;
6101 242027 : tree new_dataref_ptr;
6102 :
          /* Pick the result of the bump.  An SSA name gets a copy of itself
             and an invariant address is bumped by folding UPDATE into an
             ADDR_EXPR of a MEM_REF, which is returned without emitting a
             statement.  Anything else gets a fresh SSA name of the same
             type.  */
6103 242027 : if (TREE_CODE (dataref_ptr) == SSA_NAME)
6104 113158 : new_dataref_ptr = copy_ssa_name (dataref_ptr);
6105 128869 : else if (is_gimple_min_invariant (dataref_ptr))
6106 : /* When possible avoid emitting a separate increment stmt that will
6107 : force the addressed object addressable. */
6108 257738 : return build1 (ADDR_EXPR, TREE_TYPE (dataref_ptr),
6109 128869 : fold_build2 (MEM_REF,
6110 : TREE_TYPE (TREE_TYPE (dataref_ptr)),
6111 : dataref_ptr,
6112 128869 : fold_convert (ptr_type_node, update)));
6113 : else
6114 0 : new_dataref_ptr = make_ssa_name (TREE_TYPE (dataref_ptr));
          /* Emit NEW_DATAREF_PTR = DATAREF_PTR + UPDATE at GSI.  */
6115 113158 : incr_stmt = gimple_build_assign (new_dataref_ptr, POINTER_PLUS_EXPR,
6116 : dataref_ptr, update);
6117 113158 : vect_finish_stmt_generation (vinfo, stmt_info, incr_stmt, gsi);
6118 : /* Fold the increment, avoiding excessive use-def chains of
6119 : those, leading to compile-time issues for passes until the next
6120 : forwprop pass which would do this as well. */
6121 113158 : gimple_stmt_iterator fold_gsi = gsi_for_stmt (incr_stmt);
6122 113158 : if (fold_stmt (&fold_gsi, follow_all_ssa_edges))
6123 : {
6124 73697 : incr_stmt = gsi_stmt (fold_gsi);
6125 73697 : update_stmt (incr_stmt);
6126 : }
6127 :
6128 : /* Copy the points-to information if it exists. */
6129 113158 : duplicate_ssa_name_ptr_info (new_dataref_ptr, DR_PTR_INFO (dr));
6130 :
          /* Without PTR_INCR there is no update chain to reconnect.  */
6131 113158 : if (!ptr_incr)
6132 : return new_dataref_ptr;
6133 :
6134 : /* Update the vector-pointer's cross-iteration increment. */
          /* Each use of DATAREF_PTR in PTR_INCR is redirected to the bumped
             pointer; any other use operand is asserted to equal UPDATE.  */
6135 116046 : FOR_EACH_SSA_USE_OPERAND (use_p, ptr_incr, iter, SSA_OP_USE)
6136 : {
6137 58023 : tree use = USE_FROM_PTR (use_p);
6138 :
6139 58023 : if (use == dataref_ptr)
6140 58023 : SET_USE (use_p, new_dataref_ptr);
6141 : else
6142 0 : gcc_assert (operand_equal_p (use, update, 0));
6143 : }
6144 :
6145 : return new_dataref_ptr;
6146 : }
6147 :
6148 :
6149 : /* Copy memory reference info such as base/clique from the SRC reference
6150 : to the DEST MEM_REF.

          Nothing is done when DEST is not a MEM_REF, or when the innermost
          base of SRC (after stripping all handled components) is neither a
          MEM_REF nor a TARGET_MEM_REF.  Otherwise the dependence clique and
          dependence base of that innermost base are copied to DEST.  */
6151 :
6152 : void
6153 949970 : vect_copy_ref_info (tree dest, tree src)
6154 : {
6155 949970 : if (TREE_CODE (dest) != MEM_REF)
6156 : return;
6157 :
          /* Walk down to the innermost base of SRC.  */
6158 : tree src_base = src;
6159 1907512 : while (handled_component_p (src_base))
6160 962201 : src_base = TREE_OPERAND (src_base, 0);
6161 945311 : if (TREE_CODE (src_base) != MEM_REF
6162 945311 : && TREE_CODE (src_base) != TARGET_MEM_REF)
6163 : return;
6164 :
6165 515269 : MR_DEPENDENCE_CLIQUE (dest) = MR_DEPENDENCE_CLIQUE (src_base);
6166 515269 : MR_DEPENDENCE_BASE (dest) = MR_DEPENDENCE_BASE (src_base);
6167 : }
6168 :
6169 :
6170 : /* Function vect_create_destination_var.
6171 :
6172 : Create a new temporary of type VECTYPE.

          SCALAR_DEST must be an SSA_NAME.  If VECTYPE is NULL the temporary
          is a scalar one with the type of SCALAR_DEST; otherwise it is a mask
          variable when VECTYPE is a vector boolean type and a simple vector
          variable in all other cases.  The temporary is named after
          SCALAR_DEST: "<name>_<version>" when SCALAR_DEST has a name and
          "_<version>" when it does not.  */
6173 :
6174 : tree
6175 534680 : vect_create_destination_var (tree scalar_dest, tree vectype)
6176 : {
6177 534680 : tree vec_dest;
6178 534680 : const char *name;
6179 534680 : char *new_name;
6180 534680 : tree type;
6181 534680 : enum vect_var_kind kind;
6182 :
6183 534680 : kind = vectype
6184 1046319 : ? VECTOR_BOOLEAN_TYPE_P (vectype)
6185 511639 : ? vect_mask_var
6186 : : vect_simple_var
6187 : : vect_scalar_var;
6188 23041 : type = vectype ? vectype : TREE_TYPE (scalar_dest);
6189 :
6190 534680 : gcc_assert (TREE_CODE (scalar_dest) == SSA_NAME);
6191 :
6192 534680 : name = get_name (scalar_dest);
6193 534680 : if (name)
6194 191164 : new_name = xasprintf ("%s_%u", name, SSA_NAME_VERSION (scalar_dest));
6195 : else
6196 343516 : new_name = xasprintf ("_%u", SSA_NAME_VERSION (scalar_dest));
6197 534680 : vec_dest = vect_get_new_vect_var (type, kind, new_name);
          /* NEW_NAME was allocated by xasprintf and is released here, right
             after the variable has been created.  */
6198 534680 : free (new_name);
6199 :
6200 534680 : return vec_dest;
6201 : }
6202 :
6203 : /* Function vect_grouped_store_supported.
6204 :
6205 : Returns TRUE if interleave high and interleave low permutations
6206 : are supported, and FALSE otherwise.

          VECTYPE is the vector type of the stores and COUNT the size of the
          group.  COUNT must be 3 or a power of two, otherwise FALSE is
          returned.  For COUNT == 3 the number of vector elements must be a
          compile-time constant.  The required permutations are checked with
          can_vec_perm_const_p on the mode of VECTYPE.  */
6207 :
6208 : bool
6209 2756 : vect_grouped_store_supported (tree vectype, unsigned HOST_WIDE_INT count)
6210 : {
6211 2756 : machine_mode mode = TYPE_MODE (vectype);
6212 :
6213 : /* vect_permute_store_chain requires the group size to be equal to 3 or
6214 : be a power of two. */
6215 2756 : if (count != 3 && exact_log2 (count) == -1)
6216 : {
6217 548 : if (dump_enabled_p ())
6218 11 : dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6219 : "the size of the group of accesses"
6220 : " is not a power of 2 or not equal to 3\n");
6221 548 : return false;
6222 : }
6223 :
6224 : /* Check that the permutation is supported. */
6225 2208 : if (VECTOR_MODE_P (mode))
6226 : {
6227 2208 : unsigned int i;
6228 2208 : if (count == 3)
6229 : {
6230 955 : unsigned int j0 = 0, j1 = 0, j2 = 0;
          /* Note: this I shadows the one declared in the enclosing block.  */
6231 955 : unsigned int i, j;
6232 :
6233 955 : unsigned int nelt;
6234 1910 : if (!GET_MODE_NUNITS (mode).is_constant (&nelt))
6235 : {
6236 : if (dump_enabled_p ())
6237 : dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6238 : "cannot handle groups of 3 stores for"
6239 : " variable-length vectors\n");
6240 : return false;
6241 : }
6242 :
6243 955 : vec_perm_builder sel (nelt, nelt, 1);
6244 955 : sel.quick_grow (nelt);
6245 955 : vec_perm_indices indices;
          /* For each J in 0..2 two selectors are built and checked.  The
             counters J0, J1 and J2 are not reset between iterations of J.  */
6246 3595 : for (j = 0; j < 3; j++)
6247 : {
6248 2715 : int nelt0 = ((3 - j) * nelt) % 3;
6249 2715 : int nelt1 = ((3 - j) * nelt + 1) % 3;
6250 2715 : int nelt2 = ((3 - j) * nelt + 2) % 3;
          /* First selector: slots at offset NELT0 take successive elements
             of the first input, slots at offset NELT1 successive elements of
             the second input, and slots at offset NELT2 are set to 0.  */
6251 9621 : for (i = 0; i < nelt; i++)
6252 : {
6253 6906 : if (3 * i + nelt0 < nelt)
6254 2340 : sel[3 * i + nelt0] = j0++;
6255 6906 : if (3 * i + nelt1 < nelt)
6256 2301 : sel[3 * i + nelt1] = nelt + j1++;
6257 6906 : if (3 * i + nelt2 < nelt)
6258 2265 : sel[3 * i + nelt2] = 0;
6259 : }
6260 2715 : indices.new_vector (sel, 2, nelt);
6261 2715 : if (!can_vec_perm_const_p (mode, mode, indices))
6262 : {
6263 66 : if (dump_enabled_p ())
6264 37 : dump_printf (MSG_MISSED_OPTIMIZATION,
6265 : "permutation op not supported by target.\n");
6266 66 : return false;
6267 : }
6268 :
          /* Second selector: slots at offsets NELT0 and NELT1 select their
             own index (first input) and slots at offset NELT2 take
             successive elements of the second input.  */
6269 8979 : for (i = 0; i < nelt; i++)
6270 : {
6271 6330 : if (3 * i + nelt0 < nelt)
6272 2116 : sel[3 * i + nelt0] = 3 * i + nelt0;
6273 6330 : if (3 * i + nelt1 < nelt)
6274 2107 : sel[3 * i + nelt1] = 3 * i + nelt1;
6275 6330 : if (3 * i + nelt2 < nelt)
6276 2107 : sel[3 * i + nelt2] = nelt + j2++;
6277 : }
6278 2649 : indices.new_vector (sel, 2, nelt);
6279 2649 : if (!can_vec_perm_const_p (mode, mode, indices))
6280 : {
6281 9 : if (dump_enabled_p ())
6282 9 : dump_printf (MSG_MISSED_OPTIMIZATION,
6283 : "permutation op not supported by target.\n");
6284 9 : return false;
6285 : }
6286 : }
6287 : return true;
6288 955 : }
6289 : else
6290 : {
6291 : /* If length is not equal to 3 then only power of 2 is supported. */
6292 1253 : gcc_assert (pow2p_hwi (count));
6293 2506 : poly_uint64 nelt = GET_MODE_NUNITS (mode);
6294 :
6295 : /* The encoding has 2 interleaved stepped patterns. */
6296 2506 : if(!multiple_p (nelt, 2))
6297 1207 : return false;
6298 1253 : vec_perm_builder sel (nelt, 2, 3);
6299 1253 : sel.quick_grow (6);
          /* Encoded elements: { 0, nelt, 1, nelt + 1, 2, nelt + 2 }.  */
6300 6265 : for (i = 0; i < 3; i++)
6301 : {
6302 3759 : sel[i * 2] = i;
6303 3759 : sel[i * 2 + 1] = i + nelt;
6304 : }
6305 1253 : vec_perm_indices indices (sel, 2, nelt);
6306 1253 : if (can_vec_perm_const_p (mode, mode, indices))
6307 : {
          /* Shift every encoded element by NELT / 2 to obtain the second
             interleave selector and check that one as well.  */
6308 8449 : for (i = 0; i < 6; i++)
6309 7242 : sel[i] += exact_div (nelt, 2);
6310 1207 : indices.new_vector (sel, 2, nelt);
6311 1207 : if (can_vec_perm_const_p (mode, mode, indices))
6312 1207 : return true;
6313 : }
6314 1253 : }
6315 : }
6316 :
          /* Reached when MODE is not a vector mode or one of the power-of-two
             interleave selectors is not supported.  */
6317 46 : if (dump_enabled_p ())
6318 3 : dump_printf (MSG_MISSED_OPTIMIZATION,
6319 : "permutation op not supported by target.\n");
6320 : return false;
6321 : }
6322 :
6323 : /* Return FN if vec_{mask_,mask_len_}store_lanes is available for COUNT vectors
6324 : of type VECTYPE. MASKED_P says whether the masked form is needed.

          The mask-len variant is checked first, independently of MASKED_P,
          and yields IFN_MASK_LEN_STORE_LANES.  Failing that, only the masked
          variant (IFN_MASK_STORE_LANES) is checked when MASKED_P is true and
          only the unmasked one (IFN_STORE_LANES) when it is false.  IFN_LAST
          is returned when none of the checked variants is supported.  */
6325 :
6326 : internal_fn
6327 40486 : vect_store_lanes_supported (tree vectype, unsigned HOST_WIDE_INT count,
6328 : bool masked_p)
6329 : {
6330 40486 : if (vect_lanes_optab_supported_p ("vec_mask_len_store_lanes",
6331 : vec_mask_len_store_lanes_optab, vectype,
6332 : count))
6333 : return IFN_MASK_LEN_STORE_LANES;
6334 40486 : else if (masked_p)
6335 : {
6336 159 : if (vect_lanes_optab_supported_p ("vec_mask_store_lanes",
6337 : vec_mask_store_lanes_optab, vectype,
6338 : count))
6339 : return IFN_MASK_STORE_LANES;
6340 : }
6341 : else
6342 : {
6343 40327 : if (vect_lanes_optab_supported_p ("vec_store_lanes",
6344 : vec_store_lanes_optab, vectype, count))
6345 : return IFN_STORE_LANES;
6346 : }
6347 : return IFN_LAST;
6348 : }
6349 :
6350 :
6351 : /* Function vect_setup_realignment
6352 :
6353 : This function is called when vectorizing an unaligned load using
6354 : the dr_explicit_realign[_optimized] scheme.
6355 : This function generates the following code at the loop prolog:
6356 :
6357 : p = initial_addr;
6358 : x msq_init = *(floor(p)); # prolog load
6359 : realignment_token = call target_builtin;
6360 : loop:
6361 : x msq = phi (msq_init, ---)
6362 :
6363 : The stmts marked with x are generated only for the case of
6364 : dr_explicit_realign_optimized.
6365 :
6366 : The code above sets up a new (vector) pointer, pointing to the first
6367 : location accessed by STMT_INFO, and a "floor-aligned" load using that
6368 : pointer. It also generates code to compute the "realignment-token"
6369 : (if the relevant target hook was defined), and creates a phi-node at the
6370 : loop-header bb whose arguments are the result of the prolog-load (created
6371 : by this function) and the result of a load that takes place in the loop
6372 : (to be created by the caller to this function).
6373 :
6374 : For the case of dr_explicit_realign_optimized:
6375 : The caller to this function uses the phi-result (msq) to create the
6376 : realignment code inside the loop, and sets up the missing phi argument,
6377 : as follows:
6378 : loop:
6379 : msq = phi (msq_init, lsq)
6380 : lsq = *(floor(p')); # load in loop
6381 : result = realign_load (msq, lsq, realignment_token);
6382 :
6383 : For the case of dr_explicit_realign:
6384 : loop:
6385 : msq = *(floor(p)); # load in loop
6386 : p' = p + (VS-1);
6387 : lsq = *(floor(p')); # load in loop
6388 : result = realign_load (msq, lsq, realignment_token);
6389 :
6390 : Input:
6391 : STMT_INFO - (scalar) load stmt to be vectorized. This load accesses
6392 : a memory location that may be unaligned.
6393 : BSI - place where new code is to be inserted.
6394 : ALIGNMENT_SUPPORT_SCHEME - which of the two misalignment handling schemes
6395 : is used.
6396 :
6397 : Output:
6398 : REALIGNMENT_TOKEN - the result of a call to the builtin_mask_for_load
6399 : target hook, if defined.
6400 : Return value - the result of the loop-header phi node. */
6401 :
6402 : tree
6403 0 : vect_setup_realignment (vec_info *vinfo, stmt_vec_info stmt_info, tree vectype,
6404 : gimple_stmt_iterator *gsi, tree *realignment_token,
6405 : enum dr_alignment_support alignment_support_scheme,
6406 : tree init_addr,
6407 : class loop **at_loop)
6408 : {
6409 0 : loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
6410 0 : dr_vec_info *dr_info = STMT_VINFO_DR_INFO (stmt_info);
6411 0 : struct data_reference *dr = dr_info->dr;
6412 0 : class loop *loop = NULL;
6413 0 : edge pe = NULL;
6414 0 : tree scalar_dest = gimple_assign_lhs (stmt_info->stmt);
6415 0 : tree vec_dest;
6416 0 : gimple *inc;
6417 0 : tree ptr;
6418 0 : tree data_ref;
6419 0 : basic_block new_bb;
6420 0 : tree msq_init = NULL_TREE;
6421 0 : tree new_temp;
6422 0 : gphi *phi_stmt;
6423 0 : tree msq = NULL_TREE;
6424 0 : gimple_seq stmts = NULL;
6425 0 : bool compute_in_loop = false;
6426 0 : bool nested_in_vect_loop = false;
6427 0 : class loop *containing_loop = (gimple_bb (stmt_info->stmt))->loop_father;
6428 0 : class loop *loop_for_initial_load = NULL;
6429 :
6430 0 : if (loop_vinfo)
6431 : {
6432 0 : loop = LOOP_VINFO_LOOP (loop_vinfo);
6433 0 : nested_in_vect_loop = nested_in_vect_loop_p (loop, stmt_info);
6434 : }
6435 :
6436 0 : gcc_assert (alignment_support_scheme == dr_explicit_realign
6437 : || alignment_support_scheme == dr_explicit_realign_optimized);
6438 :
6439 : /* We need to generate three things:
6440 : 1. the misalignment computation
6441 : 2. the extra vector load (for the optimized realignment scheme).
6442 : 3. the phi node for the two vectors from which the realignment is
6443 : done (for the optimized realignment scheme). */
6444 :
6445 : /* 1. Determine where to generate the misalignment computation.
6446 :
6447 : If INIT_ADDR is NULL_TREE, this indicates that the misalignment
6448 : calculation will be generated by this function, outside the loop (in the
6449 : preheader). Otherwise, INIT_ADDR had already been computed for us by the
6450 : caller, inside the loop.
6451 :
6452 : Background: If the misalignment remains fixed throughout the iterations of
6453 : the loop, then both realignment schemes are applicable, and also the
6454 : misalignment computation can be done outside LOOP. This is because we are
6455 : vectorizing LOOP, and so the memory accesses in LOOP advance in steps that
6456 : are a multiple of VS (the Vector Size), and therefore the misalignment in
6457 : different vectorized LOOP iterations is always the same.
6458 : The problem arises only if the memory access is in an inner-loop nested
6459 : inside LOOP, which is now being vectorized using outer-loop vectorization.
6460 : This is the only case when the misalignment of the memory access may not
6461 : remain fixed throughout the iterations of the inner-loop (as explained in
6462 : detail in vect_supportable_dr_alignment). In this case, not only is the
6463 : optimized realignment scheme not applicable, but also the misalignment
6464 : computation (and generation of the realignment token that is passed to
6465 : REALIGN_LOAD) have to be done inside the loop.
6466 :
6467 : In short, INIT_ADDR indicates whether we are in a COMPUTE_IN_LOOP mode
6468 : or not, which in turn determines if the misalignment is computed inside
6469 : the inner-loop, or outside LOOP. */
6470 :
6471 0 : if (init_addr != NULL_TREE || !loop_vinfo)
6472 : {
6473 0 : compute_in_loop = true;
6474 0 : gcc_assert (alignment_support_scheme == dr_explicit_realign);
6475 : }
6476 :
6477 :
6478 : /* 2. Determine where to generate the extra vector load.
6479 :
6480 : For the optimized realignment scheme, instead of generating two vector
6481 : loads in each iteration, we generate a single extra vector load in the
6482 : preheader of the loop, and in each iteration reuse the result of the
6483 : vector load from the previous iteration. In case the memory access is in
6484 : an inner-loop nested inside LOOP, which is now being vectorized using
6485 : outer-loop vectorization, we need to determine whether this initial vector
6486 : load should be generated at the preheader of the inner-loop, or can be
6487 : generated at the preheader of LOOP. If the memory access has no evolution
6488 : in LOOP, it can be generated in the preheader of LOOP. Otherwise, it has
6489 : to be generated inside LOOP (in the preheader of the inner-loop). */
6490 :
6491 0 : if (nested_in_vect_loop)
6492 : {
6493 0 : tree outerloop_step = STMT_VINFO_DR_STEP (stmt_info);
6494 0 : bool invariant_in_outerloop =
6495 0 : (tree_int_cst_compare (outerloop_step, size_zero_node) == 0);
6496 0 : loop_for_initial_load = (invariant_in_outerloop ? loop : loop->inner);
6497 : }
6498 : else
6499 : loop_for_initial_load = loop;
6500 0 : if (at_loop)
6501 0 : *at_loop = loop_for_initial_load;
6502 :
6503 0 : tree vuse = NULL_TREE;
6504 0 : if (loop_for_initial_load)
6505 : {
6506 0 : pe = loop_preheader_edge (loop_for_initial_load);
6507 0 : if (gphi *vphi = get_virtual_phi (loop_for_initial_load->header))
6508 0 : vuse = PHI_ARG_DEF_FROM_EDGE (vphi, pe);
6509 : }
6510 0 : if (!vuse)
6511 0 : vuse = gimple_vuse (gsi_stmt (*gsi));
6512 :
6513 : /* 3. For the case of the optimized realignment, create the first vector
6514 : load at the loop preheader. */
6515 :
6516 0 : if (alignment_support_scheme == dr_explicit_realign_optimized)
6517 : {
6518 : /* Create msq_init = *(floor(p1)) in the loop preheader */
6519 0 : gassign *new_stmt;
6520 :
6521 0 : gcc_assert (!compute_in_loop);
6522 0 : vec_dest = vect_create_destination_var (scalar_dest, vectype);
6523 0 : ptr = vect_create_data_ref_ptr (vinfo, stmt_info, vectype,
6524 : loop_for_initial_load, NULL_TREE,
6525 : &init_addr, NULL, &inc, true);
6526 0 : if (TREE_CODE (ptr) == SSA_NAME)
6527 0 : new_temp = copy_ssa_name (ptr);
6528 : else
6529 0 : new_temp = make_ssa_name (TREE_TYPE (ptr));
6530 0 : poly_uint64 align = DR_TARGET_ALIGNMENT (dr_info);
6531 0 : tree type = TREE_TYPE (ptr);
6532 0 : new_stmt = gimple_build_assign
6533 0 : (new_temp, BIT_AND_EXPR, ptr,
6534 0 : fold_build2 (MINUS_EXPR, type,
6535 : build_int_cst (type, 0),
6536 : build_int_cst (type, align)));
6537 0 : new_bb = gsi_insert_on_edge_immediate (pe, new_stmt);
6538 0 : gcc_assert (!new_bb);
6539 0 : data_ref
6540 0 : = build2 (MEM_REF, TREE_TYPE (vec_dest), new_temp,
6541 : build_int_cst (reference_alias_ptr_type (DR_REF (dr)), 0));
6542 0 : vect_copy_ref_info (data_ref, DR_REF (dr));
6543 0 : new_stmt = gimple_build_assign (vec_dest, data_ref);
6544 0 : new_temp = make_ssa_name (vec_dest, new_stmt);
6545 0 : gimple_assign_set_lhs (new_stmt, new_temp);
6546 0 : gimple_set_vuse (new_stmt, vuse);
6547 0 : if (pe)
6548 : {
6549 0 : new_bb = gsi_insert_on_edge_immediate (pe, new_stmt);
6550 0 : gcc_assert (!new_bb);
6551 : }
6552 : else
6553 0 : gsi_insert_before (gsi, new_stmt, GSI_SAME_STMT);
6554 :
6555 0 : msq_init = gimple_assign_lhs (new_stmt);
6556 : }
6557 :
6558 : /* 4. Create realignment token using a target builtin, if available.
6559 : It is done either inside the containing loop, or before LOOP (as
6560 : determined above). */
6561 :
6562 0 : if (targetm.vectorize.builtin_mask_for_load)
6563 : {
6564 0 : gcall *new_stmt;
6565 0 : tree builtin_decl;
6566 :
6567 : /* Compute INIT_ADDR - the initial addressed accessed by this memref. */
6568 0 : if (!init_addr)
6569 : {
6570 : /* Generate the INIT_ADDR computation outside LOOP. */
6571 0 : init_addr = vect_create_addr_base_for_vector_ref (vinfo,
6572 : stmt_info, &stmts,
6573 : NULL_TREE);
6574 0 : if (loop)
6575 : {
6576 0 : pe = loop_preheader_edge (loop);
6577 0 : new_bb = gsi_insert_seq_on_edge_immediate (pe, stmts);
6578 0 : gcc_assert (!new_bb);
6579 : }
6580 : else
6581 0 : gsi_insert_seq_before (gsi, stmts, GSI_SAME_STMT);
6582 : }
6583 :
6584 0 : builtin_decl = targetm.vectorize.builtin_mask_for_load ();
6585 0 : new_stmt = gimple_build_call (builtin_decl, 1, init_addr);
6586 0 : vec_dest =
6587 0 : vect_create_destination_var (scalar_dest,
6588 : gimple_call_return_type (new_stmt));
6589 0 : new_temp = make_ssa_name (vec_dest, new_stmt);
6590 0 : gimple_call_set_lhs (new_stmt, new_temp);
6591 :
6592 0 : if (compute_in_loop)
6593 0 : gsi_insert_before (gsi, new_stmt, GSI_SAME_STMT);
6594 : else
6595 : {
6596 : /* Generate the misalignment computation outside LOOP. */
6597 0 : pe = loop_preheader_edge (loop);
6598 0 : new_bb = gsi_insert_on_edge_immediate (pe, new_stmt);
6599 0 : gcc_assert (!new_bb);
6600 : }
6601 :
6602 0 : *realignment_token = gimple_call_lhs (new_stmt);
6603 :
6604 : /* The result of the CALL_EXPR to this builtin is determined from
6605 : the value of the parameter and no global variables are touched
6606 : which makes the builtin a "const" function. Requiring the
6607 : builtin to have the "const" attribute makes it unnecessary
6608 : to call mark_call_clobbered. */
6609 0 : gcc_assert (TREE_READONLY (builtin_decl));
6610 : }
6611 :
6612 0 : if (alignment_support_scheme == dr_explicit_realign)
6613 : return msq;
6614 :
6615 0 : gcc_assert (!compute_in_loop);
6616 0 : gcc_assert (alignment_support_scheme == dr_explicit_realign_optimized);
6617 :
6618 :
6619 : /* 5. Create msq = phi <msq_init, lsq> in loop */
6620 :
6621 0 : pe = loop_preheader_edge (containing_loop);
6622 0 : vec_dest = vect_create_destination_var (scalar_dest, vectype);
6623 0 : msq = make_ssa_name (vec_dest);
6624 0 : phi_stmt = create_phi_node (msq, containing_loop->header);
6625 0 : add_phi_arg (phi_stmt, msq_init, pe, UNKNOWN_LOCATION);
6626 :
6627 0 : return msq;
6628 : }
6629 :
6630 :
6631 : /* Function vect_grouped_load_supported.
6632 :
6633 : COUNT is the size of the load group (the number of statements plus the
6634 : number of gaps). SINGLE_ELEMENT_P is true if there is actually
6635 : only one statement, with a gap of COUNT - 1.
6636 :
6637 : Returns true if a suitable permute exists. */
6638 :
6639 : bool
6640 1957 : vect_grouped_load_supported (tree vectype, bool single_element_p,
6641 : unsigned HOST_WIDE_INT count)
6642 : {
6643 1957 : machine_mode mode = TYPE_MODE (vectype);
6644 :
6645 : /* If this is single-element interleaving with an element distance
6646 : that leaves unused vector loads around punt - we at least create
6647 : very sub-optimal code in that case (and blow up memory,
6648 : see PR65518). */
6649 1957 : if (single_element_p && maybe_gt (count, TYPE_VECTOR_SUBPARTS (vectype)))
6650 : {
6651 42 : if (dump_enabled_p ())
6652 3 : dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6653 : "single-element interleaving not supported "
6654 : "for not adjacent vector loads\n");
6655 42 : return false;
6656 : }
6657 :
6658 : /* vect_permute_load_chain requires the group size to be equal to 3 or
6659 : be a power of two. */
6660 1915 : if (count != 3 && exact_log2 (count) == -1)
6661 : {
6662 226 : if (dump_enabled_p ())
6663 14 : dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6664 : "the size of the group of accesses"
6665 : " is not a power of 2 or not equal to 3\n");
6666 226 : return false;
6667 : }
6668 :
6669 : /* Check that the permutation is supported. */
6670 1689 : if (VECTOR_MODE_P (mode))
6671 : {
6672 1689 : unsigned int i, j;
6673 1689 : if (count == 3)
6674 : {
6675 843 : unsigned int nelt;
6676 1686 : if (!GET_MODE_NUNITS (mode).is_constant (&nelt))
6677 : {
6678 : if (dump_enabled_p ())
6679 : dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6680 : "cannot handle groups of 3 loads for"
6681 : " variable-length vectors\n");
6682 : return false;
6683 : }
6684 :
6685 843 : vec_perm_builder sel (nelt, nelt, 1);
6686 843 : sel.quick_grow (nelt);
6687 843 : vec_perm_indices indices;
6688 843 : unsigned int k;
6689 3336 : for (k = 0; k < 3; k++)
6690 : {
6691 8921 : for (i = 0; i < nelt; i++)
6692 6416 : if (3 * i + k < 2 * nelt)
6693 4283 : sel[i] = 3 * i + k;
6694 : else
6695 2133 : sel[i] = 0;
6696 2505 : indices.new_vector (sel, 2, nelt);
6697 2505 : if (!can_vec_perm_const_p (mode, mode, indices))
6698 : {
6699 12 : if (dump_enabled_p ())
6700 4 : dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6701 : "shuffle of 3 loads is not supported by"
6702 : " target\n");
6703 12 : return false;
6704 : }
6705 8757 : for (i = 0, j = 0; i < nelt; i++)
6706 6264 : if (3 * i + k < 2 * nelt)
6707 4176 : sel[i] = i;
6708 : else
6709 2088 : sel[i] = nelt + ((nelt + k) % 3) + 3 * (j++);
6710 2493 : indices.new_vector (sel, 2, nelt);
6711 2493 : if (!can_vec_perm_const_p (mode, mode, indices))
6712 : {
6713 0 : if (dump_enabled_p ())
6714 0 : dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6715 : "shuffle of 3 loads is not supported by"
6716 : " target\n");
6717 0 : return false;
6718 : }
6719 : }
6720 : return true;
6721 843 : }
6722 : else
6723 : {
6724 : /* If length is not equal to 3 then only power of 2 is supported. */
6725 846 : gcc_assert (pow2p_hwi (count));
6726 1692 : poly_uint64 nelt = GET_MODE_NUNITS (mode);
6727 :
6728 : /* The encoding has a single stepped pattern. */
6729 846 : vec_perm_builder sel (nelt, 1, 3);
6730 846 : sel.quick_grow (3);
6731 4230 : for (i = 0; i < 3; i++)
6732 2538 : sel[i] = i * 2;
6733 846 : vec_perm_indices indices (sel, 2, nelt);
6734 846 : if (can_vec_perm_const_p (mode, mode, indices))
6735 : {
6736 3372 : for (i = 0; i < 3; i++)
6737 2529 : sel[i] = i * 2 + 1;
6738 843 : indices.new_vector (sel, 2, nelt);
6739 843 : if (can_vec_perm_const_p (mode, mode, indices))
6740 843 : return true;
6741 : }
6742 846 : }
6743 : }
6744 :
6745 3 : if (dump_enabled_p ())
6746 2 : dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6747 : "extract even/odd not supported by target\n");
6748 : return false;
6749 : }
6750 :
6751 : /* Return FN if vec_{masked_,mask_len_}load_lanes is available for COUNT vectors
6752 : of type VECTYPE. MASKED_P says whether the masked form is needed.
6753 : If it is available and ELSVALS is nonzero store the possible else values
6754 : in the vector it points to. */
6755 :
6756 : internal_fn
6757 144547 : vect_load_lanes_supported (tree vectype, unsigned HOST_WIDE_INT count,
6758 : bool masked_p, vec<int> *elsvals)
6759 : {
6760 144547 : if (vect_lanes_optab_supported_p ("vec_mask_len_load_lanes",
6761 : vec_mask_len_load_lanes_optab, vectype,
6762 : count, elsvals))
6763 : return IFN_MASK_LEN_LOAD_LANES;
6764 144547 : else if (masked_p)
6765 : {
6766 30 : if (vect_lanes_optab_supported_p ("vec_mask_load_lanes",
6767 : vec_mask_load_lanes_optab, vectype,
6768 : count, elsvals))
6769 : return IFN_MASK_LOAD_LANES;
6770 : }
6771 : else
6772 : {
6773 144517 : if (vect_lanes_optab_supported_p ("vec_load_lanes", vec_load_lanes_optab,
6774 : vectype, count, elsvals))
6775 : return IFN_LOAD_LANES;
6776 : }
6777 : return IFN_LAST;
6778 : }
6779 :
6780 : /* Function vect_force_dr_alignment_p.
6781 :
6782 : Returns whether the alignment of a DECL can be forced to be aligned
6783 : on ALIGNMENT bit boundary. */
6784 :
6785 : bool
6786 709306 : vect_can_force_dr_alignment_p (const_tree decl, poly_uint64 alignment)
6787 : {
6788 709306 : if (!VAR_P (decl))
6789 : return false;
6790 :
6791 210516 : if (decl_in_symtab_p (decl)
6792 210516 : && (!symtab_node::get (decl)
6793 22174 : || !symtab_node::get (decl)->can_increase_alignment_p ()))
6794 13470 : return false;
6795 :
6796 197046 : if (TREE_STATIC (decl))
6797 8704 : return (known_le (alignment,
6798 8704 : (unsigned HOST_WIDE_INT) MAX_OFILE_ALIGNMENT));
6799 : else
6800 188342 : return (known_le (alignment, (unsigned HOST_WIDE_INT) MAX_STACK_ALIGNMENT));
6801 : }
6802 :
6803 : /* Return whether the data reference DR_INFO is supported with respect to its
6804 : alignment.
6805 : If CHECK_ALIGNED_ACCESSES is TRUE, check if the access is supported even
6806 : it is aligned, i.e., check if it is possible to vectorize it with different
6807 : alignment. If IS_GATHER_SCATTER is true we are dealing with a
6808 : gather/scatter. */
6809 :
6810 : enum dr_alignment_support
6811 2843093 : vect_supportable_dr_alignment (vec_info *vinfo, dr_vec_info *dr_info,
6812 : tree vectype, int misalignment,
6813 : bool is_gather_scatter)
6814 : {
6815 2843093 : data_reference *dr = dr_info->dr;
6816 2843093 : stmt_vec_info stmt_info = dr_info->stmt;
6817 2843093 : machine_mode mode = TYPE_MODE (vectype);
6818 2843093 : loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
6819 2843093 : class loop *vect_loop = NULL;
6820 2843093 : bool nested_in_vect_loop = false;
6821 :
6822 2843093 : if (misalignment == 0)
6823 : return dr_aligned;
6824 1748210 : else if (dr_safe_speculative_read_required (stmt_info))
6825 : return dr_unaligned_unsupported;
6826 :
6827 1359207 : if (loop_vinfo)
6828 : {
6829 954539 : vect_loop = LOOP_VINFO_LOOP (loop_vinfo);
6830 954539 : nested_in_vect_loop = nested_in_vect_loop_p (vect_loop, stmt_info);
6831 : }
6832 :
6833 : /* Possibly unaligned access. */
6834 :
6835 : /* We can choose between using the implicit realignment scheme (generating
6836 : a misaligned_move stmt) and the explicit realignment scheme (generating
6837 : aligned loads with a REALIGN_LOAD). There are two variants to the
6838 : explicit realignment scheme: optimized, and unoptimized.
6839 : We can optimize the realignment only if the step between consecutive
6840 : vector loads is equal to the vector size. Since the vector memory
6841 : accesses advance in steps of VS (Vector Size) in the vectorized loop, it
6842 : is guaranteed that the misalignment amount remains the same throughout the
6843 : execution of the vectorized loop. Therefore, we can create the
6844 : "realignment token" (the permutation mask that is passed to REALIGN_LOAD)
6845 : at the loop preheader.
6846 :
6847 : However, in the case of outer-loop vectorization, when vectorizing a
6848 : memory access in the inner-loop nested within the LOOP that is now being
6849 : vectorized, while it is guaranteed that the misalignment of the
6850 : vectorized memory access will remain the same in different outer-loop
6851 : iterations, it is *not* guaranteed that is will remain the same throughout
6852 : the execution of the inner-loop. This is because the inner-loop advances
6853 : with the original scalar step (and not in steps of VS). If the inner-loop
6854 : step happens to be a multiple of VS, then the misalignment remains fixed
6855 : and we can use the optimized realignment scheme. For example:
6856 :
6857 : for (i=0; i<N; i++)
6858 : for (j=0; j<M; j++)
6859 : s += a[i+j];
6860 :
6861 : When vectorizing the i-loop in the above example, the step between
6862 : consecutive vector loads is 1, and so the misalignment does not remain
6863 : fixed across the execution of the inner-loop, and the realignment cannot
6864 : be optimized (as illustrated in the following pseudo vectorized loop):
6865 :
6866 : for (i=0; i<N; i+=4)
6867 : for (j=0; j<M; j++){
6868 : vs += vp[i+j]; // misalignment of &vp[i+j] is {0,1,2,3,0,1,2,3,...}
6869 : // when j is {0,1,2,3,4,5,6,7,...} respectively.
6870 : // (assuming that we start from an aligned address).
6871 : }
6872 :
6873 : We therefore have to use the unoptimized realignment scheme:
6874 :
6875 : for (i=0; i<N; i+=4)
6876 : for (j=k; j<M; j+=4)
6877 : vs += vp[i+j]; // misalignment of &vp[i+j] is always k (assuming
6878 : // that the misalignment of the initial address is
6879 : // 0).
6880 :
6881 : The loop can then be vectorized as follows:
6882 :
6883 : for (k=0; k<4; k++){
6884 : rt = get_realignment_token (&vp[k]);
6885 : for (i=0; i<N; i+=4){
6886 : v1 = vp[i+k];
6887 : for (j=k; j<M; j+=4){
6888 : v2 = vp[i+j+VS-1];
6889 : va = REALIGN_LOAD <v1,v2,rt>;
6890 : vs += va;
6891 : v1 = v2;
6892 : }
6893 : }
6894 : } */
6895 :
6896 1359207 : if (DR_IS_READ (dr) && !is_gather_scatter)
6897 : {
6898 609601 : if (can_implement_p (vec_realign_load_optab, mode)
6899 609601 : && (!targetm.vectorize.builtin_mask_for_load
6900 0 : || targetm.vectorize.builtin_mask_for_load ()))
6901 : {
6902 : /* If we are doing SLP then the accesses need not have the
6903 : same alignment, instead it depends on the SLP group size. */
6904 0 : if (loop_vinfo
6905 0 : && STMT_VINFO_GROUPED_ACCESS (stmt_info)
6906 0 : && !multiple_p (LOOP_VINFO_VECT_FACTOR (loop_vinfo)
6907 0 : * (DR_GROUP_SIZE
6908 0 : (DR_GROUP_FIRST_ELEMENT (stmt_info))),
6909 0 : TYPE_VECTOR_SUBPARTS (vectype)))
6910 : ;
6911 0 : else if (!loop_vinfo
6912 0 : || (nested_in_vect_loop
6913 0 : && maybe_ne (TREE_INT_CST_LOW (DR_STEP (dr)),
6914 0 : GET_MODE_SIZE (TYPE_MODE (vectype)))))
6915 0 : return dr_explicit_realign;
6916 : else
6917 0 : return dr_explicit_realign_optimized;
6918 : }
6919 : }
6920 :
6921 1359207 : bool is_packed = not_size_aligned (DR_REF (dr));
6922 1359207 : if (misalignment == DR_MISALIGNMENT_UNKNOWN
6923 1359207 : && is_gather_scatter)
6924 3174 : misalignment = (get_object_alignment (DR_REF (dr))
6925 3174 : % (GET_MODE_BITSIZE (GET_MODE_INNER (mode))))
6926 3174 : / BITS_PER_UNIT;
6927 1359207 : if (targetm.vectorize.support_vector_misalignment (mode, misalignment,
6928 : is_packed,
6929 : is_gather_scatter))
6930 : return dr_unaligned_supported;
6931 :
6932 : /* Unsupported. */
6933 : return dr_unaligned_unsupported;
6934 : }
|