Line data Source code
1 : /* Data References Analysis and Manipulation Utilities for Vectorization.
2 : Copyright (C) 2003-2026 Free Software Foundation, Inc.
3 : Contributed by Dorit Naishlos <dorit@il.ibm.com>
4 : and Ira Rosen <irar@il.ibm.com>
5 :
6 : This file is part of GCC.
7 :
8 : GCC is free software; you can redistribute it and/or modify it under
9 : the terms of the GNU General Public License as published by the Free
10 : Software Foundation; either version 3, or (at your option) any later
11 : version.
12 :
13 : GCC is distributed in the hope that it will be useful, but WITHOUT ANY
14 : WARRANTY; without even the implied warranty of MERCHANTABILITY or
15 : FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
16 : for more details.
17 :
18 : You should have received a copy of the GNU General Public License
19 : along with GCC; see the file COPYING3. If not see
20 : <http://www.gnu.org/licenses/>. */
21 :
22 : #define INCLUDE_ALGORITHM
23 : #include "config.h"
24 : #include "system.h"
25 : #include "coretypes.h"
26 : #include "backend.h"
27 : #include "target.h"
28 : #include "rtl.h"
29 : #include "tree.h"
30 : #include "gimple.h"
31 : #include "predict.h"
32 : #include "memmodel.h"
33 : #include "tm_p.h"
34 : #include "ssa.h"
35 : #include "optabs-tree.h"
36 : #include "cgraph.h"
37 : #include "dumpfile.h"
38 : #include "pretty-print.h"
39 : #include "alias.h"
40 : #include "fold-const.h"
41 : #include "stor-layout.h"
42 : #include "tree-eh.h"
43 : #include "gimplify.h"
44 : #include "gimple-iterator.h"
45 : #include "gimplify-me.h"
46 : #include "tree-ssa-loop-ivopts.h"
47 : #include "tree-ssa-loop-manip.h"
48 : #include "tree-ssa-loop.h"
49 : #include "cfgloop.h"
50 : #include "tree-scalar-evolution.h"
51 : #include "tree-vectorizer.h"
52 : #include "expr.h"
53 : #include "builtins.h"
54 : #include "tree-cfg.h"
55 : #include "tree-hash-traits.h"
56 : #include "vec-perm-indices.h"
57 : #include "internal-fn.h"
58 : #include "gimple-fold.h"
59 : #include "optabs-query.h"
60 :
61 : /* Return true if load- or store-lanes optab OPTAB is implemented for
62 : COUNT vectors of type VECTYPE. NAME is the name of OPTAB.
63 :
64 : If it is implemented and ELSVALS is nonzero store the possible else
65 : values in the vector it points to. */
66 :
67 : static bool
68 342986 : vect_lanes_optab_supported_p (const char *name, convert_optab optab,
69 : tree vectype, unsigned HOST_WIDE_INT count,
70 : vec<int> *elsvals = nullptr)
71 : {
72 342986 : machine_mode mode, array_mode;
73 342986 : bool limit_p;
74 :
75 342986 : mode = TYPE_MODE (vectype);
76 342986 : if (!targetm.array_mode (mode, count).exists (&array_mode))
77 : {
78 685972 : poly_uint64 bits = count * GET_MODE_BITSIZE (mode);
79 342986 : limit_p = !targetm.array_mode_supported_p (mode, count);
80 342986 : if (!int_mode_for_size (bits, limit_p).exists (&array_mode))
81 : {
82 303706 : if (dump_enabled_p ())
83 12272 : dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
84 : "no array mode for %s[%wu]\n",
85 12272 : GET_MODE_NAME (mode), count);
86 303706 : return false;
87 : }
88 : }
89 :
90 39280 : enum insn_code icode;
91 39280 : if ((icode = convert_optab_handler (optab, array_mode, mode))
92 : == CODE_FOR_nothing)
93 : {
94 39280 : if (dump_enabled_p ())
95 4104 : dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
96 : "cannot use %s<%s><%s>\n", name,
97 4104 : GET_MODE_NAME (array_mode), GET_MODE_NAME (mode));
98 39280 : return false;
99 : }
100 :
101 0 : if (dump_enabled_p ())
102 0 : dump_printf_loc (MSG_NOTE, vect_location,
103 0 : "can use %s<%s><%s>\n", name, GET_MODE_NAME (array_mode),
104 0 : GET_MODE_NAME (mode));
105 :
106 0 : if (elsvals)
107 0 : get_supported_else_vals (icode,
108 0 : internal_fn_else_index (IFN_MASK_LEN_LOAD_LANES),
109 : *elsvals);
110 :
111 : return true;
112 : }
113 :
114 : /* Helper function to identify a simd clone call. If this is a call to a
115 : function with simd clones then return the corresponding cgraph_node,
116 : otherwise return NULL. */
117 :
118 : static cgraph_node*
119 560398 : simd_clone_call_p (gimple *stmt)
120 : {
121 628932 : gcall *call = dyn_cast <gcall *> (stmt);
122 70125 : if (!call)
123 : return NULL;
124 :
125 70125 : tree fndecl = NULL_TREE;
126 70125 : if (gimple_call_internal_p (call, IFN_MASK_CALL))
127 226 : fndecl = TREE_OPERAND (gimple_call_arg (stmt, 0), 0);
128 : else
129 69899 : fndecl = gimple_call_fndecl (stmt);
130 :
131 70125 : if (fndecl == NULL_TREE)
132 : return NULL;
133 :
134 35693 : cgraph_node *node = cgraph_node::get (fndecl);
135 35693 : if (node && node->simd_clones != NULL)
136 : return node;
137 :
138 : return NULL;
139 : }
140 :
141 :
142 :
143 : /* Return the smallest scalar part of STMT_INFO.
144 : This is used to determine the vectype of the stmt. We generally set the
145 : vectype according to the type of the result (lhs). For stmts whose
146 : result-type is different than the type of the arguments (e.g., demotion,
147 : promotion), vectype will be reset appropriately (later). Note that we have
148 : to visit the smallest datatype in this function, because that determines the
149 : VF. If the smallest datatype in the loop is present only as the rhs of a
150 : promotion operation - we'd miss it.
151 : Such a case, where a variable of this datatype does not appear in the lhs
152 : anywhere in the loop, can only occur if it's an invariant: e.g.:
153 : 'int_x = (int) short_inv', which we'd expect to have been optimized away by
154 : invariant motion. However, we cannot rely on invariant motion to always
155 : take invariants out of the loop, and so in the case of promotion we also
156 : have to check the rhs.
157 : LHS_SIZE_UNIT and RHS_SIZE_UNIT contain the sizes of the corresponding
158 : types. */
159 :
160 : tree
161 4564404 : vect_get_smallest_scalar_type (stmt_vec_info stmt_info, tree scalar_type)
162 : {
163 4564404 : HOST_WIDE_INT lhs, rhs;
164 :
165 : /* During the analysis phase, this function is called on arbitrary
166 : statements that might not have scalar results. */
167 4564404 : if (!tree_fits_uhwi_p (TYPE_SIZE_UNIT (scalar_type)))
168 : return scalar_type;
169 :
170 4564404 : lhs = rhs = TREE_INT_CST_LOW (TYPE_SIZE_UNIT (scalar_type));
171 :
172 4564404 : gassign *assign = dyn_cast <gassign *> (stmt_info->stmt);
173 4564404 : if (assign)
174 : {
175 4004006 : scalar_type = TREE_TYPE (gimple_assign_lhs (assign));
176 4004006 : if (gimple_assign_cast_p (assign)
177 3643907 : || gimple_assign_rhs_code (assign) == DOT_PROD_EXPR
178 3643513 : || gimple_assign_rhs_code (assign) == WIDEN_SUM_EXPR
179 3643513 : || gimple_assign_rhs_code (assign) == SAD_EXPR
180 3643430 : || gimple_assign_rhs_code (assign) == WIDEN_MULT_EXPR
181 3639674 : || gimple_assign_rhs_code (assign) == WIDEN_MULT_PLUS_EXPR
182 3639674 : || gimple_assign_rhs_code (assign) == WIDEN_MULT_MINUS_EXPR
183 3639674 : || gimple_assign_rhs_code (assign) == WIDEN_LSHIFT_EXPR
184 7643680 : || gimple_assign_rhs_code (assign) == FLOAT_EXPR)
185 : {
186 373864 : tree rhs_type = TREE_TYPE (gimple_assign_rhs1 (assign));
187 :
188 373864 : rhs = TREE_INT_CST_LOW (TYPE_SIZE_UNIT (rhs_type));
189 373864 : if (rhs < lhs)
190 4564404 : scalar_type = rhs_type;
191 : }
192 : }
193 560398 : else if (cgraph_node *node = simd_clone_call_p (stmt_info->stmt))
194 : {
195 1591 : auto clone = node->simd_clones->simdclone;
196 4871 : for (unsigned int i = 0; i < clone->nargs; ++i)
197 : {
198 3280 : if (clone->args[i].arg_type == SIMD_CLONE_ARG_TYPE_VECTOR)
199 : {
200 1876 : tree arg_scalar_type = TREE_TYPE (clone->args[i].vector_type);
201 1876 : rhs = TREE_INT_CST_LOW (TYPE_SIZE_UNIT (arg_scalar_type));
202 1876 : if (rhs < lhs)
203 : {
204 3280 : scalar_type = arg_scalar_type;
205 3280 : lhs = rhs;
206 : }
207 : }
208 : }
209 : }
210 558807 : else if (gcall *call = dyn_cast <gcall *> (stmt_info->stmt))
211 : {
212 68534 : unsigned int i = 0;
213 68534 : if (gimple_call_internal_p (call))
214 : {
215 32058 : internal_fn ifn = gimple_call_internal_fn (call);
216 32058 : if (internal_load_fn_p (ifn))
217 : /* For loads the LHS type does the trick. */
218 : i = ~0U;
219 28426 : else if (internal_store_fn_p (ifn))
220 : {
221 : /* For stores use the tyep of the stored value. */
222 1523 : i = internal_fn_stored_value_index (ifn);
223 1523 : scalar_type = TREE_TYPE (gimple_call_arg (call, i));
224 1523 : i = ~0U;
225 : }
226 26903 : else if (internal_fn_mask_index (ifn) == 0)
227 5406 : i = 1;
228 : }
229 68534 : if (i < gimple_call_num_args (call))
230 : {
231 58802 : tree rhs_type = TREE_TYPE (gimple_call_arg (call, i));
232 58802 : if (tree_fits_uhwi_p (TYPE_SIZE_UNIT (rhs_type)))
233 : {
234 58802 : rhs = TREE_INT_CST_LOW (TYPE_SIZE_UNIT (rhs_type));
235 58802 : if (rhs < lhs)
236 4564404 : scalar_type = rhs_type;
237 : }
238 : }
239 : }
240 :
241 : return scalar_type;
242 : }
243 :
244 :
245 : /* Insert DDR into LOOP_VINFO list of ddrs that may alias and need to be
246 : tested at run-time. Return TRUE if DDR was successfully inserted.
247 : Return false if versioning is not supported. */
248 :
249 : static opt_result
250 147684 : vect_mark_for_runtime_alias_test (ddr_p ddr, loop_vec_info loop_vinfo)
251 : {
252 147684 : class loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
253 :
254 147684 : if ((unsigned) param_vect_max_version_for_alias_checks == 0)
255 54 : return opt_result::failure_at (vect_location,
256 : "will not create alias checks, as"
257 : " --param vect-max-version-for-alias-checks"
258 : " == 0\n");
259 :
260 147630 : opt_result res
261 147630 : = runtime_alias_check_p (ddr, loop,
262 147630 : optimize_loop_nest_for_speed_p (loop));
263 147630 : if (!res)
264 143 : return res;
265 :
266 147487 : LOOP_VINFO_MAY_ALIAS_DDRS (loop_vinfo).safe_push (ddr);
267 147487 : return opt_result::success ();
268 : }
269 :
270 : /* Record that loop LOOP_VINFO needs to check that VALUE is nonzero. */
271 :
272 : static void
273 1348 : vect_check_nonzero_value (loop_vec_info loop_vinfo, tree value)
274 : {
275 1348 : const vec<tree> &checks = LOOP_VINFO_CHECK_NONZERO (loop_vinfo);
276 2071 : for (unsigned int i = 0; i < checks.length(); ++i)
277 723 : if (checks[i] == value)
278 : return;
279 :
280 1348 : if (dump_enabled_p ())
281 432 : dump_printf_loc (MSG_NOTE, vect_location,
282 : "need run-time check that %T is nonzero\n",
283 : value);
284 1348 : LOOP_VINFO_CHECK_NONZERO (loop_vinfo).safe_push (value);
285 : }
286 :
287 : /* Return true if we know that the order of vectorized DR_INFO_A and
288 : vectorized DR_INFO_B will be the same as the order of DR_INFO_A and
289 : DR_INFO_B. At least one of the accesses is a write. */
290 :
291 : static bool
292 108292 : vect_preserves_scalar_order_p (dr_vec_info *dr_info_a, dr_vec_info *dr_info_b)
293 : {
294 108292 : stmt_vec_info stmtinfo_a = dr_info_a->stmt;
295 108292 : stmt_vec_info stmtinfo_b = dr_info_b->stmt;
296 :
297 : /* Single statements are always kept in their original order. */
298 108292 : if (!STMT_VINFO_GROUPED_ACCESS (stmtinfo_a)
299 175814 : && !STMT_VINFO_GROUPED_ACCESS (stmtinfo_b))
300 : return true;
301 :
302 : /* If there is a loop invariant read involved we might vectorize it in
303 : the prologue, breaking scalar oder with respect to the in-loop store. */
304 21214 : if ((DR_IS_READ (dr_info_a->dr) && integer_zerop (DR_STEP (dr_info_a->dr)))
305 66333 : || (DR_IS_READ (dr_info_b->dr) && integer_zerop (DR_STEP (dr_info_b->dr))))
306 1244 : return false;
307 :
308 : /* STMT_A and STMT_B belong to overlapping groups. All loads are
309 : emitted at the position of the first scalar load.
310 : Stores in a group are emitted at the position of the last scalar store.
311 : Compute that position and check whether the resulting order matches
312 : the current one. */
313 44750 : stmt_vec_info il_a = DR_GROUP_FIRST_ELEMENT (stmtinfo_a);
314 44750 : if (il_a)
315 : {
316 40401 : if (DR_IS_WRITE (STMT_VINFO_DATA_REF (stmtinfo_a)))
317 161562 : for (stmt_vec_info s = DR_GROUP_NEXT_ELEMENT (il_a); s;
318 140867 : s = DR_GROUP_NEXT_ELEMENT (s))
319 140867 : il_a = get_later_stmt (il_a, s);
320 : else /* DR_IS_READ */
321 78045 : for (stmt_vec_info s = DR_GROUP_NEXT_ELEMENT (il_a); s;
322 58339 : s = DR_GROUP_NEXT_ELEMENT (s))
323 58339 : if (get_later_stmt (il_a, s) == il_a)
324 1550 : il_a = s;
325 : }
326 : else
327 : il_a = stmtinfo_a;
328 44750 : stmt_vec_info il_b = DR_GROUP_FIRST_ELEMENT (stmtinfo_b);
329 44750 : if (il_b)
330 : {
331 39197 : if (DR_IS_WRITE (STMT_VINFO_DATA_REF (stmtinfo_b)))
332 211503 : for (stmt_vec_info s = DR_GROUP_NEXT_ELEMENT (il_b); s;
333 180243 : s = DR_GROUP_NEXT_ELEMENT (s))
334 180243 : il_b = get_later_stmt (il_b, s);
335 : else /* DR_IS_READ */
336 37371 : for (stmt_vec_info s = DR_GROUP_NEXT_ELEMENT (il_b); s;
337 29434 : s = DR_GROUP_NEXT_ELEMENT (s))
338 29434 : if (get_later_stmt (il_b, s) == il_b)
339 153 : il_b = s;
340 : }
341 : else
342 : il_b = stmtinfo_b;
343 44750 : bool a_after_b = (get_later_stmt (stmtinfo_a, stmtinfo_b) == stmtinfo_a);
344 44750 : return (get_later_stmt (il_a, il_b) == il_a) == a_after_b;
345 : }
346 :
347 : /* A subroutine of vect_analyze_data_ref_dependence. Handle
348 : DDR_COULD_BE_INDEPENDENT_P ddr DDR that has a known set of dependence
349 : distances. These distances are conservatively correct but they don't
350 : reflect a guaranteed dependence.
351 :
352 : Return true if this function does all the work necessary to avoid
353 : an alias or false if the caller should use the dependence distances
354 : to limit the vectorization factor in the usual way. LOOP_DEPTH is
355 : the depth of the loop described by LOOP_VINFO and the other arguments
356 : are as for vect_analyze_data_ref_dependence. */
357 :
358 : static bool
359 7963 : vect_analyze_possibly_independent_ddr (data_dependence_relation *ddr,
360 : loop_vec_info loop_vinfo,
361 : int loop_depth, unsigned int *max_vf)
362 : {
363 7963 : class loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
364 31870 : for (lambda_vector &dist_v : DDR_DIST_VECTS (ddr))
365 : {
366 15778 : int dist = dist_v[loop_depth];
367 15778 : if (dist != 0 && !(dist > 0 && DDR_REVERSED_P (ddr)))
368 : {
369 : /* If the user asserted safelen >= DIST consecutive iterations
370 : can be executed concurrently, assume independence.
371 :
372 : ??? An alternative would be to add the alias check even
373 : in this case, and vectorize the fallback loop with the
374 : maximum VF set to safelen. However, if the user has
375 : explicitly given a length, it's less likely that that
376 : would be a win. */
377 7829 : if (loop->safelen >= 2 && abs_hwi (dist) <= loop->safelen)
378 : {
379 32 : if ((unsigned int) loop->safelen < *max_vf)
380 2 : *max_vf = loop->safelen;
381 32 : LOOP_VINFO_NO_DATA_DEPENDENCIES (loop_vinfo) = false;
382 32 : continue;
383 : }
384 :
385 : /* For dependence distances of 2 or more, we have the option
386 : of limiting VF or checking for an alias at runtime.
387 : Prefer to check at runtime if we can, to avoid limiting
388 : the VF unnecessarily when the bases are in fact independent.
389 :
390 : Note that the alias checks will be removed if the VF ends up
391 : being small enough. */
392 7797 : dr_vec_info *dr_info_a = loop_vinfo->lookup_dr (DDR_A (ddr));
393 7797 : dr_vec_info *dr_info_b = loop_vinfo->lookup_dr (DDR_B (ddr));
394 7797 : return (!STMT_VINFO_GATHER_SCATTER_P (dr_info_a->stmt)
395 7797 : && !STMT_VINFO_GATHER_SCATTER_P (dr_info_b->stmt)
396 15602 : && vect_mark_for_runtime_alias_test (ddr, loop_vinfo));
397 : }
398 : }
399 : return true;
400 : }
401 :
402 :
403 : /* Function vect_analyze_data_ref_dependence.
404 :
405 : FIXME: I needed to change the sense of the returned flag.
406 :
407 : Return FALSE if there (might) exist a dependence between a memory-reference
408 : DRA and a memory-reference DRB. When versioning for alias may check a
409 : dependence at run-time, return TRUE. Adjust *MAX_VF according to
410 : the data dependence. */
411 :
412 : static opt_result
413 980877 : vect_analyze_data_ref_dependence (struct data_dependence_relation *ddr,
414 : loop_vec_info loop_vinfo,
415 : unsigned int *max_vf)
416 : {
417 980877 : unsigned int i;
418 980877 : class loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
419 980877 : struct data_reference *dra = DDR_A (ddr);
420 980877 : struct data_reference *drb = DDR_B (ddr);
421 980877 : dr_vec_info *dr_info_a = loop_vinfo->lookup_dr (dra);
422 980877 : dr_vec_info *dr_info_b = loop_vinfo->lookup_dr (drb);
423 980877 : stmt_vec_info stmtinfo_a = dr_info_a->stmt;
424 980877 : stmt_vec_info stmtinfo_b = dr_info_b->stmt;
425 980877 : lambda_vector dist_v;
426 980877 : unsigned int loop_depth;
427 :
428 : /* If user asserted safelen consecutive iterations can be
429 : executed concurrently, assume independence. */
430 1141295 : auto apply_safelen = [&]()
431 : {
432 160418 : if (loop->safelen >= 2)
433 : {
434 7444 : if ((unsigned int) loop->safelen < *max_vf)
435 1896 : *max_vf = loop->safelen;
436 7444 : LOOP_VINFO_NO_DATA_DEPENDENCIES (loop_vinfo) = false;
437 7444 : return true;
438 : }
439 : return false;
440 980877 : };
441 :
442 : /* In loop analysis all data references should be vectorizable. */
443 980877 : if (!STMT_VINFO_VECTORIZABLE (stmtinfo_a)
444 980877 : || !STMT_VINFO_VECTORIZABLE (stmtinfo_b))
445 0 : gcc_unreachable ();
446 :
447 : /* Independent data accesses. */
448 980877 : if (DDR_ARE_DEPENDENT (ddr) == chrec_known)
449 757669 : return opt_result::success ();
450 :
451 223208 : if (dra == drb
452 223208 : || (DR_IS_READ (dra) && DR_IS_READ (drb)))
453 0 : return opt_result::success ();
454 :
455 : /* We do not have to consider dependences between accesses that belong
456 : to the same group, unless the stride could be smaller than the
457 : group size. */
458 223208 : if (DR_GROUP_FIRST_ELEMENT (stmtinfo_a)
459 103242 : && (DR_GROUP_FIRST_ELEMENT (stmtinfo_a)
460 103242 : == DR_GROUP_FIRST_ELEMENT (stmtinfo_b))
461 239457 : && !STMT_VINFO_STRIDED_P (stmtinfo_a))
462 2297 : return opt_result::success ();
463 :
464 : /* Even if we have an anti-dependence then, as the vectorized loop covers at
465 : least two scalar iterations, there is always also a true dependence.
466 : As the vectorizer does not re-order loads and stores we can ignore
467 : the anti-dependence if TBAA can disambiguate both DRs similar to the
468 : case with known negative distance anti-dependences (positive
469 : distance anti-dependences would violate TBAA constraints). */
470 100436 : if (((DR_IS_READ (dra) && DR_IS_WRITE (drb))
471 120475 : || (DR_IS_WRITE (dra) && DR_IS_READ (drb)))
472 336209 : && !alias_sets_conflict_p (get_alias_set (DR_REF (dra)),
473 : get_alias_set (DR_REF (drb))))
474 5531 : return opt_result::success ();
475 :
476 215380 : if (STMT_VINFO_GATHER_SCATTER_P (stmtinfo_a)
477 203556 : || STMT_VINFO_GATHER_SCATTER_P (stmtinfo_b))
478 : {
479 14483 : if (apply_safelen ())
480 1396 : return opt_result::success ();
481 :
482 13087 : return opt_result::failure_at
483 13087 : (stmtinfo_a->stmt,
484 : "possible alias involving gather/scatter between %T and %T\n",
485 : DR_REF (dra), DR_REF (drb));
486 : }
487 :
488 : /* Unknown data dependence. */
489 200897 : if (DDR_ARE_DEPENDENT (ddr) == chrec_dont_know)
490 : {
491 145400 : if (apply_safelen ())
492 6048 : return opt_result::success ();
493 :
494 139352 : if (dump_enabled_p ())
495 7447 : dump_printf_loc (MSG_MISSED_OPTIMIZATION, stmtinfo_a->stmt,
496 : "versioning for alias required: "
497 : "can't determine dependence between %T and %T\n",
498 : DR_REF (dra), DR_REF (drb));
499 :
500 : /* Add to list of ddrs that need to be tested at run-time. */
501 139352 : return vect_mark_for_runtime_alias_test (ddr, loop_vinfo);
502 : }
503 :
504 : /* Known data dependence. */
505 55497 : if (DDR_NUM_DIST_VECTS (ddr) == 0)
506 : {
507 535 : if (apply_safelen ())
508 0 : return opt_result::success ();
509 :
510 535 : if (dump_enabled_p ())
511 132 : dump_printf_loc (MSG_MISSED_OPTIMIZATION, stmtinfo_a->stmt,
512 : "versioning for alias required: "
513 : "bad dist vector for %T and %T\n",
514 : DR_REF (dra), DR_REF (drb));
515 : /* Add to list of ddrs that need to be tested at run-time. */
516 535 : return vect_mark_for_runtime_alias_test (ddr, loop_vinfo);
517 : }
518 :
519 54962 : loop_depth = index_in_loop_nest (loop->num, DDR_LOOP_NEST (ddr));
520 :
521 54962 : if (DDR_COULD_BE_INDEPENDENT_P (ddr)
522 54962 : && vect_analyze_possibly_independent_ddr (ddr, loop_vinfo,
523 : loop_depth, max_vf))
524 7955 : return opt_result::success ();
525 :
526 87697 : FOR_EACH_VEC_ELT (DDR_DIST_VECTS (ddr), i, dist_v)
527 : {
528 47027 : int dist = dist_v[loop_depth];
529 :
530 47027 : if (dump_enabled_p ())
531 4268 : dump_printf_loc (MSG_NOTE, vect_location,
532 : "dependence distance = %d.\n", dist);
533 :
534 47027 : if (dist == 0)
535 : {
536 36028 : if (dump_enabled_p ())
537 3474 : dump_printf_loc (MSG_NOTE, vect_location,
538 : "dependence distance == 0 between %T and %T\n",
539 : DR_REF (dra), DR_REF (drb));
540 :
541 : /* When we perform grouped accesses and perform implicit CSE
542 : by detecting equal accesses and doing disambiguation with
543 : runtime alias tests like for
544 : .. = a[i];
545 : .. = a[i+1];
546 : a[i] = ..;
547 : a[i+1] = ..;
548 : *p = ..;
549 : .. = a[i];
550 : .. = a[i+1];
551 : where we will end up loading { a[i], a[i+1] } once, make
552 : sure that inserting group loads before the first load and
553 : stores after the last store will do the right thing.
554 : Similar for groups like
555 : a[i] = ...;
556 : ... = a[i];
557 : a[i+1] = ...;
558 : where loads from the group interleave with the store. */
559 36028 : if (!vect_preserves_scalar_order_p (dr_info_a, dr_info_b))
560 0 : return opt_result::failure_at (stmtinfo_a->stmt,
561 : "READ_WRITE dependence"
562 : " in interleaving.\n");
563 :
564 36028 : if (loop->safelen < 2)
565 : {
566 32274 : tree indicator = dr_zero_step_indicator (dra);
567 32274 : if (!indicator || integer_zerop (indicator))
568 0 : return opt_result::failure_at (stmtinfo_a->stmt,
569 : "access also has a zero step\n");
570 32274 : else if (TREE_CODE (indicator) != INTEGER_CST)
571 1348 : vect_check_nonzero_value (loop_vinfo, indicator);
572 : }
573 36028 : continue;
574 36028 : }
575 :
576 10999 : if (dist > 0 && DDR_REVERSED_P (ddr))
577 : {
578 : /* If DDR_REVERSED_P the order of the data-refs in DDR was
579 : reversed (to make distance vector positive), and the actual
580 : distance is negative. */
581 3696 : if (dump_enabled_p ())
582 105 : dump_printf_loc (MSG_NOTE, vect_location,
583 : "dependence distance negative.\n");
584 : /* When doing outer loop vectorization, we need to check if there is
585 : a backward dependence at the inner loop level if the dependence
586 : at the outer loop is reversed. See PR81740. */
587 3696 : if (nested_in_vect_loop_p (loop, stmtinfo_a)
588 3684 : || nested_in_vect_loop_p (loop, stmtinfo_b))
589 : {
590 12 : unsigned inner_depth = index_in_loop_nest (loop->inner->num,
591 12 : DDR_LOOP_NEST (ddr));
592 12 : if (dist_v[inner_depth] < 0)
593 9 : return opt_result::failure_at (stmtinfo_a->stmt,
594 : "not vectorized, dependence "
595 : "between data-refs %T and %T\n",
596 : DR_REF (dra), DR_REF (drb));
597 : }
598 : /* Record a negative dependence distance to later limit the
599 : amount of stmt copying / unrolling we can perform.
600 : Only need to handle read-after-write dependence. */
601 3687 : if (DR_IS_READ (drb)
602 76 : && (STMT_VINFO_MIN_NEG_DIST (stmtinfo_b) == 0
603 12 : || STMT_VINFO_MIN_NEG_DIST (stmtinfo_b) > (unsigned)dist))
604 76 : STMT_VINFO_MIN_NEG_DIST (stmtinfo_b) = dist;
605 3687 : continue;
606 3687 : }
607 :
608 7303 : unsigned int abs_dist = abs (dist);
609 7303 : if (abs_dist >= 2 && abs_dist < *max_vf)
610 : {
611 : /* The dependence distance requires reduction of the maximal
612 : vectorization factor. */
613 486 : *max_vf = abs_dist;
614 486 : if (dump_enabled_p ())
615 30 : dump_printf_loc (MSG_NOTE, vect_location,
616 : "adjusting maximal vectorization factor to %i\n",
617 : *max_vf);
618 : }
619 :
620 7303 : if (abs_dist >= *max_vf)
621 : {
622 : /* Dependence distance does not create dependence, as far as
623 : vectorization is concerned, in this case. */
624 975 : if (dump_enabled_p ())
625 437 : dump_printf_loc (MSG_NOTE, vect_location,
626 : "dependence distance >= VF.\n");
627 975 : continue;
628 : }
629 :
630 6328 : return opt_result::failure_at (stmtinfo_a->stmt,
631 : "not vectorized, possible dependence "
632 : "between data-refs %T and %T\n",
633 : DR_REF (dra), DR_REF (drb));
634 : }
635 :
636 40670 : return opt_result::success ();
637 : }
638 :
639 : /* Function vect_analyze_early_break_dependences.
640 :
641 : Examine all the data references in the loop and make sure that if we have
642 : multiple exits that we are able to safely move stores such that they become
643 : safe for vectorization. The function also calculates the place where to move
644 : the instructions to and computes what the new vUSE chain should be.
645 :
646 : This works in tandem with the CFG that will be produced by
647 : slpeel_tree_duplicate_loop_to_edge_cfg later on.
648 :
649 : This function tries to validate whether an early break vectorization
650 : is possible for the current instruction sequence. Returns True i
651 : possible, otherwise False.
652 :
653 : Requirements:
654 : - Any memory access must be to a fixed size buffer.
655 : - There must not be any loads and stores to the same object.
656 : - Multiple loads are allowed as long as they don't alias.
657 :
658 : NOTE:
659 : This implementation is very conservative. Any overlapping loads/stores
660 : that take place before the early break statement gets rejected aside from
661 : WAR dependencies.
662 :
663 : i.e.:
664 :
665 : a[i] = 8
666 : c = a[i]
667 : if (b[i])
668 : ...
669 :
670 : is not allowed, but
671 :
672 : c = a[i]
673 : a[i] = 8
674 : if (b[i])
675 : ...
676 :
677 : is which is the common case. */
678 :
679 : static opt_result
680 138968 : vect_analyze_early_break_dependences (loop_vec_info loop_vinfo)
681 : {
682 138968 : DUMP_VECT_SCOPE ("vect_analyze_early_break_dependences");
683 :
684 : /* List of all load data references found during traversal. */
685 138968 : auto_vec<data_reference *> bases;
686 138968 : basic_block dest_bb = NULL;
687 :
688 138968 : class loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
689 138968 : class loop *loop_nest = loop_outer (loop);
690 :
691 138968 : if (dump_enabled_p ())
692 1508 : dump_printf_loc (MSG_NOTE, vect_location,
693 : "loop contains multiple exits, analyzing"
694 : " statement dependencies.\n");
695 :
696 138968 : if (LOOP_VINFO_EARLY_BREAKS_VECT_PEELED (loop_vinfo))
697 25326 : if (dump_enabled_p ())
698 277 : dump_printf_loc (MSG_NOTE, vect_location,
699 : "alternate exit has been chosen as main exit.\n");
700 :
701 : /* Since we don't support general control flow, the location we'll move the
702 : side-effects to is always the latch connected exit. When we support
703 : general control flow we can do better but for now this is fine. Move
704 : side-effects to the in-loop destination of the last early exit. For the
705 : PEELED case we move the side-effects to the latch block as this is
706 : guaranteed to be the last block to be executed when a vector iteration
707 : finished. */
708 138968 : if (LOOP_VINFO_EARLY_BREAKS_VECT_PEELED (loop_vinfo))
709 25326 : dest_bb = loop->latch;
710 : else
711 113642 : dest_bb = single_pred (loop->latch);
712 :
713 : /* We start looking from dest_bb, for the non-PEELED case we don't want to
714 : move any stores already present, but we do want to read and validate the
715 : loads. */
716 138968 : basic_block bb = dest_bb;
717 :
718 : /* We move stores across all loads to the beginning of dest_bb, so
719 : the first block processed below doesn't need dependence checking. */
720 138968 : bool check_deps = false;
721 :
722 503878 : do
723 : {
724 321423 : gimple_stmt_iterator gsi = gsi_last_bb (bb);
725 :
726 : /* Now analyze all the remaining statements and try to determine which
727 : instructions are allowed/needed to be moved. */
728 2394440 : while (!gsi_end_p (gsi))
729 : {
730 2078582 : gimple *stmt = gsi_stmt (gsi);
731 2078582 : gsi_prev (&gsi);
732 2078582 : if (is_gimple_debug (stmt))
733 1836217 : continue;
734 :
735 1091399 : stmt_vec_info orig_stmt_vinfo = loop_vinfo->lookup_stmt (stmt);
736 1091399 : stmt_vec_info stmt_vinfo
737 1091399 : = vect_stmt_to_vectorize (orig_stmt_vinfo);
738 1091399 : auto dr_ref = STMT_VINFO_DATA_REF (stmt_vinfo);
739 1091399 : if (!dr_ref)
740 : {
741 : /* Trapping statements after the last early exit are fine. */
742 843099 : if (check_deps)
743 : {
744 512336 : bool could_trap_p = false;
745 512336 : gimple *cur_stmt = STMT_VINFO_STMT (stmt_vinfo);
746 512336 : could_trap_p = gimple_could_trap_p (cur_stmt);
747 512336 : if (STMT_VINFO_IN_PATTERN_P (orig_stmt_vinfo))
748 : {
749 188956 : gimple_stmt_iterator gsi2;
750 188956 : auto stmt_seq
751 188956 : = STMT_VINFO_PATTERN_DEF_SEQ (orig_stmt_vinfo);
752 188956 : for (gsi2 = gsi_start (stmt_seq);
753 381318 : !could_trap_p && !gsi_end_p (gsi2); gsi_next (&gsi2))
754 : {
755 192362 : cur_stmt = gsi_stmt (gsi2);
756 192362 : could_trap_p = gimple_could_trap_p (cur_stmt);
757 : }
758 : }
759 :
760 512336 : if (could_trap_p)
761 : {
762 5020 : if (dump_enabled_p ())
763 144 : dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
764 : "cannot vectorize as operation may trap.\n");
765 5020 : return opt_result::failure_at (cur_stmt,
766 : "can't safely apply code motion to dependencies"
767 : " to vectorize the early exit. %G may trap.\n",
768 : cur_stmt);
769 : }
770 : }
771 :
772 838079 : continue;
773 838079 : }
774 :
775 : /* We know everything below dest_bb is safe since we know we
776 : had a full vector iteration when reaching it. Either by
777 : the loop entry / IV exit test being last or because this
778 : is the loop latch itself. */
779 248300 : if (!check_deps)
780 10955 : continue;
781 :
782 : /* Check if vector accesses to the object will be within bounds.
783 : must be a constant or assume loop will be versioned or niters
784 : bounded by VF so accesses are within range. We only need to check
785 : the reads since writes are moved to a safe place where if we get
786 : there we know they are safe to perform. */
787 237345 : if (DR_IS_READ (dr_ref))
788 : {
789 221622 : dr_set_safe_speculative_read_required (stmt_vinfo, true);
790 221622 : bool inbounds = ref_within_array_bound (stmt, DR_REF (dr_ref));
791 221622 : DR_SCALAR_KNOWN_BOUNDS (STMT_VINFO_DR_INFO (stmt_vinfo)) = inbounds;
792 :
793 221622 : if (dump_enabled_p ())
794 2251 : dump_printf_loc (MSG_NOTE, vect_location,
795 : "marking DR (read) as possibly needing peeling "
796 : "for alignment at %G", stmt);
797 : }
798 :
799 237345 : if (DR_IS_READ (dr_ref))
800 221622 : bases.safe_push (dr_ref);
801 15723 : else if (DR_IS_WRITE (dr_ref))
802 : {
803 : /* We are moving writes down in the CFG. To be sure that this
804 : is valid after vectorization we have to check all the loads
805 : we are sinking the stores past to see if any of them may
806 : alias or are the same object.
807 :
808 : Same objects will not be an issue because unless the store
809 : is marked volatile the value can be forwarded. If the
810 : store is marked volatile we don't vectorize the loop
811 : anyway.
812 :
813 : That leaves the check for aliasing. We don't really need
814 : to care about the stores aliasing with each other since the
815 : stores are moved in order so the effects are still observed
816 : correctly. This leaves the check for WAR dependencies
817 : which we would be introducing here if the DR can alias.
818 : The check is quadratic in loads/stores but I have not found
819 : a better API to do this. I believe all loads and stores
820 : must be checked. We also must check them when we
821 : encountered the store, since we don't care about loads past
822 : the store. */
823 :
824 48813 : for (auto dr_read : bases)
825 15405 : if (dr_may_alias_p (dr_ref, dr_read, loop_nest))
826 : {
827 545 : if (dump_enabled_p ())
828 4 : dump_printf_loc (MSG_MISSED_OPTIMIZATION,
829 : vect_location,
830 : "early breaks not supported: "
831 : "overlapping loads and stores "
832 : "found before the break "
833 : "statement.\n");
834 :
835 545 : return opt_result::failure_at (stmt,
836 : "can't safely apply code motion to dependencies"
837 : " to vectorize the early exit. %G may alias with"
838 : " %G\n", stmt, dr_read->stmt);
839 : }
840 : }
841 :
842 473600 : if (gimple_vdef (stmt))
843 : {
844 15178 : if (dump_enabled_p ())
845 280 : dump_printf_loc (MSG_NOTE, vect_location,
846 : "==> recording stmt %G", stmt);
847 :
848 15178 : LOOP_VINFO_EARLY_BRK_STORES (loop_vinfo).safe_push (stmt);
849 : }
850 680044 : else if (gimple_vuse (stmt))
851 : {
852 221622 : LOOP_VINFO_EARLY_BRK_VUSES (loop_vinfo).safe_insert (0, stmt);
853 221622 : if (dump_enabled_p ())
854 2251 : dump_printf_loc (MSG_NOTE, vect_location,
855 : "marked statement for vUSE update: %G", stmt);
856 : }
857 : }
858 :
859 315858 : if (!single_pred_p (bb))
860 : {
861 133403 : gcc_assert (bb == loop->header);
862 133403 : break;
863 : }
864 :
865 : /* If we possibly sink through a virtual PHI make sure to elide that. */
866 182455 : if (gphi *vphi = get_virtual_phi (bb))
867 107 : LOOP_VINFO_EARLY_BRK_STORES (loop_vinfo).safe_push (vphi);
868 :
869 : /* All earlier blocks need dependence checking. */
870 182455 : check_deps = true;
871 182455 : bb = single_pred (bb);
872 182455 : }
873 : while (1);
874 :
875 : /* We don't allow outer -> inner loop transitions which should have been
876 : trapped already during loop form analysis. */
877 133403 : gcc_assert (dest_bb->loop_father == loop);
878 :
879 : /* Check that the destination block we picked has only one pred. To relax this we
880 : have to take special care when moving the statements. We don't currently support
881 : such control flow however this check is there to simplify how we handle
882 : labels that may be present anywhere in the IL. This check is to ensure that the
883 : labels aren't significant for the CFG. */
884 133403 : if (!single_pred (dest_bb))
885 0 : return opt_result::failure_at (vect_location,
886 : "chosen loop exit block (BB %d) does not have a "
887 : "single predecessor which is currently not "
888 : "supported for early break vectorization.\n",
889 : dest_bb->index);
890 :
891 133403 : LOOP_VINFO_EARLY_BRK_DEST_BB (loop_vinfo) = dest_bb;
892 :
893 133403 : if (!LOOP_VINFO_EARLY_BRK_VUSES (loop_vinfo).is_empty ())
894 : {
895 : /* All uses shall be updated to that of the first load. Entries are
896 : stored in reverse order. */
897 122768 : tree vuse = gimple_vuse (LOOP_VINFO_EARLY_BRK_VUSES (loop_vinfo).last ());
898 343148 : for (auto g : LOOP_VINFO_EARLY_BRK_VUSES (loop_vinfo))
899 : {
900 220380 : if (dump_enabled_p ())
901 2188 : dump_printf_loc (MSG_NOTE, vect_location,
902 : "will update use: %T, mem_ref: %G", vuse, g);
903 : }
904 : }
905 :
906 133403 : if (dump_enabled_p ())
907 1360 : dump_printf_loc (MSG_NOTE, vect_location,
908 : "recorded statements to be moved to BB %d\n",
909 1360 : LOOP_VINFO_EARLY_BRK_DEST_BB (loop_vinfo)->index);
910 :
911 133403 : return opt_result::success ();
912 138968 : }
913 :
914 : /* Function vect_analyze_data_ref_dependences.
915 :
916 : Examine all the data references in the loop, and make sure there do not
917 : exist any data dependences between them. Set *MAX_VF according to
918 : the maximum vectorization factor the data dependences allow. */
919 :
920 : opt_result
921 322647 : vect_analyze_data_ref_dependences (loop_vec_info loop_vinfo,
922 : unsigned int *max_vf)
923 : {
924 322647 : unsigned int i;
925 322647 : struct data_dependence_relation *ddr;
926 :
927 322647 : DUMP_VECT_SCOPE ("vect_analyze_data_ref_dependences");
928 :
929 322647 : if (!LOOP_VINFO_DDRS (loop_vinfo).exists ())
930 : {
 : /* Lazily compute the dependences, reserving one slot for every
 : ordered pair of datarefs. */
931 160614 : LOOP_VINFO_DDRS (loop_vinfo)
932 160614 : .create (LOOP_VINFO_DATAREFS (loop_vinfo).length ()
933 160614 : * LOOP_VINFO_DATAREFS (loop_vinfo).length ());
934 : /* We do not need read-read dependences. */
935 321228 : bool res = compute_all_dependences (LOOP_VINFO_DATAREFS (loop_vinfo),
936 : &LOOP_VINFO_DDRS (loop_vinfo),
937 160614 : LOOP_VINFO_LOOP_NEST (loop_vinfo),
938 : false);
939 160614 : gcc_assert (res);
940 : }
941 :
942 322647 : LOOP_VINFO_NO_DATA_DEPENDENCIES (loop_vinfo) = true;
943 :
944 : /* For epilogues we either have no aliases or alias versioning
945 : was applied to original loop. Therefore we may just get max_vf
946 : using VF of original loop. */
947 322647 : if (LOOP_VINFO_EPILOGUE_P (loop_vinfo))
948 15070 : *max_vf = LOOP_VINFO_ORIG_MAX_VECT_FACTOR (loop_vinfo);
949 : else
950 1268841 : FOR_EACH_VEC_ELT (LOOP_VINFO_DDRS (loop_vinfo), i, ddr)
951 : {
952 980877 : opt_result res
953 980877 : = vect_analyze_data_ref_dependence (ddr, loop_vinfo, max_vf);
954 980877 : if (!res)
955 19613 : return res;
956 : }
957 :
958 : /* If we have early break statements in the loop, check to see if they
959 : are of a form we can vectorize. */
960 303034 : if (LOOP_VINFO_EARLY_BREAKS (loop_vinfo))
961 138968 : return vect_analyze_early_break_dependences (loop_vinfo);
962 :
963 164066 : return opt_result::success ();
964 : }
965 :
966 :
967 : /* Function vect_slp_analyze_data_ref_dependence.
968 :
969 : Return TRUE if there (might) exist a dependence between a memory-reference
970 : DRA and a memory-reference DRB for VINFO. When versioning for alias
971 : may check a dependence at run-time, return FALSE. */
973 :
974 : static bool
975 6859783 : vect_slp_analyze_data_ref_dependence (vec_info *vinfo,
976 : struct data_dependence_relation *ddr)
977 : {
978 6859783 : struct data_reference *dra = DDR_A (ddr);
979 6859783 : struct data_reference *drb = DDR_B (ddr);
980 6859783 : dr_vec_info *dr_info_a = vinfo->lookup_dr (dra);
981 6859783 : dr_vec_info *dr_info_b = vinfo->lookup_dr (drb);
982 :
983 : /* We need to check dependences of statements marked as unvectorizable
984 : as well, they still can prohibit vectorization. */
985 :
986 : /* Independent data accesses. */
987 6859783 : if (DDR_ARE_DEPENDENT (ddr) == chrec_known)
988 : return false;
989 :
 : /* A self-dependence cannot prohibit the transform. */
990 1092173 : if (dra == drb)
991 : return false;
992 :
993 : /* Read-read is OK. */
994 8401 : if (DR_IS_READ (dra) && DR_IS_READ (drb))
995 : return false;
996 :
997 : /* If dra and drb are part of the same interleaving chain consider
998 : them independent. */
999 8401 : if (STMT_VINFO_GROUPED_ACCESS (dr_info_a->stmt)
1000 8401 : && (DR_GROUP_FIRST_ELEMENT (dr_info_a->stmt)
1001 8401 : == DR_GROUP_FIRST_ELEMENT (dr_info_b->stmt)))
1002 : return false;
1003 :
1004 : /* Unknown data dependence. */
1005 8401 : if (DDR_ARE_DEPENDENT (ddr) == chrec_dont_know)
1006 : {
1007 8401 : if (dump_enabled_p ())
1008 4 : dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1009 : "can't determine dependence between %T and %T\n",
1010 : DR_REF (dra), DR_REF (drb));
1011 : }
1012 0 : else if (dump_enabled_p ())
1013 0 : dump_printf_loc (MSG_NOTE, vect_location,
1014 : "determined dependence between %T and %T\n",
1015 : DR_REF (dra), DR_REF (drb));
1016 :
 : /* Known or unknown dependence - either way it may prohibit the
 : transform. */
1017 : return true;
1018 : }
1019 :
1020 :
1021 : /* Analyze dependences involved in the transform of a store SLP NODE.
 : Return true if all scalar stores of NODE can be sunk to the position
 : of the last store in the group. */
1022 :
1023 : static bool
1024 654740 : vect_slp_analyze_store_dependences (vec_info *vinfo, slp_tree node)
1025 : {
1026 : /* This walks over all stmts involved in the SLP store done
1027 : in NODE verifying we can sink them up to the last stmt in the
1028 : group. */
1029 654740 : stmt_vec_info last_access_info = vect_find_last_scalar_stmt_in_slp (node);
1030 654740 : gcc_assert (DR_IS_WRITE (STMT_VINFO_DATA_REF (last_access_info)));
1031 :
1032 2377137 : for (unsigned k = 0; k < SLP_TREE_SCALAR_STMTS (node).length (); ++k)
1033 : {
1034 1730768 : stmt_vec_info access_info
1035 1730768 : = vect_orig_stmt (SLP_TREE_SCALAR_STMTS (node)[k]);
1036 1730768 : if (access_info == last_access_info)
1037 646996 : continue;
1038 1083772 : data_reference *dr_a = STMT_VINFO_DATA_REF (access_info);
1039 1083772 : ao_ref ref;
1040 1083772 : bool ref_initialized_p = false;
 : /* Walk forward from this store to the last group access, checking
 : every intervening stmt that touches memory. */
1041 1083772 : for (gimple_stmt_iterator gsi = gsi_for_stmt (access_info->stmt);
1042 10575173 : gsi_stmt (gsi) != last_access_info->stmt; gsi_next (&gsi))
1043 : {
1044 9499772 : gimple *stmt = gsi_stmt (gsi);
1045 16847107 : if (! gimple_vuse (stmt))
1046 2639661 : continue;
1047 :
1048 : /* If we couldn't record a (single) data reference for this
1049 : stmt we have to resort to the alias oracle. */
1050 6860111 : stmt_vec_info stmt_info = vinfo->lookup_stmt (stmt);
1051 6860111 : data_reference *dr_b = STMT_VINFO_DATA_REF (stmt_info);
1052 6860111 : if (!dr_b)
1053 : {
1054 : /* We are moving a store - this means
1055 : we cannot use TBAA for disambiguation. */
1056 546 : if (!ref_initialized_p)
1057 546 : ao_ref_init (&ref, DR_REF (dr_a));
1058 546 : if (stmt_may_clobber_ref_p_1 (stmt, &ref, false)
1059 546 : || ref_maybe_used_by_stmt_p (stmt, &ref, false))
1060 8371 : return false;
1061 542 : continue;
1062 : }
1063 :
 : /* The caller marks stores of the same instance visited; those
 : must not be encountered while analyzing this store group. */
1064 6859565 : gcc_assert (!gimple_visited_p (stmt));
1065 :
1066 6859565 : ddr_p ddr = initialize_data_dependence_relation (dr_a,
1067 6859565 : dr_b, vNULL);
1068 6859565 : bool dependent = vect_slp_analyze_data_ref_dependence (vinfo, ddr);
1069 6859565 : free_dependence_relation (ddr);
1070 6859565 : if (dependent)
1071 : return false;
1072 : }
1073 : }
1074 : return true;
1075 : }
1077 : /* Analyze dependences involved in the transform of a load SLP NODE. STORES
1078 : contain the vector of scalar stores of this instance if we are
1079 : disambiguating the loads. */
1080 :
1081 : static bool
1082 151865 : vect_slp_analyze_load_dependences (vec_info *vinfo, slp_tree node,
1083 : vec<stmt_vec_info> stores,
1084 : stmt_vec_info last_store_info)
1085 : {
1086 : /* This walks over all stmts involved in the SLP load done
1087 : in NODE verifying we can hoist them up to the first stmt in the
1088 : group. */
1089 151865 : stmt_vec_info first_access_info = vect_find_first_scalar_stmt_in_slp (node);
1090 151865 : gcc_assert (DR_IS_READ (STMT_VINFO_DATA_REF (first_access_info)));
1091 :
1092 530703 : for (unsigned k = 0; k < SLP_TREE_SCALAR_STMTS (node).length (); ++k)
1093 : {
1094 378872 : if (! SLP_TREE_SCALAR_STMTS (node)[k])
1095 159048 : continue;
1096 378872 : stmt_vec_info access_info
1097 378872 : = vect_orig_stmt (SLP_TREE_SCALAR_STMTS (node)[k]);
1098 378872 : if (access_info == first_access_info)
1099 159048 : continue;
1100 219824 : data_reference *dr_a = STMT_VINFO_DATA_REF (access_info);
1101 219824 : ao_ref ref;
1102 219824 : bool ref_initialized_p = false;
1103 219824 : hash_set<stmt_vec_info> grp_visited;
1104 219824 : for (gimple_stmt_iterator gsi = gsi_for_stmt (access_info->stmt);
1105 4334442 : gsi_stmt (gsi) != first_access_info->stmt; gsi_prev (&gsi))
1106 : {
1107 2057343 : gimple *stmt = gsi_stmt (gsi);
1108 3356605 : if (! gimple_vdef (stmt))
1109 2001727 : continue;
1110 :
1111 278903 : stmt_vec_info stmt_info = vinfo->lookup_stmt (stmt);
1112 :
1113 : /* If we run into a store of this same instance (we've just
1114 : marked those) then delay dependence checking until we run
1115 : into the last store because this is where it will have
1116 : been sunk to (and we verified that we can do that already). */
1117 278903 : if (gimple_visited_p (stmt))
1118 : {
1119 223287 : if (stmt_info != last_store_info)
1120 223285 : continue;
1121 :
1122 10 : for (stmt_vec_info &store_info : stores)
1123 : {
1124 4 : data_reference *store_dr = STMT_VINFO_DATA_REF (store_info);
1125 4 : ddr_p ddr = initialize_data_dependence_relation
1126 4 : (dr_a, store_dr, vNULL);
1127 4 : bool dependent
1128 4 : = vect_slp_analyze_data_ref_dependence (vinfo, ddr);
1129 4 : free_dependence_relation (ddr);
1130 4 : if (dependent)
1131 34 : return false;
1132 : }
1133 2 : continue;
1134 2 : }
1135 :
1136 114133 : auto check_hoist = [&] (stmt_vec_info stmt_info) -> bool
1137 : {
1138 : /* We are hoisting a load - this means we can use TBAA for
1139 : disambiguation. */
1140 58517 : if (!ref_initialized_p)
1141 58517 : ao_ref_init (&ref, DR_REF (dr_a));
1142 58517 : if (stmt_may_clobber_ref_p_1 (stmt_info->stmt, &ref, true))
1143 : {
1144 : /* If we couldn't record a (single) data reference for this
1145 : stmt we have to give up now. */
1146 214 : data_reference *dr_b = STMT_VINFO_DATA_REF (stmt_info);
1147 214 : if (!dr_b)
1148 : return false;
1149 214 : ddr_p ddr = initialize_data_dependence_relation (dr_a,
1150 214 : dr_b, vNULL);
1151 214 : bool dependent
1152 214 : = vect_slp_analyze_data_ref_dependence (vinfo, ddr);
1153 214 : free_dependence_relation (ddr);
1154 214 : if (dependent)
1155 : return false;
1156 : }
1157 : /* No dependence. */
1158 : return true;
1159 55616 : };
1160 55616 : if (STMT_VINFO_GROUPED_ACCESS (stmt_info))
1161 : {
1162 : /* When we run into a store group we have to honor
1163 : that earlier stores might be moved here. We don't
1164 : know exactly which and where to since we lack a
1165 : back-mapping from DR to SLP node, so assume all
1166 : earlier stores are sunk here. It's enough to
1167 : consider the last stmt of a group for this.
1168 : ??? Both this and the fact that we disregard that
1169 : the conflicting instance might be removed later
1170 : is overly conservative. */
1171 55164 : if (!grp_visited.add (DR_GROUP_FIRST_ELEMENT (stmt_info)))
1172 10663 : for (auto store_info = DR_GROUP_FIRST_ELEMENT (stmt_info);
1173 129007 : store_info != NULL;
1174 118344 : store_info = DR_GROUP_NEXT_ELEMENT (store_info))
1175 118378 : if ((store_info == stmt_info
1176 107724 : || get_later_stmt (store_info, stmt_info) == stmt_info)
1177 165789 : && !check_hoist (store_info))
1178 : return false;
1179 : }
1180 : else
1181 : {
1182 452 : if (!check_hoist (stmt_info))
1183 : return false;
1184 : }
1185 : }
1186 219824 : }
1187 : return true;
1188 : }
1189 :
1190 :
1191 : /* Function vect_slp_analyze_instance_dependence.
1192 :
1193 : Examine all the data references in the SLP instance INSTANCE and make
1194 : sure there do not exist any data dependences that prevent moving its
1195 : scalar stmts to the vectorized stmt insert locations. */
1196 :
1197 : bool
1198 781633 : vect_slp_analyze_instance_dependence (vec_info *vinfo, slp_instance instance)
1199 : {
1200 781633 : DUMP_VECT_SCOPE ("vect_slp_analyze_instance_dependence");
1201 :
1202 : /* The stores of this instance are at the root of the SLP tree. */
1203 781633 : slp_tree store = NULL;
1204 781633 : if (SLP_INSTANCE_KIND (instance) == slp_inst_kind_store)
1205 654740 : store = SLP_INSTANCE_TREE (instance);
1206 :
1207 : /* Verify we can sink stores to the vectorized stmt insert location. */
1208 654740 : stmt_vec_info last_store_info = NULL;
1209 654740 : if (store)
1210 : {
1211 654740 : if (! vect_slp_analyze_store_dependences (vinfo, store))
1212 : return false;
1213 :
1214 : /* Mark stores in this instance and remember the last one. */
1215 646369 : last_store_info = vect_find_last_scalar_stmt_in_slp (store);
1216 2368109 : for (unsigned k = 0; k < SLP_TREE_SCALAR_STMTS (store).length (); ++k)
1217 1721740 : gimple_set_visited (SLP_TREE_SCALAR_STMTS (store)[k]->stmt, true);
1218 : }
1219 :
1220 773262 : bool res = true;
1221 :
1222 : /* Verify we can sink loads to the vectorized stmt insert location,
1223 : special-casing stores of this instance. */
1224 1162047 : for (slp_tree &load : SLP_INSTANCE_LOADS (instance))
1225 151865 : if (! vect_slp_analyze_load_dependences (vinfo, load,
1226 : store
1227 : ? SLP_TREE_SCALAR_STMTS (store)
1228 : : vNULL, last_store_info))
1229 : {
1230 : res = false;
1231 : break;
1232 : }
1233 :
1234 : /* Unset the visited flag. */
1235 773262 : if (store)
1236 2368109 : for (unsigned k = 0; k < SLP_TREE_SCALAR_STMTS (store).length (); ++k)
1237 1721740 : gimple_set_visited (SLP_TREE_SCALAR_STMTS (store)[k]->stmt, false);
1238 :
1239 : /* If this is a SLP instance with a store check if there's a dependent
1240 : load that cannot be forwarded from a previous iteration of a loop
1241 : both are in. This is to avoid situations like that in PR115777. */
1242 773262 : if (res && store)
1243 : {
1244 646345 : stmt_vec_info store_info
1245 646345 : = DR_GROUP_FIRST_ELEMENT (SLP_TREE_SCALAR_STMTS (store)[0]);
1246 646345 : class loop *store_loop = gimple_bb (store_info->stmt)->loop_father;
1247 646345 : if (! loop_outer (store_loop))
1248 551256 : return res;
1249 95089 : vec<loop_p> loop_nest;
1250 95089 : loop_nest.create (1);
1251 95089 : loop_nest.quick_push (store_loop);
1252 95089 : data_reference *drs = nullptr;
1253 176075 : for (slp_tree &load : SLP_INSTANCE_LOADS (instance))
1254 : {
1255 36167 : if (! STMT_VINFO_GROUPED_ACCESS (SLP_TREE_SCALAR_STMTS (load)[0]))
1256 0 : continue;
1257 36167 : stmt_vec_info load_info
1258 36167 : = DR_GROUP_FIRST_ELEMENT (SLP_TREE_SCALAR_STMTS (load)[0]);
1259 36167 : if (gimple_bb (load_info->stmt)->loop_father != store_loop)
1260 5170 : continue;
1261 :
1262 : /* For now concern ourselves with write-after-read as we also
1263 : only look for re-use of the store within the same SLP instance.
1264 : We can still get a RAW here when the instance contains a PHI
1265 : with a backedge though, thus this test. */
1266 30997 : if (! vect_stmt_dominates_stmt_p (STMT_VINFO_STMT (load_info),
1267 : STMT_VINFO_STMT (store_info)))
1268 11877 : continue;
1269 :
 : /* Lazily create the store DR, once per instance. */
1270 19120 : if (! drs)
1271 : {
1272 18262 : drs = create_data_ref (loop_preheader_edge (store_loop),
1273 : store_loop,
1274 18262 : DR_REF (STMT_VINFO_DATA_REF (store_info)),
1275 : store_info->stmt, false, false);
1276 18262 : if (! DR_BASE_ADDRESS (drs)
1277 15446 : || TREE_CODE (DR_STEP (drs)) != INTEGER_CST)
1278 : break;
1279 : }
1280 16001 : data_reference *drl
1281 16001 : = create_data_ref (loop_preheader_edge (store_loop),
1282 : store_loop,
1283 16001 : DR_REF (STMT_VINFO_DATA_REF (load_info)),
1284 : load_info->stmt, true, false);
1285 :
1286 : /* See whether the DRs have a known constant distance throughout
1287 : the containing loop iteration. */
1288 30289 : if (! DR_BASE_ADDRESS (drl)
1289 14240 : || ! operand_equal_p (DR_STEP (drs), DR_STEP (drl))
1290 8298 : || ! operand_equal_p (DR_BASE_ADDRESS (drs),
1291 8298 : DR_BASE_ADDRESS (drl))
1292 17718 : || ! operand_equal_p (DR_OFFSET (drs), DR_OFFSET (drl)))
1293 : {
1294 14288 : free_data_ref (drl);
1295 14288 : continue;
1296 : }
1297 :
1298 : /* If the next iteration load overlaps with a non-power-of-two offset
1299 : we are surely failing any STLF attempt. */
1300 1713 : HOST_WIDE_INT step = TREE_INT_CST_LOW (DR_STEP (drl));
1301 1713 : unsigned HOST_WIDE_INT sizes
1302 1713 : = (TREE_INT_CST_LOW (TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (drs))))
1303 1713 : * DR_GROUP_SIZE (store_info));
1304 1713 : unsigned HOST_WIDE_INT sizel
1305 1713 : = (TREE_INT_CST_LOW (TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (drl))))
1306 1713 : * DR_GROUP_SIZE (load_info));
1307 1713 : if (ranges_overlap_p (TREE_INT_CST_LOW (DR_INIT (drl)) + step, sizel,
1308 1713 : TREE_INT_CST_LOW (DR_INIT (drs)), sizes))
1309 : {
1310 834 : unsigned HOST_WIDE_INT dist
1311 834 : = absu_hwi (TREE_INT_CST_LOW (DR_INIT (drl)) + step
1312 834 : - TREE_INT_CST_LOW (DR_INIT (drs)));
1313 834 : poly_uint64 loadsz = tree_to_poly_uint64
1314 834 : (TYPE_SIZE_UNIT (SLP_TREE_VECTYPE (load)));
1315 834 : poly_uint64 storesz = tree_to_poly_uint64
1316 834 : (TYPE_SIZE_UNIT (SLP_TREE_VECTYPE (store)));
1317 : /* When the overlap aligns with vector sizes used for the loads
1318 : and the vector stores are larger or equal to the loads
1319 : forwarding should work. */
1320 1668 : if (maybe_gt (loadsz, storesz) || ! multiple_p (dist, loadsz))
1321 73 : load->avoid_stlf_fail = true;
1322 : }
1323 1713 : free_data_ref (drl);
1324 : }
1325 95089 : if (drs)
1326 18262 : free_data_ref (drs);
1327 95089 : loop_nest.release ();
1328 : }
1329 :
1330 : return res;
1331 : }
1332 :
1333 : /* Return the misalignment of DR_INFO accessed in VECTYPE with OFFSET
1334 : applied, measured against the recorded target alignment, or
1335 : DR_MISALIGNMENT_UNKNOWN when it cannot be determined. */
1336 :
1337 : int
1338 5693384 : dr_misalignment (dr_vec_info *dr_info, tree vectype, poly_int64 offset)
1339 : {
1340 5693384 : HOST_WIDE_INT diff = 0;
1341 : /* Alignment is only analyzed for the first element of a DR group,
1342 : use that but adjust misalignment by the offset of the access. */
1343 5693384 : if (STMT_VINFO_GROUPED_ACCESS (dr_info->stmt))
1344 : {
1345 2237546 : dr_vec_info *first_dr
1346 2237546 : = STMT_VINFO_DR_INFO (DR_GROUP_FIRST_ELEMENT (dr_info->stmt));
1347 : /* vect_analyze_data_ref_accesses guarantees that DR_INIT are
1348 : INTEGER_CSTs and the first element in the group has the lowest
1349 : address. */
1350 2237546 : diff = (TREE_INT_CST_LOW (DR_INIT (dr_info->dr))
1351 2237546 : - TREE_INT_CST_LOW (DR_INIT (first_dr->dr)));
1352 2237546 : gcc_assert (diff >= 0);
1353 : dr_info = first_dr;
1354 : }
1355 :
1356 5693384 : int misalign = dr_info->misalignment;
1357 5693384 : gcc_assert (misalign != DR_MISALIGNMENT_UNINITIALIZED);
1358 5693384 : if (misalign == DR_MISALIGNMENT_UNKNOWN)
1359 : return misalign;
1360 :
1361 : /* If the access is only aligned for a vector type with smaller alignment
1362 : requirement the access has unknown misalignment. */
1363 3453202 : if (maybe_lt (dr_info->target_alignment * BITS_PER_UNIT,
1364 3453202 : targetm.vectorize.preferred_vector_alignment (vectype)))
1365 : return DR_MISALIGNMENT_UNKNOWN;
1366 :
1367 : /* Apply the offset from the DR group start and the externally supplied
1368 : offset which can for example result from a negative stride access. */
1369 3453199 : poly_int64 misalignment = misalign + diff + offset;
1370 :
1371 : /* Below we reject compile-time non-constant target alignments, but if
1372 : our misalignment is zero, then we are known to already be aligned
1373 : w.r.t. any such possible target alignment. */
1374 3453199 : if (known_eq (misalignment, 0))
1375 : return 0;
1376 :
1377 621313 : unsigned HOST_WIDE_INT target_alignment_c;
1378 621313 : if (!dr_info->target_alignment.is_constant (&target_alignment_c)
1379 621313 : || !known_misalignment (misalignment, target_alignment_c, &misalign))
1380 : return DR_MISALIGNMENT_UNKNOWN;
1381 621313 : return misalign;
1382 : }
1382 :
1383 : /* Record the base alignment guarantee given by DRB, which occurs
1384 : in STMT_INFO. */
1385 :
1386 : static void
1387 4432085 : vect_record_base_alignment (vec_info *vinfo, stmt_vec_info stmt_info,
1388 : innermost_loop_behavior *drb)
1389 : {
1390 4432085 : bool existed;
1391 4432085 : std::pair<stmt_vec_info, innermost_loop_behavior *> &entry
1392 4432085 : = vinfo->base_alignments.get_or_insert (drb->base_address, &existed);
 : /* Keep only the strongest (largest) base-alignment guarantee seen
 : for this base address. */
1393 4432085 : if (!existed || entry.second->base_alignment < drb->base_alignment)
1394 : {
1395 1330183 : entry = std::make_pair (stmt_info, drb);
1396 1330183 : if (dump_enabled_p ())
1397 31627 : dump_printf_loc (MSG_NOTE, vect_location,
1398 : "recording new base alignment for %T\n"
1399 : " alignment: %d\n"
1400 : " misalignment: %d\n"
1401 : " based on: %G",
1402 : drb->base_address,
1403 : drb->base_alignment,
1404 : drb->base_misalignment,
1405 : stmt_info->stmt);
1406 : }
1407 4432085 : }
1408 :
1409 : /* If the region we're going to vectorize is reached, all unconditional
1410 : data references occur at least once. We can therefore pool the base
1411 : alignment guarantees from each unconditional reference. Do this by
1412 : going through all the data references in VINFO and checking whether
1413 : the containing statement makes the reference unconditionally. If so,
1414 : record the alignment of the base address in VINFO so that it can be
1415 : used for all other references with the same base. */
1416 :
1417 : void
1418 967646 : vect_record_base_alignments (vec_info *vinfo)
1419 : {
1420 967646 : loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
1421 358643 : class loop *loop = loop_vinfo ? LOOP_VINFO_LOOP (loop_vinfo) : NULL;
1422 14728798 : for (data_reference *dr : vinfo->shared->datarefs)
1423 : {
1424 11921300 : dr_vec_info *dr_info = vinfo->lookup_dr (dr);
1425 11921300 : stmt_vec_info stmt_info = dr_info->stmt;
 : /* Only unconditional, vectorizable, non-gather/scatter refs
 : contribute a base-alignment guarantee. */
1426 11921300 : if (!DR_IS_CONDITIONAL_IN_STMT (dr)
1427 11913730 : && STMT_VINFO_VECTORIZABLE (stmt_info)
1428 4448751 : && !STMT_VINFO_GATHER_SCATTER_P (stmt_info))
1429 : {
1430 4430671 : vect_record_base_alignment (vinfo, stmt_info, &DR_INNERMOST (dr));
1431 :
1432 : /* If DR is nested in the loop that is being vectorized, we can also
1433 : record the alignment of the base wrt the outer loop. */
1434 12716147 : if (loop && nested_in_vect_loop_p (loop, stmt_info))
1435 1414 : vect_record_base_alignment
1436 1414 : (vinfo, stmt_info, &STMT_VINFO_DR_WRT_VEC_LOOP (stmt_info));
1437 : }
1438 : }
1439 967646 : }
1440 :
1441 : /* Function vect_compute_data_ref_alignment
1442 :
1443 : Compute the misalignment of the data reference DR_INFO when vectorizing
1444 : with VECTYPE.
1445 :
1446 : Output:
1447 : 1. initialized misalignment info for DR_INFO
1448 :
1449 : FOR NOW: No analysis is actually performed. Misalignment is calculated
1450 : only for trivial cases. TODO. */
1451 :
1452 : static void
1453 1478683 : vect_compute_data_ref_alignment (vec_info *vinfo, dr_vec_info *dr_info,
1454 : tree vectype)
1455 : {
1456 1478683 : stmt_vec_info stmt_info = dr_info->stmt;
1457 1478683 : vec_base_alignments *base_alignments = &vinfo->base_alignments;
1458 1478683 : loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
1459 1478683 : class loop *loop = NULL;
1460 1478683 : tree ref = DR_REF (dr_info->dr);
1461 :
1462 1478683 : if (dump_enabled_p ())
1463 49662 : dump_printf_loc (MSG_NOTE, vect_location,
1464 : "vect_compute_data_ref_alignment:\n");
1465 :
1466 1478683 : if (loop_vinfo)
1467 708882 : loop = LOOP_VINFO_LOOP (loop_vinfo);
1468 :
1469 : /* Initialize misalignment to unknown. */
1470 1478683 : SET_DR_MISALIGNMENT (dr_info, DR_MISALIGNMENT_UNKNOWN);
1471 :
1472 1478683 : if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
1473 : return;
1474 :
1475 1459055 : innermost_loop_behavior *drb = vect_dr_behavior (vinfo, dr_info);
1476 1459055 : bool step_preserves_misalignment_p;
1477 :
1478 1459055 : poly_uint64 vector_alignment
1479 1459055 : = exact_div (targetm.vectorize.preferred_vector_alignment (vectype),
1480 : BITS_PER_UNIT);
1481 :
1482 1459055 : if (loop_vinfo
1483 1459055 : && dr_safe_speculative_read_required (stmt_info))
1484 : {
1485 : /* The required target alignment must be a power-of-2 value and is
1486 : computed as the product of vector element size, VF and group size.
1487 : We compute the constant part first as VF may be a variable. For
1488 : variable VF, the power-of-2 check of VF is deferred to runtime. */
1489 300240 : auto align_factor_c
1490 300240 : = TREE_INT_CST_LOW (TYPE_SIZE_UNIT (TREE_TYPE (vectype)));
1491 300240 : if (STMT_VINFO_GROUPED_ACCESS (stmt_info))
1492 89572 : align_factor_c *= DR_GROUP_SIZE (DR_GROUP_FIRST_ELEMENT (stmt_info));
1493 :
1494 300240 : poly_uint64 vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
1495 300240 : poly_uint64 new_alignment = vf * align_factor_c;
1496 :
1497 600480 : if ((vf.is_constant () && pow2p_hwi (new_alignment.to_constant ()))
1498 : || (!vf.is_constant () && pow2p_hwi (align_factor_c)))
1499 : {
1500 237269 : if (dump_enabled_p ())
1501 : {
1502 2858 : dump_printf_loc (MSG_NOTE, vect_location,
1503 : "alignment increased due to early break to ");
1504 2858 : dump_dec (MSG_NOTE, new_alignment);
1505 2858 : dump_printf (MSG_NOTE, " bytes.\n");
1506 : }
1507 237269 : vector_alignment = new_alignment;
1508 : }
1509 : }
1510 :
1511 1459055 : SET_DR_TARGET_ALIGNMENT (dr_info, vector_alignment);
1512 :
1513 : /* If the main loop has peeled for alignment we have no way of knowing
1514 : whether the data accesses in the epilogues are aligned. We can't at
1515 : compile time answer the question whether we have entered the main loop or
1516 : not. Fixes PR 92351. */
1517 1459055 : if (loop_vinfo)
1518 : {
1519 689254 : loop_vec_info orig_loop_vinfo = LOOP_VINFO_ORIG_LOOP_INFO (loop_vinfo);
1520 689254 : if (orig_loop_vinfo
1521 32495 : && LOOP_VINFO_PEELING_FOR_ALIGNMENT (orig_loop_vinfo) != 0)
1522 : return;
1523 : }
1524 :
1525 1458838 : unsigned HOST_WIDE_INT vect_align_c;
1526 1458838 : if (!vector_alignment.is_constant (&vect_align_c))
1527 : return;
1528 :
1529 : /* No step for BB vectorization. */
1530 1458838 : if (!loop)
1531 : {
1532 769801 : gcc_assert (integer_zerop (drb->step));
1533 : step_preserves_misalignment_p = true;
1534 : }
1535 :
1536 : else
1537 : {
1538 : /* We can only use base and misalignment information relative to
1539 : an innermost loop if the misalignment stays the same throughout the
1540 : execution of the loop. As above, this is the case if the stride of
1541 : the dataref evenly divides by the alignment. Make sure to check
1542 : previous epilogues and the main loop. */
1543 : step_preserves_misalignment_p = true;
1544 : auto lvinfo = loop_vinfo;
1545 1411081 : while (lvinfo)
1546 : {
1547 722044 : poly_uint64 vf = LOOP_VINFO_VECT_FACTOR (lvinfo);
1548 722044 : step_preserves_misalignment_p
1549 722044 : &= multiple_p (drb->step_alignment * vf, vect_align_c);
1550 722044 : lvinfo = LOOP_VINFO_ORIG_LOOP_INFO (lvinfo);
1551 : }
1552 :
1553 689037 : if (!step_preserves_misalignment_p && dump_enabled_p ())
1554 315 : dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1555 : "step doesn't divide the vector alignment.\n");
1556 :
1557 : /* In case the dataref is in an inner-loop of the loop that is being
1558 : vectorized (LOOP), we use the base and misalignment information
1559 : relative to the outer-loop (LOOP). This is ok only if the
1560 : misalignment stays the same throughout the execution of the
1561 : inner-loop, which is why we have to check that the stride of the
1562 : dataref in the inner-loop evenly divides by the vector alignment. */
1563 689037 : if (step_preserves_misalignment_p
1564 689037 : && nested_in_vect_loop_p (loop, stmt_info))
1565 : {
1566 1413 : step_preserves_misalignment_p
1567 1413 : = (DR_STEP_ALIGNMENT (dr_info->dr) % vect_align_c) == 0;
1568 :
1569 1413 : if (dump_enabled_p ())
1570 : {
1571 498 : if (step_preserves_misalignment_p)
1572 358 : dump_printf_loc (MSG_NOTE, vect_location,
1573 : "inner step divides the vector alignment.\n");
1574 : else
1575 140 : dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1576 : "inner step doesn't divide the vector"
1577 : " alignment.\n");
1578 : }
1579 : }
1580 : }
1581 :
1582 1458838 : unsigned int base_alignment = drb->base_alignment;
1583 1458838 : unsigned int base_misalignment = drb->base_misalignment;
1584 :
1585 : /* Calculate the maximum of the pooled base address alignment and the
1586 : alignment that we can compute for DR itself. */
1587 1458838 : std::pair<stmt_vec_info, innermost_loop_behavior *> *entry
1588 1458838 : = base_alignments->get (drb->base_address);
1589 1458838 : if (entry
1590 1455927 : && base_alignment < (*entry).second->base_alignment
1591 1460672 : && (loop_vinfo
1592 1148 : || (dominated_by_p (CDI_DOMINATORS, gimple_bb (stmt_info->stmt),
1593 1148 : gimple_bb (entry->first->stmt))
1594 1003 : && (gimple_bb (stmt_info->stmt) != gimple_bb (entry->first->stmt)
1595 769 : || (entry->first->dr_aux.group <= dr_info->group)))))
1596 : {
1597 1672 : base_alignment = entry->second->base_alignment;
1598 1672 : base_misalignment = entry->second->base_misalignment;
1599 : }
1600 :
1601 1458838 : if (drb->offset_alignment < vect_align_c
1602 1394048 : || !step_preserves_misalignment_p
1603 : /* We need to know whether the step wrt the vectorized loop is
1604 : negative when computing the starting misalignment below. */
1605 1385864 : || TREE_CODE (drb->step) != INTEGER_CST)
1606 : {
1607 100513 : if (dump_enabled_p ())
1608 3506 : dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1609 : "Unknown alignment for access: %T\n", ref);
1610 100513 : return;
1611 : }
1612 :
 : /* If the base is not sufficiently aligned, see whether we can
 : increase the alignment of the underlying decl instead. */
1613 1358325 : if (base_alignment < vect_align_c)
1614 : {
1615 682580 : unsigned int max_alignment;
1616 682580 : tree base = get_base_for_alignment (drb->base_address, &max_alignment);
1617 682580 : if (max_alignment < vect_align_c
1618 680185 : || (loop_vinfo && LOOP_VINFO_EPILOGUE_P (loop_vinfo))
1619 1343621 : || !vect_can_force_dr_alignment_p (base,
1620 661041 : vect_align_c * BITS_PER_UNIT))
1621 : {
1622 486719 : if (dump_enabled_p ())
1623 13518 : dump_printf_loc (MSG_NOTE, vect_location,
1624 : "can't force alignment of ref: %T\n", ref);
1625 486719 : return;
1626 : }
1627 :
1628 : /* Force the alignment of the decl.
1629 : NOTE: This is the only change to the code we make during
1630 : the analysis phase, before deciding to vectorize the loop. */
1631 195861 : if (dump_enabled_p ())
1632 7925 : dump_printf_loc (MSG_NOTE, vect_location,
1633 : "force alignment of %T\n", ref);
1634 :
1635 195861 : dr_info->base_decl = base;
1636 195861 : dr_info->base_misaligned = true;
1637 195861 : base_misalignment = 0;
1638 : }
1639 871606 : poly_int64 misalignment
1640 871606 : = base_misalignment + wi::to_poly_offset (drb->init).force_shwi ();
1641 :
1642 871606 : unsigned int const_misalignment;
1643 871606 : if (!known_misalignment (misalignment, vect_align_c, &const_misalignment))
1644 : {
1645 : if (dump_enabled_p ())
1646 : dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1647 : "Non-constant misalignment for access: %T\n", ref);
1648 : return;
1649 : }
1650 :
1651 871606 : SET_DR_MISALIGNMENT (dr_info, const_misalignment);
1652 :
1653 871606 : if (dump_enabled_p ())
1654 31458 : dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1655 : "misalign = %d bytes of ref %T\n",
1656 : const_misalignment, ref);
1657 :
1658 : return;
1659 : }
1660 :
1661 : /* Return whether DR_INFO, which is related to DR_PEEL_INFO in
1662 : that it only differs in DR_INIT, is aligned if DR_PEEL_INFO
1663 : is made aligned via peeling. */
1664 :
1665 : static bool
1666 1446031 : vect_dr_aligned_if_related_peeled_dr_is (dr_vec_info *dr_info,
1667 : dr_vec_info *dr_peel_info)
1668 : {
1669 1446031 : if (multiple_p (DR_TARGET_ALIGNMENT (dr_peel_info),
1670 1446775 : DR_TARGET_ALIGNMENT (dr_info)))
1671 : {
1672 1445287 : poly_offset_int diff
1673 1445287 : = (wi::to_poly_offset (DR_INIT (dr_peel_info->dr))
1674 1445287 : - wi::to_poly_offset (DR_INIT (dr_info->dr)));
1675 1445287 : if (known_eq (diff, 0)
1676 1445287 : || multiple_p (diff, DR_TARGET_ALIGNMENT (dr_info)))
1677 479633 : return true;
1678 : }
1679 : return false;
1680 : }
1681 :
1682 : /* Return whether DR_INFO is aligned if DR_PEEL_INFO is made
1683 : aligned via peeling. */
1684 :
1685 : static bool
1686 155249 : vect_dr_aligned_if_peeled_dr_is (dr_vec_info *dr_info,
1687 : dr_vec_info *dr_peel_info)
1688 : {
1689 155249 : if (!operand_equal_p (DR_BASE_ADDRESS (dr_info->dr),
1690 155249 : DR_BASE_ADDRESS (dr_peel_info->dr), 0)
1691 36009 : || !operand_equal_p (DR_OFFSET (dr_info->dr),
1692 36009 : DR_OFFSET (dr_peel_info->dr), 0)
1693 190387 : || !operand_equal_p (DR_STEP (dr_info->dr),
1694 35138 : DR_STEP (dr_peel_info->dr), 0))
1695 120482 : return false;
1696 :
1697 34767 : return vect_dr_aligned_if_related_peeled_dr_is (dr_info, dr_peel_info);
1698 : }
1699 :
1700 : /* Compute the value for dr_info->misalign so that the access appears
1701 : aligned. This is used by peeling to compensate for dr_misalignment
1702 : applying the offset for negative step. */
1703 :
1704 : int
1705 20299 : vect_dr_misalign_for_aligned_access (dr_vec_info *dr_info)
1706 : {
1707 20299 : if (tree_int_cst_sgn (DR_STEP (dr_info->dr)) >= 0)
1708 : return 0;
1709 :
1710 198 : tree vectype = STMT_VINFO_VECTYPE (dr_info->stmt);
1711 198 : poly_int64 misalignment
1712 198 : = ((TYPE_VECTOR_SUBPARTS (vectype) - 1)
1713 198 : * TREE_INT_CST_LOW (TYPE_SIZE_UNIT (TREE_TYPE (vectype))));
1714 :
1715 198 : unsigned HOST_WIDE_INT target_alignment_c;
1716 198 : int misalign;
1717 198 : if (!dr_info->target_alignment.is_constant (&target_alignment_c)
1718 198 : || !known_misalignment (misalignment, target_alignment_c, &misalign))
1719 : return DR_MISALIGNMENT_UNKNOWN;
1720 198 : return misalign;
1721 : }
1722 :
1723 : /* Function vect_update_misalignment_for_peel.
1724 : Sets DR_INFO's misalignment
1725 : - to 0 if it has the same alignment as DR_PEEL_INFO,
   - to the misalignment computed using NPEEL if DR_INFO's alignment is known,
1727 : - to -1 (unknown) otherwise.
1728 :
1729 : DR_INFO - the data reference whose misalignment is to be adjusted.
1730 : DR_PEEL_INFO - the data reference whose misalignment is being made
1731 : zero in the vector loop by the peel.
1732 : NPEEL - the number of iterations in the peel loop if the misalignment
1733 : of DR_PEEL_INFO is known at compile time. */
1734 :
static void
vect_update_misalignment_for_peel (dr_vec_info *dr_info,
				   dr_vec_info *dr_peel_info, int npeel)
{
  /* If dr_info is aligned if dr_peel_info is, then mark it so.  The
     recorded value compensates for a negative step of DR_PEEL_INFO,
     see vect_dr_misalign_for_aligned_access.  */
  if (vect_dr_aligned_if_peeled_dr_is (dr_info, dr_peel_info))
    {
      SET_DR_MISALIGNMENT (dr_info,
			   vect_dr_misalign_for_aligned_access (dr_peel_info));
      return;
    }

  /* Otherwise, when all the involved alignments are compile-time
     constants, update the misalignment arithmetically: peeling NPEEL
     iterations advances the access by NPEEL * DR_STEP bytes.  */
  unsigned HOST_WIDE_INT alignment;
  if (DR_TARGET_ALIGNMENT (dr_info).is_constant (&alignment)
      && known_alignment_for_access_p (dr_info,
				       STMT_VINFO_VECTYPE (dr_info->stmt))
      && known_alignment_for_access_p (dr_peel_info,
				       STMT_VINFO_VECTYPE (dr_peel_info->stmt)))
    {
      int misal = dr_info->misalignment;
      misal += npeel * TREE_INT_CST_LOW (DR_STEP (dr_info->dr));
      /* Reduce modulo the (power-of-two) target alignment.  */
      misal &= alignment - 1;
      set_dr_misalignment (dr_info, misal);
      return;
    }

  /* Fall back to unknown misalignment.  */
  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location, "Setting misalignment " \
		     "to unknown (-1).\n");
  SET_DR_MISALIGNMENT (dr_info, DR_MISALIGNMENT_UNKNOWN);
}
1766 :
1767 : /* Return true if alignment is relevant for DR_INFO. */
1768 :
1769 : static bool
1770 1433954 : vect_relevant_for_alignment_p (dr_vec_info *dr_info)
1771 : {
1772 1433954 : stmt_vec_info stmt_info = dr_info->stmt;
1773 :
1774 1433954 : if (!STMT_VINFO_RELEVANT_P (stmt_info))
1775 : return false;
1776 :
1777 : /* For interleaving, only the alignment of the first access matters. */
1778 1432904 : if (STMT_VINFO_GROUPED_ACCESS (stmt_info)
1779 1641940 : && DR_GROUP_FIRST_ELEMENT (stmt_info) != stmt_info)
1780 : return false;
1781 :
1782 : /* Scatter-gather and invariant accesses continue to address individual
1783 : scalars, so vector-level alignment is irrelevant. */
1784 1350431 : if (STMT_VINFO_GATHER_SCATTER_P (stmt_info)
1785 1350431 : || integer_zerop (DR_STEP (dr_info->dr)))
1786 51747 : return false;
1787 :
1788 : /* Strided accesses perform only component accesses, alignment is
1789 : irrelevant for them. */
1790 1298684 : if (STMT_VINFO_STRIDED_P (stmt_info)
1791 1298684 : && !STMT_VINFO_GROUPED_ACCESS (stmt_info))
1792 : return false;
1793 :
1794 : return true;
1795 : }
1796 :
/* Given a memory reference EXP return whether its alignment is less
1798 : than its size. */
1799 :
1800 : static bool
1801 1286130 : not_size_aligned (tree exp)
1802 : {
1803 1286130 : if (!tree_fits_uhwi_p (TYPE_SIZE (TREE_TYPE (exp))))
1804 : return true;
1805 :
1806 1286130 : return (tree_to_uhwi (TYPE_SIZE (TREE_TYPE (exp)))
1807 1286130 : > get_object_alignment (exp));
1808 : }
1809 :
1810 : /* Function vector_alignment_reachable_p
1811 :
1812 : Return true if vector alignment for DR_INFO is reachable by peeling
1813 : a few loop iterations. Return false otherwise. */
1814 :
static bool
vector_alignment_reachable_p (dr_vec_info *dr_info, poly_uint64 vf)
{
  stmt_vec_info stmt_info = dr_info->stmt;
  tree vectype = STMT_VINFO_VECTYPE (stmt_info);
  poly_uint64 nelements = TYPE_VECTOR_SUBPARTS (vectype);
  poly_uint64 vector_size = GET_MODE_SIZE (TYPE_MODE (vectype));
  unsigned elem_size = vector_element_size (vector_size, nelements);
  unsigned group_size = 1;

  if (STMT_VINFO_GROUPED_ACCESS (stmt_info))
    {
      /* For interleaved access we peel only if number of iterations in
	 the prolog loop ({VF - misalignment}), is a multiple of the
	 number of the interleaved accesses.  */

      /* FORNOW: handle only known alignment.  */
      if (!known_alignment_for_access_p (dr_info, vectype))
	return false;

      /* Express the misalignment in scalar elements and check the
	 prologue iteration count divides the group size.  */
      unsigned mis_in_elements = dr_misalignment (dr_info, vectype) / elem_size;
      if (!multiple_p (nelements - mis_in_elements, DR_GROUP_SIZE (stmt_info)))
	return false;

      group_size = DR_GROUP_SIZE (DR_GROUP_FIRST_ELEMENT (stmt_info));
    }

  /* If the vectorization factor does not guarantee DR advancement of
     a multiple of the target alignment no peeling will help.  */
  if (!multiple_p (elem_size * group_size * vf, dr_target_alignment (dr_info)))
    return false;

  /* If misalignment is known at the compile time then allow peeling
     only if natural alignment is reachable through peeling.  */
  if (known_alignment_for_access_p (dr_info, vectype)
      && !aligned_access_p (dr_info, vectype))
    {
      HOST_WIDE_INT elmsize =
		int_cst_value (TYPE_SIZE_UNIT (TREE_TYPE (vectype)));
      if (dump_enabled_p ())
	{
	  dump_printf_loc (MSG_NOTE, vect_location,
			   "data size = %wd. misalignment = %d.\n", elmsize,
			   dr_misalignment (dr_info, vectype));
	}
      /* Peeling advances the DR by whole elements, so a misalignment
	 that is not a multiple of the element size is unreachable.  */
      if (dr_misalignment (dr_info, vectype) % elmsize)
	{
	  if (dump_enabled_p ())
	    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			     "data size does not divide the misalignment.\n");
	  return false;
	}
    }

  /* With unknown misalignment let the target decide whether alignment
     is reachable, based on whether the reference is packed.  */
  if (!known_alignment_for_access_p (dr_info, vectype))
    {
      tree type = TREE_TYPE (DR_REF (dr_info->dr));
      bool is_packed = not_size_aligned (DR_REF (dr_info->dr));
      if (dump_enabled_p ())
	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			 "Unknown misalignment, %snaturally aligned\n",
			 is_packed ? "not " : "");
      return targetm.vectorize.vector_alignment_reachable (type, is_packed);
    }

  return true;
}
1882 :
1883 :
1884 : /* Calculate the cost of the memory access represented by DR_INFO. */
1885 :
1886 : static void
1887 541571 : vect_get_data_access_cost (vec_info *vinfo, dr_vec_info *dr_info,
1888 : dr_alignment_support alignment_support_scheme,
1889 : int misalignment,
1890 : unsigned int *inside_cost,
1891 : unsigned int *outside_cost,
1892 : stmt_vector_for_cost *body_cost_vec,
1893 : stmt_vector_for_cost *prologue_cost_vec)
1894 : {
1895 541571 : stmt_vec_info stmt_info = dr_info->stmt;
1896 :
1897 541571 : if (DR_IS_READ (dr_info->dr))
1898 396072 : vect_get_load_cost (vinfo, stmt_info, NULL, 1,
1899 : alignment_support_scheme, misalignment, true,
1900 : inside_cost, outside_cost, prologue_cost_vec,
1901 : body_cost_vec, false);
1902 : else
1903 145499 : vect_get_store_cost (vinfo,stmt_info, NULL, 1,
1904 : alignment_support_scheme, misalignment, inside_cost,
1905 : body_cost_vec);
1906 :
1907 541571 : if (dump_enabled_p ())
1908 27476 : dump_printf_loc (MSG_NOTE, vect_location,
1909 : "vect_get_data_access_cost: inside_cost = %d, "
1910 : "outside_cost = %d.\n", *inside_cost, *outside_cost);
1911 541571 : }
1912 :
1913 :
/* One candidate peeling: peel NPEEL iterations so that DR_INFO becomes
   aligned; COUNT is used to rank candidates against each other.  */
typedef struct _vect_peel_info
{
  dr_vec_info *dr_info;		/* The DR this peeling would align.  */
  int npeel;			/* Number of iterations to peel.  */
  unsigned int count;		/* Ranking weight for this candidate.  */
} *vect_peel_info;
1920 :
/* A peeling candidate augmented with its estimated costs, used while
   traversing the peeling hash table to select the best candidate.  */
typedef struct _vect_peel_extended_info
{
  vec_info *vinfo;		/* The vectorization info being analyzed.  */
  struct _vect_peel_info peel_info;	/* The candidate itself.  */
  unsigned int inside_cost;	/* Cost inside the vector loop body.  */
  unsigned int outside_cost;	/* Cost outside the vector loop body.  */
} *vect_peel_extended_info;
1928 :
1929 :
1930 : /* Peeling hashtable helpers. */
1931 :
/* Hash traits for _vect_peel_info entries; keyed on the peel count.  */
struct peel_info_hasher : free_ptr_hash <_vect_peel_info>
{
  static inline hashval_t hash (const _vect_peel_info *);
  static inline bool equal (const _vect_peel_info *, const _vect_peel_info *);
};
1937 :
1938 : inline hashval_t
1939 647137 : peel_info_hasher::hash (const _vect_peel_info *peel_info)
1940 : {
1941 647137 : return (hashval_t) peel_info->npeel;
1942 : }
1943 :
1944 : inline bool
1945 353679 : peel_info_hasher::equal (const _vect_peel_info *a, const _vect_peel_info *b)
1946 : {
1947 353679 : return (a->npeel == b->npeel);
1948 : }
1949 :
1950 :
1951 : /* Insert DR_INFO into peeling hash table with NPEEL as key. */
1952 :
1953 : static void
1954 294116 : vect_peeling_hash_insert (hash_table<peel_info_hasher> *peeling_htab,
1955 : loop_vec_info loop_vinfo, dr_vec_info *dr_info,
1956 : int npeel, bool supportable_if_not_aligned)
1957 : {
1958 294116 : struct _vect_peel_info elem, *slot;
1959 294116 : _vect_peel_info **new_slot;
1960 :
1961 294116 : elem.npeel = npeel;
1962 294116 : slot = peeling_htab->find (&elem);
1963 294116 : if (slot)
1964 124746 : slot->count++;
1965 : else
1966 : {
1967 169370 : slot = XNEW (struct _vect_peel_info);
1968 169370 : slot->npeel = npeel;
1969 169370 : slot->dr_info = dr_info;
1970 169370 : slot->count = 1;
1971 169370 : new_slot = peeling_htab->find_slot (slot, INSERT);
1972 169370 : *new_slot = slot;
1973 : }
1974 :
1975 : /* If this DR is not supported with unknown misalignment then bias
1976 : this slot when the cost model is disabled. */
1977 294116 : if (!supportable_if_not_aligned
1978 294116 : && unlimited_cost_model (LOOP_VINFO_LOOP (loop_vinfo)))
1979 4584 : slot->count += VECT_MAX_COST;
1980 294116 : }
1981 :
1982 :
1983 : /* Traverse peeling hash table to find peeling option that aligns maximum
1984 : number of data accesses. */
1985 :
1986 : int
1987 35621 : vect_peeling_hash_get_most_frequent (_vect_peel_info **slot,
1988 : _vect_peel_extended_info *max)
1989 : {
1990 35621 : vect_peel_info elem = *slot;
1991 :
1992 35621 : if (elem->count > max->peel_info.count
1993 21645 : || (elem->count == max->peel_info.count
1994 16938 : && max->peel_info.npeel > elem->npeel))
1995 : {
1996 13992 : max->peel_info.npeel = elem->npeel;
1997 13992 : max->peel_info.count = elem->count;
1998 13992 : max->peel_info.dr_info = elem->dr_info;
1999 : }
2000 :
2001 35621 : return 1;
2002 : }
2003 :
2004 : /* Get the costs of peeling NPEEL iterations for LOOP_VINFO, checking
2005 : data access costs for all data refs. If UNKNOWN_MISALIGNMENT is true,
2006 : npeel is computed at runtime but DR0_INFO's misalignment will be zero
2007 : after peeling. */
2008 :
static void
vect_get_peeling_costs_all_drs (loop_vec_info loop_vinfo,
				dr_vec_info *dr0_info,
				unsigned int *inside_cost,
				unsigned int *outside_cost,
				stmt_vector_for_cost *body_cost_vec,
				stmt_vector_for_cost *prologue_cost_vec,
				unsigned int npeel)
{
  vec<data_reference_p> datarefs = LOOP_VINFO_DATAREFS (loop_vinfo);

  /* Whether DR0_INFO's own misalignment is a compile-time constant;
     without that we cannot derive other DRs' post-peeling alignment.  */
  bool dr0_alignment_known_p
    = (dr0_info
       && known_alignment_for_access_p (dr0_info,
					STMT_VINFO_VECTYPE (dr0_info->stmt)));

  for (data_reference *dr : datarefs)
    {
      dr_vec_info *dr_info = loop_vinfo->lookup_dr (dr);
      if (!vect_relevant_for_alignment_p (dr_info))
	continue;

      tree vectype = STMT_VINFO_VECTYPE (dr_info->stmt);
      dr_alignment_support alignment_support_scheme;
      int misalignment;
      unsigned HOST_WIDE_INT alignment;

      /* For a negative step account for the offset of the last vector
	 element, which is where the access effectively starts.  */
      bool negative = tree_int_cst_compare (DR_STEP (dr_info->dr),
					    size_zero_node) < 0;
      poly_int64 off = 0;
      if (negative)
	off = ((TYPE_VECTOR_SUBPARTS (vectype) - 1)
	       * -TREE_INT_CST_LOW (TYPE_SIZE_UNIT (TREE_TYPE (vectype))));

      /* Determine this DR's misalignment after peeling NPEEL iterations:
	 unchanged for no peeling; zero for DR0 and DRs aligned with it;
	 advanced by NPEEL * DR_STEP when everything is constant;
	 unknown otherwise.  */
      if (npeel == 0)
	misalignment = dr_misalignment (dr_info, vectype, off);
      else if (dr_info == dr0_info
	       || vect_dr_aligned_if_peeled_dr_is (dr_info, dr0_info))
	misalignment = 0;
      else if (!dr0_alignment_known_p
	       || !known_alignment_for_access_p (dr_info, vectype)
	       || !DR_TARGET_ALIGNMENT (dr_info).is_constant (&alignment))
	misalignment = DR_MISALIGNMENT_UNKNOWN;
      else
	{
	  misalignment = dr_misalignment (dr_info, vectype, off);
	  misalignment += npeel * TREE_INT_CST_LOW (DR_STEP (dr_info->dr));
	  misalignment &= alignment - 1;
	}
      alignment_support_scheme
	= vect_supportable_dr_alignment (loop_vinfo, dr_info, vectype,
					 misalignment);

      /* Accumulate this DR's cost under the computed alignment scheme.  */
      vect_get_data_access_cost (loop_vinfo, dr_info,
				 alignment_support_scheme, misalignment,
				 inside_cost, outside_cost,
				 body_cost_vec, prologue_cost_vec);
    }
}
2068 :
2069 : /* Traverse peeling hash table and calculate cost for each peeling option.
2070 : Find the one with the lowest cost. */
2071 :
int
vect_peeling_hash_get_lowest_cost (_vect_peel_info **slot,
				   _vect_peel_extended_info *min)
{
  vect_peel_info elem = *slot;
  int dummy;
  unsigned int inside_cost = 0, outside_cost = 0;
  loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (min->vinfo);
  stmt_vector_for_cost prologue_cost_vec, body_cost_vec,
		       epilogue_cost_vec;

  prologue_cost_vec.create (2);
  body_cost_vec.create (2);
  epilogue_cost_vec.create (2);

  /* Cost all DRs in the loop assuming we peel ELEM->npeel iterations
     to align ELEM->dr_info.  */
  vect_get_peeling_costs_all_drs (loop_vinfo, elem->dr_info, &inside_cost,
				  &outside_cost, &body_cost_vec,
				  &prologue_cost_vec, elem->npeel);

  body_cost_vec.release ();

  /* Add the cost of the peeled prologue/epilogue iterations themselves.  */
  outside_cost += vect_get_known_peeling_cost
    (loop_vinfo, elem->npeel, &dummy,
     &LOOP_VINFO_SCALAR_ITERATION_COST (loop_vinfo),
     &prologue_cost_vec, &epilogue_cost_vec);

  /* Prologue and epilogue costs are added to the target model later.
     These costs depend only on the scalar iteration cost, the
     number of peeling iterations finally chosen, and the number of
     misaligned statements.  So discard the information found here.  */
  prologue_cost_vec.release ();
  epilogue_cost_vec.release ();

  /* Keep this candidate if it is cheaper inside the loop, or equally
     cheap inside but cheaper outside.  */
  if (inside_cost < min->inside_cost
      || (inside_cost == min->inside_cost
	  && outside_cost < min->outside_cost))
    {
      min->inside_cost = inside_cost;
      min->outside_cost = outside_cost;
      min->peel_info.dr_info = elem->dr_info;
      min->peel_info.npeel = elem->npeel;
      min->peel_info.count = elem->count;
    }

  /* Nonzero keeps the hash-table traversal going.  */
  return 1;
}
2118 :
2119 :
2120 : /* Choose best peeling option by traversing peeling hash table and either
2121 : choosing an option with the lowest cost (if cost model is enabled) or the
2122 : option that aligns as many accesses as possible. */
2123 :
2124 : static struct _vect_peel_extended_info
2125 125640 : vect_peeling_hash_choose_best_peeling (hash_table<peel_info_hasher> *peeling_htab,
2126 : loop_vec_info loop_vinfo)
2127 : {
2128 125640 : struct _vect_peel_extended_info res;
2129 :
2130 125640 : res.peel_info.dr_info = NULL;
2131 125640 : res.vinfo = loop_vinfo;
2132 :
2133 125640 : if (!unlimited_cost_model (LOOP_VINFO_LOOP (loop_vinfo)))
2134 : {
2135 111717 : res.inside_cost = INT_MAX;
2136 111717 : res.outside_cost = INT_MAX;
2137 111717 : peeling_htab->traverse <_vect_peel_extended_info *,
2138 226175 : vect_peeling_hash_get_lowest_cost> (&res);
2139 : }
2140 : else
2141 : {
2142 13923 : res.peel_info.count = 0;
2143 13923 : peeling_htab->traverse <_vect_peel_extended_info *,
2144 49544 : vect_peeling_hash_get_most_frequent> (&res);
2145 13923 : res.inside_cost = 0;
2146 13923 : res.outside_cost = 0;
2147 : }
2148 :
2149 125640 : return res;
2150 : }
2151 :
2152 : /* Return if vectorization is definitely, possibly, or unlikely to be
2153 : supportable after loop peeling. */
2154 :
static enum peeling_support
vect_peeling_supportable (loop_vec_info loop_vinfo, dr_vec_info *dr0_info,
			  unsigned npeel)
{
  vec<data_reference_p> datarefs = LOOP_VINFO_DATAREFS (loop_vinfo);
  enum dr_alignment_support supportable_dr_alignment;

  /* Whether DR0_INFO's misalignment is a compile-time constant; needed
     to compute the other DRs' misalignment after peeling.  */
  bool dr0_alignment_known_p
    = known_alignment_for_access_p (dr0_info,
				    STMT_VINFO_VECTYPE (dr0_info->stmt));
  bool has_unsupported_dr_p = false;
  unsigned int dr0_step = tree_to_shwi (DR_STEP (dr0_info->dr));
  int known_unsupported_misalignment = DR_MISALIGNMENT_UNKNOWN;

  /* Check if each data ref can be vectorized after peeling.  */
  for (data_reference *dr : datarefs)
    {
      /* DR0 itself becomes aligned by the peeling.  */
      if (dr == dr0_info->dr)
	continue;

      dr_vec_info *dr_info = loop_vinfo->lookup_dr (dr);
      if (!vect_relevant_for_alignment_p (dr_info)
	  || vect_dr_aligned_if_peeled_dr_is (dr_info, dr0_info))
	continue;

      /* Compute this DR's misalignment after NPEEL iterations, if all
	 involved alignments are compile-time constants.  */
      tree vectype = STMT_VINFO_VECTYPE (dr_info->stmt);
      int misalignment;
      unsigned HOST_WIDE_INT alignment;
      if (!dr0_alignment_known_p
	  || !known_alignment_for_access_p (dr_info, vectype)
	  || !DR_TARGET_ALIGNMENT (dr_info).is_constant (&alignment))
	misalignment = DR_MISALIGNMENT_UNKNOWN;
      else
	{
	  misalignment = dr_misalignment (dr_info, vectype);
	  misalignment += npeel * TREE_INT_CST_LOW (DR_STEP (dr_info->dr));
	  misalignment &= alignment - 1;
	}
      supportable_dr_alignment
	= vect_supportable_dr_alignment (loop_vinfo, dr_info, vectype,
					 misalignment);
      if (supportable_dr_alignment == dr_unaligned_unsupported)
	{
	  has_unsupported_dr_p = true;

	  /* If unaligned unsupported DRs exist, we do following checks to see
	     if they can be mutually aligned to support vectorization.  If yes,
	     we can try peeling and create a runtime (mutual alignment) check
	     to guard the peeled loop.  If no, return PEELING_UNSUPPORTED.  */

	  /* 1) If unaligned unsupported DRs have different alignment steps, the
	     probability of DRs being mutually aligned is very low, and it's
	     quite complex to check mutual alignment at runtime.  We return
	     PEELING_UNSUPPORTED in this case.  */
	  if (tree_to_shwi (DR_STEP (dr)) != dr0_step)
	    return peeling_unsupported;

	  /* 2) Based on above same alignment step condition, if one known
	     misaligned DR has zero misalignment, or different misalignment
	     amount from another known misaligned DR, peeling is unable to
	     help make all these DRs aligned together.  We won't try peeling
	     with versioning anymore.  */
	  int curr_dr_misalignment = dr_misalignment (dr_info, vectype);
	  if (curr_dr_misalignment == 0)
	    return peeling_unsupported;
	  if (known_unsupported_misalignment != DR_MISALIGNMENT_UNKNOWN)
	    {
	      if (curr_dr_misalignment != DR_MISALIGNMENT_UNKNOWN
		  && curr_dr_misalignment != known_unsupported_misalignment)
		return peeling_unsupported;
	    }
	  else
	    known_unsupported_misalignment = curr_dr_misalignment;
	}
    }

  /* Vectorization is known to be supportable with peeling alone when there is
     no unsupported DR.  */
  return has_unsupported_dr_p ? peeling_maybe_supported
			      : peeling_known_supported;
}
2236 :
2237 : /* Compare two data-references DRA and DRB to group them into chunks
2238 : with related alignment. */
2239 :
2240 : static int
2241 3679466 : dr_align_group_sort_cmp (const void *dra_, const void *drb_)
2242 : {
2243 3679466 : data_reference_p dra = *(data_reference_p *)const_cast<void *>(dra_);
2244 3679466 : data_reference_p drb = *(data_reference_p *)const_cast<void *>(drb_);
2245 3679466 : int cmp;
2246 :
2247 : /* Stabilize sort. */
2248 3679466 : if (dra == drb)
2249 : return 0;
2250 :
2251 : /* Ordering of DRs according to base. */
2252 3679466 : cmp = data_ref_compare_tree (DR_BASE_ADDRESS (dra),
2253 : DR_BASE_ADDRESS (drb));
2254 3679466 : if (cmp != 0)
2255 : return cmp;
2256 :
2257 : /* And according to DR_OFFSET. */
2258 1617318 : cmp = data_ref_compare_tree (DR_OFFSET (dra), DR_OFFSET (drb));
2259 1617318 : if (cmp != 0)
2260 : return cmp;
2261 :
2262 : /* And after step. */
2263 1605520 : cmp = data_ref_compare_tree (DR_STEP (dra), DR_STEP (drb));
2264 1605520 : if (cmp != 0)
2265 : return cmp;
2266 :
2267 : /* Then sort after DR_INIT. In case of identical DRs sort after stmt UID. */
2268 1601021 : cmp = data_ref_compare_tree (DR_INIT (dra), DR_INIT (drb));
2269 1601021 : if (cmp == 0)
2270 174156 : return gimple_uid (DR_STMT (dra)) < gimple_uid (DR_STMT (drb)) ? -1 : 1;
2271 : return cmp;
2272 : }
2273 :
2274 : /* Function vect_enhance_data_refs_alignment
2275 :
2276 : This pass will use loop versioning and loop peeling in order to enhance
2277 : the alignment of data references in the loop.
2278 :
2279 : FOR NOW: we assume that whatever versioning/peeling takes place, only the
2280 : original loop is to be vectorized. Any other loops that are created by
2281 : the transformations performed in this pass - are not supposed to be
2282 : vectorized. This restriction will be relaxed.
2283 :
2284 : This pass will require a cost model to guide it whether to apply peeling
2285 : or versioning or a combination of the two. For example, the scheme that
2286 : intel uses when given a loop with several memory accesses, is as follows:
2287 : choose one memory access ('p') which alignment you want to force by doing
2288 : peeling. Then, either (1) generate a loop in which 'p' is aligned and all
2289 : other accesses are not necessarily aligned, or (2) use loop versioning to
2290 : generate one loop in which all accesses are aligned, and another loop in
2291 : which only 'p' is necessarily aligned.
2292 :
2293 : ("Automatic Intra-Register Vectorization for the Intel Architecture",
2294 : Aart J.C. Bik, Milind Girkar, Paul M. Grey and Ximmin Tian, International
2295 : Journal of Parallel Programming, Vol. 30, No. 2, April 2002.)
2296 :
2297 : Devising a cost model is the most critical aspect of this work. It will
2298 : guide us on which access to peel for, whether to use loop versioning, how
2299 : many versions to create, etc. The cost model will probably consist of
2300 : generic considerations as well as target specific considerations (on
2301 : powerpc for example, misaligned stores are more painful than misaligned
2302 : loads).
2303 :
2304 : Here are the general steps involved in alignment enhancements:
2305 :
2306 : -- original loop, before alignment analysis:
2307 : for (i=0; i<N; i++){
2308 : x = q[i]; # DR_MISALIGNMENT(q) = unknown
2309 : p[i] = y; # DR_MISALIGNMENT(p) = unknown
2310 : }
2311 :
2312 : -- After vect_compute_data_refs_alignment:
2313 : for (i=0; i<N; i++){
2314 : x = q[i]; # DR_MISALIGNMENT(q) = 3
2315 : p[i] = y; # DR_MISALIGNMENT(p) = unknown
2316 : }
2317 :
2318 : -- Possibility 1: we do loop versioning:
2319 : if (p is aligned) {
2320 : for (i=0; i<N; i++){ # loop 1A
2321 : x = q[i]; # DR_MISALIGNMENT(q) = 3
2322 : p[i] = y; # DR_MISALIGNMENT(p) = 0
2323 : }
2324 : }
2325 : else {
2326 : for (i=0; i<N; i++){ # loop 1B
2327 : x = q[i]; # DR_MISALIGNMENT(q) = 3
2328 : p[i] = y; # DR_MISALIGNMENT(p) = unaligned
2329 : }
2330 : }
2331 :
2332 : -- Possibility 2: we do loop peeling:
2333 : for (i = 0; i < 3; i++){ # (scalar loop, not to be vectorized).
2334 : x = q[i];
2335 : p[i] = y;
2336 : }
2337 : for (i = 3; i < N; i++){ # loop 2A
2338 : x = q[i]; # DR_MISALIGNMENT(q) = 0
2339 : p[i] = y; # DR_MISALIGNMENT(p) = unknown
2340 : }
2341 :
2342 : -- Possibility 3: combination of loop peeling and versioning:
2343 : if (p & q are mutually aligned) {
2344 : for (i=0; i<3; i++){ # (peeled loop iterations).
2345 : x = q[i];
2346 : p[i] = y;
2347 : }
2348 : for (i=3; i<N; i++){ # loop 3A
2349 : x = q[i]; # DR_MISALIGNMENT(q) = 0
2350 : p[i] = y; # DR_MISALIGNMENT(p) = 0
2351 : }
2352 : }
2353 : else {
2354 : for (i=0; i<N; i++){ # (scalar loop, not to be vectorized).
2355 : x = q[i]; # DR_MISALIGNMENT(q) = 3
2356 : p[i] = y; # DR_MISALIGNMENT(p) = unknown
2357 : }
2358 : }
2359 :
2360 : These loops are later passed to loop_transform to be vectorized. The
2361 : vectorizer will use the alignment information to guide the transformation
2362 : (whether to generate regular loads/stores, or with special handling for
2363 : misalignment). */
2364 :
2365 : opt_result
2366 326339 : vect_enhance_data_refs_alignment (loop_vec_info loop_vinfo)
2367 : {
2368 326339 : class loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
2369 326339 : dr_vec_info *first_store = NULL;
2370 326339 : dr_vec_info *dr0_info = NULL;
2371 326339 : struct data_reference *dr;
2372 326339 : unsigned int i;
2373 326339 : bool do_peeling = false;
2374 326339 : bool do_versioning = false;
2375 326339 : bool try_peeling_with_versioning = false;
2376 326339 : unsigned int npeel = 0;
2377 326339 : bool one_misalignment_known = false;
2378 326339 : bool one_misalignment_unknown = false;
2379 326339 : bool one_dr_unsupportable = false;
2380 326339 : dr_vec_info *unsupportable_dr_info = NULL;
2381 326339 : unsigned int dr0_same_align_drs = 0, first_store_same_align_drs = 0;
2382 326339 : hash_table<peel_info_hasher> peeling_htab (1);
2383 :
2384 326339 : DUMP_VECT_SCOPE ("vect_enhance_data_refs_alignment");
2385 :
2386 : /* Reset data so we can safely be called multiple times. */
2387 326339 : LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo).truncate (0);
2388 326339 : LOOP_VINFO_PEELING_FOR_ALIGNMENT (loop_vinfo) = 0;
2389 :
2390 326339 : if (LOOP_VINFO_DATAREFS (loop_vinfo).is_empty ())
2391 13290 : return opt_result::success ();
2392 :
2393 : /* Sort the vector of datarefs so DRs that have the same or dependent
2394 : alignment are next to each other. */
2395 313049 : auto_vec<data_reference_p> datarefs
2396 313049 : = LOOP_VINFO_DATAREFS (loop_vinfo).copy ();
2397 313049 : datarefs.qsort (dr_align_group_sort_cmp);
2398 :
2399 : /* Compute the number of DRs that become aligned when we peel
2400 : a dataref so it becomes aligned. */
2401 626098 : auto_vec<unsigned> n_same_align_refs (datarefs.length ());
2402 313049 : n_same_align_refs.quick_grow_cleared (datarefs.length ());
2403 313049 : unsigned i0;
2404 645082 : for (i0 = 0; i0 < datarefs.length (); ++i0)
2405 325460 : if (DR_BASE_ADDRESS (datarefs[i0]))
2406 : break;
2407 2003884 : for (i = i0 + 1; i <= datarefs.length (); ++i)
2408 : {
2409 688893 : if (i == datarefs.length ()
2410 382417 : || !operand_equal_p (DR_BASE_ADDRESS (datarefs[i0]),
2411 382417 : DR_BASE_ADDRESS (datarefs[i]), 0)
2412 184593 : || !operand_equal_p (DR_OFFSET (datarefs[i0]),
2413 184593 : DR_OFFSET (datarefs[i]), 0)
2414 872328 : || !operand_equal_p (DR_STEP (datarefs[i0]),
2415 183435 : DR_STEP (datarefs[i]), 0))
2416 : {
2417 : /* The subgroup [i0, i-1] now only differs in DR_INIT and
2418 : possibly DR_TARGET_ALIGNMENT. Still the whole subgroup
2419 : will get known misalignment if we align one of the refs
2420 : with the largest DR_TARGET_ALIGNMENT. */
2421 1194833 : for (unsigned j = i0; j < i; ++j)
2422 : {
2423 688893 : dr_vec_info *dr_infoj = loop_vinfo->lookup_dr (datarefs[j]);
2424 2789050 : for (unsigned k = i0; k < i; ++k)
2425 : {
2426 2100157 : if (k == j)
2427 688893 : continue;
2428 1411264 : dr_vec_info *dr_infok = loop_vinfo->lookup_dr (datarefs[k]);
2429 1411264 : if (vect_dr_aligned_if_related_peeled_dr_is (dr_infok,
2430 : dr_infoj))
2431 453088 : n_same_align_refs[j]++;
2432 : }
2433 : }
2434 : i0 = i;
2435 : }
2436 : }
2437 :
2438 : /* While cost model enhancements are expected in the future, the high level
2439 : view of the code at this time is as follows:
2440 :
2441 : A) If there is a misaligned access then see if doing peeling alone can
2442 : make all data references satisfy vect_supportable_dr_alignment. If so,
2443 : update data structures and return.
2444 :
2445 : B) If peeling alone wasn't possible and there is a data reference with an
2446 : unknown misalignment that does not satisfy vect_supportable_dr_alignment
2447 : then we may use either of the following two approaches.
2448 :
2449 : B1) Try peeling with versioning: Add a runtime loop versioning check to
2450 : see if all unsupportable data references are mutually aligned, which
2451 : means they will be uniformly aligned after a certain amount of loop
2452 : peeling. If peeling and versioning can be used together, set
2453 : LOOP_VINFO_ALLOW_MUTUAL_ALIGNMENT_P to TRUE and return.
2454 :
2455 : B2) Try versioning alone: Add a runtime loop versioning check to see if
2456 : all unsupportable data references are already uniformly aligned
2457 : without loop peeling. If versioning can be applied alone, set
2458 : LOOP_VINFO_ALLOW_MUTUAL_ALIGNMENT_P to FALSE and return.
2459 :
2460 : Above B1 is more powerful and more likely to be adopted than B2. But B2
2461 : is still available and useful in some cases, for example, the cost model
2462 : does not allow much peeling.
2463 :
2464 : C) If none of above was successful then the alignment was not enhanced,
2465 : just return. */
2466 :
2467 : /* (1) Peeling to force alignment. */
2468 :
2469 : /* (1.1) Decide whether to perform peeling, how many iterations to peel, and
2470 : if vectorization may be supported by peeling with versioning.
2471 : Considerations:
2472 : - How many accesses will become aligned due to the peeling
2473 : - How many accesses will become unaligned due to the peeling,
2474 : and the cost of misaligned accesses.
2475 : - The cost of peeling (the extra runtime checks, the increase
2476 : in code size). */
2477 :
2478 313049 : poly_uint64 vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
2479 867303 : FOR_EACH_VEC_ELT (datarefs, i, dr)
2480 : {
2481 597383 : dr_vec_info *dr_info = loop_vinfo->lookup_dr (dr);
2482 597383 : if (!vect_relevant_for_alignment_p (dr_info))
2483 95744 : continue;
2484 :
2485 501639 : stmt_vec_info stmt_info = dr_info->stmt;
2486 501639 : tree vectype = STMT_VINFO_VECTYPE (stmt_info);
2487 :
2488 : /* With variable VF, unsafe speculative read can be avoided for known
2489 : inbounds DRs as long as partial vectors are used. */
2490 501639 : if (!vf.is_constant ()
2491 : && dr_safe_speculative_read_required (stmt_info)
2492 : && DR_SCALAR_KNOWN_BOUNDS (dr_info))
2493 : {
2494 : dr_set_safe_speculative_read_required (stmt_info, false);
2495 : LOOP_VINFO_MUST_USE_PARTIAL_VECTORS_P (loop_vinfo) = true;
2496 : }
2497 :
2498 501639 : do_peeling = vector_alignment_reachable_p (dr_info, vf);
2499 501639 : if (do_peeling)
2500 : {
2501 427890 : if (known_alignment_for_access_p (dr_info, vectype))
2502 : {
2503 235499 : unsigned int npeel_tmp = 0;
2504 235499 : bool negative = tree_int_cst_compare (DR_STEP (dr),
2505 235499 : size_zero_node) < 0;
2506 :
2507 : /* If known_alignment_for_access_p then we have set
2508 : DR_MISALIGNMENT which is only done if we know it at compiler
2509 : time, so it is safe to assume target alignment is constant.
2510 : */
2511 235499 : unsigned int target_align =
2512 235499 : DR_TARGET_ALIGNMENT (dr_info).to_constant ();
2513 235499 : unsigned HOST_WIDE_INT dr_size = vect_get_scalar_dr_size (dr_info);
2514 235499 : poly_int64 off = 0;
2515 235499 : if (negative)
2516 2378 : off = (TYPE_VECTOR_SUBPARTS (vectype) - 1) * -dr_size;
2517 235499 : unsigned int mis = dr_misalignment (dr_info, vectype, off);
2518 235499 : mis = negative ? mis : -mis;
2519 235499 : if (mis != 0)
2520 12375 : npeel_tmp = (mis & (target_align - 1)) / dr_size;
2521 :
2522 : /* For multiple types, it is possible that the bigger type access
2523 : will have more than one peeling option. E.g., a loop with two
2524 : types: one of size (vector size / 4), and the other one of
2525 : size (vector size / 8). Vectorization factor will 8. If both
2526 : accesses are misaligned by 3, the first one needs one scalar
2527 : iteration to be aligned, and the second one needs 5. But the
2528 : first one will be aligned also by peeling 5 scalar
2529 : iterations, and in that case both accesses will be aligned.
2530 : Hence, except for the immediate peeling amount, we also want
2531 : to try to add full vector size, while we don't exceed
2532 : vectorization factor.
2533 : We do this automatically for cost model, since we calculate
2534 : cost for every peeling option. */
2535 235499 : poly_uint64 nscalars = npeel_tmp;
2536 235499 : if (unlimited_cost_model (LOOP_VINFO_LOOP (loop_vinfo)))
2537 : {
2538 39621 : unsigned group_size = 1;
2539 39621 : if (STMT_SLP_TYPE (stmt_info)
2540 39621 : && STMT_VINFO_GROUPED_ACCESS (stmt_info))
2541 1900 : group_size = DR_GROUP_SIZE (stmt_info);
2542 39621 : nscalars = vf * group_size;
2543 : }
2544 :
2545 : /* Save info about DR in the hash table. Also include peeling
2546 : amounts according to the explanation above. Indicate
2547 : the alignment status when the ref is not aligned.
2548 : ??? Rather than using unknown alignment here we should
2549 : prune all entries from the peeling hashtable which cause
2550 : DRs to be not supported. */
2551 235499 : bool supportable_if_not_aligned
2552 : = vect_supportable_dr_alignment
2553 235499 : (loop_vinfo, dr_info, vectype, DR_MISALIGNMENT_UNKNOWN);
2554 529615 : while (known_le (npeel_tmp, nscalars))
2555 : {
2556 294116 : vect_peeling_hash_insert (&peeling_htab, loop_vinfo,
2557 : dr_info, npeel_tmp,
2558 : supportable_if_not_aligned);
2559 294116 : npeel_tmp += MAX (1, target_align / dr_size);
2560 : }
2561 :
2562 235499 : one_misalignment_known = true;
2563 : }
2564 : else
2565 : {
2566 : /* If we don't know any misalignment values, we prefer
2567 : peeling for data-ref that has the maximum number of data-refs
2568 : with the same alignment, unless the target prefers to align
2569 : stores over load. */
2570 192391 : unsigned same_align_drs = n_same_align_refs[i];
2571 192391 : if (!dr0_info
2572 192391 : || dr0_same_align_drs < same_align_drs)
2573 : {
2574 : dr0_same_align_drs = same_align_drs;
2575 : dr0_info = dr_info;
2576 : }
2577 : /* For data-refs with the same number of related
2578 : accesses prefer the one where the misalign
2579 : computation will be invariant in the outermost loop. */
2580 57092 : else if (dr0_same_align_drs == same_align_drs)
2581 : {
2582 56136 : class loop *ivloop0, *ivloop;
2583 56136 : ivloop0 = outermost_invariant_loop_for_expr
2584 56136 : (loop, DR_BASE_ADDRESS (dr0_info->dr));
2585 56136 : ivloop = outermost_invariant_loop_for_expr
2586 56136 : (loop, DR_BASE_ADDRESS (dr));
2587 56136 : if ((ivloop && !ivloop0)
2588 56136 : || (ivloop && ivloop0
2589 56130 : && flow_loop_nested_p (ivloop, ivloop0)))
2590 : dr0_info = dr_info;
2591 : }
2592 :
2593 192391 : one_misalignment_unknown = true;
2594 :
2595 : /* Check for data refs with unsupportable alignment that
2596 : can be peeled. */
2597 192391 : enum dr_alignment_support supportable_dr_alignment
2598 192391 : = vect_supportable_dr_alignment (loop_vinfo, dr_info, vectype,
2599 : DR_MISALIGNMENT_UNKNOWN);
2600 192391 : if (supportable_dr_alignment == dr_unaligned_unsupported)
2601 : {
2602 96343 : one_dr_unsupportable = true;
2603 96343 : unsupportable_dr_info = dr_info;
2604 : }
2605 :
2606 192391 : if (!first_store && DR_IS_WRITE (dr))
2607 : {
2608 38903 : first_store = dr_info;
2609 38903 : first_store_same_align_drs = same_align_drs;
2610 : }
2611 : }
2612 : }
2613 : else
2614 : {
2615 73749 : if (!aligned_access_p (dr_info, vectype))
2616 : {
2617 43129 : if (dump_enabled_p ())
2618 2046 : dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2619 : "vector alignment may not be reachable\n");
2620 : break;
2621 : }
2622 : }
2623 : }
2624 :
2625 : /* Check if we can possibly peel the loop. */
2626 313049 : if (!vect_can_advance_ivs_p (loop_vinfo)
2627 307953 : || !slpeel_can_duplicate_loop_p (loop, LOOP_VINFO_MAIN_EXIT (loop_vinfo),
2628 307953 : loop_preheader_edge (loop))
2629 307953 : || loop->inner
2630 : /* We don't currently maintaing the LCSSA for prologue peeled inversed
2631 : loops. */
2632 619547 : || (LOOP_VINFO_EARLY_BREAKS_VECT_PEELED (loop_vinfo)
2633 28803 : && !LOOP_VINFO_NITERS_UNCOUNTED_P (loop_vinfo)))
2634 : do_peeling = false;
2635 :
2636 313049 : struct _vect_peel_extended_info peel_for_known_alignment;
2637 313049 : struct _vect_peel_extended_info peel_for_unknown_alignment;
2638 313049 : struct _vect_peel_extended_info best_peel;
2639 :
2640 313049 : peel_for_unknown_alignment.inside_cost = INT_MAX;
2641 313049 : peel_for_unknown_alignment.outside_cost = INT_MAX;
2642 313049 : peel_for_unknown_alignment.peel_info.count = 0;
2643 :
2644 313049 : if (do_peeling
2645 313049 : && one_misalignment_unknown)
2646 : {
2647 : /* Check if the target requires to prefer stores over loads, i.e., if
2648 : misaligned stores are more expensive than misaligned loads (taking
2649 : drs with same alignment into account). */
2650 118430 : unsigned int load_inside_cost = 0;
2651 118430 : unsigned int load_outside_cost = 0;
2652 118430 : unsigned int store_inside_cost = 0;
2653 118430 : unsigned int store_outside_cost = 0;
2654 118430 : unsigned int estimated_npeels = vect_vf_for_cost (loop_vinfo) / 2;
2655 :
2656 118430 : stmt_vector_for_cost dummy;
2657 118430 : dummy.create (2);
2658 118430 : vect_get_peeling_costs_all_drs (loop_vinfo, dr0_info,
2659 : &load_inside_cost,
2660 : &load_outside_cost,
2661 : &dummy, &dummy, estimated_npeels);
2662 118430 : dummy.release ();
2663 :
2664 118430 : if (first_store)
2665 : {
2666 30480 : dummy.create (2);
2667 30480 : vect_get_peeling_costs_all_drs (loop_vinfo, first_store,
2668 : &store_inside_cost,
2669 : &store_outside_cost,
2670 : &dummy, &dummy,
2671 : estimated_npeels);
2672 30480 : dummy.release ();
2673 : }
2674 : else
2675 : {
2676 87950 : store_inside_cost = INT_MAX;
2677 87950 : store_outside_cost = INT_MAX;
2678 : }
2679 :
2680 118430 : if (load_inside_cost > store_inside_cost
2681 118430 : || (load_inside_cost == store_inside_cost
2682 29911 : && load_outside_cost > store_outside_cost))
2683 : {
2684 118430 : dr0_info = first_store;
2685 118430 : dr0_same_align_drs = first_store_same_align_drs;
2686 118430 : peel_for_unknown_alignment.inside_cost = store_inside_cost;
2687 118430 : peel_for_unknown_alignment.outside_cost = store_outside_cost;
2688 : }
2689 : else
2690 : {
2691 118430 : peel_for_unknown_alignment.inside_cost = load_inside_cost;
2692 118430 : peel_for_unknown_alignment.outside_cost = load_outside_cost;
2693 : }
2694 :
2695 118430 : stmt_vector_for_cost prologue_cost_vec, epilogue_cost_vec;
2696 118430 : prologue_cost_vec.create (2);
2697 118430 : epilogue_cost_vec.create (2);
2698 :
2699 118430 : int dummy2;
2700 236860 : peel_for_unknown_alignment.outside_cost += vect_get_known_peeling_cost
2701 118430 : (loop_vinfo, estimated_npeels, &dummy2,
2702 : &LOOP_VINFO_SCALAR_ITERATION_COST (loop_vinfo),
2703 : &prologue_cost_vec, &epilogue_cost_vec);
2704 :
2705 118430 : prologue_cost_vec.release ();
2706 118430 : epilogue_cost_vec.release ();
2707 :
2708 118430 : peel_for_unknown_alignment.peel_info.count = dr0_same_align_drs + 1;
2709 : }
2710 :
2711 313049 : peel_for_unknown_alignment.peel_info.npeel = 0;
2712 313049 : peel_for_unknown_alignment.peel_info.dr_info = dr0_info;
2713 :
2714 313049 : best_peel = peel_for_unknown_alignment;
2715 :
2716 313049 : peel_for_known_alignment.inside_cost = INT_MAX;
2717 313049 : peel_for_known_alignment.outside_cost = INT_MAX;
2718 313049 : peel_for_known_alignment.peel_info.count = 0;
2719 313049 : peel_for_known_alignment.peel_info.dr_info = NULL;
2720 :
2721 313049 : if (do_peeling && one_misalignment_known)
2722 : {
2723 : /* Peeling is possible, but there is no data access that is not supported
2724 : unless aligned. So we try to choose the best possible peeling from
2725 : the hash table. */
2726 125640 : peel_for_known_alignment = vect_peeling_hash_choose_best_peeling
2727 125640 : (&peeling_htab, loop_vinfo);
2728 : }
2729 :
2730 : /* Compare costs of peeling for known and unknown alignment. */
2731 313049 : if (peel_for_known_alignment.peel_info.dr_info != NULL
2732 125640 : && peel_for_unknown_alignment.inside_cost
2733 : >= peel_for_known_alignment.inside_cost)
2734 : {
2735 111543 : best_peel = peel_for_known_alignment;
2736 :
2737 : /* If the best peeling for known alignment has NPEEL == 0, perform no
2738 : peeling at all except if there is an unsupportable dr that we can
2739 : align. */
2740 111543 : if (best_peel.peel_info.npeel == 0 && !one_dr_unsupportable)
2741 : do_peeling = false;
2742 : }
2743 :
2744 : /* If there is an unsupportable data ref, prefer this over all choices so far
2745 : since we'd have to discard a chosen peeling except when it accidentally
2746 : aligned the unsupportable data ref. */
2747 208935 : if (one_dr_unsupportable)
2748 : dr0_info = unsupportable_dr_info;
2749 234249 : else if (do_peeling)
2750 : {
2751 : /* Calculate the penalty for no peeling, i.e. leaving everything as-is.
2752 : TODO: Use nopeel_outside_cost or get rid of it? */
2753 46910 : unsigned nopeel_inside_cost = 0;
2754 46910 : unsigned nopeel_outside_cost = 0;
2755 :
2756 46910 : stmt_vector_for_cost dummy;
2757 46910 : dummy.create (2);
2758 46910 : vect_get_peeling_costs_all_drs (loop_vinfo, NULL, &nopeel_inside_cost,
2759 : &nopeel_outside_cost, &dummy, &dummy, 0);
2760 46910 : dummy.release ();
2761 :
2762 : /* Add epilogue costs. As we do not peel for alignment here, no prologue
2763 : costs will be recorded. */
2764 46910 : stmt_vector_for_cost prologue_cost_vec, epilogue_cost_vec;
2765 46910 : prologue_cost_vec.create (2);
2766 46910 : epilogue_cost_vec.create (2);
2767 :
2768 46910 : int dummy2;
2769 93820 : nopeel_outside_cost += vect_get_known_peeling_cost
2770 46910 : (loop_vinfo, 0, &dummy2,
2771 : &LOOP_VINFO_SCALAR_ITERATION_COST (loop_vinfo),
2772 : &prologue_cost_vec, &epilogue_cost_vec);
2773 :
2774 46910 : prologue_cost_vec.release ();
2775 46910 : epilogue_cost_vec.release ();
2776 :
2777 46910 : npeel = best_peel.peel_info.npeel;
2778 46910 : dr0_info = best_peel.peel_info.dr_info;
2779 :
2780 : /* If no peeling is not more expensive than the best peeling we
2781 : have so far, don't perform any peeling. */
2782 46910 : if (nopeel_inside_cost <= best_peel.inside_cost)
2783 40452 : do_peeling = false;
2784 : }
2785 :
2786 125710 : if (do_peeling)
2787 : {
2788 77787 : stmt_vec_info stmt_info = dr0_info->stmt;
2789 77787 : if (known_alignment_for_access_p (dr0_info,
2790 : STMT_VINFO_VECTYPE (stmt_info)))
2791 : {
2792 6442 : bool negative = tree_int_cst_compare (DR_STEP (dr0_info->dr),
2793 6442 : size_zero_node) < 0;
2794 6442 : if (!npeel)
2795 : {
2796 : /* Since it's known at compile time, compute the number of
2797 : iterations in the peeled loop (the peeling factor) for use in
2798 : updating DR_MISALIGNMENT values. The peeling factor is the
2799 : vectorization factor minus the misalignment as an element
2800 : count. */
2801 0 : tree vectype = STMT_VINFO_VECTYPE (stmt_info);
2802 0 : poly_int64 off = 0;
2803 0 : if (negative)
2804 0 : off = ((TYPE_VECTOR_SUBPARTS (vectype) - 1)
2805 0 : * -TREE_INT_CST_LOW (TYPE_SIZE_UNIT (TREE_TYPE (vectype))));
2806 0 : unsigned int mis
2807 0 : = dr_misalignment (dr0_info, vectype, off);
2808 0 : mis = negative ? mis : -mis;
2809 : /* If known_alignment_for_access_p then we have set
2810 : DR_MISALIGNMENT which is only done if we know it at compiler
2811 : time, so it is safe to assume target alignment is constant.
2812 : */
2813 0 : unsigned int target_align =
2814 0 : DR_TARGET_ALIGNMENT (dr0_info).to_constant ();
2815 0 : npeel = ((mis & (target_align - 1))
2816 0 : / vect_get_scalar_dr_size (dr0_info));
2817 : }
2818 :
2819 : /* For interleaved data access every iteration accesses all the
2820 : members of the group, therefore we divide the number of iterations
2821 : by the group size. */
2822 6442 : if (STMT_VINFO_GROUPED_ACCESS (stmt_info))
2823 281 : npeel /= DR_GROUP_SIZE (stmt_info);
2824 :
2825 6442 : if (dump_enabled_p ())
2826 280 : dump_printf_loc (MSG_NOTE, vect_location,
2827 : "Try peeling by %d\n", npeel);
2828 : }
2829 :
2830 : /* Check how peeling for alignment can support vectorization. Function
2831 : vect_peeling_supportable returns one of the three possible values:
2832 : - PEELING_KNOWN_SUPPORTED: indicates that we know all unsupported
2833 : datarefs can be aligned after peeling. We can use peeling alone.
2834 : - PEELING_MAYBE_SUPPORTED: indicates that peeling may be able to make
2835 : these datarefs aligned but we are not sure about it at compile time.
2836 : We will try peeling with versioning to add a runtime check to guard
2837 : the peeled loop.
2838 : - PEELING_UNSUPPORTED: indicates that peeling is almost impossible to
2839 : support vectorization. We will stop trying peeling. */
2840 77787 : switch (vect_peeling_supportable (loop_vinfo, dr0_info, npeel))
2841 : {
2842 : case peeling_known_supported:
2843 : break;
2844 13124 : case peeling_maybe_supported:
2845 13124 : try_peeling_with_versioning = true;
2846 13124 : break;
2847 15735 : case peeling_unsupported:
2848 15735 : do_peeling = false;
2849 15735 : break;
2850 : }
2851 :
2852 : /* Check if all datarefs are supportable and log. */
2853 77787 : if (do_peeling
2854 77787 : && npeel == 0
2855 77787 : && known_alignment_for_access_p (dr0_info,
2856 : STMT_VINFO_VECTYPE (stmt_info)))
2857 3 : return opt_result::success ();
2858 :
2859 : /* Cost model #1 - honor --param vect-max-peeling-for-alignment. */
2860 77784 : if (do_peeling)
2861 : {
2862 62049 : unsigned max_allowed_peel
2863 62049 : = param_vect_max_peeling_for_alignment;
2864 62049 : if (loop_cost_model (loop) <= VECT_COST_MODEL_CHEAP)
2865 : max_allowed_peel = 0;
2866 13161 : if (max_allowed_peel != (unsigned)-1)
2867 : {
2868 48909 : unsigned max_peel = npeel;
2869 48909 : if (max_peel == 0)
2870 : {
2871 46188 : poly_uint64 target_align = DR_TARGET_ALIGNMENT (dr0_info);
2872 46188 : unsigned HOST_WIDE_INT target_align_c;
2873 46188 : if (target_align.is_constant (&target_align_c))
2874 92376 : max_peel =
2875 46188 : target_align_c / vect_get_scalar_dr_size (dr0_info) - 1;
2876 : else
2877 : {
2878 : do_peeling = false;
2879 : if (dump_enabled_p ())
2880 : dump_printf_loc (MSG_NOTE, vect_location,
2881 : "Disable peeling, max peels set and vector"
2882 : " alignment unknown\n");
2883 : }
2884 : }
2885 48909 : if (max_peel > max_allowed_peel)
2886 : {
2887 48901 : do_peeling = false;
2888 48901 : if (dump_enabled_p ())
2889 53 : dump_printf_loc (MSG_NOTE, vect_location,
2890 : "Disable peeling, max peels reached: %d\n", max_peel);
2891 : }
2892 : }
2893 : }
2894 :
2895 : /* Cost model #2 - if peeling may result in a remaining loop not
2896 : iterating enough to be vectorized then do not peel. Since this
2897 : is a cost heuristic rather than a correctness decision, use the
2898 : most likely runtime value for variable vectorization factors. */
2899 53 : if (do_peeling
2900 13148 : && LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo))
2901 : {
2902 3169 : unsigned int assumed_vf = vect_vf_for_cost (loop_vinfo);
2903 3169 : unsigned int max_peel = npeel == 0 ? assumed_vf - 1 : npeel;
2904 3169 : if ((unsigned HOST_WIDE_INT) LOOP_VINFO_INT_NITERS (loop_vinfo)
2905 3169 : < assumed_vf + max_peel)
2906 : do_peeling = false;
2907 : }
2908 :
2909 : if (do_peeling)
2910 : {
2911 : /* (1.2) Update the DR_MISALIGNMENT of each data reference DR_i.
2912 : If the misalignment of DR_i is identical to that of dr0 then set
2913 : DR_MISALIGNMENT (DR_i) to zero. If the misalignment of DR_i and
2914 : dr0 are known at compile time then increment DR_MISALIGNMENT (DR_i)
2915 : by the peeling factor times the element size of DR_i (MOD the
2916 : vectorization factor times the size). Otherwise, the
2917 : misalignment of DR_i must be set to unknown. */
2918 27762 : FOR_EACH_VEC_ELT (datarefs, i, dr)
2919 15427 : if (dr != dr0_info->dr)
2920 : {
2921 3092 : dr_vec_info *dr_info = loop_vinfo->lookup_dr (dr);
2922 3092 : if (!vect_relevant_for_alignment_p (dr_info))
2923 351 : continue;
2924 :
2925 2741 : vect_update_misalignment_for_peel (dr_info, dr0_info, npeel);
2926 : }
2927 : }
2928 :
2929 77784 : if (do_peeling && !try_peeling_with_versioning)
2930 : {
2931 : /* Update data structures if peeling will be applied alone. */
2932 11329 : LOOP_VINFO_UNALIGNED_DR (loop_vinfo) = dr0_info;
2933 11329 : if (npeel)
2934 2085 : LOOP_VINFO_PEELING_FOR_ALIGNMENT (loop_vinfo) = npeel;
2935 : else
2936 9244 : LOOP_VINFO_PEELING_FOR_ALIGNMENT (loop_vinfo) = -1;
2937 11329 : SET_DR_MISALIGNMENT (dr0_info,
2938 : vect_dr_misalign_for_aligned_access (dr0_info));
2939 11329 : if (dump_enabled_p ())
2940 : {
2941 338 : dump_printf_loc (MSG_NOTE, vect_location,
2942 : "Alignment of access forced using peeling.\n");
2943 338 : dump_printf_loc (MSG_NOTE, vect_location,
2944 : "Peeling for alignment will be applied.\n");
2945 : }
2946 :
2947 : /* The inside-loop cost will be accounted for in vectorizable_load
2948 : and vectorizable_store correctly with adjusted alignments.
2949 : Drop the body_cst_vec on the floor here. */
2950 11329 : return opt_result::success ();
2951 : }
2952 : }
2953 :
2954 : /* (2) Versioning to force alignment. */
2955 :
2956 : /* Try versioning if:
2957 : 1) optimize loop for speed and the cost-model is not cheap
2958 : 2) there is at least one unsupported misaligned data ref with an unknown
2959 : misalignment, and
2960 : 3) all misaligned data refs with a known misalignment are supported, and
2961 : 4) the number of runtime alignment checks is within reason. */
2962 :
2963 301717 : do_versioning
2964 301717 : = (optimize_loop_nest_for_speed_p (loop)
2965 301289 : && !loop->inner /* FORNOW */
2966 601551 : && loop_cost_model (loop) > VECT_COST_MODEL_CHEAP);
2967 :
2968 : if (do_versioning)
2969 : {
2970 293169 : FOR_EACH_VEC_ELT (datarefs, i, dr)
2971 : {
2972 219886 : dr_vec_info *dr_info = loop_vinfo->lookup_dr (dr);
2973 219886 : if (!vect_relevant_for_alignment_p (dr_info))
2974 158542 : continue;
2975 :
2976 149797 : stmt_vec_info stmt_info = dr_info->stmt;
2977 149797 : if (STMT_VINFO_STRIDED_P (stmt_info))
2978 : {
2979 : do_versioning = false;
2980 4815 : break;
2981 : }
2982 :
2983 148903 : tree vectype = STMT_VINFO_VECTYPE (stmt_info);
2984 148903 : bool negative = tree_int_cst_compare (DR_STEP (dr),
2985 148903 : size_zero_node) < 0;
2986 148903 : poly_int64 off = 0;
2987 148903 : if (negative)
2988 3056 : off = ((TYPE_VECTOR_SUBPARTS (vectype) - 1)
2989 3056 : * -TREE_INT_CST_LOW (TYPE_SIZE_UNIT (TREE_TYPE (vectype))));
2990 148903 : int misalignment;
2991 148903 : if ((misalignment = dr_misalignment (dr_info, vectype, off)) == 0)
2992 88453 : continue;
2993 :
2994 60450 : enum dr_alignment_support supportable_dr_alignment
2995 60450 : = vect_supportable_dr_alignment (loop_vinfo, dr_info, vectype,
2996 : misalignment);
2997 60450 : if (supportable_dr_alignment == dr_unaligned_unsupported)
2998 : {
2999 15063 : if (misalignment != DR_MISALIGNMENT_UNKNOWN
3000 15063 : || (LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo).length ()
3001 11662 : >= (unsigned) param_vect_max_version_for_alignment_checks))
3002 : {
3003 : do_versioning = false;
3004 4815 : break;
3005 : }
3006 :
3007 : /* Forcing alignment in the first iteration is no good if
3008 : we don't keep it across iterations. For now, just disable
3009 : versioning in this case.
3010 : ?? We could actually unroll the loop to achieve the required
3011 : overall step alignment, and forcing the alignment could be
3012 : done by doing some iterations of the non-vectorized loop. */
3013 11258 : if (!multiple_p (vf * DR_STEP_ALIGNMENT (dr),
3014 11258 : DR_TARGET_ALIGNMENT (dr_info)))
3015 : {
3016 : do_versioning = false;
3017 : break;
3018 : }
3019 :
3020 : /* Use "mask = DR_TARGET_ALIGNMENT - 1" to test rightmost address
3021 : bits for runtime alignment check. For example, for 16 bytes
3022 : target alignment the mask is 15 = 0xf. */
3023 11258 : poly_uint64 mask = DR_TARGET_ALIGNMENT (dr_info) - 1;
3024 :
3025 : /* FORNOW: use the same mask to test all potentially unaligned
3026 : references in the loop. */
3027 11258 : if (maybe_ne (LOOP_VINFO_PTR_MASK (loop_vinfo), 0U)
3028 11258 : && maybe_ne (LOOP_VINFO_PTR_MASK (loop_vinfo), mask))
3029 : {
3030 : do_versioning = false;
3031 : break;
3032 : }
3033 :
3034 11142 : LOOP_VINFO_PTR_MASK (loop_vinfo) = mask;
3035 11142 : LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo).safe_push (stmt_info);
3036 : }
3037 : }
3038 :
3039 : /* Versioning requires at least one misaligned data reference. */
3040 78098 : if (!LOOP_REQUIRES_VERSIONING_FOR_ALIGNMENT (loop_vinfo))
3041 : do_versioning = false;
3042 5487 : else if (!do_versioning)
3043 536 : LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo).truncate (0);
3044 : }
3045 :
3046 : /* If we are trying peeling with versioning but versioning is disabled for
3047 : some reason, peeling should be turned off together. */
3048 301717 : if (try_peeling_with_versioning && !do_versioning)
3049 : do_peeling = false;
3050 :
3051 289691 : if (do_versioning)
3052 : {
3053 : const vec<stmt_vec_info> &may_misalign_stmts
3054 : = LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo);
3055 : stmt_vec_info stmt_info;
3056 :
3057 : /* It can now be assumed that the data references in the statements
3058 : in LOOP_VINFO_MAY_MISALIGN_STMTS will be aligned in the version
3059 : of the loop being vectorized. */
3060 13477 : FOR_EACH_VEC_ELT (may_misalign_stmts, i, stmt_info)
3061 : {
3062 8526 : dr_vec_info *dr_info = STMT_VINFO_DR_INFO (stmt_info);
3063 8526 : SET_DR_MISALIGNMENT (dr_info,
3064 : vect_dr_misalign_for_aligned_access (dr_info));
3065 8526 : if (dump_enabled_p ())
3066 126 : dump_printf_loc (MSG_NOTE, vect_location,
3067 : "Alignment of access forced using versioning.\n");
3068 : }
3069 :
3070 4951 : if (do_peeling)
3071 : {
3072 : /* This point is reached if peeling and versioning are used together
3073 : to ensure alignment. Update data structures to make sure the loop
3074 : is correctly peeled and a right runtime check is added for loop
3075 : versioning. */
3076 1006 : gcc_assert (try_peeling_with_versioning);
3077 1006 : LOOP_VINFO_UNALIGNED_DR (loop_vinfo) = dr0_info;
3078 1006 : LOOP_VINFO_PEELING_FOR_ALIGNMENT (loop_vinfo) = -1;
3079 1006 : LOOP_VINFO_ALLOW_MUTUAL_ALIGNMENT (loop_vinfo) = true;
3080 1006 : if (dump_enabled_p ())
3081 6 : dump_printf_loc (MSG_NOTE, vect_location,
3082 : "Both peeling and versioning will be applied.\n");
3083 : }
3084 : else
3085 : {
3086 : /* This point is reached if versioning is used alone. */
3087 3945 : LOOP_VINFO_ALLOW_MUTUAL_ALIGNMENT (loop_vinfo) = false;
3088 3945 : if (dump_enabled_p ())
3089 73 : dump_printf_loc (MSG_NOTE, vect_location,
3090 : "Versioning for alignment will be applied.\n");
3091 : }
3092 :
3093 4951 : return opt_result::success ();
3094 : }
3095 :
3096 : /* This point is reached if neither peeling nor versioning is being done. */
3097 296766 : gcc_assert (! (do_peeling || do_versioning));
3098 :
3099 296766 : return opt_result::success ();
3100 639388 : }
3101 :
3102 :
3103 : /* Function vect_analyze_data_refs_alignment
3104 :
3105 : Analyze the alignment of the data-references in the loop. */
3106 :
3107 : void
3108 358643 : vect_analyze_data_refs_alignment (loop_vec_info loop_vinfo)
3109 : {
3110 358643 : DUMP_VECT_SCOPE ("vect_analyze_data_refs_alignment");
3111 :
3112 358643 : vec<data_reference_p> datarefs = LOOP_VINFO_DATAREFS (loop_vinfo);
3113 358643 : struct data_reference *dr;
3114 358643 : unsigned int i;
3115 :
3116 358643 : vect_record_base_alignments (loop_vinfo);
3117 1521044 : FOR_EACH_VEC_ELT (datarefs, i, dr)
3118 : {
3119 817834 : dr_vec_info *dr_info = loop_vinfo->lookup_dr (dr);
3120 817834 : if (STMT_VINFO_VECTORIZABLE (dr_info->stmt))
3121 : {
3122 817834 : if (STMT_VINFO_GROUPED_ACCESS (dr_info->stmt)
3123 1078197 : && DR_GROUP_FIRST_ELEMENT (dr_info->stmt) != dr_info->stmt)
3124 108952 : continue;
3125 :
3126 708882 : vect_compute_data_ref_alignment (loop_vinfo, dr_info,
3127 : STMT_VINFO_VECTYPE (dr_info->stmt));
3128 : }
3129 : }
3130 358643 : }
3131 :
3132 :
3133 : /* Analyze alignment of DRs of stmts in NODE. */
3134 :
3135 : static bool
3136 809970 : vect_slp_analyze_node_alignment (vec_info *vinfo, slp_tree node)
3137 : {
3138 : /* Alignment is maintained in the first element of the group. */
3139 809970 : stmt_vec_info first_stmt_info = SLP_TREE_SCALAR_STMTS (node)[0];
3140 809970 : first_stmt_info = DR_GROUP_FIRST_ELEMENT (first_stmt_info);
3141 809970 : dr_vec_info *dr_info = STMT_VINFO_DR_INFO (first_stmt_info);
3142 809970 : tree vectype = SLP_TREE_VECTYPE (node);
3143 809970 : poly_uint64 vector_alignment
3144 809970 : = exact_div (targetm.vectorize.preferred_vector_alignment (vectype),
3145 : BITS_PER_UNIT);
3146 809970 : if (dr_info->misalignment == DR_MISALIGNMENT_UNINITIALIZED)
3147 769734 : vect_compute_data_ref_alignment (vinfo, dr_info, SLP_TREE_VECTYPE (node));
3148 : /* Re-analyze alignment when we're facing a vectorization with a bigger
3149 : alignment requirement. */
3150 40236 : else if (known_lt (dr_info->target_alignment, vector_alignment))
3151 : {
3152 67 : poly_uint64 old_target_alignment = dr_info->target_alignment;
3153 67 : int old_misalignment = dr_info->misalignment;
3154 67 : vect_compute_data_ref_alignment (vinfo, dr_info, SLP_TREE_VECTYPE (node));
3155 : /* But keep knowledge about a smaller alignment. */
3156 67 : if (old_misalignment != DR_MISALIGNMENT_UNKNOWN
3157 38 : && dr_info->misalignment == DR_MISALIGNMENT_UNKNOWN)
3158 : {
3159 1 : dr_info->target_alignment = old_target_alignment;
3160 1 : dr_info->misalignment = old_misalignment;
3161 : }
3162 : }
3163 : /* When we ever face unordered target alignments the first one wins in terms
3164 : of analyzing and the other will become unknown in dr_misalignment. */
3165 809970 : return true;
3166 : }
3167 :
3168 : /* Function vect_slp_analyze_instance_alignment
3169 :
3170 : Analyze the alignment of the data-references in the SLP instance.
3171 : Return FALSE if a data reference is found that cannot be vectorized. */
3172 :
3173 : bool
3174 781633 : vect_slp_analyze_instance_alignment (vec_info *vinfo,
3175 : slp_instance instance)
3176 : {
3177 781633 : DUMP_VECT_SCOPE ("vect_slp_analyze_instance_alignment");
3178 :
3179 781633 : slp_tree node;
3180 781633 : unsigned i;
3181 936863 : FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (instance), i, node)
3182 155230 : if (! vect_slp_analyze_node_alignment (vinfo, node))
3183 : return false;
3184 :
3185 781633 : if (SLP_INSTANCE_KIND (instance) == slp_inst_kind_store
3186 781633 : && ! vect_slp_analyze_node_alignment
3187 654740 : (vinfo, SLP_INSTANCE_TREE (instance)))
3188 : return false;
3189 :
3190 : return true;
3191 : }
3192 :
3193 :
3194 : /* Analyze groups of accesses: check that DR_INFO belongs to a group of
3195 : accesses of legal size, step, etc. Detect gaps, single element
3196 : interleaving, and other special cases. Set grouped access info.
3197 : Collect groups of strided stores for further use in SLP analysis.
3198 : Worker for vect_analyze_group_access. */
3199 :
static bool
vect_analyze_group_access_1 (vec_info *vinfo, dr_vec_info *dr_info)
{
  data_reference *dr = dr_info->dr;
  tree step = DR_STEP (dr);
  tree scalar_type = TREE_TYPE (DR_REF (dr));
  /* Size in bytes of one scalar element of the access.  */
  HOST_WIDE_INT type_size = TREE_INT_CST_LOW (TYPE_SIZE_UNIT (scalar_type));
  stmt_vec_info stmt_info = dr_info->stmt;
  loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
  bb_vec_info bb_vinfo = dyn_cast <bb_vec_info> (vinfo);
  HOST_WIDE_INT dr_step = -1;
  HOST_WIDE_INT groupsize, last_accessed_element = 1;
  bool slp_impossible = false;

  /* For interleaving, GROUPSIZE is STEP counted in elements, i.e., the
     size of the interleaving group (including gaps).  */
  if (tree_fits_shwi_p (step))
    {
      dr_step = tree_to_shwi (step);
      /* Check that STEP is a multiple of type size.  Otherwise there is
	 a non-element-sized gap at the end of the group which we
	 cannot represent in DR_GROUP_GAP or DR_GROUP_SIZE.
	 ???  As we can handle non-constant step fine here we should
	 simply remove uses of DR_GROUP_GAP between the last and first
	 element and instead rely on DR_STEP.  DR_GROUP_SIZE then would
	 simply not include that gap.  */
      if ((dr_step % type_size) != 0)
	{
	  if (dump_enabled_p ())
	    dump_printf_loc (MSG_NOTE, vect_location,
			     "Step %T is not a multiple of the element size"
			     " for %T\n",
			     step, DR_REF (dr));
	  return false;
	}
      groupsize = absu_hwi (dr_step) / type_size;
    }
  else
    /* Non-constant step: group size is unknown at this point.  */
    groupsize = 0;

  /* Not consecutive access is possible only if it is a part of interleaving.  */
  if (!DR_GROUP_FIRST_ELEMENT (stmt_info))
    {
      /* Check if it this DR is a part of interleaving, and is a single
	 element of the group that is accessed in the loop.  */

      /* Gaps are supported only for loads.  STEP must be a multiple of the type
	 size.  */
      if (DR_IS_READ (dr)
	  && (dr_step % type_size) == 0
	  && groupsize > 0
	  /* This could be UINT_MAX but as we are generating code in a very
	     inefficient way we have to cap earlier.
	     See PR91403 for example.  */
	  && groupsize <= 4096)
	{
	  /* Make this load a single-element group with GROUPSIZE - 1
	     trailing gap elements.  */
	  DR_GROUP_FIRST_ELEMENT (stmt_info) = stmt_info;
	  DR_GROUP_SIZE (stmt_info) = groupsize;
	  DR_GROUP_GAP (stmt_info) = groupsize - 1;
	  if (dump_enabled_p ())
	    dump_printf_loc (MSG_NOTE, vect_location,
			     "Detected single element interleaving %T"
			     " step %T\n",
			     DR_REF (dr), step);

	  return true;
	}

      if (dump_enabled_p ())
	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			 "not consecutive access %G", stmt_info->stmt);

      if (bb_vinfo)
	{
	  /* Mark the statement as unvectorizable.  */
	  STMT_VINFO_VECTORIZABLE (stmt_info) = false;
	  return true;
	}

      /* In a loop we can still fall back to a strided access.  */
      if (dump_enabled_p ())
	dump_printf_loc (MSG_NOTE, vect_location, "using strided accesses\n");
      STMT_VINFO_STRIDED_P (stmt_info) = true;
      return true;
    }

  if (DR_GROUP_FIRST_ELEMENT (stmt_info) == stmt_info)
    {
      /* First stmt in the interleaving chain.  Check the chain.  */
      stmt_vec_info next = DR_GROUP_NEXT_ELEMENT (stmt_info);
      struct data_reference *data_ref = dr;
      unsigned int count = 1;
      tree prev_init = DR_INIT (data_ref);
      HOST_WIDE_INT diff, gaps = 0;

      /* By construction, all group members have INTEGER_CST DR_INITs.  */
      while (next)
	{
	  /* We never have the same DR multiple times.  */
	  gcc_assert (tree_int_cst_compare (DR_INIT (data_ref),
			DR_INIT (STMT_VINFO_DATA_REF (next))) != 0);

	  data_ref = STMT_VINFO_DATA_REF (next);

	  /* All group members have the same STEP by construction.  */
	  gcc_checking_assert (operand_equal_p (DR_STEP (data_ref), step, 0));

	  /* Check that the distance between two accesses is equal to the type
	     size. Otherwise, we have gaps.  */
	  diff = (TREE_INT_CST_LOW (DR_INIT (data_ref))
		  - TREE_INT_CST_LOW (prev_init)) / type_size;
	  if (diff < 1 || diff > UINT_MAX)
	    {
	      /* For artificial testcases with array accesses with large
		 constant indices we can run into overflow issues which
		 can end up fooling the groupsize constraint below so
		 check the individual gaps (which are represented as
		 unsigned int) as well.  */
	      if (dump_enabled_p ())
		dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
				 "interleaved access with gap larger "
				 "than representable\n");
	      return false;
	    }
	  if (diff != 1)
	    {
	      /* FORNOW: SLP of accesses with gaps is not supported.  */
	      slp_impossible = true;
	      if (DR_IS_WRITE (data_ref))
		{
		  /* Stores with gaps are not supported at all.  */
		  if (dump_enabled_p ())
		    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
				     "interleaved store with gaps\n");
		  return false;
		}

	      gaps += diff - 1;
	    }

	  last_accessed_element += diff;

	  /* Store the gap from the previous member of the group. If there is no
	     gap in the access, DR_GROUP_GAP is always 1.  */
	  DR_GROUP_GAP (next) = diff;

	  prev_init = DR_INIT (data_ref);
	  next = DR_GROUP_NEXT_ELEMENT (next);
	  /* Count the number of data-refs in the chain.  */
	  count++;
	}

      if (groupsize == 0)
	groupsize = count + gaps;

      /* This could be UINT_MAX but as we are generating code in a very
	 inefficient way we have to cap earlier.  See PR78699 for example.  */
      if (groupsize > 4096)
	{
	  if (dump_enabled_p ())
	    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			     "group is too large\n");
	  return false;
	}

      /* Check that the size of the interleaving is equal to count for stores,
	 i.e., that there are no gaps.  */
      if (groupsize != count
	  && !DR_IS_READ (dr))
	{
	  /* Fall back to a strided store for the gappy case.  */
	  groupsize = count;
	  STMT_VINFO_STRIDED_P (stmt_info) = true;
	}

      /* If there is a gap after the last load in the group it is the
	 difference between the groupsize and the last accessed
	 element.
	 When there is no gap, this difference should be 0.  */
      DR_GROUP_GAP (stmt_info) = groupsize - last_accessed_element;

      DR_GROUP_SIZE (stmt_info) = groupsize;
      if (dump_enabled_p ())
	{
	  dump_printf_loc (MSG_NOTE, vect_location,
			   "Detected interleaving ");
	  if (DR_IS_READ (dr))
	    dump_printf (MSG_NOTE, "load ");
	  else if (STMT_VINFO_STRIDED_P (stmt_info))
	    dump_printf (MSG_NOTE, "strided store ");
	  else
	    dump_printf (MSG_NOTE, "store ");
	  dump_printf (MSG_NOTE, "of size %u\n",
		       (unsigned)groupsize);
	  dump_printf_loc (MSG_NOTE, vect_location, "\t%G", stmt_info->stmt);
	  next = DR_GROUP_NEXT_ELEMENT (stmt_info);
	  while (next)
	    {
	      if (DR_GROUP_GAP (next) != 1)
		dump_printf_loc (MSG_NOTE, vect_location,
				 "\t<gap of %d elements>\n",
				 DR_GROUP_GAP (next) - 1);
	      dump_printf_loc (MSG_NOTE, vect_location, "\t%G", next->stmt);
	      next = DR_GROUP_NEXT_ELEMENT (next);
	    }
	  if (DR_GROUP_GAP (stmt_info) != 0)
	    dump_printf_loc (MSG_NOTE, vect_location,
			     "\t<gap of %d elements>\n",
			     DR_GROUP_GAP (stmt_info));
	}

      /* SLP: create an SLP data structure for every interleaving group of
	 stores for further analysis in vect_analyse_slp.  */
      if (DR_IS_WRITE (dr) && !slp_impossible)
	{
	  if (loop_vinfo)
	    LOOP_VINFO_GROUPED_STORES (loop_vinfo).safe_push (stmt_info);
	  if (bb_vinfo)
	    BB_VINFO_GROUPED_STORES (bb_vinfo).safe_push (stmt_info);
	}
    }

  return true;
}
3421 :
3422 : /* Analyze groups of accesses: check that DR_INFO belongs to a group of
3423 : accesses of legal size, step, etc. Detect gaps, single element
3424 : interleaving, and other special cases. Set grouped access info.
3425 : Collect groups of strided stores for further use in SLP analysis. */
3426 :
3427 : static bool
3428 12538676 : vect_analyze_group_access (vec_info *vinfo, dr_vec_info *dr_info)
3429 : {
3430 12538676 : if (!vect_analyze_group_access_1 (vinfo, dr_info))
3431 : {
3432 : /* Dissolve the group if present. */
3433 557 : stmt_vec_info stmt_info = DR_GROUP_FIRST_ELEMENT (dr_info->stmt);
3434 788 : while (stmt_info)
3435 : {
3436 231 : stmt_vec_info next = DR_GROUP_NEXT_ELEMENT (stmt_info);
3437 231 : DR_GROUP_FIRST_ELEMENT (stmt_info) = NULL;
3438 231 : DR_GROUP_NEXT_ELEMENT (stmt_info) = NULL;
3439 231 : stmt_info = next;
3440 : }
3441 : return false;
3442 : }
3443 : return true;
3444 : }
3445 :
3446 : /* Analyze the access pattern of the data-reference DR_INFO.
3447 : In case of non-consecutive accesses call vect_analyze_group_access() to
3448 : analyze groups of accesses. */
3449 :
static bool
vect_analyze_data_ref_access (vec_info *vinfo, dr_vec_info *dr_info)
{
  data_reference *dr = dr_info->dr;
  tree step = DR_STEP (dr);
  tree scalar_type = TREE_TYPE (DR_REF (dr));
  stmt_vec_info stmt_info = dr_info->stmt;
  loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
  class loop *loop = NULL;

  /* Gather/scatter accesses need no access-pattern analysis here.  */
  if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
    return true;

  if (loop_vinfo)
    loop = LOOP_VINFO_LOOP (loop_vinfo);

  if (loop_vinfo && !step)
    {
      if (dump_enabled_p ())
	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			 "bad data-ref access in loop\n");
      return false;
    }

  /* Allow loads with zero step in inner-loop vectorization.  */
  if (loop_vinfo && integer_zerop (step))
    {
      DR_GROUP_FIRST_ELEMENT (stmt_info) = NULL;
      DR_GROUP_NEXT_ELEMENT (stmt_info) = NULL;
      /* Outside a nested loop a zero-step access is OK for loads only.  */
      if (!nested_in_vect_loop_p (loop, stmt_info))
	return DR_IS_READ (dr);
      /* Allow references with zero step for outer loops marked
	 with pragma omp simd only - it guarantees absence of
	 loop-carried dependencies between inner loop iterations.  */
      if (loop->safelen < 2)
	{
	  if (dump_enabled_p ())
	    dump_printf_loc (MSG_NOTE, vect_location,
			     "zero step in inner loop of nest\n");
	  return false;
	}
    }

  if (loop && nested_in_vect_loop_p (loop, stmt_info))
    {
      /* Interleaved accesses are not yet supported within outer-loop
	 vectorization for references in the inner-loop.  */
      DR_GROUP_FIRST_ELEMENT (stmt_info) = NULL;
      DR_GROUP_NEXT_ELEMENT (stmt_info) = NULL;

      /* For the rest of the analysis we use the outer-loop step.  */
      step = STMT_VINFO_DR_STEP (stmt_info);
      if (integer_zerop (step))
	{
	  if (dump_enabled_p ())
	    dump_printf_loc (MSG_NOTE, vect_location,
			     "zero step in outer loop.\n");
	  return DR_IS_READ (dr);
	}
    }

  /* Consecutive?  */
  if (TREE_CODE (step) == INTEGER_CST)
    {
      HOST_WIDE_INT dr_step = TREE_INT_CST_LOW (step);
      /* A step equal to the element size (or its negation) means a
	 simple consecutive (or reverse) access, not interleaving.  */
      if (!tree_int_cst_compare (step, TYPE_SIZE_UNIT (scalar_type))
	  || (dr_step < 0
	      && !compare_tree_int (TYPE_SIZE_UNIT (scalar_type), -dr_step)))
	{
	  /* Mark that it is not interleaving.  */
	  DR_GROUP_FIRST_ELEMENT (stmt_info) = NULL;
	  DR_GROUP_NEXT_ELEMENT (stmt_info) = NULL;
	  return true;
	}
    }

  if (loop && nested_in_vect_loop_p (loop, stmt_info))
    {
      if (dump_enabled_p ())
	dump_printf_loc (MSG_NOTE, vect_location,
			 "grouped access in outer loop.\n");
      return false;
    }


  /* Assume this is a DR handled by non-constant strided load case.  */
  if (TREE_CODE (step) != INTEGER_CST)
    return (STMT_VINFO_STRIDED_P (stmt_info)
	    && (!STMT_VINFO_GROUPED_ACCESS (stmt_info)
		|| vect_analyze_group_access (vinfo, dr_info)));

  /* Not consecutive access - check if it's a part of interleaving group.  */
  return vect_analyze_group_access (vinfo, dr_info);
}
3544 :
3545 : /* Compare two data-references DRA and DRB to group them into chunks
3546 : suitable for grouping. */
3547 :
static int
dr_group_sort_cmp (const void *dra_, const void *drb_)
{
  dr_vec_info *dra_info = *(dr_vec_info **)const_cast<void *>(dra_);
  dr_vec_info *drb_info = *(dr_vec_info **)const_cast<void *>(drb_);
  data_reference_p dra = dra_info->dr;
  data_reference_p drb = drb_info->dr;
  int cmp;

  /* Stabilize sort.  */
  if (dra == drb)
    return 0;

  /* DRs with different group IDs never belong to the same group.  */
  if (dra_info->group != drb_info->group)
    return dra_info->group < drb_info->group ? -1 : 1;

  /* Ordering of DRs according to base.  */
  cmp = data_ref_compare_tree (DR_BASE_ADDRESS (dra),
			       DR_BASE_ADDRESS (drb));
  if (cmp != 0)
    return cmp;

  /* And according to DR_OFFSET.  */
  cmp = data_ref_compare_tree (DR_OFFSET (dra), DR_OFFSET (drb));
  if (cmp != 0)
    return cmp;

  /* Put reads before writes.  */
  if (DR_IS_READ (dra) != DR_IS_READ (drb))
    return DR_IS_READ (dra) ? -1 : 1;

  /* Then sort after access size.  */
  cmp = data_ref_compare_tree (TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (dra))),
			       TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (drb))));
  if (cmp != 0)
    return cmp;

  /* And after step.  */
  cmp = data_ref_compare_tree (DR_STEP (dra), DR_STEP (drb));
  if (cmp != 0)
    return cmp;

  /* Then sort after DR_INIT.  In case of identical DRs sort after stmt UID.  */
  cmp = data_ref_compare_tree (DR_INIT (dra), DR_INIT (drb));
  if (cmp == 0)
    return gimple_uid (DR_STMT (dra)) < gimple_uid (DR_STMT (drb)) ? -1 : 1;
  return cmp;
}
3597 :
3598 : /* If OP is the result of a conversion, return the unconverted value,
3599 : otherwise return null. */
3600 :
3601 : static tree
3602 359 : strip_conversion (tree op)
3603 : {
3604 359 : if (TREE_CODE (op) != SSA_NAME)
3605 : return NULL_TREE;
3606 359 : gimple *stmt = SSA_NAME_DEF_STMT (op);
3607 359 : if (!is_gimple_assign (stmt)
3608 359 : || !CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (stmt)))
3609 : return NULL_TREE;
3610 182 : return gimple_assign_rhs1 (stmt);
3611 : }
3612 :
3613 : /* Return true if vectorizable_* routines can handle statements STMT1_INFO
3614 : and STMT2_INFO being in a single group. When ALLOW_SLP_P, masked loads can
3615 : be grouped in SLP mode. */
3616 :
3617 : static bool
3618 6891147 : can_group_stmts_p (stmt_vec_info stmt1_info, stmt_vec_info stmt2_info,
3619 : bool allow_slp_p)
3620 : {
3621 6891147 : if (gimple_assign_single_p (stmt1_info->stmt))
3622 6889544 : return gimple_assign_single_p (stmt2_info->stmt);
3623 :
3624 1603 : gcall *call1 = dyn_cast <gcall *> (stmt1_info->stmt);
3625 1603 : if (call1 && gimple_call_internal_p (call1))
3626 : {
3627 : /* Check for two masked loads or two masked stores. */
3628 1815 : gcall *call2 = dyn_cast <gcall *> (stmt2_info->stmt);
3629 1592 : if (!call2 || !gimple_call_internal_p (call2))
3630 : return false;
3631 1592 : internal_fn ifn = gimple_call_internal_fn (call1);
3632 1592 : if (ifn != IFN_MASK_LOAD && ifn != IFN_MASK_STORE)
3633 : return false;
3634 1592 : if (ifn != gimple_call_internal_fn (call2))
3635 : return false;
3636 :
3637 : /* Check that the masks are the same. Cope with casts of masks,
3638 : like those created by build_mask_conversion. */
3639 1592 : tree mask1 = gimple_call_arg (call1, 2);
3640 1592 : tree mask2 = gimple_call_arg (call2, 2);
3641 1592 : if (!operand_equal_p (mask1, mask2, 0) && !allow_slp_p)
3642 : {
3643 268 : mask1 = strip_conversion (mask1);
3644 268 : if (!mask1)
3645 : return false;
3646 91 : mask2 = strip_conversion (mask2);
3647 91 : if (!mask2)
3648 : return false;
3649 91 : if (!operand_equal_p (mask1, mask2, 0))
3650 : return false;
3651 : }
3652 1380 : return true;
3653 : }
3654 :
3655 : return false;
3656 : }
3657 :
3658 : /* Function vect_analyze_data_ref_accesses.
3659 :
3660 : Analyze the access pattern of all the data references in the loop.
3661 :
3662 : FORNOW: the only access pattern that is considered vectorizable is a
3663 : simple step 1 (consecutive) access.
3664 :
3665 : FORNOW: handle only arrays and pointer accesses. */
3666 :
opt_result
vect_analyze_data_ref_accesses (vec_info *vinfo,
				vec<int> *dataref_groups)
{
  unsigned int i;
  vec<data_reference_p> datarefs = vinfo->shared->datarefs;

  DUMP_VECT_SCOPE ("vect_analyze_data_ref_accesses");

  if (datarefs.is_empty ())
    return opt_result::success ();

  /* Sort the array of datarefs to make building the interleaving chains
     linear.  Don't modify the original vector's order, it is needed for
     determining what dependencies are reversed.  */
  vec<dr_vec_info *> datarefs_copy;
  datarefs_copy.create (datarefs.length ());
  for (unsigned i = 0; i < datarefs.length (); i++)
    {
      dr_vec_info *dr_info = vinfo->lookup_dr (datarefs[i]);
      /* If the caller computed DR grouping use that, otherwise group by
	 basic blocks.  */
      if (dataref_groups)
	dr_info->group = (*dataref_groups)[i];
      else
	dr_info->group = gimple_bb (DR_STMT (datarefs[i]))->index;
      datarefs_copy.quick_push (dr_info);
    }
  datarefs_copy.qsort (dr_group_sort_cmp);
  /* Groups that received a duplicate access and need splitting below.  */
  hash_set<stmt_vec_info> to_fixup;

  /* Build the interleaving chains.  */
  for (i = 0; i < datarefs_copy.length () - 1;)
    {
      dr_vec_info *dr_info_a = datarefs_copy[i];
      data_reference_p dra = dr_info_a->dr;
      int dra_group_id = dr_info_a->group;
      stmt_vec_info stmtinfo_a = dr_info_a->stmt;
      stmt_vec_info lastinfo = NULL;
      if (!STMT_VINFO_VECTORIZABLE (stmtinfo_a)
	  || STMT_VINFO_GATHER_SCATTER_P (stmtinfo_a))
	{
	  ++i;
	  continue;
	}
      /* Try to chain the following sorted DRs onto DRA's group.  */
      for (i = i + 1; i < datarefs_copy.length (); ++i)
	{
	  dr_vec_info *dr_info_b = datarefs_copy[i];
	  data_reference_p drb = dr_info_b->dr;
	  int drb_group_id = dr_info_b->group;
	  stmt_vec_info stmtinfo_b = dr_info_b->stmt;
	  if (!STMT_VINFO_VECTORIZABLE (stmtinfo_b)
	      || STMT_VINFO_GATHER_SCATTER_P (stmtinfo_b))
	    break;

	  /* ???  Imperfect sorting (non-compatible types, non-modulo
	     accesses, same accesses) can lead to a group to be artificially
	     split here as we don't just skip over those.  If it really
	     matters we can push those to a worklist and re-iterate
	     over them.  The we can just skip ahead to the next DR here.  */

	  /* DRs in a different DR group should not be put into the same
	     interleaving group.  */
	  if (dra_group_id != drb_group_id)
	    break;

	  /* Check that the data-refs have same first location (except init)
	     and they are both either store or load (not load and store,
	     not masked loads or stores).  */
	  if (DR_IS_READ (dra) != DR_IS_READ (drb)
	      || data_ref_compare_tree (DR_BASE_ADDRESS (dra),
					DR_BASE_ADDRESS (drb)) != 0
	      || data_ref_compare_tree (DR_OFFSET (dra), DR_OFFSET (drb)) != 0
	      || !can_group_stmts_p (stmtinfo_a, stmtinfo_b, true))
	    break;

	  /* Check that the data-refs have the same constant size.  */
	  tree sza = TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (dra)));
	  tree szb = TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (drb)));
	  if (!tree_fits_uhwi_p (sza)
	      || !tree_fits_uhwi_p (szb)
	      || !tree_int_cst_equal (sza, szb))
	    break;

	  /* Check that the data-refs have the same step.  */
	  if (data_ref_compare_tree (DR_STEP (dra), DR_STEP (drb)) != 0)
	    break;

	  /* Check the types are compatible.
	     ???  We don't distinguish this during sorting.  */
	  if (!types_compatible_p (TREE_TYPE (DR_REF (dra)),
				   TREE_TYPE (DR_REF (drb))))
	    break;

	  /* Check that the DR_INITs are compile-time constants.  */
	  if (!tree_fits_shwi_p (DR_INIT (dra))
	      || !tree_fits_shwi_p (DR_INIT (drb)))
	    break;

	  /* Different .GOMP_SIMD_LANE calls still give the same lane,
	     just hold extra information.  */
	  if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmtinfo_a)
	      && STMT_VINFO_SIMD_LANE_ACCESS_P (stmtinfo_b)
	      && data_ref_compare_tree (DR_INIT (dra), DR_INIT (drb)) == 0)
	    break;

	  /* Sorting has ensured that DR_INIT (dra) <= DR_INIT (drb).  */
	  HOST_WIDE_INT init_a = TREE_INT_CST_LOW (DR_INIT (dra));
	  HOST_WIDE_INT init_b = TREE_INT_CST_LOW (DR_INIT (drb));
	  HOST_WIDE_INT init_prev
	    = TREE_INT_CST_LOW (DR_INIT (datarefs_copy[i-1]->dr));
	  gcc_assert (init_a <= init_b
		      && init_a <= init_prev
		      && init_prev <= init_b);

	  /* Do not place the same access in the interleaving chain twice.  */
	  if (init_b == init_prev)
	    {
	      gcc_assert (gimple_uid (DR_STMT (datarefs_copy[i-1]->dr))
			  < gimple_uid (DR_STMT (drb)));
	      /* Simply link in duplicates and fix up the chain below.  */
	    }
	  else
	    {
	      /* If init_b == init_a + the size of the type * k, we have an
		 interleaving, and DRA is accessed before DRB.  */
	      unsigned HOST_WIDE_INT type_size_a = tree_to_uhwi (sza);
	      if (type_size_a == 0
		  || (((unsigned HOST_WIDE_INT)init_b - init_a)
		      % type_size_a != 0))
		break;

	      /* If we have a store, the accesses are adjacent.  This splits
		 groups into chunks we support (we don't support vectorization
		 of stores with gaps).  */
	      if (!DR_IS_READ (dra)
		  && (((unsigned HOST_WIDE_INT)init_b - init_prev)
		      != type_size_a))
		break;

	      /* For datarefs with big gap, it's better to split them into different
		 groups.
		 .i.e a[0], a[1], a[2], .. a[7], a[100], a[101],..., a[107]  */
	      if ((unsigned HOST_WIDE_INT)(init_b - init_prev)
		  > MAX_BITSIZE_MODE_ANY_MODE / BITS_PER_UNIT)
		break;

	      /* If the step (if not zero or non-constant) is smaller than the
		 difference between data-refs' inits this splits groups into
		 suitable sizes.  */
	      if (tree_fits_shwi_p (DR_STEP (dra)))
		{
		  unsigned HOST_WIDE_INT step
		    = absu_hwi (tree_to_shwi (DR_STEP (dra)));
		  if (step != 0
		      && step <= ((unsigned HOST_WIDE_INT)init_b - init_a))
		    break;
		}
	    }

	  if (dump_enabled_p ())
	    dump_printf_loc (MSG_NOTE, vect_location,
			     DR_IS_READ (dra)
			     ? "Detected interleaving load %T and %T\n"
			     : "Detected interleaving store %T and %T\n",
			     DR_REF (dra), DR_REF (drb));

	  /* Link the found element into the group list.  */
	  if (!DR_GROUP_FIRST_ELEMENT (stmtinfo_a))
	    {
	      DR_GROUP_FIRST_ELEMENT (stmtinfo_a) = stmtinfo_a;
	      lastinfo = stmtinfo_a;
	    }
	  DR_GROUP_FIRST_ELEMENT (stmtinfo_b) = stmtinfo_a;
	  DR_GROUP_NEXT_ELEMENT (lastinfo) = stmtinfo_b;
	  lastinfo = stmtinfo_b;

	  if (! STMT_VINFO_SLP_VECT_ONLY (stmtinfo_a))
	    {
	      STMT_VINFO_SLP_VECT_ONLY (stmtinfo_a)
		= !can_group_stmts_p (stmtinfo_a, stmtinfo_b, false);

	      if (dump_enabled_p () && STMT_VINFO_SLP_VECT_ONLY (stmtinfo_a))
		dump_printf_loc (MSG_NOTE, vect_location,
				 "Load suitable for SLP vectorization only.\n");
	    }

	  if (init_b == init_prev
	      && !to_fixup.add (DR_GROUP_FIRST_ELEMENT (stmtinfo_a))
	      && dump_enabled_p ())
	    dump_printf_loc (MSG_NOTE, vect_location,
			     "Queuing group with duplicate access for fixup\n");
	}
    }

  /* Fixup groups with duplicate entries by splitting it.  */
  while (1)
    {
      hash_set<stmt_vec_info>::iterator it = to_fixup.begin ();
      if (!(it != to_fixup.end ()))
	break;
      stmt_vec_info grp = *it;
      to_fixup.remove (grp);

      /* Find the earliest duplicate group member.  */
      unsigned first_duplicate = -1u;
      stmt_vec_info next, g = grp;
      while ((next = DR_GROUP_NEXT_ELEMENT (g)))
	{
	  if (tree_int_cst_equal (DR_INIT (STMT_VINFO_DR_INFO (next)->dr),
				  DR_INIT (STMT_VINFO_DR_INFO (g)->dr))
	      && gimple_uid (STMT_VINFO_STMT (next)) < first_duplicate)
	    first_duplicate = gimple_uid (STMT_VINFO_STMT (next));
	  g = next;
	}
      if (first_duplicate == -1U)
	continue;

      /* Then move all stmts after the first duplicate to a new group.
	 Note this is a heuristic but one with the property that *it
	 is fixed up completely.  */
      g = grp;
      stmt_vec_info newgroup = NULL, ng = grp;
      while ((next = DR_GROUP_NEXT_ELEMENT (g)))
	{
	  if (gimple_uid (STMT_VINFO_STMT (next)) >= first_duplicate)
	    {
	      DR_GROUP_NEXT_ELEMENT (g) = DR_GROUP_NEXT_ELEMENT (next);
	      if (!newgroup)
		{
		  newgroup = next;
		  STMT_VINFO_SLP_VECT_ONLY (newgroup)
		    = STMT_VINFO_SLP_VECT_ONLY (grp);
		}
	      else
		DR_GROUP_NEXT_ELEMENT (ng) = next;
	      ng = next;
	      DR_GROUP_FIRST_ELEMENT (ng) = newgroup;
	    }
	  else
	    g = DR_GROUP_NEXT_ELEMENT (g);
	}
      DR_GROUP_NEXT_ELEMENT (ng) = NULL;

      /* Fixup the new group which still may contain duplicates.  */
      to_fixup.add (newgroup);
    }

  /* Finally analyze the access pattern of each (remaining) group leader
     and single access.  */
  dr_vec_info *dr_info;
  FOR_EACH_VEC_ELT (datarefs_copy, i, dr_info)
    {
      if (STMT_VINFO_VECTORIZABLE (dr_info->stmt)
	  && !vect_analyze_data_ref_access (vinfo, dr_info))
	{
	  if (dump_enabled_p ())
	    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			     "not vectorized: complicated access pattern.\n");

	  if (is_a <bb_vec_info> (vinfo))
	    {
	      /* Mark the statement as not vectorizable.  */
	      STMT_VINFO_VECTORIZABLE (dr_info->stmt) = false;
	      continue;
	    }
	  else
	    {
	      datarefs_copy.release ();
	      return opt_result::failure_at (dr_info->stmt->stmt,
					     "not vectorized:"
					     " complicated access pattern.\n");
	    }
	}
    }

  datarefs_copy.release ();
  return opt_result::success ();
}
3944 :
3945 : /* Function vect_vfa_segment_size.
3946 :
3947 : Input:
3948 : DR_INFO: The data reference.
3949 : LENGTH_FACTOR: segment length to consider.
3950 :
3951 : Return a value suitable for the dr_with_seg_len::seg_len field.
3952 : This is the "distance travelled" by the pointer from the first
3953 : iteration in the segment to the last. Note that it does not include
3954 : the size of the access; in effect it only describes the first byte. */
3955 :
3956 : static tree
3957 123470 : vect_vfa_segment_size (dr_vec_info *dr_info, tree length_factor)
3958 : {
3959 123470 : length_factor = size_binop (MINUS_EXPR,
3960 : fold_convert (sizetype, length_factor),
3961 : size_one_node);
3962 123470 : return size_binop (MULT_EXPR, fold_convert (sizetype, DR_STEP (dr_info->dr)),
3963 : length_factor);
3964 : }
3965 :
3966 : /* Return a value that, when added to abs (vect_vfa_segment_size (DR_INFO)),
3967 : gives the worst-case number of bytes covered by the segment. */
3968 :
3969 : static unsigned HOST_WIDE_INT
3970 123952 : vect_vfa_access_size (vec_info *vinfo, dr_vec_info *dr_info)
3971 : {
3972 123952 : stmt_vec_info stmt_vinfo = dr_info->stmt;
3973 123952 : tree ref_type = TREE_TYPE (DR_REF (dr_info->dr));
3974 123952 : unsigned HOST_WIDE_INT ref_size = tree_to_uhwi (TYPE_SIZE_UNIT (ref_type));
3975 123952 : unsigned HOST_WIDE_INT access_size = ref_size;
3976 123952 : if (DR_GROUP_FIRST_ELEMENT (stmt_vinfo))
3977 : {
3978 38359 : gcc_assert (DR_GROUP_FIRST_ELEMENT (stmt_vinfo) == stmt_vinfo);
3979 38359 : access_size *= DR_GROUP_SIZE (stmt_vinfo) - DR_GROUP_GAP (stmt_vinfo);
3980 : }
3981 123952 : tree vectype = STMT_VINFO_VECTYPE (stmt_vinfo);
3982 123952 : int misalignment;
3983 247904 : if (((misalignment = dr_misalignment (dr_info, vectype)), true)
3984 123952 : && (vect_supportable_dr_alignment (vinfo, dr_info, vectype, misalignment)
3985 : == dr_explicit_realign_optimized))
3986 : {
3987 : /* We might access a full vector's worth. */
3988 0 : access_size += tree_to_uhwi (TYPE_SIZE_UNIT (vectype)) - ref_size;
3989 : }
3990 123952 : return access_size;
3991 : }
3992 :
3993 : /* Get the minimum alignment for all the scalar accesses that DR_INFO
3994 : describes. */
3995 :
3996 : static unsigned int
3997 123952 : vect_vfa_align (dr_vec_info *dr_info)
3998 : {
3999 0 : return dr_alignment (dr_info->dr);
4000 : }
4001 :
4002 : /* Function vect_no_alias_p.
4003 :
4004 : Given data references A and B with equal base and offset, see whether
4005 : the alias relation can be decided at compilation time. Return 1 if
4006 : it can and the references alias, 0 if it can and the references do
4007 : not alias, and -1 if we cannot decide at compile time. SEGMENT_LENGTH_A,
4008 : SEGMENT_LENGTH_B, ACCESS_SIZE_A and ACCESS_SIZE_B are the equivalent
4009 : of dr_with_seg_len::{seg_len,access_size} for A and B. */
4010 :
static int
vect_compile_time_alias (dr_vec_info *a, dr_vec_info *b,
			 tree segment_length_a, tree segment_length_b,
			 unsigned HOST_WIDE_INT access_size_a,
			 unsigned HOST_WIDE_INT access_size_b)
{
  poly_offset_int offset_a = wi::to_poly_offset (DR_INIT (a->dr));
  poly_offset_int offset_b = wi::to_poly_offset (DR_INIT (b->dr));
  poly_uint64 const_length_a;
  poly_uint64 const_length_b;

  /* For negative step, we need to adjust address range by TYPE_SIZE_UNIT
     bytes, e.g., int a[3] -> a[1] range is [a+4, a+16) instead of
     [a, a+12) */
  if (tree_int_cst_compare (DR_STEP (a->dr), size_zero_node) < 0)
    {
      const_length_a = (-wi::to_poly_wide (segment_length_a)).force_uhwi ();
      offset_a -= const_length_a;
    }
  else
    const_length_a = tree_to_poly_uint64 (segment_length_a);
  if (tree_int_cst_compare (DR_STEP (b->dr), size_zero_node) < 0)
    {
      const_length_b = (-wi::to_poly_wide (segment_length_b)).force_uhwi ();
      offset_b -= const_length_b;
    }
  else
    const_length_b = tree_to_poly_uint64 (segment_length_b);

  /* Extend each segment by the access size to get the full byte range.  */
  const_length_a += access_size_a;
  const_length_b += access_size_b;

  /* Known overlap: the references definitely alias.  */
  if (ranges_known_overlap_p (offset_a, const_length_a,
			      offset_b, const_length_b))
    return 1;

  /* Known disjoint: the references definitely do not alias.  */
  if (!ranges_maybe_overlap_p (offset_a, const_length_a,
			       offset_b, const_length_b))
    return 0;

  /* Cannot be decided at compile time.  */
  return -1;
}
4053 :
/* Return true if the minimum nonzero dependence distance for loop LOOP_DEPTH
   in DDR is >= VF.  */

static bool
dependence_distance_ge_vf (data_dependence_relation *ddr,
			   unsigned int loop_depth, poly_uint64 vf)
{
  /* Distance vectors are only meaningful when the dependence is unknown
     (NULL_TREE chrec) and distance vectors have actually been computed.  */
  if (DDR_ARE_DEPENDENT (ddr) != NULL_TREE
      || DDR_NUM_DIST_VECTS (ddr) == 0)
    return false;

  /* If the dependence is exact, we should have limited the VF instead.  */
  gcc_checking_assert (DDR_COULD_BE_INDEPENDENT_P (ddr));

  unsigned int i;
  lambda_vector dist_v;
  FOR_EACH_VEC_ELT (DDR_DIST_VECTS (ddr), i, dist_v)
    {
      HOST_WIDE_INT dist = dist_v[loop_depth];
      /* Fail if any nonzero distance might be smaller than VF, ignoring
	 positive distances of reversed dependences.  */
      if (dist != 0
	  && !(dist > 0 && DDR_REVERSED_P (ddr))
	  && maybe_lt ((unsigned HOST_WIDE_INT) abs_hwi (dist), vf))
	return false;
    }

  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location,
		     "dependence distance between %T and %T is >= VF\n",
		     DR_REF (DDR_A (ddr)), DR_REF (DDR_B (ddr)));

  return true;
}
4086 :
4087 : /* Dump LOWER_BOUND using flags DUMP_KIND. Dumps are known to be enabled. */
4088 :
4089 : static void
4090 437 : dump_lower_bound (dump_flags_t dump_kind, const vec_lower_bound &lower_bound)
4091 : {
4092 437 : dump_printf (dump_kind, "%s (%T) >= ",
4093 437 : lower_bound.unsigned_p ? "unsigned" : "abs",
4094 437 : lower_bound.expr);
4095 437 : dump_dec (dump_kind, lower_bound.min_value);
4096 437 : }
4097 :
4098 : /* Record that the vectorized loop requires the vec_lower_bound described
4099 : by EXPR, UNSIGNED_P and MIN_VALUE. */
4100 :
4101 : static void
4102 6441 : vect_check_lower_bound (loop_vec_info loop_vinfo, tree expr, bool unsigned_p,
4103 : poly_uint64 min_value)
4104 : {
4105 6441 : vec<vec_lower_bound> &lower_bounds
4106 : = LOOP_VINFO_LOWER_BOUNDS (loop_vinfo);
4107 7413 : for (unsigned int i = 0; i < lower_bounds.length (); ++i)
4108 5831 : if (operand_equal_p (lower_bounds[i].expr, expr, 0))
4109 : {
4110 4859 : unsigned_p &= lower_bounds[i].unsigned_p;
4111 4859 : min_value = upper_bound (lower_bounds[i].min_value, min_value);
4112 4859 : if (lower_bounds[i].unsigned_p != unsigned_p
4113 4859 : || maybe_lt (lower_bounds[i].min_value, min_value))
4114 : {
4115 782 : lower_bounds[i].unsigned_p = unsigned_p;
4116 782 : lower_bounds[i].min_value = min_value;
4117 782 : if (dump_enabled_p ())
4118 : {
4119 250 : dump_printf_loc (MSG_NOTE, vect_location,
4120 : "updating run-time check to ");
4121 250 : dump_lower_bound (MSG_NOTE, lower_bounds[i]);
4122 250 : dump_printf (MSG_NOTE, "\n");
4123 : }
4124 : }
4125 4859 : return;
4126 : }
4127 :
4128 1582 : vec_lower_bound lower_bound (expr, unsigned_p, min_value);
4129 1582 : if (dump_enabled_p ())
4130 : {
4131 187 : dump_printf_loc (MSG_NOTE, vect_location, "need a run-time check that ");
4132 187 : dump_lower_bound (MSG_NOTE, lower_bound);
4133 187 : dump_printf (MSG_NOTE, "\n");
4134 : }
4135 1582 : LOOP_VINFO_LOWER_BOUNDS (loop_vinfo).safe_push (lower_bound);
4136 : }
4137 :
4138 : /* Return true if it's unlikely that the step of the vectorized form of DR_INFO
4139 : will span fewer than GAP bytes. */
4140 :
4141 : static bool
4142 5271 : vect_small_gap_p (loop_vec_info loop_vinfo, dr_vec_info *dr_info,
4143 : poly_int64 gap)
4144 : {
4145 5271 : stmt_vec_info stmt_info = dr_info->stmt;
4146 5271 : HOST_WIDE_INT count
4147 5271 : = estimated_poly_value (LOOP_VINFO_VECT_FACTOR (loop_vinfo));
4148 5271 : if (DR_GROUP_FIRST_ELEMENT (stmt_info))
4149 4511 : count *= DR_GROUP_SIZE (DR_GROUP_FIRST_ELEMENT (stmt_info));
4150 5271 : return (estimated_poly_value (gap)
4151 5271 : <= count * vect_get_scalar_dr_size (dr_info));
4152 : }
4153 :
4154 : /* Return true if we know that there is no alias between DR_INFO_A and
4155 : DR_INFO_B when abs (DR_STEP (DR_INFO_A->dr)) >= N for some N.
4156 : When returning true, set *LOWER_BOUND_OUT to this N. */
4157 :
4158 : static bool
4159 18449 : vectorizable_with_step_bound_p (dr_vec_info *dr_info_a, dr_vec_info *dr_info_b,
4160 : poly_uint64 *lower_bound_out)
4161 : {
4162 : /* Check that there is a constant gap of known sign between DR_A
4163 : and DR_B. */
4164 18449 : data_reference *dr_a = dr_info_a->dr;
4165 18449 : data_reference *dr_b = dr_info_b->dr;
4166 18449 : poly_int64 init_a, init_b;
4167 18449 : if (!operand_equal_p (DR_BASE_ADDRESS (dr_a), DR_BASE_ADDRESS (dr_b), 0)
4168 8055 : || !operand_equal_p (DR_OFFSET (dr_a), DR_OFFSET (dr_b), 0)
4169 7369 : || !operand_equal_p (DR_STEP (dr_a), DR_STEP (dr_b), 0)
4170 7359 : || !poly_int_tree_p (DR_INIT (dr_a), &init_a)
4171 7359 : || !poly_int_tree_p (DR_INIT (dr_b), &init_b)
4172 18449 : || !ordered_p (init_a, init_b))
4173 11090 : return false;
4174 :
4175 : /* Sort DR_A and DR_B by the address they access. */
4176 7359 : if (maybe_lt (init_b, init_a))
4177 : {
4178 116 : std::swap (init_a, init_b);
4179 116 : std::swap (dr_info_a, dr_info_b);
4180 116 : std::swap (dr_a, dr_b);
4181 : }
4182 :
4183 : /* If the two accesses could be dependent within a scalar iteration,
4184 : make sure that we'd retain their order. */
4185 7359 : if (maybe_gt (init_a + vect_get_scalar_dr_size (dr_info_a), init_b)
4186 7359 : && !vect_preserves_scalar_order_p (dr_info_a, dr_info_b))
4187 : return false;
4188 :
4189 : /* There is no alias if abs (DR_STEP) is greater than or equal to
4190 : the bytes spanned by the combination of the two accesses. */
4191 7359 : *lower_bound_out = init_b + vect_get_scalar_dr_size (dr_info_b) - init_a;
4192 7359 : return true;
4193 : }
4194 :
/* Function vect_prune_runtime_alias_test_list.

   Prune a list of ddrs to be tested at run-time by versioning for alias.
   Merge several alias checks into one if possible.
   Return FALSE if resulting list of ddrs is longer then allowed by
   PARAM_VECT_MAX_VERSION_FOR_ALIAS_CHECKS, otherwise return TRUE.  */

opt_result
vect_prune_runtime_alias_test_list (loop_vec_info loop_vinfo)
{
  typedef pair_hash <tree_operand_hash, tree_operand_hash> tree_pair_hash;
  hash_set <tree_pair_hash> compared_objects;

  const vec<ddr_p> &may_alias_ddrs = LOOP_VINFO_MAY_ALIAS_DDRS (loop_vinfo);
  vec<dr_with_seg_len_pair_t> &comp_alias_ddrs
    = LOOP_VINFO_COMP_ALIAS_DDRS (loop_vinfo);
  const vec<vec_object_pair> &check_unequal_addrs
    = LOOP_VINFO_CHECK_UNEQUAL_ADDRS (loop_vinfo);
  poly_uint64 vect_factor = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
  tree scalar_loop_iters = LOOP_VINFO_NITERS (loop_vinfo);

  ddr_p ddr;
  unsigned int i;
  tree length_factor;

  DUMP_VECT_SCOPE ("vect_prune_runtime_alias_test_list");

  /* Step values are irrelevant for aliasing if the number of vector
     iterations is equal to the number of scalar iterations (which can
     happen for fully-SLP loops).  */
  bool vf_one_p = known_eq (LOOP_VINFO_VECT_FACTOR (loop_vinfo), 1U);

  if (!vf_one_p)
    {
      /* Convert the checks for nonzero steps into bound tests.  */
      tree value;
      FOR_EACH_VEC_ELT (LOOP_VINFO_CHECK_NONZERO (loop_vinfo), i, value)
	vect_check_lower_bound (loop_vinfo, value, true, 1);
    }

  if (may_alias_ddrs.is_empty ())
    return opt_result::success ();

  /* Reserve space for the worst case of one check per DDR.  */
  comp_alias_ddrs.create (may_alias_ddrs.length ());

  unsigned int loop_depth
    = index_in_loop_nest (LOOP_VINFO_LOOP (loop_vinfo)->num,
			  LOOP_VINFO_LOOP_NEST (loop_vinfo));

  /* First, we collect all data ref pairs for aliasing checks.  */
  FOR_EACH_VEC_ELT (may_alias_ddrs, i, ddr)
    {
      poly_uint64 lower_bound;
      tree segment_length_a, segment_length_b;
      unsigned HOST_WIDE_INT access_size_a, access_size_b;
      unsigned HOST_WIDE_INT align_a, align_b;

      /* Ignore the alias if the VF we chose ended up being no greater
	 than the dependence distance.  */
      if (dependence_distance_ge_vf (ddr, loop_depth, vect_factor))
	continue;

      /* DDRs between distinct objects are handled by an address
	 inequality check rather than a segment-overlap check.  */
      if (DDR_OBJECT_A (ddr))
	{
	  vec_object_pair new_pair (DDR_OBJECT_A (ddr), DDR_OBJECT_B (ddr));
	  if (!compared_objects.add (new_pair))
	    {
	      if (dump_enabled_p ())
		dump_printf_loc (MSG_NOTE, vect_location,
				 "checking that %T and %T"
				 " have different addresses\n",
				 new_pair.first, new_pair.second);
	      LOOP_VINFO_CHECK_UNEQUAL_ADDRS (loop_vinfo).safe_push (new_pair);
	    }
	  continue;
	}

      dr_vec_info *dr_info_a = loop_vinfo->lookup_dr (DDR_A (ddr));
      stmt_vec_info stmt_info_a = dr_info_a->stmt;

      dr_vec_info *dr_info_b = loop_vinfo->lookup_dr (DDR_B (ddr));
      stmt_vec_info stmt_info_b = dr_info_b->stmt;

      bool preserves_scalar_order_p
	= vect_preserves_scalar_order_p (dr_info_a, dr_info_b);
      bool ignore_step_p
	= (vf_one_p
	   && (preserves_scalar_order_p
	       || operand_equal_p (DR_STEP (dr_info_a->dr),
				   DR_STEP (dr_info_b->dr))));

      /* Skip the pair if inter-iteration dependencies are irrelevant
	 and intra-iteration dependencies are guaranteed to be honored.  */
      if (ignore_step_p
	  && (preserves_scalar_order_p
	      || vectorizable_with_step_bound_p (dr_info_a, dr_info_b,
						 &lower_bound)))
	{
	  if (dump_enabled_p ())
	    dump_printf_loc (MSG_NOTE, vect_location,
			     "no need for alias check between "
			     "%T and %T when VF is 1\n",
			     DR_REF (dr_info_a->dr), DR_REF (dr_info_b->dr));
	  continue;
	}

      /* See whether we can handle the alias using a bounds check on
	 the step, and whether that's likely to be the best approach.
	 (It might not be, for example, if the minimum step is much larger
	 than the number of bytes handled by one vector iteration.)  */
      if (!ignore_step_p
	  && TREE_CODE (DR_STEP (dr_info_a->dr)) != INTEGER_CST
	  && vectorizable_with_step_bound_p (dr_info_a, dr_info_b,
					     &lower_bound)
	  && (vect_small_gap_p (loop_vinfo, dr_info_a, lower_bound)
	      || vect_small_gap_p (loop_vinfo, dr_info_b, lower_bound)))
	{
	  bool unsigned_p = dr_known_forward_stride_p (dr_info_a->dr);
	  if (dump_enabled_p ())
	    {
	      dump_printf_loc (MSG_NOTE, vect_location, "no alias between "
			       "%T and %T when the step %T is outside ",
			       DR_REF (dr_info_a->dr),
			       DR_REF (dr_info_b->dr),
			       DR_STEP (dr_info_a->dr));
	      if (unsigned_p)
		dump_printf (MSG_NOTE, "[0");
	      else
		{
		  dump_printf (MSG_NOTE, "(");
		  dump_dec (MSG_NOTE, poly_int64 (-lower_bound));
		}
	      dump_printf (MSG_NOTE, ", ");
	      dump_dec (MSG_NOTE, lower_bound);
	      dump_printf (MSG_NOTE, ")\n");
	    }
	  vect_check_lower_bound (loop_vinfo, DR_STEP (dr_info_a->dr),
				  unsigned_p, lower_bound);
	  continue;
	}

      /* For grouped accesses, do the check on the first element of the
	 group (the segment/access size then covers the whole group).  */
      stmt_vec_info dr_group_first_a = DR_GROUP_FIRST_ELEMENT (stmt_info_a);
      if (dr_group_first_a)
	{
	  stmt_info_a = dr_group_first_a;
	  dr_info_a = STMT_VINFO_DR_INFO (stmt_info_a);
	}

      stmt_vec_info dr_group_first_b = DR_GROUP_FIRST_ELEMENT (stmt_info_b);
      if (dr_group_first_b)
	{
	  stmt_info_b = dr_group_first_b;
	  dr_info_b = STMT_VINFO_DR_INFO (stmt_info_b);
	}

      if (ignore_step_p)
	{
	  segment_length_a = size_zero_node;
	  segment_length_b = size_zero_node;
	}
      else
	{
	  if (!operand_equal_p (DR_STEP (dr_info_a->dr),
				DR_STEP (dr_info_b->dr), 0))
	    {
	      /* Different steps: the segments must cover the whole
		 scalar iteration count, which therefore must be known.  */
	      length_factor = scalar_loop_iters;
	      if (TREE_CODE (length_factor) == SCEV_NOT_KNOWN)
		return opt_result::failure_at (vect_location,
					       "Unsupported alias check on"
					       " uncounted loop\n");
	    }
	  else
	    length_factor = size_int (vect_factor);
	  segment_length_a = vect_vfa_segment_size (dr_info_a, length_factor);
	  segment_length_b = vect_vfa_segment_size (dr_info_b, length_factor);
	}
      access_size_a = vect_vfa_access_size (loop_vinfo, dr_info_a);
      access_size_b = vect_vfa_access_size (loop_vinfo, dr_info_b);
      align_a = vect_vfa_align (dr_info_a);
      align_b = vect_vfa_align (dr_info_b);

      /* See whether the alias is known at compilation time.  */
      if (operand_equal_p (DR_BASE_ADDRESS (dr_info_a->dr),
			   DR_BASE_ADDRESS (dr_info_b->dr), 0)
	  && operand_equal_p (DR_OFFSET (dr_info_a->dr),
			      DR_OFFSET (dr_info_b->dr), 0)
	  && TREE_CODE (DR_STEP (dr_info_a->dr)) == INTEGER_CST
	  && TREE_CODE (DR_STEP (dr_info_b->dr)) == INTEGER_CST
	  && poly_int_tree_p (segment_length_a)
	  && poly_int_tree_p (segment_length_b))
	{
	  int res = vect_compile_time_alias (dr_info_a, dr_info_b,
					     segment_length_a,
					     segment_length_b,
					     access_size_a,
					     access_size_b);
	  if (res >= 0 && dump_enabled_p ())
	    {
	      dump_printf_loc (MSG_NOTE, vect_location,
			       "can tell at compile time that %T and %T",
			       DR_REF (dr_info_a->dr), DR_REF (dr_info_b->dr));
	      if (res == 0)
		dump_printf (MSG_NOTE, " do not alias\n");
	      else
		dump_printf (MSG_NOTE, " alias\n");
	    }

	  /* Known not to alias: no runtime check needed for this pair.  */
	  if (res == 0)
	    continue;

	  /* Known to alias: versioning cannot help, give up.  */
	  if (res == 1)
	    return opt_result::failure_at (stmt_info_b->stmt,
					   "not vectorized:"
					   " compilation time alias: %G%G",
					   stmt_info_a->stmt,
					   stmt_info_b->stmt);
	}

      /* dr_with_seg_len requires the alignment to apply to the segment length
	 and access size, not just the start address.  The access size can be
	 smaller than the pointer alignment for grouped accesses and bitfield
	 references; see PR115192 and PR116125 respectively.  */
      align_a = std::min (align_a, least_bit_hwi (access_size_a));
      align_b = std::min (align_b, least_bit_hwi (access_size_b));

      dr_with_seg_len dr_a (dr_info_a->dr, segment_length_a,
			    access_size_a, align_a);
      dr_with_seg_len dr_b (dr_info_b->dr, segment_length_b,
			    access_size_b, align_b);
      /* Canonicalize the order to be the one that's needed for accurate
	 RAW, WAR and WAW flags, in cases where the data references are
	 well-ordered.  The order doesn't really matter otherwise,
	 but we might as well be consistent.  */
      if (get_later_stmt (stmt_info_a, stmt_info_b) == stmt_info_a)
	std::swap (dr_a, dr_b);

      dr_with_seg_len_pair_t dr_with_seg_len_pair
	(dr_a, dr_b, (preserves_scalar_order_p
		      ? dr_with_seg_len_pair_t::WELL_ORDERED
		      : dr_with_seg_len_pair_t::REORDERED));

      comp_alias_ddrs.safe_push (dr_with_seg_len_pair);
    }

  prune_runtime_alias_test_list (&comp_alias_ddrs, vect_factor);

  unsigned int count = (comp_alias_ddrs.length ()
			+ check_unequal_addrs.length ());

  /* The very-cheap cost model does not tolerate any runtime checks.  */
  if (count
      && (loop_cost_model (LOOP_VINFO_LOOP (loop_vinfo))
	  == VECT_COST_MODEL_VERY_CHEAP))
    return opt_result::failure_at
      (vect_location, "would need a runtime alias check\n");

  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location,
		     "improved number of alias checks from %d to %d\n",
		     may_alias_ddrs.length (), count);
  unsigned limit = param_vect_max_version_for_alias_checks;
  /* The cheap cost model tolerates fewer checks than the default.  */
  if (loop_cost_model (LOOP_VINFO_LOOP (loop_vinfo)) == VECT_COST_MODEL_CHEAP)
    limit = param_vect_max_version_for_alias_checks * 6 / 10;
  if (count > limit)
    return opt_result::failure_at
      (vect_location,
       "number of versioning for alias run-time tests exceeds %d "
       "(--param vect-max-version-for-alias-checks)\n", limit);

  return opt_result::success ();
}
4465 :
/* Structure to hold information about a supported gather/scatter
   configuration.  */
struct gather_scatter_config
{
  /* The supported internal function (e.g. IFN_MASK_GATHER_LOAD).  */
  internal_fn ifn;
  /* The vector type to use for the offset operand.  */
  tree offset_vectype;
  /* The offset scale supported by the target for this configuration.  */
  int scale;
  /* The else values supported by the target, as filled in by
     internal_gather_scatter_fn_supported_p.  */
  vec<int> elsvals;
};
4475 :
4476 : /* Determine which gather/scatter IFN is supported for the given parameters.
4477 : IFN_MASK_GATHER_LOAD, IFN_GATHER_LOAD, and IFN_MASK_LEN_GATHER_LOAD
4478 : are mutually exclusive, so we only need to find one. Return the
4479 : supported IFN or IFN_LAST if none are supported. */
4480 :
4481 : static internal_fn
4482 1407910 : vect_gather_scatter_which_ifn (bool read_p, bool masked_p,
4483 : tree vectype, tree memory_type,
4484 : tree offset_vectype, int scale,
4485 : vec<int> *elsvals)
4486 : {
4487 : /* Work out which functions to try. */
4488 1407910 : internal_fn ifn, alt_ifn, alt_ifn2;
4489 1407910 : if (read_p)
4490 : {
4491 1122664 : ifn = masked_p ? IFN_MASK_GATHER_LOAD : IFN_GATHER_LOAD;
4492 : alt_ifn = IFN_MASK_GATHER_LOAD;
4493 : alt_ifn2 = IFN_MASK_LEN_GATHER_LOAD;
4494 : }
4495 : else
4496 : {
4497 285246 : ifn = masked_p ? IFN_MASK_SCATTER_STORE : IFN_SCATTER_STORE;
4498 : alt_ifn = IFN_MASK_SCATTER_STORE;
4499 : alt_ifn2 = IFN_MASK_LEN_SCATTER_STORE;
4500 : }
4501 :
4502 1407910 : if (!offset_vectype)
4503 : return IFN_LAST;
4504 :
4505 1407910 : if (internal_gather_scatter_fn_supported_p (ifn, vectype, memory_type,
4506 : offset_vectype, scale, elsvals))
4507 : return ifn;
4508 1407910 : if (internal_gather_scatter_fn_supported_p (alt_ifn, vectype, memory_type,
4509 : offset_vectype, scale, elsvals))
4510 : return alt_ifn;
4511 1407910 : if (internal_gather_scatter_fn_supported_p (alt_ifn2, vectype, memory_type,
4512 : offset_vectype, scale, elsvals))
4513 : return alt_ifn2;
4514 :
4515 : return IFN_LAST;
4516 : }
4517 :
/* Collect all supported offset vector types for a gather load or scatter
   store.  READ_P is true for loads and false for stores.  MASKED_P is true
   if the load or store is conditional.  VECTYPE is the data vector type.
   MEMORY_TYPE is the type of the memory elements being loaded or stored,
   and OFFSET_TYPE is the type of the offset.
   SCALE is the amount by which the offset should be multiplied.

   Return a vector of all configurations the target supports (which can
   be none).  */

static auto_vec<gather_scatter_config>
vect_gather_scatter_get_configs (vec_info *vinfo, bool read_p, bool masked_p,
				 tree vectype, tree memory_type,
				 tree offset_type, int scale)
{
  auto_vec<gather_scatter_config> configs;

  auto_vec<tree, 8> offset_types_to_try;

  /* Try all sizes from the offset type's precision up to POINTER_SIZE.  */
  for (unsigned int bits = TYPE_PRECISION (offset_type);
       bits <= POINTER_SIZE;
       bits *= 2)
    {
      /* Signed variant.  */
      offset_types_to_try.safe_push
	(build_nonstandard_integer_type (bits, 0));
      /* Unsigned variant.  */
      offset_types_to_try.safe_push
	(build_nonstandard_integer_type (bits, 1));
    }

  /* Once we find which IFN works for one offset type, we know that it
     will work for other offset types as well.  Then we can perform
     the checks for the remaining offset types with only that IFN.
     However, we might need to try different offset types to find which
     IFN is supported, since the check is offset-type-specific.  */
  internal_fn ifn = IFN_LAST;

  /* Try each offset type.  */
  for (unsigned int i = 0; i < offset_types_to_try.length (); i++)
    {
      tree offset_type = offset_types_to_try[i];
      tree offset_vectype = get_vectype_for_scalar_type (vinfo, offset_type);
      /* Skip offset types the target has no vector type for.  */
      if (!offset_vectype)
	continue;

      /* Try multiple scale values.  Start with exact match, then try
	 smaller common scales that a target might support.  */
      int scales_to_try[] = {scale, 1, 2, 4, 8};

      for (unsigned int j = 0;
	   j < sizeof (scales_to_try) / sizeof (*scales_to_try);
	   j++)
	{
	  int try_scale = scales_to_try[j];

	  /* Skip scales >= requested scale (except for exact match).  */
	  if (j > 0 && try_scale >= scale)
	    continue;

	  /* Skip if requested scale is not a multiple of this scale.  */
	  if (j > 0 && scale % try_scale != 0)
	    continue;

	  /* Else values for this candidate, filled in by the
	     supported_p queries; ownership passes to the config.  */
	  vec<int> elsvals = vNULL;

	  /* If we haven't determined which IFN is supported yet, try all three
	     to find which one the target supports.  */
	  if (ifn == IFN_LAST)
	    {
	      ifn = vect_gather_scatter_which_ifn (read_p, masked_p,
						   vectype, memory_type,
						   offset_vectype, try_scale,
						   &elsvals);
	      if (ifn != IFN_LAST)
		{
		  /* Found which IFN is supported.  Save this configuration.  */
		  gather_scatter_config config;
		  config.ifn = ifn;
		  config.offset_vectype = offset_vectype;
		  config.scale = try_scale;
		  config.elsvals = elsvals;
		  configs.safe_push (config);
		}
	    }
	  else
	    {
	      /* We already know which IFN is supported, just check if this
		 offset type and scale work with it.  */
	      if (internal_gather_scatter_fn_supported_p (ifn, vectype,
							  memory_type,
							  offset_vectype,
							  try_scale,
							  &elsvals))
		{
		  gather_scatter_config config;
		  config.ifn = ifn;
		  config.offset_vectype = offset_vectype;
		  config.scale = try_scale;
		  config.elsvals = elsvals;
		  configs.safe_push (config);
		}
	    }
	}
    }

  return configs;
}
4627 :
/* Check whether we can use an internal function for a gather load
   or scatter store.  READ_P is true for loads and false for stores.
   MASKED_P is true if the load or store is conditional.  MEMORY_TYPE is
   the type of the memory elements being loaded or stored.  OFFSET_TYPE
   is the type of the offset that is being applied to the invariant
   base address.  If OFFSET_TYPE is scalar the function chooses an
   appropriate vector type for it.  SCALE is the amount by which the
   offset should be multiplied *after* it has been converted to address width.
   If the target does not support the requested SCALE, SUPPORTED_SCALE
   will contain the scale that is actually supported
   (which may be smaller, requiring additional multiplication).
   Otherwise SUPPORTED_SCALE is 0.

   Return true if the function is supported, storing the function id in
   *IFN_OUT and the vector type for the offset in *OFFSET_VECTYPE_OUT.
   If we support an offset vector type with different signedness than
   OFFSET_TYPE store it in SUPPORTED_OFFSET_VECTYPE.

   If we can use gather/scatter and ELSVALS is nonzero, store the possible
   else values in ELSVALS.  */

bool
vect_gather_scatter_fn_p (vec_info *vinfo, bool read_p, bool masked_p,
			  tree vectype, tree memory_type, tree offset_type,
			  int scale, int *supported_scale,
			  internal_fn *ifn_out,
			  tree *offset_vectype_out,
			  tree *supported_offset_vectype,
			  vec<int> *elsvals)
{
  *supported_offset_vectype = NULL_TREE;
  *supported_scale = 0;
  unsigned int memory_bits = tree_to_uhwi (TYPE_SIZE (memory_type));
  unsigned int element_bits = vector_element_bits (vectype);
  if (element_bits != memory_bits)
    /* For now the vector elements must be the same width as the
       memory elements.  */
    return false;

  /* Get the original offset vector type for comparison.  */
  tree offset_vectype = VECTOR_TYPE_P (offset_type)
    ? offset_type : get_vectype_for_scalar_type (vinfo, offset_type);

  /* If there is no offset vectype, bail.  */
  if (!offset_vectype)
    return false;

  offset_type = TREE_TYPE (offset_vectype);

  /* Get all supported configurations for this data vector type.  */
  auto_vec<gather_scatter_config> configs
    = vect_gather_scatter_get_configs (vinfo, read_p, masked_p, vectype,
				       memory_type, offset_type, scale);

  if (configs.is_empty ())
    return false;

  /* Selection priority:
     1 - Exact scale match + offset type match
     2 - Exact scale match + sign-swapped offset
     3 - Smaller scale + offset type match
     4 - Smaller scale + sign-swapped offset
     Within each category, prefer smaller offset types.  */

  /* First pass: exact scale match with no conversion.  */
  for (unsigned int i = 0; i < configs.length (); i++)
    {
      if (configs[i].scale == scale
	  && TYPE_SIGN (configs[i].offset_vectype)
	     == TYPE_SIGN (offset_vectype))
	{
	  *ifn_out = configs[i].ifn;
	  *offset_vectype_out = configs[i].offset_vectype;
	  if (elsvals)
	    *elsvals = configs[i].elsvals;
	  return true;
	}
    }

  /* No direct match.  This means we try to find either
     - a sign-swapped offset vectype or
     - a different scale and 2x larger offset type
     - a different scale and larger sign-swapped offset vectype.  */
  unsigned int offset_precision = TYPE_PRECISION (TREE_TYPE (offset_vectype));
  unsigned int needed_precision
    = TYPE_UNSIGNED (offset_vectype) ? offset_precision * 2 : POINTER_SIZE;
  needed_precision = std::min (needed_precision, (unsigned) POINTER_SIZE);

  /* Second pass: No direct match.  This means we try to find a sign-swapped
     offset vectype.  */
  /* TMP is only an output dummy for supportable_convert_operation.  */
  enum tree_code tmp;
  for (unsigned int i = 0; i < configs.length (); i++)
    {
      unsigned int precision
	= TYPE_PRECISION (TREE_TYPE (configs[i].offset_vectype));
      if (configs[i].scale == scale
	  && precision >= needed_precision
	  && (supportable_convert_operation (CONVERT_EXPR,
					     configs[i].offset_vectype,
					     offset_vectype, &tmp)
	      || (needed_precision == offset_precision
		  && tree_nop_conversion_p (configs[i].offset_vectype,
					    offset_vectype))))
	{
	  *ifn_out = configs[i].ifn;
	  *offset_vectype_out = offset_vectype;
	  *supported_offset_vectype = configs[i].offset_vectype;
	  if (elsvals)
	    *elsvals = configs[i].elsvals;
	  return true;
	}
    }

  /* Third pass: Try a smaller scale with the same signedness.  */
  needed_precision = offset_precision * 2;
  needed_precision = std::min (needed_precision, (unsigned) POINTER_SIZE);

  for (unsigned int i = 0; i < configs.length (); i++)
    {
      unsigned int precision
	= TYPE_PRECISION (TREE_TYPE (configs[i].offset_vectype));
      if (configs[i].scale < scale
	  && TYPE_SIGN (configs[i].offset_vectype)
	     == TYPE_SIGN (offset_vectype)
	  && precision >= needed_precision)
	{
	  *ifn_out = configs[i].ifn;
	  *offset_vectype_out = configs[i].offset_vectype;
	  *supported_scale = configs[i].scale;
	  if (elsvals)
	    *elsvals = configs[i].elsvals;
	  return true;
	}
    }

  /* Fourth pass: Try a smaller scale and sign-swapped offset vectype.  */
  needed_precision
    = TYPE_UNSIGNED (offset_vectype) ? offset_precision * 2 : POINTER_SIZE;
  needed_precision = std::min (needed_precision, (unsigned) POINTER_SIZE);

  for (unsigned int i = 0; i < configs.length (); i++)
    {
      unsigned int precision
	= TYPE_PRECISION (TREE_TYPE (configs[i].offset_vectype));
      if (configs[i].scale < scale
	  && precision >= needed_precision
	  && (supportable_convert_operation (CONVERT_EXPR,
					     configs[i].offset_vectype,
					     offset_vectype, &tmp)
	      || (needed_precision == offset_precision
		  && tree_nop_conversion_p (configs[i].offset_vectype,
					    offset_vectype))))
	{
	  *ifn_out = configs[i].ifn;
	  *offset_vectype_out = offset_vectype;
	  *supported_offset_vectype = configs[i].offset_vectype;
	  *supported_scale = configs[i].scale;
	  if (elsvals)
	    *elsvals = configs[i].elsvals;
	  return true;
	}
    }

  return false;
}
4793 :
4794 : /* STMT_INFO is a call to an internal gather load or scatter store function.
4795 : Describe the operation in INFO. */
4796 :
4797 : void
4798 0 : vect_describe_gather_scatter_call (stmt_vec_info stmt_info,
4799 : gather_scatter_info *info)
4800 : {
4801 0 : gcall *call = as_a <gcall *> (stmt_info->stmt);
4802 0 : tree vectype = STMT_VINFO_VECTYPE (stmt_info);
4803 0 : data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
4804 :
4805 0 : info->ifn = gimple_call_internal_fn (call);
4806 0 : info->decl = NULL_TREE;
4807 0 : info->base = gimple_call_arg (call, 0);
4808 0 : info->alias_ptr = gimple_call_arg
4809 0 : (call, internal_fn_alias_ptr_index (info->ifn));
4810 0 : info->offset = gimple_call_arg
4811 0 : (call, internal_fn_offset_index (info->ifn));
4812 0 : info->offset_vectype = NULL_TREE;
4813 0 : info->scale = TREE_INT_CST_LOW (gimple_call_arg
4814 : (call, internal_fn_scale_index (info->ifn)));
4815 0 : info->element_type = TREE_TYPE (vectype);
4816 0 : info->memory_type = TREE_TYPE (DR_REF (dr));
4817 0 : }
4818 :
/* Return true if a non-affine read or write in STMT_INFO is suitable for a
   gather load or scatter store with VECTYPE.  Describe the operation in *INFO
   if so.  If it is suitable and ELSVALS is nonzero store the supported else
   values in the vector it points to.  */

bool
vect_check_gather_scatter (stmt_vec_info stmt_info, tree vectype,
			   loop_vec_info loop_vinfo,
			   gather_scatter_info *info, vec<int> *elsvals)
{
  HOST_WIDE_INT scale = 1;
  poly_int64 pbitpos, pbitsize;
  class loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
  struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
  tree offtype = NULL_TREE;
  tree decl = NULL_TREE, base, off;
  tree memory_type = TREE_TYPE (DR_REF (dr));
  machine_mode pmode;
  int punsignedp, reversep, pvolatilep = 0;
  internal_fn ifn;
  tree offset_vectype;
  bool masked_p = false;

  /* See whether this is already a call to a gather/scatter internal function.
     If not, see whether it's a masked load or store.  */
  gcall *call = dyn_cast <gcall *> (stmt_info->stmt);
  if (call && gimple_call_internal_p (call))
    {
      ifn = gimple_call_internal_fn (call);
      if (internal_gather_scatter_fn_p (ifn))
	{
	  vect_describe_gather_scatter_call (stmt_info, info);

	  /* In pattern recog we simply used a ZERO else value that
	     we need to correct here.  To that end just re-use the
	     (already successful) check if we support a gather IFN
	     and have it populate the else values.  */
	  if (DR_IS_READ (dr) && internal_fn_mask_index (ifn) >= 0 && elsvals)
	    supports_vec_gather_load_p (TYPE_MODE (vectype), elsvals);
	  return true;
	}
      masked_p = (ifn == IFN_MASK_LOAD || ifn == IFN_MASK_STORE);
    }

  /* True if we should aim to use internal functions rather than
     built-in functions.  */
  bool use_ifn_p = (DR_IS_READ (dr)
		    ? supports_vec_gather_load_p (TYPE_MODE (vectype),
						  elsvals)
		    : supports_vec_scatter_store_p (TYPE_MODE (vectype)));

  base = DR_REF (dr);
  /* For masked loads/stores, DR_REF (dr) is an artificial MEM_REF,
     see if we can use the def stmt of the address.  */
  if (masked_p
      && TREE_CODE (base) == MEM_REF
      && TREE_CODE (TREE_OPERAND (base, 0)) == SSA_NAME
      && integer_zerop (TREE_OPERAND (base, 1))
      && !expr_invariant_in_loop_p (loop, TREE_OPERAND (base, 0)))
    {
      gimple *def_stmt = SSA_NAME_DEF_STMT (TREE_OPERAND (base, 0));
      if (is_gimple_assign (def_stmt)
	  && gimple_assign_rhs_code (def_stmt) == ADDR_EXPR)
	base = TREE_OPERAND (gimple_assign_rhs1 (def_stmt), 0);
    }

  /* The gather and scatter builtins need address of the form
     loop_invariant + vector * {1, 2, 4, 8}
     or
     loop_invariant + sign_extend (vector) * { 1, 2, 4, 8 }.
     Unfortunately DR_BASE_ADDRESS/DR_OFFSET can be a mixture
     of loop invariants/SSA_NAMEs defined in the loop, with casts,
     multiplications and additions in it.  To get a vector, we need
     a single SSA_NAME that will be defined in the loop and will
     contain everything that is not loop invariant and that can be
     vectorized.  The following code attempts to find such a preexisting
     SSA_NAME OFF and put the loop invariants into a tree BASE
     that can be gimplified before the loop.  */
  base = get_inner_reference (base, &pbitsize, &pbitpos, &off, &pmode,
			      &punsignedp, &reversep, &pvolatilep);
  if (reversep)
    return false;

  /* PR 107346.  Packed structs can have fields at offsets that are not
     multiples of BITS_PER_UNIT.  Do not use gather/scatters in such cases.  */
  if (!multiple_p (pbitpos, BITS_PER_UNIT))
    return false;

  /* We need to be able to form an address to the base which for example
     isn't possible for hard registers.  */
  if (may_be_nonaddressable_p (base))
    return false;

  poly_int64 pbytepos = exact_div (pbitpos, BITS_PER_UNIT);

  /* Fold any constant MEM_REF offset into OFF so that BASE becomes the
     bare address SSA_NAME or decl address.  */
  if (TREE_CODE (base) == MEM_REF)
    {
      if (!integer_zerop (TREE_OPERAND (base, 1)))
	{
	  if (off == NULL_TREE)
	    off = wide_int_to_tree (sizetype, mem_ref_offset (base));
	  else
	    off = size_binop (PLUS_EXPR, off,
			      fold_convert (sizetype, TREE_OPERAND (base, 1)));
	}
      base = TREE_OPERAND (base, 0);
    }
  else
    base = build_fold_addr_expr (base);

  if (off == NULL_TREE)
    off = size_zero_node;

  /* BASE must be loop invariant.  If it is not invariant, but OFF is,
     then we can fix that by swapping BASE and OFF.  */
  if (!expr_invariant_in_loop_p (loop, base))
    {
      if (!expr_invariant_in_loop_p (loop, off))
	return false;

      std::swap (base, off);
    }

  base = fold_convert (sizetype, base);
  base = size_binop (PLUS_EXPR, base, size_int (pbytepos));
  int tmp_scale;
  tree tmp_offset_vectype;

  /* OFF at this point may be either a SSA_NAME or some tree expression
     from get_inner_reference.  Try to peel off loop invariants from it
     into BASE as long as possible.  */
  STRIP_NOPS (off);
  while (offtype == NULL_TREE)
    {
      enum tree_code code;
      tree op0, op1, add = NULL_TREE;

      if (TREE_CODE (off) == SSA_NAME)
	{
	  gimple *def_stmt = SSA_NAME_DEF_STMT (off);

	  if (expr_invariant_in_loop_p (loop, off))
	    return false;

	  if (gimple_code (def_stmt) != GIMPLE_ASSIGN)
	    break;

	  op0 = gimple_assign_rhs1 (def_stmt);
	  code = gimple_assign_rhs_code (def_stmt);
	  op1 = gimple_assign_rhs2 (def_stmt);
	}
      else
	{
	  if (get_gimple_rhs_class (TREE_CODE (off)) == GIMPLE_TERNARY_RHS)
	    return false;
	  code = TREE_CODE (off);
	  extract_ops_from_tree (off, &code, &op0, &op1);
	}
      switch (code)
	{
	case POINTER_PLUS_EXPR:
	case PLUS_EXPR:
	  /* A loop-invariant addend is accumulated into BASE (scaled by
	     the current SCALE) and peeling continues with the other
	     operand.  */
	  if (expr_invariant_in_loop_p (loop, op0))
	    {
	      add = op0;
	      off = op1;
	    do_add:
	      add = fold_convert (sizetype, add);
	      if (scale != 1)
		add = size_binop (MULT_EXPR, add, size_int (scale));
	      base = size_binop (PLUS_EXPR, base, add);
	      continue;
	    }
	  if (expr_invariant_in_loop_p (loop, op1))
	    {
	      add = op1;
	      off = op0;
	      goto do_add;
	    }
	  break;
	case MINUS_EXPR:
	  if (expr_invariant_in_loop_p (loop, op1))
	    {
	      add = fold_convert (sizetype, op1);
	      add = size_binop (MINUS_EXPR, size_zero_node, add);
	      off = op0;
	      goto do_add;
	    }
	  break;
	case MULT_EXPR:
	  if (scale == 1 && tree_fits_shwi_p (op1))
	    {
	      int new_scale = tree_to_shwi (op1);
	      /* Only treat this as a scaling operation if the target
		 supports it for at least some offset type.  */
	      if (use_ifn_p
		  && !vect_gather_scatter_fn_p (loop_vinfo, DR_IS_READ (dr),
						masked_p, vectype, memory_type,
						signed_char_type_node,
						new_scale, &tmp_scale,
						&ifn,
						&offset_vectype,
						&tmp_offset_vectype,
						elsvals)
		  && !vect_gather_scatter_fn_p (loop_vinfo, DR_IS_READ (dr),
						masked_p, vectype, memory_type,
						unsigned_char_type_node,
						new_scale, &tmp_scale,
						&ifn,
						&offset_vectype,
						&tmp_offset_vectype,
						elsvals))
		break;
	      scale = new_scale;
	      off = op0;
	      continue;
	    }
	  break;
	case SSA_NAME:
	  /* OFF is defined by a plain copy; continue with the copied
	     value.  */
	  off = op0;
	  continue;
	CASE_CONVERT:
	  if (!POINTER_TYPE_P (TREE_TYPE (op0))
	      && !INTEGRAL_TYPE_P (TREE_TYPE (op0)))
	    break;

	  /* Don't include the conversion if the target is happy with
	     the current offset type.  */
	  if (use_ifn_p
	      && TREE_CODE (off) == SSA_NAME
	      && !POINTER_TYPE_P (TREE_TYPE (off))
	      && vect_gather_scatter_fn_p (loop_vinfo, DR_IS_READ (dr),
					   masked_p, vectype, memory_type,
					   TREE_TYPE (off),
					   scale, &tmp_scale,
					   &ifn,
					   &offset_vectype,
					   &tmp_offset_vectype,
					   elsvals))
	    break;

	  /* Same-precision conversions can be looked through freely.  */
	  if (TYPE_PRECISION (TREE_TYPE (op0))
	      == TYPE_PRECISION (TREE_TYPE (off)))
	    {
	      off = op0;
	      continue;
	    }

	  /* Include the conversion if it is widening and we're using
	     the IFN path or the target can handle the converted from
	     offset or the current size is not already the same as the
	     data vector element size.  */
	  if ((TYPE_PRECISION (TREE_TYPE (op0))
	       < TYPE_PRECISION (TREE_TYPE (off)))
	      && (use_ifn_p
		  || (DR_IS_READ (dr)
		      ? (targetm.vectorize.builtin_gather
			 && targetm.vectorize.builtin_gather (vectype,
							      TREE_TYPE (op0),
							      scale))
		      : (targetm.vectorize.builtin_scatter
			 && targetm.vectorize.builtin_scatter (vectype,
							       TREE_TYPE (op0),
							       scale)))
		  || !operand_equal_p (TYPE_SIZE (TREE_TYPE (off)),
				       TYPE_SIZE (TREE_TYPE (vectype)), 0)))
	    {
	      off = op0;
	      offtype = TREE_TYPE (off);
	      STRIP_NOPS (off);
	      continue;
	    }
	  break;
	default:
	  break;
	}
      break;
    }

  /* If at the end OFF still isn't a SSA_NAME or isn't
     defined in the loop, punt.  */
  if (TREE_CODE (off) != SSA_NAME
      || expr_invariant_in_loop_p (loop, off))
    return false;

  if (offtype == NULL_TREE)
    offtype = TREE_TYPE (off);

  /* Finally check whether the chosen offset type and scale are actually
     supported, either via internal functions or via target builtins.  */
  if (use_ifn_p)
    {
      if (!vect_gather_scatter_fn_p (loop_vinfo, DR_IS_READ (dr), masked_p,
				     vectype, memory_type, offtype,
				     scale, &tmp_scale,
				     &ifn, &offset_vectype,
				     &tmp_offset_vectype,
				     elsvals))
	ifn = IFN_LAST;
      decl = NULL_TREE;
    }
  else
    {
      if (DR_IS_READ (dr))
	{
	  if (targetm.vectorize.builtin_gather)
	    decl = targetm.vectorize.builtin_gather (vectype, offtype, scale);
	}
      else
	{
	  if (targetm.vectorize.builtin_scatter)
	    decl = targetm.vectorize.builtin_scatter (vectype, offtype, scale);
	}
      ifn = IFN_LAST;
      /* The offset vector type will be read from DECL when needed.  */
      offset_vectype = NULL_TREE;
    }

  /* By now all loop-invariant parts have been folded into BASE and OFF
     is the loop-variant part of the address.  */
  gcc_checking_assert (expr_invariant_in_loop_p (loop, base));
  gcc_checking_assert (!expr_invariant_in_loop_p (loop, off));

  info->ifn = ifn;
  info->decl = decl;
  info->base = base;

  info->alias_ptr = build_int_cst
    (reference_alias_ptr_type (DR_REF (dr)),
     get_object_alignment (DR_REF (dr)));

  info->offset = off;
  info->offset_vectype = offset_vectype;
  info->scale = scale;
  info->element_type = TREE_TYPE (vectype);
  info->memory_type = memory_type;
  return true;
}
5153 :
/* Find the data references in STMT, analyze them with respect to LOOP and
   append them to DATAREFS.  Return false if datarefs in this stmt cannot
   be handled.  */

opt_result
vect_find_stmt_data_reference (loop_p loop, gimple *stmt,
			       vec<data_reference_p> *datarefs,
			       vec<int> *dataref_groups, int group_id)
{
  /* We can ignore clobbers for dataref analysis - they are removed during
     loop vectorization and BB vectorization checks dependences with a
     stmt walk.  */
  if (gimple_clobber_p (stmt))
    return opt_result::success ();

  if (gimple_has_volatile_ops (stmt))
    return opt_result::failure_at (stmt, "not vectorized: volatile type: %G",
				   stmt);

  if (stmt_can_throw_internal (cfun, stmt))
    return opt_result::failure_at (stmt,
				   "not vectorized:"
				   " statement can throw an exception: %G",
				   stmt);

  auto_vec<data_reference_p, 2> refs;
  opt_result res = find_data_references_in_stmt (loop, stmt, &refs);
  if (!res)
    return res;

  if (refs.is_empty ())
    return opt_result::success ();

  /* Only statements with exactly one data reference are handled; free
     the analyzed refs before bailing out.  */
  if (refs.length () > 1)
    {
      while (!refs.is_empty ())
	free_data_ref (refs.pop ());
      return opt_result::failure_at (stmt,
				     "not vectorized: more than one "
				     "data ref in stmt: %G", stmt);
    }

  /* Of call statements only IFN_MASK_LOAD/IFN_MASK_STORE may carry a
     data reference.  */
  data_reference_p dr = refs.pop ();
  if (gcall *call = dyn_cast <gcall *> (stmt))
    if (!gimple_call_internal_p (call)
	|| (gimple_call_internal_fn (call) != IFN_MASK_LOAD
	    && gimple_call_internal_fn (call) != IFN_MASK_STORE))
      {
	free_data_ref (dr);
	return opt_result::failure_at (stmt,
				       "not vectorized: dr in a call %G", stmt);
      }

  if (TREE_CODE (DR_REF (dr)) == COMPONENT_REF
      && DECL_BIT_FIELD (TREE_OPERAND (DR_REF (dr), 1)))
    {
      free_data_ref (dr);
      return opt_result::failure_at (stmt,
				     "not vectorized:"
				     " statement is an unsupported"
				     " bitfield access %G", stmt);
    }

  if (DR_BASE_ADDRESS (dr)
      && TREE_CODE (DR_BASE_ADDRESS (dr)) == INTEGER_CST)
    {
      free_data_ref (dr);
      return opt_result::failure_at (stmt,
				     "not vectorized:"
				     " base addr of dr is a constant\n");
    }

  /* Check whether this may be a SIMD lane access and adjust the
     DR to make it easier for us to handle it.  */
  if (loop
      && loop->simduid
      && (!DR_BASE_ADDRESS (dr)
	  || !DR_OFFSET (dr)
	  || !DR_INIT (dr)
	  || !DR_STEP (dr)))
    {
      /* Re-analyze the reference against the innermost loop containing
	 STMT, where its address may be affine even though analysis
	 relative to LOOP failed.  */
      struct data_reference *newdr
	= create_data_ref (NULL, loop_containing_stmt (stmt), DR_REF (dr), stmt,
			   DR_IS_READ (dr), DR_IS_CONDITIONAL_IN_STMT (dr));
      if (DR_BASE_ADDRESS (newdr)
	  && DR_OFFSET (newdr)
	  && DR_INIT (newdr)
	  && DR_STEP (newdr)
	  && TREE_CODE (DR_INIT (newdr)) == INTEGER_CST
	  && integer_zerop (DR_STEP (newdr)))
	{
	  /* Try to recognize the pattern base + simd_lane * step and
	     verify the multiplier comes from IFN_GOMP_SIMD_LANE of the
	     loop's simduid.  */
	  tree base_address = DR_BASE_ADDRESS (newdr);
	  tree off = DR_OFFSET (newdr);
	  tree step = ssize_int (1);
	  if (integer_zerop (off)
	      && TREE_CODE (base_address) == POINTER_PLUS_EXPR)
	    {
	      off = TREE_OPERAND (base_address, 1);
	      base_address = TREE_OPERAND (base_address, 0);
	    }
	  STRIP_NOPS (off);
	  if (TREE_CODE (off) == MULT_EXPR
	      && tree_fits_uhwi_p (TREE_OPERAND (off, 1)))
	    {
	      step = TREE_OPERAND (off, 1);
	      off = TREE_OPERAND (off, 0);
	      STRIP_NOPS (off);
	    }
	  if (CONVERT_EXPR_P (off)
	      && (TYPE_PRECISION (TREE_TYPE (TREE_OPERAND (off, 0)))
		  < TYPE_PRECISION (TREE_TYPE (off))))
	    off = TREE_OPERAND (off, 0);
	  if (TREE_CODE (off) == SSA_NAME)
	    {
	      gimple *def = SSA_NAME_DEF_STMT (off);
	      /* Look through widening conversion.  */
	      if (is_gimple_assign (def)
		  && CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (def)))
		{
		  tree rhs1 = gimple_assign_rhs1 (def);
		  if (TREE_CODE (rhs1) == SSA_NAME
		      && INTEGRAL_TYPE_P (TREE_TYPE (rhs1))
		      && (TYPE_PRECISION (TREE_TYPE (off))
			  > TYPE_PRECISION (TREE_TYPE (rhs1))))
		    def = SSA_NAME_DEF_STMT (rhs1);
		}
	      if (is_gimple_call (def)
		  && gimple_call_internal_p (def)
		  && (gimple_call_internal_fn (def) == IFN_GOMP_SIMD_LANE))
		{
		  tree arg = gimple_call_arg (def, 0);
		  tree reft = TREE_TYPE (DR_REF (newdr));
		  gcc_assert (TREE_CODE (arg) == SSA_NAME);
		  arg = SSA_NAME_VAR (arg);
		  if (arg == loop->simduid
		      /* For now.  */
		      && tree_int_cst_equal (TYPE_SIZE_UNIT (reft), step))
		    {
		      DR_BASE_ADDRESS (newdr) = base_address;
		      DR_OFFSET (newdr) = ssize_int (0);
		      DR_STEP (newdr) = step;
		      DR_OFFSET_ALIGNMENT (newdr) = BIGGEST_ALIGNMENT;
		      DR_STEP_ALIGNMENT (newdr) = highest_pow2_factor (step);
		      /* Mark as simd-lane access; the encoded aux value is
			 decoded again in vect_analyze_data_refs.  */
		      tree arg2 = gimple_call_arg (def, 1);
		      newdr->aux = (void *) (-1 - tree_to_uhwi (arg2));
		      free_data_ref (dr);
		      datarefs->safe_push (newdr);
		      if (dataref_groups)
			dataref_groups->safe_push (group_id);
		      return opt_result::success ();
		    }
		}
	    }
	}
      free_data_ref (newdr);
    }

  datarefs->safe_push (dr);
  if (dataref_groups)
    dataref_groups->safe_push (group_id);
  return opt_result::success ();
}
5317 :
/* Function vect_analyze_data_refs.

  Find all the data references in the loop or basic block.

   The general structure of the analysis of data refs in the vectorizer is as
   follows:
   1- vect_analyze_data_refs(loop/bb): call
      compute_data_dependences_for_loop/bb to find and analyze all data-refs
      in the loop/bb and their dependences.
   2- vect_analyze_dependences(): apply dependence testing using ddrs.
   3- vect_analyze_drs_alignment(): check that ref_stmt.alignment is ok.
   4- vect_analyze_drs_access(): check that ref_stmt.step is ok.

*/

opt_result
vect_analyze_data_refs (vec_info *vinfo, bool *fatal)
{
  class loop *loop = NULL;
  unsigned int i;
  struct data_reference *dr;
  tree scalar_type;

  DUMP_VECT_SCOPE ("vect_analyze_data_refs");

  if (loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo))
    loop = LOOP_VINFO_LOOP (loop_vinfo);

  /* Go through the data-refs, check that the analysis succeeded.  Update
     pointer from stmt_vec_info struct to DR and vectype.  */

  vec<data_reference_p> datarefs = vinfo->shared->datarefs;
  FOR_EACH_VEC_ELT (datarefs, i, dr)
    {
      /* Classification of the DR in case plain affine analysis failed.  */
      enum { SG_NONE, GATHER, SCATTER } gatherscatter = SG_NONE;

      gcc_assert (DR_REF (dr));
      stmt_vec_info stmt_info = vinfo->lookup_stmt (DR_STMT (dr));
      gcc_assert (!stmt_info->dr_aux.dr);
      stmt_info->dr_aux.dr = dr;
      stmt_info->dr_aux.stmt = stmt_info;

      /* Check that analysis of the data-ref succeeded.  */
      if (!DR_BASE_ADDRESS (dr) || !DR_OFFSET (dr) || !DR_INIT (dr)
	  || !DR_STEP (dr))
	{
	  bool maybe_gather
	    = DR_IS_READ (dr)
	      && !TREE_THIS_VOLATILE (DR_REF (dr));
	  bool maybe_scatter
	    = DR_IS_WRITE (dr)
	      && !TREE_THIS_VOLATILE (DR_REF (dr));

	  /* If target supports vector gather loads or scatter stores,
	     see if they can't be used.  */
	  if (is_a <loop_vec_info> (vinfo)
	      && !nested_in_vect_loop_p (loop, stmt_info))
	    {
	      if (maybe_gather || maybe_scatter)
		{
		  if (maybe_gather)
		    gatherscatter = GATHER;
		  else
		    gatherscatter = SCATTER;
		}
	    }

	  if (gatherscatter == SG_NONE)
	    {
	      if (dump_enabled_p ())
		dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
				 "not vectorized: data ref analysis "
				 "failed %G", stmt_info->stmt);
	      if (is_a <bb_vec_info> (vinfo))
		{
		  /* In BB vectorization the ref can still participate
		     in dependence analysis, we just can't vectorize it.  */
		  STMT_VINFO_VECTORIZABLE (stmt_info) = false;
		  continue;
		}
	      return opt_result::failure_at (stmt_info->stmt,
					     "not vectorized:"
					     " data ref analysis failed: %G",
					     stmt_info->stmt);
	    }
	}

      /* See if this was detected as SIMD lane access.  The small negative
	 aux values were encoded by vect_find_stmt_data_reference.  */
      if (dr->aux == (void *)-1
	  || dr->aux == (void *)-2
	  || dr->aux == (void *)-3
	  || dr->aux == (void *)-4)
	{
	  if (nested_in_vect_loop_p (loop, stmt_info))
	    return opt_result::failure_at (stmt_info->stmt,
					   "not vectorized:"
					   " data ref analysis failed: %G",
					   stmt_info->stmt);
	  STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info)
	    = -(uintptr_t) dr->aux;
	}

      tree base = get_base_address (DR_REF (dr));
      if (base && VAR_P (base) && DECL_NONALIASED (base))
	{
	  if (dump_enabled_p ())
	    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			     "not vectorized: base object not addressable "
			     "for stmt: %G", stmt_info->stmt);
	  if (is_a <bb_vec_info> (vinfo))
	    {
	      /* In BB vectorization the ref can still participate
		 in dependence analysis, we just can't vectorize it.  */
	      STMT_VINFO_VECTORIZABLE (stmt_info) = false;
	      continue;
	    }
	  return opt_result::failure_at (stmt_info->stmt,
					 "not vectorized: base object not"
					 " addressable for stmt: %G",
					 stmt_info->stmt);
	}

      /* A non-constant step makes the access strided.  */
      if (is_a <loop_vec_info> (vinfo)
	  && DR_STEP (dr)
	  && TREE_CODE (DR_STEP (dr)) != INTEGER_CST)
	{
	  if (nested_in_vect_loop_p (loop, stmt_info))
	    return opt_result::failure_at (stmt_info->stmt,
					   "not vectorized: "
					   "not suitable for strided load %G",
					   stmt_info->stmt);
	  STMT_VINFO_STRIDED_P (stmt_info) = true;
	}

      /* Update DR field in stmt_vec_info struct.  */

      /* If the dataref is in an inner-loop of the loop that is considered
	 for vectorization, we also want to analyze the access relative to
	 the outer-loop (DR contains information only relative to the
	 inner-most enclosing loop).  We do that by building a reference to the
	 first location accessed by the inner-loop, and analyze it relative to
	 the outer-loop.  */
      if (loop && nested_in_vect_loop_p (loop, stmt_info))
	{
	  /* Build a reference to the first location accessed by the
	     inner loop: *(BASE + INIT + OFFSET).  By construction,
	     this address must be invariant in the inner loop, so we
	     can consider it as being used in the outer loop.  */
	  tree base = unshare_expr (DR_BASE_ADDRESS (dr));
	  tree offset = unshare_expr (DR_OFFSET (dr));
	  tree init = unshare_expr (DR_INIT (dr));
	  tree init_offset = fold_build2 (PLUS_EXPR, TREE_TYPE (offset),
					  init, offset);
	  tree init_addr = fold_build_pointer_plus (base, init_offset);
	  tree init_ref = build_fold_indirect_ref (init_addr);

	  if (dump_enabled_p ())
	    dump_printf_loc (MSG_NOTE, vect_location,
			     "analyze in outer loop: %T\n", init_ref);

	  opt_result res
	    = dr_analyze_innermost (&STMT_VINFO_DR_WRT_VEC_LOOP (stmt_info),
				    init_ref, loop, stmt_info->stmt);
	  if (!res)
	    /* dr_analyze_innermost already explained the failure.  */
	    return res;

	  if (dump_enabled_p ())
	    dump_printf_loc (MSG_NOTE, vect_location,
			     "\touter base_address: %T\n"
			     "\touter offset from base address: %T\n"
			     "\touter constant offset from base address: %T\n"
			     "\touter step: %T\n"
			     "\touter base alignment: %d\n\n"
			     "\touter base misalignment: %d\n"
			     "\touter offset alignment: %d\n"
			     "\touter step alignment: %d\n",
			     STMT_VINFO_DR_BASE_ADDRESS (stmt_info),
			     STMT_VINFO_DR_OFFSET (stmt_info),
			     STMT_VINFO_DR_INIT (stmt_info),
			     STMT_VINFO_DR_STEP (stmt_info),
			     STMT_VINFO_DR_BASE_ALIGNMENT (stmt_info),
			     STMT_VINFO_DR_BASE_MISALIGNMENT (stmt_info),
			     STMT_VINFO_DR_OFFSET_ALIGNMENT (stmt_info),
			     STMT_VINFO_DR_STEP_ALIGNMENT (stmt_info));
	}

      /* Set vectype for STMT.  */
      scalar_type = TREE_TYPE (DR_REF (dr));
      tree vectype = get_vectype_for_scalar_type (vinfo, scalar_type);
      if (!vectype)
	{
	  if (dump_enabled_p ())
	    {
	      dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			       "not vectorized: no vectype for stmt: %G",
			       stmt_info->stmt);
	      dump_printf (MSG_MISSED_OPTIMIZATION, " scalar_type: ");
	      dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_DETAILS,
				 scalar_type);
	      dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
	    }

	  if (is_a <bb_vec_info> (vinfo))
	    {
	      /* No vector type is fine, the ref can still participate
		 in dependence analysis, we just can't vectorize it.  */
	      STMT_VINFO_VECTORIZABLE (stmt_info) = false;
	      continue;
	    }
	  if (fatal)
	    *fatal = false;
	  return opt_result::failure_at (stmt_info->stmt,
					 "not vectorized:"
					 " no vectype for stmt: %G"
					 " scalar_type: %T\n",
					 stmt_info->stmt, scalar_type);
	}
      else
	{
	  if (dump_enabled_p ())
	    dump_printf_loc (MSG_NOTE, vect_location,
			     "got vectype for stmt: %G%T\n",
			     stmt_info->stmt, vectype);
	}

      /* Leave the BB vectorizer to pick the vector type later, based on
	 the final dataref group size and SLP node size.  */
      if (is_a <loop_vec_info> (vinfo))
	STMT_VINFO_VECTYPE (stmt_info) = vectype;

      if (gatherscatter != SG_NONE)
	{
	  gather_scatter_info gs_info;
	  if (!vect_check_gather_scatter (stmt_info, vectype,
					  as_a <loop_vec_info> (vinfo),
					  &gs_info)
	      || !get_vectype_for_scalar_type (vinfo,
					       TREE_TYPE (gs_info.offset)))
	    {
	      if (fatal)
		*fatal = false;
	      return opt_result::failure_at
		(stmt_info->stmt,
		 (gatherscatter == GATHER)
		 ? "not vectorized: not suitable for gather load %G"
		 : "not vectorized: not suitable for scatter store %G",
		 stmt_info->stmt);
	    }
	  STMT_VINFO_GATHER_SCATTER_P (stmt_info) = gatherscatter;
	}
    }

  /* We used to stop processing and prune the list here.  Verify we no
     longer need to.  */
  gcc_assert (i == datarefs.length ());

  return opt_result::success ();
}
5577 :
5578 :
5579 : /* Function vect_get_new_vect_var.
5580 :
5581 : Returns a name for a new variable. The current naming scheme appends the
5582 : prefix "vect_" or "vect_p" (depending on the value of VAR_KIND) to
5583 : the name of vectorizer generated variables, and appends that to NAME if
5584 : provided. */
5585 :
5586 : tree
5587 1921683 : vect_get_new_vect_var (tree type, enum vect_var_kind var_kind, const char *name)
5588 : {
5589 1921683 : const char *prefix;
5590 1921683 : tree new_vect_var;
5591 :
5592 1921683 : switch (var_kind)
5593 : {
5594 : case vect_simple_var:
5595 : prefix = "vect";
5596 : break;
5597 23045 : case vect_scalar_var:
5598 23045 : prefix = "stmp";
5599 23045 : break;
5600 19902 : case vect_mask_var:
5601 19902 : prefix = "mask";
5602 19902 : break;
5603 1378708 : case vect_pointer_var:
5604 1378708 : prefix = "vectp";
5605 1378708 : break;
5606 0 : default:
5607 0 : gcc_unreachable ();
5608 : }
5609 :
5610 1921683 : if (name)
5611 : {
5612 1084897 : char* tmp = concat (prefix, "_", name, NULL);
5613 1084897 : new_vect_var = create_tmp_reg (type, tmp);
5614 1084897 : free (tmp);
5615 : }
5616 : else
5617 836786 : new_vect_var = create_tmp_reg (type, prefix);
5618 :
5619 1921683 : return new_vect_var;
5620 : }
5621 :
5622 : /* Like vect_get_new_vect_var but return an SSA name. */
5623 :
5624 : tree
5625 6869 : vect_get_new_ssa_name (tree type, enum vect_var_kind var_kind, const char *name)
5626 : {
5627 6869 : const char *prefix;
5628 6869 : tree new_vect_var;
5629 :
5630 6869 : switch (var_kind)
5631 : {
5632 : case vect_simple_var:
5633 : prefix = "vect";
5634 : break;
5635 312 : case vect_scalar_var:
5636 312 : prefix = "stmp";
5637 312 : break;
5638 0 : case vect_pointer_var:
5639 0 : prefix = "vectp";
5640 0 : break;
5641 0 : default:
5642 0 : gcc_unreachable ();
5643 : }
5644 :
5645 6869 : if (name)
5646 : {
5647 6392 : char* tmp = concat (prefix, "_", name, NULL);
5648 6392 : new_vect_var = make_temp_ssa_name (type, NULL, tmp);
5649 6392 : free (tmp);
5650 : }
5651 : else
5652 477 : new_vect_var = make_temp_ssa_name (type, NULL, prefix);
5653 :
5654 6869 : return new_vect_var;
5655 : }
5656 :
/* Duplicate points-to info on NAME from DR_INFO.  NAME is expected to be
   an SSA pointer derived from DR_INFO's data reference.  */

static void
vect_duplicate_ssa_name_ptr_info (tree name, dr_vec_info *dr_info)
{
  duplicate_ssa_name_ptr_info (name, DR_PTR_INFO (dr_info->dr));
  /* DR_PTR_INFO is for a base SSA name, not including constant or
     variable offsets in the ref so its alignment info does not apply.  */
  mark_ptr_info_alignment_unknown (SSA_NAME_PTR_INFO (name));
}
5667 :
/* Function vect_create_addr_base_for_vector_ref.

   Create an expression that computes the address of the first memory location
   that will be accessed for a data reference.

   Input:
   STMT_INFO: The statement containing the data reference.
   NEW_STMT_LIST: Must be initialized to NULL_TREE or a statement list.
   OFFSET: Optional.  If supplied, it is added to the initial address.
   LOOP:    Specify relative to which loop-nest should the address be computed.
            For example, when the dataref is in an inner-loop nested in an
	    outer-loop that is now being vectorized, LOOP can be either the
	    outer-loop, or the inner-loop.  The first memory location accessed
	    by the following dataref ('in' points to short):

		for (i=0; i<N; i++)
		   for (j=0; j<M; j++)
		     s += in[i+j]

	    is as follows:
	    if LOOP=i_loop:	&in		(relative to i_loop)
	    if LOOP=j_loop: 	&in+i*2B	(relative to j_loop)

   Output:
   1. Return an SSA_NAME whose value is the address of the memory location of
      the first vector of the data reference.
   2. If new_stmt_list is not NULL_TREE after return then the caller must insert
      these statement(s) which define the returned SSA_NAME.

   FORNOW: We are only handling array accesses with step 1.  */

tree
vect_create_addr_base_for_vector_ref (vec_info *vinfo, stmt_vec_info stmt_info,
				      gimple_seq *new_stmt_list,
				      tree offset)
{
  dr_vec_info *dr_info = STMT_VINFO_DR_INFO (stmt_info);
  struct data_reference *dr = dr_info->dr;
  const char *base_name;
  tree addr_base;
  tree dest;
  gimple_seq seq = NULL;
  tree vect_ptr_type;
  loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
  innermost_loop_behavior *drb = vect_dr_behavior (vinfo, dr_info);

  tree data_ref_base = unshare_expr (drb->base_address);
  tree base_offset = unshare_expr (get_dr_vinfo_offset (vinfo, dr_info, true));
  tree init = unshare_expr (drb->init);

  if (loop_vinfo)
    base_name = get_name (data_ref_base);
  else
    {
      /* For basic-block vectorization the address is taken directly from
	 the scalar reference below, so drop the loop-relative offset and
	 init components and only keep the reference's name for dumps.  */
      base_offset = ssize_int (0);
      init = ssize_int (0);
      base_name = get_name (DR_REF (dr));
    }

  /* Create base_offset: the sum of the reference's offset and init,
     both converted to sizetype.  */
  base_offset = size_binop (PLUS_EXPR,
			    fold_convert (sizetype, base_offset),
			    fold_convert (sizetype, init));

  if (offset)
    {
      offset = fold_convert (sizetype, offset);
      base_offset = fold_build2 (PLUS_EXPR, sizetype,
				 base_offset, offset);
    }

  /* base + base_offset */
  if (loop_vinfo)
    addr_base = fold_build_pointer_plus (data_ref_base, base_offset);
  else
    addr_base = build1 (ADDR_EXPR,
			build_pointer_type (TREE_TYPE (DR_REF (dr))),
			/* Strip zero offset components since we don't need
			   them and they can confuse late diagnostics if
			   we CSE them wrongly.  See PR106904 for example.  */
			unshare_expr (strip_zero_offset_components
				      (DR_REF (dr))));

  /* Gimplify the address expression into SEQ, preferring DEST as the
     name of the result so dumps stay readable.  */
  vect_ptr_type = build_pointer_type (TREE_TYPE (DR_REF (dr)));
  dest = vect_get_new_vect_var (vect_ptr_type, vect_pointer_var, base_name);
  addr_base = force_gimple_operand (addr_base, &seq, true, dest);
  gimple_seq_add_seq (new_stmt_list, seq);

  if (DR_PTR_INFO (dr)
      && TREE_CODE (addr_base) == SSA_NAME
      /* We should only duplicate pointer info to newly created SSA names.  */
      && SSA_NAME_VAR (addr_base) == dest)
    {
      gcc_assert (!SSA_NAME_PTR_INFO (addr_base));
      vect_duplicate_ssa_name_ptr_info (addr_base, dr_info);
    }

  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location, "created %T\n", addr_base);

  return addr_base;
}
5770 :
5771 :
/* Function vect_create_data_ref_ptr.

   Create a new pointer-to-AGGR_TYPE variable (ap), that points to the first
   location accessed in the loop by STMT_INFO, along with the def-use update
   chain to appropriately advance the pointer through the loop iterations.
   Also set aliasing information for the pointer.  This pointer is used by
   the callers to this function to create a memory reference expression for
   vector load/store access.

   Input:
   1. STMT_INFO: a stmt that references memory. Expected to be of the form
         GIMPLE_ASSIGN <name, data-ref> or
	 GIMPLE_ASSIGN <data-ref, name>.
   2. AGGR_TYPE: the type of the reference, which should be either a vector
        or an array.
   3. AT_LOOP: the loop where the vector memref is to be created.
   4. OFFSET (optional): a byte offset to be added to the initial address
	accessed by the data-ref in STMT_INFO.
   5. GSI: location where the new stmts are to be placed if there is no loop
   6. ONLY_INIT: indicate if ap is to be updated in the loop, or remain
        pointing to the initial address.
   7. IV_STEP (optional, defaults to NULL): the amount that should be added
	to the IV during each iteration of the loop.  NULL says to move
	by one copy of AGGR_TYPE up or down, depending on the step of the
	data reference.

   Output:
   1. Declare a new ptr to vector_type, and have it point to the base of the
      data reference (initial addressed accessed by the data reference).
      For example, for vector of type V8HI, the following code is generated:

      v8hi *ap;
      ap = (v8hi *)initial_address;

      if OFFSET is not supplied:
         initial_address = &a[init];
      if OFFSET is supplied:
         initial_address = &a[init] + OFFSET;

      Return the initial_address in INITIAL_ADDRESS.

   2. If ONLY_INIT is true, just return the initial pointer.  Otherwise, also
      update the pointer in each iteration of the loop.

      Return the increment stmt that updates the pointer in PTR_INCR.

   3. Return the pointer.  */

tree
vect_create_data_ref_ptr (vec_info *vinfo, stmt_vec_info stmt_info,
			  tree aggr_type, class loop *at_loop, tree offset,
			  tree *initial_address, gimple_stmt_iterator *gsi,
			  gimple **ptr_incr, bool only_init,
			  tree iv_step)
{
  const char *base_name;
  loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
  class loop *loop = NULL;
  bool nested_in_vect_loop = false;
  class loop *containing_loop = NULL;
  tree aggr_ptr_type;
  tree aggr_ptr;
  tree new_temp;
  gimple_seq new_stmt_list = NULL;
  edge pe = NULL;
  basic_block new_bb;
  tree aggr_ptr_init;
  dr_vec_info *dr_info = STMT_VINFO_DR_INFO (stmt_info);
  struct data_reference *dr = dr_info->dr;
  tree aptr;
  gimple_stmt_iterator incr_gsi;
  bool insert_after;
  tree indx_before_incr, indx_after_incr;
  gimple *incr;
  bb_vec_info bb_vinfo = dyn_cast <bb_vec_info> (vinfo);

  /* A caller-provided IV_STEP is required unless the aggregate is an
     array or vector type (for which a default step can be derived).  */
  gcc_assert (iv_step != NULL_TREE
	      || TREE_CODE (aggr_type) == ARRAY_TYPE
	      || TREE_CODE (aggr_type) == VECTOR_TYPE);

  if (loop_vinfo)
    {
      loop = LOOP_VINFO_LOOP (loop_vinfo);
      nested_in_vect_loop = nested_in_vect_loop_p (loop, stmt_info);
      containing_loop = (gimple_bb (stmt_info->stmt))->loop_father;
      pe = loop_preheader_edge (loop);
    }
  else
    {
      /* Basic-block vectorization: there is no loop to advance the
	 pointer through, so force the init-only path.  */
      gcc_assert (bb_vinfo);
      only_init = true;
      *ptr_incr = NULL;
    }

  /* Create an expression for the first address accessed by this load
     in LOOP.  */
  base_name = get_name (DR_BASE_ADDRESS (dr));

  if (dump_enabled_p ())
    {
      tree dr_base_type = TREE_TYPE (DR_BASE_OBJECT (dr));
      dump_printf_loc (MSG_NOTE, vect_location,
		       "create %s-pointer variable to type: %T",
		       get_tree_code_name (TREE_CODE (aggr_type)),
		       aggr_type);
      if (TREE_CODE (dr_base_type) == ARRAY_TYPE)
	dump_printf (MSG_NOTE, " vectorizing an array ref: ");
      else if (TREE_CODE (dr_base_type) == VECTOR_TYPE)
	dump_printf (MSG_NOTE, " vectorizing a vector ref: ");
      else if (TREE_CODE (dr_base_type) == RECORD_TYPE)
	dump_printf (MSG_NOTE, " vectorizing a record based array ref: ");
      else
	dump_printf (MSG_NOTE, " vectorizing a pointer ref: ");
      dump_printf (MSG_NOTE, "%T\n", DR_BASE_OBJECT (dr));
    }

  /* (1) Create the new aggregate-pointer variable.
     Vector and array types inherit the alias set of their component
     type by default so we need to use a ref-all pointer if the data
     reference does not conflict with the created aggregated data
     reference because it is not addressable.  */
  bool need_ref_all = false;
  if (!alias_sets_conflict_p (get_alias_set (aggr_type),
			      get_alias_set (DR_REF (dr))))
    need_ref_all = true;
  /* Likewise for any of the data references in the stmt group.  */
  else if (DR_GROUP_SIZE (stmt_info) > 1)
    {
      stmt_vec_info sinfo = DR_GROUP_FIRST_ELEMENT (stmt_info);
      do
	{
	  struct data_reference *sdr = STMT_VINFO_DATA_REF (sinfo);
	  if (!alias_sets_conflict_p (get_alias_set (aggr_type),
				      get_alias_set (DR_REF (sdr))))
	    {
	      need_ref_all = true;
	      break;
	    }
	  sinfo = DR_GROUP_NEXT_ELEMENT (sinfo);
	}
      while (sinfo);
    }
  aggr_ptr_type = build_pointer_type_for_mode (aggr_type, VOIDmode,
					       need_ref_all);
  aggr_ptr = vect_get_new_vect_var (aggr_ptr_type, vect_pointer_var, base_name);


  /* Note: If the dataref is in an inner-loop nested in LOOP, and we are
     vectorizing LOOP (i.e., outer-loop vectorization), we need to create two
     def-use update cycles for the pointer: one relative to the outer-loop
     (LOOP), which is what steps (3) and (4) below do.  The other is relative
     to the inner-loop (which is the inner-most loop containing the dataref),
     and this is done by step (5) below.

     When vectorizing inner-most loops, the vectorized loop (LOOP) is also the
     inner-most loop, and so steps (3),(4) work the same, and step (5) is
     redundant.  Steps (3),(4) create the following:

	vp0 = &base_addr;
	LOOP:	vp1 = phi(vp0,vp2)
		...
		...
		vp2 = vp1 + step
		goto LOOP

     If there is an inner-loop nested in loop, then step (5) will also be
     applied, and an additional update in the inner-loop will be created:

	vp0 = &base_addr;
	LOOP:   vp1 = phi(vp0,vp2)
		...
        inner:     vp3 = phi(vp1,vp4)
	           vp4 = vp3 + inner_step
	           if () goto inner
		...
		vp2 = vp1 + step
		if () goto LOOP   */

  /* (2) Calculate the initial address of the aggregate-pointer, and set
     the aggregate-pointer to point to it before the loop.  */

  /* Create: &(base[init_val] + offset) in the loop preheader.  */

  new_temp = vect_create_addr_base_for_vector_ref (vinfo,
						   stmt_info, &new_stmt_list,
						   offset);
  if (new_stmt_list)
    {
      if (pe)
	{
	  /* Loop case: emit the address computation on the preheader edge;
	     it must not require splitting the edge into a new block.  */
	  new_bb = gsi_insert_seq_on_edge_immediate (pe, new_stmt_list);
	  gcc_assert (!new_bb);
	}
      else
	gsi_insert_seq_before (gsi, new_stmt_list, GSI_SAME_STMT);
    }

  *initial_address = new_temp;
  aggr_ptr_init = new_temp;

  /* (3) Handle the updating of the aggregate-pointer inside the loop.
     This is needed when ONLY_INIT is false, and also when AT_LOOP is the
     inner-loop nested in LOOP (during outer-loop vectorization).  */

  /* No update in loop is required.  */
  if (only_init && (!loop_vinfo || at_loop == loop))
    aptr = aggr_ptr_init;
  else
    {
      /* Accesses to invariant addresses should be handled specially
	 by the caller.  */
      tree step = vect_dr_behavior (vinfo, dr_info)->step;
      gcc_assert (!integer_zerop (step));

      if (iv_step == NULL_TREE)
	{
	  /* The step of the aggregate pointer is the type size,
	     negated for downward accesses.  */
	  iv_step = TYPE_SIZE_UNIT (aggr_type);
	  if (tree_int_cst_sgn (step) == -1)
	    iv_step = fold_build1 (NEGATE_EXPR, TREE_TYPE (iv_step), iv_step);
	}

      standard_iv_increment_position (loop, &incr_gsi, &insert_after);

      /* Build the induction variable AGGR_PTR = phi(init, ptr + iv_step)
	 in LOOP; INDX_BEFORE_INCR is the PHI result, INDX_AFTER_INCR the
	 incremented value.  */
      create_iv (aggr_ptr_init, PLUS_EXPR,
		 iv_step, aggr_ptr, loop, &incr_gsi, insert_after,
		 &indx_before_incr, &indx_after_incr);
      incr = gsi_stmt (incr_gsi);

      /* Copy the points-to information if it exists.  */
      if (DR_PTR_INFO (dr))
	{
	  vect_duplicate_ssa_name_ptr_info (indx_before_incr, dr_info);
	  vect_duplicate_ssa_name_ptr_info (indx_after_incr, dr_info);
	}
      if (ptr_incr)
	*ptr_incr = incr;

      aptr = indx_before_incr;
    }

  if (!nested_in_vect_loop || only_init)
    return aptr;


  /* (4) Handle the updating of the aggregate-pointer inside the inner-loop
     nested in LOOP, if exists.  */

  gcc_assert (nested_in_vect_loop);
  if (!only_init)
    {
      standard_iv_increment_position (containing_loop, &incr_gsi,
				      &insert_after);
      /* Second IV, relative to the inner loop, stepping by the scalar
	 DR_STEP each inner iteration.  */
      create_iv (aptr, PLUS_EXPR, DR_STEP (dr),
		 aggr_ptr, containing_loop, &incr_gsi, insert_after,
		 &indx_before_incr, &indx_after_incr);
      incr = gsi_stmt (incr_gsi);

      /* Copy the points-to information if it exists.  */
      if (DR_PTR_INFO (dr))
	{
	  vect_duplicate_ssa_name_ptr_info (indx_before_incr, dr_info);
	  vect_duplicate_ssa_name_ptr_info (indx_after_incr, dr_info);
	}
      if (ptr_incr)
	*ptr_incr = incr;

      return indx_before_incr;
    }
  else
    gcc_unreachable ();
}
6047 :
6048 :
/* Function bump_vector_ptr

   Increment a pointer (to a vector type) by vector-size.  If requested,
   i.e. if PTR-INCR is given, then also connect the new increment stmt
   to the existing def-use update-chain of the pointer, by modifying
   the PTR_INCR as illustrated below:

   The pointer def-use update-chain before this function:
                        DATAREF_PTR = phi (p_0, p_2)
                        ....
        PTR_INCR:       p_2 = DATAREF_PTR + step

   The pointer def-use update-chain after this function:
                        DATAREF_PTR = phi (p_0, p_2)
                        ....
                        NEW_DATAREF_PTR = DATAREF_PTR + BUMP
                        ....
        PTR_INCR:       p_2 = NEW_DATAREF_PTR + step

   Input:
   DATAREF_PTR - ssa_name of a pointer (to vector type) that is being updated
                 in the loop.
   PTR_INCR - optional. The stmt that updates the pointer in each iteration of
	      the loop.  The increment amount across iterations is expected
	      to be vector_size.
   GSI - location where the new update stmt is to be placed.
   STMT_INFO - the original scalar memory-access stmt that is being vectorized.
   UPDATE - The offset by which to bump the pointer.

   Output: Return NEW_DATAREF_PTR as illustrated above.

*/

tree
bump_vector_ptr (vec_info *vinfo,
		 tree dataref_ptr, gimple *ptr_incr, gimple_stmt_iterator *gsi,
		 stmt_vec_info stmt_info, tree update)
{
  struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
  gimple *incr_stmt;
  ssa_op_iter iter;
  use_operand_p use_p;
  tree new_dataref_ptr;

  if (TREE_CODE (dataref_ptr) == SSA_NAME)
    new_dataref_ptr = copy_ssa_name (dataref_ptr);
  else if (is_gimple_min_invariant (dataref_ptr))
    /* When possible avoid emitting a separate increment stmt that will
       force the addressed object addressable.  Instead fold the bump
       into the address expression directly: &MEM[ptr + update].  */
    return build1 (ADDR_EXPR, TREE_TYPE (dataref_ptr),
		   fold_build2 (MEM_REF,
				TREE_TYPE (TREE_TYPE (dataref_ptr)),
				dataref_ptr,
				fold_convert (ptr_type_node, update)));
  else
    new_dataref_ptr = make_ssa_name (TREE_TYPE (dataref_ptr));
  incr_stmt = gimple_build_assign (new_dataref_ptr, POINTER_PLUS_EXPR,
				   dataref_ptr, update);
  vect_finish_stmt_generation (vinfo, stmt_info, incr_stmt, gsi);
  /* Fold the increment, to avoid building up excessively long use-def
     chains of such increments, which lead to compile-time issues for
     passes until the next forwprop pass which would do this as well.  */
  gimple_stmt_iterator fold_gsi = gsi_for_stmt (incr_stmt);
  if (fold_stmt (&fold_gsi, follow_all_ssa_edges))
    {
      incr_stmt = gsi_stmt (fold_gsi);
      update_stmt (incr_stmt);
    }

  /* Copy the points-to information if it exists.  */
  if (DR_PTR_INFO (dr))
    {
      duplicate_ssa_name_ptr_info (new_dataref_ptr, DR_PTR_INFO (dr));
      /* The bump invalidates any alignment knowledge from the base.  */
      mark_ptr_info_alignment_unknown (SSA_NAME_PTR_INFO (new_dataref_ptr));
    }

  if (!ptr_incr)
    return new_dataref_ptr;

  /* Update the vector-pointer's cross-iteration increment: PTR_INCR's
     use of DATAREF_PTR is redirected to NEW_DATAREF_PTR; any other use
     operand must be the UPDATE amount itself.  */
  FOR_EACH_SSA_USE_OPERAND (use_p, ptr_incr, iter, SSA_OP_USE)
    {
      tree use = USE_FROM_PTR (use_p);

      if (use == dataref_ptr)
	SET_USE (use_p, new_dataref_ptr);
      else
	gcc_assert (operand_equal_p (use, update, 0));
    }

  return new_dataref_ptr;
}
6141 :
6142 :
6143 : /* Copy memory reference info such as base/clique from the SRC reference
6144 : to the DEST MEM_REF. */
6145 :
6146 : void
6147 936972 : vect_copy_ref_info (tree dest, tree src)
6148 : {
6149 936972 : if (TREE_CODE (dest) != MEM_REF)
6150 : return;
6151 :
6152 : tree src_base = src;
6153 1887068 : while (handled_component_p (src_base))
6154 954767 : src_base = TREE_OPERAND (src_base, 0);
6155 932301 : if (TREE_CODE (src_base) != MEM_REF
6156 932301 : && TREE_CODE (src_base) != TARGET_MEM_REF)
6157 : return;
6158 :
6159 505323 : MR_DEPENDENCE_CLIQUE (dest) = MR_DEPENDENCE_CLIQUE (src_base);
6160 505323 : MR_DEPENDENCE_BASE (dest) = MR_DEPENDENCE_BASE (src_base);
6161 : }
6162 :
6163 :
6164 : /* Function vect_create_destination_var.
6165 :
6166 : Create a new temporary of type VECTYPE. */
6167 :
6168 : tree
6169 526191 : vect_create_destination_var (tree scalar_dest, tree vectype)
6170 : {
6171 526191 : tree vec_dest;
6172 526191 : const char *name;
6173 526191 : char *new_name;
6174 526191 : tree type;
6175 526191 : enum vect_var_kind kind;
6176 :
6177 526191 : kind = vectype
6178 1029337 : ? VECTOR_BOOLEAN_TYPE_P (vectype)
6179 503146 : ? vect_mask_var
6180 : : vect_simple_var
6181 : : vect_scalar_var;
6182 23045 : type = vectype ? vectype : TREE_TYPE (scalar_dest);
6183 :
6184 526191 : gcc_assert (TREE_CODE (scalar_dest) == SSA_NAME);
6185 :
6186 526191 : name = get_name (scalar_dest);
6187 526191 : if (name)
6188 184644 : new_name = xasprintf ("%s_%u", name, SSA_NAME_VERSION (scalar_dest));
6189 : else
6190 341547 : new_name = xasprintf ("_%u", SSA_NAME_VERSION (scalar_dest));
6191 526191 : vec_dest = vect_get_new_vect_var (type, kind, new_name);
6192 526191 : free (new_name);
6193 :
6194 526191 : return vec_dest;
6195 : }
6196 :
6197 : /* Function vect_grouped_store_supported.
6198 :
6199 : Returns TRUE if interleave high and interleave low permutations
6200 : are supported, and FALSE otherwise. */
6201 :
6202 : bool
6203 2507 : vect_grouped_store_supported (tree vectype, unsigned HOST_WIDE_INT count)
6204 : {
6205 2507 : machine_mode mode = TYPE_MODE (vectype);
6206 :
6207 : /* vect_permute_store_chain requires the group size to be equal to 3 or
6208 : be a power of two. */
6209 2507 : if (count != 3 && exact_log2 (count) == -1)
6210 : {
6211 552 : if (dump_enabled_p ())
6212 11 : dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6213 : "the size of the group of accesses"
6214 : " is not a power of 2 or not eqaul to 3\n");
6215 552 : return false;
6216 : }
6217 :
6218 : /* Check that the permutation is supported. */
6219 1955 : if (VECTOR_MODE_P (mode))
6220 : {
6221 1955 : unsigned int i;
6222 1955 : if (count == 3)
6223 : {
6224 921 : unsigned int j0 = 0, j1 = 0, j2 = 0;
6225 921 : unsigned int i, j;
6226 :
6227 921 : unsigned int nelt;
6228 1842 : if (!GET_MODE_NUNITS (mode).is_constant (&nelt))
6229 : {
6230 : if (dump_enabled_p ())
6231 : dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6232 : "cannot handle groups of 3 stores for"
6233 : " variable-length vectors\n");
6234 : return false;
6235 : }
6236 :
6237 921 : vec_perm_builder sel (nelt, nelt, 1);
6238 921 : sel.quick_grow (nelt);
6239 921 : vec_perm_indices indices;
6240 3459 : for (j = 0; j < 3; j++)
6241 : {
6242 2613 : int nelt0 = ((3 - j) * nelt) % 3;
6243 2613 : int nelt1 = ((3 - j) * nelt + 1) % 3;
6244 2613 : int nelt2 = ((3 - j) * nelt + 2) % 3;
6245 9291 : for (i = 0; i < nelt; i++)
6246 : {
6247 6678 : if (3 * i + nelt0 < nelt)
6248 2264 : sel[3 * i + nelt0] = j0++;
6249 6678 : if (3 * i + nelt1 < nelt)
6250 2225 : sel[3 * i + nelt1] = nelt + j1++;
6251 6678 : if (3 * i + nelt2 < nelt)
6252 2189 : sel[3 * i + nelt2] = 0;
6253 : }
6254 2613 : indices.new_vector (sel, 2, nelt);
6255 2613 : if (!can_vec_perm_const_p (mode, mode, indices))
6256 : {
6257 66 : if (dump_enabled_p ())
6258 37 : dump_printf (MSG_MISSED_OPTIMIZATION,
6259 : "permutation op not supported by target.\n");
6260 66 : return false;
6261 : }
6262 :
6263 8649 : for (i = 0; i < nelt; i++)
6264 : {
6265 6102 : if (3 * i + nelt0 < nelt)
6266 2040 : sel[3 * i + nelt0] = 3 * i + nelt0;
6267 6102 : if (3 * i + nelt1 < nelt)
6268 2031 : sel[3 * i + nelt1] = 3 * i + nelt1;
6269 6102 : if (3 * i + nelt2 < nelt)
6270 2031 : sel[3 * i + nelt2] = nelt + j2++;
6271 : }
6272 2547 : indices.new_vector (sel, 2, nelt);
6273 2547 : if (!can_vec_perm_const_p (mode, mode, indices))
6274 : {
6275 9 : if (dump_enabled_p ())
6276 9 : dump_printf (MSG_MISSED_OPTIMIZATION,
6277 : "permutation op not supported by target.\n");
6278 9 : return false;
6279 : }
6280 : }
6281 : return true;
6282 921 : }
6283 : else
6284 : {
6285 : /* If length is not equal to 3 then only power of 2 is supported. */
6286 1034 : gcc_assert (pow2p_hwi (count));
6287 2068 : poly_uint64 nelt = GET_MODE_NUNITS (mode);
6288 :
6289 : /* The encoding has 2 interleaved stepped patterns. */
6290 2068 : if(!multiple_p (nelt, 2))
6291 986 : return false;
6292 1034 : vec_perm_builder sel (nelt, 2, 3);
6293 1034 : sel.quick_grow (6);
6294 5170 : for (i = 0; i < 3; i++)
6295 : {
6296 3102 : sel[i * 2] = i;
6297 3102 : sel[i * 2 + 1] = i + nelt;
6298 : }
6299 1034 : vec_perm_indices indices (sel, 2, nelt);
6300 1034 : if (can_vec_perm_const_p (mode, mode, indices))
6301 : {
6302 6902 : for (i = 0; i < 6; i++)
6303 5916 : sel[i] += exact_div (nelt, 2);
6304 986 : indices.new_vector (sel, 2, nelt);
6305 986 : if (can_vec_perm_const_p (mode, mode, indices))
6306 986 : return true;
6307 : }
6308 1034 : }
6309 : }
6310 :
6311 48 : if (dump_enabled_p ())
6312 3 : dump_printf (MSG_MISSED_OPTIMIZATION,
6313 : "permutation op not supported by target.\n");
6314 : return false;
6315 : }
6316 :
6317 : /* Return FN if vec_{mask_,mask_len_}store_lanes is available for COUNT vectors
6318 : of type VECTYPE. MASKED_P says whether the masked form is needed. */
6319 :
6320 : internal_fn
6321 32524 : vect_store_lanes_supported (tree vectype, unsigned HOST_WIDE_INT count,
6322 : bool masked_p)
6323 : {
6324 32524 : if (vect_lanes_optab_supported_p ("vec_mask_len_store_lanes",
6325 : vec_mask_len_store_lanes_optab, vectype,
6326 : count))
6327 : return IFN_MASK_LEN_STORE_LANES;
6328 32524 : else if (masked_p)
6329 : {
6330 159 : if (vect_lanes_optab_supported_p ("vec_mask_store_lanes",
6331 : vec_mask_store_lanes_optab, vectype,
6332 : count))
6333 : return IFN_MASK_STORE_LANES;
6334 : }
6335 : else
6336 : {
6337 32365 : if (vect_lanes_optab_supported_p ("vec_store_lanes",
6338 : vec_store_lanes_optab, vectype, count))
6339 : return IFN_STORE_LANES;
6340 : }
6341 : return IFN_LAST;
6342 : }
6343 :
6344 :
6345 : /* Function vect_setup_realignment
6346 :
6347 : This function is called when vectorizing an unaligned load using
6348 : the dr_explicit_realign[_optimized] scheme.
6349 : This function generates the following code at the loop prolog:
6350 :
6351 : p = initial_addr;
6352 : x msq_init = *(floor(p)); # prolog load
6353 : realignment_token = call target_builtin;
6354 : loop:
6355 : x msq = phi (msq_init, ---)
6356 :
6357 : The stmts marked with x are generated only for the case of
6358 : dr_explicit_realign_optimized.
6359 :
6360 : The code above sets up a new (vector) pointer, pointing to the first
6361 : location accessed by STMT_INFO, and a "floor-aligned" load using that
6362 : pointer. It also generates code to compute the "realignment-token"
6363 : (if the relevant target hook was defined), and creates a phi-node at the
6364 : loop-header bb whose arguments are the result of the prolog-load (created
6365 : by this function) and the result of a load that takes place in the loop
6366 : (to be created by the caller to this function).
6367 :
6368 : For the case of dr_explicit_realign_optimized:
6369 : The caller to this function uses the phi-result (msq) to create the
6370 : realignment code inside the loop, and sets up the missing phi argument,
6371 : as follows:
6372 : loop:
6373 : msq = phi (msq_init, lsq)
6374 : lsq = *(floor(p')); # load in loop
6375 : result = realign_load (msq, lsq, realignment_token);
6376 :
6377 : For the case of dr_explicit_realign:
6378 : loop:
6379 : msq = *(floor(p)); # load in loop
6380 : p' = p + (VS-1);
6381 : lsq = *(floor(p')); # load in loop
6382 : result = realign_load (msq, lsq, realignment_token);
6383 :
6384 : Input:
6385 : STMT_INFO - (scalar) load stmt to be vectorized. This load accesses
6386 : a memory location that may be unaligned.
6387 : BSI - place where new code is to be inserted.
6388 : ALIGNMENT_SUPPORT_SCHEME - which of the two misalignment handling schemes
6389 : is used.
6390 :
6391 : Output:
6392 : REALIGNMENT_TOKEN - the result of a call to the builtin_mask_for_load
6393 : target hook, if defined.
6394 : Return value - the result of the loop-header phi node. */
6395 :
6396 : tree
6397 0 : vect_setup_realignment (vec_info *vinfo, stmt_vec_info stmt_info, tree vectype,
6398 : gimple_stmt_iterator *gsi, tree *realignment_token,
6399 : enum dr_alignment_support alignment_support_scheme,
6400 : tree init_addr,
6401 : class loop **at_loop)
6402 : {
6403 0 : loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
6404 0 : dr_vec_info *dr_info = STMT_VINFO_DR_INFO (stmt_info);
6405 0 : struct data_reference *dr = dr_info->dr;
6406 0 : class loop *loop = NULL;
6407 0 : edge pe = NULL;
6408 0 : tree scalar_dest = gimple_assign_lhs (stmt_info->stmt);
6409 0 : tree vec_dest;
6410 0 : gimple *inc;
6411 0 : tree ptr;
6412 0 : tree data_ref;
6413 0 : basic_block new_bb;
6414 0 : tree msq_init = NULL_TREE;
6415 0 : tree new_temp;
6416 0 : gphi *phi_stmt;
6417 0 : tree msq = NULL_TREE;
6418 0 : gimple_seq stmts = NULL;
6419 0 : bool compute_in_loop = false;
6420 0 : bool nested_in_vect_loop = false;
6421 0 : class loop *containing_loop = (gimple_bb (stmt_info->stmt))->loop_father;
6422 0 : class loop *loop_for_initial_load = NULL;
6423 :
6424 0 : if (loop_vinfo)
6425 : {
6426 0 : loop = LOOP_VINFO_LOOP (loop_vinfo);
6427 0 : nested_in_vect_loop = nested_in_vect_loop_p (loop, stmt_info);
6428 : }
6429 :
6430 0 : gcc_assert (alignment_support_scheme == dr_explicit_realign
6431 : || alignment_support_scheme == dr_explicit_realign_optimized);
6432 :
6433 : /* We need to generate three things:
6434 : 1. the misalignment computation
6435 : 2. the extra vector load (for the optimized realignment scheme).
6436 : 3. the phi node for the two vectors from which the realignment is
6437 : done (for the optimized realignment scheme). */
6438 :
6439 : /* 1. Determine where to generate the misalignment computation.
6440 :
6441 : If INIT_ADDR is NULL_TREE, this indicates that the misalignment
6442 : calculation will be generated by this function, outside the loop (in the
6443 : preheader). Otherwise, INIT_ADDR had already been computed for us by the
6444 : caller, inside the loop.
6445 :
6446 : Background: If the misalignment remains fixed throughout the iterations of
6447 : the loop, then both realignment schemes are applicable, and also the
6448 : misalignment computation can be done outside LOOP. This is because we are
6449 : vectorizing LOOP, and so the memory accesses in LOOP advance in steps that
6450 : are a multiple of VS (the Vector Size), and therefore the misalignment in
6451 : different vectorized LOOP iterations is always the same.
6452 : The problem arises only if the memory access is in an inner-loop nested
6453 : inside LOOP, which is now being vectorized using outer-loop vectorization.
6454 : This is the only case when the misalignment of the memory access may not
6455 : remain fixed throughout the iterations of the inner-loop (as explained in
6456 : detail in vect_supportable_dr_alignment). In this case, not only is the
6457 : optimized realignment scheme not applicable, but also the misalignment
6458 : computation (and generation of the realignment token that is passed to
6459 : REALIGN_LOAD) have to be done inside the loop.
6460 :
6461 : In short, INIT_ADDR indicates whether we are in a COMPUTE_IN_LOOP mode
6462 : or not, which in turn determines if the misalignment is computed inside
6463 : the inner-loop, or outside LOOP. */
6464 :
6465 0 : if (init_addr != NULL_TREE || !loop_vinfo)
6466 : {
6467 0 : compute_in_loop = true;
6468 0 : gcc_assert (alignment_support_scheme == dr_explicit_realign);
6469 : }
6470 :
6471 :
6472 : /* 2. Determine where to generate the extra vector load.
6473 :
6474 : For the optimized realignment scheme, instead of generating two vector
6475 : loads in each iteration, we generate a single extra vector load in the
6476 : preheader of the loop, and in each iteration reuse the result of the
6477 : vector load from the previous iteration. In case the memory access is in
6478 : an inner-loop nested inside LOOP, which is now being vectorized using
6479 : outer-loop vectorization, we need to determine whether this initial vector
6480 : load should be generated at the preheader of the inner-loop, or can be
6481 : generated at the preheader of LOOP. If the memory access has no evolution
6482 : in LOOP, it can be generated in the preheader of LOOP. Otherwise, it has
6483 : to be generated inside LOOP (in the preheader of the inner-loop). */
6484 :
6485 0 : if (nested_in_vect_loop)
6486 : {
6487 0 : tree outerloop_step = STMT_VINFO_DR_STEP (stmt_info);
6488 0 : bool invariant_in_outerloop =
6489 0 : (tree_int_cst_compare (outerloop_step, size_zero_node) == 0);
6490 0 : loop_for_initial_load = (invariant_in_outerloop ? loop : loop->inner);
6491 : }
6492 : else
6493 : loop_for_initial_load = loop;
6494 0 : if (at_loop)
6495 0 : *at_loop = loop_for_initial_load;
6496 :
6497 0 : tree vuse = NULL_TREE;
6498 0 : if (loop_for_initial_load)
6499 : {
6500 0 : pe = loop_preheader_edge (loop_for_initial_load);
6501 0 : if (gphi *vphi = get_virtual_phi (loop_for_initial_load->header))
6502 0 : vuse = PHI_ARG_DEF_FROM_EDGE (vphi, pe);
6503 : }
6504 0 : if (!vuse)
6505 0 : vuse = gimple_vuse (gsi_stmt (*gsi));
6506 :
6507 : /* 3. For the case of the optimized realignment, create the first vector
6508 : load at the loop preheader. */
6509 :
6510 0 : if (alignment_support_scheme == dr_explicit_realign_optimized)
6511 : {
6512 : /* Create msq_init = *(floor(p1)) in the loop preheader */
6513 0 : gassign *new_stmt;
6514 :
6515 0 : gcc_assert (!compute_in_loop);
6516 0 : vec_dest = vect_create_destination_var (scalar_dest, vectype);
6517 0 : ptr = vect_create_data_ref_ptr (vinfo, stmt_info, vectype,
6518 : loop_for_initial_load, NULL_TREE,
6519 : &init_addr, NULL, &inc, true);
6520 0 : if (TREE_CODE (ptr) == SSA_NAME)
6521 0 : new_temp = copy_ssa_name (ptr);
6522 : else
6523 0 : new_temp = make_ssa_name (TREE_TYPE (ptr));
6524 0 : poly_uint64 align = DR_TARGET_ALIGNMENT (dr_info);
6525 0 : tree type = TREE_TYPE (ptr);
6526 0 : new_stmt = gimple_build_assign
6527 0 : (new_temp, BIT_AND_EXPR, ptr,
6528 0 : fold_build2 (MINUS_EXPR, type,
6529 : build_int_cst (type, 0),
6530 : build_int_cst (type, align)));
6531 0 : new_bb = gsi_insert_on_edge_immediate (pe, new_stmt);
6532 0 : gcc_assert (!new_bb);
6533 0 : data_ref
6534 0 : = build2 (MEM_REF, TREE_TYPE (vec_dest), new_temp,
6535 : build_int_cst (reference_alias_ptr_type (DR_REF (dr)), 0));
6536 0 : vect_copy_ref_info (data_ref, DR_REF (dr));
6537 0 : new_stmt = gimple_build_assign (vec_dest, data_ref);
6538 0 : new_temp = make_ssa_name (vec_dest, new_stmt);
6539 0 : gimple_assign_set_lhs (new_stmt, new_temp);
6540 0 : gimple_set_vuse (new_stmt, vuse);
6541 0 : if (pe)
6542 : {
6543 0 : new_bb = gsi_insert_on_edge_immediate (pe, new_stmt);
6544 0 : gcc_assert (!new_bb);
6545 : }
6546 : else
6547 0 : gsi_insert_before (gsi, new_stmt, GSI_SAME_STMT);
6548 :
6549 0 : msq_init = gimple_assign_lhs (new_stmt);
6550 : }
6551 :
6552 : /* 4. Create realignment token using a target builtin, if available.
6553 : It is done either inside the containing loop, or before LOOP (as
6554 : determined above). */
6555 :
6556 0 : if (targetm.vectorize.builtin_mask_for_load)
6557 : {
6558 0 : gcall *new_stmt;
6559 0 : tree builtin_decl;
6560 :
6561 : /* Compute INIT_ADDR - the initial addressed accessed by this memref. */
6562 0 : if (!init_addr)
6563 : {
6564 : /* Generate the INIT_ADDR computation outside LOOP. */
6565 0 : init_addr = vect_create_addr_base_for_vector_ref (vinfo,
6566 : stmt_info, &stmts,
6567 : NULL_TREE);
6568 0 : if (loop)
6569 : {
6570 0 : pe = loop_preheader_edge (loop);
6571 0 : new_bb = gsi_insert_seq_on_edge_immediate (pe, stmts);
6572 0 : gcc_assert (!new_bb);
6573 : }
6574 : else
6575 0 : gsi_insert_seq_before (gsi, stmts, GSI_SAME_STMT);
6576 : }
6577 :
6578 0 : builtin_decl = targetm.vectorize.builtin_mask_for_load ();
6579 0 : new_stmt = gimple_build_call (builtin_decl, 1, init_addr);
6580 0 : vec_dest =
6581 0 : vect_create_destination_var (scalar_dest,
6582 : gimple_call_return_type (new_stmt));
6583 0 : new_temp = make_ssa_name (vec_dest, new_stmt);
6584 0 : gimple_call_set_lhs (new_stmt, new_temp);
6585 :
6586 0 : if (compute_in_loop)
6587 0 : gsi_insert_before (gsi, new_stmt, GSI_SAME_STMT);
6588 : else
6589 : {
6590 : /* Generate the misalignment computation outside LOOP. */
6591 0 : pe = loop_preheader_edge (loop);
6592 0 : new_bb = gsi_insert_on_edge_immediate (pe, new_stmt);
6593 0 : gcc_assert (!new_bb);
6594 : }
6595 :
6596 0 : *realignment_token = gimple_call_lhs (new_stmt);
6597 :
6598 : /* The result of the CALL_EXPR to this builtin is determined from
6599 : the value of the parameter and no global variables are touched
6600 : which makes the builtin a "const" function. Requiring the
6601 : builtin to have the "const" attribute makes it unnecessary
6602 : to call mark_call_clobbered. */
6603 0 : gcc_assert (TREE_READONLY (builtin_decl));
6604 : }
6605 :
6606 0 : if (alignment_support_scheme == dr_explicit_realign)
6607 : return msq;
6608 :
6609 0 : gcc_assert (!compute_in_loop);
6610 0 : gcc_assert (alignment_support_scheme == dr_explicit_realign_optimized);
6611 :
6612 :
6613 : /* 5. Create msq = phi <msq_init, lsq> in loop */
6614 :
6615 0 : pe = loop_preheader_edge (containing_loop);
6616 0 : vec_dest = vect_create_destination_var (scalar_dest, vectype);
6617 0 : msq = make_ssa_name (vec_dest);
6618 0 : phi_stmt = create_phi_node (msq, containing_loop->header);
6619 0 : add_phi_arg (phi_stmt, msq_init, pe, UNKNOWN_LOCATION);
6620 :
6621 0 : return msq;
6622 : }
6623 :
6624 :
6625 : /* Function vect_grouped_load_supported.
6626 :
6627 : COUNT is the size of the load group (the number of statements plus the
6628 : number of gaps). SINGLE_ELEMENT_P is true if there is actually
6629 : only one statement, with a gap of COUNT - 1.
6630 :
6631 : Returns true if a suitable permute exists. */
6632 :
6633 : bool
6634 1681 : vect_grouped_load_supported (tree vectype, bool single_element_p,
6635 : unsigned HOST_WIDE_INT count)
6636 : {
6637 1681 : machine_mode mode = TYPE_MODE (vectype);
6638 :
6639 : /* If this is single-element interleaving with an element distance
6640 : that leaves unused vector loads around punt - we at least create
6641 : very sub-optimal code in that case (and blow up memory,
6642 : see PR65518). */
6643 1681 : if (single_element_p && maybe_gt (count, TYPE_VECTOR_SUBPARTS (vectype)))
6644 : {
6645 23 : if (dump_enabled_p ())
6646 3 : dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6647 : "single-element interleaving not supported "
6648 : "for not adjacent vector loads\n");
6649 23 : return false;
6650 : }
6651 :
6652 : /* vect_permute_load_chain requires the group size to be equal to 3 or
6653 : be a power of two. */
6654 1658 : if (count != 3 && exact_log2 (count) == -1)
6655 : {
6656 222 : if (dump_enabled_p ())
6657 14 : dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6658 : "the size of the group of accesses"
6659 : " is not a power of 2 or not equal to 3\n");
6660 222 : return false;
6661 : }
6662 :
6663 : /* Check that the permutation is supported. */
6664 1436 : if (VECTOR_MODE_P (mode))
6665 : {
6666 1436 : unsigned int i, j;
6667 1436 : if (count == 3)
6668 : {
6669 695 : unsigned int nelt;
 : /* The masks below enumerate individual lanes, so a constant
 : lane count is required. */
6670 1390 : if (!GET_MODE_NUNITS (mode).is_constant (&nelt))
6671 : {
6672 : if (dump_enabled_p ())
6673 : dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6674 : "cannot handle groups of 3 loads for"
6675 : " variable-length vectors\n");
6676 : return false;
6677 : }
6678 :
6679 695 : vec_perm_builder sel (nelt, nelt, 1);
6680 695 : sel.quick_grow (nelt);
6681 695 : vec_perm_indices indices;
6682 695 : unsigned int k;
 : /* For a group of 3, each of the three outputs (K = 0, 1, 2) needs
 : two permutes on two-vector inputs: the first gathers the lanes at
 : stride 3 starting at offset K from the first two input vectors
 : (lanes whose index would reach past 2 * nelt are don't-care and
 : set to 0); the second keeps those gathered lanes and fills the
 : remaining lanes from the next input vector. Both masks must be
 : supported by the target. */
6683 2744 : for (k = 0; k < 3; k++)
6684 : {
6685 7385 : for (i = 0; i < nelt; i++)
6686 5324 : if (3 * i + k < 2 * nelt)
6687 3555 : sel[i] = 3 * i + k;
6688 : else
6689 1769 : sel[i] = 0;
6690 2061 : indices.new_vector (sel, 2, nelt);
6691 2061 : if (!can_vec_perm_const_p (mode, mode, indices))
6692 : {
6693 12 : if (dump_enabled_p ())
6694 4 : dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6695 : "shuffle of 3 loads is not supported by"
6696 : " target\n");
6697 12 : return false;
6698 : }
6699 7221 : for (i = 0, j = 0; i < nelt; i++)
6700 5172 : if (3 * i + k < 2 * nelt)
6701 3448 : sel[i] = i;
6702 : else
6703 1724 : sel[i] = nelt + ((nelt + k) % 3) + 3 * (j++);
6704 2049 : indices.new_vector (sel, 2, nelt);
6705 2049 : if (!can_vec_perm_const_p (mode, mode, indices))
6706 : {
6707 0 : if (dump_enabled_p ())
6708 0 : dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6709 : "shuffle of 3 loads is not supported by"
6710 : " target\n");
6711 0 : return false;
6712 : }
6713 : }
6714 : return true;
6715 695 : }
6716 : else
6717 : {
6718 : /* If length is not equal to 3 then only power of 2 is supported. */
6719 741 : gcc_assert (pow2p_hwi (count));
6720 1482 : poly_uint64 nelt = GET_MODE_NUNITS (mode);
6721 :
6722 : /* The encoding has a single stepped pattern. */
6723 741 : vec_perm_builder sel (nelt, 1, 3);
6724 741 : sel.quick_grow (3);
 : /* Power-of-two group sizes are presumably lowered by repeated
 : interleaving steps (see vect_permute_load_chain); here it
 : suffices to check the two stride-2 masks: extract-even
 : {0, 2, 4, ...} and extract-odd {1, 3, 5, ...}. */
6725 3705 : for (i = 0; i < 3; i++)
6726 2223 : sel[i] = i * 2;
6727 741 : vec_perm_indices indices (sel, 2, nelt);
6728 741 : if (can_vec_perm_const_p (mode, mode, indices))
6729 : {
6730 2956 : for (i = 0; i < 3; i++)
6731 2217 : sel[i] = i * 2 + 1;
6732 739 : indices.new_vector (sel, 2, nelt);
6733 739 : if (can_vec_perm_const_p (mode, mode, indices))
6734 739 : return true;
6735 : }
6736 741 : }
6737 : }
6738 :
6739 2 : if (dump_enabled_p ())
6740 2 : dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6741 : "extract even/odd not supported by target\n");
6742 : return false;
6743 : }
6744 :
6745 : /* Return FN if vec_{masked_,mask_len_}load_lanes is available for COUNT vectors
6746 : of type VECTYPE. MASKED_P says whether the masked form is needed.
6747 : If it is available and ELSVALS is nonzero store the possible else values
6748 : in the vector it points to. */
6749 :
6750 : internal_fn
6751 138969 : vect_load_lanes_supported (tree vectype, unsigned HOST_WIDE_INT count,
6752 : bool masked_p, vec<int> *elsvals)
6753 : {
 : /* The mask-len variant is probed first, regardless of MASKED_P;
 : only if it is unavailable do we fall back to the plain masked or
 : unmasked optab as selected by MASKED_P. */
6754 138969 : if (vect_lanes_optab_supported_p ("vec_mask_len_load_lanes",
6755 : vec_mask_len_load_lanes_optab, vectype,
6756 : count, elsvals))
6757 : return IFN_MASK_LEN_LOAD_LANES;
6758 138969 : else if (masked_p)
6759 : {
6760 30 : if (vect_lanes_optab_supported_p ("vec_mask_load_lanes",
6761 : vec_mask_load_lanes_optab, vectype,
6762 : count, elsvals))
6763 : return IFN_MASK_LOAD_LANES;
6764 : }
6765 : else
6766 : {
6767 138939 : if (vect_lanes_optab_supported_p ("vec_load_lanes", vec_load_lanes_optab,
6768 : vectype, count, elsvals))
6769 : return IFN_LOAD_LANES;
6770 : }
 : /* No load-lanes variant is available for this VECTYPE/COUNT. */
6771 : return IFN_LAST;
6772 : }
6773 :
6774 : /* Function vect_can_force_dr_alignment_p.
6775 :
6776 : Returns whether the alignment of a DECL can be forced to be aligned
6777 : on ALIGNMENT bit boundary. */
6778 :
6779 : bool
6780 661041 : vect_can_force_dr_alignment_p (const_tree decl, poly_uint64 alignment)
6781 : {
 : /* Only variables can have their alignment increased. */
6782 661041 : if (!VAR_P (decl))
6783 : return false;
6784 :
 : /* A variable visible to the symbol table must have a node that
 : allows its alignment to be increased. */
6785 208770 : if (decl_in_symtab_p (decl)
6786 208770 : && (!symtab_node::get (decl)
6787 21428 : || !symtab_node::get (decl)->can_increase_alignment_p ()))
6788 12909 : return false;
6789 :
 : /* Statically allocated variables are capped by MAX_OFILE_ALIGNMENT,
 : automatic (stack) variables by MAX_STACK_ALIGNMENT. */
6790 195861 : if (TREE_STATIC (decl))
6791 8519 : return (known_le (alignment,
6792 8519 : (unsigned HOST_WIDE_INT) MAX_OFILE_ALIGNMENT));
6793 : else
6794 187342 : return (known_le (alignment, (unsigned HOST_WIDE_INT) MAX_STACK_ALIGNMENT));
6795 : }
6796 :
6797 : /* Return whether the data reference DR_INFO is supported with respect to its
6798 : alignment.
6799 : VINFO is the enclosing vectorization info, VECTYPE the vector type used
6800 : to access DR_INFO and MISALIGNMENT its known misalignment in bytes
6801 : (DR_MISALIGNMENT_UNKNOWN if unknown). If IS_GATHER_SCATTER is true we
6802 : are dealing with a gather/scatter. */
6803 :
6804 : enum dr_alignment_support
6805 2391800 : vect_supportable_dr_alignment (vec_info *vinfo, dr_vec_info *dr_info,
6806 : tree vectype, int misalignment,
6807 : bool is_gather_scatter)
6808 : {
6809 2391800 : data_reference *dr = dr_info->dr;
6810 2391800 : stmt_vec_info stmt_info = dr_info->stmt;
6811 2391800 : machine_mode mode = TYPE_MODE (vectype);
6812 2391800 : loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
6813 2391800 : class loop *vect_loop = NULL;
6814 2391800 : bool nested_in_vect_loop = false;
6815 :
6816 2391800 : if (misalignment == 0)
6817 : return dr_aligned;
 : /* A read that is required to be safe to execute speculatively is
 : only supported in the fully aligned case handled above. */
6818 1473775 : else if (dr_safe_speculative_read_required (stmt_info))
6819 : return dr_unaligned_unsupported;
6820 :
6821 1091668 : if (loop_vinfo)
6822 : {
6823 686563 : vect_loop = LOOP_VINFO_LOOP (loop_vinfo);
6824 686563 : nested_in_vect_loop = nested_in_vect_loop_p (vect_loop, stmt_info);
6825 : }
6826 :
6827 : /* Possibly unaligned access. */
6828 :
6829 : /* We can choose between using the implicit realignment scheme (generating
6830 : a misaligned_move stmt) and the explicit realignment scheme (generating
6831 : aligned loads with a REALIGN_LOAD). There are two variants to the
6832 : explicit realignment scheme: optimized, and unoptimized.
6833 : We can optimize the realignment only if the step between consecutive
6834 : vector loads is equal to the vector size. Since the vector memory
6835 : accesses advance in steps of VS (Vector Size) in the vectorized loop, it
6836 : is guaranteed that the misalignment amount remains the same throughout the
6837 : execution of the vectorized loop. Therefore, we can create the
6838 : "realignment token" (the permutation mask that is passed to REALIGN_LOAD)
6839 : at the loop preheader.
6840 :
6841 : However, in the case of outer-loop vectorization, when vectorizing a
6842 : memory access in the inner-loop nested within the LOOP that is now being
6843 : vectorized, while it is guaranteed that the misalignment of the
6844 : vectorized memory access will remain the same in different outer-loop
6845 : iterations, it is *not* guaranteed that is will remain the same throughout
6846 : the execution of the inner-loop. This is because the inner-loop advances
6847 : with the original scalar step (and not in steps of VS). If the inner-loop
6848 : step happens to be a multiple of VS, then the misalignment remains fixed
6849 : and we can use the optimized realignment scheme. For example:
6850 :
6851 : for (i=0; i<N; i++)
6852 : for (j=0; j<M; j++)
6853 : s += a[i+j];
6854 :
6855 : When vectorizing the i-loop in the above example, the step between
6856 : consecutive vector loads is 1, and so the misalignment does not remain
6857 : fixed across the execution of the inner-loop, and the realignment cannot
6858 : be optimized (as illustrated in the following pseudo vectorized loop):
6859 :
6860 : for (i=0; i<N; i+=4)
6861 : for (j=0; j<M; j++){
6862 : vs += vp[i+j]; // misalignment of &vp[i+j] is {0,1,2,3,0,1,2,3,...}
6863 : // when j is {0,1,2,3,4,5,6,7,...} respectively.
6864 : // (assuming that we start from an aligned address).
6865 : }
6866 :
6867 : We therefore have to use the unoptimized realignment scheme:
6868 :
6869 : for (i=0; i<N; i+=4)
6870 : for (j=k; j<M; j+=4)
6871 : vs += vp[i+j]; // misalignment of &vp[i+j] is always k (assuming
6872 : // that the misalignment of the initial address is
6873 : // 0).
6874 :
6875 : The loop can then be vectorized as follows:
6876 :
6877 : for (k=0; k<4; k++){
6878 : rt = get_realignment_token (&vp[k]);
6879 : for (i=0; i<N; i+=4){
6880 : v1 = vp[i+k];
6881 : for (j=k; j<M; j+=4){
6882 : v2 = vp[i+j+VS-1];
6883 : va = REALIGN_LOAD <v1,v2,rt>;
6884 : vs += va;
6885 : v1 = v2;
6886 : }
6887 : }
6888 : } */
6889 :
6890 1091668 : if (DR_IS_READ (dr) && !is_gather_scatter)
6891 : {
6892 444634 : if (can_implement_p (vec_realign_load_optab, mode)
6893 444634 : && (!targetm.vectorize.builtin_mask_for_load
6894 0 : || targetm.vectorize.builtin_mask_for_load ()))
6895 : {
6896 : /* If we are doing SLP then the accesses need not have the
6897 : same alignment, instead it depends on the SLP group size. */
6898 0 : if (loop_vinfo
6899 0 : && STMT_SLP_TYPE (stmt_info)
6900 0 : && STMT_VINFO_GROUPED_ACCESS (stmt_info)
6901 0 : && !multiple_p (LOOP_VINFO_VECT_FACTOR (loop_vinfo)
6902 0 : * (DR_GROUP_SIZE
6903 0 : (DR_GROUP_FIRST_ELEMENT (stmt_info))),
6904 0 : TYPE_VECTOR_SUBPARTS (vectype)))
6905 : ;
6906 0 : else if (!loop_vinfo
6907 0 : || (nested_in_vect_loop
6908 0 : && maybe_ne (TREE_INT_CST_LOW (DR_STEP (dr)),
6909 0 : GET_MODE_SIZE (TYPE_MODE (vectype)))))
6910 0 : return dr_explicit_realign;
6911 : else
6912 0 : return dr_explicit_realign_optimized;
6913 : }
6914 : }
6915 :
6916 1091668 : bool is_packed = not_size_aligned (DR_REF (dr));
 : /* For a gather/scatter with unknown misalignment, derive the scalar
 : element misalignment in bytes from the object alignment of the
 : reference modulo the element size. */
6917 1091668 : if (misalignment == DR_MISALIGNMENT_UNKNOWN
6918 1091668 : && is_gather_scatter)
6919 3077 : misalignment = (get_object_alignment (DR_REF (dr))
6920 3077 : % (GET_MODE_BITSIZE (GET_MODE_INNER (mode))))
6921 3077 : / BITS_PER_UNIT;
 : /* Finally ask the target whether it can handle the remaining
 : misalignment for MODE directly. */
6922 1091668 : if (targetm.vectorize.support_vector_misalignment (mode, misalignment,
6923 : is_packed,
6924 : is_gather_scatter))
6925 : return dr_unaligned_supported;
6926 :
6927 : /* Unsupported. */
6928 : return dr_unaligned_unsupported;
6929 : }
|