Line data Source code
1 : /* Vectorizer
2 : Copyright (C) 2003-2026 Free Software Foundation, Inc.
3 : Contributed by Dorit Naishlos <dorit@il.ibm.com>
4 :
5 : This file is part of GCC.
6 :
7 : GCC is free software; you can redistribute it and/or modify it under
8 : the terms of the GNU General Public License as published by the Free
9 : Software Foundation; either version 3, or (at your option) any later
10 : version.
11 :
12 : GCC is distributed in the hope that it will be useful, but WITHOUT ANY
13 : WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 : FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
15 : for more details.
16 :
17 : You should have received a copy of the GNU General Public License
18 : along with GCC; see the file COPYING3. If not see
19 : <http://www.gnu.org/licenses/>. */
20 :
21 : #ifndef GCC_TREE_VECTORIZER_H
22 : #define GCC_TREE_VECTORIZER_H
23 :
24 : typedef class _stmt_vec_info *stmt_vec_info; /* Pointer to class _stmt_vec_info, which is only forward-declared at this point. */
25 : typedef struct _slp_tree *slp_tree; /* Pointer to struct _slp_tree, which is defined further down in this header. */
26 :
27 : #include "tree-data-ref.h"
28 : #include "tree-hash-traits.h"
29 : #include "target.h"
30 : #include "internal-fn.h"
31 : #include "tree-ssa-operands.h"
32 : #include "gimple-match.h"
33 : #include "dominance.h"
34 :
35 : /* Used for naming of new temporaries; the enumerators distinguish simple, pointer, scalar and mask variables. */
36 : enum vect_var_kind {
37 : vect_simple_var,
38 : vect_pointer_var,
39 : vect_scalar_var,
40 : vect_mask_var
41 : };
42 :
43 : /* Defines type of operation. Values start at 1: unary_op = 1, binary_op = 2, ternary_op = 3. */
44 : enum operation_type {
45 : unary_op = 1,
46 : binary_op,
47 : ternary_op
48 : };
49 :
50 : /* Define the type of alignment support available. NOTE(review): the "dr_" prefix presumably refers to a data reference -- confirm. */
51 : enum dr_alignment_support {
52 : dr_unaligned_unsupported,
53 : dr_unaligned_supported,
54 : dr_explicit_realign,
55 : dr_explicit_realign_optimized,
56 : dr_aligned
57 : };
58 :
59 : /* Define the type of peeling support, indicating whether peeling for alignment can help
60 : make vectorization supported (known to, maybe, or not at all). */
61 : enum peeling_support {
62 : peeling_known_supported,
63 : peeling_maybe_supported,
64 : peeling_unsupported
65 : };
66 :
67 : /* Define type of def-use cross-iteration cycle. vect_uninitialized_def is explicitly 0 and vect_constant_def explicitly 1; later enumerators follow consecutively. */
68 : enum vect_def_type {
69 : vect_uninitialized_def = 0,
70 : vect_constant_def = 1,
71 : vect_external_def,
72 : vect_internal_def,
73 : vect_induction_def,
74 : vect_reduction_def,
75 : vect_double_reduction_def,
76 : vect_nested_cycle,
77 : vect_first_order_recurrence,
78 : vect_condition_def,
79 : vect_unknown_def_type
80 : };
81 :
82 : /* Define operation type of linear/non-linear induction variable. vect_step_op_add is explicitly 0. */
83 : enum vect_induction_op_type {
84 : vect_step_op_add = 0,
85 : vect_step_op_neg,
86 : vect_step_op_mul,
87 : vect_step_op_shl,
88 : vect_step_op_shr
89 : };
90 :
91 : /* Define type of reduction. EXTRACT_LAST_REDUCTION and FOLD_LEFT_REDUCTION are described by the pseudo-loops next to them. */
92 : enum vect_reduction_type {
93 : TREE_CODE_REDUCTION,
94 : COND_REDUCTION,
95 : INTEGER_INDUC_COND_REDUCTION,
96 : CONST_COND_REDUCTION,
97 :
98 : /* Retain a scalar phi and use a FOLD_EXTRACT_LAST within the loop
99 : to implement:
100 :
101 : for (int i = 0; i < VF; ++i)
102 : res = cond[i] ? val[i] : res; */
103 : EXTRACT_LAST_REDUCTION,
104 :
105 : /* Use a folding reduction within the loop to implement:
106 :
107 : for (int i = 0; i < VF; ++i)
108 : res = res OP val[i];
109 :
110 : (with no reassociation). */
111 : FOLD_LEFT_REDUCTION
112 : };
113 : /* Nonzero if vect_def_type value D is vect_reduction_def, vect_double_reduction_def or vect_nested_cycle. D is evaluated up to three times. */
114 : #define VECTORIZABLE_CYCLE_DEF(D) (((D) == vect_reduction_def) \
115 : || ((D) == vect_double_reduction_def) \
116 : || ((D) == vect_nested_cycle))
117 :
118 : /* Structure to encapsulate information about a group of like
119 : instructions to be presented to the target cost model. */
120 : struct stmt_info_for_cost {
121 : int count; /* NOTE(review): presumably the number of like instructions in the group -- confirm. */
122 : enum vect_cost_for_stmt kind;
123 : enum vect_cost_model_location where;
124 : stmt_vec_info stmt_info;
125 : slp_tree node;
126 : tree vectype;
127 : int misalign;
128 : };
129 : /* A vec of stmt_info_for_cost entries. */
130 : typedef vec<stmt_info_for_cost> stmt_vector_for_cost;
131 :
132 : /* Maps base addresses to an innermost_loop_behavior and the stmt it was
133 : derived from that gives the maximum known alignment for that base. The mapped value is a std::pair of the stmt_vec_info and the innermost_loop_behavior pointer. */
134 : typedef hash_map<tree_operand_hash,
135 : std::pair<stmt_vec_info, innermost_loop_behavior *> >
136 : vec_base_alignments;
137 :
138 : /* Represents elements [START, START + LENGTH) of cyclical array OPS*
139 : (i.e. OPS repeated to give at least START + LENGTH elements). */
140 : struct vect_scalar_ops_slice
141 : {
142 : tree op (unsigned int i) const;
143 : bool all_same_p () const; /* Defined out of line. */
144 :
145 : vec<tree> *ops; /* OPS: pointer to the underlying vec of operands. */
146 : unsigned int start; /* START: index in OPS of the first element of the slice. */
147 : unsigned int length; /* LENGTH: number of elements in the slice. */
148 : };
149 :
150 : /* Return element I of the slice, i.e. element (I + START) modulo the length of OPS. LENGTH is not consulted. */
151 : inline tree
152 2691964 : vect_scalar_ops_slice::op (unsigned int i) const
153 : {
154 5383928 : return (*ops)[(i + start) % ops->length ()];
155 : }
156 :
157 : /* Hash traits for vect_scalar_ops_slice. LENGTH == 0 marks an empty entry and LENGTH == ~0U a deleted one. */
158 : struct vect_scalar_ops_slice_hash : typed_noop_remove<vect_scalar_ops_slice>
159 : {
160 : typedef vect_scalar_ops_slice value_type;
161 : typedef vect_scalar_ops_slice compare_type;
162 :
163 : static const bool empty_zero_p = true;
164 :
165 : static void mark_deleted (value_type &s) { s.length = ~0U; }
166 0 : static void mark_empty (value_type &s) { s.length = 0; }
167 424304 : static bool is_deleted (const value_type &s) { return s.length == ~0U; }
168 4032938 : static bool is_empty (const value_type &s) { return s.length == 0; }
169 : static hashval_t hash (const value_type &); /* Defined out of line. */
170 : static bool equal (const value_type &, const compare_type &); /* Defined out of line. */
171 : };
172 :
173 : /* Describes how we're going to vectorize an individual load or store,
174 : or a group of loads or stores. The three VMAT_GATHER_SCATTER_* kinds are the ones accepted by mat_gather_scatter_p. */
175 : enum vect_memory_access_type {
176 : VMAT_UNINITIALIZED,
177 :
178 : /* An access to an invariant address. This is used only for loads. */
179 : VMAT_INVARIANT,
180 :
181 : /* A simple contiguous access. */
182 : VMAT_CONTIGUOUS,
183 :
184 : /* A contiguous access that goes down in memory rather than up,
185 : with no additional permutation. This is used only for stores
186 : of invariants. */
187 : VMAT_CONTIGUOUS_DOWN,
188 :
189 : /* A simple contiguous access in which the elements need to be reversed
190 : after loading or before storing. */
191 : VMAT_CONTIGUOUS_REVERSE,
192 :
193 : /* An access that uses IFN_LOAD_LANES or IFN_STORE_LANES. */
194 : VMAT_LOAD_STORE_LANES,
195 :
196 : /* An access in which each scalar element is loaded or stored
197 : individually. */
198 : VMAT_ELEMENTWISE,
199 :
200 : /* A hybrid of VMAT_CONTIGUOUS and VMAT_ELEMENTWISE, used for grouped
201 : SLP accesses. Each unrolled iteration uses a contiguous load
202 : or store for the whole group, but the groups from separate iterations
203 : are combined in the same way as for VMAT_ELEMENTWISE. */
204 : VMAT_STRIDED_SLP,
205 :
206 : /* The access uses gather loads or scatter stores (applies to all three kinds below). */
207 : VMAT_GATHER_SCATTER_LEGACY,
208 : VMAT_GATHER_SCATTER_IFN,
209 : VMAT_GATHER_SCATTER_EMULATED
210 : };
211 :
212 : /* Returns whether MAT is any of the VMAT_GATHER_SCATTER_* kinds (LEGACY, IFN or EMULATED). */
213 :
214 : inline bool
215 6329161 : mat_gather_scatter_p (vect_memory_access_type mat)
216 : {
217 6329161 : return (mat == VMAT_GATHER_SCATTER_LEGACY
218 : || mat == VMAT_GATHER_SCATTER_IFN
219 6329161 : || mat == VMAT_GATHER_SCATTER_EMULATED);
220 : }
221 :
222 : /*-----------------------------------------------------------------*/
223 : /* Info on vectorized defs. */
224 : /*-----------------------------------------------------------------*/
225 : enum stmt_vec_info_type { /* Used as the type of _slp_tree::type; undef_vec_info_type is explicitly 0. */
226 : undef_vec_info_type = 0,
227 : load_vec_info_type,
228 : store_vec_info_type,
229 : shift_vec_info_type,
230 : op_vec_info_type,
231 : call_vec_info_type,
232 : call_simd_clone_vec_info_type,
233 : assignment_vec_info_type,
234 : condition_vec_info_type,
235 : comparison_vec_info_type,
236 : reduc_vec_info_type,
237 : induc_vec_info_type,
238 : type_promotion_vec_info_type,
239 : type_demotion_vec_info_type,
240 : type_conversion_vec_info_type,
241 : cycle_phi_info_type,
242 : lc_phi_info_type,
243 : phi_info_type,
244 : recurr_info_type,
245 : loop_exit_ctrl_vec_info_type,
246 : permute_info_type
247 : };
248 :
249 : /************************************************************************
250 : SLP
251 : ************************************************************************/
252 : typedef vec<std::pair<unsigned, unsigned> > lane_permutation_t; /* Type of _slp_tree::lane_permutation, whose entries are { operand number, lane number } pairs. */
253 : typedef auto_vec<std::pair<unsigned, unsigned>, 16> auto_lane_permutation_t; /* auto_vec variant with the same element type. */
254 : typedef vec<unsigned> load_permutation_t; /* Type of _slp_tree::load_permutation. */
255 : typedef auto_vec<unsigned, 16> auto_load_permutation_t; /* auto_vec variant with the same element type. */
256 : /* Common base of the kind specific analysis data structs (vect_simd_clone_data, vect_load_store_data); _slp_tree::data points to one. The destructor is virtual. */
257 3320156 : struct vect_data {
258 2068919 : virtual ~vect_data () = default;
259 : };
260 :
261 : /* Analysis data from vectorizable_simd_clone_call for
262 : call_simd_clone_vec_info_type. Default- and move-constructible. */
263 : struct vect_simd_clone_data : vect_data {
264 1879 : virtual ~vect_simd_clone_data () = default;
265 1412 : vect_simd_clone_data () = default;
266 467 : vect_simd_clone_data (vect_simd_clone_data &&other) = default;
267 :
268 : /* Selected SIMD clone and clone for in-branch. */
269 : cgraph_node *clone;
270 : cgraph_node *clone_inbranch;
271 :
272 : /* Selected SIMD clone's function info. First vector element
273 : is NULL_TREE, followed by a pair of trees (base + step)
274 : for linear arguments (pair of NULLs for other arguments). */
275 : auto_vec<tree> simd_clone_info;
276 : };
277 :
278 : /* Analysis data from vectorizable_load and vectorizable_store for
279 : load_vec_info_type and store_vec_info_type. The trailing line comments name the access type(s) or SLP property each member relates to. */
280 : struct vect_load_store_data : vect_data {
281 1249358 : vect_load_store_data (vect_load_store_data &&other) = default;
282 2068919 : vect_load_store_data () = default;
283 3317642 : virtual ~vect_load_store_data () = default;
284 :
285 : vect_memory_access_type memory_access_type;
286 : dr_alignment_support alignment_support_scheme;
287 : int misalignment;
288 : internal_fn lanes_ifn; // VMAT_LOAD_STORE_LANES
289 : poly_int64 poffset;
290 : union {
291 : internal_fn ifn; // VMAT_GATHER_SCATTER_IFN
292 : tree decl; // VMAT_GATHER_SCATTER_DECL (NOTE(review): vect_memory_access_type has no such enumerator; presumably VMAT_GATHER_SCATTER_LEGACY -- confirm)
293 : } gs;
294 : tree strided_offset_vectype; // VMAT_GATHER_SCATTER_IFN, originally strided
295 : /* Load/store type with larger element mode used for punning the vectype. */
296 : tree ls_type; // VMAT_GATHER_SCATTER_IFN, VMAT_STRIDED_SLP
297 : /* Load/store element type used for punning the vectype. Relevant when
298 : that is a vector type. */
299 : tree ls_eltype; // VMAT_STRIDED_SLP
300 : /* This is set to a supported offset vector type if we don't support the
301 : originally requested offset type, otherwise NULL.
302 : If non-NULL there will be an additional offset conversion before
303 : the gather/scatter. */
304 : tree supported_offset_vectype; // VMAT_GATHER_SCATTER_IFN
305 : /* Similar for scale. Only nonzero if we don't support the requested
306 : scale. Then we need to multiply the offset vector before the
307 : gather/scatter. */
308 : int supported_scale; // VMAT_GATHER_SCATTER_IFN
309 : auto_vec<int> elsvals; // NOTE(review): undocumented in this header -- confirm meaning against its users
310 : /* True if the load requires a load permutation. */
311 : bool slp_perm; // SLP_TREE_LOAD_PERMUTATION
312 : unsigned n_perms; // SLP_TREE_LOAD_PERMUTATION
313 : unsigned n_loads; // SLP_TREE_LOAD_PERMUTATION
314 : /* Whether the load permutation is consecutive and simple. */
315 : bool subchain_p; // VMAT_STRIDED_SLP and VMAT_GATHER_SCATTER (NOTE(review): presumably the VMAT_GATHER_SCATTER_* kinds -- confirm)
316 : };
317 :
318 : /* A computation tree of an SLP instance. Each node corresponds to a group of
319 : stmts to be packed in a SIMD stmt. */
320 : struct _slp_tree {
321 : _slp_tree ();
322 : ~_slp_tree ();
323 :
324 : void push_vec_def (gimple *def);
325 8526 : void push_vec_def (tree def) { vec_defs.quick_push (def); } /* Adds DEF to VEC_DEFS via quick_push. */
326 :
327 : /* Nodes that contain the def-stmts of the operands of this node's statements. */
328 : vec<slp_tree> children;
329 :
330 : /* A group of scalar stmts to be vectorized together. */
331 : vec<stmt_vec_info> stmts;
332 : /* A group of scalar operands to be vectorized together. */
333 : vec<tree> ops;
334 : /* A set of lane indices that are live and to be code-generated from
335 : this SLP node. */
336 : vec<unsigned> live_lanes;
337 :
338 : /* The representative that should be used for analysis and
339 : code generation. */
340 : stmt_vec_info representative;
341 :
342 : struct {
343 : /* SLP cycle the node resides in, or -1. */
344 : int id;
345 : /* The SLP operand index with the edge on the SLP cycle, or -1. */
346 : int reduc_idx;
347 : } cycle_info;
348 :
349 : /* Load permutation relative to the stores, NULL if there is no
350 : permutation. */
351 : load_permutation_t load_permutation;
352 : /* Lane permutation of the operands scalar lanes encoded as pairs
353 : of { operand number, lane number }. The number of elements
354 : denotes the number of output lanes. */
355 : lane_permutation_t lane_permutation;
356 :
357 : tree vectype;
358 : /* Vectorized defs. */
359 : vec<tree> vec_defs;
360 :
361 : /* Reference count in the SLP graph. */
362 : unsigned int refcnt;
363 : /* The maximum number of vector elements for the subtree rooted
364 : at this node. */
365 : poly_uint64 max_nunits;
366 : /* The DEF type of this node. */
367 : enum vect_def_type def_type;
368 : /* The number of scalar lanes produced by this node. */
369 : unsigned int lanes;
370 : /* The operation of this node. */
371 : enum tree_code code;
372 : /* For gather/scatter memory operations the scale each offset element
373 : should be multiplied by before being added to the base. */
374 : int gs_scale;
375 : /* For gather/scatter memory operations the loop-invariant base value. */
376 : tree gs_base;
377 : /* Whether uses of this load or feeders of this store are suitable
378 : for load/store-lanes. */
379 : bool ldst_lanes;
380 : /* For BB vect, flag to indicate that this load node should be vectorized
381 : so as to avoid STLF fails because of related stores. */
382 : bool avoid_stlf_fail;
383 :
384 : int vertex; /* NOTE(review): undocumented; its purpose is not evident from this header. */
385 :
386 : /* The kind of operation as determined by analysis and optional
387 : kind specific data. */
388 : enum stmt_vec_info_type type;
389 : vect_data *data;
390 : /* Return DATA cast to T if DATA is non-null, otherwise ELSE_. The cast is a static_cast, so the caller must request the right T. */
391 : template <class T>
392 2070331 : T& get_data (T& else_) { return data ? *static_cast <T *> (data) : else_; }
393 :
394 : /* If not NULL this is a cached failed SLP discovery attempt with
395 : the lanes that failed during SLP discovery as 'false'. This is
396 : a copy of the matches array. */
397 : bool *failed;
398 :
399 : /* Allocate from slp_tree_pool. */
400 : static void *operator new (size_t);
401 :
402 : /* Return memory to slp_tree_pool. */
403 : static void operator delete (void *, size_t);
404 :
405 : /* Linked list of nodes to release when we free the slp_tree_pool. */
406 : slp_tree next_node;
407 : slp_tree prev_node;
408 : };
409 :
410 : /* The enum describes the type of operations that an SLP instance
411 : can perform. It is the type of _slp_instance::kind. */
412 :
413 : enum slp_instance_kind {
414 : slp_inst_kind_store,
415 : slp_inst_kind_reduc_group,
416 : slp_inst_kind_reduc_chain,
417 : slp_inst_kind_bb_reduc,
418 : slp_inst_kind_ctor,
419 : slp_inst_kind_gcond
420 : };
421 :
422 : /* SLP instance is a sequence of stmts in a loop that can be packed into
423 : SIMD stmts. */
424 : typedef class _slp_instance {
425 : public:
426 : /* The root of SLP tree. */
427 : slp_tree root;
428 :
429 : /* For vector constructors, the constructor stmt that the SLP tree is built
430 : from, NULL otherwise. */
431 : vec<stmt_vec_info> root_stmts;
432 :
433 : /* For slp_inst_kind_bb_reduc the defs that were not vectorized, NULL
434 : otherwise. */
435 : vec<tree> remain_defs;
436 :
437 : /* The group of nodes that contain loads of this SLP instance. */
438 : vec<slp_tree> loads;
439 :
440 : /* The SLP node containing the reduction PHIs. */
441 : slp_tree reduc_phis;
442 :
443 : /* Vector cost of this entry to the SLP graph. */
444 : stmt_vector_for_cost cost_vec;
445 :
446 : /* If this instance is the main entry of a subgraph the set of
447 : entries into the same subgraph, including itself. */
448 : vec<_slp_instance *> subgraph_entries;
449 :
450 : /* The type of operation the SLP instance is performing. */
451 : slp_instance_kind kind;
452 :
453 : dump_user_location_t location () const; /* Defined out of line. */
454 : } *slp_instance; /* slp_instance is a pointer to _slp_instance. */
455 :
456 :
457 : /* Access Functions. Each macro expands to the correspondingly named member of S: SLP_INSTANCE_* for an slp_instance, SLP_TREE_* for an slp_tree. */
458 : #define SLP_INSTANCE_TREE(S) (S)->root
459 : #define SLP_INSTANCE_LOADS(S) (S)->loads
460 : #define SLP_INSTANCE_ROOT_STMTS(S) (S)->root_stmts
461 : #define SLP_INSTANCE_REMAIN_DEFS(S) (S)->remain_defs
462 : #define SLP_INSTANCE_KIND(S) (S)->kind
463 :
464 : #define SLP_TREE_CHILDREN(S) (S)->children
465 : #define SLP_TREE_SCALAR_STMTS(S) (S)->stmts
466 : #define SLP_TREE_SCALAR_OPS(S) (S)->ops
467 : #define SLP_TREE_LIVE_LANES(S) (S)->live_lanes
468 : #define SLP_TREE_REF_COUNT(S) (S)->refcnt
469 : #define SLP_TREE_VEC_DEFS(S) (S)->vec_defs
470 : #define SLP_TREE_LOAD_PERMUTATION(S) (S)->load_permutation
471 : #define SLP_TREE_LANE_PERMUTATION(S) (S)->lane_permutation
472 : #define SLP_TREE_DEF_TYPE(S) (S)->def_type
473 : #define SLP_TREE_VECTYPE(S) (S)->vectype
474 : #define SLP_TREE_REPRESENTATIVE(S) (S)->representative
475 : #define SLP_TREE_LANES(S) (S)->lanes
476 : #define SLP_TREE_CODE(S) (S)->code
477 : #define SLP_TREE_TYPE(S) (S)->type
478 : #define SLP_TREE_GS_SCALE(S) (S)->gs_scale
479 : #define SLP_TREE_GS_BASE(S) (S)->gs_base
480 : #define SLP_TREE_REDUC_IDX(S) (S)->cycle_info.reduc_idx
481 : #define SLP_TREE_PERMUTE_P(S) ((S)->code == VEC_PERM_EXPR) /* True if the code of S is VEC_PERM_EXPR. */
482 : /* Return the memory_access_type recorded in NODE's vect_load_store_data if NODE's type is load_vec_info_type or store_vec_info_type, otherwise VMAT_UNINITIALIZED. NODE->data is dereferenced without a null check in the load/store case. */
483 : inline vect_memory_access_type
484 887940 : SLP_TREE_MEMORY_ACCESS_TYPE (slp_tree node)
485 : {
486 323974 : if (SLP_TREE_TYPE (node) == load_vec_info_type
487 306407 : || SLP_TREE_TYPE (node) == store_vec_info_type)
488 115263 : return static_cast<vect_load_store_data *> (node->data)->memory_access_type;
489 : return VMAT_UNINITIALIZED;
490 : }
491 : /* Enumerates the partial vector styles: none, while_ult, avx512 and len. */
492 : enum vect_partial_vector_style {
493 : vect_partial_vectors_none,
494 : vect_partial_vectors_while_ult,
495 : vect_partial_vectors_avx512,
496 : vect_partial_vectors_len
497 : };
498 :
499 : /* Key for map that records association between
500 : scalar conditions and corresponding loop mask, and
501 : is populated by vect_record_loop_mask. */
502 :
503 : struct scalar_cond_masked_key
504 : {
505 63801 : scalar_cond_masked_key (tree t, unsigned ncopies_)
506 63801 : : ncopies (ncopies_)
507 : {
508 63801 : get_cond_ops_from_tree (t);
509 : }
510 : /* Defined out of line; called by the constructor above, which itself only initializes NCOPIES. */
511 : void get_cond_ops_from_tree (tree);
512 :
513 : unsigned ncopies;
514 : bool inverted_p;
515 : tree_code code;
516 : tree op0;
517 : tree op1;
518 : };
519 : /* Hash traits for scalar_cond_masked_key. */
520 : template<>
521 : struct default_hash_traits<scalar_cond_masked_key>
522 : {
523 : typedef scalar_cond_masked_key compare_type;
524 : typedef scalar_cond_masked_key value_type;
525 : /* Hash all five key fields: CODE, OP0, OP1, NCOPIES and INVERTED_P. */
526 : static inline hashval_t
527 72087 : hash (value_type v)
528 : {
529 72087 : inchash::hash h;
530 72087 : h.add_int (v.code);
531 72087 : inchash::add_expr (v.op0, h, 0);
532 72087 : inchash::add_expr (v.op1, h, 0);
533 72087 : h.add_int (v.ncopies);
534 72087 : h.add_flag (v.inverted_p);
535 72087 : return h.end ();
536 : }
537 : /* Two keys are equal if NCOPIES, CODE and INVERTED_P match and both operands compare equal under operand_equal_p. */
538 : static inline bool
539 10173 : equal (value_type existing, value_type candidate)
540 : {
541 10173 : return (existing.ncopies == candidate.ncopies
542 9957 : && existing.code == candidate.code
543 6471 : && existing.inverted_p == candidate.inverted_p
544 5007 : && operand_equal_p (existing.op0, candidate.op0, 0)
545 13309 : && operand_equal_p (existing.op1, candidate.op1, 0));
546 : }
547 :
548 : static const bool empty_zero_p = true;
549 : /* An empty slot is identified by NCOPIES == 0 (see is_empty); mark_empty also clears INVERTED_P. */
550 : static inline void
551 0 : mark_empty (value_type &v)
552 : {
553 0 : v.ncopies = 0;
554 0 : v.inverted_p = false;
555 : }
556 :
557 : static inline bool
558 9162293 : is_empty (value_type v)
559 : {
560 9099665 : return v.ncopies == 0;
561 : }
562 : /* Deletion is not represented: mark_deleted is a no-op and is_deleted always returns false. */
563 : static inline void mark_deleted (value_type &) {}
564 :
565 : static inline bool is_deleted (const value_type &)
566 : {
567 : return false;
568 : }
569 :
570 55328 : static inline void remove (value_type &) {} /* No-op. */
571 : };
572 : /* A hash_set of scalar_cond_masked_key. */
573 : typedef hash_set<scalar_cond_masked_key> scalar_cond_masked_set_type;
574 :
575 : /* Key and map that records association between vector conditions and
576 : corresponding loop mask, and is populated by prepare_vec_mask. The key is a pair of trees, each hashed with tree_operand_hash. */
577 :
578 : typedef pair_hash<tree_operand_hash, tree_operand_hash> tree_cond_mask_hash;
579 : typedef hash_set<tree_cond_mask_hash> vec_cond_masked_set_type;
580 :
581 : /* Describes two objects whose addresses must be unequal for the vectorized
582 : loop to be valid. */
583 : typedef std::pair<tree, tree> vec_object_pair;
584 :
585 : /* Records that vectorization is only possible if abs (EXPR) >= MIN_VALUE.
586 : UNSIGNED_P is true if we can assume that abs (EXPR) == EXPR. */
587 : class vec_lower_bound {
588 : public:
589 : vec_lower_bound () {} /* Does not explicitly initialize any member. */
590 1764 : vec_lower_bound (tree e, bool u, poly_uint64 m)
591 1764 : : expr (e), unsigned_p (u), min_value (m) {}
592 :
593 : tree expr; /* EXPR */
594 : bool unsigned_p; /* UNSIGNED_P */
595 : poly_uint64 min_value; /* MIN_VALUE */
596 : };
597 :
598 : /* Vectorizer state shared between different analyses like vector sizes
599 : of the same CFG region. */
600 : class vec_info_shared {
601 : public:
602 : vec_info_shared();
603 : ~vec_info_shared();
604 :
605 : void save_datarefs();
606 : void check_datarefs();
607 :
608 : /* All data references. Freed by free_data_refs, so not an auto_vec. */
609 : vec<data_reference_p> datarefs;
610 : vec<data_reference> datarefs_copy; /* NOTE(review): presumably the copy taken by save_datarefs and compared by check_datarefs -- confirm. */
611 :
612 : /* The loop nest in which the data dependences are computed. */
613 : auto_vec<loop_p> loop_nest;
614 :
615 : /* All data dependences. Freed by free_dependence_relations, so not
616 : an auto_vec. */
617 : vec<ddr_p> ddrs;
618 : };
619 :
620 : /* Vectorizer state common between loop and basic-block vectorization. */
621 : class vec_info {
622 : public:
623 : typedef hash_set<int_hash<machine_mode, E_VOIDmode, E_BLKmode> > mode_set;
624 : enum vec_kind { bb, loop }; /* Tested by the is_a_helper specializations for _bb_vec_info and _loop_vec_info. */
625 :
626 : vec_info (vec_kind, vec_info_shared *);
627 : ~vec_info ();
628 :
629 : stmt_vec_info add_stmt (gimple *);
630 : stmt_vec_info add_pattern_stmt (gimple *, stmt_vec_info);
631 : stmt_vec_info resync_stmt_addr (gimple *);
632 : stmt_vec_info lookup_stmt (gimple *);
633 : stmt_vec_info lookup_def (tree);
634 : stmt_vec_info lookup_single_use (tree);
635 : class dr_vec_info *lookup_dr (data_reference *);
636 : void move_dr (stmt_vec_info, stmt_vec_info);
637 : void remove_stmt (stmt_vec_info);
638 : void replace_stmt (gimple_stmt_iterator *, stmt_vec_info, gimple *);
639 : void insert_on_entry (stmt_vec_info, gimple *);
640 : void insert_seq_on_entry (stmt_vec_info, gimple_seq);
641 :
642 : /* The type of vectorization. */
643 : vec_kind kind;
644 :
645 : /* Shared vectorizer state. */
646 : vec_info_shared *shared;
647 :
648 : /* The mapping of GIMPLE UID to stmt_vec_info. */
649 : vec<stmt_vec_info> stmt_vec_infos;
650 : /* Whether the above mapping is complete. */
651 : bool stmt_vec_info_ro;
652 :
653 : /* Whether we've done a transform we think OK to not update virtual
654 : SSA form. */
655 : bool any_known_not_updated_vssa;
656 :
657 : /* The SLP graph. */
658 : auto_vec<slp_instance> slp_instances;
659 :
660 : /* Maps base addresses to an innermost_loop_behavior that gives the maximum
661 : known alignment for that base. */
662 : vec_base_alignments base_alignments;
663 :
664 : /* All interleaving chains of stores, represented by the first
665 : stmt in the chain. */
666 : auto_vec<stmt_vec_info> grouped_stores;
667 :
668 : /* The set of vector modes used in the vectorized region. */
669 : mode_set used_vector_modes;
670 :
671 : /* The argument we should pass to related_vector_mode when looking up
672 : the vector mode for a scalar mode, or VOIDmode if we haven't yet
673 : made any decisions about which vector modes to use. */
674 : machine_mode vector_mode;
675 :
676 : /* The basic blocks in the vectorization region. For _loop_vec_info,
677 : the memory is internally managed, while for _bb_vec_info, it points
678 : to element space of an external auto_vec<>. This inconsistency is
679 : not a good class design pattern. TODO: improve it with a unified
680 : auto_vec<> whose lifetime is confined to vec_info object. */
681 : basic_block *bbs;
682 :
683 : /* The count of the basic blocks in the vectorization region. */
684 : unsigned int nbbs;
685 :
686 : /* Used to keep a sequence of def stmts of a pattern stmt that are loop
687 : invariant if they exist.
688 : The sequence is emitted in the loop preheader should the loop be vectorized
689 : and is reset when undoing patterns. */
690 : gimple_seq inv_pattern_def_seq;
691 :
692 : private:
693 : stmt_vec_info new_stmt_vec_info (gimple *stmt);
694 : void set_vinfo_for_stmt (gimple *, stmt_vec_info, bool = true);
695 : void free_stmt_vec_infos ();
696 : void free_stmt_vec_info (stmt_vec_info);
697 : };
698 :
699 : class _loop_vec_info;
700 : class _bb_vec_info;
701 : /* is_a_helper test: a vec_info is a _loop_vec_info exactly when its KIND is vec_info::loop. */
702 : template<>
703 : template<>
704 : inline bool
705 370856969 : is_a_helper <_loop_vec_info *>::test (vec_info *i)
706 : {
707 370243736 : return i->kind == vec_info::loop;
708 : }
709 : /* is_a_helper test: a vec_info is a _bb_vec_info exactly when its KIND is vec_info::bb. */
710 : template<>
711 : template<>
712 : inline bool
713 70558771 : is_a_helper <_bb_vec_info *>::test (vec_info *i)
714 : {
715 70558771 : return i->kind == vec_info::bb;
716 : }
717 :
718 : /* In general, we can divide the vector statements in a vectorized loop
719 : into related groups ("rgroups") and say that for each rgroup there is
720 : some nS such that the rgroup operates on nS values from one scalar
721 : iteration followed by nS values from the next. That is, if VF is the
722 : vectorization factor of the loop, the rgroup operates on a sequence:
723 :
724 : (1,1) (1,2) ... (1,nS) (2,1) ... (2,nS) ... (VF,1) ... (VF,nS)
725 :
726 : where (i,j) represents a scalar value with index j in a scalar
727 : iteration with index i.
728 :
729 : [ We use the term "rgroup" to emphasise that this grouping isn't
730 : necessarily the same as the grouping of statements used elsewhere.
731 : For example, if we implement a group of scalar loads using gather
732 : loads, we'll use a separate gather load for each scalar load, and
733 : thus each gather load will belong to its own rgroup. ]
734 :
735 : In general this sequence will occupy nV vectors concatenated
736 : together. If these vectors have nL lanes each, the total number
737 : of scalar values N is given by:
738 :
739 : N = nS * VF = nV * nL
740 :
741 : None of nS, VF, nV and nL are required to be a power of 2. nS and nV
742 : are compile-time constants but VF and nL can be variable (if the target
743 : supports variable-length vectors).
744 :
745 : In classical vectorization, each iteration of the vector loop would
746 : handle exactly VF iterations of the original scalar loop. However,
747 : in vector loops that are able to operate on partial vectors, a
748 : particular iteration of the vector loop might handle fewer than VF
749 : iterations of the scalar loop. The vector lanes that correspond to
750 : iterations of the scalar loop are said to be "active" and the other
751 : lanes are said to be "inactive".
752 :
753 : In such vector loops, many rgroups need to be controlled to ensure
754 : that they have no effect for the inactive lanes. Conceptually, each
755 : such rgroup needs a sequence of booleans in the same order as above,
756 : but with each (i,j) replaced by a boolean that indicates whether
757 : iteration i is active. This sequence occupies nV vector controls
758 : that again have nL lanes each. Thus the control sequence as a whole
759 : consists of VF independent booleans that are each repeated nS times.
760 :
761 : Take the mask-based approach as an example of partially-populated vectors.
762 : We make the simplifying assumption that if a sequence of nV masks is
763 : suitable for one (nS,nL) pair, we can reuse it for (nS/2,nL/2) by
764 : VIEW_CONVERTing it. This holds for all current targets that support
765 : fully-masked loops. For example, suppose the scalar loop is:
766 :
767 : float *f;
768 : double *d;
769 : for (int i = 0; i < n; ++i)
770 : {
771 : f[i * 2 + 0] += 1.0f;
772 : f[i * 2 + 1] += 2.0f;
773 : d[i] += 3.0;
774 : }
775 :
776 : and suppose that vectors have 256 bits. The vectorized f accesses
777 : will belong to one rgroup and the vectorized d access to another:
778 :
779 : f rgroup: nS = 2, nV = 1, nL = 8
780 : d rgroup: nS = 1, nV = 1, nL = 4
781 : VF = 4
782 :
783 : [ In this simple example the rgroups do correspond to the normal
784 : SLP grouping scheme. ]
785 :
786 : If only the first three lanes are active, the masks we need are:
787 :
788 : f rgroup: 1 1 | 1 1 | 1 1 | 0 0
789 : d rgroup: 1 | 1 | 1 | 0
790 :
791 : Here we can use a mask calculated for f's rgroup for d's, but not
792 : vice versa.
793 :
794 : Thus for each value of nV, it is enough to provide nV masks, with the
795 : mask being calculated based on the highest nL (or, equivalently, based
796 : on the highest nS) required by any rgroup with that nV. We therefore
797 : represent the entire collection of masks as a two-level table, with the
798 : first level being indexed by nV - 1 (since nV == 0 doesn't exist) and
799 : the second being indexed by the mask index 0 <= i < nV. */
800 :
801 : /* The controls (like masks or lengths) needed by rgroups with nV vectors,
802 : according to the description above. */
803 : struct rgroup_controls {
804 : /* The largest nS for all rgroups that use these controls.
805 : For vect_partial_vectors_avx512 this is the constant nscalars_per_iter
806 : for all members of the group. */
807 : unsigned int max_nscalars_per_iter;
808 :
809 : /* For the largest nS recorded above, the loop controls divide each scalar
810 : into FACTOR equal-sized pieces. This is useful if we need to split
811 : element-based accesses into byte-based accesses.
812 : For vect_partial_vectors_avx512 this records nV instead. */
813 : unsigned int factor;
814 :
815 : /* This is a vector type with MAX_NSCALARS_PER_ITER * VF / nV elements.
816 : For mask-based controls, it is the type of the masks in CONTROLS.
817 : For length-based controls, it can be any vector type that has the
818 : specified number of elements; the type of the elements doesn't matter. */
819 : tree type;
820 :
821 : /* When there is no uniformly used LOOP_VINFO_RGROUP_COMPARE_TYPE this
822 : is the rgroup specific type used. */
823 : tree compare_type;
824 :
825 : /* A vector of nV controls, in iteration order. */
826 : vec<tree> controls;
827 :
828 : /* In case of len_load and len_store with a bias there is only one
829 : rgroup. This holds the adjusted loop length for this rgroup. */
830 : tree bias_adjusted_ctrl;
831 : };
832 : /* Loop mask state: a set of (vectype, nvector) pairs plus the vector of rgroup_controls. */
833 575326 : struct vec_loop_masks
834 : {
835 506162 : bool is_empty () const { return mask_set.is_empty (); } /* Only MASK_SET is consulted; RGC_VEC is not. */
836 :
837 : /* Set to record vectype, nvector pairs. */
838 : hash_set<pair_hash <nofree_ptr_hash <tree_node>,
839 : int_hash<unsigned, 0>>> mask_set;
840 :
841 : /* rgroup_controls used for the partial vector scheme. */
842 : auto_vec<rgroup_controls> rgc_vec;
843 : };
844 : /* An auto_vec of rgroup_controls. */
845 : typedef auto_vec<rgroup_controls> vec_loop_lens;
846 : /* An auto_vec of (data_reference pointer, tree) pairs. */
847 : typedef auto_vec<std::pair<data_reference*, tree> > drs_init_vec;
848 :
849 : /* Abstraction around info on reductions which is still in stmt_vec_info
850 : but will be duplicated or moved elsewhere. */
851 203618 : class vect_reduc_info_s
852 : {
853 : public:
854 : /* The def type of the main reduction PHI, vect_reduction_def or
855 : vect_double_reduction_def. */
856 : enum vect_def_type def_type;
857 :
858 : /* The reduction type as detected by
859 : vect_is_simple_reduction and vectorizable_reduction. */
860 : enum vect_reduction_type reduc_type;
861 :
862 : /* The original scalar reduction code, to be used in the epilogue. */
863 : code_helper reduc_code;
864 :
865 : /* A vector internal function we should use in the epilogue. */
866 : internal_fn reduc_fn;
867 :
868 : /* For loop reduction with multiple vectorized results (ncopies > 1), a
869 : lane-reducing operation participating in it may not use all of those
870 : results, this field specifies result index starting from which any
871 : following lane-reducing operation would be assigned to. */
872 : unsigned int reduc_result_pos;
873 :
874 : /* Whether this represents a reduction chain. */
875 : bool is_reduc_chain;
876 :
877 : /* Whether we force a single cycle PHI during reduction vectorization. */
878 : bool force_single_cycle;
879 :
880 : /* The vector type for performing the actual reduction operation. */
881 : tree reduc_vectype;
882 :
883 : /* The vector type we should use for the final reduction in the epilogue
884 : when we reduce a mask. */
885 : tree reduc_vectype_for_mask;
886 :
887 : /* The neutral operand to use, if any. */
888 : tree neutral_op;
889 :
890 : /* For INTEGER_INDUC_COND_REDUCTION, the initial value to be used. */
891 : tree induc_cond_initial_val;
892 :
893 : /* If not NULL the value to be added to compute final reduction value. */
894 : tree reduc_epilogue_adjustment;
895 :
896 : /* If non-null, the reduction is being performed by an epilogue loop
897 : and we have decided to reuse this accumulator from the main loop. */
898 : struct vect_reusable_accumulator *reused_accumulator;
899 :
900 : /* If the vector code is performing N scalar reductions in parallel,
901 : this variable gives the initial scalar values of those N reductions. */
902 : auto_vec<tree> reduc_initial_values;
903 :
904 : /* If the vector code is performing N scalar reductions in parallel, this
905 : variable gives the vectorized code's final (scalar) result for each of
906 : those N reductions. In other words, REDUC_SCALAR_RESULTS[I] replaces
907 : the original scalar code's loop-closed SSA PHI for reduction number I. */
908 : auto_vec<tree> reduc_scalar_results;
909 : };
910 :
911 : typedef class vect_reduc_info_s *vect_reduc_info;
912 :
913 : #define VECT_REDUC_INFO_DEF_TYPE(I) ((I)->def_type)
914 : #define VECT_REDUC_INFO_TYPE(I) ((I)->reduc_type)
915 : #define VECT_REDUC_INFO_CODE(I) ((I)->reduc_code)
916 : #define VECT_REDUC_INFO_FN(I) ((I)->reduc_fn)
917 : #define VECT_REDUC_INFO_SCALAR_RESULTS(I) ((I)->reduc_scalar_results)
918 : #define VECT_REDUC_INFO_INITIAL_VALUES(I) ((I)->reduc_initial_values)
919 : #define VECT_REDUC_INFO_REUSED_ACCUMULATOR(I) ((I)->reused_accumulator)
920 : #define VECT_REDUC_INFO_INDUC_COND_INITIAL_VAL(I) ((I)->induc_cond_initial_val)
921 : #define VECT_REDUC_INFO_EPILOGUE_ADJUSTMENT(I) ((I)->reduc_epilogue_adjustment)
922 : #define VECT_REDUC_INFO_VECTYPE(I) ((I)->reduc_vectype)
923 : #define VECT_REDUC_INFO_VECTYPE_FOR_MASK(I) ((I)->reduc_vectype_for_mask)
924 : #define VECT_REDUC_INFO_FORCE_SINGLE_CYCLE(I) ((I)->force_single_cycle)
925 : #define VECT_REDUC_INFO_RESULT_POS(I) ((I)->reduc_result_pos)
926 : #define VECT_REDUC_INFO_NEUTRAL_OP(I) ((I)->neutral_op)
927 :
928 : /* Information about a reduction accumulator from the main loop that could
929 : conceivably be reused as the input to a reduction in an epilogue loop. */
930 : struct vect_reusable_accumulator {
931 : /* The final value of the accumulator, which forms the input to the
932 : reduction operation. */
933 : tree reduc_input;
934 :
935 : /* The reduction descriptor (a vect_reduc_info) that describes the
936 : reduction. */
937 : vect_reduc_info reduc_info;
938 : };
939 :
940 : /*-----------------------------------------------------------------*/
941 : /* Info on vectorized loops. */
942 : /*-----------------------------------------------------------------*/
943 : typedef class _loop_vec_info : public vec_info {
944 : public:
945 : _loop_vec_info (class loop *, vec_info_shared *);
946 : ~_loop_vec_info ();
947 :
948 : /* The loop to which this info struct refers. */
949 : class loop *loop;
950 :
951 : /* Number of latch executions. */
952 : tree num_itersm1;
953 : /* Number of iterations. */
954 : tree num_iters;
955 : /* Number of iterations of the original loop. */
956 : tree num_iters_unchanged;
957 : /* Condition under which this loop is analyzed and versioned. */
958 : tree num_iters_assumptions;
959 :
960 : /* The cost of the vector code. */
961 : class vector_costs *vector_costs;
962 :
963 : /* The cost of the scalar code. */
964 : class vector_costs *scalar_costs;
965 :
966 : /* Threshold of number of iterations below which vectorization will not be
967 : performed. It is calculated from MIN_PROFITABLE_ITERS and
968 : param_min_vect_loop_bound. */
969 : unsigned int th;
970 :
971 : /* When applying loop versioning, the vector form should only be used
972 : if the number of scalar iterations is >= this value, on top of all
973 : the other requirements. Ignored when loop versioning is not being
974 : used. */
975 : poly_uint64 versioning_threshold;
976 :
977 : /* Unrolling factor. In case of suitable super-word parallelism
978 : it can be that no unrolling is needed, and thus this is 1. */
979 : poly_uint64 vectorization_factor;
980 :
981 : /* If this loop is an epilogue loop whose main loop can be skipped,
982 : MAIN_LOOP_EDGE is the edge from the main loop to this loop's
983 : preheader. SKIP_MAIN_LOOP_EDGE is then the edge that skips the
984 : main loop and goes straight to this loop's preheader.
985 :
986 : Both fields are null otherwise. */
987 : edge main_loop_edge;
988 : edge skip_main_loop_edge;
989 :
990 : /* If this loop is an epilogue loop that might be skipped after executing
991 : the main loop, this edge is the one that skips the epilogue. */
992 : edge skip_this_loop_edge;
993 :
994 : /* Reduction descriptors of this loop. Referenced from SLP nodes
995 : by index. */
996 : auto_vec<vect_reduc_info> reduc_infos;
997 :
998 : /* The vectorized form of a standard reduction replaces the original
999 : scalar code's final result (a loop-closed SSA PHI) with the result
1000 : of a vector-to-scalar reduction operation. After vectorization,
1001 : this variable maps these vector-to-scalar results to information
1002 : about the reductions that generated them. */
1003 : hash_map<tree, vect_reusable_accumulator> reusable_accumulators;
1004 :
1005 : /* The number of times that the target suggested we unroll the vector loop
1006 : in order to promote more ILP. This value will be used to re-analyze the
1007 : loop for vectorization and if successful the value will be folded into
1008 : vectorization_factor (and therefore exactly divides
1009 : vectorization_factor). */
1010 : unsigned int suggested_unroll_factor;
1011 :
1012 : /* Maximum runtime vectorization factor, or MAX_VECTORIZATION_FACTOR
1013 : if there is no particular limit. */
1014 : unsigned HOST_WIDE_INT max_vectorization_factor;
1015 :
1016 : /* The masks that a fully-masked loop should use to avoid operating
1017 : on inactive scalars. */
1018 : vec_loop_masks masks;
1019 :
1020 : /* The lengths that a loop with length should use to avoid operating
1021 : on inactive scalars. */
1022 : vec_loop_lens lens;
1023 :
1024 : /* Set of scalar conditions that have loop mask applied. */
1025 : scalar_cond_masked_set_type scalar_cond_masked_set;
1026 :
1027 : /* Set of vector conditions that have loop mask applied. */
1028 : vec_cond_masked_set_type vec_cond_masked_set;
1029 :
1030 : /* If we are using a loop mask to align memory addresses, this variable
1031 : contains the number of vector elements that we should skip in the
1032 : first iteration of the vector loop (i.e. the number of leading
1033 : elements that should be false in the first mask). */
1034 : tree mask_skip_niters;
1035 :
1036 : /* If we are using a loop mask to align memory addresses and we're in an
1037 : early break loop then this variable contains the number of elements that
1038 : were skipped during the initial iteration of the loop. */
1039 : tree mask_skip_niters_pfa_offset;
1040 :
1041 : /* The type that the loop control IV should be converted to before
1042 : testing which of the VF scalars are active and inactive.
1043 : Only meaningful if LOOP_VINFO_USING_PARTIAL_VECTORS_P. */
1044 : tree rgroup_compare_type;
1045 :
1046 : /* For #pragma omp simd if (x) loops the x expression. If constant 0,
1047 : the loop should not be vectorized, if constant non-zero, simd_if_cond
1048 : shouldn't be set and loop vectorized normally, if SSA_NAME, the loop
1049 : should be versioned on that condition, using scalar loop if the condition
1050 : is false and vectorized loop otherwise. */
1051 : tree simd_if_cond;
1052 :
1053 : /* The type that the vector loop control IV should have when
1054 : LOOP_VINFO_USING_PARTIAL_VECTORS_P is true. */
1055 : tree rgroup_iv_type;
1056 :
1057 : /* The style used for implementing partial vectors when
1058 : LOOP_VINFO_USING_PARTIAL_VECTORS_P is true. */
1059 : vect_partial_vector_style partial_vector_style;
1060 :
1061 : /* Unknown DRs according to which loop was peeled. */
1062 : class dr_vec_info *unaligned_dr;
1063 :
1064 : /* peeling_for_alignment indicates whether peeling for alignment will take
1065 : place, and what the peeling factor should be:
1066 : peeling_for_alignment = X means:
1067 : If X=0: Peeling for alignment will not be applied.
1068 : If X>0: Peel first X iterations.
1069 : If X=-1: Generate a runtime test to calculate the number of iterations
1070 : to be peeled, using the dataref recorded in the field
1071 : unaligned_dr. */
1072 : int peeling_for_alignment;
1073 :
1074 : /* The mask used to check the alignment of pointers or arrays. */
1075 : poly_uint64 ptr_mask;
1076 :
1077 : /* The maximum speculative read amount in VLA modes for runtime check. */
1078 : poly_uint64 max_spec_read_amount;
1079 :
1080 : /* Indicates whether the loop has any non-linear IV. */
1081 : bool nonlinear_iv;
1082 :
1083 : /* Data Dependence Relations defining address ranges that are candidates
1084 : for a run-time aliasing check. */
1085 : auto_vec<ddr_p> may_alias_ddrs;
1086 :
1087 : /* Data Dependence Relations defining address ranges together with segment
1088 : lengths from which the run-time aliasing check is built. */
1089 : auto_vec<dr_with_seg_len_pair_t> comp_alias_ddrs;
1090 :
1091 : /* Check that the addresses of each pair of objects are unequal. */
1092 : auto_vec<vec_object_pair> check_unequal_addrs;
1093 :
1094 : /* List of values that are required to be nonzero. This is used to check
1095 : whether things like "x[i * n] += 1;" are safe and eventually gets added
1096 : to the checks for lower bounds below. */
1097 : auto_vec<tree> check_nonzero;
1098 :
1099 : /* List of values that need to be checked for a minimum value. */
1100 : auto_vec<vec_lower_bound> lower_bounds;
1101 :
1102 : /* Statements in the loop that have data references that are candidates for a
1103 : runtime (loop versioning) misalignment check. */
1104 : auto_vec<stmt_vec_info> may_misalign_stmts;
1105 :
1106 : /* Reduction cycles detected in the loop. Used in loop-aware SLP. */
1107 : auto_vec<stmt_vec_info> reductions;
1108 :
1109 : /* Defs that could not be analyzed such as OMP SIMD calls without
1110 : a LHS. */
1111 : auto_vec<stmt_vec_info> alternate_defs;
1112 :
1113 : /* Cost vector for a single scalar iteration. */
1114 : auto_vec<stmt_info_for_cost> scalar_cost_vec;
1115 :
1116 : /* Map of IV base/step expressions to inserted name in the preheader. */
1117 : hash_map<tree_operand_hash, tree> *ivexpr_map;
1118 :
1119 : /* Map of OpenMP "omp simd array" scan variables to corresponding
1120 : rhs of the store of the initializer. */
1121 : hash_map<tree, tree> *scan_map;
1122 :
1123 : /* The factor used to over weight those statements in an inner loop
1124 : relative to the loop being vectorized. */
1125 : unsigned int inner_loop_cost_factor;
1126 :
1127 : /* Is the loop vectorizable? */
1128 : bool vectorizable;
1129 :
1130 : /* Records whether we still have the option of vectorizing this loop
1131 : using partially-populated vectors; in other words, whether it is
1132 : still possible for one iteration of the vector loop to handle
1133 : fewer than VF scalars. */
1134 : bool can_use_partial_vectors_p;
1135 :
1136 : /* Records whether we must use niter masking for correctness reasons. */
1137 : bool must_use_partial_vectors_p;
1138 :
1139 : /* True if we've decided to use partially-populated vectors, so that
1140 : the vector loop can handle fewer than VF scalars. */
1141 : bool using_partial_vectors_p;
1142 :
1143 : /* True if we've decided to use a decrementing loop control IV that counts
1144 : scalars. This can be done for any loop that:
1145 :
1146 : (a) uses length "controls"; and
1147 : (b) can iterate more than once. */
1148 : bool using_decrementing_iv_p;
1149 :
1150 : /* True if we've decided to use output of select_vl to adjust IV of
1151 : both loop control and data reference pointer. This is only true
1152 : for single-rgroup control. */
1153 : bool using_select_vl_p;
1154 :
1155 : /* True if we've decided to use peeling with versioning together, which allows
1156 : unaligned unsupported data refs to be uniformly aligned after a certain
1157 : amount of peeling (mutual alignment). Otherwise, we use versioning alone
1158 : so these data refs must be already aligned to a power-of-two boundary
1159 : without peeling. */
1160 : bool allow_mutual_alignment;
1161 :
1162 : /* The bias for len_load and len_store. For now, only 0 and -1 are
1163 : supported. -1 must be used when a backend does not support
1164 : len_load/len_store with a length of zero. */
1165 : signed char partial_load_store_bias;
1166 :
1167 : /* When we have grouped data accesses with gaps, we may introduce invalid
1168 : memory accesses. We peel the last iteration of the loop to prevent
1169 : this. */
1170 : bool peeling_for_gaps;
1171 :
1172 : /* When the number of iterations is not a multiple of the vector size
1173 : we need to peel off iterations at the end to form an epilogue loop. */
1174 : bool peeling_for_niter;
1175 :
1176 : /* When the loop has early breaks that we can vectorize we need to peel
1177 : the loop for the break finding loop. */
1178 : bool early_breaks;
1179 :
1180 : /* List of additional loop IV conditionals found in the loop. */
1181 : auto_vec<gcond *> conds;
1182 :
1183 : /* Main loop IV cond. */
1184 : gcond* loop_iv_cond;
1185 :
1186 : /* True if we have an unroll factor requested by the user through pragma GCC
1187 : unroll. */
1188 : bool user_unroll;
1189 :
1190 : /* True if there are no loop carried data dependencies in the loop.
1191 : If loop->safelen <= 1, then this is always true, either the loop
1192 : didn't have any loop carried data dependencies, or the loop is being
1193 : vectorized guarded with some runtime alias checks, or couldn't
1194 : be vectorized at all, but then this field shouldn't be used.
1195 : For loop->safelen >= 2, the user has asserted that there are no
1196 : backward dependencies, but there still could be loop carried forward
1197 : dependencies in such loops. This flag will be false if normal
1198 : vectorizer data dependency analysis would fail or require versioning
1199 : for alias, but because of loop->safelen >= 2 it has been vectorized
1200 : even without versioning for alias. E.g. in:
1201 : #pragma omp simd
1202 : for (int i = 0; i < m; i++)
1203 : a[i] = a[i + k] * c;
1204 : (or #pragma simd or #pragma ivdep) we can vectorize this and it will
1205 : DTRT even for k > 0 && k < m, but without safelen we would not
1206 : vectorize this, so this field would be false. */
1207 : bool no_data_dependencies;
1208 :
1209 : /* Mark loops having masked stores. */
1210 : bool has_mask_store;
1211 :
1212 : /* Queued scaling factor for the scalar loop. */
1213 : profile_probability scalar_loop_scaling;
1214 :
1215 : /* If if-conversion versioned this loop before conversion, this is the
1216 : loop version without if-conversion. */
1217 : class loop *scalar_loop;
1218 :
1219 : /* For loops being epilogues of already vectorized loops
1220 : this points to the main vectorized loop. Otherwise NULL. */
1221 : _loop_vec_info *main_loop_info;
1222 :
1223 : /* For loops being epilogues of already vectorized loops
1224 : this points to the preceding vectorized (possibly epilogue) loop.
1225 : Otherwise NULL. */
1226 : _loop_vec_info *orig_loop_info;
1227 :
1228 : /* Used to store loop_vec_infos of the epilogue of this loop during
1229 : analysis. */
1230 : _loop_vec_info *epilogue_vinfo;
1231 :
1232 : /* If this is an epilogue loop the DR advancement applied. */
1233 : tree drs_advanced_by;
1234 :
1235 : /* The controlling loop exit for the current loop when vectorizing.
1236 : For counted loops, this IV controls the natural exits of the loop. */
1237 : edge vec_loop_main_exit;
1238 :
1239 : /* The controlling loop exit for the epilogue loop when vectorizing.
1240 : For counted loops, this IV controls the natural exits of the loop. */
1241 : edge vec_epilogue_loop_main_exit;
1242 :
1243 : /* The controlling loop exit for the scalar loop being vectorized.
1244 : For counted loops, this IV controls the natural exits of the loop. */
1245 : edge scalar_loop_main_exit;
1246 :
1247 : /* Indicate if the multiple exit loop has any side-effects that require it to
1248 : have a scalar epilogue. */
1249 : bool early_break_needs_epilogue;
1250 :
1251 : /* Used to store the list of stores needing to be moved if doing early
1252 : break vectorization as they would violate the scalar loop semantics if
1253 : vectorized in their current location. These are stored in order that they
1254 : need to be moved. */
1255 : auto_vec<gimple *> early_break_stores;
1256 :
1257 : /* The final basic block where to move statements to. In the case of
1258 : multiple exits this could be pretty far away. */
1259 : basic_block early_break_dest_bb;
1260 :
1261 : /* Statements whose VUSES need updating if early break vectorization is to
1262 : happen. */
1263 : auto_vec<gimple*> early_break_vuses;
1264 :
1265 : /* The IV adjustment value for inductions that needs to be materialized
1266 : inside the relevant exit blocks in order to adjust for early break. */
1267 : tree early_break_niters_var;
1268 :
1269 : /* The type of the variable to be used to create the scalar IV for early break
1270 : loops. */
1271 : tree early_break_iv_type;
1272 :
1273 : /* Record statements that are needed to be live for early break vectorization
1274 : but may not have an LC PHI node materialized yet in the exits. */
1275 : auto_vec<stmt_vec_info> early_break_live_ivs;
1276 : } *loop_vec_info;
1277 :
1278 : /* Access Functions. */
1279 : #define LOOP_VINFO_LOOP(L) (L)->loop
1280 : #define LOOP_VINFO_MAIN_EXIT(L) (L)->vec_loop_main_exit
1281 : #define LOOP_VINFO_EPILOGUE_MAIN_EXIT(L) (L)->vec_epilogue_loop_main_exit
1282 : #define LOOP_VINFO_SCALAR_MAIN_EXIT(L) (L)->scalar_loop_main_exit
1283 : #define LOOP_VINFO_BBS(L) (L)->bbs
1284 : #define LOOP_VINFO_NBBS(L) (L)->nbbs
1285 : #define LOOP_VINFO_NITERSM1(L) (L)->num_itersm1
1286 : #define LOOP_VINFO_NITERS(L) (L)->num_iters
1287 : #define LOOP_VINFO_NITERS_UNCOUNTED_P(L) (LOOP_VINFO_NITERS (L) \
1288 : == chrec_dont_know)
1289 : /* Since LOOP_VINFO_NITERS and LOOP_VINFO_NITERSM1 can change after
1290 : prologue peeling, retain the total unchanged scalar loop iterations
1291 : for the cost model. */
1292 : #define LOOP_VINFO_NITERS_UNCHANGED(L) (L)->num_iters_unchanged
1293 : #define LOOP_VINFO_NITERS_ASSUMPTIONS(L) (L)->num_iters_assumptions
1294 : #define LOOP_VINFO_COST_MODEL_THRESHOLD(L) (L)->th
1295 : #define LOOP_VINFO_VERSIONING_THRESHOLD(L) (L)->versioning_threshold
1296 : #define LOOP_VINFO_VECTORIZABLE_P(L) (L)->vectorizable
1297 : #define LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P(L) (L)->can_use_partial_vectors_p
1298 : #define LOOP_VINFO_MUST_USE_PARTIAL_VECTORS_P(L) (L)->must_use_partial_vectors_p
1299 : #define LOOP_VINFO_USING_PARTIAL_VECTORS_P(L) (L)->using_partial_vectors_p
1300 : #define LOOP_VINFO_USING_DECREMENTING_IV_P(L) (L)->using_decrementing_iv_p
1301 : #define LOOP_VINFO_USING_SELECT_VL_P(L) (L)->using_select_vl_p
1302 : #define LOOP_VINFO_ALLOW_MUTUAL_ALIGNMENT(L) (L)->allow_mutual_alignment
1303 : #define LOOP_VINFO_PARTIAL_LOAD_STORE_BIAS(L) (L)->partial_load_store_bias
1304 : #define LOOP_VINFO_VECT_FACTOR(L) (L)->vectorization_factor
1305 : #define LOOP_VINFO_MAX_VECT_FACTOR(L) (L)->max_vectorization_factor
1306 : #define LOOP_VINFO_MASKS(L) (L)->masks
1307 : #define LOOP_VINFO_LENS(L) (L)->lens
1308 : #define LOOP_VINFO_MASK_SKIP_NITERS(L) (L)->mask_skip_niters
1309 : #define LOOP_VINFO_MASK_NITERS_PFA_OFFSET(L) (L)->mask_skip_niters_pfa_offset
1310 : #define LOOP_VINFO_RGROUP_COMPARE_TYPE(L) (L)->rgroup_compare_type
1311 : #define LOOP_VINFO_RGROUP_IV_TYPE(L) (L)->rgroup_iv_type
1312 : #define LOOP_VINFO_PARTIAL_VECTORS_STYLE(L) (L)->partial_vector_style
1313 : #define LOOP_VINFO_PTR_MASK(L) (L)->ptr_mask
1314 : #define LOOP_VINFO_MAX_SPEC_READ_AMOUNT(L) (L)->max_spec_read_amount
1315 : #define LOOP_VINFO_LOOP_NEST(L) (L)->shared->loop_nest
1316 : #define LOOP_VINFO_DATAREFS(L) (L)->shared->datarefs
1317 : #define LOOP_VINFO_DDRS(L) (L)->shared->ddrs
1318 : #define LOOP_VINFO_INT_NITERS(L) (TREE_INT_CST_LOW ((L)->num_iters))
1319 : #define LOOP_VINFO_PEELING_FOR_ALIGNMENT(L) (L)->peeling_for_alignment
1320 : #define LOOP_VINFO_NON_LINEAR_IV(L) (L)->nonlinear_iv
1321 : #define LOOP_VINFO_UNALIGNED_DR(L) (L)->unaligned_dr
1322 : #define LOOP_VINFO_MAY_MISALIGN_STMTS(L) (L)->may_misalign_stmts
1323 : #define LOOP_VINFO_MAY_ALIAS_DDRS(L) (L)->may_alias_ddrs
1324 : #define LOOP_VINFO_COMP_ALIAS_DDRS(L) (L)->comp_alias_ddrs
1325 : #define LOOP_VINFO_CHECK_UNEQUAL_ADDRS(L) (L)->check_unequal_addrs
1326 : #define LOOP_VINFO_CHECK_NONZERO(L) (L)->check_nonzero
1327 : #define LOOP_VINFO_LOWER_BOUNDS(L) (L)->lower_bounds
1328 : #define LOOP_VINFO_USER_UNROLL(L) (L)->user_unroll
1329 : #define LOOP_VINFO_GROUPED_STORES(L) (L)->grouped_stores
1330 : #define LOOP_VINFO_SLP_INSTANCES(L) (L)->slp_instances
1331 : #define LOOP_VINFO_REDUCTIONS(L) (L)->reductions
1332 : #define LOOP_VINFO_PEELING_FOR_GAPS(L) (L)->peeling_for_gaps
1333 : #define LOOP_VINFO_PEELING_FOR_NITER(L) (L)->peeling_for_niter
1334 : #define LOOP_VINFO_EARLY_BREAKS(L) (L)->early_breaks
1335 : #define LOOP_VINFO_EARLY_BRK_NEEDS_EPILOG(L) (L)->early_break_needs_epilogue
1336 : #define LOOP_VINFO_EARLY_BRK_STORES(L) (L)->early_break_stores
1337 : #define LOOP_VINFO_EARLY_BREAKS_VECT_PEELED(L) \
1338 : ((single_pred ((L)->loop->latch) != (L)->vec_loop_main_exit->src) \
1339 : || LOOP_VINFO_NITERS_UNCOUNTED_P (L))
1340 : #define LOOP_VINFO_EARLY_BREAKS_LIVE_IVS(L) \
1341 : (L)->early_break_live_ivs
1342 : #define LOOP_VINFO_EARLY_BRK_DEST_BB(L) (L)->early_break_dest_bb
1343 : #define LOOP_VINFO_EARLY_BRK_VUSES(L) (L)->early_break_vuses
1344 : #define LOOP_VINFO_EARLY_BRK_NITERS_VAR(L) (L)->early_break_niters_var
1345 : #define LOOP_VINFO_EARLY_BRK_IV_TYPE(L) (L)->early_break_iv_type
1346 : #define LOOP_VINFO_LOOP_CONDS(L) (L)->conds
1347 : #define LOOP_VINFO_LOOP_IV_COND(L) (L)->loop_iv_cond
1348 : #define LOOP_VINFO_NO_DATA_DEPENDENCIES(L) (L)->no_data_dependencies
1349 : #define LOOP_VINFO_SCALAR_LOOP(L) (L)->scalar_loop
1350 : #define LOOP_VINFO_SCALAR_LOOP_SCALING(L) (L)->scalar_loop_scaling
1351 : #define LOOP_VINFO_HAS_MASK_STORE(L) (L)->has_mask_store
1352 : #define LOOP_VINFO_SCALAR_ITERATION_COST(L) (L)->scalar_cost_vec
1353 : #define LOOP_VINFO_MAIN_LOOP_INFO(L) (L)->main_loop_info
1354 : #define LOOP_VINFO_ORIG_LOOP_INFO(L) (L)->orig_loop_info
1355 : #define LOOP_VINFO_SIMD_IF_COND(L) (L)->simd_if_cond
1356 : #define LOOP_VINFO_INNER_LOOP_COST_FACTOR(L) (L)->inner_loop_cost_factor
1357 : #define LOOP_VINFO_INV_PATTERN_DEF_SEQ(L) (L)->inv_pattern_def_seq
1358 : #define LOOP_VINFO_DRS_ADVANCED_BY(L) (L)->drs_advanced_by
1359 : #define LOOP_VINFO_ALTERNATE_DEFS(L) (L)->alternate_defs
1360 :
1361 : #define LOOP_VINFO_FULLY_MASKED_P(L) \
1362 : (LOOP_VINFO_USING_PARTIAL_VECTORS_P (L) \
1363 : && !LOOP_VINFO_MASKS (L).is_empty ())
1364 :
1365 : #define LOOP_VINFO_FULLY_WITH_LENGTH_P(L) \
1366 : (LOOP_VINFO_USING_PARTIAL_VECTORS_P (L) \
1367 : && !LOOP_VINFO_LENS (L).is_empty ())
1368 :
1369 : #define LOOP_REQUIRES_VERSIONING_FOR_ALIGNMENT(L) \
1370 : ((L)->may_misalign_stmts.length () > 0)
1371 : #define LOOP_REQUIRES_VERSIONING_FOR_SPEC_READ(L) \
1372 : (maybe_gt ((L)->max_spec_read_amount, 0U))
1373 : #define LOOP_REQUIRES_VERSIONING_FOR_ALIAS(L) \
1374 : ((L)->comp_alias_ddrs.length () > 0 \
1375 : || (L)->check_unequal_addrs.length () > 0 \
1376 : || (L)->lower_bounds.length () > 0)
1377 : #define LOOP_REQUIRES_VERSIONING_FOR_NITERS(L) \
1378 : (LOOP_VINFO_NITERS_ASSUMPTIONS (L))
1379 : #define LOOP_REQUIRES_VERSIONING_FOR_SIMD_IF_COND(L) \
1380 : (LOOP_VINFO_SIMD_IF_COND (L))
1381 : #define LOOP_REQUIRES_VERSIONING(L) \
1382 : (LOOP_REQUIRES_VERSIONING_FOR_ALIGNMENT (L) \
1383 : || LOOP_REQUIRES_VERSIONING_FOR_SPEC_READ (L) \
1384 : || LOOP_REQUIRES_VERSIONING_FOR_ALIAS (L) \
1385 : || LOOP_REQUIRES_VERSIONING_FOR_NITERS (L) \
1386 : || LOOP_REQUIRES_VERSIONING_FOR_SIMD_IF_COND (L))
1387 :
1388 : #define LOOP_VINFO_USE_VERSIONING_WITHOUT_PEELING(L) \
1389 : ((L)->may_misalign_stmts.length () > 0 \
1390 : && !LOOP_VINFO_ALLOW_MUTUAL_ALIGNMENT (L))
1391 :
1392 : #define LOOP_VINFO_NITERS_KNOWN_P(L) \
1393 : (tree_fits_shwi_p ((L)->num_iters) && tree_to_shwi ((L)->num_iters) > 0)
1394 :
1395 : #define LOOP_VINFO_EPILOGUE_P(L) \
1396 : (LOOP_VINFO_ORIG_LOOP_INFO (L) != NULL)
1397 :
1398 : #define LOOP_VINFO_ORIG_MAX_VECT_FACTOR(L) \
1399 : (LOOP_VINFO_MAX_VECT_FACTOR (LOOP_VINFO_ORIG_LOOP_INFO (L)))
1400 :
1401 : /* Wrapper for loop_vec_info, for tracking success/failure, where a non-NULL
1402 : value signifies success, and a NULL value signifies failure, supporting
1403 : propagating an opt_problem * describing the failure back up the call
1404 : stack. */
1405 : typedef opt_pointer_wrapper <loop_vec_info> opt_loop_vec_info;
1406 :
/* Return LOOP->aux interpreted as a loop_vec_info. No check is made
   here that the aux field actually holds one. */
1407 : inline loop_vec_info
1408 533288 : loop_vec_info_for_loop (class loop *loop)
1409 : {
1410 533288 : return (loop_vec_info) loop->aux;
1411 : }
1412 :
/* Record bundling an slp_instance_kind with three vectors: STMTS, ROOTS
   and REMAIN. */
1413 : struct slp_root
1414 : {
/* Store the arguments in the members of the same name. REMAIN_ may be
   omitted, in which case REMAIN is initialized from vNULL. */
1415 1245057 : slp_root (slp_instance_kind kind_, vec<stmt_vec_info> stmts_,
1416 13097 : vec<stmt_vec_info> roots_, vec<tree> remain_ = vNULL)
1417 1245057 : : kind(kind_), stmts(stmts_), roots(roots_), remain(remain_) {}
1418 : slp_instance_kind kind;
1419 : vec<stmt_vec_info> stmts;
1420 : vec<stmt_vec_info> roots;
1421 : vec<tree> remain;
1422 : };
1423 :
/* A vec_info that is constructed from a vector of basic blocks and a
   vec_info_shared. bb_vec_info is the pointer typedef for it. */
1424 : typedef class _bb_vec_info : public vec_info
1425 : {
1426 : public:
1427 : _bb_vec_info (vec<basic_block> bbs, vec_info_shared *);
1428 : ~_bb_vec_info ();
1429 :
/* slp_root records held by this vec_info.
   NOTE(review): presumably the SLP discovery roots of the region; confirm
   against the users of this field. */
1430 : vec<slp_root> roots;
1431 : } *bb_vec_info;
1432 :
1433 : #define BB_VINFO_BBS(B) (B)->bbs
1434 : #define BB_VINFO_NBBS(B) (B)->nbbs
1435 : #define BB_VINFO_GROUPED_STORES(B) (B)->grouped_stores
1436 : #define BB_VINFO_SLP_INSTANCES(B) (B)->slp_instances
1437 : #define BB_VINFO_DATAREFS(B) (B)->shared->datarefs
1438 : #define BB_VINFO_DDRS(B) (B)->shared->ddrs
1439 :
1440 : /* Indicates whether/how a variable is used in the scope of loop/basic
1441 : block. */
1442 : enum vect_relevant {
1443 : vect_unused_in_scope = 0,
1444 :
1445 : /* The def is only used outside the loop. */
1446 : vect_used_only_live,
1447 : /* The def is in the inner loop, and the use is in the outer loop, and the
1448 : use is a reduction stmt. */
1449 : vect_used_in_outer_by_reduction,
1450 : /* The def is in the inner loop, and the use is in the outer loop (and is
1451 : not part of reduction). */
1452 : vect_used_in_outer,
1453 :
1454 : /* defs that feed computations that end up (only) in a reduction. These
1455 : defs may be used by non-reduction stmts, but eventually, any
1456 : computations/values that are affected by these defs are used to compute
1457 : a reduction (i.e. don't get stored to memory, for example). We use this
1458 : to identify computations that we can change the order in which they are
1459 : computed. */
1460 : vect_used_by_reduction,
1461 :
1462 : vect_used_in_scope
1463 : };
1464 :
1465 : /* The type of vectorization. pure_slp means the stmt is covered by the
1466 : SLP graph, not_vect means it is not. This is mostly used by BB
1467 : vectorization. */
1468 : enum slp_vect_type {
1469 : not_vect = 0,
1470 : pure_slp,
1471 : };
1472 :
1473 : /* Says whether a statement is a load, a store of a vectorized statement
1474 : result, or a store of an invariant value. */
1475 : enum vec_load_store_type {
1476 : VLS_LOAD,
1477 : VLS_STORE,
1478 : VLS_STORE_INVARIANT
1479 : };
1480 :
/* Vectorizer information kept alongside a data reference: the
   data_reference itself, its containing statement, and alignment and
   early-break related state. */
1481 : class dr_vec_info {
1482 : public:
1483 : /* The data reference itself. */
1484 : data_reference *dr;
1485 : /* The statement that contains the data reference. */
1486 : stmt_vec_info stmt;
1487 : /* The analysis group this DR belongs to when doing BB vectorization.
1488 : DRs of the same group belong to the same conditional execution context. */
1489 : unsigned group;
1490 : /* The misalignment in bytes of the reference, or -1 if not known. */
1491 : int misalignment;
1492 : /* The byte alignment that we'd ideally like the reference to have,
1493 : and the value that misalignment is measured against. */
1494 : poly_uint64 target_alignment;
1495 : /* If true the alignment of base_decl needs to be increased. */
1496 : bool base_misaligned;
1497 :
1498 : /* Set by early break vectorization when this DR needs peeling for alignment
1499 : for correctness. */
1500 : bool safe_speculative_read_required;
1501 :
1502 : /* Set by early break vectorization when this DR's scalar accesses are known
1503 : to be inbounds of a known bounds loop. */
1504 : bool scalar_access_known_in_bounds;
1505 :
/* The declaration whose alignment BASE_MISALIGNED refers to. */
1506 : tree base_decl;
1507 :
1508 : /* Stores current vectorized loop's offset. To be added to the DR's
1509 : offset to calculate current offset of data reference. */
1510 : tree offset;
1511 : };
1512 :
1513 : typedef struct data_reference *dr_p;
1514 :
1515 : class _stmt_vec_info {
1516 : public:
1517 :
1518 : /* Indicates whether this stmt is part of a computation whose result is
1519 : used outside the loop. */
1520 : bool live;
1521 :
1522 : /* Stmt is part of some pattern (computation idiom) */
1523 : bool in_pattern_p;
1524 :
1525 : /* True if the statement was created during pattern recognition as
1526 : part of the replacement for RELATED_STMT. This implies that the
1527 : statement isn't part of any basic block, although for convenience
1528 : its gimple_bb is the same as for RELATED_STMT. */
1529 : bool pattern_stmt_p;
1530 :
1531 : /* Is this statement vectorizable or should it be skipped in (partial)
1532 : vectorization. */
1533 : bool vectorizable;
1534 :
1535 : /* The stmt to which this info struct refers. */
1536 : gimple *stmt;
1537 :
1538 : /* The vector type to be used for the LHS of this statement. */
1539 : tree vectype;
1540 :
1541 : /* The following is relevant only for stmts that contain a non-scalar
1542 : data-ref (array/pointer/struct access). A GIMPLE stmt is expected to have
1543 : at most one such data-ref. */
1544 :
1545 : dr_vec_info dr_aux;
1546 :
1547 : /* Information about the data-ref relative to this loop
1548 : nest (the loop that is being considered for vectorization). */
1549 : innermost_loop_behavior dr_wrt_vec_loop;
1550 :
1551 : /* For loop PHI nodes, the base and evolution part of it. This makes sure
1552 : this information is still available in vect_update_ivs_after_vectorizer
1553 : where we may not be able to re-analyze the PHI nodes evolution as
1554 : peeling for the prologue loop can make it unanalyzable. The evolution
1555 : part is still correct after peeling, but the base may have changed from
1556 : the version here. */
1557 : tree loop_phi_evolution_base_unchanged;
1558 : tree loop_phi_evolution_part;
1559 : enum vect_induction_op_type loop_phi_evolution_type;
1560 :
1561 : /* Used for various bookkeeping purposes, generally holding a pointer to
1562 : some other stmt S that is in some way "related" to this stmt.
1563 : Current use of this field is:
1564 : If this stmt is part of a pattern (i.e. the field 'in_pattern_p' is
1565 : true): S is the "pattern stmt" that represents (and replaces) the
1566 : sequence of stmts that constitutes the pattern. Similarly, the
1567 : related_stmt of the "pattern stmt" points back to this stmt (which is
1568 : the last stmt in the original sequence of stmts that constitutes the
1569 : pattern). */
1570 : stmt_vec_info related_stmt;
1571 :
1572 : /* Used to keep a sequence of def stmts of a pattern stmt if such exists.
1573 : The sequence is attached to the original statement rather than the
1574 : pattern statement. */
1575 : gimple_seq pattern_def_seq;
1576 :
1577 : /* Classify the def of this stmt. */
1578 : enum vect_def_type def_type;
1579 :
1580 : /* Whether the stmt is SLPed, loop-based vectorized, or both. */
1581 : enum slp_vect_type slp_type;
1582 :
1583 : /* Interleaving chains info. */
1584 : /* First element in the group. */
1585 : stmt_vec_info first_element;
1586 : /* Pointer to the next element in the group. */
1587 : stmt_vec_info next_element;
1588 : /* The size of the group. */
1589 : unsigned int size;
1590 : /* For loads only, the gap from the previous load. For consecutive loads, GAP
1591 : is 1. */
1592 : unsigned int gap;
1593 :
1594 : /* The minimum negative dependence distance this stmt participates in
1595 : or zero if none. */
1596 : unsigned int min_neg_dist;
1597 :
1598 : /* Not all stmts in the loop need to be vectorized. e.g, the increment
1599 : of the loop induction variable and computation of array indexes. relevant
1600 : indicates whether the stmt needs to be vectorized. */
1601 : enum vect_relevant relevant;
1602 :
1603 : /* For loads if this is a gather, for stores if this is a scatter. */
1604 : bool gather_scatter_p;
1605 :
1606 : /* True if this is an access with loop-invariant stride. */
1607 : bool strided_p;
1608 :
1609 : /* For both loads and stores. */
1610 : unsigned simd_lane_access_p : 3;
1611 :
1612 : /* On a reduction PHI the reduction type as detected by
1613 : vect_is_simple_reduction. */
1614 : enum vect_reduction_type reduc_type;
1615 :
1616 : /* On a reduction PHI, the original reduction code as detected by
1617 : vect_is_simple_reduction. */
1618 : code_helper reduc_code;
1619 :
1620 : /* On a stmt participating in a reduction the index of the operand
1621 : on the reduction SSA cycle. */
1622 : int reduc_idx;
1623 :
1624 : /* On a reduction PHI the def returned by vect_is_simple_reduction.
1625 : On the def returned by vect_is_simple_reduction the corresponding PHI. */
1626 : stmt_vec_info reduc_def;
1627 :
1628 : /* If nonzero, the lhs of the statement could be truncated to this
1629 : many bits without affecting any users of the result. */
1630 : unsigned int min_output_precision;
1631 :
1632 : /* If nonzero, all non-boolean input operands have the same precision,
1633 : and they could each be truncated to this many bits without changing
1634 : the result. */
1635 : unsigned int min_input_precision;
1636 :
1637 : /* If OPERATION_PRECISION is nonzero, the statement could be performed on
1638 : an integer with the sign and number of bits given by OPERATION_SIGN
1639 : and OPERATION_PRECISION without changing the result. */
1640 : unsigned int operation_precision;
1641 : signop operation_sign;
1642 :
1643 : /* If the statement produces a boolean result, this value describes
1644 : how we should choose the associated vector type. The possible
1645 : values are:
1646 :
1647 : - an integer precision N if we should use the vector mask type
1648 : associated with N-bit integers. This is only used if all relevant
1649 : input booleans also want the vector mask type for N-bit integers,
1650 : or if we can convert them into that form by pattern-matching.
1651 :
1652 : - ~0U if we considered choosing a vector mask type but decided
1653 : to treat the boolean as a normal integer type instead.
1654 :
1655 : - 0 otherwise. This means either that the operation isn't one that
1656 : could have a vector mask type (and so should have a normal vector
1657 : type instead) or that we simply haven't made a choice either way. */
1658 : unsigned int mask_precision;
1659 :
1660 : /* True if this is only suitable for SLP vectorization. */
1661 : bool slp_vect_only_p;
1662 : };
1663 :
/* Information about a gather/scatter call.  The operation itself is
   described either by IFN or by DECL (see the comments on those two
   fields); the remaining fields describe the address computation and
   the types involved.  */
struct gather_scatter_info {
  /* The internal function to use for the gather/scatter operation,
     or IFN_LAST if a built-in function should be used instead.  */
  internal_fn ifn;

  /* The FUNCTION_DECL for the built-in gather/scatter function,
     or null if an internal function should be used instead.  */
  tree decl;

  /* The loop-invariant base value.  */
  tree base;

  /* The TBAA alias pointer the value of which determines the alignment
     of the scalar accesses.  */
  tree alias_ptr;

  /* The original scalar offset, which is a non-loop-invariant SSA_NAME.  */
  tree offset;

  /* Each offset element should be multiplied by this amount before
     being added to the base.  */
  int scale;

  /* The type of the vectorized offset.  */
  tree offset_vectype;

  /* The type of the scalar elements after loading or before storing.  */
  tree element_type;

  /* The type of the scalar elements being loaded or stored.  */
  tree memory_type;
};
1697 :
/* Access Functions.

   Each macro below takes a stmt_vec_info S and expands to a member
   access on it.  Unless noted otherwise the expansion is an lvalue, so
   the macro can be used both to read and to assign the field.  Several
   macros mention S more than once, so S should not have side effects.  */
#define STMT_VINFO_STMT(S) (S)->stmt
#define STMT_VINFO_RELEVANT(S) (S)->relevant
#define STMT_VINFO_LIVE_P(S) (S)->live
#define STMT_VINFO_VECTYPE(S) (S)->vectype
#define STMT_VINFO_VECTORIZABLE(S) (S)->vectorizable
/* The "+ 0" turns the expansion into an rvalue, so this macro cannot be
   used as the target of an assignment.  */
#define STMT_VINFO_DATA_REF(S) ((S)->dr_aux.dr + 0)
#define STMT_VINFO_GATHER_SCATTER_P(S) (S)->gather_scatter_p
#define STMT_VINFO_STRIDED_P(S) (S)->strided_p
#define STMT_VINFO_SIMD_LANE_ACCESS_P(S) (S)->simd_lane_access_p
#define STMT_VINFO_REDUC_IDX(S) (S)->reduc_idx

/* Accessors for the individual parts of the innermost_loop_behavior
   stored in the dr_wrt_vec_loop member.  */
#define STMT_VINFO_DR_WRT_VEC_LOOP(S) (S)->dr_wrt_vec_loop
#define STMT_VINFO_DR_BASE_ADDRESS(S) (S)->dr_wrt_vec_loop.base_address
#define STMT_VINFO_DR_INIT(S) (S)->dr_wrt_vec_loop.init
#define STMT_VINFO_DR_OFFSET(S) (S)->dr_wrt_vec_loop.offset
#define STMT_VINFO_DR_STEP(S) (S)->dr_wrt_vec_loop.step
#define STMT_VINFO_DR_BASE_ALIGNMENT(S) (S)->dr_wrt_vec_loop.base_alignment
#define STMT_VINFO_DR_BASE_MISALIGNMENT(S) \
  (S)->dr_wrt_vec_loop.base_misalignment
#define STMT_VINFO_DR_OFFSET_ALIGNMENT(S) \
  (S)->dr_wrt_vec_loop.offset_alignment
#define STMT_VINFO_DR_STEP_ALIGNMENT(S) \
  (S)->dr_wrt_vec_loop.step_alignment

/* Yield a pointer to the dr_vec_info embedded in S.  In checking builds
   this first asserts that the embedded dr_aux points back at S.  */
#define STMT_VINFO_DR_INFO(S) \
  (gcc_checking_assert ((S)->dr_aux.stmt == (S)), &(S)->dr_aux)

#define STMT_VINFO_IN_PATTERN_P(S) (S)->in_pattern_p
#define STMT_VINFO_RELATED_STMT(S) (S)->related_stmt
#define STMT_VINFO_PATTERN_DEF_SEQ(S) (S)->pattern_def_seq
#define STMT_VINFO_DEF_TYPE(S) (S)->def_type
/* True if S has a data reference and a non-null first group element.
   The data reference is tested first because DR_GROUP_FIRST_ELEMENT
   asserts that it is present.  */
#define STMT_VINFO_GROUPED_ACCESS(S) \
  ((S)->dr_aux.dr && DR_GROUP_FIRST_ELEMENT(S))
#define STMT_VINFO_LOOP_PHI_EVOLUTION_BASE_UNCHANGED(S) (S)->loop_phi_evolution_base_unchanged
#define STMT_VINFO_LOOP_PHI_EVOLUTION_PART(S) (S)->loop_phi_evolution_part
#define STMT_VINFO_LOOP_PHI_EVOLUTION_TYPE(S) (S)->loop_phi_evolution_type
#define STMT_VINFO_MIN_NEG_DIST(S) (S)->min_neg_dist
#define STMT_VINFO_REDUC_TYPE(S) (S)->reduc_type
#define STMT_VINFO_REDUC_CODE(S) (S)->reduc_code
#define STMT_VINFO_REDUC_DEF(S) (S)->reduc_def
#define STMT_VINFO_SLP_VECT_ONLY(S) (S)->slp_vect_only_p
/* NOTE(review): no reduc_vectype_in member appears next to the other
   reduc_* members of _stmt_vec_info; confirm that the member this macro
   names still exists.  */
#define STMT_VINFO_REDUC_VECTYPE_IN(S) (S)->reduc_vectype_in
1741 :
/* Accessors for the interleaving-group members of stmt_vec_info S.
   Each one asserts, in checking builds, that S has a data reference
   before yielding the member.  S is mentioned twice in every expansion,
   so it should not have side effects.  */
#define DR_GROUP_FIRST_ELEMENT(S) \
  (gcc_checking_assert ((S)->dr_aux.dr), (S)->first_element)
#define DR_GROUP_NEXT_ELEMENT(S) \
  (gcc_checking_assert ((S)->dr_aux.dr), (S)->next_element)
#define DR_GROUP_SIZE(S) \
  (gcc_checking_assert ((S)->dr_aux.dr), (S)->size)
#define DR_GROUP_GAP(S) \
  (gcc_checking_assert ((S)->dr_aux.dr), (S)->gap)
1750 :
/* True if the relevant member of S is anything other than
   vect_unused_in_scope.  */
#define STMT_VINFO_RELEVANT_P(S) ((S)->relevant != vect_unused_in_scope)

/* True if the slp_type member of S is pure_slp.  */
#define PURE_SLP_STMT(S) ((S)->slp_type == pure_slp)
/* The slp_type member of S.  */
#define STMT_SLP_TYPE(S) (S)->slp_type
1755 :
1756 :
/* Contains the scalar or vector costs for a vec_info.

   Costs are recorded through the add_* member functions and finalized
   by finish_cost.  The inline cost accessors defined after the class
   assert (in checking builds) that finish_cost has been called before
   they are used.  */
class vector_costs
{
public:
  /* See the comment above the inline definition of the constructor.  */
  vector_costs (vec_info *, bool);
  virtual ~vector_costs () {}

  /* Update the costs in response to adding COUNT copies of a statement.

     - WHERE specifies whether the cost occurs in the loop prologue,
       the loop body, or the loop epilogue.
     - KIND is the kind of statement, which is always meaningful.
     - STMT_INFO or NODE, if nonnull, describe the statement that will be
       vectorized.
     - VECTYPE, if nonnull, is the vector type that the vectorized
       statement will operate on.  Note that this should be used in
       preference to STMT_VINFO_VECTYPE (STMT_INFO) since the latter
       is not correct for SLP.
     - for unaligned_load and unaligned_store statements, MISALIGN is
       the byte misalignment of the load or store relative to the target's
       preferred alignment for VECTYPE, or DR_MISALIGNMENT_UNKNOWN
       if the misalignment is not known.

     Return the calculated cost as well as recording it.  The return
     value is used for dumping purposes.  */
  virtual unsigned int add_stmt_cost (int count, vect_cost_for_stmt kind,
				      stmt_vec_info stmt_info,
				      slp_tree node,
				      tree vectype, int misalign,
				      vect_cost_model_location where);

  /* Update the costs in response to adding costs in V which are all from
     vectorizing NODE to the respective part.  */
  virtual unsigned int add_slp_cost (slp_tree node,
				     const array_slice<stmt_info_for_cost> &v);

  /* Finish calculating the cost of the code.  The results can be
     read back using the functions below.

     If the costs describe vector code, SCALAR_COSTS gives the costs
     of the corresponding scalar code, otherwise it is null.  */
  virtual void finish_cost (const vector_costs *scalar_costs);

  /* The costs in THIS and OTHER both describe ways of vectorizing
     a main loop.  Return true if the costs described by THIS are
     cheaper than the costs described by OTHER.  Return false if any
     of the following are true:

     - THIS and OTHER are of equal cost
     - OTHER is better than THIS
     - we can't be sure about the relative costs of THIS and OTHER.  */
  virtual bool better_main_loop_than_p (const vector_costs *other) const;

  /* Likewise, but the costs in THIS and OTHER both describe ways of
     vectorizing an epilogue loop of MAIN_LOOP.  */
  virtual bool better_epilogue_loop_than_p (const vector_costs *other,
					    loop_vec_info main_loop) const;

  /* Cost accessors, in abstract units.  All of them require finish_cost
     to have been called first.  */
  unsigned int prologue_cost () const;
  unsigned int body_cost () const;
  unsigned int epilogue_cost () const;
  unsigned int outside_cost () const;
  unsigned int total_cost () const;

  /* Accessors for the suggestions described on m_suggested_unroll_factor
     and m_suggested_epilogue_mode below.  suggested_epilogue_mode also
     stores m_masked_epilogue in MASKED.  */
  unsigned int suggested_unroll_factor () const;
  machine_mode suggested_epilogue_mode (int &masked) const;

  /* Return the region of code these costs describe.  */
  vec_info *vinfo () const { return m_vinfo; }
  /* Return true if these are scalar costs rather than vector costs.  */
  bool costing_for_scalar () const { return m_costing_for_scalar; }

protected:
  /* Helpers defined out of line.
     NOTE(review): their definitions are not part of this header; check
     them for the exact meaning of the arguments and return values.  */
  unsigned int record_stmt_cost (stmt_vec_info, vect_cost_model_location,
				 unsigned int);
  unsigned int adjust_cost_for_freq (stmt_vec_info, vect_cost_model_location,
				     unsigned int);
  int compare_inside_loop_cost (const vector_costs *) const;
  int compare_outside_loop_cost (const vector_costs *) const;

  /* The region of code that we're considering vectorizing.  */
  vec_info *m_vinfo;

  /* True if we're costing the scalar code, false if we're costing
     the vector code.  */
  bool m_costing_for_scalar;

  /* The costs of the three regions, indexed by vect_cost_model_location.  */
  unsigned int m_costs[3];

  /* The suggested unrolling factor determined at finish_cost.  */
  unsigned int m_suggested_unroll_factor;

  /* The suggested mode to be used for a vectorized epilogue or VOIDmode,
     determined at finish_cost.  m_masked_epilogue specifies whether the
     epilogue should use masked vectorization, regardless of the
     --param vect-partial-vector-usage default.  If -1 then the
     --param setting takes precedence.  If the user explicitly specified
     --param vect-partial-vector-usage then that takes precedence.  */
  machine_mode m_suggested_epilogue_mode;
  int m_masked_epilogue;

  /* True if finish_cost has been called.  */
  bool m_finished;
};
1860 :
1861 : /* Create costs for VINFO. COSTING_FOR_SCALAR is true if the costs
1862 : are for scalar code, false if they are for vector code. */
1863 :
1864 : inline
1865 2095344 : vector_costs::vector_costs (vec_info *vinfo, bool costing_for_scalar)
1866 2095344 : : m_vinfo (vinfo),
1867 2095344 : m_costing_for_scalar (costing_for_scalar),
1868 2095344 : m_costs (),
1869 2095344 : m_suggested_unroll_factor(1),
1870 2095344 : m_suggested_epilogue_mode(VOIDmode),
1871 2095344 : m_masked_epilogue (-1),
1872 2095344 : m_finished (false)
1873 : {
1874 : }
1875 :
1876 : /* Return the cost of the prologue code (in abstract units). */
1877 :
1878 : inline unsigned int
1879 1269661 : vector_costs::prologue_cost () const
1880 : {
1881 1269661 : gcc_checking_assert (m_finished);
1882 1269661 : return m_costs[vect_prologue];
1883 : }
1884 :
1885 : /* Return the cost of the body code (in abstract units). */
1886 :
1887 : inline unsigned int
1888 1990980 : vector_costs::body_cost () const
1889 : {
1890 1990980 : gcc_checking_assert (m_finished);
1891 1990980 : return m_costs[vect_body];
1892 : }
1893 :
1894 : /* Return the cost of the epilogue code (in abstract units). */
1895 :
1896 : inline unsigned int
1897 1269661 : vector_costs::epilogue_cost () const
1898 : {
1899 1269661 : gcc_checking_assert (m_finished);
1900 1269661 : return m_costs[vect_epilogue];
1901 : }
1902 :
1903 : /* Return the cost of the prologue and epilogue code (in abstract units). */
1904 :
1905 : inline unsigned int
1906 485275 : vector_costs::outside_cost () const
1907 : {
1908 485275 : return prologue_cost () + epilogue_cost ();
1909 : }
1910 :
1911 : /* Return the cost of the prologue, body and epilogue code
1912 : (in abstract units). */
1913 :
1914 : inline unsigned int
1915 124685 : vector_costs::total_cost () const
1916 : {
1917 124685 : return body_cost () + outside_cost ();
1918 : }
1919 :
1920 : /* Return the suggested unroll factor. */
1921 :
1922 : inline unsigned int
1923 124298 : vector_costs::suggested_unroll_factor () const
1924 : {
1925 124298 : gcc_checking_assert (m_finished);
1926 124298 : return m_suggested_unroll_factor;
1927 : }
1928 :
1929 : /* Return the suggested epilogue mode. */
1930 :
1931 : inline machine_mode
1932 14392 : vector_costs::suggested_epilogue_mode (int &masked_p) const
1933 : {
1934 14392 : gcc_checking_assert (m_finished);
1935 14392 : masked_p = m_masked_epilogue;
1936 14392 : return m_suggested_epilogue_mode;
1937 : }
1938 :
/* NOTE(review): nothing in this part of the file uses VECT_MAX_COST;
   presumably an upper bound used by cost calculations elsewhere --
   confirm against its users.  */
#define VECT_MAX_COST 1000

/* The maximum number of intermediate steps required in multi-step type
   conversion.  */
#define MAX_INTERM_CVT_STEPS 3

/* Upper bound on the vectorization factor.  vect_max_vf returns this
   when the vectorization factor is not a compile-time constant.  */
#define MAX_VECTORIZATION_FACTOR INT_MAX

/* Nonzero if TYPE represents a (scalar) boolean type or type
   in the middle-end compatible with it (unsigned precision 1 integral
   types).  Used to determine which types should be vectorized as
   VECTOR_BOOLEAN_TYPE_P.

   TYPE is mentioned several times in the expansion, so it should not
   have side effects.  */

#define VECT_SCALAR_BOOLEAN_TYPE_P(TYPE) \
  (TREE_CODE (TYPE) == BOOLEAN_TYPE \
   || ((TREE_CODE (TYPE) == INTEGER_TYPE \
	|| TREE_CODE (TYPE) == ENUMERAL_TYPE) \
       && TYPE_PRECISION (TYPE) == 1 \
       && TYPE_UNSIGNED (TYPE)))
1958 :
1959 : inline bool
1960 11492757 : nested_in_vect_loop_p (class loop *loop, stmt_vec_info stmt_info)
1961 : {
1962 11492757 : return (loop->inner
1963 9339024 : && (loop->inner == (gimple_bb (stmt_info->stmt))->loop_father));
1964 : }
1965 :
1966 : /* PHI is either a scalar reduction phi or a scalar induction phi.
1967 : Return the initial value of the variable on entry to the containing
1968 : loop. */
1969 :
1970 : inline tree
1971 34075 : vect_phi_initial_value (gphi *phi)
1972 : {
1973 34075 : basic_block bb = gimple_bb (phi);
1974 34075 : edge pe = loop_preheader_edge (bb->loop_father);
1975 34075 : gcc_assert (pe->dest == bb);
1976 34075 : return PHI_ARG_DEF_FROM_EDGE (phi, pe);
1977 : }
1978 :
1979 : /* Return true if STMT_INFO should produce a vector mask type rather than
1980 : a normal nonmask type. */
1981 :
1982 : inline bool
1983 7058976 : vect_use_mask_type_p (stmt_vec_info stmt_info)
1984 : {
1985 7058976 : return stmt_info->mask_precision && stmt_info->mask_precision != ~0U;
1986 : }
1987 :
1988 : /* Return TRUE if a statement represented by STMT_INFO is a part of a
1989 : pattern. */
1990 :
1991 : inline bool
1992 127362964 : is_pattern_stmt_p (stmt_vec_info stmt_info)
1993 : {
1994 81311454 : return stmt_info->pattern_stmt_p;
1995 : }
1996 :
1997 : /* If STMT_INFO is a pattern statement, return the statement that it
1998 : replaces, otherwise return STMT_INFO itself. */
1999 :
2000 : inline stmt_vec_info
2001 50357199 : vect_orig_stmt (stmt_vec_info stmt_info)
2002 : {
2003 37917702 : if (is_pattern_stmt_p (stmt_info))
2004 3446875 : return STMT_VINFO_RELATED_STMT (stmt_info);
2005 : return stmt_info;
2006 : }
2007 :
2008 : /* Return the later statement between STMT1_INFO and STMT2_INFO. */
2009 :
2010 : inline stmt_vec_info
2011 6082577 : get_later_stmt (stmt_vec_info stmt1_info, stmt_vec_info stmt2_info)
2012 : {
2013 6082577 : gimple *stmt1 = vect_orig_stmt (stmt1_info)->stmt;
2014 6082577 : gimple *stmt2 = vect_orig_stmt (stmt2_info)->stmt;
2015 6082577 : if (gimple_bb (stmt1) == gimple_bb (stmt2))
2016 : {
2017 6057722 : if (gimple_uid (stmt1) > gimple_uid (stmt2))
2018 : return stmt1_info;
2019 : else
2020 : return stmt2_info;
2021 : }
2022 : /* ??? We should be really calling this function only with stmts
2023 : in the same BB but we can recover if there's a domination
2024 : relationship between them. */
2025 24855 : else if (dominated_by_p (CDI_DOMINATORS,
2026 24855 : gimple_bb (stmt1), gimple_bb (stmt2)))
2027 : return stmt1_info;
2028 8916 : else if (dominated_by_p (CDI_DOMINATORS,
2029 8916 : gimple_bb (stmt2), gimple_bb (stmt1)))
2030 : return stmt2_info;
2031 0 : gcc_unreachable ();
2032 : }
2033 :
2034 : /* If STMT_INFO has been replaced by a pattern statement, return the
2035 : replacement statement, otherwise return STMT_INFO itself. */
2036 :
2037 : inline stmt_vec_info
2038 51958893 : vect_stmt_to_vectorize (stmt_vec_info stmt_info)
2039 : {
2040 51958893 : if (STMT_VINFO_IN_PATTERN_P (stmt_info))
2041 1577661 : return STMT_VINFO_RELATED_STMT (stmt_info);
2042 : return stmt_info;
2043 : }
2044 :
2045 : /* Return true if BB is a loop header. */
2046 :
2047 : inline bool
2048 1523650 : is_loop_header_bb_p (basic_block bb)
2049 : {
2050 1523650 : if (bb == (bb->loop_father)->header)
2051 1512904 : return true;
2052 :
2053 : return false;
2054 : }
2055 :
/* Return 2 to the power X.

   Zero and negative X yield 1.  X must be smaller than the number of
   value bits in int; larger values overflow the result.  */

inline int
vect_pow2 (int x)
{
  /* Keep the historical result for non-positive exponents.  */
  if (x <= 0)
    return 1;

  /* A single shift replaces the X-iteration doubling loop.  */
  return 1 << x;
}
2068 :
2069 : /* Alias targetm.vectorize.builtin_vectorization_cost. */
2070 :
2071 : inline int
2072 9118454 : builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost,
2073 : tree vectype, int misalign)
2074 : {
2075 9051093 : return targetm.vectorize.builtin_vectorization_cost (type_of_cost,
2076 : vectype, misalign);
2077 : }
2078 :
2079 : /* Get cost by calling cost target builtin. */
2080 :
2081 : inline
2082 152 : int vect_get_stmt_cost (enum vect_cost_for_stmt type_of_cost)
2083 : {
2084 67209 : return builtin_vectorization_cost (type_of_cost, NULL, 0);
2085 : }
2086 :
2087 : /* Alias targetm.vectorize.init_cost. */
2088 :
2089 : inline vector_costs *
2090 2095344 : init_cost (vec_info *vinfo, bool costing_for_scalar)
2091 : {
2092 2095344 : return targetm.vectorize.create_costs (vinfo, costing_for_scalar);
2093 : }
2094 :
2095 : extern void dump_stmt_cost (FILE *, int, enum vect_cost_for_stmt,
2096 : stmt_vec_info, slp_tree, tree, int, unsigned,
2097 : enum vect_cost_model_location);
2098 :
2099 : /* Dump and add costs. */
2100 :
2101 : inline unsigned
2102 7470412 : add_stmt_cost (vector_costs *costs, int count,
2103 : enum vect_cost_for_stmt kind,
2104 : stmt_vec_info stmt_info, slp_tree node,
2105 : tree vectype, int misalign,
2106 : enum vect_cost_model_location where)
2107 : {
2108 : /* Even though a vector type might be set on stmt do not pass that on when
2109 : costing the scalar IL. A SLP node shouldn't have been recorded. */
2110 7470412 : if (costs->costing_for_scalar ())
2111 : {
2112 3876750 : vectype = NULL_TREE;
2113 3876750 : gcc_checking_assert (node == NULL);
2114 : }
2115 7470412 : unsigned cost = costs->add_stmt_cost (count, kind, stmt_info, node, vectype,
2116 : misalign, where);
2117 7470412 : if (dump_file && (dump_flags & TDF_DETAILS))
2118 219989 : dump_stmt_cost (dump_file, count, kind, stmt_info, node, vectype, misalign,
2119 : cost, where);
2120 7470412 : return cost;
2121 : }
2122 :
2123 : inline unsigned
2124 82396 : add_stmt_cost (vector_costs *costs, int count, enum vect_cost_for_stmt kind,
2125 : enum vect_cost_model_location where)
2126 : {
2127 82396 : gcc_assert (kind == cond_branch_taken || kind == cond_branch_not_taken
2128 : || kind == scalar_stmt);
2129 82396 : return add_stmt_cost (costs, count, kind, NULL, NULL, NULL_TREE, 0, where);
2130 : }
2131 :
2132 : inline unsigned
2133 2135281 : add_stmt_cost (vector_costs *costs, stmt_info_for_cost *i)
2134 : {
2135 2135281 : return add_stmt_cost (costs, i->count, i->kind, i->stmt_info, i->node,
2136 2135281 : i->vectype, i->misalign, i->where);
2137 : }
2138 :
2139 : inline void
2140 363616 : add_stmt_costs (vector_costs *costs, stmt_vector_for_cost *cost_vec)
2141 : {
2142 363616 : stmt_info_for_cost *cost;
2143 363616 : unsigned i;
2144 2105085 : FOR_EACH_VEC_ELT (*cost_vec, i, cost)
2145 1741469 : add_stmt_cost (costs, cost->count, cost->kind, cost->stmt_info,
2146 : cost->node, cost->vectype, cost->misalign, cost->where);
2147 363616 : }
2148 :
/*-----------------------------------------------------------------*/
/* Info on data references alignment.                              */
/*-----------------------------------------------------------------*/
/* Special misalignment value meaning that the misalignment is not
   known; callers of dr_misalignment compare its result against this.  */
#define DR_MISALIGNMENT_UNKNOWN (-1)
/* NOTE(review): presumably marks a dr_vec_info whose misalignment has
   not been computed yet -- confirm against the code that sets it.  */
#define DR_MISALIGNMENT_UNINITIALIZED (-2)

/* Store VAL in the misalignment member of DR_INFO.  */
inline void
set_dr_misalignment (dr_vec_info *dr_info, int val)
{
  dr_info->misalignment = val;
}

/* Defined out of line.  OFFSET defaults to zero.  The result is
   compared against DR_MISALIGNMENT_UNKNOWN and zero by the helpers
   further down in this file.  */
extern int dr_misalignment (dr_vec_info *dr_info, tree vectype,
			    poly_int64 offset = 0);

/* Macro spelling of set_dr_misalignment.  */
#define SET_DR_MISALIGNMENT(DR, VAL) set_dr_misalignment (DR, VAL)
2165 :
2166 : /* Only defined once DR_MISALIGNMENT is defined. */
2167 : inline const poly_uint64
2168 8022070 : dr_target_alignment (dr_vec_info *dr_info)
2169 : {
2170 8022070 : if (STMT_VINFO_GROUPED_ACCESS (dr_info->stmt))
2171 5922635 : dr_info = STMT_VINFO_DR_INFO (DR_GROUP_FIRST_ELEMENT (dr_info->stmt));
2172 8022070 : return dr_info->target_alignment;
2173 : }
2174 : #define DR_TARGET_ALIGNMENT(DR) dr_target_alignment (DR)
2175 : #define DR_SCALAR_KNOWN_BOUNDS(DR) (DR)->scalar_access_known_in_bounds
2176 :
2177 : /* Return if the stmt_vec_info requires peeling for alignment. */
2178 : inline bool
2179 4474086 : dr_safe_speculative_read_required (stmt_vec_info stmt_info)
2180 : {
2181 4474086 : dr_vec_info *dr_info;
2182 4474086 : if (STMT_VINFO_GROUPED_ACCESS (stmt_info))
2183 1665290 : dr_info = STMT_VINFO_DR_INFO (DR_GROUP_FIRST_ELEMENT (stmt_info));
2184 : else
2185 2808796 : dr_info = STMT_VINFO_DR_INFO (stmt_info);
2186 :
2187 4474086 : return dr_info->safe_speculative_read_required;
2188 : }
2189 :
2190 : /* Set the safe_speculative_read_required for the stmt_vec_info, if group
2191 : access then set on the fist element otherwise set on DR directly. */
2192 : inline void
2193 226926 : dr_set_safe_speculative_read_required (stmt_vec_info stmt_info,
2194 : bool requires_alignment)
2195 : {
2196 226926 : dr_vec_info *dr_info;
2197 226926 : if (STMT_VINFO_GROUPED_ACCESS (stmt_info))
2198 67821 : dr_info = STMT_VINFO_DR_INFO (DR_GROUP_FIRST_ELEMENT (stmt_info));
2199 : else
2200 159105 : dr_info = STMT_VINFO_DR_INFO (stmt_info);
2201 :
2202 226926 : dr_info->safe_speculative_read_required = requires_alignment;
2203 226926 : }
2204 :
/* Store VAL in the target_alignment member of DR_INFO.  Unlike
   dr_target_alignment, this writes DR_INFO itself and does not redirect
   to the first element of a group.  */
inline void
set_dr_target_alignment (dr_vec_info *dr_info, poly_uint64 val)
{
  dr_info->target_alignment = val;
}
/* Macro spelling of set_dr_target_alignment.  */
#define SET_DR_TARGET_ALIGNMENT(DR, VAL) set_dr_target_alignment (DR, VAL)
2211 :
2212 : /* Return true if data access DR_INFO is aligned to the targets
2213 : preferred alignment for VECTYPE (which may be less than a full vector). */
2214 :
2215 : inline bool
2216 377740 : aligned_access_p (dr_vec_info *dr_info, tree vectype)
2217 : {
2218 377740 : return (dr_misalignment (dr_info, vectype) == 0);
2219 : }
2220 :
2221 : /* Return TRUE if the (mis-)alignment of the data access is known with
2222 : respect to the targets preferred alignment for VECTYPE, and FALSE
2223 : otherwise. */
2224 :
2225 : inline bool
2226 2259976 : known_alignment_for_access_p (dr_vec_info *dr_info, tree vectype)
2227 : {
2228 2022107 : return (dr_misalignment (dr_info, vectype) != DR_MISALIGNMENT_UNKNOWN);
2229 : }
2230 :
2231 : /* Return the minimum alignment in bytes that the vectorized version
2232 : of DR_INFO is guaranteed to have. */
2233 :
2234 : inline unsigned int
2235 276064 : vect_known_alignment_in_bytes (dr_vec_info *dr_info, tree vectype,
2236 : poly_int64 offset = 0)
2237 : {
2238 276064 : int misalignment = dr_misalignment (dr_info, vectype, offset);
2239 276064 : if (misalignment == DR_MISALIGNMENT_UNKNOWN)
2240 133907 : return TYPE_ALIGN_UNIT (TREE_TYPE (DR_REF (dr_info->dr)));
2241 142157 : else if (misalignment == 0)
2242 99009 : return known_alignment (DR_TARGET_ALIGNMENT (dr_info));
2243 43148 : return misalignment & -misalignment;
2244 : }
2245 :
2246 : /* Return the behavior of DR_INFO with respect to the vectorization context
2247 : (which for outer loop vectorization might not be the behavior recorded
2248 : in DR_INFO itself). */
2249 :
2250 : inline innermost_loop_behavior *
2251 5628351 : vect_dr_behavior (vec_info *vinfo, dr_vec_info *dr_info)
2252 : {
2253 5628351 : stmt_vec_info stmt_info = dr_info->stmt;
2254 5628351 : loop_vec_info loop_vinfo = dyn_cast<loop_vec_info> (vinfo);
2255 2160970 : if (loop_vinfo == NULL
2256 2160970 : || !nested_in_vect_loop_p (LOOP_VINFO_LOOP (loop_vinfo), stmt_info))
2257 5624394 : return &DR_INNERMOST (dr_info->dr);
2258 : else
2259 3957 : return &STMT_VINFO_DR_WRT_VEC_LOOP (stmt_info);
2260 : }
2261 :
2262 : /* Return the offset calculated by adding the offset of this DR_INFO to the
2263 : corresponding data_reference's offset. If CHECK_OUTER then use
2264 : vect_dr_behavior to select the appropriate data_reference to use. */
2265 :
2266 : inline tree
2267 736959 : get_dr_vinfo_offset (vec_info *vinfo,
2268 : dr_vec_info *dr_info, bool check_outer = false)
2269 : {
2270 736959 : innermost_loop_behavior *base;
2271 736959 : if (check_outer)
2272 697255 : base = vect_dr_behavior (vinfo, dr_info);
2273 : else
2274 39704 : base = &dr_info->dr->innermost;
2275 :
2276 736959 : tree offset = base->offset;
2277 :
2278 736959 : if (!dr_info->offset)
2279 : return offset;
2280 :
2281 19088 : offset = fold_convert (sizetype, offset);
2282 19088 : return fold_build2 (PLUS_EXPR, TREE_TYPE (dr_info->offset), offset,
2283 : dr_info->offset);
2284 : }
2285 :
2286 :
2287 : /* Return the vect cost model for LOOP. */
2288 : inline enum vect_cost_model
2289 2385163 : loop_cost_model (loop_p loop)
2290 : {
2291 2385163 : if (loop != NULL
2292 1725721 : && loop->force_vectorize
2293 77755 : && flag_simd_cost_model != VECT_COST_MODEL_DEFAULT)
2294 : return flag_simd_cost_model;
2295 2307408 : return flag_vect_cost_model;
2296 : }
2297 :
2298 : /* Return true if the vect cost model is unlimited. */
2299 : inline bool
2300 1655200 : unlimited_cost_model (loop_p loop)
2301 : {
2302 1655200 : return loop_cost_model (loop) == VECT_COST_MODEL_UNLIMITED;
2303 : }
2304 :
2305 : /* Return true if the loop described by LOOP_VINFO is fully-masked and
2306 : if the first iteration should use a partial mask in order to achieve
2307 : alignment. */
2308 :
2309 : inline bool
2310 265493 : vect_use_loop_mask_for_alignment_p (loop_vec_info loop_vinfo)
2311 : {
2312 : /* With early break vectorization we don't know whether the accesses will stay
2313 : inside the loop or not. TODO: The early break adjustment code can be
2314 : implemented the same way as vectorizable_linear_induction. However we
2315 : can't test this today so reject it. */
2316 85 : return (LOOP_VINFO_FULLY_MASKED_P (loop_vinfo)
2317 85 : && LOOP_VINFO_PEELING_FOR_ALIGNMENT (loop_vinfo)
2318 265497 : && !(LOOP_VINFO_NON_LINEAR_IV (loop_vinfo)
2319 0 : && LOOP_VINFO_EARLY_BREAKS (loop_vinfo)));
2320 : }
2321 :
2322 : /* Return the number of vectors of type VECTYPE that are needed to get
2323 : NUNITS elements. NUNITS should be based on the vectorization factor,
2324 : so it is always a known multiple of the number of elements in VECTYPE. */
2325 :
2326 : inline unsigned int
2327 7233722 : vect_get_num_vectors (poly_uint64 nunits, tree vectype)
2328 : {
2329 7233722 : return exact_div (nunits, TYPE_VECTOR_SUBPARTS (vectype)).to_constant ();
2330 : }
2331 :
2332 : /* Return the number of vectors in the context of vectorization region VINFO,
2333 : needed for a group of statements and a vector type as specified by NODE. */
2334 :
2335 : inline unsigned int
2336 7232893 : vect_get_num_copies (vec_info *vinfo, slp_tree node)
2337 : {
2338 7232893 : poly_uint64 vf;
2339 :
2340 7232893 : if (loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo))
2341 3323694 : vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
2342 : else
2343 : vf = 1;
2344 :
2345 7232893 : vf *= SLP_TREE_LANES (node);
2346 7232893 : tree vectype = SLP_TREE_VECTYPE (node);
2347 :
2348 7232893 : return vect_get_num_vectors (vf, vectype);
2349 : }
2350 :
2351 : /* Update maximum unit count *MAX_NUNITS so that it accounts for
2352 : NUNITS. *MAX_NUNITS can be 1 if we haven't yet recorded anything. */
2353 :
2354 : inline void
2355 9542033 : vect_update_max_nunits (poly_uint64 *max_nunits, poly_uint64 nunits)
2356 : {
2357 : /* All unit counts have the form vec_info::vector_size * X for some
2358 : rational X, so two unit sizes must have a common multiple.
2359 : Everything is a multiple of the initial value of 1. */
2360 4186528 : *max_nunits = force_common_multiple (*max_nunits, nunits);
2361 : }
2362 :
2363 : /* Update maximum unit count *MAX_NUNITS so that it accounts for
2364 : the number of units in vector type VECTYPE. *MAX_NUNITS can be 1
2365 : if we haven't yet recorded any vector types. */
2366 :
2367 : inline void
2368 5355505 : vect_update_max_nunits (poly_uint64 *max_nunits, tree vectype)
2369 : {
2370 5355505 : vect_update_max_nunits (max_nunits, TYPE_VECTOR_SUBPARTS (vectype));
2371 5355505 : }
2372 :
2373 : /* Return the vectorization factor that should be used for costing
2374 : purposes while vectorizing the loop described by LOOP_VINFO.
2375 : Pick a reasonable estimate if the vectorization factor isn't
2376 : known at compile time. */
2377 :
2378 : inline unsigned int
2379 1280610 : vect_vf_for_cost (loop_vec_info loop_vinfo)
2380 : {
2381 1280610 : return estimated_poly_value (LOOP_VINFO_VECT_FACTOR (loop_vinfo));
2382 : }
2383 :
2384 : /* Estimate the number of elements in VEC_TYPE for costing purposes.
2385 : The result is estimated_poly_value of TYPE_VECTOR_SUBPARTS (VEC_TYPE), i.e. an
2386 : estimate when the exact number isn't known at compile time. */
2387 :
2388 : inline unsigned int
2389 30439 : vect_nunits_for_cost (tree vec_type)
2390 : {
2391 30439 : return estimated_poly_value (TYPE_VECTOR_SUBPARTS (vec_type));
2392 : }
2393 :
2394 : /* Return the maximum possible vectorization factor for LOOP_VINFO: the VF itself when it is a compile-time constant, otherwise MAX_VECTORIZATION_FACTOR. */
2395 :
2396 : inline unsigned HOST_WIDE_INT
2397 105367 : vect_max_vf (loop_vec_info loop_vinfo)
2398 : {
2399 105367 : unsigned HOST_WIDE_INT vf;
2400 105367 : if (LOOP_VINFO_VECT_FACTOR (loop_vinfo).is_constant (&vf))
2401 105367 : return vf;
2402 : return MAX_VECTORIZATION_FACTOR; /* Variable-length VF: fall back to the global upper bound. */
2403 : }
2404 :
2405 : /* Return the size in bytes (TYPE_SIZE_UNIT of the type of DR_REF) of the value accessed by unvectorized data reference
2406 : DR_INFO. This is only valid once STMT_VINFO_VECTYPE has been calculated
2407 : for the associated gimple statement, since that guarantees that DR_INFO
2408 : accesses either a scalar or a scalar equivalent. ("Scalar equivalent"
2409 : here includes things like V1SI, which can be vectorized in the same way
2410 : as a plain SI.) */
2411 :
2412 : inline unsigned int
2413 1956001 : vect_get_scalar_dr_size (dr_vec_info *dr_info)
2414 : {
2415 1956001 : return tree_to_uhwi (TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (dr_info->dr))));
2416 : }
2417 :
2418 : /* Return true if LOOP_VINFO requires a runtime check for whether the
2419 : vector loop is profitable, i.e. LOOP_VINFO_NITERS_KNOWN_P is false and the cost-model threshold is at least the VF used for costing. */
2420 :
2421 : inline bool
2422 71365 : vect_apply_runtime_profitability_check_p (loop_vec_info loop_vinfo)
2423 : {
2424 71365 : unsigned int th = LOOP_VINFO_COST_MODEL_THRESHOLD (loop_vinfo);
2425 37644 : return (!LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)
2426 71365 : && th >= vect_vf_for_cost (loop_vinfo));
2427 : }
2428 :
2429 : /* Return true if CODE is a lane-reducing opcode, i.e. one of DOT_PROD_EXPR, WIDEN_SUM_EXPR or SAD_EXPR. */
2430 :
2431 : inline bool
2432 382140 : lane_reducing_op_p (code_helper code)
2433 : {
2434 382140 : return code == DOT_PROD_EXPR || code == WIDEN_SUM_EXPR || code == SAD_EXPR;
2435 : }
2436 :
2437 : /* Return true if STMT is a lane-reducing statement, i.e. a GIMPLE assignment whose rhs code satisfies lane_reducing_op_p. */
2438 :
2439 : inline bool
2440 461888 : lane_reducing_stmt_p (gimple *stmt)
2441 : {
2442 461888 : if (auto *assign = dyn_cast <gassign *> (stmt))
2443 318966 : return lane_reducing_op_p (gimple_assign_rhs_code (assign));
2444 : return false; /* Statements that are not assignments never qualify. */
2445 : }
2446 :
2447 : /* Source location + hotness information. */
2448 : extern dump_user_location_t vect_location;
2449 :
2450 : /* DUMP_VECT_SCOPE (MSG) expands to AUTO_DUMP_SCOPE (MSG, vect_location); it is a macro for calling:
2451 : dump_begin_scope (MSG, vect_location);
2452 : via an RAII object, thus printing "=== MSG ===\n" to the dumpfile etc,
2453 : and then calling
2454 : dump_end_scope ();
2455 : once the object goes out of scope, thus capturing the nesting of
2456 : the scopes.
2457 :
2458 : These scopes affect dump messages within them: dump messages at the
2459 : top level implicitly default to MSG_PRIORITY_USER_FACING, whereas those
2460 : in a nested scope implicitly default to MSG_PRIORITY_INTERNALS. */
2461 :
2462 : #define DUMP_VECT_SCOPE(MSG) \
2463 : AUTO_DUMP_SCOPE (MSG, vect_location)
2464 :
2465 : /* A sentinel class for ensuring that the "vect_location" global gets
2466 : reset at the end of a scope.
2467 :
2468 : The "vect_location" global is used during dumping and contains a
2469 : location_t, which could contain references to a tree block via the
2470 : ad-hoc data. This data is used for tracking inlining information,
2471 : but it's not a GC root; it's simply assumed that such locations never
2472 : get accessed if the blocks are optimized away.
2473 :
2474 : Hence we need to ensure that such locations are purged at the end
2475 : of any operations using them (e.g. via this class). */
2476 :
2477 : class auto_purge_vect_location
2478 : {
2479 : public:
2480 : ~auto_purge_vect_location (); /* Defined out of line; NOTE(review): presumably performs the reset of vect_location described above - confirm at the definition. */
2481 : };
2482 :
2483 : /*-----------------------------------------------------------------*/
2484 : /* Function prototypes. */
2485 : /*-----------------------------------------------------------------*/
2486 :
2487 : /* Simple loop peeling and versioning utilities for vectorizer's purposes -
2488 : in tree-vect-loop-manip.cc. */
2489 : extern void vect_set_loop_condition (class loop *, edge, loop_vec_info,
2490 : tree, tree, tree, bool);
2491 : extern bool slpeel_can_duplicate_loop_p (const class loop *, const_edge,
2492 : const_edge);
2493 : class loop *slpeel_tree_duplicate_loop_to_edge_cfg (class loop *, edge,
2494 : class loop *, edge,
2495 : edge, edge *, bool = true,
2496 : vec<basic_block> * = NULL,
2497 : bool = false, bool = false,
2498 : bool = true);
2499 : class loop *vect_loop_versioning (loop_vec_info, gimple *);
2500 : extern class loop *vect_do_peeling (loop_vec_info, tree, tree,
2501 : tree *, tree *, tree *, int, bool, bool,
2502 : tree *);
2503 : extern tree vect_get_main_loop_result (loop_vec_info, tree, tree);
2504 : extern void vect_prepare_for_masked_peels (loop_vec_info);
2505 : extern dump_user_location_t find_loop_location (class loop *);
2506 : extern bool vect_can_advance_ivs_p (loop_vec_info);
2507 : extern void vect_update_inits_of_drs (loop_vec_info, tree, tree_code);
2508 : extern edge vec_init_loop_exit_info (class loop *);
2509 : extern void vect_iv_increment_position (edge, gimple_stmt_iterator *, bool *);
2510 :
2511 : /* In tree-vect-stmts.cc. */
2512 : extern tree get_related_vectype_for_scalar_type (machine_mode, tree,
2513 : poly_uint64 = 0);
2514 : extern tree get_vectype_for_scalar_type (vec_info *, tree, unsigned int = 0);
2515 : extern tree get_vectype_for_scalar_type (vec_info *, tree, slp_tree);
2516 : extern tree get_mask_type_for_scalar_type (vec_info *, tree, unsigned int = 0);
2517 : extern tree get_mask_type_for_scalar_type (vec_info *, tree, slp_tree);
2518 : extern tree get_same_sized_vectype (tree, tree);
2519 : extern bool vect_chooses_same_modes_p (vec_info *, machine_mode);
2520 : extern bool vect_chooses_same_modes_p (machine_mode, machine_mode);
2521 : extern bool vect_get_loop_mask_type (loop_vec_info);
2522 : extern bool vect_is_simple_use (tree, vec_info *, enum vect_def_type *,
2523 : stmt_vec_info * = NULL, gimple ** = NULL);
2524 : extern bool vect_is_simple_use (vec_info *, slp_tree,
2525 : unsigned, tree *, slp_tree *,
2526 : enum vect_def_type *,
2527 : tree *, stmt_vec_info * = NULL);
2528 : extern bool vect_maybe_update_slp_op_vectype (slp_tree, tree);
2529 : extern tree perm_mask_for_reverse (tree);
2530 : extern bool supportable_widening_operation (code_helper, tree, tree, bool,
2531 : code_helper*, code_helper*,
2532 : int*, vec<tree> *);
2533 : extern bool supportable_narrowing_operation (code_helper, tree, tree,
2534 : code_helper *, int *,
2535 : vec<tree> *);
2536 : extern bool supportable_indirect_convert_operation (code_helper,
2537 : tree, tree,
2538 : vec<std::pair<tree, tree_code> > &,
2539 : tree = NULL_TREE,
2540 : slp_tree = NULL);
2541 : extern int compare_step_with_zero (vec_info *, stmt_vec_info);
2542 :
2543 : extern unsigned record_stmt_cost (stmt_vector_for_cost *, int,
2544 : enum vect_cost_for_stmt, stmt_vec_info,
2545 : tree, int, enum vect_cost_model_location);
2546 : extern unsigned record_stmt_cost (stmt_vector_for_cost *, int,
2547 : enum vect_cost_for_stmt, slp_tree,
2548 : tree, int, enum vect_cost_model_location);
2549 : extern unsigned record_stmt_cost (stmt_vector_for_cost *, int,
2550 : enum vect_cost_for_stmt,
2551 : enum vect_cost_model_location);
2552 : extern unsigned record_stmt_cost (stmt_vector_for_cost *, int,
2553 : enum vect_cost_for_stmt, stmt_vec_info,
2554 : slp_tree, tree, int,
2555 : enum vect_cost_model_location);
2556 :
2557 : /* Overload of record_stmt_cost with VECTYPE derived from STMT_INFO: forwards all arguments, plus STMT_VINFO_VECTYPE (STMT_INFO), to the overload taking a stmt_vec_info and an explicit vector type, and returns its result. */
2558 :
2559 : inline unsigned
2560 1780574 : record_stmt_cost (stmt_vector_for_cost *body_cost_vec, int count,
2561 : enum vect_cost_for_stmt kind, stmt_vec_info stmt_info,
2562 : int misalign, enum vect_cost_model_location where)
2563 : {
2564 1780045 : return record_stmt_cost (body_cost_vec, count, kind, stmt_info,
2565 1780045 : STMT_VINFO_VECTYPE (stmt_info), misalign, where);
2566 : }
2567 :
2568 : /* Overload of record_stmt_cost with VECTYPE derived from SLP node: forwards all arguments, plus SLP_TREE_VECTYPE (NODE), to the overload taking an slp_tree and an explicit vector type, and returns its result. */
2569 :
2570 : inline unsigned
2571 1620045 : record_stmt_cost (stmt_vector_for_cost *body_cost_vec, int count,
2572 : enum vect_cost_for_stmt kind, slp_tree node,
2573 : int misalign, enum vect_cost_model_location where)
2574 : {
2575 1416676 : return record_stmt_cost (body_cost_vec, count, kind, node,
2576 69318 : SLP_TREE_VECTYPE (node), misalign, where);
2577 : }
2578 :
2579 : extern void vect_finish_replace_stmt (vec_info *, stmt_vec_info, gimple *);
2580 : extern void vect_finish_stmt_generation (vec_info *, stmt_vec_info, gimple *,
2581 : gimple_stmt_iterator *);
2582 : extern opt_result vect_mark_stmts_to_be_vectorized (loop_vec_info, bool *);
2583 : extern tree vect_get_store_rhs (stmt_vec_info);
2584 : void vect_get_vec_defs (vec_info *, slp_tree,
2585 : tree, vec<tree> *,
2586 : tree = NULL, vec<tree> * = NULL,
2587 : tree = NULL, vec<tree> * = NULL,
2588 : tree = NULL, vec<tree> * = NULL);
2589 : extern tree vect_init_vector (vec_info *, stmt_vec_info, tree, tree,
2590 : gimple_stmt_iterator *);
2591 : extern tree vect_get_slp_vect_def (slp_tree, unsigned);
2592 : extern bool vect_transform_stmt (vec_info *, stmt_vec_info,
2593 : gimple_stmt_iterator *,
2594 : slp_tree, slp_instance);
2595 : extern void vect_remove_stores (vec_info *, stmt_vec_info);
2596 : extern bool vect_nop_conversion_p (stmt_vec_info);
2597 : extern opt_result vect_analyze_stmt (vec_info *, slp_tree,
2598 : slp_instance, stmt_vector_for_cost *);
2599 : extern void vect_get_load_cost (vec_info *, stmt_vec_info, slp_tree, int,
2600 : dr_alignment_support, int, bool,
2601 : unsigned int *, unsigned int *,
2602 : stmt_vector_for_cost *,
2603 : stmt_vector_for_cost *, bool);
2604 : extern void vect_get_store_cost (vec_info *, stmt_vec_info, slp_tree, int,
2605 : dr_alignment_support, int,
2606 : unsigned int *, stmt_vector_for_cost *);
2607 : extern bool vect_supportable_shift (vec_info *, enum tree_code, tree);
2608 : extern tree vect_gen_perm_mask_any (tree, const vec_perm_indices &);
2609 : extern tree vect_gen_perm_mask_checked (tree, const vec_perm_indices &);
2610 : extern void optimize_mask_stores (class loop*);
2611 : extern tree vect_gen_while (gimple_seq *, tree, tree, tree,
2612 : const char * = nullptr);
2613 : extern tree vect_gen_while_not (gimple_seq *, tree, tree, tree);
2614 : extern opt_result vect_get_vector_types_for_stmt (vec_info *,
2615 : stmt_vec_info, tree *,
2616 : tree *, unsigned int = 0);
2617 : extern opt_tree vect_get_mask_type_for_stmt (stmt_vec_info, unsigned int = 0);
2618 :
2619 : /* In tree-if-conv.cc. */
2620 : extern bool ref_within_array_bound (gimple *, tree);
2621 :
2622 : /* In tree-vect-data-refs.cc. */
2623 : extern bool vect_can_force_dr_alignment_p (const_tree, poly_uint64);
2624 : extern enum dr_alignment_support vect_supportable_dr_alignment
2625 : (vec_info *, dr_vec_info *, tree, int,
2626 : bool = false);
2627 : extern tree vect_get_smallest_scalar_type (stmt_vec_info, tree);
2628 : extern opt_result vect_analyze_data_ref_dependences (loop_vec_info, unsigned int *);
2629 : extern bool vect_slp_analyze_instance_dependence (vec_info *, slp_instance);
2630 : extern opt_result vect_enhance_data_refs_alignment (loop_vec_info);
2631 : extern void vect_analyze_data_refs_alignment (loop_vec_info);
2632 : extern bool vect_slp_analyze_instance_alignment (vec_info *, slp_instance);
2633 : extern opt_result vect_analyze_data_ref_accesses (vec_info *, vec<int> *);
2634 : extern opt_result vect_prune_runtime_alias_test_list (loop_vec_info);
2635 : extern bool vect_gather_scatter_fn_p (vec_info *, bool, bool, tree, tree,
2636 : tree, int, int *, internal_fn *, tree *,
2637 : tree *, vec<int> * = nullptr);
2638 : extern bool vect_check_gather_scatter (stmt_vec_info, tree,
2639 : loop_vec_info, gather_scatter_info *,
2640 : vec<int> * = nullptr);
2641 : extern void vect_describe_gather_scatter_call (stmt_vec_info,
2642 : gather_scatter_info *);
2643 : extern opt_result vect_find_stmt_data_reference (loop_p, gimple *,
2644 : vec<data_reference_p> *,
2645 : vec<int> *, int);
2646 : extern opt_result vect_analyze_data_refs (vec_info *, bool *);
2647 : extern void vect_record_base_alignments (vec_info *);
2648 : extern tree vect_create_data_ref_ptr (vec_info *,
2649 : stmt_vec_info, tree, class loop *, tree,
2650 : tree *, gimple_stmt_iterator *,
2651 : gimple **, bool,
2652 : tree = NULL_TREE);
2653 : extern tree bump_vector_ptr (vec_info *, tree, gimple *, gimple_stmt_iterator *,
2654 : stmt_vec_info, tree);
2655 : extern void vect_copy_ref_info (tree, tree);
2656 : extern tree vect_create_destination_var (tree, tree);
2657 : extern bool vect_grouped_store_supported (tree, unsigned HOST_WIDE_INT);
2658 : extern internal_fn vect_store_lanes_supported (tree, unsigned HOST_WIDE_INT, bool);
2659 : extern bool vect_grouped_load_supported (tree, bool, unsigned HOST_WIDE_INT);
2660 : extern internal_fn vect_load_lanes_supported (tree, unsigned HOST_WIDE_INT,
2661 : bool, vec<int> * = nullptr);
2662 : extern tree vect_setup_realignment (vec_info *,
2663 : stmt_vec_info, tree, gimple_stmt_iterator *,
2664 : tree *, enum dr_alignment_support, tree,
2665 : class loop **);
2666 : extern tree vect_get_new_vect_var (tree, enum vect_var_kind, const char *);
2667 : extern tree vect_get_new_ssa_name (tree, enum vect_var_kind,
2668 : const char * = NULL);
2669 : extern tree vect_create_addr_base_for_vector_ref (vec_info *,
2670 : stmt_vec_info, gimple_seq *,
2671 : tree);
2672 :
2673 : /* In tree-vect-loop.cc. */
2674 : extern tree neutral_op_for_reduction (tree, code_helper, tree, bool = true);
2675 : extern widest_int vect_iv_limit_for_partial_vectors (loop_vec_info loop_vinfo);
2676 : bool vect_rgroup_iv_might_wrap_p (loop_vec_info, rgroup_controls *);
2677 : /* Used in gimple-loop-interchange.cc and tree-parloops.cc. */
2678 : extern bool check_reduction_path (dump_user_location_t, loop_p, gphi *, tree,
2679 : enum tree_code);
2680 : extern bool needs_fold_left_reduction_p (tree, code_helper);
2681 : /* Driver for the loop analysis stage. */
2682 : extern opt_loop_vec_info vect_analyze_loop (class loop *, gimple *,
2683 : vec_info_shared *);
2684 : extern tree vect_build_loop_niters (loop_vec_info, bool * = NULL);
2685 : extern void vect_gen_vector_loop_niters (loop_vec_info, tree, tree *,
2686 : tree *, bool);
2687 : extern tree vect_halve_mask_nunits (tree, machine_mode);
2688 : extern tree vect_double_mask_nunits (tree, machine_mode);
2689 : extern void vect_record_loop_mask (loop_vec_info, vec_loop_masks *,
2690 : unsigned int, tree, tree);
2691 : extern tree vect_get_loop_mask (loop_vec_info, gimple_stmt_iterator *,
2692 : vec_loop_masks *,
2693 : unsigned int, tree, unsigned int);
2694 : extern void vect_record_loop_len (loop_vec_info, vec_loop_lens *, unsigned int,
2695 : tree, unsigned int);
2696 : extern tree vect_get_loop_len (loop_vec_info, gimple_stmt_iterator *,
2697 : vec_loop_lens *, unsigned int, tree,
2698 : unsigned int, unsigned int, bool);
2699 : extern tree vect_gen_loop_len_mask (loop_vec_info, gimple_stmt_iterator *,
2700 : gimple_stmt_iterator *, vec_loop_lens *,
2701 : unsigned int, tree, tree, unsigned int,
2702 : unsigned int);
2703 : extern gimple_seq vect_gen_len (tree, tree, tree, tree);
2704 : extern vect_reduc_info info_for_reduction (loop_vec_info, slp_tree);
2705 : extern bool reduction_fn_for_scalar_code (code_helper, internal_fn *);
2706 : extern unsigned vect_min_prec_for_max_niters (loop_vec_info, unsigned int);
2707 : /* Driver for the loop transformation stage. */
2708 : extern class loop *vect_transform_loop (loop_vec_info, gimple *);
2709 908312 : struct vect_loop_form_info /* Loop-form data passed by pointer to vect_analyze_loop_form and by const pointer to vect_create_loop_vinfo. */
2710 : {
2711 : tree number_of_iterations;
2712 : tree number_of_iterationsm1; /* NOTE(review): presumably NUMBER_OF_ITERATIONS minus one - confirm against vect_analyze_loop_form. */
2713 : tree assumptions;
2714 : auto_vec<gcond *> conds; /* auto_vec member, so the struct owns this vector's storage. */
2715 : gcond *inner_loop_cond;
2716 : edge loop_exit;
2717 : };
2718 : extern opt_result vect_analyze_loop_form (class loop *, gimple *,
2719 : vect_loop_form_info *);
2720 : extern loop_vec_info vect_create_loop_vinfo (class loop *, vec_info_shared *,
2721 : const vect_loop_form_info *,
2722 : loop_vec_info = nullptr);
2723 : extern bool vectorizable_live_operation (vec_info *, stmt_vec_info,
2724 : slp_tree, slp_instance, int,
2725 : bool, stmt_vector_for_cost *);
2726 : extern bool vectorizable_lane_reducing (loop_vec_info, stmt_vec_info,
2727 : slp_tree, stmt_vector_for_cost *);
2728 : extern bool vectorizable_reduction (loop_vec_info, stmt_vec_info,
2729 : slp_tree, slp_instance,
2730 : stmt_vector_for_cost *);
2731 : extern bool vectorizable_induction (loop_vec_info, stmt_vec_info,
2732 : slp_tree, stmt_vector_for_cost *);
2733 : extern bool vect_transform_reduction (loop_vec_info, stmt_vec_info,
2734 : gimple_stmt_iterator *,
2735 : slp_tree);
2736 : extern bool vect_transform_cycle_phi (loop_vec_info, stmt_vec_info,
2737 : slp_tree, slp_instance);
2738 : extern bool vectorizable_lc_phi (loop_vec_info, stmt_vec_info, slp_tree);
2739 : extern bool vect_transform_lc_phi (loop_vec_info, stmt_vec_info, slp_tree);
2740 : extern bool vectorizable_phi (bb_vec_info, stmt_vec_info, slp_tree,
2741 : stmt_vector_for_cost *);
2742 : extern bool vectorizable_recurr (loop_vec_info, stmt_vec_info,
2743 : slp_tree, stmt_vector_for_cost *);
2744 : extern bool vectorizable_early_exit (loop_vec_info, stmt_vec_info,
2745 : gimple_stmt_iterator *,
2746 : slp_tree, stmt_vector_for_cost *);
2747 : extern bool vect_emulated_vector_p (tree);
2748 : extern bool vect_can_vectorize_without_simd_p (tree_code);
2749 : extern bool vect_can_vectorize_without_simd_p (code_helper);
2750 : extern int vect_get_known_peeling_cost (loop_vec_info, int);
2751 : extern tree cse_and_gimplify_to_preheader (loop_vec_info, tree);
2752 :
2753 : /* Nonlinear induction. */
2754 : extern tree vect_peel_nonlinear_iv_init (gimple_seq*, tree, tree,
2755 : tree, enum vect_induction_op_type,
2756 : bool);
2757 :
2758 : /* In tree-vect-slp.cc. */
2759 : extern void vect_slp_init (void);
2760 : extern void vect_slp_fini (void);
2761 : extern void vect_free_slp_instance (slp_instance);
2762 : extern bool vect_transform_slp_perm_load (vec_info *, slp_tree, const vec<tree> &,
2763 : gimple_stmt_iterator *, poly_uint64,
2764 : bool, unsigned *,
2765 : unsigned * = nullptr, bool = false);
2766 : extern bool vectorizable_slp_permutation (vec_info *, gimple_stmt_iterator *,
2767 : slp_tree, stmt_vector_for_cost *);
2768 : extern bool vect_slp_analyze_operations (vec_info *);
2769 : extern void vect_schedule_slp (vec_info *, const vec<slp_instance> &);
2770 : extern opt_result vect_analyze_slp (vec_info *, unsigned, bool);
2771 : extern bool vect_make_slp_decision (loop_vec_info);
2772 : extern bool vect_detect_hybrid_slp (loop_vec_info);
2773 : extern void vect_optimize_slp (vec_info *);
2774 : extern void vect_gather_slp_loads (vec_info *);
2775 : extern tree vect_get_slp_scalar_def (slp_tree, unsigned);
2776 : extern void vect_get_slp_defs (slp_tree, vec<tree> *);
2777 : extern void vect_get_slp_defs (vec_info *, slp_tree, vec<vec<tree> > *,
2778 : unsigned n = -1U);
2779 : extern bool vect_slp_if_converted_bb (basic_block bb, loop_p orig_loop);
2780 : extern bool vect_slp_function (function *);
2781 : extern stmt_vec_info vect_find_last_scalar_stmt_in_slp (slp_tree);
2782 : extern stmt_vec_info vect_find_first_scalar_stmt_in_slp (slp_tree);
2783 : extern bool is_simple_and_all_uses_invariant (stmt_vec_info, loop_vec_info);
2784 : extern bool can_duplicate_and_interleave_p (vec_info *, unsigned int, tree,
2785 : unsigned int * = NULL,
2786 : tree * = NULL, tree * = NULL);
2787 : extern void duplicate_and_interleave (vec_info *, gimple_seq *, tree,
2788 : const vec<tree> &, unsigned int, vec<tree> &);
2789 : extern int vect_get_place_in_interleaving_chain (stmt_vec_info, stmt_vec_info);
2790 : extern slp_tree vect_create_new_slp_node (unsigned, tree_code);
2791 : extern void vect_free_slp_tree (slp_tree);
2792 : extern bool compatible_calls_p (gcall *, gcall *, bool);
2793 : extern int vect_slp_child_index_for_operand (const stmt_vec_info, int op);
2794 :
2795 : extern tree prepare_vec_mask (loop_vec_info, tree, tree, tree,
2796 : gimple_stmt_iterator *);
2797 : extern tree vect_get_mask_load_else (int, tree);
2798 : extern bool vect_load_perm_consecutive_p (slp_tree, unsigned = UINT_MAX);
2799 :
2800 : /* In tree-vect-patterns.cc. */
2801 : extern void
2802 : vect_mark_pattern_stmts (vec_info *, stmt_vec_info, gimple *, tree);
2803 : extern bool vect_get_range_info (tree, wide_int*, wide_int*);
2804 :
2805 : /* Pattern recognition functions.
2806 : Additional pattern recognition functions can (and will) be added
2807 : in the future. */
2808 : void vect_pattern_recog (vec_info *);
2809 :
2810 : /* In tree-vectorizer.cc. */
2811 : unsigned vectorize_loops (void);
2812 : void vect_free_loop_info_assumptions (class loop *);
2813 : gimple *vect_loop_vectorized_call (class loop *, gcond **cond = NULL);
2814 : bool vect_stmt_dominates_stmt_p (gimple *, gimple *);
2815 :
2816 : /* SLP Pattern matcher types, tree-vect-slp-patterns.cc. */
2817 :
2818 : /* Forward declaration of the possible two-operand operations that can be matched
2819 : by the complex-number pattern matchers. */
2820 : enum _complex_operation : unsigned;
2821 :
2822 : /* All possible load permute values that could result from the partial data-flow
2823 : analysis. PERM_UNKNOWN is the first enumerator and therefore has value 0. */
2824 : typedef enum _complex_perm_kinds {
2825 : PERM_UNKNOWN,
2826 : PERM_EVENODD,
2827 : PERM_ODDEVEN,
2828 : PERM_ODDODD,
2829 : PERM_EVENEVEN,
2830 : /* Can be combined with any other PERM values. */
2831 : PERM_TOP
2832 : } complex_perm_kinds_t;
2833 :
2834 : /* Cache from nodes to the load permutation they represent. */
2835 : typedef hash_map <slp_tree, complex_perm_kinds_t>
2836 : slp_tree_to_load_perm_map_t;
2837 :
2838 : /* Cache from nodes pair to being compatible or not. */
2839 : typedef pair_hash <nofree_ptr_hash <_slp_tree>,
2840 : nofree_ptr_hash <_slp_tree>> slp_node_hash;
2841 : typedef hash_map <slp_node_hash, bool> slp_compat_nodes_map_t;
2842 :
2843 :
2844 : /* Vector pattern matcher base class. All SLP pattern matchers must inherit
2845 : from this type. The class is abstract: build is pure virtual and the constructor is protected. */
2846 :
2847 : class vect_pattern
2848 : {
2849 : protected:
2850 : /* The number of arguments that the IFN requires. Not initialized by the constructor below; NOTE(review): presumably set by derived classes - confirm. */
2851 : unsigned m_num_args;
2852 :
2853 : /* The internal function that will be used when a pattern is created. */
2854 : internal_fn m_ifn;
2855 :
2856 : /* The current node being inspected. */
2857 : slp_tree *m_node;
2858 :
2859 : /* The list of operands to be the children for the node produced when the
2860 : internal function is created. Released by the destructor. */
2861 : vec<slp_tree> m_ops;
2862 :
2863 : /* Constructor where NODE is the root of the tree to inspect, IFN is the internal function to use and M_OPS, if non-null, is a list of operands to copy into this->m_ops. */
2864 1107 : vect_pattern (slp_tree *node, vec<slp_tree> *m_ops, internal_fn ifn)
2865 1107 : {
2866 1107 : this->m_ifn = ifn;
2867 1107 : this->m_node = node;
2868 1107 : this->m_ops.create (0);
2869 1107 : if (m_ops) /* The parameter M_OPS shadows the member, hence the explicit this-> elsewhere. */
2870 32 : this->m_ops.safe_splice (*m_ops);
2871 : }
2872 :
2873 : public:
2874 :
2875 : /* Create a new instance of the pattern matcher class of the given type. */
2876 : static vect_pattern* recognize (slp_tree_to_load_perm_map_t *,
2877 : slp_compat_nodes_map_t *, slp_tree *);
2878 :
2879 : /* Build the pattern from the data collected so far. Pure virtual: every derived matcher must implement it. */
2880 : virtual void build (vec_info *) = 0;
2881 :
2882 : /* Destructor. Releases the storage held by m_ops. */
2883 : virtual ~vect_pattern ()
2884 : {
2885 : this->m_ops.release ();
2886 : }
2887 : };
2888 :
2889 : /* Function pointer to create a new pattern matcher from a generic type. */
2890 : typedef vect_pattern* (*vect_pattern_decl_t) (slp_tree_to_load_perm_map_t *,
2891 : slp_compat_nodes_map_t *,
2892 : slp_tree *);
2893 :
2894 : /* List of supported pattern matchers. */
2895 : extern vect_pattern_decl_t slp_patterns[];
2896 :
2897 : /* Number of supported pattern matchers. */
2898 : extern size_t num__slp_patterns;
2899 :
2900 : /* ----------------------------------------------------------------------
2901 : Target support routines
2902 : -----------------------------------------------------------------------
2903 : The following routines are provided to simplify costing decisions in
2904 : target code. Please add more as needed. */
2905 :
2906 : /* Return true if an operation of kind KIND for STMT_INFO represents
2907 : the extraction of an element from a vector in preparation for
2908 : storing the element to memory, i.e. KIND is vec_to_scalar and STMT_INFO has a data reference that is a write. */
2909 : inline bool
2910 : vect_is_store_elt_extraction (vect_cost_for_stmt kind, stmt_vec_info stmt_info)
2911 : {
2912 : return (kind == vec_to_scalar
2913 : && STMT_VINFO_DATA_REF (stmt_info)
2914 : && DR_IS_WRITE (STMT_VINFO_DATA_REF (stmt_info)));
2915 : }
2916 :
2917 : /* Return true if STMT_INFO represents part of a reduction, i.e. its STMT_VINFO_REDUC_IDX is not -1. */
2918 : inline bool
2919 47579719 : vect_is_reduction (stmt_vec_info stmt_info)
2920 : {
2921 47579719 : return STMT_VINFO_REDUC_IDX (stmt_info) != -1;
2922 : }
2923 :
2924 : /* Return true if SLP_NODE represents part of a reduction, i.e. its SLP_TREE_REDUC_IDX is not -1. */
2925 : inline bool
2926 261768 : vect_is_reduction (slp_tree slp_node)
2927 : {
2928 261768 : return SLP_TREE_REDUC_IDX (slp_node) != -1;
2929 : }
2930 :
2931 : /* If VINFO is a loop_vec_info and info_for_reduction finds reduction info for SLP node NODE, return the vect_reduction_type
2932 : of that reduction (as an int), otherwise return -1. */
2933 : inline int
2934 45 : vect_reduc_type (vec_info *vinfo, slp_tree node)
2935 : {
2936 45 : if (loop_vec_info loop_vinfo = dyn_cast<loop_vec_info> (vinfo))
2937 : {
2938 45 : vect_reduc_info reduc_info = info_for_reduction (loop_vinfo, node);
2939 45 : if (reduc_info)
2940 45 : return int (VECT_REDUC_INFO_TYPE (reduc_info));
2941 : }
2942 : return -1; /* VINFO is not a loop, or NODE has no reduction info. */
2943 : }
2944 :
2945 : /* If STMT_INFO is a COND_EXPR assignment whose first rhs operand is itself a comparison, return the
2946 : type of that comparison's first operand (the values being compared). Return null otherwise. */
2947 : inline tree
2948 : vect_embedded_comparison_type (stmt_vec_info stmt_info)
2949 : {
2950 : if (auto *assign = dyn_cast<gassign *> (stmt_info->stmt))
2951 : if (gimple_assign_rhs_code (assign) == COND_EXPR)
2952 : {
2953 : tree cond = gimple_assign_rhs1 (assign); /* The condition operand of the COND_EXPR. */
2954 : if (COMPARISON_CLASS_P (cond))
2955 : return TREE_TYPE (TREE_OPERAND (cond, 0));
2956 : }
2957 : return NULL_TREE;
2958 : }
2959 :
2960 : /* If STMT_INFO is a comparison (an assignment whose rhs code is of class tcc_comparison) or contains an embedded comparison, return the
2961 : type of the values being compared (the type of the first compared operand). Return null otherwise. */
2962 : inline tree
2963 : vect_comparison_type (stmt_vec_info stmt_info)
2964 : {
2965 : if (auto *assign = dyn_cast<gassign *> (stmt_info->stmt))
2966 : if (TREE_CODE_CLASS (gimple_assign_rhs_code (assign)) == tcc_comparison)
2967 : return TREE_TYPE (gimple_assign_rhs1 (assign));
2968 : return vect_embedded_comparison_type (stmt_info); /* Otherwise look for a comparison embedded in a COND_EXPR. */
2969 : }
2970 :
2971 : /* Return true if STMT_INFO extends the result of a load: it must be a conversion between integral types that strictly increases the precision, and VINFO must know a defining statement for the converted operand that has a read data reference. */
2972 : inline bool
2973 : vect_is_extending_load (class vec_info *vinfo, stmt_vec_info stmt_info)
2974 : {
2975 : /* Although this is quite large for an inline function, this part
2976 : at least should be inline. */
2977 : gassign *assign = dyn_cast <gassign *> (stmt_info->stmt);
2978 : if (!assign || !CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (assign)))
2979 : return false;
2980 :
2981 : tree rhs = gimple_assign_rhs1 (stmt_info->stmt); /* Same statement as ASSIGN, checked above. */
2982 : tree lhs_type = TREE_TYPE (gimple_assign_lhs (assign));
2983 : tree rhs_type = TREE_TYPE (rhs);
2984 : if (!INTEGRAL_TYPE_P (lhs_type)
2985 : || !INTEGRAL_TYPE_P (rhs_type)
2986 : || TYPE_PRECISION (lhs_type) <= TYPE_PRECISION (rhs_type))
2987 : return false; /* Not an integer widening. */
2988 :
2989 : stmt_vec_info def_stmt_info = vinfo->lookup_def (rhs);
2990 : return (def_stmt_info
2991 : && STMT_VINFO_DATA_REF (def_stmt_info)
2992 : && DR_IS_READ (STMT_VINFO_DATA_REF (def_stmt_info)));
2993 : }
2994 :
2995 : /* Return true if STMT_INFO is an integer truncation, i.e. a conversion between integral types whose result has strictly smaller precision than its operand. */
2996 : inline bool
2997 : vect_is_integer_truncation (stmt_vec_info stmt_info)
2998 : {
2999 : gassign *assign = dyn_cast <gassign *> (stmt_info->stmt);
3000 : if (!assign || !CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (assign)))
3001 : return false; /* Not a conversion assignment. */
3002 :
3003 : tree lhs_type = TREE_TYPE (gimple_assign_lhs (assign));
3004 : tree rhs_type = TREE_TYPE (gimple_assign_rhs1 (assign));
3005 : return (INTEGRAL_TYPE_P (lhs_type)
3006 : && INTEGRAL_TYPE_P (rhs_type)
3007 : && TYPE_PRECISION (lhs_type) < TYPE_PRECISION (rhs_type));
3008 : }
3009 :
3010 : /* Build a GIMPLE_ASSIGN or GIMPLE_CALL with the tree_code
3011 : or internal_fn, respectively, contained in the code_helper argument. */
3012 : gimple * vect_gimple_build (tree, code_helper, tree, tree = NULL_TREE);
3013 : #endif /* GCC_TREE_VECTORIZER_H */
|