Branch data Line data Source code
1 : : /* Vectorizer
2 : : Copyright (C) 2003-2025 Free Software Foundation, Inc.
3 : : Contributed by Dorit Naishlos <dorit@il.ibm.com>
4 : :
5 : : This file is part of GCC.
6 : :
7 : : GCC is free software; you can redistribute it and/or modify it under
8 : : the terms of the GNU General Public License as published by the Free
9 : : Software Foundation; either version 3, or (at your option) any later
10 : : version.
11 : :
12 : : GCC is distributed in the hope that it will be useful, but WITHOUT ANY
13 : : WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 : : FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
15 : : for more details.
16 : :
17 : : You should have received a copy of the GNU General Public License
18 : : along with GCC; see the file COPYING3. If not see
19 : : <http://www.gnu.org/licenses/>. */
20 : :
21 : : #ifndef GCC_TREE_VECTORIZER_H
22 : : #define GCC_TREE_VECTORIZER_H
23 : :
24 : : typedef class _stmt_vec_info *stmt_vec_info;
25 : : typedef struct _slp_tree *slp_tree;
26 : :
27 : : #include "tree-data-ref.h"
28 : : #include "tree-hash-traits.h"
29 : : #include "target.h"
30 : : #include "internal-fn.h"
31 : : #include "tree-ssa-operands.h"
32 : : #include "gimple-match.h"
33 : : #include "dominance.h"
34 : :
35 : : /* Used for naming of new temporaries. */
36 : : enum vect_var_kind {
37 : : vect_simple_var,
38 : : vect_pointer_var,
39 : : vect_scalar_var,
40 : : vect_mask_var
41 : : };
42 : :
43 : : /* Defines type of operation. */
44 : : enum operation_type {
45 : : unary_op = 1,
46 : : binary_op,
47 : : ternary_op
48 : : };
49 : :
50 : : /* Define type of available alignment support. */
51 : : enum dr_alignment_support {
52 : : dr_unaligned_unsupported,
53 : : dr_unaligned_supported,
54 : : dr_explicit_realign,
55 : : dr_explicit_realign_optimized,
56 : : dr_aligned
57 : : };
58 : :
59 : : /* Define type of peeling support to indicate how peeling for alignment can help
60 : : make vectorization supported. */
61 : : enum peeling_support {
62 : : peeling_known_supported,
63 : : peeling_maybe_supported,
64 : : peeling_unsupported
65 : : };
66 : :
67 : : /* Define type of def-use cross-iteration cycle. */
68 : : enum vect_def_type {
69 : : vect_uninitialized_def = 0,
70 : : vect_constant_def = 1,
71 : : vect_external_def,
72 : : vect_internal_def,
73 : : vect_induction_def,
74 : : vect_reduction_def,
75 : : vect_double_reduction_def,
76 : : vect_nested_cycle,
77 : : vect_first_order_recurrence,
78 : : vect_condition_def,
79 : : vect_unknown_def_type
80 : : };
81 : :
82 : : /* Define operation type of linear/non-linear induction variable. */
83 : : enum vect_induction_op_type {
84 : : vect_step_op_add = 0,
85 : : vect_step_op_neg,
86 : : vect_step_op_mul,
87 : : vect_step_op_shl,
88 : : vect_step_op_shr
89 : : };
90 : :
91 : : /* Define type of reduction. */
92 : : enum vect_reduction_type {
93 : : TREE_CODE_REDUCTION,
94 : : COND_REDUCTION,
95 : : INTEGER_INDUC_COND_REDUCTION,
96 : : CONST_COND_REDUCTION,
97 : :
98 : : /* Retain a scalar phi and use a FOLD_EXTRACT_LAST within the loop
99 : : to implement:
100 : :
101 : : for (int i = 0; i < VF; ++i)
102 : : res = cond[i] ? val[i] : res; */
103 : : EXTRACT_LAST_REDUCTION,
104 : :
105 : : /* Use a folding reduction within the loop to implement:
106 : :
107 : : for (int i = 0; i < VF; ++i)
108 : : res = res OP val[i];
109 : :
110 : : (with no reassociation, i.e. strictly in the order shown). */
111 : : FOLD_LEFT_REDUCTION
112 : : };
113 : : /* True if def type D is vect_reduction_def, vect_double_reduction_def or vect_nested_cycle. */
114 : : #define VECTORIZABLE_CYCLE_DEF(D) (((D) == vect_reduction_def) \
115 : : || ((D) == vect_double_reduction_def) \
116 : : || ((D) == vect_nested_cycle))
117 : :
118 : : /* Structure to encapsulate information about a group of like
119 : : instructions to be presented to the target cost model. */
120 : : struct stmt_info_for_cost {
121 : : int count;
122 : : enum vect_cost_for_stmt kind;
123 : : enum vect_cost_model_location where;
124 : : stmt_vec_info stmt_info;
125 : : slp_tree node;
126 : : tree vectype;
127 : : int misalign;
128 : : };
129 : :
130 : : typedef vec<stmt_info_for_cost> stmt_vector_for_cost;
131 : :
132 : : /* Maps base addresses to an innermost_loop_behavior and the stmt it was
133 : : derived from that gives the maximum known alignment for that base. */
134 : : typedef hash_map<tree_operand_hash,
135 : : std::pair<stmt_vec_info, innermost_loop_behavior *> >
136 : : vec_base_alignments;
137 : :
138 : : /* Represents elements [START, START + LENGTH) of cyclical array OPS*
139 : : (i.e. OPS repeated to give at least START + LENGTH elements). */
140 : : struct vect_scalar_ops_slice
141 : : {
142 : : tree op (unsigned int i) const; /* Element I of the slice. */
143 : : bool all_same_p () const; /* Defined elsewhere; presumably whether all slice elements are the same. */
144 : :
145 : : vec<tree> *ops; /* The array that is viewed cyclically. */
146 : : unsigned int start; /* Index in the cyclical array of the first slice element. */
147 : : unsigned int length; /* Number of elements in the slice. */
148 : : };
149 : :
150 : : /* Return element I of the slice, i.e. the element of OPS at index (I + START) modulo the length of OPS. */
151 : : inline tree
152 : 2661514 : vect_scalar_ops_slice::op (unsigned int i) const
153 : : {
154 : 5323028 : return (*ops)[(i + start) % ops->length ()];
155 : : }
156 : :
157 : : /* Hash traits for vect_scalar_ops_slice. The LENGTH field doubles as the slot marker: 0 means empty and ~0U means deleted. */
158 : : struct vect_scalar_ops_slice_hash : typed_noop_remove<vect_scalar_ops_slice>
159 : : {
160 : : typedef vect_scalar_ops_slice value_type;
161 : : typedef vect_scalar_ops_slice compare_type;
162 : :
163 : : static const bool empty_zero_p = true;
164 : :
165 : : static void mark_deleted (value_type &s) { s.length = ~0U; }
166 : 0 : static void mark_empty (value_type &s) { s.length = 0; }
167 : 420402 : static bool is_deleted (const value_type &s) { return s.length == ~0U; }
168 : 3990352 : static bool is_empty (const value_type &s) { return s.length == 0; }
169 : : static hashval_t hash (const value_type &); /* Defined out of line. */
170 : : static bool equal (const value_type &, const compare_type &); /* Defined out of line. */
171 : : };
172 : :
173 : : /* Describes how we're going to vectorize an individual load or store,
174 : : or a group of loads or stores. */
175 : : enum vect_memory_access_type {
176 : : VMAT_UNINITIALIZED, /* No access type; also what SLP_TREE_MEMORY_ACCESS_TYPE yields for non-load/store nodes. */
177 : :
178 : : /* An access to an invariant address. This is used only for loads. */
179 : : VMAT_INVARIANT,
180 : :
181 : : /* A simple contiguous access. */
182 : : VMAT_CONTIGUOUS,
183 : :
184 : : /* A contiguous access that goes down in memory rather than up,
185 : : with no additional permutation. This is used only for stores
186 : : of invariants. */
187 : : VMAT_CONTIGUOUS_DOWN,
188 : :
189 : : /* A simple contiguous access in which the elements need to be reversed
190 : : after loading or before storing. */
191 : : VMAT_CONTIGUOUS_REVERSE,
192 : :
193 : : /* An access that uses IFN_LOAD_LANES or IFN_STORE_LANES. */
194 : : VMAT_LOAD_STORE_LANES,
195 : :
196 : : /* An access in which each scalar element is loaded or stored
197 : : individually. */
198 : : VMAT_ELEMENTWISE,
199 : :
200 : : /* A hybrid of VMAT_CONTIGUOUS and VMAT_ELEMENTWISE, used for grouped
201 : : SLP accesses. Each unrolled iteration uses a contiguous load
202 : : or store for the whole group, but the groups from separate iterations
203 : : are combined in the same way as for VMAT_ELEMENTWISE. */
204 : : VMAT_STRIDED_SLP,
205 : :
206 : : /* The access uses gather loads or scatter stores; mat_gather_scatter_p accepts any of these three kinds. */
207 : : VMAT_GATHER_SCATTER_LEGACY,
208 : : VMAT_GATHER_SCATTER_IFN,
209 : : VMAT_GATHER_SCATTER_EMULATED
210 : : };
211 : :
212 : : /* Returns whether MAT is any of the VMAT_GATHER_SCATTER_* kinds. */
213 : :
214 : : inline bool
215 : 6040738 : mat_gather_scatter_p (vect_memory_access_type mat)
216 : : {
217 : 6040738 : return (mat == VMAT_GATHER_SCATTER_LEGACY
218 : : || mat == VMAT_GATHER_SCATTER_IFN
219 : 6040738 : || mat == VMAT_GATHER_SCATTER_EMULATED);
220 : : }
221 : :
222 : : /*-----------------------------------------------------------------*/
223 : : /* Info on vectorized defs. */
224 : : /*-----------------------------------------------------------------*/
225 : : enum stmt_vec_info_type {
226 : : undef_vec_info_type = 0,
227 : : load_vec_info_type,
228 : : store_vec_info_type,
229 : : shift_vec_info_type,
230 : : op_vec_info_type,
231 : : call_vec_info_type,
232 : : call_simd_clone_vec_info_type,
233 : : assignment_vec_info_type,
234 : : condition_vec_info_type,
235 : : comparison_vec_info_type,
236 : : reduc_vec_info_type,
237 : : induc_vec_info_type,
238 : : type_promotion_vec_info_type,
239 : : type_demotion_vec_info_type,
240 : : type_conversion_vec_info_type,
241 : : cycle_phi_info_type,
242 : : lc_phi_info_type,
243 : : phi_info_type,
244 : : recurr_info_type,
245 : : loop_exit_ctrl_vec_info_type,
246 : : permute_info_type
247 : : };
248 : :
249 : : /************************************************************************
250 : : SLP
251 : : ************************************************************************/
252 : : typedef vec<std::pair<unsigned, unsigned> > lane_permutation_t;
253 : : typedef auto_vec<std::pair<unsigned, unsigned>, 16> auto_lane_permutation_t;
254 : : typedef vec<unsigned> load_permutation_t;
255 : : typedef auto_vec<unsigned, 16> auto_load_permutation_t;
256 : : /* Polymorphic base of the kind-specific analysis data that _slp_tree::data points to. */
257 : 3115348 : struct vect_data {
258 : 1956599 : virtual ~vect_data () = default;
259 : : };
260 : :
261 : : /* Analysis data from vectorizable_simd_clone_call for
262 : : call_simd_clone_vec_info_type. */
263 : : struct vect_simd_clone_data : vect_data {
264 : 1811 : virtual ~vect_simd_clone_data () = default;
265 : 1369 : vect_simd_clone_data () = default;
266 : 442 : vect_simd_clone_data (vect_simd_clone_data &&other) = default;
267 : :
268 : : /* Selected SIMD clone's function info. First vector element
269 : : is SIMD clone's function decl, followed by a pair of trees (base + step)
270 : : for linear arguments (pair of NULLs for other arguments). */
271 : : auto_vec<tree> simd_clone_info;
272 : : };
273 : :
274 : : /* Analysis data from vectorizable_load and vectorizable_store for
275 : : load_vec_info_type and store_vec_info_type. */
276 : : struct vect_load_store_data : vect_data {
277 : 1156938 : vect_load_store_data (vect_load_store_data &&other) = default;
278 : 1956599 : vect_load_store_data () = default;
279 : 3109693 : virtual ~vect_load_store_data () = default;
280 : :
281 : : vect_memory_access_type memory_access_type; // read by SLP_TREE_MEMORY_ACCESS_TYPE
282 : : dr_alignment_support alignment_support_scheme;
283 : : int misalignment;
284 : : internal_fn lanes_ifn; // VMAT_LOAD_STORE_LANES
285 : : poly_int64 poffset;
286 : : union {
287 : : internal_fn ifn; // VMAT_GATHER_SCATTER_IFN
288 : : tree decl; // presumably VMAT_GATHER_SCATTER_LEGACY (no _DECL kind exists) - TODO confirm
289 : : } gs;
290 : : tree strided_offset_vectype; // VMAT_GATHER_SCATTER_IFN, originally strided
291 : : /* Load/store type with larger element mode used for punning the vectype. */
292 : : tree ls_type; // VMAT_GATHER_SCATTER_IFN
293 : : /* This is set to a supported offset vector type if we don't support the
294 : : originally requested offset type, otherwise NULL.
295 : : If non-NULL there will be an additional offset conversion before
296 : : the gather/scatter. */
297 : : tree supported_offset_vectype; // VMAT_GATHER_SCATTER_IFN
298 : : /* Similar for scale. Only nonzero if we don't support the requested
299 : : scale. Then we need to multiply the offset vector before the
300 : : gather/scatter. */
301 : : int supported_scale; // VMAT_GATHER_SCATTER_IFN
302 : : auto_vec<int> elsvals;
303 : : /* True if the load requires a load permutation. */
304 : : bool slp_perm; // SLP_TREE_LOAD_PERMUTATION
305 : : unsigned n_perms; // SLP_TREE_LOAD_PERMUTATION
306 : : /* Whether the load permutation is consecutive and simple. */
307 : : bool subchain_p; // VMAT_STRIDED_SLP and VMAT_GATHER_SCATTER_* (presumably; plain VMAT_GATHER_SCATTER no longer exists)
308 : : };
309 : :
310 : : /* A computation tree of an SLP instance. Each node corresponds to a group of
311 : : stmts to be packed in a SIMD stmt. */
312 : : struct _slp_tree {
313 : : _slp_tree ();
314 : : ~_slp_tree ();
315 : :
316 : : void push_vec_def (gimple *def);
317 : 9677 : void push_vec_def (tree def) { vec_defs.quick_push (def); }
318 : :
319 : : /* Nodes that contain the def-stmts of the operands of this node's statements. */
320 : : vec<slp_tree> children;
321 : :
322 : : /* A group of scalar stmts to be vectorized together. */
323 : : vec<stmt_vec_info> stmts;
324 : : /* A group of scalar operands to be vectorized together. */
325 : : vec<tree> ops;
326 : : /* The representative that should be used for analysis and
327 : : code generation. */
328 : : stmt_vec_info representative;
329 : :
330 : : struct {
331 : : /* SLP cycle the node resides in, or -1. */
332 : : int id;
333 : : /* The SLP operand index with the edge on the SLP cycle, or -1. */
334 : : int reduc_idx;
335 : : } cycle_info;
336 : :
337 : : /* Load permutation relative to the stores, NULL if there is no
338 : : permutation. */
339 : : load_permutation_t load_permutation;
340 : : /* Lane permutation of the operands' scalar lanes encoded as pairs
341 : : of { operand number, lane number }. The number of elements
342 : : denotes the number of output lanes. */
343 : : lane_permutation_t lane_permutation;
344 : :
345 : : tree vectype;
346 : : /* Vectorized defs. */
347 : : vec<tree> vec_defs;
348 : :
349 : : /* Reference count in the SLP graph. */
350 : : unsigned int refcnt;
351 : : /* The maximum number of vector elements for the subtree rooted
352 : : at this node. */
353 : : poly_uint64 max_nunits;
354 : : /* The DEF type of this node. */
355 : : enum vect_def_type def_type;
356 : : /* The number of scalar lanes produced by this node. */
357 : : unsigned int lanes;
358 : : /* The operation of this node. */
359 : : enum tree_code code;
360 : : /* For gather/scatter memory operations the scale each offset element
361 : : should be multiplied by before being added to the base. */
362 : : int gs_scale;
363 : : /* For gather/scatter memory operations the loop-invariant base value. */
364 : : tree gs_base;
365 : : /* Whether uses of this load or feeders of this store are suitable
366 : : for load/store-lanes. */
367 : : bool ldst_lanes;
368 : : /* For BB vect, flag to indicate this load node should be vectorized so as
369 : : to avoid STLF (store-to-load forwarding) failures caused by related stores. */
370 : : bool avoid_stlf_fail;
371 : :
372 : : int vertex; /* NOTE(review): undocumented; presumably this node's vertex index in a graph built elsewhere - confirm. */
373 : :
374 : : /* The kind of operation as determined by analysis and optional
375 : : kind specific data. */
376 : : enum stmt_vec_info_type type;
377 : : vect_data *data;
378 : : /* Return DATA viewed as a T, or ELSE_ when DATA is NULL. The cast is unchecked, so T must match the attached data. */
379 : : template <class T>
380 : 1957968 : T& get_data (T& else_) { return data ? *static_cast <T *> (data) : else_; }
381 : :
382 : : /* If not NULL this is a cached failed SLP discovery attempt with
383 : : the lanes that failed during SLP discovery as 'false'. This is
384 : : a copy of the matches array. */
385 : : bool *failed;
386 : :
387 : : /* Allocate from slp_tree_pool. */
388 : : static void *operator new (size_t);
389 : :
390 : : /* Return memory to slp_tree_pool. */
391 : : static void operator delete (void *, size_t);
392 : :
393 : : /* Linked list of nodes to release when we free the slp_tree_pool. */
394 : : slp_tree next_node;
395 : : slp_tree prev_node;
396 : : };
397 : :
398 : : /* The enum describes the type of operations that an SLP instance
399 : : can perform. */
400 : :
401 : : enum slp_instance_kind {
402 : : slp_inst_kind_store,
403 : : slp_inst_kind_reduc_group,
404 : : slp_inst_kind_reduc_chain,
405 : : slp_inst_kind_bb_reduc,
406 : : slp_inst_kind_ctor,
407 : : slp_inst_kind_gcond
408 : : };
409 : :
410 : : /* SLP instance is a sequence of stmts in a loop that can be packed into
411 : : SIMD stmts. */
412 : : typedef class _slp_instance {
413 : : public:
414 : : /* The root of SLP tree. */
415 : : slp_tree root;
416 : :
417 : : /* For vector constructors, the constructor stmt that the SLP tree is built
418 : : from, NULL otherwise. */
419 : : vec<stmt_vec_info> root_stmts;
420 : :
421 : : /* For slp_inst_kind_bb_reduc the defs that were not vectorized, NULL
422 : : otherwise. */
423 : : vec<tree> remain_defs;
424 : :
425 : : /* The group of nodes that contain loads of this SLP instance. */
426 : : vec<slp_tree> loads;
427 : :
428 : : /* The SLP node containing the reduction PHIs. */
429 : : slp_tree reduc_phis;
430 : :
431 : : /* Vector cost of this entry to the SLP graph. */
432 : : stmt_vector_for_cost cost_vec;
433 : :
434 : : /* If this instance is the main entry of a subgraph the set of
435 : : entries into the same subgraph, including itself. */
436 : : vec<_slp_instance *> subgraph_entries;
437 : :
438 : : /* The type of operation the SLP instance is performing. */
439 : : slp_instance_kind kind;
440 : :
441 : : dump_user_location_t location () const;
442 : : } *slp_instance;
443 : :
444 : :
445 : : /* Access Functions. */
446 : : #define SLP_INSTANCE_TREE(S) (S)->root
447 : : #define SLP_INSTANCE_LOADS(S) (S)->loads
448 : : #define SLP_INSTANCE_ROOT_STMTS(S) (S)->root_stmts
449 : : #define SLP_INSTANCE_REMAIN_DEFS(S) (S)->remain_defs
450 : : #define SLP_INSTANCE_KIND(S) (S)->kind
451 : :
452 : : #define SLP_TREE_CHILDREN(S) (S)->children
453 : : #define SLP_TREE_SCALAR_STMTS(S) (S)->stmts
454 : : #define SLP_TREE_SCALAR_OPS(S) (S)->ops
455 : : #define SLP_TREE_REF_COUNT(S) (S)->refcnt
456 : : #define SLP_TREE_VEC_DEFS(S) (S)->vec_defs
457 : : #define SLP_TREE_LOAD_PERMUTATION(S) (S)->load_permutation
458 : : #define SLP_TREE_LANE_PERMUTATION(S) (S)->lane_permutation
459 : : #define SLP_TREE_DEF_TYPE(S) (S)->def_type
460 : : #define SLP_TREE_VECTYPE(S) (S)->vectype
461 : : #define SLP_TREE_REPRESENTATIVE(S) (S)->representative
462 : : #define SLP_TREE_LANES(S) (S)->lanes
463 : : #define SLP_TREE_CODE(S) (S)->code
464 : : #define SLP_TREE_TYPE(S) (S)->type
465 : : #define SLP_TREE_GS_SCALE(S) (S)->gs_scale
466 : : #define SLP_TREE_GS_BASE(S) (S)->gs_base
467 : : #define SLP_TREE_REDUC_IDX(S) (S)->cycle_info.reduc_idx
468 : : #define SLP_TREE_PERMUTE_P(S) ((S)->code == VEC_PERM_EXPR)
469 : : /* Memory access type of load/store NODE (read unchecked from its vect_load_store_data), else VMAT_UNINITIALIZED. */
470 : : inline vect_memory_access_type
471 : 1267426 : SLP_TREE_MEMORY_ACCESS_TYPE (slp_tree node)
472 : : {
473 : 494780 : if (SLP_TREE_TYPE (node) == load_vec_info_type
474 : 433112 : || SLP_TREE_TYPE (node) == store_vec_info_type)
475 : 248824 : return static_cast<vect_load_store_data *> (node->data)->memory_access_type;
476 : : return VMAT_UNINITIALIZED;
477 : : }
478 : :
479 : : enum vect_partial_vector_style {
480 : : vect_partial_vectors_none,
481 : : vect_partial_vectors_while_ult,
482 : : vect_partial_vectors_avx512,
483 : : vect_partial_vectors_len
484 : : };
485 : :
486 : : /* Key for map that records association between
487 : : scalar conditions and corresponding loop mask, and
488 : : is populated by vect_record_loop_mask. */
489 : :
490 : : struct scalar_cond_masked_key
491 : : {
492 : 55149 : scalar_cond_masked_key (tree t, unsigned ncopies_)
493 : 55149 : : ncopies (ncopies_)
494 : : {
495 : 55149 : get_cond_ops_from_tree (t);
496 : : }
497 : :
498 : : void get_cond_ops_from_tree (tree);
499 : :
500 : : unsigned ncopies;
501 : : bool inverted_p;
502 : : tree_code code;
503 : : tree op0;
504 : : tree op1;
505 : : };
506 : : /* Hash traits for scalar_cond_masked_key: NCOPIES == 0 marks an empty slot and slots are never marked deleted. */
507 : : template<>
508 : : struct default_hash_traits<scalar_cond_masked_key>
509 : : {
510 : : typedef scalar_cond_masked_key compare_type;
511 : : typedef scalar_cond_masked_key value_type;
512 : :
513 : : static inline hashval_t
514 : 63280 : hash (value_type v)
515 : : {
516 : 63280 : inchash::hash h;
517 : 63280 : h.add_int (v.code);
518 : 63280 : inchash::add_expr (v.op0, h, 0);
519 : 63280 : inchash::add_expr (v.op1, h, 0);
520 : 63280 : h.add_int (v.ncopies);
521 : 63280 : h.add_flag (v.inverted_p);
522 : 63280 : return h.end ();
523 : : }
524 : :
525 : : static inline bool
526 : 9688 : equal (value_type existing, value_type candidate)
527 : : {
528 : 9688 : return (existing.ncopies == candidate.ncopies
529 : 9543 : && existing.code == candidate.code
530 : 5993 : && existing.inverted_p == candidate.inverted_p
531 : 4457 : && operand_equal_p (existing.op0, candidate.op0, 0)
532 : 12417 : && operand_equal_p (existing.op1, candidate.op1, 0));
533 : : }
534 : :
535 : : static const bool empty_zero_p = true;
536 : :
537 : : static inline void
538 : 0 : mark_empty (value_type &v)
539 : : {
540 : 0 : v.ncopies = 0; /* This is the field is_empty tests. */
541 : 0 : v.inverted_p = false;
542 : : }
543 : :
544 : : static inline bool
545 : 161574 : is_empty (value_type v)
546 : : {
547 : 6806143 : return v.ncopies == 0;
548 : : }
549 : :
550 : : static inline void mark_deleted (value_type &) {} /* No-op: there is no deleted marker. */
551 : :
552 : : static inline bool is_deleted (const value_type &)
553 : : {
554 : : return false;
555 : : }
556 : :
557 : 47397 : static inline void remove (value_type &) {} /* No-op. */
558 : : };
559 : :
560 : : typedef hash_set<scalar_cond_masked_key> scalar_cond_masked_set_type;
561 : :
562 : : /* Key and map that records association between vector conditions and
563 : : corresponding loop mask, and is populated by prepare_vec_mask. */
564 : :
565 : : typedef pair_hash<tree_operand_hash, tree_operand_hash> tree_cond_mask_hash;
566 : : typedef hash_set<tree_cond_mask_hash> vec_cond_masked_set_type;
567 : :
568 : : /* Describes two objects whose addresses must be unequal for the vectorized
569 : : loop to be valid. */
570 : : typedef std::pair<tree, tree> vec_object_pair;
571 : :
572 : : /* Records that vectorization is only possible if abs (EXPR) >= MIN_VALUE.
573 : : UNSIGNED_P is true if we can assume that abs (EXPR) == EXPR. */
574 : : class vec_lower_bound {
575 : : public:
576 : : vec_lower_bound () {} /* Leaves EXPR and UNSIGNED_P uninitialized. */
577 : 1418 : vec_lower_bound (tree e, bool u, poly_uint64 m)
578 : 1418 : : expr (e), unsigned_p (u), min_value (m) {}
579 : :
580 : : tree expr;
581 : : bool unsigned_p;
582 : : poly_uint64 min_value;
583 : : };
584 : :
585 : : /* Vectorizer state shared between different analyses like vector sizes
586 : : of the same CFG region. */
587 : : class vec_info_shared {
588 : : public:
589 : : vec_info_shared();
590 : : ~vec_info_shared();
591 : :
592 : : void save_datarefs();
593 : : void check_datarefs();
594 : :
595 : : /* All data references. Freed by free_data_refs, so not an auto_vec. */
596 : : vec<data_reference_p> datarefs;
597 : : vec<data_reference> datarefs_copy;
598 : :
599 : : /* The loop nest in which the data dependences are computed. */
600 : : auto_vec<loop_p> loop_nest;
601 : :
602 : : /* All data dependences. Freed by free_dependence_relations, so not
603 : : an auto_vec. */
604 : : vec<ddr_p> ddrs;
605 : : };
606 : :
607 : : /* Vectorizer state common between loop and basic-block vectorization. */
608 : : class vec_info {
609 : : public:
610 : : typedef hash_set<int_hash<machine_mode, E_VOIDmode, E_BLKmode> > mode_set;
611 : : enum vec_kind { bb, loop };
612 : :
613 : : vec_info (vec_kind, vec_info_shared *);
614 : : ~vec_info ();
615 : :
616 : : stmt_vec_info add_stmt (gimple *);
617 : : stmt_vec_info add_pattern_stmt (gimple *, stmt_vec_info);
618 : : stmt_vec_info resync_stmt_addr (gimple *);
619 : : stmt_vec_info lookup_stmt (gimple *);
620 : : stmt_vec_info lookup_def (tree);
621 : : stmt_vec_info lookup_single_use (tree);
622 : : class dr_vec_info *lookup_dr (data_reference *);
623 : : void move_dr (stmt_vec_info, stmt_vec_info);
624 : : void remove_stmt (stmt_vec_info);
625 : : void replace_stmt (gimple_stmt_iterator *, stmt_vec_info, gimple *);
626 : : void insert_on_entry (stmt_vec_info, gimple *);
627 : : void insert_seq_on_entry (stmt_vec_info, gimple_seq);
628 : :
629 : : /* The type of vectorization. */
630 : : vec_kind kind;
631 : :
632 : : /* Shared vectorizer state. */
633 : : vec_info_shared *shared;
634 : :
635 : : /* The mapping of GIMPLE UID to stmt_vec_info. */
636 : : vec<stmt_vec_info> stmt_vec_infos;
637 : : /* Whether the above mapping is complete. */
638 : : bool stmt_vec_info_ro;
639 : :
640 : : /* Whether we've done a transform we think OK to not update virtual
641 : : SSA form. */
642 : : bool any_known_not_updated_vssa;
643 : :
644 : : /* The SLP graph. */
645 : : auto_vec<slp_instance> slp_instances;
646 : :
647 : : /* Maps base addresses to an innermost_loop_behavior that gives the maximum
648 : : known alignment for that base. */
649 : : vec_base_alignments base_alignments;
650 : :
651 : : /* All interleaving chains of stores, represented by the first
652 : : stmt in the chain. */
653 : : auto_vec<stmt_vec_info> grouped_stores;
654 : :
655 : : /* The set of vector modes used in the vectorized region. */
656 : : mode_set used_vector_modes;
657 : :
658 : : /* The argument we should pass to related_vector_mode when looking up
659 : : the vector mode for a scalar mode, or VOIDmode if we haven't yet
660 : : made any decisions about which vector modes to use. */
661 : : machine_mode vector_mode;
662 : :
663 : : /* The basic blocks in the vectorization region. For _loop_vec_info,
664 : : the memory is internally managed, while for _bb_vec_info, it points
665 : : to element space of an external auto_vec<>. This inconsistency is
666 : : not a good class design pattern. TODO: improve it with a unified
667 : : auto_vec<> whose lifetime is confined to the vec_info object. */
668 : : basic_block *bbs;
669 : :
670 : : /* The count of the basic blocks in the vectorization region. */
671 : : unsigned int nbbs;
672 : :
673 : : /* Used to keep a sequence of def stmts of a pattern stmt that are loop
674 : : invariant, if any exist.
675 : : The sequence is emitted in the loop preheader should the loop be vectorized
676 : : and is reset when undoing patterns. */
677 : : gimple_seq inv_pattern_def_seq;
678 : :
679 : : private:
680 : : stmt_vec_info new_stmt_vec_info (gimple *stmt);
681 : : void set_vinfo_for_stmt (gimple *, stmt_vec_info, bool = true);
682 : : void free_stmt_vec_infos ();
683 : : void free_stmt_vec_info (stmt_vec_info);
684 : : };
685 : :
686 : : class _loop_vec_info;
687 : : class _bb_vec_info;
688 : :
689 : : template<>
690 : : template<>
691 : : inline bool
692 : 360082419 : is_a_helper <_loop_vec_info *>::test (vec_info *i)
693 : : {
694 : 359443821 : return i->kind == vec_info::loop;
695 : : }
696 : :
697 : : template<>
698 : : template<>
699 : : inline bool
700 : 70079313 : is_a_helper <_bb_vec_info *>::test (vec_info *i)
701 : : {
702 : 70079313 : return i->kind == vec_info::bb;
703 : : }
704 : :
705 : : /* In general, we can divide the vector statements in a vectorized loop
706 : : into related groups ("rgroups") and say that for each rgroup there is
707 : : some nS such that the rgroup operates on nS values from one scalar
708 : : iteration followed by nS values from the next. That is, if VF is the
709 : : vectorization factor of the loop, the rgroup operates on a sequence:
710 : :
711 : : (1,1) (1,2) ... (1,nS) (2,1) ... (2,nS) ... (VF,1) ... (VF,nS)
712 : :
713 : : where (i,j) represents a scalar value with index j in a scalar
714 : : iteration with index i.
715 : :
716 : : [ We use the term "rgroup" to emphasise that this grouping isn't
717 : : necessarily the same as the grouping of statements used elsewhere.
718 : : For example, if we implement a group of scalar loads using gather
719 : : loads, we'll use a separate gather load for each scalar load, and
720 : : thus each gather load will belong to its own rgroup. ]
721 : :
722 : : In general this sequence will occupy nV vectors concatenated
723 : : together. If these vectors have nL lanes each, the total number
724 : : of scalar values N is given by:
725 : :
726 : : N = nS * VF = nV * nL
727 : :
728 : : None of nS, VF, nV and nL are required to be a power of 2. nS and nV
729 : : are compile-time constants but VF and nL can be variable (if the target
730 : : supports variable-length vectors).
731 : :
732 : : In classical vectorization, each iteration of the vector loop would
733 : : handle exactly VF iterations of the original scalar loop. However,
734 : : in vector loops that are able to operate on partial vectors, a
735 : : particular iteration of the vector loop might handle fewer than VF
736 : : iterations of the scalar loop. The vector lanes that correspond to
737 : : iterations of the scalar loop are said to be "active" and the other
738 : : lanes are said to be "inactive".
739 : :
740 : : In such vector loops, many rgroups need to be controlled to ensure
741 : : that they have no effect for the inactive lanes. Conceptually, each
742 : : such rgroup needs a sequence of booleans in the same order as above,
743 : : but with each (i,j) replaced by a boolean that indicates whether
744 : : iteration i is active. This sequence occupies nV vector controls
745 : : that again have nL lanes each. Thus the control sequence as a whole
746 : : consists of VF independent booleans that are each repeated nS times.
747 : :
748 : : Take the mask-based approach as an example of partially-populated vectors.
749 : : We make the simplifying assumption that if a sequence of nV masks is
750 : : suitable for one (nS,nL) pair, we can reuse it for (nS/2,nL/2) by
751 : : VIEW_CONVERTing it. This holds for all current targets that support
752 : : fully-masked loops. For example, suppose the scalar loop is:
753 : :
754 : : float *f;
755 : : double *d;
756 : : for (int i = 0; i < n; ++i)
757 : : {
758 : : f[i * 2 + 0] += 1.0f;
759 : : f[i * 2 + 1] += 2.0f;
760 : : d[i] += 3.0;
761 : : }
762 : :
763 : : and suppose that vectors have 256 bits. The vectorized f accesses
764 : : will belong to one rgroup and the vectorized d access to another:
765 : :
766 : : f rgroup: nS = 2, nV = 1, nL = 8
767 : : d rgroup: nS = 1, nV = 1, nL = 4
768 : : VF = 4
769 : :
770 : : [ In this simple example the rgroups do correspond to the normal
771 : : SLP grouping scheme. ]
772 : :
773 : : If only the first three lanes are active, the masks we need are:
774 : :
775 : : f rgroup: 1 1 | 1 1 | 1 1 | 0 0
776 : : d rgroup: 1 | 1 | 1 | 0
777 : :
778 : : Here we can use a mask calculated for f's rgroup for d's, but not
779 : : vice versa.
780 : :
781 : : Thus for each value of nV, it is enough to provide nV masks, with the
782 : : mask being calculated based on the highest nL (or, equivalently, based
783 : : on the highest nS) required by any rgroup with that nV. We therefore
784 : : represent the entire collection of masks as a two-level table, with the
785 : : first level being indexed by nV - 1 (since nV == 0 doesn't exist) and
786 : : the second being indexed by the mask index 0 <= i < nV. */
787 : :
788 : : /* The controls (like masks or lengths) needed by rgroups with nV vectors,
789 : : according to the description above. */
790 : : struct rgroup_controls {
791 : : /* The largest nS for all rgroups that use these controls.
792 : : For vect_partial_vectors_avx512 this is the constant nscalars_per_iter
793 : : for all members of the group. */
794 : : unsigned int max_nscalars_per_iter;
795 : :
796 : : /* For the largest nS recorded above, the loop controls divide each scalar
797 : : into FACTOR equal-sized pieces. This is useful if we need to split
798 : : element-based accesses into byte-based accesses.
799 : : For vect_partial_vectors_avx512 this records nV instead. */
800 : : unsigned int factor;
801 : :
802 : : /* This is a vector type with MAX_NSCALARS_PER_ITER * VF / nV elements.
803 : : For mask-based controls, it is the type of the masks in CONTROLS.
804 : : For length-based controls, it can be any vector type that has the
805 : : specified number of elements; the type of the elements doesn't matter. */
806 : : tree type;
807 : :
808 : : /* When there is no uniformly used LOOP_VINFO_RGROUP_COMPARE_TYPE this
809 : : is the rgroup specific type used. */
810 : : tree compare_type;
811 : :
812 : : /* A vector of nV controls, in iteration order. */
813 : : vec<tree> controls;
814 : :
815 : : /* In case of len_load and len_store with a bias there is only one
816 : : rgroup. This holds the adjusted loop length for this rgroup. */
817 : : tree bias_adjusted_ctrl;
818 : : };
819 : : /* Loop mask state: recorded (vectype, nvectors) pairs and rgroup controls. */
820 : 415649 : struct vec_loop_masks
821 : : {
822 : 403823 : bool is_empty () const { return mask_set.is_empty (); }
823 : :
824 : : /* Set of recorded (vectype, nvectors) pairs; is_empty () tests this set. */
825 : : hash_set<pair_hash <nofree_ptr_hash <tree_node>,
826 : : int_hash<unsigned, 0>>> mask_set;
827 : :
828 : : /* rgroup_controls used for the partial vector scheme. */
829 : : auto_vec<rgroup_controls> rgc_vec;
830 : : };
831 : : /* The rgroup_controls vector type used for a loop's length controls. */
832 : : typedef auto_vec<rgroup_controls> vec_loop_lens;
833 : : /* A vector of (data_reference *, tree) pairs. */
834 : : typedef auto_vec<std::pair<data_reference*, tree> > drs_init_vec;
835 : :
836 : : /* Abstraction around info on reductions which is still in stmt_vec_info
837 : : but will be duplicated or moved elsewhere. */
838 : 139160 : class vect_reduc_info_s
839 : : {
840 : : public:
841 : : /* The def type of the main reduction PHI, vect_reduction_def or
842 : : vect_double_reduction_def. */
843 : : enum vect_def_type def_type;
844 : :
845 : : /* The reduction type as detected by
846 : : vect_is_simple_reduction and vectorizable_reduction. */
847 : : enum vect_reduction_type reduc_type;
848 : :
849 : : /* The original scalar reduction code, to be used in the epilogue. */
850 : : code_helper reduc_code;
851 : :
852 : : /* A vector internal function we should use in the epilogue. */
853 : : internal_fn reduc_fn;
854 : :
855 : : /* For loop reduction with multiple vectorized results (ncopies > 1), a
856 : : lane-reducing operation participating in it may not use all of those
857 : : results. This field specifies the result index starting from which any
858 : : following lane-reducing operation would be assigned. */
859 : : unsigned int reduc_result_pos;
860 : :
861 : : /* Whether this represents a reduction chain. */
862 : : bool is_reduc_chain;
863 : :
864 : : /* Whether we force a single cycle PHI during reduction vectorization. */
865 : : bool force_single_cycle;
866 : :
867 : : /* The vector type for performing the actual reduction operation. */
868 : : tree reduc_vectype;
869 : :
870 : : /* The vector type we should use for the final reduction in the epilogue
871 : : when we reduce a mask. */
872 : : tree reduc_vectype_for_mask;
873 : :
874 : : /* For INTEGER_INDUC_COND_REDUCTION, the initial value to be used. */
875 : : tree induc_cond_initial_val;
876 : :
877 : : /* If not NULL the value to be added to compute final reduction value. */
878 : : tree reduc_epilogue_adjustment;
879 : :
880 : : /* If non-null, the reduction is being performed by an epilogue loop
881 : : and we have decided to reuse this accumulator from the main loop. */
882 : : struct vect_reusable_accumulator *reused_accumulator;
883 : :
884 : : /* If the vector code is performing N scalar reductions in parallel,
885 : : this variable gives the initial scalar values of those N reductions. */
886 : : auto_vec<tree> reduc_initial_values;
887 : :
888 : : /* If the vector code is performing N scalar reductions in parallel, this
889 : : variable gives the vectorized code's final (scalar) result for each of
890 : : those N reductions. In other words, REDUC_SCALAR_RESULTS[I] replaces
891 : : the original scalar code's loop-closed SSA PHI for reduction number I. */
892 : : auto_vec<tree> reduc_scalar_results;
893 : : };
894 : :
895 : : typedef class vect_reduc_info_s *vect_reduc_info;
896 : : /* Access macros for the fields of a vect_reduc_info I. */
897 : : #define VECT_REDUC_INFO_DEF_TYPE(I) ((I)->def_type)
898 : : #define VECT_REDUC_INFO_TYPE(I) ((I)->reduc_type)
899 : : #define VECT_REDUC_INFO_CODE(I) ((I)->reduc_code)
900 : : #define VECT_REDUC_INFO_FN(I) ((I)->reduc_fn)
901 : : #define VECT_REDUC_INFO_SCALAR_RESULTS(I) ((I)->reduc_scalar_results)
902 : : #define VECT_REDUC_INFO_INITIAL_VALUES(I) ((I)->reduc_initial_values)
903 : : #define VECT_REDUC_INFO_REUSED_ACCUMULATOR(I) ((I)->reused_accumulator)
904 : : #define VECT_REDUC_INFO_INDUC_COND_INITIAL_VAL(I) ((I)->induc_cond_initial_val)
905 : : #define VECT_REDUC_INFO_EPILOGUE_ADJUSTMENT(I) ((I)->reduc_epilogue_adjustment)
906 : : #define VECT_REDUC_INFO_VECTYPE(I) ((I)->reduc_vectype)
907 : : #define VECT_REDUC_INFO_VECTYPE_FOR_MASK(I) ((I)->reduc_vectype_for_mask)
908 : : #define VECT_REDUC_INFO_FORCE_SINGLE_CYCLE(I) ((I)->force_single_cycle)
909 : : #define VECT_REDUC_INFO_RESULT_POS(I) ((I)->reduc_result_pos)
910 : :
911 : : /* Information about a reduction accumulator from the main loop that could
912 : : conceivably be reused as the input to a reduction in an epilogue loop. */
913 : : struct vect_reusable_accumulator {
914 : : /* The final value of the accumulator, which forms the input to the
915 : : reduction operation. */
916 : : tree reduc_input;
917 : :
918 : : /* The vect_reduc_info that describes the reduction this accumulator
919 : : belongs to. */
920 : : vect_reduc_info reduc_info;
921 : : };
922 : :
923 : : /*-----------------------------------------------------------------*/
924 : : /* Info on vectorized loops. */
925 : : /*-----------------------------------------------------------------*/
926 : : typedef class _loop_vec_info : public vec_info {
927 : : public:
928 : : _loop_vec_info (class loop *, vec_info_shared *);
929 : : ~_loop_vec_info ();
930 : :
931 : : /* The loop to which this info struct refers. */
932 : : class loop *loop;
933 : :
934 : : /* Number of latch executions. */
935 : : tree num_itersm1;
936 : : /* Number of iterations. */
937 : : tree num_iters;
938 : : /* Number of iterations of the original loop. */
939 : : tree num_iters_unchanged;
940 : : /* Condition under which this loop is analyzed and versioned. */
941 : : tree num_iters_assumptions;
942 : :
943 : : /* The cost of the vector code. */
944 : : class vector_costs *vector_costs;
945 : :
946 : : /* The cost of the scalar code. */
947 : : class vector_costs *scalar_costs;
948 : :
949 : : /* Threshold of number of iterations below which vectorization will not be
950 : : performed. It is calculated from MIN_PROFITABLE_ITERS and
951 : : param_min_vect_loop_bound. */
952 : : unsigned int th;
953 : :
954 : : /* When applying loop versioning, the vector form should only be used
955 : : if the number of scalar iterations is >= this value, on top of all
956 : : the other requirements. Ignored when loop versioning is not being
957 : : used. */
958 : : poly_uint64 versioning_threshold;
959 : :
960 : : /* Unrolling factor. In case of suitable super-word parallelism
961 : : it can be that no unrolling is needed, and thus this is 1. */
962 : : poly_uint64 vectorization_factor;
963 : :
964 : : /* If this loop is an epilogue loop whose main loop can be skipped,
965 : : MAIN_LOOP_EDGE is the edge from the main loop to this loop's
966 : : preheader. SKIP_MAIN_LOOP_EDGE is then the edge that skips the
967 : : main loop and goes straight to this loop's preheader.
968 : :
969 : : Both fields are null otherwise. */
970 : : edge main_loop_edge;
971 : : edge skip_main_loop_edge;
972 : :
973 : : /* If this loop is an epilogue loop that might be skipped after executing
974 : : the main loop, this edge is the one that skips the epilogue. */
975 : : edge skip_this_loop_edge;
976 : :
977 : : /* Reduction descriptors of this loop. Referenced to from SLP nodes
978 : : by index. */
979 : : auto_vec<vect_reduc_info> reduc_infos;
980 : :
981 : : /* The vectorized form of a standard reduction replaces the original
982 : : scalar code's final result (a loop-closed SSA PHI) with the result
983 : : of a vector-to-scalar reduction operation. After vectorization,
984 : : this variable maps these vector-to-scalar results to information
985 : : about the reductions that generated them. */
986 : : hash_map<tree, vect_reusable_accumulator> reusable_accumulators;
987 : :
988 : : /* The number of times that the target suggested we unroll the vector loop
989 : : in order to promote more ILP. This value will be used to re-analyze the
990 : : loop for vectorization and if successful the value will be folded into
991 : : vectorization_factor (and therefore exactly divides
992 : : vectorization_factor). */
993 : : unsigned int suggested_unroll_factor;
994 : :
995 : : /* Maximum runtime vectorization factor, or MAX_VECTORIZATION_FACTOR
996 : : if there is no particular limit. */
997 : : unsigned HOST_WIDE_INT max_vectorization_factor;
998 : :
999 : : /* The masks that a fully-masked loop should use to avoid operating
1000 : : on inactive scalars. */
1001 : : vec_loop_masks masks;
1002 : :
1003 : : /* The lengths that a loop with length should use to avoid operating
1004 : : on inactive scalars. */
1005 : : vec_loop_lens lens;
1006 : :
1007 : : /* Set of scalar conditions that have loop mask applied. */
1008 : : scalar_cond_masked_set_type scalar_cond_masked_set;
1009 : :
1010 : : /* Set of vector conditions that have loop mask applied. */
1011 : : vec_cond_masked_set_type vec_cond_masked_set;
1012 : :
1013 : : /* If we are using a loop mask to align memory addresses, this variable
1014 : : contains the number of vector elements that we should skip in the
1015 : : first iteration of the vector loop (i.e. the number of leading
1016 : : elements that should be false in the first mask). */
1017 : : tree mask_skip_niters;
1018 : :
1019 : : /* If we are using a loop mask to align memory addresses and we're in an
1020 : : early break loop then this variable contains the number of elements that
1021 : : were skipped during the initial iteration of the loop. */
1022 : : tree mask_skip_niters_pfa_offset;
1023 : :
1024 : : /* The type that the loop control IV should be converted to before
1025 : : testing which of the VF scalars are active and inactive.
1026 : : Only meaningful if LOOP_VINFO_USING_PARTIAL_VECTORS_P. */
1027 : : tree rgroup_compare_type;
1028 : :
1029 : : /* For #pragma omp simd if (x) loops the x expression. If constant 0,
1030 : : the loop should not be vectorized, if constant non-zero, simd_if_cond
1031 : : shouldn't be set and loop vectorized normally, if SSA_NAME, the loop
1032 : : should be versioned on that condition, using scalar loop if the condition
1033 : : is false and vectorized loop otherwise. */
1034 : : tree simd_if_cond;
1035 : :
1036 : : /* The type that the vector loop control IV should have when
1037 : : LOOP_VINFO_USING_PARTIAL_VECTORS_P is true. */
1038 : : tree rgroup_iv_type;
1039 : :
1040 : : /* The style used for implementing partial vectors when
1041 : : LOOP_VINFO_USING_PARTIAL_VECTORS_P is true. */
1042 : : vect_partial_vector_style partial_vector_style;
1043 : :
1044 : : /* The unaligned data reference according to which the loop was peeled. */
1045 : : class dr_vec_info *unaligned_dr;
1046 : :
1047 : : /* peeling_for_alignment indicates whether peeling for alignment will take
1048 : : place, and what the peeling factor should be:
1049 : : peeling_for_alignment = X means:
1050 : : If X=0: Peeling for alignment will not be applied.
1051 : : If X>0: Peel first X iterations.
1052 : : If X=-1: Generate a runtime test to calculate the number of iterations
1053 : : to be peeled, using the dataref recorded in the field
1054 : : unaligned_dr. */
1055 : : int peeling_for_alignment;
1056 : :
1057 : : /* The mask used to check the alignment of pointers or arrays. */
1058 : : poly_uint64 ptr_mask;
1059 : :
1060 : : /* The maximum speculative read amount in VLA modes for runtime check. */
1061 : : poly_uint64 max_spec_read_amount;
1062 : :
1063 : : /* Indicates whether the loop has any non-linear IV. */
1064 : : bool nonlinear_iv;
1065 : :
1066 : : /* Data Dependence Relations defining address ranges that are candidates
1067 : : for a run-time aliasing check. */
1068 : : auto_vec<ddr_p> may_alias_ddrs;
1069 : :
1070 : : /* Data Dependence Relations defining address ranges together with segment
1071 : : lengths from which the run-time aliasing check is built. */
1072 : : auto_vec<dr_with_seg_len_pair_t> comp_alias_ddrs;
1073 : :
1074 : : /* Check that the addresses of each pair of objects are unequal. */
1075 : : auto_vec<vec_object_pair> check_unequal_addrs;
1076 : :
1077 : : /* List of values that are required to be nonzero. This is used to check
1078 : : whether things like "x[i * n] += 1;" are safe and eventually gets added
1079 : : to the checks for lower bounds below. */
1080 : : auto_vec<tree> check_nonzero;
1081 : :
1082 : : /* List of values that need to be checked for a minimum value. */
1083 : : auto_vec<vec_lower_bound> lower_bounds;
1084 : :
1085 : : /* Statements in the loop that have data references that are candidates for a
1086 : : runtime (loop versioning) misalignment check. */
1087 : : auto_vec<stmt_vec_info> may_misalign_stmts;
1088 : :
1089 : : /* Reduction cycles detected in the loop. Used in loop-aware SLP. */
1090 : : auto_vec<stmt_vec_info> reductions;
1091 : :
1092 : : /* Defs that could not be analyzed such as OMP SIMD calls without
1093 : : a LHS. */
1094 : : auto_vec<stmt_vec_info> alternate_defs;
1095 : :
1096 : : /* Cost vector for a single scalar iteration. */
1097 : : auto_vec<stmt_info_for_cost> scalar_cost_vec;
1098 : :
1099 : : /* Map of IV base/step expressions to inserted name in the preheader. */
1100 : : hash_map<tree_operand_hash, tree> *ivexpr_map;
1101 : :
1102 : : /* Map of OpenMP "omp simd array" scan variables to corresponding
1103 : : rhs of the store of the initializer. */
1104 : : hash_map<tree, tree> *scan_map;
1105 : :
1106 : : /* The factor used to over weight those statements in an inner loop
1107 : : relative to the loop being vectorized. */
1108 : : unsigned int inner_loop_cost_factor;
1109 : :
1110 : : /* Is the loop vectorizable? */
1111 : : bool vectorizable;
1112 : :
1113 : : /* Records whether we still have the option of vectorizing this loop
1114 : : using partially-populated vectors; in other words, whether it is
1115 : : still possible for one iteration of the vector loop to handle
1116 : : fewer than VF scalars. */
1117 : : bool can_use_partial_vectors_p;
1118 : :
1119 : : /* Records whether we must use niter masking for correctness reasons. */
1120 : : bool must_use_partial_vectors_p;
1121 : :
1122 : : /* True if we've decided to use partially-populated vectors, so that
1123 : : the vector loop can handle fewer than VF scalars. */
1124 : : bool using_partial_vectors_p;
1125 : :
1126 : : /* True if we've decided to use a decrementing loop control IV that counts
1127 : : scalars. This can be done for any loop that:
1128 : :
1129 : : (a) uses length "controls"; and
1130 : : (b) can iterate more than once. */
1131 : : bool using_decrementing_iv_p;
1132 : :
1133 : : /* True if we've decided to use output of select_vl to adjust IV of
1134 : : both loop control and data reference pointer. This is only true
1135 : : for single-rgroup control. */
1136 : : bool using_select_vl_p;
1137 : :
1138 : : /* True if we've decided to use peeling with versioning together, which allows
1139 : : unaligned unsupported data refs to be uniformly aligned after a certain
1140 : : amount of peeling (mutual alignment). Otherwise, we use versioning alone
1141 : : so these data refs must be already aligned to a power-of-two boundary
1142 : : without peeling. */
1143 : : bool allow_mutual_alignment;
1144 : :
1145 : : /* The bias for len_load and len_store. For now, only 0 and -1 are
1146 : : supported. -1 must be used when a backend does not support
1147 : : len_load/len_store with a length of zero. */
1148 : : signed char partial_load_store_bias;
1149 : :
1150 : : /* When we have grouped data accesses with gaps, we may introduce invalid
1151 : : memory accesses. We peel the last iteration of the loop to prevent
1152 : : this. */
1153 : : bool peeling_for_gaps;
1154 : :
1155 : : /* When the number of iterations is not a multiple of the vector size
1156 : : we need to peel off iterations at the end to form an epilogue loop. */
1157 : : bool peeling_for_niter;
1158 : :
1159 : : /* When the loop has early breaks that we can vectorize we need to peel
1160 : : the loop for the break finding loop. */
1161 : : bool early_breaks;
1162 : :
1163 : : /* List of additional loop IV conditionals found in the loop. */
1164 : : auto_vec<gcond *> conds;
1165 : :
1166 : : /* Main loop IV cond. */
1167 : : gcond* loop_iv_cond;
1168 : :
1169 : : /* True if we have an unroll factor requested by the user through pragma GCC
1170 : : unroll. */
1171 : : bool user_unroll;
1172 : :
1173 : : /* True if there are no loop carried data dependencies in the loop.
1174 : : If loop->safelen <= 1, then this is always true, either the loop
1175 : : didn't have any loop carried data dependencies, or the loop is being
1176 : : vectorized guarded with some runtime alias checks, or couldn't
1177 : : be vectorized at all, but then this field shouldn't be used.
1178 : : For loop->safelen >= 2, the user has asserted that there are no
1179 : : backward dependencies, but there still could be loop carried forward
1180 : : dependencies in such loops. This flag will be false if normal
1181 : : vectorizer data dependency analysis would fail or require versioning
1182 : : for alias, but because of loop->safelen >= 2 it has been vectorized
1183 : : even without versioning for alias. E.g. in:
1184 : : #pragma omp simd
1185 : : for (int i = 0; i < m; i++)
1186 : : a[i] = a[i + k] * c;
1187 : : (or #pragma simd or #pragma ivdep) we can vectorize this and it will
1188 : : DTRT even for k > 0 && k < m, but without safelen we would not
1189 : : vectorize this, so this field would be false. */
1190 : : bool no_data_dependencies;
1191 : :
1192 : : /* Mark loops having masked stores. */
1193 : : bool has_mask_store;
1194 : :
1195 : : /* Queued scaling factor for the scalar loop. */
1196 : : profile_probability scalar_loop_scaling;
1197 : :
1198 : : /* If if-conversion versioned this loop before conversion, this is the
1199 : : loop version without if-conversion. */
1200 : : class loop *scalar_loop;
1201 : :
1202 : : /* For loops being epilogues of already vectorized loops
1203 : : this points to the main vectorized loop. Otherwise NULL. */
1204 : : _loop_vec_info *main_loop_info;
1205 : :
1206 : : /* For loops being epilogues of already vectorized loops
1207 : : this points to the preceding vectorized (possibly epilogue) loop.
1208 : : Otherwise NULL. */
1209 : : _loop_vec_info *orig_loop_info;
1210 : :
1211 : : /* Used to store loop_vec_infos of the epilogue of this loop during
1212 : : analysis. */
1213 : : _loop_vec_info *epilogue_vinfo;
1214 : :
1215 : : /* If this is an epilogue loop the DR advancement applied. */
1216 : : tree drs_advanced_by;
1217 : :
1218 : : /* The exit edge of the controlling loop IV for the current loop when
1219 : : vectorizing. This IV controls the natural exits of the loop. */
1220 : : edge vec_loop_iv_exit;
1221 : :
1222 : : /* The exit edge of the controlling loop IV for the epilogue loop when
1223 : : vectorizing. This IV controls the natural exits of the loop. */
1224 : : edge vec_epilogue_loop_iv_exit;
1225 : :
1226 : : /* The exit edge of the controlling loop IV for the scalar loop being
1227 : : vectorized. This IV controls the natural exits of the loop. */
1228 : : edge scalar_loop_iv_exit;
1229 : :
1230 : : /* Used to store the list of stores needing to be moved if doing early
1231 : : break vectorization as they would violate the scalar loop semantics if
1232 : : vectorized in their current location. These are stored in order that they
1233 : : need to be moved. */
1234 : : auto_vec<gimple *> early_break_stores;
1235 : :
1236 : : /* The final basic block where to move statements to. In the case of
1237 : : multiple exits this could be pretty far away. */
1238 : : basic_block early_break_dest_bb;
1239 : :
1240 : : /* Statements whose VUSES need updating if early break vectorization is to
1241 : : happen. */
1242 : : auto_vec<gimple*> early_break_vuses;
1243 : :
1244 : : /* Record statements that are needed to be live for early break vectorization
1245 : : but may not have an LC PHI node materialized yet in the exits. */
1246 : : auto_vec<stmt_vec_info> early_break_live_ivs;
1247 : : } *loop_vec_info;
1248 : :
1249 : : /* Access macros for the fields of a loop_vec_info L. */
1250 : : #define LOOP_VINFO_LOOP(L) (L)->loop
1251 : : #define LOOP_VINFO_IV_EXIT(L) (L)->vec_loop_iv_exit
1252 : : #define LOOP_VINFO_EPILOGUE_IV_EXIT(L) (L)->vec_epilogue_loop_iv_exit
1253 : : #define LOOP_VINFO_SCALAR_IV_EXIT(L) (L)->scalar_loop_iv_exit
1254 : : #define LOOP_VINFO_BBS(L) (L)->bbs
1255 : : #define LOOP_VINFO_NBBS(L) (L)->nbbs
1256 : : #define LOOP_VINFO_NITERSM1(L) (L)->num_itersm1
1257 : : #define LOOP_VINFO_NITERS(L) (L)->num_iters
1258 : : /* Since LOOP_VINFO_NITERS and LOOP_VINFO_NITERSM1 can change after
1259 : : prologue peeling, retain the total unchanged scalar loop iterations for
1260 : : the cost model. */
1261 : : #define LOOP_VINFO_NITERS_UNCHANGED(L) (L)->num_iters_unchanged
1262 : : #define LOOP_VINFO_NITERS_ASSUMPTIONS(L) (L)->num_iters_assumptions
1263 : : #define LOOP_VINFO_COST_MODEL_THRESHOLD(L) (L)->th
1264 : : #define LOOP_VINFO_VERSIONING_THRESHOLD(L) (L)->versioning_threshold
1265 : : #define LOOP_VINFO_VECTORIZABLE_P(L) (L)->vectorizable
1266 : : #define LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P(L) (L)->can_use_partial_vectors_p
1267 : : #define LOOP_VINFO_MUST_USE_PARTIAL_VECTORS_P(L) (L)->must_use_partial_vectors_p
1268 : : #define LOOP_VINFO_USING_PARTIAL_VECTORS_P(L) (L)->using_partial_vectors_p
1269 : : #define LOOP_VINFO_USING_DECREMENTING_IV_P(L) (L)->using_decrementing_iv_p
1270 : : #define LOOP_VINFO_USING_SELECT_VL_P(L) (L)->using_select_vl_p
1271 : : #define LOOP_VINFO_ALLOW_MUTUAL_ALIGNMENT(L) (L)->allow_mutual_alignment
1272 : : #define LOOP_VINFO_PARTIAL_LOAD_STORE_BIAS(L) (L)->partial_load_store_bias
1273 : : #define LOOP_VINFO_VECT_FACTOR(L) (L)->vectorization_factor
1274 : : #define LOOP_VINFO_MAX_VECT_FACTOR(L) (L)->max_vectorization_factor
1275 : : #define LOOP_VINFO_MASKS(L) (L)->masks
1276 : : #define LOOP_VINFO_LENS(L) (L)->lens
1277 : : #define LOOP_VINFO_MASK_SKIP_NITERS(L) (L)->mask_skip_niters
1278 : : #define LOOP_VINFO_MASK_NITERS_PFA_OFFSET(L) (L)->mask_skip_niters_pfa_offset
1279 : : #define LOOP_VINFO_RGROUP_COMPARE_TYPE(L) (L)->rgroup_compare_type
1280 : : #define LOOP_VINFO_RGROUP_IV_TYPE(L) (L)->rgroup_iv_type
1281 : : #define LOOP_VINFO_PARTIAL_VECTORS_STYLE(L) (L)->partial_vector_style
1282 : : #define LOOP_VINFO_PTR_MASK(L) (L)->ptr_mask
1283 : : #define LOOP_VINFO_MAX_SPEC_READ_AMOUNT(L) (L)->max_spec_read_amount
1284 : : #define LOOP_VINFO_LOOP_NEST(L) (L)->shared->loop_nest
1285 : : #define LOOP_VINFO_DATAREFS(L) (L)->shared->datarefs
1286 : : #define LOOP_VINFO_DDRS(L) (L)->shared->ddrs
1287 : : #define LOOP_VINFO_INT_NITERS(L) (TREE_INT_CST_LOW ((L)->num_iters))
1288 : : #define LOOP_VINFO_PEELING_FOR_ALIGNMENT(L) (L)->peeling_for_alignment
1289 : : #define LOOP_VINFO_NON_LINEAR_IV(L) (L)->nonlinear_iv
1290 : : #define LOOP_VINFO_UNALIGNED_DR(L) (L)->unaligned_dr
1291 : : #define LOOP_VINFO_MAY_MISALIGN_STMTS(L) (L)->may_misalign_stmts
1292 : : #define LOOP_VINFO_MAY_ALIAS_DDRS(L) (L)->may_alias_ddrs
1293 : : #define LOOP_VINFO_COMP_ALIAS_DDRS(L) (L)->comp_alias_ddrs
1294 : : #define LOOP_VINFO_CHECK_UNEQUAL_ADDRS(L) (L)->check_unequal_addrs
1295 : : #define LOOP_VINFO_CHECK_NONZERO(L) (L)->check_nonzero
1296 : : #define LOOP_VINFO_LOWER_BOUNDS(L) (L)->lower_bounds
1297 : : #define LOOP_VINFO_USER_UNROLL(L) (L)->user_unroll
1298 : : #define LOOP_VINFO_GROUPED_STORES(L) (L)->grouped_stores
1299 : : #define LOOP_VINFO_SLP_INSTANCES(L) (L)->slp_instances
1300 : : #define LOOP_VINFO_REDUCTIONS(L) (L)->reductions
1301 : : #define LOOP_VINFO_PEELING_FOR_GAPS(L) (L)->peeling_for_gaps
1302 : : #define LOOP_VINFO_PEELING_FOR_NITER(L) (L)->peeling_for_niter
1303 : : #define LOOP_VINFO_EARLY_BREAKS(L) (L)->early_breaks
1304 : : #define LOOP_VINFO_EARLY_BRK_STORES(L) (L)->early_break_stores
1305 : : #define LOOP_VINFO_EARLY_BREAKS_VECT_PEELED(L) \
1306 : : (single_pred ((L)->loop->latch) != (L)->vec_loop_iv_exit->src)
1307 : : #define LOOP_VINFO_EARLY_BREAKS_LIVE_IVS(L) \
1308 : : (L)->early_break_live_ivs
1309 : : #define LOOP_VINFO_EARLY_BRK_DEST_BB(L) (L)->early_break_dest_bb
1310 : : #define LOOP_VINFO_EARLY_BRK_VUSES(L) (L)->early_break_vuses
1311 : : #define LOOP_VINFO_LOOP_CONDS(L) (L)->conds
1312 : : #define LOOP_VINFO_LOOP_IV_COND(L) (L)->loop_iv_cond
1313 : : #define LOOP_VINFO_NO_DATA_DEPENDENCIES(L) (L)->no_data_dependencies
1314 : : #define LOOP_VINFO_SCALAR_LOOP(L) (L)->scalar_loop
1315 : : #define LOOP_VINFO_SCALAR_LOOP_SCALING(L) (L)->scalar_loop_scaling
1316 : : #define LOOP_VINFO_HAS_MASK_STORE(L) (L)->has_mask_store
1317 : : #define LOOP_VINFO_SCALAR_ITERATION_COST(L) (L)->scalar_cost_vec
1318 : : #define LOOP_VINFO_MAIN_LOOP_INFO(L) (L)->main_loop_info
1319 : : #define LOOP_VINFO_ORIG_LOOP_INFO(L) (L)->orig_loop_info
1320 : : #define LOOP_VINFO_SIMD_IF_COND(L) (L)->simd_if_cond
1321 : : #define LOOP_VINFO_INNER_LOOP_COST_FACTOR(L) (L)->inner_loop_cost_factor
1322 : : #define LOOP_VINFO_INV_PATTERN_DEF_SEQ(L) (L)->inv_pattern_def_seq
1323 : : #define LOOP_VINFO_DRS_ADVANCED_BY(L) (L)->drs_advanced_by
1324 : : #define LOOP_VINFO_ALTERNATE_DEFS(L) (L)->alternate_defs
1325 : : /* True if L uses partial vectors and its masks are not empty. */
1326 : : #define LOOP_VINFO_FULLY_MASKED_P(L) \
1327 : : (LOOP_VINFO_USING_PARTIAL_VECTORS_P (L) \
1328 : : && !LOOP_VINFO_MASKS (L).is_empty ())
1329 : : /* True if L uses partial vectors and its lengths are not empty. */
1330 : : #define LOOP_VINFO_FULLY_WITH_LENGTH_P(L) \
1331 : : (LOOP_VINFO_USING_PARTIAL_VECTORS_P (L) \
1332 : : && !LOOP_VINFO_LENS (L).is_empty ())
1333 : : /* Tests for the individual conditions under which L requires versioning. */
1334 : : #define LOOP_REQUIRES_VERSIONING_FOR_ALIGNMENT(L) \
1335 : : ((L)->may_misalign_stmts.length () > 0)
1336 : : #define LOOP_REQUIRES_VERSIONING_FOR_SPEC_READ(L) \
1337 : : (maybe_gt ((L)->max_spec_read_amount, 0U))
1338 : : #define LOOP_REQUIRES_VERSIONING_FOR_ALIAS(L) \
1339 : : ((L)->comp_alias_ddrs.length () > 0 \
1340 : : || (L)->check_unequal_addrs.length () > 0 \
1341 : : || (L)->lower_bounds.length () > 0)
1342 : : #define LOOP_REQUIRES_VERSIONING_FOR_NITERS(L) \
1343 : : (LOOP_VINFO_NITERS_ASSUMPTIONS (L))
1344 : : #define LOOP_REQUIRES_VERSIONING_FOR_SIMD_IF_COND(L) \
1345 : : (LOOP_VINFO_SIMD_IF_COND (L))
1346 : : #define LOOP_REQUIRES_VERSIONING(L) \
1347 : : (LOOP_REQUIRES_VERSIONING_FOR_ALIGNMENT (L) \
1348 : : || LOOP_REQUIRES_VERSIONING_FOR_SPEC_READ (L) \
1349 : : || LOOP_REQUIRES_VERSIONING_FOR_ALIAS (L) \
1350 : : || LOOP_REQUIRES_VERSIONING_FOR_NITERS (L) \
1351 : : || LOOP_REQUIRES_VERSIONING_FOR_SIMD_IF_COND (L))
1352 : : /* True if L has may-misalign stmts and mutual alignment is not allowed. */
1353 : : #define LOOP_VINFO_USE_VERSIONING_WITHOUT_PEELING(L) \
1354 : : ((L)->may_misalign_stmts.length () > 0 \
1355 : : && !LOOP_VINFO_ALLOW_MUTUAL_ALIGNMENT (L))
1356 : : /* True if num_iters of L fits a signed HOST_WIDE_INT and is positive. */
1357 : : #define LOOP_VINFO_NITERS_KNOWN_P(L) \
1358 : : (tree_fits_shwi_p ((L)->num_iters) && tree_to_shwi ((L)->num_iters) > 0)
1359 : : /* True if L has an orig_loop_info, which is only set for epilogue loops. */
1360 : : #define LOOP_VINFO_EPILOGUE_P(L) \
1361 : : (LOOP_VINFO_ORIG_LOOP_INFO (L) != NULL)
1362 : : /* The maximum vectorization factor recorded in L's orig_loop_info. */
1363 : : #define LOOP_VINFO_ORIG_MAX_VECT_FACTOR(L) \
1364 : : (LOOP_VINFO_MAX_VECT_FACTOR (LOOP_VINFO_ORIG_LOOP_INFO (L)))
1365 : :
1366 : : /* Wrapper for loop_vec_info, for tracking success/failure, where a non-NULL
1367 : : value signifies success, and a NULL value signifies failure, supporting
1368 : : propagating an opt_problem * describing the failure back up the call
1369 : : stack. */
1370 : : typedef opt_pointer_wrapper <loop_vec_info> opt_loop_vec_info;
1371 : : /* Return the aux field of LOOP, cast to loop_vec_info. */
1372 : : inline loop_vec_info
1373 : 546463 : loop_vec_info_for_loop (class loop *loop)
1374 : : {
1375 : 546463 : return (loop_vec_info) loop->aux;
1376 : : }
1377 : : /* Bundles an SLP instance kind with its stmts, roots and remain vectors. */
1378 : : struct slp_root
1379 : : {
1380 : 1220202 : slp_root (slp_instance_kind kind_, vec<stmt_vec_info> stmts_,
1381 : 11917 : vec<stmt_vec_info> roots_, vec<tree> remain_ = vNULL)
1382 : 1220202 : : kind(kind_), stmts(stmts_), roots(roots_), remain(remain_) {}
1383 : : slp_instance_kind kind;
1384 : : vec<stmt_vec_info> stmts;
1385 : : vec<stmt_vec_info> roots;
1386 : : vec<tree> remain;
1387 : : };
1388 : : /* vec_info built from a vector of basic blocks; also records SLP roots. */
1389 : : typedef class _bb_vec_info : public vec_info
1390 : : {
1391 : : public:
1392 : : _bb_vec_info (vec<basic_block> bbs, vec_info_shared *);
1393 : : ~_bb_vec_info ();
1394 : :
1395 : : vec<slp_root> roots;
1396 : : } *bb_vec_info;
1397 : : /* Access macros for the fields of a bb_vec_info B. */
1398 : : #define BB_VINFO_BBS(B) (B)->bbs
1399 : : #define BB_VINFO_NBBS(B) (B)->nbbs
1400 : : #define BB_VINFO_GROUPED_STORES(B) (B)->grouped_stores
1401 : : #define BB_VINFO_SLP_INSTANCES(B) (B)->slp_instances
1402 : : #define BB_VINFO_DATAREFS(B) (B)->shared->datarefs
1403 : : #define BB_VINFO_DDRS(B) (B)->shared->ddrs
1404 : :
1405 : : /* Indicates whether and how a def is used in the scope of the loop or
1406 : : basic block. */
1407 : : enum vect_relevant {
1408 : : vect_unused_in_scope = 0,
1409 : :
1410 : : /* The def is only used outside the loop. */
1411 : : vect_used_only_live,
1412 : : /* The def is in the inner loop, and the use is in the outer loop, and the
1413 : : use is a reduction stmt. */
1414 : : vect_used_in_outer_by_reduction,
1415 : : /* The def is in the inner loop, and the use is in the outer loop (and is
1416 : : not part of reduction). */
1417 : : vect_used_in_outer,
1418 : :
1419 : : /* Defs that feed computations that end up (only) in a reduction. These
1420 : : defs may be used by non-reduction stmts, but eventually, any
1421 : : computations/values that are affected by these defs are used to compute
1422 : : a reduction (i.e. don't get stored to memory, for example). We use this
1423 : : to identify computations that we can change the order in which they are
1424 : : computed. */
1425 : : vect_used_by_reduction,
1426 : :
1427 : : vect_used_in_scope
1428 : : };
1429 : :
1430 : : /* How a stmt relates to the SLP graph. pure_slp means the stmt is covered
1431 : : by the SLP graph, not_vect means it is not. This is mostly used by BB
1432 : : vectorization. */
1433 : : enum slp_vect_type {
1434 : : not_vect = 0,
1435 : : pure_slp,
1436 : : };
1437 : :
1438 : : /* Says whether a statement is a load (VLS_LOAD), a store of a vectorized
1439 : : statement result (VLS_STORE), or a store of an invariant value. */
1440 : : enum vec_load_store_type {
1441 : : VLS_LOAD,
1442 : : VLS_STORE,
1443 : : VLS_STORE_INVARIANT
1444 : : };
1445 : : /* Vectorizer information attached to a single data reference. */
1446 : : class dr_vec_info {
1447 : : public:
1448 : : /* The data reference itself. */
1449 : : data_reference *dr;
1450 : : /* The statement that contains the data reference. */
1451 : : stmt_vec_info stmt;
1452 : : /* The analysis group this DR belongs to when doing BB vectorization.
1453 : : DRs of the same group belong to the same conditional execution context. */
1454 : : unsigned group;
1455 : : /* The misalignment in bytes of the reference, or -1 if not known. */
1456 : : int misalignment;
1457 : : /* The byte alignment that we'd ideally like the reference to have,
1458 : : and the value that misalignment is measured against. */
1459 : : poly_uint64 target_alignment;
1460 : : /* If true the alignment of base_decl needs to be increased. */
1461 : : bool base_misaligned;
1462 : :
1463 : : /* Set by early break vectorization when this DR needs peeling for alignment
1464 : : for correctness. */
1465 : : bool safe_speculative_read_required;
1466 : :
1467 : : /* Set by early break vectorization when this DR's scalar accesses are known
1468 : : to be inbounds of a known bounds loop. */
1469 : : bool scalar_access_known_in_bounds;
1470 : : /* The decl whose alignment BASE_MISALIGNED says needs to be increased. */
1471 : : tree base_decl;
1472 : :
1473 : : /* Stores current vectorized loop's offset. To be added to the DR's
1474 : : offset to calculate current offset of data reference. */
1475 : : tree offset;
1476 : : };
1477 : : /* Shorthand for a pointer to a data_reference. */
1478 : : typedef struct data_reference *dr_p;
1479 : :
1480 : : class _stmt_vec_info {
1481 : : public:
1482 : :
1483 : : /* Indicates whether this stmts is part of a computation whose result is
1484 : : used outside the loop. */
1485 : : bool live;
1486 : :
1487 : : /* Stmt is part of some pattern (computation idiom) */
1488 : : bool in_pattern_p;
1489 : :
1490 : : /* True if the statement was created during pattern recognition as
1491 : : part of the replacement for RELATED_STMT. This implies that the
1492 : : statement isn't part of any basic block, although for convenience
1493 : : its gimple_bb is the same as for RELATED_STMT. */
1494 : : bool pattern_stmt_p;
1495 : :
1496 : : /* Is this statement vectorizable or should it be skipped in (partial)
1497 : : vectorization. */
1498 : : bool vectorizable;
1499 : :
1500 : : /* The stmt to which this info struct refers to. */
1501 : : gimple *stmt;
1502 : :
1503 : : /* The vector type to be used for the LHS of this statement. */
1504 : : tree vectype;
1505 : :
1506 : : /* The following is relevant only for stmts that contain a non-scalar
1507 : : data-ref (array/pointer/struct access). A GIMPLE stmt is expected to have
1508 : : at most one such data-ref. */
1509 : :
1510 : : dr_vec_info dr_aux;
1511 : :
1512 : : /* Information about the data-ref relative to this loop
1513 : : nest (the loop that is being considered for vectorization). */
1514 : : innermost_loop_behavior dr_wrt_vec_loop;
1515 : :
1516 : : /* For loop PHI nodes, the base and evolution part of it. This makes sure
1517 : : this information is still available in vect_update_ivs_after_vectorizer
1518 : : where we may not be able to re-analyze the PHI nodes evolution as
1519 : : peeling for the prologue loop can make it unanalyzable. The evolution
1520 : : part is still correct after peeling, but the base may have changed from
1521 : : the version here. */
1522 : : tree loop_phi_evolution_base_unchanged;
1523 : : tree loop_phi_evolution_part;
1524 : : enum vect_induction_op_type loop_phi_evolution_type;
1525 : :
1526 : : /* Used for various bookkeeping purposes, generally holding a pointer to
1527 : : some other stmt S that is in some way "related" to this stmt.
1528 : : Current use of this field is:
1529 : : If this stmt is part of a pattern (i.e. the field 'in_pattern_p' is
1530 : : true): S is the "pattern stmt" that represents (and replaces) the
1531 : : sequence of stmts that constitutes the pattern. Similarly, the
1532 : : related_stmt of the "pattern stmt" points back to this stmt (which is
1533 : : the last stmt in the original sequence of stmts that constitutes the
1534 : : pattern). */
1535 : : stmt_vec_info related_stmt;
1536 : :
1537 : : /* Used to keep a sequence of def stmts of a pattern stmt if such exists.
1538 : : The sequence is attached to the original statement rather than the
1539 : : pattern statement. */
1540 : : gimple_seq pattern_def_seq;
1541 : :
1542 : : /* Classify the def of this stmt. */
1543 : : enum vect_def_type def_type;
1544 : :
1545 : : /* Whether the stmt is SLPed, loop-based vectorized, or both. */
1546 : : enum slp_vect_type slp_type;
1547 : :
1548 : : /* Interleaving chains info. */
1549 : : /* First element in the group. */
1550 : : stmt_vec_info first_element;
1551 : : /* Pointer to the next element in the group. */
1552 : : stmt_vec_info next_element;
1553 : : /* The size of the group. */
1554 : : unsigned int size;
1555 : : /* For loads only, the gap from the previous load. For consecutive loads, GAP
1556 : : is 1. */
1557 : : unsigned int gap;
1558 : :
1559 : : /* The minimum negative dependence distance this stmt participates in
1560 : : or zero if none. */
1561 : : unsigned int min_neg_dist;
1562 : :
1563 : : /* Not all stmts in the loop need to be vectorized. e.g, the increment
1564 : : of the loop induction variable and computation of array indexes. relevant
1565 : : indicates whether the stmt needs to be vectorized. */
1566 : : enum vect_relevant relevant;
1567 : :
1568 : : /* For loads if this is a gather, for stores if this is a scatter. */
1569 : : bool gather_scatter_p;
1570 : :
1571 : : /* True if this is an access with loop-invariant stride. */
1572 : : bool strided_p;
1573 : :
1574 : : /* For both loads and stores. */
1575 : : unsigned simd_lane_access_p : 3;
1576 : :
1577 : : /* On a reduction PHI the reduction type as detected by
1578 : : vect_is_simple_reduction. */
1579 : : enum vect_reduction_type reduc_type;
1580 : :
1581 : : /* On a reduction PHI, the original reduction code as detected by
1582 : : vect_is_simple_reduction. */
1583 : : code_helper reduc_code;
1584 : :
1585 : : /* On a stmt participating in a reduction the index of the operand
1586 : : on the reduction SSA cycle. */
1587 : : int reduc_idx;
1588 : :
1589 : : /* On a reduction PHI the def returned by vect_is_simple_reduction.
1590 : : On the def returned by vect_is_simple_reduction the corresponding PHI. */
1591 : : stmt_vec_info reduc_def;
1592 : :
1593 : : /* If nonzero, the lhs of the statement could be truncated to this
1594 : : many bits without affecting any users of the result. */
1595 : : unsigned int min_output_precision;
1596 : :
1597 : : /* If nonzero, all non-boolean input operands have the same precision,
1598 : : and they could each be truncated to this many bits without changing
1599 : : the result. */
1600 : : unsigned int min_input_precision;
1601 : :
1602 : : /* If OPERATION_BITS is nonzero, the statement could be performed on
1603 : : an integer with the sign and number of bits given by OPERATION_SIGN
1604 : : and OPERATION_BITS without changing the result. */
1605 : : unsigned int operation_precision;
1606 : : signop operation_sign;
1607 : :
1608 : : /* If the statement produces a boolean result, this value describes
1609 : : how we should choose the associated vector type. The possible
1610 : : values are:
1611 : :
1612 : : - an integer precision N if we should use the vector mask type
1613 : : associated with N-bit integers. This is only used if all relevant
1614 : : input booleans also want the vector mask type for N-bit integers,
1615 : : or if we can convert them into that form by pattern-matching.
1616 : :
1617 : : - ~0U if we considered choosing a vector mask type but decided
1618 : : to treat the boolean as a normal integer type instead.
1619 : :
1620 : : - 0 otherwise. This means either that the operation isn't one that
1621 : : could have a vector mask type (and so should have a normal vector
1622 : : type instead) or that we simply haven't made a choice either way. */
1623 : : unsigned int mask_precision;
1624 : :
1625 : : /* True if this is only suitable for SLP vectorization. */
1626 : : bool slp_vect_only_p;
1627 : :
1628 : : /* True if this is a pattern that can only be handled by SLP
1629 : : vectorization. */
1630 : : bool slp_vect_pattern_only_p;
1631 : : };
1632 : :
1633 : : /* Information about a gather/scatter call. */
1634 : : struct gather_scatter_info {
1635 : : /* The internal function to use for the gather/scatter operation,
1636 : : or IFN_LAST if a built-in function should be used instead. */
1637 : : internal_fn ifn;
1638 : :
1639 : : /* The FUNCTION_DECL for the built-in gather/scatter function,
1640 : : or null if an internal function should be used instead. */
1641 : : tree decl;
1642 : :
1643 : : /* The loop-invariant base value. */
1644 : : tree base;
1645 : :
1646 : : /* The TBBA alias pointer the value of which determines the alignment
1647 : : of the scalar accesses. */
1648 : : tree alias_ptr;
1649 : :
1650 : : /* The original scalar offset, which is a non-loop-invariant SSA_NAME. */
1651 : : tree offset;
1652 : :
1653 : : /* Each offset element should be multiplied by this amount before
1654 : : being added to the base. */
1655 : : int scale;
1656 : :
1657 : : /* The type of the vectorized offset. */
1658 : : tree offset_vectype;
1659 : :
1660 : : /* The type of the scalar elements after loading or before storing. */
1661 : : tree element_type;
1662 : :
1663 : : /* The type of the scalar elements being loaded or stored. */
1664 : : tree memory_type;
1665 : : };
1666 : :
/* Accessor macros.  S is a stmt_vec_info unless noted otherwise.  */

/* Plain field accessors.  */
#define STMT_VINFO_STMT(S)                 (S)->stmt
#define STMT_VINFO_RELEVANT(S)             (S)->relevant
#define STMT_VINFO_LIVE_P(S)               (S)->live
#define STMT_VINFO_VECTYPE(S)              (S)->vectype
#define STMT_VINFO_VECTORIZABLE(S)         (S)->vectorizable
/* The "+ 0" turns the expansion into a non-lvalue, so the data reference
   cannot be assigned through this macro.  */
#define STMT_VINFO_DATA_REF(S)             ((S)->dr_aux.dr + 0)
#define STMT_VINFO_GATHER_SCATTER_P(S)     (S)->gather_scatter_p
#define STMT_VINFO_STRIDED_P(S)            (S)->strided_p
#define STMT_VINFO_SIMD_LANE_ACCESS_P(S)   (S)->simd_lane_access_p
#define STMT_VINFO_REDUC_IDX(S)            (S)->reduc_idx

/* Components of the data-ref behavior relative to the vectorized loop.  */
#define STMT_VINFO_DR_WRT_VEC_LOOP(S)      (S)->dr_wrt_vec_loop
#define STMT_VINFO_DR_BASE_ADDRESS(S)      (S)->dr_wrt_vec_loop.base_address
#define STMT_VINFO_DR_INIT(S)              (S)->dr_wrt_vec_loop.init
#define STMT_VINFO_DR_OFFSET(S)            (S)->dr_wrt_vec_loop.offset
#define STMT_VINFO_DR_STEP(S)              (S)->dr_wrt_vec_loop.step
#define STMT_VINFO_DR_BASE_ALIGNMENT(S)    (S)->dr_wrt_vec_loop.base_alignment
#define STMT_VINFO_DR_BASE_MISALIGNMENT(S) \
  (S)->dr_wrt_vec_loop.base_misalignment
#define STMT_VINFO_DR_OFFSET_ALIGNMENT(S) \
  (S)->dr_wrt_vec_loop.offset_alignment
#define STMT_VINFO_DR_STEP_ALIGNMENT(S) \
  (S)->dr_wrt_vec_loop.step_alignment

/* Address of the embedded dr_vec_info, after asserting that it refers
   back to S.  */
#define STMT_VINFO_DR_INFO(S) \
  (gcc_checking_assert ((S)->dr_aux.stmt == (S)), &(S)->dr_aux)

#define STMT_VINFO_IN_PATTERN_P(S)         (S)->in_pattern_p
#define STMT_VINFO_RELATED_STMT(S)         (S)->related_stmt
#define STMT_VINFO_PATTERN_DEF_SEQ(S)      (S)->pattern_def_seq
#define STMT_VINFO_DEF_TYPE(S)             (S)->def_type
/* True if S has a data reference and a first group element.  */
#define STMT_VINFO_GROUPED_ACCESS(S) \
  ((S)->dr_aux.dr && DR_GROUP_FIRST_ELEMENT(S))
#define STMT_VINFO_LOOP_PHI_EVOLUTION_BASE_UNCHANGED(S) (S)->loop_phi_evolution_base_unchanged
#define STMT_VINFO_LOOP_PHI_EVOLUTION_PART(S) (S)->loop_phi_evolution_part
#define STMT_VINFO_LOOP_PHI_EVOLUTION_TYPE(S) (S)->loop_phi_evolution_type
#define STMT_VINFO_MIN_NEG_DIST(S)         (S)->min_neg_dist
#define STMT_VINFO_REDUC_TYPE(S)           (S)->reduc_type
#define STMT_VINFO_REDUC_CODE(S)           (S)->reduc_code
#define STMT_VINFO_REDUC_DEF(S)            (S)->reduc_def
#define STMT_VINFO_SLP_VECT_ONLY(S)        (S)->slp_vect_only_p
#define STMT_VINFO_SLP_VECT_ONLY_PATTERN(S) (S)->slp_vect_pattern_only_p
/* NOTE(review): _stmt_vec_info declares no reduc_vectype_in member, so
   this macro looks stale when applied to a stmt_vec_info -- confirm
   whether it still has users and remove it if not.  */
#define STMT_VINFO_REDUC_VECTYPE_IN(S)     (S)->reduc_vectype_in

/* Interleaving group accessors.  Each one asserts that S has a data
   reference before yielding the field.  */
#define DR_GROUP_FIRST_ELEMENT(S) \
  (gcc_checking_assert ((S)->dr_aux.dr), (S)->first_element)
#define DR_GROUP_NEXT_ELEMENT(S) \
  (gcc_checking_assert ((S)->dr_aux.dr), (S)->next_element)
#define DR_GROUP_SIZE(S) \
  (gcc_checking_assert ((S)->dr_aux.dr), (S)->size)
#define DR_GROUP_GAP(S) \
  (gcc_checking_assert ((S)->dr_aux.dr), (S)->gap)

/* True unless S is marked vect_unused_in_scope.  */
#define STMT_VINFO_RELEVANT_P(S) ((S)->relevant != vect_unused_in_scope)

#define PURE_SLP_STMT(S)                   ((S)->slp_type == pure_slp)
#define STMT_SLP_TYPE(S)                   (S)->slp_type
1725 : :
1726 : :
1727 : : /* Contains the scalar or vector costs for a vec_info. */
1728 : : class vector_costs
1729 : : {
1730 : : public:
1731 : : vector_costs (vec_info *, bool);
1732 : 0 : virtual ~vector_costs () {}
1733 : :
1734 : : /* Update the costs in response to adding COUNT copies of a statement.
1735 : :
1736 : : - WHERE specifies whether the cost occurs in the loop prologue,
1737 : : the loop body, or the loop epilogue.
1738 : : - KIND is the kind of statement, which is always meaningful.
1739 : : - STMT_INFO or NODE, if nonnull, describe the statement that will be
1740 : : vectorized.
1741 : : - VECTYPE, if nonnull, is the vector type that the vectorized
1742 : : statement will operate on. Note that this should be used in
1743 : : preference to STMT_VINFO_VECTYPE (STMT_INFO) since the latter
1744 : : is not correct for SLP.
1745 : : - for unaligned_load and unaligned_store statements, MISALIGN is
1746 : : the byte misalignment of the load or store relative to the target's
1747 : : preferred alignment for VECTYPE, or DR_MISALIGNMENT_UNKNOWN
1748 : : if the misalignment is not known.
1749 : :
1750 : : Return the calculated cost as well as recording it. The return
1751 : : value is used for dumping purposes. */
1752 : : virtual unsigned int add_stmt_cost (int count, vect_cost_for_stmt kind,
1753 : : stmt_vec_info stmt_info,
1754 : : slp_tree node,
1755 : : tree vectype, int misalign,
1756 : : vect_cost_model_location where);
1757 : :
1758 : : /* Finish calculating the cost of the code. The results can be
1759 : : read back using the functions below.
1760 : :
1761 : : If the costs describe vector code, SCALAR_COSTS gives the costs
1762 : : of the corresponding scalar code, otherwise it is null. */
1763 : : virtual void finish_cost (const vector_costs *scalar_costs);
1764 : :
1765 : : /* The costs in THIS and OTHER both describe ways of vectorizing
1766 : : a main loop. Return true if the costs described by THIS are
1767 : : cheaper than the costs described by OTHER. Return false if any
1768 : : of the following are true:
1769 : :
1770 : : - THIS and OTHER are of equal cost
1771 : : - OTHER is better than THIS
1772 : : - we can't be sure about the relative costs of THIS and OTHER. */
1773 : : virtual bool better_main_loop_than_p (const vector_costs *other) const;
1774 : :
1775 : : /* Likewise, but the costs in THIS and OTHER both describe ways of
1776 : : vectorizing an epilogue loop of MAIN_LOOP. */
1777 : : virtual bool better_epilogue_loop_than_p (const vector_costs *other,
1778 : : loop_vec_info main_loop) const;
1779 : :
1780 : : unsigned int prologue_cost () const;
1781 : : unsigned int body_cost () const;
1782 : : unsigned int epilogue_cost () const;
1783 : : unsigned int outside_cost () const;
1784 : : unsigned int total_cost () const;
1785 : : unsigned int suggested_unroll_factor () const;
1786 : : machine_mode suggested_epilogue_mode (int &masked) const;
1787 : 6814991 : bool costing_for_scalar () const { return m_costing_for_scalar; }
1788 : :
1789 : : protected:
1790 : : unsigned int record_stmt_cost (stmt_vec_info, vect_cost_model_location,
1791 : : unsigned int);
1792 : : unsigned int adjust_cost_for_freq (stmt_vec_info, vect_cost_model_location,
1793 : : unsigned int);
1794 : : int compare_inside_loop_cost (const vector_costs *) const;
1795 : : int compare_outside_loop_cost (const vector_costs *) const;
1796 : :
1797 : : /* The region of code that we're considering vectorizing. */
1798 : : vec_info *m_vinfo;
1799 : :
1800 : : /* True if we're costing the scalar code, false if we're costing
1801 : : the vector code. */
1802 : : bool m_costing_for_scalar;
1803 : :
1804 : : /* The costs of the three regions, indexed by vect_cost_model_location. */
1805 : : unsigned int m_costs[3];
1806 : :
1807 : : /* The suggested unrolling factor determined at finish_cost. */
1808 : : unsigned int m_suggested_unroll_factor;
1809 : :
1810 : : /* The suggested mode to be used for a vectorized epilogue or VOIDmode,
1811 : : determined at finish_cost. m_masked_epilogue specifies whether the
1812 : : epilogue should use masked vectorization, regardless of the
1813 : : --param vect-partial-vector-usage default. If -1 then the
1814 : : --param setting takes precedence. If the user explicitly specified
1815 : : --param vect-partial-vector-usage then that takes precedence. */
1816 : : machine_mode m_suggested_epilogue_mode;
1817 : : int m_masked_epilogue;
1818 : :
1819 : : /* True if finish_cost has been called. */
1820 : : bool m_finished;
1821 : : };
1822 : :
1823 : : /* Create costs for VINFO. COSTING_FOR_SCALAR is true if the costs
1824 : : are for scalar code, false if they are for vector code. */
1825 : :
1826 : : inline
1827 : 1954984 : vector_costs::vector_costs (vec_info *vinfo, bool costing_for_scalar)
1828 : 1954984 : : m_vinfo (vinfo),
1829 : 1954984 : m_costing_for_scalar (costing_for_scalar),
1830 : 1954984 : m_costs (),
1831 : 1954984 : m_suggested_unroll_factor(1),
1832 : 1954984 : m_suggested_epilogue_mode(VOIDmode),
1833 : 1954984 : m_masked_epilogue (-1),
1834 : 1954984 : m_finished (false)
1835 : : {
1836 : : }
1837 : :
1838 : : /* Return the cost of the prologue code (in abstract units). */
1839 : :
1840 : : inline unsigned int
1841 : 831106 : vector_costs::prologue_cost () const
1842 : : {
1843 : 831106 : gcc_checking_assert (m_finished);
1844 : 831106 : return m_costs[vect_prologue];
1845 : : }
1846 : :
1847 : : /* Return the cost of the body code (in abstract units). */
1848 : :
1849 : : inline unsigned int
1850 : 1495360 : vector_costs::body_cost () const
1851 : : {
1852 : 1495360 : gcc_checking_assert (m_finished);
1853 : 1495360 : return m_costs[vect_body];
1854 : : }
1855 : :
1856 : : /* Return the cost of the epilogue code (in abstract units). */
1857 : :
1858 : : inline unsigned int
1859 : 831106 : vector_costs::epilogue_cost () const
1860 : : {
1861 : 831106 : gcc_checking_assert (m_finished);
1862 : 831106 : return m_costs[vect_epilogue];
1863 : : }
1864 : :
1865 : : /* Return the cost of the prologue and epilogue code (in abstract units). */
1866 : :
1867 : : inline unsigned int
1868 : 83426 : vector_costs::outside_cost () const
1869 : : {
1870 : 83426 : return prologue_cost () + epilogue_cost ();
1871 : : }
1872 : :
1873 : : /* Return the cost of the prologue, body and epilogue code
1874 : : (in abstract units). */
1875 : :
1876 : : inline unsigned int
1877 : 83426 : vector_costs::total_cost () const
1878 : : {
1879 : 83426 : return body_cost () + outside_cost ();
1880 : : }
1881 : :
1882 : : /* Return the suggested unroll factor. */
1883 : :
1884 : : inline unsigned int
1885 : 83239 : vector_costs::suggested_unroll_factor () const
1886 : : {
1887 : 83239 : gcc_checking_assert (m_finished);
1888 : 83239 : return m_suggested_unroll_factor;
1889 : : }
1890 : :
1891 : : /* Return the suggested epilogue mode. */
1892 : :
1893 : : inline machine_mode
1894 : 12764 : vector_costs::suggested_epilogue_mode (int &masked_p) const
1895 : : {
1896 : 12764 : gcc_checking_assert (m_finished);
1897 : 12764 : masked_p = m_masked_epilogue;
1898 : 12764 : return m_suggested_epilogue_mode;
1899 : : }
1900 : :
/* NOTE(review): presumably an upper bound for cost values -- confirm
   against the users of this macro.  */
#define VECT_MAX_COST            1000

/* Maximum number of intermediate steps required in a multi-step type
   conversion.  */
#define MAX_INTERM_CVT_STEPS     3

/* NOTE(review): presumably the largest vectorization factor that is
   accepted; defined as INT_MAX -- confirm against the users.  */
#define MAX_VECTORIZATION_FACTOR INT_MAX
1908 : :
/* Nonzero if TYPE is a (scalar) boolean type, or a type the middle-end
   treats as compatible with one: an unsigned integral type of
   precision 1.  Used to decide which types should be vectorized as
   VECTOR_BOOLEAN_TYPE_P.  TYPE is evaluated more than once.  */

#define VECT_SCALAR_BOOLEAN_TYPE_P(TYPE)            \
  (TREE_CODE (TYPE) == BOOLEAN_TYPE                 \
   || ((TREE_CODE (TYPE) == INTEGER_TYPE            \
        || TREE_CODE (TYPE) == ENUMERAL_TYPE)       \
       && TYPE_PRECISION (TYPE) == 1                \
       && TYPE_UNSIGNED (TYPE)))
1920 : :
1921 : : inline bool
1922 : 9400390 : nested_in_vect_loop_p (class loop *loop, stmt_vec_info stmt_info)
1923 : : {
1924 : 9400390 : return (loop->inner
1925 : 7535730 : && (loop->inner == (gimple_bb (stmt_info->stmt))->loop_father));
1926 : : }
1927 : :
1928 : : /* PHI is either a scalar reduction phi or a scalar induction phi.
1929 : : Return the initial value of the variable on entry to the containing
1930 : : loop. */
1931 : :
1932 : : inline tree
1933 : 34153 : vect_phi_initial_value (gphi *phi)
1934 : : {
1935 : 34153 : basic_block bb = gimple_bb (phi);
1936 : 34153 : edge pe = loop_preheader_edge (bb->loop_father);
1937 : 34153 : gcc_assert (pe->dest == bb);
1938 : 34153 : return PHI_ARG_DEF_FROM_EDGE (phi, pe);
1939 : : }
1940 : :
1941 : : /* Return true if STMT_INFO should produce a vector mask type rather than
1942 : : a normal nonmask type. */
1943 : :
1944 : : inline bool
1945 : 7362424 : vect_use_mask_type_p (stmt_vec_info stmt_info)
1946 : : {
1947 : 7362424 : return stmt_info->mask_precision && stmt_info->mask_precision != ~0U;
1948 : : }
1949 : :
1950 : : /* Return TRUE if a statement represented by STMT_INFO is a part of a
1951 : : pattern. */
1952 : :
1953 : : inline bool
1954 : 123214861 : is_pattern_stmt_p (stmt_vec_info stmt_info)
1955 : : {
1956 : 78262768 : return stmt_info->pattern_stmt_p;
1957 : : }
1958 : :
1959 : : /* If STMT_INFO is a pattern statement, return the statement that it
1960 : : replaces, otherwise return STMT_INFO itself. */
1961 : :
1962 : : inline stmt_vec_info
1963 : 48938806 : vect_orig_stmt (stmt_vec_info stmt_info)
1964 : : {
1965 : 36774318 : if (is_pattern_stmt_p (stmt_info))
1966 : 2999040 : return STMT_VINFO_RELATED_STMT (stmt_info);
1967 : : return stmt_info;
1968 : : }
1969 : :
1970 : : /* Return the later statement between STMT1_INFO and STMT2_INFO. */
1971 : :
1972 : : inline stmt_vec_info
1973 : 5957843 : get_later_stmt (stmt_vec_info stmt1_info, stmt_vec_info stmt2_info)
1974 : : {
1975 : 5957843 : gimple *stmt1 = vect_orig_stmt (stmt1_info)->stmt;
1976 : 5957843 : gimple *stmt2 = vect_orig_stmt (stmt2_info)->stmt;
1977 : 5957843 : if (gimple_bb (stmt1) == gimple_bb (stmt2))
1978 : : {
1979 : 5930683 : if (gimple_uid (stmt1) > gimple_uid (stmt2))
1980 : : return stmt1_info;
1981 : : else
1982 : : return stmt2_info;
1983 : : }
1984 : : /* ??? We should be really calling this function only with stmts
1985 : : in the same BB but we can recover if there's a domination
1986 : : relationship between them. */
1987 : 27160 : else if (dominated_by_p (CDI_DOMINATORS,
1988 : 27160 : gimple_bb (stmt1), gimple_bb (stmt2)))
1989 : : return stmt1_info;
1990 : 8297 : else if (dominated_by_p (CDI_DOMINATORS,
1991 : 8297 : gimple_bb (stmt2), gimple_bb (stmt1)))
1992 : : return stmt2_info;
1993 : 0 : gcc_unreachable ();
1994 : : }
1995 : :
1996 : : /* If STMT_INFO has been replaced by a pattern statement, return the
1997 : : replacement statement, otherwise return STMT_INFO itself. */
1998 : :
1999 : : inline stmt_vec_info
2000 : 48203406 : vect_stmt_to_vectorize (stmt_vec_info stmt_info)
2001 : : {
2002 : 48203406 : if (STMT_VINFO_IN_PATTERN_P (stmt_info))
2003 : 1382572 : return STMT_VINFO_RELATED_STMT (stmt_info);
2004 : : return stmt_info;
2005 : : }
2006 : :
2007 : : /* Return true if BB is a loop header. */
2008 : :
2009 : : inline bool
2010 : 1120968 : is_loop_header_bb_p (basic_block bb)
2011 : : {
2012 : 1120968 : if (bb == (bb->loop_father)->header)
2013 : 1111056 : return true;
2014 : :
2015 : : return false;
2016 : : }
2017 : :
/* Return 2 raised to the power X.  A non-positive X yields 1, which is
   what the former doubling loop produced for such inputs.  X must be
   small enough for the result to fit in an int; larger exponents were
   not representable with the loop either.  */

inline int
vect_pow2 (int x)
{
  if (x <= 0)
    return 1;

  /* One shift replaces X successive doublings.  */
  return 1 << x;
}
2030 : :
2031 : : /* Alias targetm.vectorize.builtin_vectorization_cost. */
2032 : :
2033 : : inline int
2034 : 9560849 : builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost,
2035 : : tree vectype, int misalign)
2036 : : {
2037 : 9560849 : return targetm.vectorize.builtin_vectorization_cost (type_of_cost,
2038 : : vectype, misalign);
2039 : : }
2040 : :
2041 : : /* Get cost by calling cost target builtin. */
2042 : :
2043 : : inline
2044 : 38 : int vect_get_stmt_cost (enum vect_cost_for_stmt type_of_cost)
2045 : : {
2046 : 50588 : return builtin_vectorization_cost (type_of_cost, NULL, 0);
2047 : : }
2048 : :
2049 : : /* Alias targetm.vectorize.init_cost. */
2050 : :
2051 : : inline vector_costs *
2052 : 1954984 : init_cost (vec_info *vinfo, bool costing_for_scalar)
2053 : : {
2054 : 1954984 : return targetm.vectorize.create_costs (vinfo, costing_for_scalar);
2055 : : }
2056 : :
2057 : : extern void dump_stmt_cost (FILE *, int, enum vect_cost_for_stmt,
2058 : : stmt_vec_info, slp_tree, tree, int, unsigned,
2059 : : enum vect_cost_model_location);
2060 : :
2061 : : /* Dump and add costs. */
2062 : :
2063 : : inline unsigned
2064 : 6814991 : add_stmt_cost (vector_costs *costs, int count,
2065 : : enum vect_cost_for_stmt kind,
2066 : : stmt_vec_info stmt_info, slp_tree node,
2067 : : tree vectype, int misalign,
2068 : : enum vect_cost_model_location where)
2069 : : {
2070 : : /* Even though a vector type might be set on stmt do not pass that on when
2071 : : costing the scalar IL. A SLP node shouldn't have been recorded. */
2072 : 6814991 : if (costs->costing_for_scalar ())
2073 : : {
2074 : 3693918 : vectype = NULL_TREE;
2075 : 3693918 : gcc_checking_assert (node == NULL);
2076 : : }
2077 : 6814991 : unsigned cost = costs->add_stmt_cost (count, kind, stmt_info, node, vectype,
2078 : : misalign, where);
2079 : 6814991 : if (dump_file && (dump_flags & TDF_DETAILS))
2080 : 208195 : dump_stmt_cost (dump_file, count, kind, stmt_info, node, vectype, misalign,
2081 : : cost, where);
2082 : 6814991 : return cost;
2083 : : }
2084 : :
2085 : : inline unsigned
2086 : 59853 : add_stmt_cost (vector_costs *costs, int count, enum vect_cost_for_stmt kind,
2087 : : enum vect_cost_model_location where)
2088 : : {
2089 : 59853 : gcc_assert (kind == cond_branch_taken || kind == cond_branch_not_taken
2090 : : || kind == scalar_stmt);
2091 : 59853 : return add_stmt_cost (costs, count, kind, NULL, NULL, NULL_TREE, 0, where);
2092 : : }
2093 : :
2094 : : inline unsigned
2095 : 3868933 : add_stmt_cost (vector_costs *costs, stmt_info_for_cost *i)
2096 : : {
2097 : 3868933 : return add_stmt_cost (costs, i->count, i->kind, i->stmt_info, i->node,
2098 : 3868933 : i->vectype, i->misalign, i->where);
2099 : : }
2100 : :
2101 : : inline void
2102 : 499192 : add_stmt_costs (vector_costs *costs, stmt_vector_for_cost *cost_vec)
2103 : : {
2104 : 499192 : stmt_info_for_cost *cost;
2105 : 499192 : unsigned i;
2106 : 3159495 : FOR_EACH_VEC_ELT (*cost_vec, i, cost)
2107 : 2660303 : add_stmt_cost (costs, cost->count, cost->kind, cost->stmt_info,
2108 : : cost->node, cost->vectype, cost->misalign, cost->where);
2109 : 499192 : }
2110 : :
2111 : : /*-----------------------------------------------------------------*/
2112 : : /* Info on data references alignment. */
2113 : : /*-----------------------------------------------------------------*/
/* Misalignment value meaning that the misalignment is not known.  */
#define DR_MISALIGNMENT_UNKNOWN       (-1)
/* NOTE(review): presumably marks a misalignment that has not been
   computed yet -- confirm against the users.  */
#define DR_MISALIGNMENT_UNINITIALIZED (-2)
2116 : :
2117 : : inline void
2118 : 2364803 : set_dr_misalignment (dr_vec_info *dr_info, int val)
2119 : : {
2120 : 2364803 : dr_info->misalignment = val;
2121 : : }
2122 : :
2123 : : extern int dr_misalignment (dr_vec_info *dr_info, tree vectype,
2124 : : poly_int64 offset = 0);
2125 : :
2126 : : #define SET_DR_MISALIGNMENT(DR, VAL) set_dr_misalignment (DR, VAL)
2127 : :
2128 : : /* Only defined once DR_MISALIGNMENT is defined. */
2129 : : inline const poly_uint64
2130 : 6254270 : dr_target_alignment (dr_vec_info *dr_info)
2131 : : {
2132 : 6254270 : if (STMT_VINFO_GROUPED_ACCESS (dr_info->stmt))
2133 : 4611926 : dr_info = STMT_VINFO_DR_INFO (DR_GROUP_FIRST_ELEMENT (dr_info->stmt));
2134 : 6254270 : return dr_info->target_alignment;
2135 : : }
/* Macro spelling of dr_target_alignment.  */
#define DR_TARGET_ALIGNMENT(DR)    dr_target_alignment (DR)
/* Accessor for the scalar_access_known_in_bounds flag of DR.  */
#define DR_SCALAR_KNOWN_BOUNDS(DR) (DR)->scalar_access_known_in_bounds
2138 : :
2139 : : /* Return if the stmt_vec_info requires peeling for alignment. */
2140 : : inline bool
2141 : 3808786 : dr_safe_speculative_read_required (stmt_vec_info stmt_info)
2142 : : {
2143 : 3808786 : dr_vec_info *dr_info;
2144 : 3808786 : if (STMT_VINFO_GROUPED_ACCESS (stmt_info))
2145 : 1628557 : dr_info = STMT_VINFO_DR_INFO (DR_GROUP_FIRST_ELEMENT (stmt_info));
2146 : : else
2147 : 2180229 : dr_info = STMT_VINFO_DR_INFO (stmt_info);
2148 : :
2149 : 3808786 : return dr_info->safe_speculative_read_required;
2150 : : }
2151 : :
2152 : : /* Set the safe_speculative_read_required for the stmt_vec_info, if group
2153 : : access then set on the fist element otherwise set on DR directly. */
2154 : : inline void
2155 : 209809 : dr_set_safe_speculative_read_required (stmt_vec_info stmt_info,
2156 : : bool requires_alignment)
2157 : : {
2158 : 209809 : dr_vec_info *dr_info;
2159 : 209809 : if (STMT_VINFO_GROUPED_ACCESS (stmt_info))
2160 : 68483 : dr_info = STMT_VINFO_DR_INFO (DR_GROUP_FIRST_ELEMENT (stmt_info));
2161 : : else
2162 : 141326 : dr_info = STMT_VINFO_DR_INFO (stmt_info);
2163 : :
2164 : 209809 : dr_info->safe_speculative_read_required = requires_alignment;
2165 : 209809 : }
2166 : :
2167 : : inline void
2168 : 1448587 : set_dr_target_alignment (dr_vec_info *dr_info, poly_uint64 val)
2169 : : {
2170 : 1448587 : dr_info->target_alignment = val;
2171 : : }
2172 : : #define SET_DR_TARGET_ALIGNMENT(DR, VAL) set_dr_target_alignment (DR, VAL)
2173 : :
2174 : : /* Return true if data access DR_INFO is aligned to the targets
2175 : : preferred alignment for VECTYPE (which may be less than a full vector). */
2176 : :
2177 : : inline bool
2178 : 307523 : aligned_access_p (dr_vec_info *dr_info, tree vectype)
2179 : : {
2180 : 307523 : return (dr_misalignment (dr_info, vectype) == 0);
2181 : : }
2182 : :
2183 : : /* Return TRUE if the (mis-)alignment of the data access is known with
2184 : : respect to the targets preferred alignment for VECTYPE, and FALSE
2185 : : otherwise. */
2186 : :
2187 : : inline bool
2188 : 1750424 : known_alignment_for_access_p (dr_vec_info *dr_info, tree vectype)
2189 : : {
2190 : 1574329 : return (dr_misalignment (dr_info, vectype) != DR_MISALIGNMENT_UNKNOWN);
2191 : : }
2192 : :
2193 : : /* Return the minimum alignment in bytes that the vectorized version
2194 : : of DR_INFO is guaranteed to have. */
2195 : :
2196 : : inline unsigned int
2197 : 235389 : vect_known_alignment_in_bytes (dr_vec_info *dr_info, tree vectype,
2198 : : poly_int64 offset = 0)
2199 : : {
2200 : 235389 : int misalignment = dr_misalignment (dr_info, vectype, offset);
2201 : 235389 : if (misalignment == DR_MISALIGNMENT_UNKNOWN)
2202 : 114887 : return TYPE_ALIGN_UNIT (TREE_TYPE (DR_REF (dr_info->dr)));
2203 : 120502 : else if (misalignment == 0)
2204 : 92044 : return known_alignment (DR_TARGET_ALIGNMENT (dr_info));
2205 : 28458 : return misalignment & -misalignment;
2206 : : }
2207 : :
2208 : : /* Return the behavior of DR_INFO with respect to the vectorization context
2209 : : (which for outer loop vectorization might not be the behavior recorded
2210 : : in DR_INFO itself). */
2211 : :
2212 : : inline innermost_loop_behavior *
2213 : 5322895 : vect_dr_behavior (vec_info *vinfo, dr_vec_info *dr_info)
2214 : : {
2215 : 5322895 : stmt_vec_info stmt_info = dr_info->stmt;
2216 : 5322895 : loop_vec_info loop_vinfo = dyn_cast<loop_vec_info> (vinfo);
2217 : 1871162 : if (loop_vinfo == NULL
2218 : 1871162 : || !nested_in_vect_loop_p (LOOP_VINFO_LOOP (loop_vinfo), stmt_info))
2219 : 5319216 : return &DR_INNERMOST (dr_info->dr);
2220 : : else
2221 : 3679 : return &STMT_VINFO_DR_WRT_VEC_LOOP (stmt_info);
2222 : : }
2223 : :
2224 : : /* Return the offset calculated by adding the offset of this DR_INFO to the
2225 : : corresponding data_reference's offset. If CHECK_OUTER then use
2226 : : vect_dr_behavior to select the appropriate data_reference to use. */
2227 : :
2228 : : inline tree
2229 : 733516 : get_dr_vinfo_offset (vec_info *vinfo,
2230 : : dr_vec_info *dr_info, bool check_outer = false)
2231 : : {
2232 : 733516 : innermost_loop_behavior *base;
2233 : 733516 : if (check_outer)
2234 : 693399 : base = vect_dr_behavior (vinfo, dr_info);
2235 : : else
2236 : 40117 : base = &dr_info->dr->innermost;
2237 : :
2238 : 733516 : tree offset = base->offset;
2239 : :
2240 : 733516 : if (!dr_info->offset)
2241 : : return offset;
2242 : :
2243 : 19034 : offset = fold_convert (sizetype, offset);
2244 : 19034 : return fold_build2 (PLUS_EXPR, TREE_TYPE (dr_info->offset), offset,
2245 : : dr_info->offset);
2246 : : }
2247 : :
2248 : :
2249 : : /* Return the vect cost model for LOOP. */
2250 : : inline enum vect_cost_model
2251 : 1818264 : loop_cost_model (loop_p loop)
2252 : : {
2253 : 1818264 : if (loop != NULL
2254 : 1153951 : && loop->force_vectorize
2255 : 69912 : && flag_simd_cost_model != VECT_COST_MODEL_DEFAULT)
2256 : : return flag_simd_cost_model;
2257 : 1748352 : return flag_vect_cost_model;
2258 : : }
2259 : :
2260 : : /* Return true if the vect cost model is unlimited. */
2261 : : inline bool
2262 : 1258082 : unlimited_cost_model (loop_p loop)
2263 : : {
2264 : 1258082 : return loop_cost_model (loop) == VECT_COST_MODEL_UNLIMITED;
2265 : : }
2266 : :
2267 : : /* Return true if the loop described by LOOP_VINFO is fully-masked and
2268 : : if the first iteration should use a partial mask in order to achieve
2269 : : alignment. */
2270 : :
2271 : : inline bool
2272 : 216988 : vect_use_loop_mask_for_alignment_p (loop_vec_info loop_vinfo)
2273 : : {
2274 : : /* With early break vectorization we don't know whether the accesses will stay
2275 : : inside the loop or not. TODO: The early break adjustment code can be
2276 : : implemented the same way as vectorizable_linear_induction. However we
2277 : : can't test this today so reject it. */
2278 : 78 : return (LOOP_VINFO_FULLY_MASKED_P (loop_vinfo)
2279 : 78 : && LOOP_VINFO_PEELING_FOR_ALIGNMENT (loop_vinfo)
2280 : 216992 : && !(LOOP_VINFO_NON_LINEAR_IV (loop_vinfo)
2281 : 0 : && LOOP_VINFO_EARLY_BREAKS (loop_vinfo)));
2282 : : }
2283 : :
2284 : : /* Return the number of vectors of type VECTYPE that are needed to get
2285 : : NUNITS elements. NUNITS should be based on the vectorization factor,
2286 : : so it is always a known multiple of the number of elements in VECTYPE. */
2287 : :
2288 : : inline unsigned int
2289 : 6654571 : vect_get_num_vectors (poly_uint64 nunits, tree vectype)
2290 : : {
2291 : 6654571 : return exact_div (nunits, TYPE_VECTOR_SUBPARTS (vectype)).to_constant ();
2292 : : }
2293 : :
2294 : : /* Return the number of vectors in the context of vectorization region VINFO,
2295 : : needed for a group of statements and a vector type as specified by NODE. */
2296 : :
2297 : : inline unsigned int
2298 : 6653781 : vect_get_num_copies (vec_info *vinfo, slp_tree node)
2299 : : {
2300 : 6653781 : poly_uint64 vf;
2301 : :
2302 : 6653781 : if (loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo))
2303 : 2790963 : vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
2304 : : else
2305 : : vf = 1;
2306 : :
2307 : 6653781 : vf *= SLP_TREE_LANES (node);
2308 : 6653781 : tree vectype = SLP_TREE_VECTYPE (node);
2309 : :
2310 : 6653781 : return vect_get_num_vectors (vf, vectype);
2311 : : }
2312 : :
2313 : : /* Update maximum unit count *MAX_NUNITS so that it accounts for
2314 : : NUNITS. *MAX_NUNITS can be 1 if we haven't yet recorded anything. */
2315 : :
2316 : : inline void
2317 : 9814881 : vect_update_max_nunits (poly_uint64 *max_nunits, poly_uint64 nunits)
2318 : : {
2319 : : /* All unit counts have the form vec_info::vector_size * X for some
2320 : : rational X, so two unit sizes must have a common multiple.
2321 : : Everything is a multiple of the initial value of 1. */
2322 : 4284884 : *max_nunits = force_common_multiple (*max_nunits, nunits);
2323 : : }
2324 : :
2325 : : /* Update maximum unit count *MAX_NUNITS so that it accounts for
2326 : : the number of units in vector type VECTYPE. *MAX_NUNITS can be 1
2327 : : if we haven't yet recorded any vector types. */
2328 : :
2329 : : inline void
2330 : 5529997 : vect_update_max_nunits (poly_uint64 *max_nunits, tree vectype)
2331 : : {
2332 : 5529997 : vect_update_max_nunits (max_nunits, TYPE_VECTOR_SUBPARTS (vectype));
2333 : 5529997 : }
2334 : :
2335 : : /* Return the vectorization factor that should be used for costing
2336 : : purposes while vectorizing the loop described by LOOP_VINFO.
2337 : : Pick a reasonable estimate if the vectorization factor isn't
2338 : : known at compile time. */
2339 : :
2340 : : inline unsigned int
2341 : 945967 : vect_vf_for_cost (loop_vec_info loop_vinfo)
2342 : : {
2343 : 945967 : return estimated_poly_value (LOOP_VINFO_VECT_FACTOR (loop_vinfo));
2344 : : }
2345 : :
2346 : : /* Estimate the number of elements in VEC_TYPE for costing purposes.
2347 : : Pick a reasonable estimate if the exact number isn't known at
2348 : : compile time. */
2349 : :
2350 : : inline unsigned int
2351 : 29720 : vect_nunits_for_cost (tree vec_type)
2352 : : {
2353 : 29720 : return estimated_poly_value (TYPE_VECTOR_SUBPARTS (vec_type));
2354 : : }
2355 : :
2356 : : /* Return the maximum possible vectorization factor for LOOP_VINFO. */
2357 : :
2358 : : inline unsigned HOST_WIDE_INT
2359 : 76950 : vect_max_vf (loop_vec_info loop_vinfo)
2360 : : {
2361 : 76950 : unsigned HOST_WIDE_INT vf;
2362 : 76950 : if (LOOP_VINFO_VECT_FACTOR (loop_vinfo).is_constant (&vf))
2363 : 76950 : return vf;
2364 : : return MAX_VECTORIZATION_FACTOR;
2365 : : }
2366 : :
2367 : : /* Return the size of the value accessed by unvectorized data reference
2368 : : DR_INFO. This is only valid once STMT_VINFO_VECTYPE has been calculated
2369 : : for the associated gimple statement, since that guarantees that DR_INFO
2370 : : accesses either a scalar or a scalar equivalent. ("Scalar equivalent"
2371 : : here includes things like V1SI, which can be vectorized in the same way
2372 : : as a plain SI.) */
2373 : :
2374 : : inline unsigned int
2375 : 1729426 : vect_get_scalar_dr_size (dr_vec_info *dr_info)
2376 : : {
2377 : 1729426 : return tree_to_uhwi (TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (dr_info->dr))));
2378 : : }
2379 : :
2380 : : /* Return true if LOOP_VINFO requires a runtime check for whether the
2381 : : vector loop is profitable. */
2382 : :
2383 : : inline bool
2384 : 66520 : vect_apply_runtime_profitability_check_p (loop_vec_info loop_vinfo)
2385 : : {
2386 : 66520 : unsigned int th = LOOP_VINFO_COST_MODEL_THRESHOLD (loop_vinfo);
2387 : 36646 : return (!LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)
2388 : 66520 : && th >= vect_vf_for_cost (loop_vinfo));
2389 : : }
2390 : :
2391 : : /* Return true if CODE is a lane-reducing opcode. */
2392 : :
2393 : : inline bool
2394 : 284056 : lane_reducing_op_p (code_helper code)
2395 : : {
2396 : 284056 : return code == DOT_PROD_EXPR || code == WIDEN_SUM_EXPR || code == SAD_EXPR;
2397 : : }
2398 : :
2399 : : /* Return true if STMT is a lane-reducing statement. */
2400 : :
2401 : : inline bool
2402 : 359038 : lane_reducing_stmt_p (gimple *stmt)
2403 : : {
2404 : 359038 : if (auto *assign = dyn_cast <gassign *> (stmt))
2405 : 261971 : return lane_reducing_op_p (gimple_assign_rhs_code (assign));
2406 : : return false;
2407 : : }
2408 : :
2409 : : /* Source location + hotness information. */
2410 : : extern dump_user_location_t vect_location;
2411 : :
2412 : : /* A macro for calling:
2413 : : dump_begin_scope (MSG, vect_location);
2414 : : via an RAII object, thus printing "=== MSG ===\n" to the dumpfile etc,
2415 : : and then calling
2416 : : dump_end_scope ();
2417 : : once the object goes out of scope, thus capturing the nesting of
2418 : : the scopes.
2419 : :
2420 : : These scopes affect dump messages within them: dump messages at the
2421 : : top level implicitly default to MSG_PRIORITY_USER_FACING, whereas those
2422 : : in a nested scope implicitly default to MSG_PRIORITY_INTERNALS. */
2423 : :
2424 : : #define DUMP_VECT_SCOPE(MSG) \
2425 : : AUTO_DUMP_SCOPE (MSG, vect_location)
2426 : :
2427 : : /* A sentinel class for ensuring that the "vect_location" global gets
2428 : : reset at the end of a scope.
2429 : :
2430 : : The "vect_location" global is used during dumping and contains a
2431 : : location_t, which could contain references to a tree block via the
2432 : : ad-hoc data. This data is used for tracking inlining information,
2433 : : but it's not a GC root; it's simply assumed that such locations never
2434 : : get accessed if the blocks are optimized away.
2435 : :
2436 : : Hence we need to ensure that such locations are purged at the end
2437 : : of any operations using them (e.g. via this class). */
2438 : :
2439 : : class auto_purge_vect_location
2440 : : {
2441 : : public:
/* Only declared here; the definition is not part of this header chunk.
   NOTE(review): presumably this is where the "vect_location" global is
   reset when the sentinel goes out of scope -- confirm against the
   out-of-line definition.  */
2442 : : ~auto_purge_vect_location ();
2443 : : };
2444 : :
2445 : : /*-----------------------------------------------------------------*/
2446 : : /* Function prototypes. */
2447 : : /*-----------------------------------------------------------------*/
2448 : :
2449 : : /* Simple loop peeling and versioning utilities for vectorizer's purposes -
2450 : : in tree-vect-loop-manip.cc. */
2451 : : extern void vect_set_loop_condition (class loop *, edge, loop_vec_info,
2452 : : tree, tree, tree, bool);
2453 : : extern bool slpeel_can_duplicate_loop_p (const class loop *, const_edge,
2454 : : const_edge);
2455 : : class loop *slpeel_tree_duplicate_loop_to_edge_cfg (class loop *, edge,
2456 : : class loop *, edge,
2457 : : edge, edge *, bool = true,
2458 : : vec<basic_block> * = NULL);
2459 : : class loop *vect_loop_versioning (loop_vec_info, gimple *);
2460 : : extern class loop *vect_do_peeling (loop_vec_info, tree, tree,
2461 : : tree *, tree *, tree *, int, bool, bool,
2462 : : tree *);
2463 : : extern tree vect_get_main_loop_result (loop_vec_info, tree, tree);
2464 : : extern void vect_prepare_for_masked_peels (loop_vec_info);
2465 : : extern dump_user_location_t find_loop_location (class loop *);
2466 : : extern bool vect_can_advance_ivs_p (loop_vec_info);
2467 : : extern void vect_update_inits_of_drs (loop_vec_info, tree, tree_code);
2468 : : extern edge vec_init_loop_exit_info (class loop *);
2469 : : extern void vect_iv_increment_position (edge, gimple_stmt_iterator *, bool *);
2470 : :
2471 : : /* In tree-vect-stmts.cc. */
2472 : : extern tree get_related_vectype_for_scalar_type (machine_mode, tree,
2473 : : poly_uint64 = 0);
2474 : : extern tree get_vectype_for_scalar_type (vec_info *, tree, unsigned int = 0);
2475 : : extern tree get_vectype_for_scalar_type (vec_info *, tree, slp_tree);
2476 : : extern tree get_mask_type_for_scalar_type (vec_info *, tree, unsigned int = 0);
2477 : : extern tree get_mask_type_for_scalar_type (vec_info *, tree, slp_tree);
2478 : : extern tree get_same_sized_vectype (tree, tree);
2479 : : extern bool vect_chooses_same_modes_p (vec_info *, machine_mode);
2480 : : extern bool vect_chooses_same_modes_p (machine_mode, machine_mode);
2481 : : extern bool vect_get_loop_mask_type (loop_vec_info);
2482 : : extern bool vect_is_simple_use (tree, vec_info *, enum vect_def_type *,
2483 : : stmt_vec_info * = NULL, gimple ** = NULL);
2484 : : extern bool vect_is_simple_use (vec_info *, slp_tree,
2485 : : unsigned, tree *, slp_tree *,
2486 : : enum vect_def_type *,
2487 : : tree *, stmt_vec_info * = NULL);
2488 : : extern bool vect_maybe_update_slp_op_vectype (slp_tree, tree);
2489 : : extern tree perm_mask_for_reverse (tree);
2490 : : extern bool supportable_widening_operation (vec_info*, code_helper,
2491 : : stmt_vec_info, tree, tree,
2492 : : code_helper*, code_helper*,
2493 : : int*, vec<tree> *);
2494 : : extern bool supportable_narrowing_operation (code_helper, tree, tree,
2495 : : code_helper *, int *,
2496 : : vec<tree> *);
2497 : : extern bool supportable_indirect_convert_operation (code_helper,
2498 : : tree, tree,
2499 : : vec<std::pair<tree, tree_code> > &,
2500 : : tree = NULL_TREE,
2501 : : slp_tree = NULL);
2502 : : extern int compare_step_with_zero (vec_info *, stmt_vec_info);
2503 : :
2504 : : extern unsigned record_stmt_cost (stmt_vector_for_cost *, int,
2505 : : enum vect_cost_for_stmt, stmt_vec_info,
2506 : : tree, int, enum vect_cost_model_location);
2507 : : extern unsigned record_stmt_cost (stmt_vector_for_cost *, int,
2508 : : enum vect_cost_for_stmt, slp_tree,
2509 : : tree, int, enum vect_cost_model_location);
2510 : : extern unsigned record_stmt_cost (stmt_vector_for_cost *, int,
2511 : : enum vect_cost_for_stmt,
2512 : : enum vect_cost_model_location);
2513 : : extern unsigned record_stmt_cost (stmt_vector_for_cost *, int,
2514 : : enum vect_cost_for_stmt, stmt_vec_info,
2515 : : slp_tree, tree, int,
2516 : : enum vect_cost_model_location);
2517 : :
2518 : : /* Overload of record_stmt_cost with VECTYPE derived from STMT_INFO. */
2519 : :
2520 : : inline unsigned
2521 : 2972000 : record_stmt_cost (stmt_vector_for_cost *body_cost_vec, int count,
2522 : : enum vect_cost_for_stmt kind, stmt_vec_info stmt_info,
2523 : : int misalign, enum vect_cost_model_location where)
2524 : : {
2525 : 2971279 : return record_stmt_cost (body_cost_vec, count, kind, stmt_info,
2526 : 1593331 : STMT_VINFO_VECTYPE (stmt_info), misalign, where);
2527 : : }
2528 : :
2529 : : /* Overload of record_stmt_cost with VECTYPE derived from SLP node. */
2530 : :
2531 : : inline unsigned
2532 : 1360018 : record_stmt_cost (stmt_vector_for_cost *body_cost_vec, int count,
2533 : : enum vect_cost_for_stmt kind, slp_tree node,
2534 : : int misalign, enum vect_cost_model_location where)
2535 : : {
2536 : 1203456 : return record_stmt_cost (body_cost_vec, count, kind, node,
2537 : 93500 : SLP_TREE_VECTYPE (node), misalign, where);
2538 : : }
2539 : :
2540 : : extern void vect_finish_replace_stmt (vec_info *, stmt_vec_info, gimple *);
2541 : : extern void vect_finish_stmt_generation (vec_info *, stmt_vec_info, gimple *,
2542 : : gimple_stmt_iterator *);
2543 : : extern opt_result vect_mark_stmts_to_be_vectorized (loop_vec_info, bool *);
2544 : : extern tree vect_get_store_rhs (stmt_vec_info);
2545 : : void vect_get_vec_defs (vec_info *, slp_tree,
2546 : : tree, vec<tree> *,
2547 : : tree = NULL, vec<tree> * = NULL,
2548 : : tree = NULL, vec<tree> * = NULL,
2549 : : tree = NULL, vec<tree> * = NULL);
2550 : : extern tree vect_init_vector (vec_info *, stmt_vec_info, tree, tree,
2551 : : gimple_stmt_iterator *);
2552 : : extern tree vect_get_slp_vect_def (slp_tree, unsigned);
2553 : : extern bool vect_transform_stmt (vec_info *, stmt_vec_info,
2554 : : gimple_stmt_iterator *,
2555 : : slp_tree, slp_instance);
2556 : : extern void vect_remove_stores (vec_info *, stmt_vec_info);
2557 : : extern bool vect_nop_conversion_p (stmt_vec_info);
2558 : : extern opt_result vect_analyze_stmt (vec_info *, slp_tree,
2559 : : slp_instance, stmt_vector_for_cost *);
2560 : : extern void vect_get_load_cost (vec_info *, stmt_vec_info, slp_tree, int,
2561 : : dr_alignment_support, int, bool,
2562 : : unsigned int *, unsigned int *,
2563 : : stmt_vector_for_cost *,
2564 : : stmt_vector_for_cost *, bool);
2565 : : extern void vect_get_store_cost (vec_info *, stmt_vec_info, slp_tree, int,
2566 : : dr_alignment_support, int,
2567 : : unsigned int *, stmt_vector_for_cost *);
2568 : : extern bool vect_supportable_shift (vec_info *, enum tree_code, tree);
2569 : : extern tree vect_gen_perm_mask_any (tree, const vec_perm_indices &);
2570 : : extern tree vect_gen_perm_mask_checked (tree, const vec_perm_indices &);
2571 : : extern void optimize_mask_stores (class loop*);
2572 : : extern tree vect_gen_while (gimple_seq *, tree, tree, tree,
2573 : : const char * = nullptr);
2574 : : extern tree vect_gen_while_not (gimple_seq *, tree, tree, tree);
2575 : : extern opt_result vect_get_vector_types_for_stmt (vec_info *,
2576 : : stmt_vec_info, tree *,
2577 : : tree *, unsigned int = 0);
2578 : : extern opt_tree vect_get_mask_type_for_stmt (stmt_vec_info, unsigned int = 0);
2579 : :
2580 : : /* In tree-if-conv.cc. */
2581 : : extern bool ref_within_array_bound (gimple *, tree);
2582 : :
2583 : : /* In tree-vect-data-refs.cc. */
2584 : : extern bool vect_can_force_dr_alignment_p (const_tree, poly_uint64);
2585 : : extern enum dr_alignment_support vect_supportable_dr_alignment
2586 : : (vec_info *, dr_vec_info *, tree, int,
2587 : : bool = false);
2588 : : extern tree vect_get_smallest_scalar_type (stmt_vec_info, tree);
2589 : : extern opt_result vect_analyze_data_ref_dependences (loop_vec_info, unsigned int *);
2590 : : extern bool vect_slp_analyze_instance_dependence (vec_info *, slp_instance);
2591 : : extern opt_result vect_enhance_data_refs_alignment (loop_vec_info);
2592 : : extern opt_result vect_analyze_data_refs_alignment (loop_vec_info);
2593 : : extern bool vect_slp_analyze_instance_alignment (vec_info *, slp_instance);
2594 : : extern opt_result vect_analyze_data_ref_accesses (vec_info *, vec<int> *);
2595 : : extern opt_result vect_prune_runtime_alias_test_list (loop_vec_info);
2596 : : extern bool vect_gather_scatter_fn_p (vec_info *, bool, bool, tree, tree,
2597 : : tree, int, int *, internal_fn *, tree *,
2598 : : tree *, vec<int> * = nullptr);
2599 : : extern bool vect_check_gather_scatter (stmt_vec_info, tree,
2600 : : loop_vec_info, gather_scatter_info *,
2601 : : vec<int> * = nullptr);
2602 : : extern void vect_describe_gather_scatter_call (stmt_vec_info,
2603 : : gather_scatter_info *);
2604 : : extern opt_result vect_find_stmt_data_reference (loop_p, gimple *,
2605 : : vec<data_reference_p> *,
2606 : : vec<int> *, int);
2607 : : extern opt_result vect_analyze_data_refs (vec_info *, bool *);
2608 : : extern void vect_record_base_alignments (vec_info *);
2609 : : extern tree vect_create_data_ref_ptr (vec_info *,
2610 : : stmt_vec_info, tree, class loop *, tree,
2611 : : tree *, gimple_stmt_iterator *,
2612 : : gimple **, bool,
2613 : : tree = NULL_TREE);
2614 : : extern tree bump_vector_ptr (vec_info *, tree, gimple *, gimple_stmt_iterator *,
2615 : : stmt_vec_info, tree);
2616 : : extern void vect_copy_ref_info (tree, tree);
2617 : : extern tree vect_create_destination_var (tree, tree);
2618 : : extern bool vect_grouped_store_supported (tree, unsigned HOST_WIDE_INT);
2619 : : extern internal_fn vect_store_lanes_supported (tree, unsigned HOST_WIDE_INT, bool);
2620 : : extern bool vect_grouped_load_supported (tree, bool, unsigned HOST_WIDE_INT);
2621 : : extern internal_fn vect_load_lanes_supported (tree, unsigned HOST_WIDE_INT,
2622 : : bool, vec<int> * = nullptr);
2623 : : extern tree vect_setup_realignment (vec_info *,
2624 : : stmt_vec_info, tree, gimple_stmt_iterator *,
2625 : : tree *, enum dr_alignment_support, tree,
2626 : : class loop **);
2627 : : extern tree vect_get_new_vect_var (tree, enum vect_var_kind, const char *);
2628 : : extern tree vect_get_new_ssa_name (tree, enum vect_var_kind,
2629 : : const char * = NULL);
2630 : : extern tree vect_create_addr_base_for_vector_ref (vec_info *,
2631 : : stmt_vec_info, gimple_seq *,
2632 : : tree);
2633 : :
2634 : : /* In tree-vect-loop.cc. */
2635 : : extern tree neutral_op_for_reduction (tree, code_helper, tree, bool = true);
2636 : : extern widest_int vect_iv_limit_for_partial_vectors (loop_vec_info loop_vinfo);
2637 : : bool vect_rgroup_iv_might_wrap_p (loop_vec_info, rgroup_controls *);
2638 : : /* Used in tree-vect-loop-manip.cc */
2639 : : extern bool vect_need_peeling_or_partial_vectors_p (loop_vec_info);
2640 : : /* Used in gimple-loop-interchange.cc and tree-parloops.cc. */
2641 : : extern bool check_reduction_path (dump_user_location_t, loop_p, gphi *, tree,
2642 : : enum tree_code);
2643 : : extern bool needs_fold_left_reduction_p (tree, code_helper);
2644 : : /* Driver for the loop analysis stage. */
2645 : : extern opt_loop_vec_info vect_analyze_loop (class loop *, gimple *,
2646 : : vec_info_shared *);
2647 : : extern tree vect_build_loop_niters (loop_vec_info, bool * = NULL);
2648 : : extern void vect_gen_vector_loop_niters (loop_vec_info, tree, tree *,
2649 : : tree *, bool);
2650 : : extern tree vect_halve_mask_nunits (tree, machine_mode);
2651 : : extern tree vect_double_mask_nunits (tree, machine_mode);
2652 : : extern void vect_record_loop_mask (loop_vec_info, vec_loop_masks *,
2653 : : unsigned int, tree, tree);
2654 : : extern tree vect_get_loop_mask (loop_vec_info, gimple_stmt_iterator *,
2655 : : vec_loop_masks *,
2656 : : unsigned int, tree, unsigned int);
2657 : : extern void vect_record_loop_len (loop_vec_info, vec_loop_lens *, unsigned int,
2658 : : tree, unsigned int);
2659 : : extern tree vect_get_loop_len (loop_vec_info, gimple_stmt_iterator *,
2660 : : vec_loop_lens *, unsigned int, tree,
2661 : : unsigned int, unsigned int);
2662 : : extern tree vect_gen_loop_len_mask (loop_vec_info, gimple_stmt_iterator *,
2663 : : gimple_stmt_iterator *, vec_loop_lens *,
2664 : : unsigned int, tree, tree, unsigned int,
2665 : : unsigned int);
2666 : : extern gimple_seq vect_gen_len (tree, tree, tree, tree);
2667 : : extern vect_reduc_info info_for_reduction (loop_vec_info, slp_tree);
2668 : : extern bool reduction_fn_for_scalar_code (code_helper, internal_fn *);
2669 : :
2670 : : /* Driver for the loop transformation stage. */
2671 : : extern class loop *vect_transform_loop (loop_vec_info, gimple *);
/* Bundle of facts about the form of a loop.  vect_analyze_loop_form is
   declared with a non-const pointer to this struct and
   vect_create_loop_vinfo with a const pointer, so presumably the former
   fills it in and the latter consumes it -- TODO confirm against the
   definitions.  */
2672 : 932016 : struct vect_loop_form_info
2673 : : {
/* NOTE(review): judging by the names, the iteration count of the loop
   and that count minus one -- confirm.  */
2674 : : tree number_of_iterations;
2675 : : tree number_of_iterationsm1;
/* NOTE(review): presumably the assumptions under which the iteration
   counts above hold -- confirm.  */
2676 : : tree assumptions;
/* Condition statements recorded for the loop; the auto_vec is a member,
   so it lives and dies with this struct.  */
2677 : : auto_vec<gcond *> conds;
/* NOTE(review): presumably the condition of the inner loop when the
   analyzed loop is an outer loop -- confirm.  */
2678 : : gcond *inner_loop_cond;
/* NOTE(review): presumably the exit edge chosen for the loop -- confirm.  */
2679 : : edge loop_exit;
2680 : : };
2681 : : extern opt_result vect_analyze_loop_form (class loop *, gimple *,
2682 : : vect_loop_form_info *);
2683 : : extern loop_vec_info vect_create_loop_vinfo (class loop *, vec_info_shared *,
2684 : : const vect_loop_form_info *,
2685 : : loop_vec_info = nullptr);
2686 : : extern bool vectorizable_live_operation (vec_info *, stmt_vec_info,
2687 : : slp_tree, slp_instance, int,
2688 : : bool, stmt_vector_for_cost *);
2689 : : extern bool vectorizable_lane_reducing (loop_vec_info, stmt_vec_info,
2690 : : slp_tree, stmt_vector_for_cost *);
2691 : : extern bool vectorizable_reduction (loop_vec_info, stmt_vec_info,
2692 : : slp_tree, slp_instance,
2693 : : stmt_vector_for_cost *);
2694 : : extern bool vectorizable_induction (loop_vec_info, stmt_vec_info,
2695 : : slp_tree, stmt_vector_for_cost *);
2696 : : extern bool vect_transform_reduction (loop_vec_info, stmt_vec_info,
2697 : : gimple_stmt_iterator *,
2698 : : slp_tree);
2699 : : extern bool vect_transform_cycle_phi (loop_vec_info, stmt_vec_info,
2700 : : slp_tree, slp_instance);
2701 : : extern bool vectorizable_lc_phi (loop_vec_info, stmt_vec_info, slp_tree);
2702 : : extern bool vect_transform_lc_phi (loop_vec_info, stmt_vec_info, slp_tree);
2703 : : extern bool vectorizable_phi (bb_vec_info, stmt_vec_info, slp_tree,
2704 : : stmt_vector_for_cost *);
2705 : : extern bool vectorizable_recurr (loop_vec_info, stmt_vec_info,
2706 : : slp_tree, stmt_vector_for_cost *);
2707 : : extern bool vectorizable_early_exit (loop_vec_info, stmt_vec_info,
2708 : : gimple_stmt_iterator *,
2709 : : slp_tree, stmt_vector_for_cost *);
2710 : : extern bool vect_emulated_vector_p (tree);
2711 : : extern bool vect_can_vectorize_without_simd_p (tree_code);
2712 : : extern bool vect_can_vectorize_without_simd_p (code_helper);
2713 : : extern int vect_get_known_peeling_cost (loop_vec_info, int, int *,
2714 : : stmt_vector_for_cost *,
2715 : : stmt_vector_for_cost *,
2716 : : stmt_vector_for_cost *);
2717 : : extern tree cse_and_gimplify_to_preheader (loop_vec_info, tree);
2718 : :
2719 : : /* Nonlinear induction. */
2720 : : extern tree vect_peel_nonlinear_iv_init (gimple_seq*, tree, tree,
2721 : : tree, enum vect_induction_op_type);
2722 : :
2723 : : /* In tree-vect-slp.cc. */
2724 : : extern void vect_slp_init (void);
2725 : : extern void vect_slp_fini (void);
2726 : : extern void vect_free_slp_instance (slp_instance);
2727 : : extern bool vect_transform_slp_perm_load (vec_info *, slp_tree, const vec<tree> &,
2728 : : gimple_stmt_iterator *, poly_uint64,
2729 : : bool, unsigned *,
2730 : : unsigned * = nullptr, bool = false);
2731 : : extern bool vectorizable_slp_permutation (vec_info *, gimple_stmt_iterator *,
2732 : : slp_tree, stmt_vector_for_cost *);
2733 : : extern bool vect_slp_analyze_operations (vec_info *);
2734 : : extern void vect_schedule_slp (vec_info *, const vec<slp_instance> &);
2735 : : extern opt_result vect_analyze_slp (vec_info *, unsigned, bool);
2736 : : extern bool vect_make_slp_decision (loop_vec_info);
2737 : : extern bool vect_detect_hybrid_slp (loop_vec_info);
2738 : : extern void vect_optimize_slp (vec_info *);
2739 : : extern void vect_gather_slp_loads (vec_info *);
2740 : : extern tree vect_get_slp_scalar_def (slp_tree, unsigned);
2741 : : extern void vect_get_slp_defs (slp_tree, vec<tree> *);
2742 : : extern void vect_get_slp_defs (vec_info *, slp_tree, vec<vec<tree> > *,
2743 : : unsigned n = -1U);
2744 : : extern bool vect_slp_if_converted_bb (basic_block bb, loop_p orig_loop);
2745 : : extern bool vect_slp_function (function *);
2746 : : extern stmt_vec_info vect_find_last_scalar_stmt_in_slp (slp_tree);
2747 : : extern stmt_vec_info vect_find_first_scalar_stmt_in_slp (slp_tree);
2748 : : extern bool is_simple_and_all_uses_invariant (stmt_vec_info, loop_vec_info);
2749 : : extern bool can_duplicate_and_interleave_p (vec_info *, unsigned int, tree,
2750 : : unsigned int * = NULL,
2751 : : tree * = NULL, tree * = NULL);
2752 : : extern void duplicate_and_interleave (vec_info *, gimple_seq *, tree,
2753 : : const vec<tree> &, unsigned int, vec<tree> &);
2754 : : extern int vect_get_place_in_interleaving_chain (stmt_vec_info, stmt_vec_info);
2755 : : extern slp_tree vect_create_new_slp_node (unsigned, tree_code);
2756 : : extern void vect_free_slp_tree (slp_tree);
2757 : : extern bool compatible_calls_p (gcall *, gcall *, bool);
2758 : : extern int vect_slp_child_index_for_operand (const gimple *, int op, bool);
2759 : :
2760 : : extern tree prepare_vec_mask (loop_vec_info, tree, tree, tree,
2761 : : gimple_stmt_iterator *);
2762 : : extern tree vect_get_mask_load_else (int, tree);
2763 : : extern bool vect_load_perm_consecutive_p (slp_tree, unsigned = 0);
2764 : :
2765 : : /* In tree-vect-patterns.cc. */
2766 : : extern void
2767 : : vect_mark_pattern_stmts (vec_info *, stmt_vec_info, gimple *, tree);
2768 : : extern bool vect_get_range_info (tree, wide_int*, wide_int*);
2769 : :
2770 : : /* Pattern recognition functions.
2771 : : Additional pattern recognition functions can (and will) be added
2772 : : in the future. */
2773 : : void vect_pattern_recog (vec_info *);
2774 : :
2775 : : /* In tree-vectorizer.cc. */
2776 : : unsigned vectorize_loops (void);
2777 : : void vect_free_loop_info_assumptions (class loop *);
2778 : : gimple *vect_loop_vectorized_call (class loop *, gcond **cond = NULL);
2779 : : bool vect_stmt_dominates_stmt_p (gimple *, gimple *);
2780 : :
2781 : : /* SLP Pattern matcher types, tree-vect-slp-patterns.cc. */
2782 : :
2783 : : /* Forward declaration of possible two operands operation that can be matched
2784 : : by the complex numbers pattern matchers. */
2785 : : enum _complex_operation : unsigned;
2786 : :
2787 : : /* All possible load permute values that could result from the partial data-flow
2788 : : analysis. */
2789 : : typedef enum _complex_perm_kinds {
2790 : : PERM_UNKNOWN,
2791 : : PERM_EVENODD,
2792 : : PERM_ODDEVEN,
2793 : : PERM_ODDODD,
2794 : : PERM_EVENEVEN,
2795 : : /* Can be combined with any other PERM values. */
2796 : : PERM_TOP
2797 : : } complex_perm_kinds_t;
2798 : :
2799 : : /* Cache from nodes to the load permutation they represent. */
2800 : : typedef hash_map <slp_tree, complex_perm_kinds_t>
2801 : : slp_tree_to_load_perm_map_t;
2802 : :
2803 : : /* Cache from nodes pair to being compatible or not. */
2804 : : typedef pair_hash <nofree_ptr_hash <_slp_tree>,
2805 : : nofree_ptr_hash <_slp_tree>> slp_node_hash;
2806 : : typedef hash_map <slp_node_hash, bool> slp_compat_nodes_map_t;
2807 : :
2808 : :
2809 : : /* Vector pattern matcher base class. All SLP pattern matchers must inherit
2810 : : from this type. */
2811 : :
2812 : : class vect_pattern
2813 : : {
2814 : : protected:
2815 : : /* The number of arguments that the IFN requires. */
2816 : : unsigned m_num_args;
2817 : :
2818 : : /* The internal function that will be used when a pattern is created. */
2819 : : internal_fn m_ifn;
2820 : :
2821 : : /* The current node being inspected. */
2822 : : slp_tree *m_node;
2823 : :
2824 : : /* The list of operands to be the children for the node produced when the
2825 : : internal function is created. */
2826 : : vec<slp_tree> m_ops;
2827 : :
2828 : : /* Default constructor where NODE is the root of the tree to inspect. */
2829 : 1072 : vect_pattern (slp_tree *node, vec<slp_tree> *m_ops, internal_fn ifn)
2830 : 1072 : {
2831 : 1072 : this->m_ifn = ifn;
2832 : 1072 : this->m_node = node;
2833 : 1072 : this->m_ops.create (0);
2834 : 1072 : if (m_ops)
2835 : 20 : this->m_ops.safe_splice (*m_ops);
2836 : : }
2837 : :
2838 : : public:
2839 : :
2840 : : /* Create a new instance of the pattern matcher class of the given type. */
2841 : : static vect_pattern* recognize (slp_tree_to_load_perm_map_t *,
2842 : : slp_compat_nodes_map_t *, slp_tree *);
2843 : :
2844 : : /* Build the pattern from the data collected so far. */
2845 : : virtual void build (vec_info *) = 0;
2846 : :
2847 : : /* Default destructor. */
2848 : : virtual ~vect_pattern ()
2849 : : {
2850 : : this->m_ops.release ();
2851 : : }
2852 : : };
2853 : :
2854 : : /* Function pointer to create a new pattern matcher from a generic type. */
2855 : : typedef vect_pattern* (*vect_pattern_decl_t) (slp_tree_to_load_perm_map_t *,
2856 : : slp_compat_nodes_map_t *,
2857 : : slp_tree *);
2858 : :
2859 : : /* List of supported pattern matchers. */
2860 : : extern vect_pattern_decl_t slp_patterns[];
2861 : :
2862 : : /* Number of supported pattern matchers. */
2863 : : extern size_t num__slp_patterns;
2864 : :
2865 : : /* ----------------------------------------------------------------------
2866 : : Target support routines
2867 : : -----------------------------------------------------------------------
2868 : : The following routines are provided to simplify costing decisions in
2869 : : target code. Please add more as needed. */
2870 : :
2871 : : /* Return true if an operaton of kind KIND for STMT_INFO represents
2872 : : the extraction of an element from a vector in preparation for
2873 : : storing the element to memory. */
2874 : : inline bool
2875 : : vect_is_store_elt_extraction (vect_cost_for_stmt kind, stmt_vec_info stmt_info)
2876 : : {
2877 : : return (kind == vec_to_scalar
2878 : : && STMT_VINFO_DATA_REF (stmt_info)
2879 : : && DR_IS_WRITE (STMT_VINFO_DATA_REF (stmt_info)));
2880 : : }
2881 : :
2882 : : /* Return true if STMT_INFO represents part of a reduction. */
2883 : : inline bool
2884 : 44898396 : vect_is_reduction (stmt_vec_info stmt_info)
2885 : : {
2886 : 44898396 : return STMT_VINFO_REDUC_IDX (stmt_info) != -1;
2887 : : }
2888 : :
2889 : : /* Return true if SLP_NODE represents part of a reduction. */
2890 : : inline bool
2891 : 277931 : vect_is_reduction (slp_tree slp_node)
2892 : : {
2893 : 277931 : return SLP_TREE_REDUC_IDX (slp_node) != -1;
2894 : : }
2895 : :
2896 : : /* If STMT_INFO describes a reduction, return the vect_reduction_type
2897 : : of the reduction it describes, otherwise return -1. */
2898 : : inline int
2899 : : vect_reduc_type (vec_info *vinfo, slp_tree node)
2900 : : {
2901 : : if (loop_vec_info loop_vinfo = dyn_cast<loop_vec_info> (vinfo))
2902 : : {
2903 : : vect_reduc_info reduc_info = info_for_reduction (loop_vinfo, node);
2904 : : if (reduc_info)
2905 : : return int (VECT_REDUC_INFO_TYPE (reduc_info));
2906 : : }
2907 : : return -1;
2908 : : }
2909 : :
2910 : : /* If STMT_INFO is a COND_EXPR that includes an embedded comparison, return the
2911 : : scalar type of the values being compared. Return null otherwise. */
2912 : : inline tree
2913 : : vect_embedded_comparison_type (stmt_vec_info stmt_info)
2914 : : {
2915 : : if (auto *assign = dyn_cast<gassign *> (stmt_info->stmt))
2916 : : if (gimple_assign_rhs_code (assign) == COND_EXPR)
2917 : : {
2918 : : tree cond = gimple_assign_rhs1 (assign);
2919 : : if (COMPARISON_CLASS_P (cond))
2920 : : return TREE_TYPE (TREE_OPERAND (cond, 0));
2921 : : }
2922 : : return NULL_TREE;
2923 : : }
2924 : :
2925 : : /* If STMT_INFO is a comparison or contains an embedded comparison, return the
2926 : : scalar type of the values being compared. Return null otherwise. */
2927 : : inline tree
2928 : : vect_comparison_type (stmt_vec_info stmt_info)
2929 : : {
2930 : : if (auto *assign = dyn_cast<gassign *> (stmt_info->stmt))
2931 : : if (TREE_CODE_CLASS (gimple_assign_rhs_code (assign)) == tcc_comparison)
2932 : : return TREE_TYPE (gimple_assign_rhs1 (assign));
2933 : : return vect_embedded_comparison_type (stmt_info);
2934 : : }
2935 : :
2936 : : /* Return true if STMT_INFO extends the result of a load. */
2937 : : inline bool
2938 : : vect_is_extending_load (class vec_info *vinfo, stmt_vec_info stmt_info)
2939 : : {
2940 : : /* Although this is quite large for an inline function, this part
2941 : : at least should be inline. */
2942 : : gassign *assign = dyn_cast <gassign *> (stmt_info->stmt);
2943 : : if (!assign || !CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (assign)))
2944 : : return false;
2945 : :
2946 : : tree rhs = gimple_assign_rhs1 (stmt_info->stmt);
2947 : : tree lhs_type = TREE_TYPE (gimple_assign_lhs (assign));
2948 : : tree rhs_type = TREE_TYPE (rhs);
2949 : : if (!INTEGRAL_TYPE_P (lhs_type)
2950 : : || !INTEGRAL_TYPE_P (rhs_type)
2951 : : || TYPE_PRECISION (lhs_type) <= TYPE_PRECISION (rhs_type))
2952 : : return false;
2953 : :
2954 : : stmt_vec_info def_stmt_info = vinfo->lookup_def (rhs);
2955 : : return (def_stmt_info
2956 : : && STMT_VINFO_DATA_REF (def_stmt_info)
2957 : : && DR_IS_READ (STMT_VINFO_DATA_REF (def_stmt_info)));
2958 : : }
2959 : :
2960 : : /* Return true if STMT_INFO is an integer truncation. */
2961 : : inline bool
2962 : : vect_is_integer_truncation (stmt_vec_info stmt_info)
2963 : : {
2964 : : gassign *assign = dyn_cast <gassign *> (stmt_info->stmt);
2965 : : if (!assign || !CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (assign)))
2966 : : return false;
2967 : :
2968 : : tree lhs_type = TREE_TYPE (gimple_assign_lhs (assign));
2969 : : tree rhs_type = TREE_TYPE (gimple_assign_rhs1 (assign));
2970 : : return (INTEGRAL_TYPE_P (lhs_type)
2971 : : && INTEGRAL_TYPE_P (rhs_type)
2972 : : && TYPE_PRECISION (lhs_type) < TYPE_PRECISION (rhs_type));
2973 : : }
2974 : :
2975 : : /* Build a GIMPLE_ASSIGN or GIMPLE_CALL with the tree_code,
2976 : : or internal_fn contained in ch, respectively. */
2977 : : gimple * vect_gimple_build (tree, code_helper, tree, tree = NULL_TREE);
2978 : : #endif /* GCC_TREE_VECTORIZER_H */
|