Line data Source code
1 : /* Loop versioning pass.
2 : Copyright (C) 2018-2026 Free Software Foundation, Inc.
3 :
4 : This file is part of GCC.
5 :
6 : GCC is free software; you can redistribute it and/or modify it
7 : under the terms of the GNU General Public License as published by the
8 : Free Software Foundation; either version 3, or (at your option) any
9 : later version.
10 :
11 : GCC is distributed in the hope that it will be useful, but WITHOUT
12 : ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
13 : FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
14 : for more details.
15 :
16 : You should have received a copy of the GNU General Public License
17 : along with GCC; see the file COPYING3. If not see
18 : <http://www.gnu.org/licenses/>. */
19 :
20 : #include "config.h"
21 : #include "system.h"
22 : #include "coretypes.h"
23 : #include "backend.h"
24 : #include "tree.h"
25 : #include "gimple.h"
26 : #include "gimple-iterator.h"
27 : #include "tree-pass.h"
28 : #include "gimplify-me.h"
29 : #include "cfgloop.h"
30 : #include "tree-ssa-loop.h"
31 : #include "ssa.h"
32 : #include "tree-scalar-evolution.h"
33 : #include "tree-ssa-loop-ivopts.h"
34 : #include "fold-const.h"
35 : #include "tree-ssa-propagate.h"
36 : #include "tree-inline.h"
37 : #include "domwalk.h"
38 : #include "tree-vectorizer.h"
39 : #include "omp-general.h"
40 : #include "predict.h"
41 : #include "tree-into-ssa.h"
42 : #include "gimple-range.h"
43 : #include "tree-cfg.h"
44 : #include "hierarchical_discriminator.h"
45 :
46 : namespace {
47 :
48 : /* This pass looks for loops that could be simplified if certain loop
49 : invariant conditions were true. It is effectively a form of loop
50 : splitting in which the pass produces the split conditions itself,
51 : instead of using ones that are already present in the IL.
52 :
53 : Versioning for when strides are 1
54 : ---------------------------------
55 :
56 : At the moment the only thing the pass looks for are memory references
57 : like:
58 :
59 : for (auto i : ...)
60 : ...x[i * stride]...
61 :
62 : It considers changing such loops to:
63 :
64 : if (stride == 1)
65 : for (auto i : ...) [A]
66 : ...x[i]...
67 : else
68 : for (auto i : ...) [B]
69 : ...x[i * stride]...
70 :
71 : This can have several benefits:
72 :
73 : (1) [A] is often easier or cheaper to vectorize than [B].
74 :
75 : (2) The scalar code in [A] is simpler than the scalar code in [B]
76 : (if the loops cannot be vectorized or need an epilogue loop).
77 :
78 : (3) We might recognize [A] as a pattern, such as a memcpy or memset.
79 :
80 : (4) [A] has simpler address evolutions, which can help other passes
81 : like loop interchange.
82 :
83 : The optimization is particularly useful for assumed-shape arrays in
84 : Fortran, where the stride of the innermost dimension depends on the
85 : array descriptor but is often equal to 1 in practice. For example:
86 :
87 : subroutine f1(x)
88 : real :: x(:)
89 : x(:) = 100
90 : end subroutine f1
91 :
92 : generates the equivalent of:
93 :
94 : raw_stride = *x.dim[0].stride;
95 : stride = raw_stride != 0 ? raw_stride : 1;
96 : x_base = *x.data;
97 : ...
98 : tmp1 = stride * S;
99 : tmp2 = tmp1 - stride;
100 : *x_base[tmp2] = 1.0e+2;
101 :
102 : but in the common case that stride == 1, the last three statements
103 : simplify to:
104 :
105 : tmp3 = S + -1;
106 : *x_base[tmp3] = 1.0e+2;
107 :
108 : The optimization is in principle very simple. The difficult parts are:
109 :
110 : (a) deciding which parts of a general address calculation correspond
111 : to the inner dimension of an array, since this usually isn't explicit
112 : in the IL, and for C often isn't even explicit in the source code
113 :
114 : (b) estimating when the transformation is worthwhile
115 :
116 : Structure
117 : ---------
118 :
119 : The pass has four phases:
120 :
121 : (1) Walk through the statements looking for and recording potential
122 : versioning opportunities. Stop if there are none.
123 :
124 : (2) Use context-sensitive range information to see whether any versioning
125 : conditions are impossible in practice. Remove them if so, and stop
126 : if no opportunities remain.
127 :
128 : (We do this only after (1) to keep compile time down when no
129 : versioning opportunities exist.)
130 :
131 : (3) Apply the cost model. Decide which versioning opportunities are
132 : worthwhile and at which nesting level they should be applied.
133 :
134 : (4) Attempt to version all the loops selected by (3), so that:
135 :
136 : for (...)
137 : ...
138 :
139 : becomes:
140 :
141 : if (!cond)
142 : for (...) // Original loop
143 : ...
144 : else
145 : for (...) // New loop
146 : ...
147 :
148 : Use the version condition COND to simplify the new loop. */
149 :
/* Enumerates the likelihood that a particular value indexes the inner
   dimension of an array.  */
enum inner_likelihood {
  /* At least one possible value of the expression is actively
     inconsistent with indexing the innermost dimension.  */
  INNER_UNLIKELY,
  /* No evidence either way.  */
  INNER_DONT_KNOW,
  /* At least one possible value of the expression is consistent with
     indexing the innermost dimension.  */
  INNER_LIKELY
};
157 :
/* Information about one term of an address_info.  */
struct address_term_info
{
  /* The value of the term is EXPR * MULTIPLIER.  */
  tree expr;
  unsigned HOST_WIDE_INT multiplier;

  /* The stride applied by EXPR in each iteration of some unrecorded loop,
     or null if no stride has been identified.  Set by analyze_stride.  */
  tree stride;

  /* Enumerates the likelihood that EXPR indexes the inner dimension
     of an array.  Set by analyze_stride.  */
  enum inner_likelihood inner_likelihood;

  /* True if STRIDE == 1 is a versioning opportunity when considered
     in isolation.  Set by analyze_stride.  */
  bool versioning_opportunity_p;
};
177 :
/* Information about an address calculation, and the range of constant
   offsets applied to it.  */
class address_info
{
public:
  /* The maximum number of terms in the decomposition below.  */
  static const unsigned int MAX_TERMS = 8;

  /* One statement that calculates the address.  If multiple statements
     share the same address, we only record the first.  */
  gimple *stmt;

  /* The loop containing STMT (cached for convenience).  If multiple
     statements share the same address, they all belong to this loop.  */
  class loop *loop;

  /* A decomposition of the calculation into a sum of terms plus an
     optional base.  When BASE is provided, it is never an SSA name.
     Once initialization is complete, all members of TERMs are SSA names.  */
  tree base;
  auto_vec<address_term_info, MAX_TERMS> terms;

  /* All bytes accessed from the address fall in the offset range
     [MIN_OFFSET, MAX_OFFSET).  */
  HOST_WIDE_INT min_offset, max_offset;
};
203 :
/* Stores addresses based on their base and terms (ignoring the offsets).
   Used by loop_versioning::m_address_table; the address_info objects are
   owned elsewhere (nofree).  */
struct address_info_hasher : nofree_ptr_hash <address_info>
{
  static hashval_t hash (const address_info *);
  static bool equal (const address_info *, const address_info *);
};
210 :
/* Information about the versioning we'd like to apply to a loop.  */
class loop_info
{
public:
  bool worth_versioning_p () const;

  /* True if we've decided not to version this loop.  The remaining
     fields are meaningless if so.  */
  bool rejected_p;

  /* True if at least one subloop of this loop benefits from versioning.  */
  bool subloops_benefit_p;

  /* An estimate of the total number of instructions in the loop,
     excluding those in subloops that benefit from versioning.  */
  unsigned int num_insns;

  /* The outermost loop that can handle all the version checks
     described below.  */
  class loop *outermost;

  /* The first entry in the list of blocks that belong to this loop
     (and not to subloops).  m_next_block_in_loop provides the chain
     pointers for the list.  */
  basic_block block_list;

  /* We'd like to version the loop for the case in which these SSA names
     (keyed off their SSA_NAME_VERSION) are all equal to 1 at runtime.  */
  bitmap_head unity_names;

  /* If versioning succeeds, this points to the version of the loop that
     assumes the version condition holds.  */
  class loop *optimized_loop;
};
245 :
/* The main pass structure.  */
class loop_versioning
{
public:
  loop_versioning (function *);
  ~loop_versioning ();
  unsigned int run ();

private:
  /* Used to walk the dominator tree to find loop versioning conditions
     that are always false.  */
  class lv_dom_walker : public dom_walker
  {
  public:
    lv_dom_walker (loop_versioning &);

    edge before_dom_children (basic_block) final override;

  private:
    /* The parent pass.  */
    loop_versioning &m_lv;
  };

  /* Used to simplify statements based on conditions that are established
     by the version checks.  */
  class name_prop : public substitute_and_fold_engine
  {
  public:
    name_prop (loop_info &li) : m_li (li) {}
    tree value_of_expr (tree name, gimple *) final override;

  private:
    /* Information about the versioning we've performed on the loop.  */
    loop_info &m_li;
  };

  /* Return the loop_info record for LOOP, indexed by loop number.  */
  loop_info &get_loop_info (class loop *loop) { return m_loops[loop->num]; }

  unsigned int max_insns_for_loop (class loop *);
  bool expensive_stmt_p (gimple *);

  void version_for_unity (gimple *, tree);
  bool acceptable_multiplier_p (tree, unsigned HOST_WIDE_INT,
				unsigned HOST_WIDE_INT * = 0);
  bool acceptable_type_p (tree, unsigned HOST_WIDE_INT *);
  bool multiply_term_by (address_term_info &, tree);
  inner_likelihood get_inner_likelihood (tree, unsigned HOST_WIDE_INT);
  void dump_inner_likelihood (address_info &, address_term_info &);
  void analyze_stride (address_info &, address_term_info &,
		       tree, class loop *);
  bool find_per_loop_multiplication (address_info &, address_term_info &);
  bool analyze_term_using_scevs (address_info &, address_term_info &);
  void analyze_arbitrary_term (address_info &, address_term_info &);
  void analyze_address_fragment (address_info &);
  void record_address_fragment (gimple *, unsigned HOST_WIDE_INT,
				tree, unsigned HOST_WIDE_INT, HOST_WIDE_INT);
  void analyze_expr (gimple *, tree);
  bool analyze_block (basic_block);
  bool analyze_blocks ();

  void prune_loop_conditions (class loop *);
  bool prune_conditions ();

  void merge_loop_info (class loop *, class loop *);
  void add_loop_to_queue (class loop *);
  bool decide_whether_loop_is_versionable (class loop *);
  bool make_versioning_decisions ();

  bool version_loop (class loop *);
  void implement_versioning_decisions ();

  /* The function we're optimizing.  */
  function *m_fn;

  /* The obstack to use for all pass-specific bitmaps.  */
  bitmap_obstack m_bitmap_obstack;

  /* An obstack to use for general allocation.  */
  obstack m_obstack;

  /* The total number of loop version conditions we've found.  */
  unsigned int m_num_conditions;

  /* Assume that an address fragment of the form i * stride * scale
     (for variable stride and constant scale) will not benefit from
     versioning for stride == 1 when scale is greater than this value.  */
  unsigned HOST_WIDE_INT m_maximum_scale;

  /* Information about each loop.  */
  auto_vec<loop_info> m_loops;

  /* Used to form a linked list of blocks that belong to a loop,
     started by loop_info::block_list.  */
  auto_vec<basic_block> m_next_block_in_loop;

  /* The list of loops that we've decided to version.  */
  auto_vec<class loop *> m_loops_to_version;

  /* A table of addresses in the current loop, keyed off their values
     but not their offsets.  */
  hash_table <address_info_hasher> m_address_table;

  /* A list of all addresses in M_ADDRESS_TABLE, in a predictable order.  */
  auto_vec <address_info *, 32> m_address_list;
};
351 :
352 : /* If EXPR is an SSA name and not a default definition, return the
353 : defining statement, otherwise return null. */
354 :
355 : static gimple *
356 1269655 : maybe_get_stmt (tree expr)
357 : {
358 1094724 : if (TREE_CODE (expr) == SSA_NAME && !SSA_NAME_IS_DEFAULT_DEF (expr))
359 0 : return SSA_NAME_DEF_STMT (expr);
360 : return NULL;
361 : }
362 :
363 : /* Like maybe_get_stmt, but also return null if the defining
364 : statement isn't an assignment. */
365 :
366 : static gassign *
367 1134879 : maybe_get_assign (tree expr)
368 : {
369 2415013 : return safe_dyn_cast <gassign *> (maybe_get_stmt (expr));
370 : }
371 :
372 : /* Return true if this pass should look through a cast of expression FROM
373 : to type TYPE when analyzing pieces of an address. */
374 :
375 : static bool
376 64599 : look_through_cast_p (tree type, tree from)
377 : {
378 64599 : return (INTEGRAL_TYPE_P (TREE_TYPE (from)) == INTEGRAL_TYPE_P (type)
379 64599 : && POINTER_TYPE_P (TREE_TYPE (from)) == POINTER_TYPE_P (type));
380 : }
381 :
/* Strip all conversions of integers or pointers from EXPR, regardless
   of whether the conversions are nops.  This is useful in the context
   of this pass because we're not trying to fold or simulate the
   expression; we just want to see how it's structured.  */

static tree
strip_casts (tree expr)
{
  /* Bound on the number of SSA definition steps we follow below, so that
     the cost per expression stays constant.  */
  const unsigned int MAX_NITERS = 4;

  /* First strip conversions that are explicit in the expression tree
     itself (these are not bounded by MAX_NITERS)...  */
  tree type = TREE_TYPE (expr);
  while (CONVERT_EXPR_P (expr)
	 && look_through_cast_p (type, TREE_OPERAND (expr, 0)))
    expr = TREE_OPERAND (expr, 0);

  /* ...then follow SSA names back through assignments whose rhs is
     itself a conversion, for at most MAX_NITERS steps.  */
  for (unsigned int niters = 0; niters < MAX_NITERS; ++niters)
    {
      gassign *assign = maybe_get_assign (expr);
      if (assign
	  && CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (assign))
	  && look_through_cast_p (type, gimple_assign_rhs1 (assign)))
	expr = gimple_assign_rhs1 (assign);
      else
	break;
    }
  return expr;
}
409 :
410 : /* Compare two address_term_infos in the same address_info. */
411 :
412 : static int
413 324913 : compare_address_terms (const void *a_uncast, const void *b_uncast)
414 : {
415 324913 : const address_term_info *a = (const address_term_info *) a_uncast;
416 324913 : const address_term_info *b = (const address_term_info *) b_uncast;
417 :
418 324913 : if (a->expr != b->expr)
419 324581 : return SSA_NAME_VERSION (a->expr) < SSA_NAME_VERSION (b->expr) ? -1 : 1;
420 :
421 332 : if (a->multiplier != b->multiplier)
422 266 : return a->multiplier < b->multiplier ? -1 : 1;
423 :
424 : return 0;
425 : }
426 :
427 : /* Dump ADDRESS using flags FLAGS. */
428 :
429 : static void
430 424 : dump_address_info (dump_flags_t flags, address_info &address)
431 : {
432 424 : if (address.base)
433 2 : dump_printf (flags, "%T + ", address.base);
434 1186 : for (unsigned int i = 0; i < address.terms.length (); ++i)
435 : {
436 762 : if (i != 0)
437 338 : dump_printf (flags, " + ");
438 762 : dump_printf (flags, "%T", address.terms[i].expr);
439 762 : if (address.terms[i].multiplier != 1)
440 530 : dump_printf (flags, " * %wd", address.terms[i].multiplier);
441 : }
442 848 : dump_printf (flags, " + [%wd, %wd]",
443 424 : address.min_offset, address.max_offset - 1);
444 424 : }
445 :
446 : /* Hash an address_info based on its base and terms. */
447 :
448 : hashval_t
449 208055 : address_info_hasher::hash (const address_info *info)
450 : {
451 208055 : inchash::hash hash;
452 208055 : hash.add_int (info->base ? TREE_CODE (info->base) : 0);
453 416110 : hash.add_int (info->terms.length ());
454 520162 : for (unsigned int i = 0; i < info->terms.length (); ++i)
455 : {
456 312107 : hash.add_int (SSA_NAME_VERSION (info->terms[i].expr));
457 312107 : hash.add_hwi (info->terms[i].multiplier);
458 : }
459 208055 : return hash.end ();
460 : }
461 :
462 : /* Return true if two address_infos have equal bases and terms. Other
463 : properties might be different (such as the statement or constant
464 : offset range). */
465 :
466 : bool
467 113918 : address_info_hasher::equal (const address_info *a, const address_info *b)
468 : {
469 113918 : if (a->base != b->base
470 113918 : && (!a->base || !b->base || !operand_equal_p (a->base, b->base, 0)))
471 3084 : return false;
472 :
473 332502 : if (a->terms.length () != b->terms.length ())
474 : return false;
475 :
476 218518 : for (unsigned int i = 0; i < a->terms.length (); ++i)
477 140687 : if (a->terms[i].expr != b->terms[i].expr
478 140687 : || a->terms[i].multiplier != b->terms[i].multiplier)
479 : return false;
480 :
481 : return true;
482 : }
483 :
484 : /* Return true if we want to version the loop, i.e. if we have a
485 : specific reason for doing so and no specific reason not to. */
486 :
487 : bool
488 7506 : loop_info::worth_versioning_p () const
489 : {
490 7506 : return (!rejected_p
491 5758 : && (!bitmap_empty_p (&unity_names) || subloops_benefit_p));
492 : }
493 :
/* Construct a dominator-tree walker for parent pass LV.  */

loop_versioning::lv_dom_walker::lv_dom_walker (loop_versioning &lv)
  : dom_walker (CDI_DOMINATORS), m_lv (lv)
{
}
498 :
499 : /* Process BB before processing the blocks it dominates. */
500 :
501 : edge
502 31704 : loop_versioning::lv_dom_walker::before_dom_children (basic_block bb)
503 : {
504 31704 : if (bb == bb->loop_father->header)
505 4407 : m_lv.prune_loop_conditions (bb->loop_father);
506 :
507 31704 : return NULL;
508 : }
509 :
510 : /* Decide whether to replace VAL with a new value in a versioned loop.
511 : Return the new value if so, otherwise return null. */
512 :
513 : tree
514 69670 : loop_versioning::name_prop::value_of_expr (tree val, gimple *)
515 : {
516 69670 : if (TREE_CODE (val) == SSA_NAME
517 69670 : && bitmap_bit_p (&m_li.unity_names, SSA_NAME_VERSION (val)))
518 1441 : return build_one_cst (TREE_TYPE (val));
519 : return NULL_TREE;
520 : }
521 :
522 : /* Initialize the structure to optimize FN. */
523 :
524 27928 : loop_versioning::loop_versioning (function *fn)
525 27928 : : m_fn (fn),
526 27928 : m_num_conditions (0),
527 27928 : m_address_table (31)
528 : {
529 27928 : unsigned m_nloops = number_of_loops (fn);
530 27928 : bitmap_obstack_initialize (&m_bitmap_obstack);
531 27928 : gcc_obstack_init (&m_obstack);
532 :
533 : /* Initialize the loop information. */
534 27928 : m_loops.safe_grow_cleared (m_nloops, true);
535 180612 : for (unsigned int i = 0; i < m_nloops; ++i)
536 : {
537 152684 : m_loops[i].outermost = get_loop (m_fn, 0);
538 152684 : bitmap_initialize (&m_loops[i].unity_names, &m_bitmap_obstack);
539 : }
540 :
541 : /* Initialize the list of blocks that belong to each loop. */
542 27928 : unsigned int nbbs = last_basic_block_for_fn (fn);
543 27928 : m_next_block_in_loop.safe_grow (nbbs, true);
544 27928 : basic_block bb;
545 746420 : FOR_EACH_BB_FN (bb, fn)
546 : {
547 718492 : loop_info &li = get_loop_info (bb->loop_father);
548 718492 : m_next_block_in_loop[bb->index] = li.block_list;
549 718492 : li.block_list = bb;
550 : }
551 :
552 : /* MAX_FIXED_MODE_SIZE should be a reasonable maximum scale for
553 : unvectorizable code, since it is the largest size that can be
554 : handled efficiently by scalar code. omp_max_vf calculates the
555 : maximum number of bytes in a vector, when such a value is relevant
556 : to loop optimization. */
557 27928 : m_maximum_scale = estimated_poly_value (omp_max_vf (false));
558 83784 : m_maximum_scale = MAX (m_maximum_scale, MAX_FIXED_MODE_SIZE);
559 27928 : }
560 :
/* Release all pass-local memory (bitmaps and general obstack data).  */

loop_versioning::~loop_versioning ()
{
  bitmap_obstack_release (&m_bitmap_obstack);
  obstack_free (&m_obstack, NULL);
}
566 :
567 : /* Return the maximum number of instructions allowed in LOOP before
568 : it becomes too big for versioning.
569 :
570 : There are separate limits for inner and outer loops. The limit for
571 : inner loops applies only to loops that benefit directly from versioning.
572 : The limit for outer loops applies to all code in the outer loop and
573 : its subloops that *doesn't* benefit directly from versioning; such code
574 : would be "taken along for the ride". The idea is that if the cost of
575 : the latter is small, it is better to version outer loops rather than
576 : inner loops, both to reduce the number of repeated checks and to enable
577 : more of the loop nest to be optimized as a natural nest (e.g. by loop
578 : interchange or outer-loop vectorization). */
579 :
580 : unsigned int
581 1581 : loop_versioning::max_insns_for_loop (class loop *loop)
582 : {
583 1581 : return (loop->inner
584 383 : ? param_loop_versioning_max_outer_insns
585 1198 : : param_loop_versioning_max_inner_insns);
586 : }
587 :
588 : /* Return true if for cost reasons we should avoid versioning any loop
589 : that contains STMT.
590 :
591 : Note that we don't need to check whether versioning is invalid for
592 : correctness reasons, since the versioning process does that for us.
593 : The conditions involved are too rare to be worth duplicating here. */
594 :
595 : bool
596 809394 : loop_versioning::expensive_stmt_p (gimple *stmt)
597 : {
598 0 : if (gcall *call = dyn_cast <gcall *> (stmt))
599 : /* Assume for now that the time spent in an "expensive" call would
600 : overwhelm any saving from versioning. */
601 22976 : return !gimple_inexpensive_call_p (call);
602 : return false;
603 : }
604 :
/* Record that we want to version the loop that contains STMT for the
   case in which SSA name NAME is equal to 1.  We already know that NAME
   is invariant in the loop.  */

void
loop_versioning::version_for_unity (gimple *stmt, tree name)
{
  class loop *loop = loop_containing_stmt (stmt);
  loop_info &li = get_loop_info (loop);

  /* bitmap_set_bit returns true only if the bit was previously clear,
     i.e. if this is a new request rather than a duplicate.  */
  if (bitmap_set_bit (&li.unity_names, SSA_NAME_VERSION (name)))
    {
      /* This is the first time we've wanted to version LOOP for NAME.
	 Keep track of the outermost loop that can handle all versioning
	 checks in LI.  */
      class loop *outermost
	= outermost_invariant_loop_for_expr (loop, name);
      if (loop_depth (li.outermost) < loop_depth (outermost))
	li.outermost = outermost;

      if (dump_enabled_p ())
	{
	  dump_printf_loc (MSG_NOTE, stmt, "want to version containing loop"
			   " for when %T == 1", name);
	  if (outermost == loop)
	    dump_printf (MSG_NOTE, "; cannot hoist check further");
	  else
	    {
	      dump_printf (MSG_NOTE, "; could implement the check at loop"
			   " depth %d", loop_depth (outermost));
	      /* A previously-recorded check may constrain hoisting more
		 tightly than this one does.  */
	      if (loop_depth (li.outermost) > loop_depth (outermost))
		dump_printf (MSG_NOTE, ", but other checks only allow"
			     " a depth of %d", loop_depth (li.outermost));
	    }
	  dump_printf (MSG_NOTE, "\n");
	}

      m_num_conditions += 1;
    }
  else
    {
      /* This is a duplicate request.  */
      if (dump_enabled_p ())
	dump_printf_loc (MSG_NOTE, stmt, "already asked to version containing"
			 " loop for when %T == 1\n", name);
    }
}
652 :
653 : /* Return true if OP1_TREE is constant and if in principle it is worth
654 : versioning an address fragment of the form:
655 :
656 : i * OP1_TREE * OP2 * stride
657 :
658 : for the case in which stride == 1. This in practice means testing
659 : whether:
660 :
661 : OP1_TREE * OP2 <= M_MAXIMUM_SCALE.
662 :
663 : If RESULT is nonnull, store OP1_TREE * OP2 there when returning true. */
664 :
665 : bool
666 340521 : loop_versioning::acceptable_multiplier_p (tree op1_tree,
667 : unsigned HOST_WIDE_INT op2,
668 : unsigned HOST_WIDE_INT *result)
669 : {
670 340521 : if (tree_fits_uhwi_p (op1_tree))
671 : {
672 321402 : unsigned HOST_WIDE_INT op1 = tree_to_uhwi (op1_tree);
673 : /* The first part checks for overflow. */
674 321402 : if (op1 * op2 >= op2 && op1 * op2 <= m_maximum_scale)
675 : {
676 293687 : if (result)
677 241709 : *result = op1 * op2;
678 293687 : return true;
679 : }
680 : }
681 : return false;
682 : }
683 :
684 : /* Return true if it is worth using loop versioning on a memory access
685 : of type TYPE. Store the size of the access in *SIZE if so. */
686 :
687 : bool
688 210925 : loop_versioning::acceptable_type_p (tree type, unsigned HOST_WIDE_INT *size)
689 : {
690 210925 : return (TYPE_SIZE_UNIT (type)
691 210925 : && acceptable_multiplier_p (TYPE_SIZE_UNIT (type), 1, size));
692 : }
693 :
/* See whether OP is constant and whether we can multiply TERM by that
   constant without exceeding M_MAXIMUM_SCALE.  Return true and update
   TERM if so.  */

bool
loop_versioning::multiply_term_by (address_term_info &term, tree op)
{
  /* On success this folds the product back into TERM's multiplier.  */
  return acceptable_multiplier_p (op, term.multiplier, &term.multiplier);
}
703 :
/* Decide whether an address fragment of the form STRIDE * MULTIPLIER
   is likely to be indexing an innermost dimension, returning the result
   as an INNER_* probability.  */

inner_likelihood
loop_versioning::get_inner_likelihood (tree stride,
				       unsigned HOST_WIDE_INT multiplier)
{
  /* Bound on the size of the worklist below, so that the cost of the
     analysis stays constant.  */
  const unsigned int MAX_NITERS = 8;

  /* Iterate over possible values of STRIDE.  Return INNER_LIKELY if at
     least one of those values is likely to be for the innermost dimension.
     Record in UNLIKELY_P if at least one of those values is unlikely to be
     for the innermost dimension.

     E.g. for:

       stride = cond ? a * b : 1

     we should treat STRIDE as being a likely inner dimension, since
     we know that it is 1 under at least some circumstances.  (See the
     Fortran example below.)  However:

       stride = a * b

     on its own is unlikely to be for the innermost dimension, since
     that would require both a and b to be 1 at runtime.  */
  bool unlikely_p = false;
  tree worklist[MAX_NITERS];
  unsigned int length = 0;
  worklist[length++] = stride;
  /* Note that LENGTH can grow while we iterate, as PHI arguments are
     appended below.  */
  for (unsigned int i = 0; i < length; ++i)
    {
      tree expr = worklist[i];

      if (CONSTANT_CLASS_P (expr))
	{
	  /* See if EXPR * MULTIPLIER would be consistent with an individual
	     access or a small grouped access.  */
	  if (acceptable_multiplier_p (expr, multiplier))
	    return INNER_LIKELY;
	  else
	    unlikely_p = true;
	}
      else if (gimple *stmt = maybe_get_stmt (expr))
	{
	  /* If EXPR is set by a PHI node, queue its arguments in case
	     we find one that is consistent with an inner dimension.

	     An important instance of this is the Fortran handling of array
	     descriptors, which calculates the stride of the inner dimension
	     using a PHI equivalent of:

		 raw_stride = a.dim[0].stride;
		 stride = raw_stride != 0 ? raw_stride : 1;

	     (Strides for outer dimensions do not treat 0 specially.)  */
	  if (gphi *phi = dyn_cast <gphi *> (stmt))
	    {
	      unsigned int nargs = gimple_phi_num_args (phi);
	      for (unsigned int j = 0; j < nargs && length < MAX_NITERS; ++j)
		worklist[length++] = strip_casts (gimple_phi_arg_def (phi, j));
	    }
	  /* If the value is set by an assignment, expect it to be read
	     from memory (such as an array descriptor) rather than be
	     calculated.  */
	  else if (gassign *assign = dyn_cast <gassign *> (stmt))
	    {
	      if (!gimple_assign_load_p (assign))
		unlikely_p = true;
	    }
	  /* Things like calls don't really tell us anything.  */
	}
    }

  /* We didn't find any possible values of STRIDE that were likely to be
     for the innermost dimension.  If we found one that was actively
     unlikely to be for the innermost dimension, assume that that applies
     to STRIDE too.  */
  return unlikely_p ? INNER_UNLIKELY : INNER_DONT_KNOW;
}
785 :
786 : /* Dump the likelihood that TERM's stride is for the innermost dimension.
787 : ADDRESS is the address that contains TERM. */
788 :
789 : void
790 241 : loop_versioning::dump_inner_likelihood (address_info &address,
791 : address_term_info &term)
792 : {
793 241 : if (term.inner_likelihood == INNER_LIKELY)
794 87 : dump_printf_loc (MSG_NOTE, address.stmt, "%T is likely to be the"
795 : " innermost dimension\n", term.stride);
796 154 : else if (term.inner_likelihood == INNER_UNLIKELY)
797 36 : dump_printf_loc (MSG_NOTE, address.stmt, "%T is probably not the"
798 : " innermost dimension\n", term.stride);
799 : else
800 118 : dump_printf_loc (MSG_NOTE, address.stmt, "cannot tell whether %T"
801 : " is the innermost dimension\n", term.stride);
802 241 : }
803 :
/* The caller has identified that STRIDE is the stride of interest
   in TERM, and that the stride is applied in OP_LOOP.  Record this
   information in TERM, deciding whether STRIDE is likely to be for
   the innermost dimension of an array and whether it represents a
   versioning opportunity.  ADDRESS is the address that contains TERM.  */

void
loop_versioning::analyze_stride (address_info &address,
				 address_term_info &term,
				 tree stride, class loop *op_loop)
{
  term.stride = stride;

  term.inner_likelihood = get_inner_likelihood (stride, term.multiplier);
  if (dump_enabled_p ())
    dump_inner_likelihood (address, term);

  /* To be a versioning opportunity we require:

     - The multiplier applied by TERM is equal to the access size,
       so that when STRIDE is 1, the accesses in successive loop
       iterations are consecutive.

       This is deliberately conservative.  We could relax it to handle
       other cases (such as those with gaps between iterations) if we
       find any real testcases for which it's useful.

     - the stride is applied in the same loop as STMT rather than
       in an outer loop.  Although versioning for strides applied in
       outer loops could help in some cases -- such as enabling
       more loop interchange -- the savings are much lower than for
       inner loops.

     - the stride is an SSA name that is invariant in STMT's loop,
       since otherwise versioning isn't possible.  */
  /* The size of the access is the width of the offset range
     [min_offset, max_offset).  */
  unsigned HOST_WIDE_INT access_size = address.max_offset - address.min_offset;
  if (term.multiplier == access_size
      && address.loop == op_loop
      && TREE_CODE (stride) == SSA_NAME
      && expr_invariant_in_loop_p (address.loop, stride))
    {
      term.versioning_opportunity_p = true;
      if (dump_enabled_p ())
	dump_printf_loc (MSG_NOTE, address.stmt, "%T == 1 is a versioning"
			 " opportunity\n", stride);
    }
}
851 :
/* See whether address term TERM (which belongs to ADDRESS) is the result
   of multiplying a varying SSA name by a loop-invariant SSA name.
   Return true and update TERM if so.

   This handles both cases that SCEV might handle, such as:

     for (int i = 0; i < n; ++i)
       res += a[i * stride];

   and ones in which the term varies arbitrarily between iterations, such as:

     for (int i = 0; i < n; ++i)
       res += a[index[i] * stride];  */

bool
loop_versioning::find_per_loop_multiplication (address_info &address,
					       address_term_info &term)
{
  /* The term must be defined by a multiplication...  */
  gassign *mult = maybe_get_assign (term.expr);
  if (!mult || gimple_assign_rhs_code (mult) != MULT_EXPR)
    return false;

  /* ...performed inside some loop (not at function toplevel).  */
  class loop *mult_loop = loop_containing_stmt (mult);
  if (!loop_outer (mult_loop))
    return false;

  tree op1 = strip_casts (gimple_assign_rhs1 (mult));
  tree op2 = strip_casts (gimple_assign_rhs2 (mult));
  if (TREE_CODE (op1) != SSA_NAME || TREE_CODE (op2) != SSA_NAME)
    return false;

  /* Exactly one operand must be invariant in the multiplication's loop:
     the invariant one is the candidate stride, the other the varying
     index.  */
  bool invariant1_p = expr_invariant_in_loop_p (mult_loop, op1);
  bool invariant2_p = expr_invariant_in_loop_p (mult_loop, op2);
  if (invariant1_p == invariant2_p)
    return false;

  /* Make sure that the loop invariant is OP2 rather than OP1.  */
  if (invariant1_p)
    std::swap (op1, op2);

  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, address.stmt, "address term %T = varying %T"
		     " * loop-invariant %T\n", term.expr, op1, op2);
  analyze_stride (address, term, op2, mult_loop);
  return true;
}
898 :
/* Try to use scalar evolutions to find an address stride for TERM,
   which belongs to ADDRESS.  Return true and update TERM if so.

   Here we are interested in any evolution information we can find,
   not just evolutions wrt ADDRESS->LOOP.  For example, if we find that
   an outer loop obviously iterates over the inner dimension of an array,
   that information can help us eliminate worthless versioning opportunities
   in inner loops.  */

bool
loop_versioning::analyze_term_using_scevs (address_info &address,
					   address_term_info &term)
{
  /* The term must have a defining statement for SCEV to analyze.  */
  gimple *setter = maybe_get_stmt (term.expr);
  if (!setter)
    return false;

  /* Analyze the evolution wrt the innermost loop that contains the
     definition; values defined outside any loop have no evolution.  */
  class loop *wrt_loop = loop_containing_stmt (setter);
  if (!loop_outer (wrt_loop))
    return false;

  tree chrec = strip_casts (analyze_scalar_evolution (wrt_loop, term.expr));
  if (TREE_CODE (chrec) == POLYNOMIAL_CHREC)
    {
      if (dump_enabled_p ())
	dump_printf_loc (MSG_NOTE, address.stmt,
			 "address term %T = %T\n", term.expr, chrec);

      /* Peel casts and accumulate constant multiplications, up to the
	 limit allowed by M_MAXIMUM_SCALE.  Note that multiply_term_by
	 updates TERM's multiplier as a side effect and fails (stopping
	 the peeling) once the scale limit would be exceeded.  */
      tree stride = strip_casts (CHREC_RIGHT (chrec));
      while (TREE_CODE (stride) == MULT_EXPR
	     && multiply_term_by (term, TREE_OPERAND (stride, 1)))
	stride = strip_casts (TREE_OPERAND (stride, 0));

      /* Continue peeling through SSA definitions of the form
	 stride = x * CST, folding each constant into the multiplier.  */
      gassign *assign;
      while ((assign = maybe_get_assign (stride))
	     && gimple_assign_rhs_code (assign) == MULT_EXPR
	     && multiply_term_by (term, gimple_assign_rhs2 (assign)))
	{
	  if (dump_enabled_p ())
	    dump_printf_loc (MSG_NOTE, address.stmt,
			     "looking through %G", (gimple *) assign);
	  stride = strip_casts (gimple_assign_rhs1 (assign));
	}

      analyze_stride (address, term, stride, get_chrec_loop (chrec));
      return true;
    }

  return false;
}
951 :
952 : /* Address term TERM is an arbitrary term that provides no versioning
953 : opportunities. Analyze it to see whether it contains any likely
954 : inner strides, so that we don't mistakenly version for other
955 : (less likely) candidates.
956 :
957 : This copes with invariant innermost indices such as:
958 :
959 : x(i, :) = 100
960 :
961 : where the "i" component of the address is invariant in the loop
962 : but provides the real inner stride.
963 :
964 : ADDRESS is the address that contains TERM. */
965 :
966 : void
967 18087 : loop_versioning::analyze_arbitrary_term (address_info &address,
968 : address_term_info &term)
969 :
970 : {
971 : /* A multiplication offers two potential strides. Pick the one that
972 : is most likely to be an innermost stride. */
973 18087 : tree expr = term.expr, alt = NULL_TREE;
974 18087 : gassign *mult = maybe_get_assign (expr);
975 32201 : if (mult && gimple_assign_rhs_code (mult) == MULT_EXPR)
976 : {
977 758 : expr = strip_casts (gimple_assign_rhs1 (mult));
978 1516 : alt = strip_casts (gimple_assign_rhs2 (mult));
979 : }
980 18087 : term.stride = expr;
981 18087 : term.inner_likelihood = get_inner_likelihood (expr, term.multiplier);
982 18087 : if (alt)
983 : {
984 758 : inner_likelihood alt_l = get_inner_likelihood (alt, term.multiplier);
985 758 : if (alt_l > term.inner_likelihood)
986 : {
987 321 : term.stride = alt;
988 321 : term.inner_likelihood = alt_l;
989 : }
990 : }
991 18087 : if (dump_enabled_p ())
992 41 : dump_inner_likelihood (address, term);
993 18087 : }
994 :
995 : /* Try to identify loop strides in ADDRESS and try to choose realistic
996 : versioning opportunities based on these strides.
997 :
998 : The main difficulty here isn't finding strides that could be used
999 : in a version check (that's pretty easy). The problem instead is to
1000 : avoid versioning for some stride S that is unlikely ever to be 1 at
1001 : runtime. Versioning for S == 1 on its own would lead to unnecessary
1002 : code bloat, while adding S == 1 to more realistic version conditions
1003 : would lose the optimisation opportunity offered by those other conditions.
1004 :
1005 : For example, versioning for a stride of 1 in the Fortran code:
1006 :
1007 : integer :: a(:,:)
1008 : a(1,:) = 1
1009 :
1010 : is not usually a good idea, since the assignment is iterating over
1011 : an outer dimension and is relatively unlikely to have a stride of 1.
1012 : (It isn't impossible, since the inner dimension might be 1, or the
1013 : array might be transposed.) Similarly, in:
1014 :
1015 : integer :: a(:,:), b(:,:)
1016 : b(:,1) = a(1,:)
1017 :
1018 : b(:,1) is relatively likely to have a stride of 1 while a(1,:) isn't.
1019 : Versioning for when both strides are 1 would lose most of the benefit
1020 : of versioning for b's access.
1021 :
1022 : The approach we take is as follows:
1023 :
1024 : - Analyze each term to see whether it has an identifiable stride,
1025 : regardless of which loop applies the stride.
1026 :
1027 : - Evaluate the likelihood that each such stride is for the innermost
1028 : dimension of an array, on the scale "likely", "don't know" or "unlikely".
1029 :
1030 : - If there is a single "likely" innermost stride, and that stride is
1031 : applied in the loop that contains STMT, version the loop for when the
1032 : stride is 1. This deals with the cases in which we're fairly
1033 : confident of doing the right thing, such as the b(:,1) reference above.
1034 :
1035 : - If there are no "likely" innermost strides, and the loop that contains
1036 : STMT uses a stride that we rated as "don't know", version for when
1037 : that stride is 1. This is principally used for C code such as:
1038 :
1039 : for (int i = 0; i < n; ++i)
1040 : a[i * x] = ...;
1041 :
1042 : and:
1043 :
1044 : for (int j = 0; j < n; ++j)
1045 : for (int i = 0; i < n; ++i)
1046 : a[i * x + j * y] = ...;
1047 :
1048 : where nothing in the way "x" and "y" are set gives a hint as to
1049 : whether "i" iterates over the innermost dimension of the array.
1050 : In these situations it seems reasonable to assume the
1051 : programmer has nested the loops appropriately (although of course
1052 : there are examples like GEMM in which this assumption doesn't hold
1053 : for all accesses in the loop).
1054 :
1055 : This case is also useful for the Fortran equivalent of the
1056 : above C code. */
1057 :
void
loop_versioning::analyze_address_fragment (address_info &address)
{
  if (dump_enabled_p ())
    {
      dump_printf_loc (MSG_NOTE, address.stmt, "analyzing address fragment ");
      dump_address_info (MSG_NOTE, address);
      dump_printf (MSG_NOTE, "\n");
    }

  /* Analyze each component of the sum to see whether it involves an
     apparent stride.

     There is an overlap between the addresses that
     find_per_loop_multiplication and analyze_term_using_scevs can handle,
     but the former is much cheaper than SCEV analysis, so try it first.
     Pointer-typed terms are the base of the access rather than an index,
     so they are not analyzed as arbitrary terms.  */
  for (unsigned int i = 0; i < address.terms.length (); ++i)
    if (!find_per_loop_multiplication (address, address.terms[i])
	&& !analyze_term_using_scevs (address, address.terms[i])
	&& !POINTER_TYPE_P (TREE_TYPE (address.terms[i].expr)))
      analyze_arbitrary_term (address, address.terms[i]);

  /* Check for strides that are likely to be for the innermost dimension.

     1. If there is a single likely inner stride, if it is an SSA name,
	and if it is worth versioning the loop for when the SSA name
	equals 1, record that we want to do so.

     2. Otherwise, if there any likely inner strides, bail out.  This means
	one of:

	(a) There are multiple likely inner strides.  This suggests we're
	    confused and can't be confident of doing the right thing.

	(b) There is a single likely inner stride and it is a constant
	    rather than an SSA name.  This can mean either that the access
	    is a natural one without any variable strides, such as:

	      for (int i = 0; i < n; ++i)
		a[i] += 1;

	    or that a variable stride is applied to an outer dimension,
	    such as:

	      for (int i = 0; i < n; ++i)
		for (int j = 0; j < n; ++j)
		  a[j * stride][i] += 1;

	(c) There is a single likely inner stride, and it is an SSA name,
	    but it isn't a worthwhile versioning opportunity.  This usually
	    means that the variable stride is applied by an outer loop,
	    such as:

	      for (int i = 0; i < n; ++i)
		for (int j = 0; j < n; ++j)
		  a[j][i * stride] += 1;

	    or (using an example with a more natural loop nesting):

	      for (int i = 0; i < n; ++i)
		for (int j = 0; j < n; ++j)
		  a[i][j] += b[i * stride];

	    in cases where b[i * stride] cannot (yet) be hoisted for
	    aliasing reasons.

     3. If there are no likely inner strides, fall through to the next
	set of checks.

     Pointer equality is enough to check for uniqueness in (1), since we
     only care about SSA names.  */
  tree chosen_stride = NULL_TREE;
  tree version_stride = NULL_TREE;
  for (unsigned int i = 0; i < address.terms.length (); ++i)
    if (chosen_stride != address.terms[i].stride
	&& address.terms[i].inner_likelihood == INNER_LIKELY)
      {
	/* A second distinct likely stride means case (2a): bail out.  */
	if (chosen_stride)
	  return;
	chosen_stride = address.terms[i].stride;
	if (address.terms[i].versioning_opportunity_p)
	  version_stride = chosen_stride;
      }

  /* If there are no likely inner strides, see if there is a single
     versioning opportunity for a stride that was rated as INNER_DONT_KNOW.
     See the comment above the function for the cases that this code
     handles.  */
  if (!chosen_stride)
    for (unsigned int i = 0; i < address.terms.length (); ++i)
      if (version_stride != address.terms[i].stride
	  && address.terms[i].inner_likelihood == INNER_DONT_KNOW
	  && address.terms[i].versioning_opportunity_p)
	{
	  /* Multiple candidates: too ambiguous to version.  */
	  if (version_stride)
	    return;
	  version_stride = address.terms[i].stride;
	}

  if (version_stride)
    version_for_unity (address.stmt, version_stride);
}
1160 :
/* Treat EXPR * MULTIPLIER + OFFSET as a fragment of an address that addresses
   TYPE_SIZE bytes and record this address fragment for later processing.
   STMT is the statement that contains the address.  */

void
loop_versioning::record_address_fragment (gimple *stmt,
					  unsigned HOST_WIDE_INT type_size,
					  tree expr,
					  unsigned HOST_WIDE_INT multiplier,
					  HOST_WIDE_INT offset)
{
  /* We're only interested in computed values.  */
  if (TREE_CODE (expr) != SSA_NAME)
    return;

  /* Quick exit if no part of the address is calculated in STMT's loop,
     since such addresses have no versioning opportunities.  */
  class loop *loop = loop_containing_stmt (stmt);
  if (expr_invariant_in_loop_p (loop, expr))
    return;

  /* Set up an address_info for EXPR * MULTIPLIER.  The object is
     obstack-allocated (placement-new constructed) so that it can be
     discarded cheaply below if it turns out to be unusable.  */
  address_info *address = XOBNEW (&m_obstack, address_info);
  new (address) address_info;
  address->stmt = stmt;
  address->loop = loop;
  address->base = NULL_TREE;
  address->terms.quick_grow (1);
  address->terms[0].expr = expr;
  address->terms[0].multiplier = multiplier;
  address->terms[0].stride = NULL_TREE;
  address->terms[0].inner_likelihood = INNER_UNLIKELY;
  address->terms[0].versioning_opportunity_p = false;
  address->min_offset = offset;

  /* Peel apart the expression into a sum of address_terms, where each
     term is multiplied by a constant.  Treat a + b and a - b the same,
     since it doesn't matter for our purposes whether an address is
     increasing or decreasing.  Distribute (a + b) * constant into
     a * constant + b * constant.

     We don't care which loop each term belongs to, since we want to
     examine as many candidate strides as possible when determining
     which is likely to be for the innermost dimension.  We therefore
     don't limit the search to statements in STMT's loop.

     Note that I only advances when no rewrite applied to term I, so a
     term is repeatedly decomposed until it can be simplified no further
     (each `continue' below revisits the same index).  */
  for (unsigned int i = 0; i < address->terms.length (); )
    {
      if (gassign *assign = maybe_get_assign (address->terms[i].expr))
	{
	  tree_code code = gimple_assign_rhs_code (assign);
	  if (code == PLUS_EXPR
	      || code == POINTER_PLUS_EXPR
	      || code == MINUS_EXPR)
	    {
	      tree op1 = gimple_assign_rhs1 (assign);
	      tree op2 = gimple_assign_rhs2 (assign);
	      if (TREE_CODE (op2) == INTEGER_CST)
		{
		  /* Fold a constant addend into the offset rather than
		     creating a new term.  */
		  address->terms[i].expr = strip_casts (op1);
		  /* This is heuristic only, so don't worry about truncation
		     or overflow.  */
		  address->min_offset += (TREE_INT_CST_LOW (op2)
					  * address->terms[i].multiplier);
		  continue;
		}
	      else if (address->terms.length () < address_info::MAX_TERMS)
		{
		  /* Split a + b into two terms that inherit the current
		     multiplier.  */
		  unsigned int j = address->terms.length ();
		  address->terms.quick_push (address->terms[i]);
		  address->terms[i].expr = strip_casts (op1);
		  address->terms[j].expr = strip_casts (op2);
		  continue;
		}
	    }
	  if (code == MULT_EXPR)
	    {
	      tree op1 = gimple_assign_rhs1 (assign);
	      tree op2 = gimple_assign_rhs2 (assign);
	      /* multiply_term_by folds a constant factor into the term's
		 multiplier; it fails if OP2 isn't a suitable constant or
		 the scale limit would be exceeded.  */
	      if (multiply_term_by (address->terms[i], op2))
		{
		  address->terms[i].expr = strip_casts (op1);
		  continue;
		}
	    }
	  if (CONVERT_EXPR_CODE_P (code))
	    {
	      tree op1 = gimple_assign_rhs1 (assign);
	      address->terms[i].expr = strip_casts (op1);
	      continue;
	    }
	}
      i += 1;
    }

  /* Peel off any symbolic pointer.  */
  if (TREE_CODE (address->terms[0].expr) != SSA_NAME
      && address->terms[0].multiplier == 1)
    {
      /* If the pointer was the only term there is nothing left to
	 analyze; release the obstack allocation and give up.  */
      if (address->terms.length () == 1)
	{
	  obstack_free (&m_obstack, address);
	  return;
	}
      address->base = address->terms[0].expr;
      address->terms.ordered_remove (0);
    }

  /* Require all remaining terms to be SSA names.  (This could be false
     for unfolded statements, but they aren't worth dealing with.)  */
  for (unsigned int i = 0; i < address->terms.length (); ++i)
    if (TREE_CODE (address->terms[i].expr) != SSA_NAME)
      {
	obstack_free (&m_obstack, address);
	return;
      }

  /* The loop above set MIN_OFFSET based on the first byte of the
     referenced data.  Calculate the end + 1.  */
  address->max_offset = address->min_offset + type_size;

  /* Put the terms into a canonical order for the hash table lookup below.  */
  address->terms.qsort (compare_address_terms);

  if (dump_enabled_p ())
    {
      dump_printf_loc (MSG_NOTE, stmt, "recording address fragment %T", expr);
      if (multiplier != 1)
	dump_printf (MSG_NOTE, " * %wd", multiplier);
      dump_printf (MSG_NOTE, " = ");
      dump_address_info (MSG_NOTE, *address);
      dump_printf (MSG_NOTE, "\n");
    }

  /* Pool address information with the same terms (but potentially
     different offsets).  */
  address_info **slot = m_address_table.find_slot (address, INSERT);
  if (address_info *old_address = *slot)
    {
      /* We've already seen an address with the same terms.  Extend the
	 offset range to account for this access.  Doing this can paper
	 over gaps, such as in:

	   a[i * stride * 4] + a[i * stride * 4 + 3];

	 where nothing references "+ 1" or "+ 2".  However, the vectorizer
	 handles such gapped accesses without problems, so it's not worth
	 trying to exclude them.  */
      if (old_address->min_offset > address->min_offset)
	old_address->min_offset = address->min_offset;
      if (old_address->max_offset < address->max_offset)
	old_address->max_offset = address->max_offset;
      obstack_free (&m_obstack, address);
    }
  else
    {
      /* This is the first time we've seen an address with these terms.  */
      *slot = address;
      m_address_list.safe_push (address);
    }
}
1321 :
1322 : /* Analyze expression EXPR, which occurs in STMT. */
1323 :
1324 : void
1325 1613999 : loop_versioning::analyze_expr (gimple *stmt, tree expr)
1326 : {
1327 1613999 : unsigned HOST_WIDE_INT type_size;
1328 :
1329 1779568 : while (handled_component_p (expr))
1330 : {
1331 : /* See whether we can use versioning to avoid a multiplication
1332 : in an array index. */
1333 165569 : if (TREE_CODE (expr) == ARRAY_REF
1334 165569 : && acceptable_type_p (TREE_TYPE (expr), &type_size))
1335 100926 : record_address_fragment (stmt, type_size,
1336 100926 : TREE_OPERAND (expr, 1), type_size, 0);
1337 165569 : expr = TREE_OPERAND (expr, 0);
1338 : }
1339 :
1340 : /* See whether we can use versioning to avoid a multiplication
1341 : in the pointer calculation of a MEM_REF. */
1342 1613999 : if (TREE_CODE (expr) == MEM_REF
1343 1613999 : && acceptable_type_p (TREE_TYPE (expr), &type_size))
1344 72525 : record_address_fragment (stmt, type_size, TREE_OPERAND (expr, 0), 1,
1345 : /* This is heuristic only, so don't worry
1346 : about truncation or overflow. */
1347 72525 : TREE_INT_CST_LOW (TREE_OPERAND (expr, 1)));
1348 :
1349 : /* These would be easy to handle if they existed at this stage. */
1350 1613999 : gcc_checking_assert (TREE_CODE (expr) != TARGET_MEM_REF);
1351 1613999 : }
1352 :
1353 : /* Analyze all the statements in BB looking for useful version checks.
1354 : Return true on success, false if something prevents the block from
1355 : being versioned. */
1356 :
1357 : bool
1358 278621 : loop_versioning::analyze_block (basic_block bb)
1359 : {
1360 278621 : class loop *loop = bb->loop_father;
1361 278621 : loop_info &li = get_loop_info (loop);
1362 1462210 : for (gimple_stmt_iterator gsi = gsi_start_bb (bb); !gsi_end_p (gsi);
1363 904968 : gsi_next (&gsi))
1364 : {
1365 923060 : gimple *stmt = gsi_stmt (gsi);
1366 923060 : if (is_gimple_debug (stmt))
1367 113666 : continue;
1368 :
1369 832370 : if (expensive_stmt_p (stmt))
1370 : {
1371 18092 : if (dump_enabled_p ())
1372 58 : dump_printf_loc (MSG_NOTE, stmt, "expensive statement"
1373 : " prevents versioning: %G", stmt);
1374 18092 : return false;
1375 : }
1376 :
1377 : /* Only look for direct versioning opportunities in inner loops
1378 : since the benefit tends to be much smaller for outer loops. */
1379 791302 : if (!loop->inner)
1380 : {
1381 657758 : unsigned int nops = gimple_num_ops (stmt);
1382 2482606 : for (unsigned int i = 0; i < nops; ++i)
1383 1824848 : if (tree op = gimple_op (stmt, i))
1384 1613999 : analyze_expr (stmt, op);
1385 : }
1386 :
1387 : /* The point of the instruction limit is to prevent excessive
1388 : code growth, so this is a size-based estimate even though
1389 : the optimization is aimed at speed. */
1390 791302 : li.num_insns += estimate_num_insns (stmt, &eni_size_weights);
1391 : }
1392 :
1393 : return true;
1394 : }
1395 :
/* Analyze all the blocks in the function, looking for useful version checks.
   Return true if we found one.  */

bool
loop_versioning::analyze_blocks ()
{
  AUTO_DUMP_SCOPE ("analyze_blocks",
		   dump_user_location_t::from_function_decl (m_fn->decl));

  /* For now we don't try to version the whole function, although
     versioning at that level could be useful in some cases.  */
  get_loop_info (get_loop (m_fn, 0)).rejected_p = true;

  /* Visit loops innermost-first so that each loop's verdict is already
     known when its parent is considered.  */
  for (auto loop : loops_list (cfun, LI_FROM_INNERMOST))
    {
      loop_info &linfo = get_loop_info (loop);

      /* Ignore cold loops.  */
      if (!optimize_loop_for_speed_p (loop))
	linfo.rejected_p = true;

      /* See whether an inner loop prevents versioning of this loop.  */
      if (!linfo.rejected_p)
	for (class loop *inner = loop->inner; inner; inner = inner->next)
	  if (get_loop_info (inner).rejected_p)
	    {
	      linfo.rejected_p = true;
	      break;
	    }

      /* If versioning the loop is still a possibility, examine the
	 statements in the loop to look for versioning opportunities.  */
      if (!linfo.rejected_p)
	{
	  /* Zero-size allocation records the obstack high-water mark so
	     that everything allocated for this loop's address analysis
	     can be bulk-freed below.  */
	  void *start_point = obstack_alloc (&m_obstack, 0);

	  for (basic_block bb = linfo.block_list; bb;
	       bb = m_next_block_in_loop[bb->index])
	    if (!analyze_block (bb))
	      {
		linfo.rejected_p = true;
		break;
	      }

	  if (!linfo.rejected_p)
	    {
	      /* Process any queued address fragments, now that we have
		 complete grouping information.  */
	      address_info *address;
	      unsigned int i;
	      FOR_EACH_VEC_ELT (m_address_list, i, address)
		analyze_address_fragment (*address);
	    }

	  /* Reset the per-loop address pool before moving on.  */
	  m_address_table.empty ();
	  m_address_list.truncate (0);
	  obstack_free (&m_obstack, start_point);
	}
    }

  return m_num_conditions != 0;
}
1458 :
/* Use the range information available at LOOP's header to remove
   impossible versioning conditions from LOOP.  */

void
loop_versioning::prune_loop_conditions (class loop *loop)
{
  loop_info &li = get_loop_info (loop);

  /* Clearing a bit while EXECUTE_IF_SET_IN_BITMAP is iterating would
     disturb the iterator, so remember each doomed bit in TO_REMOVE and
     clear it on the next hit (or after the walk finishes).  */
  int to_remove = -1;
  bitmap_iterator bi;
  unsigned int i;
  int_range_max r;
  EXECUTE_IF_SET_IN_BITMAP (&li.unity_names, 0, i, bi)
    {
      tree name = ssa_name (i);
      gimple *stmt = first_stmt (loop->header);

      /* Drop the "NAME == 1" condition if NAME's range at the loop
	 header provably excludes 1.  */
      if (get_range_query (cfun)->range_of_expr (r, name, stmt)
	  && !r.contains_p (wi::one (TYPE_PRECISION (TREE_TYPE (name)))))
	{
	  if (dump_enabled_p ())
	    dump_printf_loc (MSG_NOTE, find_loop_location (loop),
			     "%T can never be 1 in this loop\n", name);

	  if (to_remove >= 0)
	    bitmap_clear_bit (&li.unity_names, to_remove);
	  to_remove = i;
	  m_num_conditions -= 1;
	}
    }
  /* Clear the final deferred bit, if any.  */
  if (to_remove >= 0)
    bitmap_clear_bit (&li.unity_names, to_remove);
}
1492 :
1493 : /* Remove any scheduled loop version conditions that will never be true.
1494 : Return true if any remain. */
1495 :
1496 : bool
1497 707 : loop_versioning::prune_conditions ()
1498 : {
1499 707 : AUTO_DUMP_SCOPE ("prune_loop_conditions",
1500 : dump_user_location_t::from_function_decl (m_fn->decl));
1501 :
1502 707 : calculate_dominance_info (CDI_DOMINATORS);
1503 707 : lv_dom_walker dom_walker (*this);
1504 707 : dom_walker.walk (ENTRY_BLOCK_PTR_FOR_FN (m_fn));
1505 707 : return m_num_conditions != 0;
1506 707 : }
1507 :
1508 : /* Merge the version checks for INNER into immediately-enclosing loop
1509 : OUTER. */
1510 :
1511 : void
1512 536 : loop_versioning::merge_loop_info (class loop *outer, class loop *inner)
1513 : {
1514 536 : loop_info &inner_li = get_loop_info (inner);
1515 536 : loop_info &outer_li = get_loop_info (outer);
1516 :
1517 536 : if (dump_enabled_p ())
1518 : {
1519 11 : bitmap_iterator bi;
1520 11 : unsigned int i;
1521 22 : EXECUTE_IF_SET_IN_BITMAP (&inner_li.unity_names, 0, i, bi)
1522 11 : if (!bitmap_bit_p (&outer_li.unity_names, i))
1523 11 : dump_printf_loc (MSG_NOTE, find_loop_location (inner),
1524 : "hoisting check that %T == 1 to outer loop\n",
1525 11 : ssa_name (i));
1526 : }
1527 :
1528 536 : bitmap_ior_into (&outer_li.unity_names, &inner_li.unity_names);
1529 549 : if (loop_depth (outer_li.outermost) < loop_depth (inner_li.outermost))
1530 372 : outer_li.outermost = inner_li.outermost;
1531 536 : }
1532 :
1533 : /* Add LOOP to the queue of loops to version. */
1534 :
1535 : void
1536 1194 : loop_versioning::add_loop_to_queue (class loop *loop)
1537 : {
1538 1194 : loop_info &li = get_loop_info (loop);
1539 :
1540 1194 : if (dump_enabled_p ())
1541 69 : dump_printf_loc (MSG_NOTE, find_loop_location (loop),
1542 : "queuing this loop for versioning\n");
1543 1194 : m_loops_to_version.safe_push (loop);
1544 :
1545 : /* Don't try to version superloops. */
1546 1194 : li.rejected_p = true;
1547 1194 : }
1548 :
/* Decide whether the cost model would allow us to version LOOP,
   either directly or as part of a parent loop, and return true if so.
   This does not imply that the loop is actually worth versioning in its
   own right, just that it would be valid to version it if something
   benefited.

   We have already made this decision for all inner loops of LOOP.  */

bool
loop_versioning::decide_whether_loop_is_versionable (class loop *loop)
{
  loop_info &li = get_loop_info (loop);

  /* An earlier phase (or a queued subloop) may already have ruled
     this loop out.  */
  if (li.rejected_p)
    return false;

  /* Examine the decisions made for inner loops.  */
  for (class loop *inner = loop->inner; inner; inner = inner->next)
    {
      loop_info &inner_li = get_loop_info (inner);
      if (inner_li.rejected_p)
	{
	  if (dump_enabled_p ())
	    dump_printf_loc (MSG_NOTE, find_loop_location (loop),
			     "not versioning this loop because one of its"
			     " inner loops should not be versioned\n");
	  return false;
	}

      if (inner_li.worth_versioning_p ())
	li.subloops_benefit_p = true;

      /* Accumulate the number of instructions from subloops that are not
	 the innermost, or that don't benefit from versioning.  Only the
	 instructions from innermost loops that benefit from versioning
	 should be weighed against loop-versioning-max-inner-insns;
	 everything else should be weighed against
	 loop-versioning-max-outer-insns.  */
      if (!inner_li.worth_versioning_p () || inner->inner)
	{
	  if (dump_enabled_p ())
	    dump_printf_loc (MSG_NOTE, find_loop_location (loop),
			     "counting %d instructions from this loop"
			     " against its parent loop\n", inner_li.num_insns);
	  li.num_insns += inner_li.num_insns;
	}
    }

  /* Enforce the size limits.  */
  if (li.worth_versioning_p ())
    {
      unsigned int max_num_insns = max_insns_for_loop (loop);
      if (dump_enabled_p ())
	dump_printf_loc (MSG_NOTE, find_loop_location (loop),
			 "this loop has %d instructions, against"
			 " a versioning limit of %d\n",
			 li.num_insns, max_num_insns);
      if (li.num_insns > max_num_insns)
	{
	  if (dump_enabled_p ())
	    dump_printf_loc (MSG_MISSED_OPTIMIZATION
			     | MSG_PRIORITY_USER_FACING,
			     find_loop_location (loop),
			     "this loop is too big to version");
	  return false;
	}
    }

  /* Hoist all version checks from subloops to this loop.  */
  for (class loop *subloop = loop->inner; subloop; subloop = subloop->next)
    merge_loop_info (loop, subloop);

  return true;
}
1623 :
1624 : /* Decide which loops to version and add them to the versioning queue.
1625 : Return true if there are any loops to version. */
1626 :
1627 : bool
1628 696 : loop_versioning::make_versioning_decisions ()
1629 : {
1630 696 : AUTO_DUMP_SCOPE ("make_versioning_decisions",
1631 : dump_user_location_t::from_function_decl (m_fn->decl));
1632 :
1633 5745 : for (auto loop : loops_list (cfun, LI_FROM_INNERMOST))
1634 : {
1635 3657 : loop_info &linfo = get_loop_info (loop);
1636 3657 : if (decide_whether_loop_is_versionable (loop))
1637 : {
1638 : /* Commit to versioning LOOP directly if we can't hoist the
1639 : version checks any further. */
1640 8002 : if (linfo.worth_versioning_p ()
1641 2774 : && (loop_depth (loop) == 1 || linfo.outermost == loop))
1642 1130 : add_loop_to_queue (loop);
1643 : }
1644 : else
1645 : {
1646 : /* We can't version this loop, so individually version any
1647 : subloops that would benefit and haven't been versioned yet. */
1648 883 : linfo.rejected_p = true;
1649 1539 : for (class loop *subloop = loop->inner; subloop;
1650 656 : subloop = subloop->next)
1651 856 : if (get_loop_info (subloop).worth_versioning_p ())
1652 64 : add_loop_to_queue (subloop);
1653 : }
1654 696 : }
1655 :
1656 1392 : return !m_loops_to_version.is_empty ();
1657 : }
1658 :
/* Attempt to implement loop versioning for LOOP, using the information
   cached in the associated loop_info.  Return true on success.  */

bool
loop_versioning::version_loop (class loop *loop)
{
  loop_info &li = get_loop_info (loop);

  /* Build up a condition that selects the original loop instead of
     the simplified loop: the OR of "NAME != 1" over every scheduled
     unity check.  */
  tree cond = boolean_false_node;
  bitmap_iterator bi;
  unsigned int i;
  EXECUTE_IF_SET_IN_BITMAP (&li.unity_names, 0, i, bi)
    {
      tree name = ssa_name (i);
      tree ne_one = fold_build2 (NE_EXPR, boolean_type_node, name,
				 build_one_cst (TREE_TYPE (name)));
      cond = fold_build2 (TRUTH_OR_EXPR, boolean_type_node, cond, ne_one);
    }

  /* Convert the condition into a suitable gcond.  Any statements needed
     to compute it are collected in STMTS and inserted after versioning,
     once the condition block exists.  */
  gimple_seq stmts = NULL;
  cond = force_gimple_operand_1 (cond, &stmts, is_gimple_condexpr_for_cond,
				 NULL_TREE);

  /* Version the loop.  */
  initialize_original_copy_tables ();
  basic_block cond_bb;
  li.optimized_loop = loop_version (loop, cond, &cond_bb,
				    profile_probability::unlikely (),
				    profile_probability::likely (),
				    profile_probability::unlikely (),
				    profile_probability::likely (), true);
  free_original_copy_tables ();
  if (!li.optimized_loop)
    {
      if (dump_enabled_p ())
	dump_printf_loc (MSG_MISSED_OPTIMIZATION, find_loop_location (loop),
			 "tried but failed to version this loop for when"
			 " certain strides are 1\n");
      return false;
    }

  /* Assign hierarchical discriminators to distinguish loop versions.
     This allows AutoFDO to distinguish profile data from different
     versions.  No multiplicity for non-vectorized loop versioning.  */
  gimple *optimized_last = last_nondebug_stmt (li.optimized_loop->header);
  location_t optimized_loc
    = optimized_last ? gimple_location (optimized_last) : UNKNOWN_LOCATION;
  if (optimized_loc != UNKNOWN_LOCATION)
    {
      unsigned int optimized_copyid = allocate_copyid_base (optimized_loc, 1);
      assign_discriminators_to_loop (li.optimized_loop, 0, optimized_copyid);
    }
  gimple *loop_last = last_nondebug_stmt (loop->header);
  location_t loop_loc
    = loop_last ? gimple_location (loop_last) : UNKNOWN_LOCATION;
  if (loop_loc != UNKNOWN_LOCATION)
    {
      unsigned int loop_copyid = allocate_copyid_base (loop_loc, 1);
      assign_discriminators_to_loop (loop, 0, loop_copyid);
    }
  if (dump_enabled_p ())
    dump_printf_loc (MSG_OPTIMIZED_LOCATIONS, find_loop_location (loop),
		     "versioned this loop for when certain strides are 1\n");

  /* Insert the statements that feed COND.  */
  if (stmts)
    {
      gimple_stmt_iterator gsi = gsi_last_bb (cond_bb);
      gsi_insert_seq_before (&gsi, stmts, GSI_SAME_STMT);
    }

  return true;
}
1735 :
1736 : /* Attempt to version all loops in the versioning queue. */
1737 :
1738 : void
1739 696 : loop_versioning::implement_versioning_decisions ()
1740 : {
1741 : /* No AUTO_DUMP_SCOPE here since all messages are top-level and
1742 : user-facing at this point. */
1743 :
1744 696 : bool any_succeeded_p = false;
1745 696 : class loop *loop;
1746 696 : unsigned int i;
1747 1890 : FOR_EACH_VEC_ELT (m_loops_to_version, i, loop)
1748 1194 : if (version_loop (loop))
1749 1192 : any_succeeded_p = true;
1750 696 : if (!any_succeeded_p)
1751 696 : return;
1752 :
1753 694 : update_ssa (TODO_update_ssa);
1754 :
1755 : /* Simplify the new loop, which is used when COND is false. */
1756 1886 : FOR_EACH_VEC_ELT (m_loops_to_version, i, loop)
1757 : {
1758 1192 : loop_info &linfo = get_loop_info (loop);
1759 1192 : if (linfo.optimized_loop)
1760 1192 : name_prop (linfo).substitute_and_fold (linfo.optimized_loop->header);
1761 : }
1762 : }
1763 :
1764 : /* Run the pass and return a set of TODO_* flags. */
1765 :
1766 : unsigned int
1767 27928 : loop_versioning::run ()
1768 : {
1769 27928 : gcc_assert (scev_initialized_p ());
1770 :
1771 27928 : if (analyze_blocks ()
1772 707 : && prune_conditions ()
1773 28624 : && make_versioning_decisions ())
1774 696 : implement_versioning_decisions ();
1775 :
1776 27928 : return 0;
1777 : }
1778 :
/* Loop versioning pass.  Static descriptor for the pass; field meanings
   follow the pass_data layout declared in tree-pass.h.  */

const pass_data pass_data_loop_versioning =
{
  GIMPLE_PASS, /* type */
  "lversion", /* name (used for -fdump-tree-lversion etc.) */
  OPTGROUP_LOOP, /* optinfo_flags */
  TV_LOOP_VERSIONING, /* tv_id */
  PROP_cfg, /* properties_required */
  0, /* properties_provided */
  0, /* properties_destroyed */
  0, /* todo_flags_start */
  0, /* todo_flags_finish */
};
1793 :
1794 : class pass_loop_versioning : public gimple_opt_pass
1795 : {
1796 : public:
1797 285722 : pass_loop_versioning (gcc::context *ctxt)
1798 571444 : : gimple_opt_pass (pass_data_loop_versioning, ctxt)
1799 : {}
1800 :
1801 : /* opt_pass methods: */
1802 241458 : bool gate (function *) final override
1803 : {
1804 241458 : return flag_version_loops_for_strides;
1805 : }
1806 : unsigned int execute (function *) final override;
1807 : };
1808 :
1809 : unsigned int
1810 27928 : pass_loop_versioning::execute (function *fn)
1811 : {
1812 55856 : if (number_of_loops (fn) <= 1)
1813 : return 0;
1814 :
1815 27928 : enable_ranger (fn);
1816 27928 : unsigned int ret = loop_versioning (fn).run ();
1817 27928 : disable_ranger (fn);
1818 27928 : return ret;
1819 : }
1820 :
1821 : } // anon namespace

/* Factory routine used by the pass manager to instantiate the loop
   versioning pass.  */

gimple_opt_pass *
make_pass_loop_versioning (gcc::context *ctxt)
{
  return new pass_loop_versioning (ctxt);
}
|