Branch data Line data Source code
1 : : /* Loop versioning pass.
2 : : Copyright (C) 2018-2024 Free Software Foundation, Inc.
3 : :
4 : : This file is part of GCC.
5 : :
6 : : GCC is free software; you can redistribute it and/or modify it
7 : : under the terms of the GNU General Public License as published by the
8 : : Free Software Foundation; either version 3, or (at your option) any
9 : : later version.
10 : :
11 : : GCC is distributed in the hope that it will be useful, but WITHOUT
12 : : ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
13 : : FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
14 : : for more details.
15 : :
16 : : You should have received a copy of the GNU General Public License
17 : : along with GCC; see the file COPYING3. If not see
18 : : <http://www.gnu.org/licenses/>. */
19 : :
20 : : #include "config.h"
21 : : #include "system.h"
22 : : #include "coretypes.h"
23 : : #include "backend.h"
24 : : #include "tree.h"
25 : : #include "gimple.h"
26 : : #include "gimple-iterator.h"
27 : : #include "tree-pass.h"
28 : : #include "gimplify-me.h"
29 : : #include "cfgloop.h"
30 : : #include "tree-ssa-loop.h"
31 : : #include "ssa.h"
32 : : #include "tree-scalar-evolution.h"
33 : : #include "tree-ssa-loop-ivopts.h"
34 : : #include "fold-const.h"
35 : : #include "tree-ssa-propagate.h"
36 : : #include "tree-inline.h"
37 : : #include "domwalk.h"
38 : : #include "tree-vectorizer.h"
39 : : #include "omp-general.h"
40 : : #include "predict.h"
41 : : #include "tree-into-ssa.h"
42 : : #include "gimple-range.h"
43 : : #include "tree-cfg.h"
44 : :
45 : : namespace {
46 : :
47 : : /* This pass looks for loops that could be simplified if certain loop
48 : : invariant conditions were true. It is effectively a form of loop
49 : : splitting in which the pass produces the split conditions itself,
50 : : instead of using ones that are already present in the IL.
51 : :
52 : : Versioning for when strides are 1
53 : : ---------------------------------
54 : :
55 : : At the moment the only thing the pass looks for are memory references
56 : : like:
57 : :
58 : : for (auto i : ...)
59 : : ...x[i * stride]...
60 : :
61 : : It considers changing such loops to:
62 : :
63 : : if (stride == 1)
64 : : for (auto i : ...) [A]
65 : : ...x[i]...
66 : : else
67 : : for (auto i : ...) [B]
68 : : ...x[i * stride]...
69 : :
70 : : This can have several benefits:
71 : :
72 : : (1) [A] is often easier or cheaper to vectorize than [B].
73 : :
74 : : (2) The scalar code in [A] is simpler than the scalar code in [B]
75 : : (if the loops cannot be vectorized or need an epilogue loop).
76 : :
77 : : (3) We might recognize [A] as a pattern, such as a memcpy or memset.
78 : :
79 : : (4) [A] has simpler address evolutions, which can help other passes
80 : : like loop interchange.
81 : :
82 : : The optimization is particularly useful for assumed-shape arrays in
83 : : Fortran, where the stride of the innermost dimension depends on the
84 : : array descriptor but is often equal to 1 in practice. For example:
85 : :
86 : : subroutine f1(x)
87 : : real :: x(:)
88 : : x(:) = 100
89 : : end subroutine f1
90 : :
91 : : generates the equivalent of:
92 : :
93 : : raw_stride = *x.dim[0].stride;
94 : : stride = raw_stride != 0 ? raw_stride : 1;
95 : : x_base = *x.data;
96 : : ...
97 : : tmp1 = stride * S;
98 : : tmp2 = tmp1 - stride;
99 : : *x_base[tmp2] = 1.0e+2;
100 : :
101 : : but in the common case that stride == 1, the last three statements
102 : : simplify to:
103 : :
104 : : tmp3 = S + -1;
105 : : *x_base[tmp3] = 1.0e+2;
106 : :
107 : : The optimization is in principle very simple. The difficult parts are:
108 : :
109 : : (a) deciding which parts of a general address calculation correspond
110 : : to the inner dimension of an array, since this usually isn't explicit
111 : : in the IL, and for C often isn't even explicit in the source code
112 : :
113 : : (b) estimating when the transformation is worthwhile
114 : :
115 : : Structure
116 : : ---------
117 : :
118 : : The pass has four phases:
119 : :
120 : : (1) Walk through the statements looking for and recording potential
121 : : versioning opportunities. Stop if there are none.
122 : :
123 : : (2) Use context-sensitive range information to see whether any versioning
124 : : conditions are impossible in practice. Remove them if so, and stop
125 : : if no opportunities remain.
126 : :
127 : : (We do this only after (1) to keep compile time down when no
128 : : versioning opportunities exist.)
129 : :
130 : : (3) Apply the cost model. Decide which versioning opportunities are
131 : : worthwhile and at which nesting level they should be applied.
132 : :
133 : : (4) Attempt to version all the loops selected by (3), so that:
134 : :
135 : : for (...)
136 : : ...
137 : :
138 : : becomes:
139 : :
140 : : if (!cond)
141 : : for (...) // Original loop
142 : : ...
143 : : else
144 : : for (...) // New loop
145 : : ...
146 : :
147 : : Use the version condition COND to simplify the new loop. */
148 : :
149 : : /* Enumerates the likelihood that a particular value indexes the inner
150 : : dimension of an array. */
151 : : enum inner_likelihood {
152 : : INNER_UNLIKELY,
153 : : INNER_DONT_KNOW,
154 : : INNER_LIKELY
155 : : };
156 : :
157 : : /* Information about one term of an address_info. */
158 : : struct address_term_info
159 : : {
160 : : /* The value of the term is EXPR * MULTIPLIER. */
161 : : tree expr;
162 : : unsigned HOST_WIDE_INT multiplier;
163 : :
164 : : /* The stride applied by EXPR in each iteration of some unrecorded loop,
165 : : or null if no stride has been identified. */
166 : : tree stride;
167 : :
168 : : /* Enumerates the likelihood that EXPR indexes the inner dimension
169 : : of an array. */
170 : : enum inner_likelihood inner_likelihood;
171 : :
172 : : /* True if STRIDE == 1 is a versioning opportunity when considered
173 : : in isolation. */
174 : : bool versioning_opportunity_p;
175 : : };
176 : :
177 : : /* Information about an address calculation, and the range of constant
178 : : offsets applied to it. */
179 : 119023 : class address_info
180 : : {
181 : : public:
182 : : static const unsigned int MAX_TERMS = 8;
183 : :
184 : : /* One statement that calculates the address. If multiple statements
185 : : share the same address, we only record the first. */
186 : : gimple *stmt;
187 : :
188 : : /* The loop containing STMT (cached for convenience). If multiple
189 : : statements share the same address, they all belong to this loop. */
190 : : class loop *loop;
191 : :
192 : : /* A decomposition of the calculation into a sum of terms plus an
193 : : optional base. When BASE is provided, it is never an SSA name.
194 : : Once initialization is complete, all members of TERMs are SSA names. */
195 : : tree base;
196 : : auto_vec<address_term_info, MAX_TERMS> terms;
197 : :
198 : : /* All bytes accessed from the address fall in the offset range
199 : : [MIN_OFFSET, MAX_OFFSET). */
200 : : HOST_WIDE_INT min_offset, max_offset;
201 : : };
202 : :
203 : : /* Stores addresses based on their base and terms (ignoring the offsets). */
204 : : struct address_info_hasher : nofree_ptr_hash <address_info>
205 : : {
206 : : static hashval_t hash (const address_info *);
207 : : static bool equal (const address_info *, const address_info *);
208 : : };
209 : :
210 : : /* Information about the versioning we'd like to apply to a loop. */
211 : 123484 : class loop_info
212 : : {
213 : : public:
214 : : bool worth_versioning_p () const;
215 : :
216 : : /* True if we've decided not to version this loop. The remaining
217 : : fields are meaningless if so. */
218 : : bool rejected_p;
219 : :
220 : : /* True if at least one subloop of this loop benefits from versioning. */
221 : : bool subloops_benefit_p;
222 : :
223 : : /* An estimate of the total number of instructions in the loop,
224 : : excluding those in subloops that benefit from versioning. */
225 : : unsigned int num_insns;
226 : :
227 : : /* The outermost loop that can handle all the version checks
228 : : described below. */
229 : : class loop *outermost;
230 : :
231 : : /* The first entry in the list of blocks that belong to this loop
232 : : (and not to subloops). m_next_block_in_loop provides the chain
233 : : pointers for the list. */
234 : : basic_block block_list;
235 : :
236 : : /* We'd like to version the loop for the case in which these SSA names
237 : : (keyed off their SSA_NAME_VERSION) are all equal to 1 at runtime. */
238 : : bitmap_head unity_names;
239 : :
240 : : /* If versioning succeeds, this points the version of the loop that
241 : : assumes the version conditions holds. */
242 : : class loop *optimized_loop;
243 : : };
244 : :
245 : : /* The main pass structure. */
246 : : class loop_versioning
247 : : {
248 : : public:
249 : : loop_versioning (function *);
250 : : ~loop_versioning ();
251 : : unsigned int run ();
252 : :
253 : : private:
254 : : /* Used to walk the dominator tree to find loop versioning conditions
255 : : that are always false. */
256 : 1124 : class lv_dom_walker : public dom_walker
257 : : {
258 : : public:
259 : : lv_dom_walker (loop_versioning &);
260 : :
261 : : edge before_dom_children (basic_block) final override;
262 : :
263 : : private:
264 : : /* The parent pass. */
265 : : loop_versioning &m_lv;
266 : : };
267 : :
268 : : /* Used to simplify statements based on conditions that are established
269 : : by the version checks. */
270 : 1920 : class name_prop : public substitute_and_fold_engine
271 : : {
272 : : public:
273 : 1920 : name_prop (loop_info &li) : m_li (li) {}
274 : : tree value_of_expr (tree name, gimple *) final override;
275 : :
276 : : private:
277 : : /* Information about the versioning we've performed on the loop. */
278 : : loop_info &m_li;
279 : : };
280 : :
281 : 1796410 : loop_info &get_loop_info (class loop *loop) { return m_loops[loop->num]; }
282 : :
283 : : unsigned int max_insns_for_loop (class loop *);
284 : : bool expensive_stmt_p (gimple *);
285 : :
286 : : void version_for_unity (gimple *, tree);
287 : : bool acceptable_multiplier_p (tree, unsigned HOST_WIDE_INT,
288 : : unsigned HOST_WIDE_INT * = 0);
289 : : bool acceptable_type_p (tree, unsigned HOST_WIDE_INT *);
290 : : bool multiply_term_by (address_term_info &, tree);
291 : : inner_likelihood get_inner_likelihood (tree, unsigned HOST_WIDE_INT);
292 : : void dump_inner_likelihood (address_info &, address_term_info &);
293 : : void analyze_stride (address_info &, address_term_info &,
294 : : tree, class loop *);
295 : : bool find_per_loop_multiplication (address_info &, address_term_info &);
296 : : bool analyze_term_using_scevs (address_info &, address_term_info &);
297 : : void analyze_arbitrary_term (address_info &, address_term_info &);
298 : : void analyze_address_fragment (address_info &);
299 : : void record_address_fragment (gimple *, unsigned HOST_WIDE_INT,
300 : : tree, unsigned HOST_WIDE_INT, HOST_WIDE_INT);
301 : : void analyze_expr (gimple *, tree);
302 : : bool analyze_block (basic_block);
303 : : bool analyze_blocks ();
304 : :
305 : : void prune_loop_conditions (class loop *);
306 : : bool prune_conditions ();
307 : :
308 : : void merge_loop_info (class loop *, class loop *);
309 : : void add_loop_to_queue (class loop *);
310 : : bool decide_whether_loop_is_versionable (class loop *);
311 : : bool make_versioning_decisions ();
312 : :
313 : : bool version_loop (class loop *);
314 : : void implement_versioning_decisions ();
315 : :
316 : : /* The function we're optimizing. */
317 : : function *m_fn;
318 : :
319 : : /* The obstack to use for all pass-specific bitmaps. */
320 : : bitmap_obstack m_bitmap_obstack;
321 : :
322 : : /* An obstack to use for general allocation. */
323 : : obstack m_obstack;
324 : :
325 : : /* The number of loops in the function. */
326 : : unsigned int m_nloops;
327 : :
328 : : /* The total number of loop version conditions we've found. */
329 : : unsigned int m_num_conditions;
330 : :
331 : : /* Assume that an address fragment of the form i * stride * scale
332 : : (for variable stride and constant scale) will not benefit from
333 : : versioning for stride == 1 when scale is greater than this value. */
334 : : unsigned HOST_WIDE_INT m_maximum_scale;
335 : :
336 : : /* Information about each loop. */
337 : : auto_vec<loop_info> m_loops;
338 : :
339 : : /* Used to form a linked list of blocks that belong to a loop,
340 : : started by loop_info::block_list. */
341 : : auto_vec<basic_block> m_next_block_in_loop;
342 : :
343 : : /* The list of loops that we've decided to version. */
344 : : auto_vec<class loop *> m_loops_to_version;
345 : :
346 : : /* A table of addresses in the current loop, keyed off their values
347 : : but not their offsets. */
348 : : hash_table <address_info_hasher> m_address_table;
349 : :
350 : : /* A list of all addresses in M_ADDRESS_TABLE, in a predictable order. */
351 : : auto_vec <address_info *, 32> m_address_list;
352 : : };
353 : :
354 : : /* If EXPR is an SSA name and not a default definition, return the
355 : : defining statement, otherwise return null. */
356 : :
357 : : static gimple *
358 : 1004077 : maybe_get_stmt (tree expr)
359 : : {
360 : 867629 : if (TREE_CODE (expr) == SSA_NAME && !SSA_NAME_IS_DEFAULT_DEF (expr))
361 : 0 : return SSA_NAME_DEF_STMT (expr);
362 : : return NULL;
363 : : }
364 : :
365 : : /* Like maybe_get_stmt, but also return null if the defining
366 : : statement isn't an assignment. */
367 : :
368 : : static gassign *
369 : 891970 : maybe_get_assign (tree expr)
370 : : {
371 : 1889733 : return safe_dyn_cast <gassign *> (maybe_get_stmt (expr));
372 : : }
373 : :
374 : : /* Return true if this pass should look through a cast of expression FROM
375 : : to type TYPE when analyzing pieces of an address. */
376 : :
377 : : static bool
378 : 53883 : look_through_cast_p (tree type, tree from)
379 : : {
380 : 53883 : return (INTEGRAL_TYPE_P (TREE_TYPE (from)) == INTEGRAL_TYPE_P (type)
381 : 53883 : && POINTER_TYPE_P (TREE_TYPE (from)) == POINTER_TYPE_P (type));
382 : : }
383 : :
384 : : /* Strip all conversions of integers or pointers from EXPR, regardless
385 : : of whether the conversions are nops. This is useful in the context
386 : : of this pass because we're not trying to fold or simulate the
387 : : expression; we just want to see how it's structured. */
388 : :
389 : : static tree
390 : 350236 : strip_casts (tree expr)
391 : : {
392 : 350236 : const unsigned int MAX_NITERS = 4;
393 : :
394 : 350236 : tree type = TREE_TYPE (expr);
395 : 350236 : while (CONVERT_EXPR_P (expr)
396 : 350402 : && look_through_cast_p (type, TREE_OPERAND (expr, 0)))
397 : 166 : expr = TREE_OPERAND (expr, 0);
398 : :
399 : 403139 : for (unsigned int niters = 0; niters < MAX_NITERS; ++niters)
400 : : {
401 : 403139 : gassign *assign = maybe_get_assign (expr);
402 : 403139 : if (assign
403 : 201917 : && CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (assign))
404 : 456856 : && look_through_cast_p (type, gimple_assign_rhs1 (assign)))
405 : 52903 : expr = gimple_assign_rhs1 (assign);
406 : : else
407 : : break;
408 : : }
409 : 350236 : return expr;
410 : : }
411 : :
412 : : /* Compare two address_term_infos in the same address_info. */
413 : :
414 : : static int
415 : 265616 : compare_address_terms (const void *a_uncast, const void *b_uncast)
416 : : {
417 : 265616 : const address_term_info *a = (const address_term_info *) a_uncast;
418 : 265616 : const address_term_info *b = (const address_term_info *) b_uncast;
419 : :
420 : 265616 : if (a->expr != b->expr)
421 : 265284 : return SSA_NAME_VERSION (a->expr) < SSA_NAME_VERSION (b->expr) ? -1 : 1;
422 : :
423 : 332 : if (a->multiplier != b->multiplier)
424 : 266 : return a->multiplier < b->multiplier ? -1 : 1;
425 : :
426 : : return 0;
427 : : }
428 : :
429 : : /* Dump ADDRESS using flags FLAGS. */
430 : :
431 : : static void
432 : 420 : dump_address_info (dump_flags_t flags, address_info &address)
433 : : {
434 : 420 : if (address.base)
435 : 2 : dump_printf (flags, "%T + ", address.base);
436 : 2340 : for (unsigned int i = 0; i < address.terms.length (); ++i)
437 : : {
438 : 750 : if (i != 0)
439 : 330 : dump_printf (flags, " + ");
440 : 750 : dump_printf (flags, "%T", address.terms[i].expr);
441 : 750 : if (address.terms[i].multiplier != 1)
442 : 522 : dump_printf (flags, " * %wd", address.terms[i].multiplier);
443 : : }
444 : 840 : dump_printf (flags, " + [%wd, %wd]",
445 : 420 : address.min_offset, address.max_offset - 1);
446 : 420 : }
447 : :
448 : : /* Hash an address_info based on its base and terms. */
449 : :
450 : : hashval_t
451 : 165754 : address_info_hasher::hash (const address_info *info)
452 : : {
453 : 165754 : inchash::hash hash;
454 : 165754 : hash.add_int (info->base ? TREE_CODE (info->base) : 0);
455 : 331508 : hash.add_int (info->terms.length ());
456 : 415174 : for (unsigned int i = 0; i < info->terms.length (); ++i)
457 : : {
458 : 249420 : hash.add_int (SSA_NAME_VERSION (info->terms[i].expr));
459 : 249420 : hash.add_hwi (info->terms[i].multiplier);
460 : : }
461 : 165754 : return hash.end ();
462 : : }
463 : :
464 : : /* Return true if two address_infos have equal bases and terms. Other
465 : : properties might be different (such as the statement or constant
466 : : offset range). */
467 : :
468 : : bool
469 : 89701 : address_info_hasher::equal (const address_info *a, const address_info *b)
470 : : {
471 : 89701 : if (a->base != b->base
472 : 89701 : && (!a->base || !b->base || !operand_equal_p (a->base, b->base, 0)))
473 : 2575 : return false;
474 : :
475 : 261378 : if (a->terms.length () != b->terms.length ())
476 : : return false;
477 : :
478 : 172388 : for (unsigned int i = 0; i < a->terms.length (); ++i)
479 : 111356 : if (a->terms[i].expr != b->terms[i].expr
480 : 111356 : || a->terms[i].multiplier != b->terms[i].multiplier)
481 : : return false;
482 : :
483 : : return true;
484 : : }
485 : :
486 : : /* Return true if we want to version the loop, i.e. if we have a
487 : : specific reason for doing so and no specific reason not to. */
488 : :
489 : : bool
490 : 6122 : loop_info::worth_versioning_p () const
491 : : {
492 : 6122 : return (!rejected_p
493 : 4670 : && (!bitmap_empty_p (&unity_names) || subloops_benefit_p));
494 : : }
495 : :
496 : 562 : loop_versioning::lv_dom_walker::lv_dom_walker (loop_versioning &lv)
497 : 0 : : dom_walker (CDI_DOMINATORS), m_lv (lv)
498 : : {
499 : 0 : }
500 : :
501 : : /* Process BB before processing the blocks it dominates. */
502 : :
503 : : edge
504 : 26259 : loop_versioning::lv_dom_walker::before_dom_children (basic_block bb)
505 : : {
506 : 26259 : if (bb == bb->loop_father->header)
507 : 3613 : m_lv.prune_loop_conditions (bb->loop_father);
508 : :
509 : 26259 : return NULL;
510 : : }
511 : :
512 : : /* Decide whether to replace VAL with a new value in a versioned loop.
513 : : Return the new value if so, otherwise return null. */
514 : :
515 : : tree
516 : 62326 : loop_versioning::name_prop::value_of_expr (tree val, gimple *)
517 : : {
518 : 62326 : if (TREE_CODE (val) == SSA_NAME
519 : 62326 : && bitmap_bit_p (&m_li.unity_names, SSA_NAME_VERSION (val)))
520 : 1088 : return build_one_cst (TREE_TYPE (val));
521 : : return NULL_TREE;
522 : : }
523 : :
524 : : /* Initialize the structure to optimize FN. */
525 : :
526 : 23966 : loop_versioning::loop_versioning (function *fn)
527 : 23966 : : m_fn (fn),
528 : 23966 : m_nloops (number_of_loops (fn)),
529 : 23966 : m_num_conditions (0),
530 : 23966 : m_address_table (31)
531 : : {
532 : 23966 : bitmap_obstack_initialize (&m_bitmap_obstack);
533 : 23966 : gcc_obstack_init (&m_obstack);
534 : :
535 : : /* Initialize the loop information. */
536 : 23966 : m_loops.safe_grow_cleared (m_nloops, true);
537 : 147450 : for (unsigned int i = 0; i < m_nloops; ++i)
538 : : {
539 : 123484 : m_loops[i].outermost = get_loop (m_fn, 0);
540 : 123484 : bitmap_initialize (&m_loops[i].unity_names, &m_bitmap_obstack);
541 : : }
542 : :
543 : : /* Initialize the list of blocks that belong to each loop. */
544 : 23966 : unsigned int nbbs = last_basic_block_for_fn (fn);
545 : 23966 : m_next_block_in_loop.safe_grow (nbbs, true);
546 : 23966 : basic_block bb;
547 : 594727 : FOR_EACH_BB_FN (bb, fn)
548 : : {
549 : 570761 : loop_info &li = get_loop_info (bb->loop_father);
550 : 570761 : m_next_block_in_loop[bb->index] = li.block_list;
551 : 570761 : li.block_list = bb;
552 : : }
553 : :
554 : : /* MAX_FIXED_MODE_SIZE should be a reasonable maximum scale for
555 : : unvectorizable code, since it is the largest size that can be
556 : : handled efficiently by scalar code. omp_max_vf calculates the
557 : : maximum number of bytes in a vector, when such a value is relevant
558 : : to loop optimization. */
559 : 23966 : m_maximum_scale = estimated_poly_value (omp_max_vf ());
560 : 71898 : m_maximum_scale = MAX (m_maximum_scale, MAX_FIXED_MODE_SIZE);
561 : 23966 : }
562 : :
563 : 23966 : loop_versioning::~loop_versioning ()
564 : : {
565 : 23966 : bitmap_obstack_release (&m_bitmap_obstack);
566 : 23966 : obstack_free (&m_obstack, NULL);
567 : 23966 : }
568 : :
569 : : /* Return the maximum number of instructions allowed in LOOP before
570 : : it becomes too big for versioning.
571 : :
572 : : There are separate limits for inner and outer loops. The limit for
573 : : inner loops applies only to loops that benefit directly from versioning.
574 : : The limit for outer loops applies to all code in the outer loop and
575 : : its subloops that *doesn't* benefit directly from versioning; such code
576 : : would be "taken along for the ride". The idea is that if the cost of
577 : : the latter is small, it is better to version outer loops rather than
578 : : inner loops, both to reduce the number of repeated checks and to enable
579 : : more of the loop nest to be optimized as a natural nest (e.g. by loop
580 : : interchange or outer-loop vectorization). */
581 : :
582 : : unsigned int
583 : 1237 : loop_versioning::max_insns_for_loop (class loop *loop)
584 : : {
585 : 1237 : return (loop->inner
586 : 271 : ? param_loop_versioning_max_outer_insns
587 : 966 : : param_loop_versioning_max_inner_insns);
588 : : }
589 : :
590 : : /* Return true if for cost reasons we should avoid versioning any loop
591 : : that contains STMT.
592 : :
593 : : Note that we don't need to check whether versioning is invalid for
594 : : correctness reasons, since the versioning process does that for us.
595 : : The conditions involved are too rare to be worth duplicating here. */
596 : :
597 : : bool
598 : 644338 : loop_versioning::expensive_stmt_p (gimple *stmt)
599 : : {
600 : 0 : if (gcall *call = dyn_cast <gcall *> (stmt))
601 : : /* Assume for now that the time spent in an "expensive" call would
602 : : overwhelm any saving from versioning. */
603 : 20236 : return !gimple_inexpensive_call_p (call);
604 : : return false;
605 : : }
606 : :
607 : : /* Record that we want to version the loop that contains STMT for the
608 : : case in which SSA name NAME is equal to 1. We already know that NAME
609 : : is invariant in the loop. */
610 : :
611 : : void
612 : 2133 : loop_versioning::version_for_unity (gimple *stmt, tree name)
613 : : {
614 : 2133 : class loop *loop = loop_containing_stmt (stmt);
615 : 2133 : loop_info &li = get_loop_info (loop);
616 : :
617 : 2133 : if (bitmap_set_bit (&li.unity_names, SSA_NAME_VERSION (name)))
618 : : {
619 : : /* This is the first time we've wanted to version LOOP for NAME.
620 : : Keep track of the outermost loop that can handle all versioning
621 : : checks in LI. */
622 : 1191 : class loop *outermost
623 : 1191 : = outermost_invariant_loop_for_expr (loop, name);
624 : 1376 : if (loop_depth (li.outermost) < loop_depth (outermost))
625 : 947 : li.outermost = outermost;
626 : :
627 : 1191 : if (dump_enabled_p ())
628 : : {
629 : 70 : dump_printf_loc (MSG_NOTE, stmt, "want to version containing loop"
630 : : " for when %T == 1", name);
631 : 70 : if (outermost == loop)
632 : 23 : dump_printf (MSG_NOTE, "; cannot hoist check further");
633 : : else
634 : : {
635 : 57 : dump_printf (MSG_NOTE, "; could implement the check at loop"
636 : : " depth %d", loop_depth (outermost));
637 : 67 : if (loop_depth (li.outermost) > loop_depth (outermost))
638 : 0 : dump_printf (MSG_NOTE, ", but other checks only allow"
639 : 0 : " a depth of %d", loop_depth (li.outermost));
640 : : }
641 : 70 : dump_printf (MSG_NOTE, "\n");
642 : : }
643 : :
644 : 1191 : m_num_conditions += 1;
645 : : }
646 : : else
647 : : {
648 : : /* This is a duplicate request. */
649 : 942 : if (dump_enabled_p ())
650 : 6 : dump_printf_loc (MSG_NOTE, stmt, "already asked to version containing"
651 : : " loop for when %T == 1\n", name);
652 : : }
653 : 2133 : }
654 : :
655 : : /* Return true if OP1_TREE is constant and if in principle it is worth
656 : : versioning an address fragment of the form:
657 : :
658 : : i * OP1_TREE * OP2 * stride
659 : :
660 : : for the case in which stride == 1. This in practice means testing
661 : : whether:
662 : :
663 : : OP1_TREE * OP2 <= M_MAXIMUM_SCALE.
664 : :
665 : : If RESULT is nonnull, store OP1_TREE * OP2 there when returning true. */
666 : :
667 : : bool
668 : 269156 : loop_versioning::acceptable_multiplier_p (tree op1_tree,
669 : : unsigned HOST_WIDE_INT op2,
670 : : unsigned HOST_WIDE_INT *result)
671 : : {
672 : 269156 : if (tree_fits_uhwi_p (op1_tree))
673 : : {
674 : 253105 : unsigned HOST_WIDE_INT op1 = tree_to_uhwi (op1_tree);
675 : : /* The first part checks for overflow. */
676 : 253105 : if (op1 * op2 >= op2 && op1 * op2 <= m_maximum_scale)
677 : : {
678 : 230945 : if (result)
679 : 190318 : *result = op1 * op2;
680 : 230945 : return true;
681 : : }
682 : : }
683 : : return false;
684 : : }
685 : :
686 : : /* Return true if it is worth using loop versioning on a memory access
687 : : of type TYPE. Store the size of the access in *SIZE if so. */
688 : :
689 : : bool
690 : 170937 : loop_versioning::acceptable_type_p (tree type, unsigned HOST_WIDE_INT *size)
691 : : {
692 : 170937 : return (TYPE_SIZE_UNIT (type)
693 : 170937 : && acceptable_multiplier_p (TYPE_SIZE_UNIT (type), 1, size));
694 : : }
695 : :
696 : : /* See whether OP is constant and whether we can multiply TERM by that
697 : : constant without exceeding M_MAXIMUM_SCALE. Return true and update
698 : : TERM if so. */
699 : :
700 : : bool
701 : 68058 : loop_versioning::multiply_term_by (address_term_info &term, tree op)
702 : : {
703 : 0 : return acceptable_multiplier_p (op, term.multiplier, &term.multiplier);
704 : : }
705 : :
706 : : /* Decide whether an address fragment of the form STRIDE * MULTIPLIER
707 : : is likely to be indexing an innermost dimension, returning the result
708 : : as an INNER_* probability. */
709 : :
710 : : inner_likelihood
711 : 63822 : loop_versioning::get_inner_likelihood (tree stride,
712 : : unsigned HOST_WIDE_INT multiplier)
713 : : {
714 : 63822 : const unsigned int MAX_NITERS = 8;
715 : :
716 : : /* Iterate over possible values of STRIDE. Return INNER_LIKELY if at
717 : : least one of those values is likely to be for the innermost dimension.
718 : : Record in UNLIKELY_P if at least one of those values is unlikely to be
719 : : for the innermost dimension.
720 : :
721 : : E.g. for:
722 : :
723 : : stride = cond ? a * b : 1
724 : :
725 : : we should treat STRIDE as being a likely inner dimension, since
726 : : we know that it is 1 under at least some circumstances. (See the
727 : : Fortran example below.) However:
728 : :
729 : : stride = a * b
730 : :
731 : : on its own is unlikely to be for the innermost dimension, since
732 : : that would require both a and b to be 1 at runtime. */
733 : 63822 : bool unlikely_p = false;
734 : 63822 : tree worklist[MAX_NITERS];
735 : 63822 : unsigned int length = 0;
736 : 63822 : worklist[length++] = stride;
737 : 100697 : for (unsigned int i = 0; i < length; ++i)
738 : : {
739 : 77502 : tree expr = worklist[i];
740 : :
741 : 77502 : if (CONSTANT_CLASS_P (expr))
742 : : {
743 : : /* See if EXPR * MULTIPLIER would be consistent with an individual
744 : : access or a small grouped access. */
745 : 43013 : if (acceptable_multiplier_p (expr, multiplier))
746 : : return INNER_LIKELY;
747 : : else
748 : : unlikely_p = true;
749 : : }
750 : 68034 : else if (gimple *stmt = maybe_get_stmt (expr))
751 : : {
752 : : /* If EXPR is set by a PHI node, queue its arguments in case
753 : : we find one that is consistent with an inner dimension.
754 : :
755 : : An important instance of this is the Fortran handling of array
756 : : descriptors, which calculates the stride of the inner dimension
757 : : using a PHI equivalent of:
758 : :
759 : : raw_stride = a.dim[0].stride;
760 : : stride = raw_stride != 0 ? raw_stride : 1;
761 : :
762 : : (Strides for outer dimensions do not treat 0 specially.) */
763 : 33545 : if (gphi *phi = dyn_cast <gphi *> (stmt))
764 : : {
765 : 8762 : unsigned int nargs = gimple_phi_num_args (phi);
766 : 22770 : for (unsigned int j = 0; j < nargs && length < MAX_NITERS; ++j)
767 : 14008 : worklist[length++] = strip_casts (gimple_phi_arg_def (phi, j));
768 : : }
769 : : /* If the value is set by an assignment, expect it to be read
770 : : from memory (such as an array descriptor) rather than be
771 : : calculated. */
772 : 61658 : else if (gassign *assign = dyn_cast <gassign *> (stmt))
773 : : {
774 : 24469 : if (!gimple_assign_load_p (assign))
775 : 11816 : unlikely_p = true;
776 : : }
777 : : /* Things like calls don't really tell us anything. */
778 : : }
779 : : }
780 : :
781 : : /* We didn't find any possible values of STRIDE that were likely to be
782 : : for the innermost dimension. If we found one that was actively
783 : : unlikely to be for the innermost dimension, assume that that applies
784 : : to STRIDE too. */
785 : 23195 : return unlikely_p ? INNER_UNLIKELY : INNER_DONT_KNOW;
786 : : }
787 : :
788 : : /* Dump the likelihood that TERM's stride is for the innermost dimension.
789 : : ADDRESS is the address that contains TERM. */
790 : :
791 : : void
792 : 237 : loop_versioning::dump_inner_likelihood (address_info &address,
793 : : address_term_info &term)
794 : : {
795 : 237 : if (term.inner_likelihood == INNER_LIKELY)
796 : 82 : dump_printf_loc (MSG_NOTE, address.stmt, "%T is likely to be the"
797 : : " innermost dimension\n", term.stride);
798 : 155 : else if (term.inner_likelihood == INNER_UNLIKELY)
799 : 37 : dump_printf_loc (MSG_NOTE, address.stmt, "%T is probably not the"
800 : : " innermost dimension\n", term.stride);
801 : : else
802 : 118 : dump_printf_loc (MSG_NOTE, address.stmt, "cannot tell whether %T"
803 : : " is the innermost dimension\n", term.stride);
804 : 237 : }
805 : :
806 : : /* The caller has identified that STRIDE is the stride of interest
807 : : in TERM, and that the stride is applied in OP_LOOP. Record this
808 : : information in TERM, deciding whether STRIDE is likely to be for
809 : : the innermost dimension of an array and whether it represents a
810 : : versioning opportunity. ADDRESS is the address that contains TERM. */
811 : :
812 : : void
813 : 47224 : loop_versioning::analyze_stride (address_info &address,
814 : : address_term_info &term,
815 : : tree stride, class loop *op_loop)
816 : : {
817 : 47224 : term.stride = stride;
818 : :
819 : 47224 : term.inner_likelihood = get_inner_likelihood (stride, term.multiplier);
820 : 47224 : if (dump_enabled_p ())
821 : 197 : dump_inner_likelihood (address, term);
822 : :
823 : : /* To be a versioning opportunity we require:
824 : :
825 : : - The multiplier applied by TERM is equal to the access size,
826 : : so that when STRIDE is 1, the accesses in successive loop
827 : : iterations are consecutive.
828 : :
829 : : This is deliberately conservative. We could relax it to handle
830 : : other cases (such as those with gaps between iterations) if we
831 : : find any real testcases for which it's useful.
832 : :
833 : : - the stride is applied in the same loop as STMT rather than
834 : : in an outer loop. Although versioning for strides applied in
835 : : outer loops could help in some cases -- such as enabling
836 : : more loop interchange -- the savings are much lower than for
837 : : inner loops.
838 : :
839 : : - the stride is an SSA name that is invariant in STMT's loop,
840 : : since otherwise versioning isn't possible. */
841 : 47224 : unsigned HOST_WIDE_INT access_size = address.max_offset - address.min_offset;
842 : 47224 : if (term.multiplier == access_size
843 : 39657 : && address.loop == op_loop
844 : 37385 : && TREE_CODE (stride) == SSA_NAME
845 : 49604 : && expr_invariant_in_loop_p (address.loop, stride))
846 : : {
847 : 2368 : term.versioning_opportunity_p = true;
848 : 2368 : if (dump_enabled_p ())
849 : 88 : dump_printf_loc (MSG_NOTE, address.stmt, "%T == 1 is a versioning"
850 : : " opportunity\n", stride);
851 : : }
852 : 47224 : }
853 : :
854 : : /* See whether address term TERM (which belongs to ADDRESS) is the result
855 : : of multiplying a varying SSA name by a loop-invariant SSA name.
856 : : Return true and update TERM if so.
857 : :
858 : : This handles both cases that SCEV might handle, such as:
859 : :
860 : : for (int i = 0; i < n; ++i)
861 : : res += a[i * stride];
862 : :
863 : : and ones in which the term varies arbitrarily between iterations, such as:
864 : :
865 : : for (int i = 0; i < n; ++i)
866 : : res += a[index[i] * stride]; */
867 : :
868 : : bool
869 : 83417 : loop_versioning::find_per_loop_multiplication (address_info &address,
870 : : address_term_info &term)
871 : : {
872 : 83417 : gassign *mult = maybe_get_assign (term.expr);
873 : 106750 : if (!mult || gimple_assign_rhs_code (mult) != MULT_EXPR)
874 : : return false;
875 : :
876 : 6971 : class loop *mult_loop = loop_containing_stmt (mult);
877 : 6971 : if (!loop_outer (mult_loop))
878 : : return false;
879 : :
880 : 6745 : tree op1 = strip_casts (gimple_assign_rhs1 (mult));
881 : 13490 : tree op2 = strip_casts (gimple_assign_rhs2 (mult));
882 : 6745 : if (TREE_CODE (op1) != SSA_NAME || TREE_CODE (op2) != SSA_NAME)
883 : : return false;
884 : :
885 : 5996 : bool invariant1_p = expr_invariant_in_loop_p (mult_loop, op1);
886 : 5996 : bool invariant2_p = expr_invariant_in_loop_p (mult_loop, op2);
887 : 5996 : if (invariant1_p == invariant2_p)
888 : : return false;
889 : :
890 : : /* Make sure that the loop invariant is OP2 rather than OP1. */
891 : 5799 : if (invariant1_p)
892 : 3464 : std::swap (op1, op2);
893 : :
894 : 5799 : if (dump_enabled_p ())
895 : 90 : dump_printf_loc (MSG_NOTE, address.stmt, "address term %T = varying %T"
896 : : " * loop-invariant %T\n", term.expr, op1, op2);
897 : 5799 : analyze_stride (address, term, op2, mult_loop);
898 : 5799 : return true;
899 : : }
900 : :
901 : : /* Try to use scalar evolutions to find an address stride for TERM,
902 : : which belongs to ADDRESS. Return true and update TERM if so.
903 : :
904 : : Here we are interested in any evolution information we can find,
905 : : not just evolutions wrt ADDRESS->LOOP. For example, if we find that
906 : : an outer loop obviously iterates over the inner dimension of an array,
907 : : that information can help us eliminate worthless versioning opportunities
908 : : in inner loops. */
909 : :
910 : : bool
911 : 77618 : loop_versioning::analyze_term_using_scevs (address_info &address,
912 : : address_term_info &term)
913 : : {
914 : 113811 : gimple *setter = maybe_get_stmt (term.expr);
915 : 64554 : if (!setter)
916 : : return false;
917 : :
918 : 64554 : class loop *wrt_loop = loop_containing_stmt (setter);
919 : 64554 : if (!loop_outer (wrt_loop))
920 : : return false;
921 : :
922 : 51397 : tree chrec = strip_casts (analyze_scalar_evolution (wrt_loop, term.expr));
923 : 51397 : if (TREE_CODE (chrec) == POLYNOMIAL_CHREC)
924 : : {
925 : 41425 : if (dump_enabled_p ())
926 : 107 : dump_printf_loc (MSG_NOTE, address.stmt,
927 : : "address term %T = %T\n", term.expr, chrec);
928 : :
929 : : /* Peel casts and accumulate constant multiplications, up to the
930 : : limit allowed by M_MAXIMUM_SCALE. */
931 : 41425 : tree stride = strip_casts (CHREC_RIGHT (chrec));
932 : 41425 : while (TREE_CODE (stride) == MULT_EXPR
933 : 41425 : && multiply_term_by (term, TREE_OPERAND (stride, 1)))
934 : 0 : stride = strip_casts (TREE_OPERAND (stride, 0));
935 : :
936 : : gassign *assign;
937 : 41537 : while ((assign = maybe_get_assign (stride))
938 : 286 : && gimple_assign_rhs_code (assign) == MULT_EXPR
939 : 41649 : && multiply_term_by (term, gimple_assign_rhs2 (assign)))
940 : : {
941 : 112 : if (dump_enabled_p ())
942 : 24 : dump_printf_loc (MSG_NOTE, address.stmt,
943 : : "looking through %G", (gimple *) assign);
944 : 112 : stride = strip_casts (gimple_assign_rhs1 (assign));
945 : : }
946 : :
947 : 41425 : analyze_stride (address, term, stride, get_chrec_loop (chrec));
948 : 41425 : return true;
949 : : }
950 : :
951 : : return false;
952 : : }
953 : :
954 : : /* Address term TERM is an arbitrary term that provides no versioning
955 : : opportunities. Analyze it to see whether it contains any likely
956 : : inner strides, so that we don't mistakenly version for other
957 : : (less likely) candidates.
958 : :
959 : : This copes with invariant innermost indices such as:
960 : :
961 : : x(i, :) = 100
962 : :
963 : : where the "i" component of the address is invariant in the loop
964 : : but provides the real inner stride.
965 : :
966 : : ADDRESS is the address that contains TERM. */
967 : :
968 : : void
969 : 16082 : loop_versioning::analyze_arbitrary_term (address_info &address,
970 : : address_term_info &term)
971 : :
972 : : {
973 : : /* A multiplication offers two potential strides. Pick the one that
974 : : is most likely to be an innermost stride. */
975 : 16082 : tree expr = term.expr, alt = NULL_TREE;
976 : 16082 : gassign *mult = maybe_get_assign (expr);
977 : 28940 : if (mult && gimple_assign_rhs_code (mult) == MULT_EXPR)
978 : : {
979 : 516 : expr = strip_casts (gimple_assign_rhs1 (mult));
980 : 1032 : alt = strip_casts (gimple_assign_rhs2 (mult));
981 : : }
982 : 16082 : term.stride = expr;
983 : 16082 : term.inner_likelihood = get_inner_likelihood (expr, term.multiplier);
984 : 16082 : if (alt)
985 : : {
986 : 516 : inner_likelihood alt_l = get_inner_likelihood (alt, term.multiplier);
987 : 516 : if (alt_l > term.inner_likelihood)
988 : : {
989 : 173 : term.stride = alt;
990 : 173 : term.inner_likelihood = alt_l;
991 : : }
992 : : }
993 : 16082 : if (dump_enabled_p ())
994 : 40 : dump_inner_likelihood (address, term);
995 : 16082 : }
996 : :
997 : : /* Try to identify loop strides in ADDRESS and try to choose realistic
998 : : versioning opportunities based on these strides.
999 : :
1000 : : The main difficulty here isn't finding strides that could be used
1001 : : in a version check (that's pretty easy). The problem instead is to
1002 : : avoid versioning for some stride S that is unlikely ever to be 1 at
1003 : : runtime. Versioning for S == 1 on its own would lead to unnecessary
1004 : : code bloat, while adding S == 1 to more realistic version conditions
1005 : : would lose the optimisation opportunity offered by those other conditions.
1006 : :
1007 : : For example, versioning for a stride of 1 in the Fortran code:
1008 : :
1009 : : integer :: a(:,:)
1010 : : a(1,:) = 1
1011 : :
1012 : : is not usually a good idea, since the assignment is iterating over
1013 : : an outer dimension and is relatively unlikely to have a stride of 1.
1014 : : (It isn't impossible, since the inner dimension might be 1, or the
1015 : : array might be transposed.) Similarly, in:
1016 : :
1017 : : integer :: a(:,:), b(:,:)
1018 : : b(:,1) = a(1,:)
1019 : :
1020 : : b(:,1) is relatively likely to have a stride of 1 while a(1,:) isn't.
1021 : : Versioning for when both strides are 1 would lose most of the benefit
1022 : : of versioning for b's access.
1023 : :
1024 : : The approach we take is as follows:
1025 : :
1026 : : - Analyze each term to see whether it has an identifiable stride,
1027 : : regardless of which loop applies the stride.
1028 : :
1029 : : - Evaluate the likelihood that each such stride is for the innermost
1030 : : dimension of an array, on the scale "likely", "don't know" or "unlikely".
1031 : :
1032 : : - If there is a single "likely" innermost stride, and that stride is
1033 : : applied in the loop that contains STMT, version the loop for when the
1034 : : stride is 1. This deals with the cases in which we're fairly
1035 : : confident of doing the right thing, such as the b(:,1) reference above.
1036 : :
1037 : : - If there are no "likely" innermost strides, and the loop that contains
1038 : : STMT uses a stride that we rated as "don't know", version for when
1039 : : that stride is 1. This is principally used for C code such as:
1040 : :
1041 : : for (int i = 0; i < n; ++i)
1042 : : a[i * x] = ...;
1043 : :
1044 : : and:
1045 : :
1046 : : for (int j = 0; j < n; ++j)
1047 : : for (int i = 0; i < n; ++i)
1048 : : a[i * x + j * y] = ...;
1049 : :
1050 : : where nothing in the way "x" and "y" are set gives a hint as to
1051 : : whether "i" iterates over the innermost dimension of the array.
1052 : : In these situations it seems reasonable to assume the
1053 : : programmer has nested the loops appropriately (although of course
1054 : : there are examples like GEMM in which this assumption doesn't hold
1055 : : for all accesses in the loop).
1056 : :
1057 : : This case is also useful for the Fortran equivalent of the
1058 : : above C code. */
1059 : :
1060 : : void
1061 : 52905 : loop_versioning::analyze_address_fragment (address_info &address)
1062 : : {
1063 : 52905 : if (dump_enabled_p ())
1064 : : {
1065 : 171 : dump_printf_loc (MSG_NOTE, address.stmt, "analyzing address fragment ");
1066 : 171 : dump_address_info (MSG_NOTE, address);
1067 : 171 : dump_printf (MSG_NOTE, "\n");
1068 : : }
1069 : :
1070 : : /* Analyze each component of the sum to see whether it involves an
1071 : : apparent stride.
1072 : :
1073 : : There is an overlap between the addresses that
1074 : : find_per_loop_multiplication and analyze_term_using_scevs can handle,
1075 : : but the former is much cheaper than SCEV analysis, so try it first. */
1076 : 272644 : for (unsigned int i = 0; i < address.terms.length (); ++i)
1077 : 83417 : if (!find_per_loop_multiplication (address, address.terms[i])
1078 : 77618 : && !analyze_term_using_scevs (address, address.terms[i])
1079 : 119610 : && !POINTER_TYPE_P (TREE_TYPE (address.terms[i].expr)))
1080 : 16082 : analyze_arbitrary_term (address, address.terms[i]);
1081 : :
1082 : : /* Check for strides that are likely to be for the innermost dimension.
1083 : :
1084 : : 1. If there is a single likely inner stride, if it is an SSA name,
1085 : : and if it is worth versioning the loop for when the SSA name
1086 : : equals 1, record that we want to do so.
1087 : :
1088 : : 2. Otherwise, if there any likely inner strides, bail out. This means
1089 : : one of:
1090 : :
1091 : : (a) There are multiple likely inner strides. This suggests we're
1092 : : confused and be can't be confident of doing the right thing.
1093 : :
1094 : : (b) There is a single likely inner stride and it is a constant
1095 : : rather than an SSA name. This can mean either that the access
1096 : : is a natural one without any variable strides, such as:
1097 : :
1098 : : for (int i = 0; i < n; ++i)
1099 : : a[i] += 1;
1100 : :
1101 : : or that a variable stride is applied to an outer dimension,
1102 : : such as:
1103 : :
1104 : : for (int i = 0; i < n; ++i)
1105 : : for (int j = 0; j < n; ++j)
1106 : : a[j * stride][i] += 1;
1107 : :
1108 : : (c) There is a single likely inner stride, and it is an SSA name,
1109 : : but it isn't a worthwhile versioning opportunity. This usually
1110 : : means that the variable stride is applied by an outer loop,
1111 : : such as:
1112 : :
1113 : : for (int i = 0; i < n; ++i)
1114 : : for (int j = 0; j < n; ++j)
1115 : : a[j][i * stride] += 1;
1116 : :
1117 : : or (using an example with a more natural loop nesting):
1118 : :
1119 : : for (int i = 0; i < n; ++i)
1120 : : for (int j = 0; j < n; ++j)
1121 : : a[i][j] += b[i * stride];
1122 : :
1123 : : in cases where b[i * stride] cannot (yet) be hoisted for
1124 : : aliasing reasons.
1125 : :
1126 : : 3. If there are no likely inner strides, fall through to the next
1127 : : set of checks.
1128 : :
1129 : : Pointer equality is enough to check for uniqueness in (1), since we
1130 : : only care about SSA names. */
1131 : : tree chosen_stride = NULL_TREE;
1132 : : tree version_stride = NULL_TREE;
1133 : 136187 : for (unsigned int i = 0; i < address.terms.length (); ++i)
1134 : 83404 : if (chosen_stride != address.terms[i].stride
1135 : 83404 : && address.terms[i].inner_likelihood == INNER_LIKELY)
1136 : : {
1137 : 40165 : if (chosen_stride)
1138 : : return;
1139 : 40043 : chosen_stride = address.terms[i].stride;
1140 : 40043 : if (address.terms[i].versioning_opportunity_p)
1141 : 575 : version_stride = chosen_stride;
1142 : : }
1143 : :
1144 : : /* If there are no likely inner strides, see if there is a single
1145 : : versioning opportunity for a stride that was rated as INNER_DONT_KNOW.
1146 : : See the comment above the function for the cases that this code
1147 : : handles. */
1148 : 52783 : if (!chosen_stride)
1149 : 39023 : for (unsigned int i = 0; i < address.terms.length (); ++i)
1150 : 26170 : if (version_stride != address.terms[i].stride
1151 : 16291 : && address.terms[i].inner_likelihood == INNER_DONT_KNOW
1152 : 36333 : && address.terms[i].versioning_opportunity_p)
1153 : : {
1154 : 1578 : if (version_stride)
1155 : : return;
1156 : : version_stride = address.terms[i].stride;
1157 : : }
1158 : :
1159 : 52774 : if (version_stride)
1160 : 2133 : version_for_unity (address.stmt, version_stride);
1161 : : }
1162 : :
1163 : : /* Treat EXPR * MULTIPLIER + OFFSET as a fragment of an address that addresses
1164 : : TYPE_SIZE bytes and record this address fragment for later processing.
1165 : : STMT is the statement that contains the address. */
1166 : :
1167 : : void
1168 : 141843 : loop_versioning::record_address_fragment (gimple *stmt,
1169 : : unsigned HOST_WIDE_INT type_size,
1170 : : tree expr,
1171 : : unsigned HOST_WIDE_INT multiplier,
1172 : : HOST_WIDE_INT offset)
1173 : : {
1174 : : /* We're only interested in computed values. */
1175 : 141843 : if (TREE_CODE (expr) != SSA_NAME)
1176 : 23058 : return;
1177 : :
1178 : : /* Quick exit if no part of the address is calculated in STMT's loop,
1179 : : since such addresses have no versioning opportunities. */
1180 : 127267 : class loop *loop = loop_containing_stmt (stmt);
1181 : 127267 : if (expr_invariant_in_loop_p (loop, expr))
1182 : : return;
1183 : :
1184 : : /* Set up an address_info for EXPR * MULTIPLIER. */
1185 : 119023 : address_info *address = XOBNEW (&m_obstack, address_info);
1186 : 119023 : new (address) address_info;
1187 : 119023 : address->stmt = stmt;
1188 : 119023 : address->loop = loop;
1189 : 119023 : address->base = NULL_TREE;
1190 : 119023 : address->terms.quick_grow (1);
1191 : 119023 : address->terms[0].expr = expr;
1192 : 119023 : address->terms[0].multiplier = multiplier;
1193 : 119023 : address->terms[0].stride = NULL_TREE;
1194 : 119023 : address->terms[0].inner_likelihood = INNER_UNLIKELY;
1195 : 119023 : address->terms[0].versioning_opportunity_p = false;
1196 : 119023 : address->min_offset = offset;
1197 : :
1198 : : /* Peel apart the expression into a sum of address_terms, where each
1199 : : term is multiplied by a constant. Treat a + b and a - b the same,
1200 : : since it doesn't matter for our purposes whether an address is
1201 : : increasing or decreasing. Distribute (a + b) * constant into
1202 : : a * constant + b * constant.
1203 : :
1204 : : We don't care which loop each term belongs to, since we want to
1205 : : examine as many candidate strides as possible when determining
1206 : : which is likely to be for the innermost dimension. We therefore
1207 : : don't limit the search to statements in STMT's loop. */
1208 : 466818 : for (unsigned int i = 0; i < address->terms.length (); )
1209 : : {
1210 : 347795 : if (gassign *assign = maybe_get_assign (address->terms[i].expr))
1211 : : {
1212 : 211536 : tree_code code = gimple_assign_rhs_code (assign);
1213 : 211536 : if (code == PLUS_EXPR
1214 : 211536 : || code == POINTER_PLUS_EXPR
1215 : 98769 : || code == MINUS_EXPR)
1216 : : {
1217 : 114112 : tree op1 = gimple_assign_rhs1 (assign);
1218 : 114112 : tree op2 = gimple_assign_rhs2 (assign);
1219 : 114112 : if (TREE_CODE (op2) == INTEGER_CST)
1220 : : {
1221 : 55010 : address->terms[i].expr = strip_casts (op1);
1222 : : /* This is heuristic only, so don't worry about truncation
1223 : : or overflow. */
1224 : 55010 : address->min_offset += (TREE_INT_CST_LOW (op2)
1225 : 55010 : * address->terms[i].multiplier);
1226 : 55010 : continue;
1227 : : }
1228 : 59102 : else if (address->terms.length () < address_info::MAX_TERMS)
1229 : : {
1230 : 59063 : unsigned int j = address->terms.length ();
1231 : 59063 : address->terms.quick_push (address->terms[i]);
1232 : 59063 : address->terms[i].expr = strip_casts (op1);
1233 : 59063 : address->terms[j].expr = strip_casts (op2);
1234 : 59063 : continue;
1235 : 59063 : }
1236 : : }
1237 : 97463 : if (code == MULT_EXPR)
1238 : : {
1239 : 67921 : tree op1 = gimple_assign_rhs1 (assign);
1240 : 67921 : tree op2 = gimple_assign_rhs2 (assign);
1241 : 67921 : if (multiply_term_by (address->terms[i], op2))
1242 : : {
1243 : 48363 : address->terms[i].expr = strip_casts (op1);
1244 : 48363 : continue;
1245 : : }
1246 : : }
1247 : 49100 : if (CONVERT_EXPR_CODE_P (code))
1248 : : {
1249 : 7273 : tree op1 = gimple_assign_rhs1 (assign);
1250 : 7273 : address->terms[i].expr = strip_casts (op1);
1251 : 7273 : continue;
1252 : 7273 : }
1253 : : }
1254 : 178086 : i += 1;
1255 : : }
1256 : :
1257 : : /* Peel off any symbolic pointer. */
1258 : 119023 : if (TREE_CODE (address->terms[0].expr) != SSA_NAME
1259 : 119023 : && address->terms[0].multiplier == 1)
1260 : : {
1261 : 4751 : if (address->terms.length () == 1)
1262 : : {
1263 : 0 : obstack_free (&m_obstack, address);
1264 : 0 : return;
1265 : : }
1266 : 4751 : address->base = address->terms[0].expr;
1267 : 4751 : address->terms.ordered_remove (0);
1268 : : }
1269 : :
1270 : : /* Require all remaining terms to be SSA names. (This could be false
1271 : : for unfolded statements, but they aren't worth dealing with.) */
1272 : 583580 : for (unsigned int i = 0; i < address->terms.length (); ++i)
1273 : 173005 : if (TREE_CODE (address->terms[i].expr) != SSA_NAME)
1274 : : {
1275 : 238 : obstack_free (&m_obstack, address);
1276 : 238 : return;
1277 : : }
1278 : :
1279 : : /* The loop above set MIN_OFFSET based on the first byte of the
1280 : : referenced data. Calculate the end + 1. */
1281 : 118785 : address->max_offset = address->min_offset + type_size;
1282 : :
1283 : : /* Put the terms into a canonical order for the hash table lookup below. */
1284 : 118785 : address->terms.qsort (compare_address_terms);
1285 : :
1286 : 118785 : if (dump_enabled_p ())
1287 : : {
1288 : 249 : dump_printf_loc (MSG_NOTE, stmt, "recording address fragment %T", expr);
1289 : 249 : if (multiplier != 1)
1290 : 118 : dump_printf (MSG_NOTE, " * %wd", multiplier);
1291 : 249 : dump_printf (MSG_NOTE, " = ");
1292 : 249 : dump_address_info (MSG_NOTE, *address);
1293 : 249 : dump_printf (MSG_NOTE, "\n");
1294 : : }
1295 : :
1296 : : /* Pool address information with the same terms (but potentially
1297 : : different offsets). */
1298 : 118785 : address_info **slot = m_address_table.find_slot (address, INSERT);
1299 : 118785 : if (address_info *old_address = *slot)
1300 : : {
1301 : : /* We've already seen an address with the same terms. Extend the
1302 : : offset range to account for this access. Doing this can paper
1303 : : over gaps, such as in:
1304 : :
1305 : : a[i * stride * 4] + a[i * stride * 4 + 3];
1306 : :
1307 : : where nothing references "+ 1" or "+ 2". However, the vectorizer
1308 : : handles such gapped accesses without problems, so it's not worth
1309 : : trying to exclude them. */
1310 : 61032 : if (old_address->min_offset > address->min_offset)
1311 : 12602 : old_address->min_offset = address->min_offset;
1312 : 61032 : if (old_address->max_offset < address->max_offset)
1313 : 12326 : old_address->max_offset = address->max_offset;
1314 : 61032 : obstack_free (&m_obstack, address);
1315 : : }
1316 : : else
1317 : : {
1318 : : /* This is the first time we've seen an address with these terms. */
1319 : 57753 : *slot = address;
1320 : 57753 : m_address_list.safe_push (address);
1321 : : }
1322 : : }
1323 : :
1324 : : /* Analyze expression EXPR, which occurs in STMT. */
1325 : :
1326 : : void
1327 : 1311194 : loop_versioning::analyze_expr (gimple *stmt, tree expr)
1328 : : {
1329 : 1311194 : unsigned HOST_WIDE_INT type_size;
1330 : :
1331 : 1461739 : while (handled_component_p (expr))
1332 : : {
1333 : : /* See whether we can use versioning to avoid a multiplication
1334 : : in an array index. */
1335 : 150545 : if (TREE_CODE (expr) == ARRAY_REF
1336 : 150545 : && acceptable_type_p (TREE_TYPE (expr), &type_size))
1337 : 83755 : record_address_fragment (stmt, type_size,
1338 : 83755 : TREE_OPERAND (expr, 1), type_size, 0);
1339 : 150545 : expr = TREE_OPERAND (expr, 0);
1340 : : }
1341 : :
1342 : : /* See whether we can use versioning to avoid a multiplication
1343 : : in the pointer calculation of a MEM_REF. */
1344 : 1311194 : if (TREE_CODE (expr) == MEM_REF
1345 : 1311194 : && acceptable_type_p (TREE_TYPE (expr), &type_size))
1346 : 58088 : record_address_fragment (stmt, type_size, TREE_OPERAND (expr, 0), 1,
1347 : : /* This is heuristic only, so don't worry
1348 : : about truncation or overflow. */
1349 : 58088 : TREE_INT_CST_LOW (TREE_OPERAND (expr, 1)));
1350 : :
1351 : : /* These would be easy to handle if they existed at this stage. */
1352 : 1311194 : gcc_checking_assert (TREE_CODE (expr) != TARGET_MEM_REF);
1353 : 1311194 : }
1354 : :
1355 : : /* Analyze all the statements in BB looking for useful version checks.
1356 : : Return true on success, false if something prevents the block from
1357 : : being versioned. */
1358 : :
1359 : : bool
1360 : 205710 : loop_versioning::analyze_block (basic_block bb)
1361 : : {
1362 : 205710 : class loop *loop = bb->loop_father;
1363 : 205710 : loop_info &li = get_loop_info (loop);
1364 : 1131066 : for (gimple_stmt_iterator gsi = gsi_start_bb (bb); !gsi_end_p (gsi);
1365 : 719646 : gsi_next (&gsi))
1366 : : {
1367 : 735103 : gimple *stmt = gsi_stmt (gsi);
1368 : 735103 : if (is_gimple_debug (stmt))
1369 : 90765 : continue;
1370 : :
1371 : 664574 : if (expensive_stmt_p (stmt))
1372 : : {
1373 : 15457 : if (dump_enabled_p ())
1374 : 58 : dump_printf_loc (MSG_NOTE, stmt, "expensive statement"
1375 : : " prevents versioning: %G", stmt);
1376 : 15457 : return false;
1377 : : }
1378 : :
1379 : : /* Only look for direct versioning opportunities in inner loops
1380 : : since the benefit tends to be much smaller for outer loops. */
1381 : 628881 : if (!loop->inner)
1382 : : {
1383 : 533453 : unsigned int nops = gimple_num_ops (stmt);
1384 : 2008827 : for (unsigned int i = 0; i < nops; ++i)
1385 : 1475374 : if (tree op = gimple_op (stmt, i))
1386 : 1311194 : analyze_expr (stmt, op);
1387 : : }
1388 : :
1389 : : /* The point of the instruction limit is to prevent excessive
1390 : : code growth, so this is a size-based estimate even though
1391 : : the optimization is aimed at speed. */
1392 : 628881 : li.num_insns += estimate_num_insns (stmt, &eni_size_weights);
1393 : : }
1394 : :
1395 : : return true;
1396 : : }
1397 : :
1398 : : /* Analyze all the blocks in the function, looking for useful version checks.
1399 : : Return true if we found one. */
1400 : :
1401 : : bool
1402 : 23966 : loop_versioning::analyze_blocks ()
1403 : : {
1404 : 23966 : AUTO_DUMP_SCOPE ("analyze_blocks",
1405 : : dump_user_location_t::from_function_decl (m_fn->decl));
1406 : :
1407 : : /* For now we don't try to version the whole function, although
1408 : : versioning at that level could be useful in some cases. */
1409 : 23966 : get_loop_info (get_loop (m_fn, 0)).rejected_p = true;
1410 : :
1411 : 138705 : for (auto loop : loops_list (cfun, LI_FROM_INNERMOST))
1412 : : {
1413 : 66807 : loop_info &linfo = get_loop_info (loop);
1414 : :
1415 : : /* Ignore cold loops. */
1416 : 66807 : if (!optimize_loop_for_speed_p (loop))
1417 : 1385 : linfo.rejected_p = true;
1418 : :
1419 : : /* See whether an inner loop prevents versioning of this loop. */
1420 : 66807 : if (!linfo.rejected_p)
1421 : 76196 : for (class loop *inner = loop->inner; inner; inner = inner->next)
1422 : 14576 : if (get_loop_info (inner).rejected_p)
1423 : : {
1424 : 3802 : linfo.rejected_p = true;
1425 : 3802 : break;
1426 : : }
1427 : :
1428 : : /* If versioning the loop is still a possibility, examine the
1429 : : statements in the loop to look for versioning opportunities. */
1430 : 66807 : if (!linfo.rejected_p)
1431 : : {
1432 : 61620 : void *start_point = obstack_alloc (&m_obstack, 0);
1433 : :
1434 : 251873 : for (basic_block bb = linfo.block_list; bb;
1435 : 190253 : bb = m_next_block_in_loop[bb->index])
1436 : 205710 : if (!analyze_block (bb))
1437 : : {
1438 : 15457 : linfo.rejected_p = true;
1439 : 15457 : break;
1440 : : }
1441 : :
1442 : 61620 : if (!linfo.rejected_p)
1443 : : {
1444 : : /* Process any queued address fragments, now that we have
1445 : : complete grouping information. */
1446 : : address_info *address;
1447 : : unsigned int i;
1448 : 99068 : FOR_EACH_VEC_ELT (m_address_list, i, address)
1449 : 52905 : analyze_address_fragment (*address);
1450 : : }
1451 : :
1452 : 61620 : m_address_table.empty ();
1453 : 61620 : m_address_list.truncate (0);
1454 : 61620 : obstack_free (&m_obstack, start_point);
1455 : : }
1456 : 23966 : }
1457 : :
1458 : 23966 : return m_num_conditions != 0;
1459 : : }
1460 : :
1461 : : /* Use the ranges in VRS to remove impossible versioning conditions from
1462 : : LOOP. */
1463 : :
1464 : : void
1465 : 3613 : loop_versioning::prune_loop_conditions (class loop *loop)
1466 : : {
1467 : 3613 : loop_info &li = get_loop_info (loop);
1468 : :
1469 : 3613 : int to_remove = -1;
1470 : 3613 : bitmap_iterator bi;
1471 : 3613 : unsigned int i;
1472 : 3613 : int_range_max r;
1473 : 4804 : EXECUTE_IF_SET_IN_BITMAP (&li.unity_names, 0, i, bi)
1474 : : {
1475 : 1191 : tree name = ssa_name (i);
1476 : 1191 : gimple *stmt = first_stmt (loop->header);
1477 : :
1478 : 2382 : if (get_range_query (cfun)->range_of_expr (r, name, stmt)
1479 : 2382 : && !r.contains_p (wi::one (TYPE_PRECISION (TREE_TYPE (name)))))
1480 : : {
1481 : 40 : if (dump_enabled_p ())
1482 : 2 : dump_printf_loc (MSG_NOTE, find_loop_location (loop),
1483 : : "%T can never be 1 in this loop\n", name);
1484 : :
1485 : 40 : if (to_remove >= 0)
1486 : 0 : bitmap_clear_bit (&li.unity_names, to_remove);
1487 : 40 : to_remove = i;
1488 : 40 : m_num_conditions -= 1;
1489 : : }
1490 : : }
1491 : 3613 : if (to_remove >= 0)
1492 : 40 : bitmap_clear_bit (&li.unity_names, to_remove);
1493 : 3613 : }
1494 : :
1495 : : /* Remove any scheduled loop version conditions that will never be true.
1496 : : Return true if any remain. */
1497 : :
1498 : : bool
1499 : 562 : loop_versioning::prune_conditions ()
1500 : : {
1501 : 562 : AUTO_DUMP_SCOPE ("prune_loop_conditions",
1502 : : dump_user_location_t::from_function_decl (m_fn->decl));
1503 : :
1504 : 562 : calculate_dominance_info (CDI_DOMINATORS);
1505 : 562 : lv_dom_walker dom_walker (*this);
1506 : 562 : dom_walker.walk (ENTRY_BLOCK_PTR_FOR_FN (m_fn));
1507 : 562 : return m_num_conditions != 0;
1508 : 562 : }
1509 : :
1510 : : /* Merge the version checks for INNER into immediately-enclosing loop
1511 : : OUTER. */
1512 : :
1513 : : void
1514 : 402 : loop_versioning::merge_loop_info (class loop *outer, class loop *inner)
1515 : : {
1516 : 402 : loop_info &inner_li = get_loop_info (inner);
1517 : 402 : loop_info &outer_li = get_loop_info (outer);
1518 : :
1519 : 402 : if (dump_enabled_p ())
1520 : : {
1521 : 11 : bitmap_iterator bi;
1522 : 11 : unsigned int i;
1523 : 22 : EXECUTE_IF_SET_IN_BITMAP (&inner_li.unity_names, 0, i, bi)
1524 : 11 : if (!bitmap_bit_p (&outer_li.unity_names, i))
1525 : 11 : dump_printf_loc (MSG_NOTE, find_loop_location (inner),
1526 : : "hoisting check that %T == 1 to outer loop\n",
1527 : 11 : ssa_name (i));
1528 : : }
1529 : :
1530 : 402 : bitmap_ior_into (&outer_li.unity_names, &inner_li.unity_names);
1531 : 415 : if (loop_depth (outer_li.outermost) < loop_depth (inner_li.outermost))
1532 : 260 : outer_li.outermost = inner_li.outermost;
1533 : 402 : }
1534 : :
1535 : : /* Add LOOP to the queue of loops to version. */
1536 : :
1537 : : void
1538 : 962 : loop_versioning::add_loop_to_queue (class loop *loop)
1539 : : {
1540 : 962 : loop_info &li = get_loop_info (loop);
1541 : :
1542 : 962 : if (dump_enabled_p ())
1543 : 68 : dump_printf_loc (MSG_NOTE, find_loop_location (loop),
1544 : : "queuing this loop for versioning\n");
1545 : 962 : m_loops_to_version.safe_push (loop);
1546 : :
1547 : : /* Don't try to version superloops. */
1548 : 962 : li.rejected_p = true;
1549 : 962 : }
1550 : :
1551 : : /* Decide whether the cost model would allow us to version LOOP,
1552 : : either directly or as part of a parent loop, and return true if so.
1553 : : This does not imply that the loop is actually worth versioning in its
1554 : : own right, just that it would be valid to version it if something
1555 : : benefited.
1556 : :
1557 : : We have already made this decision for all inner loops of LOOP. */
1558 : :
1559 : : bool
1560 : 3002 : loop_versioning::decide_whether_loop_is_versionable (class loop *loop)
1561 : : {
1562 : 3002 : loop_info &li = get_loop_info (loop);
1563 : :
1564 : 3002 : if (li.rejected_p)
1565 : : return false;
1566 : :
1567 : : /* Examine the decisions made for inner loops. */
1568 : 2755 : for (class loop *inner = loop->inner; inner; inner = inner->next)
1569 : : {
1570 : 514 : loop_info &inner_li = get_loop_info (inner);
1571 : 514 : if (inner_li.rejected_p)
1572 : : {
1573 : 6 : if (dump_enabled_p ())
1574 : 0 : dump_printf_loc (MSG_NOTE, find_loop_location (loop),
1575 : : "not versioning this loop because one of its"
1576 : : " inner loops should not be versioned\n");
1577 : 6 : return false;
1578 : : }
1579 : :
1580 : 508 : if (inner_li.worth_versioning_p ())
1581 : 299 : li.subloops_benefit_p = true;
1582 : :
1583 : : /* Accumulate the number of instructions from subloops that are not
1584 : : the innermost, or that don't benefit from versioning. Only the
1585 : : instructions from innermost loops that benefit from versioning
1586 : : should be weighed against loop-versioning-max-inner-insns;
1587 : : everything else should be weighed against
1588 : : loop-versioning-max-outer-insns. */
1589 : 508 : if (!inner_li.worth_versioning_p () || inner->inner)
1590 : : {
1591 : 236 : if (dump_enabled_p ())
1592 : 0 : dump_printf_loc (MSG_NOTE, find_loop_location (loop),
1593 : : "counting %d instructions from this loop"
1594 : : " against its parent loop\n", inner_li.num_insns);
1595 : 236 : li.num_insns += inner_li.num_insns;
1596 : : }
1597 : : }
1598 : :
1599 : : /* Enforce the size limits. */
1600 : 3516 : if (li.worth_versioning_p ())
1601 : : {
1602 : 1237 : unsigned int max_num_insns = max_insns_for_loop (loop);
1603 : 1237 : if (dump_enabled_p ())
1604 : 79 : dump_printf_loc (MSG_NOTE, find_loop_location (loop),
1605 : : "this loop has %d instructions, against"
1606 : : " a versioning limit of %d\n",
1607 : : li.num_insns, max_num_insns);
1608 : 1237 : if (li.num_insns > max_num_insns)
1609 : : {
1610 : 10 : if (dump_enabled_p ())
1611 : 0 : dump_printf_loc (MSG_MISSED_OPTIMIZATION
1612 : : | MSG_PRIORITY_USER_FACING,
1613 : 0 : find_loop_location (loop),
1614 : : "this loop is too big to version");
1615 : 10 : return false;
1616 : : }
1617 : : }
1618 : :
1619 : : /* Hoist all version checks from subloops to this loop. */
1620 : 2633 : for (class loop *subloop = loop->inner; subloop; subloop = subloop->next)
1621 : 402 : merge_loop_info (loop, subloop);
1622 : :
1623 : : return true;
1624 : : }
1625 : :
1626 : : /* Decide which loops to version and add them to the versioning queue.
1627 : : Return true if there are any loops to version. */
1628 : :
1629 : : bool
1630 : 551 : loop_versioning::make_versioning_decisions ()
1631 : : {
1632 : 551 : AUTO_DUMP_SCOPE ("make_versioning_decisions",
1633 : : dump_user_location_t::from_function_decl (m_fn->decl));
1634 : :
1635 : 4655 : for (auto loop : loops_list (cfun, LI_FROM_INNERMOST))
1636 : : {
1637 : 3002 : loop_info &linfo = get_loop_info (loop);
1638 : 3002 : if (decide_whether_loop_is_versionable (loop))
1639 : : {
1640 : : /* Commit to versioning LOOP directly if we can't hoist the
1641 : : version checks any further. */
1642 : 6460 : if (linfo.worth_versioning_p ()
1643 : 2231 : && (loop_depth (loop) == 1 || linfo.outermost == loop))
1644 : 896 : add_loop_to_queue (loop);
1645 : : }
1646 : : else
1647 : : {
1648 : : /* We can't version this loop, so individually version any
1649 : : subloops that would benefit and haven't been versioned yet. */
1650 : 771 : linfo.rejected_p = true;
1651 : 1405 : for (class loop *subloop = loop->inner; subloop;
1652 : 634 : subloop = subloop->next)
1653 : 832 : if (get_loop_info (subloop).worth_versioning_p ())
1654 : 66 : add_loop_to_queue (subloop);
1655 : : }
1656 : 551 : }
1657 : :
1658 : 1102 : return !m_loops_to_version.is_empty ();
1659 : : }
1660 : :
1661 : : /* Attempt to implement loop versioning for LOOP, using the information
1662 : : cached in the associated loop_info. Return true on success. */
1663 : :
1664 : : bool
1665 : 962 : loop_versioning::version_loop (class loop *loop)
1666 : : {
1667 : 962 : loop_info &li = get_loop_info (loop);
1668 : :
1669 : : /* Build up a condition that selects the original loop instead of
1670 : : the simplified loop. */
1671 : 962 : tree cond = boolean_false_node;
1672 : 962 : bitmap_iterator bi;
1673 : 962 : unsigned int i;
1674 : 2109 : EXECUTE_IF_SET_IN_BITMAP (&li.unity_names, 0, i, bi)
1675 : : {
1676 : 1147 : tree name = ssa_name (i);
1677 : 1147 : tree ne_one = fold_build2 (NE_EXPR, boolean_type_node, name,
1678 : : build_one_cst (TREE_TYPE (name)));
1679 : 1147 : cond = fold_build2 (TRUTH_OR_EXPR, boolean_type_node, cond, ne_one);
1680 : : }
1681 : :
1682 : : /* Convert the condition into a suitable gcond. */
1683 : 962 : gimple_seq stmts = NULL;
1684 : 962 : cond = force_gimple_operand_1 (cond, &stmts, is_gimple_condexpr_for_cond,
1685 : : NULL_TREE);
1686 : :
1687 : : /* Version the loop. */
1688 : 962 : initialize_original_copy_tables ();
1689 : 962 : basic_block cond_bb;
1690 : 962 : li.optimized_loop = loop_version (loop, cond, &cond_bb,
1691 : : profile_probability::unlikely (),
1692 : : profile_probability::likely (),
1693 : : profile_probability::unlikely (),
1694 : : profile_probability::likely (), true);
1695 : 962 : free_original_copy_tables ();
1696 : 962 : if (!li.optimized_loop)
1697 : : {
1698 : 2 : if (dump_enabled_p ())
1699 : 0 : dump_printf_loc (MSG_MISSED_OPTIMIZATION, find_loop_location (loop),
1700 : : "tried but failed to version this loop for when"
1701 : : " certain strides are 1\n");
1702 : 2 : return false;
1703 : : }
1704 : :
1705 : 960 : if (dump_enabled_p ())
1706 : 68 : dump_printf_loc (MSG_OPTIMIZED_LOCATIONS, find_loop_location (loop),
1707 : : "versioned this loop for when certain strides are 1\n");
1708 : :
1709 : : /* Insert the statements that feed COND. */
1710 : 960 : if (stmts)
1711 : : {
1712 : 111 : gimple_stmt_iterator gsi = gsi_last_bb (cond_bb);
1713 : 111 : gsi_insert_seq_before (&gsi, stmts, GSI_SAME_STMT);
1714 : : }
1715 : :
1716 : : return true;
1717 : : }
1718 : :
1719 : : /* Attempt to version all loops in the versioning queue. */
1720 : :
1721 : : void
1722 : 551 : loop_versioning::implement_versioning_decisions ()
1723 : : {
1724 : : /* No AUTO_DUMP_SCOPE here since all messages are top-level and
1725 : : user-facing at this point. */
1726 : :
1727 : 551 : bool any_succeeded_p = false;
1728 : 551 : class loop *loop;
1729 : 551 : unsigned int i;
1730 : 1513 : FOR_EACH_VEC_ELT (m_loops_to_version, i, loop)
1731 : 962 : if (version_loop (loop))
1732 : 960 : any_succeeded_p = true;
1733 : 551 : if (!any_succeeded_p)
1734 : 551 : return;
1735 : :
1736 : 549 : update_ssa (TODO_update_ssa);
1737 : :
1738 : : /* Simplify the new loop, which is used when COND is false. */
1739 : 1509 : FOR_EACH_VEC_ELT (m_loops_to_version, i, loop)
1740 : : {
1741 : 960 : loop_info &linfo = get_loop_info (loop);
1742 : 960 : if (linfo.optimized_loop)
1743 : 960 : name_prop (linfo).substitute_and_fold (linfo.optimized_loop->header);
1744 : : }
1745 : : }
1746 : :
1747 : : /* Run the pass and return a set of TODO_* flags. */
1748 : :
1749 : : unsigned int
1750 : 23966 : loop_versioning::run ()
1751 : : {
1752 : 23966 : gcc_assert (scev_initialized_p ());
1753 : :
1754 : 23966 : if (analyze_blocks ()
1755 : 562 : && prune_conditions ()
1756 : 24517 : && make_versioning_decisions ())
1757 : 551 : implement_versioning_decisions ();
1758 : :
1759 : 23966 : return 0;
1760 : : }
1761 : :
1762 : : /* Loop versioning pass. */
1763 : :
1764 : : const pass_data pass_data_loop_versioning =
1765 : : {
1766 : : GIMPLE_PASS, /* type */
1767 : : "lversion", /* name */
1768 : : OPTGROUP_LOOP, /* optinfo_flags */
1769 : : TV_LOOP_VERSIONING, /* tv_id */
1770 : : PROP_cfg, /* properties_required */
1771 : : 0, /* properties_provided */
1772 : : 0, /* properties_destroyed */
1773 : : 0, /* todo_flags_start */
1774 : : 0, /* todo_flags_finish */
1775 : : };
1776 : :
1777 : : class pass_loop_versioning : public gimple_opt_pass
1778 : : {
1779 : : public:
1780 : 281914 : pass_loop_versioning (gcc::context *ctxt)
1781 : 563828 : : gimple_opt_pass (pass_data_loop_versioning, ctxt)
1782 : : {}
1783 : :
1784 : : /* opt_pass methods: */
1785 : 219536 : bool gate (function *) final override
1786 : : {
1787 : 219536 : return flag_version_loops_for_strides;
1788 : : }
1789 : : unsigned int execute (function *) final override;
1790 : : };
1791 : :
1792 : : unsigned int
1793 : 23966 : pass_loop_versioning::execute (function *fn)
1794 : : {
1795 : 47932 : if (number_of_loops (fn) <= 1)
1796 : : return 0;
1797 : :
1798 : 23966 : enable_ranger (fn);
1799 : 23966 : unsigned int ret = loop_versioning (fn).run ();
1800 : 23966 : disable_ranger (fn);
1801 : 23966 : return ret;
1802 : : }
1803 : :
1804 : : } // anon namespace
1805 : :
1806 : : gimple_opt_pass *
1807 : 281914 : make_pass_loop_versioning (gcc::context *ctxt)
1808 : : {
1809 : 281914 : return new pass_loop_versioning (ctxt);
1810 : : }
|