LCOV - code coverage report
Current view: top level - gcc - gimple-loop-versioning.cc (source / functions) Coverage Total Hit
Test: gcc.info Lines: 96.9 % 556 539
Test Date: 2026-02-28 14:20:25 Functions: 100.0 % 43 43
Legend: Lines:     hit not hit

            Line data    Source code
       1              : /* Loop versioning pass.
       2              :    Copyright (C) 2018-2026 Free Software Foundation, Inc.
       3              : 
       4              : This file is part of GCC.
       5              : 
       6              : GCC is free software; you can redistribute it and/or modify it
       7              : under the terms of the GNU General Public License as published by the
       8              : Free Software Foundation; either version 3, or (at your option) any
       9              : later version.
      10              : 
      11              : GCC is distributed in the hope that it will be useful, but WITHOUT
      12              : ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
      13              : FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
      14              : for more details.
      15              : 
      16              : You should have received a copy of the GNU General Public License
      17              : along with GCC; see the file COPYING3.  If not see
      18              : <http://www.gnu.org/licenses/>.  */
      19              : 
      20              : #include "config.h"
      21              : #include "system.h"
      22              : #include "coretypes.h"
      23              : #include "backend.h"
      24              : #include "tree.h"
      25              : #include "gimple.h"
      26              : #include "gimple-iterator.h"
      27              : #include "tree-pass.h"
      28              : #include "gimplify-me.h"
      29              : #include "cfgloop.h"
      30              : #include "tree-ssa-loop.h"
      31              : #include "ssa.h"
      32              : #include "tree-scalar-evolution.h"
      33              : #include "tree-ssa-loop-ivopts.h"
      34              : #include "fold-const.h"
      35              : #include "tree-ssa-propagate.h"
      36              : #include "tree-inline.h"
      37              : #include "domwalk.h"
      38              : #include "tree-vectorizer.h"
      39              : #include "omp-general.h"
      40              : #include "predict.h"
      41              : #include "tree-into-ssa.h"
      42              : #include "gimple-range.h"
      43              : #include "tree-cfg.h"
      44              : #include "hierarchical_discriminator.h"
      45              : 
      46              : namespace {
      47              : 
      48              : /* This pass looks for loops that could be simplified if certain loop
      49              :    invariant conditions were true.  It is effectively a form of loop
      50              :    splitting in which the pass produces the split conditions itself,
      51              :    instead of using ones that are already present in the IL.
      52              : 
      53              :    Versioning for when strides are 1
      54              :    ---------------------------------
      55              : 
      56              :    At the moment the only thing the pass looks for are memory references
      57              :    like:
      58              : 
      59              :      for (auto i : ...)
      60              :        ...x[i * stride]...
      61              : 
      62              :    It considers changing such loops to:
      63              : 
      64              :      if (stride == 1)
      65              :        for (auto i : ...)    [A]
      66              :          ...x[i]...
      67              :      else
      68              :        for (auto i : ...)    [B]
      69              :          ...x[i * stride]...
      70              : 
      71              :    This can have several benefits:
      72              : 
      73              :    (1) [A] is often easier or cheaper to vectorize than [B].
      74              : 
      75              :    (2) The scalar code in [A] is simpler than the scalar code in [B]
      76              :        (if the loops cannot be vectorized or need an epilogue loop).
      77              : 
      78              :    (3) We might recognize [A] as a pattern, such as a memcpy or memset.
      79              : 
      80              :    (4) [A] has simpler address evolutions, which can help other passes
      81              :        like loop interchange.
      82              : 
      83              :    The optimization is particularly useful for assumed-shape arrays in
      84              :    Fortran, where the stride of the innermost dimension depends on the
      85              :    array descriptor but is often equal to 1 in practice.  For example:
      86              : 
      87              :      subroutine f1(x)
      88              :        real :: x(:)
      89              :        x(:) = 100
      90              :      end subroutine f1
      91              : 
      92              :    generates the equivalent of:
      93              : 
      94              :      raw_stride = *x.dim[0].stride;
      95              :      stride = raw_stride != 0 ? raw_stride : 1;
      96              :      x_base = *x.data;
      97              :      ...
      98              :      tmp1 = stride * S;
      99              :      tmp2 = tmp1 - stride;
     100              :      *x_base[tmp2] = 1.0e+2;
     101              : 
     102              :    but in the common case that stride == 1, the last three statements
     103              :    simplify to:
     104              : 
     105              :      tmp3 = S + -1;
     106              :      *x_base[tmp3] = 1.0e+2;
     107              : 
     108              :    The optimization is in principle very simple.  The difficult parts are:
     109              : 
     110              :    (a) deciding which parts of a general address calculation correspond
     111              :        to the inner dimension of an array, since this usually isn't explicit
     112              :        in the IL, and for C often isn't even explicit in the source code
     113              : 
     114              :    (b) estimating when the transformation is worthwhile
     115              : 
     116              :    Structure
     117              :    ---------
     118              : 
     119              :    The pass has four phases:
     120              : 
     121              :    (1) Walk through the statements looking for and recording potential
     122              :        versioning opportunities.  Stop if there are none.
     123              : 
     124              :    (2) Use context-sensitive range information to see whether any versioning
     125              :        conditions are impossible in practice.  Remove them if so, and stop
     126              :        if no opportunities remain.
     127              : 
     128              :        (We do this only after (1) to keep compile time down when no
     129              :        versioning opportunities exist.)
     130              : 
     131              :    (3) Apply the cost model.  Decide which versioning opportunities are
     132              :        worthwhile and at which nesting level they should be applied.
     133              : 
     134              :    (4) Attempt to version all the loops selected by (3), so that:
     135              : 
     136              :          for (...)
     137              :            ...
     138              : 
     139              :        becomes:
     140              : 
     141              :          if (!cond)
     142              :            for (...) // Original loop
     143              :              ...
     144              :          else
     145              :            for (...) // New loop
     146              :              ...
     147              : 
     148              :        Use the version condition COND to simplify the new loop.  */
     149              : 
     150              : /* Enumerates the likelihood that a particular value indexes the inner
     151              :    dimension of an array.  */
     152              : enum inner_likelihood {
     153              :   INNER_UNLIKELY,
     154              :   INNER_DONT_KNOW,
     155              :   INNER_LIKELY
     156              : };
     157              : 
     158              : /* Information about one term of an address_info.  */
     159              : struct address_term_info
     160              : {
     161              :   /* The value of the term is EXPR * MULTIPLIER.  */
     162              :   tree expr;
     163              :   unsigned HOST_WIDE_INT multiplier;
     164              : 
     165              :   /* The stride applied by EXPR in each iteration of some unrecorded loop,
     166              :      or null if no stride has been identified.  */
     167              :   tree stride;
     168              : 
     169              :   /* Enumerates the likelihood that EXPR indexes the inner dimension
     170              :      of an array.  */
     171              :   enum inner_likelihood inner_likelihood;
     172              : 
     173              :   /* True if STRIDE == 1 is a versioning opportunity when considered
     174              :      in isolation.  */
     175              :   bool versioning_opportunity_p;
     176              : };
     177              : 
     178              : /* Information about an address calculation, and the range of constant
     179              :    offsets applied to it.  */
     180       147889 : class address_info
     181              : {
     182              : public:
     183              :   static const unsigned int MAX_TERMS = 8;
     184              : 
     185              :   /* One statement that calculates the address.  If multiple statements
     186              :      share the same address, we only record the first.  */
     187              :   gimple *stmt;
     188              : 
     189              :   /* The loop containing STMT (cached for convenience).  If multiple
     190              :      statements share the same address, they all belong to this loop.  */
     191              :   class loop *loop;
     192              : 
     193              :   /* A decomposition of the calculation into a sum of terms plus an
     194              :      optional base.  When BASE is provided, it is never an SSA name.
     195              :      Once initialization is complete, all members of TERMs are SSA names.  */
     196              :   tree base;
     197              :   auto_vec<address_term_info, MAX_TERMS> terms;
     198              : 
     199              :   /* All bytes accessed from the address fall in the offset range
     200              :      [MIN_OFFSET, MAX_OFFSET).  */
     201              :   HOST_WIDE_INT min_offset, max_offset;
     202              : };
     203              : 
     204              : /* Stores addresses based on their base and terms (ignoring the offsets).  */
     205              : struct address_info_hasher : nofree_ptr_hash <address_info>
     206              : {
     207              :   static hashval_t hash (const address_info *);
     208              :   static bool equal (const address_info *, const address_info *);
     209              : };
     210              : 
     211              : /* Information about the versioning we'd like to apply to a loop.  */
     212       152684 : class loop_info
     213              : {
     214              : public:
     215              :   bool worth_versioning_p () const;
     216              : 
     217              :   /* True if we've decided not to version this loop.  The remaining
     218              :      fields are meaningless if so.  */
     219              :   bool rejected_p;
     220              : 
     221              :   /* True if at least one subloop of this loop benefits from versioning.  */
     222              :   bool subloops_benefit_p;
     223              : 
     224              :   /* An estimate of the total number of instructions in the loop,
     225              :      excluding those in subloops that benefit from versioning.  */
     226              :   unsigned int num_insns;
     227              : 
     228              :   /* The outermost loop that can handle all the version checks
     229              :      described below.  */
     230              :   class loop *outermost;
     231              : 
     232              :   /* The first entry in the list of blocks that belong to this loop
     233              :      (and not to subloops).  m_next_block_in_loop provides the chain
     234              :      pointers for the list.  */
     235              :   basic_block block_list;
     236              : 
     237              :   /* We'd like to version the loop for the case in which these SSA names
     238              :      (keyed off their SSA_NAME_VERSION) are all equal to 1 at runtime.  */
     239              :   bitmap_head unity_names;
     240              : 
     241              :   /* If versioning succeeds, this points the version of the loop that
     242              :      assumes the version conditions holds.  */
     243              :   class loop *optimized_loop;
     244              : };
     245              : 
     246              : /* The main pass structure.  */
     247              : class loop_versioning
     248              : {
     249              : public:
     250              :   loop_versioning (function *);
     251              :   ~loop_versioning ();
     252              :   unsigned int run ();
     253              : 
     254              : private:
     255              :   /* Used to walk the dominator tree to find loop versioning conditions
     256              :      that are always false.  */
     257         1414 :   class lv_dom_walker : public dom_walker
     258              :   {
     259              :   public:
     260              :     lv_dom_walker (loop_versioning &);
     261              : 
     262              :     edge before_dom_children (basic_block) final override;
     263              : 
     264              :   private:
     265              :     /* The parent pass.  */
     266              :     loop_versioning &m_lv;
     267              :   };
     268              : 
     269              :   /* Used to simplify statements based on conditions that are established
     270              :      by the version checks.  */
     271         2384 :   class name_prop : public substitute_and_fold_engine
     272              :   {
     273              :   public:
     274         2384 :     name_prop (loop_info &li) : m_li (li) {}
     275              :     tree value_of_expr (tree name, gimple *) final override;
     276              : 
     277              :   private:
     278              :     /* Information about the versioning we've performed on the loop.  */
     279              :     loop_info &m_li;
     280              :   };
     281              : 
     282      2292126 :   loop_info &get_loop_info (class loop *loop) { return m_loops[loop->num]; }
     283              : 
     284              :   unsigned int max_insns_for_loop (class loop *);
     285              :   bool expensive_stmt_p (gimple *);
     286              : 
     287              :   void version_for_unity (gimple *, tree);
     288              :   bool acceptable_multiplier_p (tree, unsigned HOST_WIDE_INT,
     289              :                                 unsigned HOST_WIDE_INT * = 0);
     290              :   bool acceptable_type_p (tree, unsigned HOST_WIDE_INT *);
     291              :   bool multiply_term_by (address_term_info &, tree);
     292              :   inner_likelihood get_inner_likelihood (tree, unsigned HOST_WIDE_INT);
     293              :   void dump_inner_likelihood (address_info &, address_term_info &);
     294              :   void analyze_stride (address_info &, address_term_info &,
     295              :                        tree, class loop *);
     296              :   bool find_per_loop_multiplication (address_info &, address_term_info &);
     297              :   bool analyze_term_using_scevs (address_info &, address_term_info &);
     298              :   void analyze_arbitrary_term (address_info &, address_term_info &);
     299              :   void analyze_address_fragment (address_info &);
     300              :   void record_address_fragment (gimple *, unsigned HOST_WIDE_INT,
     301              :                                 tree, unsigned HOST_WIDE_INT, HOST_WIDE_INT);
     302              :   void analyze_expr (gimple *, tree);
     303              :   bool analyze_block (basic_block);
     304              :   bool analyze_blocks ();
     305              : 
     306              :   void prune_loop_conditions (class loop *);
     307              :   bool prune_conditions ();
     308              : 
     309              :   void merge_loop_info (class loop *, class loop *);
     310              :   void add_loop_to_queue (class loop *);
     311              :   bool decide_whether_loop_is_versionable (class loop *);
     312              :   bool make_versioning_decisions ();
     313              : 
     314              :   bool version_loop (class loop *);
     315              :   void implement_versioning_decisions ();
     316              : 
     317              :   /* The function we're optimizing.  */
     318              :   function *m_fn;
     319              : 
     320              :   /* The obstack to use for all pass-specific bitmaps.  */
     321              :   bitmap_obstack m_bitmap_obstack;
     322              : 
     323              :   /* An obstack to use for general allocation.  */
     324              :   obstack m_obstack;
     325              : 
     326              :   /* The total number of loop version conditions we've found.  */
     327              :   unsigned int m_num_conditions;
     328              : 
     329              :   /* Assume that an address fragment of the form i * stride * scale
     330              :      (for variable stride and constant scale) will not benefit from
     331              :      versioning for stride == 1 when scale is greater than this value.  */
     332              :   unsigned HOST_WIDE_INT m_maximum_scale;
     333              : 
     334              :   /* Information about each loop.  */
     335              :   auto_vec<loop_info> m_loops;
     336              : 
     337              :   /* Used to form a linked list of blocks that belong to a loop,
     338              :      started by loop_info::block_list.  */
     339              :   auto_vec<basic_block> m_next_block_in_loop;
     340              : 
     341              :   /* The list of loops that we've decided to version.  */
     342              :   auto_vec<class loop *> m_loops_to_version;
     343              : 
     344              :   /* A table of addresses in the current loop, keyed off their values
     345              :      but not their offsets.  */
     346              :   hash_table <address_info_hasher> m_address_table;
     347              : 
     348              :   /* A list of all addresses in M_ADDRESS_TABLE, in a predictable order.  */
     349              :   auto_vec <address_info *, 32> m_address_list;
     350              : };
     351              : 
     352              : /* If EXPR is an SSA name and not a default definition, return the
     353              :    defining statement, otherwise return null.  */
     354              : 
     355              : static gimple *
     356      1269655 : maybe_get_stmt (tree expr)
     357              : {
     358      1094724 :   if (TREE_CODE (expr) == SSA_NAME && !SSA_NAME_IS_DEFAULT_DEF (expr))
     359            0 :     return SSA_NAME_DEF_STMT (expr);
     360              :   return NULL;
     361              : }
     362              : 
     363              : /* Like maybe_get_stmt, but also return null if the defining
     364              :    statement isn't an assignment.  */
     365              : 
     366              : static gassign *
     367      1134879 : maybe_get_assign (tree expr)
     368              : {
     369      2415013 :   return safe_dyn_cast <gassign *> (maybe_get_stmt (expr));
     370              : }
     371              : 
     372              : /* Return true if this pass should look through a cast of expression FROM
     373              :    to type TYPE when analyzing pieces of an address.  */
     374              : 
     375              : static bool
     376        64599 : look_through_cast_p (tree type, tree from)
     377              : {
     378        64599 :   return (INTEGRAL_TYPE_P (TREE_TYPE (from)) == INTEGRAL_TYPE_P (type)
     379        64599 :           && POINTER_TYPE_P (TREE_TYPE (from)) == POINTER_TYPE_P (type));
     380              : }
     381              : 
     382              : /* Strip all conversions of integers or pointers from EXPR, regardless
     383              :    of whether the conversions are nops.  This is useful in the context
     384              :    of this pass because we're not trying to fold or simulate the
     385              :    expression; we just want to see how it's structured.  */
     386              : 
     387              : static tree
     388       452095 : strip_casts (tree expr)
     389              : {
     390       452095 :   const unsigned int MAX_NITERS = 4;
     391              : 
     392       452095 :   tree type = TREE_TYPE (expr);
     393       452095 :   while (CONVERT_EXPR_P (expr)
     394       452415 :          && look_through_cast_p (type, TREE_OPERAND (expr, 0)))
     395          320 :     expr = TREE_OPERAND (expr, 0);
     396              : 
     397       515142 :   for (unsigned int niters = 0; niters < MAX_NITERS; ++niters)
     398              :     {
     399       515142 :       gassign *assign = maybe_get_assign (expr);
     400       515142 :       if (assign
     401       253876 :           && CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (assign))
     402       579421 :           && look_through_cast_p (type, gimple_assign_rhs1 (assign)))
     403        63047 :         expr = gimple_assign_rhs1 (assign);
     404              :       else
     405              :         break;
     406              :     }
     407       452095 :   return expr;
     408              : }
     409              : 
     410              : /* Compare two address_term_infos in the same address_info.  */
     411              : 
     412              : static int
     413       324913 : compare_address_terms (const void *a_uncast, const void *b_uncast)
     414              : {
     415       324913 :   const address_term_info *a = (const address_term_info *) a_uncast;
     416       324913 :   const address_term_info *b = (const address_term_info *) b_uncast;
     417              : 
     418       324913 :   if (a->expr != b->expr)
     419       324581 :     return SSA_NAME_VERSION (a->expr) < SSA_NAME_VERSION (b->expr) ? -1 : 1;
     420              : 
     421          332 :   if (a->multiplier != b->multiplier)
     422          266 :     return a->multiplier < b->multiplier ? -1 : 1;
     423              : 
     424              :   return 0;
     425              : }
     426              : 
     427              : /* Dump ADDRESS using flags FLAGS.  */
     428              : 
     429              : static void
     430          424 : dump_address_info (dump_flags_t flags, address_info &address)
     431              : {
     432          424 :   if (address.base)
     433            2 :     dump_printf (flags, "%T + ", address.base);
     434         1186 :   for (unsigned int i = 0; i < address.terms.length (); ++i)
     435              :     {
     436          762 :       if (i != 0)
     437          338 :         dump_printf (flags, " + ");
     438          762 :       dump_printf (flags, "%T", address.terms[i].expr);
     439          762 :       if (address.terms[i].multiplier != 1)
     440          530 :         dump_printf (flags, " * %wd", address.terms[i].multiplier);
     441              :     }
     442          848 :   dump_printf (flags, " + [%wd, %wd]",
     443          424 :                address.min_offset, address.max_offset - 1);
     444          424 : }
     445              : 
     446              : /* Hash an address_info based on its base and terms.  */
     447              : 
     448              : hashval_t
     449       208055 : address_info_hasher::hash (const address_info *info)
     450              : {
     451       208055 :   inchash::hash hash;
     452       208055 :   hash.add_int (info->base ? TREE_CODE (info->base) : 0);
     453       416110 :   hash.add_int (info->terms.length ());
     454       520162 :   for (unsigned int i = 0; i < info->terms.length (); ++i)
     455              :     {
     456       312107 :       hash.add_int (SSA_NAME_VERSION (info->terms[i].expr));
     457       312107 :       hash.add_hwi (info->terms[i].multiplier);
     458              :     }
     459       208055 :   return hash.end ();
     460              : }
     461              : 
     462              : /* Return true if two address_infos have equal bases and terms.  Other
     463              :    properties might be different (such as the statement or constant
     464              :    offset range).  */
     465              : 
     466              : bool
     467       113918 : address_info_hasher::equal (const address_info *a, const address_info *b)
     468              : {
     469       113918 :   if (a->base != b->base
     470       113918 :       && (!a->base || !b->base || !operand_equal_p (a->base, b->base, 0)))
     471         3084 :     return false;
     472              : 
     473       332502 :   if (a->terms.length () != b->terms.length ())
     474              :     return false;
     475              : 
     476       218518 :   for (unsigned int i = 0; i < a->terms.length (); ++i)
     477       140687 :     if (a->terms[i].expr != b->terms[i].expr
     478       140687 :         || a->terms[i].multiplier != b->terms[i].multiplier)
     479              :       return false;
     480              : 
     481              :   return true;
     482              : }
     483              : 
     484              : /* Return true if we want to version the loop, i.e. if we have a
     485              :    specific reason for doing so and no specific reason not to.  */
     486              : 
     487              : bool
     488         7506 : loop_info::worth_versioning_p () const
     489              : {
     490         7506 :   return (!rejected_p
     491         5758 :           && (!bitmap_empty_p (&unity_names) || subloops_benefit_p));
     492              : }
     493              : 
     494          707 : loop_versioning::lv_dom_walker::lv_dom_walker (loop_versioning &lv)
     495            0 :   : dom_walker (CDI_DOMINATORS), m_lv (lv)
     496              : {
     497            0 : }
     498              : 
     499              : /* Process BB before processing the blocks it dominates.  */
     500              : 
     501              : edge
     502        31704 : loop_versioning::lv_dom_walker::before_dom_children (basic_block bb)
     503              : {
     504        31704 :   if (bb == bb->loop_father->header)
     505         4407 :     m_lv.prune_loop_conditions (bb->loop_father);
     506              : 
     507        31704 :   return NULL;
     508              : }
     509              : 
     510              : /* Decide whether to replace VAL with a new value in a versioned loop.
     511              :    Return the new value if so, otherwise return null.  */
     512              : 
     513              : tree
     514        69670 : loop_versioning::name_prop::value_of_expr (tree val, gimple *)
     515              : {
     516        69670 :   if (TREE_CODE (val) == SSA_NAME
     517        69670 :       && bitmap_bit_p (&m_li.unity_names, SSA_NAME_VERSION (val)))
     518         1441 :     return build_one_cst (TREE_TYPE (val));
     519              :   return NULL_TREE;
     520              : }
     521              : 
     522              : /* Initialize the structure to optimize FN.  */
     523              : 
     524        27928 : loop_versioning::loop_versioning (function *fn)
     525        27928 :   : m_fn (fn),
     526        27928 :     m_num_conditions (0),
     527        27928 :     m_address_table (31)
     528              : {
     529        27928 :   unsigned m_nloops = number_of_loops (fn);
     530        27928 :   bitmap_obstack_initialize (&m_bitmap_obstack);
     531        27928 :   gcc_obstack_init (&m_obstack);
     532              : 
     533              :   /* Initialize the loop information.  */
     534        27928 :   m_loops.safe_grow_cleared (m_nloops, true);
     535       180612 :   for (unsigned int i = 0; i < m_nloops; ++i)
     536              :     {
     537       152684 :       m_loops[i].outermost = get_loop (m_fn, 0);
     538       152684 :       bitmap_initialize (&m_loops[i].unity_names, &m_bitmap_obstack);
     539              :     }
     540              : 
     541              :   /* Initialize the list of blocks that belong to each loop.  */
     542        27928 :   unsigned int nbbs = last_basic_block_for_fn (fn);
     543        27928 :   m_next_block_in_loop.safe_grow (nbbs, true);
     544        27928 :   basic_block bb;
     545       746420 :   FOR_EACH_BB_FN (bb, fn)
     546              :     {
     547       718492 :       loop_info &li = get_loop_info (bb->loop_father);
     548       718492 :       m_next_block_in_loop[bb->index] = li.block_list;
     549       718492 :       li.block_list = bb;
     550              :     }
     551              : 
     552              :   /* MAX_FIXED_MODE_SIZE should be a reasonable maximum scale for
     553              :      unvectorizable code, since it is the largest size that can be
     554              :      handled efficiently by scalar code.  omp_max_vf calculates the
     555              :      maximum number of bytes in a vector, when such a value is relevant
     556              :      to loop optimization.  */
     557        27928 :   m_maximum_scale = estimated_poly_value (omp_max_vf (false));
     558        83784 :   m_maximum_scale = MAX (m_maximum_scale, MAX_FIXED_MODE_SIZE);
     559        27928 : }
     560              : 
     561        27928 : loop_versioning::~loop_versioning ()
     562              : {
     563        27928 :   bitmap_obstack_release (&m_bitmap_obstack);
     564        27928 :   obstack_free (&m_obstack, NULL);
     565        27928 : }
     566              : 
     567              : /* Return the maximum number of instructions allowed in LOOP before
     568              :    it becomes too big for versioning.
     569              : 
     570              :    There are separate limits for inner and outer loops.  The limit for
     571              :    inner loops applies only to loops that benefit directly from versioning.
     572              :    The limit for outer loops applies to all code in the outer loop and
     573              :    its subloops that *doesn't* benefit directly from versioning; such code
     574              :    would be "taken along for the ride".  The idea is that if the cost of
     575              :    the latter is small, it is better to version outer loops rather than
     576              :    inner loops, both to reduce the number of repeated checks and to enable
     577              :    more of the loop nest to be optimized as a natural nest (e.g. by loop
     578              :    interchange or outer-loop vectorization).  */
     579              : 
     580              : unsigned int
     581         1581 : loop_versioning::max_insns_for_loop (class loop *loop)
     582              : {
     583         1581 :   return (loop->inner
     584          383 :           ? param_loop_versioning_max_outer_insns
     585         1198 :           : param_loop_versioning_max_inner_insns);
     586              : }
     587              : 
     588              : /* Return true if for cost reasons we should avoid versioning any loop
     589              :    that contains STMT.
     590              : 
     591              :    Note that we don't need to check whether versioning is invalid for
     592              :    correctness reasons, since the versioning process does that for us.
     593              :    The conditions involved are too rare to be worth duplicating here.  */
     594              : 
     595              : bool
     596       809394 : loop_versioning::expensive_stmt_p (gimple *stmt)
     597              : {
     598            0 :   if (gcall *call = dyn_cast <gcall *> (stmt))
     599              :     /* Assume for now that the time spent in an "expensive" call would
     600              :        overwhelm any saving from versioning.  */
     601        22976 :     return !gimple_inexpensive_call_p (call);
     602              :   return false;
     603              : }
     604              : 
     605              : /* Record that we want to version the loop that contains STMT for the
     606              :    case in which SSA name NAME is equal to 1.  We already know that NAME
     607              :    is invariant in the loop.  */
     608              : 
     609              : void
     610         2273 : loop_versioning::version_for_unity (gimple *stmt, tree name)
     611              : {
     612         2273 :   class loop *loop = loop_containing_stmt (stmt);
     613         2273 :   loop_info &li = get_loop_info (loop);
     614              : 
     615         2273 :   if (bitmap_set_bit (&li.unity_names, SSA_NAME_VERSION (name)))
     616              :     {
     617              :       /* This is the first time we've wanted to version LOOP for NAME.
     618              :          Keep track of the outermost loop that can handle all versioning
     619              :          checks in LI.  */
     620         1567 :       class loop *outermost
     621         1567 :         = outermost_invariant_loop_for_expr (loop, name);
     622         1900 :       if (loop_depth (li.outermost) < loop_depth (outermost))
     623         1174 :         li.outermost = outermost;
     624              : 
     625         1567 :       if (dump_enabled_p ())
     626              :         {
     627           71 :           dump_printf_loc (MSG_NOTE, stmt, "want to version containing loop"
     628              :                            " for when %T == 1", name);
     629           71 :           if (outermost == loop)
     630           23 :             dump_printf (MSG_NOTE, "; cannot hoist check further");
     631              :           else
     632              :             {
     633           58 :               dump_printf (MSG_NOTE, "; could implement the check at loop"
     634              :                            " depth %d", loop_depth (outermost));
     635           68 :               if (loop_depth (li.outermost) > loop_depth (outermost))
     636            0 :                 dump_printf (MSG_NOTE, ", but other checks only allow"
     637            0 :                              " a depth of %d", loop_depth (li.outermost));
     638              :             }
     639           71 :           dump_printf (MSG_NOTE, "\n");
     640              :         }
     641              : 
     642         1567 :       m_num_conditions += 1;
     643              :     }
     644              :   else
     645              :     {
     646              :       /* This is a duplicate request.  */
     647          706 :       if (dump_enabled_p ())
     648            6 :         dump_printf_loc (MSG_NOTE, stmt, "already asked to version containing"
     649              :                          " loop for when %T == 1\n", name);
     650              :     }
     651         2273 : }
     652              : 
     653              : /* Return true if OP1_TREE is constant and if in principle it is worth
     654              :    versioning an address fragment of the form:
     655              : 
     656              :      i * OP1_TREE * OP2 * stride
     657              : 
     658              :    for the case in which stride == 1.  This in practice means testing
     659              :    whether:
     660              : 
     661              :      OP1_TREE * OP2 <= M_MAXIMUM_SCALE.
     662              : 
     663              :    If RESULT is nonnull, store OP1_TREE * OP2 there when returning true.  */
     664              : 
     665              : bool
     666       340521 : loop_versioning::acceptable_multiplier_p (tree op1_tree,
     667              :                                           unsigned HOST_WIDE_INT op2,
     668              :                                           unsigned HOST_WIDE_INT *result)
     669              : {
     670       340521 :   if (tree_fits_uhwi_p (op1_tree))
     671              :     {
     672       321402 :       unsigned HOST_WIDE_INT op1 = tree_to_uhwi (op1_tree);
     673              :       /* The first part checks for overflow.  */
     674       321402 :       if (op1 * op2 >= op2 && op1 * op2 <= m_maximum_scale)
     675              :         {
     676       293687 :           if (result)
     677       241709 :             *result = op1 * op2;
     678       293687 :           return true;
     679              :         }
     680              :     }
     681              :   return false;
     682              : }
     683              : 
     684              : /* Return true if it is worth using loop versioning on a memory access
     685              :    of type TYPE.  Store the size of the access in *SIZE if so.  */
     686              : 
     687              : bool
     688       210925 : loop_versioning::acceptable_type_p (tree type, unsigned HOST_WIDE_INT *size)
     689              : {
     690       210925 :   return (TYPE_SIZE_UNIT (type)
     691       210925 :           && acceptable_multiplier_p (TYPE_SIZE_UNIT (type), 1, size));
     692              : }
     693              : 
     694              : /* See whether OP is constant and whether we can multiply TERM by that
     695              :    constant without exceeding M_MAXIMUM_SCALE.  Return true and update
     696              :    TERM if so.  */
     697              : 
     698              : bool
     699        90915 : loop_versioning::multiply_term_by (address_term_info &term, tree op)
     700              : {
     701            0 :   return acceptable_multiplier_p (op, term.multiplier, &term.multiplier);
     702              : }
     703              : 
     704              : /* Decide whether an address fragment of the form STRIDE * MULTIPLIER
     705              :    is likely to be indexing an innermost dimension, returning the result
     706              :    as an INNER_* probability.  */
     707              : 
     708              : inner_likelihood
     709        77769 : loop_versioning::get_inner_likelihood (tree stride,
     710              :                                        unsigned HOST_WIDE_INT multiplier)
     711              : {
     712        77769 :   const unsigned int MAX_NITERS = 8;
     713              : 
     714              :   /* Iterate over possible values of STRIDE.  Return INNER_LIKELY if at
     715              :      least one of those values is likely to be for the innermost dimension.
     716              :      Record in UNLIKELY_P if at least one of those values is unlikely to be
     717              :      for the innermost dimension.
     718              : 
     719              :      E.g. for:
     720              : 
     721              :        stride = cond ? a * b : 1
     722              : 
     723              :      we should treat STRIDE as being a likely inner dimension, since
     724              :      we know that it is 1 under at least some circumstances.  (See the
     725              :      Fortran example below.)  However:
     726              : 
     727              :        stride = a * b
     728              : 
     729              :      on its own is unlikely to be for the innermost dimension, since
     730              :      that would require both a and b to be 1 at runtime.  */
     731        77769 :   bool unlikely_p = false;
     732        77769 :   tree worklist[MAX_NITERS];
     733        77769 :   unsigned int length = 0;
     734        77769 :   worklist[length++] = stride;
     735       121024 :   for (unsigned int i = 0; i < length; ++i)
     736              :     {
     737        95233 :       tree expr = worklist[i];
     738              : 
     739        95233 :       if (CONSTANT_CLASS_P (expr))
     740              :         {
     741              :           /* See if EXPR * MULTIPLIER would be consistent with an individual
     742              :              access or a small grouped access.  */
     743        54915 :           if (acceptable_multiplier_p (expr, multiplier))
     744              :             return INNER_LIKELY;
     745              :           else
     746              :             unlikely_p = true;
     747              :         }
     748        79591 :       else if (gimple *stmt = maybe_get_stmt (expr))
     749              :         {
     750              :           /* If EXPR is set by a PHI node, queue its arguments in case
     751              :              we find one that is consistent with an inner dimension.
     752              : 
     753              :              An important instance of this is the Fortran handling of array
     754              :              descriptors, which calculates the stride of the inner dimension
     755              :              using a PHI equivalent of:
     756              : 
     757              :                 raw_stride = a.dim[0].stride;
     758              :                 stride = raw_stride != 0 ? raw_stride : 1;
     759              : 
     760              :              (Strides for outer dimensions do not treat 0 specially.)  */
     761        39273 :           if (gphi *phi = dyn_cast <gphi *> (stmt))
     762              :             {
     763        11131 :               unsigned int nargs = gimple_phi_num_args (phi);
     764        29393 :               for (unsigned int j = 0; j < nargs && length < MAX_NITERS; ++j)
     765        18262 :                 worklist[length++] = strip_casts (gimple_phi_arg_def (phi, j));
     766              :             }
     767              :           /* If the value is set by an assignment, expect it to be read
     768              :              from memory (such as an array descriptor) rather than be
     769              :              calculated.  */
     770        71397 :           else if (gassign *assign = dyn_cast <gassign *> (stmt))
     771              :             {
     772        27822 :               if (!gimple_assign_load_p (assign))
     773        13933 :                 unlikely_p = true;
     774              :             }
     775              :           /* Things like calls don't really tell us anything.  */
     776              :         }
     777              :     }
     778              : 
     779              :   /* We didn't find any possible values of STRIDE that were likely to be
     780              :      for the innermost dimension.  If we found one that was actively
     781              :      unlikely to be for the innermost dimension, assume that that applies
     782              :      to STRIDE too.  */
     783        25791 :   return unlikely_p ? INNER_UNLIKELY : INNER_DONT_KNOW;
     784              : }
     785              : 
     786              : /* Dump the likelihood that TERM's stride is for the innermost dimension.
     787              :    ADDRESS is the address that contains TERM.  */
     788              : 
     789              : void
     790          241 : loop_versioning::dump_inner_likelihood (address_info &address,
     791              :                                         address_term_info &term)
     792              : {
     793          241 :   if (term.inner_likelihood == INNER_LIKELY)
     794           87 :     dump_printf_loc (MSG_NOTE, address.stmt, "%T is likely to be the"
     795              :                      " innermost dimension\n", term.stride);
     796          154 :   else if (term.inner_likelihood == INNER_UNLIKELY)
     797           36 :     dump_printf_loc (MSG_NOTE, address.stmt, "%T is probably not the"
     798              :                      " innermost dimension\n", term.stride);
     799              :   else
     800          118 :     dump_printf_loc (MSG_NOTE, address.stmt, "cannot tell whether %T"
     801              :                      " is the innermost dimension\n", term.stride);
     802          241 : }
     803              : 
     804              : /* The caller has identified that STRIDE is the stride of interest
     805              :    in TERM, and that the stride is applied in OP_LOOP.  Record this
     806              :    information in TERM, deciding whether STRIDE is likely to be for
     807              :    the innermost dimension of an array and whether it represents a
     808              :    versioning opportunity.  ADDRESS is the address that contains TERM.  */
     809              : 
     810              : void
     811        58924 : loop_versioning::analyze_stride (address_info &address,
     812              :                                  address_term_info &term,
     813              :                                  tree stride, class loop *op_loop)
     814              : {
     815        58924 :   term.stride = stride;
     816              : 
     817        58924 :   term.inner_likelihood = get_inner_likelihood (stride, term.multiplier);
     818        58924 :   if (dump_enabled_p ())
     819          200 :     dump_inner_likelihood (address, term);
     820              : 
     821              :   /* To be a versioning opportunity we require:
     822              : 
     823              :      - The multiplier applied by TERM is equal to the access size,
     824              :        so that when STRIDE is 1, the accesses in successive loop
     825              :        iterations are consecutive.
     826              : 
     827              :        This is deliberately conservative.  We could relax it to handle
     828              :        other cases (such as those with gaps between iterations) if we
     829              :        find any real testcases for which it's useful.
     830              : 
     831              :      - the stride is applied in the same loop as STMT rather than
     832              :        in an outer loop.  Although versioning for strides applied in
     833              :        outer loops could help in some cases -- such as enabling
     834              :        more loop interchange -- the savings are much lower than for
     835              :        inner loops.
     836              : 
     837              :      - the stride is an SSA name that is invariant in STMT's loop,
     838              :        since otherwise versioning isn't possible.  */
     839        58924 :   unsigned HOST_WIDE_INT access_size = address.max_offset - address.min_offset;
     840        58924 :   if (term.multiplier == access_size
     841        48836 :       && address.loop == op_loop
     842        46225 :       && TREE_CODE (stride) == SSA_NAME
     843        61400 :       && expr_invariant_in_loop_p (address.loop, stride))
     844              :     {
     845         2463 :       term.versioning_opportunity_p = true;
     846         2463 :       if (dump_enabled_p ())
     847           89 :         dump_printf_loc (MSG_NOTE, address.stmt, "%T == 1 is a versioning"
     848              :                          " opportunity\n", stride);
     849              :     }
     850        58924 : }
     851              : 
     852              : /* See whether address term TERM (which belongs to ADDRESS) is the result
     853              :    of multiplying a varying SSA name by a loop-invariant SSA name.
     854              :    Return true and update TERM if so.
     855              : 
     856              :    This handles both cases that SCEV might handle, such as:
     857              : 
     858              :      for (int i = 0; i < n; ++i)
     859              :        res += a[i * stride];
     860              : 
     861              :    and ones in which the term varies arbitrarily between iterations, such as:
     862              : 
     863              :      for (int i = 0; i < n; ++i)
     864              :        res += a[index[i] * stride];  */
     865              : 
     866              : bool
     867       100658 : loop_versioning::find_per_loop_multiplication (address_info &address,
     868              :                                                address_term_info &term)
     869              : {
     870       100658 :   gassign *mult = maybe_get_assign (term.expr);
     871       126592 :   if (!mult || gimple_assign_rhs_code (mult) != MULT_EXPR)
     872              :     return false;
     873              : 
     874         7714 :   class loop *mult_loop = loop_containing_stmt (mult);
     875         7714 :   if (!loop_outer (mult_loop))
     876              :     return false;
     877              : 
     878         7410 :   tree op1 = strip_casts (gimple_assign_rhs1 (mult));
     879        14820 :   tree op2 = strip_casts (gimple_assign_rhs2 (mult));
     880         7410 :   if (TREE_CODE (op1) != SSA_NAME || TREE_CODE (op2) != SSA_NAME)
     881              :     return false;
     882              : 
     883         6555 :   bool invariant1_p = expr_invariant_in_loop_p (mult_loop, op1);
     884         6555 :   bool invariant2_p = expr_invariant_in_loop_p (mult_loop, op2);
     885         6555 :   if (invariant1_p == invariant2_p)
     886              :     return false;
     887              : 
     888              :   /* Make sure that the loop invariant is OP2 rather than OP1.  */
     889         6200 :   if (invariant1_p)
     890         3671 :     std::swap (op1, op2);
     891              : 
     892         6200 :   if (dump_enabled_p ())
     893           90 :     dump_printf_loc (MSG_NOTE, address.stmt, "address term %T = varying %T"
     894              :                      " * loop-invariant %T\n", term.expr, op1, op2);
     895         6200 :   analyze_stride (address, term, op2, mult_loop);
     896         6200 :   return true;
     897              : }
     898              : 
     899              : /* Try to use scalar evolutions to find an address stride for TERM,
     900              :    which belongs to ADDRESS.  Return true and update TERM if so.
     901              : 
     902              :    Here we are interested in any evolution information we can find,
     903              :    not just evolutions wrt ADDRESS->LOOP.  For example, if we find that
     904              :    an outer loop obviously iterates over the inner dimension of an array,
     905              :    that information can help us eliminate worthless versioning opportunities
     906              :    in inner loops.  */
     907              : 
     908              : bool
     909        94458 : loop_versioning::analyze_term_using_scevs (address_info &address,
     910              :                                            address_term_info &term)
     911              : {
     912       136192 :   gimple *setter = maybe_get_stmt (term.expr);
     913        79586 :   if (!setter)
     914              :     return false;
     915              : 
     916        79586 :   class loop *wrt_loop = loop_containing_stmt (setter);
     917        79586 :   if (!loop_outer (wrt_loop))
     918              :     return false;
     919              : 
     920        64394 :   tree chrec = strip_casts (analyze_scalar_evolution (wrt_loop, term.expr));
     921        64394 :   if (TREE_CODE (chrec) == POLYNOMIAL_CHREC)
     922              :     {
     923        52724 :       if (dump_enabled_p ())
     924          110 :         dump_printf_loc (MSG_NOTE, address.stmt,
     925              :                          "address term %T = %T\n", term.expr, chrec);
     926              : 
     927              :       /* Peel casts and accumulate constant multiplications, up to the
     928              :          limit allowed by M_MAXIMUM_SCALE.  */
     929        52724 :       tree stride = strip_casts (CHREC_RIGHT (chrec));
     930        52724 :       while (TREE_CODE (stride) == MULT_EXPR
     931        52724 :              && multiply_term_by (term, TREE_OPERAND (stride, 1)))
     932            0 :         stride = strip_casts (TREE_OPERAND (stride, 0));
     933              : 
     934              :       gassign *assign;
     935        52925 :       while ((assign = maybe_get_assign (stride))
     936          466 :              && gimple_assign_rhs_code (assign) == MULT_EXPR
     937        53126 :              && multiply_term_by (term, gimple_assign_rhs2 (assign)))
     938              :         {
     939          201 :           if (dump_enabled_p ())
     940           24 :             dump_printf_loc (MSG_NOTE, address.stmt,
     941              :                              "looking through %G", (gimple *) assign);
     942          201 :           stride = strip_casts (gimple_assign_rhs1 (assign));
     943              :         }
     944              : 
     945        52724 :       analyze_stride (address, term, stride, get_chrec_loop (chrec));
     946        52724 :       return true;
     947              :     }
     948              : 
     949              :   return false;
     950              : }
     951              : 
     952              : /* Address term TERM is an arbitrary term that provides no versioning
     953              :    opportunities.  Analyze it to see whether it contains any likely
     954              :    inner strides, so that we don't mistakenly version for other
     955              :    (less likely) candidates.
     956              : 
     957              :    This copes with invariant innermost indices such as:
     958              : 
     959              :      x(i, :) = 100
     960              : 
     961              :    where the "i" component of the address is invariant in the loop
     962              :    but provides the real inner stride.
     963              : 
     964              :    ADDRESS is the address that contains TERM.  */
     965              : 
     966              : void
     967        18087 : loop_versioning::analyze_arbitrary_term (address_info &address,
     968              :                                          address_term_info &term)
     969              : 
     970              : {
     971              :   /* A multiplication offers two potential strides.  Pick the one that
     972              :      is most likely to be an innermost stride.  */
     973        18087 :   tree expr = term.expr, alt = NULL_TREE;
     974        18087 :   gassign *mult = maybe_get_assign (expr);
     975        32201 :   if (mult && gimple_assign_rhs_code (mult) == MULT_EXPR)
     976              :     {
     977          758 :       expr = strip_casts (gimple_assign_rhs1 (mult));
     978         1516 :       alt = strip_casts (gimple_assign_rhs2 (mult));
     979              :     }
     980        18087 :   term.stride = expr;
     981        18087 :   term.inner_likelihood = get_inner_likelihood (expr, term.multiplier);
     982        18087 :   if (alt)
     983              :     {
     984          758 :       inner_likelihood alt_l = get_inner_likelihood (alt, term.multiplier);
     985          758 :       if (alt_l > term.inner_likelihood)
     986              :         {
     987          321 :           term.stride = alt;
     988          321 :           term.inner_likelihood = alt_l;
     989              :         }
     990              :     }
     991        18087 :   if (dump_enabled_p ())
     992           41 :     dump_inner_likelihood (address, term);
     993        18087 : }
     994              : 
     995              : /* Try to identify loop strides in ADDRESS and try to choose realistic
     996              :    versioning opportunities based on these strides.
     997              : 
     998              :    The main difficulty here isn't finding strides that could be used
     999              :    in a version check (that's pretty easy).  The problem instead is to
    1000              :    avoid versioning for some stride S that is unlikely ever to be 1 at
    1001              :    runtime.  Versioning for S == 1 on its own would lead to unnecessary
    1002              :    code bloat, while adding S == 1 to more realistic version conditions
    1003              :    would lose the optimisation opportunity offered by those other conditions.
    1004              : 
    1005              :    For example, versioning for a stride of 1 in the Fortran code:
    1006              : 
    1007              :      integer :: a(:,:)
    1008              :      a(1,:) = 1
    1009              : 
    1010              :    is not usually a good idea, since the assignment is iterating over
    1011              :    an outer dimension and is relatively unlikely to have a stride of 1.
    1012              :    (It isn't impossible, since the inner dimension might be 1, or the
    1013              :    array might be transposed.)  Similarly, in:
    1014              : 
    1015              :      integer :: a(:,:), b(:,:)
    1016              :      b(:,1) = a(1,:)
    1017              : 
    1018              :    b(:,1) is relatively likely to have a stride of 1 while a(1,:) isn't.
    1019              :    Versioning for when both strides are 1 would lose most of the benefit
    1020              :    of versioning for b's access.
    1021              : 
    1022              :    The approach we take is as follows:
    1023              : 
    1024              :    - Analyze each term to see whether it has an identifiable stride,
    1025              :      regardless of which loop applies the stride.
    1026              : 
    1027              :    - Evaluate the likelihood that each such stride is for the innermost
    1028              :      dimension of an array, on the scale "likely", "don't know" or "unlikely".
    1029              : 
    1030              :    - If there is a single "likely" innermost stride, and that stride is
    1031              :      applied in the loop that contains STMT, version the loop for when the
    1032              :      stride is 1.  This deals with the cases in which we're fairly
    1033              :      confident of doing the right thing, such as the b(:,1) reference above.
    1034              : 
    1035              :    - If there are no "likely" innermost strides, and the loop that contains
    1036              :      STMT uses a stride that we rated as "don't know", version for when
    1037              :      that stride is 1.  This is principally used for C code such as:
    1038              : 
    1039              :        for (int i = 0; i < n; ++i)
    1040              :          a[i * x] = ...;
    1041              : 
    1042              :      and:
    1043              : 
    1044              :        for (int j = 0; j < n; ++j)
    1045              :          for (int i = 0; i < n; ++i)
    1046              :            a[i * x + j * y] = ...;
    1047              : 
    1048              :      where nothing in the way "x" and "y" are set gives a hint as to
    1049              :      whether "i" iterates over the innermost dimension of the array.
    1050              :      In these situations it seems reasonable to assume the
    1051              :      programmer has nested the loops appropriately (although of course
    1052              :      there are examples like GEMM in which this assumption doesn't hold
    1053              :      for all accesses in the loop).
    1054              : 
    1055              :      This case is also useful for the Fortran equivalent of the
    1056              :      above C code.  */
    1057              : 
    1058              : void
    1059        64809 : loop_versioning::analyze_address_fragment (address_info &address)
    1060              : {
    1061        64809 :   if (dump_enabled_p ())
    1062              :     {
    1063          173 :       dump_printf_loc (MSG_NOTE, address.stmt, "analyzing address fragment ");
    1064          173 :       dump_address_info (MSG_NOTE, address);
    1065          173 :       dump_printf (MSG_NOTE, "\n");
    1066              :     }
    1067              : 
    1068              :   /* Analyze each component of the sum to see whether it involves an
    1069              :      apparent stride.
    1070              : 
    1071              :      There is an overlap between the addresses that
    1072              :      find_per_loop_multiplication and analyze_term_using_scevs can handle,
    1073              :      but the former is much cheaper than SCEV analysis, so try it first.  */
    1074       330934 :   for (unsigned int i = 0; i < address.terms.length (); ++i)
    1075       100658 :     if (!find_per_loop_multiplication (address, address.terms[i])
    1076        94458 :         && !analyze_term_using_scevs (address, address.terms[i])
    1077       142392 :         && !POINTER_TYPE_P (TREE_TYPE (address.terms[i].expr)))
    1078        18087 :       analyze_arbitrary_term (address, address.terms[i]);
    1079              : 
    1080              :   /* Check for strides that are likely to be for the innermost dimension.
    1081              : 
    1082              :      1. If there is a single likely inner stride, if it is an SSA name,
    1083              :         and if it is worth versioning the loop for when the SSA name
    1084              :         equals 1, record that we want to do so.
    1085              : 
    1086              :      2. Otherwise, if there any likely inner strides, bail out.  This means
    1087              :         one of:
    1088              : 
    1089              :         (a) There are multiple likely inner strides.  This suggests we're
    1090              :             confused and be can't be confident of doing the right thing.
    1091              : 
    1092              :         (b) There is a single likely inner stride and it is a constant
    1093              :             rather than an SSA name.  This can mean either that the access
    1094              :             is a natural one without any variable strides, such as:
    1095              : 
    1096              :               for (int i = 0; i < n; ++i)
    1097              :                 a[i] += 1;
    1098              : 
    1099              :             or that a variable stride is applied to an outer dimension,
    1100              :             such as:
    1101              : 
    1102              :               for (int i = 0; i < n; ++i)
    1103              :                 for (int j = 0; j < n; ++j)
    1104              :                   a[j * stride][i] += 1;
    1105              : 
    1106              :         (c) There is a single likely inner stride, and it is an SSA name,
    1107              :             but it isn't a worthwhile versioning opportunity.  This usually
    1108              :             means that the variable stride is applied by an outer loop,
    1109              :             such as:
    1110              : 
    1111              :               for (int i = 0; i < n; ++i)
    1112              :                 for (int j = 0; j < n; ++j)
    1113              :                   a[j][i * stride] += 1;
    1114              : 
    1115              :             or (using an example with a more natural loop nesting):
    1116              : 
    1117              :               for (int i = 0; i < n; ++i)
    1118              :                 for (int j = 0; j < n; ++j)
    1119              :                   a[i][j] += b[i * stride];
    1120              : 
    1121              :             in cases where b[i * stride] cannot (yet) be hoisted for
    1122              :             aliasing reasons.
    1123              : 
    1124              :      3. If there are no likely inner strides, fall through to the next
    1125              :         set of checks.
    1126              : 
    1127              :      Pointer equality is enough to check for uniqueness in (1), since we
    1128              :      only care about SSA names.  */
    1129              :   tree chosen_stride = NULL_TREE;
    1130              :   tree version_stride = NULL_TREE;
    1131       165056 :   for (unsigned int i = 0; i < address.terms.length (); ++i)
    1132       100507 :     if (chosen_stride != address.terms[i].stride
    1133       100507 :         && address.terms[i].inner_likelihood == INNER_LIKELY)
    1134              :       {
    1135        50194 :         if (chosen_stride)
    1136              :           return;
    1137        49934 :         chosen_stride = address.terms[i].stride;
    1138        49934 :         if (address.terms[i].versioning_opportunity_p)
    1139          711 :           version_stride = chosen_stride;
    1140              :       }
    1141              : 
    1142              :   /* If there are no likely inner strides, see if there is a single
    1143              :      versioning opportunity for a stride that was rated as INNER_DONT_KNOW.
    1144              :      See the comment above the function for the cases that this code
    1145              :      handles.  */
    1146        64549 :   if (!chosen_stride)
    1147        43740 :     for (unsigned int i = 0; i < address.terms.length (); ++i)
    1148        28874 :       if (version_stride != address.terms[i].stride
    1149        17631 :           && address.terms[i].inner_likelihood == INNER_DONT_KNOW
    1150        39480 :           && address.terms[i].versioning_opportunity_p)
    1151              :         {
    1152         1584 :           if (version_stride)
    1153              :             return;
    1154              :           version_stride = address.terms[i].stride;
    1155              :         }
    1156              : 
    1157        64540 :   if (version_stride)
    1158         2273 :     version_for_unity (address.stmt, version_stride);
    1159              : }
    1160              : 
    1161              : /* Treat EXPR * MULTIPLIER + OFFSET as a fragment of an address that addresses
    1162              :    TYPE_SIZE bytes and record this address fragment for later processing.
    1163              :    STMT is the statement that contains the address.  */
    1164              : 
    1165              : void
    1166       173451 : loop_versioning::record_address_fragment (gimple *stmt,
    1167              :                                           unsigned HOST_WIDE_INT type_size,
    1168              :                                           tree expr,
    1169              :                                           unsigned HOST_WIDE_INT multiplier,
    1170              :                                           HOST_WIDE_INT offset)
    1171              : {
    1172              :   /* We're only interested in computed values.  */
    1173       173451 :   if (TREE_CODE (expr) != SSA_NAME)
    1174        25884 :     return;
    1175              : 
    1176              :   /* Quick exit if no part of the address is calculated in STMT's loop,
    1177              :      since such addresses have no versioning opportunities.  */
    1178       157022 :   class loop *loop = loop_containing_stmt (stmt);
    1179       157022 :   if (expr_invariant_in_loop_p (loop, expr))
    1180              :     return;
    1181              : 
    1182              :   /* Set up an address_info for EXPR * MULTIPLIER.  */
    1183       147889 :   address_info *address = XOBNEW (&m_obstack, address_info);
    1184       147889 :   new (address) address_info;
    1185       147889 :   address->stmt = stmt;
    1186       147889 :   address->loop = loop;
    1187       147889 :   address->base = NULL_TREE;
    1188       147889 :   address->terms.quick_grow (1);
    1189       147889 :   address->terms[0].expr = expr;
    1190       147889 :   address->terms[0].multiplier = multiplier;
    1191       147889 :   address->terms[0].stride = NULL_TREE;
    1192       147889 :   address->terms[0].inner_likelihood = INNER_UNLIKELY;
    1193       147889 :   address->terms[0].versioning_opportunity_p = false;
    1194       147889 :   address->min_offset = offset;
    1195              : 
    1196              :   /* Peel apart the expression into a sum of address_terms, where each
    1197              :      term is multiplied by a constant.  Treat a + b and a - b the same,
    1198              :      since it doesn't matter for our purposes whether an address is
    1199              :      increasing or decreasing.  Distribute (a + b) * constant into
    1200              :      a * constant + b * constant.
    1201              : 
    1202              :      We don't care which loop each term belongs to, since we want to
    1203              :      examine as many candidate strides as possible when determining
    1204              :      which is likely to be for the innermost dimension.  We therefore
    1205              :      don't limit the search to statements in STMT's loop.  */
    1206       595956 :   for (unsigned int i = 0; i < address->terms.length (); )
    1207              :     {
    1208       448067 :       if (gassign *assign = maybe_get_assign (address->terms[i].expr))
    1209              :         {
    1210       273069 :           tree_code code = gimple_assign_rhs_code (assign);
    1211       273069 :           if (code == PLUS_EXPR
    1212       273069 :               || code == POINTER_PLUS_EXPR
    1213       126518 :               || code == MINUS_EXPR)
    1214              :             {
    1215       148277 :               tree op1 = gimple_assign_rhs1 (assign);
    1216       148277 :               tree op2 = gimple_assign_rhs2 (assign);
    1217       148277 :               if (TREE_CODE (op2) == INTEGER_CST)
    1218              :                 {
    1219        73761 :                   address->terms[i].expr = strip_casts (op1);
    1220              :                   /* This is heuristic only, so don't worry about truncation
    1221              :                      or overflow.  */
    1222        73761 :                   address->min_offset += (TREE_INT_CST_LOW (op2)
    1223        73761 :                                           * address->terms[i].multiplier);
    1224        73761 :                   continue;
    1225              :                 }
    1226        74516 :               else if (address->terms.length () < address_info::MAX_TERMS)
    1227              :                 {
    1228        74491 :                   unsigned int j = address->terms.length ();
    1229        74491 :                   address->terms.quick_push (address->terms[i]);
    1230        74491 :                   address->terms[i].expr = strip_casts (op1);
    1231        74491 :                   address->terms[j].expr = strip_casts (op2);
    1232        74491 :                   continue;
    1233        74491 :                 }
    1234              :             }
    1235       124817 :           if (code == MULT_EXPR)
    1236              :             {
    1237        90689 :               tree op1 = gimple_assign_rhs1 (assign);
    1238        90689 :               tree op2 = gimple_assign_rhs2 (assign);
    1239        90689 :               if (multiply_term_by (address->terms[i], op2))
    1240              :                 {
    1241        68057 :                   address->terms[i].expr = strip_casts (op1);
    1242        68057 :                   continue;
    1243              :                 }
    1244              :             }
    1245        56760 :           if (CONVERT_EXPR_CODE_P (code))
    1246              :             {
    1247         9378 :               tree op1 = gimple_assign_rhs1 (assign);
    1248         9378 :               address->terms[i].expr = strip_casts (op1);
    1249         9378 :               continue;
    1250         9378 :             }
    1251              :         }
    1252       222380 :       i += 1;
    1253              :     }
    1254              : 
    1255              :   /* Peel off any symbolic pointer.  */
    1256       147889 :   if (TREE_CODE (address->terms[0].expr) != SSA_NAME
    1257       147889 :       && address->terms[0].multiplier == 1)
    1258              :     {
    1259         6530 :       if (address->terms.length () == 1)
    1260              :         {
    1261            0 :           obstack_free (&m_obstack, address);
    1262            0 :           return;
    1263              :         }
    1264         6530 :       address->base = address->terms[0].expr;
    1265         6530 :       address->terms.ordered_remove (0);
    1266              :     }
    1267              : 
    1268              :   /* Require all remaining terms to be SSA names.  (This could be false
    1269              :      for unfolded statements, but they aren't worth dealing with.)  */
    1270       362917 :   for (unsigned int i = 0; i < address->terms.length (); ++i)
    1271       215350 :     if (TREE_CODE (address->terms[i].expr) != SSA_NAME)
    1272              :       {
    1273          322 :         obstack_free (&m_obstack, address);
    1274          322 :         return;
    1275              :       }
    1276              : 
    1277              :   /* The loop above set MIN_OFFSET based on the first byte of the
    1278              :      referenced data.  Calculate the end + 1.  */
    1279       147567 :   address->max_offset = address->min_offset + type_size;
    1280              : 
    1281              :   /* Put the terms into a canonical order for the hash table lookup below.  */
    1282       147567 :   address->terms.qsort (compare_address_terms);
    1283              : 
    1284       147567 :   if (dump_enabled_p ())
    1285              :     {
    1286          251 :       dump_printf_loc (MSG_NOTE, stmt, "recording address fragment %T", expr);
    1287          251 :       if (multiplier != 1)
    1288          118 :         dump_printf (MSG_NOTE, " * %wd", multiplier);
    1289          251 :       dump_printf (MSG_NOTE, " = ");
    1290          251 :       dump_address_info (MSG_NOTE, *address);
    1291          251 :       dump_printf (MSG_NOTE, "\n");
    1292              :     }
    1293              : 
    1294              :   /* Pool address information with the same terms (but potentially
    1295              :      different offsets).  */
    1296       147567 :   address_info **slot = m_address_table.find_slot (address, INSERT);
    1297       147567 :   if (address_info *old_address = *slot)
    1298              :     {
    1299              :       /* We've already seen an address with the same terms.  Extend the
    1300              :          offset range to account for this access.  Doing this can paper
    1301              :          over gaps, such as in:
    1302              : 
    1303              :            a[i * stride * 4] + a[i * stride * 4 + 3];
    1304              : 
    1305              :          where nothing references "+ 1" or "+ 2".  However, the vectorizer
    1306              :          handles such gapped accesses without problems, so it's not worth
    1307              :          trying to exclude them.  */
    1308        77831 :       if (old_address->min_offset > address->min_offset)
    1309        18005 :         old_address->min_offset = address->min_offset;
    1310        77831 :       if (old_address->max_offset < address->max_offset)
    1311        20417 :         old_address->max_offset = address->max_offset;
    1312        77831 :       obstack_free (&m_obstack, address);
    1313              :     }
    1314              :   else
    1315              :     {
    1316              :       /* This is the first time we've seen an address with these terms.  */
    1317        69736 :       *slot = address;
    1318        69736 :       m_address_list.safe_push (address);
    1319              :     }
    1320              : }
    1321              : 
    1322              : /* Analyze expression EXPR, which occurs in STMT.  */
    1323              : 
    1324              : void
    1325      1613999 : loop_versioning::analyze_expr (gimple *stmt, tree expr)
    1326              : {
    1327      1613999 :   unsigned HOST_WIDE_INT type_size;
    1328              : 
    1329      1779568 :   while (handled_component_p (expr))
    1330              :     {
    1331              :       /* See whether we can use versioning to avoid a multiplication
    1332              :          in an array index.  */
    1333       165569 :       if (TREE_CODE (expr) == ARRAY_REF
    1334       165569 :           && acceptable_type_p (TREE_TYPE (expr), &type_size))
    1335       100926 :         record_address_fragment (stmt, type_size,
    1336       100926 :                                  TREE_OPERAND (expr, 1), type_size, 0);
    1337       165569 :       expr = TREE_OPERAND (expr, 0);
    1338              :     }
    1339              : 
    1340              :   /* See whether we can use versioning to avoid a multiplication
    1341              :      in the pointer calculation of a MEM_REF.  */
    1342      1613999 :   if (TREE_CODE (expr) == MEM_REF
    1343      1613999 :       && acceptable_type_p (TREE_TYPE (expr), &type_size))
    1344        72525 :     record_address_fragment (stmt, type_size, TREE_OPERAND (expr, 0), 1,
    1345              :                              /* This is heuristic only, so don't worry
    1346              :                                 about truncation or overflow.  */
    1347        72525 :                              TREE_INT_CST_LOW (TREE_OPERAND (expr, 1)));
    1348              : 
    1349              :   /* These would be easy to handle if they existed at this stage.  */
    1350      1613999 :   gcc_checking_assert (TREE_CODE (expr) != TARGET_MEM_REF);
    1351      1613999 : }
    1352              : 
    1353              : /* Analyze all the statements in BB looking for useful version checks.
    1354              :    Return true on success, false if something prevents the block from
    1355              :    being versioned.  */
    1356              : 
    1357              : bool
    1358       278621 : loop_versioning::analyze_block (basic_block bb)
    1359              : {
    1360       278621 :   class loop *loop = bb->loop_father;
    1361       278621 :   loop_info &li = get_loop_info (loop);
    1362      1462210 :   for (gimple_stmt_iterator gsi = gsi_start_bb (bb); !gsi_end_p (gsi);
    1363       904968 :        gsi_next (&gsi))
    1364              :     {
    1365       923060 :       gimple *stmt = gsi_stmt (gsi);
    1366       923060 :       if (is_gimple_debug (stmt))
    1367       113666 :         continue;
    1368              : 
    1369       832370 :       if (expensive_stmt_p (stmt))
    1370              :         {
    1371        18092 :           if (dump_enabled_p ())
    1372           58 :             dump_printf_loc (MSG_NOTE, stmt, "expensive statement"
    1373              :                              " prevents versioning: %G", stmt);
    1374        18092 :           return false;
    1375              :         }
    1376              : 
    1377              :       /* Only look for direct versioning opportunities in inner loops
    1378              :          since the benefit tends to be much smaller for outer loops.  */
    1379       791302 :       if (!loop->inner)
    1380              :         {
    1381       657758 :           unsigned int nops = gimple_num_ops (stmt);
    1382      2482606 :           for (unsigned int i = 0; i < nops; ++i)
    1383      1824848 :             if (tree op = gimple_op (stmt, i))
    1384      1613999 :               analyze_expr (stmt, op);
    1385              :         }
    1386              : 
    1387              :       /* The point of the instruction limit is to prevent excessive
    1388              :          code growth, so this is a size-based estimate even though
    1389              :          the optimization is aimed at speed.  */
    1390       791302 :       li.num_insns += estimate_num_insns (stmt, &eni_size_weights);
    1391              :     }
    1392              : 
    1393              :   return true;
    1394              : }
    1395              : 
    1396              : /* Analyze all the blocks in the function, looking for useful version checks.
    1397              :    Return true if we found one.  */
    1398              : 
    1399              : bool
    1400        27928 : loop_versioning::analyze_blocks ()
    1401              : {
    1402        27928 :   AUTO_DUMP_SCOPE ("analyze_blocks",
    1403              :                    dump_user_location_t::from_function_decl (m_fn->decl));
    1404              : 
    1405              :   /* For now we don't try to version the whole function, although
    1406              :      versioning at that level could be useful in some cases.  */
    1407        27928 :   get_loop_info (get_loop (m_fn, 0)).rejected_p = true;
    1408              : 
    1409       166526 :   for (auto loop : loops_list (cfun, LI_FROM_INNERMOST))
    1410              :     {
    1411        82742 :       loop_info &linfo = get_loop_info (loop);
    1412              : 
    1413              :       /* Ignore cold loops.  */
    1414        82742 :       if (!optimize_loop_for_speed_p (loop))
    1415         1500 :         linfo.rejected_p = true;
    1416              : 
    1417              :       /* See whether an inner loop prevents versioning of this loop.  */
    1418        82742 :       if (!linfo.rejected_p)
    1419        95613 :         for (class loop *inner = loop->inner; inner; inner = inner->next)
    1420        18594 :           if (get_loop_info (inner).rejected_p)
    1421              :             {
    1422         4223 :               linfo.rejected_p = true;
    1423         4223 :               break;
    1424              :             }
    1425              : 
    1426              :       /* If versioning the loop is still a possibility, examine the
    1427              :          statements in the loop to look for versioning opportunities.  */
    1428        82742 :       if (!linfo.rejected_p)
    1429              :         {
    1430        77019 :           void *start_point = obstack_alloc (&m_obstack, 0);
    1431              : 
    1432       337548 :           for (basic_block bb = linfo.block_list; bb;
    1433       260529 :                bb = m_next_block_in_loop[bb->index])
    1434       278621 :             if (!analyze_block (bb))
    1435              :               {
    1436        18092 :                 linfo.rejected_p = true;
    1437        18092 :                 break;
    1438              :               }
    1439              : 
    1440        77019 :           if (!linfo.rejected_p)
    1441              :             {
    1442              :               /* Process any queued address fragments, now that we have
    1443              :                  complete grouping information.  */
    1444              :               address_info *address;
    1445              :               unsigned int i;
    1446       123736 :               FOR_EACH_VEC_ELT (m_address_list, i, address)
    1447        64809 :                 analyze_address_fragment (*address);
    1448              :             }
    1449              : 
    1450        77019 :           m_address_table.empty ();
    1451        77019 :           m_address_list.truncate (0);
    1452        77019 :           obstack_free (&m_obstack, start_point);
    1453              :         }
    1454        27928 :     }
    1455              : 
    1456        27928 :   return m_num_conditions != 0;
    1457              : }
    1458              : 
    1459              : /* Use the ranges in VRS to remove impossible versioning conditions from
    1460              :    LOOP.  */
    1461              : 
    1462              : void
    1463         4407 : loop_versioning::prune_loop_conditions (class loop *loop)
    1464              : {
    1465         4407 :   loop_info &li = get_loop_info (loop);
    1466              : 
    1467         4407 :   int to_remove = -1;
    1468         4407 :   bitmap_iterator bi;
    1469         4407 :   unsigned int i;
    1470         4407 :   int_range_max r;
    1471         5974 :   EXECUTE_IF_SET_IN_BITMAP (&li.unity_names, 0, i, bi)
    1472              :     {
    1473         1567 :       tree name = ssa_name (i);
    1474         1567 :       gimple *stmt = first_stmt (loop->header);
    1475              : 
    1476         3134 :       if (get_range_query (cfun)->range_of_expr (r, name, stmt)
    1477         3134 :           && !r.contains_p (wi::one (TYPE_PRECISION (TREE_TYPE (name)))))
    1478              :         {
    1479           36 :           if (dump_enabled_p ())
    1480            2 :             dump_printf_loc (MSG_NOTE, find_loop_location (loop),
    1481              :                              "%T can never be 1 in this loop\n", name);
    1482              : 
    1483           36 :           if (to_remove >= 0)
    1484            0 :             bitmap_clear_bit (&li.unity_names, to_remove);
    1485           36 :           to_remove = i;
    1486           36 :           m_num_conditions -= 1;
    1487              :         }
    1488              :     }
    1489         4407 :   if (to_remove >= 0)
    1490           36 :     bitmap_clear_bit (&li.unity_names, to_remove);
    1491         4407 : }
    1492              : 
    1493              : /* Remove any scheduled loop version conditions that will never be true.
    1494              :    Return true if any remain.  */
    1495              : 
    1496              : bool
    1497          707 : loop_versioning::prune_conditions ()
    1498              : {
    1499          707 :   AUTO_DUMP_SCOPE ("prune_loop_conditions",
    1500              :                    dump_user_location_t::from_function_decl (m_fn->decl));
    1501              : 
    1502          707 :   calculate_dominance_info (CDI_DOMINATORS);
    1503          707 :   lv_dom_walker dom_walker (*this);
    1504          707 :   dom_walker.walk (ENTRY_BLOCK_PTR_FOR_FN (m_fn));
    1505          707 :   return m_num_conditions != 0;
    1506          707 : }
    1507              : 
    1508              : /* Merge the version checks for INNER into immediately-enclosing loop
    1509              :    OUTER.  */
    1510              : 
    1511              : void
    1512          536 : loop_versioning::merge_loop_info (class loop *outer, class loop *inner)
    1513              : {
    1514          536 :   loop_info &inner_li = get_loop_info (inner);
    1515          536 :   loop_info &outer_li = get_loop_info (outer);
    1516              : 
    1517          536 :   if (dump_enabled_p ())
    1518              :     {
    1519           11 :       bitmap_iterator bi;
    1520           11 :       unsigned int i;
    1521           22 :       EXECUTE_IF_SET_IN_BITMAP (&inner_li.unity_names, 0, i, bi)
    1522           11 :         if (!bitmap_bit_p (&outer_li.unity_names, i))
    1523           11 :           dump_printf_loc (MSG_NOTE, find_loop_location (inner),
    1524              :                            "hoisting check that %T == 1 to outer loop\n",
    1525           11 :                            ssa_name (i));
    1526              :     }
    1527              : 
    1528          536 :   bitmap_ior_into (&outer_li.unity_names, &inner_li.unity_names);
    1529          549 :   if (loop_depth (outer_li.outermost) < loop_depth (inner_li.outermost))
    1530          372 :     outer_li.outermost = inner_li.outermost;
    1531          536 : }
    1532              : 
    1533              : /* Add LOOP to the queue of loops to version.  */
    1534              : 
    1535              : void
    1536         1194 : loop_versioning::add_loop_to_queue (class loop *loop)
    1537              : {
    1538         1194 :   loop_info &li = get_loop_info (loop);
    1539              : 
    1540         1194 :   if (dump_enabled_p ())
    1541           69 :     dump_printf_loc (MSG_NOTE, find_loop_location (loop),
    1542              :                      "queuing this loop for versioning\n");
    1543         1194 :   m_loops_to_version.safe_push (loop);
    1544              : 
    1545              :   /* Don't try to version superloops.  */
    1546         1194 :   li.rejected_p = true;
    1547         1194 : }
    1548              : 
    1549              : /* Decide whether the cost model would allow us to version LOOP,
    1550              :    either directly or as part of a parent loop, and return true if so.
    1551              :    This does not imply that the loop is actually worth versioning in its
    1552              :    own right, just that it would be valid to version it if something
    1553              :    benefited.
    1554              : 
    1555              :    We have already made this decision for all inner loops of LOOP.  */
    1556              : 
    1557              : bool
    1558         3657 : loop_versioning::decide_whether_loop_is_versionable (class loop *loop)
    1559              : {
    1560         3657 :   loop_info &li = get_loop_info (loop);
    1561              : 
    1562         3657 :   if (li.rejected_p)
    1563              :     return false;
    1564              : 
    1565              :   /* Examine the decisions made for inner loops.  */
    1566         3436 :   for (class loop *inner = loop->inner; inner; inner = inner->next)
    1567              :     {
    1568          652 :       loop_info &inner_li = get_loop_info (inner);
    1569          652 :       if (inner_li.rejected_p)
    1570              :         {
    1571            6 :           if (dump_enabled_p ())
    1572            0 :             dump_printf_loc (MSG_NOTE, find_loop_location (loop),
    1573              :                              "not versioning this loop because one of its"
    1574              :                              " inner loops should not be versioned\n");
    1575            6 :           return false;
    1576              :         }
    1577              : 
    1578          646 :       if (inner_li.worth_versioning_p ())
    1579          411 :         li.subloops_benefit_p = true;
    1580              : 
    1581              :       /* Accumulate the number of instructions from subloops that are not
    1582              :          the innermost, or that don't benefit from versioning.  Only the
    1583              :          instructions from innermost loops that benefit from versioning
    1584              :          should be weighed against loop-versioning-max-inner-insns;
    1585              :          everything else should be weighed against
    1586              :          loop-versioning-max-outer-insns.  */
    1587          646 :       if (!inner_li.worth_versioning_p () || inner->inner)
    1588              :         {
    1589          296 :           if (dump_enabled_p ())
    1590            0 :             dump_printf_loc (MSG_NOTE, find_loop_location (loop),
    1591              :                              "counting %d instructions from this loop"
    1592              :                              " against its parent loop\n", inner_li.num_insns);
    1593          296 :           li.num_insns += inner_li.num_insns;
    1594              :         }
    1595              :     }
    1596              : 
    1597              :   /* Enforce the size limits.  */
    1598         4370 :   if (li.worth_versioning_p ())
    1599              :     {
    1600         1581 :       unsigned int max_num_insns = max_insns_for_loop (loop);
    1601         1581 :       if (dump_enabled_p ())
    1602           80 :         dump_printf_loc (MSG_NOTE, find_loop_location (loop),
    1603              :                          "this loop has %d instructions, against"
    1604              :                          " a versioning limit of %d\n",
    1605              :                          li.num_insns, max_num_insns);
    1606         1581 :       if (li.num_insns > max_num_insns)
    1607              :         {
    1608           10 :           if (dump_enabled_p ())
    1609            0 :             dump_printf_loc (MSG_MISSED_OPTIMIZATION
    1610            0 :                              | MSG_PRIORITY_USER_FACING,
    1611            0 :                              find_loop_location (loop),
    1612              :                              "this loop is too big to version");
    1613           10 :           return false;
    1614              :         }
    1615              :     }
    1616              : 
    1617              :   /* Hoist all version checks from subloops to this loop.  */
    1618         3310 :   for (class loop *subloop = loop->inner; subloop; subloop = subloop->next)
    1619          536 :     merge_loop_info (loop, subloop);
    1620              : 
    1621              :   return true;
    1622              : }
    1623              : 
    1624              : /* Decide which loops to version and add them to the versioning queue.
    1625              :    Return true if there are any loops to version.  */
    1626              : 
    1627              : bool
    1628          696 : loop_versioning::make_versioning_decisions ()
    1629              : {
    1630          696 :   AUTO_DUMP_SCOPE ("make_versioning_decisions",
    1631              :                    dump_user_location_t::from_function_decl (m_fn->decl));
    1632              : 
    1633         5745 :   for (auto loop : loops_list (cfun, LI_FROM_INNERMOST))
    1634              :     {
    1635         3657 :       loop_info &linfo = get_loop_info (loop);
    1636         3657 :       if (decide_whether_loop_is_versionable (loop))
    1637              :         {
    1638              :           /* Commit to versioning LOOP directly if we can't hoist the
    1639              :              version checks any further.  */
    1640         8002 :           if (linfo.worth_versioning_p ()
    1641         2774 :               && (loop_depth (loop) == 1 || linfo.outermost == loop))
    1642         1130 :             add_loop_to_queue (loop);
    1643              :         }
    1644              :       else
    1645              :         {
    1646              :           /* We can't version this loop, so individually version any
    1647              :              subloops that would benefit and haven't been versioned yet.  */
    1648          883 :           linfo.rejected_p = true;
    1649         1539 :           for (class loop *subloop = loop->inner; subloop;
    1650          656 :                subloop = subloop->next)
    1651          856 :             if (get_loop_info (subloop).worth_versioning_p ())
    1652           64 :               add_loop_to_queue (subloop);
    1653              :         }
    1654          696 :     }
    1655              : 
    1656         1392 :   return !m_loops_to_version.is_empty ();
    1657              : }
    1658              : 
    1659              : /* Attempt to implement loop versioning for LOOP, using the information
    1660              :    cached in the associated loop_info.  Return true on success.  */
    1661              : 
    1662              : bool
    1663         1194 : loop_versioning::version_loop (class loop *loop)
    1664              : {
    1665         1194 :   loop_info &li = get_loop_info (loop);
    1666              : 
    1667              :   /* Build up a condition that selects the original loop instead of
    1668              :      the simplified loop.  */
    1669         1194 :   tree cond = boolean_false_node;
    1670         1194 :   bitmap_iterator bi;
    1671         1194 :   unsigned int i;
    1672         2721 :   EXECUTE_IF_SET_IN_BITMAP (&li.unity_names, 0, i, bi)
    1673              :     {
    1674         1527 :       tree name = ssa_name (i);
    1675         1527 :       tree ne_one = fold_build2 (NE_EXPR, boolean_type_node, name,
    1676              :                                  build_one_cst (TREE_TYPE (name)));
    1677         1527 :       cond = fold_build2 (TRUTH_OR_EXPR, boolean_type_node, cond, ne_one);
    1678              :     }
    1679              : 
    1680              :   /* Convert the condition into a suitable gcond.  */
    1681         1194 :   gimple_seq stmts = NULL;
    1682         1194 :   cond = force_gimple_operand_1 (cond, &stmts, is_gimple_condexpr_for_cond,
    1683              :                                  NULL_TREE);
    1684              : 
    1685              :   /* Version the loop.  */
    1686         1194 :   initialize_original_copy_tables ();
    1687         1194 :   basic_block cond_bb;
    1688         1194 :   li.optimized_loop = loop_version (loop, cond, &cond_bb,
    1689              :                                     profile_probability::unlikely (),
    1690              :                                     profile_probability::likely (),
    1691              :                                     profile_probability::unlikely (),
    1692              :                                     profile_probability::likely (), true);
    1693         1194 :   free_original_copy_tables ();
    1694         1194 :   if (!li.optimized_loop)
    1695              :     {
    1696            2 :       if (dump_enabled_p ())
    1697            0 :         dump_printf_loc (MSG_MISSED_OPTIMIZATION, find_loop_location (loop),
    1698              :                          "tried but failed to version this loop for when"
    1699              :                          " certain strides are 1\n");
    1700            2 :       return false;
    1701              :     }
    1702              : 
    1703              :   /* Assign hierarchical discriminators to distinguish loop versions.
    1704              :      This allows AutoFDO to distinguish profile data from different
    1705              :      versions.  No multiplicity for non-vectorized loop versioning.  */
    1706         1192 :   gimple *optimized_last = last_nondebug_stmt (li.optimized_loop->header);
    1707         1192 :   location_t optimized_loc
    1708         1192 :     = optimized_last ? gimple_location (optimized_last) : UNKNOWN_LOCATION;
    1709         1192 :   if (optimized_loc != UNKNOWN_LOCATION)
    1710              :     {
    1711         1107 :       unsigned int optimized_copyid = allocate_copyid_base (optimized_loc, 1);
    1712         1107 :       assign_discriminators_to_loop (li.optimized_loop, 0, optimized_copyid);
    1713              :     }
    1714         1192 :   gimple *loop_last = last_nondebug_stmt (loop->header);
    1715         1192 :   location_t loop_loc
    1716         1192 :     = loop_last ? gimple_location (loop_last) : UNKNOWN_LOCATION;
    1717         1192 :   if (loop_loc != UNKNOWN_LOCATION)
    1718              :     {
    1719         1107 :       unsigned int loop_copyid = allocate_copyid_base (loop_loc, 1);
    1720         1107 :       assign_discriminators_to_loop (loop, 0, loop_copyid);
    1721              :     }
    1722         1192 :   if (dump_enabled_p ())
    1723           69 :     dump_printf_loc (MSG_OPTIMIZED_LOCATIONS, find_loop_location (loop),
    1724              :                      "versioned this loop for when certain strides are 1\n");
    1725              : 
    1726              :   /* Insert the statements that feed COND.  */
    1727         1192 :   if (stmts)
    1728              :     {
    1729          183 :       gimple_stmt_iterator gsi = gsi_last_bb (cond_bb);
    1730          183 :       gsi_insert_seq_before (&gsi, stmts, GSI_SAME_STMT);
    1731              :     }
    1732              : 
    1733              :   return true;
    1734              : }
    1735              : 
    1736              : /* Attempt to version all loops in the versioning queue.  */
    1737              : 
    1738              : void
    1739          696 : loop_versioning::implement_versioning_decisions ()
    1740              : {
    1741              :   /* No AUTO_DUMP_SCOPE here since all messages are top-level and
    1742              :      user-facing at this point.  */
    1743              : 
    1744          696 :   bool any_succeeded_p = false;
    1745          696 :   class loop *loop;
    1746          696 :   unsigned int i;
    1747         1890 :   FOR_EACH_VEC_ELT (m_loops_to_version, i, loop)
    1748         1194 :     if (version_loop (loop))
    1749         1192 :       any_succeeded_p = true;
    1750          696 :   if (!any_succeeded_p)
    1751          696 :     return;
    1752              : 
    1753          694 :   update_ssa (TODO_update_ssa);
    1754              : 
    1755              :   /* Simplify the new loop, which is used when COND is false.  */
    1756         1886 :   FOR_EACH_VEC_ELT (m_loops_to_version, i, loop)
    1757              :     {
    1758         1192 :       loop_info &linfo = get_loop_info (loop);
    1759         1192 :       if (linfo.optimized_loop)
    1760         1192 :         name_prop (linfo).substitute_and_fold (linfo.optimized_loop->header);
    1761              :     }
    1762              : }
    1763              : 
    1764              : /* Run the pass and return a set of TODO_* flags.  */
    1765              : 
    1766              : unsigned int
    1767        27928 : loop_versioning::run ()
    1768              : {
    1769        27928 :   gcc_assert (scev_initialized_p ());
    1770              : 
    1771        27928 :   if (analyze_blocks ()
    1772          707 :       && prune_conditions ()
    1773        28624 :       && make_versioning_decisions ())
    1774          696 :     implement_versioning_decisions ();
    1775              : 
    1776        27928 :   return 0;
    1777              : }
    1778              : 
    1779              : /* Loop versioning pass.  */
    1780              : 
    1781              : const pass_data pass_data_loop_versioning =
    1782              : {
    1783              :   GIMPLE_PASS, /* type */
    1784              :   "lversion", /* name */
    1785              :   OPTGROUP_LOOP, /* optinfo_flags */
    1786              :   TV_LOOP_VERSIONING, /* tv_id */
    1787              :   PROP_cfg, /* properties_required */
    1788              :   0, /* properties_provided */
    1789              :   0, /* properties_destroyed */
    1790              :   0, /* todo_flags_start */
    1791              :   0, /* todo_flags_finish */
    1792              : };
    1793              : 
    1794              : class pass_loop_versioning : public gimple_opt_pass
    1795              : {
    1796              : public:
    1797       285722 :   pass_loop_versioning (gcc::context *ctxt)
    1798       571444 :     : gimple_opt_pass (pass_data_loop_versioning, ctxt)
    1799              :   {}
    1800              : 
    1801              :   /* opt_pass methods: */
    1802       241458 :   bool gate (function *) final override
    1803              :   {
    1804       241458 :     return flag_version_loops_for_strides;
    1805              :   }
    1806              :   unsigned int execute (function *) final override;
    1807              : };
    1808              : 
    1809              : unsigned int
    1810        27928 : pass_loop_versioning::execute (function *fn)
    1811              : {
    1812        55856 :   if (number_of_loops (fn) <= 1)
    1813              :     return 0;
    1814              : 
    1815        27928 :   enable_ranger (fn);
    1816        27928 :   unsigned int ret = loop_versioning (fn).run ();
    1817        27928 :   disable_ranger (fn);
    1818        27928 :   return ret;
    1819              : }
    1820              : 
    1821              : } // anon namespace
    1822              : 
    1823              : gimple_opt_pass *
    1824       285722 : make_pass_loop_versioning (gcc::context *ctxt)
    1825              : {
    1826       285722 :   return new pass_loop_versioning (ctxt);
    1827              : }
        

Generated by: LCOV version 2.4-beta

LCOV profile is generated on x86_64 machine using following configure options: configure --disable-bootstrap --enable-coverage=opt --enable-languages=c,c++,fortran,go,jit,lto,rust,m2 --enable-host-shared. GCC test suite is run with the built compiler.