LCOV - code coverage report
Current view: top level - gcc - tree-vect-data-refs.cc (source / functions) Coverage Total Hit
Test: gcc.info Lines: 89.9 % 2878 2588
Test Date: 2026-06-20 15:32:29 Functions: 97.4 % 78 76
Legend: Lines:     hit not hit

            Line data    Source code
       1              : /* Data References Analysis and Manipulation Utilities for Vectorization.
       2              :    Copyright (C) 2003-2026 Free Software Foundation, Inc.
       3              :    Contributed by Dorit Naishlos <dorit@il.ibm.com>
       4              :    and Ira Rosen <irar@il.ibm.com>
       5              : 
       6              : This file is part of GCC.
       7              : 
       8              : GCC is free software; you can redistribute it and/or modify it under
       9              : the terms of the GNU General Public License as published by the Free
      10              : Software Foundation; either version 3, or (at your option) any later
      11              : version.
      12              : 
      13              : GCC is distributed in the hope that it will be useful, but WITHOUT ANY
      14              : WARRANTY; without even the implied warranty of MERCHANTABILITY or
      15              : FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
      16              : for more details.
      17              : 
      18              : You should have received a copy of the GNU General Public License
      19              : along with GCC; see the file COPYING3.  If not see
      20              : <http://www.gnu.org/licenses/>.  */
      21              : 
      22              : #define INCLUDE_ALGORITHM
      23              : #include "config.h"
      24              : #include "system.h"
      25              : #include "coretypes.h"
      26              : #include "backend.h"
      27              : #include "target.h"
      28              : #include "rtl.h"
      29              : #include "tree.h"
      30              : #include "gimple.h"
      31              : #include "predict.h"
      32              : #include "memmodel.h"
      33              : #include "tm_p.h"
      34              : #include "ssa.h"
      35              : #include "optabs-tree.h"
      36              : #include "cgraph.h"
      37              : #include "dumpfile.h"
      38              : #include "pretty-print.h"
      39              : #include "alias.h"
      40              : #include "fold-const.h"
      41              : #include "stor-layout.h"
      42              : #include "tree-eh.h"
      43              : #include "gimplify.h"
      44              : #include "gimple-iterator.h"
      45              : #include "gimplify-me.h"
      46              : #include "tree-ssa-loop-ivopts.h"
      47              : #include "tree-ssa-loop-manip.h"
      48              : #include "tree-ssa-loop.h"
      49              : #include "cfgloop.h"
      50              : #include "tree-scalar-evolution.h"
      51              : #include "tree-vectorizer.h"
      52              : #include "expr.h"
      53              : #include "builtins.h"
      54              : #include "tree-cfg.h"
      55              : #include "tree-hash-traits.h"
      56              : #include "vec-perm-indices.h"
      57              : #include "internal-fn.h"
      58              : #include "gimple-fold.h"
      59              : #include "optabs-query.h"
      60              : 
      61              : /* Return true if load- or store-lanes optab OPTAB is implemented for
      62              :    COUNT vectors of type VECTYPE.  NAME is the name of OPTAB and is used
      63              :    only when printing dump messages.
      64              :    If it is implemented and ELSVALS is non-null, store the possible else
      65              :    values in the vector it points to.  */
      66              : 
      67              : static bool
      68       370066 : vect_lanes_optab_supported_p (const char *name, convert_optab optab,
      69              :                               tree vectype, unsigned HOST_WIDE_INT count,
      70              :                               vec<int> *elsvals = nullptr)
      71              : {
      72       370066 :   machine_mode mode, array_mode;
      73       370066 :   bool limit_p;
      74              : 
      75       370066 :   mode = TYPE_MODE (vectype);
      76       370066 :   if (!targetm.array_mode (mode, count).exists (&array_mode))
      77              :     {  /* No array mode from the target hook: try an integer mode of COUNT times the bit size of MODE.  */
      78       740132 :       poly_uint64 bits = count * GET_MODE_BITSIZE (mode);
      79       370066 :       limit_p = !targetm.array_mode_supported_p (mode, count);  /* Cleared when the target reports the array mode as supported.  */
      80       370066 :       if (!int_mode_for_size (bits, limit_p).exists (&array_mode))
      81              :         {
      82       317812 :           if (dump_enabled_p ())
      83        12924 :             dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
      84              :                              "no array mode for %s[%wu]\n",
      85        12924 :                              GET_MODE_NAME (mode), count);
      86       317812 :           return false;
      87              :         }
      88              :     }
      89              : 
      90        52254 :   enum insn_code icode;
      91        52254 :   if ((icode = convert_optab_handler (optab, array_mode, mode))
      92              :       == CODE_FOR_nothing)  /* No handler for OPTAB with this pair of modes.  */
      93              :     {
      94        52254 :       if (dump_enabled_p ())
      95         4152 :         dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
      96              :                          "cannot use %s<%s><%s>\n", name,
      97         4152 :                          GET_MODE_NAME (array_mode), GET_MODE_NAME (mode));
      98        52254 :       return false;
      99              :     }
     100              : 
     101            0 :   if (dump_enabled_p ())
     102            0 :     dump_printf_loc (MSG_NOTE, vect_location,
     103            0 :                      "can use %s<%s><%s>\n", name, GET_MODE_NAME (array_mode),
     104            0 :                      GET_MODE_NAME (mode));
     105              : 
     106            0 :   if (elsvals)
     107            0 :     get_supported_else_vals (icode,
     108            0 :                              internal_fn_else_index (IFN_MASK_LEN_LOAD_LANES),
     109              :                              *elsvals);  /* The else index is always that of IFN_MASK_LEN_LOAD_LANES, whatever OPTAB is.  */
     110              : 
     111              :   return true;
     112              : }
     113              : 
     114              : /* Helper function to identify a simd clone call.  If STMT is a call to a
     115              :    function with simd clones then return the corresponding cgraph_node,
     116              :    otherwise (including when STMT is not a call at all) return NULL.  */
     117              : 
     118              : static cgraph_node*
     119       622171 : simd_clone_call_p (gimple *stmt)
     120              : {
     121       699956 :   gcall *call = dyn_cast <gcall *> (stmt);
     122        79469 :   if (!call)
     123              :     return NULL;  /* STMT is not a call.  */
     124              : 
     125        79469 :   tree fndecl = NULL_TREE;
     126        79469 :   if (gimple_call_internal_p (call, IFN_MASK_CALL))
     127          226 :     fndecl = TREE_OPERAND (gimple_call_arg (stmt, 0), 0);  /* For IFN_MASK_CALL the callee is operand 0 of the first argument.  */
     128              :   else
     129        79243 :     fndecl = gimple_call_fndecl (stmt);
     130              : 
     131        79469 :   if (fndecl == NULL_TREE)
     132              :     return NULL;  /* No function declaration is known for this call.  */
     133              : 
     134        36796 :   cgraph_node *node = cgraph_node::get (fndecl);
     135        36796 :   if (node && node->simd_clones != NULL)
     136              :     return node;  /* The callee has at least one simd clone recorded.  */
     137              : 
     138              :   return NULL;
     139              : }
     140              : 
     141              : 
     142              : 
     143              : /* Return the smallest scalar part of STMT_INFO.
     144              :    This is used to determine the vectype of the stmt.  We generally set the
     145              :    vectype according to the type of the result (lhs).  For stmts whose
     146              :    result-type is different than the type of the arguments (e.g., demotion,
     147              :    promotion), vectype will be reset appropriately (later).  Note that we have
     148              :    to visit the smallest datatype in this function, because that determines the
     149              :    VF.  If the smallest datatype in the loop is present only as the rhs of a
     150              :    promotion operation - we'd miss it.
     151              :    Such a case, where a variable of this datatype does not appear in the lhs
     152              :    anywhere in the loop, can only occur if it's an invariant: e.g.:
     153              :    'int_x = (int) short_inv', which we'd expect to have been optimized away by
     154              :    invariant motion.  However, we cannot rely on invariant motion to always
     155              :    take invariants out of the loop, and so in the case of promotion we also
     156              :    have to check the rhs.
     157              :    SCALAR_TYPE is the initial candidate.  The result is SCALAR_TYPE, the type
     158              :    of the assignment lhs, or a type taken from an operand or (simd clone) call argument.  */
     159              : 
     160              : tree
     161      5044044 : vect_get_smallest_scalar_type (stmt_vec_info stmt_info, tree scalar_type)
     162              : {
     163      5044044 :   HOST_WIDE_INT lhs, rhs;
     164              : 
     165              :   /* During the analysis phase, this function is called on arbitrary
     166              :      statements that might not have scalar results.  */
     167      5044044 :   if (!tree_fits_uhwi_p (TYPE_SIZE_UNIT (scalar_type)))
     168              :     return scalar_type;
     169              : 
     170      5044044 :   lhs = rhs = TREE_INT_CST_LOW (TYPE_SIZE_UNIT (scalar_type));  /* Both start as the TYPE_SIZE_UNIT of the incoming SCALAR_TYPE.  */
     171              : 
     172      5044044 :   gassign *assign = dyn_cast <gassign *> (stmt_info->stmt);
     173      5044044 :   if (assign)
     174              :     {
     175      4421873 :       scalar_type = TREE_TYPE (gimple_assign_lhs (assign));
     176      4421873 :       if (gimple_assign_cast_p (assign)
     177      4018973 :           || gimple_assign_rhs_code (assign) == DOT_PROD_EXPR
     178      4018337 :           || gimple_assign_rhs_code (assign) == WIDEN_SUM_EXPR
     179      4018337 :           || gimple_assign_rhs_code (assign) == SAD_EXPR
     180      4018228 :           || gimple_assign_rhs_code (assign) == WIDEN_MULT_EXPR
     181      4014484 :           || gimple_assign_rhs_code (assign) == WIDEN_MULT_PLUS_EXPR
     182      4014484 :           || gimple_assign_rhs_code (assign) == WIDEN_MULT_MINUS_EXPR
     183      4014484 :           || gimple_assign_rhs_code (assign) == WIDEN_LSHIFT_EXPR
     184      8436357 :           || gimple_assign_rhs_code (assign) == FLOAT_EXPR)
     185              :         {
     186       421841 :           tree rhs_type = TREE_TYPE (gimple_assign_rhs1 (assign));
     187              : 
     188       421841 :           rhs = TREE_INT_CST_LOW (TYPE_SIZE_UNIT (rhs_type));  /* NOTE(review): no tree_fits_uhwi_p check here, unlike the call case below -- confirm this is always safe.  */
     189       421841 :           if (rhs < lhs)
     190      5044044 :             scalar_type = rhs_type;
     191              :         }
     192              :     }
     193       622171 :   else if (cgraph_node *node = simd_clone_call_p (stmt_info->stmt))
     194              :     {
     195         1684 :       auto clone = node->simd_clones->simdclone;
     196         5120 :       for (unsigned int i = 0; i < clone->nargs; ++i)
     197              :         {
     198         3436 :           if (clone->args[i].arg_type == SIMD_CLONE_ARG_TYPE_VECTOR)
     199              :             {
     200         1983 :               tree arg_scalar_type = TREE_TYPE (clone->args[i].vector_type);
     201         1983 :               rhs = TREE_INT_CST_LOW (TYPE_SIZE_UNIT (arg_scalar_type));
     202         1983 :               if (rhs < lhs)
     203              :                 {
     204         3436 :                   scalar_type = arg_scalar_type;
     205         3436 :                   lhs = rhs;  /* Keep a running minimum across all vector arguments.  */
     206              :                 }
     207              :             }
     208              :         }
     209              :     }
     210       620487 :   else if (gcall *call = dyn_cast <gcall *> (stmt_info->stmt))
     211              :     {
     212        77785 :       unsigned int i = 0;  /* Index of the call argument whose type is inspected below.  */
     213        77785 :       if (gimple_call_internal_p (call))
     214              :         {
     215        40324 :           internal_fn ifn = gimple_call_internal_fn (call);
     216        40324 :           if (internal_load_fn_p (ifn))
     217              :             /* For loads the LHS type does the trick.  */
     218              :             i = ~0U;  /* Out-of-range index, so the argument check below is skipped.  */
     219        35483 :           else if (internal_store_fn_p (ifn))
     220              :             {
     221              :               /* For stores use the type of the stored value.  */
     222         2742 :               i = internal_fn_stored_value_index (ifn);
     223         2742 :               scalar_type = TREE_TYPE (gimple_call_arg (call, i));
     224         2742 :               i = ~0U;
     225              :             }
     226        32741 :           else if (internal_fn_mask_index (ifn) == 0)
     227        11080 :             i = 1;  /* Argument 0 is the mask, so look at argument 1 instead.  */
     228              :         }
     229        77785 :       if (i < gimple_call_num_args (call))
     230              :         {
     231        65625 :           tree rhs_type = TREE_TYPE (gimple_call_arg (call, i));
     232        65625 :           if (tree_fits_uhwi_p (TYPE_SIZE_UNIT (rhs_type)))
     233              :             {
     234        65625 :               rhs = TREE_INT_CST_LOW (TYPE_SIZE_UNIT (rhs_type));
     235        65625 :               if (rhs < lhs)
     236      5044044 :                 scalar_type = rhs_type;
     237              :             }
     238              :         }
     239              :     }
     240              : 
     241              :   return scalar_type;
     242              : }
     243              : 
     244              : 
     245              : /* Insert DDR into LOOP_VINFO list of ddrs that may alias and need to be
     246              :    tested at run-time.  Return success if DDR was inserted.  Return a failure
     247              :    if alias checks are disabled by --param or runtime_alias_check_p rejects DDR.  */
     248              : 
     249              : static opt_result
     250       169173 : vect_mark_for_runtime_alias_test (ddr_p ddr, loop_vec_info loop_vinfo)
     251              : {
     252       169173 :   class loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
     253              : 
     254       169173 :   if ((unsigned) param_vect_max_version_for_alias_checks == 0)
     255           54 :     return opt_result::failure_at (vect_location,
     256              :                                    "will not create alias checks, as"
     257              :                                    " --param vect-max-version-for-alias-checks"
     258              :                                    " == 0\n");
     259              : 
     260       169119 :   opt_result res
     261       169119 :     = runtime_alias_check_p (ddr, loop,
     262       169119 :                              optimize_loop_nest_for_speed_p (loop));
     263       169119 :   if (!res)
     264          143 :     return res;  /* Pass the failure from runtime_alias_check_p back unchanged.  */
     265              : 
     266       168976 :   LOOP_VINFO_MAY_ALIAS_DDRS (loop_vinfo).safe_push (ddr);
     267       168976 :   return opt_result::success ();
     268              : }
     269              : 
     270              : /* Record that loop LOOP_VINFO needs to check that VALUE is nonzero.  A VALUE that is already recorded is not added again.  */
     271              : 
     272              : static void
     273         1528 : vect_check_nonzero_value (loop_vec_info loop_vinfo, tree value)
     274              : {
     275         1528 :   const vec<tree> &checks = LOOP_VINFO_CHECK_NONZERO (loop_vinfo);
     276         2295 :   for (unsigned int i = 0; i < checks.length(); ++i)
     277          767 :     if (checks[i] == value)
     278              :       return;  /* Already recorded (compared by pointer equality).  */
     279              : 
     280         1528 :   if (dump_enabled_p ())
     281          432 :     dump_printf_loc (MSG_NOTE, vect_location,
     282              :                      "need run-time check that %T is nonzero\n",
     283              :                      value);
     284         1528 :   LOOP_VINFO_CHECK_NONZERO (loop_vinfo).safe_push (value);
     285              : }
     286              : 
     287              : /* Return true if we know that the order of vectorized DR_INFO_A and
     288              :    vectorized DR_INFO_B will be the same as the order of DR_INFO_A and
     289              :    DR_INFO_B.  At least one of the accesses is a write.  */
     290              : 
     291              : static bool
     292       144329 : vect_preserves_scalar_order_p (dr_vec_info *dr_info_a, dr_vec_info *dr_info_b)
     293              : {
     294       144329 :   stmt_vec_info stmtinfo_a = dr_info_a->stmt;
     295       144329 :   stmt_vec_info stmtinfo_b = dr_info_b->stmt;
     296              : 
     297              :   /* Single statements are always kept in their original order.  */
     298       144329 :   if (!STMT_VINFO_GROUPED_ACCESS (stmtinfo_a)
     299       239226 :       && !STMT_VINFO_GROUPED_ACCESS (stmtinfo_b))
     300              :     return true;  /* Neither access is part of a group.  */
     301              : 
     302              :   /* If there is a loop invariant read involved we might vectorize it in
     303              :      the prologue, breaking scalar order with respect to the in-loop store.  */
     304        26144 :   if ((DR_IS_READ (dr_info_a->dr) && integer_zerop (DR_STEP (dr_info_a->dr)))
     305        80816 :       || (DR_IS_READ (dr_info_b->dr) && integer_zerop (DR_STEP (dr_info_b->dr))))
     306         1726 :     return false;
     307              : 
     308              :   /* STMT_A and STMT_B belong to overlapping groups.  All loads are
     309              :      emitted at the position of the first scalar load.
     310              :      Stores in a group are emitted at the position of the last scalar store.
     311              :      Compute that position and check whether the resulting order matches
     312              :      the current one.  */
     313        54181 :   stmt_vec_info il_a = DR_GROUP_FIRST_ELEMENT (stmtinfo_a);
     314        54181 :   if (il_a)
     315              :     {
     316        48941 :       if (DR_IS_WRITE (STMT_VINFO_DATA_REF (stmtinfo_a)))
     317       213632 :         for (stmt_vec_info s = DR_GROUP_NEXT_ELEMENT (il_a); s;
     318       188239 :              s = DR_GROUP_NEXT_ELEMENT (s))
     319       188239 :           il_a = get_later_stmt (il_a, s);  /* Ends up as the latest store of the group.  */
     320              :       else /* DR_IS_READ */
     321        93502 :         for (stmt_vec_info s = DR_GROUP_NEXT_ELEMENT (il_a); s;
     322        69954 :              s = DR_GROUP_NEXT_ELEMENT (s))
     323        69954 :           if (get_later_stmt (il_a, s) == il_a)
     324         2144 :             il_a = s;  /* S comes before IL_A: ends up as the earliest load of the group.  */
     325              :     }
     326              :   else
     327              :     il_a = stmtinfo_a;  /* Not in a group: the statement itself is used.  */
     328        54181 :   stmt_vec_info il_b = DR_GROUP_FIRST_ELEMENT (stmtinfo_b);
     329        54181 :   if (il_b)
     330              :     {
     331        47323 :       if (DR_IS_WRITE (STMT_VINFO_DATA_REF (stmtinfo_b)))
     332       273547 :         for (stmt_vec_info s = DR_GROUP_NEXT_ELEMENT (il_b); s;
     333       235517 :              s = DR_GROUP_NEXT_ELEMENT (s))
     334       235517 :           il_b = get_later_stmt (il_b, s);  /* Ends up as the latest store of the group.  */
     335              :       else /* DR_IS_READ */
     336        42771 :         for (stmt_vec_info s = DR_GROUP_NEXT_ELEMENT (il_b); s;
     337        33478 :              s = DR_GROUP_NEXT_ELEMENT (s))
     338        33478 :           if (get_later_stmt (il_b, s) == il_b)
     339          327 :             il_b = s;  /* S comes before IL_B: ends up as the earliest load of the group.  */
     340              :     }
     341              :   else
     342              :     il_b = stmtinfo_b;  /* Not in a group: the statement itself is used.  */
     343        54181 :   bool a_after_b = (get_later_stmt (stmtinfo_a, stmtinfo_b) == stmtinfo_a);  /* Order of the two scalar statements.  */
     344        54181 :   return (get_later_stmt (il_a, il_b) == il_a) == a_after_b;  /* The computed positions must be ordered the same way.  */
     345              : }
     346              : 
     347              : /* A subroutine of vect_analyze_data_ref_dependence.  Handle
     348              :    DDR_COULD_BE_INDEPENDENT_P ddr DDR that has a known set of dependence
     349              :    distances.  These distances are conservatively correct but they don't
     350              :    reflect a guaranteed dependence.
     351              : 
     352              :    Return true if this function does all the work necessary to avoid
     353              :    an alias or false if the caller should use the dependence distances
     354              :    to limit the vectorization factor in the usual way.  LOOP_DEPTH is
     355              :    the depth of the loop described by LOOP_VINFO and the other arguments
     356              :    are as for vect_analyze_data_ref_dependence.  *MAX_VF may be lowered to the loop's safelen.  */
     357              : 
     358              : static bool
     359         8308 : vect_analyze_possibly_independent_ddr (data_dependence_relation *ddr,
     360              :                                        loop_vec_info loop_vinfo,
     361              :                                        int loop_depth, unsigned int *max_vf)
     362              : {
     363         8308 :   class loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
     364        33250 :   for (lambda_vector &dist_v : DDR_DIST_VECTS (ddr))
     365              :     {
     366        16351 :       int dist = dist_v[loop_depth];
     367        16351 :       if (dist != 0 && !(dist > 0 && DDR_REVERSED_P (ddr)))  /* Zero distances, and positive distances of a reversed ddr, need no action here.  */
     368              :         {
     369              :           /* If the user asserted safelen >= DIST consecutive iterations
     370              :              can be executed concurrently, assume independence.
     371              : 
     372              :              ??? An alternative would be to add the alias check even
     373              :              in this case, and vectorize the fallback loop with the
     374              :              maximum VF set to safelen.  However, if the user has
     375              :              explicitly given a length, it's less likely that that
     376              :              would be a win.  */
     377         8057 :           if (loop->safelen >= 2 && abs_hwi (dist) <= loop->safelen)
     378              :             {
     379           32 :               if ((unsigned int) loop->safelen < *max_vf)
     380            2 :                 *max_vf = loop->safelen;
     381           32 :               LOOP_VINFO_NO_DATA_DEPENDENCIES (loop_vinfo) = false;
     382           32 :               continue;
     383              :             }
     384              : 
     385              :           /* For dependence distances of 2 or more, we have the option
     386              :              of limiting VF or checking for an alias at runtime.
     387              :              Prefer to check at runtime if we can, to avoid limiting
     388              :              the VF unnecessarily when the bases are in fact independent.
     389              : 
     390              :              Note that the alias checks will be removed if the VF ends up
     391              :              being small enough.  */
     392         8025 :           dr_vec_info *dr_info_a = loop_vinfo->lookup_dr (DDR_A (ddr));
     393         8025 :           dr_vec_info *dr_info_b = loop_vinfo->lookup_dr (DDR_B (ddr));
     394         8025 :           return (!STMT_VINFO_GATHER_SCATTER_P (dr_info_a->stmt)
     395         8025 :                   && !STMT_VINFO_GATHER_SCATTER_P (dr_info_b->stmt)
     396        16058 :                   && vect_mark_for_runtime_alias_test (ddr, loop_vinfo));  /* False for gather/scatter or when no runtime test could be recorded.  */
     397              :         }
     398              :     }
     399              :   return true;  /* Every distance vector was either skipped or covered by safelen.  */
     400              : }
     401              : 
     402              : 
     403              : /* Function vect_analyze_data_ref_dependence.
     404              : 
     405              :    FIXME: I needed to change the sense of the returned flag.
     406              : 
     407              :    Return FALSE if there (might) exist a dependence between a memory-reference
     408              :    DRA and a memory-reference DRB.  When versioning for alias may check a
     409              :    dependence at run-time, return TRUE.  Adjust *MAX_VF according to
     410              :    the data dependence.  */
     411              : 
     412              : static opt_result
     413      1486752 : vect_analyze_data_ref_dependence (struct data_dependence_relation *ddr,
     414              :                                   loop_vec_info loop_vinfo,
     415              :                                   unsigned int *max_vf)
     416              : {
     417      1486752 :   unsigned int i;
     418      1486752 :   class loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
     419      1486752 :   struct data_reference *dra = DDR_A (ddr);
     420      1486752 :   struct data_reference *drb = DDR_B (ddr);
     421      1486752 :   dr_vec_info *dr_info_a = loop_vinfo->lookup_dr (dra);
     422      1486752 :   dr_vec_info *dr_info_b = loop_vinfo->lookup_dr (drb);
     423      1486752 :   stmt_vec_info stmtinfo_a = dr_info_a->stmt;
     424      1486752 :   stmt_vec_info stmtinfo_b = dr_info_b->stmt;
     425      1486752 :   lambda_vector dist_v;
     426      1486752 :   unsigned int loop_depth;
     427              : 
     428              :   /* If user asserted safelen consecutive iterations can be
     429              :      executed concurrently, assume independence.  */
     430      1666574 :   auto apply_safelen = [&]()
     431              :     {
     432       179822 :       if (loop->safelen >= 2)
     433              :         {
     434         7464 :           if ((unsigned int) loop->safelen < *max_vf)
     435         1896 :             *max_vf = loop->safelen;
     436         7464 :           LOOP_VINFO_NO_DATA_DEPENDENCIES (loop_vinfo) = false;
     437         7464 :           return true;
     438              :         }
     439              :       return false;
     440      1486752 :     };
     441              : 
     442              :   /* In loop analysis all data references should be vectorizable.  */
     443      1486752 :   if (!STMT_VINFO_VECTORIZABLE (stmtinfo_a)
     444      1486752 :       || !STMT_VINFO_VECTORIZABLE (stmtinfo_b))
     445            0 :     gcc_unreachable ();
     446              : 
     447              :   /* Independent data accesses.  */
     448      1486752 :   if (DDR_ARE_DEPENDENT (ddr) == chrec_known)
     449      1220527 :     return opt_result::success ();
     450              : 
     451       266225 :   if (dra == drb
     452       266225 :       || (DR_IS_READ (dra) && DR_IS_READ (drb)))
     453            0 :     return opt_result::success ();
     454              : 
     455              :   /* We do not have to consider dependences between accesses that belong
     456              :      to the same group, unless the stride could be smaller than the
     457              :      group size.  */
     458       266225 :   if (DR_GROUP_FIRST_ELEMENT (stmtinfo_a)
     459       115282 :       && (DR_GROUP_FIRST_ELEMENT (stmtinfo_a)
     460       115282 :           == DR_GROUP_FIRST_ELEMENT (stmtinfo_b))
     461       284808 :       && !STMT_VINFO_STRIDED_P (stmtinfo_a))
     462         2281 :     return opt_result::success ();
     463              : 
     464              :   /* Even if we have an anti-dependence then, as the vectorized loop covers at
     465              :      least two scalar iterations, there is always also a true dependence.
     466              :      As the vectorizer does not re-order loads and stores we can ignore
     467              :      the anti-dependence if TBAA can disambiguate both DRs similar to the
     468              :      case with known negative distance anti-dependences (positive
     469              :      distance anti-dependences would violate TBAA constraints).  */
     470       132015 :   if (((DR_IS_READ (dra) && DR_IS_WRITE (drb))
     471       131929 :        || (DR_IS_WRITE (dra) && DR_IS_READ (drb)))
     472       414504 :       && !alias_sets_conflict_p (get_alias_set (DR_REF (dra)),
     473              :                                  get_alias_set (DR_REF (drb))))
     474         6274 :     return opt_result::success ();
     475              : 
     476       257670 :   if (STMT_VINFO_GATHER_SCATTER_P (stmtinfo_a)
     477       247737 :       || STMT_VINFO_GATHER_SCATTER_P (stmtinfo_b))
     478              :     {
     479        12608 :       if (apply_safelen ())
     480         1398 :         return opt_result::success ();
     481              : 
     482        11210 :       return opt_result::failure_at
     483        11210 :         (stmtinfo_a->stmt,
     484              :          "possible alias involving gather/scatter between %T and %T\n",
     485              :          DR_REF (dra), DR_REF (drb));
     486              :     }
     487              : 
     488              :   /* Unknown data dependence.  */
     489       245062 :   if (DDR_ARE_DEPENDENT (ddr) == chrec_dont_know)
     490              :     {
     491       166653 :       if (apply_safelen ())
     492         6066 :         return opt_result::success ();
     493              : 
     494       160587 :       if (dump_enabled_p ())
     495         7665 :         dump_printf_loc (MSG_MISSED_OPTIMIZATION, stmtinfo_a->stmt,
     496              :                          "versioning for alias required: "
     497              :                          "can't determine dependence between %T and %T\n",
     498              :                          DR_REF (dra), DR_REF (drb));
     499              : 
     500              :       /* Add to list of ddrs that need to be tested at run-time.  */
     501       160587 :       return vect_mark_for_runtime_alias_test (ddr, loop_vinfo);
     502              :     }
     503              : 
     504              :   /* Known data dependence.  */
     505        78409 :   if (DDR_NUM_DIST_VECTS (ddr) == 0)
     506              :     {
     507          561 :       if (apply_safelen ())
     508            0 :         return opt_result::success ();
     509              : 
     510          561 :       if (dump_enabled_p ())
     511          156 :         dump_printf_loc (MSG_MISSED_OPTIMIZATION, stmtinfo_a->stmt,
     512              :                          "versioning for alias required: "
     513              :                          "bad dist vector for %T and %T\n",
     514              :                          DR_REF (dra), DR_REF (drb));
     515              :       /* Add to list of ddrs that need to be tested at run-time.  */
     516          561 :       return vect_mark_for_runtime_alias_test (ddr, loop_vinfo);
     517              :     }
     518              : 
     519        77848 :   loop_depth = index_in_loop_nest (loop->num, DDR_LOOP_NEST (ddr));
     520              : 
     521        77848 :   if (DDR_COULD_BE_INDEPENDENT_P (ddr)
     522        77848 :       && vect_analyze_possibly_independent_ddr (ddr, loop_vinfo,
     523              :                                                 loop_depth, max_vf))
     524         8300 :     return opt_result::success ();
     525              : 
     526       132693 :   FOR_EACH_VEC_ELT (DDR_DIST_VECTS (ddr), i, dist_v)
     527              :     {
     528        69570 :       int dist = dist_v[loop_depth];
     529              : 
     530        69570 :       if (dump_enabled_p ())
     531         4361 :         dump_printf_loc (MSG_NOTE, vect_location,
     532              :                          "dependence distance  = %d.\n", dist);
     533              : 
     534        69570 :       if (dist == 0)
     535              :         {
     536        58139 :           if (dump_enabled_p ())
     537         3567 :             dump_printf_loc (MSG_NOTE, vect_location,
     538              :                              "dependence distance == 0 between %T and %T\n",
     539              :                              DR_REF (dra), DR_REF (drb));
     540              : 
     541              :           /* When we perform grouped accesses and perform implicit CSE
     542              :              by detecting equal accesses and doing disambiguation with
     543              :              runtime alias tests like for
     544              :                 .. = a[i];
     545              :                 .. = a[i+1];
     546              :                 a[i] = ..;
     547              :                 a[i+1] = ..;
     548              :                 *p = ..;
     549              :                 .. = a[i];
     550              :                 .. = a[i+1];
     551              :              where we will end up loading { a[i], a[i+1] } once, make
     552              :              sure that inserting group loads before the first load and
     553              :              stores after the last store will do the right thing.
     554              :              Similar for groups like
     555              :                 a[i] = ...;
     556              :                 ... = a[i];
     557              :                 a[i+1] = ...;
     558              :              where loads from the group interleave with the store.  */
     559        58139 :           if (!vect_preserves_scalar_order_p (dr_info_a, dr_info_b))
     560            0 :             return opt_result::failure_at (stmtinfo_a->stmt,
     561              :                                            "READ_WRITE dependence"
     562              :                                            " in interleaving.\n");
     563              : 
     564        58139 :           if (loop->safelen < 2)
     565              :             {
     566        54258 :               tree indicator = dr_zero_step_indicator (dra);
     567        54258 :               if (!indicator || integer_zerop (indicator))
     568            0 :                 return opt_result::failure_at (stmtinfo_a->stmt,
     569              :                                                "access also has a zero step\n");
     570        54258 :               else if (TREE_CODE (indicator) != INTEGER_CST)
     571         1528 :                 vect_check_nonzero_value (loop_vinfo, indicator);
     572              :             }
     573        58139 :           continue;
     574        58139 :         }
     575              : 
     576        11431 :       if (dist > 0 && DDR_REVERSED_P (ddr))
     577              :         {
     578              :           /* If DDR_REVERSED_P the order of the data-refs in DDR was
     579              :              reversed (to make distance vector positive), and the actual
     580              :              distance is negative.  */
     581         3912 :           if (dump_enabled_p ())
     582          105 :             dump_printf_loc (MSG_NOTE, vect_location,
     583              :                              "dependence distance negative.\n");
     584              :           /* When doing outer loop vectorization, we need to check if there is
     585              :              a backward dependence at the inner loop level if the dependence
     586              :              at the outer loop is reversed.  See PR81740.  */
     587         3912 :           if (nested_in_vect_loop_p (loop, stmtinfo_a)
     588         3900 :               || nested_in_vect_loop_p (loop, stmtinfo_b))
     589              :             {
     590           12 :               unsigned inner_depth = index_in_loop_nest (loop->inner->num,
     591           12 :                                                          DDR_LOOP_NEST (ddr));
     592           12 :               if (dist_v[inner_depth] < 0)
     593            9 :                 return opt_result::failure_at (stmtinfo_a->stmt,
     594              :                                                "not vectorized, dependence "
     595              :                                                "between data-refs %T and %T\n",
     596              :                                                DR_REF (dra), DR_REF (drb));
     597              :             }
     598              :           /* Record a negative dependence distance to later limit the
     599              :              amount of stmt copying / unrolling we can perform.
     600              :              Only need to handle read-after-write dependence.  */
     601         3903 :           if (DR_IS_READ (drb)
     602          156 :               && (STMT_VINFO_MIN_NEG_DIST (stmtinfo_b) == 0
     603           36 :                   || STMT_VINFO_MIN_NEG_DIST (stmtinfo_b) > (unsigned)dist))
     604          156 :             STMT_VINFO_MIN_NEG_DIST (stmtinfo_b) = dist;
     605         3903 :           continue;
     606         3903 :         }
     607              : 
     608         7519 :       unsigned int abs_dist = abs (dist);
     609         7519 :       if (abs_dist >= 2 && abs_dist < *max_vf)
     610              :         {
     611              :           /* The dependence distance requires reduction of the maximal
     612              :              vectorization factor.  */
     613          558 :           *max_vf = abs_dist;
     614          558 :           if (dump_enabled_p ())
     615           30 :             dump_printf_loc (MSG_NOTE, vect_location,
     616              :                              "adjusting maximal vectorization factor to %i\n",
     617              :                              *max_vf);
     618              :         }
     619              : 
     620         7519 :       if (abs_dist >= *max_vf)
     621              :         {
     622              :           /* Dependence distance does not create dependence, as far as
     623              :              vectorization is concerned, in this case.  */
     624         1103 :           if (dump_enabled_p ())
     625          437 :             dump_printf_loc (MSG_NOTE, vect_location,
     626              :                              "dependence distance >= VF.\n");
     627         1103 :           continue;
     628              :         }
     629              : 
     630         6416 :       return opt_result::failure_at (stmtinfo_a->stmt,
     631              :                                      "not vectorized, possible dependence "
     632              :                                      "between data-refs %T and %T\n",
     633              :                                      DR_REF (dra), DR_REF (drb));
     634              :     }
     635              : 
     636        63123 :   return opt_result::success ();
     637              : }
     638              : 
     639              : /* Function vect_analyze_early_break_dependences.
     640              : 
     641              :    Examine all the data references in the loop and make sure that if we have
     642              :    multiple exits that we are able to safely move stores such that they become
     643              :    safe for vectorization.  The function also calculates the place where to move
     644              :    the instructions to and computes what the new vUSE chain should be.
     645              : 
     646              :    This works in tandem with the CFG that will be produced by
     647              :    slpeel_tree_duplicate_loop_to_edge_cfg later on.
     648              : 
     649              :    This function tries to validate whether an early break vectorization
     650              :    is possible for the current instruction sequence.  Returns success if
     651              :    possible, otherwise a failure result.
     652              : 
     653              :    Requirements:
     654              :      - Any memory access must be to a fixed size buffer.
     655              :      - There must not be any loads and stores to the same object.
     656              :      - Multiple loads are allowed as long as they don't alias.
     657              : 
     658              :    NOTE:
     659              :      This implementation is very conservative. Any overlapping loads/stores
     660              :      that take place before the early break statement get rejected aside from
     661              :      WAR dependencies.
     662              : 
     663              :      i.e.:
     664              : 
     665              :         a[i] = 8
     666              :         c = a[i]
     667              :         if (b[i])
     668              :           ...
     669              : 
     670              :         is not allowed, but
     671              : 
     672              :         c = a[i]
     673              :         a[i] = 8
     674              :         if (b[i])
     675              :           ...
     676              : 
     677              :         is allowed, which is the common case.  */
     678              : 
     679              : static opt_result
     680       141676 : vect_analyze_early_break_dependences (loop_vec_info loop_vinfo)
     681              : {
     682       141676 :   DUMP_VECT_SCOPE ("vect_analyze_early_break_dependences");
     683              : 
     684              :   /* List of all load data references found during traversal.  */
     685       141676 :   auto_vec<data_reference *> bases;
     686       141676 :   basic_block dest_bb = NULL;
     687              : 
     688       141676 :   class loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
     689       141676 :   class loop *loop_nest = loop_outer (loop);
     690              : 
     691       141676 :   if (dump_enabled_p ())
     692         1582 :     dump_printf_loc (MSG_NOTE, vect_location,
     693              :                      "loop contains multiple exits, analyzing"
     694              :                      " statement dependencies.\n");
     695              : 
     696       141676 :   if (LOOP_VINFO_EARLY_BREAKS_VECT_PEELED (loop_vinfo))
     697        25518 :     if (dump_enabled_p ())
     698          286 :       dump_printf_loc (MSG_NOTE, vect_location,
     699              :                        "alternate exit has been chosen as main exit.\n");
     700              : 
     701              :   /* Since we don't support general control flow, the location we'll move the
     702              :      side-effects to is always the latch connected exit.  When we support
     703              :      general control flow we can do better but for now this is fine.  Move
     704              :      side-effects to the in-loop destination of the last early exit.  For the
     705              :      PEELED case we move the side-effects to the latch block as this is
     706              :      guaranteed to be the last block to be executed when a vector iteration
     707              :      has finished.  */
     708       141676 :   if (LOOP_VINFO_EARLY_BREAKS_VECT_PEELED (loop_vinfo))
     709        25518 :     dest_bb = loop->latch;
     710              :   else
     711       116158 :     dest_bb = single_pred (loop->latch);
     712              : 
     713              :   /* We start looking from dest_bb, for the non-PEELED case we don't want to
     714              :      move any stores already present, but we do want to read and validate the
     715              :      loads.  */
     716       141676 :   basic_block bb = dest_bb;
     717              : 
     718              :   /* We move stores across all loads to the beginning of dest_bb, so
     719              :      the first block processed below doesn't need dependence checking.  */
     720       141676 :   bool check_deps = false;
     721              : 
     722       511836 :   do
     723              :     {
     724       326756 :       gimple_stmt_iterator gsi = gsi_last_bb (bb);
     725              : 
     726              :       /* Now analyze all the remaining statements and try to determine which
     727              :          instructions are allowed/needed to be moved.  */
     728      2435105 :       while (!gsi_end_p (gsi))
     729              :         {
     730      2114315 :           gimple *stmt = gsi_stmt (gsi);
     731      2114315 :           gsi_prev (&gsi);
     732      2114315 :           if (is_gimple_debug (stmt))
     733      1866158 :             continue;
     734              : 
     735      1112324 :           stmt_vec_info orig_stmt_vinfo = loop_vinfo->lookup_stmt (stmt);
     736      1112324 :           stmt_vec_info stmt_vinfo
     737      1112324 :             = vect_stmt_to_vectorize (orig_stmt_vinfo);
     738      1112324 :           auto dr_ref = STMT_VINFO_DATA_REF (stmt_vinfo);
     739      1112324 :           if (!dr_ref)
     740              :             {
     741              :               /* Trapping statements after the last early exit are fine.  */
     742       858435 :               if (check_deps)
     743              :                 {
     744       520135 :                   bool could_trap_p = false;
     745       520135 :                   gimple *cur_stmt = STMT_VINFO_STMT (stmt_vinfo);
     746       520135 :                   could_trap_p = gimple_could_trap_p (cur_stmt);
     747       520135 :                   if (STMT_VINFO_IN_PATTERN_P (orig_stmt_vinfo))
     748              :                     {
     749       192456 :                       gimple_stmt_iterator gsi2;
     750       192456 :                       auto stmt_seq
     751       192456 :                         = STMT_VINFO_PATTERN_DEF_SEQ (orig_stmt_vinfo);
     752       192456 :                       for (gsi2 = gsi_start (stmt_seq);
     753       388190 :                            !could_trap_p && !gsi_end_p (gsi2); gsi_next (&gsi2))
     754              :                         {
     755       195734 :                           cur_stmt = gsi_stmt (gsi2);
     756       195734 :                           could_trap_p = gimple_could_trap_p (cur_stmt);
     757              :                         }
     758              :                     }
     759              : 
     760       520135 :                   if (could_trap_p)
     761              :                     {
     762         5424 :                       if (dump_enabled_p ())
     763          150 :                         dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
     764              :                              "cannot vectorize as operation may trap.\n");
     765         5424 :                       return opt_result::failure_at (cur_stmt,
     766              :                              "can't safely apply code motion to dependencies"
     767              :                              " to vectorize the early exit. %G may trap.\n",
     768              :                              cur_stmt);
     769              :                     }
     770              :                 }
     771              : 
     772       853011 :               continue;
     773       853011 :             }
     774              : 
     775              :           /* We know everything below dest_bb is safe since we know we
     776              :              had a full vector iteration when reaching it.  Either by
     777              :              the loop entry / IV exit test being last or because this
     778              :              is the loop latch itself.  */
     779       253889 :           if (!check_deps)
     780        11156 :             continue;
     781              : 
     782              :           /* Check if vector accesses to the object will be within bounds.
     783              :              must be a constant or assume loop will be versioned or niters
     784              :              bounded by VF so accesses are within range.  We only need to check
     785              :              the reads since writes are moved to a safe place where if we get
     786              :              there we know they are safe to perform.  */
     787       242733 :           if (DR_IS_READ (dr_ref))
     788              :             {
     789       226926 :               dr_set_safe_speculative_read_required (stmt_vinfo, true);
     790       226926 :               bool inbounds = ref_within_array_bound (stmt, DR_REF (dr_ref));
     791       226926 :               DR_SCALAR_KNOWN_BOUNDS (STMT_VINFO_DR_INFO (stmt_vinfo)) = inbounds;
     792              : 
     793       226926 :               if (dump_enabled_p ())
     794         2457 :                 dump_printf_loc (MSG_NOTE, vect_location,
     795              :                                  "marking DR (read) as possibly needing peeling "
     796              :                                  "for alignment at %G", stmt);
     797              :             }
     798              : 
     799       242733 :           if (DR_IS_READ (dr_ref))
     800       226926 :             bases.safe_push (dr_ref);
     801        15807 :           else if (DR_IS_WRITE (dr_ref))
     802              :             {
     803              :               /* We are moving writes down in the CFG.  To be sure that this
     804              :                  is valid after vectorization we have to check all the loads
     805              :                  we are sinking the stores past to see if any of them may
     806              :                  alias or are the same object.
     807              : 
     808              :                  Same objects will not be an issue because unless the store
     809              :                  is marked volatile the value can be forwarded.  If the
     810              :                  store is marked volatile we don't vectorize the loop
     811              :                  anyway.
     812              : 
     813              :                  That leaves the check for aliasing.  We don't really need
     814              :                  to care about the stores aliasing with each other since the
     815              :                  stores are moved in order so the effects are still observed
     816              :                  correctly.  This leaves the check for WAR dependencies
     817              :                  which we would be introducing here if the DR can alias.
     818              :                  The check is quadratic in loads/stores but I have not found
     819              :                  a better API to do this.  I believe all loads and stores
     820              :                  must be checked.  We also must check them when we
     821              :                  encountered the store, since we don't care about loads past
     822              :                  the store.  */
     823              : 
     824        49035 :               for (auto dr_read : bases)
     825        15466 :                 if (dr_may_alias_p (dr_ref, dr_read, loop_nest))
     826              :                   {
     827          542 :                     if (dump_enabled_p ())
     828            4 :                       dump_printf_loc (MSG_MISSED_OPTIMIZATION,
     829              :                                        vect_location,
     830              :                                        "early breaks not supported: "
     831              :                                        "overlapping loads and stores "
     832              :                                        "found before the break "
     833              :                                        "statement.\n");
     834              : 
     835          542 :                     return opt_result::failure_at (stmt,
     836              :                              "can't safely apply code motion to dependencies"
     837              :                              " to vectorize the early exit. %G may alias with"
     838              :                              " %G\n", stmt, dr_read->stmt);
     839              :                   }
     840              :             }
     841              : 
     842       484382 :           if (gimple_vdef (stmt))
     843              :             {
     844        15265 :               if (dump_enabled_p ())
     845          282 :                 dump_printf_loc (MSG_NOTE, vect_location,
     846              :                                  "==> recording stmt %G", stmt);
     847              : 
     848        15265 :               LOOP_VINFO_EARLY_BRK_STORES (loop_vinfo).safe_push (stmt);
     849              :             }
     850       696043 :           else if (gimple_vuse (stmt))
     851              :             {
     852       226926 :               LOOP_VINFO_EARLY_BRK_VUSES (loop_vinfo).safe_insert (0, stmt);
     853       226926 :               if (dump_enabled_p ())
     854         2457 :                 dump_printf_loc (MSG_NOTE, vect_location,
     855              :                                  "marked statement for vUSE update: %G", stmt);
     856              :             }
     857              :         }
     858              : 
     859       320790 :       if (!single_pred_p (bb))
     860              :         {
     861       135710 :           gcc_assert (bb == loop->header);
     862       135710 :           break;
     863              :         }
     864              : 
     865              :       /* If we possibly sink through a virtual PHI make sure to elide that.  */
     866       185080 :       if (gphi *vphi = get_virtual_phi (bb))
     867          107 :         LOOP_VINFO_EARLY_BRK_STORES (loop_vinfo).safe_push (vphi);
     868              : 
     869              :       /* All earlier blocks need dependence checking.  */
     870       185080 :       check_deps = true;
     871       185080 :       bb = single_pred (bb);
     872       185080 :     }
     873              :   while (1);
     874              : 
     875              :   /* We don't allow outer -> inner loop transitions which should have been
     876              :      trapped already during loop form analysis.  */
     877       135710 :   gcc_assert (dest_bb->loop_father == loop);
     878              : 
     879              :   /* Check that the destination block we picked has only one pred.  To relax this we
     880              :      have to take special care when moving the statements.  We don't currently support
     881              :      such control flow however this check is there to simplify how we handle
     882              :      labels that may be present anywhere in the IL.  This check is to ensure that the
     883              :      labels aren't significant for the CFG.  */
     884       135710 :   if (!single_pred (dest_bb))
     885            0 :     return opt_result::failure_at (vect_location,
     886              :                              "chosen loop exit block (BB %d) does not have a "
     887              :                              "single predecessor which is currently not "
     888              :                              "supported for early break vectorization.\n",
     889              :                              dest_bb->index);
     890              : 
     891       135710 :   LOOP_VINFO_EARLY_BRK_DEST_BB (loop_vinfo) = dest_bb;
     892              :   /* Check if loop has a side-effect (stores), force scalar epilogue.  */
     893       612980 :   for (auto dr : LOOP_VINFO_DATAREFS (loop_vinfo))
     894       232420 :     if (DR_IS_WRITE (dr))
     895              :       {
     896        13040 :         LOOP_VINFO_EARLY_BRK_NEEDS_EPILOG (loop_vinfo) = true;
     897        13040 :         break;
     898              :       }
     899              : 
     900       135710 :   if (!LOOP_VINFO_EARLY_BRK_VUSES (loop_vinfo).is_empty ())
     901              :     {
     902              :       /* All uses shall be updated to that of the first load.  Entries are
     903              :          stored in reverse order.  */
     904       125186 :       tree vuse = gimple_vuse (LOOP_VINFO_EARLY_BRK_VUSES (loop_vinfo).last ());
     905       350869 :       for (auto g : LOOP_VINFO_EARLY_BRK_VUSES (loop_vinfo))
     906              :         {
     907       225683 :           if (dump_enabled_p ())
     908         2394 :           dump_printf_loc (MSG_NOTE, vect_location,
     909              :                            "will update use: %T, mem_ref: %G", vuse, g);
     910              :         }
     911              :     }
     912              : 
     913       135710 :   if (dump_enabled_p ())
     914         1428 :     dump_printf_loc (MSG_NOTE, vect_location,
     915              :                      "recorded statements to be moved to BB %d\n",
     916         1428 :                      LOOP_VINFO_EARLY_BRK_DEST_BB (loop_vinfo)->index);
     917              : 
     918       135710 :   return opt_result::success ();
     919       141676 : }
     920              : 
     921              : /* Function vect_analyze_data_ref_dependences.
     922              : 
     923              :    Examine all the data references in the loop, and make sure there do not
     924              :    exist any data dependences between them.  Set *MAX_VF according to
     925              :    the maximum vectorization factor the data dependences allow.  */
     926              : 
     927              : opt_result
     928       387406 : vect_analyze_data_ref_dependences (loop_vec_info loop_vinfo,
     929              :                                    unsigned int *max_vf)
     930              : {
     931       387406 :   unsigned int i;
     932       387406 :   struct data_dependence_relation *ddr;
     933              : 
     934       387406 :   DUMP_VECT_SCOPE ("vect_analyze_data_ref_dependences");
     935              : 
     936       387406 :   if (!LOOP_VINFO_DDRS (loop_vinfo).exists ())
     937              :     {
     938       161121 :       LOOP_VINFO_DDRS (loop_vinfo)
     939       161121 :         .create (LOOP_VINFO_DATAREFS (loop_vinfo).length ()
     940       161121 :                  * LOOP_VINFO_DATAREFS (loop_vinfo).length ());
     941              :       /* We do not need read-read dependences.  */
     942       322242 :       bool res = compute_all_dependences (LOOP_VINFO_DATAREFS (loop_vinfo),
     943              :                                           &LOOP_VINFO_DDRS (loop_vinfo),
     944       161121 :                                           LOOP_VINFO_LOOP_NEST (loop_vinfo),
     945              :                                           false);
     946       161121 :       gcc_assert (res);
     947              :     }
     948              : 
     949       387406 :   LOOP_VINFO_NO_DATA_DEPENDENCIES (loop_vinfo) = true;
     950              : 
     951              :   /* For epilogues we either have no aliases or alias versioning
     952              :      was applied to original loop.  Therefore we may just get max_vf
     953              :      using VF of original loop.  */
     954       387406 :   if (LOOP_VINFO_EPILOGUE_P (loop_vinfo))
     955        12570 :     *max_vf = LOOP_VINFO_ORIG_MAX_VECT_FACTOR (loop_vinfo);
     956              :   else
     957      1843764 :     FOR_EACH_VEC_ELT (LOOP_VINFO_DDRS (loop_vinfo), i, ddr)
     958              :       {
     959      1486752 :         opt_result res
     960      1486752 :           = vect_analyze_data_ref_dependence (ddr, loop_vinfo, max_vf);
     961      1486752 :         if (!res)
     962        17824 :           return res;
     963              :       }
     964              : 
     965              :   /* If we have early break statements in the loop, check to see if they
     966              :      are of a form we can vectorize.  */
     967       369582 :   if (LOOP_VINFO_EARLY_BREAKS (loop_vinfo))
     968       141676 :     return vect_analyze_early_break_dependences (loop_vinfo);
     969              : 
     970       227906 :   return opt_result::success ();
     971              : }
     972              : 
     973              : 
     974              : /* Function vect_slp_analyze_data_ref_dependence.
     975              : 
     976              :    Classify the dependence between the memory-references DRA and DRB of DDR
     977              :    for VINFO using the classical (affine) data-dependence test.  Return
     978              :    chrec_known if they are provably independent, chrec_dont_know if the test
     979              :    cannot analyze them (in which case the caller can still try to disambiguate
     980              :    them with the alias oracle), and the dependence (NULL_TREE) otherwise.  */
     981              : 
     982              : static tree
     983      6887601 : vect_slp_analyze_data_ref_dependence (vec_info *vinfo,
     984              :                                       struct data_dependence_relation *ddr)
     985              : {
     986      6887601 :   struct data_reference *dra = DDR_A (ddr);
     987      6887601 :   struct data_reference *drb = DDR_B (ddr);
     988      6887601 :   dr_vec_info *dr_info_a = vinfo->lookup_dr (dra);
     989      6887601 :   dr_vec_info *dr_info_b = vinfo->lookup_dr (drb);
     990              : 
     991              :   /* We need to check dependences of statements marked as unvectorizable
     992              :      as well, they still can prohibit vectorization.  */
     993              : 
     994              :   /* Independent data accesses.  */
     995      6887601 :   if (DDR_ARE_DEPENDENT (ddr) == chrec_known)
     996              :     return chrec_known;
     997              : 
     998      1104601 :   if (dra == drb)
     999              :     return chrec_known;
    1000              : 
    1001              :   /* Read-read is OK.  */
    1002         8832 :   if (DR_IS_READ (dra) && DR_IS_READ (drb))
    1003              :     return chrec_known;
    1004              : 
    1005              :   /* If dra and drb are part of the same interleaving chain consider
    1006              :      them independent.  */
    1007         8832 :   if (STMT_VINFO_GROUPED_ACCESS (dr_info_a->stmt)
    1008         8832 :       && (DR_GROUP_FIRST_ELEMENT (dr_info_a->stmt)
    1009         8832 :           == DR_GROUP_FIRST_ELEMENT (dr_info_b->stmt)))
    1010              :     return chrec_known;
    1011              : 
    1012              :   /* Unknown data dependence.  */
    1013         8832 :   if (DDR_ARE_DEPENDENT (ddr) == chrec_dont_know)
    1014              :     {
    1015         8832 :       if  (dump_enabled_p ())
    1016           60 :         dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
    1017              :                          "can't determine dependence between %T and %T\n",
    1018              :                          DR_REF (dra), DR_REF (drb));
    1019              :     }
    1020            0 :   else if (dump_enabled_p ())
    1021            0 :     dump_printf_loc (MSG_NOTE, vect_location,
    1022              :                      "determined dependence between %T and %T\n",
    1023              :                      DR_REF (dra), DR_REF (drb));
    1024              : 
    1025         8832 :   return DDR_ARE_DEPENDENT (ddr);
    1026              : }
    1027              : 
    1028              : 
    1029              : /* Analyze dependences involved in the transform of a store SLP NODE.  */
    1030              : 
    1031              : static bool
    1032       660867 : vect_slp_analyze_store_dependences (vec_info *vinfo, slp_tree node)
    1033              : {
    1034              :   /* This walks over all stmts involved in the SLP store done
    1035              :      in NODE verifying we can sink them up to the last stmt in the
    1036              :      group.  */
    1037       660867 :   stmt_vec_info last_access_info = vect_find_last_scalar_stmt_in_slp (node);
    1038       660867 :   gcc_assert (DR_IS_WRITE (STMT_VINFO_DATA_REF (last_access_info)));
    1039              : 
    1040      2401446 :   for (unsigned k = 0; k < SLP_TREE_SCALAR_STMTS (node).length (); ++k)
    1041              :     {
    1042      1749021 :       stmt_vec_info access_info
    1043      1749021 :         = vect_orig_stmt (SLP_TREE_SCALAR_STMTS (node)[k]);
    1044      1749021 :       if (access_info == last_access_info)
    1045       653252 :         continue;
    1046      1095769 :       data_reference *dr_a = STMT_VINFO_DATA_REF (access_info);
    1047      1095769 :       ao_ref ref;
    1048      1095769 :       bool ref_initialized_p = false;
    1049      1095769 :       for (gimple_stmt_iterator gsi = gsi_for_stmt (access_info->stmt);
    1050     10573169 :            gsi_stmt (gsi) != last_access_info->stmt; gsi_next (&gsi))
    1051              :         {
    1052      9485842 :           gimple *stmt = gsi_stmt (gsi);
    1053     16836345 :           if (! gimple_vuse (stmt))
    1054      2597915 :             continue;
    1055              : 
    1056              :           /* If we couldn't record a (single) data reference for this stmt,
    1057              :              or the classical dependence test cannot analyze it, we have to
    1058              :              resort to the alias oracle.  */
    1059      6887927 :           stmt_vec_info stmt_info = vinfo->lookup_stmt (stmt);
    1060      6887927 :           data_reference *dr_b = STMT_VINFO_DATA_REF (stmt_info);
    1061      6887927 :           if (dr_b)
    1062              :             {
    1063      6887387 :               gcc_assert (!gimple_visited_p (stmt));
    1064              : 
    1065      6887387 :               ddr_p ddr = initialize_data_dependence_relation (dr_a,
    1066      6887387 :                                                                dr_b, vNULL);
    1067      6887387 :               tree dep = vect_slp_analyze_data_ref_dependence (vinfo, ddr);
    1068      6887387 :               free_dependence_relation (ddr);
    1069      6887387 :               if (dep == chrec_known)
    1070      6878591 :                 continue;
    1071         8796 :               if (dep != chrec_dont_know)
    1072         8442 :                 return false;
    1073              :               /* Unknown dependence - fall through to the alias oracle.  */
    1074              :             }
    1075              : 
    1076              :           /* We are moving a store - this means we cannot use TBAA for
    1077              :              disambiguation.  */
    1078         9336 :           if (!ref_initialized_p)
    1079              :             {
    1080         9075 :               ao_ref_init (&ref, DR_REF (dr_a));
    1081         9075 :               ref_initialized_p = true;
    1082              :             }
    1083         9336 :           if (stmt_may_clobber_ref_p_1 (stmt, &ref, false)
    1084         9336 :               || ref_maybe_used_by_stmt_p (stmt, &ref, false))
    1085         8442 :             return false;
    1086              :         }
    1087              :     }
    1088              :   return true;
    1089              : }
    1090              : 
    1091              : /* Analyze dependences involved in the transform of a load SLP NODE.  STORES
    1092              :    contain the vector of scalar stores of this instance if we are
    1093              :    disambiguating the loads.  */
    1094              : 
    1095              : static bool
    1096       152158 : vect_slp_analyze_load_dependences (vec_info *vinfo, slp_tree node,
    1097              :                                    vec<stmt_vec_info> stores,
    1098              :                                    stmt_vec_info last_store_info)
    1099              : {
    1100              :   /* This walks over all stmts involved in the SLP load done
    1101              :      in NODE verifying we can hoist them up to the first stmt in the
    1102              :      group.  */
    1103       152158 :   stmt_vec_info first_access_info = vect_find_first_scalar_stmt_in_slp (node);
    1104       152158 :   gcc_assert (DR_IS_READ (STMT_VINFO_DATA_REF (first_access_info)));
    1105              : 
    1106       536010 :   for (unsigned k = 0; k < SLP_TREE_SCALAR_STMTS (node).length (); ++k)
    1107              :     {
    1108       383888 :       if (! SLP_TREE_SCALAR_STMTS (node)[k])
    1109       158585 :         continue;
    1110       383888 :       stmt_vec_info access_info
    1111       383888 :         = vect_orig_stmt (SLP_TREE_SCALAR_STMTS (node)[k]);
    1112       383888 :       if (access_info == first_access_info)
    1113       158585 :         continue;
    1114       225303 :       data_reference *dr_a = STMT_VINFO_DATA_REF (access_info);
    1115       225303 :       ao_ref ref;
    1116       225303 :       bool ref_initialized_p = false;
    1117       225303 :       hash_set<stmt_vec_info> grp_visited;
    1118       225303 :       for (gimple_stmt_iterator gsi = gsi_for_stmt (access_info->stmt);
    1119      4348181 :            gsi_stmt (gsi) != first_access_info->stmt; gsi_prev (&gsi))
    1120              :         {
    1121      2061475 :           gimple *stmt = gsi_stmt (gsi);
    1122      3358625 :           if (! gimple_vdef (stmt))
    1123      2005767 :             continue;
    1124              : 
    1125       280497 :           stmt_vec_info stmt_info = vinfo->lookup_stmt (stmt);
    1126              : 
    1127              :           /* If we run into a store of this same instance (we've just
    1128              :              marked those) then delay dependence checking until we run
    1129              :              into the last store because this is where it will have
    1130              :              been sunk to (and we verified that we can do that already).  */
    1131       280497 :           if (gimple_visited_p (stmt))
    1132              :             {
    1133       224789 :               if (stmt_info != last_store_info)
    1134       224787 :                 continue;
    1135              : 
    1136           10 :               for (stmt_vec_info &store_info : stores)
    1137              :                 {
    1138            4 :                   data_reference *store_dr = STMT_VINFO_DATA_REF (store_info);
    1139            4 :                   ddr_p ddr = initialize_data_dependence_relation
    1140            4 :                                 (dr_a, store_dr, vNULL);
    1141            4 :                   tree dep
    1142            4 :                     = vect_slp_analyze_data_ref_dependence (vinfo, ddr);
    1143            4 :                   free_dependence_relation (ddr);
    1144            4 :                   if (dep == chrec_known)
    1145            4 :                     continue;
    1146            0 :                   if (dep != chrec_dont_know)
    1147           36 :                     return false;
    1148              :                   /* The classical dependence test cannot analyze this;
    1149              :                      resort to the alias oracle.  We are hoisting a load
    1150              :                      so TBAA may be used for disambiguation.  */
    1151            0 :                   if (!ref_initialized_p)
    1152              :                     {
    1153            0 :                       ao_ref_init (&ref, DR_REF (dr_a));
    1154            0 :                       ref_initialized_p = true;
    1155              :                     }
    1156            0 :                   if (stmt_may_clobber_ref_p_1 (store_info->stmt, &ref, true))
    1157              :                     return false;
    1158              :                 }
    1159            2 :               continue;
    1160            2 :             }
    1161              : 
    1162       114327 :           auto check_hoist = [&] (stmt_vec_info stmt_info) -> bool
    1163              :             {
    1164              :               /* We are hoisting a load - this means we can use TBAA for
    1165              :                  disambiguation.  */
    1166        58619 :               if (!ref_initialized_p)
    1167              :                 {
    1168        10972 :                   ao_ref_init (&ref, DR_REF (dr_a));
    1169        10972 :                   ref_initialized_p = true;
    1170              :                 }
    1171        58619 :               if (stmt_may_clobber_ref_p_1 (stmt_info->stmt, &ref, true))
    1172              :                 {
    1173              :                   /* If we couldn't record a (single) data reference for this
    1174              :                      stmt we have to give up now.  */
    1175          210 :                   data_reference *dr_b = STMT_VINFO_DATA_REF (stmt_info);
    1176          210 :                   if (!dr_b)
    1177              :                     return false;
    1178          210 :                   ddr_p ddr = initialize_data_dependence_relation (dr_a,
    1179          210 :                                                                    dr_b, vNULL);
    1180          210 :                   tree dep
    1181          210 :                     = vect_slp_analyze_data_ref_dependence (vinfo, ddr);
    1182          210 :                   free_dependence_relation (ddr);
    1183              :                   /* The alias oracle above could not rule out a conflict;
    1184              :                      only a proven-independent (chrec_known) result lets us
    1185              :                      hoist the load past this store.  */
    1186          210 :                   if (dep != chrec_known)
    1187              :                     return false;
    1188              :                 }
    1189              :               /* No dependence.  */
    1190              :               return true;
    1191        55708 :             };
    1192        55708 :           if (STMT_VINFO_GROUPED_ACCESS (stmt_info))
    1193              :             {
    1194              :               /* When we run into a store group we have to honor
    1195              :                  that earlier stores might be moved here.  We don't
    1196              :                  know exactly which and where to since we lack a
    1197              :                  back-mapping from DR to SLP node, so assume all
    1198              :                  earlier stores are sunk here.  It's enough to
    1199              :                  consider the last stmt of a group for this.
    1200              :                  ???  Both this and the fact that we disregard that
    1201              :                  the conflicting instance might be removed later
    1202              :                  is overly conservative.  */
    1203        55246 :               if (!grp_visited.add (DR_GROUP_FIRST_ELEMENT (stmt_info)))
    1204        10772 :                 for (auto store_info = DR_GROUP_FIRST_ELEMENT (stmt_info);
    1205       129309 :                      store_info != NULL;
    1206       118537 :                      store_info = DR_GROUP_NEXT_ELEMENT (store_info))
    1207       118573 :                   if ((store_info == stmt_info
    1208       107810 :                        || get_later_stmt (store_info, stmt_info) == stmt_info)
    1209       165967 :                       && !check_hoist (store_info))
    1210              :                     return false;
    1211              :             }
    1212              :           else
    1213              :             {
    1214          462 :               if (!check_hoist (stmt_info))
    1215              :                 return false;
    1216              :             }
    1217              :         }
    1218       225303 :     }
    1219              :   return true;
    1220              : }
    1221              : 
    1222              : 
    1223              : /* Function vect_analyze_data_ref_dependences.
    1224              : 
    1225              :    Examine all the data references in the basic-block, and make sure there
    1226              :    do not exist any data dependences between them.  Set *MAX_VF according to
    1227              :    the maximum vectorization factor the data dependences allow.  */
    1228              : 
    1229              : bool
    1230       788684 : vect_slp_analyze_instance_dependence (vec_info *vinfo, slp_instance instance)
    1231              : {
    1232       788684 :   DUMP_VECT_SCOPE ("vect_slp_analyze_instance_dependence");
    1233              : 
    1234              :   /* The stores of this instance are at the root of the SLP tree.  */
    1235       788684 :   slp_tree store = NULL;
    1236       788684 :   if (SLP_INSTANCE_KIND (instance) == slp_inst_kind_store)
    1237       660867 :     store = SLP_INSTANCE_TREE (instance);
    1238              : 
    1239              :   /* Verify we can sink stores to the vectorized stmt insert location.  */
    1240       660867 :   stmt_vec_info last_store_info = NULL;
    1241       660867 :   if (store)
    1242              :     {
    1243       660867 :       if (! vect_slp_analyze_store_dependences (vinfo, store))
    1244              :         return false;
    1245              : 
    1246              :       /* Mark stores in this instance and remember the last one.  */
    1247       652425 :       last_store_info = vect_find_last_scalar_stmt_in_slp (store);
    1248      2392147 :       for (unsigned k = 0; k < SLP_TREE_SCALAR_STMTS (store).length (); ++k)
    1249      1739722 :         gimple_set_visited (SLP_TREE_SCALAR_STMTS (store)[k]->stmt, true);
    1250              :     }
    1251              : 
    1252       780242 :   bool res = true;
    1253              : 
    1254              :   /* Verify we can sink loads to the vectorized stmt insert location,
    1255              :      special-casing stores of this instance.  */
    1256      1175798 :   for (slp_tree &load : SLP_INSTANCE_LOADS (instance))
    1257       152158 :     if (! vect_slp_analyze_load_dependences (vinfo, load,
    1258              :                                              store
    1259              :                                              ? SLP_TREE_SCALAR_STMTS (store)
    1260              :                                              : vNULL, last_store_info))
    1261              :       {
    1262              :         res = false;
    1263              :         break;
    1264              :       }
    1265              : 
    1266              :   /* Unset the visited flag.  */
    1267       780242 :   if (store)
    1268      2392147 :     for (unsigned k = 0; k < SLP_TREE_SCALAR_STMTS (store).length (); ++k)
    1269      1739722 :       gimple_set_visited (SLP_TREE_SCALAR_STMTS (store)[k]->stmt, false);
    1270              : 
    1271              :   /* If this is a SLP instance with a store check if there's a dependent
    1272              :      load that cannot be forwarded from a previous iteration of a loop
    1273              :      both are in.  This is to avoid situations like that in PR115777.  */
    1274       780242 :   if (res && store)
    1275              :     {
    1276       652401 :       stmt_vec_info store_info
    1277       652401 :         = DR_GROUP_FIRST_ELEMENT (SLP_TREE_SCALAR_STMTS (store)[0]);
    1278       652401 :       class loop *store_loop = gimple_bb (store_info->stmt)->loop_father;
    1279       652401 :       if (! loop_outer (store_loop))
    1280       557164 :         return res;
    1281        95237 :       vec<loop_p> loop_nest;
    1282        95237 :       loop_nest.create (1);
    1283        95237 :       loop_nest.quick_push (store_loop);
    1284        95237 :       data_reference *drs = nullptr;
    1285       177730 :       for (slp_tree &load : SLP_INSTANCE_LOADS (instance))
    1286              :         {
    1287        36627 :           if (! STMT_VINFO_GROUPED_ACCESS (SLP_TREE_SCALAR_STMTS (load)[0]))
    1288            0 :             continue;
    1289        36627 :           stmt_vec_info load_info
    1290        36627 :             = DR_GROUP_FIRST_ELEMENT (SLP_TREE_SCALAR_STMTS (load)[0]);
    1291        36627 :           if (gimple_bb (load_info->stmt)->loop_father != store_loop)
    1292         5073 :             continue;
    1293              : 
    1294              :           /* For now concern ourselves with write-after-read as we also
    1295              :              only look for re-use of the store within the same SLP instance.
    1296              :              We can still get a RAW here when the instance contains a PHI
    1297              :              with a backedge though, thus this test.  */
    1298        31554 :           if (! vect_stmt_dominates_stmt_p (STMT_VINFO_STMT (load_info),
    1299              :                                             STMT_VINFO_STMT (store_info)))
    1300        11681 :             continue;
    1301              : 
    1302        19873 :           if (! drs)
    1303              :             {
    1304        19014 :               drs = create_data_ref (loop_preheader_edge (store_loop),
    1305              :                                      store_loop,
    1306        19014 :                                      DR_REF (STMT_VINFO_DATA_REF (store_info)),
    1307              :                                      store_info->stmt, false, false);
    1308        19014 :               if (! DR_BASE_ADDRESS (drs)
    1309        16087 :                   || TREE_CODE (DR_STEP (drs)) != INTEGER_CST)
    1310              :                 break;
    1311              :             }
    1312        16669 :           data_reference *drl
    1313        16669 :             = create_data_ref (loop_preheader_edge (store_loop),
    1314              :                                store_loop,
    1315        16669 :                                DR_REF (STMT_VINFO_DATA_REF (load_info)),
    1316              :                                load_info->stmt, true, false);
    1317              : 
    1318              :           /* See whether the DRs have a known constant distance throughout
    1319              :              the containing loop iteration.  */
    1320        31651 :           if (! DR_BASE_ADDRESS (drl)
    1321        14560 :               || ! operand_equal_p (DR_STEP (drs), DR_STEP (drl))
    1322         8577 :               || ! operand_equal_p (DR_BASE_ADDRESS (drs),
    1323         8577 :                                     DR_BASE_ADDRESS (drl))
    1324        18366 :               || ! operand_equal_p (DR_OFFSET (drs), DR_OFFSET (drl)))
    1325              :             {
    1326        14982 :               free_data_ref (drl);
    1327        14982 :               continue;
    1328              :             }
    1329              : 
    1330              :           /* If the next iteration load overlaps with a non-power-of-two offset
    1331              :              we are surely failing any STLF attempt.  */
    1332         1687 :           HOST_WIDE_INT step = TREE_INT_CST_LOW (DR_STEP (drl));
    1333         1687 :           unsigned HOST_WIDE_INT sizes
    1334         1687 :             = (TREE_INT_CST_LOW (TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (drs))))
    1335         1687 :                * DR_GROUP_SIZE (store_info));
    1336         1687 :           unsigned HOST_WIDE_INT sizel
    1337         1687 :             = (TREE_INT_CST_LOW (TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (drl))))
    1338         1687 :                * DR_GROUP_SIZE (load_info));
    1339         1687 :           if (ranges_overlap_p (TREE_INT_CST_LOW (DR_INIT (drl)) + step, sizel,
    1340         1687 :                                 TREE_INT_CST_LOW (DR_INIT (drs)), sizes))
    1341              :             {
    1342          835 :               unsigned HOST_WIDE_INT dist
    1343          835 :                 = absu_hwi (TREE_INT_CST_LOW (DR_INIT (drl)) + step
    1344          835 :                             - TREE_INT_CST_LOW (DR_INIT (drs)));
    1345          835 :               poly_uint64 loadsz = tree_to_poly_uint64
    1346          835 :                                      (TYPE_SIZE_UNIT (SLP_TREE_VECTYPE (load)));
    1347          835 :               poly_uint64 storesz = tree_to_poly_uint64
    1348          835 :                                     (TYPE_SIZE_UNIT (SLP_TREE_VECTYPE (store)));
    1349              :               /* When the overlap aligns with vector sizes used for the loads
    1350              :                  and the vector stores are larger or equal to the loads
    1351              :                  forwarding should work.  */
    1352         1670 :               if (maybe_gt (loadsz, storesz) || ! multiple_p (dist, loadsz))
    1353           70 :                 load->avoid_stlf_fail = true;
    1354              :             }
    1355         1687 :           free_data_ref (drl);
    1356              :         }
    1357        95237 :       if (drs)
    1358        19014 :         free_data_ref (drs);
    1359        95237 :       loop_nest.release ();
    1360              :     }
    1361              : 
    1362              :   return res;
    1363              : }
    1364              : 
    1365              : /* Return the misalignment of DR_INFO accessed in VECTYPE with OFFSET
    1366              :    applied.  */
    1367              : 
    1368              : int
    1369      6619782 : dr_misalignment (dr_vec_info *dr_info, tree vectype, poly_int64 offset)
    1370              : {
    1371      6619782 :   HOST_WIDE_INT diff = 0;
    1372              :   /* Alignment is only analyzed for the first element of a DR group,
    1373              :      use that but adjust misalignment by the offset of the access.  */
    1374      6619782 :   if (STMT_VINFO_GROUPED_ACCESS (dr_info->stmt))
    1375              :     {
    1376      2295826 :       dr_vec_info *first_dr
    1377      2295826 :         = STMT_VINFO_DR_INFO (DR_GROUP_FIRST_ELEMENT (dr_info->stmt));
    1378              :       /* vect_analyze_data_ref_accesses guarantees that DR_INIT are
    1379              :          INTEGER_CSTs and the first element in the group has the lowest
    1380              :          address.  */
    1381      2295826 :       diff = (TREE_INT_CST_LOW (DR_INIT (dr_info->dr))
    1382      2295826 :               - TREE_INT_CST_LOW (DR_INIT (first_dr->dr)));
    1383      2295826 :       gcc_assert (diff >= 0);
    1384              :       dr_info = first_dr;
    1385              :     }
    1386              : 
    1387      6619782 :   int misalign = dr_info->misalignment;
    1388      6619782 :   gcc_assert (misalign != DR_MISALIGNMENT_UNINITIALIZED);
    1389      6619782 :   if (misalign == DR_MISALIGNMENT_UNKNOWN)
    1390              :     return misalign;
    1391              : 
    1392              :   /* If the access is only aligned for a vector type with smaller alignment
    1393              :      requirement the access has unknown misalignment.  */
    1394      4025611 :   if (maybe_lt (dr_info->target_alignment * BITS_PER_UNIT,
    1395      4025611 :                 targetm.vectorize.preferred_vector_alignment (vectype)))
    1396              :     return DR_MISALIGNMENT_UNKNOWN;
    1397              : 
    1398              :   /* Apply the offset from the DR group start and the externally supplied
    1399              :      offset which can for example result from a negative stride access.  */
    1400      4025608 :   poly_int64 misalignment = misalign + diff + offset;
    1401              : 
    1402              :   /* Below we reject compile-time non-constant target alignments, but if
    1403              :      our misalignment is zero, then we are known to already be aligned
    1404              :      w.r.t. any such possible target alignment.  */
    1405      4025608 :   if (known_eq (misalignment, 0))
    1406              :     return 0;
    1407              : 
    1408       632133 :   unsigned HOST_WIDE_INT target_alignment_c;
    1409       632133 :   if (!dr_info->target_alignment.is_constant (&target_alignment_c)
    1410       632133 :       || !known_misalignment (misalignment, target_alignment_c, &misalign))
    1411              :     return DR_MISALIGNMENT_UNKNOWN;
    1412       632133 :   return misalign;
    1413              : }
    1414              : 
    1415              : /* Record the base alignment guarantee given by DRB, which occurs
    1416              :    in STMT_INFO.  */
    1417              : 
    1418              : static void
    1419      4612346 : vect_record_base_alignment (vec_info *vinfo, stmt_vec_info stmt_info,
    1420              :                             innermost_loop_behavior *drb)
    1421              : {
    1422      4612346 :   bool existed;
    1423      4612346 :   std::pair<stmt_vec_info, innermost_loop_behavior *> &entry
    1424      4612346 :     = vinfo->base_alignments.get_or_insert (drb->base_address, &existed);
    1425      4612346 :   if (!existed || entry.second->base_alignment < drb->base_alignment)
    1426              :     {
    1427      1429394 :       entry = std::make_pair (stmt_info, drb);
    1428      1429394 :       if (dump_enabled_p ())
    1429        32783 :         dump_printf_loc (MSG_NOTE, vect_location,
    1430              :                          "recording new base alignment for %T\n"
    1431              :                          "  alignment:    %d\n"
    1432              :                          "  misalignment: %d\n"
    1433              :                          "  based on:     %G",
    1434              :                          drb->base_address,
    1435              :                          drb->base_alignment,
    1436              :                          drb->base_misalignment,
    1437              :                          stmt_info->stmt);
    1438              :     }
    1439      4612346 : }
    1440              : 
    1441              : /* If the region we're going to vectorize is reached, all unconditional
    1442              :    data references occur at least once.  We can therefore pool the base
    1443              :    alignment guarantees from each unconditional reference.  Do this by
    1444              :    going through all the data references in VINFO and checking whether
    1445              :    the containing statement makes the reference unconditionally.  If so,
    1446              :    record the alignment of the base address in VINFO so that it can be
    1447              :    used for all other references with the same base.  */
    1448              : 
    1449              : void
    1450      1025559 : vect_record_base_alignments (vec_info *vinfo)
    1451              : {
    1452      1025559 :   loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
    1453       412326 :   class loop *loop = loop_vinfo ? LOOP_VINFO_LOOP (loop_vinfo) : NULL;
    1454     15017152 :   for (data_reference *dr : vinfo->shared->datarefs)
    1455              :     {
    1456     12038899 :       dr_vec_info *dr_info = vinfo->lookup_dr (dr);
    1457     12038899 :       stmt_vec_info stmt_info = dr_info->stmt;
    1458     12038899 :       if (!DR_IS_CONDITIONAL_IN_STMT (dr)
    1459     12028853 :           && STMT_VINFO_VECTORIZABLE (stmt_info)
    1460      4629315 :           && !STMT_VINFO_GATHER_SCATTER_P (stmt_info))
    1461              :         {
    1462      4610809 :           vect_record_base_alignment (vinfo, stmt_info, &DR_INNERMOST (dr));
    1463              : 
    1464              :           /* If DR is nested in the loop that is being vectorized, we can also
    1465              :              record the alignment of the base wrt the outer loop.  */
    1466     12968559 :           if (loop && nested_in_vect_loop_p (loop, stmt_info))
    1467         1537 :             vect_record_base_alignment
    1468         1537 :               (vinfo, stmt_info, &STMT_VINFO_DR_WRT_VEC_LOOP (stmt_info));
    1469              :         }
    1470              :     }
    1471      1025559 : }
    1472              : 
    1473              : /* Function vect_compute_data_ref_alignment
    1474              : 
    1475              :    Compute the misalignment of the data reference DR_INFO when vectorizing
    1476              :    with VECTYPE.
    1477              : 
    1478              :    Output:
    1479              :    1. initialized misalignment info for DR_INFO
    1480              : 
    1481              :    FOR NOW: No analysis is actually performed. Misalignment is calculated
    1482              :    only for trivial cases. TODO.  */
    1483              : 
    1484              : static void
    1485      1604785 : vect_compute_data_ref_alignment (vec_info *vinfo, dr_vec_info *dr_info,
    1486              :                                  tree vectype)
    1487              : {
    1488      1604785 :   stmt_vec_info stmt_info = dr_info->stmt;
    1489      1604785 :   vec_base_alignments *base_alignments = &vinfo->base_alignments;
    1490      1604785 :   loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
    1491      1604785 :   class loop *loop = NULL;
    1492      1604785 :   tree ref = DR_REF (dr_info->dr);
    1493              : 
    1494      1604785 :   if (dump_enabled_p ())
    1495        51682 :     dump_printf_loc (MSG_NOTE, vect_location,
    1496              :                      "vect_compute_data_ref_alignment:\n");
    1497              : 
    1498      1604785 :   if (loop_vinfo)
    1499       827006 :     loop = LOOP_VINFO_LOOP (loop_vinfo);
    1500              : 
    1501              :   /* Initialize misalignment to unknown.  */
    1502      1604785 :   SET_DR_MISALIGNMENT (dr_info, DR_MISALIGNMENT_UNKNOWN);
    1503              : 
    1504      1604785 :   if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
    1505              :     return;
    1506              : 
    1507      1584559 :   innermost_loop_behavior *drb = vect_dr_behavior (vinfo, dr_info);
    1508      1584559 :   bool step_preserves_misalignment_p;
    1509              : 
    1510      1584559 :   poly_uint64 vector_alignment
    1511      1584559 :     = exact_div (targetm.vectorize.preferred_vector_alignment (vectype),
    1512              :                  BITS_PER_UNIT);
    1513              : 
    1514      1584559 :   if (loop_vinfo
    1515      1584559 :       && dr_safe_speculative_read_required (stmt_info))
    1516              :     {
    1517              :       /* The required target alignment must be a power-of-2 value and is
    1518              :          computed as the product of vector element size, VF and group size.
    1519              :          We compute the constant part first as VF may be a variable.  For
    1520              :          variable VF, the power-of-2 check of VF is deferred to runtime.  */
    1521       307444 :       auto align_factor_c
    1522       307444 :         = TREE_INT_CST_LOW (TYPE_SIZE_UNIT (TREE_TYPE (vectype)));
    1523       307444 :       if (STMT_VINFO_GROUPED_ACCESS (stmt_info))
    1524        89978 :         align_factor_c *= DR_GROUP_SIZE (DR_GROUP_FIRST_ELEMENT (stmt_info));
    1525              : 
    1526       307444 :       poly_uint64 vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
    1527       307444 :       poly_uint64 new_alignment = vf * align_factor_c;
    1528              : 
    1529       614888 :       if ((vf.is_constant () && pow2p_hwi (new_alignment.to_constant ()))
    1530              :           || (!vf.is_constant () && pow2p_hwi (align_factor_c)))
    1531              :         {
    1532       244215 :           if (dump_enabled_p ())
    1533              :             {
    1534         2960 :               dump_printf_loc (MSG_NOTE, vect_location,
    1535              :                                "alignment increased due to early break to ");
    1536         2960 :               dump_dec (MSG_NOTE, new_alignment);
    1537         2960 :               dump_printf (MSG_NOTE, " bytes.\n");
    1538              :             }
    1539       244215 :           vector_alignment = new_alignment;
    1540              :         }
    1541              :     }
    1542              : 
    1543      1584559 :   SET_DR_TARGET_ALIGNMENT (dr_info, vector_alignment);
    1544              : 
    1545              :   /* If the main loop has peeled for alignment we have no way of knowing
    1546              :      whether the data accesses in the epilogues are aligned.  We can't at
    1547              :      compile time answer the question whether we have entered the main loop or
    1548              :      not.  Fixes PR 92351.  */
    1549      1584559 :   if (loop_vinfo)
    1550              :     {
    1551       806780 :       loop_vec_info orig_loop_vinfo = LOOP_VINFO_ORIG_LOOP_INFO (loop_vinfo);
    1552       806780 :       if (orig_loop_vinfo
    1553        32513 :           && LOOP_VINFO_PEELING_FOR_ALIGNMENT (orig_loop_vinfo) != 0)
    1554              :         return;
    1555              :     }
    1556              : 
    1557      1584342 :   unsigned HOST_WIDE_INT vect_align_c;
    1558      1584342 :   if (!vector_alignment.is_constant (&vect_align_c))
    1559              :     return;
    1560              : 
    1561              :   /* No step for BB vectorization.  */
    1562      1584342 :   if (!loop)
    1563              :     {
    1564       777779 :       gcc_assert (integer_zerop (drb->step));
    1565              :       step_preserves_misalignment_p = true;
    1566              :     }
    1567              : 
    1568              :   else
    1569              :     {
    1570              :       /* We can only use base and misalignment information relative to
    1571              :          an innermost loop if the misalignment stays the same throughout the
    1572              :          execution of the loop.  As above, this is the case if the stride of
    1573              :          the dataref evenly divides by the alignment.  Make sure to check
    1574              :          previous epilogues and the main loop.  */
    1575              :       step_preserves_misalignment_p = true;
    1576              :       auto lvinfo = loop_vinfo;
    1577      1646148 :       while (lvinfo)
    1578              :         {
    1579       839585 :           poly_uint64 vf = LOOP_VINFO_VECT_FACTOR (lvinfo);
    1580       839585 :           step_preserves_misalignment_p
    1581       839585 :             &= multiple_p (drb->step_alignment * vf, vect_align_c);
    1582       839585 :           lvinfo = LOOP_VINFO_ORIG_LOOP_INFO (lvinfo);
    1583              :         }
    1584              : 
    1585       806563 :       if (!step_preserves_misalignment_p && dump_enabled_p ())
    1586          322 :         dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
    1587              :                          "step doesn't divide the vector alignment.\n");
    1588              : 
    1589              :       /* In case the dataref is in an inner-loop of the loop that is being
    1590              :          vectorized (LOOP), we use the base and misalignment information
    1591              :          relative to the outer-loop (LOOP).  This is ok only if the
    1592              :          misalignment stays the same throughout the execution of the
    1593              :          inner-loop, which is why we have to check that the stride of the
    1594              :          dataref in the inner-loop evenly divides by the vector alignment.  */
    1595       806563 :       if (step_preserves_misalignment_p
    1596       806563 :           && nested_in_vect_loop_p (loop, stmt_info))
    1597              :         {
    1598         1536 :           step_preserves_misalignment_p
    1599         1536 :             = (DR_STEP_ALIGNMENT (dr_info->dr) % vect_align_c) == 0;
    1600              : 
    1601         1536 :           if (dump_enabled_p ())
    1602              :             {
    1603          496 :               if (step_preserves_misalignment_p)
    1604          358 :                 dump_printf_loc (MSG_NOTE, vect_location,
    1605              :                                  "inner step divides the vector alignment.\n");
    1606              :               else
    1607          138 :                 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
    1608              :                                  "inner step doesn't divide the vector"
    1609              :                                  " alignment.\n");
    1610              :             }
    1611              :         }
    1612              :     }
    1613              : 
    1614      1584342 :   unsigned int base_alignment = drb->base_alignment;
    1615      1584342 :   unsigned int base_misalignment = drb->base_misalignment;
    1616              : 
    1617              :   /* Calculate the maximum of the pooled base address alignment and the
    1618              :      alignment that we can compute for DR itself.  */
    1619      1584342 :   std::pair<stmt_vec_info, innermost_loop_behavior *> *entry
    1620      1584342 :     = base_alignments->get (drb->base_address);
    1621      1584342 :   if (entry
    1622      1579530 :       && base_alignment < (*entry).second->base_alignment
    1623      1587649 :       && (loop_vinfo
    1624         2397 :           || (dominated_by_p (CDI_DOMINATORS, gimple_bb (stmt_info->stmt),
    1625         2397 :                               gimple_bb (entry->first->stmt))
    1626         2289 :               && (gimple_bb (stmt_info->stmt) != gimple_bb (entry->first->stmt)
    1627         2053 :                   || (entry->first->dr_aux.group <= dr_info->group)))))
    1628              :     {
    1629         3182 :       base_alignment = entry->second->base_alignment;
    1630         3182 :       base_misalignment = entry->second->base_misalignment;
    1631              :     }
    1632              : 
    1633      1584342 :   if (drb->offset_alignment < vect_align_c
    1634      1516504 :       || !step_preserves_misalignment_p
    1635              :       /* We need to know whether the step wrt the vectorized loop is
    1636              :          negative when computing the starting misalignment below.  */
    1637      1508123 :       || TREE_CODE (drb->step) != INTEGER_CST)
    1638              :     {
    1639       104067 :       if (dump_enabled_p ())
    1640         3715 :         dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
    1641              :                          "Unknown alignment for access: %T\n", ref);
    1642       104067 :       return;
    1643              :     }
    1644              : 
    1645      1480275 :   if (base_alignment < vect_align_c)
    1646              :     {
    1647       732320 :       unsigned int max_alignment;
    1648       732320 :       tree base = get_base_for_alignment (drb->base_address, &max_alignment);
    1649       732320 :       if (max_alignment < vect_align_c
    1650       729946 :           || (loop_vinfo && LOOP_VINFO_EPILOGUE_P (loop_vinfo))
    1651      1441626 :           || !vect_can_force_dr_alignment_p (base,
    1652       709306 :                                              vect_align_c * BITS_PER_UNIT))
    1653              :         {
    1654       535274 :           if (dump_enabled_p ())
    1655        14334 :             dump_printf_loc (MSG_NOTE, vect_location,
    1656              :                              "can't force alignment of ref: %T\n", ref);
    1657       535274 :           return;
    1658              :         }
    1659              : 
    1660              :       /* Force the alignment of the decl.
    1661              :          NOTE: This is the only change to the code we make during
    1662              :          the analysis phase, before deciding to vectorize the loop.  */
    1663       197046 :       if (dump_enabled_p ())
    1664         7944 :         dump_printf_loc (MSG_NOTE, vect_location,
    1665              :                          "force alignment of %T\n", ref);
    1666              : 
    1667       197046 :       dr_info->base_decl = base;
    1668       197046 :       dr_info->base_misaligned = true;
    1669       197046 :       base_misalignment = 0;
    1670              :     }
    1671       945001 :   poly_int64 misalignment
    1672       945001 :     = base_misalignment + wi::to_poly_offset (drb->init).force_shwi ();
    1673              : 
    1674       945001 :   unsigned int const_misalignment;
    1675       945001 :   if (!known_misalignment (misalignment, vect_align_c, &const_misalignment))
    1676              :     {
    1677              :       if (dump_enabled_p ())
    1678              :         dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
    1679              :                          "Non-constant misalignment for access: %T\n", ref);
    1680              :       return;
    1681              :     }
    1682              : 
    1683       945001 :   SET_DR_MISALIGNMENT (dr_info, const_misalignment);
    1684              : 
    1685       945001 :   if (dump_enabled_p ())
    1686        32308 :     dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
    1687              :                      "misalign = %d bytes of ref %T\n",
    1688              :                      const_misalignment, ref);
    1689              : 
    1690              :   return;
    1691              : }
    1692              : 
    1693              : /* Return whether DR_INFO, which is related to DR_PEEL_INFO in
    1694              :    that it only differs in DR_INIT, is aligned if DR_PEEL_INFO
    1695              :    is made aligned via peeling.  */
    1696              : 
    1697              : static bool
    1698      1986758 : vect_dr_aligned_if_related_peeled_dr_is (dr_vec_info *dr_info,
    1699              :                                          dr_vec_info *dr_peel_info)
    1700              : {
    1701      1986758 :   if (multiple_p (DR_TARGET_ALIGNMENT (dr_peel_info),
    1702      1987526 :                   DR_TARGET_ALIGNMENT (dr_info)))
    1703              :     {
    1704      1985990 :       poly_offset_int diff
    1705      1985990 :         = (wi::to_poly_offset (DR_INIT (dr_peel_info->dr))
    1706      1985990 :            - wi::to_poly_offset (DR_INIT (dr_info->dr)));
    1707      1985990 :       if (known_eq (diff, 0)
    1708      1985990 :           || multiple_p (diff, DR_TARGET_ALIGNMENT (dr_info)))
    1709       747227 :         return true;
    1710              :     }
    1711              :   return false;
    1712              : }
    1713              : 
    1714              : /* Return whether DR_INFO is aligned if DR_PEEL_INFO is made
    1715              :    aligned via peeling.  */
    1716              : 
    1717              : static bool
    1718       198518 : vect_dr_aligned_if_peeled_dr_is (dr_vec_info *dr_info,
    1719              :                                  dr_vec_info *dr_peel_info)
    1720              : {
    1721       198518 :   if (!operand_equal_p (DR_BASE_ADDRESS (dr_info->dr),
    1722       198518 :                         DR_BASE_ADDRESS (dr_peel_info->dr), 0)
    1723        49000 :       || !operand_equal_p (DR_OFFSET (dr_info->dr),
    1724        49000 :                            DR_OFFSET (dr_peel_info->dr), 0)
    1725       246610 :       || !operand_equal_p (DR_STEP (dr_info->dr),
    1726        48092 :                            DR_STEP (dr_peel_info->dr), 0))
    1727       150824 :     return false;
    1728              : 
    1729        47694 :   return vect_dr_aligned_if_related_peeled_dr_is (dr_info, dr_peel_info);
    1730              : }
    1731              : 
    1732              : /* Compute the value for dr_info->misalign so that the access appears
    1733              :    aligned.  This is used by peeling to compensate for dr_misalignment
    1734              :    applying the offset for negative step.  */
    1735              : 
    1736              : int
    1737        21899 : vect_dr_misalign_for_aligned_access (dr_vec_info *dr_info)
    1738              : {
    1739        21899 :   if (tree_int_cst_sgn (DR_STEP (dr_info->dr)) >= 0)
    1740              :     return 0;
    1741              : 
    1742          201 :   tree vectype = STMT_VINFO_VECTYPE (dr_info->stmt);
    1743          201 :   poly_int64 misalignment
    1744          201 :     = ((TYPE_VECTOR_SUBPARTS (vectype) - 1)
    1745          201 :        * TREE_INT_CST_LOW (TYPE_SIZE_UNIT (TREE_TYPE (vectype))));
    1746              : 
    1747          201 :   unsigned HOST_WIDE_INT target_alignment_c;
    1748          201 :   int misalign;
    1749          201 :   if (!dr_info->target_alignment.is_constant (&target_alignment_c)
    1750          201 :       || !known_misalignment (misalignment, target_alignment_c, &misalign))
    1751              :     return DR_MISALIGNMENT_UNKNOWN;
    1752          201 :   return misalign;
    1753              : }
    1754              : 
    1755              : /* Function vect_update_misalignment_for_peel.
    1756              :    Sets DR_INFO's misalignment
    1757              :    - to 0 if it has the same alignment as DR_PEEL_INFO,
    1758              :    - to the misalignment computed using NPEEL if DR_INFO's misalignment is known,
    1759              :    - to -1 (unknown) otherwise.
    1760              : 
    1761              :    DR_INFO - the data reference whose misalignment is to be adjusted.
    1762              :    DR_PEEL_INFO - the data reference whose misalignment is being made
    1763              :                   zero in the vector loop by the peel.
    1764              :    NPEEL - the number of iterations in the peel loop if the misalignment
    1765              :            of DR_PEEL_INFO is known at compile time.  */
    1766              : 
    1767              : static void
    1768         2775 : vect_update_misalignment_for_peel (dr_vec_info *dr_info,
    1769              :                                    dr_vec_info *dr_peel_info, int npeel)
    1770              : {
    1771              :   /* If dr_info is aligned if dr_peel_info is, then mark it so.  */
    1772         2775 :   if (vect_dr_aligned_if_peeled_dr_is (dr_info, dr_peel_info))
    1773              :     {
    1774          444 :       SET_DR_MISALIGNMENT (dr_info,
    1775              :                            vect_dr_misalign_for_aligned_access (dr_peel_info));
    1776          444 :       return;
    1777              :     }
    1778              : 
    1779         2331 :   unsigned HOST_WIDE_INT alignment;
    1780         2331 :   if (DR_TARGET_ALIGNMENT (dr_info).is_constant (&alignment)
    1781         2331 :       && known_alignment_for_access_p (dr_info,
    1782         2331 :                                        STMT_VINFO_VECTYPE (dr_info->stmt))
    1783          218 :       && known_alignment_for_access_p (dr_peel_info,
    1784          218 :                                        STMT_VINFO_VECTYPE (dr_peel_info->stmt)))
    1785              :     {
    1786          202 :       int misal = dr_info->misalignment;
    1787          202 :       misal += npeel * TREE_INT_CST_LOW (DR_STEP (dr_info->dr));
    1788          202 :       misal &= alignment - 1;
    1789          202 :       set_dr_misalignment (dr_info, misal);
    1790          202 :       return;
    1791              :     }
    1792              : 
    1793         2129 :   if (dump_enabled_p ())
    1794           40 :     dump_printf_loc (MSG_NOTE, vect_location, "Setting misalignment " \
    1795              :                      "to unknown (-1).\n");
    1796         2129 :   SET_DR_MISALIGNMENT (dr_info, DR_MISALIGNMENT_UNKNOWN);
    1797              : }
    1798              : 
    1799              : /* Return true if alignment is relevant for DR_INFO.  */
    1800              : 
    1801              : static bool
    1802      1802746 : vect_relevant_for_alignment_p (dr_vec_info *dr_info)
    1803              : {
    1804      1802746 :   stmt_vec_info stmt_info = dr_info->stmt;
    1805              : 
    1806      1802746 :   if (!STMT_VINFO_RELEVANT_P (stmt_info))
    1807              :     return false;
    1808              : 
    1809              :   /* For interleaving, only the alignment of the first access matters.  */
    1810      1801824 :   if (STMT_VINFO_GROUPED_ACCESS (stmt_info)
    1811      2044802 :       && DR_GROUP_FIRST_ELEMENT (stmt_info) != stmt_info)
    1812              :     return false;
    1813              : 
    1814              :   /* Scatter-gather and invariant accesses continue to address individual
    1815              :      scalars, so vector-level alignment is irrelevant.  */
    1816      1694647 :   if (STMT_VINFO_GATHER_SCATTER_P (stmt_info)
    1817      1694647 :       || integer_zerop (DR_STEP (dr_info->dr)))
    1818        54727 :     return false;
    1819              : 
    1820              :   /* Strided accesses perform only component accesses, alignment is
    1821              :      irrelevant for them.  */
    1822      1639920 :   if (STMT_VINFO_STRIDED_P (stmt_info)
    1823      1639920 :       && !STMT_VINFO_GROUPED_ACCESS (stmt_info))
    1824              :     return false;
    1825              : 
    1826              :   return true;
    1827              : }
    1828              : 
    1829              : /* Given a memory reference EXP, return whether its alignment is less
    1830              :    than its size.  */
    1831              : 
    1832              : static bool
    1833      1596858 : not_size_aligned (tree exp)
    1834              : {
    1835      1596858 :   if (!tree_fits_uhwi_p (TYPE_SIZE (TREE_TYPE (exp))))
    1836              :     return true;
    1837              : 
    1838      1596858 :   return (tree_to_uhwi (TYPE_SIZE (TREE_TYPE (exp)))
    1839      1596858 :           > get_object_alignment (exp));
    1840              : }
    1841              : 
    1842              : /* Function vector_alignment_reachable_p
    1843              : 
    1844              :    Return true if vector alignment for DR_INFO is reachable by peeling
    1845              :    a few loop iterations.  Return false otherwise.  */
    1846              : 
    1847              : static bool
    1848       613133 : vector_alignment_reachable_p (dr_vec_info *dr_info, poly_uint64 vf)
    1849              : {
    1850       613133 :   stmt_vec_info stmt_info = dr_info->stmt;
    1851       613133 :   tree vectype = STMT_VINFO_VECTYPE (stmt_info);
    1852       613133 :   poly_uint64 nelements = TYPE_VECTOR_SUBPARTS (vectype);
    1853      1226266 :   poly_uint64 vector_size = GET_MODE_SIZE (TYPE_MODE (vectype));
    1854       613133 :   unsigned elem_size = vector_element_size (vector_size, nelements);
    1855       613133 :   unsigned group_size = 1;
    1856              : 
    1857       613133 :   if (STMT_VINFO_GROUPED_ACCESS (stmt_info))
    1858              :     {
    1859              :       /* For interleaved access we peel only if the number of iterations
    1860              :          in the prolog loop ({VF - misalignment}) is a multiple of the
    1861              :          number of interleaved accesses.  */
    1862              : 
    1863              :       /* FORNOW: handle only known alignment.  */
    1864        87492 :       if (!known_alignment_for_access_p (dr_info, vectype))
    1865       613133 :         return false;
    1866              : 
    1867        52060 :       unsigned mis_in_elements = dr_misalignment (dr_info, vectype) / elem_size;
    1868        64532 :       if (!multiple_p (nelements - mis_in_elements, DR_GROUP_SIZE (stmt_info)))
    1869              :         return false;
    1870              : 
    1871        12472 :       group_size = DR_GROUP_SIZE (DR_GROUP_FIRST_ELEMENT (stmt_info));
    1872              :     }
    1873              : 
    1874              :   /* If the vectorization factor does not guarantee DR advancement of
    1875              :      a multiple of the target alignment no peeling will help.  */
    1876       538113 :   if (!multiple_p (elem_size * group_size * vf, dr_target_alignment (dr_info)))
    1877          154 :     return false;
    1878              : 
    1879              :   /* If misalignment is known at compile time, then allow peeling
    1880              :      only if natural alignment is reachable through peeling.  */
    1881       537959 :   if (known_alignment_for_access_p (dr_info, vectype)
    1882       838267 :       && !aligned_access_p (dr_info, vectype))
    1883              :     {
    1884        14238 :       HOST_WIDE_INT elmsize =
    1885        14238 :                 int_cst_value (TYPE_SIZE_UNIT (TREE_TYPE (vectype)));
    1886        14238 :       if (dump_enabled_p ())
    1887              :         {
    1888          768 :           dump_printf_loc (MSG_NOTE, vect_location,
    1889              :                            "data size = %wd. misalignment = %d.\n", elmsize,
    1890              :                            dr_misalignment (dr_info, vectype));
    1891              :         }
    1892        14238 :       if (dr_misalignment (dr_info, vectype) % elmsize)
    1893              :         {
    1894           72 :           if (dump_enabled_p ())
    1895            7 :             dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
    1896              :                              "data size does not divide the misalignment.\n");
    1897           72 :           return false;
    1898              :         }
    1899              :     }
    1900              : 
    1901       537887 :   if (!known_alignment_for_access_p (dr_info, vectype))
    1902              :     {
    1903       237651 :       tree type = TREE_TYPE (DR_REF (dr_info->dr));
    1904       237651 :       bool is_packed = not_size_aligned (DR_REF (dr_info->dr));
    1905       237651 :       if (dump_enabled_p ())
    1906        16013 :         dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
    1907              :                          "Unknown misalignment, %snaturally aligned\n",
    1908              :                          is_packed ? "not " : "");
    1909       237651 :       return targetm.vectorize.vector_alignment_reachable (type, is_packed);
    1910              :     }
    1911              : 
    1912              :   return true;
    1913              : }
    1914              : 
    1915              : 
    1916              : /* Calculate the cost of the memory access represented by DR_INFO.  */
    1917              : 
    1918              : static void
    1919       732917 : vect_get_data_access_cost (vec_info *vinfo, dr_vec_info *dr_info,
    1920              :                            dr_alignment_support alignment_support_scheme,
    1921              :                            int misalignment,
    1922              :                            unsigned int *inside_cost,
    1923              :                            unsigned int *outside_cost,
    1924              :                            stmt_vector_for_cost *body_cost_vec,
    1925              :                            stmt_vector_for_cost *prologue_cost_vec)
    1926              : {
    1927       732917 :   stmt_vec_info stmt_info = dr_info->stmt;
    1928              : 
    1929       732917 :   if (DR_IS_READ (dr_info->dr))
    1930       512272 :     vect_get_load_cost (vinfo, stmt_info, NULL, 1,
    1931              :                         alignment_support_scheme, misalignment, true,
    1932              :                         inside_cost, outside_cost, prologue_cost_vec,
    1933              :                         body_cost_vec, false);
    1934              :   else
    1935       220645 :     vect_get_store_cost (vinfo,stmt_info, NULL, 1,
    1936              :                          alignment_support_scheme, misalignment, inside_cost,
    1937              :                          body_cost_vec);
    1938              : 
    1939       732917 :   if (dump_enabled_p ())
    1940        29885 :     dump_printf_loc (MSG_NOTE, vect_location,
    1941              :                      "vect_get_data_access_cost: inside_cost = %d, "
    1942              :                      "outside_cost = %d.\n", *inside_cost, *outside_cost);
    1943       732917 : }
    1944              : 
    1945              : 
    1946              : typedef struct _vect_peel_info
    1947              : {
    1948              :   dr_vec_info *dr_info;
    1949              :   int npeel;
    1950              :   unsigned int count;
    1951              : } *vect_peel_info;
    1952              : 
    1953              : typedef struct _vect_peel_extended_info
    1954              : {
    1955              :   vec_info *vinfo;
    1956              :   struct _vect_peel_info peel_info;
    1957              :   unsigned int inside_cost;
    1958              :   unsigned int outside_cost;
    1959              : } *vect_peel_extended_info;
    1960              : 
    1961              : 
    1962              : /* Peeling hashtable helpers.  */
    1963              : 
    1964              : struct peel_info_hasher : free_ptr_hash <_vect_peel_info>
    1965              : {
    1966              :   static inline hashval_t hash (const _vect_peel_info *);
    1967              :   static inline bool equal (const _vect_peel_info *, const _vect_peel_info *);
    1968              : };
    1969              : 
    1970              : inline hashval_t
    1971       747068 : peel_info_hasher::hash (const _vect_peel_info *peel_info)
    1972              : {
    1973       747068 :   return (hashval_t) peel_info->npeel;
    1974              : }
    1975              : 
    1976              : inline bool
    1977       388623 : peel_info_hasher::equal (const _vect_peel_info *a, const _vect_peel_info *b)
    1978              : {
    1979       388623 :   return (a->npeel == b->npeel);
    1980              : }
    1981              : 
    1982              : 
    1983              : /* Insert DR_INFO into peeling hash table with NPEEL as key.  */
    1984              : 
    1985              : static void
    1986       359109 : vect_peeling_hash_insert (hash_table<peel_info_hasher> *peeling_htab,
    1987              :                           loop_vec_info loop_vinfo, dr_vec_info *dr_info,
    1988              :                           int npeel, bool supportable_if_not_aligned)
    1989              : {
    1990       359109 :   struct _vect_peel_info elem, *slot;
    1991       359109 :   _vect_peel_info **new_slot;
    1992              : 
    1993       359109 :   elem.npeel = npeel;
    1994       359109 :   slot = peeling_htab->find (&elem);
    1995       359109 :   if (slot)
    1996       157766 :     slot->count++;
    1997              :   else
    1998              :     {
    1999       201343 :       slot = XNEW (struct _vect_peel_info);
    2000       201343 :       slot->npeel = npeel;
    2001       201343 :       slot->dr_info = dr_info;
    2002       201343 :       slot->count = 1;
    2003       201343 :       new_slot = peeling_htab->find_slot (slot, INSERT);
    2004       201343 :       *new_slot = slot;
    2005              :     }
    2006              : 
    2007              :   /* If this DR is not supported with unknown misalignment then bias
    2008              :      this slot when the cost model is disabled.  */
    2009       359109 :   if (!supportable_if_not_aligned
    2010       359109 :       && unlimited_cost_model (LOOP_VINFO_LOOP (loop_vinfo)))
    2011         4656 :     slot->count += VECT_MAX_COST;
    2012       359109 : }
    2013              : 
    2014              : 
    2015              : /* Traverse peeling hash table to find peeling option that aligns maximum
    2016              :    number of data accesses.  */
    2017              : 
    2018              : int
    2019        35801 : vect_peeling_hash_get_most_frequent (_vect_peel_info **slot,
    2020              :                                      _vect_peel_extended_info *max)
    2021              : {
    2022        35801 :   vect_peel_info elem = *slot;
    2023              : 
    2024        35801 :   if (elem->count > max->peel_info.count
    2025        21747 :       || (elem->count == max->peel_info.count
    2026        17037 :           && max->peel_info.npeel > elem->npeel))
    2027              :     {
    2028        14070 :       max->peel_info.npeel = elem->npeel;
    2029        14070 :       max->peel_info.count = elem->count;
    2030        14070 :       max->peel_info.dr_info = elem->dr_info;
    2031              :     }
    2032              : 
    2033        35801 :   return 1;
    2034              : }
    2035              : 
    2036              : /* Get the costs of peeling NPEEL iterations for LOOP_VINFO, checking
    2037              :    data access costs for all data refs.  If UNKNOWN_MISALIGNMENT is true,
    2038              :    npeel is computed at runtime but DR0_INFO's misalignment will be zero
    2039              :    after peeling.  */
    2040              : 
    2041              : static void
    2042       401748 : vect_get_peeling_costs_all_drs (loop_vec_info loop_vinfo,
    2043              :                                 dr_vec_info *dr0_info,
    2044              :                                 unsigned int *inside_cost,
    2045              :                                 unsigned int *outside_cost,
    2046              :                                 stmt_vector_for_cost *body_cost_vec,
    2047              :                                 stmt_vector_for_cost *prologue_cost_vec,
    2048              :                                 unsigned int npeel)
    2049              : {
    2050       401748 :   vec<data_reference_p> datarefs = LOOP_VINFO_DATAREFS (loop_vinfo);
    2051              : 
    2052       401748 :   bool dr0_alignment_known_p
    2053              :     = (dr0_info
    2054       735971 :        && known_alignment_for_access_p (dr0_info,
    2055       334223 :                                         STMT_VINFO_VECTYPE (dr0_info->stmt)));
    2056              : 
    2057      1975450 :   for (data_reference *dr : datarefs)
    2058              :     {
    2059       770206 :       dr_vec_info *dr_info = loop_vinfo->lookup_dr (dr);
    2060       770206 :       if (!vect_relevant_for_alignment_p (dr_info))
    2061        37289 :         continue;
    2062              : 
    2063       732917 :       tree vectype = STMT_VINFO_VECTYPE (dr_info->stmt);
    2064       732917 :       dr_alignment_support alignment_support_scheme;
    2065       732917 :       int misalignment;
    2066       732917 :       unsigned HOST_WIDE_INT alignment;
    2067              : 
    2068       732917 :       bool negative = tree_int_cst_compare (DR_STEP (dr_info->dr),
    2069       732917 :                                             size_zero_node) < 0;
    2070       732917 :       poly_int64 off = 0;
    2071       732917 :       if (negative)
    2072        24157 :         off = ((TYPE_VECTOR_SUBPARTS (vectype) - 1)
    2073        24157 :                * -TREE_INT_CST_LOW (TYPE_SIZE_UNIT (TREE_TYPE (vectype))));
    2074              : 
    2075       732917 :       if (npeel == 0)
    2076       372380 :         misalignment = dr_misalignment (dr_info, vectype, off);
    2077       360537 :       else if (dr_info == dr0_info
    2078       360537 :                || vect_dr_aligned_if_peeled_dr_is (dr_info, dr0_info))
    2079              :         misalignment = 0;
    2080       125507 :       else if (!dr0_alignment_known_p
    2081         8115 :                || !known_alignment_for_access_p (dr_info, vectype)
    2082       133622 :                || !DR_TARGET_ALIGNMENT (dr_info).is_constant (&alignment))
    2083              :         misalignment = DR_MISALIGNMENT_UNKNOWN;
    2084              :       else
    2085              :         {
    2086         7106 :           misalignment = dr_misalignment (dr_info, vectype, off);
    2087         7106 :           misalignment += npeel * TREE_INT_CST_LOW (DR_STEP (dr_info->dr));
    2088         7106 :           misalignment &= alignment - 1;
    2089              :         }
    2090       732917 :       alignment_support_scheme
    2091       732917 :         = vect_supportable_dr_alignment (loop_vinfo, dr_info, vectype,
    2092              :                                          misalignment);
    2093              : 
    2094       732917 :       vect_get_data_access_cost (loop_vinfo, dr_info,
    2095              :                                  alignment_support_scheme, misalignment,
    2096              :                                  inside_cost, outside_cost,
    2097              :                                  body_cost_vec, prologue_cost_vec);
    2098              :     }
    2099       401748 : }
    2100              : 
    2101              : /* Traverse peeling hash table and calculate cost for each peeling option.
    2102              :    Find the one with the lowest cost.  */
    2103              : 
    2104              : int
    2105       145907 : vect_peeling_hash_get_lowest_cost (_vect_peel_info **slot,
    2106              :                                    _vect_peel_extended_info *min)
    2107              : {
    2108       145907 :   vect_peel_info elem = *slot;
    2109       145907 :   unsigned int inside_cost = 0, outside_cost = 0;
    2110       145907 :   loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (min->vinfo);
    2111       145907 :   stmt_vector_for_cost prologue_cost_vec, body_cost_vec;
    2112              : 
    2113       145907 :   prologue_cost_vec.create (2);
    2114       145907 :   body_cost_vec.create (2);
    2115              : 
    2116       145907 :   vect_get_peeling_costs_all_drs (loop_vinfo, elem->dr_info, &inside_cost,
    2117              :                                   &outside_cost, &body_cost_vec,
    2118       145907 :                                   &prologue_cost_vec, elem->npeel);
    2119              : 
    2120       145907 :   body_cost_vec.release ();
    2121       145907 :   prologue_cost_vec.release ();
    2122              : 
    2123       145907 :   outside_cost += vect_get_known_peeling_cost (loop_vinfo, elem->npeel);
    2124              : 
    2125       145907 :   if (inside_cost < min->inside_cost
    2126         1679 :       || (inside_cost == min->inside_cost
    2127         1255 :           && outside_cost < min->outside_cost))
    2128              :     {
    2129       144234 :       min->inside_cost = inside_cost;
    2130       144234 :       min->outside_cost = outside_cost;
    2131       144234 :       min->peel_info.dr_info = elem->dr_info;
    2132       144234 :       min->peel_info.npeel = elem->npeel;
    2133       144234 :       min->peel_info.count = elem->count;
    2134              :     }
    2135              : 
    2136       145907 :   return 1;
    2137              : }
    2138              : 
    2139              : 
    2140              : /* Choose best peeling option by traversing peeling hash table and either
    2141              :    choosing an option with the lowest cost (if cost model is enabled) or the
    2142              :    option that aligns as many accesses as possible.  */
    2143              : 
    2144              : static struct _vect_peel_extended_info
    2145       156925 : vect_peeling_hash_choose_best_peeling (hash_table<peel_info_hasher> *peeling_htab,
    2146              :                                        loop_vec_info loop_vinfo)
    2147              : {
    2148       156925 :    struct _vect_peel_extended_info res;
    2149              : 
    2150       156925 :    res.peel_info.dr_info = NULL;
    2151       156925 :    res.vinfo = loop_vinfo;
    2152              : 
    2153       156925 :    if (!unlimited_cost_model (LOOP_VINFO_LOOP (loop_vinfo)))
    2154              :      {
    2155       142924 :        res.inside_cost = INT_MAX;
    2156       142924 :        res.outside_cost = INT_MAX;
    2157       142924 :        peeling_htab->traverse <_vect_peel_extended_info *,
    2158       288831 :                                vect_peeling_hash_get_lowest_cost> (&res);
    2159              :      }
    2160              :    else
    2161              :      {
    2162        14001 :        res.peel_info.count = 0;
    2163        14001 :        peeling_htab->traverse <_vect_peel_extended_info *,
    2164        49802 :                                vect_peeling_hash_get_most_frequent> (&res);
    2165        14001 :        res.inside_cost = 0;
    2166        14001 :        res.outside_cost = 0;
    2167              :      }
    2168              : 
    2169       156925 :    return res;
    2170              : }
    2171              : 
    2172              : /* Return if vectorization is definitely, possibly, or unlikely to be
    2173              :    supportable after loop peeling.  */
    2174              : 
    2175              : static enum peeling_support
    2176        78400 : vect_peeling_supportable (loop_vec_info loop_vinfo, dr_vec_info *dr0_info,
    2177              :                           unsigned npeel)
    2178              : {
    2179        78400 :   vec<data_reference_p> datarefs = LOOP_VINFO_DATAREFS (loop_vinfo);
    2180        78400 :   enum dr_alignment_support supportable_dr_alignment;
    2181              : 
    2182        78400 :   bool dr0_alignment_known_p
    2183       156800 :     = known_alignment_for_access_p (dr0_info,
    2184        78400 :                                     STMT_VINFO_VECTYPE (dr0_info->stmt));
    2185        78400 :   bool has_unsupported_dr_p = false;
    2186        78400 :   unsigned int dr0_step = tree_to_shwi (DR_STEP (dr0_info->dr));
    2187        78400 :   int known_unsupported_misalignment = DR_MISALIGNMENT_UNKNOWN;
    2188              : 
    2189              :   /* Check if each data ref can be vectorized after peeling.  */
    2190       335240 :   for (data_reference *dr : datarefs)
    2191              :     {
    2192       115984 :       if (dr == dr0_info->dr)
    2193        77444 :         continue;
    2194              : 
    2195        38540 :       dr_vec_info *dr_info = loop_vinfo->lookup_dr (dr);
    2196        38540 :       if (!vect_relevant_for_alignment_p (dr_info)
    2197        38540 :           || vect_dr_aligned_if_peeled_dr_is (dr_info, dr0_info))
    2198         6711 :         continue;
    2199              : 
    2200        31829 :       tree vectype = STMT_VINFO_VECTYPE (dr_info->stmt);
    2201        31829 :       int misalignment;
    2202        31829 :       unsigned HOST_WIDE_INT alignment;
    2203        31829 :       if (!dr0_alignment_known_p
    2204         1854 :           || !known_alignment_for_access_p (dr_info, vectype)
    2205        33683 :           || !DR_TARGET_ALIGNMENT (dr_info).is_constant (&alignment))
    2206              :         misalignment = DR_MISALIGNMENT_UNKNOWN;
    2207              :       else
    2208              :         {
    2209         1840 :           misalignment = dr_misalignment (dr_info, vectype);
    2210         1840 :           misalignment += npeel * TREE_INT_CST_LOW (DR_STEP (dr_info->dr));
    2211         1840 :           misalignment &= alignment - 1;
    2212              :         }
    2213        31829 :       supportable_dr_alignment
    2214        31829 :         = vect_supportable_dr_alignment (loop_vinfo, dr_info, vectype,
    2215              :                                          misalignment);
    2216        31829 :       if (supportable_dr_alignment == dr_unaligned_unsupported)
    2217              :         {
    2218        30404 :           has_unsupported_dr_p = true;
    2219              : 
    2220              :           /* If unaligned unsupported DRs exist, we do following checks to see
    2221              :              if they can be mutually aligned to support vectorization.  If yes,
    2222              :              we can try peeling and create a runtime (mutual alignment) check
    2223              :              to guard the peeled loop.  If no, return PEELING_UNSUPPORTED.  */
    2224              : 
    2225              :           /* 1) If unaligned unsupported DRs have different alignment steps, the
    2226              :                 probability of DRs being mutually aligned is very low, and it's
    2227              :                 quite complex to check mutual alignment at runtime.  We return
    2228              :                 PEELING_UNSUPPORTED in this case.  */
    2229        30404 :           if (tree_to_shwi (DR_STEP (dr)) != dr0_step)
    2230        78400 :             return peeling_unsupported;
    2231              : 
    2232              :           /* 2) Based on above same alignment step condition, if one known
    2233              :                 misaligned DR has zero misalignment, or different misalignment
    2234              :                 amount from another known misaligned DR, peeling is unable to
    2235              :                 help make all these DRs aligned together.  We won't try peeling
    2236              :                 with versioning anymore.  */
    2237        26204 :           int curr_dr_misalignment = dr_misalignment (dr_info, vectype);
    2238        26204 :           if (curr_dr_misalignment == 0)
    2239              :             return peeling_unsupported;
    2240        14460 :           if (known_unsupported_misalignment != DR_MISALIGNMENT_UNKNOWN)
    2241              :             {
    2242            8 :               if (curr_dr_misalignment != DR_MISALIGNMENT_UNKNOWN
    2243            8 :                   && curr_dr_misalignment != known_unsupported_misalignment)
    2244              :                 return peeling_unsupported;
    2245              :             }
    2246              :           else
    2247              :             known_unsupported_misalignment = curr_dr_misalignment;
    2248              :         }
    2249              :     }
    2250              : 
    2251              :   /* Vectorization is known to be supportable with peeling alone when there is
    2252              :      no unsupported DR.  */
    2253        62456 :   return has_unsupported_dr_p ? peeling_maybe_supported
    2254              :                               : peeling_known_supported;
    2255              : }
    2256              : 
    2257              : /* Compare two data-references DRA and DRB to group them into chunks
    2258              :    with related alignment.  */
    2259              : 
    2260              : static int
    2261      4597918 : dr_align_group_sort_cmp (const void *dra_, const void *drb_)
    2262              : {
    2263      4597918 :   data_reference_p dra = *(data_reference_p *)const_cast<void *>(dra_);
    2264      4597918 :   data_reference_p drb = *(data_reference_p *)const_cast<void *>(drb_);
    2265      4597918 :   int cmp;
    2266              : 
    2267              :   /* Stabilize sort.  */
    2268      4597918 :   if (dra == drb)
    2269              :     return 0;
    2270              : 
    2271              :   /* Ordering of DRs according to base.  */
    2272      4597918 :   cmp = data_ref_compare_tree (DR_BASE_ADDRESS (dra),
    2273              :                                DR_BASE_ADDRESS (drb));
    2274      4597918 :   if (cmp != 0)
    2275              :     return cmp;
    2276              : 
    2277              :   /* And according to DR_OFFSET.  */
    2278      2029943 :   cmp = data_ref_compare_tree (DR_OFFSET (dra), DR_OFFSET (drb));
    2279      2029943 :   if (cmp != 0)
    2280              :     return cmp;
    2281              : 
    2282              :   /* And after step.  */
    2283      2015706 :   cmp = data_ref_compare_tree (DR_STEP (dra), DR_STEP (drb));
    2284      2015706 :   if (cmp != 0)
    2285              :     return cmp;
    2286              : 
    2287              :   /* Then sort after DR_INIT.  In case of identical DRs sort after stmt UID.  */
    2288      2010471 :   cmp = data_ref_compare_tree (DR_INIT (dra), DR_INIT (drb));
    2289      2010471 :   if (cmp == 0)
    2290       237097 :     return gimple_uid (DR_STMT (dra)) < gimple_uid (DR_STMT (drb)) ? -1 : 1;
    2291              :   return cmp;
    2292              : }
    2293              : 
    2294              : /* Function vect_enhance_data_refs_alignment
    2295              : 
    2296              :    This pass will use loop versioning and loop peeling in order to enhance
    2297              :    the alignment of data references in the loop.
    2298              : 
    2299              :    FOR NOW: we assume that whatever versioning/peeling takes place, only the
    2300              :    original loop is to be vectorized.  Any other loops that are created by
    2301              :    the transformations performed in this pass - are not supposed to be
    2302              :    vectorized.  This restriction will be relaxed.
    2303              : 
    2304              :    This pass will require a cost model to guide it whether to apply peeling
    2305              :    or versioning or a combination of the two.  For example, the scheme that
    2306              :    Intel uses when given a loop with several memory accesses, is as follows:
    2307              :    choose one memory access ('p') whose alignment you want to force by doing
    2308              :    peeling.  Then, either (1) generate a loop in which 'p' is aligned and all
    2309              :    other accesses are not necessarily aligned, or (2) use loop versioning to
    2310              :    generate one loop in which all accesses are aligned, and another loop in
    2311              :    which only 'p' is necessarily aligned.
    2312              : 
    2313              :    ("Automatic Intra-Register Vectorization for the Intel Architecture",
    2314              :    Aart J.C. Bik, Milind Girkar, Paul M. Grey and Xinmin Tian, International
    2315              :    Journal of Parallel Programming, Vol. 30, No. 2, April 2002.)
    2316              : 
    2317              :    Devising a cost model is the most critical aspect of this work.  It will
    2318              :    guide us on which access to peel for, whether to use loop versioning, how
    2319              :    many versions to create, etc.  The cost model will probably consist of
    2320              :    generic considerations as well as target specific considerations (on
    2321              :    powerpc for example, misaligned stores are more painful than misaligned
    2322              :    loads).
    2323              : 
    2324              :    Here are the general steps involved in alignment enhancements:
    2325              : 
    2326              :      -- original loop, before alignment analysis:
    2327              :         for (i=0; i<N; i++){
    2328              :           x = q[i];                     # DR_MISALIGNMENT(q) = unknown
    2329              :           p[i] = y;                     # DR_MISALIGNMENT(p) = unknown
    2330              :         }
    2331              : 
    2332              :      -- After vect_compute_data_refs_alignment:
    2333              :         for (i=0; i<N; i++){
    2334              :           x = q[i];                     # DR_MISALIGNMENT(q) = 3
    2335              :           p[i] = y;                     # DR_MISALIGNMENT(p) = unknown
    2336              :         }
    2337              : 
    2338              :      -- Possibility 1: we do loop versioning:
    2339              :      if (p is aligned) {
    2340              :         for (i=0; i<N; i++){ # loop 1A
    2341              :           x = q[i];                     # DR_MISALIGNMENT(q) = 3
    2342              :           p[i] = y;                     # DR_MISALIGNMENT(p) = 0
    2343              :         }
    2344              :      }
    2345              :      else {
    2346              :         for (i=0; i<N; i++){ # loop 1B
    2347              :           x = q[i];                     # DR_MISALIGNMENT(q) = 3
    2348              :           p[i] = y;                     # DR_MISALIGNMENT(p) = unaligned
    2349              :         }
    2350              :      }
    2351              : 
    2352              :      -- Possibility 2: we do loop peeling:
    2353              :      for (i = 0; i < 3; i++){        # (scalar loop, not to be vectorized).
    2354              :         x = q[i];
    2355              :         p[i] = y;
    2356              :      }
    2357              :      for (i = 3; i < N; i++){        # loop 2A
    2358              :         x = q[i];                       # DR_MISALIGNMENT(q) = 0
    2359              :         p[i] = y;                       # DR_MISALIGNMENT(p) = unknown
    2360              :      }
    2361              : 
    2362              :      -- Possibility 3: combination of loop peeling and versioning:
    2363              :      if (p & q are mutually aligned) {
    2364              :         for (i=0; i<3; i++){ # (peeled loop iterations).
    2365              :           x = q[i];
    2366              :           p[i] = y;
    2367              :         }
    2368              :         for (i=3; i<N; i++){ # loop 3A
    2369              :           x = q[i];                     # DR_MISALIGNMENT(q) = 0
    2370              :           p[i] = y;                     # DR_MISALIGNMENT(p) = 0
    2371              :         }
    2372              :      }
    2373              :      else {
    2374              :         for (i=0; i<N; i++){ # (scalar loop, not to be vectorized).
    2375              :           x = q[i];                     # DR_MISALIGNMENT(q) = 3
    2376              :           p[i] = y;                     # DR_MISALIGNMENT(p) = unknown
    2377              :         }
    2378              :      }
    2379              : 
    2380              :      These loops are later passed to loop_transform to be vectorized.  The
    2381              :      vectorizer will use the alignment information to guide the transformation
    2382              :      (whether to generate regular loads/stores, or with special handling for
    2383              :      misalignment).  */
    2384              : 
    2385              : opt_result
    2386       380940 : vect_enhance_data_refs_alignment (loop_vec_info loop_vinfo)
    2387              : {
    2388       380940 :   class loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
    2389       380940 :   dr_vec_info *first_store = NULL;
    2390       380940 :   dr_vec_info *dr0_info = NULL;
    2391       380940 :   struct data_reference *dr;
    2392       380940 :   unsigned int i;
    2393       380940 :   bool do_peeling = false;
    2394       380940 :   bool do_versioning = false;
    2395       380940 :   bool try_peeling_with_versioning = false;
    2396       380940 :   unsigned int npeel = 0;
    2397       380940 :   bool one_misalignment_known = false;
    2398       380940 :   bool one_misalignment_unknown = false;
    2399       380940 :   bool one_dr_unsupportable = false;
    2400       380940 :   dr_vec_info *unsupportable_dr_info = NULL;
    2401       380940 :   unsigned int dr0_same_align_drs = 0, first_store_same_align_drs = 0;
    2402       380940 :   hash_table<peel_info_hasher> peeling_htab (1);
    2403              : 
    2404       380940 :   DUMP_VECT_SCOPE ("vect_enhance_data_refs_alignment");
    2405              : 
    2406              :   /* Reset data so we can safely be called multiple times.  */
    2407       380940 :   LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo).truncate (0);
    2408       380940 :   LOOP_VINFO_PEELING_FOR_ALIGNMENT (loop_vinfo) = 0;
    2409              : 
    2410       380940 :   if (LOOP_VINFO_DATAREFS (loop_vinfo).is_empty ())
    2411        14156 :     return opt_result::success ();
    2412              : 
    2413              :   /* Sort the vector of datarefs so DRs that have the same or dependent
    2414              :      alignment are next to each other.  */
    2415       366784 :   auto_vec<data_reference_p> datarefs
    2416       366784 :     = LOOP_VINFO_DATAREFS (loop_vinfo).copy ();
    2417       366784 :   datarefs.qsort (dr_align_group_sort_cmp);
    2418              : 
    2419              :   /* Compute the number of DRs that become aligned when we peel
    2420              :      a dataref so it becomes aligned.  */
    2421       733568 :   auto_vec<unsigned> n_same_align_refs (datarefs.length ());
    2422       366784 :   n_same_align_refs.quick_grow_cleared (datarefs.length ());
    2423       366784 :   unsigned i0;
    2424       753243 :   for (i0 = 0; i0 < datarefs.length (); ++i0)
    2425       379821 :     if (DR_BASE_ADDRESS (datarefs[i0]))
    2426              :       break;
    2427      2384220 :   for (i = i0 + 1; i <= datarefs.length (); ++i)
    2428              :     {
    2429       825326 :       if (i == datarefs.length ()
    2430       465180 :           || !operand_equal_p (DR_BASE_ADDRESS (datarefs[i0]),
    2431       465180 :                                DR_BASE_ADDRESS (datarefs[i]), 0)
    2432       218592 :           || !operand_equal_p (DR_OFFSET (datarefs[i0]),
    2433       218592 :                                DR_OFFSET (datarefs[i]), 0)
    2434      1042608 :           || !operand_equal_p (DR_STEP (datarefs[i0]),
    2435       217282 :                                DR_STEP (datarefs[i]), 0))
    2436              :         {
    2437              :           /* The subgroup [i0, i-1] now only differs in DR_INIT and
    2438              :              possibly DR_TARGET_ALIGNMENT.  Still the whole subgroup
    2439              :              will get known misalignment if we align one of the refs
    2440              :              with the largest DR_TARGET_ALIGNMENT.  */
    2441      1433916 :           for (unsigned j = i0; j < i; ++j)
    2442              :             {
    2443       825326 :               dr_vec_info *dr_infoj = loop_vinfo->lookup_dr (datarefs[j]);
    2444      3589716 :               for (unsigned k = i0; k < i; ++k)
    2445              :                 {
    2446      2764390 :                   if (k == j)
    2447       825326 :                     continue;
    2448      1939064 :                   dr_vec_info *dr_infok = loop_vinfo->lookup_dr (datarefs[k]);
    2449      1939064 :                   if (vect_dr_aligned_if_related_peeled_dr_is (dr_infok,
    2450              :                                                                dr_infoj))
    2451       708376 :                     n_same_align_refs[j]++;
    2452              :                 }
    2453              :             }
    2454              :           i0 = i;
    2455              :         }
    2456              :     }
    2457              : 
    2458              :   /* While cost model enhancements are expected in the future, the high level
    2459              :      view of the code at this time is as follows:
    2460              : 
    2461              :      A) If there is a misaligned access then see if doing peeling alone can
    2462              :         make all data references satisfy vect_supportable_dr_alignment.  If so,
    2463              :         update data structures and return.
    2464              : 
    2465              :      B) If peeling alone wasn't possible and there is a data reference with an
    2466              :         unknown misalignment that does not satisfy vect_supportable_dr_alignment
    2467              :         then we may use either of the following two approaches.
    2468              : 
    2469              :         B1) Try peeling with versioning: Add a runtime loop versioning check to
    2470              :             see if all unsupportable data references are mutually aligned, which
    2471              :             means they will be uniformly aligned after a certain amount of loop
    2472              :             peeling.  If peeling and versioning can be used together, set
    2473              :             LOOP_VINFO_ALLOW_MUTUAL_ALIGNMENT_P to TRUE and return.
    2474              : 
    2475              :         B2) Try versioning alone: Add a runtime loop versioning check to see if
    2476              :             all unsupportable data references are already uniformly aligned
    2477              :             without loop peeling.  If versioning can be applied alone, set
    2478              :             LOOP_VINFO_ALLOW_MUTUAL_ALIGNMENT_P to FALSE and return.
    2479              : 
    2480              :         Above B1 is more powerful and more likely to be adopted than B2.  But B2
    2481              :         is still available and useful in some cases, for example, the cost model
    2482              :         does not allow much peeling.
    2483              : 
    2484              :      C) If none of above was successful then the alignment was not enhanced,
    2485              :         just return.  */
    2486              : 
    2487              :   /* (1) Peeling to force alignment.  */
    2488              : 
    2489              :   /* (1.1) Decide whether to perform peeling, how many iterations to peel, and
    2490              :      if vectorization may be supported by peeling with versioning.
    2491              :      Considerations:
    2492              :      - How many accesses will become aligned due to the peeling
    2493              :      - How many accesses will become unaligned due to the peeling,
    2494              :        and the cost of misaligned accesses.
    2495              :      - The cost of peeling (the extra runtime checks, the increase
    2496              :        in code size).  */
    2497              : 
    2498       366784 :   poly_uint64 vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
    2499      1043339 :   FOR_EACH_VEC_ELT (datarefs, i, dr)
    2500              :     {
    2501       721888 :       dr_vec_info *dr_info = loop_vinfo->lookup_dr (dr);
    2502       721888 :       if (!vect_relevant_for_alignment_p (dr_info))
    2503       108755 :         continue;
    2504              : 
    2505       613133 :       stmt_vec_info stmt_info = dr_info->stmt;
    2506       613133 :       tree vectype = STMT_VINFO_VECTYPE (stmt_info);
    2507              : 
    2508              :       /* With variable VF, unsafe speculative read can be avoided for known
    2509              :          inbounds DRs as long as partial vectors are used.  */
    2510       613133 :       if (!vf.is_constant ()
    2511              :           && dr_safe_speculative_read_required (stmt_info)
    2512              :           && DR_SCALAR_KNOWN_BOUNDS (dr_info))
    2513              :         {
    2514              :           dr_set_safe_speculative_read_required (stmt_info, false);
    2515              :           LOOP_VINFO_MUST_USE_PARTIAL_VECTORS_P (loop_vinfo) = true;
    2516              :         }
    2517              : 
    2518       613133 :       do_peeling = vector_alignment_reachable_p (dr_info, vf);
    2519       613133 :       if (do_peeling)
    2520              :         {
    2521       535701 :           if (known_alignment_for_access_p (dr_info, vectype))
    2522              :             {
    2523       300236 :               unsigned int npeel_tmp = 0;
    2524       300236 :               bool negative = tree_int_cst_compare (DR_STEP (dr),
    2525       300236 :                                                     size_zero_node) < 0;
    2526              : 
    2527              :               /* If known_alignment_for_access_p then we have set
    2528              :                  DR_MISALIGNMENT which is only done if we know it at compile
    2529              :                  time, so it is safe to assume target alignment is constant.
    2530              :                */
    2531       300236 :               unsigned int target_align =
    2532       300236 :                 DR_TARGET_ALIGNMENT (dr_info).to_constant ();
    2533       300236 :               unsigned HOST_WIDE_INT dr_size = vect_get_scalar_dr_size (dr_info);
    2534       300236 :               poly_int64 off = 0;
    2535       300236 :               if (negative)
    2536         2564 :                 off = (TYPE_VECTOR_SUBPARTS (vectype) - 1) * -dr_size;
    2537       300236 :               unsigned int mis = dr_misalignment (dr_info, vectype, off);
    2538       300236 :               mis = negative ? mis : -mis;
    2539       300236 :               if (mis != 0)
    2540        13207 :                 npeel_tmp = (mis & (target_align - 1)) / dr_size;
    2541              : 
    2542              :               /* For multiple types, it is possible that the bigger type access
    2543              :                  will have more than one peeling option.  E.g., a loop with two
    2544              :                  types: one of size (vector size / 4), and the other one of
    2545              :                  size (vector size / 8).  Vectorization factor will be 8.  If both
    2546              :                  accesses are misaligned by 3, the first one needs one scalar
    2547              :                  iteration to be aligned, and the second one needs 5.  But the
    2548              :                  first one will be aligned also by peeling 5 scalar
    2549              :                  iterations, and in that case both accesses will be aligned.
    2550              :                  Hence, except for the immediate peeling amount, we also want
    2551              :                  to try to add full vector size, while we don't exceed
    2552              :                  vectorization factor.
    2553              :                  We do this automatically for cost model, since we calculate
    2554              :                  cost for every peeling option.  */
    2555       300236 :               poly_uint64 nscalars = npeel_tmp;
    2556       300236 :               if (unlimited_cost_model (LOOP_VINFO_LOOP (loop_vinfo)))
    2557              :                 {
    2558        39790 :                   unsigned group_size = 1;
    2559        39790 :                   if (STMT_VINFO_GROUPED_ACCESS (stmt_info))
    2560         1917 :                     group_size = DR_GROUP_SIZE (stmt_info);
    2561        39790 :                   nscalars = vf * group_size;
    2562              :                 }
    2563              : 
    2564              :               /* Save info about DR in the hash table.  Also include peeling
    2565              :                  amounts according to the explanation above.  Indicate
    2566              :                  the alignment status when the ref is not aligned.
    2567              :                  ???  Rather than using unknown alignment here we should
    2568              :                  prune all entries from the peeling hashtable which cause
    2569              :                  DRs to be not supported.  */
    2570       300236 :               bool supportable_if_not_aligned
    2571              :                 = vect_supportable_dr_alignment
    2572       300236 :                     (loop_vinfo, dr_info, vectype, DR_MISALIGNMENT_UNKNOWN);
    2573       659345 :               while (known_le (npeel_tmp, nscalars))
    2574              :                 {
    2575       359109 :                   vect_peeling_hash_insert (&peeling_htab, loop_vinfo,
    2576              :                                             dr_info, npeel_tmp,
    2577              :                                             supportable_if_not_aligned);
    2578       359109 :                   npeel_tmp += MAX (1, target_align / dr_size);
    2579              :                 }
    2580              : 
    2581       300236 :               one_misalignment_known = true;
    2582              :             }
    2583              :           else
    2584              :             {
    2585              :               /* If we don't know any misalignment values, we prefer
    2586              :                  peeling for data-ref that has the maximum number of data-refs
    2587              :                  with the same alignment, unless the target prefers to align
    2588              :                  stores over loads.  */
    2589       235465 :               unsigned same_align_drs = n_same_align_refs[i];
    2590       235465 :               if (!dr0_info
    2591       235465 :                   || dr0_same_align_drs < same_align_drs)
    2592              :                 {
    2593              :                   dr0_same_align_drs = same_align_drs;
    2594              :                   dr0_info = dr_info;
    2595              :                 }
    2596              :               /* For data-refs with the same number of related
    2597              :                  accesses prefer the one where the misalign
    2598              :                  computation will be invariant in the outermost loop.  */
    2599        76257 :               else if (dr0_same_align_drs == same_align_drs)
    2600              :                 {
    2601        74794 :                   class loop *ivloop0, *ivloop;
    2602        74794 :                   ivloop0 = outermost_invariant_loop_for_expr
    2603        74794 :                     (loop, DR_BASE_ADDRESS (dr0_info->dr));
    2604        74794 :                   ivloop = outermost_invariant_loop_for_expr
    2605        74794 :                     (loop, DR_BASE_ADDRESS (dr));
    2606        74794 :                   if ((ivloop && !ivloop0)
    2607        74794 :                       || (ivloop && ivloop0
    2608        74786 :                           && flow_loop_nested_p (ivloop, ivloop0)))
    2609              :                     dr0_info = dr_info;
    2610              :                 }
    2611              : 
    2612       235465 :               one_misalignment_unknown = true;
    2613              : 
    2614              :               /* Check for data refs with unsupportable alignment that
    2615              :                  can be peeled.  */
    2616       235465 :               enum dr_alignment_support supportable_dr_alignment
    2617       235465 :                 = vect_supportable_dr_alignment (loop_vinfo, dr_info, vectype,
    2618              :                                                  DR_MISALIGNMENT_UNKNOWN);
    2619       235465 :               if (supportable_dr_alignment == dr_unaligned_unsupported)
    2620              :                 {
    2621        96639 :                   one_dr_unsupportable = true;
    2622        96639 :                   unsupportable_dr_info = dr_info;
    2623              :                 }
    2624              : 
    2625       235465 :               if (!first_store && DR_IS_WRITE (dr))
    2626              :                 {
    2627        51074 :                   first_store = dr_info;
    2628        51074 :                   first_store_same_align_drs = same_align_drs;
    2629              :                 }
    2630              :             }
    2631              :         }
    2632              :       else
    2633              :         {
    2634        77432 :           if (!aligned_access_p (dr_info, vectype))
    2635              :             {
    2636        45333 :               if (dump_enabled_p ())
    2637         2091 :                 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
    2638              :                                  "vector alignment may not be reachable\n");
    2639              :               break;
    2640              :             }
    2641              :         }
    2642              :     }
    2643              : 
    2644              :   /* Check if we can possibly peel the loop.  */
    2645       366784 :   if (!vect_can_advance_ivs_p (loop_vinfo)
    2646       363318 :       || !slpeel_can_duplicate_loop_p (loop, LOOP_VINFO_MAIN_EXIT (loop_vinfo),
    2647       363318 :                                        loop_preheader_edge (loop))
    2648       363318 :       || loop->inner
    2649              :       /* We don't currently maintain the LCSSA for prologue peeled inversed
    2650              :          loops.  */
    2651       728497 :       || (LOOP_VINFO_EARLY_BREAKS_VECT_PEELED (loop_vinfo)
    2652        29373 :           && !LOOP_VINFO_NITERS_UNCOUNTED_P (loop_vinfo)))
    2653              :     do_peeling = false;
    2654              : 
    2655       366784 :   struct _vect_peel_extended_info peel_for_known_alignment;
    2656       366784 :   struct _vect_peel_extended_info peel_for_unknown_alignment;
    2657       366784 :   struct _vect_peel_extended_info best_peel;
    2658              : 
    2659       366784 :   peel_for_unknown_alignment.inside_cost = INT_MAX;
    2660       366784 :   peel_for_unknown_alignment.outside_cost = INT_MAX;
    2661       366784 :   peel_for_unknown_alignment.peel_info.count = 0;
    2662              : 
    2663       366784 :   if (do_peeling
    2664       366784 :       && one_misalignment_unknown)
    2665              :     {
    2666              :       /* Check if the target requires to prefer stores over loads, i.e., if
    2667              :          misaligned stores are more expensive than misaligned loads (taking
    2668              :          drs with same alignment into account).  */
    2669       144008 :       unsigned int load_inside_cost = 0;
    2670       144008 :       unsigned int load_outside_cost = 0;
    2671       144008 :       unsigned int store_inside_cost = 0;
    2672       144008 :       unsigned int store_outside_cost = 0;
    2673       144008 :       unsigned int estimated_npeels = vect_vf_for_cost (loop_vinfo) / 2;
    2674              : 
    2675       144008 :       stmt_vector_for_cost dummy;
    2676       144008 :       dummy.create (2);
    2677       144008 :       vect_get_peeling_costs_all_drs (loop_vinfo, dr0_info,
    2678              :                                       &load_inside_cost,
    2679              :                                       &load_outside_cost,
    2680              :                                       &dummy, &dummy, estimated_npeels);
    2681       144008 :       dummy.release ();
    2682              : 
    2683       144008 :       if (first_store)
    2684              :         {
    2685        44308 :           dummy.create (2);
    2686        44308 :           vect_get_peeling_costs_all_drs (loop_vinfo, first_store,
    2687              :                                           &store_inside_cost,
    2688              :                                           &store_outside_cost,
    2689              :                                           &dummy, &dummy,
    2690              :                                           estimated_npeels);
    2691        44308 :           dummy.release ();
    2692              :         }
    2693              :       else
    2694              :         {
    2695        99700 :           store_inside_cost = INT_MAX;
    2696        99700 :           store_outside_cost = INT_MAX;
    2697              :         }
    2698              : 
    2699       144008 :       if (load_inside_cost > store_inside_cost
    2700       144008 :           || (load_inside_cost == store_inside_cost
    2701        43759 :               && load_outside_cost > store_outside_cost))
    2702              :         {
    2703       144008 :           dr0_info = first_store;
    2704       144008 :           dr0_same_align_drs = first_store_same_align_drs;
    2705       144008 :           peel_for_unknown_alignment.inside_cost = store_inside_cost;
    2706       144008 :           peel_for_unknown_alignment.outside_cost = store_outside_cost;
    2707              :         }
    2708              :       else
    2709              :         {
    2710       144008 :           peel_for_unknown_alignment.inside_cost = load_inside_cost;
    2711       144008 :           peel_for_unknown_alignment.outside_cost = load_outside_cost;
    2712              :         }
    2713              : 
    2714       144008 :       peel_for_unknown_alignment.outside_cost
    2715       144008 :         += vect_get_known_peeling_cost (loop_vinfo, estimated_npeels);
    2716              : 
    2717       144008 :       peel_for_unknown_alignment.peel_info.count = dr0_same_align_drs + 1;
    2718              :     }
    2719              : 
    2720       366784 :   peel_for_unknown_alignment.peel_info.npeel = 0;
    2721       366784 :   peel_for_unknown_alignment.peel_info.dr_info = dr0_info;
    2722              : 
    2723       366784 :   best_peel = peel_for_unknown_alignment;
    2724              : 
    2725       366784 :   peel_for_known_alignment.inside_cost = INT_MAX;
    2726       366784 :   peel_for_known_alignment.outside_cost = INT_MAX;
    2727       366784 :   peel_for_known_alignment.peel_info.count = 0;
    2728       366784 :   peel_for_known_alignment.peel_info.dr_info = NULL;
    2729              : 
    2730       366784 :   if (do_peeling && one_misalignment_known)
    2731              :     {
    2732              :       /* Peeling is possible, but there is no data access that is not supported
    2733              :          unless aligned.  So we try to choose the best possible peeling from
    2734              :          the hash table.  */
    2735       156925 :       peel_for_known_alignment = vect_peeling_hash_choose_best_peeling
    2736       156925 :         (&peeling_htab, loop_vinfo);
    2737              :     }
    2738              : 
    2739              :   /* Compare costs of peeling for known and unknown alignment. */
    2740       366784 :   if (peel_for_known_alignment.peel_info.dr_info != NULL
    2741       156925 :       && peel_for_unknown_alignment.inside_cost
    2742              :       >= peel_for_known_alignment.inside_cost)
    2743              :     {
    2744       142669 :       best_peel = peel_for_known_alignment;
    2745              : 
    2746              :       /* If the best peeling for known alignment has NPEEL == 0, perform no
    2747              :          peeling at all except if there is an unsupportable dr that we can
    2748              :          align.  */
    2749       142669 :       if (best_peel.peel_info.npeel == 0 && !one_dr_unsupportable)
    2750              :         do_peeling = false;
    2751              :     }
    2752              : 
    2753              :   /* If there is an unsupportable data ref, prefer this over all choices so far
    2754              :      since we'd have to discard a chosen peeling except when it accidentally
    2755              :      aligned the unsupportable data ref.  */
    2756       231788 :   if (one_dr_unsupportable)
    2757              :     dr0_info = unsupportable_dr_info;
    2758       287895 :   else if (do_peeling)
    2759              :     {
    2760              :       /* Calculate the penalty for no peeling, i.e. leaving everything as-is.
    2761              :          TODO: Use nopeel_outside_cost or get rid of it?  */
    2762        67525 :       unsigned nopeel_inside_cost = 0;
    2763        67525 :       unsigned nopeel_outside_cost = 0;
    2764              : 
    2765        67525 :       stmt_vector_for_cost dummy;
    2766        67525 :       dummy.create (2);
    2767        67525 :       vect_get_peeling_costs_all_drs (loop_vinfo, NULL, &nopeel_inside_cost,
    2768              :                                       &nopeel_outside_cost, &dummy, &dummy, 0);
    2769        67525 :       dummy.release ();
    2770              : 
    2771              :       /* Add epilogue costs.  As we do not peel for alignment here, no prologue
    2772              :          costs will be recorded.  */
    2773        67525 :       nopeel_outside_cost += vect_get_known_peeling_cost (loop_vinfo, 0);
    2774              : 
    2775        67525 :       npeel = best_peel.peel_info.npeel;
    2776        67525 :       dr0_info = best_peel.peel_info.dr_info;
    2777              : 
    2778              :       /* If leaving the loop unpeeled is no more expensive than the best
    2779              :          peeling we have so far, don't perform any peeling.  */
    2780        67525 :       if (nopeel_inside_cost <= best_peel.inside_cost)
    2781        60999 :         do_peeling = false;
    2782              :     }
    2783              : 
    2784       146414 :   if (do_peeling)
    2785              :     {
    2786        78400 :       stmt_vec_info stmt_info = dr0_info->stmt;
    2787        78400 :       if (known_alignment_for_access_p (dr0_info,
    2788              :                                         STMT_VINFO_VECTYPE (stmt_info)))
    2789              :         {
    2790         6503 :           bool negative = tree_int_cst_compare (DR_STEP (dr0_info->dr),
    2791         6503 :                                                 size_zero_node) < 0;
    2792         6503 :           if (!npeel)
    2793              :             {
    2794              :               /* Since it's known at compile time, compute the number of
    2795              :                  iterations in the peeled loop (the peeling factor) for use in
    2796              :                  updating DR_MISALIGNMENT values.  The peeling factor is the
    2797              :                  vectorization factor minus the misalignment as an element
    2798              :                  count.  */
    2799            0 :               tree vectype = STMT_VINFO_VECTYPE (stmt_info);
    2800            0 :               poly_int64 off = 0;
    2801            0 :               if (negative)
    2802            0 :                 off = ((TYPE_VECTOR_SUBPARTS (vectype) - 1)
    2803            0 :                        * -TREE_INT_CST_LOW (TYPE_SIZE_UNIT (TREE_TYPE (vectype))));
    2804            0 :               unsigned int mis
    2805            0 :                 = dr_misalignment (dr0_info, vectype, off);
    2806            0 :               mis = negative ? mis : -mis;
    2807              :               /* If known_alignment_for_access_p then we have set
    2808              :                  DR_MISALIGNMENT which is only done if we know it at compiler
    2809              :                  time, so it is safe to assume target alignment is constant.
    2810              :                */
    2811            0 :               unsigned int target_align =
    2812            0 :                 DR_TARGET_ALIGNMENT (dr0_info).to_constant ();
    2813            0 :               npeel = ((mis & (target_align - 1))
    2814            0 :                        / vect_get_scalar_dr_size (dr0_info));
    2815              :             }
    2816              : 
    2817              :           /* For interleaved data access every iteration accesses all the
    2818              :              members of the group, therefore we divide the number of iterations
    2819              :              by the group size.  */
    2820         6503 :           if (STMT_VINFO_GROUPED_ACCESS (stmt_info))
    2821          281 :             npeel /= DR_GROUP_SIZE (stmt_info);
    2822              : 
    2823         6503 :           if (dump_enabled_p ())
    2824          284 :             dump_printf_loc (MSG_NOTE, vect_location,
    2825              :                              "Try peeling by %d\n", npeel);
    2826              :         }
    2827              : 
    2828              :       /* Check how peeling for alignment can support vectorization.  Function
    2829              :          vect_peeling_supportable returns one of the three possible values:
    2830              :          - PEELING_KNOWN_SUPPORTED: indicates that we know all unsupported
    2831              :            datarefs can be aligned after peeling.  We can use peeling alone.
    2832              :          - PEELING_MAYBE_SUPPORTED: indicates that peeling may be able to make
    2833              :            these datarefs aligned but we are not sure about it at compile time.
    2834              :            We will try peeling with versioning to add a runtime check to guard
    2835              :            the peeled loop.
    2836              :          - PEELING_UNSUPPORTED: indicates that peeling is almost impossible to
    2837              :            support vectorization.  We will stop trying peeling.  */
    2838        78400 :       switch (vect_peeling_supportable (loop_vinfo, dr0_info, npeel))
    2839              :         {
    2840              :         case peeling_known_supported:
    2841              :           break;
    2842        13266 :         case peeling_maybe_supported:
    2843        13266 :           try_peeling_with_versioning = true;
    2844        13266 :           break;
    2845        15944 :         case peeling_unsupported:
    2846        15944 :           do_peeling = false;
    2847        15944 :           break;
    2848              :         }
    2849              : 
    2850              :       /* Check if all datarefs are supportable and log.  */
    2851        78400 :       if (do_peeling
    2852        78400 :           && npeel == 0
    2853        78400 :           && known_alignment_for_access_p (dr0_info,
    2854              :                                            STMT_VINFO_VECTYPE (stmt_info)))
    2855            3 :         return opt_result::success ();
    2856              : 
    2857              :       /* Cost model #1 - honor --param vect-max-peeling-for-alignment.  */
    2858        78397 :       if (do_peeling)
    2859              :         {
    2860        62453 :           unsigned max_allowed_peel
    2861        62453 :             = param_vect_max_peeling_for_alignment;
    2862        62453 :           if (loop_cost_model (loop) <= VECT_COST_MODEL_CHEAP)
    2863              :             max_allowed_peel = 0;
    2864        14565 :           if (max_allowed_peel != (unsigned)-1)
    2865              :             {
    2866        47909 :               unsigned max_peel = npeel;
    2867        47909 :               if (max_peel == 0)
    2868              :                 {
    2869        45146 :                   poly_uint64 target_align = DR_TARGET_ALIGNMENT (dr0_info);
    2870        45146 :                   unsigned HOST_WIDE_INT target_align_c;
    2871        45146 :                   if (target_align.is_constant (&target_align_c))
    2872        90292 :                     max_peel =
    2873        45146 :                       target_align_c / vect_get_scalar_dr_size (dr0_info) - 1;
    2874              :                   else
    2875              :                     {
    2876              :                       do_peeling = false;
    2877              :                       if (dump_enabled_p ())
    2878              :                         dump_printf_loc (MSG_NOTE, vect_location,
    2879              :                           "Disable peeling, max peels set and vector"
    2880              :                           " alignment unknown\n");
    2881              :                     }
    2882              :                 }
    2883        47909 :               if (max_peel > max_allowed_peel)
    2884              :                 {
    2885        47901 :                   do_peeling = false;
    2886        47901 :                   if (dump_enabled_p ())
    2887           53 :                     dump_printf_loc (MSG_NOTE, vect_location,
    2888              :                         "Disable peeling, max peels reached: %d\n", max_peel);
    2889              :                 }
    2890              :             }
    2891              :         }
    2892              : 
    2893              :       /* Cost model #2 - if peeling may result in a remaining loop not
    2894              :          iterating enough to be vectorized then do not peel.  Since this
    2895              :          is a cost heuristic rather than a correctness decision, use the
    2896              :          most likely runtime value for variable vectorization factors.  */
    2897           53 :       if (do_peeling
    2898        14552 :           && LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo))
    2899              :         {
    2900         3193 :           unsigned int assumed_vf = vect_vf_for_cost (loop_vinfo);
    2901         3193 :           unsigned int max_peel = npeel == 0 ? assumed_vf - 1 : npeel;
    2902         3193 :           if ((unsigned HOST_WIDE_INT) LOOP_VINFO_INT_NITERS (loop_vinfo)
    2903         3193 :               < assumed_vf + max_peel)
    2904              :             do_peeling = false;
    2905              :         }
    2906              : 
    2907              :       if (do_peeling)
    2908              :         {
    2909              :           /* (1.2) Update the DR_MISALIGNMENT of each data reference DR_i.
    2910              :              If the misalignment of DR_i is identical to that of dr0 then set
    2911              :              DR_MISALIGNMENT (DR_i) to zero.  If the misalignment of DR_i and
    2912              :              dr0 are known at compile time then increment DR_MISALIGNMENT (DR_i)
    2913              :              by the peeling factor times the element size of DR_i (MOD the
    2914              :              vectorization factor times the size).  Otherwise, the
    2915              :              misalignment of DR_i must be set to unknown.  */
    2916        30615 :           FOR_EACH_VEC_ELT (datarefs, i, dr)
    2917        16872 :             if (dr != dr0_info->dr)
    2918              :               {
    2919         3129 :                 dr_vec_info *dr_info = loop_vinfo->lookup_dr (dr);
    2920         3129 :                 if (!vect_relevant_for_alignment_p (dr_info))
    2921          354 :                   continue;
    2922              : 
    2923         2775 :                 vect_update_misalignment_for_peel (dr_info, dr0_info, npeel);
    2924              :               }
    2925              :         }
    2926              : 
    2927        78397 :       if (do_peeling && !try_peeling_with_versioning)
    2928              :         {
    2929              :           /* Update data structures if peeling will be applied alone.  */
    2930        12691 :           LOOP_VINFO_UNALIGNED_DR (loop_vinfo) = dr0_info;
    2931        12691 :           if (npeel)
    2932         2104 :             LOOP_VINFO_PEELING_FOR_ALIGNMENT (loop_vinfo) = npeel;
    2933              :           else
    2934        10587 :             LOOP_VINFO_PEELING_FOR_ALIGNMENT (loop_vinfo) = -1;
    2935        12691 :           SET_DR_MISALIGNMENT (dr0_info,
    2936              :                                vect_dr_misalign_for_aligned_access (dr0_info));
    2937        12691 :           if (dump_enabled_p ())
    2938              :             {
    2939          346 :               dump_printf_loc (MSG_NOTE, vect_location,
    2940              :                                "Alignment of access forced using peeling.\n");
    2941          346 :               dump_printf_loc (MSG_NOTE, vect_location,
    2942              :                                "Peeling for alignment will be applied.\n");
    2943              :             }
    2944              : 
    2945              :           /* The inside-loop cost will be accounted for in vectorizable_load
    2946              :              and vectorizable_store correctly with adjusted alignments.
    2947              :              Drop the body_cst_vec on the floor here.  */
    2948        12691 :           return opt_result::success ();
    2949              :         }
    2950              :     }
    2951              : 
    2952              :   /* (2) Versioning to force alignment.  */
    2953              : 
    2954              :   /* Try versioning if:
    2955              :      1) optimize loop for speed and the cost-model is not cheap
    2956              :      2) there is at least one unsupported misaligned data ref with an unknown
    2957              :         misalignment, and
    2958              :      3) all misaligned data refs with a known misalignment are supported, and
    2959              :      4) the number of runtime alignment checks is within reason.  */
    2960              : 
    2961       354090 :   do_versioning
    2962       354090 :     = (optimize_loop_nest_for_speed_p (loop)
    2963       353651 :        && !loop->inner /* FORNOW */
    2964       706136 :        && loop_cost_model (loop) > VECT_COST_MODEL_CHEAP);
    2965              : 
    2966              :   if (do_versioning)
    2967              :     {
    2968       357452 :       FOR_EACH_VEC_ELT (datarefs, i, dr)
    2969              :         {
    2970       268983 :           dr_vec_info *dr_info = loop_vinfo->lookup_dr (dr);
    2971       268983 :           if (!vect_relevant_for_alignment_p (dr_info))
    2972       188929 :             continue;
    2973              : 
    2974       185352 :           stmt_vec_info stmt_info = dr_info->stmt;
    2975       185352 :           if (STMT_VINFO_STRIDED_P (stmt_info))
    2976              :             {
    2977              :               do_versioning = false;
    2978         5041 :               break;
    2979              :             }
    2980              : 
    2981       184264 :           tree vectype = STMT_VINFO_VECTYPE (stmt_info);
    2982       184264 :           bool negative = tree_int_cst_compare (DR_STEP (dr),
    2983       184264 :                                                 size_zero_node) < 0;
    2984       184264 :           poly_int64 off = 0;
    2985       184264 :           if (negative)
    2986         3385 :             off = ((TYPE_VECTOR_SUBPARTS (vectype) - 1)
    2987         3385 :                    * -TREE_INT_CST_LOW (TYPE_SIZE_UNIT (TREE_TYPE (vectype))));
    2988       184264 :           int misalignment;
    2989       184264 :           if ((misalignment = dr_misalignment (dr_info, vectype, off)) == 0)
    2990       105298 :             continue;
    2991              : 
    2992        78966 :           enum dr_alignment_support supportable_dr_alignment
    2993        78966 :             = vect_supportable_dr_alignment (loop_vinfo, dr_info, vectype,
    2994              :                                              misalignment);
    2995        78966 :           if (supportable_dr_alignment == dr_unaligned_unsupported)
    2996              :             {
    2997        15357 :               if (misalignment != DR_MISALIGNMENT_UNKNOWN
    2998        15357 :                   || (LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo).length ()
    2999        11928 :                       >= (unsigned) param_vect_max_version_for_alignment_checks))
    3000              :                 {
    3001              :                   do_versioning = false;
    3002         5041 :                   break;
    3003              :                 }
    3004              : 
    3005              :               /* Forcing alignment in the first iteration is no good if
    3006              :                  we don't keep it across iterations.  For now, just disable
    3007              :                  versioning in this case.
    3008              :                  ?? We could actually unroll the loop to achieve the required
    3009              :                  overall step alignment, and forcing the alignment could be
    3010              :                  done by doing some iterations of the non-vectorized loop.  */
    3011        11520 :               if (!multiple_p (vf * DR_STEP_ALIGNMENT (dr),
    3012        11520 :                                DR_TARGET_ALIGNMENT (dr_info)))
    3013              :                 {
    3014              :                   do_versioning = false;
    3015              :                   break;
    3016              :                 }
    3017              : 
    3018              :               /* Use "mask = DR_TARGET_ALIGNMENT - 1" to test rightmost address
    3019              :                  bits for runtime alignment check.  For example, for 16 bytes
    3020              :                  target alignment the mask is 15 = 0xf.  */
    3021        11520 :               poly_uint64 mask = DR_TARGET_ALIGNMENT (dr_info) - 1;
    3022              : 
    3023              :               /* FORNOW: use the same mask to test all potentially unaligned
    3024              :                  references in the loop.  */
    3025        11520 :               if (maybe_ne (LOOP_VINFO_PTR_MASK (loop_vinfo), 0U)
    3026        11520 :                   && maybe_ne (LOOP_VINFO_PTR_MASK (loop_vinfo), mask))
    3027              :                 {
    3028              :                   do_versioning = false;
    3029              :                   break;
    3030              :                 }
    3031              : 
    3032        11404 :               LOOP_VINFO_PTR_MASK (loop_vinfo) = mask;
    3033        11404 :               LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo).safe_push (stmt_info);
    3034              :             }
    3035              :         }
    3036              : 
    3037              :       /* Versioning requires at least one misaligned data reference.  */
    3038        93510 :       if (!LOOP_REQUIRES_VERSIONING_FOR_ALIGNMENT (loop_vinfo))
    3039              :         do_versioning = false;
    3040         5698 :       else if (!do_versioning)
    3041          540 :         LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo).truncate (0);
    3042              :     }
    3043              : 
    3044              :   /* If we are trying peeling with versioning but versioning is disabled for
    3045              :      some reason, peeling should be turned off together.  */
    3046       354090 :   if (try_peeling_with_versioning && !do_versioning)
    3047              :     do_peeling = false;
    3048              : 
    3049       341968 :   if (do_versioning)
    3050              :     {
    3051              :       const vec<stmt_vec_info> &may_misalign_stmts
    3052              :         = LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo);
    3053              :       stmt_vec_info stmt_info;
    3054              : 
    3055              :       /* It can now be assumed that the data references in the statements
    3056              :          in LOOP_VINFO_MAY_MISALIGN_STMTS will be aligned in the version
    3057              :          of the loop being vectorized.  */
    3058        13922 :       FOR_EACH_VEC_ELT (may_misalign_stmts, i, stmt_info)
    3059              :         {
    3060         8764 :           dr_vec_info *dr_info = STMT_VINFO_DR_INFO (stmt_info);
    3061         8764 :           SET_DR_MISALIGNMENT (dr_info,
    3062              :                                vect_dr_misalign_for_aligned_access (dr_info));
    3063         8764 :           if (dump_enabled_p ())
    3064          146 :             dump_printf_loc (MSG_NOTE, vect_location,
    3065              :                              "Alignment of access forced using versioning.\n");
    3066              :         }
    3067              : 
    3068         5158 :       if (do_peeling)
    3069              :         {
    3070              :           /* This point is reached if peeling and versioning are used together
    3071              :              to ensure alignment.  Update data structures to make sure the loop
    3072              :              is correctly peeled and a right runtime check is added for loop
    3073              :              versioning.  */
    3074         1052 :           gcc_assert (try_peeling_with_versioning);
    3075         1052 :           LOOP_VINFO_UNALIGNED_DR (loop_vinfo) = dr0_info;
    3076         1052 :           LOOP_VINFO_PEELING_FOR_ALIGNMENT (loop_vinfo) = -1;
    3077         1052 :           LOOP_VINFO_ALLOW_MUTUAL_ALIGNMENT (loop_vinfo) = true;
    3078         1052 :           if (dump_enabled_p ())
    3079           11 :             dump_printf_loc (MSG_NOTE, vect_location,
    3080              :                              "Both peeling and versioning will be applied.\n");
    3081              :         }
    3082              :       else
    3083              :         {
    3084              :           /* This point is reached if versioning is used alone.  */
    3085         4106 :           LOOP_VINFO_ALLOW_MUTUAL_ALIGNMENT (loop_vinfo) = false;
    3086         4106 :           if (dump_enabled_p ())
    3087           82 :             dump_printf_loc (MSG_NOTE, vect_location,
    3088              :                              "Versioning for alignment will be applied.\n");
    3089              :         }
    3090              : 
    3091         5158 :       return opt_result::success ();
    3092              :     }
    3093              : 
    3094              :   /* This point is reached if neither peeling nor versioning is being done.  */
    3095       348932 :   gcc_assert (! (do_peeling || do_versioning));
    3096              : 
    3097       348932 :   return opt_result::success ();
    3098       747724 : }
    3099              : 
    3100              : 
    3101              : /* Function vect_analyze_data_refs_alignment
    3102              : 
    3103              :    Analyze the alignment of the data-references in the loop LOOP_VINFO.  */
    3104              : 
    3105              : void
    3106       412326 : vect_analyze_data_refs_alignment (loop_vec_info loop_vinfo)
    3107              : {
    3108       412326 :   DUMP_VECT_SCOPE ("vect_analyze_data_refs_alignment");
    3109              : 
    3110       412326 :   vec<data_reference_p> datarefs = LOOP_VINFO_DATAREFS (loop_vinfo);
    3111       412326 :   struct data_reference *dr;
    3112       412326 :   unsigned int i;
    3113              :   /* Record base alignments, then visit each data reference of the loop.  */
    3114       412326 :   vect_record_base_alignments (loop_vinfo);
    3115      1765556 :   FOR_EACH_VEC_ELT (datarefs, i, dr)
    3116              :     {
    3117       955389 :       dr_vec_info *dr_info = loop_vinfo->lookup_dr (dr);
    3118       955389 :       if (STMT_VINFO_VECTORIZABLE (dr_info->stmt))
    3119              :         {
    3120       955389 :           if (STMT_VINFO_GROUPED_ACCESS (dr_info->stmt)
    3121      1244297 :               && DR_GROUP_FIRST_ELEMENT (dr_info->stmt) != dr_info->stmt)
    3122       128383 :             continue;
    3123              :           /* Ungrouped accesses and first elements of groups get here.  */
    3124       827006 :           vect_compute_data_ref_alignment (loop_vinfo, dr_info,
    3125              :                                            STMT_VINFO_VECTYPE (dr_info->stmt));
    3126              :         }
    3127              :     }
    3128       412326 : }
    3129              : 
    3130              : 
    3131              : /* Analyze alignment of DRs of stmts in NODE.  Always returns true.  */
    3132              : 
    3133              : static bool
    3134       816286 : vect_slp_analyze_node_alignment (vec_info *vinfo, slp_tree node)
    3135              : {
    3136              :   /* Alignment is maintained in the first element of the group.  */
    3137       816286 :   stmt_vec_info first_stmt_info = SLP_TREE_SCALAR_STMTS (node)[0];
    3138       816286 :   first_stmt_info = DR_GROUP_FIRST_ELEMENT (first_stmt_info);
    3139       816286 :   dr_vec_info *dr_info = STMT_VINFO_DR_INFO (first_stmt_info);
    3140       816286 :   tree vectype = SLP_TREE_VECTYPE (node);
    3141       816286 :   poly_uint64 vector_alignment
    3142       816286 :     = exact_div (targetm.vectorize.preferred_vector_alignment (vectype),
    3143              :                  BITS_PER_UNIT);
    3144       816286 :   if (dr_info->misalignment == DR_MISALIGNMENT_UNINITIALIZED)
    3145       777708 :     vect_compute_data_ref_alignment (vinfo, dr_info, SLP_TREE_VECTYPE (node));
    3146              :   /* Re-analyze alignment when we're facing a vectorization with a bigger
    3147              :      alignment requirement.  */
    3148        38578 :   else if (known_lt (dr_info->target_alignment, vector_alignment))
    3149              :     {
    3150           71 :       poly_uint64 old_target_alignment = dr_info->target_alignment;
    3151           71 :       int old_misalignment = dr_info->misalignment;
    3152           71 :       vect_compute_data_ref_alignment (vinfo, dr_info, SLP_TREE_VECTYPE (node));
    3153              :       /* But keep knowledge about a smaller alignment if it became unknown.  */
    3154           71 :       if (old_misalignment != DR_MISALIGNMENT_UNKNOWN
    3155           38 :           && dr_info->misalignment == DR_MISALIGNMENT_UNKNOWN)
    3156              :         {
    3157            1 :           dr_info->target_alignment = old_target_alignment;
    3158            1 :           dr_info->misalignment = old_misalignment;
    3159              :         }
    3160              :     }
    3161              :   /* When we ever face unordered target alignments the first one wins in terms
    3162              :      of analyzing and the other will become unknown in dr_misalignment.  */
    3163       816286 :   return true;
    3164              : }
    3165              : 
    3166              : /* Function vect_slp_analyze_instance_alignment
    3167              : 
    3168              :    Analyze the alignment of the data-references in the SLP instance
    3169              :    INSTANCE.  Return FALSE if the analysis of any of its nodes fails.  */
    3170              : 
    3171              : bool
    3172       788684 : vect_slp_analyze_instance_alignment (vec_info *vinfo,
    3173              :                                                 slp_instance instance)
    3174              : {
    3175       788684 :   DUMP_VECT_SCOPE ("vect_slp_analyze_instance_alignment");
    3176              :   /* Analyze every load node recorded for the instance.  */
    3177       788684 :   slp_tree node;
    3178       788684 :   unsigned i;
    3179       944103 :   FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (instance), i, node)
    3180       155419 :     if (! vect_slp_analyze_node_alignment (vinfo, node))
    3181              :       return false;
    3182              :   /* For store instances also analyze the node in SLP_INSTANCE_TREE.  */
    3183       788684 :   if (SLP_INSTANCE_KIND (instance) == slp_inst_kind_store
    3184       788684 :       && ! vect_slp_analyze_node_alignment
    3185       660867 :              (vinfo, SLP_INSTANCE_TREE (instance)))
    3186              :     return false;
    3187              : 
    3188              :   return true;
    3189              : }
    3190              : 
    3191              : 
    3192              : /* Analyze groups of accesses: check that DR_INFO belongs to a group of
    3193              :    accesses of legal size, step, etc.  Detect gaps, single element
    3194              :    interleaving, and other special cases.  Set grouped access info.
    3195              :    Collect grouped stores for further use in SLP analysis.  Return false
    3196              :    if the access cannot be handled.  Worker for vect_analyze_group_access.  */
    3197              : 
    3198              : static bool
    3199     12500174 : vect_analyze_group_access_1 (vec_info *vinfo, dr_vec_info *dr_info)
    3200              : {
    3201     12500174 :   data_reference *dr = dr_info->dr;
    3202     12500174 :   tree step = DR_STEP (dr);
    3203     12500174 :   tree scalar_type = TREE_TYPE (DR_REF (dr));
    3204     12500174 :   HOST_WIDE_INT type_size = TREE_INT_CST_LOW (TYPE_SIZE_UNIT (scalar_type));
    3205     12500174 :   stmt_vec_info stmt_info = dr_info->stmt;
    3206     12500174 :   loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
    3207     12500174 :   bb_vec_info bb_vinfo = dyn_cast <bb_vec_info> (vinfo);
    3208     12500174 :   HOST_WIDE_INT dr_step = -1;
    3209     12500174 :   HOST_WIDE_INT groupsize, last_accessed_element = 1;
    3210     12500174 :   bool slp_impossible = false;
    3211              : 
    3212              :   /* For interleaving, GROUPSIZE is STEP counted in elements, i.e., the
    3213              :      size of the interleaving group (including gaps).  */
    3214     12500174 :   if (tree_fits_shwi_p (step))
    3215              :     {
    3216     12490576 :       dr_step = tree_to_shwi (step);
    3217              :       /* Check that STEP is a multiple of type size.  Otherwise there is
    3218              :          a non-element-sized gap at the end of the group which we
    3219              :          cannot represent in DR_GROUP_GAP or DR_GROUP_SIZE.
    3220              :          ???  As we can handle non-constant step fine here we should
    3221              :          simply remove uses of DR_GROUP_GAP between the last and first
    3222              :          element and instead rely on DR_STEP.  DR_GROUP_SIZE then would
    3223              :          simply not include that gap.  */
    3224     12490576 :       if ((dr_step % type_size) != 0)
    3225              :         {
    3226          498 :           if (dump_enabled_p ())
    3227           27 :             dump_printf_loc (MSG_NOTE, vect_location,
    3228              :                              "Step %T is not a multiple of the element size"
    3229              :                              " for %T\n",
    3230              :                              step, DR_REF (dr));
    3231          498 :           return false;
    3232              :         }
    3233     12490078 :       groupsize = absu_hwi (dr_step) / type_size;
    3234              :     }
    3235              :   else
    3236              :     groupsize = 0;
    3237              : 
    3238              :   /* Not consecutive access is possible only if it is a part of interleaving.  */
    3239     12499676 :   if (!DR_GROUP_FIRST_ELEMENT (stmt_info))
    3240              :     {
    3241              :       /* Check if this DR is a part of interleaving, and is a single
    3242              :          element of the group that is accessed in the loop.  */
    3243              : 
    3244              :       /* Gaps are supported only for loads. STEP must be a multiple of the type
    3245              :          size.  */
    3246      8369505 :       if (DR_IS_READ (dr)
    3247      4996780 :           && (dr_step % type_size) == 0
    3248              :           && groupsize > 0
    3249              :           /* This could be UINT_MAX but as we are generating code in a very
    3250              :              inefficient way we have to cap earlier.
    3251              :              See PR91403 for example.  */
    3252      4996780 :           && groupsize <= 4096)
    3253              :         {
    3254        73051 :           DR_GROUP_FIRST_ELEMENT (stmt_info) = stmt_info;
    3255        73051 :           DR_GROUP_SIZE (stmt_info) = groupsize;
    3256        73051 :           DR_GROUP_GAP (stmt_info) = groupsize - 1;
    3257        73051 :           if (dump_enabled_p ())
    3258         1492 :             dump_printf_loc (MSG_NOTE, vect_location,
    3259              :                              "Detected single element interleaving %T"
    3260              :                              " step %T\n",
    3261              :                              DR_REF (dr), step);
    3262              : 
    3263        73051 :           return true;
    3264              :         }
    3265              : 
    3266      8296454 :       if (dump_enabled_p ())
    3267         3130 :         dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
    3268              :                          "not consecutive access %G", stmt_info->stmt);
    3269              : 
    3270      8296454 :       if (bb_vinfo)
    3271              :         {
    3272              :           /* Mark the statement as unvectorizable.  */
    3273      8277521 :           STMT_VINFO_VECTORIZABLE (stmt_info) = false;
    3274      8277521 :           return true;
    3275              :         }
    3276              :       /* Otherwise treat the access as strided.  */
    3277        18933 :       if (dump_enabled_p ())
    3278          305 :         dump_printf_loc (MSG_NOTE, vect_location, "using strided accesses\n");
    3279        18933 :       STMT_VINFO_STRIDED_P (stmt_info) = true;
    3280        18933 :       return true;
    3281              :     }
    3282              : 
    3283      4130171 :   if (DR_GROUP_FIRST_ELEMENT (stmt_info) == stmt_info)
    3284              :     {
    3285              :       /* First stmt in the interleaving chain. Check the chain.  */
    3286      1499405 :       stmt_vec_info next = DR_GROUP_NEXT_ELEMENT (stmt_info);
    3287      1499405 :       struct data_reference *data_ref = dr;
    3288      1499405 :       unsigned int count = 1;
    3289      1499405 :       tree prev_init = DR_INIT (data_ref);
    3290      1499405 :       HOST_WIDE_INT diff, gaps = 0;
    3291              : 
    3292              :       /* By construction, all group members have INTEGER_CST DR_INITs.  */
    3293      4130180 :       while (next)
    3294              :         {
    3295              :           /* We never have the same DR multiple times.  */
    3296      2630837 :           gcc_assert (tree_int_cst_compare (DR_INIT (data_ref),
    3297              :                                 DR_INIT (STMT_VINFO_DATA_REF (next))) != 0);
    3298              : 
    3299      2630837 :           data_ref = STMT_VINFO_DATA_REF (next);
    3300              : 
    3301              :           /* All group members have the same STEP by construction.  */
    3302      2630837 :           gcc_checking_assert (operand_equal_p (DR_STEP (data_ref), step, 0));
    3303              : 
    3304              :           /* Check that the distance between two accesses is equal to the type
    3305              :              size. Otherwise, we have gaps.  */
    3306      2630837 :           diff = (TREE_INT_CST_LOW (DR_INIT (data_ref))
    3307      2630837 :                   - TREE_INT_CST_LOW (prev_init)) / type_size;
    3308      2630837 :           if (diff < 1 || diff > UINT_MAX)
    3309              :             {
    3310              :               /* For artificial testcases with array accesses with large
    3311              :                  constant indices we can run into overflow issues which
    3312              :                  can end up fooling the groupsize constraint below so
    3313              :                  check the individual gaps (which are represented as
    3314              :                  unsigned int) as well.  */
    3315            0 :               if (dump_enabled_p ())
    3316            0 :                 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
    3317              :                                  "interleaved access with gap larger "
    3318              :                                  "than representable\n");
    3319            0 :               return false;
    3320              :             }
    3321      2630837 :           if (diff != 1)
    3322              :             {
    3323              :               /* FORNOW: SLP of accesses with gaps is not supported.  */
    3324       102471 :               slp_impossible = true;
    3325       102471 :               if (DR_IS_WRITE (data_ref))
    3326              :                 {
    3327           62 :                   if (dump_enabled_p ())
    3328            0 :                     dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
    3329              :                                      "interleaved store with gaps\n");
    3330           62 :                   return false;
    3331              :                 }
    3332              : 
    3333       102409 :               gaps += diff - 1;
    3334              :             }
    3335              : 
    3336      2630775 :           last_accessed_element += diff;
    3337              : 
    3338              :           /* Store the gap from the previous member of the group. If there is no
    3339              :              gap in the access, DR_GROUP_GAP is always 1.  */
    3340      2630775 :           DR_GROUP_GAP (next) = diff;
    3341              : 
    3342      2630775 :           prev_init = DR_INIT (data_ref);
    3343      2630775 :           next = DR_GROUP_NEXT_ELEMENT (next);
    3344              :           /* Count the number of data-refs in the chain.  */
    3345      2630775 :           count++;
    3346              :         }
    3347              : 
    3348      1499343 :       if (groupsize == 0)
    3349      1429131 :         groupsize = count + gaps;
    3350              : 
    3351              :       /* This could be UINT_MAX but as we are generating code in a very
    3352              :          inefficient way we have to cap earlier.  See PR78699 for example.  */
    3353      1499343 :       if (groupsize > 4096)
    3354              :         {
    3355            1 :           if (dump_enabled_p ())
    3356            1 :             dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
    3357              :                              "group is too large\n");
    3358            1 :           return false;
    3359              :         }
    3360              : 
    3361              :       /* For stores the group size must equal COUNT, i.e., there are no gaps;
    3362              :          otherwise shrink the group to COUNT and mark it as strided.  */
    3363      1499342 :       if (groupsize != count
    3364       105924 :           && !DR_IS_READ (dr))
    3365              :         {
    3366        11368 :           groupsize = count;
    3367        11368 :           STMT_VINFO_STRIDED_P (stmt_info) = true;
    3368              :         }
    3369              : 
    3370              :       /* If there is a gap after the last load in the group it is the
    3371              :          difference between the groupsize and the last accessed
    3372              :          element.
    3373              :          When there is no gap, this difference should be 0.  */
    3374      1499342 :       DR_GROUP_GAP (stmt_info) = groupsize - last_accessed_element;
    3375              : 
    3376      1499342 :       DR_GROUP_SIZE (stmt_info) = groupsize;
    3377      1499342 :       if (dump_enabled_p ())
    3378              :         {
    3379         8020 :           dump_printf_loc (MSG_NOTE, vect_location,
    3380              :                            "Detected interleaving ");
    3381         8020 :           if (DR_IS_READ (dr))
    3382         4323 :             dump_printf (MSG_NOTE, "load ");
    3383         3697 :           else if (STMT_VINFO_STRIDED_P (stmt_info))
    3384          496 :             dump_printf (MSG_NOTE, "strided store ");
    3385              :           else
    3386         3201 :             dump_printf (MSG_NOTE, "store ");
    3387         8020 :           dump_printf (MSG_NOTE, "of size %u\n",
    3388              :                        (unsigned)groupsize);
    3389         8020 :           dump_printf_loc (MSG_NOTE, vect_location, "\t%G", stmt_info->stmt);
    3390         8020 :           next = DR_GROUP_NEXT_ELEMENT (stmt_info);
    3391        39497 :           while (next)
    3392              :             {
    3393        31477 :               if (DR_GROUP_GAP (next) != 1)
    3394          317 :                 dump_printf_loc (MSG_NOTE, vect_location,
    3395              :                                  "\t<gap of %d elements>\n",
    3396          317 :                                  DR_GROUP_GAP (next) - 1);
    3397        31477 :               dump_printf_loc (MSG_NOTE, vect_location, "\t%G", next->stmt);
    3398        31477 :               next = DR_GROUP_NEXT_ELEMENT (next);
    3399              :             }
    3400         8020 :           if (DR_GROUP_GAP (stmt_info) != 0)
    3401          398 :             dump_printf_loc (MSG_NOTE, vect_location,
    3402              :                              "\t<gap of %d elements>\n",
    3403          398 :                              DR_GROUP_GAP (stmt_info));
    3404              :         }
    3405              : 
    3406              :       /* SLP: create an SLP data structure for every interleaving group of
    3407              :          stores for further analysis in vect_analyse_slp.  */
    3408      1499342 :       if (DR_IS_WRITE (dr) && !slp_impossible)
    3409              :         {
    3410       920838 :           if (loop_vinfo)
    3411        29369 :             LOOP_VINFO_GROUPED_STORES (loop_vinfo).safe_push (stmt_info);
    3412       920838 :           if (bb_vinfo)
    3413       891469 :             BB_VINFO_GROUPED_STORES (bb_vinfo).safe_push (stmt_info);
    3414              :         }
    3415              :     }
    3416              : 
    3417              :   return true;
    3418              : }
    3419              : 
    3420              : /* Analyze groups of accesses: check that DR_INFO belongs to a group of
    3421              :    accesses of legal size, step, etc.  Detect gaps, single element
    3422              :    interleaving, and other special cases.  Set grouped access info and
    3423              :    collect grouped stores for SLP.  Dissolve the group on failure.  */
    3424              : 
    3425              : static bool
    3426     12500174 : vect_analyze_group_access (vec_info *vinfo, dr_vec_info *dr_info)
    3427              : {
    3428     12500174 :   if (!vect_analyze_group_access_1 (vinfo, dr_info))
    3429              :     {
    3430              :       /* Dissolve the group if present, unlinking every member in turn.  */
    3431          561 :       stmt_vec_info stmt_info = DR_GROUP_FIRST_ELEMENT (dr_info->stmt);
    3432          792 :       while (stmt_info)
    3433              :         {
    3434          231 :           stmt_vec_info next = DR_GROUP_NEXT_ELEMENT (stmt_info);
    3435          231 :           DR_GROUP_FIRST_ELEMENT (stmt_info) = NULL;
    3436          231 :           DR_GROUP_NEXT_ELEMENT (stmt_info) = NULL;
    3437          231 :           stmt_info = next;
    3438              :         }
    3439              :       return false;
    3440              :     }
    3441              :   return true;
    3442              : }
    3443              : 
    3444              : /* Analyze the access pattern of the data-reference DR_INFO.
    3445              :    In case of non-consecutive accesses call vect_analyze_group_access() to
    3446              :    analyze groups of accesses.  Return false for unsupported accesses.  */
    3447              : 
    3448              : static bool
    3449     13254921 : vect_analyze_data_ref_access (vec_info *vinfo, dr_vec_info *dr_info)
    3450              : {
    3451     13254921 :   data_reference *dr = dr_info->dr;
    3452     13254921 :   tree step = DR_STEP (dr);
    3453     13254921 :   tree scalar_type = TREE_TYPE (DR_REF (dr));
    3454     13254921 :   stmt_vec_info stmt_info = dr_info->stmt;
    3455     13254921 :   loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
    3456     13254921 :   class loop *loop = NULL;
    3457              :   /* Gather/scatter accesses are accepted without further analysis.  */
    3458     13254921 :   if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
    3459              :     return true;
    3460              : 
    3461     13154910 :   if (loop_vinfo)
    3462       968958 :     loop = LOOP_VINFO_LOOP (loop_vinfo);
    3463              :   /* In loop vectorization a data reference without a step is rejected.  */
    3464     13154910 :   if (loop_vinfo && !step)
    3465              :     {
    3466            0 :       if (dump_enabled_p ())
    3467            0 :         dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
    3468              :                          "bad data-ref access in loop\n");
    3469            0 :       return false;
    3470              :     }
    3471              : 
    3472              :   /* Allow loads with zero step in inner-loop vectorization.  */
    3473     13154910 :   if (loop_vinfo && integer_zerop (step))
    3474              :     {
    3475        14162 :       DR_GROUP_FIRST_ELEMENT (stmt_info) = NULL;
    3476        14162 :       DR_GROUP_NEXT_ELEMENT (stmt_info) = NULL;
    3477        14162 :       if (!nested_in_vect_loop_p (loop, stmt_info))
    3478        13901 :         return DR_IS_READ (dr);
    3479              :       /* Allow references with zero step for outer loops marked
    3480              :          with pragma omp simd only - it guarantees absence of
    3481              :          loop-carried dependencies between inner loop iterations.  */
    3482          261 :       if (loop->safelen < 2)
    3483              :         {
    3484          225 :           if (dump_enabled_p ())
    3485            6 :             dump_printf_loc (MSG_NOTE, vect_location,
    3486              :                              "zero step in inner loop of nest\n");
    3487          225 :           return false;
    3488              :         }
    3489              :     }
    3490              : 
    3491     13140748 :   if (loop && nested_in_vect_loop_p (loop, stmt_info))
    3492              :     {
    3493              :       /* Interleaved accesses are not yet supported within outer-loop
    3494              :          vectorization for references in the inner-loop.  */
    3495         5812 :       DR_GROUP_FIRST_ELEMENT (stmt_info) = NULL;
    3496         5812 :       DR_GROUP_NEXT_ELEMENT (stmt_info) = NULL;
    3497              : 
    3498              :       /* For the rest of the analysis we use the outer-loop step.  */
    3499         5812 :       step = STMT_VINFO_DR_STEP (stmt_info);
    3500         5812 :       if (integer_zerop (step))
    3501              :         {
    3502         1290 :           if (dump_enabled_p ())
    3503          241 :             dump_printf_loc (MSG_NOTE, vect_location,
    3504              :                              "zero step in outer loop.\n");
    3505         1290 :           return DR_IS_READ (dr);
    3506              :         }
    3507              :     }
    3508              : 
    3509              :   /* Consecutive?  STEP equals the scalar type size, or its negation.  */
    3510     13139494 :   if (TREE_CODE (step) == INTEGER_CST)
    3511              :     {
    3512     13100216 :       HOST_WIDE_INT dr_step = TREE_INT_CST_LOW (step);
    3513     13100216 :       if (!tree_int_cst_compare (step, TYPE_SIZE_UNIT (scalar_type))
    3514     13100216 :           || (dr_step < 0
    3515        28891 :               && !compare_tree_int (TYPE_SIZE_UNIT (scalar_type), -dr_step)))
    3516              :         {
    3517              :           /* Mark that it is not interleaving.  */
    3518       606769 :           DR_GROUP_FIRST_ELEMENT (stmt_info) = NULL;
    3519       606769 :           DR_GROUP_NEXT_ELEMENT (stmt_info) = NULL;
    3520       606769 :           return true;
    3521              :         }
    3522              :     }
    3523              :   /* Non-consecutive accesses nested in the vectorized loop are rejected.  */
    3524     12532725 :   if (loop && nested_in_vect_loop_p (loop, stmt_info))
    3525              :     {
    3526         3330 :       if (dump_enabled_p ())
    3527          163 :         dump_printf_loc (MSG_NOTE, vect_location,
    3528              :                          "grouped access in outer loop.\n");
    3529         3330 :       return false;
    3530              :     }
    3531              : 
    3532              : 
    3533              :   /* Assume this is a DR handled by non-constant strided load case.  */
    3534     12529395 :   if (TREE_CODE (step) != INTEGER_CST)
    3535        38819 :     return (STMT_VINFO_STRIDED_P (stmt_info)
    3536        38819 :             && (!STMT_VINFO_GROUPED_ACCESS (stmt_info)
    3537         9598 :                 || vect_analyze_group_access (vinfo, dr_info)));
    3538              : 
    3539              :   /* Not consecutive access - check if it's a part of interleaving group.  */
    3540     12490576 :   return vect_analyze_group_access (vinfo, dr_info);
    3541              : }
    3542              : 
    3543              : /* Compare two data-references DRA and DRB to group them into chunks
    3544              :    suitable for grouping.
                      :
                      :    This is the comparison callback handed to qsort by
                      :    vect_analyze_data_ref_accesses.  DRA_ and DRB_ each point to an
                      :    element of the sorted vector, i.e. to a dr_vec_info pointer.
                      :
                      :    The sort keys are, in order: the dr_vec_info group ID,
                      :    DR_BASE_ADDRESS, DR_OFFSET, reads before writes, the size of the
                      :    accessed type, DR_STEP, DR_INIT and finally the gimple UID of the
                      :    statement.  Zero is returned only when both elements refer to the
                      :    very same data reference.  */
    3545              : 
    3546              : static int
    3547    345861699 : dr_group_sort_cmp (const void *dra_, const void *drb_)
    3548              : {
    3549    345861699 :   dr_vec_info *dra_info = *(dr_vec_info **)const_cast<void *>(dra_);
    3550    345861699 :   dr_vec_info *drb_info = *(dr_vec_info **)const_cast<void *>(drb_);
    3551    345861699 :   data_reference_p dra = dra_info->dr;
    3552    345861699 :   data_reference_p drb = drb_info->dr;
    3553    345861699 :   int cmp;
    3554              : 
    3555              :   /* Stabilize sort.  */
    3556    345861699 :   if (dra == drb)
    3557              :     return 0;
    3558              : 
    3559              :   /* DRs with different group IDs never belong to the same group.  */
    3560    345861699 :   if (dra_info->group != drb_info->group)
    3561    377032164 :     return dra_info->group < drb_info->group ? -1 : 1;
    3562              : 
    3563              :   /* Ordering of DRs according to base.  */
    3564     97832872 :   cmp = data_ref_compare_tree (DR_BASE_ADDRESS (dra),
    3565              :                                DR_BASE_ADDRESS (drb));
    3566     97832872 :   if (cmp != 0)
    3567              :     return cmp;
    3568              : 
    3569              :   /* And according to DR_OFFSET.  */
    3570     52980237 :   cmp = data_ref_compare_tree (DR_OFFSET (dra), DR_OFFSET (drb));
    3571     52980237 :   if (cmp != 0)
    3572              :     return cmp;
    3573              : 
    3574              :   /* Put reads before writes.  */
    3575     52629372 :   if (DR_IS_READ (dra) != DR_IS_READ (drb))
    3576      4265279 :     return DR_IS_READ (dra) ? -1 : 1;
    3577              : 
    3578              :   /* Then sort after access size.  */
    3579     49727981 :   cmp = data_ref_compare_tree (TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (dra))),
    3580     49727981 :                                TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (drb))));
    3581     49727981 :   if (cmp != 0)
    3582              :     return cmp;
    3583              : 
    3584              :   /* And after step.  */
    3585     43011253 :   cmp = data_ref_compare_tree (DR_STEP (dra), DR_STEP (drb));
    3586     43011253 :   if (cmp != 0)
    3587              :     return cmp;
    3588              : 
    3589              :   /* Then sort after DR_INIT.  In case of identical DRs sort after stmt UID.  */
    3590     43003879 :   cmp = data_ref_compare_tree (DR_INIT (dra), DR_INIT (drb));
    3591     43003879 :   if (cmp == 0)
    3592       497942 :     return gimple_uid (DR_STMT (dra)) < gimple_uid (DR_STMT (drb)) ? -1 : 1;
    3593              :   return cmp;
    3594              : }
    3595              : 
    3596              : /* If OP is the result of a conversion, return the unconverted value,
    3597              :    otherwise return null.
                      :
                      :    More precisely, a non-null result requires OP to be an SSA_NAME
                      :    whose defining statement is a gimple assignment with a rhs code
                      :    satisfying CONVERT_EXPR_CODE_P; the value returned is then the
                      :    first rhs operand of that assignment.  Only one level of
                      :    conversion is looked through.  */
    3598              : 
    3599              : static tree
    3600          402 : strip_conversion (tree op)
    3601              : {
    3602          402 :   if (TREE_CODE (op) != SSA_NAME)
    3603              :     return NULL_TREE;
    3604          402 :   gimple *stmt = SSA_NAME_DEF_STMT (op);
    3605          402 :   if (!is_gimple_assign (stmt)
    3606          402 :       || !CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (stmt)))
    3607              :     return NULL_TREE;
    3608          186 :   return gimple_assign_rhs1 (stmt);
    3609              : }
    3610              : 
    3611              : /* Return true if vectorizable_* routines can handle statements STMT1_INFO
    3612              :    and STMT2_INFO being in a single group.  When ALLOW_SLP_P, masked loads can
    3613              :    be grouped in SLP mode.
                      :
                      :    The accepted combinations are:
                      :    - STMT1_INFO is a single assignment: the result is whether
                      :      STMT2_INFO is a single assignment as well.
                      :    - Both statements are calls to the same internal function, which
                      :      must be IFN_MASK_LOAD or IFN_MASK_STORE.  If ALLOW_SLP_P is false
                      :      the masks (call argument 2) must additionally be equal, either
                      :      directly or after stripping one conversion from each of them;
                      :      if ALLOW_SLP_P is true differing masks are accepted.
                      :    Everything else yields false.  */
    3614              : 
    3615              : static bool
    3616      7020828 : can_group_stmts_p (stmt_vec_info stmt1_info, stmt_vec_info stmt2_info,
    3617              :                    bool allow_slp_p)
    3618              : {
    3619      7020828 :   if (gimple_assign_single_p (stmt1_info->stmt))
    3620      7018926 :     return gimple_assign_single_p (stmt2_info->stmt);
    3621              : 
    3622         1902 :   gcall *call1 = dyn_cast <gcall *> (stmt1_info->stmt);
    3623         1902 :   if (call1 && gimple_call_internal_p (call1))
    3624              :     {
    3625              :       /* Check for two masked loads or two masked stores.  */
    3626         2155 :       gcall *call2 = dyn_cast <gcall *> (stmt2_info->stmt);
    3627         1886 :       if (!call2 || !gimple_call_internal_p (call2))
    3628              :         return false;
    3629         1886 :       internal_fn ifn = gimple_call_internal_fn (call1);
    3630         1886 :       if (ifn != IFN_MASK_LOAD && ifn != IFN_MASK_STORE)
    3631              :         return false;
    3632         1886 :       if (ifn != gimple_call_internal_fn (call2))
    3633              :         return false;
    3634              : 
    3635              :       /* Check that the masks are the same.  Cope with casts of masks,
    3636              :          like those created by build_mask_conversion.  The whole check
                      :          is skipped when ALLOW_SLP_P.  */
    3637         1886 :       tree mask1 = gimple_call_arg (call1, 2);
    3638         1886 :       tree mask2 = gimple_call_arg (call2, 2);
    3639         1886 :       if (!operand_equal_p (mask1, mask2, 0) && !allow_slp_p)
    3640              :         {
    3641          309 :           mask1 = strip_conversion (mask1);
    3642          309 :           if (!mask1)
    3643              :             return false;
    3644           93 :           mask2 = strip_conversion (mask2);
    3645           93 :           if (!mask2)
    3646              :             return false;
    3647           93 :           if (!operand_equal_p (mask1, mask2, 0))
    3648              :             return false;
    3649              :         }
    3650         1633 :       return true;
    3651              :     }
    3652              : 
    3653              :   return false;
    3654              : }
    3655              : 
    3656              : /* Function vect_analyze_data_ref_accesses.
    3657              : 
    3658              :    Analyze the access pattern of all the data references in the loop.
    3659              : 
    3660              :    FORNOW: the only access pattern that is considered vectorizable is a
    3661              :            simple step 1 (consecutive) access.
    3662              : 
    3663              :    FORNOW: handle only arrays and pointer accesses.
                      :
                      :    VINFO supplies the data references (VINFO->shared->datarefs).
                      :    DATAREF_GROUPS, if non-null, holds one group ID per data reference,
                      :    indexed like the datarefs vector; if it is null the index of the
                      :    basic block containing the DR's statement is used as group ID.
                      :
                      :    The function works in four steps:
                      :    1. Sort a copy of the DRs with dr_group_sort_cmp.
                      :    2. Walk the sorted copy and link compatible neighbouring DRs into
                      :       interleaving chains through DR_GROUP_FIRST_ELEMENT and
                      :       DR_GROUP_NEXT_ELEMENT.
                      :    3. Split every chain that contains duplicate accesses.
                      :    4. Run vect_analyze_data_ref_access on every DR whose statement is
                      :       still marked vectorizable.
                      :
                      :    If step 4 fails for a DR and VINFO is a bb_vec_info, the statement
                      :    is merely marked as not vectorizable and analysis continues;
                      :    otherwise a failure is returned for the first such DR.  An empty
                      :    set of data references is a success.  */
    3664              : 
    3665              : opt_result
    3666      2624173 : vect_analyze_data_ref_accesses (vec_info *vinfo,
    3667              :                                 vec<int> *dataref_groups)
    3668              : {
    3669      2624173 :   unsigned int i;
    3670      2624173 :   vec<data_reference_p> datarefs = vinfo->shared->datarefs;
    3671              : 
    3672      2624173 :   DUMP_VECT_SCOPE ("vect_analyze_data_ref_accesses");
    3673              : 
    3674      2624173 :   if (datarefs.is_empty ())
    3675      1048828 :     return opt_result::success ();
    3676              : 
    3677              :   /* Sort the array of datarefs to make building the interleaving chains
    3678              :      linear.  Don't modify the original vector's order, it is needed for
    3679              :      determining what dependencies are reversed.  */
    3680      1575345 :   vec<dr_vec_info *> datarefs_copy;
    3681      1575345 :   datarefs_copy.create (datarefs.length ());
    3682     16592463 :   for (unsigned i = 0; i < datarefs.length (); i++)
    3683              :     {
    3684     15017118 :       dr_vec_info *dr_info = vinfo->lookup_dr (datarefs[i]);
    3685              :       /* If the caller computed DR grouping use that, otherwise group by
    3686              :          basic blocks.  */
    3687     15017118 :       if (dataref_groups)
    3688     13934131 :         dr_info->group = (*dataref_groups)[i];
    3689              :       else
    3690      1082987 :         dr_info->group = gimple_bb (DR_STMT (datarefs[i]))->index;
    3691     15017118 :       datarefs_copy.quick_push (dr_info);
    3692              :     }
    3693      1575345 :   datarefs_copy.qsort (dr_group_sort_cmp);
    3694      1575345 :   hash_set<stmt_vec_info> to_fixup;
    3695              : 
    3696              :   /* Build the interleaving chains.  The outer loop picks a candidate
                      :      group leader DRA; the inner loop then advances I over the
                      :      following DRs for as long as they can be chained to DRA, so the
                      :      outer loop resumes at the first DR that broke the chain.  */
    3697     14169326 :   for (i = 0; i < datarefs_copy.length () - 1;)
    3698              :     {
    3699     11018636 :       dr_vec_info *dr_info_a = datarefs_copy[i];
    3700     11018636 :       data_reference_p dra = dr_info_a->dr;
    3701     11018636 :       int dra_group_id = dr_info_a->group;
    3702     11018636 :       stmt_vec_info stmtinfo_a = dr_info_a->stmt;
    3703     11018636 :       stmt_vec_info lastinfo = NULL;
    3704     11018636 :       if (!STMT_VINFO_VECTORIZABLE (stmtinfo_a)
    3705      9406428 :           || STMT_VINFO_GATHER_SCATTER_P (stmtinfo_a))
    3706              :         {
    3707      1677107 :           ++i;
    3708      1677107 :           continue;
    3709              :         }
    3710     24600129 :       for (i = i + 1; i < datarefs_copy.length (); ++i)
    3711              :         {
    3712     11764666 :           dr_vec_info *dr_info_b = datarefs_copy[i];
    3713     11764666 :           data_reference_p drb = dr_info_b->dr;
    3714     11764666 :           int drb_group_id = dr_info_b->group;
    3715     11764666 :           stmt_vec_info stmtinfo_b = dr_info_b->stmt;
    3716     11764666 :           if (!STMT_VINFO_VECTORIZABLE (stmtinfo_b)
    3717     11458902 :               || STMT_VINFO_GATHER_SCATTER_P (stmtinfo_b))
    3718              :             break;
    3719              : 
    3720              :           /* ???  Imperfect sorting (non-compatible types, non-modulo
    3721              :              accesses, same accesses) can lead to a group to be artificially
    3722              :              split here as we don't just skip over those.  If it really
    3723              :              matters we can push those to a worklist and re-iterate
    3724              :              over them.  Then we can just skip ahead to the next DR here.  */
    3725              : 
    3726              :           /* DRs in a different DR group should not be put into the same
    3727              :              interleaving group.  */
    3728     11455281 :           if (dra_group_id != drb_group_id)
    3729              :             break;
    3730              : 
    3731              :           /* Check that the data-refs have same first location (except init)
    3732              :              and they are both either store or load (not load and store,
    3733              :              not masked loads or stores).  */
    3734      7287543 :           if (DR_IS_READ (dra) != DR_IS_READ (drb)
    3735      5993262 :               || data_ref_compare_tree (DR_BASE_ADDRESS (dra),
    3736              :                                         DR_BASE_ADDRESS (drb)) != 0
    3737      4375626 :               || data_ref_compare_tree (DR_OFFSET (dra), DR_OFFSET (drb)) != 0
    3738     11644174 :               || !can_group_stmts_p (stmtinfo_a, stmtinfo_b, true))
    3739              :             break;
    3740              : 
    3741              :           /* Check that the data-refs have the same constant size.  */
    3742      4356606 :           tree sza = TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (dra)));
    3743      4356606 :           tree szb = TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (drb)));
    3744      4356606 :           if (!tree_fits_uhwi_p (sza)
    3745      4356606 :               || !tree_fits_uhwi_p (szb)
    3746      8713212 :               || !tree_int_cst_equal (sza, szb))
    3747              :             break;
    3748              : 
    3749              :           /* Check that the data-refs have the same step.  */
    3750      4011781 :           if (data_ref_compare_tree (DR_STEP (dra), DR_STEP (drb)) != 0)
    3751              :             break;
    3752              : 
    3753              :           /* Check the types are compatible.
    3754              :              ???  We don't distinguish this during sorting.  */
    3755      4011061 :           if (!types_compatible_p (TREE_TYPE (DR_REF (dra)),
    3756      4011061 :                                    TREE_TYPE (DR_REF (drb))))
    3757              :             break;
    3758              : 
    3759              :           /* Check that the DR_INITs are compile-time constants.  */
    3760      2875753 :           if (!tree_fits_shwi_p (DR_INIT (dra))
    3761      2875753 :               || !tree_fits_shwi_p (DR_INIT (drb)))
    3762              :             break;
    3763              : 
    3764              :           /* Different .GOMP_SIMD_LANE calls still give the same lane,
    3765              :              just hold extra information.  */
    3766      2875753 :           if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmtinfo_a)
    3767         1240 :               && STMT_VINFO_SIMD_LANE_ACCESS_P (stmtinfo_b)
    3768      2876993 :               && data_ref_compare_tree (DR_INIT (dra), DR_INIT (drb)) == 0)
    3769              :             break;
    3770              : 
    3771              :           /* Sorting has ensured that DR_INIT (dra) <= DR_INIT (drb).  */
    3772      2874513 :           HOST_WIDE_INT init_a = TREE_INT_CST_LOW (DR_INIT (dra));
    3773      2874513 :           HOST_WIDE_INT init_b = TREE_INT_CST_LOW (DR_INIT (drb));
    3774      2874513 :           HOST_WIDE_INT init_prev
    3775      2874513 :             = TREE_INT_CST_LOW (DR_INIT (datarefs_copy[i-1]->dr));
    3776      2874513 :           gcc_assert (init_a <= init_b
    3777              :                       && init_a <= init_prev
    3778              :                       && init_prev <= init_b);
    3779              : 
    3780              :           /* Do not place the same access in the interleaving chain twice.  */
    3781      2874513 :           if (init_b == init_prev)
    3782              :             {
    3783        29902 :               gcc_assert (gimple_uid (DR_STMT (datarefs_copy[i-1]->dr))
    3784              :                           < gimple_uid (DR_STMT (drb)));
    3785              :               /* Simply link in duplicates and fix up the chain below.  */
    3786              :             }
    3787              :           else
    3788              :             {
    3789              :               /* If init_b == init_a + the size of the type * k, we have an
    3790              :                  interleaving, and DRA is accessed before DRB.  */
    3791      2844611 :               unsigned HOST_WIDE_INT type_size_a = tree_to_uhwi (sza);
    3792      2844611 :               if (type_size_a == 0
    3793      2844611 :                   || (((unsigned HOST_WIDE_INT)init_b - init_a)
    3794      2844611 :                       % type_size_a != 0))
    3795              :                 break;
    3796              : 
    3797              :               /* If we have a store, the accesses are adjacent.  This splits
    3798              :                  groups into chunks we support (we don't support vectorization
    3799              :                  of stores with gaps).  */
    3800      2842885 :               if (!DR_IS_READ (dra)
    3801      1865366 :                   && (((unsigned HOST_WIDE_INT)init_b - init_prev)
    3802              :                       != type_size_a))
    3803              :                 break;
    3804              : 
    3805              :               /* For datarefs with big gap, it's better to split them into different
    3806              :                  groups,
    3807              :                  i.e. a[0], a[1], a[2], .. a[7], a[100], a[101],..., a[107]  */
    3808      2663537 :               if ((unsigned HOST_WIDE_INT)(init_b - init_prev)
    3809              :                   > MAX_BITSIZE_MODE_ANY_MODE / BITS_PER_UNIT)
    3810              :                 break;
    3811              : 
    3812              :               /* If the step (if not zero or non-constant) is smaller than the
    3813              :                  difference between data-refs' inits this splits groups into
    3814              :                  suitable sizes.  */
    3815      2653944 :               if (tree_fits_shwi_p (DR_STEP (dra)))
    3816              :                 {
    3817      2647636 :                   unsigned HOST_WIDE_INT step
    3818      2647636 :                     = absu_hwi (tree_to_shwi (DR_STEP (dra)));
    3819      2647636 :                   if (step != 0
    3820       164399 :                       && step <= ((unsigned HOST_WIDE_INT)init_b - init_a))
    3821              :                     break;
    3822              :                 }
    3823              :             }
    3824              : 
    3825      2664619 :           if (dump_enabled_p ())
    3826        32385 :             dump_printf_loc (MSG_NOTE, vect_location,
    3827        32385 :                              DR_IS_READ (dra)
    3828              :                              ? "Detected interleaving load %T and %T\n"
    3829              :                              : "Detected interleaving store %T and %T\n",
    3830              :                              DR_REF (dra), DR_REF (drb));
    3831              : 
    3832              :           /* Link the found element into the group list.  The first time
                      :              something is chained to DRA, DRA becomes its own group
                      :              leader and the tail of the chain.  */
    3833      2664619 :           if (!DR_GROUP_FIRST_ELEMENT (stmtinfo_a))
    3834              :             {
    3835      1477725 :               DR_GROUP_FIRST_ELEMENT (stmtinfo_a) = stmtinfo_a;
    3836      1477725 :               lastinfo = stmtinfo_a;
    3837              :             }
    3838      2664619 :           DR_GROUP_FIRST_ELEMENT (stmtinfo_b) = stmtinfo_a;
    3839      2664619 :           DR_GROUP_NEXT_ELEMENT (lastinfo) = stmtinfo_b;
    3840      2664619 :           lastinfo = stmtinfo_b;
    3841              : 
    3842      2664619 :           if (! STMT_VINFO_SLP_VECT_ONLY (stmtinfo_a))
    3843              :             {
    3844      2664197 :               STMT_VINFO_SLP_VECT_ONLY (stmtinfo_a)
    3845      2664197 :                 = !can_group_stmts_p (stmtinfo_a, stmtinfo_b, false);
    3846              : 
    3847      2664197 :               if (dump_enabled_p () && STMT_VINFO_SLP_VECT_ONLY (stmtinfo_a))
    3848          126 :                 dump_printf_loc (MSG_NOTE, vect_location,
    3849              :                                  "Load suitable for SLP vectorization only.\n");
    3850              :             }
    3851              : 
    3852      2664619 :           if (init_b == init_prev
    3853        29902 :               && !to_fixup.add (DR_GROUP_FIRST_ELEMENT (stmtinfo_a))
    3854      2681129 :               && dump_enabled_p ())
    3855          213 :             dump_printf_loc (MSG_NOTE, vect_location,
    3856              :                              "Queuing group with duplicate access for fixup\n");
    3857              :         }
    3858              :     }
    3859              : 
    3860              :   /* Fixup groups with duplicate entries by splitting them.  TO_FIXUP is
                      :      used as a worklist: each iteration removes one group leader and
                      :      may queue the leader of the group that was split off.  */
    3861      1618511 :   while (1)
    3862              :     {
    3863      1618511 :       hash_set<stmt_vec_info>::iterator it = to_fixup.begin ();
    3864      1618511 :       if (!(it != to_fixup.end ()))
    3865              :         break;
    3866        43166 :       stmt_vec_info grp = *it;
    3867        43166 :       to_fixup.remove (grp);
    3868              : 
    3869              :       /* Find the earliest duplicate group member.  A member counts as a
                      :          duplicate when its DR_INIT equals that of its predecessor in
                      :          the chain; "earliest" is in terms of gimple UID.  */
    3870        43166 :       unsigned first_duplicate = -1u;
    3871        43166 :       stmt_vec_info next, g = grp;
    3872       276908 :       while ((next = DR_GROUP_NEXT_ELEMENT (g)))
    3873              :         {
    3874       190576 :           if (tree_int_cst_equal (DR_INIT (STMT_VINFO_DR_INFO (next)->dr),
    3875       190576 :                                   DR_INIT (STMT_VINFO_DR_INFO (g)->dr))
    3876       190576 :               && gimple_uid (STMT_VINFO_STMT (next)) < first_duplicate)
    3877              :             first_duplicate = gimple_uid (STMT_VINFO_STMT (next));
    3878              :           g = next;
    3879              :         }
    3880        43166 :       if (first_duplicate == -1U)
    3881        16510 :         continue;
    3882              : 
    3883              :       /* Then move all stmts after the first duplicate to a new group.
    3884              :          Note this is a heuristic but one with the property that *it
    3885              :          is fixed up completely.  */
    3886        26656 :       g = grp;
    3887        26656 :       stmt_vec_info newgroup = NULL, ng = grp;
    3888       241443 :       while ((next = DR_GROUP_NEXT_ELEMENT (g)))
    3889              :         {
    3890       188131 :           if (gimple_uid (STMT_VINFO_STMT (next)) >= first_duplicate)
    3891              :             {
    3892       181929 :               DR_GROUP_NEXT_ELEMENT (g) = DR_GROUP_NEXT_ELEMENT (next);
    3893       181929 :               if (!newgroup)
    3894              :                 {
    3895        26656 :                   newgroup = next;
    3896        26656 :                   STMT_VINFO_SLP_VECT_ONLY (newgroup)
    3897        26656 :                     = STMT_VINFO_SLP_VECT_ONLY (grp);
    3898              :                 }
    3899              :               else
    3900       155273 :                 DR_GROUP_NEXT_ELEMENT (ng) = next;
    3901       181929 :               ng = next;
    3902       181929 :               DR_GROUP_FIRST_ELEMENT (ng) = newgroup;
    3903              :             }
    3904              :           else
    3905              :             g = DR_GROUP_NEXT_ELEMENT (g);
    3906              :         }
    3907        26656 :       DR_GROUP_NEXT_ELEMENT (ng) = NULL;
    3908              : 
    3909              :       /* Fixup the new group which still may contain duplicates.  */
    3910        26656 :       to_fixup.add (newgroup);
    3911              :     }
    3912              : 
    3913      1575345 :   dr_vec_info *dr_info;
    3914     16570435 :   FOR_EACH_VEC_ELT (datarefs_copy, i, dr_info)
    3915              :     {
    3916     15003100 :       if (STMT_VINFO_VECTORIZABLE (dr_info->stmt)
    3917     15003100 :           && !vect_analyze_data_ref_access (vinfo, dr_info))
    3918              :         {
    3919         8064 :           if (dump_enabled_p ())
    3920          292 :             dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
    3921              :                              "not vectorized: complicated access pattern.\n");
    3922              : 
    3923         8064 :           if (is_a <bb_vec_info> (vinfo))
    3924              :             {
    3925              :               /* Mark the statement as not vectorizable.  */
    3926           54 :               STMT_VINFO_VECTORIZABLE (dr_info->stmt) = false;
    3927           54 :               continue;
    3928              :             }
    3929              :           else
    3930              :             {
    3931         8010 :               datarefs_copy.release ();
    3932         8010 :               return opt_result::failure_at (dr_info->stmt->stmt,
    3933              :                                              "not vectorized:"
    3934              :                                              " complicated access pattern.\n");
    3935              :             }
    3936              :         }
    3937              :     }
    3938              : 
    3939      1567335 :   datarefs_copy.release ();
    3940      1567335 :   return opt_result::success ();
    3941      1575345 : }
    3942              : 
    3943              : /* Function vect_vfa_segment_size.
    3944              : 
    3945              :    Input:
    3946              :      DR_INFO: The data reference.
    3947              :      LENGTH_FACTOR: segment length to consider.
    3948              : 
    3949              :    Return a value suitable for the dr_with_seg_len::seg_len field.
    3950              :    This is the "distance travelled" by the pointer from the first
    3951              :    iteration in the segment to the last.  Note that it does not include
    3952              :    the size of the access; in effect it only describes the first byte.
                      :
                      :    The value is computed in sizetype as
                      :    DR_STEP (DR_INFO->dr) * (LENGTH_FACTOR - 1).  */
    3953              : 
    3954              : static tree
    3955       146308 : vect_vfa_segment_size (dr_vec_info *dr_info, tree length_factor)
    3956              : {
    3957       146308 :   length_factor = size_binop (MINUS_EXPR,
    3958              :                               fold_convert (sizetype, length_factor),
    3959              :                               size_one_node);
    3960       146308 :   return size_binop (MULT_EXPR, fold_convert (sizetype, DR_STEP (dr_info->dr)),
    3961              :                      length_factor);
    3962              : }
    3963              : 
    3964              : /* Return a value that, when added to abs (vect_vfa_segment_size (DR_INFO)),
    3965              :    gives the worst-case number of bytes covered by the segment.
                      :
                      :    The result starts out as the size in bytes of the scalar reference.
                      :    If DR_INFO's statement is part of a group it must be the group's
                      :    first element (this is asserted), and the size is multiplied by
                      :    DR_GROUP_SIZE - DR_GROUP_GAP.  If the alignment scheme reported by
                      :    vect_supportable_dr_alignment is dr_explicit_realign_optimized, the
                      :    difference between the vector size and the scalar size is added.  */
    3966              : 
    3967              : static unsigned HOST_WIDE_INT
    3968       146790 : vect_vfa_access_size (vec_info *vinfo, dr_vec_info *dr_info)
    3969              : {
    3970       146790 :   stmt_vec_info stmt_vinfo = dr_info->stmt;
    3971       146790 :   tree ref_type = TREE_TYPE (DR_REF (dr_info->dr));
    3972       146790 :   unsigned HOST_WIDE_INT ref_size = tree_to_uhwi (TYPE_SIZE_UNIT (ref_type));
    3973       146790 :   unsigned HOST_WIDE_INT access_size = ref_size;
    3974       146790 :   if (DR_GROUP_FIRST_ELEMENT (stmt_vinfo))
    3975              :     {
    3976        42341 :       gcc_assert (DR_GROUP_FIRST_ELEMENT (stmt_vinfo) == stmt_vinfo);
    3977        42341 :       access_size *= DR_GROUP_SIZE (stmt_vinfo) - DR_GROUP_GAP (stmt_vinfo);
    3978              :     }
    3979       146790 :   tree vectype = STMT_VINFO_VECTYPE (stmt_vinfo);
    3980       146790 :   int misalignment;
    3981       293580 :   if (((misalignment = dr_misalignment (dr_info, vectype)), true)
    3982       146790 :       && (vect_supportable_dr_alignment (vinfo, dr_info, vectype, misalignment)
    3983              :           == dr_explicit_realign_optimized))
    3984              :     {
    3985              :       /* We might access a full vector's worth.  */
    3986            0 :       access_size += tree_to_uhwi (TYPE_SIZE_UNIT (vectype)) - ref_size;
    3987              :     }
    3988       146790 :   return access_size;
    3989              : }
    3990              : 
    3991              : /* Get the minimum alignment for all the scalar accesses that DR_INFO
    3992              :    describes.  This simply forwards to dr_alignment on the underlying
                      :    data reference DR_INFO->dr.  */
    3993              : 
    3994              : static unsigned int
    3995       146790 : vect_vfa_align (dr_vec_info *dr_info)
    3996              : {
    3997            0 :   return dr_alignment (dr_info->dr);
    3998              : }
    3999              : 
    4000              : /* Function vect_compile_time_alias.
    4001              : 
    4002              :    Given data references A and B with equal base and offset, see whether
    4003              :    the alias relation can be decided at compilation time.  Return 1 if
    4004              :    it can and the references alias, 0 if it can and the references do
    4005              :    not alias, and -1 if we cannot decide at compile time.  SEGMENT_LENGTH_A,
    4006              :    SEGMENT_LENGTH_B, ACCESS_SIZE_A and ACCESS_SIZE_B are the equivalent
    4007              :    of dr_with_seg_len::{seg_len,access_size} for A and B.
                      :
                      :    Each reference is turned into a byte range that starts at its
                      :    DR_INIT and whose length is the segment length plus the access
                      :    size.  For a negative DR_STEP the negated segment length is used
                      :    as length and the start is moved down by that amount.  The two
                      :    ranges are then tested with ranges_known_overlap_p and
                      :    ranges_maybe_overlap_p.  */
    4008              : 
    4009              : static int
    4010         4344 : vect_compile_time_alias (dr_vec_info *a, dr_vec_info *b,
    4011              :                          tree segment_length_a, tree segment_length_b,
    4012              :                          unsigned HOST_WIDE_INT access_size_a,
    4013              :                          unsigned HOST_WIDE_INT access_size_b)
    4014              : {
    4015         4344 :   poly_offset_int offset_a = wi::to_poly_offset (DR_INIT (a->dr));
    4016         4344 :   poly_offset_int offset_b = wi::to_poly_offset (DR_INIT (b->dr));
    4017         4344 :   poly_uint64 const_length_a;
    4018         4344 :   poly_uint64 const_length_b;
    4019              : 
    4020              :   /* For negative step, we need to adjust address range by TYPE_SIZE_UNIT
    4021              :      bytes, e.g., int a[3] -> a[1] range is [a+4, a+16) instead of
    4022              :      [a, a+12) */
    4023         4344 :   if (tree_int_cst_compare (DR_STEP (a->dr), size_zero_node) < 0)
    4024              :     {
    4025          250 :       const_length_a = (-wi::to_poly_wide (segment_length_a)).force_uhwi ();
    4026          250 :       offset_a -= const_length_a;
    4027              :     }
    4028              :   else
    4029         4094 :     const_length_a = tree_to_poly_uint64 (segment_length_a);
    4030         4344 :   if (tree_int_cst_compare (DR_STEP (b->dr), size_zero_node) < 0)
    4031              :     {
    4032          408 :       const_length_b = (-wi::to_poly_wide (segment_length_b)).force_uhwi ();
    4033          408 :       offset_b -= const_length_b;
    4034              :     }
    4035              :   else
    4036         3936 :     const_length_b = tree_to_poly_uint64 (segment_length_b);
    4037              : 
    4038         4344 :   const_length_a += access_size_a;
    4039         4344 :   const_length_b += access_size_b;
    4040              : 
    4041         4344 :   if (ranges_known_overlap_p (offset_a, const_length_a,
    4042              :                               offset_b, const_length_b))
    4043              :     return 1;
    4044              : 
    4045          536 :   if (!ranges_maybe_overlap_p (offset_a, const_length_a,
    4046              :                                offset_b, const_length_b))
    4047          536 :     return 0;
    4048              : 
    4049              :   return -1;
    4050              : }
    4051              : 
    4052              : /* Return true if the minimum nonzero dependence distance for loop LOOP_DEPTH
    4053              :    in DDR is >= VF.
                      :
                      :    Return false right away if DDR_ARE_DEPENDENT (DDR) is set or if DDR
                      :    has no distance vectors.  Otherwise every distance vector is
                      :    inspected at index LOOP_DEPTH; zero distances, and positive
                      :    distances of a DDR with DDR_REVERSED_P set, are ignored, and any
                      :    other distance whose absolute value may be less than VF makes the
                      :    result false.  */
    4054              : 
    4055              : static bool
    4056        86331 : dependence_distance_ge_vf (data_dependence_relation *ddr,
    4057              :                            unsigned int loop_depth, poly_uint64 vf)
    4058              : {
    4059        86331 :   if (DDR_ARE_DEPENDENT (ddr) != NULL_TREE
    4060        91338 :       || DDR_NUM_DIST_VECTS (ddr) == 0)
    4061              :     return false;
    4062              : 
    4063              :   /* If the dependence is exact, we should have limited the VF instead.  */
    4064         5042 :   gcc_checking_assert (DDR_COULD_BE_INDEPENDENT_P (ddr));
    4065              : 
    4066              :   unsigned int i;
    4067              :   lambda_vector dist_v;
    4068        10115 :   FOR_EACH_VEC_ELT (DDR_DIST_VECTS (ddr), i, dist_v)
    4069              :     {
    4070        10080 :       HOST_WIDE_INT dist = dist_v[loop_depth];
    4071        10080 :       if (dist != 0
    4072         5042 :           && !(dist > 0 && DDR_REVERSED_P (ddr))
    4073        15122 :           && maybe_lt ((unsigned HOST_WIDE_INT) abs_hwi (dist), vf))
    4074              :         return false;
    4075              :     }
    4076              : 
    4077           35 :   if (dump_enabled_p ())
    4078            2 :     dump_printf_loc (MSG_NOTE, vect_location,
    4079              :                      "dependence distance between %T and %T is >= VF\n",
    4080            2 :                      DR_REF (DDR_A (ddr)), DR_REF (DDR_B (ddr)));
    4081              : 
    4082              :   return true;
    4083              : }
    4084              : 
    4085              : /* Dump LOWER_BOUND using flags DUMP_KIND.  Dumps are known to be enabled.
                      :    The text printed is `unsigned (EXPR) >= ' when LOWER_BOUND.unsigned_p
                      :    is set and `abs (EXPR) >= ' otherwise, followed by the decimal form
                      :    of LOWER_BOUND.min_value.  No newline is printed here.  */
    4086              : 
    4087              : static void
    4088          437 : dump_lower_bound (dump_flags_t dump_kind, const vec_lower_bound &lower_bound)
    4089              : {
    4090          437 :   dump_printf (dump_kind, "%s (%T) >= ",
    4091          437 :                lower_bound.unsigned_p ? "unsigned" : "abs",
    4092          437 :                lower_bound.expr);
    4093          437 :   dump_dec (dump_kind, lower_bound.min_value);
    4094          437 : }
    4095              : 
    4096              : /* Record that the vectorized loop requires the vec_lower_bound described
    4097              :    by EXPR, UNSIGNED_P and MIN_VALUE.
                      :
                      :    If LOOP_VINFO_LOWER_BOUNDS (LOOP_VINFO) already has an entry whose
                      :    expression is operand_equal_p to EXPR, merge into that entry instead
                      :    of adding a second one: the merged entry is unsigned only if both the
                      :    old and new requirements are, and its minimum value is the
                      :    upper_bound of the old and new minimum values.  The entry is only
                      :    rewritten (and the update dumped) if the merge actually changes it.
                      :    Otherwise push a new entry and, if dumping is enabled, report it.  */
    4098              : 
    4099              : static void
    4100         6684 : vect_check_lower_bound (loop_vec_info loop_vinfo, tree expr, bool unsigned_p,
    4101              :                         poly_uint64 min_value)
    4102              : {
    4103         6684 :   vec<vec_lower_bound> &lower_bounds
    4104              :     = LOOP_VINFO_LOWER_BOUNDS (loop_vinfo);
    4105         7656 :   for (unsigned int i = 0; i < lower_bounds.length (); ++i)
    4106         5892 :     if (operand_equal_p (lower_bounds[i].expr, expr, 0))
    4107              :       {
    4108         4920 :         unsigned_p &= lower_bounds[i].unsigned_p;
    4109         4920 :         min_value = upper_bound (lower_bounds[i].min_value, min_value);
    4110         4920 :         if (lower_bounds[i].unsigned_p != unsigned_p
    4111         4920 :             || maybe_lt (lower_bounds[i].min_value, min_value))
    4112              :           {
    4113          798 :             lower_bounds[i].unsigned_p = unsigned_p;
    4114          798 :             lower_bounds[i].min_value = min_value;
    4115          798 :             if (dump_enabled_p ())
    4116              :               {
    4117          250 :                 dump_printf_loc (MSG_NOTE, vect_location,
    4118              :                                  "updating run-time check to ");
    4119          250 :                 dump_lower_bound (MSG_NOTE, lower_bounds[i]);
    4120          250 :                 dump_printf (MSG_NOTE, "\n");
    4121              :               }
    4122              :           }
    4123         4920 :         return;
    4124              :       }
    4125              : 
    4126         1764 :   vec_lower_bound lower_bound (expr, unsigned_p, min_value);
    4127         1764 :   if (dump_enabled_p ())
    4128              :     {
    4129          187 :       dump_printf_loc (MSG_NOTE, vect_location, "need a run-time check that ");
    4130          187 :       dump_lower_bound (MSG_NOTE, lower_bound);
    4131          187 :       dump_printf (MSG_NOTE, "\n");
    4132              :     }
    4133         1764 :   LOOP_VINFO_LOWER_BOUNDS (loop_vinfo).safe_push (lower_bound);
    4134              : }
    4135              : 
    4136              : /* Return true if it's unlikely that the step of the vectorized form of DR_INFO
    4137              :    will span fewer than GAP bytes.
                      :
                      :    This is a heuristic based on estimated_poly_value: the estimate of GAP
                      :    is compared against the estimated vectorization factor of LOOP_VINFO,
                      :    multiplied by the group size when DR_INFO's statement has a
                      :    DR_GROUP_FIRST_ELEMENT, times the scalar access size of DR_INFO.
                      :    The result is true when the GAP estimate is no larger than that
                      :    product.  */
    4138              : 
    4139              : static bool
    4140         5348 : vect_small_gap_p (loop_vec_info loop_vinfo, dr_vec_info *dr_info,
    4141              :                   poly_int64 gap)
    4142              : {
    4143         5348 :   stmt_vec_info stmt_info = dr_info->stmt;
    4144         5348 :   HOST_WIDE_INT count
    4145         5348 :     = estimated_poly_value (LOOP_VINFO_VECT_FACTOR (loop_vinfo));
    4146         5348 :   if (DR_GROUP_FIRST_ELEMENT (stmt_info))
    4147         4588 :     count *= DR_GROUP_SIZE (DR_GROUP_FIRST_ELEMENT (stmt_info));
    4148         5348 :   return (estimated_poly_value (gap)
    4149         5348 :           <= count * vect_get_scalar_dr_size (dr_info));
    4150              : }
    4151              : 
    4152              : /* Return true if we know that there is no alias between DR_INFO_A and
    4153              :    DR_INFO_B when abs (DR_STEP (DR_INFO_A->dr)) >= N for some N.
    4154              :    When returning true, set *LOWER_BOUND_OUT to this N.
                      :
                      :    Return false, leaving *LOWER_BOUND_OUT untouched, if the two
                      :    references do not have operand_equal_p base addresses, offsets and
                      :    steps, if either DR_INIT is not a poly_int, or if the two DR_INITs
                      :    are not ordered_p.  Also return false if the lower-addressed access
                      :    may reach past the start of the other one and
                      :    vect_preserves_scalar_order_p fails for the pair.  The swap that
                      :    sorts the references by address only affects this function's local
                      :    copies of the arguments.  */
    4155              : 
    4156              : static bool
    4157        19466 : vectorizable_with_step_bound_p (dr_vec_info *dr_info_a, dr_vec_info *dr_info_b,
    4158              :                                 poly_uint64 *lower_bound_out)
    4159              : {
    4160              :   /* Check that there is a constant gap of known sign between DR_A
    4161              :      and DR_B.  */
    4162        19466 :   data_reference *dr_a = dr_info_a->dr;
    4163        19466 :   data_reference *dr_b = dr_info_b->dr;
    4164        19466 :   poly_int64 init_a, init_b;
    4165        19466 :   if (!operand_equal_p (DR_BASE_ADDRESS (dr_a), DR_BASE_ADDRESS (dr_b), 0)
    4166         8828 :       || !operand_equal_p (DR_OFFSET (dr_a), DR_OFFSET (dr_b), 0)
    4167         8142 :       || !operand_equal_p (DR_STEP (dr_a), DR_STEP (dr_b), 0)
    4168         8132 :       || !poly_int_tree_p (DR_INIT (dr_a), &init_a)
    4169         8132 :       || !poly_int_tree_p (DR_INIT (dr_b), &init_b)
    4170        19466 :       || !ordered_p (init_a, init_b))
    4171        11334 :     return false;
    4172              : 
    4173              :   /* Sort DR_A and DR_B by the address they access.  */
    4174         8132 :   if (maybe_lt (init_b, init_a))
    4175              :     {
    4176          116 :       std::swap (init_a, init_b);
    4177          116 :       std::swap (dr_info_a, dr_info_b);
    4178          116 :       std::swap (dr_a, dr_b);
    4179              :     }
    4180              : 
    4181              :   /* If the two accesses could be dependent within a scalar iteration,
    4182              :      make sure that we'd retain their order.  */
    4183         8132 :   if (maybe_gt (init_a + vect_get_scalar_dr_size (dr_info_a), init_b)
    4184         8132 :       && !vect_preserves_scalar_order_p (dr_info_a, dr_info_b))
    4185              :     return false;
    4186              : 
    4187              :   /* There is no alias if abs (DR_STEP) is greater than or equal to
    4188              :      the bytes spanned by the combination of the two accesses.  */
    4189         8132 :   *lower_bound_out = init_b + vect_get_scalar_dr_size (dr_info_b) - init_a;
    4190         8132 :   return true;
    4191              : }
    4192              : 
    4193              : /* Function vect_prune_runtime_alias_test_list.
    4194              : 
    4195              :    Prune a list of ddrs to be tested at run-time by versioning for alias.
    4196              :    Merge several alias checks into one if possible.
    4197              :    Return a failure opt_result if the resulting number of checks is larger
    4198              :    than allowed by param_vect_max_version_for_alias_checks (scaled by 6/10
                      :    under VECT_COST_MODEL_CHEAP), if any check at all would be needed
                      :    under VECT_COST_MODEL_VERY_CHEAP, if vect_compile_time_alias reports
                      :    a definite alias (result 1) for some pair, or if a pair with
                      :    different steps needs the scalar iteration count and
                      :    LOOP_VINFO_NITERS is SCEV_NOT_KNOWN.  Otherwise return success.
                      :
                      :    Pairs that can instead be handled by requiring a minimum step are
                      :    recorded through vect_check_lower_bound, and pairs of objects that
                      :    only need an address inequality test are pushed onto
                      :    LOOP_VINFO_CHECK_UNEQUAL_ADDRS; the remaining pairs are pushed onto
                      :    LOOP_VINFO_COMP_ALIAS_DDRS before being pruned.  */
    4199              : 
    4200              : opt_result
    4201       412326 : vect_prune_runtime_alias_test_list (loop_vec_info loop_vinfo)
    4202              : {
    4203       412326 :   typedef pair_hash <tree_operand_hash, tree_operand_hash> tree_pair_hash;
    4204       412326 :   hash_set <tree_pair_hash> compared_objects;
    4205              : 
    4206       412326 :   const vec<ddr_p> &may_alias_ddrs = LOOP_VINFO_MAY_ALIAS_DDRS (loop_vinfo);
    4207       412326 :   vec<dr_with_seg_len_pair_t> &comp_alias_ddrs
    4208              :     = LOOP_VINFO_COMP_ALIAS_DDRS (loop_vinfo);
    4209       412326 :   const vec<vec_object_pair> &check_unequal_addrs
    4210              :     = LOOP_VINFO_CHECK_UNEQUAL_ADDRS (loop_vinfo);
    4211       412326 :   poly_uint64 vect_factor = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
    4212       412326 :   tree scalar_loop_iters = LOOP_VINFO_NITERS (loop_vinfo);
    4213              : 
    4214       412326 :   ddr_p ddr;
    4215       412326 :   unsigned int i;
    4216       412326 :   tree length_factor;
    4217              : 
    4218       412326 :   DUMP_VECT_SCOPE ("vect_prune_runtime_alias_test_list");
    4219              : 
    4220              :   /* Step values are irrelevant for aliasing if the number of vector
    4221              :      iterations is equal to the number of scalar iterations (which can
    4222              :      happen for fully-SLP loops).  */
    4223       412326 :   bool vf_one_p = known_eq (LOOP_VINFO_VECT_FACTOR (loop_vinfo), 1U);
    4224              : 
    4225       412326 :   if (!vf_one_p)
    4226              :     {
    4227              :       /* Convert the checks for nonzero steps into bound tests.  */
    4228              :       tree value;
    4229       409098 :       FOR_EACH_VEC_ELT (LOOP_VINFO_CHECK_NONZERO (loop_vinfo), i, value)
    4230         1693 :         vect_check_lower_bound (loop_vinfo, value, true, 1);
    4231              :     }
    4232              : 
    4233       412326 :   if (may_alias_ddrs.is_empty ())
    4234       385941 :     return opt_result::success ();
    4235              : 
    4236        26385 :   comp_alias_ddrs.create (may_alias_ddrs.length ());
    4237              : 
    4238        26385 :   unsigned int loop_depth
    4239        26385 :     = index_in_loop_nest (LOOP_VINFO_LOOP (loop_vinfo)->num,
    4240        26385 :                           LOOP_VINFO_LOOP_NEST (loop_vinfo));
    4241              : 
    4242              :   /* First, we collect all data ref pairs for aliasing checks.  */
    4243       108896 :   FOR_EACH_VEC_ELT (may_alias_ddrs, i, ddr)
    4244              :     {
    4245        86331 :       poly_uint64 lower_bound;
    4246        86331 :       tree segment_length_a, segment_length_b;
    4247        86331 :       unsigned HOST_WIDE_INT access_size_a, access_size_b;
    4248        86331 :       unsigned HOST_WIDE_INT align_a, align_b;
    4249              : 
    4250              :       /* Ignore the alias if the VF we chose ended up being no greater
    4251              :          than the dependence distance.  */
    4252        86331 :       if (dependence_distance_ge_vf (ddr, loop_depth, vect_factor))
    4253        13460 :         continue;
    4254              : 
    4255        86296 :       if (DDR_OBJECT_A (ddr))
    4256              :         {
    4257          106 :           vec_object_pair new_pair (DDR_OBJECT_A (ddr), DDR_OBJECT_B (ddr));
    4258          106 :           if (!compared_objects.add (new_pair))
    4259              :             {
    4260           22 :               if (dump_enabled_p ())
    4261           16 :                 dump_printf_loc (MSG_NOTE, vect_location,
    4262              :                                  "checking that %T and %T"
    4263              :                                  " have different addresses\n",
    4264              :                                  new_pair.first, new_pair.second);
    4265           22 :               LOOP_VINFO_CHECK_UNEQUAL_ADDRS (loop_vinfo).safe_push (new_pair);
    4266              :             }
    4267          106 :           continue;
    4268          106 :         }
    4269              : 
    4270        86190 :       dr_vec_info *dr_info_a = loop_vinfo->lookup_dr (DDR_A (ddr));
    4271        86190 :       stmt_vec_info stmt_info_a = dr_info_a->stmt;
    4272              : 
    4273        86190 :       dr_vec_info *dr_info_b = loop_vinfo->lookup_dr (DDR_B (ddr));
    4274        86190 :       stmt_vec_info stmt_info_b = dr_info_b->stmt;
    4275              : 
    4276        86190 :       bool preserves_scalar_order_p
    4277        86190 :         = vect_preserves_scalar_order_p (dr_info_a, dr_info_b);
    4278        86190 :       bool ignore_step_p
    4279              :           = (vf_one_p
    4280        86190 :              && (preserves_scalar_order_p
    4281         4037 :                  || operand_equal_p (DR_STEP (dr_info_a->dr),
    4282         4037 :                                      DR_STEP (dr_info_b->dr))));
    4283              : 
    4284              :       /* Skip the pair if inter-iteration dependencies are irrelevant
    4285              :          and intra-iteration dependencies are guaranteed to be honored.  */
    4286        15825 :       if (ignore_step_p
    4287         8033 :           && (preserves_scalar_order_p
    4288         3318 :               || vectorizable_with_step_bound_p (dr_info_a, dr_info_b,
    4289              :                                                  &lower_bound)))
    4290              :         {
    4291         7792 :           if (dump_enabled_p ())
    4292         2528 :             dump_printf_loc (MSG_NOTE, vect_location,
    4293              :                              "no need for alias check between "
    4294              :                              "%T and %T when VF is 1\n",
    4295         2528 :                              DR_REF (dr_info_a->dr), DR_REF (dr_info_b->dr));
    4296         7792 :           continue;
    4297              :         }
    4298              : 
    4299              :       /* See whether we can handle the alias using a bounds check on
    4300              :          the step, and whether that's likely to be the best approach.
    4301              :          (It might not be, for example, if the minimum step is much larger
    4302              :          than the number of bytes handled by one vector iteration.)  */
    4303        78398 :       if (!ignore_step_p
    4304        78157 :           && TREE_CODE (DR_STEP (dr_info_a->dr)) != INTEGER_CST
    4305        16148 :           && vectorizable_with_step_bound_p (dr_info_a, dr_info_b,
    4306              :                                              &lower_bound)
    4307        83453 :           && (vect_small_gap_p (loop_vinfo, dr_info_a, lower_bound)
    4308          293 :               || vect_small_gap_p (loop_vinfo, dr_info_b, lower_bound)))
    4309              :         {
    4310         4991 :           bool unsigned_p = dr_known_forward_stride_p (dr_info_a->dr);
    4311         4991 :           if (dump_enabled_p ())
    4312              :             {
    4313         3384 :               dump_printf_loc (MSG_NOTE, vect_location, "no alias between "
    4314              :                                "%T and %T when the step %T is outside ",
    4315              :                                DR_REF (dr_info_a->dr),
    4316         1692 :                                DR_REF (dr_info_b->dr),
    4317         1692 :                                DR_STEP (dr_info_a->dr));
    4318         1692 :               if (unsigned_p)
    4319          504 :                 dump_printf (MSG_NOTE, "[0");
    4320              :               else
    4321              :                 {
    4322         1188 :                   dump_printf (MSG_NOTE, "(");
    4323         1188 :                   dump_dec (MSG_NOTE, poly_int64 (-lower_bound));
    4324              :                 }
    4325         1692 :               dump_printf (MSG_NOTE, ", ");
    4326         1692 :               dump_dec (MSG_NOTE, lower_bound);
    4327         1692 :               dump_printf (MSG_NOTE, ")\n");
    4328              :             }
    4329         4991 :           vect_check_lower_bound (loop_vinfo, DR_STEP (dr_info_a->dr),
    4330              :                                   unsigned_p, lower_bound);
    4331         4991 :           continue;
    4332         4991 :         }
    4333              : 
    4334        73407 :       stmt_vec_info dr_group_first_a = DR_GROUP_FIRST_ELEMENT (stmt_info_a);
    4335        73407 :       if (dr_group_first_a)
    4336              :         {
    4337        20948 :           stmt_info_a = dr_group_first_a;
    4338        20948 :           dr_info_a = STMT_VINFO_DR_INFO (stmt_info_a);
    4339              :         }
    4340              : 
    4341        73407 :       stmt_vec_info dr_group_first_b = DR_GROUP_FIRST_ELEMENT (stmt_info_b);
    4342        73407 :       if (dr_group_first_b)
    4343              :         {
    4344        21393 :           stmt_info_b = dr_group_first_b;
    4345        21393 :           dr_info_b = STMT_VINFO_DR_INFO (stmt_info_b);
    4346              :         }
    4347              : 
    4348        73407 :       if (ignore_step_p)
    4349              :         {
    4350          241 :           segment_length_a = size_zero_node;
    4351          241 :           segment_length_b = size_zero_node;
    4352              :         }
    4353              :       else
    4354              :         {
    4355        73166 :           if (!operand_equal_p (DR_STEP (dr_info_a->dr),
    4356        73166 :                                 DR_STEP (dr_info_b->dr), 0))
    4357              :             {
    4358        16209 :               length_factor = scalar_loop_iters;
    4359        16209 :               if (TREE_CODE (length_factor) == SCEV_NOT_KNOWN)
    4360           12 :                 return opt_result::failure_at (vect_location,
    4361              :                                                "Unsupported alias check on"
    4362              :                                                " uncounted loop\n");
    4363              :             }
    4364              :           else
    4365        56957 :             length_factor = size_int (vect_factor);
    4366        73154 :           segment_length_a = vect_vfa_segment_size (dr_info_a, length_factor);
    4367        73154 :           segment_length_b = vect_vfa_segment_size (dr_info_b, length_factor);
    4368              :         }
    4369        73395 :       access_size_a = vect_vfa_access_size (loop_vinfo, dr_info_a);
    4370        73395 :       access_size_b = vect_vfa_access_size (loop_vinfo, dr_info_b);
    4371        73395 :       align_a = vect_vfa_align (dr_info_a);
    4372        73395 :       align_b = vect_vfa_align (dr_info_b);
    4373              : 
    4374              :       /* See whether the alias is known at compilation time.  */
    4375        73395 :       if (operand_equal_p (DR_BASE_ADDRESS (dr_info_a->dr),
    4376        73395 :                            DR_BASE_ADDRESS (dr_info_b->dr), 0)
    4377         6184 :           && operand_equal_p (DR_OFFSET (dr_info_a->dr),
    4378         6184 :                               DR_OFFSET (dr_info_b->dr), 0)
    4379         4492 :           && TREE_CODE (DR_STEP (dr_info_a->dr)) == INTEGER_CST
    4380         4418 :           && TREE_CODE (DR_STEP (dr_info_b->dr)) == INTEGER_CST
    4381         4408 :           && poly_int_tree_p (segment_length_a)
    4382        77761 :           && poly_int_tree_p (segment_length_b))
    4383              :         {
    4384         4344 :           int res = vect_compile_time_alias (dr_info_a, dr_info_b,
    4385              :                                              segment_length_a,
    4386              :                                              segment_length_b,
    4387              :                                              access_size_a,
    4388              :                                              access_size_b);
    4389         4344 :           if (res >= 0 && dump_enabled_p ())
    4390              :             {
    4391          208 :               dump_printf_loc (MSG_NOTE, vect_location,
    4392              :                                "can tell at compile time that %T and %T",
    4393          104 :                                DR_REF (dr_info_a->dr), DR_REF (dr_info_b->dr));
    4394          104 :               if (res == 0)
    4395           57 :                 dump_printf (MSG_NOTE, " do not alias\n");
    4396              :               else
    4397           47 :                 dump_printf (MSG_NOTE, " alias\n");
    4398              :             }
    4399              : 
    4400         4344 :           if (res == 0)
    4401          536 :             continue;
    4402              : 
    4403         3808 :           if (res == 1)
    4404         3808 :             return opt_result::failure_at (stmt_info_b->stmt,
    4405              :                                            "not vectorized:"
    4406              :                                            " compilation time alias: %G%G",
    4407              :                                            stmt_info_a->stmt,
    4408              :                                            stmt_info_b->stmt);
    4409              :         }
    4410              : 
    4411              :       /* dr_with_seg_len requires the alignment to apply to the segment length
    4412              :          and access size, not just the start address.  The access size can be
    4413              :          smaller than the pointer alignment for grouped accesses and bitfield
    4414              :          references; see PR115192 and PR116125 respectively.  */
    4415        69051 :       align_a = std::min (align_a, least_bit_hwi (access_size_a));
    4416        69051 :       align_b = std::min (align_b, least_bit_hwi (access_size_b));
    4417              : 
    4418        69051 :       dr_with_seg_len dr_a (dr_info_a->dr, segment_length_a,
    4419        69051 :                             access_size_a, align_a);
    4420        69051 :       dr_with_seg_len dr_b (dr_info_b->dr, segment_length_b,
    4421        69051 :                             access_size_b, align_b);
    4422              :       /* Canonicalize the order to be the one that's needed for accurate
    4423              :          RAW, WAR and WAW flags, in cases where the data references are
    4424              :          well-ordered.  The order doesn't really matter otherwise,
    4425              :          but we might as well be consistent.  */
    4426        69051 :       if (get_later_stmt (stmt_info_a, stmt_info_b) == stmt_info_a)
    4427         5190 :         std::swap (dr_a, dr_b);
    4428              : 
    4429        69051 :       dr_with_seg_len_pair_t dr_with_seg_len_pair
    4430              :         (dr_a, dr_b, (preserves_scalar_order_p
    4431              :                       ? dr_with_seg_len_pair_t::WELL_ORDERED
    4432        75894 :                       : dr_with_seg_len_pair_t::REORDERED));
    4433              : 
    4434        69051 :       comp_alias_ddrs.safe_push (dr_with_seg_len_pair);
    4435              :     }
    4436              : 
    4437        22565 :   prune_runtime_alias_test_list (&comp_alias_ddrs, vect_factor);
    4438              : 
    4439        45130 :   unsigned int count = (comp_alias_ddrs.length ()
    4440        22565 :                         + check_unequal_addrs.length ());
    4441              : 
    4442        22565 :   if (count
    4443        22565 :       && (loop_cost_model (LOOP_VINFO_LOOP (loop_vinfo))
    4444              :           == VECT_COST_MODEL_VERY_CHEAP))
    4445        12757 :     return opt_result::failure_at
    4446        12757 :       (vect_location, "would need a runtime alias check\n");
    4447              : 
    4448         9808 :   if (dump_enabled_p ())
    4449         1946 :     dump_printf_loc (MSG_NOTE, vect_location,
    4450              :                      "improved number of alias checks from %d to %d\n",
    4451              :                      may_alias_ddrs.length (), count);
    4452         9808 :   unsigned limit = param_vect_max_version_for_alias_checks;
    4453         9808 :   if (loop_cost_model (LOOP_VINFO_LOOP (loop_vinfo)) == VECT_COST_MODEL_CHEAP)
    4454          936 :     limit = param_vect_max_version_for_alias_checks * 6 / 10;
    4455         9808 :   if (count > limit)
    4456          162 :     return opt_result::failure_at
    4457          162 :       (vect_location,
    4458              :        "number of versioning for alias run-time tests exceeds %d "
    4459              :        "(--param vect-max-version-for-alias-checks)\n", limit);
    4460              : 
    4461         9646 :   return opt_result::success ();
    4462       412326 : }
    4463              : 
    4464              : /* Structure to hold information about a supported gather/scatter
    4465              :    configuration.  IFN is the internal function that was found to be
                      :    supported, OFFSET_VECTYPE and SCALE are the offset vector type and
                      :    scale it was found to be supported with, and ELSVALS is the vector
                      :    that was passed to the support query for that combination
                      :    (presumably the else values it reported -- confirm against
                      :    internal_gather_scatter_fn_supported_p).  */
    4466              : struct gather_scatter_config
    4467              : {
    4468              :   internal_fn ifn;
    4469              :   tree offset_vectype;
    4470              :   int scale;
    4471              :   vec<int> elsvals;
    4472              : };
    4473              : 
    4474              : /* Determine which gather/scatter IFN is supported for the given parameters.
    4475              :    IFN_MASK_GATHER_LOAD, IFN_GATHER_LOAD, and IFN_MASK_LEN_GATHER_LOAD
    4476              :    are mutually exclusive, so we only need to find one.  Return the
    4477              :    supported IFN or IFN_LAST if none are supported.
                      :
                      :    READ_P selects the gather-load functions; otherwise the scatter-store
                      :    functions are used.  The first candidate is the masked or unmasked
                      :    form according to MASKED_P, followed by the masked form and then the
                      :    mask-len form (so for MASKED_P the masked form is queried twice).
                      :    VECTYPE, MEMORY_TYPE, OFFSET_VECTYPE, SCALE and ELSVALS are passed
                      :    unchanged to internal_gather_scatter_fn_supported_p for each
                      :    candidate.  IFN_LAST is returned without querying the target if
                      :    OFFSET_VECTYPE is null.  */
    4478              : 
    4479              : static internal_fn
    4480      1173436 : vect_gather_scatter_which_ifn (bool read_p, bool masked_p,
    4481              :                                tree vectype, tree memory_type,
    4482              :                                tree offset_vectype, int scale,
    4483              :                                vec<int> *elsvals)
    4484              : {
    4485              :   /* Work out which functions to try.  */
    4486      1173436 :   internal_fn ifn, alt_ifn, alt_ifn2;
    4487      1173436 :   if (read_p)
    4488              :     {
    4489       876692 :       ifn = masked_p ? IFN_MASK_GATHER_LOAD : IFN_GATHER_LOAD;
    4490              :       alt_ifn = IFN_MASK_GATHER_LOAD;
    4491              :       alt_ifn2 = IFN_MASK_LEN_GATHER_LOAD;
    4492              :     }
    4493              :   else
    4494              :     {
    4495       296744 :       ifn = masked_p ? IFN_MASK_SCATTER_STORE : IFN_SCATTER_STORE;
    4496              :       alt_ifn = IFN_MASK_SCATTER_STORE;
    4497              :       alt_ifn2 = IFN_MASK_LEN_SCATTER_STORE;
    4498              :     }
    4499              : 
    4500      1173436 :   if (!offset_vectype)
    4501              :     return IFN_LAST;
    4502              : 
    4503      1173436 :   if (internal_gather_scatter_fn_supported_p (ifn, vectype, memory_type,
    4504              :                                               offset_vectype, scale, elsvals))
    4505              :     return ifn;
    4506      1173436 :   if (internal_gather_scatter_fn_supported_p (alt_ifn, vectype, memory_type,
    4507              :                                               offset_vectype, scale, elsvals))
    4508              :     return alt_ifn;
    4509      1173436 :   if (internal_gather_scatter_fn_supported_p (alt_ifn2, vectype, memory_type,
    4510              :                                               offset_vectype, scale, elsvals))
    4511              :     return alt_ifn2;
    4512              : 
    4513              :   return IFN_LAST;
    4514              : }
    4515              : 
    4516              : /* Collect all supported offset vector types for a gather load or scatter
    4517              :    store.  READ_P is true for loads and false for stores.  MASKED_P is true
    4518              :    if the load or store is conditional.  VECTYPE is the data vector type.
    4519              :    MEMORY_TYPE is the type of the memory elements being loaded or stored,
    4520              :    and OFFSET_TYPE is the type of the offset.
    4521              :    SCALE is the amount by which the offset should be multiplied.
    4522              : 
    4523              :    Return a vector of all configurations the target supports (which can
    4524              :    be none).  */
    4525              : 
    4526              : static auto_vec<gather_scatter_config>
    4527        84449 : vect_gather_scatter_get_configs (vec_info *vinfo, bool read_p, bool masked_p,
    4528              :                                  tree vectype, tree memory_type,
    4529              :                                  tree offset_type, int scale)
    4530              : {
    4531        84449 :   auto_vec<gather_scatter_config> configs;
    4532              : 
    4533        84449 :   auto_vec<tree, 8> offset_types_to_try;
    4534              : 
    4535              :   /* Try all sizes from the offset type's precision up to POINTER_SIZE.  */
    4536        84449 :   for (unsigned int bits = TYPE_PRECISION (offset_type);
    4537       395480 :        bits <= POINTER_SIZE;
    4538       297853 :        bits *= 2)
    4539              :     {
    4540              :       /* Signed variant.  */
    4541       297853 :       offset_types_to_try.safe_push
    4542       297853 :         (build_nonstandard_integer_type (bits, 0));
    4543              :       /* Unsigned variant.  */
    4544       297853 :       offset_types_to_try.safe_push
    4545       297853 :         (build_nonstandard_integer_type (bits, 1));
    4546              :     }
    4547              : 
    4548              :   /* Once we find which IFN works for one offset type, we know that it
    4549              :      will work for other offset types as well.  Then we can perform
    4550              :      the checks for the remaining offset types with only that IFN.
    4551              :      However, we might need to try different offset types to find which
    4552              :      IFN is supported, since the check is offset-type-specific.  */
    4553              :   internal_fn ifn = IFN_LAST;
    4554              : 
    4555              :   /* Try each offset type.  */
    4556       680155 :   for (unsigned int i = 0; i < offset_types_to_try.length (); i++)
    4557              :     {
    4558       595706 :       tree offset_type = offset_types_to_try[i];
    4559       595706 :       tree offset_vectype = get_vectype_for_scalar_type (vinfo, offset_type);
    4560       595706 :       if (!offset_vectype)
    4561        10158 :         continue;
    4562              : 
    4563              :       /* Try multiple scale values.  Start with exact match, then try
    4564              :          smaller common scales that a target might support.  */
    4565       585548 :       int scales_to_try[] = {scale, 1, 2, 4, 8};
    4566              : 
    4567      3513288 :       for (unsigned int j = 0;
    4568      3513288 :            j < sizeof (scales_to_try) / sizeof (*scales_to_try);
    4569              :            j++)
    4570              :         {
    4571      2927740 :           int try_scale = scales_to_try[j];
    4572              : 
    4573              :           /* Skip scales >= requested scale (except for exact match).  */
    4574      2927740 :           if (j > 0 && try_scale >= scale)
    4575      1754304 :             continue;
    4576              : 
    4577              :           /* Skip if requested scale is not a multiple of this scale.  */
    4578      1173580 :           if (j > 0 && scale % try_scale != 0)
    4579          144 :             continue;
    4580              : 
    4581      1173436 :           vec<int> elsvals = vNULL;
    4582              : 
    4583              :           /* If we haven't determined which IFN is supported yet, try all three
    4584              :              to find which one the target supports.  */
    4585      1173436 :           if (ifn == IFN_LAST)
    4586              :             {
    4587      1173436 :               ifn = vect_gather_scatter_which_ifn (read_p, masked_p,
    4588              :                                                    vectype, memory_type,
    4589              :                                                    offset_vectype, try_scale,
    4590              :                                                    &elsvals);
    4591      1173436 :               if (ifn != IFN_LAST)
    4592              :                 {
    4593              :                   /* Found which IFN is supported.  Save this configuration.  */
    4594            0 :                   gather_scatter_config config;
    4595            0 :                   config.ifn = ifn;
    4596            0 :                   config.offset_vectype = offset_vectype;
    4597            0 :                   config.scale = try_scale;
    4598            0 :                   config.elsvals = elsvals;
    4599            0 :                   configs.safe_push (config);
    4600              :                 }
    4601              :             }
    4602              :           else
    4603              :             {
    4604              :               /* We already know which IFN is supported, just check if this
    4605              :                  offset type and scale work with it.  */
    4606            0 :               if (internal_gather_scatter_fn_supported_p (ifn, vectype,
    4607              :                                                           memory_type,
    4608              :                                                           offset_vectype,
    4609              :                                                           try_scale,
    4610              :                                                           &elsvals))
    4611              :                 {
    4612            0 :                   gather_scatter_config config;
    4613            0 :                   config.ifn = ifn;
    4614            0 :                   config.offset_vectype = offset_vectype;
    4615            0 :                   config.scale = try_scale;
    4616            0 :                   config.elsvals = elsvals;
    4617            0 :                   configs.safe_push (config);
    4618              :                 }
    4619              :             }
    4620              :         }
    4621              :     }
    4622              : 
    4623        84449 :   return configs;
    4624        84449 : }
    4625              : 
    4626              : /* Check whether we can use an internal function for a gather load
    4627              :    or scatter store.  VINFO is passed on to the vector type lookups.
    4628              :    READ_P is true for loads and false for stores.  MASKED_P is true if
    4629              :    the load or store is conditional.  VECTYPE is the data vector type;
    4630              :    its elements must be as wide as MEMORY_TYPE, the type of the memory
    4631              :    elements being loaded or stored.  OFFSET_TYPE is the type of the offset
    4632              :    applied to the invariant base address; if it is scalar the function
    4633              :    chooses an appropriate vector type for it.  SCALE is the amount by which
    4634              :    the offset should be multiplied *after* conversion to address width.
    4635              :    If a smaller scale than SCALE has to be used, *SUPPORTED_SCALE
    4636              :    is set to that scale (requiring additional multiplication).
    4637              :    Otherwise *SUPPORTED_SCALE is 0.
    4638              : 
    4639              :    Return true if the function is supported, storing the function id in
    4640              :    *IFN_OUT and the vector type for the offset in *OFFSET_VECTYPE_OUT.
    4641              :    If the offset must be converted to a vector type of different sign or
    4642              :    precision, store that type in *SUPPORTED_OFFSET_VECTYPE, else NULL_TREE.
    4643              : 
    4644              :    If we can use gather/scatter and ELSVALS is nonzero, store the possible
    4645              :    else values in *ELSVALS.  */
    4646              : 
    4647              : bool
    4648        84449 : vect_gather_scatter_fn_p (vec_info *vinfo, bool read_p, bool masked_p,
    4649              :                           tree vectype, tree memory_type, tree offset_type,
    4650              :                           int scale, int *supported_scale,
    4651              :                           internal_fn *ifn_out,
    4652              :                           tree *offset_vectype_out,
    4653              :                           tree *supported_offset_vectype,
    4654              :                           vec<int> *elsvals)
    4655              : {
    4656        84449 :   *supported_offset_vectype = NULL_TREE;
    4657        84449 :   *supported_scale = 0;
    4658        84449 :   unsigned int memory_bits = tree_to_uhwi (TYPE_SIZE (memory_type));
    4659        84449 :   unsigned int element_bits = vector_element_bits (vectype);
    4660        84449 :   if (element_bits != memory_bits)
    4661              :     /* For now the vector elements must be the same width as the
    4662              :        memory elements.  */
    4663              :     return false;
    4664              : 
    4665              :   /* Get the original offset vector type for comparison.  */
    4666        84449 :   tree offset_vectype = VECTOR_TYPE_P (offset_type)
    4667        84449 :     ? offset_type : get_vectype_for_scalar_type (vinfo, offset_type);
    4668              : 
    4669              :   /* If there is no offset vectype, bail.  */
    4670        70376 :   if (!offset_vectype)
    4671              :     return false;
    4672              :   /* From here on OFFSET_TYPE is the element type of OFFSET_VECTYPE.  */
    4673        84449 :   offset_type = TREE_TYPE (offset_vectype);
    4674              : 
    4675              :   /* Get all supported configurations for this data vector type.  */
    4676        84449 :   auto_vec<gather_scatter_config> configs
    4677              :     = vect_gather_scatter_get_configs (vinfo, read_p, masked_p, vectype,
    4678        84449 :                                        memory_type, offset_type, scale);
    4679              : 
    4680        84449 :   if (configs.is_empty ())
    4681              :     return false;
    4682              : 
    4683              :   /* Selection priority:
    4684              :      1 - Exact scale match + offset type match
    4685              :      2 - Exact scale match + sign-swapped offset
    4686              :      3 - Smaller scale + offset type match
    4687              :      4 - Smaller scale + sign-swapped offset
    4688              :      Within each category, prefer smaller offset types.  */
    4689              : 
    4690              :   /* First pass: exact scale match with matching offset signedness.  */
    4691            0 :   for (unsigned int i = 0; i < configs.length (); i++)
    4692              :     {
    4693            0 :       if (configs[i].scale == scale
    4694            0 :           && TYPE_SIGN (configs[i].offset_vectype)
    4695            0 :              == TYPE_SIGN (offset_vectype))
    4696              :         {
    4697            0 :           *ifn_out = configs[i].ifn;
    4698            0 :           *offset_vectype_out = configs[i].offset_vectype;
    4699            0 :           if (elsvals)
    4700            0 :             *elsvals = configs[i].elsvals;
    4701            0 :           return true;
    4702              :         }
    4703              :     }
    4704              : 
    4705              :   /* No direct match.  Look for a sign-swapped offset vectype, or a smaller
    4706              :      scale with a larger (possibly sign-swapped) offset vectype.  For the
    4707              :      sign-swapped case NEEDED_PRECISION is twice the precision of an unsigned
    4708              :      offset or POINTER_SIZE for a signed one, capped at POINTER_SIZE.  */
    4709            0 :   unsigned int offset_precision = TYPE_PRECISION (TREE_TYPE (offset_vectype));
    4710            0 :   unsigned int needed_precision
    4711            0 :     = TYPE_UNSIGNED (offset_vectype) ? offset_precision * 2 : POINTER_SIZE;
    4712            0 :   needed_precision = std::min (needed_precision, (unsigned) POINTER_SIZE);
    4713              : 
    4714              :   /* Second pass: exact scale match with a sign-swapped offset vectype
    4715              :      of at least NEEDED_PRECISION bits.  */
    4716            0 :   enum tree_code tmp;
    4717            0 :   for (unsigned int i = 0; i < configs.length (); i++)
    4718              :     {
    4719            0 :       unsigned int precision
    4720            0 :         = TYPE_PRECISION (TREE_TYPE (configs[i].offset_vectype));
    4721            0 :       if (configs[i].scale == scale
    4722            0 :           && precision >= needed_precision
    4723            0 :           && (supportable_convert_operation (CONVERT_EXPR,
    4724            0 :                                              configs[i].offset_vectype,
    4725              :                                              offset_vectype, &tmp)
    4726            0 :               || (needed_precision == offset_precision
    4727            0 :                   && tree_nop_conversion_p (configs[i].offset_vectype,
    4728              :                                             offset_vectype))))
    4729              :         {
    4730            0 :           *ifn_out = configs[i].ifn;
    4731            0 :           *offset_vectype_out = offset_vectype;
    4732            0 :           *supported_offset_vectype = configs[i].offset_vectype;
    4733            0 :           if (elsvals)
    4734            0 :             *elsvals = configs[i].elsvals;
    4735            0 :           return true;
    4736              :         }
    4737              :     }
    4738              : 
    4739              :   /* Third pass: Try a smaller scale with the same signedness.  */
    4740            0 :   needed_precision = offset_precision * 2;
    4741            0 :   needed_precision = std::min (needed_precision, (unsigned) POINTER_SIZE);
    4742              : 
    4743            0 :   for (unsigned int i = 0; i < configs.length (); i++)
    4744              :     {
    4745            0 :       unsigned int precision
    4746            0 :         = TYPE_PRECISION (TREE_TYPE (configs[i].offset_vectype));
    4747            0 :       if (configs[i].scale < scale
    4748            0 :           && TYPE_SIGN (configs[i].offset_vectype)
    4749            0 :              == TYPE_SIGN (offset_vectype)
    4750            0 :           && precision >= needed_precision)
    4751              :         {
    4752            0 :           *ifn_out = configs[i].ifn;
    4753            0 :           *offset_vectype_out = configs[i].offset_vectype;
    4754            0 :           *supported_scale = configs[i].scale;
    4755              :           /* Only set SUPPORTED_OFFSET_VECTYPE if this is a real
    4756              :              conversion.  */
    4757            0 :           if (!useless_type_conversion_p (offset_vectype,
    4758            0 :                                           configs[i].offset_vectype))
    4759            0 :             *supported_offset_vectype = configs[i].offset_vectype;
    4760            0 :           if (elsvals)
    4761            0 :             *elsvals = configs[i].elsvals;
    4762            0 :           return true;
    4763              :         }
    4764              :     }
    4765              : 
    4766              :   /* Fourth pass: Try a smaller scale and sign-swapped offset vectype.  */
    4767            0 :   needed_precision
    4768            0 :     = TYPE_UNSIGNED (offset_vectype) ? offset_precision * 2 : POINTER_SIZE;
    4769            0 :   needed_precision = std::min (needed_precision, (unsigned) POINTER_SIZE);
    4770              : 
    4771            0 :   for (unsigned int i = 0; i < configs.length (); i++)
    4772              :     {
    4773            0 :       unsigned int precision
    4774            0 :         = TYPE_PRECISION (TREE_TYPE (configs[i].offset_vectype));
    4775            0 :       if (configs[i].scale < scale
    4776            0 :           && precision >= needed_precision
    4777            0 :           && (supportable_convert_operation (CONVERT_EXPR,
    4778            0 :                                              configs[i].offset_vectype,
    4779              :                                              offset_vectype, &tmp)
    4780            0 :               || (needed_precision == offset_precision
    4781            0 :                   && tree_nop_conversion_p (configs[i].offset_vectype,
    4782              :                                             offset_vectype))))
    4783              :         {
    4784            0 :           *ifn_out = configs[i].ifn;
    4785            0 :           *offset_vectype_out = offset_vectype;
    4786            0 :           *supported_offset_vectype = configs[i].offset_vectype;
    4787            0 :           *supported_scale = configs[i].scale;
    4788            0 :           if (elsvals)
    4789            0 :             *elsvals = configs[i].elsvals;
    4790            0 :           return true;
    4791              :         }
    4792              :     }
    4793              : 
    4794              :   return false;
    4795        84449 : }
    4796              : 
    4797              : /* STMT_INFO is a call to an internal gather load or scatter store function.
    4798              :    Fill in INFO from the call's arguments, STMT_INFO's vectype and data ref.  */
    4799              : 
    4800              : void
    4801            0 : vect_describe_gather_scatter_call (stmt_vec_info stmt_info,
    4802              :                                    gather_scatter_info *info)
    4803              : {
    4804            0 :   gcall *call = as_a <gcall *> (stmt_info->stmt);
    4805            0 :   tree vectype = STMT_VINFO_VECTYPE (stmt_info);
    4806            0 :   data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
    4807              :   /* Base is argument 0; the other operands sit at IFN-specific indices.  */
    4808            0 :   info->ifn = gimple_call_internal_fn (call);
    4809            0 :   info->decl = NULL_TREE;
    4810            0 :   info->base = gimple_call_arg (call, 0);
    4811            0 :   info->alias_ptr = gimple_call_arg
    4812            0 :                      (call, internal_fn_alias_ptr_index (info->ifn));
    4813            0 :   info->offset = gimple_call_arg
    4814            0 :                   (call, internal_fn_offset_index (info->ifn));
    4815            0 :   info->offset_vectype = NULL_TREE;
    4816            0 :   info->scale = TREE_INT_CST_LOW (gimple_call_arg
    4817              :                                   (call, internal_fn_scale_index (info->ifn)));
    4818            0 :   info->element_type = TREE_TYPE (vectype);
    4819            0 :   info->memory_type = TREE_TYPE (DR_REF (dr));
    4820            0 : }
    4821              : 
    4822              : /* Return true if a non-affine read or write in STMT_INFO is suitable for a
    4823              :    gather load or scatter store with VECTYPE.  Describe the operation in *INFO
    4824              :    if so.  If it is suitable and ELSVALS is nonzero store the supported else
    4825              :    values in the vector it points to.  */
    4826              : 
    4827              : bool
    4828       350457 : vect_check_gather_scatter (stmt_vec_info stmt_info, tree vectype,
    4829              :                            loop_vec_info loop_vinfo,
    4830              :                            gather_scatter_info *info, vec<int> *elsvals)
    4831              : {
    4832       350457 :   HOST_WIDE_INT scale = 1;
    4833       350457 :   poly_int64 pbitpos, pbitsize;
    4834       350457 :   class loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
    4835       350457 :   struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
    4836       350457 :   tree offtype = NULL_TREE;
    4837       350457 :   tree decl = NULL_TREE, base, off;
    4838       350457 :   tree memory_type = TREE_TYPE (DR_REF (dr));
    4839       350457 :   machine_mode pmode;
    4840       350457 :   int punsignedp, reversep, pvolatilep = 0;
    4841       350457 :   internal_fn ifn;
    4842       350457 :   tree offset_vectype;
    4843       350457 :   bool masked_p = false;
    4844              : 
    4845              :   /* See whether this is already a call to a gather/scatter internal function.
    4846              :      If not, see whether it's a masked load or store.  */
    4847       350457 :   gcall *call = dyn_cast <gcall *> (stmt_info->stmt);
    4848         6282 :   if (call && gimple_call_internal_p (call))
    4849              :     {
    4850         6282 :       ifn = gimple_call_internal_fn (call);
    4851         6282 :       if (internal_gather_scatter_fn_p (ifn))
    4852              :         {
    4853            0 :           vect_describe_gather_scatter_call (stmt_info, info);
    4854              : 
    4855              :           /* In pattern recog we simply used a ZERO else value that
    4856              :              we need to correct here.  To that end just re-use the
    4857              :              (already successful) check if we support a gather IFN
    4858              :              and have it populate the else values.  */
    4859            0 :           if (DR_IS_READ (dr) && internal_fn_mask_index (ifn) >= 0 && elsvals)
    4860            0 :             supports_vec_gather_load_p (TYPE_MODE (vectype), elsvals);
    4861            0 :           return true;
    4862              :         }
    4863         6282 :       masked_p = (ifn == IFN_MASK_LOAD || ifn == IFN_MASK_STORE);
    4864              :     }
    4865              : 
    4866              :   /* True if we should aim to use internal functions rather than
    4867              :      built-in functions.  */
    4868       350457 :   bool use_ifn_p = (DR_IS_READ (dr)
    4869       350457 :                     ? supports_vec_gather_load_p (TYPE_MODE (vectype),
    4870              :                                                   elsvals)
    4871       350457 :                     : supports_vec_scatter_store_p (TYPE_MODE (vectype)));
    4872              : 
    4873       350457 :   base = DR_REF (dr);
    4874              :   /* For masked loads/stores, DR_REF (dr) is an artificial MEM_REF,
    4875              :      see if we can use the def stmt of the address.  */
    4876       350457 :   if (masked_p
    4877         6282 :       && TREE_CODE (base) == MEM_REF
    4878         6282 :       && TREE_CODE (TREE_OPERAND (base, 0)) == SSA_NAME
    4879         6282 :       && integer_zerop (TREE_OPERAND (base, 1))
    4880       356739 :       && !expr_invariant_in_loop_p (loop, TREE_OPERAND (base, 0)))
    4881              :     {
    4882         6282 :       gimple *def_stmt = SSA_NAME_DEF_STMT (TREE_OPERAND (base, 0));
    4883         6282 :       if (is_gimple_assign (def_stmt)
    4884         6282 :           && gimple_assign_rhs_code (def_stmt) == ADDR_EXPR)
    4885          615 :         base = TREE_OPERAND (gimple_assign_rhs1 (def_stmt), 0);
    4886              :     }
    4887              : 
    4888              :   /* The gather and scatter builtins need address of the form
    4889              :      loop_invariant + vector * {1, 2, 4, 8}
    4890              :      or
    4891              :      loop_invariant + sign_extend (vector) * { 1, 2, 4, 8 }.
    4892              :      Unfortunately DR_BASE_ADDRESS/DR_OFFSET can be a mixture
    4893              :      of loop invariants/SSA_NAMEs defined in the loop, with casts,
    4894              :      multiplications and additions in it.  To get a vector, we need
    4895              :      a single SSA_NAME that will be defined in the loop and will
    4896              :      contain everything that is not loop invariant and that can be
    4897              :      vectorized.  The following code attempts to find such a preexisting
    4898              :      SSA_NAME OFF and put the loop invariants into a tree BASE
    4899              :      that can be gimplified before the loop.  */
    4900       350457 :   base = get_inner_reference (base, &pbitsize, &pbitpos, &off, &pmode,
    4901              :                               &punsignedp, &reversep, &pvolatilep);
    4902       350457 :   if (reversep)
    4903              :     return false;
    4904              : 
    4905              :   /* PR 107346.  Packed structs can have fields at offsets that are not
    4906              :      multiples of BITS_PER_UNIT.  Do not use gather/scatters in such cases.  */
    4907       350457 :   if (!multiple_p (pbitpos, BITS_PER_UNIT))
    4908              :     return false;
    4909              : 
    4910              :   /* We need to be able to form an address to the base which for example
    4911              :      isn't possible for hard registers.  */
    4912       350457 :   if (may_be_nonaddressable_p (base))
    4913              :     return false;
    4914              : 
    4915       350449 :   poly_int64 pbytepos = exact_div (pbitpos, BITS_PER_UNIT);
    4916              : 
    4917       350449 :   if (TREE_CODE (base) == MEM_REF)
    4918              :     {
    4919       284194 :       if (!integer_zerop (TREE_OPERAND (base, 1)))
    4920              :         {
    4921        33615 :           if (off == NULL_TREE)
    4922        33298 :             off = wide_int_to_tree (sizetype, mem_ref_offset (base));
    4923              :           else
    4924          317 :             off = size_binop (PLUS_EXPR, off,
    4925              :                               fold_convert (sizetype, TREE_OPERAND (base, 1)));
    4926              :         }
    4927       284194 :       base = TREE_OPERAND (base, 0);
    4928              :     }
    4929              :   else
    4930        66255 :     base = build_fold_addr_expr (base);
    4931              : 
    4932       350449 :   if (off == NULL_TREE)
    4933       225830 :     off = size_zero_node;
    4934              : 
    4935              :   /* BASE must be loop invariant.  If it is not invariant, but OFF is, then we
    4936              :      can fix that by swapping BASE and OFF.  */
    4937       350449 :   if (!expr_invariant_in_loop_p (loop, base))
    4938              :     {
    4939       259655 :       if (!expr_invariant_in_loop_p (loop, off))
    4940              :         return false;
    4941              : 
    4942       259386 :       std::swap (base, off);
    4943              :     }
    4944              : 
    4945       350180 :   base = fold_convert (sizetype, base);
    4946       350180 :   base = size_binop (PLUS_EXPR, base, size_int (pbytepos));
    4947       350180 :   int tmp_scale;
    4948       350180 :   tree tmp_offset_vectype;
    4949              : 
    4950              :   /* OFF at this point may be either a SSA_NAME or some tree expression
    4951              :      from get_inner_reference.  Try to peel off loop invariants from it
    4952              :      into BASE as long as possible.  */
    4953       350180 :   STRIP_NOPS (off);
    4954       917927 :   while (offtype == NULL_TREE)
    4955              :     {
    4956       797453 :       enum tree_code code;
    4957       797453 :       tree op0, op1, add = NULL_TREE;
    4958              : 
    4959       797453 :       if (TREE_CODE (off) == SSA_NAME)
    4960              :         {
    4961       611170 :           gimple *def_stmt = SSA_NAME_DEF_STMT (off);
    4962              : 
    4963       611170 :           if (expr_invariant_in_loop_p (loop, off))
    4964            0 :             return false;
    4965              : 
    4966       611170 :           if (gimple_code (def_stmt) != GIMPLE_ASSIGN)
    4967              :             break;
    4968              : 
    4969       480538 :           op0 = gimple_assign_rhs1 (def_stmt);
    4970       480538 :           code = gimple_assign_rhs_code (def_stmt);
    4971       480538 :           op1 = gimple_assign_rhs2 (def_stmt);
    4972              :         }
    4973              :       else
    4974              :         {
    4975       186283 :           if (get_gimple_rhs_class (TREE_CODE (off)) == GIMPLE_TERNARY_RHS)
    4976              :             return false;
    4977       186283 :           code = TREE_CODE (off);
    4978       186283 :           extract_ops_from_tree (off, &code, &op0, &op1);
    4979              :         }
    4980       666821 :       switch (code)
    4981              :         {
    4982       203916 :         case POINTER_PLUS_EXPR:
    4983       203916 :         case PLUS_EXPR:
    4984       203916 :           if (expr_invariant_in_loop_p (loop, op0))
    4985              :             {
    4986       134744 :               add = op0;
    4987       134744 :               off = op1;
    4988       187876 :             do_add:
    4989       187876 :               add = fold_convert (sizetype, add);
    4990       187876 :               if (scale != 1)
    4991        46551 :                 add = size_binop (MULT_EXPR, add, size_int (scale));
    4992       187876 :               base = size_binop (PLUS_EXPR, base, add);
    4993       567747 :               continue;
    4994              :             }
    4995        69172 :           if (expr_invariant_in_loop_p (loop, op1))
    4996              :             {
    4997        52838 :               add = op1;
    4998        52838 :               off = op0;
    4999        52838 :               goto do_add;
    5000              :             }
    5001              :           break;
    5002          487 :         case MINUS_EXPR:
    5003          487 :           if (expr_invariant_in_loop_p (loop, op1))
    5004              :             {
    5005          294 :               add = fold_convert (sizetype, op1);
    5006          294 :               add = size_binop (MINUS_EXPR, size_zero_node, add);
    5007          294 :               off = op0;
    5008          294 :               goto do_add;
    5009              :             }
    5010              :           break;
    5011       203071 :         case MULT_EXPR:
    5012       203071 :           if (scale == 1 && tree_fits_shwi_p (op1))
    5013              :             {
    5014       170299 :               int new_scale = tree_to_shwi (op1);
    5015              :               /* Only treat this as a scaling operation if the target
    5016              :                  supports it for at least some offset type.  */
    5017       170299 :               if (use_ifn_p
    5018            0 :                   && !vect_gather_scatter_fn_p (loop_vinfo, DR_IS_READ (dr),
    5019              :                                                 masked_p, vectype, memory_type,
    5020              :                                                 signed_char_type_node,
    5021              :                                                 new_scale, &tmp_scale,
    5022              :                                                 &ifn,
    5023              :                                                 &offset_vectype,
    5024              :                                                 &tmp_offset_vectype,
    5025              :                                                 elsvals)
    5026       170299 :                   && !vect_gather_scatter_fn_p (loop_vinfo, DR_IS_READ (dr),
    5027              :                                                 masked_p, vectype, memory_type,
    5028              :                                                 unsigned_char_type_node,
    5029              :                                                 new_scale, &tmp_scale,
    5030              :                                                 &ifn,
    5031              :                                                 &offset_vectype,
    5032              :                                                 &tmp_offset_vectype,
    5033              :                                                 elsvals))
    5034              :                 break;
    5035       170299 :               scale = new_scale;
    5036       170299 :               off = op0;
    5037       170299 :               continue;
    5038       170299 :             }
    5039              :           break;
    5040            0 :         case SSA_NAME:
    5041            0 :           off = op0;
    5042            0 :           continue;
    5043       215639 :         CASE_CONVERT:
    5044       431262 :           if (!POINTER_TYPE_P (TREE_TYPE (op0))
    5045       431262 :               && !INTEGRAL_TYPE_P (TREE_TYPE (op0)))
    5046              :             break;
    5047              : 
    5048              :           /* Don't include the conversion if the target is happy with
    5049              :              the current offset type.  */
    5050       215639 :           if (use_ifn_p
    5051            0 :               && TREE_CODE (off) == SSA_NAME
    5052            0 :               && !POINTER_TYPE_P (TREE_TYPE (off))
    5053       215639 :               && vect_gather_scatter_fn_p (loop_vinfo, DR_IS_READ (dr),
    5054              :                                            masked_p, vectype, memory_type,
    5055            0 :                                            TREE_TYPE (off),
    5056              :                                            scale, &tmp_scale,
    5057              :                                            &ifn,
    5058              :                                            &offset_vectype,
    5059              :                                            &tmp_offset_vectype,
    5060              :                                            elsvals))
    5061              :             break;
    5062              : 
    5063       215639 :           if (TYPE_PRECISION (TREE_TYPE (op0))
    5064       215639 :               == TYPE_PRECISION (TREE_TYPE (off)))
    5065              :             {
    5066        89098 :               off = op0;
    5067        89098 :               continue;
    5068              :             }
    5069              : 
    5070              :           /* Include the conversion if it is widening and we're using
    5071              :              the IFN path or the target can handle the converted from
    5072              :              offset or the current size is not already the same as the
    5073              :              data vector element size.  */
    5074       126541 :           if ((TYPE_PRECISION (TREE_TYPE (op0))
    5075       126541 :                < TYPE_PRECISION (TREE_TYPE (off)))
    5076       126541 :               && (use_ifn_p
    5077       125813 :                   || (DR_IS_READ (dr)
    5078        82077 :                       ? (targetm.vectorize.builtin_gather
    5079        82077 :                          && targetm.vectorize.builtin_gather (vectype,
    5080        82077 :                                                               TREE_TYPE (op0),
    5081              :                                                               scale))
    5082        43736 :                       : (targetm.vectorize.builtin_scatter
    5083        43736 :                          && targetm.vectorize.builtin_scatter (vectype,
    5084        43736 :                                                                TREE_TYPE (op0),
    5085              :                                                                scale)))
    5086       124717 :                   || !operand_equal_p (TYPE_SIZE (TREE_TYPE (off)),
    5087       124717 :                                        TYPE_SIZE (TREE_TYPE (vectype)), 0)))
    5088              :             {
    5089       120474 :               off = op0;
    5090       120474 :               offtype = TREE_TYPE (off);
    5091       120474 :               STRIP_NOPS (off);
    5092       120474 :               continue;
    5093              :             }
    5094              :           break;
    5095              :         default:
    5096              :           break;
    5097            0 :         }
    5098              :       break;
    5099              :     }
    5100              : 
    5101              :   /* If at the end OFF still isn't a SSA_NAME or isn't
    5102              :      defined in the loop, punt.  */
    5103       350180 :   if (TREE_CODE (off) != SSA_NAME
    5104       350180 :       || expr_invariant_in_loop_p (loop, off))
    5105         6347 :     return false;
    5106              : 
    5107       343833 :   if (offtype == NULL_TREE)
    5108       223713 :     offtype = TREE_TYPE (off);
    5109              : 
    5110       343833 :   if (use_ifn_p)
    5111              :     {
    5112            0 :       if (!vect_gather_scatter_fn_p (loop_vinfo, DR_IS_READ (dr), masked_p,
    5113              :                                      vectype, memory_type, offtype,
    5114              :                                      scale, &tmp_scale,
    5115              :                                      &ifn, &offset_vectype,
    5116              :                                      &tmp_offset_vectype,
    5117              :                                      elsvals))
    5118            0 :         ifn = IFN_LAST;
    5119              :       decl = NULL_TREE;
    5120              :     }
    5121              :   else
    5122              :     {
    5123       343833 :       if (DR_IS_READ (dr))
    5124              :         {
    5125       259425 :           if (targetm.vectorize.builtin_gather)
    5126       259425 :             decl = targetm.vectorize.builtin_gather (vectype, offtype, scale);
    5127              :         }
    5128              :       else
    5129              :         {
    5130        84408 :           if (targetm.vectorize.builtin_scatter)
    5131        84408 :             decl = targetm.vectorize.builtin_scatter (vectype, offtype, scale);
    5132              :         }
    5133       343833 :       ifn = IFN_LAST;
    5134              :       /* The offset vector type will be read from DECL when needed.  */
    5135       343833 :       offset_vectype = NULL_TREE;
    5136              :     }
    5137              : 
    5138       343833 :   gcc_checking_assert (expr_invariant_in_loop_p (loop, base));
    5139       343833 :   gcc_checking_assert (!expr_invariant_in_loop_p (loop, off));
    5140              : 
    5141       343833 :   info->ifn = ifn;
    5142       343833 :   info->decl = decl;
    5143       343833 :   info->base = base;
    5144              : 
    5145       687666 :   info->alias_ptr = build_int_cst
    5146       343833 :     (reference_alias_ptr_type (DR_REF (dr)),
    5147       343833 :      get_object_alignment (DR_REF (dr)));
    5148              : 
    5149       343833 :   info->offset = off;
    5150       343833 :   info->offset_vectype = offset_vectype;
    5151       343833 :   info->scale = scale;
    5152       343833 :   info->element_type = TREE_TYPE (vectype);
    5153       343833 :   info->memory_type = memory_type;
    5154       343833 :   return true;
    5155              : }
    5156              : 
    5157              : /* Find the data references in STMT, analyze them with respect to LOOP and
    5158              :    append them to DATAREFS.  When DATAREF_GROUPS is nonnull, GROUP_ID is
    5159              :    pushed to it alongside every data reference pushed to DATAREFS.
                      :
                      :    Succeeds without pushing anything for clobbers and for stmts that
                      :    contain no data reference.  Returns a failure result (not a plain
                      :    boolean) if the data references in this stmt cannot be handled, that
                      :    is if STMT has volatile operands, can throw internally, fails
                      :    find_data_references_in_stmt, contains more than one data reference,
                      :    is a call other than an IFN_MASK_LOAD or IFN_MASK_STORE internal call,
                      :    accesses a bit-field, or has an INTEGER_CST base address.  Data
                      :    references that are not pushed are released with free_data_ref.  */
    5160              : 
    5161              : opt_result
    5162     31984749 : vect_find_stmt_data_reference (loop_p loop, gimple *stmt,
    5163              :                                vec<data_reference_p> *datarefs,
    5164              :                                vec<int> *dataref_groups, int group_id)
    5165              : {
    5166              :   /* We can ignore clobbers for dataref analysis - they are removed during
    5167              :      loop vectorization and BB vectorization checks dependences with a
    5168              :      stmt walk.  */
    5169     31984749 :   if (gimple_clobber_p (stmt))
    5170      1084230 :     return opt_result::success ();
    5171              : 
    5172     57508460 :   if (gimple_has_volatile_ops (stmt))
    5173       320469 :     return opt_result::failure_at (stmt, "not vectorized: volatile type: %G",
    5174              :                                    stmt);
    5175              : 
    5176     30580050 :   if (stmt_can_throw_internal (cfun, stmt))
    5177       677346 :     return opt_result::failure_at (stmt,
    5178              :                                    "not vectorized:"
    5179              :                                    " statement can throw an exception: %G",
    5180              :                                    stmt);
    5181              : 
    5182     29902704 :   auto_vec<data_reference_p, 2> refs;
    5183     29902704 :   opt_result res = find_data_references_in_stmt (loop, stmt, &refs);
    5184     29902704 :   if (!res)
    5185      3638840 :     return res;
    5186              : 
    5187     26263864 :   if (refs.is_empty ())
    5188     15093792 :     return opt_result::success ();
    5189              : 
    5190     11170072 :   if (refs.length () > 1)
    5191              :     {
    5192      1245307 :       while (!refs.is_empty ())
    5193       830507 :         free_data_ref (refs.pop ());
    5194       414800 :       return opt_result::failure_at (stmt,
    5195              :                                      "not vectorized: more than one "
    5196              :                                      "data ref in stmt: %G", stmt);
    5197              :     }
    5198              : 
    5199     10755272 :   data_reference_p dr = refs.pop ();
    5200     10755272 :   if (gcall *call = dyn_cast <gcall *> (stmt))
    5201        20563 :     if (!gimple_call_internal_p (call)
    5202        20563 :         || (gimple_call_internal_fn (call) != IFN_MASK_LOAD
    5203        17491 :             && gimple_call_internal_fn (call) != IFN_MASK_STORE))
    5204              :       {
    5205        16982 :         free_data_ref (dr);
    5206        16982 :         return opt_result::failure_at (stmt,
    5207              :                                        "not vectorized: dr in a call %G", stmt);
    5208              :       }
    5209              : 
    5210     10738290 :   if (TREE_CODE (DR_REF (dr)) == COMPONENT_REF
    5211     10738290 :       && DECL_BIT_FIELD (TREE_OPERAND (DR_REF (dr), 1)))
    5212              :     {
    5213        53619 :       free_data_ref (dr);
    5214        53619 :       return opt_result::failure_at (stmt,
    5215              :                                      "not vectorized:"
    5216              :                                      " statement is an unsupported"
    5217              :                                      " bitfield access %G", stmt);
    5218              :     }
    5219              : 
    5220     10684671 :   if (DR_BASE_ADDRESS (dr)
    5221     10597141 :       && TREE_CODE (DR_BASE_ADDRESS (dr)) == INTEGER_CST)
    5222              :     {
    5223          992 :       free_data_ref (dr);
    5224          992 :       return opt_result::failure_at (stmt,
    5225              :                                      "not vectorized:"
    5226              :                                      " base addr of dr is a constant\n");
    5227              :     }
    5228              : 
    5229              :   /* Check whether this may be a SIMD lane access and adjust the
    5230              :      DR to make it easier for us to handle it.
                      :
                      :      This is only attempted when LOOP has a simduid and at least one of
                      :      the base address, offset, init or step of DR is missing.  The
                      :      reference is then re-created with create_data_ref relative to the
                      :      loop containing STMT.  If that yields a zero step and the offset,
                      :      after stripping a constant multiplier and widening conversions, is
                      :      the result of an IFN_GOMP_SIMD_LANE call on LOOP's simduid, and the
                      :      multiplier equals the size in bytes of the accessed type, the new
                      :      reference is given a zero offset and that multiplier as its step
                      :      and is pushed instead of DR.  Otherwise the new reference is
                      :      released and DR is pushed unchanged.  */
    5231     10683679 :   if (loop
    5232       599196 :       && loop->simduid
    5233        10711 :       && (!DR_BASE_ADDRESS (dr)
    5234         2960 :           || !DR_OFFSET (dr)
    5235         2960 :           || !DR_INIT (dr)
    5236         2960 :           || !DR_STEP (dr)))
    5237              :     {
    5238         7751 :       struct data_reference *newdr
    5239         7751 :         = create_data_ref (NULL, loop_containing_stmt (stmt), DR_REF (dr), stmt,
    5240         7751 :                            DR_IS_READ (dr), DR_IS_CONDITIONAL_IN_STMT (dr));
    5241         7751 :       if (DR_BASE_ADDRESS (newdr)
    5242         7751 :           && DR_OFFSET (newdr)
    5243         7751 :           && DR_INIT (newdr)
    5244         7751 :           && DR_STEP (newdr)
    5245         7751 :           && TREE_CODE (DR_INIT (newdr)) == INTEGER_CST
    5246        15502 :           && integer_zerop (DR_STEP (newdr)))
    5247              :         {
    5248         7751 :           tree base_address = DR_BASE_ADDRESS (newdr);
    5249         7751 :           tree off = DR_OFFSET (newdr);
    5250         7751 :           tree step = ssize_int (1);
    5251         7751 :           if (integer_zerop (off)
    5252         7751 :               && TREE_CODE (base_address) == POINTER_PLUS_EXPR)
    5253              :             {
    5254           82 :               off = TREE_OPERAND (base_address, 1);
    5255           82 :               base_address = TREE_OPERAND (base_address, 0);
    5256              :             }
    5257         7751 :           STRIP_NOPS (off);
    5258         7751 :           if (TREE_CODE (off) == MULT_EXPR
    5259         7751 :               && tree_fits_uhwi_p (TREE_OPERAND (off, 1)))
    5260              :             {
    5261         7500 :               step = TREE_OPERAND (off, 1);
    5262         7500 :               off = TREE_OPERAND (off, 0);
    5263         7500 :               STRIP_NOPS (off);
    5264              :             }
    5265          541 :           if (CONVERT_EXPR_P (off)
    5266         7751 :               && (TYPE_PRECISION (TREE_TYPE (TREE_OPERAND (off, 0)))
    5267         7210 :                   < TYPE_PRECISION (TREE_TYPE (off))))
    5268         7210 :             off = TREE_OPERAND (off, 0);
    5269         7751 :           if (TREE_CODE (off) == SSA_NAME)
    5270              :             {
    5271         7226 :               gimple *def = SSA_NAME_DEF_STMT (off);
    5272              :               /* Look through widening conversion.  */
    5273         7226 :               if (is_gimple_assign (def)
    5274         7226 :                   && CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (def)))
    5275              :                 {
    5276            0 :                   tree rhs1 = gimple_assign_rhs1 (def);
    5277            0 :                   if (TREE_CODE (rhs1) == SSA_NAME
    5278            0 :                       && INTEGRAL_TYPE_P (TREE_TYPE (rhs1))
    5279            0 :                       && (TYPE_PRECISION (TREE_TYPE (off))
    5280            0 :                           > TYPE_PRECISION (TREE_TYPE (rhs1))))
    5281            0 :                     def = SSA_NAME_DEF_STMT (rhs1);
    5282              :                 }
    5283         7226 :               if (is_gimple_call (def)
    5284         7090 :                   && gimple_call_internal_p (def)
    5285        14316 :                   && (gimple_call_internal_fn (def) == IFN_GOMP_SIMD_LANE))
    5286              :                 {
    5287         7090 :                   tree arg = gimple_call_arg (def, 0);
    5288         7090 :                   tree reft = TREE_TYPE (DR_REF (newdr));
    5289         7090 :                   gcc_assert (TREE_CODE (arg) == SSA_NAME);
    5290         7090 :                   arg = SSA_NAME_VAR (arg);
    5291         7090 :                   if (arg == loop->simduid
    5292              :                       /* For now.  */
    5293         7090 :                       && tree_int_cst_equal (TYPE_SIZE_UNIT (reft), step))
    5294              :                     {
    5295         7065 :                       DR_BASE_ADDRESS (newdr) = base_address;
    5296         7065 :                       DR_OFFSET (newdr) = ssize_int (0);
    5297         7065 :                       DR_STEP (newdr) = step;
    5298         7065 :                       DR_OFFSET_ALIGNMENT (newdr) = BIGGEST_ALIGNMENT;
    5299         7065 :                       DR_STEP_ALIGNMENT (newdr) = highest_pow2_factor (step);
    5300              :                       /* Mark as simd-lane access: AUX holds -1 minus the
                      :                          second argument of the IFN_GOMP_SIMD_LANE call.  */
    5301         7065 :                       tree arg2 = gimple_call_arg (def, 1);
    5302         7065 :                       newdr->aux = (void *) (-1 - tree_to_uhwi (arg2));
    5303         7065 :                       free_data_ref (dr);
    5304         7065 :                       datarefs->safe_push (newdr);
    5305         7065 :                       if (dataref_groups)
    5306            0 :                         dataref_groups->safe_push (group_id);
    5307         7065 :                       return opt_result::success ();
    5308              :                     }
    5309              :                 }
    5310              :             }
    5311              :         }
    5312          686 :       free_data_ref (newdr);
    5313              :     }
    5314              : 
    5315     10676614 :   datarefs->safe_push (dr);
    5316     10676614 :   if (dataref_groups)
    5317     10084483 :     dataref_groups->safe_push (group_id);
    5318     10676614 :   return opt_result::success ();
    5319     29902704 : }
    5320              : 
    5321              : /* Function vect_analyze_data_refs.
    5322              : 
    5323              :    Process all the data references of the loop or basic block described by
                      :    VINFO, as recorded in VINFO->shared->datarefs.  For each of them:
                      :    - attach the data reference to the stmt_vec_info of its stmt;
                      :    - if its innermost analysis failed, treat it as a gather load or
                      :      scatter store candidate (loop vectorization only);
                      :    - record SIMD lane accesses and strided accesses;
                      :    - for a stmt nested in an inner loop, analyze the access relative
                      :      to the outer loop;
                      :    - compute the vector type and, for loop vectorization, store it in
                      :      the stmt_vec_info.
                      :
                      :    For basic-block vectorization a data reference that cannot be handled
                      :    only marks its stmt as not vectorizable and processing continues.
                      :    For loop vectorization a failure result is returned instead; if FATAL
                      :    is nonnull, *FATAL is set to false when the failure is a missing
                      :    vector type or an unsuitable gather/scatter access.
    5324              : 
    5325              :    The general structure of the analysis of data refs in the vectorizer is as
    5326              :    follows:
    5327              :    1- vect_analyze_data_refs(loop/bb): call
    5328              :       compute_data_dependences_for_loop/bb to find and analyze all data-refs
    5329              :       in the loop/bb and their dependences.
                      :       NOTE(review): this function does not call it itself; it only
                      :       reads the already collected VINFO->shared->datarefs - confirm
                      :       whether this step description is stale.
    5330              :    2- vect_analyze_dependences(): apply dependence testing using ddrs.
    5331              :    3- vect_analyze_drs_alignment(): check that ref_stmt.alignment is ok.
    5332              :    4- vect_analyze_drs_access(): check that ref_stmt.step is ok.
    5333              : 
    5334              : */
    5335              : 
    5336              : opt_result
    5337      2696649 : vect_analyze_data_refs (vec_info *vinfo, bool *fatal)
    5338              : {
    5339      2696649 :   class loop *loop = NULL;
    5340      2696649 :   unsigned int i;
    5341      2696649 :   struct data_reference *dr;
    5342      2696649 :   tree scalar_type;
    5343              : 
    5344      2696649 :   DUMP_VECT_SCOPE ("vect_analyze_data_refs");
    5345              : 
    5346      2696649 :   if (loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo))
    5347       512928 :     loop = LOOP_VINFO_LOOP (loop_vinfo);
    5348              : 
    5349              :   /* Go through the data-refs, check that the analysis succeeded.  Update
    5350              :      pointer from stmt_vec_info struct to DR and vectype.  */
    5351              : 
    5352      2696649 :   vec<data_reference_p> datarefs = vinfo->shared->datarefs;
    5353     17725997 :   FOR_EACH_VEC_ELT (datarefs, i, dr)
    5354              :     {
    5355     15101824 :       enum { SG_NONE, GATHER, SCATTER } gatherscatter = SG_NONE;
    5356              : 
    5357     15101824 :       gcc_assert (DR_REF (dr));
    5358     15101824 :       stmt_vec_info stmt_info = vinfo->lookup_stmt (DR_STMT (dr));
    5359     15101824 :       gcc_assert (!stmt_info->dr_aux.dr);
    5360     15101824 :       stmt_info->dr_aux.dr = dr;
    5361     15101824 :       stmt_info->dr_aux.stmt = stmt_info;
    5362              : 
    5363              :       /* Check that analysis of the data-ref succeeded.  */
    5364     15101824 :       if (!DR_BASE_ADDRESS (dr) || !DR_OFFSET (dr) || !DR_INIT (dr)
    5365     14984836 :           || !DR_STEP (dr))
    5366              :         {
    5367       233976 :           bool maybe_gather
    5368       116988 :             = DR_IS_READ (dr)
    5369       116988 :               && !TREE_THIS_VOLATILE (DR_REF (dr));
    5370       233976 :           bool maybe_scatter
    5371              :             = DR_IS_WRITE (dr)
    5372       116988 :               && !TREE_THIS_VOLATILE (DR_REF (dr));
    5373              : 
    5374              :           /* A read or write whose innermost analysis failed may still be
    5375              :              vectorizable as a gather load or scatter store.  Only consider
                      :              that for loop vectorization and for stmts that are not
                      :              nested in an inner loop; vect_check_gather_scatter decides
                      :              below whether it can actually be used.  */
    5376       116988 :           if (is_a <loop_vec_info> (vinfo)
    5377       116988 :               && !nested_in_vect_loop_p (loop, stmt_info))
    5378              :             {
    5379       113614 :               if (maybe_gather || maybe_scatter)
    5380              :                 {
    5381       113614 :                   if (maybe_gather)
    5382              :                     gatherscatter = GATHER;
    5383              :                   else
    5384        21742 :                     gatherscatter = SCATTER;
    5385              :                 }
    5386              :             }
    5387              : 
    5388        21742 :           if (gatherscatter == SG_NONE)
    5389              :             {
    5390         3374 :               if (dump_enabled_p ())
    5391            5 :                 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
    5392              :                                  "not vectorized: data ref analysis "
    5393              :                                  "failed %G", stmt_info->stmt);
    5394         3374 :               if (is_a <bb_vec_info> (vinfo))
    5395              :                 {
    5396              :                   /* In BB vectorization the ref can still participate
    5397              :                      in dependence analysis, we just can't vectorize it.  */
    5398         3024 :                   STMT_VINFO_VECTORIZABLE (stmt_info) = false;
    5399         3024 :                   continue;
    5400              :                 }
    5401          350 :               return opt_result::failure_at (stmt_info->stmt,
    5402              :                                              "not vectorized:"
    5403              :                                              " data ref analysis failed: %G",
    5404              :                                              stmt_info->stmt);
    5405              :             }
    5406              :         }
    5407              : 
    5408              :       /* See if this was detected as SIMD lane access.  This is encoded
                      :          in DR->aux as one of the values -1 to -4 (see
                      :          vect_find_stmt_data_reference); the negated value is what
                      :          gets recorded on the stmt.  */
    5409     15098450 :       if (dr->aux == (void *)-1
    5410     15098450 :           || dr->aux == (void *)-2
    5411     15089548 :           || dr->aux == (void *)-3
    5412     15088708 :           || dr->aux == (void *)-4)
    5413              :         {
    5414        10542 :           if (nested_in_vect_loop_p (loop, stmt_info))
    5415            0 :             return opt_result::failure_at (stmt_info->stmt,
    5416              :                                            "not vectorized:"
    5417              :                                            " data ref analysis failed: %G",
    5418              :                                            stmt_info->stmt);
    5419        10542 :           STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info)
    5420        10542 :             = -(uintptr_t) dr->aux;
    5421              :         }
    5422              : 
    5423     15098450 :       tree base = get_base_address (DR_REF (dr));
    5424     15098450 :       if (base && VAR_P (base) && DECL_NONALIASED (base))
    5425              :         {
    5426         8894 :           if (dump_enabled_p ())
    5427          186 :             dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
    5428              :                              "not vectorized: base object not addressable "
    5429              :                              "for stmt: %G", stmt_info->stmt);
    5430         8894 :           if (is_a <bb_vec_info> (vinfo))
    5431              :             {
    5432              :               /* In BB vectorization the ref can still participate
    5433              :                  in dependence analysis, we just can't vectorize it.  */
    5434         8893 :               STMT_VINFO_VECTORIZABLE (stmt_info) = false;
    5435         8893 :               continue;
    5436              :             }
    5437            1 :           return opt_result::failure_at (stmt_info->stmt,
    5438              :                                          "not vectorized: base object not"
    5439              :                                          " addressable for stmt: %G",
    5440              :                                          stmt_info->stmt);
    5441              :         }
    5442              : 
    5443     15089556 :       if (is_a <loop_vec_info> (vinfo)
    5444      1167342 :           && DR_STEP (dr)
    5445     16143284 :           && TREE_CODE (DR_STEP (dr)) != INTEGER_CST)
    5446              :         {
    5447        44874 :           if (nested_in_vect_loop_p (loop, stmt_info))
    5448          389 :             return opt_result::failure_at (stmt_info->stmt,
    5449              :                                            "not vectorized: "
    5450              :                                            "not suitable for strided load %G",
    5451              :                                            stmt_info->stmt);
    5452        44485 :           STMT_VINFO_STRIDED_P (stmt_info) = true;
    5453              :         }
    5454              : 
    5455              :       /* Update DR field in stmt_vec_info struct.  */
    5456              : 
    5457              :       /* If the dataref is in an inner-loop of the loop that is considered
    5458              :          for vectorization, we also want to analyze the access relative to
    5459              :          the outer-loop (DR contains information only relative to the
    5460              :          inner-most enclosing loop).  We do that by building a reference to the
    5461              :          first location accessed by the inner-loop, and analyze it relative to
    5462              :          the outer-loop.  */
    5463     15089167 :       if (loop && nested_in_vect_loop_p (loop, stmt_info))
    5464              :         {
    5465              :           /* Build a reference to the first location accessed by the
    5466              :              inner loop: *(BASE + INIT + OFFSET).  By construction,
    5467              :              this address must be invariant in the inner loop, so we
    5468              :              can consider it as being used in the outer loop.  */
    5469        11900 :           tree base = unshare_expr (DR_BASE_ADDRESS (dr));
    5470        11900 :           tree offset = unshare_expr (DR_OFFSET (dr));
    5471        11900 :           tree init = unshare_expr (DR_INIT (dr));
    5472        11900 :           tree init_offset = fold_build2 (PLUS_EXPR, TREE_TYPE (offset),
    5473              :                                           init, offset);
    5474        11900 :           tree init_addr = fold_build_pointer_plus (base, init_offset);
    5475        11900 :           tree init_ref = build_fold_indirect_ref (init_addr);
    5476              : 
    5477        11900 :           if (dump_enabled_p ())
    5478         1228 :             dump_printf_loc (MSG_NOTE, vect_location,
    5479              :                              "analyze in outer loop: %T\n", init_ref);
    5480              : 
    5481        11900 :           opt_result res
    5482        11900 :             = dr_analyze_innermost (&STMT_VINFO_DR_WRT_VEC_LOOP (stmt_info),
    5483        11900 :                                     init_ref, loop, stmt_info->stmt);
    5484        11900 :           if (!res)
    5485              :             /* dr_analyze_innermost already explained the failure.  */
    5486          166 :             return res;
    5487              : 
    5488        11734 :           if (dump_enabled_p ())
    5489         1224 :             dump_printf_loc (MSG_NOTE, vect_location,
    5490              :                              "\touter base_address: %T\n"
    5491              :                              "\touter offset from base address: %T\n"
    5492              :                              "\touter constant offset from base address: %T\n"
    5493              :                              "\touter step: %T\n"
    5494              :                              "\touter base alignment: %d\n\n"
    5495              :                              "\touter base misalignment: %d\n"
    5496              :                              "\touter offset alignment: %d\n"
    5497              :                              "\touter step alignment: %d\n",
    5498              :                              STMT_VINFO_DR_BASE_ADDRESS (stmt_info),
    5499              :                              STMT_VINFO_DR_OFFSET (stmt_info),
    5500              :                              STMT_VINFO_DR_INIT (stmt_info),
    5501              :                              STMT_VINFO_DR_STEP (stmt_info),
    5502              :                              STMT_VINFO_DR_BASE_ALIGNMENT (stmt_info),
    5503              :                              STMT_VINFO_DR_BASE_MISALIGNMENT (stmt_info),
    5504              :                              STMT_VINFO_DR_OFFSET_ALIGNMENT (stmt_info),
    5505              :                              STMT_VINFO_DR_STEP_ALIGNMENT (stmt_info));
    5506              :         }
    5507              : 
    5508              :       /* Set vectype for STMT.  */
    5509     15089001 :       scalar_type = TREE_TYPE (DR_REF (dr));
    5510     15089001 :       tree vectype = get_vectype_for_scalar_type (vinfo, scalar_type);
    5511     15089001 :       if (!vectype)
    5512              :         {
    5513      1800363 :           if (dump_enabled_p ())
    5514              :             {
    5515         2043 :               dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
    5516              :                                "not vectorized: no vectype for stmt: %G",
    5517              :                                stmt_info->stmt);
    5518         2043 :               dump_printf (MSG_MISSED_OPTIMIZATION, " scalar_type: ");
    5519         2043 :               dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_DETAILS,
    5520              :                                  scalar_type);
    5521         2043 :               dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
    5522              :             }
    5523              : 
    5524      1800363 :           if (is_a <bb_vec_info> (vinfo))
    5525              :             {
    5526              :               /* No vector type is fine, the ref can still participate
    5527              :                  in dependence analysis, we just can't vectorize it.  */
    5528      1736262 :               STMT_VINFO_VECTORIZABLE (stmt_info) = false;
    5529      1736262 :               continue;
    5530              :             }
    5531        64101 :           if (fatal)
    5532        64101 :             *fatal = false;
    5533        64101 :           return opt_result::failure_at (stmt_info->stmt,
    5534              :                                          "not vectorized:"
    5535              :                                          " no vectype for stmt: %G"
    5536              :                                          " scalar_type: %T\n",
    5537              :                                          stmt_info->stmt, scalar_type);
    5538              :         }
    5539              :       else
    5540              :         {
    5541     13288638 :           if (dump_enabled_p ())
    5542        83298 :             dump_printf_loc (MSG_NOTE, vect_location,
    5543              :                              "got vectype for stmt: %G%T\n",
    5544              :                              stmt_info->stmt, vectype);
    5545              :         }
    5546              : 
    5547              :       /* Leave the BB vectorizer to pick the vector type later, based on
    5548              :          the final dataref group size and SLP node size.  */
    5549     13288638 :       if (is_a <loop_vec_info> (vinfo))
    5550      1102686 :         STMT_VINFO_VECTYPE (stmt_info) = vectype;
    5551              : 
    5552     13288638 :       if (gatherscatter != SG_NONE)
    5553              :         {
    5554       107908 :           gather_scatter_info gs_info;
    5555       107908 :           if (!vect_check_gather_scatter (stmt_info, vectype,
    5556              :                                           as_a <loop_vec_info> (vinfo),
    5557              :                                           &gs_info)
    5558       211834 :               || !get_vectype_for_scalar_type (vinfo,
    5559       103926 :                                                TREE_TYPE (gs_info.offset)))
    5560              :             {
    5561         7469 :               if (fatal)
    5562         7469 :                 *fatal = false;
    5563         7469 :               return opt_result::failure_at
    5564         7837 :                         (stmt_info->stmt,
    5565              :                          (gatherscatter == GATHER)
    5566              :                          ? "not vectorized: not suitable for gather load %G"
    5567              :                          : "not vectorized: not suitable for scatter store %G",
    5568              :                          stmt_info->stmt);
    5569              :             }
    5570       100439 :           STMT_VINFO_GATHER_SCATTER_P (stmt_info) = gatherscatter;
    5571              :         }
    5572              :     }
    5573              : 
    5574              :   /* We used to stop processing and prune the list here.  Verify we no
    5575              :      longer need to.  */
    5576      4199518 :   gcc_assert (i == datarefs.length ());
    5577              : 
    5578      2624173 :   return opt_result::success ();
    5579              : }
    5580              : 
    5581              : 
    5582              : /* Function vect_get_new_vect_var.
    5583              : 
    5584              :    Returns a new variable of type TYPE, created with create_tmp_reg, for
    5585              :    use by the vectorizer.  Its name starts with a prefix selected by
    5586              :    VAR_KIND: "vect" for vect_simple_var, "stmp" for vect_scalar_var,
    5587              :    "mask" for vect_mask_var and "vectp" for vect_pointer_var.  If NAME is
                      :    provided, the name passed to create_tmp_reg is that prefix followed
                      :    by "_" and NAME; otherwise it is the prefix alone.  Any other
                      :    VAR_KIND is treated as unreachable.  */
    5588              : 
    5589              : tree
    5590      1945854 : vect_get_new_vect_var (tree type, enum vect_var_kind var_kind, const char *name)
    5591              : {
    5592      1945854 :   const char *prefix;
    5593      1945854 :   tree new_vect_var;
    5594              : 
    5595      1945854 :   switch (var_kind)
    5596              :   {
    5597              :   case vect_simple_var:
    5598              :     prefix = "vect";
    5599              :     break;
    5600        23041 :   case vect_scalar_var:
    5601        23041 :     prefix = "stmp";
    5602        23041 :     break;
    5603        20208 :   case vect_mask_var:
    5604        20208 :     prefix = "mask";
    5605        20208 :     break;
    5606      1394242 :   case vect_pointer_var:
    5607      1394242 :     prefix = "vectp";
    5608      1394242 :     break;
    5609            0 :   default:
    5610            0 :     gcc_unreachable ();
    5611              :   }
    5612              : 
    5613      1945854 :   if (name)
    5614              :     {
    5615      1099449 :       char* tmp = concat (prefix, "_", name, NULL);
    5616      1099449 :       new_vect_var = create_tmp_reg (type, tmp);
    5617      1099449 :       free (tmp);
    5618              :     }
    5619              :   else
    5620       846405 :     new_vect_var = create_tmp_reg (type, prefix);
    5621              : 
    5622      1945854 :   return new_vect_var;
    5623              : }
    5624              : 
    5625              : /* Like vect_get_new_vect_var but return an SSA name (make_temp_ssa_name).  */
    5626              : /* Note: unlike vect_get_new_vect_var there is no vect_mask_var case here.  */
    5627              : tree
    5628         7111 : vect_get_new_ssa_name (tree type, enum vect_var_kind var_kind, const char *name)
    5629              : {
    5630         7111 :   const char *prefix;
    5631         7111 :   tree new_vect_var;
    5632              :   /* Select the prefix; a VAR_KIND not listed below hits gcc_unreachable.  */
    5633         7111 :   switch (var_kind)
    5634              :   {
    5635              :   case vect_simple_var:
    5636              :     prefix = "vect";
    5637              :     break;
    5638          312 :   case vect_scalar_var:
    5639          312 :     prefix = "stmp";
    5640          312 :     break;
    5641            0 :   case vect_pointer_var:
    5642            0 :     prefix = "vectp";
    5643            0 :     break;
    5644            0 :   default:
    5645            0 :     gcc_unreachable ();
    5646              :   }
    5647              :   /* With a NAME, pass PREFIX_NAME and free the concat buffer after use.  */
    5648         7111 :   if (name)
    5649              :     {
    5650         6634 :       char* tmp = concat (prefix, "_", name, NULL);
    5651         6634 :       new_vect_var = make_temp_ssa_name (type, NULL, tmp);
    5652         6634 :       free (tmp);
    5653              :     }
    5654              :   else
    5655          477 :     new_vect_var = make_temp_ssa_name (type, NULL, prefix);
    5656              : 
    5657         7111 :   return new_vect_var;
    5658              : }
    5659              : 
    5660              : /* Set up points-to info on the SSA name NAME for the data reference of DR_INFO.  */
    5661              : /* Copies DR_PTR_INFO when present; otherwise may fall back to the base decl.  */
    5662              : static void
    5663       432024 : vect_duplicate_ssa_name_ptr_info (tree name, dr_vec_info *dr_info)
    5664              : {
    5665       432024 :   if (DR_PTR_INFO (dr_info->dr))
    5666              :     {
    5667       290105 :       duplicate_ssa_name_ptr_info (name, DR_PTR_INFO (dr_info->dr));
    5668              :       /* DR_PTR_INFO is for a base SSA name, not including constant or
    5669              :          variable offsets in the ref so its alignment info does not apply.  */
    5670       290105 :       mark_ptr_info_alignment_unknown (SSA_NAME_PTR_INFO (name));
    5671              :     }
    5672       141919 :   else if (!SSA_NAME_PTR_INFO (name))
    5673              :     {  /* No DR_PTR_INFO and NAME has no ptr info yet.  */
    5674       141919 :       tree base = get_base_address (dr_info->dr->ref);
    5675       141919 :       if (VAR_P (base)
    5676              :           || TREE_CODE (base) == PARM_DECL
    5677              :           || TREE_CODE (base) == RESULT_DECL)
    5678              :         {  /* Decl base: record BASE via pt_solution_set_var on new ptr info.  */
    5679       130075 :           struct ptr_info_def *pi = get_ptr_info (name);
    5680       130075 :           pt_solution_set_var (&pi->pt, base);
    5681              :         }
    5682              :     }
    5683       432024 : }
    5684              : 
    5685              : /* Function vect_create_addr_base_for_vector_ref.
    5686              : 
    5687              :    Create an expression that computes the address of the first memory location
    5688              :    that will be accessed for a data reference.
    5689              : 
    5690              :    Input:
    5691              :    VINFO: The vectorization info; loop and non-loop cases differ (see body).
    5692              :    STMT_INFO: The statement containing the data reference.
    5693              :    NEW_STMT_LIST: Must be initialized to NULL_TREE or a statement list.
    5694              :    OFFSET: Optional.  If supplied, it is added to the initial address.
    5695              :    LOOP:    NOTE(review): not a parameter of the current signature; the loop
    5696              :             context presumably comes from VINFO -- confirm.  When the dataref
    5697              :             is in an inner-loop nested in an outer-loop that is being
    5698              :             vectorized, LOOP can be either loop.  The first memory location
    5699              :             accessed by the following dataref ('in' points to short):
    5700              :                 for (i=0; i<N; i++)
    5701              :                    for (j=0; j<M; j++)
    5702              :                      s += in[i+j]
    5703              : 
    5704              :             is as follows:
    5705              :             if LOOP=i_loop:     &in         (relative to i_loop)
    5706              :             if LOOP=j_loop:     &in+i*2B    (relative to j_loop)
    5707              : 
    5708              :    Output:
    5709              :    1. Return an SSA_NAME whose value is the address of the memory location of
    5710              :       the first vector of the data reference.
    5711              :    2. If new_stmt_list is not NULL_TREE after return then the caller must insert
    5712              :       these statement(s) which define the returned SSA_NAME.
    5713              : 
    5714              :    FORNOW: We are only handling array accesses with step 1.  */
    5715              : 
    5716              : tree
    5717       697255 : vect_create_addr_base_for_vector_ref (vec_info *vinfo, stmt_vec_info stmt_info,
    5718              :                                       gimple_seq *new_stmt_list,
    5719              :                                       tree offset)
    5720              : {
    5721       697255 :   dr_vec_info *dr_info = STMT_VINFO_DR_INFO (stmt_info);
    5722       697255 :   struct data_reference *dr = dr_info->dr;
    5723       697255 :   const char *base_name;
    5724       697255 :   tree addr_base;
    5725       697255 :   tree dest;
    5726       697255 :   gimple_seq seq = NULL;
    5727       697255 :   tree vect_ptr_type;
    5728       697255 :   loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
    5729       697255 :   innermost_loop_behavior *drb = vect_dr_behavior (vinfo, dr_info);
    5730              :   /* Work on unshared copies of the base address, offset and init.  */
    5731       697255 :   tree data_ref_base = unshare_expr (drb->base_address);
    5732       697255 :   tree base_offset = unshare_expr (get_dr_vinfo_offset (vinfo, dr_info, true));
    5733       697255 :   tree init = unshare_expr (drb->init);
    5734              :   /* Without a loop_vec_info, offset and init are reset to zero.  */
    5735       697255 :   if (loop_vinfo)
    5736       128630 :     base_name = get_name (data_ref_base);
    5737              :   else
    5738              :     {
    5739       568625 :       base_offset = ssize_int (0);
    5740       568625 :       init = ssize_int (0);
    5741       568625 :       base_name = get_name (DR_REF (dr));
    5742              :     }
    5743              : 
    5744              :   /* Create base_offset */
    5745       697255 :   base_offset = size_binop (PLUS_EXPR,
    5746              :                             fold_convert (sizetype, base_offset),
    5747              :                             fold_convert (sizetype, init));
    5748              : 
    5749       697255 :   if (offset)
    5750              :     {
    5751         3110 :       offset = fold_convert (sizetype, offset);
    5752         3110 :       base_offset = fold_build2 (PLUS_EXPR, sizetype,
    5753              :                                  base_offset, offset);
    5754              :     }
    5755              :   /* NOTE(review): the non-loop branch below ignores BASE_OFFSET, hence OFFSET.  */
    5756              :   /* base + base_offset */
    5757       697255 :   if (loop_vinfo)
    5758       128630 :     addr_base = fold_build_pointer_plus (data_ref_base, base_offset);
    5759              :   else
    5760      1137250 :     addr_base = build1 (ADDR_EXPR,
    5761       568625 :                         build_pointer_type (TREE_TYPE (DR_REF (dr))),
    5762              :                         /* Strip zero offset components since we don't need
    5763              :                            them and they can confuse late diagnostics if
    5764              :                            we CSE them wrongly.  See PR106904 for example.  */
    5765              :                         unshare_expr (strip_zero_offset_components
    5766              :                                                                 (DR_REF (dr))));
    5767              :   /* Gimplify the address with a new vect_pointer_var temporary as target.  */
    5768       697255 :   vect_ptr_type = build_pointer_type (TREE_TYPE (DR_REF (dr)));
    5769       697255 :   dest = vect_get_new_vect_var (vect_ptr_type, vect_pointer_var, base_name);
    5770       697255 :   addr_base = force_gimple_operand (addr_base, &seq, true, dest);
    5771       697255 :   gimple_seq_add_seq (new_stmt_list, seq);
    5772              : 
    5773       697255 :   if (TREE_CODE (addr_base) == SSA_NAME
    5774              :       /* We should only duplicate pointer info to newly created SSA names.  */
    5775       703990 :       && SSA_NAME_VAR (addr_base) == dest)
    5776              :     {
    5777       174644 :       gcc_assert (!SSA_NAME_PTR_INFO (addr_base));
    5778       174644 :       vect_duplicate_ssa_name_ptr_info (addr_base, dr_info);
    5779              :     }
    5780              : 
    5781       697255 :   if (dump_enabled_p ())
    5782        25315 :     dump_printf_loc (MSG_NOTE, vect_location, "created %T\n", addr_base);
    5783              : 
    5784       697255 :   return addr_base;
    5785              : }
    5786              : 
    5787              : 
    5788              : /* Function vect_create_data_ref_ptr.
    5789              : 
    5790              :    Create a new pointer-to-AGGR_TYPE variable (ap), that points to the first
    5791              :    location accessed in the loop by STMT_INFO, along with the def-use update
    5792              :    chain to appropriately advance the pointer through the loop iterations.
    5793              :    Also set aliasing information for the pointer.  This pointer is used by
    5794              :    the callers to this function to create a memory reference expression for
    5795              :    vector load/store access.
    5796              : 
    5797              :    Input:
    5798              :    1. STMT_INFO: a stmt that references memory. Expected to be of the form
    5799              :          GIMPLE_ASSIGN <name, data-ref> or
    5800              :          GIMPLE_ASSIGN <data-ref, name>.
    5801              :    2. AGGR_TYPE: the type of the reference, which should be either a vector
    5802              :         or an array.
    5803              :    3. AT_LOOP: the loop where the vector memref is to be created.
    5804              :    4. OFFSET (optional): a byte offset to be added to the initial address
    5805              :         accessed by the data-ref in STMT_INFO.
    5806              :    5. GSI: location where the new stmts are to be placed if there is no loop
    5807              :    6. ONLY_INIT: indicate if ap is to be updated in the loop, or remain
    5808              :         pointing to the initial address.
    5809              :    7. IV_STEP (optional, defaults to NULL): the amount that should be added
    5810              :         to the IV during each iteration of the loop.  NULL says to move
    5811              :         by one copy of AGGR_TYPE up or down, depending on the step of the
    5812              :         data reference.
    5813              : 
    5814              :    Output:
    5815              :    1. Declare a new ptr to vector_type, and have it point to the base of the
    5816              :       data reference (initial address accessed by the data reference).
    5817              :       For example, for vector of type V8HI, the following code is generated:
    5818              : 
    5819              :       v8hi *ap;
    5820              :       ap = (v8hi *)initial_address;
    5821              : 
    5822              :       if OFFSET is not supplied:
    5823              :          initial_address = &a[init];
    5824              :       if OFFSET is supplied:
    5825              :          initial_address = &a[init] + OFFSET;
    5826              :       (OFFSET is a byte offset; the signature has no separate BYTE_OFFSET
    5827              :       parameter.)
    5828              : 
    5829              :       Return the initial_address in INITIAL_ADDRESS.
    5830              : 
    5831              :    2. If ONLY_INIT is true, just return the initial pointer.  Otherwise, also
    5832              :       update the pointer in each iteration of the loop.
    5833              : 
    5834              :       Return the increment stmt that updates the pointer in PTR_INCR.
    5835              : 
    5836              :    3. Return the pointer.  */
    5837              : 
    5838              : tree
    5839       696987 : vect_create_data_ref_ptr (vec_info *vinfo, stmt_vec_info stmt_info,
    5840              :                           tree aggr_type, class loop *at_loop, tree offset,
    5841              :                           tree *initial_address, gimple_stmt_iterator *gsi,
    5842              :                           gimple **ptr_incr, bool only_init,
    5843              :                           tree iv_step)
    5844              : {
    5845       696987 :   const char *base_name;
    5846       696987 :   loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
    5847       696987 :   class loop *loop = NULL;
    5848       696987 :   bool nested_in_vect_loop = false;
    5849       696987 :   class loop *containing_loop = NULL;
    5850       696987 :   tree aggr_ptr_type;
    5851       696987 :   tree aggr_ptr;
    5852       696987 :   tree new_temp;
    5853       696987 :   gimple_seq new_stmt_list = NULL;
    5854       696987 :   edge pe = NULL;
    5855       696987 :   basic_block new_bb;
    5856       696987 :   tree aggr_ptr_init;
    5857       696987 :   dr_vec_info *dr_info = STMT_VINFO_DR_INFO (stmt_info);
    5858       696987 :   struct data_reference *dr = dr_info->dr;
    5859       696987 :   tree aptr;
    5860       696987 :   gimple_stmt_iterator incr_gsi;
    5861       696987 :   bool insert_after;
    5862       696987 :   tree indx_before_incr, indx_after_incr;
    5863       696987 :   gimple *incr;
    5864       696987 :   bb_vec_info bb_vinfo = dyn_cast <bb_vec_info> (vinfo);
    5865              :   /* Without IV_STEP the step comes from AGGR_TYPE, which must be array/vector.  */
    5866       696987 :   gcc_assert (iv_step != NULL_TREE
    5867              :               || TREE_CODE (aggr_type) == ARRAY_TYPE
    5868              :               || TREE_CODE (aggr_type) == VECTOR_TYPE);
    5869              : 
    5870       696987 :   if (loop_vinfo)
    5871              :     {
    5872       128362 :       loop = LOOP_VINFO_LOOP (loop_vinfo);
    5873       128362 :       nested_in_vect_loop = nested_in_vect_loop_p (loop, stmt_info);
    5874       128362 :       containing_loop = (gimple_bb (stmt_info->stmt))->loop_father;
    5875       128362 :       pe = loop_preheader_edge (loop);
    5876              :     }
    5877              :   else
    5878              :     {
    5879       568625 :       gcc_assert (bb_vinfo);
    5880       568625 :       only_init = true;
    5881       568625 :       *ptr_incr = NULL;  /* NOTE(review): unguarded, but later stores check PTR_INCR.  */
    5882              :     }
    5883              : 
    5884              :   /* Create an expression for the first address accessed by this load
    5885              :      in LOOP.  */
    5886       696987 :   base_name = get_name (DR_BASE_ADDRESS (dr));
    5887              : 
    5888       696987 :   if (dump_enabled_p ())
    5889              :     {
    5890        25212 :       tree dr_base_type = TREE_TYPE (DR_BASE_OBJECT (dr));
    5891        25212 :       dump_printf_loc (MSG_NOTE, vect_location,
    5892              :                        "create %s-pointer variable to type: %T",
    5893        25212 :                        get_tree_code_name (TREE_CODE (aggr_type)),
    5894              :                        aggr_type);
    5895        25212 :       if (TREE_CODE (dr_base_type) == ARRAY_TYPE)
    5896        13519 :         dump_printf (MSG_NOTE, "  vectorizing an array ref: ");
    5897        11693 :       else if (TREE_CODE (dr_base_type) == VECTOR_TYPE)
    5898            0 :         dump_printf (MSG_NOTE, "  vectorizing a vector ref: ");
    5899        11693 :       else if (TREE_CODE (dr_base_type) == RECORD_TYPE)
    5900         1638 :         dump_printf (MSG_NOTE, "  vectorizing a record based array ref: ");
    5901              :       else
    5902        10055 :         dump_printf (MSG_NOTE, "  vectorizing a pointer ref: ");
    5903        25212 :       dump_printf (MSG_NOTE, "%T\n", DR_BASE_OBJECT (dr));
    5904              :     }
    5905              : 
    5906              :   /* (1) Create the new aggregate-pointer variable.
    5907              :      Vector and array types inherit the alias set of their component
    5908              :      type by default so we need to use a ref-all pointer if the data
    5909              :      reference does not conflict with the created aggregated data
    5910              :      reference because it is not addressable.  */
    5911       696987 :   bool need_ref_all = false;
    5912       696987 :   if (!alias_sets_conflict_p (get_alias_set (aggr_type),
    5913              :                               get_alias_set (DR_REF (dr))))
    5914              :     need_ref_all = true;
    5915              :   /* Likewise for any of the data references in the stmt group.  */
    5916       593523 :   else if (DR_GROUP_SIZE (stmt_info) > 1)
    5917              :     {
    5918       479820 :       stmt_vec_info sinfo = DR_GROUP_FIRST_ELEMENT (stmt_info);
    5919      1341370 :       do
    5920              :         {
    5921      1341370 :           struct data_reference *sdr = STMT_VINFO_DATA_REF (sinfo);
    5922      1341370 :           if (!alias_sets_conflict_p (get_alias_set (aggr_type),
    5923              :                                       get_alias_set (DR_REF (sdr))))
    5924              :             {
    5925              :               need_ref_all = true;
    5926              :               break;
    5927              :             }
    5928      1340311 :           sinfo = DR_GROUP_NEXT_ELEMENT (sinfo);
    5929              :         }
    5930      1340311 :       while (sinfo);
    5931              :     }
    5932       696987 :   aggr_ptr_type = build_pointer_type_for_mode (aggr_type, VOIDmode,
    5933              :                                                need_ref_all);
    5934       696987 :   aggr_ptr = vect_get_new_vect_var (aggr_ptr_type, vect_pointer_var, base_name);
    5935              : 
    5936              : 
    5937              :   /* Note: If the dataref is in an inner-loop nested in LOOP, and we are
    5938              :      vectorizing LOOP (i.e., outer-loop vectorization), we need to create two
    5939              :      def-use update cycles for the pointer: one relative to the outer-loop
    5940              :      (LOOP), which is what steps (2) and (3) below do.  The other is relative
    5941              :      to the inner-loop (which is the inner-most loop containing the dataref),
    5942              :      and this is done by step (4) below.
    5943              : 
    5944              :      When vectorizing inner-most loops, the vectorized loop (LOOP) is also the
    5945              :      inner-most loop, and so steps (2),(3) work the same, and step (4) is
    5946              :      redundant.  Steps (2),(3) create the following:
    5947              : 
    5948              :         vp0 = &base_addr;
    5949              :         LOOP:   vp1 = phi(vp0,vp2)
    5950              :                 ...
    5951              :                 ...
    5952              :                 vp2 = vp1 + step
    5953              :                 goto LOOP
    5954              : 
    5955              :      If there is an inner-loop nested in loop, then step (4) will also be
    5956              :      applied, and an additional update in the inner-loop will be created:
    5957              : 
    5958              :         vp0 = &base_addr;
    5959              :         LOOP:   vp1 = phi(vp0,vp2)
    5960              :                 ...
    5961              :         inner:     vp3 = phi(vp1,vp4)
    5962              :                    vp4 = vp3 + inner_step
    5963              :                    if () goto inner
    5964              :                 ...
    5965              :                 vp2 = vp1 + step
    5966              :                 if () goto LOOP   */
    5967              : 
    5968              :   /* (2) Calculate the initial address of the aggregate-pointer, and set
    5969              :      the aggregate-pointer to point to it before the loop.  */
    5970              : 
    5971              :   /* Create: &base[init_val] + offset, on the preheader edge or before GSI.  */
    5972              : 
    5973       696987 :   new_temp = vect_create_addr_base_for_vector_ref (vinfo,
    5974              :                                                    stmt_info, &new_stmt_list,
    5975              :                                                    offset);
    5976       696987 :   if (new_stmt_list)
    5977              :     {
    5978       174523 :       if (pe)
    5979              :         {
    5980        54619 :           new_bb = gsi_insert_seq_on_edge_immediate (pe, new_stmt_list);
    5981        54619 :           gcc_assert (!new_bb);
    5982              :         }
    5983              :       else
    5984       119904 :         gsi_insert_seq_before (gsi, new_stmt_list, GSI_SAME_STMT);
    5985              :     }
    5986              : 
    5987       696987 :   *initial_address = new_temp;
    5988       696987 :   aggr_ptr_init = new_temp;
    5989              : 
    5990              :   /* (3) Handle the updating of the aggregate-pointer inside the loop.
    5991              :      This is needed when ONLY_INIT is false, and also when AT_LOOP is the
    5992              :      inner-loop nested in LOOP (during outer-loop vectorization).  */
    5993              : 
    5994              :   /* No update in loop is required.  */
    5995       696987 :   if (only_init && (!loop_vinfo || at_loop == loop))
    5996              :     aptr = aggr_ptr_init;
    5997              :   else
    5998              :     {
    5999              :       /* Accesses to invariant addresses should be handled specially
    6000              :          by the caller.  */
    6001       128354 :       tree step = vect_dr_behavior (vinfo, dr_info)->step;
    6002       128354 :       gcc_assert (!integer_zerop (step));
    6003              : 
    6004       128354 :       if (iv_step == NULL_TREE)
    6005              :         {
    6006              :           /* The step of the aggregate pointer is the type size,
    6007              :              negated for downward accesses.  */
    6008            0 :           iv_step = TYPE_SIZE_UNIT (aggr_type);
    6009            0 :           if (tree_int_cst_sgn (step) == -1)
    6010            0 :             iv_step = fold_build1 (NEGATE_EXPR, TREE_TYPE (iv_step), iv_step);
    6011              :         }
    6012              : 
    6013       128354 :       standard_iv_increment_position (loop, &incr_gsi, &insert_after);
    6014              : 
    6015       128354 :       create_iv (aggr_ptr_init, PLUS_EXPR,
    6016              :                  iv_step, aggr_ptr, loop, &incr_gsi, insert_after,
    6017              :                  &indx_before_incr, &indx_after_incr);
    6018       128354 :       incr = gsi_stmt (incr_gsi);
    6019              : 
    6020              :       /* Copy the points-to information if it exists. */
    6021       128354 :       vect_duplicate_ssa_name_ptr_info (indx_before_incr, dr_info);
    6022       128354 :       vect_duplicate_ssa_name_ptr_info (indx_after_incr, dr_info);
    6023       128354 :       if (ptr_incr)
    6024       128354 :         *ptr_incr = incr;
    6025              : 
    6026       128354 :       aptr = indx_before_incr;
    6027              :     }
    6028              : 
    6029       696987 :   if (!nested_in_vect_loop || only_init)
    6030              :     return aptr;
    6031              : 
    6032              : 
    6033              :   /* (4) Handle the updating of the aggregate-pointer inside the inner-loop
    6034              :      nested in LOOP, if exists.  */
    6035              :   /* Only reached with NESTED_IN_VECT_LOOP set and ONLY_INIT clear.  */
    6036          336 :   gcc_assert (nested_in_vect_loop);
    6037          336 :   if (!only_init)
    6038              :     {
    6039          336 :       standard_iv_increment_position (containing_loop, &incr_gsi,
    6040              :                                       &insert_after);
    6041          336 :       create_iv (aptr, PLUS_EXPR, DR_STEP (dr),
    6042              :                  aggr_ptr, containing_loop, &incr_gsi, insert_after,
    6043              :                  &indx_before_incr, &indx_after_incr);
    6044          336 :       incr = gsi_stmt (incr_gsi);
    6045              : 
    6046              :       /* Copy the points-to information if it exists. */
    6047          336 :       vect_duplicate_ssa_name_ptr_info (indx_before_incr, dr_info);
    6048          336 :       vect_duplicate_ssa_name_ptr_info (indx_after_incr, dr_info);
    6049          336 :       if (ptr_incr)
    6050          336 :         *ptr_incr = incr;
    6051              : 
    6052          336 :       return indx_before_incr;
    6053              :     }
    6054              :   else
    6055              :     gcc_unreachable ();
    6056              : }
    6057              : 
    6058              : 
    6059              : /* Function bump_vector_ptr
    6060              : 
    6061              :    Increment a pointer (to a vector type) by UPDATE.  If requested,
    6062              :    i.e. if PTR-INCR is given, then also connect the new increment stmt
    6063              :    to the existing def-use update-chain of the pointer, by modifying
    6064              :    the PTR_INCR as illustrated below:
    6065              : 
    6066              :    The pointer def-use update-chain before this function:
    6067              :                         DATAREF_PTR = phi (p_0, p_2)
    6068              :                         ....
    6069              :         PTR_INCR:       p_2 = DATAREF_PTR + step
    6070              : 
    6071              :    The pointer def-use update-chain after this function:
    6072              :                         DATAREF_PTR = phi (p_0, p_2)
    6073              :                         ....
    6074              :                         NEW_DATAREF_PTR = DATAREF_PTR + BUMP
    6075              :                         ....
    6076              :         PTR_INCR:       p_2 = NEW_DATAREF_PTR + step
    6077              : 
    6078              :    Input:
    6079              :    DATAREF_PTR - ssa_name of a pointer (to vector type) that is being updated
    6080              :                  in the loop.
    6081              :    PTR_INCR - optional. The stmt that updates the pointer in each iteration of
    6082              :               the loop.  The increment amount across iterations is expected
    6083              :               to be vector_size.
    6084              :    GSI - location where the new update stmt is to be placed.
    6085              :    STMT_INFO - the original scalar memory-access stmt that is being vectorized.
    6086              :    UPDATE - The offset by which to bump the pointer.
    6087              :    VINFO - passed on to vect_finish_stmt_generation.
    6088              :    Output: Return NEW_DATAREF_PTR as illustrated above.
    6089              :    For an invariant DATAREF_PTR an ADDR_EXPR is returned, with no new stmt.
    6090              : */
    6091              : 
    6092              : tree
    6093       242027 : bump_vector_ptr (vec_info *vinfo,
    6094              :                  tree dataref_ptr, gimple *ptr_incr, gimple_stmt_iterator *gsi,
    6095              :                  stmt_vec_info stmt_info, tree update)
    6096              : {
    6097       242027 :   struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
    6098       242027 :   gimple *incr_stmt;
    6099       242027 :   ssa_op_iter iter;
    6100       242027 :   use_operand_p use_p;
    6101       242027 :   tree new_dataref_ptr;
    6102              : 
    6103       242027 :   if (TREE_CODE (dataref_ptr) == SSA_NAME)
    6104       113158 :     new_dataref_ptr = copy_ssa_name (dataref_ptr);
    6105       128869 :   else if (is_gimple_min_invariant (dataref_ptr))
    6106              :     /* When possible avoid emitting a separate increment stmt that will
    6107              :        force the addressed object addressable.  */
    6108       257738 :     return build1 (ADDR_EXPR, TREE_TYPE (dataref_ptr),
    6109       128869 :                    fold_build2 (MEM_REF,
    6110              :                                 TREE_TYPE (TREE_TYPE (dataref_ptr)),
    6111              :                                 dataref_ptr,
    6112       128869 :                                 fold_convert (ptr_type_node, update)));
    6113              :   else
    6114            0 :     new_dataref_ptr = make_ssa_name (TREE_TYPE (dataref_ptr));
    6115       113158 :   incr_stmt = gimple_build_assign (new_dataref_ptr, POINTER_PLUS_EXPR,
    6116              :                                    dataref_ptr, update);
    6117       113158 :   vect_finish_stmt_generation (vinfo, stmt_info, incr_stmt, gsi);
    6118              :   /* Fold the increment, avoiding excessive use-def chains of
    6119              :      those, leading to compile-time issues for passes until the next
    6120              :      forwprop pass which would do this as well.  */
    6121       113158 :   gimple_stmt_iterator fold_gsi = gsi_for_stmt (incr_stmt);
    6122       113158 :   if (fold_stmt (&fold_gsi, follow_all_ssa_edges))
    6123              :     {
    6124        73697 :       incr_stmt = gsi_stmt (fold_gsi);
    6125        73697 :       update_stmt (incr_stmt);
    6126              :     }
    6127              : 
    6128              :   /* Copy the points-to information if it exists. */
    6129       113158 :   duplicate_ssa_name_ptr_info (new_dataref_ptr, DR_PTR_INFO (dr));
    6130              :   /* NOTE(review): vect_duplicate_ssa_name_ptr_info also marks alignment unknown; this does not -- confirm.  */
    6131       113158 :   if (!ptr_incr)
    6132              :     return new_dataref_ptr;
    6133              : 
    6134              :   /* Update the vector-pointer's cross-iteration increment.  */
    6135       116046 :   FOR_EACH_SSA_USE_OPERAND (use_p, ptr_incr, iter, SSA_OP_USE)
    6136              :     {
    6137        58023 :       tree use = USE_FROM_PTR (use_p);
    6138              :       /* Redirect uses of DATAREF_PTR; any other use must equal UPDATE.  */
    6139        58023 :       if (use == dataref_ptr)
    6140        58023 :         SET_USE (use_p, new_dataref_ptr);
    6141              :       else
    6142            0 :         gcc_assert (operand_equal_p (use, update, 0));
    6143              :     }
    6144              : 
    6145              :   return new_dataref_ptr;
    6146              : }
    6147              : 
    6148              : 
    6149              : /* Copy memory reference info, namely the dependence clique and the
    6150              :    dependence base, from the SRC reference to the DEST MEM_REF.
                      :
                      :    DEST is left untouched if it is not a MEM_REF, or if the base of SRC
                      :    (found by stripping all handled components) is neither a MEM_REF nor
                      :    a TARGET_MEM_REF.  */
    6151              : 
    6152              : void
    6153       949970 : vect_copy_ref_info (tree dest, tree src)
    6154              : {
    6155       949970 :   if (TREE_CODE (dest) != MEM_REF)
    6156              :     return;
    6157              : 
    6158              :   tree src_base = src;
    6159      1907512 :   while (handled_component_p (src_base))
    6160       962201 :     src_base = TREE_OPERAND (src_base, 0);
    6161       945311 :   if (TREE_CODE (src_base) != MEM_REF
    6162       945311 :       && TREE_CODE (src_base) != TARGET_MEM_REF)
    6163              :     return;
    6164              : 
    6165       515269 :   MR_DEPENDENCE_CLIQUE (dest) = MR_DEPENDENCE_CLIQUE (src_base);
    6166       515269 :   MR_DEPENDENCE_BASE (dest) = MR_DEPENDENCE_BASE (src_base);
    6167              : }
    6168              : 
    6169              : 
    6170              : /* Function vect_create_destination_var.
    6171              : 
    6172              :    Create a new temporary of type VECTYPE, or of the type of SCALAR_DEST
                      :    when VECTYPE is NULL_TREE.  SCALAR_DEST must be an SSA_NAME.
                      :
                      :    The variable kind is vect_mask_var if VECTYPE is a vector boolean
                      :    type, vect_simple_var for any other VECTYPE and vect_scalar_var when
                      :    VECTYPE is NULL_TREE.  The name handed to vect_get_new_vect_var is
                      :    NAME_VERSION if SCALAR_DEST has a name and _VERSION otherwise, where
                      :    VERSION is the SSA version of SCALAR_DEST.  The temporary name
                      :    string is freed before returning.  */
    6173              : 
    6174              : tree
    6175       534680 : vect_create_destination_var (tree scalar_dest, tree vectype)
    6176              : {
    6177       534680 :   tree vec_dest;
    6178       534680 :   const char *name;
    6179       534680 :   char *new_name;
    6180       534680 :   tree type;
    6181       534680 :   enum vect_var_kind kind;
    6182              : 
    6183       534680 :   kind = vectype
    6184      1046319 :     ? VECTOR_BOOLEAN_TYPE_P (vectype)
    6185       511639 :     ? vect_mask_var
    6186              :     : vect_simple_var
    6187              :     : vect_scalar_var;
    6188        23041 :   type = vectype ? vectype : TREE_TYPE (scalar_dest);
    6189              : 
    6190       534680 :   gcc_assert (TREE_CODE (scalar_dest) == SSA_NAME);
    6191              : 
    6192       534680 :   name = get_name (scalar_dest);
    6193       534680 :   if (name)
    6194       191164 :     new_name = xasprintf ("%s_%u", name, SSA_NAME_VERSION (scalar_dest));
    6195              :   else
    6196       343516 :     new_name = xasprintf ("_%u", SSA_NAME_VERSION (scalar_dest));
    6197       534680 :   vec_dest = vect_get_new_vect_var (type, kind, new_name);
    6198       534680 :   free (new_name);
    6199              : 
    6200       534680 :   return vec_dest;
    6201              : }
    6202              : 
    6203              : /* Function vect_grouped_store_supported.
    6204              : 
    6205              :    Returns TRUE if the permutations needed to interleave a group of COUNT
    6206              :    stores of vectors of type VECTYPE are supported, and FALSE otherwise.
                      :
                      :    COUNT must be 3 or a power of two and the mode of VECTYPE must be a
                      :    vector mode.  For COUNT == 3 the number of vector elements must be
                      :    constant and three pairs of two-input permutations are checked with
                      :    can_vec_perm_const_p.  For a power of two the number of elements
                      :    must be a multiple of 2 and the interleave low permutation
                      :    { 0, nelt, 1, nelt + 1, ... } and the interleave high permutation
                      :    (the same indices offset by nelt / 2) are checked.  */
    6207              : 
    6208              : bool
    6209         2756 : vect_grouped_store_supported (tree vectype, unsigned HOST_WIDE_INT count)
    6210              : {
    6211         2756 :   machine_mode mode = TYPE_MODE (vectype);
    6212              : 
    6213              :   /* vect_permute_store_chain requires the group size to be equal to 3 or
    6214              :      be a power of two.  */
    6215         2756 :   if (count != 3 && exact_log2 (count) == -1)
    6216              :     {
    6217          548 :       if (dump_enabled_p ())
    6218           11 :         dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
    6219              :                          "the size of the group of accesses"
    6220              :                          " is not a power of 2 or not equal to 3\n");
    6221          548 :       return false;
    6222              :     }
    6223              : 
    6224              :   /* Check that the permutation is supported.  */
    6225         2208 :   if (VECTOR_MODE_P (mode))
    6226              :     {
    6227         2208 :       unsigned int i;
    6228         2208 :       if (count == 3)
    6229              :         {
    6230          955 :           unsigned int j0 = 0, j1 = 0, j2 = 0;
    6231          955 :           unsigned int i, j;
    6232              : 
    6233          955 :           unsigned int nelt;
    6234         1910 :           if (!GET_MODE_NUNITS (mode).is_constant (&nelt))
    6235              :             {
    6236              :               if (dump_enabled_p ())
    6237              :                 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
    6238              :                                  "cannot handle groups of 3 stores for"
    6239              :                                  " variable-length vectors\n");
    6240              :               return false;
    6241              :             }
    6242              : 
    6243          955 :           vec_perm_builder sel (nelt, nelt, 1);
    6244          955 :           sel.quick_grow (nelt);
    6245          955 :           vec_perm_indices indices;
    6246         3595 :           for (j = 0; j < 3; j++)
    6247              :             {
    6248         2715 :               int nelt0 = ((3 - j) * nelt) % 3;
    6249         2715 :               int nelt1 = ((3 - j) * nelt + 1) % 3;
    6250         2715 :               int nelt2 = ((3 - j) * nelt + 2) % 3;
    6251         9621 :               for (i = 0; i < nelt; i++)
    6252              :                 {
    6253         6906 :                   if (3 * i + nelt0 < nelt)
    6254         2340 :                     sel[3 * i + nelt0] = j0++;
    6255         6906 :                   if (3 * i + nelt1 < nelt)
    6256         2301 :                     sel[3 * i + nelt1] = nelt + j1++;
    6257         6906 :                   if (3 * i + nelt2 < nelt)
    6258         2265 :                     sel[3 * i + nelt2] = 0;
    6259              :                 }
    6260         2715 :               indices.new_vector (sel, 2, nelt);
    6261         2715 :               if (!can_vec_perm_const_p (mode, mode, indices))
    6262              :                 {
    6263           66 :                   if (dump_enabled_p ())
    6264           37 :                     dump_printf (MSG_MISSED_OPTIMIZATION,
    6265              :                                  "permutation op not supported by target.\n");
    6266           66 :                   return false;
    6267              :                 }
    6268              : 
    6269         8979 :               for (i = 0; i < nelt; i++)
    6270              :                 {
    6271         6330 :                   if (3 * i + nelt0 < nelt)
    6272         2116 :                     sel[3 * i + nelt0] = 3 * i + nelt0;
    6273         6330 :                   if (3 * i + nelt1 < nelt)
    6274         2107 :                     sel[3 * i + nelt1] = 3 * i + nelt1;
    6275         6330 :                   if (3 * i + nelt2 < nelt)
    6276         2107 :                     sel[3 * i + nelt2] = nelt + j2++;
    6277              :                 }
    6278         2649 :               indices.new_vector (sel, 2, nelt);
    6279         2649 :               if (!can_vec_perm_const_p (mode, mode, indices))
    6280              :                 {
    6281            9 :                   if (dump_enabled_p ())
    6282            9 :                     dump_printf (MSG_MISSED_OPTIMIZATION,
    6283              :                                  "permutation op not supported by target.\n");
    6284            9 :                   return false;
    6285              :                 }
    6286              :             }
    6287              :           return true;
    6288          955 :         }
    6289              :       else
    6290              :         {
    6291              :           /* If length is not equal to 3 then only power of 2 is supported.  */
    6292         1253 :           gcc_assert (pow2p_hwi (count));
    6293         2506 :           poly_uint64 nelt = GET_MODE_NUNITS (mode);
    6294              : 
    6295              :           /* The encoding has 2 interleaved stepped patterns, so the number
                      :              of elements has to be a multiple of 2.  */
    6296         2506 :           if(!multiple_p (nelt, 2))
    6297         1207 :             return false;
    6298         1253 :           vec_perm_builder sel (nelt, 2, 3);
    6299         1253 :           sel.quick_grow (6);
    6300         6265 :           for (i = 0; i < 3; i++)
    6301              :             {
    6302         3759 :               sel[i * 2] = i;
    6303         3759 :               sel[i * 2 + 1] = i + nelt;
    6304              :             }
    6305         1253 :           vec_perm_indices indices (sel, 2, nelt);
    6306         1253 :           if (can_vec_perm_const_p (mode, mode, indices))
    6307              :             {
    6308         8449 :               for (i = 0; i < 6; i++)
    6309         7242 :                 sel[i] += exact_div (nelt, 2);
    6310         1207 :               indices.new_vector (sel, 2, nelt);
    6311         1207 :               if (can_vec_perm_const_p (mode, mode, indices))
    6312         1207 :                 return true;
    6313              :             }
    6314         1253 :         }
    6315              :     }
    6316              : 
    6317           46 :   if (dump_enabled_p ())
    6318            3 :     dump_printf (MSG_MISSED_OPTIMIZATION,
    6319              :                  "permutation op not supported by target.\n");
    6320              :   return false;
    6321              : }
    6322              : 
    6323              : /* Return the internal function to use for storing COUNT vectors of type
    6324              :    VECTYPE with a store-lanes operation, or IFN_LAST if none is available.
                      :
                      :    IFN_MASK_LEN_STORE_LANES is returned whenever vec_mask_len_store_lanes
                      :    is supported, regardless of MASKED_P.  Otherwise MASKED_P says whether
                      :    the masked form is needed: if it is, only vec_mask_store_lanes
                      :    (IFN_MASK_STORE_LANES) is considered, else only vec_store_lanes
                      :    (IFN_STORE_LANES).  */
    6325              : 
    6326              : internal_fn
    6327        40486 : vect_store_lanes_supported (tree vectype, unsigned HOST_WIDE_INT count,
    6328              :                             bool masked_p)
    6329              : {
    6330        40486 :   if (vect_lanes_optab_supported_p ("vec_mask_len_store_lanes",
    6331              :                                     vec_mask_len_store_lanes_optab, vectype,
    6332              :                                     count))
    6333              :     return IFN_MASK_LEN_STORE_LANES;
    6334        40486 :   else if (masked_p)
    6335              :     {
    6336          159 :       if (vect_lanes_optab_supported_p ("vec_mask_store_lanes",
    6337              :                                         vec_mask_store_lanes_optab, vectype,
    6338              :                                         count))
    6339              :         return IFN_MASK_STORE_LANES;
    6340              :     }
    6341              :   else
    6342              :     {
    6343        40327 :       if (vect_lanes_optab_supported_p ("vec_store_lanes",
    6344              :                                         vec_store_lanes_optab, vectype, count))
    6345              :         return IFN_STORE_LANES;
    6346              :     }
    6347              :   return IFN_LAST;
    6348              : }
    6349              : 
    6350              : 
    6351              : /* Function vect_setup_realignment
    6352              : 
    6353              :    This function is called when vectorizing an unaligned load using
    6354              :    the dr_explicit_realign[_optimized] scheme.
    6355              :    This function generates the following code at the loop prolog:
    6356              : 
    6357              :       p = initial_addr;
    6358              :    x  msq_init = *(floor(p));   # prolog load
    6359              :       realignment_token = call target_builtin;
    6360              :     loop:
    6361              :    x  msq = phi (msq_init, ---)
    6362              : 
    6363              :    The stmts marked with x are generated only for the case of
    6364              :    dr_explicit_realign_optimized.
    6365              : 
    6366              :    The code above sets up a new (vector) pointer, pointing to the first
    6367              :    location accessed by STMT_INFO, and a "floor-aligned" load using that
    6368              :    pointer.  It also generates code to compute the "realignment-token"
    6369              :    (if the relevant target hook was defined), and creates a phi-node at the
    6370              :    loop-header bb whose arguments are the result of the prolog-load (created
    6371              :    by this function) and the result of a load that takes place in the loop
    6372              :    (to be created by the caller to this function).
    6373              : 
    6374              :    For the case of dr_explicit_realign_optimized:
    6375              :    The caller to this function uses the phi-result (msq) to create the
    6376              :    realignment code inside the loop, and sets up the missing phi argument,
    6377              :    as follows:
    6378              :     loop:
    6379              :       msq = phi (msq_init, lsq)
    6380              :       lsq = *(floor(p'));        # load in loop
    6381              :       result = realign_load (msq, lsq, realignment_token);
    6382              : 
    6383              :    For the case of dr_explicit_realign:
    6384              :     loop:
    6385              :       msq = *(floor(p));        # load in loop
    6386              :       p' = p + (VS-1);
    6387              :       lsq = *(floor(p'));       # load in loop
    6388              :       result = realign_load (msq, lsq, realignment_token);
    6389              : 
    6390              :    Input:
                      :    VINFO - the vectorization info.  If it is not a loop_vec_info, the
                      :            realignment token is computed at GSI rather than in a loop
                      :            preheader (as when INIT_ADDR is given).
    6391              :    STMT_INFO - (scalar) load stmt to be vectorized. This load accesses
    6392              :                a memory location that may be unaligned.
                      :    VECTYPE - vector type used for the MSQ_INIT and MSQ temporaries.
    6393              :    GSI - place where new code is to be inserted.
    6394              :    ALIGNMENT_SUPPORT_SCHEME - which of the two misalignment handling schemes
    6395              :                               is used.
                      :    INIT_ADDR - if not NULL_TREE, the initial address as already computed
                      :                by the caller inside the loop; this requires
                      :                dr_explicit_realign.
    6396              : 
    6397              :    Output:
    6398              :    REALIGNMENT_TOKEN - the result of a call to the builtin_mask_for_load
    6399              :                        target hook, if defined.
                      :    AT_LOOP - if not NULL, set to the loop chosen for the initial vector
                      :              load (see step 2 in the function body).
    6400              :    Return value - the result of the loop-header phi node for
                      :                   dr_explicit_realign_optimized; NULL_TREE for
                      :                   dr_explicit_realign.  */
    6401              : 
    6402              : tree
    6403            0 : vect_setup_realignment (vec_info *vinfo, stmt_vec_info stmt_info, tree vectype,
    6404              :                         gimple_stmt_iterator *gsi, tree *realignment_token,
    6405              :                         enum dr_alignment_support alignment_support_scheme,
    6406              :                         tree init_addr,
    6407              :                         class loop **at_loop)
    6408              : {
    6409            0 :   loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
    6410            0 :   dr_vec_info *dr_info = STMT_VINFO_DR_INFO (stmt_info);
    6411            0 :   struct data_reference *dr = dr_info->dr;
    6412            0 :   class loop *loop = NULL;
    6413            0 :   edge pe = NULL;
    6414            0 :   tree scalar_dest = gimple_assign_lhs (stmt_info->stmt);
    6415            0 :   tree vec_dest;
    6416            0 :   gimple *inc;
    6417            0 :   tree ptr;
    6418            0 :   tree data_ref;
    6419            0 :   basic_block new_bb;
    6420            0 :   tree msq_init = NULL_TREE;
    6421            0 :   tree new_temp;
    6422            0 :   gphi *phi_stmt;
    6423            0 :   tree msq = NULL_TREE;
    6424            0 :   gimple_seq stmts = NULL;
    6425            0 :   bool compute_in_loop = false;
    6426            0 :   bool nested_in_vect_loop = false;
    6427            0 :   class loop *containing_loop = (gimple_bb (stmt_info->stmt))->loop_father;
    6428            0 :   class loop *loop_for_initial_load = NULL;
    6429              : 
    6430            0 :   if (loop_vinfo)
    6431              :     {
    6432            0 :       loop = LOOP_VINFO_LOOP (loop_vinfo);
    6433            0 :       nested_in_vect_loop = nested_in_vect_loop_p (loop, stmt_info);
    6434              :     }
    6435              : 
    6436            0 :   gcc_assert (alignment_support_scheme == dr_explicit_realign
    6437              :               || alignment_support_scheme == dr_explicit_realign_optimized);
    6438              : 
    6439              :   /* We need to generate three things:
    6440              :      1. the misalignment computation
    6441              :      2. the extra vector load (for the optimized realignment scheme).
    6442              :      3. the phi node for the two vectors from which the realignment is
    6443              :       done (for the optimized realignment scheme).
                      :
                      :      The numbered steps below do not map one-to-one onto this list:
                      :      steps 1 and 2 only decide where code is placed, steps 3 to 5
                      :      emit the load, the realignment token and the phi node.  */
    6444              : 
    6445              :   /* 1. Determine where to generate the misalignment computation.
    6446              : 
    6447              :      If INIT_ADDR is NULL_TREE, this indicates that the misalignment
    6448              :      calculation will be generated by this function, outside the loop (in the
    6449              :      preheader).  Otherwise, INIT_ADDR had already been computed for us by the
    6450              :      caller, inside the loop.
    6451              : 
    6452              :      Background: If the misalignment remains fixed throughout the iterations of
    6453              :      the loop, then both realignment schemes are applicable, and also the
    6454              :      misalignment computation can be done outside LOOP.  This is because we are
    6455              :      vectorizing LOOP, and so the memory accesses in LOOP advance in steps that
    6456              :      are a multiple of VS (the Vector Size), and therefore the misalignment in
    6457              :      different vectorized LOOP iterations is always the same.
    6458              :      The problem arises only if the memory access is in an inner-loop nested
    6459              :      inside LOOP, which is now being vectorized using outer-loop vectorization.
    6460              :      This is the only case when the misalignment of the memory access may not
    6461              :      remain fixed throughout the iterations of the inner-loop (as explained in
    6462              :      detail in vect_supportable_dr_alignment).  In this case, not only is the
    6463              :      optimized realignment scheme not applicable, but also the misalignment
    6464              :      computation (and generation of the realignment token that is passed to
    6465              :      REALIGN_LOAD) have to be done inside the loop.
    6466              : 
    6467              :      In short, INIT_ADDR indicates whether we are in a COMPUTE_IN_LOOP mode
    6468              :      or not, which in turn determines if the misalignment is computed inside
    6469              :      the inner-loop, or outside LOOP.  */
    6470              : 
    6471            0 :   if (init_addr != NULL_TREE || !loop_vinfo)
    6472              :     {
    6473            0 :       compute_in_loop = true;
    6474            0 :       gcc_assert (alignment_support_scheme == dr_explicit_realign);
    6475              :     }
    6476              : 
    6477              : 
    6478              :   /* 2. Determine where to generate the extra vector load.
    6479              : 
    6480              :      For the optimized realignment scheme, instead of generating two vector
    6481              :      loads in each iteration, we generate a single extra vector load in the
    6482              :      preheader of the loop, and in each iteration reuse the result of the
    6483              :      vector load from the previous iteration.  In case the memory access is in
    6484              :      an inner-loop nested inside LOOP, which is now being vectorized using
    6485              :      outer-loop vectorization, we need to determine whether this initial vector
    6486              :      load should be generated at the preheader of the inner-loop, or can be
    6487              :      generated at the preheader of LOOP.  If the memory access has no evolution
    6488              :      in LOOP, it can be generated in the preheader of LOOP. Otherwise, it has
    6489              :      to be generated inside LOOP (in the preheader of the inner-loop).  */
    6490              : 
    6491            0 :   if (nested_in_vect_loop)
    6492              :     {
    6493            0 :       tree outerloop_step = STMT_VINFO_DR_STEP (stmt_info);
    6494            0 :       bool invariant_in_outerloop =
    6495            0 :             (tree_int_cst_compare (outerloop_step, size_zero_node) == 0);
    6496            0 :       loop_for_initial_load = (invariant_in_outerloop ? loop : loop->inner);
    6497              :     }
    6498              :   else
    6499              :     loop_for_initial_load = loop;
    6500            0 :   if (at_loop)
    6501            0 :     *at_loop = loop_for_initial_load;
    6502              : 
    6503            0 :   tree vuse = NULL_TREE;
    6504            0 :   if (loop_for_initial_load)
    6505              :     {
    6506            0 :       pe = loop_preheader_edge (loop_for_initial_load);
    6507            0 :       if (gphi *vphi = get_virtual_phi (loop_for_initial_load->header))
    6508            0 :         vuse = PHI_ARG_DEF_FROM_EDGE (vphi, pe);
    6509              :     }
    6510            0 :   if (!vuse)
    6511            0 :     vuse = gimple_vuse (gsi_stmt (*gsi));
    6512              : 
    6513              :   /* 3. For the case of the optimized realignment, create the first vector
    6514              :       load at the loop preheader.  */
    6515              : 
    6516            0 :   if (alignment_support_scheme == dr_explicit_realign_optimized)
    6517              :     {
    6518              :       /* Create msq_init = *(floor(p1)) in the loop preheader  */
    6519            0 :       gassign *new_stmt;
    6520              : 
    6521            0 :       gcc_assert (!compute_in_loop);
    6522            0 :       vec_dest = vect_create_destination_var (scalar_dest, vectype);
    6523            0 :       ptr = vect_create_data_ref_ptr (vinfo, stmt_info, vectype,
    6524              :                                       loop_for_initial_load, NULL_TREE,
    6525              :                                       &init_addr, NULL, &inc, true);
    6526            0 :       if (TREE_CODE (ptr) == SSA_NAME)
    6527            0 :         new_temp = copy_ssa_name (ptr);
    6528              :       else
    6529            0 :         new_temp = make_ssa_name (TREE_TYPE (ptr));
    6530            0 :       poly_uint64 align = DR_TARGET_ALIGNMENT (dr_info);
    6531            0 :       tree type = TREE_TYPE (ptr);
    6532            0 :       new_stmt = gimple_build_assign
    6533            0 :                    (new_temp, BIT_AND_EXPR, ptr,
    6534            0 :                     fold_build2 (MINUS_EXPR, type,
    6535              :                                  build_int_cst (type, 0),
    6536              :                                  build_int_cst (type, align)));
    6537            0 :       new_bb = gsi_insert_on_edge_immediate (pe, new_stmt);
    6538            0 :       gcc_assert (!new_bb);
    6539            0 :       data_ref
    6540            0 :         = build2 (MEM_REF, TREE_TYPE (vec_dest), new_temp,
    6541              :                   build_int_cst (reference_alias_ptr_type (DR_REF (dr)), 0));
    6542            0 :       vect_copy_ref_info (data_ref, DR_REF (dr));
    6543            0 :       new_stmt = gimple_build_assign (vec_dest, data_ref);
    6544            0 :       new_temp = make_ssa_name (vec_dest, new_stmt);
    6545            0 :       gimple_assign_set_lhs (new_stmt, new_temp);
    6546            0 :       gimple_set_vuse (new_stmt, vuse);
    6547            0 :       if (pe)
    6548              :         {
    6549            0 :           new_bb = gsi_insert_on_edge_immediate (pe, new_stmt);
    6550            0 :           gcc_assert (!new_bb);
    6551              :         }
    6552              :       else
    6553            0 :          gsi_insert_before (gsi, new_stmt, GSI_SAME_STMT);
    6554              : 
    6555            0 :       msq_init = gimple_assign_lhs (new_stmt);
    6556              :     }
    6557              : 
    6558              :   /* 4. Create realignment token using a target builtin, if available.
    6559              :       It is done either inside the containing loop, or before LOOP (as
    6560              :       determined above).  */
    6561              : 
    6562            0 :   if (targetm.vectorize.builtin_mask_for_load)
    6563              :     {
    6564            0 :       gcall *new_stmt;
    6565            0 :       tree builtin_decl;
    6566              : 
    6567              :       /* Compute INIT_ADDR - the initial address accessed by this memref.  */
    6568            0 :       if (!init_addr)
    6569              :         {
    6570              :           /* Generate the INIT_ADDR computation outside LOOP.  */
    6571            0 :           init_addr = vect_create_addr_base_for_vector_ref (vinfo,
    6572              :                                                             stmt_info, &stmts,
    6573              :                                                             NULL_TREE);
    6574            0 :           if (loop)
    6575              :             {
    6576            0 :               pe = loop_preheader_edge (loop);
    6577            0 :               new_bb = gsi_insert_seq_on_edge_immediate (pe, stmts);
    6578            0 :               gcc_assert (!new_bb);
    6579              :             }
    6580              :           else
    6581            0 :              gsi_insert_seq_before (gsi, stmts, GSI_SAME_STMT);
    6582              :         }
    6583              : 
    6584            0 :       builtin_decl = targetm.vectorize.builtin_mask_for_load ();
    6585            0 :       new_stmt = gimple_build_call (builtin_decl, 1, init_addr);
    6586            0 :       vec_dest =
    6587            0 :         vect_create_destination_var (scalar_dest,
    6588              :                                      gimple_call_return_type (new_stmt));
    6589            0 :       new_temp = make_ssa_name (vec_dest, new_stmt);
    6590            0 :       gimple_call_set_lhs (new_stmt, new_temp);
    6591              : 
    6592            0 :       if (compute_in_loop)
    6593            0 :         gsi_insert_before (gsi, new_stmt, GSI_SAME_STMT);
    6594              :       else
    6595              :         {
    6596              :           /* Generate the misalignment computation outside LOOP.  */
    6597            0 :           pe = loop_preheader_edge (loop);
    6598            0 :           new_bb = gsi_insert_on_edge_immediate (pe, new_stmt);
    6599            0 :           gcc_assert (!new_bb);
    6600              :         }
    6601              : 
    6602            0 :       *realignment_token = gimple_call_lhs (new_stmt);
    6603              : 
    6604              :       /* The result of the CALL_EXPR to this builtin is determined from
    6605              :          the value of the parameter and no global variables are touched
    6606              :          which makes the builtin a "const" function.  Requiring the
    6607              :          builtin to have the "const" attribute makes it unnecessary
    6608              :          to call mark_call_clobbered.  */
    6609            0 :       gcc_assert (TREE_READONLY (builtin_decl));
    6610              :     }
    6611              : 
    6612            0 :   if (alignment_support_scheme == dr_explicit_realign)
    6613              :     return msq;
    6614              : 
    6615            0 :   gcc_assert (!compute_in_loop);
    6616            0 :   gcc_assert (alignment_support_scheme == dr_explicit_realign_optimized);
    6617              : 
    6618              : 
    6619              :   /* 5. Create msq = phi <msq_init, lsq> in loop  */
    6620              : 
    6621            0 :   pe = loop_preheader_edge (containing_loop);
    6622            0 :   vec_dest = vect_create_destination_var (scalar_dest, vectype);
    6623            0 :   msq = make_ssa_name (vec_dest);
    6624            0 :   phi_stmt = create_phi_node (msq, containing_loop->header);
    6625            0 :   add_phi_arg (phi_stmt, msq_init, pe, UNKNOWN_LOCATION);
    6626              : 
    6627            0 :   return msq;
    6628              : }
    6629              : 
    6630              : 
    6631              : /* Function vect_grouped_load_supported.
    6632              : 
    6633              :    COUNT is the size of the load group (the number of statements plus the
    6634              :    number of gaps).  SINGLE_ELEMENT_P is true if there is actually
    6635              :    only one statement, with a gap of COUNT - 1.
    6636              : 
    6637              :    Returns true if a suitable permute exists.  */
    6638              : 
    6639              : bool
    6640         1957 : vect_grouped_load_supported (tree vectype, bool single_element_p,
    6641              :                              unsigned HOST_WIDE_INT count)
    6642              : {
    6643         1957 :   machine_mode mode = TYPE_MODE (vectype);
    6644              : 
    6645              :   /* If this is single-element interleaving with an element distance
    6646              :      that leaves unused vector loads around, punt: we would at least
    6647              :      create very sub-optimal code in that case (and blow up memory,
    6648              :      see PR65518).  */
    6649         1957 :   if (single_element_p && maybe_gt (count, TYPE_VECTOR_SUBPARTS (vectype)))
    6650              :     {
    6651           42 :       if (dump_enabled_p ())
    6652            3 :         dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
    6653              :                          "single-element interleaving not supported "
    6654              :                          "for not adjacent vector loads\n");
    6655           42 :       return false;
    6656              :     }
    6657              : 
    6658              :   /* vect_permute_load_chain requires the group size to be equal to 3 or
    6659              :      be a power of two.  */
    6660         1915 :   if (count != 3 && exact_log2 (count) == -1)
    6661              :     {
    6662          226 :       if (dump_enabled_p ())
    6663           14 :         dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
    6664              :                          "the size of the group of accesses"
    6665              :                          " is not a power of 2 or not equal to 3\n");
    6666          226 :       return false;
    6667              :     }
    6668              : 
    6669              :   /* Check that the permutation is supported.  A group of 3 needs two
                      :      constant permutes per output vector; a power-of-two group needs
                      :      the extract-even and extract-odd permutes.  */
    6670         1689 :   if (VECTOR_MODE_P (mode))
    6671              :     {
    6672         1689 :       unsigned int i, j;
    6673         1689 :       if (count == 3)
    6674              :         {
    6675          843 :           unsigned int nelt;
    6676         1686 :           if (!GET_MODE_NUNITS (mode).is_constant (&nelt))
    6677              :             {
    6678              :               if (dump_enabled_p ())
    6679              :                 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
    6680              :                                  "cannot handle groups of 3 loads for"
    6681              :                                  " variable-length vectors\n");
    6682              :               return false;
    6683              :             }
    6684              : 
    6685          843 :           vec_perm_builder sel (nelt, nelt, 1);
    6686          843 :           sel.quick_grow (nelt);
    6687          843 :           vec_perm_indices indices;
    6688          843 :           unsigned int k;
    6689         3336 :           for (k = 0; k < 3; k++)
    6690              :             {
    6691         8921 :               for (i = 0; i < nelt; i++)
    6692         6416 :                 if (3 * i + k < 2 * nelt)
    6693         4283 :                   sel[i] = 3 * i + k;
    6694              :                 else
    6695         2133 :                   sel[i] = 0;
    6696         2505 :               indices.new_vector (sel, 2, nelt);
    6697         2505 :               if (!can_vec_perm_const_p (mode, mode, indices))
    6698              :                 {
    6699           12 :                   if (dump_enabled_p ())
    6700            4 :                     dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
    6701              :                                      "shuffle of 3 loads is not supported by"
    6702              :                                      " target\n");
    6703           12 :                   return false;
    6704              :                 }
    6705         8757 :               for (i = 0, j = 0; i < nelt; i++)
    6706         6264 :                 if (3 * i + k < 2 * nelt)
    6707         4176 :                   sel[i] = i;
    6708              :                 else
    6709         2088 :                   sel[i] = nelt + ((nelt + k) % 3) + 3 * (j++);
    6710         2493 :               indices.new_vector (sel, 2, nelt);
    6711         2493 :               if (!can_vec_perm_const_p (mode, mode, indices))
    6712              :                 {
    6713            0 :                   if (dump_enabled_p ())
    6714            0 :                     dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
    6715              :                                      "shuffle of 3 loads is not supported by"
    6716              :                                      " target\n");
    6717            0 :                   return false;
    6718              :                 }
    6719              :             }
    6720              :           return true;
    6721          843 :         }
    6722              :       else
    6723              :         {
    6724              :           /* If COUNT is not equal to 3 then only a power of 2 is supported.  */
    6725          846 :           gcc_assert (pow2p_hwi (count));
    6726         1692 :           poly_uint64 nelt = GET_MODE_NUNITS (mode);
    6727              : 
    6728              :           /* The encoding has a single stepped pattern: { 0, 2, 4, ... }.  */
    6729          846 :           vec_perm_builder sel (nelt, 1, 3);
    6730          846 :           sel.quick_grow (3);
    6731         4230 :           for (i = 0; i < 3; i++)
    6732         2538 :             sel[i] = i * 2;
    6733          846 :           vec_perm_indices indices (sel, 2, nelt);
    6734          846 :           if (can_vec_perm_const_p (mode, mode, indices))
    6735              :             {
    6736         3372 :               for (i = 0; i < 3; i++)
    6737         2529 :                 sel[i] = i * 2 + 1;
    6738          843 :               indices.new_vector (sel, 2, nelt);
    6739          843 :               if (can_vec_perm_const_p (mode, mode, indices))
    6740          843 :                 return true;
    6741              :             }
    6742          846 :         }
    6743              :     }
    6744              : 
    6745            3 :   if (dump_enabled_p ())
    6746            2 :     dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
    6747              :                      "extract even/odd not supported by target\n");
    6748              :   return false;
    6749              : }
    6750              : 
    6751              : /* Return the internal function for vec_{,mask_,mask_len_}load_lanes if it is
    6752              :    available for COUNT vectors of type VECTYPE, or IFN_LAST if none is.  The
    6753              :    mask_len form is tried first; otherwise MASKED_P says whether the masked
    6754              :    form is needed.  If a form is available and ELSVALS is nonzero store the
                      :    possible else values in the vector it points to.  */
    6755              : 
    6756              : internal_fn
    6757       144547 : vect_load_lanes_supported (tree vectype, unsigned HOST_WIDE_INT count,
    6758              :                            bool masked_p, vec<int> *elsvals)
    6759              : {
    6760       144547 :   if (vect_lanes_optab_supported_p ("vec_mask_len_load_lanes",
    6761              :                                     vec_mask_len_load_lanes_optab, vectype,
    6762              :                                     count, elsvals))
    6763              :     return IFN_MASK_LEN_LOAD_LANES;
    6764       144547 :   else if (masked_p)
    6765              :     {
    6766           30 :       if (vect_lanes_optab_supported_p ("vec_mask_load_lanes",
    6767              :                                         vec_mask_load_lanes_optab, vectype,
    6768              :                                         count, elsvals))
    6769              :         return IFN_MASK_LOAD_LANES;
    6770              :     }
    6771              :   else
    6772              :     {
    6773       144517 :       if (vect_lanes_optab_supported_p ("vec_load_lanes", vec_load_lanes_optab,
    6774              :                                         vectype, count, elsvals))
    6775              :         return IFN_LOAD_LANES;
    6776              :     }
    6777              :   return IFN_LAST;
    6778              : }
    6779              : 
    6780              : /* Function vect_can_force_dr_alignment_p.
    6781              : 
    6782              :    Returns whether DECL can be forced to be aligned on an ALIGNMENT-bit
    6783              :    boundary.  Only variables qualify; a decl in the symbol table must
                      :    also have a node that allows increasing its alignment.  Static
                      :    decls are limited by MAX_OFILE_ALIGNMENT, all others by
                      :    MAX_STACK_ALIGNMENT.  */
    6784              : 
    6785              : bool
    6786       709306 : vect_can_force_dr_alignment_p (const_tree decl, poly_uint64 alignment)
    6787              : {
    6788       709306 :   if (!VAR_P (decl))
    6789              :     return false;
    6790              : 
    6791       210516 :   if (decl_in_symtab_p (decl)
    6792       210516 :       && (!symtab_node::get (decl)
    6793        22174 :           || !symtab_node::get (decl)->can_increase_alignment_p ()))
    6794        13470 :     return false;
    6795              : 
    6796       197046 :   if (TREE_STATIC (decl))
    6797         8704 :     return (known_le (alignment,
    6798         8704 :                       (unsigned HOST_WIDE_INT) MAX_OFILE_ALIGNMENT));
    6799              :   else
    6800       188342 :     return (known_le (alignment, (unsigned HOST_WIDE_INT) MAX_STACK_ALIGNMENT));
    6801              : }
    6802              : 
    6803              : /* Return whether the data reference DR_INFO is supported with respect to its
    6804              :    alignment when accessed with vector type VECTYPE.
    6805              :    MISALIGNMENT is the misalignment of the access: zero means it is aligned
    6806              :    and DR_MISALIGNMENT_UNKNOWN means the misalignment is not known.
    6807              :    If IS_GATHER_SCATTER is true we are dealing with a
    6808              :    gather/scatter.  */
    6809              : 
    6810              : enum dr_alignment_support
    6811      2843093 : vect_supportable_dr_alignment (vec_info *vinfo, dr_vec_info *dr_info,
    6812              :                                tree vectype, int misalignment,
    6813              :                                bool is_gather_scatter)
    6814              : {
    6815      2843093 :   data_reference *dr = dr_info->dr;
    6816      2843093 :   stmt_vec_info stmt_info = dr_info->stmt;
    6817      2843093 :   machine_mode mode = TYPE_MODE (vectype);
    6818      2843093 :   loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
    6819      2843093 :   class loop *vect_loop = NULL;
    6820      2843093 :   bool nested_in_vect_loop = false;
    6821              : 
    6822      2843093 :   if (misalignment == 0)
    6823              :     return dr_aligned;
    6824      1748210 :   else if (dr_safe_speculative_read_required (stmt_info))
    6825              :     return dr_unaligned_unsupported;
    6826              : 
    6827      1359207 :   if (loop_vinfo)
    6828              :     {
    6829       954539 :       vect_loop = LOOP_VINFO_LOOP (loop_vinfo);
    6830       954539 :       nested_in_vect_loop = nested_in_vect_loop_p (vect_loop, stmt_info);
    6831              :     }
    6832              : 
    6833              :   /* Possibly unaligned access.  */
    6834              : 
    6835              :   /* We can choose between using the implicit realignment scheme (generating
    6836              :      a misaligned_move stmt) and the explicit realignment scheme (generating
    6837              :      aligned loads with a REALIGN_LOAD).  There are two variants to the
    6838              :      explicit realignment scheme: optimized, and unoptimized.
    6839              :      We can optimize the realignment only if the step between consecutive
    6840              :      vector loads is equal to the vector size.  Since the vector memory
    6841              :      accesses advance in steps of VS (Vector Size) in the vectorized loop, it
    6842              :      is guaranteed that the misalignment amount remains the same throughout the
    6843              :      execution of the vectorized loop.  Therefore, we can create the
    6844              :      "realignment token" (the permutation mask that is passed to REALIGN_LOAD)
    6845              :      at the loop preheader.
    6846              : 
    6847              :      However, in the case of outer-loop vectorization, when vectorizing a
    6848              :      memory access in the inner-loop nested within the LOOP that is now being
    6849              :      vectorized, while it is guaranteed that the misalignment of the
    6850              :      vectorized memory access will remain the same in different outer-loop
    6851              :      iterations, it is *not* guaranteed that it will remain the same throughout
    6852              :      the execution of the inner-loop.  This is because the inner-loop advances
    6853              :      with the original scalar step (and not in steps of VS).  If the inner-loop
    6854              :      step happens to be a multiple of VS, then the misalignment remains fixed
    6855              :      and we can use the optimized realignment scheme.  For example:
    6856              : 
    6857              :       for (i=0; i<N; i++)
    6858              :         for (j=0; j<M; j++)
    6859              :           s += a[i+j];
    6860              : 
    6861              :      When vectorizing the i-loop in the above example, the step between
    6862              :      consecutive vector loads is 1, and so the misalignment does not remain
    6863              :      fixed across the execution of the inner-loop, and the realignment cannot
    6864              :      be optimized (as illustrated in the following pseudo vectorized loop):
    6865              : 
    6866              :       for (i=0; i<N; i+=4)
    6867              :         for (j=0; j<M; j++){
    6868              :           vs += vp[i+j]; // misalignment of &vp[i+j] is {0,1,2,3,0,1,2,3,...}
    6869              :                          // when j is {0,1,2,3,4,5,6,7,...} respectively.
    6870              :                          // (assuming that we start from an aligned address).
    6871              :           }
    6872              : 
    6873              :      We therefore have to use the unoptimized realignment scheme:
    6874              : 
    6875              :       for (i=0; i<N; i+=4)
    6876              :           for (j=k; j<M; j+=4)
    6877              :           vs += vp[i+j]; // misalignment of &vp[i+j] is always k (assuming
    6878              :                            // that the misalignment of the initial address is
    6879              :                            // 0).
    6880              : 
    6881              :      The loop can then be vectorized as follows:
    6882              : 
    6883              :       for (k=0; k<4; k++){
    6884              :         rt = get_realignment_token (&vp[k]);
    6885              :         for (i=0; i<N; i+=4){
    6886              :           v1 = vp[i+k];
    6887              :           for (j=k; j<M; j+=4){
    6888              :             v2 = vp[i+j+VS-1];
    6889              :             va = REALIGN_LOAD <v1,v2,rt>;
    6890              :             vs += va;
    6891              :             v1 = v2;
    6892              :           }
    6893              :         }
    6894              :     } */
    6895              : 
    6896      1359207 :   if (DR_IS_READ (dr) && !is_gather_scatter)
    6897              :     {
    6898       609601 :       if (can_implement_p (vec_realign_load_optab, mode)
    6899       609601 :           && (!targetm.vectorize.builtin_mask_for_load
    6900            0 :               || targetm.vectorize.builtin_mask_for_load ()))
    6901              :         {
    6902              :           /* If we are doing SLP then the accesses need not have the
    6903              :              same alignment, instead it depends on the SLP group size.  */
    6904            0 :           if (loop_vinfo
    6905            0 :               && STMT_VINFO_GROUPED_ACCESS (stmt_info)
    6906            0 :               && !multiple_p (LOOP_VINFO_VECT_FACTOR (loop_vinfo)
    6907            0 :                               * (DR_GROUP_SIZE
    6908            0 :                                    (DR_GROUP_FIRST_ELEMENT (stmt_info))),
    6909            0 :                               TYPE_VECTOR_SUBPARTS (vectype)))
    6910              :             ;
    6911            0 :           else if (!loop_vinfo
    6912            0 :                    || (nested_in_vect_loop
    6913            0 :                        && maybe_ne (TREE_INT_CST_LOW (DR_STEP (dr)),
    6914            0 :                                     GET_MODE_SIZE (TYPE_MODE (vectype)))))
    6915            0 :             return dr_explicit_realign;
    6916              :           else
    6917            0 :             return dr_explicit_realign_optimized;
    6918              :         }
    6919              :     }
    6920              : 
    6921      1359207 :   bool is_packed = not_size_aligned (DR_REF (dr));
    6922      1359207 :   if (misalignment == DR_MISALIGNMENT_UNKNOWN
    6923      1359207 :       && is_gather_scatter)
    6924         3174 :     misalignment = (get_object_alignment (DR_REF (dr))
    6925         3174 :                     % (GET_MODE_BITSIZE (GET_MODE_INNER (mode))))
    6926         3174 :       / BITS_PER_UNIT;
    6927      1359207 :   if (targetm.vectorize.support_vector_misalignment (mode, misalignment,
    6928              :                                                      is_packed,
    6929              :                                                      is_gather_scatter))
    6930              :     return dr_unaligned_supported;
    6931              : 
    6932              :   /* Unsupported.  */
    6933              :   return dr_unaligned_unsupported;
    6934              : }
        

Generated by: LCOV version 2.4-beta

The LCOV profile is generated on an x86_64 machine using the following configure options: configure --disable-bootstrap --enable-coverage=opt --enable-languages=c,c++,fortran,go,jit,lto,rust,m2 --enable-host-shared. The GCC test suite is run with the built compiler.