LCOV - code coverage report
Current view: top level - gcc - tree-vect-data-refs.cc (source / functions)
Test:         gcc.info
Test Date:    2025-03-08 13:07:09
Coverage:     Lines: 81.9 % (2471 of 3016 hit)   Functions: 90.1 % (73 of 81 hit)   Branches: - (0 of 0)
Legend:       Lines: hit | not hit   Branches: + taken   - not taken   # not executed

             Branch data     Line data    Source code
       1                 :             : /* Data References Analysis and Manipulation Utilities for Vectorization.
       2                 :             :    Copyright (C) 2003-2025 Free Software Foundation, Inc.
       3                 :             :    Contributed by Dorit Naishlos <dorit@il.ibm.com>
       4                 :             :    and Ira Rosen <irar@il.ibm.com>
       5                 :             : 
       6                 :             : This file is part of GCC.
       7                 :             : 
       8                 :             : GCC is free software; you can redistribute it and/or modify it under
       9                 :             : the terms of the GNU General Public License as published by the Free
      10                 :             : Software Foundation; either version 3, or (at your option) any later
      11                 :             : version.
      12                 :             : 
      13                 :             : GCC is distributed in the hope that it will be useful, but WITHOUT ANY
      14                 :             : WARRANTY; without even the implied warranty of MERCHANTABILITY or
      15                 :             : FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
      16                 :             : for more details.
      17                 :             : 
      18                 :             : You should have received a copy of the GNU General Public License
      19                 :             : along with GCC; see the file COPYING3.  If not see
      20                 :             : <http://www.gnu.org/licenses/>.  */
      21                 :             : 
      22                 :             : #define INCLUDE_ALGORITHM
      23                 :             : #include "config.h"
      24                 :             : #include "system.h"
      25                 :             : #include "coretypes.h"
      26                 :             : #include "backend.h"
      27                 :             : #include "target.h"
      28                 :             : #include "rtl.h"
      29                 :             : #include "tree.h"
      30                 :             : #include "gimple.h"
      31                 :             : #include "predict.h"
      32                 :             : #include "memmodel.h"
      33                 :             : #include "tm_p.h"
      34                 :             : #include "ssa.h"
      35                 :             : #include "optabs-tree.h"
      36                 :             : #include "cgraph.h"
      37                 :             : #include "dumpfile.h"
      38                 :             : #include "pretty-print.h"
      39                 :             : #include "alias.h"
      40                 :             : #include "fold-const.h"
      41                 :             : #include "stor-layout.h"
      42                 :             : #include "tree-eh.h"
      43                 :             : #include "gimplify.h"
      44                 :             : #include "gimple-iterator.h"
      45                 :             : #include "gimplify-me.h"
      46                 :             : #include "tree-ssa-loop-ivopts.h"
      47                 :             : #include "tree-ssa-loop-manip.h"
      48                 :             : #include "tree-ssa-loop.h"
      49                 :             : #include "cfgloop.h"
      50                 :             : #include "tree-scalar-evolution.h"
      51                 :             : #include "tree-vectorizer.h"
      52                 :             : #include "expr.h"
      53                 :             : #include "builtins.h"
      54                 :             : #include "tree-cfg.h"
      55                 :             : #include "tree-hash-traits.h"
      56                 :             : #include "vec-perm-indices.h"
      57                 :             : #include "internal-fn.h"
      58                 :             : #include "gimple-fold.h"
      59                 :             : #include "optabs-query.h"
      60                 :             : 
      61                 :             : /* Return true if load- or store-lanes optab OPTAB is implemented for
      62                 :             :    COUNT vectors of type VECTYPE.  NAME is the name of OPTAB.
      63                 :             : 
      64                 :             :    If it is implemented and ELSVALS is nonzero store the possible else
      65                 :             :    values in the vector it points to.  */
      66                 :             : 
      67                 :             : static bool
      68                 :      283324 : vect_lanes_optab_supported_p (const char *name, convert_optab optab,
      69                 :             :                               tree vectype, unsigned HOST_WIDE_INT count,
      70                 :             :                               vec<int> *elsvals = nullptr)
      71                 :             : {
      72                 :      283324 :   machine_mode mode, array_mode;
      73                 :      283324 :   bool limit_p;
      74                 :             : 
      75                 :      283324 :   mode = TYPE_MODE (vectype);
      76                 :      283324 :   if (!targetm.array_mode (mode, count).exists (&array_mode))
      77                 :             :     {
      78                 :      566648 :       poly_uint64 bits = count * GET_MODE_BITSIZE (mode);
      79                 :      283324 :       limit_p = !targetm.array_mode_supported_p (mode, count);
      80                 :      283324 :       if (!int_mode_for_size (bits, limit_p).exists (&array_mode))
      81                 :             :         {
      82                 :      262676 :           if (dump_enabled_p ())
      83                 :       14162 :             dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
      84                 :             :                              "no array mode for %s[%wu]\n",
      85                 :       14162 :                              GET_MODE_NAME (mode), count);
      86                 :      262676 :           return false;
      87                 :             :         }
      88                 :             :     }
      89                 :             : 
      90                 :       20648 :   enum insn_code icode;
      91                 :       20648 :   if ((icode = convert_optab_handler (optab, array_mode, mode))
      92                 :             :       == CODE_FOR_nothing)
      93                 :             :     {
      94                 :       20648 :       if (dump_enabled_p ())
      95                 :        4182 :         dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
      96                 :             :                          "cannot use %s<%s><%s>\n", name,
      97                 :        4182 :                          GET_MODE_NAME (array_mode), GET_MODE_NAME (mode));
      98                 :       20648 :       return false;
      99                 :             :     }
     100                 :             : 
     101                 :           0 :   if (dump_enabled_p ())
     102                 :           0 :     dump_printf_loc (MSG_NOTE, vect_location,
     103                 :           0 :                      "can use %s<%s><%s>\n", name, GET_MODE_NAME (array_mode),
     104                 :           0 :                      GET_MODE_NAME (mode));
     105                 :             : 
     106                 :           0 :   if (elsvals)
     107                 :           0 :     get_supported_else_vals (icode,
     108                 :           0 :                              internal_fn_else_index (IFN_MASK_LEN_LOAD_LANES),
     109                 :             :                              *elsvals);
     110                 :             : 
     111                 :             :   return true;
     112                 :             : }
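
A hedged illustration (this loop is not part of the file): the query above is what decides whether an interleaved access group can be vectorized with a single multi-vector load or store, mapping a group of COUNT accesses to instructions such as AArch64 LD2/ST2 when the target implements the optab.

  /* Sketch only: a group of two interleaved loads per iteration.  On targets
     whose load-lanes optab is implemented for count = 2 and the chosen
     vectype, both loads can become one "load lanes" filling two vectors.  */
  void
  deinterleave (float *__restrict__ re, float *__restrict__ im,
                const float *__restrict__ in, int n)
  {
    for (int i = 0; i < n; i++)
      {
        re[i] = in[2 * i];      /* lane 0 of each pair.  */
        im[i] = in[2 * i + 1];  /* lane 1 of each pair.  */
      }
  }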
     113                 :             : 
     114                 :             : /* Helper function to identify a simd clone call.  If this is a call to a
     115                 :             :    function with simd clones then return the corresponding cgraph_node,
     116                 :             :    otherwise return NULL.  */
     117                 :             : 
     118                 :             : static cgraph_node*
     119                 :      754027 : simd_clone_call_p (gimple *stmt)
     120                 :             : {
     121                 :      833433 :   gcall *call = dyn_cast <gcall *> (stmt);
     122                 :       81743 :   if (!call)
     123                 :             :     return NULL;
     124                 :             : 
     125                 :       81743 :   tree fndecl = NULL_TREE;
     126                 :       81743 :   if (gimple_call_internal_p (call, IFN_MASK_CALL))
     127                 :         349 :     fndecl = TREE_OPERAND (gimple_call_arg (stmt, 0), 0);
     128                 :             :   else
     129                 :       81394 :     fndecl = gimple_call_fndecl (stmt);
     130                 :             : 
     131                 :       81743 :   if (fndecl == NULL_TREE)
     132                 :             :     return NULL;
     133                 :             : 
     134                 :       35492 :   cgraph_node *node = cgraph_node::get (fndecl);
     135                 :       35492 :   if (node && node->simd_clones != NULL)
     136                 :             :     return node;
     137                 :             : 
     138                 :             :   return NULL;
     139                 :             : }
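
A hedged example of what simd_clone_call_p detects (not part of this file): a function declared with OpenMP "declare simd" gets simd clones attached to its cgraph node, so a call to it inside a loop is the kind of statement recognized here.

  /* Sketch only: the pragma makes the compiler emit simd clones for scale,
     so the call below is a statement simd_clone_call_p returns a
     cgraph_node for.  */
  #pragma omp declare simd
  float scale (float x);

  void
  apply (float *a, int n)
  {
    for (int i = 0; i < n; i++)
      a[i] = scale (a[i]);
  }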
     140                 :             : 
     141                 :             : 
     142                 :             : 
     143                 :             : /* Return the smallest scalar part of STMT_INFO.
     144                 :             :    This is used to determine the vectype of the stmt.  We generally set the
     145                 :             :    vectype according to the type of the result (lhs).  For stmts whose
     146                 :             :    result-type is different than the type of the arguments (e.g., demotion,
     147                 :             :    promotion), vectype will be reset appropriately (later).  Note that we have
     148                 :             :    to visit the smallest datatype in this function, because that determines the
     149                 :             :    VF.  If the smallest datatype in the loop is present only as the rhs of a
     150                 :             :    promotion operation - we'd miss it.
     151                 :             :    Such a case, where a variable of this datatype does not appear in the lhs
     152                 :             :    anywhere in the loop, can only occur if it's an invariant: e.g.:
     153                 :             :    'int_x = (int) short_inv', which we'd expect to have been optimized away by
     154                 :             :    invariant motion.  However, we cannot rely on invariant motion to always
     155                 :             :    take invariants out of the loop, and so in the case of promotion we also
     156                 :             :    have to check the rhs.
     157                 :             :    LHS_SIZE_UNIT and RHS_SIZE_UNIT contain the sizes of the corresponding
     158                 :             :    types.  */
     159                 :             : 
     160                 :             : tree
     161                 :     6071439 : vect_get_smallest_scalar_type (stmt_vec_info stmt_info, tree scalar_type)
     162                 :             : {
     163                 :     6071439 :   HOST_WIDE_INT lhs, rhs;
     164                 :             : 
     165                 :             :   /* During the analysis phase, this function is called on arbitrary
     166                 :             :      statements that might not have scalar results.  */
     167                 :     6071439 :   if (!tree_fits_uhwi_p (TYPE_SIZE_UNIT (scalar_type)))
     168                 :             :     return scalar_type;
     169                 :             : 
     170                 :     6071439 :   lhs = rhs = TREE_INT_CST_LOW (TYPE_SIZE_UNIT (scalar_type));
     171                 :             : 
     172                 :     6071439 :   gassign *assign = dyn_cast <gassign *> (stmt_info->stmt);
     173                 :     6071439 :   if (assign)
     174                 :             :     {
     175                 :     5317412 :       scalar_type = TREE_TYPE (gimple_assign_lhs (assign));
     176                 :     5317412 :       if (gimple_assign_cast_p (assign)
     177                 :     4878813 :           || gimple_assign_rhs_code (assign) == DOT_PROD_EXPR
     178                 :     4878043 :           || gimple_assign_rhs_code (assign) == WIDEN_SUM_EXPR
     179                 :     4878043 :           || gimple_assign_rhs_code (assign) == SAD_EXPR
     180                 :     4877701 :           || gimple_assign_rhs_code (assign) == WIDEN_MULT_EXPR
     181                 :     4872794 :           || gimple_assign_rhs_code (assign) == WIDEN_MULT_PLUS_EXPR
     182                 :     4872794 :           || gimple_assign_rhs_code (assign) == WIDEN_MULT_MINUS_EXPR
     183                 :     4872794 :           || gimple_assign_rhs_code (assign) == WIDEN_LSHIFT_EXPR
     184                 :    10190206 :           || gimple_assign_rhs_code (assign) == FLOAT_EXPR)
     185                 :             :         {
     186                 :      465265 :           tree rhs_type = TREE_TYPE (gimple_assign_rhs1 (assign));
     187                 :             : 
     188                 :      465265 :           rhs = TREE_INT_CST_LOW (TYPE_SIZE_UNIT (rhs_type));
     189                 :      465265 :           if (rhs < lhs)
     190                 :     6071439 :             scalar_type = rhs_type;
     191                 :             :         }
     192                 :             :     }
     193                 :      754027 :   else if (cgraph_node *node = simd_clone_call_p (stmt_info->stmt))
     194                 :             :     {
     195                 :        2337 :       auto clone = node->simd_clones->simdclone;
     196                 :        7528 :       for (unsigned int i = 0; i < clone->nargs; ++i)
     197                 :             :         {
     198                 :        5191 :           if (clone->args[i].arg_type == SIMD_CLONE_ARG_TYPE_VECTOR)
     199                 :             :             {
     200                 :        3028 :               tree arg_scalar_type = TREE_TYPE (clone->args[i].vector_type);
     201                 :        3028 :               rhs = TREE_INT_CST_LOW (TYPE_SIZE_UNIT (arg_scalar_type));
     202                 :        3028 :               if (rhs < lhs)
     203                 :             :                 {
     204                 :        5191 :                   scalar_type = arg_scalar_type;
     205                 :        5191 :                   lhs = rhs;
     206                 :             :                 }
     207                 :             :             }
     208                 :             :         }
     209                 :             :     }
     210                 :      751690 :   else if (gcall *call = dyn_cast <gcall *> (stmt_info->stmt))
     211                 :             :     {
     212                 :       79406 :       unsigned int i = 0;
     213                 :       79406 :       if (gimple_call_internal_p (call))
     214                 :             :         {
     215                 :       43004 :           internal_fn ifn = gimple_call_internal_fn (call);
     216                 :       43004 :           if (internal_load_fn_p (ifn))
     217                 :             :             /* For loads the LHS type does the trick.  */
     218                 :             :             i = ~0U;
     219                 :       36877 :           else if (internal_store_fn_p (ifn))
     220                 :             :             {
      221                 :             :               /* For stores use the type of the stored value.  */
     222                 :        3283 :               i = internal_fn_stored_value_index (ifn);
     223                 :        3283 :               scalar_type = TREE_TYPE (gimple_call_arg (call, i));
     224                 :        3283 :               i = ~0U;
     225                 :             :             }
     226                 :       33594 :           else if (internal_fn_mask_index (ifn) == 0)
     227                 :        8820 :             i = 1;
     228                 :             :         }
     229                 :       79406 :       if (i < gimple_call_num_args (call))
     230                 :             :         {
     231                 :       65481 :           tree rhs_type = TREE_TYPE (gimple_call_arg (call, i));
     232                 :       65481 :           if (tree_fits_uhwi_p (TYPE_SIZE_UNIT (rhs_type)))
     233                 :             :             {
     234                 :       65481 :               rhs = TREE_INT_CST_LOW (TYPE_SIZE_UNIT (rhs_type));
     235                 :       65481 :               if (rhs < lhs)
     236                 :     6071439 :                 scalar_type = rhs_type;
     237                 :             :             }
     238                 :             :         }
     239                 :             :     }
     240                 :             : 
     241                 :             :   return scalar_type;
     242                 :             : }
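
A hedged example of why the rhs of promotions matters (loop not from this file): the narrowest type below feeds a widening cast, yet it is what bounds the number of scalars per vector, so the function returns it for that statement.

  /* Sketch only: the widening cast has an int lhs but a short rhs; the
     smallest scalar type in the loop is short, and that is what determines
     the vectorization factor.  */
  void
  widen_sum (int *out, const short *in, int n)
  {
    for (int i = 0; i < n; i++)
      out[i] += (int) in[i];
  }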
     243                 :             : 
     244                 :             : 
     245                 :             : /* Insert DDR into LOOP_VINFO list of ddrs that may alias and need to be
     246                 :             :    tested at run-time.  Return TRUE if DDR was successfully inserted.
     247                 :             :    Return false if versioning is not supported.  */
     248                 :             : 
     249                 :             : static opt_result
     250                 :       72680 : vect_mark_for_runtime_alias_test (ddr_p ddr, loop_vec_info loop_vinfo)
     251                 :             : {
     252                 :       72680 :   class loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
     253                 :             : 
     254                 :       72680 :   if ((unsigned) param_vect_max_version_for_alias_checks == 0)
     255                 :          54 :     return opt_result::failure_at (vect_location,
     256                 :             :                                    "will not create alias checks, as"
     257                 :             :                                    " --param vect-max-version-for-alias-checks"
     258                 :             :                                    " == 0\n");
     259                 :             : 
     260                 :       72626 :   opt_result res
     261                 :       72626 :     = runtime_alias_check_p (ddr, loop,
     262                 :       72626 :                              optimize_loop_nest_for_speed_p (loop));
     263                 :       72626 :   if (!res)
     264                 :         128 :     return res;
     265                 :             : 
     266                 :       72498 :   LOOP_VINFO_MAY_ALIAS_DDRS (loop_vinfo).safe_push (ddr);
     267                 :       72498 :   return opt_result::success ();
     268                 :             : }
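
A hedged example of a loop that ends up on the may-alias list (not from this file): without restrict qualifiers nothing proves dst and src are disjoint, so vectorization is guarded by a run-time overlap check; --param vect-max-version-for-alias-checks=0 disables creating such checks, as the failure message above states.

  /* Sketch only: the DDR between the store to dst[i] and the load of src[i]
     cannot be resolved at compile time, so it is queued for a run-time
     alias test and the loop is versioned.  */
  void
  copy_add (int *dst, const int *src, int n)
  {
    for (int i = 0; i < n; i++)
      dst[i] = src[i] + 1;
  }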
     269                 :             : 
     270                 :             : /* Record that loop LOOP_VINFO needs to check that VALUE is nonzero.  */
     271                 :             : 
     272                 :             : static void
     273                 :        1201 : vect_check_nonzero_value (loop_vec_info loop_vinfo, tree value)
     274                 :             : {
     275                 :        1201 :   const vec<tree> &checks = LOOP_VINFO_CHECK_NONZERO (loop_vinfo);
     276                 :        1852 :   for (unsigned int i = 0; i < checks.length(); ++i)
     277                 :         657 :     if (checks[i] == value)
     278                 :             :       return;
     279                 :             : 
     280                 :        1195 :   if (dump_enabled_p ())
     281                 :         425 :     dump_printf_loc (MSG_NOTE, vect_location,
     282                 :             :                      "need run-time check that %T is nonzero\n",
     283                 :             :                      value);
     284                 :        1195 :   LOOP_VINFO_CHECK_NONZERO (loop_vinfo).safe_push (value);
     285                 :             : }
     286                 :             : 
     287                 :             : /* Return true if we know that the order of vectorized DR_INFO_A and
     288                 :             :    vectorized DR_INFO_B will be the same as the order of DR_INFO_A and
     289                 :             :    DR_INFO_B.  At least one of the accesses is a write.  */
     290                 :             : 
     291                 :             : static bool
     292                 :      101750 : vect_preserves_scalar_order_p (dr_vec_info *dr_info_a, dr_vec_info *dr_info_b)
     293                 :             : {
     294                 :      101750 :   stmt_vec_info stmtinfo_a = dr_info_a->stmt;
     295                 :      101750 :   stmt_vec_info stmtinfo_b = dr_info_b->stmt;
     296                 :             : 
     297                 :             :   /* Single statements are always kept in their original order.  */
     298                 :      101750 :   if (!STMT_VINFO_GROUPED_ACCESS (stmtinfo_a)
     299                 :      163445 :       && !STMT_VINFO_GROUPED_ACCESS (stmtinfo_b))
     300                 :             :     return true;
     301                 :             : 
     302                 :             :   /* If there is a loop invariant read involved we might vectorize it in
     303                 :             :      the prologue, breaking scalar oder with respect to the in-loop store.  */
     304                 :       20641 :   if ((DR_IS_READ (dr_info_a->dr) && integer_zerop (DR_STEP (dr_info_a->dr)))
     305                 :       62309 :       || (DR_IS_READ (dr_info_b->dr) && integer_zerop (DR_STEP (dr_info_b->dr))))
     306                 :        1305 :     return false;
     307                 :             : 
     308                 :             :   /* STMT_A and STMT_B belong to overlapping groups.  All loads are
     309                 :             :      emitted at the position of the first scalar load.
     310                 :             :      Stores in a group are emitted at the position of the last scalar store.
     311                 :             :      Compute that position and check whether the resulting order matches
     312                 :             :      the current one.  */
     313                 :       41356 :   stmt_vec_info il_a = DR_GROUP_FIRST_ELEMENT (stmtinfo_a);
     314                 :       41356 :   if (il_a)
     315                 :             :     {
     316                 :       39743 :       if (DR_IS_WRITE (STMT_VINFO_DATA_REF (stmtinfo_a)))
     317                 :      158446 :         for (stmt_vec_info s = DR_GROUP_NEXT_ELEMENT (il_a); s;
     318                 :      137785 :              s = DR_GROUP_NEXT_ELEMENT (s))
     319                 :      137785 :           il_a = get_later_stmt (il_a, s);
     320                 :             :       else /* DR_IS_READ */
     321                 :       78770 :         for (stmt_vec_info s = DR_GROUP_NEXT_ELEMENT (il_a); s;
     322                 :       59688 :              s = DR_GROUP_NEXT_ELEMENT (s))
     323                 :       59688 :           if (get_later_stmt (il_a, s) == il_a)
     324                 :        1850 :             il_a = s;
     325                 :             :     }
     326                 :             :   else
     327                 :             :     il_a = stmtinfo_a;
     328                 :       41356 :   stmt_vec_info il_b = DR_GROUP_FIRST_ELEMENT (stmtinfo_b);
     329                 :       41356 :   if (il_b)
     330                 :             :     {
     331                 :       37477 :       if (DR_IS_WRITE (STMT_VINFO_DATA_REF (stmtinfo_b)))
     332                 :      201027 :         for (stmt_vec_info s = DR_GROUP_NEXT_ELEMENT (il_b); s;
     333                 :      171953 :              s = DR_GROUP_NEXT_ELEMENT (s))
     334                 :      171953 :           il_b = get_later_stmt (il_b, s);
     335                 :             :       else /* DR_IS_READ */
     336                 :       39019 :         for (stmt_vec_info s = DR_GROUP_NEXT_ELEMENT (il_b); s;
     337                 :       30616 :              s = DR_GROUP_NEXT_ELEMENT (s))
     338                 :       30616 :           if (get_later_stmt (il_b, s) == il_b)
     339                 :         207 :             il_b = s;
     340                 :             :     }
     341                 :             :   else
     342                 :             :     il_b = stmtinfo_b;
     343                 :       41356 :   bool a_after_b = (get_later_stmt (stmtinfo_a, stmtinfo_b) == stmtinfo_a);
     344                 :       41356 :   return (get_later_stmt (il_a, il_b) == il_a) == a_after_b;
     345                 :             : }
     346                 :             : 
     347                 :             : /* A subroutine of vect_analyze_data_ref_dependence.  Handle
     348                 :             :    DDR_COULD_BE_INDEPENDENT_P ddr DDR that has a known set of dependence
     349                 :             :    distances.  These distances are conservatively correct but they don't
     350                 :             :    reflect a guaranteed dependence.
     351                 :             : 
     352                 :             :    Return true if this function does all the work necessary to avoid
     353                 :             :    an alias or false if the caller should use the dependence distances
     354                 :             :    to limit the vectorization factor in the usual way.  LOOP_DEPTH is
     355                 :             :    the depth of the loop described by LOOP_VINFO and the other arguments
     356                 :             :    are as for vect_analyze_data_ref_dependence.  */
     357                 :             : 
     358                 :             : static bool
     359                 :        7148 : vect_analyze_possibly_independent_ddr (data_dependence_relation *ddr,
     360                 :             :                                        loop_vec_info loop_vinfo,
     361                 :             :                                        int loop_depth, unsigned int *max_vf)
     362                 :             : {
     363                 :        7148 :   class loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
     364                 :       28610 :   for (lambda_vector &dist_v : DDR_DIST_VECTS (ddr))
     365                 :             :     {
     366                 :       14146 :       int dist = dist_v[loop_depth];
     367                 :       14146 :       if (dist != 0 && !(dist > 0 && DDR_REVERSED_P (ddr)))
     368                 :             :         {
     369                 :             :           /* If the user asserted safelen >= DIST consecutive iterations
     370                 :             :              can be executed concurrently, assume independence.
     371                 :             : 
     372                 :             :              ??? An alternative would be to add the alias check even
     373                 :             :              in this case, and vectorize the fallback loop with the
     374                 :             :              maximum VF set to safelen.  However, if the user has
     375                 :             :              explicitly given a length, it's less likely that that
     376                 :             :              would be a win.  */
     377                 :        7012 :           if (loop->safelen >= 2 && abs_hwi (dist) <= loop->safelen)
     378                 :             :             {
     379                 :          32 :               if ((unsigned int) loop->safelen < *max_vf)
     380                 :           2 :                 *max_vf = loop->safelen;
     381                 :          32 :               LOOP_VINFO_NO_DATA_DEPENDENCIES (loop_vinfo) = false;
     382                 :          32 :               continue;
     383                 :             :             }
     384                 :             : 
     385                 :             :           /* For dependence distances of 2 or more, we have the option
     386                 :             :              of limiting VF or checking for an alias at runtime.
     387                 :             :              Prefer to check at runtime if we can, to avoid limiting
     388                 :             :              the VF unnecessarily when the bases are in fact independent.
     389                 :             : 
     390                 :             :              Note that the alias checks will be removed if the VF ends up
     391                 :             :              being small enough.  */
     392                 :        6980 :           dr_vec_info *dr_info_a = loop_vinfo->lookup_dr (DDR_A (ddr));
     393                 :        6980 :           dr_vec_info *dr_info_b = loop_vinfo->lookup_dr (DDR_B (ddr));
     394                 :        6980 :           return (!STMT_VINFO_GATHER_SCATTER_P (dr_info_a->stmt)
     395                 :        6980 :                   && !STMT_VINFO_GATHER_SCATTER_P (dr_info_b->stmt)
     396                 :       13968 :                   && vect_mark_for_runtime_alias_test (ddr, loop_vinfo));
     397                 :             :         }
     398                 :             :     }
     399                 :             :   return true;
     400                 :             : }
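
A hedged note on where loop->safelen comes from (example not in this file): #pragma omp simd safelen(N) asserts that up to N consecutive iterations may execute concurrently, so dependence distances no larger than N can be treated as independent, provided the VF is capped at N as done above.

  /* Sketch only: the user promises that k is either 0 or at least 8, so
     executing up to 8 iterations concurrently is safe; the vectorizer may
     assume independence but must keep the VF at or below 8.  */
  void
  shift (int *a, int k, int n)
  {
  #pragma omp simd safelen(8)
    for (int i = 0; i < n; i++)
      a[i] = a[i + k] + 1;
  }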
     401                 :             : 
     402                 :             : 
     403                 :             : /* Function vect_analyze_data_ref_dependence.
     404                 :             : 
     405                 :             :    FIXME: I needed to change the sense of the returned flag.
     406                 :             : 
     407                 :             :    Return FALSE if there (might) exist a dependence between a memory-reference
     408                 :             :    DRA and a memory-reference DRB.  When versioning for alias may check a
     409                 :             :    dependence at run-time, return TRUE.  Adjust *MAX_VF according to
     410                 :             :    the data dependence.  */
     411                 :             : 
     412                 :             : static opt_result
     413                 :     1774608 : vect_analyze_data_ref_dependence (struct data_dependence_relation *ddr,
     414                 :             :                                   loop_vec_info loop_vinfo,
     415                 :             :                                   unsigned int *max_vf)
     416                 :             : {
     417                 :     1774608 :   unsigned int i;
     418                 :     1774608 :   class loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
     419                 :     1774608 :   struct data_reference *dra = DDR_A (ddr);
     420                 :     1774608 :   struct data_reference *drb = DDR_B (ddr);
     421                 :     1774608 :   dr_vec_info *dr_info_a = loop_vinfo->lookup_dr (dra);
     422                 :     1774608 :   dr_vec_info *dr_info_b = loop_vinfo->lookup_dr (drb);
     423                 :     1774608 :   stmt_vec_info stmtinfo_a = dr_info_a->stmt;
     424                 :     1774608 :   stmt_vec_info stmtinfo_b = dr_info_b->stmt;
     425                 :     1774608 :   lambda_vector dist_v;
     426                 :     1774608 :   unsigned int loop_depth;
     427                 :             : 
     428                 :             :   /* If user asserted safelen consecutive iterations can be
     429                 :             :      executed concurrently, assume independence.  */
     430                 :     1855410 :   auto apply_safelen = [&]()
     431                 :             :     {
     432                 :       80802 :       if (loop->safelen >= 2)
     433                 :             :         {
     434                 :        7454 :           if ((unsigned int) loop->safelen < *max_vf)
     435                 :        1895 :             *max_vf = loop->safelen;
     436                 :        7454 :           LOOP_VINFO_NO_DATA_DEPENDENCIES (loop_vinfo) = false;
     437                 :        7454 :           return true;
     438                 :             :         }
     439                 :             :       return false;
     440                 :     1774608 :     };
     441                 :             : 
     442                 :             :   /* In loop analysis all data references should be vectorizable.  */
     443                 :     1774608 :   if (!STMT_VINFO_VECTORIZABLE (stmtinfo_a)
     444                 :     1774608 :       || !STMT_VINFO_VECTORIZABLE (stmtinfo_b))
     445                 :           0 :     gcc_unreachable ();
     446                 :             : 
     447                 :             :   /* Independent data accesses.  */
     448                 :     1774608 :   if (DDR_ARE_DEPENDENT (ddr) == chrec_known)
     449                 :     1636019 :     return opt_result::success ();
     450                 :             : 
     451                 :      138589 :   if (dra == drb
     452                 :      138589 :       || (DR_IS_READ (dra) && DR_IS_READ (drb)))
     453                 :           0 :     return opt_result::success ();
     454                 :             : 
     455                 :             :   /* We do not have to consider dependences between accesses that belong
     456                 :             :      to the same group, unless the stride could be smaller than the
     457                 :             :      group size.  */
     458                 :      138589 :   if (DR_GROUP_FIRST_ELEMENT (stmtinfo_a)
     459                 :       47907 :       && (DR_GROUP_FIRST_ELEMENT (stmtinfo_a)
     460                 :       47907 :           == DR_GROUP_FIRST_ELEMENT (stmtinfo_b))
     461                 :      143372 :       && !STMT_VINFO_STRIDED_P (stmtinfo_a))
     462                 :         137 :     return opt_result::success ();
     463                 :             : 
     464                 :             :   /* Even if we have an anti-dependence then, as the vectorized loop covers at
     465                 :             :      least two scalar iterations, there is always also a true dependence.
     466                 :             :      As the vectorizer does not re-order loads and stores we can ignore
     467                 :             :      the anti-dependence if TBAA can disambiguate both DRs similar to the
     468                 :             :      case with known negative distance anti-dependences (positive
     469                 :             :      distance anti-dependences would violate TBAA constraints).  */
     470                 :       90374 :   if (((DR_IS_READ (dra) && DR_IS_WRITE (drb))
     471                 :       48078 :        || (DR_IS_WRITE (dra) && DR_IS_READ (drb)))
     472                 :      242891 :       && !alias_sets_conflict_p (get_alias_set (DR_REF (dra)),
     473                 :             :                                  get_alias_set (DR_REF (drb))))
     474                 :        3990 :     return opt_result::success ();
     475                 :             : 
     476                 :      134462 :   if (STMT_VINFO_GATHER_SCATTER_P (stmtinfo_a)
     477                 :      127227 :       || STMT_VINFO_GATHER_SCATTER_P (stmtinfo_b))
     478                 :             :     {
     479                 :        9038 :       if (apply_safelen ())
     480                 :        1390 :         return opt_result::success ();
     481                 :             : 
     482                 :        7648 :       return opt_result::failure_at
     483                 :        7648 :         (stmtinfo_a->stmt,
     484                 :             :          "possible alias involving gather/scatter between %T and %T\n",
     485                 :             :          DR_REF (dra), DR_REF (drb));
     486                 :             :     }
     487                 :             : 
     488                 :             :   /* Unknown data dependence.  */
     489                 :      125424 :   if (DDR_ARE_DEPENDENT (ddr) == chrec_dont_know)
     490                 :             :     {
     491                 :       71265 :       if (apply_safelen ())
     492                 :        6064 :         return opt_result::success ();
     493                 :             : 
     494                 :       65201 :       if (dump_enabled_p ())
     495                 :        7014 :         dump_printf_loc (MSG_MISSED_OPTIMIZATION, stmtinfo_a->stmt,
     496                 :             :                          "versioning for alias required: "
     497                 :             :                          "can't determine dependence between %T and %T\n",
     498                 :             :                          DR_REF (dra), DR_REF (drb));
     499                 :             : 
     500                 :             :       /* Add to list of ddrs that need to be tested at run-time.  */
     501                 :       65201 :       return vect_mark_for_runtime_alias_test (ddr, loop_vinfo);
     502                 :             :     }
     503                 :             : 
     504                 :             :   /* Known data dependence.  */
     505                 :       54159 :   if (DDR_NUM_DIST_VECTS (ddr) == 0)
     506                 :             :     {
     507                 :         499 :       if (apply_safelen ())
     508                 :           0 :         return opt_result::success ();
     509                 :             : 
     510                 :         499 :       if (dump_enabled_p ())
     511                 :         114 :         dump_printf_loc (MSG_MISSED_OPTIMIZATION, stmtinfo_a->stmt,
     512                 :             :                          "versioning for alias required: "
     513                 :             :                          "bad dist vector for %T and %T\n",
     514                 :             :                          DR_REF (dra), DR_REF (drb));
     515                 :             :       /* Add to list of ddrs that need to be tested at run-time.  */
     516                 :         499 :       return vect_mark_for_runtime_alias_test (ddr, loop_vinfo);
     517                 :             :     }
     518                 :             : 
     519                 :       53660 :   loop_depth = index_in_loop_nest (loop->num, DDR_LOOP_NEST (ddr));
     520                 :             : 
     521                 :       53660 :   if (DDR_COULD_BE_INDEPENDENT_P (ddr)
     522                 :       53660 :       && vect_analyze_possibly_independent_ddr (ddr, loop_vinfo,
     523                 :             :                                                 loop_depth, max_vf))
     524                 :        7140 :     return opt_result::success ();
     525                 :             : 
     526                 :       87247 :   FOR_EACH_VEC_ELT (DDR_DIST_VECTS (ddr), i, dist_v)
     527                 :             :     {
     528                 :       46540 :       int dist = dist_v[loop_depth];
     529                 :             : 
     530                 :       46540 :       if (dump_enabled_p ())
     531                 :        3670 :         dump_printf_loc (MSG_NOTE, vect_location,
     532                 :             :                          "dependence distance  = %d.\n", dist);
     533                 :             : 
     534                 :       46540 :       if (dist == 0)
     535                 :             :         {
     536                 :       37821 :           if (dump_enabled_p ())
     537                 :        3262 :             dump_printf_loc (MSG_NOTE, vect_location,
     538                 :             :                              "dependence distance == 0 between %T and %T\n",
     539                 :             :                              DR_REF (dra), DR_REF (drb));
     540                 :             : 
     541                 :             :           /* When we perform grouped accesses and perform implicit CSE
     542                 :             :              by detecting equal accesses and doing disambiguation with
     543                 :             :              runtime alias tests like for
     544                 :             :                 .. = a[i];
     545                 :             :                 .. = a[i+1];
     546                 :             :                 a[i] = ..;
     547                 :             :                 a[i+1] = ..;
     548                 :             :                 *p = ..;
     549                 :             :                 .. = a[i];
     550                 :             :                 .. = a[i+1];
     551                 :             :              where we will end up loading { a[i], a[i+1] } once, make
     552                 :             :              sure that inserting group loads before the first load and
     553                 :             :              stores after the last store will do the right thing.
     554                 :             :              Similar for groups like
     555                 :             :                 a[i] = ...;
     556                 :             :                 ... = a[i];
     557                 :             :                 a[i+1] = ...;
     558                 :             :              where loads from the group interleave with the store.  */
     559                 :       37821 :           if (!vect_preserves_scalar_order_p (dr_info_a, dr_info_b))
     560                 :           0 :             return opt_result::failure_at (stmtinfo_a->stmt,
     561                 :             :                                            "READ_WRITE dependence"
     562                 :             :                                            " in interleaving.\n");
     563                 :             : 
     564                 :       37821 :           if (loop->safelen < 2)
     565                 :             :             {
     566                 :       34082 :               tree indicator = dr_zero_step_indicator (dra);
     567                 :       34082 :               if (!indicator || integer_zerop (indicator))
     568                 :           0 :                 return opt_result::failure_at (stmtinfo_a->stmt,
     569                 :             :                                                "access also has a zero step\n");
     570                 :       34082 :               else if (TREE_CODE (indicator) != INTEGER_CST)
     571                 :        1201 :                 vect_check_nonzero_value (loop_vinfo, indicator);
     572                 :             :             }
     573                 :       37821 :           continue;
     574                 :       37821 :         }
     575                 :             : 
     576                 :        8719 :       if (dist > 0 && DDR_REVERSED_P (ddr))
     577                 :             :         {
     578                 :             :           /* If DDR_REVERSED_P the order of the data-refs in DDR was
     579                 :             :              reversed (to make distance vector positive), and the actual
     580                 :             :              distance is negative.  */
     581                 :        2505 :           if (dump_enabled_p ())
     582                 :         105 :             dump_printf_loc (MSG_NOTE, vect_location,
     583                 :             :                              "dependence distance negative.\n");
     584                 :             :           /* When doing outer loop vectorization, we need to check if there is
     585                 :             :              a backward dependence at the inner loop level if the dependence
     586                 :             :              at the outer loop is reversed.  See PR81740.  */
     587                 :        2505 :           if (nested_in_vect_loop_p (loop, stmtinfo_a)
     588                 :        2493 :               || nested_in_vect_loop_p (loop, stmtinfo_b))
     589                 :             :             {
     590                 :          12 :               unsigned inner_depth = index_in_loop_nest (loop->inner->num,
     591                 :          12 :                                                          DDR_LOOP_NEST (ddr));
     592                 :          12 :               if (dist_v[inner_depth] < 0)
     593                 :           9 :                 return opt_result::failure_at (stmtinfo_a->stmt,
     594                 :             :                                                "not vectorized, dependence "
     595                 :             :                                                "between data-refs %T and %T\n",
     596                 :             :                                                DR_REF (dra), DR_REF (drb));
     597                 :             :             }
     598                 :             :           /* Record a negative dependence distance to later limit the
     599                 :             :              amount of stmt copying / unrolling we can perform.
     600                 :             :              Only need to handle read-after-write dependence.  */
     601                 :        2496 :           if (DR_IS_READ (drb)
     602                 :          76 :               && (STMT_VINFO_MIN_NEG_DIST (stmtinfo_b) == 0
     603                 :          12 :                   || STMT_VINFO_MIN_NEG_DIST (stmtinfo_b) > (unsigned)dist))
     604                 :          76 :             STMT_VINFO_MIN_NEG_DIST (stmtinfo_b) = dist;
     605                 :        2496 :           continue;
     606                 :        2496 :         }
     607                 :             : 
     608                 :        6214 :       unsigned int abs_dist = abs (dist);
     609                 :        6214 :       if (abs_dist >= 2 && abs_dist < *max_vf)
     610                 :             :         {
     611                 :             :           /* The dependence distance requires reduction of the maximal
     612                 :             :              vectorization factor.  */
     613                 :         301 :           *max_vf = abs_dist;
     614                 :         301 :           if (dump_enabled_p ())
     615                 :          30 :             dump_printf_loc (MSG_NOTE, vect_location,
     616                 :             :                              "adjusting maximal vectorization factor to %i\n",
     617                 :             :                              *max_vf);
     618                 :             :         }
     619                 :             : 
     620                 :        6214 :       if (abs_dist >= *max_vf)
     621                 :             :         {
     622                 :             :           /* Dependence distance does not create dependence, as far as
     623                 :             :              vectorization is concerned, in this case.  */
     624                 :         410 :           if (dump_enabled_p ())
     625                 :          57 :             dump_printf_loc (MSG_NOTE, vect_location,
     626                 :             :                              "dependence distance >= VF.\n");
     627                 :         410 :           continue;
     628                 :             :         }
     629                 :             : 
     630                 :        5804 :       return opt_result::failure_at (stmtinfo_a->stmt,
     631                 :             :                                      "not vectorized, possible dependence "
     632                 :             :                                      "between data-refs %T and %T\n",
     633                 :             :                                      DR_REF (dra), DR_REF (drb));
     634                 :             :     }
     635                 :             : 
     636                 :       40707 :   return opt_result::success ();
     637                 :             : }
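
A hedged example of the max-VF adjustment above (loop not from this file): a known dependence distance of 2 does not block vectorization, but it caps the vectorization factor at 2.

  /* Sketch only: iteration i writes a[i + 2] and iteration i + 2 reads it,
     a dependence distance of 2; processing two iterations per vector is
     safe, four is not, so *max_vf is lowered to 2.  */
  void
  recurrence (int *a, int n)
  {
    for (int i = 0; i < n - 2; i++)
      a[i + 2] = a[i] + 1;
  }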
     638                 :             : 
     639                 :             : /* Function vect_analyze_early_break_dependences.
     640                 :             : 
      641                 :             :    Examine all the data references in the loop and make sure that, if we have
      642                 :             :    multiple exits, we are able to safely move stores such that they become
      643                 :             :    safe for vectorization.  The function also calculates where to move the
      644                 :             :    instructions to and computes what the new vUSE chain should be.
     645                 :             : 
     646                 :             :    This works in tandem with the CFG that will be produced by
     647                 :             :    slpeel_tree_duplicate_loop_to_edge_cfg later on.
     648                 :             : 
     649                 :             :    This function tries to validate whether an early break vectorization
      650                 :             :    is possible for the current instruction sequence.  Returns True if
     651                 :             :    possible, otherwise False.
     652                 :             : 
     653                 :             :    Requirements:
     654                 :             :      - Any memory access must be to a fixed size buffer.
     655                 :             :      - There must not be any loads and stores to the same object.
     656                 :             :      - Multiple loads are allowed as long as they don't alias.
     657                 :             : 
     658                 :             :    NOTE:
     659                 :             :      This implementation is very conservative. Any overlapping loads/stores
      660                 :             :      that take place before the early break statement get rejected aside from
     661                 :             :      WAR dependencies.
     662                 :             : 
     663                 :             :      i.e.:
     664                 :             : 
     665                 :             :         a[i] = 8
     666                 :             :         c = a[i]
     667                 :             :         if (b[i])
     668                 :             :           ...
     669                 :             : 
     670                 :             :         is not allowed, but
     671                 :             : 
     672                 :             :         c = a[i]
     673                 :             :         a[i] = 8
     674                 :             :         if (b[i])
     675                 :             :           ...
     676                 :             : 
      677                 :             :         is allowed, which is the common case.  */
     678                 :             : 
     679                 :             : static opt_result
     680                 :      102594 : vect_analyze_early_break_dependences (loop_vec_info loop_vinfo)
     681                 :             : {
     682                 :      102594 :   DUMP_VECT_SCOPE ("vect_analyze_early_break_dependences");
     683                 :             : 
     684                 :             :   /* List of all load data references found during traversal.  */
     685                 :      102594 :   auto_vec<data_reference *> bases;
     686                 :      102594 :   basic_block dest_bb = NULL;
     687                 :             : 
     688                 :      102594 :   class loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
     689                 :      102594 :   class loop *loop_nest = loop_outer (loop);
     690                 :             : 
     691                 :      102594 :   if (dump_enabled_p ())
     692                 :        1323 :     dump_printf_loc (MSG_NOTE, vect_location,
     693                 :             :                      "loop contains multiple exits, analyzing"
     694                 :             :                      " statement dependencies.\n");
     695                 :             : 
     696                 :      102594 :   if (LOOP_VINFO_EARLY_BREAKS_VECT_PEELED (loop_vinfo))
     697                 :        4365 :     if (dump_enabled_p ())
     698                 :         159 :       dump_printf_loc (MSG_NOTE, vect_location,
     699                 :             :                        "alternate exit has been chosen as main exit.\n");
     700                 :             : 
     701                 :             :   /* Since we don't support general control flow, the location we'll move the
     702                 :             :      side-effects to is always the latch connected exit.  When we support
     703                 :             :      general control flow we can do better but for now this is fine.  Move
     704                 :             :      side-effects to the in-loop destination of the last early exit.  For the
     705                 :             :      PEELED case we move the side-effects to the latch block as this is
     706                 :             :      guaranteed to be the last block to be executed when a vector iteration
      707                 :             :      finishes.  */
     708                 :      102594 :   if (LOOP_VINFO_EARLY_BREAKS_VECT_PEELED (loop_vinfo))
     709                 :        4365 :     dest_bb = loop->latch;
     710                 :             :   else
     711                 :       98229 :     dest_bb = single_pred (loop->latch);
     712                 :             : 
     713                 :             :   /* We start looking from dest_bb, for the non-PEELED case we don't want to
     714                 :             :      move any stores already present, but we do want to read and validate the
     715                 :             :      loads.  */
     716                 :      102594 :   basic_block bb = dest_bb;
     717                 :             : 
     718                 :             :   /* We move stores across all loads to the beginning of dest_bb, so
     719                 :             :      the first block processed below doesn't need dependence checking.  */
     720                 :      102594 :   bool check_deps = false;
     721                 :             : 
     722                 :      385318 :   do
     723                 :             :     {
     724                 :      243956 :       gimple_stmt_iterator gsi = gsi_last_bb (bb);
     725                 :             : 
     726                 :             :       /* Now analyze all the remaining statements and try to determine which
     727                 :             :          instructions are allowed/needed to be moved.  */
     728                 :     1744195 :       while (!gsi_end_p (gsi))
     729                 :             :         {
     730                 :     1500266 :           gimple *stmt = gsi_stmt (gsi);
     731                 :     1500266 :           gsi_prev (&gsi);
     732                 :     1500266 :           if (is_gimple_debug (stmt))
     733                 :     1300865 :             continue;
     734                 :             : 
     735                 :      884145 :           stmt_vec_info stmt_vinfo
     736                 :      884145 :             = vect_stmt_to_vectorize (loop_vinfo->lookup_stmt (stmt));
     737                 :      884145 :           stmt = STMT_VINFO_STMT (stmt_vinfo);
     738                 :      884145 :           auto dr_ref = STMT_VINFO_DATA_REF (stmt_vinfo);
     739                 :      884145 :           if (!dr_ref)
     740                 :      676363 :             continue;
     741                 :             : 
      742                 :             :           /* We know everything below dest_bb is safe since we know we
      743                 :             :              had a full vector iteration when reaching it, either because
      744                 :             :              the loop entry / IV exit test is last or because this
      745                 :             :              is the loop latch itself.  */
     746                 :      207782 :           if (!check_deps)
     747                 :        8381 :             continue;
     748                 :             : 
      749                 :             :           /* Check whether vector accesses to the object will be within bounds:
      750                 :             :              the object size must be a constant, or we assume the loop will be
      751                 :             :              versioned or niters bounded by VF so accesses stay within range.
      752                 :             :              We only need to check the reads since writes are moved to a safe
      753                 :             :              place where, if we get there, we know they are safe to perform.  */
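                          :             : 
                          :             :           /* Editor's example (hypothetical code, illustration only): a read
                          :             :              such as
                          :             : 
                          :             :                static int a[64];
                          :             :                ...
                          :             :                if (a[i] == 0)   // i bounded by the loop
                          :             :                  break;
                          :             : 
                          :             :              is from a fixed-size object, so the bound may be provable at
                          :             :              compile time; a read through a pointer of unknown extent would
                          :             :              instead have to rely on versioning or an niters bound.  */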
     754                 :      199401 :           if (DR_IS_READ (dr_ref))
     755                 :             :             {
     756                 :      186356 :               dr_set_safe_speculative_read_required (stmt_vinfo, true);
     757                 :      186356 :               bool inbounds = ref_within_array_bound (stmt, DR_REF (dr_ref));
     758                 :      186356 :               DR_SCALAR_KNOWN_BOUNDS (STMT_VINFO_DR_INFO (stmt_vinfo)) = inbounds;
     759                 :             : 
     760                 :      186356 :               if (dump_enabled_p ())
     761                 :        2188 :                 dump_printf_loc (MSG_NOTE, vect_location,
     762                 :             :                                  "marking DR (read) as possibly needing peeling "
     763                 :             :                                  "for alignment at %G", stmt);
     764                 :             :             }
     765                 :             : 
     766                 :      199401 :           if (DR_IS_READ (dr_ref))
     767                 :      186356 :             bases.safe_push (dr_ref);
     768                 :       13045 :           else if (DR_IS_WRITE (dr_ref))
     769                 :             :             {
     770                 :             :               /* We are moving writes down in the CFG.  To be sure that this
     771                 :             :                  is valid after vectorization we have to check all the loads
     772                 :             :                  we are sinking the stores past to see if any of them may
     773                 :             :                  alias or are the same object.
     774                 :             : 
     775                 :             :                  Same objects will not be an issue because unless the store
     776                 :             :                  is marked volatile the value can be forwarded.  If the
     777                 :             :                  store is marked volatile we don't vectorize the loop
     778                 :             :                  anyway.
     779                 :             : 
     780                 :             :                  That leaves the check for aliasing.  We don't really need
     781                 :             :                  to care about the stores aliasing with each other since the
     782                 :             :                  stores are moved in order so the effects are still observed
     783                 :             :                  correctly.  This leaves the check for WAR dependencies
     784                 :             :                  which we would be introducing here if the DR can alias.
     785                 :             :                  The check is quadratic in loads/stores but I have not found
     786                 :             :                  a better API to do this.  I believe all loads and stores
     787                 :             :                  must be checked.  We also must check them when we
      788                 :             :                  encounter the store, since we don't care about loads past
     789                 :             :                  the store.  */
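                          :             : 
                          :             :               /* Editor's illustration (hypothetical statements): sinking a
                          :             :                  store past a later load is only safe when the two cannot
                          :             :                  alias.  E.g. moving the store in
                          :             : 
                          :             :                    *p = x;      // store being sunk below the exit test
                          :             :                    y = *q;      // load the store is moved past
                          :             :                    if (y)       // early exit
                          :             :                      break;
                          :             : 
                          :             :                  below the load introduces a write-after-read hazard when P
                          :             :                  may alias Q, which is exactly what dr_may_alias_p rejects
                          :             :                  in the loop below.  */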
     790                 :             : 
     791                 :       43980 :               for (auto dr_read : bases)
     792                 :       14442 :                 if (dr_may_alias_p (dr_ref, dr_read, loop_nest))
     793                 :             :                   {
     794                 :          27 :                     if (dump_enabled_p ())
     795                 :           0 :                       dump_printf_loc (MSG_MISSED_OPTIMIZATION,
     796                 :             :                                        vect_location,
     797                 :             :                                        "early breaks not supported: "
     798                 :             :                                        "overlapping loads and stores "
     799                 :             :                                        "found before the break "
     800                 :             :                                        "statement.\n");
     801                 :             : 
     802                 :          27 :                     return opt_result::failure_at (stmt,
     803                 :             :                              "can't safely apply code motion to dependencies"
     804                 :             :                              " to vectorize the early exit. %G may alias with"
     805                 :             :                              " %G\n", stmt, dr_read->stmt);
     806                 :             :                   }
     807                 :             :             }
     808                 :             : 
     809                 :      398748 :           if (gimple_vdef (stmt))
     810                 :             :             {
     811                 :       13014 :               if (dump_enabled_p ())
     812                 :         248 :                 dump_printf_loc (MSG_NOTE, vect_location,
     813                 :             :                                  "==> recording stmt %G", stmt);
     814                 :             : 
     815                 :       13014 :               LOOP_VINFO_EARLY_BRK_STORES (loop_vinfo).safe_push (stmt);
     816                 :             :             }
     817                 :      572090 :           else if (gimple_vuse (stmt))
     818                 :             :             {
     819                 :      186356 :               LOOP_VINFO_EARLY_BRK_VUSES (loop_vinfo).safe_insert (0, stmt);
     820                 :      186356 :               if (dump_enabled_p ())
     821                 :        2188 :                 dump_printf_loc (MSG_NOTE, vect_location,
     822                 :             :                                  "marked statement for vUSE update: %G", stmt);
     823                 :             :             }
     824                 :             :         }
     825                 :             : 
     826                 :      243929 :       if (!single_pred_p (bb))
     827                 :             :         {
     828                 :      102567 :           gcc_assert (bb == loop->header);
     829                 :      102567 :           break;
     830                 :             :         }
     831                 :             : 
     832                 :             :       /* If we possibly sink through a virtual PHI make sure to elide that.  */
     833                 :      141362 :       if (gphi *vphi = get_virtual_phi (bb))
     834                 :          62 :         LOOP_VINFO_EARLY_BRK_STORES (loop_vinfo).safe_push (vphi);
     835                 :             : 
     836                 :             :       /* All earlier blocks need dependence checking.  */
     837                 :      141362 :       check_deps = true;
     838                 :      141362 :       bb = single_pred (bb);
     839                 :      141362 :     }
     840                 :             :   while (1);
     841                 :             : 
      842                 :             :   /* We don't allow outer -> inner loop transitions, which should have been
     843                 :             :      trapped already during loop form analysis.  */
     844                 :      102567 :   gcc_assert (dest_bb->loop_father == loop);
     845                 :             : 
      846                 :             :   /* Check that the destination block we picked has only one predecessor.  To
      847                 :             :      relax this we would have to take special care when moving the statements.
      848                 :             :      We don't currently support such control flow; this check also simplifies
      849                 :             :      how we handle labels that may be present anywhere in the IL, by ensuring
      850                 :             :      that the labels aren't significant for the CFG.  */
     851                 :      102567 :   if (!single_pred (dest_bb))
     852                 :           0 :     return opt_result::failure_at (vect_location,
     853                 :             :                              "chosen loop exit block (BB %d) does not have a "
     854                 :             :                              "single predecessor which is currently not "
     855                 :             :                              "supported for early break vectorization.\n",
     856                 :             :                              dest_bb->index);
     857                 :             : 
     858                 :      102567 :   LOOP_VINFO_EARLY_BRK_DEST_BB (loop_vinfo) = dest_bb;
     859                 :             : 
     860                 :      102567 :   if (!LOOP_VINFO_EARLY_BRK_VUSES (loop_vinfo).is_empty ())
     861                 :             :     {
     862                 :             :       /* All uses shall be updated to that of the first load.  Entries are
     863                 :             :          stored in reverse order.  */
     864                 :       96047 :       tree vuse = gimple_vuse (LOOP_VINFO_EARLY_BRK_VUSES (loop_vinfo).last ());
     865                 :      282374 :       for (auto g : LOOP_VINFO_EARLY_BRK_VUSES (loop_vinfo))
     866                 :             :         {
     867                 :      186327 :           if (dump_enabled_p ())
     868                 :        2188 :           dump_printf_loc (MSG_NOTE, vect_location,
     869                 :             :                            "will update use: %T, mem_ref: %G", vuse, g);
     870                 :             :         }
     871                 :             :     }
     872                 :             : 
     873                 :      102567 :   if (dump_enabled_p ())
     874                 :        1323 :     dump_printf_loc (MSG_NOTE, vect_location,
     875                 :             :                      "recorded statements to be moved to BB %d\n",
     876                 :        1323 :                      LOOP_VINFO_EARLY_BRK_DEST_BB (loop_vinfo)->index);
     877                 :             : 
     878                 :      102567 :   return opt_result::success ();
     879                 :      102594 : }
     880                 :             : 
     881                 :             : /* Function vect_analyze_data_ref_dependences.
     882                 :             : 
     883                 :             :    Examine all the data references in the loop, and make sure there do not
     884                 :             :    exist any data dependences between them.  Set *MAX_VF according to
     885                 :             :    the maximum vectorization factor the data dependences allow.  */
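                          :             : 
                          :             : /* Editor's example (hypothetical loop, for illustration): in
                          :             : 
                          :             :      for (int i = 0; i < n; i++)
                          :             :        a[i + 4] = a[i] + 1;
                          :             : 
                          :             :    the read of a[i] and the write of a[i + 4] form a loop-carried dependence
                          :             :    with distance 4, so the loop is still vectorizable but only with a
                          :             :    vectorization factor of at most 4; *MAX_VF would be capped accordingly.  */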
     886                 :             : 
     887                 :             : opt_result
     888                 :      281473 : vect_analyze_data_ref_dependences (loop_vec_info loop_vinfo,
     889                 :             :                                    unsigned int *max_vf)
     890                 :             : {
     891                 :      281473 :   unsigned int i;
     892                 :      281473 :   struct data_dependence_relation *ddr;
     893                 :             : 
     894                 :      281473 :   DUMP_VECT_SCOPE ("vect_analyze_data_ref_dependences");
     895                 :             : 
     896                 :      281473 :   if (!LOOP_VINFO_DDRS (loop_vinfo).exists ())
     897                 :             :     {
     898                 :      137449 :       LOOP_VINFO_DDRS (loop_vinfo)
     899                 :      137449 :         .create (LOOP_VINFO_DATAREFS (loop_vinfo).length ()
     900                 :      137449 :                  * LOOP_VINFO_DATAREFS (loop_vinfo).length ());
     901                 :             :       /* We do not need read-read dependences.  */
     902                 :      274898 :       bool res = compute_all_dependences (LOOP_VINFO_DATAREFS (loop_vinfo),
     903                 :             :                                           &LOOP_VINFO_DDRS (loop_vinfo),
     904                 :      137449 :                                           LOOP_VINFO_LOOP_NEST (loop_vinfo),
     905                 :             :                                           false);
     906                 :      137449 :       gcc_assert (res);
     907                 :             :     }
     908                 :             : 
     909                 :      281473 :   LOOP_VINFO_NO_DATA_DEPENDENCIES (loop_vinfo) = true;
     910                 :             : 
     911                 :             :   /* For epilogues we either have no aliases or alias versioning
     912                 :             :      was applied to original loop.  Therefore we may just get max_vf
     913                 :             :      using VF of original loop.  */
     914                 :      281473 :   if (LOOP_VINFO_EPILOGUE_P (loop_vinfo))
     915                 :       24490 :     *max_vf = LOOP_VINFO_ORIG_MAX_VECT_FACTOR (loop_vinfo);
     916                 :             :   else
     917                 :     2017956 :     FOR_EACH_VEC_ELT (LOOP_VINFO_DDRS (loop_vinfo), i, ddr)
     918                 :             :       {
     919                 :     1774608 :         opt_result res
     920                 :     1774608 :           = vect_analyze_data_ref_dependence (ddr, loop_vinfo, max_vf);
     921                 :     1774608 :         if (!res)
     922                 :       13635 :           return res;
     923                 :             :       }
     924                 :             : 
      925                 :             :   /* If we have early break statements in the loop, check whether they
      926                 :             :      are of a form we can vectorize.  */
     927                 :      267838 :   if (LOOP_VINFO_EARLY_BREAKS (loop_vinfo))
     928                 :      102594 :     return vect_analyze_early_break_dependences (loop_vinfo);
     929                 :             : 
     930                 :      165244 :   return opt_result::success ();
     931                 :             : }
     932                 :             : 
     933                 :             : 
     934                 :             : /* Function vect_slp_analyze_data_ref_dependence.
     935                 :             : 
      936                 :             :    Return TRUE if there (might) exist a dependence between memory-reference
      937                 :             :    DRA and memory-reference DRB for VINFO that would prevent the SLP
      938                 :             :    transform; return FALSE if the references are known to be independent
      939                 :             :    or otherwise harmless to it.  */
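                          :             : 
                          :             : /* Editor's example (hypothetical statements): for a basic block containing
                          :             : 
                          :             :      p[0] = x;
                          :             :      y = q[0];
                          :             : 
                          :             :    with P and Q arbitrary incoming pointers, the dependence cannot be
                          :             :    determined at compile time, so this function answers TRUE and the SLP
                          :             :    attempt involving the two references is given up on.  */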
     940                 :             : 
     941                 :             : static bool
     942                 :     6122553 : vect_slp_analyze_data_ref_dependence (vec_info *vinfo,
     943                 :             :                                       struct data_dependence_relation *ddr)
     944                 :             : {
     945                 :     6122553 :   struct data_reference *dra = DDR_A (ddr);
     946                 :     6122553 :   struct data_reference *drb = DDR_B (ddr);
     947                 :     6122553 :   dr_vec_info *dr_info_a = vinfo->lookup_dr (dra);
     948                 :     6122553 :   dr_vec_info *dr_info_b = vinfo->lookup_dr (drb);
     949                 :             : 
     950                 :             :   /* We need to check dependences of statements marked as unvectorizable
      951                 :             :      as well; they can still prohibit vectorization.  */
     952                 :             : 
     953                 :             :   /* Independent data accesses.  */
     954                 :     6122553 :   if (DDR_ARE_DEPENDENT (ddr) == chrec_known)
     955                 :             :     return false;
     956                 :             : 
     957                 :     1055109 :   if (dra == drb)
     958                 :             :     return false;
     959                 :             : 
     960                 :             :   /* Read-read is OK.  */
     961                 :        6115 :   if (DR_IS_READ (dra) && DR_IS_READ (drb))
     962                 :             :     return false;
     963                 :             : 
     964                 :             :   /* If dra and drb are part of the same interleaving chain consider
     965                 :             :      them independent.  */
     966                 :        6115 :   if (STMT_VINFO_GROUPED_ACCESS (dr_info_a->stmt)
     967                 :        6115 :       && (DR_GROUP_FIRST_ELEMENT (dr_info_a->stmt)
     968                 :        6115 :           == DR_GROUP_FIRST_ELEMENT (dr_info_b->stmt)))
     969                 :             :     return false;
     970                 :             : 
     971                 :             :   /* Unknown data dependence.  */
     972                 :        6115 :   if (DDR_ARE_DEPENDENT (ddr) == chrec_dont_know)
     973                 :             :     {
     974                 :        6115 :       if  (dump_enabled_p ())
     975                 :           6 :         dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
     976                 :             :                          "can't determine dependence between %T and %T\n",
     977                 :             :                          DR_REF (dra), DR_REF (drb));
     978                 :             :     }
     979                 :           0 :   else if (dump_enabled_p ())
     980                 :           0 :     dump_printf_loc (MSG_NOTE, vect_location,
     981                 :             :                      "determined dependence between %T and %T\n",
     982                 :             :                      DR_REF (dra), DR_REF (drb));
     983                 :             : 
     984                 :             :   return true;
     985                 :             : }
     986                 :             : 
     987                 :             : 
     988                 :             : /* Analyze dependences involved in the transform of a store SLP NODE.  */
     989                 :             : 
     990                 :             : static bool
     991                 :      652296 : vect_slp_analyze_store_dependences (vec_info *vinfo, slp_tree node)
     992                 :             : {
     993                 :             :   /* This walks over all stmts involved in the SLP store done
     994                 :             :      in NODE verifying we can sink them up to the last stmt in the
     995                 :             :      group.  */
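                          :             : 
                          :             :   /* Editor's sketch (hypothetical user statements): for a store group such as
                          :             : 
                          :             :        p->x = a;      // first store of the group
                          :             :        tmp = *q;      // unrelated load between the group members
                          :             :        p->y = b;      // last store of the group
                          :             : 
                          :             :      all stores are emitted at the position of the last one, so we must be
                          :             :      able to sink "p->x = a" past "tmp = *q", i.e. *q must not read the
                          :             :      location written by p->x.  */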
     996                 :      652296 :   stmt_vec_info last_access_info = vect_find_last_scalar_stmt_in_slp (node);
     997                 :      652296 :   gcc_assert (DR_IS_WRITE (STMT_VINFO_DATA_REF (last_access_info)));
     998                 :             : 
     999                 :     2342071 :   for (unsigned k = 0; k < SLP_TREE_SCALAR_STMTS (node).length (); ++k)
    1000                 :             :     {
    1001                 :     1695892 :       stmt_vec_info access_info
    1002                 :     1695892 :         = vect_orig_stmt (SLP_TREE_SCALAR_STMTS (node)[k]);
    1003                 :     1695892 :       if (access_info == last_access_info)
    1004                 :      646898 :         continue;
    1005                 :     1048994 :       data_reference *dr_a = STMT_VINFO_DATA_REF (access_info);
    1006                 :     1048994 :       ao_ref ref;
    1007                 :     1048994 :       bool ref_initialized_p = false;
    1008                 :     1048994 :       for (gimple_stmt_iterator gsi = gsi_for_stmt (access_info->stmt);
    1009                 :     9899555 :            gsi_stmt (gsi) != last_access_info->stmt; gsi_next (&gsi))
    1010                 :             :         {
    1011                 :     8856678 :           gimple *stmt = gsi_stmt (gsi);
    1012                 :    15538449 :           if (! gimple_vuse (stmt))
    1013                 :     2657895 :             continue;
    1014                 :             : 
    1015                 :             :           /* If we couldn't record a (single) data reference for this
    1016                 :             :              stmt we have to resort to the alias oracle.  */
    1017                 :     6198783 :           stmt_vec_info stmt_info = vinfo->lookup_stmt (stmt);
    1018                 :     6198783 :           data_reference *dr_b = STMT_VINFO_DATA_REF (stmt_info);
    1019                 :     6198783 :           if (!dr_b)
    1020                 :             :             {
    1021                 :             :               /* We are moving a store - this means
    1022                 :             :                  we cannot use TBAA for disambiguation.  */
    1023                 :       77026 :               if (!ref_initialized_p)
    1024                 :       77026 :                 ao_ref_init (&ref, DR_REF (dr_a));
    1025                 :       77026 :               if (stmt_may_clobber_ref_p_1 (stmt, &ref, false)
    1026                 :       77026 :                   || ref_maybe_used_by_stmt_p (stmt, &ref, false))
    1027                 :        6117 :                 return false;
    1028                 :       76985 :               continue;
    1029                 :             :             }
    1030                 :             : 
    1031                 :     6121757 :           gcc_assert (!gimple_visited_p (stmt));
    1032                 :             : 
    1033                 :     6121757 :           ddr_p ddr = initialize_data_dependence_relation (dr_a,
    1034                 :             :                                                            dr_b, vNULL);
    1035                 :     6121757 :           bool dependent = vect_slp_analyze_data_ref_dependence (vinfo, ddr);
    1036                 :     6121757 :           free_dependence_relation (ddr);
    1037                 :     6121757 :           if (dependent)
    1038                 :             :             return false;
    1039                 :             :         }
    1040                 :             :     }
    1041                 :             :   return true;
    1042                 :             : }
    1043                 :             : 
    1044                 :             : /* Analyze dependences involved in the transform of a load SLP NODE.  STORES
     1045                 :             :    contains the scalar stores of this instance if we are
     1046                 :             :    disambiguating the loads.  */
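                          :             : 
                          :             : /* Editor's sketch (hypothetical user statements): for a load group such as
                          :             : 
                          :             :      a = p->x;       // first load of the group
                          :             :      *q = c;         // intervening store
                          :             :      b = p->y;       // later load of the group
                          :             : 
                          :             :    the later loads are emitted at the position of the first one, so hoisting
                          :             :    "b = p->y" above "*q = c" is only valid when that store cannot clobber
                          :             :    p->y.  */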
    1047                 :             : 
    1048                 :             : static bool
    1049                 :      161691 : vect_slp_analyze_load_dependences (vec_info *vinfo, slp_tree node,
    1050                 :             :                                    vec<stmt_vec_info> stores,
    1051                 :             :                                    stmt_vec_info last_store_info)
    1052                 :             : {
    1053                 :             :   /* This walks over all stmts involved in the SLP load done
    1054                 :             :      in NODE verifying we can hoist them up to the first stmt in the
    1055                 :             :      group.  */
    1056                 :      161691 :   stmt_vec_info first_access_info = vect_find_first_scalar_stmt_in_slp (node);
    1057                 :      161691 :   gcc_assert (DR_IS_READ (STMT_VINFO_DATA_REF (first_access_info)));
    1058                 :             : 
    1059                 :      555704 :   for (unsigned k = 0; k < SLP_TREE_SCALAR_STMTS (node).length (); ++k)
    1060                 :             :     {
    1061                 :      394062 :       if (! SLP_TREE_SCALAR_STMTS (node)[k])
    1062                 :      165691 :         continue;
    1063                 :      394062 :       stmt_vec_info access_info
    1064                 :      394062 :         = vect_orig_stmt (SLP_TREE_SCALAR_STMTS (node)[k]);
    1065                 :      394062 :       if (access_info == first_access_info)
    1066                 :      165691 :         continue;
    1067                 :      228371 :       data_reference *dr_a = STMT_VINFO_DATA_REF (access_info);
    1068                 :      228371 :       ao_ref ref;
    1069                 :      228371 :       bool ref_initialized_p = false;
    1070                 :      228371 :       hash_set<stmt_vec_info> grp_visited;
    1071                 :      228371 :       for (gimple_stmt_iterator gsi = gsi_for_stmt (access_info->stmt);
    1072                 :     4674525 :            gsi_stmt (gsi) != first_access_info->stmt; gsi_prev (&gsi))
    1073                 :             :         {
    1074                 :     2223126 :           gimple *stmt = gsi_stmt (gsi);
    1075                 :     3577003 :           if (! gimple_vdef (stmt))
    1076                 :     2137492 :             continue;
    1077                 :             : 
    1078                 :      346121 :           stmt_vec_info stmt_info = vinfo->lookup_stmt (stmt);
    1079                 :             : 
    1080                 :             :           /* If we run into a store of this same instance (we've just
    1081                 :             :              marked those) then delay dependence checking until we run
    1082                 :             :              into the last store because this is where it will have
    1083                 :             :              been sunk to (and we verified that we can do that already).  */
    1084                 :      346121 :           if (gimple_visited_p (stmt))
    1085                 :             :             {
    1086                 :      260487 :               if (stmt_info != last_store_info)
    1087                 :      260485 :                 continue;
    1088                 :             : 
    1089                 :          10 :               for (stmt_vec_info &store_info : stores)
    1090                 :             :                 {
    1091                 :           4 :                   data_reference *store_dr = STMT_VINFO_DATA_REF (store_info);
    1092                 :           4 :                   ddr_p ddr = initialize_data_dependence_relation
    1093                 :           4 :                                 (dr_a, store_dr, vNULL);
    1094                 :           4 :                   bool dependent
    1095                 :           4 :                     = vect_slp_analyze_data_ref_dependence (vinfo, ddr);
    1096                 :           4 :                   free_dependence_relation (ddr);
    1097                 :           4 :                   if (dependent)
    1098                 :          49 :                     return false;
    1099                 :             :                 }
    1100                 :           2 :               continue;
    1101                 :           2 :             }
    1102                 :             : 
    1103                 :      174661 :           auto check_hoist = [&] (stmt_vec_info stmt_info) -> bool
    1104                 :             :             {
    1105                 :             :               /* We are hoisting a load - this means we can use TBAA for
    1106                 :             :                  disambiguation.  */
    1107                 :       89027 :               if (!ref_initialized_p)
    1108                 :       89027 :                 ao_ref_init (&ref, DR_REF (dr_a));
    1109                 :       89027 :               if (stmt_may_clobber_ref_p_1 (stmt_info->stmt, &ref, true))
    1110                 :             :                 {
    1111                 :             :                   /* If we couldn't record a (single) data reference for this
    1112                 :             :                      stmt we have to give up now.  */
    1113                 :         802 :                   data_reference *dr_b = STMT_VINFO_DATA_REF (stmt_info);
    1114                 :         802 :                   if (!dr_b)
    1115                 :             :                     return false;
    1116                 :         792 :                   ddr_p ddr = initialize_data_dependence_relation (dr_a,
    1117                 :             :                                                                    dr_b, vNULL);
    1118                 :         792 :                   bool dependent
    1119                 :         792 :                     = vect_slp_analyze_data_ref_dependence (vinfo, ddr);
    1120                 :         792 :                   free_dependence_relation (ddr);
    1121                 :         792 :                   if (dependent)
    1122                 :             :                     return false;
    1123                 :             :                 }
    1124                 :             :               /* No dependence.  */
    1125                 :             :               return true;
    1126                 :       85634 :             };
    1127                 :       85634 :           if (STMT_VINFO_GROUPED_ACCESS (stmt_info))
    1128                 :             :             {
    1129                 :             :               /* When we run into a store group we have to honor
    1130                 :             :                  that earlier stores might be moved here.  We don't
    1131                 :             :                  know exactly which and where to since we lack a
    1132                 :             :                  back-mapping from DR to SLP node, so assume all
    1133                 :             :                  earlier stores are sunk here.  It's enough to
    1134                 :             :                  consider the last stmt of a group for this.
    1135                 :             :                  ???  Both this and the fact that we disregard that
    1136                 :             :                  the conflicting instance might be removed later
     1137                 :             :                  are overly conservative.  */
    1138                 :       66592 :               if (!grp_visited.add (DR_GROUP_FIRST_ELEMENT (stmt_info)))
    1139                 :       12592 :                 for (auto store_info = DR_GROUP_FIRST_ELEMENT (stmt_info);
    1140                 :      156077 :                      store_info != NULL;
    1141                 :      143485 :                      store_info = DR_GROUP_NEXT_ELEMENT (store_info))
    1142                 :      143524 :                   if ((store_info == stmt_info
    1143                 :      130941 :                        || get_later_stmt (store_info, stmt_info) == stmt_info)
    1144                 :      200926 :                       && !check_hoist (store_info))
    1145                 :             :                     return false;
    1146                 :             :             }
    1147                 :             :           else
    1148                 :             :             {
    1149                 :       19042 :               if (!check_hoist (stmt_info))
    1150                 :             :                 return false;
    1151                 :             :             }
    1152                 :             :         }
    1153                 :      228371 :     }
    1154                 :             :   return true;
    1155                 :             : }
    1156                 :             : 
    1157                 :             : 
     1158                 :             : /* Function vect_slp_analyze_instance_dependence.
     1159                 :             : 
     1160                 :             :    Examine all the data references in the SLP instance INSTANCE, and make
     1161                 :             :    sure there do not exist any data dependences that would prevent
     1162                 :             :    vectorizing it.  Return TRUE if the instance passes these checks.  */
    1163                 :             : 
    1164                 :             : bool
    1165                 :      763321 : vect_slp_analyze_instance_dependence (vec_info *vinfo, slp_instance instance)
    1166                 :             : {
    1167                 :      763321 :   DUMP_VECT_SCOPE ("vect_slp_analyze_instance_dependence");
    1168                 :             : 
    1169                 :             :   /* The stores of this instance are at the root of the SLP tree.  */
    1170                 :      763321 :   slp_tree store = NULL;
    1171                 :      763321 :   if (SLP_INSTANCE_KIND (instance) == slp_inst_kind_store)
    1172                 :      652296 :     store = SLP_INSTANCE_TREE (instance);
    1173                 :             : 
    1174                 :             :   /* Verify we can sink stores to the vectorized stmt insert location.  */
    1175                 :      652296 :   stmt_vec_info last_store_info = NULL;
    1176                 :      652296 :   if (store)
    1177                 :             :     {
    1178                 :      652296 :       if (! vect_slp_analyze_store_dependences (vinfo, store))
    1179                 :             :         return false;
    1180                 :             : 
    1181                 :             :       /* Mark stores in this instance and remember the last one.  */
    1182                 :      646179 :       last_store_info = vect_find_last_scalar_stmt_in_slp (store);
    1183                 :     2335204 :       for (unsigned k = 0; k < SLP_TREE_SCALAR_STMTS (store).length (); ++k)
    1184                 :     1689025 :         gimple_set_visited (SLP_TREE_SCALAR_STMTS (store)[k]->stmt, true);
    1185                 :             :     }
    1186                 :             : 
    1187                 :      757204 :   bool res = true;
    1188                 :             : 
    1189                 :             :   /* Verify we can sink loads to the vectorized stmt insert location,
    1190                 :             :      special-casing stores of this instance.  */
    1191                 :     1185506 :   for (slp_tree &load : SLP_INSTANCE_LOADS (instance))
    1192                 :      161691 :     if (! vect_slp_analyze_load_dependences (vinfo, load,
    1193                 :             :                                              store
    1194                 :             :                                              ? SLP_TREE_SCALAR_STMTS (store)
    1195                 :             :                                              : vNULL, last_store_info))
    1196                 :             :       {
    1197                 :             :         res = false;
    1198                 :             :         break;
    1199                 :             :       }
    1200                 :             : 
    1201                 :             :   /* Unset the visited flag.  */
    1202                 :      757204 :   if (store)
    1203                 :     3098525 :     for (unsigned k = 0; k < SLP_TREE_SCALAR_STMTS (store).length (); ++k)
    1204                 :     1689025 :       gimple_set_visited (SLP_TREE_SCALAR_STMTS (store)[k]->stmt, false);
    1205                 :             : 
    1206                 :             :   return res;
    1207                 :             : }
    1208                 :             : 
    1209                 :             : /* Return the misalignment of DR_INFO accessed in VECTYPE with OFFSET
    1210                 :             :    applied.  */
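                          :             : 
                          :             : /* Editor's worked example (assumed numbers, for illustration): with a known
                          :             :    target alignment of 16 bytes, a group-leader misalignment of 4, a DR_INIT
                          :             :    difference of 8 within the group and an OFFSET of 0, the result is
                          :             :    (4 + 8 + 0) mod 16 = 12.  If the access cannot be shown to satisfy the
                          :             :    16-byte requirement at all, DR_MISALIGNMENT_UNKNOWN is returned instead.  */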
    1211                 :             : 
    1212                 :             : int
    1213                 :     6679228 : dr_misalignment (dr_vec_info *dr_info, tree vectype, poly_int64 offset)
    1214                 :             : {
    1215                 :     6679228 :   HOST_WIDE_INT diff = 0;
    1216                 :             :   /* Alignment is only analyzed for the first element of a DR group,
    1217                 :             :      use that but adjust misalignment by the offset of the access.  */
    1218                 :     6679228 :   if (STMT_VINFO_GROUPED_ACCESS (dr_info->stmt))
    1219                 :             :     {
    1220                 :     3348158 :       dr_vec_info *first_dr
    1221                 :     3348158 :         = STMT_VINFO_DR_INFO (DR_GROUP_FIRST_ELEMENT (dr_info->stmt));
    1222                 :             :       /* vect_analyze_data_ref_accesses guarantees that DR_INIT are
    1223                 :             :          INTEGER_CSTs and the first element in the group has the lowest
    1224                 :             :          address.  */
    1225                 :     3348158 :       diff = (TREE_INT_CST_LOW (DR_INIT (dr_info->dr))
    1226                 :     3348158 :               - TREE_INT_CST_LOW (DR_INIT (first_dr->dr)));
    1227                 :     3348158 :       gcc_assert (diff >= 0);
    1228                 :             :       dr_info = first_dr;
    1229                 :             :     }
    1230                 :             : 
    1231                 :     6679228 :   int misalign = dr_info->misalignment;
    1232                 :     6679228 :   gcc_assert (misalign != DR_MISALIGNMENT_UNINITIALIZED);
    1233                 :     6679228 :   if (misalign == DR_MISALIGNMENT_UNKNOWN)
    1234                 :             :     return misalign;
    1235                 :             : 
    1236                 :             :   /* If the access is only aligned for a vector type with smaller alignment
    1237                 :             :      requirement the access has unknown misalignment.  */
    1238                 :     4485179 :   if (maybe_lt (dr_info->target_alignment * BITS_PER_UNIT,
    1239                 :     4485179 :                 targetm.vectorize.preferred_vector_alignment (vectype)))
    1240                 :             :     return DR_MISALIGNMENT_UNKNOWN;
    1241                 :             : 
    1242                 :             :   /* Apply the offset from the DR group start and the externally supplied
    1243                 :             :      offset which can for example result from a negative stride access.  */
    1244                 :     4485174 :   poly_int64 misalignment = misalign + diff + offset;
    1245                 :             : 
    1246                 :             :   /* Below we reject compile-time non-constant target alignments, but if
    1247                 :             :      our misalignment is zero, then we are known to already be aligned
    1248                 :             :      w.r.t. any such possible target alignment.  */
    1249                 :     4485174 :   if (known_eq (misalignment, 0))
    1250                 :             :     return 0;
    1251                 :             : 
    1252                 :      946001 :   unsigned HOST_WIDE_INT target_alignment_c;
    1253                 :      946001 :   if (!dr_info->target_alignment.is_constant (&target_alignment_c)
    1254                 :      946001 :       || !known_misalignment (misalignment, target_alignment_c, &misalign))
    1255                 :             :     return DR_MISALIGNMENT_UNKNOWN;
    1256                 :      946001 :   return misalign;
    1257                 :             : }
    1258                 :             : 
    1259                 :             : /* Record the base alignment guarantee given by DRB, which occurs
    1260                 :             :    in STMT_INFO.  */
    1261                 :             : 
    1262                 :             : static void
    1263                 :     4224730 : vect_record_base_alignment (vec_info *vinfo, stmt_vec_info stmt_info,
    1264                 :             :                             innermost_loop_behavior *drb)
    1265                 :             : {
    1266                 :     4224730 :   bool existed;
    1267                 :     4224730 :   std::pair<stmt_vec_info, innermost_loop_behavior *> &entry
    1268                 :     4224730 :     = vinfo->base_alignments.get_or_insert (drb->base_address, &existed);
    1269                 :     4224730 :   if (!existed || entry.second->base_alignment < drb->base_alignment)
    1270                 :             :     {
    1271                 :     1303259 :       entry = std::make_pair (stmt_info, drb);
    1272                 :     1303259 :       if (dump_enabled_p ())
    1273                 :       34935 :         dump_printf_loc (MSG_NOTE, vect_location,
    1274                 :             :                          "recording new base alignment for %T\n"
    1275                 :             :                          "  alignment:    %d\n"
    1276                 :             :                          "  misalignment: %d\n"
    1277                 :             :                          "  based on:     %G",
    1278                 :             :                          drb->base_address,
    1279                 :             :                          drb->base_alignment,
    1280                 :             :                          drb->base_misalignment,
    1281                 :             :                          stmt_info->stmt);
    1282                 :             :     }
    1283                 :     4224730 : }
    1284                 :             : 
    1285                 :             : /* If the region we're going to vectorize is reached, all unconditional
    1286                 :             :    data references occur at least once.  We can therefore pool the base
    1287                 :             :    alignment guarantees from each unconditional reference.  Do this by
    1288                 :             :    going through all the data references in VINFO and checking whether
    1289                 :             :    the containing statement makes the reference unconditionally.  If so,
    1290                 :             :    record the alignment of the base address in VINFO so that it can be
    1291                 :             :    used for all other references with the same base.  */
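                          :             : 
                          :             : /* Editor's example (hypothetical code, editor's reading of the above): in
                          :             : 
                          :             :      for (int i = 0; i < n; i++)
                          :             :        {
                          :             :          p[i] = 0;          // unconditional: happens whenever the body runs
                          :             :          if (c[i])
                          :             :            p[i] += 1;       // conditional reference with the same base P
                          :             :        }
                          :             : 
                          :             :    the unconditional store is guaranteed to execute once the region is
                          :             :    reached, so the base alignment it implies for P can be recorded and
                          :             :    reused for the conditional reference, which by itself could not provide
                          :             :    that guarantee.  */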
    1292                 :             : 
    1293                 :             : void
    1294                 :      936018 : vect_record_base_alignments (vec_info *vinfo)
    1295                 :             : {
    1296                 :      936018 :   loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
    1297                 :      339941 :   class loop *loop = loop_vinfo ? LOOP_VINFO_LOOP (loop_vinfo) : NULL;
    1298                 :    13773503 :   for (data_reference *dr : vinfo->shared->datarefs)
    1299                 :             :     {
    1300                 :    11061647 :       dr_vec_info *dr_info = vinfo->lookup_dr (dr);
    1301                 :    11061647 :       stmt_vec_info stmt_info = dr_info->stmt;
    1302                 :    11061647 :       if (!DR_IS_CONDITIONAL_IN_STMT (dr)
    1303                 :    11054755 :           && STMT_VINFO_VECTORIZABLE (stmt_info)
    1304                 :     4237679 :           && !STMT_VINFO_GATHER_SCATTER_P (stmt_info))
    1305                 :             :         {
    1306                 :     4223245 :           vect_record_base_alignment (vinfo, stmt_info, &DR_INNERMOST (dr));
    1307                 :             : 
    1308                 :             :           /* If DR is nested in the loop that is being vectorized, we can also
    1309                 :             :              record the alignment of the base wrt the outer loop.  */
    1310                 :    11846035 :           if (loop && nested_in_vect_loop_p (loop, stmt_info))
    1311                 :        1485 :             vect_record_base_alignment
    1312                 :        1485 :               (vinfo, stmt_info, &STMT_VINFO_DR_WRT_VEC_LOOP (stmt_info));
    1313                 :             :         }
    1314                 :             :     }
    1315                 :      936018 : }
    1316                 :             : 
    1317                 :             : /* Function vect_compute_data_ref_alignment
    1318                 :             : 
    1319                 :             :    Compute the misalignment of the data reference DR_INFO when vectorizing
    1320                 :             :    with VECTYPE.
    1321                 :             : 
    1322                 :             :    Output:
    1323                 :             :    1. initialized misalignment info for DR_INFO
    1324                 :             : 
    1325                 :             :    FOR NOW: No analysis is actually performed. Misalignment is calculated
    1326                 :             :    only for trivial cases. TODO.  */
    1327                 :             : 
    1328                 :             : static void
    1329                 :     1473217 : vect_compute_data_ref_alignment (vec_info *vinfo, dr_vec_info *dr_info,
    1330                 :             :                                  tree vectype)
    1331                 :             : {
    1332                 :     1473217 :   stmt_vec_info stmt_info = dr_info->stmt;
    1333                 :     1473217 :   vec_base_alignments *base_alignments = &vinfo->base_alignments;
    1334                 :     1473217 :   loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
    1335                 :     1473217 :   class loop *loop = NULL;
    1336                 :     1473217 :   tree ref = DR_REF (dr_info->dr);
    1337                 :             : 
    1338                 :     1473217 :   if (dump_enabled_p ())
    1339                 :       55422 :     dump_printf_loc (MSG_NOTE, vect_location,
    1340                 :             :                      "vect_compute_data_ref_alignment:\n");
    1341                 :             : 
    1342                 :     1473217 :   if (loop_vinfo)
    1343                 :      682324 :     loop = LOOP_VINFO_LOOP (loop_vinfo);
    1344                 :             : 
    1345                 :             :   /* Initialize misalignment to unknown.  */
    1346                 :     1473217 :   SET_DR_MISALIGNMENT (dr_info, DR_MISALIGNMENT_UNKNOWN);
    1347                 :             : 
    1348                 :     1473217 :   if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
    1349                 :             :     return;
    1350                 :             : 
    1351                 :     1457002 :   innermost_loop_behavior *drb = vect_dr_behavior (vinfo, dr_info);
    1352                 :     1457002 :   bool step_preserves_misalignment_p;
    1353                 :             : 
    1354                 :     1457002 :   poly_uint64 vector_alignment
    1355                 :     1457002 :     = exact_div (targetm.vectorize.preferred_vector_alignment (vectype),
    1356                 :             :                  BITS_PER_UNIT);
    1357                 :             : 
    1358                 :     1457002 :   if (loop_vinfo
    1359                 :     1457002 :       && dr_safe_speculative_read_required (stmt_info))
    1360                 :             :     {
    1361                 :      255532 :       poly_uint64 vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
    1362                 :      255532 :       auto vectype_size
    1363                 :      255532 :         = TREE_INT_CST_LOW (TYPE_SIZE_UNIT (TREE_TYPE (vectype)));
    1364                 :      255532 :       poly_uint64 new_alignment = vf * vectype_size;
    1365                 :             :       /* If we have a grouped access we require that the alignment be N * elem.  */
    1366                 :      255532 :       if (STMT_VINFO_GROUPED_ACCESS (stmt_info))
    1367                 :       80802 :         new_alignment *= DR_GROUP_SIZE (DR_GROUP_FIRST_ELEMENT (stmt_info));
    1368                 :             : 
    1369                 :      255532 :       unsigned HOST_WIDE_INT target_alignment;
    1370                 :      255532 :       if (new_alignment.is_constant (&target_alignment)
    1371                 :      511064 :           && pow2p_hwi (target_alignment))
    1372                 :             :         {
    1373                 :      200562 :           if (dump_enabled_p ())
    1374                 :             :             {
    1375                 :        3169 :               dump_printf_loc (MSG_NOTE, vect_location,
    1376                 :             :                                "alignment increased due to early break to ");
    1377                 :        3169 :               dump_dec (MSG_NOTE, new_alignment);
    1378                 :        3169 :               dump_printf (MSG_NOTE, " bytes.\n");
    1379                 :             :             }
    1380                 :             :           vector_alignment = target_alignment;
    1381                 :             :         }
    1382                 :             :     }
    1383                 :             : 
    1384                 :     1457002 :   SET_DR_TARGET_ALIGNMENT (dr_info, vector_alignment);
    1385                 :             : 
     1386                 :             :   /* If the main loop has been peeled for alignment we have no way of knowing
    1387                 :             :      whether the data accesses in the epilogues are aligned.  We can't at
    1388                 :             :      compile time answer the question whether we have entered the main loop or
    1389                 :             :      not.  Fixes PR 92351.  */
    1390                 :     1457002 :   if (loop_vinfo)
    1391                 :             :     {
    1392                 :      666109 :       loop_vec_info orig_loop_vinfo = LOOP_VINFO_ORIG_LOOP_INFO (loop_vinfo);
    1393                 :      666109 :       if (orig_loop_vinfo
    1394                 :       58010 :           && LOOP_VINFO_PEELING_FOR_ALIGNMENT (orig_loop_vinfo) != 0)
    1395                 :             :         return;
    1396                 :             :     }
    1397                 :             : 
    1398                 :     1456547 :   unsigned HOST_WIDE_INT vect_align_c;
    1399                 :     1456547 :   if (!vector_alignment.is_constant (&vect_align_c))
    1400                 :             :     return;
    1401                 :             : 
    1402                 :             :   /* No step for BB vectorization.  */
    1403                 :     1456547 :   if (!loop)
    1404                 :             :     {
    1405                 :      790893 :       gcc_assert (integer_zerop (drb->step));
    1406                 :             :       step_preserves_misalignment_p = true;
    1407                 :             :     }
    1408                 :             : 
    1409                 :             :   else
    1410                 :             :     {
    1411                 :             :       /* We can only use base and misalignment information relative to
    1412                 :             :          an innermost loop if the misalignment stays the same throughout the
    1413                 :             :          execution of the loop.  As above, this is the case if the stride of
     1414                 :             :          the dataref is evenly divisible by the alignment.  */
    1415                 :      665654 :       poly_uint64 vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
    1416                 :      665654 :       step_preserves_misalignment_p
    1417                 :      665654 :         = multiple_p (drb->step_alignment * vf, vect_align_c);
    1418                 :             : 
    1419                 :      665654 :       if (!step_preserves_misalignment_p && dump_enabled_p ())
    1420                 :         302 :         dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
    1421                 :             :                          "step doesn't divide the vector alignment.\n");
    1422                 :             : 
    1423                 :             :       /* In case the dataref is in an inner-loop of the loop that is being
    1424                 :             :          vectorized (LOOP), we use the base and misalignment information
    1425                 :             :          relative to the outer-loop (LOOP).  This is ok only if the
    1426                 :             :          misalignment stays the same throughout the execution of the
    1427                 :             :          inner-loop, which is why we have to check that the stride of the
     1428                 :             :          dataref in the inner-loop is evenly divisible by the vector alignment.  */
    1429                 :      665654 :       if (step_preserves_misalignment_p
    1430                 :      665654 :           && nested_in_vect_loop_p (loop, stmt_info))
    1431                 :             :         {
    1432                 :        1484 :           step_preserves_misalignment_p
    1433                 :        1484 :             = (DR_STEP_ALIGNMENT (dr_info->dr) % vect_align_c) == 0;
    1434                 :             : 
    1435                 :        1484 :           if (dump_enabled_p ())
    1436                 :             :             {
    1437                 :         517 :               if (step_preserves_misalignment_p)
    1438                 :         371 :                 dump_printf_loc (MSG_NOTE, vect_location,
    1439                 :             :                                  "inner step divides the vector alignment.\n");
    1440                 :             :               else
    1441                 :         146 :                 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
    1442                 :             :                                  "inner step doesn't divide the vector"
    1443                 :             :                                  " alignment.\n");
    1444                 :             :             }
    1445                 :             :         }
    1446                 :             :     }
    1447                 :             : 
    1448                 :     1456547 :   unsigned int base_alignment = drb->base_alignment;
    1449                 :     1456547 :   unsigned int base_misalignment = drb->base_misalignment;
    1450                 :             : 
    1451                 :             :   /* Calculate the maximum of the pooled base address alignment and the
    1452                 :             :      alignment that we can compute for DR itself.  */
    1453                 :     1456547 :   std::pair<stmt_vec_info, innermost_loop_behavior *> *entry
    1454                 :     1456547 :     = base_alignments->get (drb->base_address);
    1455                 :     1456547 :   if (entry
    1456                 :     1453023 :       && base_alignment < (*entry).second->base_alignment
    1457                 :     1457433 :       && (loop_vinfo
    1458                 :         732 :           || (dominated_by_p (CDI_DOMINATORS, gimple_bb (stmt_info->stmt),
    1459                 :         732 :                               gimple_bb (entry->first->stmt))
    1460                 :         609 :               && (gimple_bb (stmt_info->stmt) != gimple_bb (entry->first->stmt)
    1461                 :         449 :                   || (entry->first->dr_aux.group <= dr_info->group)))))
    1462                 :             :     {
    1463                 :         746 :       base_alignment = entry->second->base_alignment;
    1464                 :         746 :       base_misalignment = entry->second->base_misalignment;
    1465                 :             :     }
    1466                 :             : 
    1467                 :     1456547 :   if (drb->offset_alignment < vect_align_c
    1468                 :     1384359 :       || !step_preserves_misalignment_p
    1469                 :             :       /* We need to know whether the step wrt the vectorized loop is
    1470                 :             :          negative when computing the starting misalignment below.  */
    1471                 :     1375187 :       || TREE_CODE (drb->step) != INTEGER_CST)
    1472                 :             :     {
    1473                 :      109524 :       if (dump_enabled_p ())
    1474                 :        4329 :         dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
    1475                 :             :                          "Unknown alignment for access: %T\n", ref);
    1476                 :      109524 :       return;
    1477                 :             :     }
    1478                 :             : 
    1479                 :     1347023 :   if (base_alignment < vect_align_c)
    1480                 :             :     {
    1481                 :      628595 :       unsigned int max_alignment;
    1482                 :      628595 :       tree base = get_base_for_alignment (drb->base_address, &max_alignment);
    1483                 :      628595 :       if (max_alignment < vect_align_c
    1484                 :      628595 :           || !vect_can_force_dr_alignment_p (base,
    1485                 :      628399 :                                              vect_align_c * BITS_PER_UNIT))
    1486                 :             :         {
    1487                 :      431532 :           if (dump_enabled_p ())
    1488                 :       15600 :             dump_printf_loc (MSG_NOTE, vect_location,
    1489                 :             :                              "can't force alignment of ref: %T\n", ref);
    1490                 :      431532 :           return;
    1491                 :             :         }
    1492                 :             : 
    1493                 :             :       /* Force the alignment of the decl.
    1494                 :             :          NOTE: This is the only change to the code we make during
    1495                 :             :          the analysis phase, before deciding to vectorize the loop.  */
    1496                 :      197063 :       if (dump_enabled_p ())
    1497                 :        8182 :         dump_printf_loc (MSG_NOTE, vect_location,
    1498                 :             :                          "force alignment of %T\n", ref);
    1499                 :             : 
    1500                 :      197063 :       dr_info->base_decl = base;
    1501                 :      197063 :       dr_info->base_misaligned = true;
    1502                 :      197063 :       base_misalignment = 0;
    1503                 :             :     }
    1504                 :      915491 :   poly_int64 misalignment
    1505                 :      915491 :     = base_misalignment + wi::to_poly_offset (drb->init).force_shwi ();
    1506                 :             : 
    1507                 :      915491 :   unsigned int const_misalignment;
    1508                 :      915491 :   if (!known_misalignment (misalignment, vect_align_c, &const_misalignment))
    1509                 :             :     {
    1510                 :             :       if (dump_enabled_p ())
    1511                 :             :         dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
    1512                 :             :                          "Non-constant misalignment for access: %T\n", ref);
    1513                 :             :       return;
    1514                 :             :     }
    1515                 :             : 
    1516                 :      915491 :   SET_DR_MISALIGNMENT (dr_info, const_misalignment);
    1517                 :             : 
    1518                 :      915491 :   if (dump_enabled_p ())
    1519                 :       33980 :     dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
    1520                 :             :                      "misalign = %d bytes of ref %T\n",
    1521                 :             :                      const_misalignment, ref);
    1522                 :             : 
    1523                 :             :   return;
    1524                 :             : }
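
The tail of the function above boils down to reducing the known base misalignment plus the constant DR_INIT byte offset modulo the vector alignment.  A minimal standalone sketch of that arithmetic, assuming a compile-time-constant alignment; the helper name and constants are illustrative, not GCC internals:

#include <cstdint>

/* Illustrative only: reduce BASE_MISALIGNMENT plus the constant byte
   offset INIT into [0, VECT_ALIGN), which is the value handed to
   SET_DR_MISALIGNMENT above.  Assumes VECT_ALIGN is non-zero.  */
static unsigned
misalign_for_access (unsigned base_misalignment, int64_t init,
                     unsigned vect_align)
{
  int64_t m = ((int64_t) base_misalignment + init) % (int64_t) vect_align;
  if (m < 0)		/* A negative INIT must still land in range.  */
    m += vect_align;
  return (unsigned) m;
}

/* E.g. an 8-byte-misaligned base accessed at byte offset 24 against a
   16-byte vector alignment: misalign_for_access (8, 24, 16) == 0.  */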
    1525                 :             : 
    1526                 :             : /* Return whether DR_INFO, which is related to DR_PEEL_INFO in
    1527                 :             :    that it only differs in DR_INIT, is aligned if DR_PEEL_INFO
    1528                 :             :    is made aligned via peeling.  */
    1529                 :             : 
    1530                 :             : static bool
    1531                 :     3412737 : vect_dr_aligned_if_related_peeled_dr_is (dr_vec_info *dr_info,
    1532                 :             :                                          dr_vec_info *dr_peel_info)
    1533                 :             : {
    1534                 :     3412737 :   if (multiple_p (DR_TARGET_ALIGNMENT (dr_peel_info),
    1535                 :     3413812 :                   DR_TARGET_ALIGNMENT (dr_info)))
    1536                 :             :     {
    1537                 :     3411662 :       poly_offset_int diff
    1538                 :     3411662 :         = (wi::to_poly_offset (DR_INIT (dr_peel_info->dr))
    1539                 :     3411662 :            - wi::to_poly_offset (DR_INIT (dr_info->dr)));
    1540                 :     3411662 :       if (known_eq (diff, 0)
    1541                 :     3411662 :           || multiple_p (diff, DR_TARGET_ALIGNMENT (dr_info)))
    1542                 :     1508764 :         return true;
    1543                 :             :     }
    1544                 :             :   return false;
    1545                 :             : }
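
Put differently, a reference that differs from the peeled one only in DR_INIT becomes aligned by the peel exactly when the peeled reference's target alignment is at least as strict and the byte distance between the two DR_INITs is a multiple of this reference's target alignment.  A hedged standalone illustration for constant target alignments; the names below are made up for the sketch:

#include <cstdint>

/* Sketch of the test above: DR "a" is the reference being asked about,
   DR "b" is the one made aligned via peeling.  */
static bool
aligned_if_related_peeled_is (int64_t init_a, unsigned target_align_a,
                              int64_t init_b, unsigned target_align_b)
{
  /* The peeled DR's target alignment must be a multiple of ours ...  */
  if (target_align_b % target_align_a != 0)
    return false;
  /* ... and the constant distance between the two accesses must keep us
     on an aligned boundary whenever the peeled access lands on one.  */
  int64_t diff = init_b - init_a;
  return diff % (int64_t) target_align_a == 0;
}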
    1546                 :             : 
    1547                 :             : /* Return whether DR_INFO is aligned if DR_PEEL_INFO is made
    1548                 :             :    aligned via peeling.  */
    1549                 :             : 
    1550                 :             : static bool
    1551                 :      141373 : vect_dr_aligned_if_peeled_dr_is (dr_vec_info *dr_info,
    1552                 :             :                                  dr_vec_info *dr_peel_info)
    1553                 :             : {
    1554                 :      141373 :   if (!operand_equal_p (DR_BASE_ADDRESS (dr_info->dr),
    1555                 :      141373 :                         DR_BASE_ADDRESS (dr_peel_info->dr), 0)
    1556                 :       47318 :       || !operand_equal_p (DR_OFFSET (dr_info->dr),
    1557                 :       47318 :                            DR_OFFSET (dr_peel_info->dr), 0)
    1558                 :      187891 :       || !operand_equal_p (DR_STEP (dr_info->dr),
    1559                 :       46518 :                            DR_STEP (dr_peel_info->dr), 0))
    1560                 :       95198 :     return false;
    1561                 :             : 
    1562                 :       46175 :   return vect_dr_aligned_if_related_peeled_dr_is (dr_info, dr_peel_info);
    1563                 :             : }
    1564                 :             : 
    1565                 :             : /* Compute the value for dr_info->misalign so that the access appears
    1566                 :             :    aligned.  This is used by peeling to compensate for dr_misalignment
    1567                 :             :    applying the offset for negative step.  */
    1568                 :             : 
    1569                 :             : int
    1570                 :       14991 : vect_dr_misalign_for_aligned_access (dr_vec_info *dr_info)
    1571                 :             : {
    1572                 :       14991 :   if (tree_int_cst_sgn (DR_STEP (dr_info->dr)) >= 0)
    1573                 :             :     return 0;
    1574                 :             : 
    1575                 :         144 :   tree vectype = STMT_VINFO_VECTYPE (dr_info->stmt);
    1576                 :         144 :   poly_int64 misalignment
    1577                 :         144 :     = ((TYPE_VECTOR_SUBPARTS (vectype) - 1)
    1578                 :         144 :        * TREE_INT_CST_LOW (TYPE_SIZE_UNIT (TREE_TYPE (vectype))));
    1579                 :             : 
    1580                 :         144 :   unsigned HOST_WIDE_INT target_alignment_c;
    1581                 :         144 :   int misalign;
    1582                 :         144 :   if (!dr_info->target_alignment.is_constant (&target_alignment_c)
    1583                 :         144 :       || !known_misalignment (misalignment, target_alignment_c, &misalign))
    1584                 :             :     return DR_MISALIGNMENT_UNKNOWN;
    1585                 :         144 :   return misalign;
    1586                 :             : }
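
With a negative step the vectorized access starts TYPE_VECTOR_SUBPARTS - 1 elements below the DR itself, so the value that makes such an access "appear aligned" is that byte offset reduced modulo the target alignment.  A standalone sketch of the same computation for fixed-width vectors; the names are illustrative and a constant target alignment is assumed:

/* Sketch: misalignment value that compensates for the negative-step
   offset, i.e. ((nelts - 1) * elem_size) mod target_align.  */
static int
misalign_for_aligned_negative_step (unsigned nelts, unsigned elem_size,
                                    unsigned target_align)
{
  unsigned offset = (nelts - 1) * elem_size;
  return (int) (offset % target_align);
}

/* E.g. 4 elements of 4 bytes against a 16-byte target alignment:
   (4 - 1) * 4 = 12, so the compensating misalignment is 12.  */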
    1587                 :             : 
    1588                 :             : /* Function vect_update_misalignment_for_peel.
    1589                 :             :    Sets DR_INFO's misalignment
    1590                 :             :    - to 0 if it has the same alignment as DR_PEEL_INFO,
    1591                 :             :    - to the misalignment computed using NPEEL if DR_INFO's alignment is known,
    1592                 :             :    - to -1 (unknown) otherwise.
    1593                 :             : 
    1594                 :             :    DR_INFO - the data reference whose misalignment is to be adjusted.
    1595                 :             :    DR_PEEL_INFO - the data reference whose misalignment is being made
    1596                 :             :                   zero in the vector loop by the peel.
    1597                 :             :    NPEEL - the number of iterations in the peel loop if the misalignment
    1598                 :             :            of DR_PEEL_INFO is known at compile time.  */
    1599                 :             : 
    1600                 :             : static void
    1601                 :         846 : vect_update_misalignment_for_peel (dr_vec_info *dr_info,
    1602                 :             :                                    dr_vec_info *dr_peel_info, int npeel)
    1603                 :             : {
    1604                 :             :   /* If dr_info is aligned whenever dr_peel_info is, then mark it so.  */
    1605                 :         846 :   if (vect_dr_aligned_if_peeled_dr_is (dr_info, dr_peel_info))
    1606                 :             :     {
    1607                 :         408 :       SET_DR_MISALIGNMENT (dr_info,
    1608                 :             :                            vect_dr_misalign_for_aligned_access (dr_peel_info));
    1609                 :         408 :       return;
    1610                 :             :     }
    1611                 :             : 
    1612                 :         438 :   unsigned HOST_WIDE_INT alignment;
    1613                 :         438 :   if (DR_TARGET_ALIGNMENT (dr_info).is_constant (&alignment)
    1614                 :         438 :       && known_alignment_for_access_p (dr_info,
    1615                 :         438 :                                        STMT_VINFO_VECTYPE (dr_info->stmt))
    1616                 :         193 :       && known_alignment_for_access_p (dr_peel_info,
    1617                 :         193 :                                        STMT_VINFO_VECTYPE (dr_peel_info->stmt)))
    1618                 :             :     {
    1619                 :         185 :       int misal = dr_info->misalignment;
    1620                 :         185 :       misal += npeel * TREE_INT_CST_LOW (DR_STEP (dr_info->dr));
    1621                 :         185 :       misal &= alignment - 1;
    1622                 :         185 :       set_dr_misalignment (dr_info, misal);
    1623                 :         185 :       return;
    1624                 :             :     }
    1625                 :             : 
    1626                 :         253 :   if (dump_enabled_p ())
    1627                 :          23 :     dump_printf_loc (MSG_NOTE, vect_location, "Setting misalignment " \
    1628                 :             :                      "to unknown (-1).\n");
    1629                 :         253 :   SET_DR_MISALIGNMENT (dr_info, DR_MISALIGNMENT_UNKNOWN);
    1630                 :             : }
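
When the peel count and both alignments are known, the update above is plain modular arithmetic: each peeled scalar iteration advances the access by DR_STEP bytes, and the sum is reduced with a power-of-two mask.  A standalone sketch under the assumption that the alignment is a power of two (as DR_TARGET_ALIGNMENT is); the helper name is illustrative:

#include <cstdint>

/* Sketch of the known-misalignment update after peeling NPEEL scalar
   iterations; ALIGNMENT must be a power of two for the mask to be valid.  */
static int
misalignment_after_peel (int misalignment, int npeel, int64_t step,
                         unsigned alignment)
{
  int64_t m = misalignment + (int64_t) npeel * step;
  return (int) (m & (int64_t) (alignment - 1));
}

/* E.g. misalignment 8, npeel 2, step 4, alignment 16:
   (8 + 2 * 4) & 15 == 0, i.e. the access becomes aligned.  */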
    1631                 :             : 
    1632                 :             : /* Return true if alignment is relevant for DR_INFO.  */
    1633                 :             : 
    1634                 :             : static bool
    1635                 :     1375317 : vect_relevant_for_alignment_p (dr_vec_info *dr_info)
    1636                 :             : {
    1637                 :     1375317 :   stmt_vec_info stmt_info = dr_info->stmt;
    1638                 :             : 
    1639                 :     1375317 :   if (!STMT_VINFO_RELEVANT_P (stmt_info))
    1640                 :             :     return false;
    1641                 :             : 
    1642                 :             :   /* For interleaving, only the alignment of the first access matters.  */
    1643                 :     1374087 :   if (STMT_VINFO_GROUPED_ACCESS (stmt_info)
    1644                 :     1592204 :       && DR_GROUP_FIRST_ELEMENT (stmt_info) != stmt_info)
    1645                 :             :     return false;
    1646                 :             : 
    1647                 :             :   /* Scatter-gather and invariant accesses continue to address individual
    1648                 :             :      scalars, so vector-level alignment is irrelevant.  */
    1649                 :     1279460 :   if (STMT_VINFO_GATHER_SCATTER_P (stmt_info)
    1650                 :     1279460 :       || integer_zerop (DR_STEP (dr_info->dr)))
    1651                 :       39667 :     return false;
    1652                 :             : 
    1653                 :             :   /* Strided accesses perform only component accesses, alignment is
    1654                 :             :      irrelevant for them.  */
    1655                 :     1239793 :   if (STMT_VINFO_STRIDED_P (stmt_info)
    1656                 :     1239793 :       && !STMT_VINFO_GROUPED_ACCESS (stmt_info))
    1657                 :             :     return false;
    1658                 :             : 
    1659                 :             :   return true;
    1660                 :             : }
    1661                 :             : 
    1662                 :             : /* Given a memory reference EXP, return whether its alignment is less
    1663                 :             :    than its size.  */
    1664                 :             : 
    1665                 :             : static bool
    1666                 :     1109542 : not_size_aligned (tree exp)
    1667                 :             : {
    1668                 :     1109542 :   if (!tree_fits_uhwi_p (TYPE_SIZE (TREE_TYPE (exp))))
    1669                 :             :     return true;
    1670                 :             : 
    1671                 :     1109542 :   return (tree_to_uhwi (TYPE_SIZE (TREE_TYPE (exp)))
    1672                 :     1109542 :           > get_object_alignment (exp));
    1673                 :             : }
    1674                 :             : 
    1675                 :             : /* Function vector_alignment_reachable_p
    1676                 :             : 
    1677                 :             :    Return true if vector alignment for DR_INFO is reachable by peeling
    1678                 :             :    a few loop iterations.  Return false otherwise.  */
    1679                 :             : 
    1680                 :             : static bool
    1681                 :      471855 : vector_alignment_reachable_p (dr_vec_info *dr_info, poly_uint64 vf)
    1682                 :             : {
    1683                 :      471855 :   stmt_vec_info stmt_info = dr_info->stmt;
    1684                 :      471855 :   tree vectype = STMT_VINFO_VECTYPE (stmt_info);
    1685                 :      471855 :   poly_uint64 nelements = TYPE_VECTOR_SUBPARTS (vectype);
    1686                 :      943710 :   poly_uint64 vector_size = GET_MODE_SIZE (TYPE_MODE (vectype));
    1687                 :      471855 :   unsigned elem_size = vector_element_size (vector_size, nelements);
    1688                 :      471855 :   unsigned group_size = 1;
    1689                 :             : 
    1690                 :      471855 :   if (STMT_VINFO_GROUPED_ACCESS (stmt_info))
    1691                 :             :     {
    1692                 :             :       /* For interleaved accesses we peel only if the number of iterations
    1693                 :             :          in the prolog loop ({VF - misalignment}) is a multiple of the
    1694                 :             :          number of interleaved accesses.  */
    1695                 :             : 
    1696                 :             :       /* FORNOW: handle only known alignment.  */
    1697                 :       77309 :       if (!known_alignment_for_access_p (dr_info, vectype))
    1698                 :      471855 :         return false;
    1699                 :             : 
    1700                 :       45817 :       unsigned mis_in_elements = dr_misalignment (dr_info, vectype) / elem_size;
    1701                 :       57183 :       if (!multiple_p (nelements - mis_in_elements, DR_GROUP_SIZE (stmt_info)))
    1702                 :             :         return false;
    1703                 :             : 
    1704                 :       11366 :       group_size = DR_GROUP_SIZE (DR_GROUP_FIRST_ELEMENT (stmt_info));
    1705                 :             :     }
    1706                 :             : 
    1707                 :             :   /* If the vectorization factor does not guarantee DR advancement of
    1708                 :             :      a multiple of the target alignment, no peeling will help.  */
    1709                 :      405912 :   if (!multiple_p (elem_size * group_size * vf, dr_target_alignment (dr_info)))
    1710                 :          68 :     return false;
    1711                 :             : 
    1712                 :             :   /* If misalignment is known at compile time then allow peeling
    1713                 :             :      only if natural alignment is reachable through peeling.  */
    1714                 :      405844 :   if (known_alignment_for_access_p (dr_info, vectype)
    1715                 :      640935 :       && !aligned_access_p (dr_info, vectype))
    1716                 :             :     {
    1717                 :       12086 :       HOST_WIDE_INT elmsize =
    1718                 :       12086 :                 int_cst_value (TYPE_SIZE_UNIT (TREE_TYPE (vectype)));
    1719                 :       12086 :       if (dump_enabled_p ())
    1720                 :             :         {
    1721                 :         781 :           dump_printf_loc (MSG_NOTE, vect_location,
    1722                 :             :                            "data size = %wd. misalignment = %d.\n", elmsize,
    1723                 :         781 :                            dr_misalignment (dr_info, vectype));
    1724                 :             :         }
    1725                 :       12086 :       if (dr_misalignment (dr_info, vectype) % elmsize)
    1726                 :             :         {
    1727                 :          34 :           if (dump_enabled_p ())
    1728                 :           7 :             dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
    1729                 :             :                              "data size does not divide the misalignment.\n");
    1730                 :          34 :           return false;
    1731                 :             :         }
    1732                 :             :     }
    1733                 :             : 
    1734                 :      405810 :   if (!known_alignment_for_access_p (dr_info, vectype))
    1735                 :             :     {
    1736                 :      170753 :       tree type = TREE_TYPE (DR_REF (dr_info->dr));
    1737                 :      170753 :       bool is_packed = not_size_aligned (DR_REF (dr_info->dr));
    1738                 :      170753 :       if (dump_enabled_p ())
    1739                 :       14011 :         dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
    1740                 :             :                          "Unknown misalignment, %snaturally aligned\n",
    1741                 :             :                          is_packed ? "not " : "");
    1742                 :      170753 :       return targetm.vectorize.vector_alignment_reachable (type, is_packed);
    1743                 :             :     }
    1744                 :             : 
    1745                 :             :   return true;
    1746                 :             : }
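
For grouped (interleaved) accesses the function above only allows peeling when the prolog iteration count, i.e. the number of vector elements minus the misalignment expressed in elements, is a whole number of groups.  A standalone sketch of that test; fixed-width vectors and a compile-time-known misalignment are assumed, and the names are illustrative:

/* Sketch: can peeling align a grouped access?  NELTS is the number of
   vector elements, MIS_IN_ELEMENTS the known misalignment in elements,
   GROUP_SIZE the interleaving factor.  */
static bool
peeling_helps_grouped_access (unsigned nelts, unsigned mis_in_elements,
                              unsigned group_size)
{
  /* The prolog executes (nelts - mis_in_elements) scalar iterations;
     interleaving stays intact only if that is a whole number of groups.  */
  return (nelts - mis_in_elements) % group_size == 0;
}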
    1747                 :             : 
    1748                 :             : 
    1749                 :             : /* Calculate the cost of the memory access represented by DR_INFO.  */
    1750                 :             : 
    1751                 :             : static void
    1752                 :      519460 : vect_get_data_access_cost (vec_info *vinfo, dr_vec_info *dr_info,
    1753                 :             :                            dr_alignment_support alignment_support_scheme,
    1754                 :             :                            int misalignment,
    1755                 :             :                            unsigned int *inside_cost,
    1756                 :             :                            unsigned int *outside_cost,
    1757                 :             :                            stmt_vector_for_cost *body_cost_vec,
    1758                 :             :                            stmt_vector_for_cost *prologue_cost_vec)
    1759                 :             : {
    1760                 :      519460 :   stmt_vec_info stmt_info = dr_info->stmt;
    1761                 :      519460 :   loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
    1762                 :      519460 :   int ncopies;
    1763                 :             : 
    1764                 :      519460 :   if (PURE_SLP_STMT (stmt_info))
    1765                 :             :     ncopies = 1;
    1766                 :             :   else
    1767                 :       15336 :     ncopies = vect_get_num_copies (loop_vinfo, STMT_VINFO_VECTYPE (stmt_info));
    1768                 :             : 
    1769                 :      519460 :   if (DR_IS_READ (dr_info->dr))
    1770                 :      363617 :     vect_get_load_cost (vinfo, stmt_info, NULL, ncopies,
    1771                 :             :                         alignment_support_scheme, misalignment, true,
    1772                 :             :                         inside_cost, outside_cost, prologue_cost_vec,
    1773                 :             :                         body_cost_vec, false);
    1774                 :             :   else
    1775                 :      155843 :     vect_get_store_cost (vinfo,stmt_info, NULL, ncopies,
    1776                 :             :                          alignment_support_scheme, misalignment, inside_cost,
    1777                 :             :                          body_cost_vec);
    1778                 :             : 
    1779                 :      519460 :   if (dump_enabled_p ())
    1780                 :       26626 :     dump_printf_loc (MSG_NOTE, vect_location,
    1781                 :             :                      "vect_get_data_access_cost: inside_cost = %d, "
    1782                 :             :                      "outside_cost = %d.\n", *inside_cost, *outside_cost);
    1783                 :      519460 : }
    1784                 :             : 
    1785                 :             : 
    1786                 :             : typedef struct _vect_peel_info
    1787                 :             : {
    1788                 :             :   dr_vec_info *dr_info;
    1789                 :             :   int npeel;
    1790                 :             :   unsigned int count;
    1791                 :             : } *vect_peel_info;
    1792                 :             : 
    1793                 :             : typedef struct _vect_peel_extended_info
    1794                 :             : {
    1795                 :             :   vec_info *vinfo;
    1796                 :             :   struct _vect_peel_info peel_info;
    1797                 :             :   unsigned int inside_cost;
    1798                 :             :   unsigned int outside_cost;
    1799                 :             : } *vect_peel_extended_info;
    1800                 :             : 
    1801                 :             : 
    1802                 :             : /* Peeling hashtable helpers.  */
    1803                 :             : 
    1804                 :             : struct peel_info_hasher : free_ptr_hash <_vect_peel_info>
    1805                 :             : {
    1806                 :             :   static inline hashval_t hash (const _vect_peel_info *);
    1807                 :             :   static inline bool equal (const _vect_peel_info *, const _vect_peel_info *);
    1808                 :             : };
    1809                 :             : 
    1810                 :             : inline hashval_t
    1811                 :      683829 : peel_info_hasher::hash (const _vect_peel_info *peel_info)
    1812                 :             : {
    1813                 :      683829 :   return (hashval_t) peel_info->npeel;
    1814                 :             : }
    1815                 :             : 
    1816                 :             : inline bool
    1817                 :      385893 : peel_info_hasher::equal (const _vect_peel_info *a, const _vect_peel_info *b)
    1818                 :             : {
    1819                 :      385893 :   return (a->npeel == b->npeel);
    1820                 :             : }
    1821                 :             : 
    1822                 :             : 
    1823                 :             : /* Insert DR_INFO into peeling hash table with NPEEL as key.  */
    1824                 :             : 
    1825                 :             : static void
    1826                 :      298676 : vect_peeling_hash_insert (hash_table<peel_info_hasher> *peeling_htab,
    1827                 :             :                           loop_vec_info loop_vinfo, dr_vec_info *dr_info,
    1828                 :             :                           int npeel, bool supportable_if_not_aligned)
    1829                 :             : {
    1830                 :      298676 :   struct _vect_peel_info elem, *slot;
    1831                 :      298676 :   _vect_peel_info **new_slot;
    1832                 :             : 
    1833                 :      298676 :   elem.npeel = npeel;
    1834                 :      298676 :   slot = peeling_htab->find (&elem);
    1835                 :      298676 :   if (slot)
    1836                 :      130657 :     slot->count++;
    1837                 :             :   else
    1838                 :             :     {
    1839                 :      168019 :       slot = XNEW (struct _vect_peel_info);
    1840                 :      168019 :       slot->npeel = npeel;
    1841                 :      168019 :       slot->dr_info = dr_info;
    1842                 :      168019 :       slot->count = 1;
    1843                 :      168019 :       new_slot = peeling_htab->find_slot (slot, INSERT);
    1844                 :      168019 :       *new_slot = slot;
    1845                 :             :     }
    1846                 :             : 
    1847                 :             :   /* If this DR is not supported with unknown misalignment then bias
    1848                 :             :      this slot when the cost model is disabled.  */
    1849                 :      298676 :   if (!supportable_if_not_aligned
    1850                 :      298676 :       && unlimited_cost_model (LOOP_VINFO_LOOP (loop_vinfo)))
    1851                 :        4912 :     slot->count += VECT_MAX_COST;
    1852                 :      298676 : }
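
The hash table above simply counts, for every candidate peel amount NPEEL, how many data references would become aligned, keeping one representative DR per NPEEL (the cost-model bias applied at the end is omitted here).  A compact sketch of the same bookkeeping with standard containers rather than GCC's hash_table; all names are made up for the sketch:

#include <unordered_map>

/* Sketch: count how many data refs each candidate peel amount aligns,
   remembering one representative DR index per NPEEL.  */
struct peel_candidate
{
  int representative_dr;   /* any DR that voted for this NPEEL */
  unsigned count;          /* number of DRs this NPEEL would align */
};

static void
record_peel_vote (std::unordered_map<int, peel_candidate> &candidates,
                  int npeel, int dr_index)
{
  auto it = candidates.find (npeel);
  if (it != candidates.end ())
    it->second.count++;
  else
    candidates.emplace (npeel, peel_candidate { dr_index, 1 });
}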
    1853                 :             : 
    1854                 :             : 
    1855                 :             : /* Traverse the peeling hash table to find the peeling option that aligns the
    1856                 :             :    maximum number of data accesses.  */
    1857                 :             : 
    1858                 :             : int
    1859                 :       38108 : vect_peeling_hash_get_most_frequent (_vect_peel_info **slot,
    1860                 :             :                                      _vect_peel_extended_info *max)
    1861                 :             : {
    1862                 :       38108 :   vect_peel_info elem = *slot;
    1863                 :             : 
    1864                 :       38108 :   if (elem->count > max->peel_info.count
    1865                 :       23546 :       || (elem->count == max->peel_info.count
    1866                 :       18571 :           && max->peel_info.npeel > elem->npeel))
    1867                 :             :     {
    1868                 :       14574 :       max->peel_info.npeel = elem->npeel;
    1869                 :       14574 :       max->peel_info.count = elem->count;
    1870                 :       14574 :       max->peel_info.dr_info = elem->dr_info;
    1871                 :             :     }
    1872                 :             : 
    1873                 :       38108 :   return 1;
    1874                 :             : }
    1875                 :             : 
    1876                 :             : /* Get the costs of peeling NPEEL iterations for LOOP_VINFO, checking
    1877                 :             :    data access costs for all data refs.  If UNKNOWN_MISALIGNMENT is true,
    1878                 :             :    npeel is computed at runtime but DR0_INFO's misalignment will be zero
    1879                 :             :    after peeling.  */
    1880                 :             : 
    1881                 :             : static void
    1882                 :      290533 : vect_get_peeling_costs_all_drs (loop_vec_info loop_vinfo,
    1883                 :             :                                 dr_vec_info *dr0_info,
    1884                 :             :                                 unsigned int *inside_cost,
    1885                 :             :                                 unsigned int *outside_cost,
    1886                 :             :                                 stmt_vector_for_cost *body_cost_vec,
    1887                 :             :                                 stmt_vector_for_cost *prologue_cost_vec,
    1888                 :             :                                 unsigned int npeel)
    1889                 :             : {
    1890                 :      290533 :   vec<data_reference_p> datarefs = LOOP_VINFO_DATAREFS (loop_vinfo);
    1891                 :             : 
    1892                 :      290533 :   bool dr0_alignment_known_p
    1893                 :             :     = (dr0_info
    1894                 :      531458 :        && known_alignment_for_access_p (dr0_info,
    1895                 :      240925 :                                         STMT_VINFO_VECTYPE (dr0_info->stmt)));
    1896                 :             : 
    1897                 :     1425025 :   for (data_reference *dr : datarefs)
    1898                 :             :     {
    1899                 :      553426 :       dr_vec_info *dr_info = loop_vinfo->lookup_dr (dr);
    1900                 :      553426 :       if (!vect_relevant_for_alignment_p (dr_info))
    1901                 :       33966 :         continue;
    1902                 :             : 
    1903                 :      519460 :       tree vectype = STMT_VINFO_VECTYPE (dr_info->stmt);
    1904                 :      519460 :       dr_alignment_support alignment_support_scheme;
    1905                 :      519460 :       int misalignment;
    1906                 :      519460 :       unsigned HOST_WIDE_INT alignment;
    1907                 :             : 
    1908                 :      519460 :       bool negative = tree_int_cst_compare (DR_STEP (dr_info->dr),
    1909                 :      519460 :                                             size_zero_node) < 0;
    1910                 :      519460 :       poly_int64 off = 0;
    1911                 :      519460 :       if (negative)
    1912                 :       19264 :         off = ((TYPE_VECTOR_SUBPARTS (vectype) - 1)
    1913                 :       19264 :                * -TREE_INT_CST_LOW (TYPE_SIZE_UNIT (TREE_TYPE (vectype))));
    1914                 :             : 
    1915                 :      519460 :       if (npeel == 0)
    1916                 :      262500 :         misalignment = dr_misalignment (dr_info, vectype, off);
    1917                 :      256960 :       else if (dr_info == dr0_info
    1918                 :      256960 :                || vect_dr_aligned_if_peeled_dr_is (dr_info, dr0_info))
    1919                 :             :         misalignment = 0;
    1920                 :       81036 :       else if (!dr0_alignment_known_p
    1921                 :        6639 :                || !known_alignment_for_access_p (dr_info, vectype)
    1922                 :       87675 :                || !DR_TARGET_ALIGNMENT (dr_info).is_constant (&alignment))
    1923                 :             :         misalignment = DR_MISALIGNMENT_UNKNOWN;
    1924                 :             :       else
    1925                 :             :         {
    1926                 :        5733 :           misalignment = dr_misalignment (dr_info, vectype, off);
    1927                 :        5733 :           misalignment += npeel * TREE_INT_CST_LOW (DR_STEP (dr_info->dr));
    1928                 :        5733 :           misalignment &= alignment - 1;
    1929                 :             :         }
    1930                 :      519460 :       alignment_support_scheme
    1931                 :      519460 :         = vect_supportable_dr_alignment (loop_vinfo, dr_info, vectype,
    1932                 :             :                                          misalignment);
    1933                 :             : 
    1934                 :      519460 :       vect_get_data_access_cost (loop_vinfo, dr_info,
    1935                 :             :                                  alignment_support_scheme, misalignment,
    1936                 :             :                                  inside_cost, outside_cost,
    1937                 :             :                                  body_cost_vec, prologue_cost_vec);
    1938                 :             :     }
    1939                 :      290533 : }
    1940                 :             : 
    1941                 :             : /* Traverse the peeling hash table and calculate the cost for each peeling
    1942                 :             :    option.  Find the one with the lowest cost.  */
    1943                 :             : 
    1944                 :             : int
    1945                 :      109995 : vect_peeling_hash_get_lowest_cost (_vect_peel_info **slot,
    1946                 :             :                                    _vect_peel_extended_info *min)
    1947                 :             : {
    1948                 :      109995 :   vect_peel_info elem = *slot;
    1949                 :      109995 :   int dummy;
    1950                 :      109995 :   unsigned int inside_cost = 0, outside_cost = 0;
    1951                 :      109995 :   loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (min->vinfo);
    1952                 :      109995 :   stmt_vector_for_cost prologue_cost_vec, body_cost_vec,
    1953                 :             :                        epilogue_cost_vec;
    1954                 :             : 
    1955                 :      109995 :   prologue_cost_vec.create (2);
    1956                 :      109995 :   body_cost_vec.create (2);
    1957                 :      109995 :   epilogue_cost_vec.create (2);
    1958                 :             : 
    1959                 :      109995 :   vect_get_peeling_costs_all_drs (loop_vinfo, elem->dr_info, &inside_cost,
    1960                 :             :                                   &outside_cost, &body_cost_vec,
    1961                 :      109995 :                                   &prologue_cost_vec, elem->npeel);
    1962                 :             : 
    1963                 :      109995 :   body_cost_vec.release ();
    1964                 :             : 
    1965                 :      219990 :   outside_cost += vect_get_known_peeling_cost
    1966                 :      109995 :     (loop_vinfo, elem->npeel, &dummy,
    1967                 :             :      &LOOP_VINFO_SCALAR_ITERATION_COST (loop_vinfo),
    1968                 :             :      &prologue_cost_vec, &epilogue_cost_vec);
    1969                 :             : 
    1970                 :             :   /* Prologue and epilogue costs are added to the target model later.
    1971                 :             :      These costs depend only on the scalar iteration cost, the
    1972                 :             :      number of peeling iterations finally chosen, and the number of
    1973                 :             :      misaligned statements.  So discard the information found here.  */
    1974                 :      109995 :   prologue_cost_vec.release ();
    1975                 :      109995 :   epilogue_cost_vec.release ();
    1976                 :             : 
    1977                 :      109995 :   if (inside_cost < min->inside_cost
    1978                 :        1096 :       || (inside_cost == min->inside_cost
    1979                 :         914 :           && outside_cost < min->outside_cost))
    1980                 :             :     {
    1981                 :      108905 :       min->inside_cost = inside_cost;
    1982                 :      108905 :       min->outside_cost = outside_cost;
    1983                 :      108905 :       min->peel_info.dr_info = elem->dr_info;
    1984                 :      108905 :       min->peel_info.npeel = elem->npeel;
    1985                 :      108905 :       min->peel_info.count = elem->count;
    1986                 :             :     }
    1987                 :             : 
    1988                 :      109995 :   return 1;
    1989                 :             : }
    1990                 :             : 
    1991                 :             : 
    1992                 :             : /* Choose best peeling option by traversing peeling hash table and either
    1993                 :             :    choosing an option with the lowest cost (if cost model is enabled) or the
    1994                 :             :    option that aligns as many accesses as possible.  */
    1995                 :             : 
    1996                 :             : static struct _vect_peel_extended_info
    1997                 :      122092 : vect_peeling_hash_choose_best_peeling (hash_table<peel_info_hasher> *peeling_htab,
    1998                 :             :                                        loop_vec_info loop_vinfo)
    1999                 :             : {
    2000                 :      122092 :    struct _vect_peel_extended_info res;
    2001                 :             : 
    2002                 :      122092 :    res.peel_info.dr_info = NULL;
    2003                 :      122092 :    res.vinfo = loop_vinfo;
    2004                 :             : 
    2005                 :      122092 :    if (!unlimited_cost_model (LOOP_VINFO_LOOP (loop_vinfo)))
    2006                 :             :      {
    2007                 :      107581 :        res.inside_cost = INT_MAX;
    2008                 :      107581 :        res.outside_cost = INT_MAX;
    2009                 :      107581 :        peeling_htab->traverse <_vect_peel_extended_info *,
    2010                 :      217576 :                                vect_peeling_hash_get_lowest_cost> (&res);
    2011                 :             :      }
    2012                 :             :    else
    2013                 :             :      {
    2014                 :       14511 :        res.peel_info.count = 0;
    2015                 :       14511 :        peeling_htab->traverse <_vect_peel_extended_info *,
    2016                 :       52619 :                                vect_peeling_hash_get_most_frequent> (&res);
    2017                 :       14511 :        res.inside_cost = 0;
    2018                 :       14511 :        res.outside_cost = 0;
    2019                 :             :      }
    2020                 :             : 
    2021                 :      122092 :    return res;
    2022                 :             : }
    2023                 :             : 
    2024                 :             : /* Return true if the new peeling NPEEL is supported.  */
    2025                 :             : 
    2026                 :             : static bool
    2027                 :       51420 : vect_peeling_supportable (loop_vec_info loop_vinfo, dr_vec_info *dr0_info,
    2028                 :             :                           unsigned npeel)
    2029                 :             : {
    2030                 :       51420 :   vec<data_reference_p> datarefs = LOOP_VINFO_DATAREFS (loop_vinfo);
    2031                 :       51420 :   enum dr_alignment_support supportable_dr_alignment;
    2032                 :             : 
    2033                 :       51420 :   bool dr0_alignment_known_p
    2034                 :      102840 :     = known_alignment_for_access_p (dr0_info,
    2035                 :       51420 :                                     STMT_VINFO_VECTYPE (dr0_info->stmt));
    2036                 :             : 
    2037                 :             :   /* Ensure that all data refs can be vectorized after the peel.  */
    2038                 :      202224 :   for (data_reference *dr : datarefs)
    2039                 :             :     {
    2040                 :       67932 :       if (dr == dr0_info->dr)
    2041                 :       42474 :         continue;
    2042                 :             : 
    2043                 :       25458 :       dr_vec_info *dr_info = loop_vinfo->lookup_dr (dr);
    2044                 :       25458 :       if (!vect_relevant_for_alignment_p (dr_info)
    2045                 :       25458 :           || vect_dr_aligned_if_peeled_dr_is (dr_info, dr0_info))
    2046                 :        4716 :         continue;
    2047                 :             : 
    2048                 :       20742 :       tree vectype = STMT_VINFO_VECTYPE (dr_info->stmt);
    2049                 :       20742 :       int misalignment;
    2050                 :       20742 :       unsigned HOST_WIDE_INT alignment;
    2051                 :       20742 :       if (!dr0_alignment_known_p
    2052                 :        1733 :           || !known_alignment_for_access_p (dr_info, vectype)
    2053                 :       22475 :           || !DR_TARGET_ALIGNMENT (dr_info).is_constant (&alignment))
    2054                 :             :         misalignment = DR_MISALIGNMENT_UNKNOWN;
    2055                 :             :       else
    2056                 :             :         {
    2057                 :        1719 :           misalignment = dr_misalignment (dr_info, vectype);
    2058                 :        1719 :           misalignment += npeel * TREE_INT_CST_LOW (DR_STEP (dr_info->dr));
    2059                 :        1719 :           misalignment &= alignment - 1;
    2060                 :             :         }
    2061                 :       20742 :       supportable_dr_alignment
    2062                 :       20742 :         = vect_supportable_dr_alignment (loop_vinfo, dr_info, vectype,
    2063                 :             :                                          misalignment);
    2064                 :       20742 :       if (supportable_dr_alignment == dr_unaligned_unsupported)
    2065                 :       51420 :         return false;
    2066                 :             :     }
    2067                 :             : 
    2068                 :             :   return true;
    2069                 :             : }
    2070                 :             : 
    2071                 :             : /* Compare two data-references DRA and DRB to group them into chunks
    2072                 :             :    with related alignment.  */
    2073                 :             : 
    2074                 :             : static int
    2075                 :     3968563 : dr_align_group_sort_cmp (const void *dra_, const void *drb_)
    2076                 :             : {
    2077                 :     3968563 :   data_reference_p dra = *(data_reference_p *)const_cast<void *>(dra_);
    2078                 :     3968563 :   data_reference_p drb = *(data_reference_p *)const_cast<void *>(drb_);
    2079                 :     3968563 :   int cmp;
    2080                 :             : 
    2081                 :             :   /* Stabilize sort.  */
    2082                 :     3968563 :   if (dra == drb)
    2083                 :             :     return 0;
    2084                 :             : 
    2085                 :             :   /* Ordering of DRs according to base.  */
    2086                 :     3968563 :   cmp = data_ref_compare_tree (DR_BASE_ADDRESS (dra),
    2087                 :             :                                DR_BASE_ADDRESS (drb));
    2088                 :     3968563 :   if (cmp != 0)
    2089                 :             :     return cmp;
    2090                 :             : 
    2091                 :             :   /* And according to DR_OFFSET.  */
    2092                 :     1954109 :   cmp = data_ref_compare_tree (DR_OFFSET (dra), DR_OFFSET (drb));
    2093                 :     1954109 :   if (cmp != 0)
    2094                 :             :     return cmp;
    2095                 :             : 
    2096                 :             :   /* And according to DR_STEP.  */
    2097                 :     1941063 :   cmp = data_ref_compare_tree (DR_STEP (dra), DR_STEP (drb));
    2098                 :     1941063 :   if (cmp != 0)
    2099                 :             :     return cmp;
    2100                 :             : 
    2101                 :             :   /* Then sort by DR_INIT.  In case of identical DRs, sort by stmt UID.  */
    2102                 :     1936780 :   cmp = data_ref_compare_tree (DR_INIT (dra), DR_INIT (drb));
    2103                 :     1936780 :   if (cmp == 0)
    2104                 :      209814 :     return gimple_uid (DR_STMT (dra)) < gimple_uid (DR_STMT (drb)) ? -1 : 1;
    2105                 :             :   return cmp;
    2106                 :             : }
    2107                 :             : 
    2108                 :             : /* Function vect_enhance_data_refs_alignment
    2109                 :             : 
    2110                 :             :    This pass will use loop versioning and loop peeling in order to enhance
    2111                 :             :    the alignment of data references in the loop.
    2112                 :             : 
    2113                 :             :    FOR NOW: we assume that whatever versioning/peeling takes place, only the
    2114                 :             :    original loop is to be vectorized.  Any other loops that are created by
    2115                 :             :    the transformations performed in this pass are not supposed to be
    2116                 :             :    vectorized.  This restriction will be relaxed.
    2117                 :             : 
    2118                 :             :    This pass will require a cost model to guide it whether to apply peeling
    2119                 :             :    or versioning or a combination of the two.  For example, the scheme that
    2120                 :             :    intel uses when given a loop with several memory accesses, is as follows:
    2121                 :             :    Intel uses when given a loop with several memory accesses is as follows:
    2122                 :             :    choose one memory access ('p') whose alignment you want to force by doing
    2123                 :             :    other accesses are not necessarily aligned, or (2) use loop versioning to
    2124                 :             :    generate one loop in which all accesses are aligned, and another loop in
    2125                 :             :    which only 'p' is necessarily aligned.
    2126                 :             : 
    2127                 :             :    ("Automatic Intra-Register Vectorization for the Intel Architecture",
    2128                 :             :    Aart J.C. Bik, Milind Girkar, Paul M. Grey and Xinmin Tian, International
    2129                 :             :    Journal of Parallel Programming, Vol. 30, No. 2, April 2002.)
    2130                 :             : 
    2131                 :             :    Devising a cost model is the most critical aspect of this work.  It will
    2132                 :             :    guide us on which access to peel for, whether to use loop versioning, how
    2133                 :             :    many versions to create, etc.  The cost model will probably consist of
    2134                 :             :    generic considerations as well as target specific considerations (on
    2135                 :             :    powerpc for example, misaligned stores are more painful than misaligned
    2136                 :             :    loads).
    2137                 :             : 
    2138                 :             :    Here are the general steps involved in alignment enhancements:
    2139                 :             : 
    2140                 :             :      -- original loop, before alignment analysis:
    2141                 :             :         for (i=0; i<N; i++){
    2142                 :             :           x = q[i];                     # DR_MISALIGNMENT(q) = unknown
    2143                 :             :           p[i] = y;                     # DR_MISALIGNMENT(p) = unknown
    2144                 :             :         }
    2145                 :             : 
    2146                 :             :      -- After vect_compute_data_refs_alignment:
    2147                 :             :         for (i=0; i<N; i++){
    2148                 :             :           x = q[i];                     # DR_MISALIGNMENT(q) = 3
    2149                 :             :           p[i] = y;                     # DR_MISALIGNMENT(p) = unknown
    2150                 :             :         }
    2151                 :             : 
    2152                 :             :      -- Possibility 1: we do loop versioning:
    2153                 :             :      if (p is aligned) {
    2154                 :             :         for (i=0; i<N; i++){ # loop 1A
    2155                 :             :           x = q[i];                     # DR_MISALIGNMENT(q) = 3
    2156                 :             :           p[i] = y;                     # DR_MISALIGNMENT(p) = 0
    2157                 :             :         }
    2158                 :             :      }
    2159                 :             :      else {
    2160                 :             :         for (i=0; i<N; i++){ # loop 1B
    2161                 :             :           x = q[i];                     # DR_MISALIGNMENT(q) = 3
    2162                 :             :           p[i] = y;                     # DR_MISALIGNMENT(p) = unaligned
    2163                 :             :         }
    2164                 :             :      }
    2165                 :             : 
    2166                 :             :      -- Possibility 2: we do loop peeling:
    2167                 :             :      for (i = 0; i < 3; i++){        # (scalar loop, not to be vectorized).
    2168                 :             :         x = q[i];
    2169                 :             :         p[i] = y;
    2170                 :             :      }
    2171                 :             :      for (i = 3; i < N; i++){        # loop 2A
    2172                 :             :         x = q[i];                       # DR_MISALIGNMENT(q) = 0
    2173                 :             :         p[i] = y;                       # DR_MISALIGNMENT(p) = unknown
    2174                 :             :      }
    2175                 :             : 
    2176                 :             :      -- Possibility 3: combination of loop peeling and versioning:
    2177                 :             :      for (i = 0; i < 3; i++){        # (scalar loop, not to be vectorized).
    2178                 :             :         x = q[i];
    2179                 :             :         p[i] = y;
    2180                 :             :      }
    2181                 :             :      if (p is aligned) {
    2182                 :             :         for (i = 3; i<N; i++){       # loop 3A
    2183                 :             :           x = q[i];                     # DR_MISALIGNMENT(q) = 0
    2184                 :             :           p[i] = y;                     # DR_MISALIGNMENT(p) = 0
    2185                 :             :         }
    2186                 :             :      }
    2187                 :             :      else {
    2188                 :             :         for (i = 3; i<N; i++){       # loop 3B
    2189                 :             :           x = q[i];                     # DR_MISALIGNMENT(q) = 0
    2190                 :             :           p[i] = y;                     # DR_MISALIGNMENT(p) = unaligned
    2191                 :             :         }
    2192                 :             :      }
    2193                 :             : 
    2194                 :             :      These loops are later passed to loop_transform to be vectorized.  The
    2195                 :             :      vectorizer will use the alignment information to guide the transformation
    2196                 :             :      (whether to generate regular loads/stores, or with special handling for
    2197                 :             :      misalignment).  */
    2198                 :             : 
    2199                 :             : opt_result
    2200                 :      302441 : vect_enhance_data_refs_alignment (loop_vec_info loop_vinfo)
    2201                 :             : {
    2202                 :      302441 :   class loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
    2203                 :      302441 :   dr_vec_info *first_store = NULL;
    2204                 :      302441 :   dr_vec_info *dr0_info = NULL;
    2205                 :      302441 :   struct data_reference *dr;
    2206                 :      302441 :   unsigned int i;
    2207                 :      302441 :   bool do_peeling = false;
    2208                 :      302441 :   bool do_versioning = false;
    2209                 :      302441 :   unsigned int npeel = 0;
    2210                 :      302441 :   bool one_misalignment_known = false;
    2211                 :      302441 :   bool one_misalignment_unknown = false;
    2212                 :      302441 :   bool one_dr_unsupportable = false;
    2213                 :      302441 :   dr_vec_info *unsupportable_dr_info = NULL;
    2214                 :      302441 :   unsigned int dr0_same_align_drs = 0, first_store_same_align_drs = 0;
    2215                 :      302441 :   hash_table<peel_info_hasher> peeling_htab (1);
    2216                 :             : 
    2217                 :      302441 :   DUMP_VECT_SCOPE ("vect_enhance_data_refs_alignment");
    2218                 :             : 
    2219                 :             :   /* Reset data so we can safely be called multiple times.  */
    2220                 :      302441 :   LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo).truncate (0);
    2221                 :      302441 :   LOOP_VINFO_PEELING_FOR_ALIGNMENT (loop_vinfo) = 0;
    2222                 :             : 
    2223                 :      302441 :   if (LOOP_VINFO_DATAREFS (loop_vinfo).is_empty ())
    2224                 :       12618 :     return opt_result::success ();
    2225                 :             : 
    2226                 :             :   /* Sort the vector of datarefs so DRs that have the same or dependent
    2227                 :             :      alignment are next to each other.  */
    2228                 :      289823 :   auto_vec<data_reference_p> datarefs
    2229                 :      289823 :     = LOOP_VINFO_DATAREFS (loop_vinfo).copy ();
    2230                 :      289823 :   datarefs.qsort (dr_align_group_sort_cmp);
    2231                 :             : 
     2232                 :             :   /* For each dataref, compute the number of other DRs that also become
     2233                 :             :      aligned when we peel enough iterations to align that dataref.  */
    2234                 :      579646 :   auto_vec<unsigned> n_same_align_refs (datarefs.length ());
    2235                 :      289823 :   n_same_align_refs.quick_grow_cleared (datarefs.length ());
    2236                 :      289823 :   unsigned i0;
    2237                 :      594617 :   for (i0 = 0; i0 < datarefs.length (); ++i0)
    2238                 :      301580 :     if (DR_BASE_ADDRESS (datarefs[i0]))
    2239                 :             :       break;
    2240                 :     1928048 :   for (i = i0 + 1; i <= datarefs.length (); ++i)
    2241                 :             :     {
    2242                 :      674201 :       if (i == datarefs.length ()
    2243                 :      387592 :           || !operand_equal_p (DR_BASE_ADDRESS (datarefs[i0]),
    2244                 :      387592 :                                DR_BASE_ADDRESS (datarefs[i]), 0)
    2245                 :      197487 :           || !operand_equal_p (DR_OFFSET (datarefs[i0]),
    2246                 :      197487 :                                DR_OFFSET (datarefs[i]), 0)
    2247                 :      870456 :           || !operand_equal_p (DR_STEP (datarefs[i0]),
    2248                 :      196255 :                                DR_STEP (datarefs[i]), 0))
    2249                 :             :         {
    2250                 :             :           /* The subgroup [i0, i-1] now only differs in DR_INIT and
    2251                 :             :              possibly DR_TARGET_ALIGNMENT.  Still the whole subgroup
    2252                 :             :              will get known misalignment if we align one of the refs
    2253                 :             :              with the largest DR_TARGET_ALIGNMENT.  */
    2254                 :     1152602 :           for (unsigned j = i0; j < i; ++j)
    2255                 :             :             {
    2256                 :      674201 :               dr_vec_info *dr_infoj = loop_vinfo->lookup_dr (datarefs[j]);
    2257                 :     4714964 :               for (unsigned k = i0; k < i; ++k)
    2258                 :             :                 {
    2259                 :     4040763 :                   if (k == j)
    2260                 :      674201 :                     continue;
    2261                 :     3366562 :                   dr_vec_info *dr_infok = loop_vinfo->lookup_dr (datarefs[k]);
    2262                 :     3366562 :                   if (vect_dr_aligned_if_related_peeled_dr_is (dr_infok,
    2263                 :             :                                                                dr_infoj))
    2264                 :     1469607 :                     n_same_align_refs[j]++;
    2265                 :             :                 }
    2266                 :             :             }
    2267                 :             :           i0 = i;
    2268                 :             :         }
    2269                 :             :     }
    2270                 :             : 
    2271                 :             :   /* While cost model enhancements are expected in the future, the high level
    2272                 :             :      view of the code at this time is as follows:
    2273                 :             : 
    2274                 :             :      A) If there is a misaligned access then see if peeling to align
    2275                 :             :         this access can make all data references satisfy
    2276                 :             :         vect_supportable_dr_alignment.  If so, update data structures
    2277                 :             :         as needed and return true.
    2278                 :             : 
    2279                 :             :      B) If peeling wasn't possible and there is a data reference with an
    2280                 :             :         unknown misalignment that does not satisfy vect_supportable_dr_alignment
    2281                 :             :         then see if loop versioning checks can be used to make all data
    2282                 :             :         references satisfy vect_supportable_dr_alignment.  If so, update
    2283                 :             :         data structures as needed and return true.
    2284                 :             : 
    2285                 :             :      C) If neither peeling nor versioning were successful then return false if
    2286                 :             :         any data reference does not satisfy vect_supportable_dr_alignment.
    2287                 :             : 
    2288                 :             :      D) Return true (all data references satisfy vect_supportable_dr_alignment).
    2289                 :             : 
     2290                 :             :      Note that Possibility 3 above (which is peeling and versioning together) is not
    2291                 :             :      being done at this time.  */
    2292                 :             : 
    2293                 :             :   /* (1) Peeling to force alignment.  */
    2294                 :             : 
    2295                 :             :   /* (1.1) Decide whether to perform peeling, and how many iterations to peel:
    2296                 :             :      Considerations:
    2297                 :             :      + How many accesses will become aligned due to the peeling
    2298                 :             :      - How many accesses will become unaligned due to the peeling,
    2299                 :             :        and the cost of misaligned accesses.
    2300                 :             :      - The cost of peeling (the extra runtime checks, the increase
    2301                 :             :        in code size).  */
    2302                 :             : 
    2303                 :      824334 :   FOR_EACH_VEC_ELT (datarefs, i, dr)
    2304                 :             :     {
    2305                 :      575607 :       dr_vec_info *dr_info = loop_vinfo->lookup_dr (dr);
    2306                 :      575607 :       if (!vect_relevant_for_alignment_p (dr_info))
    2307                 :      103752 :         continue;
    2308                 :             : 
    2309                 :      471855 :       stmt_vec_info stmt_info = dr_info->stmt;
    2310                 :      471855 :       tree vectype = STMT_VINFO_VECTYPE (stmt_info);
    2311                 :      471855 :       do_peeling
    2312                 :      471855 :         = vector_alignment_reachable_p (dr_info,
    2313                 :             :                                         LOOP_VINFO_VECT_FACTOR (loop_vinfo));
    2314                 :      471855 :       if (do_peeling)
    2315                 :             :         {
    2316                 :      403835 :           if (known_alignment_for_access_p (dr_info, vectype))
    2317                 :             :             {
    2318                 :      235057 :               unsigned int npeel_tmp = 0;
    2319                 :      235057 :               bool negative = tree_int_cst_compare (DR_STEP (dr),
    2320                 :      235057 :                                                     size_zero_node) < 0;
    2321                 :             : 
    2322                 :             :               /* If known_alignment_for_access_p then we have set
     2323                 :             :                  DR_MISALIGNMENT, which is only done if we know it at compile
     2324                 :             :                  time, so it is safe to assume the target alignment is constant.
    2325                 :             :                */
    2326                 :      235057 :               unsigned int target_align =
    2327                 :      235057 :                 DR_TARGET_ALIGNMENT (dr_info).to_constant ();
    2328                 :      235057 :               unsigned HOST_WIDE_INT dr_size = vect_get_scalar_dr_size (dr_info);
    2329                 :      235057 :               poly_int64 off = 0;
    2330                 :      235057 :               if (negative)
    2331                 :        1998 :                 off = (TYPE_VECTOR_SUBPARTS (vectype) - 1) * -dr_size;
    2332                 :      235057 :               unsigned int mis = dr_misalignment (dr_info, vectype, off);
    2333                 :      235057 :               mis = negative ? mis : -mis;
    2334                 :      235057 :               if (mis != 0)
    2335                 :       11296 :                 npeel_tmp = (mis & (target_align - 1)) / dr_size;
    2336                 :             : 
    2337                 :             :               /* For multiple types, it is possible that the bigger type access
    2338                 :             :                  will have more than one peeling option.  E.g., a loop with two
    2339                 :             :                  types: one of size (vector size / 4), and the other one of
     2340                 :             :                  size (vector size / 8).  The vectorization factor will be 8.  If both
    2341                 :             :                  accesses are misaligned by 3, the first one needs one scalar
    2342                 :             :                  iteration to be aligned, and the second one needs 5.  But the
    2343                 :             :                  first one will be aligned also by peeling 5 scalar
    2344                 :             :                  iterations, and in that case both accesses will be aligned.
     2345                 :             :                  Hence, in addition to the immediate peeling amount, we also
     2346                 :             :                  want to try adding a full vector size, as long as we don't
     2347                 :             :                  exceed the vectorization factor.
     2348                 :             :                  We do this automatically for the cost model, since we
     2349                 :             :                  calculate the cost for every peeling option.  */
    2350                 :      235057 :               poly_uint64 nscalars = npeel_tmp;
    2351                 :      235057 :               if (unlimited_cost_model (LOOP_VINFO_LOOP (loop_vinfo)))
    2352                 :             :                 {
    2353                 :       41477 :                   poly_uint64 vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
    2354                 :       41477 :                   unsigned group_size = 1;
    2355                 :       41477 :                   if (STMT_SLP_TYPE (stmt_info)
    2356                 :       41477 :                       && STMT_VINFO_GROUPED_ACCESS (stmt_info))
    2357                 :        1890 :                     group_size = DR_GROUP_SIZE (stmt_info);
    2358                 :       41477 :                   nscalars = vf * group_size;
    2359                 :             :                 }
    2360                 :             : 
    2361                 :             :               /* Save info about DR in the hash table.  Also include peeling
    2362                 :             :                  amounts according to the explanation above.  Indicate
    2363                 :             :                  the alignment status when the ref is not aligned.
    2364                 :             :                  ???  Rather than using unknown alignment here we should
    2365                 :             :                  prune all entries from the peeling hashtable which cause
    2366                 :             :                  DRs to be not supported.  */
    2367                 :      235057 :               bool supportable_if_not_aligned
    2368                 :             :                 = vect_supportable_dr_alignment
    2369                 :      235057 :                     (loop_vinfo, dr_info, vectype, DR_MISALIGNMENT_UNKNOWN);
    2370                 :      533733 :               while (known_le (npeel_tmp, nscalars))
    2371                 :             :                 {
    2372                 :      298676 :                   vect_peeling_hash_insert (&peeling_htab, loop_vinfo,
    2373                 :             :                                             dr_info, npeel_tmp,
    2374                 :             :                                             supportable_if_not_aligned);
    2375                 :      298676 :                   npeel_tmp += MAX (1, target_align / dr_size);
    2376                 :             :                 }
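                          :             :               /* As an illustration (values chosen purely for exposition):
                          :             :                  with target_align == 16, dr_size == 4 and a known
                          :             :                  misalignment of 4 bytes, mis becomes -4, (-4 & 15) == 12
                          :             :                  and npeel_tmp == 3, i.e. peeling three scalar iterations
                          :             :                  aligns this access; the loop above then also records
                          :             :                  7, 11, ... as candidate peeling amounts as long as they
                          :             :                  do not exceed nscalars.  */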
    2377                 :             : 
    2378                 :      235057 :               one_misalignment_known = true;
    2379                 :             :             }
    2380                 :             :           else
    2381                 :             :             {
    2382                 :             :               /* If we don't know any misalignment values, we prefer
     2383                 :             :                  peeling for the data-ref that has the maximum number of data-refs
     2384                 :             :                  with the same alignment, unless the target prefers to align
     2385                 :             :                  stores over loads.  */
    2386                 :      168778 :               unsigned same_align_drs = n_same_align_refs[i];
    2387                 :      168778 :               if (!dr0_info
    2388                 :      168778 :                   || dr0_same_align_drs < same_align_drs)
    2389                 :             :                 {
    2390                 :             :                   dr0_same_align_drs = same_align_drs;
    2391                 :             :                   dr0_info = dr_info;
    2392                 :             :                 }
    2393                 :             :               /* For data-refs with the same number of related
     2394                 :             :                  accesses prefer the one where the misalignment
    2395                 :             :                  computation will be invariant in the outermost loop.  */
    2396                 :       57458 :               else if (dr0_same_align_drs == same_align_drs)
    2397                 :             :                 {
    2398                 :       56543 :                   class loop *ivloop0, *ivloop;
    2399                 :       56543 :                   ivloop0 = outermost_invariant_loop_for_expr
    2400                 :       56543 :                     (loop, DR_BASE_ADDRESS (dr0_info->dr));
    2401                 :       56543 :                   ivloop = outermost_invariant_loop_for_expr
    2402                 :       56543 :                     (loop, DR_BASE_ADDRESS (dr));
    2403                 :       56543 :                   if ((ivloop && !ivloop0)
    2404                 :       56543 :                       || (ivloop && ivloop0
    2405                 :       56537 :                           && flow_loop_nested_p (ivloop, ivloop0)))
    2406                 :             :                     dr0_info = dr_info;
    2407                 :             :                 }
    2408                 :             : 
    2409                 :      168778 :               one_misalignment_unknown = true;
    2410                 :             : 
    2411                 :             :               /* Check for data refs with unsupportable alignment that
    2412                 :             :                  can be peeled.  */
    2413                 :      168778 :               enum dr_alignment_support supportable_dr_alignment
    2414                 :      168778 :                 = vect_supportable_dr_alignment (loop_vinfo, dr_info, vectype,
    2415                 :             :                                                  DR_MISALIGNMENT_UNKNOWN);
    2416                 :      168778 :               if (supportable_dr_alignment == dr_unaligned_unsupported)
    2417                 :             :                 {
    2418                 :       65926 :                   one_dr_unsupportable = true;
    2419                 :       65926 :                   unsupportable_dr_info = dr_info;
    2420                 :             :                 }
    2421                 :             : 
    2422                 :      168778 :               if (!first_store && DR_IS_WRITE (dr))
    2423                 :             :                 {
    2424                 :       42361 :                   first_store = dr_info;
    2425                 :       42361 :                   first_store_same_align_drs = same_align_drs;
    2426                 :             :                 }
    2427                 :             :             }
    2428                 :             :         }
    2429                 :             :       else
    2430                 :             :         {
    2431                 :       68020 :           if (!aligned_access_p (dr_info, vectype))
    2432                 :             :             {
    2433                 :       41096 :               if (dump_enabled_p ())
    2434                 :        1939 :                 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
    2435                 :             :                                  "vector alignment may not be reachable\n");
    2436                 :             :               break;
    2437                 :             :             }
    2438                 :             :         }
    2439                 :             :     }
    2440                 :             : 
    2441                 :             :   /* Check if we can possibly peel the loop.  */
    2442                 :      289823 :   if (!vect_can_advance_ivs_p (loop_vinfo)
    2443                 :      283334 :       || !slpeel_can_duplicate_loop_p (loop, LOOP_VINFO_IV_EXIT (loop_vinfo),
    2444                 :      283334 :                                        loop_preheader_edge (loop))
    2445                 :      283334 :       || loop->inner
     2446                 :             :       /* We don't currently maintain the LCSSA form for prologue-peeled inverted
    2447                 :             :          loops.  */
    2448                 :      571556 :       || LOOP_VINFO_EARLY_BREAKS_VECT_PEELED (loop_vinfo))
    2449                 :             :     do_peeling = false;
    2450                 :             : 
    2451                 :      289823 :   struct _vect_peel_extended_info peel_for_known_alignment;
    2452                 :      289823 :   struct _vect_peel_extended_info peel_for_unknown_alignment;
    2453                 :      289823 :   struct _vect_peel_extended_info best_peel;
    2454                 :             : 
    2455                 :      289823 :   peel_for_unknown_alignment.inside_cost = INT_MAX;
    2456                 :      289823 :   peel_for_unknown_alignment.outside_cost = INT_MAX;
    2457                 :      289823 :   peel_for_unknown_alignment.peel_info.count = 0;
    2458                 :             : 
    2459                 :      289823 :   if (do_peeling
    2460                 :      289823 :       && one_misalignment_unknown)
    2461                 :             :     {
     2462                 :             :       /* Check whether the target prefers to align stores over loads, i.e.,
     2463                 :             :          whether misaligned stores are more expensive than misaligned loads
     2464                 :             :          (taking DRs with the same alignment into account).  */
    2465                 :       96714 :       unsigned int load_inside_cost = 0;
    2466                 :       96714 :       unsigned int load_outside_cost = 0;
    2467                 :       96714 :       unsigned int store_inside_cost = 0;
    2468                 :       96714 :       unsigned int store_outside_cost = 0;
    2469                 :       96714 :       unsigned int estimated_npeels = vect_vf_for_cost (loop_vinfo) / 2;
    2470                 :             : 
    2471                 :       96714 :       stmt_vector_for_cost dummy;
    2472                 :       96714 :       dummy.create (2);
    2473                 :       96714 :       vect_get_peeling_costs_all_drs (loop_vinfo, dr0_info,
    2474                 :             :                                       &load_inside_cost,
    2475                 :             :                                       &load_outside_cost,
    2476                 :             :                                       &dummy, &dummy, estimated_npeels);
    2477                 :       96714 :       dummy.release ();
    2478                 :             : 
    2479                 :       96714 :       if (first_store)
    2480                 :             :         {
    2481                 :       34216 :           dummy.create (2);
    2482                 :       34216 :           vect_get_peeling_costs_all_drs (loop_vinfo, first_store,
    2483                 :             :                                           &store_inside_cost,
    2484                 :             :                                           &store_outside_cost,
    2485                 :             :                                           &dummy, &dummy,
    2486                 :             :                                           estimated_npeels);
    2487                 :       34216 :           dummy.release ();
    2488                 :             :         }
    2489                 :             :       else
    2490                 :             :         {
    2491                 :       62498 :           store_inside_cost = INT_MAX;
    2492                 :       62498 :           store_outside_cost = INT_MAX;
    2493                 :             :         }
    2494                 :             : 
    2495                 :       96714 :       if (load_inside_cost > store_inside_cost
    2496                 :       96714 :           || (load_inside_cost == store_inside_cost
    2497                 :       34045 :               && load_outside_cost > store_outside_cost))
    2498                 :             :         {
    2499                 :       96714 :           dr0_info = first_store;
    2500                 :       96714 :           dr0_same_align_drs = first_store_same_align_drs;
    2501                 :       96714 :           peel_for_unknown_alignment.inside_cost = store_inside_cost;
    2502                 :       96714 :           peel_for_unknown_alignment.outside_cost = store_outside_cost;
    2503                 :             :         }
    2504                 :             :       else
    2505                 :             :         {
    2506                 :       96714 :           peel_for_unknown_alignment.inside_cost = load_inside_cost;
    2507                 :       96714 :           peel_for_unknown_alignment.outside_cost = load_outside_cost;
    2508                 :             :         }
    2509                 :             : 
    2510                 :       96714 :       stmt_vector_for_cost prologue_cost_vec, epilogue_cost_vec;
    2511                 :       96714 :       prologue_cost_vec.create (2);
    2512                 :       96714 :       epilogue_cost_vec.create (2);
    2513                 :             : 
    2514                 :       96714 :       int dummy2;
    2515                 :      193428 :       peel_for_unknown_alignment.outside_cost += vect_get_known_peeling_cost
    2516                 :       96714 :         (loop_vinfo, estimated_npeels, &dummy2,
    2517                 :             :          &LOOP_VINFO_SCALAR_ITERATION_COST (loop_vinfo),
    2518                 :             :          &prologue_cost_vec, &epilogue_cost_vec);
    2519                 :             : 
    2520                 :       96714 :       prologue_cost_vec.release ();
    2521                 :       96714 :       epilogue_cost_vec.release ();
    2522                 :             : 
    2523                 :       96714 :       peel_for_unknown_alignment.peel_info.count = dr0_same_align_drs + 1;
    2524                 :             :     }
    2525                 :             : 
    2526                 :      289823 :   peel_for_unknown_alignment.peel_info.npeel = 0;
    2527                 :      289823 :   peel_for_unknown_alignment.peel_info.dr_info = dr0_info;
    2528                 :             : 
    2529                 :      289823 :   best_peel = peel_for_unknown_alignment;
    2530                 :             : 
    2531                 :      289823 :   peel_for_known_alignment.inside_cost = INT_MAX;
    2532                 :      289823 :   peel_for_known_alignment.outside_cost = INT_MAX;
    2533                 :      289823 :   peel_for_known_alignment.peel_info.count = 0;
    2534                 :      289823 :   peel_for_known_alignment.peel_info.dr_info = NULL;
    2535                 :             : 
    2536                 :      289823 :   if (do_peeling && one_misalignment_known)
    2537                 :             :     {
    2538                 :             :       /* Peeling is possible, but there is no data access that is not supported
    2539                 :             :          unless aligned.  So we try to choose the best possible peeling from
    2540                 :             :          the hash table.  */
    2541                 :      122092 :       peel_for_known_alignment = vect_peeling_hash_choose_best_peeling
    2542                 :      122092 :         (&peeling_htab, loop_vinfo);
    2543                 :             :     }
    2544                 :             : 
    2545                 :             :   /* Compare costs of peeling for known and unknown alignment. */
    2546                 :      289823 :   if (peel_for_known_alignment.peel_info.dr_info != NULL
    2547                 :      122092 :       && peel_for_unknown_alignment.inside_cost
    2548                 :             :       >= peel_for_known_alignment.inside_cost)
    2549                 :             :     {
    2550                 :      115587 :       best_peel = peel_for_known_alignment;
    2551                 :             : 
    2552                 :             :       /* If the best peeling for known alignment has NPEEL == 0, perform no
    2553                 :             :          peeling at all except if there is an unsupportable dr that we can
    2554                 :             :          align.  */
    2555                 :      115587 :       if (best_peel.peel_info.npeel == 0 && !one_dr_unsupportable)
    2556                 :             :         do_peeling = false;
    2557                 :             :     }
    2558                 :             : 
    2559                 :             :   /* If there is an unsupportable data ref, prefer this over all choices so far
    2560                 :             :      since we'd have to discard a chosen peeling except when it accidentally
    2561                 :             :      aligned the unsupportable data ref.  */
    2562                 :      180429 :   if (one_dr_unsupportable)
    2563                 :             :     dr0_info = unsupportable_dr_info;
    2564                 :      238195 :   else if (do_peeling)
    2565                 :             :     {
    2566                 :             :       /* Calculate the penalty for no peeling, i.e. leaving everything as-is.
    2567                 :             :          TODO: Use nopeel_outside_cost or get rid of it?  */
    2568                 :       49608 :       unsigned nopeel_inside_cost = 0;
    2569                 :       49608 :       unsigned nopeel_outside_cost = 0;
    2570                 :             : 
    2571                 :       49608 :       stmt_vector_for_cost dummy;
    2572                 :       49608 :       dummy.create (2);
    2573                 :       49608 :       vect_get_peeling_costs_all_drs (loop_vinfo, NULL, &nopeel_inside_cost,
    2574                 :             :                                       &nopeel_outside_cost, &dummy, &dummy, 0);
    2575                 :       49608 :       dummy.release ();
    2576                 :             : 
    2577                 :             :       /* Add epilogue costs.  As we do not peel for alignment here, no prologue
    2578                 :             :          costs will be recorded.  */
    2579                 :       49608 :       stmt_vector_for_cost prologue_cost_vec, epilogue_cost_vec;
    2580                 :       49608 :       prologue_cost_vec.create (2);
    2581                 :       49608 :       epilogue_cost_vec.create (2);
    2582                 :             : 
    2583                 :       49608 :       int dummy2;
    2584                 :       99216 :       nopeel_outside_cost += vect_get_known_peeling_cost
    2585                 :       49608 :         (loop_vinfo, 0, &dummy2,
    2586                 :             :          &LOOP_VINFO_SCALAR_ITERATION_COST (loop_vinfo),
    2587                 :             :          &prologue_cost_vec, &epilogue_cost_vec);
    2588                 :             : 
    2589                 :       49608 :       prologue_cost_vec.release ();
    2590                 :       49608 :       epilogue_cost_vec.release ();
    2591                 :             : 
    2592                 :       49608 :       npeel = best_peel.peel_info.npeel;
    2593                 :       49608 :       dr0_info = best_peel.peel_info.dr_info;
    2594                 :             : 
     2595                 :             :       /* If not peeling at all is no more expensive than the best peeling we
     2596                 :             :          have found so far, don't perform any peeling.  */
    2597                 :       49608 :       if (nopeel_inside_cost <= best_peel.inside_cost)
    2598                 :       44279 :         do_peeling = false;
    2599                 :             :     }
    2600                 :             : 
    2601                 :      101236 :   if (do_peeling)
    2602                 :             :     {
    2603                 :       51420 :       stmt_vec_info stmt_info = dr0_info->stmt;
    2604                 :       51420 :       if (known_alignment_for_access_p (dr0_info,
    2605                 :             :                                         STMT_VINFO_VECTYPE (stmt_info)))
    2606                 :             :         {
    2607                 :        5308 :           bool negative = tree_int_cst_compare (DR_STEP (dr0_info->dr),
    2608                 :        5308 :                                                 size_zero_node) < 0;
    2609                 :        5308 :           if (!npeel)
    2610                 :             :             {
    2611                 :             :               /* Since it's known at compile time, compute the number of
    2612                 :             :                  iterations in the peeled loop (the peeling factor) for use in
    2613                 :             :                  updating DR_MISALIGNMENT values.  The peeling factor is the
    2614                 :             :                  vectorization factor minus the misalignment as an element
    2615                 :             :                  count.  */
    2616                 :           0 :               tree vectype = STMT_VINFO_VECTYPE (stmt_info);
    2617                 :           0 :               poly_int64 off = 0;
    2618                 :           0 :               if (negative)
    2619                 :           0 :                 off = ((TYPE_VECTOR_SUBPARTS (vectype) - 1)
    2620                 :           0 :                        * -TREE_INT_CST_LOW (TYPE_SIZE_UNIT (TREE_TYPE (vectype))));
    2621                 :           0 :               unsigned int mis
    2622                 :           0 :                 = dr_misalignment (dr0_info, vectype, off);
    2623                 :           0 :               mis = negative ? mis : -mis;
    2624                 :             :               /* If known_alignment_for_access_p then we have set
     2625                 :             :                  DR_MISALIGNMENT, which is only done if we know it at compile
     2626                 :             :                  time, so it is safe to assume the target alignment is constant.
    2627                 :             :                */
    2628                 :           0 :               unsigned int target_align =
    2629                 :           0 :                 DR_TARGET_ALIGNMENT (dr0_info).to_constant ();
    2630                 :           0 :               npeel = ((mis & (target_align - 1))
    2631                 :           0 :                        / vect_get_scalar_dr_size (dr0_info));
    2632                 :             :             }
    2633                 :             : 
    2634                 :             :           /* For interleaved data access every iteration accesses all the
    2635                 :             :              members of the group, therefore we divide the number of iterations
    2636                 :             :              by the group size.  */
    2637                 :        5308 :           if (STMT_VINFO_GROUPED_ACCESS (stmt_info))
    2638                 :         221 :             npeel /= DR_GROUP_SIZE (stmt_info);
    2639                 :             : 
    2640                 :        5308 :           if (dump_enabled_p ())
    2641                 :         260 :             dump_printf_loc (MSG_NOTE, vect_location,
    2642                 :             :                              "Try peeling by %d\n", npeel);
    2643                 :             :         }
    2644                 :             : 
    2645                 :             :       /* Ensure that all datarefs can be vectorized after the peel.  */
    2646                 :       51420 :       if (!vect_peeling_supportable (loop_vinfo, dr0_info, npeel))
    2647                 :             :         do_peeling = false;
    2648                 :             : 
    2649                 :             :       /* Check if all datarefs are supportable and log.  */
    2650                 :       51420 :       if (do_peeling
    2651                 :       51420 :           && npeel == 0
    2652                 :       51420 :           && known_alignment_for_access_p (dr0_info,
    2653                 :             :                                            STMT_VINFO_VECTYPE (stmt_info)))
    2654                 :           3 :         return opt_result::success ();
    2655                 :             : 
    2656                 :             :       /* Cost model #1 - honor --param vect-max-peeling-for-alignment.  */
    2657                 :       51417 :       if (do_peeling)
    2658                 :             :         {
    2659                 :       31449 :           unsigned max_allowed_peel
    2660                 :       31449 :             = param_vect_max_peeling_for_alignment;
    2661                 :       31449 :           if (loop_cost_model (loop) <= VECT_COST_MODEL_CHEAP)
    2662                 :             :             max_allowed_peel = 0;
    2663                 :        6351 :           if (max_allowed_peel != (unsigned)-1)
    2664                 :             :             {
    2665                 :       25101 :               unsigned max_peel = npeel;
    2666                 :       25101 :               if (max_peel == 0)
    2667                 :             :                 {
    2668                 :       22448 :                   poly_uint64 target_align = DR_TARGET_ALIGNMENT (dr0_info);
    2669                 :       22448 :                   unsigned HOST_WIDE_INT target_align_c;
    2670                 :       22448 :                   if (target_align.is_constant (&target_align_c))
    2671                 :       44896 :                     max_peel =
    2672                 :       22448 :                       target_align_c / vect_get_scalar_dr_size (dr0_info) - 1;
    2673                 :             :                   else
    2674                 :             :                     {
    2675                 :             :                       do_peeling = false;
    2676                 :             :                       if (dump_enabled_p ())
    2677                 :             :                         dump_printf_loc (MSG_NOTE, vect_location,
    2678                 :             :                           "Disable peeling, max peels set and vector"
    2679                 :             :                           " alignment unknown\n");
    2680                 :             :                     }
    2681                 :             :                 }
    2682                 :       25101 :               if (max_peel > max_allowed_peel)
    2683                 :             :                 {
    2684                 :       25101 :                   do_peeling = false;
    2685                 :       25101 :                   if (dump_enabled_p ())
    2686                 :          51 :                     dump_printf_loc (MSG_NOTE, vect_location,
    2687                 :             :                         "Disable peeling, max peels reached: %d\n", max_peel);
    2688                 :             :                 }
    2689                 :             :             }
    2690                 :             :         }
    2691                 :             : 
    2692                 :             :       /* Cost model #2 - if peeling may result in a remaining loop not
    2693                 :             :          iterating enough to be vectorized then do not peel.  Since this
    2694                 :             :          is a cost heuristic rather than a correctness decision, use the
    2695                 :             :          most likely runtime value for variable vectorization factors.  */
    2696                 :          51 :       if (do_peeling
    2697                 :        6348 :           && LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo))
    2698                 :             :         {
    2699                 :        2366 :           unsigned int assumed_vf = vect_vf_for_cost (loop_vinfo);
    2700                 :        2366 :           unsigned int max_peel = npeel == 0 ? assumed_vf - 1 : npeel;
    2701                 :        2366 :           if ((unsigned HOST_WIDE_INT) LOOP_VINFO_INT_NITERS (loop_vinfo)
    2702                 :        2366 :               < assumed_vf + max_peel)
    2703                 :             :             do_peeling = false;
    2704                 :             :         }
    2705                 :             : 
    2706                 :             :       if (do_peeling)
    2707                 :             :         {
    2708                 :             :           /* (1.2) Update the DR_MISALIGNMENT of each data reference DR_i.
    2709                 :             :              If the misalignment of DR_i is identical to that of dr0 then set
    2710                 :             :              DR_MISALIGNMENT (DR_i) to zero.  If the misalignment of DR_i and
    2711                 :             :              dr0 are known at compile time then increment DR_MISALIGNMENT (DR_i)
    2712                 :             :              by the peeling factor times the element size of DR_i (MOD the
    2713                 :             :              vectorization factor times the size).  Otherwise, the
    2714                 :             :              misalignment of DR_i must be set to unknown.  */
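                          :             :           /* E.g. following the rule above, peeling npeel == 2 iterations
                          :             :              for a DR_i with element size 4 and DR_MISALIGNMENT 8 gives
                          :             :              (8 + 2 * 4) % 16 == 0 with a vectorization factor of 4, so
                          :             :              DR_i happens to become aligned as well.  */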
    2715                 :       12990 :           FOR_EACH_VEC_ELT (datarefs, i, dr)
    2716                 :        7090 :             if (dr != dr0_info->dr)
    2717                 :             :               {
    2718                 :        1190 :                 dr_vec_info *dr_info = loop_vinfo->lookup_dr (dr);
    2719                 :        1190 :                 if (!vect_relevant_for_alignment_p (dr_info))
    2720                 :         344 :                   continue;
    2721                 :             : 
    2722                 :         846 :                 vect_update_misalignment_for_peel (dr_info, dr0_info, npeel);
    2723                 :             :               }
    2724                 :             : 
    2725                 :        5900 :           LOOP_VINFO_UNALIGNED_DR (loop_vinfo) = dr0_info;
    2726                 :        5900 :           if (npeel)
    2727                 :        1153 :             LOOP_VINFO_PEELING_FOR_ALIGNMENT (loop_vinfo) = npeel;
    2728                 :             :           else
    2729                 :        4747 :             LOOP_VINFO_PEELING_FOR_ALIGNMENT (loop_vinfo) = -1;
    2730                 :        5900 :           SET_DR_MISALIGNMENT (dr0_info,
    2731                 :             :                                vect_dr_misalign_for_aligned_access (dr0_info));
    2732                 :        5900 :           if (dump_enabled_p ())
    2733                 :             :             {
    2734                 :         282 :               dump_printf_loc (MSG_NOTE, vect_location,
    2735                 :             :                                "Alignment of access forced using peeling.\n");
    2736                 :         282 :               dump_printf_loc (MSG_NOTE, vect_location,
    2737                 :             :                                "Peeling for alignment will be applied.\n");
    2738                 :             :             }
    2739                 :             : 
    2740                 :             :           /* The inside-loop cost will be accounted for in vectorizable_load
    2741                 :             :              and vectorizable_store correctly with adjusted alignments.
     2742                 :             :              Drop the body_cost_vec on the floor here.  */
    2743                 :        5900 :           return opt_result::success ();
    2744                 :             :         }
    2745                 :             :     }
    2746                 :             : 
    2747                 :             :   /* (2) Versioning to force alignment.  */
    2748                 :             : 
    2749                 :             :   /* Try versioning if:
    2750                 :             :      1) optimize loop for speed and the cost-model is not cheap
    2751                 :             :      2) there is at least one unsupported misaligned data ref with an unknown
    2752                 :             :         misalignment, and
    2753                 :             :      3) all misaligned data refs with a known misalignment are supported, and
    2754                 :             :      4) the number of runtime alignment checks is within reason.  */
    2755                 :             : 
    2756                 :      283920 :   do_versioning
    2757                 :      283920 :     = (optimize_loop_nest_for_speed_p (loop)
    2758                 :      283477 :        && !loop->inner /* FORNOW */
    2759                 :      565796 :        && loop_cost_model (loop) > VECT_COST_MODEL_CHEAP);
    2760                 :             : 
    2761                 :             :   if (do_versioning)
    2762                 :             :     {
    2763                 :      289691 :       FOR_EACH_VEC_ELT (datarefs, i, dr)
    2764                 :             :         {
    2765                 :      219636 :           dr_vec_info *dr_info = loop_vinfo->lookup_dr (dr);
    2766                 :      219636 :           if (!vect_relevant_for_alignment_p (dr_info))
    2767                 :      159599 :             continue;
    2768                 :             : 
    2769                 :      151157 :           stmt_vec_info stmt_info = dr_info->stmt;
    2770                 :      151157 :           if (STMT_VINFO_STRIDED_P (stmt_info))
    2771                 :             :             {
    2772                 :             :               do_versioning = false;
    2773                 :        5163 :               break;
    2774                 :             :             }
    2775                 :             : 
    2776                 :      150251 :           tree vectype = STMT_VINFO_VECTYPE (stmt_info);
    2777                 :      150251 :           bool negative = tree_int_cst_compare (DR_STEP (dr),
    2778                 :      150251 :                                                 size_zero_node) < 0;
    2779                 :      150251 :           poly_int64 off = 0;
    2780                 :      150251 :           if (negative)
    2781                 :        2816 :             off = ((TYPE_VECTOR_SUBPARTS (vectype) - 1)
    2782                 :        2816 :                    * -TREE_INT_CST_LOW (TYPE_SIZE_UNIT (TREE_TYPE (vectype))));
    2783                 :      150251 :           int misalignment;
    2784                 :      150251 :           if ((misalignment = dr_misalignment (dr_info, vectype, off)) == 0)
    2785                 :       91120 :             continue;
    2786                 :             : 
    2787                 :       59131 :           enum dr_alignment_support supportable_dr_alignment
    2788                 :       59131 :             = vect_supportable_dr_alignment (loop_vinfo, dr_info, vectype,
    2789                 :             :                                              misalignment);
    2790                 :       59131 :           if (supportable_dr_alignment == dr_unaligned_unsupported)
    2791                 :             :             {
    2792                 :       16300 :               if (misalignment != DR_MISALIGNMENT_UNKNOWN
    2793                 :       16300 :                   || (LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo).length ()
    2794                 :       12597 :                       >= (unsigned) param_vect_max_version_for_alignment_checks))
    2795                 :             :                 {
    2796                 :             :                   do_versioning = false;
    2797                 :        5163 :                   break;
    2798                 :             :                 }
    2799                 :             : 
    2800                 :             :               /* At present we don't support versioning for alignment
    2801                 :             :                  with variable VF, since there's no guarantee that the
    2802                 :             :                  VF is a power of two.  We could relax this if we added
    2803                 :             :                  a way of enforcing a power-of-two size.  */
    2804                 :       12043 :               unsigned HOST_WIDE_INT size;
    2805                 :       24086 :               if (!GET_MODE_SIZE (TYPE_MODE (vectype)).is_constant (&size))
    2806                 :             :                 {
    2807                 :             :                   do_versioning = false;
    2808                 :             :                   break;
    2809                 :             :                 }
    2810                 :             : 
    2811                 :             :               /* Forcing alignment in the first iteration is no good if
    2812                 :             :                  we don't keep it across iterations.  For now, just disable
    2813                 :             :                  versioning in this case.
    2814                 :             :                  ?? We could actually unroll the loop to achieve the required
    2815                 :             :                  overall step alignment, and forcing the alignment could be
    2816                 :             :                  done by doing some iterations of the non-vectorized loop.  */
    2817                 :       12043 :               if (!multiple_p (LOOP_VINFO_VECT_FACTOR (loop_vinfo)
    2818                 :       12043 :                                * DR_STEP_ALIGNMENT (dr),
    2819                 :       12043 :                                DR_TARGET_ALIGNMENT (dr_info)))
    2820                 :             :                 {
    2821                 :             :                   do_versioning = false;
    2822                 :             :                   break;
    2823                 :             :                 }
    2824                 :             : 
    2825                 :             :               /* The rightmost bits of an aligned address must be zeros.
    2826                 :             :                  Construct the mask needed for this test.  For example,
    2827                 :             :                  GET_MODE_SIZE for the vector mode V4SI is 16 bytes so the
    2828                 :             :                  mask must be 15 = 0xf. */
    2829                 :       12043 :               int mask = size - 1;
    2830                 :             : 
    2831                 :             :               /* FORNOW: use the same mask to test all potentially unaligned
    2832                 :             :                  references in the loop.  */
    2833                 :       12043 :               if (LOOP_VINFO_PTR_MASK (loop_vinfo)
    2834                 :        9013 :                   && LOOP_VINFO_PTR_MASK (loop_vinfo) != mask)
    2835                 :             :                 {
    2836                 :             :                   do_versioning = false;
    2837                 :             :                   break;
    2838                 :             :                 }
    2839                 :             : 
    2840                 :       12043 :               LOOP_VINFO_PTR_MASK (loop_vinfo) = mask;
    2841                 :       12043 :               LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo).safe_push (stmt_info);
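                          :             :               /* Conceptually, the runtime check emitted for the versioned
                          :             :                  loop later verifies that every recorded address ADDR
                          :             :                  satisfies (ADDR & mask) == 0, selecting the copy of the
                          :             :                  loop in which these accesses are treated as aligned
                          :             :                  (cf. "Possibility 1" in the comment before this
                          :             :                  function).  */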
    2842                 :             :             }
    2843                 :             :         }
    2844                 :             : 
    2845                 :             :       /* Versioning requires at least one misaligned data reference.  */
    2846                 :       75218 :       if (!LOOP_REQUIRES_VERSIONING_FOR_ALIGNMENT (loop_vinfo))
    2847                 :             :         do_versioning = false;
    2848                 :        6031 :       else if (!do_versioning)
    2849                 :         566 :         LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo).truncate (0);
    2850                 :             :     }
    2851                 :             : 
    2852                 :         566 :   if (do_versioning)
    2853                 :             :     {
    2854                 :             :       const vec<stmt_vec_info> &may_misalign_stmts
    2855                 :             :         = LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo);
    2856                 :             :       stmt_vec_info stmt_info;
    2857                 :             : 
    2858                 :             :       /* It can now be assumed that the data references in the statements
    2859                 :             :          in LOOP_VINFO_MAY_MISALIGN_STMTS will be aligned in the version
    2860                 :             :          of the loop being vectorized.  */
    2861                 :       14148 :       FOR_EACH_VEC_ELT (may_misalign_stmts, i, stmt_info)
    2862                 :             :         {
    2863                 :        8683 :           dr_vec_info *dr_info = STMT_VINFO_DR_INFO (stmt_info);
    2864                 :        8683 :           SET_DR_MISALIGNMENT (dr_info,
    2865                 :             :                                vect_dr_misalign_for_aligned_access (dr_info));
    2866                 :        8683 :           if (dump_enabled_p ())
    2867                 :         145 :             dump_printf_loc (MSG_NOTE, vect_location,
    2868                 :             :                              "Alignment of access forced using versioning.\n");
    2869                 :             :         }
    2870                 :             : 
    2871                 :        5465 :       if (dump_enabled_p ())
    2872                 :          86 :         dump_printf_loc (MSG_NOTE, vect_location,
    2873                 :             :                          "Versioning for alignment will be applied.\n");
    2874                 :             : 
    2875                 :             :       /* Peeling and versioning can't be done together at this time.  */
    2876                 :        5465 :       gcc_assert (! (do_peeling && do_versioning));
    2877                 :             : 
    2878                 :        5465 :       return opt_result::success ();
    2879                 :             :     }
    2880                 :             : 
    2881                 :             :   /* This point is reached if neither peeling nor versioning is being done.  */
    2882                 :      278455 :   gcc_assert (! (do_peeling || do_versioning));
    2883                 :             : 
    2884                 :      278455 :   return opt_result::success ();
    2885                 :      592264 : }
    2886                 :             : 
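
The versioning path above reduces at run time to one guard per potentially misaligned reference: the low bits of the address must be zero for the chosen vector alignment, and (FORNOW) every such reference in the loop must share the same mask. A minimal standalone sketch of that address test, assuming a 16-byte (V4SI-style) alignment; is_aligned and buf are illustrative names, not part of GCC:

#include <cstdint>
#include <cstdio>

// Illustrative only: the runtime guard created for versioning-for-alignment
// amounts to testing that the low bits of each candidate address are zero.
static bool is_aligned (const void *p, unsigned vector_size)
{
  // For a 16-byte vector the mask is vector_size - 1 = 15 = 0xf, matching
  // the "int mask = size - 1" computation above.
  std::uintptr_t mask = vector_size - 1;
  return (reinterpret_cast<std::uintptr_t> (p) & mask) == 0;
}

int main ()
{
  alignas (16) int buf[8];
  std::printf ("buf+0 aligned: %d\n", is_aligned (buf, 16));      /* prints 1 */
  std::printf ("buf+1 aligned: %d\n", is_aligned (buf + 1, 16));  /* prints 0 */
}
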
    2887                 :             : 
    2888                 :             : /* Function vect_analyze_data_refs_alignment
    2889                 :             : 
    2890                 :             :    Analyze the alignment of the data-references in the loop.
    2891                 :             :    Return FALSE if a data reference is found that cannot be vectorized.  */
    2892                 :             : 
    2893                 :             : opt_result
    2894                 :      339941 : vect_analyze_data_refs_alignment (loop_vec_info loop_vinfo)
    2895                 :             : {
    2896                 :      339941 :   DUMP_VECT_SCOPE ("vect_analyze_data_refs_alignment");
    2897                 :             : 
    2898                 :      339941 :   vec<data_reference_p> datarefs = LOOP_VINFO_DATAREFS (loop_vinfo);
    2899                 :      339941 :   struct data_reference *dr;
    2900                 :      339941 :   unsigned int i;
    2901                 :             : 
    2902                 :      339941 :   vect_record_base_alignments (loop_vinfo);
    2903                 :     1144431 :   FOR_EACH_VEC_ELT (datarefs, i, dr)
    2904                 :             :     {
    2905                 :      804490 :       dr_vec_info *dr_info = loop_vinfo->lookup_dr (dr);
    2906                 :      804490 :       if (STMT_VINFO_VECTORIZABLE (dr_info->stmt))
    2907                 :             :         {
    2908                 :      804490 :           if (STMT_VINFO_GROUPED_ACCESS (dr_info->stmt)
    2909                 :     1065272 :               && DR_GROUP_FIRST_ELEMENT (dr_info->stmt) != dr_info->stmt)
    2910                 :      122166 :             continue;
    2911                 :             : 
    2912                 :      682324 :           vect_compute_data_ref_alignment (loop_vinfo, dr_info,
    2913                 :             :                                            STMT_VINFO_VECTYPE (dr_info->stmt));
    2914                 :             :         }
    2915                 :             :     }
    2916                 :             : 
    2917                 :      339941 :   return opt_result::success ();
    2918                 :             : }
    2919                 :             : 
    2920                 :             : 
    2921                 :             : /* Analyze alignment of DRs of stmts in NODE.  */
    2922                 :             : 
    2923                 :             : static bool
    2924                 :      817796 : vect_slp_analyze_node_alignment (vec_info *vinfo, slp_tree node)
    2925                 :             : {
    2926                 :             :   /* Alignment is maintained in the first element of the group.  */
    2927                 :      817796 :   stmt_vec_info first_stmt_info = SLP_TREE_SCALAR_STMTS (node)[0];
    2928                 :      817796 :   first_stmt_info = DR_GROUP_FIRST_ELEMENT (first_stmt_info);
    2929                 :      817796 :   dr_vec_info *dr_info = STMT_VINFO_DR_INFO (first_stmt_info);
    2930                 :      817796 :   tree vectype = SLP_TREE_VECTYPE (node);
    2931                 :      817796 :   poly_uint64 vector_alignment
    2932                 :      817796 :     = exact_div (targetm.vectorize.preferred_vector_alignment (vectype),
    2933                 :             :                  BITS_PER_UNIT);
    2934                 :      817796 :   if (dr_info->misalignment == DR_MISALIGNMENT_UNINITIALIZED)
    2935                 :      790815 :     vect_compute_data_ref_alignment (vinfo, dr_info, SLP_TREE_VECTYPE (node));
    2936                 :             :   /* Re-analyze alignment when we're facing a vectorization with a bigger
    2937                 :             :      alignment requirement.  */
    2938                 :       26981 :   else if (known_lt (dr_info->target_alignment, vector_alignment))
    2939                 :             :     {
    2940                 :          78 :       poly_uint64 old_target_alignment = dr_info->target_alignment;
    2941                 :          78 :       int old_misalignment = dr_info->misalignment;
    2942                 :          78 :       vect_compute_data_ref_alignment (vinfo, dr_info, SLP_TREE_VECTYPE (node));
    2943                 :             :       /* But keep knowledge about a smaller alignment.  */
    2944                 :          78 :       if (old_misalignment != DR_MISALIGNMENT_UNKNOWN
    2945                 :          47 :           && dr_info->misalignment == DR_MISALIGNMENT_UNKNOWN)
    2946                 :             :         {
    2947                 :           1 :           dr_info->target_alignment = old_target_alignment;
    2948                 :           1 :           dr_info->misalignment = old_misalignment;
    2949                 :             :         }
    2950                 :             :     }
     2951                 :             :   /* If we ever face unordered target alignments, the first one analyzed
     2952                 :             :      wins and the other becomes unknown in dr_misalignment.  */
    2953                 :      817796 :   return true;
    2954                 :             : }
    2955                 :             : 
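
The re-analysis above is subtle: a later SLP node may demand a stricter vector alignment, but a recomputation that only yields "unknown" must not discard what is already known against the smaller alignment. A standalone sketch of that policy using simplified, made-up types (dr_align_sketch, analyze_misalignment and maybe_reanalyze are not GCC names):

// Illustrative sketch, not GCC's dr_vec_info API.
struct dr_align_sketch
{
  unsigned target_alignment;   // alignment the analysis was done against, bytes
  int misalignment;            // -1 means unknown, otherwise bytes
};

static int analyze_misalignment (unsigned)
{
  return -1;                   // pretend the larger alignment cannot be proven
}

static void maybe_reanalyze (dr_align_sketch &dr, unsigned wanted_alignment)
{
  if (dr.target_alignment >= wanted_alignment)
    return;                                  // existing analysis suffices
  dr_align_sketch old = dr;
  dr.target_alignment = wanted_alignment;
  dr.misalignment = analyze_misalignment (wanted_alignment);
  if (old.misalignment != -1 && dr.misalignment == -1)
    dr = old;                                // keep the smaller, known result
}

int main ()
{
  dr_align_sketch dr = { 16, 8 };        // known 8-byte misalignment vs. 16
  maybe_reanalyze (dr, 32);              // 32-byte requirement is unprovable
  return dr.misalignment == 8 ? 0 : 1;   // the known result is kept
}
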
    2956                 :             : /* Function vect_slp_analyze_instance_alignment
    2957                 :             : 
    2958                 :             :    Analyze the alignment of the data-references in the SLP instance.
    2959                 :             :    Return FALSE if a data reference is found that cannot be vectorized.  */
    2960                 :             : 
    2961                 :             : bool
    2962                 :      763321 : vect_slp_analyze_instance_alignment (vec_info *vinfo,
    2963                 :             :                                                 slp_instance instance)
    2964                 :             : {
    2965                 :      763321 :   DUMP_VECT_SCOPE ("vect_slp_analyze_instance_alignment");
    2966                 :             : 
    2967                 :      763321 :   slp_tree node;
    2968                 :      763321 :   unsigned i;
    2969                 :      928821 :   FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (instance), i, node)
    2970                 :      165500 :     if (! vect_slp_analyze_node_alignment (vinfo, node))
    2971                 :             :       return false;
    2972                 :             : 
    2973                 :      763321 :   if (SLP_INSTANCE_KIND (instance) == slp_inst_kind_store
    2974                 :      763321 :       && ! vect_slp_analyze_node_alignment
    2975                 :      652296 :              (vinfo, SLP_INSTANCE_TREE (instance)))
    2976                 :             :     return false;
    2977                 :             : 
    2978                 :             :   return true;
    2979                 :             : }
    2980                 :             : 
    2981                 :             : 
    2982                 :             : /* Analyze groups of accesses: check that DR_INFO belongs to a group of
    2983                 :             :    accesses of legal size, step, etc.  Detect gaps, single element
    2984                 :             :    interleaving, and other special cases. Set grouped access info.
    2985                 :             :    Collect groups of strided stores for further use in SLP analysis.
    2986                 :             :    Worker for vect_analyze_group_access.  */
    2987                 :             : 
    2988                 :             : static bool
    2989                 :    11754694 : vect_analyze_group_access_1 (vec_info *vinfo, dr_vec_info *dr_info)
    2990                 :             : {
    2991                 :    11754694 :   data_reference *dr = dr_info->dr;
    2992                 :    11754694 :   tree step = DR_STEP (dr);
    2993                 :    11754694 :   tree scalar_type = TREE_TYPE (DR_REF (dr));
    2994                 :    11754694 :   HOST_WIDE_INT type_size = TREE_INT_CST_LOW (TYPE_SIZE_UNIT (scalar_type));
    2995                 :    11754694 :   stmt_vec_info stmt_info = dr_info->stmt;
    2996                 :    11754694 :   loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
    2997                 :    11754694 :   bb_vec_info bb_vinfo = dyn_cast <bb_vec_info> (vinfo);
    2998                 :    11754694 :   HOST_WIDE_INT dr_step = -1;
    2999                 :    11754694 :   HOST_WIDE_INT groupsize, last_accessed_element = 1;
    3000                 :    11754694 :   bool slp_impossible = false;
    3001                 :             : 
    3002                 :             :   /* For interleaving, GROUPSIZE is STEP counted in elements, i.e., the
    3003                 :             :      size of the interleaving group (including gaps).  */
    3004                 :    11754694 :   if (tree_fits_shwi_p (step))
    3005                 :             :     {
    3006                 :    11746061 :       dr_step = tree_to_shwi (step);
    3007                 :             :       /* Check that STEP is a multiple of type size.  Otherwise there is
    3008                 :             :          a non-element-sized gap at the end of the group which we
    3009                 :             :          cannot represent in DR_GROUP_GAP or DR_GROUP_SIZE.
    3010                 :             :          ???  As we can handle non-constant step fine here we should
    3011                 :             :          simply remove uses of DR_GROUP_GAP between the last and first
    3012                 :             :          element and instead rely on DR_STEP.  DR_GROUP_SIZE then would
    3013                 :             :          simply not include that gap.  */
    3014                 :    11746061 :       if ((dr_step % type_size) != 0)
    3015                 :             :         {
    3016                 :         478 :           if (dump_enabled_p ())
    3017                 :          27 :             dump_printf_loc (MSG_NOTE, vect_location,
    3018                 :             :                              "Step %T is not a multiple of the element size"
    3019                 :             :                              " for %T\n",
    3020                 :             :                              step, DR_REF (dr));
    3021                 :         478 :           return false;
    3022                 :             :         }
    3023                 :    11745583 :       groupsize = absu_hwi (dr_step) / type_size;
    3024                 :             :     }
    3025                 :             :   else
    3026                 :             :     groupsize = 0;
    3027                 :             : 
     3028                 :             :   /* A non-consecutive access can be handled only if it is part of an interleaving group.  */
    3029                 :    11754216 :   if (!DR_GROUP_FIRST_ELEMENT (stmt_info))
    3030                 :             :     {
     3031                 :             :       /* Check whether this DR is part of an interleaving group of which it
     3032                 :             :          is the single element accessed in the loop.  */
    3033                 :             : 
    3034                 :             :       /* Gaps are supported only for loads. STEP must be a multiple of the type
    3035                 :             :          size.  */
    3036                 :     7905241 :       if (DR_IS_READ (dr)
    3037                 :     4755880 :           && (dr_step % type_size) == 0
    3038                 :             :           && groupsize > 0
    3039                 :             :           /* This could be UINT_MAX but as we are generating code in a very
    3040                 :             :              inefficient way we have to cap earlier.
    3041                 :             :              See PR91403 for example.  */
    3042                 :     4755880 :           && groupsize <= 4096)
    3043                 :             :         {
    3044                 :       56406 :           DR_GROUP_FIRST_ELEMENT (stmt_info) = stmt_info;
    3045                 :       56406 :           DR_GROUP_SIZE (stmt_info) = groupsize;
    3046                 :       56406 :           DR_GROUP_GAP (stmt_info) = groupsize - 1;
    3047                 :       56406 :           if (dump_enabled_p ())
    3048                 :        1324 :             dump_printf_loc (MSG_NOTE, vect_location,
    3049                 :             :                              "Detected single element interleaving %T"
    3050                 :             :                              " step %T\n",
    3051                 :             :                              DR_REF (dr), step);
    3052                 :             : 
    3053                 :       56406 :           return true;
    3054                 :             :         }
    3055                 :             : 
    3056                 :     7848835 :       if (dump_enabled_p ())
    3057                 :        3077 :         dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
    3058                 :             :                          "not consecutive access %G", stmt_info->stmt);
    3059                 :             : 
    3060                 :     7848835 :       if (bb_vinfo)
    3061                 :             :         {
    3062                 :             :           /* Mark the statement as unvectorizable.  */
    3063                 :     7834665 :           STMT_VINFO_VECTORIZABLE (stmt_info) = false;
    3064                 :     7834665 :           return true;
    3065                 :             :         }
    3066                 :             : 
    3067                 :       14170 :       if (dump_enabled_p ())
    3068                 :         330 :         dump_printf_loc (MSG_NOTE, vect_location, "using strided accesses\n");
    3069                 :       14170 :       STMT_VINFO_STRIDED_P (stmt_info) = true;
    3070                 :       14170 :       return true;
    3071                 :             :     }
    3072                 :             : 
    3073                 :     3848975 :   if (DR_GROUP_FIRST_ELEMENT (stmt_info) == stmt_info)
    3074                 :             :     {
    3075                 :             :       /* First stmt in the interleaving chain. Check the chain.  */
    3076                 :     1417616 :       stmt_vec_info next = DR_GROUP_NEXT_ELEMENT (stmt_info);
    3077                 :     1417616 :       struct data_reference *data_ref = dr;
    3078                 :     1417616 :       unsigned int count = 1;
    3079                 :     1417616 :       tree prev_init = DR_INIT (data_ref);
    3080                 :     1417616 :       HOST_WIDE_INT diff, gaps = 0;
    3081                 :             : 
    3082                 :             :       /* By construction, all group members have INTEGER_CST DR_INITs.  */
    3083                 :     3851091 :       while (next)
    3084                 :             :         {
    3085                 :             :           /* We never have the same DR multiple times.  */
    3086                 :     2433547 :           gcc_assert (tree_int_cst_compare (DR_INIT (data_ref),
    3087                 :             :                                 DR_INIT (STMT_VINFO_DATA_REF (next))) != 0);
    3088                 :             : 
    3089                 :     2433547 :           data_ref = STMT_VINFO_DATA_REF (next);
    3090                 :             : 
    3091                 :             :           /* All group members have the same STEP by construction.  */
    3092                 :     2433547 :           gcc_checking_assert (operand_equal_p (DR_STEP (data_ref), step, 0));
    3093                 :             : 
    3094                 :             :           /* Check that the distance between two accesses is equal to the type
    3095                 :             :              size. Otherwise, we have gaps.  */
    3096                 :     2433547 :           diff = (TREE_INT_CST_LOW (DR_INIT (data_ref))
    3097                 :     2433547 :                   - TREE_INT_CST_LOW (prev_init)) / type_size;
    3098                 :     2433547 :           if (diff < 1 || diff > UINT_MAX)
    3099                 :             :             {
    3100                 :             :               /* For artificial testcases with array accesses with large
    3101                 :             :                  constant indices we can run into overflow issues which
    3102                 :             :                  can end up fooling the groupsize constraint below so
    3103                 :             :                  check the individual gaps (which are represented as
    3104                 :             :                  unsigned int) as well.  */
    3105                 :          18 :               if (dump_enabled_p ())
    3106                 :           0 :                 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
    3107                 :             :                                  "interleaved access with gap larger "
    3108                 :             :                                  "than representable\n");
    3109                 :          18 :               return false;
    3110                 :             :             }
    3111                 :     2433529 :           if (diff != 1)
    3112                 :             :             {
    3113                 :             :               /* FORNOW: SLP of accesses with gaps is not supported.  */
    3114                 :       82014 :               slp_impossible = true;
    3115                 :       82014 :               if (DR_IS_WRITE (data_ref))
    3116                 :             :                 {
    3117                 :          54 :                   if (dump_enabled_p ())
    3118                 :           0 :                     dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
    3119                 :             :                                      "interleaved store with gaps\n");
    3120                 :          54 :                   return false;
    3121                 :             :                 }
    3122                 :             : 
    3123                 :       81960 :               gaps += diff - 1;
    3124                 :             :             }
    3125                 :             : 
    3126                 :     2433475 :           last_accessed_element += diff;
    3127                 :             : 
    3128                 :             :           /* Store the gap from the previous member of the group. If there is no
    3129                 :             :              gap in the access, DR_GROUP_GAP is always 1.  */
    3130                 :     2433475 :           DR_GROUP_GAP (next) = diff;
    3131                 :             : 
    3132                 :     2433475 :           prev_init = DR_INIT (data_ref);
    3133                 :     2433475 :           next = DR_GROUP_NEXT_ELEMENT (next);
    3134                 :             :           /* Count the number of data-refs in the chain.  */
    3135                 :     2433475 :           count++;
    3136                 :             :         }
    3137                 :             : 
    3138                 :     1417544 :       if (groupsize == 0)
    3139                 :     1360039 :         groupsize = count + gaps;
    3140                 :             : 
    3141                 :             :       /* This could be UINT_MAX but as we are generating code in a very
    3142                 :             :          inefficient way we have to cap earlier.  See PR78699 for example.  */
    3143                 :     1417544 :       if (groupsize > 4096)
    3144                 :             :         {
    3145                 :         295 :           if (dump_enabled_p ())
    3146                 :           1 :             dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
    3147                 :             :                              "group is too large\n");
    3148                 :         295 :           return false;
    3149                 :             :         }
    3150                 :             : 
    3151                 :             :       /* Check that the size of the interleaving is equal to count for stores,
    3152                 :             :          i.e., that there are no gaps.  */
    3153                 :     1417249 :       if (groupsize != count
    3154                 :       84844 :           && !DR_IS_READ (dr))
    3155                 :             :         {
    3156                 :        4510 :           groupsize = count;
    3157                 :        4510 :           STMT_VINFO_STRIDED_P (stmt_info) = true;
    3158                 :             :         }
    3159                 :             : 
    3160                 :             :       /* If there is a gap after the last load in the group it is the
    3161                 :             :          difference between the groupsize and the last accessed
    3162                 :             :          element.
    3163                 :             :          When there is no gap, this difference should be 0.  */
    3164                 :     1417249 :       DR_GROUP_GAP (stmt_info) = groupsize - last_accessed_element;
    3165                 :             : 
    3166                 :     1417249 :       DR_GROUP_SIZE (stmt_info) = groupsize;
    3167                 :     1417249 :       if (dump_enabled_p ())
    3168                 :             :         {
    3169                 :        7950 :           dump_printf_loc (MSG_NOTE, vect_location,
    3170                 :             :                            "Detected interleaving ");
    3171                 :        7950 :           if (DR_IS_READ (dr))
    3172                 :        4227 :             dump_printf (MSG_NOTE, "load ");
    3173                 :        3723 :           else if (STMT_VINFO_STRIDED_P (stmt_info))
    3174                 :         571 :             dump_printf (MSG_NOTE, "strided store ");
    3175                 :             :           else
    3176                 :        3152 :             dump_printf (MSG_NOTE, "store ");
    3177                 :        7950 :           dump_printf (MSG_NOTE, "of size %u\n",
    3178                 :             :                        (unsigned)groupsize);
    3179                 :        7950 :           dump_printf_loc (MSG_NOTE, vect_location, "\t%G", stmt_info->stmt);
    3180                 :        7950 :           next = DR_GROUP_NEXT_ELEMENT (stmt_info);
    3181                 :       38239 :           while (next)
    3182                 :             :             {
    3183                 :       30289 :               if (DR_GROUP_GAP (next) != 1)
    3184                 :         309 :                 dump_printf_loc (MSG_NOTE, vect_location,
    3185                 :             :                                  "\t<gap of %d elements>\n",
    3186                 :         309 :                                  DR_GROUP_GAP (next) - 1);
    3187                 :       30289 :               dump_printf_loc (MSG_NOTE, vect_location, "\t%G", next->stmt);
    3188                 :       30289 :               next = DR_GROUP_NEXT_ELEMENT (next);
    3189                 :             :             }
    3190                 :        7950 :           if (DR_GROUP_GAP (stmt_info) != 0)
    3191                 :         364 :             dump_printf_loc (MSG_NOTE, vect_location,
    3192                 :             :                              "\t<gap of %d elements>\n",
    3193                 :         364 :                              DR_GROUP_GAP (stmt_info));
    3194                 :             :         }
    3195                 :             : 
    3196                 :             :       /* SLP: create an SLP data structure for every interleaving group of
     3197                 :             :          stores for further analysis in vect_analyze_slp.  */
    3198                 :     1417249 :       if (DR_IS_WRITE (dr) && !slp_impossible)
    3199                 :             :         {
    3200                 :      849646 :           if (loop_vinfo)
    3201                 :       18352 :             LOOP_VINFO_GROUPED_STORES (loop_vinfo).safe_push (stmt_info);
    3202                 :      849646 :           if (bb_vinfo)
    3203                 :      831294 :             BB_VINFO_GROUPED_STORES (bb_vinfo).safe_push (stmt_info);
    3204                 :             :         }
    3205                 :             :     }
    3206                 :             : 
    3207                 :             :   return true;
    3208                 :             : }
    3209                 :             : 
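
As a concrete instance of the arithmetic above, consider a group of loads with DR_STEP of 32 bytes, 4-byte elements, and member offsets 0, 4 and 12: groupsize is 32/4 = 8, the element distances between consecutive members are 1 and 2 (so there is a one-element gap before the last member), the last accessed element is 4, and the trailing gap stored on the leader is 8 - 4 = 4. A tiny standalone sketch (made-up names, not GCC's API) reproducing those numbers:

#include <cstddef>
#include <cstdio>
#include <cstdlib>
#include <vector>

// Standalone sketch of the groupsize/gap bookkeeping above (names made up,
// not GCC's API).  inits are the members' DR_INITs in bytes.
int main ()
{
  const long step = 32;                       // DR_STEP in bytes
  const long type_size = 4;                   // element size in bytes
  const std::vector<long> inits = {0, 4, 12}; // three members of the group

  long groupsize = std::labs (step) / type_size;   // 32 / 4 = 8 elements
  long last_accessed = 1;
  for (std::size_t i = 1; i < inits.size (); ++i)
    {
      // Distance (in elements) from the previous member; this is what gets
      // stored as DR_GROUP_GAP of the member, 1 meaning "no gap".
      long diff = (inits[i] - inits[i - 1]) / type_size;
      std::printf ("distance before member %zu: %ld element(s)\n", i, diff);
      last_accessed += diff;
    }
  // Trailing gap recorded as DR_GROUP_GAP of the group leader.
  std::printf ("groupsize %ld, trailing gap %ld\n",
               groupsize, groupsize - last_accessed);    // prints 8 and 4
}
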
    3210                 :             : /* Analyze groups of accesses: check that DR_INFO belongs to a group of
    3211                 :             :    accesses of legal size, step, etc.  Detect gaps, single element
    3212                 :             :    interleaving, and other special cases. Set grouped access info.
    3213                 :             :    Collect groups of strided stores for further use in SLP analysis.  */
    3214                 :             : 
    3215                 :             : static bool
    3216                 :    11754694 : vect_analyze_group_access (vec_info *vinfo, dr_vec_info *dr_info)
    3217                 :             : {
    3218                 :    11754694 :   if (!vect_analyze_group_access_1 (vinfo, dr_info))
    3219                 :             :     {
    3220                 :             :       /* Dissolve the group if present.  */
    3221                 :         845 :       stmt_vec_info stmt_info = DR_GROUP_FIRST_ELEMENT (dr_info->stmt);
    3222                 :        4457 :       while (stmt_info)
    3223                 :             :         {
    3224                 :        3612 :           stmt_vec_info next = DR_GROUP_NEXT_ELEMENT (stmt_info);
    3225                 :        3612 :           DR_GROUP_FIRST_ELEMENT (stmt_info) = NULL;
    3226                 :        3612 :           DR_GROUP_NEXT_ELEMENT (stmt_info) = NULL;
    3227                 :        3612 :           stmt_info = next;
    3228                 :             :         }
    3229                 :             :       return false;
    3230                 :             :     }
    3231                 :             :   return true;
    3232                 :             : }
    3233                 :             : 
    3234                 :             : /* Analyze the access pattern of the data-reference DR_INFO.
    3235                 :             :    In case of non-consecutive accesses call vect_analyze_group_access() to
    3236                 :             :    analyze groups of accesses.  */
    3237                 :             : 
    3238                 :             : static bool
    3239                 :    12245621 : vect_analyze_data_ref_access (vec_info *vinfo, dr_vec_info *dr_info)
    3240                 :             : {
    3241                 :    12245621 :   data_reference *dr = dr_info->dr;
    3242                 :    12245621 :   tree step = DR_STEP (dr);
    3243                 :    12245621 :   tree scalar_type = TREE_TYPE (DR_REF (dr));
    3244                 :    12245621 :   stmt_vec_info stmt_info = dr_info->stmt;
    3245                 :    12245621 :   loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
    3246                 :    12245621 :   class loop *loop = NULL;
    3247                 :             : 
    3248                 :    12245621 :   if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
    3249                 :             :     return true;
    3250                 :             : 
    3251                 :    12213599 :   if (loop_vinfo)
    3252                 :      712574 :     loop = LOOP_VINFO_LOOP (loop_vinfo);
    3253                 :             : 
    3254                 :    12213599 :   if (loop_vinfo && !step)
    3255                 :             :     {
    3256                 :           0 :       if (dump_enabled_p ())
    3257                 :           0 :         dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
    3258                 :             :                          "bad data-ref access in loop\n");
    3259                 :           0 :       return false;
    3260                 :             :     }
    3261                 :             : 
    3262                 :             :   /* Allow loads with zero step in inner-loop vectorization.  */
    3263                 :    12213599 :   if (loop_vinfo && integer_zerop (step))
    3264                 :             :     {
    3265                 :        9559 :       DR_GROUP_FIRST_ELEMENT (stmt_info) = NULL;
    3266                 :        9559 :       DR_GROUP_NEXT_ELEMENT (stmt_info) = NULL;
    3267                 :        9559 :       if (!nested_in_vect_loop_p (loop, stmt_info))
    3268                 :        9295 :         return DR_IS_READ (dr);
    3269                 :             :       /* Allow references with zero step for outer loops marked
    3270                 :             :          with pragma omp simd only - it guarantees absence of
    3271                 :             :          loop-carried dependencies between inner loop iterations.  */
    3272                 :         264 :       if (loop->safelen < 2)
    3273                 :             :         {
    3274                 :         228 :           if (dump_enabled_p ())
    3275                 :           5 :             dump_printf_loc (MSG_NOTE, vect_location,
    3276                 :             :                              "zero step in inner loop of nest\n");
    3277                 :         228 :           return false;
    3278                 :             :         }
    3279                 :             :     }
    3280                 :             : 
    3281                 :    12204040 :   if (loop && nested_in_vect_loop_p (loop, stmt_info))
    3282                 :             :     {
    3283                 :             :       /* Interleaved accesses are not yet supported within outer-loop
    3284                 :             :         vectorization for references in the inner-loop.  */
    3285                 :        5136 :       DR_GROUP_FIRST_ELEMENT (stmt_info) = NULL;
    3286                 :        5136 :       DR_GROUP_NEXT_ELEMENT (stmt_info) = NULL;
    3287                 :             : 
    3288                 :             :       /* For the rest of the analysis we use the outer-loop step.  */
    3289                 :        5136 :       step = STMT_VINFO_DR_STEP (stmt_info);
    3290                 :        5136 :       if (integer_zerop (step))
    3291                 :             :         {
    3292                 :        1184 :           if (dump_enabled_p ())
    3293                 :         226 :             dump_printf_loc (MSG_NOTE, vect_location,
    3294                 :             :                              "zero step in outer loop.\n");
    3295                 :        1184 :           return DR_IS_READ (dr);
    3296                 :             :         }
    3297                 :             :     }
    3298                 :             : 
    3299                 :             :   /* Consecutive?  */
    3300                 :    12202892 :   if (TREE_CODE (step) == INTEGER_CST)
    3301                 :             :     {
    3302                 :    12166189 :       HOST_WIDE_INT dr_step = TREE_INT_CST_LOW (step);
    3303                 :    12166189 :       if (!tree_int_cst_compare (step, TYPE_SIZE_UNIT (scalar_type))
    3304                 :    12166189 :           || (dr_step < 0
    3305                 :       19285 :               && !compare_tree_int (TYPE_SIZE_UNIT (scalar_type), -dr_step)))
    3306                 :             :         {
    3307                 :             :           /* Mark that it is not interleaving.  */
    3308                 :      417682 :           DR_GROUP_FIRST_ELEMENT (stmt_info) = NULL;
    3309                 :      417682 :           DR_GROUP_NEXT_ELEMENT (stmt_info) = NULL;
    3310                 :      417682 :           return true;
    3311                 :             :         }
    3312                 :             :     }
    3313                 :             : 
    3314                 :    11785210 :   if (loop && nested_in_vect_loop_p (loop, stmt_info))
    3315                 :             :     {
    3316                 :        2850 :       if (dump_enabled_p ())
    3317                 :         141 :         dump_printf_loc (MSG_NOTE, vect_location,
    3318                 :             :                          "grouped access in outer loop.\n");
    3319                 :        2850 :       return false;
    3320                 :             :     }
    3321                 :             : 
    3322                 :             : 
     3323                 :             :   /* Assume this is a DR handled by the non-constant strided load case.  */
    3324                 :    11782360 :   if (TREE_CODE (step) != INTEGER_CST)
    3325                 :       36299 :     return (STMT_VINFO_STRIDED_P (stmt_info)
    3326                 :       36299 :             && (!STMT_VINFO_GROUPED_ACCESS (stmt_info)
    3327                 :        8633 :                 || vect_analyze_group_access (vinfo, dr_info)));
    3328                 :             : 
     3329                 :             :   /* Not a consecutive access - check whether it is part of an interleaving group.  */
    3330                 :    11746061 :   return vect_analyze_group_access (vinfo, dr_info);
    3331                 :             : }
    3332                 :             : 
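
The control flow above first handles gather/scatter and zero-step references, then treats a constant step equal to the element size (or its negation, for a reversed access) as consecutive, and only falls back to group analysis otherwise. A condensed standalone sketch of that decision, under the simplifying assumption that the step is a plain integer (names are illustrative, not GCC's):

#include <cstdio>

// Rough standalone classification of a constant-step access, mirroring the
// structure of vect_analyze_data_ref_access (illustrative only; the real code
// also handles gather/scatter, nested loops and non-constant steps).
enum class access_kind { zero_step, consecutive, maybe_grouped };

static access_kind classify (long step, long type_size)
{
  if (step == 0)
    return access_kind::zero_step;       // invariant address, e.g. a splat load
  if (step == type_size || step == -type_size)
    return access_kind::consecutive;     // unit stride, possibly reversed
  return access_kind::maybe_grouped;     // candidate for interleaving analysis
}

int main ()
{
  std::printf ("%d %d %d\n",
               static_cast<int> (classify (0, 4)),     // zero step
               static_cast<int> (classify (-4, 4)),    // reversed consecutive
               static_cast<int> (classify (32, 4)));   // interleaving candidate
}
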
     3333                 :             : /* Comparator for sorting data-references DRA and DRB into an order from
     3334                 :             :    which interleaving groups can be built linearly.  */
    3335                 :             : 
    3336                 :             : static int
    3337                 :   317432095 : dr_group_sort_cmp (const void *dra_, const void *drb_)
    3338                 :             : {
    3339                 :   317432095 :   dr_vec_info *dra_info = *(dr_vec_info **)const_cast<void *>(dra_);
    3340                 :   317432095 :   dr_vec_info *drb_info = *(dr_vec_info **)const_cast<void *>(drb_);
    3341                 :   317432095 :   data_reference_p dra = dra_info->dr;
    3342                 :   317432095 :   data_reference_p drb = drb_info->dr;
    3343                 :   317432095 :   int cmp;
    3344                 :             : 
    3345                 :             :   /* Stabilize sort.  */
    3346                 :   317432095 :   if (dra == drb)
    3347                 :             :     return 0;
    3348                 :             : 
     3349                 :             :   /* DRs with different group IDs never belong to the same group.  */
    3350                 :   317432095 :   if (dra_info->group != drb_info->group)
    3351                 :   345878138 :     return dra_info->group < drb_info->group ? -1 : 1;
    3352                 :             : 
    3353                 :             :   /* Ordering of DRs according to base.  */
    3354                 :    89932960 :   cmp = data_ref_compare_tree (DR_BASE_ADDRESS (dra),
    3355                 :             :                                DR_BASE_ADDRESS (drb));
    3356                 :    89932960 :   if (cmp != 0)
    3357                 :             :     return cmp;
    3358                 :             : 
    3359                 :             :   /* And according to DR_OFFSET.  */
    3360                 :    47472018 :   cmp = data_ref_compare_tree (DR_OFFSET (dra), DR_OFFSET (drb));
    3361                 :    47472018 :   if (cmp != 0)
    3362                 :             :     return cmp;
    3363                 :             : 
    3364                 :             :   /* Put reads before writes.  */
    3365                 :    47119997 :   if (DR_IS_READ (dra) != DR_IS_READ (drb))
    3366                 :     3903139 :     return DR_IS_READ (dra) ? -1 : 1;
    3367                 :             : 
    3368                 :             :   /* Then sort after access size.  */
    3369                 :    44444169 :   cmp = data_ref_compare_tree (TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (dra))),
    3370                 :    44444169 :                                TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (drb))));
    3371                 :    44444169 :   if (cmp != 0)
    3372                 :             :     return cmp;
    3373                 :             : 
    3374                 :             :   /* And after step.  */
    3375                 :    38856654 :   cmp = data_ref_compare_tree (DR_STEP (dra), DR_STEP (drb));
    3376                 :    38856654 :   if (cmp != 0)
    3377                 :             :     return cmp;
    3378                 :             : 
    3379                 :             :   /* Then sort after DR_INIT.  In case of identical DRs sort after stmt UID.  */
    3380                 :    38850320 :   cmp = data_ref_compare_tree (DR_INIT (dra), DR_INIT (drb));
    3381                 :    38850320 :   if (cmp == 0)
    3382                 :      304848 :     return gimple_uid (DR_STMT (dra)) < gimple_uid (DR_STMT (drb)) ? -1 : 1;
    3383                 :             :   return cmp;
    3384                 :             : }
    3385                 :             : 
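
dr_group_sort_cmp is a chain of lexicographic keys whose last link, the statement UID, is always distinct, so the otherwise unstable qsort becomes deterministic. A standalone illustration of the same pattern with std::sort and a made-up Ref struct (not GCC's dr_vec_info):

#include <algorithm>
#include <cstdio>
#include <vector>

// Illustrative comparator with the same shape as dr_group_sort_cmp above:
// a chain of keys ending in a unique id, so that an unstable sort becomes
// deterministic.  The Ref struct and its fields are made up for the sketch.
struct Ref
{
  int group;      // caller-assigned group / basic-block id
  long base;      // stands in for DR_BASE_ADDRESS
  bool is_read;   // reads sort before writes
  long init;      // stands in for DR_INIT
  unsigned uid;   // stands in for gimple_uid, always distinct
};

static bool ref_less (const Ref &a, const Ref &b)
{
  if (a.group != b.group) return a.group < b.group;
  if (a.base != b.base) return a.base < b.base;
  if (a.is_read != b.is_read) return a.is_read;   // reads first
  if (a.init != b.init) return a.init < b.init;
  return a.uid < b.uid;                           // final, unique tie-break
}

int main ()
{
  std::vector<Ref> refs = { {1, 0, false, 8, 3}, {1, 0, true, 0, 1},
                            {0, 0, true, 4, 2} };
  std::sort (refs.begin (), refs.end (), ref_less);
  for (const Ref &r : refs)
    std::printf ("group %d init %ld uid %u\n", r.group, r.init, r.uid);
}
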
    3386                 :             : /* If OP is the result of a conversion, return the unconverted value,
    3387                 :             :    otherwise return null.  */
    3388                 :             : 
    3389                 :             : static tree
    3390                 :         306 : strip_conversion (tree op)
    3391                 :             : {
    3392                 :         306 :   if (TREE_CODE (op) != SSA_NAME)
    3393                 :             :     return NULL_TREE;
    3394                 :         306 :   gimple *stmt = SSA_NAME_DEF_STMT (op);
    3395                 :         306 :   if (!is_gimple_assign (stmt)
    3396                 :         306 :       || !CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (stmt)))
    3397                 :             :     return NULL_TREE;
    3398                 :         196 :   return gimple_assign_rhs1 (stmt);
    3399                 :             : }
    3400                 :             : 
    3401                 :             : /* Return true if vectorizable_* routines can handle statements STMT1_INFO
     3402                 :             :    and STMT2_INFO being in a single group.  When ALLOW_SLP_P is true, masked
     3403                 :             :    loads can be grouped in SLP mode.  */
    3404                 :             : 
    3405                 :             : static bool
    3406                 :     6480274 : can_group_stmts_p (stmt_vec_info stmt1_info, stmt_vec_info stmt2_info,
    3407                 :             :                    bool allow_slp_p)
    3408                 :             : {
    3409                 :     6480274 :   if (gimple_assign_single_p (stmt1_info->stmt))
    3410                 :     6479682 :     return gimple_assign_single_p (stmt2_info->stmt);
    3411                 :             : 
    3412                 :         592 :   gcall *call1 = dyn_cast <gcall *> (stmt1_info->stmt);
    3413                 :         592 :   if (call1 && gimple_call_internal_p (call1))
    3414                 :             :     {
    3415                 :             :       /* Check for two masked loads or two masked stores.  */
    3416                 :         796 :       gcall *call2 = dyn_cast <gcall *> (stmt2_info->stmt);
    3417                 :         581 :       if (!call2 || !gimple_call_internal_p (call2))
    3418                 :             :         return false;
    3419                 :         581 :       internal_fn ifn = gimple_call_internal_fn (call1);
    3420                 :         581 :       if (ifn != IFN_MASK_LOAD && ifn != IFN_MASK_STORE)
    3421                 :             :         return false;
    3422                 :         581 :       if (ifn != gimple_call_internal_fn (call2))
    3423                 :             :         return false;
    3424                 :             : 
    3425                 :             :       /* Check that the masks are the same.  Cope with casts of masks,
    3426                 :             :          like those created by build_mask_conversion.  */
    3427                 :         581 :       tree mask1 = gimple_call_arg (call1, 2);
    3428                 :         581 :       tree mask2 = gimple_call_arg (call2, 2);
    3429                 :         581 :       if (!operand_equal_p (mask1, mask2, 0) && !allow_slp_p)
    3430                 :             :         {
    3431                 :         208 :           mask1 = strip_conversion (mask1);
    3432                 :         208 :           if (!mask1)
    3433                 :             :             return false;
    3434                 :          98 :           mask2 = strip_conversion (mask2);
    3435                 :          98 :           if (!mask2)
    3436                 :             :             return false;
    3437                 :          98 :           if (!operand_equal_p (mask1, mask2, 0))
    3438                 :             :             return false;
    3439                 :             :         }
    3440                 :         377 :       return true;
    3441                 :             :     }
    3442                 :             : 
    3443                 :             :   return false;
    3444                 :             : }
    3445                 :             : 
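
can_group_stmts_p only lets two masked accesses into one group when they are the same kind of internal call and, outside the SLP path, use the same mask, possibly hidden behind a mask conversion such as those build_mask_conversion creates. A standalone sketch of the "strip one conversion, then compare" idea with made-up types (mask_value, strip_conv and masks_compatible are illustrative; GCC compares gimple operands, not strings):

#include <string>

// Illustrative only: a mask operand is either a plain value or a single
// conversion of another value.
struct mask_value
{
  std::string name;                   // SSA-like name of this value
  const mask_value *converted_from;   // non-null when this is a cast of a mask
};

// Mirror of strip_conversion above: look through one conversion, if any.
static const mask_value *strip_conv (const mask_value *m)
{
  return m->converted_from;           // may be null
}

// Two masked accesses may share a group when their masks match directly or
// after both are stripped of a conversion (the non-SLP path above).
static bool masks_compatible (const mask_value *m1, const mask_value *m2)
{
  if (m1->name == m2->name)
    return true;
  const mask_value *s1 = strip_conv (m1);
  const mask_value *s2 = strip_conv (m2);
  return s1 && s2 && s1->name == s2->name;
}

int main ()
{
  mask_value m = { "mask_5", nullptr };
  mask_value cast1 = { "cast_7", &m };
  mask_value cast2 = { "cast_9", &m };
  return masks_compatible (&cast1, &cast2) ? 0 : 1;   // compatible via m
}
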
    3446                 :             : /* Function vect_analyze_data_ref_accesses.
    3447                 :             : 
    3448                 :             :    Analyze the access pattern of all the data references in the loop.
    3449                 :             : 
    3450                 :             :    FORNOW: the only access pattern that is considered vectorizable is a
    3451                 :             :            simple step 1 (consecutive) access.
    3452                 :             : 
    3453                 :             :    FORNOW: handle only arrays and pointer accesses.  */
    3454                 :             : 
    3455                 :             : opt_result
    3456                 :     2612207 : vect_analyze_data_ref_accesses (vec_info *vinfo,
    3457                 :             :                                 vec<int> *dataref_groups)
    3458                 :             : {
    3459                 :     2612207 :   unsigned int i;
    3460                 :     2612207 :   vec<data_reference_p> datarefs = vinfo->shared->datarefs;
    3461                 :             : 
    3462                 :     2612207 :   DUMP_VECT_SCOPE ("vect_analyze_data_ref_accesses");
    3463                 :             : 
    3464                 :     2612207 :   if (datarefs.is_empty ())
    3465                 :     1177467 :     return opt_result::success ();
    3466                 :             : 
    3467                 :             :   /* Sort the array of datarefs to make building the interleaving chains
    3468                 :             :      linear.  Don't modify the original vector's order, it is needed for
    3469                 :             :      determining what dependencies are reversed.  */
    3470                 :     1434740 :   vec<dr_vec_info *> datarefs_copy;
    3471                 :     1434740 :   datarefs_copy.create (datarefs.length ());
    3472                 :    15262451 :   for (unsigned i = 0; i < datarefs.length (); i++)
    3473                 :             :     {
    3474                 :    13827711 :       dr_vec_info *dr_info = vinfo->lookup_dr (datarefs[i]);
    3475                 :             :       /* If the caller computed DR grouping use that, otherwise group by
    3476                 :             :          basic blocks.  */
    3477                 :    13827711 :       if (dataref_groups)
    3478                 :    13072903 :         dr_info->group = (*dataref_groups)[i];
    3479                 :             :       else
    3480                 :      754808 :         dr_info->group = gimple_bb (DR_STMT (datarefs[i]))->index;
    3481                 :    13827711 :       datarefs_copy.quick_push (dr_info);
    3482                 :             :     }
    3483                 :     1434740 :   datarefs_copy.qsort (dr_group_sort_cmp);
    3484                 :     1434740 :   hash_set<stmt_vec_info> to_fixup;
    3485                 :             : 
    3486                 :             :   /* Build the interleaving chains.  */
    3487                 :    13056525 :   for (i = 0; i < datarefs_copy.length () - 1;)
    3488                 :             :     {
    3489                 :    10187045 :       dr_vec_info *dr_info_a = datarefs_copy[i];
    3490                 :    10187045 :       data_reference_p dra = dr_info_a->dr;
    3491                 :    10187045 :       int dra_group_id = dr_info_a->group;
    3492                 :    10187045 :       stmt_vec_info stmtinfo_a = dr_info_a->stmt;
    3493                 :    10187045 :       stmt_vec_info lastinfo = NULL;
    3494                 :    10187045 :       if (!STMT_VINFO_VECTORIZABLE (stmtinfo_a)
    3495                 :     8737662 :           || STMT_VINFO_GATHER_SCATTER_P (stmtinfo_a))
    3496                 :             :         {
    3497                 :     1475024 :           ++i;
    3498                 :     1475024 :           continue;
    3499                 :             :         }
    3500                 :    22796280 :       for (i = i + 1; i < datarefs_copy.length (); ++i)
    3501                 :             :         {
    3502                 :    10917947 :           dr_vec_info *dr_info_b = datarefs_copy[i];
    3503                 :    10917947 :           data_reference_p drb = dr_info_b->dr;
    3504                 :    10917947 :           int drb_group_id = dr_info_b->group;
    3505                 :    10917947 :           stmt_vec_info stmtinfo_b = dr_info_b->stmt;
    3506                 :    10917947 :           if (!STMT_VINFO_VECTORIZABLE (stmtinfo_b)
    3507                 :    10637354 :               || STMT_VINFO_GATHER_SCATTER_P (stmtinfo_b))
    3508                 :             :             break;
    3509                 :             : 
    3510                 :             :           /* ???  Imperfect sorting (non-compatible types, non-modulo
     3511                 :             :              accesses, same accesses) can lead to a group being artificially
     3512                 :             :              split here as we don't just skip over those.  If it really
     3513                 :             :              matters we can push those to a worklist and re-iterate
     3514                 :             :              over them.  Then we can just skip ahead to the next DR here.  */
    3515                 :             : 
    3516                 :             :           /* DRs in a different DR group should not be put into the same
    3517                 :             :              interleaving group.  */
    3518                 :    10634458 :           if (dra_group_id != drb_group_id)
    3519                 :             :             break;
    3520                 :             : 
     3521                 :             :           /* Check that the data-refs have the same first location (except
     3522                 :             :              init) and that both are either stores or loads (not one load and
     3523                 :             :              one store, and not masked accesses that cannot be grouped).  */
    3524                 :     6799805 :           if (DR_IS_READ (dra) != DR_IS_READ (drb)
    3525                 :     5558498 :               || data_ref_compare_tree (DR_BASE_ADDRESS (dra),
    3526                 :             :                                         DR_BASE_ADDRESS (drb)) != 0
    3527                 :     4037310 :               || data_ref_compare_tree (DR_OFFSET (dra), DR_OFFSET (drb)) != 0
    3528                 :    10817635 :               || !can_group_stmts_p (stmtinfo_a, stmtinfo_b, true))
    3529                 :             :             break;
    3530                 :             : 
    3531                 :             :           /* Check that the data-refs have the same constant size.  */
    3532                 :     4017813 :           tree sza = TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (dra)));
    3533                 :     4017813 :           tree szb = TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (drb)));
    3534                 :     4017813 :           if (!tree_fits_uhwi_p (sza)
    3535                 :     4017813 :               || !tree_fits_uhwi_p (szb)
    3536                 :     8035626 :               || !tree_int_cst_equal (sza, szb))
    3537                 :             :             break;
    3538                 :             : 
    3539                 :             :           /* Check that the data-refs have the same step.  */
    3540                 :     3712102 :           if (data_ref_compare_tree (DR_STEP (dra), DR_STEP (drb)) != 0)
    3541                 :             :             break;
    3542                 :             : 
    3543                 :             :           /* Check the types are compatible.
    3544                 :             :              ???  We don't distinguish this during sorting.  */
    3545                 :     3711482 :           if (!types_compatible_p (TREE_TYPE (DR_REF (dra)),
    3546                 :     3711482 :                                    TREE_TYPE (DR_REF (drb))))
    3547                 :             :             break;
    3548                 :             : 
    3549                 :             :           /* Check that the DR_INITs are compile-time constants.  */
    3550                 :     2632590 :           if (!tree_fits_shwi_p (DR_INIT (dra))
    3551                 :     2632590 :               || !tree_fits_shwi_p (DR_INIT (drb)))
    3552                 :             :             break;
    3553                 :             : 
    3554                 :             :           /* Different .GOMP_SIMD_LANE calls still give the same lane,
    3555                 :             :              just hold extra information.  */
    3556                 :     2632590 :           if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmtinfo_a)
    3557                 :        1240 :               && STMT_VINFO_SIMD_LANE_ACCESS_P (stmtinfo_b)
    3558                 :     2633830 :               && data_ref_compare_tree (DR_INIT (dra), DR_INIT (drb)) == 0)
    3559                 :             :             break;
    3560                 :             : 
    3561                 :             :           /* Sorting has ensured that DR_INIT (dra) <= DR_INIT (drb).  */
    3562                 :     2631350 :           HOST_WIDE_INT init_a = TREE_INT_CST_LOW (DR_INIT (dra));
    3563                 :     2631350 :           HOST_WIDE_INT init_b = TREE_INT_CST_LOW (DR_INIT (drb));
    3564                 :     2631350 :           HOST_WIDE_INT init_prev
    3565                 :     2631350 :             = TREE_INT_CST_LOW (DR_INIT (datarefs_copy[i-1]->dr));
    3566                 :     2631350 :           gcc_assert (init_a <= init_b
    3567                 :             :                       && init_a <= init_prev
    3568                 :             :                       && init_prev <= init_b);
    3569                 :             : 
    3570                 :             :           /* Do not place the same access in the interleaving chain twice.  */
    3571                 :     2631350 :           if (init_b == init_prev)
    3572                 :             :             {
    3573                 :       25481 :               gcc_assert (gimple_uid (DR_STMT (datarefs_copy[i-1]->dr))
    3574                 :             :                           < gimple_uid (DR_STMT (drb)));
    3575                 :             :               /* Simply link in duplicates and fix up the chain below.  */
    3576                 :             :             }
    3577                 :             :           else
    3578                 :             :             {
    3579                 :             :               /* If init_b == init_a + the size of the type * k, we have an
    3580                 :             :                  interleaving, and DRA is accessed before DRB.  */
    3581                 :     2605869 :               unsigned HOST_WIDE_INT type_size_a = tree_to_uhwi (sza);
    3582                 :     2605869 :               if (type_size_a == 0
    3583                 :     2605869 :                   || (((unsigned HOST_WIDE_INT)init_b - init_a)
    3584                 :     2605869 :                       % type_size_a != 0))
    3585                 :             :                 break;
    3586                 :             : 
    3587                 :             :               /* If we have a store, the accesses are adjacent.  This splits
    3588                 :             :                  groups into chunks we support (we don't support vectorization
    3589                 :             :                  of stores with gaps).  */
    3590                 :     2604816 :               if (!DR_IS_READ (dra)
    3591                 :     1685533 :                   && (((unsigned HOST_WIDE_INT)init_b - init_prev)
    3592                 :             :                       != type_size_a))
    3593                 :             :                 break;
    3594                 :             : 
    3595                 :             :           /* If the step (when it is nonzero and constant) is smaller than
    3596                 :             :              the difference between the data-refs' inits, this splits groups
    3597                 :             :              into suitable sizes.  */
    3598                 :     2452460 :               if (tree_fits_shwi_p (DR_STEP (dra)))
    3599                 :             :                 {
    3600                 :     2447060 :                   unsigned HOST_WIDE_INT step
    3601                 :     2447060 :                     = absu_hwi (tree_to_shwi (DR_STEP (dra)));
    3602                 :     2447060 :                   if (step != 0
    3603                 :      134557 :                       && step <= ((unsigned HOST_WIDE_INT)init_b - init_a))
    3604                 :             :                     break;
    3605                 :             :                 }
    3606                 :             :             }
    3607                 :             : 
    3608                 :     2462474 :           if (dump_enabled_p ())
    3609                 :       31069 :             dump_printf_loc (MSG_NOTE, vect_location,
    3610                 :       31069 :                              DR_IS_READ (dra)
    3611                 :             :                              ? "Detected interleaving load %T and %T\n"
    3612                 :             :                              : "Detected interleaving store %T and %T\n",
    3613                 :             :                              DR_REF (dra), DR_REF (drb));
    3614                 :             : 
    3615                 :             :           /* Link the found element into the group list.  */
    3616                 :     2462474 :           if (!DR_GROUP_FIRST_ELEMENT (stmtinfo_a))
    3617                 :             :             {
    3618                 :     1399593 :               DR_GROUP_FIRST_ELEMENT (stmtinfo_a) = stmtinfo_a;
    3619                 :     1399593 :               lastinfo = stmtinfo_a;
    3620                 :             :             }
    3621                 :     2462474 :           DR_GROUP_FIRST_ELEMENT (stmtinfo_b) = stmtinfo_a;
    3622                 :     2462474 :           DR_GROUP_NEXT_ELEMENT (lastinfo) = stmtinfo_b;
    3623                 :     2462474 :           lastinfo = stmtinfo_b;
    3624                 :             : 
    3625                 :     2462474 :           if (! STMT_VINFO_SLP_VECT_ONLY (stmtinfo_a))
    3626                 :             :             {
    3627                 :     2462444 :               STMT_VINFO_SLP_VECT_ONLY (stmtinfo_a)
    3628                 :     2462444 :                 = !can_group_stmts_p (stmtinfo_a, stmtinfo_b, false);
    3629                 :             : 
    3630                 :     2462444 :               if (dump_enabled_p () && STMT_VINFO_SLP_VECT_ONLY (stmtinfo_a))
    3631                 :          73 :                 dump_printf_loc (MSG_NOTE, vect_location,
    3632                 :             :                                  "Load suitable for SLP vectorization only.\n");
    3633                 :             :             }
    3634                 :             : 
    3635                 :     2462474 :           if (init_b == init_prev
    3636                 :       25481 :               && !to_fixup.add (DR_GROUP_FIRST_ELEMENT (stmtinfo_a))
    3637                 :     2477720 :               && dump_enabled_p ())
    3638                 :         246 :             dump_printf_loc (MSG_NOTE, vect_location,
    3639                 :             :                              "Queuing group with duplicate access for fixup\n");
    3640                 :             :         }
    3641                 :             :     }
    3642                 :             : 
    3643                 :             :   /* Fix up groups with duplicate entries by splitting them.  */
    3644                 :     1472524 :   while (1)
    3645                 :             :     {
    3646                 :     1472524 :       hash_set<stmt_vec_info>::iterator it = to_fixup.begin ();
    3647                 :     1472524 :       if (!(it != to_fixup.end ()))
    3648                 :             :         break;
    3649                 :       37784 :       stmt_vec_info grp = *it;
    3650                 :       37784 :       to_fixup.remove (grp);
    3651                 :             : 
    3652                 :             :       /* Find the earliest duplicate group member.  */
    3653                 :       37784 :       unsigned first_duplicate = -1u;
    3654                 :       37784 :       stmt_vec_info next, g = grp;
    3655                 :      192555 :       while ((next = DR_GROUP_NEXT_ELEMENT (g)))
    3656                 :             :         {
    3657                 :      116987 :           if (tree_int_cst_equal (DR_INIT (STMT_VINFO_DR_INFO (next)->dr),
    3658                 :      116987 :                                   DR_INIT (STMT_VINFO_DR_INFO (g)->dr))
    3659                 :      116987 :               && gimple_uid (STMT_VINFO_STMT (next)) < first_duplicate)
    3660                 :             :             first_duplicate = gimple_uid (STMT_VINFO_STMT (next));
    3661                 :             :           g = next;
    3662                 :             :         }
    3663                 :       37784 :       if (first_duplicate == -1U)
    3664                 :       15246 :         continue;
    3665                 :             : 
    3666                 :             :       /* Then move all stmts after the first duplicate to a new group.
    3667                 :             :          Note this is a heuristic, but one with the property that the
    3668                 :             :          group GRP itself ends up fixed up completely.  */
    3669                 :       22538 :       g = grp;
    3670                 :       22538 :       stmt_vec_info newgroup = NULL, ng = grp;
    3671                 :      160168 :       while ((next = DR_GROUP_NEXT_ELEMENT (g)))
    3672                 :             :         {
    3673                 :      115092 :           if (gimple_uid (STMT_VINFO_STMT (next)) >= first_duplicate)
    3674                 :             :             {
    3675                 :      109831 :               DR_GROUP_NEXT_ELEMENT (g) = DR_GROUP_NEXT_ELEMENT (next);
    3676                 :      109831 :               if (!newgroup)
    3677                 :             :                 {
    3678                 :       22538 :                   newgroup = next;
    3679                 :       22538 :                   STMT_VINFO_SLP_VECT_ONLY (newgroup)
    3680                 :       22538 :                     = STMT_VINFO_SLP_VECT_ONLY (grp);
    3681                 :             :                 }
    3682                 :             :               else
    3683                 :       87293 :                 DR_GROUP_NEXT_ELEMENT (ng) = next;
    3684                 :      109831 :               ng = next;
    3685                 :      109831 :               DR_GROUP_FIRST_ELEMENT (ng) = newgroup;
    3686                 :             :             }
    3687                 :             :           else
    3688                 :             :             g = DR_GROUP_NEXT_ELEMENT (g);
    3689                 :             :         }
    3690                 :       22538 :       DR_GROUP_NEXT_ELEMENT (ng) = NULL;
    3691                 :             : 
    3692                 :             :       /* Fix up the new group, which may still contain duplicates.  */
    3693                 :       22538 :       to_fixup.add (newgroup);
    3694                 :             :     }
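
[Editor's note: a minimal illustrative sketch of the splitting step performed by the fixup loop above, using a plain vector of {init, uid} pairs instead of GCC's stmt_vec_info chain; all names here are hypothetical and not part of the GCC sources.]

    #include <vector>
    #include <utility>

    typedef std::pair<long, unsigned> access_t;   /* {DR_INIT, gimple uid} */

    /* Split GRP (whose leader is element 0) at the earliest access whose
       init duplicates its predecessor's, moving the later members to
       NEWGRP.  The new group may itself still contain duplicates and
       would be queued again, as in the loop above.  */
    static void
    split_at_first_duplicate (std::vector<access_t> &grp,
                              std::vector<access_t> &newgrp)
    {
      unsigned first_duplicate = -1u;
      for (size_t i = 1; i < grp.size (); ++i)
        if (grp[i].first == grp[i - 1].first
            && grp[i].second < first_duplicate)
          first_duplicate = grp[i].second;
      if (first_duplicate == -1u)
        return;

      std::vector<access_t> keep;
      keep.push_back (grp[0]);
      for (size_t i = 1; i < grp.size (); ++i)
        if (grp[i].second >= first_duplicate)
          newgrp.push_back (grp[i]);
        else
          keep.push_back (grp[i]);
      grp = keep;
    }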
    3695                 :             : 
    3696                 :     1434740 :   dr_vec_info *dr_info;
    3697                 :    15246315 :   FOR_EACH_VEC_ELT (datarefs_copy, i, dr_info)
    3698                 :             :     {
    3699                 :    13817499 :       if (STMT_VINFO_VECTORIZABLE (dr_info->stmt)
    3700                 :    13817499 :           && !vect_analyze_data_ref_access (vinfo, dr_info))
    3701                 :             :         {
    3702                 :        6285 :           if (dump_enabled_p ())
    3703                 :         255 :             dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
    3704                 :             :                              "not vectorized: complicated access pattern.\n");
    3705                 :             : 
    3706                 :        6285 :           if (is_a <bb_vec_info> (vinfo))
    3707                 :             :             {
    3708                 :             :               /* Mark the statement as not vectorizable.  */
    3709                 :         361 :               STMT_VINFO_VECTORIZABLE (dr_info->stmt) = false;
    3710                 :         361 :               continue;
    3711                 :             :             }
    3712                 :             :           else
    3713                 :             :             {
    3714                 :        5924 :               datarefs_copy.release ();
    3715                 :        5924 :               return opt_result::failure_at (dr_info->stmt->stmt,
    3716                 :             :                                              "not vectorized:"
    3717                 :             :                                              " complicated access pattern.\n");
    3718                 :             :             }
    3719                 :             :         }
    3720                 :             :     }
    3721                 :             : 
    3722                 :     1428816 :   datarefs_copy.release ();
    3723                 :     1428816 :   return opt_result::success ();
    3724                 :     1434740 : }
    3725                 :             : 
    3726                 :             : /* Function vect_vfa_segment_size.
    3727                 :             : 
    3728                 :             :    Input:
    3729                 :             :      DR_INFO: The data reference.
    3730                 :             :      LENGTH_FACTOR: segment length to consider.
    3731                 :             : 
    3732                 :             :    Return a value suitable for the dr_with_seg_len::seg_len field.
    3733                 :             :    This is the "distance travelled" by the pointer from the first
    3734                 :             :    iteration in the segment to the last.  Note that it does not include
    3735                 :             :    the size of the access; in effect it only describes the first byte.  */
    3736                 :             : 
    3737                 :             : static tree
    3738                 :      108592 : vect_vfa_segment_size (dr_vec_info *dr_info, tree length_factor)
    3739                 :             : {
    3740                 :      108592 :   length_factor = size_binop (MINUS_EXPR,
    3741                 :             :                               fold_convert (sizetype, length_factor),
    3742                 :             :                               size_one_node);
    3743                 :      108592 :   return size_binop (MULT_EXPR, fold_convert (sizetype, DR_STEP (dr_info->dr)),
    3744                 :             :                      length_factor);
    3745                 :             : }
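
[Editor's note: a minimal sketch of the seg_len arithmetic above using plain integers rather than GCC's tree arithmetic; the helper name and values are illustrative only.  The segment length is the distance the pointer travels from the first to the last iteration of the segment, i.e. step * (length_factor - 1), and deliberately excludes the size of the access itself.]

    #include <cstdio>

    /* Hypothetical plain-integer model of vect_vfa_segment_size.  */
    static long
    segment_size_model (long step, long length_factor)
    {
      return step * (length_factor - 1);
    }

    int
    main ()
    {
      /* E.g. a 4-byte step over a segment of 8 iterations gives 28 bytes;
         the access size is added separately (see vect_vfa_access_size).  */
      printf ("%ld\n", segment_size_model (4, 8));
      return 0;
    }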
    3746                 :             : 
    3747                 :             : /* Return a value that, when added to abs (vect_vfa_segment_size (DR_INFO)),
    3748                 :             :    gives the worst-case number of bytes covered by the segment.  */
    3749                 :             : 
    3750                 :             : static unsigned HOST_WIDE_INT
    3751                 :      109074 : vect_vfa_access_size (vec_info *vinfo, dr_vec_info *dr_info)
    3752                 :             : {
    3753                 :      109074 :   stmt_vec_info stmt_vinfo = dr_info->stmt;
    3754                 :      109074 :   tree ref_type = TREE_TYPE (DR_REF (dr_info->dr));
    3755                 :      109074 :   unsigned HOST_WIDE_INT ref_size = tree_to_uhwi (TYPE_SIZE_UNIT (ref_type));
    3756                 :      109074 :   unsigned HOST_WIDE_INT access_size = ref_size;
    3757                 :      109074 :   if (DR_GROUP_FIRST_ELEMENT (stmt_vinfo))
    3758                 :             :     {
    3759                 :       35534 :       gcc_assert (DR_GROUP_FIRST_ELEMENT (stmt_vinfo) == stmt_vinfo);
    3760                 :       35534 :       access_size *= DR_GROUP_SIZE (stmt_vinfo) - DR_GROUP_GAP (stmt_vinfo);
    3761                 :             :     }
    3762                 :      109074 :   tree vectype = STMT_VINFO_VECTYPE (stmt_vinfo);
    3763                 :      109074 :   int misalignment;
    3764                 :      109074 :   if (STMT_VINFO_VEC_STMTS (stmt_vinfo).exists ()
    3765                 :      109074 :       && ((misalignment = dr_misalignment (dr_info, vectype)), true)
    3766                 :      109074 :       && (vect_supportable_dr_alignment (vinfo, dr_info, vectype, misalignment)
    3767                 :             :           == dr_explicit_realign_optimized))
    3768                 :             :     {
    3769                 :             :       /* We might access a full vector's worth.  */
    3770                 :           0 :       access_size += tree_to_uhwi (TYPE_SIZE_UNIT (vectype)) - ref_size;
    3771                 :             :     }
    3772                 :      109074 :   return access_size;
    3773                 :             : }
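
[Editor's note: a minimal plain-integer sketch of the worst-case access size computed above; the helper is hypothetical.  The realign-optimized adjustment is omitted here because it did not trigger in this run, as the zero hit count on that line shows.]

    /* Hypothetical model: worst-case bytes accessed per scalar iteration.  */
    static unsigned long
    access_size_model (unsigned long ref_size, bool group_leader_p,
                       unsigned long group_size, unsigned long group_gap)
    {
      unsigned long access_size = ref_size;
      if (group_leader_p)
        /* A group leader stands for the whole interleaving group, minus
           the trailing gap that is never touched.  */
        access_size *= group_size - group_gap;
      return access_size;
    }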
    3774                 :             : 
    3775                 :             : /* Get the minimum alignment for all the scalar accesses that DR_INFO
    3776                 :             :    describes.  */
    3777                 :             : 
    3778                 :             : static unsigned int
    3779                 :      109074 : vect_vfa_align (dr_vec_info *dr_info)
    3780                 :             : {
    3781                 :           0 :   return dr_alignment (dr_info->dr);
    3782                 :             : }
    3783                 :             : 
    3784                 :             : /* Function vect_compile_time_alias.
    3785                 :             : 
    3786                 :             :    Given data references A and B with equal base and offset, see whether
    3787                 :             :    the alias relation can be decided at compilation time.  Return 1 if
    3788                 :             :    it can and the references alias, 0 if it can and the references do
    3789                 :             :    not alias, and -1 if we cannot decide at compile time.  SEGMENT_LENGTH_A,
    3790                 :             :    SEGMENT_LENGTH_B, ACCESS_SIZE_A and ACCESS_SIZE_B are the equivalent
    3791                 :             :    of dr_with_seg_len::{seg_len,access_size} for A and B.  */
    3792                 :             : 
    3793                 :             : static int
    3794                 :        1520 : vect_compile_time_alias (dr_vec_info *a, dr_vec_info *b,
    3795                 :             :                          tree segment_length_a, tree segment_length_b,
    3796                 :             :                          unsigned HOST_WIDE_INT access_size_a,
    3797                 :             :                          unsigned HOST_WIDE_INT access_size_b)
    3798                 :             : {
    3799                 :        1520 :   poly_offset_int offset_a = wi::to_poly_offset (DR_INIT (a->dr));
    3800                 :        1520 :   poly_offset_int offset_b = wi::to_poly_offset (DR_INIT (b->dr));
    3801                 :        1520 :   poly_uint64 const_length_a;
    3802                 :        1520 :   poly_uint64 const_length_b;
    3803                 :             : 
    3804                 :             :   /* For negative step, we need to adjust address range by TYPE_SIZE_UNIT
    3805                 :             :      bytes, e.g., int a[3] -> a[1] range is [a+4, a+16) instead of
    3806                 :             :      [a, a+12) */
    3807                 :        1520 :   if (tree_int_cst_compare (DR_STEP (a->dr), size_zero_node) < 0)
    3808                 :             :     {
    3809                 :         136 :       const_length_a = (-wi::to_poly_wide (segment_length_a)).force_uhwi ();
    3810                 :         136 :       offset_a -= const_length_a;
    3811                 :             :     }
    3812                 :             :   else
    3813                 :        1384 :     const_length_a = tree_to_poly_uint64 (segment_length_a);
    3814                 :        1520 :   if (tree_int_cst_compare (DR_STEP (b->dr), size_zero_node) < 0)
    3815                 :             :     {
    3816                 :         290 :       const_length_b = (-wi::to_poly_wide (segment_length_b)).force_uhwi ();
    3817                 :         290 :       offset_b -= const_length_b;
    3818                 :             :     }
    3819                 :             :   else
    3820                 :        1230 :     const_length_b = tree_to_poly_uint64 (segment_length_b);
    3821                 :             : 
    3822                 :        1520 :   const_length_a += access_size_a;
    3823                 :        1520 :   const_length_b += access_size_b;
    3824                 :             : 
    3825                 :        1520 :   if (ranges_known_overlap_p (offset_a, const_length_a,
    3826                 :             :                               offset_b, const_length_b))
    3827                 :             :     return 1;
    3828                 :             : 
    3829                 :         458 :   if (!ranges_maybe_overlap_p (offset_a, const_length_a,
    3830                 :             :                                offset_b, const_length_b))
    3831                 :         458 :     return 0;
    3832                 :             : 
    3833                 :             :   return -1;
    3834                 :             : }
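
[Editor's note: once the negative-step adjustment has shifted the offsets and the access sizes have been folded into the lengths, the constant-operand case above reduces to an interval-overlap test.  A minimal plain-integer sketch follows; the helper is hypothetical, and the -1 "cannot decide" result only arises for poly_int operands, which this model leaves out.]

    /* Hypothetical model: 1 if the two byte ranges overlap, 0 otherwise.  */
    static int
    compile_time_alias_model (long offset_a, long total_len_a,
                              long offset_b, long total_len_b)
    {
      bool overlap = offset_a < offset_b + total_len_b
                     && offset_b < offset_a + total_len_a;
      return overlap ? 1 : 0;
    }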
    3835                 :             : 
    3836                 :             : /* Return true if the minimum nonzero dependence distance for loop LOOP_DEPTH
    3837                 :             :    in DDR is >= VF.  */
    3838                 :             : 
    3839                 :             : static bool
    3840                 :       64030 : dependence_distance_ge_vf (data_dependence_relation *ddr,
    3841                 :             :                            unsigned int loop_depth, poly_uint64 vf)
    3842                 :             : {
    3843                 :       64030 :   if (DDR_ARE_DEPENDENT (ddr) != NULL_TREE
    3844                 :       68121 :       || DDR_NUM_DIST_VECTS (ddr) == 0)
    3845                 :             :     return false;
    3846                 :             : 
    3847                 :             :   /* If the dependence is exact, we should have limited the VF instead.  */
    3848                 :        4146 :   gcc_checking_assert (DDR_COULD_BE_INDEPENDENT_P (ddr));
    3849                 :             : 
    3850                 :             :   unsigned int i;
    3851                 :             :   lambda_vector dist_v;
    3852                 :        8343 :   FOR_EACH_VEC_ELT (DDR_DIST_VECTS (ddr), i, dist_v)
    3853                 :             :     {
    3854                 :        8288 :       HOST_WIDE_INT dist = dist_v[loop_depth];
    3855                 :        8288 :       if (dist != 0
    3856                 :        4146 :           && !(dist > 0 && DDR_REVERSED_P (ddr))
    3857                 :       12434 :           && maybe_lt ((unsigned HOST_WIDE_INT) abs_hwi (dist), vf))
    3858                 :             :         return false;
    3859                 :             :     }
    3860                 :             : 
    3861                 :          55 :   if (dump_enabled_p ())
    3862                 :          14 :     dump_printf_loc (MSG_NOTE, vect_location,
    3863                 :             :                      "dependence distance between %T and %T is >= VF\n",
    3864                 :          14 :                      DR_REF (DDR_A (ddr)), DR_REF (DDR_B (ddr)));
    3865                 :             : 
    3866                 :             :   return true;
    3867                 :             : }
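
[Editor's note: the per-distance test applied in the loop above can be summarised with plain integers as follows; this is a sketch only and the names are hypothetical.  A distance of zero, or a positive distance in a reversed relation, is harmless; otherwise the absolute distance must be at least the vectorization factor.]

    /* Hypothetical model of the per-distance check.  */
    static bool
    distance_ok_for_vf (long dist, unsigned long vf, bool reversed_p)
    {
      if (dist == 0)
        return true;
      if (dist > 0 && reversed_p)
        return true;
      unsigned long abs_dist = (unsigned long) (dist < 0 ? -dist : dist);
      return abs_dist >= vf;
    }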
    3868                 :             : 
    3869                 :             : /* Dump LOWER_BOUND using flags DUMP_KIND.  Dumps are known to be enabled.  */
    3870                 :             : 
    3871                 :             : static void
    3872                 :         436 : dump_lower_bound (dump_flags_t dump_kind, const vec_lower_bound &lower_bound)
    3873                 :             : {
    3874                 :         436 :   dump_printf (dump_kind, "%s (%T) >= ",
    3875                 :         436 :                lower_bound.unsigned_p ? "unsigned" : "abs",
    3876                 :         436 :                lower_bound.expr);
    3877                 :         436 :   dump_dec (dump_kind, lower_bound.min_value);
    3878                 :         436 : }
    3879                 :             : 
    3880                 :             : /* Record that the vectorized loop requires the vec_lower_bound described
    3881                 :             :    by EXPR, UNSIGNED_P and MIN_VALUE.  */
    3882                 :             : 
    3883                 :             : static void
    3884                 :        5131 : vect_check_lower_bound (loop_vec_info loop_vinfo, tree expr, bool unsigned_p,
    3885                 :             :                         poly_uint64 min_value)
    3886                 :             : {
    3887                 :        5131 :   vec<vec_lower_bound> &lower_bounds
    3888                 :             :     = LOOP_VINFO_LOWER_BOUNDS (loop_vinfo);
    3889                 :        6103 :   for (unsigned int i = 0; i < lower_bounds.length (); ++i)
    3890                 :        4689 :     if (operand_equal_p (lower_bounds[i].expr, expr, 0))
    3891                 :             :       {
    3892                 :        3717 :         unsigned_p &= lower_bounds[i].unsigned_p;
    3893                 :        3717 :         min_value = upper_bound (lower_bounds[i].min_value, min_value);
    3894                 :        3717 :         if (lower_bounds[i].unsigned_p != unsigned_p
    3895                 :        3717 :             || maybe_lt (lower_bounds[i].min_value, min_value))
    3896                 :             :           {
    3897                 :         600 :             lower_bounds[i].unsigned_p = unsigned_p;
    3898                 :         600 :             lower_bounds[i].min_value = min_value;
    3899                 :         600 :             if (dump_enabled_p ())
    3900                 :             :               {
    3901                 :         246 :                 dump_printf_loc (MSG_NOTE, vect_location,
    3902                 :             :                                  "updating run-time check to ");
    3903                 :         246 :                 dump_lower_bound (MSG_NOTE, lower_bounds[i]);
    3904                 :         246 :                 dump_printf (MSG_NOTE, "\n");
    3905                 :             :               }
    3906                 :             :           }
    3907                 :        3717 :         return;
    3908                 :             :       }
    3909                 :             : 
    3910                 :        1414 :   vec_lower_bound lower_bound (expr, unsigned_p, min_value);
    3911                 :        1414 :   if (dump_enabled_p ())
    3912                 :             :     {
    3913                 :         190 :       dump_printf_loc (MSG_NOTE, vect_location, "need a run-time check that ");
    3914                 :         190 :       dump_lower_bound (MSG_NOTE, lower_bound);
    3915                 :         190 :       dump_printf (MSG_NOTE, "\n");
    3916                 :             :     }
    3917                 :        1414 :   LOOP_VINFO_LOWER_BOUNDS (loop_vinfo).safe_push (lower_bound);
    3918                 :             : }
    3919                 :             : 
    3920                 :             : /* Return true if it's unlikely that the step of the vectorized form of DR_INFO
    3921                 :             :    will span fewer than GAP bytes.  */
    3922                 :             : 
    3923                 :             : static bool
    3924                 :        3666 : vect_small_gap_p (loop_vec_info loop_vinfo, dr_vec_info *dr_info,
    3925                 :             :                   poly_int64 gap)
    3926                 :             : {
    3927                 :        3666 :   stmt_vec_info stmt_info = dr_info->stmt;
    3928                 :        3666 :   HOST_WIDE_INT count
    3929                 :        3666 :     = estimated_poly_value (LOOP_VINFO_VECT_FACTOR (loop_vinfo));
    3930                 :        3666 :   if (DR_GROUP_FIRST_ELEMENT (stmt_info))
    3931                 :        3618 :     count *= DR_GROUP_SIZE (DR_GROUP_FIRST_ELEMENT (stmt_info));
    3932                 :        3666 :   return (estimated_poly_value (gap)
    3933                 :        3666 :           <= count * vect_get_scalar_dr_size (dr_info));
    3934                 :             : }
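
[Editor's note: a minimal plain-integer sketch of the heuristic above; the helper is hypothetical.  The gap counts as "small" if one estimated vector iteration, scaled by the group size (use 1 for ungrouped accesses), is expected to cover it.]

    /* Hypothetical model of vect_small_gap_p.  */
    static bool
    small_gap_model (long gap, long estimated_vf, long group_size,
                     long scalar_access_size)
    {
      return gap <= estimated_vf * group_size * scalar_access_size;
    }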
    3935                 :             : 
    3936                 :             : /* Return true if we know that there is no alias between DR_INFO_A and
    3937                 :             :    DR_INFO_B when abs (DR_STEP (DR_INFO_A->dr)) >= N for some N.
    3938                 :             :    When returning true, set *LOWER_BOUND_OUT to this N.  */
    3939                 :             : 
    3940                 :             : static bool
    3941                 :       17474 : vectorizable_with_step_bound_p (dr_vec_info *dr_info_a, dr_vec_info *dr_info_b,
    3942                 :             :                                 poly_uint64 *lower_bound_out)
    3943                 :             : {
    3944                 :             :   /* Check that there is a constant gap of known sign between DR_A
    3945                 :             :      and DR_B.  */
    3946                 :       17474 :   data_reference *dr_a = dr_info_a->dr;
    3947                 :       17474 :   data_reference *dr_b = dr_info_b->dr;
    3948                 :       17474 :   poly_int64 init_a, init_b;
    3949                 :       17474 :   if (!operand_equal_p (DR_BASE_ADDRESS (dr_a), DR_BASE_ADDRESS (dr_b), 0)
    3950                 :        6986 :       || !operand_equal_p (DR_OFFSET (dr_a), DR_OFFSET (dr_b), 0)
    3951                 :        5966 :       || !operand_equal_p (DR_STEP (dr_a), DR_STEP (dr_b), 0)
    3952                 :        5956 :       || !poly_int_tree_p (DR_INIT (dr_a), &init_a)
    3953                 :        5956 :       || !poly_int_tree_p (DR_INIT (dr_b), &init_b)
    3954                 :       17474 :       || !ordered_p (init_a, init_b))
    3955                 :       11518 :     return false;
    3956                 :             : 
    3957                 :             :   /* Sort DR_A and DR_B by the address they access.  */
    3958                 :        5956 :   if (maybe_lt (init_b, init_a))
    3959                 :             :     {
    3960                 :         128 :       std::swap (init_a, init_b);
    3961                 :         128 :       std::swap (dr_info_a, dr_info_b);
    3962                 :         128 :       std::swap (dr_a, dr_b);
    3963                 :             :     }
    3964                 :             : 
    3965                 :             :   /* If the two accesses could be dependent within a scalar iteration,
    3966                 :             :      make sure that we'd retain their order.  */
    3967                 :        5956 :   if (maybe_gt (init_a + vect_get_scalar_dr_size (dr_info_a), init_b)
    3968                 :        5956 :       && !vect_preserves_scalar_order_p (dr_info_a, dr_info_b))
    3969                 :             :     return false;
    3970                 :             : 
    3971                 :             :   /* There is no alias if abs (DR_STEP) is greater than or equal to
    3972                 :             :      the bytes spanned by the combination of the two accesses.  */
    3973                 :        5956 :   *lower_bound_out = init_b + vect_get_scalar_dr_size (dr_info_b) - init_a;
    3974                 :        5956 :   return true;
    3975                 :             : }
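
[Editor's note: for two accesses with the same base, offset and step, the lower bound computed above is simply the span from the lower access's start to the upper access's end within one scalar iteration; if |DR_STEP| is at least that many bytes, the accesses cannot overlap across iterations.  A plain-integer sketch with a hypothetical helper:]

    #include <algorithm>

    /* Hypothetical model: bytes spanned by the combination of two accesses.  */
    static long
    step_lower_bound_model (long init_a, long size_a, long init_b, long size_b)
    {
      if (init_b < init_a)
        {
          std::swap (init_a, init_b);
          std::swap (size_a, size_b);
        }
      return init_b + size_b - init_a;
    }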
    3976                 :             : 
    3977                 :             : /* Function vect_prune_runtime_alias_test_list.
    3978                 :             : 
    3979                 :             :    Prune a list of ddrs to be tested at run-time by versioning for alias.
    3980                 :             :    Merge several alias checks into one if possible.
    3981                 :             :    Return FALSE if the resulting list of ddrs is longer than allowed by
    3982                 :             :    PARAM_VECT_MAX_VERSION_FOR_ALIAS_CHECKS, otherwise return TRUE.  */
    3983                 :             : 
    3984                 :             : opt_result
    3985                 :      339941 : vect_prune_runtime_alias_test_list (loop_vec_info loop_vinfo)
    3986                 :             : {
    3987                 :      339941 :   typedef pair_hash <tree_operand_hash, tree_operand_hash> tree_pair_hash;
    3988                 :      339941 :   hash_set <tree_pair_hash> compared_objects;
    3989                 :             : 
    3990                 :      339941 :   const vec<ddr_p> &may_alias_ddrs = LOOP_VINFO_MAY_ALIAS_DDRS (loop_vinfo);
    3991                 :      339941 :   vec<dr_with_seg_len_pair_t> &comp_alias_ddrs
    3992                 :             :     = LOOP_VINFO_COMP_ALIAS_DDRS (loop_vinfo);
    3993                 :      339941 :   const vec<vec_object_pair> &check_unequal_addrs
    3994                 :             :     = LOOP_VINFO_CHECK_UNEQUAL_ADDRS (loop_vinfo);
    3995                 :      339941 :   poly_uint64 vect_factor = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
    3996                 :      339941 :   tree scalar_loop_iters = LOOP_VINFO_NITERS (loop_vinfo);
    3997                 :             : 
    3998                 :      339941 :   ddr_p ddr;
    3999                 :      339941 :   unsigned int i;
    4000                 :      339941 :   tree length_factor;
    4001                 :             : 
    4002                 :      339941 :   DUMP_VECT_SCOPE ("vect_prune_runtime_alias_test_list");
    4003                 :             : 
    4004                 :             :   /* Step values are irrelevant for aliasing if the number of vector
    4005                 :             :      iterations is equal to the number of scalar iterations (which can
    4006                 :             :      happen for fully-SLP loops).  */
    4007                 :      339941 :   bool vf_one_p = known_eq (LOOP_VINFO_VECT_FACTOR (loop_vinfo), 1U);
    4008                 :             : 
    4009                 :      339941 :   if (!vf_one_p)
    4010                 :             :     {
    4011                 :             :       /* Convert the checks for nonzero steps into bound tests.  */
    4012                 :             :       tree value;
    4013                 :      337935 :       FOR_EACH_VEC_ELT (LOOP_VINFO_CHECK_NONZERO (loop_vinfo), i, value)
    4014                 :        1513 :         vect_check_lower_bound (loop_vinfo, value, true, 1);
    4015                 :             :     }
    4016                 :             : 
    4017                 :      339941 :   if (may_alias_ddrs.is_empty ())
    4018                 :      323061 :     return opt_result::success ();
    4019                 :             : 
    4020                 :       16880 :   comp_alias_ddrs.create (may_alias_ddrs.length ());
    4021                 :             : 
    4022                 :       16880 :   unsigned int loop_depth
    4023                 :       16880 :     = index_in_loop_nest (LOOP_VINFO_LOOP (loop_vinfo)->num,
    4024                 :       16880 :                           LOOP_VINFO_LOOP_NEST (loop_vinfo));
    4025                 :             : 
    4026                 :             :   /* First, we collect all data ref pairs for aliasing checks.  */
    4027                 :       79848 :   FOR_EACH_VEC_ELT (may_alias_ddrs, i, ddr)
    4028                 :             :     {
    4029                 :       64030 :       poly_uint64 lower_bound;
    4030                 :       64030 :       tree segment_length_a, segment_length_b;
    4031                 :       64030 :       unsigned HOST_WIDE_INT access_size_a, access_size_b;
    4032                 :       64030 :       unsigned HOST_WIDE_INT align_a, align_b;
    4033                 :             : 
    4034                 :             :       /* Ignore the alias if the VF we chose ended up being no greater
    4035                 :             :          than the dependence distance.  */
    4036                 :       64030 :       if (dependence_distance_ge_vf (ddr, loop_depth, vect_factor))
    4037                 :        9951 :         continue;
    4038                 :             : 
    4039                 :       63975 :       if (DDR_OBJECT_A (ddr))
    4040                 :             :         {
    4041                 :          46 :           vec_object_pair new_pair (DDR_OBJECT_A (ddr), DDR_OBJECT_B (ddr));
    4042                 :          46 :           if (!compared_objects.add (new_pair))
    4043                 :             :             {
    4044                 :          14 :               if (dump_enabled_p ())
    4045                 :           8 :                 dump_printf_loc (MSG_NOTE, vect_location,
    4046                 :             :                                  "checking that %T and %T"
    4047                 :             :                                  " have different addresses\n",
    4048                 :             :                                  new_pair.first, new_pair.second);
    4049                 :          14 :               LOOP_VINFO_CHECK_UNEQUAL_ADDRS (loop_vinfo).safe_push (new_pair);
    4050                 :             :             }
    4051                 :          46 :           continue;
    4052                 :          46 :         }
    4053                 :             : 
    4054                 :       63929 :       dr_vec_info *dr_info_a = loop_vinfo->lookup_dr (DDR_A (ddr));
    4055                 :       63929 :       stmt_vec_info stmt_info_a = dr_info_a->stmt;
    4056                 :             : 
    4057                 :       63929 :       dr_vec_info *dr_info_b = loop_vinfo->lookup_dr (DDR_B (ddr));
    4058                 :       63929 :       stmt_vec_info stmt_info_b = dr_info_b->stmt;
    4059                 :             : 
    4060                 :       63929 :       bool preserves_scalar_order_p
    4061                 :       63929 :         = vect_preserves_scalar_order_p (dr_info_a, dr_info_b);
    4062                 :       63929 :       bool ignore_step_p
    4063                 :             :           = (vf_one_p
    4064                 :       63929 :              && (preserves_scalar_order_p
    4065                 :        2817 :                  || operand_equal_p (DR_STEP (dr_info_a->dr),
    4066                 :        2817 :                                      DR_STEP (dr_info_b->dr))));
    4067                 :             : 
    4068                 :             :       /* Skip the pair if inter-iteration dependencies are irrelevant
    4069                 :             :          and intra-iteration dependencies are guaranteed to be honored.  */
    4070                 :       11789 :       if (ignore_step_p
    4071                 :        6015 :           && (preserves_scalar_order_p
    4072                 :        2555 :               || vectorizable_with_step_bound_p (dr_info_a, dr_info_b,
    4073                 :             :                                                  &lower_bound)))
    4074                 :             :         {
    4075                 :        5774 :           if (dump_enabled_p ())
    4076                 :        2352 :             dump_printf_loc (MSG_NOTE, vect_location,
    4077                 :             :                              "no need for alias check between "
    4078                 :             :                              "%T and %T when VF is 1\n",
    4079                 :        2352 :                              DR_REF (dr_info_a->dr), DR_REF (dr_info_b->dr));
    4080                 :        5774 :           continue;
    4081                 :             :         }
    4082                 :             : 
    4083                 :             :       /* See whether we can handle the alias using a bounds check on
    4084                 :             :          the step, and whether that's likely to be the best approach.
    4085                 :             :          (It might not be, for example, if the minimum step is much larger
    4086                 :             :          than the number of bytes handled by one vector iteration.)  */
    4087                 :       58155 :       if (!ignore_step_p
    4088                 :       57914 :           && TREE_CODE (DR_STEP (dr_info_a->dr)) != INTEGER_CST
    4089                 :       14919 :           && vectorizable_with_step_bound_p (dr_info_a, dr_info_b,
    4090                 :             :                                              &lower_bound)
    4091                 :       61797 :           && (vect_small_gap_p (loop_vinfo, dr_info_a, lower_bound)
    4092                 :          24 :               || vect_small_gap_p (loop_vinfo, dr_info_b, lower_bound)))
    4093                 :             :         {
    4094                 :        3618 :           bool unsigned_p = dr_known_forward_stride_p (dr_info_a->dr);
    4095                 :        3618 :           if (dump_enabled_p ())
    4096                 :             :             {
    4097                 :        3312 :               dump_printf_loc (MSG_NOTE, vect_location, "no alias between "
    4098                 :             :                                "%T and %T when the step %T is outside ",
    4099                 :             :                                DR_REF (dr_info_a->dr),
    4100                 :        1656 :                                DR_REF (dr_info_b->dr),
    4101                 :        1656 :                                DR_STEP (dr_info_a->dr));
    4102                 :        1656 :               if (unsigned_p)
    4103                 :         504 :                 dump_printf (MSG_NOTE, "[0");
    4104                 :             :               else
    4105                 :             :                 {
    4106                 :        1152 :                   dump_printf (MSG_NOTE, "(");
    4107                 :        1152 :                   dump_dec (MSG_NOTE, poly_int64 (-lower_bound));
    4108                 :             :                 }
    4109                 :        1656 :               dump_printf (MSG_NOTE, ", ");
    4110                 :        1656 :               dump_dec (MSG_NOTE, lower_bound);
    4111                 :        1656 :               dump_printf (MSG_NOTE, ")\n");
    4112                 :             :             }
    4113                 :        3618 :           vect_check_lower_bound (loop_vinfo, DR_STEP (dr_info_a->dr),
    4114                 :             :                                   unsigned_p, lower_bound);
    4115                 :        3618 :           continue;
    4116                 :        3618 :         }
    4117                 :             : 
    4118                 :       54537 :       stmt_vec_info dr_group_first_a = DR_GROUP_FIRST_ELEMENT (stmt_info_a);
    4119                 :       54537 :       if (dr_group_first_a)
    4120                 :             :         {
    4121                 :       17835 :           stmt_info_a = dr_group_first_a;
    4122                 :       17835 :           dr_info_a = STMT_VINFO_DR_INFO (stmt_info_a);
    4123                 :             :         }
    4124                 :             : 
    4125                 :       54537 :       stmt_vec_info dr_group_first_b = DR_GROUP_FIRST_ELEMENT (stmt_info_b);
    4126                 :       54537 :       if (dr_group_first_b)
    4127                 :             :         {
    4128                 :       17699 :           stmt_info_b = dr_group_first_b;
    4129                 :       17699 :           dr_info_b = STMT_VINFO_DR_INFO (stmt_info_b);
    4130                 :             :         }
    4131                 :             : 
    4132                 :       54537 :       if (ignore_step_p)
    4133                 :             :         {
    4134                 :         241 :           segment_length_a = size_zero_node;
    4135                 :         241 :           segment_length_b = size_zero_node;
    4136                 :             :         }
    4137                 :             :       else
    4138                 :             :         {
    4139                 :       54296 :           if (!operand_equal_p (DR_STEP (dr_info_a->dr),
    4140                 :       54296 :                                 DR_STEP (dr_info_b->dr), 0))
    4141                 :             :             length_factor = scalar_loop_iters;
    4142                 :             :           else
    4143                 :       42253 :             length_factor = size_int (vect_factor);
    4144                 :       54296 :           segment_length_a = vect_vfa_segment_size (dr_info_a, length_factor);
    4145                 :       54296 :           segment_length_b = vect_vfa_segment_size (dr_info_b, length_factor);
    4146                 :             :         }
    4147                 :       54537 :       access_size_a = vect_vfa_access_size (loop_vinfo, dr_info_a);
    4148                 :       54537 :       access_size_b = vect_vfa_access_size (loop_vinfo, dr_info_b);
    4149                 :       54537 :       align_a = vect_vfa_align (dr_info_a);
    4150                 :       54537 :       align_b = vect_vfa_align (dr_info_b);
    4151                 :             : 
    4152                 :             :       /* See whether the alias is known at compilation time.  */
    4153                 :       54537 :       if (operand_equal_p (DR_BASE_ADDRESS (dr_info_a->dr),
    4154                 :       54537 :                            DR_BASE_ADDRESS (dr_info_b->dr), 0)
    4155                 :        3435 :           && operand_equal_p (DR_OFFSET (dr_info_a->dr),
    4156                 :        3435 :                               DR_OFFSET (dr_info_b->dr), 0)
    4157                 :        1602 :           && TREE_CODE (DR_STEP (dr_info_a->dr)) == INTEGER_CST
    4158                 :        1568 :           && TREE_CODE (DR_STEP (dr_info_b->dr)) == INTEGER_CST
    4159                 :        1568 :           && poly_int_tree_p (segment_length_a)
    4160                 :       56068 :           && poly_int_tree_p (segment_length_b))
    4161                 :             :         {
    4162                 :        1520 :           int res = vect_compile_time_alias (dr_info_a, dr_info_b,
    4163                 :             :                                              segment_length_a,
    4164                 :             :                                              segment_length_b,
    4165                 :             :                                              access_size_a,
    4166                 :             :                                              access_size_b);
    4167                 :        1520 :           if (res >= 0 && dump_enabled_p ())
    4168                 :             :             {
    4169                 :         232 :               dump_printf_loc (MSG_NOTE, vect_location,
    4170                 :             :                                "can tell at compile time that %T and %T",
    4171                 :         116 :                                DR_REF (dr_info_a->dr), DR_REF (dr_info_b->dr));
    4172                 :         116 :               if (res == 0)
    4173                 :          57 :                 dump_printf (MSG_NOTE, " do not alias\n");
    4174                 :             :               else
    4175                 :          59 :                 dump_printf (MSG_NOTE, " alias\n");
    4176                 :             :             }
    4177                 :             : 
    4178                 :        1520 :           if (res == 0)
    4179                 :         458 :             continue;
    4180                 :             : 
    4181                 :        1062 :           if (res == 1)
    4182                 :        1062 :             return opt_result::failure_at (stmt_info_b->stmt,
    4183                 :             :                                            "not vectorized:"
    4184                 :             :                                            " compilation time alias: %G%G",
    4185                 :             :                                            stmt_info_a->stmt,
    4186                 :             :                                            stmt_info_b->stmt);
    4187                 :             :         }
    4188                 :             : 
    4189                 :             :       /* dr_with_seg_len requires the alignment to apply to the segment length
    4190                 :             :          and access size, not just the start address.  The access size can be
    4191                 :             :          smaller than the pointer alignment for grouped accesses and bitfield
    4192                 :             :          references; see PR115192 and PR116125 respectively.  */
    4193                 :       53017 :       align_a = std::min (align_a, least_bit_hwi (access_size_a));
    4194                 :       53017 :       align_b = std::min (align_b, least_bit_hwi (access_size_b));
    4195                 :             : 
    4196                 :       53017 :       dr_with_seg_len dr_a (dr_info_a->dr, segment_length_a,
    4197                 :       53017 :                             access_size_a, align_a);
    4198                 :       53017 :       dr_with_seg_len dr_b (dr_info_b->dr, segment_length_b,
    4199                 :       53017 :                             access_size_b, align_b);
    4200                 :             :       /* Canonicalize the order to be the one that's needed for accurate
    4201                 :             :          RAW, WAR and WAW flags, in cases where the data references are
    4202                 :             :          well-ordered.  The order doesn't really matter otherwise,
    4203                 :             :          but we might as well be consistent.  */
    4204                 :       53017 :       if (get_later_stmt (stmt_info_a, stmt_info_b) == stmt_info_a)
    4205                 :        4391 :         std::swap (dr_a, dr_b);
    4206                 :             : 
    4207                 :       53017 :       dr_with_seg_len_pair_t dr_with_seg_len_pair
    4208                 :             :         (dr_a, dr_b, (preserves_scalar_order_p
    4209                 :             :                       ? dr_with_seg_len_pair_t::WELL_ORDERED
    4210                 :       58754 :                       : dr_with_seg_len_pair_t::REORDERED));
    4211                 :             : 
    4212                 :       53017 :       comp_alias_ddrs.safe_push (dr_with_seg_len_pair);
    4213                 :             :     }
    4214                 :             : 
    4215                 :       15818 :   prune_runtime_alias_test_list (&comp_alias_ddrs, vect_factor);
    4216                 :             : 
    4217                 :       31636 :   unsigned int count = (comp_alias_ddrs.length ()
    4218                 :       15818 :                         + check_unequal_addrs.length ());
    4219                 :             : 
    4220                 :       15818 :   if (count
    4221                 :       15818 :       && (loop_cost_model (LOOP_VINFO_LOOP (loop_vinfo))
    4222                 :             :           == VECT_COST_MODEL_VERY_CHEAP))
    4223                 :       10155 :     return opt_result::failure_at
    4224                 :       10155 :       (vect_location, "would need a runtime alias check\n");
    4225                 :             : 
    4226                 :        5663 :   if (dump_enabled_p ())
    4227                 :        1898 :     dump_printf_loc (MSG_NOTE, vect_location,
    4228                 :             :                      "improved number of alias checks from %d to %d\n",
    4229                 :             :                      may_alias_ddrs.length (), count);
    4230                 :        5663 :   unsigned limit = param_vect_max_version_for_alias_checks;
    4231                 :        5663 :   if (loop_cost_model (LOOP_VINFO_LOOP (loop_vinfo)) == VECT_COST_MODEL_CHEAP)
    4232                 :         741 :     limit = param_vect_max_version_for_alias_checks * 6 / 10;
    4233                 :        5663 :   if (count > limit)
    4234                 :         166 :     return opt_result::failure_at
    4235                 :         166 :       (vect_location,
    4236                 :             :        "number of versioning for alias run-time tests exceeds %d "
    4237                 :             :        "(--param vect-max-version-for-alias-checks)\n", limit);
    4238                 :             : 
    4239                 :        5497 :   return opt_result::success ();
    4240                 :      339941 : }
    4241                 :             : 
    4242                 :             : /* Check whether we can use an internal function for a gather load
    4243                 :             :    or scatter store.  READ_P is true for loads and false for stores.
    4244                 :             :    MASKED_P is true if the load or store is conditional.  MEMORY_TYPE is
    4245                 :             :    the type of the memory elements being loaded or stored.  OFFSET_TYPE
    4246                 :             :    is the type of the offset that is being applied to the invariant
    4247                 :             :    base address.  SCALE is the amount by which the offset should
    4248                 :             :    be multiplied *after* it has been converted to address width.
    4249                 :             : 
    4250                 :             :    Return true if the function is supported, storing the function id in
    4251                 :             :    *IFN_OUT and the vector type for the offset in *OFFSET_VECTYPE_OUT.
    4252                 :             : 
    4253                 :             :    If we can use a gather, store the possible else values in ELSVALS.  */
    4254                 :             : 
    4255                 :             : bool
    4256                 :       45900 : vect_gather_scatter_fn_p (vec_info *vinfo, bool read_p, bool masked_p,
    4257                 :             :                           tree vectype, tree memory_type, tree offset_type,
    4258                 :             :                           int scale, internal_fn *ifn_out,
    4259                 :             :                           tree *offset_vectype_out, vec<int> *elsvals)
    4260                 :             : {
    4261                 :       45900 :   unsigned int memory_bits = tree_to_uhwi (TYPE_SIZE (memory_type));
    4262                 :       45900 :   unsigned int element_bits = vector_element_bits (vectype);
    4263                 :       45900 :   if (element_bits != memory_bits)
    4264                 :             :     /* For now the vector elements must be the same width as the
    4265                 :             :        memory elements.  */
    4266                 :             :     return false;
    4267                 :             :       /* When the target supports MASK_LEN_GATHER_LOAD, we always
    4268                 :             :          use MASK_LEN_GATHER_LOAD regardless of whether len and
    4269                 :             :          mask are valid or not.  */
    4270                 :       45900 :   if (read_p)
    4271                 :             :     {
    4272                 :       18866 :       ifn = masked_p ? IFN_MASK_GATHER_LOAD : IFN_GATHER_LOAD;
    4273                 :             :       alt_ifn = IFN_MASK_GATHER_LOAD;
    4274                 :             :       /* When target supports MASK_LEN_GATHER_LOAD, we always
    4275                 :             :          use MASK_LEN_GATHER_LOAD regardless whether len and
    4276                 :             :       /* When the target supports MASK_LEN_SCATTER_STORE, we always
    4277                 :             :          use MASK_LEN_SCATTER_STORE regardless of whether len and
    4278                 :             :          mask are valid or not.  */
    4279                 :             :   else
    4280                 :             :     {
    4281                 :       27034 :       ifn = masked_p ? IFN_MASK_SCATTER_STORE : IFN_SCATTER_STORE;
    4282                 :       45900 :       alt_ifn = IFN_MASK_SCATTER_STORE;
    4283                 :             :       /* When target supports MASK_LEN_SCATTER_STORE, we always
    4284                 :             :          use MASK_LEN_SCATTER_STORE regardless whether len and
    4285                 :             :          mask are valid or not.  */
    4286                 :       45900 :       alt_ifn2 = IFN_MASK_LEN_SCATTER_STORE;
    4287                 :             :     }
    4288                 :             : 
    4289                 :      329282 :   for (;;)
    4290                 :             :     {
    4291                 :      187591 :       tree offset_vectype = get_vectype_for_scalar_type (vinfo, offset_type);
    4292                 :      187591 :       if (!offset_vectype)
    4293                 :             :         return false;
    4294                 :             : 
    4295                 :             :       /* Test whether the target supports this combination.  */
    4296                 :      184085 :       if (internal_gather_scatter_fn_supported_p (ifn, vectype, memory_type,
    4297                 :             :                                                   offset_vectype, scale,
    4298                 :             :                                                   elsvals))
    4299                 :             :         {
    4300                 :           0 :           *ifn_out = ifn;
    4301                 :           0 :           *offset_vectype_out = offset_vectype;
    4302                 :           0 :           return true;
    4303                 :             :         }
    4304                 :      184085 :       else if (!masked_p
    4305                 :      184085 :                && internal_gather_scatter_fn_supported_p (alt_ifn, vectype,
    4306                 :             :                                                           memory_type,
    4307                 :             :                                                           offset_vectype,
    4308                 :             :                                                           scale, elsvals))
    4309                 :             :         {
    4310                 :           0 :           *ifn_out = alt_ifn;
    4311                 :           0 :           *offset_vectype_out = offset_vectype;
    4312                 :           0 :           return true;
    4313                 :             :         }
    4314                 :      184085 :       else if (internal_gather_scatter_fn_supported_p (alt_ifn2, vectype,
    4315                 :             :                                                        memory_type,
    4316                 :             :                                                        offset_vectype, scale,
    4317                 :             :                                                        elsvals))
    4318                 :             :         {
    4319                 :           0 :           *ifn_out = alt_ifn2;
    4320                 :           0 :           *offset_vectype_out = offset_vectype;
    4321                 :           0 :           return true;
    4322                 :             :         }
    4323                 :             : 
    4324                 :      184085 :       if (TYPE_PRECISION (offset_type) >= POINTER_SIZE
    4325                 :      184085 :           && TYPE_PRECISION (offset_type) >= element_bits)
    4326                 :             :         return false;
    4327                 :             : 
    4328                 :      141691 :       offset_type = build_nonstandard_integer_type
    4329                 :      141691 :         (TYPE_PRECISION (offset_type) * 2, TYPE_UNSIGNED (offset_type));
    4330                 :      141691 :     }
    4331                 :             : }
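
/* An illustrative sketch, not part of tree-vect-data-refs.cc (function name
   and types are hypothetical): a gather whose offset type starts out
   narrower than the 64-bit data elements.  If the target lacks support for
   the narrow offset vector, the retry loop above doubles the offset
   precision (16 -> 32 -> 64 bits) until a supported combination is found
   or the precision reaches both POINTER_SIZE and the element width.  */

void
example_gather (double *dst, const double *src,
                const unsigned short *idx, int n)
{
  for (int i = 0; i < n; i++)
    dst[i] = src[idx[i]];    /* indirect load -> gather candidate */
}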
    4332                 :             : 
    4333                 :             : /* STMT_INFO is a call to an internal gather load or scatter store function.
    4334                 :             :    Describe the operation in INFO.  */
    4335                 :             : 
    4336                 :             : static void
    4337                 :           0 : vect_describe_gather_scatter_call (stmt_vec_info stmt_info,
    4338                 :             :                                    gather_scatter_info *info)
    4339                 :             : {
    4340                 :           0 :   gcall *call = as_a <gcall *> (stmt_info->stmt);
    4341                 :           0 :   tree vectype = STMT_VINFO_VECTYPE (stmt_info);
    4342                 :           0 :   data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
    4343                 :             : 
    4344                 :           0 :   info->ifn = gimple_call_internal_fn (call);
    4345                 :           0 :   info->decl = NULL_TREE;
    4346                 :           0 :   info->base = gimple_call_arg (call, 0);
    4347                 :           0 :   info->offset = gimple_call_arg (call, 1);
    4348                 :           0 :   info->offset_dt = vect_unknown_def_type;
    4349                 :           0 :   info->offset_vectype = NULL_TREE;
    4350                 :           0 :   info->scale = TREE_INT_CST_LOW (gimple_call_arg (call, 2));
    4351                 :           0 :   info->element_type = TREE_TYPE (vectype);
    4352                 :           0 :   info->memory_type = TREE_TYPE (DR_REF (dr));
    4353                 :           0 : }
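
/* For reference, a gather call of the shape handled above looks roughly
   like this in the IL (names hypothetical, trailing mask/else operands
   elided):

     vect__1 = .MASK_GATHER_LOAD (base_ptr, vect_offsets, 4, ...);

   argument 0 is the base pointer, argument 1 the offset vector and
   argument 2 the scale, which are exactly the operands copied into INFO
   above.  */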
    4354                 :             : 
    4355                 :             : /* Return true if a non-affine read or write in STMT_INFO is suitable for a
    4356                 :             :    gather load or scatter store.  Describe the operation in *INFO if so.
     4357                 :             :    If it is suitable and ELSVALS is nonzero, store the supported else values
    4358                 :             :    in the vector it points to.  */
    4359                 :             : 
    4360                 :             : bool
    4361                 :      177145 : vect_check_gather_scatter (stmt_vec_info stmt_info, loop_vec_info loop_vinfo,
    4362                 :             :                            gather_scatter_info *info, vec<int> *elsvals)
    4363                 :             : {
    4364                 :      177145 :   HOST_WIDE_INT scale = 1;
    4365                 :      177145 :   poly_int64 pbitpos, pbitsize;
    4366                 :      177145 :   class loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
    4367                 :      177145 :   struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
    4368                 :      177145 :   tree offtype = NULL_TREE;
    4369                 :      177145 :   tree decl = NULL_TREE, base, off;
    4370                 :      177145 :   tree vectype = STMT_VINFO_VECTYPE (stmt_info);
    4371                 :      177145 :   tree memory_type = TREE_TYPE (DR_REF (dr));
    4372                 :      177145 :   machine_mode pmode;
    4373                 :      177145 :   int punsignedp, reversep, pvolatilep = 0;
    4374                 :      177145 :   internal_fn ifn;
    4375                 :      177145 :   tree offset_vectype;
    4376                 :      177145 :   bool masked_p = false;
    4377                 :             : 
    4378                 :             :   /* See whether this is already a call to a gather/scatter internal function.
    4379                 :             :      If not, see whether it's a masked load or store.  */
    4380                 :      177145 :   gcall *call = dyn_cast <gcall *> (stmt_info->stmt);
    4381                 :        7348 :   if (call && gimple_call_internal_p (call))
    4382                 :             :     {
    4383                 :        7348 :       ifn = gimple_call_internal_fn (call);
    4384                 :        7348 :       if (internal_gather_scatter_fn_p (ifn))
    4385                 :             :         {
    4386                 :           0 :           vect_describe_gather_scatter_call (stmt_info, info);
    4387                 :             : 
    4388                 :             :           /* In pattern recog we simply used a ZERO else value that
    4389                 :             :              we need to correct here.  To that end just re-use the
     4390                 :             :              (already successful) check of whether we support a gather IFN
    4391                 :             :              and have it populate the else values.  */
    4392                 :           0 :           if (DR_IS_READ (dr) && internal_fn_mask_index (ifn) >= 0 && elsvals)
    4393                 :           0 :             supports_vec_gather_load_p (TYPE_MODE (vectype), elsvals);
    4394                 :           0 :           return true;
    4395                 :             :         }
    4396                 :        7348 :       masked_p = (ifn == IFN_MASK_LOAD || ifn == IFN_MASK_STORE);
    4397                 :             :     }
    4398                 :             : 
    4399                 :             :   /* ???  For epilogues we adjust DR_REF to make the following stmt-based
    4400                 :             :      analysis work, but this adjustment doesn't work for epilogues of
    4401                 :             :      epilogues during transform, so disable gather/scatter in that case.  */
    4402                 :      177145 :   if (LOOP_VINFO_EPILOGUE_P (loop_vinfo)
    4403                 :        9318 :       && LOOP_VINFO_EPILOGUE_P (LOOP_VINFO_ORIG_LOOP_INFO (loop_vinfo)))
    4404                 :             :     return false;
    4405                 :             : 
    4406                 :             :   /* True if we should aim to use internal functions rather than
    4407                 :             :      built-in functions.  */
    4408                 :      177121 :   bool use_ifn_p = (DR_IS_READ (dr)
    4409                 :      177121 :                     ? supports_vec_gather_load_p (TYPE_MODE (vectype),
    4410                 :             :                                                   elsvals)
    4411                 :       69953 :                     : supports_vec_scatter_store_p (TYPE_MODE (vectype)));
    4412                 :             : 
    4413                 :      177121 :   base = DR_REF (dr);
     4414                 :             :   /* For masked loads/stores, DR_REF (dr) is an artificial MEM_REF;
    4415                 :             :      see if we can use the def stmt of the address.  */
    4416                 :      177121 :   if (masked_p
    4417                 :        7348 :       && TREE_CODE (base) == MEM_REF
    4418                 :        7348 :       && TREE_CODE (TREE_OPERAND (base, 0)) == SSA_NAME
    4419                 :        7348 :       && integer_zerop (TREE_OPERAND (base, 1))
    4420                 :      184469 :       && !expr_invariant_in_loop_p (loop, TREE_OPERAND (base, 0)))
    4421                 :             :     {
    4422                 :        7348 :       gimple *def_stmt = SSA_NAME_DEF_STMT (TREE_OPERAND (base, 0));
    4423                 :        7348 :       if (is_gimple_assign (def_stmt)
    4424                 :        7348 :           && gimple_assign_rhs_code (def_stmt) == ADDR_EXPR)
    4425                 :         791 :         base = TREE_OPERAND (gimple_assign_rhs1 (def_stmt), 0);
    4426                 :             :     }
    4427                 :             : 
     4428                 :             :   /* The gather and scatter builtins need an address of the form
    4429                 :             :      loop_invariant + vector * {1, 2, 4, 8}
    4430                 :             :      or
    4431                 :             :      loop_invariant + sign_extend (vector) * { 1, 2, 4, 8 }.
    4432                 :             :      Unfortunately DR_BASE_ADDRESS/DR_OFFSET can be a mixture
    4433                 :             :      of loop invariants/SSA_NAMEs defined in the loop, with casts,
    4434                 :             :      multiplications and additions in it.  To get a vector, we need
    4435                 :             :      a single SSA_NAME that will be defined in the loop and will
    4436                 :             :      contain everything that is not loop invariant and that can be
     4437                 :             :      vectorized.  The following code attempts to find such a preexisting
    4438                 :             :      SSA_NAME OFF and put the loop invariants into a tree BASE
    4439                 :             :      that can be gimplified before the loop.  */
    4440                 :      177121 :   base = get_inner_reference (base, &pbitsize, &pbitpos, &off, &pmode,
    4441                 :             :                               &punsignedp, &reversep, &pvolatilep);
    4442                 :      177121 :   if (reversep)
    4443                 :             :     return false;
    4444                 :             : 
    4445                 :             :   /* PR 107346.  Packed structs can have fields at offsets that are not
    4446                 :             :      multiples of BITS_PER_UNIT.  Do not use gather/scatters in such cases.  */
    4447                 :      177121 :   if (!multiple_p (pbitpos, BITS_PER_UNIT))
    4448                 :             :     return false;
    4449                 :             : 
     4450                 :             :   /* We need to be able to form an address to the base, which for example
    4451                 :             :      isn't possible for hard registers.  */
    4452                 :      177121 :   if (may_be_nonaddressable_p (base))
    4453                 :             :     return false;
    4454                 :             : 
    4455                 :      177113 :   poly_int64 pbytepos = exact_div (pbitpos, BITS_PER_UNIT);
    4456                 :             : 
    4457                 :      177113 :   if (TREE_CODE (base) == MEM_REF)
    4458                 :             :     {
    4459                 :      118762 :       if (!integer_zerop (TREE_OPERAND (base, 1)))
    4460                 :             :         {
    4461                 :        1941 :           if (off == NULL_TREE)
    4462                 :        1756 :             off = wide_int_to_tree (sizetype, mem_ref_offset (base));
    4463                 :             :           else
    4464                 :         185 :             off = size_binop (PLUS_EXPR, off,
    4465                 :             :                               fold_convert (sizetype, TREE_OPERAND (base, 1)));
    4466                 :             :         }
    4467                 :      118762 :       base = TREE_OPERAND (base, 0);
     4468                 :             :   /* If base is not loop invariant, then if off is 0 we start with just
     4469                 :             :      the constant offset in the loop invariant BASE and continue with base
     4470                 :             :      as OFF; otherwise give up.
     4471                 :             :      We could handle that case by gimplifying the addition of base + off
     4472                 :             :      into some SSA_NAME and using that as off, but for now punt.  */
    4473                 :      104528 :     off = size_zero_node;
    4474                 :             : 
    4475                 :             :   /* If base is not loop invariant, either off is 0, then we start with just
    4476                 :             :      the constant offset in the loop invariant BASE and continue with base
    4477                 :             :      as OFF, otherwise give up.
    4478                 :             :      We could handle that case by gimplifying the addition of base + off
    4479                 :             :      into some SSA_NAME and use that as off, but for now punt.  */
    4480                 :      177113 :   if (!expr_invariant_in_loop_p (loop, base))
    4481                 :             :     {
    4482                 :      106779 :       if (!integer_zerop (off))
    4483                 :             :         return false;
    4484                 :      104525 :       off = base;
    4485                 :      104525 :       base = size_int (pbytepos);
    4486                 :             :     }
    4487                 :             :   /* Otherwise put base + constant offset into the loop invariant BASE
    4488                 :             :      and continue with OFF.  */
    4489                 :             :   else
    4490                 :             :     {
    4491                 :       70334 :       base = fold_convert (sizetype, base);
    4492                 :       70334 :       base = size_binop (PLUS_EXPR, base, size_int (pbytepos));
    4493                 :             :     }
    4494                 :             : 
     4495                 :             :   /* OFF at this point may be either an SSA_NAME or some tree expression
    4496                 :             :      from get_inner_reference.  Try to peel off loop invariants from it
    4497                 :             :      into BASE as long as possible.  */
    4498                 :      174859 :   STRIP_NOPS (off);
    4499                 :      579164 :   while (offtype == NULL_TREE)
    4500                 :             :     {
    4501                 :      504379 :       enum tree_code code;
    4502                 :      504379 :       tree op0, op1, add = NULL_TREE;
    4503                 :             : 
    4504                 :      504379 :       if (TREE_CODE (off) == SSA_NAME)
    4505                 :             :         {
    4506                 :      354568 :           gimple *def_stmt = SSA_NAME_DEF_STMT (off);
    4507                 :             : 
    4508                 :      354568 :           if (expr_invariant_in_loop_p (loop, off))
    4509                 :          16 :             return false;
    4510                 :             : 
    4511                 :      354552 :           if (gimple_code (def_stmt) != GIMPLE_ASSIGN)
    4512                 :             :             break;
    4513                 :             : 
    4514                 :      332780 :           op0 = gimple_assign_rhs1 (def_stmt);
    4515                 :      332780 :           code = gimple_assign_rhs_code (def_stmt);
    4516                 :      332780 :           op1 = gimple_assign_rhs2 (def_stmt);
    4517                 :             :         }
    4518                 :             :       else
    4519                 :             :         {
    4520                 :      149811 :           if (get_gimple_rhs_class (TREE_CODE (off)) == GIMPLE_TERNARY_RHS)
    4521                 :             :             return false;
    4522                 :      149811 :           code = TREE_CODE (off);
    4523                 :      149811 :           extract_ops_from_tree (off, &code, &op0, &op1);
    4524                 :             :         }
    4525                 :      482591 :       switch (code)
    4526                 :             :         {
    4527                 :      138667 :         case POINTER_PLUS_EXPR:
    4528                 :      138667 :         case PLUS_EXPR:
    4529                 :      138667 :           if (expr_invariant_in_loop_p (loop, op0))
    4530                 :             :             {
    4531                 :       88382 :               add = op0;
    4532                 :       88382 :               off = op1;
    4533                 :      128263 :             do_add:
    4534                 :      128263 :               add = fold_convert (sizetype, add);
    4535                 :      128263 :               if (scale != 1)
    4536                 :       47292 :                 add = size_binop (MULT_EXPR, add, size_int (scale));
    4537                 :      128263 :               base = size_binop (PLUS_EXPR, base, add);
    4538                 :      404305 :               continue;
    4539                 :             :             }
    4540                 :       50285 :           if (expr_invariant_in_loop_p (loop, op1))
    4541                 :             :             {
    4542                 :       39555 :               add = op1;
    4543                 :       39555 :               off = op0;
    4544                 :       39555 :               goto do_add;
    4545                 :             :             }
    4546                 :             :           break;
    4547                 :         488 :         case MINUS_EXPR:
    4548                 :         488 :           if (expr_invariant_in_loop_p (loop, op1))
    4549                 :             :             {
    4550                 :         326 :               add = fold_convert (sizetype, op1);
    4551                 :         326 :               add = size_binop (MINUS_EXPR, size_zero_node, add);
    4552                 :         326 :               off = op0;
    4553                 :         326 :               goto do_add;
    4554                 :             :             }
    4555                 :             :           break;
    4556                 :      160597 :         case MULT_EXPR:
    4557                 :      160597 :           if (scale == 1 && tree_fits_shwi_p (op1))
    4558                 :             :             {
    4559                 :      127646 :               int new_scale = tree_to_shwi (op1);
    4560                 :             :               /* Only treat this as a scaling operation if the target
    4561                 :             :                  supports it for at least some offset type.  */
    4562                 :      127646 :               if (use_ifn_p
    4563                 :           0 :                   && !vect_gather_scatter_fn_p (loop_vinfo, DR_IS_READ (dr),
    4564                 :             :                                                 masked_p, vectype, memory_type,
    4565                 :             :                                                 signed_char_type_node,
    4566                 :             :                                                 new_scale, &ifn,
    4567                 :             :                                                 &offset_vectype,
    4568                 :             :                                                 elsvals)
    4569                 :      127646 :                   && !vect_gather_scatter_fn_p (loop_vinfo, DR_IS_READ (dr),
    4570                 :             :                                                 masked_p, vectype, memory_type,
    4571                 :             :                                                 unsigned_char_type_node,
    4572                 :             :                                                 new_scale, &ifn,
    4573                 :             :                                                 &offset_vectype,
    4574                 :             :                                                 elsvals))
    4575                 :             :                 break;
    4576                 :      127646 :               scale = new_scale;
    4577                 :      127646 :               off = op0;
    4578                 :      127646 :               continue;
    4579                 :      127646 :             }
    4580                 :             :           break;
    4581                 :           0 :         case SSA_NAME:
    4582                 :           0 :           off = op0;
    4583                 :           0 :           continue;
    4584                 :      151850 :         CASE_CONVERT:
    4585                 :      303700 :           if (!POINTER_TYPE_P (TREE_TYPE (op0))
    4586                 :      303700 :               && !INTEGRAL_TYPE_P (TREE_TYPE (op0)))
    4587                 :             :             break;
    4588                 :             : 
    4589                 :             :           /* Don't include the conversion if the target is happy with
    4590                 :             :              the current offset type.  */
    4591                 :      151850 :           if (use_ifn_p
    4592                 :           0 :               && TREE_CODE (off) == SSA_NAME
    4593                 :           0 :               && !POINTER_TYPE_P (TREE_TYPE (off))
    4594                 :      151850 :               && vect_gather_scatter_fn_p (loop_vinfo, DR_IS_READ (dr),
    4595                 :             :                                            masked_p, vectype, memory_type,
    4596                 :           0 :                                            TREE_TYPE (off), scale, &ifn,
    4597                 :             :                                            &offset_vectype, elsvals))
    4598                 :             :             break;
    4599                 :             : 
    4600                 :      151850 :           if (TYPE_PRECISION (TREE_TYPE (op0))
    4601                 :      151850 :               == TYPE_PRECISION (TREE_TYPE (off)))
    4602                 :             :             {
    4603                 :       73611 :               off = op0;
    4604                 :       73611 :               continue;
    4605                 :             :             }
    4606                 :             : 
     4607                 :             :           /* Include the conversion if it is widening and either we're
     4608                 :             :              using the IFN path, the target can handle the converted-from
     4609                 :             :              offset type, or the current size is not already the same as
     4610                 :             :              the data vector element size.  */
    4611                 :       78239 :           if ((TYPE_PRECISION (TREE_TYPE (op0))
    4612                 :       78239 :                < TYPE_PRECISION (TREE_TYPE (off)))
    4613                 :       78239 :               && (use_ifn_p
    4614                 :       78189 :                   || (DR_IS_READ (dr)
    4615                 :       46517 :                       ? (targetm.vectorize.builtin_gather
    4616                 :       46517 :                          && targetm.vectorize.builtin_gather (vectype,
    4617                 :       46517 :                                                               TREE_TYPE (op0),
    4618                 :             :                                                               scale))
    4619                 :       31672 :                       : (targetm.vectorize.builtin_scatter
    4620                 :       31672 :                          && targetm.vectorize.builtin_scatter (vectype,
    4621                 :       31672 :                                                                TREE_TYPE (op0),
    4622                 :             :                                                                scale)))
    4623                 :       76954 :                   || !operand_equal_p (TYPE_SIZE (TREE_TYPE (off)),
    4624                 :       76954 :                                        TYPE_SIZE (TREE_TYPE (vectype)), 0)))
    4625                 :             :             {
    4626                 :       74785 :               off = op0;
    4627                 :       74785 :               offtype = TREE_TYPE (off);
    4628                 :       74785 :               STRIP_NOPS (off);
    4629                 :       74785 :               continue;
    4630                 :             :             }
     4631                 :             :   /* If at the end OFF still isn't an SSA_NAME or isn't
    4632                 :             :         default:
    4633                 :             :           break;
    4634                 :           0 :         }
    4635                 :             :       break;
    4636                 :             :     }
    4637                 :             : 
    4638                 :             :   /* If at the end OFF still isn't a SSA_NAME or isn't
    4639                 :             :      defined in the loop, punt.  */
    4640                 :      174843 :   if (TREE_CODE (off) != SSA_NAME
    4641                 :      174843 :       || expr_invariant_in_loop_p (loop, off))
    4642                 :        3900 :     return false;
    4643                 :             : 
    4644                 :      170943 :   if (offtype == NULL_TREE)
    4645                 :       96236 :     offtype = TREE_TYPE (off);
    4646                 :             : 
    4647                 :      170943 :   if (use_ifn_p)
    4648                 :             :     {
    4649                 :           0 :       if (!vect_gather_scatter_fn_p (loop_vinfo, DR_IS_READ (dr), masked_p,
    4650                 :             :                                      vectype, memory_type, offtype, scale,
    4651                 :             :                                      &ifn, &offset_vectype, elsvals))
    4652                 :           0 :         ifn = IFN_LAST;
    4653                 :             :       decl = NULL_TREE;
    4654                 :             :     }
    4655                 :             :   else
    4656                 :             :     {
    4657                 :      170943 :       if (DR_IS_READ (dr))
    4658                 :             :         {
    4659                 :      103620 :           if (targetm.vectorize.builtin_gather)
    4660                 :      103620 :             decl = targetm.vectorize.builtin_gather (vectype, offtype, scale);
    4661                 :             :         }
    4662                 :             :       else
    4663                 :             :         {
    4664                 :       67323 :           if (targetm.vectorize.builtin_scatter)
    4665                 :       67323 :             decl = targetm.vectorize.builtin_scatter (vectype, offtype, scale);
    4666                 :             :         }
    4667                 :      170943 :       ifn = IFN_LAST;
    4668                 :             :       /* The offset vector type will be read from DECL when needed.  */
    4669                 :      170943 :       offset_vectype = NULL_TREE;
    4670                 :             :     }
    4671                 :             : 
    4672                 :      170943 :   info->ifn = ifn;
    4673                 :      170943 :   info->decl = decl;
    4674                 :      170943 :   info->base = base;
    4675                 :      170943 :   info->offset = off;
    4676                 :      170943 :   info->offset_dt = vect_unknown_def_type;
    4677                 :      170943 :   info->offset_vectype = offset_vectype;
    4678                 :      170943 :   info->scale = scale;
    4679                 :      170943 :   info->element_type = TREE_TYPE (vectype);
    4680                 :      170943 :   info->memory_type = memory_type;
    4681                 :      170943 :   return true;
    4682                 :             : }
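
/* A worked illustration of the decomposition above (hypothetical names and
   types): for the loop below, the access a[idx[i]].y has address
   a + 4 + 8 * idx[i], so the peeling walk roughly ends up with

     BASE  = (sizetype) a + 4   (loop-invariant: the pointer plus the byte
                                 offset of field y)
     OFF   = the SSA_NAME holding idx[i], defined in the loop
     SCALE = 8                  (sizeof (struct pt), peeled from the
                                 MULT_EXPR)

   which are the values recorded in *INFO.  */

struct pt { float x, y; };

float
example_gather_field (const struct pt *a, const int *idx, int n)
{
  float sum = 0.0f;
  for (int i = 0; i < n; i++)
    sum += a[idx[i]].y;        /* gather: base a, offset idx[i], scale 8 */
  return sum;
}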
    4683                 :             : 
    4684                 :             : /* Find the data references in STMT, analyze them with respect to LOOP and
    4685                 :             :    append them to DATAREFS.  Return false if datarefs in this stmt cannot
    4686                 :             :    be handled.  */
    4687                 :             : 
    4688                 :             : opt_result
    4689                 :    30166912 : vect_find_stmt_data_reference (loop_p loop, gimple *stmt,
    4690                 :             :                                vec<data_reference_p> *datarefs,
    4691                 :             :                                vec<int> *dataref_groups, int group_id)
    4692                 :             : {
    4693                 :             :   /* We can ignore clobbers for dataref analysis - they are removed during
    4694                 :             :      loop vectorization and BB vectorization checks dependences with a
    4695                 :             :      stmt walk.  */
    4696                 :    30166912 :   if (gimple_clobber_p (stmt))
    4697                 :     1195137 :     return opt_result::success ();
    4698                 :             : 
    4699                 :    53976825 :   if (gimple_has_volatile_ops (stmt))
    4700                 :      314115 :     return opt_result::failure_at (stmt, "not vectorized: volatile type: %G",
    4701                 :             :                                    stmt);
    4702                 :             : 
    4703                 :    28657660 :   if (stmt_can_throw_internal (cfun, stmt))
    4704                 :      763856 :     return opt_result::failure_at (stmt,
    4705                 :             :                                    "not vectorized:"
    4706                 :             :                                    " statement can throw an exception: %G",
    4707                 :             :                                    stmt);
    4708                 :             : 
    4709                 :    27893804 :   auto_vec<data_reference_p, 2> refs;
    4710                 :    27893804 :   opt_result res = find_data_references_in_stmt (loop, stmt, &refs);
    4711                 :    27893804 :   if (!res)
    4712                 :     3422391 :     return res;
    4713                 :             : 
    4714                 :    24471413 :   if (refs.is_empty ())
    4715                 :    13955102 :     return opt_result::success ();
    4716                 :             : 
    4717                 :    10516311 :   if (refs.length () > 1)
    4718                 :             :     {
    4719                 :     1196151 :       while (!refs.is_empty ())
    4720                 :      797731 :         free_data_ref (refs.pop ());
    4721                 :      398420 :       return opt_result::failure_at (stmt,
    4722                 :             :                                      "not vectorized: more than one "
    4723                 :             :                                      "data ref in stmt: %G", stmt);
    4724                 :             :     }
    4725                 :             : 
    4726                 :    10117891 :   data_reference_p dr = refs.pop ();
    4727                 :    10117891 :   if (gcall *call = dyn_cast <gcall *> (stmt))
    4728                 :        5230 :     if (!gimple_call_internal_p (call)
    4729                 :        5230 :         || (gimple_call_internal_fn (call) != IFN_MASK_LOAD
    4730                 :        1339 :             && gimple_call_internal_fn (call) != IFN_MASK_STORE))
    4731                 :             :       {
    4732                 :        2307 :         free_data_ref (dr);
    4733                 :        2307 :         return opt_result::failure_at (stmt,
    4734                 :             :                                        "not vectorized: dr in a call %G", stmt);
    4735                 :             :       }
    4736                 :             : 
    4737                 :    10115584 :   if (TREE_CODE (DR_REF (dr)) == COMPONENT_REF
    4738                 :    10115584 :       && DECL_BIT_FIELD (TREE_OPERAND (DR_REF (dr), 1)))
    4739                 :             :     {
    4740                 :       47661 :       free_data_ref (dr);
    4741                 :       47661 :       return opt_result::failure_at (stmt,
    4742                 :             :                                      "not vectorized:"
    4743                 :             :                                      " statement is an unsupported"
    4744                 :             :                                      " bitfield access %G", stmt);
    4745                 :             :     }
    4746                 :             : 
    4747                 :    10067923 :   if (DR_BASE_ADDRESS (dr)
    4748                 :    10034035 :       && TREE_CODE (DR_BASE_ADDRESS (dr)) == INTEGER_CST)
    4749                 :             :     {
    4750                 :         877 :       free_data_ref (dr);
    4751                 :         877 :       return opt_result::failure_at (stmt,
    4752                 :             :                                      "not vectorized:"
    4753                 :             :                                      " base addr of dr is a constant\n");
    4754                 :             :     }
    4755                 :             : 
    4756                 :             :   /* Check whether this may be a SIMD lane access and adjust the
    4757                 :             :      DR to make it easier for us to handle it.  */
    4758                 :    10067046 :   if (loop
    4759                 :      454258 :       && loop->simduid
    4760                 :       10714 :       && (!DR_BASE_ADDRESS (dr)
    4761                 :        2963 :           || !DR_OFFSET (dr)
    4762                 :        2963 :           || !DR_INIT (dr)
    4763                 :        2963 :           || !DR_STEP (dr)))
    4764                 :             :     {
    4765                 :        7751 :       struct data_reference *newdr
    4766                 :        7751 :         = create_data_ref (NULL, loop_containing_stmt (stmt), DR_REF (dr), stmt,
    4767                 :        7751 :                            DR_IS_READ (dr), DR_IS_CONDITIONAL_IN_STMT (dr));
    4768                 :        7751 :       if (DR_BASE_ADDRESS (newdr)
    4769                 :        7751 :           && DR_OFFSET (newdr)
    4770                 :        7751 :           && DR_INIT (newdr)
    4771                 :        7751 :           && DR_STEP (newdr)
    4772                 :        7751 :           && TREE_CODE (DR_INIT (newdr)) == INTEGER_CST
    4773                 :       15502 :           && integer_zerop (DR_STEP (newdr)))
    4774                 :             :         {
    4775                 :        7751 :           tree base_address = DR_BASE_ADDRESS (newdr);
    4776                 :        7751 :           tree off = DR_OFFSET (newdr);
    4777                 :        7751 :           tree step = ssize_int (1);
    4778                 :        7751 :           if (integer_zerop (off)
    4779                 :        7751 :               && TREE_CODE (base_address) == POINTER_PLUS_EXPR)
    4780                 :             :             {
    4781                 :          89 :               off = TREE_OPERAND (base_address, 1);
    4782                 :          89 :               base_address = TREE_OPERAND (base_address, 0);
    4783                 :             :             }
    4784                 :        7751 :           STRIP_NOPS (off);
    4785                 :        7751 :           if (TREE_CODE (off) == MULT_EXPR
    4786                 :        7751 :               && tree_fits_uhwi_p (TREE_OPERAND (off, 1)))
    4787                 :             :             {
    4788                 :        7501 :               step = TREE_OPERAND (off, 1);
    4789                 :        7501 :               off = TREE_OPERAND (off, 0);
    4790                 :        7501 :               STRIP_NOPS (off);
    4791                 :             :             }
    4792                 :         544 :           if (CONVERT_EXPR_P (off)
    4793                 :        7751 :               && (TYPE_PRECISION (TREE_TYPE (TREE_OPERAND (off, 0)))
    4794                 :        7207 :                   < TYPE_PRECISION (TREE_TYPE (off))))
    4795                 :        7207 :             off = TREE_OPERAND (off, 0);
    4796                 :        7751 :           if (TREE_CODE (off) == SSA_NAME)
    4797                 :             :             {
    4798                 :        7223 :               gimple *def = SSA_NAME_DEF_STMT (off);
    4799                 :             :               /* Look through widening conversion.  */
    4800                 :        7223 :               if (is_gimple_assign (def)
    4801                 :        7223 :                   && CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (def)))
    4802                 :             :                 {
    4803                 :           0 :                   tree rhs1 = gimple_assign_rhs1 (def);
    4804                 :           0 :                   if (TREE_CODE (rhs1) == SSA_NAME
    4805                 :           0 :                       && INTEGRAL_TYPE_P (TREE_TYPE (rhs1))
    4806                 :           0 :                       && (TYPE_PRECISION (TREE_TYPE (off))
    4807                 :           0 :                           > TYPE_PRECISION (TREE_TYPE (rhs1))))
    4808                 :           0 :                     def = SSA_NAME_DEF_STMT (rhs1);
    4809                 :             :                 }
    4810                 :        7223 :               if (is_gimple_call (def)
    4811                 :        7084 :                   && gimple_call_internal_p (def)
    4812                 :       14307 :                   && (gimple_call_internal_fn (def) == IFN_GOMP_SIMD_LANE))
    4813                 :             :                 {
    4814                 :        7084 :                   tree arg = gimple_call_arg (def, 0);
    4815                 :        7084 :                   tree reft = TREE_TYPE (DR_REF (newdr));
    4816                 :        7084 :                   gcc_assert (TREE_CODE (arg) == SSA_NAME);
    4817                 :        7084 :                   arg = SSA_NAME_VAR (arg);
    4818                 :        7084 :                   if (arg == loop->simduid
    4819                 :             :                       /* For now.  */
    4820                 :        7084 :                       && tree_int_cst_equal (TYPE_SIZE_UNIT (reft), step))
    4821                 :             :                     {
    4822                 :        7059 :                       DR_BASE_ADDRESS (newdr) = base_address;
    4823                 :        7059 :                       DR_OFFSET (newdr) = ssize_int (0);
    4824                 :        7059 :                       DR_STEP (newdr) = step;
    4825                 :        7059 :                       DR_OFFSET_ALIGNMENT (newdr) = BIGGEST_ALIGNMENT;
    4826                 :        7059 :                       DR_STEP_ALIGNMENT (newdr) = highest_pow2_factor (step);
    4827                 :             :                       /* Mark as simd-lane access.  */
    4828                 :        7059 :                       tree arg2 = gimple_call_arg (def, 1);
    4829                 :        7059 :                       newdr->aux = (void *) (-1 - tree_to_uhwi (arg2));
    4830                 :        7059 :                       free_data_ref (dr);
    4831                 :        7059 :                       datarefs->safe_push (newdr);
    4832                 :        7059 :                       if (dataref_groups)
    4833                 :           0 :                         dataref_groups->safe_push (group_id);
    4834                 :        7059 :                       return opt_result::success ();
    4835                 :             :                     }
    4836                 :             :                 }
    4837                 :             :             }
    4838                 :             :         }
    4839                 :         692 :       free_data_ref (newdr);
    4840                 :             :     }
    4841                 :             : 
    4842                 :    10059987 :   datarefs->safe_push (dr);
    4843                 :    10059987 :   if (dataref_groups)
    4844                 :     9612788 :     dataref_groups->safe_push (group_id);
    4845                 :    10059987 :   return opt_result::success ();
    4846                 :    27893804 : }
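
/* A hypothetical example of the SIMD lane access handled above: under
   -fopenmp, a scalar that needs per-lane storage in a simd loop (such as a
   lastprivate) may be expanded by omp-low into a small array indexed by
   .GOMP_SIMD_LANE (simduid); the code above recognizes that access pattern
   and rewrites the data-ref into a unit-stride one with
   DR_STEP == sizeof (float).  */

float last_t;

void
example_simd_lane (float *out, const float *in, int n)
{
  float t = 0.0f;
#pragma omp simd lastprivate (t)
  for (int i = 0; i < n; i++)
    {
      t = in[i] * 2.0f;        /* t may live in a per-lane "simd array" */
      out[i] = t;
    }
  last_t = t;                  /* lastprivate: value from the final iteration */
}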
    4847                 :             : 
    4848                 :             : /* Function vect_analyze_data_refs.
    4849                 :             : 
     4850                 :             :    Find all the data references in the loop or basic block.
    4851                 :             : 
    4852                 :             :    The general structure of the analysis of data refs in the vectorizer is as
    4853                 :             :    follows:
    4854                 :             :    1- vect_analyze_data_refs(loop/bb): call
    4855                 :             :       compute_data_dependences_for_loop/bb to find and analyze all data-refs
    4856                 :             :       in the loop/bb and their dependences.
    4857                 :             :    2- vect_analyze_dependences(): apply dependence testing using ddrs.
    4858                 :             :    3- vect_analyze_drs_alignment(): check that ref_stmt.alignment is ok.
    4859                 :             :    4- vect_analyze_drs_access(): check that ref_stmt.step is ok.
    4860                 :             : 
    4861                 :             : */
    4862                 :             : 
    4863                 :             : opt_result
    4864                 :     2662355 : vect_analyze_data_refs (vec_info *vinfo, poly_uint64 *min_vf, bool *fatal)
    4865                 :             : {
    4866                 :     2662355 :   class loop *loop = NULL;
    4867                 :     2662355 :   unsigned int i;
    4868                 :     2662355 :   struct data_reference *dr;
    4869                 :     2662355 :   tree scalar_type;
    4870                 :             : 
    4871                 :     2662355 :   DUMP_VECT_SCOPE ("vect_analyze_data_refs");
    4872                 :             : 
    4873                 :     2662355 :   if (loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo))
    4874                 :      349488 :     loop = LOOP_VINFO_LOOP (loop_vinfo);
    4875                 :             : 
    4876                 :             :   /* Go through the data-refs, check that the analysis succeeded.  Update
    4877                 :             :      pointer from stmt_vec_info struct to DR and vectype.  */
    4878                 :             : 
    4879                 :     2662355 :   vec<data_reference_p> datarefs = vinfo->shared->datarefs;
    4880                 :    16500699 :   FOR_EACH_VEC_ELT (datarefs, i, dr)
    4881                 :             :     {
    4882                 :    13888492 :       enum { SG_NONE, GATHER, SCATTER } gatherscatter = SG_NONE;
    4883                 :    13888492 :       poly_uint64 vf;
    4884                 :             : 
    4885                 :    13888492 :       gcc_assert (DR_REF (dr));
    4886                 :    13888492 :       stmt_vec_info stmt_info = vinfo->lookup_stmt (DR_STMT (dr));
    4887                 :    13888492 :       gcc_assert (!stmt_info->dr_aux.dr);
    4888                 :    13888492 :       stmt_info->dr_aux.dr = dr;
    4889                 :    13888492 :       stmt_info->dr_aux.stmt = stmt_info;
    4890                 :             : 
    4891                 :             :       /* Check that analysis of the data-ref succeeded.  */
    4892                 :    13888492 :       if (!DR_BASE_ADDRESS (dr) || !DR_OFFSET (dr) || !DR_INIT (dr)
    4893                 :    13845809 :           || !DR_STEP (dr))
    4894                 :             :         {
    4895                 :       85366 :           bool maybe_gather
    4896                 :       42683 :             = DR_IS_READ (dr)
    4897                 :       42683 :               && !TREE_THIS_VOLATILE (DR_REF (dr));
    4898                 :       85366 :           bool maybe_scatter
    4899                 :             :             = DR_IS_WRITE (dr)
    4900                 :       42683 :               && !TREE_THIS_VOLATILE (DR_REF (dr));
    4901                 :             : 
     4902                 :             :           /* If the target supports vector gather loads or scatter stores,
     4903                 :             :              see whether they can be used.  */
    4904                 :       42683 :           if (is_a <loop_vec_info> (vinfo)
    4905                 :       42683 :               && !nested_in_vect_loop_p (loop, stmt_info))
    4906                 :             :             {
    4907                 :       39166 :               if (maybe_gather || maybe_scatter)
    4908                 :             :                 {
    4909                 :       39166 :                   if (maybe_gather)
    4910                 :             :                     gatherscatter = GATHER;
    4911                 :             :                   else
    4912                 :       12523 :                     gatherscatter = SCATTER;
    4913                 :             :                 }
    4914                 :             :             }
    4915                 :             : 
    4916                 :       12523 :           if (gatherscatter == SG_NONE)
    4917                 :             :             {
    4918                 :        3517 :               if (dump_enabled_p ())
    4919                 :           5 :                 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
    4920                 :             :                                  "not vectorized: data ref analysis "
    4921                 :             :                                  "failed %G", stmt_info->stmt);
    4922                 :        3517 :               if (is_a <bb_vec_info> (vinfo))
    4923                 :             :                 {
    4924                 :             :                   /* In BB vectorization the ref can still participate
     4925                 :             :                      in dependence analysis; we just can't vectorize it.  */
    4926                 :        2903 :                   STMT_VINFO_VECTORIZABLE (stmt_info) = false;
    4927                 :     1571878 :                   continue;
    4928                 :             :                 }
    4929                 :         614 :               return opt_result::failure_at (stmt_info->stmt,
    4930                 :             :                                              "not vectorized:"
    4931                 :             :                                              " data ref analysis failed: %G",
    4932                 :             :                                              stmt_info->stmt);
    4933                 :             :             }
    4934                 :             :         }
    4935                 :             : 
    4936                 :             :       /* See if this was detected as SIMD lane access.  */
    4937                 :    13884975 :       if (dr->aux == (void *)-1
    4938                 :    13884975 :           || dr->aux == (void *)-2
    4939                 :    13876032 :           || dr->aux == (void *)-3
    4940                 :    13875192 :           || dr->aux == (void *)-4)
    4941                 :             :         {
    4942                 :       10583 :           if (nested_in_vect_loop_p (loop, stmt_info))
    4943                 :           0 :             return opt_result::failure_at (stmt_info->stmt,
    4944                 :             :                                            "not vectorized:"
    4945                 :             :                                            " data ref analysis failed: %G",
    4946                 :             :                                            stmt_info->stmt);
    4947                 :       10583 :           STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info)
    4948                 :       10583 :             = -(uintptr_t) dr->aux;
    4949                 :             :         }
    4950                 :             : 
    4951                 :    13884975 :       tree base = get_base_address (DR_REF (dr));
    4952                 :    13884975 :       if (base && VAR_P (base) && DECL_NONALIASED (base))
    4953                 :             :         {
    4954                 :        7699 :           if (dump_enabled_p ())
    4955                 :         186 :             dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
    4956                 :             :                              "not vectorized: base object not addressable "
    4957                 :             :                              "for stmt: %G", stmt_info->stmt);
    4958                 :        7699 :           if (is_a <bb_vec_info> (vinfo))
    4959                 :             :             {
    4960                 :             :               /* In BB vectorization the ref can still participate
     4961                 :             :                  in dependence analysis; we just can't vectorize it.  */
    4962                 :        7699 :               STMT_VINFO_VECTORIZABLE (stmt_info) = false;
    4963                 :        7699 :               continue;
    4964                 :             :             }
    4965                 :           0 :           return opt_result::failure_at (stmt_info->stmt,
    4966                 :             :                                          "not vectorized: base object not"
    4967                 :             :                                          " addressable for stmt: %G",
    4968                 :             :                                          stmt_info->stmt);
    4969                 :             :         }
    4970                 :             : 
    4971                 :    13877276 :       if (is_a <loop_vec_info> (vinfo)
    4972                 :      814975 :           && DR_STEP (dr)
    4973                 :    14653085 :           && TREE_CODE (DR_STEP (dr)) != INTEGER_CST)
    4974                 :             :         {
    4975                 :       41588 :           if (nested_in_vect_loop_p (loop, stmt_info))
    4976                 :         375 :             return opt_result::failure_at (stmt_info->stmt,
    4977                 :             :                                            "not vectorized: "
    4978                 :             :                                            "not suitable for strided load %G",
    4979                 :             :                                            stmt_info->stmt);
    4980                 :       41213 :           STMT_VINFO_STRIDED_P (stmt_info) = true;
    4981                 :             :         }
    4982                 :             : 
    4983                 :             :       /* Update DR field in stmt_vec_info struct.  */
    4984                 :             : 
     4985                 :             :       /* If the dataref is in an inner-loop of the loop that is considered
     4986                 :             :          for vectorization, we also want to analyze the access relative to
    4987                 :             :          the outer-loop (DR contains information only relative to the
    4988                 :             :          inner-most enclosing loop).  We do that by building a reference to the
    4989                 :             :          first location accessed by the inner-loop, and analyze it relative to
    4990                 :             :          the outer-loop.  */
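      /* A hypothetical example: when vectorizing the outer i-loop of

           for (int i = 0; i < n; i++)
             for (int j = 0; j < m; j++)
               sum[i] += a[i][j];

         the data-ref for a[i][j] only describes the access relative to the
         inner j-loop.  The reference built below, *(BASE + INIT + OFFSET),
         is effectively a[i][0]; its address is invariant in the inner loop,
         so it can be analyzed relative to the outer loop instead.  */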
    4991                 :    13876901 :       if (loop && nested_in_vect_loop_p (loop, stmt_info))
    4992                 :             :         {
    4993                 :             :           /* Build a reference to the first location accessed by the
    4994                 :             :              inner loop: *(BASE + INIT + OFFSET).  By construction,
    4995                 :             :              this address must be invariant in the inner loop, so we
    4996                 :             :              can consider it as being used in the outer loop.  */
    4997                 :       11028 :           tree base = unshare_expr (DR_BASE_ADDRESS (dr));
    4998                 :       11028 :           tree offset = unshare_expr (DR_OFFSET (dr));
    4999                 :       11028 :           tree init = unshare_expr (DR_INIT (dr));
    5000                 :       11028 :           tree init_offset = fold_build2 (PLUS_EXPR, TREE_TYPE (offset),
    5001                 :             :                                           init, offset);
    5002                 :       11028 :           tree init_addr = fold_build_pointer_plus (base, init_offset);
    5003                 :       11028 :           tree init_ref = build_fold_indirect_ref (init_addr);
    5004                 :             : 
    5005                 :       11028 :           if (dump_enabled_p ())
    5006                 :        1142 :             dump_printf_loc (MSG_NOTE, vect_location,
    5007                 :             :                              "analyze in outer loop: %T\n", init_ref);
    5008                 :             : 
    5009                 :       11028 :           opt_result res
    5010                 :       11028 :             = dr_analyze_innermost (&STMT_VINFO_DR_WRT_VEC_LOOP (stmt_info),
    5011                 :       11028 :                                     init_ref, loop, stmt_info->stmt);
    5012                 :       11028 :           if (!res)
    5013                 :             :             /* dr_analyze_innermost already explained the failure.  */
    5014                 :         154 :             return res;
    5015                 :             : 
    5016                 :       10874 :           if (dump_enabled_p ())
    5017                 :        1142 :             dump_printf_loc (MSG_NOTE, vect_location,
    5018                 :             :                              "\touter base_address: %T\n"
    5019                 :             :                              "\touter offset from base address: %T\n"
    5020                 :             :                              "\touter constant offset from base address: %T\n"
    5021                 :             :                              "\touter step: %T\n"
    5022                 :             :                              "\touter base alignment: %d\n\n"
    5023                 :             :                              "\touter base misalignment: %d\n"
    5024                 :             :                              "\touter offset alignment: %d\n"
    5025                 :             :                              "\touter step alignment: %d\n",
    5026                 :             :                              STMT_VINFO_DR_BASE_ADDRESS (stmt_info),
    5027                 :             :                              STMT_VINFO_DR_OFFSET (stmt_info),
    5028                 :             :                              STMT_VINFO_DR_INIT (stmt_info),
    5029                 :             :                              STMT_VINFO_DR_STEP (stmt_info),
    5030                 :             :                              STMT_VINFO_DR_BASE_ALIGNMENT (stmt_info),
    5031                 :             :                              STMT_VINFO_DR_BASE_MISALIGNMENT (stmt_info),
    5032                 :             :                              STMT_VINFO_DR_OFFSET_ALIGNMENT (stmt_info),
    5033                 :             :                              STMT_VINFO_DR_STEP_ALIGNMENT (stmt_info));
    5034                 :             :         }
    5035                 :             : 
    5036                 :             :       /* Set vectype for STMT.  */
    5037                 :    13876747 :       scalar_type = TREE_TYPE (DR_REF (dr));
    5038                 :    13876747 :       tree vectype = get_vectype_for_scalar_type (vinfo, scalar_type);
    5039                 :    13876747 :       if (!vectype)
    5040                 :             :         {
    5041                 :     1605993 :           if (dump_enabled_p ())
    5042                 :             :             {
    5043                 :        1800 :               dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
    5044                 :             :                                "not vectorized: no vectype for stmt: %G",
    5045                 :             :                                stmt_info->stmt);
    5046                 :        1800 :               dump_printf (MSG_MISSED_OPTIMIZATION, " scalar_type: ");
    5047                 :        1800 :               dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_DETAILS,
    5048                 :             :                                  scalar_type);
    5049                 :        1800 :               dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
    5050                 :             :             }
    5051                 :             : 
    5052                 :     1605993 :           if (is_a <bb_vec_info> (vinfo))
    5053                 :             :             {
    5054                 :             :               /* No vector type is fine, the ref can still participate
    5055                 :             :                  in dependence analysis, we just can't vectorize it.  */
    5056                 :     1561276 :               STMT_VINFO_VECTORIZABLE (stmt_info) = false;
    5057                 :     1561276 :               continue;
    5058                 :             :             }
    5059                 :       44717 :           if (fatal)
    5060                 :       44717 :             *fatal = false;
    5061                 :       44717 :           return opt_result::failure_at (stmt_info->stmt,
    5062                 :             :                                          "not vectorized:"
    5063                 :             :                                          " no vectype for stmt: %G"
    5064                 :             :                                          " scalar_type: %T\n",
    5065                 :             :                                          stmt_info->stmt, scalar_type);
    5066                 :             :         }
    5067                 :             :       else
    5068                 :             :         {
    5069                 :    12270754 :           if (dump_enabled_p ())
    5070                 :       82223 :             dump_printf_loc (MSG_NOTE, vect_location,
    5071                 :             :                              "got vectype for stmt: %G%T\n",
    5072                 :             :                              stmt_info->stmt, vectype);
    5073                 :             :         }
    5074                 :             : 
    5075                 :             :       /* Adjust the minimal vectorization factor according to the
    5076                 :             :          vector type.  */
    5077                 :    12270754 :       vf = TYPE_VECTOR_SUBPARTS (vectype);
    5078                 :    12270754 :       *min_vf = upper_bound (*min_vf, vf);
    5079                 :             : 
    5080                 :             :       /* Leave the BB vectorizer to pick the vector type later, based on
    5081                 :             :          the final dataref group size and SLP node size.  */
    5082                 :    12270754 :       if (is_a <loop_vec_info> (vinfo))
    5083                 :      769729 :         STMT_VINFO_VECTYPE (stmt_info) = vectype;
    5084                 :             : 
    5085                 :    12270754 :       if (gatherscatter != SG_NONE)
    5086                 :             :         {
    5087                 :       36732 :           gather_scatter_info gs_info;
    5088                 :       36732 :           if (!vect_check_gather_scatter (stmt_info,
    5089                 :             :                                           as_a <loop_vec_info> (vinfo),
    5090                 :             :                                           &gs_info)
    5091                 :       69904 :               || !get_vectype_for_scalar_type (vinfo,
    5092                 :       33172 :                                                TREE_TYPE (gs_info.offset)))
    5093                 :             :             {
    5094                 :        4288 :               if (fatal)
    5095                 :        4288 :                 *fatal = false;
    5096                 :        4288 :               return opt_result::failure_at
    5097                 :        4948 :                         (stmt_info->stmt,
    5098                 :             :                          (gatherscatter == GATHER)
    5099                 :             :                          ? "not vectorized: not suitable for gather load %G"
    5100                 :             :                          : "not vectorized: not suitable for scatter store %G",
    5101                 :             :                          stmt_info->stmt);
    5102                 :             :             }
    5103                 :       32444 :           STMT_VINFO_GATHER_SCATTER_P (stmt_info) = gatherscatter;
    5104                 :             :         }
    5105                 :             :     }
    5106                 :             : 
    5107                 :             :   /* We used to stop processing and prune the list here.  Verify we no
    5108                 :             :      longer need to.  */
    5109                 :     4046947 :   gcc_assert (i == datarefs.length ());
    5110                 :             : 
    5111                 :     2612207 :   return opt_result::success ();
    5112                 :             : }
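
As an illustration of the last check above (example code, not taken from GCC): a loop with indexed, indirect accesses of the following shape is what vect_check_gather_scatter is asked to validate, a[idx[i]] being a gather-style load and b[idx[i]] a scatter-style store.

/* Illustrative only: indirect accesses of the kind classified as
   gather (load) and scatter (store) by the analysis above.  */
void gather_scatter_example (double *restrict b, const double *restrict a,
                             const int *restrict idx, int n)
{
  for (int i = 0; i < n; i++)
    b[idx[i]] = a[idx[i]] * 2.0;
}
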
    5113                 :             : 
    5114                 :             : 
    5115                 :             : /* Function vect_get_new_vect_var.
    5116                 :             : 
    5117                 :             :    Returns a new temporary variable.  The current naming scheme prepends a
    5118                 :             :    prefix determined by VAR_KIND ("vect", "vectp", "stmp" or "mask") to
    5119                 :             :    NAME if NAME is provided, and otherwise uses the prefix alone as the
    5120                 :             :    variable name.  */
    5121                 :             : 
    5122                 :             : tree
    5123                 :     1814209 : vect_get_new_vect_var (tree type, enum vect_var_kind var_kind, const char *name)
    5124                 :             : {
    5125                 :     1814209 :   const char *prefix;
    5126                 :     1814209 :   tree new_vect_var;
    5127                 :             : 
    5128                 :     1814209 :   switch (var_kind)
    5129                 :             :   {
    5130                 :             :   case vect_simple_var:
    5131                 :             :     prefix = "vect";
    5132                 :             :     break;
    5133                 :       21773 :   case vect_scalar_var:
    5134                 :       21773 :     prefix = "stmp";
    5135                 :       21773 :     break;
    5136                 :       12929 :   case vect_mask_var:
    5137                 :       12929 :     prefix = "mask";
    5138                 :       12929 :     break;
    5139                 :     1340292 :   case vect_pointer_var:
    5140                 :     1340292 :     prefix = "vectp";
    5141                 :     1340292 :     break;
    5142                 :           0 :   default:
    5143                 :           0 :     gcc_unreachable ();
    5144                 :             :   }
    5145                 :             : 
    5146                 :     1814209 :   if (name)
    5147                 :             :     {
    5148                 :     1004242 :       char* tmp = concat (prefix, "_", name, NULL);
    5149                 :     1004242 :       new_vect_var = create_tmp_reg (type, tmp);
    5150                 :     1004242 :       free (tmp);
    5151                 :             :     }
    5152                 :             :   else
    5153                 :      809967 :     new_vect_var = create_tmp_reg (type, prefix);
    5154                 :             : 
    5155                 :     1814209 :   return new_vect_var;
    5156                 :             : }
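
To make the prefixes above concrete, a small example (illustrative, not part of this file): compiling the loop below with -O3 and -fdump-tree-vect-details typically shows vectorizer temporaries created through vect_get_new_vect_var carrying these prefixes, e.g. a pointer into a[] named after "vectp_a" and vector values named after "vect_".

/* Illustrative only: a loop whose vectorization creates temporaries
   named with the prefixes listed in the switch above.  */
void scale (float *a, int n)
{
  for (int i = 0; i < n; i++)
    a[i] = a[i] * 2.0f;
}
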
    5157                 :             : 
    5158                 :             : /* Like vect_get_new_vect_var but return an SSA name.  */
    5159                 :             : 
    5160                 :             : tree
    5161                 :        6005 : vect_get_new_ssa_name (tree type, enum vect_var_kind var_kind, const char *name)
    5162                 :             : {
    5163                 :        6005 :   const char *prefix;
    5164                 :        6005 :   tree new_vect_var;
    5165                 :             : 
    5166                 :        6005 :   switch (var_kind)
    5167                 :             :   {
    5168                 :             :   case vect_simple_var:
    5169                 :             :     prefix = "vect";
    5170                 :             :     break;
    5171                 :         313 :   case vect_scalar_var:
    5172                 :         313 :     prefix = "stmp";
    5173                 :         313 :     break;
    5174                 :           0 :   case vect_pointer_var:
    5175                 :           0 :     prefix = "vectp";
    5176                 :           0 :     break;
    5177                 :           0 :   default:
    5178                 :           0 :     gcc_unreachable ();
    5179                 :             :   }
    5180                 :             : 
    5181                 :        6005 :   if (name)
    5182                 :             :     {
    5183                 :        5519 :       char* tmp = concat (prefix, "_", name, NULL);
    5184                 :        5519 :       new_vect_var = make_temp_ssa_name (type, NULL, tmp);
    5185                 :        5519 :       free (tmp);
    5186                 :             :     }
    5187                 :             :   else
    5188                 :         486 :     new_vect_var = make_temp_ssa_name (type, NULL, prefix);
    5189                 :             : 
    5190                 :        6005 :   return new_vect_var;
    5191                 :             : }
    5192                 :             : 
    5193                 :             : /* Duplicate points-to info on NAME from DR_INFO.  */
    5194                 :             : 
    5195                 :             : static void
    5196                 :      261316 : vect_duplicate_ssa_name_ptr_info (tree name, dr_vec_info *dr_info)
    5197                 :             : {
    5198                 :      261316 :   duplicate_ssa_name_ptr_info (name, DR_PTR_INFO (dr_info->dr));
    5199                 :             :   /* DR_PTR_INFO is for a base SSA name, not including constant or
    5200                 :             :      variable offsets in the ref so its alignment info does not apply.  */
    5201                 :      261316 :   mark_ptr_info_alignment_unknown (SSA_NAME_PTR_INFO (name));
    5202                 :      261316 : }
    5203                 :             : 
    5204                 :             : /* Function vect_create_addr_base_for_vector_ref.
    5205                 :             : 
    5206                 :             :    Create an expression that computes the address of the first memory location
    5207                 :             :    that will be accessed for a data reference.
    5208                 :             : 
    5209                 :             :    Input:
    5210                 :             :    STMT_INFO: The statement containing the data reference.
    5211                 :             :    NEW_STMT_LIST: Must be initialized to NULL_TREE or a statement list.
    5212                 :             :    OFFSET: Optional. If supplied, it is added to the initial address.
    5213                 :             :    LOOP:    Specify relative to which loop-nest the address should be computed.
    5214                 :             :             For example, when the dataref is in an inner-loop nested in an
    5215                 :             :             outer-loop that is now being vectorized, LOOP can be either the
    5216                 :             :             outer-loop, or the inner-loop.  The first memory location accessed
    5217                 :             :             by the following dataref ('in' points to short):
    5218                 :             : 
    5219                 :             :                 for (i=0; i<N; i++)
    5220                 :             :                    for (j=0; j<M; j++)
    5221                 :             :                      s += in[i+j]
    5222                 :             : 
    5223                 :             :             is as follows:
    5224                 :             :             if LOOP=i_loop:     &in         (relative to i_loop)
    5225                 :             :             if LOOP=j_loop:     &in+i*2B    (relative to j_loop)
    5226                 :             : 
    5227                 :             :    Output:
    5228                 :             :    1. Return an SSA_NAME whose value is the address of the memory location of
    5229                 :             :       the first vector of the data reference.
    5230                 :             :    2. If new_stmt_list is not NULL_TREE after return then the caller must insert
    5231                 :             :       these statement(s) which define the returned SSA_NAME.
    5232                 :             : 
    5233                 :             :    FORNOW: We are only handling array accesses with step 1.  */
    5234                 :             : 
    5235                 :             : tree
    5236                 :      670227 : vect_create_addr_base_for_vector_ref (vec_info *vinfo, stmt_vec_info stmt_info,
    5237                 :             :                                       gimple_seq *new_stmt_list,
    5238                 :             :                                       tree offset)
    5239                 :             : {
    5240                 :      670227 :   dr_vec_info *dr_info = STMT_VINFO_DR_INFO (stmt_info);
    5241                 :      670227 :   struct data_reference *dr = dr_info->dr;
    5242                 :      670227 :   const char *base_name;
    5243                 :      670227 :   tree addr_base;
    5244                 :      670227 :   tree dest;
    5245                 :      670227 :   gimple_seq seq = NULL;
    5246                 :      670227 :   tree vect_ptr_type;
    5247                 :      670227 :   loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
    5248                 :      670227 :   innermost_loop_behavior *drb = vect_dr_behavior (vinfo, dr_info);
    5249                 :             : 
    5250                 :      670227 :   tree data_ref_base = unshare_expr (drb->base_address);
    5251                 :      670227 :   tree base_offset = unshare_expr (get_dr_vinfo_offset (vinfo, dr_info, true));
    5252                 :      670227 :   tree init = unshare_expr (drb->init);
    5253                 :             : 
    5254                 :      670227 :   if (loop_vinfo)
    5255                 :      116368 :     base_name = get_name (data_ref_base);
    5256                 :             :   else
    5257                 :             :     {
    5258                 :      553859 :       base_offset = ssize_int (0);
    5259                 :      553859 :       init = ssize_int (0);
    5260                 :      553859 :       base_name = get_name (DR_REF (dr));
    5261                 :             :     }
    5262                 :             : 
    5263                 :             :   /* Create base_offset */
    5264                 :      670227 :   base_offset = size_binop (PLUS_EXPR,
    5265                 :             :                             fold_convert (sizetype, base_offset),
    5266                 :             :                             fold_convert (sizetype, init));
    5267                 :             : 
    5268                 :      670227 :   if (offset)
    5269                 :             :     {
    5270                 :        2967 :       offset = fold_convert (sizetype, offset);
    5271                 :        2967 :       base_offset = fold_build2 (PLUS_EXPR, sizetype,
    5272                 :             :                                  base_offset, offset);
    5273                 :             :     }
    5274                 :             : 
    5275                 :             :   /* base + base_offset */
    5276                 :      670227 :   if (loop_vinfo)
    5277                 :      116368 :     addr_base = fold_build_pointer_plus (data_ref_base, base_offset);
    5278                 :             :   else
    5279                 :     1107718 :     addr_base = build1 (ADDR_EXPR,
    5280                 :      553859 :                         build_pointer_type (TREE_TYPE (DR_REF (dr))),
    5281                 :             :                         /* Strip zero offset components since we don't need
    5282                 :             :                            them and they can confuse late diagnostics if
    5283                 :             :                            we CSE them wrongly.  See PR106904 for example.  */
    5284                 :             :                         unshare_expr (strip_zero_offset_components
    5285                 :             :                                                                 (DR_REF (dr))));
    5286                 :             : 
    5287                 :      670227 :   vect_ptr_type = build_pointer_type (TREE_TYPE (DR_REF (dr)));
    5288                 :      670227 :   dest = vect_get_new_vect_var (vect_ptr_type, vect_pointer_var, base_name);
    5289                 :      670227 :   addr_base = force_gimple_operand (addr_base, &seq, true, dest);
    5290                 :      670227 :   gimple_seq_add_seq (new_stmt_list, seq);
    5291                 :             : 
    5292                 :      670227 :   if (DR_PTR_INFO (dr)
    5293                 :      161631 :       && TREE_CODE (addr_base) == SSA_NAME
    5294                 :             :       /* We should only duplicate pointer info to newly created SSA names.  */
    5295                 :      831408 :       && SSA_NAME_VAR (addr_base) == dest)
    5296                 :             :     {
    5297                 :      136456 :       gcc_assert (!SSA_NAME_PTR_INFO (addr_base));
    5298                 :      136456 :       vect_duplicate_ssa_name_ptr_info (addr_base, dr_info);
    5299                 :             :     }
    5300                 :             : 
    5301                 :      670227 :   if (dump_enabled_p ())
    5302                 :       23425 :     dump_printf_loc (MSG_NOTE, vect_location, "created %T\n", addr_base);
    5303                 :             : 
    5304                 :      670227 :   return addr_base;
    5305                 :             : }
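
The example from the comment above, in compilable form.  As described there, the first location accessed relative to the i-loop is &in[0], while relative to the j-loop it is &in[0] plus i * 2 bytes (i.e. &in[i]), since 'in' points to short.

/* Compilable version of the example in the comment above.  */
short s;
void sum_windows (const short *in, int N, int M)
{
  for (int i = 0; i < N; i++)
    for (int j = 0; j < M; j++)
      s += in[i + j];
}
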
    5306                 :             : 
    5307                 :             : 
    5308                 :             : /* Function vect_create_data_ref_ptr.
    5309                 :             : 
    5310                 :             :    Create a new pointer-to-AGGR_TYPE variable (ap), that points to the first
    5311                 :             :    location accessed in the loop by STMT_INFO, along with the def-use update
    5312                 :             :    chain to appropriately advance the pointer through the loop iterations.
    5313                 :             :    Also set aliasing information for the pointer.  This pointer is used by
    5314                 :             :    the callers to this function to create a memory reference expression for
    5315                 :             :    vector load/store access.
    5316                 :             : 
    5317                 :             :    Input:
    5318                 :             :    1. STMT_INFO: a stmt that references memory. Expected to be of the form
    5319                 :             :          GIMPLE_ASSIGN <name, data-ref> or
    5320                 :             :          GIMPLE_ASSIGN <data-ref, name>.
    5321                 :             :    2. AGGR_TYPE: the type of the reference, which should be either a vector
    5322                 :             :         or an array.
    5323                 :             :    3. AT_LOOP: the loop where the vector memref is to be created.
    5324                 :             :    4. OFFSET (optional): a byte offset to be added to the initial address
    5325                 :             :         accessed by the data-ref in STMT_INFO.
    5326                 :             :    5. BSI: location where the new stmts are to be placed if there is no loop
    5327                 :             :    6. ONLY_INIT: indicate if ap is to be updated in the loop, or remain
    5328                 :             :         pointing to the initial address.
    5329                 :             :    7. IV_STEP (optional, defaults to NULL): the amount that should be added
    5330                 :             :         to the IV during each iteration of the loop.  NULL says to move
    5331                 :             :         by one copy of AGGR_TYPE up or down, depending on the step of the
    5332                 :             :         data reference.
    5333                 :             : 
    5334                 :             :    Output:
    5335                 :             :    1. Declare a new ptr to vector_type, and have it point to the base of the
    5336                 :             :       data reference (initial address accessed by the data reference).
    5337                 :             :       For example, for a vector of type V8HI, the following code is generated:
    5338                 :             : 
    5339                 :             :       v8hi *ap;
    5340                 :             :       ap = (v8hi *)initial_address;
    5341                 :             : 
    5342                 :             :       if OFFSET is not supplied:
    5343                 :             :          initial_address = &a[init];
    5344                 :             :       if OFFSET is supplied:
    5345                 :             :          initial_address = &a[init] + OFFSET;
    5348                 :             : 
    5349                 :             :       Return the initial_address in INITIAL_ADDRESS.
    5350                 :             : 
    5351                 :             :    2. If ONLY_INIT is true, just return the initial pointer.  Otherwise, also
    5352                 :             :       update the pointer in each iteration of the loop.
    5353                 :             : 
    5354                 :             :       Return the increment stmt that updates the pointer in PTR_INCR.
    5355                 :             : 
    5356                 :             :    3. Return the pointer.  */
    5357                 :             : 
    5358                 :             : tree
    5359                 :      670065 : vect_create_data_ref_ptr (vec_info *vinfo, stmt_vec_info stmt_info,
    5360                 :             :                           tree aggr_type, class loop *at_loop, tree offset,
    5361                 :             :                           tree *initial_address, gimple_stmt_iterator *gsi,
    5362                 :             :                           gimple **ptr_incr, bool only_init,
    5363                 :             :                           tree iv_step)
    5364                 :             : {
    5365                 :      670065 :   const char *base_name;
    5366                 :      670065 :   loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
    5367                 :      670065 :   class loop *loop = NULL;
    5368                 :      670065 :   bool nested_in_vect_loop = false;
    5369                 :      670065 :   class loop *containing_loop = NULL;
    5370                 :      670065 :   tree aggr_ptr_type;
    5371                 :      670065 :   tree aggr_ptr;
    5372                 :      670065 :   tree new_temp;
    5373                 :      670065 :   gimple_seq new_stmt_list = NULL;
    5374                 :      670065 :   edge pe = NULL;
    5375                 :      670065 :   basic_block new_bb;
    5376                 :      670065 :   tree aggr_ptr_init;
    5377                 :      670065 :   dr_vec_info *dr_info = STMT_VINFO_DR_INFO (stmt_info);
    5378                 :      670065 :   struct data_reference *dr = dr_info->dr;
    5379                 :      670065 :   tree aptr;
    5380                 :      670065 :   gimple_stmt_iterator incr_gsi;
    5381                 :      670065 :   bool insert_after;
    5382                 :      670065 :   tree indx_before_incr, indx_after_incr;
    5383                 :      670065 :   gimple *incr;
    5384                 :      670065 :   bb_vec_info bb_vinfo = dyn_cast <bb_vec_info> (vinfo);
    5385                 :             : 
    5386                 :      670065 :   gcc_assert (iv_step != NULL_TREE
    5387                 :             :               || TREE_CODE (aggr_type) == ARRAY_TYPE
    5388                 :             :               || TREE_CODE (aggr_type) == VECTOR_TYPE);
    5389                 :             : 
    5390                 :      670065 :   if (loop_vinfo)
    5391                 :             :     {
    5392                 :      116206 :       loop = LOOP_VINFO_LOOP (loop_vinfo);
    5393                 :      116206 :       nested_in_vect_loop = nested_in_vect_loop_p (loop, stmt_info);
    5394                 :      116206 :       containing_loop = (gimple_bb (stmt_info->stmt))->loop_father;
    5395                 :      116206 :       pe = loop_preheader_edge (loop);
    5396                 :             :     }
    5397                 :             :   else
    5398                 :             :     {
    5399                 :      553859 :       gcc_assert (bb_vinfo);
    5400                 :      553859 :       only_init = true;
    5401                 :      553859 :       *ptr_incr = NULL;
    5402                 :             :     }
    5403                 :             : 
    5404                 :             :   /* Create an expression for the first address accessed by this load
    5405                 :             :      in LOOP.  */
    5406                 :      670065 :   base_name = get_name (DR_BASE_ADDRESS (dr));
    5407                 :             : 
    5408                 :      670065 :   if (dump_enabled_p ())
    5409                 :             :     {
    5410                 :       23390 :       tree dr_base_type = TREE_TYPE (DR_BASE_OBJECT (dr));
    5411                 :       23390 :       dump_printf_loc (MSG_NOTE, vect_location,
    5412                 :             :                        "create %s-pointer variable to type: %T",
    5413                 :       23390 :                        get_tree_code_name (TREE_CODE (aggr_type)),
    5414                 :             :                        aggr_type);
    5415                 :       23390 :       if (TREE_CODE (dr_base_type) == ARRAY_TYPE)
    5416                 :       12961 :         dump_printf (MSG_NOTE, "  vectorizing an array ref: ");
    5417                 :       10429 :       else if (TREE_CODE (dr_base_type) == VECTOR_TYPE)
    5418                 :           0 :         dump_printf (MSG_NOTE, "  vectorizing a vector ref: ");
    5419                 :       10429 :       else if (TREE_CODE (dr_base_type) == RECORD_TYPE)
    5420                 :        1543 :         dump_printf (MSG_NOTE, "  vectorizing a record based array ref: ");
    5421                 :             :       else
    5422                 :        8886 :         dump_printf (MSG_NOTE, "  vectorizing a pointer ref: ");
    5423                 :       23390 :       dump_printf (MSG_NOTE, "%T\n", DR_BASE_OBJECT (dr));
    5424                 :             :     }
    5425                 :             : 
    5426                 :             :   /* (1) Create the new aggregate-pointer variable.
    5427                 :             :      Vector and array types inherit the alias set of their component
    5428                 :             :      type by default, so we need to use a ref-all pointer if the data
    5429                 :             :      reference does not conflict with the created aggregate data
    5430                 :             :      reference, because the latter is not addressable.  */
    5431                 :      670065 :   bool need_ref_all = false;
    5432                 :      670065 :   if (!alias_sets_conflict_p (get_alias_set (aggr_type),
    5433                 :             :                               get_alias_set (DR_REF (dr))))
    5434                 :             :     need_ref_all = true;
    5435                 :             :   /* Likewise for any of the data references in the stmt group.  */
    5436                 :      566655 :   else if (DR_GROUP_SIZE (stmt_info) > 1)
    5437                 :             :     {
    5438                 :      463667 :       stmt_vec_info sinfo = DR_GROUP_FIRST_ELEMENT (stmt_info);
    5439                 :     1261645 :       do
    5440                 :             :         {
    5441                 :     1261645 :           struct data_reference *sdr = STMT_VINFO_DATA_REF (sinfo);
    5442                 :     1261645 :           if (!alias_sets_conflict_p (get_alias_set (aggr_type),
    5443                 :             :                                       get_alias_set (DR_REF (sdr))))
    5444                 :             :             {
    5445                 :             :               need_ref_all = true;
    5446                 :             :               break;
    5447                 :             :             }
    5448                 :     1260617 :           sinfo = DR_GROUP_NEXT_ELEMENT (sinfo);
    5449                 :             :         }
    5450                 :     1260617 :       while (sinfo);
    5451                 :             :     }
    5452                 :      670065 :   aggr_ptr_type = build_pointer_type_for_mode (aggr_type, VOIDmode,
    5453                 :             :                                                need_ref_all);
    5454                 :      670065 :   aggr_ptr = vect_get_new_vect_var (aggr_ptr_type, vect_pointer_var, base_name);
    5455                 :             : 
    5456                 :             : 
    5457                 :             :   /* Note: If the dataref is in an inner-loop nested in LOOP, and we are
    5458                 :             :      vectorizing LOOP (i.e., outer-loop vectorization), we need to create two
    5459                 :             :      def-use update cycles for the pointer: one relative to the outer-loop
    5460                 :             :      (LOOP), which is what step (3) below does.  The other is relative
    5461                 :             :      to the inner-loop (which is the inner-most loop containing the dataref),
    5462                 :             :      and this is done by step (4) below.
    5463                 :             : 
    5464                 :             :      When vectorizing inner-most loops, the vectorized loop (LOOP) is also the
    5465                 :             :      inner-most loop, and so step (3) works the same, and step (4) is
    5466                 :             :      redundant.  Step (3) creates the following:
    5467                 :             : 
    5468                 :             :         vp0 = &base_addr;
    5469                 :             :         LOOP:   vp1 = phi(vp0,vp2)
    5470                 :             :                 ...
    5471                 :             :                 ...
    5472                 :             :                 vp2 = vp1 + step
    5473                 :             :                 goto LOOP
    5474                 :             : 
    5475                 :             :      If there is an inner-loop nested in LOOP, then step (4) will also be
    5476                 :             :      applied, and an additional update in the inner-loop will be created:
    5477                 :             : 
    5478                 :             :         vp0 = &base_addr;
    5479                 :             :         LOOP:   vp1 = phi(vp0,vp2)
    5480                 :             :                 ...
    5481                 :             :         inner:     vp3 = phi(vp1,vp4)
    5482                 :             :                    vp4 = vp3 + inner_step
    5483                 :             :                    if () goto inner
    5484                 :             :                 ...
    5485                 :             :                 vp2 = vp1 + step
    5486                 :             :                 if () goto LOOP   */
    5487                 :             : 
    5488                 :             :   /* (2) Calculate the initial address of the aggregate-pointer, and set
    5489                 :             :      the aggregate-pointer to point to it before the loop.  */
    5490                 :             : 
    5491                 :             :   /* Create: (&(base[init_val]+offset) in the loop preheader.  */
    5492                 :             : 
    5493                 :      670065 :   new_temp = vect_create_addr_base_for_vector_ref (vinfo,
    5494                 :             :                                                    stmt_info, &new_stmt_list,
    5495                 :             :                                                    offset);
    5496                 :      670065 :   if (new_stmt_list)
    5497                 :             :     {
    5498                 :      154645 :       if (pe)
    5499                 :             :         {
    5500                 :       51845 :           new_bb = gsi_insert_seq_on_edge_immediate (pe, new_stmt_list);
    5501                 :       51845 :           gcc_assert (!new_bb);
    5502                 :             :         }
    5503                 :             :       else
    5504                 :      102800 :         gsi_insert_seq_before (gsi, new_stmt_list, GSI_SAME_STMT);
    5505                 :             :     }
    5506                 :             : 
    5507                 :      670065 :   *initial_address = new_temp;
    5508                 :      670065 :   aggr_ptr_init = new_temp;
    5509                 :             : 
    5510                 :             :   /* (3) Handle the updating of the aggregate-pointer inside the loop.
    5511                 :             :      This is needed when ONLY_INIT is false, and also when AT_LOOP is the
    5512                 :             :      inner-loop nested in LOOP (during outer-loop vectorization).  */
    5513                 :             : 
    5514                 :             :   /* No update in loop is required.  */
    5515                 :      670065 :   if (only_init && (!loop_vinfo || at_loop == loop))
    5516                 :             :     aptr = aggr_ptr_init;
    5517                 :             :   else
    5518                 :             :     {
    5519                 :             :       /* Accesses to invariant addresses should be handled specially
    5520                 :             :          by the caller.  */
    5521                 :      116198 :       tree step = vect_dr_behavior (vinfo, dr_info)->step;
    5522                 :      116198 :       gcc_assert (!integer_zerop (step));
    5523                 :             : 
    5524                 :      116198 :       if (iv_step == NULL_TREE)
    5525                 :             :         {
    5526                 :             :           /* The step of the aggregate pointer is the type size,
    5527                 :             :              negated for downward accesses.  */
    5528                 :           0 :           iv_step = TYPE_SIZE_UNIT (aggr_type);
    5529                 :           0 :           if (tree_int_cst_sgn (step) == -1)
    5530                 :           0 :             iv_step = fold_build1 (NEGATE_EXPR, TREE_TYPE (iv_step), iv_step);
    5531                 :             :         }
    5532                 :             : 
    5533                 :      116198 :       standard_iv_increment_position (loop, &incr_gsi, &insert_after);
    5534                 :             : 
    5535                 :      116198 :       create_iv (aggr_ptr_init, PLUS_EXPR,
    5536                 :             :                  fold_convert (aggr_ptr_type, iv_step),
    5537                 :             :                  aggr_ptr, loop, &incr_gsi, insert_after,
    5538                 :             :                  &indx_before_incr, &indx_after_incr);
    5539                 :      116198 :       incr = gsi_stmt (incr_gsi);
    5540                 :             : 
    5541                 :             :       /* Copy the points-to information if it exists. */
    5542                 :      116198 :       if (DR_PTR_INFO (dr))
    5543                 :             :         {
    5544                 :       62356 :           vect_duplicate_ssa_name_ptr_info (indx_before_incr, dr_info);
    5545                 :       62356 :           vect_duplicate_ssa_name_ptr_info (indx_after_incr, dr_info);
    5546                 :             :         }
    5547                 :      116198 :       if (ptr_incr)
    5548                 :      116198 :         *ptr_incr = incr;
    5549                 :             : 
    5550                 :      116198 :       aptr = indx_before_incr;
    5551                 :             :     }
    5552                 :             : 
    5553                 :      670065 :   if (!nested_in_vect_loop || only_init)
    5554                 :             :     return aptr;
    5555                 :             : 
    5556                 :             : 
    5557                 :             :   /* (4) Handle the updating of the aggregate-pointer inside the inner-loop
    5558                 :             :      nested in LOOP, if exists.  */
    5559                 :             : 
    5560                 :         337 :   gcc_assert (nested_in_vect_loop);
    5561                 :         337 :   if (!only_init)
    5562                 :             :     {
    5563                 :         337 :       standard_iv_increment_position (containing_loop, &incr_gsi,
    5564                 :             :                                       &insert_after);
    5565                 :         337 :       create_iv (aptr, PLUS_EXPR, fold_convert (aggr_ptr_type, DR_STEP (dr)),
    5566                 :             :                  aggr_ptr, containing_loop, &incr_gsi, insert_after,
    5567                 :             :                  &indx_before_incr, &indx_after_incr);
    5568                 :         337 :       incr = gsi_stmt (incr_gsi);
    5569                 :             : 
    5570                 :             :       /* Copy the points-to information if it exists. */
    5571                 :         337 :       if (DR_PTR_INFO (dr))
    5572                 :             :         {
    5573                 :          74 :           vect_duplicate_ssa_name_ptr_info (indx_before_incr, dr_info);
    5574                 :          74 :           vect_duplicate_ssa_name_ptr_info (indx_after_incr, dr_info);
    5575                 :             :         }
    5576                 :         337 :       if (ptr_incr)
    5577                 :         337 :         *ptr_incr = incr;
    5578                 :             : 
    5579                 :         337 :       return indx_before_incr;
    5580                 :             :     }
    5581                 :             :   else
    5582                 :             :     gcc_unreachable ();
    5583                 :             : }
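
A source-level sketch of the nested case described in the comment inside this function (illustrative; whether the outer loop is actually vectorized depends on the target and the cost model): when the outer i-loop is vectorized, the pointer created for in[] gets the outer update cycle from step (3) and, because the dataref sits in the inner j-loop, the additional inner update cycle from step (4).

/* Illustrative only: a dataref in an inner loop nested in the loop
   considered for (outer-loop) vectorization.  */
void outer_loop_example (short *restrict out, const short *restrict in,
                         int n, int m)
{
  for (int i = 0; i < n; i++)
    {
      short s = 0;
      for (int j = 0; j < m; j++)
        s += in[i * m + j];
      out[i] = s;
    }
}
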
    5584                 :             : 
    5585                 :             : 
    5586                 :             : /* Function bump_vector_ptr
    5587                 :             : 
    5588                 :             :    Increment a pointer (to a vector type) by vector-size. If requested,
    5589                 :             :    i.e. if PTR_INCR is given, then also connect the new increment stmt
    5590                 :             :    to the existing def-use update-chain of the pointer, by modifying
    5591                 :             :    the PTR_INCR as illustrated below:
    5592                 :             : 
    5593                 :             :    The pointer def-use update-chain before this function:
    5594                 :             :                         DATAREF_PTR = phi (p_0, p_2)
    5595                 :             :                         ....
    5596                 :             :         PTR_INCR:       p_2 = DATAREF_PTR + step
    5597                 :             : 
    5598                 :             :    The pointer def-use update-chain after this function:
    5599                 :             :                         DATAREF_PTR = phi (p_0, p_2)
    5600                 :             :                         ....
    5601                 :             :                         NEW_DATAREF_PTR = DATAREF_PTR + BUMP
    5602                 :             :                         ....
    5603                 :             :         PTR_INCR:       p_2 = NEW_DATAREF_PTR + step
    5604                 :             : 
    5605                 :             :    Input:
    5606                 :             :    DATAREF_PTR - ssa_name of a pointer (to vector type) that is being updated
    5607                 :             :                  in the loop.
    5608                 :             :    PTR_INCR - optional. The stmt that updates the pointer in each iteration of
    5609                 :             :               the loop.  The increment amount across iterations is expected
    5610                 :             :               to be vector_size.
    5611                 :             :    BSI - location where the new update stmt is to be placed.
    5612                 :             :    STMT_INFO - the original scalar memory-access stmt that is being vectorized.
    5613                 :             :    BUMP - optional. The offset by which to bump the pointer. If not given,
    5614                 :             :           the offset is assumed to be vector_size.
    5615                 :             : 
    5616                 :             :    Output: Return NEW_DATAREF_PTR as illustrated above.
    5617                 :             : 
    5618                 :             : */
    5619                 :             : 
    5620                 :             : tree
    5621                 :      205429 : bump_vector_ptr (vec_info *vinfo,
    5622                 :             :                  tree dataref_ptr, gimple *ptr_incr, gimple_stmt_iterator *gsi,
    5623                 :             :                  stmt_vec_info stmt_info, tree bump)
    5624                 :             : {
    5625                 :      205429 :   struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
    5626                 :      205429 :   tree vectype = STMT_VINFO_VECTYPE (stmt_info);
    5627                 :      205429 :   tree update = TYPE_SIZE_UNIT (vectype);
    5628                 :      205429 :   gimple *incr_stmt;
    5629                 :      205429 :   ssa_op_iter iter;
    5630                 :      205429 :   use_operand_p use_p;
    5631                 :      205429 :   tree new_dataref_ptr;
    5632                 :             : 
    5633                 :      205429 :   if (bump)
    5634                 :      205429 :     update = bump;
    5635                 :             : 
    5636                 :      205429 :   if (TREE_CODE (dataref_ptr) == SSA_NAME)
    5637                 :       95413 :     new_dataref_ptr = copy_ssa_name (dataref_ptr);
    5638                 :      110016 :   else if (is_gimple_min_invariant (dataref_ptr))
    5639                 :             :     /* When possible avoid emitting a separate increment stmt that will
    5640                 :             :        force the addressed object addressable.  */
    5641                 :      220032 :     return build1 (ADDR_EXPR, TREE_TYPE (dataref_ptr),
    5642                 :      110016 :                    fold_build2 (MEM_REF,
    5643                 :             :                                 TREE_TYPE (TREE_TYPE (dataref_ptr)),
    5644                 :             :   /* Fold the increment, avoiding excessive use-def chains of such
    5645                 :             :      increments, which lead to compile-time issues for passes running
    5646                 :             :      before the next forwprop pass, which would do this folding as well.  */
    5647                 :           0 :     new_dataref_ptr = make_ssa_name (TREE_TYPE (dataref_ptr));
    5648                 :       95413 :   incr_stmt = gimple_build_assign (new_dataref_ptr, POINTER_PLUS_EXPR,
    5649                 :             :                                    dataref_ptr, update);
    5650                 :       95413 :   vect_finish_stmt_generation (vinfo, stmt_info, incr_stmt, gsi);
    5651                 :             :   /* Fold the increment, avoiding excessive chains use-def chains of
    5652                 :             :      those, leading to compile-time issues for passes until the next
    5653                 :             :      forwprop pass which would do this as well.  */
    5654                 :       95413 :   gimple_stmt_iterator fold_gsi = gsi_for_stmt (incr_stmt);
    5655                 :       95413 :   if (fold_stmt (&fold_gsi, follow_all_ssa_edges))
    5656                 :             :     {
    5657                 :       62991 :       incr_stmt = gsi_stmt (fold_gsi);
    5658                 :       62991 :       update_stmt (incr_stmt);
    5659                 :             :     }
    5660                 :             : 
    5661                 :             :   /* Copy the points-to information if it exists. */
    5662                 :       95413 :   if (DR_PTR_INFO (dr))
    5663                 :             :     {
    5664                 :       53372 :       duplicate_ssa_name_ptr_info (new_dataref_ptr, DR_PTR_INFO (dr));
    5665                 :       53372 :       mark_ptr_info_alignment_unknown (SSA_NAME_PTR_INFO (new_dataref_ptr));
    5666                 :             :     }
    5667                 :             : 
    5668                 :       95413 :   if (!ptr_incr)
    5669                 :             :     return new_dataref_ptr;
    5670                 :             : 
    5671                 :             :   /* Update the vector-pointer's cross-iteration increment.  */
    5672                 :       75000 :   FOR_EACH_SSA_USE_OPERAND (use_p, ptr_incr, iter, SSA_OP_USE)
    5673                 :             :     {
    5674                 :       37500 :       tree use = USE_FROM_PTR (use_p);
    5675                 :             : 
    5676                 :       37500 :       if (use == dataref_ptr)
    5677                 :       37500 :         SET_USE (use_p, new_dataref_ptr);
    5678                 :             :       else
    5679                 :           0 :         gcc_assert (operand_equal_p (use, update, 0));
    5680                 :             :     }
    5681                 :             : 
    5682                 :             :   return new_dataref_ptr;
    5683                 :             : }
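
One situation in which callers need bump_vector_ptr (illustrative, not from the source): an interleaved access group, where more than one vector is loaded per vectorized iteration and the second load goes through the first data-ref pointer bumped by one vector size.

/* Illustrative only: in[2*i] and in[2*i+1] form an interleaved group
   of size 2; a vectorized loop typically loads two consecutive vectors
   per iteration, the second one through a bumped pointer.  */
void deinterleave_sum (float *restrict out, const float *restrict in, int n)
{
  for (int i = 0; i < n; i++)
    out[i] = in[2 * i] + in[2 * i + 1];
}
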
    5684                 :             : 
    5685                 :             : 
    5686                 :             : /* Copy memory reference info such as base/clique from the SRC reference
    5687                 :             :    to the DEST MEM_REF.  */
    5688                 :             : 
    5689                 :             : void
    5690                 :      886587 : vect_copy_ref_info (tree dest, tree src)
    5691                 :             : {
    5692                 :      886587 :   if (TREE_CODE (dest) != MEM_REF)
    5693                 :             :     return;
    5694                 :             : 
    5695                 :             :   tree src_base = src;
    5696                 :     1933265 :   while (handled_component_p (src_base))
    5697                 :     1047203 :     src_base = TREE_OPERAND (src_base, 0);
    5698                 :      886062 :   if (TREE_CODE (src_base) != MEM_REF
    5699                 :      886062 :       && TREE_CODE (src_base) != TARGET_MEM_REF)
    5700                 :             :     return;
    5701                 :             : 
    5702                 :      408888 :   MR_DEPENDENCE_CLIQUE (dest) = MR_DEPENDENCE_CLIQUE (src_base);
    5703                 :      408888 :   MR_DEPENDENCE_BASE (dest) = MR_DEPENDENCE_BASE (src_base);
    5704                 :             : }
    5705                 :             : 
    5706                 :             : 
    5707                 :             : /* Function vect_create_destination_var.
    5708                 :             : 
    5709                 :             :    Create a new temporary of type VECTYPE.  */
    5710                 :             : 
    5711                 :             : tree
    5712                 :      457745 : vect_create_destination_var (tree scalar_dest, tree vectype)
    5713                 :             : {
    5714                 :      457745 :   tree vec_dest;
    5715                 :      457745 :   const char *name;
    5716                 :      457745 :   char *new_name;
    5717                 :      457745 :   tree type;
    5718                 :      457745 :   enum vect_var_kind kind;
    5719                 :             : 
    5720                 :      937263 :   kind = vectype
    5721                 :      893717 :     ? VECTOR_BOOLEAN_TYPE_P (vectype)
    5722                 :      435972 :     ? vect_mask_var
    5723                 :             :     : vect_simple_var
    5724                 :             :     : vect_scalar_var;
    5725                 :       21773 :   type = vectype ? vectype : TREE_TYPE (scalar_dest);
    5726                 :             : 
    5727                 :      457745 :   gcc_assert (TREE_CODE (scalar_dest) == SSA_NAME);
    5728                 :             : 
    5729                 :      457745 :   name = get_name (scalar_dest);
    5730                 :      457745 :   if (name)
    5731                 :      164503 :     new_name = xasprintf ("%s_%u", name, SSA_NAME_VERSION (scalar_dest));
    5732                 :             :   else
    5733                 :      293242 :     new_name = xasprintf ("_%u", SSA_NAME_VERSION (scalar_dest));
    5734                 :      457745 :   vec_dest = vect_get_new_vect_var (type, kind, new_name);
    5735                 :      457745 :   free (new_name);
    5736                 :             : 
    5737                 :      457745 :   return vec_dest;
    5738                 :             : }
    5739                 :             : 
    5740                 :             : /* Function vect_grouped_store_supported.
    5741                 :             : 
    5742                 :             :    Returns TRUE if interleave high and interleave low permutations
    5743                 :             :    are supported, and FALSE otherwise.  */
    5744                 :             : 
    5745                 :             : bool
    5746                 :        4187 : vect_grouped_store_supported (tree vectype, unsigned HOST_WIDE_INT count)
    5747                 :             : {
    5748                 :        4187 :   machine_mode mode = TYPE_MODE (vectype);
    5749                 :             : 
    5750                 :             :   /* vect_permute_store_chain requires the group size to be equal to 3 or
    5751                 :             :      be a power of two.  */
    5752                 :        4187 :   if (count != 3 && exact_log2 (count) == -1)
    5753                 :             :     {
    5754                 :         580 :       if (dump_enabled_p ())
    5755                 :           9 :         dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
    5756                 :             :                          "the size of the group of accesses"
    5757                 :             :                          " is not a power of 2 or not equal to 3\n");
    5758                 :         580 :       return false;
    5759                 :             :     }
    5760                 :             : 
    5761                 :             :   /* Check that the permutation is supported.  */
    5762                 :        3607 :   if (VECTOR_MODE_P (mode))
    5763                 :             :     {
    5764                 :        3607 :       unsigned int i;
    5765                 :        3607 :       if (count == 3)
    5766                 :             :         {
    5767                 :        1708 :           unsigned int j0 = 0, j1 = 0, j2 = 0;
    5768                 :        1708 :           unsigned int i, j;
    5769                 :             : 
    5770                 :        1708 :           unsigned int nelt;
    5771                 :        3416 :           if (!GET_MODE_NUNITS (mode).is_constant (&nelt))
    5772                 :             :             {
    5773                 :             :               if (dump_enabled_p ())
    5774                 :             :                 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
    5775                 :             :                                  "cannot handle groups of 3 stores for"
    5776                 :             :                                  " variable-length vectors\n");
    5777                 :             :               return false;
    5778                 :             :             }
    5779                 :             : 
    5780                 :        1708 :           vec_perm_builder sel (nelt, nelt, 1);
    5781                 :        1708 :           sel.quick_grow (nelt);
    5782                 :        1708 :           vec_perm_indices indices;
    5783                 :        6625 :           for (j = 0; j < 3; j++)
    5784                 :             :             {
    5785                 :        4986 :               int nelt0 = ((3 - j) * nelt) % 3;
    5786                 :        4986 :               int nelt1 = ((3 - j) * nelt + 1) % 3;
    5787                 :        4986 :               int nelt2 = ((3 - j) * nelt + 2) % 3;
    5788                 :       16894 :               for (i = 0; i < nelt; i++)
    5789                 :             :                 {
    5790                 :       11908 :                   if (3 * i + nelt0 < nelt)
    5791                 :        4004 :                     sel[3 * i + nelt0] = j0++;
    5792                 :       11908 :                   if (3 * i + nelt1 < nelt)
    5793                 :        3969 :                     sel[3 * i + nelt1] = nelt + j1++;
    5794                 :       11908 :                   if (3 * i + nelt2 < nelt)
    5795                 :        3935 :                     sel[3 * i + nelt2] = 0;
    5796                 :             :                 }
    5797                 :        4986 :               indices.new_vector (sel, 2, nelt);
    5798                 :        4986 :               if (!can_vec_perm_const_p (mode, mode, indices))
    5799                 :             :                 {
    5800                 :          60 :                   if (dump_enabled_p ())
    5801                 :          37 :                     dump_printf (MSG_MISSED_OPTIMIZATION,
    5802                 :             :                                  "permutation op not supported by target.\n");
    5803                 :          60 :                   return false;
    5804                 :             :                 }
    5805                 :             : 
    5806                 :       16314 :               for (i = 0; i < nelt; i++)
    5807                 :             :                 {
    5808                 :       11388 :                   if (3 * i + nelt0 < nelt)
    5809                 :        3802 :                     sel[3 * i + nelt0] = 3 * i + nelt0;
    5810                 :       11388 :                   if (3 * i + nelt1 < nelt)
    5811                 :        3793 :                     sel[3 * i + nelt1] = 3 * i + nelt1;
    5812                 :       11388 :                   if (3 * i + nelt2 < nelt)
    5813                 :        3793 :                     sel[3 * i + nelt2] = nelt + j2++;
    5814                 :             :                 }
    5815                 :        4926 :               indices.new_vector (sel, 2, nelt);
    5816                 :        4926 :               if (!can_vec_perm_const_p (mode, mode, indices))
    5817                 :             :                 {
    5818                 :           9 :                   if (dump_enabled_p ())
    5819                 :           9 :                     dump_printf (MSG_MISSED_OPTIMIZATION,
    5820                 :             :                                  "permutation op not supported by target.\n");
    5821                 :           9 :                   return false;
    5822                 :             :                 }
    5823                 :             :             }
    5824                 :             :           return true;
    5825                 :        1708 :         }
    5826                 :             :       else
    5827                 :             :         {
    5828                 :             :           /* If length is not equal to 3 then only a power of 2 is supported.  */
    5829                 :        1899 :           gcc_assert (pow2p_hwi (count));
    5830                 :        3798 :           poly_uint64 nelt = GET_MODE_NUNITS (mode);
    5831                 :             : 
    5832                 :             :           /* The encoding has 2 interleaved stepped patterns.  */
    5833                 :        3798 :           if (!multiple_p (nelt, 2))
    5834                 :        1833 :             return false;
    5835                 :        1899 :           vec_perm_builder sel (nelt, 2, 3);
    5836                 :        1899 :           sel.quick_grow (6);
    5837                 :        9495 :           for (i = 0; i < 3; i++)
    5838                 :             :             {
    5839                 :        5697 :               sel[i * 2] = i;
    5840                 :        5697 :               sel[i * 2 + 1] = i + nelt;
    5841                 :             :             }
    5842                 :        1899 :           vec_perm_indices indices (sel, 2, nelt);
    5843                 :        1899 :           if (can_vec_perm_const_p (mode, mode, indices))
    5844                 :             :             {
    5845                 :       12831 :               for (i = 0; i < 6; i++)
    5846                 :       10998 :                 sel[i] += exact_div (nelt, 2);
    5847                 :        1833 :               indices.new_vector (sel, 2, nelt);
    5848                 :        1833 :               if (can_vec_perm_const_p (mode, mode, indices))
    5849                 :        1833 :                 return true;
    5850                 :             :             }
    5851                 :        1899 :         }
    5852                 :             :     }
    5853                 :             : 
    5854                 :          66 :   if (dump_enabled_p ())
    5855                 :           5 :     dump_printf (MSG_MISSED_OPTIMIZATION,
    5856                 :             :                  "permutation op not supported by target.\n");
    5857                 :             :   return false;
    5858                 :             : }
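
As an illustration, with eight-element vectors (nelt = 8) the stepped encoding
built in the power-of-two branch above expands to the interleave-high selector
{ 0, 8, 1, 9, 2, 10, 3, 11 }; adding nelt/2 to each pattern entry then gives
the interleave-low selector { 4, 12, 5, 13, 6, 14, 7, 15 }.  The group size is
supported only if the target accepts both constant permutations.
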
    5859                 :             : 
    5860                 :             : /* Return FN if vec_{mask_,mask_len_}store_lanes is available for COUNT vectors
    5861                 :             :    of type VECTYPE.  MASKED_P says whether the masked form is needed.  */
    5862                 :             : 
    5863                 :             : internal_fn
    5864                 :       27438 : vect_store_lanes_supported (tree vectype, unsigned HOST_WIDE_INT count,
    5865                 :             :                             bool masked_p)
    5866                 :             : {
    5867                 :       27438 :   if (vect_lanes_optab_supported_p ("vec_mask_len_store_lanes",
    5868                 :             :                                     vec_mask_len_store_lanes_optab, vectype,
    5869                 :             :                                     count))
    5870                 :             :     return IFN_MASK_LEN_STORE_LANES;
    5871                 :       27438 :   else if (masked_p)
    5872                 :             :     {
    5873                 :         184 :       if (vect_lanes_optab_supported_p ("vec_mask_store_lanes",
    5874                 :             :                                         vec_mask_store_lanes_optab, vectype,
    5875                 :             :                                         count))
    5876                 :             :         return IFN_MASK_STORE_LANES;
    5877                 :             :     }
    5878                 :             :   else
    5879                 :             :     {
    5880                 :       27254 :       if (vect_lanes_optab_supported_p ("vec_store_lanes",
    5881                 :             :                                         vec_store_lanes_optab, vectype, count))
    5882                 :             :         return IFN_STORE_LANES;
    5883                 :             :     }
    5884                 :             :   return IFN_LAST;
    5885                 :             : }
    5886                 :             : 
    5887                 :             : 
    5888                 :             : /* Function vect_permute_store_chain.
    5889                 :             : 
    5890                 :             :    Given a chain of interleaved stores in DR_CHAIN of LENGTH that must be
    5891                 :             :    a power of 2 or equal to 3, generate interleave_high/low stmts to reorder
    5892                 :             :    the data correctly for the stores.  Return the final references for stores
    5893                 :             :    in RESULT_CHAIN.
    5894                 :             : 
    5895                 :             :    E.g., LENGTH is 4 and the scalar type is short, i.e., VF is 8.
    5896                 :             :    The input is 4 vectors each containing 8 elements.  We assign a number to
    5897                 :             :    each element, the input sequence is:
    5898                 :             : 
    5899                 :             :    1st vec:   0  1  2  3  4  5  6  7
    5900                 :             :    2nd vec:   8  9 10 11 12 13 14 15
    5901                 :             :    3rd vec:  16 17 18 19 20 21 22 23
    5902                 :             :    4th vec:  24 25 26 27 28 29 30 31
    5903                 :             : 
    5904                 :             :    The output sequence should be:
    5905                 :             : 
    5906                 :             :    1st vec:  0  8 16 24  1  9 17 25
    5907                 :             :    2nd vec:  2 10 18 26  3 11 19 27
    5908                 :             :    3rd vec:  4 12 20 28  5 13 21 29
    5909                 :             :    4th vec:  6 14 22 30  7 15 23 31
    5910                 :             : 
    5911                 :             :    i.e., we interleave the contents of the four vectors in their order.
    5912                 :             : 
    5913                 :             :    We use interleave_high/low instructions to create such output.  The input of
    5914                 :             :    each interleave_high/low operation is two vectors:
    5915                 :             :    1st vec    2nd vec
    5916                 :             :    0 1 2 3    4 5 6 7
    5917                 :             :    the even elements of the result vector are obtained left-to-right from the
    5918                 :             :    high/low elements of the first vector.  The odd elements of the result are
    5919                 :             :    obtained left-to-right from the high/low elements of the second vector.
    5920                 :             :    The output of interleave_high will be:   0 4 1 5
    5921                 :             :    and of interleave_low:                   2 6 3 7
    5922                 :             : 
    5923                 :             : 
    5924                 :             :    The permutation is done in log LENGTH stages.  In each stage interleave_high
    5925                 :             :    and interleave_low stmts are created for each pair of vectors in DR_CHAIN,
    5926                 :             :    where the first argument is taken from the first half of DR_CHAIN and the
    5927                 :             :    second argument from its second half.
    5928                 :             :    In our example,
    5929                 :             : 
    5930                 :             :    I1: interleave_high (1st vec, 3rd vec)
    5931                 :             :    I2: interleave_low (1st vec, 3rd vec)
    5932                 :             :    I3: interleave_high (2nd vec, 4th vec)
    5933                 :             :    I4: interleave_low (2nd vec, 4th vec)
    5934                 :             : 
    5935                 :             :    The output for the first stage is:
    5936                 :             : 
    5937                 :             :    I1:  0 16  1 17  2 18  3 19
    5938                 :             :    I2:  4 20  5 21  6 22  7 23
    5939                 :             :    I3:  8 24  9 25 10 26 11 27
    5940                 :             :    I4: 12 28 13 29 14 30 15 31
    5941                 :             : 
    5942                 :             :    The output of the second stage, i.e. the final result is:
    5943                 :             : 
    5944                 :             :    I1:  0  8 16 24  1  9 17 25
    5945                 :             :    I2:  2 10 18 26  3 11 19 27
    5946                 :             :    I3:  4 12 20 28  5 13 21 29
    5947                 :             :    I4:  6 14 22 30  7 15 23 31.  */
    5948                 :             : 
    5949                 :             : void
    5950                 :           0 : vect_permute_store_chain (vec_info *vinfo, vec<tree> &dr_chain,
    5951                 :             :                           unsigned int length,
    5952                 :             :                           stmt_vec_info stmt_info,
    5953                 :             :                           gimple_stmt_iterator *gsi,
    5954                 :             :                           vec<tree> *result_chain)
    5955                 :             : {
    5956                 :           0 :   tree vect1, vect2, high, low;
    5957                 :           0 :   gimple *perm_stmt;
    5958                 :           0 :   tree vectype = STMT_VINFO_VECTYPE (stmt_info);
    5959                 :           0 :   tree perm_mask_low, perm_mask_high;
    5960                 :           0 :   tree data_ref;
    5961                 :           0 :   tree perm3_mask_low, perm3_mask_high;
    5962                 :           0 :   unsigned int i, j, n, log_length = exact_log2 (length);
    5963                 :             : 
    5964                 :           0 :   result_chain->quick_grow (length);
    5965                 :           0 :   memcpy (result_chain->address (), dr_chain.address (),
    5966                 :             :           length * sizeof (tree));
    5967                 :             : 
    5968                 :           0 :   if (length == 3)
    5969                 :             :     {
    5970                 :             :       /* vect_grouped_store_supported ensures that this is constant.  */
    5971                 :           0 :       unsigned int nelt = TYPE_VECTOR_SUBPARTS (vectype).to_constant ();
    5972                 :           0 :       unsigned int j0 = 0, j1 = 0, j2 = 0;
    5973                 :             : 
    5974                 :           0 :       vec_perm_builder sel (nelt, nelt, 1);
    5975                 :           0 :       sel.quick_grow (nelt);
    5976                 :           0 :       vec_perm_indices indices;
    5977                 :           0 :       for (j = 0; j < 3; j++)
    5978                 :             :         {
    5979                 :           0 :           int nelt0 = ((3 - j) * nelt) % 3;
    5980                 :           0 :           int nelt1 = ((3 - j) * nelt + 1) % 3;
    5981                 :           0 :           int nelt2 = ((3 - j) * nelt + 2) % 3;
    5982                 :             : 
    5983                 :           0 :           for (i = 0; i < nelt; i++)
    5984                 :             :             {
    5985                 :           0 :               if (3 * i + nelt0 < nelt)
    5986                 :           0 :                 sel[3 * i + nelt0] = j0++;
    5987                 :           0 :               if (3 * i + nelt1 < nelt)
    5988                 :           0 :                 sel[3 * i + nelt1] = nelt + j1++;
    5989                 :           0 :               if (3 * i + nelt2 < nelt)
    5990                 :           0 :                 sel[3 * i + nelt2] = 0;
    5991                 :             :             }
    5992                 :           0 :           indices.new_vector (sel, 2, nelt);
    5993                 :           0 :           perm3_mask_low = vect_gen_perm_mask_checked (vectype, indices);
    5994                 :             : 
    5995                 :           0 :           for (i = 0; i < nelt; i++)
    5996                 :             :             {
    5997                 :           0 :               if (3 * i + nelt0 < nelt)
    5998                 :           0 :                 sel[3 * i + nelt0] = 3 * i + nelt0;
    5999                 :           0 :               if (3 * i + nelt1 < nelt)
    6000                 :           0 :                 sel[3 * i + nelt1] = 3 * i + nelt1;
    6001                 :           0 :               if (3 * i + nelt2 < nelt)
    6002                 :           0 :                 sel[3 * i + nelt2] = nelt + j2++;
    6003                 :             :             }
    6004                 :           0 :           indices.new_vector (sel, 2, nelt);
    6005                 :           0 :           perm3_mask_high = vect_gen_perm_mask_checked (vectype, indices);
    6006                 :             : 
    6007                 :           0 :           vect1 = dr_chain[0];
    6008                 :           0 :           vect2 = dr_chain[1];
    6009                 :             : 
    6010                 :             :           /* Create interleaving stmt:
    6011                 :             :              low = VEC_PERM_EXPR <vect1, vect2,
    6012                 :             :                                   {j, nelt, *, j + 1, nelt + j + 1, *,
    6013                 :             :                                    j + 2, nelt + j + 2, *, ...}>  */
    6014                 :           0 :           data_ref = make_temp_ssa_name (vectype, NULL, "vect_shuffle3_low");
    6015                 :           0 :           perm_stmt = gimple_build_assign (data_ref, VEC_PERM_EXPR, vect1,
    6016                 :             :                                            vect2, perm3_mask_low);
    6017                 :           0 :           vect_finish_stmt_generation (vinfo, stmt_info, perm_stmt, gsi);
    6018                 :             : 
    6019                 :           0 :           vect1 = data_ref;
    6020                 :           0 :           vect2 = dr_chain[2];
    6021                 :             :           /* Create interleaving stmt:
    6022                 :             :              low = VEC_PERM_EXPR <vect1, vect2,
    6023                 :             :                                   {0, 1, nelt + j, 3, 4, nelt + j + 1,
    6024                 :             :                                    6, 7, nelt + j + 2, ...}>  */
    6025                 :           0 :           data_ref = make_temp_ssa_name (vectype, NULL, "vect_shuffle3_high");
    6026                 :           0 :           perm_stmt = gimple_build_assign (data_ref, VEC_PERM_EXPR, vect1,
    6027                 :             :                                            vect2, perm3_mask_high);
    6028                 :           0 :           vect_finish_stmt_generation (vinfo, stmt_info, perm_stmt, gsi);
    6029                 :           0 :           (*result_chain)[j] = data_ref;
    6030                 :             :         }
    6031                 :           0 :     }
    6032                 :             :   else
    6033                 :             :     {
    6034                 :             :       /* If length is not equal to 3 then only a power of 2 is supported.  */
    6035                 :           0 :       gcc_assert (pow2p_hwi (length));
    6036                 :             : 
    6037                 :             :       /* The encoding has 2 interleaved stepped patterns.  */
    6038                 :           0 :       poly_uint64 nelt = TYPE_VECTOR_SUBPARTS (vectype);
    6039                 :           0 :       vec_perm_builder sel (nelt, 2, 3);
    6040                 :           0 :       sel.quick_grow (6);
    6041                 :           0 :       for (i = 0; i < 3; i++)
    6042                 :             :         {
    6043                 :           0 :           sel[i * 2] = i;
    6044                 :           0 :           sel[i * 2 + 1] = i + nelt;
    6045                 :             :         }
    6046                 :           0 :         vec_perm_indices indices (sel, 2, nelt);
    6047                 :           0 :         perm_mask_high = vect_gen_perm_mask_checked (vectype, indices);
    6048                 :             : 
    6049                 :           0 :         for (i = 0; i < 6; i++)
    6050                 :           0 :           sel[i] += exact_div (nelt, 2);
    6051                 :           0 :         indices.new_vector (sel, 2, nelt);
    6052                 :           0 :         perm_mask_low = vect_gen_perm_mask_checked (vectype, indices);
    6053                 :             : 
    6054                 :           0 :         for (i = 0, n = log_length; i < n; i++)
    6055                 :             :           {
    6056                 :           0 :             for (j = 0; j < length/2; j++)
    6057                 :             :               {
    6058                 :           0 :                 vect1 = dr_chain[j];
    6059                 :           0 :                 vect2 = dr_chain[j+length/2];
    6060                 :             : 
    6061                 :             :                 /* Create interleaving stmt:
    6062                 :             :                    high = VEC_PERM_EXPR <vect1, vect2, {0, nelt, 1, nelt+1,
    6063                 :             :                                                         ...}>  */
    6064                 :           0 :                 high = make_temp_ssa_name (vectype, NULL, "vect_inter_high");
    6065                 :           0 :                 perm_stmt = gimple_build_assign (high, VEC_PERM_EXPR, vect1,
    6066                 :             :                                                  vect2, perm_mask_high);
    6067                 :           0 :                 vect_finish_stmt_generation (vinfo, stmt_info, perm_stmt, gsi);
    6068                 :           0 :                 (*result_chain)[2*j] = high;
    6069                 :             : 
    6070                 :             :                 /* Create interleaving stmt:
    6071                 :             :                    low = VEC_PERM_EXPR <vect1, vect2,
    6072                 :             :                                         {nelt/2, nelt*3/2, nelt/2+1, nelt*3/2+1,
    6073                 :             :                                          ...}>  */
    6074                 :           0 :                 low = make_temp_ssa_name (vectype, NULL, "vect_inter_low");
    6075                 :           0 :                 perm_stmt = gimple_build_assign (low, VEC_PERM_EXPR, vect1,
    6076                 :             :                                                  vect2, perm_mask_low);
    6077                 :           0 :                 vect_finish_stmt_generation (vinfo, stmt_info, perm_stmt, gsi);
    6078                 :           0 :                 (*result_chain)[2*j+1] = low;
    6079                 :             :               }
    6080                 :           0 :             memcpy (dr_chain.address (), result_chain->address (),
    6081                 :             :                     length * sizeof (tree));
    6082                 :             :           }
    6083                 :           0 :     }
    6084                 :           0 : }
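
As a rough standalone sketch (plain C++ rather than GCC internals; the helper
names below are invented for illustration), the power-of-two path can be
simulated on plain arrays to reproduce the LENGTH = 4, eight-element example
from the comment above:

    /* Illustrative simulation of the log2 (LENGTH) interleaving stages.  */
    #include <cstdio>
    #include <vector>

    typedef std::vector<unsigned> vec_t;

    /* Apply a VEC_PERM-style selector to the concatenation of A and B.  */
    static vec_t
    permute (const vec_t &a, const vec_t &b, const vec_t &sel)
    {
      vec_t r (sel.size ());
      for (unsigned i = 0; i < sel.size (); i++)
        r[i] = sel[i] < a.size () ? a[sel[i]] : b[sel[i] - a.size ()];
      return r;
    }

    int
    main ()
    {
      const unsigned nelt = 8, length = 4, log_length = 2;

      /* Interleave-high/low selectors, as checked by
         vect_grouped_store_supported.  */
      vec_t high_sel (nelt), low_sel (nelt);
      for (unsigned i = 0; i < nelt / 2; i++)
        {
          high_sel[2 * i] = i;
          high_sel[2 * i + 1] = i + nelt;
          low_sel[2 * i] = i + nelt / 2;
          low_sel[2 * i + 1] = i + nelt / 2 + nelt;
        }

      /* DR_CHAIN holds 0..7, 8..15, 16..23, 24..31.  */
      std::vector<vec_t> chain (length, vec_t (nelt)), result (length);
      for (unsigned j = 0; j < length; j++)
        for (unsigned i = 0; i < nelt; i++)
          chain[j][i] = j * nelt + i;

      for (unsigned stage = 0; stage < log_length; stage++)
        {
          for (unsigned j = 0; j < length / 2; j++)
            {
              result[2 * j] = permute (chain[j], chain[j + length / 2],
                                       high_sel);
              result[2 * j + 1] = permute (chain[j], chain[j + length / 2],
                                           low_sel);
            }
          chain = result;
        }

      /* Prints the final order shown in the comment:
           0  8 16 24  1  9 17 25
           2 10 18 26  3 11 19 27
           4 12 20 28  5 13 21 29
           6 14 22 30  7 15 23 31  */
      for (unsigned j = 0; j < length; j++)
        {
          for (unsigned i = 0; i < nelt; i++)
            std::printf (" %2u", chain[j][i]);
          std::printf ("\n");
        }
      return 0;
    }
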
    6085                 :             : 
    6086                 :             : /* Function vect_setup_realignment
    6087                 :             : 
    6088                 :             :    This function is called when vectorizing an unaligned load using
    6089                 :             :    the dr_explicit_realign[_optimized] scheme.
    6090                 :             :    This function generates the following code at the loop prolog:
    6091                 :             : 
    6092                 :             :       p = initial_addr;
    6093                 :             :    x  msq_init = *(floor(p));   # prolog load
    6094                 :             :       realignment_token = call target_builtin;
    6095                 :             :     loop:
    6096                 :             :    x  msq = phi (msq_init, ---)
    6097                 :             : 
    6098                 :             :    The stmts marked with x are generated only for the case of
    6099                 :             :    dr_explicit_realign_optimized.
    6100                 :             : 
    6101                 :             :    The code above sets up a new (vector) pointer, pointing to the first
    6102                 :             :    location accessed by STMT_INFO, and a "floor-aligned" load using that
    6103                 :             :    pointer.  It also generates code to compute the "realignment-token"
    6104                 :             :    (if the relevant target hook was defined), and creates a phi-node at the
    6105                 :             :    loop-header bb whose arguments are the result of the prolog-load (created
    6106                 :             :    by this function) and the result of a load that takes place in the loop
    6107                 :             :    (to be created by the caller to this function).
    6108                 :             : 
    6109                 :             :    For the case of dr_explicit_realign_optimized:
    6110                 :             :    The caller to this function uses the phi-result (msq) to create the
    6111                 :             :    realignment code inside the loop, and sets up the missing phi argument,
    6112                 :             :    as follows:
    6113                 :             :     loop:
    6114                 :             :       msq = phi (msq_init, lsq)
    6115                 :             :       lsq = *(floor(p'));        # load in loop
    6116                 :             :       result = realign_load (msq, lsq, realignment_token);
    6117                 :             : 
    6118                 :             :    For the case of dr_explicit_realign:
    6119                 :             :     loop:
    6120                 :             :       msq = *(floor(p));        # load in loop
    6121                 :             :       p' = p + (VS-1);
    6122                 :             :       lsq = *(floor(p'));       # load in loop
    6123                 :             :       result = realign_load (msq, lsq, realignment_token);
    6124                 :             : 
    6125                 :             :    Input:
    6126                 :             :    STMT_INFO - (scalar) load stmt to be vectorized. This load accesses
    6127                 :             :                a memory location that may be unaligned.
    6128                 :             :    GSI - place where new code is to be inserted.
    6129                 :             :    ALIGNMENT_SUPPORT_SCHEME - which of the two misalignment handling schemes
    6130                 :             :                               is used.
    6131                 :             : 
    6132                 :             :    Output:
    6133                 :             :    REALIGNMENT_TOKEN - the result of a call to the builtin_mask_for_load
    6134                 :             :                        target hook, if defined.
    6135                 :             :    Return value - the result of the loop-header phi node.  */
    6136                 :             : 
    6137                 :             : tree
    6138                 :           0 : vect_setup_realignment (vec_info *vinfo, stmt_vec_info stmt_info,
    6139                 :             :                         gimple_stmt_iterator *gsi, tree *realignment_token,
    6140                 :             :                         enum dr_alignment_support alignment_support_scheme,
    6141                 :             :                         tree init_addr,
    6142                 :             :                         class loop **at_loop)
    6143                 :             : {
    6144                 :           0 :   tree vectype = STMT_VINFO_VECTYPE (stmt_info);
    6145                 :           0 :   loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
    6146                 :           0 :   dr_vec_info *dr_info = STMT_VINFO_DR_INFO (stmt_info);
    6147                 :           0 :   struct data_reference *dr = dr_info->dr;
    6148                 :           0 :   class loop *loop = NULL;
    6149                 :           0 :   edge pe = NULL;
    6150                 :           0 :   tree scalar_dest = gimple_assign_lhs (stmt_info->stmt);
    6151                 :           0 :   tree vec_dest;
    6152                 :           0 :   gimple *inc;
    6153                 :           0 :   tree ptr;
    6154                 :           0 :   tree data_ref;
    6155                 :           0 :   basic_block new_bb;
    6156                 :           0 :   tree msq_init = NULL_TREE;
    6157                 :           0 :   tree new_temp;
    6158                 :           0 :   gphi *phi_stmt;
    6159                 :           0 :   tree msq = NULL_TREE;
    6160                 :           0 :   gimple_seq stmts = NULL;
    6161                 :           0 :   bool compute_in_loop = false;
    6162                 :           0 :   bool nested_in_vect_loop = false;
    6163                 :           0 :   class loop *containing_loop = (gimple_bb (stmt_info->stmt))->loop_father;
    6164                 :           0 :   class loop *loop_for_initial_load = NULL;
    6165                 :             : 
    6166                 :           0 :   if (loop_vinfo)
    6167                 :             :     {
    6168                 :           0 :       loop = LOOP_VINFO_LOOP (loop_vinfo);
    6169                 :           0 :       nested_in_vect_loop = nested_in_vect_loop_p (loop, stmt_info);
    6170                 :             :     }
    6171                 :             : 
    6172                 :           0 :   gcc_assert (alignment_support_scheme == dr_explicit_realign
    6173                 :             :               || alignment_support_scheme == dr_explicit_realign_optimized);
    6174                 :             : 
    6175                 :             :   /* We need to generate three things:
    6176                 :             :      1. the misalignment computation
    6177                 :             :      2. the extra vector load (for the optimized realignment scheme).
    6178                 :             :      3. the phi node for the two vectors from which the realignment is
    6179                 :             :       done (for the optimized realignment scheme).  */
    6180                 :             : 
    6181                 :             :   /* 1. Determine where to generate the misalignment computation.
    6182                 :             : 
    6183                 :             :      If INIT_ADDR is NULL_TREE, this indicates that the misalignment
    6184                 :             :      calculation will be generated by this function, outside the loop (in the
    6185                 :             :      preheader).  Otherwise, INIT_ADDR had already been computed for us by the
    6186                 :             :      caller, inside the loop.
    6187                 :             : 
    6188                 :             :      Background: If the misalignment remains fixed throughout the iterations of
    6189                 :             :      the loop, then both realignment schemes are applicable, and also the
    6190                 :             :      misalignment computation can be done outside LOOP.  This is because we are
    6191                 :             :      vectorizing LOOP, and so the memory accesses in LOOP advance in steps that
    6192                 :             :      are a multiple of VS (the Vector Size), and therefore the misalignment in
    6193                 :             :      different vectorized LOOP iterations is always the same.
    6194                 :             :      The problem arises only if the memory access is in an inner-loop nested
    6195                 :             :      inside LOOP, which is now being vectorized using outer-loop vectorization.
    6196                 :             :      This is the only case when the misalignment of the memory access may not
    6197                 :             :      remain fixed throughout the iterations of the inner-loop (as explained in
    6198                 :             :      detail in vect_supportable_dr_alignment).  In this case, not only is the
    6199                 :             :      optimized realignment scheme not applicable, but also the misalignment
    6200                 :             :      computation (and generation of the realignment token that is passed to
    6201                 :             :      REALIGN_LOAD) have to be done inside the loop.
    6202                 :             : 
    6203                 :             :      In short, INIT_ADDR indicates whether we are in a COMPUTE_IN_LOOP mode
    6204                 :             :      or not, which in turn determines if the misalignment is computed inside
    6205                 :             :      the inner-loop, or outside LOOP.  */
    6206                 :             : 
    6207                 :           0 :   if (init_addr != NULL_TREE || !loop_vinfo)
    6208                 :             :     {
    6209                 :           0 :       compute_in_loop = true;
    6210                 :           0 :       gcc_assert (alignment_support_scheme == dr_explicit_realign);
    6211                 :             :     }
    6212                 :             : 
    6213                 :             : 
    6214                 :             :   /* 2. Determine where to generate the extra vector load.
    6215                 :             : 
    6216                 :             :      For the optimized realignment scheme, instead of generating two vector
    6217                 :             :      loads in each iteration, we generate a single extra vector load in the
    6218                 :             :      preheader of the loop, and in each iteration reuse the result of the
    6219                 :             :      vector load from the previous iteration.  In case the memory access is in
    6220                 :             :      an inner-loop nested inside LOOP, which is now being vectorized using
    6221                 :             :      outer-loop vectorization, we need to determine whether this initial vector
    6222                 :             :      load should be generated at the preheader of the inner-loop, or can be
    6223                 :             :      generated at the preheader of LOOP.  If the memory access has no evolution
    6224                 :             :      in LOOP, it can be generated in the preheader of LOOP. Otherwise, it has
    6225                 :             :      to be generated inside LOOP (in the preheader of the inner-loop).  */
    6226                 :             : 
    6227                 :           0 :   if (nested_in_vect_loop)
    6228                 :             :     {
    6229                 :           0 :       tree outerloop_step = STMT_VINFO_DR_STEP (stmt_info);
    6230                 :           0 :       bool invariant_in_outerloop =
    6231                 :           0 :             (tree_int_cst_compare (outerloop_step, size_zero_node) == 0);
    6232                 :           0 :       loop_for_initial_load = (invariant_in_outerloop ? loop : loop->inner);
    6233                 :             :     }
    6234                 :             :   else
    6235                 :             :     loop_for_initial_load = loop;
    6236                 :           0 :   if (at_loop)
    6237                 :           0 :     *at_loop = loop_for_initial_load;
    6238                 :             : 
    6239                 :           0 :   tree vuse = NULL_TREE;
    6240                 :           0 :   if (loop_for_initial_load)
    6241                 :             :     {
    6242                 :           0 :       pe = loop_preheader_edge (loop_for_initial_load);
    6243                 :           0 :       if (gphi *vphi = get_virtual_phi (loop_for_initial_load->header))
    6244                 :           0 :         vuse = PHI_ARG_DEF_FROM_EDGE (vphi, pe);
    6245                 :             :     }
    6246                 :           0 :   if (!vuse)
    6247                 :           0 :     vuse = gimple_vuse (gsi_stmt (*gsi));
    6248                 :             : 
    6249                 :             :   /* 3. For the case of the optimized realignment, create the first vector
    6250                 :             :       load at the loop preheader.  */
    6251                 :             : 
    6252                 :           0 :   if (alignment_support_scheme == dr_explicit_realign_optimized)
    6253                 :             :     {
    6254                 :             :       /* Create msq_init = *(floor(p1)) in the loop preheader  */
    6255                 :           0 :       gassign *new_stmt;
    6256                 :             : 
    6257                 :           0 :       gcc_assert (!compute_in_loop);
    6258                 :           0 :       vec_dest = vect_create_destination_var (scalar_dest, vectype);
    6259                 :           0 :       ptr = vect_create_data_ref_ptr (vinfo, stmt_info, vectype,
    6260                 :             :                                       loop_for_initial_load, NULL_TREE,
    6261                 :             :                                       &init_addr, NULL, &inc, true);
    6262                 :           0 :       if (TREE_CODE (ptr) == SSA_NAME)
    6263                 :           0 :         new_temp = copy_ssa_name (ptr);
    6264                 :             :       else
    6265                 :           0 :         new_temp = make_ssa_name (TREE_TYPE (ptr));
    6266                 :           0 :       poly_uint64 align = DR_TARGET_ALIGNMENT (dr_info);
    6267                 :           0 :       tree type = TREE_TYPE (ptr);
    6268                 :           0 :       new_stmt = gimple_build_assign
    6269                 :           0 :                    (new_temp, BIT_AND_EXPR, ptr,
    6270                 :           0 :                     fold_build2 (MINUS_EXPR, type,
    6271                 :             :                                  build_int_cst (type, 0),
    6272                 :             :                                  build_int_cst (type, align)));
    6273                 :           0 :       new_bb = gsi_insert_on_edge_immediate (pe, new_stmt);
    6274                 :           0 :       gcc_assert (!new_bb);
    6275                 :           0 :       data_ref
    6276                 :           0 :         = build2 (MEM_REF, TREE_TYPE (vec_dest), new_temp,
    6277                 :           0 :                   build_int_cst (reference_alias_ptr_type (DR_REF (dr)), 0));
    6278                 :           0 :       vect_copy_ref_info (data_ref, DR_REF (dr));
    6279                 :           0 :       new_stmt = gimple_build_assign (vec_dest, data_ref);
    6280                 :           0 :       new_temp = make_ssa_name (vec_dest, new_stmt);
    6281                 :           0 :       gimple_assign_set_lhs (new_stmt, new_temp);
    6282                 :           0 :       gimple_set_vuse (new_stmt, vuse);
    6283                 :           0 :       if (pe)
    6284                 :             :         {
    6285                 :           0 :           new_bb = gsi_insert_on_edge_immediate (pe, new_stmt);
    6286                 :           0 :           gcc_assert (!new_bb);
    6287                 :             :         }
    6288                 :             :       else
    6289                 :           0 :          gsi_insert_before (gsi, new_stmt, GSI_SAME_STMT);
    6290                 :             : 
    6291                 :           0 :       msq_init = gimple_assign_lhs (new_stmt);
    6292                 :             :     }
    6293                 :             : 
    6294                 :             :   /* 4. Create realignment token using a target builtin, if available.
    6295                 :             :       It is done either inside the containing loop, or before LOOP (as
    6296                 :             :       determined above).  */
    6297                 :             : 
    6298                 :           0 :   if (targetm.vectorize.builtin_mask_for_load)
    6299                 :             :     {
    6300                 :           0 :       gcall *new_stmt;
    6301                 :           0 :       tree builtin_decl;
    6302                 :             : 
    6303                 :             :       /* Compute INIT_ADDR - the initial address accessed by this memref.  */
    6304                 :           0 :       if (!init_addr)
    6305                 :             :         {
    6306                 :             :           /* Generate the INIT_ADDR computation outside LOOP.  */
    6307                 :           0 :           init_addr = vect_create_addr_base_for_vector_ref (vinfo,
    6308                 :             :                                                             stmt_info, &stmts,
    6309                 :             :                                                             NULL_TREE);
    6310                 :           0 :           if (loop)
    6311                 :             :             {
    6312                 :           0 :               pe = loop_preheader_edge (loop);
    6313                 :           0 :               new_bb = gsi_insert_seq_on_edge_immediate (pe, stmts);
    6314                 :           0 :               gcc_assert (!new_bb);
    6315                 :             :             }
    6316                 :             :           else
    6317                 :           0 :              gsi_insert_seq_before (gsi, stmts, GSI_SAME_STMT);
    6318                 :             :         }
    6319                 :             : 
    6320                 :           0 :       builtin_decl = targetm.vectorize.builtin_mask_for_load ();
    6321                 :           0 :       new_stmt = gimple_build_call (builtin_decl, 1, init_addr);
    6322                 :           0 :       vec_dest =
    6323                 :           0 :         vect_create_destination_var (scalar_dest,
    6324                 :             :                                      gimple_call_return_type (new_stmt));
    6325                 :           0 :       new_temp = make_ssa_name (vec_dest, new_stmt);
    6326                 :           0 :       gimple_call_set_lhs (new_stmt, new_temp);
    6327                 :             : 
    6328                 :           0 :       if (compute_in_loop)
    6329                 :           0 :         gsi_insert_before (gsi, new_stmt, GSI_SAME_STMT);
    6330                 :             :       else
    6331                 :             :         {
    6332                 :             :           /* Generate the misalignment computation outside LOOP.  */
    6333                 :           0 :           pe = loop_preheader_edge (loop);
    6334                 :           0 :           new_bb = gsi_insert_on_edge_immediate (pe, new_stmt);
    6335                 :           0 :           gcc_assert (!new_bb);
    6336                 :             :         }
    6337                 :             : 
    6338                 :           0 :       *realignment_token = gimple_call_lhs (new_stmt);
    6339                 :             : 
    6340                 :             :       /* The result of the CALL_EXPR to this builtin is determined from
    6341                 :             :          the value of the parameter and no global variables are touched
    6342                 :             :          which makes the builtin a "const" function.  Requiring the
    6343                 :             :          builtin to have the "const" attribute makes it unnecessary
    6344                 :             :          to call mark_call_clobbered.  */
    6345                 :           0 :       gcc_assert (TREE_READONLY (builtin_decl));
    6346                 :             :     }
    6347                 :             : 
    6348                 :           0 :   if (alignment_support_scheme == dr_explicit_realign)
    6349                 :             :     return msq;
    6350                 :             : 
    6351                 :           0 :   gcc_assert (!compute_in_loop);
    6352                 :           0 :   gcc_assert (alignment_support_scheme == dr_explicit_realign_optimized);
    6353                 :             : 
    6354                 :             : 
    6355                 :             :   /* 5. Create msq = phi <msq_init, lsq> in loop  */
    6356                 :             : 
    6357                 :           0 :   pe = loop_preheader_edge (containing_loop);
    6358                 :           0 :   vec_dest = vect_create_destination_var (scalar_dest, vectype);
    6359                 :           0 :   msq = make_ssa_name (vec_dest);
    6360                 :           0 :   phi_stmt = create_phi_node (msq, containing_loop->header);
    6361                 :           0 :   add_phi_arg (phi_stmt, msq_init, pe, UNKNOWN_LOCATION);
    6362                 :             : 
    6363                 :           0 :   return msq;
    6364                 :             : }
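
A small standalone sketch (not GCC internals; the 16-byte alignment and the
address are assumed purely for illustration) of the floor-alignment arithmetic
that step 3 above emits before the prolog load:

    /* The "floor (p)" address is p & -align, built above as BIT_AND_EXPR of
       the pointer with (0 - DR_TARGET_ALIGNMENT).  */
    #include <cstdint>
    #include <cstdio>

    int
    main ()
    {
      uintptr_t align = 16;                 /* assumed target alignment   */
      uintptr_t p = 0x1007;                 /* assumed unaligned address  */
      uintptr_t floor_p = p & (0 - align);  /* 0x1000: start of the aligned
                                               vector that covers *p      */
      std::printf ("%#lx\n", (unsigned long) floor_p);
      return 0;
    }
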
    6365                 :             : 
    6366                 :             : 
    6367                 :             : /* Function vect_grouped_load_supported.
    6368                 :             : 
    6369                 :             :    COUNT is the size of the load group (the number of statements plus the
    6370                 :             :    number of gaps).  SINGLE_ELEMENT_P is true if there is actually
    6371                 :             :    only one statement, with a gap of COUNT - 1.
    6372                 :             : 
    6373                 :             :    Returns true if a suitable permute exists.  */
    6374                 :             : 
    6375                 :             : bool
    6376                 :        3243 : vect_grouped_load_supported (tree vectype, bool single_element_p,
    6377                 :             :                              unsigned HOST_WIDE_INT count)
    6378                 :             : {
    6379                 :        3243 :   machine_mode mode = TYPE_MODE (vectype);
    6380                 :             : 
    6381                 :             :   /* If this is single-element interleaving with an element distance
    6382                 :             :      that leaves unused vector loads around punt - we at least create
    6383                 :             :      very sub-optimal code in that case (and blow up memory,
    6384                 :             :      see PR65518).  */
    6385                 :        3243 :   if (single_element_p && maybe_gt (count, TYPE_VECTOR_SUBPARTS (vectype)))
    6386                 :             :     {
    6387                 :          69 :       if (dump_enabled_p ())
    6388                 :          13 :         dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
    6389                 :             :                          "single-element interleaving not supported "
    6390                 :             :                          "for non-adjacent vector loads\n");
    6391                 :          69 :       return false;
    6392                 :             :     }
    6393                 :             : 
    6394                 :             :   /* vect_permute_load_chain requires the group size to be equal to 3 or
    6395                 :             :      be a power of two.  */
    6396                 :        3174 :   if (count != 3 && exact_log2 (count) == -1)
    6397                 :             :     {
    6398                 :         441 :       if (dump_enabled_p ())
    6399                 :          14 :         dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
    6400                 :             :                          "the size of the group of accesses"
    6401                 :             :                          " is not a power of 2 or not equal to 3\n");
    6402                 :         441 :       return false;
    6403                 :             :     }
    6404                 :             : 
    6405                 :             :   /* Check that the permutation is supported.  */
    6406                 :        2733 :   if (VECTOR_MODE_P (mode))
    6407                 :             :     {
    6408                 :        2733 :       unsigned int i, j;
    6409                 :        2733 :       if (count == 3)
    6410                 :             :         {
    6411                 :        1392 :           unsigned int nelt;
    6412                 :        2784 :           if (!GET_MODE_NUNITS (mode).is_constant (&nelt))
    6413                 :             :             {
    6414                 :             :               if (dump_enabled_p ())
    6415                 :             :                 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
    6416                 :             :                                  "cannot handle groups of 3 loads for"
    6417                 :             :                                  " variable-length vectors\n");
    6418                 :             :               return false;
    6419                 :             :             }
    6420                 :             : 
    6421                 :        1392 :           vec_perm_builder sel (nelt, nelt, 1);
    6422                 :        1392 :           sel.quick_grow (nelt);
    6423                 :        1392 :           vec_perm_indices indices;
    6424                 :        1392 :           unsigned int k;
    6425                 :        5532 :           for (k = 0; k < 3; k++)
    6426                 :             :             {
    6427                 :       14397 :               for (i = 0; i < nelt; i++)
    6428                 :       10245 :                 if (3 * i + k < 2 * nelt)
    6429                 :        6835 :                   sel[i] = 3 * i + k;
    6430                 :             :                 else
    6431                 :        3410 :                   sel[i] = 0;
    6432                 :        4152 :               indices.new_vector (sel, 2, nelt);
    6433                 :        4152 :               if (!can_vec_perm_const_p (mode, mode, indices))
    6434                 :             :                 {
    6435                 :          12 :                   if (dump_enabled_p ())
    6436                 :           3 :                     dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
    6437                 :             :                                      "shuffle of 3 loads is not supported by"
    6438                 :             :                                      " target\n");
    6439                 :          12 :                   return false;
    6440                 :             :                 }
    6441                 :       14268 :               for (i = 0, j = 0; i < nelt; i++)
    6442                 :       10128 :                 if (3 * i + k < 2 * nelt)
    6443                 :        6752 :                   sel[i] = i;
    6444                 :             :                 else
    6445                 :        3376 :                   sel[i] = nelt + ((nelt + k) % 3) + 3 * (j++);
    6446                 :        4140 :               indices.new_vector (sel, 2, nelt);
    6447                 :        4140 :               if (!can_vec_perm_const_p (mode, mode, indices))
    6448                 :             :                 {
    6449                 :           0 :                   if (dump_enabled_p ())
    6450                 :           0 :                     dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
    6451                 :             :                                      "shuffle of 3 loads is not supported by"
    6452                 :             :                                      " target\n");
    6453                 :           0 :                   return false;
    6454                 :             :                 }
    6455                 :             :             }
    6456                 :             :           return true;
    6457                 :        1392 :         }
    6458                 :             :       else
    6459                 :             :         {
    6460                 :             :           /* If length is not equal to 3 then only a power of 2 is supported.  */
    6461                 :        1341 :           gcc_assert (pow2p_hwi (count));
    6462                 :        2682 :           poly_uint64 nelt = GET_MODE_NUNITS (mode);
    6463                 :             : 
    6464                 :             :           /* The encoding has a single stepped pattern.  */
    6465                 :        1341 :           vec_perm_builder sel (nelt, 1, 3);
    6466                 :        1341 :           sel.quick_grow (3);
    6467                 :        6705 :           for (i = 0; i < 3; i++)
    6468                 :        4023 :             sel[i] = i * 2;
    6469                 :        1341 :           vec_perm_indices indices (sel, 2, nelt);
    6470                 :        1341 :           if (can_vec_perm_const_p (mode, mode, indices))
    6471                 :             :             {
    6472                 :        5344 :               for (i = 0; i < 3; i++)
    6473                 :        4008 :                 sel[i] = i * 2 + 1;
    6474                 :        1336 :               indices.new_vector (sel, 2, nelt);
    6475                 :        1336 :               if (can_vec_perm_const_p (mode, mode, indices))
    6476                 :        1336 :                 return true;
    6477                 :             :             }
    6478                 :        1341 :         }
    6479                 :             :     }
    6480                 :             : 
    6481                 :           5 :   if (dump_enabled_p ())
    6482                 :           4 :     dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
    6483                 :             :                      "extract even/odd not supported by target\n");
    6484                 :             :   return false;
    6485                 :             : }
    6486                 :             : 
    6487                 :             : /* Return FN if vec_{masked_,mask_len_}load_lanes is available for COUNT vectors
    6488                 :             :    of type VECTYPE.  MASKED_P says whether the masked form is needed.
    6489                 :             :    If it is available and ELSVALS is nonzero store the possible else values
    6490                 :             :    in the vector it points to.  */
    6491                 :             : 
    6492                 :             : internal_fn
    6493                 :      114224 : vect_load_lanes_supported (tree vectype, unsigned HOST_WIDE_INT count,
    6494                 :             :                            bool masked_p, vec<int> *elsvals)
    6495                 :             : {
    6496                 :      114224 :   if (vect_lanes_optab_supported_p ("vec_mask_len_load_lanes",
    6497                 :             :                                     vec_mask_len_load_lanes_optab, vectype,
    6498                 :             :                                     count, elsvals))
    6499                 :             :     return IFN_MASK_LEN_LOAD_LANES;
    6500                 :      114224 :   else if (masked_p)
    6501                 :             :     {
    6502                 :           0 :       if (vect_lanes_optab_supported_p ("vec_mask_load_lanes",
    6503                 :             :                                         vec_mask_load_lanes_optab, vectype,
    6504                 :             :                                         count, elsvals))
    6505                 :             :         return IFN_MASK_LOAD_LANES;
    6506                 :             :     }
    6507                 :             :   else
    6508                 :             :     {
    6509                 :      114224 :       if (vect_lanes_optab_supported_p ("vec_load_lanes", vec_load_lanes_optab,
    6510                 :             :                                         vectype, count, elsvals))
    6511                 :             :         return IFN_LOAD_LANES;
    6512                 :             :     }
    6513                 :             :   return IFN_LAST;
    6514                 :             : }
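The function above encodes a simple preference order: the mask+length form of load-lanes is tried first, then the purely masked form (only when a mask is actually required), and finally the plain form.  The following standalone sketch is not GCC code; target_supports_lanes and pick_load_lanes_fn are hypothetical stand-ins for the vect_lanes_optab_supported_p queries and for the function above, used only to illustrate that fall-through order.

#include <cstdio>

enum load_lanes_fn { NONE, MASK_LEN_LANES, MASK_LANES, PLAIN_LANES };

/* Hypothetical target query; in GCC this is answered per optab.
   This stub pretends only the plain load-lanes form is available.  */
static bool
target_supports_lanes (load_lanes_fn fn)
{
  return fn == PLAIN_LANES;
}

static load_lanes_fn
pick_load_lanes_fn (bool masked_p)
{
  if (target_supports_lanes (MASK_LEN_LANES))
    return MASK_LEN_LANES;
  if (masked_p)
    return target_supports_lanes (MASK_LANES) ? MASK_LANES : NONE;
  return target_supports_lanes (PLAIN_LANES) ? PLAIN_LANES : NONE;
}

int
main ()
{
  /* With the stub above: unmasked picks PLAIN_LANES, masked finds nothing.  */
  printf ("unmasked -> %d\n", pick_load_lanes_fn (false));
  printf ("masked   -> %d\n", pick_load_lanes_fn (true));
  return 0;
}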
    6515                 :             : 
    6516                 :             : /* Function vect_permute_load_chain.
    6517                 :             : 
    6518                 :             :    Given a chain of interleaved loads in DR_CHAIN of LENGTH that must be
    6519                 :             :    a power of 2 or equal to 3, generate extract_even/odd stmts to reorder
    6520                 :             :    the input data correctly.  Return the final references for loads in
    6521                 :             :    RESULT_CHAIN.
    6522                 :             : 
    6523                 :             :    E.g., LENGTH is 4 and the scalar type is short, i.e., VF is 8.
    6524                 :             :    The input is 4 vectors each containing 8 elements. We assign a number to each
    6525                 :             :    element, the input sequence is:
    6526                 :             : 
    6527                 :             :    1st vec:   0  1  2  3  4  5  6  7
    6528                 :             :    2nd vec:   8  9 10 11 12 13 14 15
    6529                 :             :    3rd vec:  16 17 18 19 20 21 22 23
    6530                 :             :    4th vec:  24 25 26 27 28 29 30 31
    6531                 :             : 
    6532                 :             :    The output sequence should be:
    6533                 :             : 
    6534                 :             :    1st vec:  0 4  8 12 16 20 24 28
    6535                 :             :    2nd vec:  1 5  9 13 17 21 25 29
    6536                 :             :    3rd vec:  2 6 10 14 18 22 26 30
    6537                 :             :    4th vec:  3 7 11 15 19 23 27 31
    6538                 :             : 
    6539                 :             :    i.e., the first output vector should contain the first elements of each
    6540                 :             :    interleaving group, etc.
    6541                 :             : 
    6542                 :             :    We use extract_even/odd instructions to create such output.  The input of
    6543                 :             :    each extract_even/odd operation is two vectors
    6544                 :             :    1st vec    2nd vec
    6545                 :             :    0 1 2 3    4 5 6 7
    6546                 :             : 
    6547                 :             :    and the output is the vector of extracted even/odd elements.  The output of
    6548                 :             :    extract_even will be:   0 2 4 6
    6549                 :             :    and of extract_odd:     1 3 5 7
    6550                 :             : 
    6551                 :             : 
    6552                 :             :    The permutation is done in log LENGTH stages.  In each stage extract_even
    6553                 :             :    and extract_odd stmts are created for each pair of vectors in DR_CHAIN in
    6554                 :             :    their order.  In our example,
    6555                 :             : 
    6556                 :             :    E1: extract_even (1st vec, 2nd vec)
    6557                 :             :    E2: extract_odd (1st vec, 2nd vec)
    6558                 :             :    E3: extract_even (3rd vec, 4th vec)
    6559                 :             :    E4: extract_odd (3rd vec, 4th vec)
    6560                 :             : 
    6561                 :             :    The output for the first stage will be:
    6562                 :             : 
    6563                 :             :    E1:  0  2  4  6  8 10 12 14
    6564                 :             :    E2:  1  3  5  7  9 11 13 15
    6565                 :             :    E3: 16 18 20 22 24 26 28 30
    6566                 :             :    E4: 17 19 21 23 25 27 29 31
    6567                 :             : 
    6568                 :             :    In order to proceed and create the correct sequence for the next stage (or
    6569                 :             :    for the correct output, if the second stage is the last one, as in our
    6570                 :             :    example), we first put the output of extract_even operation and then the
    6571                 :             :    output of extract_odd in RESULT_CHAIN (which is then copied to DR_CHAIN).
    6572                 :             :    The input for the second stage is:
    6573                 :             : 
    6574                 :             :    1st vec (E1):  0  2  4  6  8 10 12 14
    6575                 :             :    2nd vec (E3): 16 18 20 22 24 26 28 30
    6576                 :             :    3rd vec (E2):  1  3  5  7  9 11 13 15
    6577                 :             :    4th vec (E4): 17 19 21 23 25 27 29 31
    6578                 :             : 
    6579                 :             :    The output of the second stage:
    6580                 :             : 
    6581                 :             :    E1: 0 4  8 12 16 20 24 28
    6582                 :             :    E2: 2 6 10 14 18 22 26 30
    6583                 :             :    E3: 1 5  9 13 17 21 25 29
    6584                 :             :    E4: 3 7 11 15 19 23 27 31
    6585                 :             : 
    6586                 :             :    And RESULT_CHAIN after reordering:
    6587                 :             : 
    6588                 :             :    1st vec (E1):  0 4  8 12 16 20 24 28
    6589                 :             :    2nd vec (E3):  1 5  9 13 17 21 25 29
    6590                 :             :    3rd vec (E2):  2 6 10 14 18 22 26 30
    6591                 :             :    4th vec (E4):  3 7 11 15 19 23 27 31.  */
    6592                 :             : 
    6593                 :             : static void
    6594                 :           0 : vect_permute_load_chain (vec_info *vinfo, vec<tree> dr_chain,
    6595                 :             :                          unsigned int length,
    6596                 :             :                          stmt_vec_info stmt_info,
    6597                 :             :                          gimple_stmt_iterator *gsi,
    6598                 :             :                          vec<tree> *result_chain)
    6599                 :             : {
    6600                 :           0 :   tree data_ref, first_vect, second_vect;
    6601                 :           0 :   tree perm_mask_even, perm_mask_odd;
    6602                 :           0 :   tree perm3_mask_low, perm3_mask_high;
    6603                 :           0 :   gimple *perm_stmt;
    6604                 :           0 :   tree vectype = STMT_VINFO_VECTYPE (stmt_info);
    6605                 :           0 :   unsigned int i, j, log_length = exact_log2 (length);
    6606                 :             : 
    6607                 :           0 :   result_chain->quick_grow (length);
    6608                 :           0 :   memcpy (result_chain->address (), dr_chain.address (),
    6609                 :             :           length * sizeof (tree));
    6610                 :             : 
    6611                 :           0 :   if (length == 3)
    6612                 :             :     {
    6613                 :             :       /* vect_grouped_load_supported ensures that this is constant.  */
    6614                 :           0 :       unsigned nelt = TYPE_VECTOR_SUBPARTS (vectype).to_constant ();
    6615                 :           0 :       unsigned int k;
    6616                 :             : 
    6617                 :           0 :       vec_perm_builder sel (nelt, nelt, 1);
    6618                 :           0 :       sel.quick_grow (nelt);
    6619                 :           0 :       vec_perm_indices indices;
    6620                 :           0 :       for (k = 0; k < 3; k++)
    6621                 :             :         {
    6622                 :           0 :           for (i = 0; i < nelt; i++)
    6623                 :           0 :             if (3 * i + k < 2 * nelt)
    6624                 :           0 :               sel[i] = 3 * i + k;
    6625                 :             :             else
    6626                 :           0 :               sel[i] = 0;
    6627                 :           0 :           indices.new_vector (sel, 2, nelt);
    6628                 :           0 :           perm3_mask_low = vect_gen_perm_mask_checked (vectype, indices);
    6629                 :             : 
    6630                 :           0 :           for (i = 0, j = 0; i < nelt; i++)
    6631                 :           0 :             if (3 * i + k < 2 * nelt)
    6632                 :           0 :               sel[i] = i;
    6633                 :             :             else
    6634                 :           0 :               sel[i] = nelt + ((nelt + k) % 3) + 3 * (j++);
    6635                 :           0 :           indices.new_vector (sel, 2, nelt);
    6636                 :           0 :           perm3_mask_high = vect_gen_perm_mask_checked (vectype, indices);
    6637                 :             : 
    6638                 :           0 :           first_vect = dr_chain[0];
    6639                 :           0 :           second_vect = dr_chain[1];
    6640                 :             : 
    6641                 :             :           /* Create interleaving stmt (low part of):
    6642                 :             :              low = VEC_PERM_EXPR <first_vect, second_vect, {k, 3 + k, 6 + k,
    6643                 :             :                                                              ...}>  */
    6644                 :           0 :           data_ref = make_temp_ssa_name (vectype, NULL, "vect_shuffle3_low");
    6645                 :           0 :           perm_stmt = gimple_build_assign (data_ref, VEC_PERM_EXPR, first_vect,
    6646                 :             :                                            second_vect, perm3_mask_low);
    6647                 :           0 :           vect_finish_stmt_generation (vinfo, stmt_info, perm_stmt, gsi);
    6648                 :             : 
    6649                 :             :           /* Create interleaving stmt (high part of):
    6650                 :             :              high = VEC_PERM_EXPR <first_vect, second_vect, {k, 3 + k, 6 + k,
    6651                 :             :                                                               ...}>  */
    6652                 :           0 :           first_vect = data_ref;
    6653                 :           0 :           second_vect = dr_chain[2];
    6654                 :           0 :           data_ref = make_temp_ssa_name (vectype, NULL, "vect_shuffle3_high");
    6655                 :           0 :           perm_stmt = gimple_build_assign (data_ref, VEC_PERM_EXPR, first_vect,
    6656                 :             :                                            second_vect, perm3_mask_high);
    6657                 :           0 :           vect_finish_stmt_generation (vinfo, stmt_info, perm_stmt, gsi);
    6658                 :           0 :           (*result_chain)[k] = data_ref;
    6659                 :             :         }
    6660                 :           0 :     }
    6661                 :             :   else
    6662                 :             :     {
    6663                 :             :       /* If length is not equal to 3 then only a power of 2 is supported.  */
    6664                 :           0 :       gcc_assert (pow2p_hwi (length));
    6665                 :             : 
    6666                 :             :       /* The encoding has a single stepped pattern.  */
    6667                 :           0 :       poly_uint64 nelt = TYPE_VECTOR_SUBPARTS (vectype);
    6668                 :           0 :       vec_perm_builder sel (nelt, 1, 3);
    6669                 :           0 :       sel.quick_grow (3);
    6670                 :           0 :       for (i = 0; i < 3; ++i)
    6671                 :           0 :         sel[i] = i * 2;
    6672                 :           0 :       vec_perm_indices indices (sel, 2, nelt);
    6673                 :           0 :       perm_mask_even = vect_gen_perm_mask_checked (vectype, indices);
    6674                 :             : 
    6675                 :           0 :       for (i = 0; i < 3; ++i)
    6676                 :           0 :         sel[i] = i * 2 + 1;
    6677                 :           0 :       indices.new_vector (sel, 2, nelt);
    6678                 :           0 :       perm_mask_odd = vect_gen_perm_mask_checked (vectype, indices);
    6679                 :             : 
    6680                 :           0 :       for (i = 0; i < log_length; i++)
    6681                 :             :         {
    6682                 :           0 :           for (j = 0; j < length; j += 2)
    6683                 :             :             {
    6684                 :           0 :               first_vect = dr_chain[j];
    6685                 :           0 :               second_vect = dr_chain[j+1];
    6686                 :             : 
    6687                 :             :               /* data_ref = permute_even (first_data_ref, second_data_ref);  */
    6688                 :           0 :               data_ref = make_temp_ssa_name (vectype, NULL, "vect_perm_even");
    6689                 :           0 :               perm_stmt = gimple_build_assign (data_ref, VEC_PERM_EXPR,
    6690                 :             :                                                first_vect, second_vect,
    6691                 :             :                                                perm_mask_even);
    6692                 :           0 :               vect_finish_stmt_generation (vinfo, stmt_info, perm_stmt, gsi);
    6693                 :           0 :               (*result_chain)[j/2] = data_ref;
    6694                 :             : 
    6695                 :             :               /* data_ref = permute_odd (first_data_ref, second_data_ref);  */
    6696                 :           0 :               data_ref = make_temp_ssa_name (vectype, NULL, "vect_perm_odd");
    6697                 :           0 :               perm_stmt = gimple_build_assign (data_ref, VEC_PERM_EXPR,
    6698                 :             :                                                first_vect, second_vect,
    6699                 :             :                                                perm_mask_odd);
    6700                 :           0 :               vect_finish_stmt_generation (vinfo, stmt_info, perm_stmt, gsi);
    6701                 :           0 :               (*result_chain)[j/2+length/2] = data_ref;
    6702                 :             :             }
    6703                 :           0 :           memcpy (dr_chain.address (), result_chain->address (),
    6704                 :             :                   length * sizeof (tree));
    6705                 :             :         }
    6706                 :           0 :     }
    6707                 :           0 : }
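As a cross-check of the worked example in the comment above, here is a minimal standalone sketch (plain C++, independent of GCC's vec_perm machinery) that simulates the extract-even/odd network for LENGTH == 4 and NELT == 8 on element indices 0..31.  Running it should print the four output vectors listed in the comment (0 4 8 12 16 20 24 28, and so on).

#include <cstdio>
#include <vector>

int
main ()
{
  const unsigned length = 4, nelt = 8, log_length = 2;

  /* chain[v][e] holds the original scalar index, 0..31 as in the example.  */
  std::vector<std::vector<unsigned>> chain (length, std::vector<unsigned> (nelt));
  for (unsigned v = 0; v < length; v++)
    for (unsigned e = 0; e < nelt; e++)
      chain[v][e] = v * nelt + e;

  std::vector<std::vector<unsigned>> result (length);
  for (unsigned stage = 0; stage < log_length; stage++)
    {
      for (unsigned j = 0; j < length; j += 2)
        {
          std::vector<unsigned> even (nelt), odd (nelt);
          for (unsigned i = 0; i < nelt; i++)
            {
              /* Pick positions i*2 (even) and i*2+1 (odd) from the
                 2*nelt-element concatenation of the two input vectors.  */
              unsigned e = 2 * i, o = 2 * i + 1;
              even[i] = e < nelt ? chain[j][e] : chain[j + 1][e - nelt];
              odd[i]  = o < nelt ? chain[j][o] : chain[j + 1][o - nelt];
            }
          /* Extract-even results first, extract-odd results second,
             mirroring how RESULT_CHAIN is filled.  */
          result[j / 2] = even;
          result[j / 2 + length / 2] = odd;
        }
      chain = result;
    }

  for (unsigned v = 0; v < length; v++)
    {
      printf ("vec %u:", v + 1);
      for (unsigned e = 0; e < nelt; e++)
        printf (" %u", chain[v][e]);
      printf ("\n");
    }
  return 0;
}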
    6708                 :             : 
    6709                 :             : /* Function vect_shift_permute_load_chain.
    6710                 :             : 
    6711                 :             :    Given a chain of loads in DR_CHAIN of LENGTH 2 or 3, generate
    6712                 :             :    a sequence of stmts to reorder the input data accordingly.
    6713                 :             :    Return the final references for loads in RESULT_CHAIN.
    6714                 :             :    Return true if successful, false otherwise.
    6715                 :             : 
    6716                 :             :    E.g., LENGTH is 3 and the scalar type is short, i.e., VF is 8.
    6717                 :             :    The input is 3 vectors each containing 8 elements.  We assign a
    6718                 :             :    number to each element, the input sequence is:
    6719                 :             : 
    6720                 :             :    1st vec:   0  1  2  3  4  5  6  7
    6721                 :             :    2nd vec:   8  9 10 11 12 13 14 15
    6722                 :             :    3rd vec:  16 17 18 19 20 21 22 23
    6723                 :             : 
    6724                 :             :    The output sequence should be:
    6725                 :             : 
    6726                 :             :    1st vec:  0 3 6  9 12 15 18 21
    6727                 :             :    2nd vec:  1 4 7 10 13 16 19 22
    6728                 :             :    3rd vec:  2 5 8 11 14 17 20 23
    6729                 :             : 
    6730                 :             :    We use 3 shuffle instructions and 3 * 3 - 1 shifts to create such output.
    6731                 :             : 
    6732                 :             :    First we shuffle all 3 vectors to get correct elements order:
    6733                 :             : 
    6734                 :             :    1st vec:  ( 0  3  6) ( 1  4  7) ( 2  5)
    6735                 :             :    2nd vec:  ( 8 11 14) ( 9 12 15) (10 13)
    6736                 :             :    3rd vec:  (16 19 22) (17 20 23) (18 21)
    6737                 :             : 
    6738                 :             :    Next we unite and shift the vectors 3 times:
    6739                 :             : 
    6740                 :             :    1st step:
    6741                 :             :      shift right by 6 the concatenation of:
    6742                 :             :      "1st vec" and  "2nd vec"
    6743                 :             :        ( 0  3  6) ( 1  4  7) |( 2  5) _ ( 8 11 14) ( 9 12 15)| (10 13)
    6744                 :             :      "2nd vec" and  "3rd vec"
    6745                 :             :        ( 8 11 14) ( 9 12 15) |(10 13) _ (16 19 22) (17 20 23)| (18 21)
    6746                 :             :      "3rd vec" and  "1st vec"
    6747                 :             :        (16 19 22) (17 20 23) |(18 21) _ ( 0  3  6) ( 1  4  7)| ( 2  5)
    6748                 :             :                              | New vectors                   |
    6749                 :             : 
    6750                 :             :      So the new vectors are now:
    6751                 :             : 
    6752                 :             :      1st vec:  ( 2  5) ( 8 11 14) ( 9 12 15)
    6753                 :             :      2nd vec:  (10 13) (16 19 22) (17 20 23)
    6754                 :             :      3rd vec:  (18 21) ( 0  3  6) ( 1  4  7)
    6755                 :             : 
    6756                 :             :    2nd step:
    6757                 :             :      shift right by 5 the concatenation of:
    6758                 :             :      "1st vec" and  "3rd vec"
    6759                 :             :        ( 2  5) ( 8 11 14) |( 9 12 15) _ (18 21) ( 0  3  6)| ( 1  4  7)
    6760                 :             :      "2nd vec" and  "1st vec"
    6761                 :             :        (10 13) (16 19 22) |(17 20 23) _ ( 2  5) ( 8 11 14)| ( 9 12 15)
    6762                 :             :      "3rd vec" and  "2nd vec"
    6763                 :             :        (18 21) ( 0  3  6) |( 1  4  7) _ (10 13) (16 19 22)| (17 20 23)
    6764                 :             :                           | New vectors                   |
    6765                 :             : 
    6766                 :             :      So the new vectors are now:
    6767                 :             : 
    6768                 :             :      1st vec:  ( 9 12 15) (18 21) ( 0  3  6)
    6769                 :             :      2nd vec:  (17 20 23) ( 2  5) ( 8 11 14)
    6770                 :             :      3rd vec:  ( 1  4  7) (10 13) (16 19 22) READY
    6771                 :             : 
    6772                 :             :    3rd step:
    6773                 :             :      shift right by 5 the concatenation of:
    6774                 :             :      "1st vec" and  "1st vec"
    6775                 :             :        ( 9 12 15) (18 21) |( 0  3  6) _ ( 9 12 15) (18 21)| ( 0  3  6)
    6776                 :             :      shift right by 3 the concatenation of:
    6777                 :             :      "2nd vec" and  "2nd vec"
    6778                 :             :                (17 20 23) |( 2  5) ( 8 11 14) _ (17 20 23)| ( 2  5) ( 8 11 14)
    6779                 :             :                           | New vectors                   |
    6780                 :             : 
    6781                 :             :      So all vectors are now READY:
    6782                 :             :      1st vec:  ( 0  3  6) ( 9 12 15) (18 21)
    6783                 :             :      2nd vec:  ( 2  5) ( 8 11 14) (17 20 23)
    6784                 :             :      3rd vec:  ( 1  4  7) (10 13) (16 19 22)
    6785                 :             : 
    6786                 :             :    This algorithm is faster than one in vect_permute_load_chain if:
    6787                 :             :      1.  "shift of a concatenation" is faster than general permutation.
    6788                 :             :          This is usually so.
    6789                 :             :      2.  The TARGET machine can't execute vector instructions in parallel.
    6790                 :             :          This is because each step of the algorithm depends on the previous one.
    6791                 :             :          The algorithm in vect_permute_load_chain is much more parallel.
    6792                 :             : 
    6793                 :             :    The algorithm is applicable only for LOAD CHAIN LENGTH less than VF.
    6794                 :             : */
    6795                 :             : 
    6796                 :             : static bool
    6797                 :           0 : vect_shift_permute_load_chain (vec_info *vinfo, vec<tree> dr_chain,
    6798                 :             :                                unsigned int length,
    6799                 :             :                                stmt_vec_info stmt_info,
    6800                 :             :                                gimple_stmt_iterator *gsi,
    6801                 :             :                                vec<tree> *result_chain)
    6802                 :             : {
    6803                 :           0 :   tree vect[3], vect_shift[3], data_ref, first_vect, second_vect;
    6804                 :           0 :   tree perm2_mask1, perm2_mask2, perm3_mask;
    6805                 :           0 :   tree select_mask, shift1_mask, shift2_mask, shift3_mask, shift4_mask;
    6806                 :           0 :   gimple *perm_stmt;
    6807                 :             : 
    6808                 :           0 :   tree vectype = STMT_VINFO_VECTYPE (stmt_info);
    6809                 :           0 :   machine_mode vmode = TYPE_MODE (vectype);
    6810                 :           0 :   unsigned int i;
    6811                 :           0 :   loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
    6812                 :             : 
    6813                 :           0 :   unsigned HOST_WIDE_INT nelt, vf;
    6814                 :           0 :   if (!TYPE_VECTOR_SUBPARTS (vectype).is_constant (&nelt)
    6815                 :           0 :       || !LOOP_VINFO_VECT_FACTOR (loop_vinfo).is_constant (&vf))
    6816                 :             :     /* Not supported for variable-length vectors.  */
    6817                 :             :     return false;
    6818                 :             : 
    6819                 :           0 :   vec_perm_builder sel (nelt, nelt, 1);
    6820                 :           0 :   sel.quick_grow (nelt);
    6821                 :             : 
    6822                 :           0 :   result_chain->quick_grow (length);
    6823                 :           0 :   memcpy (result_chain->address (), dr_chain.address (),
    6824                 :           0 :           length * sizeof (tree));
    6825                 :             : 
    6826                 :           0 :   if (pow2p_hwi (length) && vf > 4)
    6827                 :             :     {
    6828                 :           0 :       unsigned int j, log_length = exact_log2 (length);
    6829                 :           0 :       for (i = 0; i < nelt / 2; ++i)
    6830                 :           0 :         sel[i] = i * 2;
    6831                 :           0 :       for (i = 0; i < nelt / 2; ++i)
    6832                 :           0 :         sel[nelt / 2 + i] = i * 2 + 1;
    6833                 :           0 :       vec_perm_indices indices (sel, 2, nelt);
    6834                 :           0 :       if (!can_vec_perm_const_p (vmode, vmode, indices))
    6835                 :             :         {
    6836                 :           0 :           if (dump_enabled_p ())
    6837                 :           0 :             dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
    6838                 :             :                              "shuffle of 2 fields structure is not \
    6839                 :             :                               supported by target\n");
    6840                 :           0 :           return false;
    6841                 :             :         }
    6842                 :           0 :       perm2_mask1 = vect_gen_perm_mask_checked (vectype, indices);
    6843                 :             : 
    6844                 :           0 :       for (i = 0; i < nelt / 2; ++i)
    6845                 :           0 :         sel[i] = i * 2 + 1;
    6846                 :           0 :       for (i = 0; i < nelt / 2; ++i)
    6847                 :           0 :         sel[nelt / 2 + i] = i * 2;
    6848                 :           0 :       indices.new_vector (sel, 2, nelt);
    6849                 :           0 :       if (!can_vec_perm_const_p (vmode, vmode, indices))
    6850                 :             :         {
    6851                 :           0 :           if (dump_enabled_p ())
    6852                 :           0 :             dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
    6853                 :             :                              "shuffle of 2 fields structure is not \
    6854                 :             :                               supported by target\n");
    6855                 :           0 :           return false;
    6856                 :             :         }
    6857                 :           0 :       perm2_mask2 = vect_gen_perm_mask_checked (vectype, indices);
    6858                 :             : 
    6859                 :             :       /* Generating permutation constant to shift all elements.
    6860                 :             :          For vector length 8 it is {4 5 6 7 8 9 10 11}.  */
    6861                 :           0 :       for (i = 0; i < nelt; i++)
    6862                 :           0 :         sel[i] = nelt / 2 + i;
    6863                 :           0 :       indices.new_vector (sel, 2, nelt);
    6864                 :           0 :       if (!can_vec_perm_const_p (vmode, vmode, indices))
    6865                 :             :         {
    6866                 :           0 :           if (dump_enabled_p ())
    6867                 :           0 :             dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
    6868                 :             :                              "shift permutation is not supported by target\n");
    6869                 :           0 :           return false;
    6870                 :             :         }
    6871                 :           0 :       shift1_mask = vect_gen_perm_mask_checked (vectype, indices);
    6872                 :             : 
    6873                 :             :       /* Generating permutation constant to select vector from 2.
    6874                 :             :          For vector length 8 it is {0 1 2 3 12 13 14 15}.  */
    6875                 :           0 :       for (i = 0; i < nelt / 2; i++)
    6876                 :           0 :         sel[i] = i;
    6877                 :           0 :       for (i = nelt / 2; i < nelt; i++)
    6878                 :           0 :         sel[i] = nelt + i;
    6879                 :           0 :       indices.new_vector (sel, 2, nelt);
    6880                 :           0 :       if (!can_vec_perm_const_p (vmode, vmode, indices))
    6881                 :             :         {
    6882                 :           0 :           if (dump_enabled_p ())
    6883                 :           0 :             dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
    6884                 :             :                              "select is not supported by target\n");
    6885                 :           0 :           return false;
    6886                 :             :         }
    6887                 :           0 :       select_mask = vect_gen_perm_mask_checked (vectype, indices);
    6888                 :             : 
    6889                 :           0 :       for (i = 0; i < log_length; i++)
    6890                 :             :         {
    6891                 :           0 :           for (j = 0; j < length; j += 2)
    6892                 :             :             {
    6893                 :           0 :               first_vect = dr_chain[j];
    6894                 :           0 :               second_vect = dr_chain[j + 1];
    6895                 :             : 
    6896                 :           0 :               data_ref = make_temp_ssa_name (vectype, NULL, "vect_shuffle2");
    6897                 :           0 :               perm_stmt = gimple_build_assign (data_ref, VEC_PERM_EXPR,
    6898                 :             :                                                first_vect, first_vect,
    6899                 :             :                                                perm2_mask1);
    6900                 :           0 :               vect_finish_stmt_generation (vinfo, stmt_info, perm_stmt, gsi);
    6901                 :           0 :               vect[0] = data_ref;
    6902                 :             : 
    6903                 :           0 :               data_ref = make_temp_ssa_name (vectype, NULL, "vect_shuffle2");
    6904                 :           0 :               perm_stmt = gimple_build_assign (data_ref, VEC_PERM_EXPR,
    6905                 :             :                                                second_vect, second_vect,
    6906                 :             :                                                perm2_mask2);
    6907                 :           0 :               vect_finish_stmt_generation (vinfo, stmt_info, perm_stmt, gsi);
    6908                 :           0 :               vect[1] = data_ref;
    6909                 :             : 
    6910                 :           0 :               data_ref = make_temp_ssa_name (vectype, NULL, "vect_shift");
    6911                 :           0 :               perm_stmt = gimple_build_assign (data_ref, VEC_PERM_EXPR,
    6912                 :             :                                                vect[0], vect[1], shift1_mask);
    6913                 :           0 :               vect_finish_stmt_generation (vinfo, stmt_info, perm_stmt, gsi);
    6914                 :           0 :               (*result_chain)[j/2 + length/2] = data_ref;
    6915                 :             : 
    6916                 :           0 :               data_ref = make_temp_ssa_name (vectype, NULL, "vect_select");
    6917                 :           0 :               perm_stmt = gimple_build_assign (data_ref, VEC_PERM_EXPR,
    6918                 :             :                                                vect[0], vect[1], select_mask);
    6919                 :           0 :               vect_finish_stmt_generation (vinfo, stmt_info, perm_stmt, gsi);
    6920                 :           0 :               (*result_chain)[j/2] = data_ref;
    6921                 :             :             }
    6922                 :           0 :           memcpy (dr_chain.address (), result_chain->address (),
    6923                 :             :                   length * sizeof (tree));
    6924                 :             :         }
    6925                 :             :       return true;
    6926                 :           0 :     }
    6927                 :           0 :   if (length == 3 && vf > 2)
    6928                 :             :     {
    6929                 :             :       unsigned int k = 0, l = 0;
    6930                 :             : 
    6931                 :             :       /* Generating permutation constant to get all elements in right order.
    6932                 :             :          For vector length 8 it is {0 3 6 1 4 7 2 5}.  */
    6933                 :           0 :       for (i = 0; i < nelt; i++)
    6934                 :             :         {
    6935                 :           0 :           if (3 * k + (l % 3) >= nelt)
    6936                 :             :             {
    6937                 :           0 :               k = 0;
    6938                 :           0 :               l += (3 - (nelt % 3));
    6939                 :             :             }
    6940                 :           0 :           sel[i] = 3 * k + (l % 3);
    6941                 :           0 :           k++;
    6942                 :             :         }
    6943                 :           0 :       vec_perm_indices indices (sel, 2, nelt);
    6944                 :           0 :       if (!can_vec_perm_const_p (vmode, vmode, indices))
    6945                 :             :         {
    6946                 :           0 :           if (dump_enabled_p ())
    6947                 :           0 :             dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
    6948                 :             :                              "shuffle of 3 fields structure is not \
    6949                 :             :                               supported by target\n");
    6950                 :           0 :           return false;
    6951                 :             :         }
    6952                 :           0 :       perm3_mask = vect_gen_perm_mask_checked (vectype, indices);
    6953                 :             : 
    6954                 :             :       /* Generating permutation constant to shift all elements.
    6955                 :             :          For vector length 8 it is {6 7 8 9 10 11 12 13}.  */
    6956                 :           0 :       for (i = 0; i < nelt; i++)
    6957                 :           0 :         sel[i] = 2 * (nelt / 3) + (nelt % 3) + i;
    6958                 :           0 :       indices.new_vector (sel, 2, nelt);
    6959                 :           0 :       if (!can_vec_perm_const_p (vmode, vmode, indices))
    6960                 :             :         {
    6961                 :           0 :           if (dump_enabled_p ())
    6962                 :           0 :             dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
    6963                 :             :                              "shift permutation is not supported by target\n");
    6964                 :           0 :           return false;
    6965                 :             :         }
    6966                 :           0 :       shift1_mask = vect_gen_perm_mask_checked (vectype, indices);
    6967                 :             : 
    6968                 :             :       /* Generating permutation constant to shift all elements.
    6969                 :             :          For vector length 8 it is {5 6 7 8 9 10 11 12}.  */
    6970                 :           0 :       for (i = 0; i < nelt; i++)
    6971                 :           0 :         sel[i] = 2 * (nelt / 3) + 1 + i;
    6972                 :           0 :       indices.new_vector (sel, 2, nelt);
    6973                 :           0 :       if (!can_vec_perm_const_p (vmode, vmode, indices))
    6974                 :             :         {
    6975                 :           0 :           if (dump_enabled_p ())
    6976                 :           0 :             dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
    6977                 :             :                              "shift permutation is not supported by target\n");
    6978                 :           0 :           return false;
    6979                 :             :         }
    6980                 :           0 :       shift2_mask = vect_gen_perm_mask_checked (vectype, indices);
    6981                 :             : 
    6982                 :             :       /* Generating permutation constant to shift all elements.
    6983                 :             :          For vector length 8 it is {3 4 5 6 7 8 9 10}.  */
    6984                 :           0 :       for (i = 0; i < nelt; i++)
    6985                 :           0 :         sel[i] = (nelt / 3) + (nelt % 3) / 2 + i;
    6986                 :           0 :       indices.new_vector (sel, 2, nelt);
    6987                 :           0 :       if (!can_vec_perm_const_p (vmode, vmode, indices))
    6988                 :             :         {
    6989                 :           0 :           if (dump_enabled_p ())
    6990                 :           0 :             dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
    6991                 :             :                              "shift permutation is not supported by target\n");
    6992                 :           0 :           return false;
    6993                 :             :         }
    6994                 :           0 :       shift3_mask = vect_gen_perm_mask_checked (vectype, indices);
    6995                 :             : 
    6996                 :             :       /* Generating permutation constant to shift all elements.
    6997                 :             :          For vector length 8 it is {5 6 7 8 9 10 11 12}.  */
    6998                 :           0 :       for (i = 0; i < nelt; i++)
    6999                 :           0 :         sel[i] = 2 * (nelt / 3) + (nelt % 3) / 2 + i;
    7000                 :           0 :       indices.new_vector (sel, 2, nelt);
    7001                 :           0 :       if (!can_vec_perm_const_p (vmode, vmode, indices))
    7002                 :             :         {
    7003                 :           0 :           if (dump_enabled_p ())
    7004                 :           0 :             dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
    7005                 :             :                              "shift permutation is not supported by target\n");
    7006                 :           0 :           return false;
    7007                 :             :         }
    7008                 :           0 :       shift4_mask = vect_gen_perm_mask_checked (vectype, indices);
    7009                 :             : 
    7010                 :           0 :       for (k = 0; k < 3; k++)
    7011                 :             :         {
    7012                 :           0 :           data_ref = make_temp_ssa_name (vectype, NULL, "vect_shuffle3");
    7013                 :           0 :           perm_stmt = gimple_build_assign (data_ref, VEC_PERM_EXPR,
    7014                 :           0 :                                            dr_chain[k], dr_chain[k],
    7015                 :             :                                            perm3_mask);
    7016                 :           0 :           vect_finish_stmt_generation (vinfo, stmt_info, perm_stmt, gsi);
    7017                 :           0 :           vect[k] = data_ref;
    7018                 :             :         }
    7019                 :             : 
    7020                 :           0 :       for (k = 0; k < 3; k++)
    7021                 :             :         {
    7022                 :           0 :           data_ref = make_temp_ssa_name (vectype, NULL, "vect_shift1");
    7023                 :           0 :           perm_stmt = gimple_build_assign (data_ref, VEC_PERM_EXPR,
    7024                 :           0 :                                            vect[k % 3], vect[(k + 1) % 3],
    7025                 :             :                                            shift1_mask);
    7026                 :           0 :           vect_finish_stmt_generation (vinfo, stmt_info, perm_stmt, gsi);
    7027                 :           0 :           vect_shift[k] = data_ref;
    7028                 :             :         }
    7029                 :             : 
    7030                 :           0 :       for (k = 0; k < 3; k++)
    7031                 :             :         {
    7032                 :           0 :           data_ref = make_temp_ssa_name (vectype, NULL, "vect_shift2");
    7033                 :           0 :           perm_stmt = gimple_build_assign (data_ref, VEC_PERM_EXPR,
    7034                 :           0 :                                            vect_shift[(4 - k) % 3],
    7035                 :           0 :                                            vect_shift[(3 - k) % 3],
    7036                 :             :                                            shift2_mask);
    7037                 :           0 :           vect_finish_stmt_generation (vinfo, stmt_info, perm_stmt, gsi);
    7038                 :           0 :           vect[k] = data_ref;
    7039                 :             :         }
    7040                 :             : 
    7041                 :           0 :       (*result_chain)[3 - (nelt % 3)] = vect[2];
    7042                 :             : 
    7043                 :           0 :       data_ref = make_temp_ssa_name (vectype, NULL, "vect_shift3");
    7044                 :           0 :       perm_stmt = gimple_build_assign (data_ref, VEC_PERM_EXPR, vect[0],
    7045                 :             :                                        vect[0], shift3_mask);
    7046                 :           0 :       vect_finish_stmt_generation (vinfo, stmt_info, perm_stmt, gsi);
    7047                 :           0 :       (*result_chain)[nelt % 3] = data_ref;
    7048                 :             : 
    7049                 :           0 :       data_ref = make_temp_ssa_name (vectype, NULL, "vect_shift4");
    7050                 :           0 :       perm_stmt = gimple_build_assign (data_ref, VEC_PERM_EXPR, vect[1],
    7051                 :             :                                        vect[1], shift4_mask);
    7052                 :           0 :       vect_finish_stmt_generation (vinfo, stmt_info, perm_stmt, gsi);
    7053                 :           0 :       (*result_chain)[0] = data_ref;
    7054                 :           0 :       return true;
    7055                 :           0 :     }
    7056                 :             :   return false;
    7057                 :           0 : }
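The selector formulas used above are easy to sanity-check by hand.  Below is a minimal sketch (assuming NELT == 8, as in the comment's example) that recomputes the perm3 shuffle and the four shift selectors with the same index arithmetic; it should print {0 3 6 1 4 7 2 5}, {6..13}, {5..12}, {3..10} and {5..12}, matching the constants quoted in the comments above.

#include <cstdio>
#include <vector>

static void
print_sel (const char *name, const std::vector<unsigned> &sel)
{
  printf ("%s:", name);
  for (unsigned v : sel)
    printf (" %u", v);
  printf ("\n");
}

int
main ()
{
  const unsigned nelt = 8;
  std::vector<unsigned> sel (nelt);

  /* perm3: the "{0 3 6 1 4 7 2 5}" reordering shuffle.  */
  unsigned k = 0, l = 0;
  for (unsigned i = 0; i < nelt; i++)
    {
      if (3 * k + (l % 3) >= nelt)
        {
          k = 0;
          l += 3 - (nelt % 3);
        }
      sel[i] = 3 * k + (l % 3);
      k++;
    }
  print_sel ("perm3 ", sel);

  /* shift1 .. shift4, using the same formulas as the loops above.  */
  for (unsigned i = 0; i < nelt; i++)
    sel[i] = 2 * (nelt / 3) + (nelt % 3) + i;
  print_sel ("shift1", sel);
  for (unsigned i = 0; i < nelt; i++)
    sel[i] = 2 * (nelt / 3) + 1 + i;
  print_sel ("shift2", sel);
  for (unsigned i = 0; i < nelt; i++)
    sel[i] = (nelt / 3) + (nelt % 3) / 2 + i;
  print_sel ("shift3", sel);
  for (unsigned i = 0; i < nelt; i++)
    sel[i] = 2 * (nelt / 3) + (nelt % 3) / 2 + i;
  print_sel ("shift4", sel);
  return 0;
}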
    7058                 :             : 
    7059                 :             : /* Function vect_transform_grouped_load.
    7060                 :             : 
    7061                 :             :    Given a chain of input interleaved data-refs (in DR_CHAIN), build statements
    7062                 :             :    to perform their permutation and ascribe the result vectorized statements to
    7063                 :             :    to perform their permutation and ascribe the resulting vectorized statements to
    7064                 :             : */
    7065                 :             : 
    7066                 :             : void
    7067                 :           0 : vect_transform_grouped_load (vec_info *vinfo, stmt_vec_info stmt_info,
    7068                 :             :                              vec<tree> dr_chain,
    7069                 :             :                              int size, gimple_stmt_iterator *gsi)
    7070                 :             : {
    7071                 :           0 :   machine_mode mode;
    7072                 :           0 :   vec<tree> result_chain = vNULL;
    7073                 :             : 
    7074                 :             :   /* DR_CHAIN contains input data-refs that are a part of the interleaving.
    7075                 :             :      RESULT_CHAIN is the output of vect_permute_load_chain; it contains permuted
    7076                 :             :      vectors that are ready for vector computation.  */
    7077                 :           0 :   result_chain.create (size);
    7078                 :             : 
    7079                 :             :   /* If the reassociation width for the vector type is 2 or greater, the target
    7080                 :             :      machine can execute 2 or more vector instructions in parallel.  Otherwise
    7081                 :             :      try to get the chain for the load group using vect_shift_permute_load_chain.  */
    7082                 :           0 :   mode = TYPE_MODE (STMT_VINFO_VECTYPE (stmt_info));
    7083                 :           0 :   if (targetm.sched.reassociation_width (VEC_PERM_EXPR, mode) > 1
    7084                 :           0 :       || pow2p_hwi (size)
    7085                 :           0 :       || !vect_shift_permute_load_chain (vinfo, dr_chain, size, stmt_info,
    7086                 :             :                                          gsi, &result_chain))
    7087                 :           0 :     vect_permute_load_chain (vinfo, dr_chain,
    7088                 :             :                              size, stmt_info, gsi, &result_chain);
    7089                 :           0 :   vect_record_grouped_load_vectors (vinfo, stmt_info, result_chain);
    7090                 :           0 :   result_chain.release ();
    7091                 :           0 : }
    7092                 :             : 
    7093                 :             : /* RESULT_CHAIN contains the output of a group of grouped loads that were
    7094                 :             :    generated as part of the vectorization of STMT_INFO.  Assign the statement
    7095                 :             :    for each vector to the associated scalar statement.  */
    7096                 :             : 
    7097                 :             : void
    7098                 :           0 : vect_record_grouped_load_vectors (vec_info *, stmt_vec_info stmt_info,
    7099                 :             :                                   vec<tree> result_chain)
    7100                 :             : {
    7101                 :           0 :   stmt_vec_info first_stmt_info = DR_GROUP_FIRST_ELEMENT (stmt_info);
    7102                 :           0 :   unsigned int i, gap_count;
    7103                 :           0 :   tree tmp_data_ref;
    7104                 :             : 
    7105                 :             :   /* Put a permuted data-ref in the VECTORIZED_STMT field.
    7106                 :             :      Since we scan the chain starting from its first node, their order
    7107                 :             :      corresponds to the order of data-refs in RESULT_CHAIN.  */
    7108                 :           0 :   stmt_vec_info next_stmt_info = first_stmt_info;
    7109                 :           0 :   gap_count = 1;
    7110                 :           0 :   FOR_EACH_VEC_ELT (result_chain, i, tmp_data_ref)
    7111                 :             :     {
    7112                 :           0 :       if (!next_stmt_info)
    7113                 :             :         break;
    7114                 :             : 
    7115                 :             :       /* Skip the gaps.  Loads created for the gaps will be removed by the dead
    7116                 :             :        code elimination pass later.  No need to check for the first stmt in
    7117                 :             :        the group, since it always exists.
    7118                 :             :        DR_GROUP_GAP is the number of steps in elements from the previous
    7119                 :             :        access (if there is no gap DR_GROUP_GAP is 1).  We skip loads that
    7120                 :             :        correspond to the gaps.  */
    7121                 :           0 :       if (next_stmt_info != first_stmt_info
    7122                 :           0 :           && gap_count < DR_GROUP_GAP (next_stmt_info))
    7123                 :             :         {
    7124                 :           0 :           gap_count++;
    7125                 :           0 :           continue;
    7126                 :             :         }
    7127                 :             : 
    7128                 :             :       /* ???  The following needs cleanup after the removal of
    7129                 :             :          DR_GROUP_SAME_DR_STMT.  */
    7130                 :           0 :       if (next_stmt_info)
    7131                 :             :         {
    7132                 :           0 :           gimple *new_stmt = SSA_NAME_DEF_STMT (tmp_data_ref);
    7133                 :             :           /* We assume that if VEC_STMT is not NULL, this is a case of multiple
    7134                 :             :              copies, and we put the new vector statement last.  */
    7135                 :           0 :           STMT_VINFO_VEC_STMTS (next_stmt_info).safe_push (new_stmt);
    7136                 :             : 
    7137                 :           0 :           next_stmt_info = DR_GROUP_NEXT_ELEMENT (next_stmt_info);
    7138                 :           0 :           gap_count = 1;
    7139                 :             :         }
    7140                 :             :     }
    7141                 :           0 : }
    7142                 :             : 
    7143                 :             : /* Function vect_force_dr_alignment_p.
    7144                 :             : 
    7145                 :             :    Returns whether the alignment of a DECL can be forced to be aligned
    7146                 :             :    on an ALIGNMENT-bit boundary.  */
    7147                 :             : 
    7148                 :             : bool
    7149                 :      628399 : vect_can_force_dr_alignment_p (const_tree decl, poly_uint64 alignment)
    7150                 :             : {
    7151                 :      628399 :   if (!VAR_P (decl))
    7152                 :             :     return false;
    7153                 :             : 
    7154                 :      205576 :   if (decl_in_symtab_p (decl)
    7155                 :      205576 :       && !symtab_node::get (decl)->can_increase_alignment_p ())
    7156                 :             :     return false;
    7157                 :             : 
    7158                 :      197063 :   if (TREE_STATIC (decl))
    7159                 :        8325 :     return (known_le (alignment,
    7160                 :        8325 :                       (unsigned HOST_WIDE_INT) MAX_OFILE_ALIGNMENT));
    7161                 :             :   else
    7162                 :      188738 :     return (known_le (alignment, (unsigned HOST_WIDE_INT) MAX_STACK_ALIGNMENT));
    7163                 :             : }
    7164                 :             : 
    7165                 :             : /* Return whether the data reference DR_INFO is supported with respect to its
    7166                 :             :    alignment.
    7167                 :             :    If CHECK_ALIGNED_ACCESSES is TRUE, check if the access is supported even
    7168                 :             :    if it is aligned, i.e., check if it is possible to vectorize it with different
    7169                 :             :    alignment.  */
    7170                 :             : 
    7171                 :             : enum dr_alignment_support
    7172                 :     2903688 : vect_supportable_dr_alignment (vec_info *vinfo, dr_vec_info *dr_info,
    7173                 :             :                                tree vectype, int misalignment)
    7174                 :             : {
    7175                 :     2903688 :   data_reference *dr = dr_info->dr;
    7176                 :     2903688 :   stmt_vec_info stmt_info = dr_info->stmt;
    7177                 :     2903688 :   machine_mode mode = TYPE_MODE (vectype);
    7178                 :     2903688 :   loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
    7179                 :     2903688 :   class loop *vect_loop = NULL;
    7180                 :     2903688 :   bool nested_in_vect_loop = false;
    7181                 :             : 
    7182                 :     2903688 :   if (misalignment == 0)
    7183                 :             :     return dr_aligned;
    7184                 :     1627311 :   else if (dr_safe_speculative_read_required (stmt_info))
    7185                 :             :     return dr_unaligned_unsupported;
    7186                 :             : 
    7187                 :             :   /* For now assume all conditional loads/stores support unaligned
    7188                 :             :      access without any special code.  */
    7189                 :     1330261 :   if (gcall *stmt = dyn_cast <gcall *> (stmt_info->stmt))
    7190                 :        7791 :     if (gimple_call_internal_p (stmt)
    7191                 :        7791 :         && (gimple_call_internal_fn (stmt) == IFN_MASK_LOAD
    7192                 :        3053 :             || gimple_call_internal_fn (stmt) == IFN_MASK_STORE))
    7193                 :             :       return dr_unaligned_supported;
    7194                 :             : 
    7195                 :     1322470 :   if (loop_vinfo)
    7196                 :             :     {
    7197                 :      648747 :       vect_loop = LOOP_VINFO_LOOP (loop_vinfo);
    7198                 :      648747 :       nested_in_vect_loop = nested_in_vect_loop_p (vect_loop, stmt_info);
    7199                 :             :     }
    7200                 :             : 
    7201                 :             :   /* Possibly unaligned access.  */
    7202                 :             : 
    7203                 :             :   /* We can choose between using the implicit realignment scheme (generating
    7204                 :             :      a misaligned_move stmt) and the explicit realignment scheme (generating
    7205                 :             :      aligned loads with a REALIGN_LOAD).  There are two variants to the
    7206                 :             :      explicit realignment scheme: optimized, and unoptimized.
    7207                 :             :      We can optimize the realignment only if the step between consecutive
    7208                 :             :      vector loads is equal to the vector size.  Since the vector memory
    7209                 :             :      accesses advance in steps of VS (Vector Size) in the vectorized loop, it
    7210                 :             :      is guaranteed that the misalignment amount remains the same throughout the
    7211                 :             :      execution of the vectorized loop.  Therefore, we can create the
    7212                 :             :      "realignment token" (the permutation mask that is passed to REALIGN_LOAD)
    7213                 :             :      at the loop preheader.
    7214                 :             : 
    7215                 :             :      However, in the case of outer-loop vectorization, when vectorizing a
    7216                 :             :      memory access in the inner-loop nested within the LOOP that is now being
    7217                 :             :      vectorized, while it is guaranteed that the misalignment of the
    7218                 :             :      vectorized memory access will remain the same in different outer-loop
    7219                 :             :      iterations, it is *not* guaranteed that it will remain the same throughout
    7220                 :             :      the execution of the inner-loop.  This is because the inner-loop advances
    7221                 :             :      with the original scalar step (and not in steps of VS).  If the inner-loop
    7222                 :             :      step happens to be a multiple of VS, then the misalignment remains fixed
    7223                 :             :      and we can use the optimized realignment scheme.  For example:
    7224                 :             : 
    7225                 :             :       for (i=0; i<N; i++)
    7226                 :             :         for (j=0; j<M; j++)
    7227                 :             :           s += a[i+j];
    7228                 :             : 
    7229                 :             :      When vectorizing the i-loop in the above example, the step between
    7230                 :             :      consecutive vector loads is 1, and so the misalignment does not remain
    7231                 :             :      fixed across the execution of the inner-loop, and the realignment cannot
    7232                 :             :      be optimized (as illustrated in the following pseudo vectorized loop):
    7233                 :             : 
    7234                 :             :       for (i=0; i<N; i+=4)
    7235                 :             :         for (j=0; j<M; j++){
    7236                 :             :           vs += vp[i+j]; // misalignment of &vp[i+j] is {0,1,2,3,0,1,2,3,...}
    7237                 :             :                          // when j is {0,1,2,3,4,5,6,7,...} respectively.
    7238                 :             :                          // (assuming that we start from an aligned address).
    7239                 :             :           }
    7240                 :             : 
    7241                 :             :      We therefore have to use the unoptimized realignment scheme:
    7242                 :             : 
    7243                 :             :       for (i=0; i<N; i+=4)
    7244                 :             :         for (j=k; j<M; j+=4)
    7245                 :             :           vs += vp[i+j]; // misalignment of &vp[i+j] is always k (assuming
    7246                 :             :                          // that the misalignment of the initial address is
    7247                 :             :                          // 0).
    7248                 :             : 
    7249                 :             :      The loop can then be vectorized as follows:
    7250                 :             : 
    7251                 :             :       for (k=0; k<4; k++){
    7252                 :             :         rt = get_realignment_token (&vp[k]);
    7253                 :             :         for (i=0; i<N; i+=4){
    7254                 :             :           v1 = vp[i+k];
    7255                 :             :           for (j=k; j<M; j+=4){
    7256                 :             :             v2 = vp[i+j+VS-1];
    7257                 :             :             va = REALIGN_LOAD <v1,v2,rt>;
    7258                 :             :             vs += va;
    7259                 :             :             v1 = v2;
    7260                 :             :           }
    7261                 :             :         }
    7262                 :             :     } */
    7263                 :             : 
    7264                 :     1322470 :   if (DR_IS_READ (dr))
    7265                 :             :     {
    7266                 :      503758 :       if (can_implement_p (vec_realign_load_optab, mode)
    7267                 :      503758 :           && (!targetm.vectorize.builtin_mask_for_load
    7268                 :           0 :               || targetm.vectorize.builtin_mask_for_load ()))
    7269                 :             :         {
    7270                 :             :           /* If we are doing SLP then the accesses need not have the
    7271                 :             :              same alignment, instead it depends on the SLP group size.  */
    7272                 :           0 :           if (loop_vinfo
    7273                 :           0 :               && STMT_SLP_TYPE (stmt_info)
    7274                 :           0 :               && STMT_VINFO_GROUPED_ACCESS (stmt_info)
    7275                 :           0 :               && !multiple_p (LOOP_VINFO_VECT_FACTOR (loop_vinfo)
    7276                 :           0 :                               * (DR_GROUP_SIZE
    7277                 :           0 :                                    (DR_GROUP_FIRST_ELEMENT (stmt_info))),
    7278                 :           0 :                               TYPE_VECTOR_SUBPARTS (vectype)))
    7279                 :             :             ;
    7280                 :           0 :           else if (!loop_vinfo
    7281                 :           0 :                    || (nested_in_vect_loop
    7282                 :           0 :                        && maybe_ne (TREE_INT_CST_LOW (DR_STEP (dr)),
    7283                 :           0 :                                     GET_MODE_SIZE (TYPE_MODE (vectype)))))
    7284                 :           0 :             return dr_explicit_realign;
    7285                 :             :           else
    7286                 :           0 :             return dr_explicit_realign_optimized;
    7287                 :             :         }
    7288                 :             :     }
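/* Editorial, minimal sketch of the step test above, restated over plain
   integers rather than GCC's poly-int types: in a nested loop the optimized
   realignment scheme is only usable when the inner-loop step equals the
   vector size, because only then is the misalignment loop-invariant.  The
   enum and function below are illustrative, not GCC's.  */
enum realign_kind { REALIGN_OPTIMIZED, REALIGN_UNOPTIMIZED };

static enum realign_kind
choose_realign (unsigned step_bytes, unsigned vector_size_bytes)
{
  /* Mirrors the maybe_ne (TREE_INT_CST_LOW (DR_STEP (dr)),
     GET_MODE_SIZE (TYPE_MODE (vectype))) comparison above.  */
  return step_bytes == vector_size_bytes
         ? REALIGN_OPTIMIZED : REALIGN_UNOPTIMIZED;
}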
    7289                 :             : 
    7290                 :     1322470 :   bool is_packed = false;
    7291                 :     1322470 :   tree type = TREE_TYPE (DR_REF (dr));
    7292                 :     1322470 :   if (misalignment == DR_MISALIGNMENT_UNKNOWN)
    7293                 :      938789 :     is_packed = not_size_aligned (DR_REF (dr));
    7294                 :     1322470 :   if (targetm.vectorize.support_vector_misalignment (mode, type, misalignment,
    7295                 :             :                                                      is_packed))
    7296                 :             :     return dr_unaligned_supported;
    7297                 :             : 
    7298                 :             :   /* Unsupported.  */
    7299                 :             :   return dr_unaligned_unsupported;
    7300                 :             : }
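/* Editorial, hypothetical caller sketch: how the result of
   vect_supportable_dr_alignment typically drives the decision.  Only the
   function and the dr_* enumerators used here appear in the source above;
   the helper name is invented for illustration.  */
static bool
access_is_vectorizable (vec_info *vinfo, dr_vec_info *dr_info,
                        tree vectype, int misalignment)
{
  switch (vect_supportable_dr_alignment (vinfo, dr_info, vectype,
                                         misalignment))
    {
    case dr_aligned:                    /* misalignment known to be zero        */
    case dr_unaligned_supported:        /* target handles the misaligned access */
    case dr_explicit_realign:           /* per-access realignment sequence      */
    case dr_explicit_realign_optimized: /* realignment token hoisted to the
                                           loop preheader                        */
      return true;
    case dr_unaligned_unsupported:
    default:
      return false;                     /* caller must peel, version, or refuse
                                           to vectorize                          */
    }
}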
        

Generated by: LCOV version 2.1-beta

The LCOV profile is generated on an x86_64 machine using the following configure options: configure --disable-bootstrap --enable-coverage=opt --enable-languages=c,c++,fortran,go,jit,lto,rust,m2 --enable-host-shared. The GCC test suite is run with the built compiler.